Add run(), most instructions
diff --git a/cpu.c b/cpu.c
index fe5cf6a..5a7c391 100644
--- a/cpu.c
+++ b/cpu.c
@@ -30,21 +30,69 @@
 	return cpu;
 }
 
+uint16_t le_to_native(uint8_t a, uint8_t b)
+{
+	// Combine two bytes read in little-endian order into a 16-bit value:
+	// a is the low byte (the one stored first), b is the high byte.
+	// Shifts act on the value, not on how the host stores it, so the
+	// result must not depend on a host-endianness macro.
+	return (uint16_t)(b << 8 | a);
+}
+
+void native_to_le(uint16_t n, uint8_t *a, uint8_t *b)
+{
+	// Split n into little-endian byte order: *a receives the low byte
+	// (the one stored first), *b receives the high byte.
+	//
+	// Masking and shifting act on the value, not on how the host stores
+	// it, so the split must not depend on a host-endianness macro.
+	*a = (uint8_t)(n & 0xFF);
+	*b = (uint8_t)(n >> 8);
+}
+
 void stack_push(cpu_t *cpu, uint8_t v)
 {
 	cpu->mem[cpu->regs[SP]-- + 0x100] = v;
 }
 
+void stack_pushle(cpu_t *cpu, uint16_t v)
+{
+	uint8_t first, second;
+	native_to_le(v, &first, &second);
+	// stack_push() moves SP downward, so `second` lands one address above `first`
+	stack_push(cpu, second);
+	stack_push(cpu, first);
+}
+
 uint8_t stack_pop(cpu_t *cpu)
 {
 	return cpu->mem[cpu->regs[SP]++ + 0x100];
 }
 
+uint16_t stack_pople(cpu_t *cpu)
+{
+	const uint8_t first = stack_pop(cpu);  // NOTE(review): stack_pop() reads mem[SP] before incrementing SP — confirm this is the byte last pushed
+	const uint8_t second = stack_pop(cpu); // popped second; the two pops must stay in this order
+	return le_to_native(first, second);
+}
+
 void free_cpu(cpu_t *cpu)
 {
 	free(cpu->mem);
 }
 
+// rotate right by n bits; n is taken modulo 8 so every count is well defined
+uint8_t ror(uint8_t a, uint8_t n)
+{
+	return (uint8_t)((a >> (n & 7)) | (a << ((8 - (n & 7)) & 7)));
+}
+
+// rotate left by n bits; n is taken modulo 8 so every count is well defined
+uint8_t rol(uint8_t a, uint8_t n)
+{
+	return (uint8_t)((a << (n & 7)) | (a >> ((8 - (n & 7)) & 7)));
+}
+
 void stat_nz(cpu_t *cpu, int8_t v)
 {
 	cpu->status.negative = v < 0;
@@ -63,7 +111,27 @@
 	cpu->status.carry = c < a || c < b;
 }
 
-void execute(cpu_t *cpu, const char *mnemonic, uint8_t op, arg_t a)
+// Set negative, zero and carry the way the 6502 compare instructions do:
+// as if mem were subtracted from cpu->regs[reg], keeping only the flags.
+// reg is an index into cpu->regs; mem is the operand value.
+void cmp(cpu_t *cpu, uint8_t reg, uint8_t mem)
+{
+	const uint8_t lhs = cpu->regs[reg];
+	const uint8_t diff = (uint8_t)(lhs - mem);
+
+	// negative is bit 7 of the 8-bit difference, which is not the same as
+	// "reg < mem": 0xFF vs 0x01 yields 0xFE (set), 0x01 vs 0xFF yields
+	// 0x02 (clear)
+	cpu->status.negative = diff >> 7;
+
+	// zero means the operands were equal
+	cpu->status.zero = diff == 0;
+
+	// carry means no borrow was needed, i.e. reg >= mem as unsigned values
+	cpu->status.carry = lhs >= mem;
+}
+
+void execute(cpu_t *cpu, const char *mnemonic, uint8_t op, arg_t a, uint8_t am)
 {
 	// used to save space
 	#define REGS \
@@ -131,19 +199,205 @@
 			cpu->mem[a.ptr]--;
 			stat_nz(cpu, cpu->mem[a.ptr]);
 			break;
+
+		case DEX:
+			cpu->regs[X]--;
+			stat_nz(cpu, cpu->regs[X]);
+			break;
+
+		case DEY:
+			cpu->regs[Y]--;
+			stat_nz(cpu, cpu->regs[Y]);
+			break;
+
+		case ASL:
+			// Accumulator mode has to be detected here; doing it while
+			// decoding operands would need a substantial change to the
+			// emulator's architecture. Carry receives the old bit 7.
+			if (am == AM_ACC)
+			{
+				cpu->status.carry = cpu->regs[A] >> 7;
+				cpu->regs[A] <<= 1;
+				stat_nz(cpu, cpu->regs[A]);
+			}
+			else
+			{
+				cpu->status.carry = cpu->mem[a.ptr] >> 7; // index by address, not by operand value
+				cpu->mem[a.ptr] <<= 1;
+				stat_nz(cpu, cpu->mem[a.ptr]);
+			}
+			break;
+
+		case LSR: // logical shift right: the old bit 0 moves into carry
+			if (am == AM_ACC)
+			{
+				cpu->status.carry = cpu->regs[A] & 1;
+				cpu->regs[A] >>= 1;
+				stat_nz(cpu, cpu->regs[A]);
+			}
+			else
+			{
+				cpu->status.carry = cpu->mem[a.ptr] & 1; // index by address; only bit 0 is shifted out
+				cpu->mem[a.ptr] >>= 1;
+				stat_nz(cpu, cpu->mem[a.ptr]);
+			}
+			break;
+
+		case ROL:
+		{
+			// 6502 ROL rotates through carry: the old carry enters bit 0 and
+			// the old bit 7 becomes the new carry
+			uint8_t old = (am == AM_ACC) ? cpu->regs[A] : cpu->mem[a.ptr];
+			uint8_t res = (uint8_t)(old << 1) | cpu->status.carry;
+			cpu->status.carry = old >> 7;
+			if (am == AM_ACC)
+				cpu->regs[A] = res;
+			else
+				cpu->mem[a.ptr] = res;
+			stat_nz(cpu, res);
+			break;
+		}
+
+		case ROR:
+		{
+			// 6502 ROR rotates through carry: the old carry enters bit 7 and
+			// the old bit 0 becomes the new carry
+			uint8_t old = (am == AM_ACC) ? cpu->regs[A] : cpu->mem[a.ptr];
+			uint8_t res = (uint8_t)(old >> 1) | (uint8_t)(cpu->status.carry << 7);
+			cpu->status.carry = old & 1;
+			if (am == AM_ACC)
+				cpu->regs[A] = res;
+			else
+				cpu->mem[a.ptr] = res;
+			stat_nz(cpu, res);
+			break;
+		}
+
+		case AND:
+			cpu->regs[A] &= a.val;
+			stat_nz(cpu, cpu->regs[A]);
+			break;
+
+		case ORA:
+			cpu->regs[A] |= a.val;
+			stat_nz(cpu, cpu->regs[A]);
+			break;
+
+		case EOR:
+			cpu->regs[A] ^= a.val;
+			stat_nz(cpu, cpu->regs[A]);
+			break;
+
+		case CMP:
+			cmp(cpu, A, a.val);
+			break;
+
+		case CPX:
+			cmp(cpu, X, a.val);
+			break;
+
+		case CPY:
+			cmp(cpu, Y, a.val);
+			break;
+
+		// TODO: implement BIT here
+
+		#define BRANCHES \
+			B(BCC, carry == 0) \
+			B(BCS, carry == 1) \
+			B(BNE, zero == 0) \
+			B(BEQ, zero == 1) \
+			B(BPL, negative == 0) \
+			B(BMI, negative == 1) \
+			B(BVC, overflow == 0) \
+			B(BVS, overflow == 1)
+
+		#define B(i, c) \
+			case i: \
+				if (cpu->status . c) \
+					cpu->pc = a.ptr;\
+				break;
+
+			BRANCHES
+
+		#undef B
+		#undef BRANCHES
+
+		#define TRANSFERS \
+			T(A, X) \
+			T(X, A) \
+			T(A, Y) \
+			T(Y, A)
+
+		#define T(a, b) \
+			case T ## a ## b: \
+				cpu->regs[b] = cpu->regs[a]; \
+				stat_nz(cpu, cpu->regs[b]); \
+				break;
+
+			TRANSFERS
+
+		#undef T
+		#undef TRANSFERS
+
+		case TSX:
+			cpu->regs[X] = cpu->regs[SP];
+			stat_nz(cpu, cpu->regs[X]);
+			break;
+
+		case TXS:
+			// unlike the other transfers, TXS leaves the status flags untouched
+			cpu->regs[SP] = cpu->regs[X];
+			break;
+
+		case PHA:
+			stack_push(cpu, cpu->regs[A]);
+			break;
+
+		case PLA:
+			cpu->regs[A] = stack_pop(cpu);
+			stat_nz(cpu, cpu->regs[A]);
+			break;
+
+		case PHP:
+			stack_push(cpu, *(uint8_t *)(&cpu->status));
+			break;
+
+		case PLP:
+			// write the popped byte straight over the flag struct's storage
+			*(uint8_t *)(&cpu->status) = stack_pop(cpu);
+			break;
+
+		case JMP:
+			cpu->pc = a.ptr;
+			break;
+
+		case JSR:
+			stack_pushle(cpu, cpu->pc);
+			cpu->pc = a.ptr;
+			break;
+
+		case RTS:
+			cpu->pc = stack_pople(cpu);
+			break;
+
+		// TODO: implement RTI
+		// TODO: implement flag instructions
+		
+		case BRK:
+			// TODO: trigger an interrupt
+			cpu->running = false;
+			break;
+
+		case NOP:
+			break;
+
+		default:
+			die("Unsupported opcode: %x\n", op);
 	}
 	#undef REGS
 }
 
-uint16_t le_to_native(uint8_t a, uint8_t b)
-{
-#ifdef LITTLE_ENDIAN
-	return b << 8 | a;
-#else
-	return a << 8 | b;
-#endif
-}
-
 uint16_t fetch_le(cpu_t *cpu)
 {
 	uint8_t a = cpu->mem[cpu->pc++];
@@ -255,7 +509,7 @@
 	{
 #define INST(mn, am, op) \
 		case op: \
-			execute(cpu, #mn, mn, fetch_addr(cpu, am, 0)); \
+			execute(cpu, #mn, mn, fetch_addr(cpu, am, 0), am); \
 			break;
 
 		INSTRUCTIONS
@@ -328,6 +582,14 @@
 	}
 }
 
+void disas_num(cpu_t *cpu, uint16_t num)
+{
+	// call disas_step() exactly `num` times
+	uint16_t remaining = num;
+	while (remaining-- > 0)
+		disas_step(cpu);
+}
+
 void disas(cpu_t *cpu)
 {
 	// Raw binary, no way to know what's code what isn't
@@ -336,3 +598,12 @@
 		disas_step(cpu);
 	}
 }
+void run(cpu_t *cpu)
+{
+	// call step() until cpu->running is cleared (the BRK case in
+	// execute() does this), then report the halt
+	for (; cpu->running;)
+		step(cpu);
+
+	printf("CPU Halted\n");
+}