diff --git a/README.md b/README.md
index 11e1de0..7fc2fec 100644
--- a/README.md
+++ b/README.md
@@ -7,4 +7,23 @@
 
 The `instructions.h` header is generated from `6502.csv` and contains
 definitions of every 6502 opcode, its mnemonic and addressing mode.
-It is built automatically by cmake. 
\ No newline at end of file
+It is built automatically by CMake.
+
+
+```
+ ____________________________________
+/ On the subject of C program        \
+| indentation: "In My Egotistical    |
+| Opinion, most people's C programs  |
+| should be indented                 |
+| six feet downward and covered with |
+| dirt."                             |
+|                                    |
+\ -- Blair P. Houghton               /
+ ------------------------------------
+        \   ^__^
+         \  (oo)\_______
+            (__)\       )\/\
+                ||----w |
+                ||     ||
+```
diff --git a/cpu.c b/cpu.c
index fe5cf6a..5a7c391 100644
--- a/cpu.c
+++ b/cpu.c
@@ -30,21 +30,69 @@
 	return cpu;
 }
 
+uint16_t le_to_native(uint8_t a, uint8_t b)
+{
+	// a is the first (low) byte and b the second (high) byte of a
+	// little-endian pair. Shifts act on values rather than on storage,
+	// so the same expression is right on every host and no byte-order
+	// #ifdef is needed.
+	return (uint16_t)(b << 8 | a);
+}
+
+void native_to_le(uint16_t n, uint8_t *a, uint8_t *b)
+{
+	// Splits n into a little-endian byte pair: *a receives the low
+	// byte and *b the high byte, the same order le_to_native(a, b)
+	// takes them in. Masking and shifting work on the value of n, so
+	// the split does not depend on the host byte order and needs no
+	// #ifdef.
+	*a = (uint8_t)(n & 0xFF);
+	*b = (uint8_t)(n >> 8);
+}
+
 void stack_push(cpu_t *cpu, uint8_t v)
 {
 	cpu->mem[cpu->regs[SP]-- + 0x100] = v;
 }
 
+void stack_pushle(cpu_t *cpu, uint16_t v)
+{
+	// The stack grows downward, so pushing the high byte first leaves
+	// the low byte at the lower address: little-endian in memory, and
+	// the low byte is the first one stack_pople() pops.
+	stack_push(cpu, (uint8_t)(v >> 8));
+	stack_push(cpu, (uint8_t)(v & 0xFF));
+}
+
 uint8_t stack_pop(cpu_t *cpu)
 {
 	return cpu->mem[cpu->regs[SP]++ + 0x100];
 }
 
+uint16_t stack_pople(cpu_t *cpu)
+{
+	uint8_t first = stack_pop(cpu);  // most recently pushed byte
+	uint8_t second = stack_pop(cpu); // the byte pushed before it
+	return le_to_native(first, second);
+}
+
 void free_cpu(cpu_t *cpu)
 {
 	free(cpu->mem);
 }
 
+// rotate an 8-bit value right by n bits; n is taken modulo 8
+uint8_t ror(uint8_t a, uint8_t n)
+{
+	return (uint8_t)((a >> (n & 7u)) | (a << ((8u - n) & 7u)));
+}
+
+// rotate an 8-bit value left by n bits; n is taken modulo 8
+uint8_t rol(uint8_t a, uint8_t n)
+{
+	return (uint8_t)((a << (n & 7u)) | (a >> ((8u - n) & 7u)));
+}
+
 void stat_nz(cpu_t *cpu, int8_t v)
 {
 	cpu->status.negative = v < 0;
@@ -63,7 +111,27 @@
 	cpu->status.carry = c < a || c < b;
 }
 
-void execute(cpu_t *cpu, const char *mnemonic, uint8_t op, arg_t a)
+void cmp(cpu_t *cpu, uint8_t reg, uint8_t mem)
+{
+	// Shared by CMP/CPX/CPY: compares cpu->regs[reg] with mem by
+	// computing reg - mem modulo 256, without storing the result.
+	//
+	//   carry    set when reg >= mem (no borrow)
+	//   zero     set when reg == mem
+	//   negative bit 7 of the difference
+	//
+	// negative is NOT the same as "reg < mem": 0x00 vs 0x90 leaves 0x70
+	// (negative clear) and 0xFF vs 0x01 leaves 0xFE (negative set), so
+	// it has to be read from the subtraction result.
+	uint8_t lhs = cpu->regs[reg];
+	uint8_t diff = (uint8_t)(lhs - mem);
+
+	cpu->status.carry = lhs >= mem;
+	cpu->status.zero = lhs == mem;
+	cpu->status.negative = diff >> 7;
+}
+
+void execute(cpu_t *cpu, const char *mnemonic, uint8_t op, arg_t a, uint8_t am)
 {
 	// used to save space
 	#define REGS \
@@ -131,19 +199,205 @@
 			cpu->mem[a.ptr]--;
 			stat_nz(cpu, cpu->mem[a.ptr]);
 			break;
+
+		case DEX:
+			cpu->regs[X]--;
+			stat_nz(cpu, cpu->regs[X]);
+			break;
+
+		case DEY:
+			cpu->regs[Y]--;
+			stat_nz(cpu, cpu->regs[Y]);
+			break;
+
+		case ASL:
+			// Bit 7 is shifted out into carry. Accumulator mode has to be
+			// detected here: handling it during operand decoding would need
+			// a substantial change to the architecture of the emulator.
+			if (am == AM_ACC)
+			{
+				cpu->status.carry = cpu->regs[A] >> 7;
+				cpu->regs[A] <<= 1;
+				stat_nz(cpu, cpu->regs[A]);
+			}
+			else
+			{
+				cpu->status.carry = cpu->mem[a.ptr] >> 7; // index by address, not by a.val
+				cpu->mem[a.ptr] <<= 1;
+				stat_nz(cpu, cpu->mem[a.ptr]);
+			}
+			break;
+
+		case LSR:
+			if (am == AM_ACC)
+			{
+				cpu->status.carry = cpu->regs[A] & 1;
+				cpu->regs[A] >>= 1;
+				stat_nz(cpu, cpu->regs[A]);
+			}
+			else
+			{
+				cpu->status.carry = cpu->mem[a.ptr] & 1; // bit 0 of the byte at a.ptr is shifted out
+				cpu->mem[a.ptr] >>= 1;
+				stat_nz(cpu, cpu->mem[a.ptr]);
+			}
+			break;
+
+		case ROL:
+		{
+			// 6502 ROL is a 9-bit rotate through the carry flag: the old
+			// carry enters bit 0 and the old bit 7 becomes the new carry.
+			// A plain 8-bit rotate would feed bit 7 straight back into
+			// bit 0 and ignore the incoming carry.
+			uint8_t *target = (am == AM_ACC) ? &cpu->regs[A] : &cpu->mem[a.ptr];
+			uint8_t carry_in = cpu->status.carry != 0;
+
+			cpu->status.carry = *target >> 7;
+			*target = (uint8_t)((*target << 1) | carry_in);
+			stat_nz(cpu, *target);
+			break;
+		}
+
+		case ROR:
+		{
+			// 6502 ROR is a 9-bit rotate through the carry flag: the old
+			// carry enters bit 7 and the old bit 0 becomes the new carry.
+			// A plain 8-bit rotate would feed bit 0 straight back into
+			// bit 7 and ignore the incoming carry.
+			uint8_t *target = (am == AM_ACC) ? &cpu->regs[A] : &cpu->mem[a.ptr];
+			uint8_t carry_in = cpu->status.carry != 0;
+
+			cpu->status.carry = *target & 1;
+			*target = (uint8_t)((*target >> 1) | (carry_in << 7));
+			stat_nz(cpu, *target);
+			break;
+		}
+
+		case AND:
+			cpu->regs[A] &= a.val;
+			stat_nz(cpu, cpu->regs[A]);
+			break;
+
+		case ORA:
+			cpu->regs[A] |= a.val;
+			stat_nz(cpu, cpu->regs[A]);
+			break;
+
+		case EOR:
+			cpu->regs[A] ^= a.val;
+			stat_nz(cpu, cpu->regs[A]);
+			break;
+
+		case CMP:
+			cmp(cpu, A, a.val);
+			break;
+
+		case CPX:
+			cmp(cpu, X, a.val);
+			break;
+
+		case CPY:
+			cmp(cpu, Y, a.val);
+			break;
+
+		// TODO: implement BIT here
+
+		#define BRANCHES \
+			B(BCC, carry == 0) \
+			B(BCS, carry == 1) \
+			B(BNE, zero == 0) \
+			B(BEQ, zero == 1) \
+			B(BPL, negative == 0) \
+			B(BMI, negative == 1) \
+			B(BVC, overflow == 0) \
+			B(BVS, overflow == 1)
+
+		#define B(i, c) \
+			case i: \
+				if (cpu->status . c) \
+					cpu->pc = a.ptr;\
+				break;
+
+			BRANCHES
+
+		#undef B
+		#undef BRANCHES
+
+		#define TRANSFERS \
+			T(A, X) \
+			T(X, A) \
+			T(A, Y) \
+			T(Y, A)
+
+		#define T(a, b) \
+			case T ## a ## b: \
+				cpu->regs[b] = cpu->regs[a]; \
+				stat_nz(cpu, cpu->regs[b]); \
+				break;
+
+			TRANSFERS
+
+		#undef T
+		#undef TRANSFERS
+
+		case TSX:
+			cpu->regs[X] = cpu->regs[SP];
+			stat_nz(cpu, cpu->regs[X]);
+			break;
+
+		case TXS:
+			cpu->regs[SP] = cpu->regs[X];
+			// unlike the other transfers, TXS leaves N and Z untouched
+			break;
+
+		case PHA:
+			stack_push(cpu, cpu->regs[A]);
+			break;
+
+		case PLA:
+			cpu->regs[A] = stack_pop(cpu);
+			stat_nz(cpu, cpu->regs[A]);
+			break;
+
+		case PHP:
+			stack_push(cpu, *(uint8_t *)(&cpu->status));
+			break;
+
+		case PLP:
+		{
+			*(uint8_t *)(&cpu->status) = stack_pop(cpu); // replace the whole status byte
+			break; // required: without it control falls into JMP and overwrites pc
+		}
+
+		case JMP:
+			cpu->pc = a.ptr;
+			break;
+
+		case JSR:
+			stack_pushle(cpu, cpu->pc); // return address, popped again by RTS
+			cpu->pc = a.ptr; // the jump itself; pushing alone never enters the subroutine
+			break;
+		case RTS:
+			cpu->pc = stack_pople(cpu);
+			break;
+
+		// TODO: implement RTI
+		// TODO: implement flag instructions
+		
+		case BRK:
+			// TODO: trigger an interrupt
+			cpu->running = false;
+			break;
+
+		case NOP:
+			break;
+
+		default: // any opcode without a case above
+			die("Unsupported opcode: %x\n", op); // NOTE(review): cpu.h declares die(const char *message) with one parameter; confirm this two-argument call compiles
 	}
 	#undef REGS
 }
 
-uint16_t le_to_native(uint8_t a, uint8_t b)
-{
-#ifdef LITTLE_ENDIAN
-	return b << 8 | a;
-#else
-	return a << 8 | b;
-#endif
-}
-
 uint16_t fetch_le(cpu_t *cpu)
 {
 	uint8_t a = cpu->mem[cpu->pc++];
@@ -255,7 +509,7 @@
 	{
 #define INST(mn, am, op) \
 		case op: \
-			execute(cpu, #mn, mn, fetch_addr(cpu, am, 0)); \
+			execute(cpu, #mn, mn, fetch_addr(cpu, am, 0), am); \
 			break;
 
 		INSTRUCTIONS
@@ -328,6 +582,14 @@
 	}
 }
 
+void disas_num(cpu_t *cpu, uint16_t num)
+{
+	while (num-- > 0) // num is a by-value copy, so counting it down stays local
+	{
+		disas_step(cpu);
+	}
+}
+
 void disas(cpu_t *cpu)
 {
 	// Raw binary, no way to know what's code what isn't
@@ -336,3 +598,12 @@
 		disas_step(cpu);
 	}
 }
+void run(cpu_t *cpu)
+{
+	// Step one instruction at a time until the running flag is cleared
+	// (the BRK case in execute() does that).
+	while (cpu->running)
+		step(cpu);
+
+	printf("CPU Halted\n");
+}
diff --git a/cpu.h b/cpu.h
index 651f97f..888972c 100644
--- a/cpu.h
+++ b/cpu.h
@@ -138,4 +138,8 @@
 void step(cpu_t *cpu);
 void free_cpu(cpu_t *cpu);
 void die(const char *message);
+// IMPORTANT: all disassembly functions mess with the PC
 void disas(cpu_t *cpu);
+void disas_num(cpu_t *cpu, uint16_t num);
+void disas_step(cpu_t *cpu);
+void run(cpu_t *cpu);
diff --git a/main.c b/main.c
index 5ab918d..888168c 100644
--- a/main.c
+++ b/main.c
@@ -9,17 +9,19 @@
 
 int main(int argc, char **argv)
 {
-	bool disflag = 0,
-		runflag = 0,
-		helpflag = 0,
-		debugflag = 0,
-		should_read = 0;
+	bool disflag = false,
+		runflag = false,
+		helpflag = false,
+		debugflag = false,
+		should_read = false;
+
+	int disasm_len = 0;
 
 	FILE *input = stdin;
 
 	char c;
 
-	while ((c = getopt(argc, argv, "Ddrhi:")) != -1)
+	while ((c = getopt(argc, argv, "Ddrhi:n:")) != -1)
 	{
 		switch (c)
 		{
@@ -38,6 +40,9 @@
 		case 'i':
 			input = fopen(optarg, "r");
 			break;
+		case 'n':
+			disasm_len = atoi(optarg);
+			break;
 		case 'h':
 		case '?':
 			helpflag = 1;
@@ -53,6 +58,7 @@
 			"	-r run input\n"
 			"	-D debug input (open debug prompt)\n"
 			"	-i <input> set input file, defaults to standard input\n"
+			"	-n <number> number of instructions to disassemble, 0 for all\n"
 			"	-h, -? show this help page\n");
 		return 0;
 	}
@@ -64,11 +70,20 @@
 		cpu = new_cpu();
 		fread(cpu.mem, 0xFFFF, 1, input);
 	}
+	else
+	{
+		puts("6502 toolchain by swissChili <swisschili.sh>");
+		printf("%s -h  for help\n", argv[0]);
+	}
 
 	if (disflag)
 	{
 		disas(&cpu);
 	}
+	else if (runflag)
+	{
+		run(&cpu);
+	}
 	else if (debugflag)
 	{
 		debug(&cpu);
diff --git a/test.dat b/test.dat
index d5569e0..310bd9e 100644
--- a/test.dat
+++ b/test.dat
Binary files differ
