diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8c4167f..a913e68 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -12,4 +12,4 @@
 		WORKING_DIRECTORY ${CMAKE_SOURCE_DIR})
 endif()
 
-add_executable(emu-6502 main.c cpu.c cpu.h instructions.h)
+add_executable(emu-6502 main.c cpu.c cpu.h dbg.c dbg.h instructions.h)
diff --git a/cpu.c b/cpu.c
index af9ad27..fe5cf6a 100644
--- a/cpu.c
+++ b/cpu.c
@@ -7,15 +7,18 @@
 #include <string.h>
 
 #define die(m, ...) \
-	printf("\033[31mError: " m "\033[0m\n", ##__VA_ARGS__); \
+	printf("\033[31m" m "\033[0m\n", ##__VA_ARGS__); \
 	exit(1);
 
+#define warn(m, ...) \
+	printf("\033[33m" m "\033[0m\n", ##__VA_ARGS__);
+
 cpu_t new_cpu()
 {
 	cpu_t cpu = { 0 };
-	cpu.regs[SR] = UNUSED; // unused flag always set
 	cpu.regs[SP] = 0xFD; // stack at is 0x100 + SP
 	cpu.pc = 0; // arbitrary program counter start
+	cpu.running = true;
 	cpu.mem = malloc(0xFFFF);
 	memset(cpu.mem, 0, 0xFFFF);
 
@@ -27,24 +30,117 @@
 	return cpu;
 }
 
+void stack_push(cpu_t *cpu, uint8_t v)
+{
+	cpu->mem[cpu->regs[SP]-- + 0x100] = v;
+}
+
+uint8_t stack_pop(cpu_t *cpu)
+{
+	return cpu->mem[cpu->regs[SP]++ + 0x100];
+}
+
 void free_cpu(cpu_t *cpu)
 {
 	free(cpu->mem);
 }
 
-void execute(cpu_t *cpu, const char *mnemonic, uint8_t op, arg_t addr)
+void stat_nz(cpu_t *cpu, int8_t v)
 {
+	cpu->status.negative = v < 0;
+	cpu->status.zero = v == 0;
+}
+
+// Used to check for overflow, is c unique?
+bool last_unique(bool a, bool b, bool c)
+{
+	return a == b && a != c;
+}
+
+void stat_cv(cpu_t *cpu, uint8_t a, uint8_t b, uint8_t c)
+{
+	cpu->status.overflow = last_unique(a >> 7, b >> 7, c >> 7);
+	cpu->status.carry = c < a || c < b;
+}
+
+void execute(cpu_t *cpu, const char *mnemonic, uint8_t op, arg_t a)
+{
+	// used to save space
+	#define REGS \
+		R(X) R(A) R(Y)
+
 	switch (op) {
-		
+		// Load and store instructions:
+		#define R(reg) \
+			case LD##reg: \
+				cpu->regs[reg] = a.val; \
+				stat_nz(cpu, a.val); \
+				break;
+
+			REGS
+
+		#undef R
+
+		#define R(reg) \
+			case ST##reg: \
+				cpu->mem[a.ptr] = cpu->regs[reg]; \
+				break; \
+
+			REGS
+
+		#undef R
+
+		// Arithmetic instructions:
+		// NOTE: binary coded decimals are NOT SUPPORTED because I don't want
+		// to implement them.
+		case ADC:
+		{
+			uint8_t sum = cpu->regs[A] + a.val + cpu->status.carry;
+			// signed overflow
+			stat_cv(cpu, cpu->regs[A], a.val + cpu->status.carry, sum);
+			stat_nz(cpu, sum);
+			cpu->regs[A] = sum;
+			break;
+		}
+
+		case SBC:
+		{
+			uint8_t diff = cpu->regs[A] - a.val - !cpu->status.carry;
+			stat_cv(cpu, cpu->regs[A], a.val - !cpu->status.carry, diff);
+			stat_nz(cpu, diff);
+			cpu->regs[A] = diff;
+			break;
+		}
+
+		case INC:
+			cpu->mem[a.ptr]++;
+			stat_nz(cpu, cpu->mem[a.ptr]);
+			break;
+
+		case INX:
+			cpu->regs[X]++;
+			stat_nz(cpu, cpu->regs[X]);
+			break;
+
+		case INY:
+			cpu->regs[Y]++;
+			stat_nz(cpu, cpu->regs[Y]);
+			break;
+
+		case DEC:
+			cpu->mem[a.ptr]--;
+			stat_nz(cpu, cpu->mem[a.ptr]);
+			break;
 	}
+	#undef REGS
 }
 
 uint16_t le_to_native(uint8_t a, uint8_t b)
 {
 #ifdef LITTLE_ENDIAN
-	return a << 8 | b;
-#else
 	return b << 8 | a;
+#else
+	return a << 8 | b;
 #endif
 }
 
@@ -91,15 +187,21 @@
 
 		case AM_REL:
 		{
-			// PC should point to the opcode
-			// braces needed to avoid c stupidity
-			uint16_t pc = cpu->pc - 1;
-			return arg_ptr(cpu, f, cpu->mem[cpu->pc++] + pc);
+			// Aparently, PC should will point to the NEXT opcode
+			// I can't find any documentation on this unfortunately, but
+			// I have discovered this through testing the output of other
+			// assemblers.
+			uint16_t pc = cpu->pc + 1;
+			return arg_ptr(cpu, f, (int8_t)cpu->mem[cpu->pc++] + pc);
 		}
 
 		case AM_IND:
 		{
 			uint16_t addr = fetch_le(cpu);
+
+			if (f & FETCH_NO_INDIRECTION)
+				return arg_imm(addr);
+
 			uint8_t low = cpu->mem[addr],
 				high = cpu->mem[addr + 1];
 
@@ -121,6 +223,10 @@
 		case AM_ZIX:
 		{
 			uint8_t zp = cpu->mem[cpu->pc++];
+
+			if (f & FETCH_NO_INDIRECTION)
+				return arg_imm(zp);
+
 			uint16_t addr = zp + cpu->regs[X];
 			uint16_t indirect = le_to_native(cpu->mem[addr], cpu->mem[addr + 1]);
 			return arg_ptr(cpu, f, indirect);
@@ -129,6 +235,10 @@
 		case AM_ZIY:
 		{
 			uint8_t zp = cpu->mem[cpu->pc++];
+
+			if (f & FETCH_NO_INDIRECTION)
+				return arg_imm(zp);
+
 			uint16_t base = le_to_native(cpu->mem[zp], cpu->mem[zp + 1]);
 			return arg_ptr(cpu, f, base + cpu->regs[Y]);
 		}
@@ -199,7 +309,7 @@
 
 void disas_step(cpu_t *cpu)
 {
-	printf("%x", cpu->pc);
+	printf("$%x", cpu->pc);
 	uint8_t op = cpu->mem[cpu->pc++];
 	switch (op)
 	{
@@ -214,12 +324,13 @@
 #undef INST
 
 		default:
-			die("Undefined opcode %x", op);
+			warn("\tUndefined opcode %x", op);
 	}
 }
 
 void disas(cpu_t *cpu)
 {
+	// Raw binary, no way to know what's code what isn't
 	while (cpu->pc < 0xFFFF)
 	{
 		disas_step(cpu);
diff --git a/cpu.h b/cpu.h
index 20551ec..651f97f 100644
--- a/cpu.h
+++ b/cpu.h
@@ -1,10 +1,13 @@
 #pragma once
 
 #include <stdint.h>
+#include <stdbool.h>
+
+#define REGISTERS R(A) R(X) R(Y) R(SP)
 
 enum // Registers
 {
-	A, X, Y, SR, SP
+	A, X, Y, SP
 };
 
 enum // Flags
@@ -101,12 +104,27 @@
 	FETCH_NO_INDIRECTION = 1, // Do not follow indirection (used for disassembly)
 };
 
+// Status register
+typedef struct __attribute__((packed))
+{
+	bool negative	: 1;
+	bool overflow	: 1;
+	bool unused		: 1;
+	bool break_		: 1;
+	bool decimal	: 1;
+	bool no_int		: 1;
+	bool zero		: 1;
+	bool carry		: 1;
+} status_t;
+
 // Emulator instance, create with new_cpu()
 typedef struct
 {
-	uint8_t regs[5]; // A, X, Y, SP and SR registers
+	uint8_t regs[4]; // A, X, Y, SP registers
 	uint16_t pc;
+	status_t status;
 	uint8_t *mem;
+	bool running;
 } cpu_t;
 
 // Argument type, includes both pointer and its value
@@ -117,6 +135,7 @@
 } arg_t;
 
 cpu_t new_cpu();
+void step(cpu_t *cpu);
 void free_cpu(cpu_t *cpu);
 void die(const char *message);
 void disas(cpu_t *cpu);
diff --git a/dbg.c b/dbg.c
new file mode 100644
index 0000000..bdc2b31
--- /dev/null
+++ b/dbg.c
@@ -0,0 +1,52 @@
+#include "dbg.h"
+#include "cpu.h"
+
+#include <readline/readline.h>
+#include <readline/history.h>
+#include <string.h>
+#include <stdlib.h>
+
+void debug(cpu_t *cpu)
+{
+	while (true)
+	{
+		char *input = readline("\033[33m> \033[0m");
+		if (!input)
+			continue;
+
+		char *tok = strtok(input, " \t\r\n\v");
+		
+		if (!strcmp(tok, "step") || !strcmp(tok, "s"))
+		{
+			step(cpu);
+		}
+		else if (!strcmp(tok, "show") || !strcmp(tok, "print"))
+		{
+			if ((tok = strtok(NULL, " ")))
+			{
+				char *ok = 0;
+				if (tok[0] == '$')
+				{
+					uint16_t addr = strtol(tok + 1, &ok, 16);
+
+					if (ok == 0)
+					{
+						printf("Memory:\n");
+						printf("\t$%x	%x\n", addr, cpu->mem[addr]);
+						printf("\t$%x	%x\n", addr + 1, cpu->mem[addr + 1]);
+					}
+				}
+				else
+				{
+					printf("Registers:\n");
+
+					#define R(r) printf("\t" #r ":	%x\n", cpu->regs[r]);
+						REGISTERS
+					#undef R
+				}
+			}
+		}
+
+		add_history(input);
+	}
+}
diff --git a/dbg.h b/dbg.h
new file mode 100644
index 0000000..c31c1c3
--- /dev/null
+++ b/dbg.h
@@ -0,0 +1,5 @@
+#pragma once
+
+#include "cpu.h"
+
+void debug(cpu_t *cpu);
diff --git a/test.dat b/test.dat
index 0d6ba15..d5569e0 100644
--- a/test.dat
+++ b/test.dat
Binary files differ
