diff --git a/README.md b/README.md
index 2d235ba..eb391f0 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,10 @@
 # 6502 Toolchain
 
-![Screenshot](screenshot.png)
+<p align="center">
+	<b>Click for demonstration:</b>
+	<br>
+	<a href="colors.webm"><img src="screenshot.png" alt="Screenshot"></a>
+</p>
 
 This project aims to create a portable toolchain for developing,
 testing and debugging programs for the 6502 processor. An assembler
diff --git a/colors.dat b/colors.dat
new file mode 100644
index 0000000..fc0ee0a
--- /dev/null
+++ b/colors.dat
Binary files differ
diff --git a/colors.webm b/colors.webm
new file mode 100644
index 0000000..e32b42f
--- /dev/null
+++ b/colors.webm
Binary files differ
diff --git a/cpu.c b/cpu.c
index fbe72b2..cfcd89d 100644
--- a/cpu.c
+++ b/cpu.c
@@ -1,5 +1,7 @@
 #include "cpu.h"
 #include "instructions.h"
+#define SCREEN_ONLY_SDL
+#include "screen.h"
 
 #include <endian.h>
 #include <stdio.h>
@@ -13,6 +15,10 @@
 #define warn(m, ...) \
 	printf("\033[33m" m "\033[0m\n", ##__VA_ARGS__);
 
+
+sdl_screen_t *g_scr = NULL; // standalone SDL screen redrawn at the end of step(); NULL disables it
+
+
 void reset(cpu_t *cpu)
 {
 	cpu->regs[SP] = 0xFD; // stack at is 0x100 + SP
@@ -89,36 +95,36 @@
 }
 
 // rotate right
-uint8_t ror(uint8_t a, uint8_t n)
+inline uint8_t ror(uint8_t a, uint8_t n)
 {
 	return (a >> n) | (a << (8 - n));
 }
 
 // rotate left
-uint8_t rol(uint8_t a, uint8_t n)
+inline uint8_t rol(uint8_t a, uint8_t n)
 {
 	return (a << n) | (a >> (8 - n));
 }
 
-void stat_nz(cpu_t *cpu, int8_t v)
+inline void stat_nz(cpu_t *cpu, int8_t v)
 {
 	cpu->status.negative = v < 0;
 	cpu->status.zero = v == 0;
 }
 
 // Used to check for overflow, is c unique?
-bool last_unique(bool a, bool b, bool c)
+inline bool last_unique(bool a, bool b, bool c)
 {
 	return a == b && a != c;
 }
 
-void stat_cv(cpu_t *cpu, uint8_t a, uint8_t b, uint8_t c)
+inline void stat_cv(cpu_t *cpu, uint8_t a, uint8_t b, uint8_t c)
 {
 	cpu->status.overflow = last_unique(a >> 7, b >> 7, c >> 7);
 	cpu->status.carry = c < a || c < b;
 }
 
-void cmp(cpu_t *cpu, uint8_t reg, uint8_t mem)
+inline void cmp(cpu_t *cpu, uint8_t reg, uint8_t mem)
 {
 	cpu->status.negative = 0;
 	cpu->status.zero = 0;
@@ -405,19 +411,19 @@
 	#undef REGS
 }
 
-uint16_t fetch_le(cpu_t *cpu)
+inline uint16_t fetch_le(cpu_t *cpu)
 {
 	uint8_t a = cpu->mem[cpu->pc++];
 	uint8_t b = cpu->mem[cpu->pc++];
 	return le_to_native(a, b);
 }
 
-arg_t arg_imm(uint16_t a)
+inline arg_t arg_imm(uint16_t a)
 {
 	return (arg_t){ a, a };
 }
 
-arg_t arg_ptr(cpu_t *c, uint flags, uint16_t p)
+inline arg_t arg_ptr(cpu_t *c, uint flags, uint16_t p)
 {
 	if (flags & FETCH_NO_INDIRECTION)
 		return arg_imm(p);
@@ -425,7 +431,7 @@
 	return (arg_t){ c->mem[p], p };
 }
 
-arg_t arg(uint16_t v, uint16_t a)
+inline arg_t arg(uint16_t v, uint16_t a)
 {
 	return (arg_t){ v, a };
 }
@@ -470,15 +476,25 @@
 		}
 
 		case AM_AX:
+			if (f & FETCH_NO_INDIRECTION)
+				return arg_ptr(cpu, f, fetch_le(cpu));
+
 			return arg_ptr(cpu, f, fetch_le(cpu) + cpu->regs[X]);
 
 		case AM_AY:
+			if (f & FETCH_NO_INDIRECTION)
+				return arg_ptr(cpu, f, fetch_le(cpu));
+		
 			return arg_ptr(cpu, f, fetch_le(cpu) + cpu->regs[Y]);
 
 		case AM_ZPX:
+			if (f & FETCH_NO_INDIRECTION)
+				return arg_ptr(cpu, f, cpu->mem[cpu->pc++]);
 			return arg_ptr(cpu, f, cpu->mem[cpu->pc++] + cpu->regs[X]);
 
 		case AM_ZPY:
+			if (f & FETCH_NO_INDIRECTION)
+				return arg_ptr(cpu, f, cpu->mem[cpu->pc++]);
 			return arg_ptr(cpu, f, cpu->mem[cpu->pc++] + cpu->regs[Y]);
 
 		case AM_ZIX:
@@ -510,8 +526,10 @@
 	}
 }
 
-void step(cpu_t *cpu)
+inline void step(cpu_t *cpu)
 {
+	static int steps;
+	steps++;
 	switch (cpu->mem[cpu->pc++])
 	{
 #define INST(mn, am, op) \
@@ -526,6 +544,14 @@
 		default:
 			die("Undefined opcode");
 	}
+
+	if (steps % 100 == 0)
+		printf("%d\n", steps);
+
+	if (g_scr)
+	{
+		sdl_screen(g_scr, cpu->mem + CPU_FB_ADDR);
+	}
 }
 
 int dump_inst(cpu_t *cpu, char *buf, const char *mn, uint16_t addr, uint8_t am)
diff --git a/gui.c b/gui.c
index cfaa097..201fdae 100644
--- a/gui.c
+++ b/gui.c
@@ -15,6 +15,7 @@
 #define NK_SDL_GL3_IMPLEMENTATION
 #include "nuklear/nuklear.h"
 #include "nuklear/demo/sdl_opengl3/nuklear_sdl_gl3.h"
+#undef SCREEN_ONLY_SDL
 #include "screen.h"
 
 #define WINDOW_WIDTH 720
@@ -116,7 +117,7 @@
 		{
 			nk_layout_row_dynamic(ctx, 24, 1);
 			screen_scale = nk_propertyi(ctx, "Scale", 1, screen_scale, 8, 1, 1);
-			
+
 			nk_layout_row_static(ctx, screen_scale * 32, screen_scale * 32, 1);
 			screen(ctx, cpu->mem + CPU_FB_ADDR, screen_scale);
 		}
diff --git a/main.c b/main.c
index 07af040..84e9779 100644
--- a/main.c
+++ b/main.c
@@ -1,6 +1,7 @@
 #include "cpu.h"
 #include "dbg.h"
 #include "gui.h"
+#include "screen.h"
 
 #include <bits/getopt_core.h>
 #include <ctype.h>
@@ -8,6 +9,8 @@
 #include <stdlib.h>
 #include <unistd.h>
 
+extern sdl_screen_t *g_scr;
+
 int main(int argc, char **argv)
 {
 	bool disflag = false,
@@ -15,7 +18,8 @@
 		helpflag = false,
 		debugflag = false,
 		should_read = false,
-		guiflag = false;
+		guiflag = false,
+		scrflag = false;
 
 	int disasm_len = 0;
 
@@ -23,7 +27,7 @@
 
 	char c;
 
-	while ((c = getopt(argc, argv, "Ddrhgi:n:")) != -1)
+	while ((c = getopt(argc, argv, "Dsdrhgi:n:")) != -1)
 	{
 		switch (c)
 		{
@@ -49,6 +53,9 @@
 		case 'n':
 			disasm_len = atoi(optarg);
 			break;
+		case 's':
+			scrflag = true;
+			break;
 		case 'h':
 		case '?':
 			helpflag = 1;
@@ -83,6 +90,12 @@
 		printf("%s -h  for help\n", argv[0]);
 	}
 
+	if (scrflag)
+	{
+		static sdl_screen_t scr; // static: step() reads g_scr long after this block ends
+		scr = new_sdl_screen(8);
+		g_scr = &scr;
+	}
 	if (guiflag)
 	{
 		gui(&cpu);
diff --git a/screen.c b/screen.c
index 2eb17a2..fedc151 100644
--- a/screen.c
+++ b/screen.c
@@ -1,10 +1,12 @@
 #include "screen.h"
 #include "cpu.h"
 
+#include <SDL2/SDL.h>
+
 struct nk_color byte_to_color(uint8_t b)
 {
 	struct nk_color c;
-	c.r = (b >> 6) * (255 / 0b11);
+	c.r = (b >> 5) * (255 / 0b111);
 	c.g = ((b >> 2) & 0b111) * (255 / 0b111);
 	c.b = (b & 0b11) * (255 / 0b11);
 	c.a = 255;
@@ -32,7 +34,59 @@
 			nk_fill_rect(out,
 				nk_rect(bounds.x + i * size, bounds.y + j * size,
 					size, size), 0.0f,
-				byte_to_color(mem[i * CPU_FB_H + j]));
+				byte_to_color(mem[i + CPU_FB_H * j]));
 		}
 	}
 }
+
+// Create the SDL window + renderer for a CPU screen; size = window pixels per cell.
+sdl_screen_t new_sdl_screen(uint8_t size)
+{
+	sdl_screen_t scr = { .win = NULL, .r = NULL, .size = size };
+
+	scr.win = SDL_CreateWindow("6502", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED,
+		size * 32, size * 32, 0);
+	if (scr.win)
+		scr.r = SDL_CreateRenderer(scr.win, -1,
+			SDL_RENDERER_ACCELERATED | SDL_RENDERER_PRESENTVSYNC);
+	if (!scr.win || !scr.r) // the NULL handle(s) stay in scr; report why SDL refused
+		SDL_Log("new_sdl_screen: %s", SDL_GetError());
+	return scr;
+}
+
+// Redraw the SDL window from the framebuffer bytes at mem, one filled
+// square of scr->size pixels per byte; exits the program on SDL_QUIT.
+// NOTE(review): x is bounded by CPU_FB_H and rows are CPU_FB_H bytes apart;
+// that matches a CPU_FB_W x CPU_FB_H layout only while the two are equal.
+void sdl_screen(sdl_screen_t *scr, uint8_t *mem)
+{
+	const int cell = scr->size;
+	SDL_Event ev;
+
+	SDL_RenderClear(scr->r);
+
+	// drain the event queue; closing the window ends the program
+	while (SDL_PollEvent(&ev))
+	{
+		if (ev.type == SDL_QUIT)
+			exit(0);
+	}
+
+	for (int x = 0; x < CPU_FB_H; x++)
+	{
+		for (int y = 0; y < CPU_FB_W; y++)
+		{
+			const struct nk_color px = byte_to_color(mem[x + CPU_FB_H * y]);
+			const SDL_Rect dst =
+			{
+				x * cell, y * cell, cell, cell,
+			};
+
+			SDL_SetRenderDrawColor(scr->r, px.r, px.g, px.b, px.a);
+			SDL_RenderFillRect(scr->r, &dst);
+		}
+	}
+
+	// show the finished frame
+	SDL_RenderPresent(scr->r);
+}
diff --git a/screen.h b/screen.h
index e504617..4128005 100644
--- a/screen.h
+++ b/screen.h
@@ -2,6 +2,8 @@
 
 #include <stdint.h>
 
+#ifndef SCREEN_ONLY_SDL
+
 #undef NK_IMPLEMENTATION
 #define NK_INCLUDE_FIXED_TYPES
 #define NK_INCLUDE_STANDARD_IO
@@ -12,5 +14,20 @@
 #define NK_INCLUDE_DEFAULT_FONT
 #include "nuklear/nuklear.h"
 
-// draw the CPU screen
 void screen(struct nk_context *ctx, uint8_t *mem, uint8_t size);
+
+#endif
+
+#include <SDL2/SDL.h>
+
+
+typedef struct // standalone SDL output for the CPU framebuffer
+{
+	SDL_Window *win; // window created by new_sdl_screen()
+	SDL_Renderer *r; // renderer created for win
+	uint8_t size; // side length, in window pixels, of one framebuffer cell
+} sdl_screen_t;
+
+// create the window + renderer for a standalone SDL CPU screen
+sdl_screen_t new_sdl_screen(uint8_t size);
+void sdl_screen(sdl_screen_t *scr, uint8_t *mem); // poll window events, then draw mem into scr
diff --git a/screenshot.png b/screenshot.png
index 026a05f..be57434 100644
--- a/screenshot.png
+++ b/screenshot.png
Binary files differ
diff --git a/test.dat b/test.dat
index a1ba7d1..b004f7a 100644
--- a/test.dat
+++ b/test.dat
Binary files differ
