Add fast bitset, inode search to EXT2
diff --git a/README.md b/README.md
index 0db66bc..6fa78a7 100644
--- a/README.md
+++ b/README.md
@@ -24,14 +24,17 @@
   - [x] Efficient kernel virtual allocator
 - [ ] Preemptive multitasking
   - [x] Multi-threading
-  - [ ] Multi-process support (waiting on FS)
+  - [ ] Multi-process support (in progress)
 - [ ] Device drivers
   - [x] PCI
   - [ ] USB
     - [ ] Mouse + keyboard drivers
   - [ ] Storage device drivers
-    - [x] ATA PIO (broken)
+    - [x] ATA PIO
     - [ ] SATA
+- [x] Synchronization primitives
+  - [x] Process-local spin lock
+  - [x] Kernel-level semaphore
 - [ ] Filesystem
   - [x] Virtual file system
   - [x] Initial ramdisk
@@ -52,12 +55,11 @@
     - [x] Lexical closures
   - [ ] Standard library (in progress)
     - [ ] CLOS-style OO library
-- [ ] Lisp integrated into kernel
-- [ ] User-space driver API
+- [ ] Kernel module API
   - [ ] Lisp API
 - [ ] Graphical subsystem
-  - [ ] Graphical environment in Lisp
-- [ ] Network stack in Lisp
+  - [ ] Graphical environment
+- [ ] Network stack
   - [ ] Ethernet driver
   - [ ] IP
   - [ ] TCP
@@ -66,5 +68,6 @@
 
 ## Documentation
 
-The [Bluejay manual](https://bluejay.readthedocs.io) contains up to date
-documentation.
+The [Bluejay manual](https://bluejay.readthedocs.io) contains out dated
+documentation. A complete manual will be available before the 1.0 release. For
+now there isn't a ton to document as the user-space API is non-existent.
diff --git a/include/kernel/dri/fs/ext2/ext2.h b/include/kernel/dri/fs/ext2/ext2.h
index 2c04c8a..87191b1 100644
--- a/include/kernel/dri/fs/ext2/ext2.h
+++ b/include/kernel/dri/fs/ext2/ext2.h
@@ -107,33 +107,33 @@
 struct ext2_block_group_descriptor
 {
 	/// Address of block usage bitmap
-	uint block_bitmap;                           // 4
+	uint block_bitmap; // 4
 
 	/// Address of inode usage bitmap
-	uint inode_bitmap;                           // 8
+	uint inode_bitmap; // 8
 
 	/// Starting block address of inode table
-	uint inode_table_start_block;                // 12
+	uint inode_table_start_block; // 12
 
 	/// Number of unallocated blocks in this group
-	ushort unallocated_blocks;                   // 14
+	ushort unallocated_blocks; // 14
 
 	/// Number of unallocated inodes in this group
-	ushort unallocated_inodes;                   // 16
+	ushort unallocated_inodes; // 16
 
 	/// Number of directories in this group
-	ushort num_dirs;                             // 18
+	ushort num_dirs; // 18
 
-	ushort padding;
+	ushort padding; // 20
 
-	uchar reserved[12];                          // 32
+	uchar reserved[12]; // 32
 };
 
 struct ext2_inode
 {
 	/// The format and permissions of the file, see EXT2_S_*
 	ushort mode;
-	
+
 	/// The user id of the owner
 	ushort uid;
 
@@ -249,6 +249,7 @@
 /// Read a file system block (0-indexed), if necessary multiple disk blocks
 /// will be read automatically
 void ext2_read_block(struct ext2_superblock *sb, void *buffer, uint block);
+void ext2_write_block(struct ext2_superblock *sb, void *buffer, uint block);
 
 struct ext2_superblock ext2_read_superblock();
 
@@ -282,4 +283,38 @@
 						   void *buffer,
 						   uint block);
 
-ssize_t ext2_read_inode(struct ext2_superblock *sb, struct ext2_inode *inode, void *buffer, ssize_t size);
\ No newline at end of file
+ssize_t ext2_read_inode(struct ext2_superblock *sb, struct ext2_inode *inode, void *buffer, ssize_t size);
+
+/**
+ * @brief Set a block in a bitmap
+ * @param bitmap_block The first block of the bitmap, not necessarily the block
+ * that this specific bit is in.
+ * @param index The bit to check, starting at 0
+ * @returns The value of the bit
+ */
+bool ext2_check_in_bitmap(struct ext2_superblock *sb, uint bitmap_block,
+						  uint index);
+
+/**
+ * @brief Set a block in a bitmap
+ * @param bitmap_block The first block of the bitmap, not necessarily the block
+ * that this specific bit is in.
+ * @param index The bit to set, starting at 0
+ * @param value The value of the bit, 0 for 0, non-0 for 1
+ */
+void ext2_set_in_bitmap(struct ext2_superblock *sb, uint bitmap_block,
+						uint index, bool value);
+
+/**
+ * @brief Find the first zero bit in a bitset
+ * @param bitmap_block The first block of the bitmap
+ * @param num_blocks The number of blocks in the bitmap, or ~0 for no limit
+ * (UNSAFE).
+ * @param start_at The first bit to search from, useful if you want to ignore
+ * the first reserved inodes.
+ * @returns The index of the first zero bit, or -1 if there is none
+ */
+uint ext2_first_zero_bit(struct ext2_superblock *sb, uint bitmap_block,
+						 uint num_blocks, uint start_at);
+
+uint ext2_first_free_inode(struct ext2_superblock *sb);
diff --git a/src/kernel/dri/ata_pio/ata_pio.c b/src/kernel/dri/ata_pio/ata_pio.c
index 1554702..ab6981e 100644
--- a/src/kernel/dri/ata_pio/ata_pio.c
+++ b/src/kernel/dri/ata_pio/ata_pio.c
@@ -3,12 +3,15 @@
 #include <io.h>
 #include <log.h>
 #include <pic.h>
+#include <sync.h>
 
 /* TODO: Rewrite all of this to work with dri_ide in the case of multiple
  * devices */
 
 static ushort test_buffer[256];
 
+static semaphore_t lock;
+
 void ata_pio_wait_bsy()
 {
 	while (inb(ATA_PORT_CMD) & ATA_BSY)
@@ -40,6 +43,8 @@
 
 void ata_pio_read_sectors(void *buffer, uint lba, uchar num_sectors)
 {
+	sm_wait(lock);
+
 	ushort *word_buffer = buffer;
 
 	for (int sector = 0; sector < num_sectors; sector++)
@@ -64,10 +69,14 @@
 		ata_pio_wait_bsy();
 		outw(ATA_PORT_CMD, ATA_CMD_FLUSH_CACHE);
 	}
+
+	sm_signal(lock);
 }
 
 void ata_pio_write_sectors(uint lba, uchar num_sectors, ushort *buffer)
 {
+	sm_wait(lock);
+
 	for (int sector = 0; sector < num_sectors; sector++)
 	{
 		ata_pio_wait_bsy();
@@ -90,6 +99,8 @@
 		ata_pio_wait_bsy();
 		outw(ATA_PORT_CMD, ATA_CMD_FLUSH_CACHE);
 	}
+
+	sm_signal(lock);
 }
 
 static void print_buffer()
@@ -104,33 +115,9 @@
 
 void test_ata_pio()
 {
-//	for (int i = 0; i < 2; i++)
-	{
-		kprintf(DEBUG "Writing 0s and reading back\n");
-
-		// for (int i = 0; i < 256; i++)
-		// 	test_buffer[i] = 0;
-		// ata_pio_write_sectors(0, 1, test_buffer);
-
-		ata_pio_read_sectors(test_buffer, 2, 1);
-		print_buffer();
-		kprintf(DEBUG "again...\n");
-		ata_pio_read_sectors(test_buffer, 2, 1);
-		print_buffer();
-
-		for (int i = 0; i < 256; i++)
-			test_buffer[i] = i;
-
-		kprintf(DEBUG "Writing 0..256 and reading back\n");
-
-		ata_pio_write_sectors(0, 1, test_buffer);
-
-		ata_pio_read_sectors(test_buffer, 2, 1);
-		print_buffer();
-		kprintf(DEBUG "again...\n");
-		ata_pio_read_sectors(test_buffer, 2, 1);
-		print_buffer();
-	}
+	kprintf(DEBUG "Testing ATA PIO\n");
+	ata_pio_read_sectors(test_buffer, 2, 1);
+	print_buffer();
 }
 
 void ata_pio_handle_irq(struct registers *regs)
@@ -149,4 +136,5 @@
 void init_ata_pio()
 {
 	add_interrupt_handler(46, ata_pio_handle_irq);
+	lock = sm_new();
 }
diff --git a/src/kernel/dri/fs/ext2/ext2.c b/src/kernel/dri/fs/ext2/ext2.c
index 2cf15f5..ad416be 100644
--- a/src/kernel/dri/fs/ext2/ext2.c
+++ b/src/kernel/dri/fs/ext2/ext2.c
@@ -1,9 +1,9 @@
+#include <alloc.h>
 #include <dri/ata_pio/ata_pio.h>
 #include <dri/fs/ext2/ext2.h>
+#include <io.h>
 #include <kint.h>
 #include <log.h>
-#include <io.h>
-#include <alloc.h>
 
 inline uint ext2_block_size(struct ext2_superblock *sb)
 {
@@ -18,6 +18,14 @@
 	ata_pio_read_sectors(buffer, block_start, block_size);
 }
 
+void ext2_write_block(struct ext2_superblock *sb, void *buffer, uint block)
+{
+	uint block_size = ext2_block_size(sb) / 512;
+	uint block_start = block_size * block;
+
+	ata_pio_write_sectors(block_start, block_size, buffer);
+}
+
 struct ext2_superblock ext2_read_superblock()
 {
 	uchar buffer[512 * 2];
@@ -104,15 +112,17 @@
 	struct ext2_superblock sb = ext2_read_superblock();
 
 	kprintf(DEBUG "EXT2 magic = 0x%x\n", sb.signature);
-	
+
 	// Read the root inode 2
 	struct ext2_inode root;
 
 	if (ext2_find_inode(&sb, 2, &root))
 	{
-		kprintf(OKAY "Found root inode 2 (size=0x%x, num_blocks=0x%x)\n", root.size, root.num_blocks);
+		kprintf(OKAY "Found root inode 2 (size=0x%x, num_blocks=0x%x)\n",
+				root.size, root.num_blocks);
 		// kprintf(DEBUG "Root.mode = 0x%x\n", root.mode & 0xf000);
-		kassert((root.mode & 0xf000) == EXT2_S_IFDIR, "Root (inode 2) is not a directory.");
+		kassert((root.mode & 0xf000) == EXT2_S_IFDIR,
+				"Root (inode 2) is not a directory.");
 
 		kprintf("ls /\n");
 		kprintf("inode\t name\n");
@@ -123,13 +133,16 @@
 	{
 		kprintf(WARN "Failed to find root inode 2\n");
 	}
+
+	kprintf(INFO "First free inode is %d\n", ext2_first_free_inode(&sb));
 }
 
 bool ext2_valid_filesystem()
 {
 	struct ext2_superblock sb = ext2_read_superblock();
 
-	kprintf(DEBUG "superblock signature is %d (0x%x)\n", sb.signature, sb.signature);
+	kprintf(DEBUG "superblock signature is %d (0x%x)\n", sb.signature,
+			sb.signature);
 
 	return sb.signature == EXT2_SIGNATURE;
 }
@@ -148,7 +161,8 @@
 	ext2_write_superblock(&sb);
 }
 
-bool ext2_find_inode(struct ext2_superblock *sb, uint number, struct ext2_inode *inode)
+bool ext2_find_inode(struct ext2_superblock *sb, uint number,
+					 struct ext2_inode *inode)
 {
 	if (number == 0)
 		return false;
@@ -160,7 +174,8 @@
 	struct ext2_block_group_descriptor descriptor =
 		ext2_load_block_group_descriptor(sb, block_group);
 
-	// kprintf(DEBUG "Descriptor inode_table = 0x%x\n", descriptor.inode_table_start_block);
+	// kprintf(DEBUG "Descriptor inode_table = 0x%x\n",
+	// descriptor.inode_table_start_block);
 
 	// We need to figure out what FS block the inode is on, we know how many
 	// inodes there are total in this BGD and the number per page, so this is
@@ -175,19 +190,16 @@
 
 	struct ext2_inode inodes[block_size / sizeof(struct ext2_inode)];
 
-	ext2_read_block(sb, inodes, descriptor.inode_table_start_block +
-								inode_block);
+	ext2_read_block(sb, inodes,
+					descriptor.inode_table_start_block + inode_block);
 
 	*inode = inodes[inode_index];
 
 	return true;
 }
 
-bool ext2_dir_ls(struct ext2_superblock *sb,
-				 struct ext2_inode *dir,
-				 void (*cb)(uint inode,
-							const char *name,
-							void *data),
+bool ext2_dir_ls(struct ext2_superblock *sb, struct ext2_inode *dir,
+				 void (*cb)(uint inode, const char *name, void *data),
 				 void *data)
 {
 	if ((dir->mode & 0xf000) != EXT2_S_IFDIR)
@@ -225,7 +237,8 @@
 	return true;
 }
 
-ssize_t ext2_read_inode(struct ext2_superblock *sb, struct ext2_inode *inode, void *buffer, ssize_t size)
+ssize_t ext2_read_inode(struct ext2_superblock *sb, struct ext2_inode *inode,
+						void *buffer, ssize_t size)
 {
 	const uint block_size = ext2_block_size(sb);
 	char transfer[block_size];
@@ -239,7 +252,8 @@
 		ext2_read_inode_block(sb, inode, buffer + i * block_size, i);
 	}
 
-	// If we have part of a block left over read it here first, then transfer what we need
+	// If we have part of a block left over read it here first, then transfer
+	// what we need
 	if (i * block_size < fsize)
 	{
 		uint remainder = fsize % block_size;
@@ -251,15 +265,14 @@
 	return fsize;
 }
 
-bool ext2_read_inode_block(struct ext2_superblock *sb,
-						   struct ext2_inode *inode,
-						   void *buffer,
-						   uint block)
+bool ext2_read_inode_block(struct ext2_superblock *sb, struct ext2_inode *inode,
+						   void *buffer, uint block)
 {
 	if (block >= 12)
 	{
-		kprintf(ERROR "Sorry, EXT2 can only access the first 12 (direct) blocks "
-					  "of an inode for now. Indirect look-up will be added later\n");
+		kprintf(ERROR
+				"Sorry, EXT2 can only access the first 12 (direct) blocks "
+				"of an inode for now. Indirect look-up will be added later\n");
 		kpanic("Invalid inode block");
 	}
 
@@ -269,3 +282,124 @@
 
 	return true;
 }
+
+static const uint ext2_bitmap_block(struct ext2_superblock *sb,
+									uint *bitmap_block, uint *index)
+{
+	const uint block_size = ext2_block_size(sb);
+
+	while (index > block_size)
+	{
+		index -= block_size;
+		bitmap_block += 1;
+	}
+
+	return block_size;
+}
+
+bool ext2_check_in_bitmap(struct ext2_superblock *sb, uint bitmap_block,
+						  uint index)
+{
+	const uint block_size = ext2_bitmap_block(sb, &bitmap_block, &index);
+
+	uint byte = index / 8;
+	uint bit = index % 8;
+
+	uchar buffer[block_size];
+
+	ext2_read_block(sb, buffer, bitmap_block);
+
+	return !!(buffer[byte] & (1 << bit));
+}
+
+void ext2_set_in_bitmap(struct ext2_superblock *sb, uint bitmap_block,
+						uint index, bool value)
+{
+	const uint block_size = ext2_bitmap_block(sb, &bitmap_block, &index);
+
+	uint byte = index / 8;
+	uint bit = index % 8;
+
+	uchar buffer[block_size];
+
+	ext2_read_block(sb, buffer, bitmap_block);
+
+	uchar target_bit = 1 << bit;
+	buffer[byte] =
+		value ? (buffer[byte] | target_bit) : (buffer[byte] | ~target_bit);
+
+	ext2_write_block(sb, buffer, bitmap_block);
+}
+
+uint ext2_first_zero_bit(struct ext2_superblock *sb, uint bitmap_block,
+						 uint num_blocks, uint start_at)
+{
+	uint block_size = ext2_block_size(sb);
+
+	for (uint block = bitmap_block; block < bitmap_block + num_blocks; block++)
+	{
+		// dword-array for performance
+		uint buffer[block_size / 4];
+
+		ext2_read_block(sb, buffer, block);
+
+		// If this is the first block start at start_at, otherwise 0
+		for (int i = 0; i < block_size / 4; i++)
+		{
+			// The bitwise negative will be non-zero if there are zero bits in
+			// the original.
+			if (~buffer[i])
+			{
+				// 4 bytes * 8 bits * i dwords
+				uint index =
+					(4 * 8 * i) + (block - bitmap_block) * 8 * block_size;
+
+				kprintf(DEBUG "buffer[i] = 0x%x, i = %d, index = %d\n",
+						buffer[i], i, index);
+
+				// __builtin_ffs gives us the index of the least-significant 1
+				// bit. Since we take the bitwise inverse this is actuall the
+				// least significant 0 bit. This is a GCC intrinsic. This works
+				// particularly well on little-endian systems where the least
+				// significant bit happens to also correspond to the first bit
+				// in the dword bitset.
+				//
+				// ________ ________ ________ ________
+				//        ^ this is the LSB   ^
+				//                            | this is the MSB
+				//
+				// This means that the LSB is also the first bit in the bitset.
+				uint trailing = __builtin_ffs(~buffer[i]);
+
+				kprintf(DEBUG "Trailing = %d, 0x%x\n", trailing, trailing);
+
+				return trailing + index;
+			}
+		}
+	}
+
+	return -1;
+}
+
+uint ext2_first_free_inode(struct ext2_superblock *sb)
+{
+	// For now just check the first block group
+	struct ext2_block_group_descriptor bgd =
+		ext2_load_block_group_descriptor(sb, 0);
+
+	const uint block_size = ext2_block_size(sb);
+	// + 1 because we need to round up (ie 1025 for 1024 size blocks will yield
+	// 1, should 2)
+	uint bitset_blocks = (sb->inodes_per_block_group / 8) / block_size + 1;
+
+	// inodes start at 1
+	uint inode = ext2_first_zero_bit(sb, bgd.inode_bitmap, bitset_blocks, 12) + 1;
+	// This will overflow back to zero if no inode was found
+
+	if (!inode)
+	{
+		kpanic("No inodes left in first block group, FIXME");
+	}
+
+	return inode;
+}
diff --git a/src/kernel/main.c b/src/kernel/main.c
index 59fc111..ab013fd 100644
--- a/src/kernel/main.c
+++ b/src/kernel/main.c
@@ -4,6 +4,7 @@
 #include "log.h"
 #include "multiboot.h"
 #include "paging.h"
+#include "sync.h"
 #include "syscall.h"
 #include "task.h"
 #include "timer.h"
@@ -11,9 +12,9 @@
 #include "vfs_initrd.h"
 #include "vga.h"
 #include <dri/ata_pio/ata_pio.h>
-#include <dri/pci/pci.h>
-#include <dri/ide/ide.h>
 #include <dri/fs/ext2/ext2.h>
+#include <dri/ide/ide.h>
+#include <dri/pci/pci.h>
 
 void greet()
 {
@@ -77,6 +78,8 @@
 	asm("sti");
 
 	init_tasks();
+	init_sync();
+
 	pci_init();
 
 	// Register PCI drivers
@@ -108,7 +111,9 @@
 	}
 	else
 	{
-		kprintf(WARN "Filesystem is not a valid EXT2 format, only EXT2 is supported\n");
+		kprintf(
+			WARN
+			"Filesystem is not a valid EXT2 format, only EXT2 is supported\n");
 	}
 
 	while (true)
diff --git a/src/kernel/sync.c b/src/kernel/sync.c
index 2bac9e4..0db5022 100644
--- a/src/kernel/sync.c
+++ b/src/kernel/sync.c
@@ -2,10 +2,9 @@
 #include <alloc.h>
 #include <task.h>
 #include <sys.h>
-#include "syscall.h"
+#include <log.h>
 
 #define SM_PER_LIST 1024
-#define SM_MAX_WAITING 64
 
 struct sm_task
 {
@@ -50,7 +49,20 @@
 	return 0;
 }
 
-void sm_unsafe_wait(struct semaphore *sm)
+static struct semaphore *sm_from_id(semaphore_t sm)
+{
+	struct sm_list *first = sm_first;
+
+	while (sm >= SM_PER_LIST)
+	{
+		first = first->next;
+		sm -= SM_PER_LIST;
+	}
+
+	return &first->semaphores[sm];
+}
+
+static void sm_unsafe_wait(struct semaphore *sm)
 {
 	sm->sm--;
 
@@ -60,6 +72,8 @@
 		// This will be quick, so just use a spinlock
 		sl_acquire(sm->task_lock);
 
+		kprintf(INFO "Semaphore waiting\n");
+
 		struct sm_task *task = malloc(sizeof(struct sm_task));
 
 		task->next = NULL;
@@ -84,7 +98,7 @@
 	// Otherwise there's nobody else waiting, just go ahead
 }
 
-void sm_unsafe_signal(struct semaphore *sm)
+static void sm_unsafe_signal(struct semaphore *sm)
 {
 	sm->sm++;
 
@@ -137,6 +151,18 @@
 	return num;
 }
 
+void sm_wait(semaphore_t sm)
+{
+	sm_unsafe_wait(sm_from_id(sm));
+}
+
+void sm_signal(semaphore_t sm)
+{
+	sm_unsafe_signal(sm_from_id(sm));
+}
+
+
+
 void init_sync()
 {
 }