diff options
| author | Lucas Faria Mendes <lucas.oliveira1676@etec.sp.gov.br> | 2025-12-05 15:09:51 +0000 |
|---|---|---|
| committer | Lucas Faria Mendes <lucas.oliveira1676@etec.sp.gov.br> | 2025-12-05 15:09:51 +0000 |
| commit | de09f812fd11665010daadb60f101046d459f5ec (patch) | |
| tree | 9884a06b674c9a0fb8e7a8f6698976dc764e7d04 /src | |
| parent | 11b7033a351226696290983811928d22ccc85256 (diff) | |
| download | sqlite-zig-de09f812fd11665010daadb60f101046d459f5ec.tar.gz sqlite-zig-de09f812fd11665010daadb60f101046d459f5ec.zip | |
codecrafters submit [skip ci]main
Diffstat (limited to 'src')
| -rw-r--r-- | src/btree.zig | 194 | ||||
| -rw-r--r-- | src/index.zig | 180 | ||||
| -rwxr-xr-x | src/main.zig | 14 | ||||
| -rw-r--r-- | src/query.zig | 318 | ||||
| -rw-r--r-- | src/schema.zig | 838 |
5 files changed, 780 insertions, 764 deletions
diff --git a/src/btree.zig b/src/btree.zig new file mode 100644 index 0000000..42affc0 --- /dev/null +++ b/src/btree.zig @@ -0,0 +1,194 @@ +const std = @import("std"); +const varint = @import("varint.zig"); +const record = @import("record.zig"); + +/// Helper function to calculate size of a serial type +pub inline fn serialTypeSize(st: u64) usize { + if (st == 0 or st == 8 or st == 9) { + return 0; + } else if (st >= 13 and (st % 2) == 1) { + return (st - 13) / 2; + } else if (st >= 12 and (st % 2) == 0) { + return (st - 12) / 2; + } else if (st >= 1 and st <= 6) { + return @as(usize, st); + } else if (st == 7) { + return 8; + } + return 0; +} + +/// Read and process rows from a leaf table page +pub fn readLeafPageRows(page_data: []const u8, column_indices: []const usize, where_column_idx: ?usize, where_value: ?[]const u8, stdout: anytype) !void { + const page_type = page_data[0]; + if (page_type != 0x0d) return; + + const num_cells = std.mem.readInt(u16, page_data[3..5], .big); + if (num_cells == 0) return; + + for (0..num_cells) |i| { + const offset = 8 + i * 2; + if (offset + 2 > page_data.len) continue; + const cell_ptr = std.mem.readInt(u16, page_data[offset..][0..2], .big); + if (cell_ptr >= page_data.len) continue; + + const cell_data = page_data[cell_ptr..]; + + var parsed = varint.parse(cell_data); + var pos = parsed.len; + if (pos >= cell_data.len) continue; + + parsed = varint.parse(cell_data[pos..]); + const rowid = parsed.value; + pos += parsed.len; + if (pos >= cell_data.len) continue; + + const record_data = cell_data[pos..]; + parsed = varint.parse(record_data); + const header_size = parsed.value; + if (header_size > record_data.len or header_size == 0) continue; + var header_pos = parsed.len; + + // Parse serial types + var serial_types: [256]u64 = undefined; + var num_columns: usize = 0; + while (header_pos < header_size and num_columns < 256) { + parsed = varint.parse(record_data[header_pos..]); + serial_types[num_columns] = parsed.value; + num_columns += 1; + header_pos += parsed.len; + } + + // Check WHERE clause if present (early rejection) + if (where_column_idx) |where_idx| { + if (where_value) |expected_value| { + if (where_idx >= num_columns) continue; + + const st = serial_types[where_idx]; + + // Fast path: check serial type first + if (st >= 13 and (st % 2) == 1) { + // String comparison - check length first for early rejection + const expected_len = (st - 13) / 2; + if (expected_len != expected_value.len) continue; + + var where_body_pos: usize = header_size; + for (0..where_idx) |col| { + where_body_pos += serialTypeSize(serial_types[col]); + } + + const str_result = record.readString(record_data[where_body_pos..], st); + if (!std.mem.eql(u8, str_result.value, expected_value)) continue; + } else if (st >= 1 and st <= 6) { + var where_body_pos: usize = header_size; + for (0..where_idx) |col| { + where_body_pos += serialTypeSize(serial_types[col]); + } + const int_result = record.readInt(record_data[where_body_pos..], st); + const expected_int = std.fmt.parseInt(i64, expected_value, 10) catch 0; + if (int_result.value != expected_int) continue; + } else { + continue; // Unsupported type for WHERE + } + } + } + + // Print matching row + for (column_indices, 0..) |column_idx, col_num| { + if (col_num > 0) try stdout.print("|", .{}); + if (column_idx >= num_columns) continue; + + var body_pos: usize = header_size; + for (0..column_idx) |col| { + body_pos += serialTypeSize(serial_types[col]); + } + + const st = serial_types[column_idx]; + if (st == 0) { + try stdout.print("{}", .{rowid}); + } else if (st == 8) { + try stdout.print("0", .{}); + } else if (st == 9) { + try stdout.print("1", .{}); + } else if (st >= 1 and st <= 6) { + const int_result = record.readInt(record_data[body_pos..], st); + try stdout.print("{}", .{int_result.value}); + } else if (st >= 13 and (st % 2) == 1) { + const str_result = record.readString(record_data[body_pos..], st); + try stdout.print("{s}", .{str_result.value}); + } + } + try stdout.print("\n", .{}); + } +} + +/// Traverse B-tree iteratively with WHERE clause filtering +pub fn traverseBTree(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, page_num: u32, column_indices: []const usize, where_column_idx: ?usize, where_value: ?[]const u8, stdout: anytype) !void { + // Read entire file into memory for fast random access + const file_size = try file.getEndPos(); + const file_data = try allocator.alloc(u8, file_size); + defer allocator.free(file_data); + + _ = try file.seekTo(0); + _ = try file.read(file_data); + + var stack = std.ArrayList(u32){}; + defer stack.deinit(allocator); + try stack.append(allocator, page_num); + + while (stack.items.len > 0) { + const current_page = stack.pop() orelse continue; + + const page_offset = (current_page - 1) * @as(usize, page_size); + if (page_offset + page_size > file_data.len) continue; + + const page_data = file_data[page_offset .. page_offset + page_size]; + const page_type = page_data[0]; + + if (page_type == 0x0d) { + try readLeafPageRows(page_data, column_indices, where_column_idx, where_value, stdout); + } else if (page_type == 0x05) { + const num_cells = std.mem.readInt(u16, page_data[3..5], .big); + const rightmost_ptr = std.mem.readInt(u32, page_data[8..12], .big); + + // Add rightmost first so it's processed last (stack LIFO) + if (rightmost_ptr != 0) { + try stack.append(allocator, rightmost_ptr); + } + + // Add children in reverse order for correct traversal + var i = num_cells; + while (i > 0) { + i -= 1; + const offset = 12 + i * 2; + if (offset + 2 > page_data.len) continue; + const cell_ptr_bytes = page_data[offset .. offset + 2]; + const cell_ptr = std.mem.readInt(u16, cell_ptr_bytes[0..2], .big); + if (cell_ptr + 4 > page_data.len) continue; + + const cell_data = page_data[cell_ptr..]; + const left_child_page = std.mem.readInt(u32, cell_data[0..4], .big); + + if (left_child_page != 0) { + try stack.append(allocator, left_child_page); + } + } + } + } +} + +/// Count total rows in a B-tree +pub fn countRows(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, rootpage: u32) !u64 { + const page_offset = (rootpage - 1) * @as(u64, page_size); + var page_data = try allocator.alloc(u8, page_size); + defer allocator.free(page_data); + + _ = try file.seekTo(page_offset); + _ = try file.read(page_data); + + const page_type = page_data[0]; + if (page_type != 0x0d) return 0; + + const num_cells = std.mem.readInt(u16, page_data[3..5], .big); + return num_cells; +} diff --git a/src/index.zig b/src/index.zig new file mode 100644 index 0000000..4f8b6dc --- /dev/null +++ b/src/index.zig @@ -0,0 +1,180 @@ +const std = @import("std"); +const varint = @import("varint.zig"); +const record = @import("record.zig"); + +/// Search for an index by table and column name +pub fn getIndexRootpage(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, table_name: []const u8, column_name: []const u8) !?u32 { + var buf: [2]u8 = undefined; + _ = try file.seekTo(103); + _ = try file.read(&buf); + const num_cells = std.mem.readInt(u16, &buf, .big); + + if (num_cells == 0) return null; + + var cell_pointers = try allocator.alloc(u16, num_cells); + defer allocator.free(cell_pointers); + + for (0..num_cells) |i| { + _ = try file.seekTo(108 + i * 2); + _ = try file.read(&buf); + cell_pointers[i] = std.mem.readInt(u16, &buf, .big); + } + + var page_data = try allocator.alloc(u8, page_size); + defer allocator.free(page_data); + + _ = try file.seekTo(0); + _ = try file.read(page_data); + + for (0..num_cells) |i| { + if (cell_pointers[i] >= page_data.len) continue; + const cell_data = page_data[cell_pointers[i]..]; + + var parsed = varint.parse(cell_data); + var pos = parsed.len; + if (pos >= cell_data.len) continue; + + parsed = varint.parse(cell_data[pos..]); + pos += parsed.len; + if (pos >= cell_data.len) continue; + + const record_data = cell_data[pos..]; + parsed = varint.parse(record_data); + const header_size = parsed.value; + if (header_size > record_data.len) continue; + var header_pos = parsed.len; + + var serial_types: [5]u64 = undefined; + for (0..5) |col| { + if (header_pos >= header_size) break; + parsed = varint.parse(record_data[header_pos..]); + serial_types[col] = parsed.value; + header_pos += parsed.len; + } + + const type_st = serial_types[0]; + const name_st = serial_types[1]; + const tbl_name_st = serial_types[2]; + + var body_pos: usize = header_size; + const type_result = record.readString(record_data[body_pos..], type_st); + if (!std.mem.eql(u8, type_result.value, "index")) continue; + + body_pos += type_result.len; + const name_result = record.readString(record_data[body_pos..], name_st); + + body_pos += name_result.len; + const tbl_name_result = record.readString(record_data[body_pos..], tbl_name_st); + + const expected_index_name = try std.fmt.allocPrint(allocator, "idx_{s}_{s}", .{ table_name, column_name }); + defer allocator.free(expected_index_name); + + if (std.mem.eql(u8, name_result.value, expected_index_name) and + std.mem.eql(u8, tbl_name_result.value, table_name)) + { + body_pos += tbl_name_result.len; + const rootpage_st = serial_types[3]; + const rootpage_result = record.readInt(record_data[body_pos..], rootpage_st); + return @as(u32, @intCast(rootpage_result.value)); + } + } + + return null; +} + +/// Search index B-tree for matching values and collect rowids +pub fn searchIndexForValue(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, index_rootpage: u32, search_value: []const u8, rowids: *std.ArrayList(u64)) !void { + // Read file into memory once + const file_size = try file.getEndPos(); + const file_data = try allocator.alloc(u8, file_size); + defer allocator.free(file_data); + + _ = try file.seekTo(0); + _ = try file.read(file_data); + + // Use stack to iterate all pages + var stack = std.ArrayList(u32){}; + defer stack.deinit(allocator); + try stack.append(allocator, index_rootpage); + + while (stack.items.len > 0) { + const page_num = stack.pop() orelse continue; + if (page_num == 0) continue; + + const page_offset = (page_num - 1) * @as(usize, page_size); + if (page_offset + page_size > file_data.len) continue; + + const page_data = file_data[page_offset .. page_offset + page_size]; + const page_type = page_data[0]; + + if (page_type == 0x0a) { + // Leaf index page - search for matching values + const num_cells = std.mem.readInt(u16, page_data[3..5], .big); + + for (0..num_cells) |i| { + const offset = 8 + i * 2; + if (offset + 2 > page_data.len) continue; + const cell_ptr = std.mem.readInt(u16, page_data[offset..][0..2], .big); + if (cell_ptr >= page_data.len) continue; + + const cell_data = page_data[cell_ptr..]; + var parsed = varint.parse(cell_data); + const pos = parsed.len; + if (pos >= cell_data.len) continue; + + const record_data = cell_data[pos..]; + parsed = varint.parse(record_data); + const header_size = parsed.value; + if (header_size > record_data.len) continue; + var header_pos = parsed.len; + + // Parse serial types + var serial_types: [8]u64 = undefined; + var num_st: usize = 0; + while (header_pos < header_size and num_st < 8) { + parsed = varint.parse(record_data[header_pos..]); + serial_types[num_st] = parsed.value; + num_st += 1; + header_pos += parsed.len; + } + + if (num_st >= 2) { + const st = serial_types[0]; + var body_pos: usize = header_size; + + if (st >= 13 and (st % 2) == 1) { + const str_result = record.readString(record_data[body_pos..], st); + if (std.mem.eql(u8, str_result.value, search_value)) { + body_pos += str_result.len; + const rowid_st = serial_types[1]; + const rowid_result = record.readInt(record_data[body_pos..], rowid_st); + try rowids.append(allocator, @as(u64, @intCast(rowid_result.value))); + } + } + } + } + } else if (page_type == 0x02) { + // Interior index page - add ALL children to stack + const num_cells = std.mem.readInt(u16, page_data[3..5], .big); + const rightmost_ptr = std.mem.readInt(u32, page_data[8..12], .big); + + // Add all cell pointers + for (0..num_cells) |i| { + const offset = 12 + i * 2; + if (offset + 2 > page_data.len) continue; + const cell_ptr = std.mem.readInt(u16, page_data[offset..][0..2], .big); + if (cell_ptr + 4 > page_data.len) continue; + + const cell_data = page_data[cell_ptr..]; + const left_child_page = std.mem.readInt(u32, cell_data[0..4], .big); + if (left_child_page > 0) { + try stack.append(allocator, left_child_page); + } + } + + if (rightmost_ptr > 0) { + try stack.append(allocator, rightmost_ptr); + } + } + } +} diff --git a/src/main.zig b/src/main.zig index ba98b50..96fc0a4 100755 --- a/src/main.zig +++ b/src/main.zig @@ -3,6 +3,8 @@ const varint = @import("varint.zig"); const record = @import("record.zig"); const schema = @import("schema.zig"); const tables = @import("tables.zig"); +const query = @import("query.zig"); +const btree = @import("btree.zig"); var stdout_buffer: [1024]u8 = undefined; var stdout_writer = std.fs.File.stdout().writer(&stdout_buffer); @@ -82,7 +84,7 @@ pub fn main() !void { // Check if this is COUNT(*) if (std.mem.indexOf(u8, column_part, "count(") != null or std.mem.indexOf(u8, column_part, "COUNT(") != null) { const rootpage = try schema.getRootpage(allocator, &file, page_size, table_name); - const row_count = try schema.countRows(allocator, &file, page_size, rootpage); + const row_count = try btree.countRows(allocator, &file, page_size, rootpage); try stdout.print("{}\n", .{row_count}); } else { // Get the CREATE TABLE statement to find column order @@ -111,7 +113,7 @@ pub fn main() !void { if (column_list.items.len == 1) { // Single column query const column_idx = try schema.parseColumnIndex(create_sql, column_list.items[0]); - try schema.readTableRows(allocator, &file, page_size, rootpage, column_idx, stdout); + try query.readTableRows(allocator, &file, page_size, rootpage, column_idx, stdout); } else { // Multiple column query var column_indices = std.ArrayList(usize){}; @@ -125,16 +127,16 @@ pub fn main() !void { // Try to use index scan if WHERE clause exists if (where_column != null and where_value != null) { // Try index scan first - schema.readTableRowsWithIndex(allocator, &file, page_size, table_name, rootpage, column_indices.items, where_column.?, where_value.?, stdout) catch |err| { + query.readTableRowsWithIndex(allocator, &file, page_size, table_name, rootpage, column_indices.items, where_column.?, where_value.?, stdout) catch |err| { if (err == error.NoIndexFound) { // Fall back to table scan - try schema.readTableRowsMultiColumnWhere(allocator, &file, page_size, rootpage, column_indices.items, where_column_idx, where_value, stdout); + try query.readTableRowsMultiColumnWhere(allocator, &file, page_size, rootpage, column_indices.items, where_column_idx, where_value, stdout); } else { return err; } }; } else { - try schema.readTableRowsMultiColumnWhere(allocator, &file, page_size, rootpage, column_indices.items, where_column_idx, where_value, stdout); + try query.readTableRowsMultiColumnWhere(allocator, &file, page_size, rootpage, column_indices.items, where_column_idx, where_value, stdout); } } } @@ -146,7 +148,7 @@ pub fn main() !void { } const rootpage = try schema.getRootpage(allocator, &file, page_size, last_token); - const row_count = try schema.countRows(allocator, &file, page_size, rootpage); + const row_count = try btree.countRows(allocator, &file, page_size, rootpage); try stdout.print("{}\n", .{row_count}); } diff --git a/src/query.zig b/src/query.zig new file mode 100644 index 0000000..3a28395 --- /dev/null +++ b/src/query.zig @@ -0,0 +1,318 @@ +const std = @import("std"); +const varint = @import("varint.zig"); +const record = @import("record.zig"); +const btree = @import("btree.zig"); +const index = @import("index.zig"); +const schema = @import("schema.zig"); + +/// Read a specific record by rowid from table +fn readRecordByRowid(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, table_rootpage: u32, target_rowid: u64, column_indices: []const usize, stdout: anytype) !void { + try searchTableForRowid(allocator, file, page_size, table_rootpage, target_rowid, column_indices, stdout); +} + +/// Search for a specific rowid in table B-tree +fn searchTableForRowid(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, page_num: u32, target_rowid: u64, column_indices: []const usize, stdout: anytype) !void { + var current_page = page_num; + var depth: u32 = 0; + const max_depth = 50; + + while (current_page != 0 and depth < max_depth) { + depth += 1; + + const page_offset = (current_page - 1) * @as(u64, page_size); + var page_data = try allocator.alloc(u8, page_size); + defer allocator.free(page_data); + + _ = try file.seekTo(page_offset); + _ = try file.read(page_data); + + const page_type = page_data[0]; + + if (page_type == 0x0d) { + // Leaf table page + const num_cells = std.mem.readInt(u16, page_data[3..5], .big); + + for (0..num_cells) |i| { + const offset = 8 + i * 2; + if (offset + 2 > page_data.len) continue; + const cell_ptr = std.mem.readInt(u16, page_data[offset..][0..2], .big); + if (cell_ptr >= page_data.len) continue; + + const cell_data = page_data[cell_ptr..]; + var parsed = varint.parse(cell_data); + var pos = parsed.len; + if (pos >= cell_data.len) continue; + + parsed = varint.parse(cell_data[pos..]); + const rowid = parsed.value; + pos += parsed.len; + if (pos >= cell_data.len) continue; + + if (rowid == target_rowid) { + const record_data = cell_data[pos..]; + parsed = varint.parse(record_data); + const header_size = parsed.value; + if (header_size > record_data.len) continue; + var header_pos = parsed.len; + + var serial_types = std.ArrayList(u64){}; + defer serial_types.deinit(allocator); + + while (header_pos < header_size) { + parsed = varint.parse(record_data[header_pos..]); + serial_types.append(allocator, parsed.value) catch break; + header_pos += parsed.len; + } + + for (column_indices, 0..) |column_idx, col_num| { + if (col_num > 0) try stdout.print("|", .{}); + + if (column_idx >= serial_types.items.len) continue; + + var body_pos: usize = header_size; + for (0..column_idx) |col| { + if (col >= serial_types.items.len) break; + const st = serial_types.items[col]; + if (st == 0 or st == 8 or st == 9) { + // NULL, 0, or 1 - no data + } else if (st >= 13 and (st % 2) == 1) { + body_pos += (st - 13) / 2; + } else if (st >= 12 and (st % 2) == 0) { + body_pos += (st - 12) / 2; + } else if (st >= 1 and st <= 6) { + const int_result = record.readInt(record_data[body_pos..], st); + body_pos += int_result.len; + } else if (st == 7) { + body_pos += 8; + } + } + + const st = serial_types.items[column_idx]; + if (st == 0) { + try stdout.print("{}", .{rowid}); + } else if (st == 8) { + try stdout.print("0", .{}); + } else if (st == 9) { + try stdout.print("1", .{}); + } else if (st >= 1 and st <= 6) { + const int_result = record.readInt(record_data[body_pos..], st); + try stdout.print("{}", .{int_result.value}); + } else if (st == 7) { + const float_result = record.readFloat(record_data[body_pos..]); + try stdout.print("{d}", .{float_result.value}); + } else if (st >= 13 and (st % 2) == 1) { + const str_result = record.readString(record_data[body_pos..], st); + try stdout.print("{s}", .{str_result.value}); + } + } + try stdout.print("\n", .{}); + return; + } + } + return; + } else if (page_type == 0x05) { + // Interior table page + const num_cells = std.mem.readInt(u16, page_data[3..5], .big); + + var next_page: u32 = 0; + for (0..num_cells) |i| { + const offset = 12 + i * 2; + if (offset + 2 > page_data.len) continue; + const cell_ptr = std.mem.readInt(u16, page_data[offset..][0..2], .big); + if (cell_ptr + 8 > page_data.len) continue; + + const cell_data = page_data[cell_ptr..]; + const left_child_page = std.mem.readInt(u32, cell_data[0..4], .big); + + const parsed = varint.parse(cell_data[4..]); + const key = parsed.value; + + if (target_rowid <= key) { + next_page = left_child_page; + break; + } + } + + if (next_page == 0) { + next_page = std.mem.readInt(u32, page_data[8..12], .big); + } + + current_page = next_page; + } else { + return; + } + } +} + +/// Read table rows - single column +pub fn readTableRows(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, rootpage: u32, column_idx: usize, stdout: anytype) !void { + if (rootpage == 0) return; + + const page_offset = (rootpage - 1) * @as(u64, page_size); + var page_data = try allocator.alloc(u8, page_size); + defer allocator.free(page_data); + + _ = try file.seekTo(page_offset); + _ = try file.read(page_data); + + const page_type = page_data[0]; + if (page_type != 0x0d) return; + + const num_cells = std.mem.readInt(u16, page_data[3..5], .big); + + var cell_pointers = try allocator.alloc(u16, num_cells); + defer allocator.free(cell_pointers); + + for (0..num_cells) |i| { + const offset = 8 + i * 2; + const cell_ptr_bytes = page_data[offset .. offset + 2]; + cell_pointers[i] = std.mem.readInt(u16, cell_ptr_bytes[0..2], .big); + } + + for (0..num_cells) |i| { + const cell_data = page_data[cell_pointers[i]..]; + + var parsed = varint.parse(cell_data); + var pos = parsed.len; + + parsed = varint.parse(cell_data[pos..]); + pos += parsed.len; + + const record_data = cell_data[pos..]; + parsed = varint.parse(record_data); + const header_size = parsed.value; + var header_pos = parsed.len; + + var serial_types = std.ArrayList(u64){}; + defer serial_types.deinit(allocator); + + while (header_pos < header_size) { + parsed = varint.parse(record_data[header_pos..]); + try serial_types.append(allocator, parsed.value); + header_pos += parsed.len; + } + + if (column_idx >= serial_types.items.len) continue; + + var body_pos: usize = header_size; + for (0..column_idx) |col| { + if (col >= serial_types.items.len) break; + const st = serial_types.items[col]; + if (st >= 13 and (st % 2) == 1) { + body_pos += (st - 13) / 2; + } else if (st >= 1 and st <= 6) { + const int_result = record.readInt(record_data[body_pos..], st); + body_pos += int_result.len; + } + } + + const st = serial_types.items[column_idx]; + if (st >= 13 and (st % 2) == 1) { + const str_result = record.readString(record_data[body_pos..], st); + try stdout.print("{s}\n", .{str_result.value}); + } else if (st >= 1 and st <= 6) { + const int_result = record.readInt(record_data[body_pos..], st); + try stdout.print("{}\n", .{int_result.value}); + } + } +} + +/// Read table rows - multiple columns +pub fn readTableRowsMultiColumn(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, rootpage: u32, column_indices: []const usize, stdout: anytype) !void { + if (rootpage == 0) return; + + const page_offset = (rootpage - 1) * @as(u64, page_size); + var page_data = try allocator.alloc(u8, page_size); + defer allocator.free(page_data); + + _ = try file.seekTo(page_offset); + _ = try file.read(page_data); + + const page_type = page_data[0]; + if (page_type != 0x0d) return; + + const num_cells = std.mem.readInt(u16, page_data[3..5], .big); + + var cell_pointers = try allocator.alloc(u16, num_cells); + defer allocator.free(cell_pointers); + + for (0..num_cells) |i| { + const offset = 8 + i * 2; + const cell_ptr_bytes = page_data[offset .. offset + 2]; + cell_pointers[i] = std.mem.readInt(u16, cell_ptr_bytes[0..2], .big); + } + + for (0..num_cells) |i| { + const cell_data = page_data[cell_pointers[i]..]; + + var parsed = varint.parse(cell_data); + var pos = parsed.len; + + parsed = varint.parse(cell_data[pos..]); + pos += parsed.len; + + const record_data = cell_data[pos..]; + parsed = varint.parse(record_data); + const header_size = parsed.value; + var header_pos = parsed.len; + + var serial_types = std.ArrayList(u64){}; + defer serial_types.deinit(allocator); + + while (header_pos < header_size) { + parsed = varint.parse(record_data[header_pos..]); + try serial_types.append(allocator, parsed.value); + header_pos += parsed.len; + } + + for (column_indices, 0..) |column_idx, col_num| { + var body_pos: usize = header_size; + for (0..column_idx) |col| { + if (col >= serial_types.items.len) break; + const st = serial_types.items[col]; + if (st >= 13 and (st % 2) == 1) { + body_pos += (st - 13) / 2; + } else if (st >= 1 and st <= 6) { + const int_result = record.readInt(record_data[body_pos..], st); + body_pos += int_result.len; + } + } + + if (col_num > 0) { + try stdout.print("|", .{}); + } + + if (column_idx < serial_types.items.len) { + const st = serial_types.items[column_idx]; + if (st >= 13 and (st % 2) == 1) { + const str_result = record.readString(record_data[body_pos..], st); + try stdout.print("{s}", .{str_result.value}); + } else if (st >= 1 and st <= 6) { + const int_result = record.readInt(record_data[body_pos..], st); + try stdout.print("{}", .{int_result.value}); + } + } + } + try stdout.print("\n", .{}); + } +} + +/// Read table rows with WHERE clause - uses optimized B-tree traversal +pub fn readTableRowsMultiColumnWhere(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, rootpage: u32, column_indices: []const usize, where_column_idx: ?usize, where_value: ?[]const u8, stdout: anytype) !void { + try btree.traverseBTree(allocator, file, page_size, rootpage, column_indices, where_column_idx, where_value, stdout); +} + +/// Read table rows using index scan +pub fn readTableRowsWithIndex(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, table_name: []const u8, table_rootpage: u32, column_indices: []const usize, where_column: []const u8, where_value: []const u8, stdout: anytype) !void { + _ = allocator; + _ = file; + _ = page_size; + _ = table_name; + _ = table_rootpage; + _ = column_indices; + _ = where_column; + _ = where_value; + _ = stdout; + // Index scan disabled - use optimized table scan + return error.NoIndexFound; +} diff --git a/src/schema.zig b/src/schema.zig index 1ed2bf0..f6dc31d 100644 --- a/src/schema.zig +++ b/src/schema.zig @@ -2,14 +2,13 @@ const std = @import("std"); const varint = @import("varint.zig"); const record = @import("record.zig"); -fn getIndexRootpage(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, table_name: []const u8, column_name: []const u8) !?u32 { +/// Get the rootpage number for a table by name +pub fn getRootpage(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, table_name: []const u8) !u32 { var buf: [2]u8 = undefined; _ = try file.seekTo(103); _ = try file.read(&buf); const num_cells = std.mem.readInt(u16, &buf, .big); - if (num_cells == 0) return null; - var cell_pointers = try allocator.alloc(u16, num_cells); defer allocator.free(cell_pointers); @@ -51,542 +50,29 @@ fn getIndexRootpage(allocator: std.mem.Allocator, file: *std.fs.File, page_size: header_pos += parsed.len; } - var body_pos: usize = header_size; - if (body_pos >= record_data.len) continue; - - // Field 0: type - const st0 = serial_types[0]; - const type_result = record.readString(record_data[body_pos..], st0); - if (!std.mem.eql(u8, type_result.value, "index")) continue; - body_pos += type_result.len; - if (body_pos >= record_data.len) continue; - - // Field 1: name - skip it - const st1 = serial_types[1]; - if (st1 >= 13 and (st1 % 2) == 1) { - body_pos += (st1 - 13) / 2; - } - if (body_pos >= record_data.len) continue; - - // Field 2: tbl_name - const st2 = serial_types[2]; - const tbl_name_result = record.readString(record_data[body_pos..], st2); - if (!std.mem.eql(u8, tbl_name_result.value, table_name)) continue; - body_pos += tbl_name_result.len; - if (body_pos >= record_data.len) continue; - - // Field 3: rootpage - const st3 = serial_types[3]; - const rp = record.readInt(record_data[body_pos..], st3); - body_pos += rp.len; - if (body_pos >= record_data.len) continue; - - // Field 4: sql - const st4 = serial_types[4]; - const sql_result = record.readString(record_data[body_pos..], st4); - - if (std.mem.indexOf(u8, sql_result.value, column_name) != null) { - return @as(u32, @intCast(rp.value)); - } - } - - return null; -} - -fn searchIndexForValue(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, index_rootpage: u32, search_value: []const u8, rowids: *std.ArrayList(u64)) !void { - // Read file into memory once - const file_size = try file.getEndPos(); - const file_data = try allocator.alloc(u8, file_size); - defer allocator.free(file_data); - - _ = try file.seekTo(0); - _ = try file.read(file_data); - - // Use stack to iterate all pages (BFS approach - visit ALL index pages to find ALL matches) - var stack = std.ArrayList(u32){}; - defer stack.deinit(allocator); - try stack.append(allocator, index_rootpage); - - while (stack.items.len > 0) { - const page_num = stack.pop() orelse continue; - if (page_num == 0) continue; - - const page_offset = (page_num - 1) * @as(usize, page_size); - if (page_offset + page_size > file_data.len) continue; - - const page_data = file_data[page_offset .. page_offset + page_size]; - const page_type = page_data[0]; - - if (page_type == 0x0a) { - // Leaf index page - search for matching values - const num_cells = std.mem.readInt(u16, page_data[3..5], .big); - - for (0..num_cells) |i| { - const offset = 8 + i * 2; - if (offset + 2 > page_data.len) continue; - const cell_ptr = std.mem.readInt(u16, page_data[offset..][0..2], .big); - if (cell_ptr >= page_data.len) continue; - - const cell_data = page_data[cell_ptr..]; - var parsed = varint.parse(cell_data); - const pos = parsed.len; - if (pos >= cell_data.len) continue; - - const record_data = cell_data[pos..]; - parsed = varint.parse(record_data); - const header_size = parsed.value; - if (header_size > record_data.len) continue; - var header_pos = parsed.len; - - // Parse serial types - var serial_types: [8]u64 = undefined; - var num_st: usize = 0; - while (header_pos < header_size and num_st < 8) { - parsed = varint.parse(record_data[header_pos..]); - serial_types[num_st] = parsed.value; - num_st += 1; - header_pos += parsed.len; - } - - if (num_st >= 2) { - const st = serial_types[0]; - var body_pos: usize = header_size; - - if (st >= 13 and (st % 2) == 1) { - const str_result = record.readString(record_data[body_pos..], st); - if (std.mem.eql(u8, str_result.value, search_value)) { - body_pos += str_result.len; - const rowid_st = serial_types[1]; - const rowid_result = record.readInt(record_data[body_pos..], rowid_st); - try rowids.append(allocator, @as(u64, @intCast(rowid_result.value))); - } - } - } - } - } else if (page_type == 0x02) { - // Interior index page - add ALL children to stack - const num_cells = std.mem.readInt(u16, page_data[3..5], .big); - const rightmost_ptr = std.mem.readInt(u32, page_data[8..12], .big); - - // Add all cell pointers - for (0..num_cells) |i| { - const offset = 12 + i * 2; - if (offset + 2 > page_data.len) continue; - const cell_ptr = std.mem.readInt(u16, page_data[offset..][0..2], .big); - if (cell_ptr + 4 > page_data.len) continue; - - const cell_data = page_data[cell_ptr..]; - const left_child_page = std.mem.readInt(u32, cell_data[0..4], .big); - if (left_child_page > 0) { - try stack.append(allocator, left_child_page); - } - } - - if (rightmost_ptr > 0) { - try stack.append(allocator, rightmost_ptr); - } - } - } -} -fn readRecordByRowid(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, table_rootpage: u32, target_rowid: u64, column_indices: []const usize, stdout: anytype) !void { - try searchTableForRowid(allocator, file, page_size, table_rootpage, target_rowid, column_indices, stdout); -} - -fn searchTableForRowid(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, page_num: u32, target_rowid: u64, column_indices: []const usize, stdout: anytype) !void { - // Use recursive approach but with depth limit to avoid infinite loops - var current_page = page_num; - var depth: u32 = 0; - const max_depth = 50; // Safety limit - - while (current_page != 0 and depth < max_depth) { - depth += 1; - - const page_offset = (current_page - 1) * @as(u64, page_size); - var page_data = try allocator.alloc(u8, page_size); - defer allocator.free(page_data); - - _ = try file.seekTo(page_offset); - _ = try file.read(page_data); - - const page_type = page_data[0]; - - if (page_type == 0x0d) { - // Leaf table page - const num_cells = std.mem.readInt(u16, page_data[3..5], .big); - - for (0..num_cells) |i| { - const offset = 8 + i * 2; - if (offset + 2 > page_data.len) continue; - const cell_bytes: *const [2]u8 = page_data[offset .. offset + 2][0..2]; - const cell_ptr = std.mem.readInt(u16, cell_bytes, .big); - if (cell_ptr >= page_data.len) continue; - - const cell_data = page_data[cell_ptr..]; - - var parsed = varint.parse(cell_data); - var pos = parsed.len; - if (pos >= cell_data.len) continue; - - parsed = varint.parse(cell_data[pos..]); - const rowid = parsed.value; - pos += parsed.len; - if (pos >= cell_data.len) continue; - - if (rowid == target_rowid) { - const record_data = cell_data[pos..]; - parsed = varint.parse(record_data); - const header_size = parsed.value; - if (header_size > record_data.len) continue; - var header_pos = parsed.len; - - var serial_types = std.ArrayList(u64){}; - defer serial_types.deinit(allocator); - - while (header_pos < header_size) { - parsed = varint.parse(record_data[header_pos..]); - serial_types.append(allocator, parsed.value) catch break; - header_pos += parsed.len; - } - - for (column_indices, 0..) |column_idx, col_num| { - if (col_num > 0) try stdout.print("|", .{}); - - if (column_idx >= serial_types.items.len) continue; - - var body_pos: usize = header_size; - for (0..column_idx) |col| { - if (col >= serial_types.items.len) break; - const st = serial_types.items[col]; - if (st == 0 or st == 8 or st == 9) { - // NULL, 0, or 1 - no data - } else if (st >= 13 and (st % 2) == 1) { - body_pos += (st - 13) / 2; - } else if (st >= 12 and (st % 2) == 0) { - body_pos += (st - 12) / 2; - } else if (st >= 1 and st <= 6) { - const int_result = record.readInt(record_data[body_pos..], st); - body_pos += int_result.len; - } else if (st == 7) { - body_pos += 8; - } - } - - const st = serial_types.items[column_idx]; - if (st == 0) { - try stdout.print("{}", .{rowid}); - } else if (st == 8) { - try stdout.print("0", .{}); - } else if (st == 9) { - try stdout.print("1", .{}); - } else if (st >= 1 and st <= 6) { - const int_result = record.readInt(record_data[body_pos..], st); - try stdout.print("{}", .{int_result.value}); - } else if (st >= 13 and (st % 2) == 1) { - const str_result = record.readString(record_data[body_pos..], st); - try stdout.print("{s}", .{str_result.value}); - } - } - try stdout.print("\n", .{}); - return; - } - } - // Not found in this leaf, stop searching - return; - } else if (page_type == 0x05) { - // Interior table page - use binary search to find the right child - const num_cells = std.mem.readInt(u16, page_data[3..5], .big); - const rightmost_ptr = std.mem.readInt(u32, page_data[8..12], .big); - - // Find which child to follow based on rowid - var next_page: u32 = rightmost_ptr; - for (0..num_cells) |i| { - const offset = 12 + i * 2; - if (offset + 2 > page_data.len) continue; - const cell_bytes: *const [2]u8 = page_data[offset .. offset + 2][0..2]; - const cell_ptr = std.mem.readInt(u16, cell_bytes, .big); - if (cell_ptr + 4 > page_data.len) continue; - - const cell_data = page_data[cell_ptr..]; - const left_child_page = std.mem.readInt(u32, cell_data[0..4], .big); - - const parsed_key = varint.parse(cell_data[4..]); - const cell_rowid = parsed_key.value; - - if (target_rowid < cell_rowid) { - next_page = left_child_page; - break; - } - } - - current_page = next_page; - } else { - return; - } - } -} - -inline fn serialTypeSize(st: u64) usize { - if (st == 0 or st == 8 or st == 9) { - return 0; - } else if (st >= 13 and (st % 2) == 1) { - return (st - 13) / 2; - } else if (st >= 12 and (st % 2) == 0) { - return (st - 12) / 2; - } else if (st >= 1 and st <= 6) { - return @as(usize, st); - } else if (st == 7) { - return 8; - } - return 0; -} - -fn readLeafPageRows(page_data: []const u8, column_indices: []const usize, where_column_idx: ?usize, where_value: ?[]const u8, stdout: anytype) !void { - const page_type = page_data[0]; - if (page_type != 0x0d) return; - - const num_cells = std.mem.readInt(u16, page_data[3..5], .big); - if (num_cells == 0) return; - - for (0..num_cells) |i| { - const offset = 8 + i * 2; - if (offset + 2 > page_data.len) continue; - const cell_ptr = std.mem.readInt(u16, page_data[offset..][0..2], .big); - if (cell_ptr >= page_data.len) continue; - - const cell_data = page_data[cell_ptr..]; - - var parsed = varint.parse(cell_data); - var pos = parsed.len; - if (pos >= cell_data.len) continue; - - parsed = varint.parse(cell_data[pos..]); - const rowid = parsed.value; - pos += parsed.len; - if (pos >= cell_data.len) continue; - - const record_data = cell_data[pos..]; - parsed = varint.parse(record_data); - const header_size = parsed.value; - if (header_size > record_data.len or header_size == 0) continue; - var header_pos = parsed.len; - - // Parse serial types - var serial_types: [256]u64 = undefined; - var num_columns: usize = 0; - while (header_pos < header_size and num_columns < 256) { - parsed = varint.parse(record_data[header_pos..]); - serial_types[num_columns] = parsed.value; - num_columns += 1; - header_pos += parsed.len; - } - - // Check WHERE clause if present (early rejection) - if (where_column_idx) |where_idx| { - if (where_value) |expected_value| { - if (where_idx >= num_columns) continue; - - const st = serial_types[where_idx]; - - // Fast path: check serial type first - if (st >= 13 and (st % 2) == 1) { - // String comparison - check length first for early rejection - const expected_len = (st - 13) / 2; - if (expected_len != expected_value.len) continue; - - var where_body_pos: usize = header_size; - for (0..where_idx) |col| { - where_body_pos += serialTypeSize(serial_types[col]); - } - - const str_result = record.readString(record_data[where_body_pos..], st); - if (!std.mem.eql(u8, str_result.value, expected_value)) continue; - } else if (st >= 1 and st <= 6) { - var where_body_pos: usize = header_size; - for (0..where_idx) |col| { - where_body_pos += serialTypeSize(serial_types[col]); - } - const int_result = record.readInt(record_data[where_body_pos..], st); - const expected_int = std.fmt.parseInt(i64, expected_value, 10) catch 0; - if (int_result.value != expected_int) continue; - } else { - continue; // Unsupported type for WHERE - } - } - } - - // Print matching row - for (column_indices, 0..) |column_idx, col_num| { - if (col_num > 0) try stdout.print("|", .{}); - if (column_idx >= num_columns) continue; - - var body_pos: usize = header_size; - for (0..column_idx) |col| { - body_pos += serialTypeSize(serial_types[col]); - } - - const st = serial_types[column_idx]; - if (st == 0) { - try stdout.print("{}", .{rowid}); - } else if (st == 8) { - try stdout.print("0", .{}); - } else if (st == 9) { - try stdout.print("1", .{}); - } else if (st >= 1 and st <= 6) { - const int_result = record.readInt(record_data[body_pos..], st); - try stdout.print("{}", .{int_result.value}); - } else if (st >= 13 and (st % 2) == 1) { - const str_result = record.readString(record_data[body_pos..], st); - try stdout.print("{s}", .{str_result.value}); - } - } - try stdout.print("\n", .{}); - } -} - -fn traverseBTree(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, page_num: u32, column_indices: []const usize, where_column_idx: ?usize, where_value: ?[]const u8, stdout: anytype) !void { - // Read entire file into memory for fast random access - const file_size = try file.getEndPos(); - const file_data = try allocator.alloc(u8, file_size); - defer allocator.free(file_data); - - _ = try file.seekTo(0); - _ = try file.read(file_data); - - var stack = std.ArrayList(u32){}; - defer stack.deinit(allocator); - try stack.append(allocator, page_num); - - while (stack.items.len > 0) { - const current_page = stack.pop() orelse continue; - - const page_offset = (current_page - 1) * @as(usize, page_size); - if (page_offset + page_size > file_data.len) continue; - - const page_data = file_data[page_offset .. page_offset + page_size]; - const page_type = page_data[0]; - - if (page_type == 0x0d) { - try readLeafPageRows(page_data, column_indices, where_column_idx, where_value, stdout); - } else if (page_type == 0x05) { - const num_cells = std.mem.readInt(u16, page_data[3..5], .big); - const rightmost_ptr = std.mem.readInt(u32, page_data[8..12], .big); - - // Add rightmost first so it's processed last (stack LIFO) - if (rightmost_ptr != 0) { - try stack.append(allocator, rightmost_ptr); - } - - // Add children in reverse order for correct traversal - var i = num_cells; - while (i > 0) { - i -= 1; - const offset = 12 + i * 2; - if (offset + 2 > page_data.len) continue; - const cell_ptr_bytes = page_data[offset .. offset + 2]; - const cell_ptr = std.mem.readInt(u16, cell_ptr_bytes[0..2], .big); - if (cell_ptr + 4 > page_data.len) continue; - - const cell_data = page_data[cell_ptr..]; - const left_child_page = std.mem.readInt(u32, cell_data[0..4], .big); - - if (left_child_page != 0) { - try stack.append(allocator, left_child_page); - } - } - } - } -} - -pub fn getRootpage(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, table_name: []const u8) !u32 { - var buf: [2]u8 = undefined; - _ = try file.seekTo(103); - _ = try file.read(&buf); - const num_cells = std.mem.readInt(u16, &buf, .big); - - var cell_pointers = try allocator.alloc(u16, num_cells); - defer allocator.free(cell_pointers); - - for (0..num_cells) |i| { - _ = try file.seekTo(108 + i * 2); - _ = try file.read(&buf); - cell_pointers[i] = std.mem.readInt(u16, &buf, .big); - } - - var page_data = try allocator.alloc(u8, page_size); - defer allocator.free(page_data); - - _ = try file.seekTo(0); - _ = try file.read(page_data); - - for (0..num_cells) |i| { - const cell_data = page_data[cell_pointers[i]..]; - - var parsed = varint.parse(cell_data); - var pos = parsed.len; - - parsed = varint.parse(cell_data[pos..]); - pos += parsed.len; - - const record_data = cell_data[pos..]; - parsed = varint.parse(record_data); - const header_size = parsed.value; - var header_pos = parsed.len; - - var serial_types: [5]u64 = undefined; - for (0..5) |col| { - parsed = varint.parse(record_data[header_pos..]); - serial_types[col] = parsed.value; - header_pos += parsed.len; - } + const type_st = serial_types[0]; + const name_st = serial_types[1]; + const rootpage_st = serial_types[3]; var body_pos: usize = header_size; + const type_result = record.readString(record_data[body_pos..], type_st); + if (!std.mem.eql(u8, type_result.value, "table")) continue; - const st0 = serial_types[0]; - if (st0 >= 13 and (st0 % 2) == 1) { - body_pos += (st0 - 13) / 2; - } else if (st0 >= 1 and st0 <= 6) { - const r0 = record.readInt(record_data[body_pos..], st0); - body_pos += r0.len; - } - - const st1 = serial_types[1]; - if (st1 >= 13 and (st1 % 2) == 1) { - body_pos += (st1 - 13) / 2; - } else if (st1 >= 1 and st1 <= 6) { - const r1 = record.readInt(record_data[body_pos..], st1); - body_pos += r1.len; - } - - const tbl_name_result = record.readString(record_data[body_pos..], serial_types[2]); - body_pos += tbl_name_result.len; - - if (std.mem.eql(u8, tbl_name_result.value, table_name)) { - const rp = record.readInt(record_data[body_pos..], serial_types[3]); - return @as(u32, @intCast(rp.value)); - } - } - - return 0; -} - -pub fn countRows(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, rootpage: u32) !u64 { - if (rootpage == 0) return 0; - - const page_offset = (rootpage - 1) * @as(u64, page_size); - var page_data = try allocator.alloc(u8, page_size); - defer allocator.free(page_data); + body_pos += type_result.len; + const name_result = record.readString(record_data[body_pos..], name_st); + if (!std.mem.eql(u8, name_result.value, table_name)) continue; - _ = try file.seekTo(page_offset); - _ = try file.read(page_data); + body_pos += name_result.len; + body_pos += (serial_types[2] - 13) / 2; - const page_type = page_data[0]; - if (page_type == 0x05 or page_type == 0x0d) { - return std.mem.readInt(u16, page_data[3..5], .big); + const rootpage_result = record.readInt(record_data[body_pos..], rootpage_st); + return @as(u32, @intCast(rootpage_result.value)); } - return 0; + return error.TableNotFound; } +/// Get CREATE TABLE SQL for a table pub fn getCreateTableSQL(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, table_name: []const u8) ![]const u8 { var buf: [2]u8 = undefined; _ = try file.seekTo(103); @@ -609,276 +95,112 @@ pub fn getCreateTableSQL(allocator: std.mem.Allocator, file: *std.fs.File, page_ _ = try file.read(page_data); for (0..num_cells) |i| { + if (cell_pointers[i] >= page_data.len) continue; const cell_data = page_data[cell_pointers[i]..]; var parsed = varint.parse(cell_data); var pos = parsed.len; + if (pos >= cell_data.len) continue; parsed = varint.parse(cell_data[pos..]); pos += parsed.len; + if (pos >= cell_data.len) continue; const record_data = cell_data[pos..]; parsed = varint.parse(record_data); const header_size = parsed.value; + if (header_size > record_data.len) continue; var header_pos = parsed.len; var serial_types: [5]u64 = undefined; for (0..5) |col| { + if (header_pos >= header_size) break; parsed = varint.parse(record_data[header_pos..]); serial_types[col] = parsed.value; header_pos += parsed.len; } - var body_pos: usize = header_size; + const type_st = serial_types[0]; + const name_st = serial_types[1]; + const sql_st = serial_types[4]; - const st0 = serial_types[0]; - if (st0 >= 13 and (st0 % 2) == 1) { - body_pos += (st0 - 13) / 2; - } else if (st0 >= 1 and st0 <= 6) { - const r0 = record.readInt(record_data[body_pos..], st0); - body_pos += r0.len; - } + var body_pos: usize = header_size; + const type_result = record.readString(record_data[body_pos..], type_st); + if (!std.mem.eql(u8, type_result.value, "table")) continue; - const st1 = serial_types[1]; - if (st1 >= 13 and (st1 % 2) == 1) { - body_pos += (st1 - 13) / 2; - } else if (st1 >= 1 and st1 <= 6) { - const r1 = record.readInt(record_data[body_pos..], st1); - body_pos += r1.len; - } + body_pos += type_result.len; + const name_result = record.readString(record_data[body_pos..], name_st); + if (!std.mem.eql(u8, name_result.value, table_name)) continue; - const tbl_name_result = record.readString(record_data[body_pos..], serial_types[2]); - body_pos += tbl_name_result.len; + body_pos += name_result.len; + body_pos += (serial_types[2] - 13) / 2; - if (std.mem.eql(u8, tbl_name_result.value, table_name)) { - const rp = record.readInt(record_data[body_pos..], serial_types[3]); - body_pos += rp.len; + const rootpage_result = record.readInt(record_data[body_pos..], serial_types[3]); + body_pos += rootpage_result.len; - const sql_result = record.readString(record_data[body_pos..], serial_types[4]); - return try allocator.dupe(u8, sql_result.value); - } + const sql_result = record.readString(record_data[body_pos..], sql_st); + return try allocator.dupe(u8, sql_result.value); } return error.TableNotFound; } +/// Parse column index from CREATE TABLE SQL pub fn parseColumnIndex(sql: []const u8, column_name: []const u8) !usize { - var paren_idx: ?usize = null; + var in_parens = false; + var paren_start: usize = 0; + for (sql, 0..) |c, i| { if (c == '(') { - paren_idx = i; - break; - } - } - - if (paren_idx == null) return error.InvalidSQL; + if (!in_parens) { + paren_start = i + 1; + in_parens = true; + } + } else if (c == ')') { + if (in_parens) { + const columns_part = sql[paren_start..i]; + + var column_idx: usize = 0; + var start: usize = 0; + var in_col = false; + + for (columns_part, 0..) |ch, j| { + if (!in_col) { + if (ch != ' ' and ch != '\t' and ch != '\n' and ch != '\r') { + start = j; + in_col = true; + } + } else { + if (ch == ',' or j == columns_part.len - 1) { + var end = j; + if (j == columns_part.len - 1 and ch != ',') { + end = j + 1; + } - var col_idx: usize = 0; - var in_col_name = false; - var col_start: usize = paren_idx.? + 1; + const col_def = columns_part[start..end]; - for (sql[paren_idx.? + 1 ..], 0..) |c, i| { - const actual_idx = paren_idx.? + 1 + i; + var space_idx: usize = 0; + for (col_def, 0..) |col_ch, col_i| { + if (col_ch == ' ' or col_ch == '\t') { + space_idx = col_i; + break; + } + } - if (c == ')') break; + const col_name = if (space_idx > 0) col_def[0..space_idx] else col_def; - if (c == ' ' or c == '\t' or c == '\n') { - if (in_col_name) { - const col_name = std.mem.trim(u8, sql[col_start..actual_idx], " \t\n"); - if (std.mem.eql(u8, col_name, column_name)) { - return col_idx; - } - in_col_name = false; - } - continue; - } + if (std.mem.eql(u8, col_name, column_name)) { + return column_idx; + } - if (c == ',') { - if (in_col_name) { - const col_name = std.mem.trim(u8, sql[col_start..actual_idx], " \t\n"); - if (std.mem.eql(u8, col_name, column_name)) { - return col_idx; + column_idx += 1; + in_col = false; + } + } } } - col_idx += 1; - in_col_name = false; - continue; - } - - if (!in_col_name) { - col_start = actual_idx; - in_col_name = true; } } return error.ColumnNotFound; } - -pub fn readTableRows(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, rootpage: u32, column_idx: usize, stdout: anytype) !void { - if (rootpage == 0) return; - - const page_offset = (rootpage - 1) * @as(u64, page_size); - var page_data = try allocator.alloc(u8, page_size); - defer allocator.free(page_data); - - _ = try file.seekTo(page_offset); - _ = try file.read(page_data); - - const page_type = page_data[0]; - if (page_type != 0x0d) return; - - const num_cells = std.mem.readInt(u16, page_data[3..5], .big); - - var cell_pointers = try allocator.alloc(u16, num_cells); - defer allocator.free(cell_pointers); - - for (0..num_cells) |i| { - const offset = 8 + i * 2; - const cell_ptr_bytes = page_data[offset .. offset + 2]; - cell_pointers[i] = std.mem.readInt(u16, cell_ptr_bytes[0..2], .big); - } - - for (0..num_cells) |i| { - const cell_data = page_data[cell_pointers[i]..]; - - var parsed = varint.parse(cell_data); - var pos = parsed.len; - - parsed = varint.parse(cell_data[pos..]); - pos += parsed.len; - - const record_data = cell_data[pos..]; - parsed = varint.parse(record_data); - const header_size = parsed.value; - var header_pos = parsed.len; - - var serial_types = std.ArrayList(u64){}; - defer serial_types.deinit(allocator); - - while (header_pos < header_size) { - parsed = varint.parse(record_data[header_pos..]); - try serial_types.append(allocator, parsed.value); - header_pos += parsed.len; - } - - var body_pos: usize = header_size; - for (0..column_idx) |col| { - if (col >= serial_types.items.len) break; - const st = serial_types.items[col]; - if (st >= 13 and (st % 2) == 1) { - body_pos += (st - 13) / 2; - } else if (st >= 1 and st <= 6) { - const int_result = record.readInt(record_data[body_pos..], st); - body_pos += int_result.len; - } - } - - if (column_idx < serial_types.items.len) { - const st = serial_types.items[column_idx]; - if (st >= 13 and (st % 2) == 1) { - const str_result = record.readString(record_data[body_pos..], st); - try stdout.print("{s}\n", .{str_result.value}); - } else if (st >= 1 and st <= 6) { - const int_result = record.readInt(record_data[body_pos..], st); - try stdout.print("{}\n", .{int_result.value}); - } - } - } -} - -pub fn readTableRowsMultiColumn(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, rootpage: u32, column_indices: []const usize, stdout: anytype) !void { - if (rootpage == 0) return; - - const page_offset = (rootpage - 1) * @as(u64, page_size); - var page_data = try allocator.alloc(u8, page_size); - defer allocator.free(page_data); - - _ = try file.seekTo(page_offset); - _ = try file.read(page_data); - - const page_type = page_data[0]; - if (page_type != 0x0d) return; - - const num_cells = std.mem.readInt(u16, page_data[3..5], .big); - - var cell_pointers = try allocator.alloc(u16, num_cells); - defer allocator.free(cell_pointers); - - for (0..num_cells) |i| { - const offset = 8 + i * 2; - const cell_ptr_bytes = page_data[offset .. offset + 2]; - cell_pointers[i] = std.mem.readInt(u16, cell_ptr_bytes[0..2], .big); - } - - for (0..num_cells) |i| { - const cell_data = page_data[cell_pointers[i]..]; - - var parsed = varint.parse(cell_data); - var pos = parsed.len; - - parsed = varint.parse(cell_data[pos..]); - pos += parsed.len; - - const record_data = cell_data[pos..]; - parsed = varint.parse(record_data); - const header_size = parsed.value; - var header_pos = parsed.len; - - var serial_types = std.ArrayList(u64){}; - defer serial_types.deinit(allocator); - - while (header_pos < header_size) { - parsed = varint.parse(record_data[header_pos..]); - try serial_types.append(allocator, parsed.value); - header_pos += parsed.len; - } - - for (column_indices, 0..) |column_idx, col_num| { - var body_pos: usize = header_size; - for (0..column_idx) |col| { - if (col >= serial_types.items.len) break; - const st = serial_types.items[col]; - if (st >= 13 and (st % 2) == 1) { - body_pos += (st - 13) / 2; - } else if (st >= 1 and st <= 6) { - const int_result = record.readInt(record_data[body_pos..], st); - body_pos += int_result.len; - } - } - - if (col_num > 0) { - try stdout.print("|", .{}); - } - - if (column_idx < serial_types.items.len) { - const st = serial_types.items[column_idx]; - if (st >= 13 and (st % 2) == 1) { - const str_result = record.readString(record_data[body_pos..], st); - try stdout.print("{s}", .{str_result.value}); - } else if (st >= 1 and st <= 6) { - const int_result = record.readInt(record_data[body_pos..], st); - try stdout.print("{}", .{int_result.value}); - } - } - } - try stdout.print("\n", .{}); - } -} - -pub fn readTableRowsMultiColumnWhere(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, rootpage: u32, column_indices: []const usize, where_column_idx: ?usize, where_value: ?[]const u8, stdout: anytype) !void { - try traverseBTree(allocator, file, page_size, rootpage, column_indices, where_column_idx, where_value, stdout); -} - -pub fn readTableRowsWithIndex(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, table_name: []const u8, table_rootpage: u32, column_indices: []const usize, where_column: []const u8, where_value: []const u8, stdout: anytype) !void { - _ = allocator; - _ = file; - _ = page_size; - _ = table_name; - _ = table_rootpage; - _ = column_indices; - _ = where_column; - _ = where_value; - _ = stdout; - // Temporarily disable to test table scan - return error.NoIndexFound; -} |