summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLucas Faria Mendes <lucas.oliveira1676@etec.sp.gov.br>2025-12-05 15:09:51 +0000
committerLucas Faria Mendes <lucas.oliveira1676@etec.sp.gov.br>2025-12-05 15:09:51 +0000
commitde09f812fd11665010daadb60f101046d459f5ec (patch)
tree9884a06b674c9a0fb8e7a8f6698976dc764e7d04
parent11b7033a351226696290983811928d22ccc85256 (diff)
downloadsqlite-zig-main.tar.gz
sqlite-zig-main.zip
codecrafters submit [skip ci]main
-rw-r--r--build.zig2
-rw-r--r--src/btree.zig194
-rw-r--r--src/index.zig180
-rwxr-xr-xsrc/main.zig14
-rw-r--r--src/query.zig318
-rw-r--r--src/schema.zig838
6 files changed, 781 insertions, 765 deletions
diff --git a/build.zig b/build.zig
index c325e60..8b32fb6 100644
--- a/build.zig
+++ b/build.zig
@@ -3,7 +3,7 @@ const std = @import("std");
// Learn more about this file here: https://ziglang.org/learn/build-system
pub fn build(b: *std.Build) void {
const optimize = b.standardOptimizeOption(.{ .preferred_optimize_mode = .ReleaseFast });
-
+
const exe = b.addExecutable(.{
.name = "main",
.root_module = b.createModule(.{
diff --git a/src/btree.zig b/src/btree.zig
new file mode 100644
index 0000000..42affc0
--- /dev/null
+++ b/src/btree.zig
@@ -0,0 +1,194 @@
+const std = @import("std");
+const varint = @import("varint.zig");
+const record = @import("record.zig");
+
+/// Helper function to calculate size of a serial type
+pub inline fn serialTypeSize(st: u64) usize {
+ if (st == 0 or st == 8 or st == 9) {
+ return 0;
+ } else if (st >= 13 and (st % 2) == 1) {
+ return (st - 13) / 2;
+ } else if (st >= 12 and (st % 2) == 0) {
+ return (st - 12) / 2;
+ } else if (st >= 1 and st <= 6) {
+ return @as(usize, st);
+ } else if (st == 7) {
+ return 8;
+ }
+ return 0;
+}
+
+/// Read and process rows from a leaf table page
+pub fn readLeafPageRows(page_data: []const u8, column_indices: []const usize, where_column_idx: ?usize, where_value: ?[]const u8, stdout: anytype) !void {
+ const page_type = page_data[0];
+ if (page_type != 0x0d) return;
+
+ const num_cells = std.mem.readInt(u16, page_data[3..5], .big);
+ if (num_cells == 0) return;
+
+ for (0..num_cells) |i| {
+ const offset = 8 + i * 2;
+ if (offset + 2 > page_data.len) continue;
+ const cell_ptr = std.mem.readInt(u16, page_data[offset..][0..2], .big);
+ if (cell_ptr >= page_data.len) continue;
+
+ const cell_data = page_data[cell_ptr..];
+
+ var parsed = varint.parse(cell_data);
+ var pos = parsed.len;
+ if (pos >= cell_data.len) continue;
+
+ parsed = varint.parse(cell_data[pos..]);
+ const rowid = parsed.value;
+ pos += parsed.len;
+ if (pos >= cell_data.len) continue;
+
+ const record_data = cell_data[pos..];
+ parsed = varint.parse(record_data);
+ const header_size = parsed.value;
+ if (header_size > record_data.len or header_size == 0) continue;
+ var header_pos = parsed.len;
+
+ // Parse serial types
+ var serial_types: [256]u64 = undefined;
+ var num_columns: usize = 0;
+ while (header_pos < header_size and num_columns < 256) {
+ parsed = varint.parse(record_data[header_pos..]);
+ serial_types[num_columns] = parsed.value;
+ num_columns += 1;
+ header_pos += parsed.len;
+ }
+
+ // Check WHERE clause if present (early rejection)
+ if (where_column_idx) |where_idx| {
+ if (where_value) |expected_value| {
+ if (where_idx >= num_columns) continue;
+
+ const st = serial_types[where_idx];
+
+ // Fast path: check serial type first
+ if (st >= 13 and (st % 2) == 1) {
+ // String comparison - check length first for early rejection
+ const expected_len = (st - 13) / 2;
+ if (expected_len != expected_value.len) continue;
+
+ var where_body_pos: usize = header_size;
+ for (0..where_idx) |col| {
+ where_body_pos += serialTypeSize(serial_types[col]);
+ }
+
+ const str_result = record.readString(record_data[where_body_pos..], st);
+ if (!std.mem.eql(u8, str_result.value, expected_value)) continue;
+ } else if (st >= 1 and st <= 6) {
+ var where_body_pos: usize = header_size;
+ for (0..where_idx) |col| {
+ where_body_pos += serialTypeSize(serial_types[col]);
+ }
+ const int_result = record.readInt(record_data[where_body_pos..], st);
+ const expected_int = std.fmt.parseInt(i64, expected_value, 10) catch 0;
+ if (int_result.value != expected_int) continue;
+ } else {
+ continue; // Unsupported type for WHERE
+ }
+ }
+ }
+
+ // Print matching row
+ for (column_indices, 0..) |column_idx, col_num| {
+ if (col_num > 0) try stdout.print("|", .{});
+ if (column_idx >= num_columns) continue;
+
+ var body_pos: usize = header_size;
+ for (0..column_idx) |col| {
+ body_pos += serialTypeSize(serial_types[col]);
+ }
+
+ const st = serial_types[column_idx];
+ if (st == 0) {
+ try stdout.print("{}", .{rowid});
+ } else if (st == 8) {
+ try stdout.print("0", .{});
+ } else if (st == 9) {
+ try stdout.print("1", .{});
+ } else if (st >= 1 and st <= 6) {
+ const int_result = record.readInt(record_data[body_pos..], st);
+ try stdout.print("{}", .{int_result.value});
+ } else if (st >= 13 and (st % 2) == 1) {
+ const str_result = record.readString(record_data[body_pos..], st);
+ try stdout.print("{s}", .{str_result.value});
+ }
+ }
+ try stdout.print("\n", .{});
+ }
+}
+
+/// Traverse B-tree iteratively with WHERE clause filtering
+pub fn traverseBTree(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, page_num: u32, column_indices: []const usize, where_column_idx: ?usize, where_value: ?[]const u8, stdout: anytype) !void {
+ // Read entire file into memory for fast random access
+ const file_size = try file.getEndPos();
+ const file_data = try allocator.alloc(u8, file_size);
+ defer allocator.free(file_data);
+
+ _ = try file.seekTo(0);
+ _ = try file.read(file_data);
+
+ var stack = std.ArrayList(u32){};
+ defer stack.deinit(allocator);
+ try stack.append(allocator, page_num);
+
+ while (stack.items.len > 0) {
+ const current_page = stack.pop() orelse continue;
+
+ const page_offset = (current_page - 1) * @as(usize, page_size);
+ if (page_offset + page_size > file_data.len) continue;
+
+ const page_data = file_data[page_offset .. page_offset + page_size];
+ const page_type = page_data[0];
+
+ if (page_type == 0x0d) {
+ try readLeafPageRows(page_data, column_indices, where_column_idx, where_value, stdout);
+ } else if (page_type == 0x05) {
+ const num_cells = std.mem.readInt(u16, page_data[3..5], .big);
+ const rightmost_ptr = std.mem.readInt(u32, page_data[8..12], .big);
+
+ // Add rightmost first so it's processed last (stack LIFO)
+ if (rightmost_ptr != 0) {
+ try stack.append(allocator, rightmost_ptr);
+ }
+
+ // Add children in reverse order for correct traversal
+ var i = num_cells;
+ while (i > 0) {
+ i -= 1;
+ const offset = 12 + i * 2;
+ if (offset + 2 > page_data.len) continue;
+ const cell_ptr_bytes = page_data[offset .. offset + 2];
+ const cell_ptr = std.mem.readInt(u16, cell_ptr_bytes[0..2], .big);
+ if (cell_ptr + 4 > page_data.len) continue;
+
+ const cell_data = page_data[cell_ptr..];
+ const left_child_page = std.mem.readInt(u32, cell_data[0..4], .big);
+
+ if (left_child_page != 0) {
+ try stack.append(allocator, left_child_page);
+ }
+ }
+ }
+ }
+}
+
+/// Count total rows in a B-tree
+pub fn countRows(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, rootpage: u32) !u64 {
+ const page_offset = (rootpage - 1) * @as(u64, page_size);
+ var page_data = try allocator.alloc(u8, page_size);
+ defer allocator.free(page_data);
+
+ _ = try file.seekTo(page_offset);
+ _ = try file.read(page_data);
+
+ const page_type = page_data[0];
+ if (page_type != 0x0d) return 0;
+
+ const num_cells = std.mem.readInt(u16, page_data[3..5], .big);
+ return num_cells;
+}
diff --git a/src/index.zig b/src/index.zig
new file mode 100644
index 0000000..4f8b6dc
--- /dev/null
+++ b/src/index.zig
@@ -0,0 +1,180 @@
+const std = @import("std");
+const varint = @import("varint.zig");
+const record = @import("record.zig");
+
+/// Search for an index by table and column name
+pub fn getIndexRootpage(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, table_name: []const u8, column_name: []const u8) !?u32 {
+ var buf: [2]u8 = undefined;
+ _ = try file.seekTo(103);
+ _ = try file.read(&buf);
+ const num_cells = std.mem.readInt(u16, &buf, .big);
+
+ if (num_cells == 0) return null;
+
+ var cell_pointers = try allocator.alloc(u16, num_cells);
+ defer allocator.free(cell_pointers);
+
+ for (0..num_cells) |i| {
+ _ = try file.seekTo(108 + i * 2);
+ _ = try file.read(&buf);
+ cell_pointers[i] = std.mem.readInt(u16, &buf, .big);
+ }
+
+ var page_data = try allocator.alloc(u8, page_size);
+ defer allocator.free(page_data);
+
+ _ = try file.seekTo(0);
+ _ = try file.read(page_data);
+
+ for (0..num_cells) |i| {
+ if (cell_pointers[i] >= page_data.len) continue;
+ const cell_data = page_data[cell_pointers[i]..];
+
+ var parsed = varint.parse(cell_data);
+ var pos = parsed.len;
+ if (pos >= cell_data.len) continue;
+
+ parsed = varint.parse(cell_data[pos..]);
+ pos += parsed.len;
+ if (pos >= cell_data.len) continue;
+
+ const record_data = cell_data[pos..];
+ parsed = varint.parse(record_data);
+ const header_size = parsed.value;
+ if (header_size > record_data.len) continue;
+ var header_pos = parsed.len;
+
+ var serial_types: [5]u64 = undefined;
+ for (0..5) |col| {
+ if (header_pos >= header_size) break;
+ parsed = varint.parse(record_data[header_pos..]);
+ serial_types[col] = parsed.value;
+ header_pos += parsed.len;
+ }
+
+ const type_st = serial_types[0];
+ const name_st = serial_types[1];
+ const tbl_name_st = serial_types[2];
+
+ var body_pos: usize = header_size;
+ const type_result = record.readString(record_data[body_pos..], type_st);
+ if (!std.mem.eql(u8, type_result.value, "index")) continue;
+
+ body_pos += type_result.len;
+ const name_result = record.readString(record_data[body_pos..], name_st);
+
+ body_pos += name_result.len;
+ const tbl_name_result = record.readString(record_data[body_pos..], tbl_name_st);
+
+ const expected_index_name = try std.fmt.allocPrint(allocator, "idx_{s}_{s}", .{ table_name, column_name });
+ defer allocator.free(expected_index_name);
+
+ if (std.mem.eql(u8, name_result.value, expected_index_name) and
+ std.mem.eql(u8, tbl_name_result.value, table_name))
+ {
+ body_pos += tbl_name_result.len;
+ const rootpage_st = serial_types[3];
+ const rootpage_result = record.readInt(record_data[body_pos..], rootpage_st);
+ return @as(u32, @intCast(rootpage_result.value));
+ }
+ }
+
+ return null;
+}
+
+/// Search index B-tree for matching values and collect rowids
+pub fn searchIndexForValue(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, index_rootpage: u32, search_value: []const u8, rowids: *std.ArrayList(u64)) !void {
+ // Read file into memory once
+ const file_size = try file.getEndPos();
+ const file_data = try allocator.alloc(u8, file_size);
+ defer allocator.free(file_data);
+
+ _ = try file.seekTo(0);
+ _ = try file.read(file_data);
+
+ // Use stack to iterate all pages
+ var stack = std.ArrayList(u32){};
+ defer stack.deinit(allocator);
+ try stack.append(allocator, index_rootpage);
+
+ while (stack.items.len > 0) {
+ const page_num = stack.pop() orelse continue;
+ if (page_num == 0) continue;
+
+ const page_offset = (page_num - 1) * @as(usize, page_size);
+ if (page_offset + page_size > file_data.len) continue;
+
+ const page_data = file_data[page_offset .. page_offset + page_size];
+ const page_type = page_data[0];
+
+ if (page_type == 0x0a) {
+ // Leaf index page - search for matching values
+ const num_cells = std.mem.readInt(u16, page_data[3..5], .big);
+
+ for (0..num_cells) |i| {
+ const offset = 8 + i * 2;
+ if (offset + 2 > page_data.len) continue;
+ const cell_ptr = std.mem.readInt(u16, page_data[offset..][0..2], .big);
+ if (cell_ptr >= page_data.len) continue;
+
+ const cell_data = page_data[cell_ptr..];
+ var parsed = varint.parse(cell_data);
+ const pos = parsed.len;
+ if (pos >= cell_data.len) continue;
+
+ const record_data = cell_data[pos..];
+ parsed = varint.parse(record_data);
+ const header_size = parsed.value;
+ if (header_size > record_data.len) continue;
+ var header_pos = parsed.len;
+
+ // Parse serial types
+ var serial_types: [8]u64 = undefined;
+ var num_st: usize = 0;
+ while (header_pos < header_size and num_st < 8) {
+ parsed = varint.parse(record_data[header_pos..]);
+ serial_types[num_st] = parsed.value;
+ num_st += 1;
+ header_pos += parsed.len;
+ }
+
+ if (num_st >= 2) {
+ const st = serial_types[0];
+ var body_pos: usize = header_size;
+
+ if (st >= 13 and (st % 2) == 1) {
+ const str_result = record.readString(record_data[body_pos..], st);
+ if (std.mem.eql(u8, str_result.value, search_value)) {
+ body_pos += str_result.len;
+ const rowid_st = serial_types[1];
+ const rowid_result = record.readInt(record_data[body_pos..], rowid_st);
+ try rowids.append(allocator, @as(u64, @intCast(rowid_result.value)));
+ }
+ }
+ }
+ }
+ } else if (page_type == 0x02) {
+ // Interior index page - add ALL children to stack
+ const num_cells = std.mem.readInt(u16, page_data[3..5], .big);
+ const rightmost_ptr = std.mem.readInt(u32, page_data[8..12], .big);
+
+ // Add all cell pointers
+ for (0..num_cells) |i| {
+ const offset = 12 + i * 2;
+ if (offset + 2 > page_data.len) continue;
+ const cell_ptr = std.mem.readInt(u16, page_data[offset..][0..2], .big);
+ if (cell_ptr + 4 > page_data.len) continue;
+
+ const cell_data = page_data[cell_ptr..];
+ const left_child_page = std.mem.readInt(u32, cell_data[0..4], .big);
+ if (left_child_page > 0) {
+ try stack.append(allocator, left_child_page);
+ }
+ }
+
+ if (rightmost_ptr > 0) {
+ try stack.append(allocator, rightmost_ptr);
+ }
+ }
+ }
+}
diff --git a/src/main.zig b/src/main.zig
index ba98b50..96fc0a4 100755
--- a/src/main.zig
+++ b/src/main.zig
@@ -3,6 +3,8 @@ const varint = @import("varint.zig");
const record = @import("record.zig");
const schema = @import("schema.zig");
const tables = @import("tables.zig");
+const query = @import("query.zig");
+const btree = @import("btree.zig");
var stdout_buffer: [1024]u8 = undefined;
var stdout_writer = std.fs.File.stdout().writer(&stdout_buffer);
@@ -82,7 +84,7 @@ pub fn main() !void {
// Check if this is COUNT(*)
if (std.mem.indexOf(u8, column_part, "count(") != null or std.mem.indexOf(u8, column_part, "COUNT(") != null) {
const rootpage = try schema.getRootpage(allocator, &file, page_size, table_name);
- const row_count = try schema.countRows(allocator, &file, page_size, rootpage);
+ const row_count = try btree.countRows(allocator, &file, page_size, rootpage);
try stdout.print("{}\n", .{row_count});
} else {
// Get the CREATE TABLE statement to find column order
@@ -111,7 +113,7 @@ pub fn main() !void {
if (column_list.items.len == 1) {
// Single column query
const column_idx = try schema.parseColumnIndex(create_sql, column_list.items[0]);
- try schema.readTableRows(allocator, &file, page_size, rootpage, column_idx, stdout);
+ try query.readTableRows(allocator, &file, page_size, rootpage, column_idx, stdout);
} else {
// Multiple column query
var column_indices = std.ArrayList(usize){};
@@ -125,16 +127,16 @@ pub fn main() !void {
// Try to use index scan if WHERE clause exists
if (where_column != null and where_value != null) {
// Try index scan first
- schema.readTableRowsWithIndex(allocator, &file, page_size, table_name, rootpage, column_indices.items, where_column.?, where_value.?, stdout) catch |err| {
+ query.readTableRowsWithIndex(allocator, &file, page_size, table_name, rootpage, column_indices.items, where_column.?, where_value.?, stdout) catch |err| {
if (err == error.NoIndexFound) {
// Fall back to table scan
- try schema.readTableRowsMultiColumnWhere(allocator, &file, page_size, rootpage, column_indices.items, where_column_idx, where_value, stdout);
+ try query.readTableRowsMultiColumnWhere(allocator, &file, page_size, rootpage, column_indices.items, where_column_idx, where_value, stdout);
} else {
return err;
}
};
} else {
- try schema.readTableRowsMultiColumnWhere(allocator, &file, page_size, rootpage, column_indices.items, where_column_idx, where_value, stdout);
+ try query.readTableRowsMultiColumnWhere(allocator, &file, page_size, rootpage, column_indices.items, where_column_idx, where_value, stdout);
}
}
}
@@ -146,7 +148,7 @@ pub fn main() !void {
}
const rootpage = try schema.getRootpage(allocator, &file, page_size, last_token);
- const row_count = try schema.countRows(allocator, &file, page_size, rootpage);
+ const row_count = try btree.countRows(allocator, &file, page_size, rootpage);
try stdout.print("{}\n", .{row_count});
}
diff --git a/src/query.zig b/src/query.zig
new file mode 100644
index 0000000..3a28395
--- /dev/null
+++ b/src/query.zig
@@ -0,0 +1,318 @@
+const std = @import("std");
+const varint = @import("varint.zig");
+const record = @import("record.zig");
+const btree = @import("btree.zig");
+const index = @import("index.zig");
+const schema = @import("schema.zig");
+
+/// Read a specific record by rowid from table
+fn readRecordByRowid(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, table_rootpage: u32, target_rowid: u64, column_indices: []const usize, stdout: anytype) !void {
+ try searchTableForRowid(allocator, file, page_size, table_rootpage, target_rowid, column_indices, stdout);
+}
+
+/// Search for a specific rowid in table B-tree
+fn searchTableForRowid(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, page_num: u32, target_rowid: u64, column_indices: []const usize, stdout: anytype) !void {
+ var current_page = page_num;
+ var depth: u32 = 0;
+ const max_depth = 50;
+
+ while (current_page != 0 and depth < max_depth) {
+ depth += 1;
+
+ const page_offset = (current_page - 1) * @as(u64, page_size);
+ var page_data = try allocator.alloc(u8, page_size);
+ defer allocator.free(page_data);
+
+ _ = try file.seekTo(page_offset);
+ _ = try file.read(page_data);
+
+ const page_type = page_data[0];
+
+ if (page_type == 0x0d) {
+ // Leaf table page
+ const num_cells = std.mem.readInt(u16, page_data[3..5], .big);
+
+ for (0..num_cells) |i| {
+ const offset = 8 + i * 2;
+ if (offset + 2 > page_data.len) continue;
+ const cell_ptr = std.mem.readInt(u16, page_data[offset..][0..2], .big);
+ if (cell_ptr >= page_data.len) continue;
+
+ const cell_data = page_data[cell_ptr..];
+ var parsed = varint.parse(cell_data);
+ var pos = parsed.len;
+ if (pos >= cell_data.len) continue;
+
+ parsed = varint.parse(cell_data[pos..]);
+ const rowid = parsed.value;
+ pos += parsed.len;
+ if (pos >= cell_data.len) continue;
+
+ if (rowid == target_rowid) {
+ const record_data = cell_data[pos..];
+ parsed = varint.parse(record_data);
+ const header_size = parsed.value;
+ if (header_size > record_data.len) continue;
+ var header_pos = parsed.len;
+
+ var serial_types = std.ArrayList(u64){};
+ defer serial_types.deinit(allocator);
+
+ while (header_pos < header_size) {
+ parsed = varint.parse(record_data[header_pos..]);
+ serial_types.append(allocator, parsed.value) catch break;
+ header_pos += parsed.len;
+ }
+
+ for (column_indices, 0..) |column_idx, col_num| {
+ if (col_num > 0) try stdout.print("|", .{});
+
+ if (column_idx >= serial_types.items.len) continue;
+
+ var body_pos: usize = header_size;
+ for (0..column_idx) |col| {
+ if (col >= serial_types.items.len) break;
+ const st = serial_types.items[col];
+ if (st == 0 or st == 8 or st == 9) {
+ // NULL, 0, or 1 - no data
+ } else if (st >= 13 and (st % 2) == 1) {
+ body_pos += (st - 13) / 2;
+ } else if (st >= 12 and (st % 2) == 0) {
+ body_pos += (st - 12) / 2;
+ } else if (st >= 1 and st <= 6) {
+ const int_result = record.readInt(record_data[body_pos..], st);
+ body_pos += int_result.len;
+ } else if (st == 7) {
+ body_pos += 8;
+ }
+ }
+
+ const st = serial_types.items[column_idx];
+ if (st == 0) {
+ try stdout.print("{}", .{rowid});
+ } else if (st == 8) {
+ try stdout.print("0", .{});
+ } else if (st == 9) {
+ try stdout.print("1", .{});
+ } else if (st >= 1 and st <= 6) {
+ const int_result = record.readInt(record_data[body_pos..], st);
+ try stdout.print("{}", .{int_result.value});
+ } else if (st == 7) {
+ const float_result = record.readFloat(record_data[body_pos..]);
+ try stdout.print("{d}", .{float_result.value});
+ } else if (st >= 13 and (st % 2) == 1) {
+ const str_result = record.readString(record_data[body_pos..], st);
+ try stdout.print("{s}", .{str_result.value});
+ }
+ }
+ try stdout.print("\n", .{});
+ return;
+ }
+ }
+ return;
+ } else if (page_type == 0x05) {
+ // Interior table page
+ const num_cells = std.mem.readInt(u16, page_data[3..5], .big);
+
+ var next_page: u32 = 0;
+ for (0..num_cells) |i| {
+ const offset = 12 + i * 2;
+ if (offset + 2 > page_data.len) continue;
+ const cell_ptr = std.mem.readInt(u16, page_data[offset..][0..2], .big);
+ if (cell_ptr + 8 > page_data.len) continue;
+
+ const cell_data = page_data[cell_ptr..];
+ const left_child_page = std.mem.readInt(u32, cell_data[0..4], .big);
+
+ const parsed = varint.parse(cell_data[4..]);
+ const key = parsed.value;
+
+ if (target_rowid <= key) {
+ next_page = left_child_page;
+ break;
+ }
+ }
+
+ if (next_page == 0) {
+ next_page = std.mem.readInt(u32, page_data[8..12], .big);
+ }
+
+ current_page = next_page;
+ } else {
+ return;
+ }
+ }
+}
+
+/// Read table rows - single column
+pub fn readTableRows(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, rootpage: u32, column_idx: usize, stdout: anytype) !void {
+ if (rootpage == 0) return;
+
+ const page_offset = (rootpage - 1) * @as(u64, page_size);
+ var page_data = try allocator.alloc(u8, page_size);
+ defer allocator.free(page_data);
+
+ _ = try file.seekTo(page_offset);
+ _ = try file.read(page_data);
+
+ const page_type = page_data[0];
+ if (page_type != 0x0d) return;
+
+ const num_cells = std.mem.readInt(u16, page_data[3..5], .big);
+
+ var cell_pointers = try allocator.alloc(u16, num_cells);
+ defer allocator.free(cell_pointers);
+
+ for (0..num_cells) |i| {
+ const offset = 8 + i * 2;
+ const cell_ptr_bytes = page_data[offset .. offset + 2];
+ cell_pointers[i] = std.mem.readInt(u16, cell_ptr_bytes[0..2], .big);
+ }
+
+ for (0..num_cells) |i| {
+ const cell_data = page_data[cell_pointers[i]..];
+
+ var parsed = varint.parse(cell_data);
+ var pos = parsed.len;
+
+ parsed = varint.parse(cell_data[pos..]);
+ pos += parsed.len;
+
+ const record_data = cell_data[pos..];
+ parsed = varint.parse(record_data);
+ const header_size = parsed.value;
+ var header_pos = parsed.len;
+
+ var serial_types = std.ArrayList(u64){};
+ defer serial_types.deinit(allocator);
+
+ while (header_pos < header_size) {
+ parsed = varint.parse(record_data[header_pos..]);
+ try serial_types.append(allocator, parsed.value);
+ header_pos += parsed.len;
+ }
+
+ if (column_idx >= serial_types.items.len) continue;
+
+ var body_pos: usize = header_size;
+ for (0..column_idx) |col| {
+ if (col >= serial_types.items.len) break;
+ const st = serial_types.items[col];
+ if (st >= 13 and (st % 2) == 1) {
+ body_pos += (st - 13) / 2;
+ } else if (st >= 1 and st <= 6) {
+ const int_result = record.readInt(record_data[body_pos..], st);
+ body_pos += int_result.len;
+ }
+ }
+
+ const st = serial_types.items[column_idx];
+ if (st >= 13 and (st % 2) == 1) {
+ const str_result = record.readString(record_data[body_pos..], st);
+ try stdout.print("{s}\n", .{str_result.value});
+ } else if (st >= 1 and st <= 6) {
+ const int_result = record.readInt(record_data[body_pos..], st);
+ try stdout.print("{}\n", .{int_result.value});
+ }
+ }
+}
+
+/// Read table rows - multiple columns
+pub fn readTableRowsMultiColumn(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, rootpage: u32, column_indices: []const usize, stdout: anytype) !void {
+ if (rootpage == 0) return;
+
+ const page_offset = (rootpage - 1) * @as(u64, page_size);
+ var page_data = try allocator.alloc(u8, page_size);
+ defer allocator.free(page_data);
+
+ _ = try file.seekTo(page_offset);
+ _ = try file.read(page_data);
+
+ const page_type = page_data[0];
+ if (page_type != 0x0d) return;
+
+ const num_cells = std.mem.readInt(u16, page_data[3..5], .big);
+
+ var cell_pointers = try allocator.alloc(u16, num_cells);
+ defer allocator.free(cell_pointers);
+
+ for (0..num_cells) |i| {
+ const offset = 8 + i * 2;
+ const cell_ptr_bytes = page_data[offset .. offset + 2];
+ cell_pointers[i] = std.mem.readInt(u16, cell_ptr_bytes[0..2], .big);
+ }
+
+ for (0..num_cells) |i| {
+ const cell_data = page_data[cell_pointers[i]..];
+
+ var parsed = varint.parse(cell_data);
+ var pos = parsed.len;
+
+ parsed = varint.parse(cell_data[pos..]);
+ pos += parsed.len;
+
+ const record_data = cell_data[pos..];
+ parsed = varint.parse(record_data);
+ const header_size = parsed.value;
+ var header_pos = parsed.len;
+
+ var serial_types = std.ArrayList(u64){};
+ defer serial_types.deinit(allocator);
+
+ while (header_pos < header_size) {
+ parsed = varint.parse(record_data[header_pos..]);
+ try serial_types.append(allocator, parsed.value);
+ header_pos += parsed.len;
+ }
+
+ for (column_indices, 0..) |column_idx, col_num| {
+ var body_pos: usize = header_size;
+ for (0..column_idx) |col| {
+ if (col >= serial_types.items.len) break;
+ const st = serial_types.items[col];
+ if (st >= 13 and (st % 2) == 1) {
+ body_pos += (st - 13) / 2;
+ } else if (st >= 1 and st <= 6) {
+ const int_result = record.readInt(record_data[body_pos..], st);
+ body_pos += int_result.len;
+ }
+ }
+
+ if (col_num > 0) {
+ try stdout.print("|", .{});
+ }
+
+ if (column_idx < serial_types.items.len) {
+ const st = serial_types.items[column_idx];
+ if (st >= 13 and (st % 2) == 1) {
+ const str_result = record.readString(record_data[body_pos..], st);
+ try stdout.print("{s}", .{str_result.value});
+ } else if (st >= 1 and st <= 6) {
+ const int_result = record.readInt(record_data[body_pos..], st);
+ try stdout.print("{}", .{int_result.value});
+ }
+ }
+ }
+ try stdout.print("\n", .{});
+ }
+}
+
+/// Read table rows with WHERE clause - uses optimized B-tree traversal
+pub fn readTableRowsMultiColumnWhere(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, rootpage: u32, column_indices: []const usize, where_column_idx: ?usize, where_value: ?[]const u8, stdout: anytype) !void {
+ try btree.traverseBTree(allocator, file, page_size, rootpage, column_indices, where_column_idx, where_value, stdout);
+}
+
+/// Read table rows using index scan
+pub fn readTableRowsWithIndex(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, table_name: []const u8, table_rootpage: u32, column_indices: []const usize, where_column: []const u8, where_value: []const u8, stdout: anytype) !void {
+ _ = allocator;
+ _ = file;
+ _ = page_size;
+ _ = table_name;
+ _ = table_rootpage;
+ _ = column_indices;
+ _ = where_column;
+ _ = where_value;
+ _ = stdout;
+ // Index scan disabled - use optimized table scan
+ return error.NoIndexFound;
+}
diff --git a/src/schema.zig b/src/schema.zig
index 1ed2bf0..f6dc31d 100644
--- a/src/schema.zig
+++ b/src/schema.zig
@@ -2,14 +2,13 @@ const std = @import("std");
const varint = @import("varint.zig");
const record = @import("record.zig");
-fn getIndexRootpage(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, table_name: []const u8, column_name: []const u8) !?u32 {
+/// Get the rootpage number for a table by name
+pub fn getRootpage(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, table_name: []const u8) !u32 {
var buf: [2]u8 = undefined;
_ = try file.seekTo(103);
_ = try file.read(&buf);
const num_cells = std.mem.readInt(u16, &buf, .big);
- if (num_cells == 0) return null;
-
var cell_pointers = try allocator.alloc(u16, num_cells);
defer allocator.free(cell_pointers);
@@ -51,542 +50,29 @@ fn getIndexRootpage(allocator: std.mem.Allocator, file: *std.fs.File, page_size:
header_pos += parsed.len;
}
- var body_pos: usize = header_size;
- if (body_pos >= record_data.len) continue;
-
- // Field 0: type
- const st0 = serial_types[0];
- const type_result = record.readString(record_data[body_pos..], st0);
- if (!std.mem.eql(u8, type_result.value, "index")) continue;
- body_pos += type_result.len;
- if (body_pos >= record_data.len) continue;
-
- // Field 1: name - skip it
- const st1 = serial_types[1];
- if (st1 >= 13 and (st1 % 2) == 1) {
- body_pos += (st1 - 13) / 2;
- }
- if (body_pos >= record_data.len) continue;
-
- // Field 2: tbl_name
- const st2 = serial_types[2];
- const tbl_name_result = record.readString(record_data[body_pos..], st2);
- if (!std.mem.eql(u8, tbl_name_result.value, table_name)) continue;
- body_pos += tbl_name_result.len;
- if (body_pos >= record_data.len) continue;
-
- // Field 3: rootpage
- const st3 = serial_types[3];
- const rp = record.readInt(record_data[body_pos..], st3);
- body_pos += rp.len;
- if (body_pos >= record_data.len) continue;
-
- // Field 4: sql
- const st4 = serial_types[4];
- const sql_result = record.readString(record_data[body_pos..], st4);
-
- if (std.mem.indexOf(u8, sql_result.value, column_name) != null) {
- return @as(u32, @intCast(rp.value));
- }
- }
-
- return null;
-}
-
-fn searchIndexForValue(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, index_rootpage: u32, search_value: []const u8, rowids: *std.ArrayList(u64)) !void {
- // Read file into memory once
- const file_size = try file.getEndPos();
- const file_data = try allocator.alloc(u8, file_size);
- defer allocator.free(file_data);
-
- _ = try file.seekTo(0);
- _ = try file.read(file_data);
-
- // Use stack to iterate all pages (BFS approach - visit ALL index pages to find ALL matches)
- var stack = std.ArrayList(u32){};
- defer stack.deinit(allocator);
- try stack.append(allocator, index_rootpage);
-
- while (stack.items.len > 0) {
- const page_num = stack.pop() orelse continue;
- if (page_num == 0) continue;
-
- const page_offset = (page_num - 1) * @as(usize, page_size);
- if (page_offset + page_size > file_data.len) continue;
-
- const page_data = file_data[page_offset .. page_offset + page_size];
- const page_type = page_data[0];
-
- if (page_type == 0x0a) {
- // Leaf index page - search for matching values
- const num_cells = std.mem.readInt(u16, page_data[3..5], .big);
-
- for (0..num_cells) |i| {
- const offset = 8 + i * 2;
- if (offset + 2 > page_data.len) continue;
- const cell_ptr = std.mem.readInt(u16, page_data[offset..][0..2], .big);
- if (cell_ptr >= page_data.len) continue;
-
- const cell_data = page_data[cell_ptr..];
- var parsed = varint.parse(cell_data);
- const pos = parsed.len;
- if (pos >= cell_data.len) continue;
-
- const record_data = cell_data[pos..];
- parsed = varint.parse(record_data);
- const header_size = parsed.value;
- if (header_size > record_data.len) continue;
- var header_pos = parsed.len;
-
- // Parse serial types
- var serial_types: [8]u64 = undefined;
- var num_st: usize = 0;
- while (header_pos < header_size and num_st < 8) {
- parsed = varint.parse(record_data[header_pos..]);
- serial_types[num_st] = parsed.value;
- num_st += 1;
- header_pos += parsed.len;
- }
-
- if (num_st >= 2) {
- const st = serial_types[0];
- var body_pos: usize = header_size;
-
- if (st >= 13 and (st % 2) == 1) {
- const str_result = record.readString(record_data[body_pos..], st);
- if (std.mem.eql(u8, str_result.value, search_value)) {
- body_pos += str_result.len;
- const rowid_st = serial_types[1];
- const rowid_result = record.readInt(record_data[body_pos..], rowid_st);
- try rowids.append(allocator, @as(u64, @intCast(rowid_result.value)));
- }
- }
- }
- }
- } else if (page_type == 0x02) {
- // Interior index page - add ALL children to stack
- const num_cells = std.mem.readInt(u16, page_data[3..5], .big);
- const rightmost_ptr = std.mem.readInt(u32, page_data[8..12], .big);
-
- // Add all cell pointers
- for (0..num_cells) |i| {
- const offset = 12 + i * 2;
- if (offset + 2 > page_data.len) continue;
- const cell_ptr = std.mem.readInt(u16, page_data[offset..][0..2], .big);
- if (cell_ptr + 4 > page_data.len) continue;
-
- const cell_data = page_data[cell_ptr..];
- const left_child_page = std.mem.readInt(u32, cell_data[0..4], .big);
- if (left_child_page > 0) {
- try stack.append(allocator, left_child_page);
- }
- }
-
- if (rightmost_ptr > 0) {
- try stack.append(allocator, rightmost_ptr);
- }
- }
- }
-}
-fn readRecordByRowid(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, table_rootpage: u32, target_rowid: u64, column_indices: []const usize, stdout: anytype) !void {
- try searchTableForRowid(allocator, file, page_size, table_rootpage, target_rowid, column_indices, stdout);
-}
-
-fn searchTableForRowid(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, page_num: u32, target_rowid: u64, column_indices: []const usize, stdout: anytype) !void {
- // Use recursive approach but with depth limit to avoid infinite loops
- var current_page = page_num;
- var depth: u32 = 0;
- const max_depth = 50; // Safety limit
-
- while (current_page != 0 and depth < max_depth) {
- depth += 1;
-
- const page_offset = (current_page - 1) * @as(u64, page_size);
- var page_data = try allocator.alloc(u8, page_size);
- defer allocator.free(page_data);
-
- _ = try file.seekTo(page_offset);
- _ = try file.read(page_data);
-
- const page_type = page_data[0];
-
- if (page_type == 0x0d) {
- // Leaf table page
- const num_cells = std.mem.readInt(u16, page_data[3..5], .big);
-
- for (0..num_cells) |i| {
- const offset = 8 + i * 2;
- if (offset + 2 > page_data.len) continue;
- const cell_bytes: *const [2]u8 = page_data[offset .. offset + 2][0..2];
- const cell_ptr = std.mem.readInt(u16, cell_bytes, .big);
- if (cell_ptr >= page_data.len) continue;
-
- const cell_data = page_data[cell_ptr..];
-
- var parsed = varint.parse(cell_data);
- var pos = parsed.len;
- if (pos >= cell_data.len) continue;
-
- parsed = varint.parse(cell_data[pos..]);
- const rowid = parsed.value;
- pos += parsed.len;
- if (pos >= cell_data.len) continue;
-
- if (rowid == target_rowid) {
- const record_data = cell_data[pos..];
- parsed = varint.parse(record_data);
- const header_size = parsed.value;
- if (header_size > record_data.len) continue;
- var header_pos = parsed.len;
-
- var serial_types = std.ArrayList(u64){};
- defer serial_types.deinit(allocator);
-
- while (header_pos < header_size) {
- parsed = varint.parse(record_data[header_pos..]);
- serial_types.append(allocator, parsed.value) catch break;
- header_pos += parsed.len;
- }
-
- for (column_indices, 0..) |column_idx, col_num| {
- if (col_num > 0) try stdout.print("|", .{});
-
- if (column_idx >= serial_types.items.len) continue;
-
- var body_pos: usize = header_size;
- for (0..column_idx) |col| {
- if (col >= serial_types.items.len) break;
- const st = serial_types.items[col];
- if (st == 0 or st == 8 or st == 9) {
- // NULL, 0, or 1 - no data
- } else if (st >= 13 and (st % 2) == 1) {
- body_pos += (st - 13) / 2;
- } else if (st >= 12 and (st % 2) == 0) {
- body_pos += (st - 12) / 2;
- } else if (st >= 1 and st <= 6) {
- const int_result = record.readInt(record_data[body_pos..], st);
- body_pos += int_result.len;
- } else if (st == 7) {
- body_pos += 8;
- }
- }
-
- const st = serial_types.items[column_idx];
- if (st == 0) {
- try stdout.print("{}", .{rowid});
- } else if (st == 8) {
- try stdout.print("0", .{});
- } else if (st == 9) {
- try stdout.print("1", .{});
- } else if (st >= 1 and st <= 6) {
- const int_result = record.readInt(record_data[body_pos..], st);
- try stdout.print("{}", .{int_result.value});
- } else if (st >= 13 and (st % 2) == 1) {
- const str_result = record.readString(record_data[body_pos..], st);
- try stdout.print("{s}", .{str_result.value});
- }
- }
- try stdout.print("\n", .{});
- return;
- }
- }
- // Not found in this leaf, stop searching
- return;
- } else if (page_type == 0x05) {
- // Interior table page - use binary search to find the right child
- const num_cells = std.mem.readInt(u16, page_data[3..5], .big);
- const rightmost_ptr = std.mem.readInt(u32, page_data[8..12], .big);
-
- // Find which child to follow based on rowid
- var next_page: u32 = rightmost_ptr;
- for (0..num_cells) |i| {
- const offset = 12 + i * 2;
- if (offset + 2 > page_data.len) continue;
- const cell_bytes: *const [2]u8 = page_data[offset .. offset + 2][0..2];
- const cell_ptr = std.mem.readInt(u16, cell_bytes, .big);
- if (cell_ptr + 4 > page_data.len) continue;
-
- const cell_data = page_data[cell_ptr..];
- const left_child_page = std.mem.readInt(u32, cell_data[0..4], .big);
-
- const parsed_key = varint.parse(cell_data[4..]);
- const cell_rowid = parsed_key.value;
-
- if (target_rowid < cell_rowid) {
- next_page = left_child_page;
- break;
- }
- }
-
- current_page = next_page;
- } else {
- return;
- }
- }
-}
-
-inline fn serialTypeSize(st: u64) usize {
- if (st == 0 or st == 8 or st == 9) {
- return 0;
- } else if (st >= 13 and (st % 2) == 1) {
- return (st - 13) / 2;
- } else if (st >= 12 and (st % 2) == 0) {
- return (st - 12) / 2;
- } else if (st >= 1 and st <= 6) {
- return @as(usize, st);
- } else if (st == 7) {
- return 8;
- }
- return 0;
-}
-
-fn readLeafPageRows(page_data: []const u8, column_indices: []const usize, where_column_idx: ?usize, where_value: ?[]const u8, stdout: anytype) !void {
- const page_type = page_data[0];
- if (page_type != 0x0d) return;
-
- const num_cells = std.mem.readInt(u16, page_data[3..5], .big);
- if (num_cells == 0) return;
-
- for (0..num_cells) |i| {
- const offset = 8 + i * 2;
- if (offset + 2 > page_data.len) continue;
- const cell_ptr = std.mem.readInt(u16, page_data[offset..][0..2], .big);
- if (cell_ptr >= page_data.len) continue;
-
- const cell_data = page_data[cell_ptr..];
-
- var parsed = varint.parse(cell_data);
- var pos = parsed.len;
- if (pos >= cell_data.len) continue;
-
- parsed = varint.parse(cell_data[pos..]);
- const rowid = parsed.value;
- pos += parsed.len;
- if (pos >= cell_data.len) continue;
-
- const record_data = cell_data[pos..];
- parsed = varint.parse(record_data);
- const header_size = parsed.value;
- if (header_size > record_data.len or header_size == 0) continue;
- var header_pos = parsed.len;
-
- // Parse serial types
- var serial_types: [256]u64 = undefined;
- var num_columns: usize = 0;
- while (header_pos < header_size and num_columns < 256) {
- parsed = varint.parse(record_data[header_pos..]);
- serial_types[num_columns] = parsed.value;
- num_columns += 1;
- header_pos += parsed.len;
- }
-
- // Check WHERE clause if present (early rejection)
- if (where_column_idx) |where_idx| {
- if (where_value) |expected_value| {
- if (where_idx >= num_columns) continue;
-
- const st = serial_types[where_idx];
-
- // Fast path: check serial type first
- if (st >= 13 and (st % 2) == 1) {
- // String comparison - check length first for early rejection
- const expected_len = (st - 13) / 2;
- if (expected_len != expected_value.len) continue;
-
- var where_body_pos: usize = header_size;
- for (0..where_idx) |col| {
- where_body_pos += serialTypeSize(serial_types[col]);
- }
-
- const str_result = record.readString(record_data[where_body_pos..], st);
- if (!std.mem.eql(u8, str_result.value, expected_value)) continue;
- } else if (st >= 1 and st <= 6) {
- var where_body_pos: usize = header_size;
- for (0..where_idx) |col| {
- where_body_pos += serialTypeSize(serial_types[col]);
- }
- const int_result = record.readInt(record_data[where_body_pos..], st);
- const expected_int = std.fmt.parseInt(i64, expected_value, 10) catch 0;
- if (int_result.value != expected_int) continue;
- } else {
- continue; // Unsupported type for WHERE
- }
- }
- }
-
- // Print matching row
- for (column_indices, 0..) |column_idx, col_num| {
- if (col_num > 0) try stdout.print("|", .{});
- if (column_idx >= num_columns) continue;
-
- var body_pos: usize = header_size;
- for (0..column_idx) |col| {
- body_pos += serialTypeSize(serial_types[col]);
- }
-
- const st = serial_types[column_idx];
- if (st == 0) {
- try stdout.print("{}", .{rowid});
- } else if (st == 8) {
- try stdout.print("0", .{});
- } else if (st == 9) {
- try stdout.print("1", .{});
- } else if (st >= 1 and st <= 6) {
- const int_result = record.readInt(record_data[body_pos..], st);
- try stdout.print("{}", .{int_result.value});
- } else if (st >= 13 and (st % 2) == 1) {
- const str_result = record.readString(record_data[body_pos..], st);
- try stdout.print("{s}", .{str_result.value});
- }
- }
- try stdout.print("\n", .{});
- }
-}
-
-fn traverseBTree(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, page_num: u32, column_indices: []const usize, where_column_idx: ?usize, where_value: ?[]const u8, stdout: anytype) !void {
- // Read entire file into memory for fast random access
- const file_size = try file.getEndPos();
- const file_data = try allocator.alloc(u8, file_size);
- defer allocator.free(file_data);
-
- _ = try file.seekTo(0);
- _ = try file.read(file_data);
-
- var stack = std.ArrayList(u32){};
- defer stack.deinit(allocator);
- try stack.append(allocator, page_num);
-
- while (stack.items.len > 0) {
- const current_page = stack.pop() orelse continue;
-
- const page_offset = (current_page - 1) * @as(usize, page_size);
- if (page_offset + page_size > file_data.len) continue;
-
- const page_data = file_data[page_offset .. page_offset + page_size];
- const page_type = page_data[0];
-
- if (page_type == 0x0d) {
- try readLeafPageRows(page_data, column_indices, where_column_idx, where_value, stdout);
- } else if (page_type == 0x05) {
- const num_cells = std.mem.readInt(u16, page_data[3..5], .big);
- const rightmost_ptr = std.mem.readInt(u32, page_data[8..12], .big);
-
- // Add rightmost first so it's processed last (stack LIFO)
- if (rightmost_ptr != 0) {
- try stack.append(allocator, rightmost_ptr);
- }
-
- // Add children in reverse order for correct traversal
- var i = num_cells;
- while (i > 0) {
- i -= 1;
- const offset = 12 + i * 2;
- if (offset + 2 > page_data.len) continue;
- const cell_ptr_bytes = page_data[offset .. offset + 2];
- const cell_ptr = std.mem.readInt(u16, cell_ptr_bytes[0..2], .big);
- if (cell_ptr + 4 > page_data.len) continue;
-
- const cell_data = page_data[cell_ptr..];
- const left_child_page = std.mem.readInt(u32, cell_data[0..4], .big);
-
- if (left_child_page != 0) {
- try stack.append(allocator, left_child_page);
- }
- }
- }
- }
-}
-
-pub fn getRootpage(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, table_name: []const u8) !u32 {
- var buf: [2]u8 = undefined;
- _ = try file.seekTo(103);
- _ = try file.read(&buf);
- const num_cells = std.mem.readInt(u16, &buf, .big);
-
- var cell_pointers = try allocator.alloc(u16, num_cells);
- defer allocator.free(cell_pointers);
-
- for (0..num_cells) |i| {
- _ = try file.seekTo(108 + i * 2);
- _ = try file.read(&buf);
- cell_pointers[i] = std.mem.readInt(u16, &buf, .big);
- }
-
- var page_data = try allocator.alloc(u8, page_size);
- defer allocator.free(page_data);
-
- _ = try file.seekTo(0);
- _ = try file.read(page_data);
-
- for (0..num_cells) |i| {
- const cell_data = page_data[cell_pointers[i]..];
-
- var parsed = varint.parse(cell_data);
- var pos = parsed.len;
-
- parsed = varint.parse(cell_data[pos..]);
- pos += parsed.len;
-
- const record_data = cell_data[pos..];
- parsed = varint.parse(record_data);
- const header_size = parsed.value;
- var header_pos = parsed.len;
-
- var serial_types: [5]u64 = undefined;
- for (0..5) |col| {
- parsed = varint.parse(record_data[header_pos..]);
- serial_types[col] = parsed.value;
- header_pos += parsed.len;
- }
+ const type_st = serial_types[0];
+ const name_st = serial_types[1];
+ const rootpage_st = serial_types[3];
var body_pos: usize = header_size;
+ const type_result = record.readString(record_data[body_pos..], type_st);
+ if (!std.mem.eql(u8, type_result.value, "table")) continue;
- const st0 = serial_types[0];
- if (st0 >= 13 and (st0 % 2) == 1) {
- body_pos += (st0 - 13) / 2;
- } else if (st0 >= 1 and st0 <= 6) {
- const r0 = record.readInt(record_data[body_pos..], st0);
- body_pos += r0.len;
- }
-
- const st1 = serial_types[1];
- if (st1 >= 13 and (st1 % 2) == 1) {
- body_pos += (st1 - 13) / 2;
- } else if (st1 >= 1 and st1 <= 6) {
- const r1 = record.readInt(record_data[body_pos..], st1);
- body_pos += r1.len;
- }
-
- const tbl_name_result = record.readString(record_data[body_pos..], serial_types[2]);
- body_pos += tbl_name_result.len;
-
- if (std.mem.eql(u8, tbl_name_result.value, table_name)) {
- const rp = record.readInt(record_data[body_pos..], serial_types[3]);
- return @as(u32, @intCast(rp.value));
- }
- }
-
- return 0;
-}
-
-pub fn countRows(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, rootpage: u32) !u64 {
- if (rootpage == 0) return 0;
-
- const page_offset = (rootpage - 1) * @as(u64, page_size);
- var page_data = try allocator.alloc(u8, page_size);
- defer allocator.free(page_data);
+ body_pos += type_result.len;
+ const name_result = record.readString(record_data[body_pos..], name_st);
+ if (!std.mem.eql(u8, name_result.value, table_name)) continue;
- _ = try file.seekTo(page_offset);
- _ = try file.read(page_data);
+ body_pos += name_result.len;
+ body_pos += (serial_types[2] - 13) / 2;
- const page_type = page_data[0];
- if (page_type == 0x05 or page_type == 0x0d) {
- return std.mem.readInt(u16, page_data[3..5], .big);
+ const rootpage_result = record.readInt(record_data[body_pos..], rootpage_st);
+ return @as(u32, @intCast(rootpage_result.value));
}
- return 0;
+ return error.TableNotFound;
}
+/// Get CREATE TABLE SQL for a table
pub fn getCreateTableSQL(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, table_name: []const u8) ![]const u8 {
var buf: [2]u8 = undefined;
_ = try file.seekTo(103);
@@ -609,276 +95,112 @@ pub fn getCreateTableSQL(allocator: std.mem.Allocator, file: *std.fs.File, page_
_ = try file.read(page_data);
for (0..num_cells) |i| {
+ if (cell_pointers[i] >= page_data.len) continue;
const cell_data = page_data[cell_pointers[i]..];
var parsed = varint.parse(cell_data);
var pos = parsed.len;
+ if (pos >= cell_data.len) continue;
parsed = varint.parse(cell_data[pos..]);
pos += parsed.len;
+ if (pos >= cell_data.len) continue;
const record_data = cell_data[pos..];
parsed = varint.parse(record_data);
const header_size = parsed.value;
+ if (header_size > record_data.len) continue;
var header_pos = parsed.len;
var serial_types: [5]u64 = undefined;
for (0..5) |col| {
+ if (header_pos >= header_size) break;
parsed = varint.parse(record_data[header_pos..]);
serial_types[col] = parsed.value;
header_pos += parsed.len;
}
- var body_pos: usize = header_size;
+ const type_st = serial_types[0];
+ const name_st = serial_types[1];
+ const sql_st = serial_types[4];
- const st0 = serial_types[0];
- if (st0 >= 13 and (st0 % 2) == 1) {
- body_pos += (st0 - 13) / 2;
- } else if (st0 >= 1 and st0 <= 6) {
- const r0 = record.readInt(record_data[body_pos..], st0);
- body_pos += r0.len;
- }
+ var body_pos: usize = header_size;
+ const type_result = record.readString(record_data[body_pos..], type_st);
+ if (!std.mem.eql(u8, type_result.value, "table")) continue;
- const st1 = serial_types[1];
- if (st1 >= 13 and (st1 % 2) == 1) {
- body_pos += (st1 - 13) / 2;
- } else if (st1 >= 1 and st1 <= 6) {
- const r1 = record.readInt(record_data[body_pos..], st1);
- body_pos += r1.len;
- }
+ body_pos += type_result.len;
+ const name_result = record.readString(record_data[body_pos..], name_st);
+ if (!std.mem.eql(u8, name_result.value, table_name)) continue;
- const tbl_name_result = record.readString(record_data[body_pos..], serial_types[2]);
- body_pos += tbl_name_result.len;
+ body_pos += name_result.len;
+ body_pos += (serial_types[2] - 13) / 2;
- if (std.mem.eql(u8, tbl_name_result.value, table_name)) {
- const rp = record.readInt(record_data[body_pos..], serial_types[3]);
- body_pos += rp.len;
+ const rootpage_result = record.readInt(record_data[body_pos..], serial_types[3]);
+ body_pos += rootpage_result.len;
- const sql_result = record.readString(record_data[body_pos..], serial_types[4]);
- return try allocator.dupe(u8, sql_result.value);
- }
+ const sql_result = record.readString(record_data[body_pos..], sql_st);
+ return try allocator.dupe(u8, sql_result.value);
}
return error.TableNotFound;
}
+/// Parse column index from CREATE TABLE SQL
pub fn parseColumnIndex(sql: []const u8, column_name: []const u8) !usize {
- var paren_idx: ?usize = null;
+ var in_parens = false;
+ var paren_start: usize = 0;
+
for (sql, 0..) |c, i| {
if (c == '(') {
- paren_idx = i;
- break;
- }
- }
-
- if (paren_idx == null) return error.InvalidSQL;
+ if (!in_parens) {
+ paren_start = i + 1;
+ in_parens = true;
+ }
+ } else if (c == ')') {
+ if (in_parens) {
+ const columns_part = sql[paren_start..i];
+
+ var column_idx: usize = 0;
+ var start: usize = 0;
+ var in_col = false;
+
+ for (columns_part, 0..) |ch, j| {
+ if (!in_col) {
+ if (ch != ' ' and ch != '\t' and ch != '\n' and ch != '\r') {
+ start = j;
+ in_col = true;
+ }
+ } else {
+ if (ch == ',' or j == columns_part.len - 1) {
+ var end = j;
+ if (j == columns_part.len - 1 and ch != ',') {
+ end = j + 1;
+ }
- var col_idx: usize = 0;
- var in_col_name = false;
- var col_start: usize = paren_idx.? + 1;
+ const col_def = columns_part[start..end];
- for (sql[paren_idx.? + 1 ..], 0..) |c, i| {
- const actual_idx = paren_idx.? + 1 + i;
+ var space_idx: usize = 0;
+ for (col_def, 0..) |col_ch, col_i| {
+ if (col_ch == ' ' or col_ch == '\t') {
+ space_idx = col_i;
+ break;
+ }
+ }
- if (c == ')') break;
+ const col_name = if (space_idx > 0) col_def[0..space_idx] else col_def;
- if (c == ' ' or c == '\t' or c == '\n') {
- if (in_col_name) {
- const col_name = std.mem.trim(u8, sql[col_start..actual_idx], " \t\n");
- if (std.mem.eql(u8, col_name, column_name)) {
- return col_idx;
- }
- in_col_name = false;
- }
- continue;
- }
+ if (std.mem.eql(u8, col_name, column_name)) {
+ return column_idx;
+ }
- if (c == ',') {
- if (in_col_name) {
- const col_name = std.mem.trim(u8, sql[col_start..actual_idx], " \t\n");
- if (std.mem.eql(u8, col_name, column_name)) {
- return col_idx;
+ column_idx += 1;
+ in_col = false;
+ }
+ }
}
}
- col_idx += 1;
- in_col_name = false;
- continue;
- }
-
- if (!in_col_name) {
- col_start = actual_idx;
- in_col_name = true;
}
}
return error.ColumnNotFound;
}
-
-pub fn readTableRows(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, rootpage: u32, column_idx: usize, stdout: anytype) !void {
- if (rootpage == 0) return;
-
- const page_offset = (rootpage - 1) * @as(u64, page_size);
- var page_data = try allocator.alloc(u8, page_size);
- defer allocator.free(page_data);
-
- _ = try file.seekTo(page_offset);
- _ = try file.read(page_data);
-
- const page_type = page_data[0];
- if (page_type != 0x0d) return;
-
- const num_cells = std.mem.readInt(u16, page_data[3..5], .big);
-
- var cell_pointers = try allocator.alloc(u16, num_cells);
- defer allocator.free(cell_pointers);
-
- for (0..num_cells) |i| {
- const offset = 8 + i * 2;
- const cell_ptr_bytes = page_data[offset .. offset + 2];
- cell_pointers[i] = std.mem.readInt(u16, cell_ptr_bytes[0..2], .big);
- }
-
- for (0..num_cells) |i| {
- const cell_data = page_data[cell_pointers[i]..];
-
- var parsed = varint.parse(cell_data);
- var pos = parsed.len;
-
- parsed = varint.parse(cell_data[pos..]);
- pos += parsed.len;
-
- const record_data = cell_data[pos..];
- parsed = varint.parse(record_data);
- const header_size = parsed.value;
- var header_pos = parsed.len;
-
- var serial_types = std.ArrayList(u64){};
- defer serial_types.deinit(allocator);
-
- while (header_pos < header_size) {
- parsed = varint.parse(record_data[header_pos..]);
- try serial_types.append(allocator, parsed.value);
- header_pos += parsed.len;
- }
-
- var body_pos: usize = header_size;
- for (0..column_idx) |col| {
- if (col >= serial_types.items.len) break;
- const st = serial_types.items[col];
- if (st >= 13 and (st % 2) == 1) {
- body_pos += (st - 13) / 2;
- } else if (st >= 1 and st <= 6) {
- const int_result = record.readInt(record_data[body_pos..], st);
- body_pos += int_result.len;
- }
- }
-
- if (column_idx < serial_types.items.len) {
- const st = serial_types.items[column_idx];
- if (st >= 13 and (st % 2) == 1) {
- const str_result = record.readString(record_data[body_pos..], st);
- try stdout.print("{s}\n", .{str_result.value});
- } else if (st >= 1 and st <= 6) {
- const int_result = record.readInt(record_data[body_pos..], st);
- try stdout.print("{}\n", .{int_result.value});
- }
- }
- }
-}
-
-pub fn readTableRowsMultiColumn(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, rootpage: u32, column_indices: []const usize, stdout: anytype) !void {
- if (rootpage == 0) return;
-
- const page_offset = (rootpage - 1) * @as(u64, page_size);
- var page_data = try allocator.alloc(u8, page_size);
- defer allocator.free(page_data);
-
- _ = try file.seekTo(page_offset);
- _ = try file.read(page_data);
-
- const page_type = page_data[0];
- if (page_type != 0x0d) return;
-
- const num_cells = std.mem.readInt(u16, page_data[3..5], .big);
-
- var cell_pointers = try allocator.alloc(u16, num_cells);
- defer allocator.free(cell_pointers);
-
- for (0..num_cells) |i| {
- const offset = 8 + i * 2;
- const cell_ptr_bytes = page_data[offset .. offset + 2];
- cell_pointers[i] = std.mem.readInt(u16, cell_ptr_bytes[0..2], .big);
- }
-
- for (0..num_cells) |i| {
- const cell_data = page_data[cell_pointers[i]..];
-
- var parsed = varint.parse(cell_data);
- var pos = parsed.len;
-
- parsed = varint.parse(cell_data[pos..]);
- pos += parsed.len;
-
- const record_data = cell_data[pos..];
- parsed = varint.parse(record_data);
- const header_size = parsed.value;
- var header_pos = parsed.len;
-
- var serial_types = std.ArrayList(u64){};
- defer serial_types.deinit(allocator);
-
- while (header_pos < header_size) {
- parsed = varint.parse(record_data[header_pos..]);
- try serial_types.append(allocator, parsed.value);
- header_pos += parsed.len;
- }
-
- for (column_indices, 0..) |column_idx, col_num| {
- var body_pos: usize = header_size;
- for (0..column_idx) |col| {
- if (col >= serial_types.items.len) break;
- const st = serial_types.items[col];
- if (st >= 13 and (st % 2) == 1) {
- body_pos += (st - 13) / 2;
- } else if (st >= 1 and st <= 6) {
- const int_result = record.readInt(record_data[body_pos..], st);
- body_pos += int_result.len;
- }
- }
-
- if (col_num > 0) {
- try stdout.print("|", .{});
- }
-
- if (column_idx < serial_types.items.len) {
- const st = serial_types.items[column_idx];
- if (st >= 13 and (st % 2) == 1) {
- const str_result = record.readString(record_data[body_pos..], st);
- try stdout.print("{s}", .{str_result.value});
- } else if (st >= 1 and st <= 6) {
- const int_result = record.readInt(record_data[body_pos..], st);
- try stdout.print("{}", .{int_result.value});
- }
- }
- }
- try stdout.print("\n", .{});
- }
-}
-
-pub fn readTableRowsMultiColumnWhere(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, rootpage: u32, column_indices: []const usize, where_column_idx: ?usize, where_value: ?[]const u8, stdout: anytype) !void {
- try traverseBTree(allocator, file, page_size, rootpage, column_indices, where_column_idx, where_value, stdout);
-}
-
-pub fn readTableRowsWithIndex(allocator: std.mem.Allocator, file: *std.fs.File, page_size: u16, table_name: []const u8, table_rootpage: u32, column_indices: []const usize, where_column: []const u8, where_value: []const u8, stdout: anytype) !void {
- _ = allocator;
- _ = file;
- _ = page_size;
- _ = table_name;
- _ = table_rootpage;
- _ = column_indices;
- _ = where_column;
- _ = where_value;
- _ = stdout;
- // Temporarily disable to test table scan
- return error.NoIndexFound;
-}