Store the table as a single B-tree leaf node (db.c) and add specs for the
.constants and .btree meta-commands (spec/main_spec.rb).

Review fixes folded into the added lines of this patch:
  * leaf_node_cell() scales cell_num by LEAF_NODE_CELL_SIZE. It previously
    added the raw index, so cell N started N bytes after the header and
    consecutive cells overlapped.
  * leaf_node_insert() closes the "make room" branch before writing. The
    count increment, key store and row serialization used to sit inside
    `if (cursor->cell_num < num_cells)`, so an insert through the
    table_end() cursor used by execute_insert() stored nothing.
  * leaf_node_num_cells() and leaf_node_value() use explicit char* arithmetic
    and casts instead of an implicit char* -> uint32_t* conversion and
    arithmetic on void*.
  * .btree prints table->root_page_num instead of a hard-coded page 0.

Notes for whoever applies this:
  * Line structure and indentation were reconstructed (2-space indent
    assumed); hunk counts were re-derived and new-side offsets adjusted for
    the added comment lines.
  * The post-image blob id on the index line predates the fixes above.
  * Not addressed here: the `page_num > TABLE_MAX_PAGES` guard visible as
    context in get_page() still admits page_num == TABLE_MAX_PAGES.

diff --git a/db.c b/db.c
index ce61718..52cd0db 100644
--- a/db.c
+++ b/db.c
@@ -67,29 +67,89 @@ const uint32_t ROW_SIZE = ID_SIZE + USERNAME_SIZE + EMAIL_SIZE;
 
 const uint32_t PAGE_SIZE = 4096;
 const uint32_t TABLE_MAX_PAGES = 100;
-const uint32_t ROWS_PER_PAGE = PAGE_SIZE / ROW_SIZE;
-const uint32_t TABLE_MAX_ROWS = ROWS_PER_PAGE * TABLE_MAX_PAGES;
 
 struct Pager_t {
   int file_descriptor;
-  uint32_t file_length;
+  uint32_t file_length;
+  uint32_t num_pages;
   void* pages[TABLE_MAX_PAGES];
 };
 typedef struct Pager_t Pager;
 
 struct Table_t {
   Pager* pager;
-  uint32_t num_rows;
+  uint32_t root_page_num;
 };
 typedef struct Table_t Table;
 
 struct Cursor_t {
   Table* table;
-  uint32_t row_num;
+  uint32_t page_num;
+  uint32_t cell_num;
   bool end_of_table;  // Indicates a position one past the last element
 };
 typedef struct Cursor_t Cursor;
 
+enum NodeType_t { NODE_INTERNAL, NODE_LEAF };
+typedef enum NodeType_t NodeType;
+
+/*
+ * Common Node Header Layout
+ */
+const uint32_t NODE_TYPE_SIZE = sizeof(uint8_t);
+const uint32_t NODE_TYPE_OFFSET = 0;
+const uint32_t IS_ROOT_SIZE = sizeof(uint8_t);
+const uint32_t IS_ROOT_OFFSET = NODE_TYPE_SIZE;
+const uint32_t PARENT_POINTER_SIZE = sizeof(uint32_t);
+const uint32_t PARENT_POINTER_OFFSET = IS_ROOT_OFFSET + IS_ROOT_SIZE;
+const uint32_t COMMON_NODE_HEADER_SIZE = NODE_TYPE_SIZE + IS_ROOT_SIZE + PARENT_POINTER_SIZE;
+
+/*
+ * Leaf Node Header Layout
+ */
+const uint32_t LEAF_NODE_NUM_CELLS_SIZE = sizeof(uint32_t);
+const uint32_t LEAF_NODE_NUM_CELLS_OFFSET = COMMON_NODE_HEADER_SIZE;
+const uint32_t LEAF_NODE_HEADER_SIZE = COMMON_NODE_HEADER_SIZE + LEAF_NODE_NUM_CELLS_SIZE;
+
+/*
+ * Leaf Node Body Layout
+ */
+const uint32_t LEAF_NODE_KEY_SIZE = sizeof(uint32_t);
+const uint32_t LEAF_NODE_KEY_OFFSET = 0;
+const uint32_t LEAF_NODE_VALUE_SIZE = ROW_SIZE;
+const uint32_t LEAF_NODE_VALUE_OFFSET = LEAF_NODE_KEY_OFFSET + LEAF_NODE_KEY_SIZE;
+const uint32_t LEAF_NODE_CELL_SIZE = LEAF_NODE_KEY_SIZE + LEAF_NODE_VALUE_SIZE;
+const uint32_t LEAF_NODE_SPACE_FOR_CELLS = PAGE_SIZE - LEAF_NODE_HEADER_SIZE;
+const uint32_t LEAF_NODE_MAX_CELLS = LEAF_NODE_SPACE_FOR_CELLS / LEAF_NODE_CELL_SIZE;
+
+/* Pointer to the cell-count field kept in the leaf node header. */
+uint32_t* leaf_node_num_cells(void* node) {
+  return (uint32_t *)((char *)node + LEAF_NODE_NUM_CELLS_OFFSET);
+}
+
+/*
+ * Address of cell `cell_num`. Cells are fixed-size key/value records stored
+ * back to back after the leaf header, so the index is scaled by the cell size.
+ */
+void* leaf_node_cell(void* node, uint32_t cell_num) {
+  return (char *)node + LEAF_NODE_HEADER_SIZE + cell_num * LEAF_NODE_CELL_SIZE;
+}
+
+/* Pointer to the key, which sits at the start of the cell. */
+uint32_t* leaf_node_key(void* node, uint32_t cell_num) {
+  return leaf_node_cell(node, cell_num);
+}
+
+/* Pointer to the serialized row, which follows the key inside the cell. */
+void* leaf_node_value(void* node, uint32_t cell_num) {
+  return (char *)leaf_node_cell(node, cell_num) + LEAF_NODE_KEY_SIZE;
+}
+
+/* Mark the page as an empty leaf by zeroing its cell count. */
+void initialize_leaf_node(void* node) {
+  *leaf_node_num_cells(node) = 0;
+}
+
 InputBuffer* new_input_buffer() {
   InputBuffer* input_buffer = malloc(sizeof(InputBuffer));
 
@@ -104,6 +164,24 @@ void print_prompt() {
   printf("db > ");
 }
 
+void print_constants() {
+  printf("ROW_SIZE: %d\n", ROW_SIZE);
+  printf("COMMON_NODE_HEADER_SIZE: %d\n", COMMON_NODE_HEADER_SIZE);
+  printf("LEAF_NODE_HEADER_SIZE: %d\n", LEAF_NODE_HEADER_SIZE);
+  printf("LEAF_NODE_CELL_SIZE: %d\n", LEAF_NODE_CELL_SIZE);
+  printf("LEAF_NODE_SPACE_FOR_CELLS: %d\n", LEAF_NODE_SPACE_FOR_CELLS);
+  printf("LEAF_NODE_MAX_CELLS: %d\n", LEAF_NODE_MAX_CELLS);
+}
+
+void print_leaf_node(void* node) {
+  uint32_t num_cells = *leaf_node_num_cells(node);
+  printf("leaf (size %d)\n", num_cells);
+  for (uint32_t i = 0; i < num_cells; i++) {
+    uint32_t key = *leaf_node_key(node, i);
+    printf(" - %d : %d\n", i, key);
+  }
+}
+
 void read_input(InputBuffer* input_buffer) {
   ssize_t bytes_read = getline(
       &(input_buffer->buffer),
@@ -121,7 +199,7 @@ void read_input(InputBuffer* input_buffer) {
   input_buffer->buffer[bytes_read - 1] = 0;
 }
 
-void pager_flush(Pager* pager, uint32_t page_num, uint32_t size) {
+void pager_flush(Pager* pager, uint32_t page_num) {
   if (pager->pages[page_num] == NULL) {
     printf("Tried to flush null page\n");
     exit(EXIT_FAILURE);
@@ -134,7 +212,7 @@ void pager_flush(Pager* pager, uint32_t page_num, uint32_t size) {
     exit(EXIT_FAILURE);
   }
 
-  ssize_t bytes_written = write(pager->file_descriptor, pager->pages[page_num], size);
+  ssize_t bytes_written = write(pager->file_descriptor, pager->pages[page_num], PAGE_SIZE);
 
   if (bytes_written == -1) {
     printf("Error writing: %d\n", errno);
@@ -144,29 +222,16 @@ void pager_flush(Pager* pager, uint32_t page_num, uint32_t size) {
 
 void db_close(Table* table) {
   Pager* pager = table->pager;
-  uint32_t num_full_pages = table->num_rows / ROWS_PER_PAGE;
 
-  for (uint32_t i = 0; i < num_full_pages; i++) {
+  for (uint32_t i = 0; i < pager->num_pages; i++) {
     if (pager->pages[i] == NULL) {
       continue;
     }
-    pager_flush(pager, i, PAGE_SIZE);
+    pager_flush(pager, i);
     free(pager->pages[i]);
     pager->pages[i] = NULL;
   }
 
-  // There may be a partial page to write to the end of the file
-  // This should not be needed after we switch to a B-tree
-  uint32_t num_additional_rows = table->num_rows % ROWS_PER_PAGE;
-  if (num_additional_rows > 0) {
-    uint32_t page_num = num_full_pages;
-    if (pager->pages[page_num] != NULL) {
-      pager_flush(pager, page_num, num_additional_rows * ROW_SIZE);
-      free(pager->pages[page_num]);
-      pager->pages[page_num] = NULL;
-    }
-  }
-
   int result = close(pager->file_descriptor);
   if (result == -1) {
     printf("Error closing db file.\n");
@@ -182,15 +247,6 @@ void db_close(Table* table) {
   free(pager);
 }
 
-MetaCommandResult do_meta_command(InputBuffer* input_buffer, Table* table) {
-  if (strcmp(input_buffer->buffer, ".exit") == 0) {
-    db_close(table);
-    exit(EXIT_SUCCESS);
-  } else {
-    return META_COMMAND_UNRECOGNIZED_COMMAND;
-  }
-}
-
 PrepareResult prepare_insert(InputBuffer* input_buffer, Statement* statement) {
   statement->type = STATEMENT_INSERT;
 
@@ -249,24 +305,6 @@ void deserialize_row(void* source, Row* destination) {
   memcpy(&(destination->email), source + EMAIL_OFFSET, EMAIL_SIZE);
 }
 
-Cursor* table_start(Table* table) {
-  Cursor* cursor = malloc(sizeof(Cursor));
-  cursor->table = table;
-  cursor->row_num = 0;
-  cursor->end_of_table = (table->num_rows == 0);
-
-  return cursor;
-}
-
-Cursor* table_end(Table* table) {
-  Cursor* cursor = malloc(sizeof(Cursor));
-  cursor->table = table;
-  cursor->row_num = table->num_rows;
-  cursor->end_of_table = true;
-
-  return cursor;
-}
-
 void* get_page(Pager* pager, uint32_t page_num) {
   if (page_num > TABLE_MAX_PAGES) {
     printf("Tried to fetch page number out of bounds. %d > %d\n", page_num, TABLE_MAX_PAGES);
@@ -293,37 +331,94 @@ void* get_page(Pager* pager, uint32_t page_num) {
     }
 
     pager->pages[page_num] = page;
+
+    if (page_num >= pager->num_pages) {
+      pager->num_pages = page_num + 1;
+    }
   }
 
   return pager->pages[page_num];
 }
 
+/*
+ * Insert (key, value) into the leaf at the cursor position, shifting any cells
+ * at or after that position one slot to the right. Exits if the leaf is full.
+ */
+void leaf_node_insert(Cursor* cursor, uint32_t key, Row* value) {
+  void* node = get_page(cursor->table->pager, cursor->page_num);
+
+  uint32_t num_cells = *leaf_node_num_cells(node);
+  if (num_cells >= LEAF_NODE_MAX_CELLS) {
+    // Node full
+    printf("Need to implement splitting a leaf node.\n");
+    exit(EXIT_FAILURE);
+  }
+
+  if (cursor->cell_num < num_cells) {
+    // Make room for new cell
+    for (uint32_t i = num_cells; i > cursor->cell_num; i--) {
+      memcpy(leaf_node_cell(node, i), leaf_node_cell(node, i - 1),
+             LEAF_NODE_CELL_SIZE);
+    }
+  }
+
+  *(leaf_node_num_cells(node)) += 1;
+  *(leaf_node_key(node, cursor->cell_num)) = key;
+  serialize_row(value, leaf_node_value(node, cursor->cell_num));
+}
+
+Cursor* table_start(Table* table) {
+  Cursor* cursor = malloc(sizeof(Cursor));
+  cursor->table = table;
+  cursor->page_num = table->root_page_num;
+  cursor->cell_num = 0;
+
+  void* root_node = get_page(table->pager, table->root_page_num);
+  uint32_t num_cells = *leaf_node_num_cells(root_node);
+  cursor->end_of_table = (num_cells == 0);
+
+  return cursor;
+}
+
+Cursor* table_end(Table* table) {
+  Cursor* cursor = malloc(sizeof(Cursor));
+  cursor->table = table;
+  cursor->page_num = table->root_page_num;
+
+  void* root_node = get_page(table->pager, table->root_page_num);
+  uint32_t num_cells = *leaf_node_num_cells(root_node);
+  cursor->cell_num = num_cells;
+  cursor->end_of_table = true;
+
+  return cursor;
+}
+
 void* cursor_value(Cursor* cursor) {
-  uint32_t row_num = cursor->row_num;
-  uint32_t page_num = row_num / ROWS_PER_PAGE;
+  uint32_t page_num = cursor->page_num;
   void* page = get_page(cursor->table->pager, page_num);
-  uint32_t row_offset = row_num % ROWS_PER_PAGE;
-  uint32_t byte_offset = row_offset * ROW_SIZE;
-  return page + byte_offset;
+  return leaf_node_value(page, cursor->cell_num);
 }
 
 void cursor_advance(Cursor* cursor) {
-  cursor->row_num += 1;
-  if (cursor->row_num >= cursor->table->num_rows) {
+  uint32_t page_num = cursor->page_num;
+  void* node = get_page(cursor->table->pager, page_num);
+
+  cursor->cell_num += 1;
+  if (cursor->cell_num >= (*leaf_node_num_cells(node))) {
     cursor->end_of_table = true;
   }
 }
 
 ExecuteResult execute_insert(Statement* statement, Table* table) {
-  if (table->num_rows >= TABLE_MAX_ROWS) {
+  void* node = get_page(table->pager, table->root_page_num);
+  if ((*leaf_node_num_cells(node) >= LEAF_NODE_MAX_CELLS)) {
     return EXECUTE_TABLE_FULL;
   }
 
   Row* row_to_insert = &(statement->row_to_insert);
   Cursor* cursor = table_end(table);
 
-  serialize_row(row_to_insert, cursor_value(cursor));
-  table->num_rows += 1;
+  leaf_node_insert(cursor, row_to_insert->id, row_to_insert);
 
   free(cursor);
 
@@ -369,6 +464,12 @@ Pager* pager_open(const char* filename) {
   Pager* pager = malloc(sizeof(Pager));
   pager->file_descriptor = fd;
   pager->file_length = file_length;
+  pager->num_pages = (file_length / PAGE_SIZE);
+
+  if (file_length % PAGE_SIZE != 0) {
+    printf("Db file is not a whole number of pages. Corrupt file.\n");
+    exit(EXIT_FAILURE);
+  }
 
   for (uint32_t i = 0; i < TABLE_MAX_PAGES; i++) {
     pager->pages[i] = NULL;
@@ -377,19 +478,39 @@ Pager* pager_open(const char* filename) {
   return pager;
 }
 
-
-
 Table* db_open(const char* filename) {
   Pager* pager = pager_open(filename);
-  uint32_t num_rows = pager->file_length / ROW_SIZE;
 
   Table* table = malloc(sizeof(Table));
   table->pager = pager;
-  table->num_rows = num_rows;
+  table->root_page_num = 0;
+
+  if (pager->num_pages == 0) {
+    // New database file. Initialize page 0 as leaf node.
+    void* root_node = get_page(pager, 0);
+    initialize_leaf_node(root_node);
+  }
 
   return table;
 }
 
+MetaCommandResult do_meta_command(InputBuffer* input_buffer, Table* table) {
+  if (strcmp(input_buffer->buffer, ".exit") == 0) {
+    db_close(table);
+    exit(EXIT_SUCCESS);
+  } else if (strcmp(input_buffer->buffer, ".btree") == 0) {
+    printf("Tree:\n");
+    print_leaf_node(get_page(table->pager, table->root_page_num));
+    return META_COMMAND_SUCCESS;
+  } else if (strcmp(input_buffer->buffer, ".constants") == 0) {
+    printf("Constants:\n");
+    print_constants();
+    return META_COMMAND_SUCCESS;
+  } else {
+    return META_COMMAND_UNRECOGNIZED_COMMAND;
+  }
+}
+
 int main(int argc, char* argv[]) {
   if (argc < 2) {
     printf("Must supply a database filename\n");
diff --git a/spec/main_spec.rb b/spec/main_spec.rb
index 0bd17ea..7fd3f99 100644
--- a/spec/main_spec.rb
+++ b/spec/main_spec.rb
@@ -108,4 +108,45 @@ describe 'database' do
     ])
   end
 
+  it 'prints constants' do
+    script = [
+      ".constants",
+      ".exit",
+    ]
+    result = run_script(script)
+
+    expect(result).to match_array([
+      "db > Constants:",
+      "ROW_SIZE: 293",
+      "COMMON_NODE_HEADER_SIZE: 6",
+      "LEAF_NODE_HEADER_SIZE: 10",
+      "LEAF_NODE_CELL_SIZE: 297",
+      "LEAF_NODE_SPACE_FOR_CELLS: 4086",
+      "LEAF_NODE_MAX_CELLS: 13",
+      "db > ",
+    ])
+  end
+
+  it 'allows printing out the structure of a one-node btree' do
+    script = [3, 1, 2].map do |i|
+      "insert #{i} user#{i} person#{i}@example.com"
+    end
+
+    script << ".btree"
+    script << ".exit"
+    result = run_script(script)
+
+    expect(result).to match_array([
+      "db > Executed.",
+      "db > Executed.",
+      "db > Executed.",
+      "db > Tree:",
+      "leaf (size 3)",
+      " - 0 : 3",
+      " - 1 : 1",
+      " - 2 : 2",
+      "db > "
+    ])
+  end
+
 end