From 53c27d8cdf8c36eb863a6424d700bfdfa75af784 Mon Sep 17 00:00:00 2001 From: Timothy Warren Date: Tue, 30 Apr 2019 15:17:58 -0400 Subject: [PATCH] Step 9 --- db.c | 408 ++++++++++++++++++++++++++-------------------- spec/main_spec.rb | 22 ++- 2 files changed, 252 insertions(+), 178 deletions(-) diff --git a/db.c b/db.c index 52cd0db..a62dd1a 100644 --- a/db.c +++ b/db.c @@ -15,6 +15,7 @@ typedef struct InputBuffer_t InputBuffer; enum ExecuteResult_t { EXECUTE_SUCCESS, + EXECUTE_DUPLICATE_KEY, EXECUTE_TABLE_FULL }; typedef enum ExecuteResult_t ExecuteResult; @@ -90,6 +91,10 @@ struct Cursor_t { }; typedef struct Cursor_t Cursor; +void print_row(Row* row) { + printf("(%d, %s, %s)\n", row->id, row->username, row->email); +} + enum NodeType_t { NODE_INTERNAL, NODE_LEAF }; typedef enum NodeType_t NodeType; @@ -122,6 +127,16 @@ const uint32_t LEAF_NODE_CELL_SIZE = LEAF_NODE_KEY_SIZE + LEAF_NODE_VALUE_SIZE; const uint32_t LEAF_NODE_SPACE_FOR_CELLS = PAGE_SIZE - LEAF_NODE_HEADER_SIZE; const uint32_t LEAF_NODE_MAX_CELLS = LEAF_NODE_SPACE_FOR_CELLS / LEAF_NODE_CELL_SIZE; +NodeType get_node_type(void* node) { + uint8_t value = *((uint8_t*)(node + NODE_TYPE_OFFSET)); + return (NodeType)value; +} + +void set_node_type(void* node, NodeType type) { + uint8_t value = type; + *((uint8_t*)(node + NODE_TYPE_OFFSET)) = value; +} + uint32_t* leaf_node_num_cells(void* node) { return (char *)node + LEAF_NODE_NUM_CELLS_OFFSET; } @@ -138,10 +153,186 @@ void* leaf_node_value(void* node, uint32_t cell_num) { return leaf_node_cell(node, cell_num) + LEAF_NODE_KEY_SIZE; } +void print_constants() { + printf("ROW_SIZE: %d\n", ROW_SIZE); + printf("COMMON_NODE_HEADER_SIZE: %d\n", COMMON_NODE_HEADER_SIZE); + printf("LEAF_NODE_HEADER_SIZE: %d\n", LEAF_NODE_HEADER_SIZE); + printf("LEAF_NODE_CELL_SIZE: %d\n", LEAF_NODE_CELL_SIZE); + printf("LEAF_NODE_SPACE_FOR_CELLS: %d\n", LEAF_NODE_SPACE_FOR_CELLS); + printf("LEAF_NODE_MAX_CELLS: %d\n", LEAF_NODE_MAX_CELLS); +} + +void* get_page(Pager* pager, uint32_t page_num) { + if (page_num > TABLE_MAX_PAGES) { + printf("Tried to fetch page number out of bounds. %d > %d\n", page_num, TABLE_MAX_PAGES); + exit(EXIT_FAILURE); + } + + if (pager->pages[page_num] == NULL) { + // Cache miss. Allocate memory and load from file. + void* page = malloc(PAGE_SIZE); + uint32_t num_pages = pager->file_length / PAGE_SIZE; + + // We might save a partial page at the end of the file + if (pager->file_length % PAGE_SIZE) { + num_pages += 1; + } + + if (page_num <= num_pages) { + lseek(pager->file_descriptor, page_num * PAGE_SIZE, SEEK_SET); + ssize_t bytes_read = read(pager->file_descriptor, page, PAGE_SIZE); + if (bytes_read == -1) { + printf("Error reading file: %d\n", errno); + exit(EXIT_FAILURE); + } + } + + pager->pages[page_num] = page; + + if (page_num >= pager->num_pages) { + pager->num_pages = page_num + 1; + } + } + + return pager->pages[page_num]; +} + +void serialize_row(Row* source, void* destination) { + memcpy(destination + ID_OFFSET, &(source->id), ID_SIZE); + memcpy(destination + USERNAME_OFFSET, &(source->username), USERNAME_SIZE); + memcpy(destination + EMAIL_OFFSET, &(source->email), EMAIL_SIZE); +} + +void deserialize_row(void* source, Row* destination) { + memcpy(&(destination->id), source + ID_OFFSET, ID_SIZE); + memcpy(&(destination->username), source + USERNAME_OFFSET, USERNAME_SIZE); + memcpy(&(destination->email), source + EMAIL_OFFSET, EMAIL_SIZE); +} + void initialize_leaf_node(void* node) { + set_node_type(node, NODE_LEAF); *leaf_node_num_cells(node) = 0; } +Cursor* leaf_node_find(Table* table, uint32_t page_num, uint32_t key) { + void* node = get_page(table->pager, page_num); + uint32_t num_cells = *leaf_node_num_cells(node); + + Cursor* cursor = malloc(sizeof(Cursor)); + cursor->table = table; + cursor->page_num = page_num; + + // Binary search + uint32_t min_index = 0; + uint32_t one_past_max_index = num_cells; + while (one_past_max_index != min_index) { + uint32_t index = (min_index + one_past_max_index) / 2; + uint32_t key_at_index = *leaf_node_key(node, index); + if (key == key_at_index) { + cursor->cell_num = index; + return cursor; + } + if (key < key_at_index) { + one_past_max_index = index; + } else { + min_index = index + 1; + } + } + + cursor->cell_num = min_index; + return cursor; +} + +/* + * Return the position of the given key. + * If the key is not present, return the position + * where it should be inserted + */ +Cursor* table_find(Table* table, uint32_t key) { + uint32_t root_page_num = table->root_page_num; + void* root_node = get_page(table->pager, root_page_num); + + if (get_node_type(root_node) == NODE_LEAF) { + return leaf_node_find(table, root_page_num, key); + } else { + printf("Need to implement searching an internal node\n"); + exit(EXIT_FAILURE); + } +} + +Cursor* table_start(Table* table) { + Cursor* cursor = malloc(sizeof(Cursor)); + cursor->table = table; + cursor->page_num = table->root_page_num; + cursor->cell_num = 0; + + void* root_node = get_page(table->pager, table->root_page_num); + uint32_t num_cells = *leaf_node_num_cells(root_node); + cursor->end_of_table = (num_cells == 0); + + return cursor; +} + +void* cursor_value(Cursor* cursor) { + uint32_t page_num = cursor->page_num; + void* page = get_page(cursor->table->pager, page_num); + return leaf_node_value(page, cursor->cell_num); +} + +void cursor_advance(Cursor* cursor) { + uint32_t page_num = cursor->page_num; + void* node = get_page(cursor->table->pager, page_num); + + cursor->cell_num += 1; + if (cursor->cell_num >= (*leaf_node_num_cells(node))) { + cursor->end_of_table = true; + } +} + +Pager* pager_open(const char* filename) { + // Read/Write mode, Create non-existent file, user read and write permission + int fd = open(filename, O_RDWR | O_CREAT, S_IWUSR | S_IRUSR); + + if (fd == -1) { + printf("Unable to open file\n"); + exit(EXIT_FAILURE); + } + + off_t file_length = lseek(fd, 0, SEEK_END); + + Pager* pager = malloc(sizeof(Pager)); + pager->file_descriptor = fd; + pager->file_length = file_length; + pager->num_pages = (file_length / PAGE_SIZE); + + if (file_length % PAGE_SIZE != 0) { + printf("Db file is not a whole number of pages. Corrupt file.\n"); + exit(EXIT_FAILURE); + } + + for (uint32_t i = 0; i < TABLE_MAX_PAGES; i++) { + pager->pages[i] = NULL; + } + + return pager; +} + +Table* db_open(const char* filename) { + Pager* pager = pager_open(filename); + + Table* table = malloc(sizeof(Table)); + table->pager = pager; + table->root_page_num = 0; + + if (pager->num_pages == 0) { + // New database file. Initialize page 0 as leaf node. + void* root_node = get_page(pager, 0); + initialize_leaf_node(root_node); + } + + return table; +} + InputBuffer* new_input_buffer() { InputBuffer* input_buffer = malloc(sizeof(InputBuffer)); @@ -156,24 +347,6 @@ void print_prompt() { printf("db > "); } -void print_constants() { - printf("ROW_SIZE: %d\n", ROW_SIZE); - printf("COMMON_NODE_HEADER_SIZE: %d\n", COMMON_NODE_HEADER_SIZE); - printf("LEAF_NODE_HEADER_SIZE: %d\n", LEAF_NODE_HEADER_SIZE); - printf("LEAF_NODE_CELL_SIZE: %d\n", LEAF_NODE_CELL_SIZE); - printf("LEAF_NODE_SPACE_FOR_CELLS: %d\n", LEAF_NODE_SPACE_FOR_CELLS); - printf("LEAF_NODE_MAX_CELLS: %d\n", LEAF_NODE_MAX_CELLS); -} - -void print_leaf_node(void* node) { - uint32_t num_cells = *leaf_node_num_cells(node); - printf("leaf (size %d)\n", num_cells); - for (uint32_t i = 0; i < num_cells; i++) { - uint32_t key = *leaf_node_key(node, i); - printf(" - %d : %d\n", i, key); - } -} - void read_input(InputBuffer* input_buffer) { ssize_t bytes_read = getline( &(input_buffer->buffer), @@ -239,6 +412,32 @@ void db_close(Table* table) { free(pager); } +void print_leaf_node(void* node) { + uint32_t num_cells = *leaf_node_num_cells(node); + printf("leaf (size %d)\n", num_cells); + for (uint32_t i = 0; i < num_cells; i++) { + uint32_t key = *leaf_node_key(node, i); + printf(" - %d : %d\n", i, key); + } +} + +MetaCommandResult do_meta_command(InputBuffer* input_buffer, Table* table) { + if (strcmp(input_buffer->buffer, ".exit") == 0) { + db_close(table); + exit(EXIT_SUCCESS); + } else if (strcmp(input_buffer->buffer, ".btree") == 0){ + printf("Tree:\n"); + print_leaf_node(get_page(table->pager, 0)); + return META_COMMAND_SUCCESS; + } else if (strcmp(input_buffer->buffer, ".constants") == 0) { + printf("Constants:\n"); + print_constants(); + return META_COMMAND_SUCCESS; + } else { + return META_COMMAND_UNRECOGNIZED_COMMAND; + } +} + PrepareResult prepare_insert(InputBuffer* input_buffer, Statement* statement) { statement->type = STATEMENT_INSERT; @@ -281,57 +480,6 @@ PrepareResult prepare_statement(InputBuffer* input_buffer, Statement* statement) return PREPARE_UNRECOGNIZED_STATEMENT; } -void print_row(Row* row) { - printf("(%d, %s, %s)\n", row->id, row->username, row->email); -} - -void serialize_row(Row* source, void* destination) { - memcpy(destination + ID_OFFSET, &(source->id), ID_SIZE); - memcpy(destination + USERNAME_OFFSET, &(source->username), USERNAME_SIZE); - memcpy(destination + EMAIL_OFFSET, &(source->email), EMAIL_SIZE); -} - -void deserialize_row(void* source, Row* destination) { - memcpy(&(destination->id), source + ID_OFFSET, ID_SIZE); - memcpy(&(destination->username), source + USERNAME_OFFSET, USERNAME_SIZE); - memcpy(&(destination->email), source + EMAIL_OFFSET, EMAIL_SIZE); -} - -void* get_page(Pager* pager, uint32_t page_num) { - if (page_num > TABLE_MAX_PAGES) { - printf("Tried to fetch page number out of bounds. %d > %d\n", page_num, TABLE_MAX_PAGES); - exit(EXIT_FAILURE); - } - - if (pager->pages[page_num] == NULL) { - // Cache miss. Allocate memory and load from file. - void* page = malloc(PAGE_SIZE); - uint32_t num_pages = pager->file_length / PAGE_SIZE; - - // We might save a partial page at the end of the file - if (pager->file_length % PAGE_SIZE) { - num_pages += 1; - } - - if (page_num <= num_pages) { - lseek(pager->file_descriptor, page_num * PAGE_SIZE, SEEK_SET); - ssize_t bytes_read = read(pager->file_descriptor, page, PAGE_SIZE); - if (bytes_read == -1) { - printf("Error reading file: %d\n", errno); - exit(EXIT_FAILURE); - } - } - - pager->pages[page_num] = page; - - if (page_num >= pager->num_pages) { - pager->num_pages = page_num + 1; - } - } - - return pager->pages[page_num]; -} - void leaf_node_insert(Cursor* cursor, uint32_t key, Row* value) { void* node = get_page(cursor->table->pager, cursor->page_num); @@ -346,7 +494,7 @@ void leaf_node_insert(Cursor* cursor, uint32_t key, Row* value) { // Make room for new cell for (uint32_t i = num_cells; i > cursor->cell_num; i--) { memcpy(leaf_node_cell(node, i), leaf_node_cell(node, i - 1), - LEAF_NODE_CELL_SIZE); + LEAF_NODE_CELL_SIZE); } *(leaf_node_num_cells(node)) += 1; @@ -355,56 +503,23 @@ void leaf_node_insert(Cursor* cursor, uint32_t key, Row* value) { } } -Cursor* table_start(Table* table) { - Cursor* cursor = malloc(sizeof(Cursor)); - cursor->table = table; - cursor->page_num = table->root_page_num; - cursor->cell_num = 0; - - void* root_node = get_page(table->pager, table->root_page_num); - uint32_t num_cells = *leaf_node_num_cells(root_node); - cursor->end_of_table = (num_cells == 0); - - return cursor; -} - -Cursor* table_end(Table* table) { - Cursor* cursor = malloc(sizeof(Cursor)); - cursor->table = table; - cursor->page_num = table->root_page_num; - - void* root_node = get_page(table->pager, table->root_page_num); - uint32_t num_cells = *leaf_node_num_cells(root_node); - cursor->cell_num = num_cells; - cursor->end_of_table = true; - - return cursor; -} - -void* cursor_value(Cursor* cursor) { - uint32_t page_num = cursor->page_num; - void* page = get_page(cursor->table->pager, page_num); - return leaf_node_value(page, cursor->cell_num); -} - -void cursor_advance(Cursor* cursor) { - uint32_t page_num = cursor->page_num; - void* node = get_page(cursor->table->pager, page_num); - - cursor->cell_num += 1; - if (cursor->cell_num >= (*leaf_node_num_cells(node))) { - cursor->end_of_table = true; - } -} - ExecuteResult execute_insert(Statement* statement, Table* table) { void* node = get_page(table->pager, table->root_page_num); - if ((*leaf_node_num_cells(node) >= LEAF_NODE_MAX_CELLS)) { + uint32_t num_cells = (*leaf_node_num_cells(node)); + if (num_cells >= LEAF_NODE_MAX_CELLS) { return EXECUTE_TABLE_FULL; } Row* row_to_insert = &(statement->row_to_insert); - Cursor* cursor = table_end(table); + uint32_t key_to_insert = row_to_insert->id; + Cursor* cursor = table_find(table, key_to_insert); + + if (cursor->cell_num < num_cells) { + uint32_t key_at_index = *leaf_node_key(node, cursor->cell_num); + if (key_at_index == key_to_insert) { + return EXECUTE_DUPLICATE_KEY; + } + } leaf_node_insert(cursor, row_to_insert->id, row_to_insert); @@ -438,67 +553,6 @@ ExecuteResult execute_statement(Statement* statement, Table* table) { } } -Pager* pager_open(const char* filename) { - // Read/Write mode, Create non-existent file, user read and write permission - int fd = open(filename, O_RDWR | O_CREAT, S_IWUSR | S_IRUSR); - - if (fd == -1) { - printf("Unable to open file\n"); - exit(EXIT_FAILURE); - } - - off_t file_length = lseek(fd, 0, SEEK_END); - - Pager* pager = malloc(sizeof(Pager)); - pager->file_descriptor = fd; - pager->file_length = file_length; - pager->num_pages = (file_length / PAGE_SIZE); - - if (file_length % PAGE_SIZE != 0) { - printf("Db file is not a whole number of pages. Corrupt file.\n"); - exit(EXIT_FAILURE); - } - - for (uint32_t i = 0; i < TABLE_MAX_PAGES; i++) { - pager->pages[i] = NULL; - } - - return pager; -} - -Table* db_open(const char* filename) { - Pager* pager = pager_open(filename); - - Table* table = malloc(sizeof(Table)); - table->pager = pager; - table->root_page_num = 0; - - if (pager->num_pages == 0) { - // New database file. Initialize page 0 as leaf node. - void* root_node = get_page(pager, 0); - initialize_leaf_node(root_node); - } - - return table; -} - -MetaCommandResult do_meta_command(InputBuffer* input_buffer, Table* table) { - if (strcmp(input_buffer->buffer, ".exit") == 0) { - db_close(table); - exit(EXIT_SUCCESS); - } else if (strcmp(input_buffer->buffer, ".btree") == 0){ - printf("Tree:\n"); - print_leaf_node(get_page(table->pager, 0)); - return META_COMMAND_SUCCESS; - } else if (strcmp(input_buffer->buffer, ".constants") == 0) { - printf("Constants:\n"); - print_constants(); - return META_COMMAND_SUCCESS; - } else { - return META_COMMAND_UNRECOGNIZED_COMMAND; - } -} - int main(int argc, char* argv[]) { if (argc < 2) { printf("Must supply a database filename\n"); @@ -548,6 +602,10 @@ int main(int argc, char* argv[]) { printf("Executed.\n"); break; + case (EXECUTE_DUPLICATE_KEY): + printf("Error: Duplicate key.\n"); + break; + case (EXECUTE_TABLE_FULL): printf("Error: Table full.\n"); break; diff --git a/spec/main_spec.rb b/spec/main_spec.rb index 7fd3f99..8260b2c 100644 --- a/spec/main_spec.rb +++ b/spec/main_spec.rb @@ -142,11 +142,27 @@ describe 'database' do "db > Executed.", "db > Tree:", "leaf (size 3)", - " - 0 : 3", - " - 1 : 1", - " - 2 : 2", + " - 0 : 1", + " - 1 : 2", + " - 2 : 3", "db > " ]) end + it 'prints an error message if there is a duplicate id' do + script = [ + "insert 1 user1 person1@example.com", + "insert 1 user1 person1@example.com", + "select", + ".exit", + ] + result = run_script(script) + expect(result).to match_array([ + "db > Executed.", + "db > Error: Duplicate key.", + "db > (1, user1, person1@example.com)", + "Executed.", + "db > ", + ]) + end end