End of step 8

This commit is contained in:
Timothy Warren 2019-04-30 14:43:55 -04:00
parent 7b36ca8c16
commit 1e4a7defdc
2 changed files with 213 additions and 63 deletions

235
db.c
View File

@ -67,29 +67,81 @@ const uint32_t ROW_SIZE = ID_SIZE + USERNAME_SIZE + EMAIL_SIZE;
const uint32_t PAGE_SIZE = 4096; const uint32_t PAGE_SIZE = 4096;
const uint32_t TABLE_MAX_PAGES = 100; const uint32_t TABLE_MAX_PAGES = 100;
const uint32_t ROWS_PER_PAGE = PAGE_SIZE / ROW_SIZE;
const uint32_t TABLE_MAX_ROWS = ROWS_PER_PAGE * TABLE_MAX_PAGES;
struct Pager_t { struct Pager_t {
int file_descriptor; int file_descriptor;
uint32_t file_length; uint32_t file_length;
uint32_t num_pages;
void* pages[TABLE_MAX_PAGES]; void* pages[TABLE_MAX_PAGES];
}; };
typedef struct Pager_t Pager; typedef struct Pager_t Pager;
struct Table_t { struct Table_t {
Pager* pager; Pager* pager;
uint32_t num_rows; uint32_t root_page_num;
}; };
typedef struct Table_t Table; typedef struct Table_t Table;
struct Cursor_t { struct Cursor_t {
Table* table; Table* table;
uint32_t row_num; uint32_t page_num;
uint32_t cell_num;
bool end_of_table; // Indicates a position one past the last element bool end_of_table; // Indicates a position one past the last element
}; };
typedef struct Cursor_t Cursor; typedef struct Cursor_t Cursor;
/*
 * Kind of B-tree node stored in a page.
 * NODE_INTERNAL is 0 and NODE_LEAF is 1 (implicit enumerator values).
 */
typedef enum NodeType_t {
    NODE_INTERNAL,
    NODE_LEAF
} NodeType;
/*
 * Common Node Header Layout
 *
 * Byte layout shared by every node, starting at offset 0 of the page:
 *   [node type: 1 byte][is-root flag: 1 byte][parent pointer: 4 bytes]
 * Each *_OFFSET is the sum of the sizes of the fields before it, so the
 * common header is 1 + 1 + 4 = 6 bytes.
 *
 * NOTE(review): several initializers below reference other const objects;
 * standard C does not treat those as constant expressions at file scope.
 * Confirm the toolchain in use accepts this.
 */
const uint32_t NODE_TYPE_SIZE = sizeof(uint8_t);
const uint32_t NODE_TYPE_OFFSET = 0;
const uint32_t IS_ROOT_SIZE = sizeof(uint8_t);
const uint32_t IS_ROOT_OFFSET = NODE_TYPE_SIZE;
const uint32_t PARENT_POINTER_SIZE = sizeof(uint32_t);
const uint32_t PARENT_POINTER_OFFSET = IS_ROOT_OFFSET + IS_ROOT_SIZE;
const uint32_t COMMON_NODE_HEADER_SIZE = NODE_TYPE_SIZE + IS_ROOT_SIZE + PARENT_POINTER_SIZE;
/*
 * Leaf Node Header Layout
 *
 * A leaf node appends a 4-byte cell count directly after the common header,
 * giving a 6 + 4 = 10 byte leaf header.
 */
const uint32_t LEAF_NODE_NUM_CELLS_SIZE = sizeof(uint32_t);
const uint32_t LEAF_NODE_NUM_CELLS_OFFSET = COMMON_NODE_HEADER_SIZE;
const uint32_t LEAF_NODE_HEADER_SIZE = COMMON_NODE_HEADER_SIZE + LEAF_NODE_NUM_CELLS_SIZE;
/*
 * Leaf Node Body Layout
 *
 * The body is an array of fixed-size cells. Each cell is a 4-byte key
 * followed by one serialized row (ROW_SIZE bytes); offsets here are relative
 * to the start of a cell. LEAF_NODE_MAX_CELLS is how many whole cells fit in
 * the page space left after the leaf header (integer division, so any
 * remainder bytes at the end of the page go unused).
 */
const uint32_t LEAF_NODE_KEY_SIZE = sizeof(uint32_t);
const uint32_t LEAF_NODE_KEY_OFFSET = 0;
const uint32_t LEAF_NODE_VALUE_SIZE = ROW_SIZE;
const uint32_t LEAF_NODE_VALUE_OFFSET = LEAF_NODE_KEY_OFFSET + LEAF_NODE_KEY_SIZE;
const uint32_t LEAF_NODE_CELL_SIZE = LEAF_NODE_KEY_SIZE + LEAF_NODE_VALUE_SIZE;
const uint32_t LEAF_NODE_SPACE_FOR_CELLS = PAGE_SIZE - LEAF_NODE_HEADER_SIZE;
const uint32_t LEAF_NODE_MAX_CELLS = LEAF_NODE_SPACE_FOR_CELLS / LEAF_NODE_CELL_SIZE;
/*
 * Returns a pointer to the cell-count field in a leaf node's header.
 *
 * node: start of the page buffer holding the leaf node.
 *
 * The field lives LEAF_NODE_NUM_CELLS_OFFSET bytes into the page. The byte
 * address is cast explicitly to the return type: handing back a char* from a
 * function declared to return uint32_t* is an incompatible pointer
 * conversion.
 *
 * NOTE(review): the offset is the sum of two 1-byte fields and one 4-byte
 * field, so the returned pointer is probably not 4-byte aligned relative to
 * the page start -- confirm the target tolerates unaligned uint32_t access.
 */
uint32_t* leaf_node_num_cells(void* node) {
    return (uint32_t*)((char*)node + LEAF_NODE_NUM_CELLS_OFFSET);
}
void* leaf_node_cell(void* node, uint32_t cell_num) {
return (char *)node + LEAF_NODE_HEADER_SIZE + cell_num;
}
/*
 * Returns a pointer to the key of cell number cell_num in a leaf node.
 *
 * The result is the cell's own start address as reported by
 * leaf_node_cell(); no further offset is applied.
 */
uint32_t* leaf_node_key(void* node, uint32_t cell_num) {
    void* cell_start = leaf_node_cell(node, cell_num);
    return cell_start;
}
/*
 * Returns the address of the serialized row stored in cell number cell_num.
 *
 * The value follows the key inside a cell, so it starts LEAF_NODE_KEY_SIZE
 * bytes past the cell's start. The cell address is converted to char* before
 * the addition because arithmetic on void* is a compiler extension, not
 * standard C.
 */
void* leaf_node_value(void* node, uint32_t cell_num) {
    return (char*)leaf_node_cell(node, cell_num) + LEAF_NODE_KEY_SIZE;
}
/*
 * Marks a page as an empty leaf node by setting its cell count to zero.
 * No other header field is written here.
 */
void initialize_leaf_node(void* node) {
    uint32_t* cell_count = leaf_node_num_cells(node);
    *cell_count = 0;
}
InputBuffer* new_input_buffer() { InputBuffer* new_input_buffer() {
InputBuffer* input_buffer = malloc(sizeof(InputBuffer)); InputBuffer* input_buffer = malloc(sizeof(InputBuffer));
@ -104,6 +156,24 @@ void print_prompt() {
printf("db > "); printf("db > ");
} }
/*
 * Prints the row size and the leaf-node layout constants to stdout, one
 * "NAME: value" line each.
 *
 * The constants are uint32_t, so they are printed with %u (after an explicit
 * cast to unsigned int) rather than the signed %d conversion. The emitted
 * text is unchanged for values that fit in an int.
 */
void print_constants() {
    printf("ROW_SIZE: %u\n", (unsigned int)ROW_SIZE);
    printf("COMMON_NODE_HEADER_SIZE: %u\n", (unsigned int)COMMON_NODE_HEADER_SIZE);
    printf("LEAF_NODE_HEADER_SIZE: %u\n", (unsigned int)LEAF_NODE_HEADER_SIZE);
    printf("LEAF_NODE_CELL_SIZE: %u\n", (unsigned int)LEAF_NODE_CELL_SIZE);
    printf("LEAF_NODE_SPACE_FOR_CELLS: %u\n", (unsigned int)LEAF_NODE_SPACE_FOR_CELLS);
    printf("LEAF_NODE_MAX_CELLS: %u\n", (unsigned int)LEAF_NODE_MAX_CELLS);
}
/*
 * Prints a leaf node to stdout: a "leaf (size N)" line followed by one
 * " - index : key" line per cell, in stored order.
 *
 * node: start of the page buffer holding the leaf node.
 *
 * Counts, indices and keys are uint32_t, so they are printed with %u; with
 * %d a key above INT_MAX would be shown as a negative number.
 */
void print_leaf_node(void* node) {
    uint32_t num_cells = *leaf_node_num_cells(node);
    printf("leaf (size %u)\n", (unsigned int)num_cells);
    for (uint32_t i = 0; i < num_cells; i++) {
        uint32_t key = *leaf_node_key(node, i);
        printf(" - %u : %u\n", (unsigned int)i, (unsigned int)key);
    }
}
void read_input(InputBuffer* input_buffer) { void read_input(InputBuffer* input_buffer) {
ssize_t bytes_read = getline( ssize_t bytes_read = getline(
&(input_buffer->buffer), &(input_buffer->buffer),
@ -121,7 +191,7 @@ void read_input(InputBuffer* input_buffer) {
input_buffer->buffer[bytes_read - 1] = 0; input_buffer->buffer[bytes_read - 1] = 0;
} }
void pager_flush(Pager* pager, uint32_t page_num, uint32_t size) { void pager_flush(Pager* pager, uint32_t page_num) {
if (pager->pages[page_num] == NULL) { if (pager->pages[page_num] == NULL) {
printf("Tried to flush null page\n"); printf("Tried to flush null page\n");
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
@ -134,7 +204,7 @@ void pager_flush(Pager* pager, uint32_t page_num, uint32_t size) {
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
ssize_t bytes_written = write(pager->file_descriptor, pager->pages[page_num], size); ssize_t bytes_written = write(pager->file_descriptor, pager->pages[page_num], PAGE_SIZE);
if (bytes_written == -1) { if (bytes_written == -1) {
printf("Error writing: %d\n", errno); printf("Error writing: %d\n", errno);
@ -144,29 +214,16 @@ void pager_flush(Pager* pager, uint32_t page_num, uint32_t size) {
void db_close(Table* table) { void db_close(Table* table) {
Pager* pager = table->pager; Pager* pager = table->pager;
uint32_t num_full_pages = table->num_rows / ROWS_PER_PAGE;
for (uint32_t i = 0; i < num_full_pages; i++) { for (uint32_t i = 0; i < pager->num_pages; i++) {
if (pager->pages[i] == NULL) { if (pager->pages[i] == NULL) {
continue; continue;
} }
pager_flush(pager, i, PAGE_SIZE); pager_flush(pager, i);
free(pager->pages[i]); free(pager->pages[i]);
pager->pages[i] = NULL; pager->pages[i] = NULL;
} }
// There may be a partial page to write to the end of the file
// This should not be needed after we switch to a B-tree
uint32_t num_additional_rows = table->num_rows % ROWS_PER_PAGE;
if (num_additional_rows > 0) {
uint32_t page_num = num_full_pages;
if (pager->pages[page_num] != NULL) {
pager_flush(pager, page_num, num_additional_rows * ROW_SIZE);
free(pager->pages[page_num]);
pager->pages[page_num] = NULL;
}
}
int result = close(pager->file_descriptor); int result = close(pager->file_descriptor);
if (result == -1) { if (result == -1) {
printf("Error closing db file.\n"); printf("Error closing db file.\n");
@ -182,15 +239,6 @@ void db_close(Table* table) {
free(pager); free(pager);
} }
/*
 * Handles a meta command held in input_buffer->buffer.
 *
 * ".exit" closes the database via db_close() and terminates the process
 * with EXIT_SUCCESS; any other text yields
 * META_COMMAND_UNRECOGNIZED_COMMAND.
 *
 * NOTE(review): a second definition of do_meta_command with more commands
 * appears later in this listing -- confirm which one is live.
 */
MetaCommandResult do_meta_command(InputBuffer* input_buffer, Table* table) {
    if (strcmp(input_buffer->buffer, ".exit") != 0) {
        return META_COMMAND_UNRECOGNIZED_COMMAND;
    }

    db_close(table);
    exit(EXIT_SUCCESS);
}
PrepareResult prepare_insert(InputBuffer* input_buffer, Statement* statement) { PrepareResult prepare_insert(InputBuffer* input_buffer, Statement* statement) {
statement->type = STATEMENT_INSERT; statement->type = STATEMENT_INSERT;
@ -249,24 +297,6 @@ void deserialize_row(void* source, Row* destination) {
memcpy(&(destination->email), source + EMAIL_OFFSET, EMAIL_SIZE); memcpy(&(destination->email), source + EMAIL_OFFSET, EMAIL_SIZE);
} }
/*
 * Allocates a cursor positioned on row 0 of the table.
 *
 * end_of_table is set immediately when the table has no rows. The caller
 * owns the returned cursor and must free() it.
 *
 * Allocation failure is reported and the process exits, matching how other
 * fatal errors in this file are handled, instead of dereferencing NULL.
 *
 * NOTE(review): this version reads table->num_rows; a root_page_num based
 * table_start also appears later in this listing -- confirm which is live.
 */
Cursor* table_start(Table* table) {
    Cursor* cursor = malloc(sizeof *cursor);
    if (cursor == NULL) {
        printf("Unable to allocate cursor.\n");
        exit(EXIT_FAILURE);
    }

    cursor->table = table;
    cursor->row_num = 0;
    cursor->end_of_table = (table->num_rows == 0);

    return cursor;
}
/*
 * Allocates a cursor positioned one past the last row of the table
 * (row_num == table->num_rows), with end_of_table set.
 *
 * The caller owns the returned cursor and must free() it. Allocation
 * failure is reported and the process exits instead of dereferencing NULL.
 *
 * NOTE(review): this version reads table->num_rows; a root_page_num based
 * table_end also appears later in this listing -- confirm which is live.
 */
Cursor* table_end(Table* table) {
    Cursor* cursor = malloc(sizeof *cursor);
    if (cursor == NULL) {
        printf("Unable to allocate cursor.\n");
        exit(EXIT_FAILURE);
    }

    cursor->table = table;
    cursor->row_num = table->num_rows;
    cursor->end_of_table = true;

    return cursor;
}
void* get_page(Pager* pager, uint32_t page_num) { void* get_page(Pager* pager, uint32_t page_num) {
if (page_num > TABLE_MAX_PAGES) { if (page_num > TABLE_MAX_PAGES) {
printf("Tried to fetch page number out of bounds. %d > %d\n", page_num, TABLE_MAX_PAGES); printf("Tried to fetch page number out of bounds. %d > %d\n", page_num, TABLE_MAX_PAGES);
@ -293,37 +323,90 @@ void* get_page(Pager* pager, uint32_t page_num) {
} }
pager->pages[page_num] = page; pager->pages[page_num] = page;
if (page_num >= pager->num_pages) {
pager->num_pages = page_num + 1;
}
} }
return pager->pages[page_num]; return pager->pages[page_num];
} }
/*
 * Inserts a key/row pair into the leaf node the cursor points at, at cell
 * index cursor->cell_num.
 *
 * cursor: identifies the page (page_num) and the insertion slot (cell_num).
 * key:    key stored at the start of the cell.
 * value:  row serialized into the cell after the key.
 *
 * If the node already holds LEAF_NODE_MAX_CELLS cells the process exits,
 * since splitting is not implemented. When inserting before existing cells,
 * those cells are first shifted one slot to the right.
 *
 * The count update and the key/value writes must run for every insert,
 * including an append at the end (cell_num == num_cells), so they sit
 * outside the "make room" branch.
 */
void leaf_node_insert(Cursor* cursor, uint32_t key, Row* value) {
    void* node = get_page(cursor->table->pager, cursor->page_num);
    uint32_t num_cells = *leaf_node_num_cells(node);

    if (num_cells >= LEAF_NODE_MAX_CELLS) {
        // Node full
        printf("Need to implement splitting a leaf node.\n");
        exit(EXIT_FAILURE);
    }

    if (cursor->cell_num < num_cells) {
        // Make room for new cell: walk from the last cell down to the
        // insertion slot so no cell is overwritten before it is copied.
        for (uint32_t i = num_cells; i > cursor->cell_num; i--) {
            memcpy(leaf_node_cell(node, i), leaf_node_cell(node, i - 1),
                   LEAF_NODE_CELL_SIZE);
        }
    }

    *(leaf_node_num_cells(node)) += 1;
    *(leaf_node_key(node, cursor->cell_num)) = key;
    serialize_row(value, leaf_node_value(node, cursor->cell_num));
}
/*
 * Allocates a cursor positioned on cell 0 of the table's root page.
 *
 * end_of_table is set immediately when the root leaf holds no cells. The
 * caller owns the returned cursor and must free() it.
 *
 * Allocation failure is reported and the process exits, matching how other
 * fatal errors in this file are handled, instead of dereferencing NULL.
 */
Cursor* table_start(Table* table) {
    Cursor* cursor = malloc(sizeof *cursor);
    if (cursor == NULL) {
        printf("Unable to allocate cursor.\n");
        exit(EXIT_FAILURE);
    }

    cursor->table = table;
    cursor->page_num = table->root_page_num;
    cursor->cell_num = 0;

    void* root_node = get_page(table->pager, table->root_page_num);
    uint32_t num_cells = *leaf_node_num_cells(root_node);
    cursor->end_of_table = (num_cells == 0);

    return cursor;
}
/*
 * Allocates a cursor positioned one past the last cell of the table's root
 * page (cell_num == current cell count), with end_of_table set.
 *
 * The caller owns the returned cursor and must free() it. Allocation
 * failure is reported and the process exits instead of dereferencing NULL.
 */
Cursor* table_end(Table* table) {
    Cursor* cursor = malloc(sizeof *cursor);
    if (cursor == NULL) {
        printf("Unable to allocate cursor.\n");
        exit(EXIT_FAILURE);
    }

    cursor->table = table;
    cursor->page_num = table->root_page_num;

    void* root_node = get_page(table->pager, table->root_page_num);
    uint32_t num_cells = *leaf_node_num_cells(root_node);
    cursor->cell_num = num_cells;
    cursor->end_of_table = true;

    return cursor;
}
void* cursor_value(Cursor* cursor) { void* cursor_value(Cursor* cursor) {
uint32_t row_num = cursor->row_num; uint32_t page_num = cursor->page_num;
uint32_t page_num = row_num / ROWS_PER_PAGE;
void* page = get_page(cursor->table->pager, page_num); void* page = get_page(cursor->table->pager, page_num);
uint32_t row_offset = row_num % ROWS_PER_PAGE; return leaf_node_value(page, cursor->cell_num);
uint32_t byte_offset = row_offset * ROW_SIZE;
return page + byte_offset;
} }
void cursor_advance(Cursor* cursor) { void cursor_advance(Cursor* cursor) {
cursor->row_num += 1; uint32_t page_num = cursor->page_num;
if (cursor->row_num >= cursor->table->num_rows) { void* node = get_page(cursor->table->pager, page_num);
cursor->cell_num += 1;
if (cursor->cell_num >= (*leaf_node_num_cells(node))) {
cursor->end_of_table = true; cursor->end_of_table = true;
} }
} }
ExecuteResult execute_insert(Statement* statement, Table* table) { ExecuteResult execute_insert(Statement* statement, Table* table) {
if (table->num_rows >= TABLE_MAX_ROWS) { void* node = get_page(table->pager, table->root_page_num);
if ((*leaf_node_num_cells(node) >= LEAF_NODE_MAX_CELLS)) {
return EXECUTE_TABLE_FULL; return EXECUTE_TABLE_FULL;
} }
Row* row_to_insert = &(statement->row_to_insert); Row* row_to_insert = &(statement->row_to_insert);
Cursor* cursor = table_end(table); Cursor* cursor = table_end(table);
serialize_row(row_to_insert, cursor_value(cursor)); leaf_node_insert(cursor, row_to_insert->id, row_to_insert);
table->num_rows += 1;
free(cursor); free(cursor);
@ -369,6 +452,12 @@ Pager* pager_open(const char* filename) {
Pager* pager = malloc(sizeof(Pager)); Pager* pager = malloc(sizeof(Pager));
pager->file_descriptor = fd; pager->file_descriptor = fd;
pager->file_length = file_length; pager->file_length = file_length;
pager->num_pages = (file_length / PAGE_SIZE);
if (file_length % PAGE_SIZE != 0) {
printf("Db file is not a whole number of pages. Corrupt file.\n");
exit(EXIT_FAILURE);
}
for (uint32_t i = 0; i < TABLE_MAX_PAGES; i++) { for (uint32_t i = 0; i < TABLE_MAX_PAGES; i++) {
pager->pages[i] = NULL; pager->pages[i] = NULL;
@ -377,19 +466,39 @@ Pager* pager_open(const char* filename) {
return pager; return pager;
} }
Table* db_open(const char* filename) { Table* db_open(const char* filename) {
Pager* pager = pager_open(filename); Pager* pager = pager_open(filename);
uint32_t num_rows = pager->file_length / ROW_SIZE;
Table* table = malloc(sizeof(Table)); Table* table = malloc(sizeof(Table));
table->pager = pager; table->pager = pager;
table->num_rows = num_rows; table->root_page_num = 0;
if (pager->num_pages == 0) {
// New database file. Initialize page 0 as leaf node.
void* root_node = get_page(pager, 0);
initialize_leaf_node(root_node);
}
return table; return table;
} }
/*
 * Handles a meta command held in input_buffer->buffer.
 *
 *   ".exit"      closes the database via db_close() and terminates the
 *                process with EXIT_SUCCESS (does not return).
 *   ".btree"     prints "Tree:" followed by the root leaf node.
 *   ".constants" prints "Constants:" followed by the layout constants.
 *
 * Returns META_COMMAND_SUCCESS for ".btree" and ".constants", and
 * META_COMMAND_UNRECOGNIZED_COMMAND for anything else.
 *
 * ".btree" looks the root up through table->root_page_num rather than a
 * literal page number, so it agrees with table_start()/table_end().
 */
MetaCommandResult do_meta_command(InputBuffer* input_buffer, Table* table) {
    if (strcmp(input_buffer->buffer, ".exit") == 0) {
        db_close(table);
        exit(EXIT_SUCCESS);
    } else if (strcmp(input_buffer->buffer, ".btree") == 0) {
        printf("Tree:\n");
        print_leaf_node(get_page(table->pager, table->root_page_num));
        return META_COMMAND_SUCCESS;
    } else if (strcmp(input_buffer->buffer, ".constants") == 0) {
        printf("Constants:\n");
        print_constants();
        return META_COMMAND_SUCCESS;
    } else {
        return META_COMMAND_UNRECOGNIZED_COMMAND;
    }
}
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
if (argc < 2) { if (argc < 2) {
printf("Must supply a database filename\n"); printf("Must supply a database filename\n");

View File

@ -108,4 +108,45 @@ describe 'database' do
]) ])
end end
# Pins the exact lines printed by the `.constants` meta command.
it 'prints constants' do
  output = run_script([".constants", ".exit"])

  expected_lines = [
    "db > Constants:",
    "ROW_SIZE: 293",
    "COMMON_NODE_HEADER_SIZE: 6",
    "LEAF_NODE_HEADER_SIZE: 10",
    "LEAF_NODE_CELL_SIZE: 297",
    "LEAF_NODE_SPACE_FOR_CELLS: 4086",
    "LEAF_NODE_MAX_CELLS: 13",
    "db > ",
  ]
  expect(output).to match_array(expected_lines)
end
# Inserts ids 3, 1, 2 and checks the `.btree` dump of the single leaf node.
# The expected dump lists the keys in insertion order.
it 'allows printing out the structure of a one-node btree' do
  inserts = [3, 1, 2].map { |id| "insert #{id} user#{id} person#{id}@example.com" }
  commands = inserts + [".btree", ".exit"]

  output = run_script(commands)

  expect(output).to match_array([
    "db > Executed.",
    "db > Executed.",
    "db > Executed.",
    "db > Tree:",
    "leaf (size 3)",
    " - 0 : 3",
    " - 1 : 1",
    " - 2 : 2",
    "db > "
  ])
end
end end