diff --git a/src/builder.rs b/src/builder.rs index c34069f..6cb626c 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -21,7 +21,7 @@ impl Builder { } } - pub fn add_record(&mut self, record: Record) { + pub fn add_record(&mut self, mut record: Record) { if self.current_page_is_full(&record) { self.finish_current_page(); self.leaf_pages.push(mem::replace(&mut self.current_page, Page::new_leaf())); @@ -29,8 +29,9 @@ impl Builder { } self.current_page.key = record.rowid; //clone? - self.current_page.put_vec_u8_bw(record.to_bytes()); - self.current_page.put_u16(self.current_page.get_bw_position() as u16); + let bytes: Vec = record.into(); + self.current_page.put_bytes_bw(&bytes); + self.current_page.put_u16(self.current_page.bw_position as u16); self.n_records_on_current_page += 1; } @@ -39,26 +40,26 @@ impl Builder { } pub fn build(mut self) -> Database { - self.current_page.set_fw_position(page::POSITION_CELL_COUNT); + self.current_page.fw_position = page::POSITION_CELL_COUNT; self.current_page.put_u16(self.n_records_on_current_page); if self.n_records_on_current_page > 0 { - self.current_page.put_u16(self.current_page.get_bw_position()); + self.current_page.put_u16(self.current_page.bw_position); } else { - self.current_page.put_u16(self.current_page.get_bw_position() - 1); + self.current_page.put_u16(self.current_page.bw_position - 1); } Database::new(self.schema.unwrap(), self.leaf_pages) //panics is schema is not set } fn current_page_is_full(&self, record: &Record) -> bool { - self.current_page.get_bw_position() - record.get_length() <= self.current_page.get_fw_position() + 5 + self.current_page.bw_position - record.bytes_len() <= self.current_page.fw_position + 5 } fn finish_current_page(&mut self) { - self.current_page.set_fw_position(page::POSITION_CELL_COUNT); + self.current_page.fw_position = page::POSITION_CELL_COUNT; self.current_page.put_u16(self.n_records_on_current_page); - self.current_page.put_u16(self.current_page.get_bw_position()); + self.current_page.put_u16(self.current_page.bw_position); } } diff --git a/src/bytebuffer.rs b/src/bytebuffer.rs deleted file mode 100644 index 512ff92..0000000 --- a/src/bytebuffer.rs +++ /dev/null @@ -1,163 +0,0 @@ -use byteorder::{BigEndian, ByteOrder}; - -/// bytebuffer that supports forward and backward writing (this is not endianness) -/// Reason: SQLite pages are written in 2 directions: from the front for the cell-pointers and from the back for the cells -/// - fixed size -/// - big endian only -pub struct ByteBuffer { - pub data: Vec, - pub fw_position: u16, - pub bw_position: u16, -} - -impl ByteBuffer { - pub fn new(size: u16) -> Self { - Self { - data: vec![0; size as usize], - fw_position: 0, - bw_position: size, - } - } - - /// forward put unsigned byte array - pub fn put_bytes(&mut self, bytes: &[u8]) { - for v in bytes { - self.data[self.fw_position as usize] = *v; - self.fw_position += 1; - } - } - - /// backward put unsigned byte array - pub fn put_bytes_bw(&mut self, bytes: &[u8]) { - self.bw_position -= bytes.len() as u16; - for v in bytes { - self.data[self.bw_position as usize] = *v; - self.bw_position += 1; - } - } - - /// forward put unsigned byte - pub fn put_u8(&mut self, byte: u8) { - self.put_bytes(&[byte]); - } - - /// backward put unsigned byte - pub fn put_u8_bw(&mut self, byte: u8) { - self.put_bytes_bw(&[byte]); - } - - /// forward put unsigned 16bit integer - pub fn put_u16(&mut self, val: u16) { - let mut buf = [0; 2]; - BigEndian::write_u16(&mut buf, val); - self.put_bytes(&buf); - } - - /// backward put unsigned 16bit integer - pub fn put_u16_bw(&mut self, val: u16) { - let mut buf = [0; 2]; - BigEndian::write_u16(&mut buf, val); - self.put_bytes_bw(&buf); - } - - /// forward put unsigned 16bit integer - pub fn put_u32(&mut self, val: u32) { - let mut buf = [0; 4]; - BigEndian::write_u32(&mut buf, val); - self.put_bytes(&buf); - } - - /// backward put unsigned 32bit integer - pub fn put_u32_bw(&mut self, val: u32) { - let mut buf = [0; 4]; - BigEndian::write_u32(&mut buf, val); - self.put_bytes_bw(&buf); - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_u8() { - let mut b = ByteBuffer::new(1); - b.put_u8(64_u8); - assert_eq!(b.data[0], 64); - } - - #[test] - fn test_u8a() { - let mut b = ByteBuffer::new(2); - b.put_bytes(&[1, 2]); - assert_eq!(b.data[0], 1); - assert_eq!(b.data[1], 2); - } - - #[test] - fn test_u16() { - let mut b = ByteBuffer::new(2); - b.put_u16(4096); - assert_eq!(b.data[0], 16); - assert_eq!(b.data[1], 0); - } - - #[test] - fn test_u32() { - let mut b = ByteBuffer::new(4); - b.put_u32(0xFFFFFFFF); - assert_eq!(b.data[0], 0xFF); - assert_eq!(b.data[1], 0xFF); - assert_eq!(b.data[2], 0xFF); - assert_eq!(b.data[3], 0xFF); - } - - #[test] - fn test_u16_position() { - let mut b = ByteBuffer::new(4); - b.fw_position = 2; - b.put_u16(4096); - assert_eq!(b.data[0], 0); - assert_eq!(b.data[1], 0); - assert_eq!(b.data[2], 16); - assert_eq!(b.data[3], 0); - } - - #[test] - fn test_u16_backwards() { - let mut b = ByteBuffer::new(4); - b.put_u16_bw(0x1000); - assert_eq!(b.data[0], 0); - assert_eq!(b.data[1], 0); - assert_eq!(b.data[2], 0x10); - assert_eq!(b.data[3], 0x00); - } - - #[test] - fn test_u16_2_directions() { - let mut b = ByteBuffer::new(5); - b.put_u16(0x1001); - b.put_u16_bw(0x1000); - assert_eq!(b.data[0], 0x10); - assert_eq!(b.data[1], 0x01); - assert_eq!(b.data[2], 0); // decimal suggests this value has not been written - assert_eq!(b.data[3], 0x10); - assert_eq!(b.data[4], 0x00); - } - - #[test] - fn test_u32_2_directions() { - let mut b = ByteBuffer::new(9); - b.put_u32(0x1001); - b.put_u32_bw(0x1002); - assert_eq!(b.data[0], 0x00); - assert_eq!(b.data[1], 0x00); - assert_eq!(b.data[2], 0x10); - assert_eq!(b.data[3], 0x01); - assert_eq!(b.data[4], 0); - assert_eq!(b.data[5], 0x00); - assert_eq!(b.data[6], 0x00); - assert_eq!(b.data[7], 0x10); - assert_eq!(b.data[8], 0x02); - } -} \ No newline at end of file diff --git a/src/database.rs b/src/database.rs index ad2db64..aae0e0a 100644 --- a/src/database.rs +++ b/src/database.rs @@ -1,3 +1,7 @@ +use std::io::{BufWriter, Write}; +use std::mem; +use crate::varint; +use crate::page; use crate::page::Page; pub struct Database { @@ -16,9 +20,66 @@ impl Database { pub struct SchemaRecord {} +pub fn write(database: Database, writer: BufWriter) { + let mut current_top_layer = database.leaf_pages; + let mut n_pages = current_top_layer.len(); + while current_top_layer.len() > 1 { // interior page needed? + current_top_layer = create_interior_pages(current_top_layer); + n_pages += current_top_layer.len(); + } + + let table_root_page = current_top_layer.get(0); // + // writeFromStart(writer, createHeaderPage(n_pages + 1)); // 1 for header page + // + // recursiveAssignPagenumbers(table_root_page); // 3 extra passes... :( + // recursiveSetPageReferences(table_root_page); // don't think combining is possible + // recursiveWritePages(channel, table_root_page); +} + +fn create_interior_pages(mut child_pages: Vec) -> Vec { + let mut interior_pages = Vec::new(); + let mut interior_page = Page::new_interior(); + interior_page.key = child_pages.iter().map(|p| p.key).max().unwrap(); + interior_page.fw_position = page::START_OF_INTERIOR_PAGE; + let mut page_index = 0; + let children_length = child_pages.len(); + let mut child_count = 0; + let mut last_leaf: Page = Page::new_leaf(); // have to assign :( + for mut leaf_page in child_pages { + if child_count < children_length - 1 { + if interior_page.bw_position <= interior_page.fw_position + 15 { // 15 is somewhat arbitrary + interior_page.fw_position = page::START_OF_CONTENT_AREA; + interior_page.put_u16(interior_page.bw_position); + interior_page.put_bytes(&[0, 0, 0, 0, 0]); + + interior_pages.push(mem::replace(&mut interior_page, Page::new_interior())); + interior_page.fw_position = page::START_OF_INTERIOR_PAGE; + } + create_cell(&mut leaf_page); + interior_page.add_child(leaf_page); + page_index += 1; + } else { + last_leaf = leaf_page; + } + } + + interior_page.fw_position = page::START_OF_CONTENT_AREA; + interior_page.put_u16(interior_page.bw_position); + interior_page.put_bytes(&[0, 0, 0, 0, 0]); + interior_page.add_child(last_leaf); + interior_pages.push(interior_page); + interior_pages +} + +fn create_cell(page: &mut Page) { + let mut cell: Vec = vec![0, 0, 0, 0]; // not an expensive call right? + cell.append(&mut varint::write(page.key)); + page.put_bytes_bw(&cell); + page.put_u16(page.bw_position); +} fn write_header(mut rootpage: Page, n_pages: u32) { - rootpage.put_u8a(&MAGIC_HEADER); + rootpage.put_bytes(&MAGIC_HEADER); rootpage.put_u16(DEFAULT_PAGE_SIZE); rootpage.put_u8(FILE_FORMAT_WRITE_VERSION); rootpage.put_u8(FILE_FORMAT_READ_VERSION); @@ -38,9 +99,9 @@ fn write_header(mut rootpage: Page, n_pages: u32) { rootpage.put_u32(USER_VERSION); rootpage.put_u32(VACUUM_MODE_OFF);// True (non-zero) for incremental-vacuum mode. False (zero) otherwise. rootpage.put_u32(APP_ID);// Application ID - rootpage.put_u8a(&FILLER);// Reserved for expansion. Must be zero. - rootpage.put_u8a(&VERSION_VALID_FOR);// The version-valid-for number - rootpage.put_u8a(&SQLITE_VERSION);// SQLITE_VERSION_NUMBER + rootpage.put_bytes(&FILLER);// Reserved for expansion. Must be zero. + rootpage.put_bytes(&VERSION_VALID_FOR);// The version-valid-for number + rootpage.put_bytes(&SQLITE_VERSION);// SQLITE_VERSION_NUMBER rootpage.put_u8(TABLE_LEAF_PAGE); // leaf table b-tree page for schema rootpage.put_u16(NO_FREE_BLOCKS); // zero if there are no freeblocks rootpage.put_u16(1); // the number of cells on this page diff --git a/src/lib.rs b/src/lib.rs index 05edccd..302978e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,7 +2,6 @@ mod page; mod database; -mod bytebuffer; mod values; mod varint; mod record; diff --git a/src/page.rs b/src/page.rs index 074325b..b94cc4e 100644 --- a/src/page.rs +++ b/src/page.rs @@ -1,17 +1,22 @@ -use crate::bytebuffer::ByteBuffer; +use byteorder::{BigEndian, ByteOrder}; use crate::database; pub const POSITION_CELL_COUNT: u16 = 3; -const START_OF_CONTENT_AREA: u32 = 5; +pub const START_OF_CONTENT_AREA: u16 = 5; +pub const START_OF_INTERIOR_PAGE: u16 = 12; pub enum PageType { Leaf, Interior, + Root, + Other } /// Represents an SQLite page pub struct Page { - data: ByteBuffer, + pub data: Vec, + pub fw_position: u16, + pub bw_position: u16, pub key: u64, children: Vec, number: u32, @@ -19,9 +24,11 @@ pub struct Page { } impl Page { - pub fn with_capacity(size: u16, page_type: PageType) -> Self { + fn with_capacity(size: u16, page_type: PageType) -> Self { Self { - data: ByteBuffer::new(size as u16), + data: vec![0; size as usize], + fw_position: 0, + bw_position: size, key: 0, children: Vec::new(), number: 0, @@ -29,6 +36,18 @@ impl Page { } } + fn default(size: usize) -> Self{ + Self { + data: vec![0; size], + fw_position: 0, + bw_position: size as u16, + key: 0, + children: Vec::new(), + number: 0, + page_type: PageType::Other, + } + } + pub fn new_leaf() -> Self { let mut page = Page::with_capacity(database::DEFAULT_PAGE_SIZE, PageType::Leaf); page.put_u8(database::TABLE_LEAF_PAGE); @@ -45,51 +64,146 @@ impl Page { self.children.push(child); } - pub fn set_fw_position(&mut self, new_position: u16) { - self.data.fw_position = new_position; + pub fn put_bytes(&mut self, bytes: &[u8]) { + for v in bytes { + self.data[self.fw_position as usize] = *v; + self.fw_position += 1; + } } - pub fn get_fw_position(&self) -> u16 { - self.data.fw_position - } - pub fn set_bw_position(&mut self, new_position: u16) { - self.data.bw_position = new_position; - } - - pub fn get_bw_position(&self) -> u16 { - self.data.bw_position - } - - pub fn put_u8a(&mut self, value: &[u8]) { - self.data.put_bytes(value); - } - - pub fn put_u8a_bw(&mut self, value: &[u8]) { - self.data.put_bytes_bw(value); - } - - pub fn put_vec_u8_bw(&mut self, value: Vec) { - self.data.put_bytes_bw(&value); + pub fn put_bytes_bw(&mut self, bytes: &[u8]) { + self.bw_position -= bytes.len() as u16; + for v in bytes { + self.data[self.bw_position as usize] = *v; + self.bw_position += 1; + } } pub fn put_u8(&mut self, value: u8) { - self.data.put_u8(value); + self.put_bytes(&[value]); } pub fn put_u8_bw(&mut self, value: u8) { - self.data.put_u8_bw(value); + self.put_bytes_bw(&[value]); } pub fn put_u16(&mut self, value: u16) { - self.data.put_u16(value); + self.put_bytes(&u16_to_bytes(value)); + } + + pub fn put_u16_bw(&mut self, value: u16) { + self.put_bytes_bw(&u16_to_bytes(value)); } pub fn put_u32(&mut self, value: u32) { - self.data.put_u32(value); + self.put_bytes(&u32_to_bytes(value)); + } + + pub fn put_u32_bw(&mut self, value: u32) { + self.put_bytes_bw(&u32_to_bytes(value)); } // may panic pub fn get_page_nr_last_child(self) -> u32 { self.children[self.children.len() - 1].number } +} + +fn u16_to_bytes(value: u16) -> [u8; 2] { + let mut buf = [0; 2]; + BigEndian::write_u16(&mut buf, value); + buf +} + +fn u32_to_bytes(value: u32) -> [u8; 4] { + let mut buf = [0; 4]; + BigEndian::write_u32(&mut buf, value); + buf +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_u8() { + let mut b = Page::default(1); + b.put_u8(64_u8); + assert_eq!(b.data[0], 64); + } + + #[test] + fn test_u8a() { + let mut b = Page::default(2); + b.put_bytes(&[1, 2]); + assert_eq!(b.data[0], 1); + assert_eq!(b.data[1], 2); + } + + #[test] + fn test_u16() { + let mut b = Page::default(2); + b.put_u16(4096); + assert_eq!(b.data[0], 16); + assert_eq!(b.data[1], 0); + } + + #[test] + fn test_u32() { + let mut b = Page::default(4); + b.put_u32(0xFFFFFFFF); + assert_eq!(b.data[0], 0xFF); + assert_eq!(b.data[1], 0xFF); + assert_eq!(b.data[2], 0xFF); + assert_eq!(b.data[3], 0xFF); + } + + #[test] + fn test_u16_position() { + let mut b = Page::default(4); + b.fw_position = 2; + b.put_u16(4096); + assert_eq!(b.data[0], 0); + assert_eq!(b.data[1], 0); + assert_eq!(b.data[2], 16); + assert_eq!(b.data[3], 0); + } + + #[test] + fn test_u16_backwards() { + let mut b = Page::default(4); + b.put_u16_bw(0x1000); + assert_eq!(b.data[0], 0); + assert_eq!(b.data[1], 0); + assert_eq!(b.data[2], 0x10); + assert_eq!(b.data[3], 0x00); + } + + #[test] + fn test_u16_2_directions() { + let mut b = Page::default(5); + b.put_u16(0x1001); + b.put_u16_bw(0x1000); + assert_eq!(b.data[0], 0x10); + assert_eq!(b.data[1], 0x01); + assert_eq!(b.data[2], 0); // decimal suggests this value has not been written + assert_eq!(b.data[3], 0x10); + assert_eq!(b.data[4], 0x00); + } + + #[test] + fn test_u32_2_directions() { + let mut b = Page::default(9); + b.put_u32(0x1001); + b.put_u32_bw(0x1002); + assert_eq!(b.data[0], 0x00); + assert_eq!(b.data[1], 0x00); + assert_eq!(b.data[2], 0x10); + assert_eq!(b.data[3], 0x01); + assert_eq!(b.data[4], 0); + assert_eq!(b.data[5], 0x00); + assert_eq!(b.data[6], 0x00); + assert_eq!(b.data[7], 0x10); + assert_eq!(b.data[8], 0x02); + } } \ No newline at end of file diff --git a/src/record.rs b/src/record.rs index 2ade232..56fdc12 100644 --- a/src/record.rs +++ b/src/record.rs @@ -1,4 +1,3 @@ -use crate::bytebuffer::ByteBuffer; use crate::values::*; use crate::varint; @@ -20,38 +19,42 @@ impl Record { self.values.push(value); } - pub fn to_bytes(&self) -> Vec { - let record_length = self.get_length(); - let length_bytes = varint::write(record_length as u64); - let rowid_bytes = varint::write(self.rowid); + /// length of the byte representation + pub fn bytes_len(&self) -> u16 { + let record_length: u16 = self.values.iter() + .map(|v| v.len()) + .sum(); + record_length + } +} - let mut buffer = ByteBuffer::new(length_bytes.len() as u16 + rowid_bytes.len() as u16 + record_length); - buffer.put_bytes(&length_bytes); - buffer.put_bytes(&rowid_bytes); +impl Into> for Record{ + fn into(mut self) -> Vec { + let record_length = self.bytes_len(); + let mut length_bytes = varint::write(record_length as u64); + let mut rowid_bytes = varint::write(self.rowid); + + // let mut buffer = Vec::with_capacity(length_bytes.len() + rowid_bytes.len() + record_length); + let mut buffer = Vec::new(); + buffer.append(&mut length_bytes); + buffer.append(&mut rowid_bytes); // 'The initial portion of the payload that does not spill to overflow pages.' let length_of_encoded_column_types: usize = self.values.iter() .map(|v| v.datatype.len()) .sum(); - buffer.put_bytes(&varint::write((length_of_encoded_column_types + 1) as u64)); + buffer.append(&mut varint::write((length_of_encoded_column_types + 1) as u64)); //write all types - for v in self.values.iter() { - buffer.put_bytes(&v.datatype) + for v in self.values.iter_mut() { + buffer.append(&mut v.datatype) } // write all values - for v in self.values.iter() { - buffer.put_bytes(&v.data) //copies individual bytes into a buffer...should I avoid copying? + for v in self.values.iter_mut() { + buffer.append(&mut v.data) } - buffer.data - } - - pub fn get_length(&self) -> u16 { - let record_length: u16 = self.values.iter() - .map(|v| v.get_length()) - .sum(); - record_length + buffer } }