simplified and removed getters

This commit is contained in:
Sander Hautvast 2022-10-28 20:49:56 +02:00
parent 9633938790
commit f5408dfc3d
6 changed files with 245 additions and 230 deletions

View file

@ -21,7 +21,7 @@ impl Builder {
} }
} }
pub fn add_record(&mut self, record: Record) { pub fn add_record(&mut self, mut record: Record) {
if self.current_page_is_full(&record) { if self.current_page_is_full(&record) {
self.finish_current_page(); self.finish_current_page();
self.leaf_pages.push(mem::replace(&mut self.current_page, Page::new_leaf())); self.leaf_pages.push(mem::replace(&mut self.current_page, Page::new_leaf()));
@ -29,8 +29,9 @@ impl Builder {
} }
self.current_page.key = record.rowid; //clone? self.current_page.key = record.rowid; //clone?
self.current_page.put_vec_u8_bw(record.to_bytes()); let bytes: Vec<u8> = record.into();
self.current_page.put_u16(self.current_page.get_bw_position() as u16); self.current_page.put_bytes_bw(&bytes);
self.current_page.put_u16(self.current_page.bw_position as u16);
self.n_records_on_current_page += 1; self.n_records_on_current_page += 1;
} }
@ -39,26 +40,26 @@ impl Builder {
} }
pub fn build(mut self) -> Database { pub fn build(mut self) -> Database {
self.current_page.set_fw_position(page::POSITION_CELL_COUNT); self.current_page.fw_position = page::POSITION_CELL_COUNT;
self.current_page.put_u16(self.n_records_on_current_page); self.current_page.put_u16(self.n_records_on_current_page);
if self.n_records_on_current_page > 0 { if self.n_records_on_current_page > 0 {
self.current_page.put_u16(self.current_page.get_bw_position()); self.current_page.put_u16(self.current_page.bw_position);
} else { } else {
self.current_page.put_u16(self.current_page.get_bw_position() - 1); self.current_page.put_u16(self.current_page.bw_position - 1);
} }
Database::new(self.schema.unwrap(), self.leaf_pages) //panics is schema is not set Database::new(self.schema.unwrap(), self.leaf_pages) //panics is schema is not set
} }
fn current_page_is_full(&self, record: &Record) -> bool { fn current_page_is_full(&self, record: &Record) -> bool {
self.current_page.get_bw_position() - record.get_length() <= self.current_page.get_fw_position() + 5 self.current_page.bw_position - record.bytes_len() <= self.current_page.fw_position + 5
} }
fn finish_current_page(&mut self) { fn finish_current_page(&mut self) {
self.current_page.set_fw_position(page::POSITION_CELL_COUNT); self.current_page.fw_position = page::POSITION_CELL_COUNT;
self.current_page.put_u16(self.n_records_on_current_page); self.current_page.put_u16(self.n_records_on_current_page);
self.current_page.put_u16(self.current_page.get_bw_position()); self.current_page.put_u16(self.current_page.bw_position);
} }
} }

View file

@ -1,163 +0,0 @@
use byteorder::{BigEndian, ByteOrder};
/// Fixed-size byte buffer that can be written from both ends (this is not endianness).
///
/// Reason: SQLite pages are written in 2 directions: from the front for the
/// cell-pointers and from the back for the cells.
/// - fixed size
/// - big endian only
///
/// The forward and backward cursors are independent; nothing here checks that
/// the two written regions do not run into each other.
pub struct ByteBuffer {
    /// Backing storage; its length is fixed at construction.
    pub data: Vec<u8>,
    /// Index at which the next forward write starts.
    pub fw_position: u16,
    /// Index of the first byte of the most recent backward write
    /// (equal to the buffer size while nothing has been written backward).
    pub bw_position: u16,
}

impl ByteBuffer {
    /// Creates a zero-filled buffer of `size` bytes with the forward cursor at
    /// the start and the backward cursor at the end.
    pub fn new(size: u16) -> Self {
        Self {
            data: vec![0; size as usize],
            fw_position: 0,
            bw_position: size,
        }
    }

    /// forward put unsigned byte array
    ///
    /// # Panics
    /// Panics if the bytes do not fit between the forward cursor and the end
    /// of the buffer.
    pub fn put_bytes(&mut self, bytes: &[u8]) {
        let start = usize::from(self.fw_position);
        let end = start + bytes.len();
        self.data[start..end].copy_from_slice(bytes);
        // `end` is bounded by `data.len()`, which itself came from a u16.
        self.fw_position = end as u16;
    }

    /// backward put unsigned byte array
    ///
    /// The bytes keep their order; they are placed directly in front of the
    /// previous backward write, and the backward cursor is left on the first
    /// byte written so that consecutive calls stack towards the front.
    ///
    /// # Panics
    /// Panics if the bytes do not fit in front of the backward cursor.
    pub fn put_bytes_bw(&mut self, bytes: &[u8]) {
        let end = usize::from(self.bw_position);
        let start = end
            .checked_sub(bytes.len())
            .expect("backward write does not fit in the buffer");
        self.data[start..end].copy_from_slice(bytes);
        // `start` < `end` <= u16::MAX, so the cast cannot truncate.
        self.bw_position = start as u16;
    }

    /// forward put unsigned byte
    pub fn put_u8(&mut self, byte: u8) {
        self.put_bytes(&[byte]);
    }

    /// backward put unsigned byte
    pub fn put_u8_bw(&mut self, byte: u8) {
        self.put_bytes_bw(&[byte]);
    }

    /// forward put unsigned 16bit integer (big endian)
    pub fn put_u16(&mut self, val: u16) {
        self.put_bytes(&val.to_be_bytes());
    }

    /// backward put unsigned 16bit integer (big endian)
    pub fn put_u16_bw(&mut self, val: u16) {
        self.put_bytes_bw(&val.to_be_bytes());
    }

    /// forward put unsigned 32bit integer (big endian)
    pub fn put_u32(&mut self, val: u32) {
        self.put_bytes(&val.to_be_bytes());
    }

    /// backward put unsigned 32bit integer (big endian)
    pub fn put_u32_bw(&mut self, val: u32) {
        self.put_bytes_bw(&val.to_be_bytes());
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A single byte lands at the front of the buffer.
    #[test]
    fn test_u8() {
        let mut buffer = ByteBuffer::new(1);
        buffer.put_u8(64_u8);
        assert_eq!(buffer.data, [64]);
    }

    /// A byte slice is copied in order, starting at the front.
    #[test]
    fn test_u8a() {
        let mut buffer = ByteBuffer::new(2);
        buffer.put_bytes(&[1, 2]);
        assert_eq!(buffer.data, [1, 2]);
    }

    /// 16-bit values are stored most significant byte first.
    #[test]
    fn test_u16() {
        let mut buffer = ByteBuffer::new(2);
        buffer.put_u16(4096);
        assert_eq!(buffer.data, [16, 0]);
    }

    /// 32-bit values occupy four bytes.
    #[test]
    fn test_u32() {
        let mut buffer = ByteBuffer::new(4);
        buffer.put_u32(0xFFFFFFFF);
        assert_eq!(buffer.data, [0xFF; 4]);
    }

    /// A forward write starts at `fw_position` and leaves earlier bytes alone.
    #[test]
    fn test_u16_position() {
        let mut buffer = ByteBuffer::new(4);
        buffer.fw_position = 2;
        buffer.put_u16(4096);
        assert_eq!(buffer.data, [0, 0, 16, 0]);
    }

    /// A backward write fills the tail of the buffer, bytes in normal order.
    #[test]
    fn test_u16_backwards() {
        let mut buffer = ByteBuffer::new(4);
        buffer.put_u16_bw(0x1000);
        assert_eq!(buffer.data, [0, 0, 0x10, 0x00]);
    }

    /// Forward and backward 16-bit writes do not touch the byte between them.
    #[test]
    fn test_u16_2_directions() {
        let mut buffer = ByteBuffer::new(5);
        buffer.put_u16(0x1001);
        buffer.put_u16_bw(0x1000);
        // the decimal 0 in the middle marks the byte that has not been written
        assert_eq!(buffer.data, [0x10, 0x01, 0, 0x10, 0x00]);
    }

    /// Forward and backward 32-bit writes do not touch the byte between them.
    #[test]
    fn test_u32_2_directions() {
        let mut buffer = ByteBuffer::new(9);
        buffer.put_u32(0x1001);
        buffer.put_u32_bw(0x1002);
        // the decimal 0 in the middle marks the byte that has not been written
        assert_eq!(
            buffer.data,
            [0x00, 0x00, 0x10, 0x01, 0, 0x00, 0x00, 0x10, 0x02]
        );
    }
}

View file

@ -1,3 +1,7 @@
use std::io::{BufWriter, Write};
use std::mem;
use crate::varint;
use crate::page;
use crate::page::Page; use crate::page::Page;
pub struct Database { pub struct Database {
@ -16,9 +20,66 @@ impl Database {
pub struct SchemaRecord {} pub struct SchemaRecord {}
/// Builds the interior-page layers on top of the database's leaf pages.
///
/// Starting from the leaf pages, a new layer is created with
/// `create_interior_pages` for as long as the top layer holds more than one
/// page, and the pages of every layer are counted. The single page left on top
/// is the table's root page (there is none when the database has no pages).
///
/// Work in progress: nothing is written to `writer` yet and the page count is
/// not used; the remaining steps are sketched in the comments at the end.
pub fn write<W: Write>(database: Database, writer: BufWriter<W>) {
    let mut layer = database.leaf_pages;
    let mut n_pages = layer.len();
    loop {
        // a layer of at most one page needs no further interior page above it
        if layer.len() <= 1 {
            break;
        }
        layer = create_interior_pages(layer);
        n_pages += layer.len();
    }
    let table_root_page = layer.first();

    // writeFromStart(writer, createHeaderPage(n_pages + 1)); // 1 for header page
    //
    // recursiveAssignPagenumbers(table_root_page); // 3 extra passes... :(
    // recursiveSetPageReferences(table_root_page); // don't think combining is possible
    // recursiveWritePages(channel, table_root_page);
}
/// Groups `child_pages` under a new layer of interior pages and returns that layer.
///
/// Every child except the last one gets a cell via `create_cell` and is then
/// attached to the current interior page. When the gap between the backward
/// and forward write positions of the interior page shrinks to 15 bytes or
/// less, its header is finalized and a fresh interior page is started. The
/// last child is attached without a cell, after the header of the final
/// interior page has been written.
///
/// # Panics
/// Panics if `child_pages` is empty.
fn create_interior_pages(mut child_pages: Vec<Page>) -> Vec<Page> {
    /// Writes the header fields that can only be filled in once a page is complete:
    /// the current backward position followed by five zero bytes.
    fn finish_header(interior: &mut Page) {
        interior.fw_position = page::START_OF_CONTENT_AREA;
        interior.put_u16(interior.bw_position);
        interior.put_bytes(&[0, 0, 0, 0, 0]);
    }

    // The key must be taken over all children, so compute it before the last one is split off.
    let highest_key = child_pages
        .iter()
        .map(|p| p.key)
        .max()
        .expect("create_interior_pages requires at least one child page");
    let last_child = child_pages
        .pop()
        .expect("create_interior_pages requires at least one child page");

    let mut interior_page = Page::new_interior();
    // NOTE(review): only the first interior page receives a key; pages started after a
    // roll-over keep whatever `Page::new_interior` initializes — confirm this is intended.
    interior_page.key = highest_key;
    interior_page.fw_position = page::START_OF_INTERIOR_PAGE;

    let mut interior_pages = Vec::new();
    for mut child in child_pages {
        if interior_page.bw_position <= interior_page.fw_position + 15 {
            // 15 is somewhat arbitrary
            finish_header(&mut interior_page);
            interior_pages.push(mem::replace(&mut interior_page, Page::new_interior()));
            interior_page.fw_position = page::START_OF_INTERIOR_PAGE;
        }
        // NOTE(review): the cell is written into the child page itself, not into
        // `interior_page` — verify against the intended page layout.
        create_cell(&mut child);
        interior_page.add_child(child);
    }

    finish_header(&mut interior_page);
    interior_page.add_child(last_child);
    interior_pages.push(interior_page);
    interior_pages
}
/// Writes a cell for `page` into the page's own buffer.
///
/// The cell consists of four zero bytes followed by the varint encoding of the
/// page key. It is written backward, after which the resulting backward
/// position is written forward as a 16-bit value.
fn create_cell(page: &mut Page) {
    let key_bytes = varint::write(page.key);
    let mut cell = vec![0_u8; 4];
    cell.extend(key_bytes);
    page.put_bytes_bw(&cell);
    let cell_position = page.bw_position;
    page.put_u16(cell_position);
}
fn write_header(mut rootpage: Page, n_pages: u32) { fn write_header(mut rootpage: Page, n_pages: u32) {
rootpage.put_u8a(&MAGIC_HEADER); rootpage.put_bytes(&MAGIC_HEADER);
rootpage.put_u16(DEFAULT_PAGE_SIZE); rootpage.put_u16(DEFAULT_PAGE_SIZE);
rootpage.put_u8(FILE_FORMAT_WRITE_VERSION); rootpage.put_u8(FILE_FORMAT_WRITE_VERSION);
rootpage.put_u8(FILE_FORMAT_READ_VERSION); rootpage.put_u8(FILE_FORMAT_READ_VERSION);
@ -38,9 +99,9 @@ fn write_header(mut rootpage: Page, n_pages: u32) {
rootpage.put_u32(USER_VERSION); rootpage.put_u32(USER_VERSION);
rootpage.put_u32(VACUUM_MODE_OFF);// True (non-zero) for incremental-vacuum mode. False (zero) otherwise. rootpage.put_u32(VACUUM_MODE_OFF);// True (non-zero) for incremental-vacuum mode. False (zero) otherwise.
rootpage.put_u32(APP_ID);// Application ID rootpage.put_u32(APP_ID);// Application ID
rootpage.put_u8a(&FILLER);// Reserved for expansion. Must be zero. rootpage.put_bytes(&FILLER);// Reserved for expansion. Must be zero.
rootpage.put_u8a(&VERSION_VALID_FOR);// The version-valid-for number rootpage.put_bytes(&VERSION_VALID_FOR);// The version-valid-for number
rootpage.put_u8a(&SQLITE_VERSION);// SQLITE_VERSION_NUMBER rootpage.put_bytes(&SQLITE_VERSION);// SQLITE_VERSION_NUMBER
rootpage.put_u8(TABLE_LEAF_PAGE); // leaf table b-tree page for schema rootpage.put_u8(TABLE_LEAF_PAGE); // leaf table b-tree page for schema
rootpage.put_u16(NO_FREE_BLOCKS); // zero if there are no freeblocks rootpage.put_u16(NO_FREE_BLOCKS); // zero if there are no freeblocks
rootpage.put_u16(1); // the number of cells on this page rootpage.put_u16(1); // the number of cells on this page

View file

@ -2,7 +2,6 @@
mod page; mod page;
mod database; mod database;
mod bytebuffer;
mod values; mod values;
mod varint; mod varint;
mod record; mod record;

View file

@ -1,17 +1,22 @@
use crate::bytebuffer::ByteBuffer; use byteorder::{BigEndian, ByteOrder};
use crate::database; use crate::database;
pub const POSITION_CELL_COUNT: u16 = 3; pub const POSITION_CELL_COUNT: u16 = 3;
const START_OF_CONTENT_AREA: u32 = 5; pub const START_OF_CONTENT_AREA: u16 = 5;
pub const START_OF_INTERIOR_PAGE: u16 = 12;
pub enum PageType { pub enum PageType {
Leaf, Leaf,
Interior, Interior,
Root,
Other
} }
/// Represents an SQLite page /// Represents an SQLite page
pub struct Page { pub struct Page {
data: ByteBuffer, pub data: Vec<u8>,
pub fw_position: u16,
pub bw_position: u16,
pub key: u64, pub key: u64,
children: Vec<Page>, children: Vec<Page>,
number: u32, number: u32,
@ -19,9 +24,11 @@ pub struct Page {
} }
impl Page { impl Page {
pub fn with_capacity(size: u16, page_type: PageType) -> Self { fn with_capacity(size: u16, page_type: PageType) -> Self {
Self { Self {
data: ByteBuffer::new(size as u16), data: vec![0; size as usize],
fw_position: 0,
bw_position: size,
key: 0, key: 0,
children: Vec::new(), children: Vec::new(),
number: 0, number: 0,
@ -29,6 +36,18 @@ impl Page {
} }
} }
fn default(size: usize) -> Self{
Self {
data: vec![0; size],
fw_position: 0,
bw_position: size as u16,
key: 0,
children: Vec::new(),
number: 0,
page_type: PageType::Other,
}
}
pub fn new_leaf() -> Self { pub fn new_leaf() -> Self {
let mut page = Page::with_capacity(database::DEFAULT_PAGE_SIZE, PageType::Leaf); let mut page = Page::with_capacity(database::DEFAULT_PAGE_SIZE, PageType::Leaf);
page.put_u8(database::TABLE_LEAF_PAGE); page.put_u8(database::TABLE_LEAF_PAGE);
@ -45,51 +64,146 @@ impl Page {
self.children.push(child); self.children.push(child);
} }
pub fn set_fw_position(&mut self, new_position: u16) { pub fn put_bytes(&mut self, bytes: &[u8]) {
self.data.fw_position = new_position; for v in bytes {
self.data[self.fw_position as usize] = *v;
self.fw_position += 1;
}
} }
pub fn get_fw_position(&self) -> u16 { pub fn put_bytes_bw(&mut self, bytes: &[u8]) {
self.data.fw_position self.bw_position -= bytes.len() as u16;
} for v in bytes {
pub fn set_bw_position(&mut self, new_position: u16) { self.data[self.bw_position as usize] = *v;
self.data.bw_position = new_position; self.bw_position += 1;
} }
pub fn get_bw_position(&self) -> u16 {
self.data.bw_position
}
pub fn put_u8a(&mut self, value: &[u8]) {
self.data.put_bytes(value);
}
pub fn put_u8a_bw(&mut self, value: &[u8]) {
self.data.put_bytes_bw(value);
}
pub fn put_vec_u8_bw(&mut self, value: Vec<u8>) {
self.data.put_bytes_bw(&value);
} }
pub fn put_u8(&mut self, value: u8) { pub fn put_u8(&mut self, value: u8) {
self.data.put_u8(value); self.put_bytes(&[value]);
} }
pub fn put_u8_bw(&mut self, value: u8) { pub fn put_u8_bw(&mut self, value: u8) {
self.data.put_u8_bw(value); self.put_bytes_bw(&[value]);
} }
pub fn put_u16(&mut self, value: u16) { pub fn put_u16(&mut self, value: u16) {
self.data.put_u16(value); self.put_bytes(&u16_to_bytes(value));
}
pub fn put_u16_bw(&mut self, value: u16) {
self.put_bytes_bw(&u16_to_bytes(value));
} }
pub fn put_u32(&mut self, value: u32) { pub fn put_u32(&mut self, value: u32) {
self.data.put_u32(value); self.put_bytes(&u32_to_bytes(value));
}
pub fn put_u32_bw(&mut self, value: u32) {
self.put_bytes_bw(&u32_to_bytes(value));
} }
// may panic // may panic
pub fn get_page_nr_last_child(self) -> u32 { pub fn get_page_nr_last_child(self) -> u32 {
self.children[self.children.len() - 1].number self.children[self.children.len() - 1].number
} }
}
/// Returns the big-endian (most significant byte first) encoding of `value`.
fn u16_to_bytes(value: u16) -> [u8; 2] {
    value.to_be_bytes()
}
/// Returns the big-endian (most significant byte first) encoding of `value`.
fn u32_to_bytes(value: u32) -> [u8; 4] {
    value.to_be_bytes()
}
#[cfg(test)]
mod tests {
use super::*;
/// A single byte lands at the front of the page data.
#[test]
fn test_u8() {
    let mut page = Page::default(1);
    page.put_u8(64_u8);
    assert_eq!(page.data, [64]);
}
/// A byte slice is copied in order, starting at the front.
#[test]
fn test_u8a() {
    let mut page = Page::default(2);
    page.put_bytes(&[1, 2]);
    assert_eq!(page.data, [1, 2]);
}
/// 16-bit values are stored most significant byte first.
#[test]
fn test_u16() {
    let mut page = Page::default(2);
    page.put_u16(4096);
    assert_eq!(page.data, [16, 0]);
}
/// 32-bit values occupy four bytes.
#[test]
fn test_u32() {
    let mut page = Page::default(4);
    page.put_u32(0xFFFFFFFF);
    assert_eq!(page.data, [0xFF; 4]);
}
/// A forward write starts at `fw_position` and leaves earlier bytes alone.
#[test]
fn test_u16_position() {
    let mut page = Page::default(4);
    page.fw_position = 2;
    page.put_u16(4096);
    assert_eq!(page.data, [0, 0, 16, 0]);
}
/// A backward write fills the tail of the page, bytes in normal order.
#[test]
fn test_u16_backwards() {
    let mut page = Page::default(4);
    page.put_u16_bw(0x1000);
    assert_eq!(page.data, [0, 0, 0x10, 0x00]);
}
/// Forward and backward 16-bit writes do not touch the byte between them.
#[test]
fn test_u16_2_directions() {
    let mut page = Page::default(5);
    page.put_u16(0x1001);
    page.put_u16_bw(0x1000);
    // the decimal 0 in the middle marks the byte that has not been written
    assert_eq!(page.data, [0x10, 0x01, 0, 0x10, 0x00]);
}
/// Forward and backward 32-bit writes do not touch the byte between them.
#[test]
fn test_u32_2_directions() {
    let mut page = Page::default(9);
    page.put_u32(0x1001);
    page.put_u32_bw(0x1002);
    // the decimal 0 in the middle marks the byte that has not been written
    assert_eq!(
        page.data,
        [0x00, 0x00, 0x10, 0x01, 0, 0x00, 0x00, 0x10, 0x02]
    );
}
} }

View file

@ -1,4 +1,3 @@
use crate::bytebuffer::ByteBuffer;
use crate::values::*; use crate::values::*;
use crate::varint; use crate::varint;
@ -20,38 +19,42 @@ impl Record {
self.values.push(value); self.values.push(value);
} }
pub fn to_bytes(&self) -> Vec<u8> { /// length of the byte representation
let record_length = self.get_length(); pub fn bytes_len(&self) -> u16 {
let length_bytes = varint::write(record_length as u64); let record_length: u16 = self.values.iter()
let rowid_bytes = varint::write(self.rowid); .map(|v| v.len())
.sum();
record_length
}
}
let mut buffer = ByteBuffer::new(length_bytes.len() as u16 + rowid_bytes.len() as u16 + record_length); impl Into<Vec<u8>> for Record{
buffer.put_bytes(&length_bytes); fn into(mut self) -> Vec<u8> {
buffer.put_bytes(&rowid_bytes); let record_length = self.bytes_len();
let mut length_bytes = varint::write(record_length as u64);
let mut rowid_bytes = varint::write(self.rowid);
// let mut buffer = Vec::with_capacity(length_bytes.len() + rowid_bytes.len() + record_length);
let mut buffer = Vec::new();
buffer.append(&mut length_bytes);
buffer.append(&mut rowid_bytes);
// 'The initial portion of the payload that does not spill to overflow pages.' // 'The initial portion of the payload that does not spill to overflow pages.'
let length_of_encoded_column_types: usize = self.values.iter() let length_of_encoded_column_types: usize = self.values.iter()
.map(|v| v.datatype.len()) .map(|v| v.datatype.len())
.sum(); .sum();
buffer.put_bytes(&varint::write((length_of_encoded_column_types + 1) as u64)); buffer.append(&mut varint::write((length_of_encoded_column_types + 1) as u64));
//write all types //write all types
for v in self.values.iter() { for v in self.values.iter_mut() {
buffer.put_bytes(&v.datatype) buffer.append(&mut v.datatype)
} }
// write all values // write all values
for v in self.values.iter() { for v in self.values.iter_mut() {
buffer.put_bytes(&v.data) //copies individual bytes into a buffer...should I avoid copying? buffer.append(&mut v.data)
} }
buffer.data buffer
}
pub fn get_length(&self) -> u16 {
let record_length: u16 = self.values.iter()
.map(|v| v.get_length())
.sum();
record_length
} }
} }