From 8bcedcc03274f299e751a68aeaaf46e54f23f4bf Mon Sep 17 00:00:00 2001 From: Sander Hautvast Date: Thu, 20 Feb 2025 20:38:27 +0100 Subject: [PATCH] added comments here and there --- src/page.rs | 18 +++++++++--------- src/read.rs | 4 ++-- src/record.rs | 31 +++++++++++++++++++++++++------ src/table.rs | 46 +++++++++++++++++++++++++++++++++------------- src/value.rs | 10 ++++++---- src/vm/mod.rs | 1 + 6 files changed, 76 insertions(+), 34 deletions(-) diff --git a/src/page.rs b/src/page.rs index 56d21f2..23e0c08 100644 --- a/src/page.rs +++ b/src/page.rs @@ -15,15 +15,15 @@ pub enum PageType { #[derive(Debug)] pub struct Page { pagetype: PageType, - id: usize, - start: Value, - end: Value, - data: Vec, - index_pos: u16, - data_pos: u16, - key: usize, - children: Vec, - n_records: usize, + id: usize, // rowid + start: Value, // first value in page + end: Value, // last value in page + data: Vec, // page data + index_pos: u16, // current write position for indexes (to the page data) + data_pos: u16, // current write position for data (written backwards from the end of the page) + key: usize, // ? 
+ children: Vec, // child pages + n_records: usize, // nr of records in the page } impl Page { diff --git a/src/read.rs b/src/read.rs index 4aa8f35..b02e76e 100644 --- a/src/read.rs +++ b/src/read.rs @@ -1,7 +1,7 @@ use crate::table::Table; -use std::fs; use crate::record::Record; +use std::fs; const EOL: &str = "\n"; @@ -31,7 +31,7 @@ impl Table { //TODO quoted values record.add_value(value); } - table.add_record(record); + table.insert(record); } } table diff --git a/src/record.rs b/src/record.rs index 7a8d881..c9a134d 100644 --- a/src/record.rs +++ b/src/record.rs @@ -10,19 +10,24 @@ pub struct Record { } impl Record { + /// returns the length of the string representation, + /// for display purposes pub fn string_len(&self) -> usize { self.values.iter().map(Value::string_len).sum() } + /// returns the length of the internal byte representation pub fn bytes_len(&self) -> u16 { let record_length: u16 = self.values.iter().map(Value::bytes_len).sum(); record_length + 1 } + /// pushes a value to the record pub fn add_value(&mut self, value: impl Into) { self.values.push(value.into()); } + /// gets the value at the column index of the record pub fn get(&self, index: usize) -> &Value { self.values.get(index).unwrap() //TODO } @@ -31,6 +36,7 @@ impl Record { impl Add for &Record { type Output = Record; + /// returns a new record that is the 'join' of the two inputs fn add(self, rhs: Self) -> Self::Output { let mut sum = Record::default(); sum.values.append(&mut self.values.clone()); @@ -40,17 +46,23 @@ impl Add for &Record { } impl From for Vec { + /// returns the byte representation of the record + /// which will be stored physically in the page (and some day on disk) fn from(mut record: Record) -> Vec { - let record_length = record.bytes_len(); - let mut length_bytes = varint::write(u64::from(record_length)); - let mut rowid_bytes = varint::write(record.rowid); + let record_length = record.bytes_len(); // len of all the values + let mut length_bytes = 
varint::write(u64::from(record_length)); // the length of the above in bytes representation + let mut rowid_bytes = varint::write(record.rowid); // the bytes representation of the rowid let mut buffer = Vec::with_capacity(length_bytes.len() + rowid_bytes.len() + record_length as usize); buffer.append(&mut length_bytes); buffer.append(&mut rowid_bytes); - // 'The initial portion of the payload that does not spill to overflow pages.' + // sqlite docs: 'The initial portion of the payload that does not spill to overflow pages.' + // the length of the byte representation of all value types in the record + // -> after the record header, first all types (text, int, float etc) for the record are written + // after that come the values themselves + // so decoders first read this value to know how many types there are (how many bytes to read to decode the type bytes) let length_of_encoded_column_types: usize = record.values.iter().map(|v| v.datatype_bytes.len()).sum(); buffer.append(&mut varint::write( @@ -70,6 +82,10 @@ impl From for Vec { } } +/// returns the Record from the byte representation +/// tuple (len, byte buffer) +/// len is the length that was read from the bytes before calling this +// needs improving, for clarity get rid of the tuple impl Into for (u64, &[u8]) { fn into(self) -> Record { let (len, data) = self; @@ -77,14 +93,17 @@ impl Into for (u64, &[u8]) { let (mut offset, rowid) = varint::read(data); let mut datatypes = vec![]; - + //read n of fields - while (offset < len) { + + while offset < len { + //WRONG, read this len first from the buffer let (inc, datatype) = varint::read(&data[offset..]); datatypes.push(datatype); offset += inc; } + // decode the values let mut values: Vec = vec![]; for dt in datatypes { match dt { diff --git a/src/table.rs b/src/table.rs index 2b470ff..5bec221 100644 --- a/src/table.rs +++ b/src/table.rs @@ -6,27 +6,30 @@ use std::cell::RefCell; use std::rc::Rc; use std::{ cmp::Ordering, - collections::{BTreeMap, HashMap} + 
collections::{BTreeMap, HashMap}, }; +// work in progress #[derive(Debug)] pub struct View { records: BTreeMap, } +/// table struct #[derive(Debug)] pub struct Table { name: String, - cols_by_name: HashMap, - pub(crate) cols: Vec, - pub(crate) root: Rc>, - pub views: HashMap, - page_ids: ThreadSafeIdGenerator, - row_ids: ThreadSafeIdGenerator, - current_page: Rc>, + cols_by_name: HashMap, // map names to the internal column indexes, for fetching record values + pub(crate) cols: Vec, // column names + pub(crate) root: Rc>, // table root page + pub views: HashMap, // cache all internally used views // not sure about this design + page_ids: ThreadSafeIdGenerator, // generate page ids + row_ids: ThreadSafeIdGenerator, // generate row ids + current_page: Rc>, // ref to current page for (bulk) loading } impl Table { + /// returns a new empty table pub fn new(name: impl Into) -> Self { let root = Rc::new(RefCell::new(Page::new(PageType::Root, 0))); Self { @@ -52,14 +55,20 @@ impl Table { result } - pub fn add_record(&mut self, record: Record) { + /// insert a new record + /// use: individual insert query, bulk loading + pub fn insert(&mut self, record: Record) { self.current_page.borrow_mut().insert(record); } + /// true if the column name is contained in the table pub fn has_column(&self, name: impl Into) -> bool { self.cols_by_name.contains_key(&name.into()) } + /// add column, for alter table + /// also for computing joins + /// allows duplicates by adding an index -> name, name => name, name2 pub fn add_column(&mut self, name: impl Into, allow_duplicates: bool) { let col_index = self.cols.len(); let orig_name: String = name.into(); @@ -83,6 +92,8 @@ impl Table { self.cols.push(name); } + /// from a comma separated list of strings, return the column indexes in the record + /// TODO invalid names pub fn get_column_indexes(&self, expression: &str) -> Vec { expression .split(",") @@ -91,16 +102,19 @@ impl Table { } pub fn get_index(&self, col_name: &str) -> usize { - 
*self.cols_by_name.get(col_name).unwrap() + *self.cols_by_name.get(col_name).unwrap() // TODO handle invalid names better } + // work in progress pub fn iter(&self) -> TableIter { TableIter { - rootPage: Rc::clone(&self.root), + root_page: Rc::clone(&self.root), index: 0, } } + /// iterate records, only returning "subrecords" -> not all columns in the records + /// 'select name from table' pub fn select_columns<'a>(&'a self, columns: &'a Vec<&'a str>) -> OwnedColIter<'a> { OwnedColIter { cols: columns, @@ -108,6 +122,7 @@ impl Table { } } + /// iterate the column names pub fn iter_colums(&self) -> ColIter { ColIter { cols: &self.cols, @@ -115,6 +130,7 @@ impl Table { } } + // work in progress // pub fn where_clause(&self, colindex: usize, value: &Value) -> Option<&Record> { // for record in self.iter() { // let r = record.get(colindex); @@ -126,8 +142,10 @@ impl Table { // } } +// iterators + pub struct TableIter { - rootPage: Rc>, + root_page: Rc>, index: usize, } @@ -135,7 +153,7 @@ impl Iterator for TableIter { type Item = Record; fn next(&mut self) -> Option { - self.rootPage.borrow().get(self.index) + self.root_page.borrow().get(self.index) } } @@ -175,6 +193,8 @@ impl<'a> Iterator for OwnedColIter<'a> { } } +/// keys for indexes. Allow compound keys +// move to separate file #[derive(Debug)] pub struct Key { values: Vec, diff --git a/src/value.rs b/src/value.rs index 61f7bfd..24efb14 100644 --- a/src/value.rs +++ b/src/value.rs @@ -1,8 +1,8 @@ use std::{cmp::Ordering, fmt::Display}; +use crate::varint; use anyhow::anyhow; use byteorder::{BigEndian, ByteOrder}; -use crate::varint; #[derive(Debug, Clone, PartialEq, Eq, Ord)] pub struct Value { @@ -69,12 +69,14 @@ impl Value { } } + /// get the length of the encoding of the value pub fn bytes_len(&self) -> u16 { (self.datatype_bytes.len() + self.data.len()) as u16 } + // can this be a constant? 
pub fn null() -> Self { - Self::new(0 ,vec![]) + Self::new(0, vec![]) } pub fn from_f64(value: f64) -> Self { @@ -92,14 +94,14 @@ impl Value { (int_datatype(data.len()), data) } }; - Self::new(datatype,data) + Self::new(datatype, data) } pub fn from_text(value: impl Into) -> Self { let value: String = value.into(); let datatype = (13 + value.len() * 2) as u64; let data = value.as_bytes().to_vec(); - Self::new(datatype,data) + Self::new(datatype, data) } pub fn datatype(&self) -> anyhow::Result { diff --git a/src/vm/mod.rs b/src/vm/mod.rs index e294de2..73eff0f 100644 --- a/src/vm/mod.rs +++ b/src/vm/mod.rs @@ -3,6 +3,7 @@ use std::collections::HashMap; use crate::table::Table; use crate::value::Value; +// just a start (work in progress) struct Vm { tables: HashMap, stack: Vec,