added comments here and there

This commit is contained in:
Sander Hautvast 2025-02-20 20:38:27 +01:00
parent 68a2224dc7
commit 8bcedcc032
6 changed files with 76 additions and 34 deletions

View file

@ -15,15 +15,15 @@ pub enum PageType {
#[derive(Debug)] #[derive(Debug)]
pub struct Page { pub struct Page {
pagetype: PageType, pagetype: PageType,
id: usize, id: usize, // rowid
start: Value, start: Value, // first value in page
end: Value, end: Value, // last value in page
data: Vec<u8>, data: Vec<u8>, // page data
index_pos: u16, index_pos: u16, // current write position for indexes (to the page data)
data_pos: u16, data_pos: u16, // current write position for data (written backwards from the end of the page)
key: usize, key: usize, // ?
children: Vec<Page>, children: Vec<Page>, // child pages
n_records: usize, n_records: usize, // nr of records in the page
} }
impl Page { impl Page {

View file

@ -1,7 +1,7 @@
use crate::table::Table; use crate::table::Table;
use std::fs;
use crate::record::Record; use crate::record::Record;
use std::fs;
const EOL: &str = "\n"; const EOL: &str = "\n";
@ -31,7 +31,7 @@ impl Table {
//TODO quoted values //TODO quoted values
record.add_value(value); record.add_value(value);
} }
table.add_record(record); table.insert(record);
} }
} }
table table

View file

@ -10,19 +10,24 @@ pub struct Record {
} }
impl Record { impl Record {
/// returns the length of the string representation,
/// for display purposes
pub fn string_len(&self) -> usize { pub fn string_len(&self) -> usize {
self.values.iter().map(Value::string_len).sum() self.values.iter().map(Value::string_len).sum()
} }
/// returns the length of the internal byte representation
pub fn bytes_len(&self) -> u16 { pub fn bytes_len(&self) -> u16 {
let record_length: u16 = self.values.iter().map(Value::bytes_len).sum(); let record_length: u16 = self.values.iter().map(Value::bytes_len).sum();
record_length + 1 record_length + 1
} }
/// pushes a value to the record
pub fn add_value(&mut self, value: impl Into<Value>) { pub fn add_value(&mut self, value: impl Into<Value>) {
self.values.push(value.into()); self.values.push(value.into());
} }
/// gets the value at the column index of the record
pub fn get(&self, index: usize) -> &Value { pub fn get(&self, index: usize) -> &Value {
self.values.get(index).unwrap() //TODO self.values.get(index).unwrap() //TODO
} }
@ -31,6 +36,7 @@ impl Record {
impl Add for &Record { impl Add for &Record {
type Output = Record; type Output = Record;
/// returns a new record that is the 'join' of the two inputs
fn add(self, rhs: Self) -> Self::Output { fn add(self, rhs: Self) -> Self::Output {
let mut sum = Record::default(); let mut sum = Record::default();
sum.values.append(&mut self.values.clone()); sum.values.append(&mut self.values.clone());
@ -40,17 +46,23 @@ impl Add for &Record {
} }
impl From<Record> for Vec<u8> { impl From<Record> for Vec<u8> {
/// returns the byte representation of the record
/// which will be stored physically in the page (and some day on disk)
fn from(mut record: Record) -> Vec<u8> { fn from(mut record: Record) -> Vec<u8> {
let record_length = record.bytes_len(); let record_length = record.bytes_len(); // len of all the values
let mut length_bytes = varint::write(u64::from(record_length)); let mut length_bytes = varint::write(u64::from(record_length)); // the length of the above in bytes representation
let mut rowid_bytes = varint::write(record.rowid); let mut rowid_bytes = varint::write(record.rowid); // the bytes representation of the rowid
let mut buffer = let mut buffer =
Vec::with_capacity(length_bytes.len() + rowid_bytes.len() + record_length as usize); Vec::with_capacity(length_bytes.len() + rowid_bytes.len() + record_length as usize);
buffer.append(&mut length_bytes); buffer.append(&mut length_bytes);
buffer.append(&mut rowid_bytes); buffer.append(&mut rowid_bytes);
// 'The initial portion of the payload that does not spill to overflow pages.' // sqlite docs: 'The initial portion of the payload that does not spill to overflow pages.'
// the length of the byte representation of all value types in the record
// -> after the record header, first all types (text, int, float etc) for the record are written
// after that come the values themselves
// so decoders first read this value to know how many types there are (how many bytes to read to decode the type bytes)
let length_of_encoded_column_types: usize = let length_of_encoded_column_types: usize =
record.values.iter().map(|v| v.datatype_bytes.len()).sum(); record.values.iter().map(|v| v.datatype_bytes.len()).sum();
buffer.append(&mut varint::write( buffer.append(&mut varint::write(
@ -70,6 +82,10 @@ impl From<Record> for Vec<u8> {
} }
} }
/// returns the Record from the byte representation
/// tuple (len, byte buffer)
/// len is the length that was read from the bytes before calling this
// needs improving, for clarity get rid of the tuple
impl Into<Record> for (u64, &[u8]) { impl Into<Record> for (u64, &[u8]) {
fn into(self) -> Record { fn into(self) -> Record {
let (len, data) = self; let (len, data) = self;
@ -77,14 +93,17 @@ impl Into<Record> for (u64, &[u8]) {
let (mut offset, rowid) = varint::read(data); let (mut offset, rowid) = varint::read(data);
let mut datatypes = vec![]; let mut datatypes = vec![];
//read n of fields //read n of fields
while (offset < len) {
while offset < len {
//WRONG, read this len first from the buffer
let (inc, datatype) = varint::read(&data[offset..]); let (inc, datatype) = varint::read(&data[offset..]);
datatypes.push(datatype); datatypes.push(datatype);
offset += inc; offset += inc;
} }
// decode the values
let mut values: Vec<Value> = vec![]; let mut values: Vec<Value> = vec![];
for dt in datatypes { for dt in datatypes {
match dt { match dt {

View file

@ -6,27 +6,30 @@ use std::cell::RefCell;
use std::rc::Rc; use std::rc::Rc;
use std::{ use std::{
cmp::Ordering, cmp::Ordering,
collections::{BTreeMap, HashMap} collections::{BTreeMap, HashMap},
}; };
// work in progress
#[derive(Debug)] #[derive(Debug)]
pub struct View { pub struct View {
records: BTreeMap<Key, Key>, records: BTreeMap<Key, Key>,
} }
/// table struct
#[derive(Debug)] #[derive(Debug)]
pub struct Table { pub struct Table {
name: String, name: String,
cols_by_name: HashMap<String, usize>, cols_by_name: HashMap<String, usize>, // map names to the internal column indexes, for fetching record values
pub(crate) cols: Vec<String>, pub(crate) cols: Vec<String>, // column names
pub(crate) root: Rc<RefCell<Page>>, pub(crate) root: Rc<RefCell<Page>>, // table root page
pub views: HashMap<String, View>, pub views: HashMap<String, View>, // cache all internally used views // not sure about this design
page_ids: ThreadSafeIdGenerator, page_ids: ThreadSafeIdGenerator, // generate page ids
row_ids: ThreadSafeIdGenerator, row_ids: ThreadSafeIdGenerator, // generate row ids
current_page: Rc<RefCell<Page>>, current_page: Rc<RefCell<Page>>, // ref to current page for (bulk) loading
} }
impl Table { impl Table {
/// returns a new empty table
pub fn new(name: impl Into<String>) -> Self { pub fn new(name: impl Into<String>) -> Self {
let root = Rc::new(RefCell::new(Page::new(PageType::Root, 0))); let root = Rc::new(RefCell::new(Page::new(PageType::Root, 0)));
Self { Self {
@ -52,14 +55,20 @@ impl Table {
result result
} }
pub fn add_record(&mut self, record: Record) { /// insert a new record
/// use: individual insert query, bulk loading
pub fn insert(&mut self, record: Record) {
self.current_page.borrow_mut().insert(record); self.current_page.borrow_mut().insert(record);
} }
/// true if the column name is contained in the table
pub fn has_column(&self, name: impl Into<String>) -> bool { pub fn has_column(&self, name: impl Into<String>) -> bool {
self.cols_by_name.contains_key(&name.into()) self.cols_by_name.contains_key(&name.into())
} }
/// add column, for alter table
/// also for computing joins
/// allows duplicates by adding an index -> name, name => name, name2
pub fn add_column(&mut self, name: impl Into<String>, allow_duplicates: bool) { pub fn add_column(&mut self, name: impl Into<String>, allow_duplicates: bool) {
let col_index = self.cols.len(); let col_index = self.cols.len();
let orig_name: String = name.into(); let orig_name: String = name.into();
@ -83,6 +92,8 @@ impl Table {
self.cols.push(name); self.cols.push(name);
} }
/// from a comma separated list of strings, return the column indexes in the record
/// TODO invalid names
pub fn get_column_indexes(&self, expression: &str) -> Vec<usize> { pub fn get_column_indexes(&self, expression: &str) -> Vec<usize> {
expression expression
.split(",") .split(",")
@ -91,16 +102,19 @@ impl Table {
} }
pub fn get_index(&self, col_name: &str) -> usize { pub fn get_index(&self, col_name: &str) -> usize {
*self.cols_by_name.get(col_name).unwrap() *self.cols_by_name.get(col_name).unwrap() // TODO handle invalid names better
} }
// work in progress
pub fn iter(&self) -> TableIter { pub fn iter(&self) -> TableIter {
TableIter { TableIter {
rootPage: Rc::clone(&self.root), root_page: Rc::clone(&self.root),
index: 0, index: 0,
} }
} }
/// iterate records, only returning "subrecords" -> not all columns in the records
/// 'select name from table'
pub fn select_columns<'a>(&'a self, columns: &'a Vec<&'a str>) -> OwnedColIter<'a> { pub fn select_columns<'a>(&'a self, columns: &'a Vec<&'a str>) -> OwnedColIter<'a> {
OwnedColIter { OwnedColIter {
cols: columns, cols: columns,
@ -108,6 +122,7 @@ impl Table {
} }
} }
/// iterate the column names
pub fn iter_colums(&self) -> ColIter { pub fn iter_colums(&self) -> ColIter {
ColIter { ColIter {
cols: &self.cols, cols: &self.cols,
@ -115,6 +130,7 @@ impl Table {
} }
} }
// work in progress
// pub fn where_clause(&self, colindex: usize, value: &Value) -> Option<&Record> { // pub fn where_clause(&self, colindex: usize, value: &Value) -> Option<&Record> {
// for record in self.iter() { // for record in self.iter() {
// let r = record.get(colindex); // let r = record.get(colindex);
@ -126,8 +142,10 @@ impl Table {
// } // }
} }
// iterators
pub struct TableIter { pub struct TableIter {
rootPage: Rc<RefCell<Page>>, root_page: Rc<RefCell<Page>>,
index: usize, index: usize,
} }
@ -135,7 +153,7 @@ impl Iterator for TableIter {
type Item = Record; type Item = Record;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
self.rootPage.borrow().get(self.index) self.root_page.borrow().get(self.index)
} }
} }
@ -175,6 +193,8 @@ impl<'a> Iterator for OwnedColIter<'a> {
} }
} }
/// keys for indexes. Allow compound keys
// move to separate file
#[derive(Debug)] #[derive(Debug)]
pub struct Key { pub struct Key {
values: Vec<Value>, values: Vec<Value>,

View file

@ -1,8 +1,8 @@
use std::{cmp::Ordering, fmt::Display}; use std::{cmp::Ordering, fmt::Display};
use crate::varint;
use anyhow::anyhow; use anyhow::anyhow;
use byteorder::{BigEndian, ByteOrder}; use byteorder::{BigEndian, ByteOrder};
use crate::varint;
#[derive(Debug, Clone, PartialEq, Eq, Ord)] #[derive(Debug, Clone, PartialEq, Eq, Ord)]
pub struct Value { pub struct Value {
@ -69,12 +69,14 @@ impl Value {
} }
} }
/// get the length of the encoding of the value
pub fn bytes_len(&self) -> u16 { pub fn bytes_len(&self) -> u16 {
(self.datatype_bytes.len() + self.data.len()) as u16 (self.datatype_bytes.len() + self.data.len()) as u16
} }
// can this be a constant?
pub fn null() -> Self { pub fn null() -> Self {
Self::new(0 ,vec![]) Self::new(0, vec![])
} }
pub fn from_f64(value: f64) -> Self { pub fn from_f64(value: f64) -> Self {
@ -92,14 +94,14 @@ impl Value {
(int_datatype(data.len()), data) (int_datatype(data.len()), data)
} }
}; };
Self::new(datatype,data) Self::new(datatype, data)
} }
pub fn from_text(value: impl Into<String>) -> Self { pub fn from_text(value: impl Into<String>) -> Self {
let value: String = value.into(); let value: String = value.into();
let datatype = (13 + value.len() * 2) as u64; let datatype = (13 + value.len() * 2) as u64;
let data = value.as_bytes().to_vec(); let data = value.as_bytes().to_vec();
Self::new(datatype,data) Self::new(datatype, data)
} }
pub fn datatype(&self) -> anyhow::Result<Datatype> { pub fn datatype(&self) -> anyhow::Result<Datatype> {

View file

@ -3,6 +3,7 @@ use std::collections::HashMap;
use crate::table::Table; use crate::table::Table;
use crate::value::Value; use crate::value::Value;
// just a small start
struct Vm { struct Vm {
tables: HashMap<String, Table>, tables: HashMap<String, Table>,
stack: Vec<Value>, stack: Vec<Value>,