added comments here and there

This commit is contained in:
Sander Hautvast 2025-02-20 20:38:27 +01:00
parent 68a2224dc7
commit 8bcedcc032
6 changed files with 76 additions and 34 deletions

View file

@ -15,15 +15,15 @@ pub enum PageType {
#[derive(Debug)]
pub struct Page {
pagetype: PageType,
id: usize,
start: Value,
end: Value,
data: Vec<u8>,
index_pos: u16,
data_pos: u16,
key: usize,
children: Vec<Page>,
n_records: usize,
id: usize, // rowid
start: Value, // first value in page
end: Value, // last value in page
data: Vec<u8>, // page data
index_pos: u16, // current write position for indexes (to the page data)
data_pos: u16, // current write position for data (written backwards from the end of the page)
key: usize, // ?
children: Vec<Page>, // child pages
n_records: usize, // nr of records in the page
}
impl Page {

View file

@ -1,7 +1,7 @@
use crate::table::Table;
use std::fs;
use crate::record::Record;
use std::fs;
const EOL: &str = "\n";
@ -31,7 +31,7 @@ impl Table {
//TODO quoted values
record.add_value(value);
}
table.add_record(record);
table.insert(record);
}
}
table

View file

@ -10,19 +10,24 @@ pub struct Record {
}
impl Record {
/// returns the length of the string representation,
/// for display purposes
pub fn string_len(&self) -> usize {
self.values.iter().map(Value::string_len).sum()
}
/// returns the length of the internal byte representation
pub fn bytes_len(&self) -> u16 {
let record_length: u16 = self.values.iter().map(Value::bytes_len).sum();
record_length + 1
}
/// Converts `value` into a `Value` and appends it to the end of the record.
pub fn add_value(&mut self, value: impl Into<Value>) {
    let converted: Value = value.into();
    self.values.push(converted);
}
/// Returns a reference to the value stored at column position `index`.
///
/// # Panics
/// Panics when `index` is not a valid position in this record's values.
pub fn get(&self, index: usize) -> &Value {
    let slot = self.values.get(index);
    slot.unwrap() //TODO
}
@ -31,6 +36,7 @@ impl Record {
impl Add for &Record {
type Output = Record;
/// returns a new record that is the 'join' of the two inputs
fn add(self, rhs: Self) -> Self::Output {
let mut sum = Record::default();
sum.values.append(&mut self.values.clone());
@ -40,17 +46,23 @@ impl Add for &Record {
}
impl From<Record> for Vec<u8> {
/// returns the byte representation of the record
/// which will be stored physically in the page (and some day on disk)
fn from(mut record: Record) -> Vec<u8> {
let record_length = record.bytes_len();
let mut length_bytes = varint::write(u64::from(record_length));
let mut rowid_bytes = varint::write(record.rowid);
let record_length = record.bytes_len(); // len of all the values
let mut length_bytes = varint::write(u64::from(record_length)); // the length of the above in bytes representation
let mut rowid_bytes = varint::write(record.rowid); // the bytes representation of the rowid
let mut buffer =
Vec::with_capacity(length_bytes.len() + rowid_bytes.len() + record_length as usize);
buffer.append(&mut length_bytes);
buffer.append(&mut rowid_bytes);
// 'The initial portion of the payload that does not spill to overflow pages.'
// sqlite docs: 'The initial portion of the payload that does not spill to overflow pages.'
// the length of the byte representation of all value types in the record
// -> after the record header, first all types (text, int, float etc) for the record are written
// after that come the values themselves
// so decoders first read this value to know how many types there are (how many bytes to read to decode the type bytes)
let length_of_encoded_column_types: usize =
record.values.iter().map(|v| v.datatype_bytes.len()).sum();
buffer.append(&mut varint::write(
@ -70,6 +82,10 @@ impl From<Record> for Vec<u8> {
}
}
/// returns the Record from the byte representation
/// tuple (len, byte buffer)
/// len is the length that was read from the bytes before calling this
// needs improving, for clarity get rid of the tuple
impl Into<Record> for (u64, &[u8]) {
fn into(self) -> Record {
let (len, data) = self;
@ -79,12 +95,15 @@ impl Into<Record> for (u64, &[u8]) {
let mut datatypes = vec![];
//read n of fields
while (offset < len) {
while offset < len {
//WRONG, read this len first from the buffer
let (inc, datatype) = varint::read(&data[offset..]);
datatypes.push(datatype);
offset += inc;
}
// decode the values
let mut values: Vec<Value> = vec![];
for dt in datatypes {
match dt {

View file

@ -6,27 +6,30 @@ use std::cell::RefCell;
use std::rc::Rc;
use std::{
cmp::Ordering,
collections::{BTreeMap, HashMap}
collections::{BTreeMap, HashMap},
};
/// Work in progress: a view, stored as an ordered map from `Key` to `Key`.
// NOTE(review): what the key and the mapped `Key` each stand for is not visible here
// (presumably index key -> record key) — confirm before relying on it.
#[derive(Debug)]
pub struct View {
records: BTreeMap<Key, Key>, // entries of the view; a BTreeMap keeps them sorted by key
}
/// table struct
#[derive(Debug)]
pub struct Table {
name: String,
cols_by_name: HashMap<String, usize>,
pub(crate) cols: Vec<String>,
pub(crate) root: Rc<RefCell<Page>>,
pub views: HashMap<String, View>,
page_ids: ThreadSafeIdGenerator,
row_ids: ThreadSafeIdGenerator,
current_page: Rc<RefCell<Page>>,
cols_by_name: HashMap<String, usize>, // map names to the internal column indexes, for fetching record values
pub(crate) cols: Vec<String>, // column names
pub(crate) root: Rc<RefCell<Page>>, // table root page
pub views: HashMap<String, View>, // cache all internally used views // not sure about this design
page_ids: ThreadSafeIdGenerator, // generate page ids
row_ids: ThreadSafeIdGenerator, // generate row ids
current_page: Rc<RefCell<Page>>, // ref to current page for (bulk) loading
}
impl Table {
/// returns a new empty table
pub fn new(name: impl Into<String>) -> Self {
let root = Rc::new(RefCell::new(Page::new(PageType::Root, 0)));
Self {
@ -52,14 +55,20 @@ impl Table {
result
}
pub fn add_record(&mut self, record: Record) {
/// insert a new record
/// use: individual insert query, bulk loading
pub fn insert(&mut self, record: Record) {
self.current_page.borrow_mut().insert(record);
}
/// Reports whether the table has a column called `name`.
///
/// `name` is converted to an owned `String` and looked up in the
/// name-to-index map.
pub fn has_column(&self, name: impl Into<String>) -> bool {
    let wanted: String = name.into();
    self.cols_by_name.contains_key(&wanted)
}
/// add column, for alter table
/// also for computing joins
/// allows duplicates by adding an index -> name, name => name, name2
pub fn add_column(&mut self, name: impl Into<String>, allow_duplicates: bool) {
let col_index = self.cols.len();
let orig_name: String = name.into();
@ -83,6 +92,8 @@ impl Table {
self.cols.push(name);
}
/// from a comma separated list of strings, return the column indexes in the record
/// TODO invalid names
pub fn get_column_indexes(&self, expression: &str) -> Vec<usize> {
expression
.split(",")
@ -91,16 +102,19 @@ impl Table {
}
pub fn get_index(&self, col_name: &str) -> usize {
*self.cols_by_name.get(col_name).unwrap()
*self.cols_by_name.get(col_name).unwrap() // TODO handle invalid names better
}
// work in progress
pub fn iter(&self) -> TableIter {
TableIter {
rootPage: Rc::clone(&self.root),
root_page: Rc::clone(&self.root),
index: 0,
}
}
/// iterate records, only returning "subrecords" -> not all columns in the records
/// 'select name from table'
pub fn select_columns<'a>(&'a self, columns: &'a Vec<&'a str>) -> OwnedColIter<'a> {
OwnedColIter {
cols: columns,
@ -108,6 +122,7 @@ impl Table {
}
}
/// iterate the column names
pub fn iter_colums(&self) -> ColIter {
ColIter {
cols: &self.cols,
@ -115,6 +130,7 @@ impl Table {
}
}
// work in progress
// pub fn where_clause(&self, colindex: usize, value: &Value) -> Option<&Record> {
// for record in self.iter() {
// let r = record.get(colindex);
@ -126,8 +142,10 @@ impl Table {
// }
}
// iterators
pub struct TableIter {
rootPage: Rc<RefCell<Page>>,
root_page: Rc<RefCell<Page>>,
index: usize,
}
@ -135,7 +153,7 @@ impl Iterator for TableIter {
type Item = Record;
fn next(&mut self) -> Option<Self::Item> {
self.rootPage.borrow().get(self.index)
self.root_page.borrow().get(self.index)
}
}
@ -175,6 +193,8 @@ impl<'a> Iterator for OwnedColIter<'a> {
}
}
/// keys for indexes. Allow compound keys
// move to separate file
#[derive(Debug)]
pub struct Key {
values: Vec<Value>,

View file

@ -1,8 +1,8 @@
use std::{cmp::Ordering, fmt::Display};
use crate::varint;
use anyhow::anyhow;
use byteorder::{BigEndian, ByteOrder};
use crate::varint;
#[derive(Debug, Clone, PartialEq, Eq, Ord)]
pub struct Value {
@ -69,12 +69,14 @@ impl Value {
}
}
/// Length in bytes of this value's encoding: the datatype bytes plus the data bytes.
///
/// The total is narrowed with `as u16` without a range check.
pub fn bytes_len(&self) -> u16 {
    let encoded_len = self.datatype_bytes.len() + self.data.len();
    encoded_len as u16
}
// can this be a constant?
pub fn null() -> Self {
Self::new(0 ,vec![])
Self::new(0, vec![])
}
pub fn from_f64(value: f64) -> Self {
@ -92,14 +94,14 @@ impl Value {
(int_datatype(data.len()), data)
}
};
Self::new(datatype,data)
Self::new(datatype, data)
}
pub fn from_text(value: impl Into<String>) -> Self {
let value: String = value.into();
let datatype = (13 + value.len() * 2) as u64;
let data = value.as_bytes().to_vec();
Self::new(datatype,data)
Self::new(datatype, data)
}
pub fn datatype(&self) -> anyhow::Result<Datatype> {

View file

@ -3,6 +3,7 @@ use std::collections::HashMap;
use crate::table::Table;
use crate::value::Value;
// a small beginning (work in progress)
struct Vm {
tables: HashMap<String, Table>,
stack: Vec<Value>,