added comments here and there
This commit is contained in:
parent
68a2224dc7
commit
8bcedcc032
6 changed files with 76 additions and 34 deletions
18
src/page.rs
18
src/page.rs
|
|
@ -15,15 +15,15 @@ pub enum PageType {
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Page {
|
pub struct Page {
|
||||||
pagetype: PageType,
|
pagetype: PageType,
|
||||||
id: usize,
|
id: usize, // rowid
|
||||||
start: Value,
|
start: Value, // first value in page
|
||||||
end: Value,
|
end: Value, // last value in page
|
||||||
data: Vec<u8>,
|
data: Vec<u8>, // page data
|
||||||
index_pos: u16,
|
index_pos: u16, // current write position for indexes (to the page data)
|
||||||
data_pos: u16,
|
data_pos: u16, // current write position for data (written backwards from the end of the page)
|
||||||
key: usize,
|
key: usize, // ?
|
||||||
children: Vec<Page>,
|
children: Vec<Page>, // child pages
|
||||||
n_records: usize,
|
n_records: usize, // nr of records in the page
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Page {
|
impl Page {
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
use crate::table::Table;
|
use crate::table::Table;
|
||||||
|
|
||||||
use std::fs;
|
|
||||||
use crate::record::Record;
|
use crate::record::Record;
|
||||||
|
use std::fs;
|
||||||
|
|
||||||
const EOL: &str = "\n";
|
const EOL: &str = "\n";
|
||||||
|
|
||||||
|
|
@ -31,7 +31,7 @@ impl Table {
|
||||||
//TODO quoted values
|
//TODO quoted values
|
||||||
record.add_value(value);
|
record.add_value(value);
|
||||||
}
|
}
|
||||||
table.add_record(record);
|
table.insert(record);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
table
|
table
|
||||||
|
|
|
||||||
|
|
@ -10,19 +10,24 @@ pub struct Record {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Record {
|
impl Record {
|
||||||
|
/// returns the length of the string representation,
|
||||||
|
/// for display purposes
|
||||||
pub fn string_len(&self) -> usize {
|
pub fn string_len(&self) -> usize {
|
||||||
self.values.iter().map(Value::string_len).sum()
|
self.values.iter().map(Value::string_len).sum()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// returns the length of the internal byte representation
|
||||||
pub fn bytes_len(&self) -> u16 {
|
pub fn bytes_len(&self) -> u16 {
|
||||||
let record_length: u16 = self.values.iter().map(Value::bytes_len).sum();
|
let record_length: u16 = self.values.iter().map(Value::bytes_len).sum();
|
||||||
record_length + 1
|
record_length + 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// pushes a value to the record
|
||||||
pub fn add_value(&mut self, value: impl Into<Value>) {
|
pub fn add_value(&mut self, value: impl Into<Value>) {
|
||||||
self.values.push(value.into());
|
self.values.push(value.into());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// gets the value at the column index of the record
|
||||||
pub fn get(&self, index: usize) -> &Value {
|
pub fn get(&self, index: usize) -> &Value {
|
||||||
self.values.get(index).unwrap() //TODO
|
self.values.get(index).unwrap() //TODO
|
||||||
}
|
}
|
||||||
|
|
@ -31,6 +36,7 @@ impl Record {
|
||||||
impl Add for &Record {
|
impl Add for &Record {
|
||||||
type Output = Record;
|
type Output = Record;
|
||||||
|
|
||||||
|
/// returns a new record that is the 'join' of the two inputs
|
||||||
fn add(self, rhs: Self) -> Self::Output {
|
fn add(self, rhs: Self) -> Self::Output {
|
||||||
let mut sum = Record::default();
|
let mut sum = Record::default();
|
||||||
sum.values.append(&mut self.values.clone());
|
sum.values.append(&mut self.values.clone());
|
||||||
|
|
@ -40,17 +46,23 @@ impl Add for &Record {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<Record> for Vec<u8> {
|
impl From<Record> for Vec<u8> {
|
||||||
|
/// returns the byte representation of the record
|
||||||
|
/// which will be stored physically in the page (and some day on disk)
|
||||||
fn from(mut record: Record) -> Vec<u8> {
|
fn from(mut record: Record) -> Vec<u8> {
|
||||||
let record_length = record.bytes_len();
|
let record_length = record.bytes_len(); // len of all the values
|
||||||
let mut length_bytes = varint::write(u64::from(record_length));
|
let mut length_bytes = varint::write(u64::from(record_length)); // the length of the above in bytes representation
|
||||||
let mut rowid_bytes = varint::write(record.rowid);
|
let mut rowid_bytes = varint::write(record.rowid); // the bytes representation of the rowid
|
||||||
|
|
||||||
let mut buffer =
|
let mut buffer =
|
||||||
Vec::with_capacity(length_bytes.len() + rowid_bytes.len() + record_length as usize);
|
Vec::with_capacity(length_bytes.len() + rowid_bytes.len() + record_length as usize);
|
||||||
buffer.append(&mut length_bytes);
|
buffer.append(&mut length_bytes);
|
||||||
buffer.append(&mut rowid_bytes);
|
buffer.append(&mut rowid_bytes);
|
||||||
|
|
||||||
// 'The initial portion of the payload that does not spill to overflow pages.'
|
// sqlite docs: 'The initial portion of the payload that does not spill to overflow pages.'
|
||||||
|
// the length of the byte representation of all value types in the record
|
||||||
|
// -> after the record header, first all types (text, int, float etc) for the record are written
|
||||||
|
// after that come the values themselves
|
||||||
|
// so decoders first read this value to know how many types there are (how many bytes to read to decode the type bytes)
|
||||||
let length_of_encoded_column_types: usize =
|
let length_of_encoded_column_types: usize =
|
||||||
record.values.iter().map(|v| v.datatype_bytes.len()).sum();
|
record.values.iter().map(|v| v.datatype_bytes.len()).sum();
|
||||||
buffer.append(&mut varint::write(
|
buffer.append(&mut varint::write(
|
||||||
|
|
@ -70,6 +82,10 @@ impl From<Record> for Vec<u8> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// returns the Record from the byte representation
|
||||||
|
/// tuple (len, byte buffer)
|
||||||
|
/// len is the length that was read from the bytes before calling this
|
||||||
|
// needs improving, for clarity get rid of the tuple
|
||||||
impl Into<Record> for (u64, &[u8]) {
|
impl Into<Record> for (u64, &[u8]) {
|
||||||
fn into(self) -> Record {
|
fn into(self) -> Record {
|
||||||
let (len, data) = self;
|
let (len, data) = self;
|
||||||
|
|
@ -79,12 +95,15 @@ impl Into<Record> for (u64, &[u8]) {
|
||||||
let mut datatypes = vec![];
|
let mut datatypes = vec![];
|
||||||
|
|
||||||
//read n of fields
|
//read n of fields
|
||||||
while (offset < len) {
|
|
||||||
|
while offset < len {
|
||||||
|
//WRONG, read this len first from the buffer
|
||||||
let (inc, datatype) = varint::read(&data[offset..]);
|
let (inc, datatype) = varint::read(&data[offset..]);
|
||||||
datatypes.push(datatype);
|
datatypes.push(datatype);
|
||||||
offset += inc;
|
offset += inc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// decode the values
|
||||||
let mut values: Vec<Value> = vec![];
|
let mut values: Vec<Value> = vec![];
|
||||||
for dt in datatypes {
|
for dt in datatypes {
|
||||||
match dt {
|
match dt {
|
||||||
|
|
|
||||||
46
src/table.rs
46
src/table.rs
|
|
@ -6,27 +6,30 @@ use std::cell::RefCell;
|
||||||
use std::rc::Rc;
|
use std::rc::Rc;
|
||||||
use std::{
|
use std::{
|
||||||
cmp::Ordering,
|
cmp::Ordering,
|
||||||
collections::{BTreeMap, HashMap}
|
collections::{BTreeMap, HashMap},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// work in progress
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct View {
|
pub struct View {
|
||||||
records: BTreeMap<Key, Key>,
|
records: BTreeMap<Key, Key>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// table struct
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Table {
|
pub struct Table {
|
||||||
name: String,
|
name: String,
|
||||||
cols_by_name: HashMap<String, usize>,
|
cols_by_name: HashMap<String, usize>, // map names to the internal column indexes, for fetching record values
|
||||||
pub(crate) cols: Vec<String>,
|
pub(crate) cols: Vec<String>, // column names
|
||||||
pub(crate) root: Rc<RefCell<Page>>,
|
pub(crate) root: Rc<RefCell<Page>>, // table root page
|
||||||
pub views: HashMap<String, View>,
|
pub views: HashMap<String, View>, // cache all internally used views // not sure about this design
|
||||||
page_ids: ThreadSafeIdGenerator,
|
page_ids: ThreadSafeIdGenerator, // generate page ids
|
||||||
row_ids: ThreadSafeIdGenerator,
|
row_ids: ThreadSafeIdGenerator, // generate row ids
|
||||||
current_page: Rc<RefCell<Page>>,
|
current_page: Rc<RefCell<Page>>, // ref to current page for (bulk) loading
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Table {
|
impl Table {
|
||||||
|
/// returns a new empty table
|
||||||
pub fn new(name: impl Into<String>) -> Self {
|
pub fn new(name: impl Into<String>) -> Self {
|
||||||
let root = Rc::new(RefCell::new(Page::new(PageType::Root, 0)));
|
let root = Rc::new(RefCell::new(Page::new(PageType::Root, 0)));
|
||||||
Self {
|
Self {
|
||||||
|
|
@ -52,14 +55,20 @@ impl Table {
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn add_record(&mut self, record: Record) {
|
/// insert a new record
|
||||||
|
/// use: individual insert query, bulk loading
|
||||||
|
pub fn insert(&mut self, record: Record) {
|
||||||
self.current_page.borrow_mut().insert(record);
|
self.current_page.borrow_mut().insert(record);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// true if the column name is contained in the table
|
||||||
pub fn has_column(&self, name: impl Into<String>) -> bool {
|
pub fn has_column(&self, name: impl Into<String>) -> bool {
|
||||||
self.cols_by_name.contains_key(&name.into())
|
self.cols_by_name.contains_key(&name.into())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// add column, for alter table
|
||||||
|
/// also for computing joins
|
||||||
|
/// allows duplicates by adding an index -> name, name => name, name2
|
||||||
pub fn add_column(&mut self, name: impl Into<String>, allow_duplicates: bool) {
|
pub fn add_column(&mut self, name: impl Into<String>, allow_duplicates: bool) {
|
||||||
let col_index = self.cols.len();
|
let col_index = self.cols.len();
|
||||||
let orig_name: String = name.into();
|
let orig_name: String = name.into();
|
||||||
|
|
@ -83,6 +92,8 @@ impl Table {
|
||||||
self.cols.push(name);
|
self.cols.push(name);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// from a comma separated list of strings, return the column indexes in the record
|
||||||
|
/// TODO invalid names
|
||||||
pub fn get_column_indexes(&self, expression: &str) -> Vec<usize> {
|
pub fn get_column_indexes(&self, expression: &str) -> Vec<usize> {
|
||||||
expression
|
expression
|
||||||
.split(",")
|
.split(",")
|
||||||
|
|
@ -91,16 +102,19 @@ impl Table {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_index(&self, col_name: &str) -> usize {
|
pub fn get_index(&self, col_name: &str) -> usize {
|
||||||
*self.cols_by_name.get(col_name).unwrap()
|
*self.cols_by_name.get(col_name).unwrap() // TODO handle invalid names better
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// work in progress
|
||||||
pub fn iter(&self) -> TableIter {
|
pub fn iter(&self) -> TableIter {
|
||||||
TableIter {
|
TableIter {
|
||||||
rootPage: Rc::clone(&self.root),
|
root_page: Rc::clone(&self.root),
|
||||||
index: 0,
|
index: 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// iterate records, only returning "subrecords" -> not all columns in the records
|
||||||
|
/// 'select name from table'
|
||||||
pub fn select_columns<'a>(&'a self, columns: &'a Vec<&'a str>) -> OwnedColIter<'a> {
|
pub fn select_columns<'a>(&'a self, columns: &'a Vec<&'a str>) -> OwnedColIter<'a> {
|
||||||
OwnedColIter {
|
OwnedColIter {
|
||||||
cols: columns,
|
cols: columns,
|
||||||
|
|
@ -108,6 +122,7 @@ impl Table {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// iterate the column names
|
||||||
pub fn iter_colums(&self) -> ColIter {
|
pub fn iter_colums(&self) -> ColIter {
|
||||||
ColIter {
|
ColIter {
|
||||||
cols: &self.cols,
|
cols: &self.cols,
|
||||||
|
|
@ -115,6 +130,7 @@ impl Table {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// work in progress
|
||||||
// pub fn where_clause(&self, colindex: usize, value: &Value) -> Option<&Record> {
|
// pub fn where_clause(&self, colindex: usize, value: &Value) -> Option<&Record> {
|
||||||
// for record in self.iter() {
|
// for record in self.iter() {
|
||||||
// let r = record.get(colindex);
|
// let r = record.get(colindex);
|
||||||
|
|
@ -126,8 +142,10 @@ impl Table {
|
||||||
// }
|
// }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// iterators
|
||||||
|
|
||||||
pub struct TableIter {
|
pub struct TableIter {
|
||||||
rootPage: Rc<RefCell<Page>>,
|
root_page: Rc<RefCell<Page>>,
|
||||||
index: usize,
|
index: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -135,7 +153,7 @@ impl Iterator for TableIter {
|
||||||
type Item = Record;
|
type Item = Record;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
self.rootPage.borrow().get(self.index)
|
self.root_page.borrow().get(self.index)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -175,6 +193,8 @@ impl<'a> Iterator for OwnedColIter<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// keys for indexes. Allow compound keys
|
||||||
|
// move to separate file
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Key {
|
pub struct Key {
|
||||||
values: Vec<Value>,
|
values: Vec<Value>,
|
||||||
|
|
|
||||||
10
src/value.rs
10
src/value.rs
|
|
@ -1,8 +1,8 @@
|
||||||
use std::{cmp::Ordering, fmt::Display};
|
use std::{cmp::Ordering, fmt::Display};
|
||||||
|
|
||||||
|
use crate::varint;
|
||||||
use anyhow::anyhow;
|
use anyhow::anyhow;
|
||||||
use byteorder::{BigEndian, ByteOrder};
|
use byteorder::{BigEndian, ByteOrder};
|
||||||
use crate::varint;
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq, Ord)]
|
#[derive(Debug, Clone, PartialEq, Eq, Ord)]
|
||||||
pub struct Value {
|
pub struct Value {
|
||||||
|
|
@ -69,12 +69,14 @@ impl Value {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// get the length of the encoding of the value
|
||||||
pub fn bytes_len(&self) -> u16 {
|
pub fn bytes_len(&self) -> u16 {
|
||||||
(self.datatype_bytes.len() + self.data.len()) as u16
|
(self.datatype_bytes.len() + self.data.len()) as u16
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// can this be a constant?
|
||||||
pub fn null() -> Self {
|
pub fn null() -> Self {
|
||||||
Self::new(0 ,vec![])
|
Self::new(0, vec![])
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn from_f64(value: f64) -> Self {
|
pub fn from_f64(value: f64) -> Self {
|
||||||
|
|
@ -92,14 +94,14 @@ impl Value {
|
||||||
(int_datatype(data.len()), data)
|
(int_datatype(data.len()), data)
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
Self::new(datatype,data)
|
Self::new(datatype, data)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn from_text(value: impl Into<String>) -> Self {
|
pub fn from_text(value: impl Into<String>) -> Self {
|
||||||
let value: String = value.into();
|
let value: String = value.into();
|
||||||
let datatype = (13 + value.len() * 2) as u64;
|
let datatype = (13 + value.len() * 2) as u64;
|
||||||
let data = value.as_bytes().to_vec();
|
let data = value.as_bytes().to_vec();
|
||||||
Self::new(datatype,data)
|
Self::new(datatype, data)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn datatype(&self) -> anyhow::Result<Datatype> {
|
pub fn datatype(&self) -> anyhow::Result<Datatype> {
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ use std::collections::HashMap;
|
||||||
use crate::table::Table;
|
use crate::table::Table;
|
||||||
use crate::value::Value;
|
use crate::value::Value;
|
||||||
|
|
||||||
|
// just a small start
|
||||||
struct Vm {
|
struct Vm {
|
||||||
tables: HashMap<String, Table>,
|
tables: HashMap<String, Table>,
|
||||||
stack: Vec<Value>,
|
stack: Vec<Value>,
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue