From 3281edc3509dd55e1b4d08d3b8d1d811cd3e93ea Mon Sep 17 00:00:00 2001
From: Sander Hautvast <sander.hautvast@ing.com>
Date: Tue, 18 Feb 2025 22:09:50 +0100
Subject: [PATCH] binary storage, start of vm

---
 Cargo.lock               |   7 +
 Cargo.toml               |   1 +
 examples/join/main.rs    |   6 +-
 examples/orderby/main.rs |   4 +-
 src/join.rs              |   2 +-
 src/lib.rs               | 263 +----------------------------
 src/main.rs              |   7 +-
 src/order.rs             |   6 +-
 src/page.rs              |  71 ++++++++
 src/print.rs             |   6 +-
 src/read.rs              |  57 ++++++-
 src/sql/scanner.rs       |   4 +-
 src/table.rs             | 273 ++++++++++++++++++++++++++++++
 src/value.rs             | 348 +++++++++++++++++++++++++++++++++++----
 src/varint.rs            | 132 +++++++++++++++
 src/vm/mod.rs            |  40 +++++
 16 files changed, 921 insertions(+), 306 deletions(-)
 create mode 100644 src/page.rs
 create mode 100644 src/table.rs
 create mode 100644 src/varint.rs
 create mode 100644 src/vm/mod.rs
diff --git a/Cargo.lock b/Cargo.lock
index 00761aa..3d8f336 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -8,9 +8,16 @@ version = "1.0.95"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04"
 
+[[package]]
+name = "byteorder"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
+
 [[package]]
 name = "csv"
 version = "0.1.0"
 dependencies = [
  "anyhow",
+ "byteorder",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index 9c93cee..3928faa 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -4,4 +4,5 @@ version = "0.1.0"
 edition = "2021"
 
 [dependencies]
+byteorder = "1.5"
 anyhow = "1.0"
diff --git a/examples/join/main.rs b/examples/join/main.rs
index 0eb9532..0a77ba7 100644
--- a/examples/join/main.rs
+++ b/examples/join/main.rs
@@ -1,8 +1,8 @@
-use csv::Table;
+use csv::table::Table;
 
 fn main() {
-    let left = Table::from_csv(include_str!("data/left.csv"), "\t");
-    let right = Table::from_csv(include_str!("data/right.csv"), "\t");
+    let left = Table::from_csv(include_str!("data/left.csv"), Some("\t"));
+    let right = Table::from_csv(include_str!("data/right.csv"), Some("\t"));
     println!("left:");
     left.select("*");
     println!("\nright:");
diff --git a/examples/orderby/main.rs b/examples/orderby/main.rs
index 4aa8c7d..24ab143 100644
--- a/examples/orderby/main.rs
+++ b/examples/orderby/main.rs
@@ -1,7 +1,7 @@
-use csv::Table;
+use csv::table::Table;
 
 fn main() {
-    let table = Table::from_csv(include_str!("data/table.csv"), "\t");
+    let table = Table::from_csv(include_str!("data/table.csv"), Some("\t"));
     println!("not ordered:");
     table.select("*");
 
diff --git a/src/join.rs b/src/join.rs
index e55cde6..c85be63 100644
--- a/src/join.rs
+++ b/src/join.rs
@@ -1,4 +1,4 @@
-use crate::Table;
+use crate::table::Table;
 
 impl Table {
     pub fn left_join(&self, right: &Table, left_col: &str, right_col: &str, outer: bool) -> Table {
diff --git a/src/lib.rs b/src/lib.rs
index a0faeb5..595d069 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,266 +1,11 @@
 pub mod groupby;
 pub mod join;
 pub mod order;
+mod page;
 pub mod print;
 pub mod read;
 pub mod sql;
+pub mod table;
 pub mod value;
-
-use std::{
-    cmp::Ordering,
-    collections::{BTreeMap, HashMap},
-    ops::Add,
-};
-
-use value::Value;
-
-pub struct Table {
-    name: String,
-    cols_by_name: HashMap<String, usize>,
-    cols: Vec<String>,
-    records: BTreeMap<Key, Record>,
-}
-
-impl Table {
-    pub fn new(name: impl Into<String>) -> Self {
-        Self {
-            name: name.into(),
-            cols_by_name: HashMap::new(),
-            cols: vec![],
-            records: BTreeMap::new(),
-        }
-    }
-
-    /// Creates a new table with the same name and columns as self,
-    /// but without data
-    // Note to self: be careful, might be dangerous to use once tables can be altered.
-    // That is not yet implemented. May need full copies
-    pub fn empty_copy(&self) -> Self {
-        let mut result = Table::new(self.name.clone());
-        result.cols_by_name = self.cols_by_name.clone();
-        result.cols = self.cols.clone();
-        result
-    }
-
-    pub fn add_record(&mut self, record: Record) {
-        let index = self.records.len();
-        self.records.insert(Key::integer(index), record);
-    }
-
-    pub fn has_column(&self, name: impl Into<String>) -> bool {
-        self.cols_by_name.contains_key(&name.into())
-    }
-
-    pub fn add_column(&mut self, name: impl Into<String>, allow_duplicates: bool) {
-        let col_index = self.cols.len();
-        let orig_name: String = name.into();
-
-        let name = if allow_duplicates {
-            // append an index when there are duplicate column names
-            let mut col_name = orig_name.to_string();
-            let mut index = 2;
-
-            while self.has_column(&col_name) {
-                col_name = orig_name.to_string();
-                col_name.push_str(format!("{}", index).as_str());
-                index += 1;
-            }
-            col_name
-        } else {
-            orig_name
-        };
-
-        self.cols_by_name.insert(name.clone(), col_index);
-        self.cols.push(name);
-    }
-
-    fn get_indexes(&self, expression: &str) -> Vec<usize> {
-        expression
-            .split(",")
-            .map(|c| self.get_index(c.trim()))
-            .collect::<Vec<usize>>()
-    }
-
-    fn get_index(&self, col_name: &str) -> usize {
-        *self.cols_by_name.get(col_name).unwrap()
-    }
-
-    pub fn iter(&self) -> TableIter {
-        self.iter_records()
-    }
-
-    pub fn iter_records(&self) -> TableIter {
-        TableIter {
-            table_iter: self.records.iter(),
-        }
-    }
-
-    pub fn select_columns<'a>(&'a self, columns: &'a Vec<&'a str>) -> OwnedColIter<'a> {
-        OwnedColIter {
-            cols: columns,
-            index: 0,
-        }
-    }
-
-    pub fn iter_colums(&self) -> ColIter {
-        ColIter {
-            cols: &self.cols,
-            index: 0,
-        }
-    }
-
-    pub fn where_clause(&self, colindex: usize, value: &Value) -> Option<&Record> {
-        for record in self.iter_records() {
-            let r = record.get(colindex);
-            if r == value {
-                return Some(record);
-            }
-        }
-        None
-    }
-}
-
-#[derive(Debug, Clone)]
-pub struct Record {
-    values: Vec<Value>,
-}
-
-impl Record {
-    pub fn len(&self) -> usize {
-        self.values.iter().map(Value::len).sum()
-    }
-
-    pub fn add_value(&mut self, value: impl Into<Value>) {
-        self.values.push(value.into());
-    }
-
-    pub fn get(&self, index: usize) -> &Value {
-        self.values.get(index).unwrap_or(&Value::NULL)
-    }
-}
-
-impl Add for &Record {
-    type Output = Record;
-
-    fn add(self, rhs: Self) -> Self::Output {
-        let mut sum = Record::default();
-        sum.values.append(&mut self.values.clone());
-        sum.values.append(&mut rhs.values.clone()); // use refs?
-        sum
-    }
-}
-
-impl Default for Record {
-    fn default() -> Self {
-        Self { values: vec![] }
-    }
-}
-
-pub struct TableIter<'a> {
-    table_iter: std::collections::btree_map::Iter<'a, Key, Record>,
-}
-
-impl<'a> Iterator for TableIter<'a> {
-    type Item = &'a Record;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        self.table_iter.next().map(|e| e.1)
-    }
-}
-
-pub struct ColIter<'a> {
-    cols: &'a Vec<String>,
-    index: usize,
-}
-
-pub struct OwnedColIter<'a> {
-    cols: &'a Vec<&'a str>,
-    index: usize,
-}
-
-impl<'a> Iterator for ColIter<'a> {
-    type Item = &'a String;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        if let Some(v) = self.cols.get(self.index) {
-            self.index += 1;
-            Some(v)
-        } else {
-            None
-        }
-    }
-}
-
-impl<'a> Iterator for OwnedColIter<'a> {
-    type Item = &'a str;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        if let Some(v) = self.cols.get(self.index) {
-            self.index += 1;
-            Some(v)
-        } else {
-            None
-        }
-    }
-}
-
-struct Key {
-    values: Vec<Value>,
-}
-
-impl Key {
-    fn integer(integer: usize) -> Self {
-        Self {
-            values: vec![Value::Integer(integer as i64)],
-        }
-    }
-
-    fn compound(keys: Vec<Value>) -> Self {
-        Self { values: keys }
-    }
-}
-impl Ord for Key {
-    fn cmp(&self, other: &Self) -> Ordering {
-        self.partial_cmp(other).unwrap()
-    }
-}
-impl Eq for Key {}
-
-impl PartialEq for Key {
-    fn eq(&self, other: &Self) -> bool {
-        if self.values.len() != other.values.len() {
-            false
-        } else {
-            for (l, r) in self.values.iter().zip(&other.values) {
-                if l != r {
-                    return false;
-                }
-            }
-            true
-        }
-    }
-}
-
-impl PartialOrd for Key {
-    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
-        let len = self.values.len().min(other.values.len());
-        for i in 0..len {
-            let ord = self
-                .values
-                .get(i)
-                .unwrap()
-                .partial_cmp(other.values.get(i).unwrap())
-                .unwrap();
-            match ord {
-                Ordering::Less => {
-                    return Some(Ordering::Less);
-                }
-                Ordering::Greater => {
-                    return Some(Ordering::Greater);
-                }
-                _ => {}
-            }
-        }
-        Some(Ordering::Equal)
-    }
-}
+mod varint;
+pub mod vm;
diff --git a/src/main.rs b/src/main.rs
index bc56ed2..94bb43f 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,4 +1,7 @@
+use csv::table::Table;
 
-fn main (){
-
+fn main() {
+    let csv = include_str!("data/portfolios.csv"); // file contents are embedded at compile time
+    let table = Table::from_csv(csv, None); // None: the separator is guessed from the data
+    table.order_by("name").select("*"); // the `name` column must exist: Table::get_index unwraps the lookup
 }
diff --git a/src/order.rs b/src/order.rs
index 8b7d3c0..5fc07ac 100644
--- a/src/order.rs
+++ b/src/order.rs
@@ -1,10 +1,12 @@
 use std::collections::BTreeMap;
 
-use crate::{Key, Table};
+use crate::table::{Key, Table};
 
 impl Table {
     pub fn order_by(&self, expression: &str) -> Self {
-        let indexes = self.get_indexes(expression);
+        let indexes = self.get_column_indexes(expression);
+        if self.views.contains_key(expression) {}
+
         let mut sorted_records = BTreeMap::new();
         for record in self.iter() {
             let key = indexes.iter().map(|i| record.get(*i).clone()).collect();
diff --git a/src/page.rs b/src/page.rs
new file mode 100644
index 0000000..80b2918
--- /dev/null
+++ b/src/page.rs
@@ -0,0 +1,71 @@
+use std::ops::Add;
+
+use crate::value::{Value, NULL};
+
+const PAGE_SIZE: usize = 4096; // length in bytes of the buffer that Page::new allocates
+
+pub enum PageType { // NOTE(review): presumably the node kinds of a tree of pages (Page has `children`) — confirm
+    Root,
+    Interior,
+    Leaf,
+}
+
+pub struct Page {
+    pagetype: PageType,
+    data: Vec<u8>, // raw page bytes; Page::new allocates PAGE_SIZE zeroed bytes
+    index_pos: u16, // starts at 0 in Page::new; presumably the next free index slot — TODO confirm
+    data_pos: u16, // starts at PAGE_SIZE - 1 in Page::new; presumably record data grows down from the end — TODO confirm
+    key: usize,
+    children: Vec<Page>, // empty for a new page
+}
+
+impl Page {
+    pub fn new(pagetype: PageType) -> Self { // creates an empty, zero-filled page of the given type
+        Self {
+            pagetype,
+            data: vec![0; PAGE_SIZE],
+            index_pos: 0,
+            data_pos: (PAGE_SIZE - 1) as u16, // 4095 fits in u16; this is the index of the last byte of `data`
+            key: 0,
+            children: vec![],
+        }
+    }
+    // Not implemented yet: the body is empty, so the record is dropped without being stored.
+    pub fn add_record(&mut self, record: Record) {}
+}
+
+#[derive(Debug, Clone)]
+pub struct Record { // one row of values, addressed by position (see Record::get)
+    values: Vec<Value>,
+}
+
+impl Record {
+    pub fn string_len(&self) -> usize { // sum of the string lengths of all values in the record
+        self.values.iter().map(|value| value.string_len()).sum()
+    }
+    /// Appends `value` after the values this record already holds.
+    pub fn add_value(&mut self, value: impl Into<Value>) {
+        self.values.push(value.into());
+    }
+    /// Returns a copy of the value at `index`, or `NULL` when the record has no such position.
+    pub fn get(&self, index: usize) -> Value {
+        self.values.get(index).cloned().unwrap_or(NULL)
+    }
+}
+
+impl Add for &Record {
+    type Output = Record;
+    /// Concatenates two records: the values of `self` followed by the values of `rhs`.
+    fn add(self, rhs: Self) -> Self::Output {
+        let mut joined = Record::default();
+        joined.values.extend_from_slice(&self.values); // clones every value; the operands stay untouched
+        joined.values.extend_from_slice(&rhs.values);
+        joined
+    }
+}
+
+impl Default for Record {
+    fn default() -> Self { // the default record holds no values
+        Self { values: Vec::new() }
+    }
+}
diff --git a/src/print.rs b/src/print.rs
index d542b3d..9b7c9de 100644
--- a/src/print.rs
+++ b/src/print.rs
@@ -1,6 +1,6 @@
 use std::collections::HashMap;
 
-use crate::Table;
+use crate::table::Table;
 
 impl Table {
     /// prints the table contents in nice columns on the command line
@@ -70,7 +70,7 @@ impl Table {
             for col in self.iter_colums() {
                 let e = widths.get_mut(&col).unwrap();
                 let index = self.get_index(col);
-                *e = (*e).max(record.get(index).len());
+                *e = (*e).max(record.get(index).string_len());
             }
         }
         widths
@@ -96,7 +96,7 @@ impl Table {
             for col in self.select_columns(columns) {
                 let e = widths.get_mut(&col).unwrap();
                 let index = self.get_index(&col);
-                *e = (*e).max(record.get(index).len());
+                *e = (*e).max(record.get(index).string_len());
             }
         }
         widths
diff --git a/src/read.rs b/src/read.rs
index d084ecd..0614df9 100644
--- a/src/read.rs
+++ b/src/read.rs
@@ -1,16 +1,32 @@
-use crate::{Record, Table};
+use crate::table::{Record, Table};
+use std::fs;
+
+const EOL: &str = "\n"; // row terminator; NOTE(review): with CRLF input the '\r' stays attached to the last field of each row
 
 impl Table {
-    pub fn from_csv(csv: &str, separator: &str) -> Self {
-        let mut table = Table::new("test");
-        for (index, row) in csv.split("\n").enumerate() {
+    pub fn from_csv_file(name: &str, separator: Option<&str>) -> anyhow::Result<Table> {
+        let csv = fs::read_to_string(name)?;
+        // from_csv returns a Table directly, so only the read above can produce an Err
+        Ok(Table::from_csv(csv, separator))
+    }
+
+    pub fn from_csv(csv: impl Into<String>, separator: Option<&str>) -> Self {
+        let csv = csv.into();
+        let separator = separator.unwrap_or_else(|| { // lazy: the input is only scanned when no separator was given
+            guess_separator(&csv)
+                .expect("You did not give me a separator and I could not guess it from the data")
+        });
+        let mut table = Table::new("");
+        for (index, row) in csv.split(EOL).enumerate() {
             if index == 0 {
                 for col in row.split(separator) {
                     table.add_column(col, true);
                 }
             } else if row.len() > 0 {
+                // skip empty lines
                 let mut record = Record::default();
                 for value in row.split(separator) {
+                    //TODO quoted values
                     record.add_value(value);
                 }
                 table.add_record(record);
@@ -19,3 +35,36 @@ impl Table {
         table
     }
 }
+
+/// Guesses the column separator by counting tabs, semicolons, commas and pipes
+/// in `csv` and returning the most frequent one.
+/// Ties go to the candidate listed last (tab < semicolon < comma < pipe), so input
+/// that contains none of them yields `Some("|")`; the result is currently never `None`.
+fn guess_separator(csv: &str) -> Option<&'static str> {
+    let mut tabs = 0;
+    let mut semis = 0;
+    let mut commas = 0;
+    let mut pipes = 0;
+    for c in csv.chars() {
+        match c {
+            '\t' => tabs += 1,
+            ';' => semis += 1,
+            ',' => commas += 1,
+            '|' => pipes += 1,
+            _ => {}
+        }
+    }
+    [(tabs, "\t"), (semis, ";"), (commas, ","), (pipes, "|")]
+        .into_iter()
+        .max_by_key(|&(count, _)| count) // max_by_key keeps the last of several equal maxima
+        .map(|(_, separator)| separator)
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    #[test]
+    fn test() {
+        assert_eq!(guess_separator(&"a,b,c|d".to_string()), Some(",")); // two commas beat one pipe
+    }
+}
diff --git a/src/sql/scanner.rs b/src/sql/scanner.rs
index 59fbf24..6f7b6ee 100644
--- a/src/sql/scanner.rs
+++ b/src/sql/scanner.rs
@@ -2,7 +2,7 @@ use std::collections::HashMap;
 
 use anyhow::anyhow;
 
-use crate::value::Value;
+use crate::value::{Value, NULL};
 
 use super::tokens::{Token, TokenType};
 
@@ -154,7 +154,7 @@ impl Scanner {
 
     fn add_token(&mut self, tokentype: TokenType) {
         let text = self.source[self.start..self.current].to_string();
-        self.tokens.push(Token::new(tokentype, text, Value::NULL));
+        self.tokens.push(Token::new(tokentype, text, NULL)); // NULL is now the constant imported from crate::value
     }
 
     fn add_literal(&mut self, tokentype: TokenType, literal: Value) {
diff --git a/src/table.rs b/src/table.rs
new file mode 100644
index 0000000..0b8293e
--- /dev/null
+++ b/src/table.rs
@@ -0,0 +1,273 @@
+use std::{
+    cmp::Ordering,
+    collections::{BTreeMap, HashMap},
+    iter::Map,
+    ops::Add,
+};
+
+use crate::value::Value;
+
+pub struct View {
+    records: BTreeMap<Key, Key>, // NOTE(review): presumably view key -> key of the record in Table::records — confirm
+}
+
+pub struct Table {
+    name: String,
+    cols_by_name: HashMap<String, usize>, // column name -> position of the column in `cols`
+    pub(crate) cols: Vec<String>, // column names in the order they were added
+    pub(crate) records: BTreeMap<Key, Record>, // rows; add_record keys them with Key::integer(current row count)
+    pub views: HashMap<String, View>, // created empty in Table::new
+}
+
+impl Table {
+    pub fn new(name: impl Into<String>) -> Self { // creates a table without columns, records or views
+        Self {
+            name: name.into(),
+            cols_by_name: HashMap::new(),
+            cols: vec![],
+            records: BTreeMap::new(),
+            views: HashMap::new(),
+        }
+    }
+
+    /// Creates a new table with the same name and columns as self,
+    /// but without data
+    // Note to self: be careful, might be dangerous to use once tables can be altered.
+    // That is not yet implemented. May need full copies
+    pub fn empty_copy(&self) -> Self {
+        let mut result = Table::new(self.name.clone());
+        result.cols_by_name = self.cols_by_name.clone();
+        result.cols = self.cols.clone();
+        result
+    }
+    /// Stores `record` under an integer key equal to the number of records already present.
+    pub fn add_record(&mut self, record: Record) {
+        let index = self.records.len();
+        self.records.insert(Key::integer(index), record);
+    }
+    /// Returns true when a column with exactly this name exists.
+    pub fn has_column(&self, name: impl Into<String>) -> bool {
+        self.cols_by_name.contains_key(&name.into())
+    }
+    /// Adds a column; with `allow_duplicates` a taken name gets the suffix 2, 3, ... until it is free.
+    pub fn add_column(&mut self, name: impl Into<String>, allow_duplicates: bool) {
+        let col_index = self.cols.len();
+        let orig_name: String = name.into();
+
+        let name = if allow_duplicates {
+            // append an index when there are duplicate column names
+            let mut col_name = orig_name.to_string();
+            let mut index = 2;
+
+            while self.has_column(&col_name) {
+                col_name = orig_name.to_string();
+                col_name.push_str(format!("{}", index).as_str());
+                index += 1;
+            }
+            col_name
+        } else {
+            orig_name // a name that already exists is re-pointed to the new column by the insert below
+        };
+
+        self.cols_by_name.insert(name.clone(), col_index);
+        self.cols.push(name);
+    }
+    /// Resolves a comma separated list of column names to column positions; panics on an unknown name.
+    pub fn get_column_indexes(&self, expression: &str) -> Vec<usize> {
+        expression
+            .split(",")
+            .map(|c| self.get_index(c.trim()))
+            .collect::<Vec<usize>>()
+    }
+    /// Returns the position of column `col_name`; panics when the table has no such column.
+    pub fn get_index(&self, col_name: &str) -> usize {
+        *self.cols_by_name.get(col_name).unwrap()
+    }
+    /// Same as `iter_records`.
+    pub fn iter(&self) -> TableIter {
+        self.iter_records()
+    }
+    /// Iterates over the records in the key order of the underlying BTreeMap.
+    pub fn iter_records(&self) -> TableIter {
+        TableIter {
+            table_iter: self.records.iter(),
+        }
+    }
+    /// Iterates over the caller supplied column names; the table's own columns are not consulted.
+    pub fn select_columns<'a>(&'a self, columns: &'a Vec<&'a str>) -> OwnedColIter<'a> {
+        OwnedColIter {
+            cols: columns,
+            index: 0,
+        }
+    }
+    /// Iterates over the names of all columns of this table, in column order.
+    pub fn iter_colums(&self) -> ColIter {
+        ColIter {
+            cols: &self.cols,
+            index: 0,
+        }
+    }
+    /// Returns the first record whose value in column `colindex` equals `value`, if any.
+    pub fn where_clause(&self, colindex: usize, value: &Value) -> Option<&Record> {
+        for record in self.iter_records() {
+            let r = record.get(colindex);
+            if r == value {
+                return Some(record);
+            }
+        }
+        None
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct Record { // one table row
+    values: Vec<Value>, // the row's values, addressed by column position (see Table::get_index)
+}
+
+impl Record {
+    pub fn string_len(&self) -> usize { // sum of the string lengths of all values in the record
+        self.values.iter().map(Value::string_len).sum()
+    }
+    pub fn add_value(&mut self, value: impl Into<Value>) { // appends `value` as the next column
+        self.values.push(value.into());
+    }
+    /// Returns the value at `index`, or a NULL value when the record is shorter than that.
+    pub fn get(&self, index: usize) -> &Value {
+        static MISSING: Value = Value::null(); // a static lives forever, so it can be handed out by reference
+        self.values.get(index).unwrap_or(&MISSING)
+    }
+}
+
+impl Add for &Record {
+    type Output = Record;
+    /// Concatenates two records: the values of `self` followed by the values of `rhs`.
+    fn add(self, rhs: Self) -> Self::Output {
+        let mut joined = Record::default();
+        joined.values.extend_from_slice(&self.values); // clones every value; the operands stay untouched
+        joined.values.extend_from_slice(&rhs.values);
+        joined
+    }
+}
+
+impl Default for Record {
+    fn default() -> Self { // the default record holds no values
+        Self { values: Vec::new() }
+    }
+}
+
+pub struct TableIter<'a> {
+    table_iter: std::collections::btree_map::Iter<'a, Key, Record>, // borrowed iterator over Table::records
+}
+
+pub struct ViewIter<'a> { // NOTE(review): not constructed and without an Iterator impl in the code shown here
+    iter: Map<
+        std::collections::btree_map::Iter<'a, Key, Key>,
+        Box<dyn Fn((&'a Key, &'a Key)) -> Option<&'a Record>>,
+    >,
+}
+
+impl<'a> Iterator for TableIter<'a> {
+    type Item = &'a Record;
+    /// Yields the next record of the underlying map iterator and drops its key.
+    fn next(&mut self) -> Option<Self::Item> {
+        self.table_iter.next().map(|(_, record)| record)
+    }
+}
+
+pub struct ColIter<'a> {
+    cols: &'a Vec<String>, // the column names being iterated (Table::cols in iter_colums)
+    index: usize, // position of the next name to yield
+}
+
+pub struct OwnedColIter<'a> {
+    cols: &'a Vec<&'a str>, // the caller supplied column names passed to Table::select_columns
+    index: usize, // position of the next name to yield
+}
+
+impl<'a> Iterator for ColIter<'a> {
+    type Item = &'a String;
+    /// Returns the column name at the current position and moves one position
+    /// forward. Once the position is past the last column it returns `None` and
+    /// the position stays where it is.
+    fn next(&mut self) -> Option<Self::Item> {
+        // `?` returns None early, before `index` is touched
+        let name = self.cols.get(self.index)?;
+        self.index += 1;
+        Some(name)
+    }
+}
+
+impl<'a> Iterator for OwnedColIter<'a> {
+    type Item = &'a str;
+    /// Returns the column name at the current position and moves one position
+    /// forward. Once the position is past the last name it returns `None` and
+    /// the position stays where it is.
+    fn next(&mut self) -> Option<Self::Item> {
+        // `?` returns None early, before `index` is touched
+        let name = self.cols.get(self.index)?;
+        self.index += 1;
+        Some(name)
+    }
+}
+
+#[derive(Debug)]
+pub struct Key {
+    values: Vec<Value>, // one value for Key::integer, any number for Key::compound
+}
+
+impl Key {
+    pub fn integer(integer: usize) -> Self { // single-value key holding `integer` (converted through Into<Value> for usize)
+        Self {
+            values: vec![integer.into()],
+        }
+    }
+    /// Builds a key from several values; comparison goes through them in the given order.
+    pub fn compound(keys: Vec<Value>) -> Self {
+        Self { values: keys }
+    }
+}
+impl Ord for Key {
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.partial_cmp(other).unwrap_or(Ordering::Equal) // keys that partial_cmp cannot order count as equal
+    }
+}
+impl Eq for Key {} // marker only; the equality relation itself comes from the PartialEq impl for Key
+
+impl PartialEq for Key {
+    /// Two keys are equal when they hold the same number of values and every pair
+    /// of values at the same position is equal. This is stricter than the ordering
+    /// of keys, which only looks at the common prefix of the two keys.
+    fn eq(&self, other: &Self) -> bool {
+        let same_len = self.values.len() == other.values.len();
+        same_len
+            && self
+                .values
+                .iter()
+                .zip(&other.values)
+                .all(|(l, r)| l == r)
+    }
+}
+
+impl PartialOrd for Key {
+    /// Orders two keys by comparing their values position by position over the
+    /// common prefix of both keys.
+    ///
+    /// The first position that orders as `Less` or `Greater` decides the result.
+    /// Positions that are equal, or whose values cannot be compared
+    /// (`Value::partial_cmp` returned `None`), are skipped.
+    ///
+    /// When no position decides, the result is `Some(Ordering::Equal)`. `Ord::cmp`
+    /// for `Key` reports `Equal` in that case, and `PartialOrd` has to agree with
+    /// `Ord`, so this must not be `None`: otherwise `a <= a` would be false for
+    /// every key.
+    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+        for (l, r) in self.values.iter().zip(&other.values) {
+            match l.partial_cmp(r) {
+                Some(Ordering::Less) => return Some(Ordering::Less),
+                Some(Ordering::Greater) => return Some(Ordering::Greater),
+                _ => {}
+            }
+        }
+        Some(Ordering::Equal)
+    }
+}
diff --git a/src/value.rs b/src/value.rs
index d0eda9e..5e550f1 100644
--- a/src/value.rs
+++ b/src/value.rs
@@ -1,44 +1,180 @@
-use std::fmt::Display;
+use std::{cmp::Ordering, fmt::Display};
 
-#[derive(Debug, PartialEq, PartialOrd, Clone)]
-pub enum Value {
-    Text(String),
-    Float(f64),
-    Integer(i64),
-    NULL,
+use anyhow::anyhow;
+use byteorder::{BigEndian, ByteOrder};
+
+pub const NULL: Value = Value::null(); // the NULL value: type code 0, no payload bytes
+
+#[derive(Debug, Clone, PartialEq, Eq, Ord)] // NOTE(review): the derived Ord (datatype, then data bytes) can disagree with the hand-written PartialOrd
+pub struct Value {
+    datatype: u64, // type code: 0 NULL, 1..=6 integer, 7 float, 8 and 9 the integers 0 and 1, even >= 12 blob, odd >= 13 text
+    data: Vec<u8>, // payload bytes; empty for NULL and for the integer codes 8 and 9
+}
+
+impl PartialOrd for Value {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> { // dispatches on the type code of `self` only
+        match self.datatype {
+            13.. if self.datatype % 2 == 1 => Some(self.to_string().cmp(&other.to_string())), // text: compares the string forms, whatever type `other` has
+            12.. if self.datatype % 2 == 0 => None, // can't use blob as key
+            8..=9 => integer_cmp(self, other), // the constants 0 and 1 compare as integers
+            7 => {
+                let l: anyhow::Result<f64> = self.into();
+                let r: anyhow::Result<f64> = other.into();
+                if let Ok(l) = l {
+                    if let Ok(r) = r {
+                        l.partial_cmp(&r) // None when either side is NaN
+                    } else {
+                        None // `other` is not a float: float and integer are never compared with each other
+                    }
+                } else {
+                    None
+                }
+            }
+            1..=6 => integer_cmp(self, other),
+            0 => None, // NULL has no ordering
+            _ => None, // the unassigned type codes 10 and 11
+        }
+    }
+}
+
+/// Orders two values by their integer content.
+///
+/// Both sides are converted with the `Into<anyhow::Result<i64>>` impl for `&Value`;
+/// when either conversion fails (NULL, or not an integer type) there is no ordering
+/// and `None` is returned.
+fn integer_cmp(l: &Value, r: &Value) -> Option<Ordering> {
+    let lhs: anyhow::Result<i64> = l.into();
+    let rhs: anyhow::Result<i64> = r.into();
+    match (lhs, rhs) {
+        (Ok(a), Ok(b)) => a.partial_cmp(&b),
+        _ => None,
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum Datatype { // the logical type behind a numeric type code, as returned by Value::datatype
+    Text, // odd type codes from 13 up
+    Blob, // even type codes from 12 up
+    Integer, // type codes 1..=6, 8 and 9
+    Float, // type code 7
+    Null, // type code 0
 }
 
 impl Value {
-    pub fn len(&self) -> usize {
-        match self {
-            Value::Text(text) => text.len(),
-            Value::Float(float) => format!("{}", float).len(),
-            Value::Integer(integer) => format!("{}", integer).len(),
-            Value::NULL => 0,
+    pub const fn null() -> Self {
+        // NULL is type code 0 and carries no payload bytes
+        Self {
+            data: vec![],
+            datatype: 0,
         }
     }
+    /// Stores `value` as a float (type code 7): its 8 bytes in big-endian order.
+    pub fn from_f64(value: f64) -> Self {
+        let mut buf = vec![0; 8];
+        BigEndian::write_f64(&mut buf, value);
+        Self {
+            datatype: 7,
+            data: buf,
+        }
+    }
+    /// Stores an integer: 0 and 1 use the payload-free codes 8 and 9, anything else the bytes from `as_bytes`.
+    pub fn from_i64(value: i64) -> Self {
+        let (datatype, data) = match value {
+            0 => (8, vec![]),
+            1 => (9, vec![]),
+            _ => {
+                let data = as_bytes(value);
+                (int_datatype(data.len()), data)
+            }
+        };
+        Self { datatype, data }
+    }
+    /// Stores text as its UTF-8 bytes; the type code 13 + 2 * byte length is always odd.
+    pub fn from_text(value: impl Into<String>) -> Self {
+        let value: String = value.into();
+        let datatype = (13 + value.len() * 2) as u64;
+        let data = value.as_bytes().to_vec();
+        Self { datatype, data }
+    }
+    /// Maps the numeric type code to a `Datatype`; only the unassigned codes 10 and 11 are an error.
+    pub fn datatype(&self) -> anyhow::Result<Datatype> {
+        match self.datatype {
+            13.. if self.datatype % 2 == 1 => Ok(Datatype::Text),
+            12.. if self.datatype % 2 == 0 => Ok(Datatype::Blob),
+            8..=9 => Ok(Datatype::Integer),
+            7 => Ok(Datatype::Float),
+            1..=6 => Ok(Datatype::Integer),
+            0 => Ok(Datatype::Null),
+            _ => Err(anyhow!("Illegal type '{}'", self.datatype)),
+        }
+    }
+    /// Length of the value when written as a string; for text and blobs this is the byte length.
+    pub fn string_len(&self) -> usize {
+        match self.datatype {
+            13.. if self.datatype % 2 == 1 => ((self.datatype - 13) >> 1) as usize,
+            12.. if self.datatype % 2 == 0 => ((self.datatype - 12) >> 1) as usize,
+            8..=9 => 1,
+            7 => {
+                let f = BigEndian::read_f64(&self.data);
+                format!("{}", f).len()
+            }
+            1..=6 => {
+                let i = BigEndian::read_int(&self.data, self.data.len()); // payload is 1..=8 bytes; read_i64 panics on fewer than 8
+                format!("{}", i).len()
+            }
+            0 => 4, // NULL
+            _ => 0, // should be Err
+        }
+    }
+}
+
+fn int_datatype(encoded_len: usize) -> u64 { // type code for an integer payload of `encoded_len` bytes
+    match encoded_len {
+        0..=4 => encoded_len as u64,
+        5 | 6 => 5,
+        _ => 6,
+    }
+}
+
+fn as_bytes(v: i64) -> Vec<u8> { // encoding_len picks the width for `v`, encode writes that many bytes
+    encode(v, encoding_len(v))
+}
+
+fn encode(v: i64, len: usize) -> Vec<u8> {
+    // the `len` low-order bytes of `v`, most significant byte first
+    (0..len)
+        .rev()
+        .map(|byte| (v >> (byte * 8)) as u8)
+        .collect()
+}
+
+fn encoding_len(v: i64) -> usize { // smallest of 1, 2, 3, 4, 6 or 8 bytes that holds `v` in two's complement
+    let u = if v < 0 { !v } else { v }; // !v maps a negative value onto the non-negative value that needs the same width
+    match u {
+        ..128 => 1,
+        ..32768 => 2,
+        ..8388608 => 3, // 2^23: an n-byte signed integer holds every magnitude below 2^(8n-1)
+        ..2147483648 => 4,
+        ..140737488355328 => 6, // 2^47
+        _ => 8,
+    }
 }
 
 impl Display for Value {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let text = match self {
-            Value::Float(float) => format!("{}", float),
-            Value::Integer(integer) => format!("{}", integer),
-            Value::Text(text) => format!("\"{}\"", text),
-            Value::NULL => "NULL".to_string(),
-        };
-        write!(f, "{}", text)
+        let s: String = self.into(); // uses Into<String> for &Value; text is no longer wrapped in quotes
+        write!(f, "{}", s)
     }
 }
 
 impl Into<Value> for &str {
     fn into(self) -> Value {
         if let Ok(f) = self.parse::<f64>() {
-            Value::Float(f)
+            Value::from_f64(f) // NOTE(review): the f64 parse also accepts integer literals such as "42", so they are stored as floats
         } else if let Ok(i) = self.parse::<i64>() {
-            Value::Integer(i)
+            Value::from_i64(i) // NOTE(review): looks unreachable, because the f64 parse above is tried first
         } else {
-            Value::Text(strip_quotes(self))
+            Value::from_text(strip_quotes(self)) // everything that is not a number is stored as text
         }
     }
 }
@@ -46,24 +182,102 @@ impl Into<Value> for &str {
 impl Into<Value> for String {
     fn into(self) -> Value {
         if let Ok(f) = self.parse::<f64>() {
-            Value::Float(f)
+            Value::from_f64(f) // NOTE(review): the f64 parse also accepts integer literals such as "42", so they are stored as floats
         } else if let Ok(i) = self.parse::<i64>() {
-            Value::Integer(i)
+            Value::from_i64(i) // NOTE(review): looks unreachable, because the f64 parse above is tried first
         } else {
-            Value::Text(strip_quotes(self))
+            Value::from_text(strip_quotes(self)) // everything that is not a number is stored as text
         }
     }
 }
 
 impl Into<Value> for f64 {
     fn into(self) -> Value {
-        Value::Float(self)
+        Value::from_f64(self) // always type code 7 with an 8 byte payload
     }
 }
 
 impl Into<Value> for i64 {
     fn into(self) -> Value {
-        Value::Integer(self)
+        Value::from_i64(self) // from_i64 chooses the type code and payload width from the value
+    }
+}
+
+impl Into<Value> for usize {
+    fn into(self) -> Value {
+        Value::from_i64(self as i64) // NOTE(review): `as` wraps to a negative number for values above i64::MAX
+    }
+}
+
+impl Into<Value> for i32 {
+    fn into(self) -> Value {
+        Value::from_i64(self as i64) // widening cast, keeps the value
+    }
+}
+
+impl Into<String> for Value {
+    fn into(self) -> String {
+        (&self).into() // delegates to Into<String> for &Value
+    }
+}
+
+impl Into<String> for &Value {
+    fn into(self) -> String { // string form of the value, chosen by its type code
+        match self.datatype {
+            13.. if self.datatype % 2 == 1 => String::from_utf8_lossy(&self.data).into_owned(), // valid?
+            12.. if self.datatype % 2 == 0 => String::from_utf8_lossy(&self.data).into_owned(), // blob: invalid UTF-8 is replaced, not rejected
+            8 => "0".to_string(),
+            9 => "1".to_string(),
+            7 => {
+                let f: anyhow::Result<f64> = self.into();
+                format!("{}", f.unwrap()) // the f64 conversion returns Ok for type code 7
+            }
+            1..=6 => {
+                let i: anyhow::Result<i64> = self.into();
+                format!("{}", i.unwrap()) // the i64 conversion returns Ok for type codes 1..=6
+            }
+            0 => "NULL".to_string(),                          // NULL
+            _ => format!("Illegal type '{}'", self.datatype), // should be Err
+        }
+    }
+}
+
+impl Into<anyhow::Result<f64>> for Value {
+    fn into(self) -> anyhow::Result<f64> {
+        (&self).into() // delegates to the conversion for &Value
+    }
+}
+
+impl Into<anyhow::Result<f64>> for &Value {
+    fn into(self) -> anyhow::Result<f64> {
+        if self.datatype == 7 {
+            Ok(BigEndian::read_f64(&self.data)) // from_f64 stores exactly 8 big-endian bytes
+        } else {
+            Err(anyhow!("not a float")) // integers are not converted to float here
+        }
+    }
+}
+
+impl Into<anyhow::Result<i64>> for Value {
+    fn into(self) -> anyhow::Result<i64> {
+        (&self).into() // delegates to the conversion for &Value
+    }
+}
+
+impl Into<anyhow::Result<i64>> for &Value {
+    fn into(self) -> anyhow::Result<i64> { // the type code fixes how many payload bytes are read
+        match self.datatype {
+            0 => Err(anyhow!("value is NULL")),
+            1 => Ok(BigEndian::read_int(&self.data, 1) as i64),
+            2 => Ok(BigEndian::read_int(&self.data, 2) as i64),
+            3 => Ok(BigEndian::read_int(&self.data, 3) as i64),
+            4 => Ok(BigEndian::read_int(&self.data, 4) as i64),
+            5 => Ok(BigEndian::read_int(&self.data, 6) as i64), // type code 5 is the 6 byte integer
+            6 => Ok(BigEndian::read_int(&self.data, 8) as i64), // type code 6 is the 8 byte integer
+            8 => Ok(0), // constant 0, no payload
+            9 => Ok(1), // constant 1, no payload
+            _ => Err(anyhow!("not an integer")), // float, text, blob and unassigned codes
+        }
     }
 }
 
@@ -74,3 +288,81 @@ fn strip_quotes(text: impl Into<String>) -> String {
     }
     text
 }
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_int0() {
+        let zero: Value = 0.into();
+        assert_eq!(zero.datatype, 8); // the constant 0 has its own type
+        assert_eq!(zero.data, Vec::<u8>::new()); // ... and therefore no payload
+        assert_eq!(zero.to_string(), "0");
+        assert_eq!(zero.string_len(), 1);
+        assert_eq!(zero.datatype().unwrap(), Datatype::Integer);
+    }
+
+    #[test]
+    fn test_int1() {
+        let one: Value = 1.into();
+        assert_eq!(one.datatype, 9); // the constant 1 has its own type
+        assert_eq!(one.data, Vec::<u8>::new()); // ... and therefore no payload
+        assert_eq!(one.to_string(), "1");
+        assert_eq!(one.string_len(), 1);
+        assert_eq!(one.datatype().unwrap(), Datatype::Integer);
+    }
+
+    #[test]
+    fn test_int50000() {
+        let int: Value = 50000.into();
+        assert_eq!(int.datatype, 3); // stored in three bytes
+        assert_eq!(int.data, vec![0x00, 0xc3, 0x50]); // 0x00c350 == 50000, big-endian
+        assert_eq!(int.to_string(), "50000");
+        // assert_eq!(i.string_len(), 5); // NOTE(review): disabled in this commit, reason not recorded
+        assert_eq!(int.datatype().unwrap(), Datatype::Integer);
+    }
+
+    #[test]
+    fn test_float0() {
+        let float: Value = 0.0.into();
+        assert_eq!(float.datatype, 7);
+        assert_eq!(float.data, vec![0; 8]); // the bit pattern of 0.0 is all zeroes
+        assert_eq!(float.to_string(), "0");
+        assert_eq!(float.string_len(), 1);
+        assert_eq!(float.datatype().unwrap(), Datatype::Float);
+    }
+
+    #[test]
+    fn test_float1() {
+        let float: Value = 1.0.into();
+        assert_eq!(float.datatype, 7);
+        assert_eq!(float.data, vec![0x3f, 0xf0, 0, 0, 0, 0, 0, 0]); // 1.0 as a big-endian f64
+        assert_eq!(float.to_string(), "1");
+        assert_eq!(float.string_len(), 1);
+        assert_eq!(float.datatype().unwrap(), Datatype::Float);
+    }
+
+    #[test]
+    fn test_float50000() {
+        let float: Value = 50000.2.into();
+        assert_eq!(float.datatype, 7);
+        assert_eq!(float.data, vec![64, 232, 106, 6, 102, 102, 102, 102]);
+        assert_eq!(float.to_string(), "50000.2");
+        assert_eq!(float.string_len(), 7);
+        assert_eq!(float.datatype().unwrap(), Datatype::Float);
+    }
+
+    #[test]
+    fn test_string() {
+        let text: Value = "hello world".into();
+        assert_eq!(text.datatype, ("hello world".len() * 2 + 13) as u64); // odd type encodes the length
+        assert_eq!(
+            text.data,
+            b"hello world".to_vec() // the payload is the raw bytes of the string
+        );
+        assert_eq!(text.to_string(), "hello world");
+        assert_eq!(text.string_len(), 11);
+        assert_eq!(text.datatype().unwrap(), Datatype::Text);
+    }
+}
diff --git a/src/varint.rs b/src/varint.rs
new file mode 100644
index 0000000..9a36c59
--- /dev/null
+++ b/src/varint.rs
@@ -0,0 +1,132 @@
+const SLOT_2_0: u64 = 0x001fc07f; // (0x7f << 14) | 0x7f: keeps the 7-bit groups at bits 0-6 and 14-20
+const SLOT_4_2_0: u64 = 0xf01fc07f; // (0xf << 28) | SLOT_2_0: additionally keeps bits 28-31
+
+/// Encodes `value` as a varint in the format used by `SQLite`.
+/// Values whose top byte is zero become 1-8 bytes of 7 bits each, most significant group
+/// first, with the high bit set on every byte but the last; others take the 9-byte form.
+pub fn write(value: i64) -> Vec<u8> {
+    if (value & ((0xff00_0000) << 32)) != 0 {
+        // Nine-byte form: the last byte stores the low 8 bits verbatim.
+        let mut bytes = vec![0_u8; 9];
+        bytes[8] = value as u8;
+        let mut rest = value >> 8;
+        for slot in bytes[..8].iter_mut().rev() {
+            *slot = ((rest & 0x7f) | 0x80) as u8;
+            rest >>= 7;
+        }
+        return bytes;
+    }
+    if value == 0 {
+        return vec![0];
+    }
+    let mut bytes = Vec::new();
+    let mut rest = value;
+    while rest != 0 {
+        bytes.push(((rest & 0x7f) | 0x80) as u8);
+        rest >>= 7;
+    }
+    bytes[0] &= 0x7f; // the lowest group ends up last, so it must not flag a follow-up byte
+    bytes.reverse();
+    bytes
+}
+
+/// Decodes the SQLite-style varint at the start of `data` (the inverse of `write`).
+///
+/// The first eight bytes each carry 7 payload bits, most significant group first, with the
+/// high bit set while more bytes follow; a ninth byte carries 8 bits. `a` and `b` gather
+/// alternating bytes and `s` holds the bits that end up above bit 31.
+///
+/// # Panics
+/// Panics if `data` ends before the varint does.
+pub fn read(data: Vec<u8>) -> u64 {
+    // A clear high bit on the first byte means the value is that byte.
+    let mut a = data[0] as u64;
+    if (data[0] as i8) >= 0 {
+        return a;
+    }
+
+    let mut b = data[1] as u64;
+    if (b & 0x80) == 0 {
+        return ((a & 0x7f) << 7) | b;
+    }
+
+    a = (a << 14) | data[2] as u64;
+    if (a & 0x80) == 0 {
+        a &= SLOT_2_0;
+        b = (b & 0x7f) << 7;
+        a |= b;
+        return a;
+    }
+
+    a &= SLOT_2_0;
+    b = (b << 14) | data[3] as u64;
+    if (b & 0x80) == 0 {
+        b &= SLOT_2_0;
+        a = (a << 7) | b;
+        return a;
+    }
+
+    // Five or more bytes: the result no longer fits in 32 bits, so track the top part in `s`.
+    b &= SLOT_2_0;
+    let mut s = a;
+    let m = data[4] as u64;
+    a = (a << 14) | m;
+    if (a & 0x80) == 0 {
+        a |= b << 7;
+        s = s >> 18;
+        return (s << 32) | a;
+    }
+
+    s = (s << 7) | b;
+    b = (b << 14) | data[5] as u64;
+    if (b & 0x80) == 0 {
+        a &= SLOT_2_0;
+        a = (a << 7) | b;
+        s = s >> 18;
+        return (s << 32) | a;
+    }
+
+    a = (a << 14) | data[6] as u64;
+    if (a & 0x80) == 0 {
+        a &= SLOT_4_2_0;
+        a |= (b & SLOT_2_0) << 7;
+        s = s >> 11;
+        return (s << 32) | a;
+    }
+
+    a &= SLOT_2_0;
+    b = (b << 14) | data[7] as u64;
+    if (b & 0x80) == 0 {
+        b &= SLOT_4_2_0;
+        a = (a << 7) | b;
+        // `s` holds bytes 0-3 as 7-bit groups; drop the low 4 bits of byte 3, `b` has them.
+        s = s >> 4;
+        return (s << 32) | a;
+    }
+
+    // Nine bytes: the last byte contributes all 8 of its bits.
+    a = (a << 15) | data[8] as u64;
+    a |= (b & SLOT_2_0) << 8;
+    // Bits 32-35 of the result are the top 4 payload bits of byte 4 (`m`).
+    s = (s << 4) | ((m & 0x7f) >> 3);
+    (s << 32) | a
+}
+
+#[cfg(test)]
+mod test {
+    use super::{read, write};
+
+    #[test]
+    fn test_0() {
+        assert_eq!(read(write(0)), 0); // shortest form: a single zero byte
+    }
+
+    #[test]
+    fn test_127() {
+        assert_eq!(read(write(127)), 127); // largest value that fits in one byte
+    }
+    #[test]
+    fn test_m127() {
+        assert_eq!(read(write(398639861)), 398639861); // multi-byte round trip
+    }
+}
diff --git a/src/vm/mod.rs b/src/vm/mod.rs
new file mode 100644
index 0000000..e294de2
--- /dev/null
+++ b/src/vm/mod.rs
@@ -0,0 +1,40 @@
+use std::collections::HashMap;
+
+use crate::table::Table;
+use crate::value::Value;
+
+struct Vm { // State of the new VM; `run` does not execute anything yet.
+    tables: HashMap<String, Table>, // tables by name; the disabled sketch in `run` uses it as a cache
+    stack: Vec<Value>, // not read or written anywhere in this file yet
+    code: Vec<Opcode>, // the opcodes `run` iterates over
+    table_register: String, // set to the loaded table's name in the disabled sketch in `run`
+    ip: usize, // presumably an instruction pointer (TODO confirm); unused so far
+}
+
+enum Opcode { // Instructions held in `Vm::code`; none of them is interpreted yet.
+    LoadTable(String), // payload is used as the table name in the disabled sketch in `Vm::run`
+    ApplyIndex(String), // NOTE(review): payload presumably names an index; confirm
+    FetchRow, // no handling exists in this file yet
+    FilterRow, // no handling exists in this file yet
+    IncRowPointer, // no handling exists in this file yet
+}
+
+impl Vm {
+    fn run(&mut self) { // Walks `self.code` in order; the loop body below is still disabled.
+        for op in &self.code {
+            // match op {
+            //     Opcode::LoadTable(name) => {
+            //         if !self.tables.contains_key(name) {
+            //             let table = self.load_table(name).unwrap();
+            //             self.tables.insert(name.clone(), table);
+            //         }
+            //         self.table_register = name.clone();
+            //     }
+            // }
+        }
+    }
+
+    fn load_table(&self, name: &String) -> anyhow::Result<Table> { // Builds a `Table` from `name` via `Table::from_csv_file`.
+        Ok(Table::from_csv_file(name, None)?)
+    }
+}