binary storage, start of vm

This commit is contained in:
Sander Hautvast 2025-02-18 22:09:50 +01:00
parent eb9b62e47c
commit 3281edc350
16 changed files with 921 additions and 306 deletions

7
Cargo.lock generated
View file

@ -8,9 +8,16 @@ version = "1.0.95"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04"
[[package]]
name = "byteorder"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]] [[package]]
name = "csv" name = "csv"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"byteorder",
] ]

View file

@ -4,4 +4,5 @@ version = "0.1.0"
edition = "2021" edition = "2021"
[dependencies] [dependencies]
byteorder = "1.5"
anyhow = "1.0" anyhow = "1.0"

View file

@ -1,8 +1,8 @@
use csv::Table; use csv::table::Table;
fn main() { fn main() {
let left = Table::from_csv(include_str!("data/left.csv"), "\t"); let left = Table::from_csv(include_str!("data/left.csv"), Some("\t"));
let right = Table::from_csv(include_str!("data/right.csv"), "\t"); let right = Table::from_csv(include_str!("data/right.csv"), Some("\t"));
println!("left:"); println!("left:");
left.select("*"); left.select("*");
println!("\nright:"); println!("\nright:");

View file

@ -1,7 +1,7 @@
use csv::Table; use csv::table::Table;
fn main() { fn main() {
let table = Table::from_csv(include_str!("data/table.csv"), "\t"); let table = Table::from_csv(include_str!("data/table.csv"), Some("\t"));
println!("not ordered:"); println!("not ordered:");
table.select("*"); table.select("*");

View file

@ -1,4 +1,4 @@
use crate::Table; use crate::table::Table;
impl Table { impl Table {
pub fn left_join(&self, right: &Table, left_col: &str, right_col: &str, outer: bool) -> Table { pub fn left_join(&self, right: &Table, left_col: &str, right_col: &str, outer: bool) -> Table {

View file

@ -1,266 +1,11 @@
pub mod groupby; pub mod groupby;
pub mod join; pub mod join;
pub mod order; pub mod order;
mod page;
pub mod print; pub mod print;
pub mod read; pub mod read;
pub mod sql; pub mod sql;
pub mod table;
pub mod value; pub mod value;
mod varint;
use std::{ pub mod vm;
cmp::Ordering,
collections::{BTreeMap, HashMap},
ops::Add,
};
use value::Value;
pub struct Table {
name: String,
cols_by_name: HashMap<String, usize>,
cols: Vec<String>,
records: BTreeMap<Key, Record>,
}
impl Table {
pub fn new(name: impl Into<String>) -> Self {
Self {
name: name.into(),
cols_by_name: HashMap::new(),
cols: vec![],
records: BTreeMap::new(),
}
}
/// Creates a new table with the same name and columns as self,
/// but without data
// Note to self: be careful, might be dangerous to use once tables can be altered.
// That is not yet implemented. May need full copies
pub fn empty_copy(&self) -> Self {
let mut result = Table::new(self.name.clone());
result.cols_by_name = self.cols_by_name.clone();
result.cols = self.cols.clone();
result
}
pub fn add_record(&mut self, record: Record) {
let index = self.records.len();
self.records.insert(Key::integer(index), record);
}
pub fn has_column(&self, name: impl Into<String>) -> bool {
self.cols_by_name.contains_key(&name.into())
}
pub fn add_column(&mut self, name: impl Into<String>, allow_duplicates: bool) {
let col_index = self.cols.len();
let orig_name: String = name.into();
let name = if allow_duplicates {
// append an index when there are duplicate column names
let mut col_name = orig_name.to_string();
let mut index = 2;
while self.has_column(&col_name) {
col_name = orig_name.to_string();
col_name.push_str(format!("{}", index).as_str());
index += 1;
}
col_name
} else {
orig_name
};
self.cols_by_name.insert(name.clone(), col_index);
self.cols.push(name);
}
fn get_indexes(&self, expression: &str) -> Vec<usize> {
expression
.split(",")
.map(|c| self.get_index(c.trim()))
.collect::<Vec<usize>>()
}
fn get_index(&self, col_name: &str) -> usize {
*self.cols_by_name.get(col_name).unwrap()
}
pub fn iter(&self) -> TableIter {
self.iter_records()
}
pub fn iter_records(&self) -> TableIter {
TableIter {
table_iter: self.records.iter(),
}
}
pub fn select_columns<'a>(&'a self, columns: &'a Vec<&'a str>) -> OwnedColIter<'a> {
OwnedColIter {
cols: columns,
index: 0,
}
}
pub fn iter_colums(&self) -> ColIter {
ColIter {
cols: &self.cols,
index: 0,
}
}
pub fn where_clause(&self, colindex: usize, value: &Value) -> Option<&Record> {
for record in self.iter_records() {
let r = record.get(colindex);
if r == value {
return Some(record);
}
}
None
}
}
#[derive(Debug, Clone)]
pub struct Record {
values: Vec<Value>,
}
impl Record {
pub fn len(&self) -> usize {
self.values.iter().map(Value::len).sum()
}
pub fn add_value(&mut self, value: impl Into<Value>) {
self.values.push(value.into());
}
pub fn get(&self, index: usize) -> &Value {
self.values.get(index).unwrap_or(&Value::NULL)
}
}
impl Add for &Record {
type Output = Record;
fn add(self, rhs: Self) -> Self::Output {
let mut sum = Record::default();
sum.values.append(&mut self.values.clone());
sum.values.append(&mut rhs.values.clone()); // use refs?
sum
}
}
impl Default for Record {
fn default() -> Self {
Self { values: vec![] }
}
}
pub struct TableIter<'a> {
table_iter: std::collections::btree_map::Iter<'a, Key, Record>,
}
impl<'a> Iterator for TableIter<'a> {
type Item = &'a Record;
fn next(&mut self) -> Option<Self::Item> {
self.table_iter.next().map(|e| e.1)
}
}
pub struct ColIter<'a> {
cols: &'a Vec<String>,
index: usize,
}
pub struct OwnedColIter<'a> {
cols: &'a Vec<&'a str>,
index: usize,
}
impl<'a> Iterator for ColIter<'a> {
type Item = &'a String;
fn next(&mut self) -> Option<Self::Item> {
if let Some(v) = self.cols.get(self.index) {
self.index += 1;
Some(v)
} else {
None
}
}
}
impl<'a> Iterator for OwnedColIter<'a> {
type Item = &'a str;
fn next(&mut self) -> Option<Self::Item> {
if let Some(v) = self.cols.get(self.index) {
self.index += 1;
Some(v)
} else {
None
}
}
}
struct Key {
values: Vec<Value>,
}
impl Key {
fn integer(integer: usize) -> Self {
Self {
values: vec![Value::Integer(integer as i64)],
}
}
fn compound(keys: Vec<Value>) -> Self {
Self { values: keys }
}
}
impl Ord for Key {
fn cmp(&self, other: &Self) -> Ordering {
self.partial_cmp(other).unwrap()
}
}
impl Eq for Key {}
impl PartialEq for Key {
fn eq(&self, other: &Self) -> bool {
if self.values.len() != other.values.len() {
false
} else {
for (l, r) in self.values.iter().zip(&other.values) {
if l != r {
return false;
}
}
true
}
}
}
impl PartialOrd for Key {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
let len = self.values.len().min(other.values.len());
for i in 0..len {
let ord = self
.values
.get(i)
.unwrap()
.partial_cmp(other.values.get(i).unwrap())
.unwrap();
match ord {
Ordering::Less => {
return Some(Ordering::Less);
}
Ordering::Greater => {
return Some(Ordering::Greater);
}
_ => {}
}
}
Some(Ordering::Equal)
}
}

View file

@ -1,4 +1,7 @@
use csv::table::Table;
fn main() { fn main() {
let csv = include_str!("data/portfolios.csv");
let table = Table::from_csv(csv, None);
table.order_by("name").select("*");
} }

View file

@ -1,10 +1,12 @@
use std::collections::BTreeMap; use std::collections::BTreeMap;
use crate::{Key, Table}; use crate::table::{Key, Table};
impl Table { impl Table {
pub fn order_by(&self, expression: &str) -> Self { pub fn order_by(&self, expression: &str) -> Self {
let indexes = self.get_indexes(expression); let indexes = self.get_column_indexes(expression);
if self.views.contains_key(expression) {}
let mut sorted_records = BTreeMap::new(); let mut sorted_records = BTreeMap::new();
for record in self.iter() { for record in self.iter() {
let key = indexes.iter().map(|i| record.get(*i).clone()).collect(); let key = indexes.iter().map(|i| record.get(*i).clone()).collect();

71
src/page.rs Normal file
View file

@ -0,0 +1,71 @@
use std::ops::Add;
use crate::value::{Value, NULL};
const PAGE_SIZE: usize = 4096;
/// Kind of a [`Page`], stored in its `pagetype` field.
///
/// NOTE(review): the variant names suggest the node roles of a page tree
/// (root / interior / leaf); nothing in this file branches on them yet.
pub enum PageType {
    Root,
    Interior,
    Leaf,
}
/// A fixed-size storage page plus its child pages.
pub struct Page {
    // Kind of page, as passed to `Page::new`.
    pagetype: PageType,
    // Raw page bytes; `Page::new` allocates `PAGE_SIZE` zeroed bytes.
    data: Vec<u8>,
    // Starts at 0 in `Page::new`.
    // NOTE(review): presumably the write position of an index growing from the front — confirm.
    index_pos: u16,
    // Starts at `PAGE_SIZE - 1` in `Page::new`.
    // NOTE(review): presumably the write position of record data growing from the back — confirm.
    data_pos: u16,
    // Starts at 0 in `Page::new`; not read anywhere in this file yet.
    key: usize,
    // Child pages; empty for a freshly created page.
    children: Vec<Page>,
}
impl Page {
pub fn new(pagetype: PageType) -> Self {
Self {
pagetype,
data: vec![0; PAGE_SIZE],
index_pos: 0,
data_pos: (PAGE_SIZE - 1) as u16,
key: 0,
children: vec![],
}
}
pub fn add_record(&mut self, record: Record) {}
}
/// An ordered list of [`Value`]s making up one row.
///
/// `Default` yields a record without values.
#[derive(Debug, Clone, Default)]
pub struct Record {
    values: Vec<Value>,
}

impl Record {
    /// Sum of [`Value::string_len`] over all values of the record.
    pub fn string_len(&self) -> usize {
        self.values.iter().map(Value::string_len).sum()
    }

    /// Appends `value`, converted into a [`Value`], as the last column.
    pub fn add_value(&mut self, value: impl Into<Value>) {
        self.values.push(value.into());
    }

    /// Returns a copy of the value at `index`, or `NULL` when the record has
    /// no value at that position.
    pub fn get(&self, index: usize) -> Value {
        self.values.get(index).cloned().unwrap_or(NULL)
    }
}

impl Add for &Record {
    type Output = Record;

    /// Concatenates two records: the values of `self` followed by those of
    /// `rhs`. Both operands are left untouched.
    fn add(self, rhs: Self) -> Self::Output {
        // One allocation sized for both halves instead of cloning each side
        // into a temporary vector first.
        let mut values = Vec::with_capacity(self.values.len() + rhs.values.len());
        values.extend_from_slice(&self.values);
        values.extend_from_slice(&rhs.values);
        Record { values }
    }
}

View file

@ -1,6 +1,6 @@
use std::collections::HashMap; use std::collections::HashMap;
use crate::Table; use crate::table::Table;
impl Table { impl Table {
/// prints the table contents in nice columns on the command line /// prints the table contents in nice columns on the command line
@ -70,7 +70,7 @@ impl Table {
for col in self.iter_colums() { for col in self.iter_colums() {
let e = widths.get_mut(&col).unwrap(); let e = widths.get_mut(&col).unwrap();
let index = self.get_index(col); let index = self.get_index(col);
*e = (*e).max(record.get(index).len()); *e = (*e).max(record.get(index).string_len());
} }
} }
widths widths
@ -96,7 +96,7 @@ impl Table {
for col in self.select_columns(columns) { for col in self.select_columns(columns) {
let e = widths.get_mut(&col).unwrap(); let e = widths.get_mut(&col).unwrap();
let index = self.get_index(&col); let index = self.get_index(&col);
*e = (*e).max(record.get(index).len()); *e = (*e).max(record.get(index).string_len());
} }
} }
widths widths

View file

@ -1,16 +1,32 @@
use crate::{Record, Table}; use crate::table::{Record, Table};
use std::fs;
const EOL: &str = "\n";
impl Table { impl Table {
pub fn from_csv(csv: &str, separator: &str) -> Self { pub fn from_csv_file(name: &str, separator: Option<&str>) -> anyhow::Result<Table> {
let mut table = Table::new("test"); let csv = fs::read_to_string(name)?;
for (index, row) in csv.split("\n").enumerate() {
Ok(Table::from_csv(csv, separator))
}
pub fn from_csv(csv: impl Into<String>, separator: Option<&str>) -> Self {
let csv = csv.into();
let separator = separator.unwrap_or(
guess_separator(&csv)
.expect("You did not give me a separator and I could not guess it from the data"),
);
let mut table = Table::new("");
for (index, row) in csv.split(EOL).enumerate() {
if index == 0 { if index == 0 {
for col in row.split(separator) { for col in row.split(separator) {
table.add_column(col, true); table.add_column(col, true);
} }
} else if row.len() > 0 { } else if row.len() > 0 {
// skip empty lines
let mut record = Record::default(); let mut record = Record::default();
for value in row.split(separator) { for value in row.split(separator) {
//TODO quoted values
record.add_value(value); record.add_value(value);
} }
table.add_record(record); table.add_record(record);
@ -19,3 +35,36 @@ impl Table {
table table
} }
} }
/// Guesses the column separator of `csv` by counting the candidate
/// characters tab, `;`, `,` and `|` over the whole text and picking the most
/// frequent one.
///
/// On equal counts the candidate listed later wins (tab < `;` < `,` < `|`).
/// As a consequence the result is currently always `Some`: text that contains
/// none of the candidates ties at zero and yields `Some("|")`.
fn guess_separator(csv: &str) -> Option<&'static str> {
    // Candidate characters in tie-break order, paired with the separator
    // string handed back to the caller.
    const CANDIDATES: [(char, &str); 4] = [('\t', "\t"), (';', ";"), (',', ","), ('|', "|")];

    let mut counts = [0_usize; 4];
    for c in csv.chars() {
        if let Some(slot) = CANDIDATES.iter().position(|(candidate, _)| *candidate == c) {
            counts[slot] += 1;
        }
    }

    // `max_by_key` returns the last maximum, which gives the documented
    // tie-break towards later candidates.
    CANDIDATES
        .iter()
        .zip(counts.iter())
        .max_by_key(|(_, count)| **count)
        .map(|((_, separator), _)| *separator)
}
#[cfg(test)]
mod test {
    use super::*;

    /// Two commas beat one pipe, so the comma is picked.
    #[test]
    fn test() {
        assert_eq!(guess_separator(&"a,b,c|d".to_string()), Some(","));
    }
}

View file

@ -2,7 +2,7 @@ use std::collections::HashMap;
use anyhow::anyhow; use anyhow::anyhow;
use crate::value::Value; use crate::value::{Value, NULL};
use super::tokens::{Token, TokenType}; use super::tokens::{Token, TokenType};
@ -154,7 +154,7 @@ impl Scanner {
fn add_token(&mut self, tokentype: TokenType) { fn add_token(&mut self, tokentype: TokenType) {
let text = self.source[self.start..self.current].to_string(); let text = self.source[self.start..self.current].to_string();
self.tokens.push(Token::new(tokentype, text, Value::NULL)); self.tokens.push(Token::new(tokentype, text, NULL));
} }
fn add_literal(&mut self, tokentype: TokenType, literal: Value) { fn add_literal(&mut self, tokentype: TokenType, literal: Value) {

273
src/table.rs Normal file
View file

@ -0,0 +1,273 @@
use std::{
cmp::Ordering,
collections::{BTreeMap, HashMap},
iter::Map,
ops::Add,
};
use crate::value::Value;
/// A mapping from one [`Key`] to another, kept per name in `Table::views`.
///
/// NOTE(review): presumably maps an alternative sort key to the primary key
/// of a record — nothing in this file populates or reads it yet.
pub struct View {
    records: BTreeMap<Key, Key>,
}
/// An in-memory table: named columns plus records ordered by [`Key`].
pub struct Table {
    // Table name as given to `Table::new`.
    name: String,
    // Column name -> position of that column within a record.
    cols_by_name: HashMap<String, usize>,
    // Column names in insertion order.
    pub(crate) cols: Vec<String>,
    // Records sorted by key; `add_record` keys them by insertion index.
    pub(crate) records: BTreeMap<Key, Record>,
    // Views by name; empty for a new table.
    pub views: HashMap<String, View>,
}
impl Table {
pub fn new(name: impl Into<String>) -> Self {
Self {
name: name.into(),
cols_by_name: HashMap::new(),
cols: vec![],
records: BTreeMap::new(),
views: HashMap::new(),
}
}
/// Creates a new table with the same name and columns as self,
/// but without data
// Note to self: be careful, might be dangerous to use once tables can be altered.
// That is not yet implemented. May need full copies
pub fn empty_copy(&self) -> Self {
let mut result = Table::new(self.name.clone());
result.cols_by_name = self.cols_by_name.clone();
result.cols = self.cols.clone();
result
}
pub fn add_record(&mut self, record: Record) {
let index = self.records.len();
self.records.insert(Key::integer(index), record);
}
pub fn has_column(&self, name: impl Into<String>) -> bool {
self.cols_by_name.contains_key(&name.into())
}
pub fn add_column(&mut self, name: impl Into<String>, allow_duplicates: bool) {
let col_index = self.cols.len();
let orig_name: String = name.into();
let name = if allow_duplicates {
// append an index when there are duplicate column names
let mut col_name = orig_name.to_string();
let mut index = 2;
while self.has_column(&col_name) {
col_name = orig_name.to_string();
col_name.push_str(format!("{}", index).as_str());
index += 1;
}
col_name
} else {
orig_name
};
self.cols_by_name.insert(name.clone(), col_index);
self.cols.push(name);
}
pub fn get_column_indexes(&self, expression: &str) -> Vec<usize> {
expression
.split(",")
.map(|c| self.get_index(c.trim()))
.collect::<Vec<usize>>()
}
pub fn get_index(&self, col_name: &str) -> usize {
*self.cols_by_name.get(col_name).unwrap()
}
pub fn iter(&self) -> TableIter {
self.iter_records()
}
pub fn iter_records(&self) -> TableIter {
TableIter {
table_iter: self.records.iter(),
}
}
pub fn select_columns<'a>(&'a self, columns: &'a Vec<&'a str>) -> OwnedColIter<'a> {
OwnedColIter {
cols: columns,
index: 0,
}
}
pub fn iter_colums(&self) -> ColIter {
ColIter {
cols: &self.cols,
index: 0,
}
}
pub fn where_clause(&self, colindex: usize, value: &Value) -> Option<&Record> {
for record in self.iter_records() {
let r = record.get(colindex);
if r == value {
return Some(record);
}
}
None
}
}
/// An ordered list of [`Value`]s making up one row of a table.
///
/// `Default` yields a record without values.
#[derive(Debug, Clone, Default)]
pub struct Record {
    values: Vec<Value>,
}

impl Record {
    /// Sum of [`Value::string_len`] over all values of the record.
    pub fn string_len(&self) -> usize {
        self.values.iter().map(Value::string_len).sum()
    }

    /// Appends `value`, converted into a [`Value`], as the last column.
    pub fn add_value(&mut self, value: impl Into<Value>) {
        self.values.push(value.into());
    }

    /// Returns the value at `index`.
    ///
    /// # Panics
    /// Panics when the record has no value at `index`.
    pub fn get(&self, index: usize) -> &Value {
        self.values.get(index).unwrap() //TODO
    }
}

impl Add for &Record {
    type Output = Record;

    /// Concatenates two records: the values of `self` followed by those of
    /// `rhs`. Both operands are left untouched.
    fn add(self, rhs: Self) -> Self::Output {
        // One allocation sized for both halves instead of cloning each side
        // into a temporary vector first.
        let mut values = Vec::with_capacity(self.values.len() + rhs.values.len());
        values.extend_from_slice(&self.values);
        values.extend_from_slice(&rhs.values);
        Record { values }
    }
}
/// Iterator over the records of a [`Table`], in key order.
/// Created by `Table::iter` / `Table::iter_records`.
pub struct TableIter<'a> {
    // Underlying iterator over the table's `BTreeMap<Key, Record>`.
    table_iter: std::collections::btree_map::Iter<'a, Key, Record>,
}
/// Wraps a key-to-key map iterator together with a boxed closure that turns
/// each `(key, key)` pair into an optional record reference.
///
/// NOTE(review): presumably meant for iterating a [`View`]; no constructor or
/// `Iterator` impl for it is visible in this file.
pub struct ViewIter<'a> {
    iter: Map<
        std::collections::btree_map::Iter<'a, Key, Key>,
        Box<dyn Fn((&'a Key, &'a Key)) -> Option<&'a Record>>,
    >,
}
impl<'a> Iterator for TableIter<'a> {
    type Item = &'a Record;

    /// Yields the next record; the key it is stored under is discarded.
    fn next(&mut self) -> Option<Self::Item> {
        let (_key, record) = self.table_iter.next()?;
        Some(record)
    }
}
/// Iterator over a table's own column names, created by `Table::iter_colums`.
pub struct ColIter<'a> {
    // The column names being walked.
    cols: &'a Vec<String>,
    // Position of the next name to yield.
    index: usize,
}
/// Iterator over a caller supplied list of column names, created by
/// `Table::select_columns`.
pub struct OwnedColIter<'a> {
    // The column names being walked.
    cols: &'a Vec<&'a str>,
    // Position of the next name to yield.
    index: usize,
}
impl<'a> Iterator for ColIter<'a> {
    type Item = &'a String;

    /// Yields the column names front to back; the position only advances
    /// while there is a name left.
    fn next(&mut self) -> Option<Self::Item> {
        let column = self.cols.get(self.index)?;
        self.index += 1;
        Some(column)
    }
}
impl<'a> Iterator for OwnedColIter<'a> {
    type Item = &'a str;

    /// Yields the column names front to back; the position only advances
    /// while there is a name left.
    fn next(&mut self) -> Option<Self::Item> {
        let column: &'a str = self.cols.get(self.index)?;
        self.index += 1;
        Some(column)
    }
}
/// Sort key of a record: one or more [`Value`]s compared position by position.
#[derive(Debug)]
pub struct Key {
    values: Vec<Value>,
}

impl Key {
    /// Single-value key holding `integer`.
    pub fn integer(integer: usize) -> Self {
        Self {
            values: vec![integer.into()],
        }
    }

    /// Key made of several values, compared in the given order.
    pub fn compound(keys: Vec<Value>) -> Self {
        Self { values: keys }
    }
}

impl Ord for Key {
    /// Compares the keys position by position over their common length and
    /// returns the first decisive (`Less`/`Greater`) result.
    ///
    /// Positions whose values are equal or cannot be compared are skipped.
    /// If no position decides, the keys are `Equal`.
    // NOTE(review): keys of different length that agree on their common
    // prefix, and keys that differ only in incomparable values, order as
    // `Equal` here although `==` reports them as different.
    fn cmp(&self, other: &Self) -> Ordering {
        for (left, right) in self.values.iter().zip(&other.values) {
            match left.partial_cmp(right) {
                Some(Ordering::Equal) | None => {}
                Some(decided) => return decided,
            }
        }
        Ordering::Equal
    }
}

impl Eq for Key {}

impl PartialEq for Key {
    /// Keys are equal when they have the same length and all values are
    /// pairwise equal.
    fn eq(&self, other: &Self) -> bool {
        self.values == other.values
    }
}

impl PartialOrd for Key {
    /// Always `Some(self.cmp(other))`, as the `Ord` contract requires.
    /// Returning `None` for keys without a decisive position made equal keys
    /// incomparable, so `a <= a` was false.
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

View file

@ -1,44 +1,180 @@
use std::fmt::Display; use std::{cmp::Ordering, fmt::Display};
#[derive(Debug, PartialEq, PartialOrd, Clone)] use anyhow::anyhow;
pub enum Value { use byteorder::{BigEndian, ByteOrder};
Text(String),
Float(f64), pub const NULL: Value = Value::null();
Integer(i64),
NULL, #[derive(Debug, Clone, PartialEq, Eq, Ord)]
pub struct Value {
datatype: u64,
data: Vec<u8>,
}
impl PartialOrd for Value {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
match self.datatype {
13.. if self.datatype % 2 == 1 => Some(self.to_string().cmp(&other.to_string())),
12.. if self.datatype % 2 == 0 => None, // can't use blob as key
8..=9 => integer_cmp(self, other),
7 => {
let l: anyhow::Result<f64> = self.into();
let r: anyhow::Result<f64> = other.into();
if let Ok(l) = l {
if let Ok(r) = r {
l.partial_cmp(&r)
} else {
None
}
} else {
None
}
}
1..=6 => integer_cmp(self, other),
0 => None,
_ => None,
}
}
}
fn integer_cmp(l: &Value, r: &Value) -> Option<Ordering> {
let l: anyhow::Result<i64> = l.into();
let r: anyhow::Result<i64> = r.into();
if let Ok(l) = l {
if let Ok(r) = r {
l.partial_cmp(&r)
} else {
None
}
} else {
None
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Datatype {
Text,
Blob,
Integer,
Float,
Null,
} }
impl Value { impl Value {
pub fn len(&self) -> usize { pub const fn null() -> Self {
match self { // NULL
Value::Text(text) => text.len(), Self {
Value::Float(float) => format!("{}", float).len(), data: vec![],
Value::Integer(integer) => format!("{}", integer).len(), datatype: 0,
Value::NULL => 0,
} }
} }
pub fn from_f64(value: f64) -> Self {
let mut buf = vec![0; 8];
BigEndian::write_f64(&mut buf, value);
Self {
datatype: 7,
data: buf,
}
}
pub fn from_i64(value: i64) -> Self {
let (datatype, data) = match value {
0 => (8, vec![]),
1 => (9, vec![]),
_ => {
let data = as_bytes(value);
(int_datatype(data.len()), data)
}
};
Self { datatype, data }
}
pub fn from_text(value: impl Into<String>) -> Self {
let value: String = value.into();
let datatype = (13 + value.len() * 2) as u64;
let data = value.as_bytes().to_vec();
Self { datatype, data }
}
pub fn datatype(&self) -> anyhow::Result<Datatype> {
match self.datatype {
13.. if self.datatype % 2 == 1 => Ok(Datatype::Text),
12.. if self.datatype % 2 == 0 => Ok(Datatype::Blob),
8..=9 => Ok(Datatype::Integer),
7 => Ok(Datatype::Float),
1..=6 => Ok(Datatype::Integer),
0 => Ok(Datatype::Null),
_ => Err(anyhow!("Illegal type '{}'", self.datatype)),
}
}
pub fn string_len(&self) -> usize {
match self.datatype {
13.. if self.datatype % 2 == 1 => ((self.datatype - 13) >> 1) as usize,
12.. if self.datatype % 2 == 0 => ((self.datatype - 12) >> 1) as usize,
8..=9 => 1,
7 => {
let f = BigEndian::read_f64(&self.data);
format!("{}", f).len()
}
1..=6 => {
let f = BigEndian::read_i64(&self.data);
format!("{}", f).len()
}
0 => 4, // NULL
_ => 0, // should be Err
}
}
}
/// Maps the byte length of an encoded integer to its datatype code:
/// lengths below 5 map to themselves, 5 and 6 map to 5, anything longer to 6.
fn int_datatype(encoded_len: usize) -> u64 {
    if encoded_len < 5 {
        encoded_len as u64
    } else if encoded_len < 7 {
        5
    } else {
        6
    }
}
/// Encodes `v` big-endian in the number of bytes chosen by `encoding_len`.
fn as_bytes(v: i64) -> Vec<u8> {
    let len = encoding_len(v);
    encode(v, len)
}
/// Returns the lowest `len` bytes of `v`, most significant byte first.
fn encode(v: i64, len: usize) -> Vec<u8> {
    (0..len)
        .rev()
        .map(|byte| (v >> (byte * 8)) as u8)
        .collect()
}
fn encoding_len(v: i64) -> usize {
let u = if v < 0 { !v } else { v };
match u {
..128 => 1,
..32768 => 2,
..8388607 => 3,
..2147483648 => 4,
..140737488355327 => 6,
_ => 8,
}
} }
impl Display for Value { impl Display for Value {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let text = match self { let s: String = self.into();
Value::Float(float) => format!("{}", float), write!(f, "{}", s)
Value::Integer(integer) => format!("{}", integer),
Value::Text(text) => format!("\"{}\"", text),
Value::NULL => "NULL".to_string(),
};
write!(f, "{}", text)
} }
} }
impl Into<Value> for &str { impl Into<Value> for &str {
fn into(self) -> Value { fn into(self) -> Value {
if let Ok(f) = self.parse::<f64>() { if let Ok(f) = self.parse::<f64>() {
Value::Float(f) Value::from_f64(f)
} else if let Ok(i) = self.parse::<i64>() { } else if let Ok(i) = self.parse::<i64>() {
Value::Integer(i) Value::from_i64(i)
} else { } else {
Value::Text(strip_quotes(self)) Value::from_text(strip_quotes(self))
} }
} }
} }
@ -46,24 +182,102 @@ impl Into<Value> for &str {
impl Into<Value> for String { impl Into<Value> for String {
fn into(self) -> Value { fn into(self) -> Value {
if let Ok(f) = self.parse::<f64>() { if let Ok(f) = self.parse::<f64>() {
Value::Float(f) Value::from_f64(f)
} else if let Ok(i) = self.parse::<i64>() { } else if let Ok(i) = self.parse::<i64>() {
Value::Integer(i) Value::from_i64(i)
} else { } else {
Value::Text(strip_quotes(self)) Value::from_text(strip_quotes(self))
} }
} }
} }
impl Into<Value> for f64 { impl Into<Value> for f64 {
fn into(self) -> Value { fn into(self) -> Value {
Value::Float(self) Value::from_f64(self)
} }
} }
impl Into<Value> for i64 { impl Into<Value> for i64 {
fn into(self) -> Value { fn into(self) -> Value {
Value::Integer(self) Value::from_i64(self)
}
}
/// Converts a `usize` into an integer [`Value`] via `Value::from_i64`.
impl Into<Value> for usize {
    fn into(self) -> Value {
        // `as i64` wraps for values above `i64::MAX`.
        Value::from_i64(self as i64)
    }
}
/// Converts an `i32` into an integer [`Value`] via `Value::from_i64`.
impl Into<Value> for i32 {
    fn into(self) -> Value {
        // Widening cast, lossless.
        Value::from_i64(self as i64)
    }
}
/// Renders an owned [`Value`] as text by delegating to the `&Value` conversion.
impl Into<String> for Value {
    fn into(self) -> String {
        (&self).into()
    }
}
/// Renders a [`Value`] as text, chosen by its datatype code.
impl Into<String> for &Value {
    fn into(self) -> String {
        let code = self.datatype;
        match code {
            // NULL
            0 => String::from("NULL"),
            // integers stored in `data`
            1..=6 => {
                let parsed: anyhow::Result<i64> = self.into();
                parsed.unwrap().to_string()
            }
            // float stored in `data`
            7 => {
                let parsed: anyhow::Result<f64> = self.into();
                parsed.unwrap().to_string()
            }
            // the constants 0 and 1 carry no data
            8 => String::from("0"),
            9 => String::from("1"),
            // codes 10 and 11 are not assigned
            10 | 11 => format!("Illegal type '{}'", code), // should be Err
            // 12 and up: the stored bytes as lossy UTF-8, both for odd codes
            // (text) and even codes (blob)
            _ => String::from_utf8_lossy(&self.data).into_owned(),
        }
    }
}
/// Reads an owned [`Value`] as `f64` by delegating to the `&Value` conversion.
impl Into<anyhow::Result<f64>> for Value {
    fn into(self) -> anyhow::Result<f64> {
        (&self).into()
    }
}
/// Reads a [`Value`] as `f64`.
///
/// Only datatype 7 is accepted; its data is decoded as a big-endian `f64`.
/// Every other datatype yields the error "not a float".
impl Into<anyhow::Result<f64>> for &Value {
    fn into(self) -> anyhow::Result<f64> {
        if self.datatype == 7 {
            Ok(BigEndian::read_f64(&self.data))
        } else {
            Err(anyhow!("not a float"))
        }
    }
}
/// Reads an owned [`Value`] as `i64` by delegating to the `&Value` conversion.
impl Into<anyhow::Result<i64>> for Value {
    fn into(self) -> anyhow::Result<i64> {
        (&self).into()
    }
}
impl Into<anyhow::Result<i64>> for &Value {
fn into(self) -> anyhow::Result<i64> {
match self.datatype {
0 => Err(anyhow!("value is NULL")),
1 => Ok(BigEndian::read_int(&self.data, 1) as i64),
2 => Ok(BigEndian::read_int(&self.data, 2) as i64),
3 => Ok(BigEndian::read_int(&self.data, 3) as i64),
4 => Ok(BigEndian::read_int(&self.data, 4) as i64),
5 => Ok(BigEndian::read_int(&self.data, 6) as i64),
6 => Ok(BigEndian::read_int(&self.data, 8) as i64),
8 => Ok(0),
9 => Ok(1),
_ => Err(anyhow!("not an integer")),
}
} }
} }
@ -74,3 +288,81 @@ fn strip_quotes(text: impl Into<String>) -> String {
} }
text text
} }
/// Checks datatype code, stored bytes, text rendering and display width for
/// representative integers, floats and a string.
#[cfg(test)]
mod test {
    use super::*;

    /// 0 is encoded as datatype 8 without data.
    #[test]
    fn test_int0() {
        let i: Value = 0.into();
        assert_eq!(i.datatype, 8);
        assert_eq!(i.data, vec![]);
        assert_eq!(i.to_string(), "0");
        assert_eq!(i.string_len(), 1);
        assert_eq!(i.datatype().unwrap(), Datatype::Integer);
    }

    /// 1 is encoded as datatype 9 without data.
    #[test]
    fn test_int1() {
        let i: Value = 1.into();
        assert_eq!(i.datatype, 9);
        assert_eq!(i.data, vec![]);
        assert_eq!(i.to_string(), "1");
        assert_eq!(i.string_len(), 1);
        assert_eq!(i.datatype().unwrap(), Datatype::Integer);
    }

    /// 50000 is stored big-endian in three bytes.
    #[test]
    fn test_int50000() {
        let i: Value = 50000.into();
        assert_eq!(i.datatype, 3);
        assert_eq!(i.data, vec![0, 195, 80]);
        assert_eq!(i.to_string(), "50000");
        // NOTE(review): presumably disabled because `string_len` decodes
        // integers with an 8-byte read while only 3 bytes are stored — confirm.
        // assert_eq!(i.string_len(), 5);
        assert_eq!(i.datatype().unwrap(), Datatype::Integer);
    }

    /// 0.0 is stored as eight zero bytes and printed without a fraction.
    #[test]
    fn test_float0() {
        let i: Value = 0.0.into();
        assert_eq!(i.datatype, 7);
        assert_eq!(i.data, vec![0; 8]);
        assert_eq!(i.to_string(), "0");
        assert_eq!(i.string_len(), 1);
        assert_eq!(i.datatype().unwrap(), Datatype::Float);
    }

    /// 1.0 is stored as its big-endian bit pattern.
    #[test]
    fn test_float1() {
        let i: Value = 1.0.into();
        assert_eq!(i.datatype, 7);
        assert_eq!(i.data, vec![63, 240, 0, 0, 0, 0, 0, 0]);
        assert_eq!(i.to_string(), "1");
        assert_eq!(i.string_len(), 1);
        assert_eq!(i.datatype().unwrap(), Datatype::Float);
    }

    /// A float with a fraction keeps it when printed.
    #[test]
    fn test_float50000() {
        let i: Value = 50000.2.into();
        assert_eq!(i.datatype, 7);
        assert_eq!(i.data, vec![64, 232, 106, 6, 102, 102, 102, 102]);
        assert_eq!(i.to_string(), "50000.2");
        assert_eq!(i.string_len(), 7);
        assert_eq!(i.datatype().unwrap(), Datatype::Float);
    }

    /// Text gets datatype `13 + 2 * byte length` and stores its raw bytes.
    #[test]
    fn test_string() {
        let i: Value = "hello world".into();
        assert_eq!(i.datatype, ("hello world".len() * 2 + 13) as u64);
        assert_eq!(
            i.data,
            vec![104, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100]
        );
        assert_eq!(i.to_string(), "hello world");
        assert_eq!(i.string_len(), 11);
        assert_eq!(i.datatype().unwrap(), Datatype::Text);
    }
}

132
src/varint.rs Normal file
View file

@ -0,0 +1,132 @@
const SLOT_2_0: u64 = 0x001fc07f;
const SLOT_4_2_0: u64 = 0xf01fc07f;
/// Encodes `value` as a varint as implemented in `SQLite`.
///
/// Values whose top byte is zero use one to eight bytes of seven payload bits
/// each, most significant group first, with the high bit set on every byte
/// except the last. All other values (including negative ones) use nine
/// bytes: eight 7-bit groups followed by a full final byte.
pub fn write(value: i64) -> Vec<u8> {
    let top_byte_mask: i64 = (0xff00_0000) << 32;

    if value & top_byte_mask != 0 {
        // 9-byte form: the last byte carries 8 bits, the others 7 bits each.
        let mut bytes = vec![0_u8; 9];
        bytes[8] = value as u8;
        let mut rest = value >> 8;
        for slot in bytes[..8].iter_mut().rev() {
            *slot = ((rest & 0x7f) | 0x80) as u8;
            rest >>= 7;
        }
        return bytes;
    }

    if value == 0 {
        return vec![0];
    }

    // Collect the 7-bit groups least significant first, all flagged as
    // "more bytes follow"...
    let mut bytes: Vec<u8> = std::iter::successors(Some(value), |rest| Some(rest >> 7))
        .take_while(|rest| *rest != 0)
        .map(|rest| ((rest & 0x7f) | 0x80) as u8)
        .collect();
    // ...then clear the flag on what becomes the final byte and flip the order.
    bytes[0] &= 0x7f;
    bytes.reverse();
    bytes
}
/// Decodes one `SQLite`-style varint from the start of `data`.
///
/// The encoding uses one to nine bytes. In the first eight bytes the high bit
/// means "another byte follows" and the low seven bits are payload, most
/// significant group first. A ninth byte contributes all eight of its bits.
/// Bytes after the varint are ignored.
///
/// The result is the raw 64-bit pattern; a value written from a negative
/// `i64` comes back as the corresponding large `u64`.
///
/// # Panics
/// Panics when `data` is empty or ends before the varint is complete.
pub fn read(data: Vec<u8>) -> u64 {
    // `a` and `b` alternately accumulate the even and odd payload groups,
    // `s` saves the high groups once they no longer fit in 32 bits.
    let mut a = data[0] as u64;
    // 1 byte: high bit clear
    if (data[0] as i8) >= 0 {
        return a;
    }

    let mut b = data[1] as u64;
    // 2 bytes
    if (b & 0x80) == 0 {
        return ((a & 0x7f) << 7) | b;
    }

    a = (a << 14) | data[2] as u64;
    // 3 bytes
    if (a & 0x80) == 0 {
        a &= SLOT_2_0;
        b = (b & 0x7f) << 7;
        a |= b;
        return a;
    }

    a &= SLOT_2_0;
    b <<= 14;
    b |= data[3] as u64;
    // 4 bytes
    if (b & 0x80) == 0 {
        b &= SLOT_2_0;
        a = (a << 7) | b;
        return a;
    }

    b &= SLOT_2_0;
    // s: groups 0 and 2
    let mut s = a;
    a <<= 14;
    let m = data[4] as u64;
    a |= m;
    // 5 bytes
    if (a & 0x80) == 0 {
        b <<= 7;
        a |= b;
        s >>= 18;
        return (s << 32) | a;
    }

    // s: groups 0..=3, 28 bits
    s = (s << 7) | b;
    b = (b << 14) | data[5] as u64;
    // 6 bytes
    if (b & 0x80) == 0 {
        a &= SLOT_2_0;
        a = (a << 7) | b;
        s >>= 18;
        return (s << 32) | a;
    }

    a <<= 14;
    a |= data[6] as u64;
    // 7 bytes
    if (a & 0x80) == 0 {
        a &= SLOT_4_2_0;
        b &= SLOT_2_0;
        b <<= 7;
        a |= b;
        s >>= 11;
        return (s << 32) | a;
    }

    a &= SLOT_2_0;
    b = (b << 14) | data[7] as u64;
    // 8 bytes
    if (b & 0x80) == 0 {
        b &= SLOT_4_2_0;
        a = (a << 7) | b;
        // The low word already holds the bottom 4 bits of group 3, so the
        // high word is the saved 28 bits without them.
        s >>= 4;
        return (s << 32) | a;
    }

    // 9 bytes: the final byte contributes all 8 bits
    a <<= 15;
    a |= data[8] as u64;
    b &= SLOT_2_0;
    b <<= 8;
    a |= b;
    // High word: groups 0..=3 followed by the top 4 payload bits of group 4
    // (`m`); 28 + 4 = 32 bits.
    s <<= 4;
    b = m;
    b &= 0x7f;
    b >>= 3;
    s |= b;
    (s << 32) | a
}
/// Round-trip checks: `read(write(x))` must give back `x`.
#[cfg(test)]
mod test {
    use super::*;

    /// Zero takes the single-byte special case in `write`.
    #[test]
    fn test_0() {
        assert_eq!(0, read(write(0)));
    }

    /// Largest value that fits in one byte.
    #[test]
    fn test_127() {
        assert_eq!(127, read(write(127)));
    }

    /// Despite its name this round-trips 398639861, a five-byte varint.
    #[test]
    fn test_m127() {
        assert_eq!(398639861, read(write(398639861)));
    }
}

40
src/vm/mod.rs Normal file
View file

@ -0,0 +1,40 @@
use std::collections::HashMap;
use crate::table::Table;
use crate::value::Value;
/// State of the (work in progress) virtual machine.
struct Vm {
    // Tables by name.
    // NOTE(review): the commented-out sketch in `run` uses this as a cache of loaded tables — confirm.
    tables: HashMap<String, Table>,
    // Value stack; not used by any code in this file yet.
    stack: Vec<Value>,
    // The program: opcodes that `run` iterates over.
    code: Vec<Opcode>,
    // NOTE(review): presumably the name of the table currently being worked on — confirm.
    table_register: String,
    // NOTE(review): presumably the instruction pointer into `code`; `run` does not use it yet.
    ip: usize,
}
/// Instructions of the virtual machine.
///
/// NOTE(review): none of these is executed yet (the body of `Vm::run` is
/// commented out), so their meaning can only be guessed from the names.
enum Opcode {
    // Carries a table name.
    LoadTable(String),
    // Carries a name; presumably an index or view name — confirm.
    ApplyIndex(String),
    FetchRow,
    FilterRow,
    IncRowPointer,
}
impl Vm {
    /// Walks over the program in `code`.
    ///
    /// Opcode handling is not implemented yet; the sketch for `LoadTable` is
    /// kept below as a comment, so running currently has no effect.
    fn run(&mut self) {
        for _op in &self.code {
            // match op {
            //     Opcode::LoadTable(name) => {
            //         if !self.tables.contains_key(name) {
            //             let table = self.load_table(name).unwrap();
            //             self.tables.insert(name.clone(), table);
            //         }
            //         self.table_register = name.clone();
            //     }
            // }
        }
    }

    /// Loads a table from the CSV file at path `name`, letting
    /// `Table::from_csv_file` guess the separator.
    ///
    /// # Errors
    /// Returns whatever error `Table::from_csv_file` reports.
    fn load_table(&self, name: &str) -> anyhow::Result<Table> {
        Table::from_csv_file(name, None)
    }
}