// Reconstructed from the "initial stage" patch (commit 553dfb4, Sander Hautvast,
// 2022-10-27). Besides the Rust sources, that patch adds:
//   .gitignore : /target, Cargo.lock, *.iml, /.idea
//   Cargo.toml : package "sqlighters" 0.1.0, edition 2021,
//                [dependencies] byteorder = "1.4.3"
//
// ---- src/bytebuffer.rs ----
// Big-endian encoding is done with the standard library (`to_be_bytes`), so this
// module does not need the `byteorder` crate.

/// Fixed-size, big-endian byte buffer that can be filled from both ends.
///
/// SQLite pages are written in two directions: from the front for the cell
/// pointers and from the back for the cells. "Forward"/"backward" refers to that
/// write direction, not to endianness.
///
/// The buffer does not check that the forward and backward regions stay apart.
#[derive(Debug, Clone)]
pub struct ByteBuffer {
    data: Vec<u8>,
    /// Index at which the next forward write starts.
    pub fw_position: usize,
    /// Index of the first byte of the most recent backward write
    /// (equal to the buffer size while nothing has been written backward).
    pub bw_position: usize,
}

impl ByteBuffer {
    /// Creates a zero-filled buffer of exactly `size` bytes.
    pub fn new(size: usize) -> Self {
        Self {
            data: vec![0; size],
            fw_position: 0,
            bw_position: size,
        }
    }

    /// Forward put of an unsigned byte array; advances `fw_position` by `bytes.len()`.
    ///
    /// # Panics
    /// Panics if the bytes do not fit between `fw_position` and the end of the buffer.
    pub fn put_u8a(&mut self, bytes: &[u8]) {
        let end = self.fw_position + bytes.len();
        self.data[self.fw_position..end].copy_from_slice(bytes);
        self.fw_position = end;
    }

    /// Backward put of an unsigned byte array.
    ///
    /// The bytes are stored in their given order immediately before the current
    /// `bw_position`, which is then moved down by `bytes.len()` so that the next
    /// backward write lands in front of this one instead of overwriting it.
    ///
    /// # Panics
    /// Panics if fewer than `bytes.len()` bytes remain in front of `bw_position`.
    pub fn put_u8a_bw(&mut self, bytes: &[u8]) {
        let start = self
            .bw_position
            .checked_sub(bytes.len())
            .expect("backward write does not fit in ByteBuffer");
        self.data[start..self.bw_position].copy_from_slice(bytes);
        self.bw_position = start;
    }

    /// Forward put of an unsigned byte.
    pub fn put_u8(&mut self, byte: u8) {
        self.put_u8a(&[byte]);
    }

    /// Backward put of an unsigned byte.
    pub fn put_u8_bw(&mut self, byte: u8) {
        self.put_u8a_bw(&[byte]);
    }

    /// Forward put of an unsigned 16-bit integer (big endian).
    pub fn put_u16(&mut self, val: u16) {
        self.put_u8a(&val.to_be_bytes());
    }

    /// Backward put of an unsigned 16-bit integer (big endian).
    pub fn put_u16_bw(&mut self, val: u16) {
        self.put_u8a_bw(&val.to_be_bytes());
    }

    /// Forward put of an unsigned 32-bit integer (big endian).
    pub fn put_u32(&mut self, val: u32) {
        self.put_u8a(&val.to_be_bytes());
    }

    /// Backward put of an unsigned 32-bit integer (big endian).
    pub fn put_u32_bw(&mut self, val: u32) {
        self.put_u8a_bw(&val.to_be_bytes());
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_u8() {
        let mut b = ByteBuffer::new(1);
        b.put_u8(64_u8);
        assert_eq!(b.data[0], 64);
    }

    #[test]
    fn test_u8a() {
        let mut b = ByteBuffer::new(2);
        b.put_u8a(&[1, 2]);
        assert_eq!(b.data[0], 1);
        assert_eq!(b.data[1], 2);
    }

    #[test]
    fn test_u16() {
        let mut b = ByteBuffer::new(2);
        b.put_u16(4096);
        assert_eq!(b.data[0], 16);
        assert_eq!(b.data[1], 0);
    }

    #[test]
    fn test_u32() {
        let mut b = ByteBuffer::new(4);
        b.put_u32(0xFFFFFFFF);
        assert_eq!(b.data, vec![0xFF, 0xFF, 0xFF, 0xFF]);
    }

    #[test]
    fn test_u16_position() {
        let mut b = ByteBuffer::new(4);
        b.fw_position = 2;
        b.put_u16(4096);
        assert_eq!(b.data, vec![0, 0, 16, 0]);
    }

    #[test]
    fn test_u16_backwards() {
        let mut b = ByteBuffer::new(4);
        b.put_u16_bw(0x1000);
        assert_eq!(b.data, vec![0, 0, 0x10, 0x00]);
    }

    #[test]
    fn test_u16_2_directions() {
        let mut b = ByteBuffer::new(5);
        b.put_u16(0x1001);
        b.put_u16_bw(0x1000);
        // index 2 lies between the two regions and has not been written
        assert_eq!(b.data, vec![0x10, 0x01, 0, 0x10, 0x00]);
    }

    #[test]
    fn test_u32_2_directions() {
        let mut b = ByteBuffer::new(9);
        b.put_u32(0x1001);
        b.put_u32_bw(0x1002);
        assert_eq!(b.data, vec![0x00, 0x00, 0x10, 0x01, 0, 0x00, 0x00, 0x10, 0x02]);
    }

    #[test]
    fn test_consecutive_backwards() {
        let mut b = ByteBuffer::new(4);
        b.put_u16_bw(0x0102);
        b.put_u16_bw(0x0304);
        assert_eq!(b.data, vec![0x03, 0x04, 0x01, 0x02]);
        assert_eq!(b.bw_position, 0);
    }
}
0xFF); + } + + #[test] + fn test_u16_position() { + let mut b = ByteBuffer::new(4); + b.fw_position = 2; + b.put_u16(4096); + assert_eq!(b.data[0], 0); + assert_eq!(b.data[1], 0); + assert_eq!(b.data[2], 16); + assert_eq!(b.data[3], 0); + } + + #[test] + fn test_u16_backwards() { + let mut b = ByteBuffer::new(4); + b.put_u16_bw(0x1000); + assert_eq!(b.data[0], 0); + assert_eq!(b.data[1], 0); + assert_eq!(b.data[2], 0x10); + assert_eq!(b.data[3], 0x00); + } + + #[test] + fn test_u16_2_directions() { + let mut b = ByteBuffer::new(5); + b.put_u16(0x1001); + b.put_u16_bw(0x1000); + assert_eq!(b.data[0], 0x10); + assert_eq!(b.data[1], 0x01); + assert_eq!(b.data[2], 0); // decimal suggests this value has not been written + assert_eq!(b.data[3], 0x10); + assert_eq!(b.data[4], 0x00); + } + + #[test] + fn test_u32_2_directions() { + let mut b = ByteBuffer::new(9); + b.put_u32(0x1001); + b.put_u32_bw(0x1002); + assert_eq!(b.data[0], 0x00); + assert_eq!(b.data[1], 0x00); + assert_eq!(b.data[2], 0x10); + assert_eq!(b.data[3], 0x01); + assert_eq!(b.data[4], 0); + assert_eq!(b.data[5], 0x00); + assert_eq!(b.data[6], 0x00); + assert_eq!(b.data[7], 0x10); + assert_eq!(b.data[8], 0x02); + } +} \ No newline at end of file diff --git a/src/database.rs b/src/database.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..a5e1935 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,19 @@ +mod page; +mod database; +mod bytebuffer; +mod values; +mod varint; +mod record; + +const DEFAULT_PAGE_SIZE: usize = 4096; +const TABLE_INTERIOR_PAGE: u8 = 0x05; +const TABLE_LEAF_PAGE: u8 = 0x0D; + +#[cfg(test)] +mod tests { + #[test] + fn it_works() { + let result = 2 + 2; + assert_eq!(result, 4); + } +} diff --git a/src/page.rs b/src/page.rs new file mode 100644 index 0000000..4d76b14 --- /dev/null +++ b/src/page.rs @@ -0,0 +1,76 @@ +use crate::{DEFAULT_PAGE_SIZE, TABLE_LEAF_PAGE}; +use crate::bytebuffer::ByteBuffer; + +const 
POSITION_CELL_COUNT: u32 = 3; +const START_OF_CONTENT_AREA: u32 = 5; + +pub enum PageType { + Leaf, + Interior, +} + +/// Represents an SQLite page +struct Page { + data: ByteBuffer, + key: i64, + children: Vec, + number: u32, + page_type: PageType, +} + +impl Page { + fn with_capacity(size: usize, page_type: PageType) -> Self { + Self { + data: ByteBuffer::new(size), + key: 0, + children: Vec::new(), + number: 0, + page_type, + } + } + + fn new_leaf() -> Self { + let mut page = Page::with_capacity(DEFAULT_PAGE_SIZE, PageType::Leaf); + page.put_u8(TABLE_LEAF_PAGE); + page + } + + fn new_interior() -> Self { + let mut page = Page::with_capacity(DEFAULT_PAGE_SIZE, PageType::Interior); + page.put_u8(TABLE_LEAF_PAGE); + page + } + + fn add_child(&mut self, child: Self) { + self.children.push(child); + } + + fn fw_position(&mut self, new_position: usize) { + self.data.fw_position = new_position; + } + + fn bw_position(&mut self, new_position: usize) { + self.data.bw_position = new_position; + } + + fn put_u8a(&mut self, value: &[u8]) { + self.data.put_u8a(value); + } + + fn put_u8(&mut self, value: u8) { + self.data.put_u8(value); + } + + fn put_u16(&mut self, value: u16) { + self.data.put_u16(value); + } + + fn put_u32(&mut self, value: u32) { + self.data.put_u32(value); + } + + // may panic + fn get_page_nr_last_child(self) -> u32 { + self.children[self.children.len()-1].number + } +} \ No newline at end of file diff --git a/src/record.rs b/src/record.rs new file mode 100644 index 0000000..989029a --- /dev/null +++ b/src/record.rs @@ -0,0 +1,30 @@ +use crate::values::*; + +struct Record { + rowid: i64, + values: Vec, +} + +impl Record { + fn new(rowid: i64) -> Self { + Self { + rowid, + values: vec![], + } + } + + fn add_value(&mut self, value: Value) { + self.values.push(value); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test() { + let mut record = Record::new(1); + record.add_value(Value::String("hello".to_owned())); + } +} \ No newline at 
end of file diff --git a/src/values.rs b/src/values.rs new file mode 100644 index 0000000..d5fbd2e --- /dev/null +++ b/src/values.rs @@ -0,0 +1,149 @@ +use byteorder::{BigEndian, ByteOrder}; +use crate::varint; + +pub enum Value { + String(String), + Blob(Vec), + Integer(i64), + Float(f64), +} + +/// returns (datatype, value) +pub fn get_bytes(value: Value) -> (Vec, Vec) { + match value { + Value::String(value) => { + let bytes = value.chars().map(|c| c as u8).collect::>(); + (varint::write((bytes.len() * 2 + 13) as u64), bytes) + } + Value::Blob(value) => { + (varint::write((value.len() * 2 + 12) as u64), value) + } + Value::Integer(value) => { + (get_int_type(value), integer_to_bytes(value)) + } + Value::Float(value) => { + let mut buffer = [0 as u8; 8]; + BigEndian::write_f64(&mut buffer, value); + (vec![7], buffer.to_vec()) + } + } +} + +/// returns a variable length Vec of u8 +fn integer_to_bytes(value: i64) -> Vec { + if value == 0 || value == 1 { + vec![] + } else { + return long_to_bytes(value, get_length_of_byte_encoding(value)); + } +} + +fn long_to_bytes(n: i64, nbytes: u8) -> Vec { + let mut bytes = vec![]; + for i in 0..nbytes { + bytes.push(((n >> (nbytes - i - 1) * 8) & 0xFF) as u8); + } + + bytes +} + +fn get_int_type(value: i64) -> Vec { + if value == 0 { + vec![8] + } else if value == 1 { + vec![9] + } else { + let length = get_length_of_byte_encoding(value); + if length < 5 { + varint::write(length as u64) + } else if length < 7 { + varint::write(5) + } else { + varint::write(5) + } + } +} + +fn get_length_of_byte_encoding(value: i64) -> u8 { + let u = + if value < 0 { + !value + } else { + value + }; + if u <= 127 { + 1 + } else if u <= 32767 { + 2 + } else if u <= 8388607 { + 3 + } else if u <= 2147483647 { + 4 + } else if u <= 140737488355327 { + 6 + } else { + 8 + } +} + +#[cfg(test)] +mod tests { + use std::mem; + use crate::values::{get_bytes, Value}; + + #[test] + fn test_string() { + let v = Value::String("hello".to_owned()); + let 
// ---- src/varint.rs ----

/// Encodes `value` as a varint as implemented in SQLite.
///
/// The result is 1 to 9 bytes, most significant group first. Values that fit in
/// 56 bits use 7 payload bits per byte with the high bit set on every byte except
/// the last. Larger values always take 9 bytes: eight bytes with 7 payload bits
/// (high bit set) followed by one byte carrying the lowest 8 bits.
pub fn write(value: u64) -> Vec<u8> {
    if value & 0xff00_0000_0000_0000 != 0 {
        let mut result = vec![0_u8; 9];
        result[8] = value as u8; // lowest 8 bits, stored verbatim
        let mut rest = value >> 8;
        for slot in result[..8].iter_mut().rev() {
            *slot = ((rest & 0x7f) | 0x80) as u8;
            rest >>= 7;
        }
        return result;
    }

    // Collect 7-bit groups least significant first. The loop body runs at least
    // once, so 0 is encoded as the single byte 0 instead of an empty vector.
    let mut result = Vec::with_capacity(8);
    let mut rest = value;
    loop {
        result.push(((rest & 0x7f) | 0x80) as u8);
        rest >>= 7;
        if rest == 0 {
            break;
        }
    }
    // The least significant group becomes the final byte and carries no
    // continuation bit.
    result[0] &= 0x7f;
    result.reverse();
    result
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test() {
        assert_eq!(
            vec![0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF],
            write(0xffffffffffffffff)
        );
    }

    #[test]
    fn test_write1() {
        assert_eq!(vec![1], write(0x01));
    }

    #[test]
    fn test_write0() {
        assert_eq!(vec![0], write(0));
    }
}