initial stage

This commit is contained in:
Sander Hautvast 2022-10-27 17:27:47 +02:00
parent 0f3c4676c6
commit 553dfb4099
9 changed files with 491 additions and 0 deletions

4
.gitignore vendored Normal file
View file

@ -0,0 +1,4 @@
/target
Cargo.lock
*.iml
/.idea

9
Cargo.toml Normal file
View file

@ -0,0 +1,9 @@
[package]
name = "sqlighters"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
byteorder = "1.4.3"

163
src/bytebuffer.rs Normal file
View file

@ -0,0 +1,163 @@
use byteorder::{BigEndian, ByteOrder};
/// Fixed-size byte buffer that can be filled from both ends.
///
/// SQLite pages are written in two directions: the cell pointers grow forward from the
/// front of the page while the cells themselves grow backward from the end.
/// "Forward" and "backward" describe the write direction only, not endianness:
/// every multi-byte integer is stored big endian.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ByteBuffer {
    data: Vec<u8>,
    /// Index of the next byte written by the forward `put_*` methods.
    pub fw_position: usize,
    /// Index of the first byte of the most recent backward write
    /// (equal to the buffer size while nothing has been written backward).
    pub bw_position: usize,
}

impl ByteBuffer {
    /// Creates a zero-filled buffer of exactly `size` bytes.
    pub fn new(size: usize) -> Self {
        Self {
            data: vec![0; size],
            fw_position: 0,
            bw_position: size,
        }
    }

    /// forward put unsigned byte array
    ///
    /// Copies `bytes` to `fw_position` and advances `fw_position` past them.
    ///
    /// # Panics
    /// Panics if `bytes` does not fit between `fw_position` and the end of the buffer.
    pub fn put_u8a(&mut self, bytes: &[u8]) {
        let end = self.fw_position + bytes.len();
        self.data[self.fw_position..end].copy_from_slice(bytes);
        self.fw_position = end;
    }

    /// backward put unsigned byte array
    ///
    /// Copies `bytes` so that they end right before `bw_position`, then moves
    /// `bw_position` to the first written byte. Consecutive backward writes therefore
    /// stack towards the front of the buffer instead of overwriting each other.
    ///
    /// # Panics
    /// Panics if `bytes` is longer than the space in front of `bw_position`.
    pub fn put_u8a_bw(&mut self, bytes: &[u8]) {
        let start = self
            .bw_position
            .checked_sub(bytes.len())
            .expect("backward write does not fit in front of bw_position");
        self.data[start..self.bw_position].copy_from_slice(bytes);
        self.bw_position = start;
    }

    /// forward put unsigned byte
    pub fn put_u8(&mut self, byte: u8) {
        self.put_u8a(&[byte]);
    }

    /// backward put unsigned byte
    pub fn put_u8_bw(&mut self, byte: u8) {
        self.put_u8a_bw(&[byte]);
    }

    /// forward put unsigned 16bit integer (big endian)
    pub fn put_u16(&mut self, val: u16) {
        self.put_u8a(&val.to_be_bytes());
    }

    /// backward put unsigned 16bit integer (big endian)
    pub fn put_u16_bw(&mut self, val: u16) {
        self.put_u8a_bw(&val.to_be_bytes());
    }

    /// forward put unsigned 32bit integer (big endian)
    pub fn put_u32(&mut self, val: u32) {
        self.put_u8a(&val.to_be_bytes());
    }

    /// backward put unsigned 32bit integer (big endian)
    pub fn put_u32_bw(&mut self, val: u32) {
        self.put_u8a_bw(&val.to_be_bytes());
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A single forward byte lands at index 0.
    #[test]
    fn test_u8() {
        let mut buffer = ByteBuffer::new(1);
        buffer.put_u8(64_u8);
        assert_eq!(buffer.data, vec![64]);
    }

    /// A forward byte array is copied in order.
    #[test]
    fn test_u8a() {
        let mut buffer = ByteBuffer::new(2);
        buffer.put_u8a(&[1, 2]);
        assert_eq!(buffer.data, vec![1, 2]);
    }

    /// 16-bit values are stored most significant byte first.
    #[test]
    fn test_u16() {
        let mut buffer = ByteBuffer::new(2);
        buffer.put_u16(4096);
        assert_eq!(buffer.data, vec![16, 0]);
    }

    /// 32-bit values occupy four bytes.
    #[test]
    fn test_u32() {
        let mut buffer = ByteBuffer::new(4);
        buffer.put_u32(0xFFFFFFFF);
        assert_eq!(buffer.data, vec![0xFF, 0xFF, 0xFF, 0xFF]);
    }

    /// A forward write starts at the current forward position.
    #[test]
    fn test_u16_position() {
        let mut buffer = ByteBuffer::new(4);
        buffer.fw_position = 2;
        buffer.put_u16(4096);
        assert_eq!(buffer.data, vec![0, 0, 16, 0]);
    }

    /// A backward write fills the tail of the buffer.
    #[test]
    fn test_u16_backwards() {
        let mut buffer = ByteBuffer::new(4);
        buffer.put_u16_bw(0x1000);
        assert_eq!(buffer.data, vec![0, 0, 0x10, 0x00]);
    }

    /// Forward and backward 16-bit writes leave the byte in between untouched.
    #[test]
    fn test_u16_2_directions() {
        let mut buffer = ByteBuffer::new(5);
        buffer.put_u16(0x1001);
        buffer.put_u16_bw(0x1000);
        // the decimal 0 in the middle marks the byte that has not been written
        assert_eq!(buffer.data, vec![0x10, 0x01, 0, 0x10, 0x00]);
    }

    /// Forward and backward 32-bit writes leave the byte in between untouched.
    #[test]
    fn test_u32_2_directions() {
        let mut buffer = ByteBuffer::new(9);
        buffer.put_u32(0x1001);
        buffer.put_u32_bw(0x1002);
        // the decimal 0 in the middle marks the byte that has not been written
        assert_eq!(
            buffer.data,
            vec![0x00, 0x00, 0x10, 0x01, 0, 0x00, 0x00, 0x10, 0x02]
        );
    }
}

0
src/database.rs Normal file
View file

19
src/lib.rs Normal file
View file

@ -0,0 +1,19 @@
// Crate-internal modules, one per source file under src/.
mod page;
mod database;
mod bytebuffer;
mod values;
mod varint;
mod record;
/// Size in bytes of the pages created by `Page::new_leaf` and `Page::new_interior`.
const DEFAULT_PAGE_SIZE: usize = 4096;
/// Page-type byte for an interior table page.
// NOTE(review): presumably the b-tree page-type flag from the SQLite file format — confirm.
const TABLE_INTERIOR_PAGE: u8 = 0x05;
/// Page-type byte for a leaf table page; written as the first byte of a new page in page.rs.
const TABLE_LEAF_PAGE: u8 = 0x0D;
#[cfg(test)]
mod tests {
    /// Smoke test: proves the crate's test harness compiles and runs.
    #[test]
    fn it_works() {
        let sum = 2 + 2;
        assert_eq!(4, sum);
    }
}

76
src/page.rs Normal file
View file

@ -0,0 +1,76 @@
use crate::{DEFAULT_PAGE_SIZE, TABLE_LEAF_PAGE};
use crate::bytebuffer::ByteBuffer;
// NOTE(review): neither constant is referenced anywhere in this file yet.
// Presumably these are byte offsets into the page header (cell count at 3,
// start of the cell content area at 5) — confirm against the SQLite file format.
const POSITION_CELL_COUNT: u32 = 3;
const START_OF_CONTENT_AREA: u32 = 5;
/// Kind of page held by a `Page`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PageType {
    /// Page created by `Page::new_leaf`.
    Leaf,
    /// Page created by `Page::new_interior`.
    Interior,
}
/// Represents an SQLite page
///
/// Holds the raw page bytes together with the bookkeeping used to link pages
/// to their children.
struct Page {
// Raw page contents; written through the `put_*` helpers of `impl Page`.
data: ByteBuffer,
// NOTE(review): initialised to 0 and never read in this file — presumably the
// rowid key used to order pages; confirm once it is used.
key: i64,
// Child pages, appended by `add_child`.
children: Vec<Page>,
// Page number; `get_page_nr_last_child` reports this value for the last child.
number: u32,
// Whether the page was created as a leaf or as an interior page.
page_type: PageType,
}
impl Page {
fn with_capacity(size: usize, page_type: PageType) -> Self {
Self {
data: ByteBuffer::new(size),
key: 0,
children: Vec::new(),
number: 0,
page_type,
}
}
fn new_leaf() -> Self {
let mut page = Page::with_capacity(DEFAULT_PAGE_SIZE, PageType::Leaf);
page.put_u8(TABLE_LEAF_PAGE);
page
}
fn new_interior() -> Self {
let mut page = Page::with_capacity(DEFAULT_PAGE_SIZE, PageType::Interior);
page.put_u8(TABLE_LEAF_PAGE);
page
}
fn add_child(&mut self, child: Self) {
self.children.push(child);
}
fn fw_position(&mut self, new_position: usize) {
self.data.fw_position = new_position;
}
fn bw_position(&mut self, new_position: usize) {
self.data.bw_position = new_position;
}
fn put_u8a(&mut self, value: &[u8]) {
self.data.put_u8a(value);
}
fn put_u8(&mut self, value: u8) {
self.data.put_u8(value);
}
fn put_u16(&mut self, value: u16) {
self.data.put_u16(value);
}
fn put_u32(&mut self, value: u32) {
self.data.put_u32(value);
}
// may panic
fn get_page_nr_last_child(self) -> u32 {
self.children[self.children.len()-1].number
}
}

30
src/record.rs Normal file
View file

@ -0,0 +1,30 @@
use crate::values::*;
/// A row under construction: its rowid plus the column values added so far.
struct Record {
    rowid: i64,
    values: Vec<Value>,
}

impl Record {
    /// Starts an empty record for the given `rowid`.
    fn new(rowid: i64) -> Self {
        let values = Vec::new();
        Self { rowid, values }
    }

    /// Appends `value` after the values already present.
    fn add_value(&mut self, value: Value) {
        self.values.push(value);
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A new record keeps its rowid, starts empty and stores added values.
    #[test]
    fn test() {
        let mut record = Record::new(1);
        assert_eq!(record.rowid, 1);
        assert!(record.values.is_empty());

        record.add_value(Value::String("hello".to_owned()));
        assert_eq!(record.values.len(), 1);
        assert!(matches!(&record.values[0], Value::String(text) if text == "hello"));
    }
}

149
src/values.rs Normal file
View file

@ -0,0 +1,149 @@
use byteorder::{BigEndian, ByteOrder};
use crate::varint;
/// A single column value that `get_bytes` can serialise.
#[derive(Debug, Clone, PartialEq)]
pub enum Value {
    /// Text value.
    String(String),
    /// Raw bytes, stored unchanged.
    Blob(Vec<u8>),
    /// Signed integer, stored in the smallest supported number of bytes.
    Integer(i64),
    /// 64-bit floating point number, stored as 8 big-endian bytes.
    Float(f64),
}
/// Serialises `value` into its record representation.
///
/// Returns `(datatype, value)`: the first vector is the varint-encoded serial type,
/// the second the payload bytes.
///
/// - text: serial type `len * 2 + 13`, payload is the UTF-8 encoding of the string
/// - blob: serial type `len * 2 + 12`, payload is the blob itself
/// - integer: serial type and payload depend on the magnitude (0 and 1 have no payload)
/// - float: serial type 7, payload is the 8-byte big-endian IEEE 754 value
pub fn get_bytes(value: Value) -> (Vec<u8>, Vec<u8>) {
    match value {
        Value::String(text) => {
            // Keep the UTF-8 bytes as they are. Casting every `char` to `u8` truncated
            // all characters above U+00FF and produced a wrong length for non-ASCII text.
            let bytes = text.into_bytes();
            (varint::write((bytes.len() * 2 + 13) as u64), bytes)
        }
        Value::Blob(blob) => (varint::write((blob.len() * 2 + 12) as u64), blob),
        Value::Integer(number) => (get_int_type(number), integer_to_bytes(number)),
        Value::Float(number) => {
            let mut buffer = [0_u8; 8];
            BigEndian::write_f64(&mut buffer, number);
            (vec![7], buffer.to_vec())
        }
    }
}
/// Encodes `value` as a variable-length big-endian byte vector.
///
/// 0 and 1 produce no bytes at all; every other value is written in the number of
/// bytes reported by `get_length_of_byte_encoding`.
fn integer_to_bytes(value: i64) -> Vec<u8> {
    match value {
        0 | 1 => Vec::new(),
        other => long_to_bytes(other, get_length_of_byte_encoding(other)),
    }
}
/// Returns the lowest `nbytes` bytes of `n`, most significant byte first.
fn long_to_bytes(n: i64, nbytes: u8) -> Vec<u8> {
    (0..nbytes)
        .rev()
        // the cast keeps only the low 8 bits of the shifted value
        .map(|byte_index| (n >> (byte_index * 8)) as u8)
        .collect()
}
/// Returns the varint-encoded serial type for an integer value.
///
/// - 0 and 1 use the payload-free types 8 and 9
/// - values stored in 1 to 4 bytes use the type equal to their byte length
/// - values stored in 6 bytes use type 5
/// - values stored in 8 bytes use type 6
fn get_int_type(value: i64) -> Vec<u8> {
    let serial_type = match value {
        0 => 8,
        1 => 9,
        _ => match get_length_of_byte_encoding(value) {
            length @ 1..=4 => length,
            6 => 5,
            // 8-byte integers have their own serial type; they must not share
            // type 5 with the 6-byte integers.
            _ => 6,
        },
    };
    // Every serial type produced here is below 128, so its varint encoding is
    // exactly that single byte.
    vec![serial_type]
}

/// Returns the number of bytes (1, 2, 3, 4, 6 or 8) needed to store `value`
/// as a two's-complement big-endian integer.
fn get_length_of_byte_encoding(value: i64) -> u8 {
    // For negative numbers `!value` equals `-value - 1`, which maps the negative
    // range onto the positive thresholds below (e.g. -128 -> 127 -> one byte).
    let magnitude = if value < 0 { !value } else { value };
    match magnitude {
        0..=0x7F => 1,
        0x80..=0x7FFF => 2,
        0x8000..=0x7F_FFFF => 3,
        0x80_0000..=0x7FFF_FFFF => 4,
        0x8000_0000..=0x7FFF_FFFF_FFFF => 6,
        _ => 8,
    }
}
#[cfg(test)]
mod tests {
    use crate::values::{get_bytes, Value};

    /// Text: serial type is `len * 2 + 13`, payload is the string bytes.
    #[test]
    fn test_string() {
        let (serial_type, payload) = get_bytes(Value::String("hello".to_owned()));
        assert_eq!(serial_type, vec![23]);
        assert_eq!(payload, vec![0x68, 0x65, 0x6C, 0x6C, 0x6F]);
    }

    /// Blob: serial type is `len * 2 + 12`, payload is the blob unchanged.
    #[test]
    fn test_blob() {
        let (serial_type, payload) = get_bytes(Value::Blob(vec![1, 2, 3, 4, 5]));
        assert_eq!(serial_type, vec![22]);
        assert_eq!(payload, vec![1, 2, 3, 4, 5]);
    }

    /// Float: serial type 7, payload is the big-endian IEEE 754 representation.
    #[test]
    fn test_float() {
        let (serial_type, payload) = get_bytes(Value::Float(1.1));
        assert_eq!(serial_type, vec![7]);
        assert_eq!(payload, vec![0x3f, 0xf1, 0x99, 0x99, 0x99, 0x99, 0x99, 0x9a]);
    }

    /// The integer 0 has its own serial type and no payload.
    #[test]
    fn test_integer0() {
        let (serial_type, payload) = get_bytes(Value::Integer(0));
        assert_eq!(serial_type, vec![8]);
        assert!(payload.is_empty());
    }

    /// The integer 1 has its own serial type and no payload.
    #[test]
    fn test_integer1() {
        let (serial_type, payload) = get_bytes(Value::Integer(1));
        assert_eq!(serial_type, vec![9]);
        assert!(payload.is_empty());
    }

    /// Small integers take one byte.
    #[test]
    fn test_integer2() {
        let (serial_type, payload) = get_bytes(Value::Integer(2));
        assert_eq!(serial_type, vec![1]);
        assert_eq!(payload, vec![2]);
    }

    /// 128 no longer fits a signed byte and takes two bytes.
    #[test]
    fn test_integer128() {
        let (serial_type, payload) = get_bytes(Value::Integer(128));
        assert_eq!(serial_type, vec![2]);
        assert_eq!(payload, vec![0, 128]);
    }

    /// Negative integers are stored in two's complement.
    #[test]
    fn test_integer_negative() {
        let (serial_type, payload) = get_bytes(Value::Integer(-1));
        assert_eq!(serial_type, vec![1]);
        assert_eq!(payload, vec![0xFF]);
    }

    /// 32768 no longer fits two signed bytes and takes three bytes.
    #[test]
    fn test_integer_three_bytes() {
        let (serial_type, payload) = get_bytes(Value::Integer(32768));
        assert_eq!(serial_type, vec![3]);
        assert_eq!(payload, vec![0x00, 0x80, 0x00]);
    }
}

41
src/varint.rs Normal file
View file

@ -0,0 +1,41 @@
/// varints as implemented in SQLite
///
/// Encodes `value` in 1 to 9 bytes, most significant group first. Each of the first
/// eight bytes carries 7 bits of the value with the high bit set when another byte
/// follows; a ninth byte, if present, carries 8 bits. `write(0)` yields `[0]`.
pub fn write(value: u64) -> Vec<u8> {
    if value & 0xff00_0000_0000_0000 != 0 {
        // One of the top 8 bits is set: the full 9-byte form is required. The last
        // byte holds the low 8 bits, the eight bytes before it hold 7 bits each.
        let mut result = vec![0_u8; 9];
        result[8] = value as u8;
        let mut rest = value >> 8;
        for slot in result[..8].iter_mut().rev() {
            *slot = ((rest & 0x7f) | 0x80) as u8;
            rest >>= 7;
        }
        result
    } else {
        let mut rest = value;
        let mut result = Vec::new();
        // Always emit at least one group so that 0 encodes as a single zero byte
        // instead of indexing into an empty vector.
        loop {
            result.push(((rest & 0x7f) | 0x80) as u8);
            rest >>= 7;
            if rest == 0 {
                break;
            }
        }
        // Groups were collected least significant first: clear the continuation bit
        // on the least significant group, then put the most significant group first.
        result[0] &= 0x7f;
        result.reverse();
        result
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The largest value needs the full 9-byte form.
    #[test]
    fn test() {
        assert_eq!(
            vec![0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF],
            write(0xffffffffffffffff)
        );
    }

    /// Values below 128 are encoded as a single byte.
    #[test]
    fn test_write1() {
        assert_eq!(vec![1], write(0x01));
    }

    /// 128 is the first value that needs a continuation byte.
    #[test]
    fn test_write_two_bytes() {
        assert_eq!(vec![0x81, 0x00], write(128));
    }
}
}