From 518622947dcb65be8882562ae04da0b4a6379da8 Mon Sep 17 00:00:00 2001 From: Shautvast Date: Thu, 20 Feb 2025 21:43:28 +0100 Subject: [PATCH] can read and write --- src/main.rs | 2 +- src/page.rs | 10 +++++----- src/record.rs | 42 ++++++++++++++++++++---------------------- src/table.rs | 3 ++- src/value.rs | 16 ++++++++++++---- 5 files changed, 40 insertions(+), 33 deletions(-) diff --git a/src/main.rs b/src/main.rs index c75f2fe..8a2ad3a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,6 +3,6 @@ use csv::table::Table; fn main() { let csv = include_str!("data/test.csv"); let table = Table::from_csv(csv, None); - println!("{:?}",table); + // println!("{:?}",table); table.select("*"); } diff --git a/src/page.rs b/src/page.rs index 23e0c08..965d134 100644 --- a/src/page.rs +++ b/src/page.rs @@ -62,14 +62,14 @@ impl Page { self.data.splice(start..self.index_pos as usize, bytes); } - pub fn get(&self, index: usize) -> Option { - if index < self.n_records { - let index = BigEndian::read_u16(&self.data[index * 2..=index * 2 + 1]); - let (nbytes, len) = varint::read(&self.data[index as usize..]); + pub fn get(&self, row_index: usize) -> Option { + if row_index < self.n_records { + let physical_index = BigEndian::read_u16(&self.data[row_index * 2..=row_index * 2 + 1]); + let (bytes_read, len) = varint::read(&self.data[physical_index as usize..]); Some( ( len, - &self.data[nbytes + index as usize..nbytes + index as usize + len as usize], + &self.data[bytes_read + physical_index as usize..=bytes_read + physical_index as usize + len as usize], ) .into(), ) diff --git a/src/record.rs b/src/record.rs index c9a134d..eb3ebe4 100644 --- a/src/record.rs +++ b/src/record.rs @@ -88,15 +88,17 @@ impl From for Vec { // needs improving, for clarity get rid of the tuple impl Into for (u64, &[u8]) { fn into(self) -> Record { - let (len, data) = self; - let len = len as usize; //meh - let (mut offset, rowid) = varint::read(data); - + let (_, data) = self; + let mut offset = 0; + let (inc, rowid) = varint::read(data); + offset += inc; let mut datatypes = vec![]; + let (inc, dt_len) = varint::read(&data[offset..]); + offset += inc; + let end_of_dt = offset + dt_len as usize - 1; // why -1? + //read n of fields - //read n of fields - - while offset < len { + while offset < end_of_dt { //WRONG, read this len first from the buffer let (inc, datatype) = varint::read(&data[offset..]); datatypes.push(datatype); @@ -107,27 +109,24 @@ impl Into for (u64, &[u8]) { let mut values: Vec = vec![]; for dt in datatypes { match dt { - 13.. if dt % 2 == 0 => { - let len = ((dt >> 1) - 13) as usize; - if let Ok(text) = String::from_utf8(data[offset..len].to_vec()) { - values.push(text.into()); - } + 13.. if dt % 2 == 1 => { + let len = ((dt - 13) >> 1) as usize; + values.push(Value::new(dt, data[offset..offset + len].to_vec())); offset += len; } 12.. if dt % 2 == 0 => { let len = ((dt >> 1) - 12) as usize; - // no blobs yet + values.push(Value::new(dt, data[offset..offset + len].to_vec())); offset += len; } - 9 => values.push(1.into()), - 8 => values.push(0.into()), + 8 | 9 => values.push(Value::new(dt, vec![])), 7 => { - values.push(BigEndian::read_f64(&data[offset..offset + 8]).into()); + values.push(Value::new(dt, data[offset..offset + 8].to_vec())); offset += 8; } 1..=6 => { - let (inc, v) = read_int(&data[offset..], dt); - values.push(v.into()); + let inc = read_int_len(dt); + values.push(Value::new(dt, data[offset..offset + inc].to_vec())); offset += inc; } 0 => { @@ -141,13 +140,12 @@ impl Into for (u64, &[u8]) { } } -fn read_int(buf: &[u8], datatype: u64) -> (usize, i64) { - let nb = match datatype { +fn read_int_len(datatype: u64) -> usize { + match datatype { 6 => 8, 5 => 6, _ => datatype as usize, - }; - (nb, BigEndian::read_i64(&buf[..nb])) + } } impl Default for Record { diff --git a/src/table.rs b/src/table.rs index 5bec221..d90219a 100644 --- a/src/table.rs +++ b/src/table.rs @@ -153,7 +153,8 @@ impl Iterator for TableIter { type Item = Record; fn next(&mut self) -> Option { - self.root_page.borrow().get(self.index) + self.index += 1; + self.root_page.borrow().get(self.index - 1) } } diff --git a/src/value.rs b/src/value.rs index 24efb14..e305104 100644 --- a/src/value.rs +++ b/src/value.rs @@ -61,7 +61,7 @@ pub enum Datatype { } impl Value { - fn new(datatype: u64, data: Vec) -> Self { + pub(crate) fn new(datatype: u64, data: Vec) -> Self { Self { datatype, data, @@ -69,6 +69,14 @@ impl Value { } } + pub(crate) fn from_raw(datatype_bytes: Vec, data: Vec) -> Self { + Self{ + datatype: varint::read(&datatype_bytes).1, + datatype_bytes, + data, + } + } + /// get the length of the encoding of the value pub fn bytes_len(&self) -> u16 { (self.datatype_bytes.len() + self.data.len()) as u16 @@ -188,10 +196,10 @@ impl Into for &str { impl Into for String { fn into(self) -> Value { - if let Ok(f) = self.parse::() { - Value::from_f64(f) - } else if let Ok(i) = self.parse::() { + if let Ok(i) = self.parse::() { Value::from_i64(i) + } else if let Ok(f) = self.parse::() { + Value::from_f64(f) } else { Value::from_text(strip_quotes(self)) }