first commit
This commit is contained in:
commit
9e808b2b47
12 changed files with 586 additions and 0 deletions
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
/target
|
||||
.DS_Store
|
||||
23
Cargo.lock
generated
Normal file
23
Cargo.lock
generated
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 4
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.95"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04"
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
version = "1.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
|
||||
|
||||
[[package]]
|
||||
name = "csv"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"byteorder",
|
||||
]
|
||||
8
Cargo.toml
Normal file
8
Cargo.toml
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
[package]
|
||||
name = "csv"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
byteorder = "1.5"
|
||||
anyhow = "1.0"
|
||||
1
README.md
Normal file
1
README.md
Normal file
|
|
@ -0,0 +1 @@
|
|||
An attempt to query CSV files (as in SQLite after a CSV import).
|
||||
19
src/groupby.rs
Normal file
19
src/groupby.rs
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
use crate::Table;
|
||||
|
||||
impl Table {
|
||||
pub fn group_by(&self, select_expression: &str, group_by_expression: &str) -> Table {
|
||||
let table = Table::new("");
|
||||
|
||||
table
|
||||
}
|
||||
}
|
||||
|
||||
// fn parse_select(select: &str){
|
||||
|
||||
// }
|
||||
|
||||
// enum Aggregation {
|
||||
// Sum(String),
|
||||
// Max(String),
|
||||
// Min(String),
|
||||
// }
|
||||
29
src/join.rs
Normal file
29
src/join.rs
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
use crate::Table;
|
||||
|
||||
impl Table {
|
||||
pub fn left_join(&self, right: &Table, left_col: &str, right_col: &str, outer: bool) -> Table {
|
||||
join(self, right, left_col, right_col, outer)
|
||||
}
|
||||
|
||||
pub fn right_join(&self, right: &Table, left_col: &str, right_col: &str, outer: bool) -> Table {
|
||||
join(right, self, right_col, left_col, outer)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn join(left: &Table, right: &Table, left_col: &str, right_col: &str, outer: bool) -> Table {
|
||||
let mut joined = Table::new("join");
|
||||
left.cols.iter().for_each(|c| joined.add_column(c, true));
|
||||
right.cols.iter().for_each(|c| joined.add_column(c, true));
|
||||
let left_col_index = left.get_index(left_col);
|
||||
let right_col_index = right.get_index(right_col);
|
||||
|
||||
for record in left.iter_records() {
|
||||
let lv = record.get(left_col_index);
|
||||
if let Some(right_record) = right.where_clause(right_col_index, lv) {
|
||||
joined.add_record(record + right_record);
|
||||
} else if outer {
|
||||
joined.add_record(record.clone());
|
||||
}
|
||||
}
|
||||
joined
|
||||
}
|
||||
265
src/lib.rs
Normal file
265
src/lib.rs
Normal file
|
|
@ -0,0 +1,265 @@
|
|||
pub mod groupby;
|
||||
pub mod join;
|
||||
pub mod order;
|
||||
pub mod print;
|
||||
pub mod read;
|
||||
pub mod value;
|
||||
|
||||
use std::{
|
||||
cmp::Ordering,
|
||||
collections::{BTreeMap, HashMap},
|
||||
ops::Add,
|
||||
};
|
||||
|
||||
use value::Value;
|
||||
|
||||
pub struct Table {
|
||||
name: String,
|
||||
cols_by_name: HashMap<String, usize>,
|
||||
cols: Vec<String>,
|
||||
records: BTreeMap<Key, Record>,
|
||||
}
|
||||
|
||||
impl Table {
|
||||
pub fn new(name: impl Into<String>) -> Self {
|
||||
Self {
|
||||
name: name.into(),
|
||||
cols_by_name: HashMap::new(),
|
||||
cols: vec![],
|
||||
records: BTreeMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new table with the same name and columns as self,
|
||||
/// but without data
|
||||
// Note to self: be careful, might be dangerous to use once tables can be altered.
|
||||
// That is not yet implemented. May need full copies
|
||||
pub fn empty_copy(&self) -> Self {
|
||||
let mut result = Table::new(self.name.clone());
|
||||
result.cols_by_name = self.cols_by_name.clone();
|
||||
result.cols = self.cols.clone();
|
||||
result
|
||||
}
|
||||
|
||||
pub fn add_record(&mut self, record: Record) {
|
||||
let index = self.records.len();
|
||||
self.records.insert(Key::integer(index), record);
|
||||
}
|
||||
|
||||
pub fn has_column(&self, name: impl Into<String>) -> bool {
|
||||
self.cols_by_name.contains_key(&name.into())
|
||||
}
|
||||
|
||||
pub fn add_column(&mut self, name: impl Into<String>, allow_duplicates: bool) {
|
||||
let col_index = self.cols.len();
|
||||
let orig_name: String = name.into();
|
||||
|
||||
let name = if allow_duplicates {
|
||||
// append an index when there are duplicate column names
|
||||
let mut col_name = orig_name.to_string();
|
||||
let mut index = 2;
|
||||
|
||||
while self.has_column(&col_name) {
|
||||
col_name = orig_name.to_string();
|
||||
col_name.push_str(format!("{}", index).as_str());
|
||||
index += 1;
|
||||
}
|
||||
col_name
|
||||
} else {
|
||||
orig_name
|
||||
};
|
||||
|
||||
self.cols_by_name.insert(name.clone(), col_index);
|
||||
self.cols.push(name);
|
||||
}
|
||||
|
||||
fn get_indexes(&self, expression: &str) -> Vec<usize> {
|
||||
expression
|
||||
.split(",")
|
||||
.map(|c| self.get_index(c.trim()))
|
||||
.collect::<Vec<usize>>()
|
||||
}
|
||||
|
||||
fn get_index(&self, col_name: &str) -> usize {
|
||||
*self.cols_by_name.get(col_name).unwrap()
|
||||
}
|
||||
|
||||
pub fn iter(&self) -> TableIter {
|
||||
self.iter_records()
|
||||
}
|
||||
|
||||
pub fn iter_records(&self) -> TableIter {
|
||||
TableIter {
|
||||
table_iter: self.records.iter(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn select_columns<'a>(&'a self, columns: &'a Vec<&'a str>) -> OwnedColIter<'a> {
|
||||
OwnedColIter {
|
||||
cols: columns,
|
||||
index: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn iter_colums(&self) -> ColIter {
|
||||
ColIter {
|
||||
cols: &self.cols,
|
||||
index: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn where_clause(&self, colindex: usize, value: &Value) -> Option<&Record> {
|
||||
for record in self.iter_records() {
|
||||
let r = record.get(colindex);
|
||||
if r == value {
|
||||
return Some(record);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Record {
|
||||
values: Vec<Value>,
|
||||
}
|
||||
|
||||
impl Record {
|
||||
pub fn len(&self) -> usize {
|
||||
self.values.iter().map(Value::len).sum()
|
||||
}
|
||||
|
||||
pub fn add_value(&mut self, value: impl Into<Value>) {
|
||||
self.values.push(value.into());
|
||||
}
|
||||
|
||||
pub fn get(&self, index: usize) -> &Value {
|
||||
self.values.get(index).unwrap_or(&Value::NULL)
|
||||
}
|
||||
}
|
||||
|
||||
impl Add for &Record {
|
||||
type Output = Record;
|
||||
|
||||
fn add(self, rhs: Self) -> Self::Output {
|
||||
let mut sum = Record::default();
|
||||
sum.values.append(&mut self.values.clone());
|
||||
sum.values.append(&mut rhs.values.clone()); // use refs?
|
||||
sum
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Record {
|
||||
fn default() -> Self {
|
||||
Self { values: vec![] }
|
||||
}
|
||||
}
|
||||
|
||||
pub struct TableIter<'a> {
|
||||
table_iter: std::collections::btree_map::Iter<'a, Key, Record>,
|
||||
}
|
||||
|
||||
impl<'a> Iterator for TableIter<'a> {
|
||||
type Item = &'a Record;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.table_iter.next().map(|e| e.1)
|
||||
}
|
||||
}
|
||||
|
||||
/// Iterator over borrowed column names stored as `String`s.
pub struct ColIter<'a> {
    /// The column names being iterated.
    cols: &'a Vec<String>,
    /// Position of the next name to yield.
    index: usize,
}
|
||||
|
||||
/// Iterator over a caller-supplied list of column names.
pub struct OwnedColIter<'a> {
    /// The column names being iterated.
    cols: &'a Vec<&'a str>,
    /// Position of the next name to yield.
    index: usize,
}
|
||||
|
||||
impl<'a> Iterator for ColIter<'a> {
|
||||
type Item = &'a String;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if let Some(v) = self.cols.get(self.index) {
|
||||
self.index += 1;
|
||||
Some(v)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for OwnedColIter<'a> {
|
||||
type Item = &'a str;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if let Some(v) = self.cols.get(self.index) {
|
||||
self.index += 1;
|
||||
Some(v)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct Key {
|
||||
values: Vec<Value>,
|
||||
}
|
||||
|
||||
impl Key {
|
||||
fn integer(integer: usize) -> Self {
|
||||
Self {
|
||||
values: vec![Value::Integer(integer as i64)],
|
||||
}
|
||||
}
|
||||
|
||||
fn compound(keys: Vec<Value>) -> Self {
|
||||
Self { values: keys }
|
||||
}
|
||||
}
|
||||
impl Ord for Key {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
self.partial_cmp(other).unwrap()
|
||||
}
|
||||
}
|
||||
// Marker impl required by `Ord`: asserts that the `PartialEq` implementation
// for `Key` is a full equivalence relation.
impl Eq for Key {}
|
||||
|
||||
impl PartialEq for Key {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
if self.values.len() != other.values.len() {
|
||||
false
|
||||
} else {
|
||||
for (l, r) in self.values.iter().zip(&other.values) {
|
||||
if l != r {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialOrd for Key {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
||||
let len = self.values.len().min(other.values.len());
|
||||
for i in 0..len {
|
||||
let ord = self
|
||||
.values
|
||||
.get(i)
|
||||
.unwrap()
|
||||
.partial_cmp(other.values.get(i).unwrap())
|
||||
.unwrap();
|
||||
match ord {
|
||||
Ordering::Less => {
|
||||
return Some(Ordering::Less);
|
||||
}
|
||||
Ordering::Greater => {
|
||||
return Some(Ordering::Greater);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
Some(Ordering::Equal)
|
||||
}
|
||||
}
|
||||
21
src/main.rs
Normal file
21
src/main.rs
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
use csv::Table;
|
||||
|
||||
fn main() {
|
||||
let companies = Table::from_csv(include_str!("data/companies.csv"), "\t");
|
||||
let remove = Table::from_csv(include_str!("data/remove.csv"), "\t");
|
||||
|
||||
// companies.pretty_print("*");
|
||||
// remove.pretty_print("*");
|
||||
let left = Table::from_csv(include_str!("data/left.csv"), "\t");
|
||||
let right = Table::from_csv(include_str!("data/right.csv"), "\t");
|
||||
// left.pretty_print("*");
|
||||
// right.pretty_print("*");
|
||||
let join1 = left.left_join(&right, "name", "name", true);
|
||||
let join2 = left.right_join(&right, "name", "name", true);
|
||||
//
|
||||
companies
|
||||
.left_join(&remove, "aisAccountID", "aisaccountid", false)
|
||||
.order_by("aisAccountID")
|
||||
.select("aisAccountID");
|
||||
// join2.pretty_print("*");
|
||||
}
|
||||
17
src/order.rs
Normal file
17
src/order.rs
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
use std::collections::BTreeMap;
|
||||
|
||||
use crate::{Key, Table};
|
||||
|
||||
impl Table {
|
||||
pub fn order_by(&self, expression: &str) -> Self {
|
||||
let indexes = self.get_indexes(expression);
|
||||
let mut sorted_records = BTreeMap::new();
|
||||
for record in self.iter() {
|
||||
let key = indexes.iter().map(|i| record.get(*i).clone()).collect();
|
||||
sorted_records.insert(Key::compound(key), record.clone());
|
||||
}
|
||||
let mut ordered = Table::empty_copy(self);
|
||||
ordered.records = sorted_records;
|
||||
ordered
|
||||
}
|
||||
}
|
||||
104
src/print.rs
Normal file
104
src/print.rs
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
use std::collections::HashMap;
|
||||
|
||||
use crate::Table;
|
||||
|
||||
impl Table {
|
||||
/// prints the table contents in nice columns on the command line
|
||||
pub fn select(&self, expression: &str) {
|
||||
if expression == "*" {
|
||||
self.pretty_print_all();
|
||||
} else {
|
||||
let cols = expression
|
||||
.split(",")
|
||||
.map(|c| c.trim())
|
||||
.collect::<Vec<&str>>();
|
||||
cols.iter()
|
||||
.filter(|c| !self.has_column(**c))
|
||||
.any(|invalid| panic!("{} is not a column in this table", invalid));
|
||||
self.pretty_print_select(cols);
|
||||
}
|
||||
}
|
||||
|
||||
fn pretty_print_all(&self) {
|
||||
let column_widths = self.get_column_widths(0, usize::MAX);
|
||||
// let total = column_widths.values().iter();
|
||||
for col in self.iter_colums() {
|
||||
let w = column_widths.get(col).unwrap_or(&0);
|
||||
print!("| {:<w$} ", col);
|
||||
}
|
||||
println!("|");
|
||||
for record in self.iter_records() {
|
||||
for col in self.iter_colums() {
|
||||
let w = column_widths.get(col).unwrap_or(&0);
|
||||
// eprintln!("{}", w);
|
||||
print!("| {:<w$} ", record.get(self.get_index(col)).to_string());
|
||||
}
|
||||
println!("|");
|
||||
}
|
||||
}
|
||||
|
||||
fn pretty_print_select(&self, columns: Vec<&str>) {
|
||||
let column_widths = self.select_column_widths(0, usize::MAX, &columns);
|
||||
// let total = column_widths.values().iter();
|
||||
for col in self.select_columns(&columns) {
|
||||
let w = column_widths.get(col).unwrap_or(&0);
|
||||
print!("| {:<w$} ", col);
|
||||
}
|
||||
println!("|");
|
||||
for record in self.iter_records() {
|
||||
for col in self.select_columns(&columns) {
|
||||
let w = column_widths.get(col).unwrap_or(&0);
|
||||
// eprintln!("{}", w);
|
||||
print!("| {:<w$} ", record.get(self.get_index(col)).to_string());
|
||||
}
|
||||
println!("|");
|
||||
}
|
||||
}
|
||||
|
||||
/// returns a map of column index -> max length of column name/value in any of the rows
|
||||
/// needed for printing nice columns
|
||||
/// the following parameters allow for paging views
|
||||
/// offset: start at rowindex
|
||||
/// nrecords: take n records after offset
|
||||
fn get_column_widths(&self, offset: usize, nrecords: usize) -> HashMap<&String, usize> {
|
||||
let mut widths = HashMap::new();
|
||||
// initialize count with the length of the column name
|
||||
for col in self.iter_colums() {
|
||||
widths.insert(col, col.len());
|
||||
}
|
||||
for record in self.iter_records().skip(offset).take(nrecords) {
|
||||
for col in self.iter_colums() {
|
||||
let e = widths.get_mut(&col).unwrap();
|
||||
let index = self.get_index(col);
|
||||
*e = (*e).max(record.get(index).len());
|
||||
}
|
||||
}
|
||||
widths
|
||||
}
|
||||
|
||||
// returns a map of column index -> max length of column name/value in any of the rows
|
||||
/// needed for printing nice columns
|
||||
/// the following parameters allow for paging views
|
||||
/// offset: start at rowindex
|
||||
/// nrecords: take n records after offset
|
||||
fn select_column_widths<'a>(
|
||||
&'a self,
|
||||
offset: usize,
|
||||
nrecords: usize,
|
||||
columns: &'a Vec<&'a str>,
|
||||
) -> HashMap<&'a str, usize> {
|
||||
let mut widths = HashMap::new();
|
||||
// initialize count with the length of the column name
|
||||
for col in self.select_columns(columns) {
|
||||
widths.insert(col, col.len());
|
||||
}
|
||||
for record in self.iter_records().skip(offset).take(nrecords) {
|
||||
for col in self.select_columns(columns) {
|
||||
let e = widths.get_mut(&col).unwrap();
|
||||
let index = self.get_index(&col);
|
||||
*e = (*e).max(record.get(index).len());
|
||||
}
|
||||
}
|
||||
widths
|
||||
}
|
||||
}
|
||||
21
src/read.rs
Normal file
21
src/read.rs
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
use crate::{Record, Table};
|
||||
|
||||
impl Table {
|
||||
pub fn from_csv(csv: &str, separator: &str) -> Self {
|
||||
let mut table = Table::new("test");
|
||||
for (index, row) in csv.split("\n").enumerate() {
|
||||
if index == 0 {
|
||||
for col in row.split(separator) {
|
||||
table.add_column(col, true);
|
||||
}
|
||||
} else if row.len() > 0 {
|
||||
let mut record = Record::default();
|
||||
for value in row.split(separator) {
|
||||
record.add_value(value);
|
||||
}
|
||||
table.add_record(record);
|
||||
}
|
||||
}
|
||||
table
|
||||
}
|
||||
}
|
||||
76
src/value.rs
Normal file
76
src/value.rs
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
use std::fmt::Display;
|
||||
|
||||
/// A single cell value.
///
/// Equality and ordering are numeric across `Integer` and `Float`
/// (`Integer(1) == Float(1.0)`). Values of different kinds order as
/// text < numbers < `NULL`.
#[derive(Debug, Clone)]
pub enum Value {
    Text(String),
    Float(f64),
    Integer(i64),
    NULL,
}

impl Value {
    /// Length in bytes of the raw value: the text itself for `Text`, the
    /// decimal rendering for numbers, `0` for `NULL`.
    ///
    /// Note that this differs from the length of the `Display` output for
    /// `Text` (which adds quotes) and `NULL`.
    pub fn len(&self) -> usize {
        match self {
            Value::Text(text) => text.len(),
            Value::Float(float) => float.to_string().len(),
            Value::Integer(integer) => integer.to_string().len(),
            Value::NULL => 0,
        }
    }

    /// Sort rank of the kind of value: text, then numbers, then `NULL`.
    fn rank(&self) -> u8 {
        match self {
            Value::Text(_) => 0,
            Value::Float(_) | Value::Integer(_) => 1,
            Value::NULL => 2,
        }
    }
}

impl PartialEq for Value {
    fn eq(&self, other: &Self) -> bool {
        match (self, other) {
            (Value::Text(a), Value::Text(b)) => a == b,
            (Value::Float(a), Value::Float(b)) => a == b,
            (Value::Integer(a), Value::Integer(b)) => a == b,
            (Value::NULL, Value::NULL) => true,
            (Value::Integer(i), Value::Float(f)) | (Value::Float(f), Value::Integer(i)) => {
                cmp_int_float(*i, *f) == Some(std::cmp::Ordering::Equal)
            }
            _ => false,
        }
    }
}

impl PartialOrd for Value {
    /// Returns `None` only when a NaN float is compared with a number.
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        match (self, other) {
            (Value::Text(a), Value::Text(b)) => a.partial_cmp(b),
            (Value::Float(a), Value::Float(b)) => a.partial_cmp(b),
            (Value::Integer(a), Value::Integer(b)) => a.partial_cmp(b),
            (Value::Integer(i), Value::Float(f)) => cmp_int_float(*i, *f),
            (Value::Float(f), Value::Integer(i)) => {
                cmp_int_float(*i, *f).map(std::cmp::Ordering::reverse)
            }
            // Different kinds (or NULL with NULL): order by kind.
            _ => Some(self.rank().cmp(&other.rank())),
        }
    }
}

/// Compares an integer with a float exactly, without rounding the integer
/// to `f64` first. Returns `None` when `float` is NaN.
fn cmp_int_float(int: i64, float: f64) -> Option<std::cmp::Ordering> {
    use std::cmp::Ordering;

    const TWO_POW_63: f64 = 9_223_372_036_854_775_808.0;
    if float.is_nan() {
        return None;
    }
    if float >= TWO_POW_63 {
        return Some(Ordering::Less);
    }
    if float < -TWO_POW_63 {
        return Some(Ordering::Greater);
    }
    // `whole` lies in [-2^63, 2^63), so the cast below is exact.
    let whole = float.trunc();
    match int.cmp(&(whole as i64)) {
        // Same integral part: the sign of the fraction decides.
        Ordering::Equal => 0.0_f64.partial_cmp(&(float - whole)),
        unequal => Some(unequal),
    }
}

impl Display for Value {
    /// Numbers are written as they are, text is wrapped in double quotes and
    /// `NULL` is written as `NULL`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Value::Float(float) => write!(f, "{}", float),
            Value::Integer(integer) => write!(f, "{}", integer),
            Value::Text(text) => write!(f, "\"{}\"", text),
            Value::NULL => f.write_str("NULL"),
        }
    }
}

/// Turns raw text into the most specific value: an integer if it parses as
/// `i64`, else a float if it parses as a finite `f64`, else text with one
/// pair of surrounding double quotes removed.
fn parse_value(raw: &str) -> Value {
    // Integers first: every integer literal also parses as `f64`, which would
    // make the integer case unreachable and round large numbers.
    if let Ok(integer) = raw.parse::<i64>() {
        return Value::Integer(integer);
    }
    match raw.parse::<f64>() {
        // Words such as "nan" or "inf" parse as floats too; keep them as text.
        Ok(float) if float.is_finite() => Value::Float(float),
        _ => Value::Text(strip_quotes(raw)),
    }
}

impl From<&str> for Value {
    fn from(text: &str) -> Self {
        parse_value(text)
    }
}

impl From<String> for Value {
    fn from(text: String) -> Self {
        parse_value(&text)
    }
}

impl From<f64> for Value {
    fn from(float: f64) -> Self {
        Value::Float(float)
    }
}

impl From<i64> for Value {
    fn from(integer: i64) -> Self {
        Value::Integer(integer)
    }
}

/// Removes one pair of surrounding double quotes, if present.
///
/// Text that is not quoted on both ends (including a lone `"`) is returned
/// unchanged.
fn strip_quotes(text: impl Into<String>) -> String {
    let text = text.into();
    match text
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
    {
        Some(inner) => inner.to_string(),
        None => text,
    }
}
|
||||
Loading…
Add table
Reference in a new issue