added scanner

This commit is contained in:
Sander Hautvast 2025-02-14 17:46:52 +01:00
parent 386218eecb
commit eb9b62e47c
5 changed files with 303 additions and 0 deletions

View file

@ -3,6 +3,7 @@ pub mod join;
pub mod order;
pub mod print;
pub mod read;
pub mod sql;
pub mod value;
use std::{

4
src/main.rs Normal file
View file

@ -0,0 +1,4 @@
/// Binary entry point; currently does nothing.
fn main() {}

2
src/sql/mod.rs Normal file
View file

@ -0,0 +1,2 @@
/// Lexical scanner turning SQL text into tokens.
pub mod scanner;
/// Token and token-type definitions used by the scanner.
pub mod tokens;

205
src/sql/scanner.rs Normal file
View file

@ -0,0 +1,205 @@
use std::collections::HashMap;
use anyhow::anyhow;
use crate::value::Value;
use super::tokens::{Token, TokenType};
/// Tokenizes `sql` into a flat list of tokens.
///
/// # Errors
/// Fails on the first unexpected character or unterminated string literal.
pub fn parse(sql: &str) -> anyhow::Result<Vec<Token>> {
    let mut scanner = Scanner::new(sql);
    scanner.scan_tokens().map(|()| scanner.tokens)
}
/// Hand-written scanner that walks the input character by character,
/// accumulating tokens into `tokens`.
struct Scanner {
    source: String,          // original input text
    source_chars: Vec<char>, // input as chars, for index-based lookahead
    tokens: Vec<Token>,      // tokens produced so far
    // NOTE(review): `start`/`current` count chars (they index `source_chars`),
    // but some methods byte-slice `source` with them — only safe for pure
    // ASCII input; verify before feeding multi-byte text.
    start: usize,   // position where the token being scanned begins
    current: usize, // position of the next character to consume
    keywords: HashMap<String, TokenType>, // lowercased keyword -> token type
}
impl Scanner {
    /// Builds a scanner over `sql` with the keyword table preloaded.
    fn new(sql: &str) -> Self {
        let mut new = Self {
            source: sql.to_string(),
            // Collect directly from `sql`; the original allocated a second
            // throwaway String just to call `.chars()`.
            source_chars: sql.chars().collect(),
            tokens: vec![],
            start: 0,
            current: 0,
            keywords: HashMap::new(),
        };
        crate::sql::tokens::add_keywords(&mut new.keywords);
        new
    }

    /// Scans the entire input, appending tokens to `self.tokens`.
    ///
    /// # Errors
    /// Fails on the first unexpected character or unterminated string.
    fn scan_tokens(&mut self) -> anyhow::Result<()> {
        while !self.is_at_end() {
            // Each token begins where the previous one ended.
            self.start = self.current;
            self.scan_token()?;
        }
        Ok(())
    }

    /// Scans a single token starting at `self.start`.
    fn scan_token(&mut self) -> anyhow::Result<()> {
        let c = self.advance();
        match c {
            '(' => self.add_token(TokenType::LeftParen),
            ')' => self.add_token(TokenType::RightParen),
            ',' => self.add_token(TokenType::Comma),
            '.' => self.add_token(TokenType::Dot),
            '-' => {
                if self.match_token('-') {
                    // "--" introduces a SQL line comment; skip to end of line.
                    while self.peek() != '\n' && !self.is_at_end() {
                        self.advance();
                    }
                } else {
                    self.add_token(TokenType::Minus);
                }
            }
            '+' => self.add_token(TokenType::Plus),
            ';' => self.add_token(TokenType::Semicolon),
            '*' => self.add_token(TokenType::Star),
            ':' => self.add_token(TokenType::Colon),
            // '=' was rejected as "unexpected" even though TokenType::Equals
            // exists, so e.g. "where x = 1" could never be scanned.
            '=' => self.add_token(TokenType::Equals),
            '!' => {
                let token = if self.match_token('=') {
                    TokenType::BangEquals
                } else {
                    TokenType::Bang
                };
                self.add_token(token)
            }
            '<' => {
                let token = if self.match_token('=') {
                    TokenType::LessEqual
                } else if self.match_token('>') {
                    // SQL's "<>" inequality operator.
                    TokenType::Unequal
                } else {
                    TokenType::Less
                };
                self.add_token(token)
            }
            '>' => {
                let token = if self.match_token('=') {
                    TokenType::GreaterEqual
                } else {
                    TokenType::Greater
                };
                self.add_token(token)
            }
            ' ' | '\t' | '\r' | '\n' => {} // whitespace only separates tokens
            '\'' => self.string()?,
            _ => {
                if is_digit(c) {
                    self.number();
                } else if is_alpha(c) {
                    self.identifier();
                } else {
                    return Err(anyhow!("Unexpected character '{}'", c));
                }
            }
        }
        Ok(())
    }

    /// Scans a keyword or identifier; keyword lookup is case-insensitive.
    fn identifier(&mut self) {
        while is_alphanumeric(self.peek()) {
            self.advance();
        }
        let text = self.lexeme();
        let tokentype = self
            .keywords
            .get(&text.to_lowercase())
            .copied()
            .unwrap_or(TokenType::Identifier);
        self.add_token(tokentype);
    }

    /// Scans a numeric literal; '.' or ',' is accepted as decimal separator.
    fn number(&mut self) {
        while is_digit(self.peek()) {
            self.advance();
        }
        if (self.peek() == '.' || self.peek() == ',') && is_digit(self.peek_next()) {
            // Consume the separator, then the fractional digits. The original
            // stopped right after the separator, splitting "3.14" into the two
            // tokens "3." and "14".
            self.advance();
            while is_digit(self.peek()) {
                self.advance();
            }
        }
        self.add_literal(TokenType::Num, self.lexeme().into());
    }

    /// Scans a single-quoted string; the opening quote is already consumed.
    fn string(&mut self) -> anyhow::Result<()> {
        while self.peek() != '\'' && !self.is_at_end() {
            self.advance();
        }
        if self.is_at_end() {
            return Err(anyhow!("Unterminated string value"));
        }
        // Consume the closing quote.
        self.advance();
        // The literal value excludes the surrounding quotes.
        let string: String = self.source_chars[self.start + 1..self.current - 1]
            .iter()
            .collect();
        self.add_literal(TokenType::Str, string.into());
        Ok(())
    }

    /// The current character without consuming it, or '\0' at end of input.
    fn peek(&self) -> char {
        if self.is_at_end() {
            '\0'
        } else {
            self.source_chars[self.current]
        }
    }

    /// The character after the current one, or '\0' past the end.
    fn peek_next(&self) -> char {
        // `>=` here: the original used `>`, so `current + 1 == len` fell
        // through and indexed one past the end — input such as "12." panicked.
        if self.current + 1 >= self.source_chars.len() {
            '\0'
        } else {
            self.source_chars[self.current + 1]
        }
    }

    /// Text of the token currently being scanned.
    ///
    /// Collects from `source_chars` (char indices) instead of byte-slicing
    /// `source`: `start`/`current` count chars, so byte slicing panicked or
    /// mis-sliced as soon as the input contained a multi-byte character.
    fn lexeme(&self) -> String {
        self.source_chars[self.start..self.current].iter().collect()
    }

    /// Appends a token that carries no literal payload.
    fn add_token(&mut self, tokentype: TokenType) {
        let text = self.lexeme();
        self.tokens.push(Token::new(tokentype, text, Value::NULL));
    }

    /// Appends a token carrying a literal value (Str/Num).
    fn add_literal(&mut self, tokentype: TokenType, literal: Value) {
        let text = self.lexeme();
        self.tokens.push(Token::new(tokentype, text, literal));
    }

    /// Consumes and returns the current character.
    fn advance(&mut self) -> char {
        self.current += 1;
        self.source_chars[self.current - 1]
    }

    /// Consumes the current character only if it equals `expected`.
    fn match_token(&mut self, expected: char) -> bool {
        if self.is_at_end() || self.source_chars[self.current] != expected {
            false
        } else {
            self.current += 1;
            true
        }
    }

    /// True once every character has been consumed.
    fn is_at_end(&self) -> bool {
        self.current >= self.source_chars.len()
    }
}
/// Reports whether `c` is an ASCII decimal digit (0-9).
fn is_digit(c: char) -> bool {
    c.is_ascii_digit()
}
/// Reports whether `c` may start an identifier: a letter or underscore.
fn is_alpha(c: char) -> bool {
    c == '_' || c.is_alphabetic()
}
/// Reports whether `c` may continue an identifier: a digit, letter,
/// or underscore.
fn is_alphanumeric(c: char) -> bool {
    is_digit(c) || is_alpha(c)
}
#[cfg(test)]
mod test {
    use super::*;

    /// The original test only printed the result and asserted nothing,
    /// so a broken scanner could never fail it. Assert on the outcome.
    #[test]
    fn test_parse() {
        let tokens = parse("select name from employee;").expect("valid sql should scan");
        // select, name, from, employee, ';'
        assert_eq!(tokens.len(), 5);
    }
}

91
src/sql/tokens.rs Normal file
View file

@ -0,0 +1,91 @@
use std::collections::HashMap;
use crate::value::Value;
/// A single lexical token produced by the SQL scanner.
#[derive(Debug)]
pub struct Token {
    tokentype: TokenType, // syntactic category (punctuation, keyword, literal, ...)
    lexeme: String,       // raw source text the token was scanned from
    literal: Value,       // payload for Str/Num tokens; Value::NULL otherwise
}
impl Token {
    /// Builds a token from its type, its raw source text, and a literal
    /// payload (`Value::NULL` when the token carries no literal).
    pub fn new(tokentype: TokenType, lexeme: impl Into<String>, literal: Value) -> Self {
        Self {
            tokentype,
            lexeme: lexeme.into(),
            literal,
        }
    }

    // The fields are private and previously had no accessors, so callers of
    // `parse` could not inspect the tokens they got back. Backward-compatible
    // read-only getters:

    /// The token's syntactic category.
    pub fn tokentype(&self) -> TokenType {
        self.tokentype
    }

    /// The raw source text this token was scanned from.
    pub fn lexeme(&self) -> &str {
        &self.lexeme
    }

    /// The literal payload (meaningful for Str/Num tokens).
    pub fn literal(&self) -> &Value {
        &self.literal
    }
}
/// Populates `keywords` with the reserved words the scanner recognizes.
///
/// Keys MUST be lowercase: `Scanner::identifier` looks identifiers up with
/// `to_lowercase()`, so an uppercase key can never match.
pub(crate) fn add_keywords(keywords: &mut HashMap<String, TokenType>) {
    let entries = [
        ("and", TokenType::And),
        ("else", TokenType::Else),
        ("false", TokenType::False),
        // was "NIL" — the uppercase key never matched the lowercased lookup,
        // so `nil` always tokenized as a plain Identifier.
        ("nil", TokenType::Nil),
        ("or", TokenType::Or),
        ("true", TokenType::True),
        ("select", TokenType::Select),
        ("from", TokenType::From),
        ("where", TokenType::Where),
        ("union", TokenType::Union),
        ("update", TokenType::Update),
        ("insert", TokenType::Insert),
        ("group", TokenType::Group),
        ("order", TokenType::Order),
        ("by", TokenType::By),
        ("having", TokenType::Having),
        ("sum", TokenType::Sum),
        ("max", TokenType::Max),
        ("min", TokenType::Min),
        ("delete", TokenType::Delete),
        ("commit", TokenType::Commit),
        ("describe", TokenType::Describe),
    ];
    for (word, tokentype) in entries {
        keywords.insert(word.to_string(), tokentype);
    }
}
/// Lexical token categories produced by the SQL scanner.
///
/// `PartialEq`/`Eq` are derived (backward-compatible) so a parser and tests
/// can compare token types directly instead of matching on `Debug` output.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenType {
    // Single-character punctuation.
    LeftParen,
    RightParen,
    Comma,
    Dot,
    Minus,
    Plus,
    Star,
    Semicolon,
    Colon,
    Bang, // !
    // One- and two-character operators.
    Equals,
    Less,
    LessEqual,
    Greater,
    GreaterEqual,
    BangEquals, // !=
    Unequal,    // <>
    // Literals and names.
    Str,
    Num,
    Identifier,
    // Keywords (see add_keywords).
    And,
    Else,
    False,
    Nil,
    Or,
    True,
    Select,
    From,
    Where,
    Union,
    Update,
    Insert,
    Group,
    Order,
    By,
    Having,
    Sum,
    Max,
    Min,
    Delete,
    Commit,
    Describe,
}