use crate::ast_compiler::Expression::{ FieldGet, FunctionCall, ListGet, MapGet, NamedParameter, Stop, Variable, }; use crate::errors::CompilerError::{ self, Expected, ParseError, TooManyParameters, UndeclaredVariable, UnexpectedIndent, UninitializedVariable, }; use crate::errors::CompilerErrorAtLine; use crate::symbol_builder::{Symbol, calculate_type, infer_type}; use crate::tokens::TokenType::{ Bang, Bool, Char, Colon, DateTime, Dot, Eof, Eol, Equal, F32, F64, False, FloatingPoint, Fn, Greater, GreaterEqual, GreaterGreater, I32, I64, Identifier, Indent, Integer, LeftBrace, LeftBracket, LeftParen, Less, LessEqual, LessLess, Let, ListType, MapType, Minus, Object, Plus, Print, RightBrace, RightBracket, RightParen, SignedInteger, SingleRightArrow, Slash, Star, StringType, True, U32, U64, Unknown, UnsignedInteger, }; use crate::tokens::{Token, TokenType}; use crate::value::Value; use log::debug; use std::collections::HashMap; pub fn compile( path: Option<&str>, tokens: Vec, symbol_table: &mut HashMap, ) -> Result, CompilerErrorAtLine> { let mut compiler = AstCompiler::new(path.unwrap_or(""), tokens); compiler.compile_tokens(symbol_table) } #[derive(Debug, Clone)] pub struct Function { pub(crate) name: Token, pub(crate) parameters: Vec, pub(crate) return_type: TokenType, pub(crate) body: Vec, } struct AstCompiler { tokens: Vec, current: usize, had_error: bool, indent: Vec, } impl AstCompiler { fn new(_name: &str, tokens: Vec) -> Self { Self { tokens, current: 0, had_error: false, indent: vec![0], } } fn reset(&mut self) { self.current = 0; } fn compile_tokens( &mut self, symbol_table: &mut HashMap, ) -> Result, CompilerErrorAtLine> { self.reset(); self.compile(symbol_table) } fn compile( &mut self, symbol_table: &mut HashMap, ) -> Result, CompilerErrorAtLine> { self.current_line(); if !self.had_error { let mut statements = vec![]; while !self.is_at_end() { let statement = self.indent(symbol_table)?; if let Some(statement) = statement { statements.push(statement); } else { break; } } debug!("AST {:?}", statements); Ok(statements) } else { Err(self.raise(CompilerError::Failure)) } } fn raise(&self, error: CompilerError) -> CompilerErrorAtLine { CompilerErrorAtLine::raise(error, self.current_line()) } fn indent( &mut self, symbol_table: &mut HashMap, ) -> Result, CompilerErrorAtLine> { let expected_indent = *self.indent.last().unwrap(); // skip empty lines while self.check(Eol) { self.advance(); } let mut indent_on_line = 0; // keep track of indent level while self.match_token(vec![Indent]) { indent_on_line += 1; } if indent_on_line > expected_indent { Err(self.raise(UnexpectedIndent(indent_on_line, expected_indent))) } else if indent_on_line < expected_indent { self.indent.pop(); return Ok(None); } else { Ok(Some(self.declaration(symbol_table)?)) } } fn declaration( &mut self, symbol_table: &mut HashMap, ) -> Result { if self.match_token(vec![Fn]) { self.function_declaration(symbol_table) } else if self.match_token(vec![Let]) { self.let_declaration(symbol_table) } else if self.match_token(vec![Object]) { self.object_declaration() } else if self.match_token(vec![TokenType::Pipe]) { self.guard_declaration(symbol_table) } else { self.statement(symbol_table) } } // | /. -> service.get_all() // | /{uuid} -> service.get(uuid)? // | ?{query.firstname} -> service.get_by_firstname(fname)? fn guard_declaration( &mut self, symbol_table: &mut HashMap, ) -> Result { let if_expr = self.guard_if_expr(symbol_table)?; let then_expr = self.expression(symbol_table)?; Ok(Statement::GuardStatement { if_expr, then_expr }) } fn guard_if_expr( &mut self, symbol_table: &mut HashMap, ) -> Result { while !self.check(SingleRightArrow) { if self.match_token(vec![Slash]) { return self.path_guard_expr(); } else if self.match_token(vec![TokenType::Question]) { return self.query_guard_expr(symbol_table); } else { return Err(self.raise(Expected("-> or ?"))); } } Ok(Stop { line: self.peek().line, }) } fn query_guard_expr( &mut self, symbol_table: &mut HashMap, ) -> Result { if self.match_token(vec![LeftBrace]) { let query_params = self.expression(symbol_table)?; self.consume(RightBrace, Expected("'}' after guard expression."))?; Ok(query_params) } else { Ok(Stop { line: self.peek().line, }) } } fn path_guard_expr(&mut self) -> Result { if self.match_token(vec![LeftBrace]) { let path_params = self.match_expression()?; self.consume(RightBrace, Expected("'}' after guard expression."))?; Ok(path_params) } else { Ok(Stop { line: self.peek().line, }) } } fn match_expression(&mut self) -> Result { Err(self.raise(Expected("unimplemented"))) } fn object_declaration(&mut self) -> Result { let type_name = self.consume(Identifier, Expected("object name."))?; self.consume(Colon, Expected("':' after object name."))?; self.consume(Eol, Expected("end of line."))?; let mut fields = vec![]; let expected_indent = self.indent.last().unwrap() + 1; // self.indent.push(expected_indent); let mut done = false; while !done && !self.match_token(vec![Eof]) { for _ in 0..expected_indent { if self.peek().token_type == Indent { self.advance(); } else { done = true; } } if !done { let field_name = self.consume(Identifier, Expected("an object field name."))?; self.consume(Colon, Expected("':' after field name."))?; let field_type = self.peek().token_type.clone(); if field_type.is_type() { self.advance(); } else { Err(self.raise(Expected("a type")))? } fields.push(Parameter { name: field_name, var_type: field_type, }); } } self.consume(Eol, Expected("end of line."))?; Ok(Statement::ObjectStmt { name: type_name, fields, }) } fn function_declaration( &mut self, symbol_table: &mut HashMap, ) -> Result { let name_token = self.consume(Identifier, Expected("function name."))?; self.consume(LeftParen, Expected("'(' after function name."))?; let mut parameters = vec![]; while !self.check(RightParen) { if parameters.len() >= 25 { return Err(self.raise(TooManyParameters)); } let parm_name = self.consume(Identifier, Expected("a parameter name."))?; self.consume(Colon, Expected(": after parameter name"))?; let var_type = self.peek().token_type.clone(); self.advance(); parameters.push(Parameter { name: parm_name, var_type, }); if self.peek().token_type == TokenType::Comma { self.advance(); } } self.consume(RightParen, Expected(" ')' after parameters."))?; let return_type = if self.check(SingleRightArrow) { self.consume(SingleRightArrow, Expected("->"))?; self.advance().token_type.clone() } else { TokenType::Void }; self.consume(Colon, Expected("colon (:) after function declaration."))?; self.consume(Eol, Expected("end of line."))?; let current_indent = self.indent.last().unwrap(); self.indent.push(current_indent + 1); let body = self.compile(symbol_table)?; let function = Function { name: name_token.clone(), parameters, return_type, body, }; Ok(Statement::FunctionStmt { function }) } fn let_declaration( &mut self, symbol_table: &mut HashMap, ) -> Result { if self.peek().token_type.is_type() { return Err(self.raise(CompilerError::KeywordNotAllowedAsIdentifier( self.peek().token_type.clone(), ))); } let name_token = self.consume(Identifier, Expected("variable name."))?; let declared_type = if self.check(Colon) { self.advance(); Some(self.advance().token_type.clone()) } else { None }; if self.match_token(vec![Equal]) { let initializer = self.expression(symbol_table)?; let declared_type = declared_type.unwrap_or(Unknown); let inferred_type = initializer.infer_type(); let var_type = calculate_type(&declared_type, &inferred_type).map_err(|e| self.raise(e))?; symbol_table.insert( name_token.lexeme.clone(), Symbol::Variable { name: name_token.lexeme.clone(), var_type: var_type.clone(), }, ); self.consume(Eol, Expected("end of line after initializer."))?; Ok(Statement::VarStmt { name: name_token, var_type, initializer, }) } else { Err(self.raise(UninitializedVariable))? } } fn statement( &mut self, symbol_table: &mut HashMap, ) -> Result { if self.match_token(vec![Print]) { self.print_statement(symbol_table) } else { self.expr_statement(symbol_table) } } fn print_statement( &mut self, symbol_table: &mut HashMap, ) -> Result { let expr = self.expression(symbol_table)?; self.consume(Eol, Expected("end of line after print statement."))?; Ok(Statement::PrintStmt { value: expr }) } fn expr_statement( &mut self, symbol_table: &mut HashMap, ) -> Result { let expr = self.expression(symbol_table)?; if !self.is_at_end() { self.consume(Eol, Expected("end of line after expression."))?; } Ok(Statement::ExpressionStmt { expression: expr }) } fn expression( &mut self, symbol_table: &mut HashMap, ) -> Result { self.or(symbol_table) } fn or( &mut self, symbol_table: &mut HashMap, ) -> Result { let expr = self.and(symbol_table)?; self.binary(vec![TokenType::LogicalOr], expr, symbol_table) } fn and( &mut self, symbol_table: &mut HashMap, ) -> Result { let expr = self.bit_and(symbol_table)?; self.binary(vec![TokenType::LogicalAnd], expr, symbol_table) } fn bit_and( &mut self, symbol_table: &mut HashMap, ) -> Result { let expr = self.bit_or(symbol_table)?; self.binary(vec![TokenType::BitAnd], expr, symbol_table) } fn bit_or( &mut self, symbol_table: &mut HashMap, ) -> Result { let expr = self.bit_xor(symbol_table)?; self.binary(vec![TokenType::Pipe], expr, symbol_table) } fn bit_xor( &mut self, symbol_table: &mut HashMap, ) -> Result { let expr = self.equality(symbol_table)?; self.binary(vec![TokenType::BitXor], expr, symbol_table) } fn equality( &mut self, symbol_table: &mut HashMap, ) -> Result { let expr = self.comparison(symbol_table)?; self.binary( vec![TokenType::EqualEqual, TokenType::BangEqual], expr, symbol_table, ) } fn comparison( &mut self, symbol_table: &mut HashMap, ) -> Result { let expr = self.bitshift(symbol_table)?; self.binary( vec![Greater, GreaterEqual, Less, LessEqual], expr, symbol_table, ) } fn bitshift( &mut self, symbol_table: &mut HashMap, ) -> Result { let expr = self.term(symbol_table)?; self.binary(vec![GreaterGreater, LessLess], expr, symbol_table) } fn term( &mut self, symbol_table: &mut HashMap, ) -> Result { let expr = self.factor(symbol_table)?; self.binary(vec![Minus, Plus], expr, symbol_table) } fn factor( &mut self, symbol_table: &mut HashMap, ) -> Result { let expr = self.unary(symbol_table)?; self.binary(vec![Slash, Star], expr, symbol_table) } fn binary( &mut self, types: Vec, mut expr: Expression, symbol_table: &mut HashMap, ) -> Result { while self.match_token(types.clone()) { let operator = self.previous().clone(); let right = self.comparison(symbol_table)?; expr = Expression::Binary { line: operator.line, left: Box::new(expr), operator, right: Box::new(right), }; } Ok(expr) } fn unary( &mut self, symbol_table: &mut HashMap, ) -> Result { if self.match_token(vec![Bang, Minus]) { let operator = self.previous().clone(); let right = self.unary(symbol_table)?; Ok(Expression::Unary { line: self.peek().line, operator, right: Box::new(right), }) } else { let expr = self.get(symbol_table); expr } } fn get( &mut self, symbol_table: &mut HashMap, ) -> Result { let expr = self.primary(symbol_table)?; if self.match_token(vec![LeftParen]) { let name = self.peek().clone(); self.advance(); self.function_call(name, symbol_table) } else if self.match_token(vec![LeftBracket]) { let index = self.expression(symbol_table)?; self.index(expr, index) } else if self.match_token(vec![Dot]) { let name = self.peek().clone(); self.advance(); self.field(expr, name) } else { Ok(expr) } } fn index( &mut self, operand: Expression, index: Expression, ) -> Result { let get = (match &operand { Expression::Map { .. } => MapGet { key: Box::new(index), }, Expression::List { .. } => ListGet { list: Box::new(operand), index: Box::new(index), }, Variable { var_type, .. } => { if var_type == &ListType { ListGet { list: Box::new(operand), index: Box::new(index), } } else { return Err(self.raise(CompilerError::IllegalTypeToIndex(var_type.to_string()))); } } _ => return Err(self.raise(CompilerError::IllegalTypeToIndex("Unknown".to_string()))), }); self.consume(RightBracket, Expected("']' after index."))?; Ok(get) } fn field( &mut self, operand: Expression, index: Token, ) -> Result { //TODO? Ok(Expression::FieldGet { field: index.lexeme.clone(), }) } fn primary( &mut self, symbol_table: &mut HashMap, ) -> Result { debug!("primary {:?}", self.peek()); Ok(if self.match_token(vec![LeftBracket]) { self.list(symbol_table)? } else if self.match_token(vec![LeftBrace]) { self.map(symbol_table)? } else if self.match_token(vec![False]) { Expression::Literal { line: self.peek().line, literaltype: Bool, value: Value::Bool(false), } } else if self.match_token(vec![True]) { Expression::Literal { line: self.peek().line, literaltype: Bool, value: Value::Bool(true), } //, FloatingPoint, Text } else if self.match_token(vec![Integer]) { Expression::Literal { line: self.peek().line, literaltype: Integer, value: Value::I64( self.previous() .lexeme .parse() .map_err(|e| self.raise(ParseError(format!("{:?}", e))))?, ), } } else if self.match_token(vec![U32]) { Expression::Literal { line: self.peek().line, literaltype: Integer, value: Value::U32( u32::from_str_radix(&self.previous().lexeme.trim_start_matches("0x"), 16) .map_err(|e| self.raise(ParseError(format!("{:?}", e))))?, ), } } else if self.match_token(vec![U64]) { Expression::Literal { line: self.peek().line, literaltype: Integer, value: Value::U64( u64::from_str_radix(&self.previous().lexeme.trim_start_matches("0x"), 16) .map_err(|e| self.raise(ParseError(format!("{:?}", e))))?, ), } } else if self.match_token(vec![FloatingPoint]) { Expression::Literal { line: self.peek().line, literaltype: FloatingPoint, value: Value::F64( self.previous() .lexeme .parse() .map_err(|e| self.raise(ParseError(format!("{:?}", e))))?, ), } } else if self.match_token(vec![StringType]) { Expression::Literal { line: self.peek().line, literaltype: StringType, value: Value::String(self.previous().lexeme.clone()), } } else if self.match_token(vec![Char]) { Expression::Literal { line: self.peek().line, literaltype: Char, value: Value::Char(self.previous().lexeme.chars().next().unwrap()), } } else if self.match_token(vec![DateTime]) { Expression::Literal { line: self.peek().line, literaltype: DateTime, value: Value::DateTime( chrono::DateTime::parse_from_str( &self.previous().lexeme, "%Y-%m-%d %H:%M:%S%.3f %z", ) .map_err(|e| self.raise(ParseError(self.previous().lexeme.clone())))? .into(), ), } } else if self.match_token(vec![LeftParen]) { let expr = self.expression(symbol_table)?; self.consume(RightParen, Expected("')' after expression."))?; Expression::Grouping { line: self.peek().line, expression: Box::new(expr), } } else { let token = self.advance().clone(); debug!("{:?}", token); // function call? if self.match_token(vec![LeftParen]) { self.function_call(token.clone(), symbol_table)? } else if self.match_token(vec![Colon]) { self.named_parameter(&token, symbol_table)? } else { // } else if self.check(Dot) { // chain of variable or function lookups? // let mut name = "/".to_string(); // name.push_str(&self.previous().lexeme); // while self.match_token(vec![Dot]) { // name.push_str("/"); // name.push_str(&self.peek().lexeme); // self.advance(); // } // chained function call? // if self.match_token(vec![LeftParen]) { // self.function_call(name())? // } else { // empty line // return if self.match_token(vec![Eol, Eof]) { // Ok(Expression::Literal { // value: Value::Void, // literaltype: Object, // line: token.line, // }) // } else { // Err(self.raise(UndeclaredVariable(token.lexeme.clone()))) // }; // } // } else { // none of the above, must be a variable lookup self.variable_lookup(&token, symbol_table)? } }) } fn named_parameter( &mut self, name: &Token, symbol_table: &mut HashMap, ) -> Result { let value = self.expression(symbol_table)?; let line = name.line; Ok(NamedParameter { name: name.clone(), value: Box::new(value), line, }) } fn list( &mut self, symbol_table: &mut HashMap, ) -> Result { let mut list = vec![]; while !self.match_token(vec![RightBracket]) { list.push(self.expression(symbol_table)?); if self.peek().token_type == TokenType::Comma { self.advance(); } else { self.consume(RightBracket, Expected("']' at the end of the list."))?; break; } } Ok(Expression::List { values: list, literaltype: ListType, line: self.peek().line, }) } fn map( &mut self, symbol_table: &mut HashMap, ) -> Result { let mut entries = vec![]; while !self.match_token(vec![RightBrace]) { let key = self.expression(symbol_table)?; self.consume(Colon, Expected("':' after map key."))?; let value = self.expression(symbol_table)?; entries.push((key, value)); if self.peek().token_type == TokenType::Comma { self.advance(); } else { self.consume(RightBrace, Expected("'}' after map."))?; break; } } Ok(Expression::Map { entries, literaltype: MapType, line: self.peek().line, }) } fn variable_lookup( &mut self, name: &Token, symbol_table: &mut HashMap, ) -> Result { let var = symbol_table.get(&name.lexeme); let var_type = if let Some(Symbol::Variable { var_type, .. }) = var { var_type } else { &Unknown }; Ok(Variable { name: name.lexeme.to_string(), var_type: var_type.clone(), line: name.line, }) } fn function_call( &mut self, name: Token, symbol_table: &mut HashMap, ) -> Result { let mut arguments = vec![]; while !self.match_token(vec![RightParen]) { if arguments.len() >= 25 { return Err(self.raise(TooManyParameters)); } let arg = self.expression(symbol_table)?; arguments.push(arg); if self.peek().token_type == TokenType::Comma { self.advance(); } else { self.consume(RightParen, Expected("')' after arguments."))?; break; } } Ok(FunctionCall { line: self.peek().line, name: name.lexeme.to_string(), arguments, }) } fn consume( &mut self, token_type: TokenType, message: CompilerError, ) -> Result { if self.check(token_type) { self.advance(); } else { self.had_error = true; return Err(self.raise(message)); } Ok(self.previous().clone()) } fn match_token(&mut self, tokens: Vec) -> bool { for tt in tokens { if self.check(tt) { self.advance(); return true; } } false } fn check(&self, token_type: TokenType) -> bool { if self.is_at_end() { false } else { self.peek().token_type == token_type } } fn peek(&self) -> &Token { &self.tokens[self.current] } fn previous(&self) -> &Token { &self.tokens[self.current - 1] } fn advance(&mut self) -> &Token { if !self.is_at_end() { self.current += 1; } &self.previous() } fn is_at_end(&self) -> bool { self.peek().token_type == Eof } fn current_line(&self) -> usize { self.peek().line } } #[derive(Debug, Clone)] pub enum Statement { ExpressionStmt { expression: Expression, }, VarStmt { name: Token, var_type: TokenType, initializer: Expression, }, PrintStmt { value: Expression, }, FunctionStmt { function: Function, }, ObjectStmt { name: Token, fields: Vec, }, GuardStatement { if_expr: Expression, then_expr: Expression, }, } impl Statement { pub fn line(&self) -> usize { match self { Statement::ExpressionStmt { expression } => expression.line(), Statement::VarStmt { name, .. } => name.line, Statement::PrintStmt { value } => value.line(), Statement::FunctionStmt { function, .. } => function.name.line, Statement::ObjectStmt { name, .. } => name.line, Statement::GuardStatement { if_expr, .. } => if_expr.line(), } } } #[derive(Debug, Clone)] pub struct Parameter { pub(crate) name: Token, pub(crate) var_type: TokenType, } #[derive(Debug, Clone)] pub enum Expression { Binary { line: usize, left: Box, operator: Token, right: Box, }, Unary { line: usize, operator: Token, right: Box, }, Grouping { line: usize, expression: Box, }, Literal { line: usize, literaltype: TokenType, value: Value, }, List { line: usize, literaltype: TokenType, values: Vec, }, Map { line: usize, literaltype: TokenType, entries: Vec<(Expression, Expression)>, }, Variable { line: usize, name: String, var_type: TokenType, }, FunctionCall { line: usize, name: String, arguments: Vec, }, Stop { line: usize, }, // PathMatch { // line: usize, // condition: Box, // }, NamedParameter { line: usize, name: Token, value: Box, }, MapGet { key: Box, }, ListGet { list: Box, index: Box, }, FieldGet { field: String, }, } impl Expression { pub fn line(&self) -> usize { match self { Self::Binary { line, .. } => *line, Self::Unary { line, .. } => *line, Self::Grouping { line, .. } => *line, Self::Literal { line, .. } => *line, Self::List { line, .. } => *line, Self::Map { line, .. } => *line, Variable { line, .. } => *line, FunctionCall { line, .. } => *line, Stop { line } => *line, // Expression::PathMatch { line, .. } => *line, NamedParameter { line, .. } => *line, MapGet { .. } => 0, ListGet { .. } => 0, FieldGet { .. } => 0, } } pub fn infer_type(&self) -> TokenType { match self { Expression::Binary { left, operator, right, .. } => { let left_type = left.infer_type(); let right_type = right.infer_type(); if vec![Greater, Less, GreaterEqual, LessEqual].contains(&operator.token_type) { Bool } else if left_type == right_type { // map to determined numeric type if yet undetermined (32 or 64 bits) match left_type { FloatingPoint => F64, Integer => I64, _ => left_type, } } else { if let Plus = operator.token_type { // includes string concatenation with numbers // followed by type coercion to 64 bits for numeric types debug!("coerce {} : {}", left_type, right_type); match (left_type, right_type) { (_, StringType) => StringType, (StringType, _) => StringType, (FloatingPoint, _) => F64, (Integer, FloatingPoint) => F64, (Integer, _) => I64, (I64, Integer) => I64, (F64, _) => F64, (U64, U32) => U64, (I64, I32) => I64, // could add a date and a duration. future work // could add a List and a value. also future work // could add a Map and a tuple. Will I add tuple types? Future work! _ => panic!("Unexpected coercion"), } // could have done some fall through here, but this will fail less gracefully, // so if my thinking is wrong or incomplete it will panic } else { // type coercion to 64 bits for numeric types debug!("coerce {} : {}", left_type, right_type); match (left_type, right_type) { (FloatingPoint, _) => F64, (Integer, FloatingPoint) => F64, (Integer, I64) => I64, (I64, FloatingPoint) => F64, (F64, _) => F64, (U64, U32) => U64, (I64, I32) => I64, (I64, Integer) => I64, _ => panic!("Unexpected coercion"), } } } } Expression::Grouping { expression, .. } => expression.infer_type(), Expression::Literal { literaltype, .. } => literaltype.clone(), Expression::List { literaltype, .. } => literaltype.clone(), Expression::Map { literaltype, .. } => literaltype.clone(), Expression::Unary { right, operator, .. } => { let literal_type = right.infer_type(); if literal_type == Integer && operator.token_type == Minus { SignedInteger } else { UnsignedInteger } } Expression::Variable { var_type, .. } => var_type.clone(), Expression::Stop { .. } => TokenType::Unknown, // Expression::PathMatch { .. } => TokenType::Unknown, Expression::NamedParameter { .. } => TokenType::Unknown, Expression::ListGet { .. } => TokenType::Unknown, Expression::MapGet { .. } => TokenType::Unknown, Expression::FieldGet { .. } => TokenType::Unknown, FunctionCall { .. } => TokenType::Unknown, } } }