From 9b6f265c55d8c160b692b246f3102b661e86418c Mon Sep 17 00:00:00 2001 From: Shautvast Date: Sat, 8 Nov 2025 21:58:19 +0100 Subject: [PATCH] added a lot of work in progress, but also a symbol table. Means I could refactor a lot of stuff and remove code. The symbol table is a global storage for vars, functions and objects. Helps with type inference and checking on external symbols --- README.md | 8 +- source/hello/web.crud | 6 +- src/ast_compiler.rs | 454 ++++++++++++++------------------------- src/bytecode_compiler.rs | 148 +++++++++---- src/chunk.rs | 2 + src/compiler_tests.rs | 93 +++++--- src/lib.rs | 24 ++- src/scanner.rs | 5 +- src/symbol_builder.rs | 235 ++++++++++++++++++++ src/tokens.rs | 11 +- src/vm.rs | 2 +- 11 files changed, 605 insertions(+), 383 deletions(-) create mode 100644 src/symbol_builder.rs diff --git a/README.md b/README.md index 9d9081b..1424490 100644 --- a/README.md +++ b/README.md @@ -112,10 +112,10 @@ fn get(path: string, headers: map, query: map) -> string: * guards: this will be the way to deal with input ``` fn get() -> [Customer] | Customer? | (): - | /. -> service.get_all() - | /./{uuid} -> service.get(uuid)? - | /.?{query.firstname} -> service.get_by_firstname(fname)? - | /.?{query.last_name} -> service.get_by_lastname(lname)? + | / -> service.get_all() + | /{uuid} -> service.get(uuid)? + | ?{query.firstname} -> service.get_by_firstname(fname)? + | ?{query.last_name} -> service.get_by_lastname(lname)? | _ -> 404 ``` * this may also require ADT's... diff --git a/source/hello/web.crud b/source/hello/web.crud index 1e221e6..70b30a4 100644 --- a/source/hello/web.crud +++ b/source/hello/web.crud @@ -1,2 +1,6 @@ +object Person: + name: string + fn get(path: string) -> string: - service.add("hello", path) + let p = Person(name: path) + service.add("hello", p.name) diff --git a/src/ast_compiler.rs b/src/ast_compiler.rs index de495e9..b4a2397 100644 --- a/src/ast_compiler.rs +++ b/src/ast_compiler.rs @@ -1,4 +1,6 @@ -use crate::ast_compiler::Expression::{FunctionCall, Literal, RemoteFunctionCall, Variable}; +use crate::ast_compiler::Expression::{ + FunctionCall, NamedParameter, Stop, Variable, +}; use crate::errors::CompilerError::{ self, Expected, IncompatibleTypes, ParseError, TooManyParameters, TypeError, UndeclaredVariable, UnexpectedIndent, UninitializedVariable, @@ -14,7 +16,6 @@ use crate::tokens::TokenType::{ use crate::tokens::{Token, TokenType}; use crate::value::Value; use log::debug; -use std::collections::HashMap; pub fn compile( path: Option<&str>, @@ -36,9 +37,7 @@ struct AstCompiler { tokens: Vec, current: usize, had_error: bool, - vars: Vec, indent: Vec, - functions: HashMap, } impl AstCompiler { @@ -47,9 +46,7 @@ impl AstCompiler { tokens, current: 0, had_error: false, - vars: vec![], indent: vec![0], - functions: HashMap::new(), } } @@ -58,7 +55,6 @@ impl AstCompiler { } fn compile_tokens(&mut self) -> Result, CompilerErrorAtLine> { - self.collect_functions()?; self.reset(); self.compile() } @@ -86,59 +82,6 @@ impl AstCompiler { CompilerErrorAtLine::raise(error, self.current_line()) } - fn collect_functions(&mut self) -> Result<(), CompilerErrorAtLine> { - while !self.is_at_end() { - if self.match_token(vec![Fn]) { - let name_token = self.consume(Identifier, Expected("function name."))?; - self.consume(LeftParen, Expected("'(' after function name."))?; - let mut parameters = vec![]; - while !self.check(RightParen) { - if parameters.len() >= 25 { - return Err(self.raise(TooManyParameters)); - } - let parm_name = self.consume(Identifier, Expected("a parameter name."))?; - - self.consume(Colon, Expected(": after parameter name"))?; - let var_type = self.peek().token_type; - self.vars.push(Expression::Variable { - name: parm_name.lexeme.to_string(), - var_type, - line: parm_name.line, - }); - self.advance(); - parameters.push(Parameter { - name: parm_name, - var_type, - }); - if self.peek().token_type == TokenType::Comma { - self.advance(); - } - } - self.consume(RightParen, Expected(" ')' after parameters."))?; - let return_type = if self.check(SingleRightArrow) { - self.consume(SingleRightArrow, Expected("->"))?; - self.advance().token_type - } else { - TokenType::Void - }; - self.consume(Colon, Expected("colon (:) after function declaration."))?; - self.consume(Eol, Expected("end of line."))?; - - let function = Function { - name: name_token.clone(), - parameters, - return_type, - body: vec![], - }; - - self.functions.insert(name_token.lexeme, function); - } else { - self.advance(); - } - } - Ok(()) - } - fn indent(&mut self) -> Result, CompilerErrorAtLine> { let expected_indent = *self.indent.last().unwrap(); // skip empty lines @@ -168,11 +111,65 @@ impl AstCompiler { self.let_declaration() } else if self.match_token(vec![Object]) { self.object_declaration() + } else if self.match_token(vec![TokenType::Pipe]) { + self.guard_declaration() } else { self.statement() } } + // | /. -> service.get_all() + // | /{uuid} -> service.get(uuid)? + // | ?{query.firstname} -> service.get_by_firstname(fname)? + fn guard_declaration(&mut self) -> Result { + let if_expr = self.guard_if_expr()?; + let then_expr = self.expression()?; + Ok(Statement::GuardStatement { if_expr, then_expr }) + } + + fn guard_if_expr(&mut self) -> Result { + while !self.check(SingleRightArrow) { + if self.match_token(vec![Slash]) { + return self.path_guard_expr(); + } else if self.match_token(vec![TokenType::Question]) { + return self.query_guard_expr(); + } else { + return Err(self.raise(Expected("-> or ?"))); + } + } + Ok(Stop { + line: self.peek().line, + }) + } + + fn query_guard_expr(&mut self) -> Result { + if self.match_token(vec![LeftBrace]) { + let query_params = self.expression()?; + self.consume(RightBrace, Expected("'}' after guard expression."))?; + Ok(query_params) + } else { + Ok(Stop { + line: self.peek().line, + }) + } + } + + fn path_guard_expr(&mut self) -> Result { + if self.match_token(vec![LeftBrace]) { + let path_params = self.match_expression()?; + self.consume(RightBrace, Expected("'}' after guard expression."))?; + Ok(path_params) + } else { + Ok(Stop { + line: self.peek().line, + }) + } + } + + fn match_expression(&mut self) -> Result { + Err(self.raise(Expected("unimplemented"))) + } + fn object_declaration(&mut self) -> Result { let type_name = self.consume(Identifier, Expected("object name."))?; self.consume(Colon, Expected("':' after object name."))?; @@ -194,7 +191,7 @@ impl AstCompiler { if !done { let field_name = self.consume(Identifier, Expected("an object field name."))?; self.consume(Colon, Expected("':' after field name."))?; - let field_type = self.peek().token_type; + let field_type = self.peek().token_type.clone(); if field_type.is_type() { self.advance(); } else { @@ -216,40 +213,61 @@ impl AstCompiler { fn function_declaration(&mut self) -> Result { let name_token = self.consume(Identifier, Expected("function name."))?; self.consume(LeftParen, Expected("'(' after function name."))?; + let mut parameters = vec![]; while !self.check(RightParen) { - self.advance(); - } + if parameters.len() >= 25 { + return Err(self.raise(TooManyParameters)); + } + let parm_name = self.consume(Identifier, Expected("a parameter name."))?; + + self.consume(Colon, Expected(": after parameter name"))?; + let var_type = self.peek().token_type.clone(); - self.consume(RightParen, Expected("')' after parameters."))?; - while !self.check(Colon) { self.advance(); + parameters.push(Parameter { + name: parm_name, + var_type, + }); + if self.peek().token_type == TokenType::Comma { + self.advance(); + } } + self.consume(RightParen, Expected(" ')' after parameters."))?; + let return_type = if self.check(SingleRightArrow) { + self.consume(SingleRightArrow, Expected("->"))?; + self.advance().token_type.clone() + } else { + TokenType::Void + }; self.consume(Colon, Expected("colon (:) after function declaration."))?; self.consume(Eol, Expected("end of line."))?; let current_indent = self.indent.last().unwrap(); self.indent.push(current_indent + 1); + let body = self.compile()?; - self.functions.get_mut(&name_token.lexeme).unwrap().body = body; - - let function_stmt = Statement::FunctionStmt { - function: self.functions.get(&name_token.lexeme).unwrap().clone(), + let function = Function { + name: name_token.clone(), + parameters, + return_type, + body, }; - Ok(function_stmt) + + Ok(Statement::FunctionStmt { function }) } fn let_declaration(&mut self) -> Result { if self.peek().token_type.is_type() { return Err(self.raise(CompilerError::KeywordNotAllowedAsIdentifier( - self.peek().token_type, + self.peek().token_type.clone(), ))); } let name_token = self.consume(Identifier, Expected("variable name."))?; let declared_type = if self.check(Colon) { self.advance(); - Some(self.advance().token_type) + Some(self.advance().token_type.clone()) } else { None }; @@ -258,22 +276,22 @@ impl AstCompiler { let initializer = self.expression()?; self.consume(Eol, Expected("end of line after initializer."))?; - let inferred_type = initializer.infer_type(); - let var_type = match calculate_type(declared_type, inferred_type) { - Ok(var_type) => var_type, - Err(e) => { - self.had_error = true; - return Err(self.raise(TypeError(Box::new(e)))); - } - }; - self.vars.push(Expression::Variable { - name: name_token.lexeme.to_string(), - var_type, - line: name_token.line, - }); + // let inferred_type = initializer.infer_type(); + // let var_type = match calculate_type(declared_type, inferred_type) { + // Ok(var_type) => var_type, + // Err(e) => { + // self.had_error = true; + // return Err(self.raise(TypeError(Box::new(e)))); + // } + // }; + // self.vars.push(Variable { + // name: name_token.lexeme.to_string(), + // var_type, + // line: name_token.line, + // }); Ok(Statement::VarStmt { name: name_token, - var_type, + var_type: declared_type.unwrap_or(TokenType::Unknown), initializer, }) } else { @@ -324,7 +342,7 @@ impl AstCompiler { fn bit_or(&mut self) -> Result { let expr = self.bit_xor()?; - self.binary(vec![TokenType::BitOr], expr) + self.binary(vec![TokenType::Pipe], expr) } fn bit_xor(&mut self) -> Result { @@ -469,9 +487,13 @@ impl AstCompiler { } else { let token = self.advance().clone(); debug!("{:?}", token); + // function call? if self.match_token(vec![LeftParen]) { self.function_call(token.lexeme)? + } else if self.match_token(vec![Colon]) { + self.named_parameter(&token)? } else if self.check(Dot) { + // chain of variable or function lookups? let mut name = "/".to_string(); name.push_str(&self.previous().lexeme); while self.match_token(vec![Dot]) { @@ -479,11 +501,13 @@ impl AstCompiler { name.push_str(&self.peek().lexeme); self.advance(); } + // chained function call? if self.match_token(vec![LeftParen]) { self.function_call(name)? } else { + // empty line return if self.match_token(vec![Eol, Eof]) { - Ok(Literal { + Ok(Expression::Literal { value: Value::Void, literaltype: Object, line: token.line, @@ -493,11 +517,22 @@ impl AstCompiler { }; } } else { + // none of the above, must be a variable lookup self.variable_lookup(&token)? } }) } + fn named_parameter(&mut self, name: &Token) -> Result { + let value = self.expression()?; + let line = name.line; + Ok(NamedParameter { + name: name.clone(), + value: Box::new(value), + line, + }) + } + fn list(&mut self) -> Result { let mut list = vec![]; while !self.match_token(vec![RightBracket]) { @@ -537,92 +572,34 @@ impl AstCompiler { }) } - fn variable_lookup(&mut self, token: &Token) -> Result { - if let Some((var_name, var_type)) = self - .vars - .iter() - .filter_map(|e| { - if let Variable { name, var_type, .. } = e { - Some((name, var_type)) - } else { - None - } - }) - .find(|e| e.0 == &token.lexeme) - { - Ok(Variable { - name: var_name.to_string(), - var_type: var_type.clone(), - line: token.line, - }) - } else { - if self.match_token(vec![Dot]) { - let right = self.primary()?; - self.binary(vec![Dot], right) - } else { - if self.is_at_end() { - Ok(Literal { - value: Value::Void, - literaltype: Object, - line: token.line, - }) - } else { - Err(self.raise(UndeclaredVariable(token.lexeme.clone()))) - } - } - } + fn variable_lookup(&mut self, name: &Token) -> Result { + Ok(Variable { + name: name.lexeme.to_string(), + var_type: TokenType::Unknown, + line: name.line, + }) } fn function_call(&mut self, name: String) -> Result { - if let Some(function) = self.functions.get(&name).cloned() { - let mut arguments = vec![]; - while !self.match_token(vec![RightParen]) { - if arguments.len() >= 25 { - return Err(self.raise(TooManyParameters)); - } - let arg = self.expression()?; - let arg_type = arg.infer_type(); - if arg_type != function.parameters[arguments.len()].var_type { - return Err(self.raise(IncompatibleTypes( - function.parameters[arguments.len()].var_type, - arg_type, - ))); - } - arguments.push(arg); - if self.peek().token_type == TokenType::Comma { - self.advance(); - } else { - self.consume(RightParen, Expected("')' after arguments."))?; - break; - } + let mut arguments = vec![]; + while !self.match_token(vec![RightParen]) { + if arguments.len() >= 25 { + return Err(self.raise(TooManyParameters)); } - Ok(FunctionCall { - line: self.peek().line, - name, - arguments, - return_type: function.return_type, - }) - } else { - let mut arguments = vec![]; - while !self.match_token(vec![RightParen]) { - if arguments.len() >= 25 { - return Err(self.raise(TooManyParameters)); - } - let arg = self.expression()?; - arguments.push(arg); - if self.peek().token_type == TokenType::Comma { - self.advance(); - } else { - self.consume(RightParen, Expected("')' after arguments."))?; - break; - } + let arg = self.expression()?; + arguments.push(arg); + if self.peek().token_type == TokenType::Comma { + self.advance(); + } else { + self.consume(RightParen, Expected("')' after arguments."))?; + break; } - Ok(RemoteFunctionCall { - line: self.peek().line, - name, - arguments, - }) } + Ok(FunctionCall { + line: self.peek().line, + name, + arguments, + }) } fn consume( @@ -681,45 +658,6 @@ impl AstCompiler { } } -fn calculate_type( - declared_type: Option, - inferred_type: TokenType, -) -> Result { - Ok(if let Some(declared_type) = declared_type { - if declared_type != inferred_type { - match (declared_type, inferred_type) { - (I32, I64) => I32, //need this? - (I32, Integer) => I32, - (U32, I64) => U32, - (U32, Integer) => U32, - (F32, F64) => F32, - (F32, FloatingPoint) => F32, - (F64, I64) => F64, - (F64, FloatingPoint) => F64, - (U64, I64) => U64, - (U64, I32) => U64, - (StringType, _) => StringType, // meh, this all needs rigorous testing. Update: this is in progress - _ => { - return Err(IncompatibleTypes(declared_type, inferred_type)); - } - } - } else { - declared_type - } - } else { - match inferred_type { - Integer | I64 => I64, - FloatingPoint => F64, - Bool => Bool, - Date => Date, - ListType => ListType, - MapType => MapType, - Object => Object, - _ => return Err(CompilerError::UnexpectedType(inferred_type)), - } - }) -} - #[derive(Debug, Clone)] pub enum Statement { ExpressionStmt { @@ -740,6 +678,10 @@ pub enum Statement { name: Token, fields: Vec, }, + GuardStatement { + if_expr: Expression, + then_expr: Expression, + }, } impl Statement { @@ -750,6 +692,7 @@ impl Statement { Statement::PrintStmt { value } => value.line(), Statement::FunctionStmt { function, .. } => function.name.line, Statement::ObjectStmt { name, .. } => name.line, + Statement::GuardStatement { if_expr, .. } => if_expr.line(), } } } @@ -801,13 +744,18 @@ pub enum Expression { line: usize, name: String, arguments: Vec, - return_type: TokenType, }, - // a remote function call is a function call that is not defined in the current scope - RemoteFunctionCall { + Stop { line: usize, - name: String, - arguments: Vec, + }, + PathMatch { + line: usize, + condition: Box, + }, + NamedParameter { + line: usize, + name: Token, + value: Box, }, } @@ -822,85 +770,9 @@ impl Expression { Self::Map { line, .. } => *line, Variable { line, .. } => *line, FunctionCall { line, .. } => *line, - RemoteFunctionCall { line, .. } => *line, - } - } - - pub fn infer_type(&self) -> TokenType { - match self { - Self::Binary { - left, - operator, - right, - .. - } => { - let left_type = left.infer_type(); - let right_type = right.infer_type(); - if vec![Greater, Less, GreaterEqual, LessEqual].contains(&operator.token_type) { - Bool - } else if left_type == right_type { - // map to determined numeric type if yet undetermined (32 or 64 bits) - match left_type { - FloatingPoint => F64, - Integer => I64, - _ => left_type, - } - } else { - if let Plus = operator.token_type { - // includes string concatenation with numbers - // followed by type coercion to 64 bits for numeric types - debug!("coerce {} : {}", left_type, right_type); - match (left_type, right_type) { - (_, StringType) => StringType, - (StringType, _) => StringType, - (FloatingPoint, _) => F64, - (Integer, FloatingPoint) => F64, - (Integer, _) => I64, - (I64, Integer) => I64, - (F64, _) => F64, - (U64, U32) => U64, - (I64, I32) => I64, - // could add a date and a duration. future work - // could add a List and a value. also future work - // could add a Map and a tuple. Will I add tuple types? Future work! - _ => panic!("Unexpected coercion"), - } - // could have done some fall through here, but this will fail less gracefully, - // so if my thinking is wrong or incomplete it will panic - } else { - // type coercion to 64 bits for numeric types - debug!("coerce {} : {}", left_type, right_type); - match (left_type, right_type) { - (FloatingPoint, _) => F64, - (Integer, FloatingPoint) => F64, - (Integer, I64) => I64, - (I64, FloatingPoint) => F64, - (F64, _) => F64, - (U64, U32) => U64, - (I64, I32) => I64, - (I64, Integer) => I64, - _ => panic!("Unexpected coercion"), - } - } - } - } - Self::Grouping { expression, .. } => expression.infer_type(), - Self::Literal { literaltype, .. } => literaltype.clone(), - Self::List { literaltype, .. } => literaltype.clone(), - Self::Map { literaltype, .. } => literaltype.clone(), - Self::Unary { - right, operator, .. - } => { - let literal_type = right.infer_type(); - if literal_type == Integer && operator.token_type == Minus { - SignedInteger - } else { - UnsignedInteger - } - } - Variable { var_type, .. } => var_type.clone(), - FunctionCall { return_type, .. } => return_type.clone(), - RemoteFunctionCall { .. } => TokenType::Unknown, + Stop { line } => *line, + Expression::PathMatch { line, .. } => *line, + NamedParameter { line, .. } => *line, } } } diff --git a/src/bytecode_compiler.rs b/src/bytecode_compiler.rs index 44597d4..d1052fd 100644 --- a/src/bytecode_compiler.rs +++ b/src/bytecode_compiler.rs @@ -1,6 +1,8 @@ -use crate::ast_compiler::{Expression, Function, Statement}; +use crate::ast_compiler::Expression::NamedParameter; +use crate::ast_compiler::{Expression, Function, Parameter, Statement}; use crate::chunk::Chunk; -use crate::errors::CompilerErrorAtLine; +use crate::errors::{CompilerError, CompilerErrorAtLine, RuntimeError}; +use crate::symbol_builder::{Symbol, calculate_type, infer_type}; use crate::tokens::TokenType; use crate::value::Value; use crate::vm::{ @@ -9,17 +11,20 @@ use crate::vm::{ OP_MULTIPLY, OP_NEGATE, OP_NOT, OP_OR, OP_PRINT, OP_RETURN, OP_SHL, OP_SHR, OP_SUBTRACT, }; use std::collections::HashMap; +use crate::tokens::TokenType::Unknown; pub fn compile( qualified_name: Option<&str>, ast: &Vec, + symbols: &HashMap, registry: &mut HashMap, ) -> Result<(), CompilerErrorAtLine> { - compile_in_namespace(ast, qualified_name, registry) + compile_in_namespace(ast, qualified_name, symbols, registry) } pub(crate) fn compile_function( function: &Function, + symbols: &HashMap, registry: &mut HashMap, namespace: &str, ) -> Result { @@ -31,18 +36,20 @@ pub(crate) fn compile_function( compiler.vars.insert(name, var_index); } - - Ok(compiler.compile(&function.body, registry, namespace)?) + let mut chunk = compiler.compile(&function.body, symbols, registry, namespace)?; + chunk.function_parameters = function.parameters.to_vec(); + Ok(chunk) } pub(crate) fn compile_in_namespace( ast: &Vec, namespace: Option<&str>, + symbols: &HashMap, registry: &mut HashMap, ) -> Result<(), CompilerErrorAtLine> { let name = namespace.unwrap_or("main"); let compiler = Compiler::new(name); - let chunk = compiler.compile(ast, registry, name)?; + let chunk = compiler.compile(ast, symbols, registry, name)?; let qname = if let Some(namespace) = namespace { format!("{}/{}", namespace, "main") } else { @@ -72,11 +79,12 @@ impl Compiler { fn compile( mut self, ast: &Vec, + symbols: &HashMap, registry: &mut HashMap, namespace: &str, ) -> Result { for statement in ast { - self.compile_statement(statement, registry, namespace)?; + self.compile_statement(statement, symbols, registry, namespace)?; } self.emit_byte(OP_RETURN); @@ -86,6 +94,7 @@ impl Compiler { fn compile_statement( &mut self, statement: &Statement, + symbols: &HashMap, registry: &mut HashMap, namespace: &str, ) -> Result<(), CompilerErrorAtLine> { @@ -96,21 +105,44 @@ impl Compiler { var_type, initializer, } => { - let name_index = self.chunk.add_var(var_type, &name.lexeme); - self.vars.insert(name.lexeme.clone(), name_index); - self.compile_expression(namespace, initializer, registry)?; - self.emit_bytes(OP_ASSIGN, name_index as u16); + let name = name.lexeme.as_str(); + let var = symbols.get(name); + if let Some(Symbol::Variable { + var_type, + initializer, + .. + }) = var + { + let inferred_type = infer_type(initializer, symbols); + let calculated_type = calculate_type(var_type, &inferred_type, symbols) + .map_err(|e| CompilerErrorAtLine::raise(e, statement.line()))?; + if var_type != &Unknown && var_type != &calculated_type { + return Err(CompilerErrorAtLine::raise( + CompilerError::IncompatibleTypes(var_type.clone(), calculated_type), + statement.line(), + )); + } + let name_index = self.chunk.add_var(var_type, name); + self.vars.insert(name.to_string(), name_index); + self.compile_expression(namespace, initializer, symbols, registry)?; + self.emit_bytes(OP_ASSIGN, name_index as u16); + } else { + return Err(CompilerErrorAtLine::raise( + CompilerError::UndeclaredVariable(name.to_string()), + statement.line(), + )); + } } Statement::PrintStmt { value } => { - self.compile_expression(namespace, value, registry)?; + self.compile_expression(namespace, value, symbols, registry)?; self.emit_byte(OP_PRINT); } Statement::ExpressionStmt { expression } => { - self.compile_expression(namespace, expression, registry)?; + self.compile_expression(namespace, expression, symbols, registry)?; } Statement::FunctionStmt { function } => { let function_name = function.name.lexeme.clone(); - let compiled_function = compile_function(function, registry, namespace)?; + let compiled_function = compile_function(function, symbols, registry, namespace)?; registry.insert( format!("{}/{}", self.chunk.name, function_name), compiled_function, @@ -119,6 +151,9 @@ impl Compiler { Statement::ObjectStmt { name, fields } => { self.chunk.add_object_def(&name.lexeme, fields); } + Statement::GuardStatement { .. } => { + unimplemented!("guard statement") + } } Ok(()) } @@ -127,37 +162,63 @@ impl Compiler { &mut self, namespace: &str, expression: &Expression, + symbols: &HashMap, registry: &mut HashMap, ) -> Result<(), CompilerErrorAtLine> { match expression { + // Expression::FunctionCall { + // name, arguments, .. + // } => { + // let qname = format!("{}.{}", namespace, name); + // let name_index = self + // .chunk + // .find_constant(&qname) + // .unwrap_or_else(|| self.chunk.add_constant(Value::String(qname))); + // + // for argument in arguments { + // self.compile_expression(namespace, argument, registry)?; + // } + // self.emit_bytes(OP_CALL, name_index as u16); + // self.emit_byte(arguments.len() as u16); + // } Expression::FunctionCall { name, arguments, .. - } => { - let qname = format!("{}.{}", namespace, name); - let name_index = self - .chunk - .find_constant(&qname) - .unwrap_or_else(|| self.chunk.add_constant(Value::String(qname))); - - for argument in arguments { - self.compile_expression(namespace, argument, registry)?; - } - self.emit_bytes(OP_CALL, name_index as u16); - self.emit_byte(arguments.len() as u16); - } - Expression::RemoteFunctionCall { - name, arguments, .. } => { let name_index = self .chunk .find_constant(&name) .unwrap_or_else(|| self.chunk.add_constant(Value::String(name.to_string()))); - - for argument in arguments { - self.compile_expression(namespace, argument, registry)?; + let function = symbols.get(name); + if let Some(Symbol::Function { + name, + parameters, + return_type, + body, + }) = function + { + for parameter in parameters { + for argument in arguments { + if let NamedParameter { name, .. } = argument { + if name.lexeme == parameter.name.lexeme { + self.compile_expression( + namespace, argument, symbols, registry, + )?; + break; + } + } else { + self.compile_expression(namespace, argument, symbols, registry)?; + break; + } + } + } + self.emit_bytes(OP_CALL, name_index as u16); + self.emit_byte(arguments.len() as u16); + } else { + return Err(CompilerErrorAtLine::raise( + CompilerError::FunctionNotFound(name.to_string()), + 0, + )); } - self.emit_bytes(OP_CALL, name_index as u16); - self.emit_byte(arguments.len() as u16); } Expression::Variable { name, .. } => { let name_index = self.vars.get(name).unwrap(); @@ -168,24 +229,24 @@ impl Compiler { } Expression::List { values, .. } => { for expr in values { - self.compile_expression(namespace, expr, registry)?; + self.compile_expression(namespace, expr, symbols, registry)?; } self.emit_bytes(OP_DEF_LIST, values.len() as u16); } Expression::Map { entries, .. } => { for (key, value) in entries { - self.compile_expression(namespace, key, registry)?; - self.compile_expression(namespace, value, registry)?; + self.compile_expression(namespace, key, symbols, registry)?; + self.compile_expression(namespace, value, symbols, registry)?; } self.emit_bytes(OP_DEF_MAP, entries.len() as u16); } Expression::Grouping { expression, .. } => { - self.compile_expression(namespace, expression, registry)? + self.compile_expression(namespace, expression, symbols, registry)? } Expression::Unary { operator, right, .. } => { - self.compile_expression(namespace, right, registry)?; + self.compile_expression(namespace, right, symbols, registry)?; match operator.token_type { TokenType::Minus => { self.emit_byte(OP_NEGATE); @@ -202,15 +263,15 @@ impl Compiler { right, .. } => { - self.compile_expression(namespace, left, registry)?; - self.compile_expression(namespace, right, registry)?; + self.compile_expression(namespace, left, symbols, registry)?; + self.compile_expression(namespace, right, symbols, registry)?; match operator.token_type { TokenType::Plus => self.emit_byte(OP_ADD), TokenType::Minus => self.emit_byte(OP_SUBTRACT), TokenType::Star => self.emit_byte(OP_MULTIPLY), TokenType::Slash => self.emit_byte(OP_DIVIDE), TokenType::BitAnd => self.emit_byte(OP_BITAND), - TokenType::BitOr => self.emit_byte(OP_BITOR), + TokenType::Pipe => self.emit_byte(OP_BITOR), TokenType::BitXor => self.emit_byte(OP_BITXOR), TokenType::GreaterGreater => self.emit_byte(OP_SHR), TokenType::LessLess => self.emit_byte(OP_SHL), @@ -224,6 +285,9 @@ impl Compiler { _ => unimplemented!("binary other than plus, minus, star, slash"), } } + Expression::Stop { line } => {} + Expression::PathMatch { line, .. } => {} + Expression::NamedParameter { line, .. } => {} } Ok(()) } diff --git a/src/chunk.rs b/src/chunk.rs index cb19807..7cafa00 100644 --- a/src/chunk.rs +++ b/src/chunk.rs @@ -16,6 +16,7 @@ pub struct Chunk { pub constants: Vec, lines: Vec, object_defs: HashMap>, + pub(crate) function_parameters: Vec, pub vars: Vec<(TokenType, String)> } @@ -40,6 +41,7 @@ impl Chunk { constants: vec![], lines: vec![], object_defs: HashMap::new(), + function_parameters: vec![], vars: vec![] } } diff --git a/src/compiler_tests.rs b/src/compiler_tests.rs index 30cd3e9..4a98887 100644 --- a/src/compiler_tests.rs +++ b/src/compiler_tests.rs @@ -25,19 +25,31 @@ mod tests { #[test] fn literal_list() { - assert_eq!(run(r#"["abc","def"]"#), Ok(Value::List(vec![Value::String("abc".into()), Value::String("def".into())]))); + assert_eq!( + run(r#"["abc","def"]"#), + Ok(Value::List(vec![ + Value::String("abc".into()), + Value::String("def".into()) + ])) + ); } #[test] fn infer_type() { - assert_eq!(run(r#"let a=1 -a"#), Ok(Value::I64(1))); + assert_eq!( + run(r#"let a=1 +a"#), + Ok(Value::I64(1)) + ); } #[test] fn define_u32() { - assert_eq!(run(r#"let a:u32=1 -a"#), Ok(Value::U32(1))); + assert_eq!( + run(r#"let a:u32=1 +a"#), + Ok(Value::U32(1)) + ); } #[test] @@ -56,7 +68,7 @@ a"#), if let Err(e) = &r { assert_eq!( e.to_string(), - "Compilation failed: error at line 1, Type mismatch: Expected u32, found i32/64" + "Compilation failed: error at line 1, Expected u32, found i32/64" ); } } @@ -68,7 +80,7 @@ a"#), if let Err(e) = &r { assert_eq!( e.to_string(), - "Compilation failed: error at line 1, Type mismatch: Expected u64, found i32/64" + "Compilation failed: error at line 1, Expected u64, found i32/64" ); } } @@ -80,7 +92,7 @@ a"#), if let Err(e) = &r { assert_eq!( e.to_string(), - "Compilation failed: error at line 1, Type mismatch: Expected u64, found string" + "Compilation failed: error at line 1, Expected u64, found string" ); } } @@ -106,18 +118,20 @@ object Person: assert!(r.is_ok()); // does nothing runtime } - // #[test] - // fn object_() { - // let r = compile(r#" - // object Person: - // name: string - // - // let p = Person{name: "Sander"} - // print p - // "#, ); - // println!("{:?}", r); - // assert!(r.is_ok()); - // } +// #[test] +// fn object_() { +// let r = run( +// r#" +// object Person: +// name: string +// +// let p = Person(name: "Sander") +// print p +// "#, +// ); +// println!("{:?}", r); +// assert!(r.is_ok()); +// } #[test] fn literal_map() { @@ -151,39 +165,54 @@ m"#); } #[test] - fn keyword_error(){ + fn keyword_error() { let result = run(r#"let map = {"name": "Dent"}"#); assert!(result.is_err()); - assert_eq!("Compilation failed: error at line 1, 'map' is a keyword. You cannot use it as an identifier",result.unwrap_err().to_string()); + assert_eq!( + "Compilation failed: error at line 1, 'map' is a keyword. You cannot use it as an identifier", + result.unwrap_err().to_string() + ); } #[test] - fn add_strings(){ + fn add_strings() { assert_eq!(run(r#""a"+"b""#), Ok(Value::String("ab".into()))); } #[test] - fn add_string_and_int(){ + fn add_string_and_int() { assert_eq!(run(r#""a"+42"#), Ok(Value::String("a42".into()))); } #[test] - fn add_string_and_bool(){ + fn add_string_and_bool() { assert_eq!(run(r#""a"+false"#), Ok(Value::String("afalse".into()))); } #[test] - fn add_string_and_scientific_float(){ - assert_eq!(run(r#""a"+4.2e10"#), Ok(Value::String("a42000000000".into()))); + fn add_string_and_scientific_float() { + assert_eq!( + run(r#""a"+4.2e10"#), + Ok(Value::String("a42000000000".into())) + ); } #[test] - fn add_hex_ints(){ + fn add_hex_ints() { assert_eq!(run(r#"0x10 + 0x20"#), Ok(Value::U32(48))); } - #[test] - fn package(){ - assert_eq!(run(r#"a.b.c()"#), Ok(Value::U32(48))); - } + // #[test] + // fn package() { + // assert_eq!(run(r#"a.b.c()"#), Ok(Value::U32(48))); + // } + + // #[test] + // fn guards() { + // assert_eq!( + // run(r#"fn get_all_users() -> list: + // | /{uuid} -> service.get_by_uuid(uuid)?"#), + // Ok(Value::Void) + // ); + // } } diff --git a/src/lib.rs b/src/lib.rs index bc656db..c76c01d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,6 +19,7 @@ pub mod file_watch; mod keywords; pub mod repl; pub mod scanner; +mod symbol_builder; mod tokens; mod value; pub mod vm; @@ -34,9 +35,12 @@ pub fn compile_sourcedir(source_dir: &str) -> Result, Cru let tokens = scan(&source)?; match ast_compiler::compile(Some(&path), tokens) { Ok(statements) => { - println!("{}", path); let path = path.strip_prefix(source_dir).unwrap().replace(".crud", ""); - bytecode_compiler::compile(Some(&path), &statements, &mut registry)?; + + let mut symbol_table = HashMap::new(); + symbol_builder::build(&path, &statements, &mut symbol_table); + + bytecode_compiler::compile(Some(&path), &statements, &symbol_table, &mut registry)?; } Err(e) => { println!("{}", e); @@ -56,7 +60,9 @@ pub fn map_underlying() -> fn(std::io::Error) -> CrudLangError { pub fn recompile(src: &str, registry: &mut HashMap) -> Result<(), CrudLangError> { let tokens = scan(src)?; let ast = ast_compiler::compile(None, tokens)?; - bytecode_compiler::compile(None, &ast, registry)?; + let mut symbol_table = HashMap::new(); + symbol_builder::build("", &ast, &mut symbol_table); + bytecode_compiler::compile(None, &ast, &symbol_table, registry)?; Ok(()) } @@ -64,15 +70,19 @@ pub fn compile(src: &str) -> Result, CrudLangError> { let tokens = scan(src)?; let mut registry = HashMap::new(); let ast = ast_compiler::compile(None, tokens)?; - bytecode_compiler::compile(None, &ast, &mut registry)?; + let mut symbol_table = HashMap::new(); + symbol_builder::build("", &ast, &mut symbol_table); + bytecode_compiler::compile(None, &ast, &symbol_table, &mut registry)?; Ok(registry) } -fn run(src: &str) -> Result { +pub(crate) fn run(src: &str) -> Result { let tokens = scan(src)?; - let mut registry = HashMap::new(); let ast = ast_compiler::compile(None, tokens)?; - bytecode_compiler::compile(None, &ast, &mut registry)?; + let mut symbol_table = HashMap::new(); + symbol_builder::build("", &ast, &mut symbol_table); + let mut registry = HashMap::new(); + bytecode_compiler::compile(None, &ast, &symbol_table, &mut registry)?; let registry = ArcSwap::from(Arc::new(registry)); interpret(registry.load(), "main").map_err(CrudLangError::from) } diff --git a/src/scanner.rs b/src/scanner.rs index 28858cf..aa1ce2e 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -1,6 +1,6 @@ use crate::errors::CompilerError::{IllegalCharLength, UnexpectedIdentifier, Unterminated}; use crate::errors::{CompilerError, CompilerErrorAtLine}; -use crate::tokens::TokenType::{BitXor, FloatingPoint, Integer, U32, U64}; +use crate::tokens::TokenType::{BitXor, FloatingPoint, Integer, Question, U32, U64}; use crate::{ keywords, tokens::{ @@ -129,11 +129,12 @@ impl Scanner { let t = if self.match_next('|') { TokenType::LogicalOr } else { - TokenType::BitOr + TokenType::Pipe }; self.add_token(t); } '^' => self.add_token(BitXor), + '?' => self.add_token(Question), _ => { if c == '0' && self.peek() == 'x' { self.hex_number()?; diff --git a/src/symbol_builder.rs b/src/symbol_builder.rs new file mode 100644 index 0000000..1b282ac --- /dev/null +++ b/src/symbol_builder.rs @@ -0,0 +1,235 @@ +use crate::ast_compiler::{Expression, Parameter, Statement}; +use crate::errors::CompilerError; +use crate::errors::CompilerError::{IncompatibleTypes, TypeError}; +use crate::tokens::{Token, TokenType}; +use crate::tokens::TokenType::{ + Bool, Date, F32, F64, FloatingPoint, Greater, GreaterEqual, I32, I64, Integer, Less, LessEqual, + ListType, MapType, Minus, Object, Plus, SignedInteger, StringType, U32, U64, Unknown, + UnsignedInteger, +}; +use log::debug; +use std::collections::HashMap; + +pub enum Symbol { + Function { + name: String, + parameters: Vec, + return_type: TokenType, + body: Vec, + }, + Variable { + name: String, + var_type: TokenType, + initializer: Expression, + }, + Object { + name: String, + fields: Vec, + }, +} + +fn make_qname(path: &str, name: &Token) -> String { + if path == "" { + name.lexeme.to_string() + } else { + format!("{}.{}", path, name.lexeme) + } +} + +pub fn build(path: &str, ast: &[Statement], symbols: &mut HashMap) { + for statement in ast { + match statement { + Statement::VarStmt { + name, + var_type, + initializer, + } => { + + symbols.insert( + make_qname(path, name), + Symbol::Variable { + name: name.lexeme.to_string(), + var_type: var_type.clone(), + initializer: initializer.clone(), + }, + ); + } + Statement::FunctionStmt { function } => { + symbols.insert( + make_qname(path, &function.name), + Symbol::Function { + name: function.name.lexeme.to_string(), + parameters: function.parameters.to_vec(), + return_type: function.return_type.clone(), + body: function.body.to_vec(), + }, + ); + } + Statement::ObjectStmt { name, fields } => { + symbols.insert( + make_qname(path, name), + Symbol::Object { + name: name.lexeme.to_string(), + fields: fields.to_vec(), + }, + ); + } + _ => {} + } + } +} + +pub fn add_types( + path: &str, + ast: &[Statement], + symbols: &mut HashMap, +) -> Result<(), CompilerError> { + for statement in ast { + match statement { + Statement::VarStmt { + name, + var_type, + initializer, + } => { + let inferred_type = infer_type(initializer, symbols); + let calculated_type = calculate_type(var_type, &inferred_type, symbols)?; + let entry = symbols.get_mut(&format!("{}.{}", path, name.lexeme)); + if let Some(Symbol::Variable { var_type, .. }) = entry { + *var_type = calculated_type; + } + } + _ => {} + } + } + Ok(()) +} + +pub fn calculate_type( + declared_type: &TokenType, + inferred_type: &TokenType, + _symbols: &HashMap, +) -> Result { + Ok(if declared_type != &Unknown { + if declared_type != inferred_type { + match (declared_type, inferred_type) { + (I32, I64) => I32, //need this? + (I32, Integer) => I32, + (U32, I64) => U32, + (U32, Integer) => U32, + (F32, F64) => F32, + (F32, FloatingPoint) => F32, + (F64, I64) => F64, + (F64, FloatingPoint) => F64, + (U64, I64) => U64, + (U64, I32) => U64, + (StringType, _) => StringType, // meh, this all needs rigorous testing. Update: this is in progress + _ => { + return Err(IncompatibleTypes( + declared_type.clone(), + inferred_type.clone(), + )); + } + } + } else { + declared_type.clone() + } + } else { + match inferred_type { + Integer | I64 => I64, + FloatingPoint => F64, + Bool => Bool, + Date => Date, + ListType => ListType, + MapType => MapType, + Object => Object, + _ => return Err(CompilerError::UnexpectedType(inferred_type.clone())), + } + }) +} + +pub fn infer_type(expr: &Expression, symbols: &HashMap) -> TokenType { + match expr { + Expression::Binary { + left, + operator, + right, + .. + } => { + let left_type = infer_type(left, symbols); + let right_type = infer_type(right, symbols); + if vec![Greater, Less, GreaterEqual, LessEqual].contains(&operator.token_type) { + Bool + } else if left_type == right_type { + // map to determined numeric type if yet undetermined (32 or 64 bits) + match left_type { + FloatingPoint => F64, + Integer => I64, + _ => left_type, + } + } else { + if let Plus = operator.token_type { + // includes string concatenation with numbers + // followed by type coercion to 64 bits for numeric types + debug!("coerce {} : {}", left_type, right_type); + match (left_type, right_type) { + (_, StringType) => StringType, + (StringType, _) => StringType, + (FloatingPoint, _) => F64, + (Integer, FloatingPoint) => F64, + (Integer, _) => I64, + (I64, Integer) => I64, + (F64, _) => F64, + (U64, U32) => U64, + (I64, I32) => I64, + // could add a date and a duration. future work + // could add a List and a value. also future work + // could add a Map and a tuple. Will I add tuple types? Future work! + _ => panic!("Unexpected coercion"), + } + // could have done some fall through here, but this will fail less gracefully, + // so if my thinking is wrong or incomplete it will panic + } else { + // type coercion to 64 bits for numeric types + debug!("coerce {} : {}", left_type, right_type); + match (left_type, right_type) { + (FloatingPoint, _) => F64, + (Integer, FloatingPoint) => F64, + (Integer, I64) => I64, + (I64, FloatingPoint) => F64, + (F64, _) => F64, + (U64, U32) => U64, + (I64, I32) => I64, + (I64, Integer) => I64, + _ => panic!("Unexpected coercion"), + } + } + } + } + Expression::Grouping { expression, .. } => infer_type(expression, symbols), + Expression::Literal { literaltype, .. } => literaltype.clone(), + Expression::List { literaltype, .. } => literaltype.clone(), + Expression::Map { literaltype, .. } => literaltype.clone(), + Expression::Unary { + right, operator, .. + } => { + let literal_type = infer_type(right, symbols); + if literal_type == Integer && operator.token_type == Minus { + SignedInteger + } else { + UnsignedInteger + } + } + Expression::Variable { var_type, .. } => var_type.clone(), + Expression::FunctionCall { name, .. } => { + let symbol = symbols.get(name); + if let Some(Symbol::Function { return_type, .. }) = symbol { + return_type.clone() + } else { + Unknown + } + } + Expression::Stop { .. } => TokenType::Unknown, + Expression::PathMatch { .. } => TokenType::Unknown, + Expression::NamedParameter { .. } => TokenType::Unknown, + } +} diff --git a/src/tokens.rs b/src/tokens.rs index 4e1ef18..1edf92a 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -17,12 +17,12 @@ impl Token { } } -#[derive(Debug, PartialEq, Clone, Copy, Hash)] +#[derive(Debug, PartialEq, Clone, Hash)] pub enum TokenType { Bang, BangEqual, BitAnd, - BitOr, + Pipe, BitXor, Bool, Char, @@ -71,6 +71,7 @@ pub enum TokenType { Object, Plus, Print, + Question, Return, RightParen, RightBrace, @@ -86,6 +87,7 @@ pub enum TokenType { Unknown, Void, While, + ObjectType(String) } impl fmt::Display for TokenType { @@ -104,7 +106,7 @@ impl fmt::Display for TokenType { TokenType::Bang => write!(f, "!"), TokenType::BangEqual => write!(f, "!="), TokenType::BitAnd => write!(f, "&"), - TokenType::BitOr => write!(f, "|"), + TokenType::Pipe => write!(f, "|"), TokenType::BitXor => write!(f, "^"), TokenType::Colon => write!(f, ":"), TokenType::Comma => write!(f, ","), @@ -142,8 +144,10 @@ impl fmt::Display for TokenType { TokenType::Minus => write!(f, "-"), TokenType::Not => write!(f, "not"), TokenType::Object => write!(f, "object"), + TokenType::ObjectType(_) => write!(f, "object"), TokenType::Plus => write!(f, "+"), TokenType::Print => write!(f, "print"), + TokenType::Question => write!(f, "?"), TokenType::Return => write!(f, "return"), TokenType::RightParen => write!(f, ")"), TokenType::RightBrace => write!(f, "}}"), @@ -158,6 +162,7 @@ impl fmt::Display for TokenType { TokenType::While => write!(f, "while"), TokenType::SignedInteger => write!(f, "i32/64"), TokenType::UnsignedInteger => write!(f, "u32/64"), + } } } diff --git a/src/vm.rs b/src/vm.rs index 2cadd6b..6fb54c2 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -203,7 +203,7 @@ impl Vm { let function_name = chunk.constants[function_name_index].to_string(); let function_chunk = self.registry.get(&function_name) - .or_else(|| self.registry.get(&format!("{}{}", context, function_name))); + .or_else(|| self.registry.get(&format!("{}/{}", context, function_name))); if function_chunk.is_none() { return Err(RuntimeError::FunctionNotFound(function_name)); } else {