diff --git a/src/ast_compiler.rs b/src/ast_compiler.rs new file mode 100644 index 0000000..35eb85d --- /dev/null +++ b/src/ast_compiler.rs @@ -0,0 +1,439 @@ +use log::debug; +use crate::tokens::TokenType::{ + Bang, Bool, Char, Colon, Date, Eol, Equal, F32, F64, False, FloatingPoint, Greater, + GreaterEqual, I32, I64, Identifier, Integer, LeftParen, Less, LessEqual, Let, ListType, + MapType, Minus, Object, Plus, Print, RightParen, Slash, Star, String, Text, True, U32, U64, +}; +use crate::tokens::{Token, TokenType}; +use crate::value::Value; + +pub fn compile(tokens: Vec) -> anyhow::Result> { + let mut compiler = AstCompiler::new(tokens); + compiler.compile() +} + +struct AstCompiler { + tokens: Vec, + current: usize, + had_error: bool, +} + +impl AstCompiler { + fn new(tokens: Vec) -> Self { + Self { + tokens, + current: 0, + had_error: false, + } + } + + fn compile(&mut self) -> anyhow::Result> { + let mut statements = vec![]; + while !self.is_at_end() { + statements.push(self.declaration()?) + } + Ok(statements) + } + + fn declaration(&mut self) -> anyhow::Result { + if self.match_token(vec![Let]) { + self.let_declaration() + } else { + self.statement() + } + } + + fn let_declaration(&mut self) -> anyhow::Result { + let name = self.consume(Identifier, "Expect variable name.")?; + + let declared_type = if self.check(Colon) { + self.advance(); + Some(self.advance().token_type) + } else { + None + }; + + if self.match_token(vec![Equal]) { + let initializer = self.expression()?; + self.consume(Eol, "Expect end of line after initializer.")?; + + let inferred_type = initializer.infer_type(); + let var_type = match calculate_type(declared_type, inferred_type) { + Ok(var_type) => var_type, + Err(e) => { + println!("error at line {}", name.line); + self.had_error = true; + return Err(e); + } + }; + Ok(Statement::VarStmt { + name, + var_type, + initializer, + }) + } else { + Err(anyhow::anyhow!("Uninitialized variables are not allowed."))? + } + } + + fn statement(&mut self) -> anyhow::Result { + if self.match_token(vec![Print]) { + self.print_statement() + } else { + self.expr_statement() + } + } + + fn print_statement(&mut self) -> anyhow::Result { + let expr = self.expression()?; + self.consume(Eol, "Expect end of line after expression.")?; + Ok(Statement::Print { value: expr }) + } + + fn expr_statement(&mut self) -> anyhow::Result { + let expr = self.expression()?; + self.consume(Eol, "Expect end of line after expression.")?; + Ok(Statement::ExpressionStmt { expression: expr }) + } + + fn expression(&mut self) -> anyhow::Result { + self.equality() + } + + fn equality(&mut self) -> anyhow::Result { + let mut expr = self.comparison()?; + while self.match_token(vec![TokenType::BangEqual, TokenType::EqualEqual]) { + let operator = self.previous().clone(); + let right = self.comparison()?; + expr = Expression::Binary { + line: operator.line, + left: Box::new(expr), + operator, + right: Box::new(right), + }; + } + Ok(expr) + } + + fn comparison(&mut self) -> anyhow::Result { + let mut expr = self.term()?; + while self.match_token(vec![Greater, GreaterEqual, Less, LessEqual]) { + let operator = self.previous().clone(); + let right = self.term()?; + expr = Expression::Binary { + line: operator.line, + left: Box::new(expr), + operator, + right: Box::new(right), + }; + } + Ok(expr) + } + + fn term(&mut self) -> anyhow::Result { + let mut expr = self.factor()?; + while self.match_token(vec![Minus, Plus]) { + let operator = self.previous().clone(); + let right = self.factor()?; + expr = Expression::Binary { + line: operator.line, + left: Box::new(expr), + operator, + right: Box::new(right), + }; + } + Ok(expr) + } + + fn factor(&mut self) -> anyhow::Result { + let mut expr = self.unary()?; + while self.match_token(vec![Slash, Star]) { + let operator = self.previous().clone(); + let right = self.unary()?; + expr = Expression::Binary { + line: operator.line, + left: Box::new(expr), + operator, + right: Box::new(right), + }; + } + Ok(expr) + } + + fn unary(&mut self) -> anyhow::Result { + if self.match_token(vec![Bang, Minus]) { + let operator = self.previous().clone(); + let right = self.unary()?; + Ok(Expression::Unary { + line: self.peek().line, + operator, + right: Box::new(right), + }) + } else { + self.primary() + } + } + + fn primary(&mut self) -> anyhow::Result { + Ok(if self.match_token(vec![False]) { + Expression::Literal { + line: self.peek().line, + literaltype: Bool, + value: Value::Bool(false), + } + } else if self.match_token(vec![True]) { + Expression::Literal { + line: self.peek().line, + literaltype: Bool, + value: Value::Bool(true), + } //, FloatingPoint, Text + } else if self.match_token(vec![Integer]) { + Expression::Literal { + line: self.peek().line, + literaltype: Integer, + value: Value::I64(self.previous().lexeme.parse()?), + } + } else if self.match_token(vec![FloatingPoint]) { + Expression::Literal { + line: self.peek().line, + literaltype: FloatingPoint, + value: Value::F64(self.previous().lexeme.parse()?), + } + } else if self.match_token(vec![Text]) { + Expression::Literal { + line: self.peek().line, + literaltype: Text, + value: Value::String(self.previous().lexeme.to_string()), + } + } else if self.match_token(vec![LeftParen]) { + let expr = self.expression()?; + self.consume(RightParen, "Expect ')' after expression.")?; + Expression::Grouping { + line: self.peek().line, + expression: Box::new(expr), + } + } else { + unimplemented!() + }) + } + + fn consume(&mut self, token_type: TokenType, message: &str) -> anyhow::Result { + if self.check(token_type) { + self.advance(); + } else { + self.had_error = true; + return Err(anyhow::anyhow!(message.to_string())); + } + Ok(self.previous().clone()) + } + + fn match_token(&mut self, tokens: Vec) -> bool { + for tt in tokens { + if self.check(tt) { + self.advance(); + return true; + } + } + false + } + + fn check(&self, token_type: TokenType) -> bool { + if self.is_at_end() { + false + } else { + self.peek().token_type == token_type + } + } + + fn peek(&self) -> &Token { + &self.tokens[self.current] + } + + fn previous(&self) -> &Token { + &self.tokens[self.current - 1] + } + + fn advance(&mut self) -> &Token { + if !self.is_at_end() { + self.current += 1; + } + &self.previous() + } + + fn is_at_end(&self) -> bool { + self.peek().token_type == TokenType::Eof + } +} + +fn calculate_type( + declared_type: Option, + inferred_type: TokenType, +) -> anyhow::Result { + Ok(if let Some(declared_type) = declared_type { + if declared_type != inferred_type { + match (declared_type, inferred_type) { + (I32, I64) => I32, + (U32, U64) => U32, + (F32, F64) => F32, + (F64, I64) => F64, + (U64, I64) => U64, + (U64, I32) => U64, + _ => { + return Err(anyhow::anyhow!( + "Incompatible types. Expected {}, found {}", + declared_type, + inferred_type + )); + } + } + } else { + declared_type + } + } else { + inferred_type + }) +} + +#[derive(Debug)] +pub enum Statement { + ExpressionStmt { + expression: Expression, + }, + VarStmt { + name: Token, + var_type: TokenType, + initializer: Expression, + }, + Print { + value: Expression, + }, +} + +impl Statement { + pub fn line(&self) -> usize { + match self { + Statement::ExpressionStmt { expression } => expression.line(), + Statement::VarStmt { + name, + var_type, + initializer, + } => name.line, + Statement::Print { value } => value.line(), + } + } +} + +#[derive(Debug)] +pub enum Expression { + Binary { + line: usize, + left: Box, + operator: Token, + right: Box, + }, + Unary { + line: usize, + operator: Token, + right: Box, + }, + Grouping { + line: usize, + expression: Box, + }, + Literal { + line: usize, + literaltype: TokenType, + value: Value, + }, +} + +impl Expression { + pub fn line(&self) -> usize { + match self { + Expression::Binary { + line, + left, + operator, + right, + } => *line, + Expression::Unary { + line, + operator, + right, + } => *line, + Expression::Grouping { line, expression } => *line, + Expression::Literal { + line, + literaltype, + value, + } => *line, + } + } + pub fn infer_type(&self) -> TokenType { + match self { + Self::Binary { + line, + left, + operator, + right, + } => { + let left_type = left.infer_type(); + let right_type = right.infer_type(); + if left_type == right_type { + // map to determined numeric type if yet undetermined (32 or 64 bits) + match left_type { + FloatingPoint => F64, + Integer => I64, + _ => left_type, + } + } else { + if let Plus = operator.token_type { + // includes string concatenation with numbers + // followed by type coercion to 64 bits for numeric types + debug!("coerce {} : {}",left_type,right_type); + match (left_type, right_type) { + (_, Text) => Text, + (Text, _) => Text, + (FloatingPoint, _) => F64, + (Integer, FloatingPoint) => F64, + (Integer, _) => I64, + (F64, _) => F64, + (U64, U32) => U64, + (I64, I32) => I64, + // could add a date and a duration. future work + // could add a List and a value. also future work + // could add a Map and a tuple. Will I add tuple types? Future work! + _ => panic!("Unexpected coercion"), + } + // could have done some fall through here, but this will fail less gracefully, + // so if my thinking is wrong or incomplete it will panic + } else { + // type coercion to 64 bits for numeric types + debug!("coerce {} : {}",left_type,right_type); + match (left_type, right_type) { + (FloatingPoint, _) => F64, + (Integer, FloatingPoint) => F64, + (I64, FloatingPoint) => F64, + (F64, _) => F64, + (U64, U32) => U64, + (I64, I32) => I64, + (I64, Integer) => I64, + _ => panic!("Unexpected coercion"), + } + } + } + } + Self::Grouping { line, expression } => expression.infer_type(), + Self::Literal { + line, + literaltype, + value, + } => literaltype.clone(), + Self::Unary { + line, + operator, + right, + } => right.infer_type(), + } + } +} diff --git a/src/bytecode_compiler.rs b/src/bytecode_compiler.rs new file mode 100644 index 0000000..4548183 --- /dev/null +++ b/src/bytecode_compiler.rs @@ -0,0 +1,140 @@ +use crate::ast_compiler::{Expression, Statement}; +use crate::chunk::Chunk; +use crate::tokens::TokenType; +use crate::value::Value; +use crate::vm::{OP_ADD, OP_BITAND, OP_BITOR, OP_BITXOR, OP_CONSTANT, OP_DEF_BOOL, OP_DEF_CHAR, OP_DEF_DATE, OP_DEF_F64, OP_DEF_I32, OP_DEF_I64, OP_DEF_LIST, OP_DEF_MAP, OP_DEF_STRING, OP_DEF_STRUCT, OP_DEFINE, OP_DIVIDE, OP_EQUAL, OP_GREATER, OP_GREATER_EQUAL, OP_LESS, OP_LESS_EQUAL, OP_MULTIPLY, OP_NEGATE, OP_NOT, OP_RETURN, OP_SHL, OP_SHR, OP_SUBTRACT, OP_DEF_F32, OP_GET, OP_PRINT}; + +pub fn compile(ast: Vec) -> anyhow::Result { + let compiler = Compiler::new(); + Ok(compiler.compile(ast)?) +} + +struct Compiler { + chunk: Chunk, + had_error: bool, + current_line: usize, +} + +impl Compiler { + fn new() -> Self { + Self { + chunk: Chunk::new("main"), + had_error: false, + current_line: 0, + } + } + + fn compile(mut self, ast: Vec) -> anyhow::Result { + for statement in &ast { + self.compile_statement(statement)? + } + + self.emit_byte(OP_RETURN); + Ok(self.chunk) + } + + fn compile_statement(&mut self, statement: &Statement) -> anyhow::Result<()> { + self.current_line = statement.line(); + match statement { + Statement::VarStmt { + name, + var_type, + initializer, + } => { + let name_index= self.chunk.add_constant(Value::String(name.lexeme.clone())); + self.compile_expression(initializer)?; + self.define_variable(var_type, name_index)? + } + Statement::Print {value} => { + self.compile_expression(value)?; + self.emit_byte(OP_PRINT); + } + _ => unimplemented!(), + } + Ok(()) + } + + fn compile_expression(&mut self, expression: &Expression) -> anyhow::Result<()> { + match expression { + Expression::Literal { value, .. } => self.emit_constant(value), + Expression::Grouping { expression, .. } => self.compile_expression(expression)?, + Expression::Unary { + operator, right, .. + } => { + self.compile_expression(right)?; + match operator.token_type { + TokenType::Minus => { + self.emit_byte(OP_NEGATE); + } + TokenType::Bang => { + self.emit_byte(OP_NOT); + } + _ => unimplemented!("unary other than ! and -"), + } + } + Expression::Binary { + left, + operator, + right, + .. + } => { + self.compile_expression(left)?; + self.compile_expression(right)?; + match operator.token_type { + TokenType::Plus => self.emit_byte(OP_ADD), + TokenType::Minus => self.emit_byte(OP_SUBTRACT), + TokenType::Star => self.emit_byte(OP_MULTIPLY), + TokenType::Slash => self.emit_byte(OP_DIVIDE), + TokenType::BitAnd => self.emit_byte(OP_BITAND), + TokenType::BitOr => self.emit_byte(OP_BITOR), + TokenType::BitXor => self.emit_byte(OP_BITXOR), + TokenType::GreaterGreater => self.emit_byte(OP_SHR), + TokenType::LessLess => self.emit_byte(OP_SHL), + TokenType::EqualEqual => self.emit_byte(OP_EQUAL), + TokenType::Greater => self.emit_byte(OP_GREATER), + TokenType::GreaterEqual => self.emit_byte(OP_GREATER_EQUAL), + TokenType::Less => self.emit_byte(OP_LESS), + TokenType::LessEqual => self.emit_byte(OP_LESS_EQUAL), + _ => unimplemented!("binary other than plus, minus, star, slash"), + } + } + } + Ok(()) + } + + fn define_variable(&mut self, var_type: &TokenType, name_index: usize) -> anyhow::Result<()> { + let def_op = match var_type { + TokenType::I32 => OP_DEF_I32, + TokenType::I64 => OP_DEF_I64, + TokenType::U32 => OP_DEF_I64, + TokenType::U64 => OP_DEF_I64, + TokenType::F32 => OP_DEF_F32, + TokenType::F64 => OP_DEF_F64, + TokenType::Date => OP_DEF_DATE, + TokenType::String => OP_DEF_STRING, + TokenType::Char => OP_DEF_CHAR, + TokenType::Bool => OP_DEF_BOOL, + TokenType::ListType => OP_DEF_LIST, + TokenType::MapType => OP_DEF_MAP, + TokenType::Object => OP_DEF_STRUCT, + _ => unimplemented!("{}", var_type), + }; + + self.emit_bytes(def_op, name_index as u16); + Ok(()) + } + + fn emit_byte(&mut self, byte: u16) { + self.chunk.add(byte, self.current_line); + } + + fn emit_bytes(&mut self, b1: u16, b2: u16) { + self.emit_byte(b1); + self.emit_byte(b2); + } + + fn emit_constant(&mut self, value: &Value) { + let index = self.chunk.add_constant(value.clone()); + self.emit_bytes(OP_CONSTANT, index as u16); + } +} diff --git a/src/chunk.rs b/src/chunk.rs index 1ac7191..3aeda47 100644 --- a/src/chunk.rs +++ b/src/chunk.rs @@ -1,9 +1,10 @@ use tracing::debug; use crate::value::Value; use crate::vm::{ - OP_ADD, OP_BITAND, OP_BITOR, OP_BITXOR, OP_CONSTANT, OP_DIVIDE, OP_FALSE, OP_MULTIPLY, - OP_NEGATE, OP_RETURN, OP_SUBTRACT, OP_TRUE, OP_NOT, OP_SHL, OP_SHR, OP_LESS, OP_LESS_EQUAL, - OP_GREATER, OP_GREATER_EQUAL, OP_EQUAL, OP_PRINT, OP_POP, OP_DEFINE, OP_GET,OP_DEF_STRING, OP_DEF_BOOL + OP_ADD, OP_BITAND, OP_BITOR, OP_BITXOR, OP_CONSTANT, OP_DIVIDE, OP_MULTIPLY, + OP_NEGATE, OP_RETURN, OP_SUBTRACT, OP_NOT, OP_SHL, OP_SHR, OP_LESS, OP_LESS_EQUAL, + OP_GREATER, OP_GREATER_EQUAL, OP_EQUAL, OP_PRINT, OP_POP, OP_DEFINE, OP_GET,OP_DEF_STRING, + OP_DEF_I32, OP_DEF_BOOL, OP_DEF_F32, OP_DEF_F64, }; pub struct Chunk { @@ -53,8 +54,6 @@ impl Chunk { match instruction { OP_CONSTANT => self.constant_inst("LDC", offset), OP_ADD => self.simple_inst("ADD", offset), - OP_FALSE => self.simple_inst("LDC_false", offset), - OP_TRUE => self.simple_inst("LDC_true", offset), OP_SUBTRACT => self.simple_inst("SUB", offset), OP_MULTIPLY => self.simple_inst("MUL", offset), OP_DIVIDE => self.simple_inst("DIV", offset), @@ -75,6 +74,9 @@ impl Chunk { OP_POP => self.simple_inst("POP", offset), OP_DEFINE => self.constant_inst("DEF", offset), OP_DEF_STRING => self.constant_inst("DEFSTR", offset), + OP_DEF_I32 => self.constant_inst("DEFI32", offset), + OP_DEF_F32 => self.constant_inst("DEFF32", offset), + OP_DEF_F64 => self.constant_inst("DEFF64", offset), OP_DEF_BOOL => self.constant_inst("DEFBOOL", offset), OP_GET => self.constant_inst("GET", offset), _ => { diff --git a/src/compiler.rs b/src/compiler.rs index 7611b29..0f963eb 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -4,17 +4,22 @@ use crate::tokens::{Token, TokenType}; use crate::value::Value; use crate::vm::{ OP_ADD, OP_BITAND, OP_BITOR, OP_BITXOR, OP_CONSTANT, OP_DEF_BOOL, OP_DEF_CHAR, OP_DEF_DATE, - OP_DEF_F64, OP_DEF_I32, OP_DEF_I64, OP_DEF_LIST, OP_DEF_MAP, OP_DEF_STRUCT, OP_DEF_STRING, - OP_DEFINE, OP_DIVIDE, OP_EQUAL, OP_FALSE, OP_GET, OP_GREATER, OP_GREATER_EQUAL, OP_LESS, - OP_LESS_EQUAL, OP_MULTIPLY, OP_NEGATE, OP_NOT, OP_POP, OP_PRINT, OP_RETURN, OP_SHL, OP_SHR, - OP_SUBTRACT, OP_TRUE, + OP_DEF_F64, OP_DEF_I32, OP_DEF_I64, OP_DEF_LIST, OP_DEF_MAP, OP_DEF_STRING, OP_DEF_STRUCT, + OP_DEFINE, OP_DIVIDE, OP_EQUAL, OP_GET, OP_GREATER, OP_GREATER_EQUAL, OP_LESS, OP_LESS_EQUAL, + OP_MULTIPLY, OP_NEGATE, OP_NOT, OP_POP, OP_PRINT, OP_RETURN, OP_SHL, OP_SHR, OP_SUBTRACT, }; use anyhow::anyhow; use std::collections::HashMap; -use std::mem::discriminant; use std::sync::LazyLock; use tracing::debug; +macro_rules! parse_num { + ($s:ident, $variant:ident, $number:ident) => {{ + $s.typestack.push(TokenType::$variant); + Value::$variant($number.parse()?) + }}; +} + pub fn compile(source: &str) -> anyhow::Result { let tokens = scan(source); debug!("Scanned tokens: {:?}", tokens); @@ -26,7 +31,7 @@ pub fn compile(source: &str) -> anyhow::Result { current_token: &tokens[0], tokens: &tokens, current: 0, - types: vec![], + typestack: vec![], locals: vec![], previous: 0, had_error: false, @@ -41,7 +46,7 @@ struct Compiler<'a> { current: usize, previous_token: &'a Token, current_token: &'a Token, - types: Vec, + typestack: Vec, locals: Vec, previous: usize, had_error: bool, @@ -279,16 +284,25 @@ impl Rule { } } -fn number(s: &mut Compiler, expected_type: Option) -> anyhow::Result<()> { +fn number(s: &mut Compiler, mut expected_type: Option) -> anyhow::Result<()> { + debug!("number: expected type {:?}", expected_type); + + // coerce unknown numeric type to the expected type of the expression if any + if let None = expected_type { + if !s.typestack.is_empty() { + expected_type = Some(*s.typestack.last().unwrap()); + } + } + let number = &s.previous_token.lexeme; let value = if let Some(expected_type) = expected_type { match expected_type { - TokenType::I32 => Value::I32(number.parse()?), - TokenType::I64 => Value::I64(number.parse()?), - TokenType::U32 => Value::U32(number.parse()?), - TokenType::U64 => Value::U64(number.parse()?), - TokenType::F32 => Value::U32(number.parse()?), - TokenType::F64 => Value::U64(number.parse()?), + TokenType::I32 => parse_num!(s, I32, number), + TokenType::I64 => parse_num!(s, I64, number), + TokenType::U32 => parse_num!(s, U32, number), + TokenType::U64 => parse_num!(s, U64, number), + TokenType::F32 => parse_num!(s, F32, number), + TokenType::F64 => parse_num!(s, F64, number), _ => { return Err(anyhow!( @@ -300,14 +314,10 @@ fn number(s: &mut Compiler, expected_type: Option) -> anyhow::Result< } } } else { - if let TokenType::Number = s.previous_token.token_type { - if number.contains('.') { - Value::F64(number.parse()?) - } else { - Value::I64(number.parse()?) - } - } else { - return Err(anyhow!("I did not think this would happen")); + match s.previous_token.token_type { + TokenType::Integer => Value::I64(number.parse()?), + TokenType::FloatingPoint => Value::F64(number.parse()?), + _ => panic!("I did not think this would happen") } }; s.emit_constant(value); @@ -318,11 +328,21 @@ fn literal(s: &mut Compiler, expected_type: Option) -> anyhow::Result let actual_type = &s.previous_token.token_type; if let Some(expected_type) = expected_type { match (actual_type, expected_type) { - (TokenType::False, TokenType::Bool) => s.emit_constant(Value::Bool(false)), - (TokenType::True, TokenType::Bool) => s.emit_constant(Value::Bool(true)), + (TokenType::False, TokenType::Bool) => { + s.typestack.push(TokenType::Bool); + s.emit_constant(Value::Bool(false)) + } + (TokenType::True, TokenType::Bool) => { + s.typestack.push(TokenType::Bool); + s.emit_constant(Value::Bool(true)) + } (TokenType::Text, TokenType::String) => { + s.typestack.push(TokenType::String); s.emit_constant(Value::String(s.previous_token.lexeme.clone())) } + //list + //map + //struct value _ => { return Err(anyhow!( "Invalid type: expected {} value, got {}({})", @@ -343,11 +363,11 @@ fn literal(s: &mut Compiler, expected_type: Option) -> anyhow::Result Ok(()) } -fn skip(s: &mut Compiler, expected_type: Option) -> anyhow::Result<()> { +fn skip(s: &mut Compiler, _expected_type: Option) -> anyhow::Result<()> { Ok(()) } -fn grouping(s: &mut Compiler, expected_type: Option) -> anyhow::Result<()> { +fn grouping(s: &mut Compiler, _expected_type: Option) -> anyhow::Result<()> { s.expression(None)?; s.consume(TokenType::RightParen, "Expect ')' after expression.") } @@ -372,6 +392,7 @@ fn unary(s: &mut Compiler, expected_type: Option) -> anyhow::Result<( fn binary(s: &mut Compiler, expected_type: Option) -> anyhow::Result<()> { let operator_type = &s.previous_token.token_type; debug!("operator {:?}", operator_type); + debug!("expected type {:?}", expected_type); let rule = get_rule(operator_type); s.parse_precedence(rule.precedence + 1, None)?; match operator_type { @@ -429,6 +450,7 @@ static RULES: LazyLock> = LazyLock::new(|| { Rule::new(None, Some(binary), PREC_EQUALITY), ); rules.insert(TokenType::False, Rule::new(Some(literal), None, PREC_NONE)); + rules.insert(TokenType::FloatingPoint, Rule::new(Some(number), None, PREC_NONE)); rules.insert( TokenType::Greater, Rule::new(None, Some(binary), PREC_COMPARISON), @@ -447,12 +469,13 @@ static RULES: LazyLock> = LazyLock::new(|| { TokenType::Identifier, Rule::new(Some(variable), None, PREC_NONE), ); + rules.insert(TokenType::Integer, Rule::new(Some(number), None, PREC_NONE)); rules.insert(TokenType::Indent, Rule::new(Some(skip), None, PREC_NONE)); rules.insert(TokenType::LeftBrace, Rule::new(None, None, PREC_NONE)); rules.insert(TokenType::LeftBracket, Rule::new(None, None, PREC_NONE)); rules.insert( TokenType::LeftParen, - Rule::new(Some(binary), None, PREC_NONE), + Rule::new(Some(grouping), None, PREC_NONE), ); rules.insert( TokenType::Less, @@ -475,7 +498,6 @@ static RULES: LazyLock> = LazyLock::new(|| { TokenType::Minus, Rule::new(Some(unary), Some(binary), PREC_TERM), ); - rules.insert(TokenType::Number, Rule::new(Some(number), None, PREC_NONE)); rules.insert(TokenType::Plus, Rule::new(None, Some(binary), PREC_TERM)); rules.insert(TokenType::Print, Rule::new(None, None, PREC_NONE)); rules.insert(TokenType::Return, Rule::new(None, None, PREC_NONE)); diff --git a/src/lib.rs b/src/lib.rs index c31ebf8..e18eb3b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,3 +5,5 @@ pub mod scanner; mod tokens; mod value; pub mod vm; +pub mod ast_compiler; +pub mod bytecode_compiler; diff --git a/src/main.rs b/src/main.rs index c2bc311..5f59d6e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,21 +1,41 @@ +use crudlang::{ast_compiler, chunk}; +use crudlang::bytecode_compiler::compile; +use crudlang::scanner::scan; +use crudlang::vm::{interpret, Vm}; + fn main() -> anyhow::Result<()> { tracing_subscriber::fmt::init(); - let chunk = crudlang::compiler::compile( - r#"let a: bool = "koe""#, - ); - match chunk { - Err(e) => { - println!("{}", e); - return Ok(()); + let tokens = scan(r#"let a = ((1+2)*3.0)*4"#); + match ast_compiler::compile(tokens) { + Ok(statements) => { + // println!("{:?}", statements); + let chunk = compile(statements)?; + // chunk.disassemble(); + interpret(chunk); } - Ok(chunk) => { - chunk.disassemble(); - - let result = crudlang::vm::interpret(chunk)?; - println!("{}", result); + Err(e) => { + println!("{}", e) } } + // println!("{}",expression.infer_type()); + + // let chunk = crudlang::compiler::compile( + // r#"let a ="hello " + 42"#, + // ); + // match chunk { + // Err(e) => { + // println!("{}", e); + // return Ok(()); + // } + // Ok(chunk) => { + // chunk.disassemble(); + // + // let result = crudlang::vm::interpret(chunk)?; + // println!("{}", result); + // } + // } + Ok(()) } diff --git a/src/scanner.rs b/src/scanner.rs index 43b88a7..f14de88 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -1,4 +1,4 @@ -use crate::tokens::TokenType::BitXor; +use crate::tokens::TokenType::{BitXor, FloatingPoint, Integer}; use crate::{ keywords, tokens::{ @@ -12,7 +12,7 @@ pub fn scan(source: &str) -> Vec { chars: source.chars().collect(), current: 0, start: 0, - line: 0, + line: 1, tokens: vec![], new_line: true, }; @@ -51,7 +51,7 @@ impl Scanner { '-' => self.add_token(TokenType::Minus), '+' => self.add_token(TokenType::Plus), ':' => self.add_token(TokenType::Colon), - ';' => self.add_token(TokenType::Semicolon), + ';' => println!("Warning: Ignoring semicolon at line {}", self.line), '*' => self.add_token(TokenType::Star), '!' => { let t = if self.match_next('=') { @@ -150,8 +150,9 @@ impl Scanner { while is_digit(self.peek()) { self.advance(); } - + let mut has_dot = false; if self.peek() == '.' && is_digit(self.peek_next()) { + has_dot = true; self.advance(); } @@ -159,7 +160,7 @@ impl Scanner { self.advance(); } let value: String = self.chars[self.start..self.current].iter().collect(); - self.add_token_with_value(TokenType::Number, value); + self.add_token_with_value(if has_dot { FloatingPoint } else { Integer }, value); } fn string(&mut self) { diff --git a/src/tokens.rs b/src/tokens.rs index e8a1bb3..861af61 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -1,6 +1,6 @@ use std::fmt; -#[derive(Debug)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Token { pub token_type: TokenType, pub lexeme: String, @@ -23,7 +23,7 @@ enum Value { } #[derive(Debug, PartialEq, Clone, Copy, Hash)] -pub(crate) enum TokenType { +pub enum TokenType { Bang, BangEqual, BitAnd, @@ -52,9 +52,10 @@ pub(crate) enum TokenType { Hash, I32, I64, + Identifier, If, Indent, - Identifier, + Integer, //undetermined integer type LeftBrace, LeftBracket, LeftParen, @@ -68,7 +69,7 @@ pub(crate) enum TokenType { LogicalOr, Minus, Not, - Number, + FloatingPoint, //undetermined float type Object, Plus, Print, @@ -101,13 +102,14 @@ impl fmt::Display for TokenType { TokenType::F32 => write!(f, "f32"), TokenType::F64 => write!(f, "f64"), TokenType::Bool => write!(f, "bool"), - TokenType::Bang=> write!(f, "!"), - TokenType::BangEqual=> write!(f, "!="), - TokenType::BitAnd=> write!(f, "&"), - TokenType::BitOr=> write!(f, "|"), - TokenType::BitXor=> write!(f, "^"), - TokenType::Colon=> write!(f, ":"), - TokenType::Comma=> write!(f, ","), + TokenType::Bang => write!(f, "!"), + TokenType::BangEqual => write!(f, "!="), + TokenType::BitAnd => write!(f, "&"), + TokenType::BitOr => write!(f, "|"), + TokenType::BitXor => write!(f, "^"), + TokenType::Colon => write!(f, ":"), + TokenType::Comma => write!(f, ","), + TokenType::FloatingPoint => write!(f, "float"), TokenType::MapType => write!(f, "map"), TokenType::ListType => write!(f, "list"), TokenType::Dot => write!(f, "."), @@ -125,8 +127,9 @@ impl fmt::Display for TokenType { TokenType::GreaterGreater => write!(f, ">>"), TokenType::Hash => write!(f, "#"), TokenType::If => write!(f, "if"), - TokenType::Indent => write!(f, "indent"), TokenType::Identifier => write!(f, "identifier"), + TokenType::Indent => write!(f, "indent"), + TokenType::Integer => write!(f, "integer"), TokenType::LeftBrace => write!(f, "{{"), TokenType::LeftBracket => write!(f, "["), TokenType::LeftParen => write!(f, "("), @@ -138,7 +141,6 @@ impl fmt::Display for TokenType { TokenType::LogicalOr => write!(f, "||"), TokenType::Minus => write!(f, "-"), TokenType::Not => write!(f, "not"), - TokenType::Number => write!(f, "number"), TokenType::Object => write!(f, "object"), TokenType::Plus => write!(f, "+"), TokenType::Print => write!(f, "print"), @@ -157,6 +159,4 @@ impl fmt::Display for TokenType { } } -impl Eq for TokenType { - -} \ No newline at end of file +impl Eq for TokenType {} diff --git a/src/value.rs b/src/value.rs index d379827..26a0b07 100644 --- a/src/value.rs +++ b/src/value.rs @@ -2,31 +2,31 @@ use anyhow::anyhow; use chrono::{DateTime, Utc}; use std::cmp::Ordering; use std::collections::HashMap; -use std::fmt::{write, Display, Formatter}; +use std::fmt::{Display, Formatter, write}; use std::hash::{Hash, Hasher}; use std::ops::{Add, BitAnd, BitOr, BitXor, Div, Mul, Neg, Not, Shl, Shr, Sub}; #[derive(Debug, Clone)] pub struct StructDefinition { - fields: Vec + fields: Vec, } #[derive(Debug, Clone)] -pub struct Instance { +pub struct StructValue { definition: StructDefinition, - fields: Vec + fields: Vec, } -impl Instance { +impl StructValue { pub fn new(definition: StructDefinition) -> Self { Self { definition, - fields: Vec::new() + fields: Vec::new(), } } } -impl Display for Instance { +impl Display for StructValue { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { for (i, field) in self.definition.fields.iter().enumerate() { write!(f, "{}: {}", field, self.fields[i])?; @@ -50,9 +50,9 @@ pub enum Value { Enum, List(Vec), Map(HashMap), - Struct(Instance), + Struct(StructValue), Error(String), - Void + Void, } impl Into for i32 { @@ -219,7 +219,6 @@ impl Mul<&Value> for &Value { (Value::U64(a), Value::U64(b)) => Ok(Value::U64(a * b)), (Value::F32(a), Value::F32(b)) => Ok(Value::F32(a * b)), (Value::F64(a), Value::F64(b)) => Ok(Value::F64(a * b)), - //enum? _ => Err(anyhow!("Cannot multiply")), } } @@ -338,7 +337,8 @@ impl PartialEq for Value { (Value::Map(a), Value::Map(b)) => { let mut equal = true; for (k, v) in a.iter() { - if !b.contains_key(k) || b.get(k).unwrap() != v { //safe unwrap + if !b.contains_key(k) || b.get(k).unwrap() != v { + //safe unwrap equal = false; break; } @@ -359,8 +359,8 @@ impl PartialOrd for Value { (Value::I32(a), Value::I32(b)) => Some(a.partial_cmp(b)?), (Value::I64(a), Value::I64(b)) => Some(a.partial_cmp(b)?), (Value::U32(a), Value::U32(b)) => Some(a.partial_cmp(b)?), - (Value::U64(a), Value::U64(b)) =>Some(a.partial_cmp(b)?), - (Value::F32(a), Value::F32(b)) =>Some(a.partial_cmp(b)?), + (Value::U64(a), Value::U64(b)) => Some(a.partial_cmp(b)?), + (Value::F32(a), Value::F32(b)) => Some(a.partial_cmp(b)?), (Value::F64(a), Value::F64(b)) => Some(a.partial_cmp(b)?), (Value::String(a), Value::String(b)) => Some(a.partial_cmp(b)?), (Value::Char(a), Value::Char(b)) => Some(a.partial_cmp(b)?), @@ -370,7 +370,7 @@ impl PartialOrd for Value { } } -impl Hash for Value{ +impl Hash for Value { fn hash(&self, state: &mut H) { std::mem::discriminant(self).hash(state); diff --git a/src/vm.rs b/src/vm.rs index 03a4030..7f29b05 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -19,31 +19,36 @@ macro_rules! define_var { }}; } +pub struct Vm { + chunk: Chunk, + ip: usize, + stack: Vec, + local_vars: HashMap, + error_occurred: bool, +} + pub fn interpret(chunk: Chunk) -> anyhow::Result { let mut vm = Vm { chunk, ip: 0, stack: vec![], local_vars: HashMap::new(), + error_occurred: false, }; vm.run() } -pub struct Vm { - chunk: Chunk, - ip: usize, - stack: Vec, - local_vars: HashMap, -} - impl Vm { fn run(&mut self) -> anyhow::Result { loop { + if self.error_occurred { + return Err(anyhow!("Error occurred")); + } debug!("{:?}", self.stack); let opcode = self.chunk.code[self.ip]; self.ip += 1; match opcode { - OP_CONSTANT | OP_FALSE | OP_TRUE => { + OP_CONSTANT => { let value = &self.chunk.constants[self.chunk.code[self.ip] as usize]; self.ip += 1; self.push(value.clone()); @@ -104,7 +109,7 @@ impl Vm { OP_DEF_F64 => define_var!(self, F64), OP_DEF_STRING => define_var!(self, String), OP_DEF_CHAR => define_var!(self, Char), - OP_DEF_BOOL =>define_var!(self, Bool), + OP_DEF_BOOL => define_var!(self, Bool), OP_DEF_DATE => define_var!(self, Date), OP_DEF_LIST => define_var!(self, List), OP_DEF_MAP => define_var!(self, Map), @@ -148,7 +153,10 @@ fn binary_op(vm: &mut Vm, op: impl Fn(&Value, &Value) -> anyhow::Result + let result = op(&a, &b); match result { Ok(result) => vm.push(result), - Err(e) => println!("Error: {} {:?} and {:?}", e.to_string(), a, b), + Err(e) => { + println!("Error: {} {:?} and {:?}", e.to_string(), a, b); + vm.error_occurred = true; + } } } @@ -169,8 +177,8 @@ pub const OP_DIVIDE: u16 = 5; pub const OP_NEGATE: u16 = 6; pub const OP_PRINT: u16 = 7; pub const OP_RETURN: u16 = 8; -pub const OP_TRUE: u16 = 9; -pub const OP_FALSE: u16 = 10; +// pub const OP_TRUE: u16 = 9; +// pub const OP_FALSE: u16 = 10; // obsolete, vacant space pub const OP_AND: u16 = 11; pub const OP_OR: u16 = 12; pub const OP_NOT: u16 = 13; @@ -186,7 +194,7 @@ pub const OP_BITXOR: u16 = 22; pub const OP_SHR: u16 = 23; pub const OP_SHL: u16 = 24; pub const OP_POP: u16 = 25; -pub const OP_DEFINE: u16 = 26;// may be obsolete already +pub const OP_DEFINE: u16 = 26; // may be obsolete already pub const OP_GET: u16 = 27; pub const OP_DEF_I32: u16 = 28; pub const OP_DEF_I64: u16 = 29;