From e2ddf94a002d5fb9a39d99abf0c92cdba6bd6b16 Mon Sep 17 00:00:00 2001 From: Shautvast Date: Wed, 22 Oct 2025 16:14:38 +0200 Subject: [PATCH] first step in type checking --- src/chunk.rs | 5 +- src/compiler.rs | 169 +++++++++++++++++++++++++++++++++++++----------- src/keywords.rs | 20 +++--- src/main.rs | 4 +- src/scanner.rs | 2 +- src/tokens.rs | 91 +++++++++++++++++++++++--- src/vm.rs | 17 +++++ 7 files changed, 245 insertions(+), 63 deletions(-) diff --git a/src/chunk.rs b/src/chunk.rs index 144e46b..5da8f4d 100644 --- a/src/chunk.rs +++ b/src/chunk.rs @@ -3,7 +3,7 @@ use crate::value::Value; use crate::vm::{ OP_ADD, OP_BITAND, OP_BITOR, OP_BITXOR, OP_CONSTANT, OP_DIVIDE, OP_FALSE, OP_MULTIPLY, OP_NEGATE, OP_RETURN, OP_SUBTRACT, OP_TRUE, OP_NOT, OP_SHL, OP_SHR, OP_LESS, OP_LESS_EQUAL, - OP_GREATER, OP_GREATER_EQUAL, OP_EQUAL, OP_PRINT, OP_POP, OP_DEFINE, OP_GET + OP_GREATER, OP_GREATER_EQUAL, OP_EQUAL, OP_PRINT, OP_POP, OP_DEFINE, OP_GET,OP_DEF_STRING }; pub struct Chunk { @@ -74,9 +74,10 @@ impl Chunk { OP_PRINT => self.simple_inst("PRT", offset), OP_POP => self.simple_inst("POP", offset), OP_DEFINE => self.constant_inst("DEF", offset), + OP_DEF_STRING => self.constant_inst("DEFSTR", offset), OP_GET => self.constant_inst("GET", offset), _ => { - println!("Unknown instruction"); + println!("Unknown instruction {}", instruction); offset + 1 } } diff --git a/src/compiler.rs b/src/compiler.rs index acb4b40..90a2b03 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -3,12 +3,15 @@ use crate::scanner::scan; use crate::tokens::{Token, TokenType}; use crate::value::Value; use crate::vm::{ - OP_ADD, OP_BITAND, OP_BITOR, OP_BITXOR, OP_CONSTANT, OP_DEFINE, OP_DIVIDE, OP_EQUAL, OP_FALSE, - OP_GET, OP_GREATER, OP_GREATER_EQUAL, OP_LESS, OP_LESS_EQUAL, OP_MULTIPLY, OP_NEGATE, OP_NOT, - OP_POP, OP_PRINT, OP_RETURN, OP_SHL, OP_SHR, OP_SUBTRACT, OP_TRUE, + OP_ADD, OP_BITAND, OP_BITOR, OP_BITXOR, OP_CONSTANT, OP_DEF_BOOL, OP_DEF_CHAR, OP_DEF_DATE, + OP_DEF_I32, OP_DEF_I64, OP_DEF_LIST, OP_DEF_MAP, OP_DEF_OBJ, OP_DEF_STRING, OP_DEFINE, + OP_DIVIDE, OP_EQUAL, OP_FALSE, OP_GET, OP_GREATER, OP_GREATER_EQUAL, OP_LESS, OP_LESS_EQUAL, + OP_MULTIPLY, OP_NEGATE, OP_NOT, OP_POP, OP_PRINT, OP_RETURN, OP_SHL, OP_SHR, OP_SUBTRACT, + OP_TRUE, }; use anyhow::anyhow; use std::collections::HashMap; +use std::mem::discriminant; use std::sync::LazyLock; use tracing::debug; @@ -23,6 +26,8 @@ pub fn compile(source: &str) -> anyhow::Result { current_token: &tokens[0], tokens: &tokens, current: 0, + types: vec![], + locals: vec![], previous: 0, had_error: false, }; @@ -36,6 +41,8 @@ struct Compiler<'a> { current: usize, previous_token: &'a Token, current_token: &'a Token, + types: Vec, + locals: Vec, previous: usize, had_error: bool, } @@ -63,10 +70,28 @@ impl<'a> Compiler<'a> { fn let_declaration(&mut self) -> anyhow::Result<()> { let index = self.parse_variable("Expect variable name")?; + let mut var_type = None; + if self.check(TokenType::Colon) { + self.consume(TokenType::Colon, "must not happen")?; + match self.current_token.token_type { + TokenType::I32 + | TokenType::I64 + | TokenType::U32 + | TokenType::U64 + | TokenType::Date + | TokenType::String + | TokenType::Char + | TokenType::Bool + | TokenType::ListType + | TokenType::MapType => var_type = Some(self.current_token.token_type), + _ => return Err(anyhow!("Invalid type {:?}", self.current_token.token_type)), + } + self.advance()?; + } if self.match_token(TokenType::Equal) { - self.expression()?; + self.expression(var_type)?; self.consume(TokenType::Eol, "Expect end of line")?; - self.define_variable(index)?; + self.define_variable(var_type, index)?; } else { return Err(anyhow!( "You cannot declare a variable without initializing it." @@ -86,8 +111,23 @@ impl<'a> Compiler<'a> { Ok(index) } - fn define_variable(&mut self, index: usize) -> anyhow::Result<()> { - self.emit_bytes(OP_DEFINE, index as u16); + fn define_variable(&mut self, var_type: Option, index: usize) -> anyhow::Result<()> { + let def_op = match var_type { + Some(TokenType::I32) => OP_DEF_I32, + Some(TokenType::I64) => OP_DEF_I64, + Some(TokenType::U32) => OP_DEF_I64, + Some(TokenType::U64) => OP_DEF_I64, + Some(TokenType::Date) => OP_DEF_DATE, + Some(TokenType::String) => OP_DEF_STRING, + Some(TokenType::Char) => OP_DEF_CHAR, + Some(TokenType::Bool) => OP_DEF_BOOL, + Some(TokenType::ListType) => OP_DEF_LIST, + Some(TokenType::MapType) => OP_DEF_MAP, + Some(TokenType::Object) => OP_DEF_OBJ, + _ => OP_DEFINE, + }; + + self.emit_bytes(def_op, index as u16); Ok(()) } @@ -101,14 +141,17 @@ impl<'a> Compiler<'a> { fn expression_statement(&mut self) -> anyhow::Result<()> { debug!("expression statement"); - self.expression()?; + self.expression(None)?; self.emit_byte(OP_POP); Ok(()) } fn print_statement(&mut self) -> anyhow::Result<()> { - self.expression()?; - self.consume(TokenType::Eol, "No further expressions expected. Please continue on a new line after the first.\n")?; + self.expression(None)?; + self.consume( + TokenType::Eol, + "No further statements expected. Please start on a new line after the first one.\n", + )?; self.emit_byte(OP_PRINT); Ok(()) } @@ -158,23 +201,27 @@ impl<'a> Compiler<'a> { self.current_token.token_type == token_type } - fn expression(&mut self) -> anyhow::Result<()> { - self.parse_precedence(PREC_ASSIGNMENT)?; + fn expression(&mut self, expected_type: Option) -> anyhow::Result<()> { + self.parse_precedence(PREC_ASSIGNMENT, expected_type)?; Ok(()) } - fn parse_precedence(&mut self, precedence: usize) -> anyhow::Result<()> { + fn parse_precedence( + &mut self, + precedence: usize, + expected_type: Option, + ) -> anyhow::Result<()> { self.advance()?; let rule = get_rule(&self.previous_token.token_type); debug!("Precedence rule: {:?}", rule); if let Some(prefix) = rule.prefix { - prefix(self)?; + prefix(self, expected_type)?; while precedence <= get_rule(&self.current_token.token_type).precedence { self.advance()?; let infix_rule = get_rule(&self.previous_token.token_type).infix; if let Some(infix) = infix_rule { - infix(self)?; + infix(self, expected_type)?; } } } else { @@ -198,7 +245,7 @@ impl<'a> Compiler<'a> { } } -type ParseFn = fn(&mut Compiler) -> anyhow::Result<()>; +type ParseFn = fn(&mut Compiler, expected_type: Option) -> anyhow::Result<()>; #[derive(Debug)] struct Rule { @@ -217,38 +264,66 @@ impl Rule { } } -fn number(s: &mut Compiler) -> anyhow::Result<()> { - s.emit_constant(match s.previous_token.token_type { - TokenType::Number => Value::F64(s.previous_token.lexeme.parse()?), - _ => unimplemented!(), // TODO numeric types - }); +fn number(s: &mut Compiler, expected_type: Option) -> anyhow::Result<()> { + let number = &s.previous_token.lexeme; + let value = if let Some(expected_type) = expected_type { + match expected_type { + TokenType::I32 => Value::I32(number.parse()?), + TokenType::I64 => Value::I64(number.parse()?), + TokenType::U32 => Value::U32(number.parse()?), + TokenType::U64 => Value::U64(number.parse()?), + TokenType::F32 => Value::U32(number.parse()?), + TokenType::F64 => Value::U64(number.parse()?), + + _ => {return Err(anyhow!("Invalid type: expected {} value, got {}({})", expected_type, &s.previous_token.token_type, number));} + } + } else { + if let TokenType::Number = s.previous_token.token_type { + if number.contains('.'){ + Value::F64(number.parse()?) + } else { + Value::I64(number.parse()?) + } + } else { + return Err(anyhow!("I did not think this would happen")) + } + }; + s.emit_constant(value); Ok(()) } -fn literal(s: &mut Compiler) -> anyhow::Result<()> { +fn literal(s: &mut Compiler, expected_type: Option) -> anyhow::Result<()> { + if let Some(expected_type) = expected_type { + if discriminant(&expected_type) != discriminant(&s.previous_token.token_type) { + return Err(anyhow!( + "Cannot assign {:?} to {:?}", + s.previous_token.token_type, + expected_type + )); + } + } match s.previous_token.token_type { TokenType::False => s.emit_constant(Value::Bool(false)), TokenType::True => s.emit_constant(Value::Bool(true)), - TokenType::String => s.emit_constant(Value::String(s.previous_token.lexeme.clone())), + TokenType::Text => s.emit_constant(Value::String(s.previous_token.lexeme.clone())), _ => {} } Ok(()) } -fn skip(s: &mut Compiler) -> anyhow::Result<()> { - // s.advance() +fn skip(s: &mut Compiler, expected_type: Option) -> anyhow::Result<()> { Ok(()) } -fn grouping(s: &mut Compiler) -> anyhow::Result<()> { - s.expression()?; +fn grouping(s: &mut Compiler, expected_type: Option) -> anyhow::Result<()> { + s.expression(None)?; s.consume(TokenType::RightParen, "Expect ')' after expression.") } -fn unary(s: &mut Compiler) -> anyhow::Result<()> { +fn unary(s: &mut Compiler, expected_type: Option) -> anyhow::Result<()> { let operator_type = s.previous_token.token_type; - s.parse_precedence(PREC_UNARY)?; + s.parse_precedence(PREC_UNARY, None)?; match operator_type { TokenType::Minus => { @@ -262,11 +337,11 @@ fn unary(s: &mut Compiler) -> anyhow::Result<()> { Ok(()) } -fn binary(s: &mut Compiler) -> anyhow::Result<()> { +fn binary(s: &mut Compiler, expected_type: Option) -> anyhow::Result<()> { let operator_type = &s.previous_token.token_type; debug!("operator {:?}", operator_type); let rule = get_rule(operator_type); - s.parse_precedence(rule.precedence + 1)?; + s.parse_precedence(rule.precedence + 1, None)?; match operator_type { TokenType::Plus => s.emit_byte(OP_ADD), TokenType::Minus => s.emit_byte(OP_SUBTRACT), @@ -287,7 +362,7 @@ fn binary(s: &mut Compiler) -> anyhow::Result<()> { Ok(()) } -fn variable(s: &mut Compiler) -> anyhow::Result<()> { +fn variable(s: &mut Compiler, expected_type: Option) -> anyhow::Result<()> { let index = s.identifier_constant(s.previous_token)?; s.emit_bytes(OP_GET, index as u16); Ok(()) @@ -309,7 +384,7 @@ static RULES: LazyLock> = LazyLock::new(|| { ); rules.insert(TokenType::Colon, Rule::new(None, None, PREC_NONE)); rules.insert(TokenType::Comma, Rule::new(None, None, PREC_NONE)); - rules.insert(TokenType::DateType, Rule::new(None, None, PREC_NONE)); + rules.insert(TokenType::Date, Rule::new(None, None, PREC_NONE)); rules.insert(TokenType::Dot, Rule::new(None, None, PREC_NONE)); rules.insert(TokenType::Else, Rule::new(None, None, PREC_NONE)); rules.insert(TokenType::Eof, Rule::new(Some(skip), None, PREC_NONE)); @@ -334,8 +409,8 @@ static RULES: LazyLock> = LazyLock::new(|| { TokenType::GreaterGreater, Rule::new(None, Some(binary), PREC_BITSHIFT), ); - rules.insert(TokenType::I32Type, Rule::new(None, None, PREC_NONE)); - rules.insert(TokenType::I64Type, Rule::new(None, None, PREC_NONE)); + rules.insert(TokenType::I32, Rule::new(None, None, PREC_NONE)); + rules.insert(TokenType::I64, Rule::new(None, None, PREC_NONE)); rules.insert( TokenType::Identifier, Rule::new(Some(variable), None, PREC_NONE), @@ -377,16 +452,16 @@ static RULES: LazyLock> = LazyLock::new(|| { rules.insert(TokenType::RightBracket, Rule::new(None, None, PREC_NONE)); rules.insert(TokenType::Slash, Rule::new(None, Some(binary), PREC_FACTOR)); rules.insert(TokenType::Star, Rule::new(None, Some(binary), PREC_FACTOR)); - rules.insert(TokenType::String, Rule::new(Some(literal), None, PREC_NONE)); + rules.insert(TokenType::Text, Rule::new(Some(literal), None, PREC_NONE)); rules.insert( TokenType::BitAnd, Rule::new(None, Some(binary), PREC_BITAND), ); - rules.insert(TokenType::StringType, Rule::new(None, None, PREC_NONE)); + rules.insert(TokenType::String, Rule::new(None, None, PREC_NONE)); rules.insert(TokenType::Struct, Rule::new(None, None, PREC_NONE)); rules.insert(TokenType::True, Rule::new(Some(literal), None, PREC_NONE)); - rules.insert(TokenType::U32Type, Rule::new(None, None, PREC_NONE)); - rules.insert(TokenType::U64Type, Rule::new(None, None, PREC_NONE)); + rules.insert(TokenType::U32, Rule::new(None, None, PREC_NONE)); + rules.insert(TokenType::U64, Rule::new(None, None, PREC_NONE)); rules.insert(TokenType::While, Rule::new(None, None, PREC_NONE)); rules @@ -407,3 +482,19 @@ const PREC_FACTOR: usize = 11; const PREC_UNARY: usize = 12; const PREC_CALL: usize = 13; const PREC_PRIMARY: usize = 14; + +enum ValueType{ + DateType, + BoolType, + CharType, + F32Type, + F64Type, + I32Type, + I64Type, + ObjectType, + U32Type, + U64Type, + StringType, + ListType, + MapType, +} \ No newline at end of file diff --git a/src/keywords.rs b/src/keywords.rs index 95730e2..bbfcb40 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -3,27 +3,29 @@ use crate::tokens::TokenType; pub(crate) fn get_keyword(lexeme: &str) -> Option { match lexeme { "and" => Some(TokenType::LogicalAnd), - "bool" => Some(TokenType::BoolType), - "char" => Some(TokenType::CharType), - "date" => Some(TokenType::DateType), + "bool" => Some(TokenType::Bool), + "char" => Some(TokenType::Char), + "date" => Some(TokenType::Date), "else" => Some(TokenType::Else), "false" => Some(TokenType::False), + "f32" => Some(TokenType::F32), + "f64" => Some(TokenType::F64), "fn" => Some(TokenType::Fn), "for" => Some(TokenType::For), "if" => Some(TokenType::If), - "i32" => Some(TokenType::I32Type), - "i64" => Some(TokenType::I64Type), + "i32" => Some(TokenType::I32), + "i64" => Some(TokenType::I64), "let" => Some(TokenType::Let), "list" => Some(TokenType::ListType), "map" => Some(TokenType::MapType), "or" => Some(TokenType::LogicalOr), - "object" => Some(TokenType::ObjectType), + "object" => Some(TokenType::Object), "print" => Some(TokenType::Print), "struct" => Some(TokenType::Struct), - "string" => Some(TokenType::StringType), + "string" => Some(TokenType::String), "true" => Some(TokenType::True), - "u32" => Some(TokenType::U32Type), - "u64" => Some(TokenType::U64Type), + "u32" => Some(TokenType::U32), + "u64" => Some(TokenType::U64), "while" => Some(TokenType::While), _ => None, diff --git a/src/main.rs b/src/main.rs index 86baf7d..f826fb0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,9 +2,7 @@ fn main() -> anyhow::Result<()> { tracing_subscriber::fmt::init(); let chunk = crudlang::compiler::compile( - r#"let a = "hello " + 42 - print a print a - print a"#, + r#"let a:bool = 42"#, ); match chunk { Err(e) => { diff --git a/src/scanner.rs b/src/scanner.rs index 2908f78..43b88a7 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -179,7 +179,7 @@ impl Scanner { let value: String = self.chars[self.start + 1..self.current - 1] .iter() .collect(); - self.add_token_with_value(TokenType::String, value); + self.add_token_with_value(TokenType::Text, value); } fn peek(&self) -> char { diff --git a/src/tokens.rs b/src/tokens.rs index f251d72..e8a1bb3 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -1,3 +1,5 @@ +use std::fmt; + #[derive(Debug)] pub struct Token { pub token_type: TokenType, @@ -27,11 +29,11 @@ pub(crate) enum TokenType { BitAnd, BitOr, BitXor, - BoolType, - CharType, + Bool, + Char, Colon, Comma, - DateType, + Date, Dot, Else, Eof, @@ -39,6 +41,8 @@ pub(crate) enum TokenType { Equal, EqualEqual, Error, + F32, + F64, False, Fn, For, @@ -46,8 +50,8 @@ pub(crate) enum TokenType { GreaterEqual, GreaterGreater, Hash, - I32Type, - I64Type, + I32, + I64, If, Indent, Identifier, @@ -65,7 +69,7 @@ pub(crate) enum TokenType { Minus, Not, Number, - ObjectType, + Object, Plus, Print, Return, @@ -75,15 +79,84 @@ pub(crate) enum TokenType { Semicolon, Slash, Star, + Text, String, - StringType, Struct, True, - U32Type, - U64Type, + U32, + U64, While, } +impl fmt::Display for TokenType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + TokenType::String => write!(f, "string"), + TokenType::Date => write!(f, "date"), + TokenType::Char => write!(f, "char"), + TokenType::I32 => write!(f, "i32"), + TokenType::I64 => write!(f, "i64"), + TokenType::U32 => write!(f, "u32"), + TokenType::U64 => write!(f, "u64"), + TokenType::F32 => write!(f, "f32"), + TokenType::F64 => write!(f, "f64"), + TokenType::Bool => write!(f, "bool"), + TokenType::Bang=> write!(f, "!"), + TokenType::BangEqual=> write!(f, "!="), + TokenType::BitAnd=> write!(f, "&"), + TokenType::BitOr=> write!(f, "|"), + TokenType::BitXor=> write!(f, "^"), + TokenType::Colon=> write!(f, ":"), + TokenType::Comma=> write!(f, ","), + TokenType::MapType => write!(f, "map"), + TokenType::ListType => write!(f, "list"), + TokenType::Dot => write!(f, "."), + TokenType::Else => write!(f, "else"), + TokenType::Eof => write!(f, "EOF"), + TokenType::Eol => write!(f, "EOL"), + TokenType::Equal => write!(f, "="), + TokenType::EqualEqual => write!(f, "=="), + TokenType::Error => write!(f, "error"), + TokenType::False => write!(f, "false"), + TokenType::Fn => write!(f, "fn"), + TokenType::For => write!(f, "for"), + TokenType::Greater => write!(f, ">"), + TokenType::GreaterEqual => write!(f, ">="), + TokenType::GreaterGreater => write!(f, ">>"), + TokenType::Hash => write!(f, "#"), + TokenType::If => write!(f, "if"), + TokenType::Indent => write!(f, "indent"), + TokenType::Identifier => write!(f, "identifier"), + TokenType::LeftBrace => write!(f, "{{"), + TokenType::LeftBracket => write!(f, "["), + TokenType::LeftParen => write!(f, "("), + TokenType::Less => write!(f, "<"), + TokenType::LessEqual => write!(f, "<="), + TokenType::LessLess => write!(f, "<<"), + TokenType::Let => write!(f, "let"), + TokenType::LogicalAnd => write!(f, "&&"), + TokenType::LogicalOr => write!(f, "||"), + TokenType::Minus => write!(f, "-"), + TokenType::Not => write!(f, "not"), + TokenType::Number => write!(f, "number"), + TokenType::Object => write!(f, "object"), + TokenType::Plus => write!(f, "+"), + TokenType::Print => write!(f, "print"), + TokenType::Return => write!(f, "return"), + TokenType::RightParen => write!(f, ")"), + TokenType::RightBrace => write!(f, "}}"), + TokenType::RightBracket => write!(f, "]"), + TokenType::Semicolon => write!(f, ";"), + TokenType::Slash => write!(f, "/"), + TokenType::Star => write!(f, "*"), + TokenType::Text => write!(f, "text"), + TokenType::Struct => write!(f, "struct"), + TokenType::True => write!(f, "true"), + TokenType::While => write!(f, "while"), + } + } +} + impl Eq for TokenType { } \ No newline at end of file diff --git a/src/vm.rs b/src/vm.rs index 9e10fb5..87e6904 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -81,6 +81,13 @@ impl Vm { let value = self.pop(); self.local_vars.insert(name, value); } + OP_DEF_I32 => { + let name = self.read_constant(); + let value = self.pop(); + if let Value::I32(v) = value { + self.local_vars.insert(name, value); + } + } OP_GET => { let name = self.read_constant(); let value = self.local_vars.get(&name).unwrap(); @@ -160,3 +167,13 @@ pub const OP_SHL: u16 = 24; pub const OP_POP: u16 = 25; pub const OP_DEFINE: u16 = 26; pub const OP_GET: u16 = 27; +pub const OP_DEF_I32: u16 = 28; +pub const OP_DEF_I64: u16 = 29; +pub const OP_DEF_U32: u16 = 30; +pub const OP_DEF_DATE: u16 = 31; +pub const OP_DEF_STRING: u16 = 32; +pub const OP_DEF_CHAR: u16 = 33; +pub const OP_DEF_BOOL: u16 = 34; +pub const OP_DEF_LIST: u16 = 35; +pub const OP_DEF_MAP: u16 = 36; +pub const OP_DEF_OBJ: u16 = 37;