diff --git a/.gitignore b/.gitignore index 212de44..72e5b5e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ /target -.DS_Store \ No newline at end of file +.DS_Store +.idea +*.iml \ No newline at end of file diff --git a/.idea/.gitignore b/.idea/.gitignore deleted file mode 100644 index 13566b8..0000000 --- a/.idea/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -# Default ignored files -/shelf/ -/workspace.xml -# Editor-based HTTP Client requests -/httpRequests/ -# Datasource local storage ignored files -/dataSources/ -/dataSources.local.xml diff --git a/.idea/crudlang.iml b/.idea/crudlang.iml deleted file mode 100644 index 7c12fe5..0000000 --- a/.idea/crudlang.iml +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index cec6e80..0000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index 35eb1dd..0000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/src/compiler.rs.txt b/src/compiler.rs.txt new file mode 100644 index 0000000..217e142 --- /dev/null +++ b/src/compiler.rs.txt @@ -0,0 +1,554 @@ +use crate::chunk::Chunk; +use crate::scanner::scan; +use crate::tokens::{Token, TokenType}; +use crate::value::Value; +use crate::vm::{ + OP_ADD, OP_BITAND, OP_BITOR, OP_BITXOR, OP_CONSTANT, OP_DEF_BOOL, OP_DEF_CHAR, OP_DEF_DATE, + OP_DEF_F64, OP_DEF_I32, OP_DEF_I64, OP_DEF_LIST, OP_DEF_MAP, OP_DEF_STRING, OP_DEF_STRUCT, + OP_DEFINE, OP_DIVIDE, OP_EQUAL, OP_GET, OP_GREATER, OP_GREATER_EQUAL, OP_LESS, OP_LESS_EQUAL, + OP_MULTIPLY, OP_NEGATE, OP_NOT, OP_POP, OP_PRINT, OP_RETURN, OP_SHL, OP_SHR, OP_SUBTRACT, +}; +use anyhow::anyhow; +use std::collections::HashMap; +use std::sync::LazyLock; +use tracing::debug; + +macro_rules! parse_num { + ($s:ident, $variant:ident, $number:ident) => {{ + $s.typestack.push(TokenType::$variant); + Value::$variant($number.parse()?) + }}; +} + +pub fn compile(source: &str) -> anyhow::Result { + let tokens = scan(source); + debug!("Scanned tokens: {:?}", tokens); + + let mut compiler = Compiler { + source: source.lines().map(|s| s.to_string()).collect(), + chunk: Chunk::new("main"), + previous_token: &tokens[0], + current_token: &tokens[0], + tokens: &tokens, + current: 0, + typestack: vec![], + locals: vec![], + previous: 0, + had_error: false, + }; + compiler.compile() +} + +struct Compiler<'a> { + source: Vec, + chunk: Chunk, + tokens: &'a Vec, + current: usize, + previous_token: &'a Token, + current_token: &'a Token, + typestack: Vec, + locals: Vec, + previous: usize, + had_error: bool, +} + +impl<'a> Compiler<'a> { + fn compile(mut self) -> anyhow::Result { + while !self.match_token(TokenType::Eof) { + self.declaration()?; + } + + // self.expression()?; + + // self.consume(TokenType::Eof, "Expect end of expression.")?; + self.emit_byte(OP_RETURN); + Ok(self.chunk) + } + + fn declaration(&mut self) -> anyhow::Result<()> { + if self.match_token(TokenType::Let) { + self.let_declaration() + } else { + self.statement() + } + } + + fn let_declaration(&mut self) -> anyhow::Result<()> { + let index = self.parse_variable("Expect variable name")?; + let mut declared_type = None; + if self.check(TokenType::Colon) { + self.consume(TokenType::Colon, "must not happen")?; + match self.current_token.token_type { + TokenType::I32 + | TokenType::I64 + | TokenType::U32 + | TokenType::U64 + | TokenType::Date + | TokenType::StringType + | TokenType::Char + | TokenType::Bool + | TokenType::ListType + | TokenType::MapType => declared_type = Some(self.current_token.token_type), + _ => return Err(anyhow!("Invalid type {:?}", self.current_token.token_type)), + } + self.advance()?; + } + if self.match_token(TokenType::Equal) { + self.expression(declared_type)?; + let derived_type = Some(&self.previous_token.token_type); + self.consume(TokenType::Eol, "Expect end of line")?; + self.define_variable(declared_type, derived_type, index)?; + } else { + return Err(anyhow!( + "You cannot declare a variable without initializing it." + )); + } + Ok(()) + } + + fn parse_variable(&mut self, message: &str) -> anyhow::Result { + self.consume(TokenType::Identifier, message)?; + self.identifier_constant(self.previous_token) + } + + fn identifier_constant(&mut self, token: &Token) -> anyhow::Result { + let name = token.lexeme.clone(); + let index = self.chunk.add_constant(Value::String(name)); + Ok(index) + } + + fn define_variable( + &mut self, + var_type: Option, + derived_type: Option<&TokenType>, + index: usize, + ) -> anyhow::Result<()> { + let def_op = match var_type { + Some(TokenType::I32) => OP_DEF_I32, + Some(TokenType::I64) => OP_DEF_I64, + Some(TokenType::U32) => OP_DEF_I64, + Some(TokenType::U64) => OP_DEF_I64, + Some(TokenType::Date) => OP_DEF_DATE, + Some(TokenType::StringType) => OP_DEF_STRING, + Some(TokenType::Char) => OP_DEF_CHAR, + Some(TokenType::Bool) => OP_DEF_BOOL, + Some(TokenType::ListType) => OP_DEF_LIST, + Some(TokenType::MapType) => OP_DEF_MAP, + Some(TokenType::Object) => OP_DEF_STRUCT, + _ => match derived_type { + Some(TokenType::StringType) => OP_DEF_STRING, + Some(TokenType::Bool) => OP_DEF_BOOL, + Some(TokenType::Char) => OP_DEF_CHAR, + Some(TokenType::F64) => OP_DEF_F64, + Some(TokenType::I64) => OP_DEF_I64, + Some(TokenType::ListType) => OP_DEF_LIST, + Some(TokenType::MapType) => OP_DEF_MAP, + _ => OP_DEFINE, + }, + }; + + self.emit_bytes(def_op, index as u16); + Ok(()) + } + + fn statement(&mut self) -> anyhow::Result<()> { + if self.match_token(TokenType::Print) { + self.print_statement() + } else { + self.expression_statement() + } + } + + fn expression_statement(&mut self) -> anyhow::Result<()> { + debug!("expression statement"); + self.expression(None)?; + self.emit_byte(OP_POP); + Ok(()) + } + + fn print_statement(&mut self) -> anyhow::Result<()> { + self.expression(None)?; + self.consume( + TokenType::Eol, + "No further statements expected. Please start on a new line after the first one.\n", + )?; + self.emit_byte(OP_PRINT); + Ok(()) + } + + fn advance(&mut self) -> anyhow::Result<()> { + if self.current < self.tokens.len() - 1 { + self.previous = self.current; + self.previous_token = &self.tokens[self.previous]; + self.current += 1; + self.current_token = &self.tokens[self.current]; + } + if let TokenType::Error = self.current_token.token_type { + self.had_error = true; + Err(anyhow!( + "Error at {} on line {}", + self.current_token.lexeme, + self.current_token.line + )) + } else { + Ok(()) + } + } + + fn consume(&mut self, token_type: TokenType, message: &str) -> anyhow::Result<()> { + if token_type == self.current_token.token_type { + self.advance() + } else { + Err(anyhow!( + r#"{} at line {}: "{}""#, + message, + self.current_token.line + 1, + self.source[self.current_token.line] + )) + } + } + + fn match_token(&mut self, token_type: TokenType) -> bool { + if !self.check(token_type) { + false + } else { + self.advance().expect("token expected"); + true + } + } + + fn check(&mut self, token_type: TokenType) -> bool { + self.current_token.token_type == token_type + } + + fn expression(&mut self, expected_type: Option) -> anyhow::Result<()> { + self.parse_precedence(PREC_ASSIGNMENT, expected_type)?; + + Ok(()) + } + + fn parse_precedence( + &mut self, + precedence: usize, + expected_type: Option, + ) -> anyhow::Result<()> { + self.advance()?; + let rule = get_rule(&self.previous_token.token_type); + debug!("Precedence rule: {:?}", rule); + if let Some(prefix) = rule.prefix { + prefix(self, expected_type)?; + while precedence <= get_rule(&self.current_token.token_type).precedence { + self.advance()?; + let infix_rule = get_rule(&self.previous_token.token_type).infix; + if let Some(infix) = infix_rule { + infix(self, expected_type)?; + } + } + } else { + return Err(anyhow!("Expect expression.")); + } + Ok(()) + } + + fn emit_byte(&mut self, byte: u16) { + self.chunk.add(byte, self.previous_token.line); + } + + fn emit_bytes(&mut self, b1: u16, b2: u16) { + self.emit_byte(b1); + self.emit_byte(b2); + } + + fn emit_constant(&mut self, value: Value) { + let index = self.chunk.add_constant(value); + self.emit_bytes(OP_CONSTANT, index as u16); + } +} + +type ParseFn = fn(&mut Compiler, expected_type: Option) -> anyhow::Result<()>; + +#[derive(Debug)] +struct Rule { + prefix: Option, + infix: Option, + precedence: usize, +} + +impl Rule { + fn new(prefix: Option, infix: Option, precedence: usize) -> Self { + Self { + prefix, + infix, + precedence, + } + } +} + +fn number(s: &mut Compiler, mut expected_type: Option) -> anyhow::Result<()> { + debug!("number: expected type {:?}", expected_type); + + // coerce unknown numeric type to the expected type of the expression if any + if let None = expected_type { + if !s.typestack.is_empty() { + expected_type = Some(*s.typestack.last().unwrap()); + } + } + + let number = &s.previous_token.lexeme; + let value = if let Some(expected_type) = expected_type { + match expected_type { + TokenType::I32 => parse_num!(s, I32, number), + TokenType::I64 => parse_num!(s, I64, number), + TokenType::U32 => parse_num!(s, U32, number), + TokenType::U64 => parse_num!(s, U64, number), + TokenType::F32 => parse_num!(s, F32, number), + TokenType::F64 => parse_num!(s, F64, number), + + _ => { + return Err(anyhow!( + "Invalid type: expected {} value, got {}({})", + expected_type, + &s.previous_token.token_type, + number + )); + } + } + } else { + match s.previous_token.token_type { + TokenType::Integer => Value::I64(number.parse()?), + TokenType::FloatingPoint => Value::F64(number.parse()?), + _ => panic!("I did not think this would happen") + } + }; + s.emit_constant(value); + Ok(()) +} + +fn literal(s: &mut Compiler, expected_type: Option) -> anyhow::Result<()> { + let actual_type = &s.previous_token.token_type; + if let Some(expected_type) = expected_type { + match (actual_type, expected_type) { + (TokenType::False, TokenType::Bool) => { + s.typestack.push(TokenType::Bool); + s.emit_constant(Value::Bool(false)) + } + (TokenType::True, TokenType::Bool) => { + s.typestack.push(TokenType::Bool); + s.emit_constant(Value::Bool(true)) + } + (TokenType::StringType, TokenType::StringType) => { + s.typestack.push(TokenType::StringType); + s.emit_constant(Value::String(s.previous_token.lexeme.clone())) + } + //list + //map + //struct value + _ => { + return Err(anyhow!( + "Invalid type: expected {} value, got {}({})", + expected_type, + &s.previous_token.token_type, + s.previous_token.lexeme + )); + } + } + } else { + match actual_type { + TokenType::False => s.emit_constant(Value::Bool(false)), + TokenType::True => s.emit_constant(Value::Bool(true)), + TokenType::StringType => s.emit_constant(Value::String(s.previous_token.lexeme.clone())), + _ => {} + } + } + Ok(()) +} + +fn skip(s: &mut Compiler, _expected_type: Option) -> anyhow::Result<()> { + Ok(()) +} + +fn grouping(s: &mut Compiler, _expected_type: Option) -> anyhow::Result<()> { + s.expression(None)?; + s.consume(TokenType::RightParen, "Expect ')' after expression.") +} + +fn unary(s: &mut Compiler, expected_type: Option) -> anyhow::Result<()> { + let operator_type = s.previous_token.token_type; + + s.parse_precedence(PREC_UNARY, None)?; + + match operator_type { + TokenType::Minus => { + s.emit_byte(OP_NEGATE); + } + TokenType::Bang => { + s.emit_byte(OP_NOT); + } + _ => unimplemented!("unary other than ! and -"), + } + Ok(()) +} + +fn binary(s: &mut Compiler, expected_type: Option) -> anyhow::Result<()> { + let operator_type = &s.previous_token.token_type; + debug!("operator {:?}", operator_type); + debug!("expected type {:?}", expected_type); + let rule = get_rule(operator_type); + s.parse_precedence(rule.precedence + 1, None)?; + match operator_type { + TokenType::Plus => s.emit_byte(OP_ADD), + TokenType::Minus => s.emit_byte(OP_SUBTRACT), + TokenType::Star => s.emit_byte(OP_MULTIPLY), + TokenType::Slash => s.emit_byte(OP_DIVIDE), + TokenType::BitAnd => s.emit_byte(OP_BITAND), + TokenType::BitOr => s.emit_byte(OP_BITOR), + TokenType::BitXor => s.emit_byte(OP_BITXOR), + TokenType::GreaterGreater => s.emit_byte(OP_SHR), + TokenType::LessLess => s.emit_byte(OP_SHL), + TokenType::EqualEqual => s.emit_byte(OP_EQUAL), + TokenType::Greater => s.emit_byte(OP_GREATER), + TokenType::GreaterEqual => s.emit_byte(OP_GREATER_EQUAL), + TokenType::Less => s.emit_byte(OP_LESS), + TokenType::LessEqual => s.emit_byte(OP_LESS_EQUAL), + _ => unimplemented!("binary other than plus, minus, star, slash"), + } + Ok(()) +} + +fn variable(s: &mut Compiler, expected_type: Option) -> anyhow::Result<()> { + let index = s.identifier_constant(s.previous_token)?; + s.emit_bytes(OP_GET, index as u16); + Ok(()) +} + +fn get_rule(operator_type: &TokenType) -> &'static Rule { + debug!("{:?}", operator_type); + RULES.get(operator_type).unwrap() +} + +static RULES: LazyLock> = LazyLock::new(|| { + let mut rules: HashMap = HashMap::new(); + rules.insert(TokenType::Bang, Rule::new(Some(unary), None, PREC_UNARY)); + rules.insert(TokenType::BangEqual, Rule::new(None, None, PREC_EQUALITY)); + rules.insert(TokenType::BitOr, Rule::new(None, Some(binary), PREC_BITOR)); + rules.insert( + TokenType::BitXor, + Rule::new(None, Some(binary), PREC_BITXOR), + ); + rules.insert(TokenType::Colon, Rule::new(None, None, PREC_NONE)); + rules.insert(TokenType::Comma, Rule::new(None, None, PREC_NONE)); + rules.insert(TokenType::Date, Rule::new(None, None, PREC_NONE)); + rules.insert(TokenType::Dot, Rule::new(None, None, PREC_NONE)); + rules.insert(TokenType::Else, Rule::new(None, None, PREC_NONE)); + rules.insert(TokenType::Eof, Rule::new(Some(skip), None, PREC_NONE)); + rules.insert(TokenType::Eol, Rule::new(Some(skip), None, PREC_NONE)); + rules.insert(TokenType::Equal, Rule::new(None, None, PREC_NONE)); + rules.insert(TokenType::False, Rule::new(None, None, PREC_NONE)); + rules.insert(TokenType::Fn, Rule::new(None, None, PREC_NONE)); + rules.insert( + TokenType::EqualEqual, + Rule::new(None, Some(binary), PREC_EQUALITY), + ); + rules.insert(TokenType::False, Rule::new(Some(literal), None, PREC_NONE)); + rules.insert(TokenType::FloatingPoint, Rule::new(Some(number), None, PREC_NONE)); + rules.insert( + TokenType::Greater, + Rule::new(None, Some(binary), PREC_COMPARISON), + ); + rules.insert( + TokenType::GreaterEqual, + Rule::new(None, Some(binary), PREC_COMPARISON), + ); + rules.insert( + TokenType::GreaterGreater, + Rule::new(None, Some(binary), PREC_BITSHIFT), + ); + rules.insert(TokenType::I32, Rule::new(None, None, PREC_NONE)); + rules.insert(TokenType::I64, Rule::new(None, None, PREC_NONE)); + rules.insert( + TokenType::Identifier, + Rule::new(Some(variable), None, PREC_NONE), + ); + rules.insert(TokenType::Integer, Rule::new(Some(number), None, PREC_NONE)); + rules.insert(TokenType::Indent, Rule::new(Some(skip), None, PREC_NONE)); + rules.insert(TokenType::LeftBrace, Rule::new(None, None, PREC_NONE)); + rules.insert(TokenType::LeftBracket, Rule::new(None, None, PREC_NONE)); + rules.insert( + TokenType::LeftParen, + Rule::new(Some(grouping), None, PREC_NONE), + ); + rules.insert( + TokenType::Less, + Rule::new(None, Some(binary), PREC_COMPARISON), + ); + rules.insert( + TokenType::LessEqual, + Rule::new(None, Some(binary), PREC_COMPARISON), + ); + rules.insert( + TokenType::LessLess, + Rule::new(None, Some(binary), PREC_BITSHIFT), + ); + rules.insert( + TokenType::LogicalAnd, + Rule::new(None, Some(binary), PREC_AND), + ); + rules.insert(TokenType::LogicalOr, Rule::new(None, Some(binary), PREC_OR)); + rules.insert( + TokenType::Minus, + Rule::new(Some(unary), Some(binary), PREC_TERM), + ); + rules.insert(TokenType::Plus, Rule::new(None, Some(binary), PREC_TERM)); + rules.insert(TokenType::Print, Rule::new(None, None, PREC_NONE)); + rules.insert(TokenType::Return, Rule::new(None, None, PREC_NONE)); + rules.insert(TokenType::RightParen, Rule::new(None, None, PREC_NONE)); + rules.insert(TokenType::RightBrace, Rule::new(None, None, PREC_NONE)); + rules.insert(TokenType::RightBracket, Rule::new(None, None, PREC_NONE)); + rules.insert(TokenType::Slash, Rule::new(None, Some(binary), PREC_FACTOR)); + rules.insert(TokenType::Star, Rule::new(None, Some(binary), PREC_FACTOR)); + rules.insert(TokenType::StringType, Rule::new(Some(literal), None, PREC_NONE)); + rules.insert( + TokenType::BitAnd, + Rule::new(None, Some(binary), PREC_BITAND), + ); + rules.insert(TokenType::StringType, Rule::new(None, None, PREC_NONE)); + rules.insert(TokenType::Struct, Rule::new(None, None, PREC_NONE)); + rules.insert(TokenType::True, Rule::new(Some(literal), None, PREC_NONE)); + rules.insert(TokenType::U32, Rule::new(None, None, PREC_NONE)); + rules.insert(TokenType::U64, Rule::new(None, None, PREC_NONE)); + rules.insert(TokenType::While, Rule::new(None, None, PREC_NONE)); + + rules +}); + +const PREC_NONE: usize = 0; +const PREC_ASSIGNMENT: usize = 1; +const PREC_OR: usize = 2; +const PREC_AND: usize = 3; +const PREC_BITAND: usize = 4; +const PREC_BITOR: usize = 5; +const PREC_BITXOR: usize = 6; +const PREC_EQUALITY: usize = 7; +const PREC_COMPARISON: usize = 8; +const PREC_BITSHIFT: usize = 9; +const PREC_TERM: usize = 10; +const PREC_FACTOR: usize = 11; +const PREC_UNARY: usize = 12; +const PREC_CALL: usize = 13; +const PREC_PRIMARY: usize = 14; + +enum ValueType { + DateType, + BoolType, + CharType, + F32Type, + F64Type, + I32Type, + I64Type, + ObjectType, + U32Type, + U64Type, + StringType, + ListType, + MapType, +}