From 0bd6048083e08f7f32c70f966471799116d92a7b Mon Sep 17 00:00:00 2001 From: Shautvast Date: Fri, 31 Oct 2025 19:37:45 +0100 Subject: [PATCH] added maps, objects better namespace resolution --- src/ast_compiler.rs | 111 +++++++++++++++++++++++++++++++++------ src/bytecode_compiler.rs | 62 +++++++++++++++------- src/chunk.rs | 17 ++++-- src/compiler_tests.rs | 56 +++++++++++++++++--- src/keywords.rs | 1 - src/lib.rs | 6 +-- src/main.rs | 6 +-- src/tokens.rs | 22 +++++++- src/value.rs | 48 +++++++---------- src/vm.rs | 33 ++++++++++-- 10 files changed, 276 insertions(+), 86 deletions(-) diff --git a/src/ast_compiler.rs b/src/ast_compiler.rs index eebb613..2c74aab 100644 --- a/src/ast_compiler.rs +++ b/src/ast_compiler.rs @@ -1,4 +1,10 @@ -use crate::tokens::TokenType::{Bang, Bool, Colon, Date, Eol, Equal, F32, F64, False, FloatingPoint, Fn, Greater, GreaterEqual, GreaterGreater, I32, I64, Identifier, If, Indent, Integer, LeftBracket, LeftParen, Less, LessEqual, LessLess, Let, ListType, MapType, Minus, Object, Plus, Print, RightBracket, RightParen, SignedInteger, SingleRightArrow, Slash, Star, StringType, True, U32, U64, UnsignedInteger, Char}; +use crate::tokens::TokenType::{ + Bang, Bool, Char, Colon, Date, Eof, Eol, Equal, F32, F64, False, FloatingPoint, Fn, Greater, + GreaterEqual, GreaterGreater, I32, I64, Identifier, Indent, Integer, LeftBrace, LeftBracket, + LeftParen, Less, LessEqual, LessLess, Let, ListType, MapType, Minus, Object, Plus, Print, + RightBrace, RightBracket, RightParen, SignedInteger, SingleRightArrow, Slash, Star, StringType, + True, U32, U64, UnsignedInteger, +}; use crate::tokens::{Token, TokenType}; use crate::value::Value; use anyhow::anyhow; @@ -7,7 +13,7 @@ use std::collections::HashMap; pub fn compile(tokens: Vec) -> anyhow::Result> { let mut compiler = AstCompiler::new(tokens); - compiler.compile_tokens(0) + compiler.compile_tokens() } #[derive(Debug, Clone)] @@ -34,7 +40,7 @@ impl AstCompiler { current: 0, had_error: false, vars: vec![], - indent: vec![], + indent: vec![0], functions: HashMap::new(), } } @@ -43,17 +49,17 @@ impl AstCompiler { self.current = 0; } - fn compile_tokens(&mut self, expected_indent: usize) -> anyhow::Result> { + fn compile_tokens(&mut self) -> anyhow::Result> { self.collect_functions()?; self.reset(); - self.compile(expected_indent) + self.compile() } - fn compile(&mut self, expected_indent: usize) -> anyhow::Result> { + fn compile(&mut self) -> anyhow::Result> { if !self.had_error { let mut statements = vec![]; while !self.is_at_end() { - let statement = self.indent(expected_indent)?; + let statement = self.indent()?; if let Some(statement) = statement { statements.push(statement); } else { @@ -119,7 +125,8 @@ impl AstCompiler { Ok(()) } - fn indent(&mut self, expected_indent: usize) -> anyhow::Result> { + fn indent(&mut self) -> anyhow::Result> { + let expected_indent = *self.indent.last().unwrap(); // skip empty lines while self.check(Eol) { self.advance(); @@ -132,14 +139,13 @@ impl AstCompiler { } if indent_on_line > expected_indent { panic!( - "unexpected indent level {} vs {}", + "unexpected indent level {} vs expected {}", indent_on_line, expected_indent ); } else if indent_on_line < expected_indent { self.indent.pop(); return Ok(None); } else { - self.indent.push(indent_on_line); Ok(Some(self.declaration()?)) } } @@ -149,11 +155,53 @@ impl AstCompiler { self.function_declaration() } else if self.match_token(vec![Let]) { self.let_declaration() + } else if self.match_token(vec![Object]) { + self.object_declaration() } else { self.statement() } } + fn object_declaration(&mut self) -> anyhow::Result { + let type_name = self.consume(Identifier, "Expect object name.")?; + self.consume(Colon, "Expect ':' after object name.")?; + self.consume(Eol, "Expect end of line.")?; + + let mut fields = vec![]; + + let expected_indent = self.indent.last().unwrap() + 1; + // self.indent.push(expected_indent); + let mut done = false; + while !done && !self.match_token(vec![Eof]) { + for _ in 0..expected_indent { + if self.peek().token_type == Indent { + self.advance(); + } else { + done = true; + } + } + if !done { + let field_name = self.consume(Identifier, "Expect an object field name.")?; + self.consume(Colon, "Expect ':' after field name.")?; + let field_type = self.peek().token_type; + if field_type.is_type() { + self.advance(); + } else { + Err(anyhow::anyhow!("Expected a type"))? + } + fields.push(Parameter { + name: field_name, + var_type: field_type, + }); + } + } + self.consume(Eol, "Expect end of line.")?; + Ok(Statement::ObjectStmt { + name: type_name, + fields, + }) + } + fn function_declaration(&mut self) -> anyhow::Result { let name_token = self.consume(Identifier, "Expect function name.")?; self.consume(LeftParen, "Expect '(' after function name.")?; @@ -169,7 +217,8 @@ impl AstCompiler { self.consume(Eol, "Expect end of line.")?; let current_indent = self.indent.last().unwrap(); - let body = self.compile(current_indent + 1)?; + self.indent.push(current_indent + 1); + let body = self.compile()?; self.functions.get_mut(&name_token.lexeme).unwrap().body = body; @@ -201,9 +250,6 @@ impl AstCompiler { return Err(anyhow!("error at line {}: {}", name_token.line, e)); } }; - // match var_type{ - // U32 => U32() - // } self.vars.push(Expression::Variable { name: name_token.lexeme.to_string(), var_type, @@ -329,6 +375,8 @@ impl AstCompiler { debug!("primary {:?}", self.peek()); Ok(if self.match_token(vec![LeftBracket]) { self.list()? + } else if self.match_token(vec![LeftBrace]) { + self.map()? } else if self.match_token(vec![False]) { Expression::Literal { line: self.peek().line, @@ -401,6 +449,27 @@ impl AstCompiler { }) } + fn map(&mut self) -> anyhow::Result { + let mut entries = vec![]; + while !self.match_token(vec![RightBrace]) { + let key = self.expression()?; + self.consume(Colon, "Expect ':' after map key.")?; + let value = self.expression()?; + entries.push((key, value)); + if self.peek().token_type == TokenType::Comma { + self.advance(); + } else { + self.consume(RightBrace, "Expect '}' after map.")?; + break; + } + } + Ok(Expression::Map { + entries, + literaltype: MapType, + line: self.peek().line, + }) + } + fn variable_lookup(&mut self, token: &Token) -> anyhow::Result { let (var_name, var_type) = self .vars @@ -504,7 +573,7 @@ impl AstCompiler { } fn is_at_end(&self) -> bool { - self.peek().token_type == TokenType::Eof + self.peek().token_type == Eof } } @@ -567,6 +636,10 @@ pub enum Statement { FunctionStmt { function: Function, }, + ObjectStmt { + name: Token, + fields: Vec, + }, } impl Statement { @@ -576,6 +649,7 @@ impl Statement { Statement::VarStmt { name, .. } => name.line, Statement::PrintStmt { value } => value.line(), Statement::FunctionStmt { function, .. } => function.name.line, + Statement::ObjectStmt { name, .. } => name.line, } } } @@ -613,6 +687,11 @@ pub enum Expression { literaltype: TokenType, values: Vec, }, + Map { + line: usize, + literaltype: TokenType, + entries: Vec<(Expression, Expression)>, + }, Variable { line: usize, name: String, @@ -634,6 +713,7 @@ impl Expression { Self::Grouping { line, .. } => *line, Self::Literal { line, .. } => *line, Self::List { line, .. } => *line, + Self::Map { line, .. } => *line, Self::Variable { line, .. } => *line, Self::FunctionCall { line, .. } => *line, } @@ -700,6 +780,7 @@ impl Expression { Self::Grouping { expression, .. } => expression.infer_type(), Self::Literal { literaltype, .. } => literaltype.clone(), Self::List { literaltype, .. } => literaltype.clone(), + Self::Map { literaltype, .. } => literaltype.clone(), Self::Unary { right, operator, .. } => { diff --git a/src/bytecode_compiler.rs b/src/bytecode_compiler.rs index 065b29e..bd1db56 100644 --- a/src/bytecode_compiler.rs +++ b/src/bytecode_compiler.rs @@ -12,10 +12,10 @@ use crate::vm::{ use std::collections::HashMap; pub fn compile( - namespace: &str, + namespace: Option<&str>, ast: &Vec, registry: &mut HashMap, -) -> anyhow::Result { +) -> anyhow::Result<()> { compile_name(ast, namespace, registry) } @@ -37,11 +37,19 @@ pub(crate) fn compile_function( pub(crate) fn compile_name( ast: &Vec, - namespace: &str, + namespace: Option<&str>, registry: &mut HashMap, -) -> anyhow::Result { - let compiler = Compiler::new(namespace); - Ok(compiler.compile(ast, registry, namespace)?) +) -> anyhow::Result<()> { + let name=namespace.unwrap_or("main"); + let compiler = Compiler::new(name); + let chunk = compiler.compile(ast, registry, name)?; + let qname = if let Some(namespace) = namespace{ + format!("{}.{}", namespace, "main") + } else { + "main".to_string() + }; + registry.insert(qname, chunk); + Ok(()) } struct Compiler { @@ -67,6 +75,7 @@ impl Compiler { registry: &mut HashMap, namespace: &str, ) -> anyhow::Result { + //TODO can likely be removed for statement in ast { if let Statement::FunctionStmt { function } = statement { self.emit_constant(Value::String(format!( @@ -101,10 +110,7 @@ impl Compiler { let name_index = self.chunk.add_constant(Value::String(name.lexeme.clone())); self.vars.insert(name.lexeme.clone(), name_index); self.compile_expression(namespace, initializer, registry)?; - self.define_variable(var_type, name_index)?; - if let Expression::List { values, .. } = initializer { - self.emit_byte(values.len() as u16); - } + self.define_variable(var_type, name_index, &initializer)?; } Statement::PrintStmt { value } => { self.compile_expression(namespace, value, registry)?; @@ -122,6 +128,9 @@ impl Compiler { compiled_function, ); } + Statement::ObjectStmt { name, fields } => { + self.chunk.add_object_def(&name.lexeme, fields); + } } Ok(()) } @@ -136,15 +145,11 @@ impl Compiler { Expression::FunctionCall { name, arguments, .. } => { - let name = if let None = self.chunk.find_constant(&name) { - format!("{}.{}", namespace, name) - } else { - name.clone() - }; + let qname=format!("{}.{}", namespace, name); let name_index = self .chunk - .find_constant(&name) - .unwrap_or_else(|| self.emit_constant(name.into()) as usize); + .find_constant(&qname) + .unwrap_or_else(|| self.emit_constant(qname.into()) as usize); for argument in arguments { self.compile_expression(namespace, argument, registry)?; @@ -163,7 +168,12 @@ impl Compiler { for expr in values { self.compile_expression(namespace, expr, registry)?; } - // self.emit_bytes(OP_NEW_LIST, values.len() as u16); + } + Expression::Map { entries, .. } => { + for (key, value) in entries { + self.compile_expression(namespace, key, registry)?; + self.compile_expression(namespace, value, registry)?; + } } Expression::Grouping { expression, .. } => { self.compile_expression(namespace, expression, registry)? @@ -214,7 +224,12 @@ impl Compiler { Ok(()) } - fn define_variable(&mut self, var_type: &TokenType, name_index: usize) -> anyhow::Result<()> { + fn define_variable( + &mut self, + var_type: &TokenType, + name_index: usize, + initializer: &Expression, + ) -> anyhow::Result<()> { let def_op = match var_type { TokenType::I32 => OP_DEF_I32, TokenType::I64 => OP_DEF_I64, @@ -233,6 +248,15 @@ impl Compiler { }; self.emit_bytes(def_op, name_index as u16); + match initializer { + Expression::List { values, .. } => { + self.emit_byte(values.len() as u16); + } + Expression::Map { entries, .. } => { + self.emit_byte(entries.len() as u16); + } + _ => {} + } Ok(()) } diff --git a/src/chunk.rs b/src/chunk.rs index fc61f7a..9d0d05b 100644 --- a/src/chunk.rs +++ b/src/chunk.rs @@ -1,9 +1,11 @@ +use std::collections::HashMap; +use crate::ast_compiler::Parameter; use crate::value::Value; use crate::vm::{ OP_ADD, OP_BITAND, OP_BITOR, OP_BITXOR, OP_CALL, OP_CONSTANT, OP_DEF_BOOL, OP_DEF_F32, OP_DEF_F64, OP_DEF_I32, OP_DEF_I64, OP_DEF_LIST, OP_DEF_STRING, OP_DEFINE, OP_DIVIDE, OP_EQUAL, OP_GET, OP_GREATER, OP_GREATER_EQUAL, OP_LESS, OP_LESS_EQUAL, OP_MULTIPLY, OP_NEGATE, OP_NOT, - OP_POP, OP_PRINT, OP_RETURN, OP_SHL, OP_SHR, OP_SUBTRACT, + OP_POP, OP_PRINT, OP_RETURN, OP_SHL, OP_SHR, OP_SUBTRACT, OP_DEF_MAP, }; #[derive(Debug, Clone)] @@ -12,6 +14,7 @@ pub struct Chunk { pub code: Vec, pub constants: Vec, lines: Vec, + object_defs: HashMap> } impl Chunk { @@ -28,25 +31,30 @@ impl Chunk { } impl Chunk { - pub fn new(name: &str) -> Chunk { + pub(crate) fn new(name: &str) -> Chunk { Chunk { name: name.to_string(), code: Vec::new(), constants: vec![], lines: vec![], + object_defs: HashMap::new(), } } - pub fn add(&mut self, byte: u16, line: usize) { + pub(crate) fn add(&mut self, byte: u16, line: usize) { self.code.push(byte); self.lines.push(line); } - pub fn add_constant(&mut self, value: impl Into) -> usize { + pub(crate) fn add_constant(&mut self, value: impl Into) -> usize { self.constants.push(value.into()); self.constants.len() - 1 } + pub (crate) fn add_object_def(&mut self, name: &str, fields: &[Parameter]){ + self.object_defs.insert(name.to_string(), fields.to_vec()); + } + pub fn disassemble(&self) { println!("== {} ==", self.name); let mut offset = 0; @@ -95,6 +103,7 @@ impl Chunk { OP_CALL => self.call_inst("CALL", offset), OP_GET => self.constant_inst("GET", offset), OP_DEF_LIST => self.new_inst("DEFLIST", offset), + OP_DEF_MAP => self.new_inst("DEFMAP", offset), _ => { println!("Unknown instruction {}", instruction); offset + 1 diff --git a/src/compiler_tests.rs b/src/compiler_tests.rs index e5d660a..14a0a34 100644 --- a/src/compiler_tests.rs +++ b/src/compiler_tests.rs @@ -2,6 +2,8 @@ mod tests { use crate::compile; use crate::scanner::scan; + use crate::value::Value; + use crate::vm::interpret; #[test] fn literal_int() { @@ -81,14 +83,54 @@ mod tests { #[test] fn call_fn_with_args_returns_value() { - assert!( - compile( - r#" -fn hello(name: string) -> string: + let r = compile( + r#" +fn add_hello(name: string) -> string: "Hello " + name -hello("world")"# - ) - .is_ok() +add_hello("world")"#, ); + assert!(r.is_ok()); + let result = interpret(&r.unwrap(), "main").unwrap(); + assert_eq!(result, Value::String("Hello world".to_string())); + } + + #[test] + fn object_definition() { + let r = compile( + r#" +object Person: + name: string"#, + ); + assert!(r.is_ok()); + } + + // #[test] + // fn object_() { + // let r = compile(r#" + // object Person: + // name: string + // + // let p = Person{name: "Sander"} + // print p + // "#, ); + // println!("{:?}", r); + // assert!(r.is_ok()); + // } + + #[test] + fn let_map() { + let r = compile(r#"{"name": "Dent", "age": 40 }"#); + assert!(r.is_ok()); + let result = interpret(&r.unwrap(), "main").unwrap(); + if let Value::Map(map) = result { + assert_eq!( + map.get(&Value::String("name".to_string())).unwrap(), + &Value::String("Dent".to_string()) + ); + assert_eq!( + map.get(&Value::String("age".to_string())).unwrap(), + &Value::I32(40) + ); + } } } diff --git a/src/keywords.rs b/src/keywords.rs index 42ce9ee..afa69d2 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -21,7 +21,6 @@ pub(crate) fn get_keyword(lexeme: &str) -> Option { "or" => Some(TokenType::LogicalOr), "object" => Some(TokenType::Object), "print" => Some(TokenType::Print), - "struct" => Some(TokenType::Struct), "string" => Some(TokenType::StringType), "true" => Some(TokenType::True), "u32" => Some(TokenType::U32), diff --git a/src/lib.rs b/src/lib.rs index dafe60b..541e929 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,10 +11,10 @@ mod tokens; mod value; pub mod vm; -pub fn compile(src: &str) -> anyhow::Result { +pub fn compile(src: &str) -> anyhow::Result> { let tokens = scan(src)?; let mut registry = HashMap::new(); let ast= ast_compiler::compile(tokens)?; - let bytecode = bytecode_compiler::compile("", &ast, &mut registry)?; - Ok(bytecode) + bytecode_compiler::compile(None, &ast, &mut registry)?; + Ok(registry) } diff --git a/src/main.rs b/src/main.rs index 64a6aba..9eb235c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,7 +6,7 @@ use crudlang::ast_compiler; use crudlang::bytecode_compiler::compile; use crudlang::chunk::Chunk; use crudlang::scanner::scan; -use crudlang::vm::interpret; +use crudlang::vm::{interpret, interpret_async}; use std::collections::HashMap; use std::fs; use std::hash::Hash; @@ -32,7 +32,7 @@ async fn main() -> anyhow::Result<()> { .to_str() .unwrap() .replace(".crud", ""); - let chunk = compile(&path, &statements, &mut registry)?; + let chunk = compile(Some(&path), &statements, &mut registry)?; paths.insert(path, chunk); } Err(e) => { @@ -73,7 +73,7 @@ struct AppState { async fn handle_get(State(state): State>) -> Result, StatusCode> { Ok(Json( - interpret(&state.registry, &state.name) + interpret_async(&state.registry, &state.name) .await .unwrap() .to_string(), diff --git a/src/tokens.rs b/src/tokens.rs index a7a04af..58749f1 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -79,7 +79,6 @@ pub enum TokenType { Slash, Star, StringType, - Struct, True, U32, U64, @@ -150,7 +149,6 @@ impl fmt::Display for TokenType { TokenType::SingleRightArrow => write!(f, "->"), TokenType::Slash => write!(f, "/"), TokenType::Star => write!(f, "*"), - TokenType::Struct => write!(f, "struct"), TokenType::True => write!(f, "true"), TokenType::Void => write!(f, "()"), TokenType::While => write!(f, "while"), @@ -161,3 +159,23 @@ impl fmt::Display for TokenType { } impl Eq for TokenType {} + +impl TokenType { + pub(crate) fn is_type(&self) -> bool { + match self { + TokenType::I32 + | TokenType::I64 + | TokenType::U32 + | TokenType::U64 + | TokenType::F32 + | TokenType::F64 + | TokenType::StringType + | TokenType::Date + | TokenType::Object + | TokenType::ListType + | TokenType::MapType + | TokenType::Char => true, + _ => false, + } + } +} diff --git a/src/value.rs b/src/value.rs index fd3f3a7..ffeb772 100644 --- a/src/value.rs +++ b/src/value.rs @@ -7,34 +7,11 @@ use std::hash::{Hash, Hasher}; use std::ops::{Add, BitAnd, BitOr, BitXor, Div, Mul, Neg, Not, Shl, Shr, Sub}; #[derive(Debug, Clone)] -pub struct StructDefinition { - fields: Vec, -} - -#[derive(Debug, Clone)] -pub struct StructValue { - definition: StructDefinition, +pub struct Object { + definition: String, fields: Vec, } -impl StructValue { - pub fn new(definition: StructDefinition) -> Self { - Self { - definition, - fields: Vec::new(), - } - } -} - -impl Display for StructValue { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - for (i, field) in self.definition.fields.iter().enumerate() { - write!(f, "{}: {}", field, self.fields[i])?; - } - Ok(()) - } -} - #[derive(Debug, Clone)] pub enum Value { U32(u32), @@ -50,7 +27,7 @@ pub enum Value { Enum, List(Vec), Map(HashMap), - Struct(StructValue), + ObjectType(Box), Error(String), Void, } @@ -141,15 +118,30 @@ impl Display for Value { &Value::Char(v) => write!(f, "{}", v), &Value::Date(v) => write!(f, "{}", v), &Value::Enum => write!(f, "enum"), - &Value::Struct(v) => write!(f, "{}", v), + &Value::ObjectType(o) => write!(f, "{}: {:?}", o.definition, o.fields), &Value::List(v) => write!(f, "{:?}", v), - &Value::Map(_) => write!(f, "map"), + &Value::Map(map) => to_string(f, map), &Value::Error(v) => write!(f, "{}", v), &Value::Void => write!(f, "()"), } } } +fn to_string(f: &mut Formatter, map: &HashMap) -> std::fmt::Result { + f.write_str("{")?; + let mut first = true; + for (k, v) in map { + if !first { + f.write_str(", ")?; + } + f.write_str(&k.to_string())?; + f.write_str(": ")?; + f.write_str(&v.to_string())?; + first = false; + } + f.write_str("}") +} + impl Neg for &Value { type Output = anyhow::Result; diff --git a/src/vm.rs b/src/vm.rs index 1729169..d9a2876 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -27,7 +27,23 @@ pub struct Vm<'a> { registry: &'a HashMap, } -pub async fn interpret(registry: &HashMap, function: &str) -> anyhow::Result { +pub fn interpret(registry: &HashMap, function: &str) -> anyhow::Result { + let chunk = registry.get(function).unwrap().clone(); + // for (key,value) in registry.iter() { + // println!("{}", key); + // value.disassemble(); + // } + let mut vm = Vm { + ip: 0, + stack: vec![], + local_vars: HashMap::new(), + error_occurred: false, + registry, + }; + vm.run(&chunk, vec![]) +} + +pub async fn interpret_async(registry: &HashMap, function: &str) -> anyhow::Result { let chunk = registry.get(function).unwrap().clone(); let mut vm = Vm { ip: 0, @@ -135,11 +151,20 @@ impl <'a> Vm<'a> { } self.local_vars.insert(name, Value::List(list)); } - OP_DEF_MAP => define_var!(self, Map, chunk), - OP_DEF_STRUCT => define_var!(self, Struct, chunk), + OP_DEF_MAP => { + let name = self.read_name(chunk); + let len = self.read(chunk); + let mut map = HashMap::new(); + for _ in 0..len { + let value = self.pop(); + let key = self.pop(); + map.insert(key,value); + } + self.local_vars.insert(name, Value::Map(map)); + } OP_GET => { let name = self.read_name(chunk); - let value = self.local_vars.get(&name).unwrap(); + let value = self.local_vars.get(&name). unwrap(); self.push(value.clone()); // not happy debug!("after get {:?}", self.stack); }