diff --git a/Cargo.lock b/Cargo.lock index f535543..5fccc55 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + [[package]] name = "android_system_properties" version = "0.1.5" @@ -1355,6 +1364,35 @@ dependencies = [ "bitflags 2.9.4", ] +[[package]] +name = "regex" +version = "1.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + [[package]] name = "reqwest" version = "0.12.24" @@ -1838,6 +1876,7 @@ dependencies = [ "log", "log4rs", "notify", + "regex", "reqwest", "serde", "thiserror", diff --git a/Cargo.toml b/Cargo.toml index adfb9a9..a94efbd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,3 +25,4 @@ url = "2.5.7" clap = { version = "4.5.51", features = ["derive"] } notify = "8.2.0" arc-swap = "1.7.1" +regex = "1.12.2" diff --git a/src/ast_compiler.rs b/src/ast_compiler.rs index fd16802..2f270f9 100644 --- a/src/ast_compiler.rs +++ b/src/ast_compiler.rs @@ -146,7 +146,7 @@ impl AstCompiler { self.query_guard_expr(symbol_table) } else { Err(self.raise(Expected("-> or ?"))) - } + }; } Ok(Stop { line: self.peek().line, @@ -803,6 +803,19 @@ pub struct Parameter { pub(crate) var_type: TokenType, } +impl Parameter { + pub(crate) fn new(name: impl Into, value_type: TokenType) -> Self { + Self { + name: Token { + token_type: TokenType::StringType, + lexeme: name.into(), + line: 0, + }, + var_type: value_type, + } + } +} + #[derive(Debug, Clone)] pub enum Expression { Binary { diff --git a/src/builtins/mod.rs b/src/builtins/mod.rs index 18dd8bc..0204599 100644 --- a/src/builtins/mod.rs +++ b/src/builtins/mod.rs @@ -1,13 +1,39 @@ mod string; +use crate::builtins::string::string_methods; +use crate::errors::{CompilerError, RuntimeError}; +use crate::tokens::TokenType; use crate::value::Value; use std::collections::HashMap; use std::sync::LazyLock; -use crate::builtins::string::string_methods; -use crate::errors::RuntimeError; +use crate::ast_compiler::Parameter; + +pub(crate) struct Signature { + pub(crate) parameters: Vec, + pub(crate) return_type: TokenType, + pub(crate) function: MethodFn, +} + +impl Signature { + pub(crate) fn new( + parameters: Vec, + return_type: TokenType, + function: MethodFn, + ) -> Self { + Self { + parameters, + return_type, + function, + } + } + + pub(crate) fn arity(&self) -> usize { + self.parameters.len() + } +} pub(crate) type MethodFn = fn(Value, Vec) -> Result; -pub(crate) type MethodMap = HashMap; +pub(crate) type MethodMap = HashMap; pub(crate) type MethodTable = HashMap; static METHODS: LazyLock = LazyLock::new(|| { @@ -16,23 +42,26 @@ static METHODS: LazyLock = LazyLock::new(|| { table }); -pub(crate) fn insert(m: &mut MethodMap, name: &str, method: MethodFn) { +pub(crate) fn add(m: &mut MethodMap, name: &str, method: Signature) { m.insert(name.to_string(), method); } -pub fn call( +pub(crate) fn lookup(type_name: &str, method_name: &str) -> Result<&'static Signature, CompilerError> { + METHODS + .get(type_name) + .and_then(|methods| methods.get(method_name)) + .ok_or_else(|| CompilerError::FunctionNotFound(format!("{}.{}", type_name, method_name))) +} + +pub(crate) fn call( type_name: &str, method_name: &str, self_val: Value, args: Vec, ) -> Result { - METHODS - .get(type_name) - .and_then(|methods| methods.get(method_name)) - .ok_or_else(|| RuntimeError::FunctionNotFound(format!("{}.{}",type_name, method_name)))? - (self_val, args) + (lookup(type_name,method_name).map_err(|e|RuntimeError::FunctionNotFound(e.to_string()))?.function)(self_val, args) } pub(crate) fn expected(expected_type: &str) -> RuntimeError { RuntimeError::ExpectedType(expected_type.to_string()) -} \ No newline at end of file +} diff --git a/src/builtins/string.rs b/src/builtins/string.rs index f9e12e1..83d0c38 100644 --- a/src/builtins/string.rs +++ b/src/builtins/string.rs @@ -1,16 +1,45 @@ -use std::collections::HashMap; -use crate::builtins::{expected, insert, MethodMap}; +use crate::builtins::{MethodMap, Parameter, Signature, add, expected}; use crate::errors::RuntimeError; -use crate::value::{bool, i64, string, Value}; +use crate::tokens::TokenType::{StringType, U64}; +use crate::value::{Value, bool, i64, string}; +use regex::Regex; +use std::collections::HashMap; pub(crate) fn string_methods() -> MethodMap { let mut string_methods: MethodMap = HashMap::new(); let m = &mut string_methods; - insert(m, "len", string_len); - insert(m, "to_uppercase", string_to_uppercase); - insert(m, "to_lowercase", string_to_lowercase); - insert(m, "contains", string_contains); - insert(m, "reverse", string_reverse); + add(m, "len", Signature::new(vec![], U64, string_len)); + add( + m, + "to_uppercase", + Signature::new(vec![], StringType, string_to_uppercase), + ); + add( + m, + "to_lowercase", + Signature::new(vec![], StringType, string_to_lowercase), + ); + add(m, "contains", Signature::new(vec![], StringType, string_contains)); + add(m, "reverse", Signature::new(vec![], StringType, string_reverse)); + add(m, "trim", Signature::new(vec![], StringType, string_trim)); + add( + m, + "trim_start", + Signature::new(vec![], StringType, string_trim_start), + ); + add(m, "trim_end", Signature::new(vec![], StringType, string_trim_end)); + add( + m, + "replace_all", + Signature::new( + vec![ + Parameter::new("pattern", StringType), + Parameter::new("replacement", StringType), + ], + StringType, + string_replace_all, + ), + ); string_methods } @@ -37,22 +66,64 @@ fn string_to_lowercase(self_val: Value, _args: Vec) -> Result) -> Result { match (self_val, args.first()) { - (Value::String(s), Some(Value::String(pat))) => { - Ok(bool(s.contains(pat.as_str()))) - } + (Value::String(s), Some(Value::String(pat))) => Ok(bool(s.contains(pat.as_str()))), _ => Err(expected_a_string()), } } fn string_reverse(self_val: Value, _: Vec) -> Result { match self_val { - Value::String(s) => { - Ok(s.chars().rev().collect::().into()) - } + Value::String(s) => Ok(s.chars().rev().collect::().into()), + _ => Err(expected_a_string()), + } +} + +fn string_trim(self_val: Value, _: Vec) -> Result { + match self_val { + Value::String(s) => Ok(string(s.trim())), + _ => Err(expected_a_string()), + } +} + +fn string_trim_start(self_val: Value, _: Vec) -> Result { + match self_val { + Value::String(s) => Ok(string(s.trim_start())), + _ => Err(expected_a_string()), + } +} + +fn string_trim_end(self_val: Value, _: Vec) -> Result { + match self_val { + Value::String(s) => Ok(string(s.trim_end())), + _ => Err(expected_a_string()), + } +} +//TODO check arity in compiler (generically) +fn string_replace_all(receiver: Value, args: Vec) -> Result { + let pattern = if let Value::String(s) = &args[0] { + Regex::new(s).map_err(|_| RuntimeError::IllegalArgumentException("Invalid regex".into()))? + } else { + return Err(RuntimeError::IllegalArgumentException( + format!("Illegal pattern. Expected a string, but got {}", &args[0]).into(), + )); + }; + let replacement = if let Value::String(repl) = &args[1] { + repl + } else { + return Err(RuntimeError::IllegalArgumentException( + format!( + "Illegal replacement. Expected a string but got {}", + &args[1] + ) + .into(), + )); + }; + match receiver { + Value::String(ref str) => Ok(string(pattern.replace_all(str, replacement))), _ => Err(expected_a_string()), } } fn expected_a_string() -> RuntimeError { expected("string") -} \ No newline at end of file +} diff --git a/src/bytecode_compiler.rs b/src/bytecode_compiler.rs index 694ea72..878e632 100644 --- a/src/bytecode_compiler.rs +++ b/src/bytecode_compiler.rs @@ -1,5 +1,6 @@ use crate::ast_compiler::Expression::NamedParameter; use crate::ast_compiler::{Expression, Function, Parameter, Statement}; +use crate::builtins::lookup; use crate::chunk::Chunk; use crate::errors::CompilerError::{IncompatibleTypes, UndeclaredVariable}; use crate::errors::{CompilerError, CompilerErrorAtLine}; @@ -205,17 +206,24 @@ impl Compiler { self.compile_expression(namespace, receiver, symbols, registry)?; let receiver_type = infer_type(receiver, symbols).to_string(); - let type_index = self - .chunk - .find_constant(&receiver_type) - .unwrap_or_else(|| self.chunk.add_constant(Value::String(receiver_type))); + let type_index = self.chunk.find_constant(&receiver_type).unwrap_or_else(|| { + self.chunk + .add_constant(Value::String(receiver_type.clone())) + }); let name_index = self.chunk.find_constant(method_name).unwrap_or_else(|| { self.chunk .add_constant(Value::String(method_name.to_string())) }); - //TODO lookup parameters for builtin - self.get_arguments_in_order(namespace, symbols, registry, arguments, &vec![])?; + let signature = lookup(&receiver_type, method_name).map_err(|e| self.raise(e))?; + + self.get_arguments_in_order( + namespace, + symbols, + registry, + arguments, + &signature.parameters, + )?; self.emit_byte(OP_CALL_BUILTIN); self.emit_byte(name_index as u16); self.emit_byte(type_index as u16); @@ -312,19 +320,17 @@ impl Compiler { namespace: &str, symbols: &SymbolTable, registry: &mut Registry, - arguments: &Vec, - parameters: &Vec, + arguments: &[Expression], + parameters: &[Parameter], ) -> Result<(), CompilerErrorAtLine> { - for parameter in parameters { - for argument in arguments { + for argument in arguments { + for parameter in parameters { if let NamedParameter { name, value, .. } = argument { if name.lexeme == parameter.name.lexeme { let value_type = infer_type(value, symbols); if parameter.var_type != value_type { - return Err(self.raise(CompilerError::IncompatibleTypes( - parameter.var_type.clone(), - value_type, - ))); + return Err(self + .raise(IncompatibleTypes(parameter.var_type.clone(), value_type))); } else { self.compile_expression(namespace, argument, symbols, registry)?; break; diff --git a/src/compiler_tests.rs b/src/compiler_tests.rs index cd4f3b9..81e3f07 100644 --- a/src/compiler_tests.rs +++ b/src/compiler_tests.rs @@ -1,6 +1,6 @@ #[cfg(test)] mod tests { - use crate::value::Value; + use crate::value::{Value, string}; use crate::{compile, run}; use chrono::DateTime; @@ -21,23 +21,20 @@ mod tests { #[test] fn literal_string() { - assert_eq!(run(r#""a""#), Ok(Value::String("a".into()))); + assert_eq!(run(r#""a""#), Ok(string("a"))); } #[test] fn literal_list() { assert_eq!( run(r#"["abc","def"]"#), - Ok(Value::List(vec![ - Value::String("abc".into()), - Value::String("def".into()) - ])) + Ok(Value::List(vec![string("abc"), string("def")])) ); } #[test] fn index_in_list_literal() { - assert_eq!(run(r#"["abc","def"][1]"#), Ok(Value::String("def".into()))) + assert_eq!(run(r#"["abc","def"][1]"#), Ok(string("def"))) } #[test] @@ -45,7 +42,7 @@ mod tests { assert_eq!( run(r#"let a:list = ["abc","def"] a[1]"#), - Ok(Value::String("def".into())) + Ok(string("def")) ) } @@ -118,7 +115,7 @@ a"#), run(r#"fn add_hello(name: string) -> string: "Hello " + name add_hello("world")"#), - Ok(Value::String("Hello world".to_string())) + Ok(string("Hello world")) ); } @@ -169,11 +166,11 @@ p"#); let result = result.unwrap(); if let Value::Map(map) = result { assert_eq!( - map.get(&Value::String("name".to_string())).unwrap(), - &Value::String("Dent".to_string()) + map.get(&string("name")).unwrap(), + &string("Dent") ); assert_eq!( - map.get(&Value::String("age".to_string())).unwrap(), + map.get(&string("age")).unwrap(), &Value::I64(40) ); } @@ -187,8 +184,8 @@ m"#); let result = result.unwrap(); if let Value::Map(map) = result { assert_eq!( - map.get(&Value::String("name".to_string())).unwrap(), - &Value::String("Dent".to_string()) + map.get(&string("name")).unwrap(), + &string("Dent") ); } } @@ -216,17 +213,17 @@ m["name"]"#); #[test] fn add_strings() { - assert_eq!(run(r#""a"+"b""#), Ok(Value::String("ab".into()))); + assert_eq!(run(r#""a"+"b""#), Ok(string("ab"))); } #[test] fn add_string_and_int() { - assert_eq!(run(r#""a"+42"#), Ok(Value::String("a42".into()))); + assert_eq!(run(r#""a"+42"#), Ok(string("a42"))); } #[test] fn add_string_and_bool() { - assert_eq!(run(r#""a"+false"#), Ok(Value::String("afalse".into()))); + assert_eq!(run(r#""a"+false"#), Ok(string("afalse"))); } #[test] @@ -259,20 +256,25 @@ date"#), } #[test] - fn string_reverse(){ - assert_eq!(run(r#""abc".reverse()"#), Ok(Value::String("cba".into()))); + fn string_reverse() { + assert_eq!(run(r#""abc".reverse()"#), Ok(string("cba"))); } #[test] - fn string_to_upper(){ - assert_eq!(run(r#""abc".to_uppercase()"#), Ok(Value::String("ABC".into()))); + fn string_to_upper() { + assert_eq!(run(r#""abc".to_uppercase()"#), Ok(string("ABC"))); } #[test] - fn string_len(){ + fn string_len() { assert_eq!(run(r#""abc".len()"#), Ok(Value::I64(3))); } + #[test] + fn string_replace() { + assert_eq!(run(r#""Hello".replace_all("l","p")"#), Ok(string("Heppo"))); + } + // #[test] // fn package() { // assert_eq!(run(r#"a.b.c()"#), Ok(Value::U32(48))); diff --git a/src/errors.rs b/src/errors.rs index 3231734..87ea3d5 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -83,6 +83,8 @@ pub enum RuntimeError { FunctionNotFound(String), #[error("The number of of arguments for {0} is not correct. Should be {1}, got {2}")] IllegalArgumentsException(String,usize,usize), + #[error("{0}")] + IllegalArgumentException(String), #[error("Expected {0}")] ExpectedType(String), } diff --git a/src/vm.rs b/src/vm.rs index c01070c..c359ce6 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -194,7 +194,6 @@ impl Vm { let function_type_index = self.read(chunk); let receiver_type_name = chunk.constants[function_type_index].to_string(); - let receiver = self.pop(); let num_args = self.read(chunk); let mut args = vec![]; for _ in 0..num_args { @@ -202,6 +201,7 @@ impl Vm { args.push(arg); } args.reverse(); + let receiver = self.pop(); let return_value = crate::builtins::call(&receiver_type_name, &function_name, receiver, args)?; self.push(return_value); }