From 42c431d5c77483a80d1e6d139dc6dea0c7959be0 Mon Sep 17 00:00:00 2001 From: Shautvast Date: Mon, 3 Nov 2025 19:47:38 +0100 Subject: [PATCH] support hexadecimals --- src/ast_compiler.rs | 76 ++++++++++++++++++++++++++++++------------- src/compiler_tests.rs | 25 ++++++++++++++ src/errors.rs | 2 ++ src/scanner.rs | 37 ++++++++++++++++----- src/tokens.rs | 2 ++ 5 files changed, 110 insertions(+), 32 deletions(-) diff --git a/src/ast_compiler.rs b/src/ast_compiler.rs index 6df9771..8b370d1 100644 --- a/src/ast_compiler.rs +++ b/src/ast_compiler.rs @@ -1,16 +1,19 @@ -use crate::errors::CompilerError::{self, Expected, IncompatibleTypes, ParseError, TooManyParameters, TypeError, UnexpectedIndent, UninitializedVariable}; +use crate::errors::CompilerError::{ + self, Expected, IncompatibleTypes, ParseError, TooManyParameters, TypeError, UnexpectedIndent, + UninitializedVariable, +}; +use crate::errors::CompilerErrorAtLine; use crate::tokens::TokenType::{ - Bang, Bool, Char, Colon, Date, Eof, Eol, Equal, False, FloatingPoint, Fn, Greater, GreaterEqual, GreaterGreater, - Identifier, Indent, Integer, LeftBrace, LeftBracket, LeftParen, Less, LessEqual, LessLess, - Let, ListType, MapType, Minus, Object, Plus, Print, RightBrace, RightBracket, RightParen, SignedInteger, - SingleRightArrow, Slash, Star, StringType, True, UnsignedInteger, F32, F64, - I32, I64, U32, U64, + Bang, Bool, Char, Colon, Date, Eof, Eol, Equal, F32, F64, False, FloatingPoint, Fn, Greater, + GreaterEqual, GreaterGreater, I32, I64, Identifier, Indent, Integer, LeftBrace, LeftBracket, + LeftParen, Less, LessEqual, LessLess, Let, ListType, MapType, Minus, Object, Plus, Print, + RightBrace, RightBracket, RightParen, SignedInteger, SingleRightArrow, Slash, Star, StringType, + True, U32, U64, UnsignedInteger, }; use crate::tokens::{Token, TokenType}; use crate::value::Value; use log::debug; use std::collections::HashMap; -use crate::errors::CompilerErrorAtLine; pub fn compile(tokens: Vec) -> Result, CompilerErrorAtLine> { let mut compiler = AstCompiler::new(tokens); @@ -50,14 +53,12 @@ impl AstCompiler { self.current = 0; } - fn compile_tokens(&mut self) -> Result,CompilerErrorAtLine> { + fn compile_tokens(&mut self) -> Result, CompilerErrorAtLine> { self.collect_functions()?; self.reset(); self.compile() } - - fn compile(&mut self) -> Result, CompilerErrorAtLine> { self.current_line(); if !self.had_error { @@ -146,9 +147,7 @@ impl AstCompiler { indent_on_line += 1; } if indent_on_line > expected_indent { - Err(self.raise(UnexpectedIndent( - indent_on_line, expected_indent - ))) + Err(self.raise(UnexpectedIndent(indent_on_line, expected_indent))) } else if indent_on_line < expected_indent { self.indent.pop(); return Ok(None); @@ -236,8 +235,10 @@ impl AstCompiler { } fn let_declaration(&mut self) -> Result { - if self.peek().token_type.is_type(){ - return Err(self.raise(CompilerError::KeywordNotAllowedAsIdentifier(self.peek().token_type))) + if self.peek().token_type.is_type() { + return Err(self.raise(CompilerError::KeywordNotAllowedAsIdentifier( + self.peek().token_type, + ))); } let name_token = self.consume(Identifier, Expected("variable name."))?; @@ -403,13 +404,41 @@ impl AstCompiler { Expression::Literal { line: self.peek().line, literaltype: Integer, - value: Value::I64(self.previous().lexeme.parse().map_err(|e|self.raise(ParseError(format!("{:?}",e))))?), + value: Value::I64( + self.previous() + .lexeme + .parse() + .map_err(|e| self.raise(ParseError(format!("{:?}", e))))?, + ), + } + } else if self.match_token(vec![U32]) { + Expression::Literal { + line: self.peek().line, + literaltype: Integer, + value: Value::U32( + u32::from_str_radix(&self.previous().lexeme.trim_start_matches("0x"), 16) + .map_err(|e| self.raise(ParseError(format!("{:?}", e))))?, + ), + } + } else if self.match_token(vec![U64]) { + Expression::Literal { + line: self.peek().line, + literaltype: Integer, + value: Value::U64( + u64::from_str_radix(&self.previous().lexeme.trim_start_matches("0x"), 16) + .map_err(|e| self.raise(ParseError(format!("{:?}", e))))?, + ), } } else if self.match_token(vec![FloatingPoint]) { Expression::Literal { line: self.peek().line, literaltype: FloatingPoint, - value: Value::F64(self.previous().lexeme.parse().map_err(|e|self.raise(ParseError(format!("{:?}",e))))?), + value: Value::F64( + self.previous() + .lexeme + .parse() + .map_err(|e| self.raise(ParseError(format!("{:?}", e))))?, + ), } } else if self.match_token(vec![StringType]) { Expression::Literal { @@ -514,7 +543,7 @@ impl AstCompiler { if arg_type != function.parameters[arguments.len()].var_type { return Err(self.raise(IncompatibleTypes( function.parameters[arguments.len()].var_type, - arg_type + arg_type, ))); } arguments.push(arg); @@ -534,7 +563,11 @@ impl AstCompiler { }) } - fn consume(&mut self, token_type: TokenType, message: CompilerError) -> Result { + fn consume( + &mut self, + token_type: TokenType, + message: CompilerError, + ) -> Result { if self.check(token_type) { self.advance(); } else { @@ -610,10 +643,7 @@ fn calculate_type( (U64, I32) => U64, (StringType, _) => StringType, // meh, this all needs rigorous testing. Update: this is in progress _ => { - return Err(IncompatibleTypes( - declared_type, - inferred_type - )); + return Err(IncompatibleTypes(declared_type, inferred_type)); } } } else { diff --git a/src/compiler_tests.rs b/src/compiler_tests.rs index e8da00e..4550e30 100644 --- a/src/compiler_tests.rs +++ b/src/compiler_tests.rs @@ -156,4 +156,29 @@ m"#); assert!(result.is_err()); assert_eq!("Compilation failed: error at line 1, 'map' is a keyword. You cannot use it as an identifier",result.unwrap_err().to_string()); } + + #[test] + fn add_strings(){ + assert_eq!(run(r#""a"+"b""#), Ok(Value::String("ab".into()))); + } + + #[test] + fn add_string_and_int(){ + assert_eq!(run(r#""a"+42"#), Ok(Value::String("a42".into()))); + } + + #[test] + fn add_string_and_bool(){ + assert_eq!(run(r#""a"+false"#), Ok(Value::String("afalse".into()))); + } + + #[test] + fn add_string_and_scientific_float(){ + assert_eq!(run(r#""a"+4.2e10"#), Ok(Value::String("a42000000000".into()))); + } + + #[test] + fn add_hex_ints(){ + assert_eq!(run(r#"0x10 + 0x20"#), Ok(Value::U32(48))); + } } diff --git a/src/errors.rs b/src/errors.rs index fd3cf9c..aaf2ec2 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -61,6 +61,8 @@ pub enum CompilerError { UnexpectedType(TokenType), #[error("'{0}' is a keyword. You cannot use it as an identifier")] KeywordNotAllowedAsIdentifier(TokenType), + #[error("Crud does not support numbers above 2^64")] + Overflow, } #[derive(Error, Debug, PartialEq)] diff --git a/src/scanner.rs b/src/scanner.rs index e2b0c05..28858cf 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -1,4 +1,6 @@ -use crate::tokens::TokenType::{BitXor, FloatingPoint, Integer}; +use crate::errors::CompilerError::{IllegalCharLength, UnexpectedIdentifier, Unterminated}; +use crate::errors::{CompilerError, CompilerErrorAtLine}; +use crate::tokens::TokenType::{BitXor, FloatingPoint, Integer, U32, U64}; use crate::{ keywords, tokens::{ @@ -6,8 +8,6 @@ use crate::{ TokenType::{self}, }, }; -use crate::errors::{CompilerError, CompilerErrorAtLine}; -use crate::errors::CompilerError::{IllegalCharLength, UnexpectedIdentifier, Unterminated}; pub fn scan(source: &str) -> Result, CompilerErrorAtLine> { let scanner = Scanner { @@ -32,7 +32,7 @@ impl Scanner { Ok(self.tokens) } - fn scan_token(&mut self) -> Result<(),CompilerErrorAtLine> { + fn scan_token(&mut self) -> Result<(), CompilerErrorAtLine> { let c = self.advance(); if self.new_line && (c == ' ' || c == '\t') { self.add_token(TokenType::Indent); @@ -135,7 +135,9 @@ impl Scanner { } '^' => self.add_token(BitXor), _ => { - if is_digit(c) { + if c == '0' && self.peek() == 'x' { + self.hex_number()?; + } else if is_digit(c) { self.number(); } else if is_alpha(c) { self.identifier(); @@ -158,8 +160,25 @@ impl Scanner { self.add_token_with_value(tokentype, value); } + fn hex_number(&mut self) -> Result<(), CompilerErrorAtLine> { + self.advance(); + self.advance(); + while is_digit(self.peek()) || is_alpha(self.peek()) { + self.advance(); + } + let value: String = self.chars[self.start..self.current].iter().collect(); + if value.len() < 5 { + self.add_token_with_value(U32, value); + } else if value.len() < 9 { + self.add_token_with_value(U64, value); + } else { + return Err(self.raise(CompilerError::Overflow)); + } + Ok(()) + } + fn number(&mut self) { - while is_digit(self.peek() ) { + while is_digit(self.peek()) { self.advance(); } let mut has_dot = false; @@ -181,7 +200,7 @@ impl Scanner { } if self.is_at_end() { - return Err(CompilerErrorAtLine::raise(Unterminated("char"), self.line)) + return Err(CompilerErrorAtLine::raise(Unterminated("char"), self.line)); } self.advance(); @@ -200,7 +219,7 @@ impl Scanner { CompilerErrorAtLine::raise(error, self.line) } - fn string(&mut self) -> Result<(),CompilerErrorAtLine> { + fn string(&mut self) -> Result<(), CompilerErrorAtLine> { while self.peek() != '"' && !self.is_at_end() { if self.peek() == '\n' { self.line += 1; @@ -276,7 +295,7 @@ fn is_digit(c: char) -> bool { } fn is_digit_or_scientific(c: char) -> bool { - is_digit(c) || c=='e' || c=='E' + is_digit(c) || c == 'e' || c == 'E' } fn is_alphanumeric(c: char) -> bool { diff --git a/src/tokens.rs b/src/tokens.rs index 58749f1..5e6029a 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -45,6 +45,7 @@ pub enum TokenType { GreaterEqual, GreaterGreater, Hash, + Hex, I32, I64, Identifier, @@ -123,6 +124,7 @@ impl fmt::Display for TokenType { TokenType::GreaterEqual => write!(f, ">="), TokenType::GreaterGreater => write!(f, ">>"), TokenType::Hash => write!(f, "#"), + TokenType::Hex => write!(f, "0x"), TokenType::If => write!(f, "if"), TokenType::Identifier => write!(f, "identifier"), TokenType::Indent => write!(f, "indent"),