Switched to building an AST to do static type checking

This commit is contained in:
Shautvast 2025-10-26 08:56:12 +01:00
parent f6b3b0a39a
commit 96839158fa
10 changed files with 727 additions and 93 deletions

439
src/ast_compiler.rs Normal file
View file

@ -0,0 +1,439 @@
use log::debug;
use crate::tokens::TokenType::{
Bang, Bool, Char, Colon, Date, Eol, Equal, F32, F64, False, FloatingPoint, Greater,
GreaterEqual, I32, I64, Identifier, Integer, LeftParen, Less, LessEqual, Let, ListType,
MapType, Minus, Object, Plus, Print, RightParen, Slash, Star, String, Text, True, U32, U64,
};
use crate::tokens::{Token, TokenType};
use crate::value::Value;
/// Parses a scanned token stream into a list of AST statements.
///
/// # Errors
/// Returns the first error reported while parsing.
pub fn compile(tokens: Vec<Token>) -> anyhow::Result<Vec<Statement>> {
    AstCompiler::new(tokens).compile()
}
/// Parser state: the token stream plus a cursor into it.
struct AstCompiler {
/// All tokens to parse; `is_at_end` treats an `Eof` token as the end of input.
tokens: Vec<Token>,
/// Index of the next token to be consumed.
current: usize,
/// Set to `true` when `consume` or `let_declaration` reports an error.
had_error: bool,
}
impl AstCompiler {
/// Creates a parser positioned at the first token with no error recorded.
fn new(tokens: Vec<Token>) -> Self {
    let current = 0;
    let had_error = false;
    Self {
        tokens,
        current,
        had_error,
    }
}
/// Parses declarations until the end-of-file token is reached.
///
/// # Errors
/// Stops at, and returns, the first parse error.
fn compile(&mut self) -> anyhow::Result<Vec<Statement>> {
    let mut parsed = Vec::new();
    loop {
        if self.is_at_end() {
            break Ok(parsed);
        }
        let next = self.declaration()?;
        parsed.push(next);
    }
}
/// Parses one declaration: a `let` declaration when the next token is `let`
/// (which is consumed here), otherwise any other statement.
fn declaration(&mut self) -> anyhow::Result<Statement> {
    if !self.match_token(vec![Let]) {
        return self.statement();
    }
    self.let_declaration()
}
/// Parses `<name> [: <type>] = <expression>` followed by an end-of-line token.
///
/// The `let` keyword itself has already been consumed by `declaration`.
/// The variable type is resolved from the optional declared type and the type
/// inferred from the initializer (see `calculate_type`).
///
/// # Errors
/// Fails when the name, the initializer or the end of line is missing, or when
/// the declared and inferred types are incompatible. Every failure sets
/// `had_error`, and the errors raised here carry the line of the variable name
/// in their message instead of printing it to stdout.
fn let_declaration(&mut self) -> anyhow::Result<Statement> {
    let name = self.consume(Identifier, "Expect variable name.")?;

    // Optional type annotation: the token right after the colon is taken as the type.
    let declared_type = if self.match_token(vec![Colon]) {
        Some(self.advance().token_type)
    } else {
        None
    };

    if !self.match_token(vec![Equal]) {
        self.had_error = true;
        return Err(anyhow::anyhow!(
            "line {}: Uninitialized variables are not allowed.",
            name.line
        ));
    }

    let initializer = self.expression()?;
    self.consume(Eol, "Expect end of line after initializer.")?;

    let inferred_type = initializer.infer_type();
    match calculate_type(declared_type, inferred_type) {
        Ok(var_type) => Ok(Statement::VarStmt {
            name,
            var_type,
            initializer,
        }),
        Err(e) => {
            self.had_error = true;
            // Keep the location inside the error so the caller decides where to report it.
            Err(anyhow::anyhow!("line {}: {}", name.line, e))
        }
    }
}
/// Parses a statement: a print statement when the next token is `print`
/// (which is consumed here), otherwise an expression statement.
fn statement(&mut self) -> anyhow::Result<Statement> {
    if self.match_token(vec![Print]) {
        return self.print_statement();
    }
    self.expr_statement()
}
/// Parses the expression of a print statement and the end-of-line token after it.
fn print_statement(&mut self) -> anyhow::Result<Statement> {
    let value = self.expression()?;
    self.consume(Eol, "Expect end of line after expression.")
        .map(|_| Statement::Print { value })
}
/// Parses a bare expression used as a statement, terminated by an end-of-line token.
fn expr_statement(&mut self) -> anyhow::Result<Statement> {
    let expression = self.expression()?;
    self.consume(Eol, "Expect end of line after expression.")
        .map(|_| Statement::ExpressionStmt { expression })
}
fn expression(&mut self) -> anyhow::Result<Expression> {
self.equality()
}
/// Parses a left-associative chain of `!=` / `==` over comparison operands.
fn equality(&mut self) -> anyhow::Result<Expression> {
    let mut lhs = self.comparison()?;
    loop {
        if !self.match_token(vec![TokenType::BangEqual, TokenType::EqualEqual]) {
            return Ok(lhs);
        }
        let operator = self.previous().clone();
        let rhs = self.comparison()?;
        lhs = Expression::Binary {
            line: operator.line,
            left: Box::new(lhs),
            operator,
            right: Box::new(rhs),
        };
    }
}
/// Parses a left-associative chain of `>`, `>=`, `<`, `<=` over term operands.
fn comparison(&mut self) -> anyhow::Result<Expression> {
    let mut lhs = self.term()?;
    loop {
        if !self.match_token(vec![Greater, GreaterEqual, Less, LessEqual]) {
            return Ok(lhs);
        }
        let operator = self.previous().clone();
        let rhs = self.term()?;
        lhs = Expression::Binary {
            line: operator.line,
            left: Box::new(lhs),
            operator,
            right: Box::new(rhs),
        };
    }
}
/// Parses a left-associative chain of `-` / `+` over factor operands.
fn term(&mut self) -> anyhow::Result<Expression> {
    let mut lhs = self.factor()?;
    loop {
        if !self.match_token(vec![Minus, Plus]) {
            return Ok(lhs);
        }
        let operator = self.previous().clone();
        let rhs = self.factor()?;
        lhs = Expression::Binary {
            line: operator.line,
            left: Box::new(lhs),
            operator,
            right: Box::new(rhs),
        };
    }
}
/// Parses a left-associative chain of `/` and `*` over unary operands.
fn factor(&mut self) -> anyhow::Result<Expression> {
    let mut lhs = self.unary()?;
    loop {
        if !self.match_token(vec![Slash, Star]) {
            return Ok(lhs);
        }
        let operator = self.previous().clone();
        let rhs = self.unary()?;
        lhs = Expression::Binary {
            line: operator.line,
            left: Box::new(lhs),
            operator,
            right: Box::new(rhs),
        };
    }
}
/// Parses a prefix `!` or `-` applied to a unary expression, or falls through
/// to a primary expression when no prefix operator is present.
///
/// The node's line is taken from the operator token, as the binary rules do.
/// Reading it from the token after the operand would report whatever follows
/// the expression, which may not be part of it.
fn unary(&mut self) -> anyhow::Result<Expression> {
    if !self.match_token(vec![Bang, Minus]) {
        return self.primary();
    }
    let operator = self.previous().clone();
    let right = self.unary()?;
    Ok(Expression::Unary {
        line: operator.line,
        operator,
        right: Box::new(right),
    })
}
/// Parses a primary expression: a boolean, integer, floating point or text
/// literal, or a parenthesized expression.
///
/// Lines are taken from the token that starts the node (the literal itself or
/// the opening parenthesis), not from the token that follows it.
///
/// # Errors
/// Fails when a number lexeme cannot be parsed, when the closing `)` is
/// missing, or when the next token cannot start an expression. The last case
/// sets `had_error` and returns an error instead of panicking.
fn primary(&mut self) -> anyhow::Result<Expression> {
    if self.match_token(vec![LeftParen]) {
        let line = self.previous().line;
        let expr = self.expression()?;
        self.consume(RightParen, "Expect ')' after expression.")?;
        return Ok(Expression::Grouping {
            line,
            expression: Box::new(expr),
        });
    }

    if !self.match_token(vec![False, True, Integer, FloatingPoint, Text]) {
        self.had_error = true;
        let found = self.peek();
        return Err(anyhow::anyhow!(
            "Expect expression, found '{}' at line {}",
            found.token_type,
            found.line
        ));
    }

    // The literal token was just consumed, so it is the previous token.
    let token = self.previous();
    let line = token.line;
    let (literaltype, value) = match token.token_type {
        False => (Bool, Value::Bool(false)),
        True => (Bool, Value::Bool(true)),
        Integer => (Integer, Value::I64(token.lexeme.parse()?)),
        FloatingPoint => (FloatingPoint, Value::F64(token.lexeme.parse()?)),
        Text => (Text, Value::String(token.lexeme.to_string())),
        _ => unreachable!("match_token only accepts literal token types here"),
    };
    Ok(Expression::Literal {
        line,
        literaltype,
        value,
    })
}
/// Consumes the current token when it has the given type and returns a copy of it.
///
/// # Errors
/// When the current token has a different type (or the input is at its end),
/// sets `had_error` and returns an error carrying `message`.
fn consume(&mut self, token_type: TokenType, message: &str) -> anyhow::Result<Token> {
    if !self.check(token_type) {
        self.had_error = true;
        return Err(anyhow::anyhow!(message.to_string()));
    }
    Ok(self.advance().clone())
}
/// Consumes the current token when its type is one of `tokens`.
///
/// Returns `true` when a token was consumed, `false` otherwise.
fn match_token(&mut self, tokens: Vec<TokenType>) -> bool {
    let matched = tokens.into_iter().any(|candidate| self.check(candidate));
    if matched {
        self.advance();
    }
    matched
}
/// Reports whether the current token has the given type, without consuming it.
/// Always `false` once the end of input is reached.
fn check(&self, token_type: TokenType) -> bool {
    !self.is_at_end() && self.peek().token_type == token_type
}
/// Returns the token at the cursor without consuming it.
fn peek(&self) -> &Token {
    let position = self.current;
    &self.tokens[position]
}
/// Returns the token just before the cursor, i.e. the most recently consumed one.
fn previous(&self) -> &Token {
    let last_consumed = self.current - 1;
    &self.tokens[last_consumed]
}
/// Moves the cursor one token forward unless the input is at its end, then
/// returns the token just before the cursor.
fn advance(&mut self) -> &Token {
    let step = if self.is_at_end() { 0 } else { 1 };
    self.current += step;
    self.previous()
}
/// Reports whether the token at the cursor is the end-of-file token.
fn is_at_end(&self) -> bool {
    matches!(self.peek().token_type, TokenType::Eof)
}
}
fn calculate_type(
declared_type: Option<TokenType>,
inferred_type: TokenType,
) -> anyhow::Result<TokenType> {
Ok(if let Some(declared_type) = declared_type {
if declared_type != inferred_type {
match (declared_type, inferred_type) {
(I32, I64) => I32,
(U32, U64) => U32,
(F32, F64) => F32,
(F64, I64) => F64,
(U64, I64) => U64,
(U64, I32) => U64,
_ => {
return Err(anyhow::anyhow!(
"Incompatible types. Expected {}, found {}",
declared_type,
inferred_type
));
}
}
} else {
declared_type
}
} else {
inferred_type
})
}
/// A single statement produced by the parser.
#[derive(Debug)]
pub enum Statement {
/// An expression used as a statement.
ExpressionStmt {
expression: Expression,
},
/// A `let` declaration: the variable name token, its resolved type and the
/// initializer expression.
VarStmt {
name: Token,
var_type: TokenType,
initializer: Expression,
},
/// A print statement and the expression whose value is printed.
Print {
value: Expression,
},
}
impl Statement {
pub fn line(&self) -> usize {
match self {
Statement::ExpressionStmt { expression } => expression.line(),
Statement::VarStmt {
name,
var_type,
initializer,
} => name.line,
Statement::Print { value } => value.line(),
}
}
}
/// An expression node of the AST. Every variant stores the source line that
/// the parser recorded for it.
#[derive(Debug)]
pub enum Expression {
/// Two operands combined by an infix operator token.
Binary {
line: usize,
left: Box<Expression>,
operator: Token,
right: Box<Expression>,
},
/// A prefix operator token applied to one operand.
Unary {
line: usize,
operator: Token,
right: Box<Expression>,
},
/// A parenthesized expression.
Grouping {
line: usize,
expression: Box<Expression>,
},
/// A literal value together with the (possibly undetermined) type of the literal.
Literal {
line: usize,
literaltype: TokenType,
value: Value,
},
}
impl Expression {
pub fn line(&self) -> usize {
match self {
Expression::Binary {
line,
left,
operator,
right,
} => *line,
Expression::Unary {
line,
operator,
right,
} => *line,
Expression::Grouping { line, expression } => *line,
Expression::Literal {
line,
literaltype,
value,
} => *line,
}
}
pub fn infer_type(&self) -> TokenType {
match self {
Self::Binary {
line,
left,
operator,
right,
} => {
let left_type = left.infer_type();
let right_type = right.infer_type();
if left_type == right_type {
// map to determined numeric type if yet undetermined (32 or 64 bits)
match left_type {
FloatingPoint => F64,
Integer => I64,
_ => left_type,
}
} else {
if let Plus = operator.token_type {
// includes string concatenation with numbers
// followed by type coercion to 64 bits for numeric types
debug!("coerce {} : {}",left_type,right_type);
match (left_type, right_type) {
(_, Text) => Text,
(Text, _) => Text,
(FloatingPoint, _) => F64,
(Integer, FloatingPoint) => F64,
(Integer, _) => I64,
(F64, _) => F64,
(U64, U32) => U64,
(I64, I32) => I64,
// could add a date and a duration. future work
// could add a List and a value. also future work
// could add a Map and a tuple. Will I add tuple types? Future work!
_ => panic!("Unexpected coercion"),
}
// could have done some fall through here, but this will fail less gracefully,
// so if my thinking is wrong or incomplete it will panic
} else {
// type coercion to 64 bits for numeric types
debug!("coerce {} : {}",left_type,right_type);
match (left_type, right_type) {
(FloatingPoint, _) => F64,
(Integer, FloatingPoint) => F64,
(I64, FloatingPoint) => F64,
(F64, _) => F64,
(U64, U32) => U64,
(I64, I32) => I64,
(I64, Integer) => I64,
_ => panic!("Unexpected coercion"),
}
}
}
}
Self::Grouping { line, expression } => expression.infer_type(),
Self::Literal {
line,
literaltype,
value,
} => literaltype.clone(),
Self::Unary {
line,
operator,
right,
} => right.infer_type(),
}
}
}

140
src/bytecode_compiler.rs Normal file
View file

@ -0,0 +1,140 @@
use crate::ast_compiler::{Expression, Statement};
use crate::chunk::Chunk;
use crate::tokens::TokenType;
use crate::value::Value;
use crate::vm::{OP_ADD, OP_BITAND, OP_BITOR, OP_BITXOR, OP_CONSTANT, OP_DEF_BOOL, OP_DEF_CHAR, OP_DEF_DATE, OP_DEF_F64, OP_DEF_I32, OP_DEF_I64, OP_DEF_LIST, OP_DEF_MAP, OP_DEF_STRING, OP_DEF_STRUCT, OP_DEFINE, OP_DIVIDE, OP_EQUAL, OP_GREATER, OP_GREATER_EQUAL, OP_LESS, OP_LESS_EQUAL, OP_MULTIPLY, OP_NEGATE, OP_NOT, OP_RETURN, OP_SHL, OP_SHR, OP_SUBTRACT, OP_DEF_F32, OP_GET, OP_PRINT};
/// Compiles a list of AST statements into a bytecode chunk.
///
/// # Errors
/// Returns the first error reported while compiling a statement.
pub fn compile(ast: Vec<Statement>) -> anyhow::Result<Chunk> {
    Compiler::new().compile(ast)
}
/// Bytecode compiler state.
struct Compiler {
/// The chunk that receives the emitted code and constants.
chunk: Chunk,
/// Error flag; initialized to `false` and not updated by the methods visible here.
had_error: bool,
/// Line of the statement being compiled; passed along with every emitted word.
current_line: usize,
}
impl Compiler {
/// Creates a compiler with a fresh chunk named "main", no error and line 0.
fn new() -> Self {
    let chunk = Chunk::new("main");
    Self {
        chunk,
        had_error: false,
        current_line: 0,
    }
}
/// Compiles every statement in order, appends `OP_RETURN` and hands back the chunk.
///
/// # Errors
/// Stops at, and returns, the first error from compiling a statement.
fn compile(mut self, ast: Vec<Statement>) -> anyhow::Result<Chunk> {
    ast.iter()
        .try_for_each(|statement| self.compile_statement(statement))?;
    self.emit_byte(OP_RETURN);
    Ok(self.chunk)
}
/// Emits the code for one statement and records its line as the current line.
///
/// * Variable declaration: the name is added as a string constant, then the
///   initializer is compiled, then the typed define instruction is emitted.
/// * Print: the value is compiled, followed by `OP_PRINT`.
///
/// # Panics
/// Expression statements are not implemented yet and panic.
fn compile_statement(&mut self, statement: &Statement) -> anyhow::Result<()> {
    self.current_line = statement.line();
    match statement {
        Statement::VarStmt {
            name,
            var_type,
            initializer,
        } => {
            // The name constant is registered before the initializer's constants.
            let name_constant = Value::String(name.lexeme.clone());
            let name_index = self.chunk.add_constant(name_constant);
            self.compile_expression(initializer)?;
            self.define_variable(var_type, name_index)
        }
        Statement::Print { value } => {
            self.compile_expression(value)?;
            self.emit_byte(OP_PRINT);
            Ok(())
        }
        Statement::ExpressionStmt { .. } => unimplemented!(),
    }
}
fn compile_expression(&mut self, expression: &Expression) -> anyhow::Result<()> {
match expression {
Expression::Literal { value, .. } => self.emit_constant(value),
Expression::Grouping { expression, .. } => self.compile_expression(expression)?,
Expression::Unary {
operator, right, ..
} => {
self.compile_expression(right)?;
match operator.token_type {
TokenType::Minus => {
self.emit_byte(OP_NEGATE);
}
TokenType::Bang => {
self.emit_byte(OP_NOT);
}
_ => unimplemented!("unary other than ! and -"),
}
}
Expression::Binary {
left,
operator,
right,
..
} => {
self.compile_expression(left)?;
self.compile_expression(right)?;
match operator.token_type {
TokenType::Plus => self.emit_byte(OP_ADD),
TokenType::Minus => self.emit_byte(OP_SUBTRACT),
TokenType::Star => self.emit_byte(OP_MULTIPLY),
TokenType::Slash => self.emit_byte(OP_DIVIDE),
TokenType::BitAnd => self.emit_byte(OP_BITAND),
TokenType::BitOr => self.emit_byte(OP_BITOR),
TokenType::BitXor => self.emit_byte(OP_BITXOR),
TokenType::GreaterGreater => self.emit_byte(OP_SHR),
TokenType::LessLess => self.emit_byte(OP_SHL),
TokenType::EqualEqual => self.emit_byte(OP_EQUAL),
TokenType::Greater => self.emit_byte(OP_GREATER),
TokenType::GreaterEqual => self.emit_byte(OP_GREATER_EQUAL),
TokenType::Less => self.emit_byte(OP_LESS),
TokenType::LessEqual => self.emit_byte(OP_LESS_EQUAL),
_ => unimplemented!("binary other than plus, minus, star, slash"),
}
}
}
Ok(())
}
/// Emits the typed define instruction for a variable, followed by the index
/// of the constant that holds the variable name.
///
/// `U32` and `U64` are mapped to `OP_DEF_I64`, as in the original table.
///
/// # Errors
/// Returns an error, rather than panicking or silently truncating, when
/// `var_type` has no define instruction or when `name_index` does not fit in
/// the 16 bit operand.
fn define_variable(&mut self, var_type: &TokenType, name_index: usize) -> anyhow::Result<()> {
    let def_op = match var_type {
        TokenType::I32 => OP_DEF_I32,
        TokenType::I64 => OP_DEF_I64,
        TokenType::U32 => OP_DEF_I64,
        TokenType::U64 => OP_DEF_I64,
        TokenType::F32 => OP_DEF_F32,
        TokenType::F64 => OP_DEF_F64,
        TokenType::Date => OP_DEF_DATE,
        TokenType::String => OP_DEF_STRING,
        TokenType::Char => OP_DEF_CHAR,
        TokenType::Bool => OP_DEF_BOOL,
        TokenType::ListType => OP_DEF_LIST,
        TokenType::MapType => OP_DEF_MAP,
        TokenType::Object => OP_DEF_STRUCT,
        _ => {
            return Err(anyhow::anyhow!(
                "Cannot define a variable of type {} (line {})",
                var_type,
                self.current_line
            ));
        }
    };
    let operand = u16::try_from(name_index).map_err(|_| {
        anyhow::anyhow!(
            "Constant index {} does not fit in a 16 bit operand",
            name_index
        )
    })?;
    self.emit_bytes(def_op, operand);
    Ok(())
}
/// Appends one word to the chunk, tagged with the current line.
fn emit_byte(&mut self, byte: u16) {
    let line = self.current_line;
    self.chunk.add(byte, line);
}
/// Appends two words to the chunk, `b1` first and `b2` second.
fn emit_bytes(&mut self, b1: u16, b2: u16) {
    for word in [b1, b2] {
        self.emit_byte(word);
    }
}
/// Adds a copy of `value` to the chunk's constants and emits `OP_CONSTANT`
/// followed by the constant's index.
///
/// NOTE(review): the index is narrowed with `as u16`, which truncates silently
/// for indices above `u16::MAX`.
fn emit_constant(&mut self, value: &Value) {
    let operand = self.chunk.add_constant(value.clone()) as u16;
    self.emit_bytes(OP_CONSTANT, operand);
}
}

View file

@ -1,9 +1,10 @@
use tracing::debug; use tracing::debug;
use crate::value::Value; use crate::value::Value;
use crate::vm::{ use crate::vm::{
OP_ADD, OP_BITAND, OP_BITOR, OP_BITXOR, OP_CONSTANT, OP_DIVIDE, OP_FALSE, OP_MULTIPLY, OP_ADD, OP_BITAND, OP_BITOR, OP_BITXOR, OP_CONSTANT, OP_DIVIDE, OP_MULTIPLY,
OP_NEGATE, OP_RETURN, OP_SUBTRACT, OP_TRUE, OP_NOT, OP_SHL, OP_SHR, OP_LESS, OP_LESS_EQUAL, OP_NEGATE, OP_RETURN, OP_SUBTRACT, OP_NOT, OP_SHL, OP_SHR, OP_LESS, OP_LESS_EQUAL,
OP_GREATER, OP_GREATER_EQUAL, OP_EQUAL, OP_PRINT, OP_POP, OP_DEFINE, OP_GET,OP_DEF_STRING, OP_DEF_BOOL OP_GREATER, OP_GREATER_EQUAL, OP_EQUAL, OP_PRINT, OP_POP, OP_DEFINE, OP_GET,OP_DEF_STRING,
OP_DEF_I32, OP_DEF_BOOL, OP_DEF_F32, OP_DEF_F64,
}; };
pub struct Chunk { pub struct Chunk {
@ -53,8 +54,6 @@ impl Chunk {
match instruction { match instruction {
OP_CONSTANT => self.constant_inst("LDC", offset), OP_CONSTANT => self.constant_inst("LDC", offset),
OP_ADD => self.simple_inst("ADD", offset), OP_ADD => self.simple_inst("ADD", offset),
OP_FALSE => self.simple_inst("LDC_false", offset),
OP_TRUE => self.simple_inst("LDC_true", offset),
OP_SUBTRACT => self.simple_inst("SUB", offset), OP_SUBTRACT => self.simple_inst("SUB", offset),
OP_MULTIPLY => self.simple_inst("MUL", offset), OP_MULTIPLY => self.simple_inst("MUL", offset),
OP_DIVIDE => self.simple_inst("DIV", offset), OP_DIVIDE => self.simple_inst("DIV", offset),
@ -75,6 +74,9 @@ impl Chunk {
OP_POP => self.simple_inst("POP", offset), OP_POP => self.simple_inst("POP", offset),
OP_DEFINE => self.constant_inst("DEF", offset), OP_DEFINE => self.constant_inst("DEF", offset),
OP_DEF_STRING => self.constant_inst("DEFSTR", offset), OP_DEF_STRING => self.constant_inst("DEFSTR", offset),
OP_DEF_I32 => self.constant_inst("DEFI32", offset),
OP_DEF_F32 => self.constant_inst("DEFF32", offset),
OP_DEF_F64 => self.constant_inst("DEFF64", offset),
OP_DEF_BOOL => self.constant_inst("DEFBOOL", offset), OP_DEF_BOOL => self.constant_inst("DEFBOOL", offset),
OP_GET => self.constant_inst("GET", offset), OP_GET => self.constant_inst("GET", offset),
_ => { _ => {

View file

@ -4,17 +4,22 @@ use crate::tokens::{Token, TokenType};
use crate::value::Value; use crate::value::Value;
use crate::vm::{ use crate::vm::{
OP_ADD, OP_BITAND, OP_BITOR, OP_BITXOR, OP_CONSTANT, OP_DEF_BOOL, OP_DEF_CHAR, OP_DEF_DATE, OP_ADD, OP_BITAND, OP_BITOR, OP_BITXOR, OP_CONSTANT, OP_DEF_BOOL, OP_DEF_CHAR, OP_DEF_DATE,
OP_DEF_F64, OP_DEF_I32, OP_DEF_I64, OP_DEF_LIST, OP_DEF_MAP, OP_DEF_STRUCT, OP_DEF_STRING, OP_DEF_F64, OP_DEF_I32, OP_DEF_I64, OP_DEF_LIST, OP_DEF_MAP, OP_DEF_STRING, OP_DEF_STRUCT,
OP_DEFINE, OP_DIVIDE, OP_EQUAL, OP_FALSE, OP_GET, OP_GREATER, OP_GREATER_EQUAL, OP_LESS, OP_DEFINE, OP_DIVIDE, OP_EQUAL, OP_GET, OP_GREATER, OP_GREATER_EQUAL, OP_LESS, OP_LESS_EQUAL,
OP_LESS_EQUAL, OP_MULTIPLY, OP_NEGATE, OP_NOT, OP_POP, OP_PRINT, OP_RETURN, OP_SHL, OP_SHR, OP_MULTIPLY, OP_NEGATE, OP_NOT, OP_POP, OP_PRINT, OP_RETURN, OP_SHL, OP_SHR, OP_SUBTRACT,
OP_SUBTRACT, OP_TRUE,
}; };
use anyhow::anyhow; use anyhow::anyhow;
use std::collections::HashMap; use std::collections::HashMap;
use std::mem::discriminant;
use std::sync::LazyLock; use std::sync::LazyLock;
use tracing::debug; use tracing::debug;
macro_rules! parse_num {
($s:ident, $variant:ident, $number:ident) => {{
$s.typestack.push(TokenType::$variant);
Value::$variant($number.parse()?)
}};
}
pub fn compile(source: &str) -> anyhow::Result<Chunk> { pub fn compile(source: &str) -> anyhow::Result<Chunk> {
let tokens = scan(source); let tokens = scan(source);
debug!("Scanned tokens: {:?}", tokens); debug!("Scanned tokens: {:?}", tokens);
@ -26,7 +31,7 @@ pub fn compile(source: &str) -> anyhow::Result<Chunk> {
current_token: &tokens[0], current_token: &tokens[0],
tokens: &tokens, tokens: &tokens,
current: 0, current: 0,
types: vec![], typestack: vec![],
locals: vec![], locals: vec![],
previous: 0, previous: 0,
had_error: false, had_error: false,
@ -41,7 +46,7 @@ struct Compiler<'a> {
current: usize, current: usize,
previous_token: &'a Token, previous_token: &'a Token,
current_token: &'a Token, current_token: &'a Token,
types: Vec<Token>, typestack: Vec<TokenType>,
locals: Vec<String>, locals: Vec<String>,
previous: usize, previous: usize,
had_error: bool, had_error: bool,
@ -279,16 +284,25 @@ impl Rule {
} }
} }
fn number(s: &mut Compiler, expected_type: Option<TokenType>) -> anyhow::Result<()> { fn number(s: &mut Compiler, mut expected_type: Option<TokenType>) -> anyhow::Result<()> {
debug!("number: expected type {:?}", expected_type);
// coerce unknown numeric type to the expected type of the expression if any
if let None = expected_type {
if !s.typestack.is_empty() {
expected_type = Some(*s.typestack.last().unwrap());
}
}
let number = &s.previous_token.lexeme; let number = &s.previous_token.lexeme;
let value = if let Some(expected_type) = expected_type { let value = if let Some(expected_type) = expected_type {
match expected_type { match expected_type {
TokenType::I32 => Value::I32(number.parse()?), TokenType::I32 => parse_num!(s, I32, number),
TokenType::I64 => Value::I64(number.parse()?), TokenType::I64 => parse_num!(s, I64, number),
TokenType::U32 => Value::U32(number.parse()?), TokenType::U32 => parse_num!(s, U32, number),
TokenType::U64 => Value::U64(number.parse()?), TokenType::U64 => parse_num!(s, U64, number),
TokenType::F32 => Value::U32(number.parse()?), TokenType::F32 => parse_num!(s, F32, number),
TokenType::F64 => Value::U64(number.parse()?), TokenType::F64 => parse_num!(s, F64, number),
_ => { _ => {
return Err(anyhow!( return Err(anyhow!(
@ -300,14 +314,10 @@ fn number(s: &mut Compiler, expected_type: Option<TokenType>) -> anyhow::Result<
} }
} }
} else { } else {
if let TokenType::Number = s.previous_token.token_type { match s.previous_token.token_type {
if number.contains('.') { TokenType::Integer => Value::I64(number.parse()?),
Value::F64(number.parse()?) TokenType::FloatingPoint => Value::F64(number.parse()?),
} else { _ => panic!("I did not think this would happen")
Value::I64(number.parse()?)
}
} else {
return Err(anyhow!("I did not think this would happen"));
} }
}; };
s.emit_constant(value); s.emit_constant(value);
@ -318,11 +328,21 @@ fn literal(s: &mut Compiler, expected_type: Option<TokenType>) -> anyhow::Result
let actual_type = &s.previous_token.token_type; let actual_type = &s.previous_token.token_type;
if let Some(expected_type) = expected_type { if let Some(expected_type) = expected_type {
match (actual_type, expected_type) { match (actual_type, expected_type) {
(TokenType::False, TokenType::Bool) => s.emit_constant(Value::Bool(false)), (TokenType::False, TokenType::Bool) => {
(TokenType::True, TokenType::Bool) => s.emit_constant(Value::Bool(true)), s.typestack.push(TokenType::Bool);
s.emit_constant(Value::Bool(false))
}
(TokenType::True, TokenType::Bool) => {
s.typestack.push(TokenType::Bool);
s.emit_constant(Value::Bool(true))
}
(TokenType::Text, TokenType::String) => { (TokenType::Text, TokenType::String) => {
s.typestack.push(TokenType::String);
s.emit_constant(Value::String(s.previous_token.lexeme.clone())) s.emit_constant(Value::String(s.previous_token.lexeme.clone()))
} }
//list
//map
//struct value
_ => { _ => {
return Err(anyhow!( return Err(anyhow!(
"Invalid type: expected {} value, got {}({})", "Invalid type: expected {} value, got {}({})",
@ -343,11 +363,11 @@ fn literal(s: &mut Compiler, expected_type: Option<TokenType>) -> anyhow::Result
Ok(()) Ok(())
} }
fn skip(s: &mut Compiler, expected_type: Option<TokenType>) -> anyhow::Result<()> { fn skip(s: &mut Compiler, _expected_type: Option<TokenType>) -> anyhow::Result<()> {
Ok(()) Ok(())
} }
fn grouping(s: &mut Compiler, expected_type: Option<TokenType>) -> anyhow::Result<()> { fn grouping(s: &mut Compiler, _expected_type: Option<TokenType>) -> anyhow::Result<()> {
s.expression(None)?; s.expression(None)?;
s.consume(TokenType::RightParen, "Expect ')' after expression.") s.consume(TokenType::RightParen, "Expect ')' after expression.")
} }
@ -372,6 +392,7 @@ fn unary(s: &mut Compiler, expected_type: Option<TokenType>) -> anyhow::Result<(
fn binary(s: &mut Compiler, expected_type: Option<TokenType>) -> anyhow::Result<()> { fn binary(s: &mut Compiler, expected_type: Option<TokenType>) -> anyhow::Result<()> {
let operator_type = &s.previous_token.token_type; let operator_type = &s.previous_token.token_type;
debug!("operator {:?}", operator_type); debug!("operator {:?}", operator_type);
debug!("expected type {:?}", expected_type);
let rule = get_rule(operator_type); let rule = get_rule(operator_type);
s.parse_precedence(rule.precedence + 1, None)?; s.parse_precedence(rule.precedence + 1, None)?;
match operator_type { match operator_type {
@ -429,6 +450,7 @@ static RULES: LazyLock<HashMap<TokenType, Rule>> = LazyLock::new(|| {
Rule::new(None, Some(binary), PREC_EQUALITY), Rule::new(None, Some(binary), PREC_EQUALITY),
); );
rules.insert(TokenType::False, Rule::new(Some(literal), None, PREC_NONE)); rules.insert(TokenType::False, Rule::new(Some(literal), None, PREC_NONE));
rules.insert(TokenType::FloatingPoint, Rule::new(Some(number), None, PREC_NONE));
rules.insert( rules.insert(
TokenType::Greater, TokenType::Greater,
Rule::new(None, Some(binary), PREC_COMPARISON), Rule::new(None, Some(binary), PREC_COMPARISON),
@ -447,12 +469,13 @@ static RULES: LazyLock<HashMap<TokenType, Rule>> = LazyLock::new(|| {
TokenType::Identifier, TokenType::Identifier,
Rule::new(Some(variable), None, PREC_NONE), Rule::new(Some(variable), None, PREC_NONE),
); );
rules.insert(TokenType::Integer, Rule::new(Some(number), None, PREC_NONE));
rules.insert(TokenType::Indent, Rule::new(Some(skip), None, PREC_NONE)); rules.insert(TokenType::Indent, Rule::new(Some(skip), None, PREC_NONE));
rules.insert(TokenType::LeftBrace, Rule::new(None, None, PREC_NONE)); rules.insert(TokenType::LeftBrace, Rule::new(None, None, PREC_NONE));
rules.insert(TokenType::LeftBracket, Rule::new(None, None, PREC_NONE)); rules.insert(TokenType::LeftBracket, Rule::new(None, None, PREC_NONE));
rules.insert( rules.insert(
TokenType::LeftParen, TokenType::LeftParen,
Rule::new(Some(binary), None, PREC_NONE), Rule::new(Some(grouping), None, PREC_NONE),
); );
rules.insert( rules.insert(
TokenType::Less, TokenType::Less,
@ -475,7 +498,6 @@ static RULES: LazyLock<HashMap<TokenType, Rule>> = LazyLock::new(|| {
TokenType::Minus, TokenType::Minus,
Rule::new(Some(unary), Some(binary), PREC_TERM), Rule::new(Some(unary), Some(binary), PREC_TERM),
); );
rules.insert(TokenType::Number, Rule::new(Some(number), None, PREC_NONE));
rules.insert(TokenType::Plus, Rule::new(None, Some(binary), PREC_TERM)); rules.insert(TokenType::Plus, Rule::new(None, Some(binary), PREC_TERM));
rules.insert(TokenType::Print, Rule::new(None, None, PREC_NONE)); rules.insert(TokenType::Print, Rule::new(None, None, PREC_NONE));
rules.insert(TokenType::Return, Rule::new(None, None, PREC_NONE)); rules.insert(TokenType::Return, Rule::new(None, None, PREC_NONE));

View file

@ -5,3 +5,5 @@ pub mod scanner;
mod tokens; mod tokens;
mod value; mod value;
pub mod vm; pub mod vm;
pub mod ast_compiler;
pub mod bytecode_compiler;

View file

@ -1,21 +1,41 @@
use crudlang::{ast_compiler, chunk};
use crudlang::bytecode_compiler::compile;
use crudlang::scanner::scan;
use crudlang::vm::{interpret, Vm};
fn main() -> anyhow::Result<()> { fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt::init(); tracing_subscriber::fmt::init();
let chunk = crudlang::compiler::compile( let tokens = scan(r#"let a = ((1+2)*3.0)*4"#);
r#"let a: bool = "koe""#, match ast_compiler::compile(tokens) {
); Ok(statements) => {
match chunk { // println!("{:?}", statements);
let chunk = compile(statements)?;
// chunk.disassemble();
interpret(chunk);
}
Err(e) => { Err(e) => {
println!("{}", e); println!("{}", e)
return Ok(()); }
} }
Ok(chunk) => {
chunk.disassemble();
let result = crudlang::vm::interpret(chunk)?; // println!("{}",expression.infer_type());
println!("{}", result);
} // let chunk = crudlang::compiler::compile(
} // r#"let a ="hello " + 42"#,
// );
// match chunk {
// Err(e) => {
// println!("{}", e);
// return Ok(());
// }
// Ok(chunk) => {
// chunk.disassemble();
//
// let result = crudlang::vm::interpret(chunk)?;
// println!("{}", result);
// }
// }
Ok(()) Ok(())
} }

View file

@ -1,4 +1,4 @@
use crate::tokens::TokenType::BitXor; use crate::tokens::TokenType::{BitXor, FloatingPoint, Integer};
use crate::{ use crate::{
keywords, keywords,
tokens::{ tokens::{
@ -12,7 +12,7 @@ pub fn scan(source: &str) -> Vec<Token> {
chars: source.chars().collect(), chars: source.chars().collect(),
current: 0, current: 0,
start: 0, start: 0,
line: 0, line: 1,
tokens: vec![], tokens: vec![],
new_line: true, new_line: true,
}; };
@ -51,7 +51,7 @@ impl Scanner {
'-' => self.add_token(TokenType::Minus), '-' => self.add_token(TokenType::Minus),
'+' => self.add_token(TokenType::Plus), '+' => self.add_token(TokenType::Plus),
':' => self.add_token(TokenType::Colon), ':' => self.add_token(TokenType::Colon),
';' => self.add_token(TokenType::Semicolon), ';' => println!("Warning: Ignoring semicolon at line {}", self.line),
'*' => self.add_token(TokenType::Star), '*' => self.add_token(TokenType::Star),
'!' => { '!' => {
let t = if self.match_next('=') { let t = if self.match_next('=') {
@ -150,8 +150,9 @@ impl Scanner {
while is_digit(self.peek()) { while is_digit(self.peek()) {
self.advance(); self.advance();
} }
let mut has_dot = false;
if self.peek() == '.' && is_digit(self.peek_next()) { if self.peek() == '.' && is_digit(self.peek_next()) {
has_dot = true;
self.advance(); self.advance();
} }
@ -159,7 +160,7 @@ impl Scanner {
self.advance(); self.advance();
} }
let value: String = self.chars[self.start..self.current].iter().collect(); let value: String = self.chars[self.start..self.current].iter().collect();
self.add_token_with_value(TokenType::Number, value); self.add_token_with_value(if has_dot { FloatingPoint } else { Integer }, value);
} }
fn string(&mut self) { fn string(&mut self) {

View file

@ -1,6 +1,6 @@
use std::fmt; use std::fmt;
#[derive(Debug)] #[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Token { pub struct Token {
pub token_type: TokenType, pub token_type: TokenType,
pub lexeme: String, pub lexeme: String,
@ -23,7 +23,7 @@ enum Value {
} }
#[derive(Debug, PartialEq, Clone, Copy, Hash)] #[derive(Debug, PartialEq, Clone, Copy, Hash)]
pub(crate) enum TokenType { pub enum TokenType {
Bang, Bang,
BangEqual, BangEqual,
BitAnd, BitAnd,
@ -52,9 +52,10 @@ pub(crate) enum TokenType {
Hash, Hash,
I32, I32,
I64, I64,
Identifier,
If, If,
Indent, Indent,
Identifier, Integer, //undetermined integer type
LeftBrace, LeftBrace,
LeftBracket, LeftBracket,
LeftParen, LeftParen,
@ -68,7 +69,7 @@ pub(crate) enum TokenType {
LogicalOr, LogicalOr,
Minus, Minus,
Not, Not,
Number, FloatingPoint, //undetermined float type
Object, Object,
Plus, Plus,
Print, Print,
@ -101,13 +102,14 @@ impl fmt::Display for TokenType {
TokenType::F32 => write!(f, "f32"), TokenType::F32 => write!(f, "f32"),
TokenType::F64 => write!(f, "f64"), TokenType::F64 => write!(f, "f64"),
TokenType::Bool => write!(f, "bool"), TokenType::Bool => write!(f, "bool"),
TokenType::Bang=> write!(f, "!"), TokenType::Bang => write!(f, "!"),
TokenType::BangEqual=> write!(f, "!="), TokenType::BangEqual => write!(f, "!="),
TokenType::BitAnd=> write!(f, "&"), TokenType::BitAnd => write!(f, "&"),
TokenType::BitOr=> write!(f, "|"), TokenType::BitOr => write!(f, "|"),
TokenType::BitXor=> write!(f, "^"), TokenType::BitXor => write!(f, "^"),
TokenType::Colon=> write!(f, ":"), TokenType::Colon => write!(f, ":"),
TokenType::Comma=> write!(f, ","), TokenType::Comma => write!(f, ","),
TokenType::FloatingPoint => write!(f, "float"),
TokenType::MapType => write!(f, "map"), TokenType::MapType => write!(f, "map"),
TokenType::ListType => write!(f, "list"), TokenType::ListType => write!(f, "list"),
TokenType::Dot => write!(f, "."), TokenType::Dot => write!(f, "."),
@ -125,8 +127,9 @@ impl fmt::Display for TokenType {
TokenType::GreaterGreater => write!(f, ">>"), TokenType::GreaterGreater => write!(f, ">>"),
TokenType::Hash => write!(f, "#"), TokenType::Hash => write!(f, "#"),
TokenType::If => write!(f, "if"), TokenType::If => write!(f, "if"),
TokenType::Indent => write!(f, "indent"),
TokenType::Identifier => write!(f, "identifier"), TokenType::Identifier => write!(f, "identifier"),
TokenType::Indent => write!(f, "indent"),
TokenType::Integer => write!(f, "integer"),
TokenType::LeftBrace => write!(f, "{{"), TokenType::LeftBrace => write!(f, "{{"),
TokenType::LeftBracket => write!(f, "["), TokenType::LeftBracket => write!(f, "["),
TokenType::LeftParen => write!(f, "("), TokenType::LeftParen => write!(f, "("),
@ -138,7 +141,6 @@ impl fmt::Display for TokenType {
TokenType::LogicalOr => write!(f, "||"), TokenType::LogicalOr => write!(f, "||"),
TokenType::Minus => write!(f, "-"), TokenType::Minus => write!(f, "-"),
TokenType::Not => write!(f, "not"), TokenType::Not => write!(f, "not"),
TokenType::Number => write!(f, "number"),
TokenType::Object => write!(f, "object"), TokenType::Object => write!(f, "object"),
TokenType::Plus => write!(f, "+"), TokenType::Plus => write!(f, "+"),
TokenType::Print => write!(f, "print"), TokenType::Print => write!(f, "print"),
@ -157,6 +159,4 @@ impl fmt::Display for TokenType {
} }
} }
impl Eq for TokenType { impl Eq for TokenType {}
}

View file

@ -2,31 +2,31 @@ use anyhow::anyhow;
use chrono::{DateTime, Utc}; use chrono::{DateTime, Utc};
use std::cmp::Ordering; use std::cmp::Ordering;
use std::collections::HashMap; use std::collections::HashMap;
use std::fmt::{write, Display, Formatter}; use std::fmt::{Display, Formatter, write};
use std::hash::{Hash, Hasher}; use std::hash::{Hash, Hasher};
use std::ops::{Add, BitAnd, BitOr, BitXor, Div, Mul, Neg, Not, Shl, Shr, Sub}; use std::ops::{Add, BitAnd, BitOr, BitXor, Div, Mul, Neg, Not, Shl, Shr, Sub};
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct StructDefinition { pub struct StructDefinition {
fields: Vec<String> fields: Vec<String>,
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Instance { pub struct StructValue {
definition: StructDefinition, definition: StructDefinition,
fields: Vec<Value> fields: Vec<Value>,
} }
impl Instance { impl StructValue {
pub fn new(definition: StructDefinition) -> Self { pub fn new(definition: StructDefinition) -> Self {
Self { Self {
definition, definition,
fields: Vec::new() fields: Vec::new(),
} }
} }
} }
impl Display for Instance { impl Display for StructValue {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
for (i, field) in self.definition.fields.iter().enumerate() { for (i, field) in self.definition.fields.iter().enumerate() {
write!(f, "{}: {}", field, self.fields[i])?; write!(f, "{}: {}", field, self.fields[i])?;
@ -50,9 +50,9 @@ pub enum Value {
Enum, Enum,
List(Vec<Value>), List(Vec<Value>),
Map(HashMap<Value, Value>), Map(HashMap<Value, Value>),
Struct(Instance), Struct(StructValue),
Error(String), Error(String),
Void Void,
} }
impl Into<Value> for i32 { impl Into<Value> for i32 {
@ -219,7 +219,6 @@ impl Mul<&Value> for &Value {
(Value::U64(a), Value::U64(b)) => Ok(Value::U64(a * b)), (Value::U64(a), Value::U64(b)) => Ok(Value::U64(a * b)),
(Value::F32(a), Value::F32(b)) => Ok(Value::F32(a * b)), (Value::F32(a), Value::F32(b)) => Ok(Value::F32(a * b)),
(Value::F64(a), Value::F64(b)) => Ok(Value::F64(a * b)), (Value::F64(a), Value::F64(b)) => Ok(Value::F64(a * b)),
//enum?
_ => Err(anyhow!("Cannot multiply")), _ => Err(anyhow!("Cannot multiply")),
} }
} }
@ -338,7 +337,8 @@ impl PartialEq for Value {
(Value::Map(a), Value::Map(b)) => { (Value::Map(a), Value::Map(b)) => {
let mut equal = true; let mut equal = true;
for (k, v) in a.iter() { for (k, v) in a.iter() {
if !b.contains_key(k) || b.get(k).unwrap() != v { //safe unwrap if !b.contains_key(k) || b.get(k).unwrap() != v {
//safe unwrap
equal = false; equal = false;
break; break;
} }
@ -359,8 +359,8 @@ impl PartialOrd for Value {
(Value::I32(a), Value::I32(b)) => Some(a.partial_cmp(b)?), (Value::I32(a), Value::I32(b)) => Some(a.partial_cmp(b)?),
(Value::I64(a), Value::I64(b)) => Some(a.partial_cmp(b)?), (Value::I64(a), Value::I64(b)) => Some(a.partial_cmp(b)?),
(Value::U32(a), Value::U32(b)) => Some(a.partial_cmp(b)?), (Value::U32(a), Value::U32(b)) => Some(a.partial_cmp(b)?),
(Value::U64(a), Value::U64(b)) =>Some(a.partial_cmp(b)?), (Value::U64(a), Value::U64(b)) => Some(a.partial_cmp(b)?),
(Value::F32(a), Value::F32(b)) =>Some(a.partial_cmp(b)?), (Value::F32(a), Value::F32(b)) => Some(a.partial_cmp(b)?),
(Value::F64(a), Value::F64(b)) => Some(a.partial_cmp(b)?), (Value::F64(a), Value::F64(b)) => Some(a.partial_cmp(b)?),
(Value::String(a), Value::String(b)) => Some(a.partial_cmp(b)?), (Value::String(a), Value::String(b)) => Some(a.partial_cmp(b)?),
(Value::Char(a), Value::Char(b)) => Some(a.partial_cmp(b)?), (Value::Char(a), Value::Char(b)) => Some(a.partial_cmp(b)?),
@ -370,7 +370,7 @@ impl PartialOrd for Value {
} }
} }
impl Hash for Value{ impl Hash for Value {
fn hash<H: Hasher>(&self, state: &mut H) { fn hash<H: Hasher>(&self, state: &mut H) {
std::mem::discriminant(self).hash(state); std::mem::discriminant(self).hash(state);

View file

@ -19,31 +19,36 @@ macro_rules! define_var {
}}; }};
} }
pub struct Vm {
chunk: Chunk,
ip: usize,
stack: Vec<Value>,
local_vars: HashMap<String, Value>,
error_occurred: bool,
}
pub fn interpret(chunk: Chunk) -> anyhow::Result<Value> { pub fn interpret(chunk: Chunk) -> anyhow::Result<Value> {
let mut vm = Vm { let mut vm = Vm {
chunk, chunk,
ip: 0, ip: 0,
stack: vec![], stack: vec![],
local_vars: HashMap::new(), local_vars: HashMap::new(),
error_occurred: false,
}; };
vm.run() vm.run()
} }
pub struct Vm {
chunk: Chunk,
ip: usize,
stack: Vec<Value>,
local_vars: HashMap<String, Value>,
}
impl Vm { impl Vm {
fn run(&mut self) -> anyhow::Result<Value> { fn run(&mut self) -> anyhow::Result<Value> {
loop { loop {
if self.error_occurred {
return Err(anyhow!("Error occurred"));
}
debug!("{:?}", self.stack); debug!("{:?}", self.stack);
let opcode = self.chunk.code[self.ip]; let opcode = self.chunk.code[self.ip];
self.ip += 1; self.ip += 1;
match opcode { match opcode {
OP_CONSTANT | OP_FALSE | OP_TRUE => { OP_CONSTANT => {
let value = &self.chunk.constants[self.chunk.code[self.ip] as usize]; let value = &self.chunk.constants[self.chunk.code[self.ip] as usize];
self.ip += 1; self.ip += 1;
self.push(value.clone()); self.push(value.clone());
@ -104,7 +109,7 @@ impl Vm {
OP_DEF_F64 => define_var!(self, F64), OP_DEF_F64 => define_var!(self, F64),
OP_DEF_STRING => define_var!(self, String), OP_DEF_STRING => define_var!(self, String),
OP_DEF_CHAR => define_var!(self, Char), OP_DEF_CHAR => define_var!(self, Char),
OP_DEF_BOOL =>define_var!(self, Bool), OP_DEF_BOOL => define_var!(self, Bool),
OP_DEF_DATE => define_var!(self, Date), OP_DEF_DATE => define_var!(self, Date),
OP_DEF_LIST => define_var!(self, List), OP_DEF_LIST => define_var!(self, List),
OP_DEF_MAP => define_var!(self, Map), OP_DEF_MAP => define_var!(self, Map),
@ -148,7 +153,10 @@ fn binary_op(vm: &mut Vm, op: impl Fn(&Value, &Value) -> anyhow::Result<Value> +
let result = op(&a, &b); let result = op(&a, &b);
match result { match result {
Ok(result) => vm.push(result), Ok(result) => vm.push(result),
Err(e) => println!("Error: {} {:?} and {:?}", e.to_string(), a, b), Err(e) => {
println!("Error: {} {:?} and {:?}", e.to_string(), a, b);
vm.error_occurred = true;
}
} }
} }
@ -169,8 +177,8 @@ pub const OP_DIVIDE: u16 = 5;
pub const OP_NEGATE: u16 = 6; pub const OP_NEGATE: u16 = 6;
pub const OP_PRINT: u16 = 7; pub const OP_PRINT: u16 = 7;
pub const OP_RETURN: u16 = 8; pub const OP_RETURN: u16 = 8;
pub const OP_TRUE: u16 = 9; // pub const OP_TRUE: u16 = 9;
pub const OP_FALSE: u16 = 10; // pub const OP_FALSE: u16 = 10; // obsolete, vacant space
pub const OP_AND: u16 = 11; pub const OP_AND: u16 = 11;
pub const OP_OR: u16 = 12; pub const OP_OR: u16 = 12;
pub const OP_NOT: u16 = 13; pub const OP_NOT: u16 = 13;
@ -186,7 +194,7 @@ pub const OP_BITXOR: u16 = 22;
pub const OP_SHR: u16 = 23; pub const OP_SHR: u16 = 23;
pub const OP_SHL: u16 = 24; pub const OP_SHL: u16 = 24;
pub const OP_POP: u16 = 25; pub const OP_POP: u16 = 25;
pub const OP_DEFINE: u16 = 26;// may be obsolete already pub const OP_DEFINE: u16 = 26; // may be obsolete already
pub const OP_GET: u16 = 27; pub const OP_GET: u16 = 27;
pub const OP_DEF_I32: u16 = 28; pub const OP_DEF_I32: u16 = 28;
pub const OP_DEF_I64: u16 = 29; pub const OP_DEF_I64: u16 = 29;