tipi-lang/src/ast_compiler.rs
2025-10-28 07:27:16 +01:00

651 lines
21 KiB
Rust

use crate::tokens::TokenType::{Bang, Bool, Char, Colon, Date, Eol, Equal, F32, F64, False, FloatingPoint, Fn, Greater, GreaterEqual, GreaterGreater, I32, I64, Identifier, If, Indent, Integer, LeftBracket, LeftParen, Less, LessEqual, LessLess, Let, ListType, MapType, Minus, Object, Plus, Print, RightParen, SingleRightArrow, Slash, Star, StringType, True, U32, U64, RightBracket};
use crate::tokens::{Token, TokenType};
use crate::value::Value;
use log::debug;
use std::collections::HashMap;
pub fn compile(tokens: Vec<Token>) -> anyhow::Result<Vec<Statement>> {
let mut compiler = AstCompiler::new(tokens);
compiler.compile(0)
}
#[derive(Debug, Clone)]
pub(crate) struct Function {
pub(crate) name: Token,
pub(crate) parameters: Vec<Parameter>,
pub(crate) return_type: TokenType,
pub(crate) body: Vec<Statement>,
}
struct AstCompiler {
tokens: Vec<Token>,
current: usize,
had_error: bool,
vars: Vec<Expression>,
indent: Vec<usize>,
functions: HashMap<String, Function>,
}
impl AstCompiler {
fn new(tokens: Vec<Token>) -> Self {
Self {
tokens,
current: 0,
had_error: false,
vars: vec![],
indent: vec![],
functions: HashMap::new(),
}
}
fn compile(&mut self, expected_indent: usize) -> anyhow::Result<Vec<Statement>> {
let mut statements = vec![];
while !self.is_at_end() {
let statement = self.indent(expected_indent)?;
if let Some(statement) = statement {
statements.push(statement);
} else {
break;
}
}
Ok(statements)
}
fn indent(&mut self, expected_indent: usize) -> anyhow::Result<Option<Statement>> {
// skip empty lines
while self.check(Eol) {
self.advance();
}
let mut indent_on_line = 0;
// keep track of indent level
while self.match_token(vec![Indent]) {
indent_on_line += 1;
}
if indent_on_line > expected_indent {
panic!(
"unexpected indent level {} vs {}",
indent_on_line, expected_indent
);
} else if indent_on_line < expected_indent {
self.indent.pop();
return Ok(None);
} else {
self.indent.push(indent_on_line);
Ok(Some(self.declaration()?))
}
}
fn declaration(&mut self) -> anyhow::Result<Statement> {
if self.match_token(vec![Fn]) {
self.function_declaration()
} else if self.match_token(vec![Let]) {
self.let_declaration()
} else {
self.statement()
}
}
fn function_declaration(&mut self) -> anyhow::Result<Statement> {
let name_token = self.consume(Identifier, "Expect function name.")?;
self.consume(LeftParen, "Expect '(' after function name.")?;
let mut parameters = vec![];
while !self.check(RightParen) {
if parameters.len() >= 25 {
return Err(anyhow::anyhow!("Too many parameters."));
}
let parm_name = self.consume(Identifier, "Expect parameter name.")?;
self.consume(Colon, "Expect : after parameter name")?;
let var_type = self.peek().token_type;
self.vars.push(Expression::Variable {
name: parm_name.lexeme.to_string(),
var_type,
line: parm_name.line,
});
self.advance();
parameters.push(Parameter {
name: parm_name,
var_type,
});
}
self.consume(RightParen, "Expect ')' after parameters.")?;
let return_type = if self.check(SingleRightArrow) {
self.consume(SingleRightArrow, "")?;
self.advance().token_type
} else {
TokenType::Void
};
self.consume(Colon, "Expect colon (:) after function declaration.")?;
self.consume(Eol, "Expect end of line.")?;
let current_indent = self.indent.last().unwrap();
let body = self.compile(current_indent + 1)?;
let function = Function {
name: name_token.clone(),
parameters,
return_type,
body,
};
let function_stmt = Statement::FunctionStmt {
function: function.clone(),
};
self.functions.insert(name_token.lexeme, function.clone());
Ok(function_stmt)
}
fn let_declaration(&mut self) -> anyhow::Result<Statement> {
let name_token = self.consume(Identifier, "Expect variable name.")?;
let declared_type = if self.check(Colon) {
self.advance();
Some(self.advance().token_type)
} else {
None
};
if self.match_token(vec![Equal]) {
let initializer = self.expression()?;
self.consume(Eol, "Expect end of line after initializer.")?;
let inferred_type = initializer.infer_type();
let var_type = match calculate_type(declared_type, inferred_type) {
Ok(var_type) => var_type,
Err(e) => {
println!("error at line {}", name_token.line);
self.had_error = true;
return Err(e);
}
};
self.vars.push(Expression::Variable {
name: name_token.lexeme.to_string(),
var_type,
line: name_token.line,
});
Ok(Statement::VarStmt {
name: name_token,
var_type,
initializer,
})
} else {
Err(anyhow::anyhow!("Uninitialized variables are not allowed."))?
}
}
fn statement(&mut self) -> anyhow::Result<Statement> {
if self.match_token(vec![Print]) {
self.print_statement()
} else {
self.expr_statement()
}
}
fn print_statement(&mut self) -> anyhow::Result<Statement> {
let expr = self.expression()?;
self.consume(Eol, "Expect end of line after print statement.")?;
Ok(Statement::PrintStmt { value: expr })
}
fn expr_statement(&mut self) -> anyhow::Result<Statement> {
let expr = self.expression()?;
self.consume(Eol, "Expect end of line after expression.")?;
Ok(Statement::ExpressionStmt { expression: expr })
}
fn expression(&mut self) -> anyhow::Result<Expression> {
self.or()
}
fn or(&mut self) -> anyhow::Result<Expression> {
let mut expr = self.and()?;
self.binary(vec![TokenType::LogicalOr], expr)
}
fn and(&mut self) -> anyhow::Result<Expression> {
let expr = self.bit_and()?;
self.binary(vec![TokenType::LogicalAnd], expr)
}
fn bit_and(&mut self) -> anyhow::Result<Expression> {
let expr = self.bit_or()?;
self.binary(vec![TokenType::BitAnd], expr)
}
fn bit_or(&mut self) -> anyhow::Result<Expression> {
let expr = self.bit_xor()?;
self.binary(vec![TokenType::BitOr], expr)
}
fn bit_xor(&mut self) -> anyhow::Result<Expression> {
let expr = self.equality()?;
self.binary(vec![TokenType::BitXor], expr)
}
fn equality(&mut self) -> anyhow::Result<Expression> {
let expr = self.comparison()?;
self.binary(vec![TokenType::EqualEqual, TokenType::BangEqual], expr)
}
fn comparison(&mut self) -> anyhow::Result<Expression> {
let expr = self.bitshift()?;
self.binary(vec![Greater, GreaterEqual, Less, LessEqual], expr)
}
fn bitshift(&mut self) -> anyhow::Result<Expression> {
let mut expr = self.term()?;
self.binary(vec![GreaterGreater, LessLess], expr)
}
fn term(&mut self) -> anyhow::Result<Expression> {
let expr = self.factor()?;
self.binary(vec![Minus, Plus], expr)
}
fn factor(&mut self) -> anyhow::Result<Expression> {
let expr = self.unary()?;
self.binary(vec![Slash, Star], expr)
}
fn binary(
&mut self,
types: Vec<TokenType>,
mut expr: Expression,
) -> anyhow::Result<Expression> {
while self.match_token(types.clone()) {
let operator = self.previous().clone();
let right = self.comparison()?;
expr = Expression::Binary {
line: operator.line,
left: Box::new(expr),
operator,
right: Box::new(right),
};
}
Ok(expr)
}
fn unary(&mut self) -> anyhow::Result<Expression> {
if self.match_token(vec![Bang, Minus]) {
let operator = self.previous().clone();
let right = self.unary()?;
Ok(Expression::Unary {
line: self.peek().line,
operator,
right: Box::new(right),
})
} else {
self.primary()
}
}
fn primary(&mut self) -> anyhow::Result<Expression> {
debug!("primary {:?}", self.peek());
Ok(if self.match_token(vec![LeftBracket]) {
self.list()?
} else if self.match_token(vec![False]) {
Expression::Literal {
line: self.peek().line,
literaltype: Bool,
value: Value::Bool(false),
}
} else if self.match_token(vec![True]) {
Expression::Literal {
line: self.peek().line,
literaltype: Bool,
value: Value::Bool(true),
} //, FloatingPoint, Text
} else if self.match_token(vec![Integer]) {
Expression::Literal {
line: self.peek().line,
literaltype: Integer,
value: Value::I64(self.previous().lexeme.parse()?),
}
} else if self.match_token(vec![FloatingPoint]) {
Expression::Literal {
line: self.peek().line,
literaltype: FloatingPoint,
value: Value::F64(self.previous().lexeme.parse()?),
}
} else if self.match_token(vec![StringType]) {
Expression::Literal {
line: self.peek().line,
literaltype: StringType,
value: Value::String(self.previous().lexeme.to_string()),
}
} else if self.match_token(vec![LeftParen]) {
let expr = self.expression()?;
self.consume(RightParen, "Expect ')' after expression.")?;
Expression::Grouping {
line: self.peek().line,
expression: Box::new(expr),
}
} else {
let token = self.advance().clone();
debug!("{:?}", token);
if self.match_token(vec![LeftParen]) {
self.function_call(token.lexeme)?
} else {
self.variable_lookup(&token)?
}
})
}
fn list(&mut self) -> anyhow::Result<Expression> {
let mut list = vec![];
while !self.match_token(vec![RightBracket]){
list.push(self.expression()?);
if self.peek().token_type == TokenType::Comma {
self.advance();
} else {
self.consume(RightBracket, "Expect ']' after list.")?;
break;
}
}
Ok(Expression::List {
values: list, literaltype: ListType, line: self.peek().line},
)
}
fn variable_lookup(&mut self, token: &Token) -> anyhow::Result<Expression> {
let (var_name, var_type) = self
.vars
.iter()
.filter_map(|e| {
if let Expression::Variable { name, var_type, .. } = e {
Some((name, var_type))
} else {
None
}
})
.find(|e| e.0 == &token.lexeme)
.ok_or_else(|| return anyhow::anyhow!("Unknown variable: {:?}", token))?;
Ok(Expression::Variable {
name: var_name.to_string(),
var_type: var_type.clone(),
line: token.line,
})
}
fn function_call(&mut self, name: String) -> anyhow::Result<Expression> {
let function_name = self.functions.get(&name).unwrap().name.lexeme.clone();
let function = self.functions.get(&function_name).unwrap().clone();
let mut arguments = vec![];
while !self.match_token(vec![RightParen]) {
if arguments.len() >= 25 {
return Err(anyhow::anyhow!("Too many parameters."));
}
let arg = self.expression()?;
let arg_type = arg.infer_type();
if arg_type != function.parameters[arguments.len()].var_type {
return Err(anyhow::anyhow!(
"Incompatible argument types. Expected {}, found {}",
function.parameters[arguments.len()].var_type,
arg_type
));
}
arguments.push(arg);
if self.peek().token_type == TokenType::Comma {
self.advance();
} else {
self.consume(RightParen, "Expect ')' after arguments.")?;
break;
}
}
let return_type = self.functions.get(&name).unwrap().return_type;
Ok(Expression::FunctionCall {
line: self.peek().line,
name,
arguments,
return_type,
})
}
fn consume(&mut self, token_type: TokenType, message: &str) -> anyhow::Result<Token> {
if self.check(token_type) {
self.advance();
} else {
self.had_error = true;
return Err(anyhow::anyhow!(message.to_string()));
}
Ok(self.previous().clone())
}
fn match_token(&mut self, tokens: Vec<TokenType>) -> bool {
for tt in tokens {
if self.check(tt) {
self.advance();
return true;
}
}
false
}
fn check(&self, token_type: TokenType) -> bool {
if self.is_at_end() {
false
} else {
self.peek().token_type == token_type
}
}
fn peek(&self) -> &Token {
&self.tokens[self.current]
}
fn previous(&self) -> &Token {
&self.tokens[self.current - 1]
}
fn advance(&mut self) -> &Token {
if !self.is_at_end() {
self.current += 1;
}
&self.previous()
}
fn is_at_end(&self) -> bool {
self.peek().token_type == TokenType::Eof
}
}
fn calculate_type(
declared_type: Option<TokenType>,
inferred_type: TokenType,
) -> anyhow::Result<TokenType> {
println!(
"declared type {:?} inferred type: {:?}",
declared_type, inferred_type
);
Ok(if let Some(declared_type) = declared_type {
if declared_type != inferred_type {
match (declared_type, inferred_type) {
(I32, I64) => I32,
(U32, U64) => U32,
(F32, F64) => F32,
(F64, I64) => F64,
(U64, I64) => U64,
(U64, I32) => U64,
(StringType, _) => StringType, // meh, this all needs rigorous testing
_ => {
return Err(anyhow::anyhow!(
"Incompatible types. Expected {}, found {}",
declared_type,
inferred_type
));
}
}
} else {
declared_type
}
} else {
match inferred_type {
Integer | I64 => I64,
FloatingPoint => F64,
Bool => Bool,
Date => Date,
ListType => ListType,
MapType => MapType,
Object => Object,
_ => panic!("Unexpected type"),
}
})
}
#[derive(Debug, Clone)]
pub enum Statement {
ExpressionStmt {
expression: Expression,
},
VarStmt {
name: Token,
var_type: TokenType,
initializer: Expression,
},
PrintStmt {
value: Expression,
},
FunctionStmt {
function: Function,
},
}
impl Statement {
pub fn line(&self) -> usize {
match self {
Statement::ExpressionStmt { expression } => expression.line(),
Statement::VarStmt { name, .. } => name.line,
Statement::PrintStmt { value } => value.line(),
Statement::FunctionStmt { function, .. } => function.name.line,
}
}
}
#[derive(Debug, Clone)]
pub struct Parameter {
pub(crate) name: Token,
pub(crate) var_type: TokenType,
}
#[derive(Debug, Clone)]
pub enum Expression {
Binary {
line: usize,
left: Box<Expression>,
operator: Token,
right: Box<Expression>,
},
Unary {
line: usize,
operator: Token,
right: Box<Expression>,
},
Grouping {
line: usize,
expression: Box<Expression>,
},
Literal {
line: usize,
literaltype: TokenType,
value: Value,
},
List {
line: usize,
literaltype: TokenType,
values: Vec<Expression>,
},
Variable {
line: usize,
name: String,
var_type: TokenType,
},
FunctionCall {
line: usize,
name: String,
arguments: Vec<Expression>,
return_type: TokenType,
},
}
impl Expression {
pub fn line(&self) -> usize {
match self {
Self::Binary { line, .. } => *line,
Self::Unary { line, .. } => *line,
Self::Grouping { line, .. } => *line,
Self::Literal { line, .. } => *line,
Self::List { line, .. } => *line,
Self::Variable { line, .. } => *line,
Self::FunctionCall { line, .. } => *line,
}
}
pub fn infer_type(&self) -> TokenType {
match self {
Self::Binary {
left,
operator,
right,
..
} => {
let left_type = left.infer_type();
let right_type = right.infer_type();
if vec![Greater, Less, GreaterEqual, LessEqual].contains(&operator.token_type) {
Bool
} else if left_type == right_type {
// map to determined numeric type if yet undetermined (32 or 64 bits)
match left_type {
FloatingPoint => F64,
Integer => I64,
_ => left_type,
}
} else {
if let Plus = operator.token_type {
// includes string concatenation with numbers
// followed by type coercion to 64 bits for numeric types
debug!("coerce {} : {}", left_type, right_type);
match (left_type, right_type) {
(_, StringType) => StringType,
(StringType, _) => StringType,
(FloatingPoint, _) => F64,
(Integer, FloatingPoint) => F64,
(Integer, _) => I64,
(I64, Integer) => I64,
(F64, _) => F64,
(U64, U32) => U64,
(I64, I32) => I64,
// could add a date and a duration. future work
// could add a List and a value. also future work
// could add a Map and a tuple. Will I add tuple types? Future work!
_ => panic!("Unexpected coercion"),
}
// could have done some fall through here, but this will fail less gracefully,
// so if my thinking is wrong or incomplete it will panic
} else {
// type coercion to 64 bits for numeric types
debug!("coerce {} : {}", left_type, right_type);
match (left_type, right_type) {
(FloatingPoint, _) => F64,
(Integer, FloatingPoint) => F64,
(Integer, I64) => I64,
(I64, FloatingPoint) => F64,
(F64, _) => F64,
(U64, U32) => U64,
(I64, I32) => I64,
(I64, Integer) => I64,
_ => panic!("Unexpected coercion"),
}
}
}
}
Self::Grouping { expression,.. } => expression.infer_type(),
Self::Literal { literaltype, .. } => literaltype.clone(),
Self::List { literaltype, .. } => literaltype.clone(),
Self::Unary { right, .. } => right.infer_type(),
Self::Variable { var_type, .. } => var_type.clone(),
Self::FunctionCall { return_type, .. } => return_type.clone(),
}
}
}