housekeeping

This commit is contained in:
Shautvast 2025-10-28 07:29:40 +01:00
parent a8850cc547
commit a714c37e79
6 changed files with 557 additions and 35 deletions

4
.gitignore vendored
View file

@ -1,2 +1,4 @@
/target /target
.DS_Store .DS_Store
.idea
*.iml

8
.idea/.gitignore generated vendored
View file

@ -1,8 +0,0 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

12
.idea/crudlang.iml generated
View file

@ -1,12 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="EMPTY_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/examples" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
<excludeFolder url="file://$MODULE_DIR$/target" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

8
.idea/modules.xml generated
View file

@ -1,8 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/crudlang.iml" filepath="$PROJECT_DIR$/.idea/crudlang.iml" />
</modules>
</component>
</project>

6
.idea/vcs.xml generated
View file

@ -1,6 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>

554
src/compiler.rs.txt Normal file
View file

@ -0,0 +1,554 @@
use crate::chunk::Chunk;
use crate::scanner::scan;
use crate::tokens::{Token, TokenType};
use crate::value::Value;
use crate::vm::{
OP_ADD, OP_BITAND, OP_BITOR, OP_BITXOR, OP_CONSTANT, OP_DEF_BOOL, OP_DEF_CHAR, OP_DEF_DATE,
OP_DEF_F64, OP_DEF_I32, OP_DEF_I64, OP_DEF_LIST, OP_DEF_MAP, OP_DEF_STRING, OP_DEF_STRUCT,
OP_DEFINE, OP_DIVIDE, OP_EQUAL, OP_GET, OP_GREATER, OP_GREATER_EQUAL, OP_LESS, OP_LESS_EQUAL,
OP_MULTIPLY, OP_NEGATE, OP_NOT, OP_POP, OP_PRINT, OP_RETURN, OP_SHL, OP_SHR, OP_SUBTRACT,
};
use anyhow::anyhow;
use std::collections::HashMap;
use std::sync::LazyLock;
use tracing::debug;
macro_rules! parse_num {
($s:ident, $variant:ident, $number:ident) => {{
$s.typestack.push(TokenType::$variant);
Value::$variant($number.parse()?)
}};
}
pub fn compile(source: &str) -> anyhow::Result<Chunk> {
let tokens = scan(source);
debug!("Scanned tokens: {:?}", tokens);
let mut compiler = Compiler {
source: source.lines().map(|s| s.to_string()).collect(),
chunk: Chunk::new("main"),
previous_token: &tokens[0],
current_token: &tokens[0],
tokens: &tokens,
current: 0,
typestack: vec![],
locals: vec![],
previous: 0,
had_error: false,
};
compiler.compile()
}
struct Compiler<'a> {
source: Vec<String>,
chunk: Chunk,
tokens: &'a Vec<Token>,
current: usize,
previous_token: &'a Token,
current_token: &'a Token,
typestack: Vec<TokenType>,
locals: Vec<String>,
previous: usize,
had_error: bool,
}
impl<'a> Compiler<'a> {
fn compile(mut self) -> anyhow::Result<Chunk> {
while !self.match_token(TokenType::Eof) {
self.declaration()?;
}
// self.expression()?;
// self.consume(TokenType::Eof, "Expect end of expression.")?;
self.emit_byte(OP_RETURN);
Ok(self.chunk)
}
fn declaration(&mut self) -> anyhow::Result<()> {
if self.match_token(TokenType::Let) {
self.let_declaration()
} else {
self.statement()
}
}
fn let_declaration(&mut self) -> anyhow::Result<()> {
let index = self.parse_variable("Expect variable name")?;
let mut declared_type = None;
if self.check(TokenType::Colon) {
self.consume(TokenType::Colon, "must not happen")?;
match self.current_token.token_type {
TokenType::I32
| TokenType::I64
| TokenType::U32
| TokenType::U64
| TokenType::Date
| TokenType::StringType
| TokenType::Char
| TokenType::Bool
| TokenType::ListType
| TokenType::MapType => declared_type = Some(self.current_token.token_type),
_ => return Err(anyhow!("Invalid type {:?}", self.current_token.token_type)),
}
self.advance()?;
}
if self.match_token(TokenType::Equal) {
self.expression(declared_type)?;
let derived_type = Some(&self.previous_token.token_type);
self.consume(TokenType::Eol, "Expect end of line")?;
self.define_variable(declared_type, derived_type, index)?;
} else {
return Err(anyhow!(
"You cannot declare a variable without initializing it."
));
}
Ok(())
}
fn parse_variable(&mut self, message: &str) -> anyhow::Result<usize> {
self.consume(TokenType::Identifier, message)?;
self.identifier_constant(self.previous_token)
}
fn identifier_constant(&mut self, token: &Token) -> anyhow::Result<usize> {
let name = token.lexeme.clone();
let index = self.chunk.add_constant(Value::String(name));
Ok(index)
}
fn define_variable(
&mut self,
var_type: Option<TokenType>,
derived_type: Option<&TokenType>,
index: usize,
) -> anyhow::Result<()> {
let def_op = match var_type {
Some(TokenType::I32) => OP_DEF_I32,
Some(TokenType::I64) => OP_DEF_I64,
Some(TokenType::U32) => OP_DEF_I64,
Some(TokenType::U64) => OP_DEF_I64,
Some(TokenType::Date) => OP_DEF_DATE,
Some(TokenType::StringType) => OP_DEF_STRING,
Some(TokenType::Char) => OP_DEF_CHAR,
Some(TokenType::Bool) => OP_DEF_BOOL,
Some(TokenType::ListType) => OP_DEF_LIST,
Some(TokenType::MapType) => OP_DEF_MAP,
Some(TokenType::Object) => OP_DEF_STRUCT,
_ => match derived_type {
Some(TokenType::StringType) => OP_DEF_STRING,
Some(TokenType::Bool) => OP_DEF_BOOL,
Some(TokenType::Char) => OP_DEF_CHAR,
Some(TokenType::F64) => OP_DEF_F64,
Some(TokenType::I64) => OP_DEF_I64,
Some(TokenType::ListType) => OP_DEF_LIST,
Some(TokenType::MapType) => OP_DEF_MAP,
_ => OP_DEFINE,
},
};
self.emit_bytes(def_op, index as u16);
Ok(())
}
fn statement(&mut self) -> anyhow::Result<()> {
if self.match_token(TokenType::Print) {
self.print_statement()
} else {
self.expression_statement()
}
}
fn expression_statement(&mut self) -> anyhow::Result<()> {
debug!("expression statement");
self.expression(None)?;
self.emit_byte(OP_POP);
Ok(())
}
fn print_statement(&mut self) -> anyhow::Result<()> {
self.expression(None)?;
self.consume(
TokenType::Eol,
"No further statements expected. Please start on a new line after the first one.\n",
)?;
self.emit_byte(OP_PRINT);
Ok(())
}
fn advance(&mut self) -> anyhow::Result<()> {
if self.current < self.tokens.len() - 1 {
self.previous = self.current;
self.previous_token = &self.tokens[self.previous];
self.current += 1;
self.current_token = &self.tokens[self.current];
}
if let TokenType::Error = self.current_token.token_type {
self.had_error = true;
Err(anyhow!(
"Error at {} on line {}",
self.current_token.lexeme,
self.current_token.line
))
} else {
Ok(())
}
}
fn consume(&mut self, token_type: TokenType, message: &str) -> anyhow::Result<()> {
if token_type == self.current_token.token_type {
self.advance()
} else {
Err(anyhow!(
r#"{} at line {}: "{}""#,
message,
self.current_token.line + 1,
self.source[self.current_token.line]
))
}
}
fn match_token(&mut self, token_type: TokenType) -> bool {
if !self.check(token_type) {
false
} else {
self.advance().expect("token expected");
true
}
}
fn check(&mut self, token_type: TokenType) -> bool {
self.current_token.token_type == token_type
}
fn expression(&mut self, expected_type: Option<TokenType>) -> anyhow::Result<()> {
self.parse_precedence(PREC_ASSIGNMENT, expected_type)?;
Ok(())
}
fn parse_precedence(
&mut self,
precedence: usize,
expected_type: Option<TokenType>,
) -> anyhow::Result<()> {
self.advance()?;
let rule = get_rule(&self.previous_token.token_type);
debug!("Precedence rule: {:?}", rule);
if let Some(prefix) = rule.prefix {
prefix(self, expected_type)?;
while precedence <= get_rule(&self.current_token.token_type).precedence {
self.advance()?;
let infix_rule = get_rule(&self.previous_token.token_type).infix;
if let Some(infix) = infix_rule {
infix(self, expected_type)?;
}
}
} else {
return Err(anyhow!("Expect expression."));
}
Ok(())
}
fn emit_byte(&mut self, byte: u16) {
self.chunk.add(byte, self.previous_token.line);
}
fn emit_bytes(&mut self, b1: u16, b2: u16) {
self.emit_byte(b1);
self.emit_byte(b2);
}
fn emit_constant(&mut self, value: Value) {
let index = self.chunk.add_constant(value);
self.emit_bytes(OP_CONSTANT, index as u16);
}
}
type ParseFn = fn(&mut Compiler, expected_type: Option<TokenType>) -> anyhow::Result<()>;
#[derive(Debug)]
struct Rule {
prefix: Option<ParseFn>,
infix: Option<ParseFn>,
precedence: usize,
}
impl Rule {
fn new(prefix: Option<ParseFn>, infix: Option<ParseFn>, precedence: usize) -> Self {
Self {
prefix,
infix,
precedence,
}
}
}
fn number(s: &mut Compiler, mut expected_type: Option<TokenType>) -> anyhow::Result<()> {
debug!("number: expected type {:?}", expected_type);
// coerce unknown numeric type to the expected type of the expression if any
if let None = expected_type {
if !s.typestack.is_empty() {
expected_type = Some(*s.typestack.last().unwrap());
}
}
let number = &s.previous_token.lexeme;
let value = if let Some(expected_type) = expected_type {
match expected_type {
TokenType::I32 => parse_num!(s, I32, number),
TokenType::I64 => parse_num!(s, I64, number),
TokenType::U32 => parse_num!(s, U32, number),
TokenType::U64 => parse_num!(s, U64, number),
TokenType::F32 => parse_num!(s, F32, number),
TokenType::F64 => parse_num!(s, F64, number),
_ => {
return Err(anyhow!(
"Invalid type: expected {} value, got {}({})",
expected_type,
&s.previous_token.token_type,
number
));
}
}
} else {
match s.previous_token.token_type {
TokenType::Integer => Value::I64(number.parse()?),
TokenType::FloatingPoint => Value::F64(number.parse()?),
_ => panic!("I did not think this would happen")
}
};
s.emit_constant(value);
Ok(())
}
fn literal(s: &mut Compiler, expected_type: Option<TokenType>) -> anyhow::Result<()> {
let actual_type = &s.previous_token.token_type;
if let Some(expected_type) = expected_type {
match (actual_type, expected_type) {
(TokenType::False, TokenType::Bool) => {
s.typestack.push(TokenType::Bool);
s.emit_constant(Value::Bool(false))
}
(TokenType::True, TokenType::Bool) => {
s.typestack.push(TokenType::Bool);
s.emit_constant(Value::Bool(true))
}
(TokenType::StringType, TokenType::StringType) => {
s.typestack.push(TokenType::StringType);
s.emit_constant(Value::String(s.previous_token.lexeme.clone()))
}
//list
//map
//struct value
_ => {
return Err(anyhow!(
"Invalid type: expected {} value, got {}({})",
expected_type,
&s.previous_token.token_type,
s.previous_token.lexeme
));
}
}
} else {
match actual_type {
TokenType::False => s.emit_constant(Value::Bool(false)),
TokenType::True => s.emit_constant(Value::Bool(true)),
TokenType::StringType => s.emit_constant(Value::String(s.previous_token.lexeme.clone())),
_ => {}
}
}
Ok(())
}
fn skip(s: &mut Compiler, _expected_type: Option<TokenType>) -> anyhow::Result<()> {
Ok(())
}
fn grouping(s: &mut Compiler, _expected_type: Option<TokenType>) -> anyhow::Result<()> {
s.expression(None)?;
s.consume(TokenType::RightParen, "Expect ')' after expression.")
}
fn unary(s: &mut Compiler, expected_type: Option<TokenType>) -> anyhow::Result<()> {
let operator_type = s.previous_token.token_type;
s.parse_precedence(PREC_UNARY, None)?;
match operator_type {
TokenType::Minus => {
s.emit_byte(OP_NEGATE);
}
TokenType::Bang => {
s.emit_byte(OP_NOT);
}
_ => unimplemented!("unary other than ! and -"),
}
Ok(())
}
fn binary(s: &mut Compiler, expected_type: Option<TokenType>) -> anyhow::Result<()> {
let operator_type = &s.previous_token.token_type;
debug!("operator {:?}", operator_type);
debug!("expected type {:?}", expected_type);
let rule = get_rule(operator_type);
s.parse_precedence(rule.precedence + 1, None)?;
match operator_type {
TokenType::Plus => s.emit_byte(OP_ADD),
TokenType::Minus => s.emit_byte(OP_SUBTRACT),
TokenType::Star => s.emit_byte(OP_MULTIPLY),
TokenType::Slash => s.emit_byte(OP_DIVIDE),
TokenType::BitAnd => s.emit_byte(OP_BITAND),
TokenType::BitOr => s.emit_byte(OP_BITOR),
TokenType::BitXor => s.emit_byte(OP_BITXOR),
TokenType::GreaterGreater => s.emit_byte(OP_SHR),
TokenType::LessLess => s.emit_byte(OP_SHL),
TokenType::EqualEqual => s.emit_byte(OP_EQUAL),
TokenType::Greater => s.emit_byte(OP_GREATER),
TokenType::GreaterEqual => s.emit_byte(OP_GREATER_EQUAL),
TokenType::Less => s.emit_byte(OP_LESS),
TokenType::LessEqual => s.emit_byte(OP_LESS_EQUAL),
_ => unimplemented!("binary other than plus, minus, star, slash"),
}
Ok(())
}
fn variable(s: &mut Compiler, expected_type: Option<TokenType>) -> anyhow::Result<()> {
let index = s.identifier_constant(s.previous_token)?;
s.emit_bytes(OP_GET, index as u16);
Ok(())
}
fn get_rule(operator_type: &TokenType) -> &'static Rule {
debug!("{:?}", operator_type);
RULES.get(operator_type).unwrap()
}
static RULES: LazyLock<HashMap<TokenType, Rule>> = LazyLock::new(|| {
let mut rules: HashMap<TokenType, Rule> = HashMap::new();
rules.insert(TokenType::Bang, Rule::new(Some(unary), None, PREC_UNARY));
rules.insert(TokenType::BangEqual, Rule::new(None, None, PREC_EQUALITY));
rules.insert(TokenType::BitOr, Rule::new(None, Some(binary), PREC_BITOR));
rules.insert(
TokenType::BitXor,
Rule::new(None, Some(binary), PREC_BITXOR),
);
rules.insert(TokenType::Colon, Rule::new(None, None, PREC_NONE));
rules.insert(TokenType::Comma, Rule::new(None, None, PREC_NONE));
rules.insert(TokenType::Date, Rule::new(None, None, PREC_NONE));
rules.insert(TokenType::Dot, Rule::new(None, None, PREC_NONE));
rules.insert(TokenType::Else, Rule::new(None, None, PREC_NONE));
rules.insert(TokenType::Eof, Rule::new(Some(skip), None, PREC_NONE));
rules.insert(TokenType::Eol, Rule::new(Some(skip), None, PREC_NONE));
rules.insert(TokenType::Equal, Rule::new(None, None, PREC_NONE));
rules.insert(TokenType::False, Rule::new(None, None, PREC_NONE));
rules.insert(TokenType::Fn, Rule::new(None, None, PREC_NONE));
rules.insert(
TokenType::EqualEqual,
Rule::new(None, Some(binary), PREC_EQUALITY),
);
rules.insert(TokenType::False, Rule::new(Some(literal), None, PREC_NONE));
rules.insert(TokenType::FloatingPoint, Rule::new(Some(number), None, PREC_NONE));
rules.insert(
TokenType::Greater,
Rule::new(None, Some(binary), PREC_COMPARISON),
);
rules.insert(
TokenType::GreaterEqual,
Rule::new(None, Some(binary), PREC_COMPARISON),
);
rules.insert(
TokenType::GreaterGreater,
Rule::new(None, Some(binary), PREC_BITSHIFT),
);
rules.insert(TokenType::I32, Rule::new(None, None, PREC_NONE));
rules.insert(TokenType::I64, Rule::new(None, None, PREC_NONE));
rules.insert(
TokenType::Identifier,
Rule::new(Some(variable), None, PREC_NONE),
);
rules.insert(TokenType::Integer, Rule::new(Some(number), None, PREC_NONE));
rules.insert(TokenType::Indent, Rule::new(Some(skip), None, PREC_NONE));
rules.insert(TokenType::LeftBrace, Rule::new(None, None, PREC_NONE));
rules.insert(TokenType::LeftBracket, Rule::new(None, None, PREC_NONE));
rules.insert(
TokenType::LeftParen,
Rule::new(Some(grouping), None, PREC_NONE),
);
rules.insert(
TokenType::Less,
Rule::new(None, Some(binary), PREC_COMPARISON),
);
rules.insert(
TokenType::LessEqual,
Rule::new(None, Some(binary), PREC_COMPARISON),
);
rules.insert(
TokenType::LessLess,
Rule::new(None, Some(binary), PREC_BITSHIFT),
);
rules.insert(
TokenType::LogicalAnd,
Rule::new(None, Some(binary), PREC_AND),
);
rules.insert(TokenType::LogicalOr, Rule::new(None, Some(binary), PREC_OR));
rules.insert(
TokenType::Minus,
Rule::new(Some(unary), Some(binary), PREC_TERM),
);
rules.insert(TokenType::Plus, Rule::new(None, Some(binary), PREC_TERM));
rules.insert(TokenType::Print, Rule::new(None, None, PREC_NONE));
rules.insert(TokenType::Return, Rule::new(None, None, PREC_NONE));
rules.insert(TokenType::RightParen, Rule::new(None, None, PREC_NONE));
rules.insert(TokenType::RightBrace, Rule::new(None, None, PREC_NONE));
rules.insert(TokenType::RightBracket, Rule::new(None, None, PREC_NONE));
rules.insert(TokenType::Slash, Rule::new(None, Some(binary), PREC_FACTOR));
rules.insert(TokenType::Star, Rule::new(None, Some(binary), PREC_FACTOR));
rules.insert(TokenType::StringType, Rule::new(Some(literal), None, PREC_NONE));
rules.insert(
TokenType::BitAnd,
Rule::new(None, Some(binary), PREC_BITAND),
);
rules.insert(TokenType::StringType, Rule::new(None, None, PREC_NONE));
rules.insert(TokenType::Struct, Rule::new(None, None, PREC_NONE));
rules.insert(TokenType::True, Rule::new(Some(literal), None, PREC_NONE));
rules.insert(TokenType::U32, Rule::new(None, None, PREC_NONE));
rules.insert(TokenType::U64, Rule::new(None, None, PREC_NONE));
rules.insert(TokenType::While, Rule::new(None, None, PREC_NONE));
rules
});
const PREC_NONE: usize = 0;
const PREC_ASSIGNMENT: usize = 1;
const PREC_OR: usize = 2;
const PREC_AND: usize = 3;
const PREC_BITAND: usize = 4;
const PREC_BITOR: usize = 5;
const PREC_BITXOR: usize = 6;
const PREC_EQUALITY: usize = 7;
const PREC_COMPARISON: usize = 8;
const PREC_BITSHIFT: usize = 9;
const PREC_TERM: usize = 10;
const PREC_FACTOR: usize = 11;
const PREC_UNARY: usize = 12;
const PREC_CALL: usize = 13;
const PREC_PRIMARY: usize = 14;
enum ValueType {
DateType,
BoolType,
CharType,
F32Type,
F64Type,
I32Type,
I64Type,
ObjectType,
U32Type,
U64Type,
StringType,
ListType,
MapType,
}