From a66bf2e16ea1f388e0512efe1f845f6b9afaa5f9 Mon Sep 17 00:00:00 2001 From: Sander Hautvast Date: Tue, 28 Jan 2020 09:28:07 +0100 Subject: [PATCH] * implemented parser without error handling and without identifiers. It works now for arithmetic expressions. * Updated the way literals are stored in tokens so that they are no longer of type Any but proper enum Value types, for strings, numeric and boolean values. --- src/expression.rs | 52 ++++++++++ src/main.rs | 12 ++- src/parser.rs | 157 +++++++++++++++++++++++++++++ src/parser_tests.rs | 30 ++++++ src/scanner.rs | 20 ++-- src/{tests.rs => scanner_tests.rs} | 15 ++- src/tokens.rs | 62 ++++++------ 7 files changed, 301 insertions(+), 47 deletions(-) create mode 100644 src/expression.rs create mode 100644 src/parser.rs create mode 100644 src/parser_tests.rs rename src/{tests.rs => scanner_tests.rs} (86%) diff --git a/src/expression.rs b/src/expression.rs new file mode 100644 index 0000000..8749e2e --- /dev/null +++ b/src/expression.rs @@ -0,0 +1,52 @@ +use crate::tokens::{Token, Value}; + +pub trait Visitor { + fn visit_expr(&mut self, expr: &Expr) -> R; +} + +#[derive(Debug, PartialOrd, PartialEq)] +pub enum Expr { + Binary(Box, Token, Box), + Grouping(Box), + Literal(Value), + Unary(Token, Box), +} + +pub struct AstPrinter {} + +impl Visitor for AstPrinter { + fn visit_expr(&mut self, expr: &Expr) -> String { + return match expr { + Expr::Binary(left, operator, right) => { + self.parenthesize(&operator.lexeme, &[left, right]) + } + Expr::Grouping(expression) => { + self.parenthesize("group", &[expression]) + } + Expr::Literal(value) => { + format!("{:?}", value) + } + Expr::Unary(operator, right) => { + self.parenthesize(&operator.lexeme, &[right]) + } + }; + } +} + +impl AstPrinter { + fn parenthesize(&mut self, name: &str, expressions: &[&Expr]) -> String { + let mut buf = String::from("("); + buf.push_str(name); + buf.push_str(" "); + + let mut index = 0; + for expr in expressions { + if index > 0 { buf.push_str(" "); } + buf.push_str(&self.visit_expr(expr)); + index += 1; + } + + buf.push_str(")"); + buf + } +} \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index c8e98d1..232fc38 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,12 +6,17 @@ use std::fs::File; use std::io::{self, BufRead, Read, Write}; use std::process; +use crate::expression::{Visitor, AstPrinter}; + mod scanner; mod tokens; mod keywords; +mod expression; +mod parser; #[cfg(test)] -mod tests; +mod scanner_tests; +mod parser_tests; /// main /// no arguments: run interactively @@ -84,9 +89,8 @@ fn run_prompt() { fn run(source: String) -> Result<&'static str, &'static str> { return match scanner::scan_tokens(source.as_str()) { Ok(tokens) => { - for token in tokens { - println!("{:?}", token); - } + let expr = parser::parse(tokens); + println!("{:?}", AstPrinter {}.visit_expr(&expr)); Ok("Ok") } Err(code) => { diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..fc4e86f --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,157 @@ +use crate::expression::Expr; +use crate::expression::Expr::*; +use crate::tokens::{Token, TokenType}; +use crate::tokens::TokenType::*; +use crate::tokens::Value::*; + +pub fn parse(tokens: Vec) -> Expr { + Parser::new(tokens).parse() +} + +struct Parser { + tokens: Vec, + current: usize, +} + +impl Parser { + fn new(tokens: Vec) -> Parser { + Parser { tokens, current: 0 } + } + + fn parse(&mut self) -> Expr { + self.expression() + } + + fn expression(&mut self) -> Expr { + self.equality() + } + + fn equality(&mut self) -> Expr { + let mut expr = self.comparison(); + + while self.match_token(&[BANGEQUAL, EQUALEQUAL]) { + let operator = self.previous(); + let right = self.comparison(); + expr = Binary(Box::new(expr), operator, Box::new(right)); + } + + expr + } + + fn comparison(&mut self) -> Expr { + let mut expr = self.addition(); + + while self.match_token(&[GREATER, GREATEREQUAL, LESS, LESSEQUAL]) { + let operator = self.previous(); + let right = self.addition(); + expr = Binary(Box::new(expr), operator, Box::new(right)); + } + + expr + } + + fn match_token(&mut self, tokens: &[TokenType]) -> bool { + for token in tokens { + if self.check(*token) { + self.advance(); + return true; + } + } + + false + } + + fn check(&self, token_type: TokenType) -> bool { + return if self.is_at_end() { + false + } else { + self.peek().token_type == token_type + }; + } + + fn peek(&self) -> Token { + return self.tokens[self.current].clone(); + } + + fn advance(&mut self) -> Token { + if !self.is_at_end() { + self.current += 1; + } + self.previous() + } + + fn is_at_end(&self) -> bool { + self.peek().token_type == EOF + } + + fn previous(&self) -> Token { + self.tokens[self.current - 1].clone() + } + + fn addition(&mut self) -> Expr { + let mut expr = self.multiplication(); + + while self.match_token(&[MINUS, PLUS]) { + let operator = self.previous(); + let right = self.multiplication(); + expr = Binary(Box::new(expr), operator, Box::new(right)); + } + + expr + } + + fn multiplication(&mut self) -> Expr { + let mut expr = self.unary(); + + while self.match_token(&[SLASH, STAR]) { + let operator = self.previous(); + let right = self.unary(); + expr = Binary(Box::new(expr), operator, Box::new(right)); + } + + return expr; + } + + fn unary(&mut self) -> Expr { + if self.match_token(&[BANG, MINUS]) { + let operator = self.previous(); + let right = self.unary(); + return Unary(operator, Box::new(right)); + } + + return self.primary(); + } + + fn primary(&mut self) -> Expr { + if self.match_token(&[FALSE]) { + return Literal(Boolean(false)); + } + if self.match_token(&[TRUE]) { + return Literal(Boolean(true)); + } + + if self.match_token(&[NIL]) { + return Literal(None); + } + if self.match_token(&[NUMBER, STRING]) { + return Literal(self.previous().literal); + } + + if self.match_token(&[LEFTPAREN]) { + let expr = self.expression(); + self.consume_token(RIGHTPAREN, "Expect ')' after expression."); + return Grouping(Box::new(expr)); + } else { + Literal(None) + } + } + + fn consume_token(&mut self, token_type: TokenType, _message: &str) -> Token { + if self.check(token_type) { + return self.advance(); + } + + panic!() + } +} + diff --git a/src/parser_tests.rs b/src/parser_tests.rs new file mode 100644 index 0000000..c965405 --- /dev/null +++ b/src/parser_tests.rs @@ -0,0 +1,30 @@ +use crate::expression::Expr::Binary; +use crate::expression::Expr::Literal; +use crate::parser::parse; +use crate::scanner::scan_tokens; +use crate::tokens::Token; +use crate::tokens::TokenType::PLUS; +use crate::tokens::Value::{None,Numeric}; + +#[test] +fn test_scan_empty_source() { + let tokens = scan_tokens("").unwrap(); + let expression = parse(tokens); + + assert_eq!(expression, Literal(None)); +} + +#[test] +fn test_scan_arithmetic() { + let tokens = scan_tokens("1+1").unwrap(); + let expression = parse(tokens); + + assert_eq!(expression, Binary(Box::new(Literal(Numeric(1.0))), + Token { + token_type: PLUS, + lexeme: String::from("+"), + literal: None, + line: 1, + }, + Box::new(Literal(Numeric(1.0))))); +} diff --git a/src/scanner.rs b/src/scanner.rs index 271c711..fb558f4 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -1,7 +1,5 @@ -use std::any::Any; - use crate::keywords::KEYWORDS; -use crate::tokens::{Token, TokenType}; +use crate::tokens::{Token, TokenType, Value}; use crate::tokens::TokenType::*; /// public function for scanning lox source @@ -16,8 +14,8 @@ pub fn scan_tokens(source: &str) -> Result, &'static str> { scanner.tokens.push(Token { token_type: EOF, - lexeme: "lexeme", - literal: Box::new(""), + lexeme: String::new(), + literal: Value::None, line: scanner.line, }); @@ -34,7 +32,7 @@ struct Scanner<'a> { source: &'a str, // the tokens that will be the output of the scan function - tokens: Vec>, + tokens: Vec, // start of unscanned source (updated after part of the source was scanned) start: usize, @@ -147,7 +145,7 @@ impl Scanner<'_> { } let value: f64 = self.source[self.start..self.current].parse().expect("not a number"); - self.add_token_literal(NUMBER, Box::new(value)); + self.add_token_literal(NUMBER, Value::Numeric(value)); } /// handle string literals @@ -167,7 +165,7 @@ impl Scanner<'_> { self.advance(); let value = String::from(&self.source[self.start + 1..self.current - 1]); - self.add_token_literal(STRING, Box::new(value)); + self.add_token_literal(STRING, Value::Text(value)); } } @@ -180,14 +178,14 @@ impl Scanner<'_> { /// adds a token of the given type fn add_token(&mut self, token_type: TokenType) { let text = &self.source[self.start..self.current]; - let token = Token { token_type: token_type, lexeme: text, literal: Box::new(""), line: self.line }; + let token = Token { token_type: token_type, lexeme: String::from(text), literal: Value::None, line: self.line }; self.tokens.push(token); } /// adds a token of the given type and content - fn add_token_literal(&mut self, token_type: TokenType, literal: Box) { + fn add_token_literal(&mut self, token_type: TokenType, literal: Value) { let text = &self.source[self.start..self.current]; - let token = Token { token_type: token_type, lexeme: text, literal: literal, line: self.line }; + let token = Token { token_type: token_type, lexeme: String::from(text), literal, line: self.line }; self.tokens.push(token); } diff --git a/src/tests.rs b/src/scanner_tests.rs similarity index 86% rename from src/tests.rs rename to src/scanner_tests.rs index 495e888..86a3d79 100644 --- a/src/tests.rs +++ b/src/scanner_tests.rs @@ -1,6 +1,7 @@ #[cfg(test)] use crate::scanner::scan_tokens; use crate::tokens::TokenType::*; +use crate::tokens::Value::{Numeric, Text}; #[test] fn test_scan_empty_source() { @@ -44,7 +45,12 @@ fn test_scan_string_literals() { let token = tokens.get(0).unwrap(); assert_eq!(token.token_type, STRING); assert_eq!(token.lexeme, "\"hello world\""); - assert_eq!(token.get_literal_as_string().unwrap(), "hello world"); + match token.literal.clone() { + Text(value) => { + assert_eq!(value, "hello world"); + } + _ => { assert_eq!(true,false, "token value != hello world") } + } } #[test] @@ -55,7 +61,12 @@ fn test_scan_numeric_literals() { let token = tokens.get(0).unwrap(); assert_eq!(token.token_type, NUMBER); assert_eq!(token.lexeme, "0.1"); - assert_eq!(token.get_literal_as_float().unwrap(), 0.1); + match token.literal { + Numeric(value) => { + assert_eq!(value, 0.1); + } + _ => { assert_eq!(true, false, "token value != 0.1") } + } } #[test] diff --git a/src/tokens.rs b/src/tokens.rs index e5cef2a..753e272 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -1,47 +1,49 @@ -use std::any::Any; use std::fmt; +#[derive(Clone, PartialOrd, PartialEq)] +pub enum Value { + Text(String), + Numeric(f64), + Boolean(bool), + None, +} + +impl fmt::Debug for Value { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Value::Text(value) => { + write!(f, "{}", value.to_string()) + } + Value::Numeric(value) => { + write!(f, "{}", value) + } + Value::Boolean(value) => { + write!(f, "{}", value) + } + Value::None => { + write!(f, "Nil") + } + } + } +} + /// struct that contains a single token -pub struct Token<'a> { +#[derive(Debug, Clone, PartialOrd, PartialEq)] +pub struct Token { // the type pub token_type: TokenType, // the actual part of the code that resulted in this token - pub lexeme: &'a str, + pub lexeme: String, // numeric (ie 1,2, 1.0 etc) and alphanumeric (any quoted text) values - pub literal: Box, + pub literal: Value, // the line that contains the code for this token instance pub line: usize, } -impl Token<'_> { - pub fn get_literal_as_string(&self) -> Option<&str> { - self.literal.downcast_ref::().map(|s| s.as_str()) - } - - pub fn get_literal_as_float(&self) -> Option { - self.literal.downcast_ref::().map(|f| *f) - } -} - -impl fmt::Debug for Token<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let lit = match self.literal.downcast_ref::() { - Some(as_string) => { - as_string.to_string() - } - None => { - format!("{:?}", self.literal) - } - }; - - write!(f, "Token [ type: {:?}, lexeme: {}, literal: {}, line: {} ]", self.token_type, self.lexeme, lit, self.line) - } -} - -#[derive(Eq, PartialEq, Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd)] pub enum TokenType { // Single-character tokens. LEFTPAREN,