* Implemented the parser, without error handling and without identifiers. It now works for arithmetic expressions.

* Updated the way literals are stored in tokens so that they are no longer of type Any but proper enum Value types, for strings, numeric and boolean values.
Merge remote-tracking branch 'origin/master'
2020-01-28 09:28:07 +01:00 · 2020-01-24 14:16:29 +01:00 · 2020-01-24 14:16:10 +01:00 · 2020-01-24 14:13:15 +01:00 · 2020-01-24 14:12:30 +01:00 · 2020-01-24 14:12:12 +01:00
11 changed files with 572 additions and 55 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -1,6 +1,16 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
 [[package]]
 name = "lazy_static"
 version = "1.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 [[package]]
 name = "rust_lox"
 version = "0.1.0"
 dependencies = [
 "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 [metadata]
 "checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
--- a/Cargo.toml
+++ b/Cargo.toml
@ -5,3 +5,4 @@ authors = ["Sander Hautvast <shautvast@gmail.com>"]
 edition = "2018"
 [dependencies]
 lazy_static = "1.4.0"
--- a/README.md
+++ b/README.md
@ -1,3 +1,4 @@
-Trying to learn rust as well as writing interpreters.
+* Trying to learn rust as well as writing interpreters.
 * see https://www.craftinginterpreters.com/
-see https://www.craftinginterpreters.com/
+* the repo contains every iteration in a separate commit, so you can follow the development if you check them out individually
--- a/src/expression.rs
+++ b/src/expression.rs
@ -0,0 +1,52 @@
 use crate::tokens::{Token, Value};
 /// Visitor over the expression tree.
 ///
 /// `R` is the type produced by visiting an expression.
 pub trait Visitor<R> {
    /// Visits `expr` and returns the result of type `R`.
    fn visit_expr(&mut self, expr: &Expr) -> R;
 }
 /// A node of the expression syntax tree.
 #[derive(Debug, PartialOrd, PartialEq)]
 pub enum Expr {
    /// Binary operation: left operand, operator token, right operand.
    Binary(Box<Expr>, Token, Box<Expr>),
    /// A wrapped sub-expression.
    Grouping(Box<Expr>),
    /// A literal value.
    Literal(Value),
    /// Unary operation: operator token followed by its operand.
    Unary(Token, Box<Expr>),
 }
 /// Stateless printer for expression trees; its behavior lives in its `impl` blocks.
 pub struct AstPrinter {}
 impl Visitor<String> for AstPrinter {
    /// Renders `expr` as text: a literal through its `Debug` output, every
    /// other node as a parenthesized list headed by its operator lexeme
    /// (or `group` for a grouping).
    fn visit_expr(&mut self, expr: &Expr) -> String {
        match expr {
            Expr::Literal(value) => format!("{:?}", value),
            Expr::Grouping(expression) => self.parenthesize("group", &[expression]),
            Expr::Unary(operator, right) => self.parenthesize(&operator.lexeme, &[right]),
            Expr::Binary(left, operator, right) => {
                self.parenthesize(&operator.lexeme, &[left, right])
            }
        }
    }
 }
 impl AstPrinter {
    /// Builds `(name e1 e2 ...)`: the name, one space, then every rendered
    /// expression separated by single spaces, all wrapped in parentheses.
    fn parenthesize(&mut self, name: &str, expressions: &[&Expr]) -> String {
        // render the operands in order before assembling the final text
        let rendered: Vec<String> = expressions
            .iter()
            .map(|expr| self.visit_expr(expr))
            .collect();
        format!("({} {})", name, rendered.join(" "))
    }
 }
--- a/src/keywords.rs
+++ b/src/keywords.rs
@ -0,0 +1,27 @@
 use std::collections::HashMap;
 use crate::tokens::TokenType;
 use crate::tokens::TokenType::*;
 lazy_static! {
    /// Reserved words of the language, mapped to their token types.
    pub static ref KEYWORDS: HashMap<&'static str, TokenType> = [
        ("and", AND),
        ("class", CLASS),
        ("else", ELSE),
        ("false", FALSE),
        ("for", FOR),
        ("fun", FUN),
        ("if", IF),
        ("nil", NIL),
        ("or", OR),
        ("print", PRINT),
        ("return", RETURN),
        ("super", SUPER),
        ("this", THIS),
        ("true", TRUE),
        ("var", VAR),
        ("while", WHILE),
    ]
    .iter()
    .cloned()
    .collect();
 }
--- a/src/main.rs
+++ b/src/main.rs
@ -1,10 +1,22 @@
 #[macro_use]
 extern crate lazy_static;
 use std::env;
 use std::fs::File;
 use std::io::{self, BufRead, Read, Write};
 use std::process;
 use crate::expression::{Visitor, AstPrinter};
 mod scanner;
 mod tokens;
 mod keywords;
 mod expression;
 mod parser;
 // An attribute applies only to the item that follows it, so each test module
 // needs its own `#[cfg(test)]` to stay out of non-test builds.
 #[cfg(test)]
 mod scanner_tests;
 #[cfg(test)]
 mod parser_tests;
 /// main
 /// no arguments: run interactively
@ -77,9 +89,8 @@ fn run_prompt() {
 fn run(source: String) -> Result<&'static str, &'static str> {
    return match scanner::scan_tokens(source.as_str()) {
        Ok(tokens) => {
-            for token in tokens {
+            let expr = parser::parse(tokens);
-                println!("{:?}", token);
+            println!("{:?}", AstPrinter {}.visit_expr(&expr));
            }
            Ok("Ok")
        }
        Err(code) => {
--- a/src/parser.rs
+++ b/src/parser.rs
@ -0,0 +1,157 @@
 use crate::expression::Expr;
 use crate::expression::Expr::*;
 use crate::tokens::{Token, TokenType};
 use crate::tokens::TokenType::*;
 use crate::tokens::Value::*;
 /// Parses a list of tokens into a single expression tree.
 pub fn parse(tokens: Vec<Token>) -> Expr {
    let mut parser = Parser::new(tokens);
    parser.parse()
 }
 /// Parser state: the token list and a cursor into it.
 struct Parser {
    // the tokens being parsed
    tokens: Vec<Token>,
    // index into `tokens` of the next token to examine (starts at 0)
    current: usize,
 }
 /// Recursive-descent expression parser.
 ///
 /// Precedence from lowest to highest: equality, comparison, addition,
 /// multiplication, unary, primary. There is no error recovery: a missing
 /// closing parenthesis panics, and any token that cannot start an expression
 /// yields `Literal(None)` without being consumed.
 impl Parser {
    /// Creates a parser positioned at the first token.
    fn new(tokens: Vec<Token>) -> Parser {
        Parser { tokens, current: 0 }
    }
    /// Parses the tokens as one expression.
    fn parse(&mut self) -> Expr {
        self.expression()
    }
    /// expression -> equality
    fn expression(&mut self) -> Expr {
        self.equality()
    }
    /// equality -> comparison ( ( BANGEQUAL | EQUALEQUAL ) comparison )*
    fn equality(&mut self) -> Expr {
        self.left_associative(&[BANGEQUAL, EQUALEQUAL], Parser::comparison)
    }
    /// comparison -> addition ( ( GREATER | GREATEREQUAL | LESS | LESSEQUAL ) addition )*
    fn comparison(&mut self) -> Expr {
        self.left_associative(&[GREATER, GREATEREQUAL, LESS, LESSEQUAL], Parser::addition)
    }
    /// addition -> multiplication ( ( MINUS | PLUS ) multiplication )*
    fn addition(&mut self) -> Expr {
        self.left_associative(&[MINUS, PLUS], Parser::multiplication)
    }
    /// multiplication -> unary ( ( SLASH | STAR ) unary )*
    fn multiplication(&mut self) -> Expr {
        self.left_associative(&[SLASH, STAR], Parser::unary)
    }
    /// Shared loop of all binary rules: parses `operand`, then folds every
    /// following `operator operand` pair into a left-nested `Binary` node.
    fn left_associative(
        &mut self,
        operators: &[TokenType],
        operand: fn(&mut Parser) -> Expr,
    ) -> Expr {
        let mut expr = operand(self);
        while self.match_token(operators) {
            let operator = self.previous();
            let right = operand(self);
            expr = Binary(Box::new(expr), operator, Box::new(right));
        }
        expr
    }
    /// unary -> ( BANG | MINUS ) unary | primary
    fn unary(&mut self) -> Expr {
        if self.match_token(&[BANG, MINUS]) {
            let operator = self.previous();
            let right = self.unary();
            return Unary(operator, Box::new(right));
        }
        self.primary()
    }
    /// primary -> FALSE | TRUE | NIL | NUMBER | STRING | LEFTPAREN expression RIGHTPAREN
    ///
    /// Any other token is left unconsumed and reported as `Literal(None)`.
    ///
    /// # Panics
    /// Panics when a parenthesized expression is not closed by `RIGHTPAREN`.
    fn primary(&mut self) -> Expr {
        if self.match_token(&[FALSE]) {
            return Literal(Boolean(false));
        }
        if self.match_token(&[TRUE]) {
            return Literal(Boolean(true));
        }
        if self.match_token(&[NIL]) {
            return Literal(None);
        }
        if self.match_token(&[NUMBER, STRING]) {
            return Literal(self.previous().literal);
        }
        if self.match_token(&[LEFTPAREN]) {
            let expr = self.expression();
            self.consume_token(RIGHTPAREN, "Expect ')' after expression.");
            return Grouping(Box::new(expr));
        }
        Literal(None)
    }
    /// Consumes the next token when its type is one of `tokens`.
    /// Returns whether a token was consumed.
    fn match_token(&mut self, tokens: &[TokenType]) -> bool {
        let matched = tokens.iter().any(|&token_type| self.check(token_type));
        if matched {
            // `check` only succeeds before EOF, so the cursor stays in bounds.
            self.current += 1;
        }
        matched
    }
    /// True when the next token has the given type; always false at the end of input.
    fn check(&self, token_type: TokenType) -> bool {
        !self.is_at_end()
            && self
                .peek()
                .map_or(false, |token| token.token_type == token_type)
    }
    /// Borrows the next token without consuming it, if there is one.
    fn peek(&self) -> Option<&Token> {
        self.tokens.get(self.current)
    }
    /// Moves past the next token (unless at the end) and returns the token
    /// that now lies just before the cursor.
    fn advance(&mut self) -> Token {
        if !self.is_at_end() {
            self.current += 1;
        }
        self.previous()
    }
    /// True when the next token is EOF or the token list is exhausted.
    fn is_at_end(&self) -> bool {
        self.peek().map_or(true, |token| token.token_type == EOF)
    }
    /// Returns a copy of the most recently consumed token.
    ///
    /// # Panics
    /// Panics when no token has been consumed yet.
    fn previous(&self) -> Token {
        self.current
            .checked_sub(1)
            .and_then(|index| self.tokens.get(index))
            .cloned()
            .expect("previous() called before any token was consumed")
    }
    /// Consumes and returns the next token, which must have type `token_type`.
    ///
    /// # Panics
    /// Panics with `message` (prefixed with the line of the offending token
    /// when one is available) if the next token has a different type.
    fn consume_token(&mut self, token_type: TokenType, message: &str) -> Token {
        if self.check(token_type) {
            return self.advance();
        }
        if let Some(token) = self.peek() {
            panic!("[line {}] {}", token.line, message);
        }
        panic!("{}", message)
    }
 }
--- a/src/parser_tests.rs
+++ b/src/parser_tests.rs
@ -0,0 +1,30 @@
 use crate::expression::Expr::Binary;
 use crate::expression::Expr::Literal;
 use crate::parser::parse;
 use crate::scanner::scan_tokens;
 use crate::tokens::Token;
 use crate::tokens::TokenType::PLUS;
 use crate::tokens::Value::{None,Numeric};
 /// Parsing empty source yields `Literal(None)`.
 #[test]
 fn test_scan_empty_source() {
    let expression = parse(scan_tokens("").unwrap());
    assert_eq!(expression, Literal(None));
 }
 /// `1+1` parses into a binary node with two numeric literals around a PLUS token.
 #[test]
 fn test_scan_arithmetic() {
    let plus = Token {
        token_type: PLUS,
        lexeme: String::from("+"),
        literal: None,
        line: 1,
    };
    let one = || Box::new(Literal(Numeric(1.0)));
    let expected = Binary(one(), plus, one());
    let expression = parse(scan_tokens("1+1").unwrap());
    assert_eq!(expression, expected);
 }
--- a/src/scanner.rs
+++ b/src/scanner.rs
@ -1,4 +1,5 @@
-use crate::tokens::{Token, TokenType};
+use crate::keywords::KEYWORDS;
 use crate::tokens::{Token, TokenType, Value};
 use crate::tokens::TokenType::*;
 /// public function for scanning lox source
@ -13,8 +14,8 @@ pub fn scan_tokens(source: &str) -> Result<Vec<Token>, &'static str> {
    scanner.tokens.push(Token {
        token_type: EOF,
-        lexeme: "lexeme",
+        lexeme: String::new(),
-        literal: Box::new(""),
+        literal: Value::None,
        line: scanner.line,
    });
@ -31,7 +32,7 @@ struct Scanner<'a> {
    source: &'a str,
    // the tokens that will be the output of the scan function
-    tokens: Vec<Token<'a>>,
+    tokens: Vec<Token>,
    // start of unscanned source (updated after part of the source was scanned)
    start: usize,
@ -99,7 +100,72 @@ impl Scanner<'_> {
            ' ' => {}
            '\t' => {}
            '\r' => {}
-            _ => {}
+            '\"' => self.string(),
            _ => {
                if next_char.is_digit(10) {
                    self.number();
                } else if is_alphabetic_or_underscore(next_char) {
                    self.identifier();
                } else {
                    self.report_error(self.line, "unexpected character");
                }
            }
        }
    }
    /// handle identifiers and keywords
    /// advances while characters are alphanumeric (or underscore), then adds
    /// the keyword token when the text is in KEYWORDS, an IDENTIFIER token otherwise
    fn identifier(&mut self) {
        while is_alphanumeric(self.peek(0)) {
            self.advance();
        }
        let token_type = KEYWORDS
            .get(&self.source[self.start..self.current])
            .copied()
            .unwrap_or(TokenType::IDENTIFIER);
        self.add_token(token_type);
    }
    /// handle number literals
    /// consumes the integer digits and, when a '.' is directly followed by a digit,
    /// the fractional digits as well; then adds a NUMBER token holding the f64 value
    fn number(&mut self) {
        while self.peek(0).is_digit(10) {
            self.advance();
        }
        let has_fraction = self.peek(0) == '.' && self.peek(1).is_digit(10);
        if has_fraction {
            // step over the '.' before reading the fractional digits
            self.advance();
            while self.peek(0).is_digit(10) {
                self.advance();
            }
        }
        let lexeme = &self.source[self.start..self.current];
        let value = lexeme.parse::<f64>().expect("not a number");
        self.add_token_literal(NUMBER, Value::Numeric(value));
    }
    /// handle string literals
    /// advances to the terminating double quote, counting newlines on the way,
    /// and adds a STRING token whose value is the text between the quotes
    /// reports an error when the end of the source is reached before the closing quote
    fn string(&mut self) {
        loop {
            let next = self.peek(0);
            if next == '\"' || self.is_at_end() {
                break;
            }
            if next == '\n' {
                self.line += 1;
            }
            self.advance();
        }
        if self.is_at_end() {
            self.report_error(self.line, "unterminated string");
            return;
        }
        // step over the closing quote
        self.advance();
        let value = self.source[self.start + 1..self.current - 1].to_string();
        self.add_token_literal(STRING, Value::Text(value));
    }
@ -112,7 +178,14 @@ impl Scanner<'_> {
    /// adds a token of the given type
    fn add_token(&mut self, token_type: TokenType) {
        let text = &self.source[self.start..self.current];
-        let token = Token { token_type: token_type, lexeme: text, literal: Box::new(""), line: self.line };
+        let token = Token { token_type: token_type, lexeme: String::from(text), literal: Value::None, line: self.line };
        self.tokens.push(token);
    }
    /// adds a token of the given type carrying the given literal value;
    /// the lexeme is the source text between `start` and `current`
    fn add_token_literal(&mut self, token_type: TokenType, literal: Value) {
        let lexeme = self.source[self.start..self.current].to_string();
        self.tokens.push(Token { token_type, lexeme, literal, line: self.line });
    }
@ -145,4 +218,19 @@ impl Scanner<'_> {
        self.current += 1;
        true
    }
    /// prints the line number and message to standard output
    /// and sets the `error_occured` flag
    pub fn report_error(&mut self, line: usize, message: &str) {
        println!("[line {} ] Error {} ", line, message);
        self.error_occured = true;
    }
 }
 /// true for an underscore or any character accepted by `char::is_alphabetic`
 fn is_alphabetic_or_underscore(c: char) -> bool {
    match c {
        '_' => true,
        other => other.is_alphabetic(),
    }
 }
 /// true for a decimal digit, an underscore or an alphabetic character
 fn is_alphanumeric(c: char) -> bool {
    let digit = c.is_digit(10);
    digit || is_alphabetic_or_underscore(c)
 }
--- a/src/scanner_tests.rs
+++ b/src/scanner_tests.rs
@ -0,0 +1,105 @@
 #[cfg(test)]
 use crate::scanner::scan_tokens;
 use crate::tokens::TokenType::*;
 use crate::tokens::Value::{Numeric, Text};
 /// Empty source scans to exactly one token: EOF on line 1.
 #[test]
 fn test_scan_empty_source() {
    let tokens = scan_tokens("").unwrap();
    assert_eq!(tokens.len(), 1);
    let eof = &tokens[0];
    assert_eq!(eof.token_type, EOF);
    assert_eq!(eof.line, 1);
 }
 /// A single `>` scans to a GREATER token followed by EOF.
 #[test]
 fn test_scan_single_char_tokens() {
    let tokens = scan_tokens(">").unwrap();
    assert_eq!(tokens.len(), 2);
    let greater = &tokens[0];
    assert_eq!(greater.token_type, GREATER);
    assert_eq!(greater.lexeme, ">");
    let eof = &tokens[1];
    assert_eq!(eof.token_type, EOF);
 }
 /// `>=` scans to one GREATEREQUAL token, not two separate tokens.
 #[test]
 fn test_scan_double_char_tokens() {
    let tokens = scan_tokens(">=").unwrap();
    assert_eq!(tokens.len(), 2);
    let greater_equal = &tokens[0];
    assert_eq!(greater_equal.token_type, GREATEREQUAL);
    assert_eq!(greater_equal.lexeme, ">=");
 }
 /// A quoted string scans to a STRING token: the lexeme keeps the quotes,
 /// the literal value holds the text between them.
 #[test]
 fn test_scan_string_literals() {
    let tokens = scan_tokens("\"hello world\"").unwrap();
    assert_eq!(tokens.len(), 2);
    let token = tokens.get(0).unwrap();
    assert_eq!(token.token_type, STRING);
    assert_eq!(token.lexeme, "\"hello world\"");
    // compare the literal directly: no clone, and a failure shows the actual value
    assert_eq!(
        token.literal,
        Text(String::from("hello world")),
        "token value != hello world"
    );
 }
 /// A decimal number scans to a NUMBER token whose literal is the parsed f64.
 #[test]
 fn test_scan_numeric_literals() {
    let tokens = scan_tokens("0.1").unwrap();
    assert_eq!(tokens.len(), 2);
    let token = tokens.get(0).unwrap();
    assert_eq!(token.token_type, NUMBER);
    assert_eq!(token.lexeme, "0.1");
    // compare the literal directly so a failure shows the actual value
    assert_eq!(token.literal, Numeric(0.1), "token value != 0.1");
 }
 /// The reserved word `fun` scans to its keyword token.
 #[test]
 fn test_keywords() {
    let tokens = scan_tokens("fun").unwrap();
    assert_eq!(tokens.len(), 2);
    let keyword = &tokens[0];
    assert_eq!(keyword.token_type, FUN);
 }
 /// A name that is not a reserved word scans to an IDENTIFIER token.
 #[test]
 fn test_identifiers() {
    let tokens = scan_tokens("a").unwrap();
    assert_eq!(tokens.len(), 2);
    let identifier = &tokens[0];
    assert_eq!(identifier.token_type, IDENTIFIER);
 }
 /// A small statement scans to the expected sequence of token types.
 #[test]
 fn test_expression() {
    let tokens = scan_tokens("if a == 1 {b=\"hello world\"}").unwrap();
    assert_eq!(tokens.len(), 10);
    let expected = [
        IF, IDENTIFIER, EQUALEQUAL, NUMBER, LEFTBRACE,
        IDENTIFIER, EQUAL, STRING, RIGHTBRACE, EOF,
    ];
    for (index, token_type) in expected.iter().enumerate() {
        assert_eq!(tokens.get(index).unwrap().token_type, *token_type);
    }
 }
--- a/src/tokens.rs
+++ b/src/tokens.rs
@ -1,60 +1,95 @@
 use std::any::Any;
 use std::fmt;
 /// Literal value carried by a token.
 #[derive(Clone, PartialOrd, PartialEq)]
 pub enum Value {
    /// text value
    Text(String),
    /// numeric value
    Numeric(f64),
    /// boolean value
    Boolean(bool),
    /// no value; printed as `Nil`
    None,
 }
 /// Prints the bare value (no variant name, no quotes around text); `None` prints as `Nil`.
 impl fmt::Debug for Value {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            // write the text directly instead of allocating a temporary String
            Value::Text(value) => f.write_str(value),
            Value::Numeric(value) => write!(f, "{}", value),
            Value::Boolean(value) => write!(f, "{}", value),
            Value::None => f.write_str("Nil"),
        }
    }
 }
 /// struct that contains a single token
-pub struct Token<'a> {
+#[derive(Debug, Clone, PartialOrd, PartialEq)]
 pub struct Token {
    // the type
-    pub lexeme: &'a str,
+    pub token_type: TokenType,
    // the actual part of the code that resulted in this token
-    pub literal: Box<dyn Any>,
+    pub lexeme: String,
    // numeric (ie 1,2, 1.0 etc) and alphanumeric (any quoted text) values
-    pub line: usize,
+    pub literal: Value,
    // the line that contains the code for this token instance
-    pub token_type: TokenType,
+    pub line: usize,
 }
-impl fmt::Debug for Token<'_> {
+#[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd)]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let lit = match self.literal.downcast_ref::<String>() {
            Some(as_string) => {
                as_string.to_string()
            }
            None => {
                format!("{:?}", self.literal)
            }
        };
        write!(f, "Token [ type: {:?}, lexeme: {}, literal: {}, line: {} ]", self.token_type, self.lexeme, lit, self.line)
    }
 }
 #[derive(Debug, Clone, Copy)]
 pub enum TokenType {
    // Single-character tokens.
-    LEFTPAREN,  // (
+    LEFTPAREN,
-    RIGHTPAREN, // )
+    RIGHTPAREN,
-    LEFTBRACE,  // [
+    LEFTBRACE,
-    RIGHTBRACE, // ]
+    RIGHTBRACE,
-    COMMA,      // ,
+    COMMA,
-    DOT,        // .
+    DOT,
-    MINUS,      // -
+    MINUS,
-    PLUS,       // +
+    PLUS,
-    SEMICOLON,  // ;
+    SEMICOLON,
-    STAR,       // *
+    STAR,
-    SLASH,      // /
+    SLASH,
    // One or two character tokens.
-    BANG,           // !
+    BANG,
-    BANGEQUAL,      // !=
+    BANGEQUAL,
-    EQUAL,          // =
+    EQUAL,
-    EQUALEQUAL,     // ==
+    EQUALEQUAL,
-    GREATER,        // >
+    GREATER,
-    GREATEREQUAL,   // >=
+    GREATEREQUAL,
-    LESS,           // <
+    LESS,
-    LESSEQUAL,      // <=
+    LESSEQUAL,
-    EOF         // end of file
+    // Literals.
    STRING,
    NUMBER,
    IDENTIFIER,
    // Keywords.
    AND,
    CLASS,
    ELSE,
    FALSE,
    FUN,
    FOR,
    IF,
    NIL,
    OR,
    PRINT,
    RETURN,
    SUPER,
    THIS,
    TRUE,
    VAR,
    WHILE,
    EOF,         // end of file
 }
Author	SHA1	Message	Date
Sander Hautvast	a66bf2e16e	* implemented parser without error handling and without identifiers. It works now for arithmetic expressions. * Updated the way literals are stored in tokens so that they are no longer of type Any but proper enum Value types, for strings, numeric and boolean values.	2020-01-28 09:28:07 +01:00
Sander Hautvast	9639ca051b	Merge remote-tracking branch 'origin/master'	2020-01-24 14:16:29 +01:00
Sander Hautvast	ff416f0f40	FIX messed up comments in tokens.rs	2020-01-24 14:16:10 +01:00
Sander Hautvast	365a17dcf1	Update README.md	2020-01-24 14:13:15 +01:00
Sander Hautvast	05a3eb59b3	Update README.md	2020-01-24 14:12:30 +01:00
Sander Hautvast	a660831aea	Update README.md	2020-01-24 14:12:12 +01:00
Sander Hautvast	a1f656199c	scanner now recognizes complete expressions, adding identifiers and keywords	2020-01-24 14:07:13 +01:00
Sander Hautvast	071f584e92	scanner now recognizes numeric (f64) literals	2020-01-24 13:50:12 +01:00
Sander Hautvast	650a31889b	scanner now recognizes string literals (text between double-quotes)	2020-01-24 13:42:43 +01:00
Sander Hautvast	92e86032ca	added unittests for the functionality sofar	2020-01-24 13:17:16 +01:00