From a1f656199c0736c034ac21215265e09ce8a4ab7c Mon Sep 17 00:00:00 2001 From: Sander Hautvast Date: Fri, 24 Jan 2020 14:07:13 +0100 Subject: [PATCH] scanner now recognizes complete expressions, adding identifiers and keywords --- Cargo.lock | 10 ++++++++++ Cargo.toml | 1 + src/keywords.rs | 27 +++++++++++++++++++++++++++ src/main.rs | 4 ++++ src/scanner.rs | 27 +++++++++++++++++++++++++++ src/tests.rs | 35 +++++++++++++++++++++++++++++++++++ src/tokens.rs | 40 +++++++++++++++++++++------------------- 7 files changed, 125 insertions(+), 19 deletions(-) create mode 100644 src/keywords.rs diff --git a/Cargo.lock b/Cargo.lock index c2fa458..eb8ed04 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,16 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "rust_lox" version = "0.1.0" +dependencies = [ + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", +] +[metadata] +"checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" diff --git a/Cargo.toml b/Cargo.toml index b71e144..c772f33 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,3 +5,4 @@ authors = ["Sander Hautvast "] edition = "2018" [dependencies] +lazy_static = "1.4.0" \ No newline at end of file diff --git a/src/keywords.rs b/src/keywords.rs new file mode 100644 index 0000000..946f84e --- /dev/null +++ b/src/keywords.rs @@ -0,0 +1,27 @@ +use std::collections::HashMap; + +use crate::tokens::TokenType; +use crate::tokens::TokenType::*; + +lazy_static! { +pub static ref KEYWORDS: HashMap<&'static str, TokenType> = { + let mut keywords = HashMap::new(); + keywords.insert("and", AND); + keywords.insert("class", CLASS); + keywords.insert("else", ELSE); + keywords.insert("false", FALSE); + keywords.insert("for", FOR); + keywords.insert("fun", FUN); + keywords.insert("if", IF); + keywords.insert("nil", NIL); + keywords.insert("or", OR); + keywords.insert("print", PRINT); + keywords.insert("return", RETURN); + keywords.insert("super", SUPER); + keywords.insert("this", THIS); + keywords.insert("true", TRUE); + keywords.insert("var", VAR); + keywords.insert("while", WHILE); + keywords + }; +} \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index d3e77f0..c8e98d1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,6 @@ +#[macro_use] +extern crate lazy_static; + use std::env; use std::fs::File; use std::io::{self, BufRead, Read, Write}; @@ -5,6 +8,7 @@ use std::process; mod scanner; mod tokens; +mod keywords; #[cfg(test)] mod tests; diff --git a/src/scanner.rs b/src/scanner.rs index 351018c..271c711 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -1,5 +1,6 @@ use std::any::Any; +use crate::keywords::KEYWORDS; use crate::tokens::{Token, TokenType}; use crate::tokens::TokenType::*; @@ -105,6 +106,8 @@ impl Scanner<'_> { _ => { if next_char.is_digit(10) { self.number(); + } else if is_alphabetic_or_underscore(next_char) { + self.identifier(); } else { self.report_error(self.line, "unexpected character"); } @@ -112,6 +115,21 @@ impl Scanner<'_> { } } + fn identifier(&mut self) { + while is_alphanumeric(self.peek(0)) { + self.advance(); + } + let text = &self.source[self.start..self.current]; + match KEYWORDS.get(text) { + Some(token_type) => { + self.add_token(*token_type); + } + None => { + self.add_token(TokenType::IDENTIFIER); + } + } + } + /// handle number literals /// advances while characters are considered part of the number /// finally adds a number token to the list. @@ -209,3 +227,12 @@ impl Scanner<'_> { println!("[line {} ] Error {} ", line, message); } } + + +fn is_alphabetic_or_underscore(c: char) -> bool { + c.is_alphabetic() || c == '_' +} + +fn is_alphanumeric(c: char) -> bool { + is_alphabetic_or_underscore(c) || c.is_digit(10) +} \ No newline at end of file diff --git a/src/tests.rs b/src/tests.rs index 8115284..495e888 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -57,3 +57,38 @@ fn test_scan_numeric_literals() { assert_eq!(token.lexeme, "0.1"); assert_eq!(token.get_literal_as_float().unwrap(), 0.1); } + +#[test] +fn test_keywords() { + let tokens = scan_tokens("fun").unwrap(); + assert_eq!(tokens.len(), 2); + + let token = tokens.get(0).unwrap(); + assert_eq!(token.token_type, FUN); +} + +#[test] +fn test_identifiers() { + let tokens = scan_tokens("a").unwrap(); + assert_eq!(tokens.len(), 2); + + let token = tokens.get(0).unwrap(); + assert_eq!(token.token_type, IDENTIFIER); +} + +#[test] +fn test_expression() { + let tokens = scan_tokens("if a == 1 {b=\"hello world\"}").unwrap(); + assert_eq!(tokens.len(), 10); + + assert_eq!(tokens.get(0).unwrap().token_type, IF); + assert_eq!(tokens.get(1).unwrap().token_type, IDENTIFIER); + assert_eq!(tokens.get(2).unwrap().token_type, EQUALEQUAL); + assert_eq!(tokens.get(3).unwrap().token_type, NUMBER); + assert_eq!(tokens.get(4).unwrap().token_type, LEFTBRACE); + assert_eq!(tokens.get(5).unwrap().token_type, IDENTIFIER); + assert_eq!(tokens.get(6).unwrap().token_type, EQUAL); + assert_eq!(tokens.get(7).unwrap().token_type, STRING); + assert_eq!(tokens.get(8).unwrap().token_type, RIGHTBRACE); + assert_eq!(tokens.get(9).unwrap().token_type, EOF); +} \ No newline at end of file diff --git a/src/tokens.rs b/src/tokens.rs index 9f0ba47..6136575 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -45,47 +45,49 @@ impl fmt::Debug for Token<'_> { pub enum TokenType { // Single-character tokens. LEFTPAREN, - // ( RIGHTPAREN, - // ) LEFTBRACE, - // [ RIGHTBRACE, - // ] COMMA, - // , DOT, - // . MINUS, - // - PLUS, - // + SEMICOLON, - // ; STAR, - // * - SLASH, // / + SLASH, // One or two character tokens. BANG, - // ! BANGEQUAL, - // != EQUAL, - // = EQUALEQUAL, - // == GREATER, - // > GREATEREQUAL, - // >= LESS, - // < - LESSEQUAL, // <= + LESSEQUAL, // Literals. STRING, NUMBER, + IDENTIFIER, + + // Keywords. + AND, + CLASS, + ELSE, + FALSE, + FUN, + FOR, + IF, + NIL, + OR, + PRINT, + RETURN, + SUPER, + THIS, + TRUE, + VAR, + WHILE, EOF, // end of file } \ No newline at end of file