scanner now recognizes complete expressions, adding identifiers and keywords

This commit is contained in:
Sander Hautvast 2020-01-24 14:07:13 +01:00
parent 071f584e92
commit a1f656199c
7 changed files with 125 additions and 19 deletions

10
Cargo.lock generated
View file

@@ -1,6 +1,16 @@
# This file is automatically @generated by Cargo. # This file is automatically @generated by Cargo.
# It is not intended for manual editing. # It is not intended for manual editing.
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]] [[package]]
name = "rust_lox" name = "rust_lox"
version = "0.1.0" version = "0.1.0"
dependencies = [
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[metadata]
"checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"

View file

@@ -5,3 +5,4 @@ authors = ["Sander Hautvast <shautvast@gmail.com>"]
edition = "2018" edition = "2018"
[dependencies] [dependencies]
lazy_static = "1.4.0"

27
src/keywords.rs Normal file
View file

@@ -0,0 +1,27 @@
use std::collections::HashMap;
use crate::tokens::TokenType;
use crate::tokens::TokenType::*;
lazy_static! {
    /// Lookup table mapping every reserved word of the language to its
    /// dedicated TokenType. Lexemes absent from this table are scanned as
    /// plain IDENTIFIER tokens.
    pub static ref KEYWORDS: HashMap<&'static str, TokenType> = [
        ("and", AND),
        ("class", CLASS),
        ("else", ELSE),
        ("false", FALSE),
        ("for", FOR),
        ("fun", FUN),
        ("if", IF),
        ("nil", NIL),
        ("or", OR),
        ("print", PRINT),
        ("return", RETURN),
        ("super", SUPER),
        ("this", THIS),
        ("true", TRUE),
        ("var", VAR),
        ("while", WHILE),
    ]
    .iter()
    .copied()
    .collect();
}

View file

@@ -1,3 +1,6 @@
#[macro_use]
extern crate lazy_static;
use std::env; use std::env;
use std::fs::File; use std::fs::File;
use std::io::{self, BufRead, Read, Write}; use std::io::{self, BufRead, Read, Write};
@@ -5,6 +8,7 @@ use std::process;
mod scanner; mod scanner;
mod tokens; mod tokens;
mod keywords;
#[cfg(test)] #[cfg(test)]
mod tests; mod tests;

View file

@@ -1,5 +1,6 @@
use std::any::Any; use std::any::Any;
use crate::keywords::KEYWORDS;
use crate::tokens::{Token, TokenType}; use crate::tokens::{Token, TokenType};
use crate::tokens::TokenType::*; use crate::tokens::TokenType::*;
@@ -105,6 +106,8 @@ impl Scanner<'_> {
_ => { _ => {
if next_char.is_digit(10) { if next_char.is_digit(10) {
self.number(); self.number();
} else if is_alphabetic_or_underscore(next_char) {
self.identifier();
} else { } else {
self.report_error(self.line, "unexpected character"); self.report_error(self.line, "unexpected character");
} }
@@ -112,6 +115,21 @@ impl Scanner<'_> {
} }
} }
/// Scans an identifier or a reserved word.
/// Keeps consuming characters while they are valid identifier characters,
/// then looks the finished lexeme up in KEYWORDS: a hit produces that
/// keyword's token type, a miss produces a plain IDENTIFIER token.
fn identifier(&mut self) {
    while is_alphanumeric(self.peek(0)) {
        self.advance();
    }
    let lexeme = &self.source[self.start..self.current];
    // keyword table hit -> dedicated token type; otherwise a user-defined name
    let token_type = match KEYWORDS.get(lexeme) {
        Some(&keyword_type) => keyword_type,
        None => TokenType::IDENTIFIER,
    };
    self.add_token(token_type);
}
/// handle number literals /// handle number literals
/// advances while characters are considered part of the number /// advances while characters are considered part of the number
/// finally adds a number token to the list. /// finally adds a number token to the list.
@@ -209,3 +227,12 @@ impl Scanner<'_> {
println!("[line {} ] Error {} ", line, message); println!("[line {} ] Error {} ", line, message);
} }
} }
/// Returns true when `c` may start an identifier: any alphabetic
/// character (per char::is_alphabetic) or the underscore.
fn is_alphabetic_or_underscore(c: char) -> bool {
    c == '_' || c.is_alphabetic()
}

/// Returns true when `c` may continue an identifier: identifier-start
/// characters plus the decimal digits 0-9.
fn is_alphanumeric(c: char) -> bool {
    c.is_digit(10) || is_alphabetic_or_underscore(c)
}

View file

@@ -57,3 +57,38 @@ fn test_scan_numeric_literals() {
assert_eq!(token.lexeme, "0.1"); assert_eq!(token.lexeme, "0.1");
assert_eq!(token.get_literal_as_float().unwrap(), 0.1); assert_eq!(token.get_literal_as_float().unwrap(), 0.1);
} }
/// A reserved word must scan as its dedicated keyword token, not IDENTIFIER.
#[test]
fn test_keywords() {
    let tokens = scan_tokens("fun").unwrap();
    assert_eq!(tokens.len(), 2); // the keyword plus the trailing EOF token
    let first = tokens.first().unwrap();
    assert_eq!(first.token_type, FUN);
}
/// A non-keyword name must scan as a single IDENTIFIER token.
#[test]
fn test_identifiers() {
    let tokens = scan_tokens("a").unwrap();
    assert_eq!(tokens.len(), 2); // the identifier plus the trailing EOF token
    let first = tokens.first().unwrap();
    assert_eq!(first.token_type, IDENTIFIER);
}
/// A small compound expression must yield the expected token type sequence.
#[test]
fn test_expression() {
    let tokens = scan_tokens("if a == 1 {b=\"hello world\"}").unwrap();
    let expected = [
        IF, IDENTIFIER, EQUALEQUAL, NUMBER, LEFTBRACE,
        IDENTIFIER, EQUAL, STRING, RIGHTBRACE, EOF,
    ];
    assert_eq!(tokens.len(), expected.len());
    // compare every scanned token type against the expected sequence
    for (index, expected_type) in expected.iter().enumerate() {
        assert_eq!(tokens.get(index).unwrap().token_type, *expected_type);
    }
}

View file

@@ -45,47 +45,49 @@ impl fmt::Debug for Token<'_> {
pub enum TokenType { pub enum TokenType {
// Single-character tokens. // Single-character tokens.
LEFTPAREN, LEFTPAREN,
// (
RIGHTPAREN, RIGHTPAREN,
// )
LEFTBRACE, LEFTBRACE,
// {
RIGHTBRACE, RIGHTBRACE,
// }
COMMA, COMMA,
// ,
DOT, DOT,
// .
MINUS, MINUS,
// -
PLUS, PLUS,
// +
SEMICOLON, SEMICOLON,
// ;
STAR, STAR,
// * SLASH,
SLASH, // /
// One or two character tokens. // One or two character tokens.
BANG, BANG,
// !
BANGEQUAL, BANGEQUAL,
// !=
EQUAL, EQUAL,
// =
EQUALEQUAL, EQUALEQUAL,
// ==
GREATER, GREATER,
// >
GREATEREQUAL, GREATEREQUAL,
// >=
LESS, LESS,
// < LESSEQUAL,
LESSEQUAL, // <=
// Literals. // Literals.
STRING, STRING,
NUMBER, NUMBER,
IDENTIFIER,
// Keywords.
AND,
CLASS,
ELSE,
FALSE,
FUN,
FOR,
IF,
NIL,
OR,
PRINT,
RETURN,
SUPER,
THIS,
TRUE,
VAR,
WHILE,
EOF, // end of file EOF, // end of file
} }