scanner now recognizes complete expressions, adding identifiers and keywords

This commit is contained in:
Sander Hautvast 2020-01-24 14:07:13 +01:00
parent 071f584e92
commit a1f656199c
7 changed files with 125 additions and 19 deletions

10
Cargo.lock generated
View file

@@ -1,6 +1,16 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "rust_lox"
version = "0.1.0"
dependencies = [
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[metadata]
"checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"

View file

@@ -5,3 +5,4 @@ authors = ["Sander Hautvast <shautvast@gmail.com>"]
edition = "2018"
[dependencies]
lazy_static = "1.4.0"

27
src/keywords.rs Normal file
View file

@@ -0,0 +1,27 @@
use std::collections::HashMap;
use crate::tokens::TokenType;
use crate::tokens::TokenType::*;
lazy_static! {
    /// Lookup table from reserved-word lexeme to its token type.
    /// Built once, lazily, on first access.
    pub static ref KEYWORDS: HashMap<&'static str, TokenType> = [
        ("and", AND),
        ("class", CLASS),
        ("else", ELSE),
        ("false", FALSE),
        ("for", FOR),
        ("fun", FUN),
        ("if", IF),
        ("nil", NIL),
        ("or", OR),
        ("print", PRINT),
        ("return", RETURN),
        ("super", SUPER),
        ("this", THIS),
        ("true", TRUE),
        ("var", VAR),
        ("while", WHILE),
    ]
    .iter()
    .copied()
    .collect();
}

View file

@@ -1,3 +1,6 @@
#[macro_use]
extern crate lazy_static;
use std::env;
use std::fs::File;
use std::io::{self, BufRead, Read, Write};
@@ -5,6 +8,7 @@ use std::process;
mod scanner;
mod tokens;
mod keywords;
#[cfg(test)]
mod tests;

View file

@@ -1,5 +1,6 @@
use std::any::Any;
use crate::keywords::KEYWORDS;
use crate::tokens::{Token, TokenType};
use crate::tokens::TokenType::*;
@@ -105,6 +106,8 @@ impl Scanner<'_> {
_ => {
if next_char.is_digit(10) {
self.number();
} else if is_alphabetic_or_underscore(next_char) {
self.identifier();
} else {
self.report_error(self.line, "unexpected character");
}
@@ -112,6 +115,21 @@ impl Scanner<'_> {
}
}
/// Scans an identifier or reserved word beginning at `self.start`.
/// Consumes identifier characters, then emits either the matching
/// keyword token or a generic IDENTIFIER token.
fn identifier(&mut self) {
    loop {
        if !is_alphanumeric(self.peek(0)) {
            break;
        }
        self.advance();
    }
    // The lexeme is everything consumed since the token started.
    let lexeme = &self.source[self.start..self.current];
    let token_type = KEYWORDS
        .get(lexeme)
        .copied()
        .unwrap_or(TokenType::IDENTIFIER);
    self.add_token(token_type);
}
/// handle number literals
/// advances while characters are considered part of the number
/// finally adds a number token to the list.
@@ -209,3 +227,12 @@ impl Scanner<'_> {
println!("[line {} ] Error {} ", line, message);
}
}
/// True for characters that may begin an identifier:
/// an underscore or any Unicode alphabetic character.
fn is_alphabetic_or_underscore(c: char) -> bool {
    c == '_' || c.is_alphabetic()
}
/// True for characters that may continue an identifier:
/// an identifier-start character (letter or '_') or an ASCII decimal digit.
fn is_alphanumeric(c: char) -> bool {
    // `is_ascii_digit()` is the idiomatic, behavior-identical form of
    // `is_digit(10)` (clippy: is_digit_ascii_radix).
    is_alphabetic_or_underscore(c) || c.is_ascii_digit()
}

View file

@@ -57,3 +57,38 @@ fn test_scan_numeric_literals() {
assert_eq!(token.lexeme, "0.1");
assert_eq!(token.get_literal_as_float().unwrap(), 0.1);
}
#[test]
fn test_keywords() {
    // A lone keyword scans to that keyword's token followed by EOF.
    let tokens = scan_tokens("fun").unwrap();
    assert_eq!(2, tokens.len());
    assert_eq!(FUN, tokens[0].token_type);
}
#[test]
fn test_identifiers() {
    // A name that is not a reserved word scans as IDENTIFIER followed by EOF.
    let tokens = scan_tokens("a").unwrap();
    assert_eq!(2, tokens.len());
    assert_eq!(IDENTIFIER, tokens[0].token_type);
}
#[test]
fn test_expression() {
    // A full expression scans into the expected token-type sequence.
    let tokens = scan_tokens("if a == 1 {b=\"hello world\"}").unwrap();
    let expected = [
        IF, IDENTIFIER, EQUALEQUAL, NUMBER, LEFTBRACE,
        IDENTIFIER, EQUAL, STRING, RIGHTBRACE, EOF,
    ];
    assert_eq!(tokens.len(), expected.len());
    for (token, expected_type) in tokens.iter().zip(expected.iter()) {
        assert_eq!(token.token_type, *expected_type);
    }
}

View file

@@ -45,47 +45,49 @@ impl fmt::Debug for Token<'_> {
/// Every kind of token the scanner can produce.
///
/// NOTE(review): the diff rendering showed duplicate `SLASH` and `LESSEQUAL`
/// lines (old + new versions of the same line); this is the deduplicated
/// post-commit enum with each comment attached to its own variant.
pub enum TokenType {
    // Single-character tokens.
    LEFTPAREN,    // (
    RIGHTPAREN,   // )
    LEFTBRACE,    // { (tests scan "{" to LEFTBRACE)
    RIGHTBRACE,   // } (tests scan "}" to RIGHTBRACE)
    COMMA,        // ,
    DOT,          // .
    MINUS,        // -
    PLUS,         // +
    SEMICOLON,    // ;
    STAR,         // *
    SLASH,        // /
    // One or two character tokens.
    BANG,         // !
    BANGEQUAL,    // !=
    EQUAL,        // =
    EQUALEQUAL,   // ==
    GREATER,      // >
    GREATEREQUAL, // >=
    LESS,         // <
    LESSEQUAL,    // <=
    // Literals.
    STRING,
    NUMBER,
    IDENTIFIER,
    // Keywords.
    AND,
    CLASS,
    ELSE,
    FALSE,
    FUN,
    FOR,
    IF,
    NIL,
    OR,
    PRINT,
    RETURN,
    SUPER,
    THIS,
    TRUE,
    VAR,
    WHILE,
    EOF, // end of file
}