parsing arithmetic expressions generates an AST
This commit is contained in:
parent
89968525df
commit
382aa16635
7 changed files with 275 additions and 139 deletions
20
src/lox.cpp
20
src/lox.cpp
|
|
@ -1,12 +1,13 @@
|
|||
#include "parser.hpp"
|
||||
#include "scanner.hpp"
|
||||
#include "tokens.hpp"
|
||||
#include <cstdbool>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <list>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
void print_tokens(std::list<Token> *list);
|
||||
void print_tokens(std::vector<Token> *list);
|
||||
int run_file(std::string file);
|
||||
void run_prompt(void);
|
||||
ScanResult run(std::string source);
|
||||
|
|
@ -43,11 +44,16 @@ void run_prompt(void) {
|
|||
|
||||
for (;;) {
|
||||
std::cout << ">";
|
||||
std::cin >> line;
|
||||
|
||||
std::getline(std::cin, line);
|
||||
|
||||
ScanResult scan_result = run(line.substr(0, line.length()));
|
||||
|
||||
print_tokens(&scan_result.token_list);
|
||||
// print_tokens(&scan_result.token_list);
|
||||
if (!scan_result.had_error) {
|
||||
Expression *e = (new Parser())->parse(scan_result.token_list);
|
||||
std::cout << e->to_string();
|
||||
std::cout << "\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -56,8 +62,8 @@ ScanResult run(std::string source) {
|
|||
return scanner->scan_tokens();
|
||||
}
|
||||
|
||||
void print_tokens(std::list<Token> *list) {
|
||||
for (std::list<Token>::iterator token = list->begin(); token != list->end();
|
||||
void print_tokens(std::vector<Token> *list) {
|
||||
for (std::vector<Token>::iterator token = list->begin(); token != list->end();
|
||||
++token) {
|
||||
std::cout << token->to_string() << "(" << token->literal << "), ";
|
||||
}
|
||||
|
|
|
|||
116
src/parser.cpp
116
src/parser.cpp
|
|
@ -1,111 +1,7 @@
|
|||
// #include "tokens.hpp"
|
||||
// #include <stdbool.h>
|
||||
#include "parser.hpp"
|
||||
|
||||
// static const TokenType eq[] = {BANG_EQUAL, EQUAL_EQUAL};
|
||||
// static const TokenType comp[] = {GREATER, GREATER_EQUAL, LESS, LESS_EQUAL};
|
||||
// static const TokenType unarytokens[] = {BANG, MINUS};
|
||||
|
||||
// // static int current = -1;
|
||||
// // TokenList *tokens;
|
||||
|
||||
// class Expression;
|
||||
|
||||
// class Binary {
|
||||
// public:
|
||||
// Expression *left;
|
||||
// Token *operator;
|
||||
// Expression *right;
|
||||
// };
|
||||
|
||||
// typedef struct Grouping {
|
||||
// Expression *expr;
|
||||
// } Grouping;
|
||||
|
||||
// typedef struct Unary {
|
||||
// Token *operator;
|
||||
// Expression *expr;
|
||||
// } Unary;
|
||||
|
||||
// class Expression {
|
||||
// public:
|
||||
// enum ExprType { Bin, Grp, Lit, Una } type;
|
||||
// union Expr {
|
||||
// Binary *binary;
|
||||
// Grouping *grouping;
|
||||
// Token *literal;
|
||||
// Unary *unary;
|
||||
// } expr;
|
||||
// };
|
||||
|
||||
// void parse(TokenList *tokenlist) {
|
||||
// tokens = tokenlist;
|
||||
// current = 0;
|
||||
// }
|
||||
|
||||
// static Token *peek(void) { return tokenlist_get(tokens, current); }
|
||||
|
||||
// static bool is_at_end(void) { return peek()->type == END_OF_FILE; }
|
||||
|
||||
// static bool check(TokenType type) {
|
||||
// if (is_at_end()) {
|
||||
// return false;
|
||||
// }
|
||||
// return peek()->type == type;
|
||||
// }
|
||||
|
||||
// static bool match(const TokenType tokens[], int n) {
|
||||
// for (int i = 0; i < n; i++) {
|
||||
// if (check(tokens[i])) {
|
||||
// }
|
||||
// }
|
||||
// return false;
|
||||
// }
|
||||
|
||||
// static Token previous(void) {
|
||||
// Token t = {NUMBER, "", 0, 0};
|
||||
// return t;
|
||||
// }
|
||||
|
||||
// static Expression unary(void) {
|
||||
// if (match(unarytokens, 2)) {
|
||||
// Token operator= previous();
|
||||
// Expression right = unary();
|
||||
// Expression un;
|
||||
// un.type = Una;
|
||||
// Unary new_una = {&operator, & right };
|
||||
// un.expr.unary = &new_una;
|
||||
// return un;
|
||||
// }
|
||||
// return primary();
|
||||
// }
|
||||
|
||||
// static Expression factor(void) {}
|
||||
|
||||
// static Expression term(void) { Expression expr = factor(); }
|
||||
|
||||
// static Expression comparison(void) {
|
||||
// Expression expr = term();
|
||||
// while (match(comp, 4)) {
|
||||
// Token operator= previous();
|
||||
// Expression right = term();
|
||||
|
||||
// Binary new_bin = {&expr, &operator, & right };
|
||||
// expr.expr.binary = &new_bin;
|
||||
// expr.type = Bin;
|
||||
// }
|
||||
// return expr;
|
||||
// }
|
||||
// static Expression equality(void) {
|
||||
// Expression expr = comparison();
|
||||
|
||||
// while (match(eq, 2)) {
|
||||
// Token operator= previous();
|
||||
// Expression right = comparison();
|
||||
// Binary new_bin = {&expr, &operator, & right };
|
||||
// expr.expr.binary = &new_bin;
|
||||
// expr.type = Bin;
|
||||
// }
|
||||
// return expr;
|
||||
// }
|
||||
|
||||
// // static Expression expression(void) { return equality(); }
|
||||
// Entry point of the parser: stores a copy of the scanned tokens, rewinds the
// cursor to the first token and parses a single expression.
//
// Returns a heap-allocated expression tree; the caller receives the raw
// pointer. Parse failures are reported by the exceptions thrown from the
// grammar methods that expression() calls.
Expression *Parser::parse(std::vector<Token> tokenlist) {
  this->current_token = 0;
  this->tokens = tokenlist;

  Expression *root = this->expression();
  return root;
}
|
||||
|
|
|
|||
232
src/parser.hpp
Normal file
232
src/parser.hpp
Normal file
|
|
@ -0,0 +1,232 @@
|
|||
#pragma once
|
||||
|
||||
#include "tokens.hpp"
|
||||
#include <cstdarg>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
// Tag identifying the concrete kind of an AST node. Each Expression subclass
// returns its own tag from type(); the Expression base class returns None.
enum class ExprType { Binary, Grouping, Unary, Literal, None };
|
||||
|
||||
class Expression {
|
||||
public:
|
||||
virtual ExprType type() { return ExprType::None; };
|
||||
virtual std::string to_string() { return "Expression()"; };
|
||||
virtual ~Expression() {}
|
||||
};
|
||||
|
||||
class Binary : public Expression {
|
||||
public:
|
||||
ExprType type() override { return ExprType::Binary; }
|
||||
std::string to_string() override {
|
||||
return "(" + token_name(op->tokentype) + " " + left->to_string() + " " +
|
||||
right->to_string() + ")";
|
||||
}
|
||||
Expression *left;
|
||||
Token *op;
|
||||
Expression *right;
|
||||
Binary(Expression *_left, Token *_operator, Expression *_right)
|
||||
: left(_left), op(_operator), right(_right){};
|
||||
~Binary() override {
|
||||
delete left;
|
||||
delete right;
|
||||
delete op;
|
||||
}
|
||||
};
|
||||
|
||||
class Grouping : public Expression {
|
||||
public:
|
||||
ExprType type() override { return ExprType::Grouping; }
|
||||
std::string to_string() override { return "(" + expr->to_string() + ")"; }
|
||||
Expression *expr;
|
||||
Grouping(Expression *_expr) : expr(_expr){};
|
||||
~Grouping() override { delete expr; }
|
||||
};
|
||||
|
||||
class Unary : public Expression {
|
||||
public:
|
||||
ExprType type() override { return ExprType::Unary; }
|
||||
std::string to_string() override {
|
||||
return token_name(op->tokentype) + right->to_string();
|
||||
}
|
||||
Token *op;
|
||||
Expression *right;
|
||||
|
||||
Unary(Token *_operator, Expression *_right) : op(_operator), right(_right){};
|
||||
~Unary() override {
|
||||
delete right;
|
||||
delete op;
|
||||
}
|
||||
};
|
||||
|
||||
// Empty tag type. Literal uses it to select its nil constructor and as the
// placeholder member of its value union.
class Void {};
|
||||
|
||||
class Literal : public Expression {
|
||||
public:
|
||||
ExprType type() override { return ExprType::Literal; }
|
||||
std::string to_string() override {
|
||||
std::string text;
|
||||
switch (valuetype) {
|
||||
case String:
|
||||
text = "\"" + value.str + "\"";
|
||||
break;
|
||||
case Numeric:
|
||||
text = std::to_string(value.numeric);
|
||||
break;
|
||||
case Boolean:
|
||||
text = value.boolean ? "True" : "False";
|
||||
break;
|
||||
case Nil:
|
||||
text = "NULL";
|
||||
break;
|
||||
}
|
||||
return text;
|
||||
}
|
||||
enum ValueType { String, Numeric, Boolean, Nil } valuetype;
|
||||
|
||||
union Value {
|
||||
double_t numeric;
|
||||
bool boolean;
|
||||
std::string str;
|
||||
Void dummy;
|
||||
|
||||
Value(double_t _numeric) : numeric(_numeric) {}
|
||||
Value(bool _boolean) : boolean(_boolean) {}
|
||||
Value(std::string _str) : str(_str) {}
|
||||
Value(Void v) : dummy(v) {}
|
||||
~Value() {}
|
||||
} value;
|
||||
|
||||
Literal(Void v) : valuetype(ValueType::Nil), value(v) {}
|
||||
Literal(double_t _numeric) : valuetype(ValueType::Numeric), value(_numeric) {}
|
||||
Literal(std::string _str) : valuetype(ValueType::String), value(_str) {}
|
||||
Literal(bool _boolean) : valuetype(ValueType::Boolean), value(_boolean) {}
|
||||
};
|
||||
|
||||
class Parser {
|
||||
private:
|
||||
std::vector<Expression> expressions;
|
||||
std::vector<Token> tokens;
|
||||
int current_token;
|
||||
|
||||
Token peek() { return tokens[current_token]; };
|
||||
|
||||
bool is_at_end() { return peek().tokentype == Token::END_OF_FILE; };
|
||||
|
||||
Token *previous() { return &tokens[current_token - 1]; };
|
||||
|
||||
Token *advance() {
|
||||
if (!is_at_end())
|
||||
current_token += 1;
|
||||
return previous();
|
||||
}
|
||||
|
||||
bool check(Token::Type type) {
|
||||
if (is_at_end()) {
|
||||
return false;
|
||||
}
|
||||
return peek().tokentype == type;
|
||||
}
|
||||
|
||||
bool match(int count, ...) {
|
||||
va_list list;
|
||||
va_start(list, count);
|
||||
|
||||
for (int i = 0; i < count; i++) {
|
||||
Token::Type ttc = va_arg(list, Token::Type);
|
||||
// std::cout << token_name(ttc) << "\n";
|
||||
if (check(ttc)) {
|
||||
advance();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
Token *consume(Token::Type typ, std::string message) {
|
||||
if (check(typ)) {
|
||||
return advance();
|
||||
}
|
||||
throw error(peek(), message);
|
||||
}
|
||||
|
||||
std::runtime_error error(Token token, std::string message) {
|
||||
std::cout << token.to_string() << " " << message;
|
||||
return std::runtime_error(message);
|
||||
}
|
||||
|
||||
Expression *primary() {
|
||||
if (match(1, Token::Type::FALSE))
|
||||
return new Literal(false);
|
||||
if (match(1, Token::Type::TRUE))
|
||||
return new Literal(true);
|
||||
if (match(1, Token::Type::NIL))
|
||||
return new Literal(new Void());
|
||||
if (match(1, Token::Type::NUMBER)) {
|
||||
return new Literal(std::stod(previous()->literal));
|
||||
}
|
||||
if (match(1, Token::Type::STRING)) {
|
||||
return new Literal(previous()->literal);
|
||||
}
|
||||
if (match(1, Token::Type::LEFT_PAREN)) {
|
||||
Expression *e = expression();
|
||||
consume(Token::Type::RIGHT_PAREN, "Expect ')'.");
|
||||
return new Grouping(e);
|
||||
}
|
||||
throw std::runtime_error("Expected an expression");
|
||||
}
|
||||
|
||||
public:
|
||||
Expression *parse(std::vector<Token> tokenlist);
|
||||
Expression *unary() {
|
||||
if (match(2, Token::BANG, Token::Type::MINUS)) {
|
||||
Token *op = previous();
|
||||
Expression *right = unary();
|
||||
return new Unary(op, right);
|
||||
}
|
||||
return primary();
|
||||
}
|
||||
|
||||
Expression *expression() { return equality(); }
|
||||
|
||||
Expression *factor() {
|
||||
Expression *expr = unary();
|
||||
while (match(2, Token::Type::SLASH, Token::Type::STAR)) {
|
||||
Token *op = previous();
|
||||
Expression *right = unary();
|
||||
expr = new Binary(expr, op, right);
|
||||
}
|
||||
return expr;
|
||||
}
|
||||
|
||||
Expression *term() {
|
||||
Expression *expr = factor();
|
||||
while (match(2, Token::Type::MINUS, Token::Type::PLUS)) {
|
||||
Token *op = previous();
|
||||
Expression *right = unary();
|
||||
expr = new Binary(expr, op, right);
|
||||
}
|
||||
return expr;
|
||||
}
|
||||
|
||||
Expression *equality(void) {
|
||||
Expression *expr = comparison();
|
||||
|
||||
while (match(2, Token::Type::BANG_EQUAL, Token::Type::BANG_EQUAL)) {
|
||||
Token *op = previous();
|
||||
Expression *right = comparison();
|
||||
return new Binary(expr, op, right);
|
||||
}
|
||||
return expr;
|
||||
}
|
||||
|
||||
Expression *comparison(void) {
|
||||
Expression *expr = term();
|
||||
while (match(4, Token::Type::GREATER, Token::Type::GREATER_EQUAL,
|
||||
Token::Type::LESS, Token::Type::LESS_EQUAL)) {
|
||||
Token *op = previous();
|
||||
Expression *right = term();
|
||||
expr = new Binary(expr, op, right);
|
||||
}
|
||||
return expr;
|
||||
}
|
||||
};
|
||||
|
|
@ -2,9 +2,9 @@
|
|||
#include "tokens.hpp"
|
||||
#include <cstdbool>
|
||||
#include <iostream>
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
static const std::map<std::string, Token::Type> keywords = {
|
||||
{"and", Token::Type::AND}, {"class", Token::Type::CLASS},
|
||||
|
|
@ -19,7 +19,7 @@ static const std::map<std::string, Token::Type> keywords = {
|
|||
|
||||
Scanner::Scanner(std::string s)
|
||||
: had_error(false), current_pos(0), start(0), current_line(1), source(s),
|
||||
token_list(std::list<Token>()) {}
|
||||
token_list(std::vector<Token>()) {}
|
||||
|
||||
ScanResult Scanner::scan_tokens() {
|
||||
while (current_pos < source.length()) {
|
||||
|
|
@ -35,15 +35,13 @@ ScanResult Scanner::scan_tokens() {
|
|||
}
|
||||
|
||||
void Scanner::add_token(Token::Type type) {
|
||||
Token token =
|
||||
Token(type, source.substr(start, current_pos), "", current_line);
|
||||
token_list.push_front(token);
|
||||
Token token = Token(type, "", "", current_line);
|
||||
token_list.push_back(token);
|
||||
}
|
||||
|
||||
void Scanner::add_token_with_literal(Token::Type type, std::string literal) {
|
||||
Token token =
|
||||
Token(type, source.substr(start, current_pos), literal, current_line);
|
||||
token_list.push_front(token);
|
||||
void Scanner::add_token(Token::Type type, std::string literal) {
|
||||
Token token = Token(type, literal, literal, current_line);
|
||||
token_list.push_back(token);
|
||||
}
|
||||
|
||||
char Scanner::advance() {
|
||||
|
|
@ -91,6 +89,9 @@ void Scanner::scan_token() {
|
|||
case '<':
|
||||
add_token(match('=') ? Token::Type::LESS_EQUAL : Token::Type::LESS);
|
||||
break;
|
||||
case '*':
|
||||
add_token(Token::Type::STAR);
|
||||
break;
|
||||
case '/':
|
||||
if (match('/')) {
|
||||
while (peek() != '\n' && !is_at_end()) {
|
||||
|
|
@ -127,13 +128,12 @@ void Scanner::identifier() {
|
|||
advance();
|
||||
}
|
||||
|
||||
std::string text = source.substr(start + 1, current_pos - start);
|
||||
|
||||
std::string text = source.substr(start, current_pos - start);
|
||||
auto it = keywords.find(text);
|
||||
if (it != keywords.end()) {
|
||||
add_token(it->second);
|
||||
add_token(it->second, text);
|
||||
} else {
|
||||
add_token(Token::Type::IDENTIFIER);
|
||||
add_token(Token::Type::IDENTIFIER, text);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -144,8 +144,7 @@ void Scanner::number() {
|
|||
advance();
|
||||
while (is_digit(peek()))
|
||||
advance();
|
||||
add_token_with_literal(Token::Type::NUMBER,
|
||||
source.substr(start + 1, current_pos - start));
|
||||
add_token(Token::Type::NUMBER, source.substr(start, current_pos - start));
|
||||
}
|
||||
|
||||
bool Scanner::is_digit(char c) { return c >= '0' && c <= '9'; }
|
||||
|
|
@ -165,7 +164,7 @@ void Scanner::string() {
|
|||
advance();
|
||||
|
||||
std::string string = source.substr(start + 1, current_pos - start - 2);
|
||||
add_token_with_literal(Token::Type::STRING, string);
|
||||
add_token(Token::Type::STRING, string);
|
||||
}
|
||||
|
||||
bool Scanner::match(char expected) {
|
||||
|
|
|
|||
|
|
@ -2,12 +2,12 @@
|
|||
|
||||
#include "tokens.hpp"
|
||||
#include <cstdbool>
|
||||
#include <list>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
typedef struct {
|
||||
bool had_error;
|
||||
std::list<Token> token_list;
|
||||
std::vector<Token> token_list;
|
||||
} ScanResult;
|
||||
|
||||
class Scanner {
|
||||
|
|
@ -17,13 +17,13 @@ private:
|
|||
int start;
|
||||
int current_line;
|
||||
std::string source;
|
||||
std::list<Token> token_list;
|
||||
std::vector<Token> token_list;
|
||||
|
||||
public:
|
||||
Scanner(std::string s);
|
||||
ScanResult scan_tokens();
|
||||
void add_token(Token::Type type);
|
||||
void add_token_with_literal(Token::Type type, std::string literal);
|
||||
void add_token(Token::Type type, std::string literal);
|
||||
char advance();
|
||||
void scan_token();
|
||||
void identifier();
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ Token::Token(Token::Type _tokentype, std::string _lexeme, std::string _literal,
|
|||
int _line)
|
||||
: lexeme(_lexeme), literal(_literal), line(_line), tokentype(_tokentype) {}
|
||||
|
||||
std::string Token::to_string() {
|
||||
std::string token_name(Token::Type tokentype) {
|
||||
static const std::string tokens[] = {
|
||||
"END_OF_FILE", "LEFT_PAREN", "RIGHT_PAREN", "LEFT_BRACE", "RIGHT_BRACE",
|
||||
"COMMA", "DOT", "MINUS", "PLUS", "SEMICOLON",
|
||||
|
|
@ -14,6 +14,7 @@ std::string Token::to_string() {
|
|||
"ELSE", "FALSE", "FUN", "FOR", "IF",
|
||||
"NIL", "OR", "PRINT", "RETURN", "SUPER",
|
||||
"THIS", "TRUE", "VAR", "WHILE"};
|
||||
|
||||
return tokens[(int)tokentype];
|
||||
}
|
||||
|
||||
std::string Token::to_string() { return token_name(tokentype); }
|
||||
|
|
|
|||
|
|
@ -54,3 +54,5 @@ public:
|
|||
Token(Token::Type _tokentype, std::string _lexeme, std::string _literal,
|
||||
int line);
|
||||
};
|
||||
|
||||
std::string token_name(Token::Type tokentype);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue