From 382aa16635f7b3a9e6c2146babf3228ffeea8c30 Mon Sep 17 00:00:00 2001 From: Shautvast Date: Tue, 22 Oct 2024 09:22:32 +0200 Subject: [PATCH] parsing arithmetic expressions generates an AST --- src/lox.cpp | 20 +++-- src/parser.cpp | 116 ++---------------------- src/parser.hpp | 232 ++++++++++++++++++++++++++++++++++++++++++++++++ src/scanner.cpp | 31 ++++--- src/scanner.hpp | 8 +- src/tokens.cpp | 5 +- src/tokens.hpp | 2 + 7 files changed, 275 insertions(+), 139 deletions(-) create mode 100644 src/parser.hpp diff --git a/src/lox.cpp b/src/lox.cpp index 797542f..708f316 100644 --- a/src/lox.cpp +++ b/src/lox.cpp @@ -1,12 +1,13 @@ +#include "parser.hpp" #include "scanner.hpp" #include "tokens.hpp" #include #include #include -#include #include +#include -void print_tokens(std::list *list); +void print_tokens(std::vector *list); int run_file(std::string file); void run_prompt(void); ScanResult run(std::string source); @@ -43,11 +44,16 @@ void run_prompt(void) { for (;;) { std::cout << ">"; - std::cin >> line; + + std::getline(std::cin, line); ScanResult scan_result = run(line.substr(0, line.length())); - - print_tokens(&scan_result.token_list); + // print_tokens(&scan_result.token_list); + if (!scan_result.had_error) { + Expression *e = (new Parser())->parse(scan_result.token_list); + std::cout << e->to_string(); + std::cout << "\n"; + } } } @@ -56,8 +62,8 @@ ScanResult run(std::string source) { return scanner->scan_tokens(); } -void print_tokens(std::list *list) { - for (std::list::iterator token = list->begin(); token != list->end(); +void print_tokens(std::vector *list) { + for (std::vector::iterator token = list->begin(); token != list->end(); ++token) { std::cout << token->to_string() << "(" << token->literal << "), "; } diff --git a/src/parser.cpp b/src/parser.cpp index 2c7ff0f..5156459 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -1,111 +1,7 @@ -// #include "tokens.hpp" -// #include +#include "parser.hpp" -// static const TokenType eq[] = {BANG_EQUAL, EQUAL_EQUAL}; -// static const TokenType comp[] = {GREATER, GREATER_EQUAL, LESS, LESS_EQUAL}; -// static const TokenType unarytokens[] = {BANG, MINUS}; - -// // static int current = -1; -// // TokenList *tokens; - -// class Expression; - -// class Binary { -// public: -// Expression *left; -// Token *operator; -// Expression *right; -// }; - -// typedef struct Grouping { -// Expression *expr; -// } Grouping; - -// typedef struct Unary { -// Token *operator; -// Expression *expr; -// } Unary; - -// class Expression { -// public: -// enum ExprType { Bin, Grp, Lit, Una } type; -// union Expr { -// Binary *binary; -// Grouping *grouping; -// Token *literal; -// Unary *unary; -// } expr; -// }; - -// void parse(TokenList *tokenlist) { -// tokens = tokenlist; -// current = 0; -// } - -// static Token *peek(void) { return tokenlist_get(tokens, current); } - -// static bool is_at_end(void) { return peek()->type == END_OF_FILE; } - -// static bool check(TokenType type) { -// if (is_at_end()) { -// return false; -// } -// return peek()->type == type; -// } - -// static bool match(const TokenType tokens[], int n) { -// for (int i = 0; i < n; i++) { -// if (check(tokens[i])) { -// } -// } -// return false; -// } - -// static Token previous(void) { -// Token t = {NUMBER, "", 0, 0}; -// return t; -// } - -// static Expression unary(void) { -// if (match(unarytokens, 2)) { -// Token operator= previous(); -// Expression right = unary(); -// Expression un; -// un.type = Una; -// Unary new_una = {&operator, & right }; -// un.expr.unary = &new_una; -// return un; -// } -// return primary(); -// } - -// static Expression factor(void) {} - -// static Expression term(void) { Expression expr = factor(); } - -// static Expression comparison(void) { -// Expression expr = term(); -// while (match(comp, 4)) { -// Token operator= previous(); -// Expression right = term(); - -// Binary new_bin = {&expr, &operator, & right }; -// expr.expr.binary = &new_bin; -// expr.type = Bin; -// } -// return expr; -// } -// static Expression equality(void) { -// Expression expr = comparison(); - -// while (match(eq, 2)) { -// Token operator= previous(); -// Expression right = comparison(); -// Binary new_bin = {&expr, &operator, & right }; -// expr.expr.binary = &new_bin; -// expr.type = Bin; -// } -// return expr; -// } - -// // static Expression expression(void) { return equality(); } +Expression *Parser::parse(std::vector tokenlist) { + tokens = tokenlist; + current_token = 0; + return expression(); +} diff --git a/src/parser.hpp b/src/parser.hpp new file mode 100644 index 0000000..a64be8a --- /dev/null +++ b/src/parser.hpp @@ -0,0 +1,232 @@ +#pragma once + +#include "tokens.hpp" +#include +#include +#include + +enum class ExprType { Binary, Grouping, Unary, Literal, None }; + +class Expression { +public: + virtual ExprType type() { return ExprType::None; }; + virtual std::string to_string() { return "Expression()"; }; + virtual ~Expression() {} +}; + +class Binary : public Expression { +public: + ExprType type() override { return ExprType::Binary; } + std::string to_string() override { + return "(" + token_name(op->tokentype) + " " + left->to_string() + " " + + right->to_string() + ")"; + } + Expression *left; + Token *op; + Expression *right; + Binary(Expression *_left, Token *_operator, Expression *_right) + : left(_left), op(_operator), right(_right){}; + ~Binary() override { + delete left; + delete right; + delete op; + } +}; + +class Grouping : public Expression { +public: + ExprType type() override { return ExprType::Grouping; } + std::string to_string() override { return "(" + expr->to_string() + ")"; } + Expression *expr; + Grouping(Expression *_expr) : expr(_expr){}; + ~Grouping() override { delete expr; } +}; + +class Unary : public Expression { +public: + ExprType type() override { return ExprType::Unary; } + std::string to_string() override { + return token_name(op->tokentype) + right->to_string(); + } + Token *op; + Expression *right; + + Unary(Token *_operator, Expression *_right) : op(_operator), right(_right){}; + ~Unary() override { + delete right; + delete op; + } +}; + +class Void {}; + +class Literal : public Expression { +public: + ExprType type() override { return ExprType::Literal; } + std::string to_string() override { + std::string text; + switch (valuetype) { + case String: + text = "\"" + value.str + "\""; + break; + case Numeric: + text = std::to_string(value.numeric); + break; + case Boolean: + text = value.boolean ? "True" : "False"; + break; + case Nil: + text = "NULL"; + break; + } + return text; + } + enum ValueType { String, Numeric, Boolean, Nil } valuetype; + + union Value { + double_t numeric; + bool boolean; + std::string str; + Void dummy; + + Value(double_t _numeric) : numeric(_numeric) {} + Value(bool _boolean) : boolean(_boolean) {} + Value(std::string _str) : str(_str) {} + Value(Void v) : dummy(v) {} + ~Value() {} + } value; + + Literal(Void v) : valuetype(ValueType::Nil), value(v) {} + Literal(double_t _numeric) : valuetype(ValueType::Numeric), value(_numeric) {} + Literal(std::string _str) : valuetype(ValueType::String), value(_str) {} + Literal(bool _boolean) : valuetype(ValueType::Boolean), value(_boolean) {} +}; + +class Parser { +private: + std::vector expressions; + std::vector tokens; + int current_token; + + Token peek() { return tokens[current_token]; }; + + bool is_at_end() { return peek().tokentype == Token::END_OF_FILE; }; + + Token *previous() { return &tokens[current_token - 1]; }; + + Token *advance() { + if (!is_at_end()) + current_token += 1; + return previous(); + } + + bool check(Token::Type type) { + if (is_at_end()) { + return false; + } + return peek().tokentype == type; + } + + bool match(int count, ...) { + va_list list; + va_start(list, count); + + for (int i = 0; i < count; i++) { + Token::Type ttc = va_arg(list, Token::Type); + // std::cout << token_name(ttc) << "\n"; + if (check(ttc)) { + advance(); + return true; + } + } + return false; + }; + + Token *consume(Token::Type typ, std::string message) { + if (check(typ)) { + return advance(); + } + throw error(peek(), message); + } + + std::runtime_error error(Token token, std::string message) { + std::cout << token.to_string() << " " << message; + return std::runtime_error(message); + } + + Expression *primary() { + if (match(1, Token::Type::FALSE)) + return new Literal(false); + if (match(1, Token::Type::TRUE)) + return new Literal(true); + if (match(1, Token::Type::NIL)) + return new Literal(new Void()); + if (match(1, Token::Type::NUMBER)) { + return new Literal(std::stod(previous()->literal)); + } + if (match(1, Token::Type::STRING)) { + return new Literal(previous()->literal); + } + if (match(1, Token::Type::LEFT_PAREN)) { + Expression *e = expression(); + consume(Token::Type::RIGHT_PAREN, "Expect ')'."); + return new Grouping(e); + } + throw std::runtime_error("Expected an expression"); + } + +public: + Expression *parse(std::vector tokenlist); + Expression *unary() { + if (match(2, Token::BANG, Token::Type::MINUS)) { + Token *op = previous(); + Expression *right = unary(); + return new Unary(op, right); + } + return primary(); + } + + Expression *expression() { return equality(); } + + Expression *factor() { + Expression *expr = unary(); + while (match(2, Token::Type::SLASH, Token::Type::STAR)) { + Token *op = previous(); + Expression *right = unary(); + expr = new Binary(expr, op, right); + } + return expr; + } + + Expression *term() { + Expression *expr = factor(); + while (match(2, Token::Type::MINUS, Token::Type::PLUS)) { + Token *op = previous(); + Expression *right = unary(); + expr = new Binary(expr, op, right); + } + return expr; + } + + Expression *equality(void) { + Expression *expr = comparison(); + + while (match(2, Token::Type::BANG_EQUAL, Token::Type::BANG_EQUAL)) { + Token *op = previous(); + Expression *right = comparison(); + return new Binary(expr, op, right); + } + return expr; + } + + Expression *comparison(void) { + Expression *expr = term(); + while (match(4, Token::Type::GREATER, Token::Type::GREATER_EQUAL, + Token::Type::LESS, Token::Type::LESS_EQUAL)) { + Token *op = previous(); + Expression *right = term(); + expr = new Binary(expr, op, right); + } + return expr; + } +}; diff --git a/src/scanner.cpp b/src/scanner.cpp index 26a37bf..f910226 100644 --- a/src/scanner.cpp +++ b/src/scanner.cpp @@ -2,9 +2,9 @@ #include "tokens.hpp" #include #include -#include #include #include +#include static const std::map keywords = { {"and", Token::Type::AND}, {"class", Token::Type::CLASS}, @@ -19,7 +19,7 @@ static const std::map keywords = { Scanner::Scanner(std::string s) : had_error(false), current_pos(0), start(0), current_line(1), source(s), - token_list(std::list()) {} + token_list(std::vector()) {} ScanResult Scanner::scan_tokens() { while (current_pos < source.length()) { @@ -35,15 +35,13 @@ ScanResult Scanner::scan_tokens() { } void Scanner::add_token(Token::Type type) { - Token token = - Token(type, source.substr(start, current_pos), "", current_line); - token_list.push_front(token); + Token token = Token(type, "", "", current_line); + token_list.push_back(token); } -void Scanner::add_token_with_literal(Token::Type type, std::string literal) { - Token token = - Token(type, source.substr(start, current_pos), literal, current_line); - token_list.push_front(token); +void Scanner::add_token(Token::Type type, std::string literal) { + Token token = Token(type, literal, literal, current_line); + token_list.push_back(token); } char Scanner::advance() { @@ -91,6 +89,9 @@ void Scanner::scan_token() { case '<': add_token(match('=') ? Token::Type::LESS_EQUAL : Token::Type::LESS); break; + case '*': + add_token(Token::Type::STAR); + break; case '/': if (match('/')) { while (peek() != '\n' && !is_at_end()) { @@ -127,13 +128,12 @@ void Scanner::identifier() { advance(); } - std::string text = source.substr(start + 1, current_pos - start); - + std::string text = source.substr(start, current_pos - start); auto it = keywords.find(text); if (it != keywords.end()) { - add_token(it->second); + add_token(it->second, text); } else { - add_token(Token::Type::IDENTIFIER); + add_token(Token::Type::IDENTIFIER, text); } } @@ -144,8 +144,7 @@ void Scanner::number() { advance(); while (is_digit(peek())) advance(); - add_token_with_literal(Token::Type::NUMBER, - source.substr(start + 1, current_pos - start)); + add_token(Token::Type::NUMBER, source.substr(start, current_pos - start)); } bool Scanner::is_digit(char c) { return c >= '0' && c <= '9'; } @@ -165,7 +164,7 @@ void Scanner::string() { advance(); std::string string = source.substr(start + 1, current_pos - start - 2); - add_token_with_literal(Token::Type::STRING, string); + add_token(Token::Type::STRING, string); } bool Scanner::match(char expected) { diff --git a/src/scanner.hpp b/src/scanner.hpp index bdd8690..90e2f9b 100644 --- a/src/scanner.hpp +++ b/src/scanner.hpp @@ -2,12 +2,12 @@ #include "tokens.hpp" #include -#include #include +#include typedef struct { bool had_error; - std::list token_list; + std::vector token_list; } ScanResult; class Scanner { @@ -17,13 +17,13 @@ private: int start; int current_line; std::string source; - std::list token_list; + std::vector token_list; public: Scanner(std::string s); ScanResult scan_tokens(); void add_token(Token::Type type); - void add_token_with_literal(Token::Type type, std::string literal); + void add_token(Token::Type type, std::string literal); char advance(); void scan_token(); void identifier(); diff --git a/src/tokens.cpp b/src/tokens.cpp index 8703239..79127eb 100644 --- a/src/tokens.cpp +++ b/src/tokens.cpp @@ -4,7 +4,7 @@ Token::Token(Token::Type _tokentype, std::string _lexeme, std::string _literal, int _line) : lexeme(_lexeme), literal(_literal), line(_line), tokentype(_tokentype) {} -std::string Token::to_string() { +std::string token_name(Token::Type tokentype) { static const std::string tokens[] = { "END_OF_FILE", "LEFT_PAREN", "RIGHT_PAREN", "LEFT_BRACE", "RIGHT_BRACE", "COMMA", "DOT", "MINUS", "PLUS", "SEMICOLON", @@ -14,6 +14,7 @@ std::string Token::to_string() { "ELSE", "FALSE", "FUN", "FOR", "IF", "NIL", "OR", "PRINT", "RETURN", "SUPER", "THIS", "TRUE", "VAR", "WHILE"}; - return tokens[(int)tokentype]; } + +std::string Token::to_string() { return token_name(tokentype); } diff --git a/src/tokens.hpp b/src/tokens.hpp index b2df1b3..5a1047d 100644 --- a/src/tokens.hpp +++ b/src/tokens.hpp @@ -54,3 +54,5 @@ public: Token(Token::Type _tokentype, std::string _lexeme, std::string _literal, int line); }; + +std::string token_name(Token::Type tokentype);