parsing arithmetic expressions generates an AST

This commit is contained in:
Shautvast 2024-10-22 09:22:32 +02:00
parent 89968525df
commit 382aa16635
7 changed files with 275 additions and 139 deletions

View file

@ -1,12 +1,13 @@
#include "parser.hpp"
#include "scanner.hpp" #include "scanner.hpp"
#include "tokens.hpp" #include "tokens.hpp"
#include <cstdbool> #include <cstdbool>
#include <fstream> #include <fstream>
#include <iostream> #include <iostream>
#include <list>
#include <string> #include <string>
#include <vector>
void print_tokens(std::list<Token> *list); void print_tokens(std::vector<Token> *list);
int run_file(std::string file); int run_file(std::string file);
void run_prompt(void); void run_prompt(void);
ScanResult run(std::string source); ScanResult run(std::string source);
@ -43,11 +44,16 @@ void run_prompt(void) {
for (;;) { for (;;) {
std::cout << ">"; std::cout << ">";
std::cin >> line;
std::getline(std::cin, line);
ScanResult scan_result = run(line.substr(0, line.length())); ScanResult scan_result = run(line.substr(0, line.length()));
// print_tokens(&scan_result.token_list);
print_tokens(&scan_result.token_list); if (!scan_result.had_error) {
Expression *e = (new Parser())->parse(scan_result.token_list);
std::cout << e->to_string();
std::cout << "\n";
}
} }
} }
@ -56,8 +62,8 @@ ScanResult run(std::string source) {
return scanner->scan_tokens(); return scanner->scan_tokens();
} }
void print_tokens(std::list<Token> *list) { void print_tokens(std::vector<Token> *list) {
for (std::list<Token>::iterator token = list->begin(); token != list->end(); for (std::vector<Token>::iterator token = list->begin(); token != list->end();
++token) { ++token) {
std::cout << token->to_string() << "(" << token->literal << "), "; std::cout << token->to_string() << "(" << token->literal << "), ";
} }

View file

@ -1,111 +1,7 @@
// #include "tokens.hpp" #include "parser.hpp"
// #include <stdbool.h>
// static const TokenType eq[] = {BANG_EQUAL, EQUAL_EQUAL}; Expression *Parser::parse(std::vector<Token> tokenlist) {
// static const TokenType comp[] = {GREATER, GREATER_EQUAL, LESS, LESS_EQUAL}; tokens = tokenlist;
// static const TokenType unarytokens[] = {BANG, MINUS}; current_token = 0;
return expression();
// // static int current = -1; }
// // TokenList *tokens;
// class Expression;
// class Binary {
// public:
// Expression *left;
// Token *operator;
// Expression *right;
// };
// typedef struct Grouping {
// Expression *expr;
// } Grouping;
// typedef struct Unary {
// Token *operator;
// Expression *expr;
// } Unary;
// class Expression {
// public:
// enum ExprType { Bin, Grp, Lit, Una } type;
// union Expr {
// Binary *binary;
// Grouping *grouping;
// Token *literal;
// Unary *unary;
// } expr;
// };
// void parse(TokenList *tokenlist) {
// tokens = tokenlist;
// current = 0;
// }
// static Token *peek(void) { return tokenlist_get(tokens, current); }
// static bool is_at_end(void) { return peek()->type == END_OF_FILE; }
// static bool check(TokenType type) {
// if (is_at_end()) {
// return false;
// }
// return peek()->type == type;
// }
// static bool match(const TokenType tokens[], int n) {
// for (int i = 0; i < n; i++) {
// if (check(tokens[i])) {
// }
// }
// return false;
// }
// static Token previous(void) {
// Token t = {NUMBER, "", 0, 0};
// return t;
// }
// static Expression unary(void) {
// if (match(unarytokens, 2)) {
// Token operator= previous();
// Expression right = unary();
// Expression un;
// un.type = Una;
// Unary new_una = {&operator, & right };
// un.expr.unary = &new_una;
// return un;
// }
// return primary();
// }
// static Expression factor(void) {}
// static Expression term(void) { Expression expr = factor(); }
// static Expression comparison(void) {
// Expression expr = term();
// while (match(comp, 4)) {
// Token operator= previous();
// Expression right = term();
// Binary new_bin = {&expr, &operator, & right };
// expr.expr.binary = &new_bin;
// expr.type = Bin;
// }
// return expr;
// }
// static Expression equality(void) {
// Expression expr = comparison();
// while (match(eq, 2)) {
// Token operator= previous();
// Expression right = comparison();
// Binary new_bin = {&expr, &operator, & right };
// expr.expr.binary = &new_bin;
// expr.type = Bin;
// }
// return expr;
// }
// // static Expression expression(void) { return equality(); }

232
src/parser.hpp Normal file
View file

@ -0,0 +1,232 @@
#pragma once
#include "tokens.hpp"
#include <cmath>
#include <cstdarg>
#include <cstddef>
#include <initializer_list>
#include <iostream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
/// Tag returned by Expression::type() to identify the concrete node class.
/// `None` is the value reported by the Expression base class itself.
enum class ExprType {
  Binary,
  Grouping,
  Unary,
  Literal,
  None
};
/// Polymorphic base class of every AST node.
///
/// The destructor is virtual, so a node can be destroyed through an
/// `Expression *`.
class Expression {
public:
  /// Kind of the node; the base implementation reports ExprType::None.
  virtual ExprType type() {
    return ExprType::None;
  }

  /// Printable form of the node; the base implementation yields
  /// "Expression()".
  virtual std::string to_string() {
    return "Expression()";
  }

  virtual ~Expression() = default;
};
/// AST node for an infix operation: left operand, operator token, right
/// operand.
///
/// Ownership: the node owns both operand sub-trees and a private heap copy of
/// the operator token; all three are released by the destructor. The token
/// handed to the constructor is only read, never stored or freed, so callers
/// may pass a pointer to a token they keep owning (for example an element of
/// a std::vector<Token>).
class Binary : public Expression {
public:
  ExprType type() override { return ExprType::Binary; }

  /// Renders the node in prefix form: "(<operator name> <left> <right>)".
  std::string to_string() override {
    return "(" + token_name(op->tokentype) + " " + left->to_string() + " " +
           right->to_string() + ")";
  }

  Expression *left;
  Token *op;
  Expression *right;

  /// @param _left     left operand; ownership passes to the node.
  /// @param _operator operator token; copied, the caller keeps ownership.
  /// @param _right    right operand; ownership passes to the node.
  Binary(Expression *_left, Token *_operator, Expression *_right)
      : left(_left),
        // Copy the token: deleting a pointer that was not obtained from
        // `new` (e.g. one pointing into a vector) is undefined behaviour.
        op(_operator != nullptr ? new Token(*_operator) : nullptr),
        right(_right) {}

  // The node owns raw pointers, so a member-wise copy would double-delete.
  Binary(const Binary &) = delete;
  Binary &operator=(const Binary &) = delete;

  ~Binary() override {
    delete left;
    delete right;
    delete op;
  }
};
/// AST node for an expression wrapped in another node; it is printed
/// surrounded by parentheses. The wrapped expression is deleted together
/// with the node.
class Grouping : public Expression {
public:
  ExprType type() override {
    return ExprType::Grouping;
  }

  /// Returns the inner expression's text between "(" and ")".
  std::string to_string() override {
    std::string rendered = "(";
    rendered += expr->to_string();
    rendered += ")";
    return rendered;
  }

  Expression *expr;

  Grouping(Expression *_expr) : expr(_expr) {}

  ~Grouping() override {
    delete expr;
  }
};
/// AST node for a prefix operation: an operator token applied to one operand.
///
/// Ownership: the node owns the operand sub-tree and a private heap copy of
/// the operator token; both are released by the destructor. The token handed
/// to the constructor is only read, never stored or freed, so callers may
/// pass a pointer to a token they keep owning (for example an element of a
/// std::vector<Token>).
class Unary : public Expression {
public:
  ExprType type() override { return ExprType::Unary; }

  /// Renders the operator's token name immediately followed by the operand.
  std::string to_string() override {
    return token_name(op->tokentype) + right->to_string();
  }

  Token *op;
  Expression *right;

  /// @param _operator operator token; copied, the caller keeps ownership.
  /// @param _right    operand; ownership passes to the node.
  Unary(Token *_operator, Expression *_right)
      // Copy the token: deleting a pointer that was not obtained from `new`
      // (e.g. one pointing into a vector) is undefined behaviour.
      : op(_operator != nullptr ? new Token(*_operator) : nullptr),
        right(_right) {}

  // The node owns raw pointers, so a member-wise copy would double-delete.
  Unary(const Unary &) = delete;
  Unary &operator=(const Unary &) = delete;

  ~Unary() override {
    delete right;
    delete op;
  }
};
// Empty tag type: passing a Void to Literal's constructor selects the Nil
// value type.
class Void {};
class Literal : public Expression {
public:
ExprType type() override { return ExprType::Literal; }
std::string to_string() override {
std::string text;
switch (valuetype) {
case String:
text = "\"" + value.str + "\"";
break;
case Numeric:
text = std::to_string(value.numeric);
break;
case Boolean:
text = value.boolean ? "True" : "False";
break;
case Nil:
text = "NULL";
break;
}
return text;
}
enum ValueType { String, Numeric, Boolean, Nil } valuetype;
union Value {
double_t numeric;
bool boolean;
std::string str;
Void dummy;
Value(double_t _numeric) : numeric(_numeric) {}
Value(bool _boolean) : boolean(_boolean) {}
Value(std::string _str) : str(_str) {}
Value(Void v) : dummy(v) {}
~Value() {}
} value;
Literal(Void v) : valuetype(ValueType::Nil), value(v) {}
Literal(double_t _numeric) : valuetype(ValueType::Numeric), value(_numeric) {}
Literal(std::string _str) : valuetype(ValueType::String), value(_str) {}
Literal(bool _boolean) : valuetype(ValueType::Boolean), value(_boolean) {}
};
class Parser {
private:
std::vector<Expression> expressions;
std::vector<Token> tokens;
int current_token;
Token peek() { return tokens[current_token]; };
bool is_at_end() { return peek().tokentype == Token::END_OF_FILE; };
Token *previous() { return &tokens[current_token - 1]; };
Token *advance() {
if (!is_at_end())
current_token += 1;
return previous();
}
bool check(Token::Type type) {
if (is_at_end()) {
return false;
}
return peek().tokentype == type;
}
bool match(int count, ...) {
va_list list;
va_start(list, count);
for (int i = 0; i < count; i++) {
Token::Type ttc = va_arg(list, Token::Type);
// std::cout << token_name(ttc) << "\n";
if (check(ttc)) {
advance();
return true;
}
}
return false;
};
Token *consume(Token::Type typ, std::string message) {
if (check(typ)) {
return advance();
}
throw error(peek(), message);
}
std::runtime_error error(Token token, std::string message) {
std::cout << token.to_string() << " " << message;
return std::runtime_error(message);
}
Expression *primary() {
if (match(1, Token::Type::FALSE))
return new Literal(false);
if (match(1, Token::Type::TRUE))
return new Literal(true);
if (match(1, Token::Type::NIL))
return new Literal(new Void());
if (match(1, Token::Type::NUMBER)) {
return new Literal(std::stod(previous()->literal));
}
if (match(1, Token::Type::STRING)) {
return new Literal(previous()->literal);
}
if (match(1, Token::Type::LEFT_PAREN)) {
Expression *e = expression();
consume(Token::Type::RIGHT_PAREN, "Expect ')'.");
return new Grouping(e);
}
throw std::runtime_error("Expected an expression");
}
public:
Expression *parse(std::vector<Token> tokenlist);
Expression *unary() {
if (match(2, Token::BANG, Token::Type::MINUS)) {
Token *op = previous();
Expression *right = unary();
return new Unary(op, right);
}
return primary();
}
Expression *expression() { return equality(); }
Expression *factor() {
Expression *expr = unary();
while (match(2, Token::Type::SLASH, Token::Type::STAR)) {
Token *op = previous();
Expression *right = unary();
expr = new Binary(expr, op, right);
}
return expr;
}
Expression *term() {
Expression *expr = factor();
while (match(2, Token::Type::MINUS, Token::Type::PLUS)) {
Token *op = previous();
Expression *right = unary();
expr = new Binary(expr, op, right);
}
return expr;
}
Expression *equality(void) {
Expression *expr = comparison();
while (match(2, Token::Type::BANG_EQUAL, Token::Type::BANG_EQUAL)) {
Token *op = previous();
Expression *right = comparison();
return new Binary(expr, op, right);
}
return expr;
}
Expression *comparison(void) {
Expression *expr = term();
while (match(4, Token::Type::GREATER, Token::Type::GREATER_EQUAL,
Token::Type::LESS, Token::Type::LESS_EQUAL)) {
Token *op = previous();
Expression *right = term();
expr = new Binary(expr, op, right);
}
return expr;
}
};

View file

@ -2,9 +2,9 @@
#include "tokens.hpp" #include "tokens.hpp"
#include <cstdbool> #include <cstdbool>
#include <iostream> #include <iostream>
#include <list>
#include <map> #include <map>
#include <string> #include <string>
#include <vector>
static const std::map<std::string, Token::Type> keywords = { static const std::map<std::string, Token::Type> keywords = {
{"and", Token::Type::AND}, {"class", Token::Type::CLASS}, {"and", Token::Type::AND}, {"class", Token::Type::CLASS},
@ -19,7 +19,7 @@ static const std::map<std::string, Token::Type> keywords = {
Scanner::Scanner(std::string s) Scanner::Scanner(std::string s)
: had_error(false), current_pos(0), start(0), current_line(1), source(s), : had_error(false), current_pos(0), start(0), current_line(1), source(s),
token_list(std::list<Token>()) {} token_list(std::vector<Token>()) {}
ScanResult Scanner::scan_tokens() { ScanResult Scanner::scan_tokens() {
while (current_pos < source.length()) { while (current_pos < source.length()) {
@ -35,15 +35,13 @@ ScanResult Scanner::scan_tokens() {
} }
void Scanner::add_token(Token::Type type) { void Scanner::add_token(Token::Type type) {
Token token = Token token = Token(type, "", "", current_line);
Token(type, source.substr(start, current_pos), "", current_line); token_list.push_back(token);
token_list.push_front(token);
} }
void Scanner::add_token_with_literal(Token::Type type, std::string literal) { void Scanner::add_token(Token::Type type, std::string literal) {
Token token = Token token = Token(type, literal, literal, current_line);
Token(type, source.substr(start, current_pos), literal, current_line); token_list.push_back(token);
token_list.push_front(token);
} }
char Scanner::advance() { char Scanner::advance() {
@ -91,6 +89,9 @@ void Scanner::scan_token() {
case '<': case '<':
add_token(match('=') ? Token::Type::LESS_EQUAL : Token::Type::LESS); add_token(match('=') ? Token::Type::LESS_EQUAL : Token::Type::LESS);
break; break;
case '*':
add_token(Token::Type::STAR);
break;
case '/': case '/':
if (match('/')) { if (match('/')) {
while (peek() != '\n' && !is_at_end()) { while (peek() != '\n' && !is_at_end()) {
@ -127,13 +128,12 @@ void Scanner::identifier() {
advance(); advance();
} }
std::string text = source.substr(start + 1, current_pos - start); std::string text = source.substr(start, current_pos - start);
auto it = keywords.find(text); auto it = keywords.find(text);
if (it != keywords.end()) { if (it != keywords.end()) {
add_token(it->second); add_token(it->second, text);
} else { } else {
add_token(Token::Type::IDENTIFIER); add_token(Token::Type::IDENTIFIER, text);
} }
} }
@ -144,8 +144,7 @@ void Scanner::number() {
advance(); advance();
while (is_digit(peek())) while (is_digit(peek()))
advance(); advance();
add_token_with_literal(Token::Type::NUMBER, add_token(Token::Type::NUMBER, source.substr(start, current_pos - start));
source.substr(start + 1, current_pos - start));
} }
bool Scanner::is_digit(char c) { return c >= '0' && c <= '9'; } bool Scanner::is_digit(char c) { return c >= '0' && c <= '9'; }
@ -165,7 +164,7 @@ void Scanner::string() {
advance(); advance();
std::string string = source.substr(start + 1, current_pos - start - 2); std::string string = source.substr(start + 1, current_pos - start - 2);
add_token_with_literal(Token::Type::STRING, string); add_token(Token::Type::STRING, string);
} }
bool Scanner::match(char expected) { bool Scanner::match(char expected) {

View file

@ -2,12 +2,12 @@
#include "tokens.hpp" #include "tokens.hpp"
#include <cstdbool> #include <cstdbool>
#include <list>
#include <string> #include <string>
#include <vector>
typedef struct { typedef struct {
bool had_error; bool had_error;
std::list<Token> token_list; std::vector<Token> token_list;
} ScanResult; } ScanResult;
class Scanner { class Scanner {
@ -17,13 +17,13 @@ private:
int start; int start;
int current_line; int current_line;
std::string source; std::string source;
std::list<Token> token_list; std::vector<Token> token_list;
public: public:
Scanner(std::string s); Scanner(std::string s);
ScanResult scan_tokens(); ScanResult scan_tokens();
void add_token(Token::Type type); void add_token(Token::Type type);
void add_token_with_literal(Token::Type type, std::string literal); void add_token(Token::Type type, std::string literal);
char advance(); char advance();
void scan_token(); void scan_token();
void identifier(); void identifier();

View file

@ -4,7 +4,7 @@ Token::Token(Token::Type _tokentype, std::string _lexeme, std::string _literal,
int _line) int _line)
: lexeme(_lexeme), literal(_literal), line(_line), tokentype(_tokentype) {} : lexeme(_lexeme), literal(_literal), line(_line), tokentype(_tokentype) {}
std::string Token::to_string() { std::string token_name(Token::Type tokentype) {
static const std::string tokens[] = { static const std::string tokens[] = {
"END_OF_FILE", "LEFT_PAREN", "RIGHT_PAREN", "LEFT_BRACE", "RIGHT_BRACE", "END_OF_FILE", "LEFT_PAREN", "RIGHT_PAREN", "LEFT_BRACE", "RIGHT_BRACE",
"COMMA", "DOT", "MINUS", "PLUS", "SEMICOLON", "COMMA", "DOT", "MINUS", "PLUS", "SEMICOLON",
@ -14,6 +14,7 @@ std::string Token::to_string() {
"ELSE", "FALSE", "FUN", "FOR", "IF", "ELSE", "FALSE", "FUN", "FOR", "IF",
"NIL", "OR", "PRINT", "RETURN", "SUPER", "NIL", "OR", "PRINT", "RETURN", "SUPER",
"THIS", "TRUE", "VAR", "WHILE"}; "THIS", "TRUE", "VAR", "WHILE"};
return tokens[(int)tokentype]; return tokens[(int)tokentype];
} }
std::string Token::to_string() { return token_name(tokentype); }

View file

@ -54,3 +54,5 @@ public:
Token(Token::Type _tokentype, std::string _lexeme, std::string _literal, Token(Token::Type _tokentype, std::string _lexeme, std::string _literal,
int line); int line);
}; };
std::string token_name(Token::Type tokentype);