From 5df54044f8de9dda845f64c5840659fef1f01ce5 Mon Sep 17 00:00:00 2001 From: Shautvast Date: Wed, 11 Dec 2024 14:16:44 +0100 Subject: [PATCH] back to C --- .idea/.gitignore | 8 - .idea/clox.iml | 9 - .idea/misc.xml | 6 - .idea/modules.xml | 8 - .idea/vcs.xml | 6 - compile_commands.json | 19 --- makefile | 18 +- src/error.hpp | 29 ---- src/lox.c | 85 ++++++++++ src/lox.cpp | 78 --------- src/parser.c | 383 ++++++++++++++++++++++++++++++++++++++++++ src/parser.cpp | 212 ----------------------- src/parser.h | 30 ++++ src/parser.hpp | 115 ------------- src/scanner.c | 240 ++++++++++++++++++++++++++ src/scanner.cpp | 211 ----------------------- src/scanner.h | 45 +++++ src/scanner.hpp | 42 ----- src/tokens.c | 52 ++++++ src/tokens.cpp | 19 --- src/tokens.h | 87 ++++++++++ src/tokens.hpp | 58 ------- src/utils.c | 19 +++ src/utils.h | 6 + 24 files changed, 958 insertions(+), 827 deletions(-) delete mode 100644 .idea/.gitignore delete mode 100644 .idea/clox.iml delete mode 100644 .idea/misc.xml delete mode 100644 .idea/modules.xml delete mode 100644 .idea/vcs.xml delete mode 100644 compile_commands.json delete mode 100644 src/error.hpp create mode 100644 src/lox.c delete mode 100644 src/lox.cpp create mode 100644 src/parser.c delete mode 100644 src/parser.cpp create mode 100644 src/parser.h delete mode 100644 src/parser.hpp create mode 100644 src/scanner.c delete mode 100644 src/scanner.cpp create mode 100644 src/scanner.h delete mode 100644 src/scanner.hpp create mode 100644 src/tokens.c delete mode 100644 src/tokens.cpp create mode 100644 src/tokens.h delete mode 100644 src/tokens.hpp create mode 100644 src/utils.c create mode 100644 src/utils.h diff --git a/.idea/.gitignore b/.idea/.gitignore deleted file mode 100644 index 13566b8..0000000 --- a/.idea/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -# Default ignored files -/shelf/ -/workspace.xml -# Editor-based HTTP Client requests -/httpRequests/ -# Datasource local storage ignored files -/dataSources/ -/dataSources.local.xml diff --git a/.idea/clox.iml b/.idea/clox.iml deleted file mode 100644 index d6ebd48..0000000 --- a/.idea/clox.iml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100644 index a16d8e7..0000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index a28bd99..0000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index 35eb1dd..0000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/compile_commands.json b/compile_commands.json deleted file mode 100644 index 0dbb570..0000000 --- a/compile_commands.json +++ /dev/null @@ -1,19 +0,0 @@ -[ - { - "arguments": [ - "/usr/bin/clang++", - "-c", - "-std=c++23", - "-Wall", - "-Wextra", - "-pedantic", - "-Werror", - "-o", - "target/lox.cpp.o", - "src/lox.cpp" - ], - "directory": "/Users/Shautvast/dev/clox", - "file": "/Users/Shautvast/dev/clox/src/lox.cpp", - "output": "/Users/Shautvast/dev/clox/target/lox.cpp.o" - } -] diff --git a/makefile b/makefile index bbe2515..6ccd569 100644 --- a/makefile +++ b/makefile @@ -1,23 +1,27 @@ TARGET := ./target SRC := ./src -CC := clang++ -c -std=c++23 -Wall -Wextra -pedantic -Werror +CC := clang -c -std=c17 +#-Wall -Wextra -pedantic -Werror SRCS := $(shell find $(SRC) -name '*.c') OBJS := $(SRCS:%=$(TARGET)/%.o) -$(TARGET)/lox: $(TARGET)/lox.cpp.o $(TARGET)/parser.cpp.o $(TARGET)/scanner.cpp.o $(TARGET)/tokens.cpp.o $(TARGET)/parser.cpp.o - clang++ $(TARGET)/lox.cpp.o -L$(TARGET) -lscanner.cpp.o -ltokens.cpp.o -lparser.cpp.o -o $(TARGET)/lox +$(TARGET)/lox: $(TARGET)/lox.c.o $(TARGET)/parser.c.o $(TARGET)/scanner.c.o $(TARGET)/tokens.c.o $(TARGET)/utils.c.o + clang $(TARGET)/lox.c.o -L$(TARGET) -lparser.c.o -ltokens.c.o -lscanner.c.o -lutils.c.o -o $(TARGET)/lox -$(TARGET)/tokens.cpp.o: $(SRC)/tokens.cpp +$(TARGET)/utils.c.o: $(SRC)/utils.c $(CC) $< -o $@ -$(TARGET)/scanner.cpp.o: $(SRC)/scanner.cpp +$(TARGET)/tokens.c.o: $(SRC)/tokens.c $(CC) $< -o $@ -$(TARGET)/parser.cpp.o: $(SRC)/parser.cpp +$(TARGET)/scanner.c.o: $(SRC)/scanner.c $(CC) $< -o $@ -$(TARGET)/lox.cpp.o: $(SRC)/lox.cpp $(TARGET) +$(TARGET)/parser.c.o: $(SRC)/parser.c + $(CC) $< -o $@ + +$(TARGET)/lox.c.o: $(SRC)/lox.c $(TARGET) $(CC) $< -o $@ $(TARGET): diff --git a/src/error.hpp b/src/error.hpp deleted file mode 100644 index 8beae48..0000000 --- a/src/error.hpp +++ /dev/null @@ -1,29 +0,0 @@ -#pragma once - -#include -#include - -class Error { - -public: - std::string message; - Error(std::string _message) : message(_message){}; -}; - -class Void {}; - -template using Result = std::variant; - -template bool is_err(Result r) { - return std::holds_alternative(r); -} -template bool is_ok(Result r) { - return std::holds_alternative(r); -} - -template R Ok(Result r) { return std::get(r); } -/// enables rewrapping errors in a new Result type -template Error Err(Result r) { return std::get(r); } -template std::string err_msg(Result r) { - return std::get(r).message; -} diff --git a/src/lox.c b/src/lox.c new file mode 100644 index 0000000..88c5b6d --- /dev/null +++ b/src/lox.c @@ -0,0 +1,85 @@ +#include "parser.h" +#include "scanner.h" +#include "utils.h" +#include +#include +#include +#include + +int run_file(char *file); +void run_prompt(void); +void run(char *source); + +int main(int argc, char *argv[]) { + setvbuf(stdout, NULL, _IONBF, 0); + if (argc > 2) { + puts("Usage: lox [script]"); + return EXIT_FAILURE; + } else if (argc == 2) { + return run_file(argv[1]); + } else { + run_prompt(); + } + return EXIT_SUCCESS; +} + +int run_file(char *filename) { + + FILE *file = fopen(filename, "r"); + if (file == NULL) { + printf("unable to open file '%s'\n", filename); + return EXIT_FAILURE; + } + char line[255]; + + char *content = malloc(1); + if (content == NULL) { + puts("Out of memory"); + return EXIT_FAILURE; + } + content[0] = '\0'; + + while (fgets(line, sizeof(line), file)) { + content = realloc(content, strlen(content) + strlen(line) + 1); + if (content == NULL) { + return EXIT_FAILURE; + } + strcat(content, line); + } + + fclose(file); + + run(content); + + // FREE UP + free(content); + + // if (scan_result.had_error) { + // return 65; + // } + + return EXIT_SUCCESS; +} + +void run_prompt(void) { + char line[255]; + + for (;;) { + printf(">"); + char *r = fgets(line, 255, stdin); + + if (r == NULL) { + break; + } + + int len = (int)strlen(line); + run(substring(line, 1, len - 1)); + } +} + +void run(char *source) { + ScanResult scan_result = scan_tokens(source); + // tokenlist_print(&scan_result.token_list); + ExpressionList list = parse(&scan_result.token_list); + exprlist_print(&list); +} diff --git a/src/lox.cpp b/src/lox.cpp deleted file mode 100644 index 0e8212c..0000000 --- a/src/lox.cpp +++ /dev/null @@ -1,78 +0,0 @@ -#include "parser.hpp" -#include "scanner.hpp" -#include "tokens.hpp" -#include -#include -#include -#include -#include - -using std::string, std::vector, std::ifstream, std::cin, std::cout; - -void print_tokens(vector *list); -int run_file(string file); -void run_prompt(void); -Result> run(string source); - -int main(int argc, char *argv[]) { - if (argc > 2) { - puts("Usage: lox [script]"); - return EXIT_FAILURE; - } else if (argc == 2) { - return run_file(argv[1]); - } else { - run_prompt(); - } - return EXIT_SUCCESS; -} - -int run_file(string filename) { - string content; - ifstream file; - file.open(filename); - if (file.is_open()) { - file >> content; - } else { - exit(1); - } - - Result> scan_result = run(content); - - return EXIT_SUCCESS; -} - -void run_prompt(void) { - string line; - - for (;;) { - cout << ">"; - getline(cin, line); - auto scan_result = run(line.substr(0, line.length())); - - if (is_ok(scan_result)) { - auto tokens = Ok(scan_result); - print_tokens(&tokens); - auto expression = (new Parser())->parse(get>(scan_result)); - if (is_ok(expression)) { - cout << Ok(expression)->as_string() << "\n"; - } else { - cout << err_msg(expression) << "\n"; - } - } else { - cout << err_msg(scan_result) << "\n"; - } - } -} - -Result> run(string source) { - Scanner *scanner = new Scanner(source); - return scanner->scan_tokens(); -} - -void print_tokens(vector *const list) { - for (Token token : *list) { - cout << token.as_string() << "(" << token.literal << "), "; - } - - cout << "\n"; -} diff --git a/src/parser.c b/src/parser.c new file mode 100644 index 0000000..1ed97e8 --- /dev/null +++ b/src/parser.c @@ -0,0 +1,383 @@ +#include "parser.h" +#include "tokens.h" +#include +#include +#include +#include + +bool is_at_end(void); +Token *peek(void); +size_t expr_toString(const Expression *expr, char *output, size_t outputSize, + size_t offset); +void expr_print(const Expression *expr); +Expression *declaration(void); +bool match1(TokenType t); +bool match2(TokenType type1, TokenType type2); +bool match3(TokenType type1, TokenType type2, TokenType type3); +bool match4(TokenType type1, TokenType type2, TokenType type3, TokenType type4); +bool check(TokenType t); +Token *advance(void); +Token *previous(void); +Expression *var_declaration(void); +Expression *expression(void); +Expression *statement(void); +Expression *printStatement(void); +ExpressionList block(void); +Expression *expressionStatement(void); +Expression *assignment(void); +Expression *equality(void); +Expression *comparison(void); +Expression *term(void); +Expression *factor(void); +Expression *unary(void); +Expression *primary(void); +Expression *newExpression(char *type); +Token *consume(TokenType type, char *message); + +static TokenList *tokens; +static int current; + +ExpressionList parse(TokenList *tokens_to_parse) { + ExpressionList statements; + exprlist_init(&statements); + + tokens = tokens_to_parse; + current = 0; + + while (!is_at_end()) { + exprlist_add(&statements, declaration()); + } + return statements; +} + +Expression *declaration(void) { + if (match1(VAR)) { + return var_declaration(); + } else { + return statement(); + } +} + +Expression *var_declaration(void) { + Token *name = consume(IDENTIFIER, "Expected a variable name"); + Expression *initializer; + if (match1(EQUAL)) { + initializer = expression(); + } + consume(SEMICOLON, "Expected semicolon"); + Expression *variableStatement = newExpression("VariableStatement"); + variableStatement->name = name->lexeme; + variableStatement->left = initializer; + return variableStatement; +} + +Expression *statement(void) { + if (match1(PRINT)) { + return printStatement(); + } + if (match1(LEFT_BRACE)) { + ExpressionList block_contents = block(); + Expression *block = newExpression("Block"); + block->value = &block_contents; + return block; + } + + return expressionStatement(); +} + +ExpressionList block(void) { + ExpressionList statements; + exprlist_init(&statements); + while (!check(RIGHT_BRACE) && !is_at_end()) { + exprlist_add(&statements, declaration()); + } + return statements; +} + +Expression *printStatement(void) { + Expression *value = expression(); + consume(SEMICOLON, "Expected semicolon"); + Expression *print = newExpression("PrintStmt"); + print->left = value; + return print; +} + +Expression *expressionStatement(void) { + Expression *value = expression(); + consume(SEMICOLON, "Expected semicolon"); + Expression *statement = newExpression("ExprStmt"); + statement->value = &value; + return statement; +} + +Expression *expression(void) { return assignment(); } + +Expression *assignment(void) { + Expression *expr = equality(); + if (match1(EQUAL)) { + Token *equals = previous(); + Expression *value = assignment(); + if (strcmp(expr->type, "Variable") == 0) { + Expression *assign = newExpression("AssignStmt"); + assign->name = expr->name; + assign->value = &value; + return assign; + } + Expression *error = newExpression("Error"); + error->operator= equals; + return error; + } + return expr; +} + +Expression *equality(void) { + Expression *expr = comparison(); + while (match2(BANG_EQUAL, EQUAL_EQUAL)) { + Token *operator= previous(); + Expression *right = comparison(); + Expression *binary = newExpression("BinaryExpr"); + binary->operator= operator; + binary->left = expr; + binary->right = right; + expr = binary; + } + return expr; +} + +Expression *comparison(void) { + Expression *expr = term(); + while (match4(GREATER, GREATER_EQUAL, LESS, LESS_EQUAL)) { + Token *operator= previous(); + Expression *right = term(); + Expression *binary = newExpression("BinaryExpr"); + binary->operator= operator; + binary->left = expr; + binary->right = right; + expr = binary; + } + return expr; +} + +Expression *term(void) { + Expression *expr = factor(); + while (match2(MINUS, PLUS)) { + Token *operator= previous(); + Expression *right = factor(); + Expression *binary = newExpression("BinaryExpr"); + binary->operator= operator; + binary->left = expr; + binary->right = right; + expr = binary; + } + return expr; +} + +Expression *factor(void) { + Expression *expr = unary(); + while (match2(SLASH, STAR)) { + Token *operator= previous(); + Expression *right = unary(); + Expression *binary = newExpression("BinaryExpr"); + binary->operator= operator; + binary->left = expr; + binary->right = right; + expr = binary; + } + return expr; +} + +Expression *unary(void) { + if (match2(BANG, MINUS)) { + Token *operator= previous(); + Expression *right = unary(); + Expression *unary = newExpression("Unary"); + unary->operator= operator; + unary->right = right; + return unary; + } + return primary(); +} + +Expression *primary(void) { + Expression *r = newExpression("Literal"); + if (match1(FALSE)) { + r->name = "boolean"; + r->value = "false"; + return r; + } + if (match1(TRUE)) { + r->name = "boolean"; + r->value = "true"; + return r; + } + + if (match1(NIL)) { + r->name = "nil"; + r->value = NULL; + return r; + } + + if (check(NUMBER)) { + advance(); + r->name = "number"; + r->value = previous()->literal; + return r; + } + if (check(STRING)) { + advance(); + r->name = "string"; + r->value = previous()->literal; + return r; + } + if (match1(IDENTIFIER)) { + Expression *var = newExpression("Variable"); + r->name = previous()->lexeme; + return var; + } + if (match1(LEFT_PAREN)) { + Expression *group = newExpression("Group"); + consume(RIGHT_PAREN, "Expect ')' after expression."); + r->left = group; + return group; + } + + Expression *error = newExpression("Error"); + return error; +} + +Token *consume(TokenType type, char *message) { + if (check(type)) { + return advance(); + } + printf("error\n"); + Token *t = newToken(); + t->type = ERROR; + t->lexeme = message; + + tokenlist_add(tokens, t); + return tokenlist_last(tokens); +} + +bool match1(TokenType type) { + if (check(type)) { + advance(); + return true; + } else { + return false; + } +} + +bool match2(TokenType type1, TokenType type2) { + if (check(type1) || check(type2)) { + advance(); + return true; + } else { + return false; + } +} + +bool match3(TokenType type1, TokenType type2, TokenType type3) { + if (check(type1) || check(type2) || check(type3)) { + advance(); + return true; + } else { + return false; + } +} + +bool match4(TokenType type1, TokenType type2, TokenType type3, + TokenType type4) { + if (check(type1) || check(type2) || check(type3) || check(type4)) { + advance(); + return true; + } else { + return false; + } +} + +Token *advance(void) { + if (!is_at_end()) { + current += 1; + } + return previous(); +} + +Token *previous(void) { return tokenlist_get(tokens, current - 1); } + +bool check(TokenType type) { return peek()->type == type; } + +bool is_at_end(void) { return peek()->type == END_OF_FILE; } + +Token *peek(void) { return tokenlist_get(tokens, current); } + +void exprlist_init(ExpressionList *list) { + list->expressions = malloc(sizeof(Expression) * 32); + list->size = 0; + list->capacity = 32; +} + +void exprlist_add(ExpressionList *list, Expression *value) { + if (list->size >= list->capacity) { + list->capacity *= 2; + list->expressions = + realloc(list->expressions, sizeof(Expression) * list->capacity); + } + list->expressions[list->size] = value; + list->size += 1; +} + +Expression *exprlist_get(ExpressionList *list, int index) { + if (index >= list->size || index < 0) { + printf("Index %d out of bounds for list of size %d\n", index, list->size); + exit(1); + } + return list->expressions[index]; +} + +void exprlist_print(ExpressionList *list) { + for (int i = 0; i < list->size; i++) { + Expression *expr = exprlist_get(list, i); + expr_print(expr); + } + printf("\n"); +} + +void exprlist_free(ExpressionList *list) { free(list->expressions); } + +void expr_print(const Expression *expr) { + printf("Expr[type: %s", expr->type); + if (expr->left != NULL) { + printf(", left: "); + expr_print(expr->left); + } + if (expr->right != NULL) { + printf(", right: "); + expr_print(expr->right); + } + if (expr->operator!= NULL && expr->operator->lexeme != NULL) { + printf(", operator: %s", expr->operator->lexeme); + } + if (expr->name != NULL) { + printf(", name: %s", expr->name); + if (strcmp(expr->name, "string") == 0 || + strcmp(expr->name, "number") == 0 || + strcmp(expr->name, "boolean") == 0) { + printf(", value: %s", (char *)expr->value); + } + if (strcmp(expr->name, "nil") == 0 && expr->value == NULL) { + printf(", value: NULL"); + } + } + printf("]"); +} + +Expression *newExpression(char *type) { + Expression *e = malloc(sizeof(Expression)); + e->type = type; + e->left = NULL; + e->right = NULL; + e->name = NULL; + e->operator= NULL; + e->value = NULL; + return e; +} diff --git a/src/parser.cpp b/src/parser.cpp deleted file mode 100644 index 21f1b11..0000000 --- a/src/parser.cpp +++ /dev/null @@ -1,212 +0,0 @@ -#include "parser.hpp" - -#include -#include - -using std::string, std::to_string, std::vector; - -Expression::~Expression() = default; - -// class Binary - -string Binary::as_string() const { - return "(" + token_name(op->tokentype) + " " + left->as_string() + " " + - right->as_string() + ")"; -} - -Binary::Binary(Expression *_left, Token *_operator, Expression *_right) - : left(_left), op(_operator), right(_right){}; - -Binary::~Binary() = default; - -// class Grouping - -string Grouping::as_string() const { return "(" + expr->as_string() + ")"; } - -Grouping::Grouping(Expression *_expr) : expr(_expr){}; - -Grouping::~Grouping() = default; - -// class Unary - -string Unary::as_string() const { - return token_name(op->tokentype) + right->as_string(); -} - -Unary::Unary(Token *_operator, Expression *_right) - : op(_operator), right(_right){}; - -Unary::~Unary() = default; - -// class Literal -string Literal::as_string() const { - string text; - if (holds_alternative(value)) { - return "\"" + get(value) + "\""; - } - if (holds_alternative(value)) { - return to_string(get(value)); - } - if (holds_alternative(value)) { - return get(value) ? "True" : "False"; - } - if (holds_alternative(value)) { - return "NULL"; - } - return "unexpected"; -} - -// class Parser -Result Parser::parse(vector tokenlist) { - tokens = tokenlist; - current_token = 0; - return expression(); -} - -Token Parser::peek() { return tokens[current_token]; }; - -bool Parser::is_at_end() { return peek().tokentype == Token::END_OF_FILE; }; - -Token *Parser::previous() { return &tokens[current_token - 1]; }; - -Token *Parser::advance() { - if (!is_at_end()) - current_token += 1; - return previous(); -} - -bool Parser::check(Token::Type type) { - if (is_at_end()) { - return false; - } - return peek().tokentype == type; -} - -bool Parser::match(int count, ...) { - va_list list; - va_start(list, count); - - for (int i = 0; i < count; i++) { - Token::Type ttc = va_arg(list, Token::Type); - // cout << token_name(ttc) << "\n"; - if (check(ttc)) { - advance(); - return true; - } - } - return false; -}; - -Result Parser::consume(Token::Type typ, string message) { - if (check(typ)) { - return advance(); - } - return error(peek(), message); -} - -Error Parser::error(Token token, string message) { - std::cout << token.as_string() << " " << message; - return Error(message); // TODO no exceptions -} - -Result Parser::primary() { - if (match(1, Token::Type::FALSE)) - return new Literal(false); - if (match(1, Token::Type::TRUE)) - return new Literal(true); - if (match(1, Token::Type::NIL)) - return new Literal(new NilType()); - if (match(1, Token::Type::NUMBER)) { - return new Literal(stod(previous()->literal)); - } - if (match(1, Token::Type::STRING)) { - return new Literal(previous()->literal); - } - if (match(1, Token::Type::LEFT_PAREN)) { - auto expr = expression(); - Result r = consume(Token::Type::RIGHT_PAREN, "Expect ')'."); - if (is_err(r)) { - return Err(r); - } - return new Grouping(Ok(expr)); - } - return Error("Expected an expression"); -} - -Result Parser::unary() { - if (match(2, Token::BANG, Token::Type::MINUS)) { - Token *op = previous(); - Result right = unary(); - if (is_ok(right)) { - return new Unary(op, Ok(right)); - } - } - return primary(); -} - -Result Parser::expression() { return equality(); } - -Result Parser::factor() { - Result expr = unary(); - if (is_err(expr)) { - return expr; - } - while (match(2, Token::Type::SLASH, Token::Type::STAR)) { - Token *op = previous(); - auto right = unary(); - if (is_err(right)) { - return right; - } - expr = new Binary(Ok(expr), op, Ok(right)); - } - return expr; -} - -Result Parser::term() { - auto expr = factor(); - if (is_err(expr)) { - return expr; - } - while (match(2, Token::Type::MINUS, Token::Type::PLUS)) { - Token *op = previous(); - auto right = factor(); - if (is_err(right)) { - return right; - } - expr = new Binary(Ok(expr), op, Ok(right)); - } - return expr; -} - -Result Parser::equality(void) { - auto expr = comparison(); - if (is_err(expr)) { - return expr; - } - while (match(2, Token::Type::BANG_EQUAL, Token::Type::BANG_EQUAL)) { - Token *op = previous(); - auto right = comparison(); - if (is_err(right)) { - return right; - } - return new Binary(Ok(expr), op, Ok(right)); - } - return expr; -} - -Result Parser::comparison(void) { - auto expr = term(); - if (is_err(expr)) { - return expr; - } - while (match(4, Token::Type::GREATER, Token::Type::GREATER_EQUAL, - Token::Type::LESS, Token::Type::LESS_EQUAL)) { - Token *op = previous(); - auto right = term(); - if (is_err(right)) { - return right; - } - expr = new Binary(Ok(expr), op, Ok(right)); - } - return expr; -} diff --git a/src/parser.h b/src/parser.h new file mode 100644 index 0000000..a6c98fe --- /dev/null +++ b/src/parser.h @@ -0,0 +1,30 @@ +#include "tokens.h" + +typedef struct Expression Expression; + +struct Expression { + char *type; + Expression *left; + Expression *right; + Token *operator; + char *name; + void *value; +}; + +typedef struct ExpressionList { + Expression **expressions; + int size; + int capacity; +} ExpressionList; + +ExpressionList parse(TokenList *tokens); + +void exprlist_init(ExpressionList *list); + +void exprlist_add(ExpressionList *list, Expression *value); + +Expression *exprlist_get(ExpressionList *list, int index); + +void exprlist_print(ExpressionList *list); + +void exprlist_free(ExpressionList *list); diff --git a/src/parser.hpp b/src/parser.hpp deleted file mode 100644 index a4804b6..0000000 --- a/src/parser.hpp +++ /dev/null @@ -1,115 +0,0 @@ -#pragma once - -#include "error.hpp" -#include "tokens.hpp" -#include -#include -#include - -enum class ExprType { Binary, Grouping, Unary, Literal, None }; - -/// Base class for expressions -class Expression { -public: - virtual std::string as_string() const = 0; // get string rep for debugging - virtual ~Expression(); -}; - -/// An expression with two operands -class Binary : public Expression { - std::unique_ptr left; - std::unique_ptr op; - std::unique_ptr right; - -public: - std::string as_string() const override; - Binary(Expression *_left, Token *_operator, Expression *_right); - ~Binary(); -}; - -/// An expression between parentheses -class Grouping : public Expression { - std::unique_ptr expr; - -public: - std::string as_string() const override; - Grouping(Expression *_expr); - ~Grouping(); -}; - -/// An expression with one operand (operator is `-` or `!`) -class Unary : public Expression { - std::unique_ptr op; - std::unique_ptr right; - -public: - std::string as_string() const override; - Unary(Token *_operator, Expression *_right); - ~Unary(); -}; - -/// empty class that is the type of the Nil value -class NilType {}; -typedef std::variant Value; - -/// encapsulates a value: numeric, string etc -class Literal : public Expression { -public: - enum ValueType { String, Numeric, Boolean, Nil } valuetype; - - Value value; - - Literal(NilType v) : value(v){}; - Literal(double_t _numeric) : value(_numeric){}; - Literal(std::string _str) : value(_str){}; - Literal(bool _boolean) : value(_boolean){}; - - std::string as_string() const override; -}; - -class Parser { - std::vector expressions; - std::vector tokens; - int current_token; - - /// returns the current token without moving the pointer; - /// pointer here meanse index into the tokenlist. - Token peek(); - /// checks if the current token is EOF - bool is_at_end(); - /// returns the previous token without moving the pointer - Token *previous(); - /// increments the token pointer - Token *advance(); - /// checks if the current token is of specified type - bool check(Token::Type type); - /// checks if the current token is one of the specified types - /// count: the number of tokens to match - /// ... varargs argument for the tokens to match - bool match(int count, ...); - /// checks if the current token is of the specified type and - /// moves the token forward if so, otherwise throws an exception with - /// the specified message - Result consume(Token::Type typ, std::string message); - /// throws an exception for the specified token with the specified message - Error error(Token token, std::string message); - /// tries to parse the token as a primary value (string, number etc) - Result primary(); - /// tries to parse the tokens as a unary expression - Result unary(); - /// tries to parse the tokens - Result expression(); - /// tries to parse the tokens as a multiplication or division - Result factor(); - /// tries to parse the tokens as an addition or subtraction - Result term(); - /// tries to parse the tokens as an equality (`a == b` / `a!= b`) - Result equality(); - /// tries to parse the tokens as a comparison (`a > b` / `a >= b` / `a < b` - /// / `a <= b` ) - Result comparison(); - -public: - /// public method for parsing expressions - Result parse(std::vector tokenlist); -}; diff --git a/src/scanner.c b/src/scanner.c new file mode 100644 index 0000000..70e8aef --- /dev/null +++ b/src/scanner.c @@ -0,0 +1,240 @@ +#include "scanner.h" +#include "tokens.h" +#include "utils.h" +#include +#include +#include +#include + +static void scan_token(void); +static void error(char *message, char c); +static void report(char *where, char *message, char c); +static bool is_at_end(void); +static bool match(char expected); +static char peek(void); +static char peek_next(void); +static void string(void); +static bool is_digit(char c); +static void number(void); +static bool is_alpha(char c); +static bool is_alphanumeric(char c); +static void identifier(void); + +static bool had_error = false; +static int current_pos = -1; +static int start = -1; +static int current_line = -1; +static char *source; +static TokenList token_list; + +ScanResult scan_tokens(char *src) { + current_pos = 0; + start = 0; + current_line = 1; + source = src; + + tokenlist_init(&token_list); + int len = (int)strlen(source); + + while (current_pos < len) { + start = current_pos; + scan_token(); + } + Token *eof = newToken(); + eof->type = END_OF_FILE; + eof->lexeme = ""; + eof->literal = ""; + + tokenlist_add(&token_list, eof); + + ScanResult scan_result; + scan_result.token_list = token_list; + scan_result.had_error = had_error; + + // tokenlist_print(&scan_result.token_list); + + return scan_result; +} + +static void add_token(TokenType type) { + Token *token = newToken(); + token->type = type; + token->lexeme = substring(source, start + 1, current_pos - start); + token->literal = NULL; + token->line = current_line; + + tokenlist_add(&token_list, token); +} + +static void add_token_with_literal(TokenType type, char *literal) { + Token *token = newToken(); + token->type = type; + token->lexeme = substring(source, start + 1, current_pos - start); + token->literal = literal; + token->line = current_line; + + tokenlist_add(&token_list, token); +} + +static char advance(void) { + char c = source[current_pos++]; + return c; +} + +static void scan_token(void) { + char c = advance(); + + switch (c) { + case '(': + add_token(LEFT_PAREN); + break; + case ')': + add_token(RIGHT_PAREN); + break; + case '{': + add_token(LEFT_BRACE); + break; + case '}': + add_token(RIGHT_BRACE); + break; + case ',': + add_token(COMMA); + break; + case '.': + add_token(DOT); + break; + case '+': + add_token(PLUS); + break; + case '-': + add_token(MINUS); + break; + case '!': + add_token(match('=') ? BANG_EQUAL : BANG); + break; + case '=': + add_token(match('=') ? EQUAL_EQUAL : EQUAL); + break; + case '>': + add_token(match('=') ? GREATER_EQUAL : GREATER); + break; + case '<': + add_token(match('=') ? LESS_EQUAL : LESS); + break; + case '/': + if (match('/')) { + while (peek() != '\n' && !is_at_end()) { + advance(); + } + } else { + add_token(SLASH); + } + break; + case ' ': + case '\t': + case '\r': + break; + case '\n': + current_line += 1; + break; + case '"': + string(); + break; + case ';': + add_token(SEMICOLON); + break; + default: + if (is_digit(c)) { + number(); + } else if (is_alpha(c)) { + identifier(); + } else { + error("Unexpected character.", c); + } + break; + } +} + +static void identifier(void) { + while (is_alphanumeric(peek())) { + advance(); + } + + char *text = substring(source, start + 1, current_pos - start); + const TokenType *tokentype = get_keyword_token(text); + if (tokentype == NULL) { + add_token(IDENTIFIER); + } else { + add_token(*tokentype); + } +} + +static void number(void) { + while (is_digit(peek())) + advance(); + if (peek() == '.' && is_digit((peek_next()))) + advance(); + while (is_digit(peek())) + advance(); + add_token_with_literal(NUMBER, + substring(source, start + 1, current_pos - start)); +} + +bool is_digit(char c) { return c >= '0' && c <= '9'; } + +void string(void) { + while (peek() != '"' && !is_at_end()) { + if (peek() == '\n') + current_line += 1; + advance(); + } + + if (is_at_end()) { + error("Unterminated string.", '\0'); + return; + } + + advance(); + + char *string = substring(source, start + 2, current_pos - start - 2); + add_token_with_literal(STRING, string); +} + +static bool match(char expected) { + if (is_at_end()) { + return false; + } + if (expected != source[current_pos]) { + return false; + } + current_pos += 1; + return true; +} + +static char peek_next(void) { + if (current_pos + 1 >= (int)strlen(source)) { + return '\0'; + } + return source[current_pos + 1]; +} + +static char peek(void) { + if (is_at_end()) { + return '\0'; + } + return source[current_pos]; +} +static bool is_alpha(char c) { + return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'); +} + +static bool is_alphanumeric(char c) { return is_alpha(c) || is_digit(c); } + +static bool is_at_end(void) { return current_pos >= (int)strlen(source); } + +static void error(char *message, char c) { report("", message, c); } + +static void report(char *where, char *message, char c) { + printf("*[Line %i] Error %s : %s [%c]\n", current_line, where, message, c); + had_error = true; +} diff --git a/src/scanner.cpp b/src/scanner.cpp deleted file mode 100644 index 8b91cfe..0000000 --- a/src/scanner.cpp +++ /dev/null @@ -1,211 +0,0 @@ -#include "scanner.hpp" -#include "error.hpp" -#include "tokens.hpp" -#include -#include -#include -#include -#include - -using std::string, std::map, std::vector, std::cout; - -static const map keywords = { - {"and", Token::Type::AND}, {"class", Token::Type::CLASS}, - {"else", Token::Type::ELSE}, {"false", Token::Type::FALSE}, - {"for", Token::Type::FOR}, {"fun", Token::Type::FUN}, - {"if", Token::Type::IF}, {"nil", Token::Type::NIL}, - {"or", Token::Type::OR}, {"print", Token::Type::PRINT}, - {"return", Token::Type::RETURN}, {"super", Token::Type::SUPER}, - {"this", Token::Type::THIS}, {"true", Token::Type::TRUE}, - {"var", Token::Type::VAR}, {"while", Token::Type::WHILE}, -}; - -Scanner::Scanner(string _source) - : had_error(false), current_pos(0), start(0), current_line(1), - source(_source), token_list(vector()) {} - -Result> Scanner::scan_tokens() { - while (current_pos < source.length()) { - start = current_pos; - Result r = scan_token(); - if (is_err(r)) { - return Err(r); - } - } - return token_list; -} - -void Scanner::add_token(Token::Type type) { - Token token = Token(type, "", "", current_line); - token_list.push_back(token); -} - -void Scanner::add_token(Token::Type type, string literal) { - Token token = Token(type, literal, literal, current_line); - token_list.push_back(token); -} - -char Scanner::advance() { - char c = source.at(current_pos++); - return c; -} - -Result Scanner::scan_token() { - char c = advance(); - - switch (c) { - case '(': - add_token(Token::Type::LEFT_PAREN); - break; - case ')': - add_token(Token::Type::RIGHT_PAREN); - break; - case '{': - add_token(Token::Type::LEFT_BRACE); - break; - case '}': - add_token(Token::Type::RIGHT_BRACE); - break; - case ',': - add_token(Token::Type::COMMA); - break; - case '.': - add_token(Token::Type::DOT); - break; - case '+': - add_token(Token::Type::PLUS); - break; - case '-': - add_token(Token::Type::MINUS); - break; - case '!': - add_token(match('=') ? Token::Type::BANG_EQUAL : Token::Type::BANG); - break; - case '=': - add_token(match('=') ? Token::Type::EQUAL_EQUAL : Token::Type::EQUAL); - break; - case '>': - add_token(match('=') ? Token::Type::GREATER_EQUAL : Token::Type::GREATER); - break; - case '<': - add_token(match('=') ? Token::Type::LESS_EQUAL : Token::Type::LESS); - break; - case '*': - add_token(Token::Type::STAR); - break; - case '/': - if (match('/')) { - while (peek() != '\n' && !is_at_end()) { - advance(); - } - } else { - add_token(Token::Type::SLASH); - } - break; - case ' ': - case '\t': - case '\r': - break; - case '\n': - current_line += 1; - break; - case '"': - scan_string(); - break; - default: - if (is_digit(c)) { - number(); - } else if (is_alpha(c)) { - identifier(); - } else { - return Error{"Unexpected character."}; - } - break; - } - return Void{}; -} - -void Scanner::identifier() { - while (is_alphanumeric(peek())) { - advance(); - } - - string text = source.substr(start, current_pos - start); - auto it = keywords.find(text); - if (it != keywords.end()) { - add_token(it->second, text); - } else { - add_token(Token::Type::IDENTIFIER, text); - } -} - -void Scanner::number() { - while (is_digit(peek())) - advance(); - if (peek() == '.' && is_digit((peek_next()))) - advance(); - while (is_digit(peek())) - advance(); - add_token(Token::Type::NUMBER, source.substr(start, current_pos - start)); -} - -bool Scanner::is_digit(char c) { return c >= '0' && c <= '9'; } - -void Scanner::scan_string() { - while (peek() != '"' && !is_at_end()) { - if (peek() == '\n') - current_line += 1; - advance(); - } - - if (is_at_end()) { - report("Unterminated string."); - return; - } - - advance(); - - string s = source.substr(start + 1, current_pos - start - 2); - add_token(Token::Type::STRING, s); -} - -bool Scanner::match(char expected) { - if (is_at_end()) { - return false; - } - if (expected != source[current_pos]) { - return false; - } - current_pos += 1; - return true; -} - -char Scanner::peek_next() { - if (current_pos + 1 >= source.length()) { - return '\0'; - } - return source[current_pos + 1]; -} - -char Scanner::peek() { - if (is_at_end()) { - return '\0'; - } - return source[current_pos]; -} - -bool Scanner::is_alpha(char c) { - return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'); -} - -bool Scanner::is_alphanumeric(char c) { return is_alpha(c) || is_digit(c); } - -bool Scanner::is_at_end(void) { return current_pos >= source.length(); } - -void Scanner::report(string message) { report("", message); } - -void Scanner::report(string where, std::string message) { - cout << "*[Line " << current_line << "] Error " << where << " : " << message - << "\n"; - had_error = true; -} diff --git a/src/scanner.h b/src/scanner.h new file mode 100644 index 0000000..8bba336 --- /dev/null +++ b/src/scanner.h @@ -0,0 +1,45 @@ +#ifndef SCANNER_H +#define SCANNER_H + +#include "tokens.h" +#include +#include + +typedef struct { + bool had_error; + TokenList token_list; +} ScanResult; + +ScanResult scan_tokens(char *source); + +typedef struct { + const char *key; + const TokenType value; +} Item; + +static const Item keywords[] = { + {"and", AND}, {"class", CLASS}, {"else", ELSE}, {"false", FALSE}, + {"for", FOR}, {"fun", FUN}, {"if", IF}, {"nil", NIL}, + {"or", OR}, {"print", PRINT}, {"return", RETURN}, {"super", SUPER}, + {"this", THIS}, {"true", TRUE}, {"var", VAR}, {"while", WHILE}}; + +inline static const TokenType *get_keyword_token(char *key) { + int low = 0; + int high = sizeof(keywords) / sizeof(Item); + + while (low < high) { + int mid = (low + high) / 2; + + int c = strcmp(keywords[mid].key, key); + if (c == 0) { + return &keywords[mid].value; + } + if (c < 0) { + low = mid + 1; + } else { + high = mid; + } + } + return NULL; +} +#endif diff --git a/src/scanner.hpp b/src/scanner.hpp deleted file mode 100644 index c9fb590..0000000 --- a/src/scanner.hpp +++ /dev/null @@ -1,42 +0,0 @@ -#pragma once - -#include "error.hpp" -#include "tokens.hpp" -#include -#include -#include - -typedef struct { - bool had_error; - std::vector token_list; -} ScanResult; - -class Scanner { -private: - bool had_error; - size_t current_pos; - int start; - int current_line; - std::string source; - std::vector token_list; - -public: - Scanner(std::string s); - Result> scan_tokens(); - Result scan_token(); - void add_token(Token::Type type); - void add_token(Token::Type type, std::string literal); - char advance(); - void identifier(); - void number(); - bool is_digit(char c); - void scan_string(); - bool match(char expected); - char peek_next(); - char peek(); - bool is_alpha(char c); - bool is_alphanumeric(char c); - bool is_at_end(void); - void report(std::string message); - void report(std::string where, std::string message); -}; diff --git a/src/tokens.c b/src/tokens.c new file mode 100644 index 0000000..8d635f5 --- /dev/null +++ b/src/tokens.c @@ -0,0 +1,52 @@ +#include "tokens.h" +#include +#include + +Token *newToken() { + Token *token = malloc(sizeof(Token)); + if (token == NULL) { + printf("can't allocate memory"); + exit(1); + } + return token; +} + +void tokenlist_init(TokenList *list) { + list->tokens = malloc(sizeof(Token) * 32); + list->size = 0; + list->capacity = 32; +} + +void tokenlist_add(TokenList *list, Token *value) { + if (list->size >= list->capacity) { + list->capacity *= 2; + list->tokens = realloc(list->tokens, sizeof(Token) * list->capacity); + } + list->tokens[list->size] = value; + list->size += 1; +} + +Token *tokenlist_get(TokenList *list, int index) { + if (index >= list->size || index < 0) { + printf("Index %d out of bounds for list of size %d\n", index, list->size); + exit(1); + } + return list->tokens[index]; +} + +Token *tokenlist_last(TokenList *list) { return list->tokens[list->size - 1]; } + +void tokenlist_print(TokenList *tokenlist) { + for (int i = 0; i < tokenlist->size; i++) { + Token *token = tokenlist_get(tokenlist, i); + if (token->literal != NULL) { + printf("%s(x:%s,l:%s), ", token_name(token->type), token->lexeme, + (char *)token->literal); + } else { + printf("%s(l:%s)", token_name(token->type), token->lexeme); + } + } + printf("\n"); +} + +void tokenlist_free(TokenList *list) { free(list->tokens); } diff --git a/src/tokens.cpp b/src/tokens.cpp deleted file mode 100644 index 5d338d8..0000000 --- a/src/tokens.cpp +++ /dev/null @@ -1,19 +0,0 @@ -#include "tokens.hpp" - -using std::string; - -Token::Token(Token::Type _tokentype, string _lexeme, string _literal, int _line) - : lexeme(_lexeme), literal(_literal), line(_line), tokentype(_tokentype) {} - -string token_name(Token::Type tokentype) { - static const std::string tokens[] = { - "EOF", "(", ")", "{", "}", ",", "*", - "-", "+", ";", "/", "*", "!", "!=", - "=", "==", ">", ">=", "<", "<=", "IDENTIFIER", - "string", "number", "and", "class", "else", "false", "fun", - "for", "if", "Nil", "or", "print", "return", "super", - "this", "true", "var", "while"}; - return tokens[(int)tokentype]; -} - -std::string Token::as_string() const { return token_name(tokentype); } diff --git a/src/tokens.h b/src/tokens.h new file mode 100644 index 0000000..e0b7039 --- /dev/null +++ b/src/tokens.h @@ -0,0 +1,87 @@ +#ifndef TOKENS_H +#define TOKENS_H + +typedef enum { + LEFT_PAREN, + RIGHT_PAREN, + LEFT_BRACE, + RIGHT_BRACE, + COMMA, + DOT, + MINUS, + PLUS, + SEMICOLON, + SLASH, + STAR, + BANG, + BANG_EQUAL, + EQUAL, + EQUAL_EQUAL, + GREATER, + GREATER_EQUAL, + LESS, + LESS_EQUAL, + IDENTIFIER, + STRING, + NUMBER, + AND, + CLASS, + ELSE, + FALSE, + FUN, + FOR, + IF, + NIL, + OR, + PRINT, + RETURN, + SUPER, + THIS, + TRUE, + VAR, + WHILE, + END_OF_FILE, + ERROR +} TokenType; + +static inline const char *token_name(TokenType type) { + static const char *tokens[] = { + "LEFT_PAREN", "RIGHT_PAREN", "LEFT_BRACE", "RIGHT_BRACE", "COMMA", + "DOT", "MINUS", "PLUS", "SEMICOLON", "SLASH", + "STAR", "BANG", "BANG_EQUAL", "EQUAL", "EQUAL_EQUAL", + "GREATER", "GREATER_EQUAL", "LESS", "LESS_EQUAL", "IDENTIFIER", + "STRING", "NUMBER", "AND", "CLASS", "ELSE", + "FALSE", "FUN", "FOR", "IF", "NIL", + "OR", "PRINT", "RETURN", "SUPER", "THIS", + "TRUE", "VAR", "WHILE", "END_OF_FILE", "ERROR"}; + + return tokens[type]; +} + +typedef struct { + TokenType type; + char *lexeme; + void *literal; + int line; +} Token; + +typedef struct TokenList { + Token **tokens; + int size; + int capacity; +} TokenList; + +Token *newToken(); + +void tokenlist_init(TokenList *list); + +void tokenlist_add(TokenList *list, Token *value); + +Token *tokenlist_get(TokenList *list, int index); + +void tokenlist_print(TokenList *tokenlist); + +void tokenlist_free(TokenList *list); + +Token *tokenlist_last(TokenList *list); +#endif diff --git a/src/tokens.hpp b/src/tokens.hpp deleted file mode 100644 index 4d796a0..0000000 --- a/src/tokens.hpp +++ /dev/null @@ -1,58 +0,0 @@ -#pragma once - -#include - -class Token { -public: - std::string lexeme; - std::string literal; - int line; - enum Type { - END_OF_FILE = 0, - LEFT_PAREN = 1, - RIGHT_PAREN = 2, - LEFT_BRACE = 3, - RIGHT_BRACE = 4, - COMMA = 5, - DOT = 6, - MINUS = 7, - PLUS = 8, - SEMICOLON = 9, - SLASH = 10, - STAR = 11, - BANG = 12, - BANG_EQUAL = 13, - EQUAL = 14, - EQUAL_EQUAL = 15, - GREATER = 16, - GREATER_EQUAL = 17, - LESS = 18, - LESS_EQUAL = 19, - IDENTIFIER = 20, - STRING = 21, - NUMBER = 22, - AND = 23, - CLASS = 24, - ELSE = 25, - FALSE = 26, - FUN = 27, - FOR = 28, - IF = 29, - NIL = 30, - OR = 31, - PRINT = 32, - RETURN = 33, - SUPER = 34, - THIS = 35, - TRUE = 36, - VAR = 37, - WHILE = 38, - } tokentype; - - std::string as_string() const; - - Token(Token::Type _tokentype, std::string _lexeme, std::string _literal, - int line); -}; - -std::string token_name(Token::Type tokentype); diff --git a/src/utils.c b/src/utils.c new file mode 100644 index 0000000..7a00364 --- /dev/null +++ b/src/utils.c @@ -0,0 +1,19 @@ +#include +#include + +char* substring(char* string, int position, int length){ + char* ptr = malloc(length+1); + if (ptr == NULL) { + printf("out of memory"); + exit(EXIT_FAILURE); + } + + int c; + for (c=0; c < length; c+=1){ + *(ptr+c) = *(string+position-1); + string += sizeof(char); + } + *(ptr+c) = '\0'; + + return ptr; +} diff --git a/src/utils.h b/src/utils.h new file mode 100644 index 0000000..7356eb3 --- /dev/null +++ b/src/utils.h @@ -0,0 +1,6 @@ +#ifndef UTILS_H +#define UTILS_H + +char* substring(char* string, int position, int length); + +#endif