From 1c092acd4031f4ef9b88515252b11fff68bcae55 Mon Sep 17 00:00:00 2001 From: Shautvast Date: Wed, 11 Dec 2024 21:36:36 +0100 Subject: [PATCH] interpreter working nicely --- makefile | 9 +- src/interpreter.c | 248 ++++++++++++++++++++++++++++++++++++++++++++++ src/interpreter.h | 28 ++++++ src/lox.c | 9 +- src/parser.c | 138 ++++++++++++++++++-------- src/parser.h | 32 +++++- src/scanner.c | 3 + src/tokens.c | 16 +-- src/tokens.h | 9 +- src/utils.c | 30 +++--- src/utils.h | 2 +- 11 files changed, 448 insertions(+), 76 deletions(-) create mode 100644 src/interpreter.c create mode 100644 src/interpreter.h diff --git a/makefile b/makefile index 6ccd569..ae66869 100644 --- a/makefile +++ b/makefile @@ -1,13 +1,13 @@ TARGET := ./target SRC := ./src CC := clang -c -std=c17 -#-Wall -Wextra -pedantic -Werror +# -Wall -Wextra -pedantic -Werror SRCS := $(shell find $(SRC) -name '*.c') OBJS := $(SRCS:%=$(TARGET)/%.o) -$(TARGET)/lox: $(TARGET)/lox.c.o $(TARGET)/parser.c.o $(TARGET)/scanner.c.o $(TARGET)/tokens.c.o $(TARGET)/utils.c.o - clang $(TARGET)/lox.c.o -L$(TARGET) -lparser.c.o -ltokens.c.o -lscanner.c.o -lutils.c.o -o $(TARGET)/lox +$(TARGET)/lox: $(TARGET)/lox.c.o $(TARGET)/interpreter.c.o $(TARGET)/parser.c.o $(TARGET)/scanner.c.o $(TARGET)/tokens.c.o $(TARGET)/utils.c.o + clang $(TARGET)/lox.c.o -L$(TARGET) -linterpreter.c.o -lparser.c.o -ltokens.c.o -lscanner.c.o -lutils.c.o -o $(TARGET)/lox $(TARGET)/utils.c.o: $(SRC)/utils.c $(CC) $< -o $@ @@ -21,6 +21,9 @@ $(TARGET)/scanner.c.o: $(SRC)/scanner.c $(TARGET)/parser.c.o: $(SRC)/parser.c $(CC) $< -o $@ +$(TARGET)/interpreter.c.o: $(SRC)/interpreter.c + $(CC) $< -o $@ + $(TARGET)/lox.c.o: $(SRC)/lox.c $(TARGET) $(CC) $< -o $@ diff --git a/src/interpreter.c b/src/interpreter.c new file mode 100644 index 0000000..ff02a90 --- /dev/null +++ b/src/interpreter.c @@ -0,0 +1,248 @@ +#include "interpreter.h" +#include + +Value *accept(Expression *expr); +void checkNumeric(Value *left, Value *right); +Value *isEqual(Value *left, 
Value *right); +bool streq(char *left, char *right); + +Value *visitBinary(Expression *expr); +Value *visitUnary(Expression *unary); +Value *visitLiteral(Expression *literal); +Value *visitGroup(Expression *literal); +Value *visitVariable(Expression *var); +Value *visitVariableStmt(Expression *varStmt); +Value *visitAssignStmt(Expression *varStmt); + +Value *visitPrintStmt(Expression *printStatement); +Value *visitBlock(Expression *block); + +static VarMap *current; + +Value *accept(Expression *expr) { + // printf("accept %s\n", expr->type); + char *type = expr->type; + if (streq(type, "BinaryExpr")) { + return visitBinary(expr); + } + if (streq(type, "UnaryExpr")) { + return visitUnary(expr); + } + if (streq(type, "Literal")) { + return visitLiteral(expr); + } + if (streq(type, "Group")) { + return visitGroup(expr); + } + if (streq(type, "PrintStmt")) { + return visitPrintStmt(expr); + } + if (streq(type, "VariableStmt")) { + return visitVariableStmt(expr); + } + if (streq(type, "AssignStmt")) { + return visitAssignStmt(expr); + } + if (streq(type, "Variable")) { + return visitVariable(expr); + } + if (streq(type, "ExprStmt")) { + return accept(expr->left); + } + if (streq(type, "Block")) { + return visitBlock(expr); + } + + return NULL; +} + +void execute(Expression *statement) { accept(statement); } + +void interpret(VarMap *environment, ExpressionList *statements) { + current = environment; + + for (int i = 0; i < statements->size; i++) { + execute(exprlist_get(statements, i)); + } +} + +Value *visitVariable(Expression *var) { return var_get(current, var->name); } + +Value *visitVariableStmt(Expression *var) { + Value *value = var->left != NULL ? accept(var->left) : NULL; // "var x;" has no initializer; accept() would dereference NULL + if (var_isdefined(current, var->name)) { + printf("%s is already defined\n", var->name); + return NULL; + } + var_add(current, var->name, value); + return NULL; +} + +Value *visitBlock(Expression *blockStmt) { + VarMap *previous = current; + current = newVarMap(previous); + for (int i = 0; i < blockStmt->block->size; 
i++) { + Expression *e = exprlist_get(blockStmt->block, i); + execute(e); + } + current = previous; + return NULL; +} + +Value *visitGroup(Expression *group) { return accept(group->left); } + +Value *visitPrintStmt(Expression *printStatement) { + Value *value = accept(printStatement->left); + if (value == NULL) { + return NULL; + } + printf("%s\n", value_string(value)); + return NULL; +} + +Value *visitAssignStmt(Expression *var) { + Value *value = accept(var->left); + bool result = var_set(current, var->name, value); + if (!result) { + printf("%s is not defined\n", var->name); + } + return NULL; +} + +Value *visitUnary(Expression *unary) { + Value *right = accept(unary->right); + + switch (unary->operator->type) { + case MINUS: + return newNumber(-right->value.number); + case BANG: + return newBoolean(!right->value.boolean); + default: + return NULL; + }; +} + +Value *visitLiteral(Expression *literal) { return literal->value; } + +Value *visitBinary(Expression *expr) { + Value *left = accept(expr->left); + Value *right = accept(expr->right); + + switch (expr->operator->type) { + case MINUS: + checkNumeric(left, right); + return newNumber(left->value.number - right->value.number); + case PLUS: + checkNumeric(left, right); + return newNumber(left->value.number + right->value.number); + case SLASH: + checkNumeric(left, right); + return newNumber(left->value.number / right->value.number); + case STAR: + checkNumeric(left, right); + return newNumber(left->value.number * right->value.number); + case GREATER: + checkNumeric(left, right); + return newBoolean(left->value.number > right->value.number); + case GREATER_EQUAL: + checkNumeric(left, right); + return newBoolean(left->value.number >= right->value.number); + case LESS: + checkNumeric(left, right); + return newBoolean(left->value.number < right->value.number); + case LESS_EQUAL: + checkNumeric(left, right); + return newBoolean(left->value.number <= right->value.number); + case BANG_EQUAL: + return newBoolean(!isEqual(left, 
right)->value.boolean); // "!=" is the negation of "=="; it used to return the "==" result + case EQUAL_EQUAL: + return isEqual(left, right); + default: + return NULL; + } +} + +void checkNumeric(Value *left, Value *right) { + if (left->type != NUMBERTYPE || right->type != NUMBERTYPE) { + printf("operands should be numeric"); + exit(-1); + } +} + +VarMap *newVarMap(VarMap *enclosing) { + VarMap *map = malloc(sizeof(VarMap)); + if (map == NULL) { + printf("Can not allocate memory for VarMap"); + exit(1); + } + map->size = 0; + map->enclosing = enclosing; + return map; +} + +bool var_isdefined(VarMap *map, const char *key) { + for (int i = 0; i < map->size; i++) { + if (strcmp(map->entries[i].key, key) == 0) { + return true; + } + } + if (map->enclosing != NULL) { + return var_isdefined(map->enclosing, key); + } + + return false; +} + +void var_add(VarMap *map, const char *key, Value *value) { + if (map->size == MAX_MAP_SIZE) { + printf("Map is full!\n"); + return; + } + snprintf(map->entries[map->size].key, sizeof(map->entries[map->size].key), "%s", key); // bounded copy: over-long names are truncated instead of overflowing key[] + map->entries[map->size].value = value; + + map->size += 1; +} + +bool var_set(VarMap *map, char *key, Value *value) { + for (int i = 0; i < map->size; i++) { + if (strcmp(map->entries[i].key, key) == 0) { + map->entries[i].value = value; // Overwrite the existing binding + return true; + } + } + return map->enclosing != NULL && var_set(map->enclosing, key, value); // fall back to outer scopes, mirroring var_get +} + +Value *var_get(VarMap *map, const char *key) { + for (int i = 0; i < map->size; i++) { + + if (strcmp(map->entries[i].key, key) == 0) { + return map->entries[i].value; // Return the value + } + } + if (map->enclosing != NULL) { + return var_get(map->enclosing, key); + } + + printf("%s is not defined\n", key); + return NULL; // Key not found +} + +Value *isEqual(Value *left, Value *right) { + if (left->type != right->type) { + return newBoolean(false); + } + switch (left->type) { + case STRINGTYPE: + return newBoolean(strcmp(left->value.string, right->value.string) == 0); + case NUMBERTYPE: + return newBoolean(left->value.number == right->value.number); + case BOOLEANTYPE: + return newBoolean(left->value.boolean == right->value.boolean); + 
case EXPR: // MUST NOT HAPPEN :(=) + return NULL; + } +} + +bool streq(char *left, char *right) { return strcmp(left, right) == 0; } diff --git a/src/interpreter.h b/src/interpreter.h new file mode 100644 index 0000000..67e27b7 --- /dev/null +++ b/src/interpreter.h @@ -0,0 +1,28 @@ +#ifndef INTERPRETER_H +#define INTERPRETER_H + +#include "parser.h" + +#define MAX_MAP_SIZE 1000 + +typedef struct VarMap VarMap; + +VarMap *newVarMap(VarMap *enclosing); +void interpret(VarMap *environment, ExpressionList *statements); + +typedef struct { + char key[50]; // Variable name, NUL-terminated (at most 49 characters) + Value *value; // Value currently bound to that name +} MapEntry; + +struct VarMap { + VarMap *enclosing; + MapEntry entries[MAX_MAP_SIZE]; // Array of key-value pairs + int size; // Current size of the map +}; + +bool var_isdefined(VarMap *map, const char *key); +void var_add(VarMap *map, const char *key, Value *value); +Value *var_get(VarMap *map, const char *key); +bool var_set(VarMap *map, char *key, Value *value); +#endif diff --git a/src/lox.c b/src/lox.c index 88c5b6d..e33225e 100644 --- a/src/lox.c +++ b/src/lox.c @@ -1,3 +1,4 @@ +#include "interpreter.h" #include "parser.h" #include "scanner.h" #include "utils.h" @@ -9,8 +10,11 @@ int run_file(char *file); void run_prompt(void); void run(char *source); +static VarMap *environment; int main(int argc, char *argv[]) { + environment = newVarMap(NULL); + setvbuf(stdout, NULL, _IONBF, 0); if (argc > 2) { puts("Usage: lox [script]"); @@ -80,6 +84,7 @@ void run_prompt(void) { void run(char *source) { ScanResult scan_result = scan_tokens(source); // tokenlist_print(&scan_result.token_list); - ExpressionList list = parse(&scan_result.token_list); - exprlist_print(&list); + ExpressionList *list = parse(&scan_result.token_list); + // exprlist_print(list); + interpret(environment, list); } diff --git a/src/parser.c b/src/parser.c index 1ed97e8..914e3d7 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1,5 +1,4 @@ #include "parser.h" -#include 
"tokens.h" #include #include #include @@ -7,8 +6,6 @@ bool is_at_end(void); Token *peek(void); -size_t expr_toString(const Expression *expr, char *output, size_t outputSize, - size_t offset); void expr_print(const Expression *expr); Expression *declaration(void); bool match1(TokenType t); @@ -22,7 +19,7 @@ Expression *var_declaration(void); Expression *expression(void); Expression *statement(void); Expression *printStatement(void); -ExpressionList block(void); +ExpressionList *parse_block(void); Expression *expressionStatement(void); Expression *assignment(void); Expression *equality(void); @@ -37,15 +34,14 @@ Token *consume(TokenType type, char *message); static TokenList *tokens; static int current; -ExpressionList parse(TokenList *tokens_to_parse) { - ExpressionList statements; - exprlist_init(&statements); +ExpressionList *parse(TokenList *tokens_to_parse) { + ExpressionList *statements = newExpressionList(); tokens = tokens_to_parse; current = 0; while (!is_at_end()) { - exprlist_add(&statements, declaration()); + exprlist_add(statements, declaration()); } return statements; } @@ -60,12 +56,12 @@ Expression *declaration(void) { Expression *var_declaration(void) { Token *name = consume(IDENTIFIER, "Expected a variable name"); - Expression *initializer; + Expression *initializer = NULL; if (match1(EQUAL)) { initializer = expression(); } consume(SEMICOLON, "Expected semicolon"); - Expression *variableStatement = newExpression("VariableStatement"); + Expression *variableStatement = newExpression("VariableStmt"); variableStatement->name = name->lexeme; variableStatement->left = initializer; return variableStatement; @@ -76,24 +72,22 @@ Expression *statement(void) { return printStatement(); } if (match1(LEFT_BRACE)) { - ExpressionList block_contents = block(); Expression *block = newExpression("Block"); - block->value = &block_contents; + + ExpressionList *block_statements = newExpressionList(); + + while (!check(RIGHT_BRACE) && !is_at_end()) { + 
exprlist_add(block_statements, declaration()); + } + advance(); + + block->block = block_statements; return block; } return expressionStatement(); } -ExpressionList block(void) { - ExpressionList statements; - exprlist_init(&statements); - while (!check(RIGHT_BRACE) && !is_at_end()) { - exprlist_add(&statements, declaration()); - } - return statements; -} - Expression *printStatement(void) { Expression *value = expression(); consume(SEMICOLON, "Expected semicolon"); @@ -106,7 +100,7 @@ Expression *expressionStatement(void) { Expression *value = expression(); consume(SEMICOLON, "Expected semicolon"); Expression *statement = newExpression("ExprStmt"); - statement->value = &value; + statement->left = value; return statement; } @@ -120,7 +114,7 @@ Expression *assignment(void) { if (strcmp(expr->type, "Variable") == 0) { Expression *assign = newExpression("AssignStmt"); assign->name = expr->name; - assign->value = &value; + assign->left = value; return assign; } Expression *error = newExpression("Error"); @@ -201,13 +195,11 @@ Expression *unary(void) { Expression *primary(void) { Expression *r = newExpression("Literal"); if (match1(FALSE)) { - r->name = "boolean"; - r->value = "false"; + r->value = newBoolean(false); return r; } if (match1(TRUE)) { - r->name = "boolean"; - r->value = "true"; + r->value = newBoolean(true); return r; } @@ -219,25 +211,24 @@ Expression *primary(void) { if (check(NUMBER)) { advance(); - r->name = "number"; - r->value = previous()->literal; + r->value = newNumber(strtod(previous()->literal, NULL)); return r; } if (check(STRING)) { advance(); - r->name = "string"; - r->value = previous()->literal; + r->value = newString(previous()->literal); return r; } if (match1(IDENTIFIER)) { Expression *var = newExpression("Variable"); - r->name = previous()->lexeme; + var->name = previous()->lexeme; return var; } if (match1(LEFT_PAREN)) { + Expression *expr = expression(); Expression *group = newExpression("Group"); consume(RIGHT_PAREN, "Expect ')' after 
expression."); - r->left = group; + group->left = expr; return group; } @@ -246,10 +237,11 @@ Expression *primary(void) { } Token *consume(TokenType type, char *message) { + // printf("%s==%s\n", token_name(type), token_name(peek()->type)); if (check(type)) { return advance(); } - printf("error\n"); + Token *t = newToken(); t->type = ERROR; t->lexeme = message; @@ -310,10 +302,21 @@ bool is_at_end(void) { return peek()->type == END_OF_FILE; } Token *peek(void) { return tokenlist_get(tokens, current); } -void exprlist_init(ExpressionList *list) { +ExpressionList *newExpressionList() { + ExpressionList *list = malloc(sizeof(ExpressionList)); + if (list == NULL) { + printf("Cannot allocate memory for ExpressionList"); + exit(1); + } + list->expressions = malloc(sizeof(Expression) * 32); + if (list->expressions == NULL) { + printf("Cannot allocate memory for ExpressionList"); + exit(1); + } list->size = 0; list->capacity = 32; + return list; } void exprlist_add(ExpressionList *list, Expression *value) { @@ -322,8 +325,7 @@ void exprlist_add(ExpressionList *list, Expression *value) { list->expressions = realloc(list->expressions, sizeof(Expression) * list->capacity); } - list->expressions[list->size] = value; - list->size += 1; + list->expressions[list->size++] = value; } Expression *exprlist_get(ExpressionList *list, int index) { @@ -359,15 +361,13 @@ void expr_print(const Expression *expr) { } if (expr->name != NULL) { printf(", name: %s", expr->name); - if (strcmp(expr->name, "string") == 0 || - strcmp(expr->name, "number") == 0 || - strcmp(expr->name, "boolean") == 0) { - printf(", value: %s", (char *)expr->value); - } if (strcmp(expr->name, "nil") == 0 && expr->value == NULL) { printf(", value: NULL"); } } + if (expr->value != NULL) { + printf(", value: %s", value_string(expr->value)); + } printf("]"); } @@ -379,5 +379,59 @@ Expression *newExpression(char *type) { e->name = NULL; e->operator= NULL; e->value = NULL; + e->block = NULL; return e; } + +Value 
*newString(char *string) { + Value *value = newValue(); + value->type = STRINGTYPE; + value->value.string = string; + return value; +} + +Value *newBoolean(bool boolean) { + Value *value = newValue(); + value->type = BOOLEANTYPE; + value->value.boolean = boolean; + return value; +} + +Value *newNumber(double number) { + Value *value = newValue(); + value->type = NUMBERTYPE; + value->value.number = number; + return value; +} + +Value *newValue(void) { + Value *value = malloc(sizeof(Value)); + if (value == NULL) { + printf("can't allocate memory for Value"); + exit(1); + } + return value; +} + +char *d_to_s(double d) { + char *str = (char *)malloc(50); + if (str == NULL) { + puts("cannot allocate memory for string"); + exit(1); + } + snprintf(str, 50, "%lf", d); // bound by the 50-byte allocation above; sizeof(str) is only the size of the pointer + return str; +} + +const char *value_string(Value *v) { + switch (v->type) { + case STRINGTYPE: + return v->value.string; + case BOOLEANTYPE: + return v->value.boolean ? "true" : "false"; + case NUMBERTYPE: + return d_to_s(v->value.number); + case EXPR: + return v->value.expr->type; + } +} diff --git a/src/parser.h b/src/parser.h index a6c98fe..12f83b5 100644 --- a/src/parser.h +++ b/src/parser.h @@ -1,6 +1,24 @@ +#ifndef PARSER_H +#define PARSER_H + #include "tokens.h" typedef struct Expression Expression; +typedef struct ExpressionList ExpressionList; + +typedef union ValueHolder { + double number; + char *string; + bool boolean; + Expression *expr; +} ValueHolder; + +typedef enum { NUMBERTYPE, STRINGTYPE, BOOLEANTYPE, EXPR } Type; + +typedef struct Value { + Type type; + ValueHolder value; +} Value; struct Expression { char *type; @@ -8,7 +26,8 @@ struct Expression { Expression *right; Token *operator; char *name; - void *value; + Value *value; + ExpressionList *block; }; typedef struct ExpressionList { @@ -17,9 +36,15 @@ typedef struct ExpressionList { int capacity; } ExpressionList; -ExpressionList parse(TokenList *tokens); +const char *value_string(Value *v); +Value *newValue(void); +Value 
*newString(char *string); +Value *newNumber(double number); +Value *newBoolean(bool boolean); -void exprlist_init(ExpressionList *list); +ExpressionList *parse(TokenList *tokens); +ExpressionList *newExpressionList(); +// void exprlist_init(ExpressionList *list); void exprlist_add(ExpressionList *list, Expression *value); @@ -28,3 +53,4 @@ Expression *exprlist_get(ExpressionList *list, int index); void exprlist_print(ExpressionList *list); void exprlist_free(ExpressionList *list); +#endif diff --git a/src/scanner.c b/src/scanner.c index 70e8aef..c604e9d 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -109,6 +109,9 @@ static void scan_token(void) { case '-': add_token(MINUS); break; + case '*': + add_token(STAR); + break; case '!': add_token(match('=') ? BANG_EQUAL : BANG); break; diff --git a/src/tokens.c b/src/tokens.c index 8d635f5..6a26ca2 100644 --- a/src/tokens.c +++ b/src/tokens.c @@ -1,11 +1,12 @@ #include "tokens.h" +#include #include #include -Token *newToken() { +Token *newToken(void) { Token *token = malloc(sizeof(Token)); if (token == NULL) { - printf("can't allocate memory"); + printf("can't allocate memory for Token"); exit(1); } return token; @@ -13,6 +14,10 @@ Token *newToken() { void tokenlist_init(TokenList *list) { list->tokens = malloc(sizeof(Token) * 32); + if (list->tokens == NULL) { + printf("Cannot allocate memory for TokenList"); + exit(1); + } list->size = 0; list->capacity = 32; } @@ -39,12 +44,7 @@ Token *tokenlist_last(TokenList *list) { return list->tokens[list->size - 1]; } void tokenlist_print(TokenList *tokenlist) { for (int i = 0; i < tokenlist->size; i++) { Token *token = tokenlist_get(tokenlist, i); - if (token->literal != NULL) { - printf("%s(x:%s,l:%s), ", token_name(token->type), token->lexeme, - (char *)token->literal); - } else { - printf("%s(l:%s)", token_name(token->type), token->lexeme); - } + printf("%s(%s)", token_name(token->type), token->lexeme); } printf("\n"); } diff --git a/src/tokens.h b/src/tokens.h index 
e0b7039..a03e27d 100644 --- a/src/tokens.h +++ b/src/tokens.h @@ -1,5 +1,8 @@ #ifndef TOKENS_H #define TOKENS_H +#include +#include +#include typedef enum { LEFT_PAREN, @@ -58,10 +61,10 @@ static inline const char *token_name(TokenType type) { return tokens[type]; } -typedef struct { +typedef struct Token { TokenType type; char *lexeme; - void *literal; + char *literal; int line; } Token; @@ -71,7 +74,7 @@ typedef struct TokenList { int capacity; } TokenList; -Token *newToken(); +Token *newToken(void); void tokenlist_init(TokenList *list); diff --git a/src/utils.c b/src/utils.c index 7a00364..bfeafc0 100644 --- a/src/utils.c +++ b/src/utils.c @@ -1,19 +1,21 @@ -#include +#include "utils.h" #include +#include +#include -char* substring(char* string, int position, int length){ - char* ptr = malloc(length+1); - if (ptr == NULL) { - printf("out of memory"); - exit(EXIT_FAILURE); - } +char *substring(char *string, int position, int length) { + char *ptr = malloc(length + 1); + if (ptr == NULL) { + printf("out of memory"); + exit(EXIT_FAILURE); + } - int c; - for (c=0; c < length; c+=1){ - *(ptr+c) = *(string+position-1); - string += sizeof(char); - } - *(ptr+c) = '\0'; + int c; + for (c = 0; c < length; c += 1) { + *(ptr + c) = *(string + position - 1); + string += sizeof(char); + } + *(ptr + c) = '\0'; - return ptr; + return ptr; } diff --git a/src/utils.h b/src/utils.h index 7356eb3..926f3b3 100644 --- a/src/utils.h +++ b/src/utils.h @@ -1,6 +1,6 @@ #ifndef UTILS_H #define UTILS_H -char* substring(char* string, int position, int length); +char *substring(char *string, int position, int length); #endif