From f76a252bd432745ff7662c33425b7cefbafee471 Mon Sep 17 00:00:00 2001 From: Shautvast Date: Sat, 12 Oct 2024 17:44:30 +0200 Subject: [PATCH] strings and numbers --- src/scanner.c | 85 +++++++++++++++++- src/tokens.c | 237 +++++++++++++++++++++++++++++++++----------------- 2 files changed, 243 insertions(+), 79 deletions(-) diff --git a/src/scanner.c b/src/scanner.c index 7a0b7b0..97626a1 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -11,6 +11,11 @@ void error(char *message); void report(char *where, char *message); bool is_at_end(void); bool match(char expected); +char peek(void); +char peek_next(void); +void string(void); +bool is_digit(char c); +void number(void); bool had_error = false; int current_pos = -1; @@ -52,6 +57,16 @@ void add_token(enum TokenType type) { tokenlist_add(&token_list, token); } +void add_token_with_literal(enum TokenType type, char *literal) { + Token token; + token.type = type; + token.lexeme = substring(source, start, current_pos); + token.literal = literal; + token.line = current_line; + + tokenlist_add(&token_list, token); +} + char advance(void) { char c = source[current_pos++]; return c; @@ -97,12 +112,66 @@ void scan_token(void) { case '<': add_token(match('=') ? LESS_EQUAL : LESS); break; + case '/': + if (match('/')) { + while (peek() != '\n' && !is_at_end()) { + advance(); + } + } else { + add_token(SLASH); + } + break; + case ' ': + case '\t': + case '\r': + break; + case '\n': + current_line += 1; + break; + case '"': + string(); + break; default: - error("Unexpected character."); + if (is_digit(c)) { + number(); + } else { + error("Unexpected character."); + } break; } } +void number(void) { + while (is_digit(peek())) + advance(); + if (peek() == '.' && is_digit((peek_next()))) + advance(); + while (is_digit(peek())) + advance(); + add_token_with_literal(NUMBER, + substring(source, start + 1, current_pos - start)); +} + +bool is_digit(char c) { return c >= '0' && c <= '9'; } + +void string(void) { + while (peek() != '"' && !is_at_end()) { + if (peek() == '\n') + current_line += 1; + advance(); + } + + if (is_at_end()) { + error("Unterminated string."); + return; + } + + advance(); + + char *string = substring(source, start + 2, current_pos - start - 2); + add_token_with_literal(STRING, string); +} + bool match(char expected) { if (is_at_end()) { return false; @@ -114,6 +183,20 @@ bool match(char expected) { return true; } +char peek_next(void) { + if (current_pos + 1 >= (int)strlen(source)) { + return '\0'; + } + return source[current_pos + 1]; +} + +char peek(void) { + if (is_at_end()) { + return '\0'; + } + return source[current_pos]; +} + bool is_at_end(void) { return current_pos >= (int)strlen(source); } void error(char *message) { report("", message); } diff --git a/src/tokens.c b/src/tokens.c index 755e681..7de6da9 100644 --- a/src/tokens.c +++ b/src/tokens.c @@ -1,83 +1,164 @@ -#include -#include #include "tokens.h" +#include +#include -char* token_name(Token* token){ - char* type; - switch (token->type){ - case LEFT_PAREN: type = "LEFT_PAREN"; break; - case RIGHT_PAREN: type = "RIGHT_PAREN"; break; - case LEFT_BRACE: type = "LEFT_BRACE"; break; - case RIGHT_BRACE: type = "RIGHT_BRACE"; break; - case COMMA: type = "COMMA"; break; - case DOT: type = "DOT"; break; - case MINUS: type = "MINUS"; break; - case PLUS: type = "PLUS"; break; - case SEMICOLON: type = "SEMICOLON"; break; - case SLASH: type = "SLASH"; break; - case STAR: type = "STAR"; break; - case BANG: type = "BANG"; break; - case BANG_EQUAL: type = "BANG_EQUAL"; break; - case EQUAL: type = "EQUAL"; break; - case EQUAL_EQUAL: type = "EQUAL_EQUAL"; break; - case GREATER: type = "GREATER"; break; - case GREATER_EQUAL: type = "GREATER_EQUAL"; break; - case LESS: type = "LESS"; break; - case LESS_EQUAL: type = "LESS_EQUAL"; break; - case IDENTIFIER: type = "IDENTIFIER"; break; - case STRING: type = "STRING"; break; - case NUMBER: type = "NUMBER"; break; - case AND: type = "AND"; break; - case CLASS: type = "CLASS"; break; - case ELSE: type = "ELSE"; break; - case FALSE: type = "FALSE"; break; - case FUN: type = "FUN"; break; - case FOR: type = "FOR"; break; - case IF: type = "IF"; break; - case NIL: type = "NIL"; break; - case OR: type = "OR"; break; - case PRINT: type = "PRINT"; break; - case RETURN: type = "RETURN"; break; - case SUPER: type = "SUPER"; break; - case THIS: type = "THIS"; break; - case TRUE: type = "TRUE"; break; - case VAR: type = "VAR"; break; - case WHILE: type = "WHILE"; break; - case END_OF_FILE: type = "EOF"; break; +char *token_name(Token *token) { + char *type; + switch (token->type) { + case LEFT_PAREN: + type = "LEFT_PAREN"; + break; + case RIGHT_PAREN: + type = "RIGHT_PAREN"; + break; + case LEFT_BRACE: + type = "LEFT_BRACE"; + break; + case RIGHT_BRACE: + type = "RIGHT_BRACE"; + break; + case COMMA: + type = "COMMA"; + break; + case DOT: + type = "DOT"; + break; + case MINUS: + type = "MINUS"; + break; + case PLUS: + type = "PLUS"; + break; + case SEMICOLON: + type = "SEMICOLON"; + break; + case SLASH: + type = "SLASH"; + break; + case STAR: + type = "STAR"; + break; + case BANG: + type = "BANG"; + break; + case BANG_EQUAL: + type = "BANG_EQUAL"; + break; + case EQUAL: + type = "EQUAL"; + break; + case EQUAL_EQUAL: + type = "EQUAL_EQUAL"; + break; + case GREATER: + type = "GREATER"; + break; + case GREATER_EQUAL: + type = "GREATER_EQUAL"; + break; + case LESS: + type = "LESS"; + break; + case LESS_EQUAL: + type = "LESS_EQUAL"; + break; + case IDENTIFIER: + type = "IDENTIFIER"; + break; + case STRING: + type = "STRING"; + break; + case NUMBER: + type = "NUMBER"; + break; + case AND: + type = "AND"; + break; + case CLASS: + type = "CLASS"; + break; + case ELSE: + type = "ELSE"; + break; + case FALSE: + type = "FALSE"; + break; + case FUN: + type = "FUN"; + break; + case FOR: + type = "FOR"; + break; + case IF: + type = "IF"; + break; + case NIL: + type = "NIL"; + break; + case OR: + type = "OR"; + break; + case PRINT: + type = "PRINT"; + break; + case RETURN: + type = "RETURN"; + break; + case SUPER: + type = "SUPER"; + break; + case THIS: + type = "THIS"; + break; + case TRUE: + type = "TRUE"; + break; + case VAR: + type = "VAR"; + break; + case WHILE: + type = "WHILE"; + break; + case END_OF_FILE: + type = "EOF"; + break; + } + return type; +} + +void tokenlist_init(TokenList *list) { + list->tokens = malloc(sizeof(Token) * 32); + list->size = 0; + list->capacity = 32; +} + +void tokenlist_add(TokenList *list, Token value) { + if (list->size >= list->capacity) { + list->capacity *= 2; + list->tokens = realloc(list->tokens, sizeof(Token) * list->capacity); + } + list->tokens[list->size] = value; + list->size += 1; +} + +Token *tokenlist_get(TokenList *list, int index) { + if (index >= list->size || index < 0) { + printf("Index %d out of bounds for list of size %d\n", index, list->size); + exit(1); + } + return &list->tokens[index]; +} + +void tokenlist_print(TokenList *tokenlist) { + for (int i = 0; i < tokenlist->size; i++) { + Token *token = tokenlist_get(tokenlist, i); + if (token->literal != NULL) { + printf("%s(%s), ", token_name(token), (char *)token->literal); + } else { + printf("%s, ", token_name(token)); } - return type; + } + printf("\n"); } -void tokenlist_init(TokenList* list){ - list->tokens = malloc(sizeof(Token) * 32); - list->size = 0; - list->capacity = 32; -} - -void tokenlist_add(TokenList* list, Token value){ - if (list->size >= list->capacity){ - list->capacity *= 2; - list->tokens = realloc(list->tokens, sizeof(Token)* list->capacity); - } - list->tokens[list->size] = value; - list->size +=1; -} - -Token* tokenlist_get(TokenList* list, int index){ - if (index >= list->size || index < 0){ - printf("Index %d out of bounds for list of size %d\n", index, list->size); - exit(1); - } - return &list->tokens[index]; -} - -void tokenlist_print(TokenList* tokenlist){ - for (int i=0; i< tokenlist->size; i++){ - printf("%s, ", token_name(tokenlist_get(tokenlist, i))); - printf("\n"); - } -} - -void tokenlist_free(TokenList* list){ - free(list->tokens); -} +void tokenlist_free(TokenList *list) { free(list->tokens); }