From 8663a3c99399ea1661abcbb71df8fdcd76abea90 Mon Sep 17 00:00:00 2001 From: Shautvast Date: Sun, 13 Oct 2024 22:31:46 +0200 Subject: [PATCH] keywords --- src/scanner.c | 45 ++++++++++++++++++ src/scanner.h | 20 ++++++-- src/tokens.c | 128 +------------------------------------------------- src/tokens.h | 87 +++++++++++++++++++++++++--------- 4 files changed, 128 insertions(+), 152 deletions(-) diff --git a/src/scanner.c b/src/scanner.c index 97626a1..8d13968 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -16,6 +16,9 @@ char peek_next(void); void string(void); bool is_digit(char c); void number(void); +bool is_alpha(char c); +bool is_alphanumeric(char c); +void identifier(void); bool had_error = false; int current_pos = -1; @@ -24,6 +27,26 @@ int current_line = -1; char *source; TokenList token_list; +static const enum TokenType *get_token(char *key) { + int low = 0; + int high = sizeof(keywords) / sizeof(Item); + + while (low < high) { + int mid = (low + high) / 2; + + int c = strcmp(keywords[mid].key, key); + if (c == 0) { + return &keywords[mid].value; + } + if (c < 0) { + low = mid + 1; + } else { + high = mid; + } + } + return NULL; +} + ScanResult scan_tokens(char *src) { current_pos = 0; start = 0; @@ -134,6 +157,8 @@ void scan_token(void) { default: if (is_digit(c)) { number(); + } else if (is_alpha(c)) { + identifier(); } else { error("Unexpected character."); } @@ -141,6 +166,21 @@ void scan_token(void) { } } +void identifier(void) { + while (is_alphanumeric(peek())) { + advance(); + } + + char *text = substring(source, start + 1, current_pos - start); + + const enum TokenType *tokentype = get_token(text); + if (tokentype == NULL) { + add_token(IDENTIFIER); + } else { + add_token(*tokentype); + } +} + void number(void) { while (is_digit(peek())) advance(); @@ -196,6 +236,11 @@ char peek(void) { } return source[current_pos]; } +bool is_alpha(char c) { + return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'); +} + +bool is_alphanumeric(char c) { return is_alpha(c) || is_digit(c); } bool is_at_end(void) { return current_pos >= (int)strlen(source); } diff --git a/src/scanner.h b/src/scanner.h index a506133..00f939d 100644 --- a/src/scanner.h +++ b/src/scanner.h @@ -1,14 +1,26 @@ #ifndef SCANNER_H #define SCANNER_H -#include #include "tokens.h" +#include +#include typedef struct { - bool had_error; - TokenList token_list; + bool had_error; + TokenList token_list; } ScanResult; -ScanResult scan_tokens(char* source); +ScanResult scan_tokens(char *source); + +typedef struct { + const char *key; + const enum TokenType value; +} Item; + +static const Item keywords[] = { + {"and", AND}, {"class", CLASS}, {"else", ELSE}, {"false", FALSE}, + {"for", FOR}, {"fun", FUN}, {"if", IF}, {"nil", NIL}, + {"or", OR}, {"print", PRINT}, {"return", RETURN}, {"super", SUPER}, + {"this", THIS}, {"true", TRUE}, {"var", VAR}, {"while", WHILE}}; #endif diff --git a/src/tokens.c b/src/tokens.c index 7de6da9..480e7da 100644 --- a/src/tokens.c +++ b/src/tokens.c @@ -2,130 +2,6 @@ #include #include -char *token_name(Token *token) { - char *type; - switch (token->type) { - case LEFT_PAREN: - type = "LEFT_PAREN"; - break; - case RIGHT_PAREN: - type = "RIGHT_PAREN"; - break; - case LEFT_BRACE: - type = "LEFT_BRACE"; - break; - case RIGHT_BRACE: - type = "RIGHT_BRACE"; - break; - case COMMA: - type = "COMMA"; - break; - case DOT: - type = "DOT"; - break; - case MINUS: - type = "MINUS"; - break; - case PLUS: - type = "PLUS"; - break; - case SEMICOLON: - type = "SEMICOLON"; - break; - case SLASH: - type = "SLASH"; - break; - case STAR: - type = "STAR"; - break; - case BANG: - type = "BANG"; - break; - case BANG_EQUAL: - type = "BANG_EQUAL"; - break; - case EQUAL: - type = "EQUAL"; - break; - case EQUAL_EQUAL: - type = "EQUAL_EQUAL"; - break; - case GREATER: - type = "GREATER"; - break; - case GREATER_EQUAL: - type = "GREATER_EQUAL"; - break; - case LESS: - type = "LESS"; - break; - case LESS_EQUAL: - type = "LESS_EQUAL"; - break; - case IDENTIFIER: - type = "IDENTIFIER"; - break; - case STRING: - type = "STRING"; - break; - case NUMBER: - type = "NUMBER"; - break; - case AND: - type = "AND"; - break; - case CLASS: - type = "CLASS"; - break; - case ELSE: - type = "ELSE"; - break; - case FALSE: - type = "FALSE"; - break; - case FUN: - type = "FUN"; - break; - case FOR: - type = "FOR"; - break; - case IF: - type = "IF"; - break; - case NIL: - type = "NIL"; - break; - case OR: - type = "OR"; - break; - case PRINT: - type = "PRINT"; - break; - case RETURN: - type = "RETURN"; - break; - case SUPER: - type = "SUPER"; - break; - case THIS: - type = "THIS"; - break; - case TRUE: - type = "TRUE"; - break; - case VAR: - type = "VAR"; - break; - case WHILE: - type = "WHILE"; - break; - case END_OF_FILE: - type = "EOF"; - break; - } - return type; -} - void tokenlist_init(TokenList *list) { list->tokens = malloc(sizeof(Token) * 32); list->size = 0; @@ -153,9 +29,9 @@ void tokenlist_print(TokenList *tokenlist) { for (int i = 0; i < tokenlist->size; i++) { Token *token = tokenlist_get(tokenlist, i); if (token->literal != NULL) { - printf("%s(%s), ", token_name(token), (char *)token->literal); + printf("%s(%s), ", token_name(token->type), (char *)token->literal); } else { - printf("%s, ", token_name(token)); + printf("%s, ", token_name(token->type)); } } printf("\n"); diff --git a/src/tokens.h b/src/tokens.h index e63dfd4..900986f 100644 --- a/src/tokens.h +++ b/src/tokens.h @@ -1,40 +1,83 @@ #ifndef TOKENS_H #define TOKENS_H -enum TokenType{ - LEFT_PAREN, RIGHT_PAREN, LEFT_BRACE, RIGHT_BRACE, - COMMA, DOT, MINUS, PLUS, SEMICOLON, SLASH, STAR, - BANG, BANG_EQUAL, EQUAL, EQUAL_EQUAL, - GREATER, GREATER_EQUAL, LESS, LESS_EQUAL, - IDENTIFIER, STRING, NUMBER, - AND, CLASS, ELSE, FALSE, FUN, FOR, IF, NIL, OR, - PRINT, RETURN, SUPER, THIS, TRUE, VAR, WHILE, - END_OF_FILE +enum TokenType { + LEFT_PAREN, + RIGHT_PAREN, + LEFT_BRACE, + RIGHT_BRACE, + COMMA, + DOT, + MINUS, + PLUS, + SEMICOLON, + SLASH, + STAR, + BANG, + BANG_EQUAL, + EQUAL, + EQUAL_EQUAL, + GREATER, + GREATER_EQUAL, + LESS, + LESS_EQUAL, + IDENTIFIER, + STRING, + NUMBER, + AND, + CLASS, + ELSE, + FALSE, + FUN, + FOR, + IF, + NIL, + OR, + PRINT, + RETURN, + SUPER, + THIS, + TRUE, + VAR, + WHILE, + END_OF_FILE }; typedef struct { - enum TokenType type; - char* lexeme; - void* literal; - int line; + enum TokenType type; + char *lexeme; + void *literal; + int line; } Token; -char* token_name(Token* token); +static inline const char *token_name(enum TokenType type) { + static const char *tokens[] = { + "LEFT_PAREN", "RIGHT_PAREN", "LEFT_BRACE", "RIGHT_BRACE", "COMMA", + "DOT", "MINUS", "PLUS", "SEMICOLON", "SLASH", + "STAR", "BANG", "BANG_EQUAL", "EQUAL", "EQUAL_EQUAL", + "GREATER", "GREATER_EQUAL", "LESS", "LESS_EQUAL", "IDENTIFIER", + "STRING", "NUMBER", "AND", "CLASS", "ELSE", + "FALSE", "FUN", "FOR", "IF", "NIL", + "OR", "PRINT", "RETURN", "SUPER", "THIS", + "TRUE", "VAR", "WHILE", "END_OF_FILE"}; + + return tokens[type]; +} typedef struct { - Token* tokens; - int size; - int capacity; + Token *tokens; + int size; + int capacity; } TokenList; -void tokenlist_init(TokenList* list); +void tokenlist_init(TokenList *list); -void tokenlist_add(TokenList* list, Token value); +void tokenlist_add(TokenList *list, Token value); -Token* tokenlist_get(TokenList* list, int index); +Token *tokenlist_get(TokenList *list, int index); -void tokenlist_print(TokenList* tokenlist); +void tokenlist_print(TokenList *tokenlist); -void tokenlist_free(TokenList* list); +void tokenlist_free(TokenList *list); #endif