From 8663a3c99399ea1661abcbb71df8fdcd76abea90 Mon Sep 17 00:00:00 2001
From: Shautvast <shautvast@gmail.com>
Date: Sun, 13 Oct 2024 22:31:46 +0200
Subject: [PATCH] keywords

---
 src/scanner.c |  45 ++++++++++++++++++
 src/scanner.h |  20 ++++++--
 src/tokens.c  | 128 +-------------------------------------------------
 src/tokens.h  |  87 +++++++++++++++++++++++++---------
 4 files changed, 128 insertions(+), 152 deletions(-)

diff --git a/src/scanner.c b/src/scanner.c
index 97626a1..8d13968 100644
--- a/src/scanner.c
+++ b/src/scanner.c
@@ -16,6 +16,9 @@ char peek_next(void);
 void string(void);
 bool is_digit(char c);
 void number(void);
+bool is_alpha(char c);        // true for 'a'-'z', 'A'-'Z' and '_'
+bool is_alphanumeric(char c); // is_alpha(c) || is_digit(c)
+void identifier(void);        // adds an IDENTIFIER or keyword token
 
 bool had_error = false;
 int current_pos = -1;
@@ -24,6 +27,26 @@ int current_line = -1;
 char *source;
 TokenList token_list;
 
+/* Binary-search the sorted `keywords` table for `key`; return a pointer to
+ * the matching token type, or NULL when `key` is not a keyword. */
+static const enum TokenType *get_token(const char *key) {
+  size_t low = 0;
+  size_t high = sizeof keywords / sizeof keywords[0];
+  while (low < high) {
+    size_t mid = low + (high - low) / 2;
+    int cmp = strcmp(keywords[mid].key, key);
+    if (cmp == 0) {
+      return &keywords[mid].value;
+    }
+    if (cmp < 0) {
+      low = mid + 1; /* key sorts after keywords[mid] */
+    } else {
+      high = mid; /* key sorts before keywords[mid] */
+    }
+  }
+  return NULL;
+}
+
 ScanResult scan_tokens(char *src) {
   current_pos = 0;
   start = 0;
@@ -134,6 +157,8 @@ void scan_token(void) {
   default:
     if (is_digit(c)) {
       number();
+    } else if (is_alpha(c)) {
+      identifier();
     } else {
       error("Unexpected character.");
     }
@@ -141,6 +166,21 @@ void scan_token(void) {
   }
 }
 
+/* Consume characters while is_alphanumeric() accepts them, then add the
+ * keyword token found by get_token() for the lexeme, or IDENTIFIER when the
+ * lexeme is not a keyword.
+ * NOTE(review): if substring() allocates, `text` is never freed - confirm. */
+void identifier(void) {
+  while (is_alphanumeric(peek())) {
+    advance();
+  }
+
+  char *text = substring(source, start + 1, current_pos - start);
+  const enum TokenType *keyword = get_token(text);
+
+  add_token(keyword != NULL ? *keyword : IDENTIFIER);
+}
+
 void number(void) {
   while (is_digit(peek()))
     advance();
@@ -196,6 +236,11 @@ char peek(void) {
   }
   return source[current_pos];
 }
+bool is_alpha(char c) { /* letter in 'a'..'z' / 'A'..'Z', or '_' */
+  return c == '_' || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
+}
+/* True when c is accepted by is_alpha() or is_digit(). */
+bool is_alphanumeric(char c) { return is_digit(c) || is_alpha(c); }
 
 bool is_at_end(void) { return current_pos >= (int)strlen(source); }
 
diff --git a/src/scanner.h b/src/scanner.h
index a506133..00f939d 100644
--- a/src/scanner.h
+++ b/src/scanner.h
@@ -1,14 +1,26 @@
 #ifndef SCANNER_H
 #define SCANNER_H
 
-#include <stdbool.h>
 #include "tokens.h"
+#include <stdbool.h>
+#include <string.h>
 
 typedef struct {
-    bool had_error;
-    TokenList token_list;
+  bool had_error;       // presumably true if scanning hit an error (TODO confirm)
+  TokenList token_list; // presumably the tokens that were scanned (TODO confirm)
 } ScanResult;
 
-ScanResult scan_tokens(char* source);
+ScanResult scan_tokens(char *source);
+
+typedef struct {
+  const char *key;            // keyword text; get_token() compares it with strcmp
+  const enum TokenType value; // token type get_token() returns for that keyword
+} Item;
+
+static const Item keywords[] = { // must stay sorted by key (binary search)
+    {"and", AND},   {"class", CLASS}, {"else", ELSE},     {"false", FALSE},
+    {"for", FOR},   {"fun", FUN},     {"if", IF},         {"nil", NIL},
+    {"or", OR},     {"print", PRINT}, {"return", RETURN}, {"super", SUPER},
+    {"this", THIS}, {"true", TRUE},   {"var", VAR},       {"while", WHILE}};
 
 #endif
diff --git a/src/tokens.c b/src/tokens.c
index 7de6da9..480e7da 100644
--- a/src/tokens.c
+++ b/src/tokens.c
@@ -2,130 +2,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 
-char *token_name(Token *token) {
-  char *type;
-  switch (token->type) {
-  case LEFT_PAREN:
-    type = "LEFT_PAREN";
-    break;
-  case RIGHT_PAREN:
-    type = "RIGHT_PAREN";
-    break;
-  case LEFT_BRACE:
-    type = "LEFT_BRACE";
-    break;
-  case RIGHT_BRACE:
-    type = "RIGHT_BRACE";
-    break;
-  case COMMA:
-    type = "COMMA";
-    break;
-  case DOT:
-    type = "DOT";
-    break;
-  case MINUS:
-    type = "MINUS";
-    break;
-  case PLUS:
-    type = "PLUS";
-    break;
-  case SEMICOLON:
-    type = "SEMICOLON";
-    break;
-  case SLASH:
-    type = "SLASH";
-    break;
-  case STAR:
-    type = "STAR";
-    break;
-  case BANG:
-    type = "BANG";
-    break;
-  case BANG_EQUAL:
-    type = "BANG_EQUAL";
-    break;
-  case EQUAL:
-    type = "EQUAL";
-    break;
-  case EQUAL_EQUAL:
-    type = "EQUAL_EQUAL";
-    break;
-  case GREATER:
-    type = "GREATER";
-    break;
-  case GREATER_EQUAL:
-    type = "GREATER_EQUAL";
-    break;
-  case LESS:
-    type = "LESS";
-    break;
-  case LESS_EQUAL:
-    type = "LESS_EQUAL";
-    break;
-  case IDENTIFIER:
-    type = "IDENTIFIER";
-    break;
-  case STRING:
-    type = "STRING";
-    break;
-  case NUMBER:
-    type = "NUMBER";
-    break;
-  case AND:
-    type = "AND";
-    break;
-  case CLASS:
-    type = "CLASS";
-    break;
-  case ELSE:
-    type = "ELSE";
-    break;
-  case FALSE:
-    type = "FALSE";
-    break;
-  case FUN:
-    type = "FUN";
-    break;
-  case FOR:
-    type = "FOR";
-    break;
-  case IF:
-    type = "IF";
-    break;
-  case NIL:
-    type = "NIL";
-    break;
-  case OR:
-    type = "OR";
-    break;
-  case PRINT:
-    type = "PRINT";
-    break;
-  case RETURN:
-    type = "RETURN";
-    break;
-  case SUPER:
-    type = "SUPER";
-    break;
-  case THIS:
-    type = "THIS";
-    break;
-  case TRUE:
-    type = "TRUE";
-    break;
-  case VAR:
-    type = "VAR";
-    break;
-  case WHILE:
-    type = "WHILE";
-    break;
-  case END_OF_FILE:
-    type = "EOF";
-    break;
-  }
-  return type;
-}
-
 void tokenlist_init(TokenList *list) {
   list->tokens = malloc(sizeof(Token) * 32);
   list->size = 0;
@@ -153,9 +29,9 @@ void tokenlist_print(TokenList *tokenlist) {
   for (int i = 0; i < tokenlist->size; i++) {
     Token *token = tokenlist_get(tokenlist, i);
     if (token->literal != NULL) {
-      printf("%s(%s), ", token_name(token), (char *)token->literal);
+      printf("%s(%s), ", token_name(token->type), (char *)token->literal);
     } else {
-      printf("%s, ", token_name(token));
+      printf("%s, ", token_name(token->type));
     }
   }
   printf("\n");
diff --git a/src/tokens.h b/src/tokens.h
index e63dfd4..900986f 100644
--- a/src/tokens.h
+++ b/src/tokens.h
@@ -1,40 +1,83 @@
 #ifndef TOKENS_H
 #define TOKENS_H
 
-enum TokenType{
-    LEFT_PAREN, RIGHT_PAREN, LEFT_BRACE, RIGHT_BRACE, 
-    COMMA, DOT, MINUS, PLUS, SEMICOLON, SLASH, STAR,
-    BANG, BANG_EQUAL, EQUAL, EQUAL_EQUAL,
-    GREATER, GREATER_EQUAL, LESS, LESS_EQUAL,
-    IDENTIFIER, STRING, NUMBER,
-    AND, CLASS, ELSE, FALSE, FUN, FOR, IF, NIL, OR,
-    PRINT, RETURN, SUPER, THIS, TRUE, VAR, WHILE,
-    END_OF_FILE
+enum TokenType { // order must match the name table in token_name()
+  LEFT_PAREN,
+  RIGHT_PAREN,
+  LEFT_BRACE,
+  RIGHT_BRACE,
+  COMMA,
+  DOT,
+  MINUS,
+  PLUS,
+  SEMICOLON,
+  SLASH,
+  STAR,
+  BANG,
+  BANG_EQUAL,
+  EQUAL,
+  EQUAL_EQUAL,
+  GREATER,
+  GREATER_EQUAL,
+  LESS,
+  LESS_EQUAL,
+  IDENTIFIER,
+  STRING,
+  NUMBER,
+  AND, // AND..WHILE: keyword tokens (see keywords[] in scanner.h)
+  CLASS,
+  ELSE,
+  FALSE,
+  FUN,
+  FOR,
+  IF,
+  NIL,
+  OR,
+  PRINT,
+  RETURN,
+  SUPER,
+  THIS,
+  TRUE,
+  VAR,
+  WHILE,
+  END_OF_FILE
+};
 
 typedef struct {
-    enum TokenType type;
-    char* lexeme;
-    void* literal;
-    int line;
+  enum TokenType type; // kind of token
+  char *lexeme;        // presumably the token's source text (TODO confirm)
+  void *literal;       // NULL or a value tokenlist_print() prints as a C string
+  int line;            // presumably the source line of the token (TODO confirm)
 } Token;
 
-char* token_name(Token* token);
+/* Printable name of a token type. Returns "UNKNOWN" for a value outside the
+ * name table instead of reading past its end. */
+static inline const char *token_name(enum TokenType type) {
+  static const char *const names[] = { /* same order as enum TokenType */
+      "LEFT_PAREN", "RIGHT_PAREN", "LEFT_BRACE", "RIGHT_BRACE", "COMMA", "DOT",
+      "MINUS", "PLUS", "SEMICOLON", "SLASH", "STAR", "BANG", "BANG_EQUAL",
+      "EQUAL", "EQUAL_EQUAL", "GREATER", "GREATER_EQUAL", "LESS", "LESS_EQUAL",
+      "IDENTIFIER", "STRING", "NUMBER", "AND", "CLASS", "ELSE", "FALSE", "FUN",
+      "FOR", "IF", "NIL", "OR", "PRINT", "RETURN", "SUPER", "THIS", "TRUE",
+      "VAR", "WHILE", "END_OF_FILE"};
+  const unsigned count = sizeof names / sizeof names[0];
+  return (unsigned)type < count ? names[type] : "UNKNOWN";
+}
 
 typedef struct {
-    Token* tokens;
-    int size;
-    int capacity;
+  Token *tokens; // array allocated by tokenlist_init()
+  int size;      // number of tokens stored; tokenlist_print() walks 0..size-1
+  int capacity;  // presumably the number of allocated slots (TODO confirm)
 } TokenList;
 
-void tokenlist_init(TokenList* list);
+void tokenlist_init(TokenList *list);
 
-void tokenlist_add(TokenList* list, Token value);
+void tokenlist_add(TokenList *list, Token value);
 
-Token* tokenlist_get(TokenList* list, int index);
+Token *tokenlist_get(TokenList *list, int index);
 
-void tokenlist_print(TokenList* tokenlist);
+void tokenlist_print(TokenList *tokenlist);
 
-void tokenlist_free(TokenList* list);
+void tokenlist_free(TokenList *list);
 
 #endif