strings and numbers

2024-10-12 17:44:30 +02:00 · 2024-10-12 17:44:30 +02:00 · f76a252bd4
commit f76a252bd4
parent 6e84fea3d1
2 changed files with 243 additions and 79 deletions
--- a/src/scanner.c
+++ b/src/scanner.c
@ -11,6 +11,11 @@ void error(char *message);
 void report(char *where, char *message);
 bool is_at_end(void);
 bool match(char expected);
+char peek(void); // current character without consuming it; '\0' at end of input
+char peek_next(void); // character one past the current one; '\0' when out of range
+void string(void); // scans a string literal and emits a STRING token
+bool is_digit(char c); // true only for '0'..'9'
+void number(void); // scans a numeric literal and emits a NUMBER token

 bool had_error = false;
 int current_pos = -1;
@ -52,6 +57,16 @@ void add_token(enum TokenType type) {
  tokenlist_add(&token_list, token);
 }

+// Appends a token of the given type that also carries a literal value.
+//
+// The lexeme comes from substring(source, start, current_pos), the literal
+// pointer is stored as passed (it is not copied here), and the token is
+// stamped with current_line before being added to token_list.
+//
+// NOTE(review): the third substring() argument here is current_pos, while
+// number() and string() pass a difference of positions -- confirm which
+// contract substring() actually has.
+void add_token_with_literal(enum TokenType type, char *literal) {
+  Token tok;
+
+  tok.line = current_line;
+  tok.literal = literal;
+  tok.lexeme = substring(source, start, current_pos);
+  tok.type = type;
+  tokenlist_add(&token_list, tok);
+}
+
 char advance(void) {
  char c = source[current_pos++];
  return c;
@ -97,12 +112,66 @@ void scan_token(void) {
  case '<':
    add_token(match('=') ? LESS_EQUAL : LESS);
    break;
+  case '/':
+    if (match('/')) {
+      while (peek() != '\n' && !is_at_end()) {
+        advance();
+      }
+    } else {
+      add_token(SLASH);
+    }
+    break;
+  case ' ':
+  case '\t':
+  case '\r':
+    break;
+  case '\n':
+    current_line += 1;
+    break;
+  case '"':
+    string();
+    break;
  default:
-    error("Unexpected character.");
+    if (is_digit(c)) {
+      number();
+    } else {
+      error("Unexpected character.");
+    }
    break;
  }
 }

+// Scans the remainder of a numeric literal and emits a NUMBER token.
+//
+// Called from scan_token once a leading digit has been seen. Accepts a run of
+// digits, optionally followed by '.' and at least one more digit; a '.' that
+// is not followed by a digit is left unconsumed.
+void number(void) {
+  // Integer part.
+  while (is_digit(peek())) {
+    advance();
+  }
+
+  // Fractional part: take the '.' only when a digit comes right after it.
+  if (peek() == '.' && is_digit(peek_next())) {
+    advance();
+    while (is_digit(peek())) {
+      advance();
+    }
+  }
+
+  // NOTE(review): the literal is cut from start + 1 with current_pos - start
+  // as the third argument -- confirm against substring()'s contract.
+  char *text = substring(source, start + 1, current_pos - start);
+  add_token_with_literal(NUMBER, text);
+}
+
+// Reports whether c is one of the ASCII decimal digits '0' through '9'.
+bool is_digit(char c) {
+  if (c < '0') {
+    return false;
+  }
+  return c <= '9';
+}
+
+// Scans a string literal and emits a STRING token.
+//
+// Invoked by scan_token when it encounters '"'. Characters are consumed up to
+// the next '"'; newlines inside the literal bump current_line. If the input
+// ends first, "Unterminated string." is reported and no token is added.
+void string(void) {
+  while (!is_at_end() && peek() != '"') {
+    if (peek() == '\n') {
+      current_line += 1;
+    }
+    advance();
+  }
+
+  if (is_at_end()) {
+    error("Unterminated string.");
+    return;
+  }
+
+  // Step over the closing quote.
+  advance();
+
+  // NOTE(review): the literal is cut from start + 2 with
+  // current_pos - start - 2 as the third argument -- confirm these offsets
+  // against substring()'s contract and how start/current_pos are maintained.
+  char *value = substring(source, start + 2, current_pos - start - 2);
+  add_token_with_literal(STRING, value);
+}
+
 bool match(char expected) {
  if (is_at_end()) {
    return false;
@ -114,6 +183,20 @@ bool match(char expected) {
  return true;
 }

+// Returns the character one position past current_pos without consuming
+// anything, or '\0' when that position is at or beyond the end of source.
+char peek_next(void) {
+  const int lookahead = current_pos + 1;
+  const int length = (int)strlen(source);
+
+  return lookahead < length ? source[lookahead] : '\0';
+}
+
+// Returns the character at current_pos without consuming it, or '\0' once
+// is_at_end() reports that the input is exhausted.
+char peek(void) {
+  return is_at_end() ? '\0' : source[current_pos];
+}
+
 bool is_at_end(void) { return current_pos >= (int)strlen(source); }

 void error(char *message) { report("", message); }
--- a/src/tokens.c
+++ b/src/tokens.c
@ -1,83 +1,164 @@
-#include <stdlib.h>
-#include <stdio.h>
 #include "tokens.h"
+#include <stdio.h>
+#include <stdlib.h>

-char* token_name(Token* token){
-    char* type;
-    switch (token->type){
-        case LEFT_PAREN: type = "LEFT_PAREN"; break;
-        case RIGHT_PAREN: type = "RIGHT_PAREN"; break;
-        case LEFT_BRACE: type = "LEFT_BRACE"; break;
-        case RIGHT_BRACE: type = "RIGHT_BRACE"; break;
-        case COMMA: type = "COMMA"; break;
-        case DOT: type = "DOT"; break;
-        case MINUS: type = "MINUS"; break;
-        case PLUS: type = "PLUS"; break;
-        case SEMICOLON: type = "SEMICOLON"; break;
-        case SLASH: type = "SLASH"; break;
-        case STAR: type = "STAR"; break;
-        case BANG: type = "BANG"; break;
-        case BANG_EQUAL: type = "BANG_EQUAL"; break;
-        case EQUAL: type = "EQUAL"; break;
-        case EQUAL_EQUAL: type = "EQUAL_EQUAL"; break;
-        case GREATER: type = "GREATER"; break;
-        case GREATER_EQUAL: type = "GREATER_EQUAL"; break;
-        case LESS: type = "LESS"; break;
-        case LESS_EQUAL: type = "LESS_EQUAL"; break;
-        case IDENTIFIER: type = "IDENTIFIER"; break;
-        case STRING: type = "STRING"; break;
-        case NUMBER: type = "NUMBER"; break;
-        case AND: type = "AND"; break;
-        case CLASS: type = "CLASS"; break;
-        case ELSE: type = "ELSE"; break;
-        case FALSE: type = "FALSE"; break;
-        case FUN: type = "FUN"; break;
-        case FOR: type = "FOR"; break;
-        case IF: type = "IF"; break;
-        case NIL: type = "NIL"; break;
-        case OR: type = "OR"; break;
-        case PRINT: type = "PRINT"; break;
-        case RETURN: type = "RETURN"; break;
-        case SUPER: type = "SUPER"; break;
-        case THIS: type = "THIS"; break;
-        case TRUE: type = "TRUE"; break;
-        case VAR: type = "VAR"; break;
-        case WHILE: type = "WHILE"; break;
-        case END_OF_FILE: type = "EOF"; break;
+// Maps a token's type to its printable name.
+//
+// Returns a pointer to a string literal; callers must not modify or free it.
+// A type value that matches none of the cases yields "UNKNOWN" rather than an
+// uninitialized pointer. The switch deliberately has no default label so the
+// compiler can still warn when a TokenType enumerator is missing.
+char *token_name(Token *token) {
+  switch (token->type) {
+  case LEFT_PAREN:
+    return "LEFT_PAREN";
+  case RIGHT_PAREN:
+    return "RIGHT_PAREN";
+  case LEFT_BRACE:
+    return "LEFT_BRACE";
+  case RIGHT_BRACE:
+    return "RIGHT_BRACE";
+  case COMMA:
+    return "COMMA";
+  case DOT:
+    return "DOT";
+  case MINUS:
+    return "MINUS";
+  case PLUS:
+    return "PLUS";
+  case SEMICOLON:
+    return "SEMICOLON";
+  case SLASH:
+    return "SLASH";
+  case STAR:
+    return "STAR";
+  case BANG:
+    return "BANG";
+  case BANG_EQUAL:
+    return "BANG_EQUAL";
+  case EQUAL:
+    return "EQUAL";
+  case EQUAL_EQUAL:
+    return "EQUAL_EQUAL";
+  case GREATER:
+    return "GREATER";
+  case GREATER_EQUAL:
+    return "GREATER_EQUAL";
+  case LESS:
+    return "LESS";
+  case LESS_EQUAL:
+    return "LESS_EQUAL";
+  case IDENTIFIER:
+    return "IDENTIFIER";
+  case STRING:
+    return "STRING";
+  case NUMBER:
+    return "NUMBER";
+  case AND:
+    return "AND";
+  case CLASS:
+    return "CLASS";
+  case ELSE:
+    return "ELSE";
+  case FALSE:
+    return "FALSE";
+  case FUN:
+    return "FUN";
+  case FOR:
+    return "FOR";
+  case IF:
+    return "IF";
+  case NIL:
+    return "NIL";
+  case OR:
+    return "OR";
+  case PRINT:
+    return "PRINT";
+  case RETURN:
+    return "RETURN";
+  case SUPER:
+    return "SUPER";
+  case THIS:
+    return "THIS";
+  case TRUE:
+    return "TRUE";
+  case VAR:
+    return "VAR";
+  case WHILE:
+    return "WHILE";
+  case END_OF_FILE:
+    return "EOF";
+  }
+  // Reached only for a type value outside the enumerators listed above.
+  return "UNKNOWN";
+}
+
+// Prepares `list` for use: allocates room for 32 tokens and marks it empty.
+//
+// Allocation failure is fatal. A message is written to stderr and the process
+// exits with status 1, so a successfully initialized list never has a NULL
+// `tokens` pointer.
+void tokenlist_init(TokenList *list) {
+  enum { INITIAL_CAPACITY = 32 };
+
+  list->tokens = malloc(sizeof(Token) * INITIAL_CAPACITY);
+  if (list->tokens == NULL) {
+    fprintf(stderr, "Out of memory while allocating token list\n");
+    exit(1);
+  }
+  list->size = 0;
+  list->capacity = INITIAL_CAPACITY;
+}
+
+// Appends `value` (copied by value) to the end of `list`, growing the backing
+// array when it is full.
+//
+// Growth failure is fatal: a message is written to stderr and the process
+// exits with status 1.
+void tokenlist_add(TokenList *list, Token value) {
+  if (list->size >= list->capacity) {
+    // Double the capacity; a capacity of zero would otherwise stay zero and
+    // the store below would land outside the allocation.
+    list->capacity = list->capacity > 0 ? list->capacity * 2 : 1;
+
+    // Go through a temporary so a failed realloc does not overwrite the only
+    // pointer to the existing array.
+    Token *grown = realloc(list->tokens, sizeof(Token) * list->capacity);
+    if (grown == NULL) {
+      fprintf(stderr, "Out of memory while growing token list\n");
+      exit(1);
+    }
+    list->tokens = grown;
+  }
+  list->tokens[list->size] = value;
+  list->size += 1;
+}
+
+// Returns a pointer to the token stored at `index`.
+//
+// An index outside [0, list->size) is fatal: a diagnostic is printed to
+// stdout and the process exits with status 1.
+Token *tokenlist_get(TokenList *list, int index) {
+  if (index >= 0 && index < list->size) {
+    return list->tokens + index;
+  }
+
+  printf("Index %d out of bounds for list of size %d\n", index, list->size);
+  exit(1);
+}
+
+void tokenlist_print(TokenList *tokenlist) { // Prints every token on a single line of stdout.
+  for (int i = 0; i < tokenlist->size; i++) {
+    Token *token = tokenlist_get(tokenlist, i);
+    if (token->literal != NULL) { // Literal present: printed as NAME(literal), read as a C string.
+      printf("%s(%s), ", token_name(token), (char *)token->literal);
+    } else { // No literal: the type name alone.
+      printf("%s, ", token_name(token));
    }
-    return type;
+  }
+  printf("\n"); // One newline after the whole list, not one per token.
 }

-void tokenlist_init(TokenList* list){
-    list->tokens = malloc(sizeof(Token) * 32);
-    list->size = 0;
-    list->capacity = 32;
-}
-
-void tokenlist_add(TokenList* list, Token value){
-    if (list->size >= list->capacity){
-        list->capacity *= 2;
-        list->tokens = realloc(list->tokens, sizeof(Token)* list->capacity);
-    }
-    list->tokens[list->size] = value;
-    list->size +=1;
-}
-
-Token* tokenlist_get(TokenList* list, int index){
-    if (index >= list->size || index < 0){
-        printf("Index %d out of bounds for list of size %d\n", index, list->size);
-        exit(1);
-    }
-    return &list->tokens[index];
-}
-
-void tokenlist_print(TokenList* tokenlist){
-    for (int i=0; i< tokenlist->size; i++){
-        printf("%s, ", token_name(tokenlist_get(tokenlist, i)));
-        printf("\n");
-    }
-}
-
-void tokenlist_free(TokenList* list){
-    free(list->tokens);
-}
+// Releases the token array owned by `list` and resets the list to an empty
+// state, so a repeated free is harmless and no dangling pointer or stale
+// size is left behind.
+//
+// NOTE(review): only the array itself is freed; the lexeme/literal strings
+// referenced by individual tokens are not touched here -- confirm who owns
+// them.
+void tokenlist_free(TokenList *list) {
+  free(list->tokens);
+  list->tokens = NULL;
+  list->size = 0;
+  list->capacity = 0;
+}