This commit is contained in:
Shautvast 2024-10-13 22:31:46 +02:00
parent f76a252bd4
commit 8663a3c993
4 changed files with 128 additions and 152 deletions

View file

@ -16,6 +16,9 @@ char peek_next(void);
void string(void); void string(void);
bool is_digit(char c); bool is_digit(char c);
void number(void); void number(void);
bool is_alpha(char c);
bool is_alphanumeric(char c);
void identifier(void);
bool had_error = false; bool had_error = false;
int current_pos = -1; int current_pos = -1;
@ -24,6 +27,26 @@ int current_line = -1;
char *source; char *source;
TokenList token_list; TokenList token_list;
/**
 * Look up a reserved word in the `keywords` table.
 *
 * The lookup is a binary search driven by strcmp(), so it only works while
 * `keywords` is sorted in ascending strcmp() order of its keys.
 *
 * @param key NUL-terminated candidate lexeme; NULL is treated as "no match".
 * @return pointer to the token type stored in the matching table entry, or
 *         NULL when `key` is not a keyword.
 */
static const enum TokenType *get_token(const char *key) {
  if (key == NULL) {
    return NULL; // strcmp() on a null pointer is undefined behaviour
  }

  size_t low = 0;
  size_t high = sizeof(keywords) / sizeof(keywords[0]); // one past the end

  // Half-open interval [low, high): shrink it until it is empty or we hit.
  while (low < high) {
    size_t mid = low + (high - low) / 2;
    int order = strcmp(keywords[mid].key, key);
    if (order == 0) {
      return &keywords[mid].value;
    }
    if (order < 0) {
      low = mid + 1; // table entry sorts before key: search upper half
    } else {
      high = mid; // table entry sorts after key: search lower half
    }
  }
  return NULL;
}
ScanResult scan_tokens(char *src) { ScanResult scan_tokens(char *src) {
current_pos = 0; current_pos = 0;
start = 0; start = 0;
@ -134,6 +157,8 @@ void scan_token(void) {
default: default:
if (is_digit(c)) { if (is_digit(c)) {
number(); number();
} else if (is_alpha(c)) {
identifier();
} else { } else {
error("Unexpected character."); error("Unexpected character.");
} }
@ -141,6 +166,21 @@ void scan_token(void) {
} }
} }
/**
 * Scan the remainder of an identifier-shaped lexeme and emit its token.
 *
 * Advances while peek() yields a character accepted by is_alphanumeric(),
 * extracts the lexeme text, and adds either the keyword token found by
 * get_token() or a plain IDENTIFIER token.
 */
void identifier(void) {
  for (; is_alphanumeric(peek()); advance()) {
    // nothing to do: the loop header consumes the lexeme
  }

  // NOTE(review): if substring() returns heap memory, this buffer is never
  // released in this function — confirm who owns it.
  char *lexeme = substring(source, start + 1, current_pos - start);
  const enum TokenType *keyword = get_token(lexeme);

  if (keyword != NULL) {
    add_token(*keyword); // reserved word
    return;
  }
  add_token(IDENTIFIER); // user-defined name
}
void number(void) { void number(void) {
while (is_digit(peek())) while (is_digit(peek()))
advance(); advance();
@ -196,6 +236,11 @@ char peek(void) {
} }
return source[current_pos]; return source[current_pos];
} }
/**
 * Report whether `c` may start an identifier.
 *
 * @param c character to classify
 * @return true for ASCII 'a'..'z', 'A'..'Z' and '_'; false otherwise
 */
bool is_alpha(char c) {
  if (c == '_') {
    return true;
  }
  if ('a' <= c && c <= 'z') {
    return true;
  }
  return 'A' <= c && c <= 'Z';
}
/**
 * Report whether `c` may continue an identifier.
 *
 * @param c character to classify
 * @return true when is_alpha(c) or is_digit(c) accepts the character
 */
bool is_alphanumeric(char c) {
  if (is_alpha(c)) {
    return true;
  }
  return is_digit(c);
}
bool is_at_end(void) { return current_pos >= (int)strlen(source); } bool is_at_end(void) { return current_pos >= (int)strlen(source); }

View file

@ -1,14 +1,26 @@
#ifndef SCANNER_H #ifndef SCANNER_H
#define SCANNER_H #define SCANNER_H
#include <stdbool.h>
#include "tokens.h" #include "tokens.h"
#include <stdbool.h>
#include <string.h>
typedef struct { typedef struct {
bool had_error; bool had_error;
TokenList token_list; TokenList token_list;
} ScanResult; } ScanResult;
ScanResult scan_tokens(char* source); ScanResult scan_tokens(char *source);
/* One reserved word and the token type it maps to. */
typedef struct {
  const char *key;
  const enum TokenType value;
} Item;

/*
 * Reserved-word table.
 *
 * Keep the entries sorted in ascending strcmp() order of `key`: get_token()
 * binary-searches this array.
 */
static const Item keywords[] = {
    {.key = "and", .value = AND},
    {.key = "class", .value = CLASS},
    {.key = "else", .value = ELSE},
    {.key = "false", .value = FALSE},
    {.key = "for", .value = FOR},
    {.key = "fun", .value = FUN},
    {.key = "if", .value = IF},
    {.key = "nil", .value = NIL},
    {.key = "or", .value = OR},
    {.key = "print", .value = PRINT},
    {.key = "return", .value = RETURN},
    {.key = "super", .value = SUPER},
    {.key = "this", .value = THIS},
    {.key = "true", .value = TRUE},
    {.key = "var", .value = VAR},
    {.key = "while", .value = WHILE},
};
#endif #endif

View file

@ -2,130 +2,6 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
/**
 * Return the printable name of a token's type.
 *
 * @param token token whose `type` field is inspected; must not be NULL
 * @return string literal naming the type (END_OF_FILE is reported as "EOF"),
 *         or "UNKNOWN" when `token->type` matches none of the cases below.
 *         The result must not be modified or freed.
 */
char *token_name(Token *token) {
  // No `default:` label on purpose: compilers can then still warn when a new
  // enumerator is added without a matching case. Unmatched values fall out
  // of the switch to the explicit fallback instead of returning garbage.
  switch (token->type) {
  case LEFT_PAREN:
    return "LEFT_PAREN";
  case RIGHT_PAREN:
    return "RIGHT_PAREN";
  case LEFT_BRACE:
    return "LEFT_BRACE";
  case RIGHT_BRACE:
    return "RIGHT_BRACE";
  case COMMA:
    return "COMMA";
  case DOT:
    return "DOT";
  case MINUS:
    return "MINUS";
  case PLUS:
    return "PLUS";
  case SEMICOLON:
    return "SEMICOLON";
  case SLASH:
    return "SLASH";
  case STAR:
    return "STAR";
  case BANG:
    return "BANG";
  case BANG_EQUAL:
    return "BANG_EQUAL";
  case EQUAL:
    return "EQUAL";
  case EQUAL_EQUAL:
    return "EQUAL_EQUAL";
  case GREATER:
    return "GREATER";
  case GREATER_EQUAL:
    return "GREATER_EQUAL";
  case LESS:
    return "LESS";
  case LESS_EQUAL:
    return "LESS_EQUAL";
  case IDENTIFIER:
    return "IDENTIFIER";
  case STRING:
    return "STRING";
  case NUMBER:
    return "NUMBER";
  case AND:
    return "AND";
  case CLASS:
    return "CLASS";
  case ELSE:
    return "ELSE";
  case FALSE:
    return "FALSE";
  case FUN:
    return "FUN";
  case FOR:
    return "FOR";
  case IF:
    return "IF";
  case NIL:
    return "NIL";
  case OR:
    return "OR";
  case PRINT:
    return "PRINT";
  case RETURN:
    return "RETURN";
  case SUPER:
    return "SUPER";
  case THIS:
    return "THIS";
  case TRUE:
    return "TRUE";
  case VAR:
    return "VAR";
  case WHILE:
    return "WHILE";
  case END_OF_FILE:
    return "EOF";
  }
  return "UNKNOWN";
}
void tokenlist_init(TokenList *list) { void tokenlist_init(TokenList *list) {
list->tokens = malloc(sizeof(Token) * 32); list->tokens = malloc(sizeof(Token) * 32);
list->size = 0; list->size = 0;
@ -153,9 +29,9 @@ void tokenlist_print(TokenList *tokenlist) {
for (int i = 0; i < tokenlist->size; i++) { for (int i = 0; i < tokenlist->size; i++) {
Token *token = tokenlist_get(tokenlist, i); Token *token = tokenlist_get(tokenlist, i);
if (token->literal != NULL) { if (token->literal != NULL) {
printf("%s(%s), ", token_name(token), (char *)token->literal); printf("%s(%s), ", token_name(token->type), (char *)token->literal);
} else { } else {
printf("%s, ", token_name(token)); printf("%s, ", token_name(token->type));
} }
} }
printf("\n"); printf("\n");

View file

@ -1,40 +1,83 @@
#ifndef TOKENS_H #ifndef TOKENS_H
#define TOKENS_H #define TOKENS_H
enum TokenType{ enum TokenType {
LEFT_PAREN, RIGHT_PAREN, LEFT_BRACE, RIGHT_BRACE, LEFT_PAREN,
COMMA, DOT, MINUS, PLUS, SEMICOLON, SLASH, STAR, RIGHT_PAREN,
BANG, BANG_EQUAL, EQUAL, EQUAL_EQUAL, LEFT_BRACE,
GREATER, GREATER_EQUAL, LESS, LESS_EQUAL, RIGHT_BRACE,
IDENTIFIER, STRING, NUMBER, COMMA,
AND, CLASS, ELSE, FALSE, FUN, FOR, IF, NIL, OR, DOT,
PRINT, RETURN, SUPER, THIS, TRUE, VAR, WHILE, MINUS,
END_OF_FILE PLUS,
SEMICOLON,
SLASH,
STAR,
BANG,
BANG_EQUAL,
EQUAL,
EQUAL_EQUAL,
GREATER,
GREATER_EQUAL,
LESS,
LESS_EQUAL,
IDENTIFIER,
STRING,
NUMBER,
AND,
CLASS,
ELSE,
FALSE,
FUN,
FOR,
IF,
NIL,
OR,
PRINT,
RETURN,
SUPER,
THIS,
TRUE,
VAR,
WHILE,
END_OF_FILE
}; };
typedef struct { typedef struct {
enum TokenType type; enum TokenType type;
char* lexeme; char *lexeme;
void* literal; void *literal;
int line; int line;
} Token; } Token;
/**
 * Return the printable name of a token type.
 *
 * @param type token type to name
 * @return string literal naming `type`, or "UNKNOWN" when `type` lies
 *         outside the name table. The result must not be modified or freed.
 */
static inline const char *token_name(enum TokenType type) {
  // Indexed by the enumerator's numeric value, so the order here must mirror
  // the declaration order of enum TokenType.
  static const char *const names[] = {
      "LEFT_PAREN", "RIGHT_PAREN",   "LEFT_BRACE", "RIGHT_BRACE", "COMMA",
      "DOT",        "MINUS",         "PLUS",       "SEMICOLON",   "SLASH",
      "STAR",       "BANG",          "BANG_EQUAL", "EQUAL",       "EQUAL_EQUAL",
      "GREATER",    "GREATER_EQUAL", "LESS",       "LESS_EQUAL",  "IDENTIFIER",
      "STRING",     "NUMBER",        "AND",        "CLASS",       "ELSE",
      "FALSE",      "FUN",           "FOR",        "IF",          "NIL",
      "OR",         "PRINT",         "RETURN",     "SUPER",       "THIS",
      "TRUE",       "VAR",           "WHILE",      "END_OF_FILE"};

  // The unsigned cast folds negative values into the "too large" case, so a
  // single comparison rejects every out-of-range index.
  if ((unsigned)type >= sizeof(names) / sizeof(names[0])) {
    return "UNKNOWN";
  }
  return names[type];
}
typedef struct { typedef struct {
Token* tokens; Token *tokens;
int size; int size;
int capacity; int capacity;
} TokenList; } TokenList;
void tokenlist_init(TokenList* list); void tokenlist_init(TokenList *list);
void tokenlist_add(TokenList* list, Token value); void tokenlist_add(TokenList *list, Token value);
Token* tokenlist_get(TokenList* list, int index); Token *tokenlist_get(TokenList *list, int index);
void tokenlist_print(TokenList* tokenlist); void tokenlist_print(TokenList *tokenlist);
void tokenlist_free(TokenList* list); void tokenlist_free(TokenList *list);
#endif #endif