This commit is contained in:
Shautvast 2024-10-13 22:31:46 +02:00
parent f76a252bd4
commit 8663a3c993
4 changed files with 128 additions and 152 deletions

View file

@ -16,6 +16,9 @@ char peek_next(void);
// Forward declarations of scanner helpers.
// NOTE(review): the definition of string() is not visible in this excerpt.
void string(void);
// NOTE(review): definition not visible here; presumably tests for a decimal
// digit — confirm.
bool is_digit(char c);
// Scans a number token; it starts by consuming a run of digits.
void number(void);
// True for 'a'..'z', 'A'..'Z' or '_'.
bool is_alpha(char c);
// True when is_alpha() or is_digit() accepts the character.
bool is_alphanumeric(char c);
// Consumes alphanumeric characters, then adds a keyword token or IDENTIFIER.
void identifier(void);
// NOTE(review): presumably set when scanning reports an error — confirm
// against error().
bool had_error = false;
// Index into `source` that peek() reads from; scan_tokens() resets it to 0.
int current_pos = -1;
@ -24,6 +27,26 @@ int current_line = -1;
// Text being scanned. It is indexed by current_pos and measured with
// strlen(), so it must be NUL-terminated.
char *source;
// NOTE(review): presumably the list that add_token() appends to — confirm.
TokenList token_list;
/*
 * Look up `key` in the keyword table by binary search.
 *
 * Returns a pointer to the matching entry's token type, or NULL when `key`
 * is not a keyword. Correctness depends on `keywords` being sorted in
 * ascending strcmp() order.
 */
static const enum TokenType *get_token(char *key) {
  int lo = 0;
  int hi = sizeof(keywords) / sizeof(Item);

  // Invariant: a match, if there is one, lies in the half-open range [lo, hi).
  while (lo < hi) {
    int probe = (lo + hi) / 2;
    int order = strcmp(keywords[probe].key, key);

    if (order < 0) {
      // Table entry sorts before key: discard the lower half.
      lo = probe + 1;
    } else if (order > 0) {
      // Table entry sorts after key: discard the upper half.
      hi = probe;
    } else {
      return &keywords[probe].value;
    }
  }

  return NULL;
}
ScanResult scan_tokens(char *src) {
current_pos = 0;
start = 0;
@ -134,6 +157,8 @@ void scan_token(void) {
default:
if (is_digit(c)) {
number();
} else if (is_alpha(c)) {
identifier();
} else {
error("Unexpected character.");
}
@ -141,6 +166,21 @@ void scan_token(void) {
}
}
/*
 * Scan the rest of an identifier or keyword and emit its token.
 *
 * Consumes characters for as long as is_alphanumeric() accepts them, extracts
 * the lexeme and looks it up with get_token(): a hit emits that keyword's
 * token type, a miss emits IDENTIFIER.
 */
void identifier(void) {
  while (is_alphanumeric(peek())) {
    advance();
  }

  // NOTE(review): substring() is not visible here — confirm its offset
  // convention and whether the returned buffer has to be released.
  char *lexeme = substring(source, start + 1, current_pos - start);
  const enum TokenType *keyword = get_token(lexeme);

  add_token(keyword != NULL ? *keyword : IDENTIFIER);
}
void number(void) {
while (is_digit(peek()))
advance();
@ -196,6 +236,11 @@ char peek(void) {
}
return source[current_pos];
}
/*
 * Return true when `c` can start an identifier: a character in 'a'..'z',
 * a character in 'A'..'Z', or an underscore.
 */
bool is_alpha(char c) {
  if (c == '_') {
    return true;
  }

  const bool lower = 'a' <= c && c <= 'z';
  const bool upper = 'A' <= c && c <= 'Z';
  return lower || upper;
}
/*
 * Return true when `c` may continue an identifier, i.e. when either
 * is_alpha() or is_digit() accepts it.
 */
bool is_alphanumeric(char c) {
  if (is_alpha(c)) {
    return true;
  }
  return is_digit(c);
}
/*
 * Return true once current_pos has reached or passed the end of `source`.
 *
 * The length is recomputed with strlen() on every call and converted to int
 * so the comparison with the signed current_pos stays a signed comparison.
 */
bool is_at_end(void) {
  const int source_length = (int)strlen(source);
  return source_length <= current_pos;
}

View file

@ -1,8 +1,9 @@
#ifndef SCANNER_H
#define SCANNER_H
#include <stdbool.h>
#include "tokens.h"
#include <stdbool.h>
#include <string.h>
typedef struct {
bool had_error;
@ -11,4 +12,15 @@ typedef struct {
ScanResult scan_tokens(char *source);
/*
 * One keyword-table entry: pairs a reserved word's spelling with the token
 * type the scanner emits for it. The field order matters because the
 * `keywords` table initializes entries positionally as {key, value}.
 */
typedef struct {
// Keyword spelling; get_token() compares it against the lexeme with strcmp().
const char *key;
// Token type returned for this keyword.
const enum TokenType value;
} Item;
static const Item keywords[] = {
{"and", AND}, {"class", CLASS}, {"else", ELSE}, {"false", FALSE},
{"for", FOR}, {"fun", FUN}, {"if", IF}, {"nil", NIL},
{"or", OR}, {"print", PRINT}, {"return", RETURN}, {"super", SUPER},
{"this", THIS}, {"true", TRUE}, {"var", VAR}, {"while", WHILE}};
#endif

View file

@ -2,130 +2,6 @@
#include <stdio.h>
#include <stdlib.h>
/*
 * Return the printable name of `token`'s type.
 *
 * The result points at a string literal: callers must not modify or free it.
 * A type value that matches no known enumerator yields "UNKNOWN"; the
 * previous switch had no default case and returned an uninitialized pointer
 * in that situation.
 *
 * `token` must not be NULL.
 */
char *token_name(Token *token) {
  // Indexed by enum TokenType. Designated initializers tie each name to its
  // enumerator, so the table stays correct if the enum is reordered.
  static char *const names[] = {
      [LEFT_PAREN] = "LEFT_PAREN",
      [RIGHT_PAREN] = "RIGHT_PAREN",
      [LEFT_BRACE] = "LEFT_BRACE",
      [RIGHT_BRACE] = "RIGHT_BRACE",
      [COMMA] = "COMMA",
      [DOT] = "DOT",
      [MINUS] = "MINUS",
      [PLUS] = "PLUS",
      [SEMICOLON] = "SEMICOLON",
      [SLASH] = "SLASH",
      [STAR] = "STAR",
      [BANG] = "BANG",
      [BANG_EQUAL] = "BANG_EQUAL",
      [EQUAL] = "EQUAL",
      [EQUAL_EQUAL] = "EQUAL_EQUAL",
      [GREATER] = "GREATER",
      [GREATER_EQUAL] = "GREATER_EQUAL",
      [LESS] = "LESS",
      [LESS_EQUAL] = "LESS_EQUAL",
      [IDENTIFIER] = "IDENTIFIER",
      [STRING] = "STRING",
      [NUMBER] = "NUMBER",
      [AND] = "AND",
      [CLASS] = "CLASS",
      [ELSE] = "ELSE",
      [FALSE] = "FALSE",
      [FUN] = "FUN",
      [FOR] = "FOR",
      [IF] = "IF",
      [NIL] = "NIL",
      [OR] = "OR",
      [PRINT] = "PRINT",
      [RETURN] = "RETURN",
      [SUPER] = "SUPER",
      [THIS] = "THIS",
      [TRUE] = "TRUE",
      [VAR] = "VAR",
      [WHILE] = "WHILE",
      [END_OF_FILE] = "EOF",
  };

  // Converting to unsigned makes a negative type value fail the range check.
  const unsigned int index = (unsigned int)token->type;
  if (index >= sizeof(names) / sizeof(names[0]) || names[index] == NULL) {
    return "UNKNOWN";
  }
  return names[index];
}
void tokenlist_init(TokenList *list) {
list->tokens = malloc(sizeof(Token) * 32);
list->size = 0;
@ -153,9 +29,9 @@ void tokenlist_print(TokenList *tokenlist) {
for (int i = 0; i < tokenlist->size; i++) {
Token *token = tokenlist_get(tokenlist, i);
if (token->literal != NULL) {
printf("%s(%s), ", token_name(token), (char *)token->literal);
printf("%s(%s), ", token_name(token->type), (char *)token->literal);
} else {
printf("%s, ", token_name(token));
printf("%s, ", token_name(token->type));
}
}
printf("\n");

View file

@ -2,13 +2,44 @@
#define TOKENS_H
/*
 * Every kind of token the scanner can emit.
 *
 * Enumerators take the default values 0, 1, 2, ... in declaration order.
 * token_name() indexes its name table by these values, so the order here must
 * stay in step with that table. Each enumerator is declared exactly once;
 * listing one twice is a redeclaration error.
 */
enum TokenType {
  // Punctuation and operators.
  LEFT_PAREN,
  RIGHT_PAREN,
  LEFT_BRACE,
  RIGHT_BRACE,
  COMMA,
  DOT,
  MINUS,
  PLUS,
  SEMICOLON,
  SLASH,
  STAR,
  BANG,
  BANG_EQUAL,
  EQUAL,
  EQUAL_EQUAL,
  GREATER,
  GREATER_EQUAL,
  LESS,
  LESS_EQUAL,

  // Identifiers and literals.
  IDENTIFIER,
  STRING,
  NUMBER,

  // Keywords.
  AND,
  CLASS,
  ELSE,
  FALSE,
  FUN,
  FOR,
  IF,
  NIL,
  OR,
  PRINT,
  RETURN,
  SUPER,
  THIS,
  TRUE,
  VAR,
  WHILE,

  END_OF_FILE
};
@ -19,7 +50,19 @@ typedef struct {
int line;
} Token;
char* token_name(Token* token);
/*
 * Return the printable name of a token type.
 *
 * The name table is indexed directly by the enumerator value, so its entries
 * must stay in the same order as enum TokenType. A value outside the table
 * yields "UNKNOWN" instead of reading past the end of the array. The result
 * points at a string literal and must not be modified or freed.
 */
static inline const char *token_name(enum TokenType type) {
  static const char *const tokens[] = {
      "LEFT_PAREN", "RIGHT_PAREN",   "LEFT_BRACE", "RIGHT_BRACE", "COMMA",
      "DOT",        "MINUS",         "PLUS",       "SEMICOLON",   "SLASH",
      "STAR",       "BANG",          "BANG_EQUAL", "EQUAL",       "EQUAL_EQUAL",
      "GREATER",    "GREATER_EQUAL", "LESS",       "LESS_EQUAL",  "IDENTIFIER",
      "STRING",     "NUMBER",        "AND",        "CLASS",       "ELSE",
      "FALSE",      "FUN",           "FOR",        "IF",          "NIL",
      "OR",         "PRINT",         "RETURN",     "SUPER",       "THIS",
      "TRUE",       "VAR",           "WHILE",      "END_OF_FILE"};

  // Converting to unsigned makes a negative type value fail the range check.
  const unsigned int index = (unsigned int)type;
  if (index >= sizeof(tokens) / sizeof(tokens[0])) {
    return "UNKNOWN";
  }
  return tokens[index];
}
typedef struct {
Token *tokens;