From 2dab03db236b4683c22343b0006087331d8eb220 Mon Sep 17 00:00:00 2001
From: ghostie
Date: Thu, 3 Jul 2025 19:23:34 -0500
Subject: [PATCH] now lexing some single character tokens

---
NOTE(review): line breaks and context-line whitespace were reconstructed
from a whitespace-collapsed copy of this patch, and the index hashes are
the original ones -- confirm against the tree before applying.

 Makefile                 |   6 +-
 src/lexer.c              | 160 +++++++++++++++++++++++++++++++++++++++
 src/lexer.h              |   7 ++
 src/main.c               |   3 +
 src/token.c              |  57 ++++++++++++++
 src/token.h              |  60 ++++++++++++++-
 src/utils/buffer.c       |  10 +++
 src/utils/buffer.h       |   2 +
 tests/singletokens.pinky |   1 +
 9 files changed, 304 insertions(+), 2 deletions(-)
 create mode 100644 src/token.c
 create mode 100644 tests/singletokens.pinky

diff --git a/Makefile b/Makefile
index a478d8d..11061ac 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ RM=rm
 CFLAGS=-Wall -Werror -std=gnu99 -O0 -g
 LIBS=
 
-FILES=build/main.o build/lexer.o build/utils/vector.o build/utils/buffer.o
+FILES=build/main.o build/lexer.o build/token.o build/utils/vector.o build/utils/buffer.o
 OUT=bin/pinky.out
 
 all: $(FILES)
@@ -20,6 +20,10 @@ build/lexer.o: src/lexer.c
 	@$(ECHO) "CC\t\t"$<
 	@$(CC) $(CFLAGS) $< -c -o $@ $(LIBS)
 
+build/token.o: src/token.c
+	@$(ECHO) "CC\t\t"$<
+	@$(CC) $(CFLAGS) $< -c -o $@ $(LIBS)
+
 build/utils/vector.o: src/utils/vector.c
 	@$(ECHO) "CC\t\t"$<
 	@$(CC) $(CFLAGS) $< -c -o $@ $(LIBS)
diff --git a/src/lexer.c b/src/lexer.c
index 16ede00..2697a44 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -1,20 +1,180 @@
 #include "lexer.h"
+#include "token.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* helper functions */
+
+/* Returns the character under the cursor and advances the cursor past
+   it (consumes the character).  Returns 0 when L is NULL.  */
+char
+advance (struct lexer *l)
+{
+  if (!l)
+    return 0;
+
+  char *code = buffer_get (l->source);
+  return code[l->cur++];
+}
+
+/* Returns the character under the cursor without consuming it.
+   Returns 0 when L is NULL.  */
+char
+peek (struct lexer *l)
+{
+  if (!l)
+    return 0;
+
+  char *code = buffer_get (l->source);
+  return code[l->cur];
+}
+
+/* Returns the character N positions past the cursor without consuming
+   anything.  Returns 0 when L is NULL or when that position is at or
+   beyond the end of the source.  */
+char
+lookahead (unsigned int n, struct lexer *l)
+{
+  if (!l)
+    return 0;
+
+  /* written as a subtraction so a large N cannot wrap the index */
+  size_t len = buffer_length (l->source);
+  if (l->cur >= len || n >= len - l->cur)
+    return 0;
+
+  char *code = buffer_get (l->source);
+  return code[l->cur + n];
+}
+
+/* Consumes the character under the cursor only if it equals EXPECTED.
+   Returns 1 when it was consumed, 0 otherwise (including L == NULL).  */
+_Bool
+match (char expected, struct lexer *l)
+{
+  if (!l)
+    return 0;
+
+  char *code = buffer_get (l->source);
+  if (code[l->cur] != expected)
+    return 0;
+
+  l->cur++;
+  return 1;
+}
+
+/* Appends a token of TYPE whose lexeme is the source text between
+   l->start (inclusive) and l->cur (exclusive).  Does nothing when L is
+   NULL or when an allocation fails.  */
+void
+add_token (enum token_type type, struct lexer *l)
+{
+  if (!l)
+    return;
+
+  unsigned int size = l->cur - l->start;
+
+  /* One extra zeroed byte keeps the copy NUL-terminated:
+     token_create_heap () is given a bare char pointer, no length.  */
+  char *lexeme = calloc ((size_t) size + 1, sizeof (char));
+  if (!lexeme)
+    return;
+
+  memcpy (lexeme, buffer_get (l->source) + l->start, size);
+
+  struct token *tok = token_create_heap (type, lexeme);
+  free (lexeme);
+
+  if (tok)
+    vector_push_back (tok, &l->tokens);
+}
+
+/* public functions */
 
 struct lexer
 lexer_create ()
 {
   struct lexer l = { 0 };
 
   l.tokens = vector_create ();
+  l.line = 1;
 
   return l;
 }
 
+/* Points the lexer at B as its source text; only the pointer is stored,
+   the buffer is not copied.  Does nothing when an argument is NULL.  */
+void
+lexer_set_source (struct buffer *b, struct lexer *l)
+{
+  if (!b || !l)
+    return;
+
+  l->source = b;
+}
+
+/* Scans the source from the cursor to its end, appending one token for
+   each '+', '-' or '*'.  Every other character is consumed without
+   producing a token.  Does nothing when L is NULL.  */
+void
+lexer_lex (struct lexer *l)
+{
+  if (!l)
+    return;
+
+  while (l->cur < buffer_length (l->source))
+    {
+      l->start = l->cur;
+
+      switch (advance (l))
+        {
+        case '+':
+          add_token (TOK_PLUS, l);
+          break;
+        case '-':
+          add_token (TOK_MINUS, l);
+          break;
+        case '*':
+          add_token (TOK_STAR, l);
+          break;
+        default:
+          /* not lexed yet: skip the character */
+          break;
+        }
+    }
+}
+
+/* Writes a "Lexer:" heading and then every collected token to stdout,
+   one per line, as numeric type and lexeme.  No-op when L is NULL.  */
+void
+lexer_print (struct lexer *l)
+{
+  if (!l)
+    return;
+
+  puts ("Lexer:");
+
+  for (unsigned int i = 0; i < l->tokens.length; i++)
+    {
+      struct token *t = l->tokens.elements[i];
+      printf ("(TOK_TYPE: %d, \"%s\")\n", t->type, buffer_get (&t->lexeme));
+    }
+}
+
 void
 lexer_free (struct lexer *lexer)
 {
   if (!lexer)
     return;
 
+  /* the vector holds heap tokens made by token_create_heap () */
+  for (unsigned int i = 0; i < lexer->tokens.length; i++)
+    {
+      struct token *tok = lexer->tokens.elements[i];
+      token_free_heap (tok);
+    }
+
   vector_free (&lexer->tokens);
 }
diff --git a/src/lexer.h b/src/lexer.h
index 25981fa..edef1a9 100644
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -1,14 +1,21 @@
 #ifndef __LEXER_H
 #define __LEXER_H
 
+#include "utils/buffer.h"
 #include "utils/vector.h"
 
 struct lexer
 {
   struct vector tokens;
+  struct buffer *source;
+
+  unsigned int start, cur, line;
 };
 
 struct lexer lexer_create ();
+void lexer_set_source (struct buffer *b, struct lexer *l);
+void lexer_lex (struct lexer *l);
+void lexer_print (struct lexer *l);
 void lexer_free (struct lexer *lexer);
 
 #endif
diff --git a/src/main.c b/src/main.c
index bbd5115..a539743 100644
--- a/src/main.c
+++ b/src/main.c
@@ -19,6 +19,9 @@ main (int argc, char **argv)
     return EXIT_FAILURE;
 
   struct lexer lexer = lexer_create ();
+  lexer_set_source (&code, &lexer);
+  lexer_lex (&lexer);
+  lexer_print (&lexer);
   lexer_free (&lexer);
 
   buffer_free (&code);
diff --git a/src/token.c b/src/token.c
new file mode 100644
index 0000000..a0cad15
--- /dev/null
+++ b/src/token.c
@@ -0,0 +1,57 @@
+#include "token.h"
+
+#include <stdlib.h>
+
+/* Returns a token of TYPE by value; its lexeme buffer is filled from
+   the NUL-terminated string LEXEME.  Release it with token_free ().  */
+struct token
+token_create (enum token_type type, const char *lexeme)
+{
+  struct token t = { 0 };
+
+  t.type = type;
+  t.lexeme = buffer_create ();
+  buffer_append (lexeme, &t.lexeme);
+
+  return t;
+}
+
+/* Heap-allocating variant of token_create ().  Returns NULL when the
+   allocation fails; otherwise release the result with
+   token_free_heap ().  */
+struct token *
+token_create_heap (enum token_type type, const char *lexeme)
+{
+  struct token *t = calloc (1, sizeof *t);
+  if (!t)
+    return NULL;
+
+  t->type = type;
+  t->lexeme = buffer_create ();
+  buffer_append (lexeme, &t->lexeme);
+
+  return t;
+}
+
+/* Releases the lexeme buffer of a token made by token_create ().  The
+   token struct itself is not freed.  NULL is ignored.  */
+void
+token_free (struct token *t)
+{
+  if (!t)
+    return;
+
+  buffer_free (&t->lexeme);
+}
+
+/* Releases a token made by token_create_heap (): its lexeme buffer and
+   then the struct itself.  NULL is ignored.  */
+void
+token_free_heap (struct token *t)
+{
+  if (!t)
+    return;
+
+  buffer_free (&t->lexeme);
+  free (t);
+}
diff --git a/src/token.h b/src/token.h
index b32a6e7..bf067cb 100644
--- a/src/token.h
+++ b/src/token.h
@@ -1,14 +1,72 @@
 #ifndef __TOKEN_H
 #define __TOKEN_H
 
+#include "utils/buffer.h"
+
 enum token_type
 {
+  TOK_LPAREN,
+  TOK_RPAREN,
+  TOK_LCURLY,
+  TOK_RCURLY,
+  TOK_LSQUAR,
+  TOK_RSQUAR,
+  TOK_COMMA,
+  TOK_DOT,
+  TOK_PLUS,
+  TOK_MINUS,
+  TOK_STAR,
+  TOK_SLASH,
+  TOK_CARET,
+  TOK_MOD,
+  TOK_COLON,
+  TOK_SEMICOLON,
+  TOK_QUESTION,
+  TOK_NOT,
+  TOK_GT,
+  TOK_LT,
+
+  TOK_GE,
+  TOK_LE,
+  TOK_NE,
+  TOK_EQ,
+  TOK_ASSIGN,
+  TOK_GTGT,
+  TOK_LTLT,
+
+  TOK_IDENTIFIER,
+  TOK_STRING,
+  TOK_INTEGER,
+  TOK_FLOAT,
+
+  TOK_IF,
+  TOK_THEN,
+  TOK_ELSE,
+  TOK_TRUE,
+  TOK_FALSE,
+  TOK_AND,
+  TOK_OR,
+  TOK_WHILE,
+  TOK_DO,
+  TOK_FOR,
+  TOK_FUNC,
+  TOK_NULL,
+  TOK_END,
+  TOK_PRINT,
+  TOK_PRINTLN,
+  TOK_RET
 };
 
 struct token
 {
   enum token_type type;
-  char lexeme[4];
+  struct buffer lexeme;
 };
 
+struct token token_create (enum token_type type, const char *lexeme);
+struct token *token_create_heap (enum token_type type, const char *lexeme);
+
+void token_free (struct token *t);
+void token_free_heap (struct token *t);
+
 #endif
diff --git a/src/utils/buffer.c b/src/utils/buffer.c
index f9171e9..5ccfdc2 100644
--- a/src/utils/buffer.c
+++ b/src/utils/buffer.c
@@ -121,3 +121,13 @@ buffer_free (struct buffer *b)
   free (b->buf);
   memset (b, 0, sizeof (struct buffer));
 }
+
+/* Returns the length recorded in B (b->len), or 0 when B is NULL.  */
+size_t
+buffer_length (struct buffer *b)
+{
+  if (!b)
+    return 0;
+
+  return b->len;
+}
diff --git a/src/utils/buffer.h b/src/utils/buffer.h
index d5a46fe..a8b87e1 100644
--- a/src/utils/buffer.h
+++ b/src/utils/buffer.h
@@ -17,4 +17,6 @@ char *buffer_get (struct buffer *b);
 _Bool buffer_read (const char *path, struct buffer *b);
 void buffer_free (struct buffer *b);
 
+size_t buffer_length (struct buffer *b);
+
 #endif
diff --git a/tests/singletokens.pinky b/tests/singletokens.pinky
new file mode 100644
index 0000000..4ee7c8c
--- /dev/null
+++ b/tests/singletokens.pinky
@@ -0,0 +1 @@
++**-
\ No newline at end of file