now lexing some single character tokens

2025-07-03 19:23:34 -05:00 · 2025-07-03 19:23:34 -05:00 · 2dab03db23
commit 2dab03db23
parent b0682bbde5
9 changed files with 251 additions and 2 deletions
--- a/6
+++ b/6
@ -5,7 +5,7 @@ RM=rm
 CFLAGS=-Wall -Werror -std=gnu99 -O0 -g
 LIBS=

-FILES=build/main.o build/lexer.o build/utils/vector.o build/utils/buffer.o
+FILES=build/main.o build/lexer.o build/token.o build/utils/vector.o build/utils/buffer.o
 OUT=bin/pinky.out

 all: $(FILES)
@ -20,6 +20,10 @@ build/lexer.o: src/lexer.c
 	@$(ECHO) "CC\t\t"$<
 	@$(CC) $(CFLAGS) $< -c -o $@ $(LIBS)

+build/token.o: src/token.c
+	@$(ECHO) "CC\t\t"$<
+	@$(CC) $(CFLAGS) $< -c -o $@ $(LIBS)
+
 build/utils/vector.o: src/utils/vector.c
 	@$(ECHO) "CC\t\t"$<
 	@$(CC) $(CFLAGS) $< -c -o $@ $(LIBS)
--- a/src/lexer.c
+++ b/src/lexer.c
@ -1,20 +1,139 @@
 #include "lexer.h"

+#include "token.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* helper functions */
+char
+advance (struct lexer *l)
+{
+	/* returns the character at cur, then moves cur one position forward */
+	/* (a NULL lexer yields 0 and nothing is consumed) */
+	if (l)
+		{
+			char *src = buffer_get (l->source);
+			unsigned int at = l->cur;
+			l->cur = at + 1;
+			return src[at];
+		}
+
+	return 0;
+}
+
+char
+peek (struct lexer *l)
+{
+	/* reads the character at cur and leaves cur untouched */
+	/* (a NULL lexer yields 0) */
+	return l ? buffer_get (l->source)[l->cur] : 0;
+}
+
+char
+lookahead (unsigned int n, struct lexer *l)
+{
+	/* returns the character n positions past cur without consuming it */
+	/* yields 0 when l is NULL or when that position is at or beyond */
+	/* buffer_length (l->source), so a large n never reads out of bounds */
+	if (!l)
+		return 0;
+
+	/* widen before adding so cur + n cannot wrap around in unsigned int */
+	size_t pos = (size_t) l->cur + n;
+	if (pos >= buffer_length (l->source))
+		return 0;
+
+	char *code = buffer_get (l->source);
+	return code[pos];
+}
+
+_Bool
+match (char expected, struct lexer *l)
+{
+	/* conditionally consumes: cur advances only when the character */
+	/* at cur equals expected; returns 1 on a match, 0 otherwise */
+	if (!l)
+		return 0;
+
+	_Bool hit = buffer_get (l->source)[l->cur] == expected;
+	if (hit)
+		l->cur++;
+
+	return hit;
+}
+
+void
+add_token (enum token_type type, struct lexer *l)
+{
+	/* appends a heap token of the given type to l->tokens; its lexeme is */
+	/* the source text in [start, cur) (does nothing when l is NULL) */
+	if (!l)
+		return;
+
+	/* the extra zeroed byte keeps the copy NUL-terminated: */
+	/* token_create_heap receives only a pointer, not a length */
+	size_t size = l->cur - l->start;
+	char *lexeme = calloc (size + 1, sizeof (char));
+	if (!lexeme)
+		return;
+
+	memcpy (lexeme, buffer_get (l->source) + l->start, size);
+
+	/* the scratch copy is released here, as before, once the token exists */
+	struct token *tok = token_create_heap (type, lexeme);
+	if (tok)
+		vector_push_back (tok, &l->tokens);
+	free (lexeme);
+}
+
+/* public functions */
 struct lexer
 lexer_create ()
 {
 	struct lexer l = { 0 };

 	l.tokens = vector_create ();
+	l.line = 1; /* every other counter stays zeroed; line starts at 1 */

 	return l;
 }

+void
+lexer_set_source (struct buffer *b, struct lexer *l)
+{
+	/* points the lexer at b; only the pointer is stored */
+	/* (ignored unless both arguments are non-NULL) */
+	if (b && l)
+		l->source = b;
+}
+
+void
+lexer_lex (struct lexer *l)
+{
+	/* scans l->source from cur to its end, appending one token for each */
+	/* '+', '-' or '*'; every other character is consumed and ignored */
+	/* (does nothing when l is NULL) */
+	if (!l)
+		return;
+
+	while (l->cur < buffer_length (l->source))
+		{
+			/* add_token copies [start, cur), so mark where this lexeme begins */
+			l->start = l->cur;
+
+			switch (advance (l))
+				{
+				case '+':
+					add_token (TOK_PLUS, l);
+					break;
+				case '-':
+					add_token (TOK_MINUS, l);
+					break;
+				case '*':
+					add_token (TOK_STAR, l);
+					break;
+				default:
+					break;
+				}
+		}
+}
+
+void
+lexer_print (struct lexer *l)
+{
+	/* writes "Lexer:" and then one line per token (numeric type, lexeme) */
+	/* to stdout; a NULL lexer prints nothing */
+	if (!l)
+		return;
+
+	puts ("Lexer:");
+
+	for (unsigned int i = 0; i < l->tokens.length; i++)
+		{
+			struct token *t = l->tokens.elements[i];
+			if (!t)
+				continue;
+
+			/* %d expects an int, so the enum is converted explicitly */
+			printf ("(TOK_TYPE: %d, \"%s\")\n", (int) t->type, buffer_get (&t->lexeme));
+		}
+}
+
 void
 lexer_free (struct lexer *lexer)
 {
 	if (!lexer)
 		return;

+	for (unsigned int i = 0; i < lexer->tokens.length; i++)
+		{
+			struct token *tok = lexer->tokens.elements[i];
+			token_free_heap (tok); /* tokens come from token_create_heap in add_token */
+		}
+
 	vector_free (&lexer->tokens);
 }
--- a/src/lexer.h
+++ b/src/lexer.h
@ -1,14 +1,21 @@
 #ifndef __LEXER_H
 #define __LEXER_H

+#include "utils/buffer.h"
 #include "utils/vector.h"

 struct lexer
 {
 	struct vector tokens;
+	struct buffer *source; /* text to lex; stored by lexer_set_source, lexer_free does not free it */
+
+	unsigned int start, cur, line; /* [start, cur) is the current lexeme; line begins at 1 */
 };

 struct lexer lexer_create ();
+void lexer_set_source (struct buffer *b, struct lexer *l); /* no-op if b or l is NULL */
+void lexer_lex (struct lexer *l); /* turns l->source into entries of l->tokens */
+void lexer_print (struct lexer *l); /* prints every token to stdout */
 void lexer_free (struct lexer *lexer);

 #endif
--- a/src/main.c
+++ b/src/main.c
@ -19,6 +19,9 @@ main (int argc, char **argv)
 		return EXIT_FAILURE;

 	struct lexer lexer = lexer_create ();
+	lexer_set_source (&code, &lexer);
+	lexer_lex (&lexer);
+	lexer_print (&lexer);

 	lexer_free (&lexer);
 	buffer_free (&code);
--- a/src/token.c
+++ b/src/token.c
@ -0,0 +1,46 @@
+#include "token.h"
+
+#include <stdlib.h>
+
+struct token
+token_create (enum token_type type, const char *lexeme)
+{
+	/* by-value token: a fresh buffer is created and the given text is */
+	/* appended to it; pair with token_free to release that buffer */
+	struct token tok = { .type = type, .lexeme = buffer_create () };
+
+	buffer_append (lexeme, &tok.lexeme);
+	return tok;
+}
+
+struct token *
+token_create_heap (enum token_type type, const char *lexeme)
+{
+	/* heap-allocated counterpart of token_create; release the result */
+	/* with token_free_heap. returns NULL when the allocation fails */
+	struct token *t = calloc (1, sizeof *t);
+	if (!t)
+		return NULL;
+
+	t->type = type;
+	t->lexeme = buffer_create ();
+	buffer_append (lexeme, &t->lexeme);
+
+	return t;
+}
+
+void
+token_free (struct token *t)
+{
+	/* releases the lexeme buffer only; the struct itself is not freed */
+	/* (NULL is accepted and ignored) */
+	if (t)
+		buffer_free (&t->lexeme);
+}
+
+void
+token_free_heap (struct token *t)
+{
+	/* releases the lexeme buffer and then the token allocation itself */
+	/* (NULL is accepted and ignored) */
+	if (t)
+		{
+			buffer_free (&t->lexeme);
+			free (t);
+		}
+}
--- a/src/token.h
+++ b/src/token.h
@ -1,14 +1,72 @@
 #ifndef __TOKEN_H
 #define __TOKEN_H

+#include "utils/buffer.h"
+
 enum token_type
 {
+	TOK_LPAREN, /* NOTE(review): this group looks like single-character tokens; confirm */
+	TOK_RPAREN,
+	TOK_LCURLY,
+	TOK_RCURLY,
+	TOK_LSQUAR,
+	TOK_RSQUAR,
+	TOK_COMMA,
+	TOK_DOT,
+	TOK_PLUS, /* emitted by lexer_lex for '+' */
+	TOK_MINUS, /* emitted by lexer_lex for '-' */
+	TOK_STAR, /* emitted by lexer_lex for '*' */
+	TOK_SLASH,
+	TOK_CARET,
+	TOK_MOD,
+	TOK_COLON,
+	TOK_SEMICOLON,
+	TOK_QUESTION,
+	TOK_NOT,
+	TOK_GT,
+	TOK_LT,
+
+	TOK_GE, /* NOTE(review): presumably multi-character operators; confirm */
+	TOK_LE,
+	TOK_NE,
+	TOK_EQ,
+	TOK_ASSIGN,
+	TOK_GTGT,
+	TOK_LTLT,
+
+	TOK_IDENTIFIER, /* NOTE(review): presumably identifiers and literals; confirm */
+	TOK_STRING,
+	TOK_INTEGER,
+	TOK_FLOAT,
+
+	TOK_IF, /* NOTE(review): presumably language keywords; confirm */
+	TOK_THEN,
+	TOK_ELSE,
+	TOK_TRUE,
+	TOK_FALSE,
+	TOK_AND,
+	TOK_OR,
+	TOK_WHILE,
+	TOK_DO,
+	TOK_FOR,
+	TOK_FUNC,
+	TOK_NULL,
+	TOK_END,
+	TOK_PRINT,
+	TOK_PRINTLN,
+	TOK_RET
 };

 struct token
 {
 	enum token_type type;
-	char lexeme[4];
+	struct buffer lexeme; /* token text; filled by token_create(_heap), released by token_free(_heap) */
 };

+struct token token_create (enum token_type type, const char *lexeme); /* returned by value */
+struct token *token_create_heap (enum token_type type, const char *lexeme); /* calloc'd; pair with token_free_heap */
+
+void token_free (struct token *t); /* frees the lexeme buffer only */
+void token_free_heap (struct token *t); /* frees the lexeme buffer and t itself */
+
 #endif
--- a/src/utils/buffer.c
+++ b/src/utils/buffer.c
@ -121,3 +121,12 @@ buffer_free (struct buffer *b)
 	free (b->buf);
 	memset (b, 0, sizeof (struct buffer));
 }
+
+size_t
+buffer_length (struct buffer *b)
+{
+	/* reports b->len; a NULL buffer counts as length 0 */
+	return b ? b->len : 0;
+}
--- a/src/utils/buffer.h
+++ b/src/utils/buffer.h
@ -17,4 +17,6 @@ char *buffer_get (struct buffer *b);
 _Bool buffer_read (const char *path, struct buffer *b);
 void buffer_free (struct buffer *b);

+size_t buffer_length (struct buffer *b); /* returns b->len, or 0 when b is NULL */
+
 #endif
--- a/tests/singletokens.pinky
+++ b/tests/singletokens.pinky
@ -0,0 +1 @@
+**-