now lexing some single character tokens

2025-07-03 19:23:34 -05:00 · 2025-07-03 19:23:34 -05:00 · 2dab03db23
commit 2dab03db23
parent b0682bbde5
9 changed files with 251 additions and 2 deletions
--- a/6
+++ b/6
@ -5,7 +5,7 @@ RM=rm
 CFLAGS=-Wall -Werror -std=gnu99 -O0 -g
 LIBS=
-FILES=build/main.o build/lexer.o build/utils/vector.o build/utils/buffer.o
+FILES=build/main.o build/lexer.o build/token.o build/utils/vector.o build/utils/buffer.o
 OUT=bin/pinky.out
 all: $(FILES)
@ -20,6 +20,10 @@ build/lexer.o: src/lexer.c
 	@$(ECHO) "CC\t\t"$<
 	@$(CC) $(CFLAGS) $< -c -o $@ $(LIBS)
 # Compile src/token.c into build/token.o.  $< is the source file and $@ is
 # the object file; the leading '@' stops make from echoing the commands, so
 # only the "CC" progress line produced through $(ECHO) is shown.
 build/token.o: src/token.c
 	@$(ECHO) "CC\t\t"$<
 	@$(CC) $(CFLAGS) $< -c -o $@ $(LIBS)
 # Compile src/utils/vector.c into build/utils/vector.o using the same
 # quiet "CC <source>" progress line as the other object rules.
 build/utils/vector.o: src/utils/vector.c
 	@$(ECHO) "CC\t\t"$<
 	@$(CC) $(CFLAGS) $< -c -o $@ $(LIBS)
--- a/src/lexer.c
+++ b/src/lexer.c
@ -1,20 +1,139 @@
 #include "lexer.h"
 #include "token.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 /* helper functions */
 /* Consume one character: return the byte at the lexer's cursor and move
  * the cursor one position forward.  A NULL lexer yields 0.  No bounds
  * check happens here; lexer_lex only calls this while the cursor is below
  * buffer_length of the source. */
 char
 advance (struct lexer *l)
 {
 	if (l == NULL)
 		return 0;
 	const char *text = buffer_get (l->source);
 	char current = text[l->cur];
 	l->cur += 1;
 	return current;
 }
 /* Return the character under the cursor without consuming it.
  * A NULL lexer yields 0. */
 char
 peek (struct lexer *l)
 {
 	return l ? buffer_get (l->source)[l->cur] : 0;
 }
 /* Return the character n positions past the cursor without consuming
  * anything (n == 0 is the character under the cursor).
  * Returns 0 when the lexer is NULL or when the requested position lies at
  * or beyond buffer_length of the source, instead of reading out of
  * bounds. */
 char
 lookahead (unsigned int n, struct lexer *l)
 {
 	if (!l)
 		return 0;
 	size_t length = buffer_length (l->source);
 	/* compare by subtraction so that cur + n can never wrap around */
 	if (l->cur >= length || n >= length - l->cur)
 		return 0;
 	char *code = buffer_get (l->source);
 	return code[l->cur + n];
 }
 /* Conditional consume: when the character under the cursor equals
  * `expected`, step past it and return 1; otherwise leave the cursor
  * untouched and return 0.  A NULL lexer yields 0. */
 _Bool
 match (char expected, struct lexer *l)
 {
 	if (l == NULL)
 		return 0;
 	const char *text = buffer_get (l->source);
 	_Bool hit = (text[l->cur] == expected);
 	if (hit)
 		l->cur++;
 	return hit;
 }
 /* Append a token of the given type to l->tokens.  The lexeme is the slice
  * of the source between l->start (inclusive) and l->cur (exclusive).
  * Does nothing when l is NULL, when the source text is unavailable, or
  * when the temporary lexeme cannot be allocated. */
 void
 add_token (enum token_type type, struct lexer *l)
 {
 	if (!l)
 		return;
 	const char *code = buffer_get (l->source);
 	if (!code)
 		return;
 	unsigned int size = l->cur - l->start;
 	/* one extra zeroed byte keeps the copy NUL-terminated; without it
 	   token_create_heap would read past the end of the allocation */
 	char *lexeme = calloc ((size_t)size + 1, sizeof (char));
 	if (!lexeme)
 		return;
 	memcpy (lexeme, code + l->start, size);
 	struct token *tok = token_create_heap (type, lexeme);
 	/* the token appends the text to its own buffer, so the scratch copy
 	   is no longer needed */
 	free (lexeme);
 	/* defensive: never store a NULL token in the vector */
 	if (tok)
 		vector_push_back (tok, &l->tokens);
 }
 /* public functions */
 struct lexer
 lexer_create ()
 {
 	struct lexer l = { 0 };
 	l.tokens = vector_create ();
 	l.line = 1;
 	return l;
 }
 /* Point the lexer at the buffer it should read from.  Only the pointer is
  * stored.  The call is ignored when either argument is NULL. */
 void
 lexer_set_source (struct buffer *b, struct lexer *l)
 {
 	if (b != NULL && l != NULL)
 		l->source = b;
 }
 /* Scan the whole source and append one token per recognised character.
  * Currently '+', '-' and '*' are recognised; every other character is
  * consumed and ignored.  Does nothing when l is NULL. */
 void
 lexer_lex (struct lexer *l)
 {
 	if (!l)
 		return;
 	/* the source is not modified while lexing, so read its length once */
 	size_t end = buffer_length (l->source);
 	while (l->cur < end)
 		{
 			/* remember where this token begins; add_token slices
 			   the source from start up to cur */
 			l->start = l->cur;
 			switch (advance (l))
 				{
 				case '+':
 					add_token (TOK_PLUS, l);
 					break;
 				case '-':
 					add_token (TOK_MINUS, l);
 					break;
 				case '*':
 					add_token (TOK_STAR, l);
 					break;
 				default:
 					/* not a token we know about yet: skip it */
 					break;
 				}
 		}
 }
 /* Print a "Lexer:" header followed by one line per token showing its
  * numeric type and its lexeme text.  Does nothing when l is NULL; NULL
  * entries in the token vector are skipped. */
 void
 lexer_print (struct lexer *l)
 {
 	if (!l)
 		return;
 	puts ("Lexer:");
 	for (unsigned int i = 0; i < l->tokens.length; i++)
 		{
 			struct token *t = l->tokens.elements[i];
 			if (!t)
 				continue;
 			const char *text = buffer_get (&t->lexeme);
 			/* cast: %d expects int, and the enum's underlying type
 			   is implementation-defined; also never hand NULL to %s */
 			printf ("(TOK_TYPE: %d, \"%s\")\n", (int)t->type,
 					text ? text : "");
 		}
 }
 /* Release every token stored in the lexer and then the token vector
  * itself.  The source buffer is not touched here.  A NULL lexer is
  * ignored. */
 void
 lexer_free (struct lexer *lexer)
 {
 	if (lexer == NULL)
 		return;
 	unsigned int idx = 0;
 	while (idx < lexer->tokens.length)
 		{
 			struct token *entry = lexer->tokens.elements[idx];
 			token_free_heap (entry);
 			idx++;
 		}
 	vector_free (&lexer->tokens);
 }
--- a/src/lexer.h
+++ b/src/lexer.h
@ -1,14 +1,21 @@
 #ifndef __LEXER_H
 #define __LEXER_H
 #include "utils/buffer.h"
 #include "utils/vector.h"
 /* State of one lexing run. */
 struct lexer
 {
 	/* tokens produced so far; add_token pushes heap-allocated
 	   struct token pointers, lexer_free releases them */
 	struct vector tokens;
 	/* text being lexed; set by lexer_set_source and not released by
 	   lexer_free */
 	struct buffer *source;
 	/* index in the source where the token being scanned begins */
 	unsigned int start;
 	/* index in the source of the next character to read */
 	unsigned int cur;
 	/* set to 1 by lexer_create; presumably the current source line
 	   (nothing in lexer.c updates it yet) */
 	unsigned int line;
 };
 /* Returns a lexer with an empty token vector, line set to 1 and no
    source attached. */
 struct lexer lexer_create ();
 /* Stores b as the text l will read; ignored when either pointer is NULL. */
 void lexer_set_source (struct buffer *b, struct lexer *l);
 /* Scans the source from the current position to its end, appending the
    recognised tokens to l->tokens. */
 void lexer_lex (struct lexer *l);
 /* Prints "Lexer:" followed by one line per token (type number and
    lexeme) to standard output. */
 void lexer_print (struct lexer *l);
 /* Frees every token and the token vector; the source buffer is left
    alone.  A NULL lexer is ignored. */
 void lexer_free (struct lexer *lexer);
 #endif
--- a/src/main.c
+++ b/src/main.c
@ -19,6 +19,9 @@ main (int argc, char **argv)
 		return EXIT_FAILURE;
 	struct lexer lexer = lexer_create ();
 	lexer_set_source (&code, &lexer);
 	lexer_lex (&lexer);
 	lexer_print (&lexer);
 	lexer_free (&lexer);
 	buffer_free (&code);
--- a/src/token.c
+++ b/src/token.c
@ -0,0 +1,46 @@
 #include "token.h"
 #include <stdlib.h>
 struct token
 token_create (enum token_type type, const char *lexeme)
 {
 	struct token t = { 0 };
 	t.type = type;
 	t.lexeme = buffer_create ();
 	buffer_append (lexeme, &t.lexeme);
 	return t;
 }
 /* Allocate a token on the heap, store the type, create its lexeme buffer
  * and append the given text to it.  Pair with token_free_heap.
  * Returns NULL when the allocation fails. */
 struct token *
 token_create_heap (enum token_type type, const char *lexeme)
 {
 	struct token *t = calloc (1, sizeof *t);
 	/* calloc may fail; do not write through a NULL pointer */
 	if (!t)
 		return NULL;
 	t->type = type;
 	t->lexeme = buffer_create ();
 	buffer_append (lexeme, &t->lexeme);
 	return t;
 }
 /* Release the lexeme buffer of a token that was created by value; the
  * token struct itself is not freed.  A NULL pointer is ignored. */
 void
 token_free (struct token *t)
 {
 	if (t != NULL)
 		buffer_free (&t->lexeme);
 }
 /* Release a heap-allocated token: first its lexeme buffer, then the
  * token struct itself.  A NULL pointer is ignored. */
 void
 token_free_heap (struct token *t)
 {
 	if (t != NULL)
 		{
 			buffer_free (&t->lexeme);
 			free (t);
 		}
 }
--- a/src/token.h
+++ b/src/token.h
@ -1,14 +1,72 @@
 #ifndef __TOKEN_H
 #define __TOKEN_H
 #include "utils/buffer.h"
 /* Kinds of token.  Values are consecutive starting at 0 and are printed
  * as plain numbers by lexer_print, so the order must not change.
  * The group headings below are inferred from the names only; at present
  * the lexer emits just TOK_PLUS, TOK_MINUS and TOK_STAR. */
 enum token_type
 {
 	/* brackets and separators */
 	TOK_LPAREN = 0,
 	TOK_RPAREN,
 	TOK_LCURLY,
 	TOK_RCURLY,
 	TOK_LSQUAR,
 	TOK_RSQUAR,
 	TOK_COMMA,
 	TOK_DOT,
 	/* single-character operators and punctuation */
 	TOK_PLUS,
 	TOK_MINUS,
 	TOK_STAR,
 	TOK_SLASH,
 	TOK_CARET,
 	TOK_MOD,
 	TOK_COLON,
 	TOK_SEMICOLON,
 	TOK_QUESTION,
 	TOK_NOT,
 	TOK_GT,
 	TOK_LT,
 	/* presumably the multi-character operators */
 	TOK_GE,
 	TOK_LE,
 	TOK_NE,
 	TOK_EQ,
 	TOK_ASSIGN,
 	TOK_GTGT,
 	TOK_LTLT,
 	/* identifiers and literals */
 	TOK_IDENTIFIER,
 	TOK_STRING,
 	TOK_INTEGER,
 	TOK_FLOAT,
 	/* presumably keywords */
 	TOK_IF,
 	TOK_THEN,
 	TOK_ELSE,
 	TOK_TRUE,
 	TOK_FALSE,
 	TOK_AND,
 	TOK_OR,
 	TOK_WHILE,
 	TOK_DO,
 	TOK_FOR,
 	TOK_FUNC,
 	TOK_NULL,
 	TOK_END,
 	TOK_PRINT,
 	TOK_PRINTLN,
 	TOK_RET,
 };
 /* One lexed token: its kind plus the text it was made from.  The lexeme
    buffer is filled by token_create / token_create_heap and released by
    token_free / token_free_heap. */
 struct token
 {
 	enum token_type type;
-	char lexeme[4];
+	struct buffer lexeme;
 };
 /* Returns a token by value whose lexeme buffer holds the given text;
    release it with token_free. */
 struct token token_create (enum token_type type, const char *lexeme);
 /* Same as token_create but the token itself is heap-allocated; release
    it with token_free_heap. */
 struct token *token_create_heap (enum token_type type, const char *lexeme);
 /* Frees the lexeme buffer only; a NULL token is ignored. */
 void token_free (struct token *t);
 /* Frees the lexeme buffer and then the token; a NULL token is ignored. */
 void token_free_heap (struct token *t);
 #endif
--- a/src/utils/buffer.c
+++ b/src/utils/buffer.c
@ -121,3 +121,12 @@ buffer_free (struct buffer *b)
 	free (b->buf);
 	memset (b, 0, sizeof (struct buffer));
 }
 /* Return the buffer's len field, or 0 when b is NULL. */
 size_t
 buffer_length (struct buffer *b)
 {
 	return b ? b->len : 0;
 }
--- a/src/utils/buffer.h
+++ b/src/utils/buffer.h
@ -17,4 +17,6 @@ char *buffer_get (struct buffer *b);
 _Bool buffer_read (const char *path, struct buffer *b);
 void buffer_free (struct buffer *b);
 size_t buffer_length (struct buffer *b);
 #endif
--- a/tests/singletokens.pinky
+++ b/tests/singletokens.pinky
@ -0,0 +1 @@
 +**-