now lexing some single character tokens

This commit is contained in:
ghostie 2025-07-03 19:23:34 -05:00
parent b0682bbde5
commit 2dab03db23
9 changed files with 251 additions and 2 deletions

View File

@ -5,7 +5,7 @@ RM=rm
CFLAGS=-Wall -Werror -std=gnu99 -O0 -g
LIBS=
FILES=build/main.o build/lexer.o build/utils/vector.o build/utils/buffer.o
FILES=build/main.o build/lexer.o build/token.o build/utils/vector.o build/utils/buffer.o
OUT=bin/pinky.out
all: $(FILES)
@ -20,6 +20,10 @@ build/lexer.o: src/lexer.c
@$(ECHO) "CC\t\t"$<
@$(CC) $(CFLAGS) $< -c -o $@ $(LIBS)
# Compile the token module: src/token.c ($<) -> build/token.o ($@).
build/token.o: src/token.c
@$(ECHO) "CC\t\t"$<
@$(CC) $(CFLAGS) $< -c -o $@ $(LIBS)
build/utils/vector.o: src/utils/vector.c
@$(ECHO) "CC\t\t"$<
@$(CC) $(CFLAGS) $< -c -o $@ $(LIBS)

View File

@ -1,20 +1,139 @@
#include "lexer.h"
#include "token.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* helper functions */
/* Consume one character: return the character under the cursor and
   move `cur` one position forward.  Returns 0 when `l` is NULL. */
char
advance (struct lexer *l)
{
  if (l)
    {
      const char *text = buffer_get (l->source);
      char consumed = text[l->cur];
      l->cur += 1;
      return consumed;
    }
  return 0;
}
/* Return the character under the cursor without consuming it.
   Returns 0 when `l` is NULL. */
char
peek (struct lexer *l)
{
  return l ? buffer_get (l->source)[l->cur] : 0;
}
/* Return the character `n` positions past the cursor without consuming
   anything (n == 0 is the character under the cursor).

   Returns 0 when `l` is NULL or when the requested position lies at or
   beyond the length of the source buffer, so callers can never read
   outside the source text. */
char
lookahead (unsigned int n, struct lexer *l)
{
  if (!l)
    return 0;

  /* Widen before adding so a large `n` cannot wrap around in
     unsigned int arithmetic and land back inside the buffer. */
  size_t target = (size_t) l->cur + n;
  if (target >= buffer_length (l->source))
    return 0;

  char *code = buffer_get (l->source);
  return code[target];
}
/* Conditionally consume: if the character under the cursor equals
   `expected`, step past it and return 1; otherwise leave the cursor
   where it is and return 0.  A NULL lexer never matches. */
_Bool
match (char expected, struct lexer *l)
{
  if (l && buffer_get (l->source)[l->cur] == expected)
    {
      l->cur++;
      return 1;
    }
  return 0;
}
/* Append a token of kind `type` whose text is the source slice
   [start, cur) to the lexer's token vector.

   The slice is copied into a temporary NUL-terminated string because
   token_create_heap receives the lexeme as a plain `const char *` with
   no length.  The token keeps its own copy, so the temporary is freed
   before returning.  Does nothing when `l` is NULL or when an
   allocation fails. */
void
add_token (enum token_type type, struct lexer *l)
{
  if (!l)
    return;

  unsigned int size = l->cur - l->start;

  /* One extra zeroed byte guarantees the terminator; without it the
     copy below would fill the whole allocation and leave the string
     unterminated. */
  char *lexeme = calloc ((size_t) size + 1, sizeof (char));
  if (!lexeme)
    return;
  memcpy (lexeme, buffer_get (l->source) + l->start, size);

  struct token *tok = token_create_heap (type, lexeme);
  if (tok)
    vector_push_back (tok, &l->tokens);

  free (lexeme);
}
/* public functions */
struct lexer
lexer_create ()
{
struct lexer l = { 0 };
l.tokens = vector_create ();
l.line = 1;
return l;
}
/* Point the lexer at the buffer it should read from.  Only the pointer
   is stored; nothing is copied.  Ignored if either argument is NULL. */
void
lexer_set_source (struct buffer *b, struct lexer *l)
{
  if (b && l)
    l->source = b;
}
/* Scan the source from the current cursor to the end of the buffer,
   appending one token per recognised character.

   Recognised so far: '+', '-' and '*'.  Any other character is consumed
   and silently dropped.  Does nothing when `l` is NULL. */
void
lexer_lex (struct lexer *l)
{
  if (!l)
    return;

  /* The source is not modified while lexing, so query its length once. */
  const size_t length = buffer_length (l->source);

  while (l->cur < length)
    {
      /* Mark where the lexeme begins before consuming its first char. */
      l->start = l->cur;

      switch (advance (l))
        {
        case '+':
          add_token (TOK_PLUS, l);
          break;
        case '-':
          add_token (TOK_MINUS, l);
          break;
        case '*':
          add_token (TOK_STAR, l);
          break;
        default:
          /* Not a token we know about yet: skip it. */
          break;
        }
    }
}
/* Dump the token list to stdout: a "Lexer:" header followed by one line
   per token showing its numeric type and its lexeme text.
   Does nothing when `l` is NULL. */
void
lexer_print (struct lexer *l)
{
  if (!l)
    return;

  puts ("Lexer:");
  for (unsigned int i = 0; i < l->tokens.length; i++)
    {
      struct token *t = l->tokens.elements[i];
      /* The underlying integer type of an enum is implementation-defined,
         so convert explicitly to match the %d conversion. */
      printf ("(TOK_TYPE: %d, \"%s\")\n", (int) t->type,
              buffer_get (&t->lexeme));
    }
}
/* Release everything the lexer owns: each heap-allocated token, then
   the vector that held them.  The source buffer is not touched.
   Safe to call with NULL. */
void
lexer_free (struct lexer *lexer)
{
  if (lexer)
    {
      unsigned int idx = 0;
      while (idx < lexer->tokens.length)
        {
          struct token *current = lexer->tokens.elements[idx];
          token_free_heap (current);
          idx++;
        }
      vector_free (&lexer->tokens);
    }
}

View File

@ -1,14 +1,21 @@
#ifndef __LEXER_H
#define __LEXER_H
#include "utils/buffer.h"
#include "utils/vector.h"
/* Lexer state: the text being scanned, the scan position, and the
   tokens produced so far. */
struct lexer
{
  /* Tokens emitted so far; elements are heap-allocated struct token. */
  struct vector tokens;
  /* Text being lexed; set by lexer_set_source, not freed by lexer_free. */
  struct buffer *source;
  /* Index in `source` where the lexeme currently being scanned begins. */
  unsigned int start;
  /* Index in `source` of the next character to be consumed. */
  unsigned int cur;
  /* Line counter; initialised to 1 by lexer_create. */
  unsigned int line;
};
/* Return a lexer with an empty token vector, line set to 1 and all
   other fields zeroed.  Release it with lexer_free. */
struct lexer lexer_create (void);

/* Attach `b` as the text to lex.  Only the pointer is stored; the
   caller keeps ownership of the buffer.  No-op if either is NULL. */
void lexer_set_source (struct buffer *b, struct lexer *l);

/* Scan the attached source from the current position to its end,
   appending the recognised tokens to `l->tokens`. */
void lexer_lex (struct lexer *l);

/* Print "Lexer:" followed by one line per token to stdout. */
void lexer_print (struct lexer *l);

/* Free every token and the token vector.  The source buffer is left
   alone.  Safe to call with NULL. */
void lexer_free (struct lexer *lexer);
#endif

View File

@ -19,6 +19,9 @@ main (int argc, char **argv)
return EXIT_FAILURE;
struct lexer lexer = lexer_create ();
lexer_set_source (&code, &lexer);
lexer_lex (&lexer);
lexer_print (&lexer);
lexer_free (&lexer);
buffer_free (&code);

46
src/token.c Normal file
View File

@ -0,0 +1,46 @@
#include "token.h"
#include <stdlib.h>
struct token
token_create (enum token_type type, const char *lexeme)
{
struct token t = { 0 };
t.type = type;
t.lexeme = buffer_create ();
buffer_append (lexeme, &t.lexeme);
return t;
}
/* Allocate a token on the heap, record its kind and copy `lexeme` into
   a freshly created buffer owned by the token.

   Returns the new token, or NULL if the allocation fails.  The caller
   owns the result and must release it with token_free_heap. */
struct token *
token_create_heap (enum token_type type, const char *lexeme)
{
  struct token *t = calloc (1, sizeof *t);
  if (!t)
    return NULL;

  t->type = type;
  t->lexeme = buffer_create ();
  buffer_append (lexeme, &t->lexeme);
  return t;
}
/* Release the lexeme buffer of a token that was created by value.
   The token struct itself is not freed.  Safe to call with NULL. */
void
token_free (struct token *t)
{
  if (t)
    buffer_free (&t->lexeme);
}
/* Destroy a heap-allocated token: release its lexeme buffer first,
   then the token's own storage.  Safe to call with NULL. */
void
token_free_heap (struct token *t)
{
  if (t)
    {
      buffer_free (&t->lexeme);
      free (t);
    }
}

View File

@ -1,14 +1,72 @@
#ifndef __TOKEN_H
#define __TOKEN_H
#include "utils/buffer.h"
/* Kinds of token.  No enumerator has an explicit value, so they are
   numbered consecutively from 0 in declaration order; lexer_print shows
   that number.  At the time of writing lexer_lex only produces TOK_PLUS
   ('+'), TOK_MINUS ('-') and TOK_STAR ('*'); the meaning of the other
   enumerators is inferred from their names only. */
enum token_type
{
/* Punctuation and operators (names suggest one- and two-character
   symbols -- TODO confirm the exact spellings once they are lexed). */
TOK_LPAREN,
TOK_RPAREN,
TOK_LCURLY,
TOK_RCURLY,
TOK_LSQUAR,
TOK_RSQUAR,
TOK_COMMA,
TOK_DOT,
TOK_PLUS,
TOK_MINUS,
TOK_STAR,
TOK_SLASH,
TOK_CARET,
TOK_MOD,
TOK_COLON,
TOK_SEMICOLON,
TOK_QUESTION,
TOK_NOT,
TOK_GT,
TOK_LT,
TOK_GE,
TOK_LE,
TOK_NE,
TOK_EQ,
TOK_ASSIGN,
TOK_GTGT,
TOK_LTLT,
/* Identifiers and literals (presumably). */
TOK_IDENTIFIER,
TOK_STRING,
TOK_INTEGER,
TOK_FLOAT,
/* Keywords (presumably reserved words of the language). */
TOK_IF,
TOK_THEN,
TOK_ELSE,
TOK_TRUE,
TOK_FALSE,
TOK_AND,
TOK_OR,
TOK_WHILE,
TOK_DO,
TOK_FOR,
TOK_FUNC,
TOK_NULL,
TOK_END,
TOK_PRINT,
TOK_PRINTLN,
TOK_RET
};
/* A lexed token: its kind plus the source text it was produced from. */
struct token
{
/* Which kind of token this is. */
enum token_type type;
/* NOTE(review): two members named `lexeme` are listed here.  token.c
   treats `lexeme` as a struct buffer (buffer_create / buffer_append /
   buffer_free), so the fixed-size array looks like the superseded
   declaration -- confirm and keep only one. */
char lexeme[4];
struct buffer lexeme;
};
/* Return a token by value holding `type` and a copy of `lexeme` in its
   own buffer.  Release the buffer with token_free. */
struct token token_create (enum token_type type, const char *lexeme);
/* Same as token_create, but the token itself is heap-allocated.
   Release it with token_free_heap. */
struct token *token_create_heap (enum token_type type, const char *lexeme);
/* Free the lexeme buffer of a by-value token; the struct itself is not
   freed.  Safe to call with NULL. */
void token_free (struct token *t);
/* Free the lexeme buffer and then the token itself.  Safe to call with
   NULL. */
void token_free_heap (struct token *t);
#endif

View File

@ -121,3 +121,12 @@ buffer_free (struct buffer *b)
free (b->buf);
memset (b, 0, sizeof (struct buffer));
}
/* Report the buffer's `len` field, or 0 when no buffer is given. */
size_t
buffer_length (struct buffer *b)
{
  return b ? b->len : 0;
}

View File

@ -17,4 +17,6 @@ char *buffer_get (struct buffer *b);
/* NOTE(review): presumably loads the file at `path` into `b` and
   reports success -- confirm against the definition in buffer.c. */
_Bool buffer_read (const char *path, struct buffer *b);
/* Frees b->buf and zeroes the whole struct. */
void buffer_free (struct buffer *b);
/* Returns b->len, or 0 when `b` is NULL. */
size_t buffer_length (struct buffer *b);
#endif

1
tests/singletokens.pinky Normal file
View File

@ -0,0 +1 @@
+**-