Add more single-character tokens; ignore whitespace and comments

2025-07-04 14:39:45 -05:00 · 2025-07-04 14:39:45 -05:00 · 68573a4599
commit 68573a4599
parent 2dab03db23
5 changed files with 59 additions and 10 deletions
--- a/Readme.org
+++ b/Readme.org
@ -0,0 +1,8 @@
 * Pinky
 Pinky is a little toy programming language, designed to teach compiler and
 interpreter development.
 In this repository, you'll find the implementation of both an interpreter and
 a compiler for Pinky. The project is pretty new, so there is still quite a way
 to go.
--- a/src/lexer.c
+++ b/src/lexer.c
@ -69,7 +69,7 @@ add_token (enum token_type type, struct lexer *l)
 	char *lexeme = calloc (size, sizeof (char));
 	strncpy (lexeme, buffer_get (l->source) + l->start, size);
-	vector_push_back (token_create_heap (type, lexeme), &l->tokens);
+	vector_push_back (token_create_heap (type, l->line, lexeme), &l->tokens);
 	free (lexeme);
 }
@ -102,12 +102,45 @@ lexer_lex (struct lexer *l)
 			l->start = l->cur;
 			char c = advance (l);
-			if (c == '+')
+			if (c == '\n')
 				l->line++;
 			else if (c == ' ' || c == '\t' || c == '\r')
 				continue;
 			else if (c == '#')
 				while (peek (l) != '\n')
 					advance (l);
 			else if (c == '(')
 				add_token (TOK_LPAREN, l);
 			else if (c == ')')
 				add_token (TOK_RPAREN, l);
 			else if (c == '{')
 				add_token (TOK_LCURLY, l);
 			else if (c == '}')
 				add_token (TOK_RCURLY, l);
 			else if (c == '[')
 				add_token (TOK_LSQUAR, l);
 			else if (c == ']')
 				add_token (TOK_RSQUAR, l);
 			else if (c == '.')
 				add_token (TOK_DOT, l);
 			else if (c == ',')
 				add_token (TOK_COMMA, l);
 			else if (c == '+')
 				add_token (TOK_PLUS, l);
-			if (c == '-')
+			else if (c == '-')
 				add_token (TOK_MINUS, l);
-			if (c == '*')
+			else if (c == '*')
 				add_token (TOK_STAR, l);
 			else if (c == '^')
 				add_token (TOK_CARET, l);
 			else if (c == '/')
 				add_token (TOK_SLASH, l);
 			else if (c == ';')
 				add_token (TOK_SEMICOLON, l);
 			else if (c == '?')
 				add_token (TOK_QUESTION, l);
 			else if (c == '%')
 				add_token (TOK_MOD, l);
 		}
 }
@ -119,7 +152,8 @@ lexer_print (struct lexer *l)
 	for (unsigned int i = 0; i < l->tokens.length; i++)
 		{
 			struct token *t = l->tokens.elements[i];
-			printf ("(TOK_TYPE: %d, \"%s\")\n", t->type, buffer_get (&t->lexeme));
+			printf ("(TOK_TYPE: %d, \"%s\" - at line %d)\n", t->type,
 							buffer_get (&t->lexeme), t->line);
 		}
 }
--- a/src/token.c
+++ b/src/token.c
@ -3,24 +3,26 @@
 #include <stdlib.h>
 struct token
-token_create (enum token_type type, const char *lexeme)
+token_create (enum token_type type, unsigned int line, const char *lexeme)
 {
 	struct token t = { 0 };
 	t.type = type;
 	t.lexeme = buffer_create ();
 	t.line = line;
 	buffer_append (lexeme, &t.lexeme);
 	return t;
 }
 struct token *
-token_create_heap (enum token_type type, const char *lexeme)
+token_create_heap (enum token_type type, unsigned int line, const char *lexeme)
 {
 	struct token *t = calloc (1, sizeof (struct token));
 	t->type = type;
 	t->lexeme = buffer_create ();
 	t->line = line;
 	buffer_append (lexeme, &t->lexeme);
 	return t;
--- a/src/token.h
+++ b/src/token.h
@ -61,10 +61,13 @@ struct token
 {
 	enum token_type type;
 	struct buffer lexeme;
 	unsigned int line;
 };
-struct token token_create (enum token_type type, const char *lexeme);
+struct token token_create (enum token_type type, unsigned int line,
-struct token *token_create_heap (enum token_type type, const char *lexeme);
+													 const char *lexeme);
 struct token *token_create_heap (enum token_type type, unsigned int line,
 																 const char *lexeme);
 void token_free (struct token *t);
 void token_free_heap (struct token *t);
--- a/tests/singletokens.pinky
+++ b/tests/singletokens.pinky
@ -1 +1,3 @@
-+**-
+# this file is just to test single character tokens
 { } [ ] ( ) . , +
 - * ^ / ; ? %