Add more single-character tokens; ignore whitespace and comments

2025-07-04 14:39:45 -05:00 · 2025-07-04 14:39:45 -05:00 · 68573a4599
commit 68573a4599
parent 2dab03db23
5 changed files with 59 additions and 10 deletions
--- a/Readme.org
+++ b/Readme.org
@ -0,0 +1,8 @@
+* Pinky
+
+Pinky is a small toy programming language, designed to teach compiler and
+interpreter development.
+
+In this repository, you'll find the implementation of both an interpreter and
+a compiler for Pinky. The project is pretty new, so there is still quite a way
+to go.
--- a/src/lexer.c
+++ b/src/lexer.c
@ -69,7 +69,7 @@ add_token (enum token_type type, struct lexer *l)
 	char *lexeme = calloc (size, sizeof (char));
 	strncpy (lexeme, buffer_get (l->source) + l->start, size);

-	vector_push_back (token_create_heap (type, lexeme), &l->tokens);
+	vector_push_back (token_create_heap (type, l->line, lexeme), &l->tokens);
 	free (lexeme);
 }

@ -102,12 +102,45 @@ lexer_lex (struct lexer *l)
 			l->start = l->cur;

 			char c = advance (l);
-			if (c == '+')
+			if (c == '\n')
+				l->line++;
+			else if (c == ' ' || c == '\t' || c == '\r')
+				continue;
+			else if (c == '#')
+				while (peek (l) != '\n')
+					advance (l);
+			else if (c == '(')
+				add_token (TOK_LPAREN, l);
+			else if (c == ')')
+				add_token (TOK_RPAREN, l);
+			else if (c == '{')
+				add_token (TOK_LCURLY, l);
+			else if (c == '}')
+				add_token (TOK_RCURLY, l);
+			else if (c == '[')
+				add_token (TOK_LSQUAR, l);
+			else if (c == ']')
+				add_token (TOK_RSQUAR, l);
+			else if (c == '.')
+				add_token (TOK_DOT, l);
+			else if (c == ',')
+				add_token (TOK_COMMA, l);
+			else if (c == '+')
 				add_token (TOK_PLUS, l);
-			if (c == '-')
+			else if (c == '-')
 				add_token (TOK_MINUS, l);
-			if (c == '*')
+			else if (c == '*')
 				add_token (TOK_STAR, l);
+			else if (c == '^')
+				add_token (TOK_CARET, l);
+			else if (c == '/')
+				add_token (TOK_SLASH, l);
+			else if (c == ';')
+				add_token (TOK_SEMICOLON, l);
+			else if (c == '?')
+				add_token (TOK_QUESTION, l);
+			else if (c == '%')
+				add_token (TOK_MOD, l);
 		}
 }

@ -119,7 +152,8 @@ lexer_print (struct lexer *l)
 	for (unsigned int i = 0; i < l->tokens.length; i++)
 		{
 			struct token *t = l->tokens.elements[i];
-			printf ("(TOK_TYPE: %d, \"%s\")\n", t->type, buffer_get (&t->lexeme));
+			printf ("(TOK_TYPE: %d, \"%s\" - at line %d)\n", t->type,
+							buffer_get (&t->lexeme), t->line);
 		}
 }

--- a/src/token.c
+++ b/src/token.c
@ -3,24 +3,26 @@
 #include <stdlib.h>

 struct token
-token_create (enum token_type type, const char *lexeme)
+token_create (enum token_type type, unsigned int line, const char *lexeme)
 {
 	struct token t = { 0 };

 	t.type = type;
 	t.lexeme = buffer_create ();
+	t.line = line;
 	buffer_append (lexeme, &t.lexeme);

 	return t;
 }

 struct token *
-token_create_heap (enum token_type type, const char *lexeme)
+token_create_heap (enum token_type type, unsigned int line, const char *lexeme)
 {
 	struct token *t = calloc (1, sizeof (struct token));

 	t->type = type;
 	t->lexeme = buffer_create ();
+	t->line = line;
 	buffer_append (lexeme, &t->lexeme);

 	return t;
--- a/src/token.h
+++ b/src/token.h
@ -61,10 +61,13 @@ struct token
 {
 	enum token_type type;
 	struct buffer lexeme;
+	unsigned int line;
 };

-struct token token_create (enum token_type type, const char *lexeme);
-struct token *token_create_heap (enum token_type type, const char *lexeme);
+struct token token_create (enum token_type type, unsigned int line,
+													 const char *lexeme);
+struct token *token_create_heap (enum token_type type, unsigned int line,
+																 const char *lexeme);

 void token_free (struct token *t);
 void token_free_heap (struct token *t);
--- a/tests/singletokens.pinky
+++ b/tests/singletokens.pinky
@ -1 +1,3 @@
-+**-
+# this file is just to test single character tokens
+{ } [ ] ( ) . , +
+- * ^ / ; ? %