started to work with multicharacter tokens

2025-07-04 15:07:51 -05:00 · 2025-07-04 15:07:51 -05:00 · aa46974753
commit aa46974753
parent 68573a4599
3 changed files with 68 additions and 1 deletions
--- a/src/lexer.c
+++ b/src/lexer.c
@ -15,6 +15,9 @@ advance (struct lexer *l)
 	if (!l)
 		return 0;

+	if (l->cur + 1 > buffer_length (l->source))
+		return 0;
+
 	char *code = buffer_get (l->source);
 	return code[l->cur++];
 }
@ -27,6 +30,9 @@ peek (struct lexer *l)
 	if (!l)
 		return 0;

+	if (l->cur + 1 > buffer_length (l->source))
+		return 0;
+
 	char *code = buffer_get (l->source);
 	return code[l->cur];
 }
@ -39,6 +45,9 @@ lookahead (unsigned int n, struct lexer *l)
 	if (!l)
 		return 0;

+	if (l->cur + n > buffer_length (l->source))
+		return 0;
+
 	char *code = buffer_get (l->source);
 	return code[l->cur + n];
 }
@ -51,6 +60,9 @@ match (char expected, struct lexer *l)
 	if (!l)
 		return 0;

+	if (l->cur + 1 > buffer_length (l->source))
+		return 0;
+
 	char *code = buffer_get (l->source);
 	if (code[l->cur] != expected)
 		return 0;
@ -102,13 +114,16 @@ lexer_lex (struct lexer *l)
 			l->start = l->cur;

 			char c = advance (l);
+
+			/* newlines, whitespace and '#' line comments */
 			if (c == '\n')
 				l->line++;
 			else if (c == ' ' || c == '\t' || c == '\r')
 				continue;
 			else if (c == '#')
-				while (peek (l) != '\n')
+				while (peek (l) != '\n' && !(l->cur >= buffer_length (l->source)))
 					advance (l);
+			/* single-character tokens */
 			else if (c == '(')
 				add_token (TOK_LPAREN, l);
 			else if (c == ')')
@ -141,6 +156,20 @@ lexer_lex (struct lexer *l)
 				add_token (TOK_QUESTION, l);
 			else if (c == '%')
 				add_token (TOK_MOD, l);
+			/* multi-character tokens */
+			else if (c == '=' && match ('=', l))
+				add_token (TOK_EQ, l);
+			else if (c == '~')
+				add_token (match ('=', l) ? TOK_EQ : TOK_NOT, l);
+			else if (c == '<')
+				add_token (match ('=', l) ? TOK_LE : TOK_LT, l);
+			else if (c == '>')
+				add_token (match ('=', l) ? TOK_GE : TOK_GT, l);
+			else if (c == ':')
+				add_token (match ('=', l) ? TOK_ASSIGN : TOK_COLON, l);
+			/* TODO: check if it's a number, and determine if it's a float or integer */
+			/* TODO: check if it's a " or ', and get the string */
+			/* TODO: check if it's an alpha character or _, then handle identifiers */
 		}
 }

--- a/src/main.c
+++ b/src/main.c
@ -1,6 +1,8 @@
 #include <stdio.h>
 #include <stdlib.h>

+#include <time.h>
+
 #include "utils/buffer.h"

 #include "lexer.h"
@ -14,6 +16,9 @@ main (int argc, char **argv)
 			return EXIT_FAILURE;
 		}

+	struct timespec ts1, ts2;
+	clock_gettime (CLOCK_REALTIME, &ts1);
+
 	struct buffer code = buffer_create ();
 	if (!buffer_read (argv[1], &code))
 		return EXIT_FAILURE;
@ -26,5 +31,12 @@ main (int argc, char **argv)
 	lexer_free (&lexer);
 	buffer_free (&code);

+	clock_gettime (CLOCK_REALTIME, &ts2);
+	unsigned long long ndiff = ts2.tv_nsec - ts1.tv_nsec;
+	unsigned long long sdiff = ts2.tv_sec - ts1.tv_sec;
+
+	printf ("it took %llu nanoseconds and %llu seconds to complete.\n", ndiff,
+					sdiff);
+
 	return EXIT_SUCCESS;
 }
--- a/tests/multichtokens.pinky
+++ b/tests/multichtokens.pinky
@ -0,0 +1,26 @@
+# the equals token:
+==
+
+# the not equals token:
+~=
+
+# the logical not operator
+~
+
+# less or equal
+<=
+
+# less than
+<
+
+# greater or equal
+>=
+
+# greater than
+>
+
+# assignment
+:=
+
+# colon
+: