Skip to content
This repository was archived by the owner on Jul 19, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
220 changes: 103 additions & 117 deletions src/lexer/lexer.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,14 @@
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "./tokens.h"
#include "../utils/hashes.h"

/**
* A token that was parsed by the Lexer.
*/
struct Token {
int type;
char value[longestKeywordSize]; // Increased size to handle longer values like numbers
};
#include "./lexer.h"
#include "./tokens.h"

/**
* The result of the lexer execution.
*/
struct LexerResult {
int size;
struct Token tokens[1024];
};
#include "../utils/hashes.h"

/**
* Sets the token type of the currently selected token in the LexerResult with the provided token type.
Expand All @@ -32,105 +20,103 @@ void pushToken(struct LexerResult* result, enum TokenType type) {
result->size++;
}

/**
* Runs the lexer on the provided string and returns the parsed tokens.
*/
struct LexerResult runLexer(char string[]) {
struct LexerResult result;
result.size = 0;

const int len = strlen(string);
for(int i = 0; i < len; ++i) {
const char c = string[i];

if (c == ' ' || c == '\t' || c == '\n') {
continue;
} else if (isdigit(c)) {
int numLen = 0;
char numStr[32] = {0};

while (i < len && (isdigit(string[i]) || string[i] == '.')) {
numStr[numLen++] = string[i++];
}
i--;

struct Token token;
token.type = NUMBER;
strncpy(token.value, numStr, sizeof(token.value) - 1);
result.tokens[result.size++] = token;
continue;
} else if (c == '"') {
int numLen = 0;
char strValue[longestKeywordSize] = {0};
i++;

while (i < len && string[i] != '"') {
strValue[numLen++] = string[i++];
}

struct Token token;
token.type = STRING;
strncpy(token.value, strValue, sizeof(token.value) - 1);
result.tokens[result.size++] = token;
continue;
} else if (isalpha(c)) {
int wordLen = 0;
char word[longestKeywordSize] = {0};

while (i < len && (isalnum(string[i]) || string[i] == '_')) {
word[wordLen++] = string[i++];
}
i--;

struct Token token;

if (strcmp(word, "func") == 0) {
token.type = FUNCTION;
} else if (strcmp(word, "true") == 0 || strcmp(word, "false") == 0) {
token.type = BOOLEAN_VALUE;
} else if (strcmp(word, "null") == 0) {
token.type = NU;
} else if(strcmp(word, "use") == 0) {
token.type = USE;
} else if(strcmp(word, "var") == 0) {
token.type = VAR;
}
else {
token.type = KEYWORD;
}

strncpy(token.value, word, sizeof(token.value) - 1);
result.tokens[result.size++] = token;
continue;
}

switch(c) {
case '{': pushToken(&result, BRACKETS_OPEN); break;
case '}': pushToken(&result, BRACKETS_CLOSE); break;
case '(': pushToken(&result, PAREN_OPEN); break;
case ')': pushToken(&result, PAREN_CLOSE); break;
case '[': pushToken(&result, ARRAY_OPEN); break;
case ']': pushToken(&result, ARRAY_CLOSE); break;
case ';': pushToken(&result, SEMICOLON); break;
case ',': pushToken(&result, COMMA); break;
case '=': pushToken(&result, DECLARE); break;
case '?':
pushToken(&result, NONE);
result.tokens[result.size - 1].value[0] = '?';
break;
case '+':
case '-':
case '/':
case '*':
pushToken(&result, MATH_OP);
result.tokens[result.size - 1].value[0] = c;
break;
}
}

if (result.size > 0 && strlen(result.tokens[result.size - 1].value) == 0) {
result.size--;
}

return result;
struct LexerResult runLexer(char* string) {
struct LexerResult result;
result.size = 0;

result.tokens = malloc(sizeof(struct Token) * 1024);

Comment on lines +27 to +28
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Fixed-size token array may cause overflow

Allocating result.tokens with a fixed size of 1024 tokens could lead to overflow if processing inputs with more tokens.

Implement dynamic resizing for result.tokens to handle inputs with more than 1024 tokens.

27     int tokensCapacity = 1024;
27     result.tokens = malloc(sizeof(struct Token) * tokensCapacity);
      ...
+     // Before pushing a new token
+     if (result.size >= tokensCapacity) {
+         tokensCapacity *= 2;
+         result.tokens = realloc(result.tokens, sizeof(struct Token) * tokensCapacity);
+         if (result.tokens == NULL) {
+             // Handle allocation failure
+         }
+     }

Committable suggestion skipped: line range outside the PR's diff.

char c;

while(c = *string++) {

int buffLen = 32;
char* buff = malloc(buffLen);

Comment on lines +33 to +35
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Memory leak: buff is not freed after use

The buffer buff is allocated with malloc but is not freed after it's no longer needed, leading to memory leaks.

Ensure that buff is freed after assigning it to result.tokens[result.size - 1].value or if it's no longer needed. For tokens that don't store buff, free it before moving to the next iteration.

+     // After using buff
+     if (buff_needed) {
+         result.tokens[result.size - 1].value = buff;
+     } else {
+         free(buff);
+     }

Also applies to: 52-52, 64-64

if(c == ' ' || c == '\t' || c == '\n') {
continue;
} else if (isdigit(c)) {
int numLen = 0;

while(isdigit(c)) {
buff[numLen] = c;
numLen++;

c = *string++;
}
Comment on lines +33 to +46
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Potential buffer overflow due to fixed buffer size

The buffers buff allocated with a fixed size of 32 bytes may overflow if the tokens (numbers, strings, keywords) are longer than 31 characters (reserving one byte for the null terminator). There is no check to prevent writing beyond the allocated memory, which can lead to undefined behavior or security vulnerabilities.

Apply this diff to implement dynamic resizing of buff:

33     int buffLen = 32;
34     char* buff = malloc(buffLen);
+     int buffIndex = 0;
      ...
42         buff[numLen] = c;
43         numLen++;
+         buffIndex++;
+         if (buffIndex >= buffLen - 1) { // Resize if needed
+             buffLen *= 2;
+             buff = realloc(buff, buffLen);
+             if (buff == NULL) {
+                 // Handle allocation failure
+             }
+         }
45         c = *string++;

Repeat similar changes for string parsing (lines 54-59) and keyword parsing (lines 67-72).

Also applies to: 54-59, 67-72

Comment on lines +41 to +46
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Missing null terminators in token values

When collecting characters for numbers and keywords, the buffers are not null-terminated, which can lead to undefined behavior when used as strings.

Add a null terminator at the end of the buffers after the loops.

46     }
+     buff[numLen] = '\0';

Apply similar changes for keyword parsing.

Also applies to: 67-72


pushToken(&result, NUMBER);
result.tokens[result.size - 1].value = buff;

} else if (c == '\"') {
int strLen = 0;

while(c != '\"') {
buff[strLen] = c;
strLen++;

c = *string++;
}
Comment on lines +54 to +59
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Infinite loop risk when parsing strings

The loop while(c != '\"') doesn't check for the end of the string. If a closing quote is missing, this will cause an infinite loop.

Modify the loop condition to check for the null terminator to prevent infinite loops.

54     while(c != '\"' && c != '\0') {
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
while(c != '\"') {
buff[strLen] = c;
strLen++;
c = *string++;
}
while(c != '\"' && c != '\0') {
buff[strLen] = c;
strLen++;
c = *string++;
}


pushToken(&result, STRING);
result.tokens[result.size - 1].value = buff;

} else if(isalpha(c)) {
int keywordLen = 0;

while(isalpha(c)) {
buff[keywordLen] = c;
keywordLen++;

c = *string++;
}

if(strcmp(buff, "func") == 0) {
pushToken(&result, FUNCTION);
}
else if(strcmp(buff, "true") == 0 || strcmp(buff, "false") == 0) {
pushToken(&result, BOOLEAN_VALUE);
result.tokens[result.size - 1].value = buff;
}
else if(strcmp(buff, "null") == 0) {
pushToken(&result, NU);
}
else if(strcmp(buff, "use") == 0) {
pushToken(&result, USE);
}
else if(strcmp(buff, "var") == 0) {
pushToken(&result, VAR);
}
else {
pushToken(&result, KEYWORD);
result.tokens[result.size - 1].value = buff;
}
}
else {
switch(c) {
case '{': pushToken(&result, BRACKETS_OPEN); break;
case '}': pushToken(&result, BRACKETS_CLOSE); break;
case '(': pushToken(&result, PAREN_OPEN); break;
case ')': pushToken(&result, PAREN_CLOSE); break;
case '[': pushToken(&result, ARRAY_OPEN); break;
case ']': pushToken(&result, ARRAY_CLOSE); break;
case ';': pushToken(&result, SEMICOLON); break;
case ',': pushToken(&result, COMMA); break;
case '=': pushToken(&result, DECLARE); break;
case '?': pushToken(&result, NONE); break;

case '+':
case '-':
case '*':
case '/':
case '^':
pushToken(&result, MATH_OP);
result.tokens[result.size - 1].value[0] = c;
}
Comment on lines +114 to +115
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Uninitialized value field in MATH_OP tokens

For math operator tokens, you're assigning c to result.tokens[result.size - 1].value[0], but value is not allocated, which may cause undefined behavior.

Allocate memory for value before assigning to it.

114     pushToken(&result, MATH_OP);
+       result.tokens[result.size - 1].value = malloc(2);
115     result.tokens[result.size - 1].value[0] = c;
+       result.tokens[result.size - 1].value[1] = '\0';
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
result.tokens[result.size - 1].value[0] = c;
}
result.tokens[result.size - 1].value = malloc(2);
result.tokens[result.size - 1].value[0] = c;
result.tokens[result.size - 1].value[1] = '\0';
}

}
}

return result;
}


12 changes: 2 additions & 10 deletions src/lexer/lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,19 @@
#include <stdio.h>
#include "./tokens.h"

/**
* Represents a single token from lexical analysis
*/
struct Token {
enum TokenType type;
char value[32]; // Using 32 as longestKeywordSize
};

/**
 * Contains the results of lexical analysis.
 *
 * The tokens array is heap-allocated by runLexer; the caller is
 * responsible for freeing it (and each token's value) when done.
 */
struct LexerResult {
    int size;             /* number of tokens stored in the array */
    struct Token* tokens; /* heap-allocated array of parsed tokens */
};

/**
 * Performs lexical analysis on an input string.
 *
 * The parameter is char* to match the definition in lexer.c; the input is
 * read only. Returns a LexerResult containing the parsed tokens.
 */
struct LexerResult runLexer(char* input);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Changing input parameter to char* may lead to unintended modifications

The runLexer function now takes a char* instead of a const char*, which allows modification of the input string. This could cause unexpected side effects if the input string is used elsewhere.

If the function does not need to modify the input string, change the parameter back to const char*.


/**
* Adds a token to the LexerResult
Expand Down
55 changes: 26 additions & 29 deletions src/lexer/tokens.h
Original file line number Diff line number Diff line change
@@ -1,39 +1,36 @@
#ifndef TOKENS_H
#define TOKENS_H

#define longestKeywordSize 32
#define smallestKeywordSize 4

/**
 * The kinds of tokens the lexer can produce.
 *
 * Values are implicitly numbered from 0 in declaration order.
 */
enum TokenType {
    FUNCTION,
    RETURN,
    VAR,
    BRACKETS_OPEN,
    BRACKETS_CLOSE,
    PAREN_OPEN,
    PAREN_CLOSE,
    ARRAY_OPEN,
    ARRAY_CLOSE,
    NUMBER,
    STRING,
    BOOLEAN_VALUE,
    NU,        /* the "null" literal */
    KEYWORD,   /* any identifier that is not a reserved word */
    SEMICOLON,
    COMMA,
    DECLARE,   /* '=' */
    USE,
    NONE,      /* '?' */
    MATH_OP    /* '+', '-', '*', '/', '^' */
};

struct KeywordResult {
int count;
char* keywords[10];
enum TokenType types[10];
/**
 * A lexer token generated by the Lexer.
 *
 * For NUMBER, STRING, BOOLEAN_VALUE and KEYWORD tokens, value points to a
 * heap-allocated copy of the token's source text (allocated in runLexer).
 * NOTE(review): for other token kinds value may be left unset — confirm
 * pushToken initializes it before relying on it.
 */
struct Token {
enum TokenType type;
char* value;
};

void initKeywords();
struct KeywordResult getKeywords(char start);

#endif
Loading