diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c
index b9035b6..f0d8e21 100644
--- a/src/lexer/lexer.c
+++ b/src/lexer/lexer.c
@@ -3,26 +3,14 @@
  */
 #include <stdio.h>
+#include <stdlib.h>
 #include <ctype.h>
 #include <string.h>
-#include "./tokens.h"
-#include "../utils/hashes.h"
 
-/**
- * A token that was parsed by the Lexer.
- */
-struct Token {
-    int type;
-    char value[longestKeywordSize]; // Increased size to handle longer values like numbers
-};
+#include "./lexer.h"
+#include "./tokens.h"
 
-/**
- * The result of the lexer execution.
- */
-struct LexerResult {
-    int size;
-    struct Token tokens[1024];
-};
+#include "../utils/hashes.h"
 
 /**
  * Sets the token type of the currently selected token in the LexerResult with the provided token type.
@@ -32,105 +20,117 @@ void pushToken(struct LexerResult* result, enum TokenType type) {
     result->size++;
 }
 
-/**
- * Runs the lexer on the provided string and returns the parsed tokens.
- */
-struct LexerResult runLexer(char string[]) {
-    struct LexerResult result;
-    result.size = 0;
-
-    const int len = strlen(string);
-    for(int i = 0; i < len; ++i) {
-        const char c = string[i];
-
-        if (c == ' ' || c == '\t' || c == '\n') {
-            continue;
-        } else if (isdigit(c)) {
-            int numLen = 0;
-            char numStr[32] = {0};
-
-            while (i < len && (isdigit(string[i]) || string[i] == '.')) {
-                numStr[numLen++] = string[i++];
-            }
-            i--;
-
-            struct Token token;
-            token.type = NUMBER;
-            strncpy(token.value, numStr, sizeof(token.value) - 1);
-            result.tokens[result.size++] = token;
-            continue;
-        } else if (c == '"') {
-            int numLen = 0;
-            char strValue[longestKeywordSize] = {0};
-            i++;
-
-            while (i < len && string[i] != '"') {
-                strValue[numLen++] = string[i++];
-            }
-
-            struct Token token;
-            token.type = STRING;
-            strncpy(token.value, strValue, sizeof(token.value) - 1);
-            result.tokens[result.size++] = token;
-            continue;
-        } else if (isalpha(c)) {
-            int wordLen = 0;
-            char word[longestKeywordSize] = {0};
-
-            while (i < len && (isalnum(string[i]) || string[i] == '_')) {
-                word[wordLen++] = string[i++];
-            }
-            i--;
-
-            struct Token token;
-
-            if (strcmp(word, "func") == 0) {
-                token.type = FUNCTION;
-            } else if (strcmp(word, "true") == 0 || strcmp(word, "false") == 0) {
-                token.type = BOOLEAN_VALUE;
-            } else if (strcmp(word, "null") == 0) {
-                token.type = NU;
-            } else if(strcmp(word, "use") == 0) {
-                token.type = USE;
-            } else if(strcmp(word, "var") == 0) {
-                token.type = VAR;
-            }
-            else {
-                token.type = KEYWORD;
-            }
-
-            strncpy(token.value, word, sizeof(token.value) - 1);
-            result.tokens[result.size++] = token;
-            continue;
-        }
-
-        switch(c) {
-            case '{': pushToken(&result, BRACKETS_OPEN); break;
-            case '}': pushToken(&result, BRACKETS_CLOSE); break;
-            case '(': pushToken(&result, PAREN_OPEN); break;
-            case ')': pushToken(&result, PAREN_CLOSE); break;
-            case '[': pushToken(&result, ARRAY_OPEN); break;
-            case ']': pushToken(&result, ARRAY_CLOSE); break;
-            case ';': pushToken(&result, SEMICOLON); break;
-            case ',': pushToken(&result, COMMA); break;
-            case '=': pushToken(&result, DECLARE); break;
-            case '?':
-                pushToken(&result, NONE);
-                result.tokens[result.size - 1].value[0] = '?';
-                break;
-            case '+':
-            case '-':
-            case '/':
-            case '*':
-                pushToken(&result, MATH_OP);
-                result.tokens[result.size - 1].value[0] = c;
-                break;
-        }
-    }
-
-    if (result.size > 0 && strlen(result.tokens[result.size - 1].value) == 0) {
-        result.size--;
-    }
-
-    return result;
-}
+/**
+ * Runs the lexer on the provided NUL-terminated input string and returns
+ * the parsed tokens.
+ *
+ * Ownership: result.tokens and every non-NULL token .value are heap
+ * allocations that the caller is responsible for freeing.
+ */
+struct LexerResult runLexer(const char* string) {
+    struct LexerResult result;
+    result.size = 0;
+
+    /* calloc zeroes the array, so every token's .value starts out NULL
+     * instead of an uninitialized (and unwritable) pointer. */
+    result.tokens = calloc(1024, sizeof(struct Token));
+
+    while (*string != '\0' && result.size < 1024) {
+        char c = *string;
+
+        if (c == ' ' || c == '\t' || c == '\n') {
+            string++; /* skip whitespace */
+        } else if (isdigit((unsigned char)c)) {
+            char* buff = malloc(32);
+            int numLen = 0;
+
+            while (isdigit((unsigned char)*string) && numLen < 31) {
+                buff[numLen++] = *string++;
+            }
+            buff[numLen] = '\0'; /* terminate before the lexeme is used */
+
+            pushToken(&result, NUMBER);
+            result.tokens[result.size - 1].value = buff;
+        } else if (c == '"') {
+            char* buff = malloc(32);
+            int strLen = 0;
+
+            string++; /* skip the opening quote */
+            while (*string != '\0' && *string != '"' && strLen < 31) {
+                buff[strLen++] = *string++;
+            }
+            buff[strLen] = '\0';
+            if (*string == '"') {
+                string++; /* skip the closing quote */
+            }
+
+            pushToken(&result, STRING);
+            result.tokens[result.size - 1].value = buff;
+        } else if (isalpha((unsigned char)c)) {
+            char* buff = malloc(32);
+            int keywordLen = 0;
+
+            while (isalpha((unsigned char)*string) && keywordLen < 31) {
+                buff[keywordLen++] = *string++;
+            }
+            buff[keywordLen] = '\0';
+
+            if (strcmp(buff, "func") == 0) {
+                pushToken(&result, FUNCTION);
+                free(buff); /* fixed-keyword tokens don't keep the lexeme */
+            } else if (strcmp(buff, "true") == 0 || strcmp(buff, "false") == 0) {
+                pushToken(&result, BOOLEAN_VALUE);
+                result.tokens[result.size - 1].value = buff;
+            } else if (strcmp(buff, "null") == 0) {
+                pushToken(&result, NU);
+                free(buff);
+            } else if (strcmp(buff, "use") == 0) {
+                pushToken(&result, USE);
+                free(buff);
+            } else if (strcmp(buff, "var") == 0) {
+                pushToken(&result, VAR);
+                free(buff);
+            } else {
+                pushToken(&result, KEYWORD);
+                result.tokens[result.size - 1].value = buff;
+            }
+        } else {
+            string++; /* consume the single-character symbol */
+
+            switch (c) {
+            case '{': pushToken(&result, BRACKETS_OPEN); break;
+            case '}': pushToken(&result, BRACKETS_CLOSE); break;
+            case '(': pushToken(&result, PAREN_OPEN); break;
+            case ')': pushToken(&result, PAREN_CLOSE); break;
+            case '[': pushToken(&result, ARRAY_OPEN); break;
+            case ']': pushToken(&result, ARRAY_CLOSE); break;
+            case ';': pushToken(&result, SEMICOLON); break;
+            case ',': pushToken(&result, COMMA); break;
+            case '=': pushToken(&result, DECLARE); break;
+            case '?': pushToken(&result, NONE); break;
+
+            case '+':
+            case '-':
+            case '*':
+            case '/':
+            case '^': {
+                /* MATH_OP gets its own heap lexeme; writing through the
+                 * previously uninitialized .value pointer was UB. */
+                char* op = malloc(2);
+                op[0] = c;
+                op[1] = '\0';
+                pushToken(&result, MATH_OP);
+                result.tokens[result.size - 1].value = op;
+                break;
+            }
+
+            default:
+                break; /* unknown characters are ignored */
+            }
+        }
+    }
+
+    return result;
+}
diff --git a/src/lexer/lexer.h b/src/lexer/lexer.h
index d4e29bf..f3a9072 100644
--- a/src/lexer/lexer.h
+++ b/src/lexer/lexer.h
@@ -4,27 +4,20 @@
 #include
 #include "./tokens.h"
 
-/**
- * Represents a single token from lexical analysis
- */
-struct Token {
-    enum TokenType type;
-    char value[32]; // Using 32 as longestKeywordSize
-};
-
 /**
  * Contains the results of lexical analysis
  */
 struct LexerResult {
     int size;
-    struct Token tokens[1024];
+    struct Token* tokens;
 };
 
 /**
  * Performs lexical analysis on an input string
  * Returns a LexerResult containing the tokens
  */
 struct LexerResult runLexer(const char* input);
 
 /**
  * Adds a token to the LexerResult
diff --git a/src/lexer/tokens.h b/src/lexer/tokens.h
index e1cfaa9..a5b7ece 100644
--- a/src/lexer/tokens.h
+++ b/src/lexer/tokens.h
@@ -1,39 +1,36 @@
 #ifndef TOKENS_H
 #define TOKENS_H
 
-#define longestKeywordSize 32
-#define smallestKeywordSize 4
-
 enum TokenType {
-    FUNCTION = 1,
-    RETURN = 2,
-    VAR = 3,
-    BRACKETS_OPEN = 4,
-    BRACKETS_CLOSE = 5,
-    PAREN_OPEN = 6,
-    PAREN_CLOSE = 7,
-    ARRAY_OPEN = 8,
-    ARRAY_CLOSE = 9,
-    NUMBER = 10,
-    STRING = 11,
-    BOOLEAN_VALUE = 12,
-    NU = 13,
-    KEYWORD = 14,
-    SEMICOLON = 15,
-    COMMA = 16,
-    DECLARE = 17,
-    USE = 18,
-    NONE = 19,
-    MATH_OP = 20
+    FUNCTION,
+    RETURN,
+    VAR,
+    BRACKETS_OPEN,
+    BRACKETS_CLOSE,
+    PAREN_OPEN,
+    PAREN_CLOSE,
+    ARRAY_OPEN,
+    ARRAY_CLOSE,
+    NUMBER,
+    STRING,
+    BOOLEAN_VALUE,
+    NU,
+    KEYWORD,
+    SEMICOLON,
+    COMMA,
+    DECLARE,
+    USE,
+    NONE,
+    MATH_OP
 };
 
-struct KeywordResult {
-    int count;
-    char* keywords[10];
-    enum TokenType types[10];
+/**
+ * A lexer token generated by the Lexer.
+ */
+struct Token {
+    enum TokenType type;
+    char* value;
 };
 
-void initKeywords();
-struct KeywordResult getKeywords(char start);
 #endif