diff --git a/Makefile b/Makefile
index 6e63292..29bec26 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,9 @@
-# Detect operating system and set commands accordingly
+#
+#
+# The Quickfall makefile. Builds the Quickfall toolchain.
+#
+#
+
 ifeq ($(OS),Windows_NT)
     DETECTED_OS := Windows
     TARGET_EXTENSION := .exe
@@ -15,74 +20,80 @@ else
     PATHSEP := /
 endif
 
-# Compiler settings
-CFLAGS = -Wall -Wextra -std=c11 -Wimplicit-function-declaration -g
+#
+# Compiling configuration.
+#
+
+# Which compiler to use (must be equal to the command used to compile)
+COMPILER = gcc
+
+# Flags that are passed to the compiler
+FLAGS = -Wall -Wextra -Wimplicit-function-declaration
+
+#
+# Path configuration.
+#
 
-# Directories
+# The directory containing the code of Quickfall.
 SRC_DIR = src
-CLI_DIR = $(SRC_DIR)/cli
-LEXER_DIR = $(SRC_DIR)/lexer
-PARSER_DIR = $(SRC_DIR)/parser
-COMPILER_DIR = $(SRC_DIR)/compiler
-COMPILER_PLATFORMS = $(COMPILER_DIR)/platforms
-UTILS_DIR = $(SRC_DIR)/utils
-BUILD_DIR = build
-
-# Source files
-SOURCES = $(LEXER_DIR)/lexer.c \
-          $(LEXER_DIR)/tokens.c \
-          $(PARSER_DIR)/parser.c \
-          $(PARSER_DIR)/ast.c \
-          $(COMPILER_DIR)/compiler.c \
-          $(COMPILER_DIR)/stdl.c \
-          $(COMPILER_PLATFORMS)/linux.c \
-          $(COMPILER_PLATFORMS)/windowsx86-64.c \
-          $(UTILS_DIR)/hashes.c \
-
-# Executable name
+
+# The directory containing the benchmarks of Quickfall.
+BENCH_SRC_DIR = benchmarks
+
+#
+# Output definition
+#
+
+# The target of a normal mode compiling.
 TARGET = quickfall$(TARGET_EXTENSION)
+
+# The target of a bench mode compiling.
 BENCH_TARGET = bench$(TARGET_EXTENSION)
 
-# Default target
-all: check_commands $(TARGET)
-bench: check_commands $(BENCH_TARGET)
 
-OBJECTS = $(SOURCES:%.c=$(BUILD_DIR)/%.o)
+#
+# Building variables
+#
+
+# The sources that are going to be compiled in normal mode.
+# GNU make's wildcard has no recursive "**": each "*" matches exactly one
+# path component, so one pattern is listed per directory depth.
+SOURCES := $(wildcard $(SRC_DIR)/*/*.c $(SRC_DIR)/*/*/*.c)
+
+# The sources that are going to be compiled in bench mode: the benchmark
+# entry point replaces the CLI one so that only a single main() is linked.
+BENCH_SOURCES := $(filter-out $(SRC_DIR)/cli/main.c,$(SOURCES)) \
+                 $(wildcard $(BENCH_SRC_DIR)/*.c $(BENCH_SRC_DIR)/*/*.c)
 
-# Check commands target
-check_commands:
-	@echo "Using compiler: gcc"
-	@echo "Operating System: $(DETECTED_OS)"
+#
+# Building logic
+#
+
+.PHONY: all bench clean prepare_build
+
+all: prepare_build $(TARGET)
+bench: prepare_build $(BENCH_TARGET)
+
+prepare_build:
+	@echo [INFO] Using "${COMPILER}" as a compiler!
+	@echo [INFO] Detected current operating system as ${DETECTED_OS}
 	$(CHECK_COMMANDS)
-	@echo "Sources: $(SOURCES)"
-
-# Create build directory structure
-$(BUILD_DIR):
-	$(MKDIR) $(BUILD_DIR)$(PATHSEP)benchmarks
-	$(MKDIR) $(BUILD_DIR)$(PATHSEP)$(SRC_DIR)$(PATHSEP)cli
-	$(MKDIR) $(BUILD_DIR)$(PATHSEP)$(SRC_DIR)$(PATHSEP)compiler
-	$(MKDIR) $(BUILD_DIR)$(PATHSEP)$(SRC_DIR)$(PATHSEP)compiler$(PATHSEP)platforms
-	$(MKDIR) $(BUILD_DIR)$(PATHSEP)$(SRC_DIR)$(PATHSEP)lexer
-	$(MKDIR) $(BUILD_DIR)$(PATHSEP)$(SRC_DIR)$(PATHSEP)parser
-	$(MKDIR) $(BUILD_DIR)$(PATHSEP)$(SRC_DIR)$(PATHSEP)utils
-
-# Compile source files
-$(BUILD_DIR)/%.o: %.c | $(BUILD_DIR)
-	gcc $(CFLAGS) -c $< -o $@
-
-# Link object files
-$(TARGET): $(OBJECTS)
-	gcc $(CFLAGS) -c src/cli/main.c -o build/src/cli/main.o
-	gcc build/src/cli/main.o $(OBJECTS) -g -o $@
-
-$(BENCH_TARGET): $(OBJECTS)
-	gcc $(CFLAGS) -c benchmarks/main.c -o build/benchmarks/main.o
-	gcc build/benchmarks/main.o $(OBJECTS) -g -o $@
-
-# Clean build files
-clean:
-	$(RM) $(BUILD_DIR)
+	@echo [INFO] Clearing old builds
+	$(RM) build
 	$(RM) $(TARGET)
+	$(RM) $(BENCH_TARGET)
+	@echo [INFO] Starting building logic
+
+# prepare_build is an order-only prerequisite so that the clean-up always
+# runs before the link step, even under "make -j".
+$(TARGET): $(SOURCES) | prepare_build
+	$(COMPILER) $(FLAGS) $(SOURCES) -o $@
+
+$(BENCH_TARGET): $(BENCH_SOURCES) | prepare_build
+	$(COMPILER) $(FLAGS) $(BENCH_SOURCES) -o $@
+
+clean:
+	$(RM) build
+	$(RM) $(TARGET)
+	$(RM) $(BENCH_TARGET)
 
-# Phony targets
-.PHONY: all clean check_commands
diff --git a/src/cli/main.c b/src/cli/main.c
index f86a018..02c76a1 100644
--- a/src/cli/main.c
+++ b/src/cli/main.c
@@ -1,4 +1,4 @@
-/**
+/*
  * The file of the Quickfall CLI.
  * A modern, fast and lightweight programming language.
* Usage: @@ -13,6 +13,7 @@ #include "../lexer/lexer.h" #include "../parser/parser.h" #include "../compiler/compiler.h" +#include "../compilerv2/compilerv2.h" #include "../utils/logging.c" @@ -111,18 +112,19 @@ int main(int argc, char* argv[]) { struct LexerResult result = runLexer(buff); struct ASTNode* root = runParser(result); - char* output = compile(root, "win"); // todo: change the platform. - + struct Context ctx = parseContext(root); + + char* output = compileV2(ctx); + if(output == NULL) { - printf("%sError: Compiling failed! Coudln't gather the output!%s\n", TEXT_RED, RESET); + printf("Error: the compiled output is null! Something went wrong!\n"); return -1; - } + } fptr = fopen(outputFile, "w"); fprintf(fptr, output); fclose(fptr); - break; case 'v': if(strlen(argv[1]) > 1 && strcmp(argv[1], "version") != 0) { diff --git a/src/compilerv2/att/att-linux.h b/src/compilerv2/att/att-linux.h new file mode 100644 index 0000000..fb6645e --- /dev/null +++ b/src/compilerv2/att/att-linux.h @@ -0,0 +1,13 @@ +/** + * The Linux-AT & T assembly format. + */ + +/** + * The registries used for function arguments. + */ +char* ATTLINUX_ARGUMENT_REGISTRIES[] = {"rdi", "rsi", "rdx", "rcx", "r8", "r9"}; + +/** + * The types of different data values that are registered in sections and not the stack. + */ +char* ATTLINUX_SECTION_TYPES[] = {".string"}; diff --git a/src/compilerv2/att/att-win.h b/src/compilerv2/att/att-win.h new file mode 100644 index 0000000..d7f8a1f --- /dev/null +++ b/src/compilerv2/att/att-win.h @@ -0,0 +1,13 @@ +/** + * The Windows-AT & T assembly format. + */ + +/** + * The registries used for function arguments. + */ +char* ATTWIN_ARGUMENT_REGISTRIES[] = {"rcx", "rdx", "r8", "r9"}; + +/** + * The types of different data values that are registered in sections and not the stack. 
+ */
+char* ATTWIN_SECTION_TYPES[] = {".ascii"};
diff --git a/src/compilerv2/compilerv2.c b/src/compilerv2/compilerv2.c
new file mode 100644
index 0000000..322f5b2
--- /dev/null
+++ b/src/compilerv2/compilerv2.c
@@ -0,0 +1,316 @@
+/**
+ * The compiler of Quickfall.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "./compilerv2.h"
+#include "./objects.h"
+#include "../parser/ast.h"
+#include "../utils/logging.c"
+
+#include "./att/att-win.h"
+#include "./att/att-linux.h"
+
+#include "../utils/hashmap.h"
+#include "../utils/hash.h"
+
+/**
+ * Assembly format definitions. Will be changed by the architecture.
+ */
+char** ARGUMENT_REGISTRIES = NULL;
+char** SECTION_TYPES = NULL;
+
+/**
+ * The maximum hash the hashmaps can store.
+ */
+#define MAX_HASH_CAPACITY 256000
+
+/**
+ * A growable, always NUL-terminated string used to assemble the assembly output.
+ * Once an allocation fails the builder is marked as failed and further appends are ignored.
+ */
+struct StringBuilder {
+    char* data;
+    size_t length;
+    size_t capacity;
+    int failed;
+};
+
+/**
+ * Initializes the builder with an empty string.
+ */
+static void sbInit(struct StringBuilder* sb) {
+    sb->length = 0;
+    sb->capacity = 1024;
+    sb->data = malloc(sb->capacity);
+    sb->failed = sb->data == NULL;
+
+    if(!sb->failed) sb->data[0] = '\0';
+}
+
+/**
+ * Appends the text to the builder, growing the buffer when needed.
+ */
+static void sbAppend(struct StringBuilder* sb, const char* text) {
+    if(sb->failed || text == NULL) return;
+
+    size_t extra = strlen(text);
+
+    if(sb->length + extra + 1 > sb->capacity) {
+        size_t capacity = sb->capacity;
+        while(sb->length + extra + 1 > capacity) capacity *= 2;
+
+        char* grown = realloc(sb->data, capacity);
+        if(grown == NULL) {
+            sb->failed = 1;
+            return;
+        }
+
+        sb->data = grown;
+        sb->capacity = capacity;
+    }
+
+    memcpy(sb->data + sb->length, text, extra + 1);
+    sb->length += extra;
+}
+
+/**
+ * Appends the decimal representation of the value to the builder.
+ */
+static void sbAppendInt(struct StringBuilder* sb, int value) {
+    char digits[16];
+    snprintf(digits, sizeof(digits), "%d", value);
+    sbAppend(sb, digits);
+}
+
+/**
+ * Maps a name to a hash that always fits in the hashmaps created by parseContext.
+ */
+static int hashName(char* name) {
+    return (int) (hashstr(name) % MAX_HASH_CAPACITY);
+}
+
+/**
+ * Parses the AST tree into a context.
+ *
+ * Walks the statement list that follows the given node (the node itself is skipped)
+ * and registers every variable and function definition. Parsing stops at the first
+ * redefinition and the context gathered so far is returned.
+ */
+struct Context parseContext(struct ASTNode* node) {
+    struct Context ctx = {0};
+
+    int variableCapacity = 50;
+    int functionCapacity = 50;
+
+    ctx.variables = malloc(sizeof(struct Variable*) * variableCapacity);
+    ctx.functions = malloc(sizeof(struct Function*) * functionCapacity);
+
+    ctx.variableHashMap = createHashmap(512, 500);
+    ctx.functionHashMap = createHashmap(512, 500);
+
+    ctx.variableCount = 0;
+    ctx.functionCount = 0;
+
+    if(node == NULL || ctx.variables == NULL || ctx.functions == NULL) return ctx;
+
+    while(node->next != NULL) {
+        node = node->next;
+
+        switch (node->type) {
+            case AST_VARIABLE_DEF: {
+                if(node->right->type != AST_VARIABLE_VALUE) {
+                    // Registering it anyway would leave var->value uninitialized.
+                    printf("%sError: Invalid token type as variable value!%s\n", TEXT_HRED, RESET);
+                    break;
+                }
+
+                int hash = hashName(node->left->value);
+
+                if(hashGet(ctx.variableHashMap, hash) != NULL) {
+                    printf("%sError: Variable %s is already defined!%s\n", TEXT_HRED, node->left->value, RESET);
+                    return ctx;
+                }
+
+                if(ctx.variableCount == variableCapacity) {
+                    struct Variable** grown = realloc(ctx.variables, sizeof(struct Variable*) * variableCapacity * 2);
+                    if(grown == NULL) return ctx;
+
+                    ctx.variables = grown;
+                    variableCapacity *= 2;
+                }
+
+                struct Variable* var = malloc(sizeof(struct Variable));
+                if(var == NULL) return ctx;
+
+                var->name = node->left->value;
+                var->type = node->value;
+                var->value = node->right->value;
+                var->varType = 0;
+
+                ctx.variables[ctx.variableCount] = var;
+
+                hashPut(ctx.variableHashMap, hash, var);
+
+                ctx.variableCount++;
+                break;
+            }
+            case AST_FUNCTION_DEF: {
+                char* name = node->left->left->value;
+                int hash = hashName(name);
+
+                if(hashGet(ctx.functionHashMap, hash) != NULL) {
+                    printf("%sError: Function %s is already defined!%s\n", TEXT_HRED, name, RESET);
+                    return ctx;
+                }
+
+                if(ctx.functionCount == functionCapacity) {
+                    struct Function** grown = realloc(ctx.functions, sizeof(struct Function*) * functionCapacity * 2);
+                    if(grown == NULL) return ctx;
+
+                    ctx.functions = grown;
+                    functionCapacity *= 2;
+                }
+
+                struct Function* func = malloc(sizeof(struct Function));
+                if(func == NULL) return ctx;
+
+                func->name = name;
+                func->body = node->right;
+                func->variables = NULL;
+                func->variableCount = 0;
+
+                // The parameter list is only read, the AST is left untouched.
+                int paramCount = 0;
+                for(struct ASTNode* param = node->left->right; param != NULL; param = param->next) paramCount++;
+
+                if(paramCount > 0) func->variables = malloc(sizeof(struct Variable) * paramCount);
+
+                for(struct ASTNode* param = node->left->right; param != NULL && func->variables != NULL; param = param->next) {
+                    // Skips placeholder nodes (empty parameter list) and half-declared parameters.
+                    if(param->left == NULL || param->right == NULL) continue;
+
+                    // The children are told apart by their node type rather than by their side.
+                    struct ASTNode* nameNode = param->left->type == AST_PARAM_NAME ? param->left : param->right;
+                    struct ASTNode* typeNode = nameNode == param->left ? param->right : param->left;
+
+                    struct Variable* argument = &func->variables[func->variableCount];
+
+                    argument->name = nameNode->value;
+                    argument->type = typeNode->value;
+                    argument->value = NULL;
+                    argument->varType = 1;
+
+                    func->variableCount++;
+                }
+
+                ctx.functions[ctx.functionCount] = func;
+
+                hashPut(ctx.functionHashMap, hash, func);
+
+                ctx.functionCount++;
+                break;
+            }
+            default:
+                break;
+        }
+    }
+
+    return ctx;
+}
+
+/**
+ * Compiles the context down to assembly.
+ *
+ * Returns a heap-allocated string that the caller must free, or NULL when
+ * the output could not be allocated.
+ */
+char* compileV2(struct Context context) {
+    struct StringBuilder firstSection;
+    struct StringBuilder sections;
+    struct StringBuilder body;
+    struct StringBuilder output;
+
+    sbInit(&firstSection);
+    sbInit(&sections);
+    sbInit(&body);
+    sbInit(&output);
+
+    int sectionIndex = 0;
+    int stackSize = 0;
+
+    // Platform def
+    ARGUMENT_REGISTRIES = ATTWIN_ARGUMENT_REGISTRIES;
+    SECTION_TYPES = ATTWIN_SECTION_TYPES;
+
+    sbAppend(&firstSection, ".LC0:\n .globl main");
+
+    for(int i = 0; i < context.variableCount; ++i) {
+        struct Variable* var = context.variables[i];
+
+        if(var->type[0] == 's') {
+            if(sectionIndex == 0) {
+                sbAppend(&firstSection, "\n ");
+                sbAppend(&firstSection, SECTION_TYPES[0]);
+                sbAppend(&firstSection, " \"");
+                sbAppend(&firstSection, var->value);
+                sbAppend(&firstSection, "\"");
+            }
+            else {
+                // Each label starts on its own line, otherwise it is glued to the previous string.
+                sbAppend(&sections, "\n.LC");
+                sbAppendInt(&sections, sectionIndex);
+                sbAppend(&sections, ":\n ");
+                sbAppend(&sections, SECTION_TYPES[0]);
+                sbAppend(&sections, " \"");
+                sbAppend(&sections, var->value);
+                sbAppend(&sections, "\"");
+            }
+            sectionIndex++;
+        }
+        else if(var->type[0] == 'n') {
+            // NOTE(review): movq stores 8 bytes into 4-byte slots addressed below %rsp; confirm the intended stack layout.
+            stackSize += 4;
+
+            sbAppend(&body, "\n movq $");
+            sbAppend(&body, var->value);
+            sbAppend(&body, ", -");
+            sbAppendInt(&body, stackSize);
+            sbAppend(&body, "(%rsp)");
+        }
+    }
+
+    sbAppend(&output, firstSection.data);
+    sbAppend(&output, sections.data);
+    sbAppend(&output, "\n\nmain:");
+
+    if(stackSize > 0) {
+        sbAppend(&output, "\n subq $");
+        sbAppendInt(&output, stackSize);
+        sbAppend(&output, ", %rsp");
+    }
+
+    sbAppend(&output, body.data);
+
+    if(stackSize > 0) {
+        sbAppend(&output, "\n addq $");
+        sbAppendInt(&output, stackSize);
+        sbAppend(&output, ", %rsp");
+    }
+
+    int failed = firstSection.failed || sections.failed || body.failed || output.failed;
+
+    free(firstSection.data);
+    free(sections.data);
+    free(body.data);
+
+    if(failed) {
+        free(output.data);
+        return NULL;
+    }
+
+    return output.data;
+}
diff --git a/src/compilerv2/compilerv2.h b/src/compilerv2/compilerv2.h
new file mode 100644
index 0000000..7d26c01
--- /dev/null
+++ b/src/compilerv2/compilerv2.h
@@ -0,0 +1,42 @@
+/**
+ * The compiler of Quickfall.
+ */
+
+#ifndef COMPILER_2_H
+#define COMPILER_2_H
+
+#include "../utils/hashmap.h"
+#include "../parser/ast.h"
+#include "./objects.h"
+
+enum Platform {
+    ATT_WINDOWS,
+    ATT_LINUX
+};
+
+/**
+ * A context is the less abstract way Quickfall represents the code before converting it to assembly.
+ */
+struct Context {
+    struct Variable** variables;
+    struct Function** functions;
+    int variableCount;
+    int functionCount;
+
+    // Hashmaps
+    struct Hashmap* variableHashMap;
+    struct Hashmap* functionHashMap;
+};
+
+/**
+ * Parses the AST tree into a context.
+ */
+struct Context parseContext(struct ASTNode* node);
+
+/**
+ * Compiles the context down to assembly.
+ * Returns a heap-allocated string owned by the caller, or NULL on failure.
+ */
+char* compileV2(struct Context context);
+
+#endif
diff --git a/src/compilerv2/objects.h b/src/compilerv2/objects.h
new file mode 100644
index 0000000..9cd808d
--- /dev/null
+++ b/src/compilerv2/objects.h
@@ -0,0 +1,24 @@
+/**
+ * Compiling objects such as functions, variables...
+ */
+
+#ifndef COMPILER_OBJECTS_H
+#define COMPILER_OBJECTS_H
+
+#include "../parser/ast.h"
+
+struct Variable {
+    char* name;
+    char* value;
+    char* type;
+    unsigned char varType; // 0 = variable, 1 = Func argument, 2 = Function body argument
+};
+
+struct Function {
+    char* name;
+    struct Variable* variables;
+    int variableCount;
+    struct ASTNode* body;
+};
+
+#endif
diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c
index 35830eb..31cdb93 100644
--- a/src/lexer/lexer.c
+++ b/src/lexer/lexer.c
@@ -112,6 +112,10 @@ struct LexerResult runLexer(char string[]) {
             case ';': pushToken(&result, SEMICOLON); break;
             case ',': pushToken(&result, COMMA); break;
             case '=': pushToken(&result, DECLARE); break;
+            case '?':
+                pushToken(&result, NONE);
+                result.tokens[result.size - 1].value[0] = '?';
+                break;
         }
     }
 
diff --git a/src/lexer/tokens.c b/src/lexer/tokens.c
index 28d8973..f43cf63 100644
--- a/src/lexer/tokens.c
+++ b/src/lexer/tokens.c
@@ -26,5 +26,6 @@ enum TokenType {
     SEMICOLON = 15,
     COMMA = 16,
     DECLARE = 17,
-    USE = 18
+    USE = 18,
+    NONE = 19
 };
diff --git a/src/lexer/tokens.h b/src/lexer/tokens.h
index de42fa4..4b24b6e 100644
--- a/src/lexer/tokens.h
+++ b/src/lexer/tokens.h
@@ -22,7 +22,8 @@ enum TokenType {
     SEMICOLON = 15,
     COMMA = 16,
     DECLARE = 17,
-    USE = 18
+    USE = 18,
+    NONE = 19
 };
 
 struct KeywordResult {
diff --git a/src/parser/parser.c b/src/parser/parser.c
index e4a806c..05fba94 100644
--- a/src/parser/parser.c
+++ b/src/parser/parser.c
@@ -5,3 +5,5 @@
 #include "../lexer/tokens.h"
 #include "../lexer/lexer.h"
 #include "./ast.h"
+#include "../utils/logging.c"
+
@@ -23,49 +25,53 @@ struct ASTNode* parseParameters(struct LexerResult result, int index) {
 
-    for(; index < result.size + 1; ++index) {
+    for(; index < result.size; ++index) {
         struct Token t = result.tokens[index];
-
-        if(t.type == PAREN_CLOSE) {
-            root->end = index;
-            return root;
-        }
-
-        if(t.type != COMMA && t.type != KEYWORD) {
-            printf("Error: Parameters must be literals! Got %d", t.type);
-            return NULL;
-        }
-
-        if(t.type == COMMA) {
-            if(mode != 2) {
-                printf("Error: Parameters were not passed correctly!\n");
-                return NULL;
-            }
-
-            struct ASTNode* node = createASTNode(AST_PARAM);
-            current->next = node;
-            current = node;
-        }
-
-        if(!root) {
-            root = createASTNode(AST_PARAM);
-            current = root;
-        }
-
-        root->end = index;
-
-        if(!current->left) {
-            current->left = createASTNode(AST_PARAM_TYPE);
-            memcpy(current->left->value, result.tokens[index].value, strlen(result.tokens[index].value));
-            mode = 1;
-        }
-        else if(!current->right) {
-            current->right = createASTNode(AST_PARAM_NAME);
-            memcpy(current->right->value, result.tokens[index].value, strlen(result.tokens[index].value));
-            mode = 2;
-        }
+
+        // NOTE(review): root/current are declared above this hunk; this guard only matters if they start as NULL.
+        if(root == NULL) {
+            root = createASTNode(AST_PARAM);
+            current = root;
+        }
+
+        root->end = index;
+
+        switch (t.type) {
+            case COMMA:
+                if (mode == 0) {
+                    printf("Error: Arguments aren't passed properly!\n");
+                    return NULL;
+                }
+                mode = 0;
+                current->next = createASTNode(AST_PARAM);
+                current = current->next;
+                break;
+            case NONE:
+            case KEYWORD:
+                if(mode >= 2) {
+                    printf("Error: Arguments aren't passed properly!\n");
+                    return NULL;
+                }
+                // The lookahead must stay inside the token array.
+                if(index + 1 < result.size && (result.tokens[index + 1].type == NONE || result.tokens[index + 1].type == KEYWORD)) {
+                    printf("Passed type %s\n", t.value);
+                    current->right = createASTNode(AST_PARAM_TYPE);
+                    memcpy(current->right->value, t.value, strlen(t.value));
+                }
+                else {
+                    printf("Passed name %s\n", t.value);
+                    current->left = createASTNode(AST_PARAM_NAME);
+                    memcpy(current->left->value, t.value, strlen(t.value));
+                }
+                mode++;
+                break;
+            case PAREN_CLOSE:
+                return root;
+            default:
+                printf("Didn't except token %d in arguments!\n", t.type);
+        }
     }
 
-    printf("Error: The paren wasn't closed!\n");
-    return root;
+    printf("Error: The arguments paren wasn't closed!\n");
+    return NULL;
 }
 
 /**
@@ -163,6 +169,20 @@ struct ASTNode* parseVariableDefinition(struct LexerResult result, int index) {
 
     struct Token val = result.tokens[index + 2];
 
+    switch(val.type) {
+        case NUMBER:
+            node->value[0] = 'n';
+            break;
+        case STRING:
+            node->value[0] = 's';
+            break;
+        case BOOLEAN_VALUE:
+            node->value[0] = 'b';
+            break;
+        default:
+            printf("%sWarning: unsupported variable value type! Compiling of this variable will be ignored!%s\n", TEXT_YELLOW, RESET); // This warning will be here until there is no unsupported types
+    }
+
     if(val.type != KEYWORD && val.type != NUMBER && val.type != STRING && val.type != BOOLEAN_VALUE) {
         printf("Error: Disallowed token as variable value: %d\n", val.type);
         return NULL;
diff --git a/src/utils/hash.c b/src/utils/hash.c
new file mode 100644
index 0000000..6f5e5d9
--- /dev/null
+++ b/src/utils/hash.c
@@ -0,0 +1,22 @@
+/**
+ * Hashing related utilities.
+ */
+
+#include "./hash.h"
+
+/**
+ * Hashes the string: every character adds ((c - 97) << 5) + 1 to the result.
+ * The arithmetic is done on unsigned values so characters below 97 wrap around instead of shifting a negative int.
+ * NOTE(review): the sum does not depend on the character order, so anagrams share the same hash.
+ */
+unsigned int hashstr(char* str) {
+    unsigned int result = 0;
+    unsigned char* p = (unsigned char*) str;
+
+    while(*p != '\0') {
+        result = (((unsigned int) *p - 97u) << 5) + result + 1;
+        ++p;
+    }
+
+    return result;
+}
diff --git a/src/utils/hash.h b/src/utils/hash.h
new file mode 100644
index 0000000..50e1340
--- /dev/null
+++ b/src/utils/hash.h
@@ -0,0 +1,9 @@
+#ifndef HASH_UTILS
+#define HASH_UTILS
+
+/**
+ * Hashes the string.
+ */
+unsigned int hashstr(char* string);
+
+#endif
diff --git a/src/utils/hashmap.c b/src/utils/hashmap.c
new file mode 100644
index 0000000..a6cfb1c
--- /dev/null
+++ b/src/utils/hashmap.c
@@ -0,0 +1,44 @@
+/**
+ * A simple hashmap implementation.
+ */
+
+#include <stdlib.h>
+
+#include "./hashmap.h"
+
+struct Hashmap* createHashmap(int bucketSize, int maxBucketIndex) {
+    struct Hashmap* map = malloc(sizeof(struct Hashmap));
+    if(map == NULL) return NULL;
+    map->bucketSize = bucketSize;
+    map->maxBucketIndex = maxBucketIndex;
+    // calloc leaves every bucket NULL until hashPut first touches it.
+    map->buckets = calloc(maxBucketIndex, sizeof(struct Bucket*));
+    if(map->buckets == NULL && maxBucketIndex > 0) { free(map); return NULL; }
+    return map;
+}
+
+// Hashes outside [0, bucketSize * maxBucketIndex) are ignored by hashPut and read back as NULL by hashGet.
+void hashPut(struct Hashmap* hashmap, int hash, void* value) {
+    int bucket = hash / hashmap->bucketSize;
+    if(hash < 0 || bucket >= hashmap->maxBucketIndex) return;
+
+    if(hashmap->buckets[bucket] == NULL) {
+        struct Bucket* b = malloc(sizeof(struct Bucket));
+        if(b == NULL) return;
+        b->bucketSize = hashmap->bucketSize;
+        // Zeroed slots so that hashGet returns NULL for entries that were never stored.
+        b->values = calloc(b->bucketSize, sizeof(void*));
+        if(b->values == NULL) { free(b); return; }
+        hashmap->buckets[bucket] = b;
+    }
+
+    hashmap->buckets[bucket]->values[hash % hashmap->bucketSize] = value;
+}
+
+void* hashGet(struct Hashmap* hashmap, int hash) {
+    int bucket = hash / hashmap->bucketSize;
+    if(hash < 0 || bucket >= hashmap->maxBucketIndex) return NULL;
+    if(hashmap->buckets[bucket] == NULL) return NULL;
+    return hashmap->buckets[bucket]->values[hash % hashmap->bucketSize];
+}
+
diff --git a/src/utils/hashmap.h b/src/utils/hashmap.h
new file mode 100644
index 0000000..e87e634
--- /dev/null
+++ b/src/utils/hashmap.h
@@ -0,0 +1,35 @@
+/**
+ * A simple hashmap implementation.
+ */
+
+#ifndef HASHMAP_H
+#define HASHMAP_H
+
+struct Bucket {
+    void** values;
+    int bucketSize;
+};
+
+struct Hashmap {
+    int bucketSize;
+    int maxBucketIndex;
+
+    struct Bucket** buckets;
+};
+
+/**
+ * Creates a hashmap able to store hashes in [0, bucketSize * maxBucketIndex).
+ */
+struct Hashmap* createHashmap(int bucketSize, int maxBucketIndex);
+
+/**
+ * Stores the value under the hash, replacing any previous value.
+ */
+void hashPut(struct Hashmap* hashmap, int hash, void* value);
+
+/**
+ * Returns the value stored under the hash, or NULL when there is none.
+ */
+void* hashGet(struct Hashmap* hashmap, int hash);
+
+#endif