diff options
author | Yann Herklotz <ymherklotz@gmail.com> | 2017-02-07 16:56:57 +0000 |
---|---|---|
committer | Yann Herklotz <ymherklotz@gmail.com> | 2017-02-07 16:56:57 +0000 |
commit | 76699050049febe4c7eb0199cd0ae9d13fb36b74 (patch) | |
tree | 612fd98f279355c529390f0c7f431ebd1f0dbc27 /c_lexer | |
parent | 9d94e43d23698f4804060c82482966da9680faa1 (diff) | |
download | Compiler-76699050049febe4c7eb0199cd0ae9d13fb36b74.tar.gz Compiler-76699050049febe4c7eb0199cd0ae9d13fb36b74.zip |
Changing file structure
Diffstat (limited to 'c_lexer')
-rw-r--r-- | c_lexer/include/c_lexer.hpp | 33 | ||||
-rw-r--r-- | c_lexer/src/.gitignore | 0 | ||||
-rw-r--r-- | c_lexer/src/c_lexer.cpp | 7 | ||||
-rw-r--r-- | c_lexer/src/c_lexer.flex | 131 | ||||
-rw-r--r-- | c_lexer/src/main.cpp | 74 | ||||
-rw-r--r-- | c_lexer/test/.gitignore | 2 | ||||
-rw-r--r-- | c_lexer/test/test_lex.c | 14 |
7 files changed, 261 insertions, 0 deletions
diff --git a/c_lexer/include/c_lexer.hpp b/c_lexer/include/c_lexer.hpp new file mode 100644 index 0000000..059664a --- /dev/null +++ b/c_lexer/include/c_lexer.hpp @@ -0,0 +1,33 @@ +#ifndef C_LEXER_HPP +#define C_LEXER_HPP + +#include <string> + +enum TokenType { + None, + Keyword, + Identifier, + Operator, + Constant, + StringLiteral, + Invalid +}; + +// Global variable that will be looked for by bison +extern std::string *yylval; + +// flex function to run on input +extern int yylex(); + +extern int yyleng; + +extern int lineCount; +extern int spaceCount; +extern int sourceLineCount; + +extern std::string fileName; + +// get the correct output +std::string toJson(const std::string& classType, const std::string& text, const std::string& strLine, const std::string& srcCol, const std::string& srcLine, const std::string& fName); + +#endif diff --git a/c_lexer/src/.gitignore b/c_lexer/src/.gitignore new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/c_lexer/src/.gitignore diff --git a/c_lexer/src/c_lexer.cpp b/c_lexer/src/c_lexer.cpp new file mode 100644 index 0000000..e01d73f --- /dev/null +++ b/c_lexer/src/c_lexer.cpp @@ -0,0 +1,7 @@ +#include "c_lexer.hpp" + +std::string toJson(const std::string& classType, const std::string& text, const std::string& strLine, const std::string& srcCol, const std::string& srcLine, const std::string& fName) { + std::string tmp = "{\"Class\":\"" + classType + "\", \"Text\":\"" + text + "\", \"StreamLine\":" + strLine + ", \"SourceFile\":\"" + fName + "\", \"SourceLine\":" + srcLine + ", \"SourceCol\":" + srcCol + "}"; + + return tmp; +} diff --git a/c_lexer/src/c_lexer.flex b/c_lexer/src/c_lexer.flex new file mode 100644 index 0000000..c2a42c4 --- /dev/null +++ b/c_lexer/src/c_lexer.flex @@ -0,0 +1,131 @@ +%option noyywrap + +%{ + +#include "c_lexer.hpp" + +#include <sstream> +#include <stdlib.h> + +int lineCount = 1; +int spaceCount = 1; +int sourceLineCount = 1; + +std::string fileName; + +%} + +KEYWORD 
auto|double|int|struct|break|else|long|switch|case|enum|register|typedef|char|extern|return|union|const|float|short|unsigned|continue|for|signed|void|default|goto|sizeof|volatile|do|if|static|while + +IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]* + +OPERATOR [.][.][.]|[<>][<>][=]|[-][-]|[+][+]|[|][|]|[#][#]|[&][&]|[+\-*\/<>=!%^|&][=]|[<][<]|[->][>]|[<>&=+\/\-*(){}\[\]\.,%~!?:|^;] + +FRACTIONALCONSTANT (([0-9]*\.[0-9]+)|([0-9]+\.)) +EXPONENTPART ([eE][+-]?[0-9]+) + +FLOATINGSUFFIX ([flFL]) +INTEGERSUFFIX ([uU][lL]|[lL][uU]|[uUlL]) + +DECIMALCONSTANT ([1-9][0-9]*) +OCTALCONSTANT ([0][0-7]*) +HEXCONSTANT ([0][xX][0-9A-Fa-f]+) + +CHARCONSTANT ('(([\\]['])|([^']))+') + +STRINGLITERAL ["](([\\]["])|([^"]))*["] + +NEWLINE (\r\n?|\n) + +WHITESPACE [ ] + +TAB \t + +PREPROC [#][ ][0-9]+[ ]{STRINGLITERAL}[ 0-9]* + +ALL . + +%% + +{KEYWORD} { + yylval = new std::string(yytext); + return Keyword; +} + +{IDENTIFIER} { + yylval = new std::string(yytext); + return Identifier; +} + +{OPERATOR} { + yylval = new std::string(yytext); + return Operator; +} + +{FRACTIONALCONSTANT}{EXPONENTPART}?{FLOATINGSUFFIX}? { + yylval = new std::string(yytext); + return Constant; +} + +([0-9]+){EXPONENTPART}{FLOATINGSUFFIX}? { + yylval = new std::string(yytext); + return Constant; +} + +{HEXCONSTANT}{INTEGERSUFFIX}? { + yylval = new std::string(yytext); + return Constant; +} + +{DECIMALCONSTANT}{INTEGERSUFFIX}? { + yylval = new std::string(yytext); + return Constant; +} + +{OCTALCONSTANT}{INTEGERSUFFIX}? 
{ + yylval = new std::string(yytext); + return Constant; +} + +{CHARCONSTANT} { + std::string tmp(yytext); + yylval = new std::string(tmp.substr(1, tmp.length()-2)); + return Constant; +} + +{STRINGLITERAL} { + std::string tmp(yytext); + yylval = new std::string(tmp.substr(1, tmp.length()-2)); + return StringLiteral; +} + +{NEWLINE} { + spaceCount = 1; + lineCount++; + sourceLineCount++; +} + +{WHITESPACE} { + spaceCount++; +} + +{PREPROC} { + int srcLineInt; + + yylval = new std::string(yytext); + std::stringstream preProcLine((*yylval).substr(1, (*yylval).length())); + preProcLine >> srcLineInt >> fileName; + sourceLineCount = srcLineInt - 1; + fileName = fileName.substr(1, fileName.length() - 2); +} + +{TAB} { + spaceCount += 8; +} + +{ALL} { + yylval = new std::string(yytext); + return Invalid; +} + +%% diff --git a/c_lexer/src/main.cpp b/c_lexer/src/main.cpp new file mode 100644 index 0000000..8b20098 --- /dev/null +++ b/c_lexer/src/main.cpp @@ -0,0 +1,74 @@ +#include "c_lexer.hpp" + +#include <cassert> +#include <sstream> +#include <vector> + +std::string *yylval; + +int main() { + printf("[\n"); + std::string sourceLine, streamLine, sourceCol, classType, text; + while(1) { + // get the token type and run the lexer + std::stringstream str_line, src_col, src_line; + + TokenType type = (TokenType)yylex(); + + str_line << lineCount; + streamLine = str_line.str(); + + src_col << spaceCount; + sourceCol = src_col.str(); + + src_line << sourceLineCount; + sourceLine = src_line.str(); + + if(type == None) { + // returns None when the file ends and we want to break then + break; + } else if(type == Invalid) { + // type is Invalid + text = *yylval; + classType = "Invalid"; + delete yylval; + } else if(type == Keyword) { + // found a keyword + text = *yylval; + classType = "Keyword"; + delete yylval; + } else if(type == Identifier) { + // found an identifier + text = *yylval; + classType = "Identifier"; + delete yylval; + } else if(type == Operator) { + // found an 
operator + text = *yylval; + classType = "Operator"; + delete yylval; + } else if(type == Constant) { + // found a constant + text = *yylval; + classType = "Constant"; + delete yylval; + } else if(type == StringLiteral) { + // found a string literal + text = *yylval; + classType = "StringLiteral"; + delete yylval; + } else { + // if any other type comes we assert to 0 + assert(0); + return 1; + } + + printf("%s,\n", toJson(classType, text, streamLine, sourceCol, sourceLine, fileName).c_str()); + + spaceCount += yyleng; + } + + printf("{}\n]\n"); + + return 0; +} diff --git a/c_lexer/test/.gitignore b/c_lexer/test/.gitignore new file mode 100644 index 0000000..94ca778 --- /dev/null +++ b/c_lexer/test/.gitignore @@ -0,0 +1,2 @@ +output.json +pre_processed_test_lex.c diff --git a/c_lexer/test/test_lex.c b/c_lexer/test/test_lex.c new file mode 100644 index 0000000..9db9054 --- /dev/null +++ b/c_lexer/test/test_lex.c @@ -0,0 +1,14 @@ +#include <stdlib.h> +#include <stdio.h> +#include <assert.h> +#include <math.h> + +#define POTATO 5 + +int main(int argc, char* argv[]) { + char* h = "Hello World\""; + int j = POTATO; + int u = 2398uL; + float rt = 23.238e-283; + return 0; +} |