aboutsummaryrefslogtreecommitdiffstats
path: root/c_lexer
diff options
context:
space:
mode:
authorYann Herklotz <ymherklotz@gmail.com>2017-02-07 16:56:57 +0000
committerYann Herklotz <ymherklotz@gmail.com>2017-02-07 16:56:57 +0000
commit76699050049febe4c7eb0199cd0ae9d13fb36b74 (patch)
tree612fd98f279355c529390f0c7f431ebd1f0dbc27 /c_lexer
parent9d94e43d23698f4804060c82482966da9680faa1 (diff)
downloadCompiler-76699050049febe4c7eb0199cd0ae9d13fb36b74.tar.gz
Compiler-76699050049febe4c7eb0199cd0ae9d13fb36b74.zip
Changing file structure
Diffstat (limited to 'c_lexer')
-rw-r--r--c_lexer/include/c_lexer.hpp33
-rw-r--r--c_lexer/src/.gitignore0
-rw-r--r--c_lexer/src/c_lexer.cpp7
-rw-r--r--c_lexer/src/c_lexer.flex131
-rw-r--r--c_lexer/src/main.cpp74
-rw-r--r--c_lexer/test/.gitignore2
-rw-r--r--c_lexer/test/test_lex.c14
7 files changed, 261 insertions, 0 deletions
diff --git a/c_lexer/include/c_lexer.hpp b/c_lexer/include/c_lexer.hpp
new file mode 100644
index 0000000..059664a
--- /dev/null
+++ b/c_lexer/include/c_lexer.hpp
@@ -0,0 +1,33 @@
+#ifndef C_LEXER_HPP
+#define C_LEXER_HPP
+
+#include <string>
+
+enum TokenType { // token classes that the lexer rules return from yylex()
+ None, // first enumerator (value 0); main() stops its loop when yylex() yields it
+ Keyword, // returned by the {KEYWORD} rule
+ Identifier, // returned by the {IDENTIFIER} rule
+ Operator, // returned by the {OPERATOR} rule
+ Constant, // returned for floating, integer and character constants
+ StringLiteral, // returned by the {STRINGLITERAL} rule
+ Invalid // returned by the catch-all {ALL} rule for any other character
+};
+
+// Global variable that will be looked for by bison; the lexer rules point it at a heap-allocated copy of the matched text
+extern std::string *yylval;
+
+// flex function to run on input; returns a TokenType value as an int
+extern int yylex();
+
+extern int yyleng; // length of the text matched by the last rule; main() adds it to spaceCount after each token
+
+extern int lineCount; // incremented by the NEWLINE rule; printed by main() as StreamLine
+extern int spaceCount; // current column: reset to 1 on a newline, advanced by the whitespace rules and by main(); printed as SourceCol
+extern int sourceLineCount; // incremented by the NEWLINE rule and reset by the PREPROC rule; printed as SourceLine
+
+extern std::string fileName; // set by the PREPROC rule; printed by main() as SourceFile
+
+// Formats one token as a JSON object with the keys Class, Text, StreamLine, SourceFile, SourceLine and SourceCol; the numeric fields arrive as already-formatted strings, and srcCol precedes srcLine in the parameter list
+std::string toJson(const std::string& classType, const std::string& text, const std::string& strLine, const std::string& srcCol, const std::string& srcLine, const std::string& fName);
+
+#endif
diff --git a/c_lexer/src/.gitignore b/c_lexer/src/.gitignore
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/c_lexer/src/.gitignore
diff --git a/c_lexer/src/c_lexer.cpp b/c_lexer/src/c_lexer.cpp
new file mode 100644
index 0000000..e01d73f
--- /dev/null
+++ b/c_lexer/src/c_lexer.cpp
@@ -0,0 +1,7 @@
+#include "c_lexer.hpp"
+
+// Escapes raw so that it can sit between double quotes in a JSON document.
+// A double quote or backslash gets a backslash prefix; newline, carriage
+// return and tab become \n, \r and \t; any other byte below 0x20 becomes a
+// \u00XX sequence. Every other byte is copied unchanged.
+static std::string escapeJson(const std::string& raw) {
+    static const char hexDigits[] = "0123456789abcdef";
+
+    std::string escaped;
+    escaped.reserve(raw.length());
+
+    for(std::string::size_type i = 0; i < raw.length(); ++i) {
+        // go through unsigned char so bytes above 0x7f are not seen as negative
+        const unsigned char c = static_cast<unsigned char>(raw[i]);
+
+        switch(c) {
+        case '"':
+            escaped += "\\\"";
+            break;
+        case '\\':
+            escaped += "\\\\";
+            break;
+        case '\n':
+            escaped += "\\n";
+            break;
+        case '\r':
+            escaped += "\\r";
+            break;
+        case '\t':
+            escaped += "\\t";
+            break;
+        default:
+            if(c < 0x20) {
+                escaped += "\\u00";
+                escaped += hexDigits[c >> 4];
+                escaped += hexDigits[c & 0x0f];
+            } else {
+                escaped += static_cast<char>(c);
+            }
+            break;
+        }
+    }
+
+    return escaped;
+}
+
+// Builds the JSON object that describes one token.
+//
+// classType  token class name, written as the Class string
+// text       raw token text, written as the Text string (escaped)
+// strLine    line number in the input stream, written verbatim as StreamLine
+// srcCol     column in the source file, written verbatim as SourceCol
+// srcLine    line in the source file, written verbatim as SourceLine
+// fName      source file name, written as the SourceFile string (escaped)
+//
+// Returns the object as a single-line string. text and fName are escaped
+// because they come from the lexed input and may hold quotes or backslashes,
+// which would otherwise terminate the JSON string early or form escape
+// sequences that JSON does not define.
+std::string toJson(const std::string& classType, const std::string& text, const std::string& strLine, const std::string& srcCol, const std::string& srcLine, const std::string& fName) {
+    std::string tmp = "{\"Class\":\"" + classType + "\", \"Text\":\"" + escapeJson(text) + "\", \"StreamLine\":" + strLine + ", \"SourceFile\":\"" + escapeJson(fName) + "\", \"SourceLine\":" + srcLine + ", \"SourceCol\":" + srcCol + "}";
+
+    return tmp;
+}
diff --git a/c_lexer/src/c_lexer.flex b/c_lexer/src/c_lexer.flex
new file mode 100644
index 0000000..c2a42c4
--- /dev/null
+++ b/c_lexer/src/c_lexer.flex
@@ -0,0 +1,131 @@
+%option noyywrap
+
+%{
+/* C++ prologue: defines the counters and file name that c_lexer.hpp declares extern. */
+#include "c_lexer.hpp"
+
+#include <sstream>
+#include <stdlib.h>
+
+int lineCount = 1; /* line in the input stream; bumped by the NEWLINE rule */
+int spaceCount = 1; /* column counter; reset to 1 by the NEWLINE rule */
+int sourceLineCount = 1; /* line in the original source; bumped by NEWLINE, reset by PREPROC */
+
+std::string fileName; /* stays empty until a PREPROC rule match assigns it */
+
+%}
+/* Named patterns used by the rules in the next section. */
+KEYWORD auto|double|int|struct|break|else|long|switch|case|enum|register|typedef|char|extern|return|union|const|float|short|unsigned|continue|for|signed|void|default|goto|sizeof|volatile|do|if|static|while
+/* Names: a letter or underscore followed by letters, digits or underscores. Reserved words fit this pattern too. */
+IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]*
+/* Operators and punctuators. NOTE(review): [<>][<>][=] also accepts <>= and ><=, and [->][>] is a two-member class followed by >, so it covers -> and >>. */
+OPERATOR [.][.][.]|[<>][<>][=]|[-][-]|[+][+]|[|][|]|[#][#]|[&][&]|[+\-*\/<>=!%^|&][=]|[<][<]|[->][>]|[<>&=+\/\-*(){}\[\]\.,%~!?:|^;]
+/* Building blocks for floating constants. */
+FRACTIONALCONSTANT (([0-9]*\.[0-9]+)|([0-9]+\.))
+EXPONENTPART ([eE][+-]?[0-9]+)
+/* Optional type suffixes. */
+FLOATINGSUFFIX ([flFL])
+INTEGERSUFFIX ([uU][lL]|[lL][uU]|[uUlL])
+/* Integer constants by base; OCTALCONSTANT also covers a lone 0. */
+DECIMALCONSTANT ([1-9][0-9]*)
+OCTALCONSTANT ([0][0-7]*)
+HEXCONSTANT ([0][xX][0-9A-Fa-f]+)
+/* Character constant: one or more characters between single quotes; a backslash followed by a single quote may appear inside. */
+CHARCONSTANT ('(([\\]['])|([^']))+')
+/* String literal: zero or more characters between double quotes; a backslash followed by a double quote may appear inside. */
+STRINGLITERAL ["](([\\]["])|([^"]))*["]
+/* Line ending: CR, CR LF or LF. */
+NEWLINE (\r\n?|\n)
+
+WHITESPACE [ ]
+
+TAB \t
+/* NOTE(review): presumably the linemarker a C preprocessor writes: hash, space, line number, space, quoted file name, optional flag digits. */
+PREPROC [#][ ][0-9]+[ ]{STRINGLITERAL}[ 0-9]*
+/* Any single character other than a newline; the last rule uses it as a catch-all. */
+ALL .
+
+%%
+
+{KEYWORD} { /* reserved word */
+ yylval = new std::string(yytext); /* heap copy of the match; main() copies the text and deletes it */
+ return Keyword;
+}
+
+{IDENTIFIER} { /* name that is not a reserved word */
+ yylval = new std::string(yytext);
+ return Identifier;
+}
+
+{OPERATOR} { /* operator or punctuator */
+ yylval = new std::string(yytext);
+ return Operator;
+}
+
+{FRACTIONALCONSTANT}{EXPONENTPART}?{FLOATINGSUFFIX}? { /* floating constant written with a decimal point */
+ yylval = new std::string(yytext);
+ return Constant;
+}
+
+([0-9]+){EXPONENTPART}{FLOATINGSUFFIX}? { /* floating constant without a decimal point; the exponent is mandatory */
+ yylval = new std::string(yytext);
+ return Constant;
+}
+
+{HEXCONSTANT}{INTEGERSUFFIX}? { /* hexadecimal integer constant */
+ yylval = new std::string(yytext);
+ return Constant;
+}
+
+{DECIMALCONSTANT}{INTEGERSUFFIX}? { /* decimal integer constant */
+ yylval = new std::string(yytext);
+ return Constant;
+}
+
+{OCTALCONSTANT}{INTEGERSUFFIX}? { /* octal integer constant, including a lone 0 */
+ yylval = new std::string(yytext);
+ return Constant;
+}
+
+{CHARCONSTANT} { /* character constant, reported as a Constant */
+ std::string tmp(yytext);
+ yylval = new std::string(tmp.substr(1, tmp.length()-2)); /* drop the first and last character, i.e. the surrounding quotes */
+ return Constant;
+}
+
+{STRINGLITERAL} { /* string literal */
+ std::string tmp(yytext);
+ yylval = new std::string(tmp.substr(1, tmp.length()-2)); /* drop the first and last character, i.e. the surrounding quotes */
+ return StringLiteral;
+}
+
+{NEWLINE} { /* no return statement, so no token is reported; only the position counters change */
+ spaceCount = 1; /* back to the first column */
+ lineCount++;
+ sourceLineCount++;
+}
+
+{WHITESPACE} { /* a single space advances the column by one and reports no token */
+ spaceCount++;
+}
+
+{PREPROC} {
+    /* Linemarker: hash, line number, quoted file name, optional flag digits. */
+    /* Work on a local copy. No token is returned here, so a heap string stored in yylval would never be deleted. */
+    const std::string marker(yytext);
+
+    /* Skip the leading hash and read the line number that follows it. */
+    int srcLineInt = 0;
+    std::stringstream markerStream(marker.substr(1));
+    markerStream >> srcLineInt;
+
+    /* One less than announced: the NEWLINE rule increments the counter when the marker line ends. */
+    sourceLineCount = srcLineInt - 1;
+
+    /* Take everything between the first and the last double quote, so that file names containing spaces are kept whole. */
+    const std::string::size_type firstQuote = marker.find("\"");
+    const std::string::size_type lastQuote = marker.rfind("\"");
+    fileName = marker.substr(firstQuote + 1, lastQuote - firstQuote - 1);
+}
+
+{TAB} { /* a tab advances the column by a fixed 8 and reports no token */
+ spaceCount += 8;
+}
+
+{ALL} { /* catch-all: any character that no earlier rule matched */
+ yylval = new std::string(yytext); /* heap copy of the match; main() copies the text and deletes it */
+ return Invalid;
+}
+
+%%
diff --git a/c_lexer/src/main.cpp b/c_lexer/src/main.cpp
new file mode 100644
index 0000000..8b20098
--- /dev/null
+++ b/c_lexer/src/main.cpp
@@ -0,0 +1,74 @@
+#include "c_lexer.hpp"
+
+#include <cassert>
+#include <sstream>
+#include <vector>
+
+std::string *yylval; // definition of the pointer declared extern in c_lexer.hpp; lexer rules allocate the string, main() deletes it
+
+// Runs the lexer until it reports the end of the input and prints the tokens
+// as a JSON array with one object per token. Returns 0 on success and 1 if
+// yylex() yields a value that is not a known TokenType.
+int main() {
+    printf("[\n");
+
+    std::string classType, text, streamLine, sourceLine, sourceCol;
+    TokenType type;
+
+    // yylex() yields None once there is nothing left to read
+    while((type = static_cast<TokenType>(yylex())) != None) {
+        // map the token class onto the name used in the output
+        switch(type) {
+        case Invalid:
+            classType = "Invalid";
+            break;
+        case Keyword:
+            classType = "Keyword";
+            break;
+        case Identifier:
+            classType = "Identifier";
+            break;
+        case Operator:
+            classType = "Operator";
+            break;
+        case Constant:
+            classType = "Constant";
+            break;
+        case StringLiteral:
+            classType = "StringLiteral";
+            break;
+        default:
+            // if any other type comes we assert to 0
+            assert(0);
+            return 1;
+        }
+
+        // every rule that returns a token left its text in a heap string
+        text = *yylval;
+        delete yylval;
+
+        // turn the position counters into text for toJson()
+        std::stringstream lineOut, colOut, srcLineOut;
+
+        lineOut << lineCount;
+        streamLine = lineOut.str();
+
+        colOut << spaceCount;
+        sourceCol = colOut.str();
+
+        srcLineOut << sourceLineCount;
+        sourceLine = srcLineOut.str();
+
+        printf("%s,\n", toJson(classType, text, streamLine, sourceCol, sourceLine, fileName).c_str());
+
+        // move the column past the token that was just printed
+        spaceCount += yyleng;
+    }
+
+    // the empty object follows the comma printed after the last token
+    printf("{}\n]\n");
+
+    return 0;
+}
diff --git a/c_lexer/test/.gitignore b/c_lexer/test/.gitignore
new file mode 100644
index 0000000..94ca778
--- /dev/null
+++ b/c_lexer/test/.gitignore
@@ -0,0 +1,2 @@
+output.json
+pre_processed_test_lex.c
diff --git a/c_lexer/test/test_lex.c b/c_lexer/test/test_lex.c
new file mode 100644
index 0000000..9db9054
--- /dev/null
+++ b/c_lexer/test/test_lex.c
@@ -0,0 +1,14 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <math.h>
+
+#define POTATO 5
+
+int main(int argc, char* argv[]) {
+ char* h = "Hello World\""; /* string literal that ends with an escaped double quote */
+ int j = POTATO; /* macro defined above as 5 */
+ int u = 2398uL; /* decimal constant with a uL suffix */
+ float rt = 23.238e-283; /* fractional constant with a negative exponent */
+ return 0;
+}