aboutsummaryrefslogtreecommitdiffstats
path: root/c_compiler/src
diff options
context:
space:
mode:
authorYann Herklotz <ymherklotz@gmail.com>2017-03-01 17:18:54 +0000
committerYann Herklotz <ymherklotz@gmail.com>2017-03-01 17:18:54 +0000
commit9e761324895d098a87f0ba66b7eb1794cd3ed6b4 (patch)
treeb46b0eb0eb91f6784d586acb8611495de81b92e4 /c_compiler/src
parent2e5cacc6633a6973f8e96adc6bafa633487fc2a1 (diff)
downloadCompiler-9e761324895d098a87f0ba66b7eb1794cd3ed6b4.tar.gz
Compiler-9e761324895d098a87f0ba66b7eb1794cd3ed6b4.zip
Finished parser
Diffstat (limited to 'c_compiler/src')
-rw-r--r--c_compiler/src/c_lexer.flex98
-rw-r--r--c_compiler/src/c_parser.y303
-rw-r--r--c_compiler/src/parser_main.cpp15
3 files changed, 416 insertions, 0 deletions
diff --git a/c_compiler/src/c_lexer.flex b/c_compiler/src/c_lexer.flex
new file mode 100644
index 0000000..c8ca90a
--- /dev/null
+++ b/c_compiler/src/c_lexer.flex
@@ -0,0 +1,98 @@
+%option noyywrap
+
+%{
+// Avoid error "error: fileno was not declared in this scope"
+extern "C" int fileno(FILE *stream);
+
+#include "c_parser.tab.hpp"
+
+%}
+
+KEYWORD auto|double|int|struct|break|else|long|switch|case|enum|register|typedef|char|extern|return|union|const|float|short|unsigned|continue|for|signed|void|default|goto|sizeof|volatile|do|if|static|while
+
+IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]*
+
+OPERATOR [.][.][.]|[<>][<>][=]|[-][-]|[+][+]|[|][|]|[#][#]|[&][&]|[+\-*\/<>=!%^|&][=]|[<][<]|[->][>]|[<>&=+\/\-*(){}\[\]\.,%~!?:|^;]
+
+ASSIGNMENT_OPERATOR (([<][<]|[>][>]|[*\/%+\-&^|])[=]|[=])
+
+FRACTIONALCONSTANT (([0-9]*\.[0-9]+)|([0-9]+\.))
+EXPONENTPART ([eE][+-]?[0-9]+)
+
+FLOATINGSUFFIX ([flFL])
+INTEGERSUFFIX ([uU][lL]|[lL][uU]|[uUlL])
+
+DECIMALCONSTANT ([1-9][0-9]*)
+OCTALCONSTANT ([0][0-7]*)
+HEXCONSTANT ([0][xX][0-9A-Fa-f]+)
+
+CHARCONSTANT ('(([\\]['])|([^']))+')
+
+STRINGLITERAL ["](([\\]["])|([^"]))*["]
+
+WHITESPACE [ \t\r\n]+
+
+PREPROC [#][ ][0-9]+[ ]{STRINGLITERAL}[ 0-9]*
+
+ALL .
+
+%%
+
+typedef|extern|static|auto|register { /* storage-class specifiers */ return T_STRG_SPEC; }
+void|char|short|int|long|float|double|signed|unsigned { /* type specifiers */ return T_TYPE_SPEC; }
+const|volatile { /* type qualifiers */ return T_TYPE_QUAL; }
+
+[;] { return T_SC; }
+[,] { return T_CMA; }
+[(] { return T_LRB; }
+[)] { return T_RRB; }
+[{] { return T_LCB; }
+[}] { return T_RCB; }
+[[] { return T_LSB; }
+[]] { return T_RSB; }
+[?] { return T_QU; }
+[:] { return T_COL; }
+[|][|] { return T_LOG_OR; }
+[&][&] { return T_LOG_AND; }
+[|] { return T_OR; }
+[\^] { return T_XOR; }
+[&] { return T_AND; }
+[=][=] { return T_EQUALITY_OP; }
+[!][=] { return T_EQUALITY_OP; }
+([<>][=])|[<>] { return T_REL_OP; }
+[<][<]|[>][>] { /* only << and >>; the mixed pairs <> and >< are not shift operators */ return T_SHIFT_OP; }
+[*] { return T_MULT; }
+[\/] { return T_DIV; }
+[%] { return T_REM; }
+[~] { return T_TILDE; }
+[!] { return T_NOT; }
+[.] { return T_DOT; }
+[-][>] { return T_ARROW; }
+[+][+]|[-][-] { /* only ++ and --; +- and -+ lex as two separate T_ADDSUB_OP tokens */ return T_INCDEC; }
+[+-] { /* NOTE(review): c_parser.y types this token (and T_AND, T_MULT, T_DIV, T_REM, T_TILDE, T_NOT) as string, yet no yylval is set */ return T_ADDSUB_OP; }
+[=] { /* listed before ASSIGNMENT_OPERATOR so a lone = becomes T_EQ */ yylval.string = new std::string(yytext); return T_EQ; }
+
+{ASSIGNMENT_OPERATOR} { /* compound assignments; the operator text is passed to the parser */ yylval.string = new std::string(yytext); return T_ASSIGN_OPER; }
+
+if { return T_IF; }
+else { return T_ELSE; }
+return { return T_RETURN; }
+while { return T_WHILE; }
+do { return T_DO; }
+for { return T_FOR; }
+sizeof { return T_SIZEOF; }
+
+{IDENTIFIER} { /* placed after the keyword rules so equal-length keyword matches win */ yylval.string = new std::string(yytext); return T_IDENTIFIER; }
+
+({HEXCONSTANT}|{OCTALCONSTANT}|{DECIMALCONSTANT}){INTEGERSUFFIX}? { /* base 0 lets strtol pick hex, octal or decimal from the prefix */ yylval.number = strtol(yytext, NULL, 0); return T_INT_CONST; }
+
+{WHITESPACE} { /* skipped */ ; }
+
+. { /* any other character aborts the run */ fprintf(stderr, "Invalid token\n"); exit(1); }
+
+%%
+
+void yyerror(char const *s) { // error callback declared in c_parser.y: prints the message s to stderr
+ fprintf (stderr, "Parse error : %s\n", s);
+ exit(1); // no error recovery: the process terminates with status 1
+}
diff --git a/c_compiler/src/c_parser.y b/c_compiler/src/c_parser.y
new file mode 100644
index 0000000..1fc4d4a
--- /dev/null
+++ b/c_compiler/src/c_parser.y
@@ -0,0 +1,303 @@
+%code requires{
+
+#include "ast.hpp"
+extern ast_Top *g_root; // A way of getting the AST out
+
+//! This is to fix problems when generating C++
+// We are declaring the functions provided by Flex, so
+// that Bison generated code can call them.
+int yylex(void);
+void yyerror(const char *);
+
+}
+
+// Represents the value associated with any kind of
+// AST node.
+%union{
+ const Base *stmnt;
+ double number;
+ std::string *string;
+}
+
+%token T_TYPE_SPEC T_TYPE_QUAL T_STRG_SPEC T_IDENTIFIER
+%token T_SC T_CMA T_LRB T_LCB T_RCB T_LSB T_RSB T_QU T_COL T_LOG_OR T_LOG_AND T_OR T_XOR T_AND T_EQUALITY_OP T_REL_OP T_SHIFT_OP T_MULT T_DIV T_REM T_TILDE T_NOT T_DOT T_ARROW T_INCDEC T_ADDSUB_OP T_ASSIGN_OPER T_EQ T_SIZEOF
+%token T_INT_CONST
+%token T_IF T_WHILE T_DO T_FOR T_RETURN
+%nonassoc T_RRB
+%nonassoc T_ELSE
+
+
+%type <stmnt> ExtDef ExtDeclaration
+
+%type <stmnt> FuncDef ParameterList Parameter ParamDeclarator
+
+%type <stmnt> DeclarationList Declaration DeclarationSpec DeclarationSpec_T InitDeclarator InitDeclaratorList Declarator
+
+%type <stmnt> StatementList Statement CompoundStatement CompoundStatement_2 SelectionStatement ExpressionStatement JumpStatement IterationStatement
+
+%type <stmnt> Expression AssignmentExpression ConditionalExpression LogicalOrExpression LogicalAndExpression InclusiveOrExpression ExclusiveOrExpression AndExpression EqualityExpression RelationalExpression ShiftExpression AdditiveExpression MultiplicativeExpression CastExpression UnaryExpression PostfixExpression PostfixExpression2 ArgumentExpressionList PrimaryExpression
+
+
+%type <number> Constant T_INT_CONST
+
+
+%type <string> T_IDENTIFIER MultDivRemOP UnaryOperator ASSIGN_OPER T_ASSIGN_OPER T_EQ T_AND T_ADDSUB_OP T_TILDE T_NOT T_MULT T_DIV T_REM //T_Operator
+
+%start ROOT
+
+%%
+
+ROOT:
+ ExtDef { ; }
+ ;
+
+// EXTERNAL DEFINITION
+
+ExtDef: // one or more external declarations (left-recursive list)
+ ExtDeclaration { g_root->push($1); } // each declaration/function is pushed onto the global root g_root
+ | ExtDef ExtDeclaration { g_root->push($2); }
+ ;
+
+ExtDeclaration:
+ Declaration { $$ = $1; }
+ | FuncDef { $$ = $1; }
+ ;
+
+// FUNCTION DEFINITION
+
+FuncDef:
+ DeclarationSpec T_IDENTIFIER T_LRB ParameterList T_RRB CompoundStatement { $$ = new Function(*$2, $4, $6); }
+ ;
+
+ParameterList: // zero or more comma-separated parameters collected in a ParamList
+ %empty { $$ = new ParamList(); }
+ | Parameter { $$ = new ParamList($1); }
+ | ParameterList T_CMA Parameter { $$ = $1; $$->push($3); } // result set explicitly instead of relying on the skeleton presetting it
+ ;
+
+Parameter:
+ DeclarationSpec ParamDeclarator { $$ = $2; }
+ ;
+
+ParamDeclarator:
+ T_IDENTIFIER { $$ = new Parameter(*$1);}
+ ;
+
+// Declaration
+
+DeclarationList: // one or more declarations collected in a DeclarationList
+ Declaration { $$ = new DeclarationList($1); }
+ | DeclarationList Declaration { $$ = $1; $$->push($2); } // result set explicitly instead of relying on the skeleton presetting it
+ ;
+
+Declaration:
+ DeclarationSpec InitDeclaratorList T_SC { $$ = $2; }
+ ;
+
+DeclarationSpec:
+ DeclarationSpec_T { ; }
+ | DeclarationSpec_T DeclarationSpec { ; }
+ ;
+
+DeclarationSpec_T:
+ T_TYPE_SPEC { ; }
+ | T_TYPE_QUAL { ; }
+ | T_STRG_SPEC { ; }
+ ;
+
+InitDeclaratorList: // comma-separated declarators collected in a VariableDeclaration
+ InitDeclarator { $$ = new VariableDeclaration($1); }
+ | InitDeclaratorList T_CMA InitDeclarator { $$ = $1; $$->push($3); } // result set explicitly instead of relying on the skeleton presetting it
+ ;
+
+InitDeclarator: // a declarator with an optional initializer
+ Declarator { $$ = $1; }
+ | Declarator T_EQ AssignmentExpression { $$ = $1; } // the initializer is parsed but not attached to the node
+ ;
+
+Declarator:
+ T_IDENTIFIER {$$ = new Variable(*$1); }
+ ;
+
+// Statement
+
+StatementList: // one or more statements collected in a StatementList
+ Statement { $$ = new StatementList($1); }
+ | StatementList Statement { $$ = $1; $$->push($2); } // result set explicitly instead of relying on the skeleton presetting it
+ ;
+
+Statement:
+ CompoundStatement { $$ = $1; }
+ | SelectionStatement { $$ = $1; }
+ | ExpressionStatement { $$ = $1; }
+ | JumpStatement { $$ = $1; }
+ | IterationStatement { $$ = $1; }
+ ;
+
+CompoundStatement:
+ T_LCB CompoundStatement_2 { $$ = $2; }
+ ;
+
+CompoundStatement_2:
+ T_RCB { $$ = new CompoundStatement; }
+ | DeclarationList T_RCB { $$ = new CompoundStatement($1); }
+ | DeclarationList StatementList T_RCB { $$ = new CompoundStatement($1, $2); }
+ | StatementList T_RCB { $$ = new CompoundStatement($1); }
+ ;
+
+SelectionStatement:
+ T_IF T_LRB Expression T_RRB Statement { $$ = new SelectionStatement($5); }
+| T_IF T_LRB Expression T_RRB Statement T_ELSE Statement { $$ = new SelectionStatement($5, $7); }
+ ;
+
+ExpressionStatement:
+ T_SC { $$ = new ExpressionStatement(); }
+ | Expression T_SC { $$ = $1; }
+ ;
+
+JumpStatement:
+ T_RETURN ExpressionStatement { $$ = $2; }
+ ;
+
+IterationStatement: // while, do-while and for loops
+ T_WHILE T_LRB Expression T_RRB Statement { $$ = $5; } // only the body is kept; the condition is discarded
+ | T_DO Statement T_WHILE T_LRB Expression T_RRB T_SC { $$ = $2; } // only the body is kept; the condition is discarded
+ | T_FOR T_LRB Expression T_SC Expression T_SC Expression T_RRB Statement { $$ = $9; } // all three clauses are required by this rule; only the body is kept
+ ;
+
+// Expressions
+
+Expression:
+ AssignmentExpression { $$ = $1; }
+ ;
+
+AssignmentExpression:
+ ConditionalExpression { $$ = $1; }
+ | UnaryExpression ASSIGN_OPER AssignmentExpression { $$ = $1; }
+ ;
+
+ASSIGN_OPER:
+ T_ASSIGN_OPER { ; }
+ | T_EQ { ; }
+ ;
+
+ConditionalExpression:
+ LogicalOrExpression { $$ = $1; }
+ | LogicalOrExpression T_QU Expression T_COL ConditionalExpression { $$ = $1; }
+ ;
+
+LogicalOrExpression:
+ LogicalAndExpression { $$ = $1; }
+ | LogicalOrExpression T_LOG_OR LogicalAndExpression { $$ = $3; }
+ ;
+
+LogicalAndExpression:
+ InclusiveOrExpression { $$ = $1; }
+ | LogicalAndExpression T_LOG_AND InclusiveOrExpression { $$ = $3; }
+ ;
+
+InclusiveOrExpression:
+ ExclusiveOrExpression { $$ = $1; }
+ | InclusiveOrExpression T_OR ExclusiveOrExpression { $$ = $3; }
+ ;
+
+ExclusiveOrExpression:
+ AndExpression { $$ = $1; }
+ | ExclusiveOrExpression T_XOR AndExpression { $$ = $3; }
+ ;
+
+AndExpression:
+ EqualityExpression { $$ = $1; }
+ | AndExpression T_AND EqualityExpression { $$ = $3; }
+ ;
+
+EqualityExpression:
+ RelationalExpression { $$ = $1; }
+ | EqualityExpression T_EQUALITY_OP RelationalExpression { $$ = $3; }
+ ;
+
+RelationalExpression:
+ ShiftExpression { $$ = $1; }
+ | RelationalExpression T_REL_OP ShiftExpression { $$ = $3; }
+ ;
+
+ShiftExpression:
+ AdditiveExpression { $$ = $1; }
+ | ShiftExpression T_SHIFT_OP AdditiveExpression { $$ = $3; }
+ ;
+
+AdditiveExpression:
+ MultiplicativeExpression { $$ = $1; }
+ | AdditiveExpression T_ADDSUB_OP MultiplicativeExpression { $$ = $3; }
+ ;
+
+MultiplicativeExpression:
+ CastExpression { $$ = $1; }
+ | MultiplicativeExpression MultDivRemOP CastExpression { $$ = $3; }
+ ;
+
+MultDivRemOP:
+ T_MULT { $$ = $1; }
+ | T_DIV { $$ = $1; }
+ | T_REM { $$ = $1; }
+ ;
+
+CastExpression:
+ UnaryExpression { $$ = $1; }
+ | T_LRB T_TYPE_SPEC T_RRB CastExpression { $$ = $4; }
+ ;
+
+UnaryExpression:
+ PostfixExpression { $$ = $1; }
+ | T_INCDEC UnaryExpression { $$ = $2; }
+ | UnaryOperator CastExpression { $$ = $2; }
+ | T_SIZEOF UnaryExpression { $$ = $2; }
+ | T_SIZEOF T_LRB T_TYPE_SPEC T_RRB { $$ = new Expression(); }
+ ;
+
+UnaryOperator:
+ T_AND { $$ = $1; }
+ | T_ADDSUB_OP { $$ = $1; }
+ | T_MULT { $$ = $1; }
+ | T_TILDE { $$ = $1; }
+ | T_NOT { $$ = $1; }
+ ;
+
+PostfixExpression:
+ PrimaryExpression { $$ = $1; }
+ | PostfixExpression T_LSB Expression T_RSB { $$ = $3; }
+ | PostfixExpression T_LRB PostfixExpression2 { $$ = $3; }
+ | PostfixExpression T_DOT T_IDENTIFIER { $$ = new Expression(); }
+ | PostfixExpression T_ARROW T_IDENTIFIER { $$ = new Expression(); }
+ | PostfixExpression T_INCDEC { $$ = new Expression(); }
+ ;
+
+PostfixExpression2:
+ T_RRB { $$ = new Expression(); }
+ | ArgumentExpressionList T_RRB { $$ = $1; }
+ ;
+
+ArgumentExpressionList:
+ AssignmentExpression { $$ = $1; }
+ | ArgumentExpressionList T_CMA AssignmentExpression { $$ = $3; }
+ ;
+
+PrimaryExpression:
+ T_IDENTIFIER { $$ = new Expression(); }
+ | Constant { $$ = new Expression(); }
+ | T_LRB Expression T_RRB { $$ = $2; }
+ ;
+
+Constant:
+ T_INT_CONST { $$ = $1; }
+ ;
+
+%%
+
+ast_Top *g_root; // Definition of variable (to match declaration earlier)
+
+ast_Top *parseAST() { // creates a fresh root, runs the parser and returns that root
+ g_root = new ast_Top; // the ExtDef actions push top-level nodes into g_root
+ yyparse(); // return value is ignored; yyerror() in c_lexer.flex exits the process on a parse error
+ return g_root;
+}
diff --git a/c_compiler/src/parser_main.cpp b/c_compiler/src/parser_main.cpp
new file mode 100644
index 0000000..9626334
--- /dev/null
+++ b/c_compiler/src/parser_main.cpp
@@ -0,0 +1,15 @@
+#include "ast.hpp"
+
+#include <iostream>
+
+int main(int argc, char *argv[]) { // argc and argv are not used
+ ast_Top *ast = parseAST(); // parse the input and obtain the AST root
+
+ std::cout << "<?xml version=\"1.0\"?>" << std::endl << "<Program>" << std::endl;
+
+ ast->print(); // presumably prints the tree between the <Program> tags; confirm against ast.hpp
+
+ std::cout << "</Program>" << std::endl;
+
+ return 0;
+}