From af2cd75bc1e8fb119e2ef091f51355db333167ac Mon Sep 17 00:00:00 2001 From: François Pottier Date: Wed, 7 Oct 2015 09:22:37 +0200 Subject: Pass --no-stdlib and -v to menhir when compiling pre_parser.mly. Passing --no-stdlib ensures that there is no dependency on Menhir's standard library. Passing -v, which is equivalent to --explain --dump, requests the generation of pre_parser.automaton, a description of the automaton. --- .gitignore | 1 + Makefile.extr | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 107dd3a1..32cbdadf 100644 --- a/.gitignore +++ b/.gitignore @@ -39,6 +39,7 @@ cparser/Parser.v cparser/Lexer.ml cparser/pre_parser.ml cparser/pre_parser.mli +cparser/pre_parser.automaton lib/Readconfig.ml lib/Tokenize.ml driver/Version.ml diff --git a/Makefile.extr b/Makefile.extr index 1bb3eec8..c8d451a2 100644 --- a/Makefile.extr +++ b/Makefile.extr @@ -70,7 +70,7 @@ OCAMLC_P4=ocamlfind ocamlc $(COMPFLAGS) $(BITSTRING) OCAMLOPT_P4=ocamlfind ocamlopt $(COMPFLAGS) $(BITSTRING) OCAMLDEP_P4=ocamlfind ocamldep $(INCLUDES) $(BITSTRING) -MENHIR=menhir --explain +MENHIR=menhir -v --no-stdlib OCAMLLEX=ocamllex -q MODORDER=tools/modorder .depend.extr -- cgit From 82435bab1bf71c37c645f0853bf02b3d4224bc6d Mon Sep 17 00:00:00 2001 From: François Pottier Date: Wed, 7 Oct 2015 09:37:28 +0200 Subject: Add whitespace, for better vertical alignment and better readability. This violates the 80-column width limit, but is really important. --- cparser/pre_parser.mly | 34 +++++++++++++--------------------- 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index e73cc22a..54322888 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -286,7 +286,7 @@ constant_expression: typedef). *) declaration: -| declaration_specifiers init_declarator_list? SEMICOLON +| declaration_specifiers init_declarator_list? SEMICOLON {} | declaration_specifiers_typedef typedef_declarator_list? SEMICOLON {} @@ -321,8 +321,8 @@ storage_class_specifier_no_typedef: that do not contain either "typedef" nor type specifiers. *) declaration_specifiers_no_type: | storage_class_specifier_no_typedef declaration_specifiers_no_type? -| type_qualifier declaration_specifiers_no_type? -| function_specifier declaration_specifiers_no_type? +| type_qualifier declaration_specifiers_no_type? +| function_specifier declaration_specifiers_no_type? {} (* [declaration_specifiers_no_typedef_name] matches declaration @@ -331,9 +331,9 @@ declaration_specifiers_no_type: keyword"). *) declaration_specifiers_no_typedef_name: | storage_class_specifier_no_typedef declaration_specifiers_no_typedef_name? -| type_qualifier declaration_specifiers_no_typedef_name? -| function_specifier declaration_specifiers_no_typedef_name? -| type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? +| type_qualifier declaration_specifiers_no_typedef_name? +| function_specifier declaration_specifiers_no_typedef_name? +| type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? {} (* [declaration_specifiers_no_type] matches declaration_specifiers @@ -353,7 +353,7 @@ declaration_specifiers_no_typedef_name: The first field is a named t, while the second is unnamed of type t. *) declaration_specifiers: -| declaration_specifiers_no_type? i = TYPEDEF_NAME declaration_specifiers_no_type? +| declaration_specifiers_no_type? i = TYPEDEF_NAME declaration_specifiers_no_type? { set_id_type i TypedefId } | declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? {} @@ -362,19 +362,11 @@ declaration_specifiers: "typedef" keyword. To avoid conflicts, we also encode the constraint described in the comment for [declaration_specifiers]. *) declaration_specifiers_typedef: -| declaration_specifiers_no_type? - TYPEDEF declaration_specifiers_no_type? - i = TYPEDEF_NAME declaration_specifiers_no_type? -| declaration_specifiers_no_type? - i = TYPEDEF_NAME declaration_specifiers_no_type? - TYPEDEF declaration_specifiers_no_type? +| declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? i = TYPEDEF_NAME declaration_specifiers_no_type? +| declaration_specifiers_no_type? i = TYPEDEF_NAME declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? { set_id_type i TypedefId } -| declaration_specifiers_no_type? - TYPEDEF declaration_specifiers_no_type? - type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? -| declaration_specifiers_no_type? - type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? - TYPEDEF declaration_specifiers_no_typedef_name? +| declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? +| declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? TYPEDEF declaration_specifiers_no_typedef_name? {} (* A type specifier which is not a typedef name. *) @@ -421,14 +413,14 @@ struct_declaration: (* As in the standard, except it also encodes the constraint described in the comment above [declaration_specifiers]. *) specifier_qualifier_list: -| type_qualifier_list? i = TYPEDEF_NAME type_qualifier_list? +| type_qualifier_list? i = TYPEDEF_NAME type_qualifier_list? { set_id_type i TypedefId } | type_qualifier_list? type_specifier_no_typedef_name specifier_qualifier_list_no_typedef_name? {} specifier_qualifier_list_no_typedef_name: | type_specifier_no_typedef_name specifier_qualifier_list_no_typedef_name? -| type_qualifier specifier_qualifier_list_no_typedef_name? +| type_qualifier specifier_qualifier_list_no_typedef_name? {} struct_declarator_list: -- cgit From a6b15496744c19a54ab52c96d1dca441cd7ad1b8 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Wed, 7 Oct 2015 09:40:00 +0200 Subject: Add -la 1 to Menhir's invocation, to see statistics and warnings. --- Makefile.extr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.extr b/Makefile.extr index c8d451a2..8dcbdc33 100644 --- a/Makefile.extr +++ b/Makefile.extr @@ -70,7 +70,7 @@ OCAMLC_P4=ocamlfind ocamlc $(COMPFLAGS) $(BITSTRING) OCAMLOPT_P4=ocamlfind ocamlopt $(COMPFLAGS) $(BITSTRING) OCAMLDEP_P4=ocamlfind ocamldep $(INCLUDES) $(BITSTRING) -MENHIR=menhir -v --no-stdlib +MENHIR=menhir -v --no-stdlib -la 1 OCAMLLEX=ocamllex -q MODORDER=tools/modorder .depend.extr -- cgit From 30ac183455a0e15fb9889793a3bc774bc1b7b5c2 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Wed, 7 Oct 2015 09:56:12 +0200 Subject: Use [option] as much as possible and [ioption] only where necessary. The existing [option(X)] was marked %inline, and has been renamed [ioption(X)]. A new [option(X)], which is not marked %inline, has been introduced. The grammar now uses [option] everywhere, except where [ioption] is necessary in order to avoid conflicts. This reduces the number of states in the automaton. The number of LR(0) cores drops from 857 to 712. --- cparser/pre_parser.mly | 43 ++++++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 54322888..14bf4a23 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -74,7 +74,24 @@ (* Helpers *) -%inline option(X): +(* Note that, by convention, [X?] is syntactic sugar for [option(X)], + so this definition of [option] is actually used, even though the + word [option] does not appear in the rest of this file. *) + +option(X): +| /* nothing */ + { None } +| x = X + { Some x } + +(* [ioption(X)] is equivalent to [option(X)], but is marked [%inline], + so its definition is expanded. In the absence of conflicts, the two + are equivalent. Using [ioption] instead of [option] in well-chosen + places can help avoid conflicts. Conversely, using [option] instead + of [ioption] in well-chosen places can help reduce the number of + states of the automaton. *) + +%inline ioption(X): | /* nothing */ { None } | x = X @@ -353,20 +370,20 @@ declaration_specifiers_no_typedef_name: The first field is a named t, while the second is unnamed of type t. *) declaration_specifiers: -| declaration_specifiers_no_type? i = TYPEDEF_NAME declaration_specifiers_no_type? +| ioption(declaration_specifiers_no_type) i = TYPEDEF_NAME declaration_specifiers_no_type? { set_id_type i TypedefId } -| declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? +| ioption(declaration_specifiers_no_type) type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? {} (* This matches declaration_specifiers that do contains once the "typedef" keyword. To avoid conflicts, we also encode the constraint described in the comment for [declaration_specifiers]. *) declaration_specifiers_typedef: -| declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? i = TYPEDEF_NAME declaration_specifiers_no_type? -| declaration_specifiers_no_type? i = TYPEDEF_NAME declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? +| ioption(declaration_specifiers_no_type) TYPEDEF declaration_specifiers_no_type? i = TYPEDEF_NAME declaration_specifiers_no_type? +| ioption(declaration_specifiers_no_type) i = TYPEDEF_NAME declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? { set_id_type i TypedefId } -| declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? -| declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? TYPEDEF declaration_specifiers_no_typedef_name? +| ioption(declaration_specifiers_no_type) TYPEDEF declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? +| ioption(declaration_specifiers_no_type) type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? TYPEDEF declaration_specifiers_no_typedef_name? {} (* A type specifier which is not a typedef name. *) @@ -511,7 +528,7 @@ function_specifier: has to be restored if entering the body of the function being defined, if so. *) declarator: -| pointer? x = direct_declarator attribute_specifier_list +| ioption(pointer) x = direct_declarator attribute_specifier_list { x } direct_declarator: @@ -559,13 +576,13 @@ type_name: abstract_declarator: | pointer -| pointer? direct_abstract_declarator +| ioption(pointer) direct_abstract_declarator {} direct_abstract_declarator: | LPAREN abstract_declarator RPAREN -| direct_abstract_declarator? LBRACK type_qualifier_list? assignment_expression? RBRACK -| direct_abstract_declarator? LPAREN in_context(parameter_type_list?) RPAREN +| ioption(direct_abstract_declarator) LBRACK type_qualifier_list? assignment_expression? RBRACK +| ioption(direct_abstract_declarator) LPAREN in_context(parameter_type_list?) RPAREN {} c_initializer: @@ -809,12 +826,12 @@ external_declaration: {} function_definition_begin: -| declaration_specifiers pointer? x=direct_declarator +| declaration_specifiers ioption(pointer) x=direct_declarator { match x with | (_, None) -> $syntaxerror | (i, Some restore_fun) -> restore_fun () } -| declaration_specifiers pointer? x=direct_declarator +| declaration_specifiers ioption(pointer) x=direct_declarator LPAREN params=identifier_list RPAREN open_context declaration_list { match x with | (_, Some _) -> $syntaxerror -- cgit From d9b17759c9a56a33b7e2d57e0aaaab4951ef222d Mon Sep 17 00:00:00 2001 From: François Pottier Date: Wed, 7 Oct 2015 09:59:50 +0200 Subject: One more replacement of [ioption] with [option]. I missed this opportunity in the previous commit. --- cparser/pre_parser.mly | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 14bf4a23..43b44c13 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -581,7 +581,7 @@ abstract_declarator: direct_abstract_declarator: | LPAREN abstract_declarator RPAREN -| ioption(direct_abstract_declarator) LBRACK type_qualifier_list? assignment_expression? RBRACK +| option(direct_abstract_declarator) LBRACK type_qualifier_list? assignment_expression? RBRACK | ioption(direct_abstract_declarator) LPAREN in_context(parameter_type_list?) RPAREN {} -- cgit From 7c4dd1467e62229689fe15656f4405f617edca1d Mon Sep 17 00:00:00 2001 From: François Pottier Date: Wed, 7 Oct 2015 10:09:33 +0200 Subject: Introduced [other_identifier] as a more elegant way of calling [set_id_type i OtherId]. This causes no change in the automaton. --- cparser/pre_parser.mly | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 43b44c13..c036caeb 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -106,6 +106,15 @@ general_identifier: | i = TYPEDEF_NAME { i } +(* [other_identifier] is equivalent to [general_identifier], but adds + an instruction that re-classifies this identifier as an [OtherId]. + Because this definition is marked %inline, the function call takes + place when the host production is reduced. *) + +%inline other_identifier: + i = general_identifier + { set_id_type i OtherId } + string_literals_list: | STRING_LITERAL | string_literals_list STRING_LITERAL @@ -161,9 +170,8 @@ postfix_expression: {} | BUILTIN_VA_ARG LPAREN assignment_expression COMMA type_name error { unclosed "(" ")" $startpos($2) $endpos } -| postfix_expression DOT i = general_identifier -| postfix_expression PTR i = general_identifier - { set_id_type i OtherId } +| postfix_expression DOT other_identifier +| postfix_expression PTR other_identifier | postfix_expression INC | postfix_expression DEC | LPAREN type_name RPAREN LBRACE initializer_list COMMA? RBRACE @@ -404,10 +412,9 @@ type_specifier_no_typedef_name: struct_or_union_specifier: | struct_or_union attribute_specifier_list LBRACE struct_declaration_list RBRACE +| struct_or_union attribute_specifier_list other_identifier LBRACE struct_declaration_list RBRACE +| struct_or_union attribute_specifier_list other_identifier {} -| struct_or_union attribute_specifier_list i = general_identifier LBRACE struct_declaration_list RBRACE -| struct_or_union attribute_specifier_list i = general_identifier - { set_id_type i OtherId } | struct_or_union attribute_specifier_list LBRACE struct_declaration_list error { unclosed "{" "}" $startpos($3) $endpos } | struct_or_union attribute_specifier_list general_identifier LBRACE struct_declaration_list error @@ -452,10 +459,9 @@ struct_declarator: enum_specifier: | ENUM attribute_specifier_list LBRACE enumerator_list COMMA? RBRACE +| ENUM attribute_specifier_list other_identifier LBRACE enumerator_list COMMA? RBRACE +| ENUM attribute_specifier_list other_identifier {} -| ENUM attribute_specifier_list i = general_identifier LBRACE enumerator_list COMMA? RBRACE -| ENUM attribute_specifier_list i = general_identifier - { set_id_type i OtherId } | ENUM attribute_specifier_list LBRACE enumerator_list COMMA? error { unclosed "{" "}" $startpos($3) $endpos } | ENUM attribute_specifier_list general_identifier LBRACE enumerator_list COMMA? error @@ -513,8 +519,7 @@ gcc_attribute: { set_id_type i VarId } gcc_attribute_word: -| i = general_identifier - { set_id_type i OtherId } +| other_identifier | CONST | PACKED {} @@ -607,9 +612,8 @@ designator_list: designator: | LBRACK constant_expression RBRACK +| DOT other_identifier {} -| DOT i = general_identifier - { set_id_type i OtherId } (* The grammar of statements is replicated three times. @@ -657,8 +661,7 @@ statement_intern_close: (* [labeled_statement(last_statement)] has the same effect on contexts as [last_statement]. *) labeled_statement(last_statement): -| i = general_identifier COLON last_statement - { set_id_type i OtherId } +| other_identifier COLON last_statement | CASE constant_expression COLON last_statement | DEFAULT COLON last_statement {} @@ -692,8 +695,7 @@ expression_statement(close): {} jump_statement(close): -| GOTO i = general_identifier close SEMICOLON - { set_id_type i OtherId } +| GOTO other_identifier close SEMICOLON | CONTINUE close SEMICOLON | BREAK close SEMICOLON | RETURN expression? close SEMICOLON @@ -797,8 +799,9 @@ asm_operand: {} asm_op_name: -| /*empty*/ {} -| LBRACK i = general_identifier RBRACK { set_id_type i OtherId } +| /*empty*/ +| LBRACK other_identifier RBRACK + {} asm_flags: | string_literals_list -- cgit From 7b62517ea6cf0d132099d9a921950f97704e3b9c Mon Sep 17 00:00:00 2001 From: François Pottier Date: Wed, 7 Oct 2015 10:14:05 +0200 Subject: For clarity, removed several redundant calls to [set_id_type]. A TYPEDEF_NAME is already classified as a [TypedefId] by the lexer, and similarly, a VAR_NAME is already classified as a [VarId]. Thus, the removed calls had no effect. The remaining calls to [set_id_type] are useful, as they can re-classify a token. --- cparser/pre_parser.mly | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index c036caeb..0d1e8fee 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -150,8 +150,7 @@ declare_typename(nt): (* Actual grammar *) primary_expression: -| i = VAR_NAME - { set_id_type i VarId } +| VAR_NAME | CONSTANT | string_literals_list | LPAREN expression RPAREN @@ -378,8 +377,7 @@ declaration_specifiers_no_typedef_name: The first field is a named t, while the second is unnamed of type t. *) declaration_specifiers: -| ioption(declaration_specifiers_no_type) i = TYPEDEF_NAME declaration_specifiers_no_type? - { set_id_type i TypedefId } +| ioption(declaration_specifiers_no_type) TYPEDEF_NAME declaration_specifiers_no_type? | ioption(declaration_specifiers_no_type) type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? {} @@ -387,9 +385,8 @@ declaration_specifiers: "typedef" keyword. To avoid conflicts, we also encode the constraint described in the comment for [declaration_specifiers]. *) declaration_specifiers_typedef: -| ioption(declaration_specifiers_no_type) TYPEDEF declaration_specifiers_no_type? i = TYPEDEF_NAME declaration_specifiers_no_type? -| ioption(declaration_specifiers_no_type) i = TYPEDEF_NAME declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? - { set_id_type i TypedefId } +| ioption(declaration_specifiers_no_type) TYPEDEF declaration_specifiers_no_type? TYPEDEF_NAME declaration_specifiers_no_type? +| ioption(declaration_specifiers_no_type) TYPEDEF_NAME declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? | ioption(declaration_specifiers_no_type) TYPEDEF declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? | ioption(declaration_specifiers_no_type) type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? TYPEDEF declaration_specifiers_no_typedef_name? {} @@ -437,8 +434,7 @@ struct_declaration: (* As in the standard, except it also encodes the constraint described in the comment above [declaration_specifiers]. *) specifier_qualifier_list: -| type_qualifier_list? i = TYPEDEF_NAME type_qualifier_list? - { set_id_type i TypedefId } +| type_qualifier_list? TYPEDEF_NAME type_qualifier_list? | type_qualifier_list? type_specifier_no_typedef_name specifier_qualifier_list_no_typedef_name? {} -- cgit From df2ba9189d479efce7f37c61ed1b15d93767145e Mon Sep 17 00:00:00 2001 From: François Pottier Date: Wed, 7 Oct 2015 10:34:10 +0200 Subject: Factorized two productions (and two error productions) in [struct_or_union_specifier]. The old version was strictly equivalent to using [ioption(other_identifier)]. The new version uses [option(other_identifier)] instead, that is, [other_identifier?]. Technically, this means that [set_id_type i OtherId] is called slightly earlier (at the opening brace, instead of at the closing brace), but this does not make any difference, since the re-classification of identifiers affects only the second parsing phase. --- cparser/pre_parser.mly | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 0d1e8fee..8cc92581 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -408,13 +408,10 @@ type_specifier_no_typedef_name: {} struct_or_union_specifier: -| struct_or_union attribute_specifier_list LBRACE struct_declaration_list RBRACE -| struct_or_union attribute_specifier_list other_identifier LBRACE struct_declaration_list RBRACE +| struct_or_union attribute_specifier_list other_identifier? LBRACE struct_declaration_list RBRACE | struct_or_union attribute_specifier_list other_identifier {} -| struct_or_union attribute_specifier_list LBRACE struct_declaration_list error - { unclosed "{" "}" $startpos($3) $endpos } -| struct_or_union attribute_specifier_list general_identifier LBRACE struct_declaration_list error +| struct_or_union attribute_specifier_list other_identifier? LBRACE struct_declaration_list error { unclosed "{" "}" $startpos($4) $endpos } struct_or_union: -- cgit From e9ba1d3276b0b2fbc37ecb8bd7e4955fd8ec030b Mon Sep 17 00:00:00 2001 From: François Pottier Date: Wed, 7 Oct 2015 10:37:06 +0200 Subject: Factorized two productions (and two error productions) in [enum_specifier]. This is analogous to the previous commit. --- cparser/pre_parser.mly | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 8cc92581..7567b372 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -451,13 +451,10 @@ struct_declarator: {} enum_specifier: -| ENUM attribute_specifier_list LBRACE enumerator_list COMMA? RBRACE -| ENUM attribute_specifier_list other_identifier LBRACE enumerator_list COMMA? RBRACE +| ENUM attribute_specifier_list other_identifier? LBRACE enumerator_list COMMA? RBRACE | ENUM attribute_specifier_list other_identifier {} -| ENUM attribute_specifier_list LBRACE enumerator_list COMMA? error - { unclosed "{" "}" $startpos($3) $endpos } -| ENUM attribute_specifier_list general_identifier LBRACE enumerator_list COMMA? error +| ENUM attribute_specifier_list other_identifier? LBRACE enumerator_list COMMA? error { unclosed "{" "}" $startpos($4) $endpos } enumerator_list: -- cgit From c1937e330a3ca6c19ef648e2dcfe4871fc3c2219 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Wed, 7 Oct 2015 10:42:10 +0200 Subject: Factorized the productions for several categories of binary operators. This leads to a small savings in the number of states (which could become greater in the future if we decide to parameterize expressions). If desired, the old automaton could be recovered by marking the binary operators as %inline. --- cparser/pre_parser.mly | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 7567b372..497851bf 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -210,37 +210,44 @@ cast_expression: | LPAREN type_name RPAREN cast_expression {} +multiplicative_operator: + STAR | SLASH | PERCENT {} + multiplicative_expression: | cast_expression -| multiplicative_expression STAR cast_expression -| multiplicative_expression SLASH cast_expression -| multiplicative_expression PERCENT cast_expression +| multiplicative_expression multiplicative_operator cast_expression {} +additive_operator: + PLUS | MINUS {} + additive_expression: | multiplicative_expression -| additive_expression PLUS multiplicative_expression -| additive_expression MINUS multiplicative_expression +| additive_expression additive_operator multiplicative_expression {} +shift_operator: + LEFT | RIGHT {} + shift_expression: | additive_expression -| shift_expression LEFT additive_expression -| shift_expression RIGHT additive_expression +| shift_expression shift_operator additive_expression {} +relational_operator: + LT | GT | LEQ | GEQ {} + relational_expression: | shift_expression -| relational_expression LT shift_expression -| relational_expression GT shift_expression -| relational_expression LEQ shift_expression -| relational_expression GEQ shift_expression +| relational_expression relational_operator shift_expression {} +equality_operator: + EQEQ | NEQ {} + equality_expression: | relational_expression -| equality_expression EQEQ relational_expression -| equality_expression NEQ relational_expression +| equality_expression equality_operator relational_expression {} and_expression: -- cgit From 09527e66514edcfa20a0341acd75c1fe6fd77363 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Wed, 7 Oct 2015 10:49:42 +0200 Subject: Introduced optional(X, Y), which means X? Y, and used it in array declarators and FOR loops. This leads to fewer automaton states, and potentially better error messages. --- cparser/pre_parser.mly | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 497851bf..62a57618 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -97,6 +97,14 @@ option(X): | x = X { Some x } +(* [optional(X, Y)] is equivalent to [X? Y]. However, by inlining + the two possibilies -- either [X Y] or just [Y] -- we are able + to give more meaningful syntax error messages. [optional(X, Y)] + itself is usually NOT inlined, as that would cause a useless + explosion of cases. *) +optional(X, Y): + ioption(X) Y {} + %inline fst(X): | x = X { fst x } @@ -537,7 +545,7 @@ direct_declarator: | i = general_identifier { set_id_type i VarId; (i, None) } | LPAREN x = declarator RPAREN -| x = direct_declarator LBRACK type_qualifier_list? assignment_expression? RBRACK +| x = direct_declarator LBRACK type_qualifier_list? optional(assignment_expression, RBRACK) { x } | x = direct_declarator LPAREN open_context parameter_type_list? restore_fun = save_contexts_stk @@ -583,7 +591,7 @@ abstract_declarator: direct_abstract_declarator: | LPAREN abstract_declarator RPAREN -| option(direct_abstract_declarator) LBRACK type_qualifier_list? assignment_expression? RBRACK +| option(direct_abstract_declarator) LBRACK type_qualifier_list? optional(assignment_expression, RBRACK) | ioption(direct_abstract_declarator) LPAREN in_context(parameter_type_list?) RPAREN {} @@ -764,8 +772,8 @@ iteration_statement(openc,last_statement): | WHILE openc LPAREN expression RPAREN last_statement | DO open_context statement_finish_close WHILE openc LPAREN expression RPAREN close_context SEMICOLON -| FOR openc LPAREN expression? SEMICOLON expression? SEMICOLON expression? RPAREN last_statement -| FOR openc LPAREN declaration expression? SEMICOLON expression? RPAREN last_statement +| FOR openc LPAREN optional(expression, SEMICOLON) optional(expression, SEMICOLON) optional(expression, RPAREN) last_statement +| FOR openc LPAREN declaration optional(expression, SEMICOLON) optional(expression, RPAREN) last_statement {} asm_attributes: -- cgit From c16b1ce7a09e7091f2482c9d898bc4f7ac73fe29 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Wed, 7 Oct 2015 10:53:26 +0200 Subject: Factorized the two forms of FOR statement by introducing [for_statement_header]. This leads to a smaller automaton. --- cparser/pre_parser.mly | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 62a57618..d6097ec1 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -772,8 +772,12 @@ iteration_statement(openc,last_statement): | WHILE openc LPAREN expression RPAREN last_statement | DO open_context statement_finish_close WHILE openc LPAREN expression RPAREN close_context SEMICOLON -| FOR openc LPAREN optional(expression, SEMICOLON) optional(expression, SEMICOLON) optional(expression, RPAREN) last_statement -| FOR openc LPAREN declaration optional(expression, SEMICOLON) optional(expression, RPAREN) last_statement +| FOR openc LPAREN for_statement_header optional(expression, SEMICOLON) optional(expression, RPAREN) last_statement + {} + +for_statement_header: +| optional(expression, SEMICOLON) +| declaration {} asm_attributes: -- cgit From b3d81f80a3e88adf2c8bd5eec7fe642497efd407 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Wed, 7 Oct 2015 11:00:42 +0200 Subject: One cosmetic change of [option] to [?]. No impact. --- cparser/pre_parser.mly | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index d6097ec1..69618c12 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -591,7 +591,7 @@ abstract_declarator: direct_abstract_declarator: | LPAREN abstract_declarator RPAREN -| option(direct_abstract_declarator) LBRACK type_qualifier_list? optional(assignment_expression, RBRACK) +| direct_abstract_declarator? LBRACK type_qualifier_list? optional(assignment_expression, RBRACK) | ioption(direct_abstract_declarator) LPAREN in_context(parameter_type_list?) RPAREN {} -- cgit From 7f952a804eda8bac8d812800741b047550b1194b Mon Sep 17 00:00:00 2001 From: François Pottier Date: Wed, 7 Oct 2015 11:44:43 +0200 Subject: Cosmetic. Removed some spaces. Shared one redundant semantic action {}. --- cparser/pre_parser.mly | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 69618c12..6a2ae411 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -138,7 +138,7 @@ string_literals_list: follow set of the non-terminal in question. The follow sets are given by menhir with option -lg 3. *) -%inline nop: (* empty *) { } +%inline nop: (* empty *) {} open_context: (* empty *)%prec highPrec { !open_context () } @@ -326,7 +326,6 @@ constant_expression: declaration: | declaration_specifiers init_declarator_list? SEMICOLON - {} | declaration_specifiers_typedef typedef_declarator_list? SEMICOLON {} @@ -338,7 +337,7 @@ init_declarator_list: init_declarator: | declare_varname(fst(declarator)) | declare_varname(fst(declarator)) EQ c_initializer - { } + {} typedef_declarator_list: | typedef_declarator @@ -347,7 +346,7 @@ typedef_declarator_list: typedef_declarator: | declare_typename(fst(declarator)) - { } + {} storage_class_specifier_no_typedef: | EXTERN @@ -860,12 +859,12 @@ identifier_list: declaration_list: | /*empty*/ - { } + {} | declaration_list declaration - { } + {} function_definition: | function_definition_begin LBRACE block_item_list? close_context RBRACE - { } + {} | function_definition_begin LBRACE block_item_list? close_context error { unclosed "{" "}" $startpos($2) $endpos } -- cgit From 7d68132721bb4c12de8b846717972a25899ecc3f Mon Sep 17 00:00:00 2001 From: François Pottier Date: Thu, 8 Oct 2015 01:11:42 +0200 Subject: Replaced 4 uses of [ioption(declaration_specifiers_no_type)] with [declaration_specifiers_no_type?]. Inlining these options was not necessary. This reduces the number of states in the automaton. --- cparser/pre_parser.mly | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 6a2ae411..52a94078 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -392,17 +392,17 @@ declaration_specifiers_no_typedef_name: *) declaration_specifiers: | ioption(declaration_specifiers_no_type) TYPEDEF_NAME declaration_specifiers_no_type? -| ioption(declaration_specifiers_no_type) type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? +| declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? {} (* This matches declaration_specifiers that do contains once the "typedef" keyword. To avoid conflicts, we also encode the constraint described in the comment for [declaration_specifiers]. *) declaration_specifiers_typedef: -| ioption(declaration_specifiers_no_type) TYPEDEF declaration_specifiers_no_type? TYPEDEF_NAME declaration_specifiers_no_type? +| declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? TYPEDEF_NAME declaration_specifiers_no_type? | ioption(declaration_specifiers_no_type) TYPEDEF_NAME declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? -| ioption(declaration_specifiers_no_type) TYPEDEF declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? -| ioption(declaration_specifiers_no_type) type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? TYPEDEF declaration_specifiers_no_typedef_name? +| declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? +| declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? TYPEDEF declaration_specifiers_no_typedef_name? {} (* A type specifier which is not a typedef name. *) -- cgit From 153651d6f959f9a18a47441f2e7280046b590f1e Mon Sep 17 00:00:00 2001 From: François Pottier Date: Fri, 16 Oct 2015 11:30:16 +0200 Subject: Added [Makefile.menhir], which gives a choice between Menhir's "code" and "table" back-ends when compiling CompCert. For now, MENHIR_TABLE is set to false, so CompCert is not affected. Setting MENHIR_TABLE to true builds CompCert using Menhir's table back-end. This causes a small but repeatable slowdown on "make test", about 2% (roughly 1 second out of 40). I have tested building ccomp and ccomp.byte. I have tested with an ocamlfind-installed menhir and with a manually-installed menhir. --- Makefile.extr | 17 +++++++++------ Makefile.menhir | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 7 deletions(-) create mode 100644 Makefile.menhir diff --git a/Makefile.extr b/Makefile.extr index 8dcbdc33..6c19d1ed 100644 --- a/Makefile.extr +++ b/Makefile.extr @@ -17,6 +17,10 @@ include Makefile.config +# Menhir configuration and rules. + +include Makefile.menhir + # Directories containing plain Caml code (no preprocessing) DIRS=extraction \ @@ -33,7 +37,7 @@ endif ALLDIRS=$(DIRS) $(DIRS_P4) -INCLUDES=$(patsubst %,-I %, $(ALLDIRS)) +INCLUDES=$(patsubst %,-I %, $(ALLDIRS)) $(MENHIR_INCLUDES) # Control of warnings: # warning 3 = deprecated feature. Turned off for OCaml 4.02 (bytes vs strings) @@ -70,7 +74,6 @@ OCAMLC_P4=ocamlfind ocamlc $(COMPFLAGS) $(BITSTRING) OCAMLOPT_P4=ocamlfind ocamlopt $(COMPFLAGS) $(BITSTRING) OCAMLDEP_P4=ocamlfind ocamldep $(INCLUDES) $(BITSTRING) -MENHIR=menhir -v --no-stdlib -la 1 OCAMLLEX=ocamllex -q MODORDER=tools/modorder .depend.extr @@ -78,7 +81,9 @@ PARSERS=backend/CMparser.mly cparser/pre_parser.mly LEXERS=backend/CMlexer.mll cparser/Lexer.mll \ lib/Tokenize.mll lib/Readconfig.mll -LIBS=str.cmxa unix.cmxa +LIBS=str.cmxa unix.cmxa $(MENHIR_LIBS) +LIBS_BYTE=$(patsubst %.cmxa,%.cma,$(patsubst %.cmx,%.cmo,$(LIBS))) + CHECKLINK_LIBS=str.cmxa EXECUTABLES=ccomp ccomp.byte cchecklink cchecklink.byte clightgen clightgen.byte @@ -96,7 +101,7 @@ ccomp: $(CCOMP_OBJS) ccomp.byte: $(CCOMP_OBJS:.cmx=.cmo) @echo "Linking $@" - @$(OCAMLC) -o $@ $(LIBS:.cmxa=.cma) $+ + @$(OCAMLC) -o $@ $(LIBS_BYTE) $+ ifeq ($(CCHECKLINK),true) @@ -120,7 +125,7 @@ clightgen: $(CLIGHTGEN_OBJS) clightgen.byte: $(CLIGHTGEN_OBJS:.cmx=.cmo) @echo "Linking $@" - @$(OCAMLC) -o $@ $(LIBS:.cmxa=.cma) $+ + @$(OCAMLC) -o $@ $(LIBS_BYTE) $+ include .depend.extr @@ -148,8 +153,6 @@ checklink/%.cmx: checklink/%.ml @echo "OCAMLOPT $<" @$(OCAMLOPT) -c $< -%.ml %.mli: %.mly - $(MENHIR) $< %.ml: %.mll $(OCAMLLEX) $< diff --git a/Makefile.menhir b/Makefile.menhir new file mode 100644 index 00000000..414279ab --- /dev/null +++ b/Makefile.menhir @@ -0,0 +1,67 @@ +# This is a Makefile fragment for Menhir-specific aspects. + +# This flag can be set to true or false. It controls whether we use +# Menhir's table back-end or code back-end. The table back-end is a +# bit slower, but supports more features, including advanced error +# reporting. + +MENHIR_TABLE = false + +# Executable. + +ifeq ($(MENHIR_TABLE),true) + MENHIR = menhir --table +else + MENHIR = menhir +endif + +# Options. + +MENHIR_FLAGS = -v --no-stdlib -la 1 + +# Using Menhir in --table mode requires MenhirLib. + +ifeq ($(MENHIR_TABLE),true) + MENHIR_LIBS = menhirLib.cmx +else + MENHIR_LIBS = +endif + +# The compilation rule. + +%.ml %.mli: %.mly + $(MENHIR) $(MENHIR_FLAGS) $< + +# Note 1: finding where MenhirLib has been installed would be easier if we +# could depend on ocamlfind, but as far as I understand and as of today, +# CompCert can be compiled and linked even in the absence of ocamlfind. +# So, we should not require ocamlfind. + +# Note 2: Menhir has options --suggest-comp-flags and --suggest-link-flags +# which we are supposed to help solve this difficulty. However, they don't +# work for us out of the box, because if Menhir has been installed using +# ocamlfind, then Menhir will suggest using ocamlfind (i.e. it will suggest +# -package and -linkpkg options), which we don't want to do. + +# Solution: Ask Menhir first. If Menhir answers "-package menhirLib", then +# Menhir was installed with ocamlfind, so we should not ask Menhir, but we +# can instead ask ocamlfind where Menhir's library was installed. Otherwise, +# Menhir answers directly with a "-I ..." directive, which we use. + +ifeq ($(MENHIR_TABLE),true) + + MENHIR_SUGGESTION = $(MENHIR) --suggest-comp-flags + + MENHIR_INCLUDES = $(shell \ + if $(MENHIR_SUGGESTION) | grep -e "-package" >/dev/null ; then \ + echo "-I `ocamlfind query menhirLib`" ; \ + else \ + $(MENHIR_SUGGESTION) ; \ + fi) + +else + + MENHIR_INCLUDES = + +endif + -- cgit From dfa2941c7df7641872464ff07466f754718df1c1 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Fri, 16 Oct 2015 11:37:25 +0200 Subject: It is probably more efficient to eagerly evaluate $(MENHIR_INCLUDES). This should save a lot of calls to the shell, menhir, and ocamlfind. --- Makefile.menhir | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.menhir b/Makefile.menhir index 414279ab..72b54b14 100644 --- a/Makefile.menhir +++ b/Makefile.menhir @@ -52,7 +52,7 @@ ifeq ($(MENHIR_TABLE),true) MENHIR_SUGGESTION = $(MENHIR) --suggest-comp-flags - MENHIR_INCLUDES = $(shell \ + MENHIR_INCLUDES := $(shell \ if $(MENHIR_SUGGESTION) | grep -e "-package" >/dev/null ; then \ echo "-I `ocamlfind query menhirLib`" ; \ else \ -- cgit