From 82435bab1bf71c37c645f0853bf02b3d4224bc6d Mon Sep 17 00:00:00 2001 From: François Pottier Date: Wed, 7 Oct 2015 09:37:28 +0200 Subject: Add whitespace, for better vertical alignment and better readability. This violates the 80-column width limit, but is really important. --- cparser/pre_parser.mly | 34 +++++++++++++--------------------- 1 file changed, 13 insertions(+), 21 deletions(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index e73cc22a..54322888 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -286,7 +286,7 @@ constant_expression: typedef). *) declaration: -| declaration_specifiers init_declarator_list? SEMICOLON +| declaration_specifiers init_declarator_list? SEMICOLON {} | declaration_specifiers_typedef typedef_declarator_list? SEMICOLON {} @@ -321,8 +321,8 @@ storage_class_specifier_no_typedef: that do not contain either "typedef" nor type specifiers. *) declaration_specifiers_no_type: | storage_class_specifier_no_typedef declaration_specifiers_no_type? -| type_qualifier declaration_specifiers_no_type? -| function_specifier declaration_specifiers_no_type? +| type_qualifier declaration_specifiers_no_type? +| function_specifier declaration_specifiers_no_type? {} (* [declaration_specifiers_no_typedef_name] matches declaration @@ -331,9 +331,9 @@ declaration_specifiers_no_type: keyword"). *) declaration_specifiers_no_typedef_name: | storage_class_specifier_no_typedef declaration_specifiers_no_typedef_name? -| type_qualifier declaration_specifiers_no_typedef_name? -| function_specifier declaration_specifiers_no_typedef_name? -| type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? +| type_qualifier declaration_specifiers_no_typedef_name? +| function_specifier declaration_specifiers_no_typedef_name? +| type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? 
{} (* [declaration_specifiers_no_type] matches declaration_specifiers @@ -353,7 +353,7 @@ declaration_specifiers_no_typedef_name: The first field is a named t, while the second is unnamed of type t. *) declaration_specifiers: -| declaration_specifiers_no_type? i = TYPEDEF_NAME declaration_specifiers_no_type? +| declaration_specifiers_no_type? i = TYPEDEF_NAME declaration_specifiers_no_type? { set_id_type i TypedefId } | declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? {} @@ -362,19 +362,11 @@ declaration_specifiers: "typedef" keyword. To avoid conflicts, we also encode the constraint described in the comment for [declaration_specifiers]. *) declaration_specifiers_typedef: -| declaration_specifiers_no_type? - TYPEDEF declaration_specifiers_no_type? - i = TYPEDEF_NAME declaration_specifiers_no_type? -| declaration_specifiers_no_type? - i = TYPEDEF_NAME declaration_specifiers_no_type? - TYPEDEF declaration_specifiers_no_type? +| declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? i = TYPEDEF_NAME declaration_specifiers_no_type? +| declaration_specifiers_no_type? i = TYPEDEF_NAME declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? { set_id_type i TypedefId } -| declaration_specifiers_no_type? - TYPEDEF declaration_specifiers_no_type? - type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? -| declaration_specifiers_no_type? - type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? - TYPEDEF declaration_specifiers_no_typedef_name? +| declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? +| declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? TYPEDEF declaration_specifiers_no_typedef_name? {} (* A type specifier which is not a typedef name. 
*) @@ -421,14 +413,14 @@ struct_declaration: (* As in the standard, except it also encodes the constraint described in the comment above [declaration_specifiers]. *) specifier_qualifier_list: -| type_qualifier_list? i = TYPEDEF_NAME type_qualifier_list? +| type_qualifier_list? i = TYPEDEF_NAME type_qualifier_list? { set_id_type i TypedefId } | type_qualifier_list? type_specifier_no_typedef_name specifier_qualifier_list_no_typedef_name? {} specifier_qualifier_list_no_typedef_name: | type_specifier_no_typedef_name specifier_qualifier_list_no_typedef_name? -| type_qualifier specifier_qualifier_list_no_typedef_name? +| type_qualifier specifier_qualifier_list_no_typedef_name? {} struct_declarator_list: -- cgit From 30ac183455a0e15fb9889793a3bc774bc1b7b5c2 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Wed, 7 Oct 2015 09:56:12 +0200 Subject: Use [option] as much as possible and [ioption] only where necessary. The existing [option(X)] was marked %inline, and has been renamed [ioption(X)]. A new [option(X)], which is not marked %inline, has been introduced. The grammar now uses [option] everywhere, except where [ioption] is necessary in order to avoid conflicts. This reduces the number of states in the automaton. The number of LR(0) cores drops from 857 to 712. --- cparser/pre_parser.mly | 43 ++++++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 13 deletions(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 54322888..14bf4a23 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -74,7 +74,24 @@ (* Helpers *) -%inline option(X): +(* Note that, by convention, [X?] is syntactic sugar for [option(X)], + so this definition of [option] is actually used, even though the + word [option] does not appear in the rest of this file. 
*) + +option(X): +| /* nothing */ + { None } +| x = X + { Some x } + +(* [ioption(X)] is equivalent to [option(X)], but is marked [%inline], + so its definition is expanded. In the absence of conflicts, the two + are equivalent. Using [ioption] instead of [option] in well-chosen + places can help avoid conflicts. Conversely, using [option] instead + of [ioption] in well-chosen places can help reduce the number of + states of the automaton. *) + +%inline ioption(X): | /* nothing */ { None } | x = X @@ -353,20 +370,20 @@ declaration_specifiers_no_typedef_name: The first field is a named t, while the second is unnamed of type t. *) declaration_specifiers: -| declaration_specifiers_no_type? i = TYPEDEF_NAME declaration_specifiers_no_type? +| ioption(declaration_specifiers_no_type) i = TYPEDEF_NAME declaration_specifiers_no_type? { set_id_type i TypedefId } -| declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? +| ioption(declaration_specifiers_no_type) type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? {} (* This matches declaration_specifiers that do contains once the "typedef" keyword. To avoid conflicts, we also encode the constraint described in the comment for [declaration_specifiers]. *) declaration_specifiers_typedef: -| declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? i = TYPEDEF_NAME declaration_specifiers_no_type? -| declaration_specifiers_no_type? i = TYPEDEF_NAME declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? +| ioption(declaration_specifiers_no_type) TYPEDEF declaration_specifiers_no_type? i = TYPEDEF_NAME declaration_specifiers_no_type? +| ioption(declaration_specifiers_no_type) i = TYPEDEF_NAME declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? { set_id_type i TypedefId } -| declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? 
-| declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? TYPEDEF declaration_specifiers_no_typedef_name? +| ioption(declaration_specifiers_no_type) TYPEDEF declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? +| ioption(declaration_specifiers_no_type) type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? TYPEDEF declaration_specifiers_no_typedef_name? {} (* A type specifier which is not a typedef name. *) @@ -511,7 +528,7 @@ function_specifier: has to be restored if entering the body of the function being defined, if so. *) declarator: -| pointer? x = direct_declarator attribute_specifier_list +| ioption(pointer) x = direct_declarator attribute_specifier_list { x } direct_declarator: @@ -559,13 +576,13 @@ type_name: abstract_declarator: | pointer -| pointer? direct_abstract_declarator +| ioption(pointer) direct_abstract_declarator {} direct_abstract_declarator: | LPAREN abstract_declarator RPAREN -| direct_abstract_declarator? LBRACK type_qualifier_list? assignment_expression? RBRACK -| direct_abstract_declarator? LPAREN in_context(parameter_type_list?) RPAREN +| ioption(direct_abstract_declarator) LBRACK type_qualifier_list? assignment_expression? RBRACK +| ioption(direct_abstract_declarator) LPAREN in_context(parameter_type_list?) RPAREN {} c_initializer: @@ -809,12 +826,12 @@ external_declaration: {} function_definition_begin: -| declaration_specifiers pointer? x=direct_declarator +| declaration_specifiers ioption(pointer) x=direct_declarator { match x with | (_, None) -> $syntaxerror | (i, Some restore_fun) -> restore_fun () } -| declaration_specifiers pointer? 
x=direct_declarator +| declaration_specifiers ioption(pointer) x=direct_declarator LPAREN params=identifier_list RPAREN open_context declaration_list { match x with | (_, Some _) -> $syntaxerror -- cgit From d9b17759c9a56a33b7e2d57e0aaaab4951ef222d Mon Sep 17 00:00:00 2001 From: François Pottier Date: Wed, 7 Oct 2015 09:59:50 +0200 Subject: One more replacement of [ioption] with [option]. I missed this opportunity in the previous commit. --- cparser/pre_parser.mly | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 14bf4a23..43b44c13 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -581,7 +581,7 @@ abstract_declarator: direct_abstract_declarator: | LPAREN abstract_declarator RPAREN -| ioption(direct_abstract_declarator) LBRACK type_qualifier_list? assignment_expression? RBRACK +| option(direct_abstract_declarator) LBRACK type_qualifier_list? assignment_expression? RBRACK | ioption(direct_abstract_declarator) LPAREN in_context(parameter_type_list?) RPAREN {} -- cgit From 7c4dd1467e62229689fe15656f4405f617edca1d Mon Sep 17 00:00:00 2001 From: François Pottier Date: Wed, 7 Oct 2015 10:09:33 +0200 Subject: Introduced [other_identifier] as a more elegant way of calling [set_id_type i OtherId]. This causes no change in the automaton. --- cparser/pre_parser.mly | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 43b44c13..c036caeb 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -106,6 +106,15 @@ general_identifier: | i = TYPEDEF_NAME { i } +(* [other_identifier] is equivalent to [general_identifier], but adds + an instruction that re-classifies this identifier as an [OtherId]. 
+ Because this definition is marked %inline, the function call takes + place when the host production is reduced. *) + +%inline other_identifier: + i = general_identifier + { set_id_type i OtherId } + string_literals_list: | STRING_LITERAL | string_literals_list STRING_LITERAL @@ -161,9 +170,8 @@ postfix_expression: {} | BUILTIN_VA_ARG LPAREN assignment_expression COMMA type_name error { unclosed "(" ")" $startpos($2) $endpos } -| postfix_expression DOT i = general_identifier -| postfix_expression PTR i = general_identifier - { set_id_type i OtherId } +| postfix_expression DOT other_identifier +| postfix_expression PTR other_identifier | postfix_expression INC | postfix_expression DEC | LPAREN type_name RPAREN LBRACE initializer_list COMMA? RBRACE @@ -404,10 +412,9 @@ type_specifier_no_typedef_name: struct_or_union_specifier: | struct_or_union attribute_specifier_list LBRACE struct_declaration_list RBRACE +| struct_or_union attribute_specifier_list other_identifier LBRACE struct_declaration_list RBRACE +| struct_or_union attribute_specifier_list other_identifier {} -| struct_or_union attribute_specifier_list i = general_identifier LBRACE struct_declaration_list RBRACE -| struct_or_union attribute_specifier_list i = general_identifier - { set_id_type i OtherId } | struct_or_union attribute_specifier_list LBRACE struct_declaration_list error { unclosed "{" "}" $startpos($3) $endpos } | struct_or_union attribute_specifier_list general_identifier LBRACE struct_declaration_list error @@ -452,10 +459,9 @@ struct_declarator: enum_specifier: | ENUM attribute_specifier_list LBRACE enumerator_list COMMA? RBRACE +| ENUM attribute_specifier_list other_identifier LBRACE enumerator_list COMMA? RBRACE +| ENUM attribute_specifier_list other_identifier {} -| ENUM attribute_specifier_list i = general_identifier LBRACE enumerator_list COMMA? 
RBRACE -| ENUM attribute_specifier_list i = general_identifier - { set_id_type i OtherId } | ENUM attribute_specifier_list LBRACE enumerator_list COMMA? error { unclosed "{" "}" $startpos($3) $endpos } | ENUM attribute_specifier_list general_identifier LBRACE enumerator_list COMMA? error @@ -513,8 +519,7 @@ gcc_attribute: { set_id_type i VarId } gcc_attribute_word: -| i = general_identifier - { set_id_type i OtherId } +| other_identifier | CONST | PACKED {} @@ -607,9 +612,8 @@ designator_list: designator: | LBRACK constant_expression RBRACK +| DOT other_identifier {} -| DOT i = general_identifier - { set_id_type i OtherId } (* The grammar of statements is replicated three times. @@ -657,8 +661,7 @@ statement_intern_close: (* [labeled_statement(last_statement)] has the same effect on contexts as [last_statement]. *) labeled_statement(last_statement): -| i = general_identifier COLON last_statement - { set_id_type i OtherId } +| other_identifier COLON last_statement | CASE constant_expression COLON last_statement | DEFAULT COLON last_statement {} @@ -692,8 +695,7 @@ expression_statement(close): {} jump_statement(close): -| GOTO i = general_identifier close SEMICOLON - { set_id_type i OtherId } +| GOTO other_identifier close SEMICOLON | CONTINUE close SEMICOLON | BREAK close SEMICOLON | RETURN expression? close SEMICOLON @@ -797,8 +799,9 @@ asm_operand: {} asm_op_name: -| /*empty*/ {} -| LBRACK i = general_identifier RBRACK { set_id_type i OtherId } +| /*empty*/ +| LBRACK other_identifier RBRACK + {} asm_flags: | string_literals_list -- cgit From 7b62517ea6cf0d132099d9a921950f97704e3b9c Mon Sep 17 00:00:00 2001 From: François Pottier Date: Wed, 7 Oct 2015 10:14:05 +0200 Subject: For clarity, removed several redundant calls to [set_id_type]. A TYPEDEF_NAME is already classified as a [TypedefId] by the lexer, and similarly, a VAR_NAME is already classified as a [VarId]. Thus, the removed calls had no effect. 
The remaining calls to [set_id_type] are useful, as they can re-classify a token. --- cparser/pre_parser.mly | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index c036caeb..0d1e8fee 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -150,8 +150,7 @@ declare_typename(nt): (* Actual grammar *) primary_expression: -| i = VAR_NAME - { set_id_type i VarId } +| VAR_NAME | CONSTANT | string_literals_list | LPAREN expression RPAREN @@ -378,8 +377,7 @@ declaration_specifiers_no_typedef_name: The first field is a named t, while the second is unnamed of type t. *) declaration_specifiers: -| ioption(declaration_specifiers_no_type) i = TYPEDEF_NAME declaration_specifiers_no_type? - { set_id_type i TypedefId } +| ioption(declaration_specifiers_no_type) TYPEDEF_NAME declaration_specifiers_no_type? | ioption(declaration_specifiers_no_type) type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? {} @@ -387,9 +385,8 @@ declaration_specifiers: "typedef" keyword. To avoid conflicts, we also encode the constraint described in the comment for [declaration_specifiers]. *) declaration_specifiers_typedef: -| ioption(declaration_specifiers_no_type) TYPEDEF declaration_specifiers_no_type? i = TYPEDEF_NAME declaration_specifiers_no_type? -| ioption(declaration_specifiers_no_type) i = TYPEDEF_NAME declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? - { set_id_type i TypedefId } +| ioption(declaration_specifiers_no_type) TYPEDEF declaration_specifiers_no_type? TYPEDEF_NAME declaration_specifiers_no_type? +| ioption(declaration_specifiers_no_type) TYPEDEF_NAME declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? | ioption(declaration_specifiers_no_type) TYPEDEF declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? 
| ioption(declaration_specifiers_no_type) type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? TYPEDEF declaration_specifiers_no_typedef_name? {} @@ -437,8 +434,7 @@ struct_declaration: (* As in the standard, except it also encodes the constraint described in the comment above [declaration_specifiers]. *) specifier_qualifier_list: -| type_qualifier_list? i = TYPEDEF_NAME type_qualifier_list? - { set_id_type i TypedefId } +| type_qualifier_list? TYPEDEF_NAME type_qualifier_list? | type_qualifier_list? type_specifier_no_typedef_name specifier_qualifier_list_no_typedef_name? {} -- cgit From df2ba9189d479efce7f37c61ed1b15d93767145e Mon Sep 17 00:00:00 2001 From: François Pottier Date: Wed, 7 Oct 2015 10:34:10 +0200 Subject: Factorized two productions (and two error productions) in [struct_or_union_specifier]. The old version was strictly equivalent to using [ioption(other_identifier)]. The new version uses [option(other_identifier)] instead, that is, [other_identifier?]. Technically, this means that [set_id_type i OtherId] is called slightly earlier (at the opening brace, instead of at the closing brace), but this does not make any difference, since the re-classification of identifiers affects only the second parsing phase. --- cparser/pre_parser.mly | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 0d1e8fee..8cc92581 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -408,13 +408,10 @@ type_specifier_no_typedef_name: {} struct_or_union_specifier: -| struct_or_union attribute_specifier_list LBRACE struct_declaration_list RBRACE -| struct_or_union attribute_specifier_list other_identifier LBRACE struct_declaration_list RBRACE +| struct_or_union attribute_specifier_list other_identifier? 
LBRACE struct_declaration_list RBRACE | struct_or_union attribute_specifier_list other_identifier {} -| struct_or_union attribute_specifier_list LBRACE struct_declaration_list error - { unclosed "{" "}" $startpos($3) $endpos } -| struct_or_union attribute_specifier_list general_identifier LBRACE struct_declaration_list error +| struct_or_union attribute_specifier_list other_identifier? LBRACE struct_declaration_list error { unclosed "{" "}" $startpos($4) $endpos } struct_or_union: -- cgit From e9ba1d3276b0b2fbc37ecb8bd7e4955fd8ec030b Mon Sep 17 00:00:00 2001 From: François Pottier Date: Wed, 7 Oct 2015 10:37:06 +0200 Subject: Factorized two productions (and two error productions) in [enum_specifier]. This is analogous to the previous commit. --- cparser/pre_parser.mly | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 8cc92581..7567b372 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -451,13 +451,10 @@ struct_declarator: {} enum_specifier: -| ENUM attribute_specifier_list LBRACE enumerator_list COMMA? RBRACE -| ENUM attribute_specifier_list other_identifier LBRACE enumerator_list COMMA? RBRACE +| ENUM attribute_specifier_list other_identifier? LBRACE enumerator_list COMMA? RBRACE | ENUM attribute_specifier_list other_identifier {} -| ENUM attribute_specifier_list LBRACE enumerator_list COMMA? error - { unclosed "{" "}" $startpos($3) $endpos } -| ENUM attribute_specifier_list general_identifier LBRACE enumerator_list COMMA? error +| ENUM attribute_specifier_list other_identifier? LBRACE enumerator_list COMMA? error { unclosed "{" "}" $startpos($4) $endpos } enumerator_list: -- cgit From c1937e330a3ca6c19ef648e2dcfe4871fc3c2219 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Wed, 7 Oct 2015 10:42:10 +0200 Subject: Factorized the productions for several categories of binary operators. 
This leads to a small savings in the number of states (which could become greater in the future if we decide to parameterize expressions). If desired, the old automaton could be recovered by marking the binary operators as %inline. --- cparser/pre_parser.mly | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 7567b372..497851bf 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -210,37 +210,44 @@ cast_expression: | LPAREN type_name RPAREN cast_expression {} +multiplicative_operator: + STAR | SLASH | PERCENT {} + multiplicative_expression: | cast_expression -| multiplicative_expression STAR cast_expression -| multiplicative_expression SLASH cast_expression -| multiplicative_expression PERCENT cast_expression +| multiplicative_expression multiplicative_operator cast_expression {} +additive_operator: + PLUS | MINUS {} + additive_expression: | multiplicative_expression -| additive_expression PLUS multiplicative_expression -| additive_expression MINUS multiplicative_expression +| additive_expression additive_operator multiplicative_expression {} +shift_operator: + LEFT | RIGHT {} + shift_expression: | additive_expression -| shift_expression LEFT additive_expression -| shift_expression RIGHT additive_expression +| shift_expression shift_operator additive_expression {} +relational_operator: + LT | GT | LEQ | GEQ {} + relational_expression: | shift_expression -| relational_expression LT shift_expression -| relational_expression GT shift_expression -| relational_expression LEQ shift_expression -| relational_expression GEQ shift_expression +| relational_expression relational_operator shift_expression {} +equality_operator: + EQEQ | NEQ {} + equality_expression: | relational_expression -| equality_expression EQEQ relational_expression -| equality_expression NEQ relational_expression +| equality_expression 
equality_operator relational_expression {} and_expression: -- cgit From 09527e66514edcfa20a0341acd75c1fe6fd77363 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Wed, 7 Oct 2015 10:49:42 +0200 Subject: Introduced optional(X, Y), which means X? Y, and used it in array declarators and FOR loops. This leads to fewer automaton states, and potentially better error messages. --- cparser/pre_parser.mly | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 497851bf..62a57618 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -97,6 +97,14 @@ option(X): | x = X { Some x } +(* [optional(X, Y)] is equivalent to [X? Y]. However, by inlining + the two possibilities -- either [X Y] or just [Y] -- we are able + to give more meaningful syntax error messages. [optional(X, Y)] + itself is usually NOT inlined, as that would cause a useless + explosion of cases. *) +optional(X, Y): + ioption(X) Y {} + %inline fst(X): | x = X { fst x } @@ -537,7 +545,7 @@ direct_declarator: | i = general_identifier { set_id_type i VarId; (i, None) } | LPAREN x = declarator RPAREN -| x = direct_declarator LBRACK type_qualifier_list? assignment_expression? RBRACK +| x = direct_declarator LBRACK type_qualifier_list? optional(assignment_expression, RBRACK) { x } | x = direct_declarator LPAREN open_context parameter_type_list? restore_fun = save_contexts_stk @@ -583,7 +591,7 @@ abstract_declarator: direct_abstract_declarator: | LPAREN abstract_declarator RPAREN -| option(direct_abstract_declarator) LBRACK type_qualifier_list? assignment_expression? RBRACK +| option(direct_abstract_declarator) LBRACK type_qualifier_list? optional(assignment_expression, RBRACK) | ioption(direct_abstract_declarator) LPAREN in_context(parameter_type_list?) 
RPAREN {} @@ -764,8 +772,8 @@ iteration_statement(openc,last_statement): | WHILE openc LPAREN expression RPAREN last_statement | DO open_context statement_finish_close WHILE openc LPAREN expression RPAREN close_context SEMICOLON -| FOR openc LPAREN expression? SEMICOLON expression? SEMICOLON expression? RPAREN last_statement -| FOR openc LPAREN declaration expression? SEMICOLON expression? RPAREN last_statement +| FOR openc LPAREN optional(expression, SEMICOLON) optional(expression, SEMICOLON) optional(expression, RPAREN) last_statement +| FOR openc LPAREN declaration optional(expression, SEMICOLON) optional(expression, RPAREN) last_statement {} asm_attributes: -- cgit From c16b1ce7a09e7091f2482c9d898bc4f7ac73fe29 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Wed, 7 Oct 2015 10:53:26 +0200 Subject: Factorized the two forms of FOR statement by introducing [for_statement_header]. This leads to a smaller automaton. --- cparser/pre_parser.mly | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 62a57618..d6097ec1 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -772,8 +772,12 @@ iteration_statement(openc,last_statement): | WHILE openc LPAREN expression RPAREN last_statement | DO open_context statement_finish_close WHILE openc LPAREN expression RPAREN close_context SEMICOLON -| FOR openc LPAREN optional(expression, SEMICOLON) optional(expression, SEMICOLON) optional(expression, RPAREN) last_statement -| FOR openc LPAREN declaration optional(expression, SEMICOLON) optional(expression, RPAREN) last_statement +| FOR openc LPAREN for_statement_header optional(expression, SEMICOLON) optional(expression, RPAREN) last_statement + {} + +for_statement_header: +| optional(expression, SEMICOLON) +| declaration {} asm_attributes: -- cgit From b3d81f80a3e88adf2c8bd5eec7fe642497efd407 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Wed, 7 
Oct 2015 11:00:42 +0200 Subject: One cosmetic change of [option] to [?]. No impact. --- cparser/pre_parser.mly | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index d6097ec1..69618c12 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -591,7 +591,7 @@ abstract_declarator: direct_abstract_declarator: | LPAREN abstract_declarator RPAREN -| option(direct_abstract_declarator) LBRACK type_qualifier_list? optional(assignment_expression, RBRACK) +| direct_abstract_declarator? LBRACK type_qualifier_list? optional(assignment_expression, RBRACK) | ioption(direct_abstract_declarator) LPAREN in_context(parameter_type_list?) RPAREN {} -- cgit From 7f952a804eda8bac8d812800741b047550b1194b Mon Sep 17 00:00:00 2001 From: François Pottier Date: Wed, 7 Oct 2015 11:44:43 +0200 Subject: Cosmetic. Removed some spaces. Shared one redundant semantic action {}. --- cparser/pre_parser.mly | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 69618c12..6a2ae411 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -138,7 +138,7 @@ string_literals_list: follow set of the non-terminal in question. The follow sets are given by menhir with option -lg 3. *) -%inline nop: (* empty *) { } +%inline nop: (* empty *) {} open_context: (* empty *)%prec highPrec { !open_context () } @@ -326,7 +326,6 @@ constant_expression: declaration: | declaration_specifiers init_declarator_list? SEMICOLON - {} | declaration_specifiers_typedef typedef_declarator_list? 
SEMICOLON {} @@ -338,7 +337,7 @@ init_declarator_list: init_declarator: | declare_varname(fst(declarator)) | declare_varname(fst(declarator)) EQ c_initializer - { } + {} typedef_declarator_list: | typedef_declarator @@ -347,7 +346,7 @@ typedef_declarator_list: typedef_declarator: | declare_typename(fst(declarator)) - { } + {} storage_class_specifier_no_typedef: | EXTERN @@ -860,12 +859,12 @@ identifier_list: declaration_list: | /*empty*/ - { } + {} | declaration_list declaration - { } + {} function_definition: | function_definition_begin LBRACE block_item_list? close_context RBRACE - { } + {} | function_definition_begin LBRACE block_item_list? close_context error { unclosed "{" "}" $startpos($2) $endpos } -- cgit From 7d68132721bb4c12de8b846717972a25899ecc3f Mon Sep 17 00:00:00 2001 From: François Pottier Date: Thu, 8 Oct 2015 01:11:42 +0200 Subject: Replaced 4 uses of [ioption(declaration_specifiers_no_type)] with [declaration_specifiers_no_type?]. Inlining these options was not necessary. This reduces the number of states in the automaton. --- cparser/pre_parser.mly | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 6a2ae411..52a94078 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -392,17 +392,17 @@ declaration_specifiers_no_typedef_name: *) declaration_specifiers: | ioption(declaration_specifiers_no_type) TYPEDEF_NAME declaration_specifiers_no_type? -| ioption(declaration_specifiers_no_type) type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? +| declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? {} (* This matches declaration_specifiers that do contains once the "typedef" keyword. To avoid conflicts, we also encode the constraint described in the comment for [declaration_specifiers]. 
*) declaration_specifiers_typedef: -| ioption(declaration_specifiers_no_type) TYPEDEF declaration_specifiers_no_type? TYPEDEF_NAME declaration_specifiers_no_type? +| declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? TYPEDEF_NAME declaration_specifiers_no_type? | ioption(declaration_specifiers_no_type) TYPEDEF_NAME declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? -| ioption(declaration_specifiers_no_type) TYPEDEF declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? -| ioption(declaration_specifiers_no_type) type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? TYPEDEF declaration_specifiers_no_typedef_name? +| declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? +| declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? TYPEDEF declaration_specifiers_no_typedef_name? {} (* A type specifier which is not a typedef name. *) -- cgit From abf35973bb7128689b94a0e518cc50d26c4d5e10 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Fri, 23 Oct 2015 11:44:52 +0200 Subject: Remove all productions that involve the [error] token. These productions were used to give better error messages in some situations. They are no longer useful, since we are building a whole new system for reporting errors. 
--- cparser/pre_parser.mly | 34 ---------------------------------- 1 file changed, 34 deletions(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 52a94078..23ef1bc5 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -25,16 +25,6 @@ let declare_typename (i,_,_) = !declare_typename i - let syntax_error pos = - Cerrors.fatal_error "%s:%d: syntax error" - pos.Lexing.pos_fname pos.Lexing.pos_lnum - - let unclosed opening closing pos1 pos2 = - Cerrors.info "%s:%d: syntax error: expecting '%s'" - pos2.Lexing.pos_fname pos2.Lexing.pos_lnum closing; - Cerrors.fatal_error "%s:%d: this is the location of the unclosed '%s'" - pos1.Lexing.pos_fname pos1.Lexing.pos_lnum opening - %} %token @@ -163,30 +153,18 @@ primary_expression: | string_literals_list | LPAREN expression RPAREN {} -| LPAREN expression error - { unclosed "(" ")" $startpos($1) $endpos } postfix_expression: | primary_expression | postfix_expression LBRACK expression RBRACK | postfix_expression LPAREN argument_expression_list? RPAREN - {} -| postfix_expression LPAREN argument_expression_list? error - { unclosed "(" ")" $startpos($2) $endpos } | BUILTIN_VA_ARG LPAREN assignment_expression COMMA type_name RPAREN - {} -| BUILTIN_VA_ARG LPAREN assignment_expression COMMA type_name error - { unclosed "(" ")" $startpos($2) $endpos } | postfix_expression DOT other_identifier | postfix_expression PTR other_identifier | postfix_expression INC | postfix_expression DEC | LPAREN type_name RPAREN LBRACE initializer_list COMMA? RBRACE {} -| LPAREN type_name error - { unclosed "(" ")" $startpos($1) $endpos } -| LPAREN type_name RPAREN LBRACE initializer_list COMMA? error - { unclosed "{" "}" $startpos($4) $endpos } argument_expression_list: | assignment_expression @@ -425,8 +403,6 @@ struct_or_union_specifier: | struct_or_union attribute_specifier_list other_identifier? 
LBRACE struct_declaration_list RBRACE | struct_or_union attribute_specifier_list other_identifier {} -| struct_or_union attribute_specifier_list other_identifier? LBRACE struct_declaration_list error - { unclosed "{" "}" $startpos($4) $endpos } struct_or_union: | STRUCT @@ -468,8 +444,6 @@ enum_specifier: | ENUM attribute_specifier_list other_identifier? LBRACE enumerator_list COMMA? RBRACE | ENUM attribute_specifier_list other_identifier {} -| ENUM attribute_specifier_list other_identifier? LBRACE enumerator_list COMMA? error - { unclosed "{" "}" $startpos($4) $endpos } enumerator_list: | declare_varname(enumerator) @@ -598,8 +572,6 @@ c_initializer: | assignment_expression | LBRACE initializer_list COMMA? RBRACE {} -| LBRACE initializer_list COMMA? error - { unclosed "{" "}" $startpos($1) $endpos } initializer_list: | designation? c_initializer @@ -677,8 +649,6 @@ labeled_statement(last_statement): compound_statement(openc): | LBRACE openc block_item_list? close_context RBRACE {} -| LBRACE openc block_item_list? close_context error - { unclosed "{" "}" $startpos($1) $endpos } block_item_list: | block_item_list? block_item @@ -820,8 +790,6 @@ translation_unit_file: | translation_unit EOF | EOF {} -| error - { syntax_error $endpos } translation_unit: | external_declaration @@ -866,5 +834,3 @@ declaration_list: function_definition: | function_definition_begin LBRACE block_item_list? close_context RBRACE {} -| function_definition_begin LBRACE block_item_list? close_context error - { unclosed "{" "}" $startpos($2) $endpos } -- cgit From 431d01db20514292c75fa00f522a8b56d7150b03 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Wed, 7 Oct 2015 13:22:25 +0200 Subject: A general comment about phantom parameters. 
--- cparser/pre_parser.mly | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 23ef1bc5..df0244b7 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -145,6 +145,22 @@ declare_varname(nt): declare_typename(nt): i = nt { declare_typename i; i } +(* A note about phantom parameters. The definition of a non-terminal symbol + [nt] is sometimes parameterized with a parameter that is unused in the + right-hand side. This parameter disappears when macro-expansion takes + place. Thus, the presence of this parameter does not influence the language + that is accepted by the parser. Yet, it carries information about the + context, since different call sites can supply different values of this + parameter. This forces the creation of two (or more) identical copies of + the definition of [nt], which leads to a larger automaton, where some + states have been duplicated. In these states, more information about the + context is available, which allows better syntax error messages to be + given. + + By convention, a formal phantom parameter is named [phantom], so as to be + easily recognizable. For clarity, we usually explicitly document which + actual values it can take. *) + (* Actual grammar *) primary_expression: -- cgit From 89e9eabc2fb752a6535cb375232d0141cc1f4fd3 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Wed, 7 Oct 2015 13:26:38 +0200 Subject: Added a phantom parameter to [specifier_qualifier_list]. --- cparser/pre_parser.mly | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index df0244b7..1940aaa8 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -431,12 +431,13 @@ struct_declaration_list: {} struct_declaration: -| specifier_qualifier_list struct_declarator_list? 
SEMICOLON +| specifier_qualifier_list(struct_declaration) struct_declarator_list? SEMICOLON {} (* As in the standard, except it also encodes the constraint described in the comment above [declaration_specifiers]. *) -specifier_qualifier_list: +(* The phantom parameter can be [struct_declaration] or [type_name]. *) +specifier_qualifier_list(phantom): | type_qualifier_list? TYPEDEF_NAME type_qualifier_list? | type_qualifier_list? type_specifier_no_typedef_name specifier_qualifier_list_no_typedef_name? {} @@ -570,7 +571,7 @@ parameter_declaration: { None } type_name: -| specifier_qualifier_list abstract_declarator? +| specifier_qualifier_list(type_name) abstract_declarator? {} abstract_declarator: -- cgit From e9ae100975a868a6e0e91a51ce54d243d52ab6c7 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Fri, 23 Oct 2015 12:47:41 +0200 Subject: Added a phantom parameter to [abstract_declarator]. This allows distinguishing two uses of abstract_declarator, within a type_name and within a parameter_declaration. This provides more static context and allows giving a better syntax error message, as this allows us to know what is expected next: a closing parenthesis or a comma. --- cparser/pre_parser.mly | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 1940aaa8..8002d5c4 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -567,20 +567,24 @@ parameter_list: parameter_declaration: | declaration_specifiers id=declare_varname(fst(declarator)) { Some id } -| declaration_specifiers abstract_declarator? +| declaration_specifiers abstract_declarator(parameter_declaration)? { None } type_name: -| specifier_qualifier_list(type_name) abstract_declarator? +| specifier_qualifier_list(type_name) abstract_declarator(type_name)? {} -abstract_declarator: +(* The phantom parameter can be [parameter_declaration] or [type_name].
+ We take the latter to mean [type_name] or [direct_abstract_declarator]. + We need not distinguish these two cases: in both cases, a closing parenthesis + is permitted (and we do not wish to keep track of why it is permitted). *) +abstract_declarator(phantom): | pointer | ioption(pointer) direct_abstract_declarator {} direct_abstract_declarator: -| LPAREN abstract_declarator RPAREN +| LPAREN abstract_declarator(type_name) RPAREN | direct_abstract_declarator? LBRACK type_qualifier_list? optional(assignment_expression, RBRACK) | ioption(direct_abstract_declarator) LPAREN in_context(parameter_type_list?) RPAREN {} -- cgit From b371ea255077d700f848165a5834f104601e8253 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Wed, 7 Oct 2015 15:43:52 +0200 Subject: Added a phantom parameter to [declaration_specifiers]. This does not change the automaton at all. It allows us to distinguish more easily between two contexts: - the beginning of a declaration or function definition; - the beginning of a parameter declaration. This leads to better error messages. --- cparser/pre_parser.mly | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 8002d5c4..0bbf10ca 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -319,8 +319,8 @@ constant_expression: typedef). *) declaration: -| declaration_specifiers init_declarator_list? SEMICOLON -| declaration_specifiers_typedef typedef_declarator_list? SEMICOLON +| declaration_specifiers(declaration) init_declarator_list? SEMICOLON +| declaration_specifiers_typedef typedef_declarator_list? SEMICOLON {} init_declarator_list: @@ -384,7 +384,11 @@ declaration_specifiers_no_typedef_name: The first field is a named t, while the second is unnamed of type t.
*) -declaration_specifiers: +(* The phantom parameter is EITHER [declaration], which we take to mean that + this is the beginning of a declaration *or* a function definition (we + cannot distinguish the two!), OR [parameter_declaration], which means that + this is the beginning of a parameter declaration. *) +declaration_specifiers(phantom): | ioption(declaration_specifiers_no_type) TYPEDEF_NAME declaration_specifiers_no_type? | declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? {} @@ -565,9 +569,9 @@ parameter_list: { i::l } parameter_declaration: -| declaration_specifiers id=declare_varname(fst(declarator)) +| declaration_specifiers(parameter_declaration) id=declare_varname(fst(declarator)) { Some id } -| declaration_specifiers abstract_declarator(parameter_declaration)? +| declaration_specifiers(parameter_declaration) abstract_declarator(parameter_declaration)? { None } type_name: @@ -826,12 +830,12 @@ external_declaration: {} function_definition_begin: -| declaration_specifiers ioption(pointer) x=direct_declarator +| declaration_specifiers(declaration) ioption(pointer) x=direct_declarator { match x with | (_, None) -> $syntaxerror | (i, Some restore_fun) -> restore_fun () } -| declaration_specifiers ioption(pointer) x=direct_declarator +| declaration_specifiers(declaration) ioption(pointer) x=direct_declarator LPAREN params=identifier_list RPAREN open_context declaration_list { match x with | (_, Some _) -> $syntaxerror -- cgit From 17733e430b0c0a19853e1367ca38282a943e0c76 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Wed, 7 Oct 2015 17:10:29 +0200 Subject: Added a phantom parameter to [declaration]. This parameter is passed down in [declaration_specifiers(declaration(phantom))]. 
This allows us to distinguish between three calling contexts for [declaration_specifiers]: - we are definitely in a parameter declaration; - we are definitely in a declaration (e.g., in a block); - we are in a declaration or in a function definition (i.e., at the top level). This allows us to give better error messages. For instance, when inside a block, we know that this cannot be the beginning of a function definition. --- cparser/pre_parser.mly | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 0bbf10ca..78641e58 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -318,9 +318,12 @@ constant_expression: cannot contain an initialization (this is an error to initialize a typedef). *) -declaration: -| declaration_specifiers(declaration) init_declarator_list? SEMICOLON -| declaration_specifiers_typedef typedef_declarator_list? SEMICOLON +(* The phantom parameter is either [block_item], which means we are + definitely reading a declaration, or [external_declaration], which + means we could also be reading the beginning of a function definition. *) +declaration(phantom): +| declaration_specifiers(declaration(phantom)) init_declarator_list? SEMICOLON +| declaration_specifiers_typedef typedef_declarator_list? SEMICOLON {} init_declarator_list: @@ -384,10 +387,10 @@ declaration_specifiers_no_typedef_name: The first field is a named t, while the second is unnamed of type t. *) -(* The phantom parameter is EITHER [declaration], which we take to mean that - this is the beginning of a declaration *or* a function definition (we - cannot distinguish the two!), OR [parameter_declaration], which means that - this is the beginning of a parameter declaration. 
*) +(* The phantom parameter is either [declaration(_)], which means that + this is the beginning of a declaration or a function definition, or + [parameter_declaration], which means that this is the beginning of a + parameter declaration. *) declaration_specifiers(phantom): | ioption(declaration_specifiers_no_type) TYPEDEF_NAME declaration_specifiers_no_type? | declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? @@ -680,7 +683,7 @@ block_item_list: {} block_item: -| declaration +| declaration(block_item) | statement_finish_noclose | PRAGMA {} @@ -771,7 +774,7 @@ iteration_statement(openc,last_statement): for_statement_header: | optional(expression, SEMICOLON) -| declaration +| declaration(block_item) {} asm_attributes: @@ -825,17 +828,19 @@ translation_unit: external_declaration: | function_definition -| declaration +| declaration(external_declaration) | PRAGMA {} function_definition_begin: -| declaration_specifiers(declaration) ioption(pointer) x=direct_declarator +| declaration_specifiers(declaration(external_declaration)) + ioption(pointer) x=direct_declarator { match x with | (_, None) -> $syntaxerror | (i, Some restore_fun) -> restore_fun () } -| declaration_specifiers(declaration) ioption(pointer) x=direct_declarator +| declaration_specifiers(declaration(external_declaration)) + ioption(pointer) x=direct_declarator LPAREN params=identifier_list RPAREN open_context declaration_list { match x with | (_, Some _) -> $syntaxerror @@ -853,7 +858,7 @@ identifier_list: declaration_list: | /*empty*/ {} -| declaration_list declaration +| declaration_list declaration(block_item) {} function_definition: -- cgit From 44b910bb829f6e5920b0a51985624c345019711e Mon Sep 17 00:00:00 2001 From: François Pottier Date: Thu, 8 Oct 2015 01:20:31 +0200 Subject: Factorized [declaration_specifier_no_type]. This saves a few states. 
--- cparser/pre_parser.mly | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 78641e58..0aa884c9 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -354,10 +354,14 @@ storage_class_specifier_no_typedef: (* [declaration_specifiers_no_type] matches declaration specifiers that do not contain either "typedef" nor type specifiers. *) +declaration_specifier_no_type: +| storage_class_specifier_no_typedef +| type_qualifier +| function_specifier + {} + declaration_specifiers_no_type: -| storage_class_specifier_no_typedef declaration_specifiers_no_type? -| type_qualifier declaration_specifiers_no_type? -| function_specifier declaration_specifiers_no_type? +| declaration_specifier_no_type declaration_specifiers_no_type? {} (* [declaration_specifiers_no_typedef_name] matches declaration -- cgit From 4972a6a8851dfe823a022fc3b8c7c01332a89c35 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Thu, 8 Oct 2015 01:30:18 +0200 Subject: Introduced [list] and [ilist]. Redefined [declaration_specifiers_no_type] as a left-recursive list. This further reduces the number of states (and error states). --- cparser/pre_parser.mly | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 0aa884c9..e2ca8439 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -95,6 +95,21 @@ option(X): optional(X, Y): ioption(X) Y {} +(* This is a standard left-recursive, possibly empty list, without + separators. Note that, by convention, [X*] is syntactic sugar for + [list(X)]. *) + +list(X): +| (* empty *) {} +| list(X) X {} + +(* [ilist(X)] is equivalent to [list(X)], but is marked [%inline], + so its definition is expanded. 
*) + +%inline ilist(X): +| (* empty *) {} +| list(X) X {} + %inline fst(X): | x = X { fst x } @@ -352,7 +367,7 @@ storage_class_specifier_no_typedef: | REGISTER {} -(* [declaration_specifiers_no_type] matches declaration specifiers +(* [declaration_specifier_no_type] matches declaration specifiers that do not contain either "typedef" nor type specifiers. *) declaration_specifier_no_type: | storage_class_specifier_no_typedef @@ -360,10 +375,6 @@ declaration_specifier_no_type: | function_specifier {} -declaration_specifiers_no_type: -| declaration_specifier_no_type declaration_specifiers_no_type? - {} - (* [declaration_specifiers_no_typedef_name] matches declaration specifiers that contain neither "typedef" nor a typedef name (i.e. type specifier declared using a previous "typedef @@ -396,18 +407,18 @@ declaration_specifiers_no_typedef_name: [parameter_declaration], which means that this is the beginning of a parameter declaration. *) declaration_specifiers(phantom): -| ioption(declaration_specifiers_no_type) TYPEDEF_NAME declaration_specifiers_no_type? -| declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? +| ilist(declaration_specifier_no_type) TYPEDEF_NAME declaration_specifier_no_type* +| declaration_specifier_no_type* type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? {} (* This matches declaration_specifiers that do contains once the "typedef" keyword. To avoid conflicts, we also encode the constraint described in the comment for [declaration_specifiers]. *) declaration_specifiers_typedef: -| declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? TYPEDEF_NAME declaration_specifiers_no_type? -| ioption(declaration_specifiers_no_type) TYPEDEF_NAME declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? -| declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? 
-| declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? TYPEDEF declaration_specifiers_no_typedef_name? +| declaration_specifier_no_type* TYPEDEF declaration_specifier_no_type* TYPEDEF_NAME declaration_specifier_no_type* +| ilist(declaration_specifier_no_type) TYPEDEF_NAME declaration_specifier_no_type* TYPEDEF declaration_specifier_no_type* +| declaration_specifier_no_type* TYPEDEF declaration_specifier_no_type* type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? +| declaration_specifier_no_type* type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? TYPEDEF declaration_specifiers_no_typedef_name? {} (* A type specifier which is not a typedef name. *) -- cgit From 1c6d12874f0737d07acbda6b56e43053ca159c36 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Thu, 8 Oct 2015 09:23:41 +0200 Subject: Reformulated the definitions of [option] and [list] in a slightly more elegant manner. --- cparser/pre_parser.mly | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index e2ca8439..6627568d 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -68,12 +68,6 @@ so this definition of [option] is actually used, even though the word [option] does not appear in the rest of this file. *) -option(X): -| /* nothing */ - { None } -| x = X - { Some x } - (* [ioption(X)] is equivalent to [option(X)], but is marked [%inline], so its definition is expanded. In the absence of conflicts, the two are equivalent. Using [ioption] instead of [option] in well-chosen @@ -81,12 +75,20 @@ option(X): of [ioption] in well-chosen places can help reduce the number of states of the automaton. *) +(* Defining the non-%inline version in terms of the %inline version is + a standard idiom. It obviates the need to duplicate the definition. + The same idiom is used elsewhere below. 
*) + %inline ioption(X): | /* nothing */ { None } | x = X { Some x } +option(X): + o = ioption(X) + { o } + (* [optional(X, Y)] is equivalent to [X? Y]. However, by inlining the two possibilies -- either [X Y] or just [Y] -- we are able to give more meaningful syntax error messages. [optional(X, Y)] @@ -99,17 +101,17 @@ optional(X, Y): separators. Note that, by convention, [X*] is syntactic sugar for [list(X)]. *) -list(X): -| (* empty *) {} -| list(X) X {} - (* [ilist(X)] is equivalent to [list(X)], but is marked [%inline], - so its definition is expanded. *) + so its definition is expanded (only one level deep, of course). *) %inline ilist(X): | (* empty *) {} | list(X) X {} +list(X): + xs = ilist(X) + { xs } + %inline fst(X): | x = X { fst x } -- cgit From ca5594ace000eebab2e89791aa42cb849fad1a16 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Thu, 8 Oct 2015 10:09:20 +0200 Subject: Factorized [declaration_specifier_no_typedef_name]. This results in slightly fewer states. --- cparser/pre_parser.mly | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 6627568d..f25dde4f 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -377,15 +377,19 @@ declaration_specifier_no_type: | function_specifier {} -(* [declaration_specifiers_no_typedef_name] matches declaration +(* [declaration_specifier_no_typedef_name] matches declaration specifiers that contain neither "typedef" nor a typedef name (i.e. type specifier declared using a previous "typedef keyword"). *) +declaration_specifier_no_typedef_name: +| storage_class_specifier_no_typedef +| type_qualifier +| function_specifier +| type_specifier_no_typedef_name + {} + declaration_specifiers_no_typedef_name: -| storage_class_specifier_no_typedef declaration_specifiers_no_typedef_name? -| type_qualifier declaration_specifiers_no_typedef_name? 
-| function_specifier declaration_specifiers_no_typedef_name? -| type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? + declaration_specifier_no_typedef_name declaration_specifiers_no_typedef_name? {} (* [declaration_specifiers_no_type] matches declaration_specifiers -- cgit From d7d088921bcd26012bd27e5cc4f01746f91d029b Mon Sep 17 00:00:00 2001 From: François Pottier Date: Thu, 8 Oct 2015 10:16:25 +0200 Subject: Replaced [declaration_specifiers_no_typedef_name] with [declaration_specifier_no_typedef_name*]. This replaces a right-recursive list with a left-recursive list. This saves 2 states and 6 error states. --- cparser/pre_parser.mly | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index f25dde4f..f077a30c 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -388,10 +388,6 @@ declaration_specifier_no_typedef_name: | type_specifier_no_typedef_name {} -declaration_specifiers_no_typedef_name: - declaration_specifier_no_typedef_name declaration_specifiers_no_typedef_name? - {} - (* [declaration_specifiers_no_type] matches declaration_specifiers that do not contains "typedef". Moreover, it makes sure that it contains either one typename and not other type specifier or no @@ -414,7 +410,7 @@ declaration_specifiers_no_typedef_name: parameter declaration. *) declaration_specifiers(phantom): | ilist(declaration_specifier_no_type) TYPEDEF_NAME declaration_specifier_no_type* -| declaration_specifier_no_type* type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? 
+| declaration_specifier_no_type* type_specifier_no_typedef_name declaration_specifier_no_typedef_name* {} (* This matches declaration_specifiers that do contains once the @@ -423,8 +419,8 @@ declaration_specifiers(phantom): declaration_specifiers_typedef: | declaration_specifier_no_type* TYPEDEF declaration_specifier_no_type* TYPEDEF_NAME declaration_specifier_no_type* | ilist(declaration_specifier_no_type) TYPEDEF_NAME declaration_specifier_no_type* TYPEDEF declaration_specifier_no_type* -| declaration_specifier_no_type* TYPEDEF declaration_specifier_no_type* type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? -| declaration_specifier_no_type* type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? TYPEDEF declaration_specifiers_no_typedef_name? +| declaration_specifier_no_type* TYPEDEF declaration_specifier_no_type* type_specifier_no_typedef_name declaration_specifier_no_typedef_name* +| declaration_specifier_no_type* type_specifier_no_typedef_name declaration_specifier_no_typedef_name* TYPEDEF declaration_specifier_no_typedef_name* {} (* A type specifier which is not a typedef name. *) -- cgit From c6d574de8b0435676947b6b7e97d6d36d89f1f20 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Thu, 8 Oct 2015 11:03:30 +0200 Subject: Redefined [pointer] as a left-recursive list. This creates more states and does not change the number of error states. It should make it easier to give a good error message in at least 2 states. --- cparser/pre_parser.mly | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index f077a30c..61fd6972 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -552,7 +552,7 @@ function_specifier: has to be restored if entering the body of the function being defined, if so. 
*) declarator: -| ioption(pointer) x = direct_declarator attribute_specifier_list +| ilist(pointer1) x = direct_declarator attribute_specifier_list { x } direct_declarator: @@ -568,9 +568,26 @@ direct_declarator: | None -> (fst x, Some restore_fun) | Some _ -> x } -pointer: -| STAR type_qualifier_list? -| STAR type_qualifier_list? pointer +(* The C standard defines [pointer] as a right-recursive list. We prefer to + define it as a left-recursive list, because this provides better static + context (that is, this changes the automaton in such a way that it is + easier to give good error messages, in at least 2 states). + + The non-terminal symbol [pointer1] represents one list element. + + [pointer], which represents a non-empty list of [pointer1]'s, is defined + as [pointer1* pointer1]. + + When the C standard writes [pointer?], which represents a possibly empty + list of [pointer1]'s, we write [pointer1*] or [ilist(pointer1)]. The two + are equivalent, as long as there is no conflict. *) + +%inline pointer1: + STAR type_qualifier_list? + {} + +%inline pointer: + pointer1* pointer1 {} type_qualifier_list: @@ -604,7 +621,7 @@ type_name: is permitted (and we do not wish to keep track of why it is permitted). 
*) abstract_declarator(phantom): | pointer -| ioption(pointer) direct_abstract_declarator +| ilist(pointer1) direct_abstract_declarator {} direct_abstract_declarator: @@ -851,13 +868,13 @@ external_declaration: function_definition_begin: | declaration_specifiers(declaration(external_declaration)) - ioption(pointer) x=direct_declarator + ilist(pointer1) x=direct_declarator { match x with | (_, None) -> $syntaxerror | (i, Some restore_fun) -> restore_fun () } | declaration_specifiers(declaration(external_declaration)) - ioption(pointer) x=direct_declarator + ilist(pointer1) x=direct_declarator LPAREN params=identifier_list RPAREN open_context declaration_list { match x with | (_, Some _) -> $syntaxerror -- cgit From 4a088ba7c9f82d105b25935c1ff7abb07e72de6b Mon Sep 17 00:00:00 2001 From: François Pottier Date: Mon, 19 Oct 2015 10:59:28 +0200 Subject: Reformulated [specifier_qualifier_list_no_typedef_name] as a left-recursive list. This saves 7 states and 4 error states. --- cparser/pre_parser.mly | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 61fd6972..639c5ac5 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -463,12 +463,12 @@ struct_declaration: (* The phantom parameter can be [struct_declaration] or [type_name]. *) specifier_qualifier_list(phantom): | type_qualifier_list? TYPEDEF_NAME type_qualifier_list? -| type_qualifier_list? type_specifier_no_typedef_name specifier_qualifier_list_no_typedef_name? +| type_qualifier_list? type_specifier_no_typedef_name specifier_qualifier_no_typedef_name* {} -specifier_qualifier_list_no_typedef_name: -| type_specifier_no_typedef_name specifier_qualifier_list_no_typedef_name? -| type_qualifier specifier_qualifier_list_no_typedef_name? 
+specifier_qualifier_no_typedef_name: +| type_specifier_no_typedef_name +| type_qualifier {} struct_declarator_list: -- cgit From e9ea9da494eeabfc459e7ba43f9f5ccb9a48dcf1 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Mon, 19 Oct 2015 18:39:27 +0200 Subject: Inlined [constant_expression] to save one state. --- cparser/pre_parser.mly | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 639c5ac5..f3bfb9e8 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -323,7 +323,7 @@ expression: | expression COMMA assignment_expression {} -constant_expression: +%inline constant_expression: | conditional_expression {} -- cgit From e5d2a949f0e5cd5f1ddaece41a03f65ac1e3836e Mon Sep 17 00:00:00 2001 From: François Pottier Date: Fri, 23 Oct 2015 12:59:47 +0200 Subject: Removed the two uses of $syntaxerror in a semantic action. For the first one, this is fine; the error is caught by a type check later on. For the second one, it is temporary. More thought is needed about the syntax of K&R functions anyway, as Jacques-Henri and I discovered that it is currently broken (it mis-interprets some function definitions). 
--- cparser/pre_parser.mly | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index f3bfb9e8..96cf0de7 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -870,14 +870,19 @@ function_definition_begin: | declaration_specifiers(declaration(external_declaration)) ilist(pointer1) x=direct_declarator { match x with - | (_, None) -> $syntaxerror + | (_, None) -> !open_context() + (* this case does not make sense, but we let it pass anyway; + this error will be caught later on by a type check *) | (i, Some restore_fun) -> restore_fun () } | declaration_specifiers(declaration(external_declaration)) ilist(pointer1) x=direct_declarator LPAREN params=identifier_list RPAREN open_context declaration_list { match x with - | (_, Some _) -> $syntaxerror + | (i, Some _) -> declare_varname i + (* this case does not make sense; the syntax of K&R + declarators is broken anyway, Jacques-Henri should + propose a fix soon *) | (i, None) -> declare_varname i; List.iter declare_varname params -- cgit From 073e50a2b795f68e59075d6e365e72b4bd2417c1 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Fri, 23 Oct 2015 13:01:14 +0200 Subject: Changed [asm_flags] to a left-recursive list. This allows us to give a better error message in one state. --- cparser/pre_parser.mly | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 96cf0de7..e55389ba 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -845,7 +845,7 @@ asm_op_name: asm_flags: | string_literals_list -| string_literals_list COMMA asm_flags +| asm_flags COMMA string_literals_list {} translation_unit_file: -- cgit From 883ebf950a4ef38788792cb1129fb9c408225ad3 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Fri, 23 Oct 2015 13:04:34 +0200 Subject: Added an %on_error_reduce declaration. 
This affects in which states errors are detected, but does not change the language that is accepted. --- cparser/pre_parser.mly | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index e55389ba..1de726be 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -60,6 +60,48 @@ %nonassoc highPrec %start translation_unit_file + +(* The following declarations cause certain nonterminal symbols to be + reduced when an error is detected. This replaces error actions in + the automaton with reduction actions. So, if the input is correct, + this makes no difference, and if the input is incorrect, this only + forces a few more reductions to take place before the error is + detected and reported. If used properly, this facilitates error + reports. *) + +%on_error_reduce + primary_expression + postfix_expression + unary_expression + cast_expression + multiplicative_expression + additive_expression + shift_expression + relational_expression + equality_expression + and_expression + exclusive_or_expression + inclusive_or_expression + logical_and_expression + logical_or_expression + conditional_expression + assignment_expression + expression + attribute_specifier_list + declarator + statement_finish_close + iteration_statement(nop,statement_finish_close) + enum_specifier + struct_or_union_specifier + specifier_qualifier_list(struct_declaration) + specifier_qualifier_list(type_name) + option(abstract_declarator(type_name)) + abstract_declarator(type_name) + abstract_declarator(parameter_declaration) + asm_flags + asm_operands + init_declarator + %% (* Helpers *) -- cgit From 1f74fdf503d3c501d2e261e76337452f6401d63a Mon Sep 17 00:00:00 2001 From: François Pottier Date: Fri, 23 Oct 2015 13:59:40 +0200 Subject: Added copyright banners to the new files. 
--- cparser/pre_parser.mly | 1 + 1 file changed, 1 insertion(+) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 1de726be..25e7a745 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -3,6 +3,7 @@ /* The Compcert verified compiler */ /* */ /* Jacques-Henri Jourdan, INRIA Paris-Rocquencourt */ +/* François Pottier, INRIA Paris-Rocquencourt */ /* */ /* Copyright Institut National de Recherche en Informatique et en */ /* Automatique. All rights reserved. This file is distributed */ -- cgit