From e18d267e6912e18462472687abc014a3d04b9a37 Mon Sep 17 00:00:00 2001 From: Jacques-Henri Jourdan Date: Thu, 8 Oct 2015 17:27:31 +0200 Subject: other, simpler fix: the lexer emits 2 tokens for each identifier --- cparser/pre_parser.mly | 339 ++++++++++++++++++++----------------------------- 1 file changed, 136 insertions(+), 203 deletions(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index e73cc22a..eacd59c8 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -37,6 +37,7 @@ %} +%token PRE_NAME %token VAR_NAME TYPEDEF_NAME %token CONSTANT @@ -55,19 +56,27 @@ %token EOF -(* These precedences declarations solve the conflict in the following declaration : +(* These precedence declarations solve the conflict in the following + declaration : int f(int (a)); - when a is a TYPEDEF_NAME. It is specified by 6.7.5.3 11. + when a is a TYPEDEF_NAME. It is specified by 6.7.5.3 11: a should + be taken as the type of parameter the anonymous function +*) +%nonassoc lowPrec1 +%nonassoc TYPEDEF_NAME +(* These precedence declaration solve the dangling else conflict. *) +%nonassoc lowPrec2 +%nonassoc ELSE + +(* WARNING: These precedence declarations tend to silently solve other conflicts. So, if you change the grammar (especially or statements), you should check that without these declarations, it - has ONLY ONE CONFLICT. + has ONLY 3 CONFLICTS. *) -%nonassoc TYPEDEF_NAME -%nonassoc highPrec %start translation_unit_file %% @@ -84,37 +93,42 @@ | x = X { fst x } -general_identifier: -| i = VAR_NAME -| i = TYPEDEF_NAME - { i } +(* The kind of an identifier should not be determined when looking + ahead, because the context may not be up to date. For this reason, + when reading an identifier, the lexer emits two tokens: the first + one (PRE_NAME) is eaten as a lookahead token, the second one is the + actual identifier. 
+*) -string_literals_list: -| STRING_LITERAL -| string_literals_list STRING_LITERAL - {} +typedef_name: +| PRE_NAME i = TYPEDEF_NAME + { i } -(* WARNING : because of the lookahead token, the context might be - opened or closed one token after the position of this non-terminal ! +var_name: +| PRE_NAME i = VAR_NAME + { i } - Opening too late is not dangerous for us, because this does not - change the token stream. However, we have to make sure the - lookahead token present just after closing/declaring/restoring is - not an identifier. An easy way to check that is to look at the - follow set of the non-terminal in question. The follow sets are - given by menhir with option -lg 3. *) +general_identifier: +| i = typedef_name +| i = var_name + { i } -%inline nop: (* empty *) { } +(* We add this non-terminal here to force the resolution of the + conflict at the point of shifting the TYPEDEF_NAME. If we had + already shifted it, reduce/reduce conflict appear, and menhir is + not able to solve them. *) +low_prec : %prec lowPrec1 {} +general_identifier_red: +| PRE_NAME low_prec i = TYPEDEF_NAME +| PRE_NAME i = VAR_NAME + { i } -open_context: - (* empty *)%prec highPrec { !open_context () } -close_context: - (* empty *) { !close_context () } -in_context(nt): - open_context x = nt close_context { x } +string_literals_list: +| string_literals_list? STRING_LITERAL + {} -save_contexts_stk: - (* empty *) { !save_contexts_stk () } +save_context: + (* empty *) { !save_context () } declare_varname(nt): i = nt { declare_varname i; i } @@ -124,7 +138,7 @@ declare_typename(nt): (* Actual grammar *) primary_expression: -| i = VAR_NAME +| i = var_name { set_id_type i VarId } | CONSTANT | string_literals_list @@ -320,9 +334,9 @@ storage_class_specifier_no_typedef: (* [declaration_specifiers_no_type] matches declaration specifiers that do not contain either "typedef" nor type specifiers. *) declaration_specifiers_no_type: -| storage_class_specifier_no_typedef declaration_specifiers_no_type? 
-| type_qualifier declaration_specifiers_no_type? -| function_specifier declaration_specifiers_no_type? +| declaration_specifiers_no_type? storage_class_specifier_no_typedef +| declaration_specifiers_no_type? type_qualifier +| declaration_specifiers_no_type? function_specifier {} (* [declaration_specifiers_no_typedef_name] matches declaration @@ -330,10 +344,10 @@ declaration_specifiers_no_type: (i.e. type specifier declared using a previous "typedef keyword"). *) declaration_specifiers_no_typedef_name: -| storage_class_specifier_no_typedef declaration_specifiers_no_typedef_name? -| type_qualifier declaration_specifiers_no_typedef_name? -| function_specifier declaration_specifiers_no_typedef_name? -| type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? +| declaration_specifiers_no_typedef_name? storage_class_specifier_no_typedef +| declaration_specifiers_no_typedef_name? type_qualifier +| declaration_specifiers_no_typedef_name? function_specifier +| declaration_specifiers_no_typedef_name? type_specifier_no_typedef_name {} (* [declaration_specifiers_no_type] matches declaration_specifiers @@ -353,7 +367,7 @@ declaration_specifiers_no_typedef_name: The first field is a named t, while the second is unnamed of type t. *) declaration_specifiers: -| declaration_specifiers_no_type? i = TYPEDEF_NAME declaration_specifiers_no_type? +| declaration_specifiers_no_type? i = typedef_name declaration_specifiers_no_type? { set_id_type i TypedefId } | declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? {} @@ -364,9 +378,9 @@ declaration_specifiers: declaration_specifiers_typedef: | declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? - i = TYPEDEF_NAME declaration_specifiers_no_type? + i = typedef_name declaration_specifiers_no_type? | declaration_specifiers_no_type? - i = TYPEDEF_NAME declaration_specifiers_no_type? + i = typedef_name declaration_specifiers_no_type? 
TYPEDEF declaration_specifiers_no_type? { set_id_type i TypedefId } | declaration_specifiers_no_type? @@ -421,7 +435,7 @@ struct_declaration: (* As in the standard, except it also encodes the constraint described in the comment above [declaration_specifiers]. *) specifier_qualifier_list: -| type_qualifier_list? i = TYPEDEF_NAME type_qualifier_list? +| type_qualifier_list? i = typedef_name type_qualifier_list? { set_id_type i TypedefId } | type_qualifier_list? type_specifier_no_typedef_name specifier_qualifier_list_no_typedef_name? {} @@ -497,7 +511,7 @@ gcc_attribute: | gcc_attribute_word | gcc_attribute_word LPAREN argument_expression_list? RPAREN {} -| gcc_attribute_word LPAREN i = TYPEDEF_NAME COMMA argument_expression_list RPAREN +| gcc_attribute_word LPAREN i = typedef_name COMMA argument_expression_list RPAREN (* This is to emulate GCC's attribute syntax : we make this identifier a var name identifier, so that the parser will see it as a variable reference *) @@ -523,16 +537,14 @@ declarator: { x } direct_declarator: -| i = general_identifier +| i = general_identifier_red { set_id_type i VarId; (i, None) } -| LPAREN x = declarator RPAREN +| LPAREN save_context x = declarator RPAREN | x = direct_declarator LBRACK type_qualifier_list? assignment_expression? RBRACK { x } -| x = direct_declarator LPAREN - open_context parameter_type_list? restore_fun = save_contexts_stk - close_context RPAREN +| x = direct_declarator LPAREN ctx = context_parameter_type_list RPAREN { match snd x with - | None -> (fst x, Some restore_fun) + | None -> (fst x, Some ctx) | Some _ -> x } pointer: @@ -544,6 +556,10 @@ type_qualifier_list: | type_qualifier_list? type_qualifier {} +context_parameter_type_list: +| ctx1 = save_context parameter_type_list? 
ctx2 = save_context + { ctx1 (); ctx2 } + parameter_type_list: | l=parameter_list | l=parameter_list COMMA ELLIPSIS @@ -571,9 +587,9 @@ abstract_declarator: {} direct_abstract_declarator: -| LPAREN abstract_declarator RPAREN +| LPAREN save_context abstract_declarator RPAREN | direct_abstract_declarator? LBRACK type_qualifier_list? assignment_expression? RBRACK -| direct_abstract_declarator? LPAREN in_context(parameter_type_list?) RPAREN +| direct_abstract_declarator? LPAREN context_parameter_type_list RPAREN {} c_initializer: @@ -602,67 +618,28 @@ designator: | DOT i = general_identifier { set_id_type i OtherId } -(* The grammar of statements is replicated three times. - - [statement_finish_close] should close the current context just - before its last token. - - [statement_finish_noclose] should not close the current context. It - should modify it only if this modification actually changes the - context of the current block. - - [statement_intern_close] is like [statement_finish_close], except - it cannot reduce to a single-branch if statement. 
-*) +statement: +| labeled_statement +| compound_statement +| expression_statement +| selection_statement +| iteration_statement +| jump_statement +| asm_statement + {} -statement_finish_close: -| labeled_statement(statement_finish_close) -| compound_statement(nop) -| expression_statement(close_context) -| selection_statement_finish(nop) -| iteration_statement(nop,statement_finish_close) -| jump_statement(close_context) -| asm_statement(close_context) - {} - -statement_finish_noclose: -| labeled_statement(statement_finish_noclose) -| compound_statement(open_context) -| expression_statement(nop) -| selection_statement_finish(open_context) -| iteration_statement(open_context,statement_finish_close) -| jump_statement(nop) -| asm_statement(nop) - {} - -statement_intern_close: -| labeled_statement(statement_intern_close) -| compound_statement(nop) -| expression_statement(close_context) -| selection_statement_intern_close -| iteration_statement(nop,statement_intern_close) -| jump_statement(close_context) -| asm_statement(close_context) - {} - -(* [labeled_statement(last_statement)] has the same effect on contexts - as [last_statement]. *) -labeled_statement(last_statement): -| i = general_identifier COLON last_statement +labeled_statement: +| i = general_identifier COLON statement { set_id_type i OtherId } -| CASE constant_expression COLON last_statement -| DEFAULT COLON last_statement +| CASE constant_expression COLON statement +| DEFAULT COLON statement {} -(* [compound_statement] uses a local context and closes it before its - last token. It uses [openc] to open this local context if needed. - That is, if a local context has already been opened, [openc] = [nop], - otherwise, [openc] = [open_context]. *) -compound_statement(openc): -| LBRACE openc block_item_list? close_context RBRACE - {} -| LBRACE openc block_item_list? close_context error - { unclosed "{" "}" $startpos($1) $endpos } +compound_statement: +| ctx = save_context LBRACE block_item_list? 
RBRACE + { ctx() } +| ctx = save_context LBRACE block_item_list? error + { ctx(); unclosed "{" "}" $startpos($2) $endpos } block_item_list: | block_item_list? block_item @@ -670,95 +647,46 @@ block_item_list: block_item: | declaration -| statement_finish_noclose +| statement | PRAGMA {} -(* [expression_statement], [jump_statement] and [asm_statement] close - the local context if needed, depending of the close parameter. If - there is no local context, [close] = [nop]. Otherwise, - [close] = [close_context]. *) -expression_statement(close): -| expression? close SEMICOLON +expression_statement: +| expression? SEMICOLON {} -jump_statement(close): -| GOTO i = general_identifier close SEMICOLON +jump_statement: +| GOTO i = general_identifier SEMICOLON { set_id_type i OtherId } -| CONTINUE close SEMICOLON -| BREAK close SEMICOLON -| RETURN expression? close SEMICOLON - {} - -asm_statement(close): -| ASM asm_attributes LPAREN string_literals_list asm_arguments RPAREN close SEMICOLON - {} - -(* [selection_statement_finish] and [selection_statement_intern] use a - local context and close it before their last token. - - [selection_statement_finish(openc)] uses [openc] to open this local - context if needed. That is, if a local context has already been - opened, [openc] = [nop], otherwise, [openc] = [open_context]. - - [selection_statement_intern_close] is always called with a local - context openned. It closes it before its last token. *) - -(* It should be noted that the token [ELSE] should be lookaheaded - /outside/ of the local context because if the lookaheaded token is - not [ELSE], then this is the end of the statement. - - This is especially important to parse correctly the following - example: - - typedef int a; - - int f() { - for(int a; ;) - if(1); - a * x; - } - - However, if the lookahead token is [ELSE], we should parse the - second branch in the same context as the first branch, so we have - to reopen the previously closed context. 
This is the reason for the - save/restore system. -*) - -if_else_statement_begin(openc): -| IF openc LPAREN expression RPAREN restore_fun = save_contexts_stk - statement_intern_close - { restore_fun () } - -selection_statement_finish(openc): -| IF openc LPAREN expression RPAREN save_contexts_stk statement_finish_close -| if_else_statement_begin(openc) ELSE statement_finish_close -| SWITCH openc LPAREN expression RPAREN statement_finish_close +| CONTINUE SEMICOLON +| BREAK SEMICOLON +| RETURN expression? SEMICOLON {} -selection_statement_intern_close: -| if_else_statement_begin(nop) ELSE statement_intern_close -| SWITCH LPAREN expression RPAREN statement_intern_close +asm_statement: +| ASM asm_attributes LPAREN string_literals_list asm_arguments RPAREN SEMICOLON {} -(* [iteration_statement] uses a local context and closes it before - their last token. +ifelse_statement1: +| IF LPAREN expression RPAREN ctx = save_context statement ELSE + { ctx() } - [iteration_statement] uses [openc] to open this local context if - needed. That is, if a local context has already been opened, - [openc] = [nop], otherwise, [openc] = [open_context]. +selection_statement: +| ctx = save_context ifelse_statement1 statement +| ctx = save_context IF LPAREN expression RPAREN save_context statement %prec lowPrec2 +| ctx = save_context SWITCH LPAREN expression RPAREN statement + { ctx() } - [last_statement] is either [statement_intern_close] or - [statement_finish_close]. That is, it should /always/ close the - local context. *) +do_statement1: +| ctx = save_context DO statement + { ctx () } -iteration_statement(openc,last_statement): -| WHILE openc LPAREN expression RPAREN last_statement -| DO open_context statement_finish_close WHILE - openc LPAREN expression RPAREN close_context SEMICOLON -| FOR openc LPAREN expression? SEMICOLON expression? SEMICOLON expression? RPAREN last_statement -| FOR openc LPAREN declaration expression? SEMICOLON expression? 
RPAREN last_statement - {} +iteration_statement: +| ctx = save_context WHILE LPAREN expression RPAREN statement +| ctx = save_context FOR LPAREN expression? SEMICOLON expression? SEMICOLON expression? RPAREN statement +| ctx = save_context FOR LPAREN declaration expression? SEMICOLON expression? RPAREN statement +| ctx = save_context do_statement1 WHILE LPAREN expression RPAREN SEMICOLON + { ctx() } asm_attributes: | /* empty */ @@ -816,35 +744,40 @@ external_declaration: | PRAGMA {} -function_definition_begin: -| declaration_specifiers pointer? x=direct_declarator +identifier_list: +| id = var_name + { [id] } +| idl = identifier_list COMMA id = var_name + { id :: idl } + +declaration_list: +| /*empty*/ + { } +| declaration_list declaration + { } + +function_definition1: +| declaration_specifiers pointer? x=direct_declarator ctx = save_context { match x with | (_, None) -> $syntaxerror - | (i, Some restore_fun) -> restore_fun () + | (_, Some ctx') -> ctx'(); ctx } | declaration_specifiers pointer? x=direct_declarator - LPAREN params=identifier_list RPAREN open_context declaration_list + LPAREN save_context params=identifier_list RPAREN ctx = save_context declaration_list { match x with | (_, Some _) -> $syntaxerror | (i, None) -> declare_varname i; - List.iter declare_varname params + List.iter declare_varname params; + ctx } -identifier_list: -| id = VAR_NAME - { [id] } -| idl = identifier_list COMMA id = VAR_NAME - { id :: idl } - -declaration_list: -| /*empty*/ - { } -| declaration_list declaration - { } +function_definition2: +| ctx = function_definition1 LBRACE block_item_list? + { ctx() } +| ctx = function_definition1 LBRACE block_item_list? error + { unclosed "{" "}" $startpos($2) $endpos } function_definition: -| function_definition_begin LBRACE block_item_list? close_context RBRACE +| function_definition2 RBRACE { } -| function_definition_begin LBRACE block_item_list? 
close_context error - { unclosed "{" "}" $startpos($2) $endpos } -- cgit From b960c83725d7e185ac5c6e3c0d6043c7dcd2f556 Mon Sep 17 00:00:00 2001 From: Jacques-Henri Jourdan Date: Sun, 1 Nov 2015 22:32:23 +0100 Subject: Better handling of old-style K&R function declarations: - Added a Cabs.PROTO_OLD constructor to Cabs.decl_type - Refactored the Parser.vy and pre_parser.mly grammars - Rewritten the conversion of old function definitions to new-style --- cparser/pre_parser.mly | 178 +++++++++++++++++++++++++------------------------ 1 file changed, 90 insertions(+), 88 deletions(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index eacd59c8..41b068de 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -13,6 +13,13 @@ /* */ /* *********************************************************************/ +(* + WARNING: The precedence declarations tend to silently solve + conflicts. So, if you change the grammar (especially for + statements), you should check that without these declarations, it + has ONLY 3 CONFLICTS in 3 STATES. +*) + %{ open Pre_parser_aux @@ -35,6 +42,12 @@ Cerrors.fatal_error "%s:%d: this is the location of the unclosed '%s'" pos1.Lexing.pos_fname pos1.Lexing.pos_lnum opening + type 'id fun_declarator_ctx = + | Decl_ident + | Decl_other + | Decl_fun of (unit -> unit) + | Decl_krfun of 'id + %} %token PRE_NAME @@ -57,7 +70,7 @@ %token EOF (* These precedence declarations solve the conflict in the following - declaration : + declaration: int f(int (a)); @@ -71,13 +84,6 @@ %nonassoc lowPrec2 %nonassoc ELSE -(* - WARNING: These precedence declarations tend to silently solve other - conflicts. So, if you change the grammar (especially or - statements), you should check that without these declarations, it - has ONLY 3 CONFLICTS. 
-*) - %start translation_unit_file %% @@ -89,10 +95,6 @@ | x = X { Some x } -%inline fst(X): -| x = X - { fst x } - (* The kind of an identifier should not be determined when looking ahead, because the context may not be up to date. For this reason, when reading an identifier, the lexer emits two tokens: the first @@ -113,16 +115,6 @@ general_identifier: | i = var_name { i } -(* We add this non-terminal here to force the resolution of the - conflict at the point of shifting the TYPEDEF_NAME. If we had - already shifted it, reduce/reduce conflict appear, and menhir is - not able to solve them. *) -low_prec : %prec lowPrec1 {} -general_identifier_red: -| PRE_NAME low_prec i = TYPEDEF_NAME -| PRE_NAME i = VAR_NAME - { i } - string_literals_list: | string_literals_list? STRING_LITERAL {} @@ -131,9 +123,9 @@ save_context: (* empty *) { !save_context () } declare_varname(nt): - i = nt { declare_varname i; i } + i = nt { declare_varname (fst i); i } declare_typename(nt): - i = nt { declare_typename i; i } + i = nt { declare_typename (fst i); i } (* Actual grammar *) @@ -301,7 +293,6 @@ constant_expression: declaration: | declaration_specifiers init_declarator_list? SEMICOLON - {} | declaration_specifiers_typedef typedef_declarator_list? 
SEMICOLON {} @@ -311,9 +302,9 @@ init_declarator_list: {} init_declarator: -| declare_varname(fst(declarator)) -| declare_varname(fst(declarator)) EQ c_initializer - { } +| declare_varname(declarator_noattrend) save_context attribute_specifier_list +| declare_varname(declarator_noattrend) save_context attribute_specifier_list EQ c_initializer + {} typedef_declarator_list: | typedef_declarator @@ -321,8 +312,8 @@ typedef_declarator_list: {} typedef_declarator: -| declare_typename(fst(declarator)) - { } +| declare_typename(declarator) + {} storage_class_specifier_no_typedef: | EXTERN @@ -334,9 +325,9 @@ storage_class_specifier_no_typedef: (* [declaration_specifiers_no_type] matches declaration specifiers that do not contain either "typedef" nor type specifiers. *) declaration_specifiers_no_type: -| declaration_specifiers_no_type? storage_class_specifier_no_typedef -| declaration_specifiers_no_type? type_qualifier -| declaration_specifiers_no_type? function_specifier +| storage_class_specifier_no_typedef declaration_specifiers_no_type? +| type_qualifier declaration_specifiers_no_type? +| function_specifier declaration_specifiers_no_type? {} (* [declaration_specifiers_no_typedef_name] matches declaration @@ -350,10 +341,8 @@ declaration_specifiers_no_typedef_name: | declaration_specifiers_no_typedef_name? type_specifier_no_typedef_name {} -(* [declaration_specifiers_no_type] matches declaration_specifiers - that do not contains "typedef". Moreover, it makes sure that it - contains either one typename and not other type specifier or no - typename. +(* [declaration_specifiers] makes sure one type specifier is given, and, + if a typedef_name is given, then no other type specifier is given. This is a weaker condition than 6.7.2 2. 
It is necessary to enforce this in the grammar to disambiguate the example in 6.7.7 6: @@ -474,13 +463,13 @@ enumerator_list: enumerator: | i = enumeration_constant | i = enumeration_constant EQ constant_expression - { i } + { (i, ()) } enumeration_constant: | i = general_identifier { set_id_type i VarId; i } -type_qualifier: +%inline type_qualifier: | CONST | RESTRICT | VOLATILE @@ -489,7 +478,7 @@ type_qualifier: attribute_specifier_list: | /* empty */ -| attribute_specifier_list attribute_specifier +| attribute_specifier attribute_specifier_list {} attribute_specifier: @@ -528,24 +517,50 @@ function_specifier: | INLINE {} +(* We add this non-terminal here to force the resolution of the + conflict at the point of shifting the TYPEDEF_NAME. If we had + already shifted it, reduce/reduce conflict appear, and menhir is + not able to solve them. *) +low_prec : %prec lowPrec1 {} +declarator_identifier: +| PRE_NAME low_prec i = TYPEDEF_NAME +| PRE_NAME i = VAR_NAME + { i } + (* The semantic action returned by [declarator] is a pair of the - identifier being defined and an option of the context stack that - has to be restored if entering the body of the function being + identifier being defined and a value containing the context stack + that has to be restored if entering the body of the function being defined, if so. *) declarator: -| pointer? x = direct_declarator attribute_specifier_list +| x = declarator_noattrend attribute_specifier_list { x } +declarator_noattrend: +| x = direct_declarator + { x } +| pointer x = direct_declarator + { match snd x with + | Decl_ident -> (fst x, Decl_other) + | _ -> x } + direct_declarator: -| i = general_identifier_red - { set_id_type i VarId; (i, None) } +| i = declarator_identifier + { set_id_type i VarId; (i, Decl_ident) } | LPAREN save_context x = declarator RPAREN -| x = direct_declarator LBRACK type_qualifier_list? assignment_expression? RBRACK { x } +| x = direct_declarator LBRACK type_qualifier_list? assignment_expression? 
RBRACK + { match snd x with + | Decl_ident -> (fst x, Decl_other) + | _ -> x } | x = direct_declarator LPAREN ctx = context_parameter_type_list RPAREN { match snd x with - | None -> (fst x, Some ctx) - | Some _ -> x } + | Decl_ident -> (fst x, Decl_fun ctx) + | _ -> x } +| x = direct_declarator LPAREN save_context il=identifier_list? RPAREN + { match snd x, il with + | Decl_ident, Some il -> (fst x, Decl_krfun il) + | Decl_ident, None -> (fst x, Decl_krfun []) + | _ -> x } pointer: | STAR type_qualifier_list? @@ -557,25 +572,23 @@ type_qualifier_list: {} context_parameter_type_list: -| ctx1 = save_context parameter_type_list? ctx2 = save_context +| ctx1 = save_context parameter_type_list ctx2 = save_context { ctx1 (); ctx2 } parameter_type_list: -| l=parameter_list -| l=parameter_list COMMA ELLIPSIS - { l } +| parameter_list +| parameter_list COMMA ELLIPSIS + {} parameter_list: -| i=parameter_declaration - { [i] } -| l=parameter_list COMMA i=parameter_declaration - { i::l } +| parameter_declaration +| parameter_list COMMA parameter_declaration + {} parameter_declaration: -| declaration_specifiers id=declare_varname(fst(declarator)) - { Some id } +| declaration_specifiers declare_varname(declarator) | declaration_specifiers abstract_declarator? - { None } + {} type_name: | specifier_qualifier_list abstract_declarator? @@ -745,39 +758,28 @@ external_declaration: {} identifier_list: -| id = var_name - { [id] } -| idl = identifier_list COMMA id = var_name - { id :: idl } +| x = var_name + { [x] } +| l = identifier_list COMMA x = var_name + { x::l } declaration_list: -| /*empty*/ - { } +| declaration | declaration_list declaration - { } + {} function_definition1: -| declaration_specifiers pointer? x=direct_declarator ctx = save_context - { match x with - | (_, None) -> $syntaxerror - | (_, Some ctx') -> ctx'(); ctx - } -| declaration_specifiers pointer? 
x=direct_declarator - LPAREN save_context params=identifier_list RPAREN ctx = save_context declaration_list - { match x with - | (_, Some _) -> $syntaxerror - | (i, None) -> - declare_varname i; - List.iter declare_varname params; - ctx - } - -function_definition2: -| ctx = function_definition1 LBRACE block_item_list? - { ctx() } -| ctx = function_definition1 LBRACE block_item_list? error - { unclosed "{" "}" $startpos($2) $endpos } +| declaration_specifiers func = declare_varname(declarator_noattrend) + save_context attribute_specifier_list ctx = save_context +| declaration_specifiers func = declare_varname(declarator_noattrend) + ctx = save_context declaration_list + { begin match snd func with + | Decl_fun ctx -> ctx (); declare_varname (fst func) + | Decl_krfun il -> List.iter declare_varname il + | _ -> () + end; + ctx } function_definition: -| function_definition2 RBRACE - { } +| ctx = function_definition1 compound_statement + { ctx () } -- cgit From b071b27ecfea4911b1cd07d4083d8ad7a1db7809 Mon Sep 17 00:00:00 2001 From: Jacques-Henri Jourdan Date: Sat, 7 Nov 2015 18:21:00 +0100 Subject: Integrate a few comments of F. Pottier into the pre_parser and handcrafted.messages --- cparser/pre_parser.mly | 48 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 9 deletions(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 4d7ad40a..0cc30668 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -17,8 +17,9 @@ (* WARNING: The precedence declarations tend to silently solve conflicts. So, if you change the grammar (especially for - statements), you should check that without these declarations, it - has ONLY 2 CONFLICTS in 2 STATES. + statements), you should check that when you run "make correct" + in the cparser/ directory, Menhir should say: + 2 shift/reduce conflicts were silently solved. *) %{ @@ -65,13 +66,15 @@ int f(int (a)); - when a is a TYPEDEF_NAME. 
It is specified by 6.7.5.3 11: a should - be taken as the type of parameter the anonymous function + when a is a TYPEDEF_NAME. It is specified by 6.7.5.3 11: 'a' should + be taken as the type of parameter of the anonymous function. + + See below comment on [low_prec] *) %nonassoc lowPrec1 %nonassoc TYPEDEF_NAME -(* These precedence declaration solve the dangling else conflict. *) +(* These precedence declarations solve the dangling else conflict. *) %nonassoc lowPrec2 %nonassoc ELSE @@ -182,6 +185,9 @@ rlist(X): one (PRE_NAME) is eaten as a lookahead token, the second one is the actual identifier. *) +(* For [var_name] we need more context on error reporting, so we use + %inline. Not using %inline for typedef_name helps factorizing many + similar error messages. *) typedef_name: | PRE_NAME i = TYPEDEF_NAME @@ -606,8 +612,32 @@ function_specifier: (* We add this non-terminal here to force the resolution of the conflict at the point of shifting the TYPEDEF_NAME. If we had - already shifted it, reduce/reduce conflict appear, and menhir is - not able to solve them. *) + already shifted it, a reduce/reduce conflict appears, and menhir is + not able to solve it. + + The conflict in question is when parsing : + int f(int (t + With lookahead ')', in a context where 't' is a type name. + In this case, we are able to reduce the two productions: + (1) "declarator_identifier -> PRE_NAME TYPEDEF_NAME" + followed by "direct_declarator -> declarator_identifier" + meaning that 't' is the parameter of function 'f' + (2) "list(declaration_specifier_no_type) -> " + followed by "list(declaration_specifier_no_type) -> PRE_NAME TYPEDEF_NAME list(declaration_specifier_no_type)" + followed by "declaration_specifiers(...) -> ..." + followed by "parameter_declaration -> ..."
+ meaning that 't' is the type of the parameter of a function + passed as parameter to 'f' + + By adding this non-terminal at this point, we force this conflict to + be solved earlier: once we have seen "f(int (", followed by PRE_NAME + and with TYPEDEF_NAME in lookahead position, we know (1) can safely + be ignored (if (1) is still possible after reading the next token, + (2) will also be possible, and the conflict has to be solved in + favor of (2)). We add low_prec in declaration_specifier, but not in + typedef_name, so that it has to be reduced in (1) but not in (2). + This is a shift/reduce conflict that can be solved using precedences. +*) low_prec : %prec lowPrec1 {} declarator_identifier: | PRE_NAME low_prec i = TYPEDEF_NAME @@ -765,14 +795,14 @@ block_item: {} expression_statement: -| ioption(expression) SEMICOLON +| expression? SEMICOLON {} jump_statement: | GOTO other_identifier SEMICOLON | CONTINUE SEMICOLON | BREAK SEMICOLON -| RETURN ioption(expression) SEMICOLON +| RETURN expression? SEMICOLON {} asm_statement: -- cgit From 0ebefc1d145f82783829174bad1f41bb319742b4 Mon Sep 17 00:00:00 2001 From: Jacques-Henri Jourdan Date: Sat, 7 Nov 2015 18:53:50 +0100 Subject: Typo, coherence in error messages --- cparser/pre_parser.mly | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'cparser/pre_parser.mly') diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index 0cc30668..d217a7a4 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -634,10 +634,11 @@ function_specifier: and with TYPEDEF_NAME in lookahead position, we know (1) can safely be ignored (if (1) is still possible after reading the next token, (2) will also be possible, and the conflict has to be solved in - favor of (2)). We add low_prec in declaration_specifier, but not in + favor of (2)). We add low_prec in declarator_identifier, but not in typedef_name, so that it has to be reduced in (1) but not in (2).
- This is a shift/reduce conflict that can be solved using precedences. -*) + This is a shift/reduce conflict that can be solved using + precedences. + *) low_prec : %prec lowPrec1 {} declarator_identifier: | PRE_NAME low_prec i = TYPEDEF_NAME -- cgit