/* *********************************************************************/ /* */ /* The Compcert verified compiler */ /* */ /* Jacques-Henri Jourdan, INRIA Paris-Rocquencourt */ /* */ /* Copyright Institut National de Recherche en Informatique et en */ /* Automatique. All rights reserved. This file is distributed */ /* under the terms of the GNU General Public License as published by */ /* the Free Software Foundation, either version 2 of the License, or */ /* (at your option) any later version. This file is also distributed */ /* under the terms of the INRIA Non-Commercial License Agreement. */ /* */ /* *********************************************************************/ (* WARNING: The precedence declarations tend to silently solve conflicts. So, if you change the grammar (especially for statements), you should check that without these declarations, it has ONLY 3 CONFLICTS in 3 STATES. *) %{ open Pre_parser_aux let set_id_type (_,r,_) t = r := t let declare_varname (i,_,_) = !declare_varname i let declare_typename (i,_,_) = !declare_typename i let syntax_error pos = Cerrors.fatal_error "%s:%d: syntax error" pos.Lexing.pos_fname pos.Lexing.pos_lnum let unclosed opening closing pos1 pos2 = Cerrors.info "%s:%d: syntax error: expecting '%s'" pos2.Lexing.pos_fname pos2.Lexing.pos_lnum closing; Cerrors.fatal_error "%s:%d: this is the location of the unclosed '%s'" pos1.Lexing.pos_fname pos1.Lexing.pos_lnum opening type 'id fun_declarator_ctx = | Decl_ident | Decl_other | Decl_fun of (unit -> unit) | Decl_krfun of 'id %} %token PRE_NAME %token VAR_NAME TYPEDEF_NAME %token CONSTANT %token STRING_LITERAL %token PRAGMA %token SIZEOF PTR INC DEC LEFT RIGHT LEQ GEQ EQEQ EQ NEQ LT GT ANDAND BARBAR PLUS MINUS STAR TILDE BANG SLASH PERCENT HAT BAR QUESTION COLON AND MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN SUB_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN XOR_ASSIGN OR_ASSIGN LPAREN RPAREN LBRACK RBRACK LBRACE RBRACE DOT COMMA SEMICOLON ELLIPSIS TYPEDEF EXTERN STATIC RESTRICT AUTO REGISTER INLINE CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE UNDERSCORE_BOOL CONST VOLATILE VOID STRUCT UNION ENUM CASE DEFAULT IF ELSE SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN BUILTIN_VA_ARG ALIGNOF ATTRIBUTE ALIGNAS PACKED ASM %token EOF (* These precedence declarations solve the conflict in the following declaration: int f(int (a)); when a is a TYPEDEF_NAME. It is specified by 6.7.5.3 11: a should be taken as the type of parameter the anonymous function *) %nonassoc lowPrec1 %nonassoc TYPEDEF_NAME (* These precedence declaration solve the dangling else conflict. *) %nonassoc lowPrec2 %nonassoc ELSE %start translation_unit_file %% (* Helpers *) %inline option(X): | /* nothing */ { None } | x = X { Some x } (* The kind of an identifier should not be determined when looking ahead, because the context may not be up to date. For this reason, when reading an identifier, the lexer emits two tokens: the first one (PRE_NAME) is eaten as a lookahead token, the second one is the actual identifier. *) typedef_name: | PRE_NAME i = TYPEDEF_NAME { i } var_name: | PRE_NAME i = VAR_NAME { i } general_identifier: | i = typedef_name | i = var_name { i } string_literals_list: | string_literals_list? STRING_LITERAL {} save_context: (* empty *) { !save_context () } declare_varname(nt): i = nt { declare_varname (fst i); i } declare_typename(nt): i = nt { declare_typename (fst i); i } (* Actual grammar *) primary_expression: | i = var_name { set_id_type i VarId } | CONSTANT | string_literals_list | LPAREN expression RPAREN {} | LPAREN expression error { unclosed "(" ")" $startpos($1) $endpos } postfix_expression: | primary_expression | postfix_expression LBRACK expression RBRACK | postfix_expression LPAREN argument_expression_list? RPAREN {} | postfix_expression LPAREN argument_expression_list? error { unclosed "(" ")" $startpos($2) $endpos } | BUILTIN_VA_ARG LPAREN assignment_expression COMMA type_name RPAREN {} | BUILTIN_VA_ARG LPAREN assignment_expression COMMA type_name error { unclosed "(" ")" $startpos($2) $endpos } | postfix_expression DOT i = general_identifier | postfix_expression PTR i = general_identifier { set_id_type i OtherId } | postfix_expression INC | postfix_expression DEC | LPAREN type_name RPAREN LBRACE initializer_list COMMA? RBRACE {} | LPAREN type_name error { unclosed "(" ")" $startpos($1) $endpos } | LPAREN type_name RPAREN LBRACE initializer_list COMMA? error { unclosed "{" "}" $startpos($4) $endpos } argument_expression_list: | assignment_expression | argument_expression_list COMMA assignment_expression {} unary_expression: | postfix_expression | INC unary_expression | DEC unary_expression | unary_operator cast_expression | SIZEOF unary_expression | SIZEOF LPAREN type_name RPAREN | ALIGNOF unary_expression | ALIGNOF LPAREN type_name RPAREN {} unary_operator: | AND | STAR | PLUS | MINUS | TILDE | BANG {} cast_expression: | unary_expression | LPAREN type_name RPAREN cast_expression {} multiplicative_expression: | cast_expression | multiplicative_expression STAR cast_expression | multiplicative_expression SLASH cast_expression | multiplicative_expression PERCENT cast_expression {} additive_expression: | multiplicative_expression | additive_expression PLUS multiplicative_expression | additive_expression MINUS multiplicative_expression {} shift_expression: | additive_expression | shift_expression LEFT additive_expression | shift_expression RIGHT additive_expression {} relational_expression: | shift_expression | relational_expression LT shift_expression | relational_expression GT shift_expression | relational_expression LEQ shift_expression | relational_expression GEQ shift_expression {} equality_expression: | relational_expression | equality_expression EQEQ relational_expression | equality_expression NEQ relational_expression {} and_expression: | equality_expression | and_expression AND equality_expression {} exclusive_or_expression: | and_expression | exclusive_or_expression HAT and_expression {} inclusive_or_expression: | exclusive_or_expression | inclusive_or_expression BAR exclusive_or_expression {} logical_and_expression: | inclusive_or_expression | logical_and_expression ANDAND inclusive_or_expression {} logical_or_expression: | logical_and_expression | logical_or_expression BARBAR logical_and_expression {} conditional_expression: | logical_or_expression | logical_or_expression QUESTION expression COLON conditional_expression {} assignment_expression: | conditional_expression | unary_expression assignment_operator assignment_expression {} assignment_operator: | EQ | MUL_ASSIGN | DIV_ASSIGN | MOD_ASSIGN | ADD_ASSIGN | SUB_ASSIGN | LEFT_ASSIGN | RIGHT_ASSIGN | AND_ASSIGN | XOR_ASSIGN | OR_ASSIGN {} expression: | assignment_expression | expression COMMA assignment_expression {} constant_expression: | conditional_expression {} (* We separate two kinds of declarations: the typedef declaration and the normal declarations. This makes possible to distinguish /in the grammar/ whether a declaration should add a typename or a varname in the context. There is an other difference between [init_declarator_list] and [typedef_declarator_list]: the later cannot contain an initialization (this is an error to initialize a typedef). *) declaration: | declaration_specifiers init_declarator_list? SEMICOLON | declaration_specifiers_typedef typedef_declarator_list? SEMICOLON {} init_declarator_list: | init_declarator | init_declarator_list COMMA init_declarator {} init_declarator: | declare_varname(declarator_noattrend) save_context attribute_specifier_list | declare_varname(declarator_noattrend) save_context attribute_specifier_list EQ c_initializer {} typedef_declarator_list: | typedef_declarator | typedef_declarator_list COMMA typedef_declarator {} typedef_declarator: | declare_typename(declarator) {} storage_class_specifier_no_typedef: | EXTERN | STATIC | AUTO | REGISTER {} (* [declaration_specifiers_no_type] matches declaration specifiers that do not contain either "typedef" nor type specifiers. *) declaration_specifiers_no_type: | storage_class_specifier_no_typedef declaration_specifiers_no_type? | type_qualifier declaration_specifiers_no_type? | function_specifier declaration_specifiers_no_type? {} (* [declaration_specifiers_no_typedef_name] matches declaration specifiers that contain neither "typedef" nor a typedef name (i.e. type specifier declared using a previous "typedef keyword"). *) declaration_specifiers_no_typedef_name: | declaration_specifiers_no_typedef_name? storage_class_specifier_no_typedef | declaration_specifiers_no_typedef_name? type_qualifier | declaration_specifiers_no_typedef_name? function_specifier | declaration_specifiers_no_typedef_name? type_specifier_no_typedef_name {} (* [declaration_specifiers] makes sure one type specifier is given, and, if a typedef_name is given, then no other type specifier is given. This is a weaker condition than 6.7.2 2. It is necessary to enforce this in the grammar to disambiguate the example in 6.7.7 6: typedef signed int t; struct tag { unsigned t:4; const t:5; }; The first field is a named t, while the second is unnamed of type t. *) declaration_specifiers: | declaration_specifiers_no_type? i = typedef_name declaration_specifiers_no_type? { set_id_type i TypedefId } | declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? {} (* This matches declaration_specifiers that do contains once the "typedef" keyword. To avoid conflicts, we also encode the constraint described in the comment for [declaration_specifiers]. *) declaration_specifiers_typedef: | declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? i = typedef_name declaration_specifiers_no_type? | declaration_specifiers_no_type? i = typedef_name declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? { set_id_type i TypedefId } | declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? | declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? TYPEDEF declaration_specifiers_no_typedef_name? {} (* A type specifier which is not a typedef name. *) type_specifier_no_typedef_name: | VOID | CHAR | SHORT | INT | LONG | FLOAT | DOUBLE | SIGNED | UNSIGNED | UNDERSCORE_BOOL | struct_or_union_specifier | enum_specifier {} struct_or_union_specifier: | struct_or_union attribute_specifier_list LBRACE struct_declaration_list RBRACE {} | struct_or_union attribute_specifier_list i = general_identifier LBRACE struct_declaration_list RBRACE | struct_or_union attribute_specifier_list i = general_identifier { set_id_type i OtherId } | struct_or_union attribute_specifier_list LBRACE struct_declaration_list error { unclosed "{" "}" $startpos($3) $endpos } | struct_or_union attribute_specifier_list general_identifier LBRACE struct_declaration_list error { unclosed "{" "}" $startpos($4) $endpos } struct_or_union: | STRUCT | UNION {} struct_declaration_list: | (* empty *) | struct_declaration_list struct_declaration {} struct_declaration: | specifier_qualifier_list struct_declarator_list? SEMICOLON {} (* As in the standard, except it also encodes the constraint described in the comment above [declaration_specifiers]. *) specifier_qualifier_list: | type_qualifier_list? i = typedef_name type_qualifier_list? { set_id_type i TypedefId } | type_qualifier_list? type_specifier_no_typedef_name specifier_qualifier_list_no_typedef_name? {} specifier_qualifier_list_no_typedef_name: | type_specifier_no_typedef_name specifier_qualifier_list_no_typedef_name? | type_qualifier specifier_qualifier_list_no_typedef_name? {} struct_declarator_list: | struct_declarator | struct_declarator_list COMMA struct_declarator {} struct_declarator: | declarator | declarator? COLON constant_expression {} enum_specifier: | ENUM attribute_specifier_list LBRACE enumerator_list COMMA? RBRACE {} | ENUM attribute_specifier_list i = general_identifier LBRACE enumerator_list COMMA? RBRACE | ENUM attribute_specifier_list i = general_identifier { set_id_type i OtherId } | ENUM attribute_specifier_list LBRACE enumerator_list COMMA? error { unclosed "{" "}" $startpos($3) $endpos } | ENUM attribute_specifier_list general_identifier LBRACE enumerator_list COMMA? error { unclosed "{" "}" $startpos($4) $endpos } enumerator_list: | declare_varname(enumerator) | enumerator_list COMMA declare_varname(enumerator) {} enumerator: | i = enumeration_constant | i = enumeration_constant EQ constant_expression { (i, ()) } enumeration_constant: | i = general_identifier { set_id_type i VarId; i } %inline type_qualifier: | CONST | RESTRICT | VOLATILE | attribute_specifier {} attribute_specifier_list: | /* empty */ | attribute_specifier attribute_specifier_list {} attribute_specifier: | ATTRIBUTE LPAREN LPAREN gcc_attribute_list RPAREN RPAREN | PACKED LPAREN argument_expression_list RPAREN (* TODO: slove conflict *) (* | PACKED *) | ALIGNAS LPAREN argument_expression_list RPAREN | ALIGNAS LPAREN type_name RPAREN {} gcc_attribute_list: | gcc_attribute | gcc_attribute_list COMMA gcc_attribute {} gcc_attribute: | /* empty */ | gcc_attribute_word | gcc_attribute_word LPAREN argument_expression_list? RPAREN {} | gcc_attribute_word LPAREN i = typedef_name COMMA argument_expression_list RPAREN (* This is to emulate GCC's attribute syntax : we make this identifier a var name identifier, so that the parser will see it as a variable reference *) { set_id_type i VarId } gcc_attribute_word: | i = general_identifier { set_id_type i OtherId } | CONST | PACKED {} function_specifier: | INLINE {} (* We add this non-terminal here to force the resolution of the conflict at the point of shifting the TYPEDEF_NAME. If we had already shifted it, reduce/reduce conflict appear, and menhir is not able to solve them. *) low_prec : %prec lowPrec1 {} declarator_identifier: | PRE_NAME low_prec i = TYPEDEF_NAME | PRE_NAME i = VAR_NAME { i } (* The semantic action returned by [declarator] is a pair of the identifier being defined and a value containing the context stack that has to be restored if entering the body of the function being defined, if so. *) declarator: | x = declarator_noattrend attribute_specifier_list { x } declarator_noattrend: | x = direct_declarator { x } | pointer x = direct_declarator { match snd x with | Decl_ident -> (fst x, Decl_other) | _ -> x } direct_declarator: | i = declarator_identifier { set_id_type i VarId; (i, Decl_ident) } | LPAREN save_context x = declarator RPAREN { x } | x = direct_declarator LBRACK type_qualifier_list? assignment_expression? RBRACK { match snd x with | Decl_ident -> (fst x, Decl_other) | _ -> x } | x = direct_declarator LPAREN ctx = context_parameter_type_list RPAREN { match snd x with | Decl_ident -> (fst x, Decl_fun ctx) | _ -> x } | x = direct_declarator LPAREN save_context il=identifier_list? RPAREN { match snd x, il with | Decl_ident, Some il -> (fst x, Decl_krfun il) | Decl_ident, None -> (fst x, Decl_krfun []) | _ -> x } pointer: | STAR type_qualifier_list? | STAR type_qualifier_list? pointer {} type_qualifier_list: | type_qualifier_list? type_qualifier {} context_parameter_type_list: | ctx1 = save_context parameter_type_list ctx2 = save_context { ctx1 (); ctx2 } parameter_type_list: | parameter_list | parameter_list COMMA ELLIPSIS {} parameter_list: | parameter_declaration | parameter_list COMMA parameter_declaration {} parameter_declaration: | declaration_specifiers declare_varname(declarator) | declaration_specifiers abstract_declarator? {} type_name: | specifier_qualifier_list abstract_declarator? {} abstract_declarator: | pointer | pointer? direct_abstract_declarator {} direct_abstract_declarator: | LPAREN save_context abstract_declarator RPAREN | direct_abstract_declarator? LBRACK type_qualifier_list? assignment_expression? RBRACK | direct_abstract_declarator? LPAREN context_parameter_type_list RPAREN {} c_initializer: | assignment_expression | LBRACE initializer_list COMMA? RBRACE {} | LBRACE initializer_list COMMA? error { unclosed "{" "}" $startpos($1) $endpos } initializer_list: | designation? c_initializer | initializer_list COMMA designation? c_initializer {} designation: | designator_list EQ {} designator_list: | designator_list? designator {} designator: | LBRACK constant_expression RBRACK {} | DOT i = general_identifier { set_id_type i OtherId } statement: | labeled_statement | compound_statement | expression_statement | selection_statement | iteration_statement | jump_statement | asm_statement {} labeled_statement: | i = general_identifier COLON statement { set_id_type i OtherId } | CASE constant_expression COLON statement | DEFAULT COLON statement {} compound_statement: | ctx = save_context LBRACE block_item_list? RBRACE { ctx() } | ctx = save_context LBRACE block_item_list? error { ctx(); unclosed "{" "}" $startpos($2) $endpos } block_item_list: | block_item_list? block_item {} block_item: | declaration | statement | PRAGMA {} expression_statement: | expression? SEMICOLON {} jump_statement: | GOTO i = general_identifier SEMICOLON { set_id_type i OtherId } | CONTINUE SEMICOLON | BREAK SEMICOLON | RETURN expression? SEMICOLON {} asm_statement: | ASM asm_attributes LPAREN string_literals_list asm_arguments RPAREN SEMICOLON {} ifelse_statement1: | IF LPAREN expression RPAREN ctx = save_context statement ELSE { ctx() } selection_statement: | ctx = save_context ifelse_statement1 statement | ctx = save_context IF LPAREN expression RPAREN save_context statement %prec lowPrec2 | ctx = save_context SWITCH LPAREN expression RPAREN statement { ctx() } do_statement1: | ctx = save_context DO statement { ctx () } iteration_statement: | ctx = save_context WHILE LPAREN expression RPAREN statement | ctx = save_context FOR LPAREN expression? SEMICOLON expression? SEMICOLON expression? RPAREN statement | ctx = save_context FOR LPAREN declaration expression? SEMICOLON expression? RPAREN statement | ctx = save_context do_statement1 WHILE LPAREN expression RPAREN SEMICOLON { ctx() } asm_attributes: | /* empty */ | CONST asm_attributes | VOLATILE asm_attributes {} asm_arguments: | /* empty */ | COLON asm_operands | COLON asm_operands COLON asm_operands | COLON asm_operands COLON asm_operands COLON asm_flags {} asm_operands: | /* empty */ | asm_operands_ne {} asm_operands_ne: | asm_operands_ne COMMA asm_operand | asm_operand {} asm_operand: | asm_op_name string_literals_list LPAREN expression RPAREN {} asm_op_name: | /*empty*/ {} | LBRACK i = general_identifier RBRACK { set_id_type i OtherId } asm_flags: | string_literals_list | string_literals_list COMMA asm_flags {} translation_unit_file: | translation_unit EOF | EOF {} | error { syntax_error $endpos } translation_unit: | external_declaration | translation_unit external_declaration | translation_unit SEMICOLON | SEMICOLON {} external_declaration: | function_definition | declaration | PRAGMA {} identifier_list: | x = var_name { [x] } | l = identifier_list COMMA x = var_name { x::l } declaration_list: | declaration | declaration_list declaration {} function_definition1: | declaration_specifiers func = declare_varname(declarator_noattrend) save_context attribute_specifier_list ctx = save_context | declaration_specifiers func = declare_varname(declarator_noattrend) ctx = save_context declaration_list { begin match snd func with | Decl_fun ctx -> ctx (); declare_varname (fst func) | Decl_krfun il -> List.iter declare_varname il | _ -> () end; ctx } function_definition: | ctx = function_definition1 compound_statement { ctx () }