aboutsummaryrefslogtreecommitdiffstats
path: root/cparser/pre_parser.mly
diff options
context:
space:
mode:
Diffstat (limited to 'cparser/pre_parser.mly')
-rw-r--r--cparser/pre_parser.mly449
1 files changed, 197 insertions, 252 deletions
diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly
index 25e7a745..4d7ad40a 100644
--- a/cparser/pre_parser.mly
+++ b/cparser/pre_parser.mly
@@ -14,6 +14,13 @@
/* */
/* *********************************************************************/
+(*
+ WARNING: The precedence declarations tend to silently solve
+ conflicts. So, if you change the grammar (especially for
+ statements), you should check that without these declarations, it
+ has ONLY 2 CONFLICTS in 2 STATES.
+*)
+
%{
open Pre_parser_aux
@@ -26,8 +33,15 @@
let declare_typename (i,_,_) =
!declare_typename i
+ type 'id fun_declarator_ctx =
+ | Decl_ident
+ | Decl_other
+ | Decl_fun of (unit -> unit)
+ | Decl_krfun of 'id
+
%}
+%token<string> PRE_NAME
%token<string * Pre_parser_aux.identifier_type ref * Cabs.cabsloc>
VAR_NAME TYPEDEF_NAME
%token<Cabs.constant * Cabs.cabsloc> CONSTANT
@@ -46,19 +60,20 @@
%token EOF
-(* These precedences declarations solve the conflict in the following declaration :
+(* These precedence declarations solve the conflict in the following
+ declaration:
int f(int (a));
- when a is a TYPEDEF_NAME. It is specified by 6.7.5.3 11.
-
- WARNING: These precedence declarations tend to silently solve other
- conflicts. So, if you change the grammar (especially or
- statements), you should check that without these declarations, it
- has ONLY ONE CONFLICT.
+ when a is a TYPEDEF_NAME. It is specified by 6.7.5.3 11: a should
+ be taken as the type of parameter the anonymous function
*)
+%nonassoc lowPrec1
%nonassoc TYPEDEF_NAME
-%nonassoc highPrec
+
+(* These precedence declaration solve the dangling else conflict. *)
+%nonassoc lowPrec2
+%nonassoc ELSE
%start<unit> translation_unit_file
@@ -90,8 +105,8 @@
expression
attribute_specifier_list
declarator
- statement_finish_close
- iteration_statement(nop,statement_finish_close)
+ declarator_noattrend
+ selection_statement
enum_specifier
struct_or_union_specifier
specifier_qualifier_list(struct_declaration)
@@ -102,6 +117,7 @@
asm_flags
asm_operands
init_declarator
+ rlist(declaration_specifier_no_type)
%%
@@ -152,16 +168,32 @@ optional(X, Y):
| list(X) X {}
list(X):
- xs = ilist(X)
- { xs }
+| ilist(X) {}
-%inline fst(X):
-| x = X
- { fst x }
+(* [rlist(X)] is right-recursive non-empty list. *)
+
+rlist(X):
+| X {}
+| X rlist(X) {}
+
+(* The kind of an identifier should not be determined when looking
+ ahead, because the context may not be up to date. For this reason,
+ when reading an identifier, the lexer emits two tokens: the first
+ one (PRE_NAME) is eaten as a lookahead token, the second one is the
+ actual identifier.
+*)
+
+typedef_name:
+| PRE_NAME i = TYPEDEF_NAME
+ { i }
+
+%inline var_name:
+| PRE_NAME i = VAR_NAME
+ { i }
general_identifier:
-| i = VAR_NAME
-| i = TYPEDEF_NAME
+| i = typedef_name
+| i = var_name
{ i }
(* [other_identifier] is equivalent to [general_identifier], but adds
@@ -178,32 +210,13 @@ string_literals_list:
| string_literals_list STRING_LITERAL
{}
-(* WARNING : because of the lookahead token, the context might be
- opened or closed one token after the position of this non-terminal !
-
- Opening too late is not dangerous for us, because this does not
- change the token stream. However, we have to make sure the
- lookahead token present just after closing/declaring/restoring is
- not an identifier. An easy way to check that is to look at the
- follow set of the non-terminal in question. The follow sets are
- given by menhir with option -lg 3. *)
-
-%inline nop: (* empty *) {}
-
-open_context:
- (* empty *)%prec highPrec { !open_context () }
-close_context:
- (* empty *) { !close_context () }
-in_context(nt):
- open_context x = nt close_context { x }
-
-save_contexts_stk:
- (* empty *) { !save_contexts_stk () }
+save_context:
+ (* empty *) { !save_context () }
declare_varname(nt):
- i = nt { declare_varname i; i }
+ i = nt { declare_varname (fst i); i }
declare_typename(nt):
- i = nt { declare_typename i; i }
+ i = nt { declare_typename (fst i); i }
(* A note about phantom parameters. The definition of a non-terminal symbol
[nt] is sometimes parameterized with a parameter that is unused in the
@@ -216,7 +229,7 @@ declare_typename(nt):
states have been duplicated. In these states, more information about the
context is available, which allows better syntax error messages to be
given.
-
+
By convention, a formal phantom parameter is named [phantom], so as to be
easily recognizable. For clarity, we usually explicitly document which
actual values it can take. *)
@@ -224,7 +237,7 @@ declare_typename(nt):
(* Actual grammar *)
primary_expression:
-| VAR_NAME
+| var_name
| CONSTANT
| string_literals_list
| LPAREN expression RPAREN
@@ -392,8 +405,8 @@ init_declarator_list:
{}
init_declarator:
-| declare_varname(fst(declarator))
-| declare_varname(fst(declarator)) EQ c_initializer
+| declare_varname(declarator_noattrend) save_context attribute_specifier_list
+| declare_varname(declarator_noattrend) save_context attribute_specifier_list EQ c_initializer
{}
typedef_declarator_list:
@@ -402,7 +415,7 @@ typedef_declarator_list:
{}
typedef_declarator:
-| declare_typename(fst(declarator))
+| declare_typename(declarator)
{}
storage_class_specifier_no_typedef:
@@ -414,10 +427,11 @@ storage_class_specifier_no_typedef:
(* [declaration_specifier_no_type] matches declaration specifiers
that do not contain either "typedef" nor type specifiers. *)
-declaration_specifier_no_type:
+%inline declaration_specifier_no_type:
| storage_class_specifier_no_typedef
-| type_qualifier
+| type_qualifier_noattr
| function_specifier
+| attribute_specifier
{}
(* [declaration_specifier_no_typedef_name] matches declaration
@@ -431,10 +445,8 @@ declaration_specifier_no_typedef_name:
| type_specifier_no_typedef_name
{}
-(* [declaration_specifiers_no_type] matches declaration_specifiers
- that do not contains "typedef". Moreover, it makes sure that it
- contains either one typename and not other type specifier or no
- typename.
+(* [declaration_specifiers] makes sure one type specifier is given, and,
+ if a typedef_name is given, then no other type specifier is given.
This is a weaker condition than 6.7.2 2. It is necessary to enforce
this in the grammar to disambiguate the example in 6.7.7 6:
@@ -452,18 +464,18 @@ declaration_specifier_no_typedef_name:
[parameter_declaration], which means that this is the beginning of a
parameter declaration. *)
declaration_specifiers(phantom):
-| ilist(declaration_specifier_no_type) TYPEDEF_NAME declaration_specifier_no_type*
-| declaration_specifier_no_type* type_specifier_no_typedef_name declaration_specifier_no_typedef_name*
+| ioption(rlist(declaration_specifier_no_type)) typedef_name declaration_specifier_no_type*
+| ioption(rlist(declaration_specifier_no_type)) type_specifier_no_typedef_name declaration_specifier_no_typedef_name*
{}
(* This matches declaration_specifiers that do contains once the
"typedef" keyword. To avoid conflicts, we also encode the
constraint described in the comment for [declaration_specifiers]. *)
declaration_specifiers_typedef:
-| declaration_specifier_no_type* TYPEDEF declaration_specifier_no_type* TYPEDEF_NAME declaration_specifier_no_type*
-| ilist(declaration_specifier_no_type) TYPEDEF_NAME declaration_specifier_no_type* TYPEDEF declaration_specifier_no_type*
-| declaration_specifier_no_type* TYPEDEF declaration_specifier_no_type* type_specifier_no_typedef_name declaration_specifier_no_typedef_name*
-| declaration_specifier_no_type* type_specifier_no_typedef_name declaration_specifier_no_typedef_name* TYPEDEF declaration_specifier_no_typedef_name*
+| ioption(rlist(declaration_specifier_no_type)) TYPEDEF declaration_specifier_no_type* typedef_name declaration_specifier_no_type*
+| ioption(rlist(declaration_specifier_no_type)) typedef_name declaration_specifier_no_type* TYPEDEF declaration_specifier_no_type*
+| ioption(rlist(declaration_specifier_no_type)) TYPEDEF declaration_specifier_no_type* type_specifier_no_typedef_name declaration_specifier_no_typedef_name*
+| ioption(rlist(declaration_specifier_no_type)) type_specifier_no_typedef_name declaration_specifier_no_typedef_name* TYPEDEF declaration_specifier_no_typedef_name*
{}
(* A type specifier which is not a typedef name. *)
@@ -505,8 +517,8 @@ struct_declaration:
in the comment above [declaration_specifiers]. *)
(* The phantom parameter can be [struct_declaration] or [type_name]. *)
specifier_qualifier_list(phantom):
-| type_qualifier_list? TYPEDEF_NAME type_qualifier_list?
-| type_qualifier_list? type_specifier_no_typedef_name specifier_qualifier_no_typedef_name*
+| ioption(type_qualifier_list) typedef_name type_qualifier_list?
+| ioption(type_qualifier_list) type_specifier_no_typedef_name specifier_qualifier_no_typedef_name*
{}
specifier_qualifier_no_typedef_name:
@@ -537,29 +549,31 @@ enumerator_list:
enumerator:
| i = enumeration_constant
| i = enumeration_constant EQ constant_expression
- { i }
+ { (i, ()) }
enumeration_constant:
| i = general_identifier
{ set_id_type i VarId; i }
-type_qualifier:
+type_qualifier_noattr:
| CONST
| RESTRICT
| VOLATILE
+ {}
+
+%inline type_qualifier:
+| type_qualifier_noattr
| attribute_specifier
{}
attribute_specifier_list:
| /* empty */
-| attribute_specifier_list attribute_specifier
+| attribute_specifier attribute_specifier_list
{}
attribute_specifier:
| ATTRIBUTE LPAREN LPAREN gcc_attribute_list RPAREN RPAREN
| PACKED LPAREN argument_expression_list RPAREN
-(* TODO: slove conflict *)
-(* | PACKED *)
| ALIGNAS LPAREN argument_expression_list RPAREN
| ALIGNAS LPAREN type_name RPAREN
{}
@@ -574,7 +588,7 @@ gcc_attribute:
| gcc_attribute_word
| gcc_attribute_word LPAREN argument_expression_list? RPAREN
{}
-| gcc_attribute_word LPAREN i = TYPEDEF_NAME COMMA argument_expression_list RPAREN
+| gcc_attribute_word LPAREN i = typedef_name COMMA argument_expression_list RPAREN
(* This is to emulate GCC's attribute syntax : we make this identifier
a var name identifier, so that the parser will see it as a variable
reference *)
@@ -590,26 +604,49 @@ function_specifier:
| INLINE
{}
+(* We add this non-terminal here to force the resolution of the
+ conflict at the point of shifting the TYPEDEF_NAME. If we had
+ already shifted it, reduce/reduce conflict appear, and menhir is
+ not able to solve them. *)
+low_prec : %prec lowPrec1 {}
+declarator_identifier:
+| PRE_NAME low_prec i = TYPEDEF_NAME
+| i = var_name
+ { i }
+
(* The semantic action returned by [declarator] is a pair of the
- identifier being defined and an option of the context stack that
- has to be restored if entering the body of the function being
+ identifier being defined and a value containing the context stack
+ that has to be restored if entering the body of the function being
defined, if so. *)
declarator:
-| ilist(pointer1) x = direct_declarator attribute_specifier_list
+| x = declarator_noattrend attribute_specifier_list
+ { x }
+
+declarator_noattrend:
+| x = direct_declarator
{ x }
+| pointer x = direct_declarator
+ { match snd x with
+ | Decl_ident -> (fst x, Decl_other)
+ | _ -> x }
direct_declarator:
-| i = general_identifier
- { set_id_type i VarId; (i, None) }
-| LPAREN x = declarator RPAREN
+| i = declarator_identifier
+ { set_id_type i VarId; (i, Decl_ident) }
+| LPAREN save_context x = declarator RPAREN
| x = direct_declarator LBRACK type_qualifier_list? optional(assignment_expression, RBRACK)
- { x }
-| x = direct_declarator LPAREN
- open_context parameter_type_list? restore_fun = save_contexts_stk
- close_context RPAREN
{ match snd x with
- | None -> (fst x, Some restore_fun)
- | Some _ -> x }
+ | Decl_ident -> (fst x, Decl_other)
+ | _ -> x }
+| x = direct_declarator LPAREN ctx = context_parameter_type_list RPAREN
+ { match snd x with
+ | Decl_ident -> (fst x, Decl_fun ctx)
+ | _ -> x }
+| x = direct_declarator LPAREN save_context il=identifier_list? RPAREN
+ { match snd x, il with
+ | Decl_ident, Some il -> (fst x, Decl_krfun il)
+ | Decl_ident, None -> (fst x, Decl_krfun [])
+ | _ -> x }
(* The C standard defines [pointer] as a right-recursive list. We prefer to
define it as a left-recursive list, because this provides better static
@@ -622,8 +659,7 @@ direct_declarator:
as [pointer1* pointer1].
When the C standard writes [pointer?], which represents a possibly empty
- list of [pointer1]'s, we write [pointer1*] or [ilist(pointer1)]. The two
- are equivalent, as long as there is no conflict. *)
+ list of [pointer1]'s, we write [pointer1*]. *)
%inline pointer1:
STAR type_qualifier_list?
@@ -637,22 +673,24 @@ type_qualifier_list:
| type_qualifier_list? type_qualifier
{}
+context_parameter_type_list:
+| ctx1 = save_context parameter_type_list ctx2 = save_context
+ { ctx1 (); ctx2 }
+
parameter_type_list:
-| l=parameter_list
-| l=parameter_list COMMA ELLIPSIS
- { l }
+| parameter_list
+| parameter_list COMMA ELLIPSIS
+ {}
parameter_list:
-| i=parameter_declaration
- { [i] }
-| l=parameter_list COMMA i=parameter_declaration
- { i::l }
+| parameter_declaration
+| parameter_list COMMA parameter_declaration
+ {}
parameter_declaration:
-| declaration_specifiers(parameter_declaration) id=declare_varname(fst(declarator))
- { Some id }
+| declaration_specifiers(parameter_declaration) declare_varname(declarator)
| declaration_specifiers(parameter_declaration) abstract_declarator(parameter_declaration)?
- { None }
+ {}
type_name:
| specifier_qualifier_list(type_name) abstract_declarator(type_name)?
@@ -664,13 +702,13 @@ type_name:
is permitted (and we do not wish to keep track of why it is permitted). *)
abstract_declarator(phantom):
| pointer
-| ilist(pointer1) direct_abstract_declarator
+| ioption(pointer) direct_abstract_declarator
{}
direct_abstract_declarator:
-| LPAREN abstract_declarator(type_name) RPAREN
+| LPAREN save_context abstract_declarator(type_name) RPAREN
| direct_abstract_declarator? LBRACK type_qualifier_list? optional(assignment_expression, RBRACK)
-| ioption(direct_abstract_declarator) LPAREN in_context(parameter_type_list?) RPAREN
+| ioption(direct_abstract_declarator) LPAREN context_parameter_type_list? RPAREN
{}
c_initializer:
@@ -696,64 +734,25 @@ designator:
| DOT other_identifier
{}
-(* The grammar of statements is replicated three times.
-
- [statement_finish_close] should close the current context just
- before its last token.
-
- [statement_finish_noclose] should not close the current context. It
- should modify it only if this modification actually changes the
- context of the current block.
-
- [statement_intern_close] is like [statement_finish_close], except
- it cannot reduce to a single-branch if statement.
-*)
-
-statement_finish_close:
-| labeled_statement(statement_finish_close)
-| compound_statement(nop)
-| expression_statement(close_context)
-| selection_statement_finish(nop)
-| iteration_statement(nop,statement_finish_close)
-| jump_statement(close_context)
-| asm_statement(close_context)
- {}
-
-statement_finish_noclose:
-| labeled_statement(statement_finish_noclose)
-| compound_statement(open_context)
-| expression_statement(nop)
-| selection_statement_finish(open_context)
-| iteration_statement(open_context,statement_finish_close)
-| jump_statement(nop)
-| asm_statement(nop)
+statement:
+| labeled_statement
+| compound_statement
+| expression_statement
+| selection_statement
+| iteration_statement
+| jump_statement
+| asm_statement
{}
-statement_intern_close:
-| labeled_statement(statement_intern_close)
-| compound_statement(nop)
-| expression_statement(close_context)
-| selection_statement_intern_close
-| iteration_statement(nop,statement_intern_close)
-| jump_statement(close_context)
-| asm_statement(close_context)
+labeled_statement:
+| other_identifier COLON statement
+| CASE constant_expression COLON statement
+| DEFAULT COLON statement
{}
-(* [labeled_statement(last_statement)] has the same effect on contexts
- as [last_statement]. *)
-labeled_statement(last_statement):
-| other_identifier COLON last_statement
-| CASE constant_expression COLON last_statement
-| DEFAULT COLON last_statement
- {}
-
-(* [compound_statement] uses a local context and closes it before its
- last token. It uses [openc] to open this local context if needed.
- That is, if a local context has already been opened, [openc] = [nop],
- otherwise, [openc] = [open_context]. *)
-compound_statement(openc):
-| LBRACE openc block_item_list? close_context RBRACE
- {}
+compound_statement:
+| ctx = save_context LBRACE block_item_list? RBRACE
+ { ctx() }
block_item_list:
| block_item_list? block_item
@@ -761,93 +760,44 @@ block_item_list:
block_item:
| declaration(block_item)
-| statement_finish_noclose
+| statement
| PRAGMA
{}
-(* [expression_statement], [jump_statement] and [asm_statement] close
- the local context if needed, depending of the close parameter. If
- there is no local context, [close] = [nop]. Otherwise,
- [close] = [close_context]. *)
-expression_statement(close):
-| expression? close SEMICOLON
+expression_statement:
+| ioption(expression) SEMICOLON
{}
-jump_statement(close):
-| GOTO other_identifier close SEMICOLON
-| CONTINUE close SEMICOLON
-| BREAK close SEMICOLON
-| RETURN expression? close SEMICOLON
+jump_statement:
+| GOTO other_identifier SEMICOLON
+| CONTINUE SEMICOLON
+| BREAK SEMICOLON
+| RETURN ioption(expression) SEMICOLON
{}
-asm_statement(close):
-| ASM asm_attributes LPAREN string_literals_list asm_arguments RPAREN close SEMICOLON
+asm_statement:
+| ASM asm_attributes LPAREN string_literals_list asm_arguments RPAREN SEMICOLON
{}
-(* [selection_statement_finish] and [selection_statement_intern] use a
- local context and close it before their last token.
-
- [selection_statement_finish(openc)] uses [openc] to open this local
- context if needed. That is, if a local context has already been
- opened, [openc] = [nop], otherwise, [openc] = [open_context].
-
- [selection_statement_intern_close] is always called with a local
- context openned. It closes it before its last token. *)
-
-(* It should be noted that the token [ELSE] should be lookaheaded
- /outside/ of the local context because if the lookaheaded token is
- not [ELSE], then this is the end of the statement.
-
- This is especially important to parse correctly the following
- example:
+ifelse_statement1:
+| IF LPAREN expression RPAREN ctx = save_context statement ELSE
+ { ctx() }
- typedef int a;
+selection_statement:
+| ctx = save_context ifelse_statement1 statement
+| ctx = save_context IF LPAREN expression RPAREN save_context statement %prec lowPrec2
+| ctx = save_context SWITCH LPAREN expression RPAREN statement
+ { ctx() }
- int f() {
- for(int a; ;)
- if(1);
- a * x;
- }
+do_statement1:
+| ctx = save_context DO statement
+ { ctx () }
- However, if the lookahead token is [ELSE], we should parse the
- second branch in the same context as the first branch, so we have
- to reopen the previously closed context. This is the reason for the
- save/restore system.
-*)
-
-if_else_statement_begin(openc):
-| IF openc LPAREN expression RPAREN restore_fun = save_contexts_stk
- statement_intern_close
- { restore_fun () }
-
-selection_statement_finish(openc):
-| IF openc LPAREN expression RPAREN save_contexts_stk statement_finish_close
-| if_else_statement_begin(openc) ELSE statement_finish_close
-| SWITCH openc LPAREN expression RPAREN statement_finish_close
- {}
-
-selection_statement_intern_close:
-| if_else_statement_begin(nop) ELSE statement_intern_close
-| SWITCH LPAREN expression RPAREN statement_intern_close
- {}
-
-(* [iteration_statement] uses a local context and closes it before
- their last token.
-
- [iteration_statement] uses [openc] to open this local context if
- needed. That is, if a local context has already been opened,
- [openc] = [nop], otherwise, [openc] = [open_context].
-
- [last_statement] is either [statement_intern_close] or
- [statement_finish_close]. That is, it should /always/ close the
- local context. *)
-
-iteration_statement(openc,last_statement):
-| WHILE openc LPAREN expression RPAREN last_statement
-| DO open_context statement_finish_close WHILE
- openc LPAREN expression RPAREN close_context SEMICOLON
-| FOR openc LPAREN for_statement_header optional(expression, SEMICOLON) optional(expression, RPAREN) last_statement
- {}
+iteration_statement:
+| ctx = save_context WHILE LPAREN expression RPAREN statement
+| ctx = save_context do_statement1 WHILE LPAREN expression RPAREN SEMICOLON
+| ctx = save_context FOR LPAREN for_statement_header optional(expression, SEMICOLON) optional(expression, RPAREN) statement
+ { ctx() }
for_statement_header:
| optional(expression, SEMICOLON)
@@ -909,40 +859,35 @@ external_declaration:
| PRAGMA
{}
-function_definition_begin:
-| declaration_specifiers(declaration(external_declaration))
- ilist(pointer1) x=direct_declarator
- { match x with
- | (_, None) -> !open_context()
- (* this case does not make sense, but we let it pass anyway;
- this error will be caught later on by a type check *)
- | (i, Some restore_fun) -> restore_fun ()
- }
-| declaration_specifiers(declaration(external_declaration))
- ilist(pointer1) x=direct_declarator
- LPAREN params=identifier_list RPAREN open_context declaration_list
- { match x with
- | (i, Some _) -> declare_varname i
- (* this case does not make sense; the syntax of K&R
- declarators is broken anyway, Jacques-Henri should
- propose a fix soon *)
- | (i, None) ->
- declare_varname i;
- List.iter declare_varname params
- }
-
identifier_list:
-| id = VAR_NAME
- { [id] }
-| idl = identifier_list COMMA id = VAR_NAME
- { id :: idl }
+| x = var_name
+ { [x] }
+| l = identifier_list COMMA x = var_name
+ { x::l }
-declaration_list:
-| /*empty*/
+kr_param_declaration:
+| declaration_specifiers(declaration(block_item)) init_declarator_list? SEMICOLON
{}
-| declaration_list declaration(block_item)
+
+declaration_list:
+| kr_param_declaration
+| declaration_list kr_param_declaration
{}
+function_definition1:
+| declaration_specifiers(declaration(external_declaration))
+ func = declare_varname(declarator_noattrend)
+ save_context attribute_specifier_list ctx = save_context
+| declaration_specifiers(declaration(external_declaration))
+ func = declare_varname(declarator_noattrend)
+ ctx = save_context declaration_list
+ { begin match snd func with
+ | Decl_fun ctx -> ctx (); declare_varname (fst func)
+ | Decl_krfun il -> List.iter declare_varname il
+ | _ -> ()
+ end;
+ ctx }
+
function_definition:
-| function_definition_begin LBRACE block_item_list? close_context RBRACE
- {}
+| ctx = function_definition1 compound_statement
+ { ctx () }