aboutsummaryrefslogtreecommitdiffstats
path: root/cparser/pre_parser.mly
diff options
context:
space:
mode:
Diffstat (limited to 'cparser/pre_parser.mly')
-rw-r--r--cparser/pre_parser.mly252
1 files changed, 165 insertions, 87 deletions
diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly
index 52a94078..25e7a745 100644
--- a/cparser/pre_parser.mly
+++ b/cparser/pre_parser.mly
@@ -3,6 +3,7 @@
/* The Compcert verified compiler */
/* */
/* Jacques-Henri Jourdan, INRIA Paris-Rocquencourt */
+/* François Pottier, INRIA Paris-Rocquencourt */
/* */
/* Copyright Institut National de Recherche en Informatique et en */
/* Automatique. All rights reserved. This file is distributed */
@@ -25,16 +26,6 @@
let declare_typename (i,_,_) =
!declare_typename i
- let syntax_error pos =
- Cerrors.fatal_error "%s:%d: syntax error"
- pos.Lexing.pos_fname pos.Lexing.pos_lnum
-
- let unclosed opening closing pos1 pos2 =
- Cerrors.info "%s:%d: syntax error: expecting '%s'"
- pos2.Lexing.pos_fname pos2.Lexing.pos_lnum closing;
- Cerrors.fatal_error "%s:%d: this is the location of the unclosed '%s'"
- pos1.Lexing.pos_fname pos1.Lexing.pos_lnum opening
-
%}
%token<string * Pre_parser_aux.identifier_type ref * Cabs.cabsloc>
@@ -70,6 +61,48 @@
%nonassoc highPrec
%start<unit> translation_unit_file
+
+(* The following declarations cause certain nonterminal symbols to be
+ reduced when an error is detected. This replaces error actions in
+ the automaton with reduction actions. So, if the input is correct,
+ this makes no difference, and if the input is incorrect, this only
+ forces a few more reductions to take place before the error is
+ detected and reported. If used properly, this facilitates error
+ reports. *)
+
+%on_error_reduce
+ primary_expression
+ postfix_expression
+ unary_expression
+ cast_expression
+ multiplicative_expression
+ additive_expression
+ shift_expression
+ relational_expression
+ equality_expression
+ and_expression
+ exclusive_or_expression
+ inclusive_or_expression
+ logical_and_expression
+ logical_or_expression
+ conditional_expression
+ assignment_expression
+ expression
+ attribute_specifier_list
+ declarator
+ statement_finish_close
+ iteration_statement(nop,statement_finish_close)
+ enum_specifier
+ struct_or_union_specifier
+ specifier_qualifier_list(struct_declaration)
+ specifier_qualifier_list(type_name)
+ option(abstract_declarator(type_name))
+ abstract_declarator(type_name)
+ abstract_declarator(parameter_declaration)
+ asm_flags
+ asm_operands
+ init_declarator
+
%%
(* Helpers *)
@@ -78,12 +111,6 @@
so this definition of [option] is actually used, even though the
word [option] does not appear in the rest of this file. *)
-option(X):
-| /* nothing */
- { None }
-| x = X
- { Some x }
-
(* [ioption(X)] is equivalent to [option(X)], but is marked [%inline],
so its definition is expanded. In the absence of conflicts, the two
are equivalent. Using [ioption] instead of [option] in well-chosen
@@ -91,12 +118,20 @@ option(X):
of [ioption] in well-chosen places can help reduce the number of
states of the automaton. *)
+(* Defining the non-%inline version in terms of the %inline version is
+ a standard idiom. It obviates the need to duplicate the definition.
+ The same idiom is used elsewhere below. *)
+
%inline ioption(X):
| /* nothing */
{ None }
| x = X
{ Some x }
+option(X):
+ o = ioption(X)
+ { o }
+
(* [optional(X, Y)] is equivalent to [X? Y]. However, by inlining
the two possibilies -- either [X Y] or just [Y] -- we are able
to give more meaningful syntax error messages. [optional(X, Y)]
@@ -105,6 +140,21 @@ option(X):
optional(X, Y):
ioption(X) Y {}
+(* This is a standard left-recursive, possibly empty list, without
+ separators. Note that, by convention, [X*] is syntactic sugar for
+ [list(X)]. *)
+
+(* [ilist(X)] is equivalent to [list(X)], but is marked [%inline],
+ so its definition is expanded (only one level deep, of course). *)
+
+%inline ilist(X):
+| (* empty *) {}
+| list(X) X {}
+
+list(X):
+ xs = ilist(X)
+ { xs }
+
%inline fst(X):
| x = X
{ fst x }
@@ -155,6 +205,22 @@ declare_varname(nt):
declare_typename(nt):
i = nt { declare_typename i; i }
+(* A note about phantom parameters. The definition of a non-terminal symbol
+ [nt] is sometimes parameterized with a parameter that is unused in the
+ right-hand side. This parameter disappears when macro-expansion takes
+ place. Thus, the presence of this parameter does not influence the language
+ that is accepted by the parser. Yet, it carries information about the
+ context, since different call sites can supply different values of this
+ parameter. This forces the creation of two (or more) identical copies of
+ the definition of [nt], which leads to a larger automaton, where some
+ states have been duplicated. In these states, more information about the
+ context is available, which allows better syntax error messages to be
+ given.
+
+ By convention, a formal phantom parameter is named [phantom], so as to be
+ easily recognizable. For clarity, we usually explicitly document which
+ actual values it can take. *)
+
(* Actual grammar *)
primary_expression:
@@ -163,30 +229,18 @@ primary_expression:
| string_literals_list
| LPAREN expression RPAREN
{}
-| LPAREN expression error
- { unclosed "(" ")" $startpos($1) $endpos }
postfix_expression:
| primary_expression
| postfix_expression LBRACK expression RBRACK
| postfix_expression LPAREN argument_expression_list? RPAREN
- {}
-| postfix_expression LPAREN argument_expression_list? error
- { unclosed "(" ")" $startpos($2) $endpos }
| BUILTIN_VA_ARG LPAREN assignment_expression COMMA type_name RPAREN
- {}
-| BUILTIN_VA_ARG LPAREN assignment_expression COMMA type_name error
- { unclosed "(" ")" $startpos($2) $endpos }
| postfix_expression DOT other_identifier
| postfix_expression PTR other_identifier
| postfix_expression INC
| postfix_expression DEC
| LPAREN type_name RPAREN LBRACE initializer_list COMMA? RBRACE
{}
-| LPAREN type_name error
- { unclosed "(" ")" $startpos($1) $endpos }
-| LPAREN type_name RPAREN LBRACE initializer_list COMMA? error
- { unclosed "{" "}" $startpos($4) $endpos }
argument_expression_list:
| assignment_expression
@@ -312,7 +366,7 @@ expression:
| expression COMMA assignment_expression
{}
-constant_expression:
+%inline constant_expression:
| conditional_expression
{}
@@ -324,9 +378,12 @@ constant_expression:
cannot contain an initialization (this is an error to initialize a
typedef). *)
-declaration:
-| declaration_specifiers init_declarator_list? SEMICOLON
-| declaration_specifiers_typedef typedef_declarator_list? SEMICOLON
+(* The phantom parameter is either [block_item], which means we are
+ definitely reading a declaration, or [external_declaration], which
+ means we could also be reading the beginning of a function definition. *)
+declaration(phantom):
+| declaration_specifiers(declaration(phantom)) init_declarator_list? SEMICOLON
+| declaration_specifiers_typedef typedef_declarator_list? SEMICOLON
{}
init_declarator_list:
@@ -355,23 +412,23 @@ storage_class_specifier_no_typedef:
| REGISTER
{}
-(* [declaration_specifiers_no_type] matches declaration specifiers
+(* [declaration_specifier_no_type] matches declaration specifiers
that do not contain either "typedef" nor type specifiers. *)
-declaration_specifiers_no_type:
-| storage_class_specifier_no_typedef declaration_specifiers_no_type?
-| type_qualifier declaration_specifiers_no_type?
-| function_specifier declaration_specifiers_no_type?
+declaration_specifier_no_type:
+| storage_class_specifier_no_typedef
+| type_qualifier
+| function_specifier
{}
-(* [declaration_specifiers_no_typedef_name] matches declaration
+(* [declaration_specifier_no_typedef_name] matches declaration
specifiers that contain neither "typedef" nor a typedef name
(i.e. type specifier declared using a previous "typedef
keyword"). *)
-declaration_specifiers_no_typedef_name:
-| storage_class_specifier_no_typedef declaration_specifiers_no_typedef_name?
-| type_qualifier declaration_specifiers_no_typedef_name?
-| function_specifier declaration_specifiers_no_typedef_name?
-| type_specifier_no_typedef_name declaration_specifiers_no_typedef_name?
+declaration_specifier_no_typedef_name:
+| storage_class_specifier_no_typedef
+| type_qualifier
+| function_specifier
+| type_specifier_no_typedef_name
{}
(* [declaration_specifiers_no_type] matches declaration_specifiers
@@ -390,19 +447,23 @@ declaration_specifiers_no_typedef_name:
The first field is a named t, while the second is unnamed of type t.
*)
-declaration_specifiers:
-| ioption(declaration_specifiers_no_type) TYPEDEF_NAME declaration_specifiers_no_type?
-| declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name?
+(* The phantom parameter is either [declaration(_)], which means that
+ this is the beginning of a declaration or a function definition, or
+ [parameter_declaration], which means that this is the beginning of a
+ parameter declaration. *)
+declaration_specifiers(phantom):
+| ilist(declaration_specifier_no_type) TYPEDEF_NAME declaration_specifier_no_type*
+| declaration_specifier_no_type* type_specifier_no_typedef_name declaration_specifier_no_typedef_name*
{}
(* This matches declaration_specifiers that do contains once the
"typedef" keyword. To avoid conflicts, we also encode the
constraint described in the comment for [declaration_specifiers]. *)
declaration_specifiers_typedef:
-| declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? TYPEDEF_NAME declaration_specifiers_no_type?
-| ioption(declaration_specifiers_no_type) TYPEDEF_NAME declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type?
-| declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name?
-| declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? TYPEDEF declaration_specifiers_no_typedef_name?
+| declaration_specifier_no_type* TYPEDEF declaration_specifier_no_type* TYPEDEF_NAME declaration_specifier_no_type*
+| ilist(declaration_specifier_no_type) TYPEDEF_NAME declaration_specifier_no_type* TYPEDEF declaration_specifier_no_type*
+| declaration_specifier_no_type* TYPEDEF declaration_specifier_no_type* type_specifier_no_typedef_name declaration_specifier_no_typedef_name*
+| declaration_specifier_no_type* type_specifier_no_typedef_name declaration_specifier_no_typedef_name* TYPEDEF declaration_specifier_no_typedef_name*
{}
(* A type specifier which is not a typedef name. *)
@@ -425,8 +486,6 @@ struct_or_union_specifier:
| struct_or_union attribute_specifier_list other_identifier? LBRACE struct_declaration_list RBRACE
| struct_or_union attribute_specifier_list other_identifier
{}
-| struct_or_union attribute_specifier_list other_identifier? LBRACE struct_declaration_list error
- { unclosed "{" "}" $startpos($4) $endpos }
struct_or_union:
| STRUCT
@@ -439,19 +498,20 @@ struct_declaration_list:
{}
struct_declaration:
-| specifier_qualifier_list struct_declarator_list? SEMICOLON
+| specifier_qualifier_list(struct_declaration) struct_declarator_list? SEMICOLON
{}
(* As in the standard, except it also encodes the constraint described
in the comment above [declaration_specifiers]. *)
-specifier_qualifier_list:
+(* The phantom parameter can be [struct_declaration] or [type_name]. *)
+specifier_qualifier_list(phantom):
| type_qualifier_list? TYPEDEF_NAME type_qualifier_list?
-| type_qualifier_list? type_specifier_no_typedef_name specifier_qualifier_list_no_typedef_name?
+| type_qualifier_list? type_specifier_no_typedef_name specifier_qualifier_no_typedef_name*
{}
-specifier_qualifier_list_no_typedef_name:
-| type_specifier_no_typedef_name specifier_qualifier_list_no_typedef_name?
-| type_qualifier specifier_qualifier_list_no_typedef_name?
+specifier_qualifier_no_typedef_name:
+| type_specifier_no_typedef_name
+| type_qualifier
{}
struct_declarator_list:
@@ -468,8 +528,6 @@ enum_specifier:
| ENUM attribute_specifier_list other_identifier? LBRACE enumerator_list COMMA? RBRACE
| ENUM attribute_specifier_list other_identifier
{}
-| ENUM attribute_specifier_list other_identifier? LBRACE enumerator_list COMMA? error
- { unclosed "{" "}" $startpos($4) $endpos }
enumerator_list:
| declare_varname(enumerator)
@@ -537,7 +595,7 @@ function_specifier:
has to be restored if entering the body of the function being
defined, if so. *)
declarator:
-| ioption(pointer) x = direct_declarator attribute_specifier_list
+| ilist(pointer1) x = direct_declarator attribute_specifier_list
{ x }
direct_declarator:
@@ -553,9 +611,26 @@ direct_declarator:
| None -> (fst x, Some restore_fun)
| Some _ -> x }
-pointer:
-| STAR type_qualifier_list?
-| STAR type_qualifier_list? pointer
+(* The C standard defines [pointer] as a right-recursive list. We prefer to
+ define it as a left-recursive list, because this provides better static
+ context (that is, this changes the automaton in such a way that it is
+ easier to give good error messages, in at least 2 states).
+
+ The non-terminal symbol [pointer1] represents one list element.
+
+ [pointer], which represents a non-empty list of [pointer1]'s, is defined
+ as [pointer1* pointer1].
+
+ When the C standard writes [pointer?], which represents a possibly empty
+ list of [pointer1]'s, we write [pointer1*] or [ilist(pointer1)]. The two
+ are equivalent, as long as there is no conflict. *)
+
+%inline pointer1:
+ STAR type_qualifier_list?
+ {}
+
+%inline pointer:
+ pointer1* pointer1
{}
type_qualifier_list:
@@ -574,22 +649,26 @@ parameter_list:
{ i::l }
parameter_declaration:
-| declaration_specifiers id=declare_varname(fst(declarator))
+| declaration_specifiers(parameter_declaration) id=declare_varname(fst(declarator))
{ Some id }
-| declaration_specifiers abstract_declarator?
+| declaration_specifiers(parameter_declaration) abstract_declarator(parameter_declaration)?
{ None }
type_name:
-| specifier_qualifier_list abstract_declarator?
+| specifier_qualifier_list(type_name) abstract_declarator(type_name)?
{}
-abstract_declarator:
+(* The phantom parameter can be [parameter_declaration] or [type_name].
+ We take the latter to mean [type_or_name] or [direct_abstract_declarator].
+ We need not distinguish these two cases: in both cases, a closing parenthesis
+ is permitted (and we do not wish to keep track of why it is permitted). *)
+abstract_declarator(phantom):
| pointer
-| ioption(pointer) direct_abstract_declarator
+| ilist(pointer1) direct_abstract_declarator
{}
direct_abstract_declarator:
-| LPAREN abstract_declarator RPAREN
+| LPAREN abstract_declarator(type_name) RPAREN
| direct_abstract_declarator? LBRACK type_qualifier_list? optional(assignment_expression, RBRACK)
| ioption(direct_abstract_declarator) LPAREN in_context(parameter_type_list?) RPAREN
{}
@@ -598,8 +677,6 @@ c_initializer:
| assignment_expression
| LBRACE initializer_list COMMA? RBRACE
{}
-| LBRACE initializer_list COMMA? error
- { unclosed "{" "}" $startpos($1) $endpos }
initializer_list:
| designation? c_initializer
@@ -677,15 +754,13 @@ labeled_statement(last_statement):
compound_statement(openc):
| LBRACE openc block_item_list? close_context RBRACE
{}
-| LBRACE openc block_item_list? close_context error
- { unclosed "{" "}" $startpos($1) $endpos }
block_item_list:
| block_item_list? block_item
{}
block_item:
-| declaration
+| declaration(block_item)
| statement_finish_noclose
| PRAGMA
{}
@@ -776,7 +851,7 @@ iteration_statement(openc,last_statement):
for_statement_header:
| optional(expression, SEMICOLON)
-| declaration
+| declaration(block_item)
{}
asm_attributes:
@@ -813,15 +888,13 @@ asm_op_name:
asm_flags:
| string_literals_list
-| string_literals_list COMMA asm_flags
+| asm_flags COMMA string_literals_list
{}
translation_unit_file:
| translation_unit EOF
| EOF
{}
-| error
- { syntax_error $endpos }
translation_unit:
| external_declaration
@@ -832,20 +905,27 @@ translation_unit:
external_declaration:
| function_definition
-| declaration
+| declaration(external_declaration)
| PRAGMA
{}
function_definition_begin:
-| declaration_specifiers ioption(pointer) x=direct_declarator
+| declaration_specifiers(declaration(external_declaration))
+ ilist(pointer1) x=direct_declarator
{ match x with
- | (_, None) -> $syntaxerror
+ | (_, None) -> !open_context()
+ (* this case does not make sense, but we let it pass anyway;
+ this error will be caught later on by a type check *)
| (i, Some restore_fun) -> restore_fun ()
}
-| declaration_specifiers ioption(pointer) x=direct_declarator
+| declaration_specifiers(declaration(external_declaration))
+ ilist(pointer1) x=direct_declarator
LPAREN params=identifier_list RPAREN open_context declaration_list
{ match x with
- | (_, Some _) -> $syntaxerror
+ | (i, Some _) -> declare_varname i
+ (* this case does not make sense; the syntax of K&R
+ declarators is broken anyway, Jacques-Henri should
+ propose a fix soon *)
| (i, None) ->
declare_varname i;
List.iter declare_varname params
@@ -860,11 +940,9 @@ identifier_list:
declaration_list:
| /*empty*/
{}
-| declaration_list declaration
+| declaration_list declaration(block_item)
{}
function_definition:
| function_definition_begin LBRACE block_item_list? close_context RBRACE
{}
-| function_definition_begin LBRACE block_item_list? close_context error
- { unclosed "{" "}" $startpos($2) $endpos }