From e18d267e6912e18462472687abc014a3d04b9a37 Mon Sep 17 00:00:00 2001 From: Jacques-Henri Jourdan Date: Thu, 8 Oct 2015 17:27:31 +0200 Subject: other, simpler fix: the lexer emits 2 tokens for each identifier --- cparser/Lexer.mll | 80 +++++------ cparser/pre_parser.mly | 339 +++++++++++++++++++--------------------------- cparser/pre_parser_aux.ml | 19 +-- test/regression/parsing.c | 17 +++ 4 files changed, 198 insertions(+), 257 deletions(-) diff --git a/cparser/Lexer.mll b/cparser/Lexer.mll index 5cfe74fd..139bb5aa 100644 --- a/cparser/Lexer.mll +++ b/cparser/Lexer.mll @@ -20,13 +20,12 @@ open Pre_parser_aux open Cabshelper open Camlcoq -module SMap = Map.Make(String) +module SSet = Set.Make(String) -let contexts_stk : (Cabs.cabsloc -> token) SMap.t list ref = ref [] +let lexicon : (string, Cabs.cabsloc -> token) Hashtbl.t = Hashtbl.create 17 -let init_ctx = - List.fold_left (fun ctx (key, builder) -> SMap.add key builder ctx) - SMap.empty +let () = + List.iter (fun (key, builder) -> Hashtbl.add lexicon key builder) [ ("_Alignas", fun loc -> ALIGNAS loc); ("_Alignof", fun loc -> ALIGNOF loc); ("_Bool", fun loc -> UNDERSCORE_BOOL loc); @@ -82,38 +81,24 @@ let init_ctx = ("unsigned", fun loc -> UNSIGNED loc); ("void", fun loc -> VOID loc); ("volatile", fun loc -> VOLATILE loc); - ("while", fun loc -> WHILE loc); - (let id = "__builtin_va_list" in - id, fun loc -> TYPEDEF_NAME (id, ref TypedefId, loc))] + ("while", fun loc -> WHILE loc)] + +let init_ctx = SSet.singleton "__builtin_va_list" +let types_context : SSet.t ref = ref init_ctx let _ = (* See comments in pre_parser_aux.ml *) - open_context := begin fun () -> - contexts_stk := List.hd !contexts_stk::!contexts_stk - end; - - close_context := begin fun () -> - contexts_stk := List.tl !contexts_stk - end; - - save_contexts_stk := begin fun () -> - let save = !contexts_stk in - fun () -> contexts_stk := save + save_context := begin fun () -> + let save = !types_context in + fun () -> types_context := save end; declare_varname := begin fun id -> - match !contexts_stk with - (* This is the default, so there is no need to have an entry in this case. *) - | ctx::stk -> contexts_stk := SMap.remove id ctx::stk - | [] -> assert false + types_context := SSet.remove id !types_context end; declare_typename := begin fun id -> - match !contexts_stk with - | ctx::stk -> - contexts_stk := - SMap.add id (fun loc -> TYPEDEF_NAME (id, ref TypedefId, loc)) ctx::stk - | [] -> assert false + types_context := SSet.add id !types_context end let init filename channel : Lexing.lexbuf = @@ -340,8 +325,8 @@ rule initial = parse | "," { COMMA(currentLoc lexbuf) } | "." { DOT(currentLoc lexbuf) } | identifier as id { - try SMap.find id (List.hd !contexts_stk) (currentLoc lexbuf) - with Not_found -> VAR_NAME (id, ref VarId, currentLoc lexbuf) } + try Hashtbl.find lexicon id (currentLoc lexbuf) + with Not_found -> PRE_NAME id } | eof { EOF } | _ as c { fatal_error lexbuf "invalid symbol %C" c } @@ -440,21 +425,35 @@ and singleline_comment = parse let tokens_stream filename channel : token coq_Stream = let tokens = Queue.create () in + let curr_id = ref None in let lexer_wraper lexbuf : Pre_parser.token = - let res = - if lexbuf.lex_curr_p.pos_cnum = lexbuf.lex_curr_p.pos_bol then - initial_linebegin lexbuf - else - initial lexbuf - in - Queue.push res tokens; - res + match !curr_id with + | Some id -> + curr_id := None; + let loc = currentLoc lexbuf in + let res = + if SSet.mem id !types_context then TYPEDEF_NAME (id, ref TypedefId, loc) + else VAR_NAME (id, ref VarId, loc) + in + Queue.push res tokens; + res + | None -> + let res = + if lexbuf.lex_curr_p.pos_cnum = lexbuf.lex_curr_p.pos_bol then + initial_linebegin lexbuf + else + initial lexbuf + in + begin match res with + | PRE_NAME id -> curr_id := Some id + | _ -> Queue.push res tokens + end; + res in let lexbuf = Lexing.from_channel channel in lexbuf.lex_curr_p <- {lexbuf.lex_curr_p with pos_fname = filename; pos_lnum = 1}; - contexts_stk := [init_ctx]; + types_context := init_ctx; Pre_parser.translation_unit_file lexer_wraper lexbuf; - assert (List.length !contexts_stk = 1); let rec compute_token_stream () = let loop t v = Cons (Coq_existT (t, Obj.magic v), Lazy.from_fun compute_token_stream) @@ -574,6 +573,7 @@ and singleline_comment = parse | ATTRIBUTE loc -> loop ATTRIBUTE't loc | ASM loc -> loop ASM't loc | PRAGMA (s, loc) -> loop PRAGMA't (s, loc) + | PRE_NAME _ -> assert false in Lazy.from_fun compute_token_stream diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index e73cc22a..eacd59c8 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -37,6 +37,7 @@ %} +%token PRE_NAME %token VAR_NAME TYPEDEF_NAME %token CONSTANT @@ -55,19 +56,27 @@ %token EOF -(* These precedences declarations solve the conflict in the following declaration : +(* These precedence declarations solve the conflict in the following + declaration : int f(int (a)); - when a is a TYPEDEF_NAME. It is specified by 6.7.5.3 11. + when a is a TYPEDEF_NAME. It is specified by 6.7.5.3 11: a should + be taken as the type of parameter the anonymous function +*) +%nonassoc lowPrec1 +%nonassoc TYPEDEF_NAME +(* These precedence declaration solve the dangling else conflict. *) +%nonassoc lowPrec2 +%nonassoc ELSE + +(* WARNING: These precedence declarations tend to silently solve other conflicts. So, if you change the grammar (especially or statements), you should check that without these declarations, it - has ONLY ONE CONFLICT. + has ONLY 3 CONFLICTS. *) -%nonassoc TYPEDEF_NAME -%nonassoc highPrec %start translation_unit_file %% @@ -84,37 +93,42 @@ | x = X { fst x } -general_identifier: -| i = VAR_NAME -| i = TYPEDEF_NAME - { i } +(* The kind of an identifier should not be determined when looking + ahead, because the context may not be up to date. For this reason, + when reading an identifier, the lexer emits two tokens: the first + one (PRE_NAME) is eaten as a lookahead token, the second one is the + actual identifier. +*) -string_literals_list: -| STRING_LITERAL -| string_literals_list STRING_LITERAL - {} +typedef_name: +| PRE_NAME i = TYPEDEF_NAME + { i } -(* WARNING : because of the lookahead token, the context might be - opened or closed one token after the position of this non-terminal ! +var_name: +| PRE_NAME i = VAR_NAME + { i } - Opening too late is not dangerous for us, because this does not - change the token stream. However, we have to make sure the - lookahead token present just after closing/declaring/restoring is - not an identifier. An easy way to check that is to look at the - follow set of the non-terminal in question. The follow sets are - given by menhir with option -lg 3. *) +general_identifier: +| i = typedef_name +| i = var_name + { i } -%inline nop: (* empty *) { } +(* We add this non-terminal here to force the resolution of the + conflict at the point of shifting the TYPEDEF_NAME. If we had + already shifted it, reduce/reduce conflict appear, and menhir is + not able to solve them. *) +low_prec : %prec lowPrec1 {} +general_identifier_red: +| PRE_NAME low_prec i = TYPEDEF_NAME +| PRE_NAME i = VAR_NAME + { i } -open_context: - (* empty *)%prec highPrec { !open_context () } -close_context: - (* empty *) { !close_context () } -in_context(nt): - open_context x = nt close_context { x } +string_literals_list: +| string_literals_list? STRING_LITERAL + {} -save_contexts_stk: - (* empty *) { !save_contexts_stk () } +save_context: + (* empty *) { !save_context () } declare_varname(nt): i = nt { declare_varname i; i } @@ -124,7 +138,7 @@ declare_typename(nt): (* Actual grammar *) primary_expression: -| i = VAR_NAME +| i = var_name { set_id_type i VarId } | CONSTANT | string_literals_list @@ -320,9 +334,9 @@ storage_class_specifier_no_typedef: (* [declaration_specifiers_no_type] matches declaration specifiers that do not contain either "typedef" nor type specifiers. *) declaration_specifiers_no_type: -| storage_class_specifier_no_typedef declaration_specifiers_no_type? -| type_qualifier declaration_specifiers_no_type? -| function_specifier declaration_specifiers_no_type? +| declaration_specifiers_no_type? storage_class_specifier_no_typedef +| declaration_specifiers_no_type? type_qualifier +| declaration_specifiers_no_type? function_specifier {} (* [declaration_specifiers_no_typedef_name] matches declaration @@ -330,10 +344,10 @@ declaration_specifiers_no_type: (i.e. type specifier declared using a previous "typedef keyword"). *) declaration_specifiers_no_typedef_name: -| storage_class_specifier_no_typedef declaration_specifiers_no_typedef_name? -| type_qualifier declaration_specifiers_no_typedef_name? -| function_specifier declaration_specifiers_no_typedef_name? -| type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? +| declaration_specifiers_no_typedef_name? storage_class_specifier_no_typedef +| declaration_specifiers_no_typedef_name? type_qualifier +| declaration_specifiers_no_typedef_name? function_specifier +| declaration_specifiers_no_typedef_name? type_specifier_no_typedef_name {} (* [declaration_specifiers_no_type] matches declaration_specifiers @@ -353,7 +367,7 @@ declaration_specifiers_no_typedef_name: The first field is a named t, while the second is unnamed of type t. *) declaration_specifiers: -| declaration_specifiers_no_type? i = TYPEDEF_NAME declaration_specifiers_no_type? +| declaration_specifiers_no_type? i = typedef_name declaration_specifiers_no_type? { set_id_type i TypedefId } | declaration_specifiers_no_type? type_specifier_no_typedef_name declaration_specifiers_no_typedef_name? {} @@ -364,9 +378,9 @@ declaration_specifiers: declaration_specifiers_typedef: | declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? - i = TYPEDEF_NAME declaration_specifiers_no_type? + i = typedef_name declaration_specifiers_no_type? | declaration_specifiers_no_type? - i = TYPEDEF_NAME declaration_specifiers_no_type? + i = typedef_name declaration_specifiers_no_type? TYPEDEF declaration_specifiers_no_type? { set_id_type i TypedefId } | declaration_specifiers_no_type? @@ -421,7 +435,7 @@ struct_declaration: (* As in the standard, except it also encodes the constraint described in the comment above [declaration_specifiers]. *) specifier_qualifier_list: -| type_qualifier_list? i = TYPEDEF_NAME type_qualifier_list? +| type_qualifier_list? i = typedef_name type_qualifier_list? { set_id_type i TypedefId } | type_qualifier_list? type_specifier_no_typedef_name specifier_qualifier_list_no_typedef_name? {} @@ -497,7 +511,7 @@ gcc_attribute: | gcc_attribute_word | gcc_attribute_word LPAREN argument_expression_list? RPAREN {} -| gcc_attribute_word LPAREN i = TYPEDEF_NAME COMMA argument_expression_list RPAREN +| gcc_attribute_word LPAREN i = typedef_name COMMA argument_expression_list RPAREN (* This is to emulate GCC's attribute syntax : we make this identifier a var name identifier, so that the parser will see it as a variable reference *) @@ -523,16 +537,14 @@ declarator: { x } direct_declarator: -| i = general_identifier +| i = general_identifier_red { set_id_type i VarId; (i, None) } -| LPAREN x = declarator RPAREN +| LPAREN save_context x = declarator RPAREN | x = direct_declarator LBRACK type_qualifier_list? assignment_expression? RBRACK { x } -| x = direct_declarator LPAREN - open_context parameter_type_list? restore_fun = save_contexts_stk - close_context RPAREN +| x = direct_declarator LPAREN ctx = context_parameter_type_list RPAREN { match snd x with - | None -> (fst x, Some restore_fun) + | None -> (fst x, Some ctx) | Some _ -> x } pointer: @@ -544,6 +556,10 @@ type_qualifier_list: | type_qualifier_list? type_qualifier {} +context_parameter_type_list: +| ctx1 = save_context parameter_type_list? ctx2 = save_context + { ctx1 (); ctx2 } + parameter_type_list: | l=parameter_list | l=parameter_list COMMA ELLIPSIS @@ -571,9 +587,9 @@ abstract_declarator: {} direct_abstract_declarator: -| LPAREN abstract_declarator RPAREN +| LPAREN save_context abstract_declarator RPAREN | direct_abstract_declarator? LBRACK type_qualifier_list? assignment_expression? RBRACK -| direct_abstract_declarator? LPAREN in_context(parameter_type_list?) RPAREN +| direct_abstract_declarator? LPAREN context_parameter_type_list RPAREN {} c_initializer: @@ -602,67 +618,28 @@ designator: | DOT i = general_identifier { set_id_type i OtherId } -(* The grammar of statements is replicated three times. - - [statement_finish_close] should close the current context just - before its last token. - - [statement_finish_noclose] should not close the current context. It - should modify it only if this modification actually changes the - context of the current block. - - [statement_intern_close] is like [statement_finish_close], except - it cannot reduce to a single-branch if statement. -*) +statement: +| labeled_statement +| compound_statement +| expression_statement +| selection_statement +| iteration_statement +| jump_statement +| asm_statement + {} -statement_finish_close: -| labeled_statement(statement_finish_close) -| compound_statement(nop) -| expression_statement(close_context) -| selection_statement_finish(nop) -| iteration_statement(nop,statement_finish_close) -| jump_statement(close_context) -| asm_statement(close_context) - {} - -statement_finish_noclose: -| labeled_statement(statement_finish_noclose) -| compound_statement(open_context) -| expression_statement(nop) -| selection_statement_finish(open_context) -| iteration_statement(open_context,statement_finish_close) -| jump_statement(nop) -| asm_statement(nop) - {} - -statement_intern_close: -| labeled_statement(statement_intern_close) -| compound_statement(nop) -| expression_statement(close_context) -| selection_statement_intern_close -| iteration_statement(nop,statement_intern_close) -| jump_statement(close_context) -| asm_statement(close_context) - {} - -(* [labeled_statement(last_statement)] has the same effect on contexts - as [last_statement]. *) -labeled_statement(last_statement): -| i = general_identifier COLON last_statement +labeled_statement: +| i = general_identifier COLON statement { set_id_type i OtherId } -| CASE constant_expression COLON last_statement -| DEFAULT COLON last_statement +| CASE constant_expression COLON statement +| DEFAULT COLON statement {} -(* [compound_statement] uses a local context and closes it before its - last token. It uses [openc] to open this local context if needed. - That is, if a local context has already been opened, [openc] = [nop], - otherwise, [openc] = [open_context]. *) -compound_statement(openc): -| LBRACE openc block_item_list? close_context RBRACE - {} -| LBRACE openc block_item_list? close_context error - { unclosed "{" "}" $startpos($1) $endpos } +compound_statement: +| ctx = save_context LBRACE block_item_list? RBRACE + { ctx() } +| ctx = save_context LBRACE block_item_list? error + { ctx(); unclosed "{" "}" $startpos($2) $endpos } block_item_list: | block_item_list? block_item @@ -670,95 +647,46 @@ block_item_list: block_item: | declaration -| statement_finish_noclose +| statement | PRAGMA {} -(* [expression_statement], [jump_statement] and [asm_statement] close - the local context if needed, depending of the close parameter. If - there is no local context, [close] = [nop]. Otherwise, - [close] = [close_context]. *) -expression_statement(close): -| expression? close SEMICOLON +expression_statement: +| expression? SEMICOLON {} -jump_statement(close): -| GOTO i = general_identifier close SEMICOLON +jump_statement: +| GOTO i = general_identifier SEMICOLON { set_id_type i OtherId } -| CONTINUE close SEMICOLON -| BREAK close SEMICOLON -| RETURN expression? close SEMICOLON - {} - -asm_statement(close): -| ASM asm_attributes LPAREN string_literals_list asm_arguments RPAREN close SEMICOLON - {} - -(* [selection_statement_finish] and [selection_statement_intern] use a - local context and close it before their last token. - - [selection_statement_finish(openc)] uses [openc] to open this local - context if needed. That is, if a local context has already been - opened, [openc] = [nop], otherwise, [openc] = [open_context]. - - [selection_statement_intern_close] is always called with a local - context openned. It closes it before its last token. *) - -(* It should be noted that the token [ELSE] should be lookaheaded - /outside/ of the local context because if the lookaheaded token is - not [ELSE], then this is the end of the statement. - - This is especially important to parse correctly the following - example: - - typedef int a; - - int f() { - for(int a; ;) - if(1); - a * x; - } - - However, if the lookahead token is [ELSE], we should parse the - second branch in the same context as the first branch, so we have - to reopen the previously closed context. This is the reason for the - save/restore system. -*) - -if_else_statement_begin(openc): -| IF openc LPAREN expression RPAREN restore_fun = save_contexts_stk - statement_intern_close - { restore_fun () } - -selection_statement_finish(openc): -| IF openc LPAREN expression RPAREN save_contexts_stk statement_finish_close -| if_else_statement_begin(openc) ELSE statement_finish_close -| SWITCH openc LPAREN expression RPAREN statement_finish_close +| CONTINUE SEMICOLON +| BREAK SEMICOLON +| RETURN expression? SEMICOLON {} -selection_statement_intern_close: -| if_else_statement_begin(nop) ELSE statement_intern_close -| SWITCH LPAREN expression RPAREN statement_intern_close +asm_statement: +| ASM asm_attributes LPAREN string_literals_list asm_arguments RPAREN SEMICOLON {} -(* [iteration_statement] uses a local context and closes it before - their last token. +ifelse_statement1: +| IF LPAREN expression RPAREN ctx = save_context statement ELSE + { ctx() } - [iteration_statement] uses [openc] to open this local context if - needed. That is, if a local context has already been opened, - [openc] = [nop], otherwise, [openc] = [open_context]. +selection_statement: +| ctx = save_context ifelse_statement1 statement +| ctx = save_context IF LPAREN expression RPAREN save_context statement %prec lowPrec2 +| ctx = save_context SWITCH LPAREN expression RPAREN statement + { ctx() } - [last_statement] is either [statement_intern_close] or - [statement_finish_close]. That is, it should /always/ close the - local context. *) +do_statement1: +| ctx = save_context DO statement + { ctx () } -iteration_statement(openc,last_statement): -| WHILE openc LPAREN expression RPAREN last_statement -| DO open_context statement_finish_close WHILE - openc LPAREN expression RPAREN close_context SEMICOLON -| FOR openc LPAREN expression? SEMICOLON expression? SEMICOLON expression? RPAREN last_statement -| FOR openc LPAREN declaration expression? SEMICOLON expression? RPAREN last_statement - {} +iteration_statement: +| ctx = save_context WHILE LPAREN expression RPAREN statement +| ctx = save_context FOR LPAREN expression? SEMICOLON expression? SEMICOLON expression? RPAREN statement +| ctx = save_context FOR LPAREN declaration expression? SEMICOLON expression? RPAREN statement +| ctx = save_context do_statement1 WHILE LPAREN expression RPAREN SEMICOLON + { ctx() } asm_attributes: | /* empty */ @@ -816,35 +744,40 @@ external_declaration: | PRAGMA {} -function_definition_begin: -| declaration_specifiers pointer? x=direct_declarator +identifier_list: +| id = var_name + { [id] } +| idl = identifier_list COMMA id = var_name + { id :: idl } + +declaration_list: +| /*empty*/ + { } +| declaration_list declaration + { } + +function_definition1: +| declaration_specifiers pointer? x=direct_declarator ctx = save_context { match x with | (_, None) -> $syntaxerror - | (i, Some restore_fun) -> restore_fun () + | (_, Some ctx') -> ctx'(); ctx } | declaration_specifiers pointer? x=direct_declarator - LPAREN params=identifier_list RPAREN open_context declaration_list + LPAREN save_context params=identifier_list RPAREN ctx = save_context declaration_list { match x with | (_, Some _) -> $syntaxerror | (i, None) -> declare_varname i; - List.iter declare_varname params + List.iter declare_varname params; + ctx } -identifier_list: -| id = VAR_NAME - { [id] } -| idl = identifier_list COMMA id = VAR_NAME - { id :: idl } - -declaration_list: -| /*empty*/ - { } -| declaration_list declaration - { } +function_definition2: +| ctx = function_definition1 LBRACE block_item_list? + { ctx() } +| ctx = function_definition1 LBRACE block_item_list? error + { unclosed "{" "}" $startpos($2) $endpos } function_definition: -| function_definition_begin LBRACE block_item_list? close_context RBRACE +| function_definition2 RBRACE { } -| function_definition_begin LBRACE block_item_list? close_context error - { unclosed "{" "}" $startpos($2) $endpos } diff --git a/cparser/pre_parser_aux.ml b/cparser/pre_parser_aux.ml index c6b48608..717396a7 100644 --- a/cparser/pre_parser_aux.ml +++ b/cparser/pre_parser_aux.ml @@ -18,20 +18,11 @@ type identifier_type = | TypedefId | OtherId -(* These functions push and pop a context on the contexts stack. *) -let open_context:(unit -> unit) ref = ref (fun () -> assert false) -let close_context:(unit -> unit) ref = ref (fun () -> assert false) +(* Applying once this functions saves the current context stack, and + applying it the second time restores it. *) +let save_context:(unit -> (unit -> unit)) ref = ref (fun _ -> assert false) -(* Applying once this functions saves the whole contexts stack, and - applying it the second time restores it. - - This is mainly used to rollback the context stack to a previous - state. This is usefull for example when we pop too much contexts at - the end of the first branch of an if statement. See - pre_parser.mly. *) -let save_contexts_stk:(unit -> (unit -> unit)) ref = ref (fun _ -> assert false) - -(* Change the context at the top of the top stack of context, by - changing an identifier to be a varname or a typename*) +(* Change the context by changing an identifier to be a varname or a + typename *) let declare_varname:(string -> unit) ref = ref (fun _ -> assert false) let declare_typename:(string -> unit) ref = ref (fun _ -> assert false) diff --git a/test/regression/parsing.c b/test/regression/parsing.c index 24b954c1..8687d6aa 100644 --- a/test/regression/parsing.c +++ b/test/regression/parsing.c @@ -7,6 +7,12 @@ T f(T a(T)) { T b; return 1; } + +T f1(T(x)); +T f1(T x) { + return x; +} + int g(int x) { T:; T y; @@ -96,6 +102,17 @@ void m() { else printf("ERROR m\n"); } +int j() { + T T; +} + +int k() { + { T T; } + T t; + for(T T; ; ); + T u; +} + int main () { f(g); i(); -- cgit From b960c83725d7e185ac5c6e3c0d6043c7dcd2f556 Mon Sep 17 00:00:00 2001 From: Jacques-Henri Jourdan Date: Sun, 1 Nov 2015 22:32:23 +0100 Subject: Better handling of old-style K&R function declarations: - Added a Cabs.PROTO_OLD constructor to Cabs.decl_type - Refactored the Parser.vy and pre_parser.mly grammars - Rewritten the conversion of old function definitions to new-style --- cparser/Cabs.v | 4 +- cparser/Cabshelper.ml | 10 +- cparser/Elab.ml | 196 +++++++++++++++++++++------------------- cparser/Parser.vy | 146 ++++++++++++++++-------------- cparser/pre_parser.mly | 178 ++++++++++++++++++------------------ test/regression/Results/parsing | 10 ++ test/regression/parsing.c | 62 ++++++++++++- 7 files changed, 345 insertions(+), 261 deletions(-) diff --git a/cparser/Cabs.v b/cparser/Cabs.v index 6d9e95d5..ab53a3a8 100644 --- a/cparser/Cabs.v +++ b/cparser/Cabs.v @@ -81,6 +81,7 @@ with decl_type := | PTR : list cvspec -> decl_type -> decl_type (* The bool is true for variable length parameters. *) | PROTO : decl_type -> list parameter * bool -> decl_type + | PROTO_OLD : decl_type -> list string -> decl_type with parameter := | PARAM : list spec_elem -> option string -> decl_type -> list attribute -> cabsloc -> parameter @@ -190,8 +191,7 @@ Definition asm_flag := (bool * list char_code)%type. ** Declaration definition (at toplevel) *) Inductive definition := - | FUNDEF : list spec_elem -> name -> statement -> cabsloc -> definition - | KRFUNDEF : list spec_elem -> name -> list string -> list definition -> statement -> cabsloc -> definition + | FUNDEF : list spec_elem -> name -> list definition -> statement -> cabsloc -> definition | DECDEF : init_name_group -> cabsloc -> definition (* global variable(s), or function prototype *) | PRAGMA : string -> cabsloc -> definition diff --git a/cparser/Cabshelper.ml b/cparser/Cabshelper.ml index 5e6a19d0..b3782ba8 100644 --- a/cparser/Cabshelper.ml +++ b/cparser/Cabshelper.ml @@ -46,8 +46,7 @@ let rec isTypedef = function let get_definitionloc (d : definition) : cabsloc = match d with - | FUNDEF(_, _, _, l) -> l - | KRFUNDEF(_, _, _, _, _, l) -> l + | FUNDEF(_, _, _, _, l) -> l | DECDEF(_, l) -> l | PRAGMA(_, l) -> l @@ -78,10 +77,3 @@ let string_of_cabsloc l = let format_cabsloc pp l = Format.fprintf pp "%s:%d" l.filename l.lineno - -let rec append_decltype dt1 dt2 = - match dt1 with - | JUSTBASE -> dt2 - | ARRAY(dt, attr, sz) -> ARRAY(append_decltype dt dt2, attr, sz) - | PTR(attr, dt) -> PTR(attr, append_decltype dt dt2) - | PROTO(dt, params) -> PROTO(append_decltype dt dt2, params) diff --git a/cparser/Elab.ml b/cparser/Elab.ml index 4d3d1d02..d078cdac 100644 --- a/cparser/Elab.ml +++ b/cparser/Elab.ml @@ -518,9 +518,9 @@ and elab_cvspecs env cv_specs = (* Elaboration of a type declarator. C99 section 6.7.5. *) -and elab_type_declarator loc env ty = function +and elab_type_declarator loc env ty kr_ok = function | Cabs.JUSTBASE -> - (ty, env) + ((ty, None), env) | Cabs.ARRAY(d, cv_specs, sz) -> let a = elab_cvspecs env cv_specs in let sz' = @@ -536,32 +536,41 @@ and elab_type_declarator loc env ty = function | None -> error loc "array size is not a compile-time constant"; Some 1L in (* produces better error messages later *) - elab_type_declarator loc env (TArray(ty, sz', a)) d + elab_type_declarator loc env (TArray(ty, sz', a)) kr_ok d | Cabs.PTR(cv_specs, d) -> let a = elab_cvspecs env cv_specs in - elab_type_declarator loc env (TPtr(ty, a)) d + elab_type_declarator loc env (TPtr(ty, a)) kr_ok d | Cabs.PROTO(d, (params, vararg)) -> - begin match unroll env ty with - | TArray _ | TFun _ -> - error loc "illegal function return type@ %a" Cprint.typ ty - | _ -> () - end; - let params' = elab_parameters env params in - elab_type_declarator loc env (TFun(ty, params', vararg, [])) d + begin match unroll env ty with + | TArray _ | TFun _ -> + error loc "Illegal function return type@ %a" Cprint.typ ty + | _ -> () + end; + let params' = elab_parameters env params in + elab_type_declarator loc env (TFun(ty, Some params', vararg, [])) kr_ok d + | Cabs.PROTO_OLD(d, params) -> + begin match unroll env ty with + | TArray _ | TFun _ -> + error loc "Illegal function return type@ %a" Cprint.typ ty + | _ -> () + end; + match params with + | [] -> + elab_type_declarator loc env (TFun(ty, None, false, [])) kr_ok d + | _ -> + if not kr_ok || d <> Cabs.JUSTBASE then + fatal_error loc "Illegal old-style K&R function definition"; + ((TFun(ty, None, false, []), Some params), env) (* Elaboration of parameters in a prototype *) and elab_parameters env params = - match params with - | [] -> (* old-style K&R prototype *) - None - | _ -> - (* Prototype introduces a new scope *) - let (vars, _) = mmap elab_parameter (Env.new_scope env) params in - (* Catch special case f(t) where t is void or a typedef to void *) - match vars with - | [ ( {name=""}, t) ] when is_void_type env t -> Some [] - | _ -> Some vars + (* Prototype introduces a new scope *) + let (vars, _) = mmap elab_parameter (Env.new_scope env) params in + (* Catch special case f(t) where t is void or a typedef to void *) + match vars with + | [ ( {name=""}, t) ] when is_void_type env t -> [] + | _ -> vars (* Elaboration of a function parameter *) @@ -569,7 +578,7 @@ and elab_parameter env (PARAM (spec, id, decl, attr, loc)) = let (sto, inl, tydef, bty, env1) = elab_specifier loc env spec in if tydef then error loc "'typedef' used in function parameter"; - let (ty, env2) = elab_type_declarator loc env1 bty decl in + let ((ty, _), env2) = elab_type_declarator loc env1 bty false decl in let ty = add_attributes_type (elab_attributes env attr) ty in if sto <> Storage_default && sto <> Storage_register then error loc @@ -586,13 +595,13 @@ and elab_parameter env (PARAM (spec, id, decl, attr, loc)) = (* Elaboration of a (specifier, Cabs "name") pair *) -and elab_name env spec (Name (id, decl, attr, loc)) = +and elab_fundef_name env spec (Name (id, decl, attr, loc)) = let (sto, inl, tydef, bty, env') = elab_specifier loc env spec in if tydef then error loc "'typedef' is forbidden here"; - let (ty, env'') = elab_type_declarator loc env' bty decl in + let ((ty, kr_params), env'') = elab_type_declarator loc env' bty true decl in let a = elab_attributes env attr in - (id, sto, inl, add_attributes_type a ty, env'') + (id, sto, inl, add_attributes_type a ty, kr_params, env'') (* Elaboration of a name group. C99 section 6.7.6 *) @@ -604,8 +613,8 @@ and elab_name_group loc env (spec, namelist) = if inl then error loc "'inline' is forbidden here"; let elab_one_name env (Name (id, decl, attr, loc)) = - let (ty, env1) = - elab_type_declarator loc env bty decl in + let ((ty, _), env1) = + elab_type_declarator loc env bty false decl in let a = elab_attributes env attr in ((id, add_attributes_type a ty), env1) in (mmap elab_one_name env' namelist, sto) @@ -616,8 +625,8 @@ and elab_init_name_group loc env (spec, namelist) = let (sto, inl, tydef, bty, env') = elab_specifier ~only:(namelist=[]) loc env spec in let elab_one_name env (Init_name (Name (id, decl, attr, loc), init)) = - let (ty, env1) = - elab_type_declarator loc env bty decl in + let ((ty, _), env1) = + elab_type_declarator loc env bty false decl in let a = elab_attributes env attr in if inl && not (is_function_type env ty) then error loc "'inline' can only appear on functions"; @@ -818,7 +827,7 @@ and elab_enum only loc tag optmembers attrs env = let elab_type loc env spec decl = let (sto, inl, tydef, bty, env') = elab_specifier loc env spec in - let (ty, env'') = elab_type_declarator loc env' bty decl in + let ((ty, _), env'') = elab_type_declarator loc env' bty false decl in if sto <> Storage_default || inl || tydef then error loc "'typedef', 'extern', 'static', 'register' and 'inline' are meaningless in cast"; (ty, env'') @@ -1737,8 +1746,8 @@ let elab_expr loc env a = match args, params with | [], [] -> [] | [], _::_ -> err "not enough arguments in function call"; [] - | _::_, [] -> - if vararg + | _::_, [] -> + if vararg then args else (err "too many arguments in function call"; args) | arg1 :: argl, (_, ty_p) :: paraml -> @@ -1881,20 +1890,70 @@ let enter_decdefs local loc env sto dl = let (decls, env') = List.fold_left enter_decdef ([], env) dl in (List.rev decls, env') -let elab_fundef env spec name body loc = - let (s, sto, inline, ty, env1) = elab_name env spec name in +let elab_fundef env spec name defs body loc = + let (s, sto, inline, ty, kr_params, env1) = elab_fundef_name env spec name in if sto = Storage_register then - fatal_error loc "a function definition cannot have 'register' storage class"; - (* Fix up the type. We can have params = None but only for an - old-style parameterless function "int f() {...}" *) - let ty = - match ty with - | TFun(ty_ret, None, vararg, attr) -> TFun(ty_ret, Some [], vararg, attr) - | _ -> ty in + fatal_error loc "A function definition cannot have 'register' storage class"; + begin match kr_params, defs with + | None, d::_ -> + error (get_definitionloc d) + "Old-style parameter declaration in a new-style function definition" + | _ -> () + end; + let (ty, env1) = + match ty, kr_params with + | TFun(ty_ret, None, vararg, attr), None -> + (TFun(ty_ret, Some [], vararg, attr), env1) + | ty, None -> + (ty, env1) + | TFun(ty_ret, None, false, attr), Some params -> + warning loc "Non-prototype, pre-standard function definition.@ Converting to prototype form"; + (* Check that the parameters have unique names *) + List.iter (fun id -> + if List.length (List.filter ((=) id) params) > 1 then + fatal_error loc "Parameter '%s' appears more than once in function declaration" id) + params; + (* Check that the declarations only declare parameters *) + let extract_name (Init_name(Name(s, dty, attrs, loc') as name, ie)) = + if not (List.mem s params) then + error loc' "Declaration of '%s' which is not a function parameter" s; + if ie <> NO_INIT then + error loc' "Illegal initialization of function parameter '%s'" s; + name + in + (* Convert old-style K&R function definition to modern prototyped form *) + let elab_kr_param_def env = function + | DECDEF((spec', name_init_list), loc') -> + let name_list = List.map extract_name name_init_list in + let (paramsenv, sto) = elab_name_group loc' env (spec', name_list) in + if sto <> Storage_default && sto <> Storage_register then + error loc' "'extern' or 'static' storage not supported for function parameter"; + paramsenv + | d -> + (* Should never be produced by the parser *) + fatal_error (get_definitionloc d) + "Illegal declaration of function parameter" in + let (kr_params_defs, env1) = mmap elab_kr_param_def env1 defs in + let kr_params_defs = List.concat kr_params_defs in + let rec search_param_type param = + match List.filter (fun (p, _) -> p = param) kr_params_defs with + | [] -> + (* Parameter is not declared, defaults to "int" in ISO C90, + is an error in ISO C99. Just emit a warning. *) + warning loc "Type of '%s' defaults to 'int'" param; + (Env.fresh_ident param, TInt (IInt, [])) + | (_,ty)::q -> + if q <> [] then error loc "Parameter '%s' defined more than once" param; + (Env.fresh_ident param, argument_conversion env1 ty) + in + let params' = List.map search_param_type params in + (TFun(ty_ret, Some params', false, attr), env1) + | _, Some params -> assert false + in (* Extract info from type *) let (ty_ret, params, vararg, attr) = match ty with - | TFun(ty_ret, Some params, vararg, attr) -> + | TFun(ty_ret, Some params, vararg, attr) -> if wrap incomplete_type loc env1 ty_ret && not (is_void_type env ty_ret) then fatal_error loc "return type is an incomplete type"; (ty_ret, params, vararg, attr) @@ -1938,66 +1997,19 @@ let elab_fundef env spec name body loc = emit_elab env1 loc (Gfundef fn); env1 -let elab_kr_fundef env spec name params defs body loc = - warning loc "Non-prototype, pre-standard function definition.@ Converting to prototype form"; - (* Check that the declarations only declare parameters *) - let check_one_decl (Init_name(Name(s, dty, attrs, loc'), ie)) = - if not (List.mem s params) then - error loc' "Declaration of '%s' which is not a function parameter" s; - if ie <> NO_INIT then - error loc' "Illegal initialization of function parameter '%s'" s in - let check_decl = function - | DECDEF((spec', name_init_list), loc') -> - List.iter check_one_decl name_init_list - | d -> - (* Should never be produced by the parser *) - fatal_error (get_definitionloc d) - "Illegal declaration of function parameter" in - List.iter check_decl defs; - (* Convert old-style K&R function definition to modern prototyped form *) - let rec convert_param param = function - | [] -> - (* Parameter is not declared, defaults to "int" in ISO C90, - is an error in ISO C99. Just emit a warning. *) - warning loc "Type of '%s' defaults to 'int'" param; - PARAM([SpecType Tint], Some param, JUSTBASE, [], loc) - | DECDEF((spec', name_init_list), loc') :: defs -> - let rec convert = function - | [] -> convert_param param defs - | Init_name(Name(s, dty, attrs, loc''), ie) :: l -> - if s = param - then PARAM(spec', Some param, dty, attrs, loc'') - else convert l - in convert name_init_list - | _ -> - assert false (* checked earlier *) in - let params' = - List.map (fun p -> convert_param p defs) params in - let name' = - let (Name(s, dty, attr, loc')) = name in - Name(s, append_decltype dty (PROTO(JUSTBASE, (params', false))), - attr, loc') in - (* Elaborate the prototyped form *) - elab_fundef env spec name' body loc - let rec elab_definition (local: bool) (env: Env.t) (def: Cabs.definition) : decl list * Env.t = match def with (* "int f(int x) { ... }" *) - | FUNDEF(spec, name, body, loc) -> - if local then error loc "local definition of a function"; - let env1 = elab_fundef env spec name body loc in - ([], env1) - (* "int f(x, y) double y; { ... }" *) - | KRFUNDEF(spec, name, params, defs, body, loc) -> + | FUNDEF(spec, name, defs, body, loc) -> if local then error loc "local definition of a function"; - let env1 = elab_kr_fundef env spec name params defs body loc in + let env1 = elab_fundef env spec name defs body loc in ([], env1) (* "int x = 12, y[10], *z" *) | DECDEF(init_name_group, loc) -> - let ((dl, env1), sto, tydef) = + let ((dl, env1), sto, tydef) = elab_init_name_group loc env init_name_group in if tydef then let env2 = enter_typedefs loc env1 sto dl diff --git a/cparser/Parser.vy b/cparser/Parser.vy index 7c0bfb55..16f6a0ef 100644 --- a/cparser/Parser.vy +++ b/cparser/Parser.vy @@ -52,6 +52,7 @@ Require Import List. %type argument_expression_list %type declaration %type declaration_specifiers +%type declaration_specifiers_typespec_opt %type init_declarator_list %type init_declarator %type storage_class_specifier @@ -65,9 +66,9 @@ Require Import List. %type enumerator_list %type enumerator %type enumeration_constant -%type type_qualifier +%type type_qualifier type_qualifier_noattr %type function_specifier -%type declarator direct_declarator +%type declarator declarator_noattrend direct_declarator %type<(decl_type -> decl_type) * cabsloc> pointer %type type_qualifier_list %type parameter_type_list @@ -95,7 +96,6 @@ Require Import List. %type gcc_attribute %type gcc_attribute_list %type gcc_attribute_word -%type old_function_declarator direct_old_function_declarator %type identifier_list %type asm_flags %type