author     Bernhard Schommer <bschommer@users.noreply.github.com>  2015-11-12 17:35:41 +0100
committer  Bernhard Schommer <bschommer@users.noreply.github.com>  2015-11-12 17:35:41 +0100
commit     d90ba4443294b80bd940daedfdcdc3d4334fdc7c (patch)
tree       d647ca2216c342c433c35783d32e1abe8545a72c /cparser/Lexer.mll
parent     9054efbd25eedd5627b9e6e62bf1204e5fa0ae94 (diff)
parent     0ebefc1d145f82783829174bad1f41bb319742b4 (diff)
Merge pull request #69 from jhjourdan/parser_fix
Parser: duplicate identifier tokens, fix K&R definition parsing
Diffstat (limited to 'cparser/Lexer.mll')
-rw-r--r--  cparser/Lexer.mll | 79
1 file changed, 40 insertions(+), 39 deletions(-)
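
This change replaces the stack of lexing contexts (maps from identifier to token builder) with a single global keyword table plus one set of visible type names: the lexer now returns a neutral PRE_NAME token for every non-keyword identifier, and a wrapper immediately duplicates it into a classified VAR_NAME or TYPEDEF_NAME. A minimal, self-contained sketch of that duplication scheme (the token type is simplified; the real tokens also carry source locations and mutable identifier kinds):

    module SSet = Set.Make (String)

    type token =
      | PRE_NAME of string      (* neutral identifier, straight from the lexer *)
      | VAR_NAME of string      (* classified as a variable *)
      | TYPEDEF_NAME of string  (* classified as a type name *)
      | EOF

    (* Set of identifiers currently acting as type names. *)
    let types_context : SSet.t ref = ref (SSet.singleton "__builtin_va_list")

    (* Wrap a raw lexer so that every PRE_NAME is immediately followed by
       its classified duplicate; the (pre-)parser thus sees both tokens. *)
    let classify (next : unit -> token) : unit -> token =
      let pending = ref None in
      fun () ->
        match !pending with
        | Some id ->
            pending := None;
            if SSet.mem id !types_context
            then TYPEDEF_NAME id
            else VAR_NAME id
        | None ->
            let tok = next () in
            (match tok with PRE_NAME id -> pending := Some id | _ -> ());
            tok
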
diff --git a/cparser/Lexer.mll b/cparser/Lexer.mll
index 17c4528c..6fac15e8 100644
--- a/cparser/Lexer.mll
+++ b/cparser/Lexer.mll
@@ -20,13 +20,12 @@ open Pre_parser_aux
open Cabshelper
open Camlcoq
-module SMap = Map.Make(String)
+module SSet = Set.Make(String)
-let contexts_stk : (Cabs.cabsloc -> token) SMap.t list ref = ref []
+let lexicon : (string, Cabs.cabsloc -> token) Hashtbl.t = Hashtbl.create 17
-let init_ctx =
- List.fold_left (fun ctx (key, builder) -> SMap.add key builder ctx)
- SMap.empty
+let () =
+ List.iter (fun (key, builder) -> Hashtbl.add lexicon key builder)
[ ("_Alignas", fun loc -> ALIGNAS loc);
("_Alignof", fun loc -> ALIGNOF loc);
("_Bool", fun loc -> UNDERSCORE_BOOL loc);
@@ -82,38 +81,24 @@ let init_ctx =
("unsigned", fun loc -> UNSIGNED loc);
("void", fun loc -> VOID loc);
("volatile", fun loc -> VOLATILE loc);
- ("while", fun loc -> WHILE loc);
- (let id = "__builtin_va_list" in
- id, fun loc -> TYPEDEF_NAME (id, ref TypedefId, loc))]
+ ("while", fun loc -> WHILE loc)]
+
+let init_ctx = SSet.singleton "__builtin_va_list"
+let types_context : SSet.t ref = ref init_ctx
let _ =
(* See comments in pre_parser_aux.ml *)
- open_context := begin fun () ->
- contexts_stk := List.hd !contexts_stk::!contexts_stk
- end;
-
- close_context := begin fun () ->
- contexts_stk := List.tl !contexts_stk
- end;
-
- save_contexts_stk := begin fun () ->
- let save = !contexts_stk in
- fun () -> contexts_stk := save
+ save_context := begin fun () ->
+ let save = !types_context in
+ fun () -> types_context := save
end;
declare_varname := begin fun id ->
- match !contexts_stk with
- (* This is the default, so there is no need to have an entry in this case. *)
- | ctx::stk -> contexts_stk := SMap.remove id ctx::stk
- | [] -> assert false
+ types_context := SSet.remove id !types_context
end;
declare_typename := begin fun id ->
- match !contexts_stk with
- | ctx::stk ->
- contexts_stk :=
- SMap.add id (fun loc -> TYPEDEF_NAME (id, ref TypedefId, loc)) ctx::stk
- | [] -> assert false
+ types_context := SSet.add id !types_context
end
let init filename channel : Lexing.lexbuf =
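
Because OCaml's Set is a persistent data structure, saving and restoring the whole typedef context costs a single pointer swap, which is what the new save_context closure relies on. An illustrative sketch under that assumption (in_new_scope is a hypothetical helper, not part of the patch):

    module SSet = Set.Make (String)

    let types_context : SSet.t ref = ref SSet.empty

    (* Run [f] in a nested scope: capture the current set, restore it on
       exit. Persistence makes both operations O(1). *)
    let in_new_scope f =
      let saved = !types_context in
      let result = f () in
      types_context := saved;
      result

    let () =
      types_context := SSet.add "size_t" !types_context;
      in_new_scope (fun () ->
          (* a local variable shadows the typedef *)
          types_context := SSet.remove "size_t" !types_context;
          assert (not (SSet.mem "size_t" !types_context)));
      assert (SSet.mem "size_t" !types_context)  (* outer binding is back *)
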
@@ -340,8 +325,8 @@ rule initial = parse
| "," { COMMA(currentLoc lexbuf) }
| "." { DOT(currentLoc lexbuf) }
| identifier as id {
- try SMap.find id (List.hd !contexts_stk) (currentLoc lexbuf)
- with Not_found -> VAR_NAME (id, ref VarId, currentLoc lexbuf) }
+ try Hashtbl.find lexicon id (currentLoc lexbuf)
+ with Not_found -> PRE_NAME id }
| eof { EOF }
| _ as c { fatal_error lexbuf "invalid symbol %C" c }
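
With the keyword list moved into one global hash table, classifying a word is a single Hashtbl.find, with PRE_NAME as the fallback for anything unknown. A reduced sketch of that lookup (loc and the token constructors are simplified stand-ins for CompCert's actual types):

    type loc = { line : int; col : int }

    type token =
      | WHILE of loc
      | VOID of loc
      | PRE_NAME of string

    (* Keyword table; 17 is only the initial bucket-count hint. *)
    let lexicon : (string, loc -> token) Hashtbl.t = Hashtbl.create 17

    let () =
      List.iter (fun (key, builder) -> Hashtbl.add lexicon key builder)
        [ ("while", fun l -> WHILE l);
          ("void",  fun l -> VOID l) ]

    let classify_word (id : string) (l : loc) : token =
      try Hashtbl.find lexicon id l
      with Not_found -> PRE_NAME id
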
@@ -452,16 +437,33 @@ and singleline_comment = parse
(* [lexer tokens buffer] is a new lexer, which wraps [lexer], and also: 1-
records the token stream into the FIFO queue [tokens] and 2- records the
start and end positions of the last two tokens in the two-place buffer
- [buffer]. *)
+ [buffer] and 3- duplicates identifier tokens into PRE_NAME and
+ VAR/TYPE_NAME. *)
let lexer tokens buffer : lexbuf -> Pre_parser.token =
+ let curr_id = ref None in
+ types_context := init_ctx;
fun lexbuf ->
- let token = lexer lexbuf in
- Queue.push token tokens;
- let startp = lexbuf.lex_start_p
- and endp = lexbuf.lex_curr_p in
- buffer := ErrorReports.update !buffer (startp, endp);
- token
+ match !curr_id with
+ | Some id ->
+ curr_id := None;
+ let loc = currentLoc lexbuf in
+ let token =
+ if SSet.mem id !types_context then TYPEDEF_NAME (id, ref TypedefId, loc)
+ else VAR_NAME (id, ref VarId, loc)
+ in
+ Queue.push token tokens;
+ token
+ | None ->
+ let token = lexer lexbuf in
+ begin match token with
+ | PRE_NAME id -> curr_id := Some id
+ | _ -> Queue.push token tokens
+ end;
+ let startp = lexbuf.lex_start_p
+ and endp = lexbuf.lex_curr_p in
+ buffer := ErrorReports.update !buffer (startp, endp);
+ token
(* [invoke_pre_parser] is in charge of calling the pre_parser. It uses
the incremental API, which allows us to do our own error handling. *)
@@ -482,11 +484,9 @@ and singleline_comment = parse
of (appropriately classified) tokens. *)
let tokens_stream filename text : token coq_Stream =
- contexts_stk := [init_ctx];
let tokens = Queue.create () in
let buffer = ref ErrorReports.Zero in
invoke_pre_parser filename text (lexer tokens buffer) buffer;
- assert (List.length !contexts_stk = 1);
let rec compute_token_stream () =
let loop t v =
Cons (Coq_existT (t, Obj.magic v), Lazy.from_fun compute_token_stream)
@@ -606,6 +606,7 @@ and singleline_comment = parse
| ATTRIBUTE loc -> loop ATTRIBUTE't loc
| ASM loc -> loop ASM't loc
| PRAGMA (s, loc) -> loop PRAGMA't (s, loc)
+ | PRE_NAME _ -> assert false
in
Lazy.from_fun compute_token_stream
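
One note on the new assert false arm: the wrapper returns each PRE_NAME to the pre-parser but pushes only the classified duplicate into the tokens queue, so when the queue is replayed for the verified parser a PRE_NAME can never surface. A small sketch of that invariant, again with a simplified token type:

    type token =
      | PRE_NAME of string
      | VAR_NAME of string
      | TYPEDEF_NAME of string
      | EOF

    (* Replay the recorded stream; PRE_NAME is unreachable because the
       wrapper never enqueues it. *)
    let replay (tokens : token Queue.t) (consume : token -> unit) : unit =
      Queue.iter
        (function
          | PRE_NAME _ -> assert false  (* never pushed by the wrapper *)
          | tok -> consume tok)
        tokens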