From 581ac226fb42a0a005baa8941e5f39b181acc6cb Mon Sep 17 00:00:00 2001 From: François Pottier Date: Thu, 22 Oct 2015 18:09:36 +0200 Subject: Fix [Lexer.char_literal] and [Lexer.string_literal] to properly keep track of [lex_start_p]. This is required for Menhir to pick up the correct start position of the token. --- cparser/Lexer.mll | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'cparser/Lexer.mll') diff --git a/cparser/Lexer.mll b/cparser/Lexer.mll index 5cfe74fd..6be59aa8 100644 --- a/cparser/Lexer.mll +++ b/cparser/Lexer.mll @@ -283,15 +283,15 @@ rule initial = parse currentLoc lexbuf)} | preprocessing_number as s { error lexbuf "invalid numerical constant '%s'@ These characters form a preprocessor number, but not a constant" s; CONSTANT (Cabs.CONST_INT "0", currentLoc lexbuf) } - | "'" { let l = char_literal [] lexbuf in + | "'" { let l = char_literal lexbuf.lex_start_p [] lexbuf in CONSTANT (Cabs.CONST_CHAR(false, l), currentLoc lexbuf) } - | "L'" { let l = char_literal [] lexbuf in + | "L'" { let l = char_literal lexbuf.lex_start_p [] lexbuf in CONSTANT (Cabs.CONST_CHAR(true, l), currentLoc lexbuf) } - | "\"" { let l = string_literal [] lexbuf in + | "\"" { let l = string_literal lexbuf.lex_start_p [] lexbuf in STRING_LITERAL(false, l, currentLoc lexbuf) } - | "L\"" { let l = string_literal [] lexbuf in + | "L\"" { let l = string_literal lexbuf.lex_start_p [] lexbuf in STRING_LITERAL(true, l, currentLoc lexbuf) } | "..." { ELLIPSIS(currentLoc lexbuf) } | "+=" { ADD_ASSIGN(currentLoc lexbuf) } @@ -376,15 +376,17 @@ and char = parse | _ as c { Int64.of_int (Char.code c) } -and char_literal accu = parse - | '\'' { List.rev accu } +and char_literal startp accu = parse + | '\'' { lexbuf.lex_start_p <- startp; + List.rev accu } | '\n' | eof { fatal_error lexbuf "missing terminating \"'\" character" } - | "" { let c = char lexbuf in char_literal (c :: accu) lexbuf } + | "" { let c = char lexbuf in char_literal startp (c :: accu) lexbuf } -and string_literal accu = parse - | '\"' { List.rev accu } +and string_literal startp accu = parse + | '\"' { lexbuf.lex_start_p <- startp; + List.rev accu } | '\n' | eof { fatal_error lexbuf "missing terminating '\"' character" } - | "" { let c = char lexbuf in string_literal (c :: accu) lexbuf } + | "" { let c = char lexbuf in string_literal startp (c :: accu) lexbuf } (* We assume gcc -E syntax but try to tolerate variations. *) and hash = parse -- cgit From 03f39523094fd41c8aad5ee7a8169ffc448cfd4a Mon Sep 17 00:00:00 2001 From: François Pottier Date: Fri, 16 Oct 2015 17:56:06 +0200 Subject: Read the whole source C file into memory instead of reading it on demand. Having the file in memory will help build an error message. Also, this may be slightly faster. 
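For illustration, a minimal sketch of the intended pattern; the helper [read_whole_file] is hypothetical and is not part of this patch:

    (* Hypothetical helper: read an entire file into a string. *)
    let read_whole_file (filename : string) : string =
      let ic = open_in_bin filename in
      let text = really_input_string ic (in_channel_length ic) in
      close_in ic;
      text

    (* Build the lexbuf from the in-memory text rather than from a channel,
       then fix up the initial position, as the patch below does. *)
    let lexbuf_of_file (filename : string) : Lexing.lexbuf =
      let lexbuf = Lexing.from_string (read_whole_file filename) in
      lexbuf.Lexing.lex_curr_p <-
        { lexbuf.Lexing.lex_curr_p with
          Lexing.pos_fname = filename; Lexing.pos_lnum = 1 };
      lexbuf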
--- cparser/Lexer.mll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'cparser/Lexer.mll') diff --git a/cparser/Lexer.mll b/cparser/Lexer.mll index 6be59aa8..aca1be27 100644 --- a/cparser/Lexer.mll +++ b/cparser/Lexer.mll @@ -440,7 +440,7 @@ and singleline_comment = parse open Parser open Aut.GramDefs - let tokens_stream filename channel : token coq_Stream = + let tokens_stream filename text : token coq_Stream = let tokens = Queue.create () in let lexer_wraper lexbuf : Pre_parser.token = let res = @@ -452,7 +452,7 @@ and singleline_comment = parse Queue.push res tokens; res in - let lexbuf = Lexing.from_channel channel in + let lexbuf = Lexing.from_string text in lexbuf.lex_curr_p <- {lexbuf.lex_curr_p with pos_fname = filename; pos_lnum = 1}; contexts_stk := [init_ctx]; Pre_parser.translation_unit_file lexer_wraper lexbuf; -- cgit From 8fee5abd0a1d0865d3d3d4b4de48aacb4be0914e Mon Sep 17 00:00:00 2001 From: François Pottier Date: Fri, 23 Oct 2015 13:08:56 +0200 Subject: Lexer cleanup: isolate the entry point into the lexer. --- cparser/Lexer.mll | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'cparser/Lexer.mll') diff --git a/cparser/Lexer.mll b/cparser/Lexer.mll index aca1be27..c453b15b 100644 --- a/cparser/Lexer.mll +++ b/cparser/Lexer.mll @@ -440,15 +440,19 @@ and singleline_comment = parse open Parser open Aut.GramDefs + (* This is the main entry point to the lexer. *) + + let lexer : lexbuf -> Pre_parser.token = + fun lexbuf -> + if lexbuf.lex_curr_p.pos_cnum = lexbuf.lex_curr_p.pos_bol then + initial_linebegin lexbuf + else + initial lexbuf + let tokens_stream filename text : token coq_Stream = let tokens = Queue.create () in let lexer_wraper lexbuf : Pre_parser.token = - let res = - if lexbuf.lex_curr_p.pos_cnum = lexbuf.lex_curr_p.pos_bol then - initial_linebegin lexbuf - else - initial lexbuf - in + let res = lexer lexbuf in Queue.push res tokens; res in -- cgit From dca619cc34e64e63fe36ec9b5acdb42aafe665a8 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Fri, 23 Oct 2015 13:11:23 +0200 Subject: Lexer cleanup: isolate [lexer_wraper] and rename it to [lexer]. --- cparser/Lexer.mll | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'cparser/Lexer.mll') diff --git a/cparser/Lexer.mll b/cparser/Lexer.mll index c453b15b..ab4e03fe 100644 --- a/cparser/Lexer.mll +++ b/cparser/Lexer.mll @@ -449,17 +449,21 @@ and singleline_comment = parse else initial lexbuf + (* [lexer tokens buffer] is a new lexer, which wraps [lexer], and also + records the token stream into the FIFO queue [tokens]. *) + + let lexer tokens : lexbuf -> Pre_parser.token = + fun lexbuf -> + let token = lexer lexbuf in + Queue.push token tokens; + token + let tokens_stream filename text : token coq_Stream = let tokens = Queue.create () in - let lexer_wraper lexbuf : Pre_parser.token = - let res = lexer lexbuf in - Queue.push res tokens; - res - in let lexbuf = Lexing.from_string text in lexbuf.lex_curr_p <- {lexbuf.lex_curr_p with pos_fname = filename; pos_lnum = 1}; contexts_stk := [init_ctx]; - Pre_parser.translation_unit_file lexer_wraper lexbuf; + Pre_parser.translation_unit_file (lexer tokens) lexbuf; assert (List.length !contexts_stk = 1); let rec compute_token_stream () = let loop t v = -- cgit From 8d1a15f7f5c8fbea194a67c49c5aa10d6371b267 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Fri, 23 Oct 2015 13:16:35 +0200 Subject: Lexer update: use Menhir's incremental API instead of Menhir's traditional API. 
This means that CompCert must now be compiled in --table mode. At this point, the error message for a syntax error is still just "syntax error". --- cparser/Lexer.mll | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'cparser/Lexer.mll') diff --git a/cparser/Lexer.mll b/cparser/Lexer.mll index ab4e03fe..62764a48 100644 --- a/cparser/Lexer.mll +++ b/cparser/Lexer.mll @@ -458,12 +458,25 @@ and singleline_comment = parse Queue.push token tokens; token - let tokens_stream filename text : token coq_Stream = - let tokens = Queue.create () in + (* [invoke_pre_parser] is in charge of calling the pre_parser. It uses + the incremental API, which allows us to do our own error handling. *) + + let invoke_pre_parser filename text lexer = let lexbuf = Lexing.from_string text in lexbuf.lex_curr_p <- {lexbuf.lex_curr_p with pos_fname = filename; pos_lnum = 1}; + let module I = Pre_parser.MenhirInterpreter in + let checkpoint = Pre_parser.Incremental.translation_unit_file() + and supplier = I.lexer_lexbuf_to_supplier lexer lexbuf + and succeed () = () + and fail checkpoint = + Cerrors.fatal_error_raw "syntax error" + in + I.loop_handle succeed fail supplier checkpoint + + let tokens_stream filename text : token coq_Stream = contexts_stk := [init_ctx]; - Pre_parser.translation_unit_file (lexer tokens) lexbuf; + let tokens = Queue.create () in + invoke_pre_parser filename text (lexer tokens); assert (List.length !contexts_stk = 1); let rec compute_token_stream () = let loop t v = -- cgit From f8be3f5f2937b053b9cb75ada7937a6c1b20f019 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Fri, 23 Oct 2015 13:34:43 +0200 Subject: Install the new system for reporting syntax errors. This requires the development version of Menhir, to be released soon. In summary: handcrafted.messages is new. It contains a mapping of erroneous sentences to error messages, together with a lot of comments. Makefile.extr is new. It contains a rule to generate cparser/pre_parser_messages.ml based on this mapping. cparser/ErrorReports.{ml,mli} are new. They construct syntax error messages, based on the compiled mapping. cparser/Lexer.mll is modified. The last two tokens that have been read are stored in a buffer. ErrorReports is called to construct a syntax error message. cparser/GNUmakefile is new. It offers several commands for working on the pre-parser. cparser/deLexer.ml is new. It is a script (it is not linked into CompCert). It translates the symbolic name of a token to an example of this token in concrete C syntax. It is used by [make -C cparser concrete] to produce the .c files in tests/generated/. cparser/tests/generated/Makefile is new. It runs ccomp, clang and gcc on each of the generated C files, so as to allow a comparison of the error messages. --- cparser/Lexer.mll | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'cparser/Lexer.mll') diff --git a/cparser/Lexer.mll b/cparser/Lexer.mll index 62764a48..23d8ab8d 100644 --- a/cparser/Lexer.mll +++ b/cparser/Lexer.mll @@ -449,19 +449,24 @@ and singleline_comment = parse else initial lexbuf - (* [lexer tokens buffer] is a new lexer, which wraps [lexer], and also - records the token stream into the FIFO queue [tokens]. *) + (* [lexer tokens buffer] is a new lexer, which wraps [lexer], and also: 1- + records the token stream into the FIFO queue [tokens] and 2- records the + start and end positions of the last two tokens in the two-place buffer + [buffer]. 
*) - let lexer tokens : lexbuf -> Pre_parser.token = + let lexer tokens buffer : lexbuf -> Pre_parser.token = fun lexbuf -> let token = lexer lexbuf in Queue.push token tokens; + let startp = lexbuf.lex_start_p + and endp = lexbuf.lex_curr_p in + buffer := ErrorReports.update !buffer (startp, endp); token (* [invoke_pre_parser] is in charge of calling the pre_parser. It uses the incremental API, which allows us to do our own error handling. *) - let invoke_pre_parser filename text lexer = + let invoke_pre_parser filename text lexer buffer = let lexbuf = Lexing.from_string text in lexbuf.lex_curr_p <- {lexbuf.lex_curr_p with pos_fname = filename; pos_lnum = 1}; let module I = Pre_parser.MenhirInterpreter in @@ -469,14 +474,18 @@ and singleline_comment = parse and supplier = I.lexer_lexbuf_to_supplier lexer lexbuf and succeed () = () and fail checkpoint = - Cerrors.fatal_error_raw "syntax error" + Cerrors.fatal_error_raw "%s" (ErrorReports.report text !buffer checkpoint) in I.loop_handle succeed fail supplier checkpoint + (* [tokens_stream filename text] runs the pre_parser and produces a stream + of (appropriately classified) tokens. *) + let tokens_stream filename text : token coq_Stream = contexts_stk := [init_ctx]; let tokens = Queue.create () in - invoke_pre_parser filename text (lexer tokens); + let buffer = ref ErrorReports.Zero in + invoke_pre_parser filename text (lexer tokens buffer) buffer; assert (List.length !contexts_stk = 1); let rec compute_token_stream () = let loop t v = -- cgit
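For reference, a plausible shape for the two-place buffer threaded through [ErrorReports.update] above — a sketch of the idea only, since cparser/ErrorReports.ml is not shown here and may differ:

    (* A buffer remembering at most the last two elements pushed into it. *)
    type 'a buffer =
      | Zero                (* nothing pushed yet *)
      | One of 'a           (* exactly one element pushed *)
      | Two of 'a * 'a      (* most recent element first *)

    (* [update buffer x] pushes [x], keeping only the previous element. *)
    let update (buffer : 'a buffer) (x : 'a) : 'a buffer =
      match buffer with
      | Zero -> One x
      | One x1 | Two (x1, _) -> Two (x, x1)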
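Likewise, a standalone sketch of the incremental-API driver pattern used by [invoke_pre_parser]; [Parser], its start symbol [main], and [Lexer.token] are placeholder names standing in for any Menhir-generated parser compiled in --table mode:

    let parse (filename : string) (text : string) =
      let lexbuf = Lexing.from_string text in
      lexbuf.Lexing.lex_curr_p <-
        { lexbuf.Lexing.lex_curr_p with Lexing.pos_fname = filename };
      let module I = Parser.MenhirInterpreter in
      (* [checkpoint] is the parser's initial state; [supplier] feeds it
         tokens on demand; [loop_handle] drives the parser and hands us
         the failure checkpoint instead of raising an exception. *)
      let checkpoint = Parser.Incremental.main ()
      and supplier = I.lexer_lexbuf_to_supplier Lexer.token lexbuf
      and succeed ast = Ok ast
      and fail _checkpoint = Error "syntax error" in
      I.loop_handle succeed fail supplier checkpoint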