From 581ac226fb42a0a005baa8941e5f39b181acc6cb Mon Sep 17 00:00:00 2001 From: François Pottier Date: Thu, 22 Oct 2015 18:09:36 +0200 Subject: Fix [Lexer.char_literal] and [Lexer.string_literal] to properly keep track of [lex_start_p]. This is required for Menhir to pick up the correct start position of the token. --- cparser/Lexer.mll | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'cparser/Lexer.mll') diff --git a/cparser/Lexer.mll b/cparser/Lexer.mll index 5cfe74fd..6be59aa8 100644 --- a/cparser/Lexer.mll +++ b/cparser/Lexer.mll @@ -283,15 +283,15 @@ rule initial = parse currentLoc lexbuf)} | preprocessing_number as s { error lexbuf "invalid numerical constant '%s'@ These characters form a preprocessor number, but not a constant" s; CONSTANT (Cabs.CONST_INT "0", currentLoc lexbuf) } - | "'" { let l = char_literal [] lexbuf in + | "'" { let l = char_literal lexbuf.lex_start_p [] lexbuf in CONSTANT (Cabs.CONST_CHAR(false, l), currentLoc lexbuf) } - | "L'" { let l = char_literal [] lexbuf in + | "L'" { let l = char_literal lexbuf.lex_start_p [] lexbuf in CONSTANT (Cabs.CONST_CHAR(true, l), currentLoc lexbuf) } - | "\"" { let l = string_literal [] lexbuf in + | "\"" { let l = string_literal lexbuf.lex_start_p [] lexbuf in STRING_LITERAL(false, l, currentLoc lexbuf) } - | "L\"" { let l = string_literal [] lexbuf in + | "L\"" { let l = string_literal lexbuf.lex_start_p [] lexbuf in STRING_LITERAL(true, l, currentLoc lexbuf) } | "..." { ELLIPSIS(currentLoc lexbuf) } | "+=" { ADD_ASSIGN(currentLoc lexbuf) } @@ -376,15 +376,17 @@ and char = parse | _ as c { Int64.of_int (Char.code c) } -and char_literal accu = parse - | '\'' { List.rev accu } +and char_literal startp accu = parse + | '\'' { lexbuf.lex_start_p <- startp; + List.rev accu } | '\n' | eof { fatal_error lexbuf "missing terminating \"'\" character" } - | "" { let c = char lexbuf in char_literal (c :: accu) lexbuf } + | "" { let c = char lexbuf in char_literal startp (c :: accu) lexbuf } -and string_literal accu = parse - | '\"' { List.rev accu } +and string_literal startp accu = parse + | '\"' { lexbuf.lex_start_p <- startp; + List.rev accu } | '\n' | eof { fatal_error lexbuf "missing terminating '\"' character" } - | "" { let c = char lexbuf in string_literal (c :: accu) lexbuf } + | "" { let c = char lexbuf in string_literal startp (c :: accu) lexbuf } (* We assume gcc -E syntax but try to tolerate variations. *) and hash = parse -- cgit From 03f39523094fd41c8aad5ee7a8169ffc448cfd4a Mon Sep 17 00:00:00 2001 From: François Pottier Date: Fri, 16 Oct 2015 17:56:06 +0200 Subject: Read the whole source C file into memory instead of reading it on demand. Having the file in memory will help build an error message. Also, this may be slightly faster. 
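For illustration, a minimal sketch of the intended pattern; the helper [read_whole_file] is hypothetical and is not part of this patch:

    (* Hypothetical helper: read an entire file into a string. *)
    let read_whole_file (filename : string) : string =
      let ic = open_in_bin filename in
      let text = really_input_string ic (in_channel_length ic) in
      close_in ic;
      text

    (* Build the lexbuf from the in-memory text rather than from a channel,
       then fix up the initial position, as the patch below does. *)
    let lexbuf_of_file (filename : string) : Lexing.lexbuf =
      let lexbuf = Lexing.from_string (read_whole_file filename) in
      lexbuf.Lexing.lex_curr_p <-
        { lexbuf.Lexing.lex_curr_p with
          Lexing.pos_fname = filename; Lexing.pos_lnum = 1 };
      lexbuf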
--- cparser/Lexer.mll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'cparser/Lexer.mll') diff --git a/cparser/Lexer.mll b/cparser/Lexer.mll index 6be59aa8..aca1be27 100644 --- a/cparser/Lexer.mll +++ b/cparser/Lexer.mll @@ -440,7 +440,7 @@ and singleline_comment = parse open Parser open Aut.GramDefs - let tokens_stream filename channel : token coq_Stream = + let tokens_stream filename text : token coq_Stream = let tokens = Queue.create () in let lexer_wraper lexbuf : Pre_parser.token = let res = @@ -452,7 +452,7 @@ and singleline_comment = parse Queue.push res tokens; res in - let lexbuf = Lexing.from_channel channel in + let lexbuf = Lexing.from_string text in lexbuf.lex_curr_p <- {lexbuf.lex_curr_p with pos_fname = filename; pos_lnum = 1}; contexts_stk := [init_ctx]; Pre_parser.translation_unit_file lexer_wraper lexbuf; -- cgit From 8fee5abd0a1d0865d3d3d4b4de48aacb4be0914e Mon Sep 17 00:00:00 2001 From: François Pottier Date: Fri, 23 Oct 2015 13:08:56 +0200 Subject: Lexer cleanup: isolate the entry point into the lexer. --- cparser/Lexer.mll | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'cparser/Lexer.mll') diff --git a/cparser/Lexer.mll b/cparser/Lexer.mll index aca1be27..c453b15b 100644 --- a/cparser/Lexer.mll +++ b/cparser/Lexer.mll @@ -440,15 +440,19 @@ and singleline_comment = parse open Parser open Aut.GramDefs + (* This is the main entry point to the lexer. *) + + let lexer : lexbuf -> Pre_parser.token = + fun lexbuf -> + if lexbuf.lex_curr_p.pos_cnum = lexbuf.lex_curr_p.pos_bol then + initial_linebegin lexbuf + else + initial lexbuf + let tokens_stream filename text : token coq_Stream = let tokens = Queue.create () in let lexer_wraper lexbuf : Pre_parser.token = - let res = - if lexbuf.lex_curr_p.pos_cnum = lexbuf.lex_curr_p.pos_bol then - initial_linebegin lexbuf - else - initial lexbuf - in + let res = lexer lexbuf in Queue.push res tokens; res in -- cgit From dca619cc34e64e63fe36ec9b5acdb42aafe665a8 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Fri, 23 Oct 2015 13:11:23 +0200 Subject: Lexer cleanup: isolate [lexer_wraper] and rename it to [lexer]. --- cparser/Lexer.mll | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'cparser/Lexer.mll') diff --git a/cparser/Lexer.mll b/cparser/Lexer.mll index c453b15b..ab4e03fe 100644 --- a/cparser/Lexer.mll +++ b/cparser/Lexer.mll @@ -449,17 +449,21 @@ and singleline_comment = parse else initial lexbuf + (* [lexer tokens buffer] is a new lexer, which wraps [lexer], and also + records the token stream into the FIFO queue [tokens]. *) + + let lexer tokens : lexbuf -> Pre_parser.token = + fun lexbuf -> + let token = lexer lexbuf in + Queue.push token tokens; + token + let tokens_stream filename text : token coq_Stream = let tokens = Queue.create () in - let lexer_wraper lexbuf : Pre_parser.token = - let res = lexer lexbuf in - Queue.push res tokens; - res - in let lexbuf = Lexing.from_string text in lexbuf.lex_curr_p <- {lexbuf.lex_curr_p with pos_fname = filename; pos_lnum = 1}; contexts_stk := [init_ctx]; - Pre_parser.translation_unit_file lexer_wraper lexbuf; + Pre_parser.translation_unit_file (lexer tokens) lexbuf; assert (List.length !contexts_stk = 1); let rec compute_token_stream () = let loop t v = -- cgit From 8d1a15f7f5c8fbea194a67c49c5aa10d6371b267 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Fri, 23 Oct 2015 13:16:35 +0200 Subject: Lexer update: use Menhir's incremental API instead of Menhir's traditional API. 
This means that CompCert must now be compiled in --table mode. At this point, the error message for a syntax error is still just "syntax error". --- cparser/Lexer.mll | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'cparser/Lexer.mll') diff --git a/cparser/Lexer.mll b/cparser/Lexer.mll index ab4e03fe..62764a48 100644 --- a/cparser/Lexer.mll +++ b/cparser/Lexer.mll @@ -458,12 +458,25 @@ and singleline_comment = parse Queue.push token tokens; token - let tokens_stream filename text : token coq_Stream = - let tokens = Queue.create () in + (* [invoke_pre_parser] is in charge of calling the pre_parser. It uses + the incremental API, which allows us to do our own error handling. *) + + let invoke_pre_parser filename text lexer = let lexbuf = Lexing.from_string text in lexbuf.lex_curr_p <- {lexbuf.lex_curr_p with pos_fname = filename; pos_lnum = 1}; + let module I = Pre_parser.MenhirInterpreter in + let checkpoint = Pre_parser.Incremental.translation_unit_file() + and supplier = I.lexer_lexbuf_to_supplier lexer lexbuf + and succeed () = () + and fail checkpoint = + Cerrors.fatal_error_raw "syntax error" + in + I.loop_handle succeed fail supplier checkpoint + + let tokens_stream filename text : token coq_Stream = contexts_stk := [init_ctx]; - Pre_parser.translation_unit_file (lexer tokens) lexbuf; + let tokens = Queue.create () in + invoke_pre_parser filename text (lexer tokens); assert (List.length !contexts_stk = 1); let rec compute_token_stream () = let loop t v = -- cgit From f8be3f5f2937b053b9cb75ada7937a6c1b20f019 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Fri, 23 Oct 2015 13:34:43 +0200 Subject: Install the new system for reporting syntax errors. This requires the development version of Menhir, to be released soon. In summary: handcrafted.messages is new. It contains a mapping of erroneous sentences to error messages, together with a lot of comments. Makefile.extr is new. It contains a rule to generate cparser/pre_parser_messages.ml based on this mapping. cparser/ErrorReports.{ml,mli} are new. They construct syntax error messages, based on the compiled mapping. cparser/Lexer.mll is modified. The last two tokens that have been read are stored in a buffer. ErrorReports is called to construct a syntax error message. cparser/GNUmakefile is new. It offers several commands for working on the pre-parser. cparser/deLexer.ml is new. It is a script (it is not linked into CompCert). It translates the symbolic name of a token to an example of this token in concrete C syntax. It is used by [make -C cparser concrete] to produce the .c files in tests/generated/. cparser/tests/generated/Makefile is new. It runs ccomp, clang and gcc on each of the generated C files, so as to allow a comparison of the error messages. --- cparser/Lexer.mll | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'cparser/Lexer.mll') diff --git a/cparser/Lexer.mll b/cparser/Lexer.mll index 62764a48..23d8ab8d 100644 --- a/cparser/Lexer.mll +++ b/cparser/Lexer.mll @@ -449,19 +449,24 @@ and singleline_comment = parse else initial lexbuf - (* [lexer tokens buffer] is a new lexer, which wraps [lexer], and also - records the token stream into the FIFO queue [tokens]. *) + (* [lexer tokens buffer] is a new lexer, which wraps [lexer], and also: 1- + records the token stream into the FIFO queue [tokens] and 2- records the + start and end positions of the last two tokens in the two-place buffer + [buffer]. 
*) - let lexer tokens : lexbuf -> Pre_parser.token = + let lexer tokens buffer : lexbuf -> Pre_parser.token = fun lexbuf -> let token = lexer lexbuf in Queue.push token tokens; + let startp = lexbuf.lex_start_p + and endp = lexbuf.lex_curr_p in + buffer := ErrorReports.update !buffer (startp, endp); token (* [invoke_pre_parser] is in charge of calling the pre_parser. It uses the incremental API, which allows us to do our own error handling. *) - let invoke_pre_parser filename text lexer = + let invoke_pre_parser filename text lexer buffer = let lexbuf = Lexing.from_string text in lexbuf.lex_curr_p <- {lexbuf.lex_curr_p with pos_fname = filename; pos_lnum = 1}; let module I = Pre_parser.MenhirInterpreter in @@ -469,14 +474,18 @@ and singleline_comment = parse and supplier = I.lexer_lexbuf_to_supplier lexer lexbuf and succeed () = () and fail checkpoint = - Cerrors.fatal_error_raw "syntax error" + Cerrors.fatal_error_raw "%s" (ErrorReports.report text !buffer checkpoint) in I.loop_handle succeed fail supplier checkpoint + (* [tokens_stream filename text] runs the pre_parser and produces a stream + of (appropriately classified) tokens. *) + let tokens_stream filename text : token coq_Stream = contexts_stk := [init_ctx]; let tokens = Queue.create () in - invoke_pre_parser filename text (lexer tokens); + let buffer = ref ErrorReports.Zero in + invoke_pre_parser filename text (lexer tokens buffer) buffer; assert (List.length !contexts_stk = 1); let rec compute_token_stream () = let loop t v = -- cgit
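For reference, a plausible shape for the two-place buffer threaded through [ErrorReports.update] above — a sketch of the idea only, since cparser/ErrorReports.ml is not shown here and may differ:

    (* A buffer remembering at most the last two elements pushed into it. *)
    type 'a buffer =
      | Zero                (* nothing pushed yet *)
      | One of 'a           (* exactly one element pushed *)
      | Two of 'a * 'a      (* most recent element first *)

    (* [update buffer x] pushes [x], keeping only the previous element. *)
    let update (buffer : 'a buffer) (x : 'a) : 'a buffer =
      match buffer with
      | Zero -> One x
      | One x1 | Two (x1, _) -> Two (x, x1)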
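Likewise, a standalone sketch of the incremental-API driver pattern used by [invoke_pre_parser]; [Parser], its start symbol [main], and [Lexer.token] are placeholder names standing in for any Menhir-generated parser compiled in --table mode:

    let parse (filename : string) (text : string) =
      let lexbuf = Lexing.from_string text in
      lexbuf.Lexing.lex_curr_p <-
        { lexbuf.Lexing.lex_curr_p with Lexing.pos_fname = filename };
      let module I = Parser.MenhirInterpreter in
      (* [checkpoint] is the parser's initial state; [supplier] feeds it
         tokens on demand; [loop_handle] drives the parser and hands us
         the failure checkpoint instead of raising an exception. *)
      let checkpoint = Parser.Incremental.main ()
      and supplier = I.lexer_lexbuf_to_supplier Lexer.token lexbuf
      and succeed ast = Ok ast
      and fail _checkpoint = Error "syntax error" in
      I.loop_handle succeed fail supplier checkpoint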