From f8be3f5f2937b053b9cb75ada7937a6c1b20f019 Mon Sep 17 00:00:00 2001
From: François Pottier <francois.pottier@inria.fr>
Date: Fri, 23 Oct 2015 13:34:43 +0200
Subject: Install the new system for reporting syntax errors. This requires the
 development version of Menhir, to be released soon. In summary:

handcrafted.messages is new.
  It contains a mapping of erroneous sentences to error messages,
  together with a lot of comments.

Makefile.extr is new.
  It contains a rule to generate cparser/pre_parser_messages.ml
  based on this mapping.

cparser/ErrorReports.{ml,mli} are new.
  They construct syntax error messages, based on the compiled mapping.

cparser/Lexer.mll is modified.
  The last two tokens that have been read are stored in a buffer.
  ErrorReports is called to construct a syntax error message.

cparser/GNUmakefile is new.
  It offers several commands for working on the pre-parser.

cparser/deLexer.ml is new.
  It is a script (it is not linked into CompCert).
  It translates the symbolic name of a token to an example of this
  token in concrete C syntax.
  It is used by [make -C cparser concrete] to produce the .c files
  in tests/generated/.

cparser/tests/generated/Makefile is new.
  It runs ccomp, clang and gcc on each of the generated C files,
  so as to allow a comparison of the error messages.
---
 cparser/GNUmakefile | 175 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 175 insertions(+)
 create mode 100644 cparser/GNUmakefile

(limited to 'cparser/GNUmakefile')

diff --git a/cparser/GNUmakefile b/cparser/GNUmakefile
new file mode 100644
index 00000000..6c1df250
--- /dev/null
+++ b/cparser/GNUmakefile
@@ -0,0 +1,175 @@
+# This is a development Makefile.
+# It is meant to be used by developers who wish to modify the pre_parser.
+
+# A collection of erroneous input sentences (and accompanying error messages)
+# for the pre_parser is stored in the file handcrafted.messages.
+
+# After a modification to the pre_parser, one must ensure that this collection of
+# erroneous input sentences remains correct, irredundant, and complete. By this,
+# we mean:
+# - every input sentence in the collection leads to an error (on its last token);
+# - every state where an error can occur is reached by some input sentence in the
+#   collection;
+# - every state where an error can occur is reached by at most one input sentence
+#   in the collection.
+
+# These properties are checked by [make correct] and [make complete] below.
+# [make correct] is cheap and is performed automatically when CompCert is
+# compiled. [make complete] is expensive and must be called explicitly.
+
+# After a modification to the pre_parser, one should also reconstruct the
+# auto-generated comments in handcrafted.messages. This is done by calling
+# [make update].
+
+# Finally, after a modification to the pre_parser, one should ensure that every
+# error message remains meaningful. Indeed, an error message should not be
+# specific of the example sentence that causes this error; it should reflect all
+# possible sentences that cause an error in this state. One must look at the
+# description of the error state (which is part of the auto-generated comments)
+# and decide, based on this description, whether the error message seems
+# appropriate.
+
+# If you wish to play with one input sentence and see how the automaton behaves,
+# use [make interpret] or [make interpret_error].
+
+# If you wish to translate the database of erroneous input sentences to concrete
+# syntax, so as to be able to run a C compiler on these sentences, use [make
+# concrete].
+
+# ------------------------------------------------------------------------------
+
+# Make sure we use the same Menhir setup as the main CompCert Makefiles.
+
+include ../Makefile.menhir
+
+# Be more verbose about the automaton.
+
+FLAGS    := $(MENHIR_FLAGS) -v -la 2
+
+# So, here is our basic Menhir command.
+
+COMMAND  := $(MENHIR) $(MENHIR_MODE) pre_parser.mly $(FLAGS)
+
+# And this is the database of messages.
+
+DATABASE := handcrafted.messages
+
+# We use (GNU) cut when de-lexing examples sentences.
+
+CUT      := $(shell if which gcut >/dev/null ; then echo gcut ; else echo cut ; fi)
+
+# ------------------------------------------------------------------------------
+
+# Working with the error message database.
+
+.PHONY: nothing correct complete update
+
+nothing:
+	@ echo "If you wish to compile CompCert, please run \"make\" one level up."
+	@ echo "If you wish to work on the pre-parser, please read GNUmakefile."
+
+# Checking correctness and irredundancy is done via --compile-errors.
+# This entry is also called from [Makefile.extr] to compile the message
+# database down to OCaml code.
+# This also produces a description of the automaton in pre_parser.automaton
+# and a description of the conflicts (if any) in pre_parser.conflicts.
+correct:
+	@ $(COMMAND) --compile-errors $(DATABASE) > pre_parser_messages.ml && \
+	    echo "OK. The set of erroneous inputs is correct and irredundant."
+
+# Checking completeness is done by first generating a complete list of
+# erroneous sentences (in pre_parser.messages), then comparing the two
+# .messages files for inclusion.
+complete:
+# Re-generate the list of error messages. This takes time (about 25 seconds).
+	@ $(COMMAND) --list-errors >pre_parser.messages
+	@ echo "Number of error sentences that involve spurious reductions:"
+	@ grep WARNING pre_parser.messages | wc -l
+# Check $(DATABASE) for completeness.
+# We assume pre_parser.messages is up-to-date.
+	@ $(COMMAND) --compare-errors pre_parser.messages --compare-errors $(DATABASE) && \
+	    echo "OK. The set of erroneous inputs is complete."
+
+update:
+# Update the auto-comments in $(DATABASE).
+	@ mv -f $(DATABASE) $(DATABASE).bak
+	@ if ! $(COMMAND) --update-errors $(DATABASE).bak > $(DATABASE) ; then \
+	  cp $(DATABASE).bak $(DATABASE) ; \
+	fi
+	@ echo "The auto-generated comments in $(DATABASE) have been re-generated."
+
+# ------------------------------------------------------------------------------
+
+# Trying out an input sentence.
+
+# [make interpret] waits for you to interactively type an input sentence,
+# in symbolic syntax (e.g. ALIGNAS LPAREN TYPEDEF_NAME INT). It runs the
+# automaton on this sentence in --trace mode.
+
+# [make interpret_error] is analogous, but expects the sentence to cause
+# an error at the last token, and displays in what state the error takes
+# place.
+
+.PHONY: interpret interpret_error
+
+interpret:
+# Interpret one sentence (interactive).
+	@ $(COMMAND) --trace --interpret
+
+interpret_error:
+# Interpret one error sentence (interactive).
+	@ $(COMMAND) --interpret-error
+
+# ------------------------------------------------------------------------------
+
+# Translating the database of erroneous input sentences to concrete syntax.
+
+# [make concrete] destroys and re-creates the C files in tests/generated/.
+# Once this is done, run [make -C tests/generated] to submit these C files
+# to CompCert, clang and gcc.
+
+.PHONY: concrete
+
+# First, we translate $(DATABASE) to a text file which contains just the
+# erroneous input sentences, nothing else (no comments, no blank lines).
+%.messages.raw: %.messages
+	@ $(COMMAND) --echo-errors $< > $@
+
+# We compile deLexer.ml to native code, because running it as an ocaml script
+# is way too slow. This little utility translates symbolic tokens to their
+# concrete C syntax, e.g., EQ is translated to = and so on.
+deLexer: deLexer.ml
+	@ ocamlopt -o $@ str.cmxa $<
+
+concrete: $(DATABASE).raw deLexer
+# Destroy the C files in tests/generated.
+	@ rm -f tests/generated/*.c
+# Read $(DATABASE).raw, line by line.
+# For each sentence, create a new C file.
+# A sentence takes the form "<start symbol>: SYMBOLS...".
+# We cut the start symbol away,
+# and translate the rest to concrete C syntax using the deLexer.
+# We declare a type name "t", which the de-lexer uses as a type name.
+	@ f=0 ; \
+	while read -r line ; do \
+	  filename=`printf "tests/generated/%03d.c" $$f` ; \
+	  rm -f $$filename ; \
+	  echo "typedef int t;" >> $$filename ; \
+	  echo "$$line" \
+	    | $(CUT) --fields="1" --delimiter=" " --complement \
+	    | ./deLexer \
+	    >> $$filename ; \
+	  f=$$((f+1)) ; \
+	done < $<
+
+# ------------------------------------------------------------------------------
+
+# Cleaning up.
+
+clean:
+	rm -f pre_parser.automaton
+	rm -f pre_parser.conflicts
+	rm -f pre_parser.messages
+	rm -f $(DATABASE).raw
+	rm -f deLexer
+
-- 
cgit