From f8be3f5f2937b053b9cb75ada7937a6c1b20f019 Mon Sep 17 00:00:00 2001 From: François Pottier Date: Fri, 23 Oct 2015 13:34:43 +0200 Subject: Install the new system for reporting syntax errors. This requires the development version of Menhir, to be released soon. In summary: handcrafted.messages is new. It contains a mapping of erroneous sentences to error messages, together with a lot of comments. Makefile.extr is new. It contains a rule to generate cparser/pre_parser_messages.ml based on this mapping. cparser/ErrorReports.{ml,mli} are new. They construct syntax error messages, based on the compiled mapping. cparser/Lexer.mll is modified. The last two tokens that have been read are stored in a buffer. ErrorReports is called to construct a syntax error message. cparser/GNUmakefile is new. It offers several commands for working on the pre-parser. cparser/deLexer.ml is new. It is a script (it is not linked into CompCert). It translates the symbolic name of a token to an example of this token in concrete C syntax. It is used by [make -C cparser concrete] to produce the .c files in tests/generated/. cparser/tests/generated/Makefile is new. It runs ccomp, clang and gcc on each of the generated C files, so as to allow a comparison of the error messages. --- cparser/deLexer.ml | 120 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 cparser/deLexer.ml (limited to 'cparser/deLexer.ml') diff --git a/cparser/deLexer.ml b/cparser/deLexer.ml new file mode 100644 index 00000000..7ecfca0c --- /dev/null +++ b/cparser/deLexer.ml @@ -0,0 +1,120 @@ +(* [delex] converts a terminal symbol (represented as a symbolic string) to a + concrete string, which the lexer would accept. *) + +(* This can be used to convert an error sentence produced by Menhir to C code. *) + +(* [delex] should be maintained in sync with the lexer! 
*)

(* [delex symbol] translates the symbolic name of one terminal symbol (for
   instance ["PLUS"]) into a concrete string which the lexer would accept
   (for instance ["+"]).

   Raises [Not_found] if [symbol] is not one of the names listed below; the
   offending name is reported on the standard error channel first, so that
   the failure can be diagnosed. *)

let delex (symbol : string) : string =
  match symbol with
  | "ALIGNAS" -> "_Alignas"
  | "ALIGNOF" -> "__alignof__" (* use the gcc-compatible form *)
  | "UNDERSCORE_BOOL" -> "_Bool"
  | "ASM" -> "__asm"
  | "ATTRIBUTE" -> "__attribute"
  | "BUILTIN_VA_ARG" -> "__builtin_va_arg"
  | "CONST" -> "const"
  | "INLINE" -> "inline"
  | "PACKED" -> "__packed__"
  | "RESTRICT" -> "restrict"
  | "SIGNED" -> "signed"
  | "VOLATILE" -> "volatile"
  | "AUTO" -> "auto"
  | "BREAK" -> "break"
  | "CASE" -> "case"
  | "CHAR" -> "char"
  | "CONTINUE" -> "continue"
  | "DEFAULT" -> "default"
  | "DO" -> "do"
  | "DOUBLE" -> "double"
  | "ELSE" -> "else"
  | "ENUM" -> "enum"
  | "EXTERN" -> "extern"
  | "FLOAT" -> "float"
  | "FOR" -> "for"
  | "GOTO" -> "goto"
  | "IF" -> "if"
  | "INT" -> "int"
  | "LONG" -> "long"
  | "REGISTER" -> "register"
  | "RETURN" -> "return"
  | "SHORT" -> "short"
  | "SIZEOF" -> "sizeof"
  | "STATIC" -> "static"
  | "STRUCT" -> "struct"
  | "SWITCH" -> "switch"
  | "TYPEDEF" -> "typedef"
  | "UNION" -> "union"
  | "UNSIGNED" -> "unsigned"
  | "VOID" -> "void"
  | "WHILE" -> "while"
  | "TYPEDEF_NAME" -> "t" (* this should be a type name *)
  | "VAR_NAME" -> "x" (* this should be a variable name *)
  | "CONSTANT" -> "42"
  | "STRING_LITERAL" -> "\"\""
  | "ELLIPSIS" -> "..."
  | "ADD_ASSIGN" -> "+="
  | "SUB_ASSIGN" -> "-="
  | "MUL_ASSIGN" -> "*="
  | "DIV_ASSIGN" -> "/="
  | "MOD_ASSIGN" -> "%="
  | "OR_ASSIGN" -> "|="
  | "AND_ASSIGN" -> "&="
  | "XOR_ASSIGN" -> "^="
  | "LEFT_ASSIGN" -> "<<="
  | "RIGHT_ASSIGN" -> ">>="
  | "LEFT" -> "<<"
  | "RIGHT" -> ">>"
  | "EQEQ" -> "=="
  | "NEQ" -> "!="
  | "LEQ" -> "<="
  | "GEQ" -> ">="
  | "EQ" -> "="
  | "LT" -> "<"
  | "GT" -> ">"
  | "INC" -> "++"
  | "DEC" -> "--"
  | "PTR" -> "->"
  | "PLUS" -> "+"
  | "MINUS" -> "-"
  | "STAR" -> "*"
  | "SLASH" -> "/"
  | "PERCENT" -> "%"
  | "BANG" -> "!"
  | "ANDAND" -> "&&"
  | "BARBAR" -> "||"
  | "AND" -> "&"
  | "BAR" -> "|"
  | "HAT" -> "^"
  | "QUESTION" -> "?"
  | "COLON" -> ":"
  | "TILDE" -> "~"
  | "LBRACE" -> "{"
  | "RBRACE" -> "}"
  | "LBRACK" -> "["
  | "RBRACK" -> "]"
  | "LPAREN" -> "("
  | "RPAREN" -> ")"
  | "SEMICOLON" -> ";"
  | "COMMA" -> ","
  | "DOT" -> "."
  | "PRAGMA" -> "#pragma \n"
  | "EOF" -> "" (* this should be ok *)
  | _ ->
      (* This should not happen. If it does, say which symbol is unknown
         before failing; a bare [Not_found] gives no clue. *)
      begin
        Printf.eprintf "deLexer: unknown terminal symbol %S\n%!" symbol;
        raise Not_found
      end

(* De-lexing a sentence. *)

(* [delex sentence] splits [sentence] into symbolic token names, translates
   each of them using the symbol-level [delex] above (which this definition
   shadows from here on), and prints the concrete tokens on the standard
   output channel, each followed by a space.

   Symbols are separated by runs of blank characters. Splitting on a single
   space would turn a doubled space into an empty symbol, and would leave a
   tab or a trailing carriage return glued to a symbol; either would make the
   translation fail.

   All symbols are translated before anything is printed, so an unknown
   symbol causes a failure without partial output. *)

let delex sentence =
  let blanks = Str.regexp "[ \t\r]+" in
  let symbols = Str.split blanks sentence in
  let tokens = List.map delex symbols in
  List.iter (fun token -> Printf.printf "%s " token) tokens

(* This file is meant to be run as a script. We read one line from the standard
   input channel and delex it. *)

let () =
  delex (input_line stdin);
  print_newline ()

-- cgit