path: root/cparser/deLexer.ml
diff options
author: François Pottier <francois.pottier@inria.fr>, 2015-10-23 13:34:43 +0200
committer: François Pottier <francois.pottier@inria.fr>, 2015-10-23 13:40:40 +0200
commit: f8be3f5f2937b053b9cb75ada7937a6c1b20f019 (patch)
tree: 23b4cc1187762f0b956a4109b11fd0736da67e85 /cparser/deLexer.ml
parent: 8d1a15f7f5c8fbea194a67c49c5aa10d6371b267 (diff)
Install the new system for reporting syntax errors.
This requires the development version of Menhir, to be released soon. In summary: handcrafted.messages is new. It contains a mapping of erroneous sentences to error messages, together with a lot of comments. Makefile.extr is new. It contains a rule to generate cparser/pre_parser_messages.ml based on this mapping. cparser/ErrorReports.{ml,mli} are new. They construct syntax error messages, based on the compiled mapping. cparser/Lexer.mll is modified. The last two tokens that have been read are stored in a buffer. ErrorReports is called to construct a syntax error message. cparser/GNUmakefile is new. It offers several commands for working on the pre-parser. cparser/deLexer.ml is new. It is a script (it is not linked into CompCert). It translates the symbolic name of a token to an example of this token in concrete C syntax. It is used by [make -C cparser concrete] to produce the .c files in tests/generated/. cparser/tests/generated/Makefile is new. It runs ccomp, clang and gcc on each of the generated C files, so as to allow a comparison of the error messages.
Diffstat (limited to 'cparser/deLexer.ml')
1 files changed, 120 insertions, 0 deletions
diff --git a/cparser/deLexer.ml b/cparser/deLexer.ml
new file mode 100644
index 00000000..7ecfca0c
--- /dev/null
+++ b/cparser/deLexer.ml
@@ -0,0 +1,120 @@
+(* [delex symbol] translates the symbolic name of a terminal symbol into a
+   concrete string, which the lexer would accept. It is a plain table lookup.
+   This can be used to convert an error sentence produced by Menhir to C code.
+   The table below should be maintained in sync with the lexer!
+   Raises [Not_found] if [symbol] is not in the table (this should not
+   happen). *)
+let delex (symbol : string) : string =
+  List.assoc symbol [
+    "ALIGNAS", "_Alignas";
+    "ALIGNOF", "__alignof__"; (* use the gcc-compatible form *)
+    "UNDERSCORE_BOOL", "_Bool";
+    "ASM", "__asm";
+    "ATTRIBUTE", "__attribute";
+    "BUILTIN_VA_ARG", "__builtin_va_arg";
+    "CONST", "const";
+    "INLINE", "inline";
+    "PACKED", "__packed__";
+    "RESTRICT", "restrict";
+    "SIGNED", "signed";
+    "VOLATILE", "volatile";
+    "AUTO", "auto";
+    "BREAK", "break";
+    "CASE", "case";
+    "CHAR", "char";
+    "CONTINUE", "continue";
+    "DEFAULT", "default";
+    "DO", "do";
+    "DOUBLE", "double";
+    "ELSE", "else";
+    "ENUM", "enum";
+    "EXTERN", "extern";
+    "FLOAT", "float";
+    "FOR", "for";
+    "GOTO", "goto";
+    "IF", "if";
+    "INT", "int";
+    "LONG", "long";
+    "REGISTER", "register";
+    "RETURN", "return";
+    "SHORT", "short";
+    "SIZEOF", "sizeof";
+    "STATIC", "static";
+    "STRUCT", "struct";
+    "SWITCH", "switch";
+    "TYPEDEF", "typedef";
+    "UNION", "union";
+    "UNSIGNED", "unsigned";
+    "VOID", "void";
+    "WHILE", "while";
+    "TYPEDEF_NAME", "t"; (* this should be a type name *)
+    "VAR_NAME", "x"; (* this should be a variable name *)
+    "CONSTANT", "42";
+    "STRING_LITERAL", "\"\"";
+    "ELLIPSIS", "...";
+    "ADD_ASSIGN", "+=";
+    "SUB_ASSIGN", "-=";
+    "MUL_ASSIGN", "*=";
+    "DIV_ASSIGN", "/=";
+    "MOD_ASSIGN", "%=";
+    "OR_ASSIGN", "|=";
+    "AND_ASSIGN", "&=";
+    "XOR_ASSIGN", "^=";
+    "LEFT_ASSIGN", "<<=";
+    "RIGHT_ASSIGN", ">>=";
+    "LEFT", "<<";
+    "RIGHT", ">>";
+    "EQEQ", "==";
+    "NEQ", "!=";
+    "LEQ", "<=";
+    "GEQ", ">=";
+    "EQ", "=";
+    "LT", "<";
+    "GT", ">";
+    "INC", "++";
+    "DEC", "--";
+    "PTR", "->";
+    "PLUS", "+";
+    "MINUS", "-";
+    "STAR", "*";
+    "SLASH", "/";
+    "PERCENT", "%";
+    "BANG", "!";
+    "ANDAND", "&&";
+    "BARBAR", "||";
+    "AND", "&";
+    "BAR", "|";
+    "HAT", "^";
+    "QUESTION", "?";
+    "COLON", ":";
+    "TILDE", "~";
+    "LBRACE", "{";
+    "RBRACE", "}";
+    "LBRACK", "[";
+    "RBRACK", "]";
+    "LPAREN", "(";
+    "RPAREN", ")";
+    "SEMICOLON", ";";
+    "COMMA", ",";
+    "DOT", ".";
+    "PRAGMA", "#pragma \n";
+    "EOF", ""; (* this should be ok *)
+  ]
+(* De-lexing a sentence. *)
+(* [delex sentence] splits [sentence] into symbolic token names, de-lexes each
+   of them, and prints the resulting concrete tokens on the standard output
+   channel, each followed by one space.
+   Symbols may be separated by any nonempty run of spaces or tabs. Splitting
+   on a single space would turn two consecutive spaces into an empty symbol,
+   which the per-symbol [delex] rejects with [Not_found].
+   This definition shadows the per-symbol [delex] above. Because the binding
+   is not recursive, the [delex] used in its body is still the per-symbol
+   function. Any [Not_found] raised there propagates to the caller. *)
+let delex sentence =
+  let separator = Str.regexp "[ \t]+" in
+  let names = Str.split separator sentence in
+  (* Translate every symbol before printing anything, so that an unknown
+     symbol does not leave a partially printed sentence behind. *)
+  let tokens = List.map delex names in
+  List.iter (fun token -> Printf.printf "%s " token) tokens
+(* This file is meant to be run as a script. We read one line from the standard
+   input channel, print its de-lexed form on the standard output channel, and
+   finish the output with a newline. *)
+let () =
+  let sentence = input_line stdin in
+  delex sentence;
+  print_newline ()