aboutsummaryrefslogtreecommitdiffstats
path: root/cparser/deLexer.ml
blob: 7ecfca0cc2919407be5c9fa53e711915fc6c1664 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
(* [delex] converts a terminal symbol (represented as a symbolic string) to a
   concrete string, which the lexer would accept. *)

(* This can be used to convert an error sentence produced by Menhir to C code. *)

(* [delex] should be maintained in sync with the lexer! *)

let delex (symbol : string) : string =
  (* Table-driven translation: each entry pairs the symbolic name of a
     terminal with a concrete spelling for it. A symbol without an entry
     makes [List.assoc] raise [Not_found]. *)
  let spellings =
    [
      (* Keywords with a non-standard or underscore-prefixed spelling. *)
      "ALIGNAS", "_Alignas";
      "ALIGNOF", "__alignof__"; (* the gcc-compatible form is used *)
      "UNDERSCORE_BOOL", "_Bool";
      "ASM", "__asm";
      "ATTRIBUTE", "__attribute";
      "BUILTIN_VA_ARG", "__builtin_va_arg";
      "CONST", "const";
      "INLINE", "inline";
      "PACKED", "__packed__";
      "RESTRICT", "restrict";
      "SIGNED", "signed";
      "VOLATILE", "volatile";
      (* Ordinary keywords: the concrete form is the lowercase name. *)
      "AUTO", "auto";
      "BREAK", "break";
      "CASE", "case";
      "CHAR", "char";
      "CONTINUE", "continue";
      "DEFAULT", "default";
      "DO", "do";
      "DOUBLE", "double";
      "ELSE", "else";
      "ENUM", "enum";
      "EXTERN", "extern";
      "FLOAT", "float";
      "FOR", "for";
      "GOTO", "goto";
      "IF", "if";
      "INT", "int";
      "LONG", "long";
      "REGISTER", "register";
      "RETURN", "return";
      "SHORT", "short";
      "SIZEOF", "sizeof";
      "STATIC", "static";
      "STRUCT", "struct";
      "SWITCH", "switch";
      "TYPEDEF", "typedef";
      "UNION", "union";
      "UNSIGNED", "unsigned";
      "VOID", "void";
      "WHILE", "while";
      (* Identifiers and literals: fixed placeholders. *)
      "TYPEDEF_NAME", "t"; (* placeholder; this should be a type name *)
      "VAR_NAME", "x"; (* placeholder; this should be a variable name *)
      "CONSTANT", "42";
      "STRING_LITERAL", "\"\"";
      (* Punctuation and operators. *)
      "ELLIPSIS", "...";
      "ADD_ASSIGN", "+=";
      "SUB_ASSIGN", "-=";
      "MUL_ASSIGN", "*=";
      "DIV_ASSIGN", "/=";
      "MOD_ASSIGN", "%=";
      "OR_ASSIGN", "|=";
      "AND_ASSIGN", "&=";
      "XOR_ASSIGN", "^=";
      "LEFT_ASSIGN", "<<=";
      "RIGHT_ASSIGN", ">>=";
      "LEFT", "<<";
      "RIGHT", ">>";
      "EQEQ", "==";
      "NEQ", "!=";
      "LEQ", "<=";
      "GEQ", ">=";
      "EQ", "=";
      "LT", "<";
      "GT", ">";
      "INC", "++";
      "DEC", "--";
      "PTR", "->";
      "PLUS", "+";
      "MINUS", "-";
      "STAR", "*";
      "SLASH", "/";
      "PERCENT", "%";
      "BANG", "!";
      "ANDAND", "&&";
      "BARBAR", "||";
      "AND", "&";
      "BAR", "|";
      "HAT", "^";
      "QUESTION", "?";
      "COLON", ":";
      "TILDE", "~";
      "LBRACE", "{";
      "RBRACE", "}";
      "LBRACK", "[";
      "RBRACK", "]";
      "LPAREN", "(";
      "RPAREN", ")";
      "SEMICOLON", ";";
      "COMMA", ",";
      "DOT", ".";
      (* Special cases. *)
      "PRAGMA", "#pragma \n";
      "EOF", ""; (* end of input maps to the empty string *)
    ]
  in
  (* An unknown symbol is not expected; it surfaces as [Not_found]. *)
  List.assoc symbol spellings

(* De-lexing a sentence. *)

let delex sentence =
  (* [delex sentence] splits [sentence] into whitespace-separated terminal
     symbols, translates each one with the symbol-level [delex] defined
     above, and prints the concrete strings on standard output, each one
     followed by a single space. Returns unit.

     Raises [Not_found] if some symbol is unknown. All symbols are
     translated before anything is printed, so in that case no partial
     output is produced. *)
  (* Split on runs of blanks rather than on a single space: with a
     one-character delimiter, two consecutive spaces yield an empty token
     and a tab stays attached to its neighbour, and either would make the
     symbol lookup raise [Not_found]. [Str.split] already ignores
     delimiters at the beginning and at the end of the string. *)
  let blanks = Str.regexp "[ \t]+" in
  let symbols = Str.split blanks sentence in
  let symbols = List.map delex symbols in
  List.iter (fun symbol ->
    Printf.printf "%s " symbol
  ) symbols

(* This file is meant to be run as a script. We read one line from the standard
   input channel and delex it. *)

let () =
  (* Script entry point: read a single line from standard input, print its
     de-lexed form, then terminate the output line. *)
  let sentence = input_line stdin in
  delex sentence;
  print_newline ()