aboutsummaryrefslogtreecommitdiffstats
path: root/cil/src/formatlex.mll
diff options
context:
space:
mode:
Diffstat (limited to 'cil/src/formatlex.mll')
-rw-r--r--cil/src/formatlex.mll308
1 files changed, 308 insertions, 0 deletions
diff --git a/cil/src/formatlex.mll b/cil/src/formatlex.mll
new file mode 100644
index 00000000..584a060d
--- /dev/null
+++ b/cil/src/formatlex.mll
@@ -0,0 +1,308 @@
+(*
+ *
+ * Copyright (c) 2001-2002,
+ * George C. Necula <necula@cs.berkeley.edu>
+ * Scott McPeak <smcpeak@cs.berkeley.edu>
+ * Wes Weimer <weimer@cs.berkeley.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. The names of the contributors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *)
+(* A simple lexical analyzer for constructing CIL based on format strings *)
+{
+open Formatparse
+exception Eof
+exception InternalError of string
+module H = Hashtbl
+module E = Errormsg
+(*
+** Keyword hashtable
+*)
+let keywords = H.create 211
+
+(*
+** Useful primitives
+*)
+let scan_ident id =
+ try H.find keywords id
+ with Not_found -> IDENT id (* default to variable name *)
+
+(*
+** Buffer processor
+*)
+
+
+let init ~(prog: string) : Lexing.lexbuf =
+ H.clear keywords;
+ Lexerhack.currentPattern := prog;
+ List.iter
+ (fun (key, token) -> H.add keywords key token)
+ [ ("const", CONST); ("__const", CONST); ("__const__", CONST);
+ ("static", STATIC);
+ ("extern", EXTERN);
+ ("long", LONG);
+ ("short", SHORT);
+ ("signed", SIGNED);
+ ("unsigned", UNSIGNED);
+ ("volatile", VOLATILE);
+ ("char", CHAR);
+ ("int", INT);
+ ("float", FLOAT);
+ ("double", DOUBLE);
+ ("void", VOID);
+ ("enum", ENUM);
+ ("struct", STRUCT);
+ ("typedef", TYPEDEF);
+ ("union", UNION);
+ ("break", BREAK);
+ ("continue", CONTINUE);
+ ("goto", GOTO);
+ ("return", RETURN);
+ ("switch", SWITCH);
+ ("case", CASE);
+ ("default", DEFAULT);
+ ("while", WHILE);
+ ("do", DO);
+ ("for", FOR);
+ ("if", IF);
+ ("else", ELSE);
+ ("__attribute__", ATTRIBUTE); ("__attribute", ATTRIBUTE);
+ ("__int64", INT64);
+ ("__builtin_va_arg", BUILTIN_VA_ARG);
+ ];
+ E.startParsingFromString prog
+
+let finish () =
+ E.finishParsing ()
+
+(*** Error handling ***)
+let error msg =
+ E.parse_error msg
+
+
+(*** escape character management ***)
+let scan_escape str =
+ match str with
+ "n" -> "\n"
+ | "r" -> "\r"
+ | "t" -> "\t"
+ | "b" -> "\b"
+ | "f" -> "\012" (* ASCII code 12 *)
+ | "v" -> "\011" (* ASCII code 11 *)
+ | "a" -> "\007" (* ASCII code 7 *)
+ | "e" -> "\027" (* ASCII code 27. This is a GCC extension *)
+ | _ -> str
+
+let get_value chr =
+ match chr with
+ '0'..'9' -> (Char.code chr) - (Char.code '0')
+ | 'a'..'z' -> (Char.code chr) - (Char.code 'a') + 10
+ | 'A'..'Z' -> (Char.code chr) - (Char.code 'A') + 10
+ | _ -> 0
+let scan_hex_escape str =
+ String.make 1 (Char.chr (
+ (get_value (String.get str 0)) * 16
+ + (get_value (String.get str 1))
+ ))
+let scan_oct_escape str =
+ (* weimer: wide-character constants like L'\400' may be bigger than
+ * 256 (in fact, may be up to 511), so Char.chr cannot be used directly *)
+ let the_value = (get_value (String.get str 0)) * 64
+ + (get_value (String.get str 1)) * 8
+ + (get_value (String.get str 2)) in
+ if the_value < 256 then String.make 1 (Char.chr the_value )
+ else (String.make 1 (Char.chr (the_value / 256))) ^
+ (String.make 1 (Char.chr (the_value mod 256)))
+
+(* ISO standard locale-specific function to convert a wide character
+ * into a sequence of normal characters. Here we work on strings.
+ * We convert L"Hi" to "H\000i\000" *)
+let wbtowc wstr =
+ let len = String.length wstr in
+ let dest = String.make (len * 2) '\000' in
+ for i = 0 to len-1 do
+ dest.[i*2] <- wstr.[i] ;
+ done ;
+ dest
+
+(* This function converst the "Hi" in L"Hi" to { L'H', L'i', L'\0' } *)
+let wstr_to_warray wstr =
+ let len = String.length wstr in
+ let res = ref "{ " in
+ for i = 0 to len-1 do
+ res := !res ^ (Printf.sprintf "L'%c', " wstr.[i])
+ done ;
+ res := !res ^ "}" ;
+ !res
+
+let getArgName (l: Lexing.lexbuf) (prefixlen: int) =
+ let lexeme = Lexing.lexeme l in
+ let ll = String.length lexeme in
+ if ll > prefixlen then
+ String.sub lexeme (prefixlen + 1) (ll - prefixlen - 1)
+ else
+ ""
+}
+
+let decdigit = ['0'-'9']
+let octdigit = ['0'-'7']
+let hexdigit = ['0'-'9' 'a'-'f' 'A'-'F']
+let letter = ['a'- 'z' 'A'-'Z']
+
+let floatsuffix = ['f' 'F' 'l' 'L']
+
+let usuffix = ['u' 'U']
+let lsuffix = "l"|"L"|"ll"|"LL"
+let intsuffix = lsuffix | usuffix | usuffix lsuffix | lsuffix usuffix
+
+let intnum = decdigit+ intsuffix?
+let octnum = '0' octdigit+ intsuffix?
+let hexnum = '0' ['x' 'X'] hexdigit+ intsuffix?
+
+let exponent = ['e' 'E']['+' '-']? decdigit+
+let fraction = '.' decdigit+
+let floatraw = (intnum? fraction)
+ |(intnum exponent)
+ |(intnum? fraction exponent)
+ |(intnum '.')
+ |(intnum '.' exponent)
+let floatnum = floatraw floatsuffix?
+
+let ident = (letter|'_')(letter|decdigit|'_')*
+let attribident = (letter|'_')(letter|decdigit|'_'|':')
+let blank = [' ' '\t' '\012' '\r']
+let escape = '\\' _
+let hex_escape = '\\' ['x' 'X'] hexdigit hexdigit
+let oct_escape = '\\' octdigit octdigit octdigit
+
+
+(* The arguments are of the form %l:foo *)
+let argname = ':' ident
+
+rule initial =
+ parse blank { initial lexbuf}
+| "/*" { let _ = comment lexbuf in
+ initial lexbuf}
+| "//" { endline lexbuf }
+| "\n" { E.newline (); initial lexbuf}
+| floatnum {CST_FLOAT (Lexing.lexeme lexbuf)}
+| hexnum {CST_INT (Lexing.lexeme lexbuf)}
+| octnum {CST_INT (Lexing.lexeme lexbuf)}
+| intnum {CST_INT (Lexing.lexeme lexbuf)}
+
+| "<<=" {INF_INF_EQ}
+| ">>=" {SUP_SUP_EQ}
+| "*=" {STAR_EQ}
+| "/=" {SLASH_EQ}
+| "&=" {AND_EQ}
+| "|=" {PIPE_EQ}
+| "^=" {CIRC_EQ}
+| "%=" {PERCENT_EQ}
+
+
+| "..." {ELLIPSIS}
+| "-=" {MINUS_EQ}
+| "+=" {PLUS_EQ}
+| "*=" {STAR_EQ}
+| "<<" {INF_INF}
+| ">>" {SUP_SUP}
+| "==" {EQ_EQ}
+| "!=" {EXCLAM_EQ}
+| "<=" {INF_EQ}
+| ">=" {SUP_EQ}
+| "=" {EQ}
+| "<" {INF}
+| ">" {SUP}
+| "++" {PLUS_PLUS}
+| "--" {MINUS_MINUS}
+| "->" {ARROW}
+| '+' {PLUS}
+| '-' {MINUS}
+| '*' {STAR}
+| '/' {SLASH}
+| '!' {EXCLAM}
+| '&' {AND}
+| '|' {PIPE}
+| '^' {CIRC}
+| '~' {TILDE}
+| '[' {LBRACKET}
+| ']' {RBRACKET}
+| '{' {LBRACE}
+| '}' {RBRACE}
+| '(' {LPAREN}
+| ')' {RPAREN}
+| ';' {SEMICOLON}
+| ',' {COMMA}
+| '.' {DOT}
+| ':' {COLON}
+| '?' {QUEST}
+| "sizeof" {SIZEOF}
+
+| "%eo" argname {ARG_eo (getArgName lexbuf 3) }
+| "%e" argname {ARG_e (getArgName lexbuf 2) }
+| "%E" argname {ARG_E (getArgName lexbuf 2) }
+| "%u" argname {ARG_u (getArgName lexbuf 2) }
+| "%b" argname {ARG_b (getArgName lexbuf 2) }
+| "%t" argname {ARG_t (getArgName lexbuf 2) }
+| "%d" argname {ARG_d (getArgName lexbuf 2) }
+| "%lo" argname {ARG_lo (getArgName lexbuf 3) }
+| "%l" argname {ARG_l (getArgName lexbuf 2) }
+| "%i" argname {ARG_i (getArgName lexbuf 2) }
+| "%I" argname {ARG_I (getArgName lexbuf 2) }
+| "%o" argname {ARG_o (getArgName lexbuf 2) }
+| "%va" argname {ARG_va (getArgName lexbuf 3) }
+| "%v" argname {ARG_v (getArgName lexbuf 2) }
+| "%k" argname {ARG_k (getArgName lexbuf 2) }
+| "%f" argname {ARG_f (getArgName lexbuf 2) }
+| "%F" argname {ARG_F (getArgName lexbuf 2) }
+| "%p" argname {ARG_p (getArgName lexbuf 2) }
+| "%P" argname {ARG_P (getArgName lexbuf 2) }
+| "%s" argname {ARG_s (getArgName lexbuf 2) }
+| "%S" argname {ARG_S (getArgName lexbuf 2) }
+| "%g" argname {ARG_g (getArgName lexbuf 2) }
+| "%A" argname {ARG_A (getArgName lexbuf 2) }
+| "%c" argname {ARG_c (getArgName lexbuf 2) }
+
+| '%' {PERCENT}
+| ident {scan_ident (Lexing.lexeme lexbuf)}
+| eof {EOF}
+| _ {E.parse_error
+ "Formatlex: Invalid symbol"
+ }
+
+and comment =
+ parse
+ "*/" { () }
+| '\n' { E.newline (); comment lexbuf }
+| _ { comment lexbuf }
+
+
+and endline = parse
+ '\n' { E.newline (); initial lexbuf}
+| _ { endline lexbuf}