diff options
Diffstat (limited to 'src/lfsc/lfscLexer.mll')
-rw-r--r-- | src/lfsc/lfscLexer.mll | 357 |
1 files changed, 357 insertions, 0 deletions
diff --git a/src/lfsc/lfscLexer.mll b/src/lfsc/lfscLexer.mll new file mode 100644 index 0000000..3e8d5f9 --- /dev/null +++ b/src/lfsc/lfscLexer.mll @@ -0,0 +1,357 @@ +{ +(**************************************************************************) +(* *) +(* SMTCoq *) +(* Copyright (C) 2011 - 2019 *) +(* *) +(* See file "AUTHORS" for the list of authors *) +(* *) +(* This file is distributed under the terms of the CeCILL-C licence *) +(* *) +(**************************************************************************) + + +(* This parser is adapted from Jane Street sexplib parser *) + + open Printf + open Lexing + open LfscParser + + let char_for_backslash = function + | 'n' -> '\010' + | 'r' -> '\013' + | 'b' -> '\008' + | 't' -> '\009' + | c -> c + + let lf = '\010' + + let dec_code c1 c2 c3 = + 100 * (Char.code c1 - 48) + 10 * (Char.code c2 - 48) + (Char.code c3 - 48) + + let hex_code c1 c2 = + let d1 = Char.code c1 in + let val1 = + if d1 >= 97 then d1 - 87 + else if d1 >= 65 then d1 - 55 + else d1 - 48 in + let d2 = Char.code c2 in + let val2 = + if d2 >= 97 then d2 - 87 + else if d2 >= 65 then d2 - 55 + else d2 - 48 in + val1 * 16 + val2 + + let found_newline ({ lex_curr_p; _ } as lexbuf) diff = + lexbuf.lex_curr_p <- + { + lex_curr_p with + pos_lnum = lex_curr_p.pos_lnum + 1; + pos_bol = lex_curr_p.pos_cnum - diff; + } + + (* same length computation as in [Lexing.lexeme] *) + let lexeme_len { lex_start_pos; lex_curr_pos; _ } = lex_curr_pos - lex_start_pos + + let main_failure lexbuf msg = + let { pos_lnum; pos_bol; pos_cnum; pos_fname = _ } = lexeme_start_p lexbuf in + let msg = + sprintf + "Sexplib.Lexer.main: %s at line %d char %d" + msg pos_lnum (pos_cnum - pos_bol) + in + failwith msg + + module type T = sig + module Quoted_string_buffer : sig + type t + val create : int -> t + val add_char : t -> char -> unit + val add_substring : t -> string -> int -> int -> unit + val add_lexeme : t -> lexbuf -> unit + val clear : t -> unit + val of_buffer : Buffer.t -> t + end + module Token : sig + type t + val lparen : t + val rparen : t + val lambda : t + val biglam : t + val pi : t + val colon : t + val hole : t + val sc : t + val at : t + val integer : string -> t + val ident : string -> t + val eof : t + val simple_string : string -> t + val hash_semi : t + val quoted_string : Lexing.position -> Quoted_string_buffer.t -> t + type s = Quoted_string_buffer.t -> Lexing.lexbuf -> t + val comment : string -> main:s -> s + val block_comment : Lexing.position -> main:s -> s + end + end + + + (* Create and populate a hashtable *) + let mk_hashtbl init = + let tbl = List.length init |> Hashtbl.create in + init |> List.iter (fun (k, v) -> Hashtbl.add tbl k v) ; + tbl + + let keywords = mk_hashtbl [ + ("check", CHECK); + ("define", DEFINE); + ("declare", DECLARE); + ("type", TYPE); + ("kind", KIND); + ("mpz", MPZ); + ("mpq", MPQ); + ("program", PROGRAM); + ("unsat", UNSAT); + ("sat", SAT); + ] + + module Make (X : T) : sig + val main : ?buf:Buffer.t -> Lexing.lexbuf -> X.Token.t + end = struct (* BEGIN FUNCTOR BODY CONTAINING GENERATED CODE *) + open X + +} + +let lf = '\010' +let lf_cr = ['\010' '\013'] +let dos_newline = "\013\010" +let blank = [' ' '\009' '\012'] +let unquoted = [^ ';' '(' ')' '"' '\\' ':' '@' '!' ] # blank # lf_cr +let digit = ['0'-'9'] +let hexdigit = digit | ['a'-'f' 'A'-'F'] + +let unquoted_start = + unquoted # ['#' '|'] | '#' unquoted # ['|'] | '|' unquoted # ['#'] + +let integer = digit+ +let ident = ('_')* ['a'-'z' 'A'-'Z' '\'' ]['a'-'z' 'A'-'Z' '0'-'9' '\\' '_']* + + +rule main buf = parse + | lf | dos_newline { found_newline lexbuf 0; + main buf lexbuf } + | blank+ { main buf lexbuf } + | (';' (_ # lf_cr)*) as text { Token.comment text ~main buf lexbuf } + | '(' { Token.lparen } + | ')' { Token.rparen } + | '\\' { Token.lambda } + | '!' { Token.pi } + | '%' { Token.biglam } + | '_' { Token.hole } + | ':' { Token.colon } + | '^' { Token.sc } + | '@' { Token.at } + | '(' '~' (integer as i) ')' {Token.integer ("-"^i) } + | integer as i { Token.integer i } + | '"' + { + let pos = Lexing.lexeme_start_p lexbuf in + Quoted_string_buffer.add_lexeme buf lexbuf; + scan_string buf pos lexbuf; + let tok = Token.quoted_string pos buf in + Quoted_string_buffer.clear buf; + tok + } + | "#;" { Token.hash_semi } + | "#|" + { + let pos = Lexing.lexeme_start_p lexbuf in + Quoted_string_buffer.add_lexeme buf lexbuf; + scan_block_comment buf [pos] lexbuf; + let tok = Token.block_comment pos ~main buf lexbuf in + Quoted_string_buffer.clear buf; + tok + } + | "|#" { main_failure lexbuf "illegal end of comment" } + | "#" "#"+ "|" unquoted* (* unquoted_start can match ##, so ##| (which should be + refused) would not not be parsed by this case if the regexp + on the left was not there *) + | "|" "|"+ "#" unquoted* + | unquoted_start unquoted* ("#|" | "|#") unquoted* + { main_failure lexbuf "comment tokens in unquoted atom" } + | "#" | "|" | unquoted_start unquoted* as str { Token.simple_string str } + | eof { Token.eof } + +and scan_string buf start = parse + | '"' { Quoted_string_buffer.add_lexeme buf lexbuf; () } + | '\\' lf [' ' '\t']* + { + let len = lexeme_len lexbuf - 2 in + found_newline lexbuf len; + Quoted_string_buffer.add_lexeme buf lexbuf; + scan_string buf start lexbuf + } + | '\\' dos_newline [' ' '\t']* + { + let len = lexeme_len lexbuf - 3 in + found_newline lexbuf len; + Quoted_string_buffer.add_lexeme buf lexbuf; + scan_string buf start lexbuf + } + | '\\' (['\\' '\'' '"' 'n' 't' 'b' 'r' ' '] as c) + { + Quoted_string_buffer.add_char buf (char_for_backslash c); + Quoted_string_buffer.add_lexeme buf lexbuf; + scan_string buf start lexbuf + } + | '\\' (digit as c1) (digit as c2) (digit as c3) + { + let v = dec_code c1 c2 c3 in + if v > 255 then ( + let { pos_lnum; pos_bol; pos_cnum; pos_fname = _ } = lexeme_end_p lexbuf in + let msg = + sprintf + "Sexplib.Lexer.scan_string: \ + illegal escape at line %d char %d: `\\%c%c%c'" + pos_lnum (pos_cnum - pos_bol - 3) + c1 c2 c3 in + failwith msg); + Quoted_string_buffer.add_char buf (Char.chr v); + Quoted_string_buffer.add_lexeme buf lexbuf; + scan_string buf start lexbuf + } + | '\\' 'x' (hexdigit as c1) (hexdigit as c2) + { + let v = hex_code c1 c2 in + Quoted_string_buffer.add_char buf (Char.chr v); + Quoted_string_buffer.add_lexeme buf lexbuf; + scan_string buf start lexbuf + } + | '\\' (_ as c) + { + Quoted_string_buffer.add_char buf '\\'; + Quoted_string_buffer.add_char buf c; + Quoted_string_buffer.add_lexeme buf lexbuf; + scan_string buf start lexbuf + } + | lf + { + found_newline lexbuf 0; + Quoted_string_buffer.add_char buf lf; + Quoted_string_buffer.add_lexeme buf lexbuf; + scan_string buf start lexbuf + } + | ([^ '\\' '"'] # lf)+ + { + let ofs = lexbuf.lex_start_pos in + let len = lexbuf.lex_curr_pos - ofs in + Quoted_string_buffer.add_substring buf lexbuf.lex_buffer ofs len; + Quoted_string_buffer.add_lexeme buf lexbuf; + scan_string buf start lexbuf + } + | eof + { + let msg = + sprintf + "Sexplib.Lexer.scan_string: unterminated string at line %d char %d" + start.pos_lnum (start.pos_cnum - start.pos_bol) + in + failwith msg + } + +and scan_block_comment buf locs = parse + | ('#'* | '|'*) lf + { Quoted_string_buffer.add_lexeme buf lexbuf; + found_newline lexbuf 0; scan_block_comment buf locs lexbuf } + | (('#'* | '|'*) [^ '"' '#' '|'] # lf)+ + { Quoted_string_buffer.add_lexeme buf lexbuf; + scan_block_comment buf locs lexbuf } + | ('#'* | '|'*) '"' + { + Quoted_string_buffer.add_lexeme buf lexbuf; + let cur = lexeme_end_p lexbuf in + let start = { cur with pos_cnum = cur.pos_cnum - 1 } in + scan_string buf start lexbuf; + scan_block_comment buf locs lexbuf + } + | '#'+ '|' + { + Quoted_string_buffer.add_lexeme buf lexbuf; + let cur = lexeme_end_p lexbuf in + let start = { cur with pos_cnum = cur.pos_cnum - 2 } in + scan_block_comment buf (start :: locs) lexbuf + } + | '|'+ '#' + { + Quoted_string_buffer.add_lexeme buf lexbuf; + match locs with + | [_] -> () (* the comment is finished *) + | _ :: (_ :: _ as t) -> scan_block_comment buf t lexbuf + | [] -> assert false (* impossible *) + } + | eof + { + match locs with + | [] -> assert false + | { pos_lnum; pos_bol; pos_cnum; pos_fname = _ } :: _ -> + let msg = + sprintf "Sexplib.Lexer.scan_block_comment: \ + unterminated block comment at line %d char %d" + pos_lnum (pos_cnum - pos_bol) + in + failwith msg + } + +{ (* RESUME FUNCTOR BODY CONTAINING GENERATED CODE *) + + let main ?buf = + let buf = + match buf with + | None -> Quoted_string_buffer.create 64 + | Some buf -> + Buffer.clear buf; + Quoted_string_buffer.of_buffer buf + in + main buf + + end (* END FUNCTOR BODY CONTAINING GENERATED CODE *) + + module Vanilla = + Make (struct + module Quoted_string_buffer = struct + include Buffer + let add_lexeme _ _ = () + let of_buffer b = b + end + module Token = struct + open LfscParser + type t = token + type s = Quoted_string_buffer.t -> Lexing.lexbuf -> t + let eof = EOF + let lparen = LPAREN + let rparen = RPAREN + let lambda = LAMBDA + let pi = PI + let biglam = BIGLAMBDA + let hole = HOLE + let colon = COLON + let sc = SC + let at = AT + let hash_semi = HASH_SEMI + let integer i = INT (Big_int.big_int_of_string i) + let ident i = + try Hashtbl.find keywords i with Not_found -> STRING i + let simple_string x = + try Hashtbl.find keywords x with Not_found -> STRING x + let quoted_string _ buf = STRING (Buffer.contents buf) + let block_comment _pos ~main buf lexbuf = + main buf lexbuf + let comment _text ~main buf lexbuf = + main buf lexbuf (* skip and continue lexing *) + end + end) + + + let main = Vanilla.main + +} |