{
(**************************************************************************)
(*                                                                        *)
(*     SMTCoq                                                             *)
(*     Copyright (C) 2011 - 2022                                          *)
(*                                                                        *)
(*     See file "AUTHORS" for the list of authors                         *)
(*                                                                        *)
(*   This file is distributed under the terms of the CeCILL-C licence     *)
(*                                                                        *)
(**************************************************************************)

(* This parser is adapted from Jane Street sexplib parser *)

open Printf
open Lexing
open LfscParser

(* Everything the generated lexer needs from its client: a buffer type used
   to accumulate quoted strings and block comments, and constructors for
   every token the rules below can produce. *)
module type T = sig
  module Quoted_string_buffer : sig
    type t
    val create : int -> t
    val add_char : t -> char -> unit
    val add_substring : t -> string -> int -> int -> unit
    val add_lexeme : t -> lexbuf -> unit
    val clear : t -> unit
    val of_buffer : Buffer.t -> t
  end

  module Token : sig
    type t
    val lparen : t
    val rparen : t
    val lambda : t
    val biglam : t
    val pi : t
    val colon : t
    val hole : t
    val sc : t
    val at : t
    val integer : string -> t
    val ident : string -> t
    val eof : t
    val simple_string : string -> t
    val hash_semi : t
    val quoted_string : Lexing.position -> Quoted_string_buffer.t -> t

    (* Shape of the main lexing rule, so that comment handlers can resume
       lexing by calling it. *)
    type s = Quoted_string_buffer.t -> Lexing.lexbuf -> t
    val comment : string -> main:s -> s
    val block_comment : Lexing.position -> main:s -> s
  end
end

(* Create and populate a hashtable *)
let mk_hashtbl init =
  let tbl = Hashtbl.create (List.length init) in
  List.iter (fun (k, v) -> Hashtbl.add tbl k v) init;
  tbl

(* Reserved words, mapped to their LfscParser tokens. *)
let keywords =
  mk_hashtbl [
    ("check", CHECK);
    ("define", DEFINE);
    ("declare", DECLARE);
    ("type", TYPE);
    ("kind", KIND);
    ("mpz", MPZ);
    ("mpq", MPQ);
    ("program", PROGRAM);
    ("unsat", UNSAT);
    ("sat", SAT);
  ]

module Make (X : T) : sig
  val main : ?buf:Buffer.t -> Lexing.lexbuf -> X.Token.t
end = struct (* BEGIN FUNCTOR BODY CONTAINING GENERATED CODE *)
  open X
}

let lf = '\010'
let lf_cr = ['\010' '\013']
let dos_newline = "\013\010"
let blank = [' ' '\009' '\012']

(* Characters allowed inside an unquoted atom: anything except the
   punctuation that has its own rule in main, blanks and line terminators. *)
let unquoted = [^ ';' '(' ')' '"' '\\' ':' '@' '!' ] # blank # lf_cr
let digit = ['0'-'9']
let hexdigit = digit | ['a'-'f' 'A'-'F']

(* Start of an unquoted atom; excludes the two-character block comment
   delimiters. *)
let unquoted_start =
  unquoted # ['#' '|'] | '#' unquoted # ['|'] | '|' unquoted # ['#']

let integer = digit+

(* Not referenced by any rule below. *)
let ident = ('_')* ['a'-'z' 'A'-'Z' '\'' ]['a'-'z' 'A'-'Z' '0'-'9' '\\' '_']*

(* Main entry point: skips blanks, newlines and comments (through the Token
   callbacks) and returns the next token.  The order of the cases matters:
   when two cases match a lexeme of the same length, ocamllex picks the
   first one.
   NOTE(review): SmtMisc.found_newline and SmtMisc.main_failure are defined
   elsewhere; presumably they update line tracking and report a lexing
   error respectively - confirm. *)
rule main buf = parse
  | lf | dos_newline
      { SmtMisc.found_newline lexbuf 0;
        main buf lexbuf }
  | blank+
      { main buf lexbuf }
  | (';' (_ # lf_cr)*) as text
      { Token.comment text ~main buf lexbuf }
  | '(' { Token.lparen }
  | ')' { Token.rparen }
  | '\\' { Token.lambda }
  | '!' { Token.pi }
  | '%' { Token.biglam }
  | '_' { Token.hole }
  | ':' { Token.colon }
  | '^' { Token.sc }
  | '@' { Token.at }
  | '(' '~' (integer as i) ')'
      { (* a parenthesised tilde followed by digits is a negative integer *)
        Token.integer ("-" ^ i) }
  | integer as i
      { Token.integer i }
  | '"'
      { let pos = Lexing.lexeme_start_p lexbuf in
        Quoted_string_buffer.add_lexeme buf lexbuf;
        scan_string buf pos lexbuf;
        let tok = Token.quoted_string pos buf in
        (* the buffer is shared between tokens: reset it for the next one *)
        Quoted_string_buffer.clear buf;
        tok }
  | "#;"
      { Token.hash_semi }
  | "#|"
      { let pos = Lexing.lexeme_start_p lexbuf in
        Quoted_string_buffer.add_lexeme buf lexbuf;
        (* the list of opening positions tracks the nesting of comments *)
        scan_block_comment buf [pos] lexbuf;
        let tok = Token.block_comment pos ~main buf lexbuf in
        Quoted_string_buffer.clear buf;
        tok }
  | "|#"
      { SmtMisc.main_failure lexbuf "illegal end of comment" }
  | "#" "#"+ "|" unquoted*
    (* unquoted_start can match ##, so ##| (which should be refused) would
       not be parsed by this case if the regexp just above was not there *)
  | "|" "|"+ "#" unquoted*
  | unquoted_start unquoted* ("#|" | "|#") unquoted*
      { SmtMisc.main_failure lexbuf "comment tokens in unquoted atom" }
  | "#" | "|" | unquoted_start unquoted* as str
      { Token.simple_string str }
  | eof
      { Token.eof }

(* Scans the body of a quoted string, the opening double quote having been
   consumed already.  Decoded characters go to buf through add_char and
   add_substring, raw lexemes through add_lexeme; which of the two a client
   keeps is decided by its Quoted_string_buffer implementation.  The start
   argument is the position of the opening quote and is only used in the
   unterminated string message.  Raises Failure on an out-of-range decimal
   escape and on end of input.
   NOTE(review): SmtMisc.lexeme_len, char_for_backslash, dec_code, hex_code
   and lf are defined elsewhere; their descriptions below are inferred from
   their names - confirm. *)
and scan_string buf start = parse
  | '"'
      { Quoted_string_buffer.add_lexeme buf lexbuf;
        () }
  | '\\' lf [' ' '\t']*
      { (* backslash, newline, then blanks: nothing is added through
           add_char, only line tracking and the raw lexeme are recorded *)
        let len = SmtMisc.lexeme_len lexbuf - 2 in
        SmtMisc.found_newline lexbuf len;
        Quoted_string_buffer.add_lexeme buf lexbuf;
        scan_string buf start lexbuf }
  | '\\' dos_newline [' ' '\t']*
      { (* same as above for a two-character DOS line terminator *)
        let len = SmtMisc.lexeme_len lexbuf - 3 in
        SmtMisc.found_newline lexbuf len;
        Quoted_string_buffer.add_lexeme buf lexbuf;
        scan_string buf start lexbuf }
  | '\\' (['\\' '\'' '"' 'n' 't' 'b' 'r' ' '] as c)
      { (* single-character escape *)
        Quoted_string_buffer.add_char buf (SmtMisc.char_for_backslash c);
        Quoted_string_buffer.add_lexeme buf lexbuf;
        scan_string buf start lexbuf }
  | '\\' (digit as c1) (digit as c2) (digit as c3)
      { (* three-digit decimal escape; codes above 255 are rejected *)
        let v = SmtMisc.dec_code c1 c2 c3 in
        if v > 255 then (
          let { pos_lnum; pos_bol; pos_cnum; pos_fname = _ } =
            lexeme_end_p lexbuf
          in
          let msg =
            sprintf
              "Sexplib.Lexer.scan_string: \ illegal escape at line %d char %d: `\\%c%c%c'"
              pos_lnum (pos_cnum - pos_bol - 3) c1 c2 c3
          in
          failwith msg);
        Quoted_string_buffer.add_char buf (Char.chr v);
        Quoted_string_buffer.add_lexeme buf lexbuf;
        scan_string buf start lexbuf }
  | '\\' 'x' (hexdigit as c1) (hexdigit as c2)
      { (* two-digit hexadecimal escape *)
        let v = SmtMisc.hex_code c1 c2 in
        Quoted_string_buffer.add_char buf (Char.chr v);
        Quoted_string_buffer.add_lexeme buf lexbuf;
        scan_string buf start lexbuf }
  | '\\' (_ as c)
      { (* unknown escape: keep the backslash and the character as they are *)
        Quoted_string_buffer.add_char buf '\\';
        Quoted_string_buffer.add_char buf c;
        Quoted_string_buffer.add_lexeme buf lexbuf;
        scan_string buf start lexbuf }
  | lf
      { SmtMisc.found_newline lexbuf 0;
        Quoted_string_buffer.add_char buf SmtMisc.lf;
        Quoted_string_buffer.add_lexeme buf lexbuf;
        scan_string buf start lexbuf }
  | ([^ '\\' '"'] # lf)+
      { (* Plain text.  Lexing.lexeme extracts only the matched bytes; the
           previous code converted the whole lexer buffer to a string for
           every chunk just to take a slice of it. *)
        let text = Lexing.lexeme lexbuf in
        Quoted_string_buffer.add_substring buf text 0 (String.length text);
        Quoted_string_buffer.add_lexeme buf lexbuf;
        scan_string buf start lexbuf }
  | eof
      { let msg =
          sprintf
            "Sexplib.Lexer.scan_string: unterminated string at line %d char %d"
            start.pos_lnum (start.pos_cnum - start.pos_bol)
        in
        failwith msg }

(* Scans the body of a block comment, the opening delimiter having been
   consumed already.  The locs argument is the stack of positions of the
   currently open delimiters, innermost first: a nested opening pushes a
   position, a closing pops one, and the scan returns once the outermost
   comment is closed.  Quoted strings inside the comment are scanned with
   scan_string, so comment delimiters inside them are not interpreted.
   Raises Failure on end of input. *)
and scan_block_comment buf locs = parse
  | ('#'* | '|'*) lf
      { Quoted_string_buffer.add_lexeme buf lexbuf;
        SmtMisc.found_newline lexbuf 0;
        scan_block_comment buf locs lexbuf }
  | (('#'* | '|'*) [^ '"' '#' '|'] # lf)+
      { Quoted_string_buffer.add_lexeme buf lexbuf;
        scan_block_comment buf locs lexbuf }
  | ('#'* | '|'*) '"'
      { Quoted_string_buffer.add_lexeme buf lexbuf;
        let cur = lexeme_end_p lexbuf in
        (* position of the quote itself, the last character of the lexeme *)
        let start = { cur with pos_cnum = cur.pos_cnum - 1 } in
        scan_string buf start lexbuf;
        scan_block_comment buf locs lexbuf }
  | '#'+ '|'
      { Quoted_string_buffer.add_lexeme buf lexbuf;
        let cur = lexeme_end_p lexbuf in
        (* position of the two-character opening delimiter *)
        let start = { cur with pos_cnum = cur.pos_cnum - 2 } in
        scan_block_comment buf (start :: locs) lexbuf }
  | '|'+ '#'
      { Quoted_string_buffer.add_lexeme buf lexbuf;
        match locs with
        | [_] -> () (* the comment is finished *)
        | _ :: (_ :: _ as t) -> scan_block_comment buf t lexbuf
        | [] -> assert false (* impossible *) }
  | eof
      { match locs with
        | [] -> assert false
        | { pos_lnum; pos_bol; pos_cnum; pos_fname = _ } :: _ ->
          let msg =
            sprintf
              "Sexplib.Lexer.scan_block_comment: \ unterminated block comment at line %d char %d"
              pos_lnum (pos_cnum - pos_bol)
          in
          failwith msg }

{ (* RESUME FUNCTOR BODY CONTAINING GENERATED CODE *)

  (* Public entry point of the functor.  When a buffer is supplied it is
     cleared and reused as the scratch buffer, otherwise a fresh one of
     initial size 64 is created.  The main on the last line is the generated
     rule, since this binding is not recursive. *)
  let main ?buf =
    let buf =
      match buf with
      | None -> Quoted_string_buffer.create 64
      | Some buf ->
        Buffer.clear buf;
        Quoted_string_buffer.of_buffer buf
    in
    main buf
end (* END FUNCTOR BODY CONTAINING GENERATED CODE *)

(* Instantiation that produces LfscParser tokens.  The scratch buffer is a
   plain Buffer.t holding decoded string contents only: raw lexemes are
   dropped because add_lexeme does nothing. *)
module Vanilla = Make (struct
  module Quoted_string_buffer = struct
    include Buffer
    let add_lexeme _ _ = ()
    let of_buffer b = b
  end

  module Token = struct
    open LfscParser
    type t = token
    type s = Quoted_string_buffer.t -> Lexing.lexbuf -> t
    let eof = EOF
    let lparen = LPAREN
    let rparen = RPAREN
    let lambda = LAMBDA
    let pi = PI
    let biglam = BIGLAMBDA
    let hole = HOLE
    let colon = COLON
    let sc = SC
    let at = AT
    let hash_semi = HASH_SEMI
    let integer i = INT (Big_int.big_int_of_string i)

    (* Keywords take precedence over plain strings. *)
    let ident i =
      try Hashtbl.find keywords i with Not_found -> STRING i

    let simple_string x =
      try Hashtbl.find keywords x with Not_found -> STRING x

    let quoted_string _ buf = STRING (Buffer.contents buf)

    (* Skip the comment and continue lexing.  Strings met inside the comment
       were scanned with scan_string and therefore appended to buf; drop
       them first, otherwise a quoted string token directly following the
       comment would start with that leftover text. *)
    let block_comment _pos ~main buf lexbuf =
      Buffer.clear buf;
      main buf lexbuf

    let comment _text ~main buf lexbuf =
      main buf lexbuf (* skip and continue lexing *)
  end
end)

(* Default lexer entry point used by the rest of the project. *)
let main = Vanilla.main
}