aboutsummaryrefslogtreecommitdiffstats
path: root/src/lfsc/lfscLexer.mll
diff options
context:
space:
mode:
Diffstat (limited to 'src/lfsc/lfscLexer.mll')
-rw-r--r--src/lfsc/lfscLexer.mll357
1 files changed, 357 insertions, 0 deletions
diff --git a/src/lfsc/lfscLexer.mll b/src/lfsc/lfscLexer.mll
new file mode 100644
index 0000000..3e8d5f9
--- /dev/null
+++ b/src/lfsc/lfscLexer.mll
@@ -0,0 +1,357 @@
+{
+(**************************************************************************)
+(* *)
+(* SMTCoq *)
+(* Copyright (C) 2011 - 2019 *)
+(* *)
+(* See file "AUTHORS" for the list of authors *)
+(* *)
+(* This file is distributed under the terms of the CeCILL-C licence *)
+(* *)
+(**************************************************************************)
+
+
+(* This parser is adapted from Jane Street sexplib parser *)
+
+ open Printf
+ open Lexing
+ open LfscParser
+
+ let char_for_backslash = function
+ | 'n' -> '\010'
+ | 'r' -> '\013'
+ | 'b' -> '\008'
+ | 't' -> '\009'
+ | c -> c
+
+ let lf = '\010'
+
+ let dec_code c1 c2 c3 =
+ 100 * (Char.code c1 - 48) + 10 * (Char.code c2 - 48) + (Char.code c3 - 48)
+
+ let hex_code c1 c2 =
+ let d1 = Char.code c1 in
+ let val1 =
+ if d1 >= 97 then d1 - 87
+ else if d1 >= 65 then d1 - 55
+ else d1 - 48 in
+ let d2 = Char.code c2 in
+ let val2 =
+ if d2 >= 97 then d2 - 87
+ else if d2 >= 65 then d2 - 55
+ else d2 - 48 in
+ val1 * 16 + val2
+
+ let found_newline ({ lex_curr_p; _ } as lexbuf) diff =
+ lexbuf.lex_curr_p <-
+ {
+ lex_curr_p with
+ pos_lnum = lex_curr_p.pos_lnum + 1;
+ pos_bol = lex_curr_p.pos_cnum - diff;
+ }
+
+ (* same length computation as in [Lexing.lexeme] *)
+ let lexeme_len { lex_start_pos; lex_curr_pos; _ } = lex_curr_pos - lex_start_pos
+
+ let main_failure lexbuf msg =
+ let { pos_lnum; pos_bol; pos_cnum; pos_fname = _ } = lexeme_start_p lexbuf in
+ let msg =
+ sprintf
+ "Sexplib.Lexer.main: %s at line %d char %d"
+ msg pos_lnum (pos_cnum - pos_bol)
+ in
+ failwith msg
+
+ module type T = sig
+ module Quoted_string_buffer : sig
+ type t
+ val create : int -> t
+ val add_char : t -> char -> unit
+ val add_substring : t -> string -> int -> int -> unit
+ val add_lexeme : t -> lexbuf -> unit
+ val clear : t -> unit
+ val of_buffer : Buffer.t -> t
+ end
+ module Token : sig
+ type t
+ val lparen : t
+ val rparen : t
+ val lambda : t
+ val biglam : t
+ val pi : t
+ val colon : t
+ val hole : t
+ val sc : t
+ val at : t
+ val integer : string -> t
+ val ident : string -> t
+ val eof : t
+ val simple_string : string -> t
+ val hash_semi : t
+ val quoted_string : Lexing.position -> Quoted_string_buffer.t -> t
+ type s = Quoted_string_buffer.t -> Lexing.lexbuf -> t
+ val comment : string -> main:s -> s
+ val block_comment : Lexing.position -> main:s -> s
+ end
+ end
+
+
+ (* Create and populate a hashtable *)
+ let mk_hashtbl init =
+ let tbl = List.length init |> Hashtbl.create in
+ init |> List.iter (fun (k, v) -> Hashtbl.add tbl k v) ;
+ tbl
+
+ let keywords = mk_hashtbl [
+ ("check", CHECK);
+ ("define", DEFINE);
+ ("declare", DECLARE);
+ ("type", TYPE);
+ ("kind", KIND);
+ ("mpz", MPZ);
+ ("mpq", MPQ);
+ ("program", PROGRAM);
+ ("unsat", UNSAT);
+ ("sat", SAT);
+ ]
+
+ module Make (X : T) : sig
+ val main : ?buf:Buffer.t -> Lexing.lexbuf -> X.Token.t
+ end = struct (* BEGIN FUNCTOR BODY CONTAINING GENERATED CODE *)
+ open X
+
+}
+
+let lf = '\010'
+let lf_cr = ['\010' '\013']
+let dos_newline = "\013\010"
+let blank = [' ' '\009' '\012']
+let unquoted = [^ ';' '(' ')' '"' '\\' ':' '@' '!' ] # blank # lf_cr
+let digit = ['0'-'9']
+let hexdigit = digit | ['a'-'f' 'A'-'F']
+
+let unquoted_start =
+ unquoted # ['#' '|'] | '#' unquoted # ['|'] | '|' unquoted # ['#']
+
+let integer = digit+
+let ident = ('_')* ['a'-'z' 'A'-'Z' '\'' ]['a'-'z' 'A'-'Z' '0'-'9' '\\' '_']*
+
+
+rule main buf = parse
+ | lf | dos_newline { found_newline lexbuf 0;
+ main buf lexbuf }
+ | blank+ { main buf lexbuf }
+ | (';' (_ # lf_cr)*) as text { Token.comment text ~main buf lexbuf }
+ | '(' { Token.lparen }
+ | ')' { Token.rparen }
+ | '\\' { Token.lambda }
+ | '!' { Token.pi }
+ | '%' { Token.biglam }
+ | '_' { Token.hole }
+ | ':' { Token.colon }
+ | '^' { Token.sc }
+ | '@' { Token.at }
+ | '(' '~' (integer as i) ')' {Token.integer ("-"^i) }
+ | integer as i { Token.integer i }
+ | '"'
+ {
+ let pos = Lexing.lexeme_start_p lexbuf in
+ Quoted_string_buffer.add_lexeme buf lexbuf;
+ scan_string buf pos lexbuf;
+ let tok = Token.quoted_string pos buf in
+ Quoted_string_buffer.clear buf;
+ tok
+ }
+ | "#;" { Token.hash_semi }
+ | "#|"
+ {
+ let pos = Lexing.lexeme_start_p lexbuf in
+ Quoted_string_buffer.add_lexeme buf lexbuf;
+ scan_block_comment buf [pos] lexbuf;
+ let tok = Token.block_comment pos ~main buf lexbuf in
+ Quoted_string_buffer.clear buf;
+ tok
+ }
+ | "|#" { main_failure lexbuf "illegal end of comment" }
+ | "#" "#"+ "|" unquoted* (* unquoted_start can match ##, so ##| (which should be
+ refused) would not not be parsed by this case if the regexp
+ on the left was not there *)
+ | "|" "|"+ "#" unquoted*
+ | unquoted_start unquoted* ("#|" | "|#") unquoted*
+ { main_failure lexbuf "comment tokens in unquoted atom" }
+ | "#" | "|" | unquoted_start unquoted* as str { Token.simple_string str }
+ | eof { Token.eof }
+
+and scan_string buf start = parse
+ | '"' { Quoted_string_buffer.add_lexeme buf lexbuf; () }
+ | '\\' lf [' ' '\t']*
+ {
+ let len = lexeme_len lexbuf - 2 in
+ found_newline lexbuf len;
+ Quoted_string_buffer.add_lexeme buf lexbuf;
+ scan_string buf start lexbuf
+ }
+ | '\\' dos_newline [' ' '\t']*
+ {
+ let len = lexeme_len lexbuf - 3 in
+ found_newline lexbuf len;
+ Quoted_string_buffer.add_lexeme buf lexbuf;
+ scan_string buf start lexbuf
+ }
+ | '\\' (['\\' '\'' '"' 'n' 't' 'b' 'r' ' '] as c)
+ {
+ Quoted_string_buffer.add_char buf (char_for_backslash c);
+ Quoted_string_buffer.add_lexeme buf lexbuf;
+ scan_string buf start lexbuf
+ }
+ | '\\' (digit as c1) (digit as c2) (digit as c3)
+ {
+ let v = dec_code c1 c2 c3 in
+ if v > 255 then (
+ let { pos_lnum; pos_bol; pos_cnum; pos_fname = _ } = lexeme_end_p lexbuf in
+ let msg =
+ sprintf
+ "Sexplib.Lexer.scan_string: \
+ illegal escape at line %d char %d: `\\%c%c%c'"
+ pos_lnum (pos_cnum - pos_bol - 3)
+ c1 c2 c3 in
+ failwith msg);
+ Quoted_string_buffer.add_char buf (Char.chr v);
+ Quoted_string_buffer.add_lexeme buf lexbuf;
+ scan_string buf start lexbuf
+ }
+ | '\\' 'x' (hexdigit as c1) (hexdigit as c2)
+ {
+ let v = hex_code c1 c2 in
+ Quoted_string_buffer.add_char buf (Char.chr v);
+ Quoted_string_buffer.add_lexeme buf lexbuf;
+ scan_string buf start lexbuf
+ }
+ | '\\' (_ as c)
+ {
+ Quoted_string_buffer.add_char buf '\\';
+ Quoted_string_buffer.add_char buf c;
+ Quoted_string_buffer.add_lexeme buf lexbuf;
+ scan_string buf start lexbuf
+ }
+ | lf
+ {
+ found_newline lexbuf 0;
+ Quoted_string_buffer.add_char buf lf;
+ Quoted_string_buffer.add_lexeme buf lexbuf;
+ scan_string buf start lexbuf
+ }
+ | ([^ '\\' '"'] # lf)+
+ {
+ let ofs = lexbuf.lex_start_pos in
+ let len = lexbuf.lex_curr_pos - ofs in
+ Quoted_string_buffer.add_substring buf lexbuf.lex_buffer ofs len;
+ Quoted_string_buffer.add_lexeme buf lexbuf;
+ scan_string buf start lexbuf
+ }
+ | eof
+ {
+ let msg =
+ sprintf
+ "Sexplib.Lexer.scan_string: unterminated string at line %d char %d"
+ start.pos_lnum (start.pos_cnum - start.pos_bol)
+ in
+ failwith msg
+ }
+
+and scan_block_comment buf locs = parse
+ | ('#'* | '|'*) lf
+ { Quoted_string_buffer.add_lexeme buf lexbuf;
+ found_newline lexbuf 0; scan_block_comment buf locs lexbuf }
+ | (('#'* | '|'*) [^ '"' '#' '|'] # lf)+
+ { Quoted_string_buffer.add_lexeme buf lexbuf;
+ scan_block_comment buf locs lexbuf }
+ | ('#'* | '|'*) '"'
+ {
+ Quoted_string_buffer.add_lexeme buf lexbuf;
+ let cur = lexeme_end_p lexbuf in
+ let start = { cur with pos_cnum = cur.pos_cnum - 1 } in
+ scan_string buf start lexbuf;
+ scan_block_comment buf locs lexbuf
+ }
+ | '#'+ '|'
+ {
+ Quoted_string_buffer.add_lexeme buf lexbuf;
+ let cur = lexeme_end_p lexbuf in
+ let start = { cur with pos_cnum = cur.pos_cnum - 2 } in
+ scan_block_comment buf (start :: locs) lexbuf
+ }
+ | '|'+ '#'
+ {
+ Quoted_string_buffer.add_lexeme buf lexbuf;
+ match locs with
+ | [_] -> () (* the comment is finished *)
+ | _ :: (_ :: _ as t) -> scan_block_comment buf t lexbuf
+ | [] -> assert false (* impossible *)
+ }
+ | eof
+ {
+ match locs with
+ | [] -> assert false
+ | { pos_lnum; pos_bol; pos_cnum; pos_fname = _ } :: _ ->
+ let msg =
+ sprintf "Sexplib.Lexer.scan_block_comment: \
+ unterminated block comment at line %d char %d"
+ pos_lnum (pos_cnum - pos_bol)
+ in
+ failwith msg
+ }
+
+{ (* RESUME FUNCTOR BODY CONTAINING GENERATED CODE *)
+
+ let main ?buf =
+ let buf =
+ match buf with
+ | None -> Quoted_string_buffer.create 64
+ | Some buf ->
+ Buffer.clear buf;
+ Quoted_string_buffer.of_buffer buf
+ in
+ main buf
+
+ end (* END FUNCTOR BODY CONTAINING GENERATED CODE *)
+
+ module Vanilla =
+ Make (struct
+ module Quoted_string_buffer = struct
+ include Buffer
+ let add_lexeme _ _ = ()
+ let of_buffer b = b
+ end
+ module Token = struct
+ open LfscParser
+ type t = token
+ type s = Quoted_string_buffer.t -> Lexing.lexbuf -> t
+ let eof = EOF
+ let lparen = LPAREN
+ let rparen = RPAREN
+ let lambda = LAMBDA
+ let pi = PI
+ let biglam = BIGLAMBDA
+ let hole = HOLE
+ let colon = COLON
+ let sc = SC
+ let at = AT
+ let hash_semi = HASH_SEMI
+ let integer i = INT (Big_int.big_int_of_string i)
+ let ident i =
+ try Hashtbl.find keywords i with Not_found -> STRING i
+ let simple_string x =
+ try Hashtbl.find keywords x with Not_found -> STRING x
+ let quoted_string _ buf = STRING (Buffer.contents buf)
+ let block_comment _pos ~main buf lexbuf =
+ main buf lexbuf
+ let comment _text ~main buf lexbuf =
+ main buf lexbuf (* skip and continue lexing *)
+ end
+ end)
+
+
+ let main = Vanilla.main
+
+}