diff options
Diffstat (limited to 'src/smtlib2/sExprLexer.mll')
-rw-r--r-- | src/smtlib2/sExprLexer.mll | 297 |
1 files changed, 297 insertions, 0 deletions
diff --git a/src/smtlib2/sExprLexer.mll b/src/smtlib2/sExprLexer.mll new file mode 100644 index 0000000..6d0cd22 --- /dev/null +++ b/src/smtlib2/sExprLexer.mll @@ -0,0 +1,297 @@ +(**************************************************************************) +(* *) +(* SMTCoq *) +(* Copyright (C) 2011 - 2019 *) +(* *) +(* See file "AUTHORS" for the list of authors *) +(* *) +(* This file is distributed under the terms of the CeCILL-C licence *) +(* *) +(**************************************************************************) + + +(* Lexer for S-expressions + + Adapted from the OCaml sexplib, which is part of the ocaml-core + alternative standard library for OCaml. + +*) + +{ + (** Lexer: Lexer Specification for S-expressions *) + + open Printf + open Lexing + open SExprParser + + let char_for_backslash = function + | 'n' -> '\010' + | 'r' -> '\013' + | 'b' -> '\008' + | 't' -> '\009' + | c -> c + + let lf = '\010' + + let dec_code c1 c2 c3 = + 100 * (Char.code c1 - 48) + 10 * (Char.code c2 - 48) + (Char.code c3 - 48) + + let hex_code c1 c2 = + let d1 = Char.code c1 in + let val1 = + if d1 >= 97 then d1 - 87 + else if d1 >= 65 then d1 - 55 + else d1 - 48 in + let d2 = Char.code c2 in + let val2 = + if d2 >= 97 then d2 - 87 + else if d2 >= 65 then d2 - 55 + else d2 - 48 in + val1 * 16 + val2 + + let found_newline ({ lex_curr_p; _ } as lexbuf) diff = + lexbuf.lex_curr_p <- + { + lex_curr_p with + pos_lnum = lex_curr_p.pos_lnum + 1; + pos_bol = lex_curr_p.pos_cnum - diff; + } + + (* same length computation as in [Lexing.lexeme] *) + let lexeme_len { lex_start_pos; lex_curr_pos; _ } = lex_curr_pos - lex_start_pos + + let main_failure lexbuf msg = + let { pos_lnum; pos_bol; pos_cnum; pos_fname = _ } = lexeme_start_p lexbuf in + let msg = + sprintf + "Sexplib.Lexer.main: %s at line %d char %d" + msg pos_lnum (pos_cnum - pos_bol) + in + failwith msg +} + +let lf = '\010' +let lf_cr = ['\010' '\013'] +let dos_newline = "\013\010" +let blank = [' ' '\009' '\012'] +let unquoted = [^ ';' '(' ')' '"'] # blank # lf_cr +let digit = ['0'-'9'] +let hexdigit = digit | ['a'-'f' 'A'-'F'] + +let unquoted_start = + unquoted # ['#' '|'] | '#' unquoted # ['|'] | '|' unquoted # ['#'] + +rule main buf = parse + | lf | dos_newline { found_newline lexbuf 0; main buf lexbuf } + | blank+ | ';' (_ # lf_cr)* { main buf lexbuf } + | '(' { LPAREN } + | ')' { RPAREN } + | '"' + { + scan_string buf (lexeme_start_p lexbuf) lexbuf; + let str = Buffer.contents buf in + Buffer.clear buf; + STRING str + } + | "#;" { HASH_SEMI } + | "#|" + { + scan_block_comment buf [lexeme_start_p lexbuf] lexbuf; + main buf lexbuf + } + | "|#" { main_failure lexbuf "illegal end of comment" } + | '|' + { + scan_quoted buf (lexeme_start_p lexbuf) lexbuf; + let str = Buffer.contents buf in + Buffer.clear buf; + STRING ("|"^ str ^"|") + } + | unquoted_start unquoted* ("#|" | "|#") unquoted* + { main_failure lexbuf "comment tokens in unquoted atom" } + | "#" | unquoted_start unquoted* as str { STRING str } + | eof { EOF } + +and scan_string buf start = parse + | '"' { () } + | '\\' lf [' ' '\t']* + { + found_newline lexbuf (lexeme_len lexbuf - 2); + scan_string buf start lexbuf + } + | '\\' dos_newline [' ' '\t']* + { + found_newline lexbuf (lexeme_len lexbuf - 3); + scan_string buf start lexbuf + } + | '\\' (['\\' '\'' '"' 'n' 't' 'b' 'r' ' '] as c) + { + Buffer.add_char buf (char_for_backslash c); + scan_string buf start lexbuf + } + | '\\' (digit as c1) (digit as c2) (digit as c3) + { + let v = dec_code c1 c2 c3 in + if v > 255 then ( + let { pos_lnum; pos_bol; pos_cnum; _ } = lexeme_end_p lexbuf in + let msg = + sprintf + "Sexplib.Lexer.scan_string: \ + illegal escape at line %d char %d: `\\%c%c%c'" + pos_lnum (pos_cnum - pos_bol - 3) + c1 c2 c3 in + failwith msg); + Buffer.add_char buf (Char.chr v); + scan_string buf start lexbuf + } + | '\\' 'x' (hexdigit as c1) (hexdigit as c2) + { + let v = hex_code c1 c2 in + Buffer.add_char buf (Char.chr v); + scan_string buf start lexbuf + } + | '\\' (_ as c) + { + Buffer.add_char buf '\\'; + Buffer.add_char buf c; + scan_string buf start lexbuf + } + | lf + { + found_newline lexbuf 0; + Buffer.add_char buf lf; + scan_string buf start lexbuf + } + | ([^ '\\' '"'] # lf)+ + { + Buffer.add_string buf (lexeme lexbuf); + scan_string buf start lexbuf + } + | eof + { + let msg = + sprintf + "Sexplib.Lexer.scan_string: unterminated string at line %d char %d" + start.pos_lnum (start.pos_cnum - start.pos_bol) + in + failwith msg + } + +and scan_quoted buf start = parse + | '|' { () } + | '\\' lf [' ' '\t']* + { + found_newline lexbuf (lexeme_len lexbuf - 2); + scan_quoted buf start lexbuf + } + | '\\' dos_newline [' ' '\t']* + { + found_newline lexbuf (lexeme_len lexbuf - 3); + scan_quoted buf start lexbuf + } + | '\\' (['\\' '\'' '"' 'n' 't' 'b' 'r' ' ' '|'] as c) + { + Buffer.add_char buf (char_for_backslash c); + scan_quoted buf start lexbuf + } + | '\\' (digit as c1) (digit as c2) (digit as c3) + { + let v = dec_code c1 c2 c3 in + if v > 255 then ( + let { pos_lnum; pos_bol; pos_cnum; _ } = lexeme_end_p lexbuf in + let msg = + sprintf + "Sexplib.Lexer.scan_quoted: \ + illegal escape at line %d char %d: `\\%c%c%c'" + pos_lnum (pos_cnum - pos_bol - 3) + c1 c2 c3 in + failwith msg); + Buffer.add_char buf (Char.chr v); + scan_quoted buf start lexbuf + } + | '\\' 'x' (hexdigit as c1) (hexdigit as c2) + { + let v = hex_code c1 c2 in + Buffer.add_char buf (Char.chr v); + scan_quoted buf start lexbuf + } + | '\\' (_ as c) + { + Buffer.add_char buf '\\'; + Buffer.add_char buf c; + scan_quoted buf start lexbuf + } + | lf + { + found_newline lexbuf 0; + Buffer.add_char buf lf; + scan_quoted buf start lexbuf + } + | ([^ '\\' '|'] # lf)+ + { + Buffer.add_string buf (lexeme lexbuf); + scan_quoted buf start lexbuf + } + | eof + { + let msg = + sprintf + "Sexplib.Lexer.scan_quoted: unterminated ident at line %d char %d" + start.pos_lnum (start.pos_cnum - start.pos_bol) + in + failwith msg + } + +and scan_block_comment buf locs = parse + | ('#'* | '|'*) lf + { found_newline lexbuf 0; scan_block_comment buf locs lexbuf } + | (('#'* | '|'*) [^ '"' '#' '|'] # lf)+ { scan_block_comment buf locs lexbuf } + | ('#'* | '|'*) '"' + { + let cur = lexeme_end_p lexbuf in + let start = { cur with pos_cnum = cur.pos_cnum - 1 } in + scan_string buf start lexbuf; + Buffer.clear buf; + scan_block_comment buf locs lexbuf + } + | '#'+ '|' + { + let cur = lexeme_end_p lexbuf in + let start = { cur with pos_cnum = cur.pos_cnum - 2 } in + scan_block_comment buf (start :: locs) lexbuf + } + | '|'+ '#' + { + match locs with + | [_] -> () + | _ :: t -> scan_block_comment buf t lexbuf + | [] -> assert false (* impossible *) + } + | eof + { + match locs with + | [] -> assert false + | { pos_lnum; pos_bol; pos_cnum; _ } :: _ -> + let msg = + sprintf "Sexplib.Lexer.scan_block_comment: \ + unterminated block comment at line %d char %d" + pos_lnum (pos_cnum - pos_bol) + in + failwith msg + } + +and ruleTail acc = parse + | eof { acc } + | _* as str { ruleTail (acc ^ str) lexbuf } + + +{ + let main ?buf = + let buf = + match buf with + | None -> Buffer.create 64 + | Some buf -> Buffer.clear buf; buf + in + main buf +} + |