diff options
-rw-r--r-- | exportclight/Clightdefs.v | 101 | ||||
-rw-r--r-- | exportclight/Clightgen.ml | 9 | ||||
-rw-r--r-- | exportclight/ExportClight.ml | 35 | ||||
-rw-r--r-- | lib/Camlcoq.ml | 79 |
4 files changed, 204 insertions, 20 deletions
diff --git a/exportclight/Clightdefs.v b/exportclight/Clightdefs.v index 83d82d88..c2b38a92 100644 --- a/exportclight/Clightdefs.v +++ b/exportclight/Clightdefs.v @@ -15,7 +15,7 @@ (** All imports and definitions used by .v Clight files generated by clightgen *) -From Coq Require Import String List ZArith. +From Coq Require Import Ascii String List ZArith. From compcert Require Import Integers Floats Maps Errors AST Ctypes Cop Clight. Definition tvoid := Tvoid. @@ -80,3 +80,102 @@ Definition mkprogram (types: list composite_definition) prog_types := types; prog_comp_env := ce; prog_comp_env_eq := EQ |}. + +(** The following encoding of character strings as positive numbers + must be kept consistent with the OCaml function [Camlcoq.pos_of_string]. *) + +Definition append_bit_pos (b: bool) (p: positive) : positive := + if b then xI p else xO p. + +Definition append_char_pos_default (c: ascii) (p: positive) : positive := + let '(Ascii b7 b6 b5 b4 b3 b2 b1 b0) := c in + xI (xI (xI (xI (xI (xI + (append_bit_pos b0 (append_bit_pos b1 + (append_bit_pos b2 (append_bit_pos b3 + (append_bit_pos b4 (append_bit_pos b5 + (append_bit_pos b6 (append_bit_pos b7 p))))))))))))). + +Definition append_char_pos (c: ascii) (p: positive) : positive := + match c with + | "0"%char => xO (xO (xO (xO (xO (xO p))))) + | "1"%char => xI (xO (xO (xO (xO (xO p))))) + | "2"%char => xO (xI (xO (xO (xO (xO p))))) + | "3"%char => xI (xI (xO (xO (xO (xO p))))) + | "4"%char => xO (xO (xI (xO (xO (xO p))))) + | "5"%char => xI (xO (xI (xO (xO (xO p))))) + | "6"%char => xO (xI (xI (xO (xO (xO p))))) + | "7"%char => xI (xI (xI (xO (xO (xO p))))) + | "8"%char => xO (xO (xO (xI (xO (xO p))))) + | "9"%char => xI (xO (xO (xI (xO (xO p))))) + | "a"%char => xO (xI (xO (xI (xO (xO p))))) + | "b"%char => xI (xI (xO (xI (xO (xO p))))) + | "c"%char => xO (xO (xI (xI (xO (xO p))))) + | "d"%char => xI (xO (xI (xI (xO (xO p))))) + | "e"%char => xO (xI (xI (xI (xO (xO p))))) + | "f"%char => xI (xI (xI (xI (xO (xO p))))) + | "g"%char => xO (xO (xO (xO (xI (xO p))))) + | "h"%char => xI (xO (xO (xO (xI (xO p))))) + | "i"%char => xO (xI (xO (xO (xI (xO p))))) + | "j"%char => xI (xI (xO (xO (xI (xO p))))) + | "k"%char => xO (xO (xI (xO (xI (xO p))))) + | "l"%char => xI (xO (xI (xO (xI (xO p))))) + | "m"%char => xO (xI (xI (xO (xI (xO p))))) + | "n"%char => xI (xI (xI (xO (xI (xO p))))) + | "o"%char => xO (xO (xO (xI (xI (xO p))))) + | "p"%char => xI (xO (xO (xI (xI (xO p))))) + | "q"%char => xO (xI (xO (xI (xI (xO p))))) + | "r"%char => xI (xI (xO (xI (xI (xO p))))) + | "s"%char => xO (xO (xI (xI (xI (xO p))))) + | "t"%char => xI (xO (xI (xI (xI (xO p))))) + | "u"%char => xO (xI (xI (xI (xI (xO p))))) + | "v"%char => xI (xI (xI (xI (xI (xO p))))) + | "w"%char => xO (xO (xO (xO (xO (xI p))))) + | "x"%char => xI (xO (xO (xO (xO (xI p))))) + | "y"%char => xO (xI (xO (xO (xO (xI p))))) + | "z"%char => xI (xI (xO (xO (xO (xI p))))) + | "A"%char => xO (xO (xI (xO (xO (xI p))))) + | "B"%char => xI (xO (xI (xO (xO (xI p))))) + | "C"%char => xO (xI (xI (xO (xO (xI p))))) + | "D"%char => xI (xI (xI (xO (xO (xI p))))) + | "E"%char => xO (xO (xO (xI (xO (xI p))))) + | "F"%char => xI (xO (xO (xI (xO (xI p))))) + | "G"%char => xO (xI (xO (xI (xO (xI p))))) + | "H"%char => xI (xI (xO (xI (xO (xI p))))) + | "I"%char => xO (xO (xI (xI (xO (xI p))))) + | "J"%char => xI (xO (xI (xI (xO (xI p))))) + | "K"%char => xO (xI (xI (xI (xO (xI p))))) + | "L"%char => xI (xI (xI (xI (xO (xI p))))) + | "M"%char => xO (xO (xO (xO (xI (xI p))))) + | "N"%char => xI (xO (xO (xO (xI (xI p))))) + | "O"%char => xO (xI (xO (xO (xI (xI p))))) + | "P"%char => xI (xI (xO (xO (xI (xI p))))) + | "Q"%char => xO (xO (xI (xO (xI (xI p))))) + | "R"%char => xI (xO (xI (xO (xI (xI p))))) + | "S"%char => xO (xI (xI (xO (xI (xI p))))) + | "T"%char => xI (xI (xI (xO (xI (xI p))))) + | "U"%char => xO (xO (xO (xI (xI (xI p))))) + | "V"%char => xI (xO (xO (xI (xI (xI p))))) + | "W"%char => xO (xI (xO (xI (xI (xI p))))) + | "X"%char => xI (xI (xO (xI (xI (xI p))))) + | "Y"%char => xO (xO (xI (xI (xI (xI p))))) + | "Z"%char => xI (xO (xI (xI (xI (xI p))))) + | "_"%char => xO (xI (xI (xI (xI (xI p))))) + | _ => append_char_pos_default c p + end. + +Fixpoint ident_of_string (s: string) : ident := + match s with + | EmptyString => xH + | String c s => append_char_pos c (ident_of_string s) + end. + +(** A convenient notation [$ "ident"] to force evaluation of + [ident_of_string "ident"] *) + +Ltac ident_of_string s := + let x := constr:(ident_of_string s) in + let y := eval compute in x in + exact y. + +Notation "$ s" := (ltac:(ident_of_string s)) + (at level 1, only parsing) : string_scope. diff --git a/exportclight/Clightgen.ml b/exportclight/Clightgen.ml index f7279a5e..637454f0 100644 --- a/exportclight/Clightgen.ml +++ b/exportclight/Clightgen.ml @@ -98,6 +98,8 @@ Recognized source files: .i or .p C source file that should not be preprocessed Processing options: -normalize Normalize the generated Clight code w.r.t. loads in expressions + -canonical-idents Use canonical numbers to represent identifiers (default) + -short-idents Use canonical numbers to represent identifiers -E Preprocess only, send result to standard output -o <file> Generate output in <file> |} ^ @@ -142,6 +144,8 @@ let cmdline_actions = (* Processing options *) [ Exact "-E", Set option_E; Exact "-normalize", Set option_normalize; + Exact "-canonical-idents", Set Camlcoq.use_canonical_atoms; + Exact "-short-idents", Unset Camlcoq.use_canonical_atoms; Exact "-o", String(fun s -> option_o := Some s); Prefix "-o", Self (fun s -> let s = String.sub s 2 ((String.length s) - 2) in option_o := Some s);] @@ -175,12 +179,13 @@ let cmdline_actions = ] let _ = - try +try Gc.set { (Gc.get()) with Gc.minor_heap_size = 524288; (* 512k *) Gc.major_heap_increment = 4194304 (* 4M *) }; Printexc.record_backtrace true; + Camlcoq.use_canonical_atoms := true; Frontend.init (); parse_cmdline cmdline_actions; if !option_o <> None && !num_input_files >= 2 then @@ -188,7 +193,7 @@ let _ = if !num_input_files = 0 then fatal_error no_loc "no input file"; perform_actions () - with +with | Sys_error msg | CmdError msg -> error no_loc "%s" msg; exit 2 | Abort -> exit 2 diff --git a/exportclight/ExportClight.ml b/exportclight/ExportClight.ml index c9d6fced..87956b58 100644 --- a/exportclight/ExportClight.ml +++ b/exportclight/ExportClight.ml @@ -81,8 +81,12 @@ let define_idents p = string_of_atom (fun (id, name) -> try - fprintf p "Definition _%s : ident := %ld%%positive.@ " - (sanitize name) (P.to_int32 id) + if id = pos_of_string name then + fprintf p "Definition _%s : ident := $\"%s\".@ " + (sanitize name) name + else + fprintf p "Definition _%s : ident := %ld%%positive.@ " + (sanitize name) (P.to_int32 id) with Not_an_identifier -> ()); iter_hashtbl_sorted @@ -93,9 +97,11 @@ let define_idents p = fprintf p "@ " let name_temporary t = - let t1 = P.to_int t and t0 = P.to_int (first_unused_ident ()) in - if t1 >= t0 && not (Hashtbl.mem temp_names t) - then Hashtbl.add temp_names t (sprintf "_t'%d" (t1 - t0 + 1)) + if not (Hashtbl.mem string_of_atom t) && not (Hashtbl.mem temp_names t) + then begin + let t1 = P.to_int t and t0 = P.to_int (first_unused_ident ()) in + Hashtbl.add temp_names t (sprintf "_t'%d" (t1 - t0 + 1)) + end let name_opt_temporary = function | None -> () @@ -468,7 +474,7 @@ let print_assertion p (txt, targs) = | Text _ -> () | Param n -> max_param := max n !max_param) frags; - fprintf p " | \"%s\"%%string, " txt; + fprintf p " | \"%s\", " txt; list_iteri (fun i targ -> fprintf p "_x%d :: " (i + 1)) targs; @@ -495,7 +501,8 @@ let print_assertions p = let prologue = "\ From Coq Require Import String List ZArith.\n\ From compcert Require Import Coqlib Integers Floats AST Ctypes Cop Clight Clightdefs.\n\ -Local Open Scope Z_scope.\n" +Local Open Scope Z_scope.\n\ +Local Open Scope string_scope.\n" (* Naming the compiler-generated temporaries occurring in the program *) @@ -554,15 +561,15 @@ let name_program p = let print_clightgen_info p sourcefile normalized = fprintf p "@[<v 2>Module Info."; - fprintf p "@ Definition version := %S%%string." Version.version; - fprintf p "@ Definition build_number := %S%%string." Version.buildnr; - fprintf p "@ Definition build_tag := %S%%string." Version.tag; - fprintf p "@ Definition arch := %S%%string." Configuration.arch; - fprintf p "@ Definition model := %S%%string." Configuration.model; - fprintf p "@ Definition abi := %S%%string." Configuration.abi; + fprintf p "@ Definition version := %S." Version.version; + fprintf p "@ Definition build_number := %S." Version.buildnr; + fprintf p "@ Definition build_tag := %S." Version.tag; + fprintf p "@ Definition arch := %S." Configuration.arch; + fprintf p "@ Definition model := %S." Configuration.model; + fprintf p "@ Definition abi := %S." Configuration.abi; fprintf p "@ Definition bitsize := %d." (if Archi.ptr64 then 64 else 32); fprintf p "@ Definition big_endian := %B." Archi.big_endian; - fprintf p "@ Definition source_file := %S%%string." sourcefile; + fprintf p "@ Definition source_file := %S." sourcefile; fprintf p "@ Definition normalized := %B." normalized; fprintf p "@]@ End Info.@ @ " diff --git a/lib/Camlcoq.ml b/lib/Camlcoq.ml index 66322efb..af65b28e 100644 --- a/lib/Camlcoq.ml +++ b/lib/Camlcoq.ml @@ -282,23 +282,96 @@ type atom = positive let atom_of_string = (Hashtbl.create 17 : (string, atom) Hashtbl.t) let string_of_atom = (Hashtbl.create 17 : (atom, string) Hashtbl.t) let next_atom = ref Coq_xH +let use_canonical_atoms = ref false + +(* If [use_canonical_atoms] is false, strings are numbered from 1 up + in the order in which they are encountered. This produces small + numbers, and is therefore efficient, but the number for a given + string may differ between the compilation of different units. + + If [use_canonical_atoms] is true, strings are Huffman-encoded as bit + sequences, which are then encoded as positive numbers. The same + string is always represented by the same number in all compilation + units. However, the numbers are bigger than in the first + implementation. Also, this places a hard limit on the number of + fresh identifiers that can be generated starting with + [first_unused_ident]. *) + +let rec append_bits_pos nbits n p = + if nbits <= 0 then p else + if n land 1 = 0 + then Coq_xO (append_bits_pos (nbits - 1) (n lsr 1) p) + else Coq_xI (append_bits_pos (nbits - 1) (n lsr 1) p) + +(* The encoding of strings as bit sequences is optimized for C identifiers: + - numbers are encoded as a 6-bit integer between 0 and 9 + - lowercase letters are encoded as a 6-bit integer between 10 and 35 + - uppercase letters are encoded as a 6-bit integer between 36 and 61 + - the underscore character is encoded as the 6-bit integer 62 + - all other characters are encoded as 6 "one" bits followed by + the 8-bit encoding of the character. *) + +let append_char_pos c p = + match c with + | '0'..'9' -> append_bits_pos 6 (Char.code c - Char.code '0') p + | 'a'..'z' -> append_bits_pos 6 (Char.code c - Char.code 'a' + 10) p + | 'A'..'Z' -> append_bits_pos 6 (Char.code c - Char.code 'A' + 36) p + | '_' -> append_bits_pos 6 62 p + | _ -> append_bits_pos 6 63 (append_bits_pos 8 (Char.code c) p) + +(* The empty string is represented as the positive "1", that is, [xH]. *) + +let pos_of_string s = + let rec encode i accu = + if i < 0 then accu else encode (i - 1) (append_char_pos s.[i] accu) + in encode (String.length s - 1) Coq_xH + +let fresh_atom () = + let a = !next_atom in + next_atom := Pos.succ !next_atom; + a let intern_string s = try Hashtbl.find atom_of_string s with Not_found -> - let a = !next_atom in - next_atom := Pos.succ !next_atom; + let a = + if !use_canonical_atoms then pos_of_string s else fresh_atom () in Hashtbl.add atom_of_string s a; Hashtbl.add string_of_atom a s; a + let extern_atom a = try Hashtbl.find string_of_atom a with Not_found -> Printf.sprintf "$%d" (P.to_int a) -let first_unused_ident () = !next_atom +(* Ignoring the terminating "1" bit, canonical encodings of strings can + be viewed as lists of bits, formed by concatenation of 6-bit fragments + (for letters, numbers, and underscore) and 14-bit fragments (for other + characters). Hence, not all positive numbers are canonical encodings: + only those whose log2 is of the form [6n + 14m]. + + Here are the first intervals of positive numbers corresponding to strings: + - [1, 1] for the empty string + - [2^6, 2^7-1] for one "compact" character + - [2^12, 2^13-1] for two "compact" characters + - [2^14, 2^14-1] for one "escaped" character + + Hence, between 2^7 and 2^12 - 1, we have 3968 consecutive positive + numbers that cannot be the encoding of a string. These are the positive + numbers we'll use as temporaries in the SimplExpr pass if canonical + atoms are in use. + + If short atoms are used, we just number the temporaries consecutively + starting one above the last generated atom. +*) + +let first_unused_ident () = + if !use_canonical_atoms + then P.of_int 128 + else !next_atom (* Strings *) |