aboutsummaryrefslogtreecommitdiffstats
path: root/exportclight
diff options
context:
space:
mode:
authorXavier Leroy <xavierleroy@users.noreply.github.com>2020-05-19 10:25:45 +0200
committerGitHub <noreply@github.com>2020-05-19 10:25:45 +0200
commit0eba6f63b6bc458d856e477f6f8ec6b78ef78c58 (patch)
tree6ab59e34aea0369013ed7e7461f3bf2f732d97bd /exportclight
parent4a676623badb718da4055b7f26ee05f5097f4e7b (diff)
downloadcompcert-kvx-0eba6f63b6bc458d856e477f6f8ec6b78ef78c58.tar.gz
compcert-kvx-0eba6f63b6bc458d856e477f6f8ec6b78ef78c58.zip
Add a canonical encoding of identifiers as numbers and use it in clightgen (#353)
Within CompCert, identifiers (names of C functions, variables, types, etc) are represented by unique positive numbers, sometimes called "atoms". In the original implementation, atoms 1, 2, ..., N are assigned to identifiers as they are encountered. The resulting number are small and are efficient when used as keys in data structures such as PTrees. However, the mapping from C source-level identifiers to atoms differs between compilation units. This is not a problem for CompCert but complicates CompCert-based verification tools that need to combine several compilation units. This commit introduces an alternate implementation of atoms, suggested by Andrew Appel. The choice between implementations is governed by the Boolean reference `Camlcoq.use_canonical_atoms`. In the alternate implementation, identifiers are converted to bit sequences via a Huffman encoding, then the bits are represented as positive numbers. The same identifier is always represented by the same number. However, the numbers are usually bigger than in the original implementation, making PTree operations slower: lookups and updates take time linear in the length of the identifier, instead of logarithmic time in the number of identifiers encountered. The CompCert compiler (the `ccomp` executable) still uses the original implementation, but the `clightgen` tool used in conjunction with the VST program logic can use either implementations: - The alternate "canonical atoms" implementation is used by default, and also if the `-canonical-idents` option is given. - The original implementation is used if the `-short-idents` option is given. Closes: #222 Closes: #311
Diffstat (limited to 'exportclight')
-rw-r--r--exportclight/Clightdefs.v101
-rw-r--r--exportclight/Clightgen.ml9
-rw-r--r--exportclight/ExportClight.ml35
3 files changed, 128 insertions, 17 deletions
diff --git a/exportclight/Clightdefs.v b/exportclight/Clightdefs.v
index 83d82d88..c2b38a92 100644
--- a/exportclight/Clightdefs.v
+++ b/exportclight/Clightdefs.v
@@ -15,7 +15,7 @@
(** All imports and definitions used by .v Clight files generated by clightgen *)
-From Coq Require Import String List ZArith.
+From Coq Require Import Ascii String List ZArith.
From compcert Require Import Integers Floats Maps Errors AST Ctypes Cop Clight.
Definition tvoid := Tvoid.
@@ -80,3 +80,102 @@ Definition mkprogram (types: list composite_definition)
prog_types := types;
prog_comp_env := ce;
prog_comp_env_eq := EQ |}.
+
+(** The following encoding of character strings as positive numbers
+ must be kept consistent with the OCaml function [Camlcoq.pos_of_string]. *)
+
+Definition append_bit_pos (b: bool) (p: positive) : positive :=
+ if b then xI p else xO p.
+
+Definition append_char_pos_default (c: ascii) (p: positive) : positive :=
+ let '(Ascii b7 b6 b5 b4 b3 b2 b1 b0) := c in
+ xI (xI (xI (xI (xI (xI
+ (append_bit_pos b0 (append_bit_pos b1
+ (append_bit_pos b2 (append_bit_pos b3
+ (append_bit_pos b4 (append_bit_pos b5
+ (append_bit_pos b6 (append_bit_pos b7 p))))))))))))).
+
+Definition append_char_pos (c: ascii) (p: positive) : positive :=
+ match c with
+ | "0"%char => xO (xO (xO (xO (xO (xO p)))))
+ | "1"%char => xI (xO (xO (xO (xO (xO p)))))
+ | "2"%char => xO (xI (xO (xO (xO (xO p)))))
+ | "3"%char => xI (xI (xO (xO (xO (xO p)))))
+ | "4"%char => xO (xO (xI (xO (xO (xO p)))))
+ | "5"%char => xI (xO (xI (xO (xO (xO p)))))
+ | "6"%char => xO (xI (xI (xO (xO (xO p)))))
+ | "7"%char => xI (xI (xI (xO (xO (xO p)))))
+ | "8"%char => xO (xO (xO (xI (xO (xO p)))))
+ | "9"%char => xI (xO (xO (xI (xO (xO p)))))
+ | "a"%char => xO (xI (xO (xI (xO (xO p)))))
+ | "b"%char => xI (xI (xO (xI (xO (xO p)))))
+ | "c"%char => xO (xO (xI (xI (xO (xO p)))))
+ | "d"%char => xI (xO (xI (xI (xO (xO p)))))
+ | "e"%char => xO (xI (xI (xI (xO (xO p)))))
+ | "f"%char => xI (xI (xI (xI (xO (xO p)))))
+ | "g"%char => xO (xO (xO (xO (xI (xO p)))))
+ | "h"%char => xI (xO (xO (xO (xI (xO p)))))
+ | "i"%char => xO (xI (xO (xO (xI (xO p)))))
+ | "j"%char => xI (xI (xO (xO (xI (xO p)))))
+ | "k"%char => xO (xO (xI (xO (xI (xO p)))))
+ | "l"%char => xI (xO (xI (xO (xI (xO p)))))
+ | "m"%char => xO (xI (xI (xO (xI (xO p)))))
+ | "n"%char => xI (xI (xI (xO (xI (xO p)))))
+ | "o"%char => xO (xO (xO (xI (xI (xO p)))))
+ | "p"%char => xI (xO (xO (xI (xI (xO p)))))
+ | "q"%char => xO (xI (xO (xI (xI (xO p)))))
+ | "r"%char => xI (xI (xO (xI (xI (xO p)))))
+ | "s"%char => xO (xO (xI (xI (xI (xO p)))))
+ | "t"%char => xI (xO (xI (xI (xI (xO p)))))
+ | "u"%char => xO (xI (xI (xI (xI (xO p)))))
+ | "v"%char => xI (xI (xI (xI (xI (xO p)))))
+ | "w"%char => xO (xO (xO (xO (xO (xI p)))))
+ | "x"%char => xI (xO (xO (xO (xO (xI p)))))
+ | "y"%char => xO (xI (xO (xO (xO (xI p)))))
+ | "z"%char => xI (xI (xO (xO (xO (xI p)))))
+ | "A"%char => xO (xO (xI (xO (xO (xI p)))))
+ | "B"%char => xI (xO (xI (xO (xO (xI p)))))
+ | "C"%char => xO (xI (xI (xO (xO (xI p)))))
+ | "D"%char => xI (xI (xI (xO (xO (xI p)))))
+ | "E"%char => xO (xO (xO (xI (xO (xI p)))))
+ | "F"%char => xI (xO (xO (xI (xO (xI p)))))
+ | "G"%char => xO (xI (xO (xI (xO (xI p)))))
+ | "H"%char => xI (xI (xO (xI (xO (xI p)))))
+ | "I"%char => xO (xO (xI (xI (xO (xI p)))))
+ | "J"%char => xI (xO (xI (xI (xO (xI p)))))
+ | "K"%char => xO (xI (xI (xI (xO (xI p)))))
+ | "L"%char => xI (xI (xI (xI (xO (xI p)))))
+ | "M"%char => xO (xO (xO (xO (xI (xI p)))))
+ | "N"%char => xI (xO (xO (xO (xI (xI p)))))
+ | "O"%char => xO (xI (xO (xO (xI (xI p)))))
+ | "P"%char => xI (xI (xO (xO (xI (xI p)))))
+ | "Q"%char => xO (xO (xI (xO (xI (xI p)))))
+ | "R"%char => xI (xO (xI (xO (xI (xI p)))))
+ | "S"%char => xO (xI (xI (xO (xI (xI p)))))
+ | "T"%char => xI (xI (xI (xO (xI (xI p)))))
+ | "U"%char => xO (xO (xO (xI (xI (xI p)))))
+ | "V"%char => xI (xO (xO (xI (xI (xI p)))))
+ | "W"%char => xO (xI (xO (xI (xI (xI p)))))
+ | "X"%char => xI (xI (xO (xI (xI (xI p)))))
+ | "Y"%char => xO (xO (xI (xI (xI (xI p)))))
+ | "Z"%char => xI (xO (xI (xI (xI (xI p)))))
+ | "_"%char => xO (xI (xI (xI (xI (xI p)))))
+ | _ => append_char_pos_default c p
+ end.
+
+Fixpoint ident_of_string (s: string) : ident :=
+ match s with
+ | EmptyString => xH
+ | String c s => append_char_pos c (ident_of_string s)
+ end.
+
+(** A convenient notation [$ "ident"] to force evaluation of
+ [ident_of_string "ident"] *)
+
+Ltac ident_of_string s :=
+ let x := constr:(ident_of_string s) in
+ let y := eval compute in x in
+ exact y.
+
+Notation "$ s" := (ltac:(ident_of_string s))
+ (at level 1, only parsing) : string_scope.
diff --git a/exportclight/Clightgen.ml b/exportclight/Clightgen.ml
index f7279a5e..637454f0 100644
--- a/exportclight/Clightgen.ml
+++ b/exportclight/Clightgen.ml
@@ -98,6 +98,8 @@ Recognized source files:
.i or .p C source file that should not be preprocessed
Processing options:
-normalize Normalize the generated Clight code w.r.t. loads in expressions
+ -canonical-idents Use canonical numbers to represent identifiers (default)
+ -short-idents Use canonical numbers to represent identifiers
-E Preprocess only, send result to standard output
-o <file> Generate output in <file>
|} ^
@@ -142,6 +144,8 @@ let cmdline_actions =
(* Processing options *)
[ Exact "-E", Set option_E;
Exact "-normalize", Set option_normalize;
+ Exact "-canonical-idents", Set Camlcoq.use_canonical_atoms;
+ Exact "-short-idents", Unset Camlcoq.use_canonical_atoms;
Exact "-o", String(fun s -> option_o := Some s);
Prefix "-o", Self (fun s -> let s = String.sub s 2 ((String.length s) - 2) in
option_o := Some s);]
@@ -175,12 +179,13 @@ let cmdline_actions =
]
let _ =
- try
+try
Gc.set { (Gc.get()) with
Gc.minor_heap_size = 524288; (* 512k *)
Gc.major_heap_increment = 4194304 (* 4M *)
};
Printexc.record_backtrace true;
+ Camlcoq.use_canonical_atoms := true;
Frontend.init ();
parse_cmdline cmdline_actions;
if !option_o <> None && !num_input_files >= 2 then
@@ -188,7 +193,7 @@ let _ =
if !num_input_files = 0 then
fatal_error no_loc "no input file";
perform_actions ()
- with
+with
| Sys_error msg
| CmdError msg -> error no_loc "%s" msg; exit 2
| Abort -> exit 2
diff --git a/exportclight/ExportClight.ml b/exportclight/ExportClight.ml
index c9d6fced..87956b58 100644
--- a/exportclight/ExportClight.ml
+++ b/exportclight/ExportClight.ml
@@ -81,8 +81,12 @@ let define_idents p =
string_of_atom
(fun (id, name) ->
try
- fprintf p "Definition _%s : ident := %ld%%positive.@ "
- (sanitize name) (P.to_int32 id)
+ if id = pos_of_string name then
+ fprintf p "Definition _%s : ident := $\"%s\".@ "
+ (sanitize name) name
+ else
+ fprintf p "Definition _%s : ident := %ld%%positive.@ "
+ (sanitize name) (P.to_int32 id)
with Not_an_identifier ->
());
iter_hashtbl_sorted
@@ -93,9 +97,11 @@ let define_idents p =
fprintf p "@ "
let name_temporary t =
- let t1 = P.to_int t and t0 = P.to_int (first_unused_ident ()) in
- if t1 >= t0 && not (Hashtbl.mem temp_names t)
- then Hashtbl.add temp_names t (sprintf "_t'%d" (t1 - t0 + 1))
+ if not (Hashtbl.mem string_of_atom t) && not (Hashtbl.mem temp_names t)
+ then begin
+ let t1 = P.to_int t and t0 = P.to_int (first_unused_ident ()) in
+ Hashtbl.add temp_names t (sprintf "_t'%d" (t1 - t0 + 1))
+ end
let name_opt_temporary = function
| None -> ()
@@ -468,7 +474,7 @@ let print_assertion p (txt, targs) =
| Text _ -> ()
| Param n -> max_param := max n !max_param)
frags;
- fprintf p " | \"%s\"%%string, " txt;
+ fprintf p " | \"%s\", " txt;
list_iteri
(fun i targ -> fprintf p "_x%d :: " (i + 1))
targs;
@@ -495,7 +501,8 @@ let print_assertions p =
let prologue = "\
From Coq Require Import String List ZArith.\n\
From compcert Require Import Coqlib Integers Floats AST Ctypes Cop Clight Clightdefs.\n\
-Local Open Scope Z_scope.\n"
+Local Open Scope Z_scope.\n\
+Local Open Scope string_scope.\n"
(* Naming the compiler-generated temporaries occurring in the program *)
@@ -554,15 +561,15 @@ let name_program p =
let print_clightgen_info p sourcefile normalized =
fprintf p "@[<v 2>Module Info.";
- fprintf p "@ Definition version := %S%%string." Version.version;
- fprintf p "@ Definition build_number := %S%%string." Version.buildnr;
- fprintf p "@ Definition build_tag := %S%%string." Version.tag;
- fprintf p "@ Definition arch := %S%%string." Configuration.arch;
- fprintf p "@ Definition model := %S%%string." Configuration.model;
- fprintf p "@ Definition abi := %S%%string." Configuration.abi;
+ fprintf p "@ Definition version := %S." Version.version;
+ fprintf p "@ Definition build_number := %S." Version.buildnr;
+ fprintf p "@ Definition build_tag := %S." Version.tag;
+ fprintf p "@ Definition arch := %S." Configuration.arch;
+ fprintf p "@ Definition model := %S." Configuration.model;
+ fprintf p "@ Definition abi := %S." Configuration.abi;
fprintf p "@ Definition bitsize := %d." (if Archi.ptr64 then 64 else 32);
fprintf p "@ Definition big_endian := %B." Archi.big_endian;
- fprintf p "@ Definition source_file := %S%%string." sourcefile;
+ fprintf p "@ Definition source_file := %S." sourcefile;
fprintf p "@ Definition normalized := %B." normalized;
fprintf p "@]@ End Info.@ @ "