diff options
author | Xavier Leroy <xavierleroy@users.noreply.github.com> | 2020-05-19 10:25:45 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-05-19 10:25:45 +0200 |
commit | 0eba6f63b6bc458d856e477f6f8ec6b78ef78c58 (patch) | |
tree | 6ab59e34aea0369013ed7e7461f3bf2f732d97bd /exportclight/Clightdefs.v | |
parent | 4a676623badb718da4055b7f26ee05f5097f4e7b (diff) | |
download | compcert-0eba6f63b6bc458d856e477f6f8ec6b78ef78c58.tar.gz compcert-0eba6f63b6bc458d856e477f6f8ec6b78ef78c58.zip |
Add a canonical encoding of identifiers as numbers and use it in clightgen (#353)
Within CompCert, identifiers (names of C functions, variables, types,
etc) are represented by unique positive numbers, sometimes called
"atoms".
In the original implementation, atoms 1, 2, ..., N are assigned
to identifiers as they are encountered. The resulting number
are small and are efficient when used as keys in data structures
such as PTrees. However, the mapping from C source-level identifiers
to atoms differs between compilation units. This is not a problem
for CompCert but complicates CompCert-based verification tools
that need to combine several compilation units.
This commit introduces an alternate implementation of atoms, suggested
by Andrew Appel. The choice between implementations is governed by
the Boolean reference `Camlcoq.use_canonical_atoms`.
In the alternate implementation, identifiers are converted to bit
sequences via a Huffman encoding, then the bits are represented as
positive numbers. The same identifier is always represented by the
same number. However, the numbers are usually bigger than in the
original implementation, making PTree operations slower: lookups and
updates take time linear in the length of the identifier, instead of
logarithmic time in the number of identifiers encountered.
The CompCert compiler (the `ccomp` executable) still uses the original
implementation, but the `clightgen` tool used in conjunction with the
VST program logic can use either implementations:
- The alternate "canonical atoms" implementation is used by default,
and also if the `-canonical-idents` option is given.
- The original implementation is used if the `-short-idents` option is
given.
Closes: #222
Closes: #311
Diffstat (limited to 'exportclight/Clightdefs.v')
-rw-r--r-- | exportclight/Clightdefs.v | 101 |
1 files changed, 100 insertions, 1 deletions
diff --git a/exportclight/Clightdefs.v b/exportclight/Clightdefs.v index 83d82d88..c2b38a92 100644 --- a/exportclight/Clightdefs.v +++ b/exportclight/Clightdefs.v @@ -15,7 +15,7 @@ (** All imports and definitions used by .v Clight files generated by clightgen *) -From Coq Require Import String List ZArith. +From Coq Require Import Ascii String List ZArith. From compcert Require Import Integers Floats Maps Errors AST Ctypes Cop Clight. Definition tvoid := Tvoid. @@ -80,3 +80,102 @@ Definition mkprogram (types: list composite_definition) prog_types := types; prog_comp_env := ce; prog_comp_env_eq := EQ |}. + +(** The following encoding of character strings as positive numbers + must be kept consistent with the OCaml function [Camlcoq.pos_of_string]. *) + +Definition append_bit_pos (b: bool) (p: positive) : positive := + if b then xI p else xO p. + +Definition append_char_pos_default (c: ascii) (p: positive) : positive := + let '(Ascii b7 b6 b5 b4 b3 b2 b1 b0) := c in + xI (xI (xI (xI (xI (xI + (append_bit_pos b0 (append_bit_pos b1 + (append_bit_pos b2 (append_bit_pos b3 + (append_bit_pos b4 (append_bit_pos b5 + (append_bit_pos b6 (append_bit_pos b7 p))))))))))))). + +Definition append_char_pos (c: ascii) (p: positive) : positive := + match c with + | "0"%char => xO (xO (xO (xO (xO (xO p))))) + | "1"%char => xI (xO (xO (xO (xO (xO p))))) + | "2"%char => xO (xI (xO (xO (xO (xO p))))) + | "3"%char => xI (xI (xO (xO (xO (xO p))))) + | "4"%char => xO (xO (xI (xO (xO (xO p))))) + | "5"%char => xI (xO (xI (xO (xO (xO p))))) + | "6"%char => xO (xI (xI (xO (xO (xO p))))) + | "7"%char => xI (xI (xI (xO (xO (xO p))))) + | "8"%char => xO (xO (xO (xI (xO (xO p))))) + | "9"%char => xI (xO (xO (xI (xO (xO p))))) + | "a"%char => xO (xI (xO (xI (xO (xO p))))) + | "b"%char => xI (xI (xO (xI (xO (xO p))))) + | "c"%char => xO (xO (xI (xI (xO (xO p))))) + | "d"%char => xI (xO (xI (xI (xO (xO p))))) + | "e"%char => xO (xI (xI (xI (xO (xO p))))) + | "f"%char => xI (xI (xI (xI (xO (xO p))))) + | "g"%char => xO (xO (xO (xO (xI (xO p))))) + | "h"%char => xI (xO (xO (xO (xI (xO p))))) + | "i"%char => xO (xI (xO (xO (xI (xO p))))) + | "j"%char => xI (xI (xO (xO (xI (xO p))))) + | "k"%char => xO (xO (xI (xO (xI (xO p))))) + | "l"%char => xI (xO (xI (xO (xI (xO p))))) + | "m"%char => xO (xI (xI (xO (xI (xO p))))) + | "n"%char => xI (xI (xI (xO (xI (xO p))))) + | "o"%char => xO (xO (xO (xI (xI (xO p))))) + | "p"%char => xI (xO (xO (xI (xI (xO p))))) + | "q"%char => xO (xI (xO (xI (xI (xO p))))) + | "r"%char => xI (xI (xO (xI (xI (xO p))))) + | "s"%char => xO (xO (xI (xI (xI (xO p))))) + | "t"%char => xI (xO (xI (xI (xI (xO p))))) + | "u"%char => xO (xI (xI (xI (xI (xO p))))) + | "v"%char => xI (xI (xI (xI (xI (xO p))))) + | "w"%char => xO (xO (xO (xO (xO (xI p))))) + | "x"%char => xI (xO (xO (xO (xO (xI p))))) + | "y"%char => xO (xI (xO (xO (xO (xI p))))) + | "z"%char => xI (xI (xO (xO (xO (xI p))))) + | "A"%char => xO (xO (xI (xO (xO (xI p))))) + | "B"%char => xI (xO (xI (xO (xO (xI p))))) + | "C"%char => xO (xI (xI (xO (xO (xI p))))) + | "D"%char => xI (xI (xI (xO (xO (xI p))))) + | "E"%char => xO (xO (xO (xI (xO (xI p))))) + | "F"%char => xI (xO (xO (xI (xO (xI p))))) + | "G"%char => xO (xI (xO (xI (xO (xI p))))) + | "H"%char => xI (xI (xO (xI (xO (xI p))))) + | "I"%char => xO (xO (xI (xI (xO (xI p))))) + | "J"%char => xI (xO (xI (xI (xO (xI p))))) + | "K"%char => xO (xI (xI (xI (xO (xI p))))) + | "L"%char => xI (xI (xI (xI (xO (xI p))))) + | "M"%char => xO (xO (xO (xO (xI (xI p))))) + | "N"%char => xI (xO (xO (xO (xI (xI p))))) + | "O"%char => xO (xI (xO (xO (xI (xI p))))) + | "P"%char => xI (xI (xO (xO (xI (xI p))))) + | "Q"%char => xO (xO (xI (xO (xI (xI p))))) + | "R"%char => xI (xO (xI (xO (xI (xI p))))) + | "S"%char => xO (xI (xI (xO (xI (xI p))))) + | "T"%char => xI (xI (xI (xO (xI (xI p))))) + | "U"%char => xO (xO (xO (xI (xI (xI p))))) + | "V"%char => xI (xO (xO (xI (xI (xI p))))) + | "W"%char => xO (xI (xO (xI (xI (xI p))))) + | "X"%char => xI (xI (xO (xI (xI (xI p))))) + | "Y"%char => xO (xO (xI (xI (xI (xI p))))) + | "Z"%char => xI (xO (xI (xI (xI (xI p))))) + | "_"%char => xO (xI (xI (xI (xI (xI p))))) + | _ => append_char_pos_default c p + end. + +Fixpoint ident_of_string (s: string) : ident := + match s with + | EmptyString => xH + | String c s => append_char_pos c (ident_of_string s) + end. + +(** A convenient notation [$ "ident"] to force evaluation of + [ident_of_string "ident"] *) + +Ltac ident_of_string s := + let x := constr:(ident_of_string s) in + let y := eval compute in x in + exact y. + +Notation "$ s" := (ltac:(ident_of_string s)) + (at level 1, only parsing) : string_scope. |