diff options
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | README.md | 8 | ||||
-rw-r--r-- | aarch64/Asm.v | 4 | ||||
-rw-r--r-- | backend/CSE3analysis.v | 53 | ||||
-rw-r--r-- | backend/CSE3analysisaux.ml | 146 | ||||
-rw-r--r-- | backend/CSE3analysisproof.v | 74 | ||||
-rw-r--r-- | backend/Duplicate.v | 26 | ||||
-rw-r--r-- | backend/Duplicateaux.ml | 208 | ||||
-rw-r--r-- | backend/Duplicatepasses.v | 58 | ||||
-rw-r--r-- | backend/Duplicateproof.v | 5 | ||||
-rw-r--r-- | backend/LICMaux.ml | 58 | ||||
-rw-r--r-- | backend/PrintLTL.ml | 4 | ||||
-rw-r--r-- | backend/Tunneling.v | 138 | ||||
-rw-r--r-- | backend/Tunnelingaux.ml | 283 | ||||
-rw-r--r-- | backend/Tunnelingproof.v | 504 | ||||
-rw-r--r-- | driver/Clflags.ml | 6 | ||||
-rw-r--r-- | driver/Compiler.vexpand | 3 | ||||
-rw-r--r-- | driver/Compopts.v | 3 | ||||
-rw-r--r-- | driver/Driver.ml | 7 | ||||
-rw-r--r-- | extraction/extraction.v | 2 | ||||
-rw-r--r-- | lib/UnionFind.v | 63 | ||||
-rw-r--r-- | test/monniaux/loop_nest/polybench.h | 202 | ||||
-rw-r--r-- | test/monniaux/loop_nest/syrk.h | 54 | ||||
-rw-r--r-- | tools/compiler_expand.ml | 94 |
24 files changed, 1565 insertions, 440 deletions
@@ -83,7 +83,7 @@ BACKEND=\ Profiling.v Profilingproof.v \ ProfilingExploit.v ProfilingExploitproof.v \ Renumber.v Renumberproof.v \ - Duplicate.v Duplicateproof.v \ + Duplicate.v Duplicateproof.v Duplicatepasses.v \ RTLtyping.v \ Kildall.v Liveness.v \ ValueDomain.v ValueAOp.v ValueAnalysis.v \ @@ -21,7 +21,7 @@ This is a special version with additions from Verimag and Kalray : * A backend for the KVX processor: see [`README_Kalray.md`](README_Kalray.md) for details. * Some general-purpose optimization phases (e.g. profiling). - - see [`PROFILING.md`](PROFILING.md) for details on the profiling system + * see [`PROFILING.md`](PROFILING.md) for details on the profiling system The people responsible for this version are @@ -29,10 +29,12 @@ The people responsible for this version are * David Monniaux (CNRS, Verimag) * Cyril Six (Kalray) -## Papers on this CompCert version +## Papers, docs, etc on this CompCert version -* [a 5-minutes video](http://www-verimag.imag.fr/~boulme/videos/poster-oopsla20.mp4) by C. Six, presenting the postpass scheduling and the KVX backend. +* [a 5-minutes video](http://www-verimag.imag.fr/~boulme/videos/poster-oopsla20.mp4) by C. Six, presenting the postpass scheduling and the KVX backend +(also on [YouTube if you need subtitles](https://www.youtube.com/watch?v=RAzMDS9OVSw)). * [Certified and Efficient Instruction Scheduling](https://hal.archives-ouvertes.fr/hal-02185883), an OOPSLA'20 paper, by Six, Boulmé and Monniaux. +* [the documentation of the KVX backend Coq sources](https://certicompil.gricad-pages.univ-grenoble-alpes.fr/compcert-kvx) ## License CompCert is not free software. This non-commercial release can only diff --git a/aarch64/Asm.v b/aarch64/Asm.v index 5dafb01f..8d3d4ff7 100644 --- a/aarch64/Asm.v +++ b/aarch64/Asm.v @@ -1093,9 +1093,9 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out | Pfmov rd r1 => Next (nextinstr (rs#rd <- (rs#r1))) m | Pfmovimms rd f => - Next (nextinstr (rs#rd <- (Vsingle f))) m + Next (nextinstr (rs#rd <- (Vsingle f))#X16 <- Vundef) m | Pfmovimmd rd f => - Next (nextinstr (rs#rd <- (Vfloat f))) m + Next (nextinstr (rs#rd <- (Vfloat f))#X16 <- Vundef) m | Pfmovi S rd r1 => Next (nextinstr (rs#rd <- (float32_of_bits rs##r1))) m | Pfmovi D rd r1 => diff --git a/backend/CSE3analysis.v b/backend/CSE3analysis.v index 7316c9a9..5ed04bc4 100644 --- a/backend/CSE3analysis.v +++ b/backend/CSE3analysis.v @@ -282,12 +282,14 @@ Section OPERATIONS. Definition oper2 (dst : reg) (op: sym_op)(args : list reg) (rel : RELATION.t) : RELATION.t := - let rel' := kill_reg dst rel in match eq_find {| eq_lhs := dst; eq_op := op; eq_args:= args |} with - | Some id => PSet.add id rel' - | None => rel' + | Some id => + if PSet.contains rel id + then rel + else PSet.add id (kill_reg dst rel) + | None => kill_reg dst rel end. Definition oper1 (dst : reg) (op: sym_op) (args : list reg) @@ -324,18 +326,21 @@ Section OPERATIONS. | _ => kill_reg dst rel end else - if is_trivial_sym_op op - then kill_reg dst rel - else - let args' := forward_move_l rel args in - match rhs_find op args' rel with - | Some r => - if Compopts.optim_CSE3_glb tt - then RELATION.glb (move r dst rel) - (oper1 dst op args' rel) - else oper1 dst op args' rel - | None => oper1 dst op args' rel - end. + let args' := forward_move_l rel args in + match rhs_find op args rel with + | Some r => + if Compopts.optim_CSE3_glb tt + then RELATION.glb (move r dst rel) + (RELATION.glb + (oper1 dst op args rel) + (oper1 dst op args' rel)) + else RELATION.glb + (oper1 dst op args rel) + (oper1 dst op args' rel) + | None => RELATION.glb + (oper1 dst op args rel) + (oper1 dst op args' rel) + end. Definition clever_kill_store (chunk : memory_chunk) (addr: addressing) (args : list reg) @@ -378,11 +383,21 @@ Section OPERATIONS. end else rel'. - Definition store + Definition store (tenv : typing_env) (chunk : memory_chunk) (addr: addressing) (args : list reg) - (src : reg) (ty: typ) + (src : reg) (rel : RELATION.t) : RELATION.t := - store1 chunk addr (forward_move_l rel args) (forward_move rel src) ty rel. + let args' := forward_move_l rel args in + let src' := forward_move rel src in + let tsrc := tenv src in + let tsrc' := tenv src' in + RELATION.glb + (RELATION.glb + (store1 chunk addr args src tsrc rel) + (store1 chunk addr args' src tsrc rel)) + (RELATION.glb + (store1 chunk addr args src' tsrc' rel) + (store1 chunk addr args' src' tsrc' rel)). Definition kill_builtin_res res rel := match res with @@ -425,7 +440,7 @@ Section OPERATIONS. | Icond _ _ _ _ _ | Ijumptable _ _ => Some rel | Istore chunk addr args src _ => - Some (store chunk addr args src (tenv (forward_move rel src)) rel) + Some (store tenv chunk addr args src rel) | Iop op args dst _ => Some (oper dst (SOp op) args rel) | Iload trap chunk addr args dst _ => Some (oper dst (SLoad chunk addr) args rel) | Icall _ _ _ dst _ => Some (kill_reg dst (kill_mem rel)) diff --git a/backend/CSE3analysisaux.ml b/backend/CSE3analysisaux.ml index 3990b765..e37ef61f 100644 --- a/backend/CSE3analysisaux.ml +++ b/backend/CSE3analysisaux.ml @@ -14,7 +14,8 @@ open CSE3analysis open Maps open HashedSet open Camlcoq - +open Coqlib + let flatten_eq eq = ((P.to_int eq.eq_lhs), eq.eq_op, List.map P.to_int eq.eq_args);; @@ -39,12 +40,12 @@ let print_reg channel i = let print_eq channel (lhs, sop, args) = match sop with | SOp op -> - Printf.printf "%a = %a\n" print_reg lhs (PrintOp.print_operation print_reg) (op, args) + Printf.printf "%a = %a" print_reg lhs (PrintOp.print_operation print_reg) (op, args) | SLoad(chunk, addr) -> - Printf.printf "%a = %s @ %a\n" print_reg lhs (string_of_chunk chunk) + Printf.printf "%a = %s @ %a" print_reg lhs (string_of_chunk chunk) (PrintOp.print_addressing print_reg) (addr, args);; -let pp_set oc s = +let pp_intset oc s = Printf.fprintf oc "{ "; List.iter (fun i -> Printf.fprintf oc "%d; " (P.to_int i)) (PSet.elements s); Printf.fprintf oc "}";; @@ -70,6 +71,102 @@ let pp_option pp oc = function let is_trivial eq = (eq.eq_op = SOp Op.Omove) && (eq.eq_args = [eq.eq_lhs]);; +let rec pp_list separator pp_item chan = function + | [] -> () + | [h] -> pp_item chan h + | h::t -> + pp_item chan h; + output_string chan separator; + pp_list separator pp_item chan t;; + +let pp_set separator pp_item chan s = + pp_list separator pp_item chan (PSet.elements s);; + +let pp_equation hints chan x = + match PTree.get x hints.hint_eq_catalog with + | None -> output_string chan "???" + | Some eq -> + print_eq chan (P.to_int eq.eq_lhs, eq.eq_op, List.map P.to_int eq.eq_args);; + +let pp_relation hints chan rel = + pp_set "; " (pp_equation hints) chan rel;; + +let pp_relation_b hints chan = function + | None -> output_string chan "bot" + | Some rel -> pp_relation hints chan rel;; + +let pp_results f (invariants : RB.t PMap.t) hints chan = + let max_pc = P.to_int (RTL.max_pc_function f) in + for pc=max_pc downto 1 + do + Printf.fprintf chan "%d: %a\n\n" pc + (pp_relation_b hints) (PMap.get (P.of_int pc) invariants) + done + +module IntSet=Set.Make(struct type t=int let compare = ( - ) end);; + +let rec union_list prev = function + | [] -> prev + | h::t -> union_list (RB.lub prev h) t;; + +let rb_glb (x : RB.t) (y : RB.t) : RB.t = + match x, y with + | None, _ | _, None -> None + | (Some x'), (Some y') -> Some (RELATION.glb x' y');; + +let refine_invariants + (nodes : RTL.node list) + (entrypoint : RTL.node) + (successors : RTL.node -> RTL.node list) + (predecessors : RTL.node -> RTL.node list) + (tfr : RTL.node -> RB.t -> RB.t) (invariants0 : RB.t PMap.t) = + let todo = ref IntSet.empty + and invariants = ref invariants0 in + let add_todo (pc : RTL.node) = + todo := IntSet.add (P.to_int pc) !todo in + let update_node (pc : RTL.node) = + (if !Clflags.option_debug_compcert > 9 + then Printf.printf "updating node %d\n" (P.to_int pc)); + if not (peq pc entrypoint) + then + let cur = PMap.get pc !invariants in + let nxt = union_list RB.bot + (List.map + (fun pred_pc-> + rb_glb cur + (tfr pred_pc (PMap.get pred_pc !invariants))) + (predecessors pc)) in + if not (RB.beq cur nxt) + then + begin + (if !Clflags.option_debug_compcert > 4 + then Printf.printf "refining CSE3 node %d\n" (P.to_int pc)); + List.iter add_todo (successors pc) + end in + (List.iter add_todo nodes); + while not (IntSet.is_empty !todo) do + let nxt = IntSet.max_elt !todo in + todo := IntSet.remove nxt !todo; + update_node (P.of_int nxt) + done; + !invariants;; + +let get_default default x ptree = + match PTree.get x ptree with + | None -> default + | Some y -> y;; + +let refine_analysis ctx tenv + (f : RTL.coq_function) (invariants0 : RB.t PMap.t) = + let succ_map = RTL.successors_map f in + let succ_f x = get_default [] x succ_map in + let pred_map = Kildall.make_predecessors f.RTL.fn_code RTL.successors_instr in + let pred_f x = get_default [] x pred_map in + let tfr = apply_instr' ctx tenv f.RTL.fn_code in + refine_invariants + (List.map fst (PTree.elements f.RTL.fn_code)) + f.RTL.fn_entrypoint succ_f pred_f tfr invariants0;; + let preanalysis (tenv : typing_env) (f : RTL.coq_function) = let cur_eq_id = ref 0 and cur_catalog = ref PTree.empty @@ -81,7 +178,8 @@ let preanalysis (tenv : typing_env) (f : RTL.coq_function) = let eq_find_oracle node eq = assert (not (is_trivial eq)); let o = Hashtbl.find_opt eq_table (flatten_eq eq) in - (if !Clflags.option_debug_compcert > 1 + (if o = None then failwith "eq_find_oracle"); + (if !Clflags.option_debug_compcert > 5 then Printf.printf "@%d: eq_find %a -> %a\n" (P.to_int node) pp_eq eq (pp_option pp_P) o); o @@ -90,9 +188,9 @@ let preanalysis (tenv : typing_env) (f : RTL.coq_function) = match Hashtbl.find_opt rhs_table (sop, List.map P.to_int args) with | None -> PSet.empty | Some s -> s in - (if !Clflags.option_debug_compcert > 1 + (if !Clflags.option_debug_compcert > 5 then Printf.printf "@%d: rhs_find %a = %a\n" - (P.to_int node) pp_rhs (sop, args) pp_set o); + (P.to_int node) pp_rhs (sop, args) pp_intset o); o in let mutating_eq_find_oracle node eq : P.t option = let (flat_eq_lhs, flat_eq_op, flat_eq_args) as flat_eq = flatten_eq eq in @@ -124,23 +222,29 @@ let preanalysis (tenv : typing_env) (f : RTL.coq_function) = Some coq_id end in - (if !Clflags.option_debug_compcert > 1 + (if !Clflags.option_debug_compcert > 5 then Printf.printf "@%d: mutating_eq_find %a -> %a\n" (P.to_int node) pp_eq eq (pp_option pp_P) o); o in - match - internal_analysis - { eq_catalog = (fun eq_id -> PTree.get eq_id !cur_catalog); - eq_find_oracle = mutating_eq_find_oracle; - eq_rhs_oracle = rhs_find_oracle ; - eq_kill_reg = (fun reg -> PMap.get reg !cur_kill_reg); - eq_kill_mem = (fun () -> !cur_kill_mem); - eq_moves = (fun reg -> PMap.get reg !cur_moves) - } tenv f + let ctx = { eq_catalog = (fun eq_id -> PTree.get eq_id !cur_catalog); + eq_find_oracle = mutating_eq_find_oracle; + eq_rhs_oracle = rhs_find_oracle ; + eq_kill_reg = (fun reg -> PMap.get reg !cur_kill_reg); + eq_kill_mem = (fun () -> !cur_kill_mem); + eq_moves = (fun reg -> PMap.get reg !cur_moves) + } in + match internal_analysis ctx tenv f with None -> failwith "CSE3analysisaux analysis failed, try re-running with -fno-cse3" | Some invariants -> - invariants, - { hint_eq_catalog = !cur_catalog; - hint_eq_find_oracle= eq_find_oracle; - hint_eq_rhs_oracle = rhs_find_oracle };; + let invariants' = + if ! Clflags.option_fcse3_refine + then refine_analysis ctx tenv f invariants + else invariants + and hints = { hint_eq_catalog = !cur_catalog; + hint_eq_find_oracle= eq_find_oracle; + hint_eq_rhs_oracle = rhs_find_oracle } in + (if !Clflags.option_debug_compcert > 1 + then pp_results f invariants' hints stdout); + invariants', hints +;; diff --git a/backend/CSE3analysisproof.v b/backend/CSE3analysisproof.v index 66b199cc..1e5b88c3 100644 --- a/backend/CSE3analysisproof.v +++ b/backend/CSE3analysisproof.v @@ -699,6 +699,27 @@ Section SOUNDNESS. + congruence. Qed. + Lemma arglist_idem_write: + forall { A : Type} args (rs : Regmap.t A) dst, + (rs # dst <- (rs # dst)) ## args = rs ## args. + Proof. + induction args; trivial. + intros. cbn. + f_equal; trivial. + apply Regmap.gsident. + Qed. + + Lemma sem_rhs_idem_write: + forall sop args rs dst m v, + sem_rhs sop args rs m v -> + sem_rhs sop args (rs # dst <- (rs # dst)) m v. + Proof. + intros. + unfold sem_rhs in *. + rewrite arglist_idem_write. + assumption. + Qed. + Theorem oper2_sound: forall no dst sop args rel rs m v, sem_rel rel rs m -> @@ -726,6 +747,17 @@ Section SOUNDNESS. rewrite Regmap.gss. apply sem_rhs_depends_on_args_only; auto. } + intros INi. + destruct (PSet.contains rel e) eqn:CONTAINSe. + { pose proof (REL e {| eq_lhs := dst; eq_op := sop; eq_args := args |} CONTAINSe H) as RELe. + pose proof (REL i eq CONTAINS INi) as RELi. + unfold sem_eq in *. + cbn in RELe. + replace v with (rs # dst) by (eapply sem_rhs_det; eassumption). + rewrite Regmap.gsident. + apply sem_rhs_idem_write. + assumption. + } rewrite PSet.gaddo in CONTAINS by congruence. apply (kill_reg_sound rel rs m dst v REL i eq); auto. Qed. @@ -821,24 +853,24 @@ Section SOUNDNESS. subst. rewrite <- (forward_move_sound rel rs m r) by auto. apply move_sound; auto. - - destruct (is_trivial_sym_op sop). - { - apply kill_reg_sound; auto. - } - destruct rhs_find as [src |] eqn:RHS_FIND. + - destruct rhs_find as [src |] eqn:RHS_FIND. + destruct (Compopts.optim_CSE3_glb tt). * apply sem_rel_glb; split. - ** pose proof (rhs_find_sound no sop (forward_move_l (ctx:=ctx) rel args) rel src rs m REL RHS_FIND) as SOUND. - eapply forward_move_rhs_sound in RHS. - 2: eassumption. + ** pose proof (rhs_find_sound no sop args rel src rs m REL RHS_FIND) as SOUND. rewrite <- (sem_rhs_det SOUND RHS). apply move_sound; auto. + ** apply sem_rel_glb; split. + *** apply oper1_sound; auto. + *** apply oper1_sound; auto. + apply forward_move_rhs_sound; auto. + * apply sem_rel_glb; split. + ** apply oper1_sound; auto. ** apply oper1_sound; auto. apply forward_move_rhs_sound; auto. - * ** apply oper1_sound; auto. - apply forward_move_rhs_sound; auto. - + apply oper1_sound; auto. - apply forward_move_rhs_sound; auto. + + apply sem_rel_glb; split. + * apply oper1_sound; auto. + * apply oper1_sound; auto. + apply forward_move_rhs_sound; auto. Qed. Hint Resolve oper_sound : cse3. @@ -927,22 +959,28 @@ Section SOUNDNESS. Hint Resolve store1_sound : cse3. + Theorem store_sound: forall no chunk addr args a src rel tenv rs m m', sem_rel rel rs m -> wt_regset tenv rs -> eval_addressing genv sp addr (rs ## args) = Some a -> Mem.storev chunk m a (rs#src) = Some m' -> - sem_rel (store (ctx:=ctx) no chunk addr args src (tenv (forward_move (ctx:=ctx) rel src)) rel) rs m'. + sem_rel (store (ctx:=ctx) no tenv chunk addr args src rel) rs m'. Proof. unfold store. intros until m'. intros REL WT ADDR STORE. - rewrite <- forward_move_l_sound with (rel:=rel) (m:=m) in ADDR by trivial. - rewrite <- forward_move_sound with (rel:=rel) (m:=m) in STORE by trivial. - apply store1_sound with (a := a) (m := m); trivial. - (* rewrite forward_move_sound with (rel:=rel) (m:=m) in STORE by trivial. - assumption. *) + apply sem_rel_glb; split. + - apply sem_rel_glb; split. + * apply store1_sound with (a := a) (m := m); trivial. + * rewrite <- forward_move_l_sound with (rel:=rel) (m:=m) in ADDR by trivial. + apply store1_sound with (a := a) (m := m); trivial. + - rewrite <- forward_move_sound with (rel:=rel) (m:=m) in STORE by trivial. + apply sem_rel_glb; split. + * apply store1_sound with (a := a) (m := m); trivial. + * rewrite <- forward_move_l_sound with (rel:=rel) (m:=m) in ADDR by trivial. + apply store1_sound with (a := a) (m := m); trivial. Qed. Hint Resolve store_sound : cse3. diff --git a/backend/Duplicate.v b/backend/Duplicate.v index 0e04b07d..7dea752b 100644 --- a/backend/Duplicate.v +++ b/backend/Duplicate.v @@ -18,14 +18,28 @@ Require Import AST RTL Maps Globalenvs. Require Import Coqlib Errors Op. -Local Open Scope error_monad_scope. -Local Open Scope positive_scope. -(** External oracle returning the new RTL code (entry point unchanged), + +Module Type DuplicateOracle. + + (** External oracle returning the new RTL code (entry point unchanged), along with the new entrypoint, and a mapping of new nodes to old nodes *) -Axiom duplicate_aux: function -> code * node * (PTree.t node). + Parameter duplicate_aux: function -> code * node * (PTree.t node). + +End DuplicateOracle. + + -Extract Constant duplicate_aux => "Duplicateaux.duplicate_aux". +Module Duplicate (D: DuplicateOracle). + +Export D. + +Definition duplicate_aux := duplicate_aux. + +(* Extract Constant duplicate_aux => "Duplicateaux.duplicate_aux". *) + +Local Open Scope error_monad_scope. +Local Open Scope positive_scope. (** * Verification of node duplications *) @@ -215,3 +229,5 @@ Definition transf_fundef (f: fundef) : res fundef := Definition transf_program (p: program) : res program := transform_partial_program transf_fundef p. + +End Duplicate. diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml index eb9f42e0..76b5616b 100644 --- a/backend/Duplicateaux.ml +++ b/backend/Duplicateaux.ml @@ -30,7 +30,9 @@ let get_some = LICMaux.get_some let rtl_successors = LICMaux.rtl_successors (* Get list of nodes following a BFS of the code *) -let bfs code entrypoint = begin +(* Stops when predicate is reached + * Excludes any node given in excluded function *) +let bfs_until code entrypoint (predicate: node->bool) (excluded: node->bool) = begin debug "bfs\n"; let visited = ref (PTree.map (fun n i -> false) code) and bfs_list = ref [] @@ -40,20 +42,24 @@ let bfs code entrypoint = begin Queue.add entrypoint to_visit; while not (Queue.is_empty to_visit) do node := Queue.pop to_visit; - if not (get_some @@ PTree.get !node !visited) then begin + if (not (get_some @@ PTree.get !node !visited)) then begin visited := PTree.set !node true !visited; - match PTree.get !node code with - | None -> failwith "No such node" - | Some i -> - bfs_list := !node :: !bfs_list; - let succ = rtl_successors i in - List.iter (fun n -> Queue.add n to_visit) succ + if not (excluded !node) then begin + match PTree.get !node code with + | None -> failwith "No such node" + | Some i -> + bfs_list := !node :: !bfs_list; + if not (predicate !node) then + let succ = rtl_successors i in List.iter (fun n -> Queue.add n to_visit) succ + end end done; List.rev !bfs_list end end +let bfs code entrypoint = bfs_until code entrypoint (fun _ -> false) (fun _ -> false) + let optbool o = match o with Some _ -> true | None -> false let ptree_get_some n ptree = get_some @@ PTree.get n ptree @@ -81,15 +87,7 @@ end module PSet = Set.Make(PInt) -let print_intlist oc l = - let rec f oc = function - | [] -> () - | n::ln -> (Printf.fprintf oc "%d %a" (P.to_int n) f ln) - in begin - if !debug_flag then begin - Printf.fprintf oc "[%a]" f l - end - end +let print_intlist = LICMaux.print_intlist let print_intset s = let seq = PSet.to_seq s @@ -403,19 +401,7 @@ let print_traces oc traces = Printf.fprintf oc "Traces: {%a}\n" f traces end -(* Adapted from backend/PrintRTL.ml: print_function *) -let print_code code = let open PrintRTL in let open Printf in - if (!debug_flag) then begin - fprintf stdout "{\n"; - let instrs = - List.sort - (fun (pc1, _) (pc2, _) -> compare pc2 pc1) - (List.rev_map - (fun (pc, i) -> (P.to_int pc, i)) - (PTree.elements code)) in - List.iter (print_instruction stdout) instrs; - fprintf stdout "}" - end +let print_code code = LICMaux.print_code code (* Dumb (but linear) trace selection *) let select_traces_linear code entrypoint = @@ -627,15 +613,17 @@ let invert_iconds code = type innerLoop = { preds: P.t list; - body: HashedSet.PSet.t; + body: P.t list; head: P.t; (* head of the loop *) - final: P.t (* the final instruction, which loops back to the head *) + finals: P.t list (* the final instructions, which loops back to the head *) + (* There may be more than one ; for instance if there is an if inside the loop with both + * branches leading to a goto backedge *) } let print_pset = LICMaux.pp_pset let print_inner_loop iloop = - debug "{preds: %a, body: %a}" print_intlist iloop.preds print_pset iloop.body + debug "{preds: %a, body: %a}" print_intlist iloop.preds print_intlist iloop.body let rec print_inner_loops = function | [] -> () @@ -657,6 +645,53 @@ let print_ptree printer pt = let print_pint oc i = if !debug_flag then Printf.fprintf oc "%d" (P.to_int i) else () +let cb_exit_node = function + | Icond (_,_,n1,n2,p) -> begin match p with + | Some true -> Some n2 + | Some false -> Some n1 + | None -> None + end + | _ -> None + + (* +(* Alternative code to get inner_loops - use it if we suspect the other function to be bugged *) +let get_natural_loop code predmap n = + let is_final_node m = + let successors = rtl_successors @@ get_some @@ PTree.get m code in + List.exists (fun s -> (P.to_int s) == (P.to_int n)) successors + in + let excluded_node = cb_exit_node @@ get_some @@ PTree.get n code in + let is_excluded m = match excluded_node with + | None -> false + | Some ex -> P.to_int ex == P.to_int m + in + debug "get_natural_loop for %d\n" (P.to_int n); + let body = bfs_until code n is_final_node is_excluded in + debug "BODY: %a\n" print_intlist body; + let final = List.find is_final_node body in + debug "FINAL: %d\n" (P.to_int final); + let preds = List.filter (fun pred -> List.mem pred body) @@ get_some @@ PTree.get n predmap in + debug "PREDS: %a\n" print_intlist preds; + { preds = preds; body = body; head = n; final = final } + +let rec count_loop_headers is_loop_header = function + | [] -> 0 + | n :: ln -> + let rem = count_loop_headers is_loop_header ln in + if (get_some @@ PTree.get n is_loop_header) then rem + 1 else rem + +let get_inner_loops f code is_loop_header = + let predmap = get_predecessors_rtl code in + let iloops = ref [] in + List.iter (fun (n, ilh) -> if ilh then begin + let iloop = get_natural_loop code predmap n in + let nb_headers = count_loop_headers is_loop_header iloop.body in + if nb_headers == 1 then (* innermost loop *) + iloops := iloop :: !iloops end + ) (PTree.elements is_loop_header); + !iloops + *) + let get_inner_loops f code is_loop_header = let fake_f = { fn_sig = f.fn_sig; fn_params = f.fn_params; fn_stacksize = f.fn_stacksize; fn_code = code; fn_entrypoint = f.fn_entrypoint } in @@ -674,17 +709,16 @@ let get_inner_loops f code is_loop_header = assert (List.length heads == 1); List.hd heads end in - let final = (* the predecessors from head that are in the body *) + let finals = (* the predecessors from head that are in the body *) let head_preds = ptree_get_some head predmap in let filtered = List.filter (fun n -> HashedSet.PSet.contains body n) head_preds in begin debug "HEAD: %d\n" (P.to_int head); debug "BODY: %a\n" print_pset body; debug "HEADPREDS: %a\n" print_intlist head_preds; - assert (List.length filtered == 1); - List.hd filtered + filtered end in - { preds = preds; body = body; head = head; final = final } + { preds = preds; body = (HashedSet.PSet.elements body); head = head; finals = finals } ) (* LICMaux.inner_loops also returns non-inner loops, but with a body of 1 instruction * We remove those to get just the inner loops *) @@ -710,6 +744,8 @@ let generate_revmap ln ln' revmap = generate_fwmap ln' ln revmap let apply_map fw n = P.of_int @@ ptree_get_some n fw +let apply_map_list fw ln = List.map (apply_map fw) ln + let apply_map_opt fw n = match PTree.get n fw with | Some n' -> P.of_int n' @@ -769,7 +805,7 @@ let rec count_ignore_nops code = function * 3) Links the last instruction of body' into the first instruction of body *) let unroll_inner_loop_single code revmap iloop = - let body = HashedSet.PSet.elements (iloop.body) in + let body = iloop.body in if count_ignore_nops code body > !Clflags.option_funrollsingle then begin debug "Too many nodes in the loop body (%d > %d)" (List.length body) !Clflags.option_funrollsingle; (code, revmap) @@ -777,12 +813,12 @@ let unroll_inner_loop_single code revmap iloop = let (code2, revmap2, dupbody, fwmap) = clone code revmap body in let code' = ref code2 in let head' = apply_map fwmap (iloop.head) in - let final' = apply_map fwmap (iloop.final) in + let finals' = apply_map_list fwmap (iloop.finals) in begin debug "PREDS: %a\n" print_intlist iloop.preds; debug "IHEAD: %d\n" (P.to_int iloop.head); code' := change_pointers !code' (iloop.head) head' (iloop.preds); - code' := change_pointers !code' head' (iloop.head) [final']; + code' := change_pointers !code' head' (iloop.head) finals'; (!code', revmap2) end @@ -806,7 +842,7 @@ let unroll_inner_loops_single f code revmap = * 3) Links the last instruction of body' into the first of body *) let unroll_inner_loop_body code revmap iloop = - let body = HashedSet.PSet.elements (iloop.body) in + let body = iloop.body in let limit = !Clflags.option_funrollbody in if count_ignore_nops code body > limit then begin debug "Too many nodes in the loop body (%d > %d)" (List.length body) limit; @@ -815,16 +851,18 @@ let unroll_inner_loop_body code revmap iloop = let (code2, revmap2, dupbody, fwmap) = clone code revmap body in let code' = ref code2 in let head' = apply_map fwmap (iloop.head) in - let final' = apply_map fwmap (iloop.final) in + let finals' = apply_map_list fwmap (iloop.finals) in begin - code' := change_pointers !code' iloop.head head' [iloop.final]; - code' := change_pointers !code' head' iloop.head [final']; + code' := change_pointers !code' iloop.head head' iloop.finals; + code' := change_pointers !code' head' iloop.head finals'; (!code', revmap2) end let unroll_inner_loops_body f code revmap = let is_loop_header = get_loop_headers code (f.fn_entrypoint) in + (* debug_flag := true; *) let inner_loops = get_inner_loops f code is_loop_header in + debug "Number of loops found: %d\n" (List.length inner_loops); let code' = ref code in let revmap' = ref revmap in begin @@ -832,46 +870,102 @@ let unroll_inner_loops_body f code revmap = List.iter (fun iloop -> let (new_code, new_revmap) = unroll_inner_loop_body !code' !revmap' iloop in code' := new_code; revmap' := new_revmap + ) inner_loops; (* debug_flag := false; *) + (!code', !revmap') + end + +let extract_upto_icond f code head = + let rec extract h = + let inst = get_some @@ PTree.get h code in + match inst with + | Icond _ -> [h] + | _ -> ( match rtl_successors inst with + | [n] -> h :: (extract n) + | _ -> failwith "Found a node with more than one successor??" + ) + in List.rev @@ extract head + +let rotate_inner_loop f code revmap iloop = + let header = extract_upto_icond f code iloop.head in + let limit = !Clflags.option_flooprotate in + if count_ignore_nops code header > limit then begin + debug "Loop Rotate: too many nodes to duplicate (%d > %d)" (List.length header) limit; + (code, revmap) + end else + let (code2, revmap2, dupheader, fwmap) = clone code revmap header in + let code' = ref code2 in + let head' = apply_map fwmap iloop.head in + begin + code' := change_pointers !code' iloop.head head' iloop.preds; + (!code', revmap2) + end + +let rotate_inner_loops f code revmap = + let is_loop_header = get_loop_headers code (f.fn_entrypoint) in + let inner_loops = get_inner_loops f code is_loop_header in + let code' = ref code in + let revmap' = ref revmap in + begin + print_inner_loops inner_loops; + List.iter (fun iloop -> + let (new_code, new_revmap) = rotate_inner_loop f !code' !revmap' iloop in + code' := new_code; revmap' := new_revmap ) inner_loops; (!code', !revmap') end -let duplicate_aux f = - (* initializing *) +let loop_rotate f = let entrypoint = f.fn_entrypoint in let code = f.fn_code in let revmap = make_identity_ptree code in + let (code, revmap) = + if !Clflags.option_flooprotate > 0 then + rotate_inner_loops f code revmap + else (code, revmap) in + ((code, entrypoint), revmap) - (* static prediction *) +let static_predict f = + let entrypoint = f.fn_entrypoint in + let code = f.fn_code in + let revmap = make_identity_ptree code in let code = if !Clflags.option_fpredict then update_directions code entrypoint else code in + let code = + if !Clflags.option_fpredict then + invert_iconds code + else code in + ((code, entrypoint), revmap) - (* unroll single *) +let unroll_single f = + let entrypoint = f.fn_entrypoint in + let code = f.fn_code in + let revmap = make_identity_ptree code in let (code, revmap) = if !Clflags.option_funrollsingle > 0 then unroll_inner_loops_single f code revmap else (code, revmap) in + ((code, entrypoint), revmap) - (* unroll body *) +let unroll_body f = + let entrypoint = f.fn_entrypoint in + let code = f.fn_code in + let revmap = make_identity_ptree code in let (code, revmap) = if !Clflags.option_funrollbody > 0 then unroll_inner_loops_body f code revmap else (code, revmap) in + ((code, entrypoint), revmap) - (* static prediction bis *) - let code = - if !Clflags.option_fpredict then - invert_iconds code - else code in - - (* tail duplication *) +let tail_duplicate f = + let entrypoint = f.fn_entrypoint in + let code = f.fn_code in + let revmap = make_identity_ptree code in let (code, revmap) = if !Clflags.option_ftailduplicate > 0 then let traces = select_traces code entrypoint in let preds = get_predecessors_rtl code in superblockify_traces code preds traces revmap else (code, revmap) in - ((code, entrypoint), revmap) diff --git a/backend/Duplicatepasses.v b/backend/Duplicatepasses.v new file mode 100644 index 00000000..7e58eedf --- /dev/null +++ b/backend/Duplicatepasses.v @@ -0,0 +1,58 @@ +Require Import RTL. +Require Import Maps. +Require Import Duplicate. +Require Import Duplicateproof. + +(** Static Prediction *) + +Module StaticPredictOracle <: DuplicateOracle. + Axiom duplicate_aux : function -> code * node * (PTree.t node). + Extract Constant duplicate_aux => "Duplicateaux.static_predict". +End StaticPredictOracle. + +Module Staticpredictproof := DuplicateProof StaticPredictOracle. + +Module Staticpredict := Staticpredictproof. + +(** Unrolling one iteration out of the body *) + +Module UnrollSingleOracle <: DuplicateOracle. + Axiom duplicate_aux : function -> code * node * (PTree.t node). + Extract Constant duplicate_aux => "Duplicateaux.unroll_single". +End UnrollSingleOracle. + +Module Unrollsingleproof := DuplicateProof UnrollSingleOracle. + +Module Unrollsingle := Unrollsingleproof. + +(** Unrolling the body of innermost loops *) + +Module UnrollBodyOracle <: DuplicateOracle. + Axiom duplicate_aux : function -> code * node * (PTree.t node). + Extract Constant duplicate_aux => "Duplicateaux.unroll_body". +End UnrollBodyOracle. + +Module Unrollbodyproof := DuplicateProof UnrollBodyOracle. + +Module Unrollbody := Unrollbodyproof. + +(** Tail Duplication *) + +Module TailDuplicateOracle <: DuplicateOracle. + Axiom duplicate_aux : function -> code * node * (PTree.t node). + Extract Constant duplicate_aux => "Duplicateaux.tail_duplicate". +End TailDuplicateOracle. + +Module Tailduplicateproof := DuplicateProof TailDuplicateOracle. + +Module Tailduplicate := Tailduplicateproof. + +(** Loop Rotate *) + +Module LoopRotateOracle <: DuplicateOracle. + Axiom duplicate_aux : function -> code * node * (PTree.t node). + Extract Constant duplicate_aux => "Duplicateaux.loop_rotate". +End LoopRotateOracle. + +Module Looprotateproof := DuplicateProof LoopRotateOracle. +Module Looprotate := Looprotateproof. diff --git a/backend/Duplicateproof.v b/backend/Duplicateproof.v index 62455076..2480ccf0 100644 --- a/backend/Duplicateproof.v +++ b/backend/Duplicateproof.v @@ -17,6 +17,9 @@ Require Import AST Linking Errors Globalenvs Smallstep. Require Import Coqlib Maps Events Values. Require Import Op RTL Duplicate. +Module DuplicateProof (D: DuplicateOracle). +Include Duplicate D. + Local Open Scope positive_scope. (** * Definition of [match_states] (independently of the translation) *) @@ -535,3 +538,5 @@ Proof. Qed. End PRESERVATION. + +End DuplicateProof. diff --git a/backend/LICMaux.ml b/backend/LICMaux.ml index 0ca4418b..6283e129 100644 --- a/backend/LICMaux.ml +++ b/backend/LICMaux.ml @@ -39,6 +39,42 @@ let rtl_successors = function | Icond (_,_,n1,n2,_) -> [n1; n2] | Ijumptable (_,ln) -> ln +let print_ptree_bool oc pt = + if !debug_flag then + let elements = PTree.elements pt in + begin + Printf.fprintf oc "["; + List.iter (fun (n, b) -> + if b then Printf.fprintf oc "%d, " (P.to_int n) + ) elements; + Printf.fprintf oc "]\n" + end + else () + +let print_intlist oc l = + let rec f oc = function + | [] -> () + | n::ln -> (Printf.fprintf oc "%d %a" (P.to_int n) f ln) + in begin + if !debug_flag then begin + Printf.fprintf oc "[%a]" f l + end + end + +(* Adapted from backend/PrintRTL.ml: print_function *) +let print_code code = let open PrintRTL in let open Printf in + if (!debug_flag) then begin + fprintf stdout "{\n"; + let instrs = + List.sort + (fun (pc1, _) (pc2, _) -> compare pc2 pc1) + (List.rev_map + (fun (pc, i) -> (P.to_int pc, i)) + (PTree.elements code)) in + List.iter (print_instruction stdout) instrs; + fprintf stdout "}" + end + (** Getting loop branches with a DFS visit : * Each node is either Unvisited, Visited, or Processed * pre-order: node becomes Processed @@ -53,23 +89,34 @@ let get_loop_headers code entrypoint = begin in let rec dfs_visit code = function | [] -> () | node :: ln -> + debug "ENTERING node %d, REM are %a\n" (P.to_int node) print_intlist ln; match (get_some @@ PTree.get node !visited) with - | Visited -> () + | Visited -> begin + debug "\tNode %d is already Visited, skipping\n" (P.to_int node); + dfs_visit code ln + end | Processed -> begin debug "Node %d is a loop header\n" (P.to_int node); is_loop_header := PTree.set node true !is_loop_header; - visited := PTree.set node Visited !visited + visited := PTree.set node Visited !visited; + dfs_visit code ln end | Unvisited -> begin visited := PTree.set node Processed !visited; - match PTree.get node code with + debug "Node %d is Processed\n" (P.to_int node); + (match PTree.get node code with | None -> failwith "No such node" - | Some i -> let next_visits = rtl_successors i in dfs_visit code next_visits; + | Some i -> let next_visits = rtl_successors i in begin + debug "About to visit: %a\n" print_intlist next_visits; + dfs_visit code next_visits + end); + debug "Node %d is Visited!\n" (P.to_int node); visited := PTree.set node Visited !visited; dfs_visit code ln end in begin dfs_visit code [entrypoint]; + debug "LOOP HEADERS: %a\n" print_ptree_bool !is_loop_header; !is_loop_header end end @@ -208,7 +255,8 @@ let rewrite_loop_body (last_alloc : reg ref) (List.map (map_reg mapper) args), new_res)); PTree.set res new_res mapper - | Iload(trap, chunk, addr, args, res, pc') + | Iload(_, chunk, addr, args, res, pc') + | Istore(chunk, addr, args, res, pc') when Archi.has_notrap_loads && !Clflags.option_fnontrap_loads -> let new_res = P.succ !last_alloc in diff --git a/backend/PrintLTL.ml b/backend/PrintLTL.ml index d8f2ac12..8259297b 100644 --- a/backend/PrintLTL.ml +++ b/backend/PrintLTL.ml @@ -133,10 +133,10 @@ let print_program pp (prog: LTL.program) = let destination : string option ref = ref None -let print_if prog = +let print_if passno prog = match !destination with | None -> () | Some f -> - let oc = open_out f in + let oc = open_out (f ^ "." ^ Z.to_string passno) in print_program oc prog; close_out oc diff --git a/backend/Tunneling.v b/backend/Tunneling.v index 78458582..269ebb6f 100644 --- a/backend/Tunneling.v +++ b/backend/Tunneling.v @@ -3,6 +3,7 @@ (* The Compcert verified compiler *) (* *) (* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) (* *) (* Copyright Institut National de Recherche en Informatique et en *) (* Automatique. All rights reserved. This file is distributed *) @@ -12,7 +13,7 @@ (** Branch tunneling (optimization of branches to branches). *) -Require Import Coqlib Maps UnionFind. +Require Import Coqlib Maps Errors. Require Import AST. Require Import LTL. @@ -21,10 +22,10 @@ Require Import LTL. so that they jump directly to the end of the branch sequence. For example: << - L1: nop L2; L1: nop L3; - L2; nop L3; becomes L2: nop L3; + L1: if (cond) nop L2; L1: nop L3; + L2: nop L3; becomes L2: nop L3; L3: instr; L3: instr; - L4: if (cond) goto L1; L4: if (cond) goto L3; + L4: if (cond) goto L1; L4: if (cond) nop L1; >> This optimization can be applied to several of our intermediate languages. We choose to perform it on the [LTL] language, @@ -37,11 +38,14 @@ Require Import LTL. dead code (as the "nop L3" in the example above). *) -(** The naive implementation of branch tunneling would replace - any branch to a node [pc] by a branch to the node - [branch_target f pc], defined as follows: +(** The implementation consists in two passes: the first pass + records the branch t of each "nop" + and the second pass replace any "nop" node to [pc] + by a branch to a "nop" at [branch_t f pc] + +Naively, we may define [branch_t f pc] as follows: << - branch_target f pc = branch_target f pc' if f(pc) = nop pc' + branch_t f pc = branch_t f pc' if f(pc) = nop pc' = pc otherwise >> However, this definition can fail to terminate if @@ -50,56 +54,114 @@ Require Import LTL. L1: nop L1; >> or -<< L1: nop L2; +<< + L1: nop L2; L2: nop L1; >> Coq warns us of this fact by not accepting the definition - of [branch_target] above. + of [branch_t] above. + + To handle this problem, we use a union-find data structure, adding equalities [pc = pc'] + for every instruction [pc: nop pc'] in the function. + + Moreover, because the elimination of "useless" [Lcond] depends on the current [uf] datastructure, + we need to iterate until we reach a fixpoint. + + Actually, it is simpler and more efficient to perform this in an external oracle, that also returns a measure + in order to help the proof. + + A verifier checks that this data-structure is correct. +*) + +Definition UF := PTree.t (node * Z). - To handle this problem, we proceed in two passes. The first pass - populates a union-find data structure, adding equalities [pc = pc'] - for every instruction [pc: nop pc'] in the function. *) +(* The oracle returns a map of "nop" node to their target with a distance (ie the number of the "nop" node on the path) to the target. *) +Axiom branch_target: LTL.function -> UF. +Extract Constant branch_target => "Tunnelingaux.branch_target". -Module U := UnionFind.UF(PTree). +Local Open Scope error_monad_scope. -Definition record_goto (uf: U.t) (pc: node) (b: bblock) : U.t := - match b with - | Lbranch s :: _ => U.union uf pc s - | _ => uf +Definition get (td: UF) pc:node*Z := + match td!pc with + | Some (t,d) => (t,Z.abs d) + | _ => (pc,0) end. -Definition record_gotos (f: LTL.function) : U.t := - PTree.fold record_goto f.(fn_code) U.empty. +Definition target (td: UF) (pc:node): node := fst (get td pc). +Coercion target: UF >-> Funclass. + +(* we check that the domain of [td] is included in the domain of [c] *) +Definition check_included (td: UF) (c: code): option bblock + := PTree.fold (fun (ok:option bblock) pc _ => if ok then c!pc else None) td (Some nil). + +(* we check the validity of targets and their bound: + the distance of a "nop" node (w.r.t to the target) must be greater than the one of its parents. +*) +Definition check_bblock (td: UF) (pc:node) (bb: bblock): res unit + := match td!pc with + | None => OK tt + | Some (tpc, dpc) => + let dpc := Z.abs dpc in + match bb with + | Lbranch s ::_ => + let (ts, ds) := get td s in + if peq tpc ts then + if zlt ds dpc then OK tt + else Error (msg "bad distance in Lbranch") + else Error (msg "invalid skip of Lbranch") + | Lcond _ _ s1 s2 _ :: _ => + let (ts1, ds1) := get td s1 in + let (ts2, ds2) := get td s2 in + if peq tpc ts1 then + if peq tpc ts2 then + if zlt ds1 dpc then + if zlt ds2 dpc then OK tt + else Error (msg "bad distance on else branch") + else Error (msg "bad distance on then branch") + else Error (msg "invalid skip of else branch") + else Error (msg "invalid skip of then branch") + | _ => Error (msg "cannot skip this block") + end + end. + +Definition check_code (td: UF) (c:code): res unit + := PTree.fold (fun ok pc bb => do _ <- ok; check_bblock td pc bb) c (OK tt). (** The second pass rewrites all LTL instructions, replacing every - successor [s] of every instruction by the canonical representative + successor [s] of every instruction by [t s], the canonical representative of its equivalence class in the union-find data structure. *) -Definition tunnel_instr (uf: U.t) (i: instruction) : instruction := +Definition tunnel_instr (t: node -> node) (i: instruction) : instruction := match i with - | Lbranch s => Lbranch (U.repr uf s) + | Lbranch s => Lbranch (t s) | Lcond cond args s1 s2 info => - let s1' := U.repr uf s1 in let s2' := U.repr uf s2 in + let s1' := t s1 in let s2' := t s2 in if peq s1' s2' then Lbranch s1' else Lcond cond args s1' s2' info - | Ljumptable arg tbl => Ljumptable arg (List.map (U.repr uf) tbl) + | Ljumptable arg tbl => Ljumptable arg (List.map t tbl) | _ => i end. -Definition tunnel_block (uf: U.t) (b: bblock) : bblock := - List.map (tunnel_instr uf) b. +Definition tunnel_block (t: node -> node) (b: bblock) : bblock := + List.map (tunnel_instr t) b. -Definition tunnel_function (f: LTL.function) : LTL.function := - let uf := record_gotos f in - mkfunction - (fn_sig f) - (fn_stacksize f) - (PTree.map1 (tunnel_block uf) (fn_code f)) - (U.repr uf (fn_entrypoint f)). +Definition tunnel_function (f: LTL.function) : res LTL.function := + let td := branch_target f in + let c := (fn_code f) in + if check_included td c then + do _ <- check_code td c ; OK + (mkfunction + (fn_sig f) + (fn_stacksize f) + (PTree.map1 (tunnel_block td) c) + (td (fn_entrypoint f))) + else + Error (msg "Some node of the union-find is not in the CFG") + . -Definition tunnel_fundef (f: LTL.fundef) : LTL.fundef := - transf_fundef tunnel_function f. +Definition tunnel_fundef (f: fundef) : res fundef := + transf_partial_fundef tunnel_function f. -Definition transf_program (p: LTL.program) : LTL.program := - transform_program tunnel_fundef p. +Definition transf_program (p: program) : res program := + transform_partial_program tunnel_fundef p. diff --git a/backend/Tunnelingaux.ml b/backend/Tunnelingaux.ml new file mode 100644 index 00000000..af89adea --- /dev/null +++ b/backend/Tunnelingaux.ml @@ -0,0 +1,283 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(* + +This file implements the [branch_target] oracle that identifies "nop" branches in a LTL function, +and computes their target node with the distance (ie the number of cummulated nops) toward this target. + +See [Tunneling.v] + +*) + +open Coqlib +open LTL +open Maps +open Camlcoq + +let limit_tunneling = None (* for debugging: [Some x] limit the number of iterations *) +let debug_flag = ref false +let final_dump = false (* set to true to have a more verbose debugging *) + +let debug fmt = + if !debug_flag then Printf.eprintf fmt + else Printf.ifprintf stderr fmt + +exception BugOnPC of int + +(* type of labels in the cfg *) +type label = int * P.t + +(* instructions under analyzis *) +type simple_inst = (* a simplified view of LTL instructions *) + LBRANCH of node +| LCOND of node * node +| OTHER +and node = { + lab : label; + mutable inst: simple_inst; + mutable link: node; (* link in the union-find: itself for non "nop"-nodes, target of the "nop" otherwise *) + mutable dist: int; + mutable tag: int + } + +(* type of the (simplified) CFG *) +type cfg = { + nodes: (int, node) Hashtbl.t; + mutable rems: node list; (* remaining conditions that may become lbranch or not *) + mutable num_rems: int; + mutable iter_num: int (* number of iterations in elimination of conditions *) + } + +let lab_i (n: node): int = fst n.lab +let lab_p (n: node): P.t = snd n.lab + +let rec target c n = (* inspired from the "find" of union-find algorithm *) + match n.inst with + | LCOND(s1,s2) -> + if n.link != n + then update c n + else if n.tag < c.iter_num then ( + (* we try to change the condition ... *) + n.tag <- c.iter_num; (* ... but at most once by iteration *) + let ts1 = target c s1 in + let ts2 = target c s2 in + if ts1 == ts2 then (n.link <- ts1; ts1) else n + ) else n + | _ -> + if n.link != n + then update c n + else n +and update c n = + let t = target c n.link in + n.link <- t; t + +let get_node c p = + let li = P.to_int p in + try + Hashtbl.find c.nodes li + with + Not_found -> + let rec n = { lab = (li, p); inst = OTHER; link = n ; dist = 0; tag = 0 } in + Hashtbl.add c.nodes li n; + n + +let set_branch c p s = + let li = P.to_int p in + try + let n = Hashtbl.find c.nodes li in + n.inst <- LBRANCH s; + n.link <- target c s + with + Not_found -> + let n = { lab = (li,p); inst = LBRANCH s; link = target c s; dist = 0; tag = 0 } in + Hashtbl.add c.nodes li n + + +(* build [c.nodes] and accumulate in [acc] conditions at beginning of LTL basic-blocks *) +let build_simplified_cfg c acc pc bb = + match bb with + | Lbranch s :: _ -> + let ns = get_node c s in + set_branch c pc ns; + acc + | Lcond (_, _, s1, s2, _) :: _ -> + c.num_rems <- c.num_rems + 1; + let ns1 = get_node c s1 in + let ns2 = get_node c s2 in + let npc = get_node c pc in + npc.inst <- LCOND(ns1, ns2); + npc::acc + | _ -> acc + +(* try to change a condition into a branch +[acc] is the current accumulator of conditions to consider in the next iteration of repeat_change_cond +*) +let try_change_cond c acc pc = + match pc.inst with + | LCOND(s1,s2) -> + let ts1 = target c s1 in + let ts2 = target c s2 in + if ts1 == ts2 then ( + pc.link <- ts1; + c.num_rems <- c.num_rems - 1; + acc + ) else + pc::acc + | _ -> raise (BugOnPC (lab_i pc)) (* LCOND expected *) + +(* repeat [try_change_cond] until no condition is changed into a branch *) +let rec repeat_change_cond c = + c.iter_num <- c.iter_num + 1; + debug "++ Tunneling.branch_target %d: remaining number of conds to consider = %d\n" (c.iter_num) (c.num_rems); + let old = c.num_rems in + c.rems <- List.fold_left (try_change_cond c) [] c.rems; + let curr = c.num_rems in + let continue = + match limit_tunneling with + | Some n -> curr < old && c.iter_num < n + | None -> curr < old + in + if continue + then repeat_change_cond c + + +(* compute the final distance of each nop nodes to its target *) +let undef_dist = -1 +let self_dist = undef_dist-1 +let rec dist n = + if n.dist = undef_dist + then ( + n.dist <- self_dist; (* protection against an unexpected loop in the data-structure *) + n.dist <- + (match n.inst with + | OTHER -> 0 + | LBRANCH p -> 1 + dist p + | LCOND (p1,p2) -> 1 + (max (dist p1) (dist p2))); + n.dist + ) else if n.dist=self_dist then raise (BugOnPC (lab_i n)) + else n.dist + +let final_export f c = + let count = ref 0 in + let filter_nops_init_dist _ n acc = + let tn = target c n in + if tn == n + then ( + n.dist <- 0; (* force [n] to be a base case in the recursion of [dist] *) + acc + ) else ( + n.dist <- undef_dist; (* force [dist] to compute the actual [n.dist] *) + count := !count+1; + (tn, n)::acc + ) + in + let nops = Hashtbl.fold filter_nops_init_dist c.nodes [] in + let res = List.fold_left (fun acc (tn,n) -> PTree.set (lab_p n) (lab_p tn, Z.of_uint (dist n)) acc) PTree.empty nops in + debug "* Tunneling.branch_target: final number of eliminated nops = %d\n" !count; + res + +(*********************************************) +(*** START: printing and debugging functions *) + +let string_of_labeli nodes ipc = + try + let pc = Hashtbl.find nodes ipc in + if pc.link == pc + then Printf.sprintf "(Target@%d)" (dist pc) + else Printf.sprintf "(Nop %d @%d)" (lab_i pc.link) (dist pc) + with + Not_found -> "" + +let print_bblock c println (pc, bb) = + match bb with + | Lbranch s::_ -> (if println then debug "\n"); debug "%d:Lbranch %d %s\n" pc (P.to_int s) (string_of_labeli c.nodes pc); false + | Lcond (_, _, s1, s2, _)::_ -> (if println then debug "\n"); debug "%d:Lcond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (string_of_labeli c.nodes pc); false + | _ -> debug "%d " pc; true + + +let print_cfg f c = + let a = Array.of_list (PTree.fold (fun acc pc bb -> (P.to_int pc,bb)::acc) f.fn_code []) in + Array.fast_sort (fun (i1,_) (i2,_) -> i2 - i1) a; + let ep = P.to_int f.fn_entrypoint in + debug "entrypoint: %d %s\n" ep (string_of_labeli c.nodes ep); + let println = Array.fold_left (print_bblock c) false a in + (if println then debug "\n");debug "remaining cond:"; + List.iter (fun n -> debug "%d " (lab_i n)) c.rems; + debug "\n" + +(*************************************************************) +(* Copy-paste of the extracted code of the verifier *) +(* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *) + +let get td pc = + match PTree.get pc td with + | Some p -> let (t0, d) = p in (t0, d) + | None -> (pc, Z.of_uint 0) + +let check_bblock td pc bb = + match PTree.get pc td with + | Some p -> + let (tpc, dpc) = p in + let dpc0 = dpc in + (match bb with + | [] -> + raise (BugOnPC (P.to_int pc)) + | i :: _ -> + (match i with + | Lbranch s -> + let (ts, ds) = get td s in + if peq tpc ts + then if zlt ds dpc0 + then () + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + | Lcond (_, _, s1, s2, _) -> + let (ts1, ds1) = get td s1 in + let (ts2, ds2) = get td s2 in + if peq tpc ts1 + then if peq tpc ts2 + then if zlt ds1 dpc0 + then if zlt ds2 dpc0 + then () + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + | _ -> + raise (BugOnPC (P.to_int pc)))) + | None -> () + +(** val check_code : coq_UF -> code -> unit res **) + +let check_code td c = + PTree.fold (fun _ pc bb -> check_bblock td pc bb) c (()) + +(*** END: copy-paste & debugging functions *******) + +let branch_target f = + debug "* Tunneling.branch_target: starting on a new function\n"; + if limit_tunneling <> None then debug "* WARNING: limit_tunneling <> None\n"; + let c = { nodes = Hashtbl.create 100; rems = []; num_rems = 0; iter_num = 0 } in + c.rems <- PTree.fold (build_simplified_cfg c) f.fn_code []; + repeat_change_cond c; + let res = final_export f c in + if !debug_flag then ( + try + check_code res f.fn_code; + if final_dump then print_cfg f c; + with e -> ( + print_cfg f c; + check_code res f.fn_code + ) + ); + res diff --git a/backend/Tunnelingproof.v b/backend/Tunnelingproof.v index cdf6c800..126b7b87 100644 --- a/backend/Tunnelingproof.v +++ b/backend/Tunnelingproof.v @@ -3,6 +3,7 @@ (* The Compcert verified compiler *) (* *) (* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) (* *) (* Copyright Institut National de Recherche en Informatique et en *) (* Automatique. All rights reserved. This file is distributed *) @@ -12,131 +13,163 @@ (** Correctness proof for the branch tunneling optimization. *) -Require Import Coqlib Maps UnionFind. +Require Import Coqlib Maps Errors. Require Import AST Linking. Require Import Values Memory Events Globalenvs Smallstep. Require Import Op Locations LTL. Require Import Tunneling. -Definition match_prog (p tp: program) := - match_program (fun ctx f tf => tf = tunnel_fundef f) eq p tp. +Local Open Scope nat. -Lemma transf_program_match: - forall p, match_prog p (transf_program p). + +(** * Properties of the branch_target, when the verifier succeeds *) + +Definition check_included_spec (c:code) (td:UF) (ok: option bblock) := + ok <> None -> forall pc, c!pc = None -> td!pc = None. + +Lemma check_included_correct (td: UF) (c: code): + check_included_spec c td (check_included td c). +Proof. + apply PTree_Properties.fold_rec with (P := check_included_spec c). +- (* extensionality *) + unfold check_included_spec. intros m m' a EQ IND X pc. rewrite <- EQ; auto. +- (* base case *) + intros _ pc. rewrite PTree.gempty; try congruence. +- (* inductive case *) + unfold check_included_spec. + intros m [|] pc bb NEW ATPC IND; simpl; try congruence. + intros H pc0. rewrite PTree.gsspec; destruct (peq _ _); subst; simpl; try congruence. + intros; eapply IND; try congruence. +Qed. + +Inductive target_bounds (target: node -> node) (bound: node -> nat) (pc: node): (option bblock) -> Prop := + | TB_default (TB: target pc = pc) ob + : target_bounds target bound pc ob + | TB_branch s bb + (EQ: target pc = target s) + (DECREASE: bound s < bound pc) + : target_bounds target bound pc (Some (Lbranch s::bb)) + | TB_cond cond args s1 s2 info bb + (EQ1: target pc = target s1) + (EQ2: target pc = target s2) + (DEC1: bound s1 < bound pc) + (DEC2: bound s2 < bound pc) + : target_bounds target bound pc (Some (Lcond cond args s1 s2 info::bb)) + . +Local Hint Resolve TB_default: core. + +Lemma target_None (td:UF) (pc: node): td!pc = None -> td pc = pc. Proof. - intros. eapply match_transform_program; eauto. + unfold target, get. intros H; rewrite H; auto. Qed. +Local Hint Resolve target_None Z.abs_nonneg: core. -(** * Properties of the branch map computed using union-find. *) +Lemma get_nonneg td pc t d: get td pc = (t, d) -> (0 <= d)%Z. +Proof. + unfold get. destruct (td!_) as [(t0&d0)|]; intros H; inversion H; subst; simpl; omega || auto. +Qed. +Local Hint Resolve get_nonneg: core. -(** A variant of [record_goto] that also incrementally computes a measure [f: node -> nat] - counting the number of [Lnop] instructions starting at a given [pc] that were eliminated. *) +Definition bound (td: UF) (pc: node) := Z.to_nat (snd (get td pc)). -Definition measure_edge (u: U.t) (pc s: node) (f: node -> nat) : node -> nat := - fun x => if peq (U.repr u s) pc then f x - else if peq (U.repr u x) pc then (f x + f s + 1)%nat - else f x. +Lemma check_bblock_correct (td:UF) (pc:node) (bb: bblock): + check_bblock td pc bb = OK tt -> + target_bounds (target td) (bound td) pc (Some bb). +Proof. + unfold check_bblock, bound. + destruct (td!pc) as [(tpc&dpc)|] eqn:Hpc; auto. + assert (Tpc: td pc = tpc). { unfold target, get; rewrite Hpc; simpl; auto. } + assert (Dpc: snd (get td pc) = Z.abs dpc). { unfold get; rewrite Hpc; simpl; auto. } + destruct bb as [|[ ] bb]; simpl; try congruence. + + destruct (get td s) as (ts, ds) eqn:Hs. + repeat (destruct (peq _ _) || destruct (zlt _ _)); simpl; try congruence. + intros; apply TB_branch. + * rewrite Tpc. unfold target; rewrite Hs; simpl; auto. + * rewrite Dpc, Hs; simpl. apply Z2Nat.inj_lt; eauto. + + destruct (get td s1) as (ts1, ds1) eqn:Hs1. + destruct (get td s2) as (ts2, ds2) eqn:Hs2. + repeat (destruct (peq _ _) || destruct (zlt _ _)); simpl; try congruence. + intros; apply TB_cond. + * rewrite Tpc. unfold target; rewrite Hs1; simpl; auto. + * rewrite Tpc. unfold target; rewrite Hs2; simpl; auto. + * rewrite Dpc, Hs1; simpl. apply Z2Nat.inj_lt; eauto. + * rewrite Dpc, Hs2; simpl. apply Z2Nat.inj_lt; eauto. +Qed. -Definition record_goto' (uf: U.t * (node -> nat)) (pc: node) (b: bblock) : U.t * (node -> nat) := - match b with - | Lbranch s :: b' => let (u, f) := uf in (U.union u pc s, measure_edge u pc s f) - | _ => uf - end. +Definition check_code_spec (td:UF) (c:code) (ok: res unit) := + ok = OK tt -> forall pc bb, c!pc = Some bb -> target_bounds (target td) (bound td) pc (Some bb). -Definition branch_map_correct (c: code) (uf: U.t * (node -> nat)): Prop := - forall pc, - match c!pc with - | Some(Lbranch s :: b) => - U.repr (fst uf) pc = pc \/ (U.repr (fst uf) pc = U.repr (fst uf) s /\ snd uf s < snd uf pc)%nat - | _ => - U.repr (fst uf) pc = pc - end. +Lemma check_code_correct (td:UF) c: + check_code_spec td c (check_code td c). +Proof. + apply PTree_Properties.fold_rec with (P := check_code_spec td). +- (* extensionality *) + unfold check_code_spec. intros m m' a EQ IND X pc bb; subst. rewrite <- ! EQ; eauto. +- (* base case *) + intros _ pc. rewrite PTree.gempty; try congruence. +- (* inductive case *) + unfold check_code_spec. + intros m [[]|] pc bb NEW ATPC IND; simpl; try congruence. + intros H pc0 bb0. rewrite PTree.gsspec; destruct (peq _ _); subst; simpl; auto. + intros X; inversion X; subst. + apply check_bblock_correct; auto. +Qed. -Lemma record_gotos'_correct: - forall c, - branch_map_correct c (PTree.fold record_goto' c (U.empty, fun (x: node) => O)). +Theorem branch_target_bounds: + forall f tf pc, + tunnel_function f = OK tf -> + target_bounds (branch_target f) (bound (branch_target f)) pc (f.(fn_code)!pc). Proof. - intros. - apply PTree_Properties.fold_rec with (P := fun c uf => branch_map_correct c uf). + unfold tunnel_function; intros f f' pc. + destruct (check_included _ _) eqn:H1; try congruence. + destruct (check_code _ _) as [[]|] eqn:H2; simpl; try congruence. + intros _. + destruct ((fn_code f)!pc) eqn:X. + - exploit check_code_correct; eauto. + - exploit check_included_correct; eauto. + congruence. +Qed. -- (* extensionality *) - intros. red; intros. rewrite <- H. apply H0. +Lemma tunnel_function_unfold: + forall f tf pc, + tunnel_function f = OK tf -> + (fn_code tf)!pc = option_map (tunnel_block (branch_target f)) (fn_code f)!pc. +Proof. + unfold tunnel_function; intros f f' pc. + destruct (check_included _ _) eqn:H1; try congruence. + destruct (check_code _ _) as [[]|] eqn:H2; simpl; try congruence. + intros X; inversion X; clear X; subst. + simpl. rewrite PTree.gmap1. auto. +Qed. -- (* base case *) - red; intros; simpl. rewrite PTree.gempty. apply U.repr_empty. +Lemma tunnel_fundef_Internal: + forall f tf, tunnel_fundef (Internal f) = OK tf + -> exists tf', tunnel_function f = OK tf' /\ tf = Internal tf'. +Proof. + intros f tf; simpl. + destruct (tunnel_function f) eqn:X; simpl; try congruence. + intros EQ; inversion EQ. + eexists; split; eauto. +Qed. -- (* inductive case *) - intros m uf pc bb; intros. destruct uf as [u f]. - assert (PC: U.repr u pc = pc). - generalize (H1 pc). rewrite H. auto. - assert (record_goto' (u, f) pc bb = (u, f) - \/ exists s, exists bb', bb = Lbranch s :: bb' /\ record_goto' (u, f) pc bb = (U.union u pc s, measure_edge u pc s f)). - unfold record_goto'; simpl. destruct bb; auto. destruct i; auto. right. exists s; exists bb; auto. - destruct H2 as [B | [s [bb' [EQ B]]]]. - -+ (* u and f are unchanged *) - rewrite B. - red. intro pc'. simpl. rewrite PTree.gsspec. destruct (peq pc' pc). subst pc'. - destruct bb; auto. destruct i; auto. - apply H1. - -+ (* b is Lbranch s, u becomes union u pc s, f becomes measure_edge u pc s f *) - rewrite B. - red. intro pc'. simpl. rewrite PTree.gsspec. destruct (peq pc' pc). subst pc'. rewrite EQ. - -* (* The new instruction *) - rewrite (U.repr_union_2 u pc s); auto. rewrite U.repr_union_3. - unfold measure_edge. destruct (peq (U.repr u s) pc). auto. right. split. auto. - rewrite PC. rewrite peq_true. omega. - -* (* An old instruction *) - assert (U.repr u pc' = pc' -> U.repr (U.union u pc s) pc' = pc'). - { intro. rewrite <- H2 at 2. apply U.repr_union_1. congruence. } - generalize (H1 pc'). simpl. destruct (m!pc'); auto. destruct b; auto. destruct i; auto. - intros [P | [P Q]]. left; auto. right. - split. apply U.sameclass_union_2. auto. - unfold measure_edge. destruct (peq (U.repr u s) pc). auto. - rewrite P. destruct (peq (U.repr u s0) pc). omega. auto. -Qed. - -Definition record_gotos' (f: function) := - PTree.fold record_goto' f.(fn_code) (U.empty, fun (x: node) => O). - -Lemma record_gotos_gotos': - forall f, fst (record_gotos' f) = record_gotos f. -Proof. - intros. unfold record_gotos', record_gotos. - repeat rewrite PTree.fold_spec. - generalize (PTree.elements (fn_code f)) (U.empty) (fun _ : node => O). - induction l; intros; simpl. - auto. - unfold record_goto' at 2. unfold record_goto at 2. - destruct (snd a). apply IHl. destruct i; apply IHl. -Qed. - -Definition branch_target (f: function) (pc: node) : node := - U.repr (record_gotos f) pc. - -Definition count_gotos (f: function) (pc: node) : nat := - snd (record_gotos' f) pc. - -Theorem record_gotos_correct: - forall f pc, - match f.(fn_code)!pc with - | Some(Lbranch s :: b) => - branch_target f pc = pc \/ - (branch_target f pc = branch_target f s /\ count_gotos f s < count_gotos f pc)%nat - | _ => branch_target f pc = pc - end. +Lemma tunnel_fundef_External: + forall tf ef, tunnel_fundef (External ef) = OK tf + -> tf = External ef. Proof. - intros. - generalize (record_gotos'_correct f.(fn_code) pc). simpl. - fold (record_gotos' f). unfold branch_map_correct, branch_target, count_gotos. - rewrite record_gotos_gotos'. auto. + intros tf ef; simpl. intros H; inversion H; auto. Qed. (** * Preservation of semantics *) +Definition match_prog (p tp: program) := + match_program (fun _ f tf => tunnel_fundef f = OK tf) eq p tp. + +Lemma transf_program_match: + forall prog tprog, transf_program prog = OK tprog -> match_prog prog tprog. +Proof. + intros. eapply match_transform_partial_program_contextual; eauto. +Qed. + Section PRESERVATION. Variables prog tprog: program. @@ -145,32 +178,65 @@ Let ge := Genv.globalenv prog. Let tge := Genv.globalenv tprog. Lemma functions_translated: - forall v f, + forall (v: val) (f: fundef), Genv.find_funct ge v = Some f -> - Genv.find_funct tge v = Some (tunnel_fundef f). -Proof (Genv.find_funct_transf TRANSL). + exists tf, tunnel_fundef f = OK tf /\ Genv.find_funct tge v = Some tf. +Proof. + intros. exploit (Genv.find_funct_match TRANSL); eauto. + intros (cu & tf & A & B & C). + repeat eexists; intuition eauto. +Qed. Lemma function_ptr_translated: forall v f, Genv.find_funct_ptr ge v = Some f -> - Genv.find_funct_ptr tge v = Some (tunnel_fundef f). -Proof (Genv.find_funct_ptr_transf TRANSL). + exists tf, + Genv.find_funct_ptr tge v = Some tf /\ tunnel_fundef f = OK tf. +Proof. + intros. + exploit (Genv.find_funct_ptr_transf_partial TRANSL); eauto. +Qed. -Lemma symbols_preserved: - forall id, - Genv.find_symbol tge id = Genv.find_symbol ge id. -Proof (Genv.find_symbol_transf TRANSL). +Lemma symbols_preserved s: Genv.find_symbol tge s = Genv.find_symbol ge s. +Proof. + rewrite <- (Genv.find_symbol_match TRANSL). reflexivity. +Qed. Lemma senv_preserved: Senv.equiv ge tge. -Proof (Genv.senv_transf TRANSL). +Proof. + eapply (Genv.senv_match TRANSL). +Qed. Lemma sig_preserved: - forall f, funsig (tunnel_fundef f) = funsig f. + forall f tf, tunnel_fundef f = OK tf -> funsig tf = funsig f. Proof. - destruct f; reflexivity. + intros. destruct f. + - simpl in H. monadInv H. unfold tunnel_function in EQ. + destruct (check_included _ _); try congruence. + monadInv EQ. simpl; auto. + - simpl in H. monadInv H. reflexivity. Qed. +Lemma fn_stacksize_preserved: + forall f tf, tunnel_function f = OK tf -> fn_stacksize tf = fn_stacksize f. +Proof. + intros f tf; unfold tunnel_function. + destruct (check_included _ _); try congruence. + destruct (check_code _ _); simpl; try congruence. + intros H; inversion H; simpl; auto. +Qed. + +Lemma fn_entrypoint_preserved: + forall f tf, tunnel_function f = OK tf -> fn_entrypoint tf = branch_target f (fn_entrypoint f). +Proof. + intros f tf; unfold tunnel_function. + destruct (check_included _ _); try congruence. + destruct (check_code _ _); simpl; try congruence. + intros H; inversion H; simpl; auto. +Qed. + + (** The proof of semantic preservation is a simulation argument based on diagrams of the following form: << @@ -185,7 +251,7 @@ Qed. between states [st1] and [st2], as well as the postcondition between [st1'] and [st2']. One transition in the source code (left) can correspond to zero or one transition in the transformed code (right). The - "zero transition" case occurs when executing a [Lgoto] instruction + "zero transition" case occurs when executing a [Lnop] instruction in the source code that has been removed by tunneling. In the definition of [match_states], what changes between the original and @@ -194,52 +260,52 @@ Qed. and memory states, since some [Vundef] values can become more defined as a consequence of eliminating useless [Lcond] instructions. *) -Definition tunneled_block (f: function) (b: bblock) := - tunnel_block (record_gotos f) b. - -Definition tunneled_code (f: function) := - PTree.map1 (tunneled_block f) (fn_code f). - Definition locmap_lessdef (ls1 ls2: locset) : Prop := forall l, Val.lessdef (ls1 l) (ls2 l). Inductive match_stackframes: stackframe -> stackframe -> Prop := | match_stackframes_intro: - forall f sp ls0 bb tls0, + forall f tf sp ls0 bb tls0, locmap_lessdef ls0 tls0 -> + tunnel_function f = OK tf -> match_stackframes (Stackframe f sp ls0 bb) - (Stackframe (tunnel_function f) sp tls0 (tunneled_block f bb)). + (Stackframe tf sp tls0 (tunnel_block (branch_target f) bb)). Inductive match_states: state -> state -> Prop := | match_states_intro: - forall s f sp pc ls m ts tls tm + forall s f tf sp pc ls m ts tls tm (STK: list_forall2 match_stackframes s ts) (LS: locmap_lessdef ls tls) - (MEM: Mem.extends m tm), + (MEM: Mem.extends m tm) + (TF: tunnel_function f = OK tf), match_states (State s f sp pc ls m) - (State ts (tunnel_function f) sp (branch_target f pc) tls tm) + (State ts tf sp (branch_target f pc) tls tm) | match_states_block: - forall s f sp bb ls m ts tls tm + forall s f tf sp bb ls m ts tls tm (STK: list_forall2 match_stackframes s ts) (LS: locmap_lessdef ls tls) - (MEM: Mem.extends m tm), + (MEM: Mem.extends m tm) + (TF: tunnel_function f = OK tf), match_states (Block s f sp bb ls m) - (Block ts (tunnel_function f) sp (tunneled_block f bb) tls tm) + (Block ts tf sp (tunnel_block (branch_target f) bb) tls tm) | match_states_interm: - forall s f sp pc bb ls m ts tls tm + forall s f tf sp pc i bb ls m ts tls tm (STK: list_forall2 match_stackframes s ts) (LS: locmap_lessdef ls tls) - (MEM: Mem.extends m tm), - match_states (Block s f sp (Lbranch pc :: bb) ls m) - (State ts (tunnel_function f) sp (branch_target f pc) tls tm) + (MEM: Mem.extends m tm) + (IBRANCH: tunnel_instr (branch_target f) i = Lbranch pc) + (TF: tunnel_function f = OK tf), + match_states (Block s f sp (i :: bb) ls m) + (State ts tf sp pc tls tm) | match_states_call: - forall s f ls m ts tls tm + forall s f tf ls m ts tls tm (STK: list_forall2 match_stackframes s ts) (LS: locmap_lessdef ls tls) - (MEM: Mem.extends m tm), + (MEM: Mem.extends m tm) + (TF: tunnel_fundef f = OK tf), match_states (Callstate s f ls m) - (Callstate ts (tunnel_fundef f) tls tm) + (Callstate ts tf tls tm) | match_states_return: forall s ls m ts tls tm (STK: list_forall2 match_stackframes s ts) @@ -289,22 +355,6 @@ Proof. induction rl as [ | r rl]; intros; simpl. auto. apply locmap_set_undef_lessdef; auto. Qed. -(* -Lemma locmap_undef_lessdef: - forall ll ls1 ls2, - locmap_lessdef ls1 ls2 -> locmap_lessdef (Locmap.undef ll ls1) (Locmap.undef ll ls2). -Proof. - induction ll as [ | l ll]; intros; simpl. auto. apply IHll. apply locmap_set_lessdef; auto. -Qed. - -Lemma locmap_undef_lessdef_1: - forall ll ls1 ls2, - locmap_lessdef ls1 ls2 -> locmap_lessdef (Locmap.undef ll ls1) ls2. -Proof. - induction ll as [ | l ll]; intros; simpl. auto. apply IHll. apply locmap_set_undef_lessdef; auto. -Qed. -*) - Lemma locmap_getpair_lessdef: forall p ls1 ls2, locmap_lessdef ls1 ls2 -> Val.lessdef (Locmap.getpair p ls1) (Locmap.getpair p ls2). @@ -348,15 +398,16 @@ Lemma find_function_translated: forall ros ls tls fd, locmap_lessdef ls tls -> find_function ge ros ls = Some fd -> - find_function tge ros tls = Some (tunnel_fundef fd). + exists tfd, tunnel_fundef fd = OK tfd /\ find_function tge ros tls = Some tfd. Proof. intros. destruct ros; simpl in *. - assert (E: tls (R m) = ls (R m)). { exploit Genv.find_funct_inv; eauto. intros (b & EQ). generalize (H (R m)). rewrite EQ. intros LD; inv LD. auto. } - rewrite E. apply functions_translated; auto. + rewrite E. exploit functions_translated; eauto. - rewrite symbols_preserved. destruct (Genv.find_symbol ge i); inv H0. - apply function_ptr_translated; auto. + exploit function_ptr_translated; eauto. + intros (tf & X1 & X2). exists tf; intuition. Qed. Lemma call_regs_lessdef: @@ -383,11 +434,12 @@ Qed. Definition measure (st: state) : nat := match st with - | State s f sp pc ls m => (count_gotos f pc * 2)%nat - | Block s f sp (Lbranch pc :: _) ls m => (count_gotos f pc * 2 + 1)%nat - | Block s f sp bb ls m => 0%nat - | Callstate s f ls m => 0%nat - | Returnstate s ls m => 0%nat + | State s f sp pc ls m => (bound (branch_target f) pc) * 2 + | Block s f sp (Lbranch pc :: _) ls m => (bound (branch_target f) pc) * 2 + 1 + | Block s f sp (Lcond _ _ pc1 pc2 _ :: _) ls m => (max (bound (branch_target f) pc1) (bound (branch_target f) pc2)) * 2 + 1 + | Block s f sp bb ls m => 0 + | Callstate s f ls m => 0 + | Returnstate s ls m => 0 end. Lemma match_parent_locset: @@ -406,24 +458,23 @@ Lemma tunnel_step_correct: (exists st2', step tge st1' t st2' /\ match_states st2 st2') \/ (measure st2 < measure st1 /\ t = E0 /\ match_states st2 st1')%nat. Proof. - induction 1; intros; try inv MS. + induction 1; intros; try inv MS; try (simpl in IBRANCH; inv IBRANCH). - (* entering a block *) - assert (DEFAULT: branch_target f pc = pc -> - (exists st2' : state, - step tge (State ts (tunnel_function f) sp (branch_target f pc) tls tm) E0 st2' - /\ match_states (Block s f sp bb rs m) st2')). - { intros. rewrite H0. econstructor; split. - econstructor. simpl. rewrite PTree.gmap1. rewrite H. simpl. eauto. - econstructor; eauto. } - - generalize (record_gotos_correct f pc). rewrite H. - destruct bb; auto. destruct i; auto. - intros [A | [B C]]. auto. - right. split. simpl. omega. - split. auto. - rewrite B. econstructor; eauto. - + exploit (branch_target_bounds f tf pc); eauto. + rewrite H. intros X; inversion X. + + (* TB_default *) + rewrite TB; left. econstructor; split. + * econstructor. simpl. erewrite tunnel_function_unfold, H ; simpl; eauto. + * econstructor; eauto. + + (* FT_branch *) + simpl; right. + rewrite EQ; repeat (econstructor; omega || eauto). + + (* FT_cond *) + simpl; right. + repeat (econstructor; omega || eauto); simpl. + apply Nat.max_case; omega. + destruct (peq _ _); try congruence. - (* Lop *) exploit eval_operation_lessdef. apply reglist_lessdef; eauto. eauto. eauto. intros (tv & EV & LD). @@ -485,20 +536,25 @@ Proof. eauto. eauto. econstructor; eauto using locmap_undef_regs_lessdef. - (* Lcall *) - left; simpl; econstructor; split. - eapply exec_Lcall with (fd := tunnel_fundef fd); eauto. - eapply find_function_translated; eauto. - rewrite sig_preserved. auto. - econstructor; eauto. - constructor; auto. - constructor; auto. + left; simpl. + exploit find_function_translated; eauto. + intros (tfd & Htfd & FIND). + econstructor; split. + + eapply exec_Lcall; eauto. + erewrite sig_preserved; eauto. + + econstructor; eauto. + constructor; auto. + constructor; auto. - (* Ltailcall *) - exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM'). + exploit find_function_translated. 2: eauto. + { eauto using return_regs_lessdef, match_parent_locset. } + intros (tfd & Htfd & FIND). + exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM'). left; simpl; econstructor; split. - eapply exec_Ltailcall with (fd := tunnel_fundef fd); eauto. - eapply find_function_translated; eauto using return_regs_lessdef, match_parent_locset. - apply sig_preserved. - econstructor; eauto using return_regs_lessdef, match_parent_locset. + + eapply exec_Ltailcall; eauto. + * eapply sig_preserved; eauto. + * erewrite fn_stacksize_preserved; eauto. + + econstructor; eauto using return_regs_lessdef, match_parent_locset. - (* Lbuiltin *) exploit eval_builtin_args_lessdef. eexact LS. eauto. eauto. intros (tvargs & EVA & LDA). exploit external_call_mem_extends; eauto. intros (tvres & tm' & A & B & C & D). @@ -513,45 +569,58 @@ Proof. fold (branch_target f pc). econstructor; eauto. - (* Lbranch (eliminated) *) right; split. simpl. omega. split. auto. constructor; auto. - -- (* Lcond *) - simpl tunneled_block. - set (s1 := U.repr (record_gotos f) pc1). set (s2 := U.repr (record_gotos f) pc2). - destruct (peq s1 s2). -+ left; econstructor; split. - eapply exec_Lbranch. - destruct b. -* constructor; eauto using locmap_undef_regs_lessdef_1. -* rewrite e. constructor; eauto using locmap_undef_regs_lessdef_1. -+ left; econstructor; split. - eapply exec_Lcond; eauto. eapply eval_condition_lessdef; eauto using reglist_lessdef. - destruct b; econstructor; eauto using locmap_undef_regs_lessdef. - +- (* Lcond (preserved) *) + simpl; left; destruct (peq _ _) eqn: EQ. + + econstructor; split. + eapply exec_Lbranch. + destruct b. + * constructor; eauto using locmap_undef_regs_lessdef_1. + * rewrite e. constructor; eauto using locmap_undef_regs_lessdef_1. + + econstructor; split. + eapply exec_Lcond; eauto. eapply eval_condition_lessdef; eauto using reglist_lessdef. + destruct b; econstructor; eauto using locmap_undef_regs_lessdef. +- (* Lcond (eliminated) *) + destruct (peq _ _) eqn: EQ; try inv H1. + right; split; simpl. + + destruct b. + generalize (Nat.le_max_l (bound (branch_target f) pc1) (bound (branch_target f) pc2)); omega. + generalize (Nat.le_max_r (bound (branch_target f) pc1) (bound (branch_target f) pc2)); omega. + + destruct b. + -- repeat (constructor; auto). + -- rewrite e; repeat (constructor; auto). - (* Ljumptable *) assert (tls (R arg) = Vint n). { generalize (LS (R arg)); rewrite H; intros LD; inv LD; auto. } left; simpl; econstructor; split. eapply exec_Ljumptable. - eauto. rewrite list_nth_z_map. change U.elt with node. rewrite H0. reflexivity. eauto. + eauto. rewrite list_nth_z_map, H0; simpl; eauto. eauto. econstructor; eauto using locmap_undef_regs_lessdef. - (* Lreturn *) exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM'). left; simpl; econstructor; split. - eapply exec_Lreturn; eauto. - constructor; eauto using return_regs_lessdef, match_parent_locset. + + eapply exec_Lreturn; eauto. + erewrite fn_stacksize_preserved; eauto. + + constructor; eauto using return_regs_lessdef, match_parent_locset. - (* internal function *) + exploit tunnel_fundef_Internal; eauto. + intros (tf' & TF' & ITF). subst. exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl. - intros (tm' & ALLOC & MEM'). - left; simpl; econstructor; split. - eapply exec_function_internal; eauto. - simpl. econstructor; eauto using locmap_undef_regs_lessdef, call_regs_lessdef. + intros (tm' & ALLOC & MEM'). + left; simpl. + econstructor; split. + + eapply exec_function_internal; eauto. + erewrite fn_stacksize_preserved; eauto. + + simpl. + erewrite (fn_entrypoint_preserved f tf'); auto. + econstructor; eauto using locmap_undef_regs_lessdef, call_regs_lessdef. - (* external function *) exploit external_call_mem_extends; eauto using locmap_getpairs_lessdef. intros (tvres & tm' & A & B & C & D). left; simpl; econstructor; split. - eapply exec_function_external; eauto. - eapply external_call_symbols_preserved; eauto. apply senv_preserved. - simpl. econstructor; eauto using locmap_setpair_lessdef, locmap_undef_caller_save_regs_lessdef. + + erewrite (tunnel_fundef_External tf ef); eauto. + eapply exec_function_external; eauto. + eapply external_call_symbols_preserved; eauto. apply senv_preserved. + + simpl. econstructor; eauto using locmap_setpair_lessdef, locmap_undef_caller_save_regs_lessdef. - (* return *) inv STK. inv H1. left; econstructor; split. @@ -564,14 +633,15 @@ Lemma transf_initial_states: exists st2, initial_state tprog st2 /\ match_states st1 st2. Proof. intros. inversion H. - exists (Callstate nil (tunnel_fundef f) (Locmap.init Vundef) m0); split. + exploit function_ptr_translated; eauto. + intros (tf & Htf & Hf). + exists (Callstate nil tf (Locmap.init Vundef) m0); split. econstructor; eauto. - apply (Genv.init_mem_transf TRANSL); auto. + apply (Genv.init_mem_transf_partial TRANSL); auto. rewrite (match_program_main TRANSL). rewrite symbols_preserved. eauto. - apply function_ptr_translated; auto. - rewrite <- H3. apply sig_preserved. - constructor. constructor. red; simpl; auto. apply Mem.extends_refl. + rewrite <- H3. apply sig_preserved. auto. + constructor. constructor. red; simpl; auto. apply Mem.extends_refl. auto. Qed. Lemma transf_final_states: diff --git a/driver/Clflags.ml b/driver/Clflags.ml index 9df58903..d1e7dd7f 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -27,11 +27,14 @@ let option_ftailcalls = ref true let option_fconstprop = ref true let option_fcse = ref true let option_fcse2 = ref false + let option_fcse3 = ref true let option_fcse3_alias_analysis = ref true let option_fcse3_across_calls = ref false let option_fcse3_across_merges = ref true let option_fcse3_glb = ref true +let option_fcse3_trivial_ops = ref false +let option_fcse3_refine = ref true let option_fredundancy = ref true (** Options relative to superblock scheduling *) @@ -40,6 +43,7 @@ let option_ftailduplicate = ref 0 (* perform tail duplication for blocks of size let option_ftracelinearize = ref true (* uses branch prediction information to improve the linearization *) let option_funrollsingle = ref 0 (* unroll a single iteration of innermost loops of size n *) let option_funrollbody = ref 0 (* unroll the body of innermost loops of size n *) +let option_flooprotate = ref 0 (* rotate the innermost loops to have the condition inside the loop body *) let option_fpostpass = ref true let option_fpostpass_sched = ref "list" @@ -93,7 +97,7 @@ let option_div_i32 = ref "stsud" let option_div_i64 = ref "stsud" let option_fcoalesce_mem = ref true let option_fforward_moves = ref false -let option_fmove_loop_invariants = ref true +let option_fmove_loop_invariants = ref false let option_fnontrap_loads = ref true let option_all_loads_nontrap = ref false let option_inline_auto_threshold = ref 0 diff --git a/driver/Compiler.vexpand b/driver/Compiler.vexpand index 0f59aab7..3acec956 100644 --- a/driver/Compiler.vexpand +++ b/driver/Compiler.vexpand @@ -35,6 +35,7 @@ Require Cshmgen. Require Cminorgen. Require Selection. Require RTLgen. +Require Import Duplicatepasses. EXPAND_RTL_REQUIRE Require Asmgen. (** Proofs of semantic preservation. *) @@ -53,7 +54,7 @@ Require Import Compopts. Parameter print_Clight: Clight.program -> unit. Parameter print_Cminor: Cminor.program -> unit. Parameter print_RTL: Z -> RTL.program -> unit. -Parameter print_LTL: LTL.program -> unit. +Parameter print_LTL: Z -> LTL.program -> unit. Parameter print_Mach: Mach.program -> unit. Local Open Scope string_scope. diff --git a/driver/Compopts.v b/driver/Compopts.v index 540e8922..0c90ee52 100644 --- a/driver/Compopts.v +++ b/driver/Compopts.v @@ -54,6 +54,9 @@ Parameter optim_CSE3_across_merges: unit -> bool. (** Flag -fcse3-glb *) Parameter optim_CSE3_glb: unit -> bool. +(** Flag -fcse3-trivial-ops. For DMonniaux's common subexpression elimination, simplify trivial operations as well. *) +Parameter optim_CSE3_trivial_ops: unit -> bool. + (** Flag -fmove-loop-invariants. *) Parameter optim_move_loop_invariants: unit -> bool. diff --git a/driver/Driver.ml b/driver/Driver.ml index 12f50762..d93578b6 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -202,6 +202,8 @@ Processing options: -fcse3-across-calls Propagate CSE3 information across function calls [off] -fcse3-across-merges Propagate CSE3 information across control-flow merges [on] -fcse3-glb Refine CSE3 information using greatest lower bounds [on] + -fcse3-trivial-ops Replace trivial operations as well using CSE3 [off] + -fcse3-refine Refine CSE3 invariants by descending iteration [on] -fmove-loop-invariants Perform loop-invariant code motion [off] -fredundancy Perform redundancy elimination [on] -fpostpass Perform postpass scheduling (only for K1 architecture) [on] @@ -213,6 +215,8 @@ Processing options: -ftracelinearize Uses branch prediction information to improve the Linearize [on] -funrollsingle n Unrolls a single iteration of innermost loops of size n (not counting Inops) [0] -funrollbody n Unrolls once the body of innermost loops of size n (not counting Inops) [0] + -flooprotate n Duplicates the header (condition computation part) of innermost loops to perform a loop rotate [0] + Doesn't duplicate if the size of that header is strictly greater than n -fforward-moves Forward moves after CSE -finline Perform inlining of functions [on] -finline-functions-called-once Integrate functions only required by their @@ -415,6 +419,8 @@ let cmdline_actions = @ f_opt "cse3-across-calls" option_fcse3_across_calls @ f_opt "cse3-across-merges" option_fcse3_across_merges @ f_opt "cse3-glb" option_fcse3_glb + @ f_opt "cse3-trivial-ops" option_fcse3_trivial_ops + @ f_opt "cse3-refine" option_fcse3_refine @ f_opt "move-loop-invariants" option_fmove_loop_invariants @ f_opt "redundancy" option_fredundancy @ f_opt "postpass" option_fpostpass @@ -422,6 +428,7 @@ let cmdline_actions = @ f_opt "predict" option_fpredict @ [ Exact "-funrollsingle", Integer (fun n -> option_funrollsingle := n) ] @ [ Exact "-funrollbody", Integer (fun n -> option_funrollbody := n) ] + @ [ Exact "-flooprotate", Integer (fun n -> option_flooprotate := n) ] @ f_opt "tracelinearize" option_ftracelinearize @ f_opt_str "postpass" option_fpostpass option_fpostpass_sched @ f_opt "inline" option_finline diff --git a/extraction/extraction.v b/extraction/extraction.v index 5de66797..f5b8291b 100644 --- a/extraction/extraction.v +++ b/extraction/extraction.v @@ -128,6 +128,8 @@ Extract Constant Compopts.optim_CSE3_across_merges => "fun _ -> !Clflags.option_fcse3_across_merges". Extract Constant Compopts.optim_CSE3_glb => "fun _ -> !Clflags.option_fcse3_glb". +Extract Constant Compopts.optim_CSE3_trivial_ops => + "fun _ -> !Clflags.option_fcse3_trivial_ops". Extract Constant Compopts.optim_move_loop_invariants => "fun _ -> !Clflags.option_fmove_loop_invariants". diff --git a/lib/UnionFind.v b/lib/UnionFind.v index 20bb91cd..bd1b763b 100644 --- a/lib/UnionFind.v +++ b/lib/UnionFind.v @@ -124,6 +124,15 @@ Module Type UNIONFIND. pathlen uf x + pathlen uf b + 1 else pathlen uf x. + Axiom pathlen_union: + forall uf a b x, + pathlen (union uf a b) x = + if elt_eq (repr uf a) (repr uf b) then + pathlen uf x + else if elt_eq (repr uf x) (repr uf a) then + (pathlen uf x)+1 + else + (pathlen uf x). Axiom pathlen_gt_merge: forall uf a b x y, repr uf x = repr uf y -> @@ -531,6 +540,7 @@ Qed. End PATHLEN. + (* Path length and merge *) Lemma pathlen_merge: @@ -549,16 +559,49 @@ Proof. set (uf' := identify uf (repr uf a) b (repr_res_none uf a) (not_eq_sym n)). pattern x. apply (well_founded_ind (mwf uf')); intros. rewrite (pathlen_unroll uf'). destruct (M.get x0 (m uf')) as [x'|] eqn:G. - rewrite H; auto. simpl in G. rewrite M.gsspec in G. - destruct (M.elt_eq x0 (repr uf a)). rewrite e. rewrite repr_canonical. rewrite dec_eq_true. - inversion G. subst x'. rewrite dec_eq_false; auto. - replace (pathlen uf (repr uf a)) with 0. omega. - symmetry. apply pathlen_none. apply repr_res_none. - rewrite (repr_unroll uf x0), (pathlen_unroll uf x0); rewrite G. - destruct (M.elt_eq (repr uf x') (repr uf a)); omega. - simpl in G. rewrite M.gsspec in G. destruct (M.elt_eq x0 (repr uf a)); try discriminate. - rewrite (repr_none uf x0) by auto. rewrite dec_eq_false; auto. - symmetry. apply pathlen_zero; auto. apply repr_none; auto. + + rewrite H; auto. clear H. simpl in G. rewrite M.gsspec in G. + destruct (M.elt_eq x0 (repr uf a)). + - rewrite e, repr_canonical, dec_eq_true. + inversion G. subst x'. rewrite dec_eq_false; auto. + replace (pathlen uf (repr uf a)) with 0; try omega. + symmetry. apply pathlen_none. apply repr_res_none. + - rewrite (repr_unroll uf x0), (pathlen_unroll uf x0), G. + destruct (M.elt_eq (repr uf x') (repr uf a)); omega. + + clear H; simpl in G. rewrite M.gsspec in G. destruct (M.elt_eq x0 (repr uf a)); try discriminate. + rewrite (repr_none uf x0) by auto. rewrite dec_eq_false; auto. + symmetry. apply pathlen_zero; auto. apply repr_none; auto. +Qed. + +Lemma pathlen_union: + forall uf a b x, + pathlen (union uf a b) x = + if M.elt_eq (repr uf a) (repr uf b) then + pathlen uf x + else if M.elt_eq (repr uf x) (repr uf a) then + (pathlen uf x)+1 + else + (pathlen uf x). +Proof. + intros. unfold union. + destruct (M.elt_eq (repr uf a) (repr uf b)). + auto. + set (uf' := identify uf _ _ _ _). + assert (LENa: pathlen uf (repr uf a) = 0). + { apply pathlen_none. apply repr_res_none. } + pattern x. apply (well_founded_ind (mwf uf')); intros. + rewrite (pathlen_unroll uf'). destruct (M.get x0 (m uf')) as [x'|] eqn:G. + + rewrite H; auto. clear H. simpl in G. rewrite M.gsspec in G. + destruct (M.elt_eq x0 (repr uf a)). + - inversion G; clear G. subst. + rewrite !repr_canonical, dec_eq_true. + rewrite dec_eq_false; auto. + rewrite LENa. rewrite (pathlen_none uf (repr uf b)); try omega. + apply repr_res_none. + - rewrite (repr_unroll uf x0), G, ! (pathlen_some _ _ _ G). + destruct (M.elt_eq _ _); auto. + + clear H. simpl in G. rewrite M.gsspec in G. + destruct (M.elt_eq _ (repr uf a)); try discriminate. + rewrite (repr_none _ _ G), !(pathlen_none _ _ G), dec_eq_false; auto. Qed. Lemma pathlen_gt_merge: diff --git a/test/monniaux/loop_nest/polybench.h b/test/monniaux/loop_nest/polybench.h new file mode 100644 index 00000000..7d092e45 --- /dev/null +++ b/test/monniaux/loop_nest/polybench.h @@ -0,0 +1,202 @@ +/** + * polybench.h: This file is part of the PolyBench/C 3.2 test suite. + * + * + * Contact: Louis-Noel Pouchet <pouchet@cse.ohio-state.edu> + * Web address: http://polybench.sourceforge.net + */ +/* + * Polybench header for instrumentation. + * + * Programs must be compiled with `-I utilities utilities/polybench.c' + * + * Optionally, one can define: + * + * -DPOLYBENCH_TIME, to report the execution time, + * OR (exclusive): + * -DPOLYBENCH_PAPI, to use PAPI H/W counters (defined in polybench.c) + * + * + * See README or utilities/polybench.c for additional options. + * + */ +#ifndef POLYBENCH_H +# define POLYBENCH_H + +# include <stdlib.h> + +/* Array padding. By default, none is used. */ +# ifndef POLYBENCH_PADDING_FACTOR +/* default: */ +# define POLYBENCH_PADDING_FACTOR 0 +# endif + + +/* C99 arrays in function prototype. By default, do not use. */ +# ifdef POLYBENCH_USE_C99_PROTO +# define POLYBENCH_C99_SELECT(x,y) y +# else +/* default: */ +# define POLYBENCH_C99_SELECT(x,y) x +# endif + + +/* Scalar loop bounds in SCoPs. By default, use parametric loop bounds. */ +# ifdef POLYBENCH_USE_SCALAR_LB +# define POLYBENCH_LOOP_BOUND(x,y) x +# else +/* default: */ +# define POLYBENCH_LOOP_BOUND(x,y) y +# endif + + +/* Macros to reference an array. Generic for heap and stack arrays + (C99). Each array dimensionality has his own macro, to be used at + declaration or as a function argument. + Example: + int b[x] => POLYBENCH_1D_ARRAY(b, x) + int A[N][N] => POLYBENCH_2D_ARRAY(A, N, N) +*/ +# ifndef POLYBENCH_STACK_ARRAYS +# define POLYBENCH_ARRAY(x) *x +# define POLYBENCH_FREE_ARRAY(x) free((void*)x); +# define POLYBENCH_DECL_VAR(x) (*x) +# else +# define POLYBENCH_ARRAY(x) x +# define POLYBENCH_FREE_ARRAY(x) +# define POLYBENCH_DECL_VAR(x) x +# endif +/* Macros for using arrays in the function prototypes. */ +# define POLYBENCH_1D(var, dim1,ddim1) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR] +# define POLYBENCH_2D(var, dim1, dim2, ddim1, ddim2) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR] +# define POLYBENCH_3D(var, dim1, dim2, dim3, ddim1, ddim2, ddim3) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim3,ddim3) + POLYBENCH_PADDING_FACTOR] +# define POLYBENCH_4D(var, dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim3,ddim3) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim4,ddim4) + POLYBENCH_PADDING_FACTOR] +# define POLYBENCH_5D(var, dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim3,ddim3) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim4,ddim4) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim5,ddim5) + POLYBENCH_PADDING_FACTOR] + + +/* Macros to allocate heap arrays. + Example: + polybench_alloc_2d_array(N, M, double) => allocates N x M x sizeof(double) + and returns a pointer to the 2d array + */ +# define POLYBENCH_ALLOC_1D_ARRAY(n1, type) \ + (type(*)[n1 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data (n1 + POLYBENCH_PADDING_FACTOR, sizeof(type)) +# define POLYBENCH_ALLOC_2D_ARRAY(n1, n2, type) \ + (type(*)[n1 + POLYBENCH_PADDING_FACTOR][n2 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data ((n1 + POLYBENCH_PADDING_FACTOR) * (n2 + POLYBENCH_PADDING_FACTOR), sizeof(type)) +# define POLYBENCH_ALLOC_3D_ARRAY(n1, n2, n3, type) \ + (type(*)[n1 + POLYBENCH_PADDING_FACTOR][n2 + POLYBENCH_PADDING_FACTOR][n3 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data ((n1 + POLYBENCH_PADDING_FACTOR) * (n2 + POLYBENCH_PADDING_FACTOR) * (n3 + POLYBENCH_PADDING_FACTOR), sizeof(type)) +# define POLYBENCH_ALLOC_4D_ARRAY(n1, n2, n3, n4, type) \ + (type(*)[n1 + POLYBENCH_PADDING_FACTOR][n2 + POLYBENCH_PADDING_FACTOR][n3 + POLYBENCH_PADDING_FACTOR][n4 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data ((n1 + POLYBENCH_PADDING_FACTOR) * (n2 + POLYBENCH_PADDING_FACTOR) * (n3 + POLYBENCH_PADDING_FACTOR) * (n4 + POLYBENCH_PADDING_FACTOR), sizeof(type)) +# define POLYBENCH_ALLOC_5D_ARRAY(n1, n2, n3, n4, n5, type) \ + (type(*)[n1 + POLYBENCH_PADDING_FACTOR][n2 + POLYBENCH_PADDING_FACTOR][n3 + POLYBENCH_PADDING_FACTOR][n4 + POLYBENCH_PADDING_FACTOR][n5 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data ((n1 + POLYBENCH_PADDING_FACTOR) * (n2 + POLYBENCH_PADDING_FACTOR) * (n3 + POLYBENCH_PADDING_FACTOR) * (n4 + POLYBENCH_PADDING_FACTOR) * (n5 + POLYBENCH_PADDING_FACTOR), sizeof(type)) + +/* Macros for array declaration. */ +# ifndef POLYBENCH_STACK_ARRAYS +# define POLYBENCH_1D_ARRAY_DECL(var, type, dim1, ddim1) \ + type POLYBENCH_1D(POLYBENCH_DECL_VAR(var), dim1, ddim1); \ + var = POLYBENCH_ALLOC_1D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), type); +# define POLYBENCH_2D_ARRAY_DECL(var, type, dim1, dim2, ddim1, ddim2) \ + type POLYBENCH_2D(POLYBENCH_DECL_VAR(var), dim1, dim2, ddim1, ddim2); \ + var = POLYBENCH_ALLOC_2D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), POLYBENCH_C99_SELECT(dim2, ddim2), type); +# define POLYBENCH_3D_ARRAY_DECL(var, type, dim1, dim2, dim3, ddim1, ddim2, ddim3) \ + type POLYBENCH_3D(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, ddim1, ddim2, ddim3); \ + var = POLYBENCH_ALLOC_3D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), POLYBENCH_C99_SELECT(dim2, ddim2), POLYBENCH_C99_SELECT(dim3, ddim3), type); +# define POLYBENCH_4D_ARRAY_DECL(var, type, dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4) \ + type POLYBENCH_4D(POLYBENCH_DECL_VAR(var), dim1, dim2, ,dim3, dim4, ddim1, ddim2, ddim3, ddim4); \ + var = POLYBENCH_ALLOC_4D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), POLYBENCH_C99_SELECT(dim2, ddim2), POLYBENCH_C99_SELECT(dim3, ddim3), POLYBENCH_C99_SELECT(dim4, ddim4), type); +# define POLYBENCH_5D_ARRAY_DECL(var, type, dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5) \ + type POLYBENCH_5D(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5); \ + var = POLYBENCH_ALLOC_5D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), POLYBENCH_C99_SELECT(dim2, ddim2), POLYBENCH_C99_SELECT(dim3, ddim3), POLYBENCH_C99_SELECT(dim4, ddim4), POLYBENCH_C99_SELECT(dim5, ddim5), type); +# else +# define POLYBENCH_1D_ARRAY_DECL(var, type, dim1, ddim1) \ + type POLYBENCH_1D(POLYBENCH_DECL_VAR(var), dim1, ddim1); +# define POLYBENCH_2D_ARRAY_DECL(var, type, dim1, dim2, ddim1, ddim2) \ + type POLYBENCH_2D(POLYBENCH_DECL_VAR(var), dim1, dim2, ddim1, ddim2); +# define POLYBENCH_3D_ARRAY_DECL(var, type, dim1, dim2, dim3, ddim1, ddim2, ddim3) \ + type POLYBENCH_3D(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, ddim1, ddim2, ddim3); +# define POLYBENCH_4D_ARRAY_DECL(var, type, dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4) \ + type POLYBENCH_4D(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4); +# define POLYBENCH_5D_ARRAY_DECL(var, type, dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5) \ + type POLYBENCH_5D(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5); +# endif + + +/* Dead-code elimination macros. Use argc/argv for the run-time check. */ +# ifndef POLYBENCH_DUMP_ARRAYS +# define POLYBENCH_DCE_ONLY_CODE if (argc > 42 && ! strcmp(argv[0], "")) +# else +# define POLYBENCH_DCE_ONLY_CODE +# endif + +# define polybench_prevent_dce(func) \ + POLYBENCH_DCE_ONLY_CODE \ + func + + +/* Performance-related instrumentation. See polybench.c */ +# define polybench_start_instruments +# define polybench_stop_instruments +# define polybench_print_instruments + + +/* PAPI support. */ +# ifdef POLYBENCH_PAPI +extern const unsigned int polybench_papi_eventlist[]; +# undef polybench_start_instruments +# undef polybench_stop_instruments +# undef polybench_print_instruments +# define polybench_set_papi_thread_report(x) \ + polybench_papi_counters_threadid = x; +# define polybench_start_instruments \ + polybench_prepare_instruments(); \ + polybench_papi_init(); \ + int evid; \ + for (evid = 0; polybench_papi_eventlist[evid] != 0; evid++) \ + { \ + if (polybench_papi_start_counter(evid)) \ + continue; \ + +# define polybench_stop_instruments \ + polybench_papi_stop_counter(evid); \ + } \ + polybench_papi_close(); \ + +# define polybench_print_instruments polybench_papi_print(); +# endif + + +/* Timing support. */ +# if defined(POLYBENCH_TIME) || defined(POLYBENCH_GFLOPS) +# undef polybench_start_instruments +# undef polybench_stop_instruments +# undef polybench_print_instruments +# define polybench_start_instruments polybench_timer_start(); +# define polybench_stop_instruments polybench_timer_stop(); +# define polybench_print_instruments polybench_timer_print(); +extern double polybench_program_total_flops; +extern void polybench_timer_start(); +extern void polybench_timer_stop(); +extern void polybench_timer_print(); +# endif + +/* Function declaration. */ +# ifdef POLYBENCH_TIME +extern void polybench_timer_start(); +extern void polybench_timer_stop(); +extern void polybench_timer_print(); +# endif + +# ifdef POLYBENCH_PAPI +extern void polybench_prepare_instruments(); +extern int polybench_papi_start_counter(int evid); +extern void polybench_papi_stop_counter(int evid); +extern void polybench_papi_init(); +extern void polybench_papi_close(); +extern void polybench_papi_print(); +# endif + +/* Function prototypes. */ +extern void* polybench_alloc_data(unsigned long long int n, int elt_size); + + +#endif /* !POLYBENCH_H */ diff --git a/test/monniaux/loop_nest/syrk.h b/test/monniaux/loop_nest/syrk.h new file mode 100644 index 00000000..c753ff3b --- /dev/null +++ b/test/monniaux/loop_nest/syrk.h @@ -0,0 +1,54 @@ +/** + * syrk.h: This file is part of the PolyBench/C 3.2 test suite. + * + * + * Contact: Louis-Noel Pouchet <pouchet@cse.ohio-state.edu> + * Web address: http://polybench.sourceforge.net + */ +#ifndef SYRK_H +# define SYRK_H + +/* Default to STANDARD_DATASET. */ +# if !defined(MINI_DATASET) && !defined(SMALL_DATASET) && !defined(LARGE_DATASET) && !defined(EXTRALARGE_DATASET) +# define STANDARD_DATASET +# endif + +/* Do not define anything if the user manually defines the size. */ +# if !defined(NI) && !defined(NJ) +/* Define the possible dataset sizes. */ +# ifdef MINI_DATASET +# define NI 32 +# define NJ 32 +# endif + +# ifdef SMALL_DATASET +# define NI 128 +# define NJ 128 +# endif + +# ifdef STANDARD_DATASET /* Default if unspecified. */ +# define NI 1024 +# define NJ 1024 +# endif + +# ifdef LARGE_DATASET +# define NI 2000 +# define NJ 2000 +# endif + +# ifdef EXTRALARGE_DATASET +# define NI 4000 +# define NJ 4000 +# endif +# endif /* !N */ + +# define _PB_NI POLYBENCH_LOOP_BOUND(NI,ni) +# define _PB_NJ POLYBENCH_LOOP_BOUND(NJ,nj) + +# ifndef DATA_TYPE +# define DATA_TYPE double +# define DATA_PRINTF_MODIFIER "%0.2lf " +# endif + + +#endif /* !SYRK */ diff --git a/tools/compiler_expand.ml b/tools/compiler_expand.ml index e5cab30c..febb0282 100644 --- a/tools/compiler_expand.ml +++ b/tools/compiler_expand.ml @@ -11,65 +11,79 @@ David Monniaux, CNRS, VERIMAG type is_partial = TOTAL | PARTIAL;; type print_result = Noprint | Print of string;; type when_triggered = Always | Option of string;; +type needs_require = Require | NoRequire;; +(* FIXME - The gestion of NoRequire is a bit ugly right now. *) let rtl_passes = [| -TOTAL, (Option "optim_tailcalls"), (Some "Tail calls"), "Tailcall"; -PARTIAL, Always, (Some "Inlining"), "Inlining"; -TOTAL, (Option "profile_arcs"), (Some "Profiling insertion"), "Profiling"; -TOTAL, (Option "branch_probabilities"), (Some "Profiling use"), "ProfilingExploit"; -TOTAL, (Option "optim_move_loop_invariants"), (Some "Inserting initial nop"), "FirstNop"; -TOTAL, Always, (Some "Renumbering"), "Renumber"; -PARTIAL, (Option "optim_CSE"), (Some "CSE"), "CSE"; -PARTIAL, Always, (Some "Duplicating blocks"), "Duplicate"; -TOTAL, Always, (Some "Renumbering pre constprop"), "Renumber"; -TOTAL, (Option "optim_constprop"), (Some "Constant propagation"), "Constprop"; -TOTAL, Always, (Some "Renumbering pre CSE"), "Renumber"; -TOTAL, (Option "optim_CSE2"), (Some "CSE2"), "CSE2"; -PARTIAL, (Option "optim_CSE3"), (Some "CSE3"), "CSE3"; -TOTAL, (Option "optim_CSE3"), (Some "Kill useless moves after CSE3"), "KillUselessMoves"; -TOTAL, (Option "optim_forward_moves"), (Some "Forwarding moves"), "ForwardMoves"; -PARTIAL, (Option "optim_redundancy"), (Some "Redundancy elimination"), "Deadcode"; -PARTIAL, (Option "optim_move_loop_invariants"), (Some "LICM"), "LICM"; -TOTAL, (Option "optim_move_loop_invariants"), (Some "Renumbering for LICM"), "Renumber"; -PARTIAL, (Option "optim_move_loop_invariants"), (Some "CSE3 for LICM"), "CSE3"; -PARTIAL, (Option "optim_move_loop_invariants"), (Some "Redundancy elimination for LICM"), "Deadcode"; -TOTAL, (Option "all_loads_nontrap"), None, "Allnontrap"; -PARTIAL, Always, (Some "Unused globals"), "Unusedglob" +TOTAL, (Option "optim_tailcalls"), Require, (Some "Tail calls"), "Tailcall"; +PARTIAL, Always, Require, (Some "Inlining"), "Inlining"; +TOTAL, (Option "profile_arcs"), Require, (Some "Profiling insertion"), "Profiling"; +TOTAL, (Option "branch_probabilities"), Require, (Some "Profiling use"), "ProfilingExploit"; +TOTAL, (Option "optim_move_loop_invariants"), Require, (Some "Inserting initial nop"), "FirstNop"; +TOTAL, Always, Require, (Some "Renumbering"), "Renumber"; +PARTIAL, (Option "optim_CSE"), Require, (Some "CSE"), "CSE"; +PARTIAL, Always, NoRequire, (Some "Static Prediction + inverting conditions"), "Staticpredict"; +PARTIAL, Always, NoRequire, (Some "Unrolling one iteration out of innermost loops"), "Unrollsingle"; +TOTAL, Always, NoRequire, (Some "Renumbering pre unroll"), "Renumber"; +PARTIAL, Always, NoRequire, (Some "Unrolling the body of innermost loops"), "Unrollbody"; +TOTAL, Always, NoRequire, (Some "Renumbering pre tail duplication"), "Renumber"; +PARTIAL, Always, NoRequire, (Some "Performing tail duplication"), "Tailduplicate"; +TOTAL, Always, NoRequire, (Some "Renumbering pre constprop"), "Renumber"; +TOTAL, (Option "optim_constprop"), Require, (Some "Constant propagation"), "Constprop"; +TOTAL, Always, NoRequire, (Some "Renumbering pre CSE"), "Renumber"; +TOTAL, (Option "optim_CSE2"), Require, (Some "CSE2"), "CSE2"; +PARTIAL, (Option "optim_CSE3"), Require, (Some "CSE3"), "CSE3"; +TOTAL, (Option "optim_CSE3"), Require, (Some "Kill useless moves after CSE3"), "KillUselessMoves"; +TOTAL, (Option "optim_forward_moves"), Require, (Some "Forwarding moves"), "ForwardMoves"; +PARTIAL, (Option "optim_redundancy"), Require, (Some "Redundancy elimination"), "Deadcode"; +TOTAL, Always, Require, (Some "Renumbering pre rotate"), "Renumber"; +PARTIAL, Always, NoRequire, (Some "Loop Rotate"), "Looprotate"; +TOTAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "Renumbering for LICM"), "Renumber"; +PARTIAL, (Option "optim_move_loop_invariants"), Require, (Some "LICM"), "LICM"; +TOTAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "Renumbering for LICM"), "Renumber"; +PARTIAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "CSE3 for LICM"), "CSE3"; +PARTIAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "Redundancy elimination for LICM"), "Deadcode"; +TOTAL, (Option "all_loads_nontrap"), Require, None, "Allnontrap"; +PARTIAL, Always, Require, (Some "Unused globals"), "Unusedglob" |];; let post_rtl_passes = [| - PARTIAL, Always, (Some "Register allocation"), "Allocation", (Print "LTL"); - TOTAL, Always, (Some "Branch tunneling"), "Tunneling", Noprint; - PARTIAL, Always, (Some "CFG linearization"), "Linearize", Noprint; - TOTAL, Always, (Some "Label cleanup"), "CleanupLabels", Noprint; - PARTIAL, (Option "debug"), (Some "Debugging info for local variables"), "Debugvar", Noprint; - PARTIAL, Always, (Some "Mach generation"), "Stacking", (Print "Mach") + PARTIAL, Always, Require, (Some "Register allocation"), "Allocation", (Print "LTL 1"); + PARTIAL, Always, Require, (Some "Branch tunneling"), "Tunneling", (Print "LTL 2"); + PARTIAL, Always, Require, (Some "CFG linearization"), "Linearize", Noprint; + TOTAL, Always, Require, (Some "Label cleanup"), "CleanupLabels", Noprint; + PARTIAL, (Option "debug"), Require, (Some "Debugging info for local variables"), "Debugvar", Noprint; + PARTIAL, Always, Require, (Some "Mach generation"), "Stacking", (Print "Mach") |];; let all_passes = Array.concat [Array.mapi - (fun i (a,b,c,d) -> (a,b,c,d, Print (Printf.sprintf "RTL %d" (i+1)))) + (fun i (a,b,r,c,d) -> (a,b,r,c,d, Print (Printf.sprintf "RTL %d" (i+1)))) rtl_passes; post_rtl_passes];; let totality = function TOTAL -> "total" | PARTIAL -> "partial";; let print_rtl_require oc = - Array.iter (fun (partial, trigger, time_label, pass_name, printing) -> - Printf.fprintf oc "Require %s.\n" pass_name) - all_passes;; + Array.iter (fun (partial, trigger, require, time_label, pass_name, printing) -> + match require with Require -> + Printf.fprintf oc "Require %s.\n" pass_name + | _ -> () + ) all_passes;; let print_rtl_require_proof oc = - Array.iter (fun (partial, trigger, time_label, pass_name, printing) -> - Printf.fprintf oc "Require %sproof.\n" pass_name) - all_passes;; + Array.iter (fun (partial, trigger, require, time_label, pass_name, printing) -> + match require with Require -> + Printf.fprintf oc "Require %sproof.\n" pass_name + | _ -> () + ) all_passes;; let print_rtl_transf oc = Array.iteri - (fun i (partial, trigger, time_label, pass_name, printing) -> + (fun i (partial, trigger, require, time_label, pass_name, printing) -> output_string oc (match partial with | TOTAL -> " @@ " | PARTIAL -> " @@@ "); @@ -90,7 +104,7 @@ let print_rtl_transf oc = ) all_passes;; let print_rtl_mkpass oc = - Array.iter (fun (partial, trigger, time_label, pass_name, printing) -> + Array.iter (fun (partial, trigger, require, time_label, pass_name, printing) -> output_string oc " ::: mkpass ("; (match trigger with | Always -> () @@ -106,7 +120,7 @@ let print_if kind oc = function let numbering_base = 7 let print_rtl_proof oc = - Array.iteri (fun i (partial, trigger, time_label, pass_name, printing) -> + Array.iteri (fun i (partial, trigger, require, time_label, pass_name, printing) -> let j = i+numbering_base in match partial with | TOTAL -> @@ -118,7 +132,7 @@ let print_rtl_proof oc = all_passes;; let print_rtl_proof2 oc = - Array.iteri (fun i (partial, trigger, time_label, pass_name, printing) -> + Array.iteri (fun i (partial, trigger, require, time_label, pass_name, printing) -> let j = i+numbering_base in Printf.fprintf oc " exists p%d; split. " j; (match trigger with @@ -131,7 +145,7 @@ let print_rtl_proof2 oc = all_passes;; let print_rtl_forward_simulations oc = - Array.iter (fun (partial, trigger, time_label, pass_name) -> + Array.iter (fun (partial, trigger, require, time_label, pass_name) -> output_string oc " eapply compose_forward_simulations.\n "; (match trigger with | Always -> () |