diff options
author | Cyril SIX <cyril.six@kalray.eu> | 2020-12-04 17:41:14 +0100 |
---|---|---|
committer | Cyril SIX <cyril.six@kalray.eu> | 2020-12-04 17:41:14 +0100 |
commit | 60ff1e39bac5ab35c46698cbc1ed7a76fc936cab (patch) | |
tree | 87ff5f3b209e5659e967f862dab1517bb2b32baa /backend | |
parent | f2fb8540c94ceb9892510f83bd7d6734fe9d422f (diff) | |
parent | d2197102d6b81e225865cfac5f1d319d168e1e23 (diff) | |
download | compcert-kvx-60ff1e39bac5ab35c46698cbc1ed7a76fc936cab.tar.gz compcert-kvx-60ff1e39bac5ab35c46698cbc1ed7a76fc936cab.zip |
Merge branch 'kvx-work' into kvx-work-merge3.8
Conflicts:
Makefile
configure
Diffstat (limited to 'backend')
-rw-r--r-- | backend/CSE3analysis.v | 22 | ||||
-rw-r--r-- | backend/CSE3analysisaux.ml | 4 | ||||
-rw-r--r-- | backend/CSE3analysisproof.v | 143 | ||||
-rw-r--r-- | backend/Duplicateaux.ml | 91 | ||||
-rw-r--r-- | backend/LICMaux.ml | 6 | ||||
-rw-r--r-- | backend/Lineartyping.v | 1 | ||||
-rw-r--r-- | backend/PrintLTL.ml | 4 | ||||
-rw-r--r-- | backend/Tunneling.v | 138 | ||||
-rw-r--r-- | backend/Tunnelingaux.ml | 283 | ||||
-rw-r--r-- | backend/Tunnelingproof.v | 504 | ||||
-rw-r--r-- | backend/ValueDomain.v | 483 |
11 files changed, 1393 insertions, 286 deletions
diff --git a/backend/CSE3analysis.v b/backend/CSE3analysis.v index 5ed04bc4..8b7f1190 100644 --- a/backend/CSE3analysis.v +++ b/backend/CSE3analysis.v @@ -181,12 +181,24 @@ Definition eq_depends_on_mem eq := | SOp op => op_depends_on_memory op end. +Definition eq_depends_on_store eq := + match eq_op eq with + | SLoad _ _ => true + | SOp op => false + end. + Definition get_mem_kills (eqs : PTree.t equation) : PSet.t := PTree.fold (fun already (eqno : eq_id) (eq : equation) => if eq_depends_on_mem eq then PSet.add eqno already else already) eqs PSet.empty. +Definition get_store_kills (eqs : PTree.t equation) : PSet.t := + PTree.fold (fun already (eqno : eq_id) (eq : equation) => + if eq_depends_on_store eq + then PSet.add eqno already + else already) eqs PSet.empty. + Definition is_move (op : operation) : { op = Omove } + { op <> Omove }. Proof. @@ -216,6 +228,7 @@ Record eq_context := mkeqcontext eq_rhs_oracle : node -> sym_op -> list reg -> PSet.t; eq_kill_reg : reg -> PSet.t; eq_kill_mem : unit -> PSet.t; + eq_kill_store : unit -> PSet.t; eq_moves : reg -> PSet.t }. Section OPERATIONS. @@ -342,6 +355,9 @@ Section OPERATIONS. (oper1 dst op args' rel) end. + Definition kill_store (rel : RELATION.t) : RELATION.t := + PSet.subtract rel (eq_kill_store ctx tt). + Definition clever_kill_store (chunk : memory_chunk) (addr: addressing) (args : list reg) (src : reg) @@ -358,7 +374,7 @@ Section OPERATIONS. may_overlap chunk addr args chunk' addr' (eq_args eq) end end) - (PSet.inter rel (eq_kill_mem ctx tt))). + (PSet.inter rel (eq_kill_store ctx tt))). Definition store2 (chunk : memory_chunk) (addr: addressing) (args : list reg) @@ -366,7 +382,7 @@ Section OPERATIONS. (rel : RELATION.t) : RELATION.t := if Compopts.optim_CSE3_alias_analysis tt then clever_kill_store chunk addr args src rel - else kill_mem rel. + else kill_store rel. Definition store1 (chunk : memory_chunk) (addr: addressing) (args : list reg) @@ -501,6 +517,7 @@ Definition context_from_hints (hints : analysis_hints) := let eqs := hint_eq_catalog hints in let reg_kills := get_reg_kills eqs in let mem_kills := get_mem_kills eqs in + let store_kills := get_store_kills eqs in let moves := get_moves eqs in {| eq_catalog := fun eq_id => PTree.get eq_id eqs; @@ -508,5 +525,6 @@ Definition context_from_hints (hints : analysis_hints) := eq_rhs_oracle := hint_eq_rhs_oracle hints; eq_kill_reg := fun reg => PMap.get reg reg_kills; eq_kill_mem := fun _ => mem_kills; + eq_kill_store := fun _ => store_kills; eq_moves := fun reg => PMap.get reg moves |}. diff --git a/backend/CSE3analysisaux.ml b/backend/CSE3analysisaux.ml index e37ef61f..e038331c 100644 --- a/backend/CSE3analysisaux.ml +++ b/backend/CSE3analysisaux.ml @@ -174,6 +174,7 @@ let preanalysis (tenv : typing_env) (f : RTL.coq_function) = and rhs_table = Hashtbl.create 100 and cur_kill_reg = ref (PMap.init PSet.empty) and cur_kill_mem = ref PSet.empty + and cur_kill_store = ref PSet.empty and cur_moves = ref (PMap.init PSet.empty) in let eq_find_oracle node eq = assert (not (is_trivial eq)); @@ -216,6 +217,8 @@ let preanalysis (tenv : typing_env) (f : RTL.coq_function) = (eq.eq_lhs :: eq.eq_args); (if eq_depends_on_mem eq then cur_kill_mem := PSet.add coq_id !cur_kill_mem); + (if eq_depends_on_store eq + then cur_kill_store := PSet.add coq_id !cur_kill_store); (match eq.eq_op, eq.eq_args with | (SOp Op.Omove), [rhs] -> imp_add_i_j cur_moves eq.eq_lhs coq_id | _, _ -> ()); @@ -232,6 +235,7 @@ let preanalysis (tenv : typing_env) (f : RTL.coq_function) = eq_rhs_oracle = rhs_find_oracle ; eq_kill_reg = (fun reg -> PMap.get reg !cur_kill_reg); eq_kill_mem = (fun () -> !cur_kill_mem); + eq_kill_store = (fun () -> !cur_kill_store); eq_moves = (fun reg -> PMap.get reg !cur_moves) } in match internal_analysis ctx tenv f diff --git a/backend/CSE3analysisproof.v b/backend/CSE3analysisproof.v index 1e5b88c3..b298ea65 100644 --- a/backend/CSE3analysisproof.v +++ b/backend/CSE3analysisproof.v @@ -133,13 +133,18 @@ Definition xlget_kills (eqs : list (eq_id * equation)) (m : Regmap.t PSet.t) : add_i_j (eq_lhs (snd item)) (fst item) (add_ilist_j (eq_args (snd item)) (fst item) already)) eqs m. - Definition xlget_mem_kills (eqs : list (positive * equation)) (m : PSet.t) : PSet.t := (fold_left (fun (a : PSet.t) (p : positive * equation) => if eq_depends_on_mem (snd p) then PSet.add (fst p) a else a) eqs m). +Definition xlget_store_kills (eqs : list (positive * equation)) (m : PSet.t) : PSet.t := +(fold_left + (fun (a : PSet.t) (p : positive * equation) => + if eq_depends_on_store (snd p) then PSet.add (fst p) a else a) + eqs m). + Lemma xlget_kills_monotone : forall eqs m i j, PSet.contains (Regmap.get i m) j = true -> @@ -170,6 +175,24 @@ Qed. Hint Resolve xlget_mem_kills_monotone : cse3. +Lemma xlget_store_kills_monotone : + forall eqs m j, + PSet.contains m j = true -> + PSet.contains (xlget_store_kills eqs m) j = true. +Proof. + induction eqs; simpl; trivial. + intros. + destruct eq_depends_on_store. + - apply IHeqs. + destruct (peq (fst a) j). + + subst j. apply PSet.gadds. + + rewrite PSet.gaddo by congruence. + trivial. + - auto. +Qed. + +Hint Resolve xlget_store_kills_monotone : cse3. + Lemma xlget_kills_has_lhs : forall eqs m lhs sop args j, In (j, {| eq_lhs := lhs; @@ -333,6 +356,60 @@ Qed. Hint Resolve context_from_hints_get_kills_has_eq_depends_on_mem : cse3. +Lemma xlget_kills_has_eq_depends_on_store : + forall eqs eq j m, + In (j, eq) eqs -> + eq_depends_on_store eq = true -> + PSet.contains (xlget_store_kills eqs m) j = true. +Proof. + induction eqs; simpl. + contradiction. + intros. + destruct H. + { subst a. + simpl. + rewrite H0. + apply xlget_store_kills_monotone. + apply PSet.gadds. + } + eauto. +Qed. + +Hint Resolve xlget_kills_has_eq_depends_on_store : cse3. + +Lemma get_kills_has_eq_depends_on_store : + forall eqs eq j, + PTree.get j eqs = Some eq -> + eq_depends_on_store eq = true -> + PSet.contains (get_store_kills eqs) j = true. +Proof. + intros. + unfold get_store_kills. + rewrite PTree.fold_spec. + change (fold_left + (fun (a : PSet.t) (p : positive * equation) => + if eq_depends_on_store (snd p) then PSet.add (fst p) a else a) + (PTree.elements eqs) PSet.empty) + with (xlget_store_kills (PTree.elements eqs) PSet.empty). + eapply xlget_kills_has_eq_depends_on_store. + apply PTree.elements_correct. + eassumption. + trivial. +Qed. + +Lemma context_from_hints_get_kills_has_eq_depends_on_store : + forall hints eq j, + PTree.get j (hint_eq_catalog hints) = Some eq -> + eq_depends_on_store eq = true -> + PSet.contains (eq_kill_store (context_from_hints hints) tt) j = true. +Proof. + intros. + simpl. + eapply get_kills_has_eq_depends_on_store; eassumption. +Qed. + +Hint Resolve context_from_hints_get_kills_has_eq_depends_on_store : cse3. + Definition eq_involves (eq : equation) (i : reg) := i = (eq_lhs eq) \/ In i (eq_args eq). @@ -418,6 +495,12 @@ Section SOUNDNESS. eq_depends_on_mem eq = true -> PSet.contains (eq_kill_mem ctx tt) j = true. + Hypothesis ctx_kill_store_has_depends_on_store : + forall eq j, + eq_catalog ctx j = Some eq -> + eq_depends_on_store eq = true -> + PSet.contains (eq_kill_store ctx tt) j = true. + Theorem kill_reg_sound : forall rel rs m dst v, (sem_rel rel rs m) -> @@ -574,6 +657,55 @@ Section SOUNDNESS. Hint Resolve kill_mem_sound : cse3. + (* TODO: shouldn't this already be proved somewhere else? *) + Lemma store_preserves_validity: + forall m m' wchunk a v + (STORE : Mem.storev wchunk m a v = Some m') + (b : block) (z : Z), + Mem.valid_pointer m' b z = Mem.valid_pointer m b z. + Proof. + unfold Mem.storev. + intros. + destruct a; try discriminate. + Local Transparent Mem.store. + unfold Mem.store in STORE. + destruct Mem.valid_access_dec in STORE. + 2: discriminate. + inv STORE. + reflexivity. + Qed. + + Hint Resolve store_preserves_validity : cse3. + + Theorem kill_store_sound : + forall rel rs m m' wchunk a v, + (sem_rel rel rs m) -> + (Mem.storev wchunk m a v = Some m') -> + (sem_rel (kill_store (ctx:=ctx) rel) rs m'). + Proof. + unfold sem_rel, sem_eq, sem_rhs, kill_store. + intros until v. + intros REL STORE i eq. + specialize REL with (i := i) (eq0 := eq). + intros SUBTRACT CATALOG. + rewrite PSet.gsubtract in SUBTRACT. + rewrite andb_true_iff in SUBTRACT. + intuition. + destruct (eq_op eq) as [op | chunk addr] eqn:OP. + - rewrite op_valid_pointer_eq with (m2 := m). + assumption. + eapply store_preserves_validity; eauto. + - specialize ctx_kill_store_has_depends_on_store with (eq0 := eq) (j := i). + destruct eq as [lhs op args]; simpl in *. + rewrite OP in ctx_kill_store_has_depends_on_store. + rewrite negb_true_iff in H0. + rewrite OP in CATALOG. + intuition. + congruence. + Qed. + + Hint Resolve kill_store_sound : cse3. + Theorem eq_find_sound: forall no eq id, eq_find (ctx := ctx) no eq = Some id -> @@ -895,13 +1027,12 @@ Section SOUNDNESS. unfold sem_eq in *. simpl in *. destruct eq_op as [op' | chunk' addr']; simpl. - - destruct (op_depends_on_memory op') eqn:DEPENDS. - + erewrite ctx_kill_mem_has_depends_on_mem in CONTAINS by eauto. - discriminate. - + rewrite op_depends_on_memory_correct with (m2:=m); trivial. + - rewrite op_valid_pointer_eq with (m2 := m). + + cbn in *. apply REL; auto. + + eapply store_preserves_validity; eauto. - simpl in REL. - erewrite ctx_kill_mem_has_depends_on_mem in CONTAINS by eauto. + erewrite ctx_kill_store_has_depends_on_store in CONTAINS by eauto. simpl in CONTAINS. rewrite negb_true_iff in CONTAINS. destruct (eval_addressing genv sp addr' rs ## eq_args) as [a'|] eqn:ADDR'. diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml index 76b5616b..c9985dc4 100644 --- a/backend/Duplicateaux.ml +++ b/backend/Duplicateaux.ml @@ -115,16 +115,18 @@ let ptree_printbool pt = (* Looks ahead (until a branch) to see if a node further down verifies * the given predicate *) -let rec look_ahead code node is_loop_header predicate = +let rec look_ahead_gen (successors: RTL.instruction -> P.t list) code node is_loop_header predicate = if (predicate node) then true - else match (rtl_successors @@ get_some @@ PTree.get node code) with + else match (successors @@ get_some @@ PTree.get node code) with | [n] -> if (predicate n) then true else ( if (get_some @@ PTree.get n is_loop_header) then false - else look_ahead code n is_loop_header predicate + else look_ahead_gen successors code n is_loop_header predicate ) | _ -> false +let look_ahead = look_ahead_gen rtl_successors + (** * Heuristics mostly based on the paper Branch Prediction for Free *) @@ -545,7 +547,8 @@ let is_a_nop code n = * preds: mapping node -> predecessors * ptree: the revmap * trace: the trace to follow tail duplication on *) -let tail_duplicate code preds ptree trace = +let tail_duplicate code preds is_loop_header ptree trace = + debug "Tail_duplicate on that trace: %a\n" print_trace trace; (* next_int: unused integer that can be used for the next duplication *) let next_int = ref (next_free_pc code) (* last_node and last_duplicate store resp. the last processed node of the trace, and its duplication *) @@ -560,7 +563,12 @@ let tail_duplicate code preds ptree trace = if is_first then (code, ptree) (* first node is never duplicated regardless of its inputs *) else let node_preds = ptree_get_some n preds - in let node_preds_nolast = List.filter (fun e -> e <> get_some !last_node) node_preds + in let node_preds_nolast = + (* We traverse loop headers without initiating tail duplication + * (see case of two imbricated loops) *) + if (get_some @@ PTree.get n is_loop_header) then [] + else List.filter (fun e -> e <> get_some !last_node) node_preds + (* in let node_preds_nolast = List.filter (fun e -> not @@ List.mem e t) node_preds_nolast *) in let final_node_preds = match !last_duplicate with | None -> node_preds_nolast | Some n' -> n' :: node_preds_nolast @@ -581,12 +589,12 @@ let tail_duplicate code preds ptree trace = in let new_code, new_ptree = f code ptree true trace in (new_code, new_ptree, !nb_duplicated) -let superblockify_traces code preds traces ptree = +let superblockify_traces code preds is_loop_header traces ptree = let max_nb_duplicated = !Clflags.option_ftailduplicate (* FIXME - should be architecture dependent *) in let rec f code ptree = function | [] -> (code, ptree, 0) | trace :: traces -> - let new_code, new_ptree, nb_duplicated = tail_duplicate code preds ptree trace + let new_code, new_ptree, nb_duplicated = tail_duplicate code preds is_loop_header ptree trace in if (nb_duplicated < max_nb_duplicated) then (debug "End duplication\n"; f new_code new_ptree traces) else (debug "Too many duplicated nodes, aborting tail duplication\n"; (code, ptree, 0)) @@ -615,15 +623,29 @@ type innerLoop = { preds: P.t list; body: P.t list; head: P.t; (* head of the loop *) - finals: P.t list (* the final instructions, which loops back to the head *) + finals: P.t list; (* the final instructions, which loops back to the head *) (* There may be more than one ; for instance if there is an if inside the loop with both - * branches leading to a goto backedge *) + * branches leading to a goto backedge + * Such cases usually happen after a tail-duplication *) + sb_final: P.t option; (* if the innerloop wraps a superblock, this is its final instruction *) + (* may be None if we predict that we do not loop *) } let print_pset = LICMaux.pp_pset +let print_option_pint oc o = + if !debug_flag then + match o with + | None -> Printf.fprintf oc "None" + | Some n -> Printf.fprintf oc "Some %d" (P.to_int n) + let print_inner_loop iloop = - debug "{preds: %a, body: %a}" print_intlist iloop.preds print_intlist iloop.body + debug "{preds: %a, body: %a, head: %d, finals: %a, sb_final: %a}\n" + print_intlist iloop.preds + print_intlist iloop.body + (P.to_int iloop.head) + print_intlist iloop.finals + print_option_pint iloop.sb_final let rec print_inner_loops = function | [] -> () @@ -692,6 +714,34 @@ let get_inner_loops f code is_loop_header = !iloops *) +let rtl_successors_pref = function +| Itailcall _ | Ireturn _ -> [] +| Icall(_,_,_,_,n) | Ibuiltin(_,_,_,n) | Inop n | Iop (_,_,_,n) +| Iload (_,_,_,_,_,n) | Istore (_,_,_,_,n) -> [n] +| Icond (_,_,n1,n2,p) -> (match p with + | Some true -> [n1] + | Some false -> [n2] + | None -> [n1; n2]) +| Ijumptable (_,ln) -> ln + +(* Find the last node of a trace (starting at "node"), until a loop is encountered. + * If a non-predicted branch is encountered, returns None *) +let rec find_last_node_before_loop code node trace is_loop_header = + let rtl_succ = rtl_successors @@ get_some @@ PTree.get node code in + let headers = List.filter (fun n -> + get_some @@ PTree.get n is_loop_header && HashedSet.PSet.contains trace n) rtl_succ in + match headers with + | [] -> ( + let next_nodes = List.filter (fun n -> HashedSet.PSet.contains trace n) + (rtl_successors_pref @@ get_some @@ PTree.get node code) in + match next_nodes with + | [n] -> find_last_node_before_loop code n trace is_loop_header + | _ -> None (* May happen when we predict that a loop is not taken *) + ) + | [h] -> Some node + | _ -> failwith "Multiple branches leading to a loop" + +(* The computation of sb_final requires to already have branch prediction *) let get_inner_loops f code is_loop_header = let fake_f = { fn_sig = f.fn_sig; fn_params = f.fn_params; fn_stacksize = f.fn_stacksize; fn_code = code; fn_entrypoint = f.fn_entrypoint } in @@ -718,7 +768,10 @@ let get_inner_loops f code is_loop_header = debug "HEADPREDS: %a\n" print_intlist head_preds; filtered end in - { preds = preds; body = (HashedSet.PSet.elements body); head = head; finals = finals } + let sb_final = find_last_node_before_loop code head body is_loop_header in + let body = HashedSet.PSet.elements body in + { preds = preds; body = body; head = head; finals = finals; + sb_final = sb_final; } ) (* LICMaux.inner_loops also returns non-inner loops, but with a body of 1 instruction * We remove those to get just the inner loops *) @@ -836,31 +889,36 @@ let unroll_inner_loops_single f code revmap = (!code', !revmap') end +let is_some o = match o with Some _ -> true | None -> false + (* Unrolls the body of the inner loop once - duplicating the exit condition as well * 1) Clones body into body' - * 2) Links the last instruction of body into the first of body' + * 2) Links the last instruction of body (sb_final) into the first of body' * 3) Links the last instruction of body' into the first of body *) let unroll_inner_loop_body code revmap iloop = + debug "iloop = "; print_inner_loop iloop; let body = iloop.body in let limit = !Clflags.option_funrollbody in if count_ignore_nops code body > limit then begin debug "Too many nodes in the loop body (%d > %d)" (List.length body) limit; (code, revmap) + end else if not @@ is_some iloop.sb_final then begin + debug "The loop body does not form a superblock OR we have predicted that we do not loop"; + (code, revmap) end else let (code2, revmap2, dupbody, fwmap) = clone code revmap body in let code' = ref code2 in let head' = apply_map fwmap (iloop.head) in let finals' = apply_map_list fwmap (iloop.finals) in begin - code' := change_pointers !code' iloop.head head' iloop.finals; + code' := change_pointers !code' iloop.head head' [get_some @@ iloop.sb_final]; code' := change_pointers !code' head' iloop.head finals'; (!code', revmap2) end let unroll_inner_loops_body f code revmap = let is_loop_header = get_loop_headers code (f.fn_entrypoint) in - (* debug_flag := true; *) let inner_loops = get_inner_loops f code is_loop_header in debug "Number of loops found: %d\n" (List.length inner_loops); let code' = ref code in @@ -870,7 +928,7 @@ let unroll_inner_loops_body f code revmap = List.iter (fun iloop -> let (new_code, new_revmap) = unroll_inner_loop_body !code' !revmap' iloop in code' := new_code; revmap' := new_revmap - ) inner_loops; (* debug_flag := false; *) + ) inner_loops; (!code', !revmap') end @@ -966,6 +1024,7 @@ let tail_duplicate f = if !Clflags.option_ftailduplicate > 0 then let traces = select_traces code entrypoint in let preds = get_predecessors_rtl code in - superblockify_traces code preds traces revmap + let is_loop_header = get_loop_headers code entrypoint in + superblockify_traces code preds is_loop_header traces revmap else (code, revmap) in ((code, entrypoint), revmap) diff --git a/backend/LICMaux.ml b/backend/LICMaux.ml index 6283e129..602d078d 100644 --- a/backend/LICMaux.ml +++ b/backend/LICMaux.ml @@ -255,8 +255,8 @@ let rewrite_loop_body (last_alloc : reg ref) (List.map (map_reg mapper) args), new_res)); PTree.set res new_res mapper - | Iload(_, chunk, addr, args, res, pc') - | Istore(chunk, addr, args, res, pc') + | Iload(_, chunk, addr, args, v, pc') + | Istore(chunk, addr, args, v, pc') when Archi.has_notrap_loads && !Clflags.option_fnontrap_loads -> let new_res = P.succ !last_alloc in @@ -264,7 +264,7 @@ let rewrite_loop_body (last_alloc : reg ref) add_inj (INJload(chunk, addr, (List.map (map_reg mapper) args), new_res)); - PTree.set res new_res mapper + PTree.set v new_res mapper | _ -> mapper in List.iter (fun x -> if PSet.contains loop_body x diff --git a/backend/Lineartyping.v b/backend/Lineartyping.v index 3fe61470..22658fb7 100644 --- a/backend/Lineartyping.v +++ b/backend/Lineartyping.v @@ -324,7 +324,6 @@ Local Opaque mreg_type. apply wt_setreg; auto; try (apply wt_undef_regs; auto). eapply Val.has_subtype; eauto. - change ty_res with (snd (ty_args, ty_res)). rewrite <- TYOP. eapply type_of_operation_sound; eauto. red; intros; subst op. simpl in ISMOVE. destruct args; try discriminate. destruct args; discriminate. diff --git a/backend/PrintLTL.ml b/backend/PrintLTL.ml index 4c172020..87e8a1fc 100644 --- a/backend/PrintLTL.ml +++ b/backend/PrintLTL.ml @@ -133,10 +133,10 @@ let print_program pp (prog: LTL.program) = let destination : string option ref = ref None -let print_if prog = +let print_if passno prog = match !destination with | None -> () | Some f -> - let oc = open_out f in + let oc = open_out (f ^ "." ^ Z.to_string passno) in print_program oc prog; close_out oc diff --git a/backend/Tunneling.v b/backend/Tunneling.v index 78458582..269ebb6f 100644 --- a/backend/Tunneling.v +++ b/backend/Tunneling.v @@ -3,6 +3,7 @@ (* The Compcert verified compiler *) (* *) (* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) (* *) (* Copyright Institut National de Recherche en Informatique et en *) (* Automatique. All rights reserved. This file is distributed *) @@ -12,7 +13,7 @@ (** Branch tunneling (optimization of branches to branches). *) -Require Import Coqlib Maps UnionFind. +Require Import Coqlib Maps Errors. Require Import AST. Require Import LTL. @@ -21,10 +22,10 @@ Require Import LTL. so that they jump directly to the end of the branch sequence. For example: << - L1: nop L2; L1: nop L3; - L2; nop L3; becomes L2: nop L3; + L1: if (cond) nop L2; L1: nop L3; + L2: nop L3; becomes L2: nop L3; L3: instr; L3: instr; - L4: if (cond) goto L1; L4: if (cond) goto L3; + L4: if (cond) goto L1; L4: if (cond) nop L1; >> This optimization can be applied to several of our intermediate languages. We choose to perform it on the [LTL] language, @@ -37,11 +38,14 @@ Require Import LTL. dead code (as the "nop L3" in the example above). *) -(** The naive implementation of branch tunneling would replace - any branch to a node [pc] by a branch to the node - [branch_target f pc], defined as follows: +(** The implementation consists in two passes: the first pass + records the branch t of each "nop" + and the second pass replace any "nop" node to [pc] + by a branch to a "nop" at [branch_t f pc] + +Naively, we may define [branch_t f pc] as follows: << - branch_target f pc = branch_target f pc' if f(pc) = nop pc' + branch_t f pc = branch_t f pc' if f(pc) = nop pc' = pc otherwise >> However, this definition can fail to terminate if @@ -50,56 +54,114 @@ Require Import LTL. L1: nop L1; >> or -<< L1: nop L2; +<< + L1: nop L2; L2: nop L1; >> Coq warns us of this fact by not accepting the definition - of [branch_target] above. + of [branch_t] above. + + To handle this problem, we use a union-find data structure, adding equalities [pc = pc'] + for every instruction [pc: nop pc'] in the function. + + Moreover, because the elimination of "useless" [Lcond] depends on the current [uf] datastructure, + we need to iterate until we reach a fixpoint. + + Actually, it is simpler and more efficient to perform this in an external oracle, that also returns a measure + in order to help the proof. + + A verifier checks that this data-structure is correct. +*) + +Definition UF := PTree.t (node * Z). - To handle this problem, we proceed in two passes. The first pass - populates a union-find data structure, adding equalities [pc = pc'] - for every instruction [pc: nop pc'] in the function. *) +(* The oracle returns a map of "nop" node to their target with a distance (ie the number of the "nop" node on the path) to the target. *) +Axiom branch_target: LTL.function -> UF. +Extract Constant branch_target => "Tunnelingaux.branch_target". -Module U := UnionFind.UF(PTree). +Local Open Scope error_monad_scope. -Definition record_goto (uf: U.t) (pc: node) (b: bblock) : U.t := - match b with - | Lbranch s :: _ => U.union uf pc s - | _ => uf +Definition get (td: UF) pc:node*Z := + match td!pc with + | Some (t,d) => (t,Z.abs d) + | _ => (pc,0) end. -Definition record_gotos (f: LTL.function) : U.t := - PTree.fold record_goto f.(fn_code) U.empty. +Definition target (td: UF) (pc:node): node := fst (get td pc). +Coercion target: UF >-> Funclass. + +(* we check that the domain of [td] is included in the domain of [c] *) +Definition check_included (td: UF) (c: code): option bblock + := PTree.fold (fun (ok:option bblock) pc _ => if ok then c!pc else None) td (Some nil). + +(* we check the validity of targets and their bound: + the distance of a "nop" node (w.r.t to the target) must be greater than the one of its parents. +*) +Definition check_bblock (td: UF) (pc:node) (bb: bblock): res unit + := match td!pc with + | None => OK tt + | Some (tpc, dpc) => + let dpc := Z.abs dpc in + match bb with + | Lbranch s ::_ => + let (ts, ds) := get td s in + if peq tpc ts then + if zlt ds dpc then OK tt + else Error (msg "bad distance in Lbranch") + else Error (msg "invalid skip of Lbranch") + | Lcond _ _ s1 s2 _ :: _ => + let (ts1, ds1) := get td s1 in + let (ts2, ds2) := get td s2 in + if peq tpc ts1 then + if peq tpc ts2 then + if zlt ds1 dpc then + if zlt ds2 dpc then OK tt + else Error (msg "bad distance on else branch") + else Error (msg "bad distance on then branch") + else Error (msg "invalid skip of else branch") + else Error (msg "invalid skip of then branch") + | _ => Error (msg "cannot skip this block") + end + end. + +Definition check_code (td: UF) (c:code): res unit + := PTree.fold (fun ok pc bb => do _ <- ok; check_bblock td pc bb) c (OK tt). (** The second pass rewrites all LTL instructions, replacing every - successor [s] of every instruction by the canonical representative + successor [s] of every instruction by [t s], the canonical representative of its equivalence class in the union-find data structure. *) -Definition tunnel_instr (uf: U.t) (i: instruction) : instruction := +Definition tunnel_instr (t: node -> node) (i: instruction) : instruction := match i with - | Lbranch s => Lbranch (U.repr uf s) + | Lbranch s => Lbranch (t s) | Lcond cond args s1 s2 info => - let s1' := U.repr uf s1 in let s2' := U.repr uf s2 in + let s1' := t s1 in let s2' := t s2 in if peq s1' s2' then Lbranch s1' else Lcond cond args s1' s2' info - | Ljumptable arg tbl => Ljumptable arg (List.map (U.repr uf) tbl) + | Ljumptable arg tbl => Ljumptable arg (List.map t tbl) | _ => i end. -Definition tunnel_block (uf: U.t) (b: bblock) : bblock := - List.map (tunnel_instr uf) b. +Definition tunnel_block (t: node -> node) (b: bblock) : bblock := + List.map (tunnel_instr t) b. -Definition tunnel_function (f: LTL.function) : LTL.function := - let uf := record_gotos f in - mkfunction - (fn_sig f) - (fn_stacksize f) - (PTree.map1 (tunnel_block uf) (fn_code f)) - (U.repr uf (fn_entrypoint f)). +Definition tunnel_function (f: LTL.function) : res LTL.function := + let td := branch_target f in + let c := (fn_code f) in + if check_included td c then + do _ <- check_code td c ; OK + (mkfunction + (fn_sig f) + (fn_stacksize f) + (PTree.map1 (tunnel_block td) c) + (td (fn_entrypoint f))) + else + Error (msg "Some node of the union-find is not in the CFG") + . -Definition tunnel_fundef (f: LTL.fundef) : LTL.fundef := - transf_fundef tunnel_function f. +Definition tunnel_fundef (f: fundef) : res fundef := + transf_partial_fundef tunnel_function f. -Definition transf_program (p: LTL.program) : LTL.program := - transform_program tunnel_fundef p. +Definition transf_program (p: program) : res program := + transform_partial_program tunnel_fundef p. diff --git a/backend/Tunnelingaux.ml b/backend/Tunnelingaux.ml new file mode 100644 index 00000000..87e6d303 --- /dev/null +++ b/backend/Tunnelingaux.ml @@ -0,0 +1,283 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(* + +This file implements the [branch_target] oracle that identifies "nop" branches in a LTL function, +and computes their target node with the distance (ie the number of cummulated nops) toward this target. + +See [Tunneling.v] + +*) + +open Coqlib +open LTL +open Maps +open Camlcoq + +let limit_tunneling = None (* for debugging: [Some x] limit the number of iterations *) +let debug_flag = ref false +let final_dump = false (* set to true to have a more verbose debugging *) + +let debug fmt = + if !debug_flag then Printf.eprintf fmt + else Printf.ifprintf stderr fmt + +exception BugOnPC of int + +(* type of labels in the cfg *) +type label = int * P.t + +(* instructions under analyzis *) +type simple_inst = (* a simplified view of LTL instructions *) + LBRANCH of node +| LCOND of node * node +| OTHER +and node = { + lab : label; + mutable inst: simple_inst; + mutable link: node; (* link in the union-find: itself for non "nop"-nodes, target of the "nop" otherwise *) + mutable dist: int; + mutable tag: int + } + +(* type of the (simplified) CFG *) +type cfg = { + nodes: (int, node) Hashtbl.t; + mutable rems: node list; (* remaining conditions that may become lbranch or not *) + mutable num_rems: int; + mutable iter_num: int (* number of iterations in elimination of conditions *) + } + +let lab_i (n: node): int = fst n.lab +let lab_p (n: node): P.t = snd n.lab + +let rec target c n = (* inspired from the "find" of union-find algorithm *) + match n.inst with + | LCOND(s1,s2) -> + if n.link != n + then update c n + else if n.tag < c.iter_num then ( + (* we try to change the condition ... *) + n.tag <- c.iter_num; (* ... but at most once by iteration *) + let ts1 = target c s1 in + let ts2 = target c s2 in + if ts1 == ts2 then (n.link <- ts1; ts1) else n + ) else n + | _ -> + if n.link != n + then update c n + else n +and update c n = + let t = target c n.link in + n.link <- t; t + +let get_node c p = + let li = P.to_int p in + try + Hashtbl.find c.nodes li + with + Not_found -> + let rec n = { lab = (li, p); inst = OTHER; link = n ; dist = 0; tag = 0 } in + Hashtbl.add c.nodes li n; + n + +let set_branch c p s = + let li = P.to_int p in + try + let n = Hashtbl.find c.nodes li in + n.inst <- LBRANCH s; + n.link <- target c s + with + Not_found -> + let n = { lab = (li,p); inst = LBRANCH s; link = target c s; dist = 0; tag = 0 } in + Hashtbl.add c.nodes li n + + +(* build [c.nodes] and accumulate in [acc] conditions at beginning of LTL basic-blocks *) +let build_simplified_cfg c acc pc bb = + match bb with + | Lbranch s :: _ -> + let ns = get_node c s in + set_branch c pc ns; + acc + | Lcond (_, _, s1, s2, _) :: _ -> + c.num_rems <- c.num_rems + 1; + let ns1 = get_node c s1 in + let ns2 = get_node c s2 in + let npc = get_node c pc in + npc.inst <- LCOND(ns1, ns2); + npc::acc + | _ -> acc + +(* try to change a condition into a branch +[acc] is the current accumulator of conditions to consider in the next iteration of repeat_change_cond +*) +let try_change_cond c acc pc = + match pc.inst with + | LCOND(s1,s2) -> + let ts1 = target c s1 in + let ts2 = target c s2 in + if ts1 == ts2 then ( + pc.link <- ts1; + c.num_rems <- c.num_rems - 1; + acc + ) else + pc::acc + | _ -> raise (BugOnPC (lab_i pc)) (* LCOND expected *) + +(* repeat [try_change_cond] until no condition is changed into a branch *) +let rec repeat_change_cond c = + c.iter_num <- c.iter_num + 1; + debug "++ Tunneling.branch_target %d: remaining number of conds to consider = %d\n" (c.iter_num) (c.num_rems); + let old = c.num_rems in + c.rems <- List.fold_left (try_change_cond c) [] c.rems; + let curr = c.num_rems in + let continue = + match limit_tunneling with + | Some n -> curr < old && c.iter_num < n + | None -> curr < old + in + if continue + then repeat_change_cond c + + +(* compute the final distance of each nop nodes to its target *) +let undef_dist = -1 +let self_dist = undef_dist-1 +let rec dist n = + if n.dist = undef_dist + then ( + n.dist <- self_dist; (* protection against an unexpected loop in the data-structure *) + n.dist <- + (match n.inst with + | OTHER -> 0 + | LBRANCH p -> 1 + dist p + | LCOND (p1,p2) -> 1 + (max (dist p1) (dist p2))); + n.dist + ) else if n.dist=self_dist then raise (BugOnPC (lab_i n)) + else n.dist + +let final_export f c = + let count = ref 0 in + let filter_nops_init_dist _ n acc = + let tn = target c n in + if tn == n + then ( + n.dist <- 0; (* force [n] to be a base case in the recursion of [dist] *) + acc + ) else ( + n.dist <- undef_dist; (* force [dist] to compute the actual [n.dist] *) + count := !count+1; + n::acc + ) + in + let nops = Hashtbl.fold filter_nops_init_dist c.nodes [] in + let res = List.fold_left (fun acc n -> PTree.set (lab_p n) (lab_p n.link, Z.of_uint (dist n)) acc) PTree.empty nops in + debug "* Tunneling.branch_target: final number of eliminated nops = %d\n" !count; + res + +(*********************************************) +(*** START: printing and debugging functions *) + +let string_of_labeli nodes ipc = + try + let pc = Hashtbl.find nodes ipc in + if pc.link == pc + then Printf.sprintf "(Target@%d)" (dist pc) + else Printf.sprintf "(Nop %d @%d)" (lab_i pc.link) (dist pc) + with + Not_found -> "" + +let print_bblock c println (pc, bb) = + match bb with + | Lbranch s::_ -> (if println then debug "\n"); debug "%d:Lbranch %d %s\n" pc (P.to_int s) (string_of_labeli c.nodes pc); false + | Lcond (_, _, s1, s2, _)::_ -> (if println then debug "\n"); debug "%d:Lcond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (string_of_labeli c.nodes pc); false + | _ -> debug "%d " pc; true + + +let print_cfg f c = + let a = Array.of_list (PTree.fold (fun acc pc bb -> (P.to_int pc,bb)::acc) f.fn_code []) in + Array.fast_sort (fun (i1,_) (i2,_) -> i2 - i1) a; + let ep = P.to_int f.fn_entrypoint in + debug "entrypoint: %d %s\n" ep (string_of_labeli c.nodes ep); + let println = Array.fold_left (print_bblock c) false a in + (if println then debug "\n");debug "remaining cond:"; + List.iter (fun n -> debug "%d " (lab_i n)) c.rems; + debug "\n" + +(*************************************************************) +(* Copy-paste of the extracted code of the verifier *) +(* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *) + +let get td pc = + match PTree.get pc td with + | Some p -> let (t0, d) = p in (t0, d) + | None -> (pc, Z.of_uint 0) + +let check_bblock td pc bb = + match PTree.get pc td with + | Some p -> + let (tpc, dpc) = p in + let dpc0 = dpc in + (match bb with + | [] -> + raise (BugOnPC (P.to_int pc)) + | i :: _ -> + (match i with + | Lbranch s -> + let (ts, ds) = get td s in + if peq tpc ts + then if zlt ds dpc0 + then () + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + | Lcond (_, _, s1, s2, _) -> + let (ts1, ds1) = get td s1 in + let (ts2, ds2) = get td s2 in + if peq tpc ts1 + then if peq tpc ts2 + then if zlt ds1 dpc0 + then if zlt ds2 dpc0 + then () + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + | _ -> + raise (BugOnPC (P.to_int pc)))) + | None -> () + +(** val check_code : coq_UF -> code -> unit res **) + +let check_code td c = + PTree.fold (fun _ pc bb -> check_bblock td pc bb) c (()) + +(*** END: copy-paste & debugging functions *******) + +let branch_target f = + debug "* Tunneling.branch_target: starting on a new function\n"; + if limit_tunneling <> None then debug "* WARNING: limit_tunneling <> None\n"; + let c = { nodes = Hashtbl.create 100; rems = []; num_rems = 0; iter_num = 0 } in + c.rems <- PTree.fold (build_simplified_cfg c) f.fn_code []; + repeat_change_cond c; + let res = final_export f c in + if !debug_flag then ( + try + check_code res f.fn_code; + if final_dump then print_cfg f c; + with e -> ( + print_cfg f c; + check_code res f.fn_code + ) + ); + res diff --git a/backend/Tunnelingproof.v b/backend/Tunnelingproof.v index cdf6c800..126b7b87 100644 --- a/backend/Tunnelingproof.v +++ b/backend/Tunnelingproof.v @@ -3,6 +3,7 @@ (* The Compcert verified compiler *) (* *) (* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) (* *) (* Copyright Institut National de Recherche en Informatique et en *) (* Automatique. All rights reserved. This file is distributed *) @@ -12,131 +13,163 @@ (** Correctness proof for the branch tunneling optimization. *) -Require Import Coqlib Maps UnionFind. +Require Import Coqlib Maps Errors. Require Import AST Linking. Require Import Values Memory Events Globalenvs Smallstep. Require Import Op Locations LTL. Require Import Tunneling. -Definition match_prog (p tp: program) := - match_program (fun ctx f tf => tf = tunnel_fundef f) eq p tp. +Local Open Scope nat. -Lemma transf_program_match: - forall p, match_prog p (transf_program p). + +(** * Properties of the branch_target, when the verifier succeeds *) + +Definition check_included_spec (c:code) (td:UF) (ok: option bblock) := + ok <> None -> forall pc, c!pc = None -> td!pc = None. + +Lemma check_included_correct (td: UF) (c: code): + check_included_spec c td (check_included td c). +Proof. + apply PTree_Properties.fold_rec with (P := check_included_spec c). +- (* extensionality *) + unfold check_included_spec. intros m m' a EQ IND X pc. rewrite <- EQ; auto. +- (* base case *) + intros _ pc. rewrite PTree.gempty; try congruence. +- (* inductive case *) + unfold check_included_spec. + intros m [|] pc bb NEW ATPC IND; simpl; try congruence. + intros H pc0. rewrite PTree.gsspec; destruct (peq _ _); subst; simpl; try congruence. + intros; eapply IND; try congruence. +Qed. + +Inductive target_bounds (target: node -> node) (bound: node -> nat) (pc: node): (option bblock) -> Prop := + | TB_default (TB: target pc = pc) ob + : target_bounds target bound pc ob + | TB_branch s bb + (EQ: target pc = target s) + (DECREASE: bound s < bound pc) + : target_bounds target bound pc (Some (Lbranch s::bb)) + | TB_cond cond args s1 s2 info bb + (EQ1: target pc = target s1) + (EQ2: target pc = target s2) + (DEC1: bound s1 < bound pc) + (DEC2: bound s2 < bound pc) + : target_bounds target bound pc (Some (Lcond cond args s1 s2 info::bb)) + . +Local Hint Resolve TB_default: core. + +Lemma target_None (td:UF) (pc: node): td!pc = None -> td pc = pc. Proof. - intros. eapply match_transform_program; eauto. + unfold target, get. intros H; rewrite H; auto. Qed. +Local Hint Resolve target_None Z.abs_nonneg: core. -(** * Properties of the branch map computed using union-find. *) +Lemma get_nonneg td pc t d: get td pc = (t, d) -> (0 <= d)%Z. +Proof. + unfold get. destruct (td!_) as [(t0&d0)|]; intros H; inversion H; subst; simpl; omega || auto. +Qed. +Local Hint Resolve get_nonneg: core. -(** A variant of [record_goto] that also incrementally computes a measure [f: node -> nat] - counting the number of [Lnop] instructions starting at a given [pc] that were eliminated. *) +Definition bound (td: UF) (pc: node) := Z.to_nat (snd (get td pc)). -Definition measure_edge (u: U.t) (pc s: node) (f: node -> nat) : node -> nat := - fun x => if peq (U.repr u s) pc then f x - else if peq (U.repr u x) pc then (f x + f s + 1)%nat - else f x. +Lemma check_bblock_correct (td:UF) (pc:node) (bb: bblock): + check_bblock td pc bb = OK tt -> + target_bounds (target td) (bound td) pc (Some bb). +Proof. + unfold check_bblock, bound. + destruct (td!pc) as [(tpc&dpc)|] eqn:Hpc; auto. + assert (Tpc: td pc = tpc). { unfold target, get; rewrite Hpc; simpl; auto. } + assert (Dpc: snd (get td pc) = Z.abs dpc). { unfold get; rewrite Hpc; simpl; auto. } + destruct bb as [|[ ] bb]; simpl; try congruence. + + destruct (get td s) as (ts, ds) eqn:Hs. + repeat (destruct (peq _ _) || destruct (zlt _ _)); simpl; try congruence. + intros; apply TB_branch. + * rewrite Tpc. unfold target; rewrite Hs; simpl; auto. + * rewrite Dpc, Hs; simpl. apply Z2Nat.inj_lt; eauto. + + destruct (get td s1) as (ts1, ds1) eqn:Hs1. + destruct (get td s2) as (ts2, ds2) eqn:Hs2. + repeat (destruct (peq _ _) || destruct (zlt _ _)); simpl; try congruence. + intros; apply TB_cond. + * rewrite Tpc. unfold target; rewrite Hs1; simpl; auto. + * rewrite Tpc. unfold target; rewrite Hs2; simpl; auto. + * rewrite Dpc, Hs1; simpl. apply Z2Nat.inj_lt; eauto. + * rewrite Dpc, Hs2; simpl. apply Z2Nat.inj_lt; eauto. +Qed. -Definition record_goto' (uf: U.t * (node -> nat)) (pc: node) (b: bblock) : U.t * (node -> nat) := - match b with - | Lbranch s :: b' => let (u, f) := uf in (U.union u pc s, measure_edge u pc s f) - | _ => uf - end. +Definition check_code_spec (td:UF) (c:code) (ok: res unit) := + ok = OK tt -> forall pc bb, c!pc = Some bb -> target_bounds (target td) (bound td) pc (Some bb). -Definition branch_map_correct (c: code) (uf: U.t * (node -> nat)): Prop := - forall pc, - match c!pc with - | Some(Lbranch s :: b) => - U.repr (fst uf) pc = pc \/ (U.repr (fst uf) pc = U.repr (fst uf) s /\ snd uf s < snd uf pc)%nat - | _ => - U.repr (fst uf) pc = pc - end. +Lemma check_code_correct (td:UF) c: + check_code_spec td c (check_code td c). +Proof. + apply PTree_Properties.fold_rec with (P := check_code_spec td). +- (* extensionality *) + unfold check_code_spec. intros m m' a EQ IND X pc bb; subst. rewrite <- ! EQ; eauto. +- (* base case *) + intros _ pc. rewrite PTree.gempty; try congruence. +- (* inductive case *) + unfold check_code_spec. + intros m [[]|] pc bb NEW ATPC IND; simpl; try congruence. + intros H pc0 bb0. rewrite PTree.gsspec; destruct (peq _ _); subst; simpl; auto. + intros X; inversion X; subst. + apply check_bblock_correct; auto. +Qed. -Lemma record_gotos'_correct: - forall c, - branch_map_correct c (PTree.fold record_goto' c (U.empty, fun (x: node) => O)). +Theorem branch_target_bounds: + forall f tf pc, + tunnel_function f = OK tf -> + target_bounds (branch_target f) (bound (branch_target f)) pc (f.(fn_code)!pc). Proof. - intros. - apply PTree_Properties.fold_rec with (P := fun c uf => branch_map_correct c uf). + unfold tunnel_function; intros f f' pc. + destruct (check_included _ _) eqn:H1; try congruence. + destruct (check_code _ _) as [[]|] eqn:H2; simpl; try congruence. + intros _. + destruct ((fn_code f)!pc) eqn:X. + - exploit check_code_correct; eauto. + - exploit check_included_correct; eauto. + congruence. +Qed. -- (* extensionality *) - intros. red; intros. rewrite <- H. apply H0. +Lemma tunnel_function_unfold: + forall f tf pc, + tunnel_function f = OK tf -> + (fn_code tf)!pc = option_map (tunnel_block (branch_target f)) (fn_code f)!pc. +Proof. + unfold tunnel_function; intros f f' pc. + destruct (check_included _ _) eqn:H1; try congruence. + destruct (check_code _ _) as [[]|] eqn:H2; simpl; try congruence. + intros X; inversion X; clear X; subst. + simpl. rewrite PTree.gmap1. auto. +Qed. -- (* base case *) - red; intros; simpl. rewrite PTree.gempty. apply U.repr_empty. +Lemma tunnel_fundef_Internal: + forall f tf, tunnel_fundef (Internal f) = OK tf + -> exists tf', tunnel_function f = OK tf' /\ tf = Internal tf'. +Proof. + intros f tf; simpl. + destruct (tunnel_function f) eqn:X; simpl; try congruence. + intros EQ; inversion EQ. + eexists; split; eauto. +Qed. -- (* inductive case *) - intros m uf pc bb; intros. destruct uf as [u f]. - assert (PC: U.repr u pc = pc). - generalize (H1 pc). rewrite H. auto. - assert (record_goto' (u, f) pc bb = (u, f) - \/ exists s, exists bb', bb = Lbranch s :: bb' /\ record_goto' (u, f) pc bb = (U.union u pc s, measure_edge u pc s f)). - unfold record_goto'; simpl. destruct bb; auto. destruct i; auto. right. exists s; exists bb; auto. - destruct H2 as [B | [s [bb' [EQ B]]]]. - -+ (* u and f are unchanged *) - rewrite B. - red. intro pc'. simpl. rewrite PTree.gsspec. destruct (peq pc' pc). subst pc'. - destruct bb; auto. destruct i; auto. - apply H1. - -+ (* b is Lbranch s, u becomes union u pc s, f becomes measure_edge u pc s f *) - rewrite B. - red. intro pc'. simpl. rewrite PTree.gsspec. destruct (peq pc' pc). subst pc'. rewrite EQ. - -* (* The new instruction *) - rewrite (U.repr_union_2 u pc s); auto. rewrite U.repr_union_3. - unfold measure_edge. destruct (peq (U.repr u s) pc). auto. right. split. auto. - rewrite PC. rewrite peq_true. omega. - -* (* An old instruction *) - assert (U.repr u pc' = pc' -> U.repr (U.union u pc s) pc' = pc'). - { intro. rewrite <- H2 at 2. apply U.repr_union_1. congruence. } - generalize (H1 pc'). simpl. destruct (m!pc'); auto. destruct b; auto. destruct i; auto. - intros [P | [P Q]]. left; auto. right. - split. apply U.sameclass_union_2. auto. - unfold measure_edge. destruct (peq (U.repr u s) pc). auto. - rewrite P. destruct (peq (U.repr u s0) pc). omega. auto. -Qed. - -Definition record_gotos' (f: function) := - PTree.fold record_goto' f.(fn_code) (U.empty, fun (x: node) => O). - -Lemma record_gotos_gotos': - forall f, fst (record_gotos' f) = record_gotos f. -Proof. - intros. unfold record_gotos', record_gotos. - repeat rewrite PTree.fold_spec. - generalize (PTree.elements (fn_code f)) (U.empty) (fun _ : node => O). - induction l; intros; simpl. - auto. - unfold record_goto' at 2. unfold record_goto at 2. - destruct (snd a). apply IHl. destruct i; apply IHl. -Qed. - -Definition branch_target (f: function) (pc: node) : node := - U.repr (record_gotos f) pc. - -Definition count_gotos (f: function) (pc: node) : nat := - snd (record_gotos' f) pc. - -Theorem record_gotos_correct: - forall f pc, - match f.(fn_code)!pc with - | Some(Lbranch s :: b) => - branch_target f pc = pc \/ - (branch_target f pc = branch_target f s /\ count_gotos f s < count_gotos f pc)%nat - | _ => branch_target f pc = pc - end. +Lemma tunnel_fundef_External: + forall tf ef, tunnel_fundef (External ef) = OK tf + -> tf = External ef. Proof. - intros. - generalize (record_gotos'_correct f.(fn_code) pc). simpl. - fold (record_gotos' f). unfold branch_map_correct, branch_target, count_gotos. - rewrite record_gotos_gotos'. auto. + intros tf ef; simpl. intros H; inversion H; auto. Qed. (** * Preservation of semantics *) +Definition match_prog (p tp: program) := + match_program (fun _ f tf => tunnel_fundef f = OK tf) eq p tp. + +Lemma transf_program_match: + forall prog tprog, transf_program prog = OK tprog -> match_prog prog tprog. +Proof. + intros. eapply match_transform_partial_program_contextual; eauto. +Qed. + Section PRESERVATION. Variables prog tprog: program. @@ -145,32 +178,65 @@ Let ge := Genv.globalenv prog. Let tge := Genv.globalenv tprog. Lemma functions_translated: - forall v f, + forall (v: val) (f: fundef), Genv.find_funct ge v = Some f -> - Genv.find_funct tge v = Some (tunnel_fundef f). -Proof (Genv.find_funct_transf TRANSL). + exists tf, tunnel_fundef f = OK tf /\ Genv.find_funct tge v = Some tf. +Proof. + intros. exploit (Genv.find_funct_match TRANSL); eauto. + intros (cu & tf & A & B & C). + repeat eexists; intuition eauto. +Qed. Lemma function_ptr_translated: forall v f, Genv.find_funct_ptr ge v = Some f -> - Genv.find_funct_ptr tge v = Some (tunnel_fundef f). -Proof (Genv.find_funct_ptr_transf TRANSL). + exists tf, + Genv.find_funct_ptr tge v = Some tf /\ tunnel_fundef f = OK tf. +Proof. + intros. + exploit (Genv.find_funct_ptr_transf_partial TRANSL); eauto. +Qed. -Lemma symbols_preserved: - forall id, - Genv.find_symbol tge id = Genv.find_symbol ge id. -Proof (Genv.find_symbol_transf TRANSL). +Lemma symbols_preserved s: Genv.find_symbol tge s = Genv.find_symbol ge s. +Proof. + rewrite <- (Genv.find_symbol_match TRANSL). reflexivity. +Qed. Lemma senv_preserved: Senv.equiv ge tge. -Proof (Genv.senv_transf TRANSL). +Proof. + eapply (Genv.senv_match TRANSL). +Qed. Lemma sig_preserved: - forall f, funsig (tunnel_fundef f) = funsig f. + forall f tf, tunnel_fundef f = OK tf -> funsig tf = funsig f. Proof. - destruct f; reflexivity. + intros. destruct f. + - simpl in H. monadInv H. unfold tunnel_function in EQ. + destruct (check_included _ _); try congruence. + monadInv EQ. simpl; auto. + - simpl in H. monadInv H. reflexivity. Qed. +Lemma fn_stacksize_preserved: + forall f tf, tunnel_function f = OK tf -> fn_stacksize tf = fn_stacksize f. +Proof. + intros f tf; unfold tunnel_function. + destruct (check_included _ _); try congruence. + destruct (check_code _ _); simpl; try congruence. + intros H; inversion H; simpl; auto. +Qed. + +Lemma fn_entrypoint_preserved: + forall f tf, tunnel_function f = OK tf -> fn_entrypoint tf = branch_target f (fn_entrypoint f). +Proof. + intros f tf; unfold tunnel_function. + destruct (check_included _ _); try congruence. + destruct (check_code _ _); simpl; try congruence. + intros H; inversion H; simpl; auto. +Qed. + + (** The proof of semantic preservation is a simulation argument based on diagrams of the following form: << @@ -185,7 +251,7 @@ Qed. between states [st1] and [st2], as well as the postcondition between [st1'] and [st2']. One transition in the source code (left) can correspond to zero or one transition in the transformed code (right). The - "zero transition" case occurs when executing a [Lgoto] instruction + "zero transition" case occurs when executing a [Lnop] instruction in the source code that has been removed by tunneling. In the definition of [match_states], what changes between the original and @@ -194,52 +260,52 @@ Qed. and memory states, since some [Vundef] values can become more defined as a consequence of eliminating useless [Lcond] instructions. *) -Definition tunneled_block (f: function) (b: bblock) := - tunnel_block (record_gotos f) b. - -Definition tunneled_code (f: function) := - PTree.map1 (tunneled_block f) (fn_code f). - Definition locmap_lessdef (ls1 ls2: locset) : Prop := forall l, Val.lessdef (ls1 l) (ls2 l). Inductive match_stackframes: stackframe -> stackframe -> Prop := | match_stackframes_intro: - forall f sp ls0 bb tls0, + forall f tf sp ls0 bb tls0, locmap_lessdef ls0 tls0 -> + tunnel_function f = OK tf -> match_stackframes (Stackframe f sp ls0 bb) - (Stackframe (tunnel_function f) sp tls0 (tunneled_block f bb)). + (Stackframe tf sp tls0 (tunnel_block (branch_target f) bb)). Inductive match_states: state -> state -> Prop := | match_states_intro: - forall s f sp pc ls m ts tls tm + forall s f tf sp pc ls m ts tls tm (STK: list_forall2 match_stackframes s ts) (LS: locmap_lessdef ls tls) - (MEM: Mem.extends m tm), + (MEM: Mem.extends m tm) + (TF: tunnel_function f = OK tf), match_states (State s f sp pc ls m) - (State ts (tunnel_function f) sp (branch_target f pc) tls tm) + (State ts tf sp (branch_target f pc) tls tm) | match_states_block: - forall s f sp bb ls m ts tls tm + forall s f tf sp bb ls m ts tls tm (STK: list_forall2 match_stackframes s ts) (LS: locmap_lessdef ls tls) - (MEM: Mem.extends m tm), + (MEM: Mem.extends m tm) + (TF: tunnel_function f = OK tf), match_states (Block s f sp bb ls m) - (Block ts (tunnel_function f) sp (tunneled_block f bb) tls tm) + (Block ts tf sp (tunnel_block (branch_target f) bb) tls tm) | match_states_interm: - forall s f sp pc bb ls m ts tls tm + forall s f tf sp pc i bb ls m ts tls tm (STK: list_forall2 match_stackframes s ts) (LS: locmap_lessdef ls tls) - (MEM: Mem.extends m tm), - match_states (Block s f sp (Lbranch pc :: bb) ls m) - (State ts (tunnel_function f) sp (branch_target f pc) tls tm) + (MEM: Mem.extends m tm) + (IBRANCH: tunnel_instr (branch_target f) i = Lbranch pc) + (TF: tunnel_function f = OK tf), + match_states (Block s f sp (i :: bb) ls m) + (State ts tf sp pc tls tm) | match_states_call: - forall s f ls m ts tls tm + forall s f tf ls m ts tls tm (STK: list_forall2 match_stackframes s ts) (LS: locmap_lessdef ls tls) - (MEM: Mem.extends m tm), + (MEM: Mem.extends m tm) + (TF: tunnel_fundef f = OK tf), match_states (Callstate s f ls m) - (Callstate ts (tunnel_fundef f) tls tm) + (Callstate ts tf tls tm) | match_states_return: forall s ls m ts tls tm (STK: list_forall2 match_stackframes s ts) @@ -289,22 +355,6 @@ Proof. induction rl as [ | r rl]; intros; simpl. auto. apply locmap_set_undef_lessdef; auto. Qed. -(* -Lemma locmap_undef_lessdef: - forall ll ls1 ls2, - locmap_lessdef ls1 ls2 -> locmap_lessdef (Locmap.undef ll ls1) (Locmap.undef ll ls2). -Proof. - induction ll as [ | l ll]; intros; simpl. auto. apply IHll. apply locmap_set_lessdef; auto. -Qed. - -Lemma locmap_undef_lessdef_1: - forall ll ls1 ls2, - locmap_lessdef ls1 ls2 -> locmap_lessdef (Locmap.undef ll ls1) ls2. -Proof. - induction ll as [ | l ll]; intros; simpl. auto. apply IHll. apply locmap_set_undef_lessdef; auto. -Qed. -*) - Lemma locmap_getpair_lessdef: forall p ls1 ls2, locmap_lessdef ls1 ls2 -> Val.lessdef (Locmap.getpair p ls1) (Locmap.getpair p ls2). @@ -348,15 +398,16 @@ Lemma find_function_translated: forall ros ls tls fd, locmap_lessdef ls tls -> find_function ge ros ls = Some fd -> - find_function tge ros tls = Some (tunnel_fundef fd). + exists tfd, tunnel_fundef fd = OK tfd /\ find_function tge ros tls = Some tfd. Proof. intros. destruct ros; simpl in *. - assert (E: tls (R m) = ls (R m)). { exploit Genv.find_funct_inv; eauto. intros (b & EQ). generalize (H (R m)). rewrite EQ. intros LD; inv LD. auto. } - rewrite E. apply functions_translated; auto. + rewrite E. exploit functions_translated; eauto. - rewrite symbols_preserved. destruct (Genv.find_symbol ge i); inv H0. - apply function_ptr_translated; auto. + exploit function_ptr_translated; eauto. + intros (tf & X1 & X2). exists tf; intuition. Qed. Lemma call_regs_lessdef: @@ -383,11 +434,12 @@ Qed. Definition measure (st: state) : nat := match st with - | State s f sp pc ls m => (count_gotos f pc * 2)%nat - | Block s f sp (Lbranch pc :: _) ls m => (count_gotos f pc * 2 + 1)%nat - | Block s f sp bb ls m => 0%nat - | Callstate s f ls m => 0%nat - | Returnstate s ls m => 0%nat + | State s f sp pc ls m => (bound (branch_target f) pc) * 2 + | Block s f sp (Lbranch pc :: _) ls m => (bound (branch_target f) pc) * 2 + 1 + | Block s f sp (Lcond _ _ pc1 pc2 _ :: _) ls m => (max (bound (branch_target f) pc1) (bound (branch_target f) pc2)) * 2 + 1 + | Block s f sp bb ls m => 0 + | Callstate s f ls m => 0 + | Returnstate s ls m => 0 end. Lemma match_parent_locset: @@ -406,24 +458,23 @@ Lemma tunnel_step_correct: (exists st2', step tge st1' t st2' /\ match_states st2 st2') \/ (measure st2 < measure st1 /\ t = E0 /\ match_states st2 st1')%nat. Proof. - induction 1; intros; try inv MS. + induction 1; intros; try inv MS; try (simpl in IBRANCH; inv IBRANCH). - (* entering a block *) - assert (DEFAULT: branch_target f pc = pc -> - (exists st2' : state, - step tge (State ts (tunnel_function f) sp (branch_target f pc) tls tm) E0 st2' - /\ match_states (Block s f sp bb rs m) st2')). - { intros. rewrite H0. econstructor; split. - econstructor. simpl. rewrite PTree.gmap1. rewrite H. simpl. eauto. - econstructor; eauto. } - - generalize (record_gotos_correct f pc). rewrite H. - destruct bb; auto. destruct i; auto. - intros [A | [B C]]. auto. - right. split. simpl. omega. - split. auto. - rewrite B. econstructor; eauto. - + exploit (branch_target_bounds f tf pc); eauto. + rewrite H. intros X; inversion X. + + (* TB_default *) + rewrite TB; left. econstructor; split. + * econstructor. simpl. erewrite tunnel_function_unfold, H ; simpl; eauto. + * econstructor; eauto. + + (* FT_branch *) + simpl; right. + rewrite EQ; repeat (econstructor; omega || eauto). + + (* FT_cond *) + simpl; right. + repeat (econstructor; omega || eauto); simpl. + apply Nat.max_case; omega. + destruct (peq _ _); try congruence. - (* Lop *) exploit eval_operation_lessdef. apply reglist_lessdef; eauto. eauto. eauto. intros (tv & EV & LD). @@ -485,20 +536,25 @@ Proof. eauto. eauto. econstructor; eauto using locmap_undef_regs_lessdef. - (* Lcall *) - left; simpl; econstructor; split. - eapply exec_Lcall with (fd := tunnel_fundef fd); eauto. - eapply find_function_translated; eauto. - rewrite sig_preserved. auto. - econstructor; eauto. - constructor; auto. - constructor; auto. + left; simpl. + exploit find_function_translated; eauto. + intros (tfd & Htfd & FIND). + econstructor; split. + + eapply exec_Lcall; eauto. + erewrite sig_preserved; eauto. + + econstructor; eauto. + constructor; auto. + constructor; auto. - (* Ltailcall *) - exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM'). + exploit find_function_translated. 2: eauto. + { eauto using return_regs_lessdef, match_parent_locset. } + intros (tfd & Htfd & FIND). + exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM'). left; simpl; econstructor; split. - eapply exec_Ltailcall with (fd := tunnel_fundef fd); eauto. - eapply find_function_translated; eauto using return_regs_lessdef, match_parent_locset. - apply sig_preserved. - econstructor; eauto using return_regs_lessdef, match_parent_locset. + + eapply exec_Ltailcall; eauto. + * eapply sig_preserved; eauto. + * erewrite fn_stacksize_preserved; eauto. + + econstructor; eauto using return_regs_lessdef, match_parent_locset. - (* Lbuiltin *) exploit eval_builtin_args_lessdef. eexact LS. eauto. eauto. intros (tvargs & EVA & LDA). exploit external_call_mem_extends; eauto. intros (tvres & tm' & A & B & C & D). @@ -513,45 +569,58 @@ Proof. fold (branch_target f pc). econstructor; eauto. - (* Lbranch (eliminated) *) right; split. simpl. omega. split. auto. constructor; auto. - -- (* Lcond *) - simpl tunneled_block. - set (s1 := U.repr (record_gotos f) pc1). set (s2 := U.repr (record_gotos f) pc2). - destruct (peq s1 s2). -+ left; econstructor; split. - eapply exec_Lbranch. - destruct b. -* constructor; eauto using locmap_undef_regs_lessdef_1. -* rewrite e. constructor; eauto using locmap_undef_regs_lessdef_1. -+ left; econstructor; split. - eapply exec_Lcond; eauto. eapply eval_condition_lessdef; eauto using reglist_lessdef. - destruct b; econstructor; eauto using locmap_undef_regs_lessdef. - +- (* Lcond (preserved) *) + simpl; left; destruct (peq _ _) eqn: EQ. + + econstructor; split. + eapply exec_Lbranch. + destruct b. + * constructor; eauto using locmap_undef_regs_lessdef_1. + * rewrite e. constructor; eauto using locmap_undef_regs_lessdef_1. + + econstructor; split. + eapply exec_Lcond; eauto. eapply eval_condition_lessdef; eauto using reglist_lessdef. + destruct b; econstructor; eauto using locmap_undef_regs_lessdef. +- (* Lcond (eliminated) *) + destruct (peq _ _) eqn: EQ; try inv H1. + right; split; simpl. + + destruct b. + generalize (Nat.le_max_l (bound (branch_target f) pc1) (bound (branch_target f) pc2)); omega. + generalize (Nat.le_max_r (bound (branch_target f) pc1) (bound (branch_target f) pc2)); omega. + + destruct b. + -- repeat (constructor; auto). + -- rewrite e; repeat (constructor; auto). - (* Ljumptable *) assert (tls (R arg) = Vint n). { generalize (LS (R arg)); rewrite H; intros LD; inv LD; auto. } left; simpl; econstructor; split. eapply exec_Ljumptable. - eauto. rewrite list_nth_z_map. change U.elt with node. rewrite H0. reflexivity. eauto. + eauto. rewrite list_nth_z_map, H0; simpl; eauto. eauto. econstructor; eauto using locmap_undef_regs_lessdef. - (* Lreturn *) exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM'). left; simpl; econstructor; split. - eapply exec_Lreturn; eauto. - constructor; eauto using return_regs_lessdef, match_parent_locset. + + eapply exec_Lreturn; eauto. + erewrite fn_stacksize_preserved; eauto. + + constructor; eauto using return_regs_lessdef, match_parent_locset. - (* internal function *) + exploit tunnel_fundef_Internal; eauto. + intros (tf' & TF' & ITF). subst. exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl. - intros (tm' & ALLOC & MEM'). - left; simpl; econstructor; split. - eapply exec_function_internal; eauto. - simpl. econstructor; eauto using locmap_undef_regs_lessdef, call_regs_lessdef. + intros (tm' & ALLOC & MEM'). + left; simpl. + econstructor; split. + + eapply exec_function_internal; eauto. + erewrite fn_stacksize_preserved; eauto. + + simpl. + erewrite (fn_entrypoint_preserved f tf'); auto. + econstructor; eauto using locmap_undef_regs_lessdef, call_regs_lessdef. - (* external function *) exploit external_call_mem_extends; eauto using locmap_getpairs_lessdef. intros (tvres & tm' & A & B & C & D). left; simpl; econstructor; split. - eapply exec_function_external; eauto. - eapply external_call_symbols_preserved; eauto. apply senv_preserved. - simpl. econstructor; eauto using locmap_setpair_lessdef, locmap_undef_caller_save_regs_lessdef. + + erewrite (tunnel_fundef_External tf ef); eauto. + eapply exec_function_external; eauto. + eapply external_call_symbols_preserved; eauto. apply senv_preserved. + + simpl. econstructor; eauto using locmap_setpair_lessdef, locmap_undef_caller_save_regs_lessdef. - (* return *) inv STK. inv H1. left; econstructor; split. @@ -564,14 +633,15 @@ Lemma transf_initial_states: exists st2, initial_state tprog st2 /\ match_states st1 st2. Proof. intros. inversion H. - exists (Callstate nil (tunnel_fundef f) (Locmap.init Vundef) m0); split. + exploit function_ptr_translated; eauto. + intros (tf & Htf & Hf). + exists (Callstate nil tf (Locmap.init Vundef) m0); split. econstructor; eauto. - apply (Genv.init_mem_transf TRANSL); auto. + apply (Genv.init_mem_transf_partial TRANSL); auto. rewrite (match_program_main TRANSL). rewrite symbols_preserved. eauto. - apply function_ptr_translated; auto. - rewrite <- H3. apply sig_preserved. - constructor. constructor. red; simpl; auto. apply Mem.extends_refl. + rewrite <- H3. apply sig_preserved. auto. + constructor. constructor. red; simpl; auto. apply Mem.extends_refl. auto. Qed. Lemma transf_final_states: diff --git a/backend/ValueDomain.v b/backend/ValueDomain.v index 779e7bb9..f1a46baa 100644 --- a/backend/ValueDomain.v +++ b/backend/ValueDomain.v @@ -2069,7 +2069,6 @@ Definition divfs := binop_single Float32.div. Lemma divfs_sound: forall v x w y, vmatch v x -> vmatch w y -> vmatch (Val.divfs v w) (divfs x y). Proof (binop_single_sound Float32.div). - (** Conversions *) Definition zero_ext (nbits: Z) (v: aval) := @@ -2483,6 +2482,468 @@ Proof. destruct 1; simpl; auto with va. Qed. + +(* Extensions for KVX and Risc-V *) + +Definition intoffloat_total (x: aval) := + match x with + | F f => + match Float.to_int f with + | Some i => I i + | None => ntop + end + | _ => ntop1 x + end. + +Definition intuoffloat_total (x: aval) := + match x with + | F f => + match Float.to_intu f with + | Some i => I i + | None => ntop + end + | _ => ntop1 x + end. + +Definition intofsingle_total (x: aval) := + match x with + | FS f => + match Float32.to_int f with + | Some i => I i + | None => ntop + end + | _ => ntop1 x + end. + +Definition intuofsingle_total (x: aval) := + match x with + | FS f => + match Float32.to_intu f with + | Some i => I i + | None => ntop + end + | _ => ntop1 x + end. + +Definition longoffloat_total (x: aval) := + match x with + | F f => + match Float.to_long f with + | Some i => L i + | None => ntop + end + | _ => ntop1 x + end. + +Definition longuoffloat_total (x: aval) := + match x with + | F f => + match Float.to_longu f with + | Some i => L i + | None => ntop + end + | _ => ntop1 x + end. + +Definition longofsingle_total (x: aval) := + match x with + | FS f => + match Float32.to_long f with + | Some i => L i + | None => ntop + end + | _ => ntop1 x + end. + +Definition longuofsingle_total (x: aval) := + match x with + | FS f => + match Float32.to_longu f with + | Some i => L i + | None => ntop + end + | _ => ntop1 x + end. + +Lemma intoffloat_total_sound: + forall v x + (MATCH : vmatch v x), + vmatch (Val.maketotal (Val.intoffloat v)) (intoffloat_total x). +Proof. + unfold Val.intoffloat, intoffloat_total. intros. + inv MATCH; simpl in *; try constructor. + all: destruct (Float.to_int f) as [i|] eqn:E; simpl; [auto with va | constructor]. +Qed. + +Lemma intuoffloat_total_sound: + forall v x + (MATCH : vmatch v x), + vmatch (Val.maketotal (Val.intuoffloat v)) (intuoffloat_total x). +Proof. + unfold Val.intoffloat, intoffloat_total. intros. + inv MATCH; simpl in *; try constructor. + all: destruct (Float.to_intu f) as [i|] eqn:E; simpl; [auto with va | constructor]. +Qed. + +Lemma intofsingle_total_sound: + forall v x + (MATCH : vmatch v x), + vmatch (Val.maketotal (Val.intofsingle v)) (intofsingle_total x). +Proof. + unfold Val.intofsingle, intofsingle_total. intros. + inv MATCH; simpl in *; try constructor. + all: destruct (Float32.to_int f) as [i|] eqn:E; simpl; [auto with va | constructor]. +Qed. + +Lemma intuofsingle_total_sound: + forall v x + (MATCH : vmatch v x), + vmatch (Val.maketotal (Val.intuofsingle v)) (intuofsingle_total x). +Proof. + unfold Val.intofsingle, intofsingle_total. intros. + inv MATCH; simpl in *; try constructor. + all: destruct (Float32.to_intu f) as [i|] eqn:E; simpl; [auto with va | constructor]. +Qed. + +Lemma singleofint_total_sound: + forall v x, vmatch v x -> + vmatch (Val.maketotal (Val.singleofint v)) (singleofint x). +Proof. + unfold Val.singleofint, singleofint; intros. + inv H; simpl. + all: auto with va. + all: unfold ntop1, provenance. + all: try constructor. +Qed. + +Lemma singleofintu_total_sound: + forall v x, vmatch v x -> + vmatch (Val.maketotal (Val.singleofintu v)) (singleofintu x). +Proof. + unfold Val.singleofintu, singleofintu; intros. + inv H; simpl. + all: auto with va. + all: unfold ntop1, provenance. + all: try constructor. +Qed. + +Lemma longoffloat_total_sound: + forall v x + (MATCH : vmatch v x), + vmatch (Val.maketotal (Val.longoffloat v)) (longoffloat_total x). +Proof. + unfold Val.longoffloat, longoffloat_total. intros. + inv MATCH; simpl in *; try constructor. + all: destruct (Float.to_long f) as [i|] eqn:E; simpl; [auto with va | constructor]. +Qed. + +Lemma longuoffloat_total_sound: + forall v x + (MATCH : vmatch v x), + vmatch (Val.maketotal (Val.longuoffloat v)) (longuoffloat_total x). +Proof. + unfold Val.longoffloat, longoffloat_total. intros. + inv MATCH; simpl in *; try constructor. + all: destruct (Float.to_longu f) as [i|] eqn:E; simpl; [auto with va | constructor]. +Qed. + +Lemma longofsingle_total_sound: + forall v x + (MATCH : vmatch v x), + vmatch (Val.maketotal (Val.longofsingle v)) (longofsingle_total x). +Proof. + unfold Val.longofsingle, longofsingle_total. intros. + inv MATCH; simpl in *; try constructor. + all: destruct (Float32.to_long f) as [i|] eqn:E; simpl; [auto with va | constructor]. +Qed. + +Lemma longuofsingle_total_sound: + forall v x + (MATCH : vmatch v x), + vmatch (Val.maketotal (Val.longuofsingle v)) (longuofsingle_total x). +Proof. + unfold Val.longofsingle, longofsingle_total. intros. + inv MATCH; simpl in *; try constructor. + all: destruct (Float32.to_longu f) as [i|] eqn:E; simpl; [auto with va | constructor]. +Qed. + +Lemma singleoflong_total_sound: + forall v x, vmatch v x -> + vmatch (Val.maketotal (Val.singleoflong v)) (singleoflong x). +Proof. + unfold Val.singleoflong, singleoflong; intros. + inv H; simpl. + all: auto with va. + all: unfold ntop1, provenance. + all: try constructor. +Qed. + +Lemma singleoflongu_total_sound: + forall v x, vmatch v x -> + vmatch (Val.maketotal (Val.singleoflongu v)) (singleoflongu x). +Proof. + unfold Val.singleoflongu, singleoflongu; intros. + inv H; simpl. + all: auto with va. + all: unfold ntop1, provenance. + all: try constructor. +Qed. + +Lemma floatoflong_total_sound: + forall v x, vmatch v x -> + vmatch (Val.maketotal (Val.floatoflong v)) (floatoflong x). +Proof. + unfold Val.floatoflong, floatoflong; intros. + inv H; simpl. + all: auto with va. + all: unfold ntop1, provenance. + all: try constructor. +Qed. + +Lemma floatoflongu_total_sound: + forall v x, vmatch v x -> + vmatch (Val.maketotal (Val.floatoflongu v)) (floatoflongu x). +Proof. + unfold Val.floatoflongu, floatoflongu; intros. + inv H; simpl. + all: auto with va. + all: unfold ntop1, provenance. + all: try constructor. +Qed. + +Lemma floatofint_total_sound: + forall v x, vmatch v x -> + vmatch (Val.maketotal (Val.floatofint v)) (floatofint x). +Proof. + unfold Val.floatofint, floatofint; intros. + inv H; simpl. + all: auto with va. + all: unfold ntop1, provenance. + all: try constructor. +Qed. + +Lemma floatofintu_total_sound: + forall v x, vmatch v x -> + vmatch (Val.maketotal (Val.floatofintu v)) (floatofintu x). +Proof. + unfold Val.floatofintu, floatofintu; intros. + inv H; simpl. + all: auto with va. + all: unfold ntop1, provenance. + all: try constructor. +Qed. + + +Definition divs_total (v w: aval) := + match w, v with + | I i2, I i1 => + if Int.eq i2 Int.zero + || Int.eq i1 (Int.repr Int.min_signed) && Int.eq i2 Int.mone + then ntop + else I (Int.divs i1 i2) + | _, _ => ntop2 v w + end. + +Lemma divs_total_sound: + forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.divs v w)) (divs_total x y). +Proof. + intros until y. + intros HX HY. + inv HX; inv HY; cbn in *. + { destruct (_ || _) eqn:E; cbn; unfold ntop; auto with va. + } + all: unfold ntop2; auto with va. + all: destruct (_ || _) eqn:E; unfold ntop2; cbn; auto with va. +Qed. + +Definition divu_total (v w: aval) := + match w, v with + | I i2, I i1 => + if Int.eq i2 Int.zero + then ntop + else I (Int.divu i1 i2) + | _, _ => ntop2 v w + end. + +Lemma divu_total_sound: + forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.divu v w)) (divu_total x y). +Proof. + intros until y. + intros HX HY. + inv HX; inv HY; cbn in *. + { destruct Int.eq eqn:E; cbn; unfold ntop; auto with va. + } + all: unfold ntop2; auto with va. + all: destruct Int.eq eqn:E; unfold ntop2; cbn; auto with va. +Qed. + +Definition mods_total (v w: aval) := + match w, v with + | I i2, I i1 => + if Int.eq i2 Int.zero + || Int.eq i1 (Int.repr Int.min_signed) && Int.eq i2 Int.mone + then ntop + else I (Int.mods i1 i2) + | _, _ => ntop2 v w + end. + +Lemma mods_total_sound: + forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.mods v w)) (mods_total x y). +Proof. + intros until y. + intros HX HY. + inv HX; inv HY; cbn in *. + { destruct (_ || _) eqn:E; cbn; unfold ntop; auto with va. + } + all: unfold ntop2; auto with va. + all: destruct (_ || _) eqn:E; unfold ntop2; cbn; auto with va. +Qed. + +Definition modu_total (v w: aval) := + match w, v with + | I i2, I i1 => + if Int.eq i2 Int.zero + then ntop + else I (Int.modu i1 i2) + | I i2, _ => uns (provenance v) (usize i2) + | _, _ => ntop2 v w + end. + +Lemma modu_total_sound: + forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.modu v w)) (modu_total x y). +Proof. + assert (UNS: forall i j, j <> Int.zero -> is_uns (usize j) (Int.modu i j)). + { + intros. apply is_uns_mon with (usize (Int.modu i j)). + { apply is_uns_usize. + } + unfold usize, Int.size. + apply Zsize_monotone. + generalize (Int.unsigned_range_2 j); intros RANGE. + assert (Int.unsigned j <> 0). + { red; intros; elim H. rewrite <- (Int.repr_unsigned j). rewrite H0. auto. } + exploit (Z_mod_lt (Int.unsigned i) (Int.unsigned j)). omega. intros MOD. + unfold Int.modu. rewrite Int.unsigned_repr. omega. omega. + } + intros until y. + intros HX HY. + inv HX; inv HY; cbn in *. + { destruct Int.eq eqn:E; unfold ntop; cbn; auto with va. + } + all: try discriminate. + all: unfold ntop2; auto with va. + all: try (destruct Int.eq eqn:E; cbn; unfold ntop2; auto with va; fail). + all: try apply vmatch_uns_undef. + + all: + generalize (Int.eq_spec i0 Int.zero); + destruct (Int.eq i0 Int.zero); + cbn; + intro. + all: try apply vmatch_uns_undef. + all: apply vmatch_uns; auto. +Qed. + + +Lemma shrx_total_sound: + forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.shrx v w)) (shrx x y). +Proof. + intros until y. intros HX HY. + inv HX; inv HY; cbn. + all: unfold ntop1; auto with va. + all: destruct Int.ltu eqn:LTU; cbn; unfold ntop; auto with va. +Qed. + + +Definition divls_total (v w: aval) := + match w, v with + | L i2, L i1 => + if Int64.eq i2 Int64.zero + || Int64.eq i1 (Int64.repr Int64.min_signed) && Int64.eq i2 Int64.mone + then ntop + else L (Int64.divs i1 i2) + | _, _ => ntop2 v w + end. + +Lemma divls_total_sound: + forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.divls v w)) (divls_total x y). +Proof. + intros until y. + intros HX HY. + inv HX; inv HY; cbn in *. + all: unfold ntop2; auto with va. + all: destruct (_ || _) eqn:E; unfold ntop2, ntop; cbn; auto with va. +Qed. + +Definition divlu_total (v w: aval) := + match w, v with + | L i2, L i1 => + if Int64.eq i2 Int64.zero + then ntop + else L (Int64.divu i1 i2) + | _, _ => ntop2 v w + end. + +Lemma divlu_total_sound: + forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.divlu v w)) (divlu_total x y). +Proof. + intros until y. + intros HX HY. + inv HX; inv HY; cbn in *. + all: unfold ntop2; auto with va. + all: destruct Int64.eq eqn:E; unfold ntop2, ntop; cbn; auto with va. +Qed. + + +Definition modls_total (v w: aval) := + match w, v with + | L i2, L i1 => + if Int64.eq i2 Int64.zero + || Int64.eq i1 (Int64.repr Int64.min_signed) && Int64.eq i2 Int64.mone + then ntop + else L (Int64.mods i1 i2) + | _, _ => ntop2 v w + end. + +Lemma modls_total_sound: + forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.modls v w)) (modls_total x y). +Proof. + intros until y. + intros HX HY. + inv HX; inv HY; cbn in *. + all: unfold ntop2; auto with va. + all: destruct (_ || _) eqn:E; unfold ntop2, ntop; cbn; auto with va. +Qed. + + +Definition modlu_total (v w: aval) := + match w, v with + | L i2, L i1 => + if Int64.eq i2 Int64.zero + then ntop + else L (Int64.modu i1 i2) + | _, _ => ntop2 v w + end. + +Lemma modlu_total_sound: + forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.modlu v w)) (modlu_total x y). +Proof. + intros until y. + intros HX HY. + inv HX; inv HY; cbn in *. + all: unfold ntop2; auto with va. + all: destruct Int64.eq eqn:E; cbn; unfold ntop2, ntop; auto with va. +Qed. + +Lemma shrxl_total_sound: + forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.shrxl v w)) (shrxl x y). +Proof. + intros until y. intros HX HY. + inv HX; inv HY; cbn. + all: unfold ntop1; auto with va. + all: destruct Int.ltu eqn:LTU; cbn; unfold ntop; auto with va. +Qed. + (** Comparisons and variation intervals *) Definition cmp_intv (c: comparison) (i: Z * Z) (n: Z) : abool := @@ -4734,6 +5195,26 @@ Hint Resolve cnot_sound symbol_address_sound longoffloat_sound longuoffloat_sound floatoflong_sound floatoflongu_sound longofsingle_sound longuofsingle_sound singleoflong_sound singleoflongu_sound longofwords_sound loword_sound hiword_sound + intoffloat_total_sound + intuoffloat_total_sound + intofsingle_total_sound + intuofsingle_total_sound + singleofint_total_sound + singleofintu_total_sound + longoffloat_total_sound + longuoffloat_total_sound + longofsingle_total_sound + longuofsingle_total_sound + singleoflong_total_sound + singleoflongu_total_sound + floatoflong_total_sound + floatoflongu_total_sound + floatofint_total_sound + floatofintu_total_sound + divu_total_sound divs_total_sound + modu_total_sound mods_total_sound shrx_total_sound + divlu_total_sound divls_total_sound + modlu_total_sound modls_total_sound shrxl_total_sound cmpu_bool_sound cmp_bool_sound cmplu_bool_sound cmpl_bool_sound cmpf_bool_sound cmpfs_bool_sound maskzero_sound : va. |