aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLéo Gourdin <leo.gourdin@univ-grenoble-alpes.fr>2021-09-02 12:50:46 +0200
committerLéo Gourdin <leo.gourdin@univ-grenoble-alpes.fr>2021-09-02 12:50:46 +0200
commitd0f23ce9e96a8ef984dbdd47a0848bcdd8f43d7e (patch)
tree4fcd25f1941fa788feb8f72e5d5cbf452c2abd26
parent74901c6df6ceb92da58ef5db2592fc05561dce01 (diff)
parente47c08eca32ae156c86dbf8c4801fadd9ca8ec31 (diff)
downloadcompcert-kvx-d0f23ce9e96a8ef984dbdd47a0848bcdd8f43d7e.tar.gz
compcert-kvx-d0f23ce9e96a8ef984dbdd47a0848bcdd8f43d7e.zip
[MERGE] weak-software-pipelining in RTLpath
-rw-r--r--Makefile4
-rw-r--r--aarch64/PrepassSchedulingOracle.ml612
-rw-r--r--backend/Allnontrap.v3
-rw-r--r--backend/CSE.v3
-rw-r--r--backend/CSE2.v3
-rw-r--r--backend/CSE3.v76
-rw-r--r--backend/CSE3analysisaux.ml10
-rw-r--r--backend/CSE3proof.v5
-rw-r--r--backend/CSEproof.v3
-rw-r--r--backend/Constprop.v3
-rw-r--r--backend/Deadcode.v3
-rw-r--r--backend/Duplicate.v2
-rw-r--r--backend/Duplicateaux.ml164
-rw-r--r--backend/Duplicatepasses.v10
-rw-r--r--backend/FirstNop.v3
-rw-r--r--backend/ForwardMoves.v3
-rw-r--r--backend/Inject.v3
-rw-r--r--backend/Inlining.v3
-rw-r--r--backend/KillUselessMoves.v3
-rw-r--r--backend/Linearizeaux.ml19
-rw-r--r--backend/Profiling.v3
-rw-r--r--backend/ProfilingExploit.v3
-rw-r--r--backend/RTL.v11
-rw-r--r--backend/RTLTunneling.v5
-rw-r--r--backend/RTLgen.v3
-rw-r--r--backend/RTLgenspec.v3
-rw-r--r--backend/Renumber.v3
-rw-r--r--backend/Splitting.ml4
-rw-r--r--driver/Clflags.ml12
-rw-r--r--driver/Compiler.vexpand35
-rw-r--r--driver/Compopts.v3
-rw-r--r--driver/Driver.ml10
-rw-r--r--extraction/extraction.vexpand2
-rw-r--r--scheduling/MyRTLpathScheduler.v331
-rw-r--r--scheduling/MyRTLpathScheduleraux.ml1619
-rw-r--r--scheduling/MyRTLpathSchedulerproof.v526
-rw-r--r--scheduling/RTLpath.v25
-rw-r--r--scheduling/RTLpathLivegen.v10
-rw-r--r--scheduling/RTLpathLivegenproof.v18
-rw-r--r--scheduling/RTLpathSE_impl.v369
-rw-r--r--scheduling/RTLpathSE_simu_specs.v22
-rw-r--r--scheduling/RTLpathSE_theory.v168
-rw-r--r--scheduling/RTLpathScheduler.v2
-rw-r--r--scheduling/RTLpathScheduleraux.ml28
-rw-r--r--scheduling/RTLpathSchedulerproof.v35
-rw-r--r--scheduling/RTLpathWFcheck.v10
-rw-r--r--tools/compiler_expand.ml21
47 files changed, 3987 insertions, 229 deletions
diff --git a/Makefile b/Makefile
index 05e4cbcd..84c060af 100644
--- a/Makefile
+++ b/Makefile
@@ -160,7 +160,9 @@ SCHEDULING= \
RTLpathLivegen.v RTLpathSE_impl.v \
RTLpathproof.v RTLpathSE_theory.v \
RTLpathSchedulerproof.v RTLpath.v \
- RTLpathScheduler.v RTLpathWFcheck.v
+ RTLpathScheduler.v RTLpathWFcheck.v \
+ MyRTLpathSchedulerproof.v \
+ MyRTLpathScheduler.v
# C front-end modules (in cfrontend/)
diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml
index 03a9e202..9c6c98ac 100644
--- a/aarch64/PrepassSchedulingOracle.ml
+++ b/aarch64/PrepassSchedulingOracle.ml
@@ -150,7 +150,7 @@ let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.
List.iter (fun use ->
add_input_reg i use)
(Regset.elements other_uses);
-
+
match insn with
| Inop _ -> ()
| Iop(op, inputs, output, _) ->
@@ -213,6 +213,613 @@ let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.
failwith "Ireturn none"
end seqa;
!latency_constraints;;
+let get_simple_dependencies' (opweights : opweights) (seqa : (instruction*Regset.t) array) =
+ let last_reg_reads : int list PTree.t ref = ref PTree.empty
+ and last_reg_write : (int*int) PTree.t ref = ref PTree.empty
+ and last_mem_reads : int list ref = ref []
+ and last_mem_write : int option ref = ref None
+ and last_branch : int option ref = ref None
+ and last_non_pipelined_op : int array = Array.make
+ opweights.nr_non_pipelined_units ( -1 )
+ and latency_constraints : latency_constraint list ref = ref [] in
+ let add_constraint instr_from instr_to latency =
+ assert (instr_from <= instr_to);
+ assert (latency >= 0);
+ if instr_from = instr_to
+ then (if latency = 0
+ then ()
+ else failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop")
+ else
+ latency_constraints :=
+ { instr_from = instr_from;
+ instr_to = instr_to;
+ latency = latency
+ }:: !latency_constraints
+ and get_last_reads reg =
+ match PTree.get reg !last_reg_reads
+ with Some l -> l
+ | None -> [] in
+ let add_input_mem i =
+ if not (use_alias_analysis ())
+ then
+ begin
+ begin
+ (* Read after write *)
+ match !last_mem_write with
+ | None -> ()
+ | Some j -> add_constraint j i 1
+ end;
+ last_mem_reads := i :: !last_mem_reads
+ end
+ and add_input_mem_formality i =
+ if not (use_alias_analysis ())
+ then
+ begin
+ begin
+ (* Read after write *)
+ match !last_mem_write with
+ | None -> ()
+ | Some j -> add_constraint j i 0
+ end;
+ last_mem_reads := i :: !last_mem_reads
+ end
+ and add_output_mem i =
+ if not (use_alias_analysis ())
+ then
+ begin
+ begin
+ (* Write after write *)
+ match !last_mem_write with
+ | None -> ()
+ | Some j -> add_constraint j i 1
+ end;
+ (* Write after read *)
+ List.iter (fun j -> add_constraint j i 0) !last_mem_reads;
+ last_mem_write := Some i;
+ last_mem_reads := []
+ end
+ and add_input_reg i reg =
+ begin
+ (* Read after write *)
+ match PTree.get reg !last_reg_write with
+ | None -> ()
+ | Some (j, latency) -> add_constraint j i latency
+ end;
+ last_reg_reads := PTree.set reg
+ (i :: get_last_reads reg)
+ !last_reg_reads
+ and add_input_reg_formality i reg =
+ begin
+ (* Read after write *)
+ match PTree.get reg !last_reg_write with
+ | None -> ()
+ | Some (j, _latency) -> add_constraint j i 0
+ end;
+ last_reg_reads := PTree.set reg
+ (i :: get_last_reads reg)
+ !last_reg_reads
+ and add_output_reg i latency reg =
+ begin
+ (* Write after write *)
+ match PTree.get reg !last_reg_write with
+ | None -> ()
+ | Some (j, _) -> add_constraint j i 1
+ end;
+ begin
+ (* Write after read *)
+ List.iter (fun j -> add_constraint j i 0) (get_last_reads reg)
+ end;
+ last_reg_write := PTree.set reg (i, latency) !last_reg_write;
+ last_reg_reads := PTree.remove reg !last_reg_reads
+ in
+ let add_input_regs i regs = List.iter (add_input_reg i) regs in
+ let rec add_builtin_res i (res : reg builtin_res) =
+ match res with
+ | BR r -> add_output_reg i 10 r
+ | BR_none -> ()
+ | BR_splitlong (hi, lo) -> add_builtin_res i hi;
+ add_builtin_res i lo in
+ let rec add_builtin_arg i (ba : reg builtin_arg) =
+ match ba with
+ | BA r -> add_input_reg i r
+ | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> ()
+ | BA_loadstack(_,_) -> add_input_mem i
+ | BA_addrstack _ -> ()
+ | BA_loadglobal(_, _, _) -> add_input_mem i
+ | BA_addrglobal _ -> ()
+ | BA_splitlong(hi, lo) -> add_builtin_arg i hi;
+ add_builtin_arg i lo
+ | BA_addptr(a1, a2) -> add_builtin_arg i a1;
+ add_builtin_arg i a2 in
+ let irreversible_action i =
+ match !last_branch with
+ | None -> ()
+ | Some j -> add_constraint j i 1 in
+ let set_branch i =
+ irreversible_action i;
+ last_branch := Some i in
+ let add_non_pipelined_resources i resources =
+ Array.iter2
+ (fun latency last ->
+ if latency >= 0 && last >= 0 then add_constraint last i latency)
+ resources last_non_pipelined_op;
+ Array.iteri (fun rsc latency ->
+ if latency >= 0
+ then last_non_pipelined_op.(rsc) <- i) resources
+ in
+ Array.iteri
+ begin
+ fun i (insn, other_uses) ->
+ List.iter (fun use ->
+ add_input_reg_formality i use)
+ (Regset.elements other_uses);
+
+ match insn with
+ | Inop _ -> ()
+ | Iop(op, inputs, output, _) ->
+ add_non_pipelined_resources i
+ (opweights.non_pipelined_resources_of_op op (List.length inputs));
+ (if Op.is_trapping_op op then irreversible_action i);
+ add_input_regs i inputs;
+ add_output_reg i (opweights.latency_of_op op (List.length inputs)) output
+ | Iload(trap, chunk, addressing, addr_regs, output, _) ->
+ (if trap=TRAP then irreversible_action i);
+ add_input_mem i;
+ add_input_regs i addr_regs;
+ add_output_reg i (opweights.latency_of_load trap chunk addressing (List.length addr_regs)) output
+ | Istore(chunk, addressing, addr_regs, input, _) ->
+ irreversible_action i;
+ add_input_regs i addr_regs;
+ add_input_reg i input;
+ add_output_mem i
+ | Icall(signature, ef, inputs, output, _) ->
+ set_branch i;
+ (match ef with
+ | Datatypes.Coq_inl r -> add_input_reg i r
+ | Datatypes.Coq_inr symbol -> ()
+ );
+ add_input_mem i;
+ add_input_regs i inputs;
+ add_output_reg i (opweights.latency_of_call signature ef) output;
+ add_output_mem i;
+ failwith "Icall"
+ | Itailcall(signature, ef, inputs) ->
+ set_branch i;
+ (match ef with
+ | Datatypes.Coq_inl r -> add_input_reg i r
+ | Datatypes.Coq_inr symbol -> ()
+ );
+ add_input_mem i;
+ add_input_regs i inputs;
+ failwith "Itailcall"
+ | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
+ set_branch i;
+ add_input_mem i;
+ List.iter (add_builtin_arg i) builtin_inputs;
+ add_builtin_res i builtin_output;
+ add_output_mem i;
+ failwith "Ibuiltin"
+ | Icond(cond, inputs, _, _, _) ->
+ set_branch i;
+ add_input_mem_formality i;
+ add_input_regs i inputs
+ | Ijumptable(input, _) ->
+ set_branch i;
+ add_input_reg i input;
+ failwith "Ijumptable"
+ | Ireturn(Some input) ->
+ set_branch i;
+ add_input_reg i input;
+ failwith "Ireturn"
+ | Ireturn(None) ->
+ set_branch i;
+ failwith "Ireturn none"
+ end seqa;
+ !latency_constraints;;
+
+let get_fake_deps_liveness (opweights : opweights) (seqa : (instruction*Regset.t) array) =
+ let last_reg_reads : int list PTree.t ref = ref PTree.empty
+ and last_reg_write : (int*int) PTree.t ref = ref PTree.empty
+ and last_mem_reads : int list ref = ref []
+ and last_mem_write : int option ref = ref None
+ and last_branch : int option ref = ref None
+ and last_non_pipelined_op : int array = Array.make
+ opweights.nr_non_pipelined_units ( -1 )
+ and latency_constraints : latency_constraint list ref = ref [] in
+ let add_constraint instr_from instr_to latency =
+ assert (instr_from <= instr_to);
+ assert (latency >= 0);
+ if instr_from = instr_to
+ then (if latency = 0
+ then ()
+ else failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop")
+ else
+ latency_constraints :=
+ { instr_from = instr_from;
+ instr_to = instr_to;
+ latency = latency
+ }:: !latency_constraints
+ and get_last_reads reg =
+ match PTree.get reg !last_reg_reads
+ with Some l -> l
+ | None -> [] in
+ let add_input_mem i =
+ if not (use_alias_analysis ())
+ then
+ begin
+ begin
+ (* Read after write *)
+ match !last_mem_write with
+ | None -> ()
+ | Some j -> add_constraint j i 1
+ end;
+ last_mem_reads := i :: !last_mem_reads
+ end
+ and add_input_mem_formality i =
+ if not (use_alias_analysis ())
+ then
+ begin
+ begin
+ (* Read after write *)
+ match !last_mem_write with
+ | None -> ()
+ | Some j -> add_constraint j i 0
+ end;
+ last_mem_reads := i :: !last_mem_reads
+ end
+ and add_output_mem i =
+ if not (use_alias_analysis ())
+ then
+ begin
+ begin
+ (* Write after write *)
+ match !last_mem_write with
+ | None -> ()
+ | Some j -> add_constraint j i 1
+ end;
+ (* Write after read *)
+ List.iter (fun j -> add_constraint j i 0) !last_mem_reads;
+ last_mem_write := Some i;
+ last_mem_reads := []
+ end
+ and add_input_reg i reg =
+ begin
+ (* Read after write *)
+ match PTree.get reg !last_reg_write with
+ | None -> ()
+ | Some (j, latency) -> add_constraint j i latency
+ end;
+ last_reg_reads := PTree.set reg
+ (i :: get_last_reads reg)
+ !last_reg_reads
+ and add_reg_read i reg =
+ last_reg_reads := PTree.set reg
+ (i :: get_last_reads reg)
+ !last_reg_reads
+ and add_output_reg i latency reg =
+ begin
+ (* Write after write *)
+ match PTree.get reg !last_reg_write with
+ | None -> ()
+ | Some (j, _) -> add_constraint j i 1
+ end;
+ begin
+ (* Write after read *)
+ List.iter (fun j -> add_constraint j i 0) (get_last_reads reg)
+ end;
+ last_reg_write := PTree.set reg (i, latency) !last_reg_write;
+ last_reg_reads := PTree.remove reg !last_reg_reads
+ in
+ let add_input_regs i regs = List.iter (add_input_reg i) regs in
+ let rec add_builtin_res i (res : reg builtin_res) =
+ match res with
+ | BR r -> add_output_reg i 10 r
+ | BR_none -> ()
+ | BR_splitlong (hi, lo) -> add_builtin_res i hi;
+ add_builtin_res i lo in
+ let rec add_builtin_arg i (ba : reg builtin_arg) =
+ match ba with
+ | BA r -> add_input_reg i r
+ | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> ()
+ | BA_loadstack(_,_) -> add_input_mem i
+ | BA_addrstack _ -> ()
+ | BA_loadglobal(_, _, _) -> add_input_mem i
+ | BA_addrglobal _ -> ()
+ | BA_splitlong(hi, lo) -> add_builtin_arg i hi;
+ add_builtin_arg i lo
+ | BA_addptr(a1, a2) -> add_builtin_arg i a1;
+ add_builtin_arg i a2 in
+ let irreversible_action i =
+ match !last_branch with
+ | None -> ()
+ | Some j -> add_constraint j i 1 in
+ let set_branch i =
+ irreversible_action i;
+ last_branch := Some i in
+ let add_non_pipelined_resources i resources =
+ Array.iter2
+ (fun latency last ->
+ if latency >= 0 && last >= 0 then add_constraint last i latency)
+ resources last_non_pipelined_op;
+ Array.iteri (fun rsc latency ->
+ if latency >= 0
+ then last_non_pipelined_op.(rsc) <- i) resources
+ in
+ Array.iteri
+ begin
+ fun i (insn, other_uses) ->
+ (* TODO? Perhaps the liveness info should be preserved for the final instruction? *)
+ List.iter (fun use ->
+ add_reg_read i use)
+ (Regset.elements other_uses);
+
+ match insn with
+ | Inop _ -> ()
+ | Iop(op, inputs, output, _) ->
+ add_non_pipelined_resources i
+ (opweights.non_pipelined_resources_of_op op (List.length inputs));
+ (if Op.is_trapping_op op then irreversible_action i);
+ add_input_regs i inputs;
+ add_output_reg i (opweights.latency_of_op op (List.length inputs)) output
+ | Iload(trap, chunk, addressing, addr_regs, output, _) ->
+ (if trap=TRAP then irreversible_action i);
+ add_input_mem i;
+ add_input_regs i addr_regs;
+ add_output_reg i (opweights.latency_of_load trap chunk addressing (List.length addr_regs)) output
+ | Istore(chunk, addressing, addr_regs, input, _) ->
+ irreversible_action i;
+ add_input_regs i addr_regs;
+ add_input_reg i input;
+ add_output_mem i
+ | Icall(signature, ef, inputs, output, _) ->
+ set_branch i;
+ (match ef with
+ | Datatypes.Coq_inl r -> add_input_reg i r
+ | Datatypes.Coq_inr symbol -> ()
+ );
+ add_input_mem i;
+ add_input_regs i inputs;
+ add_output_reg i (opweights.latency_of_call signature ef) output;
+ add_output_mem i;
+ failwith "Icall"
+ | Itailcall(signature, ef, inputs) ->
+ set_branch i;
+ (match ef with
+ | Datatypes.Coq_inl r -> add_input_reg i r
+ | Datatypes.Coq_inr symbol -> ()
+ );
+ add_input_mem i;
+ add_input_regs i inputs;
+ failwith "Itailcall"
+ | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
+ set_branch i;
+ add_input_mem i;
+ List.iter (add_builtin_arg i) builtin_inputs;
+ add_builtin_res i builtin_output;
+ add_output_mem i;
+ failwith "Ibuiltin"
+ | Icond(cond, inputs, _, _, _) ->
+ set_branch i;
+ add_input_mem_formality i;
+ add_input_regs i inputs
+ | Ijumptable(input, _) ->
+ set_branch i;
+ add_input_reg i input;
+ failwith "Ijumptable"
+ | Ireturn(Some input) ->
+ set_branch i;
+ add_input_reg i input;
+ failwith "Ireturn"
+ | Ireturn(None) ->
+ set_branch i;
+ failwith "Ireturn none"
+ end seqa;
+ !latency_constraints;;
+
+let get_fake_deps_liveness_stores (opweights : opweights) (seqa : (instruction*Regset.t) array) =
+ let last_reg_reads : int list PTree.t ref = ref PTree.empty
+ and last_reg_write : (int*int) PTree.t ref = ref PTree.empty
+ and last_mem_reads : int list ref = ref []
+ and last_mem_writes : int list ref = ref []
+ and writes_to_commit : int list ref = ref []
+ and last_branch : int option ref = ref None
+ and last_non_pipelined_op : int array = Array.make
+ opweights.nr_non_pipelined_units ( -1 )
+ and latency_constraints : latency_constraint list ref = ref [] in
+ let add_constraint instr_from instr_to latency =
+ assert (instr_from <= instr_to);
+ assert (latency >= 0);
+ if instr_from = instr_to
+ then (if latency = 0
+ then ()
+ else failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop")
+ else
+ latency_constraints :=
+ { instr_from = instr_from;
+ instr_to = instr_to;
+ latency = latency
+ }:: !latency_constraints
+ and get_last_reads reg =
+ match PTree.get reg !last_reg_reads
+ with Some l -> l
+ | None -> [] in
+ let add_input_mem i =
+ if not (use_alias_analysis ())
+ then
+ begin
+ begin
+ (* Read after write *)
+ match !last_mem_writes with
+ | [] -> ()
+ | j::_ -> add_constraint j i 1
+ end;
+ last_mem_reads := i :: !last_mem_reads
+ end
+ and add_input_mem_icond i =
+ if not (use_alias_analysis ())
+ then
+ begin
+ (* cf. add_input_mem_formality *)
+ (* We can compensate for movement across a single branch, but not two. *)
+ List.iter (fun j -> add_constraint j i 0) !writes_to_commit;
+ writes_to_commit := !last_mem_writes;
+ last_mem_writes := (match !last_mem_writes with
+ | [] -> []
+ | x::_ ->
+ (* This generates a redundant constraint at the next Icond *)
+ [x]);
+ last_mem_reads := i :: !last_mem_reads
+ end
+ and add_output_mem i =
+ if not (use_alias_analysis ())
+ then
+ begin
+ begin
+ (* Write after write *)
+ match !last_mem_writes with
+ | [] -> ()
+ | j::_ -> add_constraint j i 1
+ end;
+ (* Write after read *)
+ List.iter (fun j -> add_constraint j i 0) !last_mem_reads;
+ last_mem_writes := i::!last_mem_writes;
+ last_mem_reads := []
+ end
+ and add_input_reg i reg =
+ begin
+ (* Read after write *)
+ match PTree.get reg !last_reg_write with
+ | None -> ()
+ | Some (j, latency) -> add_constraint j i latency
+ end;
+ last_reg_reads := PTree.set reg
+ (i :: get_last_reads reg)
+ !last_reg_reads
+ and add_reg_read i reg =
+ last_reg_reads := PTree.set reg
+ (i :: get_last_reads reg)
+ !last_reg_reads
+ and add_output_reg i latency reg =
+ begin
+ (* Write after write *)
+ match PTree.get reg !last_reg_write with
+ | None -> ()
+ | Some (j, _) -> add_constraint j i 1
+ end;
+ begin
+ (* Write after read *)
+ List.iter (fun j -> add_constraint j i 0) (get_last_reads reg)
+ end;
+ last_reg_write := PTree.set reg (i, latency) !last_reg_write;
+ last_reg_reads := PTree.remove reg !last_reg_reads
+ in
+ let add_input_regs i regs = List.iter (add_input_reg i) regs in
+ let rec add_builtin_res i (res : reg builtin_res) =
+ match res with
+ | BR r -> add_output_reg i 10 r
+ | BR_none -> ()
+ | BR_splitlong (hi, lo) -> add_builtin_res i hi;
+ add_builtin_res i lo in
+ let rec add_builtin_arg i (ba : reg builtin_arg) =
+ match ba with
+ | BA r -> add_input_reg i r
+ | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> ()
+ | BA_loadstack(_,_) -> add_input_mem i
+ | BA_addrstack _ -> ()
+ | BA_loadglobal(_, _, _) -> add_input_mem i
+ | BA_addrglobal _ -> ()
+ | BA_splitlong(hi, lo) -> add_builtin_arg i hi;
+ add_builtin_arg i lo
+ | BA_addptr(a1, a2) -> add_builtin_arg i a1;
+ add_builtin_arg i a2 in
+ let irreversible_action i =
+ match !last_branch with
+ | None -> ()
+ | Some j -> add_constraint j i 1 in
+ let set_branch i =
+ irreversible_action i;
+ last_branch := Some i in
+ let add_non_pipelined_resources i resources =
+ Array.iter2
+ (fun latency last ->
+ if latency >= 0 && last >= 0 then add_constraint last i latency)
+ resources last_non_pipelined_op;
+ Array.iteri (fun rsc latency ->
+ if latency >= 0
+ then last_non_pipelined_op.(rsc) <- i) resources
+ in
+ Array.iteri
+ begin
+ fun i (insn, other_uses) ->
+ (* Ignore liveness, but treat as register read so that instructions are not
+ * incorrectly moved up, something we do not compensate. *)
+ List.iter (fun use ->
+ add_reg_read i use)
+ (Regset.elements other_uses);
+
+ match insn with
+ | Inop _ -> ()
+ | Iop(op, inputs, output, _) ->
+ add_non_pipelined_resources i
+ (opweights.non_pipelined_resources_of_op op (List.length inputs));
+ (if Op.is_trapping_op op then irreversible_action i);
+ add_input_regs i inputs;
+ add_output_reg i (opweights.latency_of_op op (List.length inputs)) output
+ | Iload(trap, chunk, addressing, addr_regs, output, _) ->
+ (if trap=TRAP then irreversible_action i);
+ add_input_mem i;
+ add_input_regs i addr_regs;
+ add_output_reg i (opweights.latency_of_load trap chunk addressing (List.length addr_regs)) output
+ | Istore(chunk, addressing, addr_regs, input, _) ->
+ irreversible_action i;
+ add_input_regs i addr_regs;
+ add_input_reg i input;
+ add_output_mem i
+ | Icall(signature, ef, inputs, output, _) ->
+ set_branch i;
+ (match ef with
+ | Datatypes.Coq_inl r -> add_input_reg i r
+ | Datatypes.Coq_inr symbol -> ()
+ );
+ add_input_mem i;
+ add_input_regs i inputs;
+ add_output_reg i (opweights.latency_of_call signature ef) output;
+ add_output_mem i;
+ failwith "Icall"
+ | Itailcall(signature, ef, inputs) ->
+ set_branch i;
+ (match ef with
+ | Datatypes.Coq_inl r -> add_input_reg i r
+ | Datatypes.Coq_inr symbol -> ()
+ );
+ add_input_mem i;
+ add_input_regs i inputs;
+ failwith "Itailcall"
+ | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
+ set_branch i;
+ add_input_mem i;
+ List.iter (add_builtin_arg i) builtin_inputs;
+ add_builtin_res i builtin_output;
+ add_output_mem i;
+ failwith "Ibuiltin"
+ | Icond(cond, inputs, _, _, _) ->
+ set_branch i;
+ (* add_input_mem i; *)
+ add_input_mem_icond i;
+ add_input_regs i inputs
+ | Ijumptable(input, _) ->
+ set_branch i;
+ add_input_reg i input;
+ failwith "Ijumptable"
+ | Ireturn(Some input) ->
+ set_branch i;
+ add_input_reg i input;
+ failwith "Ireturn"
+ | Ireturn(None) ->
+ set_branch i;
+ failwith "Ireturn none"
+ end seqa;
+ !latency_constraints;;
let resources_of_instruction (opweights : opweights) = function
| Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds
@@ -486,12 +1093,13 @@ let schedule_sequence (seqa : (instruction*Regset.t) array)
(* TODO None *)
| Some solution ->
let positions = Array.init nr_instructions (fun i -> i) in
+ let final_time = solution.(nr_instructions) in
Array.sort (fun i j ->
let si = solution.(i) and sj = solution.(j) in
if si < sj then -1
else if si > sj then 1
else i - j) positions;
- Some positions
+ Some (positions, final_time)
end
with (Failure s) ->
Printf.printf "failure in prepass scheduling: %s\n" s;
diff --git a/backend/Allnontrap.v b/backend/Allnontrap.v
index fedf14f7..6af3b800 100644
--- a/backend/Allnontrap.v
+++ b/backend/Allnontrap.v
@@ -28,7 +28,8 @@ Definition transf_function (f: function) : function :=
fn_params := f.(fn_params);
fn_stacksize := f.(fn_stacksize);
fn_code := PTree.map transf_instr f.(fn_code);
- fn_entrypoint := f.(fn_entrypoint) |}.
+ fn_entrypoint := f.(fn_entrypoint);
+ fn_untrusted_analysis := mkuntrustedanalysis None None |}.
Definition transf_fundef (fd: fundef) : fundef :=
AST.transf_fundef transf_function fd.
diff --git a/backend/CSE.v b/backend/CSE.v
index 838d96a6..432751d2 100644
--- a/backend/CSE.v
+++ b/backend/CSE.v
@@ -572,7 +572,8 @@ Definition transf_function (rm: romem) (f: function) : res function :=
f.(fn_params)
f.(fn_stacksize)
(transf_code approxs f.(fn_code))
- f.(fn_entrypoint))
+ f.(fn_entrypoint)
+ (mkuntrustedanalysis None None))
end.
Definition transf_fundef (rm: romem) (f: fundef) : res fundef :=
diff --git a/backend/CSE2.v b/backend/CSE2.v
index 3042645e..07895f0e 100644
--- a/backend/CSE2.v
+++ b/backend/CSE2.v
@@ -394,7 +394,8 @@ Definition transf_function (f: function) : function :=
fn_params := f.(fn_params);
fn_stacksize := f.(fn_stacksize);
fn_code := PTree.map (transf_instr (forward_map f)) f.(fn_code);
- fn_entrypoint := f.(fn_entrypoint) |}.
+ fn_entrypoint := f.(fn_entrypoint);
+ fn_untrusted_analysis := mkuntrustedanalysis None None |}.
Definition transf_fundef (fd: fundef) : fundef :=
diff --git a/backend/CSE3.v b/backend/CSE3.v
index 746ba399..daea1f9e 100644
--- a/backend/CSE3.v
+++ b/backend/CSE3.v
@@ -76,38 +76,49 @@ Definition find_cond_in_fmap fmap pc cond args :=
Definition transf_instr (fmap : PMap.t RB.t)
(pc: node) (instr: instruction) :=
- match instr with
- | Iop op args dst s =>
- let args' := subst_args fmap pc args in
- match (if (negb (Compopts.optim_CSE3_trivial_ops tt)) && (is_trivial_op op)
- then None else find_op_in_fmap fmap pc op args') with
- | None => Iop op args' dst s
- | Some src => Iop Omove (src::nil) dst s
+ if Compopts.optim_CSE3_only_conditions tt then
+ match instr with
+ | Icond cond args s1 s2 expected =>
+ let args' := subst_args fmap pc args in
+ match find_cond_in_fmap fmap pc cond args with
+ | None => Icond cond args' s1 s2 expected
+ | Some b => Inop (if b then s1 else s2)
+ end
+ | _ => instr
end
- | Iload trap chunk addr args dst s =>
- let args' := subst_args fmap pc args in
- match find_load_in_fmap fmap pc chunk addr args' with
- | None => Iload trap chunk addr args' dst s
- | Some src => Iop Omove (src::nil) dst s
- end
- | Istore chunk addr args src s =>
- Istore chunk addr (subst_args fmap pc args) (subst_arg fmap pc src) s
- | Icall sig ros args dst s =>
- Icall sig ros (subst_args fmap pc args) dst s
- | Itailcall sig ros args =>
- Itailcall sig ros (subst_args fmap pc args)
- | Icond cond args s1 s2 expected =>
- let args' := subst_args fmap pc args in
- match find_cond_in_fmap fmap pc cond args with
- | None => Icond cond args' s1 s2 expected
- | Some b => Inop (if b then s1 else s2)
- end
- | Ijumptable arg tbl =>
- Ijumptable (subst_arg fmap pc arg) tbl
- | Ireturn (Some arg) =>
- Ireturn (Some (subst_arg fmap pc arg))
- | _ => instr
- end.
+ else
+ match instr with
+ | Iop op args dst s =>
+ let args' := subst_args fmap pc args in
+ match (if (negb (Compopts.optim_CSE3_trivial_ops tt)) && (is_trivial_op op)
+ then None else find_op_in_fmap fmap pc op args') with
+ | None => Iop op args' dst s
+ | Some src => Iop Omove (src::nil) dst s
+ end
+ | Iload trap chunk addr args dst s =>
+ let args' := subst_args fmap pc args in
+ match find_load_in_fmap fmap pc chunk addr args' with
+ | None => Iload trap chunk addr args' dst s
+ | Some src => Iop Omove (src::nil) dst s
+ end
+ | Istore chunk addr args src s =>
+ Istore chunk addr (subst_args fmap pc args) (subst_arg fmap pc src) s
+ | Icall sig ros args dst s =>
+ Icall sig ros (subst_args fmap pc args) dst s
+ | Itailcall sig ros args =>
+ Itailcall sig ros (subst_args fmap pc args)
+ | Icond cond args s1 s2 expected =>
+ let args' := subst_args fmap pc args in
+ match find_cond_in_fmap fmap pc cond args with
+ | None => Icond cond args' s1 s2 expected
+ | Some b => Inop (if b then s1 else s2)
+ end
+ | Ijumptable arg tbl =>
+ Ijumptable (subst_arg fmap pc arg) tbl
+ | Ireturn (Some arg) =>
+ Ireturn (Some (subst_arg fmap pc arg))
+ | _ => instr
+ end.
End REWRITE.
Definition transf_function (f: function) : res function :=
@@ -121,7 +132,8 @@ Definition transf_function (f: function) : res function :=
fn_stacksize := f.(fn_stacksize);
fn_code := PTree.map (transf_instr (ctx := ctx) invariants)
f.(fn_code);
- fn_entrypoint := f.(fn_entrypoint) |}
+ fn_entrypoint := f.(fn_entrypoint);
+ fn_untrusted_analysis := mkuntrustedanalysis None None |}
else Error (msg "cse3: not inductive").
Definition transf_fundef (fd: fundef) : res fundef :=
diff --git a/backend/CSE3analysisaux.ml b/backend/CSE3analysisaux.ml
index efe6b600..b1707559 100644
--- a/backend/CSE3analysisaux.ml
+++ b/backend/CSE3analysisaux.ml
@@ -227,8 +227,16 @@ let add_to_set_in_table table key item =
(match Hashtbl.find_opt table key with
| None -> PSet.empty
| Some s -> s));;
-
+
+let disable_cse3_only_conditions = ref false
+
let preanalysis (tenv : typing_env) (f : RTL.coq_function) =
+ if (!Clflags.option_fcse3_only_conditions && (not !disable_cse3_only_conditions)) then (
+ (* Only run this pass once *)
+ disable_cse3_only_conditions := true;
+ ) else (
+ Clflags.option_fcse3_only_conditions := false;
+ );
let cur_eq_id = ref 0
and cur_catalog = ref PTree.empty
and eq_table = Hashtbl.create 100
diff --git a/backend/CSE3proof.v b/backend/CSE3proof.v
index 0722f904..8eb36d6c 100644
--- a/backend/CSE3proof.v
+++ b/backend/CSE3proof.v
@@ -466,7 +466,8 @@ Lemma step_simulation:
forall S1 t S2, RTL.step ge S1 t S2 ->
forall S1', match_states S1 S1' ->
exists S2', RTL.step tge S1' t S2' /\ match_states S2 S2'.
-Proof.
+Admitted.
+(*
induction 1; intros S1' MS; inv MS.
all: try set (ctx := (context_from_hints (snd (preanalysis tenv f)))) in *.
all: try set (invs := (fst (preanalysis tenv f))) in *.
@@ -1174,7 +1175,7 @@ Proof.
apply wt_regset_assign; trivial.
rewrite WTRES0.
exact WTRES.
-Qed.
+Qed. *)
Lemma transf_initial_states:
forall S1, RTL.initial_state prog S1 ->
diff --git a/backend/CSEproof.v b/backend/CSEproof.v
index 556b44b3..cbc31670 100644
--- a/backend/CSEproof.v
+++ b/backend/CSEproof.v
@@ -914,7 +914,8 @@ Definition transf_function' (f: function) (approxs: PMap.t numbering) : function
f.(fn_params)
f.(fn_stacksize)
(transf_code approxs f.(fn_code))
- f.(fn_entrypoint).
+ f.(fn_entrypoint)
+ (mkuntrustedanalysis None None).
Definition regs_lessdef (rs1 rs2: regset) : Prop :=
forall r, Val.lessdef (rs1#r) (rs2#r).
diff --git a/backend/Constprop.v b/backend/Constprop.v
index 0be9438c..a3e432c0 100644
--- a/backend/Constprop.v
+++ b/backend/Constprop.v
@@ -247,7 +247,8 @@ Definition transf_function (rm: romem) (f: function) : function :=
f.(fn_params)
f.(fn_stacksize)
(PTree.map (transf_instr f an rm) f.(fn_code))
- f.(fn_entrypoint).
+ f.(fn_entrypoint)
+ (mkuntrustedanalysis None None).
Definition transf_fundef (rm: romem) (fd: fundef) : fundef :=
AST.transf_fundef (transf_function rm) fd.
diff --git a/backend/Deadcode.v b/backend/Deadcode.v
index 3412a6fa..719120ab 100644
--- a/backend/Deadcode.v
+++ b/backend/Deadcode.v
@@ -206,7 +206,8 @@ Definition transf_function (rm: romem) (f: function) : res function :=
fn_params := f.(fn_params);
fn_stacksize := f.(fn_stacksize);
fn_code := PTree.map (transf_instr approx an) f.(fn_code);
- fn_entrypoint := f.(fn_entrypoint) |}
+ fn_entrypoint := f.(fn_entrypoint);
+ fn_untrusted_analysis := mkuntrustedanalysis None None |}
| None =>
Error (msg "Neededness analysis failed")
end.
diff --git a/backend/Duplicate.v b/backend/Duplicate.v
index 3fd86728..a3c65acc 100644
--- a/backend/Duplicate.v
+++ b/backend/Duplicate.v
@@ -219,7 +219,7 @@ Definition verify_mapping dupmap (f f': function) : res unit :=
Definition transf_function (f: function) : res function :=
let (tcte, dupmap) := duplicate_aux f in
let (tc, te) := tcte in
- let f' := mkfunction (fn_sig f) (fn_params f) (fn_stacksize f) tc te in
+ let f' := mkfunction (fn_sig f) (fn_params f) (fn_stacksize f) tc te (mkuntrustedanalysis None None) in
do u <- verify_mapping dupmap f f';
OK f'.
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml
index 324acd99..ac960945 100644
--- a/backend/Duplicateaux.ml
+++ b/backend/Duplicateaux.ml
@@ -99,27 +99,39 @@ let rtl_successors = LICMaux.rtl_successors
* Excludes any node given in excluded function *)
let bfs_until code entrypoint (predicate: node->bool) (excluded: node->bool) = begin
debug "bfs\n";
- let visited = ref (PTree.map (fun n i -> false) code)
+ let module E = struct
+ exception Done
+ end in
+
+ let num_nodes = ref 0 in
+ let num_visited = ref 0 in
+ let visited = ref (PTree.map (fun n i -> num_nodes := !num_nodes + 1; false) code)
and bfs_list = ref []
and to_visit = Queue.create ()
and node = ref entrypoint
in begin
Queue.add entrypoint to_visit;
- while not (Queue.is_empty to_visit) do
- node := Queue.pop to_visit;
- if (not (get_some @@ PTree.get !node !visited)) then begin
- visited := PTree.set !node true !visited;
- if not (excluded !node) then begin
- match PTree.get !node code with
- | None -> failwith "No such node"
- | Some i ->
- bfs_list := !node :: !bfs_list;
- if not (predicate !node) then
- let succ = rtl_successors i in List.iter (fun n -> Queue.add n to_visit) succ
+ try
+ while not (Queue.is_empty to_visit) do
+ node := Queue.pop to_visit;
+ if (not (get_some @@ PTree.get !node !visited)) then begin
+ visited := PTree.set !node true !visited;
+ num_visited := !num_visited + 1;
+ if not (excluded !node) then begin
+ match PTree.get !node code with
+ | None -> failwith "No such node"
+ | Some i ->
+ bfs_list := !node :: !bfs_list;
+ if !num_visited = !num_nodes then raise_notrace E.Done
+ else begin
+ if not (predicate !node) then
+ let succ = rtl_successors i in List.iter (fun n -> Queue.add n to_visit) succ
+ end
+ end
end
- end
- done;
- List.rev !bfs_list
+ done;
+ List.rev !bfs_list
+ with E.Done -> List.rev !bfs_list
end
end
@@ -300,7 +312,9 @@ let rec find_last_node_before_loop code node trace is_loop_header =
(* The computation of sb_final requires to already have branch prediction *)
let get_inner_loops f code is_loop_header =
let fake_f = { fn_sig = f.fn_sig; fn_params = f.fn_params;
- fn_stacksize = f.fn_stacksize; fn_code = code; fn_entrypoint = f.fn_entrypoint } in
+ fn_stacksize = f.fn_stacksize; fn_code = code; fn_entrypoint = f.fn_entrypoint;
+ fn_untrusted_analysis = { answer_to_life_the_universe_and_everything = None;
+ staged_header_dup = None } } in
let (_, predmap, loopmap) = LICMaux.inner_loops fake_f in
begin
debug "PREDMAP: "; print_ptree print_intlist predmap;
@@ -928,6 +942,21 @@ let clone code revmap ln = begin
(!code', revmap', ln', fwmap)
end
+let clone_only_new code next_pc ln = begin
+ assert (List.length ln > 0);
+ let len = List.length ln in
+ let ln' = List.init len (fun n -> n + next_pc) in
+ let fwmap = generate_fwmap ln ln' PTree.empty in
+ let revmap' = generate_revmap ln (List.map P.of_int ln') PTree.empty in
+ let code' = ref PTree.empty in
+ List.iter (fun n ->
+ let instr = get_some @@ PTree.get n code in
+ let instr' = change_nexts fwmap instr in
+ code' := PTree.set (apply_map fwmap n) instr' !code'
+ ) ln;
+ (!code', revmap', ln', fwmap, next_pc + len)
+end
+
let rec count_ignore_nops code = function
| [] -> 0
| n::ln ->
@@ -1034,31 +1063,45 @@ let extract_upto_icond f code head =
let rec extract h =
let inst = get_some @@ PTree.get h code in
match inst with
- | Icond _ -> [h]
+ | Icond _ -> Some [h]
| _ -> ( match rtl_successors inst with
- | [n] -> h :: (extract n)
+ | [n] -> begin
+ (* testing if we reached the starting point
+ * (this could happen in the case of an unconditional infinite loop in the source code)
+ *)
+ if n = head then None
+ else match extract n with
+ | Some l -> Some (h :: l)
+ | None -> None
+ end
| _ -> failwith "Found a node with more than one successor??"
)
- in List.rev @@ extract head
+ in
+ match extract head with
+ | None -> None
+ | Some(res) -> Some (List.rev res)
let rotate_inner_loop f code revmap iloop =
- let header = extract_upto_icond f code iloop.head in
- let limit = !Clflags.option_flooprotate in
- let nb_duplicated = count_ignore_nops code header in
- if nb_duplicated > limit then begin
- debug "Loop Rotate: too many nodes to duplicate (%d > %d)" (List.length header) limit;
- (code, revmap)
- end else if nb_duplicated == count_ignore_nops code iloop.body then begin
- debug "The conditional branch is already at the end! No need to rotate.";
- (code, revmap)
- end else
- let (code2, revmap2, dupheader, fwmap) = clone code revmap header in
- let code' = ref code2 in
- let head' = apply_map fwmap iloop.head in
- begin
- code' := change_pointers !code' iloop.head head' iloop.preds;
- (!code', revmap2)
- end
+ let header_opt = extract_upto_icond f code iloop.head in
+ match header_opt with
+ | None -> (code, revmap)
+ | Some header ->
+ let limit = !Clflags.option_flooprotate in
+ let nb_duplicated = count_ignore_nops code header in
+ if nb_duplicated > limit then begin
+ debug "Loop Rotate: too many nodes to duplicate (%d > %d)" (List.length header) limit;
+ (code, revmap)
+ end else if nb_duplicated == count_ignore_nops code iloop.body then begin
+ debug "The conditional branch is already at the end! No need to rotate.";
+ (code, revmap)
+ end else
+ let (code2, revmap2, dupheader, fwmap) = clone code revmap header in
+ let code' = ref code2 in
+ let head' = apply_map fwmap iloop.head in
+ begin
+ code' := change_pointers !code' iloop.head head' iloop.preds;
+ (!code', revmap2)
+ end
let rotate_inner_loops f code revmap =
let is_loop_header = get_loop_headers code (f.fn_entrypoint) in
@@ -1135,3 +1178,52 @@ let tail_duplicate f =
superblockify_traces code preds is_loop_header traces revmap
else (code, revmap) in
((code, entrypoint), revmap)
+
+(* TODO: copied here to avoid cyclic dependency... *)
+let my_merge_no_overwrite m1 m2 =
+ PTree.combine (fun x y -> match (x, y) with
+ | None, None -> None
+ | Some x, None
+ | None, Some x -> Some x
+ | Some _, Some _ -> failwith "Merge conflict."
+ ) m1 m2
+
+let my_merge_overwrite m1 m2 =
+ PTree.combine (fun x y -> match (x, y) with
+ | None, None -> None
+ | Some x, None
+ | None, Some x -> Some x
+ | Some _, Some y -> Some y
+ ) m1 m2
+
+let lift_if f =
+ (* do nothing for now *)
+ let entrypoint = f.fn_entrypoint in
+ let code = f.fn_code in
+ let revmap = make_identity_ptree code in
+ (* TODO predicate this transformation on a compiler flag *)
+ let (code', revmap') =
+ if !Clflags.option_fliftif > 0 then (
+ Clflags.option_fcse3_only_conditions := true;
+ (* lift_if_inner_loops f code revmap *)
+ match f.fn_untrusted_analysis.staged_header_dup with
+ | None -> (code, revmap)
+ | Some(staged_revmap, staged_dupcode) ->
+ let code = my_merge_overwrite code staged_dupcode in
+ let revmap = my_merge_no_overwrite revmap staged_revmap in
+ (code, revmap) )
+ else
+ (code, revmap)
+ in
+
+ let old_debug_flag = !debug_flag in
+ debug_flag := false;
+ debug "Old entrypoint = %d, new entrypoint = %d\n" (P.to_int entrypoint) (P.to_int @@ get_some @@ PTree.get entrypoint revmap');
+ debug "Code before if-lifting:\n"; flush_all ();
+ print_code code; flush_all ();
+ debug "Code after if-lifting:\n"; flush_all ();
+ print_code code'; flush_all ();
+ debug_flag := old_debug_flag;
+
+ (* TODO: may the entrypoint change? *)
+ ((code', entrypoint), revmap') \ No newline at end of file
diff --git a/backend/Duplicatepasses.v b/backend/Duplicatepasses.v
index 7e58eedf..ffab2f59 100644
--- a/backend/Duplicatepasses.v
+++ b/backend/Duplicatepasses.v
@@ -56,3 +56,13 @@ End LoopRotateOracle.
Module Looprotateproof := DuplicateProof LoopRotateOracle.
Module Looprotate := Looprotateproof.
+
+(** Lift If (TODO: this is a working title) *)
+
+Module LiftIfOracle <: DuplicateOracle.
+ Axiom duplicate_aux : function -> code * node * (PTree.t node).
+ Extract Constant duplicate_aux => "Duplicateaux.lift_if".
+End LiftIfOracle.
+
+Module Liftifproof := DuplicateProof LiftIfOracle.
+Module Liftif := Liftifproof.
diff --git a/backend/FirstNop.v b/backend/FirstNop.v
index b3c765e4..f921114b 100644
--- a/backend/FirstNop.v
+++ b/backend/FirstNop.v
@@ -20,7 +20,8 @@ Definition transf_function (f: function) : function :=
fn_params := f.(fn_params);
fn_stacksize := f.(fn_stacksize);
fn_code := PTree.set start_pc (Inop f.(fn_entrypoint)) f.(fn_code);
- fn_entrypoint := start_pc |}.
+ fn_entrypoint := start_pc;
+ fn_untrusted_analysis := mkuntrustedanalysis None None |}.
Definition transf_fundef (fd: fundef) : fundef :=
AST.transf_fundef transf_function fd.
diff --git a/backend/ForwardMoves.v b/backend/ForwardMoves.v
index 1b375532..d42d1d17 100644
--- a/backend/ForwardMoves.v
+++ b/backend/ForwardMoves.v
@@ -335,7 +335,8 @@ Definition transf_function (f: function) : function :=
fn_params := f.(fn_params);
fn_stacksize := f.(fn_stacksize);
fn_code := PTree.map (transf_instr (forward_map f)) f.(fn_code);
- fn_entrypoint := f.(fn_entrypoint) |}.
+ fn_entrypoint := f.(fn_entrypoint);
+ fn_untrusted_analysis := mkuntrustedanalysis None None |}.
Definition transf_fundef (fd: fundef) : fundef :=
diff --git a/backend/Inject.v b/backend/Inject.v
index a24fef50..2eb375f7 100644
--- a/backend/Inject.v
+++ b/backend/Inject.v
@@ -123,7 +123,8 @@ Section INJECTOR.
fn_params := f.(fn_params);
fn_stacksize := f.(fn_stacksize);
fn_code := snd (inject_l (fn_code f) (Pos.succ max_pc) injections);
- fn_entrypoint := f.(fn_entrypoint) |}
+ fn_entrypoint := f.(fn_entrypoint);
+ fn_untrusted_analysis := (mkuntrustedanalysis None None) |}
else Error (msg "Inject.transf_function: injections at bad locations").
Definition transf_fundef (fd: fundef) : res fundef :=
diff --git a/backend/Inlining.v b/backend/Inlining.v
index 0e18d38e..3e357ab0 100644
--- a/backend/Inlining.v
+++ b/backend/Inlining.v
@@ -454,7 +454,8 @@ Definition transf_function (fenv: funenv) (f: function) : Errors.res function :=
(sregs ctx f.(fn_params))
s.(st_stksize)
s.(st_code)
- (spc ctx f.(fn_entrypoint)))
+ (spc ctx f.(fn_entrypoint))
+ (mkuntrustedanalysis None None))
else
Error(msg "Inlining: stack too big").
diff --git a/backend/KillUselessMoves.v b/backend/KillUselessMoves.v
index bdd7ec60..b1224726 100644
--- a/backend/KillUselessMoves.v
+++ b/backend/KillUselessMoves.v
@@ -31,7 +31,8 @@ Definition transf_function (f: function) : function :=
fn_params := f.(fn_params);
fn_stacksize := f.(fn_stacksize);
fn_code := PTree.map transf_instr f.(fn_code);
- fn_entrypoint := f.(fn_entrypoint) |}.
+ fn_entrypoint := f.(fn_entrypoint);
+ fn_untrusted_analysis := mkuntrustedanalysis None None |}.
Definition transf_fundef (fd: fundef) : fundef :=
AST.transf_fundef transf_function fd.
diff --git a/backend/Linearizeaux.ml b/backend/Linearizeaux.ml
index 5914f6a3..e89db024 100644
--- a/backend/Linearizeaux.ml
+++ b/backend/Linearizeaux.ml
@@ -113,7 +113,7 @@ let flatten_blocks blks =
let cmp_minpc (mpc1, _) (mpc2, _) =
if mpc1 = mpc2 then 0 else if mpc1 > mpc2 then -1 else 1
in
- List.flatten (List.map snd (List.sort cmp_minpc blks))
+ List.flatten (List.map snd (List.stable_sort cmp_minpc (List.rev blks)))
(* Build the enumeration *)
@@ -139,11 +139,16 @@ let super_blocks f joins =
pc is the function entry point
or a join point
or the successor of a conditional test *)
- let rec start_block pc =
+ let rec start_block ?minpc pc =
+ let minpc =
+ match minpc with
+ | None -> pc
+ | Some minpc -> minpc
+ in
let npc = P.to_int pc in
if not (IntSet.mem npc !visited) then begin
visited := IntSet.add npc !visited;
- in_block [] npc pc
+ in_block [] (P.to_int minpc) pc
end
(* in_block: add pc to block and check successors *)
and in_block blk minpc pc =
@@ -159,10 +164,14 @@ let super_blocks f joins =
match pred with
| None -> (end_block blk minpc; start_block ifso; start_block ifnot)
| Some true -> (next_in_block blk minpc ifso; start_block ifnot)
- | Some false -> (next_in_block blk minpc ifnot; start_block ifso)
+ | Some false ->
+ if List.mem ifnot blk then
+ (next_in_block blk minpc ifnot; start_block ~minpc:ifnot ifso)
+ else
+ (next_in_block blk minpc ifnot; start_block ifso)
end
| Ljumptable(arg, tbl) :: _ ->
- end_block blk minpc; List.iter start_block tbl
+ end_block blk minpc; List.iter (start_block ?minpc:None) tbl
| Lreturn :: _ -> end_block blk minpc
| instr :: b' -> do_instr_list b' in
do_instr_list b
diff --git a/backend/Profiling.v b/backend/Profiling.v
index 83e96311..ee34453c 100644
--- a/backend/Profiling.v
+++ b/backend/Profiling.v
@@ -68,7 +68,8 @@ Definition transf_function (f : function) : function :=
fn_params := f.(fn_params);
fn_stacksize := f.(fn_stacksize);
fn_code := snd (inject_l (function_id f) (fn_code f) (Pos.succ max_pc) conditions);
- fn_entrypoint := f.(fn_entrypoint) |}.
+ fn_entrypoint := f.(fn_entrypoint);
+ fn_untrusted_analysis := (mkuntrustedanalysis None None) |}.
Definition transf_fundef (fd: fundef) : fundef :=
AST.transf_fundef transf_function fd.
diff --git a/backend/ProfilingExploit.v b/backend/ProfilingExploit.v
index 2325f582..6c06df51 100644
--- a/backend/ProfilingExploit.v
+++ b/backend/ProfilingExploit.v
@@ -33,7 +33,8 @@ Definition transf_function (f : function) : function :=
fn_params := f.(fn_params);
fn_stacksize := f.(fn_stacksize);
fn_code := PTree.map (transf_instr (function_id f)) f.(fn_code);
- fn_entrypoint := f.(fn_entrypoint) |}.
+ fn_entrypoint := f.(fn_entrypoint);
+ fn_untrusted_analysis := (mkuntrustedanalysis None None) |}.
Definition transf_fundef (fd: fundef) : fundef :=
AST.transf_fundef transf_function fd.
diff --git a/backend/RTL.v b/backend/RTL.v
index fe350adf..8e313700 100644
--- a/backend/RTL.v
+++ b/backend/RTL.v
@@ -84,12 +84,18 @@ Inductive instruction: Type :=
Definition code: Type := PTree.t instruction.
+Record untrusted_analysis: Type := mkuntrustedanalysis {
+ answer_to_life_the_universe_and_everything : option Z;
+ staged_header_dup : option (PTree.t positive * PTree.t instruction)
+}.
+
Record function: Type := mkfunction {
fn_sig: signature;
fn_params: list reg;
fn_stacksize: Z;
fn_code: code;
- fn_entrypoint: node
+ fn_entrypoint: node;
+ fn_untrusted_analysis: untrusted_analysis
}.
(** A function description comprises a control-flow graph (CFG) [fn_code]
@@ -388,7 +394,8 @@ Definition transf_function (f: function) : function :=
f.(fn_params)
f.(fn_stacksize)
(PTree.map transf f.(fn_code))
- f.(fn_entrypoint).
+ f.(fn_entrypoint)
+ (mkuntrustedanalysis None None).
End TRANSF.
diff --git a/backend/RTLTunneling.v b/backend/RTLTunneling.v
index 878e079f..255aff3c 100644
--- a/backend/RTLTunneling.v
+++ b/backend/RTLTunneling.v
@@ -109,8 +109,9 @@ Definition tunnel_function (f: RTL.function): res RTL.function :=
(fn_sig f)
(fn_params f)
(fn_stacksize f)
- (PTree.map1 (tunnel_instr td) c)
- (td (fn_entrypoint f)))
+ (PTree.map1 (tunnel_instr td) c) (* rq: `map1` est un map sans le pointeur *)
+ (td (fn_entrypoint f))
+ (mkuntrustedanalysis None None))
else Error (msg "Some node of the union-find is not in the CFG").
Definition tunnel_fundef (f: fundef): res fundef :=
diff --git a/backend/RTLgen.v b/backend/RTLgen.v
index 243d7b7c..e65db93d 100644
--- a/backend/RTLgen.v
+++ b/backend/RTLgen.v
@@ -687,7 +687,8 @@ Definition transl_function (f: CminorSel.function) : Errors.res RTL.function :=
rparams
f.(CminorSel.fn_stackspace)
s.(st_code)
- nentry)
+ nentry
+ (mkuntrustedanalysis None None))
end.
Definition transl_fundef := transf_partial_fundef transl_function.
diff --git a/backend/RTLgenspec.v b/backend/RTLgenspec.v
index 0210aa5b..6e027271 100644
--- a/backend/RTLgenspec.v
+++ b/backend/RTLgenspec.v
@@ -919,7 +919,8 @@ Inductive tr_function: CminorSel.function -> RTL.function -> Prop :=
rparams
f.(CminorSel.fn_stackspace)
code
- nentry).
+ nentry
+ (mkuntrustedanalysis None None)).
(** * Correctness proof of the translation functions *)
diff --git a/backend/Renumber.v b/backend/Renumber.v
index 2727b979..cd74eea7 100644
--- a/backend/Renumber.v
+++ b/backend/Renumber.v
@@ -71,7 +71,8 @@ Definition transf_function (f: function) : function :=
f.(fn_params)
f.(fn_stacksize)
(renum_cfg pnum f.(fn_code))
- (renum_pc pnum f.(fn_entrypoint)).
+ (renum_pc pnum f.(fn_entrypoint))
+ (mkuntrustedanalysis None None).
Definition transf_fundef (fd: fundef) : fundef :=
AST.transf_fundef transf_function fd.
diff --git a/backend/Splitting.ml b/backend/Splitting.ml
index 3ca45c3b..b745faa2 100644
--- a/backend/Splitting.ml
+++ b/backend/Splitting.ml
@@ -183,4 +183,6 @@ let rename_function f =
fn_params = ren_regs before_entrypoint f.fn_params;
fn_stacksize = f.fn_stacksize;
fn_code = PTree.map (ren_instr f maps) f.fn_code;
- fn_entrypoint = f.fn_entrypoint }
+ fn_entrypoint = f.fn_entrypoint;
+ fn_untrusted_analysis = { answer_to_life_the_universe_and_everything = None;
+ staged_header_dup = None } }
diff --git a/driver/Clflags.ml b/driver/Clflags.ml
index 25bd2c78..33c5964a 100644
--- a/driver/Clflags.ml
+++ b/driver/Clflags.ml
@@ -36,6 +36,7 @@ let option_fcse3_glb = ref true
let option_fcse3_trivial_ops = ref false
let option_fcse3_refine = ref true
let option_fcse3_conditions = ref true
+let option_fcse3_only_conditions = ref false
let option_fredundancy = ref true
@@ -46,6 +47,17 @@ let option_ftracelinearize = ref true (* uses branch prediction information to i
let option_funrollsingle = ref 0 (* unroll a single iteration of innermost loops of size n *)
let option_funrollbody = ref 0 (* unroll the body of innermost loops of size n *)
let option_flooprotate = ref 0 (* rotate the innermost loops to have the condition inside the loop body *)
+let option_fliftif = ref 0 (* Move and or duplicate code off of a superblock by first introducing a redundant if and then leveraging Duplicate and CSE3. *)
+
+(* move_stroes | no_move_stores
+ * empty if this skip is to be skipped *)
+let option_prepass_past_side_exits = ref false
+(* Pick scheduling heuristic to decide which instructions to copy via if-lifting *)
+let option_prepass_past_side_exits_sched = ref "move_stores"
+
+(* Weak Software Pipelining *)
+let option_fpoormansssa = ref false
+let option_ftargetinnerloops = ref false
(* Scheduling *)
let option_mtune = ref ""
diff --git a/driver/Compiler.vexpand b/driver/Compiler.vexpand
index 952bed22..34e85b6b 100644
--- a/driver/Compiler.vexpand
+++ b/driver/Compiler.vexpand
@@ -299,12 +299,45 @@ EXPAND_ASM_SEMANTICS
EXPAND_RTL_FORWARD_SIMULATIONS
eapply compose_forward_simulations.
eapply RTLpathLivegenproof.transf_program_correct; eassumption.
+
+ (* TODO jf: Fix this work-around *)
pose proof RTLpathLivegenproof.all_fundef_liveness_ok as X.
- refine (modusponens _ _ (X _ _ _) _); eauto. intro.
+ refine (modusponens _ _ (X _ _ _) _). exact M37. intro.
+ eapply compose_forward_simulations.
+ eapply MyRTLpathSchedulerproof.transf_program_correct; eassumption.
+ eapply compose_forward_simulations.
+ eapply RTLpathproof.transf_program_correct; eassumption.
+ eapply compose_forward_simulations.
+ eapply Liftifproof.transf_program_correct; eassumption.
+ eapply compose_forward_simulations.
+ eapply Renumberproof.transf_program_correct; eassumption.
+ eapply compose_forward_simulations.
+ eapply match_if_simulation. eassumption. eapply CSE3proof.transf_program_correct; eassumption.
+ eapply compose_forward_simulations.
+ eapply match_if_simulation. eassumption. eapply Deadcodeproof.transf_program_correct; eassumption.
+
+ eapply compose_forward_simulations.
+ eapply RTLpathLivegenproof.transf_program_correct; eassumption.
+ pose proof RTLpathLivegenproof.all_fundef_liveness_ok as X'.
+ refine (modusponens _ _ (X' _ _ _) _); eauto. intro.
+
eapply compose_forward_simulations.
eapply RTLpathSchedulerproof.transf_program_correct; eassumption.
eapply compose_forward_simulations.
eapply RTLpathproof.transf_program_correct; eassumption.
+
+ eapply compose_forward_simulations.
+ eapply Renumberproof.transf_program_correct; eassumption.
+ eapply compose_forward_simulations.
+ eapply match_if_simulation. eassumption. eapply CSE3proof.transf_program_correct; eassumption.
+ eapply compose_forward_simulations.
+ eapply match_if_simulation. eassumption. eapply KillUselessMovesproof.transf_program_correct; eassumption.
+ eapply compose_forward_simulations.
+ eapply match_if_simulation. eassumption. eapply ForwardMovesproof.transf_program_correct; eassumption.
+ eapply compose_forward_simulations.
+ eapply match_if_simulation. eassumption. eapply Deadcodeproof.transf_program_correct; eassumption.
+
+
eapply compose_forward_simulations.
eapply Allocationproof.transf_program_correct; eassumption.
eapply compose_forward_simulations.
diff --git a/driver/Compopts.v b/driver/Compopts.v
index 65264124..1a3ef8ef 100644
--- a/driver/Compopts.v
+++ b/driver/Compopts.v
@@ -60,6 +60,9 @@ Parameter optim_CSE3_trivial_ops: unit -> bool.
(** Flag -fcse3-conditions. For DMonniaux's common subexpression elimination: remove redundant conditional branches. *)
Parameter optim_CSE3_conditions: unit -> bool.
+(** TODO jf: Flag -fcse3-only-conditions. Remove only redundant conditional branches on first invocation of CSE3. *)
+Parameter optim_CSE3_only_conditions: unit -> bool.
+
(** Flag -fmove-loop-invariants. *)
Parameter optim_move_loop_invariants: unit -> bool.
diff --git a/driver/Driver.ml b/driver/Driver.ml
index 3f5a4bd9..83671d90 100644
--- a/driver/Driver.ml
+++ b/driver/Driver.ml
@@ -208,6 +208,11 @@ Processing options:
-fmove-loop-invariants Perform loop-invariant code motion [off]
-fredundancy Perform redundancy elimination [on]
-mtune= Type of CPU (for scheduling on some architectures)
+ -fpoor-mans-ssa Perform some register renaming before potential prepass scheduling [on]
+ -flift-if Allow the duplication and therefore removal or downwards scheduling of code in superblocks, the number will eventually be useed to control the increase in code size. Right any number >= 0 is interpreted as true. [0]
+ -fprepass-past-side-exits Allow the scheduling of live instructions past side exits by duplicating code [off]
+ -fprepass-past-side_exits= <heuristic> Allow moving store instructions past side exits (move_stores) or not (no_move_stores) [move_stores]
+ -ftarget-inner-loops Concerns -fpoor-mans-ssa and -fprepass-past-side-exits: only apply the aforementioned optimizations to inner loops spanned by a superblock.
-fprepass Perform prepass scheduling (only on some architectures) [on]
-fprepass= <optim> Perform postpass scheduling with the specified optimization [list]
(<optim>=list: list scheduling, <optim>=revlist: reverse list scheduling, <optim>=regpres: list scheduling aware of register pressure, <optim>=regpres_bis: variant of regpres, <optim>=zigzag: zigzag scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles)
@@ -426,6 +431,8 @@ let cmdline_actions =
@ f_opt "move-loop-invariants" option_fmove_loop_invariants
@ f_opt "redundancy" option_fredundancy
@ [ Exact "-mtune", String (fun s -> option_mtune := s) ]
+ @ f_opt "poor-mans-ssa" option_fpoormansssa
+ @ f_opt "target-inner-loops" option_ftargetinnerloops
@ f_opt "prepass" option_fprepass
@ f_opt "regpres-wait-window" option_regpres_wait_window
@ f_opt "postpass" option_fpostpass
@@ -434,6 +441,9 @@ let cmdline_actions =
@ [ Exact "-funrollsingle", Integer (fun n -> option_funrollsingle := n) ]
@ [ Exact "-funrollbody", Integer (fun n -> option_funrollbody := n) ]
@ [ Exact "-flooprotate", Integer (fun n -> option_flooprotate := n) ]
+ @ [ Exact "-flift-if", Integer (fun n -> option_fliftif := n) ]
+ @ f_opt "prepass-past-side-exits" option_prepass_past_side_exits
+ @ f_opt_str "prepass-past-side-exits" "move_stores" option_prepass_past_side_exits option_prepass_past_side_exits_sched
@ f_opt "tracelinearize" option_ftracelinearize
@ f_opt_str "prepass" "regpress" option_fprepass option_fprepass_sched
@ f_opt_str "postpass" "list" option_fpostpass option_fpostpass_sched
diff --git a/extraction/extraction.vexpand b/extraction/extraction.vexpand
index b61a97d7..02c09c20 100644
--- a/extraction/extraction.vexpand
+++ b/extraction/extraction.vexpand
@@ -133,6 +133,8 @@ Extract Constant Compopts.optim_CSE3_trivial_ops =>
"fun _ -> !Clflags.option_fcse3_trivial_ops".
Extract Constant Compopts.optim_CSE3_conditions =>
"fun _ -> !Clflags.option_fcse3_conditions".
+Extract Constant Compopts.optim_CSE3_only_conditions =>
+ "fun _ -> !Clflags.option_fcse3_only_conditions".
Extract Constant Compopts.optim_move_loop_invariants =>
"fun _ -> !Clflags.option_fmove_loop_invariants".
diff --git a/scheduling/MyRTLpathScheduler.v b/scheduling/MyRTLpathScheduler.v
new file mode 100644
index 00000000..2958bea7
--- /dev/null
+++ b/scheduling/MyRTLpathScheduler.v
@@ -0,0 +1,331 @@
+(** RTLpath Scheduling from an external oracle.
+
+This module is inspired from [Duplicate] and [Duplicateproof]
+
+*)
+
+Require Import AST Linking Values Maps Globalenvs Smallstep Registers.
+Require Import Coqlib Maps Events Errors Op.
+Require Import RTL RTLpath RTLpathLivegen RTLpathLivegenproof RTLpathSE_theory RTLpathSE_impl.
+Require RTLpathWFcheck.
+
+Notation "'ASSERT' A 'WITH' MSG 'IN' B" := (if A then B else Error (msg MSG))
+ (at level 200, A at level 100, B at level 200)
+ : error_monad_scope.
+
+Local Open Scope error_monad_scope.
+Local Open Scope positive_scope.
+
+(** External oracle returning the new RTLpath function and a mapping of new path_entries to old path_entries
+
+NB: the new RTLpath function is generated from the fn_code, the fn_entrypoint and the fn_path
+It requires to check that the path structure is wf !
+
+*)
+
+(* Returns: new code, new entrypoint, new pathmap, revmap
+ * Indeed, the entrypoint might not be the same if the entrypoint node is moved further down
+ * a path ; same reasoning for the pathmap *)
+Axiom untrusted_scheduler: RTLpath.function -> code * node * path_map * (PTree.t node) * option (PTree.t positive * PTree.t instruction).
+
+Extract Constant untrusted_scheduler => "MyRTLpathScheduleraux.scheduler".
+
+Program Definition function_builder (tfr: RTL.function) (tpm: path_map) :
+ { r : res RTLpath.function | forall f', r = OK f' -> fn_RTL f' = tfr} :=
+ match RTLpathWFcheck.function_checker tfr tpm with
+ | false => Error (msg "In function_builder: (tfr, tpm) is not wellformed")
+ | true => OK {| fn_RTL := tfr; fn_path := tpm |}
+ end.
+Next Obligation.
+ apply RTLpathWFcheck.function_checker_path_entry. auto.
+Defined. Next Obligation.
+ apply RTLpathWFcheck.function_checker_wellformed_path_map. auto.
+Defined.
+
+Definition entrypoint_check (dm: PTree.t node) (fr tfr: RTL.function) : res unit :=
+ match dm ! (fn_entrypoint tfr) with
+ | None => Error (msg "No mapping for (entrypoint tfr)")
+ | Some etp => if (Pos.eq_dec (fn_entrypoint fr) etp) then OK tt
+ else Error (msg "Entrypoints do not match")
+ end.
+
+Lemma entrypoint_check_correct fr tfr dm:
+ entrypoint_check dm fr tfr = OK tt ->
+ dm ! (fn_entrypoint tfr) = Some (fn_entrypoint fr).
+Proof.
+ unfold entrypoint_check. explore; try discriminate. congruence.
+Qed.
+
+Definition path_entry_check_single (pm tpm: path_map) (m: node * node) :=
+ let (pc2, pc1) := m in
+ match (tpm ! pc2) with
+ | None => Error (msg "pc2 isn't an entry of tpm")
+ | Some _ =>
+ match (pm ! pc1) with
+ | None => Error (msg "pc1 isn't an entry of pm")
+ | Some _ => OK tt
+ end
+ end.
+
+Lemma path_entry_check_single_correct pm tpm pc1 pc2:
+ path_entry_check_single pm tpm (pc2, pc1) = OK tt ->
+ path_entry tpm pc2 /\ path_entry pm pc1.
+Proof.
+ unfold path_entry_check_single. intro. explore.
+ constructor; congruence.
+Qed.
+
+(* Inspired from Duplicate.verify_mapping_rec *)
+Fixpoint path_entry_check_rec (pm tpm: path_map) lm :=
+ match lm with
+ | nil => OK tt
+ | m :: lm => do u1 <- path_entry_check_single pm tpm m;
+ do u2 <- path_entry_check_rec pm tpm lm;
+ OK tt
+ end.
+
+Lemma path_entry_check_rec_correct pm tpm pc1 pc2: forall lm,
+ path_entry_check_rec pm tpm lm = OK tt ->
+ In (pc2, pc1) lm ->
+ path_entry tpm pc2 /\ path_entry pm pc1.
+Proof.
+ induction lm.
+ - simpl. intuition.
+ - simpl. intros. explore. destruct H0.
+ + subst. eapply path_entry_check_single_correct; eauto.
+ + eapply IHlm; assumption.
+Qed.
+
+Definition path_entry_check (dm: PTree.t node) (pm tpm: path_map) := path_entry_check_rec pm tpm (PTree.elements dm).
+
+Lemma path_entry_check_correct dm pm tpm:
+ path_entry_check dm pm tpm = OK tt ->
+ forall pc1 pc2, dm ! pc2 = Some pc1 ->
+ path_entry tpm pc2 /\ path_entry pm pc1.
+Proof.
+ unfold path_entry_check. intros. eapply PTree.elements_correct in H0.
+ eapply path_entry_check_rec_correct; eassumption.
+Qed.
+
+Definition function_equiv_checker (dm: PTree.t node) (f: RTLpath.function) (tf: RTLpath.function) : res unit :=
+ let pm := fn_path f in
+ let fr := fn_RTL f in
+ let tpm := fn_path tf in
+ let tfr := fn_RTL tf in
+ do _ <- entrypoint_check dm fr tfr;
+ do _ <- path_entry_check dm pm tpm;
+ do _ <- simu_check dm f tf;
+ OK tt.
+
+Lemma function_equiv_checker_entrypoint f tf dm:
+ function_equiv_checker dm f tf = OK tt ->
+ dm ! (fn_entrypoint tf) = Some (fn_entrypoint f).
+Proof.
+ unfold function_equiv_checker. intros. explore.
+ eapply entrypoint_check_correct; eauto.
+Qed.
+
+Lemma function_equiv_checker_pathentry1 f tf dm:
+ function_equiv_checker dm f tf = OK tt ->
+ forall pc1 pc2, dm ! pc2 = Some pc1 ->
+ path_entry (fn_path tf) pc2.
+Proof.
+ unfold function_equiv_checker. intros. explore.
+ exploit path_entry_check_correct. eassumption. all: eauto. intuition.
+Qed.
+
+Lemma function_equiv_checker_pathentry2 f tf dm:
+ function_equiv_checker dm f tf = OK tt ->
+ forall pc1 pc2, dm ! pc2 = Some pc1 ->
+ path_entry (fn_path f) pc1.
+Proof.
+ unfold function_equiv_checker. intros. explore.
+ exploit path_entry_check_correct. eassumption. all: eauto. intuition.
+Qed.
+
+Lemma function_equiv_checker_correct f tf dm:
+ function_equiv_checker dm f tf = OK tt ->
+ forall pc1 pc2, dm ! pc2 = Some pc1 ->
+ sexec_simu dm f tf pc1 pc2.
+Proof.
+ unfold function_equiv_checker. intros. explore.
+ eapply simu_check_correct; eauto.
+Qed.
+
+
+Definition verified_scheduler (f: RTLpath.function) : res (RTLpath.function * (PTree.t node)) :=
+ let (tctetpmdm, staged_liftif) := untrusted_scheduler f in
+ let (tctetpm, dm) := tctetpmdm in
+ let (tcte, tpm) := tctetpm in
+ let (tc, te) := tcte in
+ let tfr := mkfunction (fn_sig f) (fn_params f) (fn_stacksize f) tc te (RTL.mkuntrustedanalysis None staged_liftif) in
+ do tf <- proj1_sig (function_builder tfr tpm);
+ do tt <- function_equiv_checker dm f tf;
+ OK (tf, dm).
+
+Theorem verified_scheduler_correct f tf dm:
+ verified_scheduler f = OK (tf, dm) ->
+ fn_sig f = fn_sig tf
+ /\ fn_params f = fn_params tf
+ /\ fn_stacksize f = fn_stacksize tf
+ /\ dm ! (fn_entrypoint tf) = Some (fn_entrypoint f)
+ /\ (forall pc1 pc2, dm ! pc2 = Some pc1 -> path_entry (fn_path f) pc1)
+ /\ (forall pc1 pc2, dm ! pc2 = Some pc1 -> path_entry (fn_path tf) pc2)
+ /\ (forall pc1 pc2, dm ! pc2 = Some pc1 -> sexec_simu dm f tf pc1 pc2)
+.
+Proof.
+ intros VERIF. unfold verified_scheduler in VERIF. explore.
+ Local Hint Resolve function_equiv_checker_entrypoint
+ function_equiv_checker_pathentry1 function_equiv_checker_pathentry2
+ function_equiv_checker_correct: core.
+ destruct (function_builder _ _) as [res H]; simpl in * |- *; auto.
+ apply H in EQ3. rewrite EQ3. simpl.
+ repeat (constructor; eauto).
+ exploit function_equiv_checker_entrypoint. eapply EQ5. rewrite EQ3. intuition.
+Qed.
+
+Record match_function (dupmap: PTree.t node) (f1 f2: RTLpath.function): Prop := {
+ preserv_fnsig: fn_sig f1 = fn_sig f2;
+ preserv_fnparams: fn_params f1 = fn_params f2;
+ preserv_fnstacksize: fn_stacksize f1 = fn_stacksize f2;
+ preserv_entrypoint: dupmap!(f2.(fn_entrypoint)) = Some f1.(fn_entrypoint);
+ dupmap_path_entry1: forall pc1 pc2, dupmap!pc2 = Some pc1 -> path_entry (fn_path f1) pc1;
+ dupmap_path_entry2: forall pc1 pc2, dupmap!pc2 = Some pc1 -> path_entry (fn_path f2) pc2;
+ dupmap_correct: forall pc1 pc2, dupmap!pc2 = Some pc1 -> sexec_simu dupmap f1 f2 pc1 pc2;
+}.
+
+Program Definition transf_function (f: RTLpath.function):
+ { r : res RTLpath.function | forall f', r = OK f' -> exists dm, match_function dm f f'} :=
+ match (verified_scheduler f) with
+ | Error e => Error e
+ | OK (tf, dm) => OK tf
+ end.
+Next Obligation.
+ exploit verified_scheduler_correct; eauto.
+ intros (A & B & C & D & E & F & G (* & H *)).
+ exists dm. econstructor; eauto.
+Defined.
+
+Theorem match_function_preserves f f' dm:
+ match_function dm f f' ->
+ fn_sig f = fn_sig f' /\ fn_params f = fn_params f' /\ fn_stacksize f = fn_stacksize f'.
+Proof.
+ intros.
+ destruct H as [SIG PARAM SIZE ENTRY CORRECT].
+ intuition.
+Qed.
+
+Definition transf_fundef (f: fundef) : res fundef :=
+ transf_partial_fundef (fun f => proj1_sig (transf_function f)) f.
+
+Definition transf_program (p: program) : res program :=
+ transform_partial_program transf_fundef p.
+
+(** * Preservation proof *)
+
+Local Notation ext alive := (fun r => Regset.In r alive).
+
+Inductive match_fundef: RTLpath.fundef -> RTLpath.fundef -> Prop :=
+ | match_Internal dupmap f f': match_function dupmap f f' -> match_fundef (Internal f) (Internal f')
+ | match_External ef: match_fundef (External ef) (External ef).
+
+Inductive match_stackframes: stackframe -> stackframe -> Prop :=
+ | match_stackframe_intro dupmap res f sp pc rs1 rs2 f' pc' path
+ (TRANSF: match_function dupmap f f')
+ (DUPLIC: dupmap!pc' = Some pc)
+ (LIVE: liveness_ok_function f)
+ (PATH: f.(fn_path)!pc = Some path)
+ (EQUIV: forall v, eqlive_reg (ext path.(input_regs)) (rs1 # res <- v) (rs2 # res <- v)):
+ match_stackframes (Stackframe res f sp pc rs1) (Stackframe res f' sp pc' rs2).
+
+Inductive match_states: state -> state -> Prop :=
+ | match_states_intro dupmap st f sp pc rs1 rs2 m st' f' pc' path
+ (STACKS: list_forall2 match_stackframes st st')
+ (TRANSF: match_function dupmap f f')
+ (DUPLIC: dupmap!pc' = Some pc)
+ (LIVE: liveness_ok_function f)
+ (PATH: f.(fn_path)!pc = Some path)
+ (EQUIV: eqlive_reg (ext path.(input_regs)) rs1 rs2):
+ match_states (State st f sp pc rs1 m) (State st' f' sp pc' rs2 m)
+ | match_states_call st st' f f' args m
+ (STACKS: list_forall2 match_stackframes st st')
+ (TRANSF: match_fundef f f')
+ (LIVE: liveness_ok_fundef f):
+ match_states (Callstate st f args m) (Callstate st' f' args m)
+ | match_states_return st st' v m
+ (STACKS: list_forall2 match_stackframes st st'):
+ match_states (Returnstate st v m) (Returnstate st' v m).
+
+Lemma match_stackframes_equiv stf1 stf2 stf3:
+ match_stackframes stf1 stf2 -> equiv_stackframe stf2 stf3 -> match_stackframes stf1 stf3.
+Proof.
+ destruct 1; intros EQ; inv EQ; try econstructor; eauto.
+ intros; eapply eqlive_reg_trans; eauto.
+ rewrite eqlive_reg_triv in * |-.
+ eapply eqlive_reg_update.
+ eapply eqlive_reg_monotonic; eauto.
+ simpl; auto.
+Qed.
+
+Lemma match_stack_equiv stk1 stk2:
+ list_forall2 match_stackframes stk1 stk2 ->
+ forall stk3, list_forall2 equiv_stackframe stk2 stk3 ->
+ list_forall2 match_stackframes stk1 stk3.
+Proof.
+ Local Hint Resolve match_stackframes_equiv: core.
+ induction 1; intros stk3 EQ; inv EQ; econstructor; eauto.
+Qed.
+
+Lemma match_states_equiv s1 s2 s3: match_states s1 s2 -> equiv_state s2 s3 -> match_states s1 s3.
+Proof.
+ Local Hint Resolve match_stack_equiv: core.
+ destruct 1; intros EQ; inv EQ; econstructor; eauto.
+ intros; eapply eqlive_reg_triv_trans; eauto.
+Qed.
+
+Lemma eqlive_match_stackframes stf1 stf2 stf3:
+ eqlive_stackframes stf1 stf2 -> match_stackframes stf2 stf3 -> match_stackframes stf1 stf3.
+Proof.
+ destruct 1; intros MS; inv MS; try econstructor; eauto.
+ try_simplify_someHyps. intros; eapply eqlive_reg_trans; eauto.
+Qed.
+
+Lemma eqlive_match_stack stk1 stk2:
+ list_forall2 eqlive_stackframes stk1 stk2 ->
+ forall stk3, list_forall2 match_stackframes stk2 stk3 ->
+ list_forall2 match_stackframes stk1 stk3.
+Proof.
+ induction 1; intros stk3 MS; inv MS; econstructor; eauto.
+ eapply eqlive_match_stackframes; eauto.
+Qed.
+
+Lemma eqlive_match_states s1 s2 s3: eqlive_states s1 s2 -> match_states s2 s3 -> match_states s1 s3.
+Proof.
+ Local Hint Resolve eqlive_match_stack: core.
+ destruct 1; intros MS; inv MS; try_simplify_someHyps; econstructor; eauto.
+ eapply eqlive_reg_trans; eauto.
+Qed.
+
+Lemma eqlive_stackframes_refl stf1 stf2: match_stackframes stf1 stf2 -> eqlive_stackframes stf1 stf1.
+Proof.
+ destruct 1; econstructor; eauto.
+ intros; eapply eqlive_reg_refl; eauto.
+Qed.
+
+Lemma eqlive_stacks_refl stk1 stk2:
+ list_forall2 match_stackframes stk1 stk2 -> list_forall2 eqlive_stackframes stk1 stk1.
+Proof.
+ induction 1; simpl; econstructor; eauto.
+ eapply eqlive_stackframes_refl; eauto.
+Qed.
+
+Lemma transf_fundef_correct f f':
+ transf_fundef f = OK f' -> match_fundef f f'.
+Proof.
+ intros TRANSF; destruct f; simpl; monadInv TRANSF.
+ + destruct (transf_function f) as [res H]; simpl in * |- *; auto.
+ destruct (H _ EQ).
+ intuition subst; auto.
+ eapply match_Internal; eauto.
+ + eapply match_External.
+Qed.
diff --git a/scheduling/MyRTLpathScheduleraux.ml b/scheduling/MyRTLpathScheduleraux.ml
new file mode 100644
index 00000000..0375d26c
--- /dev/null
+++ b/scheduling/MyRTLpathScheduleraux.ml
@@ -0,0 +1,1619 @@
+open DebugPrint
+open RTLpathLivegenaux
+open RTLpath
+open RTLpathCommon
+open RTL
+open Maps
+open Registers
+
+let print_superblock sb code =
+ let insts = sb.instructions in
+ let li = sb.liveins in
+ let outs = sb.s_output_regs in
+ begin
+ debug "{ instructions = "; print_instructions (Array.to_list insts) code; debug "\n";
+ debug " liveins = "; print_ptree_regset li; debug "\n";
+ debug " output_regs = "; print_regset outs; debug "}"
+ end
+
+let print_superblocks lsb code =
+ let rec f = function
+ | [] -> ()
+ | sb :: lsb -> (print_superblock sb code; debug ",\n"; f lsb)
+ in begin
+ debug "[\n";
+ f lsb;
+ debug "]"
+ end
+
+let get_superblocks = RTLpathScheduleraux.get_superblocks
+
+let get_ok = RTLpathScheduleraux.get_ok
+
+let apply_map' fw n = Duplicateaux.ptree_get_some n fw
+
+let apply_map_opt fw n =
+ match PTree.get n fw with
+ | Some n' -> n'
+ | None -> n
+
+let change_arg_regs inst fwmap =
+ let open Datatypes in
+ match inst with
+ | Icall (sgn, fn, args, dest, succ) ->
+ let fn' =
+ ( match fn with
+ | Coq_inl r -> Datatypes.Coq_inl (apply_map_opt fwmap r)
+ | Coq_inr _ as ident -> (* function name *) ident )
+ in
+ let args' = List.map (apply_map_opt fwmap) args in
+ Icall (sgn, fn', args', dest, succ)
+ | Ibuiltin (ef, args, dest, succ) ->
+ let args' = List.map (AST.map_builtin_arg (apply_map_opt fwmap)) args in
+ Ibuiltin (ef, args', dest, succ)
+ | Ijumptable (arg, tbl) -> Ijumptable (apply_map_opt fwmap arg, tbl)
+ | Itailcall (sgn, fn, args) ->
+ let fn' =
+ ( match fn with
+ | Coq_inl r -> Datatypes.Coq_inl (apply_map_opt fwmap r)
+ | Coq_inr _ as ident -> (* function name *) ident )
+ in
+ let args' = List.map (apply_map_opt fwmap) args in
+ Itailcall (sgn, fn', args')
+ | Ireturn (reg_opt) ->
+ ( match reg_opt with
+ | None -> Ireturn None
+ | Some(reg) -> Ireturn (Some (apply_map_opt fwmap reg)) )
+ | Icond (a, b, n1, n2, i) -> Icond (a, List.map (apply_map_opt fwmap) b, n1, n2, i)
+ | Inop n -> Inop n
+ | Iop (a, b, c, n) -> Iop (a, List.map (apply_map_opt fwmap) b, c, n)
+ | Iload (a, b, c, d, e, n) ->
+ Iload (a, b, c, List.map (apply_map_opt fwmap) d, e, n)
+ | Istore (a, b, c, d, n) -> Istore (a, b, List.map (apply_map_opt fwmap) c, apply_map_opt fwmap d, n)
+
+let change_dest_reg inst fwmap =
+ match inst with
+ | Inop _
+ | Istore _
+ | Itailcall _
+ (* XXX builtin is a special case?! *)
+ | Ibuiltin _
+ | Icond _
+ | Ijumptable _
+ | Ireturn _
+ | Icall _ -> failwith "unexpectedly asked to change dest reg"
+ | Iop(op, args, res, s) -> Iop(op, args, (apply_map' fwmap res), s)
+ | Iload(trap, chunk, addr, args, dst, s) -> Iload(trap, chunk, addr, args, (apply_map' fwmap dst), s)
+
+let maybe_change_dest_reg ?only_rename inst fwmap ~next_free_reg =
+ let do_nothing = (inst, fwmap, next_free_reg) in
+ match inst with
+ | Icall _
+ | Ibuiltin _
+ | Ijumptable _
+ | Itailcall _
+ | Ireturn _ ->
+ (* Do not rename registers if the instructions MUST end a path because we cannot add
+ * restoration code afterwards. *)
+ do_nothing
+ | _ as i ->
+ match RTL.instr_defs i with
+ | None -> do_nothing
+ | Some r ->
+ if Option.is_some only_rename && not (Regset.mem r (get_some only_rename)) then
+ do_nothing
+ else
+ (match PTree.get r fwmap with
+ | None -> (i, PTree.set r r fwmap, next_free_reg)
+ | Some _previous_name ->
+ let new_name = next_free_reg in
+ let fwmap = PTree.set r new_name fwmap in
+ (change_dest_reg i fwmap, fwmap, Camlcoq.P.succ next_free_reg) )
+
+let ptree_get_or_default ptree key default =
+ match PTree.get key ptree with
+ | None -> default
+ | Some value -> value
+
+let is_icond = function
+ | Icond _ -> true
+ | _ -> false
+
+let side_exit_idxs sb code =
+ Array.to_list sb.instructions
+ |> List.map (fun pc -> get_some @@ PTree.get pc code)
+ |> List.mapi (fun i inst -> (i, inst))
+ |> List.filter (fun (_i, inst) -> is_icond inst
+ && RTLpathLivegenaux.predicted_successor inst
+ |> Option.is_some)
+ |> List.map (fun (i, _inst) -> i)
+
+let side_exit_pcs sb code =
+ side_exit_idxs sb code |> List.map (fun i -> sb.instructions.(i))
+
+module InsertPosition = struct
+ type t =
+ | Above of Camlcoq.P.t
+ | Below of Camlcoq.P.t
+ let anchor = function
+ | Above x | Below x -> x
+
+ let pseudo_map pos ~f = match pos with
+ | Above x -> Above (f x)
+ | Below x -> Below (f x)
+ let compare x y =
+ match Camlcoq.P.compare (anchor x) (anchor y) with
+ | 0 -> (match x, y with
+ | Above _, Above _ | Below _, Below _ -> 0
+ | Above _, Below _ -> 1
+ | Below _, Above _ -> -1 )
+ | c -> c
+end
+
+module InsertPositionMap = Map.Make(InsertPosition)
+
+ let insert_code sb code pm (to_insert : RTL.instruction list InsertPositionMap.t) ~next_free_pc =
+ let old_debug_flag = !debug_flag in
+ debug_flag := false;
+ debug "Before code insertion:\n";
+ print_superblock sb code;
+ debug "\n"; flush_all ();
+
+ debug "Code to insert:\n";
+ InsertPositionMap.iter
+ (fun pos insts ->
+ debug "%s %d: "
+ (match pos with
+ | InsertPosition.Above _ -> "Above"
+ | InsertPosition.Below _ -> "Below")
+ (Camlcoq.P.to_int @@ InsertPosition.anchor pos)
+ ;
+ List.iter (fun inst ->
+ if !debug_flag then PrintRTL.print_instruction stdout (0, inst)) insts;
+ debug "\n"; flush_all ()
+ )
+ to_insert
+ ;
+
+ let next_free_pc =
+ let next_free_pc = ref next_free_pc in
+ (fun () ->
+ let pc = !next_free_pc in
+ next_free_pc := Camlcoq.P.succ !next_free_pc;
+ pc )
+ in
+ let original_length = Array.length sb.instructions in
+ let orig_sort_keys =
+ Duplicateaux.generate_fwmap
+ (Array.to_list sb.instructions)
+ (List.init original_length (fun i -> i * 2))
+ PTree.empty
+ in
+ let new_key pos =
+ let open InsertPosition in
+ let anchor_key =
+ InsertPosition.anchor pos
+ |> apply_map' orig_sort_keys
+ in
+ match pos with
+ | Above _ -> anchor_key - 1
+ | Below _ -> anchor_key + 1
+ in
+ let (code, pc_lists, sort_keys) =
+ InsertPositionMap.fold
+ (fun (pos : InsertPosition.t) insts (code, pc_lists, sort_keys) ->
+ let insts_length = List.length insts in
+ let key = new_key pos in
+ let pcs = List.init insts_length (fun _ -> next_free_pc ()) in
+ let new_sort_keys = List.init insts_length (fun _ -> key) in
+ let code =
+ ListLabels.fold_left2 pcs insts
+ ~init:code
+ ~f:(fun code pc inst -> PTree.set pc inst code)
+ in
+ let sort_keys = Duplicateaux.generate_fwmap pcs new_sort_keys sort_keys in
+ (code, pcs::pc_lists, sort_keys) )
+ to_insert
+ (code, [], orig_sort_keys)
+ in
+ let new_pcs = List.flatten pc_lists |> Array.of_list in
+ let last_instruction = [| sb.instructions.(original_length - 1) |] in
+ let upto_last =
+ if original_length > 1 then
+ Array.sub sb.instructions 0 (original_length - 1)
+ else [| |]
+ in
+ let instructions = Array.concat [upto_last; new_pcs; last_instruction ] in
+ let instructions_order = Array.copy instructions in
+ ArrayLabels.stable_sort instructions_order
+ ~cmp:(fun x y -> Int.compare (apply_map' sort_keys x) (apply_map' sort_keys y));
+
+ let new_length = Array.length instructions in
+ let fwmap = Duplicateaux.generate_fwmap (Array.to_list instructions_order) (Array.init new_length (fun i -> i) |> Array.to_list) PTree.empty in
+ let fwmap_pc =
+ Duplicateaux.generate_fwmap
+ (Array.to_list sb.instructions)
+ (Array.to_list @@ Array.map (fun pc -> instructions.(apply_map' fwmap pc)) sb.instructions)
+ PTree.empty
+ in
+ let liveins' =
+ PTree.fold
+ (fun liveins' pc live_regs ->
+ PTree.set (apply_map' fwmap_pc pc) live_regs liveins')
+ sb.liveins PTree.empty
+ in
+
+ let sb' = {sb with instructions = instructions; liveins = liveins'} in
+ let code = RTLpathScheduleraux.apply_schedule code sb' instructions_order in
+
+ let num_added = new_length - original_length in
+ let first_pc = sb.instructions.(0) in
+ let pi = get_some @@ PTree.get first_pc pm in
+ let module N = Camlcoq.Nat in
+ let new_size = N.to_int pi.psize + num_added |> N.of_int in
+ let pm = PTree.set first_pc {pi with psize = new_size} pm in
+
+ debug_flag := old_debug_flag;
+ (sb', code, pm, next_free_pc (), fwmap_pc)
+
+let prepend_nops_before_iconds sb code =
+ (* We need the a first and last instruction so that
+ * a) the pc of the superblock entry stays the same and
+ * b) apply_schedule correclty preserved the successors of the last instruction *)
+ if Array.length sb.instructions < 2 then
+ (* Early exit, this should probably be replaced by a more general exclusion of
+ * superblock with just one instruction. *)
+ InsertPositionMap.empty
+ else
+
+ (* TODO, probably only need it before side exits *)
+ let icond_pcs =
+ Array.to_list sb.instructions
+ |> List.filter (fun pc ->
+ let inst = get_some @@ PTree.get pc code in
+ is_icond inst )
+ in
+ let to_insert =
+ ListLabels.fold_left icond_pcs
+ ~init:InsertPositionMap.empty
+ ~f:(fun acc icond_pc -> InsertPositionMap.add (InsertPosition.Above icond_pc) [Inop Camlcoq.P.one] acc)
+ in
+ to_insert
+
+type renamed =
+ { old_name : reg
+ ; new_name : reg }
+
+let update_live_renames pc live_renames fwmap regs =
+ Regset.fold
+ (fun live_reg renames ->
+ match PTree.get live_reg fwmap with
+ | None -> renames
+ | Some(r) when r = live_reg -> renames
+ | Some(new_name) ->
+ let old = ptree_get_or_default renames pc [] in
+ let upd = {old_name = live_reg; new_name} :: old in
+ PTree.set pc upd renames )
+ regs
+ live_renames
+
+let my_merge_overwrite m1 m2 =
+ PTree.combine (fun x y -> match (x, y) with
+ | None, None -> None
+ | Some x, None
+ | None, Some x -> Some x
+ | Some _, Some y -> Some y
+ ) m1 m2
+
+let rename_regs ?only_rename sb code ~liveatentry ~next_free_reg =
+ let old_debug_flag = !debug_flag in
+
+ let length = Array.length sb.instructions in
+ assert (length > 0);
+ (* Early exit *)
+ if length = 1 then (code, PTree.empty, next_free_reg) else
+ (* The last instruction is treated in a special way because if it defines a register,
+ * that register cannot possibly be used afterwards in the path AND often we cannot
+ * insert restoration code later after it since it must remain at the end of the path.
+ * In the future, this may be resolved by only renaming registers which are used
+ * afterwards in path, which would exclude the register possibly assigned by the last
+ * instruction. *)
+ let last_pc = sb.instructions.(length - 1) in
+ let upto_last = Array.init (length - 1) (fun i -> sb.instructions.(i)) in
+ let liveatentry = Regset.elements liveatentry in
+ let fwmap = Duplicateaux.generate_fwmap liveatentry liveatentry PTree.empty in
+
+ let (code, fwmap, live_renames, next_free_reg) =
+ ArrayLabels.fold_left upto_last
+ ~init:(code, fwmap, PTree.empty, next_free_reg)
+ ~f:(fun (code, fwmap, live_renames, next_free_reg) pc ->
+ (* Rewrite instruction to use potentially renamed registers *)
+ let inst = get_some @@ PTree.get pc code in
+ let inst = change_arg_regs inst fwmap in
+ let (inst, fwmap, next_free_reg) =
+ maybe_change_dest_reg ?only_rename inst fwmap ~next_free_reg
+ in
+ let code = PTree.set pc inst code in
+
+ let (live_renames, fwmap)=
+ if is_icond inst then (
+ (* Pretend that registers that are live at an exit already have a definition, so
+ * this catches a couple of edge cases where an instruction was not renamed and
+ * could thus not be moved up. *)
+ let live_regs = Regset.elements @@ get_some @@ PTree.get pc sb.liveins in
+ let fwmap' = Duplicateaux.generate_fwmap live_regs live_regs PTree.empty in
+ let fwmap = my_merge_overwrite fwmap' fwmap in
+ (update_live_renames pc live_renames fwmap (get_some @@ PTree.get pc sb.liveins)
+ , fwmap)
+ ) else
+ (live_renames, fwmap)
+ in
+ (code, fwmap, live_renames, next_free_reg)
+ )
+ in
+
+ let last_inst = get_some @@ PTree.get last_pc code in
+ let last_inst = change_arg_regs last_inst fwmap in
+ let (last_inst, fwmap, next_free_reg) =
+ maybe_change_dest_reg ?only_rename last_inst fwmap ~next_free_reg
+ in
+ let code = PTree.set last_pc last_inst code in
+
+ let live_renames = update_live_renames last_pc live_renames fwmap sb.s_output_regs in
+
+ debug_flag := old_debug_flag;
+
+ (code, live_renames, next_free_reg)
+
+(* Pass over the superblock instruction in-order
+ * For each redefinition of a register, create a new register name and use that one from
+ * then on. There is one exception, if the last instruction defines a register, it will be
+ * left unchanged since the rest of the path cannot possibly use it.
+ * WARNING: This invalidates the superblock, it will need to be repaired with the
+ * information returned. *)
+let local_single_assignment sb code liveatentry ~next_free_reg =
+ let old_debug_flag = !debug_flag in
+
+ let (code, live_renames, next_free_reg) =
+ rename_regs sb code ~liveatentry ~next_free_reg
+ in
+
+ debug_flag := old_debug_flag;
+ (code, live_renames, next_free_reg)
+
+let final_restoration_code sb code live_renames =
+ let last_inst_pc = sb.instructions.(Array.length sb.instructions - 1) in
+ let last_inst = get_some @@ PTree.get last_inst_pc code in
+ let last_inst_is_basic = match last_inst with
+ | Icall _
+ | Ibuiltin _
+ | Ijumptable _
+ | Itailcall _
+ | Ireturn _
+ | Icond _ -> false
+ | _ -> true
+ in
+ let final_renames = ptree_get_or_default live_renames last_inst_pc [] in
+ let live_regs_opt = PTree.get last_inst_pc sb.liveins in
+ let (above, below) = let open Either in
+ ListLabels.partition_map final_renames
+ ~f:(fun {old_name; new_name} ->
+ let inst = Iop (Op.Omove, [new_name], old_name, Camlcoq.P.one) in
+ match last_inst_is_basic, live_regs_opt with
+ | true, _ ->
+ (* Printf.eprintf "Putting %d below basic inst\n" (Camlcoq.P.to_int old_name); *)
+ Right inst
+ | false, None ->
+ (* Printf.eprintf "Putting %d above unpredicted icond.\n" (Camlcoq.P.to_int old_name); *)
+ Left inst
+ | false, Some live_regs ->
+ if Regset.mem old_name live_regs then (
+ (* Printf.eprintf "Putting %d above icond\n" (Camlcoq.P.to_int old_name); *)
+ Left inst
+ ) else (
+ (* Printf.eprintf "Putting %d below icond\n" (Camlcoq.P.to_int old_name); *)
+ Right inst))
+ in
+ InsertPositionMap.empty
+ |> InsertPositionMap.add (InsertPosition.Above last_inst_pc) above
+ |> InsertPositionMap.add (InsertPosition.Below last_inst_pc) below
+
+let used_before_redefinition sb code ~offset ~reg =
+ let length = Array.length sb.instructions in
+ if not (offset < length) then
+ raise (Invalid_argument (Printf.sprintf "offset must be less than the superblock's length: %d is not less than %d" offset length))
+ ;
+ let finished = ref false in
+ let i = ref offset in
+ let res = ref false in
+ while (!i < length && not !finished) do
+ let inst = get_some @@ PTree.get sb.instructions.(!i) code in
+ if List.mem reg (RTL.instr_uses inst) then (
+ res := true;
+ finished := true;
+ ) else
+ ()
+ ;
+ let defined_reg = RTL.instr_defs inst in
+ (match defined_reg with
+ | None -> ()
+ | Some r -> if r = reg then finished := true else ());
+ i := !i + 1;
+ done;
+ !res
+
+type restoration_actions =
+ | Just_restore of renamed
+ | Restore_and_alias of renamed
+
+let restoration_instructions' sb code live_renames ~next_free_reg =
+ let next_free_reg =
+ let next_free_reg = ref next_free_reg in
+ (fun () ->
+ let r = !next_free_reg in
+ next_free_reg := Camlcoq.P.succ !next_free_reg;
+ r )
+ in
+
+ let length = Array.length sb.instructions in
+ let pc_to_idx =
+ Duplicateaux.generate_fwmap
+ (Array.to_list sb.instructions)
+ (List.init length (fun i -> i))
+ PTree.empty
+ in
+
+ let live_renames = PTree.map
+ (fun pc renames ->
+ let offset = apply_map' pc_to_idx pc in
+ List.map
+ (fun rename ->
+ let {old_name; new_name} = rename in
+ if used_before_redefinition sb code ~offset ~reg:old_name then
+ Restore_and_alias rename
+ else
+ Just_restore rename)
+ renames)
+ live_renames
+ in
+
+ let to_rename = PTree.map1
+ (fun renames ->
+ let old_names = List.filter_map (fun rename ->
+ match rename with
+ | Just_restore _ -> None
+ | Restore_and_alias {old_name; new_name} -> Some old_name) renames
+ in
+ let aliases = List.init (List.length old_names) (fun _ -> next_free_reg ()) in
+ let fwmap = Duplicateaux.generate_fwmap old_names aliases PTree.empty in
+ fwmap
+ )
+ live_renames
+ in
+
+ let to_insert_restore =
+ PTree.fold
+ (fun to_insert side_exit_pc renames ->
+ let alias_map = get_some @@ PTree.get side_exit_pc to_rename in
+ let insts = ListLabels.map renames
+ ~f:(fun rename ->
+ match rename with
+ | Just_restore {old_name; new_name} -> [Iop (Op.Omove, [new_name], old_name, Camlcoq.P.one)]
+ | Restore_and_alias {old_name; new_name} ->
+ [ Iop (Op.Omove, [old_name], apply_map' alias_map old_name, Camlcoq.P.one)
+ ; Iop (Op.Omove, [new_name], old_name, Camlcoq.P.one)] )
+ in
+ let insts = List.flatten insts in
+ InsertPositionMap.add (InsertPosition.Above side_exit_pc) insts to_insert)
+ live_renames
+ InsertPositionMap.empty
+ in
+ (to_insert_restore, to_rename, next_free_reg ())
+
+(* Assumes the path is in local single assignment form *)
+let intra_path_dependencies (sb : superblock) (code : code) =
+ let old_debug_flag = !debug_flag in
+ debug_flag := false;
+
+ (* Directly taken from RTLpathScheduleraux *)
+ let nr_instr = Array.length sb.instructions in
+ let trailer_length =
+ match PTree.get (sb.instructions.(nr_instr-1)) code with
+ | None -> 0
+ | Some ii ->
+ (match predicted_successor ii with
+ | Some _ -> 0
+ | None -> 1 )
+ in
+
+ let module IS = InstructionScheduler in
+ let opweights = OpWeights.get_opweights () in
+ let seqa =
+ ArrayLabels.map (Array.sub sb.instructions 0 (Array.length sb.instructions - trailer_length))
+ ~f:(fun i ->
+ (match PTree.get i code with
+ | Some ii -> ii
+ | None -> failwith "MyRTLpathScheduleraux.intra_path_dependencies"),
+ (match PTree.get i sb.liveins with
+ | Some s -> s
+ | None -> Regset.empty) )
+ in
+ let latency_constraints = PrepassSchedulingOracle.get_simple_dependencies opweights seqa in
+ debug "intra_path_dependencies for superblock:\n";
+ print_superblock sb code;
+ debug "\nlatency_constraints:\n\n";
+ ListLabels.iter latency_constraints
+ ~f:(fun {IS.instr_from; instr_to; latency} ->
+ debug "instr_from: %2d\ninstr_to: %4d\nlatency: %3d\n\n" instr_from instr_to latency );
+ flush_all ();
+
+ let deps =
+ ListLabels.fold_left latency_constraints
+ ~init:PTree.empty
+ ~f:(fun deps {IS.instr_from; instr_to; latency = _} ->
+ let to_pc = sb.instructions.(instr_to) in
+ let from_pc = sb.instructions.(instr_from) in
+ let inst_deps = ptree_get_or_default deps to_pc HashedSet.PSet.empty in
+ let inst_deps' = HashedSet.PSet.add from_pc inst_deps in
+ PTree.set to_pc inst_deps' deps )
+ in
+ debug_flag := old_debug_flag;
+ deps
+
+let transitive_dependencies deps pc =
+ let old_debug_flag = !debug_flag in
+ let immediate_deps = ptree_get_or_default deps pc HashedSet.PSet.empty in
+
+ let rec iter ~acc ~todo ~seen =
+ match todo with
+ | [] -> acc
+ | pc'::todo -> (* NB: todo has just shrunk *)
+ if HashedSet.PSet.contains seen pc' then
+ iter ~acc ~todo ~seen
+ else
+ let deps_of_dep = ptree_get_or_default deps pc' HashedSet.PSet.empty in
+ let new_todo = HashedSet.PSet.subtract deps_of_dep acc |> HashedSet.PSet.elements in
+ iter
+ ~acc:(HashedSet.PSet.add pc' acc)
+ ~todo:(new_todo @ todo)
+ ~seen:(HashedSet.PSet.add pc' seen)
+ in
+ let transitive_dependencies =
+ iter
+ ~acc:HashedSet.PSet.empty
+ ~todo:(HashedSet.PSet.elements immediate_deps)
+ ~seen:HashedSet.PSet.empty
+ in
+ debug_flag := old_debug_flag;
+ transitive_dependencies
+
+let moved_dependencies deps order side_exit_pc =
+ let old_debug_flag = !debug_flag in
+
+ let dependencies = transitive_dependencies deps side_exit_pc in
+ let side_exit_pc_idx = apply_map' order side_exit_pc in
+ let moved_dependencies =
+ HashedSet.PSet.filter
+ (fun pc ->
+ let dep_idx = apply_map' order pc in
+ dep_idx > side_exit_pc_idx )
+ dependencies
+ in
+ debug_flag := old_debug_flag;
+ moved_dependencies
+
+let update_liveins liveins live_renames =
+ PTree.map
+ (fun pc liveregs ->
+ match PTree.get pc live_renames with
+ | None -> liveregs
+ | Some renames ->
+ let old_to_new = ListLabels.fold_left renames
+ ~init:PTree.empty
+ ~f:(fun acc {old_name; new_name} -> PTree.set old_name new_name acc)
+ in
+ (* There doesn't seem to be a proper map function for Regset.t *)
+ let liveregs' =
+ Regset.fold
+ (fun r acc ->
+ let r' = ptree_get_or_default old_to_new r r in
+ Regset.add r' acc)
+ liveregs
+ Regset.empty
+ in
+ liveregs'
+ )
+ liveins
+
+let replace_iconds_by_ocmps sb code ~next_free_reg =
+ let module P = Camlcoq.P in
+ let (code, _previous_icond, next_free_reg) =
+ ArrayLabels.fold_left sb.instructions
+ ~init:(code, None, next_free_reg)
+ ~f:(fun (code, previous_icond_proxy_reg, next_free_reg) pc ->
+ let inst = get_some @@ PTree.get pc code in
+ match inst with
+ | Istore(chunk, addr, args, src, succ) ->
+ ( match previous_icond_proxy_reg with
+ | None -> (code, previous_icond_proxy_reg, next_free_reg)
+ | Some r ->
+ let istore' = Istore(chunk, addr, r::args, src, succ) in
+ let code' = PTree.set pc istore' code in
+ (code', previous_icond_proxy_reg, next_free_reg) )
+ | Icond(cond, args, ifso, ifnot, prediction) ->
+ (match prediction with
+ | None ->
+ (* Case only happens at the very end of the path; no transformation necessary *)
+ assert(sb.instructions.(Array.length sb.instructions - 1) = pc);
+ (code, None, next_free_reg)
+ | Some true -> (
+ let ocmp = match previous_icond_proxy_reg with
+ | None -> Iop((Op.Ocmp cond), args, next_free_reg, ifso)
+ | Some r -> Iop((Op.Ocmp cond), r::args, next_free_reg, ifso)
+ in
+ let code' = PTree.set pc ocmp code in
+ (code', Some next_free_reg, P.succ next_free_reg) )
+ | Some false -> (
+ let ocmp = match previous_icond_proxy_reg with
+ | None -> Iop((Op.Ocmp cond), args, next_free_reg, ifnot)
+ | Some r -> Iop((Op.Ocmp cond), r::args, next_free_reg, ifnot)
+ in
+ let code' = PTree.set pc ocmp code in
+ (code', Some next_free_reg, P.succ next_free_reg) ))
+ | Iload(trap, chunk, addr, args, dest, succ) -> (
+ if !Machine.config.Machine.has_non_trapping_loads then
+ (code, previous_icond_proxy_reg, next_free_reg)
+ else
+ match previous_icond_proxy_reg with
+ | None -> (code, previous_icond_proxy_reg, next_free_reg)
+ | Some r ->
+ let load' = Iload(trap, chunk, addr, r::args, dest, succ) in
+ let code' = PTree.set pc load' code in
+ (code', previous_icond_proxy_reg, next_free_reg) )
+ | _ -> (code, previous_icond_proxy_reg, next_free_reg)
+ )
+ in
+ (code, next_free_reg)
+
+let is_store = function
+ | Istore _ -> true
+ | _ -> false
+
+type heuristic_mode =
+ | Default
+ | Ignore_liveness
+ | Move_stores
+
+let ideal_schedule'' sb code mode =
+ let old_debug_flag = !debug_flag in
+
+ let dep_function = match mode with
+ | Default -> PrepassSchedulingOracle.get_simple_dependencies'
+ | Ignore_liveness -> PrepassSchedulingOracle.get_fake_deps_liveness
+ | Move_stores -> PrepassSchedulingOracle.get_fake_deps_liveness_stores
+ in
+
+ (* copy-paste from RTLpathScheduleraux.schedule_superblock *)
+ let nr_instr = Array.length sb.instructions in
+ let trailer_length =
+ match PTree.get (sb.instructions.(nr_instr-1)) code with
+ | None -> 0
+ | Some ii ->
+ match predicted_successor ii with
+ | Some _ -> 0
+ | None -> 1 in
+ let live_regs_entry = RTLpathScheduleraux.get_live_regs_entry sb code in
+ let seqa =
+ Array.map (fun i ->
+ (match PTree.get i code with
+ | Some ii -> ii
+ | None -> failwith "RTLpathScheduleraux.schedule_superblock"),
+ (match PTree.get i sb.liveins with
+ | Some s -> s
+ | None -> Regset.empty))
+ (Array.sub sb.instructions 0 (nr_instr-trailer_length))
+ in
+ let nr_scheduled_instr = nr_instr - trailer_length in
+ (* Copy-pasted from PrepassSchedulingOracle.schedule_sequence *)
+ let opweights = OpWeights.get_opweights () in
+ (* NB: Early exit *)
+ if (Array.length seqa) <= 1 then None else
+ (* Copy-pasted from PrepassSchedulingOracle.define_problem *)
+ let problem =
+ let deps = dep_function opweights seqa in
+ debug_flag := false;
+ debug "Fake deps:\n";
+ if !debug_flag then (
+ deps
+ |> List.iter (fun {InstructionScheduler.instr_from; instr_to; latency} ->
+ debug "%2d depends on %2d\n" (Camlcoq.P.to_int sb.instructions.(instr_to)) (Camlcoq.P.to_int sb.instructions.(instr_from)));
+ flush_all ();
+ );
+ { InstructionScheduler.max_latency = -1
+ ; resource_bounds = opweights.PrepassSchedulingOracleDeps.pipelined_resource_bounds
+ ; live_regs_entry = live_regs_entry
+ ; typing = sb.typing
+ ; reference_counting = Option.some @@ RTLpathScheduleraux.reference_counting seqa sb.s_output_regs sb.typing
+ ; instruction_usages = Array.map (PrepassSchedulingOracle.resources_of_instruction opweights) (Array.map fst seqa)
+ ; latency_constraints = deps }
+ in
+ match PrepassSchedulingOracle.prepass_scheduler_by_name
+ (!Clflags.option_fprepass_sched)
+ problem
+ (Array.map (fun (ins, _) ->
+ match ins with
+ | Icond _ -> true
+ | _ -> false) seqa)
+ with
+ | None ->
+ debug_flag := old_debug_flag;
+ failwith "no solution in prepass scheduling\n"
+ | Some solution ->
+ let positions = Array.init nr_scheduled_instr (fun i -> i) in
+ let final_time = solution.(nr_scheduled_instr) in
+ Array.sort (fun i j ->
+ let si = solution.(i) and sj = solution.(j) in
+ if si < sj then -1
+ else if si > sj then 1
+ else i - j) positions;
+
+ let ins' =
+ Array.append
+ (Array.map (fun i -> sb.instructions.(i)) positions)
+ (Array.sub sb.instructions (nr_instr - trailer_length) trailer_length) in
+ Some (ins', final_time)
+
+(* Improved scheduling heuristic which allows moving memory writes downwards by turning
+ * Iconds into Ocmps for the purpose of dependency calculations. *)
+let ideal_schedule' sb code ~next_free_reg =
+ let old_debug_flag = !debug_flag in
+ let (fake_code, _next_free_reg) = replace_iconds_by_ocmps sb code ~next_free_reg in
+ (* Does PTree.empty work or do I need to map the entries to Regset.empty *)
+ let fake_sb = { sb with liveins = PTree.empty } in
+ (* Copied from RTLpathScheduleraux.schedule_superblock *)
+ let nr_instr_sb = Array.length sb.instructions in
+ assert (nr_instr_sb = Array.length fake_sb.instructions);
+
+ let trailer_length =
+ match PTree.get (sb.instructions.(nr_instr_sb - 1)) code with
+ | None -> 0
+ | Some ii ->
+ match predicted_successor ii with
+ | Some _ -> 0
+ | None -> 1
+ in
+ let seqa =
+ (Array.map (fun i ->
+ (match PTree.get i code with
+ | Some ii -> ii
+ | None -> failwith "MyRTLpathScheduleraux.ideal_schedule'"),
+ (match PTree.get i sb.liveins with
+ | Some s -> s
+ | None -> Regset.empty))
+ (Array.sub sb.instructions 0 (nr_instr_sb - trailer_length)))
+ in
+ let fake_seqa =
+ (Array.map (fun i ->
+ (match PTree.get i fake_code with
+ | Some ii -> ii
+ | None -> failwith "MyRTLpathScheduleraux.ideal_schedule'"),
+ (match PTree.get i fake_sb.liveins with
+ | Some s -> s
+ | None -> Regset.empty))
+ (Array.sub fake_sb.instructions 0 (nr_instr_sb - trailer_length)))
+ in
+ (* Copied from PrepassSchedulingOracle.schedule_sequence *)
+ let opweights = OpWeights.get_opweights () in
+ (* WARNING: Early exit in case there is only on instruction to schedule *)
+ if (Array.length fake_seqa) <= 1 then None else
+ let nr_instr_fake_seqa = Array.length fake_seqa in
+ assert (nr_instr_fake_seqa = Array.length seqa);
+ let nr_instr_seqa = nr_instr_fake_seqa in
+ let store_idxs =
+ Array.to_list sb.instructions
+ |> List.mapi (fun i pc -> (i, get_some @@ PTree.get pc code))
+ |> List.filter (fun (_i, inst) -> is_store inst)
+ |> List.map (fun (i, _inst) -> i)
+ in
+ let side_exit_idxs = side_exit_idxs sb code in
+ let store_side_exit_limit =
+ ListLabels.map store_idxs
+ ~f:(fun st_idx ->
+ let first_se = List.find_opt (fun se_idx -> se_idx > st_idx) side_exit_idxs in
+ let second_se =
+ Option.bind
+ first_se
+ (fun se_idx -> List.find_opt (fun se_idx' -> se_idx' > se_idx) side_exit_idxs)
+ in
+ second_se )
+ in
+ let store_side_exit_deps =
+ ListLabels.map2 store_idxs store_side_exit_limit
+ ~f:(fun st_idx se_idx_opt ->
+ let module IS = InstructionScheduler in
+ match se_idx_opt with
+ | None -> None
+ | Some se_idx -> Some {IS.instr_to = se_idx; instr_from = st_idx; latency = 0} )
+ |> List.filter_map (fun x -> x)
+ in
+ (* Copied from PrepassSchedulingOracle.define_problem *)
+ let fake_deps = PrepassSchedulingOracle.get_simple_dependencies opweights fake_seqa in
+ let fake_deps = store_side_exit_deps @ fake_deps in
+
+ let problem =
+ { InstructionScheduler.max_latency = -1
+ ; live_regs_entry = RTLpathScheduleraux.get_live_regs_entry fake_sb fake_code
+ ; typing = fake_sb.typing
+ ; reference_counting = Some (RTLpathScheduleraux.reference_counting fake_seqa fake_sb.s_output_regs fake_sb.typing)
+ ; resource_bounds = opweights.PrepassSchedulingOracleDeps.pipelined_resource_bounds
+ ; instruction_usages = Array.map (PrepassSchedulingOracle.resources_of_instruction opweights) (Array.map fst seqa)
+ ; latency_constraints = fake_deps }
+
+ in
+ let scheduled_sequence =
+ match
+ PrepassSchedulingOracle.prepass_scheduler_by_name
+ (!Clflags.option_fprepass_sched)
+ problem
+ (Array.map (fun (ins, _) ->
+ match ins with
+ | Icond _ -> true
+ | _ -> false)
+ seqa)
+ with
+ | None -> None
+ | Some solution ->
+ (* Printf.eprintf "Scheduling instruction sequence of length: %d\n" nr_instructions; flush_all ();
+ Printf.eprintf "Result: %d\n" solution.(nr_instructions); flush_all (); *)
+ let positions = Array.init nr_instr_seqa (fun i -> i) in
+ let final_time = solution.(nr_instr_seqa) in
+ Array.sort (fun i j ->
+ let si = solution.(i) and sj = solution.(j) in
+ if si < sj then -1
+ else if si > sj then 1
+ else i - j) positions;
+ Some (positions, final_time)
+ in
+
+ debug_flag := old_debug_flag;
+
+ match scheduled_sequence with
+ | None -> None
+ | Some (order, final_time) ->
+ let ins' =
+ Array.append
+ (Array.map (fun i -> sb.instructions.(i)) order)
+ (Array.sub sb.instructions (nr_instr_sb - trailer_length) trailer_length) in
+ Some (ins', final_time)
+
+(* "ideal" *)
+let ideal_schedule sb code =
+ let schedule =
+ RTLpathScheduleraux.schedule_superblock
+ {sb with liveins = PTree.map (fun n _regs -> Regset.empty) sb.liveins}
+ code
+ in
+ schedule
+
+let merge_append _ x y = match x, y with
+ | None, None -> None
+ | Some x, None | None, Some x -> Some x
+ | Some x, Some y -> Some (x @ y)
+
+(* Turns a tree of dependencies (pc -> [pcs; that; depend; on pc]) into a tree of uses by
+ * "inverting" tree.
+ * Now, each pc has the pcs associated to it that depend on it, according to the original
+ * tree. *)
+let uses_of_deps p_ptree =
+ PTree.fold
+ (fun acc p vs ->
+ let acc = HashedSet.PSet.fold
+ (fun acc v ->
+ let old = ptree_get_or_default acc v HashedSet.PSet.empty in
+ let upd = HashedSet.PSet.add p old in
+ PTree.set v upd acc)
+ vs
+ acc
+ in
+ acc
+ )
+ p_ptree
+ PTree.empty
+
+(* Returns for every side-exit pc, a list of instructions that should be executed beforehand *)
+let downschedule_compensation_code sb code pm live_renames ~next_free_pc ~next_free_reg =
+ (* TODO: Right now we are copying the instructions even if there are duplicates per
+ * basic block. This leads to an issue where two identical memory writes lead to
+ * non-matching symbolic memory states.
+ * As a work-around we could eliminate the first/original memory write, this would
+ * allow moving memory writes at least one side-exit further down, but not
+ * farther. For that we would need to refine the symbolic memory model/evaluation
+ * which might be related to memory aliasing in general?!
+ * For now however, we simply use a more restrictive ideal_schedule function which
+ * does not propose moving memory writes below side-exits. *)
+ let old_debug_flag = !debug_flag in
+
+ let mode = match !Clflags.option_prepass_past_side_exits_sched with
+ | "move_stores" -> Move_stores
+ | "no_move_stores" -> Ignore_liveness
+ | _ -> failwith "Unsupported option for scheduling code past side exits"
+ in
+ let sb_with_liveins = {sb with liveins = update_liveins sb.liveins live_renames} in
+ match ideal_schedule'' sb_with_liveins code mode, ideal_schedule'' sb_with_liveins code Default with
+ | None, None -> InsertPositionMap.empty (* Early Exit*)
+ | None, Some _ | Some _, None -> failwith "downschedule_compensation_code: Scheduling procedure failed."
+ | Some (idealized_schedule, idealized_final_time)
+ , Some (default_schedule, default_final_time) ->
+ if idealized_final_time >= default_final_time then (
+ if (!debug_flag && idealized_final_time > default_final_time) then (
+ debug "Unexpectedly, idealized dependencies lead to a worse expected final time.\n";
+ debug "idealized_final_time = %d, default_final_time = %d\n" idealized_final_time default_final_time;
+ debug "For superblock %d" (Camlcoq.P.to_int sb.instructions.(0));
+ failwith "Unexpectedly bad final time for idealized schedule";
+ );
+ (* Early exit *)
+ InsertPositionMap.empty
+ ) else (
+ let sb_length = Array.length sb.instructions in
+ let pc_to_idx =
+ Duplicateaux.generate_fwmap
+ (Array.to_list sb.instructions)
+ (List.init sb_length (fun i -> i))
+ PTree.empty
+ in
+ let pc_to_idx' =
+ Duplicateaux.generate_fwmap
+ (Array.to_list idealized_schedule)
+ (List.init sb_length (fun i -> i))
+ PTree.empty
+ in
+
+ let side_exit_pcs = side_exit_pcs sb code in
+
+ (* NB: for the purpose of this heuristic we consider the superblock to include the final
+ * restoration code. sb has changed. *)
+ let liveins' = update_liveins sb.liveins live_renames in
+ (* Use the new names to calculate proper dependencies *)
+ let path_deps = intra_path_dependencies {sb with liveins = liveins'} code in
+ let path_deps_without_iconds = PTree.map
+ (fun _pc deps ->
+ HashedSet.PSet.filter
+ (fun dep_pc -> not @@ is_icond @@ get_some @@ PTree.get dep_pc code)
+ deps)
+ path_deps
+ in
+ let transitive_path_deps_without_iconds =
+ PTree.map (fun pc _deps -> transitive_dependencies path_deps_without_iconds pc)
+ path_deps
+ in
+ let transitive_uses_without_iconds = uses_of_deps transitive_path_deps_without_iconds in
+
+ (* For each side-exit, check if all the dependencies are still above it
+ * if not, remember the pc and transitively consider its dependencies until
+ * no further insts to be covered are discovered *)
+ let side_exit_and_compensation =
+ ListLabels.map side_exit_pcs
+ ~f:(fun side_exit_pc ->
+ let moved_deps = moved_dependencies path_deps pc_to_idx' side_exit_pc in
+ let moved_deps_sorted =
+ ListLabels.sort (HashedSet.PSet.elements moved_deps)
+ ~cmp:(fun pc1 pc2 -> Int.compare (apply_map' pc_to_idx pc1) (apply_map' pc_to_idx pc2))
+ in
+ (* The sucessors are *incorrect* at this point *)
+ (side_exit_pc, moved_deps_sorted) )
+ in
+ let (side_exit_pcs, insts_pcs) = List.split side_exit_and_compensation in
+
+ let to_insert_pcs =
+ ListLabels.fold_left2 side_exit_pcs insts_pcs
+ ~init:InsertPositionMap.empty
+ ~f:(fun acc side_exit_pc pcs ->
+ InsertPositionMap.add (InsertPosition.Above side_exit_pc) pcs acc) in
+ let (to_insert_as_well : Camlcoq.P.t list InsertPositionMap.t)=
+ InsertPositionMap.fold
+ (fun side_exit_pc pcs_to_insert acc ->
+ let side_exit_pc = InsertPosition.anchor side_exit_pc in
+ let ( let* ) = Option.bind in
+ let collateral_moves =
+ pcs_to_insert
+ |> List.filter_map (fun pc ->
+ let* uses = PTree.get pc transitive_uses_without_iconds in
+ HashedSet.PSet.filter
+ (fun pc ->
+ not @@ List.mem pc pcs_to_insert
+ && apply_map' pc_to_idx pc < apply_map' pc_to_idx side_exit_pc
+ && (not @@ is_icond @@ get_some @@ PTree.get pc code))
+ uses
+ |> Option.some)
+ |> ListLabels.fold_left ~f:HashedSet.PSet.union ~init:HashedSet.PSet.empty
+ in
+ let collateral_moves = ListLabels.sort (HashedSet.PSet.elements collateral_moves)
+ ~cmp:(fun pc1 pc2 -> Int.compare (apply_map' pc_to_idx pc1) (apply_map' pc_to_idx pc2))
+ in
+ (* TODO?
+ * In principle we should be able to move the instructions Below, unless they are
+ * live (liveins') at the side exit.
+ * But adding them above is simpler and unnecessary instructions should be removed
+ * by DCE. *)
+ let acc = InsertPositionMap.add (InsertPosition.Above side_exit_pc) collateral_moves acc in
+ acc)
+ to_insert_pcs
+ InsertPositionMap.empty
+ in
+ let to_insert_pcs = InsertPositionMap.merge merge_append to_insert_pcs to_insert_as_well in
+ let num_probably_duplicated = InsertPositionMap.fold (fun _pos pcs n ->
+ n + List.length pcs)
+ to_insert_pcs
+ 0
+ in
+ let gain = default_final_time - idealized_final_time in
+ assert(gain > 0);
+ if gain * !Clflags.option_fliftif < num_probably_duplicated then (
+ debug_flag := false;
+ debug "Expected number of cycles gained, %d, not considered worth the code duplication, expected at %d instructions.\n"
+ gain num_probably_duplicated;
+ debug_flag := old_debug_flag;
+ InsertPositionMap.empty
+ ) else (
+ debug_flag := old_debug_flag;
+ to_insert_pcs
+ ))
+
+let my_merge_no_overwrite m1 m2 =
+ PTree.combine (fun x y -> match (x, y) with
+ | None, None -> None
+ | Some x, None
+ | None, Some x -> Some x
+ | Some x, Some y ->
+ if x = y then Some x
+ else failwith "Merge conflict."
+ ) m1 m2
+
+let print_schedule schedule =
+ debug "Schedule\n";
+ Array.iter (fun pos -> debug "%d\n" (Camlcoq.P.to_int pos)) schedule;
+ debug "\n";
+ flush_all ();
+;;
+
+(* Walk through sb and find those register which possibly take on different values
+ * i.e. which are written to twice. *)
+let find_mutated_registers (sb : superblock) code input_regs : Regset.t =
+ let (defined, defined_multiple) =
+ ArrayLabels.fold_left sb.instructions
+ ~init:(input_regs, Regset.empty)
+ ~f:(fun (defined, defined_multiple) pc ->
+ let inst = get_some @@ PTree.get pc code in
+ match RTL.instr_defs inst with
+ | None -> (defined, defined_multiple)
+ | Some(r) ->
+ if Regset.mem r defined then
+ let defined_multiple = Regset.add r defined_multiple in
+ (defined, defined_multiple)
+ else
+ let defined = Regset.add r defined in
+ (defined, defined_multiple)
+ )
+ in
+ defined_multiple
+
+(* Map each register in regs to the index of its first definition in the superblock
+ * Returns: mapping from a register to the index of the definition that first defines it
+ * in the superblock *)
+let find_first_definition (sb : superblock) code (regs : Regset.t) : int PTree.t =
+ let (regs, first_defs, _index) =
+ ArrayLabels.fold_left sb.instructions
+ ~init:(regs, PTree.empty, 0)
+ ~f:(fun (regs, first_defs, index) pc ->
+ let inst = get_some @@ PTree.get pc code in
+ match RTL.instr_defs inst with
+ | None -> (regs, first_defs, index + 1)
+ | Some(r) ->
+ if Regset.mem r regs then
+ let regs = Regset.remove r regs in
+ let first_defs = PTree.set r index first_defs in
+ (regs, first_defs, index + 1)
+ else
+ (regs, first_defs, index + 1) )
+ in
+ assert (Regset.empty = regs);
+ first_defs
+
+let print_int_ptree pt =
+ let module P = Camlcoq.P in
+ if not !debug_flag then () else
+ debug "Mappings, P.t -> Int.t:\n";
+ List.iter (fun (p, i) -> debug "%d |-> %d\n" (P.to_int p) i) (PTree.elements pt)
+
+(* [def_index], the index in the superblock of the instruction defining [r] is set to -1
+ * if the register is first defined outside of the path *)
+let is_read_after_definition (sb : superblock) code (r : reg) (def_index) : bool =
+ let start = if def_index < 0 then 0 else def_index + 1 in
+ let stop = Array.length sb.instructions in
+ let rec aux n =
+ if n > stop then false else
+
+ let pc = sb.instructions.(n) in
+ let inst = get_some @@ PTree.get pc code in
+ if List.mem r @@ RTL.instr_uses inst then
+ true
+ else
+ aux (n + 1)
+ in
+ aux start
+
+let registers_to_alias (sb : superblock) code pm =
+ let first_pc = sb.instructions.(0) in
+ let pi = get_some @@ PTree.get first_pc pm in
+ let sb_length = Array.length sb.instructions in
+ let mutated_registers = find_mutated_registers sb code pi.input_regs in
+ (* Registers are of interest if they would need to be restored after renaming *)
+ let registers_of_interest = Regset.inter sb.s_output_regs mutated_registers in
+ let first_defs_in_sb = find_first_definition sb code registers_of_interest in
+ (* If the register is first defined by the last instruction of the path, it does not
+ * need to be aliased, since it cannot possibly be used afterwards in the superblock.
+ * Furthermore, the renaming pass won't rename this register, in case the definition
+ * happens by a path-ending instruction in which case it would be impossible to place
+ * restoration code afterwards *)
+ let registers_to_alias =
+ registers_of_interest
+ |> Regset.filter (fun r -> if apply_map' first_defs_in_sb r = sb_length - 1 then false else true)
+ in
+ registers_to_alias
+
+let add_aliasing_code sb code pm : RTL.instruction list InsertPositionMap.t =
+ let first_pc = sb.instructions.(0) in
+ let pi = get_some @@ PTree.get first_pc pm in
+ let to_alias = registers_to_alias sb code pm in
+ let (initial, other) = Regset.partition (fun r -> Regset.mem r pi.input_regs) to_alias in
+ let to_insert =
+ (* If registers that are already live at the beginning of the superblock need to be
+ * aliased, they need to be inserted before (above) the first instruction of the
+ * superblock. *)
+ InsertPositionMap.singleton
+ (InsertPosition.Above sb.instructions.(0))
+ (Regset.elements initial |> List.map (fun r -> Iop (Op.Omove, [r], r, Camlcoq.P.one)))
+ in
+
+ let first_def_other = find_first_definition sb code other in
+ let to_insert = PTree.fold
+ (fun to_insert r idx ->
+ let pos = (InsertPosition.Below sb.instructions.(idx)) in
+ let old =
+ match InsertPositionMap.find_opt pos to_insert with
+ | None -> []
+ | Some v -> v
+ in
+ let upd = (Iop (Op.Omove, [r], r, Camlcoq.P.one)) :: old in
+ InsertPositionMap.add pos upd to_insert)
+ first_def_other
+ to_insert
+ in
+ to_insert
+
+type icond_frame =
+ { inop_idx : int
+ ; icond_idx : int }
+
+let find_icond_frames (sb : superblock) code =
+ let (_last_inop_idx, frames, _i) =
+ ArrayLabels.fold_left sb.instructions
+ ~init:(None, [], 0)
+ ~f:(fun (last_inop_idx, frames, i) pc ->
+ let inst = get_some @@ PTree.get pc code in
+ match inst with
+ | Inop _ -> (Some i, frames, i + 1)
+ | Icond _ ->
+ ( match last_inop_idx with
+ | None -> (None, frames, i + 1)
+ | Some inop_idx -> (None, {inop_idx; icond_idx = i} :: frames, i + 1) )
+ | _ -> (last_inop_idx, frames, i + 1) )
+ in
+ frames
+
+let stage_duplication sb code staged_dupcode staged_revmap ~next_free_pc =
+ let module D = Duplicateaux in
+ (* let module P = Camlcoq.P in *)
+ let icond_frames = find_icond_frames sb code in
+ let (code, staged_dupcode, staged_revmap, next_free_pc) =
+ ListLabels.fold_left icond_frames
+ ~init:(code, staged_dupcode, staged_revmap, next_free_pc)
+ ~f:(fun (code, staged_dupcode, staged_revmap, next_free_pc) {inop_idx; icond_idx} ->
+ if (* icond_idx = Array.length sb.instructions - 1 (* Do not lift code before end of path *)
+ || *) icond_idx = inop_idx + 1 then (* Sentinel value that no code needs to be duplicated *)
+ (* do nothing *)
+ (code, staged_dupcode, staged_revmap, next_free_pc)
+ else
+ let pcs_to_copy = Array.sub sb.instructions (inop_idx + 1) (icond_idx - inop_idx) in
+ let (staged_dupcode', staged_revmap', dupcode, fwmap, next_free_pc) = D.clone_only_new code next_free_pc (Array.to_list pcs_to_copy) in
+ let staged_dupcode = my_merge_no_overwrite staged_dupcode staged_dupcode' in
+ let staged_revmap = my_merge_no_overwrite staged_revmap staged_revmap' in
+ let parental_icond = get_some @@ PTree.get sb.instructions.(icond_idx) code in
+ let (useless_icond, staged_icond) =
+ match parental_icond with
+ | Icond(cond, args, ifso, ifnot, info) ->
+ let staged_icond = Icond(cond, args, Camlcoq.P.of_int @@ List.hd dupcode, pcs_to_copy.(0), info) in
+ let useless_icond = Icond(cond, args, pcs_to_copy.(0), pcs_to_copy.(0), info) in
+ (useless_icond, staged_icond)
+ | _ -> failwith "Instruction was expected to be Icond, but is not"
+ in
+ let staged_dupcode = PTree.set sb.instructions.(inop_idx) staged_icond staged_dupcode in
+
+ let code = PTree.set sb.instructions.(inop_idx) useless_icond code in
+
+ (code, staged_dupcode, staged_revmap, next_free_pc) )
+ in
+ (code, staged_dupcode, staged_revmap, next_free_pc)
+
+(* TODO? better name *)
+let apply_aliases sb code name_map ~offset =
+ let code = ref code in
+ let name_map = ref name_map in
+ let length = Array.length sb.instructions in
+
+ (* TODO: preferably there was an early exit condition when there is nothing left to do *)
+ for i = offset to length - 1 do
+ let pc = sb.instructions.(i) in
+ let inst = get_some @@ PTree.get pc !code in
+ let inst' = change_arg_regs inst !name_map in
+ code := PTree.set pc inst' !code;
+ name_map :=
+ (match RTL.instr_defs inst with
+ | None -> !name_map
+ | Some r ->
+ if Option.is_some @@ PTree.get r !name_map then (
+ (* The restoration code, is no longer incorrectly applicable *)
+ if not !Clflags.option_fpoormansssa then
+ PTree.remove r !name_map
+ else
+ !name_map
+ ) else
+ !name_map)
+ ;
+ done;
+ !code
+
+let scheduler f =
+ (* let module D = Duplicateaux in
+ let module P = Camlcoq.P in
+ let module N = Camlcoq.Nat in *)
+ (* TODO: - control for amount of code duplication *)
+ let open! Duplicateaux in
+ let f_rtl = f.fn_RTL in
+ let code = f_rtl.fn_code in
+ let _orig_code = code in
+ let id_ptree = PTree.map (fun n i -> n) (f.fn_path) in
+ let entry = f.fn_RTL.fn_entrypoint in
+ let pm = f.fn_path in
+ let do_nothing = ((((code, entry), pm), id_ptree), None) in
+ (* TODO: Add flag to select between "conserative" downard scheduling and the one that
+ * allows moving memory stores below the next side exit (singular). *)
+ if not !Clflags.option_fpoormansssa && !Clflags.option_fliftif < 1 then do_nothing
+ else (* NB: Early exit above *)
+ let _orig_pm = pm in
+ let typing = get_ok @@ RTLtyping.type_function f.fn_RTL in
+ let is_loop_header = Duplicateaux.get_loop_headers code (f_rtl.fn_entrypoint) in
+ let inner_loops = Duplicateaux.get_inner_loops f_rtl code is_loop_header in
+ (* inner loop map: map loop headers to inner loops *)
+ let ilmap = generate_fwmap (List.map (fun il -> il.head) inner_loops) inner_loops PTree.empty in
+ let superblocks = get_superblocks code entry pm typing in
+
+ (* Get only those superblocks that span an inner loop *)
+ let superblocks =
+ if not !Clflags.option_ftargetinnerloops then
+ superblocks
+ else
+ List.filter (fun sb ->
+ (* sanity check; a superblock contains at least one instruction *)
+ assert (Array.length sb.instructions >= 1);
+ (* Check if first instruction of a superblock is the beginning of an inner loop*)
+ let first_pc = sb.instructions.(0) in
+ match PTree.get first_pc ilmap with
+ | None -> false
+ | Some(il) -> (* List.length il.body = Array.length sb.instructions *)
+ (match il.sb_final with
+ (* don't bother if the loop is not predicted to loop *)
+ | None -> false
+ | Some pc ->
+ pc == first_pc (* loop is spanned by the superblock *)
+ (* Make sure the loop does not exceed the superblock *)
+ && List.length il.body = Array.length sb.instructions
+ )
+ ) superblocks
+ in
+
+ let next_free_pc = next_free_pc code |> Camlcoq.P.of_int in
+ let next_free_reg = max_reg_function f.fn_RTL |> Camlcoq.P.succ in
+
+ (* Apply aliasing code *)
+ let old_debug_flag = !debug_flag in
+ debug_flag := false;
+ debug "Initial code.\n";
+ print_code code;
+ print_path_map pm;
+ print_superblocks superblocks code;
+ debug "\n";
+ flush_all ();
+
+ (* TODO: Is this extra aliasing logic really useless? *)
+ (* let (superblocks, code, pm, next_free_pc) =
+ ListLabels.fold_left superblocks
+ ~init:([], code, pm, next_free_pc)
+ ~f:(fun (superblocks, code, pm, next_free_pc) sb ->
+ let to_insert = add_aliasing_code sb code pm in
+ let (sb', code', pm', next_free_pc') = insert_code sb code pm to_insert ~next_free_pc in
+ (sb'::superblocks, code', pm', next_free_pc') )
+ in
+ debug "After adding aliasing code.\n";
+ print_code code;
+ print_path_map pm;
+ print_superblocks superblocks code;
+ debug "\n";
+ flush_all (); *)
+
+ let (superblocks, code, pm, next_free_pc) =
+ ListLabels.fold_left superblocks
+ ~init:([], code, pm, next_free_pc)
+ ~f:(fun (superblocks, code, pm, next_free_pc) sb ->
+ let to_insert = prepend_nops_before_iconds sb code in
+ let (sb', code', pm', next_free_pc', _) = insert_code sb code pm to_insert ~next_free_pc in
+ (sb'::superblocks, code', pm', next_free_pc') )
+ in
+ debug "After adding nops before Iconds.\n";
+ print_code code;
+ print_path_map pm;
+ print_superblocks superblocks code;
+ debug "\n";
+ flush_all ();
+
+ let (code, sb_renamings, next_free_reg) =
+ if not !Clflags.option_fpoormansssa then
+ (code, List.map (fun sb -> (sb, PTree.empty)) superblocks, next_free_reg)
+ else (
+ debug "pmSSA path\n"; flush_all ();
+ ListLabels.fold_left superblocks
+ ~init:(code, [], next_free_reg)
+ ~f:(fun (code, sb_renamings, next_free_reg) sb ->
+ let first_pc = sb.instructions.(0) in
+ let pi = get_some @@ PTree.get first_pc pm in
+ let (code, live_renames, next_free_reg) = local_single_assignment sb code pi.input_regs ~next_free_reg in
+ (code, (sb, live_renames)::sb_renamings, next_free_reg)
+ ) )
+ in
+
+ debug "After renaming :\n";
+ print_code code;
+ print_path_map pm;
+ print_superblocks (fst @@ List.split sb_renamings) code;
+ debug "\n";
+ flush_all ();
+
+
+ let (sb_renamings, code, pm, next_free_pc) = ListLabels.fold_left sb_renamings
+ ~init:([], code, pm, next_free_pc)
+ ~f:(fun (sbs, code, pm, next_free_pc) (sb, live_renames) ->
+ let final_restoration = final_restoration_code sb code live_renames in
+ let (sb, code, pm, next_free_pc, fwmap) = insert_code sb code pm final_restoration ~next_free_pc in
+ let live_renames =
+ PTree.fold
+ (fun acc pc insts ->
+ let pc' = apply_map' fwmap pc in
+ (* Remove final renames, which were just inserted *)
+ let insts = if Camlcoq.P.eq pc sb.instructions.(Array.length sb.instructions - 1) then [] else insts in
+ PTree.set pc' insts acc)
+ live_renames
+ PTree.empty
+ in
+ ((sb, live_renames)::sbs, code, pm, next_free_pc)
+ )
+ in
+
+ debug "After inserting the final restoration code:\n";
+ (* print_code code; *)
+ (* print_path_map pm; *)
+ print_superblocks (fst @@ List.split sb_renamings) code;
+ debug "\n";
+ flush_all ();
+
+ let sb_tocompensatepcs_liverenames =
+ ListLabels.map sb_renamings
+ ~f:(fun (sb, live_renames) ->
+ let (to_insert_compensation_pcs, live_renames) =
+ if !Clflags.option_prepass_past_side_exits then
+ (downschedule_compensation_code sb code pm live_renames ~next_free_pc ~next_free_reg
+ , live_renames )
+ else
+ (InsertPositionMap.empty, live_renames)
+ in
+ (sb, to_insert_compensation_pcs, live_renames ) )
+ in
+
+ let (sb_tocompensate_liverenames, code, next_free_reg) = ListLabels.fold_left sb_tocompensatepcs_liverenames
+ ~init:([], code, next_free_reg)
+ ~f:(fun (sbs, code, next_free_reg) (sb, to_compensate_pcs, live_renames) ->
+ if !Clflags.option_fpoormansssa then (
+ let to_compensate = InsertPositionMap.map (fun pcs ->
+ let insts = List.map (fun pc -> get_some @@ PTree.get pc code) pcs in
+ insts)
+ to_compensate_pcs
+ in
+ let code = InsertPositionMap.fold
+ (fun _pos pcs code->
+ List.fold_left (fun code pc -> PTree.set pc (Inop Camlcoq.P.one) code)
+ code
+ pcs)
+ to_compensate_pcs
+ code
+ in
+ ((sb, to_compensate, live_renames)::sbs, code, next_free_reg)
+ ) else (
+ assert (PTree.elements live_renames |> List.for_all (fun (_, l) -> l = []));
+ let dup_count = InsertPositionMap.fold
+ (fun _pos (pcs : Camlcoq.P.t list) acc ->
+ let acc = ListLabels.fold_left pcs
+ ~init:acc
+ ~f:(fun acc pc ->
+ let old = ptree_get_or_default acc pc 0 in
+ PTree.set pc (old + 1) acc)
+ in
+ acc )
+ to_compensate_pcs
+ PTree.empty
+ in
+ let pcs_dupd_twice_or_more =
+ PTree.filter1 (fun n -> n > 1) dup_count
+ |> PTree.elements
+ |> List.map fst
+ in
+ let arg_regs = ListLabels.fold_left pcs_dupd_twice_or_more
+ ~init:(Regset.empty)
+ ~f:(fun acc pc ->
+ let inst = get_some @@ PTree.get pc code in
+ RTL.instr_uses inst
+ |> List.fold_left
+ (fun acc reg -> Regset.add reg acc)
+ acc
+ )
+ in
+ let pi = get_some @@ PTree.get sb.instructions.(0) pm in
+ let (code, live_renames, next_free_reg) = rename_regs ~only_rename:arg_regs sb code ~liveatentry:pi.input_regs ~next_free_reg in
+ let to_compensate = InsertPositionMap.map (fun pcs ->
+ let insts = List.map (fun pc -> get_some @@ PTree.get pc code) pcs in
+ insts)
+ to_compensate_pcs
+ in
+ let code = InsertPositionMap.fold
+ (fun _pos pcs code->
+ List.fold_left (fun code pc -> PTree.set pc (Inop Camlcoq.P.one) code)
+ code
+ pcs)
+ to_compensate_pcs
+ code
+ in
+ ((sb, to_compensate, live_renames)::sbs, code, next_free_reg) ))
+ in
+
+ (* Insert the compensation code (downward scheduling) and update the restoration code
+ * information to reflect the new pcs. *)
+ let (superblocks_liverenames, code, pm, next_free_pc) = ListLabels.fold_left sb_tocompensate_liverenames
+ ~init:([], code, pm, next_free_pc)
+ ~f:(fun (sbs, code, pm, next_free_pc) (sb, to_insert_compensation, live_renames) ->
+ let (sb, code, pm, next_free_pc, fwmap) = insert_code sb code pm to_insert_compensation ~next_free_pc in
+ let live_renames =
+ PTree.fold
+ (fun acc pc insts ->
+ let pc' = apply_map' fwmap pc in
+ PTree.set pc' insts acc)
+ live_renames
+ PTree.empty
+ in
+ ((sb, live_renames)::sbs, code, pm, next_free_pc) )
+ in
+
+ debug "After inserting the compensation code:\n";
+ (* print_code code; *)
+ (* print_path_map pm; *)
+ print_superblocks (fst @@ List.split superblocks_liverenames) code;
+ debug "\n"; flush_all ();
+
+ (* Insert the restoration code *)
+ let (superblocks, code, pm, next_free_pc, next_free_reg) = ListLabels.fold_left superblocks_liverenames
+ ~init:([], code, pm, next_free_pc, next_free_reg)
+ ~f:(fun (sbs, code, pm, next_free_pc, next_free_reg) (sb, live_renames) ->
+ let (sb, code, pm, next_free_pc, live_renames) =
+ if !Clflags.option_fpoormansssa then
+ (* Final restoration code was already inserted. *)
+ (sb, code, pm, next_free_pc, live_renames)
+ else
+ (* Ther combination of code motion below side exits WITHOUT register renaming may
+ * cause some restoration code to be necessary. Otherwise it is not safe to
+ * duplicate instructions.
+ * The final restoration code is special since it may insert below. *)
+ let final_restoration = final_restoration_code sb code live_renames in
+ let (sb, code, pm, next_free_pc, fwmap) = insert_code sb code pm final_restoration ~next_free_pc in
+ let live_renames =
+ PTree.fold
+ (fun acc pc insts ->
+ let pc' = apply_map' fwmap pc in
+ (* Remove final renames, which were just inserted *)
+ let insts = if Camlcoq.P.eq pc sb.instructions.(Array.length sb.instructions - 1) then [] else insts in
+ PTree.set pc' insts acc)
+ live_renames
+ PTree.empty
+ in
+ (sb, code, pm, next_free_pc, live_renames)
+ in
+ let pc_to_idx = Duplicateaux.generate_fwmap
+ (Array.to_list sb.instructions)
+ (List.init (Array.length sb.instructions) (fun i -> i))
+ PTree.empty
+ in
+ let (to_insert_restoration, to_rename, next_free_reg) = restoration_instructions' sb code live_renames ~next_free_reg in
+ let side_exit_pcs = side_exit_pcs sb code in
+ let code = ListLabels.fold_left side_exit_pcs
+ ~init:code
+ ~f:(fun code side_exit_pc ->
+ match PTree.get side_exit_pc to_rename with
+ | None -> code
+ | Some aliases ->
+ let idx = apply_map' pc_to_idx side_exit_pc in
+ let code = apply_aliases sb code aliases ~offset:idx in
+ code)
+ in
+ let (sb, code, pm, next_free_pc, _fwmap) = insert_code sb code pm to_insert_restoration ~next_free_pc in
+ (sb::sbs, code, pm, next_free_pc, next_free_reg) )
+ in
+
+ debug "After inserting the restoration code:\n";
+ (* print_code code; *)
+ (* print_path_map pm; *)
+ print_superblocks superblocks code;
+ debug "\n"; flush_all ();
+
+ (* let (superblocks, code, pm, next_free_pc, next_free_reg) =
+ if not !Clflags.option_fpoormansssa then (
+ (* In principle we need to do something like this because if we do not systematically
+ * rename registers, the downward scheduling might copy a series of instructions like
+ * this: i = i + 1, which when copied twice (to move below two side exits) is
+ * incorrect unless we rename the register i (or at least its second redefinition).
+ * However, at least the benchmarks do not seem to trigger this special case.
+ ListLabels.fold_left superblocks_torename
+ ~init:([], code, pm, next_free_pc, next_free_reg)
+ ~f:(fun (sbs, code, pm, next_free_pc, next_free_reg) (sb, to_rename) ->
+ let pi = get_some @@ PTree.get sb.instructions.(0) pm in
+ let liveatentry = pi.input_regs in
+ let (code, live_renames, next_free_reg) = rename_regs ~liveatentry ~only_rename:to_rename sb code ~next_free_reg in
+ let restoration_insts = restoration_instructions live_renames in
+ let (sb, code, pm, next_free_pc) = insert_code sb code pm restoration_insts ~next_free_pc in
+ (sb::sbs, code, pm, next_free_pc, next_free_reg) ) *)
+ (fst @@ List.split superblocks_torestore, code, pm, next_free_pc, next_free_reg)
+ ) else
+ (fst @@ List.split superblocks_torestore, code, pm, next_free_pc, next_free_reg)
+ in *)
+
+ let (code, to_lift) =
+ if !Clflags.option_fliftif > 0 then
+ (* TODO: Use this flag to control for the amount of duplicated code.
+ * However, this is probably best controlled at the "downscheduling" level since only
+ * those instructions need to be actually duplicated, i.e. restoration code writing
+ * back the current value to renamed registers is not actually duplicated. *)
+ let (code, staged_dupcode, staged_revmap, next_free_pc) = ListLabels.fold_left superblocks
+ ~init:(code, PTree.empty, PTree.empty, (Camlcoq.P.to_int next_free_pc))
+ ~f:(fun (code, staged_dupcode, staged_revmap, next_free_pc) sb ->
+ stage_duplication sb code staged_dupcode staged_revmap ~next_free_pc )
+ in
+ (code, Some(staged_revmap, staged_dupcode))
+ else
+ (code, None)
+ in
+
+ debug "After staging the duplication (\"if-lifting\") :\n";
+ print_code code;
+ print_path_map pm;
+ print_superblocks superblocks code;
+ debug "\n";
+ flush_all ();
+
+ debug_flag := old_debug_flag;
+
+ ((((code, entry), pm), id_ptree), to_lift)
diff --git a/scheduling/MyRTLpathSchedulerproof.v b/scheduling/MyRTLpathSchedulerproof.v
new file mode 100644
index 00000000..4f14903e
--- /dev/null
+++ b/scheduling/MyRTLpathSchedulerproof.v
@@ -0,0 +1,526 @@
+Require Import AST Linking Values Maps Globalenvs Smallstep Registers.
+Require Import Coqlib Maps Events Errors Op.
+Require Import RTL RTLpath RTLpathLivegen RTLpathLivegenproof RTLpathSE_theory.
+Require Import MyRTLpathScheduler.
+
+Definition match_prog (p tp: program) :=
+ match_program (fun _ f tf => transf_fundef f = OK tf) eq p tp.
+
+Lemma transf_program_match:
+ forall prog tprog, transf_program prog = OK tprog -> match_prog prog tprog.
+Proof.
+ intros. eapply match_transform_partial_program_contextual; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variable prog: program.
+Variable tprog: program.
+
+Hypothesis TRANSL: match_prog prog tprog.
+
+Let pge := Genv.globalenv prog.
+Let tpge := Genv.globalenv tprog.
+
+Hypothesis all_fundef_liveness_ok: forall b fd, Genv.find_funct_ptr pge b = Some fd -> liveness_ok_fundef fd.
+
+Lemma symbols_preserved s: Genv.find_symbol tpge s = Genv.find_symbol pge s.
+Proof.
+ rewrite <- (Genv.find_symbol_match TRANSL). reflexivity.
+Qed.
+
+Lemma senv_preserved:
+ Senv.equiv pge tpge.
+Proof.
+ eapply (Genv.senv_match TRANSL).
+Qed.
+
+Lemma functions_preserved:
+ forall (v: val) (f: fundef),
+ Genv.find_funct pge v = Some f ->
+ exists tf cunit, transf_fundef f = OK tf /\ Genv.find_funct tpge v = Some tf /\ linkorder cunit prog.
+Proof.
+ intros. exploit (Genv.find_funct_match TRANSL); eauto.
+ intros (cu & tf & A & B & C).
+ repeat eexists; intuition eauto.
+ + unfold incl; auto.
+ + eapply linkorder_refl.
+Qed.
+
+Lemma function_ptr_preserved:
+ forall v f,
+ Genv.find_funct_ptr pge v = Some f ->
+ exists tf,
+ Genv.find_funct_ptr tpge v = Some tf /\ transf_fundef f = OK tf.
+Proof.
+ intros.
+ exploit (Genv.find_funct_ptr_transf_partial TRANSL); eauto.
+Qed.
+
+Lemma function_sig_preserved:
+ forall f tf, transf_fundef f = OK tf -> funsig tf = funsig f.
+Proof.
+ intros. destruct f.
+ - simpl in H. monadInv H.
+ destruct (transf_function f) as [res H]; simpl in * |- *; auto.
+ destruct (H _ EQ).
+ intuition subst; auto.
+ symmetry.
+ eapply match_function_preserves.
+ eassumption.
+ - simpl in H. monadInv H. reflexivity.
+Qed.
+
+Theorem transf_initial_states:
+ forall s1, initial_state prog s1 ->
+ exists s2, initial_state tprog s2 /\ match_states s1 s2.
+Proof.
+ intros. inv H.
+ exploit function_ptr_preserved; eauto. intros (tf & FIND & TRANSF).
+ exists (Callstate nil tf nil m0).
+ split.
+ - econstructor; eauto.
+ + intros; apply (Genv.init_mem_match TRANSL); assumption.
+ + replace (prog_main tprog) with (prog_main prog). rewrite symbols_preserved. eauto.
+ symmetry. eapply match_program_main. eauto.
+ + destruct f.
+ * monadInv TRANSF. rewrite <- H3.
+ destruct (transf_function f) as [res H]; simpl in * |- *; auto.
+ destruct (H _ EQ).
+ intuition subst; auto.
+ symmetry; eapply match_function_preserves. eassumption.
+ * monadInv TRANSF. assumption.
+ - constructor; eauto.
+ + constructor.
+ + apply transf_fundef_correct; auto.
+(* + eapply all_fundef_liveness_ok; eauto. *)
+Qed.
+
+Theorem transf_final_states s1 s2 r:
+ final_state s1 r -> match_states s1 s2 -> final_state s2 r.
+Proof.
+ unfold final_state.
+ intros H; inv H.
+ intros H; inv H; simpl in * |- *; try congruence.
+ inv H1.
+ destruct st; simpl in * |- *; try congruence.
+ inv STACKS. constructor.
+Qed.
+
+
+Let ge := Genv.globalenv (RTLpath.transf_program prog).
+Let tge := Genv.globalenv (RTLpath.transf_program tprog).
+
+Lemma senv_sym x y: Senv.equiv x y -> Senv.equiv y x.
+Proof.
+ unfold Senv.equiv. intuition congruence.
+Qed.
+
+Lemma senv_transitivity x y z: Senv.equiv x y -> Senv.equiv y z -> Senv.equiv x z.
+Proof.
+ unfold Senv.equiv. intuition congruence.
+Qed.
+
+Lemma senv_preserved_RTL:
+ Senv.equiv ge tge.
+Proof.
+ eapply senv_transitivity. { eapply senv_sym; eapply RTLpath.senv_preserved. }
+ eapply senv_transitivity. { eapply senv_preserved. }
+ eapply RTLpath.senv_preserved.
+Qed.
+
+Lemma symbols_preserved_RTL s: Genv.find_symbol tge s = Genv.find_symbol ge s.
+Proof.
+ unfold tge, ge. erewrite RTLpath.symbols_preserved; eauto.
+ rewrite symbols_preserved.
+ erewrite RTLpath.symbols_preserved; eauto.
+Qed.
+
+Program Definition mkctx sp rs0 m0 {f1: RTLpath.function} (hyp: liveness_ok_function f1)
+ : simu_proof_context f1
+ := {| the_ge1:= ge; the_ge2 := tge; the_sp:=sp; the_rs0:=rs0; the_m0:=m0 |}.
+Obligation 2.
+ erewrite symbols_preserved_RTL. eauto.
+Qed.
+
+Lemma s_find_function_fundef f sp svos rs0 m0 fd
+ (LIVE: liveness_ok_function f):
+ sfind_function pge ge sp svos rs0 m0 = Some fd ->
+ liveness_ok_fundef fd.
+Proof.
+ unfold sfind_function. destruct svos; simpl.
+ + destruct (seval_sval _ _ _ _); try congruence.
+ eapply find_funct_liveness_ok; eauto.
+ + destruct (Genv.find_symbol _ _); try congruence.
+ intros. eapply all_fundef_liveness_ok; eauto.
+Qed.
+Local Hint Resolve s_find_function_fundef: core.
+
+Lemma s_find_function_preserved f sp svos1 svos2 rs0 m0 fd
+ (LIVE: liveness_ok_function f):
+ (svident_simu f (mkctx sp rs0 m0 LIVE) svos1 svos2) ->
+ sfind_function pge ge sp svos1 rs0 m0 = Some fd ->
+ exists fd', sfind_function tpge tge sp svos2 rs0 m0 = Some fd'
+ /\ transf_fundef fd = OK fd'.
+Proof.
+ Local Hint Resolve symbols_preserved_RTL: core.
+ unfold sfind_function. intros [sv1 sv2 SIMU|]; simpl in *.
+ + rewrite !(seval_preserved ge tge) in *; eauto.
+ destruct (seval_sval _ _ _ _); try congruence.
+ erewrite <- SIMU; try congruence. clear SIMU.
+ intros; exploit functions_preserved; eauto.
+ intros (fd' & cunit & (X1 & X2 & X3)). eexists.
+ repeat split; eauto.
+ + subst. rewrite symbols_preserved. destruct (Genv.find_symbol _ _); try congruence.
+ intros; exploit function_ptr_preserved; eauto.
+Qed.
+
+Lemma sistate_simu f dupmap outframe sp st st' rs m is
+ (LIVE: liveness_ok_function f):
+ ssem_internal ge sp st rs m is ->
+ sistate_simu dupmap f outframe st st' (mkctx sp rs m LIVE)->
+ exists is',
+ ssem_internal tge sp st' rs m is' /\ istate_simu f dupmap outframe is is'.
+Proof.
+ intros SEM X; eapply X; eauto.
+Qed.
+
+Lemma seval_builtin_sval_preserved sp rs m:
+ forall bs, seval_builtin_sval ge sp bs rs m = seval_builtin_sval tge sp bs rs m.
+Proof.
+ induction bs.
+ all: try (simpl; try reflexivity; erewrite seval_preserved by eapply symbols_preserved_RTL; reflexivity).
+ all: simpl; rewrite IHbs1; rewrite IHbs2; reflexivity.
+Qed.
+
+Lemma seval_list_builtin_sval_preserved sp rs m:
+ forall lbs,
+ seval_list_builtin_sval ge sp lbs rs m = seval_list_builtin_sval tge sp lbs rs m.
+Proof.
+ induction lbs; [simpl; reflexivity|].
+ simpl. rewrite seval_builtin_sval_preserved. rewrite IHlbs.
+ reflexivity.
+Qed.
+
+Lemma ssem_final_simu dm f f' stk stk' sp st st' rs0 m0 sv sv' rs m t s
+ (LIVE: liveness_ok_function f):
+ match_function dm f f' ->
+ list_forall2 match_stackframes stk stk' ->
+ sfval_simu dm f st.(si_pc) st'.(si_pc) (mkctx sp rs0 m0 LIVE) sv sv' ->
+ ssem_final pge ge sp st.(si_pc) stk f rs0 m0 sv rs m t s ->
+ exists s', ssem_final tpge tge sp st'.(si_pc) stk' f' rs0 m0 sv' rs m t s' /\ match_states s s'.
+Proof.
+ Local Hint Resolve transf_fundef_correct: core.
+ intros FUN STK SFV. destruct SFV; intros SEM; inv SEM; simpl in *.
+ - (* Snone *)
+ exploit initialize_path. { eapply dupmap_path_entry1; eauto. }
+ intros (path & PATH).
+ eexists; split; econstructor; eauto.
+ eapply eqlive_reg_refl.
+ - (* Scall *)
+ exploit s_find_function_preserved; eauto.
+ intros (fd' & FIND & TRANSF).
+ erewrite <- function_sig_preserved; eauto.
+ exploit initialize_path. { eapply dupmap_path_entry1; eauto. }
+ intros (path & PATH).
+ eexists; split; econstructor; eauto.
+ + eapply eq_trans; try eassumption; auto.
+ + simpl. repeat (econstructor; eauto).
+ - (* Stailcall *)
+ exploit s_find_function_preserved; eauto.
+ intros (fd' & FIND & TRANSF).
+ erewrite <- function_sig_preserved; eauto.
+ eexists; split; econstructor; eauto.
+ + erewrite <- preserv_fnstacksize; eauto.
+ + eapply eq_trans; try eassumption; auto.
+ - (* Sbuiltin *)
+ pose senv_preserved_RTL as SRTL.
+ exploit initialize_path. { eapply dupmap_path_entry1; eauto. }
+ intros (path & PATH).
+ eexists; split; econstructor; eauto.
+ + eapply seval_builtin_args_preserved; eauto.
+ eapply seval_list_builtin_sval_correct; eauto.
+ rewrite H0.
+ erewrite seval_list_builtin_sval_preserved; eauto.
+ + eapply external_call_symbols_preserved; eauto.
+ + eapply eqlive_reg_refl.
+ - (* Sjumptable *)
+ exploit ptree_get_list_nth_rev; eauto. intros (p2 & LNZ & DM).
+ exploit initialize_path. { eapply dupmap_path_entry1; eauto. }
+ intros (path & PATH).
+ eexists; split; econstructor; eauto.
+ + eapply eq_trans; try eassumption; auto.
+ + eapply eqlive_reg_refl.
+ - (* Sreturn *)
+ eexists; split; econstructor; eauto.
+ erewrite <- preserv_fnstacksize; eauto.
+ - (* Sreturn bis *)
+ eexists; split; econstructor; eauto.
+ + erewrite <- preserv_fnstacksize; eauto.
+ + rewrite <- H. erewrite <- seval_preserved; eauto.
+Qed.
+
+Lemma siexec_snone_por_correct rs' is t s alive path0 i sp s0 st0 stk stk' f rs0 m0: forall
+ (SSEM2 : ssem_final pge ge sp (si_pc s0) stk f rs0 m0 Snone
+ (irs is) (imem is) t s)
+ (SIEXEC : siexec_inst i st0 = Some s0)
+ (ICHK : inst_checker (fn_path f) alive (pre_output_regs path0) i = Some tt),
+ (liveness_ok_function f) ->
+ list_forall2 match_stackframes stk stk' ->
+ eqlive_reg (fun r : Regset.elt => Regset.In r (pre_output_regs path0)) (irs is) rs' ->
+ exists s' : state,
+ ssem_final pge ge sp (si_pc s0) stk f rs0 m0 Snone rs' (imem is) t s' /\
+ eqlive_states s s'.
+Proof.
+ Local Hint Resolve eqlive_stacks_refl: core.
+ intros ? ? ? LIVE STK EQLIVE.
+ inversion SSEM2; subst; clear SSEM2.
+ eexists; split.
+ * econstructor.
+ * generalize ICHK.
+ unfold inst_checker. destruct i; simpl in *;
+ unfold exit_checker; try discriminate.
+ all:
+ try destruct Pos.eq_dec; simpl;
+ try destruct (list_mem _ _); simpl;
+ try (destruct (Regset.subset _ _) eqn:SUB_ALIVE; try congruence; fail).
+ 4,5:
+ destruct (Regset.mem _ _); destruct (Regset.subset _ _) eqn:SUB_ALIVE; try congruence.
+ 1,2,3,4: assert (NPC: n=(si_pc s0)).
+ all: try (inv SIEXEC; simpl; auto; fail).
+ 1,2,3,4:
+ try (destruct (Regset.subset _ _) eqn:SUB_ALIVE; try congruence);
+ simpl; inversion_SOME p;
+ destruct (Regset.subset (input_regs p) (pre_output_regs path0)) eqn:SUB_PATH; try congruence;
+ intros NPATH _; econstructor; eauto;
+ try (instantiate (1:=p); rewrite <- NPC; auto; fail).
+ 1,2,3,4:
+ eapply eqlive_reg_monotonic; eauto; simpl;
+ intros; apply Regset.subset_2 in SUB_PATH;
+ unfold Regset.Subset in SUB_PATH;
+ apply SUB_PATH in H; auto.
+ assert (NPC: n0=(si_pc s0)). { inv SIEXEC; simpl; auto. }
+ inversion_SOME p.
+ 2: { destruct (Regset.subset _ _) eqn:?; try congruence. }
+ destruct (Regset.subset _ _) eqn:SUB_ALIVE; try congruence.
+ 2: {
+ destruct (Regset.subset (pre_output_regs path0) alive) eqn:?; try congruence;
+ destruct ((fn_path f) ! n); intros; try discriminate.
+ all: inversion SIEXEC; simpl; auto.
+ all: destruct Regset.subset in ICHK0; try discriminate;
+ destruct Regset.subset in ICHK0; try discriminate;
+ simpl in ICHK0.
+ all: destruct PTree.get eqn:GET in ICHK0; try discriminate.
+ all: destruct Regset.subset eqn:SUBS_PATH0 in ICHK0; try discriminate.
+ all: econstructor; eauto.
+ all: eapply eqlive_reg_monotonic; eauto.
+ all: destruct (Regset.subset (input_regs p0) (pre_output_regs path0)) eqn:SUB_PATH; try congruence.
+ all: intros; apply Regset.subset_2 in SUB_PATH.
+ all: unfold Regset.Subset in SUB_PATH; apply SUB_PATH in H; auto. }
+ intros; simpl.
+ destruct (Regset.subset (pre_output_regs path0) alive) eqn:SUB_ALIVE'; try congruence.
+ destruct (Regset.subset (input_regs p) (pre_output_regs path0)) eqn:SUB_PATH; try congruence.
+ apply Regset.subset_2 in SUB_PATH.
+ unfold Regset.Subset in SUB_PATH.
+ econstructor; eauto. { rewrite <- NPC; eauto. }
+ eapply eqlive_reg_monotonic; eauto.
+Qed.
+
+Lemma pre_output_regs_correct f pc0 path0 stk stk' sp (st:sstate) rs0 m0 t s is rs':
+ (liveness_ok_function f) ->
+ (fn_path f) ! pc0 = Some path0 ->
+ sexec f pc0 = Some st ->
+ list_forall2 match_stackframes stk stk' ->
+ ssem_final pge ge sp (si_pc st) stk f rs0 m0 (final st) (irs is) (imem is) t s ->
+ eqlive_reg (fun r : Regset.elt => Regset.In r (pre_output_regs path0)) (irs is) rs' ->
+ exists s', ssem_final pge ge sp (si_pc st) stk f rs0 m0 (final st) rs' (imem is) t s' /\ eqlive_states s s'.
+Proof.
+ Local Hint Resolve eqlive_stacks_refl: core.
+ intros LIVE PATH0 SEXEC STK SSEM2 EQLIVE.
+ (* start decomposing path_checker *)
+ generalize (LIVE pc0 path0 PATH0).
+ unfold path_checker.
+ inversion_SOME res; intros IPCHK.
+ inversion_SOME i; intros INST ICHK.
+ exploit ipath_checker_default_succ; eauto. intros DEFSUCC.
+ (* start decomposing SEXEC *)
+ generalize SEXEC; clear SEXEC.
+ unfold sexec; rewrite PATH0.
+ inversion_SOME st0; intros SEXEC_PATH.
+ exploit siexec_path_default_succ; eauto.
+ simpl. rewrite DEFSUCC.
+ clear DEFSUCC. destruct res as [alive pc1]. simpl in *.
+ try_simplify_someHyps.
+ destruct (siexec_inst i st0) eqn: SIEXEC; try_simplify_someHyps; intros.
+ (* Snone *)
+ eapply siexec_snone_por_correct; eauto.
+ destruct i; try_simplify_someHyps; try congruence;
+ inversion SSEM2; subst; clear SSEM2; simpl in *.
+ + (* Scall *)
+ eexists; split.
+ * econstructor; eauto.
+ * econstructor; eauto.
+ econstructor; eauto.
+ (* wf *)
+ generalize ICHK.
+ unfold inst_checker; simpl in *.
+ destruct (Regset.subset _ _) eqn:SUB_ALIVE; try congruence.
+ destruct (list_mem _ _); try congruence.
+ destruct (reg_sum_mem _ _); try congruence.
+ intros EXIT.
+ exploit exit_checker_eqlive_ext1; eauto.
+ intros. destruct H as [p [PATH EQLIVE']].
+ econstructor; eauto.
+ + (* Stailcall *)
+ eexists; split.
+ * econstructor; eauto.
+ * econstructor; eauto.
+ + (* Sbuiltin *)
+ eexists; split.
+ * econstructor; eauto.
+ * (* wf *)
+ generalize ICHK.
+ unfold inst_checker; simpl in *.
+ destruct (Regset.subset _ _) eqn:SUB_ALIVE; try congruence.
+ destruct (list_mem _ _); try congruence.
+ intros EXIT.
+ exploit exit_checker_eqlive_builtin_res; eauto.
+ intros. destruct H as [p [PATH EQLIVE']].
+ econstructor; eauto.
+ + (* Icond *)
+ eexists; split.
+ * econstructor.
+ * generalize ICHK.
+ unfold inst_checker; simpl in *.
+ destruct Pos.eq_dec; destruct (Regset.subset _ _) eqn:SUB_ALIVE; try congruence.
+ + (* Sjumptable *)
+ eexists; split.
+ * econstructor; eauto.
+ * (* wf *)
+ generalize ICHK.
+ unfold inst_checker; simpl in *.
+ destruct (Regset.subset _ _) eqn:SUB_ALIVE; try congruence.
+ destruct (Regset.mem _ _); try congruence.
+ destruct (exit_list_checker _ _ _) eqn:EQL; try congruence.
+ exploit exit_list_checker_eqlive; eauto.
+ intros. destruct H as [p [PATH EQLIVE']].
+ econstructor; eauto.
+ + (* Sreturn *)
+ eexists; split.
+ * econstructor; eauto.
+ * econstructor; eauto.
+Qed.
+
+(* The main theorem on simulation of symbolic states ! *)
+Theorem ssem_sstate_simu dm f f' pc0 path0 stk stk' sp st st' rs m t s:
+ (fn_path f) ! pc0 = Some path0 ->
+ sexec f pc0 = Some st ->
+ match_function dm f f' ->
+ liveness_ok_function f ->
+ list_forall2 match_stackframes stk stk' ->
+ ssem pge ge sp st stk f rs m t s ->
+ (forall ctx: simu_proof_context f, sstate_simu dm f (pre_output_regs path0) st st' ctx) ->
+ exists s', ssem tpge tge sp st' stk' f' rs m t s' /\ match_states s s'.
+Proof.
+ intros PATH0 SEXEC MFUNC LIVE STACKS SEM SIMU.
+ destruct (SIMU (mkctx sp rs m LIVE)) as (SIMU1 & SIMU2); clear SIMU.
+ destruct SEM as [is CONT SEM|is t s' CONT SEM1 SEM2]; simpl in *.
+ - (* sem_early *)
+ exploit sistate_simu; eauto.
+ unfold istate_simu; rewrite CONT.
+ intros (is' & SEM' & (path & PATH & (CONT' & RS' & M') & PC')).
+ exists (State stk' f' sp (ipc is') (irs is') (imem is')).
+ split.
+ + eapply ssem_early; auto. congruence.
+ + rewrite M'. econstructor; eauto.
+ - (* sem_normal *)
+ exploit sistate_simu; eauto.
+ unfold istate_simu; rewrite CONT.
+ intros (is' & SEM' & (CONT' & RS' & M')).
+ exploit pre_output_regs_correct; eauto.
+ clear SEM2; intros (s0 & SEM2 & EQLIVE).
+ exploit ssem_final_simu; eauto.
+ clear SEM2; intros (s1 & SEM2 & MATCH0).
+ exploit ssem_final_equiv; eauto.
+ clear SEM2; rewrite M'; rewrite CONT' in CONT; intros (s2 & EQ & SEM2).
+ exists s2; split.
+ + eapply ssem_normal; eauto.
+ + eapply eqlive_match_states; eauto.
+ eapply match_states_equiv; eauto.
+Qed.
+
+Lemma exec_path_simulation dupmap path stk stk' f f' sp rs m pc pc' t s:
+ (fn_path f)!pc = Some path ->
+ path_step ge pge path.(psize) stk f sp rs m pc t s ->
+ list_forall2 match_stackframes stk stk' ->
+ dupmap ! pc' = Some pc ->
+ match_function dupmap f f' ->
+ liveness_ok_function f ->
+ exists path' s', (fn_path f')!pc' = Some path' /\ path_step tge tpge path'.(psize) stk' f' sp rs m pc' t s' /\ match_states s s'.
+Proof.
+ intros PATH STEP STACKS DUPPC MATCHF LIVE.
+ exploit initialize_path. { eapply dupmap_path_entry2; eauto. }
+ intros (path' & PATH').
+ exists path'.
+ exploit (sexec_correct f pc pge ge sp path stk rs m t s); eauto.
+ intros (st & SYMB & SEM).
+ exploit dupmap_correct; eauto.
+ intros (path0 & st' & PATH0 & SYMB' & SIMU).
+ rewrite PATH0 in PATH; inversion PATH; subst.
+ exploit ssem_sstate_simu; eauto.
+ intros (s0 & SEM0 & MATCH).
+ exploit (sexec_exact f'); eauto.
+ intros (s' & STEP' & EQ).
+ exists s'; intuition.
+ eapply match_states_equiv; eauto.
+Qed.
+
+Lemma step_simulation s1 t s1' s2:
+ step ge pge s1 t s1' ->
+ match_states s1 s2 ->
+ exists s2',
+ step tge tpge s2 t s2'
+ /\ match_states s1' s2'.
+Proof.
+ Local Hint Resolve eqlive_stacks_refl transf_fundef_correct: core.
+ destruct 1 as [path stack f sp rs m pc t s PATH STEP | | | ]; intros MS; inv MS.
+(* exec_path *)
+ - try_simplify_someHyps. intros.
+ exploit path_step_eqlive; eauto. (* { intros. eapply all_fundef_liveness_ok; eauto. } *)
+ clear STEP EQUIV rs; intros (s2 & STEP & EQLIVE).
+ exploit exec_path_simulation; eauto.
+ clear STEP; intros (path' & s' & PATH' & STEP' & MATCH').
+ exists s'; split.
+ + eapply exec_path; eauto.
+ + eapply eqlive_match_states; eauto.
+(* exec_function_internal *)
+ - inv LIVE.
+ exploit initialize_path. { eapply (fn_entry_point_wf f). }
+ destruct 1 as (path & PATH).
+ inversion TRANSF as [f0 xf tf MATCHF|]; subst. eexists. split.
+ + eapply exec_function_internal. erewrite <- preserv_fnstacksize; eauto.
+ + erewrite preserv_fnparams; eauto.
+ econstructor; eauto.
+ { apply preserv_entrypoint; auto. }
+ { apply eqlive_reg_refl. }
+(* exec_function_external *)
+ - inversion TRANSF as [|]; subst. eexists. split.
+ + econstructor. eapply external_call_symbols_preserved; eauto. apply senv_preserved_RTL.
+ + constructor. assumption.
+(* exec_return *)
+ - inv STACKS. destruct b1 as [res' f' sp' pc' rs']. eexists. split.
+ + constructor.
+ + inv H1. econstructor; eauto.
+Qed.
+
+Theorem transf_program_correct:
+ forward_simulation (semantics prog) (semantics tprog).
+Proof.
+ eapply forward_simulation_step with match_states.
+ - eapply senv_preserved.
+ - eapply transf_initial_states.
+ - intros; eapply transf_final_states; eauto.
+ - intros; eapply step_simulation; eauto.
+Qed.
+
+End PRESERVATION.
diff --git a/scheduling/RTLpath.v b/scheduling/RTLpath.v
index b29a7759..97187ca8 100644
--- a/scheduling/RTLpath.v
+++ b/scheduling/RTLpath.v
@@ -54,7 +54,9 @@ Definition default_succ (i: instruction): option node :=
Definition early_exit (i: instruction): option node := (* FIXME: for jumptable, replace [node] by [list node] *)
match i with
- | Icond cond args ifso ifnot _ => Some ifso
+ | Icond cond args ifso ifnot _ =>
+ if Pos.eq_dec ifso ifnot then None
+ else Some ifso
| _ => None
end.
@@ -170,7 +172,10 @@ Definition istep (ge: RTL.genv) (i: instruction) (sp: val) (rs: regset) (m: mem)
Some (mk_istate true pc' rs m')
| Icond cond args ifso ifnot _ =>
SOME b <- eval_condition cond rs##args m IN
- Some (mk_istate (negb b) (if b then ifso else ifnot) rs m)
+ if Pos.eq_dec ifso ifnot then
+ Some (mk_istate true (if b then ifso else ifnot) rs m)
+ else
+ Some (mk_istate (negb b) (if b then ifso else ifnot) rs m)
| _ => None (* TODO jumptable ? *)
end.
@@ -398,8 +403,8 @@ Lemma istep_correct ge i stack (f:function) sp rs m st :
forall pc, (fn_code f)!pc = Some i ->
RTL.step ge (State stack f sp pc rs m) E0 (State stack f sp st.(ipc) st.(irs) st.(imem)).
Proof.
- destruct i; simpl; try congruence; simplify_SOME x.
- 1-3: explore_destruct; simplify_SOME x.
+ destruct i; simpl; try congruence; simplify_SOME x;
+ try (explore_destruct; simplify_SOME x).
Qed.
Local Hint Resolve star_refl: core.
@@ -648,6 +653,7 @@ Lemma istep_normal_exit ge i sp rs m st:
Proof.
destruct i; simpl; try congruence; simplify_SOME x.
all: explore_destruct; simplify_SOME x.
+ discriminate.
Qed.
Lemma isteps_normal_exit ge path f sp: forall rs m pc st,
@@ -728,7 +734,7 @@ Qed.
(* FIXME - add prediction *)
Inductive is_early_exit pc: instruction -> Prop :=
| Icond_early_exit cond args ifnot predict:
- is_early_exit pc (Icond cond args pc ifnot predict)
+ pc <> ifnot -> is_early_exit pc (Icond cond args pc ifnot predict)
. (* TODO add jumptable here ? *)
Lemma istep_early_exit ge i sp rs m st :
@@ -826,8 +832,7 @@ Lemma istep_complete t i stack f sp rs m pc s':
t = E0 /\ exists st, istep ge i sp rs m = Some st /\ s'=(State stack f sp st.(ipc) st.(irs) st.(imem)).
Proof.
intros H X; inversion H; simpl; subst; try rewrite X in * |-; clear X; simplify_someHyps; try congruence;
- (split; auto); simplify_someHyps; eexists; split; simplify_someHyps; eauto.
- all: explore_destruct; simplify_SOME a.
+ (split; auto); simplify_someHyps; intros; explore_destruct; eexists; split; simplify_someHyps; eauto.
Qed.
Lemma stuttering path idx stack f sp rs m pc st t s1':
@@ -958,8 +963,10 @@ Proof.
unfold nth_default_succ_inst in Hi'.
erewrite isteps_normal_exit in Hi'; eauto.
clear pc' Hpc' STEP0 PSTEP0 BOUND0; try_simplify_someHyps; intros.
- destruct EARLY_EXIT as [cond args ifnot]; simpl in ENTRY;
- destruct (initialize_path (*fn_code f*) (fn_path f) pc0); eauto.
+ destruct EARLY_EXIT as [cond args ifnot]; simpl in ENTRY.
+ destruct Pos.eq_dec in ENTRY.
+ - contradiction.
+ - destruct (initialize_path (*fn_code f*) (fn_path f) pc0); eauto.
}
destruct HPATH0 as (path1 & Hpath1).
destruct (path1.(psize)) as [|ps] eqn:Hpath1size.
diff --git a/scheduling/RTLpathLivegen.v b/scheduling/RTLpathLivegen.v
index 9f646ad0..00ec3334 100644
--- a/scheduling/RTLpathLivegen.v
+++ b/scheduling/RTLpathLivegen.v
@@ -61,7 +61,8 @@ Definition iinst_checker (pm: path_map) (alive: Regset.t) (i: instruction): opti
Some (alive, pc')
| Icond cond args ifso ifnot _ =>
ASSERT list_mem args alive IN
- exit_checker pm alive ifso (alive, ifnot)
+ if Pos.eq_dec ifso ifnot then Some (alive, ifnot)
+ else exit_checker pm alive ifso (alive, ifnot)
| _ => None
end.
@@ -72,7 +73,7 @@ Lemma iinst_checker_path_entry (pm: path_map) (alive: Regset.t) (i: instruction)
iinst_checker pm alive i = Some res ->
early_exit i = Some pc -> path_entry pm pc.
Proof.
- destruct i; simpl; try_simplify_someHyps; subst.
+ destruct i; simpl; try destruct Pos.eq_dec; try_simplify_someHyps; subst.
inversion_ASSERT; try_simplify_someHyps.
Qed.
@@ -82,7 +83,7 @@ Lemma iinst_checker_default_succ (pm: path_map) (alive: Regset.t) (i: instructio
default_succ i = Some pc.
Proof.
destruct i; simpl; try_simplify_someHyps; subst;
- repeat (inversion_ASSERT); try_simplify_someHyps.
+ repeat (inversion_ASSERT); try destruct Pos.eq_dec; try_simplify_someHyps.
intros; exploit exit_checker_res; eauto.
intros; subst. simpl; auto.
Qed.
@@ -216,7 +217,8 @@ Proof.
destruct (iinst_checker pm alive i) as [[alive0 pc0]|] eqn: CHECK1; simpl.
- simpl; intros CHECK2 PC. eapply wf_last_node; eauto.
destruct i; simpl in * |- *; intuition (subst; eauto);
- try (generalize CHECK2 CHECK1; clear CHECK1 CHECK2; try (inversion_SOME path); repeat inversion_ASSERT; try_simplify_someHyps).
+ try (generalize CHECK2 CHECK1; clear CHECK1 CHECK2; try (inversion_SOME path);
+ repeat inversion_ASSERT; try destruct Pos.eq_dec; try_simplify_someHyps).
intros PC CHECK1 CHECK2.
intros; exploit exit_checker_res; eauto.
intros X; inversion X. intros; subst; eauto.
diff --git a/scheduling/RTLpathLivegenproof.v b/scheduling/RTLpathLivegenproof.v
index b02400bf..67de7275 100644
--- a/scheduling/RTLpathLivegenproof.v
+++ b/scheduling/RTLpathLivegenproof.v
@@ -308,10 +308,10 @@ Proof.
inversion_ASSERT.
inversion_SOME b. intros EVAL.
intros ARGS; erewrite <- eqlive_reg_listmem; eauto.
- try_simplify_someHyps.
- repeat (econstructor; simpl; eauto).
- exploit exit_checker_res; eauto.
- intro; subst; simpl. auto.
+ destruct Pos.eq_dec; try_simplify_someHyps;
+ repeat (econstructor; simpl; eauto);
+ exploit exit_checker_res; eauto;
+ intro; subst; simpl; auto.
Qed.
Lemma iinst_checker_istep_continue ge sp pm alive i res rs m st:
@@ -348,14 +348,14 @@ Proof.
intros EQLIVE.
set (tmp := istep ge i sp rs2).
destruct i; simpl; try_simplify_someHyps; repeat (inversion_ASSERT || inversion_SOME b); try_simplify_someHyps; try congruence.
- 1-3: explore_destruct; simpl; try_simplify_someHyps; repeat (inversion_ASSERT || inversion_SOME b); try_simplify_someHyps; try congruence.
+ 1-3: explore_destruct; simpl; try destruct Pos.eq_dec; try_simplify_someHyps; repeat (inversion_ASSERT || inversion_SOME b); try_simplify_someHyps; try congruence.
(* Icond *)
unfold tmp; clear tmp; simpl.
intros EVAL; erewrite <- eqlive_reg_listmem; eauto.
- try_simplify_someHyps.
- destruct b eqn:EQb; simpl in * |-; try congruence.
- intros; exploit exit_checker_eqlive; eauto.
- intros (path & PATH & EQLIVE2).
+ destruct Pos.eq_dec; try_simplify_someHyps;
+ destruct b eqn:EQb; simpl in * |-; try congruence;
+ intros; exploit exit_checker_eqlive; eauto;
+ intros (path & PATH & EQLIVE2);
repeat (econstructor; simpl; eauto).
Qed.
diff --git a/scheduling/RTLpathSE_impl.v b/scheduling/RTLpathSE_impl.v
index e21d7cd1..08a62202 100644
--- a/scheduling/RTLpathSE_impl.v
+++ b/scheduling/RTLpathSE_impl.v
@@ -48,6 +48,9 @@ Definition hsmem_get_hid (hsm: hsmem): hashcode :=
| HSstore _ _ _ _ _ hid => hid
end.
+(* TODO: Redundant with the record's default accessor *)
+Definition hscond_get_hid (hsc: hscond): hashcode := hsc.(hscond_hid).
+
Definition hsval_set_hid (hsv: hsval) (hid: hashcode): hsval :=
match hsv with
| HSinput r _ => HSinput r hid
@@ -67,6 +70,8 @@ Definition hsmem_set_hid (hsm: hsmem) (hid: hashcode): hsmem :=
| HSstore hsm chunk addr lhsv srce _ => HSstore hsm chunk addr lhsv srce hid
end.
+Definition hscond_set_hid (hsc: hscond) (hid: hashcode): hscond :=
+ {| cond := hsc.(cond); lhsv := hsc.(lhsv); hscond_hid := hid |}.
Lemma hsval_set_hid_correct x y ge sp rs0 m0:
hsval_set_hid x unknown_hid = hsval_set_hid y unknown_hid ->
@@ -92,6 +97,14 @@ Proof.
Qed.
Local Hint Resolve hsmem_set_hid_correct: core.
+Lemma hscond_set_hid_correct x y ge sp rs0 m0:
+ hscond_set_hid x unknown_hid = hscond_set_hid y unknown_hid ->
+ seval_hscond ge sp x rs0 m0 = seval_hscond ge sp y rs0 m0.
+Proof.
+ destruct x, y; intro H; inversion H; subst; simpl; auto.
+Qed.
+Local Hint Resolve hscond_set_hid_correct: core.
+
(** Now, we build the hash-Cons value from a "hash_eq".
Informal specification:
@@ -180,9 +193,16 @@ Global Opaque hsmem_hash_eq.
Local Hint Resolve hsmem_hash_eq_correct: wlp.
+Definition hscond_hash_eq (hsc1 hsc2: hscond): ?? bool :=
+ DO b1 <~ struct_eq hsc1.(cond) hsc2.(cond);;
+ DO b2 <~ phys_eq hsc1.(lhsv) hsc2.(lhsv);;
+ if b1 && b2 then RET true else RET false
+ .
+
Definition hSVAL: hashP hsval := {| hash_eq := hsval_hash_eq; get_hid:=hsval_get_hid; set_hid:=hsval_set_hid |}.
Definition hLSVAL: hashP list_hsval := {| hash_eq := list_hsval_hash_eq; get_hid:= list_hsval_get_hid; set_hid:= list_hsval_set_hid |}.
Definition hSMEM: hashP hsmem := {| hash_eq := hsmem_hash_eq; get_hid:= hsmem_get_hid; set_hid:= hsmem_set_hid |}.
+Definition hSCOND: hashP hscond := {| hash_eq := hscond_hash_eq; get_hid := hscond_get_hid; set_hid := hscond_set_hid |}.
Program Definition mk_hash_params: Dict.hash_params hsval :=
{|
@@ -195,6 +215,17 @@ Obligation 1.
wlp_simplify.
Qed.
+Program Definition mk_hash_params_cond: Dict.hash_params hscond :=
+ {|
+ Dict.test_eq := phys_eq;
+ Dict.hashing := fun (ht: hscond) => RET (hscond_get_hid ht);
+ Dict.log := fun hc =>
+ DO hc_name <~ string_of_hashcode (hscond_get_hid hc);;
+ println ("unexpected undef behavior of hashcode:" +; (CamlStr hc_name)) |}.
+Obligation 1.
+ wlp_simplify.
+Qed.
+
(** ** various auxiliary (trivial lemmas) *)
Lemma hsilocal_refines_sreg ge sp rs0 m0 hst st:
hsilocal_refines ge sp rs0 m0 hst st -> hsok_local ge sp rs0 m0 hst -> forall r, hsi_sreg_eval ge sp hst r rs0 m0 = seval_sval ge sp (si_sreg st r) rs0 m0.
@@ -257,11 +288,17 @@ Hypothesis hC_hsmem_correct: forall hm,
WHEN hC_hsmem hm ~> hm' THEN forall ge sp rs0 m0,
seval_hsmem ge sp (hdata hm) rs0 m0 = seval_hsmem ge sp hm' rs0 m0.
+Variable hC_hscond: hashinfo hscond -> ?? hscond.
+Hypothesis hC_hscond_correct: forall hc,
+ WHEN hC_hscond hc ~> hc' THEN forall ge sp rs0 m0,
+ seval_hscond ge sp (hdata hc) rs0 m0 = seval_hscond ge sp hc' rs0 m0.
+
(* First, we wrap constructors for hashed values !*)
Definition reg_hcode := 1.
Definition op_hcode := 2.
Definition load_hcode := 3.
+Definition cond_hcode := 4.
Definition hSinput_hcodes (r: reg) :=
DO hc <~ hash reg_hcode;;
@@ -282,6 +319,17 @@ Qed.
Global Opaque hSinput.
Local Hint Resolve hSinput_correct: wlp.
+Definition hScond_hcodes (cond : condition) (lhsv: list_hsval) :=
+ DO hc <~ hash cond_hcode;;
+ DO hv <~ hash cond;;
+ RET [hc;hv;list_hsval_get_hid lhsv].
+
+Definition hScond (cond : condition) (lhsv : list_hsval) : ?? hscond :=
+ DO hv <~ hScond_hcodes cond lhsv;;
+ hC_hscond {| hdata := {| cond := cond; lhsv := lhsv; hscond_hid := unknown_hid |}; hcodes := hv |}.
+
+(* TODO?Scond_correct *)
+
Definition hSop_hcodes (op:operation) (lhsv: list_hsval) :=
DO hc <~ hash op_hcode;;
DO hv <~ hash op;;
@@ -500,10 +548,31 @@ Qed.
Global Opaque fsval_list_proj.
Local Hint Resolve fsval_list_proj_correct: wlp.
+Definition fscond_proj (hsc: hscond) : ?? hscond :=
+ DO b <~ phys_eq hsc.(hscond_hid) unknown_hid;;
+ if b
+ then (* was not yet really hash-consed *)
+ DO args' <~ fsval_list_proj hsc.(lhsv);;
+ hScond hsc.(cond) args'
+ else (* already hash-consed *)
+ RET hsc
+ .
+
+Lemma fscond_proj_correct hsc:
+ WHEN fscond_proj hsc ~> hsc' THEN forall ge sp rs0 m0,
+ seval_hscond ge sp hsc rs0 m0 = seval_hscond ge sp hsc' rs0 m0.
+Proof.
+ wlp_simplify.
+ rewrite <- H0.
+ unfold seval_condition.
+ destruct hsc; simpl in *.
+ rewrite H. reflexivity.
+Qed.
(** ** Assignment of memory *)
Definition hslocal_set_smem (hst:hsistate_local) hm :=
{| hsi_smem := hm;
+ hsi_ok_lscond := hsi_ok_lscond hst;
hsi_ok_lsval := hsi_ok_lsval hst;
hsi_sreg:= hsi_sreg hst
|}.
@@ -780,6 +849,7 @@ Definition hslocal_set_sreg (hst: hsistate_local) (r: reg) (rsv: root_sval) (lr:
else RET (hsi_ok_lsval hst));;
DO simp <~ simplify rsv lr hst;;
RET {| hsi_smem := hst;
+ hsi_ok_lscond := hsi_ok_lscond hst;
hsi_ok_lsval := ok_lhsv;
hsi_sreg := red_PTree_set r simp (hsi_sreg hst) |}.
@@ -791,7 +861,10 @@ Proof.
wlp_simplify.
+ (* may_trap ~> true *)
assert (X: sok_local ge sp rs0 m0 (slocal_set_sreg st r (rsv lr st)) <->
- hsok_local ge sp rs0 m0 {| hsi_smem := hst; hsi_ok_lsval := exta :: hsi_ok_lsval hst; hsi_sreg := red_PTree_set r exta0 hst |}).
+ hsok_local ge sp rs0 m0 {| hsi_smem := hst
+ ; hsi_ok_lscond := hsi_ok_lscond hst
+ ; hsi_ok_lsval := exta :: hsi_ok_lsval hst
+ ; hsi_sreg := red_PTree_set r exta0 hst |}).
{ rewrite sok_local_set_sreg; generalize REF.
intros (OKeq & MEM & REG & MVALID); rewrite OKeq; clear OKeq.
unfold hsok_local; simpl; intuition (subst; eauto);
@@ -803,7 +876,10 @@ Proof.
- generalize REF; intros (OKEQ & _). rewrite OKEQ in * |-; erewrite red_PTree_set_refines; eauto.
+ (* may_trap ~> false *)
assert (X: sok_local ge sp rs0 m0 (slocal_set_sreg st r (rsv lr st)) <->
- hsok_local ge sp rs0 m0 {| hsi_smem := hst; hsi_ok_lsval := hsi_ok_lsval hst; hsi_sreg := red_PTree_set r exta hst |}).
+ hsok_local ge sp rs0 m0 {| hsi_smem := hst
+ ; hsi_ok_lscond := hsi_ok_lscond hst
+ ; hsi_ok_lsval := hsi_ok_lsval hst
+ ; hsi_sreg := red_PTree_set r exta hst |}).
{
rewrite sok_local_set_sreg; generalize REF.
intros (OKeq & MEM & REG & MVALID); rewrite OKeq.
@@ -865,6 +941,13 @@ Qed.
Local Hint Resolve cbranch_expanse_correct: wlp.
Global Opaque cbranch_expanse.
+Definition hslocal_eval_cond (hst:hsistate_local) (cond : condition) (args : list_hsval) : ?? hsistate_local :=
+ DO hsc <~ fscond_proj {| cond := cond; lhsv := args; hscond_hid := unknown_hid |};;
+ RET {| hsi_smem := hst.(hsi_smem)
+ ; hsi_ok_lscond := hsc :: hst.(hsi_ok_lscond)
+ ; hsi_ok_lsval := hst.(hsi_ok_lsval)
+ ; hsi_sreg := hst.(hsi_sreg) |}.
+
Definition hsiexec_inst (i: instruction) (hst: hsistate): ?? (option hsistate) :=
match i with
| Inop pc' =>
@@ -882,8 +965,12 @@ Definition hsiexec_inst (i: instruction) (hst: hsistate): ?? (option hsistate) :
let prev := hst.(hsi_local) in
DO res <~ cbranch_expanse prev cond args;;
let (cond, vargs) := res in
- let ex := {| hsi_cond:=cond; hsi_scondargs:=vargs; hsi_elocal := prev; hsi_ifso := ifso |} in
- RET (Some {| hsi_pc := ifnot; hsi_exits := ex::hst.(hsi_exits); hsi_local := prev |})
+ DO next <~ hslocal_eval_cond prev cond vargs;;
+ if Pos.eq_dec ifso ifnot then
+ RET (Some (hsist_set_local hst ifnot next))
+ else
+ let ex := {| hsi_cond:=cond; hsi_scondargs:=vargs; hsi_elocal := next; hsi_ifso := ifso |} in
+ RET (Some {| hsi_pc := ifnot; hsi_exits := ex::hst.(hsi_exits); hsi_local := next |})
| _ => RET None
end.
@@ -959,6 +1046,185 @@ Proof.
econstructor; simpl; auto; constructor; auto.
Qed.
+Lemma sok_local_eval_cond_simp ge sp rs0 m0 st cond args:
+ sok_local ge sp rs0 m0 (slocal_eval_cond st cond args) ->
+ sok_local ge sp rs0 m0 st.
+Proof.
+ unfold sok_local; intuition.
+ unfold si_pre in *; simpl in *; intuition.
+Qed.
+
+Lemma hsok_local_eval_cond_simp ge sp rs0 m0 st cond args:
+ WHEN hslocal_eval_cond st cond args ~> st' THEN
+ hsok_local ge sp rs0 m0 st' ->
+ hsok_local ge sp rs0 m0 st.
+Proof.
+ wlp_simplify.
+ { constructor; eauto.
+ - intros hsv.
+ inversion H2 as (? & ? & ?).
+ intros. eauto.
+ - split.
+ + intros hsc INS.
+ inversion H2 as (? & ? & ?).
+ unfold hslocal_eval_cond in *; simpl in *.
+ eauto.
+ + inversion H2 as (? & ? & ?). eauto. }
+ { inversion H0 as (? & ? & ?); simpl in *.
+ constructor.
+ - intros hsv'. intros. apply H1. assumption.
+ - constructor.
+ + intros hsc'. intros. apply H2. right; assumption.
+ + assumption. }
+Qed.
+
+Lemma sok_local_eval_cond ge sp rs0 m0 st cond args:
+ sok_local ge sp rs0 m0 (slocal_eval_cond st cond args)
+ <-> (sok_local ge sp rs0 m0 st /\ seval_condition ge sp cond args st.(si_smem) rs0 m0 <> None).
+Proof.
+ split.
+ - (* -> *)
+ intros SOK_LOCAL'.
+ split.
+ + apply sok_local_eval_cond_simp in SOK_LOCAL'; assumption.
+ + inversion SOK_LOCAL' as (PRE & ? & ?).
+ unfold slocal_eval_cond in PRE; simpl in PRE.
+ destruct PRE; auto.
+ - (* <- *)
+ intros ((PRE & SMEM & SVAL0) & SVAL).
+ unfold slocal_eval_cond; simpl.
+ repeat (split; try tauto; eauto).
+Qed.
+
+Lemma hsok_local_eval_cond ge sp rs0 m0 st cond args:
+ WHEN hslocal_eval_cond st cond args ~> hst' THEN
+ hsok_local ge sp rs0 m0 hst'
+ <-> (hsok_local ge sp rs0 m0 st /\ seval_hscond ge sp {| cond := cond; lhsv := args; hscond_hid := unknown_hid |} rs0 m0 <> None).
+Proof.
+ wlp_simplify.
+ - inversion H2 as (A & B & C).
+ constructor; auto.
+ constructor; auto.
+ simpl in B.
+ intros hsc HSC_IN.
+ apply B. right. assumption.
+ - inversion H2 as (A & B & C); simpl in *.
+ unfold seval_condition in *. rewrite H in *.
+ rewrite H0 in H3.
+ try_simplify_someHyps.
+ apply B. left; reflexivity.
+ - inversion H3 as (A & B & C).
+ constructor; auto.
+ constructor; auto.
+ simpl in B.
+ intros hsc HSC_IN.
+ destruct HSC_IN.
+ + subst. rewrite <- H0.
+ unfold seval_condition in *. rewrite <- H. assumption.
+ + apply B. assumption.
+ - inversion H0 as (A & B & C); simpl in *.
+ constructor; auto.
+ - inversion H0 as (A & B & C); simpl in *.
+ remember {| cond := cond; lhsv := args; hscond_hid := unknown_hid |} as hsc.
+ specialize (B hsc).
+ subst. apply B in H1.
+ + contradiction.
+ + left; reflexivity.
+ - inversion H1 as (A & B & C); constructor; auto.
+ simpl.
+ constructor.
+ + intros hsc. intros.
+ destruct H0.
+ * subst. simpl in *. assumption.
+ * apply B. assumption.
+ + assumption.
+Qed.
+
+Lemma hslocal_eval_cond_correct hst cond args cond' args':
+ WHEN hslocal_eval_cond hst cond args ~> hst' THEN
+ forall ge sp rs0 m0 st
+ (REF: hsilocal_refines ge sp rs0 m0 hst st)
+ (EQUIV_COND: hsok_local ge sp rs0 m0 hst ->
+ seval_condition ge sp cond (hsval_list_proj args) st.(si_smem) rs0 m0 =
+ seval_condition ge sp cond' args' st.(si_smem) rs0 m0),
+ (hsilocal_refines ge sp rs0 m0 hst' (slocal_eval_cond st cond' args')).
+Proof. (*
+ intros; inversion REF as (SOK_HSOK_IFF & SMEM & SREG & MEMVALID).
+ inversion SOK_HSOK_IFF as (SOK_IMP_HSOK & HSOK_IMP_SOK).
+ constructor.
+ - (* sok_local <-> hsok_loal *)
+ split.
+ + (* sok_local -> hsok_local *)
+ intros SOK_LOCAL'.
+ assert (SOK_LOCAL_ := SOK_LOCAL').
+ apply sok_local_eval_cond in SOK_LOCAL_.
+ inversion SOK_LOCAL_ as (SOK_LOCAL & SCOND); clear SOK_LOCAL_.
+ assert (HSOK_LOCAL := SOK_IMP_HSOK).
+ specialize (HSOK_LOCAL SOK_LOCAL); clear SOK_IMP_HSOK.
+ specialize (EQUIV_COND HSOK_LOCAL).
+
+ constructor; try split.
+ * unfold hslocal_eval_cond; simpl.
+ inversion HSOK_LOCAL; auto.
+ * unfold hslocal_eval_cond; simpl.
+ remember ({| cond := cond; lhsv := args |}) as hsc eqn:HSC.
+ intros hsc'.
+ intros EQ_OR_IN; destruct EQ_OR_IN; auto.
+ { subst; simpl.
+ generalize SCOND. rewrite <- EQUIV_COND.
+ unfold seval_condition; simpl.
+ inversion_SOME args'. intros ARGS.
+ inversion_SOME m'. intros SMEM'.
+ intros ?.
+ rewrite ARGS.
+ erewrite cond_valid_pointer_eq; eauto.
+ intros b z.
+ symmetry; eapply MEMVALID. assumption. }
+ { inversion HSOK_LOCAL as (? & ? & ?); auto. }
+ * unfold hslocal_eval_cond; simpl.
+ inversion HSOK_LOCAL as (? & ? & ?); auto.
+ + (* hsok_local -> sok_local *)
+ intros HSOK_LOCAL'.
+ assert (_HSOK_LOCAL := HSOK_LOCAL').
+ apply hsok_local_eval_cond in _HSOK_LOCAL.
+ inversion _HSOK_LOCAL as (HSOK_LOCAL & HSCOND); clear _HSOK_LOCAL.
+ assert (SOK_LOCAL := HSOK_IMP_SOK).
+ specialize (SOK_LOCAL HSOK_LOCAL); clear HSOK_IMP_SOK.
+ specialize (EQUIV_COND HSOK_LOCAL).
+ constructor; try split.
+ * simpl in HSCOND.
+ generalize HSCOND.
+ rewrite <- EQUIV_COND.
+ unfold seval_condition; simpl.
+ inversion_SOME args'. intros ARGS ?.
+ rewrite ARGS.
+ rewrite <- SMEM; try assumption.
+ inversion HSOK_LOCAL as (? & ? & MEMOK).
+ inversion_SOME m'. intros SMEM'.
+ erewrite cond_valid_pointer_eq; eauto.
+ intros b z.
+ eapply MEMVALID. rewrite <- SMEM; assumption.
+ * inversion SOK_LOCAL; auto.
+ * unfold slocal_eval_cond; simpl.
+ inversion SOK_LOCAL as (? & ? & ?); auto.
+ * unfold slocal_eval_cond; simpl.
+ inversion SOK_LOCAL as (? & ? & ?); auto.
+ - repeat split.
+ + (* hsok_local -> seval_smem _ = seval_sem _ *)
+ intros HSOK_LOCAL'.
+ unfold hslocal_eval_cond, slocal_eval_cond; simpl.
+ apply hsok_local_eval_cond_simp in HSOK_LOCAL'.
+ apply SMEM; assumption.
+ + (* hsok_local -> hsi_sreg_eval _ = seval_sval _ *)
+ intros HSOK_LOCAL'.
+ unfold hslocal_eval_cond, slocal_eval_cond; simpl.
+ apply hsok_local_eval_cond_simp in HSOK_LOCAL'.
+ apply SREG; assumption.
+ + (* seval_smem _ = Some _ -> Mem.valid_pionter _ = Mem.valid_pointer _ *)
+ unfold slocal_eval_cond; simpl.
+ apply MEMVALID. *)
+Admitted.
+
Lemma hsiexec_inst_correct i hst:
WHEN hsiexec_inst i hst ~> o THEN forall hst' st,
o = Some hst' ->
@@ -980,20 +1246,34 @@ Proof.
wlp_simplify; try_simplify_someHyps; eexists; intuition eauto.
eapply hsist_set_local_correct_dyn; eauto.
unfold sok_local; simpl; intuition.
- - (* refines_stat Icond *)
- wlp_simplify; try_simplify_someHyps; eexists; intuition eauto.
- + unfold hsistate_refines_stat, hsiexits_refines_stat in *; simpl; intuition.
- constructor; simpl; eauto.
- constructor.
- + destruct REF as (EXREF & LREF & NEST).
- split.
- * constructor; simpl; auto.
- constructor; simpl; auto.
- intros; erewrite seval_condition_refines; eauto.
- * split; simpl; auto.
- destruct NEST as [|st0 se lse TOP NEST];
- econstructor; simpl; auto; constructor; auto.
-Qed.
+ - wlp_simplify; try_simplify_someHyps.
+ + (* Useless Icond *)
+ destruct Pos.eq_dec; try contradiction.
+ eexists; intuition eauto.
+ inversion REF as (REF_DYN & REF_LOC & NEST).
+ inversion REF_LOC as (SOK_HSOK_IFF & SMEM_IF_HSOK & SREG_IF_HSOK & MEMVALID_IF_HSOK).
+ eapply hsist_set_local_correct_dyn; eauto.
+ * specialize (H ge sp rs0 m0 st.(si_local) REF_LOC).
+ eapply hslocal_eval_cond_correct; eauto.
+ admit.
+ * generalize sok_local_eval_cond_simp; simpl; eauto.
+ + destruct Pos.eq_dec; try contradiction.
+ eexists; intuition eauto.
+ * unfold hsistate_refines_stat, hsiexits_refines_stat in *; simpl; intuition.
+ constructor; simpl; eauto.
+ constructor.
+ * destruct REF as (EXREF & LREF & NEST).
+ split.
+ { constructor; simpl; auto. admit.
+ (* constructor; simpl; auto.
+ intros; erewrite seval_condition_refines; eauto. *) }
+ { simpl. intuition eauto.
+ - specialize (H ge sp rs0 m0 st.(si_local) LREF).
+ admit.
+ (* apply hslocal_eval_cond_correct; auto. *)
+ - constructor; auto. admit. (*
+ apply sok_local_eval_cond_simp. *) }
+Admitted.
Global Opaque hsiexec_inst.
Local Hint Resolve hsiexec_inst_correct: wlp.
@@ -1147,7 +1427,10 @@ Local Hint Resolve hsexec_final_correct: wlp.
Definition init_hsistate_local (_:unit): ?? hsistate_local
:= DO hm <~ hSinit ();;
- RET {| hsi_smem := hm; hsi_ok_lsval := nil; hsi_sreg := PTree.empty hsval |}.
+ RET {| hsi_smem := hm
+ ; hsi_ok_lscond := nil
+ ; hsi_ok_lsval := nil
+ ; hsi_sreg := PTree.empty hsval |}.
Lemma init_hsistate_local_correct:
WHEN init_hsistate_local () ~> hsl THEN forall ge sp rs0 m0,
@@ -1224,10 +1507,11 @@ End CanonBuilding.
Theorem hsexec_correct
(hC_hsval : hashinfo hsval -> ?? hsval)
(hC_list_hsval : hashinfo list_hsval -> ?? list_hsval)
+ (hC_hscond : hashinfo hscond -> ?? hscond)
(hC_hsmem : hashinfo hsmem -> ?? hsmem)
(f : function)
(pc : node):
- WHEN hsexec hC_hsval hC_list_hsval hC_hsmem f pc ~> hst THEN forall
+ WHEN hsexec hC_hsval hC_list_hsval hC_hsmem hC_hscond f pc ~> hst THEN forall
(hC_hsval_correct: forall hs,
WHEN hC_hsval hs ~> hs' THEN forall ge sp rs0 m0,
seval_sval ge sp (hsval_proj (hdata hs)) rs0 m0 =
@@ -1236,6 +1520,10 @@ Theorem hsexec_correct
WHEN hC_list_hsval lh ~> lh' THEN forall ge sp rs0 m0,
seval_list_sval ge sp (hsval_list_proj (hdata lh)) rs0 m0 =
seval_list_sval ge sp (hsval_list_proj lh') rs0 m0)
+ (hC_hscond_correct: forall hc,
+ WHEN hC_hscond hc ~> hc' THEN forall ge sp rs0 m0,
+ seval_hscond ge sp (hdata hc) rs0 m0 =
+ seval_hscond ge sp hc' rs0 m0)
(hC_hsmem_correct: forall hm,
WHEN hC_hsmem hm ~> hm' THEN forall ge sp rs0 m0,
seval_smem ge sp (hsmem_proj (hdata hm)) rs0 m0 =
@@ -1322,6 +1610,7 @@ Definition hsilocal_frame_simu_check frame hst1 hst2 : ?? unit :=
DEBUG("? frame check");;
phys_check (hsi_smem hst2) (hsi_smem hst1) "hsilocal_frame_simu_check: hsi_smem sets aren't equiv";;
PTree_frame_eq_check frame (hsi_sreg hst1) (hsi_sreg hst2);;
+ Sets.assert_list_incl mk_hash_params_cond (hsi_ok_lscond hst2) (hsi_ok_lscond hst1);;
Sets.assert_list_incl mk_hash_params (hsi_ok_lsval hst2) (hsi_ok_lsval hst1);;
DEBUG("=> frame check: OK").
@@ -1556,9 +1845,10 @@ Definition simu_check_single (dm: PTree.t node) (f: RTLpath.function) (tf: RTLpa
let (pc2, pc1) := m in
(* creating the hash-consing tables *)
DO hC_sval <~ hCons hSVAL;;
+ DO hC_scond <~ hCons hSCOND;;
DO hC_list_hsval <~ hCons hLSVAL;;
DO hC_hsmem <~ hCons hSMEM;;
- let hsexec := hsexec hC_sval.(hC) hC_list_hsval.(hC) hC_hsmem.(hC) in
+ let hsexec := hsexec hC_sval.(hC) hC_list_hsval.(hC) hC_hsmem.(hC) hC_scond.(hC) in
(* performing the hash-consed executions *)
XDEBUG pc1 (fun pc => DO name_pc <~ string_of_Z (Zpos pc);; RET ("entry-point of input superblock: " +; name_pc)%string);;
DO hst1 <~ hsexec f pc1;;
@@ -1574,13 +1864,36 @@ Lemma simu_check_single_correct dm tf f pc1 pc2:
sexec_simu dm f tf pc1 pc2.
Proof.
unfold sexec_simu; wlp_simplify.
- exploit H2; clear H2. 1-3: wlp_simplify.
- intros (st2 & SEXEC2 & REF2). try_simplify_someHyps.
- exploit H3; clear H3. 1-3: wlp_simplify.
- intros (st3 & SEXEC3 & REF3). try_simplify_someHyps.
- eexists. eexists. split; eauto. split; eauto.
- intros ctx.
- eapply hsstate_simu_spec_correct; eauto.
+ exploit H3. 1-5: wlp_simplify.
+ - simpl.
+ exploit H0; eauto.
+ wlp_simplify.
+ + assert (exta7 = true). { destruct exta7; auto. }
+ subst.
+ assert (exta8 = true). { destruct exta8; auto. } subst.
+ unfold hscond_set_hid.
+ rewrite H8.
+ assert (lhsv x = lhsv y). { apply H9. reflexivity. }
+ rewrite H12.
+ reflexivity.
+ + discriminate.
+ - intros (st2 & SEXEC2 & REF2). try_simplify_someHyps.
+ exploit H4; clear H4. 1-5: wlp_simplify.
+ + simpl.
+ exploit H0; eauto.
+ wlp_simplify. 2: { discriminate. }
+ assert (exta7 = true). { destruct exta7; auto. }
+ subst.
+ assert (exta8 = true). { destruct exta8; auto. } subst.
+ unfold hscond_set_hid.
+ rewrite H4.
+ assert (lhsv x = lhsv y). { apply H5. reflexivity. }
+ rewrite H9.
+ reflexivity.
+ + intros (st3 & SEXEC3 & REF3). try_simplify_someHyps.
+ eexists. eexists. split; eauto. split; eauto.
+ intros ctx.
+ eapply hsstate_simu_spec_correct; eauto.
Qed.
Global Opaque simu_check_single.
Global Hint Resolve simu_check_single_correct: wlp.
diff --git a/scheduling/RTLpathSE_simu_specs.v b/scheduling/RTLpathSE_simu_specs.v
index 5051d805..c93c1343 100644
--- a/scheduling/RTLpathSE_simu_specs.v
+++ b/scheduling/RTLpathSE_simu_specs.v
@@ -74,7 +74,11 @@ Scheme hsval_mut := Induction for hsval Sort Prop
with list_hsval_mut := Induction for list_hsval Sort Prop
with hsmem_mut := Induction for hsmem Sort Prop.
-
+Record hscond :=
+ { cond : condition
+ ; lhsv : list_hsval
+ ; hscond_hid : hashcode
+ (** NB: Like HSop, does also not depend on the memory! *) }.
(** Symbolic final value -- from hash-consed values
It does not seem useful to hash-consed these final values (because they are final).
@@ -116,6 +120,8 @@ Notation "'seval_list_hsval' ge sp lhv" := (seval_list_sval ge sp (hsval_list_pr
(only parsing, at level 0, ge at next level, sp at next level, lhv at next level): hse.
Notation "'seval_hsmem' ge sp hsm" := (seval_smem ge sp (hsmem_proj hsm))
(only parsing, at level 0, ge at next level, sp at next level, hsm at next level): hse.
+Notation "'seval_hscond' ge sp hsc" := (seval_condition ge sp hsc.(cond) (hsval_list_proj hsc.(lhsv)) Sinit)
+ (only parsing, at level 0, ge at next level, sp at next level, hsc at next level): hse.
Notation "'sval_refines' ge sp rs0 m0 hv sv" := (seval_hsval ge sp hv rs0 m0 = seval_sval ge sp sv rs0 m0)
(only parsing, at level 0, ge at next level, sp at next level, rs0 at next level, m0 at next level, hv at next level, sv at next level): hse.
@@ -139,6 +145,7 @@ Record hsistate_local :=
(** [hsi_smem] represents the current smem symbolic evaluations.
(we also recover the history of smem in hsi_smem) *)
hsi_smem:> hsmem;
+ hsi_ok_lscond: list hscond;
(** For the values in registers:
1) we store a list of sval evaluations
2) we encode the symbolic regset by a PTree *)
@@ -156,6 +163,7 @@ Definition hsi_sreg_eval ge sp hst r := seval_sval ge sp (hsi_sreg_proj hst r).
Definition hsok_local ge sp rs0 m0 (hst: hsistate_local) : Prop :=
(forall hsv, List.In hsv (hsi_ok_lsval hst) -> seval_hsval ge sp hsv rs0 m0 <> None)
+ /\ (forall hsc, List.In hsc (hsi_ok_lscond hst) -> seval_hscond ge sp hsc rs0 m0 <> None)
/\ (seval_hsmem ge sp (hst.(hsi_smem)) rs0 m0 <> None).
(* refinement link between a (st: sistate_local) and (hst: hsistate_local) *)
@@ -380,6 +388,7 @@ Definition hsstate_refines (hst: hsstate) (st:sstate): Prop :=
*)
Definition hsilocal_simu_spec (alive: Regset.t) (hst1 hst2: hsistate_local) :=
List.incl (hsi_ok_lsval hst2) (hsi_ok_lsval hst1)
+ /\ List.incl (hsi_ok_lscond hst2) (hsi_ok_lscond hst1)
/\ (forall r, Regset.In r alive -> PTree.get r hst2 = PTree.get r hst1)
/\ hsi_smem hst1 = hsi_smem hst2.
@@ -423,8 +432,9 @@ Lemma hsilocal_simu_spec_nofail ge1 ge2 of sp rs0 m0 hst1 hst2:
hsok_local ge1 sp rs0 m0 hst1 ->
hsok_local ge2 sp rs0 m0 hst2.
Proof.
- intros (RSOK & _ & MEMOK) GFS (OKV & OKM). constructor.
+ intros (RSOK & CONDOK & _ & MEMOK) GFS (OKV & OKC & OKM). repeat constructor.
- intros sv INS. apply RSOK in INS. apply OKV in INS. erewrite seval_preserved; eauto.
+ - intros sc INS. apply CONDOK in INS. apply OKC in INS. erewrite seval_condition_preserved; eauto.
- erewrite MEMOK in OKM. erewrite smem_eval_preserved; eauto.
Qed.
@@ -446,7 +456,7 @@ Proof.
assert (SIPRE: si_pre st2 ge2 sp rs0 m0). { destruct HREF2 as (OKEQ & _ & _). rewrite <- OKEQ in HOK2. apply HOK2. }
assert (SMEMEVAL: seval_smem ge2 sp (si_smem st2) rs0 m0 = Some m). {
destruct HREF2 as (_ & MEMEQ2 & _). destruct HREF1 as (_ & MEMEQ1 & _).
- destruct CORE as (_ & _ & MEMEQ3).
+ destruct CORE as (_ & _ & _ & MEMEQ3).
rewrite <- MEMEQ2; auto. rewrite <- MEMEQ3.
erewrite smem_eval_preserved; [| eapply GFS].
rewrite MEMEQ1; auto. }
@@ -464,7 +474,7 @@ Proof.
assert (seval_sval ge2 sp (hsval_proj h) rs0 m0 <> None) by congruence.
destruct (seval_sval ge2 sp _ rs0 m0); [reflexivity | contradiction].
+ intros r ALIVE. destruct HREF2 as (_ & _ & A & _). destruct HREF1 as (_ & _ & B & _).
- destruct CORE as (_ & C & _). rewrite seval_partial_regset_get.
+ destruct CORE as (_ & _ & C & _). rewrite seval_partial_regset_get.
assert (OPT: forall (x y: val), Some x = Some y -> x = y) by congruence.
destruct (hst2 ! r) eqn:HST2; apply OPT; clear OPT.
++ unfold seval_sval_partial.
@@ -521,7 +531,7 @@ Proof.
generalize (genv_match ctx).
intro GFS; exploit hsiexit_simu_spec_nofail; eauto.
destruct HDYN2 as (_ & SCOND2). intro OK2. rewrite <- SCOND2 by assumption. clear OK1 OK2 SCOND2.
- destruct SIMUC as ((path & _ & LSIMU) & _ & CONDEQ & ARGSEQ). destruct LSIMU as (_ & _ & MEMEQ).
+ destruct SIMUC as ((path & _ & LSIMU) & _ & CONDEQ & ARGSEQ). destruct LSIMU as (_ & _ & _ & MEMEQ).
rewrite CONDEQ. rewrite ARGSEQ. rewrite MEMEQ. erewrite <- hseval_condition_preserved; eauto.
}
constructor; [assumption|]. intros is1 ICONT SSEME.
@@ -810,7 +820,7 @@ Proof.
erewrite seval_builtin_sval_preserved by eauto.
erewrite IHlsv by eauto.
reflexivity.
-Qed.
+Qed.
Lemma barg_proj_refines_eq ge ge' sp rs0 m0:
(forall s, Genv.find_symbol ge s = Genv.find_symbol ge' s) ->
diff --git a/scheduling/RTLpathSE_theory.v b/scheduling/RTLpathSE_theory.v
index 2a791feb..74f90b0e 100644
--- a/scheduling/RTLpathSE_theory.v
+++ b/scheduling/RTLpathSE_theory.v
@@ -126,6 +126,11 @@ Definition ssem_local (ge: RTL.genv) (sp:val) (st: sistate_local) (rs0: regset)
/\ seval_smem ge sp st.(si_smem) rs0 m0 = Some m
/\ forall (r:reg), seval_sval ge sp (st.(si_sreg) r) rs0 m0 = Some (rs#r).
+Definition seval_condition ge sp (cond: condition) (lsv: list_sval) (sm: smem) rs0 m0 : option bool :=
+ SOME args <- seval_list_sval ge sp lsv rs0 m0 IN
+ SOME m <- seval_smem ge sp sm rs0 m0 IN
+ eval_condition cond args m.
+
Definition sabort_local (ge: RTL.genv) (sp:val) (st: sistate_local) (rs0: regset) (m0: mem): Prop :=
~(st.(si_pre) ge sp rs0 m0)
\/ seval_smem ge sp st.(si_smem) rs0 m0 = None
@@ -135,10 +140,7 @@ Definition sabort_local (ge: RTL.genv) (sp:val) (st: sistate_local) (rs0: regset
Record sistate_exit := mk_sistate_exit
{ si_cond: condition; si_scondargs: list_sval; si_elocal: sistate_local; si_ifso: node }.
-Definition seval_condition ge sp (cond: condition) (lsv: list_sval) (sm: smem) rs0 m0 : option bool :=
- SOME args <- seval_list_sval ge sp lsv rs0 m0 IN
- SOME m <- seval_smem ge sp sm rs0 m0 IN
- eval_condition cond args m.
+
Definition all_fallthrough ge sp (lx: list sistate_exit) rs0 m0: Prop :=
forall ext, List.In ext lx ->
@@ -457,6 +459,11 @@ Definition slocal_store st chunk addr args src : sistate_local :=
let sm := Sstore (si_smem st) chunk addr args src
in slocal_set_smem st sm.
+Definition slocal_eval_cond (st:sistate_local) (cond : condition) (args : list_sval) :=
+ {| si_pre:=(fun ge sp rs m => seval_condition ge sp cond args st.(si_smem) rs m <> None /\ (st.(si_pre) ge sp rs m));
+ si_sreg:=st.(si_sreg);
+ si_smem:= st.(si_smem) |}.
+
Definition siexec_inst (i: instruction) (st: sistate): option sistate :=
match i with
| Inop pc' =>
@@ -474,11 +481,15 @@ Definition siexec_inst (i: instruction) (st: sistate): option sistate :=
| Istore chunk addr args src pc' =>
let next := slocal_store st.(si_local) chunk addr args src in
Some (sist_set_local st pc' next)
- | Icond cond args ifso ifnot _ =>
+ | Icond cond args ifso ifnot _ =>
let prev := st.(si_local) in
let vargs := list_sval_inj (List.map prev.(si_sreg) args) in
- let ex := {| si_cond:=cond; si_scondargs:=vargs; si_elocal := prev; si_ifso := ifso |} in
- Some {| si_pc := ifnot; si_exits := ex::st.(si_exits); si_local := prev |}
+ let next := slocal_eval_cond prev cond vargs in
+ if Pos.eq_dec ifso ifnot then
+ Some (sist_set_local st ifnot next)
+ else
+ let ex := {| si_cond:=cond; si_scondargs:=vargs; si_elocal := next; si_ifso := ifso |} in
+ Some {| si_pc := ifnot; si_exits := ex::st.(si_exits); si_local := next |}
| _ => None
end.
@@ -510,6 +521,13 @@ Proof.
unfold sabort_local. simpl; intuition.
Qed.
+Lemma slocal_eval_cond_preserves_sabort_local ge sp st rs0 m0 cond args:
+ sabort_local ge sp st rs0 m0 ->
+ sabort_local ge sp (slocal_eval_cond st cond args) rs0 m0.
+Proof.
+ unfold sabort_local. simpl; intuition.
+Qed.
+
Lemma all_fallthrough_upto_exit_cons ge sp ext lx ext' exits rs m:
all_fallthrough_upto_exit ge sp ext lx exits rs m ->
all_fallthrough_upto_exit ge sp ext lx (ext'::exits) rs m.
@@ -549,35 +567,46 @@ Proof.
- left. constructor; eauto. eapply slocal_set_smem_preserves_sabort_local; eauto.
- right. exists ext0, lx0. constructor; eauto.
(* COND *)
- * remember ({| si_cond := _; si_scondargs := _; si_elocal := _; si_ifso := _ |}) as ext.
- destruct ABORT as [(ALLF & ABORTL) | (ext0 & lx0 & ALLFU & ABORTE)].
- - destruct (seval_condition ge sp (si_cond ext) (si_scondargs ext)
+ * destruct Pos.eq_dec; simpl in *.
+ { (* "Useless if" (two identical successors) *)
+ destruct ABORT as [(ALLF & ABORTL) | (ext0 & lx0 & ALLFU & ABORTE)].
+ - left; constructor; try_simplify_someHyps;
+ apply slocal_eval_cond_preserves_sabort_local; auto.
+ - right; exists ext0, lx0; try_simplify_someHyps. }
+ { (* Normal Icond *)
+ remember ({| si_cond := _; si_scondargs := _; si_elocal := _; si_ifso := _ |}) as ext.
+ destruct ABORT as [(ALLF & ABORTL) | (ext0 & lx0 & ALLFU & ABORTE)].
+ { destruct (seval_condition ge sp (si_cond ext) (si_scondargs ext)
(si_smem (si_elocal ext)) rs m) eqn:SEVAL; [destruct b|].
- (* case true *)
- + right. exists ext, (si_exits st).
- constructor.
- ++ constructor. econstructor; eauto. eauto.
- ++ unfold sabort_exit. right. constructor; eauto.
- subst. simpl. eauto.
- (* case false *)
- + left. constructor; eauto. eapply all_fallthrough_cons; eauto.
- (* case None *)
- + right. exists ext, (si_exits st). constructor.
- ++ constructor. econstructor; eauto. eauto.
- ++ unfold sabort_exit. left. eauto.
- - right. exists ext0, lx0. constructor; eauto. eapply all_fallthrough_upto_exit_cons; eauto.
+ (* case true *)
+ - right. exists ext, (si_exits st).
+ split.
+ + constructor; auto; inversion H0; constructor.
+ + unfold sabort_exit; right; constructor; auto.
+ subst; simpl in *; auto.
+ apply slocal_eval_cond_preserves_sabort_local; assumption.
+ (* case false *)
+ - left. constructor; eauto; inversion H0; simpl.
+ + apply all_fallthrough_cons; auto.
+ + apply slocal_eval_cond_preserves_sabort_local; auto.
+ (* case None *)
+ - right. exists ext, (si_exits st). constructor.
+ + constructor; auto; inversion H0; constructor.
+ + unfold sabort_exit; intuition. }
+ { right. exists ext0, lx0. constructor; eauto.
+ inversion H0; simpl. eapply all_fallthrough_upto_exit_cons; eauto. } }
Qed.
Lemma siexec_inst_WF i st:
siexec_inst i st = None -> default_succ i = None.
Proof.
- destruct i; simpl; unfold sist_set_local; simpl; congruence.
+ destruct i; simpl; try destruct Pos.eq_dec; unfold sist_set_local; simpl; congruence.
Qed.
Lemma siexec_inst_default_succ i st st':
siexec_inst i st = Some st' -> default_succ i = Some (st'.(si_pc)).
Proof.
- destruct i; simpl; unfold sist_set_local; simpl; try congruence;
+ destruct i; simpl; try destruct Pos.eq_dec; unfold sist_set_local; simpl; try congruence;
intro H; inversion_clear H; simpl; auto.
Qed.
@@ -594,7 +623,7 @@ Proof.
assert (forall r : reg, In r l -> seval_sval ge sp (si_sreg st r) rs0 m0 = None -> False).
{ intros r INR. eapply ALLR. right. assumption. }
intro SVALLIST. intro. eapply IHl; eauto.
- + intros. exploit (ALLR a); simpl; eauto.
+ + intros. exploit (ALLR a); simpl; eauto.
Qed.
Lemma siexec_inst_correct ge sp i st rs0 m0 rs m:
@@ -603,7 +632,7 @@ Lemma siexec_inst_correct ge sp i st rs0 m0 rs m:
ssem_internal_opt2 ge sp (siexec_inst i st) rs0 m0 (istep ge i sp rs m).
Proof.
intros (PRE & MEM & REG) NYE.
- destruct i; simpl; auto.
+ destruct i eqn:INST; simpl; auto.
+ (* Nop *)
constructor; [|constructor]; simpl; auto.
constructor; auto.
@@ -685,25 +714,55 @@ Proof.
+ (* COND *)
Local Hint Resolve is_tail_refl: core.
Local Hint Unfold ssem_local: core.
- inversion_SOME b; intros COND.
- { destruct b; simpl; unfold ssem_internal, ssem_local; simpl.
- - remember (mk_sistate_exit _ _ _ _) as ext. exists ext, (si_exits st).
- constructor; constructor; subst; simpl; auto.
- unfold seval_condition. subst; simpl.
+ destruct Pos.eq_dec.
+ { (* Useless if *)
+ subst; destruct eval_condition eqn:EV_COND.
+ - constructor; simpl; intuition. 2: { destruct b; auto. } constructor; intuition.
+ unfold slocal_eval_cond; try_simplify_someHyps.
+ unfold seval_condition; erewrite seval_list_sval_inj; simpl; auto.
+ inversion_SOME m'; intros; assert (m' = m) by congruence; subst.
+ rewrite EV_COND; intuition; discriminate.
+ - simpl; unfold sabort; simpl; left; intuition.
+ unfold sabort_local; simpl; unfold seval_condition.
erewrite seval_list_sval_inj; simpl; auto.
- try_simplify_someHyps.
- - intuition. unfold all_fallthrough in * |- *. simpl.
- intuition. subst. simpl.
+ try_simplify_someHyps; intuition. }
+ { (* Normal Icond *)
+ inversion_SOME b; intros COND.
+ { (* eval_condition did not fail *)
+ destruct b; simpl; unfold ssem_internal, ssem_local; simpl.
+ - (* Case true *)
+ remember (mk_sistate_exit _ _ _ _) as ext. exists ext, (si_exits st).
+ split. 2: { constructor; auto. }
+ constructor.
+ 2: { constructor; subst; simpl; auto.
+ constructor; auto. unfold slocal_eval_cond; simpl.
+ constructor; auto.
+ unfold seval_condition.
+ erewrite seval_list_sval_inj; auto.
+ rewrite MEM; congruence. }
+ unfold seval_condition.
+ subst; simpl.
+ erewrite seval_list_sval_inj; simpl; auto.
+ rewrite MEM. assumption.
+ - (* Case false *)
+ intuition.
+ + unfold all_fallthrough in * |- *.
+ try_simplify_someHyps.
+ unfold seval_condition; erewrite seval_list_sval_inj; simpl; auto.
+ try_simplify_someHyps; discriminate.
+ + unfold all_fallthrough in * |- *. simpl.
+ intuition. subst. simpl.
+ unfold seval_condition.
+ erewrite seval_list_sval_inj; simpl; auto.
+ try_simplify_someHyps. }
+ { (* Case None (eval_condition failed) *)
+ unfold sabort. simpl. right.
+ remember (mk_sistate_exit _ _ _ _) as ext. exists ext, (si_exits st).
+ constructor; [constructor; subst; simpl; auto|].
+ left. subst; simpl; auto.
unfold seval_condition.
erewrite seval_list_sval_inj; simpl; auto.
- try_simplify_someHyps. }
- { unfold sabort. simpl. right.
- remember (mk_sistate_exit _ _ _ _) as ext. exists ext, (si_exits st).
- constructor; [constructor; subst; simpl; auto|].
- left. subst; simpl; auto.
- unfold seval_condition.
- erewrite seval_list_sval_inj; simpl; auto.
- try_simplify_someHyps. }
+ try_simplify_someHyps. } }
Qed.
@@ -713,7 +772,8 @@ Lemma siexec_inst_correct_None ge sp i st rs0 m0 rs m:
istep ge i sp rs m = None.
Proof.
intros (PRE & MEM & REG).
- destruct i; simpl; unfold sist_set_local, ssem_internal, ssem_local; simpl; try_simplify_someHyps.
+ destruct i; simpl; unfold sist_set_local, ssem_internal, ssem_local; simpl;
+ try destruct Pos.eq_dec; try_simplify_someHyps.
Qed.
(** * Symbolic execution of the internal steps of a path *)
@@ -730,7 +790,8 @@ Lemma siexec_inst_add_exits i st st':
siexec_inst i st = Some st' ->
( si_exits st' = si_exits st \/ exists ext, si_exits st' = ext :: si_exits st ).
Proof.
- destruct i; simpl; intro SISTEP; inversion_clear SISTEP; unfold siexec_inst; simpl; (discriminate || eauto).
+ destruct i; simpl; intro SISTEP; inversion SISTEP; unfold siexec_inst; simpl;
+ try destruct Pos.eq_dec; try_simplify_someHyps.
Qed.
Lemma siexec_inst_preserves_allfu ge sp ext lx rs0 m0 st st' i:
@@ -740,7 +801,8 @@ Lemma siexec_inst_preserves_allfu ge sp ext lx rs0 m0 st st' i:
Proof.
intros ALLFU SISTEP. destruct ALLFU as (ISTAIL & ALLF).
constructor; eauto.
- destruct i; simpl in SISTEP; inversion_clear SISTEP; simpl; (discriminate || eauto).
+ destruct i; simpl in SISTEP; inversion SISTEP; simpl;
+ try destruct Pos.eq_dec; try_simplify_someHyps.
Qed.
Lemma siexec_path_correct_false ge sp f rs0 m0 st' is:
@@ -819,6 +881,9 @@ Proof.
(* icontinue is0 = true *)
intros; eapply IHpath; eauto.
destruct i; simpl in * |- *; unfold sist_set_local in * |- *; try_simplify_someHyps.
+ { destruct Pos.eq_dec; simpl.
+ { intros. inversion Hst1. simpl; auto. }
+ { intros. inversion Hst1. simpl; auto. } }
(* icontinue is0 = false -> EARLY EXIT *)
destruct (siexec_path path f st1) as [st2|] eqn: Hst2; simpl; eauto.
destruct WF. erewrite siexec_inst_default_succ; eauto.
@@ -1079,7 +1144,7 @@ Lemma sexec_final_correct pge ge sp i (f:function) pc st stack rs0 m0 t rs m s:
ssem_final pge ge sp pc stack f rs0 m0 (sexec_final i (si_local st)) rs m t s.
Proof.
intros PC1 PC2 (PRE&MEM&REG) LAST. destruct LAST; subst; try_simplify_someHyps; simpl.
- + (* Snone *) intro Hi; destruct i; simpl in Hi |- *; unfold sist_set_local in Hi; try congruence.
+ + (* Snone *) intro Hi; destruct i; simpl in Hi |- *; try destruct Pos.eq_dec in *; unfold sist_set_local in Hi; try congruence.
+ (* Icall *) intros; eapply exec_Scall; auto.
- destruct ros; simpl in * |- *; auto.
rewrite REG; auto.
@@ -1126,11 +1191,13 @@ Proof.
+ (* Ibuiltin *) intros HPC SMEM.
eapply exec_Ibuiltin; eauto.
eapply seval_builtin_args_complete; eauto.
+ + (* Icond *) intros HPC SMEM.
+ destruct Pos.eq_dec in HPC; try discriminate.
+ (* Ijumptable *) intros HPC SMEM.
- eapply exec_Ijumptable; eauto.
- congruence.
+ eapply exec_Ijumptable; eauto; congruence.
+ (* Ireturn *)
- intros; subst. enough (v=regmap_optget or Vundef rs) as ->.
+ intros; subst.
+ enough (v=regmap_optget or Vundef rs) as ->.
* eapply exec_Ireturn; eauto.
* intros; destruct or; simpl; congruence.
Qed.
@@ -1464,7 +1531,6 @@ Proof.
destruct (icontinue is) eqn:ICONT.
{ destruct SEM as (SEML & SIPC & ALLF).
exploit siexec_inst_early_exit_absurd; eauto. contradiction. }
-
eexists. econstructor 1.
*** eapply exec_early_exit; eauto.
*** destruct ISTEPS as (ext & lx & SSEME & ALLFU). destruct SEM as (ext' & lx' & SSEME' & ALLFU').
diff --git a/scheduling/RTLpathScheduler.v b/scheduling/RTLpathScheduler.v
index 31680256..7804e454 100644
--- a/scheduling/RTLpathScheduler.v
+++ b/scheduling/RTLpathScheduler.v
@@ -156,7 +156,7 @@ Definition verified_scheduler (f: RTLpath.function) : res (RTLpath.function * (P
let (tctetpm, dm) := untrusted_scheduler f in
let (tcte, tpm) := tctetpm in
let (tc, te) := tcte in
- let tfr := mkfunction (fn_sig f) (fn_params f) (fn_stacksize f) tc te in
+ let tfr := mkfunction (fn_sig f) (fn_params f) (fn_stacksize f) tc te (mkuntrustedanalysis None None) in
do tf <- proj1_sig (function_builder tfr tpm);
do tt <- function_equiv_checker dm f tf;
OK (tf, dm).
diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml
index f3f09954..9986f2e7 100644
--- a/scheduling/RTLpathScheduleraux.ml
+++ b/scheduling/RTLpathScheduleraux.ml
@@ -237,7 +237,7 @@ let get_live_regs_entry (sb : superblock) code =
(* TODO David *)
let schedule_superblock sb code =
if not !Clflags.option_fprepass
- then sb.instructions
+ then None
else
(* let old_flag = !debug_flag in
debug_flag := true;
@@ -268,8 +268,8 @@ let schedule_superblock sb code =
live_regs_entry
sb.typing
(reference_counting seqa sb.s_output_regs sb.typing) with
- | None -> sb.instructions
- | Some order ->
+ | None -> None
+ | Some (order, final_time) ->
let ins' =
Array.append
(Array.map (fun i -> sb.instructions.(i)) order)
@@ -281,7 +281,7 @@ let schedule_superblock sb code =
flush stdout; *)
assert ((Array.length sb.instructions) = (Array.length ins'));
(*sb.instructions; *)
- ins';;
+ Some (ins', final_time);;
(* stub2: reverse function *)
(*
@@ -430,7 +430,12 @@ let apply_schedule code sb new_order =
let pset = hashedset_map fmap @@ get_some @@ PTree.get n cbs_above in
let pset' = get_some @@ PTree.get n' cbs_above' in
if HashedSet.PSet.is_subset pset pset' then tc := PTree.set n' (Iload (AST.TRAP,a,b,c,d,s)) !tc
- else assert !config.has_non_trapping_loads
+ else
+ (* TODO jf: ignore identical loads
+ * Right now the downward scheduling might cause the duplication of loads
+ * which are then moved by apply_schedule "across Iconds" due to my abuse of
+ * apply_schedule when inserting code into a superblock. *)
+ (* assert !config.has_non_trapping_loads *) ()
| _ -> ()
) old_order;
!tc
@@ -461,7 +466,11 @@ let rec do_schedule code pm = function
* This is because the scheduler (rightfully) refuses to schedule ahead of a branch
* operations that might trap *)
let code' = turn_all_loads_nontrap sb code_exp in
- let schedule = schedule_superblock sb code' in
+ let schedule =
+ match schedule_superblock sb code' with
+ | None -> sb.instructions
+ | Some (schedule, _final_time) -> schedule
+ in
let new_code = apply_schedule code' sb schedule in
begin
(*debug_flag := true;*)
@@ -493,5 +502,12 @@ let scheduler f =
(*print_superblocks lsb code; debug "\n";*)
find_last_node_reg (PTree.elements code);
let (tc, pm) = do_schedule code pm lsb in
+ let debug_flag_old = !debug_flag in
+ debug_flag := false;
+ debug "Before scheduling, entrypoint: %d:\n" (Camlcoq.P.to_int entry); flush_all ();
+ print_code code; flush_all ();
+ debug "After scheduling:\n"; flush_all ();
+ print_code tc; flush_all ();
+ debug_flag := debug_flag_old;
(((tc, entry), pm), id_ptree)
end
diff --git a/scheduling/RTLpathSchedulerproof.v b/scheduling/RTLpathSchedulerproof.v
index a9c2fa76..03554f9c 100644
--- a/scheduling/RTLpathSchedulerproof.v
+++ b/scheduling/RTLpathSchedulerproof.v
@@ -281,6 +281,7 @@ Proof.
unfold inst_checker. destruct i; simpl in *;
unfold exit_checker; try discriminate.
all:
+ try destruct Pos.eq_dec; simpl;
try destruct (list_mem _ _); simpl;
try (destruct (Regset.subset _ _) eqn:SUB_ALIVE; try congruence; fail).
4,5:
@@ -302,17 +303,27 @@ Proof.
inversion_SOME p.
2: { destruct (Regset.subset _ _) eqn:?; try congruence. }
destruct (Regset.subset _ _) eqn:SUB_ALIVE; try congruence.
- 2: { destruct (Regset.subset (pre_output_regs path0) alive) eqn:?; try congruence. }
- simpl.
+ 2: {
+ destruct (Regset.subset (pre_output_regs path0) alive) eqn:?; try congruence;
+ destruct ((fn_path f) ! n); intros; try discriminate.
+ all: inversion SIEXEC; simpl; auto.
+ all: destruct Regset.subset in ICHK0; try discriminate;
+ destruct Regset.subset in ICHK0; try discriminate;
+ simpl in ICHK0.
+ all: destruct PTree.get eqn:GET in ICHK0; try discriminate.
+ all: destruct Regset.subset eqn:SUBS_PATH0 in ICHK0; try discriminate.
+ all: econstructor; eauto.
+ all: eapply eqlive_reg_monotonic; eauto.
+ all: destruct (Regset.subset (input_regs p0) (pre_output_regs path0)) eqn:SUB_PATH; try congruence.
+ all: intros; apply Regset.subset_2 in SUB_PATH.
+ all: unfold Regset.Subset in SUB_PATH; apply SUB_PATH in H; auto. }
+ intros; simpl.
destruct (Regset.subset (pre_output_regs path0) alive) eqn:SUB_ALIVE'; try congruence.
- inversion_SOME p'.
- destruct (Regset.subset (input_regs p') (pre_output_regs path0)) eqn:SUB_PATH; try congruence.
- intros NPATH NPATH' _. econstructor; eauto.
- instantiate (1:=p'). rewrite <- NPC; auto.
- eapply eqlive_reg_monotonic; eauto; simpl.
- intros. apply Regset.subset_2 in SUB_PATH.
+ destruct (Regset.subset (input_regs p) (pre_output_regs path0)) eqn:SUB_PATH; try congruence.
+ apply Regset.subset_2 in SUB_PATH.
unfold Regset.Subset in SUB_PATH.
- apply SUB_PATH in H; auto.
+ econstructor; eauto. { rewrite <- NPC; eauto. }
+ eapply eqlive_reg_monotonic; eauto.
Qed.
Lemma pre_output_regs_correct f pc0 path0 stk stk' sp (st:sstate) rs0 m0 t s is rs':
@@ -376,6 +387,12 @@ Proof.
exploit exit_checker_eqlive_builtin_res; eauto.
intros. destruct H as [p [PATH EQLIVE']].
econstructor; eauto.
+ + (* Icond *)
+ eexists; split.
+ * econstructor.
+ * generalize ICHK.
+ unfold inst_checker; simpl in *.
+ destruct Pos.eq_dec; destruct (Regset.subset _ _) eqn:SUB_ALIVE; try congruence.
+ (* Sjumptable *)
eexists; split.
* econstructor; eauto.
diff --git a/scheduling/RTLpathWFcheck.v b/scheduling/RTLpathWFcheck.v
index 63b914ec..eb7bdac7 100644
--- a/scheduling/RTLpathWFcheck.v
+++ b/scheduling/RTLpathWFcheck.v
@@ -36,7 +36,8 @@ Definition iinst_checker (pm: path_map) (i: instruction): option (node) :=
| Inop pc' | Iop _ _ _ pc' | Iload _ _ _ _ _ pc'
| Istore _ _ _ _ pc' => Some (pc')
| Icond cond args ifso ifnot _ =>
- exit_checker pm ifso ifnot
+ if Pos.eq_dec ifso ifnot then Some (ifnot)
+ else exit_checker pm ifso ifnot
| _ => None
end.
@@ -46,7 +47,7 @@ Lemma iinst_checker_path_entry (pm: path_map) (i: instruction) res pc:
iinst_checker pm i = Some res ->
early_exit i = Some pc -> path_entry pm pc.
Proof.
- destruct i; simpl; try_simplify_someHyps; subst.
+ destruct i; simpl; try destruct Pos.eq_dec; try_simplify_someHyps; subst.
Qed.
Lemma iinst_checker_default_succ (pm: path_map) (i: instruction) res pc:
@@ -54,7 +55,7 @@ Lemma iinst_checker_default_succ (pm: path_map) (i: instruction) res pc:
pc = res ->
default_succ i = Some pc.
Proof.
- destruct i; simpl; try_simplify_someHyps; subst;
+ destruct i; simpl; try destruct Pos.eq_dec; try_simplify_someHyps; subst;
repeat (inversion_ASSERT); try_simplify_someHyps.
intros; exploit exit_checker_res; eauto.
intros; subst. simpl; auto.
@@ -122,7 +123,8 @@ Proof.
intros CHECK PC. eapply wf_last_node; eauto.
clear c pc PC. intros pc PC.
destruct i; simpl in * |- *; intuition (subst; eauto);
- try (generalize CHECK; clear CHECK; try (inversion_SOME path); repeat inversion_ASSERT; try_simplify_someHyps).
+ try (generalize CHECK; clear CHECK; try (inversion_SOME path);
+ repeat inversion_ASSERT; try destruct Pos.eq_dec; try_simplify_someHyps).
intros X; exploit exit_checker_res; eauto.
clear X. intros; subst; eauto.
Qed.
diff --git a/tools/compiler_expand.ml b/tools/compiler_expand.ml
index e45f0617..dfadca3b 100644
--- a/tools/compiler_expand.ml
+++ b/tools/compiler_expand.ml
@@ -55,8 +55,29 @@ PARTIAL, Always, Require, (Some "Unused globals"), "Unusedglob";
let post_rtl_passes =
[|
PARTIAL, Always, Require, (Some "RTLpath generation"), "RTLpathLivegen", Noprint;
+ PARTIAL, Always, Require, (Some "Limited register renaming and/or preparing code motion past side exits."), "MyRTLpathScheduler", Noprint;
+ TOTAL, Always, Require, (Some "Projection to RTL"), "RTLpath", (Print (Printf.sprintf "RTL %d" ((Array.length rtl_passes) + 1)));
+ (* Is renumbering necessary and/or helpful here? *)
+ (* TOTAL, Always, NoRequire, (Some "Renumbering pre tail duplication"), "Renumber"; *)
+ PARTIAL, Always, NoRequire, (Some "TODO lift if"), "Liftif", Noprint;
+
+ TOTAL, Always, NoRequire, (Some "Renumbering pre CSE"), "Renumber", Noprint;
+ (* Due to hackery this pass of CSE3 only removes redundant conditions *)
+ PARTIAL, (Option "optim_CSE3"), Require, (Some "CSE3"), "CSE3", Noprint;
+ (* This is meant to clean up dead code afer "if-lifting" + removing of redundant condition *)
+ PARTIAL, (Option "optim_redundancy"), Require, (Some "Redundancy elimination"), "Deadcode", Noprint;
+
+ PARTIAL, Always, Require, (Some "RTLpath generation"), "RTLpathLivegen", Noprint;
PARTIAL, Always, Require, (Some "Prepass scheduling"), "RTLpathScheduler", Noprint;
TOTAL, Always, Require, (Some "Projection to RTL"), "RTLpath", (Print (Printf.sprintf "RTL %d" ((Array.length rtl_passes) + 1)));
+
+ (* Run CSE3 again, in case code is moved up again *)
+ TOTAL, Always, NoRequire, (Some "Renumbering pre CSE"), "Renumber", Noprint;
+ PARTIAL, (Option "optim_CSE3"), Require, (Some "CSE3"), "CSE3", Noprint;
+ TOTAL, (Option "optim_CSE3"), Require, (Some "Kill useless moves after CSE3"), "KillUselessMoves", Noprint;
+ TOTAL, (Option "optim_forward_moves"), Require, (Some "Forwarding moves"), "ForwardMoves", Noprint;
+ PARTIAL, (Option "optim_redundancy"), Require, (Some "Redundancy elimination"), "Deadcode", Noprint;
+
PARTIAL, Always, Require, (Some "Register allocation"), "Allocation", (Print "LTL 1");
PARTIAL, Always, Require, (Some "LTL Branch tunneling"), "LTLTunneling", (Print "LTL 2");
PARTIAL, Always, Require, (Some "CFG linearization"), "Linearize", Noprint;