aboutsummaryrefslogtreecommitdiffstats
path: root/scheduling
diff options
context:
space:
mode:
Diffstat (limited to 'scheduling')
-rw-r--r--scheduling/BTL.v8
-rw-r--r--scheduling/BTLRenumber.ml2
-rw-r--r--scheduling/BTLScheduleraux.ml217
-rw-r--r--scheduling/BTL_SEtheory.v4
-rw-r--r--scheduling/BTLcommonaux.ml23
-rw-r--r--scheduling/BTLtypes.ml10
-rw-r--r--scheduling/InstructionScheduler.ml488
-rw-r--r--scheduling/InstructionScheduler.mli16
-rw-r--r--scheduling/RTLpath.v2
-rw-r--r--scheduling/RTLpathSE_theory.v4
-rw-r--r--scheduling/RTLpathScheduleraux.ml187
-rw-r--r--scheduling/RTLtoBTLaux.ml11
-rw-r--r--scheduling/postpass_lib/Machblock.v4
13 files changed, 933 insertions, 43 deletions
diff --git a/scheduling/BTL.v b/scheduling/BTL.v
index 96377943..f832085c 100644
--- a/scheduling/BTL.v
+++ b/scheduling/BTL.v
@@ -214,7 +214,7 @@ Inductive has_loaded sp rs m chunk addr args v: trapping_mode -> Prop :=
: has_loaded sp rs m chunk addr args v trap
| has_loaded_default
(LOAD: forall a, eval_addressing ge sp addr rs##args = Some a -> Mem.loadv chunk m a = None)
- (DEFAULT: v = default_notrap_load_value chunk)
+ (DEFAULT: v = Vundef)
: has_loaded sp rs m chunk addr args v NOTRAP
.
Local Hint Constructors has_loaded: core.
@@ -272,12 +272,10 @@ Fixpoint iblock_istep_run sp ib rs m: option outcome :=
match Mem.loadv chunk m a with
| Some v => Some {| _rs := rs#dst <- v; _m:= m; _fin := None |}
| None =>
- let v := default_notrap_load_value chunk in
- Some {| _rs := rs#dst <- v; _m:= m; _fin := None |}
+ Some {| _rs := rs#dst <- Vundef; _m:= m; _fin := None |}
end
| None =>
- let v := default_notrap_load_value chunk in
- Some {| _rs := rs#dst <- v; _m:= m; _fin := None |}
+ Some {| _rs := rs#dst <- Vundef; _m:= m; _fin := None |}
end
| Bstore chunk addr args src _ =>
SOME a <- eval_addressing ge sp addr rs##args IN
diff --git a/scheduling/BTLRenumber.ml b/scheduling/BTLRenumber.ml
index 6ff42a27..2f4f9203 100644
--- a/scheduling/BTLRenumber.ml
+++ b/scheduling/BTLRenumber.ml
@@ -94,7 +94,7 @@ let regenerate_btl_tree btl entry =
let ib = renumber_iblock old_ibf.entry in
let n = get_inumb_or_next ib in
let n_pos = i2p n in
- let bi = mk_binfo n in
+ let bi = mk_binfo n old_ibf.binfo.s_output_regs old_ibf.binfo.typing in
let ibf = { entry = ib; input_regs = old_ibf.input_regs; binfo = bi } in
if old_n = entry then new_entry := n_pos;
dm := PTree.set old_n n_pos !dm;
diff --git a/scheduling/BTLScheduleraux.ml b/scheduling/BTLScheduleraux.ml
index 00be2aa7..75672243 100644
--- a/scheduling/BTLScheduleraux.ml
+++ b/scheduling/BTLScheduleraux.ml
@@ -4,7 +4,9 @@ open BTLtypes
open Machine
open DebugPrint
open PrintBTL
+open Registers
open RTLcommonaux
+open BTLcommonaux
open ExpansionOracle
open PrepassSchedulingOracle
@@ -88,11 +90,179 @@ let apply_schedule bseq olast positions =
in
build_iblock ibl
+(** the useful one. Returns a hashtable with bindings of shape
+ ** [(r,(t, n)], where [r] is a pseudo-register (Registers.reg),
+ ** [t] is its class (according to [typing]), and [n] the number of
+ ** times it's referenced as an argument in instructions of [seqa] ;
+ ** and an arrray containg the list of regs referenced by each
+ ** instruction, with a boolean to know whether it's as arg or dest *)
+let reference_counting (seqa : (iblock * Regset.t) array) (out_regs : Regset.t)
+ (typing : RTLtyping.regenv) :
+ (reg, int * int) Hashtbl.t * (reg * bool) list array =
+ let retl = Hashtbl.create 42 in
+ let retr = Array.make (Array.length seqa) [] in
+ (* retr.(i) : (r, b) -> (r', b') -> ...
+ * where b = true if seen as arg, false if seen as dest
+ *)
+ List.iter
+ (fun reg ->
+ Hashtbl.add retl reg (Machregsaux.class_of_type (typing reg), 1))
+ (Regset.elements out_regs);
+ let add_reg reg =
+ match Hashtbl.find_opt retl reg with
+ | Some (t, n) -> Hashtbl.add retl reg (t, n + 1)
+ | None -> Hashtbl.add retl reg (Machregsaux.class_of_type (typing reg), 1)
+ in
+ let map_true = List.map (fun r -> (r, true)) in
+ Array.iteri
+ (fun i (ins, _) ->
+ match ins with
+ | Bop (_, args, dest, _) | Bload (_, _, _, args, dest, _) ->
+ List.iter add_reg args;
+ retr.(i) <- (dest, false) :: map_true args
+ | Bcond (_, args, _, _, _) ->
+ List.iter add_reg args;
+ retr.(i) <- map_true args
+ | Bstore (_, _, args, src, _) ->
+ List.iter add_reg args;
+ add_reg src;
+ retr.(i) <- (src, true) :: map_true args
+ | BF (Bcall (_, fn, args, dest, _), _) ->
+ List.iter add_reg args;
+ retr.(i) <-
+ (match fn with
+ | Datatypes.Coq_inl reg ->
+ add_reg reg;
+ (dest, false) :: (reg, true) :: map_true args
+ | _ -> (dest, false) :: map_true args)
+ | BF (Btailcall (_, fn, args), _) ->
+ List.iter add_reg args;
+ retr.(i) <-
+ (match fn with
+ | Datatypes.Coq_inl reg ->
+ add_reg reg;
+ (reg, true) :: map_true args
+ | _ -> map_true args)
+ | BF (Bbuiltin (_, args, dest, _), _) ->
+ let rec bar = function
+ | AST.BA r ->
+ add_reg r;
+ retr.(i) <- (r, true) :: retr.(i)
+ | AST.BA_splitlong (hi, lo) | AST.BA_addptr (hi, lo) ->
+ bar hi;
+ bar lo
+ | _ -> ()
+ in
+ List.iter bar args;
+ let rec bad = function
+ | AST.BR r -> retr.(i) <- (r, false) :: retr.(i)
+ | AST.BR_splitlong (hi, lo) ->
+ bad hi;
+ bad lo
+ | _ -> ()
+ in
+ bad dest
+ | BF (Bjumptable (reg, _), _) | BF (Breturn (Some reg), _) ->
+ add_reg reg;
+ retr.(i) <- [ (reg, true) ]
+ | _ -> ())
+ seqa;
+ (* print_string "mentions\n";
+ * Array.iteri (fun i l ->
+ * print_int i;
+ * print_string ": [";
+ * List.iter (fun (r, b) ->
+ * print_int (Camlcoq.P.to_int r);
+ * print_string ":";
+ * print_string (if b then "a:" else "d");
+ * if b then print_int (snd (Hashtbl.find retl r));
+ * print_string ", "
+ * ) l;
+ * print_string "]\n";
+ * flush stdout;
+ * ) retr; *)
+ (retl, retr)
+
+let get_live_regs_entry seqa ibf btl =
+ if !Clflags.option_debug_compcert > 6 then debug_flag := true;
+ let ret =
+ Array.fold_right
+ (fun (ins, liveins) regset_i ->
+ let regset = Regset.union liveins regset_i in
+ match ins with
+ | Bnop _ -> regset
+ | Bop (_, args, dest, _) | Bload (_, _, _, args, dest, _) ->
+ List.fold_left
+ (fun set reg -> Regset.add reg set)
+ (Regset.remove dest regset)
+ args
+ | Bstore (_, _, args, src, _) ->
+ List.fold_left
+ (fun set reg -> Regset.add reg set)
+ (Regset.add src regset) args
+ | BF (Bcall (_, fn, args, dest, _), _) ->
+ List.fold_left
+ (fun set reg -> Regset.add reg set)
+ ((match fn with
+ | Datatypes.Coq_inl reg -> Regset.add reg
+ | Datatypes.Coq_inr _ -> fun x -> x)
+ (Regset.remove dest regset))
+ args
+ | BF (Btailcall (_, fn, args), _) ->
+ List.fold_left
+ (fun set reg -> Regset.add reg set)
+ (match fn with
+ | Datatypes.Coq_inl reg -> Regset.add reg regset
+ | Datatypes.Coq_inr _ -> regset)
+ args
+ | BF (Bbuiltin (_, args, dest, _), _) ->
+ List.fold_left
+ (fun set arg ->
+ let rec add reg set =
+ match reg with
+ | AST.BA r -> Regset.add r set
+ | AST.BA_splitlong (hi, lo) | AST.BA_addptr (hi, lo) ->
+ add hi (add lo set)
+ | _ -> set
+ in
+ add arg set)
+ (let rec rem dest set =
+ match dest with
+ | AST.BR r -> Regset.remove r set
+ | AST.BR_splitlong (hi, lo) -> rem hi (rem lo set)
+ | _ -> set
+ in
+ rem dest regset)
+ args
+ | BF (Bjumptable (reg, _), _) | BF (Breturn (Some reg), _) ->
+ Regset.add reg regset
+ | Bcond (_, args, _, _, _) ->
+ List.fold_left (fun set reg -> Regset.add reg set) regset args
+ | _ -> regset)
+ seqa ibf.binfo.s_output_regs
+ in
+ debug "live in regs: ";
+ print_regset ret;
+ debug "\n";
+ debug_flag := false;
+ ret
+
let schedule_blk n ibf btl =
if not !Clflags.option_fprepass then btl
else
let bseq, olast = flatten_blk_basics ibf in
- match schedule_sequence bseq btl with
+ let seqa =
+ Array.mapi
+ (fun i inst -> (inst, get_liveins inst))
+ bseq
+ in
+ let live_regs_entry = get_live_regs_entry seqa ibf btl in
+ let refcount =
+ reference_counting seqa ibf.binfo.s_output_regs ibf.binfo.typing
+ in
+ match
+ schedule_sequence seqa btl live_regs_entry ibf.binfo.typing refcount
+ with
| Some positions ->
let new_ib = apply_schedule bseq olast positions in
let new_ibf =
@@ -116,29 +286,52 @@ let turn_all_loads_nontrap n ibf btl =
in
PTree.set n ibf' btl
+let compute_liveins n ibf btl =
+ let rec traverse_rec ib =
+ match ib with
+ | Bseq (ib1, ib2) ->
+ traverse_rec ib1;
+ traverse_rec ib2
+ | BF (Bgoto succ, iinfo)
+ | BF (Bcall (_, _, _, _, succ), iinfo)
+ | BF (Bbuiltin (_, _, _, succ), iinfo) ->
+ let lives = (get_some @@ PTree.get succ btl).input_regs in
+ iinfo.liveins <- lives
+ | BF (Bjumptable (_, tbl), iinfo) ->
+ List.iter
+ (fun ex ->
+ let lives = (get_some @@ PTree.get ex btl).input_regs in
+ iinfo.liveins <- Regset.union iinfo.liveins lives)
+ tbl
+ | Bcond (_, _, BF (Bgoto succ, _), Bnop None, iinfo) -> (
+ match iinfo.pcond with
+ | Some predb ->
+ assert (predb = false);
+ let lives = (get_some @@ PTree.get succ btl).input_regs in
+ iinfo.liveins <- lives
+ | None -> ())
+ | _ -> ()
+ in
+ traverse_rec ibf.entry
+
let rec do_schedule btl = function
| [] -> btl
| (n, ibf) :: blks ->
+ compute_liveins n ibf btl;
let code_exp = expanse n ibf btl in
let ibf_exp = get_some @@ PTree.get n code_exp in
let code_nt = turn_all_loads_nontrap n ibf_exp code_exp in
let ibf_nt = get_some @@ PTree.get n code_nt in
let btl' = schedule_blk n ibf_nt code_nt in
- begin
- (*debug_flag := true;*)
- print_btl_code stderr code_nt;
- print_btl_code stderr btl';
- (*debug_flag := false;*)
- do_schedule btl' blks
- end
+ (*debug_flag := true;*)
+ print_btl_code stderr btl;
+ print_btl_code stderr btl';
+ (*debug_flag := false;*)
+ do_schedule btl' blks
let btl_scheduler f =
let btl = f.fn_code in
- (*debug_flag := true;*)
let elts = PTree.elements btl in
find_last_reg elts;
let btl' = do_schedule btl elts in
- debug "Scheduled BTL Code:\n";
- print_btl_code stderr btl';
- (*debug_flag := false;*)
btl'
diff --git a/scheduling/BTL_SEtheory.v b/scheduling/BTL_SEtheory.v
index e2e6be6b..033ed843 100644
--- a/scheduling/BTL_SEtheory.v
+++ b/scheduling/BTL_SEtheory.v
@@ -93,11 +93,11 @@ Fixpoint eval_sval ctx (sv: sval): option val :=
| NOTRAP =>
SOME args <- eval_list_sval ctx lsv IN
match (eval_addressing (cge ctx) (csp ctx) addr args) with
- | None => Some (default_notrap_load_value chunk)
+ | None => Some Vundef
| Some a =>
SOME m <- eval_smem ctx sm IN
match (Mem.loadv chunk m a) with
- | None => Some (default_notrap_load_value chunk)
+ | None => Some Vundef
| Some val => Some val
end
end
diff --git a/scheduling/BTLcommonaux.ml b/scheduling/BTLcommonaux.ml
index 4605d613..577e4828 100644
--- a/scheduling/BTLcommonaux.ml
+++ b/scheduling/BTLcommonaux.ml
@@ -1,15 +1,22 @@
open Maps
open BTL
+open Registers
open BTLtypes
open RTLcommonaux
let undef_node = -1
-let mk_iinfo _inumb _pcond = { inumb = _inumb; pcond = _pcond; visited = false }
+let mk_iinfo _inumb _pcond = { inumb = _inumb; pcond = _pcond; visited = false; liveins = Regset.empty }
-let def_iinfo () = { inumb = undef_node; pcond = None; visited = false }
+let def_iinfo () = { inumb = undef_node; pcond = None; visited = false; liveins = Regset.empty }
-let mk_binfo _bnumb = { bnumb = _bnumb; visited = false }
+let mk_binfo _bnumb _s_output_regs _typing =
+ {
+ bnumb = _bnumb;
+ visited = false;
+ s_output_regs = _s_output_regs;
+ typing = _typing;
+ }
let reset_visited_ibf btl =
PTree.map
@@ -68,3 +75,13 @@ let rec get_inumb_or_next = function
iinfo.inumb
| Bseq (ib1, _) -> get_inumb_or_next ib1
| _ -> failwith "get_inumb_or_next: Bnop None"
+
+let get_liveins = function
+ | BF (_, iinfo)
+ | Bnop (Some iinfo)
+ | Bop (_, _, _, iinfo)
+ | Bload (_, _, _, _, _, iinfo)
+ | Bstore (_, _, _, _, iinfo)
+ | Bcond (_, _, _, _, iinfo) ->
+ iinfo.liveins
+ | _ -> failwith "get_liveins: invalid iblock"
diff --git a/scheduling/BTLtypes.ml b/scheduling/BTLtypes.ml
index 3972fd6b..12ca30e8 100644
--- a/scheduling/BTLtypes.ml
+++ b/scheduling/BTLtypes.ml
@@ -1,7 +1,15 @@
+open Registers
+
type inst_info = {
mutable inumb : int;
mutable pcond : bool option;
mutable visited : bool;
+ mutable liveins: Regset.t;
}
-type block_info = { mutable bnumb : int; mutable visited : bool }
+type block_info = {
+ mutable bnumb : int;
+ mutable visited : bool;
+ s_output_regs : Regset.t;
+ typing : RTLtyping.regenv;
+}
diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml
index eab0b21a..e3a421a5 100644
--- a/scheduling/InstructionScheduler.ml
+++ b/scheduling/InstructionScheduler.ml
@@ -33,6 +33,10 @@ type latency_constraint = {
type problem = {
max_latency : int;
resource_bounds : int array;
+ live_regs_entry : Registers.Regset.t;
+ typing : RTLtyping.regenv;
+ reference_counting : ((Registers.reg, int * int) Hashtbl.t
+ * ((Registers.reg * bool) list array)) option;
instruction_usages : int array array;
latency_constraints : latency_constraint list;
};;
@@ -118,6 +122,13 @@ let vector_less_equal a b =
true
with Exit -> false;;
+(* let vector_add a b =
+ * assert ((Array.length a) = (Array.length b));
+ * for i=0 to (Array.length a)-1
+ * do
+ * b.(i) <- b.(i) + a.(i)
+ * done;; *)
+
let vector_subtract a b =
assert ((Array.length a) = (Array.length b));
for i=0 to (Array.length a)-1
@@ -257,8 +268,8 @@ let priority_list_scheduler (order : list_scheduler_order)
assert(!time >= 0);
!time
with Exit -> -1
-
in
+
let advance_time() =
begin
(if !current_time < max_time-1
@@ -267,7 +278,8 @@ let priority_list_scheduler (order : list_scheduler_order)
Array.blit problem.resource_bounds 0 current_resources 0
(Array.length current_resources);
ready.(!current_time + 1) <-
- InstrSet.union (ready.(!current_time)) (ready.(!current_time + 1));
+ InstrSet.union (ready.(!current_time))
+ (ready.(!current_time + 1));
ready.(!current_time) <- InstrSet.empty;
end);
incr current_time
@@ -334,6 +346,470 @@ let list_scheduler = priority_list_scheduler CRITICAL_PATH_ORDER;;
(* dummy code for placating ocaml's warnings *)
let _ = fun x -> priority_list_scheduler INSTRUCTION_ORDER x;;
+
+(* A scheduler sensitive to register pressure *)
+let reg_pres_scheduler (problem : problem) : solution option =
+ (* DebugPrint.debug_flag := true; *)
+ let nr_instructions = get_nr_instructions problem in
+
+ if !Clflags.option_debug_compcert > 6 then
+ (Printf.eprintf "\nSCHEDULING_SUPERBLOCK %d\n" nr_instructions;
+ flush stderr);
+
+ let successors = get_successors problem
+ and predecessors = get_predecessors problem
+ and times = Array.make (nr_instructions+1) (-1) in
+ let live_regs_entry = problem.live_regs_entry in
+
+ let available_regs = Array.copy Machregsaux.nr_regs in
+
+ let nr_types_regs = Array.length available_regs in
+
+ let thres = Array.fold_left (min)
+ (max !(Clflags.option_regpres_threshold) 0)
+ Machregsaux.nr_regs
+ in
+
+
+ let regs_thresholds = Array.make nr_types_regs thres in
+ (* placeholder value *)
+
+ let class_r r =
+ Machregsaux.class_of_type (problem.typing r) in
+
+ let live_regs = Hashtbl.create 42 in
+
+ List.iter (fun r -> let classe = Machregsaux.class_of_type
+ (problem.typing r) in
+ available_regs.(classe)
+ <- available_regs.(classe) - 1;
+ Hashtbl.add live_regs r classe)
+ (Registers.Regset.elements live_regs_entry);
+
+ let csr_b = ref false in
+
+ let counts, mentions =
+ match problem.reference_counting with
+ | Some (l, r) -> l, r
+ | None -> assert false
+ in
+
+ let fold_delta i = (fun a (r, b) ->
+ a +
+ if class_r r <> i then 0 else
+ (if b then
+ if (Hashtbl.find counts r = (i, 1))
+ then 1 else 0
+ else
+ match Hashtbl.find_opt live_regs r with
+ | None -> -1
+ | Some t -> 0
+ )) in
+
+ let priorities = critical_paths successors in
+
+ let current_resources = Array.copy problem.resource_bounds in
+
+ let module InstrSet =
+ struct
+ module MSet =
+ Set.Make (struct
+ type t=int
+ let compare x y =
+ match priorities.(y) - priorities.(x) with
+ | 0 -> x - y
+ | z -> z
+ end)
+
+ let empty = MSet.empty
+ let is_empty = MSet.is_empty
+ let add = MSet.add
+ let remove = MSet.remove
+ let union = MSet.union
+ let iter = MSet.iter
+
+ let compare_regs i x y =
+ let pyi = List.fold_left (fold_delta i) 0 mentions.(y) in
+ (* print_int y;
+ * print_string " ";
+ * print_int pyi;
+ * print_newline ();
+ * flush stdout; *)
+ let pxi = List.fold_left (fold_delta i) 0 mentions.(x) in
+ match pyi - pxi with
+ | 0 -> (match priorities.(y) - priorities.(x) with
+ | 0 -> x - y
+ | z -> z)
+ | z -> z
+
+ (** t is the register class *)
+ let sched_CSR t ready usages =
+ (* print_string "looking for max delta";
+ * print_newline ();
+ * flush stdout; *)
+ let result = ref (-1) in
+ iter (fun i ->
+ if vector_less_equal usages.(i) current_resources
+ then if !result = -1 || (compare_regs t !result i > 0)
+ then result := i) ready;
+ !result
+ end
+ in
+
+ let max_time = bound_max_time problem + 5*nr_instructions in
+ let ready = Array.make max_time InstrSet.empty in
+
+ Array.iteri (fun i preds ->
+ if i < nr_instructions && preds = []
+ then ready.(0) <- InstrSet.add i ready.(0)) predecessors;
+
+ let current_time = ref 0
+ and earliest_time i =
+ try
+ let time = ref (-1) in
+ List.iter (fun (j, latency) ->
+ if times.(j) < 0
+ then raise Exit
+ else let t = times.(j) + latency in
+ if t > !time
+ then time := t) predecessors.(i);
+ assert (!time >= 0);
+ !time
+ with Exit -> -1
+ in
+
+ let advance_time () =
+ (if !current_time < max_time-1
+ then (
+ Array.blit problem.resource_bounds 0 current_resources 0
+ (Array.length current_resources);
+ ready.(!current_time + 1) <-
+ InstrSet.union (ready.(!current_time))
+ (ready.(!current_time +1));
+ ready.(!current_time) <- InstrSet.empty));
+ incr current_time
+ in
+
+ (* ALL MENTIONS TO cnt ARE PLACEHOLDERS *)
+ let cnt = ref 0 in
+
+ let attempt_scheduling ready usages =
+ let result = ref (-1) in
+ DebugPrint.debug "\n\nREADY: ";
+ InstrSet.iter (fun i -> DebugPrint.debug "%d " i) ready;
+ DebugPrint.debug "\n\n";
+ try
+ Array.iteri (fun i avlregs ->
+ DebugPrint.debug "avlregs: %d %d\nlive regs: %d\n"
+ i avlregs (Hashtbl.length live_regs);
+ if !cnt < 5 && avlregs <= regs_thresholds.(i)
+ then (
+ csr_b := true;
+ let maybe = InstrSet.sched_CSR i ready usages in
+ DebugPrint.debug "maybe %d\n" maybe;
+ (if maybe >= 0 &&
+ let delta =
+ List.fold_left (fold_delta i) 0 mentions.(maybe) in
+ DebugPrint.debug "delta %d\n" delta;
+ delta > 0
+ then
+ (vector_subtract usages.(maybe) current_resources;
+ result := maybe)
+ else
+ if not !Clflags.option_regpres_wait_window
+ then
+ (InstrSet.iter (fun ins ->
+ if vector_less_equal usages.(ins) current_resources &&
+ List.fold_left (fold_delta i) 0 mentions.(maybe) >= 0
+ then result := ins
+ ) ready;
+ if !result <> -1 then
+ vector_subtract usages.(!result) current_resources;
+ incr cnt)
+ else
+ (incr cnt)
+ );
+ raise Exit)) available_regs;
+ InstrSet.iter (fun i ->
+ if vector_less_equal usages.(i) current_resources
+ then (
+ vector_subtract usages.(i) current_resources;
+ result := i;
+ raise Exit)) ready;
+ -1
+ with Exit ->
+ !result in
+
+ while !current_time < max_time
+ do
+ if (InstrSet.is_empty ready.(!current_time))
+ then advance_time ()
+ else
+ match attempt_scheduling ready.(!current_time)
+ problem.instruction_usages with
+ | -1 -> advance_time()
+ | i -> (assert(times.(i) < 0);
+ (DebugPrint.debug "INSTR ISSUED: %d\n" i;
+ if !csr_b && !Clflags.option_debug_compcert > 6 then
+ (Printf.eprintf "REGPRES: high pres class %d\n" i;
+ flush stderr);
+ csr_b := false;
+ (* if !Clflags.option_regpres_wait_window then *)
+ cnt := 0;
+ List.iter (fun (r,b) ->
+ if b then
+ (match Hashtbl.find_opt counts r with
+ | None -> assert false
+ | Some (t, n) ->
+ Hashtbl.remove counts r;
+ if n = 1 then
+ (Hashtbl.remove live_regs r;
+ available_regs.(t)
+ <- available_regs.(t) + 1))
+ else
+ let t = class_r r in
+ match Hashtbl.find_opt live_regs r with
+ | None -> (Hashtbl.add live_regs r t;
+ available_regs.(t)
+ <- available_regs.(t) - 1)
+ | Some i -> ()
+ ) mentions.(i));
+ times.(i) <- !current_time;
+ ready.(!current_time)
+ <- InstrSet.remove i (ready.(!current_time));
+ List.iter (fun (instr_to, latency) ->
+ if instr_to < nr_instructions then
+ match earliest_time instr_to with
+ | -1 -> ()
+ | to_time ->
+ ((* DebugPrint.debug "TO TIME %d : %d\n" to_time
+ * (Array.length ready); *)
+ ready.(to_time)
+ <- InstrSet.add instr_to ready.(to_time))
+ ) successors.(i);
+ successors.(i) <- []
+ )
+ done;
+
+ try
+ let final_time = ref (-1) in
+ for i = 0 to nr_instructions - 1 do
+ DebugPrint.debug "%d " i;
+ (if times.(i) < 0 then raise Exit);
+ (if !final_time < times.(i) + 1 then final_time := times.(i) + 1)
+ done;
+ List.iter (fun (i, latency) ->
+ let target_time = latency + times.(i) in
+ if target_time > !final_time then
+ final_time := target_time) predecessors.(nr_instructions);
+ times.(nr_instructions) <- !final_time;
+ (* DebugPrint.debug_flag := false; *)
+ Some times
+ with Exit ->
+ DebugPrint.debug "reg_pres_sched failed\n";
+ (* DebugPrint.debug_flag := false; *)
+ None
+
+;;
+
+
+(********************************************************************)
+
+let reg_pres_scheduler_bis (problem : problem) : solution option =
+ (* Printf.printf "\nNEW\n\n"; *)
+ let nr_instructions = get_nr_instructions problem in
+ let successors = get_successors problem
+ and predecessors = get_predecessors problem
+ and times = Array.make (nr_instructions+1) (-1) in
+ let live_regs_entry = problem.live_regs_entry in
+
+ (* let available_regs = Array.copy Machregsaux.nr_regs in *)
+
+ let class_r r =
+ Machregsaux.class_of_type (problem.typing r) in
+
+ let live_regs = Hashtbl.create 42 in
+
+ List.iter (fun r -> let classe = Machregsaux.class_of_type
+ (problem.typing r) in
+ (* available_regs.(classe)
+ * <- available_regs.(classe) - 1; *)
+ Hashtbl.add live_regs r classe)
+ (Registers.Regset.elements live_regs_entry);
+
+
+ let counts, mentions =
+ match problem.reference_counting with
+ | Some (l, r) -> l, r
+ | None -> assert false
+ in
+
+ let fold_delta a (r, b) =
+ a + (if b then
+ match Hashtbl.find_opt counts r with
+ | Some (_, 1) -> 1
+ | _ -> 0
+ else
+ match Hashtbl.find_opt live_regs r with
+ | None -> -1
+ | Some t -> 0
+ ) in
+
+ let priorities = critical_paths successors in
+
+ let current_resources = Array.copy problem.resource_bounds in
+
+ let compare_pres x y =
+ let pdy = List.fold_left (fold_delta) 0 mentions.(y) in
+ let pdx = List.fold_left (fold_delta) 0 mentions.(x) in
+ match pdy - pdx with
+ | 0 -> x - y
+ | z -> z
+ in
+
+ let module InstrSet =
+ Set.Make (struct
+ type t = int
+ let compare x y =
+ match priorities.(y) - priorities.(x) with
+ | 0 -> x - y
+ | z -> z
+ end) in
+
+ let max_time = bound_max_time problem (* + 5*nr_instructions *) in
+ let ready = Array.make max_time InstrSet.empty in
+
+ Array.iteri (fun i preds ->
+ if i < nr_instructions && preds = []
+ then ready.(0) <- InstrSet.add i ready.(0)) predecessors;
+
+ let current_time = ref 0
+ and earliest_time i =
+ try
+ let time = ref (-1) in
+ List.iter (fun (j, latency) ->
+ if times.(j) < 0
+ then raise Exit
+ else let t = times.(j) + latency in
+ if t > !time
+ then time := t) predecessors.(i);
+ assert (!time >= 0);
+ !time
+ with Exit -> -1
+ in
+
+ let advance_time () =
+ (* Printf.printf "ADV\n";
+ * flush stdout; *)
+ (if !current_time < max_time-1
+ then (
+ Array.blit problem.resource_bounds 0 current_resources 0
+ (Array.length current_resources);
+ ready.(!current_time + 1) <-
+ InstrSet.union (ready.(!current_time))
+ (ready.(!current_time +1));
+ ready.(!current_time) <- InstrSet.empty));
+ incr current_time
+ in
+
+
+ let attempt_scheduling ready usages =
+ let result = ref [] in
+ try
+ InstrSet.iter (fun i ->
+ if vector_less_equal usages.(i) current_resources
+ then
+ if !result = [] || priorities.(i) = priorities.(List.hd (!result))
+ then
+ result := i::(!result)
+ else raise Exit
+ ) ready;
+ if !result <> [] then raise Exit;
+ -1
+ with
+ Exit ->
+ let mini = List.fold_left (fun a b ->
+ if a = -1 || compare_pres a b > 0
+ then b else a
+ ) (-1) !result in
+ vector_subtract usages.(mini) current_resources;
+ mini
+ in
+
+ while !current_time < max_time
+ do
+ if (InstrSet.is_empty ready.(!current_time))
+ then advance_time ()
+ else
+ match attempt_scheduling ready.(!current_time)
+ problem.instruction_usages with
+ | -1 -> advance_time()
+ | i -> (
+ DebugPrint.debug "ISSUED: %d\nREADY: " i;
+ InstrSet.iter (fun i -> DebugPrint.debug "%d " i)
+ ready.(!current_time);
+ DebugPrint.debug "\nSUCC: ";
+ List.iter (fun (i, l) -> DebugPrint.debug "%d " i)
+ successors.(i);
+ DebugPrint.debug "\n\n";
+ assert(times.(i) < 0);
+ times.(i) <- !current_time;
+ ready.(!current_time)
+ <- InstrSet.remove i (ready.(!current_time));
+ (List.iter (fun (r,b) ->
+ if b then
+ (match Hashtbl.find_opt counts r with
+ | None -> assert false
+ | Some (t, n) ->
+ Hashtbl.remove counts r;
+ if n = 1 then
+ (Hashtbl.remove live_regs r;
+ (* available_regs.(t)
+ * <- available_regs.(t) + 1 *)))
+ else
+ let t = class_r r in
+ match Hashtbl.find_opt live_regs r with
+ | None -> (Hashtbl.add live_regs r t;
+ (* available_regs.(t)
+ * <- available_regs.(t) - 1 *))
+ | Some i -> ()
+ ) mentions.(i));
+ List.iter (fun (instr_to, latency) ->
+ if instr_to < nr_instructions then
+ match earliest_time instr_to with
+ | -1 -> ()
+ | to_time ->
+ ((* DebugPrint.debug "TO TIME %d : %d\n" to_time
+ * (Array.length ready); *)
+ ready.(to_time)
+ <- InstrSet.add instr_to ready.(to_time))
+ ) successors.(i);
+ successors.(i) <- []
+ )
+ done;
+
+ try
+ let final_time = ref (-1) in
+ for i = 0 to nr_instructions - 1 do
+ (* print_int i;
+ * flush stdout; *)
+ (if times.(i) < 0 then raise Exit);
+ (if !final_time < times.(i) + 1 then final_time := times.(i) + 1)
+ done;
+ List.iter (fun (i, latency) ->
+ let target_time = latency + times.(i) in
+ if target_time > !final_time then
+ final_time := target_time) predecessors.(nr_instructions);
+ times.(nr_instructions) <- !final_time;
+ Some times
+ with Exit ->
+ DebugPrint.debug "reg_pres_sched failed\n";
+ None
+
+;;
+
+(********************************************************************)
+
type bundle = int list;;
let rec extract_deps_to index = function
@@ -438,6 +914,12 @@ let reverse_problem problem =
{
max_latency = problem.max_latency;
resource_bounds = problem.resource_bounds;
+ live_regs_entry = Registers.Regset.empty; (* PLACEHOLDER *)
+ (* Not needed for the revlist sched, and for now we wont bother
+ with creating a reverse scheduler aware of reg press *)
+
+ typing = problem.typing;
+ reference_counting = problem.reference_counting;
instruction_usages = Array.init (nr_instructions + 1)
(fun i ->
if i=0
@@ -1259,5 +1741,7 @@ let scheduler_by_name name =
| "ilp" -> validated_scheduler cascaded_scheduler
| "list" -> validated_scheduler list_scheduler
| "revlist" -> validated_scheduler reverse_list_scheduler
+ | "regpres" -> validated_scheduler reg_pres_scheduler
+ | "regpres_bis" -> validated_scheduler reg_pres_scheduler_bis
| "greedy" -> greedy_scheduler
| s -> failwith ("unknown scheduler: " ^ s);;
diff --git a/scheduling/InstructionScheduler.mli b/scheduling/InstructionScheduler.mli
index fb7af3f6..48c7bc09 100644
--- a/scheduling/InstructionScheduler.mli
+++ b/scheduling/InstructionScheduler.mli
@@ -23,6 +23,16 @@ type problem = {
resource_bounds : int array;
(** An array of number of units available indexed by the kind of resources to be allocated. It can be empty, in which case the problem is scheduling without resource constraints. *)
+ live_regs_entry : Registers.Regset.t;
+ (** The set of live pseudo-registers at entry. *)
+
+ typing : RTLtyping.regenv;
+ (** Register type map. *)
+
+ reference_counting : ((Registers.reg, int * int) Hashtbl.t
+ * ((Registers.reg * bool) list array)) option;
+ (** See RTLpathScheduleraux.reference_counting. *)
+
instruction_usages: int array array;
(** At index {i i} the vector of resources used by instruction number {i i}. It must be the same length as [resource_bounds] *)
@@ -68,6 +78,12 @@ Once a clock tick is full go to the next.
@return [Some solution] when a solution is found, [None] if not. *)
val list_scheduler : problem -> solution option
+(** WIP : Same as list_scheduler, but schedules instructions which decrease
+register pressure when it gets too high. *)
+val reg_pres_scheduler : problem -> solution option
+
+val reg_pres_scheduler_bis : problem -> solution option
+
(** Schedule the problem using the order of instructions without any reordering *)
val greedy_scheduler : problem -> solution option
diff --git a/scheduling/RTLpath.v b/scheduling/RTLpath.v
index a4fce97e..b29a7759 100644
--- a/scheduling/RTLpath.v
+++ b/scheduling/RTLpath.v
@@ -156,7 +156,7 @@ Definition istep (ge: RTL.genv) (i: instruction) (sp: val) (rs: regset) (m: mem)
SOME v <- Mem.loadv chunk m a IN
Some (mk_istate true pc' (rs#dst <- v) m)
| Iload NOTRAP chunk addr args dst pc' =>
- let default_state := mk_istate true pc' rs#dst <- (default_notrap_load_value chunk) m in
+ let default_state := mk_istate true pc' rs#dst <- Vundef m in
match (eval_addressing ge sp addr rs##args) with
| None => Some default_state
| Some a => match (Mem.loadv chunk m a) with
diff --git a/scheduling/RTLpathSE_theory.v b/scheduling/RTLpathSE_theory.v
index aa8db342..2a791feb 100644
--- a/scheduling/RTLpathSE_theory.v
+++ b/scheduling/RTLpathSE_theory.v
@@ -87,11 +87,11 @@ Fixpoint seval_sval (ge: RTL.genv) (sp:val) (sv: sval) (rs0: regset) (m0: mem):
| NOTRAP =>
SOME args <- seval_list_sval ge sp lsv rs0 m0 IN
match (eval_addressing ge sp addr args) with
- | None => Some (default_notrap_load_value chunk)
+ | None => Some Vundef
| Some a =>
SOME m <- seval_smem ge sp sm rs0 m0 IN
match (Mem.loadv chunk m a) with
- | None => Some (default_notrap_load_value chunk)
+ | None => Some Vundef
| Some val => Some val
end
end
diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml
index 3db25d82..659a8ba7 100644
--- a/scheduling/RTLpathScheduleraux.ml
+++ b/scheduling/RTLpathScheduleraux.ml
@@ -18,7 +18,7 @@ let print_superblock (sb: superblock) code =
begin
debug "{ instructions = "; print_instructions (Array.to_list insts) code; debug "\n";
debug " liveins = "; print_ptree_regset li; debug "\n";
- debug " output_regs = "; print_regset outs; debug "}"
+ debug " output_regs = "; print_regset outs; debug "\n}"
end
let print_superblocks lsb code =
@@ -73,6 +73,168 @@ let get_superblocks code entry pm typing =
lsb
end
+(** the useful one. Returns a hashtable with bindings of shape
+ ** [(r,(t, n)], where [r] is a pseudo-register (Registers.reg),
+ ** [t] is its class (according to [typing]), and [n] the number of
+ ** times it's referenced as an argument in instructions of [seqa] ;
+ ** and an arrray containg the list of regs referenced by each
+ ** instruction, with a boolean to know whether it's as arg or dest *)
+let reference_counting (seqa : (instruction * Regset.t) array)
+ (out_regs : Registers.Regset.t) (typing : RTLtyping.regenv) :
+ (Registers.reg, int * int) Hashtbl.t *
+ (Registers.reg * bool) list array =
+ let retl = Hashtbl.create 42 in
+ let retr = Array.make (Array.length seqa) [] in
+ (* retr.(i) : (r, b) -> (r', b') -> ...
+ * where b = true if seen as arg, false if seen as dest
+ *)
+ List.iter (fun reg ->
+ Hashtbl.add retl
+ reg (Machregsaux.class_of_type (typing reg), 1)
+ ) (Registers.Regset.elements out_regs);
+ let add_reg reg =
+ match Hashtbl.find_opt retl reg with
+ | Some (t, n) -> Hashtbl.add retl reg (t, n+1)
+ | None -> Hashtbl.add retl reg (Machregsaux.class_of_type
+ (typing reg), 1)
+ in
+ let map_true = List.map (fun r -> r, true) in
+ Array.iteri (fun i (ins, _) ->
+ match ins with
+ | Iop(_,args,dest,_) | Iload(_,_,_,args,dest,_) ->
+ List.iter (add_reg) args;
+ retr.(i) <- (dest, false)::(map_true args)
+ | Icond(_,args,_,_,_) ->
+ List.iter (add_reg) args;
+ retr.(i) <- map_true args
+ | Istore(_,_,args,src,_) ->
+ List.iter (add_reg) args;
+ add_reg src;
+ retr.(i) <- (src, true)::(map_true args)
+ | Icall(_,fn,args,dest,_) ->
+ List.iter (add_reg) args;
+ retr.(i) <- (match fn with
+ | Datatypes.Coq_inl reg ->
+ add_reg reg;
+ (dest,false)::(reg, true)::(map_true args)
+ | _ -> (dest,false)::(map_true args))
+
+ | Itailcall(_,fn,args) ->
+ List.iter (add_reg) args;
+ retr.(i) <- (match fn with
+ | Datatypes.Coq_inl reg ->
+ add_reg reg;
+ (reg, true)::(map_true args)
+ | _ -> map_true args)
+ | Ibuiltin(_,args,dest,_) ->
+ let rec bar = function
+ | AST.BA r -> add_reg r;
+ retr.(i) <- (r, true)::retr.(i)
+ | AST.BA_splitlong (hi, lo) | AST.BA_addptr (hi, lo) ->
+ bar hi; bar lo
+ | _ -> ()
+ in
+ List.iter (bar) args;
+ let rec bad = function
+ | AST.BR r -> retr.(i) <- (r, false)::retr.(i)
+ | AST.BR_splitlong (hi, lo) ->
+ bad hi; bad lo
+ | _ -> ()
+ in
+ bad dest;
+ | Ijumptable (reg,_) | Ireturn (Some reg) ->
+ add_reg reg;
+ retr.(i) <- [reg, true]
+ | _ -> ()
+ ) seqa;
+ (* print_string "mentions\n";
+ * Array.iteri (fun i l ->
+ * print_int i;
+ * print_string ": [";
+ * List.iter (fun (r, b) ->
+ * print_int (Camlcoq.P.to_int r);
+ * print_string ":";
+ * print_string (if b then "a:" else "d");
+ * if b then print_int (snd (Hashtbl.find retl r));
+ * print_string ", "
+ * ) l;
+ * print_string "]\n";
+ * flush stdout;
+ * ) retr; *)
+ retl, retr
+
+
+let get_live_regs_entry (sb : superblock) code =
+ (if !Clflags.option_debug_compcert > 6
+ then debug_flag := true);
+ debug "getting live regs for superblock:\n";
+ print_superblock sb code;
+ debug "\n";
+ let seqa = Array.map (fun i ->
+ (match PTree.get i code with
+ | Some ii -> ii
+ | None -> failwith "RTLpathScheduleraux.get_live_regs_entry"
+ ),
+ (match PTree.get i sb.liveins with
+ | Some s -> s
+ | None -> Regset.empty))
+ sb.instructions in
+ let ret =
+ Array.fold_right (fun (ins, liveins) regset_i ->
+ let regset = Registers.Regset.union liveins regset_i in
+ match ins with
+ | Inop _ -> regset
+ | Iop (_, args, dest, _)
+ | Iload (_, _, _, args, dest, _) ->
+ List.fold_left (fun set reg -> Registers.Regset.add reg set)
+ (Registers.Regset.remove dest regset) args
+ | Istore (_, _, args, src, _) ->
+ List.fold_left (fun set reg -> Registers.Regset.add reg set)
+ (Registers.Regset.add src regset) args
+ | Icall (_, fn, args, dest, _) ->
+ List.fold_left (fun set reg -> Registers.Regset.add reg set)
+ ((match fn with
+ | Datatypes.Coq_inl reg -> (Registers.Regset.add reg)
+ | Datatypes.Coq_inr _ -> (fun x -> x))
+ (Registers.Regset.remove dest regset))
+ args
+ | Itailcall (_, fn, args) ->
+ List.fold_left (fun set reg -> Registers.Regset.add reg set)
+ (match fn with
+ | Datatypes.Coq_inl reg -> (Registers.Regset.add reg regset)
+ | Datatypes.Coq_inr _ -> regset)
+ args
+ | Ibuiltin (_, args, dest, _) ->
+ List.fold_left (fun set arg ->
+ let rec add reg set =
+ match reg with
+ | AST.BA r -> Registers.Regset.add r set
+ | AST.BA_splitlong (hi, lo)
+ | AST.BA_addptr (hi, lo) -> add hi (add lo set)
+ | _ -> set
+ in add arg set)
+ (let rec rem dest set =
+ match dest with
+ | AST.BR r -> Registers.Regset.remove r set
+ | AST.BR_splitlong (hi, lo) -> rem hi (rem lo set)
+ | _ -> set
+ in rem dest regset)
+ args
+ | Icond (_, args, _, _, _) ->
+ List.fold_left (fun set reg ->
+ Registers.Regset.add reg set)
+ regset args
+ | Ijumptable (reg, _)
+ | Ireturn (Some reg) ->
+ Registers.Regset.add reg regset
+ | _ -> regset
+ ) seqa sb.s_output_regs
+ in debug "live in regs: ";
+ print_regset ret;
+ debug "\n";
+ debug_flag := false;
+ ret
+
(* TODO David *)
let schedule_superblock sb code =
if not !Clflags.option_fprepass
@@ -91,15 +253,22 @@ let schedule_superblock sb code =
match predicted_successor ii with
| Some _ -> 0
| None -> 1 in
+ debug "hello\n";
+ let live_regs_entry = get_live_regs_entry sb code in
+ let seqa =
+ Array.map (fun i ->
+ (match PTree.get i code with
+ | Some ii -> ii
+ | None -> failwith "RTLpathScheduleraux.schedule_superblock"),
+ (match PTree.get i sb.liveins with
+ | Some s -> s
+ | None -> Regset.empty))
+ (Array.sub sb.instructions 0 (nr_instr-trailer_length)) in
match PrepassSchedulingOracle.schedule_sequence
- (Array.map (fun i ->
- (match PTree.get i code with
- | Some ii -> ii
- | None -> failwith "RTLpathScheduleraux.schedule_superblock"),
- (match PTree.get i sb.liveins with
- | Some s -> s
- | None -> Regset.empty))
- (Array.sub sb.instructions 0 (nr_instr-trailer_length))) with
+ seqa
+ live_regs_entry
+ sb.typing
+ (reference_counting seqa sb.s_output_regs sb.typing) with
| None -> sb.instructions
| Some order ->
let ins' =
diff --git a/scheduling/RTLtoBTLaux.ml b/scheduling/RTLtoBTLaux.ml
index d544e87f..3c06f4cb 100644
--- a/scheduling/RTLtoBTLaux.ml
+++ b/scheduling/RTLtoBTLaux.ml
@@ -45,7 +45,7 @@ let translate_inst (iinfo : BTL.inst_info) inst is_final =
in
if is_final then encaps_final btli !osucc else btli
-let translate_function code entry join_points liveness =
+let translate_function code entry join_points liveness (typing : RTLtyping.regenv) =
let reached = ref (PTree.map (fun n i -> false) code) in
let btl_code = ref PTree.empty in
let rec build_btl_tree e =
@@ -53,6 +53,7 @@ let translate_function code entry join_points liveness =
else (
reached := PTree.set e true !reached;
let next_nodes = ref [] in
+ let last = ref None in
let rec build_btl_block n =
let inst = get_some @@ PTree.get n code in
let psucc = predicted_successor inst in
@@ -78,13 +79,16 @@ let translate_function code entry join_points liveness =
| None ->
debug "BLOCK END.\n";
next_nodes := !next_nodes @ successors_inst inst;
+ last := Some inst;
translate_inst iinfo inst true
in
let ib = build_btl_block e in
let succs = !next_nodes in
let inputs = get_some @@ PTree.get e liveness in
- let bi = mk_binfo (p2i e) in
+ let soutput = get_outputs liveness e (get_some @@ !last) in
+
+ let bi = mk_binfo (p2i e) soutput typing in
let ibf = { entry = ib; input_regs = inputs; binfo = bi } in
btl_code := PTree.set e ibf !btl_code;
List.iter build_btl_tree succs)
@@ -97,7 +101,8 @@ let rtl2btl (f : RTL.coq_function) =
let entry = f.fn_entrypoint in
let join_points = get_join_points code entry in
let liveness = analyze f in
- let btl = translate_function code entry join_points liveness in
+ let typing = get_ok @@ RTLtyping.type_function f in
+ let btl = translate_function code entry join_points liveness typing in
let dm = PTree.map (fun n i -> n) btl in
(*debug_flag := true;*)
debug "Entry %d\n" (p2i entry);
diff --git a/scheduling/postpass_lib/Machblock.v b/scheduling/postpass_lib/Machblock.v
index c8eadbd7..b588cca8 100644
--- a/scheduling/postpass_lib/Machblock.v
+++ b/scheduling/postpass_lib/Machblock.v
@@ -237,13 +237,13 @@ Inductive basic_step (s: list stackframe) (fb: block) (sp: val) (rs: regset) (m:
| exec_MBload_notrap1:
forall addr args rs' chunk dst,
eval_addressing ge sp addr rs##args = None ->
- rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- (default_notrap_load_value chunk)) ->
+ rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- Vundef) ->
basic_step s fb sp rs m (MBload NOTRAP chunk addr args dst) rs' m
| exec_MBload_notrap2:
forall addr args a rs' chunk dst,
eval_addressing ge sp addr rs##args = Some a ->
Mem.loadv chunk m a = None ->
- rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- (default_notrap_load_value chunk)) ->
+ rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- Vundef) ->
basic_step s fb sp rs m (MBload NOTRAP chunk addr args dst) rs' m
| exec_MBstore:
forall chunk addr args src m' a rs',