From 255cee09b71255051c2b40eae0c88bffce1f6f32 Mon Sep 17 00:00:00 2001 From: xleroy Date: Sat, 20 Apr 2013 07:54:52 +0000 Subject: Big merge of the newregalloc-int64 branch. Lots of changes in two directions: 1- new register allocator (+ live range splitting, spilling&reloading, etc) based on a posteriori validation using the Rideau-Leroy algorithm 2- support for 64-bit integer arithmetic (type "long long"). git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2200 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e --- backend/Regalloc.ml | 986 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 986 insertions(+) create mode 100644 backend/Regalloc.ml (limited to 'backend/Regalloc.ml') diff --git a/backend/Regalloc.ml b/backend/Regalloc.ml new file mode 100644 index 00000000..fe981e32 --- /dev/null +++ b/backend/Regalloc.ml @@ -0,0 +1,986 @@ +(* *********************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* *) +(* Copyright Institut National de Recherche en Informatique et en *) +(* Automatique. All rights reserved. This file is distributed *) +(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* *) +(* *********************************************************************) + +(* Register allocation by coloring of an interference graph *) + +(* The algorithm in a nutshell: + + - Split live ranges + - Convert from RTL to XTL + - Eliminate dead code + - Repeat: + . Construct interference graph + . Color interference graph using IRC algorithm + . Check for variables that were spilled and must be in registers + . If none, convert to LTL and exit. + . If some, insert spill and reload instructions and try again + End Repeat +*) + +open Format +open Clflags +open Camlcoq +open Datatypes +open Coqlib +open Maps +open AST +open Memdata +open Kildall +open Registers +open Op +open Machregs +open Locations +open Conventions1 +open Conventions +open IRC +open XTL + +(* Detection of 2-address operations *) + +let is_two_address op args = + if two_address_op op then + match args with + | [] -> assert false + | arg1 :: argl -> Some(arg1, argl) + else None + +(* For tracing *) + +let destination_alloctrace : string option ref = ref None +let pp = ref std_formatter + +let init_trace () = + if !option_dalloctrace && !pp == std_formatter then begin + match !destination_alloctrace with + | None -> () (* should not happen *) + | Some f -> pp := formatter_of_out_channel (open_out f) + end + + +(**************** Initial conversion from RTL to XTL **************) + +let vreg tyenv r = V(r, tyenv r) + +let vregs tyenv rl = List.map (vreg tyenv) rl + +let rec expand_regs tyenv = function + | [] -> [] + | r :: rl -> + match tyenv r with + | Tlong -> V(r, Tint) :: V(twin_reg r, Tint) :: expand_regs tyenv rl + | ty -> V(r, ty) :: expand_regs tyenv rl + +let constrain_reg v c = + match c with + | None -> v + | Some mr -> L(R mr) + +let rec constrain_regs vl cl = + match vl, cl with + | [], _ -> [] + | v1 :: vl', [] -> vl + | v1 :: vl', Some mr1 :: cl' -> L(R mr1) :: constrain_regs vl' cl' + | v1 :: vl', None :: cl' -> v1 :: constrain_regs vl' cl' + +let move v1 v2 k = + if v1 = v2 then k else Xmove(v1, v2) :: k + +let rec movelist vl1 vl2 k = + match vl1, vl2 with + | [], [] -> k + | v1 :: vl1, v2 :: vl2 -> move v1 v2 (movelist vl1 vl2 k) + | _, _ -> assert false + +let xparmove srcs dsts k = + assert (List.length srcs = List.length dsts); + match srcs, dsts with + | [], [] -> k 
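+  (* a single move needs neither an Xparmove nor its two scratch temporaries *)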
+ | [src], [dst] -> Xmove(src, dst) :: k + | _, _ -> Xparmove(srcs, dsts, new_temp Tint, new_temp Tfloat) :: k + +(* Return the XTL basic block corresponding to the given RTL instruction. + Move and parallel move instructions are introduced to honor calling + conventions and register constraints on some operations. + 64-bit integer variables are split in two 32-bit halves. *) + +let block_of_RTL_instr funsig tyenv = function + | RTL.Inop s -> + [Xbranch s] + | RTL.Iop(Omove, [arg], res, s) -> + if tyenv arg = Tlong then + [Xmove(V(arg, Tint), V(res, Tint)); + Xmove(V(twin_reg arg, Tint), V(twin_reg res, Tint)); + Xbranch s] + else + [Xmove(vreg tyenv arg, vreg tyenv res); Xbranch s] + | RTL.Iop(Omakelong, [arg1; arg2], res, s) -> + [Xmove(V(arg1, Tint), V(res, Tint)); + Xmove(V(arg2, Tint), V(twin_reg res, Tint)); + Xbranch s] + | RTL.Iop(Olowlong, [arg], res, s) -> + [Xmove(V(twin_reg arg, Tint), V(res, Tint)); Xbranch s] + | RTL.Iop(Ohighlong, [arg], res, s) -> + [Xmove(V(arg, Tint), V(res, Tint)); Xbranch s] + | RTL.Iop(op, args, res, s) -> + let (cargs, cres) = mregs_for_operation op in + let args1 = vregs tyenv args and res1 = vreg tyenv res in + let args2 = constrain_regs args1 cargs and res2 = constrain_reg res1 cres in + let (args3, res3) = + match is_two_address op args2 with + | None -> + (args2, res2) + | Some(arg, args2') -> + if arg = res2 || not (List.mem res2 args2') then + (args2, res2) + else + let t = new_temp (tyenv res) in (t :: args2', t) in + movelist args1 args3 (Xop(op, args3, res3) :: move res3 res1 [Xbranch s]) + | RTL.Iload(chunk, addr, args, dst, s) -> + if chunk = Mint64 then begin + match offset_addressing addr (coqint_of_camlint 4l) with + | None -> assert false + | Some addr' -> + [Xload(Mint32, addr, vregs tyenv args, + V((if big_endian then dst else twin_reg dst), Tint)); + Xload(Mint32, addr', vregs tyenv args, + V((if big_endian then twin_reg dst else dst), Tint)); + Xbranch s] + end else + [Xload(chunk, addr, vregs tyenv args, vreg tyenv dst); Xbranch s] + | RTL.Istore(chunk, addr, args, src, s) -> + if chunk = Mint64 then begin + match offset_addressing addr (coqint_of_camlint 4l) with + | None -> assert false + | Some addr' -> + [Xstore(Mint32, addr, vregs tyenv args, + V((if big_endian then src else twin_reg src), Tint)); + Xstore(Mint32, addr', vregs tyenv args, + V((if big_endian then twin_reg src else src), Tint)); + Xbranch s] + end else + [Xstore(chunk, addr, vregs tyenv args, vreg tyenv src); Xbranch s] + | RTL.Icall(sg, ros, args, res, s) -> + let args' = vlocs (loc_arguments sg) + and res' = vmregs (loc_result sg) in + xparmove (expand_regs tyenv args) args' + (Xcall(sg, sum_left_map (vreg tyenv) ros, args', res') :: + xparmove res' (expand_regs tyenv [res]) + [Xbranch s]) + | RTL.Itailcall(sg, ros, args) -> + let args' = vlocs (loc_arguments sg) in + xparmove (expand_regs tyenv args) args' + [Xtailcall(sg, sum_left_map (vreg tyenv) ros, args')] + | RTL.Ibuiltin(ef, args, res, s) -> + let (cargs, cres) = mregs_for_builtin ef in + let args1 = expand_regs tyenv args and res1 = expand_regs tyenv [res] in + let args2 = constrain_regs args1 cargs and res2 = constrain_regs res1 cres in + movelist args1 args2 + (Xbuiltin(ef, args2, res2) :: movelist res2 res1 [Xbranch s]) + | RTL.Icond(cond, args, s1, s2) -> + [Xcond(cond, vregs tyenv args, s1, s2)] + | RTL.Ijumptable(arg, tbl) -> + [Xjumptable(vreg tyenv arg, tbl)] + | RTL.Ireturn None -> + [Xreturn []] + | RTL.Ireturn (Some arg) -> + let args' = vmregs (loc_result funsig) in + xparmove 
(expand_regs tyenv [arg]) args' [Xreturn args'] + +(* One above the [pc] nodes of the given RTL function *) + +let next_pc f = + PTree.fold + (fun npc pc i -> if P.lt pc npc then npc else P.succ pc) + f.RTL.fn_code P.one + +(* Translate an RTL function to an XTL function *) + +let function_of_RTL_function f tyenv = + let xc = PTree.map1 (block_of_RTL_instr f.RTL.fn_sig tyenv) f.RTL.fn_code in + (* Add moves for function parameters *) + let pc_entrypoint = next_pc f in + let b_entrypoint = + xparmove (vlocs (loc_parameters f.RTL.fn_sig)) + (expand_regs tyenv f.RTL.fn_params) + [Xbranch f.RTL.fn_entrypoint] in + { fn_sig = f.RTL.fn_sig; + fn_stacksize = f.RTL.fn_stacksize; + fn_entrypoint = pc_entrypoint; + fn_code = PTree.set pc_entrypoint b_entrypoint xc } + + +(***************** Liveness analysis *****************) + +let vset_removelist vl after = List.fold_right VSet.remove vl after +let vset_addlist vl after = List.fold_right VSet.add vl after +let vset_addros vos after = + match vos with Coq_inl v -> VSet.add v after | Coq_inr id -> after + +let live_before instr after = + match instr with + | Xmove(src, dst) | Xspill(src, dst) | Xreload(src, dst) -> + if VSet.mem dst after + then VSet.add src (VSet.remove dst after) + else after + | Xparmove(srcs, dsts, itmp, ftmp) -> + vset_addlist srcs (vset_removelist dsts after) + | Xop(op, args, res) -> + if VSet.mem res after + then vset_addlist args (VSet.remove res after) + else after + | Xload(chunk, addr, args, dst) -> + if VSet.mem dst after + then vset_addlist args (VSet.remove dst after) + else after + | Xstore(chunk, addr, args, src) -> + vset_addlist args (VSet.add src after) + | Xcall(sg, ros, args, res) -> + vset_addlist args (vset_addros ros (vset_removelist res after)) + | Xtailcall(sg, ros, args) -> + vset_addlist args (vset_addros ros VSet.empty) + | Xbuiltin(ef, args, res) -> + vset_addlist args (vset_removelist res after) + | Xbranch s -> + after + | Xcond(cond, args, s1, s2) -> + List.fold_right VSet.add args after + | Xjumptable(arg, tbl) -> + VSet.add arg after + | Xreturn args -> + vset_addlist args VSet.empty + +let rec live_before_block blk after = + match blk with + | [] -> after + | instr :: blk -> live_before instr (live_before_block blk after) + +let transfer_live f pc after = + match PTree.get pc f.fn_code with + | None -> VSet.empty + | Some blk -> live_before_block blk after + +module VSetLat = struct + type t = VSet.t + let beq = VSet.equal + let bot = VSet.empty + let lub = VSet.union +end + +module Liveness_Solver = Backward_Dataflow_Solver(VSetLat)(NodeSetBackward) + +let liveness_analysis f = + match Liveness_Solver.fixpoint (successors f) (transfer_live f) [] with + | None -> assert false + | Some lv -> lv + +(* Pair the instructions of a block with their live-before sets *) + +let pair_block_live blk after = + let rec pair_rec accu after = function + | [] -> accu + | instr :: blk -> + let before = live_before instr after in + pair_rec ((instr, before) :: accu) before blk in + pair_rec [] after (List.rev blk) + + +(**************** Dead code elimination **********************) + +(* Eliminate pure instructions whose results are not used later. 
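+   Candidates for removal are moves, parallel moves, operations and loads;
+   stores, calls, builtins and control instructions are always kept.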
*) + +let rec dce_parmove srcs dsts after = + match srcs, dsts with + | [], [] -> [], [] + | src1 :: srcs, dst1 :: dsts -> + let (srcs', dsts') = dce_parmove srcs dsts after in + if VSet.mem dst1 after + then (src1 :: srcs', dst1 :: dsts') + else (srcs', dsts') + | _, _ -> assert false + +let dce_instr instr after k = + match instr with + | Xmove(src, dst) -> + if VSet.mem dst after + then instr :: k + else k + | Xparmove(srcs, dsts, itmp, ftmp) -> + let (srcs', dsts') = dce_parmove srcs dsts after in + Xparmove(srcs', dsts', itmp, ftmp) :: k + | Xop(op, args, res) -> + if VSet.mem res after + then instr :: k + else k + | Xload(chunk, addr, args, dst) -> + if VSet.mem dst after + then instr :: k + else k + | _ -> + instr :: k + +let rec dce_block blk after = + match blk with + | [] -> (after, []) + | instr :: blk' -> + let (after', tblk') = dce_block blk' after in + (live_before instr after', dce_instr instr after' tblk') + +let dead_code_elimination f liveness = + { f with fn_code = + PTree.map (fun pc blk -> snd(dce_block blk (PMap.get pc liveness))) + f.fn_code } + + +(*********************** Spill costs ****************************) + +(* Estimate spill costs and count other statistics for every variable. + Variables that must not be spilled are given infinite costs. *) + +let spill_costs f = + + let costs = ref PTree.empty in + + let get_stats r = + match PTree.get r !costs with + | Some st -> st + | None -> + let st = {cost = 0; usedefs = 0} in + costs := PTree.set r st !costs; + st in + + let charge amount uses v = + match v with + | L l -> () + | V(r, ty) -> + let st = get_stats r in + let c1 = st.cost + amount in + let c2 = if c1 >= 0 then c1 else max_int (* overflow *) in + st.cost <- c2; + st.usedefs <- st.usedefs + uses in + + let charge_list amount uses vl = + List.iter (charge amount uses) vl in + + let charge_ros amount ros = + match ros with Coq_inl v -> charge amount 1 v | Coq_inr id -> () in + + let charge_instr = function + | Xmove(src, dst) -> + charge 1 1 src; charge 1 1 dst + | Xreload(src, dst) -> + charge 1 1 src; charge max_int 1 dst + (* dest must not be spilled! *) + | Xspill(src, dst) -> + charge max_int 1 src; charge 1 1 dst + (* source must not be spilled! 
*) + | Xparmove(srcs, dsts, itmp, ftmp) -> + charge_list 1 1 srcs; charge_list 1 1 dsts; + charge max_int 0 itmp; charge max_int 0 ftmp + (* temps must not be spilled *) + | Xop(op, args, res) -> + charge_list 10 1 args; charge 10 1 res + | Xload(chunk, addr, args, dst) -> + charge_list 10 1 args; charge 10 1 dst + | Xstore(chunk, addr, args, src) -> + charge_list 10 1 args; charge 10 1 src + | Xcall(sg, vos, args, res) -> + charge_ros 10 vos + | Xtailcall(sg, vos, args) -> + charge_ros 10 vos + | Xbuiltin(ef, args, res) -> + begin match ef with + | EF_vstore _ | EF_vstore_global _ | EF_memcpy _ -> + (* result is not used but should not be spilled *) + charge_list 10 1 args; charge_list max_int 0 res + | EF_annot _ -> + (* arguments are not actually used, so don't charge; + result is never used but should not be spilled *) + charge_list max_int 0 res + | EF_annot_val _ -> + (* like a move *) + charge_list 1 1 args; charge_list 1 1 res + | _ -> + charge_list 10 1 args; charge_list 10 1 res + end + | Xbranch _ -> () + | Xcond(cond, args, _, _) -> + charge_list 10 1 args + | Xjumptable(arg, _) -> + charge 10 1 arg + | Xreturn optarg -> + () in + + let charge_block blk = List.iter charge_instr blk in + + PTree.fold + (fun () pc blk -> charge_block blk) + f.fn_code (); + if !option_dalloctrace then begin + fprintf !pp "------------------ Unspillable variables --------------@ @."; + fprintf !pp "@["; + PTree.fold + (fun () r st -> + if st.cost = max_int then fprintf !pp "@ x%ld" (P.to_int32 r)) + !costs (); + fprintf !pp "@]@ @." + end; + (* Result is cost function: pseudoreg -> stats *) + get_stats + + +(********* Construction and coloring of the interference graph **************) + +let add_interfs_def g res live = + VSet.iter (fun v -> if v <> res then IRC.add_interf g v res) live + +let add_interfs_move g src dst live = + VSet.iter (fun v -> if v <> src && v <> dst then IRC.add_interf g v dst) live + +let add_interfs_destroyed g live mregs = + List.iter + (fun mr -> VSet.iter (IRC.add_interf g (L (R mr))) live) + mregs + +let add_interfs_live g live v = + VSet.iter (fun v' -> IRC.add_interf g v v') live + +let add_interfs_list g v vl = + List.iter (IRC.add_interf g v) vl + +let rec add_interfs_pairwise g = function + | [] -> () + | v1 :: vl -> add_interfs_list g v1 vl; add_interfs_pairwise g vl + +let add_interfs_instr g instr live = + match instr with + | Xmove(src, dst) | Xspill(src, dst) | Xreload(src, dst) -> + IRC.add_pref g src dst; + add_interfs_move g src dst live + | Xparmove(srcs, dsts, itmp, ftmp) -> + List.iter2 (IRC.add_pref g) srcs dsts; + (* Interferences with live across *) + let across = vset_removelist dsts live in + List.iter (add_interfs_live g across) dsts; + add_interfs_live g across itmp; add_interfs_live g across ftmp; + (* All destinations must be pairwise different *) + add_interfs_pairwise g dsts; + (* The temporaries must be different from sources and dests *) + add_interfs_list g itmp srcs; add_interfs_list g itmp dsts; + add_interfs_list g ftmp srcs; add_interfs_list g ftmp dsts; + (* Take into account destroyed reg when accessing Incoming param *) + if List.exists (function (L(S(Incoming, _, _))) -> true | _ -> false) srcs + then add_interfs_list g (vmreg temp_for_parent_frame) dsts + | Xop(op, args, res) -> + begin match is_two_address op args with + | None -> + add_interfs_def g res live; + | Some(arg1, argl) -> + (* Treat as "res := arg1; res := op(res, argl)" *) + add_interfs_def g res live; + IRC.add_pref g arg1 res; + add_interfs_move g arg1 res + 
(vset_addlist (res :: argl) (VSet.remove res live)) + end; + add_interfs_destroyed g (VSet.remove res live) (destroyed_by_op op); + | Xload(chunk, addr, args, dst) -> + add_interfs_def g dst live; + add_interfs_destroyed g (VSet.remove dst live) + (destroyed_by_load chunk addr) + | Xstore(chunk, addr, args, src) -> + add_interfs_destroyed g live (destroyed_by_store chunk addr) + | Xcall(sg, vos, args, res) -> + add_interfs_destroyed g (vset_removelist res live) destroyed_at_call + | Xtailcall(sg, Coq_inl v, args) -> + List.iter (fun r -> IRC.add_interf g (vmreg r) v) int_callee_save_regs + | Xtailcall(sg, Coq_inr id, args) -> + () + | Xbuiltin(ef, args, res) -> + (* Interferences with live across *) + let across = vset_removelist res live in + List.iter (add_interfs_live g across) res; + (* All results must be pairwise different *) + add_interfs_pairwise g res; + add_interfs_destroyed g across (destroyed_by_builtin ef); + begin match ef, args, res with + | EF_annot_val _, [arg], [res] -> IRC.add_pref g arg res (* like a move *) + | _ -> () + end + | Xbranch s -> + () + | Xcond(cond, args, s1, s2) -> + add_interfs_destroyed g live (destroyed_by_cond cond) + | Xjumptable(arg, tbl) -> + add_interfs_destroyed g live destroyed_by_jumptable + | Xreturn optarg -> + () + +let rec add_interfs_block g blk live = + match blk with + | [] -> live + | instr :: blk' -> + let live' = add_interfs_block g blk' live in + add_interfs_instr g instr live'; + live_before instr live' + +let find_coloring f liveness = + (*type_function f; (* for debugging *)*) + let g = IRC.init (spill_costs f) in + PTree.fold + (fun () pc blk -> ignore (add_interfs_block g blk (PMap.get pc liveness))) + f.fn_code (); + IRC.coloring g + + +(*********** Determination of variables that need spill code insertion *****) + +let is_reg alloc v = + match alloc v with R _ -> true | S _ -> false + +let add_tospill alloc v ts = + match alloc v with R _ -> ts | S _ -> VSet.add v ts + +let addlist_tospill alloc vl ts = + List.fold_right (add_tospill alloc) vl ts + +let addros_tospill alloc ros ts = + match ros with Coq_inl v -> add_tospill alloc v ts | Coq_inr s -> ts + +let tospill_instr alloc instr ts = + match instr with + | Xmove(src, dst) -> + if is_reg alloc src || is_reg alloc dst || alloc src = alloc dst + then ts + else VSet.add src (VSet.add dst ts) + | Xreload(src, dst) -> + assert (is_reg alloc dst); + ts + | Xspill(src, dst) -> + assert (is_reg alloc src); + ts + | Xparmove(srcs, dsts, itmp, ftmp) -> + assert (is_reg alloc itmp && is_reg alloc ftmp); + ts + | Xop(op, args, res) -> + addlist_tospill alloc args (add_tospill alloc res ts) + | Xload(chunk, addr, args, dst) -> + addlist_tospill alloc args (add_tospill alloc dst ts) + | Xstore(chunk, addr, args, src) -> + addlist_tospill alloc args (add_tospill alloc src ts) + | Xcall(sg, vos, args, res) -> + addros_tospill alloc vos ts + | Xtailcall(sg, vos, args) -> + addros_tospill alloc vos ts + | Xbuiltin(ef, args, res) -> + begin match ef with + | EF_annot _ -> ts + | _ -> addlist_tospill alloc args (addlist_tospill alloc res ts) + end + | Xbranch s -> + ts + | Xcond(cond, args, s1, s2) -> + addlist_tospill alloc args ts + | Xjumptable(arg, tbl) -> + add_tospill alloc arg ts + | Xreturn optarg -> + ts + +let rec tospill_block alloc blk ts = + match blk with + | [] -> ts + | instr :: blk' -> tospill_block alloc blk' (tospill_instr alloc instr ts) + +let tospill_function f alloc = + PTree.fold + (fun ts pc blk -> tospill_block alloc blk ts) + f.fn_code VSet.empty + + 
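+(* Illustration: the test performed by [add_tospill] above amounts to the
+   (hypothetical) helper
+     let needs_spill alloc v =
+       match alloc v with R _ -> false | S _ -> true
+   i.e. a variable must be spilled exactly when the coloring assigned it a
+   stack slot rather than a machine register. *)
+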
+(********************* Spilling ***********************) + +(* We follow a semi-naive spilling strategy. By default, we spill at + every definition of a variable that could not be allocated a register, + and we reload before every use. However, we also maintain a list of + equations of the form [spilled-var = temp] that keep track of + variables that were recently spilled or reloaded. Based on these + equations, we can avoid reloading a spilled variable if its value + is still available in a temporary register. *) + +let rec find_reg_containing v = function + | [] -> None + | (var, temp, date) :: eqs -> + if var = v then Some temp else find_reg_containing v eqs + +let add v t eqs = (v, t, 0) :: eqs + +let kill x eqs = + List.filter (fun (v, t, date) -> v <> x && t <> x) eqs + +let reload_var tospill eqs v = + if not (VSet.mem v tospill) then + (v, [], eqs) + else + match find_reg_containing v eqs with + | Some t -> + (*printf "Reusing %a for %a@ @." PrintXTL.var t PrintXTL.var v;*) + (t, [], eqs) + | None -> + let t = new_temp (typeof v) in (t, [Xreload(v, t)], add v t eqs) + +let rec reload_vars tospill eqs vl = + match vl with + | [] -> ([], [], eqs) + | v1 :: vs -> + let (t1, c1, eqs1) = reload_var tospill eqs v1 in + let (ts, cs, eqs2) = reload_vars tospill eqs1 vs in + (t1 :: ts, c1 @ cs, eqs2) + +let save_var tospill eqs v = + if not (VSet.mem v tospill) then + (v, [], kill v eqs) + else begin + let t = new_temp (typeof v) in + (t, [Xspill(t, v)], add v t (kill v eqs)) + end + +let rec save_vars tospill eqs vl = + match vl with + | [] -> ([], [], eqs) + | v1 :: vs -> + let (t1, c1, eqs1) = save_var tospill eqs v1 in + let (ts, cs, eqs2) = save_vars tospill eqs1 vs in + (t1 :: ts, c1 @ cs, eqs2) + +(* Trimming equations when we have too many or when they are too old. + The goal is to limit the live range of unspillable temporaries. + By setting [max_age] to zero, we can effectively deactivate + the reuse strategy and fall back to a naive "reload at every use" + strategy. *) + +let max_age = ref 3 +let max_num_eqs = ref 3 + +let rec trim count eqs = + if count <= 0 then [] else + match eqs with + | [] -> [] + | (v, t, date) :: eqs' -> + if date <= !max_age + then (v, t, date + 1) :: trim (count - 1) eqs' + else [] + +(* Insertion of spill and reload instructions. 
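+   For example (in the simple, non-two-address case), if [x] is spilled,
+   [y := x + 1; z := x * 2] becomes [t := reload x; y := t + 1; z := t * 2]:
+   the second use of [x] reuses [t] thanks to the recorded equation [x = t].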
*) + +let spill_instr tospill eqs instr = + let eqs = trim !max_num_eqs eqs in + match instr with + | Xmove(src, dst) -> + if VSet.mem src tospill && VSet.mem dst tospill then begin + let (src', c1, eqs1) = reload_var tospill eqs src in + (c1 @ [Xspill(src', dst)], add dst src' (kill dst eqs1)) + end else begin + ([instr], kill dst eqs) + end + | Xreload(src, dst) -> + assert false + | Xspill(src, dst) -> + assert false + | Xparmove(srcs, dsts, itmp, ftmp) -> + ([instr], List.fold_right kill dsts eqs) + | Xop(op, args, res) -> + begin match is_two_address op args with + | None -> + let (args', c1, eqs1) = reload_vars tospill eqs args in + let (res', c2, eqs2) = save_var tospill eqs1 res in + (c1 @ Xop(op, args', res') :: c2, eqs2) + | Some(arg1, argl) -> + begin match VSet.mem res tospill, VSet.mem arg1 tospill with + | false, false -> + let (argl', c1, eqs1) = reload_vars tospill eqs argl in + (c1 @ [Xop(op, arg1 :: argl', res)], kill res eqs1) + | true, false -> + let tmp = new_temp (typeof res) in + let (argl', c1, eqs1) = reload_vars tospill eqs argl in + (c1 @ [Xmove(arg1, tmp); Xop(op, tmp :: argl', tmp); Xspill(tmp, res)], + add res tmp (kill res eqs1)) + | false, true -> + let eqs1 = add arg1 res (kill res eqs) in + let (argl', c1, eqs2) = reload_vars tospill eqs1 argl in + (Xreload(arg1, res) :: c1 @ [Xop(op, res :: argl', res)], + kill res eqs2) + | true, true -> + let tmp = new_temp (typeof res) in + let eqs1 = add arg1 tmp eqs in + let (argl', c1, eqs2) = reload_vars tospill eqs1 argl in + (Xreload(arg1, tmp) :: c1 @ [Xop(op, tmp :: argl', tmp); Xspill(tmp, res)], + add res tmp (kill tmp (kill res eqs2))) + end + end + | Xload(chunk, addr, args, dst) -> + let (args', c1, eqs1) = reload_vars tospill eqs args in + let (dst', c2, eqs2) = save_var tospill eqs1 dst in + (c1 @ Xload(chunk, addr, args', dst') :: c2, eqs2) + | Xstore(chunk, addr, args, src) -> + let (args', c1, eqs1) = reload_vars tospill eqs args in + let (src', c2, eqs2) = reload_var tospill eqs1 src in + (c1 @ c2 @ [Xstore(chunk, addr, args', src')], eqs2) + | Xcall(sg, Coq_inl v, args, res) -> + let (v', c1, eqs1) = reload_var tospill eqs v in + (c1 @ [Xcall(sg, Coq_inl v', args, res)], []) + | Xcall(sg, Coq_inr id, args, res) -> + ([instr], []) + | Xtailcall(sg, Coq_inl v, args) -> + let (v', c1, eqs1) = reload_var tospill eqs v in + (c1 @ [Xtailcall(sg, Coq_inl v', args)], []) + | Xtailcall(sg, Coq_inr id, args) -> + ([instr], []) + | Xbuiltin(ef, args, res) -> + begin match ef with + | EF_annot _ -> + ([instr], eqs) + | _ -> + let (args', c1, eqs1) = reload_vars tospill eqs args in + let (res', c2, eqs2) = save_vars tospill eqs1 res in + (c1 @ Xbuiltin(ef, args', res') :: c2, eqs2) + end + | Xbranch s -> + ([instr], eqs) + | Xcond(cond, args, s1, s2) -> + let (args', c1, eqs1) = reload_vars tospill eqs args in + (c1 @ [Xcond(cond, args', s1, s2)], eqs1) + | Xjumptable(arg, tbl) -> + let (arg', c1, eqs1) = reload_var tospill eqs arg in + (c1 @ [Xjumptable(arg', tbl)], eqs1) + | Xreturn optarg -> + ([instr], []) + +let rec spill_block tospill pc blk eqs = + match blk with + | [] -> ([], eqs) + | instr :: blk' -> + let (c1, eqs1) = spill_instr tospill eqs instr in + let (c2, eqs2) = spill_block tospill pc blk' eqs1 in + (c1 @ c2, eqs2) + +(* +let spill_block tospill pc blk eqs = + printf "@[spill_block: at %ld: " (camlint_of_positive pc); + List.iter (fun (x,y,d) -> printf "@ %a=%a" PrintXTL.var x PrintXTL.var y) eqs; + printf "@]@."; + spill_block tospill pc blk eqs +*) + +let spill_function f tospill round = + 
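+  (* later rounds reuse temporaries less aggressively: past round 10 equations age out faster; past round 20 reuse is disabled ([max_age] = 0) *)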
max_num_eqs := 3; + max_age := (if round <= 10 then 3 else if round <= 20 then 1 else 0); + transform_basic_blocks (spill_block tospill) [] f + + +(***************** Generation of LTL from XTL ***********************) + +(** Apply a register allocation to an XTL function, producing an LTL function. + Raise [Bad_LTL] if some pseudoregisters were mapped to stack locations + while machine registers were expected, or in other words if spilling + and reloading code must be inserted. *) + +exception Bad_LTL + +let mreg_of alloc v = match alloc v with R mr -> mr | S _ -> raise Bad_LTL + +let mregs_of alloc vl = List.map (mreg_of alloc) vl + +let mros_of alloc vos = sum_left_map (mreg_of alloc) vos + +let make_move src dst k = + match src, dst with + | R rsrc, R rdst -> + if rsrc = rdst then k else LTL.Lop(Omove, [rsrc], rdst) :: k + | R rsrc, S(sl, ofs, ty) -> + LTL.Lsetstack(rsrc, sl, ofs, ty) :: k + | S(sl, ofs, ty), R rdst -> + LTL.Lgetstack(sl, ofs, ty, rdst) :: k + | S _, S _ -> + if src = dst then k else raise Bad_LTL + +type parmove_status = To_move | Being_moved | Moved + +let make_parmove srcs dsts itmp ftmp k = + let src = Array.of_list srcs + and dst = Array.of_list dsts in + let n = Array.length src in + assert (Array.length dst = n); + let status = Array.make n To_move in + let temp_for = + function Tint -> itmp | Tfloat -> ftmp | Tlong -> assert false in + let code = ref [] in + let add_move s d = + match s, d with + | R rs, R rd -> + code := LTL.Lop(Omove, [rs], rd) :: !code + | R rs, S(sl, ofs, ty) -> + code := LTL.Lsetstack(rs, sl, ofs, ty) :: !code + | S(sl, ofs, ty), R rd -> + code := LTL.Lgetstack(sl, ofs, ty, rd) :: !code + | S(sls, ofss, tys), S(sld, ofsd, tyd) -> + let tmp = temp_for tys in + (* code will be reversed at the end *) + code := LTL.Lsetstack(tmp, sld, ofsd, tyd) :: + LTL.Lgetstack(sls, ofss, tys, tmp) :: !code + in + let rec move_one i = + if src.(i) <> dst.(i) then begin + status.(i) <- Being_moved; + for j = 0 to n - 1 do + if src.(j) = dst.(i) then + match status.(j) with + | To_move -> + move_one j + | Being_moved -> + let tmp = R (temp_for (Loc.coq_type src.(j))) in + add_move src.(j) tmp; + src.(j) <- tmp + | Moved -> + () + done; + add_move src.(i) dst.(i); + status.(i) <- Moved + end in + for i = 0 to n - 1 do + if status.(i) = To_move then move_one i + done; + List.rev_append !code k + +let transl_instr alloc instr k = + match instr with + | Xmove(src, dst) | Xreload(src, dst) | Xspill(src, dst) -> + make_move (alloc src) (alloc dst) k + | Xparmove(srcs, dsts, itmp, ftmp) -> + make_parmove (List.map alloc srcs) (List.map alloc dsts) + (mreg_of alloc itmp) (mreg_of alloc ftmp) k + | Xop(op, args, res) -> + let rargs = mregs_of alloc args + and rres = mreg_of alloc res in + begin match is_two_address op rargs with + | None -> + LTL.Lop(op, rargs, rres) :: k + | Some(rarg1, rargl) -> + if rarg1 = rres then + LTL.Lop(op, rargs, rres) :: k + else + LTL.Lop(Omove, [rarg1], rres) :: + LTL.Lop(op, rres :: rargl, rres) :: k + end + | Xload(chunk, addr, args, dst) -> + LTL.Lload(chunk, addr, mregs_of alloc args, mreg_of alloc dst) :: k + | Xstore(chunk, addr, args, src) -> + LTL.Lstore(chunk, addr, mregs_of alloc args, mreg_of alloc src) :: k + | Xcall(sg, vos, args, res) -> + LTL.Lcall(sg, mros_of alloc vos) :: k + | Xtailcall(sg, vos, args) -> + LTL.Ltailcall(sg, mros_of alloc vos) :: [] + | Xbuiltin(ef, args, res) -> + begin match ef with + | EF_annot _ -> + LTL.Lannot(ef, List.map alloc args) :: k + | _ -> + LTL.Lbuiltin(ef, mregs_of alloc args, mregs_of 
alloc res) :: k + end + | Xbranch s -> + LTL.Lbranch s :: [] + | Xcond(cond, args, s1, s2) -> + LTL.Lcond(cond, mregs_of alloc args, s1, s2) :: [] + | Xjumptable(arg, tbl) -> + LTL.Ljumptable(mreg_of alloc arg, tbl) :: [] + | Xreturn optarg -> + LTL.Lreturn :: [] + +let rec transl_block alloc blk = + match blk with + | [] -> [] + | instr :: blk' -> transl_instr alloc instr (transl_block alloc blk') + +let transl_function fn alloc = + { LTL.fn_sig = fn.fn_sig; + LTL.fn_stacksize = fn.fn_stacksize; + LTL.fn_entrypoint = fn.fn_entrypoint; + LTL.fn_code = PTree.map1 (transl_block alloc) fn.fn_code + } + + +(******************* All together *********************) + +exception Timeout + +let rec first_round f liveness = + let alloc = find_coloring f liveness in + if !option_dalloctrace then begin + fprintf !pp "-------------- After initial register allocation@ @."; + PrintXTL.print_function !pp ~alloc: alloc ~live: liveness f + end; + let ts = tospill_function f alloc in + if VSet.is_empty ts then success f alloc else more_rounds f ts 1 + +and more_rounds f ts count = + if count >= 40 then raise Timeout; + let f' = spill_function f ts count in + let liveness = liveness_analysis f' in + let alloc = find_coloring f' liveness in + if !option_dalloctrace then begin + fprintf !pp "-------------- After register allocation (round %d)@ @." count; + PrintXTL.print_function !pp ~alloc: alloc ~live: liveness f' + end; + let ts' = tospill_function f' alloc in + if VSet.is_empty ts' + then success f' alloc + else begin + if !option_dalloctrace then begin + fprintf !pp "--- Remain to be spilled:@ @."; + VSet.iter (fun v -> fprintf !pp "%a " PrintXTL.var v) ts'; + fprintf !pp "@ @." + end; + more_rounds f (VSet.union ts ts') (count + 1) + end + +and success f alloc = + let f' = transl_function f alloc in + if !option_dalloctrace then begin + fprintf !pp "-------------- Candidate allocation@ @."; + PrintLTL.print_function !pp P.one f' + end; + f' + +open Errors + +let regalloc f = + init_trace(); + reset_temps(); + let f1 = Splitting.rename_function f in + match RTLtyping.type_function f1 with + | Error msg -> + Error(MSG (coqstring_of_camlstring "RTL code after splitting is ill-typed:") :: msg) + | OK tyenv -> + let f2 = function_of_RTL_function f1 tyenv in + let liveness = liveness_analysis f2 in + let f3 = dead_code_elimination f2 liveness in + if !option_dalloctrace then begin + fprintf !pp "-------------- Initial XTL@ @."; + PrintXTL.print_function !pp f3 + end; + try + OK(first_round f3 liveness) + with + | Timeout -> + Error(msg (coqstring_of_camlstring "Spilling fails to converge")) + | Type_error_at pc -> + Error [MSG(coqstring_of_camlstring "Ill-typed XTL code at PC "); + POS pc] + | Bad_LTL -> + Error(msg (coqstring_of_camlstring "Bad LTL after spilling")) -- cgit