From 255cee09b71255051c2b40eae0c88bffce1f6f32 Mon Sep 17 00:00:00 2001 From: xleroy Date: Sat, 20 Apr 2013 07:54:52 +0000 Subject: Big merge of the newregalloc-int64 branch. Lots of changes in two directions: 1- new register allocator (+ live range splitting, spilling&reloading, etc) based on a posteriori validation using the Rideau-Leroy algorithm 2- support for 64-bit integer arithmetic (type "long long"). git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2200 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e --- backend/Regalloc.ml | 986 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 986 insertions(+) create mode 100644 backend/Regalloc.ml (limited to 'backend/Regalloc.ml') diff --git a/backend/Regalloc.ml b/backend/Regalloc.ml new file mode 100644 index 00000000..fe981e32 --- /dev/null +++ b/backend/Regalloc.ml @@ -0,0 +1,986 @@ +(* *********************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* *) +(* Copyright Institut National de Recherche en Informatique et en *) +(* Automatique. All rights reserved. This file is distributed *) +(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* *) +(* *********************************************************************) + +(* Register allocation by coloring of an interference graph *) + +(* The algorithm in a nutshell: + + - Split live ranges + - Convert from RTL to XTL + - Eliminate dead code + - Repeat: + . Construct interference graph + . Color interference graph using IRC algorithm + . Check for variables that were spilled and must be in registers + . If none, convert to LTL and exit. + . If some, insert spill and reload instructions and try again + End Repeat +*) + +open Format +open Clflags +open Camlcoq +open Datatypes +open Coqlib +open Maps +open AST +open Memdata +open Kildall +open Registers +open Op +open Machregs +open Locations +open Conventions1 +open Conventions +open IRC +open XTL + +(* Detection of 2-address operations *) + +let is_two_address op args = + if two_address_op op then + match args with + | [] -> assert false + | arg1 :: argl -> Some(arg1, argl) + else None + +(* For tracing *) + +let destination_alloctrace : string option ref = ref None +let pp = ref std_formatter + +let init_trace () = + if !option_dalloctrace && !pp == std_formatter then begin + match !destination_alloctrace with + | None -> () (* should not happen *) + | Some f -> pp := formatter_of_out_channel (open_out f) + end + + +(**************** Initial conversion from RTL to XTL **************) + +let vreg tyenv r = V(r, tyenv r) + +let vregs tyenv rl = List.map (vreg tyenv) rl + +let rec expand_regs tyenv = function + | [] -> [] + | r :: rl -> + match tyenv r with + | Tlong -> V(r, Tint) :: V(twin_reg r, Tint) :: expand_regs tyenv rl + | ty -> V(r, ty) :: expand_regs tyenv rl + +let constrain_reg v c = + match c with + | None -> v + | Some mr -> L(R mr) + +let rec constrain_regs vl cl = + match vl, cl with + | [], _ -> [] + | v1 :: vl', [] -> vl + | v1 :: vl', Some mr1 :: cl' -> L(R mr1) :: constrain_regs vl' cl' + | v1 :: vl', None :: cl' -> v1 :: constrain_regs vl' cl' + +let move v1 v2 k = + if v1 = v2 then k else Xmove(v1, v2) :: k + +let rec movelist vl1 vl2 k = + match vl1, vl2 with + | [], [] -> k + | v1 :: vl1, v2 :: vl2 -> move v1 v2 (movelist vl1 vl2 k) + | _, _ -> assert false + +let xparmove srcs dsts k = + assert (List.length srcs = List.length dsts); + match srcs, dsts with + | [], [] -> k 
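+  (* a single move needs neither an Xparmove nor its two scratch temporaries *)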
+ | [src], [dst] -> Xmove(src, dst) :: k + | _, _ -> Xparmove(srcs, dsts, new_temp Tint, new_temp Tfloat) :: k + +(* Return the XTL basic block corresponding to the given RTL instruction. + Move and parallel move instructions are introduced to honor calling + conventions and register constraints on some operations. + 64-bit integer variables are split in two 32-bit halves. *) + +let block_of_RTL_instr funsig tyenv = function + | RTL.Inop s -> + [Xbranch s] + | RTL.Iop(Omove, [arg], res, s) -> + if tyenv arg = Tlong then + [Xmove(V(arg, Tint), V(res, Tint)); + Xmove(V(twin_reg arg, Tint), V(twin_reg res, Tint)); + Xbranch s] + else + [Xmove(vreg tyenv arg, vreg tyenv res); Xbranch s] + | RTL.Iop(Omakelong, [arg1; arg2], res, s) -> + [Xmove(V(arg1, Tint), V(res, Tint)); + Xmove(V(arg2, Tint), V(twin_reg res, Tint)); + Xbranch s] + | RTL.Iop(Olowlong, [arg], res, s) -> + [Xmove(V(twin_reg arg, Tint), V(res, Tint)); Xbranch s] + | RTL.Iop(Ohighlong, [arg], res, s) -> + [Xmove(V(arg, Tint), V(res, Tint)); Xbranch s] + | RTL.Iop(op, args, res, s) -> + let (cargs, cres) = mregs_for_operation op in + let args1 = vregs tyenv args and res1 = vreg tyenv res in + let args2 = constrain_regs args1 cargs and res2 = constrain_reg res1 cres in + let (args3, res3) = + match is_two_address op args2 with + | None -> + (args2, res2) + | Some(arg, args2') -> + if arg = res2 || not (List.mem res2 args2') then + (args2, res2) + else + let t = new_temp (tyenv res) in (t :: args2', t) in + movelist args1 args3 (Xop(op, args3, res3) :: move res3 res1 [Xbranch s]) + | RTL.Iload(chunk, addr, args, dst, s) -> + if chunk = Mint64 then begin + match offset_addressing addr (coqint_of_camlint 4l) with + | None -> assert false + | Some addr' -> + [Xload(Mint32, addr, vregs tyenv args, + V((if big_endian then dst else twin_reg dst), Tint)); + Xload(Mint32, addr', vregs tyenv args, + V((if big_endian then twin_reg dst else dst), Tint)); + Xbranch s] + end else + [Xload(chunk, addr, vregs tyenv args, vreg tyenv dst); Xbranch s] + | RTL.Istore(chunk, addr, args, src, s) -> + if chunk = Mint64 then begin + match offset_addressing addr (coqint_of_camlint 4l) with + | None -> assert false + | Some addr' -> + [Xstore(Mint32, addr, vregs tyenv args, + V((if big_endian then src else twin_reg src), Tint)); + Xstore(Mint32, addr', vregs tyenv args, + V((if big_endian then twin_reg src else src), Tint)); + Xbranch s] + end else + [Xstore(chunk, addr, vregs tyenv args, vreg tyenv src); Xbranch s] + | RTL.Icall(sg, ros, args, res, s) -> + let args' = vlocs (loc_arguments sg) + and res' = vmregs (loc_result sg) in + xparmove (expand_regs tyenv args) args' + (Xcall(sg, sum_left_map (vreg tyenv) ros, args', res') :: + xparmove res' (expand_regs tyenv [res]) + [Xbranch s]) + | RTL.Itailcall(sg, ros, args) -> + let args' = vlocs (loc_arguments sg) in + xparmove (expand_regs tyenv args) args' + [Xtailcall(sg, sum_left_map (vreg tyenv) ros, args')] + | RTL.Ibuiltin(ef, args, res, s) -> + let (cargs, cres) = mregs_for_builtin ef in + let args1 = expand_regs tyenv args and res1 = expand_regs tyenv [res] in + let args2 = constrain_regs args1 cargs and res2 = constrain_regs res1 cres in + movelist args1 args2 + (Xbuiltin(ef, args2, res2) :: movelist res2 res1 [Xbranch s]) + | RTL.Icond(cond, args, s1, s2) -> + [Xcond(cond, vregs tyenv args, s1, s2)] + | RTL.Ijumptable(arg, tbl) -> + [Xjumptable(vreg tyenv arg, tbl)] + | RTL.Ireturn None -> + [Xreturn []] + | RTL.Ireturn (Some arg) -> + let args' = vmregs (loc_result funsig) in + xparmove 
(expand_regs tyenv [arg]) args' [Xreturn args'] + +(* One above the [pc] nodes of the given RTL function *) + +let next_pc f = + PTree.fold + (fun npc pc i -> if P.lt pc npc then npc else P.succ pc) + f.RTL.fn_code P.one + +(* Translate an RTL function to an XTL function *) + +let function_of_RTL_function f tyenv = + let xc = PTree.map1 (block_of_RTL_instr f.RTL.fn_sig tyenv) f.RTL.fn_code in + (* Add moves for function parameters *) + let pc_entrypoint = next_pc f in + let b_entrypoint = + xparmove (vlocs (loc_parameters f.RTL.fn_sig)) + (expand_regs tyenv f.RTL.fn_params) + [Xbranch f.RTL.fn_entrypoint] in + { fn_sig = f.RTL.fn_sig; + fn_stacksize = f.RTL.fn_stacksize; + fn_entrypoint = pc_entrypoint; + fn_code = PTree.set pc_entrypoint b_entrypoint xc } + + +(***************** Liveness analysis *****************) + +let vset_removelist vl after = List.fold_right VSet.remove vl after +let vset_addlist vl after = List.fold_right VSet.add vl after +let vset_addros vos after = + match vos with Coq_inl v -> VSet.add v after | Coq_inr id -> after + +let live_before instr after = + match instr with + | Xmove(src, dst) | Xspill(src, dst) | Xreload(src, dst) -> + if VSet.mem dst after + then VSet.add src (VSet.remove dst after) + else after + | Xparmove(srcs, dsts, itmp, ftmp) -> + vset_addlist srcs (vset_removelist dsts after) + | Xop(op, args, res) -> + if VSet.mem res after + then vset_addlist args (VSet.remove res after) + else after + | Xload(chunk, addr, args, dst) -> + if VSet.mem dst after + then vset_addlist args (VSet.remove dst after) + else after + | Xstore(chunk, addr, args, src) -> + vset_addlist args (VSet.add src after) + | Xcall(sg, ros, args, res) -> + vset_addlist args (vset_addros ros (vset_removelist res after)) + | Xtailcall(sg, ros, args) -> + vset_addlist args (vset_addros ros VSet.empty) + | Xbuiltin(ef, args, res) -> + vset_addlist args (vset_removelist res after) + | Xbranch s -> + after + | Xcond(cond, args, s1, s2) -> + List.fold_right VSet.add args after + | Xjumptable(arg, tbl) -> + VSet.add arg after + | Xreturn args -> + vset_addlist args VSet.empty + +let rec live_before_block blk after = + match blk with + | [] -> after + | instr :: blk -> live_before instr (live_before_block blk after) + +let transfer_live f pc after = + match PTree.get pc f.fn_code with + | None -> VSet.empty + | Some blk -> live_before_block blk after + +module VSetLat = struct + type t = VSet.t + let beq = VSet.equal + let bot = VSet.empty + let lub = VSet.union +end + +module Liveness_Solver = Backward_Dataflow_Solver(VSetLat)(NodeSetBackward) + +let liveness_analysis f = + match Liveness_Solver.fixpoint (successors f) (transfer_live f) [] with + | None -> assert false + | Some lv -> lv + +(* Pair the instructions of a block with their live-before sets *) + +let pair_block_live blk after = + let rec pair_rec accu after = function + | [] -> accu + | instr :: blk -> + let before = live_before instr after in + pair_rec ((instr, before) :: accu) before blk in + pair_rec [] after (List.rev blk) + + +(**************** Dead code elimination **********************) + +(* Eliminate pure instructions whose results are not used later. 
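+   Candidates for removal are moves, parallel moves, operations and loads;
+   stores, calls, builtins and control instructions are always kept.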
*) + +let rec dce_parmove srcs dsts after = + match srcs, dsts with + | [], [] -> [], [] + | src1 :: srcs, dst1 :: dsts -> + let (srcs', dsts') = dce_parmove srcs dsts after in + if VSet.mem dst1 after + then (src1 :: srcs', dst1 :: dsts') + else (srcs', dsts') + | _, _ -> assert false + +let dce_instr instr after k = + match instr with + | Xmove(src, dst) -> + if VSet.mem dst after + then instr :: k + else k + | Xparmove(srcs, dsts, itmp, ftmp) -> + let (srcs', dsts') = dce_parmove srcs dsts after in + Xparmove(srcs', dsts', itmp, ftmp) :: k + | Xop(op, args, res) -> + if VSet.mem res after + then instr :: k + else k + | Xload(chunk, addr, args, dst) -> + if VSet.mem dst after + then instr :: k + else k + | _ -> + instr :: k + +let rec dce_block blk after = + match blk with + | [] -> (after, []) + | instr :: blk' -> + let (after', tblk') = dce_block blk' after in + (live_before instr after', dce_instr instr after' tblk') + +let dead_code_elimination f liveness = + { f with fn_code = + PTree.map (fun pc blk -> snd(dce_block blk (PMap.get pc liveness))) + f.fn_code } + + +(*********************** Spill costs ****************************) + +(* Estimate spill costs and count other statistics for every variable. + Variables that must not be spilled are given infinite costs. *) + +let spill_costs f = + + let costs = ref PTree.empty in + + let get_stats r = + match PTree.get r !costs with + | Some st -> st + | None -> + let st = {cost = 0; usedefs = 0} in + costs := PTree.set r st !costs; + st in + + let charge amount uses v = + match v with + | L l -> () + | V(r, ty) -> + let st = get_stats r in + let c1 = st.cost + amount in + let c2 = if c1 >= 0 then c1 else max_int (* overflow *) in + st.cost <- c2; + st.usedefs <- st.usedefs + uses in + + let charge_list amount uses vl = + List.iter (charge amount uses) vl in + + let charge_ros amount ros = + match ros with Coq_inl v -> charge amount 1 v | Coq_inr id -> () in + + let charge_instr = function + | Xmove(src, dst) -> + charge 1 1 src; charge 1 1 dst + | Xreload(src, dst) -> + charge 1 1 src; charge max_int 1 dst + (* dest must not be spilled! *) + | Xspill(src, dst) -> + charge max_int 1 src; charge 1 1 dst + (* source must not be spilled! 
*) + | Xparmove(srcs, dsts, itmp, ftmp) -> + charge_list 1 1 srcs; charge_list 1 1 dsts; + charge max_int 0 itmp; charge max_int 0 ftmp + (* temps must not be spilled *) + | Xop(op, args, res) -> + charge_list 10 1 args; charge 10 1 res + | Xload(chunk, addr, args, dst) -> + charge_list 10 1 args; charge 10 1 dst + | Xstore(chunk, addr, args, src) -> + charge_list 10 1 args; charge 10 1 src + | Xcall(sg, vos, args, res) -> + charge_ros 10 vos + | Xtailcall(sg, vos, args) -> + charge_ros 10 vos + | Xbuiltin(ef, args, res) -> + begin match ef with + | EF_vstore _ | EF_vstore_global _ | EF_memcpy _ -> + (* result is not used but should not be spilled *) + charge_list 10 1 args; charge_list max_int 0 res + | EF_annot _ -> + (* arguments are not actually used, so don't charge; + result is never used but should not be spilled *) + charge_list max_int 0 res + | EF_annot_val _ -> + (* like a move *) + charge_list 1 1 args; charge_list 1 1 res + | _ -> + charge_list 10 1 args; charge_list 10 1 res + end + | Xbranch _ -> () + | Xcond(cond, args, _, _) -> + charge_list 10 1 args + | Xjumptable(arg, _) -> + charge 10 1 arg + | Xreturn optarg -> + () in + + let charge_block blk = List.iter charge_instr blk in + + PTree.fold + (fun () pc blk -> charge_block blk) + f.fn_code (); + if !option_dalloctrace then begin + fprintf !pp "------------------ Unspillable variables --------------@ @."; + fprintf !pp "@["; + PTree.fold + (fun () r st -> + if st.cost = max_int then fprintf !pp "@ x%ld" (P.to_int32 r)) + !costs (); + fprintf !pp "@]@ @." + end; + (* Result is cost function: pseudoreg -> stats *) + get_stats + + +(********* Construction and coloring of the interference graph **************) + +let add_interfs_def g res live = + VSet.iter (fun v -> if v <> res then IRC.add_interf g v res) live + +let add_interfs_move g src dst live = + VSet.iter (fun v -> if v <> src && v <> dst then IRC.add_interf g v dst) live + +let add_interfs_destroyed g live mregs = + List.iter + (fun mr -> VSet.iter (IRC.add_interf g (L (R mr))) live) + mregs + +let add_interfs_live g live v = + VSet.iter (fun v' -> IRC.add_interf g v v') live + +let add_interfs_list g v vl = + List.iter (IRC.add_interf g v) vl + +let rec add_interfs_pairwise g = function + | [] -> () + | v1 :: vl -> add_interfs_list g v1 vl; add_interfs_pairwise g vl + +let add_interfs_instr g instr live = + match instr with + | Xmove(src, dst) | Xspill(src, dst) | Xreload(src, dst) -> + IRC.add_pref g src dst; + add_interfs_move g src dst live + | Xparmove(srcs, dsts, itmp, ftmp) -> + List.iter2 (IRC.add_pref g) srcs dsts; + (* Interferences with live across *) + let across = vset_removelist dsts live in + List.iter (add_interfs_live g across) dsts; + add_interfs_live g across itmp; add_interfs_live g across ftmp; + (* All destinations must be pairwise different *) + add_interfs_pairwise g dsts; + (* The temporaries must be different from sources and dests *) + add_interfs_list g itmp srcs; add_interfs_list g itmp dsts; + add_interfs_list g ftmp srcs; add_interfs_list g ftmp dsts; + (* Take into account destroyed reg when accessing Incoming param *) + if List.exists (function (L(S(Incoming, _, _))) -> true | _ -> false) srcs + then add_interfs_list g (vmreg temp_for_parent_frame) dsts + | Xop(op, args, res) -> + begin match is_two_address op args with + | None -> + add_interfs_def g res live; + | Some(arg1, argl) -> + (* Treat as "res := arg1; res := op(res, argl)" *) + add_interfs_def g res live; + IRC.add_pref g arg1 res; + add_interfs_move g arg1 res + 
(vset_addlist (res :: argl) (VSet.remove res live)) + end; + add_interfs_destroyed g (VSet.remove res live) (destroyed_by_op op); + | Xload(chunk, addr, args, dst) -> + add_interfs_def g dst live; + add_interfs_destroyed g (VSet.remove dst live) + (destroyed_by_load chunk addr) + | Xstore(chunk, addr, args, src) -> + add_interfs_destroyed g live (destroyed_by_store chunk addr) + | Xcall(sg, vos, args, res) -> + add_interfs_destroyed g (vset_removelist res live) destroyed_at_call + | Xtailcall(sg, Coq_inl v, args) -> + List.iter (fun r -> IRC.add_interf g (vmreg r) v) int_callee_save_regs + | Xtailcall(sg, Coq_inr id, args) -> + () + | Xbuiltin(ef, args, res) -> + (* Interferences with live across *) + let across = vset_removelist res live in + List.iter (add_interfs_live g across) res; + (* All results must be pairwise different *) + add_interfs_pairwise g res; + add_interfs_destroyed g across (destroyed_by_builtin ef); + begin match ef, args, res with + | EF_annot_val _, [arg], [res] -> IRC.add_pref g arg res (* like a move *) + | _ -> () + end + | Xbranch s -> + () + | Xcond(cond, args, s1, s2) -> + add_interfs_destroyed g live (destroyed_by_cond cond) + | Xjumptable(arg, tbl) -> + add_interfs_destroyed g live destroyed_by_jumptable + | Xreturn optarg -> + () + +let rec add_interfs_block g blk live = + match blk with + | [] -> live + | instr :: blk' -> + let live' = add_interfs_block g blk' live in + add_interfs_instr g instr live'; + live_before instr live' + +let find_coloring f liveness = + (*type_function f; (* for debugging *)*) + let g = IRC.init (spill_costs f) in + PTree.fold + (fun () pc blk -> ignore (add_interfs_block g blk (PMap.get pc liveness))) + f.fn_code (); + IRC.coloring g + + +(*********** Determination of variables that need spill code insertion *****) + +let is_reg alloc v = + match alloc v with R _ -> true | S _ -> false + +let add_tospill alloc v ts = + match alloc v with R _ -> ts | S _ -> VSet.add v ts + +let addlist_tospill alloc vl ts = + List.fold_right (add_tospill alloc) vl ts + +let addros_tospill alloc ros ts = + match ros with Coq_inl v -> add_tospill alloc v ts | Coq_inr s -> ts + +let tospill_instr alloc instr ts = + match instr with + | Xmove(src, dst) -> + if is_reg alloc src || is_reg alloc dst || alloc src = alloc dst + then ts + else VSet.add src (VSet.add dst ts) + | Xreload(src, dst) -> + assert (is_reg alloc dst); + ts + | Xspill(src, dst) -> + assert (is_reg alloc src); + ts + | Xparmove(srcs, dsts, itmp, ftmp) -> + assert (is_reg alloc itmp && is_reg alloc ftmp); + ts + | Xop(op, args, res) -> + addlist_tospill alloc args (add_tospill alloc res ts) + | Xload(chunk, addr, args, dst) -> + addlist_tospill alloc args (add_tospill alloc dst ts) + | Xstore(chunk, addr, args, src) -> + addlist_tospill alloc args (add_tospill alloc src ts) + | Xcall(sg, vos, args, res) -> + addros_tospill alloc vos ts + | Xtailcall(sg, vos, args) -> + addros_tospill alloc vos ts + | Xbuiltin(ef, args, res) -> + begin match ef with + | EF_annot _ -> ts + | _ -> addlist_tospill alloc args (addlist_tospill alloc res ts) + end + | Xbranch s -> + ts + | Xcond(cond, args, s1, s2) -> + addlist_tospill alloc args ts + | Xjumptable(arg, tbl) -> + add_tospill alloc arg ts + | Xreturn optarg -> + ts + +let rec tospill_block alloc blk ts = + match blk with + | [] -> ts + | instr :: blk' -> tospill_block alloc blk' (tospill_instr alloc instr ts) + +let tospill_function f alloc = + PTree.fold + (fun ts pc blk -> tospill_block alloc blk ts) + f.fn_code VSet.empty + + 
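+(* Illustration: the test performed by [add_tospill] above amounts to the
+   (hypothetical) helper
+     let needs_spill alloc v =
+       match alloc v with R _ -> false | S _ -> true
+   i.e. a variable must be spilled exactly when the coloring assigned it a
+   stack slot rather than a machine register. *)
+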
+(********************* Spilling ***********************) + +(* We follow a semi-naive spilling strategy. By default, we spill at + every definition of a variable that could not be allocated a register, + and we reload before every use. However, we also maintain a list of + equations of the form [spilled-var = temp] that keep track of + variables that were recently spilled or reloaded. Based on these + equations, we can avoid reloading a spilled variable if its value + is still available in a temporary register. *) + +let rec find_reg_containing v = function + | [] -> None + | (var, temp, date) :: eqs -> + if var = v then Some temp else find_reg_containing v eqs + +let add v t eqs = (v, t, 0) :: eqs + +let kill x eqs = + List.filter (fun (v, t, date) -> v <> x && t <> x) eqs + +let reload_var tospill eqs v = + if not (VSet.mem v tospill) then + (v, [], eqs) + else + match find_reg_containing v eqs with + | Some t -> + (*printf "Reusing %a for %a@ @." PrintXTL.var t PrintXTL.var v;*) + (t, [], eqs) + | None -> + let t = new_temp (typeof v) in (t, [Xreload(v, t)], add v t eqs) + +let rec reload_vars tospill eqs vl = + match vl with + | [] -> ([], [], eqs) + | v1 :: vs -> + let (t1, c1, eqs1) = reload_var tospill eqs v1 in + let (ts, cs, eqs2) = reload_vars tospill eqs1 vs in + (t1 :: ts, c1 @ cs, eqs2) + +let save_var tospill eqs v = + if not (VSet.mem v tospill) then + (v, [], kill v eqs) + else begin + let t = new_temp (typeof v) in + (t, [Xspill(t, v)], add v t (kill v eqs)) + end + +let rec save_vars tospill eqs vl = + match vl with + | [] -> ([], [], eqs) + | v1 :: vs -> + let (t1, c1, eqs1) = save_var tospill eqs v1 in + let (ts, cs, eqs2) = save_vars tospill eqs1 vs in + (t1 :: ts, c1 @ cs, eqs2) + +(* Trimming equations when we have too many or when they are too old. + The goal is to limit the live range of unspillable temporaries. + By setting [max_age] to zero, we can effectively deactivate + the reuse strategy and fall back to a naive "reload at every use" + strategy. *) + +let max_age = ref 3 +let max_num_eqs = ref 3 + +let rec trim count eqs = + if count <= 0 then [] else + match eqs with + | [] -> [] + | (v, t, date) :: eqs' -> + if date <= !max_age + then (v, t, date + 1) :: trim (count - 1) eqs' + else [] + +(* Insertion of spill and reload instructions. 
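+   For example (in the simple, non-two-address case), if [x] is spilled,
+   [y := x + 1; z := x * 2] becomes [t := reload x; y := t + 1; z := t * 2]:
+   the second use of [x] reuses [t] thanks to the recorded equation [x = t].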
*) + +let spill_instr tospill eqs instr = + let eqs = trim !max_num_eqs eqs in + match instr with + | Xmove(src, dst) -> + if VSet.mem src tospill && VSet.mem dst tospill then begin + let (src', c1, eqs1) = reload_var tospill eqs src in + (c1 @ [Xspill(src', dst)], add dst src' (kill dst eqs1)) + end else begin + ([instr], kill dst eqs) + end + | Xreload(src, dst) -> + assert false + | Xspill(src, dst) -> + assert false + | Xparmove(srcs, dsts, itmp, ftmp) -> + ([instr], List.fold_right kill dsts eqs) + | Xop(op, args, res) -> + begin match is_two_address op args with + | None -> + let (args', c1, eqs1) = reload_vars tospill eqs args in + let (res', c2, eqs2) = save_var tospill eqs1 res in + (c1 @ Xop(op, args', res') :: c2, eqs2) + | Some(arg1, argl) -> + begin match VSet.mem res tospill, VSet.mem arg1 tospill with + | false, false -> + let (argl', c1, eqs1) = reload_vars tospill eqs argl in + (c1 @ [Xop(op, arg1 :: argl', res)], kill res eqs1) + | true, false -> + let tmp = new_temp (typeof res) in + let (argl', c1, eqs1) = reload_vars tospill eqs argl in + (c1 @ [Xmove(arg1, tmp); Xop(op, tmp :: argl', tmp); Xspill(tmp, res)], + add res tmp (kill res eqs1)) + | false, true -> + let eqs1 = add arg1 res (kill res eqs) in + let (argl', c1, eqs2) = reload_vars tospill eqs1 argl in + (Xreload(arg1, res) :: c1 @ [Xop(op, res :: argl', res)], + kill res eqs2) + | true, true -> + let tmp = new_temp (typeof res) in + let eqs1 = add arg1 tmp eqs in + let (argl', c1, eqs2) = reload_vars tospill eqs1 argl in + (Xreload(arg1, tmp) :: c1 @ [Xop(op, tmp :: argl', tmp); Xspill(tmp, res)], + add res tmp (kill tmp (kill res eqs2))) + end + end + | Xload(chunk, addr, args, dst) -> + let (args', c1, eqs1) = reload_vars tospill eqs args in + let (dst', c2, eqs2) = save_var tospill eqs1 dst in + (c1 @ Xload(chunk, addr, args', dst') :: c2, eqs2) + | Xstore(chunk, addr, args, src) -> + let (args', c1, eqs1) = reload_vars tospill eqs args in + let (src', c2, eqs2) = reload_var tospill eqs1 src in + (c1 @ c2 @ [Xstore(chunk, addr, args', src')], eqs2) + | Xcall(sg, Coq_inl v, args, res) -> + let (v', c1, eqs1) = reload_var tospill eqs v in + (c1 @ [Xcall(sg, Coq_inl v', args, res)], []) + | Xcall(sg, Coq_inr id, args, res) -> + ([instr], []) + | Xtailcall(sg, Coq_inl v, args) -> + let (v', c1, eqs1) = reload_var tospill eqs v in + (c1 @ [Xtailcall(sg, Coq_inl v', args)], []) + | Xtailcall(sg, Coq_inr id, args) -> + ([instr], []) + | Xbuiltin(ef, args, res) -> + begin match ef with + | EF_annot _ -> + ([instr], eqs) + | _ -> + let (args', c1, eqs1) = reload_vars tospill eqs args in + let (res', c2, eqs2) = save_vars tospill eqs1 res in + (c1 @ Xbuiltin(ef, args', res') :: c2, eqs2) + end + | Xbranch s -> + ([instr], eqs) + | Xcond(cond, args, s1, s2) -> + let (args', c1, eqs1) = reload_vars tospill eqs args in + (c1 @ [Xcond(cond, args', s1, s2)], eqs1) + | Xjumptable(arg, tbl) -> + let (arg', c1, eqs1) = reload_var tospill eqs arg in + (c1 @ [Xjumptable(arg', tbl)], eqs1) + | Xreturn optarg -> + ([instr], []) + +let rec spill_block tospill pc blk eqs = + match blk with + | [] -> ([], eqs) + | instr :: blk' -> + let (c1, eqs1) = spill_instr tospill eqs instr in + let (c2, eqs2) = spill_block tospill pc blk' eqs1 in + (c1 @ c2, eqs2) + +(* +let spill_block tospill pc blk eqs = + printf "@[spill_block: at %ld: " (camlint_of_positive pc); + List.iter (fun (x,y,d) -> printf "@ %a=%a" PrintXTL.var x PrintXTL.var y) eqs; + printf "@]@."; + spill_block tospill pc blk eqs +*) + +let spill_function f tospill round = + 
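+  (* later rounds reuse temporaries less aggressively: past round 10 equations age out faster; past round 20 reuse is disabled ([max_age] = 0) *)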
max_num_eqs := 3; + max_age := (if round <= 10 then 3 else if round <= 20 then 1 else 0); + transform_basic_blocks (spill_block tospill) [] f + + +(***************** Generation of LTL from XTL ***********************) + +(** Apply a register allocation to an XTL function, producing an LTL function. + Raise [Bad_LTL] if some pseudoregisters were mapped to stack locations + while machine registers were expected, or in other words if spilling + and reloading code must be inserted. *) + +exception Bad_LTL + +let mreg_of alloc v = match alloc v with R mr -> mr | S _ -> raise Bad_LTL + +let mregs_of alloc vl = List.map (mreg_of alloc) vl + +let mros_of alloc vos = sum_left_map (mreg_of alloc) vos + +let make_move src dst k = + match src, dst with + | R rsrc, R rdst -> + if rsrc = rdst then k else LTL.Lop(Omove, [rsrc], rdst) :: k + | R rsrc, S(sl, ofs, ty) -> + LTL.Lsetstack(rsrc, sl, ofs, ty) :: k + | S(sl, ofs, ty), R rdst -> + LTL.Lgetstack(sl, ofs, ty, rdst) :: k + | S _, S _ -> + if src = dst then k else raise Bad_LTL + +type parmove_status = To_move | Being_moved | Moved + +let make_parmove srcs dsts itmp ftmp k = + let src = Array.of_list srcs + and dst = Array.of_list dsts in + let n = Array.length src in + assert (Array.length dst = n); + let status = Array.make n To_move in + let temp_for = + function Tint -> itmp | Tfloat -> ftmp | Tlong -> assert false in + let code = ref [] in + let add_move s d = + match s, d with + | R rs, R rd -> + code := LTL.Lop(Omove, [rs], rd) :: !code + | R rs, S(sl, ofs, ty) -> + code := LTL.Lsetstack(rs, sl, ofs, ty) :: !code + | S(sl, ofs, ty), R rd -> + code := LTL.Lgetstack(sl, ofs, ty, rd) :: !code + | S(sls, ofss, tys), S(sld, ofsd, tyd) -> + let tmp = temp_for tys in + (* code will be reversed at the end *) + code := LTL.Lsetstack(tmp, sld, ofsd, tyd) :: + LTL.Lgetstack(sls, ofss, tys, tmp) :: !code + in + let rec move_one i = + if src.(i) <> dst.(i) then begin + status.(i) <- Being_moved; + for j = 0 to n - 1 do + if src.(j) = dst.(i) then + match status.(j) with + | To_move -> + move_one j + | Being_moved -> + let tmp = R (temp_for (Loc.coq_type src.(j))) in + add_move src.(j) tmp; + src.(j) <- tmp + | Moved -> + () + done; + add_move src.(i) dst.(i); + status.(i) <- Moved + end in + for i = 0 to n - 1 do + if status.(i) = To_move then move_one i + done; + List.rev_append !code k + +let transl_instr alloc instr k = + match instr with + | Xmove(src, dst) | Xreload(src, dst) | Xspill(src, dst) -> + make_move (alloc src) (alloc dst) k + | Xparmove(srcs, dsts, itmp, ftmp) -> + make_parmove (List.map alloc srcs) (List.map alloc dsts) + (mreg_of alloc itmp) (mreg_of alloc ftmp) k + | Xop(op, args, res) -> + let rargs = mregs_of alloc args + and rres = mreg_of alloc res in + begin match is_two_address op rargs with + | None -> + LTL.Lop(op, rargs, rres) :: k + | Some(rarg1, rargl) -> + if rarg1 = rres then + LTL.Lop(op, rargs, rres) :: k + else + LTL.Lop(Omove, [rarg1], rres) :: + LTL.Lop(op, rres :: rargl, rres) :: k + end + | Xload(chunk, addr, args, dst) -> + LTL.Lload(chunk, addr, mregs_of alloc args, mreg_of alloc dst) :: k + | Xstore(chunk, addr, args, src) -> + LTL.Lstore(chunk, addr, mregs_of alloc args, mreg_of alloc src) :: k + | Xcall(sg, vos, args, res) -> + LTL.Lcall(sg, mros_of alloc vos) :: k + | Xtailcall(sg, vos, args) -> + LTL.Ltailcall(sg, mros_of alloc vos) :: [] + | Xbuiltin(ef, args, res) -> + begin match ef with + | EF_annot _ -> + LTL.Lannot(ef, List.map alloc args) :: k + | _ -> + LTL.Lbuiltin(ef, mregs_of alloc args, mregs_of 
alloc res) :: k + end + | Xbranch s -> + LTL.Lbranch s :: [] + | Xcond(cond, args, s1, s2) -> + LTL.Lcond(cond, mregs_of alloc args, s1, s2) :: [] + | Xjumptable(arg, tbl) -> + LTL.Ljumptable(mreg_of alloc arg, tbl) :: [] + | Xreturn optarg -> + LTL.Lreturn :: [] + +let rec transl_block alloc blk = + match blk with + | [] -> [] + | instr :: blk' -> transl_instr alloc instr (transl_block alloc blk') + +let transl_function fn alloc = + { LTL.fn_sig = fn.fn_sig; + LTL.fn_stacksize = fn.fn_stacksize; + LTL.fn_entrypoint = fn.fn_entrypoint; + LTL.fn_code = PTree.map1 (transl_block alloc) fn.fn_code + } + + +(******************* All together *********************) + +exception Timeout + +let rec first_round f liveness = + let alloc = find_coloring f liveness in + if !option_dalloctrace then begin + fprintf !pp "-------------- After initial register allocation@ @."; + PrintXTL.print_function !pp ~alloc: alloc ~live: liveness f + end; + let ts = tospill_function f alloc in + if VSet.is_empty ts then success f alloc else more_rounds f ts 1 + +and more_rounds f ts count = + if count >= 40 then raise Timeout; + let f' = spill_function f ts count in + let liveness = liveness_analysis f' in + let alloc = find_coloring f' liveness in + if !option_dalloctrace then begin + fprintf !pp "-------------- After register allocation (round %d)@ @." count; + PrintXTL.print_function !pp ~alloc: alloc ~live: liveness f' + end; + let ts' = tospill_function f' alloc in + if VSet.is_empty ts' + then success f' alloc + else begin + if !option_dalloctrace then begin + fprintf !pp "--- Remain to be spilled:@ @."; + VSet.iter (fun v -> fprintf !pp "%a " PrintXTL.var v) ts'; + fprintf !pp "@ @." + end; + more_rounds f (VSet.union ts ts') (count + 1) + end + +and success f alloc = + let f' = transl_function f alloc in + if !option_dalloctrace then begin + fprintf !pp "-------------- Candidate allocation@ @."; + PrintLTL.print_function !pp P.one f' + end; + f' + +open Errors + +let regalloc f = + init_trace(); + reset_temps(); + let f1 = Splitting.rename_function f in + match RTLtyping.type_function f1 with + | Error msg -> + Error(MSG (coqstring_of_camlstring "RTL code after splitting is ill-typed:") :: msg) + | OK tyenv -> + let f2 = function_of_RTL_function f1 tyenv in + let liveness = liveness_analysis f2 in + let f3 = dead_code_elimination f2 liveness in + if !option_dalloctrace then begin + fprintf !pp "-------------- Initial XTL@ @."; + PrintXTL.print_function !pp f3 + end; + try + OK(first_round f3 liveness) + with + | Timeout -> + Error(msg (coqstring_of_camlstring "Spilling fails to converge")) + | Type_error_at pc -> + Error [MSG(coqstring_of_camlstring "Ill-typed XTL code at PC "); + POS pc] + | Bad_LTL -> + Error(msg (coqstring_of_camlstring "Bad LTL after spilling")) -- cgit