aboutsummaryrefslogtreecommitdiffstats
path: root/backend/IRC.ml
diff options
context:
space:
mode:
authorxleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>2013-04-20 07:54:52 +0000
committerxleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>2013-04-20 07:54:52 +0000
commit255cee09b71255051c2b40eae0c88bffce1f6f32 (patch)
tree7951b1b13e8fd5e525b9223e8be0580e83550f55 /backend/IRC.ml
parent6e5041958df01c56762e90770abd704b95a36e5d (diff)
downloadcompcert-kvx-255cee09b71255051c2b40eae0c88bffce1f6f32.tar.gz
compcert-kvx-255cee09b71255051c2b40eae0c88bffce1f6f32.zip
Big merge of the newregalloc-int64 branch. Lots of changes in two directions:
1- new register allocator (+ live range splitting, spilling&reloading, etc) based on a posteriori validation using the Rideau-Leroy algorithm 2- support for 64-bit integer arithmetic (type "long long"). git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2200 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
Diffstat (limited to 'backend/IRC.ml')
-rw-r--r--backend/IRC.ml894
1 files changed, 894 insertions, 0 deletions
diff --git a/backend/IRC.ml b/backend/IRC.ml
new file mode 100644
index 00000000..573c3d72
--- /dev/null
+++ b/backend/IRC.ml
@@ -0,0 +1,894 @@
+(* *********************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Xavier Leroy, INRIA Paris-Rocquencourt *)
+(* *)
+(* Copyright Institut National de Recherche en Informatique et en *)
+(* Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the INRIA Non-Commercial License Agreement. *)
+(* *)
+(* *********************************************************************)
+
+open Printf
+open Camlcoq
+open Datatypes
+open AST
+open Registers
+open Machregs
+open Locations
+open Conventions1
+open Conventions
+open XTL
+
+(* Iterated Register Coalescing: George and Appel's graph coloring algorithm *)
+
+type var_stats = {
+ mutable cost: int; (* estimated cost of a spill *)
+ mutable usedefs: int (* number of uses and defs *)
+}
+
+(* Representation of the interference graph. Each node of the graph
+ (i.e. each variable) is represented as follows. *)
+
+type node =
+ { ident: int; (*r unique identifier *)
+ typ: typ; (*r its type *)
+ var: var; (*r the XTL variable it comes from *)
+ regclass: int; (*r identifier of register class *)
+ mutable accesses: int; (*r number of defs and uses *)
+ mutable spillcost: float; (*r estimated cost of spilling *)
+ mutable adjlist: node list; (*r all nodes it interferes with *)
+ mutable degree: int; (*r number of adjacent nodes *)
+ mutable movelist: move list; (*r list of moves it is involved in *)
+ mutable extra_adj: node list; (*r extra interferences (see below) *)
+ mutable extra_pref: move list; (*r extra preferences (see below) *)
+ mutable alias: node option; (*r [Some n] if coalesced with [n] *)
+ mutable color: loc option; (*r chosen color *)
+ mutable nstate: nodestate; (*r in which set of nodes it is *)
+ mutable nprev: node; (*r for double linking *)
+ mutable nnext: node (*r for double linking *)
+ }
+
+(* These are the possible states for nodes. *)
+
+and nodestate =
+ | Colored
+ | Initial
+ | SimplifyWorklist
+ | FreezeWorklist
+ | SpillWorklist
+ | CoalescedNodes
+ | SelectStack
+
+(* Each move (i.e. wish to be put in the same location) is represented
+ as follows. *)
+
+and move =
+ { src: node; (*r source of the move *)
+ dst: node; (*r destination of the move *)
+ mutable mstate: movestate; (*r in which set of moves it is *)
+ mutable mprev: move; (*r for double linking *)
+ mutable mnext: move (*r for double linking *)
+ }
+
+(* These are the possible states for moves *)
+
+and movestate =
+ | CoalescedMoves
+ | ConstrainedMoves
+ | FrozenMoves
+ | WorklistMoves
+ | ActiveMoves
+
+(* Note on "precolored" nodes and how they are handled:
+
+The register allocator can express interferences and preferences between
+any two values of type [var]: either pseudoregisters, to be colored by IRC,
+or fixed, "precolored" locations.
+
+I and P between two pseudoregisters are recorded in the graph that IRC
+modifies, via the [adjlist] and [movelist] fields.
+
+I and P between a pseudoregister and a machine register are also
+recorded in the IRC graph, but only in the [adjlist] and [movelist]
+fields of the pseudoregister. This is the special case described
+in George and Appel's papers.
+
+I and P between a pseudoregister and a stack slot
+are omitted from the IRC graph, as they contribute nothing to the
+simplification and coalescing process. We record them in the
+[extra_adj] and [extra_pref] fields, where they can be honored
+after IRC elimination, when assigning a stack slot to a spilled variable. *)
+
+let name_of_loc = function
+ | R r ->
+ begin match Machregsaux.name_of_register r with
+ | None -> "fixed-reg"
+ | Some s -> s
+ end
+ | S (Local, ofs, ty) ->
+ sprintf "L%c%ld" (PrintXTL.short_name_of_type ty) (camlint_of_coqint ofs)
+ | S (Incoming, ofs, ty) ->
+ sprintf "I%c%ld" (PrintXTL.short_name_of_type ty) (camlint_of_coqint ofs)
+ | S (Outgoing, ofs, ty) ->
+ sprintf "O%c%ld" (PrintXTL.short_name_of_type ty) (camlint_of_coqint ofs)
+
+let name_of_node n =
+ match n.var with
+ | V(r, ty) -> sprintf "x%ld" (P.to_int32 r)
+ | L l -> name_of_loc l
+
+(* The algorithm manipulates partitions of the nodes and of the moves
+ according to their states, frequently moving a node or a move from
+ a state to another, and frequently enumerating all nodes or all moves
+ of a given state. To support these operations efficiently,
+ nodes or moves having the same state are put into imperative doubly-linked
+ lists, allowing for constant-time insertion and removal, and linear-time
+ scanning. We now define the operations over these doubly-linked lists. *)
+
+module DLinkNode = struct
+ type t = node
+ let make state =
+ let rec empty =
+ { ident = 0; typ = Tint; var = V(P.one, Tint); regclass = 0;
+ adjlist = []; degree = 0; accesses = 0; spillcost = 0.0;
+ movelist = []; extra_adj = []; extra_pref = [];
+ alias = None; color = None;
+ nstate = state; nprev = empty; nnext = empty }
+ in empty
+ let dummy = make Colored
+ let clear dl = dl.nnext <- dl; dl.nprev <- dl
+ let notempty dl = dl.nnext != dl
+ let insert n dl =
+ n.nstate <- dl.nstate;
+ n.nnext <- dl.nnext; n.nprev <- dl;
+ dl.nnext.nprev <- n; dl.nnext <- n
+ let remove n dl =
+ assert (n.nstate = dl.nstate);
+ n.nnext.nprev <- n.nprev; n.nprev.nnext <- n.nnext
+ let move n dl1 dl2 =
+ remove n dl1; insert n dl2
+ let pick dl =
+ let n = dl.nnext in remove n dl; n
+ let iter f dl =
+ let rec iter n = if n != dl then (f n; iter n.nnext)
+ in iter dl.nnext
+ let fold f dl accu =
+ let rec fold n accu = if n == dl then accu else fold n.nnext (f n accu)
+ in fold dl.nnext accu
+end
+
+module DLinkMove = struct
+ type t = move
+ let make state =
+ let rec empty =
+ { src = DLinkNode.dummy; dst = DLinkNode.dummy;
+ mstate = state; mprev = empty; mnext = empty }
+ in empty
+ let dummy = make CoalescedMoves
+ let clear dl = dl.mnext <- dl; dl.mprev <- dl
+ let notempty dl = dl.mnext != dl
+ let insert m dl =
+ m.mstate <- dl.mstate;
+ m.mnext <- dl.mnext; m.mprev <- dl;
+ dl.mnext.mprev <- m; dl.mnext <- m
+ let remove m dl =
+ assert (m.mstate = dl.mstate);
+ m.mnext.mprev <- m.mprev; m.mprev.mnext <- m.mnext
+ let move m dl1 dl2 =
+ remove m dl1; insert m dl2
+ let pick dl =
+ let m = dl.mnext in remove m dl; m
+ let iter f dl =
+ let rec iter m = if m != dl then (f m; iter m.mnext)
+ in iter dl.mnext
+ let fold f dl accu =
+ let rec fold m accu = if m == dl then accu else fold m.mnext (f m accu)
+ in fold dl.mnext accu
+end
+
+(* Auxiliary data structures *)
+
+module IntSet = Set.Make(struct
+ type t = int
+ let compare (x:int) (y:int) = compare x y
+end)
+
+module IntPairSet = Set.Make(struct
+ type t = int * int
+ let compare ((x1, y1): (int * int)) (x2, y2) =
+ if x1 < x2 then -1 else
+ if x1 > x2 then 1 else
+ if y1 < y2 then -1 else
+ if y1 > y2 then 1 else
+ 0
+ end)
+
+(* The global state of the algorithm *)
+
+type graph = {
+ (* Machine registers available for allocation *)
+ caller_save_registers: mreg array array;
+ callee_save_registers: mreg array array;
+ num_available_registers: int array;
+ start_points: int array;
+ allocatable_registers: mreg list;
+ (* Costs for pseudo-registers *)
+ stats_of_reg: reg -> var_stats;
+ (* Mapping from XTL variables to nodes *)
+ varTable: (var, node) Hashtbl.t;
+ mutable nextIdent: int;
+ (* The adjacency set *)
+ mutable adjSet: IntPairSet.t;
+ (* Low-degree, non-move-related nodes *)
+ simplifyWorklist: DLinkNode.t;
+ (* Low-degree, move-related nodes *)
+ freezeWorklist: DLinkNode.t;
+ (* High-degree nodes *)
+ spillWorklist: DLinkNode.t;
+ (* Nodes that have been coalesced *)
+ coalescedNodes: DLinkNode.t;
+ (* Moves that have been coalesced *)
+ coalescedMoves: DLinkMove.t;
+ (* Moves whose source and destination interfere *)
+ constrainedMoves: DLinkMove.t;
+ (* Moves that will no longer be considered for coalescing *)
+ frozenMoves: DLinkMove.t;
+ (* Moves enabled for possible coalescing *)
+ worklistMoves: DLinkMove.t;
+ (* Moves not yet ready for coalescing *)
+ activeMoves: DLinkMove.t
+}
+
+(* Register classes and reserved registers *)
+
+let num_register_classes = 2
+
+let class_of_type = function Tint -> 0 | Tfloat -> 1 | Tlong -> assert false
+
+let reserved_registers = ref ([]: mreg list)
+
+let rec remove_reserved = function
+ | [] -> []
+ | hd :: tl ->
+ if List.mem hd !reserved_registers
+ then remove_reserved tl
+ else hd :: remove_reserved tl
+
+(* Initialize and return an empty graph *)
+
+let init costs =
+ let int_caller_save = remove_reserved int_caller_save_regs
+ and float_caller_save = remove_reserved float_caller_save_regs
+ and int_callee_save = remove_reserved int_callee_save_regs
+ and float_callee_save = remove_reserved float_callee_save_regs in
+ {
+ caller_save_registers =
+ [| Array.of_list int_caller_save; Array.of_list float_caller_save |];
+ callee_save_registers =
+ [| Array.of_list int_callee_save; Array.of_list float_callee_save |];
+ num_available_registers =
+ [| List.length int_caller_save + List.length int_callee_save;
+ List.length float_caller_save + List.length float_callee_save |];
+ start_points =
+ [| 0; 0 |];
+ allocatable_registers =
+ int_caller_save @ int_callee_save @ float_caller_save @ float_callee_save;
+ stats_of_reg = costs;
+ varTable = Hashtbl.create 253;
+ nextIdent = 0;
+ adjSet = IntPairSet.empty;
+ simplifyWorklist = DLinkNode.make SimplifyWorklist;
+ freezeWorklist = DLinkNode.make FreezeWorklist;
+ spillWorklist = DLinkNode.make SpillWorklist;
+ coalescedNodes = DLinkNode.make CoalescedNodes;
+ coalescedMoves = DLinkMove.make CoalescedMoves;
+ constrainedMoves = DLinkMove.make ConstrainedMoves;
+ frozenMoves = DLinkMove.make FrozenMoves;
+ worklistMoves = DLinkMove.make WorklistMoves;
+ activeMoves = DLinkMove.make ActiveMoves
+ }
+
+(* Create nodes corresponding to XTL variables *)
+
+let weightedSpillCost st =
+ if st.cost < max_int
+ then float_of_int st.cost
+ else infinity
+
+let newNodeOfReg g r ty =
+ let st = g.stats_of_reg r in
+ g.nextIdent <- g.nextIdent + 1;
+ { ident = g.nextIdent; typ = ty;
+ var = V(r, ty); regclass = class_of_type ty;
+ accesses = st.usedefs;
+ spillcost = weightedSpillCost st;
+ adjlist = []; degree = 0; movelist = []; extra_adj = []; extra_pref = [];
+ alias = None;
+ color = None;
+ nstate = Initial;
+ nprev = DLinkNode.dummy; nnext = DLinkNode.dummy }
+
+let newNodeOfLoc g l =
+ let ty = Loc.coq_type l in
+ g.nextIdent <- g.nextIdent + 1;
+ { ident = g.nextIdent; typ = ty;
+ var = L l; regclass = class_of_type ty;
+ accesses = 0; spillcost = 0.0;
+ adjlist = []; degree = 0; movelist = []; extra_adj = []; extra_pref = [];
+ alias = None;
+ color = Some l;
+ nstate = Colored;
+ nprev = DLinkNode.dummy; nnext = DLinkNode.dummy }
+
+let nodeOfVar g v =
+ try
+ Hashtbl.find g.varTable v
+ with Not_found ->
+ let n =
+ match v with V(r, ty) -> newNodeOfReg g r ty | L l -> newNodeOfLoc g l in
+ Hashtbl.add g.varTable v n;
+ n
+
+(* Determine if two nodes interfere *)
+
+let interfere g n1 n2 =
+ let i1 = n1.ident and i2 = n2.ident in
+ let p = if i1 < i2 then (i1, i2) else (i2, i1) in
+ IntPairSet.mem p g.adjSet
+
+(* Add an edge to the graph. *)
+
+let recordInterf n1 n2 =
+ match n2.color with
+ | None | Some (R _) ->
+ if n1.regclass = n2.regclass then begin
+ n1.adjlist <- n2 :: n1.adjlist;
+ n1.degree <- 1 + n1.degree
+ end else begin
+ n1.extra_adj <- n2 :: n1.extra_adj
+ end
+ | Some (S _) ->
+ (*i printf "extra adj %s to %s\n" (name_of_node n1) (name_of_node n2); *)
+ n1.extra_adj <- n2 :: n1.extra_adj
+
+let addEdge g n1 n2 =
+ (*i printf "edge %s -- %s;\n" (name_of_node n1) (name_of_node n2);*)
+ assert (n1 != n2);
+ if not (interfere g n1 n2) then begin
+ let i1 = n1.ident and i2 = n2.ident in
+ let p = if i1 < i2 then (i1, i2) else (i2, i1) in
+ g.adjSet <- IntPairSet.add p g.adjSet;
+ if n1.nstate <> Colored then recordInterf n1 n2;
+ if n2.nstate <> Colored then recordInterf n2 n1
+ end
+
+(* Add a move preference. *)
+
+let recordMove g n1 n2 =
+ let m =
+ { src = n1; dst = n2; mstate = WorklistMoves;
+ mnext = DLinkMove.dummy; mprev = DLinkMove.dummy } in
+ n1.movelist <- m :: n1.movelist;
+ n2.movelist <- m :: n2.movelist;
+ DLinkMove.insert m g.worklistMoves
+
+let recordExtraPref n1 n2 =
+ let m =
+ { src = n1; dst = n2; mstate = FrozenMoves;
+ mnext = DLinkMove.dummy; mprev = DLinkMove.dummy } in
+ n1.extra_pref <- m :: n1.extra_pref
+
+let addMovePref g n1 n2 =
+ assert (n1.regclass = n2.regclass);
+ match n1.color, n2.color with
+ | None, None ->
+ recordMove g n1 n2
+ | Some (R mr1), None ->
+ if List.mem mr1 g.allocatable_registers then recordMove g n1 n2
+ | None, Some (R mr2) ->
+ if List.mem mr2 g.allocatable_registers then recordMove g n1 n2
+ | Some (S _), None ->
+ recordExtraPref n2 n1
+ | None, Some (S _) ->
+ recordExtraPref n1 n2
+ | _, _ ->
+ ()
+
+(* Apply the given function to the relevant adjacent nodes of a node *)
+
+let iterAdjacent f n =
+ List.iter
+ (fun n ->
+ match n.nstate with
+ | SelectStack | CoalescedNodes -> ()
+ | _ -> f n)
+ n.adjlist
+
+(* Determine the moves affecting a node *)
+
+let moveIsActiveOrWorklist m =
+ match m.mstate with
+ | ActiveMoves | WorklistMoves -> true
+ | _ -> false
+
+let nodeMoves n =
+ List.filter moveIsActiveOrWorklist n.movelist
+
+(* Determine whether a node is involved in a move *)
+
+let moveRelated n =
+ List.exists moveIsActiveOrWorklist n.movelist
+
+(* Initial partition of nodes into spill / freeze / simplify *)
+
+let initialNodePartition g =
+ let part_node v n =
+ match n.nstate with
+ | Initial ->
+ let k = g.num_available_registers.(n.regclass) in
+ if n.degree >= k then
+ DLinkNode.insert n g.spillWorklist
+ else if moveRelated n then
+ DLinkNode.insert n g.freezeWorklist
+ else
+ DLinkNode.insert n g.simplifyWorklist
+ | Colored -> ()
+ | _ -> assert false in
+ Hashtbl.iter part_node g.varTable
+
+
+(* Check invariants *)
+
+let degreeInvariant g n =
+ let c = ref 0 in
+ iterAdjacent (fun n -> incr c) n;
+ if !c <> n.degree then
+ failwith("degree invariant violated by " ^ name_of_node n)
+
+let simplifyWorklistInvariant g n =
+ if n.degree < g.num_available_registers.(n.regclass)
+ && not (moveRelated n)
+ then ()
+ else failwith("simplify worklist invariant violated by " ^ name_of_node n)
+
+let freezeWorklistInvariant g n =
+ if n.degree < g.num_available_registers.(n.regclass)
+ && moveRelated n
+ then ()
+ else failwith("freeze worklist invariant violated by " ^ name_of_node n)
+
+let spillWorklistInvariant g n =
+ if n.degree >= g.num_available_registers.(n.regclass)
+ then ()
+ else failwith("spill worklist invariant violated by " ^ name_of_node n)
+
+let checkInvariants g =
+ DLinkNode.iter
+ (fun n -> degreeInvariant g n; simplifyWorklistInvariant g n)
+ g.simplifyWorklist;
+ DLinkNode.iter
+ (fun n -> degreeInvariant g n; freezeWorklistInvariant g n)
+ g.freezeWorklist;
+ DLinkNode.iter
+ (fun n -> degreeInvariant g n; spillWorklistInvariant g n)
+ g.spillWorklist
+
+(* Enable moves that have become low-degree related *)
+
+let enableMoves g n =
+ List.iter
+ (fun m ->
+ if m.mstate = ActiveMoves
+ then DLinkMove.move m g.activeMoves g.worklistMoves)
+ (nodeMoves n)
+
+(* Simulate the removal of a node from the graph *)
+
+let decrementDegree g n =
+ let k = g.num_available_registers.(n.regclass) in
+ let d = n.degree in
+ n.degree <- d - 1;
+ if d = k then begin
+ enableMoves g n;
+ iterAdjacent (enableMoves g) n;
+ if moveRelated n
+ then DLinkNode.move n g.spillWorklist g.freezeWorklist
+ else DLinkNode.move n g.spillWorklist g.simplifyWorklist
+ end
+
+(* Simulate the effect of combining nodes [n1] and [n3] on [n2],
+ where [n2] is a node adjacent to [n3]. *)
+
+let combineEdge g n1 n2 =
+ assert (n1 != n2);
+ if interfere g n1 n2 then begin
+ (* The two edges n2--n3 and n2--n1 become one, so degree of n2 decreases *)
+ decrementDegree g n2
+ end else begin
+ (* Add new edge *)
+ let i1 = n1.ident and i2 = n2.ident in
+ let p = if i1 < i2 then (i1, i2) else (i2, i1) in
+ g.adjSet <- IntPairSet.add p g.adjSet;
+ if n1.nstate <> Colored then begin
+ n1.adjlist <- n2 :: n1.adjlist;
+ n1.degree <- 1 + n1.degree
+ end;
+ if n2.nstate <> Colored then begin
+ n2.adjlist <- n1 :: n2.adjlist;
+ (* n2's degree stays the same because the old edge n2--n3 disappears
+ and becomes the new edge n2--n1 *)
+ end
+ end
+
+(* Simplification of a low-degree node *)
+
+let simplify g =
+ let n = DLinkNode.pick g.simplifyWorklist in
+ (*i printf "Simplifying %s\n" (name_of_node n); *)
+ n.nstate <- SelectStack;
+ iterAdjacent (decrementDegree g) n;
+ n
+
+(* Briggs's conservative coalescing criterion. In the terminology of
+ Hailperin, "Comparing Conservative Coalescing Criteria",
+ TOPLAS 27(3) 2005, this is the full Briggs criterion, slightly
+ more powerful than the one in George and Appel's paper. *)
+
+let canCoalesceBriggs g u v =
+ let seen = ref IntSet.empty in
+ let k = g.num_available_registers.(u.regclass) in
+ let c = ref 0 in
+ let consider other n =
+ if not (IntSet.mem n.ident !seen) then begin
+ seen := IntSet.add n.ident !seen;
+ (* if n interferes with both u and v, its degree will decrease by one
+ after coalescing *)
+ let degree_after_coalescing =
+ if interfere g n other then n.degree - 1 else n.degree in
+ if degree_after_coalescing >= k || n.nstate = Colored then begin
+ incr c;
+ if !c >= k then raise Exit
+ end
+ end in
+ try
+ iterAdjacent (consider v) u;
+ iterAdjacent (consider u) v;
+ (*i printf " Briggs: OK for %s and %s\n" (name_of_node u) (name_of_node v); *)
+ true
+ with Exit ->
+ (*i printf " Briggs: no\n"; *)
+ false
+
+(* George's conservative coalescing criterion: all high-degree neighbors
+ of [v] are neighbors of [u]. *)
+
+let canCoalesceGeorge g u v =
+ let k = g.num_available_registers.(u.regclass) in
+ let isOK t =
+ if t.nstate = Colored then
+ if u.nstate = Colored || interfere g t u then () else raise Exit
+ else
+ if t.degree < k || interfere g t u then () else raise Exit
+ in
+ try
+ iterAdjacent isOK v;
+ (*i printf " George: OK for %s and %s\n" (name_of_node u) (name_of_node v); *)
+ true
+ with Exit ->
+ (*i printf " George: no\n"; *)
+ false
+
+(* The combined coalescing criterion. [u] can be precolored, but
+ [v] is not. According to George and Appel's paper:
+- If [u] is precolored, use George's criterion.
+- If [u] is not precolored, use Briggs's criterion.
+
+ As noted by Hailperin, for non-precolored nodes, George's criterion
+ is incomparable with Briggs's: there are cases where G says yes
+ and B says no. Typically, [u] is a long-lived variable with many
+ interferences, and [v] is a short-lived temporary copy of [u]
+ that has no more interferences than [u]. Coalescing [u] and [v]
+ is "weakly safe" in Hailperin's terminology: [u] is no harder to color,
+ [u]'s neighbors are no harder to color either, but if we end up
+ spilling [u], we'll spill [v] as well. So, we restrict this heuristic
+ to [v] having a small number of uses.
+*)
+
+let thresholdGeorge = 3
+
+let canCoalesce g u v =
+ (*i printf "canCoalesce %s[%.2f] %s[%.2f]\n"
+ (name_of_node u) u.spillcost (name_of_node v) v.spillcost; *)
+ if u.nstate = Colored
+ then canCoalesceGeorge g u v
+ else canCoalesceBriggs g u v
+ || (u.spillcost < infinity && v.spillcost < infinity &&
+ ((v.accesses <= thresholdGeorge && canCoalesceGeorge g u v)
+ || (u.accesses <= thresholdGeorge && canCoalesceGeorge g v u)))
+
+(* Update worklists after a move was processed *)
+
+let addWorkList g u =
+ if (not (u.nstate = Colored))
+ && u.degree < g.num_available_registers.(u.regclass)
+ && (not (moveRelated u))
+ then DLinkNode.move u g.freezeWorklist g.simplifyWorklist
+
+(* Return the canonical representative of a possibly coalesced node *)
+
+let rec getAlias n =
+ match n.alias with None -> n | Some n' -> getAlias n'
+
+(* Combine two nodes *)
+
+let combine g u v =
+ (*i printf "Combining %s and %s\n" (name_of_node u) (name_of_node v); *)
+ (*i if u.spillcost = infinity then
+ printf "Warning: combining unspillable %s\n" (name_of_node u);
+ if v.spillcost = infinity then
+ printf "Warning: combining unspillable %s\n" (name_of_node v);*)
+ if v.nstate = FreezeWorklist
+ then DLinkNode.move v g.freezeWorklist g.coalescedNodes
+ else DLinkNode.move v g.spillWorklist g.coalescedNodes;
+ v.alias <- Some u;
+ (* Precolored nodes often have big movelists, and if one of [u] and [v]
+ is precolored, it is []u. So, append [v.movelist] to [u.movelist]
+ instead of the other way around. *)
+ u.movelist <- List.rev_append v.movelist u.movelist;
+ u.spillcost <- u.spillcost +. v.spillcost;
+ iterAdjacent (combineEdge g u) v; (*r original code using [decrementDegree] is buggy *)
+ u.extra_adj <- u.extra_adj @ v.extra_adj;
+ u.extra_pref <- u.extra_pref @ v.extra_pref;
+ enableMoves g v; (*r added as per Appel's book erratum *)
+ if u.degree >= g.num_available_registers.(u.regclass)
+ && u.nstate = FreezeWorklist
+ then DLinkNode.move u g.freezeWorklist g.spillWorklist
+
+(* Attempt coalescing *)
+
+let coalesce g =
+ let m = DLinkMove.pick g.worklistMoves in
+ let x = getAlias m.src and y = getAlias m.dst in
+ let (u, v) = if y.nstate = Colored then (y, x) else (x, y) in
+ (*i printf "Attempt coalescing %s and %s\n" (name_of_node u) (name_of_node v);*)
+ if u == v then begin
+ DLinkMove.insert m g.coalescedMoves;
+ addWorkList g u
+ end else if v.nstate = Colored || interfere g u v then begin
+ DLinkMove.insert m g.constrainedMoves;
+ addWorkList g u;
+ addWorkList g v
+ end else if canCoalesce g u v then begin
+ DLinkMove.insert m g.coalescedMoves;
+ combine g u v;
+ addWorkList g u
+ end else begin
+ DLinkMove.insert m g.activeMoves
+ end
+
+(* Freeze moves associated with node [u] *)
+
+let freezeMoves g u =
+ let u' = getAlias u in
+ let freeze m =
+ let y = getAlias m.src in
+ let v = if y == u' then getAlias m.dst else y in
+ DLinkMove.move m g.activeMoves g.frozenMoves;
+ if not (moveRelated v)
+ && v.degree < g.num_available_registers.(v.regclass)
+ && v.nstate <> Colored
+ then DLinkNode.move v g.freezeWorklist g.simplifyWorklist in
+ List.iter freeze (nodeMoves u)
+
+(* Pick a move and freeze it *)
+
+let freeze g =
+ let u = DLinkNode.pick g.freezeWorklist in
+ (*i printf "Freezing %s\n" (name_of_node u); *)
+ DLinkNode.insert u g.simplifyWorklist;
+ freezeMoves g u
+
+(* This is the original spill cost function from Chaitin 1982 *)
+
+(*
+let spillCost n =
+(*i
+ printf "spillCost %s: cost = %.2f degree = %d rank = %.2f\n"
+ (name_of_node n) n.spillcost n.degree
+ (n.spillcost /. float n.degree);
+*)
+ n.spillcost /. float n.degree
+*)
+
+(* This is spill cost function h_0 from Bernstein et al 1989. It performs
+ slightly better than Chaitin's and than functions h_1 and h_2. *)
+
+let spillCost n =
+ let deg = float n.degree in n.spillcost /. (deg *. deg)
+
+(* Spill a node *)
+
+let selectSpill g =
+ (*i printf "Attempt spilling\n"; *)
+ (* Find a spillable node of minimal cost *)
+ let (n, cost) =
+ DLinkNode.fold
+ (fun n (best_node, best_cost as best) ->
+ let cost = spillCost n in
+ if cost <= best_cost then (n, cost) else best)
+ g.spillWorklist (DLinkNode.dummy, infinity) in
+ assert (n != DLinkNode.dummy);
+ if cost = infinity then begin
+ printf "Warning: spilling unspillable %s\n" (name_of_node n);
+ printf " spill queue is:";
+ DLinkNode.iter (fun n -> printf " %s" (name_of_node n)) g.spillWorklist;
+ printf "\n"
+ end;
+ DLinkNode.remove n g.spillWorklist;
+ (*i printf "Spilling %s\n" (name_of_node n); *)
+ freezeMoves g n;
+ n.nstate <- SelectStack;
+ iterAdjacent (decrementDegree g) n;
+ n
+
+(* Produce the order of nodes that we'll use for coloring *)
+
+let rec nodeOrder g stack =
+ (*i checkInvariants g; *)
+ if DLinkNode.notempty g.simplifyWorklist then
+ (let n = simplify g in nodeOrder g (n :: stack))
+ else if DLinkMove.notempty g.worklistMoves then
+ (coalesce g; nodeOrder g stack)
+ else if DLinkNode.notempty g.freezeWorklist then
+ (freeze g; nodeOrder g stack)
+ else if DLinkNode.notempty g.spillWorklist then
+ (let n = selectSpill g in nodeOrder g (n :: stack))
+ else
+ stack
+
+(* Assign a color (i.e. a hardware register or a stack location)
+ to a node. The color is chosen among the colors that are not
+ assigned to nodes with which this node interferes. The choice
+ is guided by the following heuristics: consider first caller-save
+ hardware register of the correct type; second, callee-save registers;
+ third, a stack location. Callee-save registers and stack locations
+ are ``expensive'' resources, so we try to minimize their number
+ by picking the smallest available callee-save register or stack location.
+ In contrast, caller-save registers are ``free'', so we pick an
+ available one pseudo-randomly. *)
+
+module Regset =
+ Set.Make(struct type t = mreg let compare = compare end)
+
+let find_reg g conflicts regclass =
+ let rec find avail curr last =
+ if curr >= last then None else begin
+ let r = avail.(curr) in
+ if Regset.mem r conflicts
+ then find avail (curr + 1) last
+ else Some (R r)
+ end in
+ let caller_save = g.caller_save_registers.(regclass)
+ and callee_save = g.callee_save_registers.(regclass)
+ and start = g.start_points.(regclass) in
+ match find caller_save start (Array.length caller_save) with
+ | Some _ as res ->
+ g.start_points.(regclass) <-
+ (if start + 1 < Array.length caller_save then start + 1 else 0);
+ res
+ | None ->
+ match find caller_save 0 start with
+ | Some _ as res ->
+ g.start_points.(regclass) <-
+ (if start + 1 < Array.length caller_save then start + 1 else 0);
+ res
+ | None ->
+ find callee_save 0 (Array.length callee_save)
+
+(* Aggressive coalescing of stack slots. When assigning a slot,
+ try first the slots assigned to the pseudoregs for which we
+ have a preference, provided no conflict occurs. *)
+
+let rec reuse_slot conflicts n mvlist =
+ match mvlist with
+ | [] -> None
+ | mv :: rem ->
+ let attempt_reuse n' =
+ match n'.color with
+ | Some(S(Local, _, _) as l)
+ when List.for_all (Loc.diff_dec l) conflicts -> Some l
+ | _ -> reuse_slot conflicts n rem in
+ let src = getAlias mv.src and dst = getAlias mv.dst in
+ if n == src then attempt_reuse dst
+ else if n == dst then attempt_reuse src
+ else reuse_slot conflicts n rem (* should not happen? *)
+
+(* If no reuse possible, assign lowest nonconflicting stack slot. *)
+
+let compare_slots s1 s2 =
+ match s1, s2 with
+ | S(_, ofs1, _), S(_, ofs2, _) -> Z.compare ofs1 ofs2
+ | _, _ -> assert false
+
+let find_slot conflicts typ =
+ let rec find curr = function
+ | [] ->
+ S(Local, curr, typ)
+ | S(Local, ofs, typ') :: l ->
+ if Z.le (Z.add curr (typesize typ)) ofs then
+ S(Local, curr, typ)
+ else begin
+ let ofs' = Z.add ofs (typesize typ') in
+ find (if Z.le ofs' curr then curr else ofs') l
+ end
+ | _ :: l ->
+ find curr l
+ in find Z.zero (List.stable_sort compare_slots conflicts)
+
+(* Record locations assigned to interfering nodes *)
+
+let record_reg_conflict cnf n =
+ match (getAlias n).color with
+ | Some (R r) -> Regset.add r cnf
+ | _ -> cnf
+
+let record_slot_conflict cnf n =
+ match (getAlias n).color with
+ | Some (S _ as l) -> l :: cnf
+ | _ -> cnf
+
+(* Assign a location, the best we can *)
+
+let assign_color g n =
+ let reg_conflicts =
+ List.fold_left record_reg_conflict Regset.empty n.adjlist in
+ (* First, try to assign a register *)
+ match find_reg g reg_conflicts n.regclass with
+ | Some loc ->
+ n.color <- Some loc
+ | None ->
+ (* Add extra conflicts for nonallocatable and preallocated stack slots *)
+ let slot_conflicts =
+ List.fold_left record_slot_conflict
+ (List.fold_left record_slot_conflict [] n.adjlist)
+ n.extra_adj in
+ (* Second, try to coalesce stack slots *)
+ match reuse_slot slot_conflicts n (n.extra_pref @ n.movelist) with
+ | Some loc ->
+ n.color <- Some loc
+ | None ->
+ (* Last, pick a Local stack slot *)
+ n.color <- Some (find_slot slot_conflicts n.typ)
+
+(* Extract the location of a variable *)
+
+let location_of_var g v =
+ match v with
+ | L l -> l
+ | V(r, ty) ->
+ try
+ let n = Hashtbl.find g.varTable v in
+ let n' = getAlias n in
+ match n'.color with
+ | None -> assert false
+ | Some l -> l
+ with Not_found ->
+ match ty with
+ | Tint -> R dummy_int_reg
+ | Tfloat -> R dummy_float_reg
+ | Tlong -> assert false
+
+(* The exported interface *)
+
+let add_interf g v1 v2 =
+ addEdge g (nodeOfVar g v1) (nodeOfVar g v2)
+
+let add_pref g v1 v2 =
+ addMovePref g (nodeOfVar g v1) (nodeOfVar g v2)
+
+let coloring g =
+ initialNodePartition g;
+ List.iter (assign_color g) (nodeOrder g []);
+ location_of_var g (* total function var -> location *)