aboutsummaryrefslogtreecommitdiffstats
path: root/backend/LICMaux.ml
blob: 82e4629f730de7c6200fe6842a8800ddf9334a74 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
(* *************************************************************)
(*                                                             *)
(*             The Compcert verified compiler                  *)
(*                                                             *)
(*           David Monniaux     CNRS, VERIMAG                  *)
(*                                                             *)
(*  Copyright VERIMAG. All rights reserved.                    *)
(*  This file is distributed under the terms of the INRIA      *)
(*  Non-Commercial License Agreement.                          *)
(*                                                             *)
(* *************************************************************)

open RTL;;
open Camlcoq;;
open Maps;;
open Kildall;;
open HashedSet;;
open Inject;;
open DebugPrint;;
open RTLcommonaux;;

type reg = P.t;;

(** get_loop_headers moved from Duplicateaux.ml to LICMaux.ml to prevent cycle dependencies *)
type vstate = Unvisited | Processed | Visited

let rtl_successors = function
| Itailcall _ | Ireturn _ -> []
| Icall(_,_,_,_,n) | Ibuiltin(_,_,_,n) | Inop n | Iop (_,_,_,n)
| Iload (_,_,_,_,_,n) | Istore (_,_,_,_,n) -> [n]
| Icond (_,_,n1,n2,_) -> [n1; n2]
| Ijumptable (_,ln) -> ln

(** Getting loop branches with a DFS visit :
  * Each node is either Unvisited, Visited, or Processed
  * pre-order: node becomes Processed
  * post-order: node becomes Visited
  *
  * If we come accross an edge to a Processed node, it's a loop!
  *)
let get_loop_backedges code entrypoint = begin
  debug "get_loop_backedges\n";
  let visited = ref (PTree.map (fun n i -> Unvisited) code)
  and loop_backedge = ref (PTree.map (fun n i -> None) code)
  in let rec dfs_visit code origin = function
  | [] -> ()
  | node :: ln ->
      debug "ENTERING node %d, REM are %a\n" (P.to_int node) print_intlist ln;
      match (get_some @@ PTree.get node !visited) with
      | Visited -> begin
          debug "\tNode %d is already Visited, skipping\n" (P.to_int node);
          dfs_visit code origin ln
        end
      | Processed -> begin
          debug "Node %d is a loop header\n" (P.to_int node);
          debug "The backedge is from %d\n" (P.to_int @@ get_some origin);
          loop_backedge := PTree.set node origin !loop_backedge;
          visited := PTree.set node Visited !visited;
          dfs_visit code origin ln
        end
      | Unvisited -> begin
          visited := PTree.set node Processed !visited;
          debug "Node %d is Processed\n" (P.to_int node);
          (match PTree.get node code with
          | None -> failwith "No such node"
          | Some i -> let next_visits = rtl_successors i in begin
              debug "About to visit: %a\n" print_intlist next_visits;
              dfs_visit code (Some node) next_visits
            end);
          debug "Node %d is Visited!\n" (P.to_int node);
          visited := PTree.set node Visited !visited;
          dfs_visit code origin ln
        end
  in begin
    dfs_visit code None [entrypoint];
    debug "LOOP BACKEDGES: %a\n" print_ptree_opint !loop_backedge;
    !loop_backedge
  end
end

let get_loop_headers code entrypoint =
  let backedges = get_loop_backedges code entrypoint in
  PTree.map (fun _ ob ->
    match ob with
    | None -> false
    | Some _ -> true
  ) backedges

module Dominator =
  struct
    type t = Unreachable | Dominated of int | Multiple
    let bot = Unreachable and top = Multiple
    let beq a b =
      match a, b with
      | Unreachable, Unreachable
      | Multiple, Multiple -> true
      | (Dominated x), (Dominated y) -> x = y
      | _ -> false
    let lub a b =
      match a, b with
      | Multiple, _
      | _, Multiple -> Multiple
      | Unreachable, x
      | x, Unreachable -> x
      | (Dominated x), (Dominated y) when x=y -> a
      | (Dominated _), (Dominated _) -> Multiple

    let pp oc = function
      | Unreachable -> output_string oc "unreachable"
      | Multiple -> output_string oc "multiple"
      | Dominated x -> Printf.fprintf oc "%d" x;;
  end

module Dominator_Solver = Dataflow_Solver(Dominator)(NodeSetForward)

let apply_dominator (is_marked : node -> bool) (pc : node)
      (before : Dominator.t) : Dominator.t =
  match before with
  | Dominator.Unreachable -> before
  | _ ->
     if is_marked pc
     then Dominator.Dominated (P.to_int pc)
     else before;;

let dominated_parts1 (f : coq_function) :
      (bool PTree.t) * (Dominator.t PMap.t option) =
  let headers = get_loop_headers f.fn_code f.fn_entrypoint in
  let dominated = Dominator_Solver.fixpoint f.fn_code RTL.successors_instr
    (apply_dominator (fun pc -> match PTree.get pc headers with
                                | Some x -> x
                                | None -> false)) f.fn_entrypoint
    Dominator.top in
  (headers, dominated);;

let dominated_parts (f : coq_function) : Dominator.t PMap.t * PSet.t PTree.t =
  let (headers, dominated) = dominated_parts1 f in
  match dominated with
  | None -> failwith "dominated_parts 1"
  | Some dominated ->
     let singletons =
       PTree.fold (fun before pc flag ->
         if flag
         then PTree.set pc (PSet.add pc PSet.empty) before
         else before) headers PTree.empty in
     (dominated,
     PTree.fold (fun before pc ii ->
         match PMap.get pc dominated with
         | Dominator.Dominated x ->
            let px = P.of_int x in
            (match PTree.get px before with
             | None -> failwith "dominated_parts 2"
             | Some old ->
                PTree.set px (PSet.add pc old) before)
         | _ -> before) f.fn_code singletons);;

let graph_traversal (initial_node : P.t)
  (successor_iterator : P.t -> (P.t -> unit) -> unit) : PSet.t =
  let seen = ref PSet.empty
  and stack = Stack.create () in
  Stack.push initial_node stack;
  while not (Stack.is_empty stack)
  do
    let vertex = Stack.pop stack in
    if not (PSet.contains !seen vertex)
    then
      begin
        seen := PSet.add vertex !seen;
        successor_iterator vertex (fun x -> Stack.push x stack) 
      end
  done;
  !seen;;

let filter_dominated_part (predecessors : P.t list PTree.t)
      (header : P.t) (dominated_part : PSet.t) =
  graph_traversal header
    (fun (vertex : P.t) (f : P.t -> unit) ->
      match PTree.get vertex predecessors with
      | None -> ()
      | Some l ->
         List.iter
           (fun x ->
             if PSet.contains dominated_part x
             then f x) l
    );;

let inner_loops (f : coq_function) =
  let (dominated, parts) = dominated_parts f
  and predecessors = Kildall.make_predecessors f.fn_code RTL.successors_instr in
  (dominated, predecessors, PTree.map (filter_dominated_part predecessors) parts);;

let map_reg mapper r =
  match PTree.get r mapper with
  | None -> r
  | Some x -> x;;

let rewrite_loop_body (last_alloc : reg ref)
      (insns : RTL.code) (header : P.t) (loop_body : PSet.t) =
  let seen = ref PSet.empty
  and stack = Stack.create ()
  and rewritten = ref [] in
  let add_inj ii = rewritten := ii::!rewritten in
  Stack.push (header, PTree.empty) stack;
  while not (Stack.is_empty stack)
  do
    let (pc, mapper) = Stack.pop stack in
    if not (PSet.contains !seen pc)
    then
      begin
        seen := PSet.add pc !seen;
        match PTree.get pc insns with
        | None -> ()
        | Some ii ->
           let mapper' =
             match ii with
             | Iop(op, args, res, pc') when not (Op.is_trapping_op op) ->
                let new_res = P.succ !last_alloc in
                last_alloc := new_res;
                add_inj (INJop(op,
                               (List.map (map_reg mapper) args),
                               new_res));
                PTree.set res new_res mapper
             | Iload(_, chunk, addr, args, v, pc')
             | Istore(chunk, addr, args, v, pc')
                  when Archi.has_notrap_loads &&
                       !Clflags.option_fnontrap_loads ->
                let new_res = P.succ !last_alloc in
                last_alloc := new_res;
                add_inj (INJload(chunk, addr,
                                 (List.map (map_reg mapper) args),
                                 new_res));
                PTree.set v new_res mapper
             | _ -> mapper in
           List.iter (fun x ->
               if PSet.contains loop_body x
               then Stack.push (x, mapper') stack)
             (successors_instr ii)
      end
  done;
  List.rev !rewritten;;

let pp_inj_instr (oc : out_channel) (ii : inj_instr) =
  match ii with
  | INJnop -> output_string oc "nop"
  | INJop(op, args, res) ->
     Printf.fprintf oc "%a = %a"
       PrintRTL.reg res (PrintOp.print_operation PrintRTL.reg) (op, args)
  | INJload(chunk, addr, args, dst) ->
     Printf.fprintf oc "%a = %s[%a]"
       PrintRTL.reg dst (PrintAST.name_of_chunk chunk)
         (PrintOp.print_addressing PrintRTL.reg) (addr, args);;

let pp_inj_list (oc : out_channel) (l : inj_instr list) =
  List.iter (Printf.fprintf oc "%a; " pp_inj_instr) l;;

let pp_injections (oc : out_channel) (injections : inj_instr list PTree.t) =
  List.iter
    (fun (pc, injl) ->
      Printf.fprintf oc "%d : %a\n" (P.to_int pc) pp_inj_list injl)
    (PTree.elements injections);;

let compute_injections1 (f : coq_function) =
  let (dominated, predecessors, loop_bodies) = inner_loops f
  and last_alloc = ref (max_reg_function f) in
  (dominated, predecessors,
   PTree.map (fun header body ->
    (body, rewrite_loop_body last_alloc f.fn_code header body)) loop_bodies);;

let compute_injections (f : coq_function) : inj_instr list PTree.t =
  let (dominated, predecessors, injections) = compute_injections1 f in
  let output_map = ref PTree.empty in
  List.iter
    (fun (header, (body, inj)) ->
      match PTree.get header predecessors with
      | None -> failwith "compute_injections"
      | Some l ->
         List.iter (fun predecessor ->
             if (PMap.get predecessor dominated)<>Dominator.Unreachable &&
                  not (PSet.contains body predecessor)
             then output_map := PTree.set predecessor inj !output_map) l)
    (PTree.elements injections);
  !output_map;;
  
let pp_list pp_item oc l =
  output_string oc "{ ";
  let first = ref true in
  List.iter (fun x ->
      (if !first
       then first := false
       else output_string oc ", ");
      pp_item oc x) l;
  output_string oc " }";;

let pp_pset oc s =
  pp_list (fun oc -> Printf.fprintf oc "%d") oc
    (List.sort (fun x y -> y - x) (List.map P.to_int (PSet.elements s)));;

let print_dominated_parts oc f =
  List.iter (fun (header, nodes) ->
      Printf.fprintf oc "%d : %a\n" (P.to_int header) pp_pset nodes)
    (PTree.elements (snd (dominated_parts f)));;

let print_inner_loops oc f =
  List.iter (fun (header, nodes) ->
      Printf.fprintf oc "%d : %a\n" (P.to_int header) pp_pset nodes)
    (PTree.elements (let (_,_,l) = (inner_loops f) in l));;

let print_dominated_parts1 oc f =
  match snd (dominated_parts1 f) with
  | None -> output_string oc "error\n"
  | Some parts ->
     List.iter
       (fun (pc, instr) ->
         Printf.fprintf oc "%d : %a\n" (P.to_int pc) Dominator.pp
           (PMap.get pc parts)
       )
       (PTree.elements f.fn_code);;
  
let loop_headers (f : coq_function) : RTL.node list =
  List.map fst (List.filter snd (PTree.elements (get_loop_headers f.fn_code f.fn_entrypoint)));;

let print_loop_headers f =
  print_endline "Loop headers";
  List.iter
    (fun i -> Printf.printf "%d " (P.to_int i))
    (loop_headers f);
  print_newline ();;

let gen_injections (f : coq_function) (coq_max_pc : node) (coq_max_reg : reg):
      (Inject.inj_instr list) PTree.t =
  let injections = compute_injections f in
  (* let () = pp_injections stdout injections in *)
  injections;;