aboutsummaryrefslogtreecommitdiffstats
path: root/aarch64/PrepassSchedulingOracle.ml
blob: 0b3ba53a61eabfb84df40db4776b3c7e879b90d2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
open AST
open RTL
open Maps
open InstructionScheduler
open Registers
open PrepassSchedulingOracleDeps
   
let use_alias_analysis () = false

let build_constraints_and_resources (opweights : opweights) insts btl =
  let last_reg_reads : int list PTree.t ref = ref PTree.empty
  and last_reg_write : (int * int) PTree.t ref = ref PTree.empty
  and last_mem_reads : int list ref = ref []
  and last_mem_write : int option ref = ref None
  and last_branch : int option ref = ref None
  and last_non_pipelined_op : int array =
    Array.make opweights.nr_non_pipelined_units (-1)
  and latency_constraints : latency_constraint list ref = ref []
  and resources = ref [] in
  let add_constraint instr_from instr_to latency =
    assert (instr_from <= instr_to);
    assert (latency >= 0);
    if instr_from = instr_to then
      if latency = 0 then ()
      else
        failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop"
    else
      latency_constraints :=
        { instr_from; instr_to; latency } :: !latency_constraints
  and get_last_reads reg =
    match PTree.get reg !last_reg_reads with Some l -> l | None -> []
  in
  let add_input_mem i =
    if not (use_alias_analysis ()) then (
      (* Read after write *)
      (match !last_mem_write with None -> () | Some j -> add_constraint j i 1);
      last_mem_reads := i :: !last_mem_reads)
  and add_output_mem i =
    if not (use_alias_analysis ()) then (
      (* Write after write *)
      (match !last_mem_write with None -> () | Some j -> add_constraint j i 1);
      (* Write after read *)
      List.iter (fun j -> add_constraint j i 0) !last_mem_reads;
      last_mem_write := Some i;
      last_mem_reads := [])
  and add_input_reg i reg =
    (* Read after write *)
    (match PTree.get reg !last_reg_write with
    | None -> ()
    | Some (j, latency) -> add_constraint j i latency);
    last_reg_reads := PTree.set reg (i :: get_last_reads reg) !last_reg_reads
  and add_output_reg i latency reg =
    (* Write after write *)
    (match PTree.get reg !last_reg_write with
    | None -> ()
    | Some (j, _) -> add_constraint j i 1);
    (* Write after read *)
    List.iter (fun j -> add_constraint j i 0) (get_last_reads reg);
    last_reg_write := PTree.set reg (i, latency) !last_reg_write;
    last_reg_reads := PTree.remove reg !last_reg_reads
  in
  let add_input_regs i regs = List.iter (add_input_reg i) regs
  and irreversible_action i =
    match !last_branch with None -> () | Some j -> add_constraint j i 1
  in
  let set_branch i =
    irreversible_action i;
    last_branch := Some i
  and add_non_pipelined_resources i resources =
    Array.iter2
      (fun latency last ->
        if latency >= 0 && last >= 0 then add_constraint last i latency)
      resources last_non_pipelined_op;
    Array.iteri
      (fun rsc latency -> if latency >= 0 then last_non_pipelined_op.(rsc) <- i)
      resources
  in
  Array.iteri
    (fun i inst ->
      match inst with
      | Bnop _ ->
          let rs = Array.map (fun _ -> 0) opweights.pipelined_resource_bounds in
          resources := rs :: !resources
      | Bop (op, lr, rd, _) ->
          add_non_pipelined_resources i
            (opweights.non_pipelined_resources_of_op op (List.length lr));
          if Op.is_trapping_op op then irreversible_action i;
          add_input_regs i lr;
          add_output_reg i (opweights.latency_of_op op (List.length lr)) rd;
          let rs = opweights.resources_of_op op (List.length lr) in
          resources := rs :: !resources
      | Bload (trap, chk, addr, lr, rd, _) ->
          if trap = TRAP then irreversible_action i;
          add_input_mem i;
          add_input_regs i lr;
          add_output_reg i
            (opweights.latency_of_load trap chk addr (List.length lr))
            rd;
          let rs = opweights.resources_of_load trap chk addr (List.length lr) in
          resources := rs :: !resources
      | Bstore (chk, addr, lr, src, _) ->
          irreversible_action i;
          add_input_regs i lr;
          add_input_reg i src;
          add_output_mem i;
          let rs = opweights.resources_of_store chk addr (List.length lr) in
          resources := rs :: !resources
      | Bcond (cond, lr, BF (Bgoto s, _), ibnot, _) ->
          (* TODO gourdinl test with/out this line *)
          let live = (get_some @@ PTree.get s btl).input_regs in
          add_input_regs i (Regset.elements live);
          set_branch i;
          add_input_mem i;
          add_input_regs i lr;
          let rs = opweights.resources_of_cond cond (List.length lr) in
          resources := rs :: !resources
      | Bcond (_, _, _, _, _) ->
          failwith "get_simple_dependencies: invalid Bcond"
      | BF (_, _) -> failwith "get_simple_dependencies: BF"
      | Bseq (_, _) -> failwith "get_simple_dependencies: Bseq")
    insts;
  (!latency_constraints, Array.of_list (List.rev !resources))

let define_problem (opweights : opweights) ibf btl =
  let simple_deps, resources =
    build_constraints_and_resources opweights ibf btl
  in
  {
    max_latency = -1;
    resource_bounds = opweights.pipelined_resource_bounds;
    instruction_usages = resources;
    latency_constraints = simple_deps;
  }

let zigzag_scheduler problem early_ones =
  let nr_instructions = get_nr_instructions problem in
  assert (nr_instructions = Array.length early_ones);
  match list_scheduler problem with
  | Some fwd_schedule ->
      let fwd_makespan = fwd_schedule.(Array.length fwd_schedule - 1) in
      let constraints' = ref problem.latency_constraints in
      Array.iteri
        (fun i is_early ->
          if is_early then
            constraints' :=
              {
                instr_from = i;
                instr_to = nr_instructions;
                latency = fwd_makespan - fwd_schedule.(i);
              }
              :: !constraints')
        early_ones;
      validated_scheduler reverse_list_scheduler
        { problem with latency_constraints = !constraints' }
  | None -> None

let prepass_scheduler_by_name name problem insts =
  match name with
  | "zigzag" ->
      let early_ones =
        Array.map
          (fun inst ->
            match inst with Bcond (_, _, _, _, _) -> true | _ -> false)
          insts
      in
      zigzag_scheduler problem early_ones
  | _ -> scheduler_by_name name problem

let schedule_sequence insts btl =
  let opweights = OpWeights.get_opweights () in
  try
    if Array.length insts <= 1 then None
    else
      let nr_instructions = Array.length insts in
      let problem = define_problem opweights insts btl in
      match
        prepass_scheduler_by_name !Clflags.option_fprepass_sched problem insts
      with
      | None ->
          Printf.printf "no solution in prepass scheduling\n";
          None
      | Some solution ->
          let positions = Array.init nr_instructions (fun i -> i) in
          Array.sort
            (fun i j ->
              let si = solution.(i) and sj = solution.(j) in
              if si < sj then -1 else if si > sj then 1 else i - j)
            positions;
          Some positions
  with Failure s ->
    Printf.printf "failure in prepass scheduling: %s\n" s;
    None