aboutsummaryrefslogtreecommitdiffstats
path: root/src/hls/Schedule.ml
diff options
context:
space:
mode:
authorYann Herklotz <git@yannherklotz.com>2020-10-18 18:15:24 +0100
committerYann Herklotz <git@yannherklotz.com>2020-10-18 18:15:24 +0100
commit21d92aa3c3e53a5b5510d2156af14dc18e17b65e (patch)
tree5dbc8b59be1cb4e4a0734e0dcb495743351ab312 /src/hls/Schedule.ml
parentf7e9951c08ac658537811d214c88d855a7e2c907 (diff)
downloadvericert-kvx-21d92aa3c3e53a5b5510d2156af14dc18e17b65e.tar.gz
vericert-kvx-21d92aa3c3e53a5b5510d2156af14dc18e17b65e.zip
Add output to scheduling
Diffstat (limited to 'src/hls/Schedule.ml')
-rw-r--r--src/hls/Schedule.ml307
1 files changed, 282 insertions, 25 deletions
diff --git a/src/hls/Schedule.ml b/src/hls/Schedule.ml
index 7d2d2ea..efef1ef 100644
--- a/src/hls/Schedule.ml
+++ b/src/hls/Schedule.ml
@@ -27,6 +27,35 @@ open Kildall
open Op
open RTLBlock
open HTL
+open Verilog
+open HTLgen
+open HTLMonad
+open HTLMonadExtra
+
+module IMap = Map.Make (struct
+ type t = int
+
+ let compare = compare
+end)
+
+type dfg = { nodes : instruction list; edges : (int * int) list }
+(** The DFG type defines a list of instructions with their data dependencies as [edges], which are
+ the pairs of integers that represent the index of the instruction in the [nodes]. The edges
+ always point from left to right. *)
+
+let print_list f out_chan a =
+ fprintf out_chan "[ ";
+ List.iter (fprintf out_chan "%a " f) a;
+ fprintf out_chan "]"
+
+let print_tuple out_chan a =
+ let l, r = a in
+ fprintf out_chan "(%d,%d)" l r
+
+let print_dfg out_chan dfg =
+ fprintf out_chan "{ nodes = %a, edges = %a }"
+ (print_list PrintRTLBlock.print_bblock_body)
+ dfg.nodes (print_list print_tuple) dfg.edges
let read_process command =
let buffer_size = 2048 in
@@ -41,22 +70,85 @@ let read_process command =
ignore (Unix.close_process_in in_channel);
Buffer.contents buffer
+(** Add a dependency if it uses a register that was written to previously. *)
let add_dep i tree deps curr =
match PTree.get curr tree with None -> deps | Some ip -> (ip, i) :: deps
-let accumulate_deps dfg curr =
- let i, tree, vals = dfg in
+(** This function calculates the dependencies of each instruction. The nodes correspond to previous
+ register that were allocated and show which instruction caused it.
+
+ This function only gathers the RAW constraints, and will therefore only be active for operations
+ that modify registers, which is this case only affects loads and operations. *)
+let accumulate_RAW_deps dfg curr =
+ let i, dst_map, { edges; nodes } = dfg in
+ let acc_dep_instruction rs dst =
+ ( i + 1,
+ PTree.set dst i dst_map,
+ {
+ nodes;
+ edges = List.append (List.fold_left (add_dep i dst_map) [] rs) edges;
+ } )
+ in
match curr with
- | RBnop -> (i + 1, tree, vals)
- | RBop (_, rs, dst) ->
- ( i + 1,
- PTree.set dst i tree,
- List.append (List.fold_left (add_dep i tree) [] rs) vals )
- | RBload (mem, addr, rs, dst) ->
- ( i + 1,
- PTree.set dst i tree,
- List.append (List.fold_left (add_dep i tree) [] rs) vals )
- | RBstore (mem, addr, rs, dst) -> (i + 1, tree, vals)
+ | RBop (_, rs, dst) -> acc_dep_instruction rs dst
+ | RBload (_mem, _addr, rs, dst) -> acc_dep_instruction rs dst
+ | _ -> (i + 1, dst_map, { edges; nodes })
+
+(** Finds the next write to the [dst] register. This is a small optimisation so that only one
+ dependency is generated for a data dependency. *)
+let rec find_next_dst_write i dst i' curr =
+ let check_dst dst' curr' =
+ if dst = dst' then Some (i, i')
+ else find_next_dst_write i dst (i' + 1) curr'
+ in
+ match curr with
+ | [] -> None
+ | RBop (_, _, dst') :: curr' -> check_dst dst' curr'
+ | RBload (_, _, _, dst') :: curr' -> check_dst dst' curr'
+ | _ :: curr' -> find_next_dst_write i dst (i' + 1) curr'
+
+let drop i lst =
+ let rec drop' i' lst' =
+ match lst' with
+ | _ :: ls -> if i' = i then ls else drop' (i' + 1) ls
+ | [] -> []
+ in
+ if i = 0 then lst else drop' 1 lst
+
+let take i lst =
+ let rec take' i' lst' =
+ match lst' with
+ | l :: ls -> if i' = i then [ l ] else l :: take' (i' + 1) ls
+ | [] -> []
+ in
+ if i = 0 then [] else take' 1 lst
+
+(** This function calculates the WAW dependencies, which happen when two writes are ordered one
+ after another and therefore have to be kept in that order. This accumulation might be redundant
+ if register renaming is done before hand, because then these dependencies can be avoided. *)
+let accumulate_WAW_deps dfg curr =
+ let i, { edges; nodes } = dfg in
+ let dst_dep dst =
+ match find_next_dst_write i dst (i + 1) (drop (i + 1) nodes) with
+ | Some d -> (i + 1, { nodes; edges = d :: edges })
+ | _ -> (i + 1, { nodes; edges })
+ in
+ match curr with
+ | RBop (_, _, dst) -> dst_dep dst
+ | RBload (_, _, _, dst) -> dst_dep dst
+ | _ -> (i + 1, { nodes; edges })
+
+let accumulate_WAR_deps dfg curr =
+ let i, { edges; nodes } = dfg in
+ let dst_dep dst =
+ match find_next_dst_write i dst 0 (take i nodes |> List.rev) with
+ | Some (d, d') -> (i + 1, { nodes; edges = (i - d', d) :: edges })
+ | _ -> (i + 1, { nodes; edges })
+ in
+ match curr with
+ | RBop (_, rs, dst) -> dst_dep dst
+ | RBload (_, _, rs, dst) -> dst_dep dst
+ | _ -> (i + 1, { nodes; edges })
let assigned_vars vars = function
| RBnop -> vars
@@ -64,16 +156,20 @@ let assigned_vars vars = function
| RBload (_, _, _, dst) -> dst :: vars
| RBstore (_, _, _, _) -> vars
-(* All the nodes in the DFG have to come after the source of the basic block, and should terminate
+(** All the nodes in the DFG have to come after the source of the basic block, and should terminate
before the sink of the basic block. After that, there should be constraints for data
dependencies between nodes. *)
let gather_bb_constraints bb =
- let _, _, edges =
- List.fold_left accumulate_deps (0, PTree.empty, []) bb.bb_body
+ let _, _, dfg =
+ List.fold_left accumulate_RAW_deps
+ (0, PTree.empty, { nodes = bb.bb_body; edges = [] })
+ bb.bb_body
in
+ let _, dfg' = List.fold_left accumulate_WAW_deps (0, dfg) bb.bb_body in
+ let _, dfg'' = List.fold_left accumulate_WAR_deps (0, dfg') bb.bb_body in
match bb.bb_exit with
| None -> assert false
- | Some e -> (List.length bb.bb_body, edges, successors_instr e)
+ | Some e -> (List.length bb.bb_body, dfg'', successors_instr e)
let gen_bb_name s i = sprintf "bb%d%s" (P.to_int i) s
@@ -109,14 +205,14 @@ let rec gather_cfg_constraints (completed, (bvars, constraints, types)) c curr =
else
match PTree.get curr c with
| None -> assert false
- | Some (num_iters, edges, next) ->
+ | Some (num_iters, dfg, next) ->
let constraints' =
constraints
^ String.concat "" (List.map (print_bb_order curr) next)
^ String.concat ""
(List.map (print_src_order curr)
(List.init num_iters (fun x -> x)))
- ^ String.concat "" (List.map (print_data_dep_order curr) edges)
+ ^ String.concat "" (List.map (print_data_dep_order curr) dfg.edges)
in
let types' =
types
@@ -144,11 +240,20 @@ let rec intersperse s = function
| [ a ] -> [ a ]
| x :: xs -> x :: s :: intersperse s xs
-let schedule entry (c : code) =
- let c' = PTree.map1 gather_bb_constraints c in
- let _, (vars, constraints, types) =
- gather_cfg_constraints ([], ([], "", "")) c' entry
- in
+let update_schedule v = function Some l -> Some (v :: l) | None -> Some [ v ]
+
+let parse_soln tree s =
+ let r = Str.regexp "v\([0-9]+\)b_\([0-9]+\)[ ]+\([0-9]+\)" in
+ if Str.string_match r s 0 then
+ IMap.update
+ (Str.matched_group 1 s |> int_of_string)
+ (update_schedule
+ ( Str.matched_group 2 s |> int_of_string,
+ Str.matched_group 3 s |> int_of_string ))
+ tree
+ else tree
+
+let solve_constraints vars constraints types =
let oc = open_out "lpsolve.txt" in
fprintf oc "min: ";
List.iter (fprintf oc "%s") (intersperse " + " vars);
@@ -156,6 +261,158 @@ let schedule entry (c : code) =
fprintf oc "%s" constraints;
fprintf oc "%s" types;
close_out oc;
- let s = read_process "lp_solve lpsolve.txt" in
- printf "%s" s;
+ Str.split (Str.regexp_string "\n") (read_process "lp_solve lpsolve.txt")
+ |> drop 3
+ |> List.fold_left parse_soln IMap.empty
+
+let find_min = function
+ | [] -> assert false
+ | l :: ls ->
+ let rec find_min' current = function
+ | [] -> current
+ | l' :: ls' ->
+ if snd l' < current then find_min' (snd l') ls'
+ else find_min' current ls'
+ in
+ find_min' (snd l) ls
+
+let find_max = function
+ | [] -> assert false
+ | l :: ls ->
+ let rec find_max' current = function
+ | [] -> current
+ | l' :: ls' ->
+ if snd l' > current then find_max' (snd l') ls'
+ else find_max' current ls'
+ in
+ find_max' (snd l) ls
+
+type registers = {
+ reg_finish : reg;
+ reg_return : reg;
+ reg_stk : reg;
+}
+
+let ( >>= ) = bind
+
+let translate_control_flow r curr_st instr =
+ match instr with
+ | RBgoto n ->
+ fun state ->
+ OK
+ ( (),
+ {
+ state with
+ st_controllogic =
+ PTree.set curr_st (state_goto state.st_st n) state.st_controllogic;
+ } )
+ | RBcond (c, rs, n1, n2) ->
+ translate_condition c rs >>= fun ce state ->
+ OK
+ ( (),
+ {
+ state with
+ st_controllogic =
+ PTree.set curr_st
+ (state_cond state.st_st ce n1 n2)
+ state.st_controllogic;
+ } )
+ | RBreturn ret -> (
+ match ret with
+ | None ->
+ let fin =
+ Vseq
+ ( HTLgen.block r.reg_finish
+ (Vlit (ValueInt.coq_ZToValue (Z.of_uint 1))),
+ HTLgen.block r.reg_return
+ (Vlit (ValueInt.coq_ZToValue (Z.of_uint 0))) )
+ in
+ fun state ->
+ OK
+ ( (),
+ {
+ state with
+ st_datapath = PTree.set curr_st fin state.st_datapath;
+ } )
+ | Some ret' ->
+ let fin =
+ Vseq
+ ( HTLgen.block r.reg_finish
+ (Vlit (ValueInt.coq_ZToValue (Z.of_uint 1))),
+ HTLgen.block r.reg_return (Vvar ret') )
+ in
+ fun state ->
+ OK
+ ( (),
+ {
+ state with
+ st_datapath = PTree.set curr_st fin state.st_datapath;
+ } ) )
+ | _ -> error (Errors.msg (coqstring_of_camlstring "Control flow instructions not implemented."))
+
+let translate_instr r curr_st instr =
+ let prev_instr state =
+ match PTree.get curr_st state.st_datapath with
+ | None -> Vskip
+ | Some v -> v
+ in
+ match instr with
+ | RBnop ->
+ fun state ->
+ OK ((), { state with st_datapath = PTree.set curr_st (prev_instr state) state.st_datapath })
+ | RBop (op, rs, dst) ->
+ translate_instr op rs >>=
+ (fun instr ->
+ declare_reg None dst (Nat.of_int 32) >>=
+ (fun _ state ->
+ let stmnt = Vseq (prev_instr state, nonblock dst instr) in
+ OK ((), { state with st_datapath = PTree.set curr_st stmnt state.st_datapath })))
+ | RBload (mem, addr, rs, dst) ->
+ translate_arr_access mem addr rs r.reg_stk >>=
+ (fun src ->
+ declare_reg None dst (Nat.of_int 32) >>=
+ (fun _ state ->
+ let stmnt = Vseq (prev_instr state, nonblock dst src) in
+ OK ((), { state with st_datapath = PTree.set curr_st stmnt state.st_datapath })))
+ | RBstore (mem, addr, rs, src) ->
+ translate_arr_access mem addr rs r.reg_stk >>=
+ (fun dst state ->
+ OK ((), { state with st_datapath = PTree.set curr_st (Vnonblock (dst, (Vvar src))) state.st_datapath }))
+
+let combine_bb_schedule schedule s =
+ let (i, st) = s in
+ IMap.update st (update_schedule i) schedule
+
+let add_schedules bb_body min_sched curr i state schedule =
+ let (data, control) = state in
+ let curr_state = curr - i + min_sched in
+ List.fold_left (fun state' i -> )
+
+(** Should generate the [HTL] code based on the input [RTLBlock] description. *)
+let transf_htl r c schedule =
+ let f state i bb =
+ match bb with
+ | {bb_body = []; bb_exit = Some c} ->
+ translate_control_flow r i state c
+ | {bb_body = bb_body'; bb_exit = Some c} -> begin
+ match IMap.find (P.to_int i) schedule with
+ | None -> assert false
+ | Some i_sched ->
+ (* This means that we have a schedule for the current basic block, and the schedule is a
+ mapping from instruction to state. If it is empty, that means we only have a control flow
+ instruction which we have to schedule. *)
+ let min_state = find_min i_sched in
+ let i_sched_tree = List.fold_left combine_bb_schedule IMap.empty i_sched in
+ IMap.fold ()
+ end
+ in
+ PTree.fold
+
+let schedule entry (c : code) =
+ let c' = PTree.map1 gather_bb_constraints c in
+ let _, (vars, constraints, types) =
+ gather_cfg_constraints ([], ([], "", "")) c' entry
+ in
+ let schedule = solve_constraints vars constraints types in
+ IMap.iter (fun d -> printf "%d: %a\n" d (print_list print_tuple)) schedule;
c