1 files changed, 2295 insertions, 0 deletions
diff --git a/docs/scheduler.org b/docs/scheduler.org
new file mode 100644
index 0000000..018633c
--- /dev/null
+++ b/docs/scheduler.org
@@ -0,0 +1,2295 @@
+#+title: Basic Block Generation
+#+author: Yann Herklotz
+#+email: yann [at] yannherklotz [dot] com
+
+* Scheduler
+:PROPERTIES:
+:header-args:ocaml: :comments noweb :noweb no-export :padline yes :tangle ../src/hls/Schedule.ml
+:END:
+
+#+begin_src ocaml :comments no :padline no :exports none
+<<license>>
+#+end_src
+
+#+name: scheduler-main
+#+begin_src ocaml
+open Printf
+open Clflags
+open Camlcoq
+open Datatypes
+open Coqlib
+open Maps
+open AST
+open Kildall
+open Op
+open RTLBlockInstr
+open Predicate
+open RTLBlock
+open HTL
+open Verilog
+open HTLgen
+open HTLMonad
+open HTLMonadExtra
+
+module SS = Set.Make(P)
+
+type svtype =
+  | BBType of int
+  | VarType of int * int
+
+type sv = {
+  sv_type: svtype;
+  sv_num: int;
+}
+
+let print_sv v =
+  match v with
+  | { sv_type = BBType bbi; sv_num = n } -> sprintf "bb%d_%d" bbi n
+  | { sv_type = VarType (bbi, i); sv_num = n } -> sprintf "var%dn%d_%d" bbi i n
+
+module G = Graph.Persistent.Digraph.ConcreteLabeled(struct
+  type t = sv
+  let compare = compare
+  let equal = (=)
+  let hash = Hashtbl.hash
+end)(struct
+  type t = int
+  let compare = compare
+  let hash = Hashtbl.hash
+  let equal = (=)
+  let default = 0
+end)
+
+module GDot = Graph.Graphviz.Dot(struct
+    let graph_attributes _ = []
+    let default_vertex_attributes _ = []
+    let vertex_name = print_sv
+    let vertex_attributes _ = []
+    let get_subgraph _ = None
+    let default_edge_attributes _ = []
+    let edge_attributes _ = []
+
+    include G
+  end)
+
+module DFG = Graph.Persistent.Digraph.ConcreteLabeled(struct
+  type t = int * instr
+  let compare = compare
+  let equal = (=)
+  let hash = Hashtbl.hash
+end)(struct
+  type t = int
+  let compare = compare
+  let hash = Hashtbl.hash
+  let equal = (=)
+  let default = 0
+end)
+
+module DFGSimp = Graph.Persistent.Graph.Concrete(struct
+    type t = int * instr
+    let compare = compare
+    let equal = (=)
+    let hash = Hashtbl.hash
+  end)
+
+let convert dfg =
+  DFG.fold_vertex (fun v g -> DFGSimp.add_vertex g v) dfg DFGSimp.empty
+  |> DFG.fold_edges (fun v1 v2 g -> DFGSimp.add_edge (DFGSimp.add_edge g v1 v2) v2 v1) dfg
+
+let reg r = sprintf "r%d" (P.to_int r)
+let print_pred r = sprintf "p%d" (P.to_int r)
+
+let print_instr = function
+  | RBnop -> ""
+  | RBload (_, _, _, _, r) -> sprintf "load(%s)" (reg r)
+  | RBstore (_, _, _, _, r) -> sprintf "store(%s)" (reg r)
+  | RBsetpred (_, _, _, p) -> sprintf "setpred(%s)" (print_pred p)
+  | RBop (_, op, args, d) ->
+    (match op, args with
+    | Omove, _ -> "mov"
+    | Ointconst n, _ -> sprintf "%s=%ld" (reg d) (camlint_of_coqint n)
+    | Olongconst n, _ -> sprintf "%s=%LdL" (reg d) (camlint64_of_coqint n)
+    | Ofloatconst n, _ -> sprintf "%s=%.15F" (reg d) (camlfloat_of_coqfloat n)
+    | Osingleconst n, _ -> sprintf "%s=%.15Ff" (reg d) (camlfloat_of_coqfloat32 n)
+    | Oindirectsymbol id, _ -> sprintf "%s=&%s" (reg d) (extern_atom id)
+    | Ocast8signed, [r1] -> sprintf "%s=int8signed(%s)" (reg d) (reg r1)
+    | Ocast8unsigned, [r1] -> sprintf "%s=int8unsigned(%s)" (reg d) (reg r1)
+    | Ocast16signed, [r1] -> sprintf "%s=int16signed(%s)" (reg d) (reg r1)
+    | Ocast16unsigned, [r1] -> sprintf "%s=int16unsigned(%s)" (reg d) (reg r1)
+    | Oneg, [r1] -> sprintf "%s=-%s" (reg d) (reg r1)
+    | Osub, [r1;r2] -> sprintf "%s=%s-%s" (reg d) (reg r1) (reg r2)
+    | Omul, [r1;r2] -> sprintf "%s=%s*%s" (reg d) (reg r1) (reg r2)
+    | Omulimm n, [r1] -> sprintf "%s=%s*%ld" (reg d) (reg r1) (camlint_of_coqint n)
+    | Omulhs, [r1;r2] -> sprintf "%s=mulhs(%s,%s)" (reg d) (reg r1) (reg r2)
+    | Omulhu, [r1;r2] -> sprintf "%s=mulhu(%s,%s)" (reg d) (reg r1) (reg r2)
+    | Odiv, [r1;r2] -> sprintf "%s=%s /s %s" (reg d) (reg r1) (reg r2)
+    | Odivu, [r1;r2] -> sprintf "%s=%s /u %s" (reg d) (reg r1) (reg r2)
+    | Omod, [r1;r2] -> sprintf "%s=%s %%s %s" (reg d) (reg r1) (reg r2)
+    | Omodu, [r1;r2] -> sprintf "%s=%s %%u %s" (reg d) (reg r1) (reg r2)
+    | Oand, [r1;r2] -> sprintf "%s=%s & %s" (reg d) (reg r1) (reg r2)
+    | Oandimm n, [r1] -> sprintf "%s=%s & %ld" (reg d) (reg r1) (camlint_of_coqint n)
+    | Oor, [r1;r2] -> sprintf "%s=%s | %s" (reg d) (reg r1) (reg r2)
+    | Oorimm n, [r1] ->  sprintf "%s=%s | %ld" (reg d) (reg r1) (camlint_of_coqint n)
+    | Oxor, [r1;r2] -> sprintf "%s=%s ^ %s" (reg d) (reg r1) (reg r2)
+    | Oxorimm n, [r1] -> sprintf "%s=%s ^ %ld" (reg d) (reg r1) (camlint_of_coqint n)
+    | Onot, [r1] -> sprintf "%s=not(%s)" (reg d) (reg r1)
+    | Oshl, [r1;r2] -> sprintf "%s=%s << %s" (reg d) (reg r1) (reg r2)
+    | Oshlimm n, [r1] -> sprintf "%s=%s << %ld" (reg d) (reg r1) (camlint_of_coqint n)
+    | Oshr, [r1;r2] -> sprintf "%s=%s >>s %s" (reg d) (reg r1) (reg r2)
+    | Oshrimm n, [r1] -> sprintf "%s=%s >>s %ld" (reg d) (reg r1) (camlint_of_coqint n)
+    | Oshrximm n, [r1] -> sprintf "%s=%s >>x %ld" (reg d) (reg r1) (camlint_of_coqint n)
+    | Oshru, [r1;r2] -> sprintf "%s=%s >>u %s" (reg d) (reg r1) (reg r2)
+    | Oshruimm n, [r1] -> sprintf "%s=%s >>u %ld" (reg d) (reg r1) (camlint_of_coqint n)
+    | Ororimm n, [r1] -> sprintf "%s=%s ror %ld" (reg d) (reg r1) (camlint_of_coqint n)
+    | Oshldimm n, [r1;r2] -> sprintf "%s=(%s, %s) << %ld" (reg d) (reg r1) (reg r2) (camlint_of_coqint n)
+    | Olea addr, args -> sprintf "%s=addr" (reg d)
+    | Omakelong, [r1;r2] -> sprintf "%s=makelong(%s,%s)" (reg d) (reg r1) (reg r2)
+    | Olowlong, [r1] -> sprintf "%s=lowlong(%s)" (reg d) (reg r1)
+    | Ohighlong, [r1] -> sprintf "%s=highlong(%s)" (reg d) (reg r1)
+    | Ocast32signed, [r1] -> sprintf "%s=long32signed(%s)" (reg d) (reg r1)
+    | Ocast32unsigned, [r1] -> sprintf "%s=long32unsigned(%s)" (reg d) (reg r1)
+    | Onegl, [r1] -> sprintf "%s=-l %s" (reg d) (reg r1)
+    | Osubl, [r1;r2] -> sprintf "%s=%s -l %s" (reg d) (reg r1) (reg r2)
+    | Omull, [r1;r2] -> sprintf "%s=%s *l %s" (reg d) (reg r1) (reg r2)
+    | Omullimm n, [r1] -> sprintf "%s=%s *l %Ld" (reg d) (reg r1) (camlint64_of_coqint n)
+    | Omullhs, [r1;r2] -> sprintf "%s=mullhs(%s,%s)" (reg d) (reg r1) (reg r2)
+    | Omullhu, [r1;r2] -> sprintf "%s=mullhu(%s,%s)" (reg d) (reg r1) (reg r2)
+    | Odivl, [r1;r2] -> sprintf "%s=%s /ls %s" (reg d) (reg r1) (reg r2)
+    | Odivlu, [r1;r2] -> sprintf "%s=%s /lu %s" (reg d) (reg r1) (reg r2)
+    | Omodl, [r1;r2] -> sprintf "%s=%s %%ls %s" (reg d) (reg r1) (reg r2)
+    | Omodlu, [r1;r2] -> sprintf "%s=%s %%lu %s" (reg d) (reg r1) (reg r2)
+    | Oandl, [r1;r2] -> sprintf "%s=%s &l %s" (reg d) (reg r1) (reg r2)
+    | Oandlimm n, [r1] -> sprintf "%s=%s &l %Ld" (reg d) (reg r1) (camlint64_of_coqint n)
+    | Oorl, [r1;r2] -> sprintf "%s=%s |l %s" (reg d) (reg r1) (reg r2)
+    | Oorlimm n, [r1] ->  sprintf "%s=%s |l %Ld" (reg d) (reg r1) (camlint64_of_coqint n)
+    | Oxorl, [r1;r2] -> sprintf "%s=%s ^l %s" (reg d) (reg r1) (reg r2)
+    | Oxorlimm n, [r1] -> sprintf "%s=%s ^l %Ld" (reg d) (reg r1) (camlint64_of_coqint n)
+    | Onotl, [r1] -> sprintf "%s=notl(%s)" (reg d) (reg r1)
+    | Oshll, [r1;r2] -> sprintf "%s=%s <<l %s" (reg d) (reg r1) (reg r2)
+    | Oshllimm n, [r1] -> sprintf "%s=%s <<l %ld" (reg d) (reg r1) (camlint_of_coqint n)
+    | Oshrl, [r1;r2] -> sprintf "%s=%s >>ls %s" (reg d) (reg r1) (reg r2)
+    | Oshrlimm n, [r1] -> sprintf "%s=%s >>ls %ld" (reg d) (reg r1) (camlint_of_coqint n)
+    | Oshrxlimm n, [r1] -> sprintf "%s=%s >>lx %ld" (reg d) (reg r1) (camlint_of_coqint n)
+    | Oshrlu, [r1;r2] -> sprintf "%s=%s >>lu %s" (reg d) (reg r1) (reg r2)
+    | Oshrluimm n, [r1] -> sprintf "%s=%s >>lu %ld" (reg d) (reg r1) (camlint_of_coqint n)
+    | Ororlimm n, [r1] -> sprintf "%s=%s rorl %ld" (reg d) (reg r1) (camlint_of_coqint n)
+    | Oleal addr, args -> sprintf "%s=addr" (reg d)
+    | Onegf, [r1] -> sprintf "%s=negf(%s)" (reg d) (reg r1)
+    | Oabsf, [r1] -> sprintf "%s=absf(%s)" (reg d) (reg r1)
+    | Oaddf, [r1;r2] -> sprintf "%s=%s +f %s" (reg d) (reg r1) (reg r2)
+    | Osubf, [r1;r2] -> sprintf "%s=%s -f %s" (reg d) (reg r1) (reg r2)
+    | Omulf, [r1;r2] -> sprintf "%s=%s *f %s" (reg d) (reg r1) (reg r2)
+    | Odivf, [r1;r2] -> sprintf "%s=%s /f %s" (reg d) (reg r1) (reg r2)
+    | Onegfs, [r1] -> sprintf "%s=negfs(%s)" (reg d) (reg r1)
+    | Oabsfs, [r1] -> sprintf "%s=absfs(%s)" (reg d) (reg r1)
+    | Oaddfs, [r1;r2] -> sprintf "%s=%s +fs %s" (reg d) (reg r1) (reg r2)
+    | Osubfs, [r1;r2] -> sprintf "%s=%s -fs %s" (reg d) (reg r1) (reg r2)
+    | Omulfs, [r1;r2] -> sprintf "%s=%s *fs %s" (reg d) (reg r1) (reg r2)
+    | Odivfs, [r1;r2] -> sprintf "%s=%s /fs %s" (reg d) (reg r1) (reg r2)
+    | Osingleoffloat, [r1] -> sprintf "%s=singleoffloat(%s)" (reg d) (reg r1)
+    | Ofloatofsingle, [r1] -> sprintf "%s=floatofsingle(%s)" (reg d) (reg r1)
+    | Ointoffloat, [r1] -> sprintf "%s=intoffloat(%s)" (reg d) (reg r1)
+    | Ofloatofint, [r1] -> sprintf "%s=floatofint(%s)" (reg d) (reg r1)
+    | Ointofsingle, [r1] -> sprintf "%s=intofsingle(%s)" (reg d) (reg r1)
+    | Osingleofint, [r1] -> sprintf "%s=singleofint(%s)" (reg d) (reg r1)
+    | Olongoffloat, [r1] -> sprintf "%s=longoffloat(%s)" (reg d) (reg r1)
+    | Ofloatoflong, [r1] -> sprintf "%s=floatoflong(%s)" (reg d) (reg r1)
+    | Olongofsingle, [r1] -> sprintf "%s=longofsingle(%s)" (reg d) (reg r1)
+    | Osingleoflong, [r1] -> sprintf "%s=singleoflong(%s)" (reg d) (reg r1)
+    | Ocmp c, args -> sprintf "%s=cmp" (reg d)
+    | Osel (c, ty), r1::r2::args -> sprintf "%s=sel" (reg d)
+    | _, _ -> sprintf "N/a")
+
+module DFGDot = Graph.Graphviz.Dot(struct
+    let graph_attributes _ = []
+    let default_vertex_attributes _ = []
+    let vertex_name = function (i, instr) -> sprintf "\"%d:%s\"" i (print_instr instr)
+    let vertex_attributes _ = []
+    let get_subgraph _ = None
+    let default_edge_attributes _ = []
+    let edge_attributes _ = []
+
+    include DFG
+  end)
+
+module DFGDfs = Graph.Traverse.Dfs(DFG)
+
+module IMap = Map.Make (struct
+  type t = int
+
+  let compare = compare
+end)
+
+let gen_vertex instrs i = (i, List.nth instrs i)
+
+(** The DFG type defines a list of instructions with their data dependencies as [edges], which are
+   the pairs of integers that represent the index of the instruction in the [nodes].  The edges
+   always point from left to right. *)
+
+let print_list f out_chan a =
+  fprintf out_chan "[ ";
+  List.iter (fprintf out_chan "%a " f) a;
+  fprintf out_chan "]"
+
+let print_tuple out_chan a =
+  let l, r = a in
+  fprintf out_chan "(%d,%d)" l r
+
+(*let print_dfg out_chan dfg =
+  fprintf out_chan "{ nodes = %a, edges = %a }"
+    (print_list PrintRTLBlockInstr.print_bblock_body)
+    dfg.nodes (print_list print_tuple) dfg.edges*)
+
+let print_dfg = DFGDot.output_graph
+
+let read_process command =
+  let buffer_size = 2048 in
+  let buffer = Buffer.create buffer_size in
+  let string = Bytes.create buffer_size in
+  let in_channel = Unix.open_process_in command in
+  let chars_read = ref 1 in
+  while !chars_read <> 0 do
+    chars_read := input in_channel string 0 buffer_size;
+    Buffer.add_substring buffer (Bytes.to_string string) 0 !chars_read
+  done;
+  ignore (Unix.close_process_in in_channel);
+  Buffer.contents buffer
+
+let comb_delay = function
+  | RBnop -> 0
+  | RBop (_, op, _, _) ->
+    (match op with
+     | Omove -> 0
+     | Ointconst _ -> 0
+     | Olongconst _ -> 0
+     | Ocast8signed -> 0
+     | Ocast8unsigned -> 0
+     | Ocast16signed -> 0
+     | Ocast16unsigned -> 0
+     | Oneg -> 0
+     | Onot -> 0
+     | Oor -> 0
+     | Oorimm _ -> 0
+     | Oand -> 0
+     | Oandimm _ -> 0
+     | Oxor -> 0
+     | Oxorimm _ -> 0
+     | Omul -> 8
+     | Omulimm _ -> 8
+     | Omulhs -> 8
+     | Omulhu -> 8
+     | Odiv -> 72
+     | Odivu -> 72
+     | Omod -> 72
+     | Omodu -> 72
+     | _ -> 1)
+  | _ -> 1
+
+let pipeline_stages = function
+  | RBload _ -> 2
+  | RBop (_, op, _, _) ->
+    (match op with
+     | Odiv -> 32
+     | Odivu -> 32
+     | Omod -> 32
+     | Omodu -> 32
+     | _ -> 0)
+  | _ -> 0
+
+(** Add a dependency if it uses a register that was written to previously. *)
+let add_dep map i tree dfg curr =
+  match PTree.get curr tree with
+  | None -> dfg
+  | Some ip ->
+    let ipv = (List.nth map ip) in
+    DFG.add_edge_e dfg (ipv, comb_delay (snd (List.nth map i)), List.nth map i)
+
+(** This function calculates the dependencies of each instruction.  The nodes correspond to previous
+   registers that were allocated and show which instruction caused it.
+
+   This function only gathers the RAW constraints, and will therefore only be active for operations
+   that modify registers, which is this case only affects loads and operations. *)
+let accumulate_RAW_deps map dfg curr =
+  let i, dst_map, graph = dfg in
+  let acc_dep_instruction rs dst =
+    ( i + 1,
+      PTree.set dst i dst_map,
+      List.fold_left (add_dep map i dst_map) graph rs
+    )
+  in
+  let acc_dep_instruction_nodst rs =
+    ( i + 1,
+      dst_map,
+    List.fold_left (add_dep map i dst_map) graph rs)
+  in
+  match curr with
+  | RBop (op, _, rs, dst) -> acc_dep_instruction rs dst
+  | RBload (op, _mem, _addr, rs, dst) -> acc_dep_instruction rs dst
+  | RBsetpred (_op, _mem, rs, _p) -> acc_dep_instruction_nodst rs
+  | RBstore (op, _mem, _addr, rs, src) -> acc_dep_instruction_nodst (src :: rs)
+  | _ -> (i + 1, dst_map, graph)
+
+(** Finds the next write to the [dst] register.  This is a small optimisation so that only one
+   dependency is generated for a data dependency. *)
+let rec find_next_dst_write i dst i' curr =
+  let check_dst dst' curr' =
+    if dst = dst' then Some (i, i')
+    else find_next_dst_write i dst (i' + 1) curr'
+  in
+  match curr with
+  | [] -> None
+  | RBop (_, _, _, dst') :: curr' -> check_dst dst' curr'
+  | RBload (_, _, _, _, dst') :: curr' -> check_dst dst' curr'
+  | _ :: curr' -> find_next_dst_write i dst (i' + 1) curr'
+
+let rec find_all_next_dst_read i dst i' curr =
+  let check_dst rs curr' =
+    if List.exists (fun x -> x = dst) rs
+    then (i, i') :: find_all_next_dst_read i dst (i' + 1) curr'
+    else find_all_next_dst_read i dst (i' + 1) curr'
+  in
+  match curr with
+  | [] -> []
+  | RBop (_, _, rs, _) :: curr' -> check_dst rs curr'
+  | RBload (_, _, _, rs, _) :: curr' -> check_dst rs curr'
+  | RBstore (_, _, _, rs, src) :: curr' -> check_dst (src :: rs) curr'
+  | RBnop :: curr' -> find_all_next_dst_read i dst (i' + 1) curr'
+  | RBsetpred (_, _, rs, _) :: curr' -> check_dst rs curr'
+
+let drop i lst =
+  let rec drop' i' lst' =
+    match lst' with
+    | _ :: ls -> if i' = i then ls else drop' (i' + 1) ls
+    | [] -> []
+  in
+  if i = 0 then lst else drop' 1 lst
+
+let take i lst =
+  let rec take' i' lst' =
+    match lst' with
+    | l :: ls -> if i' = i then [ l ] else l :: take' (i' + 1) ls
+    | [] -> []
+  in
+  if i = 0 then [] else take' 1 lst
+
+let rec next_store i = function
+  | [] -> None
+  | RBstore (_, _, _, _, _) :: _ -> Some i
+  | _ :: rst -> next_store (i + 1) rst
+
+let rec next_load i = function
+  | [] -> None
+  | RBload (_, _, _, _, _) :: _ -> Some i
+  | _ :: rst -> next_load (i + 1) rst
+
+let accumulate_RAW_mem_deps instrs dfg curri =
+  let i, curr = curri in
+  match curr with
+  | RBload (_, _, _, _, _) -> (
+      match next_store 0 (take i instrs |> List.rev) with
+      | None -> dfg
+      | Some d -> DFG.add_edge dfg (gen_vertex instrs (i - d - 1)) (gen_vertex instrs i) )
+  | _ -> dfg
+
+let accumulate_WAR_mem_deps instrs dfg curri =
+  let i, curr = curri in
+  match curr with
+  | RBstore (_, _, _, _, _) -> (
+      match next_load 0 (take i instrs |> List.rev) with
+      | None -> dfg
+      | Some d -> DFG.add_edge dfg (gen_vertex instrs (i - d - 1)) (gen_vertex instrs i) )
+  | _ -> dfg
+
+let accumulate_WAW_mem_deps instrs dfg curri =
+  let i, curr = curri in
+  match curr with
+  | RBstore (_, _, _, _, _) -> (
+      match next_store 0 (take i instrs |> List.rev) with
+      | None -> dfg
+      | Some d -> DFG.add_edge dfg (gen_vertex instrs (i - d - 1)) (gen_vertex instrs i))
+  | _ -> dfg
+
+(** Predicate dependencies. *)
+
+let rec in_predicate p p' =
+  match p' with
+  | Plit p'' -> P.to_int p = P.to_int (snd p'')
+  | Pand (p1, p2) -> in_predicate p p1 || in_predicate p p2
+  | Por (p1, p2) -> in_predicate p p1 || in_predicate p p2
+  | Ptrue -> false
+  | Pfalse -> false
+
+let get_predicate = function
+  | RBop (p, _, _, _) -> p
+  | RBload (p, _, _, _, _) -> p
+  | RBstore (p, _, _, _, _) -> p
+  | RBsetpred (p, _, _, _) -> p
+  | _ -> None
+
+let rec next_setpred p i = function
+  | [] -> None
+  | RBsetpred (_, _, _, p') :: rst ->
+    if in_predicate p' p then
+      Some i
+    else
+      next_setpred p (i + 1) rst
+  | _ :: rst -> next_setpred p (i + 1) rst
+
+let rec next_preduse p i instr=
+  let next p' rst =
+    if in_predicate p p' then
+      Some i
+    else
+      next_preduse p (i + 1) rst
+  in
+  match instr with
+  | [] -> None
+  | RBload (Some p', _, _, _, _) :: rst -> next p' rst
+  | RBstore (Some p', _, _, _, _) :: rst -> next p' rst
+  | RBop (Some p', _, _, _) :: rst -> next p' rst
+  | RBsetpred (Some p', _, _, _) :: rst -> next p' rst
+  | _ :: rst -> next_load (i + 1) rst
+
+let accumulate_RAW_pred_deps instrs dfg curri =
+  let i, curr = curri in
+  match get_predicate curr with
+  | Some p -> (
+      match next_setpred p 0 (take i instrs |> List.rev) with
+      | None -> dfg
+      | Some d -> DFG.add_edge dfg (gen_vertex instrs (i - d - 1)) (gen_vertex instrs i) )
+  | _ -> dfg
+
+let accumulate_WAR_pred_deps instrs dfg curri =
+  let i, curr = curri in
+  match curr with
+  | RBsetpred (_, _, _, p) -> (
+      match next_preduse p 0 (take i instrs |> List.rev) with
+      | None -> dfg
+      | Some d -> DFG.add_edge dfg (gen_vertex instrs (i - d - 1)) (gen_vertex instrs i) )
+  | _ -> dfg
+
+let accumulate_WAW_pred_deps instrs dfg curri =
+  let i, curr = curri in
+  match curr with
+  | RBsetpred (_, _, _, p) -> (
+      match next_setpred (Plit (true, p)) 0 (take i instrs |> List.rev) with
+      | None -> dfg
+      | Some d -> DFG.add_edge dfg (gen_vertex instrs (i - d - 1)) (gen_vertex instrs i) )
+  | _ -> dfg
+
+(** This function calculates the WAW dependencies, which happen when two writes are ordered one
+   after another and therefore have to be kept in that order.  This accumulation might be redundant
+   if register renaming is done before hand, because then these dependencies can be avoided. *)
+let accumulate_WAW_deps instrs dfg curri =
+  let i, curr = curri in
+  let dst_dep dst =
+    match find_next_dst_write i dst (i + 1) (drop (i + 1) instrs) with
+    | Some (a, b) -> DFG.add_edge dfg (gen_vertex instrs a) (gen_vertex instrs b)
+    | _ -> dfg
+  in
+  match curr with
+  | RBop (_, _, _, dst) -> dst_dep dst
+  | RBload (_, _, _, _, dst) -> dst_dep dst
+  | RBstore (_, _, _, _, _) -> (
+      match next_store (i + 1) (drop (i + 1) instrs) with
+      | None -> dfg
+      | Some i' -> DFG.add_edge dfg (gen_vertex instrs i) (gen_vertex instrs i') )
+  | _ -> dfg
+
+let accumulate_WAR_deps instrs dfg curri =
+  let i, curr = curri in
+  let dst_dep dst =
+    let dep_list = find_all_next_dst_read i dst 0 (take i instrs |> List.rev)
+        |> List.map (function (d, d') -> (i - d' - 1, d))
+    in
+    List.fold_left (fun g ->
+        function (d, d') -> DFG.add_edge g (gen_vertex instrs d) (gen_vertex instrs d')) dfg dep_list
+  in
+  match curr with
+  | RBop (_, _, _, dst) -> dst_dep dst
+  | RBload (_, _, _, _, dst) -> dst_dep dst
+  | _ -> dfg
+
+let assigned_vars vars = function
+  | RBnop -> vars
+  | RBop (_, _, _, dst) -> dst :: vars
+  | RBload (_, _, _, _, dst) -> dst :: vars
+  | RBstore (_, _, _, _, _) -> vars
+  | RBsetpred (_, _, _, _) -> vars
+
+let get_pred = function
+  | RBnop -> None
+  | RBop (op, _, _, _) -> op
+  | RBload (op, _, _, _, _) -> op
+  | RBstore (op, _, _, _, _) -> op
+  | RBsetpred (_, _, _, _) -> None
+
+let independant_pred p p' =
+  match sat_pred_simple (Pand (p, p')) with
+  | None -> true
+  | _ -> false
+
+let check_dependent op1 op2 =
+  match op1, op2 with
+  | Some p, Some p' -> not (independant_pred p p')
+  | _, _ -> true
+
+let remove_unnecessary_deps graph =
+  let is_dependent v1 v2 g =
+    let (_, instr1) = v1 in
+    let (_, instr2) = v2 in
+    if check_dependent (get_pred instr1) (get_pred instr2)
+    then g
+    else DFG.remove_edge g v1 v2
+  in
+  DFG.fold_edges is_dependent graph graph
+
+(** All the nodes in the DFG have to come after the source of the basic block, and should terminate
+   before the sink of the basic block.  After that, there should be constraints for data
+   dependencies between nodes. *)
+let gather_bb_constraints debug bb =
+  let ibody = List.mapi (fun i a -> (i, a)) bb.bb_body in
+  let dfg = List.fold_left (fun dfg v -> DFG.add_vertex dfg v) DFG.empty ibody in
+  let _, _, dfg' =
+    List.fold_left (accumulate_RAW_deps ibody)
+      (0, PTree.empty, dfg)
+      bb.bb_body
+  in
+  let dfg'' = List.fold_left (fun dfg f -> List.fold_left (f bb.bb_body) dfg ibody) dfg'
+      [ accumulate_WAW_deps;
+        accumulate_WAR_deps;
+        accumulate_RAW_mem_deps;
+        accumulate_WAR_mem_deps;
+        accumulate_WAW_mem_deps;
+        accumulate_RAW_pred_deps;
+        accumulate_WAR_pred_deps;
+        accumulate_WAW_pred_deps
+      ]
+  in
+  let dfg''' = remove_unnecessary_deps dfg'' in
+  (List.length bb.bb_body, dfg''', successors_instr bb.bb_exit)
+
+let encode_var bbn n i = { sv_type = VarType (bbn, n); sv_num = i }
+let encode_bb bbn i = { sv_type = BBType bbn; sv_num = i }
+
+let add_initial_sv n dfg constr =
+  let add_initial_sv' (i, instr) g =
+    let pipes = pipeline_stages instr in
+    if pipes > 0 then
+      List.init pipes (fun i' -> i')
+      |> List.fold_left (fun g i' ->
+          G.add_edge_e g (encode_var n i i', -1, encode_var n i (i'+1))
+        ) g
+    else g
+  in
+  DFG.fold_vertex add_initial_sv' dfg constr
+
+let add_super_nodes n dfg =
+  DFG.fold_vertex (function v1 -> fun g ->
+      (if DFG.in_degree dfg v1 = 0
+       then G.add_edge_e g (encode_bb n 0, 0, encode_var n (fst v1) 0)
+       else g) |>
+      (fun g' ->
+         if DFG.out_degree dfg v1 = 0
+         then G.add_edge_e g' (encode_var n (fst v1) (pipeline_stages (snd v1)),
+                               0, encode_bb n 1)
+         else g')) dfg
+
+let add_data_deps n =
+  DFG.fold_edges_e (function ((i1, instr1), _, (i2, _)) -> fun g ->
+      let end_sv = pipeline_stages instr1 in
+      G.add_edge_e g (encode_var n i1 end_sv, 0, encode_var n i2 0)
+    )
+
+let add_ctrl_deps n succs constr =
+  List.fold_left (fun g n' ->
+      G.add_edge_e g (encode_bb n 1, -1, encode_bb n' 0)
+    ) constr succs
+
+module BFDFG = Graph.Path.BellmanFord(DFG)(struct
+    include DFG
+    type t = int
+    let weight = DFG.E.label
+    let compare = compare
+    let add = (+)
+    let zero = 0
+  end)
+
+module TopoDFG = Graph.Topological.Make(DFG)
+
+let negate_graph constr =
+  DFG.fold_edges_e (function (v1, e, v2) -> fun g ->
+      DFG.add_edge_e g (v1, -e, v2)
+    ) constr DFG.empty
+
+let add_cycle_constr maxf n dfg constr =
+  let negated_dfg = negate_graph dfg in
+  let max_initial_del = DFG.fold_vertex (fun v1 g ->
+      if DFG.in_degree dfg v1 = 0
+      then max g (comb_delay (snd v1))
+      else g) dfg 0 in
+  let longest_path v = BFDFG.all_shortest_paths negated_dfg v
+                       |> BFDFG.H.to_seq |> List.of_seq
+                       |> List.map (function (x, y) -> (x, y - max_initial_del)) in
+  let constrained_paths = List.filter (function (_, m) -> - m > maxf) in
+  List.fold_left (fun g -> function (v, v', w) ->
+      G.add_edge_e g (encode_var n (fst v) 0,
+                      - (int_of_float (Float.ceil (Float.div (float_of_int w) (float_of_int maxf))) - 1),
+                      encode_var n (fst v') 0)
+    ) constr (DFG.fold_vertex (fun v l ->
+      List.append l (longest_path v (*|> (function l -> List.iter (function (a, x) ->
+          printf "c: %d %d\n" (fst a) x) l; l)*) |> constrained_paths (* |> (function l -> List.iter (function (a, x) ->
+          printf "%d %d\n" (fst a) x) l; l)*)
+                     |> List.map (function (v', w) -> (v, v', - w)))
+    ) dfg [])
+
+type resource =
+  | Mem
+  | SDiv
+  | UDiv
+
+type resources = {
+  res_mem: DFG.V.t list;
+  res_udiv: DFG.V.t list;
+  res_sdiv: DFG.V.t list;
+}
+
+let find_resource = function
+  | RBload _ -> Some Mem
+  | RBstore _ -> Some Mem
+  | RBop (_, op, _, _) ->
+    ( match op with
+      | Odiv -> Some SDiv
+      | Odivu -> Some UDiv
+      | Omod -> Some SDiv
+      | Omodu -> Some UDiv
+      | _ -> None )
+  | _ -> None
+
+let add_resource_constr n dfg constr =
+  let res = TopoDFG.fold (function (i, instr) ->
+    function {res_mem = ml; res_sdiv = sdl; res_udiv = udl} as r ->
+    match find_resource instr with
+    | Some SDiv -> {r with res_sdiv = (i, instr) :: sdl}
+    | Some UDiv -> {r with res_udiv = (i, instr) :: udl}
+    | Some Mem -> {r with res_mem = (i, instr) :: ml}
+    | None -> r
+    ) dfg {res_mem = []; res_sdiv = []; res_udiv = []}
+  in
+  let get_constraints l g = List.fold_left (fun gv v' ->
+      match gv with
+      | (g, None) -> (g, Some v')
+      | (g, Some v) ->
+        (G.add_edge_e g (encode_var n (fst v) 0, -1, encode_var n (fst v') 0), Some v')
+    ) (g, None) l |> fst
+  in
+  get_constraints (List.rev res.res_mem) constr
+  |> get_constraints (List.rev res.res_udiv)
+  |> get_constraints (List.rev res.res_sdiv)
+
+let gather_cfg_constraints c constr curr =
+  let (n, dfg) = curr in
+  match PTree.get (P.of_int n) c with
+  | None -> assert false
+  | Some { bb_exit = ctrl; _ } ->
+    add_super_nodes n dfg constr
+    |> add_initial_sv n dfg
+    |> add_data_deps n dfg
+    |> add_ctrl_deps n (successors_instr ctrl
+                        |> List.map P.to_int
+                        |> List.filter (fun n' -> n' < n))
+    |> add_cycle_constr 8 n dfg
+    |> add_resource_constr n dfg
+
+let rec intersperse s = function
+  | [] -> []
+  | [ a ] -> [ a ]
+  | x :: xs -> x :: s :: intersperse s xs
+
+let print_objective constr =
+  let vars = G.fold_vertex (fun v1 l ->
+      match v1 with
+      | { sv_type = VarType _; sv_num = 0 } -> print_sv v1 :: l
+      | _ -> l
+    ) constr []
+  in
+  "min: " ^ String.concat "" (intersperse " + " vars) ^ ";\n"
+
+let print_lp constr =
+  print_objective constr ^
+  (G.fold_edges_e (function (e1, w, e2) -> fun s ->
+       s ^ sprintf "%s - %s <= %d;\n" (print_sv e1) (print_sv e2) w
+     ) constr "" |>
+   G.fold_vertex (fun v1 s ->
+       s ^ sprintf "int %s;\n" (print_sv v1)
+     ) constr)
+
+let update_schedule v = function Some l -> Some (v :: l) | None -> Some [ v ]
+
+let parse_soln (tree, bbtree) s =
+  let r = Str.regexp "var\\([0-9]+\\)n\\([0-9]+\\)_0[ ]+\\([0-9]+\\)" in
+  let bb = Str.regexp "bb\\([0-9]+\\)_\\([0-9]+\\)[ ]+\\([0-9]+\\)" in
+  let upd s = IMap.update
+            (Str.matched_group 1 s |> int_of_string)
+            (update_schedule
+               ( Str.matched_group 2 s |> int_of_string,
+                 Str.matched_group 3 s |> int_of_string ))
+  in
+  if Str.string_match r s 0
+  then (upd s tree, bbtree)
+  else
+    (if Str.string_match bb s 0
+     then (tree, upd s bbtree)
+     else (tree, bbtree))
+
+let solve_constraints constr =
+  let (fn, oc) = Filename.open_temp_file "vericert_" "_lp_solve" in
+  fprintf oc "%s\n" (print_lp constr);
+  close_out oc;
+
+  let res = Str.split (Str.regexp_string "\n") (read_process ("lp_solve " ^ fn))
+            |> drop 3
+            |> List.fold_left parse_soln (IMap.empty, IMap.empty)
+  in
+  (*Sys.remove fn;*) res
+
+let subgraph dfg l =
+  let dfg' = List.fold_left (fun g v -> DFG.add_vertex g v) DFG.empty l in
+  List.fold_left (fun g v ->
+      List.fold_left (fun g' v' ->
+          let edges = DFG.find_all_edges dfg v v' in
+          List.fold_left DFG.add_edge_e g' edges
+        ) g l
+    ) dfg' l
+
+let rec all_successors dfg v =
+  List.concat (List.fold_left (fun l v ->
+      all_successors dfg v :: l
+    ) [] (DFG.succ dfg v))
+
+let order_instr dfg =
+  DFG.fold_vertex (fun v li ->
+      if DFG.in_degree dfg v = 0
+      then (List.map snd (v :: all_successors dfg v)) :: li
+      else li
+    ) dfg []
+
+let combine_bb_schedule schedule s =
+  let i, st = s in
+  IMap.update st (update_schedule i) schedule
+
+(**let add_el dfg i l =
+  List.*)
+
+let check_in el =
+  List.exists (List.exists ((=) el))
+
+let all_dfs dfg =
+  let roots = DFG.fold_vertex (fun v li ->
+      if DFG.in_degree dfg v = 0 then v :: li else li
+    ) dfg [] in
+  let dfg' = DFG.fold_edges (fun v1 v2 g -> DFG.add_edge g v2 v1) dfg dfg in
+  List.fold_left (fun a el ->
+      if check_in el a then a else
+        (DFGDfs.fold_component (fun v l -> v :: l) [] dfg' el) :: a) [] roots
+
+let range s e =
+  List.init (e - s) (fun i -> i)
+  |> List.map (fun x -> x + s)
+
+(** Should generate the [RTLPar] code based on the input [RTLBlock] description. *)
+let transf_rtlpar c c' schedule =
+  let f i bb : RTLPar.bblock =
+    match bb with
+    | { bb_body = []; bb_exit = c } -> { bb_body = []; bb_exit = c }
+    | { bb_body = bb_body'; bb_exit = ctrl_flow } ->
+      let dfg = match PTree.get i c' with None -> assert false | Some x -> x in
+      let bb_st_e =
+        let m = IMap.find (P.to_int i) (snd schedule) in
+        (List.assq 0 m, List.assq 1 m) in
+      let i_sched = IMap.find (P.to_int i) (fst schedule) in
+      let i_sched_tree =
+        List.fold_left combine_bb_schedule IMap.empty i_sched
+      in
+      let body = IMap.to_seq i_sched_tree |> List.of_seq |> List.map snd
+                 |> List.map (List.map (fun x -> (x, List.nth bb_body' x)))
+      in
+      let body2 = List.fold_left (fun x b ->
+          match IMap.find_opt b i_sched_tree with
+          | Some i -> i :: x
+          | None -> [] :: x
+        ) [] (range (fst bb_st_e) (snd bb_st_e + 1))
+        |> List.map (List.map (fun x -> (x, List.nth bb_body' x)))
+        |> List.rev
+      in
+      (*let final_body = List.map (fun x -> subgraph dfg x |> order_instr) body in*)
+      let final_body2 = List.map (fun x -> subgraph dfg x
+                                           |> (fun x ->
+                                               all_dfs x
+                                               |> List.map (subgraph x)
+                                               |> List.map (fun y ->
+                                                   TopoDFG.fold (fun i l -> snd i :: l) y []
+                                                   |> List.rev))) body2
+                                           (*|> (fun x -> TopoDFG.fold (fun i l -> snd i :: l) x [])
+                                           |> List.rev) body2*)
+      in
+      { bb_body = final_body2;
+        bb_exit = ctrl_flow
+      }
+  in
+  PTree.map f c
+
+let schedule entry (c : RTLBlock.bb RTLBlockInstr.code) =
+  let debug = true in
+  let transf_graph (_, dfg, _) = dfg in
+  let c' = PTree.map1 (fun x -> gather_bb_constraints false x |> transf_graph) c in
+  (*let _ = if debug then PTree.map (fun r o -> printf "##### %d #####\n%a\n\n" (P.to_int r) print_dfg o) c' else PTree.empty in*)
+  let cgraph = PTree.elements c'
+               |> List.map (function (x, y) -> (P.to_int x, y))
+               |> List.fold_left (gather_cfg_constraints c) G.empty
+  in
+  let graph = open_out "constr_graph.dot" in
+  fprintf graph "%a\n" GDot.output_graph cgraph;
+  close_out graph;
+  let schedule' = solve_constraints cgraph in
+  (**IMap.iter (fun a b -> printf "##### %d #####\n%a\n\n" a (print_list print_tuple) b) schedule';*)
+  (**printf "Schedule: %a\n" (fun a x -> IMap.iter (fun d -> fprintf a "%d: %a\n" d (print_list print_tuple)) x) schedule';*)
+  transf_rtlpar c c' schedule'
+
+let rec find_reachable_states c e =
+  match PTree.get e c with
+  | Some { bb_exit = ex; _ } ->
+    e :: List.fold_left (fun x a -> List.concat [x; find_reachable_states c a]) []
+      (successors_instr ex |> List.filter (fun x -> P.lt x e))
+  | None -> assert false
+
+let add_to_tree c nt i =
+  match PTree.get i c with
+  | Some p -> PTree.set i p nt
+  | None -> assert false
+
+let schedule_fn (f : RTLBlock.coq_function) : RTLPar.coq_function =
+  let scheduled = schedule f.fn_entrypoint f.fn_code in
+  let reachable = find_reachable_states scheduled f.fn_entrypoint
+                  |> List.to_seq |> SS.of_seq |> SS.to_seq |> List.of_seq in
+  { fn_sig = f.fn_sig;
+    fn_params = f.fn_params;
+    fn_stacksize = f.fn_stacksize;
+    fn_code = scheduled (*List.fold_left (add_to_tree scheduled) PTree.empty reachable*);
+    fn_entrypoint = f.fn_entrypoint
+  }
+#+end_src
+
+* RTLPargen
+:PROPERTIES:
+:header-args:coq: :comments noweb :noweb no-export :padline yes :tangle ../src/hls/RTLPargen.v
+:END:
+
+#+begin_src coq :comments no :padline no :exports none
+<<license>>
+#+end_src
+
+#+name: rtlpargen-main
+#+begin_src coq
+Require Import compcert.backend.Registers.
+Require Import compcert.common.AST.
+Require Import compcert.common.Globalenvs.
+Require Import compcert.common.Memory.
+Require Import compcert.common.Values.
+Require Import compcert.lib.Floats.
+Require Import compcert.lib.Integers.
+Require Import compcert.lib.Maps.
+Require compcert.verilog.Op.
+
+Require Import vericert.common.Vericertlib.
+Require Import vericert.hls.RTLBlock.
+Require Import vericert.hls.RTLPar.
+Require Import vericert.hls.RTLBlockInstr.
+Require Import vericert.hls.Predicate.
+Require Import vericert.hls.Abstr.
+Import NE.NonEmptyNotation.
+
+(** * RTLPar Generation *)
+
+#[local] Open Scope positive.
+#[local] Open Scope forest.
+#[local] Open Scope pred_op.
+
+(** ** Abstract Computations
+
+Define the abstract computation using the [update] function, which will set each register to its
+symbolic value.  First we need to define a few helper functions to correctly translate the
+predicates.  *)
+
+Fixpoint list_translation (l : list reg) (f : forest) {struct l} : list pred_expr :=
+  match l with
+  | nil => nil
+  | i :: l => (f # (Reg i)) :: (list_translation l f)
+  end.
+
+Fixpoint replicate {A} (n: nat) (l: A) :=
+  match n with
+  | O => nil
+  | S n => l :: replicate n l
+  end.
+
+Definition merge''' x y :=
+  match x, y with
+  | Some p1, Some p2 => Some (Pand p1 p2)
+  | Some p, None | None, Some p => Some p
+  | None, None => None
+  end.
+
+Definition merge'' x :=
+  match x with
+  | ((a, e), (b, el)) => (merge''' a b, Econs e el)
+  end.
+
+Definition map_pred_op {A B} (pf: option pred_op * (A -> B)) (pa: option pred_op * A): option pred_op * B :=
+  match pa, pf with
+  | (p, a), (p', f) => (merge''' p p', f a)
+  end.
+
+Definition predicated_prod {A B: Type} (p1: predicated A) (p2: predicated B) :=
+  NE.map (fun x => match x with ((a, b), (c, d)) => (Pand a c, (b, d)) end)
+         (NE.non_empty_prod p1 p2).
+
+Definition predicated_map {A B: Type} (f: A -> B) (p: predicated A): predicated B :=
+  NE.map (fun x => (fst x, f (snd x))) p.
+
+(*map (fun x => (fst x, Econs (snd x) Enil)) pel*)
+Definition merge' (pel: pred_expr) (tpel: predicated expression_list) :=
+  predicated_map (uncurry Econs) (predicated_prod pel tpel).
+
+Fixpoint merge (pel: list pred_expr): predicated expression_list :=
+  match pel with
+  | nil => NE.singleton (T, Enil)
+  | a :: b => merge' a (merge b)
+  end.
+
+Definition map_predicated {A B} (pf: predicated (A -> B)) (pa: predicated A): predicated B :=
+  predicated_map (fun x => (fst x) (snd x)) (predicated_prod pf pa).
+
+Definition predicated_apply1 {A B} (pf: predicated (A -> B)) (pa: A): predicated B :=
+  NE.map (fun x => (fst x, (snd x) pa)) pf.
+
+Definition predicated_apply2 {A B C} (pf: predicated (A -> B -> C)) (pa: A) (pb: B): predicated C :=
+  NE.map (fun x => (fst x, (snd x) pa pb)) pf.
+
+Definition predicated_apply3 {A B C D} (pf: predicated (A -> B -> C -> D)) (pa: A) (pb: B) (pc: C): predicated D :=
+  NE.map (fun x => (fst x, (snd x) pa pb pc)) pf.
+
+Definition predicated_from_opt {A: Type} (p: option pred_op) (a: A) :=
+  match p with
+  | Some p' => NE.singleton (p', a)
+  | None => NE.singleton (T, a)
+  end.
+
+#[local] Open Scope non_empty_scope.
+#[local] Open Scope pred_op.
+
+Fixpoint NEfold_left {A B} (f: A -> B -> A) (l: NE.non_empty B) (a: A) : A :=
+  match l with
+  | NE.singleton a' => f a a'
+  | a' ::| b => NEfold_left f b (f a a')
+  end.
+
+Fixpoint NEapp {A} (l m: NE.non_empty A) :=
+  match l with
+  | NE.singleton a => a ::| m
+  | a ::| b => a ::| NEapp b m
+  end.
+
+Definition app_predicated' {A: Type} (a b: predicated A) :=
+  let negation := ¬ (NEfold_left (fun a b => a ∨ (fst b)) b ⟂) in
+  NEapp (NE.map (fun x => (negation ∧ fst x, snd x)) a) b.
+
+Definition app_predicated {A: Type} (p: option pred_op) (a b: predicated A) :=
+  match p with
+  | Some p' => NEapp (NE.map (fun x => (¬ p' ∧ fst x, snd x)) a)
+                     (NE.map (fun x => (p' ∧ fst x, snd x)) b)
+  | None => b
+  end.
+
+Definition pred_ret {A: Type} (a: A) : predicated A :=
+  NE.singleton (T, a).
+
+(** *** Update Function
+
+The [update] function will generate a new forest given an existing forest and a new instruction,
+so that it can evaluate a symbolic expression by folding over a list of instructions.  The main
+problem is that predicates need to be merged as well, so that:
+
+1. The predicates are *independent*.
+2. The expression assigned to the register should still be correct.
+
+This is done by multiplying the predicates together, and assigning the negation of the expression to
+the other predicates.  *)
+
+Definition update (f : forest) (i : instr) : forest :=
+  match i with
+  | RBnop => f
+  | RBop p op rl r =>
+    f # (Reg r) <-
+    (app_predicated p
+       (f # (Reg r))
+       (map_predicated (pred_ret (Eop op)) (merge (list_translation rl f))))
+  | RBload p chunk addr rl r =>
+    f # (Reg r) <-
+      (app_predicated p
+         (f # (Reg r))
+         (map_predicated
+            (map_predicated (pred_ret (Eload chunk addr)) (merge (list_translation rl f)))
+            (f # Mem)))
+  | RBstore p chunk addr rl r =>
+    f # Mem <-
+      (app_predicated p
+         (f # Mem)
+         (map_predicated
+            (map_predicated
+               (predicated_apply2 (map_predicated (pred_ret Estore) (f # (Reg r))) chunk addr)
+               (merge (list_translation rl f))) (f # Mem)))
+  | RBsetpred p' c args p =>
+    f # (Pred p) <-
+    (app_predicated p'
+       (f # (Pred p))
+       (map_predicated (pred_ret (Esetpred c)) (merge (list_translation args f))))
+  end.
+
+(** Implementing which are necessary to show the correctness of the translation validation by
+showing that there aren't any more effects in the resultant RTLPar code than in the RTLBlock code.
+
+Get a sequence from the basic block.  *)
+
+Fixpoint abstract_sequence (f : forest) (b : list instr) : forest :=
+  match b with
+  | nil => f
+  | i :: l => abstract_sequence (update f i) l
+  end.
+
+(** Check equivalence of control flow instructions.  As none of the basic blocks should have been
+moved, none of the labels should be different, meaning the control-flow instructions should match
+exactly.  *)
+
+Definition check_control_flow_instr (c1 c2: cf_instr) : bool :=
+  if cf_instr_eq c1 c2 then true else false.
+
+(** We define the top-level oracle that will check if two basic blocks are equivalent after a
+scheduling transformation.  *)
+
+Definition empty_trees (bb: RTLBlock.bb) (bbt: RTLPar.bb) : bool :=
+  match bb with
+  | nil =>
+    match bbt with
+    | nil => true
+    | _ => false
+    end
+  | _ => true
+  end.
+
+Definition schedule_oracle (bb: RTLBlock.bblock) (bbt: RTLPar.bblock) : bool :=
+  check (abstract_sequence empty (bb_body bb))
+        (abstract_sequence empty (concat (concat (bb_body bbt)))) &&
+  check_control_flow_instr (bb_exit bb) (bb_exit bbt) &&
+  empty_trees (bb_body bb) (bb_body bbt).
+
+Definition check_scheduled_trees := beq2 schedule_oracle.
+
+Ltac solve_scheduled_trees_correct :=
+  intros; unfold check_scheduled_trees in *;
+  match goal with
+  | [ H: context[beq2 _ _ _], x: positive |- _ ] =>
+    rewrite beq2_correct in H; specialize (H x)
+  end; repeat destruct_match; crush.
+
+Lemma check_scheduled_trees_correct:
+  forall f1 f2 x y1,
+    check_scheduled_trees f1 f2 = true ->
+    PTree.get x f1 = Some y1 ->
+    exists y2, PTree.get x f2 = Some y2 /\ schedule_oracle y1 y2 = true.
+Proof. solve_scheduled_trees_correct; eexists; crush. Qed.
+
+Lemma check_scheduled_trees_correct2:
+  forall f1 f2 x,
+    check_scheduled_trees f1 f2 = true ->
+    PTree.get x f1 = None ->
+    PTree.get x f2 = None.
+Proof. solve_scheduled_trees_correct. Qed.
+
+(** ** Top-level Functions  *)
+
+Parameter schedule : RTLBlock.function -> RTLPar.function.
+
+Definition transl_function (f: RTLBlock.function) : Errors.res RTLPar.function :=
+  let tfcode := fn_code (schedule f) in
+  if check_scheduled_trees f.(fn_code) tfcode then
+    Errors.OK (mkfunction f.(fn_sig)
+                          f.(fn_params)
+                          f.(fn_stacksize)
+                          tfcode
+                          f.(fn_entrypoint))
+  else
+    Errors.Error (Errors.msg "RTLPargen: Could not prove the blocks equivalent.").
+
+Definition transl_fundef := transf_partial_fundef transl_function.
+
+Definition transl_program (p : RTLBlock.program) : Errors.res RTLPar.program :=
+  transform_partial_program transl_fundef p.
+#+end_src
+* RTLPargenproof
+:PROPERTIES:
+:header-args:coq: :comments noweb :noweb no-export :padline yes :tangle ../src/hls/RTLPargenproof.v
+:END:
+
+#+begin_src coq :comments no :padline no :exports none
+<<license>>
+#+end_src
+
+#+name: rtlpargenproof-imports
+#+begin_src coq
+Require Import compcert.backend.Registers.
+Require Import compcert.common.AST.
+Require Import compcert.common.Errors.
+Require Import compcert.common.Linking.
+Require Import compcert.common.Globalenvs.
+Require Import compcert.common.Memory.
+Require Import compcert.common.Values.
+Require Import compcert.lib.Maps.
+
+Require Import vericert.common.Vericertlib.
+Require Import vericert.hls.RTLBlock.
+Require Import vericert.hls.RTLPar.
+Require Import vericert.hls.RTLBlockInstr.
+Require Import vericert.hls.RTLPargen.
+Require Import vericert.hls.Predicate.
+Require Import vericert.hls.Abstr.
+
+#[local] Open Scope positive.
+#[local] Open Scope forest.
+#[local] Open Scope pred_op.
+#+end_src
+
+** RTLBlock to abstract translation
+
+Correctness of translation from RTLBlock to the abstract interpretation language.
+
+#+name: rtlpargenproof-main
+#+begin_src coq
+(*Definition is_regs i := match i with mk_instr_state rs _ => rs end.
+Definition is_mem i := match i with mk_instr_state _ m => m end.
+
+Inductive state_lessdef : instr_state -> instr_state -> Prop :=
+  state_lessdef_intro :
+    forall rs1 rs2 m1,
+    (forall x, rs1 !! x = rs2 !! x) ->
+    state_lessdef (mk_instr_state rs1 m1) (mk_instr_state rs2 m1).
+
+Ltac inv_simp :=
+  repeat match goal with
+  | H: exists _, _ |- _ => inv H
+  end; simplify.
+
+*)
+
+Definition check_dest i r' :=
+  match i with
+  | RBop p op rl r => (r =? r')%positive
+  | RBload p chunk addr rl r => (r =? r')%positive
+  | _ => false
+  end.
+
+Lemma check_dest_dec i r : {check_dest i r = true} + {check_dest i r = false}.
+Proof. destruct (check_dest i r); tauto. Qed.
+
+Fixpoint check_dest_l l r :=
+  match l with
+  | nil => false
+  | a :: b => check_dest a r || check_dest_l b r
+  end.
+
+Lemma check_dest_l_forall :
+  forall l r,
+  check_dest_l l r = false ->
+  Forall (fun x => check_dest x r = false) l.
+Proof. induction l; crush. Qed.
+
+Lemma check_dest_l_dec i r : {check_dest_l i r = true} + {check_dest_l i r = false}.
+Proof. destruct (check_dest_l i r); tauto. Qed.
+
+Lemma check_dest_update :
+  forall f i r,
+  check_dest i r = false ->
+  (update f i) # (Reg r) = f # (Reg r).
+Proof.
+  destruct i; crush; try apply Pos.eqb_neq in H; apply genmap1; crush.
+Qed.
+
+Lemma check_dest_l_forall2 :
+  forall l r,
+  Forall (fun x => check_dest x r = false) l ->
+  check_dest_l l r = false.
+Proof.
+  induction l; crush.
+  inv H. apply orb_false_intro; crush.
+Qed.
+
+Lemma check_dest_l_ex2 :
+  forall l r,
+  (exists a, In a l /\ check_dest a r = true) ->
+  check_dest_l l r = true.
+Proof.
+  induction l; crush.
+  specialize (IHl r). inv H.
+  apply orb_true_intro; crush.
+  apply orb_true_intro; crush.
+  right. apply IHl. exists x. auto.
+Qed.
+
+Lemma check_list_l_false :
+  forall l x r,
+  check_dest_l (l ++ x :: nil) r = false ->
+  check_dest_l l r = false /\ check_dest x r = false.
+Proof.
+  simplify.
+  apply check_dest_l_forall in H. apply Forall_app in H.
+  simplify. apply check_dest_l_forall2; auto.
+  apply check_dest_l_forall in H. apply Forall_app in H.
+  simplify. inv H1. auto.
+Qed.
+
+Lemma check_dest_l_ex :
+  forall l r,
+  check_dest_l l r = true ->
+  exists a, In a l /\ check_dest a r = true.
+Proof.
+  induction l; crush.
+  destruct (check_dest a r) eqn:?; try solve [econstructor; crush].
+  simplify.
+  exploit IHl. apply H. simplify. econstructor. simplify. right. eassumption.
+  auto.
+Qed.
+
+Lemma check_list_l_true :
+  forall l x r,
+  check_dest_l (l ++ x :: nil) r = true ->
+  check_dest_l l r = true \/ check_dest x r = true.
+Proof.
+  simplify.
+  apply check_dest_l_ex in H; simplify.
+  apply in_app_or in H. inv H. left.
+  apply check_dest_l_ex2. exists x0. auto.
+  inv H0; auto.
+Qed.
+
+Lemma check_dest_l_dec2 l r :
+  {Forall (fun x => check_dest x r = false) l}
+  + {exists a, In a l /\ check_dest a r = true}.
+Proof.
+  destruct (check_dest_l_dec l r); [right | left];
+  auto using check_dest_l_ex, check_dest_l_forall.
+Qed.
+
+Lemma abstr_comp :
+  forall l i f x x0,
+  abstract_sequence f (l ++ i :: nil) = x ->
+  abstract_sequence f l = x0 ->
+  x = update x0 i.
+Proof. induction l; intros; crush; eapply IHl; eauto. Qed.
+
+(*
+
+Lemma gen_list_base:
+  forall FF ge sp l rs exps st1,
+  (forall x, @sem_value FF ge sp st1 (exps # (Reg x)) (rs !! x)) ->
+  sem_val_list ge sp st1 (list_translation l exps) rs ## l.
+Proof.
+  induction l.
+  intros. simpl. constructor.
+  intros. simpl. eapply Scons; eauto.
+Qed.
+
+Lemma check_dest_update2 :
+  forall f r rl op p,
+  (update f (RBop p op rl r)) # (Reg r) = Eop op (list_translation rl f) (f # Mem).
+Proof. crush; rewrite map2; auto. Qed.
+
+Lemma check_dest_update3 :
+  forall f r rl p addr chunk,
+  (update f (RBload p chunk addr rl r)) # (Reg r) = Eload chunk addr (list_translation rl f) (f # Mem).
+Proof. crush; rewrite map2; auto. Qed.
+
+Lemma abstract_seq_correct_aux:
+  forall FF ge sp i st1 st2 st3 f,
+    @step_instr FF ge sp st3 i st2 ->
+    sem ge sp st1 f st3 ->
+    sem ge sp st1 (update f i) st2.
+Proof.
+  intros; inv H; simplify.
+  { simplify; eauto. } (*apply match_states_refl. }*)
+  { inv H0. inv H6. destruct st1. econstructor. simplify.
+    constructor. intros.
+    destruct (resource_eq (Reg res) (Reg x)). inv e.
+    rewrite map2. econstructor. eassumption. apply gen_list_base; eauto.
+    rewrite Regmap.gss. eauto.
+    assert (res <> x). { unfold not in *. intros. apply n. rewrite H0. auto. }
+    rewrite Regmap.gso by auto.
+    rewrite genmap1 by auto. auto.
+
+    rewrite genmap1; crush. }
+  { inv H0. inv H7. constructor. constructor. intros.
+    destruct (Pos.eq_dec dst x); subst.
+    rewrite map2. econstructor; eauto.
+    apply gen_list_base. auto. rewrite Regmap.gss. auto.
+    rewrite genmap1. rewrite Regmap.gso by auto. auto.
+    unfold not in *; intros. inv H0. auto.
+    rewrite genmap1; crush.
+  }
+  { inv H0. inv H7. constructor. constructor; intros.
+    rewrite genmap1; crush.
+    rewrite map2. econstructor; eauto.
+    apply gen_list_base; auto.
+  }
+Qed.
+
+Lemma regmap_list_equiv :
+  forall A (rs1: Regmap.t A) rs2,
+    (forall x, rs1 !! x = rs2 !! x) ->
+    forall rl, rs1##rl = rs2##rl.
+Proof. induction rl; crush. Qed.
+
+Lemma sem_update_Op :
+  forall A ge sp st f st' r l o0 o m rs v,
+  @sem A ge sp st f st' ->
+  Op.eval_operation ge sp o0 rs ## l m = Some v ->
+  match_states st' (mk_instr_state rs m) ->
+  exists tst,
+  sem ge sp st (update f (RBop o o0 l r)) tst /\ match_states (mk_instr_state (Regmap.set r v rs) m) tst.
+Proof.
+  intros. inv H1. simplify.
+  destruct st.
+  econstructor. simplify.
+  { constructor.
+    { constructor. intros. destruct (Pos.eq_dec x r); subst.
+      { pose proof (H5 r). rewrite map2. pose proof H. inv H. econstructor; eauto.
+        { inv H9. eapply gen_list_base; eauto. }
+        { instantiate (1 := (Regmap.set r v rs0)). rewrite Regmap.gss. erewrite regmap_list_equiv; eauto. } }
+      { rewrite Regmap.gso by auto. rewrite genmap1; crush. inv H. inv H7; eauto. } }
+    { inv H. rewrite genmap1; crush. eauto. } }
+  { constructor; eauto. intros.
+    destruct (Pos.eq_dec r x);
+    subst; [repeat rewrite Regmap.gss | repeat rewrite Regmap.gso]; auto. }
+Qed.
+
+Lemma sem_update_load :
+  forall A ge sp st f st' r o m a l m0 rs v a0,
+  @sem A ge sp st f st' ->
+  Op.eval_addressing ge sp a rs ## l = Some a0 ->
+  Mem.loadv m m0 a0 = Some v ->
+  match_states st' (mk_instr_state rs m0) ->
+  exists tst : instr_state,
+    sem ge sp st (update f (RBload o m a l r)) tst
+    /\ match_states (mk_instr_state (Regmap.set r v rs) m0) tst.
+Proof.
+  intros. inv H2. pose proof H. inv H. inv H9.
+  destruct st.
+  econstructor; simplify.
+  { constructor.
+    { constructor. intros.
+      destruct (Pos.eq_dec x r); subst.
+      { rewrite map2. econstructor; eauto. eapply gen_list_base. intros.
+        rewrite <- H6. eauto.
+        instantiate (1 := (Regmap.set r v rs0)). rewrite Regmap.gss. auto. }
+      { rewrite Regmap.gso by auto. rewrite genmap1; crush. } }
+    { rewrite genmap1; crush. eauto. } }
+  { constructor; auto; intros. destruct (Pos.eq_dec r x);
+    subst; [repeat rewrite Regmap.gss | repeat rewrite Regmap.gso]; auto. }
+Qed.
+
+Lemma sem_update_store :
+  forall A ge sp a0 m a l r o f st m' rs m0 st',
+  @sem A ge sp st f st' ->
+  Op.eval_addressing ge sp a rs ## l = Some a0 ->
+  Mem.storev m m0 a0 rs !! r = Some m' ->
+  match_states st' (mk_instr_state rs m0) ->
+  exists tst, sem ge sp st (update f (RBstore o m a l r)) tst
+              /\ match_states (mk_instr_state rs m') tst.
+Proof.
+  intros. inv H2. pose proof H. inv H. inv H9.
+  destruct st.
+  econstructor; simplify.
+  { econstructor.
+    { econstructor; intros. rewrite genmap1; crush. }
+    { rewrite map2. econstructor; eauto. eapply gen_list_base. intros. rewrite <- H6.
+      eauto. specialize (H6 r). rewrite H6. eauto. } }
+  { econstructor; eauto. }
+Qed.
+
+Lemma sem_update :
+  forall A ge sp st x st' st'' st''' f,
+  sem ge sp st f st' ->
+  match_states st' st''' ->
+  @step_instr A ge sp st''' x st'' ->
+  exists tst, sem ge sp st (update f x) tst /\ match_states st'' tst.
+Proof.
+  intros. destruct x; inv H1.
+  { econstructor. split.
+    apply sem_update_RBnop. eassumption.
+    apply match_states_commut. auto. }
+  { eapply sem_update_Op; eauto. }
+  { eapply sem_update_load; eauto. }
+  { eapply sem_update_store; eauto. }
+Qed.
+
+Lemma sem_update2_Op :
+  forall A ge sp st f r l o0 o m rs v,
+  @sem A ge sp st f (mk_instr_state rs m) ->
+  Op.eval_operation ge sp o0 rs ## l m = Some v ->
+  sem ge sp st (update f (RBop o o0 l r)) (mk_instr_state (Regmap.set r v rs) m).
+Proof.
+  intros. destruct st. constructor.
+  inv H. inv H6.
+  { constructor; intros. simplify.
+    destruct (Pos.eq_dec r x); subst.
+    { rewrite map2. econstructor. eauto.
+      apply gen_list_base. eauto.
+      rewrite Regmap.gss. auto. }
+    { rewrite genmap1; crush. rewrite Regmap.gso; auto.  } }
+  { simplify. rewrite genmap1; crush. inv H. eauto. }
+Qed.
+
+Lemma sem_update2_load :
+  forall A ge sp st f r o m a l m0 rs v a0,
+    @sem A ge sp st f (mk_instr_state rs m0) ->
+    Op.eval_addressing ge sp a rs ## l = Some a0 ->
+    Mem.loadv m m0 a0 = Some v ->
+    sem ge sp st (update f (RBload o m a l r)) (mk_instr_state (Regmap.set r v rs) m0).
+Proof.
+  intros. simplify. inv H. inv H7. constructor.
+  { constructor; intros. destruct (Pos.eq_dec r x); subst.
+    { rewrite map2. rewrite Regmap.gss. econstructor; eauto.
+      apply gen_list_base; eauto. }
+    { rewrite genmap1; crush. rewrite Regmap.gso; eauto. }
+  }
+  { simplify. rewrite genmap1; crush. }
+Qed.
+
+Lemma sem_update2_store :
+  forall A ge sp a0 m a l r o f st m' rs m0,
+    @sem A ge sp st f (mk_instr_state rs m0) ->
+    Op.eval_addressing ge sp a rs ## l = Some a0 ->
+    Mem.storev m m0 a0 rs !! r = Some m' ->
+    sem ge sp st (update f (RBstore o m a l r)) (mk_instr_state rs m').
+Proof.
+  intros. simplify. inv H. inv H7. constructor; simplify.
+  { econstructor; intros. rewrite genmap1; crush. }
+  { rewrite map2. econstructor; eauto. apply gen_list_base; eauto. }
+Qed.
+
+Lemma sem_update2 :
+  forall A ge sp st x st' st'' f,
+  sem ge sp st f st' ->
+  @step_instr A ge sp st' x st'' ->
+  sem ge sp st (update f x) st''.
+Proof.
+  intros.
+  destruct x; inv H0;
+  eauto using sem_update_RBnop, sem_update2_Op, sem_update2_load, sem_update2_store.
+Qed.
+
+Lemma abstr_sem_val_mem :
+  forall A B ge tge st tst sp a,
+    ge_preserved ge tge ->
+    forall v m,
+    (@sem_mem A ge sp st a m /\ match_states st tst -> @sem_mem B tge sp tst a m) /\
+    (@sem_value A ge sp st a v /\ match_states st tst -> @sem_value B tge sp tst a v).
+Proof.
+  intros * H.
+  apply expression_ind2 with
+
+    (P := fun (e1: expression) =>
+    forall v m,
+    (@sem_mem A ge sp st e1 m /\ match_states st tst -> @sem_mem B tge sp tst e1 m) /\
+    (@sem_value A ge sp st e1 v /\ match_states st tst -> @sem_value B tge sp tst e1 v))
+
+    (P0 := fun (e1: expression_list) =>
+    forall lv, @sem_val_list A ge sp st e1 lv /\ match_states st tst -> @sem_val_list B tge sp tst e1 lv);
+  simplify; intros; simplify.
+  { inv H1. inv H2. constructor. }
+  { inv H2. inv H1. rewrite H0. constructor. }
+  { inv H3. }
+  { inv H3. inv H4. econstructor. apply H1; auto. simplify. eauto. constructor. auto. auto.
+    apply H0; simplify; eauto. constructor; eauto.
+    unfold ge_preserved in *. simplify. rewrite <- H2. auto.
+  }
+  { inv H3. }
+  { inv H3. inv H4. econstructor. apply H1; eauto; simplify; eauto. constructor; eauto.
+    apply H0; simplify; eauto. constructor; eauto.
+    inv H. rewrite <- H4. eauto.
+    auto.
+  }
+  { inv H4. inv H5. econstructor. apply H0; eauto. simplify; eauto. constructor; eauto.
+    apply H2; eauto. simplify; eauto. constructor; eauto.
+    apply H1; eauto. simplify; eauto. constructor; eauto.
+    inv H. rewrite <- H5. eauto. auto.
+  }
+  { inv H4. }
+  { inv H1. constructor. }
+  { inv H3. constructor; auto. apply H0; eauto. apply Mem.empty. }
+Qed.
+
+Lemma abstr_sem_value :
+  forall a A B ge tge sp st tst v,
+    @sem_value A ge sp st a v ->
+    ge_preserved ge tge ->
+    match_states st tst ->
+    @sem_value B tge sp tst a v.
+Proof. intros; eapply abstr_sem_val_mem; eauto; apply Mem.empty. Qed.
+
+Lemma abstr_sem_mem :
+  forall a A B ge tge sp st tst v,
+    @sem_mem A ge sp st a v ->
+    ge_preserved ge tge ->
+    match_states st tst ->
+    @sem_mem B tge sp tst a v.
+Proof. intros; eapply abstr_sem_val_mem; eauto. Qed.
+
+Lemma abstr_sem_regset :
+  forall a a' A B ge tge sp st tst rs,
+    @sem_regset A ge sp st a rs ->
+    ge_preserved ge tge ->
+    (forall x, a # x = a' # x) ->
+    match_states st tst ->
+    exists rs', @sem_regset B tge sp tst a' rs' /\ (forall x, rs !! x = rs' !! x).
+Proof.
+  inversion 1; intros.
+  inv H7.
+  econstructor. simplify. econstructor. intros.
+  eapply abstr_sem_value; eauto. rewrite <- H6.
+  eapply H0. constructor; eauto.
+  auto.
+Qed.
+
+Lemma abstr_sem :
+  forall a a' A B ge tge sp st tst st',
+    @sem A ge sp st a st' ->
+    ge_preserved ge tge ->
+    (forall x, a # x = a' # x) ->
+    match_states st tst ->
+    exists tst', @sem B tge sp tst a' tst' /\ match_states st' tst'.
+Proof.
+  inversion 1; subst; intros.
+  inversion H4; subst.
+  exploit abstr_sem_regset; eauto; inv_simp.
+  do 3 econstructor; eauto.
+  rewrite <- H3.
+  eapply abstr_sem_mem; eauto.
+Qed.
+
+Lemma abstract_execution_correct':
+  forall A B ge tge sp st' a a' st tst,
+  @sem A ge sp st a st' ->
+  ge_preserved ge tge ->
+  check a a' = true ->
+  match_states st tst ->
+  exists tst', @sem B tge sp tst a' tst' /\ match_states st' tst'.
+Proof.
+  intros;
+  pose proof (check_correct a a' H1);
+  eapply abstr_sem; eauto.
+Qed.
+
+Lemma states_match :
+  forall st1 st2 st3 st4,
+  match_states st1 st2 ->
+  match_states st2 st3 ->
+  match_states st3 st4 ->
+  match_states st1 st4.
+Proof.
+  intros * H1 H2 H3; destruct st1; destruct st2; destruct st3; destruct st4.
+  inv H1. inv H2. inv H3; constructor.
+  unfold regs_lessdef in *. intros.
+  repeat match goal with
+         | H: forall _, _, r : positive |- _ => specialize (H r)
+         end.
+  congruence.
+  auto.
+Qed.
+
+Lemma step_instr_block_same :
+  forall ge sp st st',
+  step_instr_block ge sp st nil st' ->
+  st = st'.
+Proof. inversion 1; auto. Qed.
+
+Lemma step_instr_seq_same :
+  forall ge sp st st',
+  step_instr_seq ge sp st nil st' ->
+  st = st'.
+Proof. inversion 1; auto. Qed.
+
+Lemma sem_update' :
+  forall A ge sp st a x st',
+  sem ge sp st (update (abstract_sequence empty a) x) st' ->
+  exists st'',
+  @step_instr A ge sp st'' x st' /\
+  sem ge sp st (abstract_sequence empty a) st''.
+Proof.
+  Admitted.
+
+Lemma rtlpar_trans_correct :
+  forall bb ge sp sem_st' sem_st st,
+  sem ge sp sem_st (abstract_sequence empty (concat (concat bb))) sem_st' ->
+  match_states sem_st st ->
+  exists st', RTLPar.step_instr_block ge sp st bb st'
+              /\ match_states sem_st' st'.
+Proof.
+  induction bb using rev_ind.
+  { repeat econstructor. eapply abstract_interp_empty3 in H.
+    inv H. inv H0. constructor; congruence. }
+  { simplify. inv H0. repeat rewrite concat_app in H. simplify.
+    rewrite app_nil_r in H.
+    exploit sem_separate; eauto; inv_simp.
+    repeat econstructor. admit. admit.
+  }
+Admitted.
+
+(*Lemma abstract_execution_correct_ld:
+  forall bb bb' cfi ge tge sp st st' tst,
+    RTLBlock.step_instr_list ge sp st bb st' ->
+    ge_preserved ge tge ->
+    schedule_oracle (mk_bblock bb cfi) (mk_bblock bb' cfi) = true ->
+    match_states_ld st tst ->
+    exists tst', RTLPar.step_instr_block tge sp tst bb' tst'
+                 /\ match_states st' tst'.
+Proof.
+  intros.*)
+*)
+
+Lemma match_states_list :
+  forall A (rs: Regmap.t A) rs',
+  (forall r, rs !! r = rs' !! r) ->
+  forall l, rs ## l = rs' ## l.
+Proof. induction l; crush. Qed.
+
+Lemma PTree_matches :
+  forall A (v: A) res rs rs',
+  (forall r, rs !! r = rs' !! r) ->
+  forall x, (Regmap.set res v rs) !! x = (Regmap.set res v rs') !! x.
+Proof.
+  intros; destruct (Pos.eq_dec x res); subst;
+  [ repeat rewrite Regmap.gss by auto
+  | repeat rewrite Regmap.gso by auto ]; auto.
+Qed.
+
+Lemma abstract_interp_empty3 :
+  forall A ctx st',
+    @sem A ctx empty st' -> match_states (ctx_is ctx) st'.
+Proof.
+  inversion 1; subst; simplify. destruct ctx.
+  destruct ctx_is.
+  constructor; intros.
+  - inv H0. specialize (H3 x). inv H3. inv H8. reflexivity.
+  - inv H1. specialize (H3 x). inv H3. inv H8. reflexivity.
+  - inv H2. inv H8. reflexivity.
+Qed.
+
+Lemma step_instr_matches :
+  forall A a ge sp st st',
+    @step_instr A ge sp st a st' ->
+    forall tst,
+      match_states st tst ->
+      exists tst', step_instr ge sp tst a tst'
+                   /\ match_states st' tst'.
+Proof.
+  induction 1; simplify;
+  match goal with H: match_states _ _ |- _ => inv H end;
+  try solve [repeat econstructor; try erewrite match_states_list;
+  try apply PTree_matches; eauto;
+  match goal with
+    H: forall _, _ |- context[Mem.storev] => erewrite <- H; eauto
+  end].
+  - destruct p. match goal with H: eval_pred _ _ _ _ |- _ => inv H end.
+    repeat econstructor; try erewrite match_states_list; eauto.
+    erewrite <- eval_predf_pr_equiv; eassumption.
+    apply PTree_matches; assumption.
+    repeat (econstructor; try apply eval_pred_false); eauto. try erewrite match_states_list; eauto.
+    erewrite <- eval_predf_pr_equiv; eassumption.
+    econstructor; auto.
+    match goal with H: eval_pred _ _ _ _ |- _ => inv H end.
+    repeat econstructor; try erewrite match_states_list; eauto.
+  - destruct p. match goal with H: eval_pred _ _ _ _ |- _ => inv H end.
+    repeat econstructor; try erewrite match_states_list; eauto.
+    erewrite <- eval_predf_pr_equiv; eassumption.
+    apply PTree_matches; assumption.
+    repeat (econstructor; try apply eval_pred_false); eauto. try erewrite match_states_list; eauto.
+    erewrite <- eval_predf_pr_equiv; eassumption.
+    econstructor; auto.
+    match goal with H: eval_pred _ _ _ _ |- _ => inv H end.
+    repeat econstructor; try erewrite match_states_list; eauto.
+  - destruct p. match goal with H: eval_pred _ _ _ _ |- _ => inv H end.
+    repeat econstructor; try erewrite match_states_list; eauto.
+    match goal with
+    H: forall _, _ |- context[Mem.storev] => erewrite <- H; eauto
+    end.
+    erewrite <- eval_predf_pr_equiv; eassumption.
+    repeat (econstructor; try apply eval_pred_false); eauto. try erewrite match_states_list; eauto.
+    match goal with
+    H: forall _, _ |- context[Mem.storev] => erewrite <- H; eauto
+    end.
+    erewrite <- eval_predf_pr_equiv; eassumption.
+    match goal with H: eval_pred _ _ _ _ |- _ => inv H end.
+    repeat econstructor; try erewrite match_states_list; eauto.
+    match goal with
+    H: forall _, _ |- context[Mem.storev] => erewrite <- H; eauto
+    end.
+  - admit. Admitted.
+
+Lemma step_instr_list_matches :
+  forall a ge sp st st',
+  step_instr_list ge sp st a st' ->
+  forall tst, match_states st tst ->
+              exists tst', step_instr_list ge sp tst a tst'
+                           /\ match_states st' tst'.
+Proof.
+  induction a; intros; inv H;
+  try (exploit step_instr_matches; eauto; []; simplify;
+       exploit IHa; eauto; []; simplify); repeat econstructor; eauto.
+Qed.
+
+Lemma step_instr_seq_matches :
+  forall a ge sp st st',
+  step_instr_seq ge sp st a st' ->
+  forall tst, match_states st tst ->
+              exists tst', step_instr_seq ge sp tst a tst'
+                           /\ match_states st' tst'.
+Proof.
+  induction a; intros; inv H;
+  try (exploit step_instr_list_matches; eauto; []; simplify;
+       exploit IHa; eauto; []; simplify); repeat econstructor; eauto.
+Qed.
+
+Lemma step_instr_block_matches :
+  forall bb ge sp st st',
+  step_instr_block ge sp st bb st' ->
+  forall tst, match_states st tst ->
+              exists tst', step_instr_block ge sp tst bb tst'
+                           /\ match_states st' tst'.
+Proof.
+  induction bb; intros; inv H;
+  try (exploit step_instr_seq_matches; eauto; []; simplify;
+       exploit IHbb; eauto; []; simplify); repeat econstructor; eauto.
+Qed.
+
+Lemma rtlblock_trans_correct' :
+  forall bb ge sp st x st'',
+  RTLBlock.step_instr_list ge sp st (bb ++ x :: nil) st'' ->
+  exists st', RTLBlock.step_instr_list ge sp st bb st'
+              /\ step_instr ge sp st' x st''.
+Proof.
+  induction bb.
+  crush. exists st.
+  split. constructor. inv H. inv H6. auto.
+  crush. inv H. exploit IHbb. eassumption. simplify.
+  econstructor. split.
+  econstructor; eauto. eauto.
+Qed.
+
+Lemma abstract_interp_empty A st : @sem A st empty (ctx_is st).
+Proof. destruct st, ctx_is. simpl. repeat econstructor. Qed.
+
+Lemma abstract_seq :
+  forall l f i,
+    abstract_sequence f (l ++ i :: nil) = update (abstract_sequence f l) i.
+Proof. induction l; crush. Qed.
+
+Lemma abstract_sequence_update :
+  forall l r f,
+  check_dest_l l r = false ->
+  (abstract_sequence f l) # (Reg r) = f # (Reg r).
+Proof.
+  induction l using rev_ind; crush.
+  rewrite abstract_seq. rewrite check_dest_update. apply IHl.
+  apply check_list_l_false in H. tauto.
+  apply check_list_l_false in H. tauto.
+Qed.
+
+(*Lemma sem_separate :
+  forall A ctx b a st',
+    sem ctx (abstract_sequence empty (a ++ b)) st' ->
+    exists st'',
+         @sem A ctx (abstract_sequence empty a) st''
+      /\ @sem A (mk_ctx st'' (ctx_sp ctx) (ctx_ge ctx)) (abstract_sequence empty b) st'.
+Proof.
+  induction b using rev_ind; simplify.
+  { econstructor. simplify. rewrite app_nil_r in H. eauto. apply abstract_interp_empty. }
+  { simplify. rewrite app_assoc in H. rewrite abstract_seq in H.
+    exploit sem_update'; eauto; simplify.
+    exploit IHb; eauto; inv_simp.
+    econstructor; split; eauto.
+    rewrite abstract_seq.
+    eapply sem_update2; eauto.
+  }
+Qed.*)
+
+Lemma sem_update_RBnop :
+  forall A ctx f st',
+  @sem A ctx f st' -> sem ctx (update f RBnop) st'.
+Proof. auto. Qed.
+
+Lemma sem_update_Op :
+  forall A ge sp ist f st' r l o0 o m rs v ps,
+  @sem A (mk_ctx ist sp ge) f st' ->
+  eval_predf ps o = true ->
+  Op.eval_operation ge sp o0 (rs ## l) m = Some v ->
+  match_states st' (mk_instr_state rs ps m) ->
+  exists tst,
+  sem (mk_ctx ist sp ge) (update f (RBop (Some o) o0 l r)) tst
+  /\ match_states (mk_instr_state (Regmap.set r v rs) ps m) tst.
+Proof.
+  intros. inv H1. inv H. inv H1. inv H3. simplify.
+  econstructor. simplify.
+  { constructor; try constructor; intros; try solve [rewrite genmap1; now eauto].
+    destruct (Pos.eq_dec x r); subst.
+    { rewrite map2. specialize (H1 r). inv H1.
+(*}
+  }
+  destruct st.
+  econstructor. simplify.
+  { constructor.
+    { constructor. intros. destruct (Pos.eq_dec x r); subst.
+      { pose proof (H5 r). rewrite map2. pose proof H. inv H. econstructor; eauto.
+        { inv H9. eapply gen_list_base; eauto. }
+        { instantiate (1 := (Regmap.set r v rs0)). rewrite Regmap.gss. erewrite regmap_list_equiv; eauto. } }
+      { rewrite Regmap.gso by auto. rewrite genmap1; crush. inv H. inv H7; eauto. } }
+    { inv H. rewrite genmap1; crush. eauto. } }
+  { constructor; eauto. intros.
+    destruct (Pos.eq_dec r x);
+    subst; [repeat rewrite Regmap.gss | repeat rewrite Regmap.gso]; auto. }
+Qed.*) Admitted.
+
+Lemma sem_update :
+  forall A ge sp st x st' st'' st''' f,
+  sem (mk_ctx st sp ge) f st' ->
+  match_states st' st''' ->
+  @step_instr A ge sp st''' x st'' ->
+  exists tst, sem (mk_ctx st sp ge) (update f x) tst /\ match_states st'' tst.
+Proof.
+  intros. destruct x.
+  - inv H1. econstructor. simplify. eauto. symmetry; auto.
+  - inv H1. inv H0. econstructor.
+    Admitted.
+
+Lemma rtlblock_trans_correct :
+  forall bb ge sp st st',
+    RTLBlock.step_instr_list ge sp st bb st' ->
+    forall tst,
+      match_states st tst ->
+      exists tst', sem (mk_ctx tst sp ge) (abstract_sequence empty bb) tst'
+                   /\ match_states st' tst'.
+Proof.
+  induction bb using rev_ind; simplify.
+  { econstructor. simplify. apply abstract_interp_empty.
+    inv H. auto. }
+  { apply rtlblock_trans_correct' in H. simplify.
+    rewrite abstract_seq.
+    exploit IHbb; try eassumption; []; simplify.
+    exploit sem_update. apply H1. symmetry; eassumption.
+    eauto. simplify. econstructor. split. apply H3.
+    auto. }
+Qed.
+
+Lemma abstract_execution_correct:
+  forall bb bb' cfi cfi' ge tge sp st st' tst,
+    RTLBlock.step_instr_list ge sp st bb st' ->
+    ge_preserved ge tge ->
+    schedule_oracle (mk_bblock bb cfi) (mk_bblock bb' cfi') = true ->
+    match_states st tst ->
+    exists tst', RTLPar.step_instr_block tge sp tst bb' tst'
+                 /\ match_states st' tst'.
+Proof.
+  intros.
+  unfold schedule_oracle in *. simplify. unfold empty_trees in H4.
+  exploit rtlblock_trans_correct; try eassumption; []; simplify.
+(*)  exploit abstract_execution_correct';
+  try solve [eassumption | apply state_lessdef_match_sem; eassumption].
+  apply match_states_commut. eauto. inv_simp.
+  exploit rtlpar_trans_correct; try eassumption; []; inv_simp.
+  exploit step_instr_block_matches; eauto. apply match_states_commut; eauto. inv_simp.
+  repeat match goal with | H: match_states _ _ |- _ => inv H end.
+  do 2 econstructor; eauto.
+  econstructor; congruence.
+Qed.*)Admitted.
+
+Definition match_prog (prog : RTLBlock.program) (tprog : RTLPar.program) :=
+  match_program (fun cu f tf => transl_fundef f = Errors.OK tf) eq prog tprog.
+
+Inductive match_stackframes: RTLBlock.stackframe -> RTLPar.stackframe -> Prop :=
+| match_stackframe:
+    forall f tf res sp pc rs rs' ps ps',
+      transl_function f = OK tf ->
+      (forall x, rs !! x = rs' !! x) ->
+      (forall x, ps !! x = ps' !! x) ->
+      match_stackframes (Stackframe res f sp pc rs ps)
+                        (Stackframe res tf sp pc rs' ps').
+
+Inductive match_states: RTLBlock.state -> RTLPar.state -> Prop :=
+| match_state:
+    forall sf f sp pc rs rs' m sf' tf ps ps'
+      (TRANSL: transl_function f = OK tf)
+      (STACKS: list_forall2 match_stackframes sf sf')
+      (REG: forall x, rs !! x = rs' !! x)
+      (REG: forall x, ps !! x = ps' !! x),
+      match_states (State sf f sp pc rs ps m)
+                   (State sf' tf sp pc rs' ps' m)
+| match_returnstate:
+    forall stack stack' v m
+      (STACKS: list_forall2 match_stackframes stack stack'),
+      match_states (Returnstate stack v m)
+                   (Returnstate stack' v m)
+| match_callstate:
+    forall stack stack' f tf args m
+      (TRANSL: transl_fundef f = OK tf)
+      (STACKS: list_forall2 match_stackframes stack stack'),
+      match_states (Callstate stack f args m)
+                   (Callstate stack' tf args m).
+
+Section CORRECTNESS.
+
+  Context (prog: RTLBlock.program) (tprog : RTLPar.program).
+  Context (TRANSL: match_prog prog tprog).
+
+  Let ge : RTLBlock.genv := Globalenvs.Genv.globalenv prog.
+  Let tge : RTLPar.genv := Globalenvs.Genv.globalenv tprog.
+
+  Lemma symbols_preserved:
+    forall (s: AST.ident), Genv.find_symbol tge s = Genv.find_symbol ge s.
+  Proof using TRANSL. intros. eapply (Genv.find_symbol_match TRANSL). Qed.
+  Hint Resolve symbols_preserved : rtlgp.
+
+  Lemma function_ptr_translated:
+    forall (b: Values.block) (f: RTLBlock.fundef),
+      Genv.find_funct_ptr ge b = Some f ->
+      exists tf,
+        Genv.find_funct_ptr tge b = Some tf /\ transl_fundef f = Errors.OK tf.
+  Proof using TRANSL.
+    intros. exploit (Genv.find_funct_ptr_match TRANSL); eauto.
+    intros (cu & tf & P & Q & R); exists tf; auto.
+  Qed.
+
+  Lemma functions_translated:
+    forall (v: Values.val) (f: RTLBlock.fundef),
+      Genv.find_funct ge v = Some f ->
+      exists tf,
+        Genv.find_funct tge v = Some tf /\ transl_fundef f = Errors.OK tf.
+  Proof using TRANSL.
+    intros. exploit (Genv.find_funct_match TRANSL); eauto.
+    intros (cu & tf & P & Q & R); exists tf; auto.
+  Qed.
+
+  Lemma senv_preserved:
+    Senv.equiv (Genv.to_senv ge) (Genv.to_senv tge).
+  Proof (Genv.senv_transf_partial TRANSL).
+  Hint Resolve senv_preserved : rtlgp.
+
+  Lemma sig_transl_function:
+    forall (f: RTLBlock.fundef) (tf: RTLPar.fundef),
+      transl_fundef f = OK tf ->
+      funsig tf = funsig f.
+  Proof using .
+    unfold transl_fundef, transf_partial_fundef, transl_function; intros;
+    repeat destruct_match; crush;
+    match goal with H: OK _ = OK _ |- _ => inv H end; auto.
+  Qed.
+  Hint Resolve sig_transl_function : rtlgp.
+
+  Hint Resolve Val.lessdef_same : rtlgp.
+  Hint Resolve regs_lessdef_regs : rtlgp.
+
+  Lemma find_function_translated:
+    forall ros rs rs' f,
+      (forall x, rs !! x = rs' !! x) ->
+      find_function ge ros rs = Some f ->
+      exists tf, find_function tge ros rs' = Some tf
+                 /\ transl_fundef f = OK tf.
+  Proof using TRANSL.
+    Ltac ffts := match goal with
+                 | [ H: forall _, Val.lessdef _ _, r: Registers.reg |- _ ] =>
+                   specialize (H r); inv H
+                 | [ H: Vundef = ?r, H1: Genv.find_funct _ ?r = Some _ |- _ ] =>
+                   rewrite <- H in H1
+                 | [ H: Genv.find_funct _ Vundef = Some _ |- _] => solve [inv H]
+                 | _ => solve [exploit functions_translated; eauto]
+                 end.
+    destruct ros; simplify; try rewrite <- H;
+    [| rewrite symbols_preserved; destruct_match;
+      try (apply function_ptr_translated); crush ];
+    intros;
+    repeat ffts.
+  Qed.
+
+  Lemma schedule_oracle_nil:
+    forall bb cfi,
+      schedule_oracle {| bb_body := nil; bb_exit := cfi |} bb = true ->
+      bb_body bb = nil /\ bb_exit bb = cfi.
+  Proof using .
+    unfold schedule_oracle, check_control_flow_instr.
+    simplify; repeat destruct_match; crush.
+  Qed.
+
+  Lemma schedule_oracle_nil2:
+    forall cfi,
+      schedule_oracle {| bb_body := nil; bb_exit := cfi |}
+                      {| bb_body := nil; bb_exit := cfi |} = true.
+  Proof using .
+    unfold schedule_oracle, check_control_flow_instr.
+    simplify; repeat destruct_match; crush.
+  Qed.
+
+  Lemma eval_op_eq:
+    forall (sp0 : Values.val) (op : Op.operation) (vl : list Values.val) m,
+      Op.eval_operation ge sp0 op vl m = Op.eval_operation tge sp0 op vl m.
+  Proof using TRANSL.
+    intros.
+    destruct op; auto; unfold Op.eval_operation, Genv.symbol_address, Op.eval_addressing32;
+    [| destruct a; unfold Genv.symbol_address ];
+    try rewrite symbols_preserved; auto.
+  Qed.
+  Hint Resolve eval_op_eq : rtlgp.
+
+  Lemma eval_addressing_eq:
+    forall sp addr vl,
+      Op.eval_addressing ge sp addr vl = Op.eval_addressing tge sp addr vl.
+  Proof using TRANSL.
+    intros.
+    destruct addr;
+    unfold Op.eval_addressing, Op.eval_addressing32;
+    unfold Genv.symbol_address;
+    try rewrite symbols_preserved; auto.
+  Qed.
+  Hint Resolve eval_addressing_eq : rtlgp.
+
+  Lemma ge_preserved_lem:
+    ge_preserved ge tge.
+  Proof using TRANSL.
+    unfold ge_preserved.
+    eauto with rtlgp.
+  Qed.
+  Hint Resolve ge_preserved_lem : rtlgp.
+
+  Lemma lessdef_regmap_optget:
+    forall or rs rs',
+      regs_lessdef rs rs' ->
+      Val.lessdef (regmap_optget or Vundef rs) (regmap_optget or Vundef rs').
+  Proof using. destruct or; crush. Qed.
+  Hint Resolve lessdef_regmap_optget : rtlgp.
+
+  Lemma regmap_equiv_lessdef:
+    forall rs rs',
+      (forall x, rs !! x = rs' !! x) ->
+      regs_lessdef rs rs'.
+  Proof using.
+    intros; unfold regs_lessdef; intros.
+    rewrite H. apply Val.lessdef_refl.
+  Qed.
+  Hint Resolve regmap_equiv_lessdef : rtlgp.
+
+  Lemma int_lessdef:
+    forall rs rs',
+      regs_lessdef rs rs' ->
+      (forall arg v,
+          rs !! arg = Vint v ->
+          rs' !! arg = Vint v).
+  Proof using. intros ? ? H; intros; specialize (H arg); inv H; crush. Qed.
+  Hint Resolve int_lessdef : rtlgp.
+
+  Ltac semantics_simpl :=
+    match goal with
+    | [ H: match_states _ _ |- _ ] =>
+      let H2 := fresh "H" in
+      learn H as H2; inv H2
+    | [ H: transl_function ?f = OK _ |- _ ] =>
+      let H2 := fresh "TRANSL" in
+      learn H as H2;
+      unfold transl_function in H2;
+      destruct (check_scheduled_trees
+                  (@fn_code RTLBlock.bb f)
+                  (@fn_code RTLPar.bb (schedule f))) eqn:?;
+               [| discriminate ]; inv H2
+    | [ H: context[check_scheduled_trees] |- _ ] =>
+      let H2 := fresh "CHECK" in
+      learn H as H2;
+      eapply check_scheduled_trees_correct in H2; [| solve [eauto] ]
+    | [ H: schedule_oracle {| bb_body := nil; bb_exit := _ |} ?bb = true |- _ ] =>
+      let H2 := fresh "SCHED" in
+      learn H as H2;
+      apply schedule_oracle_nil in H2
+    | [ H: find_function _ _ _ = Some _, H2: forall x, ?rs !! x = ?rs' !! x |- _ ] =>
+      learn H; exploit find_function_translated; try apply H2; eauto; inversion 1
+    | [ H: Mem.free ?m _ _ _ = Some ?m', H2: Mem.extends ?m ?m'' |- _ ] =>
+      learn H; exploit Mem.free_parallel_extends; eauto; intros
+    | [ H: Events.eval_builtin_args _ _ _ _ _ _, H2: regs_lessdef ?rs ?rs' |- _ ] =>
+      let H3 := fresh "H" in
+      learn H; exploit Events.eval_builtin_args_lessdef; [apply H2 | | |];
+      eauto with rtlgp; intro H3; learn H3
+    | [ H: Events.external_call _ _ _ _ _ _ _ |- _ ] =>
+      let H2 := fresh "H" in
+      learn H; exploit Events.external_call_mem_extends;
+      eauto; intro H2; learn H2
+    | [ H: exists _, _ |- _ ] => inv H
+    | _ => progress simplify
+    end.
+
+  Hint Resolve Events.eval_builtin_args_preserved : rtlgp.
+  Hint Resolve Events.external_call_symbols_preserved : rtlgp.
+  Hint Resolve set_res_lessdef : rtlgp.
+  Hint Resolve set_reg_lessdef : rtlgp.
+  Hint Resolve Op.eval_condition_lessdef : rtlgp.
+
+  Hint Constructors Events.eval_builtin_arg: barg.
+
+  Lemma eval_builtin_arg_eq:
+    forall A ge a v1 m1 e1 e2 sp,
+      (forall x, e1 x = e2 x) ->
+      @Events.eval_builtin_arg A ge e1 sp m1 a v1 ->
+      Events.eval_builtin_arg ge e2 sp m1 a v1.
+Proof. induction 2; try rewrite H; eauto with barg. Qed.
+
+  Lemma eval_builtin_args_eq:
+    forall A ge e1 sp m1 e2 al vl1,
+      (forall x, e1 x = e2 x) ->
+      @Events.eval_builtin_args A ge e1 sp m1 al vl1 ->
+      Events.eval_builtin_args ge e2 sp m1 al vl1.
+  Proof.
+    induction 2.
+    - econstructor; split.
+    - exploit eval_builtin_arg_eq; eauto. intros.
+      destruct IHlist_forall2 as [| y]. constructor; eauto.
+      constructor. constructor; auto.
+      constructor; eauto.
+  Qed.
+
+  Lemma step_cf_instr_correct:
+    forall cfi t s s',
+      step_cf_instr ge s cfi t s' ->
+      forall r,
+        match_states s r ->
+        exists r', step_cf_instr tge r cfi t r' /\ match_states s' r'.
+  Proof using TRANSL.
+    induction 1; repeat semantics_simpl;
+    try solve [repeat (try erewrite match_states_list; eauto; econstructor; eauto with rtlgp)].
+    { do 3 (try erewrite match_states_list by eauto; econstructor; eauto with rtlgp).
+      eapply eval_builtin_args_eq. eapply REG.
+      eapply Events.eval_builtin_args_preserved. eapply symbols_preserved.
+      eauto.
+      intros.
+      unfold regmap_setres. destruct res.
+      destruct (Pos.eq_dec x0 x); subst.
+      repeat rewrite Regmap.gss; auto.
+      repeat rewrite Regmap.gso; auto.
+      eapply REG. eapply REG.
+    }
+    { repeat (try erewrite match_states_list; eauto; econstructor; eauto with rtlgp).
+      unfold regmap_optget. destruct or. rewrite REG. constructor; eauto.
+      constructor; eauto.
+    }
+    { exploit IHstep_cf_instr; eauto. simplify.
+      repeat (try erewrite match_states_list; eauto; econstructor; eauto with rtlgp).
+      erewrite eval_predf_pr_equiv; eauto.
+    }
+  Qed.
+
+  Theorem transl_step_correct :
+    forall (S1 : RTLBlock.state) t S2,
+      RTLBlock.step ge S1 t S2 ->
+      forall (R1 : RTLPar.state),
+        match_states S1 R1 ->
+        exists R2, Smallstep.plus RTLPar.step tge R1 t R2 /\ match_states S2 R2.
+  Proof.
+
+    induction 1; repeat semantics_simpl.
+
+    { destruct bb; destruct x.
+      assert (bb_exit = bb_exit0).
+      { unfold schedule_oracle in *. simplify.
+        unfold check_control_flow_instr in *.
+        destruct_match; crush.
+      }
+      subst.
+
+      exploit abstract_execution_correct; try eassumption. eapply ge_preserved_lem.
+      econstructor; eauto.
+      simplify. destruct x. inv H7.
+
+      exploit step_cf_instr_correct; try eassumption. econstructor; eauto.
+      simplify.
+
+      econstructor. econstructor. eapply Smallstep.plus_one. econstructor.
+      eauto. eauto. simplify. eauto. eauto. }
+    { unfold bind in *. inv TRANSL0. clear Learn. inv H0. destruct_match; crush.
+      inv H2. unfold transl_function in Heqr. destruct_match; crush.
+      inv Heqr.
+      repeat econstructor; eauto.
+      unfold bind in *. destruct_match; crush. }
+    { inv TRANSL0. repeat econstructor; eauto using Events.external_call_symbols_preserved, symbols_preserved, senv_preserved, Events.E0_right. }
+    { inv STACKS. inv H2. repeat econstructor; eauto.
+      intros. apply PTree_matches; eauto. }
+  Qed.
+
+  Lemma transl_initial_states:
+    forall S,
+      RTLBlock.initial_state prog S ->
+      exists R, RTLPar.initial_state tprog R /\ match_states S R.
+  Proof.
+    induction 1.
+    exploit function_ptr_translated; eauto. intros [tf [A B]].
+    econstructor; split.
+    econstructor. apply (Genv.init_mem_transf_partial TRANSL); eauto.
+    replace (prog_main tprog) with (prog_main prog). rewrite symbols_preserved; eauto.
+    symmetry; eapply match_program_main; eauto.
+    eexact A.
+    rewrite <- H2. apply sig_transl_function; auto.
+    constructor. auto. constructor.
+  Qed.
+
+  Lemma transl_final_states:
+    forall S R r,
+      match_states S R -> RTLBlock.final_state S r -> RTLPar.final_state R r.
+  Proof.
+    intros. inv H0. inv H. inv STACKS. constructor.
+  Qed.
+
+  Theorem transf_program_correct:
+    Smallstep.forward_simulation (RTLBlock.semantics prog) (RTLPar.semantics tprog).
+  Proof.
+    eapply Smallstep.forward_simulation_plus.
+    apply senv_preserved.
+    eexact transl_initial_states.
+    eexact transl_final_states.
+    exact transl_step_correct.
+  Qed.
+
+End CORRECTNESS.
+#+end_src
+
+* License
+
+#+name: license
+#+begin_src coq :tangle no
+(*
+ * Vericert: Verified high-level synthesis.
+ * Copyright (C) 2020-2022 Yann Herklotz <yann@yannherklotz.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *)
+#+end_src