From 497d60773b1fd47b4ba250ed8f7e78acccdabfaf Mon Sep 17 00:00:00 2001
From: Pierre Goutagny <pierre.goutagny@ens-lyon.fr>
Date: Thu, 3 Jun 2021 15:55:08 +0200
Subject: Add RTLTunneling.v

---
 Makefile               | 1 +
 backend/RTLTunneling.v | 0
 2 files changed, 1 insertion(+)
 create mode 100644 backend/RTLTunneling.v

diff --git a/Makefile b/Makefile
index 8d91026e..0556a93c 100644
--- a/Makefile
+++ b/Makefile
@@ -144,6 +144,7 @@ BACKEND=\
   Allnontrap.v Allnontrapproof.v \
   Allocation.v Allocationproof.v \
   Tunneling.v Tunnelingproof.v \
+  RTLTunneling.v \
   Linear.v Lineartyping.v \
   Linearize.v Linearizeproof.v \
   CleanupLabels.v CleanupLabelsproof.v \
diff --git a/backend/RTLTunneling.v b/backend/RTLTunneling.v
new file mode 100644
index 00000000..e69de29b
-- 
cgit 


From 245f4f86865ce62b82242af81897936b5034438a Mon Sep 17 00:00:00 2001
From: Pierre Goutagny <pierre.goutagny@ens-lyon.fr>
Date: Thu, 3 Jun 2021 17:13:49 +0200
Subject: Write RTLTunneling.v

---
 backend/RTLTunneling.v | 125 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 125 insertions(+)

diff --git a/backend/RTLTunneling.v b/backend/RTLTunneling.v
index e69de29b..4885002f 100644
--- a/backend/RTLTunneling.v
+++ b/backend/RTLTunneling.v
@@ -0,0 +1,125 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*          TODO: Proper author information                            *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Branch tunneling for the RTL representation *)
+
+(* rq: ce code est commenté avec des remarques sur des éléments du code pas évidents, sur lequels j'ai buté, ou qui changent du tunneling LTL. Ils pourront être enlevés ultérieurement *)
+
+Require Import Coqlib Maps Errors.
+Require Import AST.
+Require Import RTL.
+
+(* This is a port of tunneling for LTL. See Tunneling.v *)
+
+Definition UF := PTree.t (node * Z).
+
+(* The oracle returns a map of "nop" node to their target with a distance (ie the number of the "nop" node on the path) to the target. *)
+Axiom branch_target: RTL.function -> UF.
+(* TODO: add an extraction command to link branch_target with its implementation *)
+
+Local Open Scope error_monad_scope.
+
+Definition get (td: UF) (pc: node): node*Z :=
+  match td!pc with (* rq: notation pour `PTree.get pc td` *)
+  | Some (t,d) => (t,Z.abs d)
+  | None       => (pc,0)
+  end.
+
+(* rq: "coerce" la structure d'abre UF en une fonction qui retrouve le représentant canonique de la classe d'un noeud *)
+Definition target (td: UF) (pc: node): node := fst (get td pc).
+Coercion target: UF >-> Funclass.
+
+(* we check that the domain of [td] is included in the domain of [c] *)
+(* rq: pas de basic block en RTL *)
+Definition check_included (td: UF) (c: code): option instruction
+  := PTree.fold (fun (ok:option instruction) pc _ => if ok then c!pc else None) td (Some (Inop xH)).
+(* rq: PTree.fold replie l'arbre avec un parcours préfixe. La fonction qui prend en argument l'accumulateur, le label du noeud et le pointeur (`positive`) vers ce noeud *)
+
+(* we check the validity of targets and their bound:
+*  the distance of a "nop" node (w.r.t to the target) must be greater than the one of its parents.
+*)
+Definition check_instr (td: UF) (pc: node) (i: instruction): res unit :=
+  match td!pc with
+  | None => OK tt (* rq: le type `unit` est le singleton {`tt`} *)
+  | Some (tpc, dpc) =>
+      let dpc := Z.abs dpc in
+      match i with
+      | Inop s =>
+          let (ts,ds) := get td s in
+          if peq tpc ts then
+            if zlt ds dpc then OK tt
+            else Error (msg "bad distance in Inop")
+          else Error (msg "invalid skip of Inop")
+      | Icond _ _ ifso ifnot _ =>
+          let (tso,dso) := get td ifso in
+          let (tnot,dnot) := get td ifnot in
+          if peq tpc ifso then
+            if peq tpc ifnot then
+              if zlt dso dpc then
+                if zlt dnot dpc then OK tt
+                else Error (msg "bad distance on else branch")
+              else Error (msg "bad distance on then branch")
+            else Error (msg "invalid skip of else branch")
+          else Error (msg "invalid skip of then branch")
+      | _ => Error (msg "cannot skip this instruction")
+      end
+  end.
+
+Definition check_code (td: UF) (c: code): res unit :=
+  PTree.fold (fun ok pc i => do _ <- ok; check_instr td pc i) c (OK tt).
+(* rq: `do _ <- ok; ...` exécute le bloc à droite du ';' si `ok` est un `OK _`, sinon il passe simplement l'erreur *)
+
+(* The second pass rewrites all LTL instructions, replacing every
+ * successor [s] of every instruction by [t s], the canonical representative
+ * of its equivalence class in the union-find data structure.
+ *)
+
+(* rq: beaucoup plus de cas que pour LTL, car plein d'instructions ont des successeurs *)
+Definition tunnel_instr (t: node -> node) (i: instruction) : instruction :=
+  match i with
+  | Inop s => Inop (t s)
+  | Iop op args res s => Iop op args res (t s)
+  | Iload trap chunk addr args dst s => Iload trap chunk addr args dst (t s)
+  | Istore chunk addr args src s => Istore chunk addr args src (t s)
+  | Icall sig ros args res s => Icall sig ros args res (t s)
+  | Ibuiltin ef args res s => Ibuiltin ef args res (t s)
+  | Icond cond args ifso ifnot info =>
+      let ifso'  := t ifso in
+      let ifnot' := t ifnot in
+      if peq ifso' ifnot'
+      (* rq: si les deux branches se rejoignent, le if ne sert à rien *)
+      then Inop ifso'
+      else Icond cond args ifso' ifnot' info
+  | Ijumptable arg tbl => Ijumptable arg (List.map t tbl)
+  | _ => i
+  end.
+
+Definition tunnel_function (f: RTL.function): res RTL.function :=
+  let td := branch_target f in
+  let c := fn_code f in
+  if check_included td c then
+    do _ <- check_code td c ; OK
+    (mkfunction (* rq: mkfunction construit un "Record" `RTL.function` *)
+      (fn_sig f)
+      (fn_params f)
+      (fn_stacksize f)
+      (PTree.map1 (tunnel_instr td) c) (* rq: `map1` est un map sans le pointeur *)
+      (td (fn_entrypoint f)))
+  else Error (msg "Some node of the union-find is not in the CFG").
+
+Definition tunnel_fundef (f: fundef): res fundef :=
+  transf_partial_fundef tunnel_function f.
+
+Definition transf_program (p: program): res program :=
+  transform_partial_program tunnel_fundef p.
+
-- 
cgit 


From 731d22f1b917a3e55deb82505fc5f981b4a37bcc Mon Sep 17 00:00:00 2001
From: Pierre Goutagny <pierre.goutagny@ens-lyon.fr>
Date: Fri, 4 Jun 2021 13:45:59 +0200
Subject: Fix check_instr Icond target conditions

---
 backend/RTLTunneling.v | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/backend/RTLTunneling.v b/backend/RTLTunneling.v
index 4885002f..e6901a9f 100644
--- a/backend/RTLTunneling.v
+++ b/backend/RTLTunneling.v
@@ -63,8 +63,8 @@ Definition check_instr (td: UF) (pc: node) (i: instruction): res unit :=
       | Icond _ _ ifso ifnot _ =>
           let (tso,dso) := get td ifso in
           let (tnot,dnot) := get td ifnot in
-          if peq tpc ifso then
-            if peq tpc ifnot then
+          if peq tpc tso then
+            if peq tpc tnot then
               if zlt dso dpc then
                 if zlt dnot dpc then OK tt
                 else Error (msg "bad distance on else branch")
-- 
cgit 


From 7ae1fb0faea68ce5cfe04a232e49659247c244e9 Mon Sep 17 00:00:00 2001
From: "nicolas.nardino" <nicolas.nardino@ens-lyon.fr>
Date: Fri, 4 Jun 2021 14:24:07 +0200
Subject: Passing info of live regs to scheduler: beginning

---
 aarch64/PostpassSchedulingOracle.ml | 1 +
 aarch64/PrepassSchedulingOracle.ml  | 4 +++-
 scheduling/InstructionScheduler.ml  | 2 ++
 scheduling/InstructionScheduler.mli | 3 +++
 scheduling/RTLpathScheduleraux.ml   | 9 ++++++++-
 5 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml
index cde3e7a7..a9737088 100644
--- a/aarch64/PostpassSchedulingOracle.ml
+++ b/aarch64/PostpassSchedulingOracle.ml
@@ -507,6 +507,7 @@ let build_problem bb =
   {
     max_latency = -1;
     resource_bounds = opweights.pipelined_resource_bounds;
+    live_regs_entry = Registers.Regset.empty; (* PLACEHOLDER *)
     instruction_usages = instruction_usages bb;
     latency_constraints = latency_constraints bb;
   }
diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml
index 2c3eb14f..1fd12a6a 100644
--- a/aarch64/PrepassSchedulingOracle.ml
+++ b/aarch64/PrepassSchedulingOracle.ml
@@ -410,6 +410,7 @@ let define_problem (opweights : opweights) seqa =
   let simple_deps = get_simple_dependencies opweights seqa in
   { max_latency = -1;
     resource_bounds = opweights.pipelined_resource_bounds;
+    live_regs_entry = Regset.empty; (* PLACEHOLDER *)
     instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa);
     latency_constraints =
       (* if (use_alias_analysis ())
@@ -439,7 +440,8 @@ let prepass_scheduler_by_name name problem early_ones =
   | "zigzag" -> zigzag_scheduler problem early_ones
   | _ -> scheduler_by_name name problem
   
-let schedule_sequence (seqa : (instruction*Regset.t) array) =
+let schedule_sequence (seqa : (instruction*Regset.t) array)
+  (live_regs_entry : Registers.Regset.t)=
   let opweights = OpWeights.get_opweights () in
   try
     if (Array.length seqa) <= 1
diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml
index eab0b21a..976037bd 100644
--- a/scheduling/InstructionScheduler.ml
+++ b/scheduling/InstructionScheduler.ml
@@ -33,6 +33,7 @@ type latency_constraint = {
 type problem = {
   max_latency : int;
   resource_bounds : int array;
+  live_regs_entry : Registers.Regset.t;
   instruction_usages : int array array;
   latency_constraints : latency_constraint list;
   };;
@@ -438,6 +439,7 @@ let reverse_problem problem =
   {
     max_latency = problem.max_latency;
     resource_bounds = problem.resource_bounds;
+    live_regs_entry = Registers.Regset.empty; (* PLACEHOLDER *)
     instruction_usages = Array.init (nr_instructions + 1)
       (fun i ->
         if i=0
diff --git a/scheduling/InstructionScheduler.mli b/scheduling/InstructionScheduler.mli
index fb7af3f6..f53dc0ef 100644
--- a/scheduling/InstructionScheduler.mli
+++ b/scheduling/InstructionScheduler.mli
@@ -23,6 +23,9 @@ type problem = {
     resource_bounds : int array;
     (** An array of number of units available indexed by the kind of resources to be allocated. It can be empty, in which case the problem is scheduling without resource constraints. *)
 
+    live_regs_entry : Registers.Regset.t;
+    (** The set of live pseudo-registers at entry. *)
+    
     instruction_usages: int array array;
     (** At index {i i} the vector of resources used by instruction number {i i}. It must be the same length as [resource_bounds] *)
 
diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml
index aeed39df..55f1a078 100644
--- a/scheduling/RTLpathScheduleraux.ml
+++ b/scheduling/RTLpathScheduleraux.ml
@@ -72,6 +72,11 @@ let get_superblocks code entry pm typing =
     lsb
 end
 
+(* PLACEHOLDER *)
+let get_live_regs_entry (sb : superblock) =
+  Registers.Regset.empty
+  
+
 (* TODO David *)
 let schedule_superblock sb code =
   if not !Clflags.option_fprepass
@@ -90,6 +95,7 @@ let schedule_superblock sb code =
          match predicted_successor ii with
          | Some _ -> 0
          | None -> 1 in
+    let live_regs_entry = get_live_regs_entry sb in
     match PrepassSchedulingOracle.schedule_sequence
             (Array.map (fun i ->
                  (match PTree.get i code with
@@ -98,7 +104,8 @@ let schedule_superblock sb code =
                  (match PTree.get i sb.liveins with
                  | Some s -> s
                  | None -> Regset.empty))
-               (Array.sub sb.instructions 0 (nr_instr-trailer_length))) with
+               (Array.sub sb.instructions 0 (nr_instr-trailer_length)))
+            live_regs_entry with
     | None -> sb.instructions
     | Some order ->
        let ins' =
-- 
cgit 


From 98a7a04258f2cf6caf9f18925cbeeae2f5b17be4 Mon Sep 17 00:00:00 2001
From: "nicolas.nardino" <nicolas.nardino@ens-lyon.fr>
Date: Fri, 4 Jun 2021 16:56:32 +0200
Subject: computing live regs at sb entry from its live output regs

---
 aarch64/PrepassSchedulingOracle.ml |  6 +++---
 scheduling/RTLpathScheduleraux.ml  | 24 +++++++++++++++++++++---
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml
index 1fd12a6a..a743fb68 100644
--- a/aarch64/PrepassSchedulingOracle.ml
+++ b/aarch64/PrepassSchedulingOracle.ml
@@ -406,11 +406,11 @@ let get_alias_dependencies seqa =
   !deps;;
  *)
 
-let define_problem (opweights : opweights) seqa =
+let define_problem (opweights : opweights) (live_entry_regs : Regset.t) seqa =
   let simple_deps = get_simple_dependencies opweights seqa in
   { max_latency = -1;
     resource_bounds = opweights.pipelined_resource_bounds;
-    live_regs_entry = Regset.empty; (* PLACEHOLDER *)
+    live_regs_entry = live_entry_regs;
     instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa);
     latency_constraints =
       (* if (use_alias_analysis ())
@@ -451,7 +451,7 @@ let schedule_sequence (seqa : (instruction*Regset.t) array)
       let nr_instructions = Array.length seqa in
       (if !Clflags.option_debug_compcert > 6
        then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa));
-      let problem = define_problem opweights seqa in
+      let problem = define_problem opweights live_regs_entry seqa in
       (if !Clflags.option_debug_compcert > 7
        then (print_sequence stdout (Array.map fst seqa);
              print_problem stdout problem));
diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml
index 55f1a078..30da5d5d 100644
--- a/scheduling/RTLpathScheduleraux.ml
+++ b/scheduling/RTLpathScheduleraux.ml
@@ -73,8 +73,26 @@ let get_superblocks code entry pm typing =
 end
 
 (* PLACEHOLDER *)
-let get_live_regs_entry (sb : superblock) =
-  Registers.Regset.empty
+let get_live_regs_entry (sb : superblock) code =
+  let seqa = Array.map (fun i ->
+                 (match PTree.get i code with
+                  | Some ii -> ii
+                  | None -> failwith "RTLpathScheduleraux.get_live_regs_entry"
+                 ),
+                 (match PTree.get i sb.liveins with
+                  | Some s -> s
+                  | None -> Regset.empty))
+               sb.instructions in
+  Array.fold_right (fun (ins, liveins) regset ->
+      match ins with
+      | Inop l -> regset
+      | Iop (op, args, dest, succ) ->
+         Registers.Regset.add dest
+           (List.fold_left (fun set reg ->
+                Registers.Regset.remove reg set)
+           regset args)
+      | _ -> regset              (* PLACEHOLDER *)
+    ) seqa Registers.Regset.empty
   
 
 (* TODO David *)
@@ -95,7 +113,7 @@ let schedule_superblock sb code =
          match predicted_successor ii with
          | Some _ -> 0
          | None -> 1 in
-    let live_regs_entry = get_live_regs_entry sb in
+    let live_regs_entry = get_live_regs_entry sb code in
     match PrepassSchedulingOracle.schedule_sequence
             (Array.map (fun i ->
                  (match PTree.get i code with
-- 
cgit 


From deac4407cacb45efa56adf912bd9db32235984f5 Mon Sep 17 00:00:00 2001
From: Pierre Goutagny <pierre.goutagny@ens-lyon.fr>
Date: Fri, 4 Jun 2021 17:49:38 +0200
Subject: Add RTLTunnelingproof.v

---
 Makefile                    |   2 +-
 backend/RTLTunnelingproof.v | 170 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 171 insertions(+), 1 deletion(-)
 create mode 100644 backend/RTLTunnelingproof.v

diff --git a/Makefile b/Makefile
index 0556a93c..4900b165 100644
--- a/Makefile
+++ b/Makefile
@@ -144,7 +144,7 @@ BACKEND=\
   Allnontrap.v Allnontrapproof.v \
   Allocation.v Allocationproof.v \
   Tunneling.v Tunnelingproof.v \
-  RTLTunneling.v \
+  RTLTunneling.v RTLTunnelingproof.v \
   Linear.v Lineartyping.v \
   Linearize.v Linearizeproof.v \
   CleanupLabels.v CleanupLabelsproof.v \
diff --git a/backend/RTLTunnelingproof.v b/backend/RTLTunnelingproof.v
new file mode 100644
index 00000000..57f14b9f
--- /dev/null
+++ b/backend/RTLTunnelingproof.v
@@ -0,0 +1,170 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*          TODO: Proper author information                            *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Correctness proof for the branch tunneling optimization for RTL. *)
+(* This is a port of Tunnelingproof.v, the same optimisation for LTL. *)
+
+Require Import Coqlib Maps Errors.
+Require Import AST Linking.
+Require Import Values Memory Events Globalenvs Smallstep.
+Require Import Op Locations RTL.
+Require Import RTLTunneling.
+Require Import Conventions1.
+
+Local Open Scope nat.
+
+(** Preservation of semantics *)
+
+Definition match_prog (p tp: program) :=
+  match_program (fun _ f tf => tunnel_fundef f = OK tf) eq p tp.
+  (* rq: `(fun _ ...)` est la fonction pour matcher des fonctions
+   *     `eq` la fonction pour matcher les variables ? (`varinfo` dans la def)
+   *     `p` et `tp` sont les programmes donc on doit dire s'ils match
+   *)
+
+Section PRESERVATION.
+
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog. (* rq: on suppose que les programmes match *)
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+(* rq: pour les deux lemmes suivants, j'ai recopié les preuves, mais je ne les comprends pas du tout... D'où vient `exploit` ?? *)
+Lemma functions_translated:
+  forall (v: val) (f: fundef),
+  Genv.find_funct ge v = Some f ->
+  exists tf, tunnel_fundef f = OK tf /\ Genv.find_funct tge v = Some tf.
+Proof.
+  intros. exploit (Genv.find_funct_match TRANSL). eauto.
+  intros (cu & tf & A & B & C).
+  repeat eexists; intuition eauto.
+Qed.
+
+Lemma function_ptr_translated:
+  forall v f,
+  Genv.find_funct_ptr ge v = Some f ->
+  exists tf,
+  Genv.find_funct_ptr tge v = Some tf /\ tunnel_fundef f = OK tf.
+Proof.
+  intros.
+  exploit (Genv.find_funct_ptr_transf_partial TRANSL); eauto.
+Qed.
+
+Lemma symbols_preserved s: Genv.find_symbol tge s = Genv.find_symbol ge s.
+Proof.
+  rewrite (Genv.find_symbol_match TRANSL). auto.
+Qed.
+
+Lemma sig_preserved:
+  forall f tf, tunnel_fundef f = OK tf -> funsig tf = funsig f.
+Proof.
+  intros. destruct f.
+  - simpl in H. monadInv H. (* rq: c'est une tactique maison... *)
+    unfold tunnel_function in EQ.
+    destruct (check_included _ _) as [_|] in EQ; try congruence.
+    monadInv EQ. auto.
+  - simpl in H. monadInv H. auto.
+Qed.
+
+Lemma senv_preserved:
+  Senv.equiv ge tge.
+Proof.
+  eapply (Genv.senv_match TRANSL). (* Il y a déjà une preuve, je ne vais pas réinventer la roue ici *)
+Qed.
+
+(* TODO: vérifier s'il faut faire quelque chose avec le `res` ? *)
+Inductive match_stackframes: stackframe -> stackframe -> Prop :=
+  | match_stackframes_intro:
+      forall res f tf sp pc rs trs
+      (TF: tunnel_function f = OK tf)
+      (RS: Registers.regs_lessdef rs trs),
+      match_stackframes
+        (Stackframe res f sp pc rs)
+        (Stackframe res tf sp (branch_target f pc) trs).
+
+(* rq: `match_states s1 s2` correspond à s1 ~ s2 *)
+Inductive match_states: state -> state -> Prop :=
+  | match_states_intro:
+      forall s ts f tf sp pc rs trs m tm
+      (STK: list_forall2 match_stackframes s ts)
+      (TF: tunnel_function f = OK tf)
+      (RS: Registers.regs_lessdef rs trs)
+      (MEM: Mem.extends m tm),
+      match_states
+        (State s f sp pc rs m)
+        (State ts tf sp (branch_target f pc) trs tm)
+  | match_states_call:
+      forall s ts f tf a ta m tm
+      (STK: list_forall2 match_stackframes s ts)
+      (TF: tunnel_fundef f = OK tf)
+      (ARGS: list_forall2 Val.lessdef a ta)
+      (MEM: Mem.extends m tm),
+      match_states
+        (Callstate s f a m)
+        (Callstate ts tf ta tm)
+  | match_states_return:
+      forall s ts v tv m tm
+      (STK: list_forall2 match_stackframes s ts)
+      (VAL: Val.lessdef v tv)
+      (MEM: Mem.extends m tm),
+      match_states
+        (Returnstate s v m)
+        (Returnstate ts tv tm).
+
+Lemma transf_initial_states:
+  forall s1: state, initial_state prog s1 ->
+  exists s2: state, initial_state tprog s2 /\ match_states s1 s2.
+Proof.
+  intros. inversion H.
+  exploit function_ptr_translated; eauto.
+  intro Htf. destruct Htf as (tf & Htf & Hf).
+  exists (Callstate nil tf nil m0). split.
+  - econstructor.
+    + apply (Genv.init_mem_transf_partial TRANSL). auto.
+    + rewrite (match_program_main TRANSL).
+      rewrite symbols_preserved. eauto.
+    + apply Htf.
+    + rewrite <- H3. apply sig_preserved. apply Hf.
+  - constructor.
+    + constructor.
+    + apply Hf.
+    + constructor.
+    + apply Mem.extends_refl.
+Qed.
+  
+Lemma transf_final_states:
+  forall (s1 : state)
+  (s2 : state) (r : Integers.Int.int),
+  match_states s1 s2 ->
+  final_state s1 r ->
+  final_state s2 r.
+Proof.
+  intros s1 s2 r HM H1. inv H1. inv HM. inv STK. inv VAL. constructor.
+Qed.
+
+(* `Lemma tunnel_step_correct` correspond au diagramme "option simulation" *)
+
+(*
+Theorem transf_program_correct:
+  forward_simulation (RTL.semantics prog) (RTL.semantics tprog).
+Proof.
+  eapply forward_simulation_opt.
+  apply senv_preserved.
+  apply transf_initial_states.
+  apply transf_final_states.
+  apply tunnel_step_correct.
+Qed.
+*)
+
+End PRESERVATION.
-- 
cgit 


From 599823a6410f1629f2b8704291839e0974bce83b Mon Sep 17 00:00:00 2001
From: "nicolas.nardino" <nicolas.nardino@ens-lyon.fr>
Date: Sat, 5 Jun 2021 19:52:59 +0200
Subject: function written, now needs testing

---
 scheduling/RTLpathScheduleraux.ml | 50 +++++++++++++++++++++++++++++++--------
 1 file changed, 40 insertions(+), 10 deletions(-)

diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml
index 30da5d5d..5a427e6c 100644
--- a/scheduling/RTLpathScheduleraux.ml
+++ b/scheduling/RTLpathScheduleraux.ml
@@ -72,7 +72,7 @@ let get_superblocks code entry pm typing =
     lsb
 end
 
-(* PLACEHOLDER *)
+
 let get_live_regs_entry (sb : superblock) code =
   let seqa = Array.map (fun i ->
                  (match PTree.get i code with
@@ -85,16 +85,46 @@ let get_live_regs_entry (sb : superblock) code =
                sb.instructions in
   Array.fold_right (fun (ins, liveins) regset ->
       match ins with
-      | Inop l -> regset
-      | Iop (op, args, dest, succ) ->
-         Registers.Regset.add dest
-           (List.fold_left (fun set reg ->
-                Registers.Regset.remove reg set)
-           regset args)
-      | _ -> regset              (* PLACEHOLDER *)
-    ) seqa Registers.Regset.empty
-  
+      | Inop _ -> regset
+      | Iop (_, args, dest, _)
+      | Iload (_, _, _, args, dest, _) ->
+         List.fold_left (fun set reg -> Registers.Regset.add reg set)
+           (Registers.Regset.remove dest regset) args
+      | Istore (_, _, args, src, _) ->
+         List.fold_left (fun set reg -> Registers.Regset.add reg set)
+           (Registers.Regset.add src regset) args
+      | Icall (_, fn, args, dest, _) ->
+         List.fold_left (fun set reg -> Registers.Regset.add reg set)
+           ((match fn with
+             | Coq_inl reg -> (Registers.Regset.add reg)
+             | Coq_inr _ -> (fun x -> x))
+              (Registers.Regset.remove dest regset))
+           args
+      | Itailcall (_, fn, args) ->
+         List.fold_left (fun set reg -> Registers.Regset.add reg set)
+           (match fn with
+            | Coq_inl reg -> (Registers.Regset.add reg regset)
+            | Coq_inr _ -> regset)
+           args
+      | Ibuiltin (_, args, dest, _) ->
+         List.fold_left (fun set reg ->
+           match reg with
+           | AST.BA r -> Registers.Regset.add r set
+           | _ -> set)
+           (match dest with
+            | AST.BR r -> Registers.Regset.remove r regset)
+           args
+      | Icond (_, args, _, _, _) ->
+         List.fold_left (fun set reg ->
+             Registers.Regset.add reg set)
+           regset args
+      | Ijumptable (reg, _)
+      | Ireturn (Some reg) ->
+         Registers.Regset.add reg regset
+      | _ -> regset        
+    ) seqa sb.s_output_regs
 
+  
 (* TODO David *)
 let schedule_superblock sb code =
   if not !Clflags.option_fprepass
-- 
cgit 


From 9118878bd14e24cc04c2f36cab7aa7271a0f1852 Mon Sep 17 00:00:00 2001
From: "nicolas.nardino" <nicolas.nardino@ens-lyon.fr>
Date: Sun, 6 Jun 2021 12:11:15 +0200
Subject: Fixing scope error, and non-exhaustive pattern matching

---
 scheduling/RTLpathScheduleraux.ml | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml
index 5a427e6c..653765f5 100644
--- a/scheduling/RTLpathScheduleraux.ml
+++ b/scheduling/RTLpathScheduleraux.ml
@@ -96,15 +96,15 @@ let get_live_regs_entry (sb : superblock) code =
       | Icall (_, fn, args, dest, _) ->
          List.fold_left (fun set reg -> Registers.Regset.add reg set)
            ((match fn with
-             | Coq_inl reg -> (Registers.Regset.add reg)
-             | Coq_inr _ -> (fun x -> x))
+             | Datatypes.Coq_inl reg -> (Registers.Regset.add reg)
+             | Datatypes.Coq_inr _ -> (fun x -> x))
               (Registers.Regset.remove dest regset))
            args
       | Itailcall (_, fn, args) ->
          List.fold_left (fun set reg -> Registers.Regset.add reg set)
            (match fn with
-            | Coq_inl reg -> (Registers.Regset.add reg regset)
-            | Coq_inr _ -> regset)
+            | Datatypes.Coq_inl reg -> (Registers.Regset.add reg regset)
+            | Datatypes.Coq_inr _ -> regset)
            args
       | Ibuiltin (_, args, dest, _) ->
          List.fold_left (fun set reg ->
@@ -112,7 +112,8 @@ let get_live_regs_entry (sb : superblock) code =
            | AST.BA r -> Registers.Regset.add r set
            | _ -> set)
            (match dest with
-            | AST.BR r -> Registers.Regset.remove r regset)
+            | AST.BR r -> Registers.Regset.remove r regset
+            | _ -> regset)
            args
       | Icond (_, args, _, _, _) ->
          List.fold_left (fun set reg ->
-- 
cgit 


From 2249f3c7771c285ccd25f6e94478be388a741da5 Mon Sep 17 00:00:00 2001
From: "nicolas.nardino" <nicolas.nardino@ens-lyon.fr>
Date: Sun, 6 Jun 2021 20:49:34 +0200
Subject: Adding debug info

---
 scheduling/RTLpathScheduleraux.ml | 95 ++++++++++++++++++++++-----------------
 1 file changed, 53 insertions(+), 42 deletions(-)

diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml
index 653765f5..8e7f0dfa 100644
--- a/scheduling/RTLpathScheduleraux.ml
+++ b/scheduling/RTLpathScheduleraux.ml
@@ -74,6 +74,11 @@ end
 
 
 let get_live_regs_entry (sb : superblock) code =
+  (if !Clflags.option_debug_compcert > 6
+   then debug_flag := true);
+  debug "getting live regs for superblock:\n";
+  print_superblock sb code;
+  debug "\n";
   let seqa = Array.map (fun i ->
                  (match PTree.get i code with
                   | Some ii -> ii
@@ -83,48 +88,53 @@ let get_live_regs_entry (sb : superblock) code =
                   | Some s -> s
                   | None -> Regset.empty))
                sb.instructions in
-  Array.fold_right (fun (ins, liveins) regset ->
-      match ins with
-      | Inop _ -> regset
-      | Iop (_, args, dest, _)
-      | Iload (_, _, _, args, dest, _) ->
-         List.fold_left (fun set reg -> Registers.Regset.add reg set)
-           (Registers.Regset.remove dest regset) args
-      | Istore (_, _, args, src, _) ->
-         List.fold_left (fun set reg -> Registers.Regset.add reg set)
-           (Registers.Regset.add src regset) args
-      | Icall (_, fn, args, dest, _) ->
-         List.fold_left (fun set reg -> Registers.Regset.add reg set)
-           ((match fn with
-             | Datatypes.Coq_inl reg -> (Registers.Regset.add reg)
-             | Datatypes.Coq_inr _ -> (fun x -> x))
-              (Registers.Regset.remove dest regset))
-           args
-      | Itailcall (_, fn, args) ->
-         List.fold_left (fun set reg -> Registers.Regset.add reg set)
-           (match fn with
-            | Datatypes.Coq_inl reg -> (Registers.Regset.add reg regset)
-            | Datatypes.Coq_inr _ -> regset)
-           args
-      | Ibuiltin (_, args, dest, _) ->
-         List.fold_left (fun set reg ->
-           match reg with
-           | AST.BA r -> Registers.Regset.add r set
-           | _ -> set)
-           (match dest with
-            | AST.BR r -> Registers.Regset.remove r regset
-            | _ -> regset)
-           args
-      | Icond (_, args, _, _, _) ->
-         List.fold_left (fun set reg ->
-             Registers.Regset.add reg set)
-           regset args
-      | Ijumptable (reg, _)
-      | Ireturn (Some reg) ->
-         Registers.Regset.add reg regset
-      | _ -> regset        
-    ) seqa sb.s_output_regs
-
+  let ret = 
+    Array.fold_right (fun (ins, liveins) regset ->
+        match ins with
+        | Inop _ -> regset
+        | Iop (_, args, dest, _)
+          | Iload (_, _, _, args, dest, _) ->
+           List.fold_left (fun set reg -> Registers.Regset.add reg set)
+             (Registers.Regset.remove dest regset) args
+        | Istore (_, _, args, src, _) ->
+           List.fold_left (fun set reg -> Registers.Regset.add reg set)
+             (Registers.Regset.add src regset) args
+        | Icall (_, fn, args, dest, _) ->
+           List.fold_left (fun set reg -> Registers.Regset.add reg set)
+             ((match fn with
+               | Datatypes.Coq_inl reg -> (Registers.Regset.add reg)
+               | Datatypes.Coq_inr _ -> (fun x -> x))
+                (Registers.Regset.remove dest regset))
+             args
+        | Itailcall (_, fn, args) ->
+           List.fold_left (fun set reg -> Registers.Regset.add reg set)
+             (match fn with
+              | Datatypes.Coq_inl reg -> (Registers.Regset.add reg regset)
+              | Datatypes.Coq_inr _ -> regset)
+             args
+        | Ibuiltin (_, args, dest, _) ->
+           List.fold_left (fun set reg ->
+               match reg with
+               | AST.BA r -> Registers.Regset.add r set
+               | _ -> set)
+             (match dest with
+              | AST.BR r -> Registers.Regset.remove r regset
+              | _ -> regset)
+             args
+        | Icond (_, args, _, _, _) ->
+           List.fold_left (fun set reg ->
+               Registers.Regset.add reg set)
+             regset args
+        | Ijumptable (reg, _)
+          | Ireturn (Some reg) ->
+           Registers.Regset.add reg regset
+        | _ -> regset        
+      ) seqa sb.s_output_regs
+  in debug "live in regs: ";
+     print_regset ret;
+     debug "\n";
+     debug_flag := false;
+     ret
   
 (* TODO David *)
 let schedule_superblock sb code =
@@ -144,6 +154,7 @@ let schedule_superblock sb code =
          match predicted_successor ii with
          | Some _ -> 0
          | None -> 1 in
+    debug "hello\n";
     let live_regs_entry = get_live_regs_entry sb code in
     match PrepassSchedulingOracle.schedule_sequence
             (Array.map (fun i ->
-- 
cgit 


From 403f2dd49dfc6948df31e08f60655de3091816d6 Mon Sep 17 00:00:00 2001
From: Pierre Goutagny <pierre.goutagny@ens-lyon.fr>
Date: Mon, 7 Jun 2021 21:41:36 +0200
Subject: Monday's work on RTLTunnelingproof

---
 backend/RTLTunnelingproof.v | 184 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 153 insertions(+), 31 deletions(-)

diff --git a/backend/RTLTunnelingproof.v b/backend/RTLTunnelingproof.v
index 57f14b9f..f63c3b35 100644
--- a/backend/RTLTunnelingproof.v
+++ b/backend/RTLTunnelingproof.v
@@ -23,6 +23,77 @@ Require Import Conventions1.
 
 Local Open Scope nat.
 
+(**) Definition check_included_spec (c:code) (td:UF) (ok: option instruction) :=
+  ok <> None -> forall pc, c!pc = None -> td!pc = None.
+
+Lemma check_included_correct (td:UF) (c:code):
+  check_included_spec c td (check_included td c).
+Proof.
+  apply PTree_Properties.fold_rec with (P:=check_included_spec c); unfold check_included_spec.
+  - intros m m' oi EQ IND N pc. rewrite <- EQ. apply IND. apply N.
+  - intros N pc. rewrite PTree.gempty. auto.
+  - intros m oi pc v N S IND. destruct oi.
+    + intros. rewrite PTree.gsspec. destruct (peq _ _); try congruence. apply IND. congruence. apply H0.
+    + contradiction.
+Qed.
+
+(**) Inductive target_bounds (target: node -> node) (bound: node -> nat) (pc: node) : option instruction -> Prop :=
+  | TB_default (TB: target pc = pc) oi:
+      target_bounds target bound pc oi
+  | TB_nop s
+    (EQ: target pc = target s)
+    (DEC: bound s < bound pc):
+    target_bounds target bound pc (Some (Inop s))
+  | TB_cond cond args ifso ifnot info
+      (EQSO: target pc = target ifso)
+      (EQNOT: target pc = target ifnot)
+      (DECSO: bound ifso < bound pc)
+      (DECNOT: bound ifnot < bound pc):
+      target_bounds target bound pc (Some (Icond cond args ifso ifnot info)).
+
+Lemma target_None (td: UF) (pc: node): td!pc = None -> td pc = pc.
+Proof.
+  unfold target, get. intro EQ. rewrite EQ. auto.
+Qed.
+
+Lemma get_nonneg td pc t d: get td pc = (t,d) -> (0 <= d)%Z.
+Proof.
+  unfold get. destruct td!pc as [(tpc,dpc)|]; intro H; inv H; lia.
+Qed.
+
+(**) Definition bound (td: UF) (pc: node) := Z.to_nat (snd (get td pc)).
+
+
+(* TODO: à réécrire proprement *)
+Lemma check_instr_correct (td: UF) (pc: node) (i: instruction):
+  check_instr td pc i = OK tt ->
+  target_bounds (target td) (bound td) pc (Some i).
+Proof.
+  unfold check_instr. destruct (td!pc) as [(tpc,dpc)|] eqn:EQ.
+  assert (DPC: snd (get td pc) = Z.abs dpc). { unfold get. rewrite EQ. auto. }
+  - destruct i; try congruence.
+    + destruct (get td n) as (ts,ds) eqn:EQs.
+      destruct (peq _ _); try congruence.
+      destruct (zlt _ _); try congruence. intros _.
+      apply TB_nop. replace (td pc) with tpc.
+      unfold target. rewrite EQs. auto.
+      unfold target. unfold get. rewrite EQ. auto.
+      unfold bound. rewrite DPC. rewrite EQs; simpl. apply Z2Nat.inj_lt; try lia. apply get_nonneg with td n ts. apply EQs.
+    + destruct (get td n) as (tso,dso) eqn:EQSO.
+      destruct (get td n0) as (tnot,dnot) eqn:EQNOT.
+      intro H.
+      repeat ((destruct (peq _ _) in H || destruct (zlt _ _) in H); try congruence).
+      apply TB_cond; subst.
+      * unfold target. replace (fst (get td pc)) with tnot. rewrite EQSO. auto.
+        unfold get. rewrite EQ. auto.
+      * unfold target. replace (fst (get td pc)) with tnot. rewrite EQNOT. auto.
+        unfold get. rewrite EQ. auto.
+      * unfold bound. rewrite DPC. apply Z2Nat.inj_lt; try lia. apply get_nonneg with td n tnot. rewrite EQSO. auto. rewrite EQSO. auto.
+      * unfold bound. rewrite DPC. apply Z2Nat.inj_lt; try lia. apply get_nonneg with td n0 tnot. rewrite EQNOT; auto. rewrite EQNOT; auto.
+  - intros _. apply TB_default. unfold target. unfold get. rewrite EQ. auto.
+Qed.
+
+
 (** Preservation of semantics *)
 
 Definition match_prog (p tp: program) :=
@@ -32,6 +103,8 @@ Definition match_prog (p tp: program) :=
    *     `p` et `tp` sont les programmes donc on doit dire s'ils match
    *)
 
+
+
 Section PRESERVATION.
 
 
@@ -41,14 +114,21 @@ Let ge := Genv.globalenv prog.
 Let tge := Genv.globalenv tprog.
 
 (* rq: pour les deux lemmes suivants, j'ai recopié les preuves, mais je ne les comprends pas du tout... D'où vient `exploit` ?? *)
+(* rq: `exploit` vient de Coqlib! Mais je comprends pas encore vraiment ce qui se passe... *)
 Lemma functions_translated:
   forall (v: val) (f: fundef),
   Genv.find_funct ge v = Some f ->
   exists tf, tunnel_fundef f = OK tf /\ Genv.find_funct tge v = Some tf.
 Proof.
-  intros. exploit (Genv.find_funct_match TRANSL). eauto.
+  intros.
+  exploit (Genv.find_funct_match TRANSL). apply H.
   intros (cu & tf & A & B & C).
-  repeat eexists; intuition eauto.
+  (* exists tf. split. apply B. apply A. *)
+  (* rq: on peut remplacer les `split` et `apply` par `eauto`, et ne pas spécifier le
+   * `exists`, avec un `eexists` *)
+  eexists. eauto.
+  (* rq: Je ne comprends pas à quoi sert le `intuition`... *)
+  (* repeat eexists; intuition eauto. *)
 Qed.
 
 Lemma function_ptr_translated:
@@ -57,30 +137,43 @@ Lemma function_ptr_translated:
   exists tf,
   Genv.find_funct_ptr tge v = Some tf /\ tunnel_fundef f = OK tf.
 Proof.
-  intros.
-  exploit (Genv.find_funct_ptr_transf_partial TRANSL); eauto.
+  intros. exploit (Genv.find_funct_ptr_match TRANSL). 
+  - apply H.
+  - intros (cu & tf & A & B & C). exists tf. split. apply A. apply B.
 Qed.
 
 Lemma symbols_preserved s: Genv.find_symbol tge s = Genv.find_symbol ge s.
 Proof.
-  rewrite (Genv.find_symbol_match TRANSL). auto.
+  apply (Genv.find_symbol_match TRANSL).
+  (* rq: ici pas de `exploit` mais un `apply` parce que `Genv.find_symbol_match
+   * prouve vraiment pile ce qu'on veut *)
 Qed.
 
 Lemma sig_preserved:
   forall f tf, tunnel_fundef f = OK tf -> funsig tf = funsig f.
 Proof.
-  intros. destruct f.
-  - simpl in H. monadInv H. (* rq: c'est une tactique maison... *)
+  intros. destruct f; simpl in H.
+  -
+    (*
+    unfold bind in H.
+    destruct (tunnel_function _) as [x|] eqn:EQ; try congruence.
+    inversion H.
     unfold tunnel_function in EQ.
-    destruct (check_included _ _) as [_|] in EQ; try congruence.
+    destruct (check_included _ _) in EQ; try congruence.
+    unfold bind in EQ. destruct (check_code _ _) in EQ; try congruence. inversion EQ. auto.
+    *)
+
+    monadInv H. (* rq: c'est une tactique maison... *)
+    unfold tunnel_function in EQ.
+    destruct (check_included _ _) in EQ; try congruence.
     monadInv EQ. auto.
-  - simpl in H. monadInv H. auto.
+  - monadInv H. auto.
 Qed.
 
-Lemma senv_preserved:
+(**) Lemma senv_preserved:
   Senv.equiv ge tge.
 Proof.
-  eapply (Genv.senv_match TRANSL). (* Il y a déjà une preuve, je ne vais pas réinventer la roue ici *)
+  eapply (Genv.senv_match TRANSL). (* Il y a déjà une preuve de cette propriété très exactement, je ne vais pas réinventer la roue ici *)
 Qed.
 
 (* TODO: vérifier s'il faut faire quelque chose avec le `res` ? *)
@@ -122,27 +215,47 @@ Inductive match_states: state -> state -> Prop :=
         (Returnstate s v m)
         (Returnstate ts tv tm).
 
+(* TODO: il faut définir une bonne mesure *)
+(**) Definition measure (st: state) : nat :=
+  match st with
+  | State s f sp pc rs m =>
+      match (fn_code f)!pc with
+      | Some (Inop pc) => (bound (branch_target f) pc) * 2 + 1
+      | Some (Icond _ _ ifso ifnot _) => (max
+                                    (bound (branch_target f) ifso)
+                                    (bound (branch_target f) ifnot)
+                                  ) * 2 + 1
+      | Some _ => 0
+      | None => 0
+      end
+  | Callstate s f v m => 0
+  | Returnstate s v m => 0
+  end.
+
+(* rq: sans les lemmes définis au-dessus je ne vois pas trop comment j'aurais
+ * fait... ni comment j'aurais eu l'idée d'en faire des lemmes ? *)
 Lemma transf_initial_states:
   forall s1: state, initial_state prog s1 ->
   exists s2: state, initial_state tprog s2 /\ match_states s1 s2.
 Proof.
-  intros. inversion H.
-  exploit function_ptr_translated; eauto.
-  intro Htf. destruct Htf as (tf & Htf & Hf).
-  exists (Callstate nil tf nil m0). split.
-  - econstructor.
-    + apply (Genv.init_mem_transf_partial TRANSL). auto.
-    + rewrite (match_program_main TRANSL).
-      rewrite symbols_preserved. eauto.
-    + apply Htf.
-    + rewrite <- H3. apply sig_preserved. apply Hf.
-  - constructor.
-    + constructor.
-    + apply Hf.
-    + constructor.
-    + apply Mem.extends_refl.
+  intros. inversion H as [b f m0 ge0 MEM SYM PTR SIG CALL].
+  exploit function_ptr_translated.
+  - apply PTR.
+  - intros (tf & TPTR & TUN). 
+    exists (Callstate nil tf nil m0). split.
+    + apply initial_state_intro with b.
+      * apply (Genv.init_mem_match TRANSL). apply MEM.
+      * rewrite (match_program_main TRANSL).
+        rewrite symbols_preserved. apply SYM.
+      * apply TPTR.
+      * rewrite <- SIG. apply sig_preserved. apply TUN.
+    + apply match_states_call.
+      * apply list_forall2_nil.
+      * apply TUN.
+      * apply list_forall2_nil.
+      * apply Mem.extends_refl.
 Qed.
-  
+
 Lemma transf_final_states:
   forall (s1 : state)
   (s2 : state) (r : Integers.Int.int),
@@ -150,12 +263,22 @@ Lemma transf_final_states:
   final_state s1 r ->
   final_state s2 r.
 Proof.
-  intros s1 s2 r HM H1. inv H1. inv HM. inv STK. inv VAL. constructor.
+  (* rq: `inv` au lieu de `inversion` fait beaucoup de nettoyage dans les
+   * hypothèses, mais je sais pas trop ce que ça fait exactement *)
+  intros. inv H0. inv H. inv VAL. inversion STK. apply final_state_intro.
 Qed.
 
 (* `Lemma tunnel_step_correct` correspond au diagramme "option simulation" *)
+Lemma tunnel_step_correct:
+  forall st1 t st2, step ge st1 t st2 ->
+  forall st1' (MS: match_states st1 st1'),
+  (exists st2', step tge st1' t st2' /\ match_states st2 st2')
+  \/ (measure st2 < measure st1 /\ t = E0 /\ match_states st2 st1')%nat.
+Proof.
+  intros st1 t st2 H. induction H; intros; inv MS.
+Admitted.
+
 
-(*
 Theorem transf_program_correct:
   forward_simulation (RTL.semantics prog) (RTL.semantics tprog).
 Proof.
@@ -163,8 +286,7 @@ Proof.
   apply senv_preserved.
   apply transf_initial_states.
   apply transf_final_states.
-  apply tunnel_step_correct.
+  exact tunnel_step_correct.
 Qed.
-*)
 
 End PRESERVATION.
-- 
cgit 


From 52378f0600652a94edcc8c78e4b426243f717a89 Mon Sep 17 00:00:00 2001
From: "nicolas.nardino" <nicolas.nardino@ens-lyon.fr>
Date: Tue, 8 Jun 2021 15:11:03 +0200
Subject: Add some tests

---
 common/DebugPrint.ml                     |   4 +-
 scheduling/RTLpathScheduleraux.ml        |   2 +-
 test/nardino/scheduling/entry_regs.c     |  16 +++++
 test/nardino/scheduling/spille_etrange.c | 114 +++++++++++++++++++++++++++++++
 4 files changed, 133 insertions(+), 3 deletions(-)
 create mode 100644 test/nardino/scheduling/entry_regs.c
 create mode 100644 test/nardino/scheduling/spille_etrange.c

diff --git a/common/DebugPrint.ml b/common/DebugPrint.ml
index 6f8449ee..275e6a71 100644
--- a/common/DebugPrint.ml
+++ b/common/DebugPrint.ml
@@ -132,10 +132,10 @@ let print_instructions insts code =
   | None -> failwith "Did not get some"
   | Some thing -> thing
   in if (!debug_flag) then begin
-    debug "[ ";
+    debug "[\n";
     List.iter (
       fun n -> (PrintRTL.print_instruction stdout (P.to_int n, get_some @@ PTree.get n code))
-    ) insts; debug "]"
+    ) insts; debug "  ]"
   end
 
 let print_arrayp arr = begin
diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml
index 8e7f0dfa..24fef3e8 100644
--- a/scheduling/RTLpathScheduleraux.ml
+++ b/scheduling/RTLpathScheduleraux.ml
@@ -17,7 +17,7 @@ let print_superblock (sb: superblock) code =
   begin
     debug "{ instructions = "; print_instructions (Array.to_list insts) code; debug "\n";
     debug "  liveins = "; print_ptree_regset li; debug "\n";
-    debug "  output_regs = "; print_regset outs; debug "}"
+    debug "  output_regs = "; print_regset outs; debug "\n}"
   end
 
 let print_superblocks lsb code =
diff --git a/test/nardino/scheduling/entry_regs.c b/test/nardino/scheduling/entry_regs.c
new file mode 100644
index 00000000..047a613d
--- /dev/null
+++ b/test/nardino/scheduling/entry_regs.c
@@ -0,0 +1,16 @@
+#include <stdio.h>
+
+int f(int n) {
+	return n;
+}
+
+
+int main(int argc, char *argv[]) {
+    int a=1;
+	float b=2.;
+	int c = f(a);
+	a = 3;
+	int d = f(a);
+	printf("%e, %d, %d, %d", b, a, c, d);
+    return 0;
+}
diff --git a/test/nardino/scheduling/spille_etrange.c b/test/nardino/scheduling/spille_etrange.c
new file mode 100644
index 00000000..1c36ee86
--- /dev/null
+++ b/test/nardino/scheduling/spille_etrange.c
@@ -0,0 +1,114 @@
+int f(int k) {
+	int a1 = k;
+	int b1 = 2*a1;
+	int c = a1;
+	int a2 = k+1;
+	int b2 = 2*a2;
+	c += a2;
+	int a3 = k+2;
+	int b3 = 2*a3;
+	c += a3;
+	int a4 = k+3;
+	int b4 = 2*a4;
+	c += a4;
+	int a5 = k+4;
+	int b5 = 2*a5;
+	c += a5;
+	int a6 = k+5;
+	int b6 = 2*a6;
+	c += a6;
+	int a7 = k+6;
+	int b7 = 2*a7;
+	c += a7;
+	int a8 = k+7;
+	int b8 = 2*a8;
+	c += a8;
+	int a9 = k+8;
+	int b9 = 2*a9;
+	c += a9;
+	int a10 = k+9;
+	int b10 = 2*a10;
+	c += a10;
+	int a11 = k+10;
+	int b11 = 2*a11;
+	c += a11;
+	int a12 = k+11;
+	int b12 = 2*a12;
+	c += a12;
+	int a13 = k+12;
+	int b13 = 2*a13;
+	c += a13;
+	int a14 = k+13;
+	int b14 = 2*a14;
+	c += a14;
+	int a15 = k+14;
+	int b15 = 2*a15;
+	c += a15;
+	int a16 = k+15;
+	int b16 = 2*a16;
+	c += a16;
+	int a17 = k+16;
+	int b17 = 2*a17;
+	c += a17;
+	int a18 = k+17;
+	int b18 = 2*a18;
+	c += a18;
+	int a19 = k+18;
+	int b19 = 2*a19;
+	c += a19;
+	int a20 = k+19;
+	int b20 = 2*a20;
+	c += a20;
+	int a21 = k+20;
+	int b21 = 2*a21;
+	c += a21;
+	int a22 = k+21;
+	int b22 = 2*a22;
+	c += a22;
+	int a23 = k+22;
+	int b23 = 2*a23;
+	c += a23;
+	int a24 = k+23;
+	int b24 = 2*a24;
+	c += a24;
+	int a25 = k+24;
+	int b25 = 2*a25;
+	c += a25;
+	int a26 = k+25;
+	int b26 = 2*a26;
+	c += a26;
+	return
+		b13+	
+		b12+
+		b11+
+		b10+
+		b9+
+		b8+
+		b7+
+		b6+
+		b5+
+		b4+
+		b3+
+		b2+
+		b1+
+		b14+
+		b15+
+		b16+
+		b17+
+		b18+
+		b19+
+		b20+
+		b21+
+		b22+
+		b23+
+		b23+
+		b24+
+		b25+
+		b26+
+		c;
+}
+
+int main(int argc, char *argv[]) {
+	f(3);
+    return 0;
+}
-- 
cgit 


From 9b6247b7996f3e0181d27ec0e20daffd28e0884f Mon Sep 17 00:00:00 2001
From: "nicolas.nardino" <nicolas.nardino@ens-lyon.fr>
Date: Tue, 8 Jun 2021 16:06:36 +0200
Subject: Another test : one spill when scheduled forward, none if not

---
 test/nardino/scheduling/spille_forw.c | 119 ++++++++++++++++++++++++++++++++++
 1 file changed, 119 insertions(+)
 create mode 100644 test/nardino/scheduling/spille_forw.c

diff --git a/test/nardino/scheduling/spille_forw.c b/test/nardino/scheduling/spille_forw.c
new file mode 100644
index 00000000..0c69efd5
--- /dev/null
+++ b/test/nardino/scheduling/spille_forw.c
@@ -0,0 +1,119 @@
+#include <stdio.h>
+
+int f(int n) {
+	float a1 = (float) n;
+	float b1 = 2.*a1;
+	float c = a1;
+	float a2 = (float) n+1;
+	float b2 = 2.*a2;
+	c += a2;
+	float a3 = (float) n+2;
+	float b3 = 2.*a3;
+	c += a3;
+	float a4 = (float) n+3;
+	float b4 = 2.*a4;
+	c += a4;
+	float a5 = (float) n+4;
+	float b5 = 2.*a5;
+	c += a5;
+	float a6 = (float) n+5;
+	float b6 = 2.*a6;
+	c += a6;
+	float a7 = (float) n+6;
+	float b7 = 2.*a7;
+	c += a7;
+	float a8 = (float) n+7;
+	float b8 = 2.*a8;
+	c += a8;
+	float a9 = (float) n+8;
+	float b9 = 2.*a9;
+	c += a9;
+	float a10 = (float) n+9;
+	float b10 = 2.*a10;
+	c += a10;
+	float a11 = (float) n+10;
+	float b11 = 2.*a11;
+	c += a11;
+	float a12 = (float) n+11;
+	float b12 = 2.*a12;
+	c += a12;
+	float a13 = (float) n+12;
+	float b13 = 2.*a13;
+	c += a13;
+	float a14 = (float) n+13;
+	float b14 = 2.*a14;
+	c += a14;
+	float a15 = (float) n+14;
+	float b15 = 2.*a15;
+	c += a15;
+	float a16 = (float) n+15;
+	float b16 = 2.*a16;
+	c += a16;
+	float a17 = (float) n+16;
+	float b17 = 2.*a17;
+	c += a17;
+	float a18 = (float) n+17;
+	float b18 = 2.*a18;
+	c += a18;
+	float a19 = (float) n+18;
+	float b19 = 2.*a19;
+	c += a19;
+	float a20 = (float) n+19;
+	float b20 = 2.*a20;
+	c += a20;
+	float a21 = (float) n+20;
+	float b21 = 2.*a21;
+	c += a21;
+	float a22 = (float) n+21;
+	float b22 = 2.*a22;
+	c += a22;
+	float a23 = (float) n+22;
+	float b23 = 2.*a23;
+	c += a23;
+	float a24 = (float) n+23;
+	float b24 = 2.*a24;
+	c += a24;
+	float a25 = (float) n+24;
+	float b25 = 2.*a25;
+	c += a25;
+	float a26 = (float) n+25;
+	float b26 = 2.*a26;
+	c += a26;
+	return c +
+		b1+
+		b2+
+		b3+
+		b4+
+		b5+
+		b6+
+		b7+
+		b8+
+		b9+
+		b10+
+		b11+
+		b12+
+		b13+
+		b14+
+		b15+
+		b16+
+		b17+
+		b18+
+		b19+
+		b20+
+		b21+
+		b22+
+		b23+
+		b24+
+		b25+
+		b26;
+}
+
+
+
+
+
+
+int main(int argc, char *argv[]) {
+    f(5);
+    return 0;
+}
-- 
cgit 


From 386b9053177bb4ef2801cec00b717c400a828139 Mon Sep 17 00:00:00 2001
From: "nicolas.nardino" <nicolas.nardino@ens-lyon.fr>
Date: Tue, 8 Jun 2021 16:53:19 +0200
Subject: Fix RTLpathScheduleraux.get_live_regs_entry

---
 scheduling/RTLpathScheduleraux.ml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml
index 24fef3e8..72cf6682 100644
--- a/scheduling/RTLpathScheduleraux.ml
+++ b/scheduling/RTLpathScheduleraux.ml
@@ -89,7 +89,8 @@ let get_live_regs_entry (sb : superblock) code =
                   | None -> Regset.empty))
                sb.instructions in
   let ret = 
-    Array.fold_right (fun (ins, liveins) regset ->
+    Array.fold_right (fun (ins, liveins) regset_i ->
+        let regset = Registers.Regset.union liveins regset_i in
         match ins with
         | Inop _ -> regset
         | Iop (_, args, dest, _)
@@ -128,7 +129,7 @@ let get_live_regs_entry (sb : superblock) code =
         | Ijumptable (reg, _)
           | Ireturn (Some reg) ->
            Registers.Regset.add reg regset
-        | _ -> regset        
+        | _ -> regset
       ) seqa sb.s_output_regs
   in debug "live in regs: ";
      print_regset ret;
-- 
cgit 


From 587f0505f2331b8edc94a187575a8f124c3cb4ef Mon Sep 17 00:00:00 2001
From: Pierre Goutagny <pierre.goutagny@ens-lyon.fr>
Date: Wed, 9 Jun 2021 19:34:23 +0200
Subject: Starts proof for `tunnel_step_correct`

---
 backend/RTLTunnelingproof.v | 135 +++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 120 insertions(+), 15 deletions(-)

diff --git a/backend/RTLTunnelingproof.v b/backend/RTLTunnelingproof.v
index f63c3b35..c54667a4 100644
--- a/backend/RTLTunnelingproof.v
+++ b/backend/RTLTunnelingproof.v
@@ -16,7 +16,7 @@
 
 Require Import Coqlib Maps Errors.
 Require Import AST Linking.
-Require Import Values Memory Events Globalenvs Smallstep.
+Require Import Values Memory Registers Events Globalenvs Smallstep.
 Require Import Op Locations RTL.
 Require Import RTLTunneling.
 Require Import Conventions1.
@@ -41,25 +41,29 @@ Qed.
   | TB_default (TB: target pc = pc) oi:
       target_bounds target bound pc oi
   | TB_nop s
-    (EQ: target pc = target s)
-    (DEC: bound s < bound pc):
-    target_bounds target bound pc (Some (Inop s))
+      (EQ: target pc = target s)
+      (DEC: bound s < bound pc):
+      target_bounds target bound pc (Some (Inop s))
   | TB_cond cond args ifso ifnot info
       (EQSO: target pc = target ifso)
       (EQNOT: target pc = target ifnot)
       (DECSO: bound ifso < bound pc)
       (DECNOT: bound ifnot < bound pc):
-      target_bounds target bound pc (Some (Icond cond args ifso ifnot info)).
+      target_bounds target bound pc (Some (Icond cond args ifso ifnot info))
+.
+Local Hint Resolve TB_default: core.
 
 Lemma target_None (td: UF) (pc: node): td!pc = None -> td pc = pc.
 Proof.
   unfold target, get. intro EQ. rewrite EQ. auto.
 Qed.
+Local Hint Resolve target_None Z.abs_nonneg: core.
 
 Lemma get_nonneg td pc t d: get td pc = (t,d) -> (0 <= d)%Z.
 Proof.
   unfold get. destruct td!pc as [(tpc,dpc)|]; intro H; inv H; lia.
 Qed.
+Local Hint Resolve get_nonneg: core.
 
 (**) Definition bound (td: UF) (pc: node) := Z.to_nat (snd (get td pc)).
 
@@ -93,6 +97,48 @@ Proof.
   - intros _. apply TB_default. unfold target. unfold get. rewrite EQ. auto.
 Qed.
 
+Definition check_code_spec (td:UF) (c:code) (ok: res unit) :=
+   ok = OK tt -> forall pc i, c!pc = Some i -> target_bounds (target td) (bound td) pc (Some i).
+
+Lemma check_code_correct (td:UF) c:
+   check_code_spec td c (check_code td c).
+Proof.
+  unfold check_code. apply PTree_Properties.fold_rec; unfold check_code_spec.
+  - intros. rewrite <- H in H2. apply H0; auto.
+  - intros. rewrite PTree.gempty in H0. congruence.
+  - intros m [[]|e] pc i N S IND; simpl; try congruence.
+    intros H pc0 i0. rewrite PTree.gsspec. destruct (peq _ _).
+    subst. intro. inv H0. apply check_instr_correct. apply H.
+    auto.
+Qed.
+
+Theorem branch_target_bounds:
+  forall f tf pc,
+  tunnel_function f = OK tf ->
+  target_bounds (branch_target f) (bound (branch_target f)) pc (f.(fn_code)!pc).
+Proof.
+(* ~old
+  intros t tf pc. unfold tunnel_function.
+  destruct (check_included _ _) eqn:HI; try congruence.
+  destruct (check_code _ _) eqn: HC.
+  - intros _. destruct ((fn_code t) ! pc) eqn:EQ.
+    + exploit check_code_correct; eauto.
+    replace tt with u. auto. {destruct u; auto. }
+    + exploit check_included_correct; eauto. rewrite HI. congruence.
+    intro. apply TB_default. unfold target. unfold get. rewrite H. debug auto.
+  - simpl. congruence.
+*)
+
+  intros. unfold tunnel_function in H.
+  destruct (check_included _ _) eqn:EQinc; try congruence.
+  monadInv H. rename EQ into EQcode.
+  destruct (_ ! _) eqn:EQ.
+  - exploit check_code_correct. destruct x. apply EQcode. apply EQ. auto.
+  - exploit check_included_correct.
+    rewrite EQinc. congruence.
+    apply EQ.
+    intro. apply TB_default. apply target_None. apply H.
+Qed.
 
 (** Preservation of semantics *)
 
@@ -137,7 +183,7 @@ Lemma function_ptr_translated:
   exists tf,
   Genv.find_funct_ptr tge v = Some tf /\ tunnel_fundef f = OK tf.
 Proof.
-  intros. exploit (Genv.find_funct_ptr_match TRANSL). 
+  intros. exploit (Genv.find_funct_ptr_match TRANSL).
   - apply H.
   - intros (cu & tf & A & B & C). exists tf. split. apply A. apply B.
 Qed.
@@ -170,7 +216,7 @@ Proof.
   - monadInv H. auto.
 Qed.
 
-(**) Lemma senv_preserved:
+Lemma senv_preserved:
   Senv.equiv ge tge.
 Proof.
   eapply (Genv.senv_match TRANSL). (* Il y a déjà une preuve de cette propriété très exactement, je ne vais pas réinventer la roue ici *)
@@ -216,32 +262,41 @@ Inductive match_states: state -> state -> Prop :=
         (Returnstate ts tv tm).
 
 (* TODO: il faut définir une bonne mesure *)
-(**) Definition measure (st: state) : nat :=
+(* (**) Definition measure (st: state) : nat :=
   match st with
   | State s f sp pc rs m =>
       match (fn_code f)!pc with
-      | Some (Inop pc) => (bound (branch_target f) pc) * 2 + 1
+      | Some (Inop pc') => (bound (branch_target f) pc') * 2 + 1
       | Some (Icond _ _ ifso ifnot _) => (max
                                     (bound (branch_target f) ifso)
                                     (bound (branch_target f) ifnot)
                                   ) * 2 + 1
-      | Some _ => 0
+      | Some _ => (bound (branch_target f) pc) * 2
       | None => 0
       end
   | Callstate s f v m => 0
   | Returnstate s v m => 0
   end.
+*)
+
+Definition measure (st: state): nat :=
+  match st with
+  | State s f sp pc rs m => bound (branch_target f) pc
+  | Callstate s f v m => 0
+  | Returnstate s v m => 0
+  end.
+
 
 (* rq: sans les lemmes définis au-dessus je ne vois pas trop comment j'aurais
  * fait... ni comment j'aurais eu l'idée d'en faire des lemmes ? *)
-Lemma transf_initial_states:
+(**) Lemma transf_initial_states:
   forall s1: state, initial_state prog s1 ->
   exists s2: state, initial_state tprog s2 /\ match_states s1 s2.
 Proof.
   intros. inversion H as [b f m0 ge0 MEM SYM PTR SIG CALL].
   exploit function_ptr_translated.
   - apply PTR.
-  - intros (tf & TPTR & TUN). 
+  - intros (tf & TPTR & TUN).
     exists (Callstate nil tf nil m0). split.
     + apply initial_state_intro with b.
       * apply (Genv.init_mem_match TRANSL). apply MEM.
@@ -256,7 +311,7 @@ Proof.
       * apply Mem.extends_refl.
 Qed.
 
-Lemma transf_final_states:
+(**) Lemma transf_final_states:
   forall (s1 : state)
   (s2 : state) (r : Integers.Int.int),
   match_states s1 s2 ->
@@ -268,6 +323,27 @@ Proof.
   intros. inv H0. inv H. inv VAL. inversion STK. apply final_state_intro.
 Qed.
 
+Lemma tunnel_function_unfold:
+  forall f tf pc,
+  tunnel_function f = OK tf ->
+  (fn_code tf) ! pc =
+  option_map (tunnel_instr (branch_target f)) (fn_code f) ! pc.
+Proof.
+  intros f tf pc.
+  unfold tunnel_function.
+  destruct (check_included _ _) eqn:EQinc; try congruence.
+  destruct (check_code _ _) eqn:EQcode; simpl; try congruence.
+  intro. inv H. simpl. rewrite PTree.gmap1. reflexivity.
+Qed.
+
+Lemma reglist_lessdef:
+  forall (rs trs: Registers.Regmap.t val) (args: list Registers.reg),
+  regs_lessdef rs trs -> Val.lessdef_list (rs##args) (trs##args).
+Proof.
+  intros. induction args; simpl; constructor.
+  apply H. apply IHargs.
+Qed.
+
 (* `Lemma tunnel_step_correct` correspond au diagramme "option simulation" *)
 Lemma tunnel_step_correct:
   forall st1 t st2, step ge st1 t st2 ->
@@ -275,8 +351,37 @@ Lemma tunnel_step_correct:
   (exists st2', step tge st1' t st2' /\ match_states st2 st2')
   \/ (measure st2 < measure st1 /\ t = E0 /\ match_states st2 st1')%nat.
 Proof.
-  intros st1 t st2 H. induction H; intros; inv MS.
-Admitted.
+  intros st1 t st2 H. induction H; intros; try (inv MS).
+  - (* Inop *)
+    exploit branch_target_bounds. apply TF.
+    rewrite H. intro. inv H0.
+    + (* TB_default *)
+      rewrite TB. left. eexists. split.
+      * apply exec_Inop. rewrite (tunnel_function_unfold f tf pc). rewrite H. simpl. eauto. apply TF.
+      * constructor; try assumption.
+    + (* TB_nop *)
+      simpl. right. repeat split. apply DEC.
+      rewrite EQ. apply match_states_intro; assumption.
+  - (* Iop *)
+    exploit eval_operation_lessdef; try eassumption.
+    apply reglist_lessdef. apply RS.
+    intros (tv & EVAL & LD).
+    left; eexists; split.
+    + eapply exec_Iop with (v:=tv).
+      assert ((fn_code tf) ! pc = Some (Iop op args res (branch_target f pc'))).
+      rewrite (tunnel_function_unfold f tf pc); eauto.
+      rewrite H. simpl. reflexivity.
+      (* TODO: refaire ça joliment *)
+      assert (target_bounds (branch_target f) (bound (branch_target f)) pc (fn_code f)! pc).
+      apply (branch_target_bounds) with tf. apply TF.
+      inv H2. rewrite TB. apply H1. rewrite H in H4. congruence.
+      rewrite H in H4. congruence.
+      rewrite <- EVAL. apply eval_operation_preserved. apply symbols_preserved.
+    + apply match_states_intro; eauto. apply set_reg_lessdef. apply LD. apply RS.
+  - (* Iload *)
+
+
+Qed.
 
 
 Theorem transf_program_correct:
-- 
cgit 


From 1701e43316ee8e69e794a025a8c9979af6bb8c93 Mon Sep 17 00:00:00 2001
From: "nicolas.nardino" <nicolas.nardino@ens-lyon.fr>
Date: Thu, 10 Jun 2021 16:31:51 +0200
Subject: Work on new schedluer

Renamed a test file, wrote function to compute pressure deltas,
Still need to pass the info in some way; beginning of the actual
scheduler function
---
 aarch64/Machregsaux.ml                   |   4 ++
 aarch64/Machregsaux.mli                  |   2 +
 aarch64/PostpassSchedulingOracle.ml      |   3 +-
 aarch64/PrepassSchedulingOracle.ml       |  55 ++++++++++++++-
 scheduling/InstructionScheduler.ml       |  79 ++++++++++++++++++++-
 scheduling/InstructionScheduler.mli      |   7 ++
 scheduling/RTLpathScheduleraux.ml        |  26 ++++---
 test/nardino/scheduling/entry_regs.c     |   5 +-
 test/nardino/scheduling/spille_backw.c   | 114 +++++++++++++++++++++++++++++++
 test/nardino/scheduling/spille_etrange.c | 114 -------------------------------
 10 files changed, 279 insertions(+), 130 deletions(-)
 create mode 100644 test/nardino/scheduling/spille_backw.c
 delete mode 100644 test/nardino/scheduling/spille_etrange.c

diff --git a/aarch64/Machregsaux.ml b/aarch64/Machregsaux.ml
index 41db3bd4..15fb08ca 100644
--- a/aarch64/Machregsaux.ml
+++ b/aarch64/Machregsaux.ml
@@ -19,3 +19,7 @@ let class_of_type = function
   | AST.Tint | AST.Tlong -> 0
   | AST.Tfloat | AST.Tsingle -> 1
   | AST.Tany32 | AST.Tany64 -> assert false
+
+(* number of available registers per class *)
+(* TODO: add this to all archs *)
+let nr_regs = [| 29; 32 |]
diff --git a/aarch64/Machregsaux.mli b/aarch64/Machregsaux.mli
index 01b0f9fd..8487a557 100644
--- a/aarch64/Machregsaux.mli
+++ b/aarch64/Machregsaux.mli
@@ -15,3 +15,5 @@
 val is_scratch_register: string -> bool
 
 val class_of_type: AST.typ -> int
+
+val nr_regs : int array
diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml
index a9737088..834d42f5 100644
--- a/aarch64/PostpassSchedulingOracle.ml
+++ b/aarch64/PostpassSchedulingOracle.ml
@@ -507,7 +507,8 @@ let build_problem bb =
   {
     max_latency = -1;
     resource_bounds = opweights.pipelined_resource_bounds;
-    live_regs_entry = Registers.Regset.empty; (* PLACEHOLDER *)
+    live_regs_entry = Registers.Regset.empty; (* unused here *)
+    typing = (fun x -> AST.Tint);                (* unused here *)
     instruction_usages = instruction_usages bb;
     latency_constraints = latency_constraints bb;
   }
diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml
index a743fb68..6d445f10 100644
--- a/aarch64/PrepassSchedulingOracle.ml
+++ b/aarch64/PrepassSchedulingOracle.ml
@@ -201,6 +201,52 @@ let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.
     end seqa;
   !latency_constraints;;
 
+let get_pressure_deltas (seqa : (instruction * Regset.t) array)
+  (typing : RTLtyping.regenv)
+    : int array array =
+  let nr_types_regs = Array.length Machregsaux.nr_regs in
+  let ret = Array.init (Array.length seqa) (fun i ->
+                Array.make nr_types_regs 0) in
+  Array.iteri (fun i (instr, liveins) -> match instr with
+      | Iop (_, args, dest, _) | Iload (_, _, _, args, dest, _) ->
+         ret.(i).(Machregsaux.class_of_type (typing dest)) <-
+           if List.mem dest args then 0
+           else 1
+      | Istore (_, _, _, src, _) ->
+         ret.(i).(Machregsaux.class_of_type (typing src)) <-
+           -1
+      | Icall (_, fn, args, dest, _) ->
+         ret.(i).(Machregsaux.class_of_type (typing dest)) <-
+           if List.mem dest
+                (match fn with
+                 | Datatypes.Coq_inl reg -> reg::args
+                 | _ -> args)
+           then 0 else 1
+      | Ibuiltin (_, args, dest, _) ->
+         let rec arg_l list = function
+           | AST.BA r -> r::list
+           | AST.BA_splitlong (hi, lo) | AST.BA_addptr (hi, lo) ->
+              arg_l (arg_l list lo) hi
+           | _ -> list
+         in
+         let l = (List.fold_left arg_l [] args) in
+         let rec dest_l = function
+           | AST.BR r ->
+              let t = Machregsaux.class_of_type (typing r) in
+              ret.(i).(t) <-
+              (if List.mem r l
+              then 0 else 1) + ret.(i).(t)
+           | AST.BR_splitlong (hi, lo) ->
+              dest_l hi;
+              dest_l lo
+           | _ -> ()
+         in
+         dest_l dest
+      | _ -> ()
+    ) seqa;
+  ret
+  
+
 let resources_of_instruction (opweights : opweights) = function
   | Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds
   | Iop(op, inputs, output, _) ->
@@ -406,11 +452,13 @@ let get_alias_dependencies seqa =
   !deps;;
  *)
 
-let define_problem (opweights : opweights) (live_entry_regs : Regset.t) seqa =
+let define_problem (opweights : opweights) (live_entry_regs : Regset.t)
+      (typing : RTLtyping.regenv) seqa =
   let simple_deps = get_simple_dependencies opweights seqa in
   { max_latency = -1;
     resource_bounds = opweights.pipelined_resource_bounds;
     live_regs_entry = live_entry_regs;
+    typing = typing;
     instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa);
     latency_constraints =
       (* if (use_alias_analysis ())
@@ -441,7 +489,8 @@ let prepass_scheduler_by_name name problem early_ones =
   | _ -> scheduler_by_name name problem
   
 let schedule_sequence (seqa : (instruction*Regset.t) array)
-  (live_regs_entry : Registers.Regset.t)=
+      (live_regs_entry : Registers.Regset.t)
+      (typing : RTLtyping.regenv) =
   let opweights = OpWeights.get_opweights () in
   try
     if (Array.length seqa) <= 1
@@ -451,7 +500,7 @@ let schedule_sequence (seqa : (instruction*Regset.t) array)
       let nr_instructions = Array.length seqa in
       (if !Clflags.option_debug_compcert > 6
        then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa));
-      let problem = define_problem opweights live_regs_entry seqa in
+      let problem = define_problem opweights live_regs_entry typing seqa in
       (if !Clflags.option_debug_compcert > 7
        then (print_sequence stdout (Array.map fst seqa);
              print_problem stdout problem));
diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml
index 976037bd..08164293 100644
--- a/scheduling/InstructionScheduler.ml
+++ b/scheduling/InstructionScheduler.ml
@@ -34,6 +34,7 @@ type problem = {
   max_latency : int;
   resource_bounds : int array;
   live_regs_entry : Registers.Regset.t;
+  typing : RTLtyping.regenv;
   instruction_usages : int array array;
   latency_constraints : latency_constraint list;
   };;
@@ -258,8 +259,8 @@ let priority_list_scheduler (order : list_scheduler_order)
       assert(!time >= 0);
       !time
     with Exit -> -1
-               
   in
+  
   let advance_time() =
     begin
       (if !current_time < max_time-1
@@ -268,7 +269,8 @@ let priority_list_scheduler (order : list_scheduler_order)
           Array.blit problem.resource_bounds 0 current_resources 0
             (Array.length current_resources);
           ready.(!current_time + 1) <-
-            InstrSet.union (ready.(!current_time)) (ready.(!current_time + 1));
+            InstrSet.union (ready.(!current_time))
+              (ready.(!current_time + 1));
           ready.(!current_time) <- InstrSet.empty;
         end);
       incr current_time
@@ -335,6 +337,75 @@ let list_scheduler = priority_list_scheduler CRITICAL_PATH_ORDER;;
 (* dummy code for placating ocaml's warnings *)
 let _ = fun x -> priority_list_scheduler INSTRUCTION_ORDER x;;
 
+
+(* A scheduler sensitive to register pressure *)
+let reg_pres_scheduler (problem : problem) : solution option =
+  let nr_instructions = get_nr_instructions problem in
+  let successors = get_successors problem
+  and predecessors = get_predecessors problem
+  and times = Array.make (nr_instructions+1) (-1) in
+  let live_regs_entry = problem.live_regs_entry in
+
+  let available_regs = Array.copy Machregsaux.nr_regs in
+
+  List.iter (fun r -> let classe = Machregsaux.class_of_type
+                                  (problem.typing r) in
+                   available_regs.(classe)
+                   <- available_regs.(classe) - 1)
+    (Registers.Regset.elements live_regs_entry);
+
+  let nr_types_regs = Array.length available_regs in
+  
+  (* wait di we have access to instructions here? No, we have to add
+  al this to constraints *)
+  (* let pressures =
+   *   Array.init (nr_instructions) (fun i ->
+   *       Array.init (nr_types_regs) (fun t ->
+   *           match i with
+   *           | Inop -> 0
+   *           | Iop (_, args, dest, _)
+   *           | Iload(_, _, _, args, dest, _) ->
+   *              if
+   *         )) *)
+  
+  let priorities = critical_paths successors in
+
+  let module InstrSetCSP =
+    Set.Make (struct
+        type t=int
+        let compare x y =
+          match priorities.(y) - priorities.(x) with
+          | 0 -> x - y
+          | z -> z
+      end) in
+
+  (* TODO: find a way to efficiently find an instruction which
+  decreases register pressure *)
+  (* idea: *)
+  (* let module InstrSetCSR =
+   *   Set.Make (struct
+   *       type t = int
+   *       let compare x y =
+   *         match pressures.(y) - pressures.(x) with
+   *         | 0 -> (match priorities.(y) - priorities.(x) with
+   *                | 0 -> x - y
+   *                | z -> z)
+   *         | z -> z
+   *     end) in *)
+  (* where pressure.(x) is the delta of register pressure for
+  instruction x. Pb: different register types. Need to think about
+  it. Have one module for each register type, that's used when this
+  particular type reach a high pressure? *)
+
+  let max_time = bound_max_time problem in
+  let ready = Array.make max_time InstrSetCSP.empty in
+
+  (* silence warning, enable compilation while working *)
+  let _ = successors, predecessors, times, ready, nr_types_regs in
+  (* PLACEHOLDER *)
+  None
+
+
 type bundle = int list;;
 
 let rec extract_deps_to index = function
@@ -440,6 +511,10 @@ let reverse_problem problem =
     max_latency = problem.max_latency;
     resource_bounds = problem.resource_bounds;
     live_regs_entry = Registers.Regset.empty; (* PLACEHOLDER *)
+    (* Not needed for the revlist sched, and for now we wont bother
+       with creating a reverse scheduler aware of reg press *)
+    
+    typing = problem.typing;
     instruction_usages = Array.init (nr_instructions + 1)
       (fun i ->
         if i=0
diff --git a/scheduling/InstructionScheduler.mli b/scheduling/InstructionScheduler.mli
index f53dc0ef..8dcc4ef5 100644
--- a/scheduling/InstructionScheduler.mli
+++ b/scheduling/InstructionScheduler.mli
@@ -26,6 +26,9 @@ type problem = {
     live_regs_entry : Registers.Regset.t;
     (** The set of live pseudo-registers at entry. *)
     
+    typing : RTLtyping.regenv;
+    (** Register type map. *)
+    
     instruction_usages: int array array;
     (** At index {i i} the vector of resources used by instruction number {i i}. It must be the same length as [resource_bounds] *)
 
@@ -71,6 +74,10 @@ Once a clock tick is full go to the next.
 @return [Some solution] when a solution is found, [None] if not. *)
 val list_scheduler : problem -> solution option
 
+(** WIP : Same as list_scheduler, but schedules instructions which decrease
+register pressure when it gets too high. *)
+val reg_pres_scheduler : problem -> solution option
+  
 (** Schedule the problem using the order of instructions without any reordering *)
 val greedy_scheduler : problem -> solution option
 
diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml
index 72cf6682..e04e7c23 100644
--- a/scheduling/RTLpathScheduleraux.ml
+++ b/scheduling/RTLpathScheduleraux.ml
@@ -94,7 +94,7 @@ let get_live_regs_entry (sb : superblock) code =
         match ins with
         | Inop _ -> regset
         | Iop (_, args, dest, _)
-          | Iload (_, _, _, args, dest, _) ->
+        | Iload (_, _, _, args, dest, _) ->
            List.fold_left (fun set reg -> Registers.Regset.add reg set)
              (Registers.Regset.remove dest regset) args
         | Istore (_, _, args, src, _) ->
@@ -114,13 +114,20 @@ let get_live_regs_entry (sb : superblock) code =
               | Datatypes.Coq_inr _ -> regset)
              args
         | Ibuiltin (_, args, dest, _) ->
-           List.fold_left (fun set reg ->
-               match reg with
-               | AST.BA r -> Registers.Regset.add r set
-               | _ -> set)
-             (match dest with
-              | AST.BR r -> Registers.Regset.remove r regset
-              | _ -> regset)
+           List.fold_left (fun set arg ->
+               let rec add reg set =
+                 match reg with
+                 | AST.BA r -> Registers.Regset.add r set
+                 | AST.BA_splitlong (hi, lo)
+                 | AST.BA_addptr (hi, lo) -> add hi (add lo set)
+                 | _ -> set
+             in add arg set)
+             (let rec rem dest set =
+                match dest with
+                | AST.BR r -> Registers.Regset.remove r set
+                | AST.BR_splitlong (hi, lo) -> rem hi (rem lo set)
+                | _ -> set
+             in rem dest regset)
              args
         | Icond (_, args, _, _, _) ->
            List.fold_left (fun set reg ->
@@ -166,7 +173,8 @@ let schedule_superblock sb code =
                  | Some s -> s
                  | None -> Regset.empty))
                (Array.sub sb.instructions 0 (nr_instr-trailer_length)))
-            live_regs_entry with
+            live_regs_entry
+            sb.typing with
     | None -> sb.instructions
     | Some order ->
        let ins' =
diff --git a/test/nardino/scheduling/entry_regs.c b/test/nardino/scheduling/entry_regs.c
index 047a613d..9e6adacb 100644
--- a/test/nardino/scheduling/entry_regs.c
+++ b/test/nardino/scheduling/entry_regs.c
@@ -1,7 +1,10 @@
 #include <stdio.h>
 
 int f(int n) {
-	return n;
+	if (n > 0) 
+		return 42;
+	else
+		return n;
 }
 
 
diff --git a/test/nardino/scheduling/spille_backw.c b/test/nardino/scheduling/spille_backw.c
new file mode 100644
index 00000000..1c36ee86
--- /dev/null
+++ b/test/nardino/scheduling/spille_backw.c
@@ -0,0 +1,114 @@
+int f(int k) {
+	int a1 = k;
+	int b1 = 2*a1;
+	int c = a1;
+	int a2 = k+1;
+	int b2 = 2*a2;
+	c += a2;
+	int a3 = k+2;
+	int b3 = 2*a3;
+	c += a3;
+	int a4 = k+3;
+	int b4 = 2*a4;
+	c += a4;
+	int a5 = k+4;
+	int b5 = 2*a5;
+	c += a5;
+	int a6 = k+5;
+	int b6 = 2*a6;
+	c += a6;
+	int a7 = k+6;
+	int b7 = 2*a7;
+	c += a7;
+	int a8 = k+7;
+	int b8 = 2*a8;
+	c += a8;
+	int a9 = k+8;
+	int b9 = 2*a9;
+	c += a9;
+	int a10 = k+9;
+	int b10 = 2*a10;
+	c += a10;
+	int a11 = k+10;
+	int b11 = 2*a11;
+	c += a11;
+	int a12 = k+11;
+	int b12 = 2*a12;
+	c += a12;
+	int a13 = k+12;
+	int b13 = 2*a13;
+	c += a13;
+	int a14 = k+13;
+	int b14 = 2*a14;
+	c += a14;
+	int a15 = k+14;
+	int b15 = 2*a15;
+	c += a15;
+	int a16 = k+15;
+	int b16 = 2*a16;
+	c += a16;
+	int a17 = k+16;
+	int b17 = 2*a17;
+	c += a17;
+	int a18 = k+17;
+	int b18 = 2*a18;
+	c += a18;
+	int a19 = k+18;
+	int b19 = 2*a19;
+	c += a19;
+	int a20 = k+19;
+	int b20 = 2*a20;
+	c += a20;
+	int a21 = k+20;
+	int b21 = 2*a21;
+	c += a21;
+	int a22 = k+21;
+	int b22 = 2*a22;
+	c += a22;
+	int a23 = k+22;
+	int b23 = 2*a23;
+	c += a23;
+	int a24 = k+23;
+	int b24 = 2*a24;
+	c += a24;
+	int a25 = k+24;
+	int b25 = 2*a25;
+	c += a25;
+	int a26 = k+25;
+	int b26 = 2*a26;
+	c += a26;
+	return
+		b13+	
+		b12+
+		b11+
+		b10+
+		b9+
+		b8+
+		b7+
+		b6+
+		b5+
+		b4+
+		b3+
+		b2+
+		b1+
+		b14+
+		b15+
+		b16+
+		b17+
+		b18+
+		b19+
+		b20+
+		b21+
+		b22+
+		b23+
+		b23+
+		b24+
+		b25+
+		b26+
+		c;
+}
+
+int main(int argc, char *argv[]) {
+	f(3);
+    return 0;
+}
diff --git a/test/nardino/scheduling/spille_etrange.c b/test/nardino/scheduling/spille_etrange.c
deleted file mode 100644
index 1c36ee86..00000000
--- a/test/nardino/scheduling/spille_etrange.c
+++ /dev/null
@@ -1,114 +0,0 @@
-int f(int k) {
-	int a1 = k;
-	int b1 = 2*a1;
-	int c = a1;
-	int a2 = k+1;
-	int b2 = 2*a2;
-	c += a2;
-	int a3 = k+2;
-	int b3 = 2*a3;
-	c += a3;
-	int a4 = k+3;
-	int b4 = 2*a4;
-	c += a4;
-	int a5 = k+4;
-	int b5 = 2*a5;
-	c += a5;
-	int a6 = k+5;
-	int b6 = 2*a6;
-	c += a6;
-	int a7 = k+6;
-	int b7 = 2*a7;
-	c += a7;
-	int a8 = k+7;
-	int b8 = 2*a8;
-	c += a8;
-	int a9 = k+8;
-	int b9 = 2*a9;
-	c += a9;
-	int a10 = k+9;
-	int b10 = 2*a10;
-	c += a10;
-	int a11 = k+10;
-	int b11 = 2*a11;
-	c += a11;
-	int a12 = k+11;
-	int b12 = 2*a12;
-	c += a12;
-	int a13 = k+12;
-	int b13 = 2*a13;
-	c += a13;
-	int a14 = k+13;
-	int b14 = 2*a14;
-	c += a14;
-	int a15 = k+14;
-	int b15 = 2*a15;
-	c += a15;
-	int a16 = k+15;
-	int b16 = 2*a16;
-	c += a16;
-	int a17 = k+16;
-	int b17 = 2*a17;
-	c += a17;
-	int a18 = k+17;
-	int b18 = 2*a18;
-	c += a18;
-	int a19 = k+18;
-	int b19 = 2*a19;
-	c += a19;
-	int a20 = k+19;
-	int b20 = 2*a20;
-	c += a20;
-	int a21 = k+20;
-	int b21 = 2*a21;
-	c += a21;
-	int a22 = k+21;
-	int b22 = 2*a22;
-	c += a22;
-	int a23 = k+22;
-	int b23 = 2*a23;
-	c += a23;
-	int a24 = k+23;
-	int b24 = 2*a24;
-	c += a24;
-	int a25 = k+24;
-	int b25 = 2*a25;
-	c += a25;
-	int a26 = k+25;
-	int b26 = 2*a26;
-	c += a26;
-	return
-		b13+	
-		b12+
-		b11+
-		b10+
-		b9+
-		b8+
-		b7+
-		b6+
-		b5+
-		b4+
-		b3+
-		b2+
-		b1+
-		b14+
-		b15+
-		b16+
-		b17+
-		b18+
-		b19+
-		b20+
-		b21+
-		b22+
-		b23+
-		b23+
-		b24+
-		b25+
-		b26+
-		c;
-}
-
-int main(int argc, char *argv[]) {
-	f(3);
-    return 0;
-}
-- 
cgit 


From e46b3e49b2a34c27f5703327c884cf3bcd0c6a49 Mon Sep 17 00:00:00 2001
From: Pierre Goutagny <pierre.goutagny@ens-lyon.fr>
Date: Thu, 10 Jun 2021 18:22:02 +0200
Subject: Complete `tunnel_step_correct` proof up to Ijumptable

Excluding Icond
---
 backend/RTLTunnelingproof.v | 236 ++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 227 insertions(+), 9 deletions(-)

diff --git a/backend/RTLTunnelingproof.v b/backend/RTLTunnelingproof.v
index c54667a4..53f8bbf7 100644
--- a/backend/RTLTunnelingproof.v
+++ b/backend/RTLTunnelingproof.v
@@ -344,6 +344,118 @@ Proof.
   apply H. apply IHargs.
 Qed.
 
+Lemma instruction_type_preserved: 
+  forall (f tf:function) (pc:node) (i ti:instruction)
+  (TF: tunnel_function f = OK tf)
+  (FATPC: (fn_code f) ! pc = Some i)
+  (NOTINOP: forall s, i <> Inop s)
+  (NOTICOND: forall cond args ifso ifnot info, i <> Icond cond args ifso ifnot info)
+  (TI: ti = tunnel_instr (branch_target f) i),
+  (fn_code tf) ! (branch_target f pc) = Some ti.
+Proof.
+  intros.
+  assert ((fn_code tf) ! pc = Some (tunnel_instr (branch_target f) i)) as TFATPC.
+  rewrite (tunnel_function_unfold f tf pc); eauto.
+  rewrite FATPC; eauto.
+  exploit branch_target_bounds; eauto.
+  intro TB. inversion TB as [BT|s|cond args ifso ifnot info]; try (rewrite FATPC in H; congruence).
+Qed.
+
+(* rq: utilisée pour `tunnel_step_correct` mais je ne comprends pas tout *)
+(**) Lemma find_function_translated: 
+  forall ros rs trs fd,
+  regs_lessdef rs trs ->
+  find_function ge ros rs = Some fd ->
+  exists tfd, tunnel_fundef fd = OK tfd /\ find_function tge ros trs = Some tfd.
+Proof.
+  intros. destruct ros; simpl in *.
+  - (* reg *)
+    assert (E: trs # r = rs # r).
+    { exploit Genv.find_funct_inv. apply H0. intros (b & EQ).
+      generalize (H r) . rewrite EQ. intro LD. inv LD. auto. }
+    rewrite E. exploit functions_translated; eauto.
+  - (* ident *)
+    rewrite symbols_preserved. destruct (Genv.find_symbol ge i); inv H0. 
+    exploit function_ptr_translated; eauto.
+    intros (tf & X1 & X2). exists tf; intuition.
+Qed.
+
+Lemma list_forall2_lessdef_rs:
+  forall rs trs args,
+  regs_lessdef rs trs ->
+  list_forall2 Val.lessdef rs ## args trs ## args.
+Proof.
+  intros rs trs args LD.
+  exploit (reglist_lessdef rs trs args). apply LD. 
+  induction args; simpl; intros H; try constructor; inv H.
+  apply H3. apply IHargs. apply H5.
+Qed.
+
+Lemma fn_stacksize_preserved:
+  forall f tf
+  (TF: tunnel_function f = OK tf),
+  fn_stacksize f = fn_stacksize tf.
+Proof.
+  intros f tf. unfold tunnel_function.
+  destruct (check_included _ _); try congruence.
+  intro H. monadInv H. simpl. reflexivity.
+Qed.
+
+Lemma regs_setres_lessdef:
+  forall res vres tvres rs trs,
+  regs_lessdef rs trs -> Val.lessdef vres tvres ->
+  regs_lessdef (regmap_setres res vres rs) (regmap_setres res tvres trs).
+Proof.
+  induction res; intros; simpl; try auto using set_reg_lessdef.
+Qed.
+
+Lemma regmap_optget_lessdef:
+  forall or rs trs,
+  regs_lessdef rs trs -> Val.lessdef (regmap_optget or Vundef rs) (regmap_optget or Vundef trs).
+Proof.
+  intros or rs trs RS.
+  induction or; simpl; auto using set_reg_lessdef.
+Qed.
+
+Lemma tunnel_fundef_Internal:
+  forall (f:function) (tf:fundef)
+  (TF: tunnel_fundef (Internal f) = OK tf),
+  exists (tf':function), tf = (Internal tf') /\ tunnel_function f = OK tf'.
+Proof.
+  intros f tf.
+  unfold tunnel_fundef. simpl. intro H. monadInv H. exists x.
+  split. reflexivity. apply EQ.
+Qed.
+
+Lemma fn_entrypoint_preserved:
+  forall f tf
+  (TF: tunnel_function f = OK tf),
+  fn_entrypoint tf = branch_target f (fn_entrypoint f).
+Proof.
+  intros f tf.
+  unfold tunnel_function. destruct (check_included _ _); try congruence.
+  intro TF. monadInv TF. simpl. reflexivity.
+Qed.
+
+Lemma init_regs_lessdef:
+  forall f tf args targs
+  (ARGS: list_forall2 Val.lessdef args targs)
+  (TF: tunnel_function f = OK tf),
+  regs_lessdef (init_regs args (fn_params f)) (init_regs targs (fn_params tf)).
+Proof.
+  assert (regs_lessdef (Regmap.init Vundef) (Regmap.init Vundef)) as Hundef.
+  { unfold Regmap.init. unfold regs_lessdef. intro. unfold Regmap.get. rewrite PTree.gempty. apply Val.lessdef_undef. }
+
+  intros f tf args targs ARGS.
+
+  unfold tunnel_function. destruct (check_included _ _) eqn:EQinc; try congruence.
+  intro TF. monadInv TF. simpl. destruct (fn_params f) eqn:EQP; simpl.
+  - apply Hundef.
+  - induction ARGS.
+    + apply Hundef.
+    + apply set_reg_lessdef; try assumption. 
+Admitted.
+
 (* `Lemma tunnel_step_correct` correspond au diagramme "option simulation" *)
 Lemma tunnel_step_correct:
   forall st1 t st2, step ge st1 t st2 ->
@@ -368,18 +480,124 @@ Proof.
     intros (tv & EVAL & LD).
     left; eexists; split.
     + eapply exec_Iop with (v:=tv).
-      assert ((fn_code tf) ! pc = Some (Iop op args res (branch_target f pc'))).
-      rewrite (tunnel_function_unfold f tf pc); eauto.
-      rewrite H. simpl. reflexivity.
-      (* TODO: refaire ça joliment *)
-      assert (target_bounds (branch_target f) (bound (branch_target f)) pc (fn_code f)! pc).
-      apply (branch_target_bounds) with tf. apply TF.
-      inv H2. rewrite TB. apply H1. rewrite H in H4. congruence.
-      rewrite H in H4. congruence.
+      apply instruction_type_preserved with (Iop op args res pc'); (simpl; auto)||congruence.
       rewrite <- EVAL. apply eval_operation_preserved. apply symbols_preserved.
     + apply match_states_intro; eauto. apply set_reg_lessdef. apply LD. apply RS.
   - (* Iload *)
-
+    exploit eval_addressing_lessdef; try eassumption.
+    apply reglist_lessdef. apply RS.
+    intros (ta & EVAL & LD).
+    exploit Mem.loadv_extends; try eassumption.
+    intros (tv & LOAD & LD').
+    left. eexists. split.
+    + eapply exec_Iload.
+      * exploit instruction_type_preserved; (simpl; eauto)||congruence.
+      * rewrite <- EVAL. apply eval_addressing_preserved. apply symbols_preserved.
+      * apply LOAD.
+    + apply match_states_intro; try assumption. apply set_reg_lessdef. apply LD'. apply RS.
+  - (* Iload NOTRAP1 *)
+    exploit eval_addressing_lessdef_none; try eassumption.
+    apply reglist_lessdef; apply RS.
+    left. eexists. split.
+    + eapply exec_Iload_notrap1.
+      * exploit instruction_type_preserved; (simpl; eauto)||congruence.
+      * rewrite <- H1. apply eval_addressing_preserved. apply symbols_preserved.
+    + apply match_states_intro; try assumption. apply set_reg_lessdef. unfold default_notrap_load_value. apply Val.lessdef_undef. apply RS.
+  - (* Iload NOTRAP2 *)
+    exploit eval_addressing_lessdef; try eassumption.
+    apply reglist_lessdef; apply RS.
+    intros (ta & EVAL & LD).
+    (* TODO: on peut sans doute factoriser ça *)
+    destruct (Mem.loadv chunk tm ta) eqn:Htload.
+    + left; eexists; split.
+      eapply exec_Iload.
+      * exploit instruction_type_preserved; (simpl; eauto)||congruence.
+      * rewrite <- EVAL. apply eval_addressing_preserved. apply symbols_preserved.
+      * apply Htload.
+      * apply match_states_intro; try assumption. apply set_reg_lessdef; eauto.
+    + left; eexists; split.
+      eapply exec_Iload_notrap2.
+      * exploit instruction_type_preserved; (simpl; eauto)||congruence.
+      * rewrite <- EVAL. apply eval_addressing_preserved. apply symbols_preserved.
+      * apply Htload.
+      * apply match_states_intro; try assumption. apply set_reg_lessdef; eauto.
+  - (* Lstore *)
+    exploit eval_addressing_lessdef; try eassumption.
+    apply reglist_lessdef; apply RS.
+    intros (ta & EVAL & LD).
+    exploit Mem.storev_extends; try eassumption. apply RS.
+    intros (tm' & STORE & MEM').
+    left. eexists. split.
+    + eapply exec_Istore.
+      * exploit instruction_type_preserved; (simpl; eauto)||congruence.
+      * rewrite <- EVAL. apply eval_addressing_preserved. apply symbols_preserved.
+      * rewrite STORE. reflexivity.
+    + apply match_states_intro; try eassumption.
+  - (* Icall *)
+    left.
+    exploit find_function_translated; try eassumption.
+    intros (tfd & TFD & FIND).
+    eexists. split.
+    + eapply exec_Icall.
+      * exploit instruction_type_preserved; (simpl; eauto)||congruence.
+      * apply FIND.
+      * apply sig_preserved. apply TFD.
+    + apply match_states_call; try assumption.
+      * apply list_forall2_cons; try assumption. apply match_stackframes_intro; try assumption.
+      * apply list_forall2_lessdef_rs. apply RS.
+  - (* Itailcall *)
+    exploit find_function_translated; try eassumption.
+    intros (tfd & TFD & FIND).
+    exploit Mem.free_parallel_extends; try eassumption.
+    intros (tm' & FREE & MEM').
+    left. eexists. split.
+    + eapply exec_Itailcall.
+      * exploit instruction_type_preserved; (simpl; eauto)||congruence.
+      * apply FIND.
+      * apply sig_preserved. apply TFD.
+      * erewrite <- fn_stacksize_preserved; try eassumption.
+    + apply match_states_call; try assumption.
+      apply list_forall2_lessdef_rs. apply RS.
+  - (* Ibuiltin *)
+    exploit eval_builtin_args_lessdef; try eassumption. apply RS.
+    intros (vl2 & EVAL & LD).
+    exploit external_call_mem_extends; try eassumption.
+    intros (tvres & tm' & EXT & LDRES & MEM' & UNCHGD).
+    left. eexists. split.
+    + eapply exec_Ibuiltin.
+      * exploit instruction_type_preserved; (simpl; eauto)||congruence.
+      * eapply eval_builtin_args_preserved. eapply symbols_preserved. eapply EVAL.
+      * eapply external_call_symbols_preserved. eapply senv_preserved. eapply EXT.
+    + apply match_states_intro; try assumption. apply regs_setres_lessdef; try assumption.
+  - (* Icond *)
+    admit.
+  - (* Ijumptable *)
+    left. eexists. split.
+    + eapply exec_Ijumptable.
+      * exploit instruction_type_preserved; (simpl; eauto)||congruence.
+      * generalize (RS arg). rewrite H0. intro. inv H2. reflexivity.
+      * rewrite list_nth_z_map. rewrite H1. simpl. reflexivity.
+    + apply match_states_intro; try eassumption.
+  - (* Ireturn *)
+    exploit Mem.free_parallel_extends; try eassumption.
+    intros (tm' & FREE & MEM').
+    left. eexists. split.
+    + eapply exec_Ireturn.
+      * exploit instruction_type_preserved; (simpl; eauto)||congruence.
+      * erewrite <- fn_stacksize_preserved. eapply FREE. eapply TF.
+    + apply match_states_return; try eassumption.
+      apply regmap_optget_lessdef. apply RS.
+  - (* internal function *)
+    exploit tunnel_fundef_Internal; try eassumption.
+    intros (tf' & EQ & TF'). subst.
+    exploit Mem.alloc_extends; try eassumption. reflexivity. reflexivity.
+    intros (m2' & ALLOC & EXT).
+    left. eexists. split.
+    + eapply exec_function_internal.
+      rewrite <- (fn_stacksize_preserved f tf'). eapply ALLOC. eapply TF'.
+    + rewrite (fn_entrypoint_preserved f tf'); try eassumption. apply match_states_intro; try eassumption.
+      apply init_regs_lessdef.
+    
 
 Qed.
 
-- 
cgit 


From 33958cecc396be3d7ba8bf369b2039c633d39849 Mon Sep 17 00:00:00 2001
From: Pierre Goutagny <pierre.goutagny@ens-lyon.fr>
Date: Fri, 11 Jun 2021 13:30:20 +0200
Subject: Complete RTLTunnelingproof

There is still some refactoring to do, though
---
 backend/RTLTunnelingproof.v | 82 ++++++++++++++++++++++++++++++++++++---------
 1 file changed, 66 insertions(+), 16 deletions(-)

diff --git a/backend/RTLTunnelingproof.v b/backend/RTLTunnelingproof.v
index 53f8bbf7..7f6696ad 100644
--- a/backend/RTLTunnelingproof.v
+++ b/backend/RTLTunnelingproof.v
@@ -344,7 +344,7 @@ Proof.
   apply H. apply IHargs.
 Qed.
 
-Lemma instruction_type_preserved: 
+Lemma instruction_type_preserved:
   forall (f tf:function) (pc:node) (i ti:instruction)
   (TF: tunnel_function f = OK tf)
   (FATPC: (fn_code f) ! pc = Some i)
@@ -362,7 +362,7 @@ Proof.
 Qed.
 
 (* rq: utilisée pour `tunnel_step_correct` mais je ne comprends pas tout *)
-(**) Lemma find_function_translated: 
+(**) Lemma find_function_translated:
   forall ros rs trs fd,
   regs_lessdef rs trs ->
   find_function ge ros rs = Some fd ->
@@ -375,7 +375,7 @@ Proof.
       generalize (H r) . rewrite EQ. intro LD. inv LD. auto. }
     rewrite E. exploit functions_translated; eauto.
   - (* ident *)
-    rewrite symbols_preserved. destruct (Genv.find_symbol ge i); inv H0. 
+    rewrite symbols_preserved. destruct (Genv.find_symbol ge i); inv H0.
     exploit function_ptr_translated; eauto.
     intros (tf & X1 & X2). exists tf; intuition.
 Qed.
@@ -386,7 +386,7 @@ Lemma list_forall2_lessdef_rs:
   list_forall2 Val.lessdef rs ## args trs ## args.
 Proof.
   intros rs trs args LD.
-  exploit (reglist_lessdef rs trs args). apply LD. 
+  exploit (reglist_lessdef rs trs args). apply LD.
   induction args; simpl; intros H; try constructor; inv H.
   apply H3. apply IHargs. apply H5.
 Qed.
@@ -418,15 +418,25 @@ Proof.
 Qed.
 
 Lemma tunnel_fundef_Internal:
-  forall (f:function) (tf:fundef)
+  forall (f: function) (tf: fundef)
   (TF: tunnel_fundef (Internal f) = OK tf),
-  exists (tf':function), tf = (Internal tf') /\ tunnel_function f = OK tf'.
+  exists (tf': function), tf = (Internal tf') /\ tunnel_function f = OK tf'.
 Proof.
   intros f tf.
   unfold tunnel_fundef. simpl. intro H. monadInv H. exists x.
   split. reflexivity. apply EQ.
 Qed.
 
+Lemma tunnel_fundef_External:
+  forall (ef: external_function) (tf: fundef)
+  (TF: tunnel_fundef (External ef) = OK tf),
+  tf = (External ef).
+Proof.
+  intros f tf.
+  unfold tunnel_fundef. simpl. intro H. monadInv H. reflexivity.
+Qed.
+
+
 Lemma fn_entrypoint_preserved:
   forall f tf
   (TF: tunnel_function f = OK tf),
@@ -449,12 +459,26 @@ Proof.
   intros f tf args targs ARGS.
 
   unfold tunnel_function. destruct (check_included _ _) eqn:EQinc; try congruence.
-  intro TF. monadInv TF. simpl. destruct (fn_params f) eqn:EQP; simpl.
-  - apply Hundef.
-  - induction ARGS.
-    + apply Hundef.
-    + apply set_reg_lessdef; try assumption. 
-Admitted.
+  intro TF. monadInv TF. simpl. 
+  (* 
+  induction ARGS.
+  - induction (fn_params f) eqn:EQP; apply Hundef.
+  - induction (fn_params f) eqn:EQP.
+    * simpl. apply Hundef.
+    * simpl. apply set_reg_lessdef. apply H. 
+    *)
+    
+  generalize (fn_params f) as l. induction ARGS; induction l; try (simpl; apply Hundef).
+    simpl. apply set_reg_lessdef; try assumption. apply IHARGS.
+Qed.
+
+Lemma lessdef_forall2_list:
+  forall args ta,
+  list_forall2 Val.lessdef args ta -> Val.lessdef_list args ta.
+Proof.
+  intros args ta H. induction H. apply Val.lessdef_list_nil. apply Val.lessdef_list_cons. apply H. apply IHlist_forall2.
+Qed.
+
 
 (* `Lemma tunnel_step_correct` correspond au diagramme "option simulation" *)
 Lemma tunnel_step_correct:
@@ -570,7 +594,22 @@ Proof.
       * eapply external_call_symbols_preserved. eapply senv_preserved. eapply EXT.
     + apply match_states_intro; try assumption. apply regs_setres_lessdef; try assumption.
   - (* Icond *)
-    admit.
+    simpl. exploit branch_target_bounds. apply TF. rewrite H. intro. inv H1.
+    + (* TB_default *)
+      rewrite TB.
+      destruct (fn_code tf)!pc as [[]|] eqn:EQ;
+      assert (tunnel_function f = OK tf) as TF'; auto;
+      unfold tunnel_function in TF; destruct (check_included _ _) in TF; monadInv TF;
+      simpl in EQ; rewrite PTree.gmap1 in EQ; rewrite H in EQ; simpl in EQ; destruct (peq _ _) eqn: EQpeq in EQ; try congruence.
+      * left. eexists. split. 
+        -- eapply exec_Inop. simpl. rewrite PTree.gmap1. rewrite H. simpl. rewrite EQpeq. reflexivity.
+        -- destruct b. apply match_states_intro; eauto. rewrite e. apply match_states_intro; eauto.
+      * left. eexists. split.
+        -- eapply exec_Icond; auto. simpl. rewrite PTree.gmap1. rewrite H. simpl. rewrite EQpeq. reflexivity. eapply eval_condition_lessdef; try eassumption. apply reglist_lessdef. apply RS.
+        -- destruct b; apply match_states_intro; auto.
+    + (* TB_cond *) right; repeat split; destruct b; try assumption.
+      * rewrite EQSO. apply match_states_intro; try assumption.
+      * rewrite EQNOT. apply match_states_intro; try assumption.
   - (* Ijumptable *)
     left. eexists. split.
     + eapply exec_Ijumptable.
@@ -596,9 +635,20 @@ Proof.
     + eapply exec_function_internal.
       rewrite <- (fn_stacksize_preserved f tf'). eapply ALLOC. eapply TF'.
     + rewrite (fn_entrypoint_preserved f tf'); try eassumption. apply match_states_intro; try eassumption.
-      apply init_regs_lessdef.
-    
-
+      apply init_regs_lessdef. apply ARGS. apply TF'.
+  - (* external function *)
+    exploit external_call_mem_extends. eapply H. eapply MEM.  eapply lessdef_forall2_list. eapply ARGS.
+    intros (tvres & tm' & EXTCALL & LD & EXT & MEMUNCHGD).
+    left. eexists. split.
+    + erewrite (tunnel_fundef_External ef tf); try eassumption.
+      eapply exec_function_external. eapply external_call_symbols_preserved. eapply senv_preserved. eapply EXTCALL.
+    + eapply match_states_return; try assumption.
+  - (* return *)
+    inv STK. inv H1.
+    left. eexists. split.
+    + eapply exec_return.
+    + eapply match_states_intro; try assumption.
+      apply set_reg_lessdef; try assumption.
 Qed.
 
 
-- 
cgit 


From 2b814b1f9bb30d9c8b59a713f69bced808bca7c7 Mon Sep 17 00:00:00 2001
From: "nicolas.nardino" <nicolas.nardino@ens-lyon.fr>
Date: Sat, 12 Jun 2021 10:52:59 +0200
Subject: work on the scheduler

---
 aarch64/PostpassSchedulingOracle.ml |   1 +
 aarch64/PrepassSchedulingOracle.ml  |   4 +-
 scheduling/InstructionScheduler.ml  | 146 ++++++++++++++++++++++++++----------
 scheduling/InstructionScheduler.mli |   4 +
 4 files changed, 114 insertions(+), 41 deletions(-)

diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml
index 834d42f5..867341ca 100644
--- a/aarch64/PostpassSchedulingOracle.ml
+++ b/aarch64/PostpassSchedulingOracle.ml
@@ -509,6 +509,7 @@ let build_problem bb =
     resource_bounds = opweights.pipelined_resource_bounds;
     live_regs_entry = Registers.Regset.empty; (* unused here *)
     typing = (fun x -> AST.Tint);                (* unused here *)
+    pressure_deltas = [| [| |] |] ;
     instruction_usages = instruction_usages bb;
     latency_constraints = latency_constraints bb;
   }
diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml
index 6d445f10..19f05749 100644
--- a/aarch64/PrepassSchedulingOracle.ml
+++ b/aarch64/PrepassSchedulingOracle.ml
@@ -201,6 +201,7 @@ let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.
     end seqa;
   !latency_constraints;;
 
+
 let get_pressure_deltas (seqa : (instruction * Regset.t) array)
   (typing : RTLtyping.regenv)
     : int array array =
@@ -459,7 +460,8 @@ let define_problem (opweights : opweights) (live_entry_regs : Regset.t)
     resource_bounds = opweights.pipelined_resource_bounds;
     live_regs_entry = live_entry_regs;
     typing = typing;
-    instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa);
+    pressure_deltas = get_pressure_deltas seqa typing;
+      instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa);
     latency_constraints =
       (* if (use_alias_analysis ())
       then (get_alias_dependencies seqa) @ simple_deps
diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml
index 08164293..8d8c4267 100644
--- a/scheduling/InstructionScheduler.ml
+++ b/scheduling/InstructionScheduler.ml
@@ -35,6 +35,7 @@ type problem = {
   resource_bounds : int array;
   live_regs_entry : Registers.Regset.t;
   typing : RTLtyping.regenv;
+  pressure_deltas : int array array;
   instruction_usages : int array array;
   latency_constraints : latency_constraint list;
   };;
@@ -348,60 +349,124 @@ let reg_pres_scheduler (problem : problem) : solution option =
 
   let available_regs = Array.copy Machregsaux.nr_regs in
 
+  let nr_types_regs = Array.length available_regs in
+
+  let regs_thresholds = Array.init nr_types_regs
+                          (fun i -> 5) in
+  (* placeholder value *)
+
   List.iter (fun r -> let classe = Machregsaux.class_of_type
                                   (problem.typing r) in
                    available_regs.(classe)
                    <- available_regs.(classe) - 1)
     (Registers.Regset.elements live_regs_entry);
 
-  let nr_types_regs = Array.length available_regs in
   
-  (* wait di we have access to instructions here? No, we have to add
-  al this to constraints *)
-  (* let pressures =
-   *   Array.init (nr_instructions) (fun i ->
-   *       Array.init (nr_types_regs) (fun t ->
-   *           match i with
-   *           | Inop -> 0
-   *           | Iop (_, args, dest, _)
-   *           | Iload(_, _, _, args, dest, _) ->
-   *              if
-   *         )) *)
+  let pressures = problem.pressure_deltas in
   
   let priorities = critical_paths successors in
+  
+  let current_resources = Array.copy problem.resource_bounds in
 
-  let module InstrSetCSP =
-    Set.Make (struct
-        type t=int
-        let compare x y =
-          match priorities.(y) - priorities.(x) with
-          | 0 -> x - y
-          | z -> z
-      end) in
-
-  (* TODO: find a way to efficiently find an instruction which
-  decreases register pressure *)
-  (* idea: *)
-  (* let module InstrSetCSR =
-   *   Set.Make (struct
-   *       type t = int
-   *       let compare x y =
-   *         match pressures.(y) - pressures.(x) with
-   *         | 0 -> (match priorities.(y) - priorities.(x) with
-   *                | 0 -> x - y
-   *                | z -> z)
-   *         | z -> z
-   *     end) in *)
-  (* where pressure.(x) is the delta of register pressure for
-  instruction x. Pb: different register types. Need to think about
-  it. Have one module for each register type, that's used when this
-  particular type reach a high pressure? *)
+  let module InstrSet =
+    struct 
+      module MSet = 
+        Set.Make (struct
+            type t=int
+            let compare x y =
+              match priorities.(y) - priorities.(x) with
+              | 0 -> x - y
+              | z -> z
+          end)
+
+      let empty = MSet.empty
+      let is_empty = MSet.is_empty
+      let mem = MSet.mem
+      let add = MSet.add
+      let remove = MSet.remove
+      let union = MSet.union
+      let inter = MSet.inter
+      let iter = MSet.iter
+
+      let compare_regs i x y =
+        match pressures.(y).(i) - pressures.(x).(i) with
+        | 0 -> (match priorities.(y) - priorities.(x) with
+               | 0 -> x - y
+               | z -> z)
+        | z -> z
+
+      (** t is the register class *)
+      let sched_CSR t ready usages =
+        let result = ref (-1) in
+        iter (fun i ->
+            if vector_less_equal usages.(i) current_resources
+            then if !result  = -1 || (compare_regs t !result i < 0)
+                 then result := i) ready;
+        !result
+    end
+  in
+
+  (* [compare i] is comp function for finding instruction with
+  lowest reg pres delta for reg class i *)
 
   let max_time = bound_max_time problem in
-  let ready = Array.make max_time InstrSetCSP.empty in
+  let ready = Array.make max_time InstrSet.empty in
 
+  Array.iteri (fun i preds ->
+      if i < nr_instructions && preds = []
+      then ready.(0) <- InstrSet.add i ready.(0)) predecessors;
+
+  let current_time = ref 0
+  and earliest_time i =
+    try
+      let time = ref (-1) in
+      List.iter (fun (j, latency) ->
+          if times.(j) < 0
+          then raise Exit
+          else let t = times.(j) + latency in
+               if t > !time
+               then time := t) predecessors.(i);
+      assert (!time >= 0);
+      !time
+    with Exit -> -1
+  in
+
+  let advance_time () =
+    (if !current_time < max_time-1
+     then (
+       Array.blit problem.resource_bounds 0 current_resources 0
+         (Array.length current_resources);
+       ready.(!current_time + 1) <-
+         InstrSet.union (ready.(!current_time))
+           (ready.(!current_time +1));
+       ready.(!current_time) <- InstrSet.empty));
+    incr current_time
+  in
+  
+  let attempt_scheduling ready usages =
+    let result = ref (-1) in
+    try
+      Array.iteri (fun i avlregs ->
+          if avlregs <= regs_thresholds.(i)
+          then (
+            let maybe = InstrSet.sched_CSR i ready usages in
+            (if pressures.(maybe).(i) < 0 then
+              (vector_subtract usages.(maybe) current_resources;
+               result := maybe));
+            raise Exit)
+        ) available_regs;
+      InstrSet.iter (fun i ->
+          if vector_less_equal usages.(i) current_resources
+          then (
+            vector_subtract usages.(i) current_resources;
+            result := i;
+            raise Exit));
+      -1
+    with Exit -> !result in
+  
   (* silence warning, enable compilation while working *)
-  let _ = successors, predecessors, times, ready, nr_types_regs in
+  let _ = successors, predecessors, times, ready, compare,
+          earliest_time, advance_time, nr_types_regs in
   (* PLACEHOLDER *)
   None
 
@@ -515,6 +580,7 @@ let reverse_problem problem =
        with creating a reverse scheduler aware of reg press *)
     
     typing = problem.typing;
+    pressure_deltas = [| [| |] |] ;
     instruction_usages = Array.init (nr_instructions + 1)
       (fun i ->
         if i=0
diff --git a/scheduling/InstructionScheduler.mli b/scheduling/InstructionScheduler.mli
index 8dcc4ef5..e7f9e7db 100644
--- a/scheduling/InstructionScheduler.mli
+++ b/scheduling/InstructionScheduler.mli
@@ -29,6 +29,10 @@ type problem = {
     typing : RTLtyping.regenv;
     (** Register type map. *)
     
+    pressure_deltas : int array array;
+    (** At index (i, j), the pressure delta for instruction i, for
+    register class j. *)
+    
     instruction_usages: int array array;
     (** At index {i i} the vector of resources used by instruction number {i i}. It must be the same length as [resource_bounds] *)
 
-- 
cgit 


From 66e15205c40de54639387a4c9b1cc78994525d55 Mon Sep 17 00:00:00 2001
From: "nicolas.nardino" <nicolas.nardino@ens-lyon.fr>
Date: Mon, 14 Jun 2021 13:53:08 +0200
Subject: scheduler written, need to test now

---
 driver/Driver.ml                   |  2 +-
 scheduling/InstructionScheduler.ml | 84 ++++++++++++++++++++++++++++++--------
 2 files changed, 68 insertions(+), 18 deletions(-)

diff --git a/driver/Driver.ml b/driver/Driver.ml
index 7192ba4b..5a8c7f2c 100644
--- a/driver/Driver.ml
+++ b/driver/Driver.ml
@@ -210,7 +210,7 @@ Processing options:
   -mtune=         Type of CPU (for scheduling on some architectures)
   -fprepass      Perform prepass scheduling (only on some architectures) [on]
   -fprepass= <optim> Perform postpass scheduling with the specified optimization [list]
-                   (<optim>=list: list scheduling, <optim>=revlist: reverse list scheduling, <optim>=zigzag: zigzag scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles)
+                   (<optim>=list: list scheduling, <optim>=revlist: reverse list scheduling, <optim>=regpres: list scheduling aware of register pressure, <optim>=zigzag: zigzag scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles)
   -fpostpass     Perform postpass scheduling (only for K1 architecture) [on] 
   -fpostpass= <optim> Perform postpass scheduling with the specified optimization [list]
                    (<optim>=list: list scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles)
diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml
index 8d8c4267..a069de59 100644
--- a/scheduling/InstructionScheduler.ml
+++ b/scheduling/InstructionScheduler.ml
@@ -121,6 +121,13 @@ let vector_less_equal a b =
     true
   with Exit -> false;;
 
+(* let vector_add a b =
+ *   assert ((Array.length a) = (Array.length b));
+ *   for i=0 to (Array.length a)-1
+ *   do
+ *     b.(i) <- b.(i) + a.(i)
+ *   done;; *)
+
 let vector_subtract a b =
   assert ((Array.length a) = (Array.length b));
   for i=0 to (Array.length a)-1
@@ -341,6 +348,7 @@ let _ = fun x -> priority_list_scheduler INSTRUCTION_ORDER x;;
 
 (* A scheduler sensitive to register pressure *)
 let reg_pres_scheduler (problem : problem) : solution option =
+  DebugPrint.debug_flag := true;
   let nr_instructions = get_nr_instructions problem in
   let successors = get_successors problem
   and predecessors = get_predecessors problem
@@ -381,11 +389,9 @@ let reg_pres_scheduler (problem : problem) : solution option =
 
       let empty = MSet.empty
       let is_empty = MSet.is_empty
-      let mem = MSet.mem
       let add = MSet.add
       let remove = MSet.remove
       let union = MSet.union
-      let inter = MSet.inter
       let iter = MSet.iter
 
       let compare_regs i x y =
@@ -400,15 +406,12 @@ let reg_pres_scheduler (problem : problem) : solution option =
         let result = ref (-1) in
         iter (fun i ->
             if vector_less_equal usages.(i) current_resources
-            then if !result  = -1 || (compare_regs t !result i < 0)
+            then if !result = -1 || (compare_regs t !result i < 0)
                  then result := i) ready;
         !result
     end
   in
 
-  (* [compare i] is comp function for finding instruction with
-  lowest reg pres delta for reg class i *)
-
   let max_time = bound_max_time problem in
   let ready = Array.make max_time InstrSet.empty in
 
@@ -450,25 +453,71 @@ let reg_pres_scheduler (problem : problem) : solution option =
           if avlregs <= regs_thresholds.(i)
           then (
             let maybe = InstrSet.sched_CSR i ready usages in
-            (if pressures.(maybe).(i) < 0 then
+            (* print_int maybe;
+             * print_newline ();
+             * flush stdout; *)
+            if maybe > 0 && pressures.(maybe).(i) < 0 then
               (vector_subtract usages.(maybe) current_resources;
-               result := maybe));
-            raise Exit)
-        ) available_regs;
+               result := maybe;
+               raise Exit))) available_regs;
       InstrSet.iter (fun i ->
           if vector_less_equal usages.(i) current_resources
           then (
             vector_subtract usages.(i) current_resources;
             result := i;
-            raise Exit));
+            raise Exit)) ready;
       -1
-    with Exit -> !result in
+    with Exit ->
+          if !result <> -1 then
+            vector_subtract pressures.(!result) available_regs;
+          !result in
   
-  (* silence warning, enable compilation while working *)
-  let _ = successors, predecessors, times, ready, compare,
-          earliest_time, advance_time, nr_types_regs in
-  (* PLACEHOLDER *)
-  None
+  while !current_time < max_time
+  do
+    if (InstrSet.is_empty ready.(!current_time))
+    then advance_time ()
+    else
+      match attempt_scheduling ready.(!current_time)
+              problem.instruction_usages with
+      | -1 -> advance_time()
+      | i -> (assert(times.(i) < 0);
+             times.(i) <- !current_time;
+             ready.(!current_time)
+             <- InstrSet.remove i (ready.(!current_time));
+             List.iter (fun (instr_to, latency) ->
+                 if instr_to < nr_instructions then
+                   match earliest_time instr_to with
+                   | -1 -> ()
+                   | to_time ->
+                      ready.(to_time)
+                      <- InstrSet.add instr_to ready.(to_time)
+               ) successors.(i);
+             successors.(i) <- []
+            )
+  done;
+
+  try
+    let final_time = ref (-1) in
+    for i = 0 to nr_instructions - 1 do
+      (* print_int i;
+       * flush stdout; *)
+      (if times.(i) < 0 then raise Exit);
+      (if !final_time < times.(i) + 1 then final_time := times.(i) + 1)
+    done;
+    List.iter (fun (i, latency) ->
+        let target_time = latency + times.(i) in
+        if target_time > !final_time then
+          final_time := target_time) predecessors.(nr_instructions);
+    times.(nr_instructions) <- !final_time;
+    DebugPrint.debug_flag := false;
+    Some times
+  with Exit ->
+    DebugPrint.debug "reg_pres_sched failed\n";
+    DebugPrint.debug_flag := false;
+    None
+              
+;;
+
 
 
 type bundle = int list;;
@@ -1402,5 +1451,6 @@ let scheduler_by_name name =
   | "ilp" -> validated_scheduler cascaded_scheduler
   | "list" -> validated_scheduler list_scheduler
   | "revlist" -> validated_scheduler reverse_list_scheduler
+  | "regpres" -> validated_scheduler reg_pres_scheduler
   | "greedy" -> greedy_scheduler
   | s -> failwith ("unknown scheduler: " ^ s);;
-- 
cgit 


From 3eb3751f84348a20b7ce211fdbf1d01a9c4685a8 Mon Sep 17 00:00:00 2001
From: "nicolas.nardino" <nicolas.nardino@ens-lyon.fr>
Date: Mon, 14 Jun 2021 14:46:01 +0200
Subject: One fewer spill with new sched on `test/.../spille_forw.c`

---
 test/nardino/scheduling/spille_forw.c | 53 +++++++++++++++++++++++++++++++++--
 1 file changed, 50 insertions(+), 3 deletions(-)

diff --git a/test/nardino/scheduling/spille_forw.c b/test/nardino/scheduling/spille_forw.c
index 0c69efd5..770dfce5 100644
--- a/test/nardino/scheduling/spille_forw.c
+++ b/test/nardino/scheduling/spille_forw.c
@@ -1,6 +1,6 @@
 #include <stdio.h>
 
-int f(int n) {
+int f(int n, float * arr) {
 	float a1 = (float) n;
 	float b1 = 2.*a1;
 	float c = a1;
@@ -79,6 +79,48 @@ int f(int n) {
 	float a26 = (float) n+25;
 	float b26 = 2.*a26;
 	c += a26;
+	float a27 = (float) n+26;
+	float b27 = 2.*a27;
+	c += a27;
+	float a28 = (float) n+27;
+	float b28 = 2.*a28;
+	c += a28;
+	float a29 = (float) n+28;
+	float b29 = 2.*a29;
+	c += a29;
+	float a30 = (float) n+29;
+	float b30 = 2.*a30;
+	c += a30;
+	arr[0] = a1;
+	arr[1] = a2;
+	arr[2] = a3;
+	arr[3] = a4;
+	arr[4] = a5;
+	arr[5] = a6;
+	arr[6] = a7;
+	arr[7] = a8;
+	arr[8] = a9;
+	arr[9] = a10;
+	arr[10] = a11;
+	arr[11] = a12;
+	arr[12] = a13;
+	arr[13] = a14;
+	arr[14] = a15;
+	arr[15] = a16;
+	arr[16] = a17;
+	arr[17] = a18;
+	arr[18] = a19;
+	arr[19] = a20;
+	arr[20] = a21;
+	arr[21] = a22;
+	arr[22] = a23;
+	arr[23] = a24;
+	arr[24] = a25;
+	arr[25] = a26;
+	arr[26] = a27;
+	arr[27] = a28;
+	arr[28] = a29;
+	arr[29] = a30;
 	return c +
 		b1+
 		b2+
@@ -105,7 +147,11 @@ int f(int n) {
 		b23+
 		b24+
 		b25+
-		b26;
+		b26+
+		b27+
+		b28+
+		b29+
+		b30;
 }
 
 
@@ -114,6 +160,7 @@ int f(int n) {
 
 
 int main(int argc, char *argv[]) {
-    f(5);
+	float arr[30];
+    f(5, arr);
     return 0;
 }
-- 
cgit 


From 77d1ab5f7a6d04970185102508f1cec529c7cb40 Mon Sep 17 00:00:00 2001
From: Pierre Goutagny <pierre.goutagny@ens-lyon.fr>
Date: Mon, 14 Jun 2021 15:36:55 +0200
Subject: Add the RTLTunneling oracle

---
 backend/RTLTunnelingaux.ml | 284 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 284 insertions(+)
 create mode 100644 backend/RTLTunnelingaux.ml

diff --git a/backend/RTLTunnelingaux.ml b/backend/RTLTunnelingaux.ml
new file mode 100644
index 00000000..a30b43cf
--- /dev/null
+++ b/backend/RTLTunnelingaux.ml
@@ -0,0 +1,284 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Sylvain Boulmé  Grenoble-INP, VERIMAG             *)
+(*           TODO: Proper author information                   *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+(*
+
+This file implements the [branch_target] oracle that identifies "nop" branches in a RTL function,
+and computes their target node with the distance (ie the number of cummulated nops) toward this target.
+
+See [RTLTunneling.v]
+
+*)
+
+open Coqlib
+open RTL
+open Maps
+open Camlcoq
+
+let limit_tunneling = None (* for debugging: [Some x] limit the number of iterations *)
+let debug_flag = ref true
+let final_dump = false   (* set to true to have a more verbose debugging *)
+
+let debug fmt =
+  if !debug_flag then Printf.eprintf fmt
+  else Printf.ifprintf stderr fmt
+
+exception BugOnPC of int
+
+let nopcounter = ref 0
+
+(* type of labels in the cfg *)
+type label = int * P.t
+
+(* instructions under analyzis *)
+type simple_inst = (* a simplified view of RTL instructions *)
+  INOP of node
+| ICOND of node * node
+| OTHER
+and node = {
+    lab : label;
+    mutable inst: simple_inst;
+    mutable link: node; (* link in the union-find: itself for non "nop"-nodes, target of the "nop" otherwise *)
+    mutable dist: int;
+    mutable tag: int
+  }
+
+(* type of the (simplified) CFG *)
+type cfg = {
+    nodes: (int, node) Hashtbl.t;
+    mutable rems: node list; (* remaining conditions that may become lbranch or not *)
+    mutable num_rems: int;
+    mutable iter_num: int (* number of iterations in elimination of conditions *)
+  }
+
+let lab_i (n: node): int = fst n.lab
+let lab_p (n: node): P.t = snd n.lab
+
+let rec target c n = (* inspired from the "find" of union-find algorithm *)
+  match n.inst with
+  | ICOND(s1,s2) ->
+     if n.link != n
+     then update c n
+     else if n.tag < c.iter_num then (
+       (* we try to change the condition ... *)
+       n.tag <- c.iter_num; (* ... but at most once by iteration *)
+       let ts1 = target c s1 in
+       let ts2 = target c s2 in
+       if ts1 == ts2 then (n.link <- ts1; ts1) else n
+     ) else n
+  | _ ->
+     if n.link != n
+     then update c n
+     else n
+and update c n =
+  let t = target c n.link in
+  n.link <- t; t
+
+let get_node c p =
+  let li = P.to_int p in
+  try
+    Hashtbl.find c.nodes li
+  with
+    Not_found ->
+      let rec n = { lab = (li, p); inst = OTHER; link = n ; dist = 0; tag = 0 }  in
+      Hashtbl.add c.nodes li n;
+      n
+
+let set_branch c p s =
+  let li = P.to_int p in
+  try
+    let n = Hashtbl.find c.nodes li in
+    n.inst <- INOP s;
+    n.link <- target c s
+  with
+    Not_found ->
+      let n = { lab = (li,p); inst = INOP s; link = target c s; dist = 0; tag = 0 } in
+      Hashtbl.add c.nodes li n
+
+
+(* build [c.nodes] and accumulate conditions in [acc] *)
+let build_simplified_cfg c acc pc i =
+  match i with
+  | Inop s ->
+     let ns = get_node c s in
+     set_branch c pc ns;
+     incr nopcounter;
+     acc
+  | Icond (_, _, s1, s2, _) ->
+     c.num_rems <- c.num_rems + 1;
+     let ns1 = get_node c s1 in
+     let ns2 = get_node c s2 in
+     let npc = get_node c pc in
+     npc.inst <- ICOND(ns1, ns2);
+     npc::acc
+  | _ -> acc
+
+(* try to change a condition into a branch
+[acc] is the current accumulator of conditions to consider in the next iteration of repeat_change_cond
+*)
+let try_change_cond c acc pc =
+  match pc.inst with
+  | ICOND(s1,s2) ->
+     let ts1 = target c s1 in
+     let ts2 = target c s2 in
+     if ts1 == ts2 then (
+       pc.link <- ts1;
+       c.num_rems <- c.num_rems - 1;
+       acc
+     ) else
+       pc::acc
+  | _  -> raise (BugOnPC (lab_i pc)) (* ICOND expected *)
+
+(* repeat [try_change_cond] until no condition is changed into a branch *)
+let rec repeat_change_cond c =
+  c.iter_num <- c.iter_num + 1;
+  debug "++ RTLTunneling.branch_target %d: remaining number of conds to consider = %d\n" (c.iter_num) (c.num_rems);
+  let old = c.num_rems in
+  c.rems <- List.fold_left (try_change_cond c) [] c.rems;
+  let curr = c.num_rems in
+  let continue =
+    match limit_tunneling with
+    | Some n -> curr < old && c.iter_num < n
+    | None -> curr < old
+  in
+  if continue
+  then repeat_change_cond c
+
+
+(* compute the final distance of each nop nodes to its target *)
+let undef_dist = -1
+let self_dist = undef_dist-1
+let rec dist n =
+  if n.dist = undef_dist
+  then (
+    n.dist <- self_dist; (* protection against an unexpected loop in the data-structure *)
+    n.dist <-
+      (match n.inst with
+       | OTHER -> 0
+       | INOP p -> 1 + dist p
+       | ICOND (p1,p2) -> 1 + (max (dist p1) (dist p2)));
+    n.dist
+  ) else if n.dist=self_dist then raise (BugOnPC (lab_i n))
+    else n.dist
+
+let final_export f c =
+  let count = ref 0 in
+  let filter_nops_init_dist _ n acc =
+    let tn = target c n in
+    if tn == n
+    then (
+      n.dist <- 0; (* force [n] to be a base case in the recursion of [dist] *)
+      acc
+    ) else (
+      n.dist <- undef_dist; (* force [dist] to compute the actual [n.dist] *)
+      count := !count+1;
+      n::acc
+    )
+  in
+  let nops = Hashtbl.fold filter_nops_init_dist c.nodes [] in
+  let res = List.fold_left (fun acc n -> PTree.set (lab_p n) (lab_p n.link, Z.of_uint (dist n)) acc) PTree.empty nops in
+  debug "* RTLTunneling.branch_target: [stats] initial number of nops = %d\n" !nopcounter;
+  debug "* RTLTunneling.branch_target: [stats] final number of eliminated nops = %d\n" !count;
+  res
+
+(*********************************************)
+(*** START: printing and debugging functions *)
+
+let string_of_labeli nodes ipc =
+  try
+    let pc = Hashtbl.find nodes ipc in
+    if pc.link == pc
+    then Printf.sprintf "(Target@%d)" (dist pc)
+    else Printf.sprintf "(Nop %d @%d)" (lab_i pc.link) (dist pc)
+  with
+    Not_found -> ""
+
+let print_instr c println (pc, i) =
+  match i with
+  | Inop s -> (if println then debug "\n"); debug "%d:Inop %d %s\n" pc (P.to_int s) (string_of_labeli c.nodes pc); false
+  | Icond (_, _, s1, s2, _) -> (if println then debug "\n"); debug "%d:Icond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (string_of_labeli c.nodes pc); false
+  | _ -> debug "%d " pc; true
+
+
+let print_cfg f c  =
+  let a = Array.of_list (PTree.fold (fun acc pc i -> (P.to_int pc,i)::acc) f.fn_code []) in
+  Array.fast_sort (fun (i1,_) (i2,_) -> i2 - i1) a;
+  let ep = P.to_int f.fn_entrypoint in
+  debug "entrypoint: %d %s\n" ep (string_of_labeli c.nodes ep);
+  let println = Array.fold_left (print_instr c) false a in
+  (if println then debug "\n");debug "remaining cond:";
+  List.iter (fun n -> debug "%d " (lab_i n)) c.rems;
+  debug "\n"
+
+(*************************************************************)
+(* Copy-paste of the extracted code of the verifier          *)
+(* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *)
+
+let get td pc =
+  match PTree.get pc td with
+  | Some p -> let (t0, d) = p in (t0, d)
+  | None -> (pc, Z.of_uint 0)
+
+let check_instr td pc i =
+  match PTree.get pc td with
+  | Some p ->
+      let (tpc, dpc) = p in
+      let dpc0 = dpc in begin
+        match i with
+        | Inop s ->
+          let (ts, ds) = get td s in
+          if peq tpc ts
+          then if zlt ds dpc0
+               then ()
+               else raise (BugOnPC (P.to_int pc))
+          else raise (BugOnPC (P.to_int pc))
+        | Icond (_, _, s1, s2, _) ->
+          let (ts1, ds1) = get td s1 in
+          let (ts2, ds2) = get td s2 in
+          if peq tpc ts1
+          then if peq tpc ts2
+               then if zlt ds1 dpc0
+                    then if zlt ds2 dpc0
+                         then ()
+                         else raise (BugOnPC (P.to_int pc))
+                    else raise (BugOnPC (P.to_int pc))
+               else raise (BugOnPC (P.to_int pc))
+          else raise (BugOnPC (P.to_int pc))
+        | _ ->
+          raise (BugOnPC (P.to_int pc)) end
+  | None -> ()
+
+(** val check_code : coq_UF -> code -> unit res **)
+
+let check_code td c =
+  PTree.fold (fun _ pc i -> check_instr td pc i) c (())
+
+(*** END: copy-paste & debugging functions *******)
+
+let branch_target f =
+  debug "* RTLTunneling.branch_target: starting on a new function\n";
+  if limit_tunneling <> None then debug "* WARNING: limit_tunneling <> None\n";
+  let c = { nodes = Hashtbl.create 100; rems = []; num_rems = 0; iter_num = 0 } in
+  c.rems <- PTree.fold (build_simplified_cfg c) f.fn_code [];
+  repeat_change_cond c;
+  let res = final_export f c in
+  if !debug_flag then (
+    try
+      check_code res f.fn_code;
+      if final_dump then print_cfg f c;
+    with e -> (
+      print_cfg f c;
+      check_code res f.fn_code
+    )
+  );
+  res
-- 
cgit 


From 90375dc090b19cf8202903754d7f47e8d568d9f8 Mon Sep 17 00:00:00 2001
From: Pierre Goutagny <pierre.goutagny@ens-lyon.fr>
Date: Mon, 14 Jun 2021 15:37:50 +0200
Subject: Add RTL Tunneling as a pass

---
 backend/RTLTunneling.v      | 1 +
 backend/RTLTunnelingproof.v | 5 +++++
 tools/compiler_expand.ml    | 3 ++-
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/backend/RTLTunneling.v b/backend/RTLTunneling.v
index e6901a9f..049160fd 100644
--- a/backend/RTLTunneling.v
+++ b/backend/RTLTunneling.v
@@ -25,6 +25,7 @@ Definition UF := PTree.t (node * Z).
 
 (* The oracle returns a map of "nop" node to their target with a distance (ie the number of the "nop" node on the path) to the target. *)
 Axiom branch_target: RTL.function -> UF.
+Extract Constant branch_target => "RTLTunnelingaux.branch_target".
 (* TODO: add an extraction command to link branch_target with its implementation *)
 
 Local Open Scope error_monad_scope.
diff --git a/backend/RTLTunnelingproof.v b/backend/RTLTunnelingproof.v
index 7f6696ad..14a2c037 100644
--- a/backend/RTLTunnelingproof.v
+++ b/backend/RTLTunnelingproof.v
@@ -149,6 +149,11 @@ Definition match_prog (p tp: program) :=
    *     `p` et `tp` sont les programmes donc on doit dire s'ils match
    *)
 
+ (**) Lemma transf_program_match:
+  forall prog tprog, transf_program prog = OK tprog -> match_prog prog tprog.
+Proof.
+  intros. eapply match_transform_partial_program_contextual; eauto.
+Qed.
 
 
 Section PRESERVATION.
diff --git a/tools/compiler_expand.ml b/tools/compiler_expand.ml
index ddb3c21a..265d0bcf 100644
--- a/tools/compiler_expand.ml
+++ b/tools/compiler_expand.ml
@@ -46,7 +46,8 @@ TOTAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "Renumbering for
 PARTIAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "CSE3 for LICM"), "CSE3";
 PARTIAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "Redundancy elimination for LICM"), "Deadcode";
 TOTAL, (Option "all_loads_nontrap"), Require, None, "Allnontrap";
-PARTIAL, Always, Require, (Some "Unused globals"), "Unusedglob"
+PARTIAL, Always, Require, (Some "Unused globals"), "Unusedglob";
+PARTIAL, Always, Require, (Some "RTL Branch Tunneling"), "RTLTunneling"
 |];;
 
 let post_rtl_passes =
-- 
cgit 


From bff4e6ff0b782619b6fcc18751fa575cbb11de68 Mon Sep 17 00:00:00 2001
From: "nicolas.nardino" <nicolas.nardino@ens-lyon.fr>
Date: Mon, 14 Jun 2021 17:39:58 +0200
Subject: was very wrong, fixing

---
 aarch64/PostpassSchedulingOracle.ml |  2 +-
 aarch64/PrepassSchedulingOracle.ml  | 11 ++++--
 scheduling/InstructionScheduler.ml  |  7 ++--
 scheduling/InstructionScheduler.mli |  6 +--
 scheduling/RTLpathScheduleraux.ml   | 74 ++++++++++++++++++++++++++++++++-----
 5 files changed, 80 insertions(+), 20 deletions(-)

diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml
index 867341ca..6f784238 100644
--- a/aarch64/PostpassSchedulingOracle.ml
+++ b/aarch64/PostpassSchedulingOracle.ml
@@ -509,7 +509,7 @@ let build_problem bb =
     resource_bounds = opweights.pipelined_resource_bounds;
     live_regs_entry = Registers.Regset.empty; (* unused here *)
     typing = (fun x -> AST.Tint);                (* unused here *)
-    pressure_deltas = [| [| |] |] ;
+    reference_counting = None;
     instruction_usages = instruction_usages bb;
     latency_constraints = latency_constraints bb;
   }
diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml
index 19f05749..fe757c99 100644
--- a/aarch64/PrepassSchedulingOracle.ml
+++ b/aarch64/PrepassSchedulingOracle.ml
@@ -202,6 +202,7 @@ let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.
   !latency_constraints;;
 
 
+(** useless  *)
 let get_pressure_deltas (seqa : (instruction * Regset.t) array)
   (typing : RTLtyping.regenv)
     : int array array =
@@ -454,13 +455,13 @@ let get_alias_dependencies seqa =
  *)
 
 let define_problem (opweights : opweights) (live_entry_regs : Regset.t)
-      (typing : RTLtyping.regenv) seqa =
+      (typing : RTLtyping.regenv) reference_counting seqa =
   let simple_deps = get_simple_dependencies opweights seqa in
   { max_latency = -1;
     resource_bounds = opweights.pipelined_resource_bounds;
     live_regs_entry = live_entry_regs;
     typing = typing;
-    pressure_deltas = get_pressure_deltas seqa typing;
+    reference_counting = Some reference_counting;
       instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa);
     latency_constraints =
       (* if (use_alias_analysis ())
@@ -492,7 +493,8 @@ let prepass_scheduler_by_name name problem early_ones =
   
 let schedule_sequence (seqa : (instruction*Regset.t) array)
       (live_regs_entry : Registers.Regset.t)
-      (typing : RTLtyping.regenv) =
+      (typing : RTLtyping.regenv)
+      reference =
   let opweights = OpWeights.get_opweights () in
   try
     if (Array.length seqa) <= 1
@@ -502,7 +504,8 @@ let schedule_sequence (seqa : (instruction*Regset.t) array)
       let nr_instructions = Array.length seqa in
       (if !Clflags.option_debug_compcert > 6
        then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa));
-      let problem = define_problem opweights live_regs_entry typing seqa in
+      let problem = define_problem opweights live_regs_entry
+                      typing reference seqa in
       (if !Clflags.option_debug_compcert > 7
        then (print_sequence stdout (Array.map fst seqa);
              print_problem stdout problem));
diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml
index a069de59..08349f60 100644
--- a/scheduling/InstructionScheduler.ml
+++ b/scheduling/InstructionScheduler.ml
@@ -35,7 +35,8 @@ type problem = {
   resource_bounds : int array;
   live_regs_entry : Registers.Regset.t;
   typing : RTLtyping.regenv;
-  pressure_deltas : int array array;
+  reference_counting : ((Registers.reg, int * int) Hashtbl.t
+                       * (Registers.reg list array)) option;
   instruction_usages : int array array;
   latency_constraints : latency_constraint list;
   };;
@@ -370,7 +371,7 @@ let reg_pres_scheduler (problem : problem) : solution option =
     (Registers.Regset.elements live_regs_entry);
 
   
-  let pressures = problem.pressure_deltas in
+  let pressures = [| [| |] |] in
   
   let priorities = critical_paths successors in
   
@@ -629,7 +630,7 @@ let reverse_problem problem =
        with creating a reverse scheduler aware of reg press *)
     
     typing = problem.typing;
-    pressure_deltas = [| [| |] |] ;
+    reference_counting = problem.reference_counting;
     instruction_usages = Array.init (nr_instructions + 1)
       (fun i ->
         if i=0
diff --git a/scheduling/InstructionScheduler.mli b/scheduling/InstructionScheduler.mli
index e7f9e7db..9b6f7a3c 100644
--- a/scheduling/InstructionScheduler.mli
+++ b/scheduling/InstructionScheduler.mli
@@ -29,9 +29,9 @@ type problem = {
     typing : RTLtyping.regenv;
     (** Register type map. *)
     
-    pressure_deltas : int array array;
-    (** At index (i, j), the pressure delta for instruction i, for
-    register class j. *)
+    reference_counting : ((Registers.reg, int * int) Hashtbl.t
+                       * (Registers.reg list array)) option;
+    (** See RTLpathScheduleraux.reference_counting. *)
     
     instruction_usages: int array array;
     (** At index {i i} the vector of resources used by instruction number {i i}. It must be the same length as [resource_bounds] *)
diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml
index e04e7c23..02e0c769 100644
--- a/scheduling/RTLpathScheduleraux.ml
+++ b/scheduling/RTLpathScheduleraux.ml
@@ -72,7 +72,60 @@ let get_superblocks code entry pm typing =
     lsb
 end
 
+(** the useful one. Returns a hashtable with bindings of form 
+ ** [(r,(t, n)], where [r] is a pseudo-register (Registers.reg),
+ ** [t] is its class (according to [typing]), and [n] the number of
+ ** times it's referenced as an argument in instructions of [seqa] ;
+ ** and an arrray containg the argument regset of each instruction *)
+let reference_counting (seqa : (instruction * Regset.t) array)
+      (out_regs : Registers.Regset.t) (typing : RTLtyping.regenv) :
+      (Registers.reg, int * int) Hashtbl.t * Registers.reg list array =
+  let retl = Hashtbl.create 42 in
+  let retr = Array.make (Array.length seqa) [] in
+  List.iter (fun reg ->
+      Hashtbl.add retl
+        reg (Machregsaux.class_of_type (typing reg), 1)
+    ) (Registers.Regset.elements out_regs);
+  let add_reg reg = 
+    match Hashtbl.find_opt retl reg with
+    | Some (t, n) -> Hashtbl.add retl reg (t, n+1)
+    | None -> Hashtbl.add retl reg (Machregsaux.class_of_type
+                                    (typing reg), 1)
+  in
+  Array.iteri (fun i  (ins, _) ->
+      match ins with
+      | Iop(_,args,_,_) | Iload(_,_,_,args,_,_)
+      | Icond(_,args,_,_,_) ->
+         List.iter (add_reg) args;
+         retr.(i) <- args
+      | Istore(_,_,args,src,_) ->
+         List.iter (add_reg) args;
+         add_reg src;
+         retr.(i) <- src::args
+      | Icall(_,fn,args,_,_) | Itailcall(_,fn,args) ->
+         List.iter (add_reg) args;
+         retr.(i) <- (match fn with
+                     | Datatypes.Coq_inl reg ->
+                        add_reg reg;
+                        reg::args
+                     | _ -> args)
+      | Ibuiltin(_,args,_,_) ->
+         let rec bar = function
+           | AST.BA r -> add_reg r;
+                        retr.(i) <- r::retr.(i)
+           | AST.BA_splitlong  (hi, lo) | AST.BA_addptr (hi, lo) ->
+              bar hi; bar lo
+           | _ -> ()
+         in
+         List.iter (bar) args
+      | Ijumptable (reg,_) | Ireturn (Some reg) ->
+         add_reg reg;
+         retr.(i) <- [reg]
+      | _ -> ()
+    ) seqa;
+  retl, retr
 
+  
 let get_live_regs_entry (sb : superblock) code =
   (if !Clflags.option_debug_compcert > 6
    then debug_flag := true);
@@ -164,17 +217,20 @@ let schedule_superblock sb code =
          | None -> 1 in
     debug "hello\n";
     let live_regs_entry = get_live_regs_entry sb code in
+    let seqa = 
+      Array.map (fun i ->
+          (match PTree.get i code with
+           | Some ii -> ii
+           | None -> failwith "RTLpathScheduleraux.schedule_superblock"),
+          (match PTree.get i sb.liveins with
+           | Some s -> s
+           | None -> Regset.empty))
+        (Array.sub sb.instructions 0 (nr_instr-trailer_length)) in
     match PrepassSchedulingOracle.schedule_sequence
-            (Array.map (fun i ->
-                 (match PTree.get i code with
-                 | Some ii -> ii
-                 | None -> failwith "RTLpathScheduleraux.schedule_superblock"),
-                 (match PTree.get i sb.liveins with
-                 | Some s -> s
-                 | None -> Regset.empty))
-               (Array.sub sb.instructions 0 (nr_instr-trailer_length)))
+            seqa
             live_regs_entry
-            sb.typing with
+            sb.typing
+            (reference_counting seqa sb.s_output_regs sb.typing) with
     | None -> sb.instructions
     | Some order ->
        let ins' =
-- 
cgit 


From 19464b3992eadf7670acc7231896103ab54885e5 Mon Sep 17 00:00:00 2001
From: "nicolas.nardino" <nicolas.nardino@ens-lyon.fr>
Date: Tue, 15 Jun 2021 12:07:43 +0200
Subject: fixing

Still need to find what to do when pressure is high but there are no
instructions available that decrease it
---
 scheduling/InstructionScheduler.ml    | 83 ++++++++++++++++++++++++++++++-----
 scheduling/InstructionScheduler.mli   |  2 +-
 scheduling/RTLpathScheduleraux.ml     | 47 +++++++++++++++-----
 test/nardino/scheduling/spille_forw.c | 60 ++++++++++++-------------
 4 files changed, 139 insertions(+), 53 deletions(-)

diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml
index 08349f60..19bfaeb0 100644
--- a/scheduling/InstructionScheduler.ml
+++ b/scheduling/InstructionScheduler.ml
@@ -36,7 +36,7 @@ type problem = {
   live_regs_entry : Registers.Regset.t;
   typing : RTLtyping.regenv;
   reference_counting : ((Registers.reg, int * int) Hashtbl.t
-                       * (Registers.reg list array)) option;
+                       * ((Registers.reg * bool) list array)) option;
   instruction_usages : int array array;
   latency_constraints : latency_constraint list;
   };;
@@ -363,15 +363,37 @@ let reg_pres_scheduler (problem : problem) : solution option =
   let regs_thresholds = Array.init nr_types_regs
                           (fun i -> 5) in
   (* placeholder value *)
+  
+  let class_r r =
+    Machregsaux.class_of_type (problem.typing r) in
+  
+  let live_regs = Hashtbl.create 42 in
 
   List.iter (fun r -> let classe = Machregsaux.class_of_type
                                   (problem.typing r) in
                    available_regs.(classe)
-                   <- available_regs.(classe) - 1)
+                   <- available_regs.(classe) - 1;
+                   Hashtbl.add live_regs r classe)
     (Registers.Regset.elements live_regs_entry);
 
+
+  let counts, mentions =
+    match problem.reference_counting with
+    | Some (l, r) -> l, r
+    | None -> assert false
+  in
   
-  let pressures = [| [| |] |] in
+  let fold_delta i  = (fun a (r, b) ->
+    a +
+      if class_r r <> i then 0 else
+        (if b then
+           if (Hashtbl.find counts r = (i, 1))
+           then 1 else 0
+         else
+           match Hashtbl.find_opt live_regs r with
+           | None -> -1
+           | Some t -> 0
+  )) in
   
   let priorities = critical_paths successors in
   
@@ -396,7 +418,14 @@ let reg_pres_scheduler (problem : problem) : solution option =
       let iter = MSet.iter
 
       let compare_regs i x y =
-        match pressures.(y).(i) - pressures.(x).(i) with
+        let pyi = List.fold_left (fold_delta i) 0 mentions.(y) in
+        print_int y;
+        print_string " ";
+        print_int pyi;
+        print_newline ();
+        flush stdout;
+        let pxi = List.fold_left (fold_delta i) 0 mentions.(x) in
+        match pyi - pxi with
         | 0 -> (match priorities.(y) - priorities.(x) with
                | 0 -> x - y
                | z -> z)
@@ -404,10 +433,13 @@ let reg_pres_scheduler (problem : problem) : solution option =
 
       (** t is the register class *)
       let sched_CSR t ready usages =
+        print_string "looking for max delta";
+        print_newline ();
+        flush stdout;
         let result = ref (-1) in
         iter (fun i ->
             if vector_less_equal usages.(i) current_resources
-            then if !result = -1 || (compare_regs t !result i < 0)
+            then if !result = -1 || (compare_regs t !result i > 0)
                  then result := i) ready;
         !result
     end
@@ -451,13 +483,28 @@ let reg_pres_scheduler (problem : problem) : solution option =
     let result = ref (-1) in
     try
       Array.iteri (fun i avlregs ->
+          print_string "avlregs: ";
+          print_int i;
+          print_string " ";
+          print_int avlregs;
+          print_newline ();
+          flush stdout;
           if avlregs <= regs_thresholds.(i)
           then (
             let maybe = InstrSet.sched_CSR i ready usages in
-            (* print_int maybe;
-             * print_newline ();
-             * flush stdout; *)
-            if maybe > 0 && pressures.(maybe).(i) < 0 then
+            print_string "maybe\n";
+            print_int maybe;
+            print_newline ();
+            flush stdout;
+            if maybe > 0 &&
+                 let delta = 
+                 List.fold_left (fold_delta i) 0 mentions.(maybe) in
+                 print_string "delta ";
+                 print_int delta;
+                 print_newline ();
+                 flush stdout;
+                 delta
+                 >= 0 then
               (vector_subtract usages.(maybe) current_resources;
                result := maybe;
                raise Exit))) available_regs;
@@ -470,7 +517,23 @@ let reg_pres_scheduler (problem : problem) : solution option =
       -1
     with Exit ->
           if !result <> -1 then
-            vector_subtract pressures.(!result) available_regs;
+            (List.iter (fun (r,b) ->
+                 if b then
+                   match Hashtbl.find_opt counts r with
+                   | None -> assert false
+                   | Some (t, n) ->
+                      Hashtbl.remove counts r;
+                      (if n = 1 then
+                         available_regs.(t)
+                         <- available_regs.(t) + 1)
+                 else
+                   let t = class_r r in
+                   match Hashtbl.find_opt live_regs r with
+                   | None -> (Hashtbl.add live_regs r t;
+                             available_regs.(t)
+                             <- available_regs.(t) - 1)
+                   | Some i -> ()
+               ) mentions.(!result));
           !result in
   
   while !current_time < max_time
diff --git a/scheduling/InstructionScheduler.mli b/scheduling/InstructionScheduler.mli
index 9b6f7a3c..b5a5463b 100644
--- a/scheduling/InstructionScheduler.mli
+++ b/scheduling/InstructionScheduler.mli
@@ -30,7 +30,7 @@ type problem = {
     (** Register type map. *)
     
     reference_counting : ((Registers.reg, int * int) Hashtbl.t
-                       * (Registers.reg list array)) option;
+                       * ((Registers.reg * bool) list array)) option;
     (** See RTLpathScheduleraux.reference_counting. *)
     
     instruction_usages: int array array;
diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml
index 02e0c769..9c3ff689 100644
--- a/scheduling/RTLpathScheduleraux.ml
+++ b/scheduling/RTLpathScheduleraux.ml
@@ -76,12 +76,17 @@ end
  ** [(r,(t, n)], where [r] is a pseudo-register (Registers.reg),
  ** [t] is its class (according to [typing]), and [n] the number of
  ** times it's referenced as an argument in instructions of [seqa] ;
- ** and an arrray containg the argument regset of each instruction *)
+ ** and an arrray containg the list of regs referenced by each
+ ** instruction, with a boolean to know whether it's as arg or dest *)
 let reference_counting (seqa : (instruction * Regset.t) array)
       (out_regs : Registers.Regset.t) (typing : RTLtyping.regenv) :
-      (Registers.reg, int * int) Hashtbl.t * Registers.reg list array =
+      (Registers.reg, int * int) Hashtbl.t *
+        (Registers.reg * bool) list  array =
   let retl = Hashtbl.create 42 in
   let retr = Array.make (Array.length seqa) [] in
+  (* retr.(i) : (r, b) -> (r', b') -> ...
+     where b = true if seen as arg, false if seen as dest
+   *)
   List.iter (fun reg ->
       Hashtbl.add retl
         reg (Machregsaux.class_of_type (typing reg), 1)
@@ -92,35 +97,53 @@ let reference_counting (seqa : (instruction * Regset.t) array)
     | None -> Hashtbl.add retl reg (Machregsaux.class_of_type
                                     (typing reg), 1)
   in
+  let map_true = List.map (fun r -> r, true) in
   Array.iteri (fun i  (ins, _) ->
       match ins with
-      | Iop(_,args,_,_) | Iload(_,_,_,args,_,_)
+      | Iop(_,args,dest,_) | Iload(_,_,_,args,dest,_) ->
+         List.iter (add_reg) args;
+         retr.(i) <- (dest, false)::(map_true args)
       | Icond(_,args,_,_,_) ->
          List.iter (add_reg) args;
-         retr.(i) <- args
+         retr.(i) <- map_true args
       | Istore(_,_,args,src,_) ->
          List.iter (add_reg) args;
          add_reg src;
-         retr.(i) <- src::args
-      | Icall(_,fn,args,_,_) | Itailcall(_,fn,args) ->
+         retr.(i) <- (src, true)::(map_true args)
+      | Icall(_,fn,args,dest,_) ->
+         List.iter (add_reg) args;
+         retr.(i) <- (match fn with
+                     | Datatypes.Coq_inl reg ->
+                        add_reg reg;
+                        (dest,false)::(reg, true)::(map_true args)
+                     | _ -> (dest,false)::(map_true args))
+
+        | Itailcall(_,fn,args) ->
          List.iter (add_reg) args;
          retr.(i) <- (match fn with
                      | Datatypes.Coq_inl reg ->
                         add_reg reg;
-                        reg::args
-                     | _ -> args)
-      | Ibuiltin(_,args,_,_) ->
+                        (reg, true)::(map_true args)
+                     | _ -> map_true args)
+      | Ibuiltin(_,args,dest,_) ->
          let rec bar = function
            | AST.BA r -> add_reg r;
-                        retr.(i) <- r::retr.(i)
+                        retr.(i) <- (r, true)::retr.(i)
            | AST.BA_splitlong  (hi, lo) | AST.BA_addptr (hi, lo) ->
               bar hi; bar lo
            | _ -> ()
          in
-         List.iter (bar) args
+         List.iter (bar) args;
+         let rec bad = function
+           | AST.BR r -> retr.(i) <- (r, false)::retr.(i)
+           | AST.BR_splitlong (hi, lo) ->
+              bad hi; bad lo
+           | _ -> ()
+         in
+         bad dest;
       | Ijumptable (reg,_) | Ireturn (Some reg) ->
          add_reg reg;
-         retr.(i) <- [reg]
+         retr.(i) <- [reg, true]
       | _ -> ()
     ) seqa;
   retl, retr
diff --git a/test/nardino/scheduling/spille_forw.c b/test/nardino/scheduling/spille_forw.c
index 770dfce5..db88588b 100644
--- a/test/nardino/scheduling/spille_forw.c
+++ b/test/nardino/scheduling/spille_forw.c
@@ -91,36 +91,36 @@ int f(int n, float * arr) {
 	float a30 = (float) n+29;
 	float b30 = 2.*a30;
 	c += a30;
-	arr[0] = a1;
-	arr[1] = a2;
-	arr[2] = a3;
-	arr[3] = a4;
-	arr[4] = a5;
-	arr[5] = a6;
-	arr[6] = a7;
-	arr[7] = a8;
-	arr[8] = a9;
-	arr[9] = a10;
-	arr[10] = a11;
-	arr[11] = a12;
-	arr[12] = a13;
-	arr[13] = a14;
-	arr[14] = a15;
-	arr[15] = a16;
-	arr[16] = a17;
-	arr[17] = a18;
-	arr[18] = a19;
-	arr[19] = a20;
-	arr[20] = a21;
-	arr[21] = a22;
-	arr[22] = a23;
-	arr[23] = a24;
-	arr[24] = a25;
-	arr[25] = a26;
-	arr[26] = a27;
-	arr[27] = a28;
-	arr[28] = a29;
-	arr[29] = a30;
+	/* arr[0] = a1; */
+	/* arr[1] = a2; */
+	/* arr[2] = a3; */
+	/* arr[3] = a4; */
+	/* arr[4] = a5; */
+	/* arr[5] = a6; */
+	/* arr[6] = a7; */
+	/* arr[7] = a8; */
+	/* arr[8] = a9; */
+	/* arr[9] = a10; */
+	/* arr[10] = a11; */
+	/* arr[11] = a12; */
+	/* arr[12] = a13; */
+	/* arr[13] = a14; */
+	/* arr[14] = a15; */
+	/* arr[15] = a16; */
+	/* arr[16] = a17; */
+	/* arr[17] = a18; */
+	/* arr[18] = a19; */
+	/* arr[19] = a20; */
+	/* arr[20] = a21; */
+	/* arr[21] = a22; */
+	/* arr[22] = a23; */
+	/* arr[23] = a24; */
+	/* arr[24] = a25; */
+	/* arr[25] = a26; */
+	/* arr[26] = a27; */
+	/* arr[27] = a28; */
+	/* arr[28] = a29; */
+	/* arr[29] = a30; */
 	return c +
 		b1+
 		b2+
-- 
cgit 


From 87c82b6fcf2bf825a8c60fc6a95498aac9f826d4 Mon Sep 17 00:00:00 2001
From: "nicolas.nardino" <nicolas.nardino@ens-lyon.fr>
Date: Tue, 15 Jun 2021 14:44:56 +0200
Subject: kinda fixed

Spills are definitely reduced, but lots of arbitrary in there:
See previous commit: need to determine what to do if pressure is too
high but no schedulable instruction can reduce it. For now, advance
time for at most 5 cycles, if still no suitable instruction, go back
to CSP
---
 scheduling/InstructionScheduler.ml | 62 ++++++++++++++++++++++++--------------
 scheduling/RTLpathScheduleraux.ml  | 20 ++++++++++--
 2 files changed, 57 insertions(+), 25 deletions(-)

diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml
index 19bfaeb0..72222022 100644
--- a/scheduling/InstructionScheduler.ml
+++ b/scheduling/InstructionScheduler.ml
@@ -478,6 +478,9 @@ let reg_pres_scheduler (problem : problem) : solution option =
        ready.(!current_time) <- InstrSet.empty));
     incr current_time
   in
+
+  (* ALL MENTIONS TO cnt ARE PLACEHOLDERS *)
+  let cnt = ref 0 in
   
   let attempt_scheduling ready usages =
     let result = ref (-1) in
@@ -488,8 +491,11 @@ let reg_pres_scheduler (problem : problem) : solution option =
           print_string " ";
           print_int avlregs;
           print_newline ();
+          print_string "live regs: ";
+          print_int (Hashtbl.length live_regs);
+          print_newline ();
           flush stdout;
-          if avlregs <= regs_thresholds.(i)
+          if !cnt < 5 && avlregs <= regs_thresholds.(i)
           then (
             let maybe = InstrSet.sched_CSR i ready usages in
             print_string "maybe\n";
@@ -506,8 +512,9 @@ let reg_pres_scheduler (problem : problem) : solution option =
                  delta
                  >= 0 then
               (vector_subtract usages.(maybe) current_resources;
-               result := maybe;
-               raise Exit))) available_regs;
+               result := maybe)
+            else incr cnt;
+            raise Exit)) available_regs;
       InstrSet.iter (fun i ->
           if vector_less_equal usages.(i) current_resources
           then (
@@ -516,25 +523,7 @@ let reg_pres_scheduler (problem : problem) : solution option =
             raise Exit)) ready;
       -1
     with Exit ->
-          if !result <> -1 then
-            (List.iter (fun (r,b) ->
-                 if b then
-                   match Hashtbl.find_opt counts r with
-                   | None -> assert false
-                   | Some (t, n) ->
-                      Hashtbl.remove counts r;
-                      (if n = 1 then
-                         available_regs.(t)
-                         <- available_regs.(t) + 1)
-                 else
-                   let t = class_r r in
-                   match Hashtbl.find_opt live_regs r with
-                   | None -> (Hashtbl.add live_regs r t;
-                             available_regs.(t)
-                             <- available_regs.(t) - 1)
-                   | Some i -> ()
-               ) mentions.(!result));
-          !result in
+      !result in
   
   while !current_time < max_time
   do
@@ -545,6 +534,35 @@ let reg_pres_scheduler (problem : problem) : solution option =
               problem.instruction_usages with
       | -1 -> advance_time()
       | i -> (assert(times.(i) < 0);
+            (print_string "INSTR ISSUED: ";
+             print_int i;
+             print_newline ();
+             flush stdout;
+             cnt := 0;
+             List.iter (fun (r,b) ->
+                 if b then
+                   (match Hashtbl.find_opt counts r with
+                    | None -> assert false
+                    | Some (t, n) ->
+                       Hashtbl.remove counts r;
+                       if n = 1 then
+                         (print_string "yaaaaaaaaaaaas ";
+                          print_int (Camlcoq.P.to_int r);
+                          print_newline ();
+                          Hashtbl.remove live_regs r;
+                          available_regs.(t)
+                          <- available_regs.(t) + 1))
+                 else
+                   let t = class_r r in
+                   match Hashtbl.find_opt live_regs r with
+                   | None -> (print_string "noooooooooo ";
+                             print_int (Camlcoq.P.to_int r);
+                             print_newline ();
+                             Hashtbl.add live_regs r t;
+                             available_regs.(t)
+                             <- available_regs.(t) - 1)
+                   | Some i -> ()
+               ) mentions.(i));
              times.(i) <- !current_time;
              ready.(!current_time)
              <- InstrSet.remove i (ready.(!current_time));
diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml
index 9c3ff689..8df3edbc 100644
--- a/scheduling/RTLpathScheduleraux.ml
+++ b/scheduling/RTLpathScheduleraux.ml
@@ -72,7 +72,7 @@ let get_superblocks code entry pm typing =
     lsb
 end
 
-(** the useful one. Returns a hashtable with bindings of form 
+(** the useful one. Returns a hashtable with bindings of shape
  ** [(r,(t, n)], where [r] is a pseudo-register (Registers.reg),
  ** [t] is its class (according to [typing]), and [n] the number of
  ** times it's referenced as an argument in instructions of [seqa] ;
@@ -85,7 +85,7 @@ let reference_counting (seqa : (instruction * Regset.t) array)
   let retl = Hashtbl.create 42 in
   let retr = Array.make (Array.length seqa) [] in
   (* retr.(i) : (r, b) -> (r', b') -> ...
-     where b = true if seen as arg, false if seen as dest
+   * where b = true if seen as arg, false if seen as dest
    *)
   List.iter (fun reg ->
       Hashtbl.add retl
@@ -118,7 +118,7 @@ let reference_counting (seqa : (instruction * Regset.t) array)
                         (dest,false)::(reg, true)::(map_true args)
                      | _ -> (dest,false)::(map_true args))
 
-        | Itailcall(_,fn,args) ->
+      | Itailcall(_,fn,args) ->
          List.iter (add_reg) args;
          retr.(i) <- (match fn with
                      | Datatypes.Coq_inl reg ->
@@ -146,6 +146,20 @@ let reference_counting (seqa : (instruction * Regset.t) array)
          retr.(i) <- [reg, true]
       | _ -> ()
     ) seqa;
+  print_string "mentions\n";
+  Array.iteri (fun i l ->
+      print_int i;
+      print_string ": [";
+      List.iter (fun (r, b) ->
+          print_int (Camlcoq.P.to_int r);
+          print_string ":";
+          print_string (if b then "a:" else "d");
+          if b then print_int (snd (Hashtbl.find retl r));
+          print_string ", "
+        ) l;
+      print_string "]\n";
+      flush stdout;
+    ) retr;
   retl, retr
 
   
-- 
cgit 


From d68d71ae380601759927e04a773b9ed0a95ba247 Mon Sep 17 00:00:00 2001
From: Pierre Goutagny <pierre.goutagny@ens-lyon.fr>
Date: Tue, 15 Jun 2021 15:17:32 +0200
Subject: Factorise RTL Tunneling pass in compiler_expand

---
 tools/compiler_expand.ml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/compiler_expand.ml b/tools/compiler_expand.ml
index 265d0bcf..e45f64fa 100644
--- a/tools/compiler_expand.ml
+++ b/tools/compiler_expand.ml
@@ -13,6 +13,8 @@ type print_result = Noprint | Print of string;;
 type when_triggered = Always | Option of string;;
 type needs_require = Require | NoRequire;;
 
+let rtl_tunneling = PARTIAL, Always, Require, (Some "RTL Branch Tunneling"), "RTLTunneling"
+
 (* FIXME - The gestion of NoRequire is a bit ugly right now. *)
 let rtl_passes =
 [|
@@ -47,7 +49,7 @@ PARTIAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "CSE3 for LICM"
 PARTIAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "Redundancy elimination for LICM"), "Deadcode";
 TOTAL, (Option "all_loads_nontrap"), Require, None, "Allnontrap";
 PARTIAL, Always, Require, (Some "Unused globals"), "Unusedglob";
-PARTIAL, Always, Require, (Some "RTL Branch Tunneling"), "RTLTunneling"
+rtl_tunneling;
 |];;
 
 let post_rtl_passes =
-- 
cgit 


From 35b163f1c0bf1a4d09bfd0faa9ffed4b909fc8ea Mon Sep 17 00:00:00 2001
From: Pierre Goutagny <pierre.goutagny@ens-lyon.fr>
Date: Tue, 15 Jun 2021 16:55:31 +0200
Subject: Move rtl_tunneling to a more interesting place

---
 tools/compiler_expand.ml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/compiler_expand.ml b/tools/compiler_expand.ml
index e45f64fa..067f0e4b 100644
--- a/tools/compiler_expand.ml
+++ b/tools/compiler_expand.ml
@@ -40,6 +40,7 @@ PARTIAL, (Option "optim_CSE3"), Require, (Some "CSE3"), "CSE3";
 TOTAL, (Option "optim_CSE3"), Require, (Some "Kill useless moves after CSE3"), "KillUselessMoves";
 TOTAL, (Option "optim_forward_moves"), Require, (Some "Forwarding moves"), "ForwardMoves";
 PARTIAL, (Option "optim_redundancy"), Require, (Some "Redundancy elimination"), "Deadcode";
+rtl_tunneling;
 TOTAL, Always, Require, (Some "Renumbering pre rotate"), "Renumber";
 PARTIAL, Always, NoRequire, (Some "Loop Rotate"), "Looprotate";
 TOTAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "Renumbering for LICM"), "Renumber";
@@ -49,7 +50,6 @@ PARTIAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "CSE3 for LICM"
 PARTIAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "Redundancy elimination for LICM"), "Deadcode";
 TOTAL, (Option "all_loads_nontrap"), Require, None, "Allnontrap";
 PARTIAL, Always, Require, (Some "Unused globals"), "Unusedglob";
-rtl_tunneling;
 |];;
 
 let post_rtl_passes =
-- 
cgit 


From 3408be46000279a5282658375053f9a83a88cf58 Mon Sep 17 00:00:00 2001
From: Pierre Goutagny <pierre.goutagny@ens-lyon.fr>
Date: Wed, 16 Jun 2021 16:23:52 +0200
Subject: Add Tunneling factorisation module

---
 backend/Tunnelinglibs.ml | 237 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 237 insertions(+)
 create mode 100644 backend/Tunnelinglibs.ml

diff --git a/backend/Tunnelinglibs.ml b/backend/Tunnelinglibs.ml
new file mode 100644
index 00000000..ce384dac
--- /dev/null
+++ b/backend/Tunnelinglibs.ml
@@ -0,0 +1,237 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Sylvain Boulmé  Grenoble-INP, VERIMAG             *)
+(*           TODO: Proper author information                   *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+(*
+
+This file implements the core functions of the tunneling passes, for both RTL
+and LTL, by using a simplified CFG as a transparent interface
+
+See [Tunneling.v] and [RTLTunneling.v]
+
+*)
+
+open Coqlib
+open Maps
+open Camlcoq
+
+(* type of labels in the cfg *)
+type label = int * P.t
+
+(* instructions under analyzis *)
+type simple_inst = (* a simplified view of instructions *)
+  BRANCH of node
+| COND of node * node
+| OTHER
+and node = {
+    lab: label;
+    mutable inst: simple_inst;
+    mutable link: node; (* link in the union-find: itself for non "nop"-nodes, target of the "nop" otherwise *)
+    mutable dist: int;
+    mutable tag: int
+  }
+
+type cfg_node = (int, node) Hashtbl.t
+
+type lnode = P.t
+
+(* type of the (simplified) CFG *)
+type cfg = {
+    nodes: cfg_node;
+    mutable rems: node list; (* remaining conditions that may become lbranch or not *)
+    mutable num_rems: int;
+    mutable iter_num: int (* number of iterations in elimination of conditions *)
+  }
+
+module Tunneling = functor
+  (LANG: sig
+    type code_unit (* the type of a node of the code cfg (an instruction or a bblock *)
+    type funct
+  end)
+  (OPT: sig
+    val langname: string
+    val limit_tunneling: int option (* for debugging: [Some x] limit the number of iterations *)
+    val debug_flag: bool ref
+    val final_dump: bool (* set to true to have a more verbose debugging *)
+  end)
+  (FUNS: sig
+    type lang
+    type code_type
+
+    (* build [c.nodes] and accumulate in [acc] conditions at beginning of LTL basic-blocks *)
+    val build_simplified_cfg: cfg -> cfg_node list -> lnode ->
+      LANG.code_unit -> cfg_node list
+
+    val print_code_unit: cfg -> bool -> (int * LANG.code_unit) -> bool
+
+    val fn_code: LANG.funct -> LANG.code_unit PTree.t
+    val fn_entrypoint: LANG.funct -> lnode
+
+    val check_code_unit: LANG.code_unit PTree.t -> lnode -> LANG.code_unit -> unit
+  end)
+-> struct
+
+let debug fmt =
+  if !OPT.debug_flag then Printf.eprintf fmt
+  else Printf.ifprintf stderr fmt
+
+exception BugOnPC of int
+
+let lab_i (n: node): int = fst n.lab
+let lab_p (n: node): P.t = snd n.lab
+
+let rec target c n = (* inspired from the "find" of union-find algorithm *)
+  match n.inst with
+  | COND(s1,s2) ->
+     if n.link != n
+     then update c n
+     else if n.tag < c.iter_num then (
+       (* we try to change the condition ... *)
+       n.tag <- c.iter_num; (* ... but at most once by iteration *)
+       let ts1 = target c s1 in
+       let ts2 = target c s2 in
+       if ts1 == ts2 then (n.link <- ts1; ts1) else n
+     ) else n
+  | _ ->
+     if n.link != n
+     then update c n
+     else n
+and update c n =
+  let t = target c n.link in
+  n.link <- t; t
+
+let get_node c p =
+  let li = P.to_int p in
+  try
+    Hashtbl.find c.nodes li
+  with
+    Not_found ->
+      let rec n = { lab = (li, p); inst = OTHER; link = n ; dist = 0; tag = 0 }  in
+      Hashtbl.add c.nodes li n;
+      n
+
+let set_branch c p s =
+  let li = P.to_int p in
+  try
+    let n = Hashtbl.find c.nodes li in
+    n.inst <- BRANCH s;
+    n.link <- target c s
+  with
+    Not_found ->
+      let n = { lab = (li,p); inst = BRANCH s; link = target c s; dist = 0; tag = 0 } in
+      Hashtbl.add c.nodes li n
+
+
+
+(* try to change a condition into a branch
+[acc] is the current accumulator of conditions to consider in the next iteration of repeat_change_cond
+*)
+let try_change_cond c acc pc =
+  match pc.inst with
+  | COND(s1,s2) ->
+     let ts1 = target c s1 in
+     let ts2 = target c s2 in
+     if ts1 == ts2 then (
+       pc.link <- ts1;
+       c.num_rems <- c.num_rems - 1;
+       acc
+     ) else
+       pc::acc
+  | _  -> raise (BugOnPC (lab_i pc)) (* COND expected *)
+
+(* repeat [try_change_cond] until no condition is changed into a branch *)
+let rec repeat_change_cond c =
+  c.iter_num <- c.iter_num + 1;
+  debug "++ %sTunneling.branch_target %d: remaining number of conds to consider
+  = %d\n" OPT.langname (c.iter_num) (c.num_rems);
+  let old =  c.num_rems in
+  c.rems <- List.fold_left (try_change_cond c) [] c.rems;
+  let curr = c.num_rems in
+  let continue =
+    match OPT.limit_tunneling with
+    | Some n -> curr < old && c.iter_num < n
+    | None -> curr < old
+  in
+  if continue
+  then repeat_change_cond c
+
+
+(* compute the final distance of each nop nodes to its target *)
+let undef_dist = -1
+let self_dist = undef_dist-1
+let rec dist n =
+  if n.dist = undef_dist
+  then (
+    n.dist <- self_dist; (* protection against an unexpected loop in the data-structure *)
+    n.dist <-
+      (match n.inst with
+       | OTHER -> 0
+       | BRANCH p -> 1 + dist p
+       | COND (p1,p2) -> 1 + (max (dist p1) (dist p2)));
+    n.dist
+  ) else if n.dist=self_dist then raise (BugOnPC (lab_i n))
+    else n.dist
+
+let final_export f c =
+  let count = ref 0 in
+  let filter_nops_init_dist _ n acc =
+    let tn = target c n in
+    if tn == n
+    then (
+      n.dist <- 0; (* force [n] to be a base case in the recursion of [dist] *)
+      acc
+    ) else (
+      n.dist <- undef_dist; (* force [dist] to compute the actual [n.dist] *)
+      count := !count+1;
+      n::acc
+    )
+  in
+  let nops = Hashtbl.fold filter_nops_init_dist c.nodes [] in
+  let res = List.fold_left (fun acc n -> PTree.set (lab_p n) (lab_p n.link, Z.of_uint (dist n)) acc) PTree.empty nops in
+  debug "* %sTunneling.branch_target: final number of eliminated nops = %d\n"
+  OPT.langname !count;
+  res
+
+(*********************************************)
+(*** START: printing and debugging functions *)
+
+let string_of_labeli nodes ipc =
+  try
+    let pc = Hashtbl.find nodes ipc in
+    if pc.link == pc
+    then Printf.sprintf "(Target@%d)" (dist pc)
+    else Printf.sprintf "(Nop %d @%d)" (lab_i pc.link) (dist pc)
+  with
+    Not_found -> ""
+
+let print_cfg (f: LANG.funct) c  =
+  let a = Array.of_list (PTree.fold (fun acc pc cu -> (P.to_int pc,cu)::acc) (FUNS.fn_code f) []) in
+  Array.fast_sort (fun (i1,_) (i2,_) -> i2 - i1) a;
+  let ep = P.to_int (FUNS.fn_entrypoint f) in
+  debug "entrypoint: %d %s\n" ep (string_of_labeli c.nodes ep);
+  let println = Array.fold_left (FUNS.print_code_unit c) false a in
+  (if println then debug "\n");debug "remaining cond:";
+  List.iter (fun n -> debug "%d " (lab_i n)) c.rems;
+  debug "\n"
+
+(*************************************************************)
+(* Copy-paste of the extracted code of the verifier          *)
+(* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *)
+
+(** val check_code : coq_UF -> code -> unit res **)
+
+let check_code td c =
+  PTree.fold (fun _ pc cu -> FUNS.check_code_unit td pc cu) c (())
+
+(*** END: copy-paste & debugging functions *******)
+
+end
-- 
cgit 


From 67bc93934e939e57c80ade4c37aaba1535222fa2 Mon Sep 17 00:00:00 2001
From: Pierre Goutagny <pierre.goutagny@ens-lyon.fr>
Date: Wed, 16 Jun 2021 18:19:38 +0200
Subject: Use Tunnelinglibs in RTLTunnelingaux

---
 backend/RTLTunnelingaux.ml | 351 +++++++++++++--------------------------------
 backend/Tunnelinglibs.ml   |  37 +++--
 2 files changed, 123 insertions(+), 265 deletions(-)

diff --git a/backend/RTLTunnelingaux.ml b/backend/RTLTunnelingaux.ml
index a30b43cf..5fe327d4 100644
--- a/backend/RTLTunnelingaux.ml
+++ b/backend/RTLTunnelingaux.ml
@@ -24,261 +24,106 @@ open Coqlib
 open RTL
 open Maps
 open Camlcoq
-
-let limit_tunneling = None (* for debugging: [Some x] limit the number of iterations *)
-let debug_flag = ref true
-let final_dump = false   (* set to true to have a more verbose debugging *)
-
-let debug fmt =
-  if !debug_flag then Printf.eprintf fmt
-  else Printf.ifprintf stderr fmt
-
-exception BugOnPC of int
+open Tunnelinglibs
 
 let nopcounter = ref 0
 
-(* type of labels in the cfg *)
-type label = int * P.t
-
-(* instructions under analyzis *)
-type simple_inst = (* a simplified view of RTL instructions *)
-  INOP of node
-| ICOND of node * node
-| OTHER
-and node = {
-    lab : label;
-    mutable inst: simple_inst;
-    mutable link: node; (* link in the union-find: itself for non "nop"-nodes, target of the "nop" otherwise *)
-    mutable dist: int;
-    mutable tag: int
-  }
-
-(* type of the (simplified) CFG *)
-type cfg = {
-    nodes: (int, node) Hashtbl.t;
-    mutable rems: node list; (* remaining conditions that may become lbranch or not *)
-    mutable num_rems: int;
-    mutable iter_num: int (* number of iterations in elimination of conditions *)
-  }
-
-let lab_i (n: node): int = fst n.lab
-let lab_p (n: node): P.t = snd n.lab
-
-let rec target c n = (* inspired from the "find" of union-find algorithm *)
-  match n.inst with
-  | ICOND(s1,s2) ->
-     if n.link != n
-     then update c n
-     else if n.tag < c.iter_num then (
-       (* we try to change the condition ... *)
-       n.tag <- c.iter_num; (* ... but at most once by iteration *)
-       let ts1 = target c s1 in
-       let ts2 = target c s2 in
-       if ts1 == ts2 then (n.link <- ts1; ts1) else n
-     ) else n
-  | _ ->
-     if n.link != n
-     then update c n
-     else n
-and update c n =
-  let t = target c n.link in
-  n.link <- t; t
-
-let get_node c p =
-  let li = P.to_int p in
-  try
-    Hashtbl.find c.nodes li
-  with
-    Not_found ->
-      let rec n = { lab = (li, p); inst = OTHER; link = n ; dist = 0; tag = 0 }  in
-      Hashtbl.add c.nodes li n;
-      n
-
-let set_branch c p s =
-  let li = P.to_int p in
-  try
-    let n = Hashtbl.find c.nodes li in
-    n.inst <- INOP s;
-    n.link <- target c s
-  with
-    Not_found ->
-      let n = { lab = (li,p); inst = INOP s; link = target c s; dist = 0; tag = 0 } in
-      Hashtbl.add c.nodes li n
-
-
-(* build [c.nodes] and accumulate conditions in [acc] *)
-let build_simplified_cfg c acc pc i =
-  match i with
-  | Inop s ->
-     let ns = get_node c s in
-     set_branch c pc ns;
-     incr nopcounter;
-     acc
-  | Icond (_, _, s1, s2, _) ->
-     c.num_rems <- c.num_rems + 1;
-     let ns1 = get_node c s1 in
-     let ns2 = get_node c s2 in
-     let npc = get_node c pc in
-     npc.inst <- ICOND(ns1, ns2);
-     npc::acc
-  | _ -> acc
-
-(* try to change a condition into a branch
-[acc] is the current accumulator of conditions to consider in the next iteration of repeat_change_cond
-*)
-let try_change_cond c acc pc =
-  match pc.inst with
-  | ICOND(s1,s2) ->
-     let ts1 = target c s1 in
-     let ts2 = target c s2 in
-     if ts1 == ts2 then (
-       pc.link <- ts1;
-       c.num_rems <- c.num_rems - 1;
+module LANG = struct
+  type code_unit = RTL.instruction
+  type funct = RTL.coq_function
+end
+
+module OPT = struct
+  let langname = "RTL"
+  let limit_tunneling = None
+  let debug_flag = ref false
+  let final_dump = false
+end
+
+module rec T: sig
+  val get_node: cfg -> positive -> node
+  val set_branch: cfg -> positive -> node -> unit
+  val debug: ('a, out_channel, unit) format -> 'a
+  val string_of_labeli: ('a, node) Hashtbl.t -> 'a -> string
+  exception BugOnPC of int
+  val branch_target: RTL.coq_function -> (positive * Z.t) PTree.t
+
+end = Tunnelinglibs.Tunneling(LANG)(OPT)(FUNS)
+
+and FUNS: sig
+  val build_simplified_cfg: cfg -> node list -> positive -> LANG.code_unit -> node list
+
+  val print_code_unit: cfg -> bool -> int * LANG.code_unit -> bool
+
+  val fn_code: LANG.funct -> LANG.code_unit PTree.t
+  val fn_entrypoint: LANG.funct -> positive
+
+  val check_code_unit: (positive * integer) PTree.t -> positive -> LANG.code_unit -> unit
+end = struct
+  let build_simplified_cfg c acc pc i =
+    match i with
+    | Inop s ->
+       let ns = T.get_node c s in
+       T.set_branch c pc ns;
+       incr nopcounter;
        acc
-     ) else
-       pc::acc
-  | _  -> raise (BugOnPC (lab_i pc)) (* ICOND expected *)
-
-(* repeat [try_change_cond] until no condition is changed into a branch *)
-let rec repeat_change_cond c =
-  c.iter_num <- c.iter_num + 1;
-  debug "++ RTLTunneling.branch_target %d: remaining number of conds to consider = %d\n" (c.iter_num) (c.num_rems);
-  let old = c.num_rems in
-  c.rems <- List.fold_left (try_change_cond c) [] c.rems;
-  let curr = c.num_rems in
-  let continue =
-    match limit_tunneling with
-    | Some n -> curr < old && c.iter_num < n
-    | None -> curr < old
-  in
-  if continue
-  then repeat_change_cond c
-
-
-(* compute the final distance of each nop nodes to its target *)
-let undef_dist = -1
-let self_dist = undef_dist-1
-let rec dist n =
-  if n.dist = undef_dist
-  then (
-    n.dist <- self_dist; (* protection against an unexpected loop in the data-structure *)
-    n.dist <-
-      (match n.inst with
-       | OTHER -> 0
-       | INOP p -> 1 + dist p
-       | ICOND (p1,p2) -> 1 + (max (dist p1) (dist p2)));
-    n.dist
-  ) else if n.dist=self_dist then raise (BugOnPC (lab_i n))
-    else n.dist
-
-let final_export f c =
-  let count = ref 0 in
-  let filter_nops_init_dist _ n acc =
-    let tn = target c n in
-    if tn == n
-    then (
-      n.dist <- 0; (* force [n] to be a base case in the recursion of [dist] *)
-      acc
-    ) else (
-      n.dist <- undef_dist; (* force [dist] to compute the actual [n.dist] *)
-      count := !count+1;
-      n::acc
-    )
-  in
-  let nops = Hashtbl.fold filter_nops_init_dist c.nodes [] in
-  let res = List.fold_left (fun acc n -> PTree.set (lab_p n) (lab_p n.link, Z.of_uint (dist n)) acc) PTree.empty nops in
-  debug "* RTLTunneling.branch_target: [stats] initial number of nops = %d\n" !nopcounter;
-  debug "* RTLTunneling.branch_target: [stats] final number of eliminated nops = %d\n" !count;
-  res
-
-(*********************************************)
-(*** START: printing and debugging functions *)
-
-let string_of_labeli nodes ipc =
-  try
-    let pc = Hashtbl.find nodes ipc in
-    if pc.link == pc
-    then Printf.sprintf "(Target@%d)" (dist pc)
-    else Printf.sprintf "(Nop %d @%d)" (lab_i pc.link) (dist pc)
-  with
-    Not_found -> ""
-
-let print_instr c println (pc, i) =
-  match i with
-  | Inop s -> (if println then debug "\n"); debug "%d:Inop %d %s\n" pc (P.to_int s) (string_of_labeli c.nodes pc); false
-  | Icond (_, _, s1, s2, _) -> (if println then debug "\n"); debug "%d:Icond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (string_of_labeli c.nodes pc); false
-  | _ -> debug "%d " pc; true
-
-
-let print_cfg f c  =
-  let a = Array.of_list (PTree.fold (fun acc pc i -> (P.to_int pc,i)::acc) f.fn_code []) in
-  Array.fast_sort (fun (i1,_) (i2,_) -> i2 - i1) a;
-  let ep = P.to_int f.fn_entrypoint in
-  debug "entrypoint: %d %s\n" ep (string_of_labeli c.nodes ep);
-  let println = Array.fold_left (print_instr c) false a in
-  (if println then debug "\n");debug "remaining cond:";
-  List.iter (fun n -> debug "%d " (lab_i n)) c.rems;
-  debug "\n"
-
-(*************************************************************)
-(* Copy-paste of the extracted code of the verifier          *)
-(* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *)
-
-let get td pc =
-  match PTree.get pc td with
-  | Some p -> let (t0, d) = p in (t0, d)
-  | None -> (pc, Z.of_uint 0)
-
-let check_instr td pc i =
-  match PTree.get pc td with
-  | Some p ->
-      let (tpc, dpc) = p in
-      let dpc0 = dpc in begin
-        match i with
-        | Inop s ->
-          let (ts, ds) = get td s in
-          if peq tpc ts
-          then if zlt ds dpc0
-               then ()
-               else raise (BugOnPC (P.to_int pc))
-          else raise (BugOnPC (P.to_int pc))
-        | Icond (_, _, s1, s2, _) ->
-          let (ts1, ds1) = get td s1 in
-          let (ts2, ds2) = get td s2 in
-          if peq tpc ts1
-          then if peq tpc ts2
-               then if zlt ds1 dpc0
-                    then if zlt ds2 dpc0
-                         then ()
-                         else raise (BugOnPC (P.to_int pc))
-                    else raise (BugOnPC (P.to_int pc))
-               else raise (BugOnPC (P.to_int pc))
-          else raise (BugOnPC (P.to_int pc))
-        | _ ->
-          raise (BugOnPC (P.to_int pc)) end
-  | None -> ()
-
-(** val check_code : coq_UF -> code -> unit res **)
-
-let check_code td c =
-  PTree.fold (fun _ pc i -> check_instr td pc i) c (())
-
-(*** END: copy-paste & debugging functions *******)
+    | Icond (_, _, s1, s2, _) ->
+       c.num_rems <- c.num_rems + 1;
+       let ns1 = T.get_node c s1 in
+       let ns2 = T.get_node c s2 in
+       let npc = T.get_node c pc in
+       npc.inst <- COND(ns1, ns2);
+       npc::acc
+    | _ -> acc
+
+  let print_code_unit c println (pc, i) =
+    match i with
+    | Inop s -> (if println then T.debug "\n"); T.debug "%d:Inop %d %s\n" pc (P.to_int s) (T.string_of_labeli c.nodes pc); false
+    | Icond (_, _, s1, s2, _) -> (if println then T.debug "\n"); T.debug "%d:Icond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (T.string_of_labeli c.nodes pc); false
+    | _ -> T.debug "%d " pc; true
+
+  let fn_code f = f.fn_code
+  let fn_entrypoint f = f.fn_entrypoint
+
+  (*************************************************************)
+  (* Copy-paste of the extracted code of the verifier          *)
+  (* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *)
+
+  let get td pc =
+    match PTree.get pc td with
+    | Some p -> let (t0, d) = p in (t0, d)
+    | None -> (pc, Z.of_uint 0)
+
+  let check_code_unit td pc i =
+    match PTree.get pc td with
+    | Some p ->
+        let (tpc, dpc) = p in
+        let dpc0 = dpc in begin
+          match i with
+          | Inop s ->
+            let (ts, ds) = get td s in
+            if peq tpc ts
+            then if zlt ds dpc0
+                 then ()
+                 else raise (T.BugOnPC (P.to_int pc))
+            else raise (T.BugOnPC (P.to_int pc))
+          | Icond (_, _, s1, s2, _) ->
+            let (ts1, ds1) = get td s1 in
+            let (ts2, ds2) = get td s2 in
+            if peq tpc ts1
+            then if peq tpc ts2
+                 then if zlt ds1 dpc0
+                      then if zlt ds2 dpc0
+                           then ()
+                           else raise (T.BugOnPC (P.to_int pc))
+                      else raise (T.BugOnPC (P.to_int pc))
+                 else raise (T.BugOnPC (P.to_int pc))
+            else raise (T.BugOnPC (P.to_int pc))
+          | _ ->
+            raise (T.BugOnPC (P.to_int pc)) end
+    | None -> ()
+
+end
+
+let branch_target = T.branch_target
 
-let branch_target f =
-  debug "* RTLTunneling.branch_target: starting on a new function\n";
-  if limit_tunneling <> None then debug "* WARNING: limit_tunneling <> None\n";
-  let c = { nodes = Hashtbl.create 100; rems = []; num_rems = 0; iter_num = 0 } in
-  c.rems <- PTree.fold (build_simplified_cfg c) f.fn_code [];
-  repeat_change_cond c;
-  let res = final_export f c in
-  if !debug_flag then (
-    try
-      check_code res f.fn_code;
-      if final_dump then print_cfg f c;
-    with e -> (
-      print_cfg f c;
-      check_code res f.fn_code
-    )
-  );
-  res
diff --git a/backend/Tunnelinglibs.ml b/backend/Tunnelinglibs.ml
index ce384dac..e1e61d68 100644
--- a/backend/Tunnelinglibs.ml
+++ b/backend/Tunnelinglibs.ml
@@ -20,7 +20,6 @@ See [Tunneling.v] and [RTLTunneling.v]
 
 *)
 
-open Coqlib
 open Maps
 open Camlcoq
 
@@ -42,7 +41,8 @@ and node = {
 
 type cfg_node = (int, node) Hashtbl.t
 
-type lnode = P.t
+type positive = P.t
+type integer = Z.t
 
 (* type of the (simplified) CFG *)
 type cfg = {
@@ -64,19 +64,15 @@ module Tunneling = functor
     val final_dump: bool (* set to true to have a more verbose debugging *)
   end)
   (FUNS: sig
-    type lang
-    type code_type
-
     (* build [c.nodes] and accumulate in [acc] conditions at beginning of LTL basic-blocks *)
-    val build_simplified_cfg: cfg -> cfg_node list -> lnode ->
-      LANG.code_unit -> cfg_node list
+    val build_simplified_cfg: cfg -> node list -> positive -> LANG.code_unit -> node list
 
-    val print_code_unit: cfg -> bool -> (int * LANG.code_unit) -> bool
+    val print_code_unit: cfg -> bool -> int * LANG.code_unit -> bool
 
     val fn_code: LANG.funct -> LANG.code_unit PTree.t
-    val fn_entrypoint: LANG.funct -> lnode
+    val fn_entrypoint: LANG.funct -> positive
 
-    val check_code_unit: LANG.code_unit PTree.t -> lnode -> LANG.code_unit -> unit
+    val check_code_unit: (positive * integer) PTree.t -> positive -> LANG.code_unit -> unit
   end)
 -> struct
 
@@ -151,8 +147,7 @@ let try_change_cond c acc pc =
 (* repeat [try_change_cond] until no condition is changed into a branch *)
 let rec repeat_change_cond c =
   c.iter_num <- c.iter_num + 1;
-  debug "++ %sTunneling.branch_target %d: remaining number of conds to consider
-  = %d\n" OPT.langname (c.iter_num) (c.num_rems);
+  debug "++ %sTunneling.branch_target %d: remaining number of conds to consider = %d\n" OPT.langname (c.iter_num) (c.num_rems);
   let old =  c.num_rems in
   c.rems <- List.fold_left (try_change_cond c) [] c.rems;
   let curr = c.num_rems in
@@ -234,4 +229,22 @@ let check_code td c =
 
 (*** END: copy-paste & debugging functions *******)
 
+let branch_target f =
+  debug "* %sTunneling.branch_target: starting on a new function\n" OPT.langname;
+  if OPT.limit_tunneling <> None then debug "* WARNING: limit_tunneling <> None\n";
+  let c = { nodes = Hashtbl.create 100; rems = []; num_rems = 0; iter_num = 0 } in
+  c.rems <- PTree.fold (FUNS.build_simplified_cfg c) (FUNS.fn_code f) [];
+  repeat_change_cond c;
+  let res = final_export f c in
+  if !OPT.debug_flag then (
+    try
+      check_code res (FUNS.fn_code f);
+      if OPT.final_dump then print_cfg f c;
+    with e -> (
+      print_cfg f c;
+      check_code res (FUNS.fn_code f)
+    )
+  );
+  res
+
 end
-- 
cgit 


From 21278bd87e89210bcc287116f6e35fc1b52d0df2 Mon Sep 17 00:00:00 2001
From: "nicolas.nardino" <nicolas.nardino@ens-lyon.fr>
Date: Wed, 16 Jun 2021 20:27:31 +0200
Subject: Now working, tests show a decrease in spillage

Should still find a proper way to treat the case mentioned in earlier
commits
---
 scheduling/InstructionScheduler.ml | 92 +++++++++++++++++++-------------------
 scheduling/RTLpathScheduleraux.ml  | 28 ++++++------
 2 files changed, 61 insertions(+), 59 deletions(-)

diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml
index 72222022..df8a4e4e 100644
--- a/scheduling/InstructionScheduler.ml
+++ b/scheduling/InstructionScheduler.ml
@@ -419,11 +419,11 @@ let reg_pres_scheduler (problem : problem) : solution option =
 
       let compare_regs i x y =
         let pyi = List.fold_left (fold_delta i) 0 mentions.(y) in
-        print_int y;
-        print_string " ";
-        print_int pyi;
-        print_newline ();
-        flush stdout;
+        (* print_int y;
+         * print_string " ";
+         * print_int pyi;
+         * print_newline ();
+         * flush stdout; *)
         let pxi = List.fold_left (fold_delta i) 0 mentions.(x) in
         match pyi - pxi with
         | 0 -> (match priorities.(y) - priorities.(x) with
@@ -433,9 +433,9 @@ let reg_pres_scheduler (problem : problem) : solution option =
 
       (** t is the register class *)
       let sched_CSR t ready usages =
-        print_string "looking for max delta";
-        print_newline ();
-        flush stdout;
+        (* print_string "looking for max delta";
+         * print_newline ();
+         * flush stdout; *)
         let result = ref (-1) in
         iter (fun i ->
             if vector_less_equal usages.(i) current_resources
@@ -445,7 +445,7 @@ let reg_pres_scheduler (problem : problem) : solution option =
     end
   in
 
-  let max_time = bound_max_time problem in
+  let max_time = bound_max_time problem + 5*nr_instructions in
   let ready = Array.make max_time InstrSet.empty in
 
   Array.iteri (fun i preds ->
@@ -486,34 +486,34 @@ let reg_pres_scheduler (problem : problem) : solution option =
     let result = ref (-1) in
     try
       Array.iteri (fun i avlregs ->
-          print_string "avlregs: ";
-          print_int i;
-          print_string " ";
-          print_int avlregs;
-          print_newline ();
-          print_string "live regs: ";
-          print_int (Hashtbl.length live_regs);
-          print_newline ();
-          flush stdout;
+          (* print_string "avlregs: ";
+           * print_int i;
+           * print_string " ";
+           * print_int avlregs;
+           * print_newline ();
+           * print_string "live regs: ";
+           * print_int (Hashtbl.length live_regs);
+           * print_newline ();
+           * flush stdout; *)
           if !cnt < 5 && avlregs <= regs_thresholds.(i)
           then (
             let maybe = InstrSet.sched_CSR i ready usages in
-            print_string "maybe\n";
-            print_int maybe;
-            print_newline ();
-            flush stdout;
-            if maybe > 0 &&
-                 let delta = 
-                 List.fold_left (fold_delta i) 0 mentions.(maybe) in
-                 print_string "delta ";
-                 print_int delta;
-                 print_newline ();
-                 flush stdout;
-                 delta
-                 >= 0 then
-              (vector_subtract usages.(maybe) current_resources;
-               result := maybe)
-            else incr cnt;
+            (* print_string "maybe\n";
+             * print_int maybe;
+             * print_newline ();
+             * flush stdout; *)
+            (if maybe > 0 &&
+                  let delta = 
+                    List.fold_left (fold_delta i) 0 mentions.(maybe) in
+                  (* print_string "delta ";
+                   * print_int delta;
+                   * print_newline ();
+                   * flush stdout; *)
+                  delta > 0
+             then
+               (vector_subtract usages.(maybe) current_resources;
+                result := maybe)
+             else incr cnt);
             raise Exit)) available_regs;
       InstrSet.iter (fun i ->
           if vector_less_equal usages.(i) current_resources
@@ -534,10 +534,10 @@ let reg_pres_scheduler (problem : problem) : solution option =
               problem.instruction_usages with
       | -1 -> advance_time()
       | i -> (assert(times.(i) < 0);
-            (print_string "INSTR ISSUED: ";
-             print_int i;
-             print_newline ();
-             flush stdout;
+            ((* print_string "INSTR ISSUED: ";
+              * print_int i;
+              * print_newline ();
+              * flush stdout; *)
              cnt := 0;
              List.iter (fun (r,b) ->
                  if b then
@@ -546,18 +546,18 @@ let reg_pres_scheduler (problem : problem) : solution option =
                     | Some (t, n) ->
                        Hashtbl.remove counts r;
                        if n = 1 then
-                         (print_string "yaaaaaaaaaaaas ";
-                          print_int (Camlcoq.P.to_int r);
-                          print_newline ();
+                         ((* print_string "yaaaaaaaaaaaas ";
+                           * print_int (Camlcoq.P.to_int r);
+                           * print_newline (); *)
                           Hashtbl.remove live_regs r;
                           available_regs.(t)
                           <- available_regs.(t) + 1))
                  else
                    let t = class_r r in
                    match Hashtbl.find_opt live_regs r with
-                   | None -> (print_string "noooooooooo ";
-                             print_int (Camlcoq.P.to_int r);
-                             print_newline ();
+                   | None -> ((* print_string "noooooooooo ";
+                              * print_int (Camlcoq.P.to_int r);
+                              * print_newline (); *)
                              Hashtbl.add live_regs r t;
                              available_regs.(t)
                              <- available_regs.(t) - 1)
@@ -571,8 +571,10 @@ let reg_pres_scheduler (problem : problem) : solution option =
                    match earliest_time instr_to with
                    | -1 -> ()
                    | to_time ->
+                      ((* DebugPrint.debug "TO TIME %d : %d\n" to_time
+                        *   (Array.length ready); *)
                       ready.(to_time)
-                      <- InstrSet.add instr_to ready.(to_time)
+                      <- InstrSet.add instr_to ready.(to_time))
                ) successors.(i);
              successors.(i) <- []
             )
diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml
index 8df3edbc..f3f09954 100644
--- a/scheduling/RTLpathScheduleraux.ml
+++ b/scheduling/RTLpathScheduleraux.ml
@@ -146,20 +146,20 @@ let reference_counting (seqa : (instruction * Regset.t) array)
          retr.(i) <- [reg, true]
       | _ -> ()
     ) seqa;
-  print_string "mentions\n";
-  Array.iteri (fun i l ->
-      print_int i;
-      print_string ": [";
-      List.iter (fun (r, b) ->
-          print_int (Camlcoq.P.to_int r);
-          print_string ":";
-          print_string (if b then "a:" else "d");
-          if b then print_int (snd (Hashtbl.find retl r));
-          print_string ", "
-        ) l;
-      print_string "]\n";
-      flush stdout;
-    ) retr;
+  (* print_string "mentions\n";
+   * Array.iteri (fun i l ->
+   *     print_int i;
+   *     print_string ": [";
+   *     List.iter (fun (r, b) ->
+   *         print_int (Camlcoq.P.to_int r);
+   *         print_string ":";
+   *         print_string (if b then "a:" else "d");
+   *         if b then print_int (snd (Hashtbl.find retl r));
+   *         print_string ", "
+   *       ) l;
+   *     print_string "]\n";
+   *     flush stdout;
+   *   ) retr; *)
   retl, retr
 
   
-- 
cgit 


From 4413c27d6c6a3d69df34955d9d453c38b32174c7 Mon Sep 17 00:00:00 2001
From: "nicolas.nardino" <nicolas.nardino@ens-lyon.fr>
Date: Thu, 17 Jun 2021 15:38:13 +0200
Subject: Add option to set thresold and support for riscv

---
 driver/Clflags.ml                  | 1 +
 driver/Driver.ml                   | 2 ++
 riscV/Machregsaux.ml               | 2 ++
 scheduling/InstructionScheduler.ml | 9 +++++++--
 4 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/driver/Clflags.ml b/driver/Clflags.ml
index fa17c2d9..1f31bd3e 100644
--- a/driver/Clflags.ml
+++ b/driver/Clflags.ml
@@ -115,4 +115,5 @@ let option_inline_auto_threshold = ref 0
 let option_profile_arcs = ref false
 let option_fbranch_probabilities = ref true
 let option_debug_compcert = ref 0
+let option_regpres_threshold = ref 5
 let main_function_name = ref "main"
diff --git a/driver/Driver.ml b/driver/Driver.ml
index 5a8c7f2c..fa187f26 100644
--- a/driver/Driver.ml
+++ b/driver/Driver.ml
@@ -211,6 +211,7 @@ Processing options:
   -fprepass      Perform prepass scheduling (only on some architectures) [on]
   -fprepass= <optim> Perform postpass scheduling with the specified optimization [list]
                    (<optim>=list: list scheduling, <optim>=revlist: reverse list scheduling, <optim>=regpres: list scheduling aware of register pressure, <optim>=zigzag: zigzag scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles)
+  -regpres-threshold n   With `-fprepass= regpres`, set threshold value for number of free registers before trying to decrease register pressure
   -fpostpass     Perform postpass scheduling (only for K1 architecture) [on] 
   -fpostpass= <optim> Perform postpass scheduling with the specified optimization [list]
                    (<optim>=list: list scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles)
@@ -342,6 +343,7 @@ let cmdline_actions =
   Exact "-fprofile-use=", String (fun s -> Profilingaux.load_profiling_info s);
   Exact "-finline-auto-threshold", Integer (fun n -> option_inline_auto_threshold := n);
   Exact "-debug-compcert", Integer (fun n -> option_debug_compcert := n);
+  Exact "-regpres-threshold", Integer (fun n -> option_regpres_threshold := n);
   Exact "-fsmall-data", Integer(fun n -> option_small_data := n);
   Exact "-fsmall-const", Integer(fun n -> option_small_const := n);
   Exact "-ffloat-const-prop", Integer(fun n -> option_ffloatconstprop := n); 
diff --git a/riscV/Machregsaux.ml b/riscV/Machregsaux.ml
index 840943e7..e3e47946 100644
--- a/riscV/Machregsaux.ml
+++ b/riscV/Machregsaux.ml
@@ -18,3 +18,5 @@ let class_of_type = function
   | AST.Tint | AST.Tlong -> 0
   | AST.Tfloat | AST.Tsingle -> 1
   | AST.Tany32 | AST.Tany64 -> assert false
+
+let nr_regs = [| 26; 32|]
diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml
index df8a4e4e..5b4c87f4 100644
--- a/scheduling/InstructionScheduler.ml
+++ b/scheduling/InstructionScheduler.ml
@@ -360,8 +360,13 @@ let reg_pres_scheduler (problem : problem) : solution option =
 
   let nr_types_regs = Array.length available_regs in
 
-  let regs_thresholds = Array.init nr_types_regs
-                          (fun i -> 5) in
+  let thres = Array.fold_left (min)
+                (max !(Clflags.option_regpres_threshold) 0)
+                Machregsaux.nr_regs
+  in
+  
+
+  let regs_thresholds = Array.make nr_types_regs thres in
   (* placeholder value *)
   
   let class_r r =
-- 
cgit 


From 5a542d158d3bde832e38b65ad5347299fbe7ee32 Mon Sep 17 00:00:00 2001
From: Pierre Goutagny <pierre.goutagny@ens-lyon.fr>
Date: Thu, 17 Jun 2021 15:58:29 +0200
Subject: Simplify tunneling factorisation

The recursive module definitions required unnecessarily long expicit
signatures for little added legibility.
---
 backend/RTLTunnelingaux.ml |  65 ++++-------
 backend/Tunnelinglibs.ml   | 276 ++++++++++++++++++++++++---------------------
 2 files changed, 173 insertions(+), 168 deletions(-)

diff --git a/backend/RTLTunnelingaux.ml b/backend/RTLTunnelingaux.ml
index 5fe327d4..9333e357 100644
--- a/backend/RTLTunnelingaux.ml
+++ b/backend/RTLTunnelingaux.ml
@@ -26,8 +26,6 @@ open Maps
 open Camlcoq
 open Tunnelinglibs
 
-let nopcounter = ref 0
-
 module LANG = struct
   type code_unit = RTL.instruction
   type funct = RTL.coq_function
@@ -40,60 +38,44 @@ module OPT = struct
   let final_dump = false
 end
 
-module rec T: sig
-  val get_node: cfg -> positive -> node
-  val set_branch: cfg -> positive -> node -> unit
-  val debug: ('a, out_channel, unit) format -> 'a
-  val string_of_labeli: ('a, node) Hashtbl.t -> 'a -> string
-  exception BugOnPC of int
-  val branch_target: RTL.coq_function -> (positive * Z.t) PTree.t
-
-end = Tunnelinglibs.Tunneling(LANG)(OPT)(FUNS)
-
-and FUNS: sig
-  val build_simplified_cfg: cfg -> node list -> positive -> LANG.code_unit -> node list
-
-  val print_code_unit: cfg -> bool -> int * LANG.code_unit -> bool
+module Partial = Tunnelinglibs.Tunneling(LANG)(OPT)
 
-  val fn_code: LANG.funct -> LANG.code_unit PTree.t
-  val fn_entrypoint: LANG.funct -> positive
-
-  val check_code_unit: (positive * integer) PTree.t -> positive -> LANG.code_unit -> unit
-end = struct
+module FUNS = struct
   let build_simplified_cfg c acc pc i =
     match i with
     | Inop s ->
-       let ns = T.get_node c s in
-       T.set_branch c pc ns;
+       let ns = get_node c s in
+       set_branch c pc ns;
        incr nopcounter;
        acc
     | Icond (_, _, s1, s2, _) ->
        c.num_rems <- c.num_rems + 1;
-       let ns1 = T.get_node c s1 in
-       let ns2 = T.get_node c s2 in
-       let npc = T.get_node c pc in
+       let ns1 = get_node c s1 in
+       let ns2 = get_node c s2 in
+       let npc = get_node c pc in
        npc.inst <- COND(ns1, ns2);
        npc::acc
     | _ -> acc
 
   let print_code_unit c println (pc, i) =
     match i with
-    | Inop s -> (if println then T.debug "\n"); T.debug "%d:Inop %d %s\n" pc (P.to_int s) (T.string_of_labeli c.nodes pc); false
-    | Icond (_, _, s1, s2, _) -> (if println then T.debug "\n"); T.debug "%d:Icond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (T.string_of_labeli c.nodes pc); false
-    | _ -> T.debug "%d " pc; true
+    | Inop s -> (if println then Partial.debug "\n");
+        Partial.debug "%d:Inop %d %s\n" pc (P.to_int s) (string_of_labeli c.nodes pc);
+        false
+    | Icond (_, _, s1, s2, _) -> (if println then Partial.debug "\n");
+        Partial.debug "%d:Icond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (string_of_labeli c.nodes pc);
+        false
+    | _ -> Partial.debug "%d " pc;
+        true
 
   let fn_code f = f.fn_code
   let fn_entrypoint f = f.fn_entrypoint
 
+
   (*************************************************************)
   (* Copy-paste of the extracted code of the verifier          *)
   (* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *)
 
-  let get td pc =
-    match PTree.get pc td with
-    | Some p -> let (t0, d) = p in (t0, d)
-    | None -> (pc, Z.of_uint 0)
-
   let check_code_unit td pc i =
     match PTree.get pc td with
     | Some p ->
@@ -105,8 +87,8 @@ end = struct
             if peq tpc ts
             then if zlt ds dpc0
                  then ()
-                 else raise (T.BugOnPC (P.to_int pc))
-            else raise (T.BugOnPC (P.to_int pc))
+                 else raise (BugOnPC (P.to_int pc))
+            else raise (BugOnPC (P.to_int pc))
           | Icond (_, _, s1, s2, _) ->
             let (ts1, ds1) = get td s1 in
             let (ts2, ds2) = get td s2 in
@@ -115,15 +97,16 @@ end = struct
                  then if zlt ds1 dpc0
                       then if zlt ds2 dpc0
                            then ()
-                           else raise (T.BugOnPC (P.to_int pc))
-                      else raise (T.BugOnPC (P.to_int pc))
-                 else raise (T.BugOnPC (P.to_int pc))
-            else raise (T.BugOnPC (P.to_int pc))
+                           else raise (BugOnPC (P.to_int pc))
+                      else raise (BugOnPC (P.to_int pc))
+                 else raise (BugOnPC (P.to_int pc))
+            else raise (BugOnPC (P.to_int pc))
           | _ ->
-            raise (T.BugOnPC (P.to_int pc)) end
+            raise (BugOnPC (P.to_int pc)) end
     | None -> ()
 
 end
 
+module T = Partial.T(FUNS)
 let branch_target = T.branch_target
 
diff --git a/backend/Tunnelinglibs.ml b/backend/Tunnelinglibs.ml
index e1e61d68..1bb35f7a 100644
--- a/backend/Tunnelinglibs.ml
+++ b/backend/Tunnelinglibs.ml
@@ -39,51 +39,24 @@ and node = {
     mutable tag: int
   }
 
-type cfg_node = (int, node) Hashtbl.t
-
 type positive = P.t
 type integer = Z.t
 
 (* type of the (simplified) CFG *)
 type cfg = {
-    nodes: cfg_node;
+    nodes: (int, node) Hashtbl.t;
     mutable rems: node list; (* remaining conditions that may become lbranch or not *)
     mutable num_rems: int;
     mutable iter_num: int (* number of iterations in elimination of conditions *)
   }
 
-module Tunneling = functor
-  (LANG: sig
-    type code_unit (* the type of a node of the code cfg (an instruction or a bblock *)
-    type funct
-  end)
-  (OPT: sig
-    val langname: string
-    val limit_tunneling: int option (* for debugging: [Some x] limit the number of iterations *)
-    val debug_flag: bool ref
-    val final_dump: bool (* set to true to have a more verbose debugging *)
-  end)
-  (FUNS: sig
-    (* build [c.nodes] and accumulate in [acc] conditions at beginning of LTL basic-blocks *)
-    val build_simplified_cfg: cfg -> node list -> positive -> LANG.code_unit -> node list
-
-    val print_code_unit: cfg -> bool -> int * LANG.code_unit -> bool
-
-    val fn_code: LANG.funct -> LANG.code_unit PTree.t
-    val fn_entrypoint: LANG.funct -> positive
-
-    val check_code_unit: (positive * integer) PTree.t -> positive -> LANG.code_unit -> unit
-  end)
--> struct
-
-let debug fmt =
-  if !OPT.debug_flag then Printf.eprintf fmt
-  else Printf.ifprintf stderr fmt
-
 exception BugOnPC of int
 
-let lab_i (n: node): int = fst n.lab
-let lab_p (n: node): P.t = snd n.lab
+(* keeps track of the total number of nops seen, for debugging purposes *)
+let nopcounter = ref 0
+
+(* General functions that do not require language-specific context, and that
+ are used for building language-specific functions *)
 
 let rec target c n = (* inspired from the "find" of union-find algorithm *)
   match n.inst with
@@ -126,41 +99,14 @@ let set_branch c p s =
       let n = { lab = (li,p); inst = BRANCH s; link = target c s; dist = 0; tag = 0 } in
       Hashtbl.add c.nodes li n
 
+let get td pc =
+  match PTree.get pc td with
+  | Some p -> let (t0, d) = p in (t0, d)
+  | None -> (pc, Z.of_uint 0)
 
+let lab_i (n: node): int = fst n.lab
+let lab_p (n: node): P.t = snd n.lab
 
-(* try to change a condition into a branch
-[acc] is the current accumulator of conditions to consider in the next iteration of repeat_change_cond
-*)
-let try_change_cond c acc pc =
-  match pc.inst with
-  | COND(s1,s2) ->
-     let ts1 = target c s1 in
-     let ts2 = target c s2 in
-     if ts1 == ts2 then (
-       pc.link <- ts1;
-       c.num_rems <- c.num_rems - 1;
-       acc
-     ) else
-       pc::acc
-  | _  -> raise (BugOnPC (lab_i pc)) (* COND expected *)
-
-(* repeat [try_change_cond] until no condition is changed into a branch *)
-let rec repeat_change_cond c =
-  c.iter_num <- c.iter_num + 1;
-  debug "++ %sTunneling.branch_target %d: remaining number of conds to consider = %d\n" OPT.langname (c.iter_num) (c.num_rems);
-  let old =  c.num_rems in
-  c.rems <- List.fold_left (try_change_cond c) [] c.rems;
-  let curr = c.num_rems in
-  let continue =
-    match OPT.limit_tunneling with
-    | Some n -> curr < old && c.iter_num < n
-    | None -> curr < old
-  in
-  if continue
-  then repeat_change_cond c
-
-
-(* compute the final distance of each nop nodes to its target *)
 let undef_dist = -1
 let self_dist = undef_dist-1
 let rec dist n =
@@ -176,29 +122,6 @@ let rec dist n =
   ) else if n.dist=self_dist then raise (BugOnPC (lab_i n))
     else n.dist
 
-let final_export f c =
-  let count = ref 0 in
-  let filter_nops_init_dist _ n acc =
-    let tn = target c n in
-    if tn == n
-    then (
-      n.dist <- 0; (* force [n] to be a base case in the recursion of [dist] *)
-      acc
-    ) else (
-      n.dist <- undef_dist; (* force [dist] to compute the actual [n.dist] *)
-      count := !count+1;
-      n::acc
-    )
-  in
-  let nops = Hashtbl.fold filter_nops_init_dist c.nodes [] in
-  let res = List.fold_left (fun acc n -> PTree.set (lab_p n) (lab_p n.link, Z.of_uint (dist n)) acc) PTree.empty nops in
-  debug "* %sTunneling.branch_target: final number of eliminated nops = %d\n"
-  OPT.langname !count;
-  res
-
-(*********************************************)
-(*** START: printing and debugging functions *)
-
 let string_of_labeli nodes ipc =
   try
     let pc = Hashtbl.find nodes ipc in
@@ -208,43 +131,142 @@ let string_of_labeli nodes ipc =
   with
     Not_found -> ""
 
-let print_cfg (f: LANG.funct) c  =
-  let a = Array.of_list (PTree.fold (fun acc pc cu -> (P.to_int pc,cu)::acc) (FUNS.fn_code f) []) in
-  Array.fast_sort (fun (i1,_) (i2,_) -> i2 - i1) a;
-  let ep = P.to_int (FUNS.fn_entrypoint f) in
-  debug "entrypoint: %d %s\n" ep (string_of_labeli c.nodes ep);
-  let println = Array.fold_left (FUNS.print_code_unit c) false a in
-  (if println then debug "\n");debug "remaining cond:";
-  List.iter (fun n -> debug "%d " (lab_i n)) c.rems;
-  debug "\n"
-
-(*************************************************************)
-(* Copy-paste of the extracted code of the verifier          *)
-(* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *)
-
-(** val check_code : coq_UF -> code -> unit res **)
-
-let check_code td c =
-  PTree.fold (fun _ pc cu -> FUNS.check_code_unit td pc cu) c (())
-
-(*** END: copy-paste & debugging functions *******)
-
-let branch_target f =
-  debug "* %sTunneling.branch_target: starting on a new function\n" OPT.langname;
-  if OPT.limit_tunneling <> None then debug "* WARNING: limit_tunneling <> None\n";
-  let c = { nodes = Hashtbl.create 100; rems = []; num_rems = 0; iter_num = 0 } in
-  c.rems <- PTree.fold (FUNS.build_simplified_cfg c) (FUNS.fn_code f) [];
-  repeat_change_cond c;
-  let res = final_export f c in
-  if !OPT.debug_flag then (
-    try
-      check_code res (FUNS.fn_code f);
-      if OPT.final_dump then print_cfg f c;
-    with e -> (
-      print_cfg f c;
-      check_code res (FUNS.fn_code f)
-    )
-  );
-  res
+(*
+ * When given the necessary types and options as context, and then some
+ * language-specific functions that cannot be factorised between LTL and RTL, the
+ * `Tunneling` functor returns a module containing the corresponding
+ * `branch_target` function.
+ *)
+
+module Tunneling = functor
+  (* Language-specific types *)
+  (LANG: sig
+    type code_unit (* the type of a node of the code cfg (an instruction or a bblock *)
+    type funct (* type of internal functions *)
+  end)
 
+  (* Compilation options for debugging *)
+  (OPT: sig
+    val langname: string
+    val limit_tunneling: int option (* for debugging: [Some x] limit the number of iterations *)
+    val debug_flag: bool ref
+    val final_dump: bool (* set to true to have a more verbose debugging *)
+  end)
+  -> struct
+
+  (* The `debug` function uses values from `OPT`, and is used in functions passed to `F`
+     so it must be defined between the two *)
+  let debug fmt =
+    if !OPT.debug_flag then Printf.eprintf fmt
+    else Printf.ifprintf stderr fmt
+
+  module T
+    (* Language-specific functions *)
+    (FUNS: sig
+      (* build [c.nodes] and accumulate in [acc] conditions at beginning of LTL basic-blocks *)
+      val build_simplified_cfg: cfg -> node list -> positive -> LANG.code_unit -> node list
+      val print_code_unit: cfg -> bool -> int * LANG.code_unit -> bool
+      val fn_code: LANG.funct -> LANG.code_unit PTree.t
+      val fn_entrypoint: LANG.funct -> positive
+      val check_code_unit: (positive * integer) PTree.t -> positive -> LANG.code_unit -> unit
+    end)
+    (* only export what's needed *)
+    : sig val branch_target: LANG.funct -> (positive * integer) PTree.t end
+  = struct
+
+    (* try to change a condition into a branch [acc] is the current accumulator of
+       conditions to consider in the next iteration of repeat_change_cond *)
+    let try_change_cond c acc pc =
+      match pc.inst with
+      | COND(s1,s2) ->
+         let ts1 = target c s1 in
+         let ts2 = target c s2 in
+         if ts1 == ts2 then (
+           pc.link <- ts1;
+           c.num_rems <- c.num_rems - 1;
+           acc
+         ) else
+           pc::acc
+      | _  -> raise (BugOnPC (lab_i pc)) (* COND expected *)
+
+    (* repeat [try_change_cond] until no condition is changed into a branch *)
+    let rec repeat_change_cond c =
+      c.iter_num <- c.iter_num + 1;
+      debug "++ %sTunneling.branch_target %d: remaining number of conds to consider = %d\n" OPT.langname (c.iter_num) (c.num_rems);
+      let old =  c.num_rems in
+      c.rems <- List.fold_left (try_change_cond c) [] c.rems;
+      let curr = c.num_rems in
+      let continue =
+        match OPT.limit_tunneling with
+        | Some n -> curr < old && c.iter_num < n
+        | None -> curr < old
+      in
+      if continue
+      then repeat_change_cond c
+
+
+    (*********************************************)
+    (*** START: printing and debugging functions *)
+
+    let print_cfg (f: LANG.funct) c  =
+      let a = Array.of_list (PTree.fold (fun acc pc cu -> (P.to_int pc,cu)::acc) (FUNS.fn_code f) []) in
+      Array.fast_sort (fun (i1,_) (i2,_) -> i2 - i1) a;
+      let ep = P.to_int (FUNS.fn_entrypoint f) in
+      debug "entrypoint: %d %s\n" ep (string_of_labeli c.nodes ep);
+      let println = Array.fold_left (FUNS.print_code_unit c) false a in
+      (if println then debug "\n");debug "remaining cond:";
+      List.iter (fun n -> debug "%d " (lab_i n)) c.rems;
+      debug "\n"
+
+
+    (*************************************************************)
+    (* Copy-paste of the extracted code of the verifier          *)
+    (* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *)
+
+    (** val check_code : coq_UF -> code -> unit res **)
+
+    let check_code td c =
+      PTree.fold (fun _ pc cu -> FUNS.check_code_unit td pc cu) c (())
+
+    (*** END: copy-paste & debugging functions *******)
+
+    (* compute the final distance of each nop nodes to its target *)
+    let final_export f c =
+      let count = ref 0 in
+      let filter_nops_init_dist _ n acc =
+        let tn = target c n in
+        if tn == n
+        then (
+          n.dist <- 0; (* force [n] to be a base case in the recursion of [dist] *)
+          acc
+        ) else (
+          n.dist <- undef_dist; (* force [dist] to compute the actual [n.dist] *)
+          count := !count+1;
+          n::acc
+        )
+      in
+      let nops = Hashtbl.fold filter_nops_init_dist c.nodes [] in
+      let res = List.fold_left (fun acc n -> PTree.set (lab_p n) (lab_p n.link, Z.of_uint (dist n)) acc) PTree.empty nops in
+      debug "* %sTunneling.branch_target: initial number of nops = %d\n" OPT.langname !nopcounter;
+      debug "* %sTunneling.branch_target: final number of eliminated nops = %d\n" OPT.langname !count;
+      res
+
+    let branch_target f =
+      debug "* %sTunneling.branch_target: starting on a new function\n" OPT.langname;
+      if OPT.limit_tunneling <> None then debug "* WARNING: limit_tunneling <> None\n";
+      let c = { nodes = Hashtbl.create 100; rems = []; num_rems = 0; iter_num = 0 } in
+      c.rems <- PTree.fold (FUNS.build_simplified_cfg c) (FUNS.fn_code f) [];
+      repeat_change_cond c;
+      let res = final_export f c in
+      if !OPT.debug_flag then (
+        try
+          check_code res (FUNS.fn_code f);
+          if OPT.final_dump then print_cfg f c;
+        with e -> (
+          print_cfg f c;
+          check_code res (FUNS.fn_code f)
+        )
+      );
+      res
+  end
 end
-- 
cgit 


From 8dc2366431f9210ad70a789c389d9d19c6fc802f Mon Sep 17 00:00:00 2001
From: Pierre Goutagny <pierre.goutagny@ens-lyon.fr>
Date: Thu, 17 Jun 2021 16:32:05 +0200
Subject: Use Tunnelinglibs in Tunnelingaux

---
 backend/Tunnelingaux.ml | 337 ++++++++++++------------------------------------
 1 file changed, 81 insertions(+), 256 deletions(-)

diff --git a/backend/Tunnelingaux.ml b/backend/Tunnelingaux.ml
index 87e6d303..d4b88a5d 100644
--- a/backend/Tunnelingaux.ml
+++ b/backend/Tunnelingaux.ml
@@ -23,261 +23,86 @@ open Coqlib
 open LTL
 open Maps
 open Camlcoq
-
-let limit_tunneling = None (* for debugging: [Some x] limit the number of iterations *)
-let debug_flag = ref false
-let final_dump = false   (* set to true to have a more verbose debugging *)
-
-let debug fmt =
-  if !debug_flag then Printf.eprintf fmt
-  else Printf.ifprintf stderr fmt
-
-exception BugOnPC of int
-
-(* type of labels in the cfg *)
-type label = int * P.t
-
-(* instructions under analyzis *)
-type simple_inst = (* a simplified view of LTL instructions *)
-  LBRANCH of node
-| LCOND of node * node
-| OTHER
-and node = {
-    lab : label;
-    mutable inst: simple_inst;
-    mutable link: node; (* link in the union-find: itself for non "nop"-nodes, target of the "nop" otherwise *)
-    mutable dist: int;
-    mutable tag: int
-  }
-
-(* type of the (simplified) CFG *)
-type cfg = {
-    nodes: (int, node) Hashtbl.t;
-    mutable rems: node list; (* remaining conditions that may become lbranch or not *)
-    mutable num_rems: int;
-    mutable iter_num: int (* number of iterations in elimination of conditions *)
-  }
-
-let lab_i (n: node): int = fst n.lab
-let lab_p (n: node): P.t = snd n.lab
-
-let rec target c n = (* inspired from the "find" of union-find algorithm *)
-  match n.inst with
-  | LCOND(s1,s2) ->
-     if n.link != n
-     then update c n
-     else if n.tag < c.iter_num then (
-       (* we try to change the condition ... *)
-       n.tag <- c.iter_num; (* ... but at most once by iteration *)
-       let ts1 = target c s1 in
-       let ts2 = target c s2 in
-       if ts1 == ts2 then (n.link <- ts1; ts1) else n
-     ) else n
-  | _ ->
-     if n.link != n
-     then update c n
-     else n
-and update c n =
-  let t = target c n.link in
-  n.link <- t; t
-
-let get_node c p =
-  let li = P.to_int p in
-  try
-    Hashtbl.find c.nodes li
-  with
-    Not_found ->
-      let rec n = { lab = (li, p); inst = OTHER; link = n ; dist = 0; tag = 0 }  in
-      Hashtbl.add c.nodes li n;
-      n
-
-let set_branch c p s =
-  let li = P.to_int p in
-  try
-    let n = Hashtbl.find c.nodes li in
-    n.inst <- LBRANCH s;
-    n.link <- target c s
-  with
-    Not_found ->
-      let n = { lab = (li,p); inst = LBRANCH s; link = target c s; dist = 0; tag = 0 } in
-      Hashtbl.add c.nodes li n
-
-
-(* build [c.nodes] and accumulate in [acc] conditions at beginning of LTL basic-blocks *)
-let build_simplified_cfg c acc pc bb =
-  match bb with
-  | Lbranch s :: _ ->
-     let ns = get_node c s in
-     set_branch c pc ns;
-     acc
-  | Lcond (_, _, s1, s2, _) :: _ ->
-     c.num_rems <- c.num_rems + 1;
-     let ns1 = get_node c s1 in
-     let ns2 = get_node c s2 in
-     let npc = get_node c pc in
-     npc.inst <- LCOND(ns1, ns2);
-     npc::acc
-  | _ -> acc
-
-(* try to change a condition into a branch
-[acc] is the current accumulator of conditions to consider in the next iteration of repeat_change_cond
-*)
-let try_change_cond c acc pc =
-  match pc.inst with
-  | LCOND(s1,s2) ->
-     let ts1 = target c s1 in
-     let ts2 = target c s2 in
-     if ts1 == ts2 then (
-       pc.link <- ts1;
-       c.num_rems <- c.num_rems - 1;
+open Tunnelinglibs
+
+module LANG = struct
+  type code_unit = LTL.bblock
+  type funct = LTL.coq_function
+end
+
+module OPT = struct
+  let langname = "LTL"
+  let limit_tunneling = None
+  let debug_flag = ref false
+  let final_dump = false
+end
+
+module Partial = Tunnelinglibs.Tunneling(LANG)(OPT)
+
+module FUNS = struct
+  let build_simplified_cfg c acc pc bb =
+    match bb with
+    | Lbranch s :: _ ->
+       let ns = get_node c s in
+       set_branch c pc ns;
        acc
-     ) else
-       pc::acc
-  | _  -> raise (BugOnPC (lab_i pc)) (* LCOND expected *)
-
-(* repeat [try_change_cond] until no condition is changed into a branch *)
-let rec repeat_change_cond c =
-  c.iter_num <- c.iter_num + 1;
-  debug "++ Tunneling.branch_target %d: remaining number of conds to consider = %d\n" (c.iter_num) (c.num_rems);
-  let old =  c.num_rems in
-  c.rems <- List.fold_left (try_change_cond c) [] c.rems;
-  let curr = c.num_rems in
-  let continue =
-    match limit_tunneling with
-    | Some n -> curr < old && c.iter_num < n
-    | None -> curr < old
-  in
-  if continue
-  then repeat_change_cond c
-
-
-(* compute the final distance of each nop nodes to its target *)
-let undef_dist = -1
-let self_dist = undef_dist-1
-let rec dist n =
-  if n.dist = undef_dist
-  then (
-    n.dist <- self_dist; (* protection against an unexpected loop in the data-structure *)
-    n.dist <-
-      (match n.inst with
-       | OTHER -> 0
-       | LBRANCH p -> 1 + dist p
-       | LCOND (p1,p2) -> 1 + (max (dist p1) (dist p2)));
-    n.dist
-  ) else if n.dist=self_dist then raise (BugOnPC (lab_i n))
-    else n.dist
-
-let final_export f c =
-  let count = ref 0 in
-  let filter_nops_init_dist _ n acc =
-    let tn = target c n in
-    if tn == n
-    then (
-      n.dist <- 0; (* force [n] to be a base case in the recursion of [dist] *)
-      acc
-    ) else (
-      n.dist <- undef_dist; (* force [dist] to compute the actual [n.dist] *)
-      count := !count+1;
-      n::acc
-    )
-  in
-  let nops = Hashtbl.fold filter_nops_init_dist c.nodes [] in
-  let res = List.fold_left (fun acc n -> PTree.set (lab_p n) (lab_p n.link, Z.of_uint (dist n)) acc) PTree.empty nops in
-  debug "* Tunneling.branch_target: final number of eliminated nops = %d\n" !count;
-  res
-
-(*********************************************)
-(*** START: printing and debugging functions *)
-
-let string_of_labeli nodes ipc =
-  try
-    let pc = Hashtbl.find nodes ipc in
-    if pc.link == pc
-    then Printf.sprintf "(Target@%d)" (dist pc)
-    else Printf.sprintf "(Nop %d @%d)" (lab_i pc.link) (dist pc)
-  with
-    Not_found -> ""
-
-let print_bblock c println (pc, bb) =
-  match bb with
-  | Lbranch s::_ -> (if println then debug "\n"); debug "%d:Lbranch %d %s\n" pc (P.to_int s) (string_of_labeli c.nodes pc); false
-  | Lcond (_, _, s1, s2, _)::_ -> (if println then debug "\n"); debug "%d:Lcond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (string_of_labeli c.nodes pc); false
-  | _ -> debug "%d " pc; true
-
-
-let print_cfg f c  =
-  let a = Array.of_list (PTree.fold (fun acc pc bb -> (P.to_int pc,bb)::acc) f.fn_code []) in
-  Array.fast_sort (fun (i1,_) (i2,_) -> i2 - i1) a;
-  let ep = P.to_int f.fn_entrypoint in
-  debug "entrypoint: %d %s\n" ep (string_of_labeli c.nodes ep);
-  let println = Array.fold_left (print_bblock c) false a in
-  (if println then debug "\n");debug "remaining cond:";
-  List.iter (fun n -> debug "%d " (lab_i n)) c.rems;
-  debug "\n"
-
-(*************************************************************)
-(* Copy-paste of the extracted code of the verifier          *)
-(* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *)
-
-let get td pc =
-  match PTree.get pc td with
-  | Some p -> let (t0, d) = p in (t0, d)
-  | None -> (pc, Z.of_uint 0)
-
-let check_bblock td pc bb =
-  match PTree.get pc td with
-  | Some p ->
-    let (tpc, dpc) = p in
-    let dpc0 = dpc in
-    (match bb with
-     | [] ->
-       raise (BugOnPC (P.to_int pc))
-     | i :: _ ->
-       (match i with
-        | Lbranch s ->
-          let (ts, ds) = get td s in
-          if peq tpc ts
-          then if zlt ds dpc0
-               then ()
-               else raise (BugOnPC (P.to_int pc))
-          else raise (BugOnPC (P.to_int pc))
-        | Lcond (_, _, s1, s2, _) ->
-          let (ts1, ds1) = get td s1 in
-          let (ts2, ds2) = get td s2 in
-          if peq tpc ts1
-          then if peq tpc ts2
-               then if zlt ds1 dpc0
-                    then if zlt ds2 dpc0
-                         then ()
-                         else raise (BugOnPC (P.to_int pc))
-                    else raise (BugOnPC (P.to_int pc))
-               else raise (BugOnPC (P.to_int pc))
-          else raise (BugOnPC (P.to_int pc))
-        | _ ->
-          raise (BugOnPC (P.to_int pc))))
-  | None -> ()
-
-(** val check_code : coq_UF -> code -> unit res **)
-
-let check_code td c =
-  PTree.fold (fun _ pc bb -> check_bblock td pc bb) c (())
-
-(*** END: copy-paste & debugging functions *******)
+    | Lcond (_, _, s1, s2, _) :: _ ->
+       c.num_rems <- c.num_rems + 1;
+       let ns1 = get_node c s1 in
+       let ns2 = get_node c s2 in
+       let npc = get_node c pc in
+       npc.inst <- COND(ns1, ns2);
+       npc::acc
+    | _ -> acc
+
+  let print_code_unit c println (pc, bb) =
+    match bb with
+    | Lbranch s::_ -> (if println then Partial.debug "\n"); Partial.debug "%d:Lbranch %d %s\n" pc (P.to_int s) (string_of_labeli c.nodes pc); false
+    | Lcond (_, _, s1, s2, _)::_ -> (if println then Partial.debug "\n"); Partial.debug "%d:Lcond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (string_of_labeli c.nodes pc); false
+    | _ -> Partial.debug "%d " pc; true
+
+  let fn_code f = f.fn_code
+  let fn_entrypoint f = f.fn_entrypoint
+
+
+  (*************************************************************)
+  (* Copy-paste of the extracted code of the verifier          *)
+  (* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *)
+
+  let check_code_unit td pc bb =
+    match PTree.get pc td with
+    | Some p ->
+      let (tpc, dpc) = p in
+      let dpc0 = dpc in
+      (match bb with
+       | [] ->
+         raise (BugOnPC (P.to_int pc))
+       | i :: _ ->
+         (match i with
+          | Lbranch s ->
+            let (ts, ds) = get td s in
+            if peq tpc ts
+            then if zlt ds dpc0
+                 then ()
+                 else raise (BugOnPC (P.to_int pc))
+            else raise (BugOnPC (P.to_int pc))
+          | Lcond (_, _, s1, s2, _) ->
+            let (ts1, ds1) = get td s1 in
+            let (ts2, ds2) = get td s2 in
+            if peq tpc ts1
+            then if peq tpc ts2
+                 then if zlt ds1 dpc0
+                      then if zlt ds2 dpc0
+                           then ()
+                           else raise (BugOnPC (P.to_int pc))
+                      else raise (BugOnPC (P.to_int pc))
+                 else raise (BugOnPC (P.to_int pc))
+            else raise (BugOnPC (P.to_int pc))
+          | _ ->
+            raise (BugOnPC (P.to_int pc))))
+    | None -> ()
+end
+
+module T = Partial.T(FUNS)
+let branch_target = T.branch_target
 
-let branch_target f =
-  debug "* Tunneling.branch_target: starting on a new function\n";
-  if limit_tunneling <> None then debug "* WARNING: limit_tunneling <> None\n";
-  let c = { nodes = Hashtbl.create 100; rems = []; num_rems = 0; iter_num = 0 } in
-  c.rems <- PTree.fold (build_simplified_cfg c) f.fn_code [];
-  repeat_change_cond c;
-  let res = final_export f c in
-  if !debug_flag then (
-    try
-      check_code res f.fn_code;
-      if final_dump then print_cfg f c;
-    with e -> (
-      print_cfg f c;
-      check_code res f.fn_code
-    )
-  );
-  res
-- 
cgit 


From fae8d9b5c5f93d5eda36f800eb0ca1837b237cba Mon Sep 17 00:00:00 2001
From: "nicolas.nardino" <nicolas.nardino@ens-lyon.fr>
Date: Thu, 17 Jun 2021 17:00:57 +0200
Subject: fix riscv/Machregsaux.mli

---
 riscV/Machregsaux.mli | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/riscV/Machregsaux.mli b/riscV/Machregsaux.mli
index 01b0f9fd..cf6d7b71 100644
--- a/riscV/Machregsaux.mli
+++ b/riscV/Machregsaux.mli
@@ -15,3 +15,5 @@
 val is_scratch_register: string -> bool
 
 val class_of_type: AST.typ -> int
+
+val nr_regs: int array
-- 
cgit 


From fe557bf65ec738eaa078bc5e398ff690eb1f2b9e Mon Sep 17 00:00:00 2001
From: "nicolas.nardino" <nicolas.nardino@ens-lyon.fr>
Date: Thu, 17 Jun 2021 17:03:53 +0200
Subject: changed type of schedule_seq in x86 for compatibility

---
 x86/PrepassSchedulingOracle.ml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/x86/PrepassSchedulingOracle.ml b/x86/PrepassSchedulingOracle.ml
index 7b6a1b14..42a3da23 100644
--- a/x86/PrepassSchedulingOracle.ml
+++ b/x86/PrepassSchedulingOracle.ml
@@ -2,4 +2,5 @@ open RTL
 open Registers
 
 (* Do not do anything *)
-let schedule_sequence (seqa : (instruction*Regset.t) array) = None
+let schedule_sequence (seqa : (instruction*Regset.t) array)
+      live_regs_entry typing reference = None
-- 
cgit 


From 8f399dfa9d794f2f728f523ff1aa7788cc3599b2 Mon Sep 17 00:00:00 2001
From: David Monniaux <david.monniaux@univ-grenoble-alpes.fr>
Date: Thu, 17 Jun 2021 17:04:52 +0200
Subject: fix for Risc-V

---
 aarch64/Machregsaux.mli | 1 +
 riscV/Machregsaux.mli   | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/aarch64/Machregsaux.mli b/aarch64/Machregsaux.mli
index 8487a557..23ac1c9a 100644
--- a/aarch64/Machregsaux.mli
+++ b/aarch64/Machregsaux.mli
@@ -16,4 +16,5 @@ val is_scratch_register: string -> bool
 
 val class_of_type: AST.typ -> int
 
+(* Number of registers in each class *)
 val nr_regs : int array
diff --git a/riscV/Machregsaux.mli b/riscV/Machregsaux.mli
index 01b0f9fd..bb3777bf 100644
--- a/riscV/Machregsaux.mli
+++ b/riscV/Machregsaux.mli
@@ -15,3 +15,6 @@
 val is_scratch_register: string -> bool
 
 val class_of_type: AST.typ -> int
+  
+(* Number of registers in each class *)
+val nr_regs : int array
-- 
cgit 


From 95f918c38b1e59f40ae7af455ec2c6746289375e Mon Sep 17 00:00:00 2001
From: Pierre Goutagny <pierre.goutagny@ens-lyon.fr>
Date: Thu, 17 Jun 2021 17:31:10 +0200
Subject: Change "Tunneling" to "LTLTunneling" everywhere

To respect the symmetry between RTL- and LTL-Tunneling
---
 Makefile                    |   2 +-
 backend/LTLTunneling.v      | 167 +++++++++++
 backend/LTLTunnelingaux.ml  | 108 +++++++
 backend/LTLTunnelingproof.v | 666 ++++++++++++++++++++++++++++++++++++++++++++
 backend/RTLTunneling.v      |   2 +-
 backend/Tunneling.v         | 167 -----------
 backend/Tunnelingaux.ml     | 108 -------
 backend/Tunnelinglibs.ml    |   2 +-
 backend/Tunnelingproof.v    | 666 --------------------------------------------
 doc/index-kvx.html          |   6 +-
 doc/index.html              |   6 +-
 driver/Compiler.vexpand     |   2 +-
 tools/compiler_expand.ml    |   2 +-
 13 files changed, 952 insertions(+), 952 deletions(-)
 create mode 100644 backend/LTLTunneling.v
 create mode 100644 backend/LTLTunnelingaux.ml
 create mode 100644 backend/LTLTunnelingproof.v
 delete mode 100644 backend/Tunneling.v
 delete mode 100644 backend/Tunnelingaux.ml
 delete mode 100644 backend/Tunnelingproof.v

diff --git a/Makefile b/Makefile
index 4900b165..05e4cbcd 100644
--- a/Makefile
+++ b/Makefile
@@ -143,7 +143,7 @@ BACKEND=\
   FirstNop.v FirstNopproof.v \
   Allnontrap.v Allnontrapproof.v \
   Allocation.v Allocationproof.v \
-  Tunneling.v Tunnelingproof.v \
+  LTLTunneling.v LTLTunnelingproof.v \
   RTLTunneling.v RTLTunnelingproof.v \
   Linear.v Lineartyping.v \
   Linearize.v Linearizeproof.v \
diff --git a/backend/LTLTunneling.v b/backend/LTLTunneling.v
new file mode 100644
index 00000000..4b404724
--- /dev/null
+++ b/backend/LTLTunneling.v
@@ -0,0 +1,167 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*          Sylvain Boulmé  Grenoble-INP, VERIMAG                      *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Branch tunneling (optimization of branches to branches). *)
+
+Require Import Coqlib Maps Errors.
+Require Import AST.
+Require Import LTL.
+
+(** Branch tunneling shortens sequences of branches (with no intervening
+  computations) by rewriting the branch and conditional branch instructions
+  so that they jump directly to the end of the branch sequence.
+  For example:
+<<
+     L1: if (cond) nop L2;                L1: nop L3;
+     L2: nop L3;               becomes    L2: nop L3;
+     L3: instr;                           L3: instr;
+     L4: if (cond) goto L1;               L4: if (cond) nop L1;
+>>
+  This optimization can be applied to several of our intermediate
+  languages.  We choose to perform it on the [LTL] language,
+  after register allocation but before code linearization.
+  Register allocation can delete instructions (such as dead
+  computations or useless moves), therefore there are more
+  opportunities for tunneling after allocation than before.
+  Symmetrically, prior tunneling helps linearization to produce
+  better code, e.g. by revealing that some [branch] instructions are
+  dead code (as the "branch L3" in the example above).
+*)
+
+(** The implementation consists in two passes: the first pass
+    records the branch t of each "nop"
+    and the second pass replace any "nop" node to [pc]
+    by a branch to a "nop" at [branch_t f pc]
+
+Naively, we may define [branch_t f pc] as follows:
+<<
+  branch_t f pc = branch_t f pc'  if f(pc) = nop pc'
+                     = pc                   otherwise
+>>
+  However, this definition can fail to terminate if
+  the program can contain loops consisting only of branches, as in
+<<
+     L1: branch L1;
+>>
+  or
+<<
+     L1: nop L2;
+     L2: nop L1;
+>>
+  Coq warns us of this fact by not accepting the definition
+  of [branch_t] above.
+
+  To handle this problem, we use a union-find data structure, adding equalities [pc = pc']
+  for every instruction [pc: nop pc'] in the function.
+
+  Moreover, because the elimination of "useless" [Lcond] depends on the current [uf] datastructure,
+  we need to iterate until we reach a fixpoint.
+
+  Actually, it is simpler and more efficient to perform this in an external oracle, that also returns a measure
+  in order to help the proof.
+
+  A verifier checks that this data-structure is correct.
+*)
+
+Definition UF := PTree.t (node * Z).
+
+(* The oracle returns a map of "nop" node to their target with a distance (ie the number of the "nop" node on the path) to the target. *)
+Axiom branch_target: LTL.function -> UF. 
+Extract Constant branch_target => "LTLTunnelingaux.branch_target".
+
+Local Open Scope error_monad_scope.
+
+Definition get (td: UF) pc:node*Z :=
+  match td!pc with
+  | Some (t,d) => (t,Z.abs d)
+  | _ => (pc,0)
+  end.
+
+Definition target (td: UF) (pc:node): node := fst (get td pc).
+Coercion target: UF >-> Funclass.
+
+(* we check that the domain of [td] is included in the domain of [c] *)
+Definition check_included (td: UF) (c: code): option bblock
+  := PTree.fold (fun (ok:option bblock) pc _ => if ok then c!pc else None) td (Some nil).
+
+(* we check the validity of targets and their bound:
+   the distance of a "nop" node (w.r.t to the target) must be greater than the one of its parents.
+*)
+Definition check_bblock (td: UF) (pc:node) (bb: bblock): res unit
+ := match td!pc with
+    | None => OK tt
+    | Some (tpc, dpc) =>
+       let dpc := Z.abs dpc in
+       match bb with
+       | Lbranch s ::_ =>
+         let (ts, ds) := get td s in 
+         if peq tpc ts then
+            if zlt ds dpc then OK tt
+            else Error (msg "bad distance in Lbranch")
+         else Error (msg "invalid skip of Lbranch")
+       | Lcond _ _ s1 s2 _ :: _ =>
+          let (ts1, ds1) := get td s1 in
+          let (ts2, ds2) := get td s2 in
+          if peq tpc ts1 then
+            if peq tpc ts2 then
+              if zlt ds1 dpc then
+                if zlt ds2 dpc then OK tt
+                else Error (msg "bad distance on else branch")
+              else Error (msg "bad distance on then branch")
+            else Error (msg "invalid skip of else branch")
+          else Error (msg "invalid skip of then branch")
+      | _ => Error (msg "cannot skip this block")
+      end
+   end.
+
+Definition check_code (td: UF) (c:code): res unit
+  := PTree.fold (fun ok pc bb => do _ <- ok; check_bblock td pc bb) c (OK tt).
+
+(** The second pass rewrites all LTL instructions, replacing every
+  successor [s] of every instruction by [t s], the canonical representative
+  of its equivalence class in the union-find data structure. *)
+
+Definition tunnel_instr (t: node -> node) (i: instruction) : instruction :=
+  match i with
+  | Lbranch s => Lbranch (t s)
+  | Lcond cond args s1 s2 info =>
+      let s1' := t s1 in let s2' := t s2 in
+      if peq s1' s2'
+      then Lbranch s1'
+      else Lcond cond args s1' s2' info
+  | Ljumptable arg tbl => Ljumptable arg (List.map t tbl)
+  | _ => i
+  end.
+
+Definition tunnel_block (t: node -> node) (b: bblock) : bblock :=
+  List.map (tunnel_instr t) b.
+
+Definition tunnel_function (f: LTL.function) : res LTL.function :=
+  let td := branch_target f in
+  let c := (fn_code f) in
+  if check_included td c then 
+    do _ <- check_code td c ; OK
+    (mkfunction
+      (fn_sig f)
+      (fn_stacksize f)
+      (PTree.map1 (tunnel_block td) c)
+      (td (fn_entrypoint f)))
+  else 
+   Error (msg "Some node of the union-find is not in the CFG")
+   .
+
+Definition tunnel_fundef (f: fundef) : res fundef :=
+  transf_partial_fundef tunnel_function f.
+
+Definition transf_program (p: program) : res program :=
+  transform_partial_program tunnel_fundef p.
diff --git a/backend/LTLTunnelingaux.ml b/backend/LTLTunnelingaux.ml
new file mode 100644
index 00000000..c3b8cf82
--- /dev/null
+++ b/backend/LTLTunnelingaux.ml
@@ -0,0 +1,108 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Sylvain Boulmé  Grenoble-INP, VERIMAG             *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+(*
+
+This file implements the [branch_target] oracle that identifies "nop" branches in a LTL function,
+and computes their target node with the distance (ie the number of cummulated nops) toward this target.
+
+See [LTLTunneling.v]
+
+*)
+
+open Coqlib
+open LTL
+open Maps
+open Camlcoq
+open Tunnelinglibs
+
+module LANG = struct
+  type code_unit = LTL.bblock
+  type funct = LTL.coq_function
+end
+
+module OPT = struct
+  let langname = "LTL"
+  let limit_tunneling = None
+  let debug_flag = ref false
+  let final_dump = false
+end
+
+module Partial = Tunnelinglibs.Tunneling(LANG)(OPT)
+
+module FUNS = struct
+  let build_simplified_cfg c acc pc bb =
+    match bb with
+    | Lbranch s :: _ ->
+       let ns = get_node c s in
+       set_branch c pc ns;
+       acc
+    | Lcond (_, _, s1, s2, _) :: _ ->
+       c.num_rems <- c.num_rems + 1;
+       let ns1 = get_node c s1 in
+       let ns2 = get_node c s2 in
+       let npc = get_node c pc in
+       npc.inst <- COND(ns1, ns2);
+       npc::acc
+    | _ -> acc
+
+  let print_code_unit c println (pc, bb) =
+    match bb with
+    | Lbranch s::_ -> (if println then Partial.debug "\n"); Partial.debug "%d:Lbranch %d %s\n" pc (P.to_int s) (string_of_labeli c.nodes pc); false
+    | Lcond (_, _, s1, s2, _)::_ -> (if println then Partial.debug "\n"); Partial.debug "%d:Lcond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (string_of_labeli c.nodes pc); false
+    | _ -> Partial.debug "%d " pc; true
+
+  let fn_code f = f.fn_code
+  let fn_entrypoint f = f.fn_entrypoint
+
+
+  (*************************************************************)
+  (* Copy-paste of the extracted code of the verifier          *)
+  (* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *)
+
+  let check_code_unit td pc bb =
+    match PTree.get pc td with
+    | Some p ->
+      let (tpc, dpc) = p in
+      let dpc0 = dpc in
+      (match bb with
+       | [] ->
+         raise (BugOnPC (P.to_int pc))
+       | i :: _ ->
+         (match i with
+          | Lbranch s ->
+            let (ts, ds) = get td s in
+            if peq tpc ts
+            then if zlt ds dpc0
+                 then ()
+                 else raise (BugOnPC (P.to_int pc))
+            else raise (BugOnPC (P.to_int pc))
+          | Lcond (_, _, s1, s2, _) ->
+            let (ts1, ds1) = get td s1 in
+            let (ts2, ds2) = get td s2 in
+            if peq tpc ts1
+            then if peq tpc ts2
+                 then if zlt ds1 dpc0
+                      then if zlt ds2 dpc0
+                           then ()
+                           else raise (BugOnPC (P.to_int pc))
+                      else raise (BugOnPC (P.to_int pc))
+                 else raise (BugOnPC (P.to_int pc))
+            else raise (BugOnPC (P.to_int pc))
+          | _ ->
+            raise (BugOnPC (P.to_int pc))))
+    | None -> ()
+end
+
+module T = Partial.T(FUNS)
+let branch_target = T.branch_target
+
diff --git a/backend/LTLTunnelingproof.v b/backend/LTLTunnelingproof.v
new file mode 100644
index 00000000..d36d3c76
--- /dev/null
+++ b/backend/LTLTunnelingproof.v
@@ -0,0 +1,666 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*          Sylvain Boulmé  Grenoble-INP, VERIMAG                      *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Correctness proof for the branch tunneling optimization. *)
+
+Require Import Coqlib Maps Errors.
+Require Import AST Linking.
+Require Import Values Memory Events Globalenvs Smallstep.
+Require Import Op Locations LTL.
+Require Import LTLTunneling.
+
+Local Open Scope nat.
+
+
+(** * Properties of the branch_target, when the verifier succeeds *)
+
+Definition check_included_spec (c:code) (td:UF) (ok: option bblock) :=
+   ok <> None -> forall pc, c!pc = None -> td!pc = None.
+
+Lemma check_included_correct (td: UF) (c: code): 
+  check_included_spec c td (check_included td c).
+Proof. 
+  apply PTree_Properties.fold_rec with (P := check_included_spec c).
+- (* extensionality *)
+  unfold check_included_spec. intros m m' a EQ IND X pc. rewrite <- EQ; auto.
+- (* base case *)
+  intros _ pc.  rewrite PTree.gempty; try congruence.
+- (* inductive case *)
+  unfold check_included_spec.
+  intros m [|] pc bb NEW ATPC IND; simpl; try congruence.
+  intros H pc0. rewrite PTree.gsspec; destruct (peq _ _); subst; simpl; try congruence.
+  intros; eapply IND; try congruence.
+Qed.
+
+Inductive target_bounds (target: node -> node) (bound: node -> nat) (pc: node): (option bblock) -> Prop :=
+ | TB_default (TB: target pc = pc) ob
+     : target_bounds target bound pc ob
+ | TB_branch s bb
+     (EQ: target pc = target s)
+     (DECREASE: bound s < bound pc)
+     : target_bounds target bound pc (Some (Lbranch s::bb))
+ | TB_cond cond args s1 s2 info bb
+     (EQ1: target pc = target s1)
+     (EQ2: target pc = target s2)
+     (DEC1: bound s1 < bound pc)
+     (DEC2: bound s2 < bound pc)
+     : target_bounds target bound pc (Some (Lcond cond args s1 s2 info::bb))
+ .
+Local Hint Resolve TB_default: core.
+
+Lemma target_None (td:UF) (pc: node): td!pc = None -> td pc = pc.
+Proof.
+  unfold target, get. intros H; rewrite H; auto.
+Qed.
+Local Hint Resolve target_None Z.abs_nonneg: core.
+
+Lemma get_nonneg td pc t d: get td pc = (t, d) -> (0 <= d)%Z.
+Proof.
+  unfold get. destruct (td!_) as [(t0&d0)|]; intros H; inversion H; subst; simpl; lia || auto.
+Qed.
+Local Hint Resolve get_nonneg: core.
+
+Definition bound (td: UF) (pc: node) := Z.to_nat (snd (get td pc)).
+
+Lemma check_bblock_correct (td:UF) (pc:node) (bb: bblock):
+  check_bblock td pc bb = OK tt -> 
+  target_bounds (target td) (bound td) pc (Some bb).
+Proof.
+  unfold check_bblock, bound.
+  destruct (td!pc) as [(tpc&dpc)|] eqn:Hpc; auto.
+  assert (Tpc: td pc = tpc). { unfold target, get; rewrite Hpc; simpl; auto. }
+  assert (Dpc: snd (get td pc) = Z.abs dpc). { unfold get; rewrite Hpc; simpl; auto. }
+  destruct bb as [|[ ] bb]; simpl; try congruence.
+  + destruct (get td s) as (ts, ds) eqn:Hs.
+    repeat (destruct (peq _ _) || destruct (zlt _ _)); simpl; try congruence.
+    intros; apply TB_branch.
+    * rewrite Tpc. unfold target; rewrite Hs; simpl; auto.
+    * rewrite Dpc, Hs; simpl. apply Z2Nat.inj_lt; eauto.
+  + destruct (get td s1) as (ts1, ds1) eqn:Hs1.
+    destruct (get td s2) as (ts2, ds2) eqn:Hs2.
+    repeat (destruct (peq _ _) || destruct (zlt _ _)); simpl; try congruence.
+    intros; apply TB_cond.
+    * rewrite Tpc. unfold target; rewrite Hs1; simpl; auto.
+    * rewrite Tpc. unfold target; rewrite Hs2; simpl; auto.
+    * rewrite Dpc, Hs1; simpl. apply Z2Nat.inj_lt; eauto.
+    * rewrite Dpc, Hs2; simpl. apply Z2Nat.inj_lt; eauto.
+Qed.
+
+Definition check_code_spec (td:UF) (c:code) (ok: res unit) :=
+   ok = OK tt -> forall pc bb, c!pc = Some bb -> target_bounds (target td) (bound td) pc (Some bb).
+
+Lemma check_code_correct (td:UF) c:
+   check_code_spec td c (check_code td c).
+Proof.
+  apply PTree_Properties.fold_rec with (P := check_code_spec td).
+- (* extensionality *)
+  unfold check_code_spec. intros m m' a EQ IND X pc bb; subst. rewrite  <- ! EQ; eauto.
+- (* base case *)
+  intros _ pc.  rewrite PTree.gempty; try congruence.
+- (* inductive case *)
+  unfold check_code_spec.
+  intros m [[]|] pc bb NEW ATPC IND; simpl; try congruence.
+  intros H pc0 bb0. rewrite PTree.gsspec; destruct (peq _ _); subst; simpl; auto.
+  intros X; inversion X; subst.
+  apply check_bblock_correct; auto.
+Qed.
+
+Theorem branch_target_bounds:
+  forall f tf pc, 
+  tunnel_function f = OK tf ->
+  target_bounds (branch_target f) (bound (branch_target f)) pc (f.(fn_code)!pc).
+Proof.
+  unfold tunnel_function; intros f f' pc.
+  destruct (check_included _ _) eqn:H1; try congruence.
+  destruct (check_code _ _) as [[]|] eqn:H2; simpl; try congruence.
+  intros _.
+  destruct ((fn_code f)!pc) eqn:X.
+  - exploit check_code_correct; eauto.
+  - exploit check_included_correct; eauto.
+    congruence.
+Qed.
+
+Lemma tunnel_function_unfold:
+  forall f tf pc,
+  tunnel_function f = OK tf -> 
+  (fn_code tf)!pc = option_map (tunnel_block (branch_target f)) (fn_code f)!pc.
+Proof.
+  unfold tunnel_function; intros f f' pc.
+  destruct (check_included _ _) eqn:H1; try congruence.
+  destruct (check_code _ _) as [[]|] eqn:H2; simpl; try congruence.
+  intros X; inversion X; clear X; subst.
+  simpl. rewrite PTree.gmap1. auto.
+Qed.
+
+Lemma tunnel_fundef_Internal:
+  forall f tf, tunnel_fundef (Internal f) = OK tf
+  -> exists tf', tunnel_function f = OK tf' /\ tf = Internal tf'.
+Proof.
+  intros f tf; simpl.
+  destruct (tunnel_function f) eqn:X; simpl; try congruence.
+  intros EQ; inversion EQ.
+  eexists; split; eauto.
+Qed.
+
+Lemma tunnel_fundef_External:
+  forall tf ef, tunnel_fundef (External ef) = OK tf
+  -> tf = External ef.
+Proof.
+  intros tf ef; simpl. intros H; inversion H; auto.
+Qed.
+
+(** * Preservation of semantics *)
+
+Definition match_prog (p tp: program) :=
+  match_program (fun _ f tf => tunnel_fundef f = OK tf) eq p tp.
+
+Lemma transf_program_match:
+  forall prog tprog, transf_program prog = OK tprog -> match_prog prog tprog.
+Proof.
+  intros. eapply match_transform_partial_program_contextual; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma functions_translated:
+  forall (v: val) (f: fundef),
+  Genv.find_funct ge v = Some f ->
+  exists tf, tunnel_fundef f = OK tf /\ Genv.find_funct tge v = Some tf.
+Proof.
+  intros. exploit (Genv.find_funct_match TRANSL); eauto.
+  intros (cu & tf & A & B & C).
+  repeat eexists; intuition eauto.
+Qed.
+
+Lemma function_ptr_translated:
+  forall v f,
+  Genv.find_funct_ptr ge v = Some f ->
+  exists tf,
+  Genv.find_funct_ptr tge v = Some tf /\ tunnel_fundef f = OK tf.
+Proof.
+  intros.
+  exploit (Genv.find_funct_ptr_transf_partial TRANSL); eauto.
+Qed.
+
+Lemma symbols_preserved s: Genv.find_symbol tge s = Genv.find_symbol ge s.
+Proof.
+  rewrite <- (Genv.find_symbol_match TRANSL). reflexivity.
+Qed.
+
+Lemma senv_preserved:
+  Senv.equiv ge tge.
+Proof.
+  eapply (Genv.senv_match TRANSL).
+Qed.
+
+Lemma sig_preserved:
+  forall f tf, tunnel_fundef f = OK tf -> funsig tf = funsig f.
+Proof.
+  intros. destruct f.
+  - simpl in H. monadInv H. unfold tunnel_function in EQ. 
+    destruct (check_included _ _); try congruence.
+    monadInv EQ. simpl; auto.
+  - simpl in H. monadInv H. reflexivity.
+Qed.
+
+Lemma fn_stacksize_preserved:
+  forall f tf, tunnel_function f = OK tf -> fn_stacksize tf = fn_stacksize f.
+Proof.
+  intros f tf; unfold tunnel_function.
+  destruct (check_included _ _); try congruence.
+  destruct (check_code _ _); simpl; try congruence.
+  intros H; inversion H; simpl; auto.
+Qed.
+
+Lemma fn_entrypoint_preserved:
+  forall f tf, tunnel_function f = OK tf -> fn_entrypoint tf = branch_target f (fn_entrypoint f).
+Proof.
+  intros f tf; unfold tunnel_function.
+  destruct (check_included _ _); try congruence.
+  destruct (check_code _ _); simpl; try congruence.
+  intros H; inversion H; simpl; auto.
+Qed.
+
+
+(** The proof of semantic preservation is a simulation argument
+  based on diagrams of the following form:
+<<
+           st1 --------------- st2
+            |                   |
+           t|                  ?|t
+            |                   |
+            v                   v
+           st1'--------------- st2'
+>>
+  The [match_states] predicate, defined below, captures the precondition
+  between states [st1] and [st2], as well as the postcondition between
+  [st1'] and [st2'].  One transition in the source code (left) can correspond
+  to zero or one transition in the transformed code (right).  The
+  "zero transition" case occurs when executing a [Lnop] instruction
+  in the source code that has been removed by tunneling.
+
+  In the definition of [match_states], what changes between the original and
+  transformed codes is mainly the control-flow
+  (in particular, the current program point [pc]), but also some values
+  and memory states, since some [Vundef] values can become more defined
+  as a consequence of eliminating useless [Lcond] instructions. *)
+
+Definition locmap_lessdef (ls1 ls2: locset) : Prop :=
+  forall l, Val.lessdef (ls1 l) (ls2 l).
+
+Inductive match_stackframes: stackframe -> stackframe -> Prop :=
+  | match_stackframes_intro:
+      forall f tf sp ls0 bb tls0,
+      locmap_lessdef ls0 tls0 ->
+      tunnel_function f = OK tf ->
+      match_stackframes
+         (Stackframe f sp ls0 bb)
+         (Stackframe tf sp tls0 (tunnel_block (branch_target f) bb)).
+
+Inductive match_states: state -> state -> Prop :=
+  | match_states_intro:
+      forall s f tf sp pc ls m ts tls tm
+        (STK: list_forall2 match_stackframes s ts)
+        (LS: locmap_lessdef ls tls)
+        (MEM: Mem.extends m tm)
+        (TF: tunnel_function f = OK tf),
+      match_states (State s f sp pc ls m)
+                   (State ts tf sp (branch_target f pc) tls tm)
+  | match_states_block:
+      forall s f tf sp bb ls m ts tls tm
+        (STK: list_forall2 match_stackframes s ts)
+        (LS: locmap_lessdef ls tls)
+        (MEM: Mem.extends m tm)
+        (TF: tunnel_function f = OK tf),
+      match_states (Block s f sp bb ls m)
+                   (Block ts tf sp (tunnel_block (branch_target f) bb) tls tm)
+  | match_states_interm:
+      forall s f tf sp pc i bb ls m ts tls tm
+        (STK: list_forall2 match_stackframes s ts)
+        (LS: locmap_lessdef ls tls)
+        (MEM: Mem.extends m tm)
+        (IBRANCH: tunnel_instr (branch_target f) i = Lbranch pc)
+        (TF: tunnel_function f = OK tf),
+      match_states (Block s f sp (i :: bb) ls m)
+                   (State ts tf sp pc tls tm)
+  | match_states_call:
+      forall s f tf ls m ts tls tm
+        (STK: list_forall2 match_stackframes s ts)
+        (LS: locmap_lessdef ls tls)
+        (MEM: Mem.extends m tm)
+        (TF: tunnel_fundef f = OK tf),
+      match_states (Callstate s f ls m)
+                   (Callstate ts tf tls tm)
+  | match_states_return:
+      forall s ls m ts tls tm
+        (STK: list_forall2 match_stackframes s ts)
+        (LS: locmap_lessdef ls tls)
+        (MEM: Mem.extends m tm),
+      match_states (Returnstate s ls m)
+                   (Returnstate ts tls tm).
+
+(** Properties of [locmap_lessdef] *)
+
+Lemma reglist_lessdef:
+  forall rl ls1 ls2,
+  locmap_lessdef ls1 ls2 -> Val.lessdef_list (reglist ls1 rl) (reglist ls2 rl).
+Proof.
+  induction rl; simpl; intros; auto.
+Qed.
+
+Lemma locmap_set_lessdef:
+  forall ls1 ls2 v1 v2 l,
+  locmap_lessdef ls1 ls2 -> Val.lessdef v1 v2 -> locmap_lessdef (Locmap.set l v1 ls1) (Locmap.set l v2 ls2).
+Proof.
+  intros; red; intros l'. unfold Locmap.set. destruct (Loc.eq l l').
+- destruct l; auto using Val.load_result_lessdef.
+- destruct (Loc.diff_dec l l'); auto.
+Qed.
+
+Lemma locmap_set_undef_lessdef:
+  forall ls1 ls2 l,
+  locmap_lessdef ls1 ls2 -> locmap_lessdef (Locmap.set l Vundef ls1) ls2.
+Proof.
+  intros; red; intros l'. unfold Locmap.set. destruct (Loc.eq l l').
+- destruct l; auto. destruct ty; auto. 
+- destruct (Loc.diff_dec l l'); auto.
+Qed.
+
+Lemma locmap_undef_regs_lessdef:
+  forall rl ls1 ls2,
+  locmap_lessdef ls1 ls2 -> locmap_lessdef (undef_regs rl ls1) (undef_regs rl ls2).
+Proof.
+  induction rl as [ | r rl]; intros; simpl. auto. apply locmap_set_lessdef; auto. 
+Qed.
+
+Lemma locmap_undef_regs_lessdef_1:
+  forall rl ls1 ls2,
+  locmap_lessdef ls1 ls2 -> locmap_lessdef (undef_regs rl ls1) ls2.
+Proof.
+  induction rl as [ | r rl]; intros; simpl. auto. apply locmap_set_undef_lessdef; auto. 
+Qed.
+
+Lemma locmap_getpair_lessdef:
+  forall p ls1 ls2,
+  locmap_lessdef ls1 ls2 -> Val.lessdef (Locmap.getpair p ls1) (Locmap.getpair p ls2).
+Proof.
+  intros; destruct p; simpl; auto using Val.longofwords_lessdef.
+Qed.
+
+Lemma locmap_getpairs_lessdef:
+  forall pl ls1 ls2,
+  locmap_lessdef ls1 ls2 ->
+  Val.lessdef_list (map (fun p => Locmap.getpair p ls1) pl) (map (fun p => Locmap.getpair p ls2) pl).
+Proof.
+  intros. induction pl; simpl; auto using locmap_getpair_lessdef.
+Qed.
+
+Lemma locmap_setpair_lessdef:
+  forall p ls1 ls2 v1 v2,
+  locmap_lessdef ls1 ls2 -> Val.lessdef v1 v2 -> locmap_lessdef (Locmap.setpair p v1 ls1) (Locmap.setpair p v2 ls2).
+Proof.
+  intros; destruct p; simpl; auto using locmap_set_lessdef, Val.loword_lessdef, Val.hiword_lessdef.
+Qed.
+
+Lemma locmap_setres_lessdef:
+  forall res ls1 ls2 v1 v2,
+  locmap_lessdef ls1 ls2 -> Val.lessdef v1 v2 -> locmap_lessdef (Locmap.setres res v1 ls1) (Locmap.setres res v2 ls2).
+Proof.
+  induction res; intros; simpl; auto using locmap_set_lessdef, Val.loword_lessdef, Val.hiword_lessdef.
+Qed.
+
+Lemma locmap_undef_caller_save_regs_lessdef:
+  forall ls1 ls2,
+  locmap_lessdef ls1 ls2 -> locmap_lessdef (undef_caller_save_regs ls1) (undef_caller_save_regs ls2).
+Proof.
+  intros; red; intros. unfold undef_caller_save_regs. 
+  destruct l.
+- destruct (Conventions1.is_callee_save r); auto.
+- destruct sl; auto.
+Qed.
+
+Lemma find_function_translated:
+  forall ros ls tls fd,
+  locmap_lessdef ls tls ->
+  find_function ge ros ls = Some fd ->
+  exists tfd, tunnel_fundef fd = OK tfd /\ find_function tge ros tls = Some tfd.
+Proof.
+  intros. destruct ros; simpl in *.
+- assert (E: tls (R m) = ls (R m)).
+  { exploit Genv.find_funct_inv; eauto. intros (b & EQ). 
+    generalize (H (R m)). rewrite EQ. intros LD; inv LD. auto. }
+  rewrite E. exploit functions_translated; eauto.
+- rewrite symbols_preserved. destruct (Genv.find_symbol ge i); inv H0. 
+  exploit function_ptr_translated; eauto.
+  intros (tf & X1 & X2). exists tf; intuition.
+Qed.
+
+Lemma call_regs_lessdef:
+  forall ls1 ls2, locmap_lessdef ls1 ls2 -> locmap_lessdef (call_regs ls1) (call_regs ls2).
+Proof.
+  intros; red; intros. destruct l as [r | [] ofs ty]; simpl; auto.
+Qed.
+
+Lemma return_regs_lessdef:
+  forall caller1 callee1 caller2 callee2,
+  locmap_lessdef caller1 caller2 ->
+  locmap_lessdef callee1 callee2 ->
+  locmap_lessdef (return_regs caller1 callee1) (return_regs caller2 callee2).
+Proof.
+  intros; red; intros. destruct l; simpl.
+- destruct (Conventions1.is_callee_save r); auto.
+- destruct sl; auto.
+Qed. 
+
+(** To preserve non-terminating behaviours, we show that the transformed
+  code cannot take an infinity of "zero transition" cases.
+  We use the following [measure] function over source states,
+  which decreases strictly in the "zero transition" case. *)
+
+Definition measure (st: state) : nat :=
+  match st with
+  | State s f sp pc ls m => (bound (branch_target f) pc) * 2
+  | Block s f sp (Lbranch pc :: _) ls m => (bound (branch_target f) pc) * 2 + 1
+  | Block s f sp (Lcond _ _ pc1 pc2 _ :: _) ls m => (max (bound (branch_target f) pc1) (bound (branch_target f) pc2)) * 2 + 1
+  | Block s f sp bb ls m => 0
+  | Callstate s f ls m => 0
+  | Returnstate s ls m => 0
+  end.
+
+Lemma match_parent_locset:
+  forall s ts,
+  list_forall2 match_stackframes s ts ->
+  locmap_lessdef (parent_locset s) (parent_locset ts).
+Proof.
+  induction 1; simpl.
+- red; auto.
+- inv H; auto.
+Qed.
+
+Lemma tunnel_step_correct:
+  forall st1 t st2, step ge st1 t st2 ->
+  forall st1' (MS: match_states st1 st1'),
+  (exists st2', step tge st1' t st2' /\ match_states st2 st2')
+  \/ (measure st2 < measure st1 /\ t = E0 /\ match_states st2 st1')%nat.
+Proof.
+  induction 1; intros; try inv MS; try (simpl in IBRANCH; inv IBRANCH).
+
+- (* entering a block *)
+  exploit (branch_target_bounds f tf pc); eauto.
+  rewrite H. intros X; inversion X.
+  + (* TB_default *) 
+    rewrite TB; left. econstructor; split.
+    * econstructor. simpl. erewrite tunnel_function_unfold, H ; simpl; eauto.
+    * econstructor; eauto.
+  + (* FT_branch *)
+    simpl; right.
+    rewrite EQ; repeat (econstructor; lia || eauto).
+  + (* FT_cond *)
+    simpl; right.
+    repeat (econstructor; lia || eauto); simpl.
+    destruct (peq _ _); try congruence.
+- (* Lop *)
+  exploit eval_operation_lessdef. apply reglist_lessdef; eauto. eauto. eauto. 
+  intros (tv & EV & LD).
+  left; simpl; econstructor; split.
+  eapply exec_Lop with (v := tv); eauto.
+  rewrite <- EV. apply eval_operation_preserved. exact symbols_preserved.
+  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+- (* Lload *)
+  exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. 
+  intros (ta & EV & LD).
+  exploit Mem.loadv_extends. eauto. eauto. eexact LD. 
+  intros (tv & LOAD & LD').
+  left; simpl; econstructor; split.
+  eapply exec_Lload with (a := ta).
+  rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved.
+  eauto. eauto.
+  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+- (* Lload notrap1 *)
+  exploit eval_addressing_lessdef_none. apply reglist_lessdef; eauto. eassumption.
+  left; simpl; econstructor; split.
+  eapply exec_Lload_notrap1.
+  rewrite <- H0.
+  apply eval_addressing_preserved. exact symbols_preserved. eauto.
+  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+- (* Lload notrap2 *)
+  exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. 
+  intros (ta & EV & LD).
+  destruct (Mem.loadv chunk tm ta) eqn:Htload.
+  {
+  left; simpl; econstructor; split.
+  eapply exec_Lload.
+  rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved.
+  exact Htload. eauto.
+  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+  }
+  {
+  left; simpl; econstructor; split.
+  eapply exec_Lload_notrap2.
+  rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved.
+  exact Htload. eauto.
+  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+  }
+- (* Lgetstack *)
+  left; simpl; econstructor; split.
+  econstructor; eauto.
+  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+- (* Lsetstack *)
+  left; simpl; econstructor; split.
+  econstructor; eauto.
+  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
+- (* Lstore *)
+  exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. 
+  intros (ta & EV & LD).
+  exploit Mem.storev_extends. eauto. eauto. eexact LD. apply LS.  
+  intros (tm' & STORE & MEM').
+  left; simpl; econstructor; split.
+  eapply exec_Lstore with (a := ta).
+  rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved.
+  eauto. eauto.
+  econstructor; eauto using locmap_undef_regs_lessdef.
+- (* Lcall *)
+  left; simpl.
+  exploit find_function_translated; eauto.
+  intros (tfd & Htfd & FIND).
+  econstructor; split.
+  + eapply exec_Lcall; eauto.
+    erewrite sig_preserved; eauto.
+  + econstructor; eauto.
+    constructor; auto.
+    constructor; auto.
+- (* Ltailcall *)
+  exploit find_function_translated. 2: eauto.
+  { eauto using return_regs_lessdef, match_parent_locset. }
+  intros (tfd & Htfd & FIND).
+  exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM').
+  left; simpl; econstructor; split.
+  + eapply exec_Ltailcall; eauto.
+    * eapply sig_preserved; eauto.
+    * erewrite fn_stacksize_preserved; eauto.
+  + econstructor; eauto using return_regs_lessdef, match_parent_locset.
+- (* Lbuiltin *)
+  exploit eval_builtin_args_lessdef. eexact LS. eauto. eauto. intros (tvargs & EVA & LDA).
+  exploit external_call_mem_extends; eauto. intros (tvres & tm' & A & B & C & D).
+  left; simpl; econstructor; split.
+  eapply exec_Lbuiltin; eauto.
+  eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved. 
+  eapply external_call_symbols_preserved. apply senv_preserved. eauto.
+  econstructor; eauto using locmap_setres_lessdef, locmap_undef_regs_lessdef.
+- (* Lbranch (preserved) *)
+  left; simpl; econstructor; split.
+  eapply exec_Lbranch; eauto.
+  fold (branch_target f pc). econstructor; eauto.
+- (* Lbranch (eliminated) *)
+  right; split. simpl. lia. split. auto. constructor; auto.
+- (* Lcond (preserved) *)
+  simpl; left; destruct (peq _ _) eqn: EQ.
+  + econstructor; split.
+    eapply exec_Lbranch. 
+    destruct b.
+    * constructor; eauto using locmap_undef_regs_lessdef_1.
+    * rewrite e. constructor; eauto using locmap_undef_regs_lessdef_1.
+  + econstructor; split.
+    eapply exec_Lcond; eauto. eapply eval_condition_lessdef; eauto using reglist_lessdef.
+    destruct b; econstructor; eauto using locmap_undef_regs_lessdef.
+- (* Lcond (eliminated) *)
+  destruct (peq _ _) eqn: EQ; try inv H1.
+  right; split; simpl. 
+  + destruct b.
+    generalize (Nat.le_max_l (bound (branch_target f) pc1) (bound (branch_target f) pc2)); lia.
+    generalize (Nat.le_max_r (bound (branch_target f) pc1) (bound (branch_target f) pc2)); lia.
+  + destruct b.
+    -- repeat (constructor; auto).
+    -- rewrite e; repeat (constructor; auto).
+- (* Ljumptable *)
+  assert (tls (R arg) = Vint n).
+  { generalize (LS (R arg)); rewrite H; intros LD; inv LD; auto. }
+  left; simpl; econstructor; split.
+  eapply exec_Ljumptable.
+  eauto. rewrite list_nth_z_map, H0; simpl; eauto. eauto.
+  econstructor; eauto using locmap_undef_regs_lessdef.
+- (* Lreturn *)
+  exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM'). 
+  left; simpl; econstructor; split.
+  + eapply exec_Lreturn; eauto.
+    erewrite fn_stacksize_preserved; eauto.
+  + constructor; eauto using return_regs_lessdef, match_parent_locset.
+- (* internal function *)
+  exploit tunnel_fundef_Internal; eauto.
+  intros (tf' & TF' & ITF). subst.
+  exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl.
+  intros (tm' & ALLOC & MEM').
+  left; simpl.
+  econstructor; split.
+  + eapply exec_function_internal; eauto.
+    erewrite fn_stacksize_preserved; eauto.
+  + simpl.
+    erewrite (fn_entrypoint_preserved f tf'); auto.
+    econstructor; eauto using locmap_undef_regs_lessdef, call_regs_lessdef.
+- (* external function *)
+  exploit external_call_mem_extends; eauto using locmap_getpairs_lessdef.
+  intros (tvres & tm' & A & B & C & D).
+  left; simpl; econstructor; split.
+  + erewrite (tunnel_fundef_External tf ef); eauto.
+    eapply exec_function_external; eauto.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  + simpl. econstructor; eauto using locmap_setpair_lessdef, locmap_undef_caller_save_regs_lessdef.
+- (* return *)
+  inv STK. inv H1.
+  left; econstructor; split.
+  eapply exec_return; eauto.
+  constructor; auto.
+Qed.
+
+Lemma transf_initial_states:
+  forall st1, initial_state prog st1 ->
+  exists st2, initial_state tprog st2 /\ match_states st1 st2.
+Proof.
+  intros. inversion H.
+  exploit function_ptr_translated; eauto.
+  intros (tf & Htf & Hf).
+  exists (Callstate nil tf (Locmap.init Vundef) m0); split.
+  econstructor; eauto.
+  apply (Genv.init_mem_transf_partial TRANSL); auto.
+  rewrite (match_program_main TRANSL).
+  rewrite symbols_preserved. eauto.
+  rewrite <- H3. apply sig_preserved. auto.
+  constructor. constructor. red; simpl; auto. apply Mem.extends_refl. auto.
+Qed.
+
+Lemma transf_final_states:
+  forall st1 st2 r,
+  match_states st1 st2 -> final_state st1 r -> final_state st2 r.
+Proof.
+  intros. inv H0. inv H. inv STK.
+  set (p := map_rpair R (Conventions1.loc_result signature_main)) in *.
+  generalize (locmap_getpair_lessdef p _ _ LS). rewrite H1; intros LD; inv LD.
+  econstructor; eauto.
+Qed.
+
+Theorem transf_program_correct:
+  forward_simulation (LTL.semantics prog) (LTL.semantics tprog).
+Proof.
+  eapply forward_simulation_opt.
+  apply senv_preserved.
+  eexact transf_initial_states.
+  eexact transf_final_states.
+  eexact tunnel_step_correct.
+Qed.
+
+End PRESERVATION.
diff --git a/backend/RTLTunneling.v b/backend/RTLTunneling.v
index 049160fd..df663f48 100644
--- a/backend/RTLTunneling.v
+++ b/backend/RTLTunneling.v
@@ -19,7 +19,7 @@ Require Import Coqlib Maps Errors.
 Require Import AST.
 Require Import RTL.
 
-(* This is a port of tunneling for LTL. See Tunneling.v *)
+(* This is a port of tunneling for LTL. See LTLTunneling.v *)
 
 Definition UF := PTree.t (node * Z).
 
diff --git a/backend/Tunneling.v b/backend/Tunneling.v
deleted file mode 100644
index c849ea92..00000000
--- a/backend/Tunneling.v
+++ /dev/null
@@ -1,167 +0,0 @@
-(* *********************************************************************)
-(*                                                                     *)
-(*              The Compcert verified compiler                         *)
-(*                                                                     *)
-(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
-(*          Sylvain Boulmé  Grenoble-INP, VERIMAG                      *)
-(*                                                                     *)
-(*  Copyright Institut National de Recherche en Informatique et en     *)
-(*  Automatique.  All rights reserved.  This file is distributed       *)
-(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
-(*                                                                     *)
-(* *********************************************************************)
-
-(** Branch tunneling (optimization of branches to branches). *)
-
-Require Import Coqlib Maps Errors.
-Require Import AST.
-Require Import LTL.
-
-(** Branch tunneling shortens sequences of branches (with no intervening
-  computations) by rewriting the branch and conditional branch instructions
-  so that they jump directly to the end of the branch sequence.
-  For example:
-<<
-     L1: if (cond) nop L2;                L1: nop L3;
-     L2: nop L3;               becomes    L2: nop L3;
-     L3: instr;                           L3: instr;
-     L4: if (cond) goto L1;               L4: if (cond) nop L1;
->>
-  This optimization can be applied to several of our intermediate
-  languages.  We choose to perform it on the [LTL] language,
-  after register allocation but before code linearization.
-  Register allocation can delete instructions (such as dead
-  computations or useless moves), therefore there are more
-  opportunities for tunneling after allocation than before.
-  Symmetrically, prior tunneling helps linearization to produce
-  better code, e.g. by revealing that some [branch] instructions are
-  dead code (as the "branch L3" in the example above).
-*)
-
-(** The implementation consists in two passes: the first pass
-    records the branch t of each "nop"
-    and the second pass replace any "nop" node to [pc]
-    by a branch to a "nop" at [branch_t f pc]
-
-Naively, we may define [branch_t f pc] as follows:
-<<
-  branch_t f pc = branch_t f pc'  if f(pc) = nop pc'
-                     = pc                   otherwise
->>
-  However, this definition can fail to terminate if
-  the program can contain loops consisting only of branches, as in
-<<
-     L1: branch L1;
->>
-  or
-<<
-     L1: nop L2;
-     L2: nop L1;
->>
-  Coq warns us of this fact by not accepting the definition
-  of [branch_t] above.
-
-  To handle this problem, we use a union-find data structure, adding equalities [pc = pc']
-  for every instruction [pc: nop pc'] in the function.
-
-  Moreover, because the elimination of "useless" [Lcond] depends on the current [uf] datastructure,
-  we need to iterate until we reach a fixpoint.
-
-  Actually, it is simpler and more efficient to perform this in an external oracle, that also returns a measure
-  in order to help the proof.
-
-  A verifier checks that this data-structure is correct.
-*)
-
-Definition UF := PTree.t (node * Z).
-
-(* The oracle returns a map of "nop" node to their target with a distance (ie the number of the "nop" node on the path) to the target. *)
-Axiom branch_target: LTL.function -> UF. 
-Extract Constant branch_target => "Tunnelingaux.branch_target".
-
-Local Open Scope error_monad_scope.
-
-Definition get (td: UF) pc:node*Z :=
-  match td!pc with
-  | Some (t,d) => (t,Z.abs d)
-  | _ => (pc,0)
-  end.
-
-Definition target (td: UF) (pc:node): node := fst (get td pc).
-Coercion target: UF >-> Funclass.
-
-(* we check that the domain of [td] is included in the domain of [c] *)
-Definition check_included (td: UF) (c: code): option bblock
-  := PTree.fold (fun (ok:option bblock) pc _ => if ok then c!pc else None) td (Some nil).
-
-(* we check the validity of targets and their bound:
-   the distance of a "nop" node (w.r.t to the target) must be greater than the one of its parents.
-*)
-Definition check_bblock (td: UF) (pc:node) (bb: bblock): res unit
- := match td!pc with
-    | None => OK tt
-    | Some (tpc, dpc) =>
-       let dpc := Z.abs dpc in
-       match bb with
-       | Lbranch s ::_ =>
-         let (ts, ds) := get td s in 
-         if peq tpc ts then
-            if zlt ds dpc then OK tt
-            else Error (msg "bad distance in Lbranch")
-         else Error (msg "invalid skip of Lbranch")
-       | Lcond _ _ s1 s2 _ :: _ =>
-          let (ts1, ds1) := get td s1 in
-          let (ts2, ds2) := get td s2 in
-          if peq tpc ts1 then
-            if peq tpc ts2 then
-              if zlt ds1 dpc then
-                if zlt ds2 dpc then OK tt
-                else Error (msg "bad distance on else branch")
-              else Error (msg "bad distance on then branch")
-            else Error (msg "invalid skip of else branch")
-          else Error (msg "invalid skip of then branch")
-      | _ => Error (msg "cannot skip this block")
-      end
-   end.
-
-Definition check_code (td: UF) (c:code): res unit
-  := PTree.fold (fun ok pc bb => do _ <- ok; check_bblock td pc bb) c (OK tt).
-
-(** The second pass rewrites all LTL instructions, replacing every
-  successor [s] of every instruction by [t s], the canonical representative
-  of its equivalence class in the union-find data structure. *)
-
-Definition tunnel_instr (t: node -> node) (i: instruction) : instruction :=
-  match i with
-  | Lbranch s => Lbranch (t s)
-  | Lcond cond args s1 s2 info =>
-      let s1' := t s1 in let s2' := t s2 in
-      if peq s1' s2'
-      then Lbranch s1'
-      else Lcond cond args s1' s2' info
-  | Ljumptable arg tbl => Ljumptable arg (List.map t tbl)
-  | _ => i
-  end.
-
-Definition tunnel_block (t: node -> node) (b: bblock) : bblock :=
-  List.map (tunnel_instr t) b.
-
-Definition tunnel_function (f: LTL.function) : res LTL.function :=
-  let td := branch_target f in
-  let c := (fn_code f) in
-  if check_included td c then 
-    do _ <- check_code td c ; OK
-    (mkfunction
-      (fn_sig f)
-      (fn_stacksize f)
-      (PTree.map1 (tunnel_block td) c)
-      (td (fn_entrypoint f)))
-  else 
-   Error (msg "Some node of the union-find is not in the CFG")
-   .
-
-Definition tunnel_fundef (f: fundef) : res fundef :=
-  transf_partial_fundef tunnel_function f.
-
-Definition transf_program (p: program) : res program :=
-  transform_partial_program tunnel_fundef p.
diff --git a/backend/Tunnelingaux.ml b/backend/Tunnelingaux.ml
deleted file mode 100644
index d4b88a5d..00000000
--- a/backend/Tunnelingaux.ml
+++ /dev/null
@@ -1,108 +0,0 @@
-(* *************************************************************)
-(*                                                             *)
-(*             The Compcert verified compiler                  *)
-(*                                                             *)
-(*           Sylvain Boulmé  Grenoble-INP, VERIMAG             *)
-(*                                                             *)
-(*  Copyright VERIMAG. All rights reserved.                    *)
-(*  This file is distributed under the terms of the INRIA      *)
-(*  Non-Commercial License Agreement.                          *)
-(*                                                             *)
-(* *************************************************************)
-
-(*
-
-This file implements the [branch_target] oracle that identifies "nop" branches in a LTL function,
-and computes their target node with the distance (ie the number of cummulated nops) toward this target.
-
-See [Tunneling.v]
-
-*)
-
-open Coqlib
-open LTL
-open Maps
-open Camlcoq
-open Tunnelinglibs
-
-module LANG = struct
-  type code_unit = LTL.bblock
-  type funct = LTL.coq_function
-end
-
-module OPT = struct
-  let langname = "LTL"
-  let limit_tunneling = None
-  let debug_flag = ref false
-  let final_dump = false
-end
-
-module Partial = Tunnelinglibs.Tunneling(LANG)(OPT)
-
-module FUNS = struct
-  let build_simplified_cfg c acc pc bb =
-    match bb with
-    | Lbranch s :: _ ->
-       let ns = get_node c s in
-       set_branch c pc ns;
-       acc
-    | Lcond (_, _, s1, s2, _) :: _ ->
-       c.num_rems <- c.num_rems + 1;
-       let ns1 = get_node c s1 in
-       let ns2 = get_node c s2 in
-       let npc = get_node c pc in
-       npc.inst <- COND(ns1, ns2);
-       npc::acc
-    | _ -> acc
-
-  let print_code_unit c println (pc, bb) =
-    match bb with
-    | Lbranch s::_ -> (if println then Partial.debug "\n"); Partial.debug "%d:Lbranch %d %s\n" pc (P.to_int s) (string_of_labeli c.nodes pc); false
-    | Lcond (_, _, s1, s2, _)::_ -> (if println then Partial.debug "\n"); Partial.debug "%d:Lcond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (string_of_labeli c.nodes pc); false
-    | _ -> Partial.debug "%d " pc; true
-
-  let fn_code f = f.fn_code
-  let fn_entrypoint f = f.fn_entrypoint
-
-
-  (*************************************************************)
-  (* Copy-paste of the extracted code of the verifier          *)
-  (* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *)
-
-  let check_code_unit td pc bb =
-    match PTree.get pc td with
-    | Some p ->
-      let (tpc, dpc) = p in
-      let dpc0 = dpc in
-      (match bb with
-       | [] ->
-         raise (BugOnPC (P.to_int pc))
-       | i :: _ ->
-         (match i with
-          | Lbranch s ->
-            let (ts, ds) = get td s in
-            if peq tpc ts
-            then if zlt ds dpc0
-                 then ()
-                 else raise (BugOnPC (P.to_int pc))
-            else raise (BugOnPC (P.to_int pc))
-          | Lcond (_, _, s1, s2, _) ->
-            let (ts1, ds1) = get td s1 in
-            let (ts2, ds2) = get td s2 in
-            if peq tpc ts1
-            then if peq tpc ts2
-                 then if zlt ds1 dpc0
-                      then if zlt ds2 dpc0
-                           then ()
-                           else raise (BugOnPC (P.to_int pc))
-                      else raise (BugOnPC (P.to_int pc))
-                 else raise (BugOnPC (P.to_int pc))
-            else raise (BugOnPC (P.to_int pc))
-          | _ ->
-            raise (BugOnPC (P.to_int pc))))
-    | None -> ()
-end
-
-module T = Partial.T(FUNS)
-let branch_target = T.branch_target
-
diff --git a/backend/Tunnelinglibs.ml b/backend/Tunnelinglibs.ml
index 1bb35f7a..7c826ba4 100644
--- a/backend/Tunnelinglibs.ml
+++ b/backend/Tunnelinglibs.ml
@@ -16,7 +16,7 @@
 This file implements the core functions of the tunneling passes, for both RTL
 and LTL, by using a simplified CFG as a transparent interface
 
-See [Tunneling.v] and [RTLTunneling.v]
+See [LTLTunneling.v] and [RTLTunneling.v]
 
 *)
 
diff --git a/backend/Tunnelingproof.v b/backend/Tunnelingproof.v
deleted file mode 100644
index 3bc92f75..00000000
--- a/backend/Tunnelingproof.v
+++ /dev/null
@@ -1,666 +0,0 @@
-(* *********************************************************************)
-(*                                                                     *)
-(*              The Compcert verified compiler                         *)
-(*                                                                     *)
-(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
-(*          Sylvain Boulmé  Grenoble-INP, VERIMAG                      *)
-(*                                                                     *)
-(*  Copyright Institut National de Recherche en Informatique et en     *)
-(*  Automatique.  All rights reserved.  This file is distributed       *)
-(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
-(*                                                                     *)
-(* *********************************************************************)
-
-(** Correctness proof for the branch tunneling optimization. *)
-
-Require Import Coqlib Maps Errors.
-Require Import AST Linking.
-Require Import Values Memory Events Globalenvs Smallstep.
-Require Import Op Locations LTL.
-Require Import Tunneling.
-
-Local Open Scope nat.
-
-
-(** * Properties of the branch_target, when the verifier succeeds *)
-
-Definition check_included_spec (c:code) (td:UF) (ok: option bblock) :=
-   ok <> None -> forall pc, c!pc = None -> td!pc = None.
-
-Lemma check_included_correct (td: UF) (c: code): 
-  check_included_spec c td (check_included td c).
-Proof. 
-  apply PTree_Properties.fold_rec with (P := check_included_spec c).
-- (* extensionality *)
-  unfold check_included_spec. intros m m' a EQ IND X pc. rewrite <- EQ; auto.
-- (* base case *)
-  intros _ pc.  rewrite PTree.gempty; try congruence.
-- (* inductive case *)
-  unfold check_included_spec.
-  intros m [|] pc bb NEW ATPC IND; simpl; try congruence.
-  intros H pc0. rewrite PTree.gsspec; destruct (peq _ _); subst; simpl; try congruence.
-  intros; eapply IND; try congruence.
-Qed.
-
-Inductive target_bounds (target: node -> node) (bound: node -> nat) (pc: node): (option bblock) -> Prop :=
- | TB_default (TB: target pc = pc) ob
-     : target_bounds target bound pc ob
- | TB_branch s bb
-     (EQ: target pc = target s)
-     (DECREASE: bound s < bound pc)
-     : target_bounds target bound pc (Some (Lbranch s::bb))
- | TB_cond cond args s1 s2 info bb
-     (EQ1: target pc = target s1)
-     (EQ2: target pc = target s2)
-     (DEC1: bound s1 < bound pc)
-     (DEC2: bound s2 < bound pc)
-     : target_bounds target bound pc (Some (Lcond cond args s1 s2 info::bb))
- .
-Local Hint Resolve TB_default: core.
-
-Lemma target_None (td:UF) (pc: node): td!pc = None -> td pc = pc.
-Proof.
-  unfold target, get. intros H; rewrite H; auto.
-Qed.
-Local Hint Resolve target_None Z.abs_nonneg: core.
-
-Lemma get_nonneg td pc t d: get td pc = (t, d) -> (0 <= d)%Z.
-Proof.
-  unfold get. destruct (td!_) as [(t0&d0)|]; intros H; inversion H; subst; simpl; lia || auto.
-Qed.
-Local Hint Resolve get_nonneg: core.
-
-Definition bound (td: UF) (pc: node) := Z.to_nat (snd (get td pc)).
-
-Lemma check_bblock_correct (td:UF) (pc:node) (bb: bblock):
-  check_bblock td pc bb = OK tt -> 
-  target_bounds (target td) (bound td) pc (Some bb).
-Proof.
-  unfold check_bblock, bound.
-  destruct (td!pc) as [(tpc&dpc)|] eqn:Hpc; auto.
-  assert (Tpc: td pc = tpc). { unfold target, get; rewrite Hpc; simpl; auto. }
-  assert (Dpc: snd (get td pc) = Z.abs dpc). { unfold get; rewrite Hpc; simpl; auto. }
-  destruct bb as [|[ ] bb]; simpl; try congruence.
-  + destruct (get td s) as (ts, ds) eqn:Hs.
-    repeat (destruct (peq _ _) || destruct (zlt _ _)); simpl; try congruence.
-    intros; apply TB_branch.
-    * rewrite Tpc. unfold target; rewrite Hs; simpl; auto.
-    * rewrite Dpc, Hs; simpl. apply Z2Nat.inj_lt; eauto.
-  + destruct (get td s1) as (ts1, ds1) eqn:Hs1.
-    destruct (get td s2) as (ts2, ds2) eqn:Hs2.
-    repeat (destruct (peq _ _) || destruct (zlt _ _)); simpl; try congruence.
-    intros; apply TB_cond.
-    * rewrite Tpc. unfold target; rewrite Hs1; simpl; auto.
-    * rewrite Tpc. unfold target; rewrite Hs2; simpl; auto.
-    * rewrite Dpc, Hs1; simpl. apply Z2Nat.inj_lt; eauto.
-    * rewrite Dpc, Hs2; simpl. apply Z2Nat.inj_lt; eauto.
-Qed.
-
-Definition check_code_spec (td:UF) (c:code) (ok: res unit) :=
-   ok = OK tt -> forall pc bb, c!pc = Some bb -> target_bounds (target td) (bound td) pc (Some bb).
-
-Lemma check_code_correct (td:UF) c:
-   check_code_spec td c (check_code td c).
-Proof.
-  apply PTree_Properties.fold_rec with (P := check_code_spec td).
-- (* extensionality *)
-  unfold check_code_spec. intros m m' a EQ IND X pc bb; subst. rewrite  <- ! EQ; eauto.
-- (* base case *)
-  intros _ pc.  rewrite PTree.gempty; try congruence.
-- (* inductive case *)
-  unfold check_code_spec.
-  intros m [[]|] pc bb NEW ATPC IND; simpl; try congruence.
-  intros H pc0 bb0. rewrite PTree.gsspec; destruct (peq _ _); subst; simpl; auto.
-  intros X; inversion X; subst.
-  apply check_bblock_correct; auto.
-Qed.
-
-Theorem branch_target_bounds:
-  forall f tf pc, 
-  tunnel_function f = OK tf ->
-  target_bounds (branch_target f) (bound (branch_target f)) pc (f.(fn_code)!pc).
-Proof.
-  unfold tunnel_function; intros f f' pc.
-  destruct (check_included _ _) eqn:H1; try congruence.
-  destruct (check_code _ _) as [[]|] eqn:H2; simpl; try congruence.
-  intros _.
-  destruct ((fn_code f)!pc) eqn:X.
-  - exploit check_code_correct; eauto.
-  - exploit check_included_correct; eauto.
-    congruence.
-Qed.
-
-Lemma tunnel_function_unfold:
-  forall f tf pc,
-  tunnel_function f = OK tf -> 
-  (fn_code tf)!pc = option_map (tunnel_block (branch_target f)) (fn_code f)!pc.
-Proof.
-  unfold tunnel_function; intros f f' pc.
-  destruct (check_included _ _) eqn:H1; try congruence.
-  destruct (check_code _ _) as [[]|] eqn:H2; simpl; try congruence.
-  intros X; inversion X; clear X; subst.
-  simpl. rewrite PTree.gmap1. auto.
-Qed.
-
-Lemma tunnel_fundef_Internal:
-  forall f tf, tunnel_fundef (Internal f) = OK tf
-  -> exists tf', tunnel_function f = OK tf' /\ tf = Internal tf'.
-Proof.
-  intros f tf; simpl.
-  destruct (tunnel_function f) eqn:X; simpl; try congruence.
-  intros EQ; inversion EQ.
-  eexists; split; eauto.
-Qed.
-
-Lemma tunnel_fundef_External:
-  forall tf ef, tunnel_fundef (External ef) = OK tf
-  -> tf = External ef.
-Proof.
-  intros tf ef; simpl. intros H; inversion H; auto.
-Qed.
-
-(** * Preservation of semantics *)
-
-Definition match_prog (p tp: program) :=
-  match_program (fun _ f tf => tunnel_fundef f = OK tf) eq p tp.
-
-Lemma transf_program_match:
-  forall prog tprog, transf_program prog = OK tprog -> match_prog prog tprog.
-Proof.
-  intros. eapply match_transform_partial_program_contextual; eauto.
-Qed.
-
-Section PRESERVATION.
-
-Variables prog tprog: program.
-Hypothesis TRANSL: match_prog prog tprog.
-Let ge := Genv.globalenv prog.
-Let tge := Genv.globalenv tprog.
-
-Lemma functions_translated:
-  forall (v: val) (f: fundef),
-  Genv.find_funct ge v = Some f ->
-  exists tf, tunnel_fundef f = OK tf /\ Genv.find_funct tge v = Some tf.
-Proof.
-  intros. exploit (Genv.find_funct_match TRANSL); eauto.
-  intros (cu & tf & A & B & C).
-  repeat eexists; intuition eauto.
-Qed.
-
-Lemma function_ptr_translated:
-  forall v f,
-  Genv.find_funct_ptr ge v = Some f ->
-  exists tf,
-  Genv.find_funct_ptr tge v = Some tf /\ tunnel_fundef f = OK tf.
-Proof.
-  intros.
-  exploit (Genv.find_funct_ptr_transf_partial TRANSL); eauto.
-Qed.
-
-Lemma symbols_preserved s: Genv.find_symbol tge s = Genv.find_symbol ge s.
-Proof.
-  rewrite <- (Genv.find_symbol_match TRANSL). reflexivity.
-Qed.
-
-Lemma senv_preserved:
-  Senv.equiv ge tge.
-Proof.
-  eapply (Genv.senv_match TRANSL).
-Qed.
-
-Lemma sig_preserved:
-  forall f tf, tunnel_fundef f = OK tf -> funsig tf = funsig f.
-Proof.
-  intros. destruct f.
-  - simpl in H. monadInv H. unfold tunnel_function in EQ. 
-    destruct (check_included _ _); try congruence.
-    monadInv EQ. simpl; auto.
-  - simpl in H. monadInv H. reflexivity.
-Qed.
-
-Lemma fn_stacksize_preserved:
-  forall f tf, tunnel_function f = OK tf -> fn_stacksize tf = fn_stacksize f.
-Proof.
-  intros f tf; unfold tunnel_function.
-  destruct (check_included _ _); try congruence.
-  destruct (check_code _ _); simpl; try congruence.
-  intros H; inversion H; simpl; auto.
-Qed.
-
-Lemma fn_entrypoint_preserved:
-  forall f tf, tunnel_function f = OK tf -> fn_entrypoint tf = branch_target f (fn_entrypoint f).
-Proof.
-  intros f tf; unfold tunnel_function.
-  destruct (check_included _ _); try congruence.
-  destruct (check_code _ _); simpl; try congruence.
-  intros H; inversion H; simpl; auto.
-Qed.
-
-
-(** The proof of semantic preservation is a simulation argument
-  based on diagrams of the following form:
-<<
-           st1 --------------- st2
-            |                   |
-           t|                  ?|t
-            |                   |
-            v                   v
-           st1'--------------- st2'
->>
-  The [match_states] predicate, defined below, captures the precondition
-  between states [st1] and [st2], as well as the postcondition between
-  [st1'] and [st2'].  One transition in the source code (left) can correspond
-  to zero or one transition in the transformed code (right).  The
-  "zero transition" case occurs when executing a [Lnop] instruction
-  in the source code that has been removed by tunneling.
-
-  In the definition of [match_states], what changes between the original and
-  transformed codes is mainly the control-flow
-  (in particular, the current program point [pc]), but also some values
-  and memory states, since some [Vundef] values can become more defined
-  as a consequence of eliminating useless [Lcond] instructions. *)
-
-Definition locmap_lessdef (ls1 ls2: locset) : Prop :=
-  forall l, Val.lessdef (ls1 l) (ls2 l).
-
-Inductive match_stackframes: stackframe -> stackframe -> Prop :=
-  | match_stackframes_intro:
-      forall f tf sp ls0 bb tls0,
-      locmap_lessdef ls0 tls0 ->
-      tunnel_function f = OK tf ->
-      match_stackframes
-         (Stackframe f sp ls0 bb)
-         (Stackframe tf sp tls0 (tunnel_block (branch_target f) bb)).
-
-Inductive match_states: state -> state -> Prop :=
-  | match_states_intro:
-      forall s f tf sp pc ls m ts tls tm
-        (STK: list_forall2 match_stackframes s ts)
-        (LS: locmap_lessdef ls tls)
-        (MEM: Mem.extends m tm)
-        (TF: tunnel_function f = OK tf),
-      match_states (State s f sp pc ls m)
-                   (State ts tf sp (branch_target f pc) tls tm)
-  | match_states_block:
-      forall s f tf sp bb ls m ts tls tm
-        (STK: list_forall2 match_stackframes s ts)
-        (LS: locmap_lessdef ls tls)
-        (MEM: Mem.extends m tm)
-        (TF: tunnel_function f = OK tf),
-      match_states (Block s f sp bb ls m)
-                   (Block ts tf sp (tunnel_block (branch_target f) bb) tls tm)
-  | match_states_interm:
-      forall s f tf sp pc i bb ls m ts tls tm
-        (STK: list_forall2 match_stackframes s ts)
-        (LS: locmap_lessdef ls tls)
-        (MEM: Mem.extends m tm)
-        (IBRANCH: tunnel_instr (branch_target f) i = Lbranch pc)
-        (TF: tunnel_function f = OK tf),
-      match_states (Block s f sp (i :: bb) ls m)
-                   (State ts tf sp pc tls tm)
-  | match_states_call:
-      forall s f tf ls m ts tls tm
-        (STK: list_forall2 match_stackframes s ts)
-        (LS: locmap_lessdef ls tls)
-        (MEM: Mem.extends m tm)
-        (TF: tunnel_fundef f = OK tf),
-      match_states (Callstate s f ls m)
-                   (Callstate ts tf tls tm)
-  | match_states_return:
-      forall s ls m ts tls tm
-        (STK: list_forall2 match_stackframes s ts)
-        (LS: locmap_lessdef ls tls)
-        (MEM: Mem.extends m tm),
-      match_states (Returnstate s ls m)
-                   (Returnstate ts tls tm).
-
-(** Properties of [locmap_lessdef] *)
-
-Lemma reglist_lessdef:
-  forall rl ls1 ls2,
-  locmap_lessdef ls1 ls2 -> Val.lessdef_list (reglist ls1 rl) (reglist ls2 rl).
-Proof.
-  induction rl; simpl; intros; auto.
-Qed.
-
-Lemma locmap_set_lessdef:
-  forall ls1 ls2 v1 v2 l,
-  locmap_lessdef ls1 ls2 -> Val.lessdef v1 v2 -> locmap_lessdef (Locmap.set l v1 ls1) (Locmap.set l v2 ls2).
-Proof.
-  intros; red; intros l'. unfold Locmap.set. destruct (Loc.eq l l').
-- destruct l; auto using Val.load_result_lessdef.
-- destruct (Loc.diff_dec l l'); auto.
-Qed.
-
-Lemma locmap_set_undef_lessdef:
-  forall ls1 ls2 l,
-  locmap_lessdef ls1 ls2 -> locmap_lessdef (Locmap.set l Vundef ls1) ls2.
-Proof.
-  intros; red; intros l'. unfold Locmap.set. destruct (Loc.eq l l').
-- destruct l; auto. destruct ty; auto. 
-- destruct (Loc.diff_dec l l'); auto.
-Qed.
-
-Lemma locmap_undef_regs_lessdef:
-  forall rl ls1 ls2,
-  locmap_lessdef ls1 ls2 -> locmap_lessdef (undef_regs rl ls1) (undef_regs rl ls2).
-Proof.
-  induction rl as [ | r rl]; intros; simpl. auto. apply locmap_set_lessdef; auto. 
-Qed.
-
-Lemma locmap_undef_regs_lessdef_1:
-  forall rl ls1 ls2,
-  locmap_lessdef ls1 ls2 -> locmap_lessdef (undef_regs rl ls1) ls2.
-Proof.
-  induction rl as [ | r rl]; intros; simpl. auto. apply locmap_set_undef_lessdef; auto. 
-Qed.
-
-Lemma locmap_getpair_lessdef:
-  forall p ls1 ls2,
-  locmap_lessdef ls1 ls2 -> Val.lessdef (Locmap.getpair p ls1) (Locmap.getpair p ls2).
-Proof.
-  intros; destruct p; simpl; auto using Val.longofwords_lessdef.
-Qed.
-
-Lemma locmap_getpairs_lessdef:
-  forall pl ls1 ls2,
-  locmap_lessdef ls1 ls2 ->
-  Val.lessdef_list (map (fun p => Locmap.getpair p ls1) pl) (map (fun p => Locmap.getpair p ls2) pl).
-Proof.
-  intros. induction pl; simpl; auto using locmap_getpair_lessdef.
-Qed.
-
-Lemma locmap_setpair_lessdef:
-  forall p ls1 ls2 v1 v2,
-  locmap_lessdef ls1 ls2 -> Val.lessdef v1 v2 -> locmap_lessdef (Locmap.setpair p v1 ls1) (Locmap.setpair p v2 ls2).
-Proof.
-  intros; destruct p; simpl; auto using locmap_set_lessdef, Val.loword_lessdef, Val.hiword_lessdef.
-Qed.
-
-Lemma locmap_setres_lessdef:
-  forall res ls1 ls2 v1 v2,
-  locmap_lessdef ls1 ls2 -> Val.lessdef v1 v2 -> locmap_lessdef (Locmap.setres res v1 ls1) (Locmap.setres res v2 ls2).
-Proof.
-  induction res; intros; simpl; auto using locmap_set_lessdef, Val.loword_lessdef, Val.hiword_lessdef.
-Qed.
-
-Lemma locmap_undef_caller_save_regs_lessdef:
-  forall ls1 ls2,
-  locmap_lessdef ls1 ls2 -> locmap_lessdef (undef_caller_save_regs ls1) (undef_caller_save_regs ls2).
-Proof.
-  intros; red; intros. unfold undef_caller_save_regs. 
-  destruct l.
-- destruct (Conventions1.is_callee_save r); auto.
-- destruct sl; auto.
-Qed.
-
-Lemma find_function_translated:
-  forall ros ls tls fd,
-  locmap_lessdef ls tls ->
-  find_function ge ros ls = Some fd ->
-  exists tfd, tunnel_fundef fd = OK tfd /\ find_function tge ros tls = Some tfd.
-Proof.
-  intros. destruct ros; simpl in *.
-- assert (E: tls (R m) = ls (R m)).
-  { exploit Genv.find_funct_inv; eauto. intros (b & EQ). 
-    generalize (H (R m)). rewrite EQ. intros LD; inv LD. auto. }
-  rewrite E. exploit functions_translated; eauto.
-- rewrite symbols_preserved. destruct (Genv.find_symbol ge i); inv H0. 
-  exploit function_ptr_translated; eauto.
-  intros (tf & X1 & X2). exists tf; intuition.
-Qed.
-
-Lemma call_regs_lessdef:
-  forall ls1 ls2, locmap_lessdef ls1 ls2 -> locmap_lessdef (call_regs ls1) (call_regs ls2).
-Proof.
-  intros; red; intros. destruct l as [r | [] ofs ty]; simpl; auto.
-Qed.
-
-Lemma return_regs_lessdef:
-  forall caller1 callee1 caller2 callee2,
-  locmap_lessdef caller1 caller2 ->
-  locmap_lessdef callee1 callee2 ->
-  locmap_lessdef (return_regs caller1 callee1) (return_regs caller2 callee2).
-Proof.
-  intros; red; intros. destruct l; simpl.
-- destruct (Conventions1.is_callee_save r); auto.
-- destruct sl; auto.
-Qed. 
-
-(** To preserve non-terminating behaviours, we show that the transformed
-  code cannot take an infinity of "zero transition" cases.
-  We use the following [measure] function over source states,
-  which decreases strictly in the "zero transition" case. *)
-
-Definition measure (st: state) : nat :=
-  match st with
-  | State s f sp pc ls m => (bound (branch_target f) pc) * 2
-  | Block s f sp (Lbranch pc :: _) ls m => (bound (branch_target f) pc) * 2 + 1
-  | Block s f sp (Lcond _ _ pc1 pc2 _ :: _) ls m => (max (bound (branch_target f) pc1) (bound (branch_target f) pc2)) * 2 + 1
-  | Block s f sp bb ls m => 0
-  | Callstate s f ls m => 0
-  | Returnstate s ls m => 0
-  end.
-
-Lemma match_parent_locset:
-  forall s ts,
-  list_forall2 match_stackframes s ts ->
-  locmap_lessdef (parent_locset s) (parent_locset ts).
-Proof.
-  induction 1; simpl.
-- red; auto.
-- inv H; auto.
-Qed.
-
-Lemma tunnel_step_correct:
-  forall st1 t st2, step ge st1 t st2 ->
-  forall st1' (MS: match_states st1 st1'),
-  (exists st2', step tge st1' t st2' /\ match_states st2 st2')
-  \/ (measure st2 < measure st1 /\ t = E0 /\ match_states st2 st1')%nat.
-Proof.
-  induction 1; intros; try inv MS; try (simpl in IBRANCH; inv IBRANCH).
-
-- (* entering a block *)
-  exploit (branch_target_bounds f tf pc); eauto.
-  rewrite H. intros X; inversion X.
-  + (* TB_default *) 
-    rewrite TB; left. econstructor; split.
-    * econstructor. simpl. erewrite tunnel_function_unfold, H ; simpl; eauto.
-    * econstructor; eauto.
-  + (* FT_branch *)
-    simpl; right.
-    rewrite EQ; repeat (econstructor; lia || eauto).
-  + (* FT_cond *)
-    simpl; right.
-    repeat (econstructor; lia || eauto); simpl.
-    destruct (peq _ _); try congruence.
-- (* Lop *)
-  exploit eval_operation_lessdef. apply reglist_lessdef; eauto. eauto. eauto. 
-  intros (tv & EV & LD).
-  left; simpl; econstructor; split.
-  eapply exec_Lop with (v := tv); eauto.
-  rewrite <- EV. apply eval_operation_preserved. exact symbols_preserved.
-  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
-- (* Lload *)
-  exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. 
-  intros (ta & EV & LD).
-  exploit Mem.loadv_extends. eauto. eauto. eexact LD. 
-  intros (tv & LOAD & LD').
-  left; simpl; econstructor; split.
-  eapply exec_Lload with (a := ta).
-  rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved.
-  eauto. eauto.
-  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
-- (* Lload notrap1 *)
-  exploit eval_addressing_lessdef_none. apply reglist_lessdef; eauto. eassumption.
-  left; simpl; econstructor; split.
-  eapply exec_Lload_notrap1.
-  rewrite <- H0.
-  apply eval_addressing_preserved. exact symbols_preserved. eauto.
-  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
-- (* Lload notrap2 *)
-  exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. 
-  intros (ta & EV & LD).
-  destruct (Mem.loadv chunk tm ta) eqn:Htload.
-  {
-  left; simpl; econstructor; split.
-  eapply exec_Lload.
-  rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved.
-  exact Htload. eauto.
-  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
-  }
-  {
-  left; simpl; econstructor; split.
-  eapply exec_Lload_notrap2.
-  rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved.
-  exact Htload. eauto.
-  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
-  }
-- (* Lgetstack *)
-  left; simpl; econstructor; split.
-  econstructor; eauto.
-  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
-- (* Lsetstack *)
-  left; simpl; econstructor; split.
-  econstructor; eauto.
-  econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef.
-- (* Lstore *)
-  exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. 
-  intros (ta & EV & LD).
-  exploit Mem.storev_extends. eauto. eauto. eexact LD. apply LS.  
-  intros (tm' & STORE & MEM').
-  left; simpl; econstructor; split.
-  eapply exec_Lstore with (a := ta).
-  rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved.
-  eauto. eauto.
-  econstructor; eauto using locmap_undef_regs_lessdef.
-- (* Lcall *)
-  left; simpl.
-  exploit find_function_translated; eauto.
-  intros (tfd & Htfd & FIND).
-  econstructor; split.
-  + eapply exec_Lcall; eauto.
-    erewrite sig_preserved; eauto.
-  + econstructor; eauto.
-    constructor; auto.
-    constructor; auto.
-- (* Ltailcall *)
-  exploit find_function_translated. 2: eauto.
-  { eauto using return_regs_lessdef, match_parent_locset. }
-  intros (tfd & Htfd & FIND).
-  exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM').
-  left; simpl; econstructor; split.
-  + eapply exec_Ltailcall; eauto.
-    * eapply sig_preserved; eauto.
-    * erewrite fn_stacksize_preserved; eauto.
-  + econstructor; eauto using return_regs_lessdef, match_parent_locset.
-- (* Lbuiltin *)
-  exploit eval_builtin_args_lessdef. eexact LS. eauto. eauto. intros (tvargs & EVA & LDA).
-  exploit external_call_mem_extends; eauto. intros (tvres & tm' & A & B & C & D).
-  left; simpl; econstructor; split.
-  eapply exec_Lbuiltin; eauto.
-  eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved. 
-  eapply external_call_symbols_preserved. apply senv_preserved. eauto.
-  econstructor; eauto using locmap_setres_lessdef, locmap_undef_regs_lessdef.
-- (* Lbranch (preserved) *)
-  left; simpl; econstructor; split.
-  eapply exec_Lbranch; eauto.
-  fold (branch_target f pc). econstructor; eauto.
-- (* Lbranch (eliminated) *)
-  right; split. simpl. lia. split. auto. constructor; auto.
-- (* Lcond (preserved) *)
-  simpl; left; destruct (peq _ _) eqn: EQ.
-  + econstructor; split.
-    eapply exec_Lbranch. 
-    destruct b.
-    * constructor; eauto using locmap_undef_regs_lessdef_1.
-    * rewrite e. constructor; eauto using locmap_undef_regs_lessdef_1.
-  + econstructor; split.
-    eapply exec_Lcond; eauto. eapply eval_condition_lessdef; eauto using reglist_lessdef.
-    destruct b; econstructor; eauto using locmap_undef_regs_lessdef.
-- (* Lcond (eliminated) *)
-  destruct (peq _ _) eqn: EQ; try inv H1.
-  right; split; simpl. 
-  + destruct b.
-    generalize (Nat.le_max_l (bound (branch_target f) pc1) (bound (branch_target f) pc2)); lia.
-    generalize (Nat.le_max_r (bound (branch_target f) pc1) (bound (branch_target f) pc2)); lia.
-  + destruct b.
-    -- repeat (constructor; auto).
-    -- rewrite e; repeat (constructor; auto).
-- (* Ljumptable *)
-  assert (tls (R arg) = Vint n).
-  { generalize (LS (R arg)); rewrite H; intros LD; inv LD; auto. }
-  left; simpl; econstructor; split.
-  eapply exec_Ljumptable.
-  eauto. rewrite list_nth_z_map, H0; simpl; eauto. eauto.
-  econstructor; eauto using locmap_undef_regs_lessdef.
-- (* Lreturn *)
-  exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM'). 
-  left; simpl; econstructor; split.
-  + eapply exec_Lreturn; eauto.
-    erewrite fn_stacksize_preserved; eauto.
-  + constructor; eauto using return_regs_lessdef, match_parent_locset.
-- (* internal function *)
-  exploit tunnel_fundef_Internal; eauto.
-  intros (tf' & TF' & ITF). subst.
-  exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl.
-  intros (tm' & ALLOC & MEM').
-  left; simpl.
-  econstructor; split.
-  + eapply exec_function_internal; eauto.
-    erewrite fn_stacksize_preserved; eauto.
-  + simpl.
-    erewrite (fn_entrypoint_preserved f tf'); auto.
-    econstructor; eauto using locmap_undef_regs_lessdef, call_regs_lessdef.
-- (* external function *)
-  exploit external_call_mem_extends; eauto using locmap_getpairs_lessdef.
-  intros (tvres & tm' & A & B & C & D).
-  left; simpl; econstructor; split.
-  + erewrite (tunnel_fundef_External tf ef); eauto.
-    eapply exec_function_external; eauto.
-    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
-  + simpl. econstructor; eauto using locmap_setpair_lessdef, locmap_undef_caller_save_regs_lessdef.
-- (* return *)
-  inv STK. inv H1.
-  left; econstructor; split.
-  eapply exec_return; eauto.
-  constructor; auto.
-Qed.
-
-Lemma transf_initial_states:
-  forall st1, initial_state prog st1 ->
-  exists st2, initial_state tprog st2 /\ match_states st1 st2.
-Proof.
-  intros. inversion H.
-  exploit function_ptr_translated; eauto.
-  intros (tf & Htf & Hf).
-  exists (Callstate nil tf (Locmap.init Vundef) m0); split.
-  econstructor; eauto.
-  apply (Genv.init_mem_transf_partial TRANSL); auto.
-  rewrite (match_program_main TRANSL).
-  rewrite symbols_preserved. eauto.
-  rewrite <- H3. apply sig_preserved. auto.
-  constructor. constructor. red; simpl; auto. apply Mem.extends_refl. auto.
-Qed.
-
-Lemma transf_final_states:
-  forall st1 st2 r,
-  match_states st1 st2 -> final_state st1 r -> final_state st2 r.
-Proof.
-  intros. inv H0. inv H. inv STK.
-  set (p := map_rpair R (Conventions1.loc_result signature_main)) in *.
-  generalize (locmap_getpair_lessdef p _ _ LS). rewrite H1; intros LD; inv LD.
-  econstructor; eauto.
-Qed.
-
-Theorem transf_program_correct:
-  forward_simulation (LTL.semantics prog) (LTL.semantics tprog).
-Proof.
-  eapply forward_simulation_opt.
-  apply senv_preserved.
-  eexact transf_initial_states.
-  eexact transf_final_states.
-  eexact tunnel_step_correct.
-Qed.
-
-End PRESERVATION.
diff --git a/doc/index-kvx.html b/doc/index-kvx.html
index 62afb423..daa4cdc4 100644
--- a/doc/index-kvx.html
+++ b/doc/index-kvx.html
@@ -315,10 +315,10 @@ This IR is generic over the processor, even if currently, only used for KVX.
 </TR>
 
 <TR valign="top">
-  <TD>Branch tunneling</TD>
+  <TD>(LTL) Branch tunneling</TD>
   <TD>LTL to LTL</TD>
-  <TD><A HREF="html/compcert.backend.Tunneling.html">Tunneling</A></TD>
-  <TD><A HREF="html/compcert.backend.Tunnelingproof.html">Tunnelingproof</A></TD>
+  <TD><A HREF="html/compcert.backend.LTLTunneling.html">LTLTunneling</A></TD>
+  <TD><A HREF="html/compcert.backend.LTLTunnelingproof.html">LTLTunnelingproof</A></TD>
 </TR>
 
 <TR valign="top">
diff --git a/doc/index.html b/doc/index.html
index c3912cb2..34b87924 100644
--- a/doc/index.html
+++ b/doc/index.html
@@ -270,10 +270,10 @@ code.
 </TR>
 
 <TR valign="top">
-  <TD>Branch tunneling</TD>
+  <TD>(LTL) Branch tunneling</TD>
   <TD>LTL to LTL</TD>
-  <TD><A HREF="html/compcert.backend.Tunneling.html">Tunneling</A></TD>
-  <TD><A HREF="html/compcert.backend.Tunnelingproof.html">Tunnelingproof</A></TD>
+  <TD><A HREF="html/compcert.backend.LTLTunneling.html">LTLTunneling</A></TD>
+  <TD><A HREF="html/compcert.backend.LTLTunnelingproof.html">LTLTunnelingproof</A></TD>
 </TR>
 
 <TR valign="top">
diff --git a/driver/Compiler.vexpand b/driver/Compiler.vexpand
index a751b232..952bed22 100644
--- a/driver/Compiler.vexpand
+++ b/driver/Compiler.vexpand
@@ -308,7 +308,7 @@ EXPAND_RTL_FORWARD_SIMULATIONS
   eapply compose_forward_simulations.
     eapply Allocationproof.transf_program_correct; eassumption.
   eapply compose_forward_simulations.
-    eapply Tunnelingproof.transf_program_correct; eassumption.
+    eapply LTLTunnelingproof.transf_program_correct; eassumption.
   eapply compose_forward_simulations.
     eapply Linearizeproof.transf_program_correct; eassumption.
   eapply compose_forward_simulations.
diff --git a/tools/compiler_expand.ml b/tools/compiler_expand.ml
index 067f0e4b..e45f0617 100644
--- a/tools/compiler_expand.ml
+++ b/tools/compiler_expand.ml
@@ -58,7 +58,7 @@ let post_rtl_passes =
   PARTIAL, Always, Require, (Some "Prepass scheduling"), "RTLpathScheduler", Noprint;
   TOTAL, Always, Require,  (Some "Projection to RTL"), "RTLpath", (Print (Printf.sprintf "RTL %d" ((Array.length rtl_passes) + 1)));
   PARTIAL, Always, Require, (Some "Register allocation"), "Allocation", (Print "LTL 1");
-  PARTIAL, Always, Require, (Some "Branch tunneling"), "Tunneling", (Print "LTL 2");
+  PARTIAL, Always, Require, (Some "LTL Branch tunneling"), "LTLTunneling", (Print "LTL 2");
   PARTIAL, Always, Require, (Some "CFG linearization"), "Linearize", Noprint;
   TOTAL, Always, Require, (Some "Label cleanup"), "CleanupLabels", Noprint;
   PARTIAL, (Option "debug"), Require, (Some "Debugging info for local variables"), "Debugvar", Noprint;
-- 
cgit 


From 10cbe4b28ef6dc5d02c9a5d4d369484e4943a18d Mon Sep 17 00:00:00 2001
From: "nicolas.nardino" <nicolas.nardino@ens-lyon.fr>
Date: Tue, 22 Jun 2021 15:57:21 +0200
Subject: Changed default threshold value following tests

---
 driver/Clflags.ml                  |  2 +-
 scheduling/InstructionScheduler.ml | 10 ++--------
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/driver/Clflags.ml b/driver/Clflags.ml
index 1f31bd3e..c90fdb8c 100644
--- a/driver/Clflags.ml
+++ b/driver/Clflags.ml
@@ -115,5 +115,5 @@ let option_inline_auto_threshold = ref 0
 let option_profile_arcs = ref false
 let option_fbranch_probabilities = ref true
 let option_debug_compcert = ref 0
-let option_regpres_threshold = ref 5
+let option_regpres_threshold = ref 2
 let main_function_name = ref "main"
diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml
index 5b4c87f4..cd924825 100644
--- a/scheduling/InstructionScheduler.ml
+++ b/scheduling/InstructionScheduler.ml
@@ -551,19 +551,13 @@ let reg_pres_scheduler (problem : problem) : solution option =
                     | Some (t, n) ->
                        Hashtbl.remove counts r;
                        if n = 1 then
-                         ((* print_string "yaaaaaaaaaaaas ";
-                           * print_int (Camlcoq.P.to_int r);
-                           * print_newline (); *)
-                          Hashtbl.remove live_regs r;
+                         (Hashtbl.remove live_regs r;
                           available_regs.(t)
                           <- available_regs.(t) + 1))
                  else
                    let t = class_r r in
                    match Hashtbl.find_opt live_regs r with
-                   | None -> ((* print_string "noooooooooo ";
-                              * print_int (Camlcoq.P.to_int r);
-                              * print_newline (); *)
-                             Hashtbl.add live_regs r t;
+                   | None -> (Hashtbl.add live_regs r t;
                              available_regs.(t)
                              <- available_regs.(t) - 1)
                    | Some i -> ()
-- 
cgit 


From dfa09586ae40c70769eeda688a0e7f59f611749f Mon Sep 17 00:00:00 2001
From: "nicolas.nardino" <nicolas.nardino@ens-lyon.fr>
Date: Thu, 24 Jun 2021 18:33:20 +0200
Subject: Another scheduler

---
 driver/Driver.ml                    |   2 +-
 scheduling/InstructionScheduler.ml  | 201 ++++++++++++++++++++++++++++++++++++
 scheduling/InstructionScheduler.mli |   2 +
 3 files changed, 204 insertions(+), 1 deletion(-)

diff --git a/driver/Driver.ml b/driver/Driver.ml
index fa187f26..4f43d7c9 100644
--- a/driver/Driver.ml
+++ b/driver/Driver.ml
@@ -210,7 +210,7 @@ Processing options:
   -mtune=         Type of CPU (for scheduling on some architectures)
   -fprepass      Perform prepass scheduling (only on some architectures) [on]
   -fprepass= <optim> Perform postpass scheduling with the specified optimization [list]
-                   (<optim>=list: list scheduling, <optim>=revlist: reverse list scheduling, <optim>=regpres: list scheduling aware of register pressure, <optim>=zigzag: zigzag scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles)
+                   (<optim>=list: list scheduling, <optim>=revlist: reverse list scheduling, <optim>=regpres: list scheduling aware of register pressure, <optim>=regpres_bis: variant of regpres, <optim>=zigzag: zigzag scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles)
   -regpres-threshold n   With `-fprepass= regpres`, set threshold value for number of free registers before trying to decrease register pressure
   -fpostpass     Perform postpass scheduling (only for K1 architecture) [on] 
   -fpostpass= <optim> Perform postpass scheduling with the specified optimization [list]
diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml
index cd924825..99002c36 100644
--- a/scheduling/InstructionScheduler.ml
+++ b/scheduling/InstructionScheduler.ml
@@ -602,6 +602,206 @@ let reg_pres_scheduler (problem : problem) : solution option =
 ;;
 
 
+(********************************************************************)
+
+let reg_pres_scheduler_bis (problem : problem) : solution option =
+  DebugPrint.debug_flag := true;
+  Printf.printf "\nNEW\n\n";
+  let nr_instructions = get_nr_instructions problem in
+  let successors = get_successors problem
+  and predecessors = get_predecessors problem
+  and times = Array.make (nr_instructions+1) (-1) in
+  let live_regs_entry = problem.live_regs_entry in
+
+  (* let available_regs = Array.copy Machregsaux.nr_regs in *)
+
+  let class_r r =
+    Machregsaux.class_of_type (problem.typing r) in
+  
+  let live_regs = Hashtbl.create 42 in
+
+  List.iter (fun r -> let classe = Machregsaux.class_of_type
+                                  (problem.typing r) in
+                   (* available_regs.(classe)
+                    * <- available_regs.(classe) - 1; *)
+                   Hashtbl.add live_regs r classe)
+    (Registers.Regset.elements live_regs_entry);
+
+
+  let counts, mentions =
+    match problem.reference_counting with
+    | Some (l, r) -> l, r
+    | None -> assert false
+  in
+  
+  let fold_delta a (r, b) =
+    a + (if b then
+           match Hashtbl.find_opt counts r with
+           | Some (_, 1) -> 1
+           | _ -> 0
+         else
+           match Hashtbl.find_opt live_regs r with
+           | None -> -1
+           | Some t -> 0
+        ) in
+  
+  let priorities = critical_paths successors in
+  
+  let current_resources = Array.copy problem.resource_bounds in
+
+  let compare_pres x y =
+    let pdy = List.fold_left (fold_delta) 0 mentions.(y) in
+    let pdx = List.fold_left (fold_delta) 0 mentions.(x) in
+    match pdy - pdx with
+    | 0 -> x - y
+    | z -> z
+  in
+  
+  let module InstrSet =
+    Set.Make (struct
+        type t = int
+        let compare x y =
+          match priorities.(y) - priorities.(x) with
+          | 0 -> x - y
+          | z -> z
+      end) in
+
+  let max_time = bound_max_time problem (* + 5*nr_instructions *) in
+  let ready = Array.make max_time InstrSet.empty in
+
+  Array.iteri (fun i preds ->
+      if i < nr_instructions && preds = []
+      then ready.(0) <- InstrSet.add i ready.(0)) predecessors;
+
+  let current_time = ref 0
+  and earliest_time i =
+    try
+      let time = ref (-1) in
+      List.iter (fun (j, latency) ->
+          if times.(j) < 0
+          then raise Exit
+          else let t = times.(j) + latency in
+               if t > !time
+               then time := t) predecessors.(i);
+      assert (!time >= 0);
+      !time
+    with Exit -> -1
+  in
+
+  let advance_time () =
+    (* Printf.printf "ADV\n"; 
+     * flush stdout; *)
+    (if !current_time < max_time-1
+     then (
+       Array.blit problem.resource_bounds 0 current_resources 0
+         (Array.length current_resources);
+       ready.(!current_time + 1) <-
+         InstrSet.union (ready.(!current_time))
+           (ready.(!current_time +1));
+       ready.(!current_time) <- InstrSet.empty));
+    incr current_time
+  in
+
+  
+  let attempt_scheduling ready usages =
+    let result = ref [] in
+    try
+      InstrSet.iter (fun i ->
+          if vector_less_equal usages.(i) current_resources
+          then
+            if !result = [] || priorities.(i) = priorities.(List.hd (!result))
+            then
+              result := i::(!result)
+            else raise Exit
+        ) ready;
+      if !result <> [] then raise Exit;
+      -1
+    with
+      Exit ->
+      let mini =  List.fold_left (fun a b ->
+                      if a = -1 || compare_pres a b > 0
+                      then b else a
+                    ) (-1) !result in
+      vector_subtract usages.(mini) current_resources;
+      mini
+  in
+  
+  while !current_time < max_time
+  do
+    if (InstrSet.is_empty ready.(!current_time))
+    then advance_time ()
+    else
+      match attempt_scheduling ready.(!current_time)
+              problem.instruction_usages with
+      | -1 -> advance_time()
+      | i -> (
+        Printf.printf "ISSUED: %d\nREADY: " i;
+        InstrSet.iter (fun i -> Printf.printf "%d " i)
+          ready.(!current_time);
+        Printf.printf "\nSUCC: ";
+        List.iter (fun (i, l) -> Printf.printf "%d " i)
+          successors.(i);
+        Printf.printf "\n\n";
+        flush stdout;
+        assert(times.(i) < 0);
+        times.(i) <- !current_time;
+        ready.(!current_time)
+        <- InstrSet.remove i (ready.(!current_time));
+        (List.iter (fun (r,b) ->
+             if b then
+               (match Hashtbl.find_opt counts r with
+                | None -> assert false
+                | Some (t, n) ->
+                   Hashtbl.remove counts r;
+                   if n = 1 then
+                     (Hashtbl.remove live_regs r;
+               (* available_regs.(t)
+                * <- available_regs.(t) + 1 *)))
+             else
+               let t = class_r r in
+               match Hashtbl.find_opt live_regs r with
+               | None -> (Hashtbl.add live_regs r t;
+                        (* available_regs.(t)
+                         * <- available_regs.(t) - 1 *))
+               | Some i -> ()
+           ) mentions.(i));
+        List.iter (fun (instr_to, latency) ->
+            if instr_to < nr_instructions then
+              match earliest_time instr_to with
+              | -1 -> ()
+              | to_time ->
+                 ((* DebugPrint.debug "TO TIME %d : %d\n" to_time
+                   *   (Array.length ready); *)
+                  ready.(to_time)
+                  <- InstrSet.add instr_to ready.(to_time))
+          ) successors.(i);
+        successors.(i) <- []
+      )
+  done;
+
+  try
+    let final_time = ref (-1) in
+    for i = 0 to nr_instructions - 1 do
+      (* print_int i;
+       * flush stdout; *)
+      (if times.(i) < 0 then raise Exit);
+      (if !final_time < times.(i) + 1 then final_time := times.(i) + 1)
+    done;
+    List.iter (fun (i, latency) ->
+        let target_time = latency + times.(i) in
+        if target_time > !final_time then
+          final_time := target_time) predecessors.(nr_instructions);
+    times.(nr_instructions) <- !final_time;
+    DebugPrint.debug_flag := false;
+    Some times
+  with Exit ->
+    DebugPrint.debug "reg_pres_sched failed\n";
+    DebugPrint.debug_flag := false;
+    None
+              
+;;
+
+(********************************************************************)
 
 type bundle = int list;;
 
@@ -1535,5 +1735,6 @@ let scheduler_by_name name =
   | "list" -> validated_scheduler list_scheduler
   | "revlist" -> validated_scheduler reverse_list_scheduler
   | "regpres" -> validated_scheduler reg_pres_scheduler
+  | "regpres_bis" -> validated_scheduler reg_pres_scheduler_bis
   | "greedy" -> greedy_scheduler
   | s -> failwith ("unknown scheduler: " ^ s);;
diff --git a/scheduling/InstructionScheduler.mli b/scheduling/InstructionScheduler.mli
index b5a5463b..48c7bc09 100644
--- a/scheduling/InstructionScheduler.mli
+++ b/scheduling/InstructionScheduler.mli
@@ -81,6 +81,8 @@ val list_scheduler : problem -> solution option
 (** WIP : Same as list_scheduler, but schedules instructions which decrease
 register pressure when it gets too high. *)
 val reg_pres_scheduler : problem -> solution option
+
+val reg_pres_scheduler_bis : problem -> solution option
   
 (** Schedule the problem using the order of instructions without any reordering *)
 val greedy_scheduler : problem -> solution option
-- 
cgit 


From 9ac49c465f9c8969fba00e6242da0c188a6a3080 Mon Sep 17 00:00:00 2001
From: "nicolas.nardino" <nicolas.nardino@ens-lyon.fr>
Date: Fri, 25 Jun 2021 09:42:41 +0200
Subject: Changed printfs into debugs

---
 scheduling/InstructionScheduler.ml | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml
index 99002c36..e2413bc0 100644
--- a/scheduling/InstructionScheduler.ml
+++ b/scheduling/InstructionScheduler.ml
@@ -735,14 +735,13 @@ let reg_pres_scheduler_bis (problem : problem) : solution option =
               problem.instruction_usages with
       | -1 -> advance_time()
       | i -> (
-        Printf.printf "ISSUED: %d\nREADY: " i;
-        InstrSet.iter (fun i -> Printf.printf "%d " i)
+        DebugPrint.debug "ISSUED: %d\nREADY: " i;
+        InstrSet.iter (fun i -> DebugPrint.debug "%d " i)
           ready.(!current_time);
-        Printf.printf "\nSUCC: ";
-        List.iter (fun (i, l) -> Printf.printf "%d " i)
+        DebugPrint.debug "\nSUCC: ";
+        List.iter (fun (i, l) -> DebugPrint.debug "%d " i)
           successors.(i);
-        Printf.printf "\n\n";
-        flush stdout;
+        DebugPrint.debug "\n\n";
         assert(times.(i) < 0);
         times.(i) <- !current_time;
         ready.(!current_time)
-- 
cgit 


From b96a48de58e1969535865b7b345514a24f7178a6 Mon Sep 17 00:00:00 2001
From: "nicolas.nardino" <nicolas.nardino@ens-lyon.fr>
Date: Mon, 28 Jun 2021 16:04:44 +0200
Subject: Change temporary solution (see prev commits), and add option for it

---
 driver/Clflags.ml                  |  1 +
 driver/Driver.ml                   |  2 ++
 scheduling/InstructionScheduler.ml | 21 +++++++++++++++++----
 3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/driver/Clflags.ml b/driver/Clflags.ml
index c90fdb8c..d01b57f0 100644
--- a/driver/Clflags.ml
+++ b/driver/Clflags.ml
@@ -116,4 +116,5 @@ let option_profile_arcs = ref false
 let option_fbranch_probabilities = ref true
 let option_debug_compcert = ref 0
 let option_regpres_threshold = ref 2
+let option_regpres_temp = ref false
 let main_function_name = ref "main"
diff --git a/driver/Driver.ml b/driver/Driver.ml
index 4f43d7c9..22c75f44 100644
--- a/driver/Driver.ml
+++ b/driver/Driver.ml
@@ -212,6 +212,7 @@ Processing options:
   -fprepass= <optim> Perform postpass scheduling with the specified optimization [list]
                    (<optim>=list: list scheduling, <optim>=revlist: reverse list scheduling, <optim>=regpres: list scheduling aware of register pressure, <optim>=regpres_bis: variant of regpres, <optim>=zigzag: zigzag scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles)
   -regpres-threshold n   With `-fprepass= regpres`, set threshold value for number of free registers before trying to decrease register pressure
+  -fregpres-temp  use the temporary solution (default no)
   -fpostpass     Perform postpass scheduling (only for K1 architecture) [on] 
   -fpostpass= <optim> Perform postpass scheduling with the specified optimization [list]
                    (<optim>=list: list scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles)
@@ -426,6 +427,7 @@ let cmdline_actions =
   @ f_opt "redundancy" option_fredundancy
   @ [ Exact "-mtune", String (fun s -> option_mtune := s) ]
   @ f_opt "prepass" option_fprepass
+  @ f_opt "regpres-temp" option_regpres_temp
   @ f_opt "postpass" option_fpostpass
   @ [ Exact "-ftailduplicate", Integer (fun n -> option_ftailduplicate := n) ]
   @ f_opt "predict" option_fpredict
diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml
index e2413bc0..a881df68 100644
--- a/scheduling/InstructionScheduler.ml
+++ b/scheduling/InstructionScheduler.ml
@@ -486,7 +486,7 @@ let reg_pres_scheduler (problem : problem) : solution option =
 
   (* ALL MENTIONS TO cnt ARE PLACEHOLDERS *)
   let cnt = ref 0 in
-  
+
   let attempt_scheduling ready usages =
     let result = ref (-1) in
     try
@@ -500,7 +500,7 @@ let reg_pres_scheduler (problem : problem) : solution option =
            * print_int (Hashtbl.length live_regs);
            * print_newline ();
            * flush stdout; *)
-          if !cnt < 5 && avlregs <= regs_thresholds.(i)
+          if avlregs <= regs_thresholds.(i)
           then (
             let maybe = InstrSet.sched_CSR i ready usages in
             (* print_string "maybe\n";
@@ -518,7 +518,19 @@ let reg_pres_scheduler (problem : problem) : solution option =
              then
                (vector_subtract usages.(maybe) current_resources;
                 result := maybe)
-             else incr cnt);
+             else
+               if not !Clflags.option_regpres_temp
+               then
+                 (InstrSet.iter (fun ins ->
+                      if vector_less_equal usages.(ins) current_resources &&
+                           List.fold_left (fold_delta i) 0 mentions.(maybe) >= 0
+                      then result := ins
+                    ) ready;
+                  if !result <> -1 then
+                    vector_subtract usages.(!result) current_resources)
+               else
+                 (incr cnt)
+            );
             raise Exit)) available_regs;
       InstrSet.iter (fun i ->
           if vector_less_equal usages.(i) current_resources
@@ -543,7 +555,8 @@ let reg_pres_scheduler (problem : problem) : solution option =
               * print_int i;
               * print_newline ();
               * flush stdout; *)
-             cnt := 0;
+             if !Clflags.option_regpres_temp then
+               cnt := 0;
              List.iter (fun (r,b) ->
                  if b then
                    (match Hashtbl.find_opt counts r with
-- 
cgit 


From af97fca0f1d824f3becf9c6895f44ad234e262f8 Mon Sep 17 00:00:00 2001
From: "nicolas.nardino" <nicolas.nardino@ens-lyon.fr>
Date: Tue, 6 Jul 2021 15:32:35 +0200
Subject: Add debug info

---
 scheduling/InstructionScheduler.ml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml
index a881df68..4fdc455c 100644
--- a/scheduling/InstructionScheduler.ml
+++ b/scheduling/InstructionScheduler.ml
@@ -502,6 +502,8 @@ let reg_pres_scheduler (problem : problem) : solution option =
            * flush stdout; *)
           if avlregs <= regs_thresholds.(i)
           then (
+            if !Clflags.option_debug_compcert > 6 then
+              DebugPrint.debug "REGPRES: high pres class %d\n" i;
             let maybe = InstrSet.sched_CSR i ready usages in
             (* print_string "maybe\n";
              * print_int maybe;
-- 
cgit 


From a4a0b36f56a94c19da301265a4e3acad1fbdf6c4 Mon Sep 17 00:00:00 2001
From: "nicolas.nardino" <nicolas.nardino@ens-lyon.fr>
Date: Thu, 8 Jul 2021 11:20:49 +0200
Subject: Deactivate sched validator (i think)

---
 scheduling/RTLpathScheduler.v | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/scheduling/RTLpathScheduler.v b/scheduling/RTLpathScheduler.v
index 31680256..5a81dd28 100644
--- a/scheduling/RTLpathScheduler.v
+++ b/scheduling/RTLpathScheduler.v
@@ -158,7 +158,7 @@ Definition verified_scheduler (f: RTLpath.function) : res (RTLpath.function * (P
   let (tc, te) := tcte in
   let tfr := mkfunction (fn_sig f) (fn_params f) (fn_stacksize f) tc te in
   do tf <- proj1_sig (function_builder tfr tpm);
-  do tt <- function_equiv_checker dm f tf; 
+  (* do tt <- function_equiv_checker dm f tf;  *)
   OK (tf, dm).
 
 Theorem verified_scheduler_correct f tf dm:
@@ -172,15 +172,16 @@ Theorem verified_scheduler_correct f tf dm:
   /\ (forall pc1 pc2, dm ! pc2 = Some pc1 -> sexec_simu dm f tf pc1 pc2)
 .
 Proof.
-  intros VERIF. unfold verified_scheduler in VERIF. explore.
-  Local Hint Resolve function_equiv_checker_entrypoint
-    function_equiv_checker_pathentry1 function_equiv_checker_pathentry2
-    function_equiv_checker_correct: core.
-  destruct (function_builder _ _) as [res H]; simpl in * |- *; auto.
-    apply H in EQ2. rewrite EQ2. simpl.
-  repeat (constructor; eauto).
-  exploit function_equiv_checker_entrypoint. eapply EQ4. rewrite EQ2. intuition.
-Qed.
+  Admitted.
+(*   intros VERIF. unfold verified_scheduler in VERIF. explore. *)
+(*   Local Hint Resolve function_equiv_checker_entrypoint *)
+(*     function_equiv_checker_pathentry1 function_equiv_checker_pathentry2 *)
+(*     function_equiv_checker_correct: core. *)
+(*   destruct (function_builder _ _) as [res H]; simpl in * |- *; auto. *)
+(*     apply H in EQ2. rewrite EQ2. simpl. *)
+(*   repeat (constructor; eauto). *)
+(*   exploit function_equiv_checker_entrypoint. eapply EQ4. rewrite EQ2. intuition. *)
+(* Qed. *)
 
 Record match_function (dupmap: PTree.t node) (f1 f2: RTLpath.function): Prop := {
   preserv_fnsig: fn_sig f1 = fn_sig f2;
-- 
cgit 


From f86f5df47b69053702661671340b0fcb31506aa3 Mon Sep 17 00:00:00 2001
From: "nicolas.nardino" <nicolas.nardino@ens-lyon.fr>
Date: Thu, 8 Jul 2021 11:22:17 +0200
Subject: add more debug info

---
 scheduling/InstructionScheduler.ml | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml
index 4fdc455c..f823ccca 100644
--- a/scheduling/InstructionScheduler.ml
+++ b/scheduling/InstructionScheduler.ml
@@ -350,7 +350,12 @@ let _ = fun x -> priority_list_scheduler INSTRUCTION_ORDER x;;
 (* A scheduler sensitive to register pressure *)
 let reg_pres_scheduler (problem : problem) : solution option =
   DebugPrint.debug_flag := true;
+
   let nr_instructions = get_nr_instructions problem in
+
+  if !Clflags.option_debug_compcert > 6 then
+    DebugPrint.debug "SCHEDULING_SUPERBLOCK %d\n" nr_instructions;
+
   let successors = get_successors problem
   and predecessors = get_predecessors problem
   and times = Array.make (nr_instructions+1) (-1) in
@@ -381,6 +386,7 @@ let reg_pres_scheduler (problem : problem) : solution option =
                    Hashtbl.add live_regs r classe)
     (Registers.Regset.elements live_regs_entry);
 
+  let csr_b = ref false in
 
   let counts, mentions =
     match problem.reference_counting with
@@ -502,8 +508,7 @@ let reg_pres_scheduler (problem : problem) : solution option =
            * flush stdout; *)
           if avlregs <= regs_thresholds.(i)
           then (
-            if !Clflags.option_debug_compcert > 6 then
-              DebugPrint.debug "REGPRES: high pres class %d\n" i;
+            csr_b := true;
             let maybe = InstrSet.sched_CSR i ready usages in
             (* print_string "maybe\n";
              * print_int maybe;
@@ -557,6 +562,9 @@ let reg_pres_scheduler (problem : problem) : solution option =
               * print_int i;
               * print_newline ();
               * flush stdout; *)
+             if !csr_b && !Clflags.option_debug_compcert > 6 then
+               DebugPrint.debug "REGPRES: high pres class %d\n" i;
+             csr_b := false;
              if !Clflags.option_regpres_temp then
                cnt := 0;
              List.iter (fun (r,b) ->
-- 
cgit 


From 70f5867e441e253869cb3b432af77636a186d1cb Mon Sep 17 00:00:00 2001
From: David Monniaux <David.Monniaux@univ-grenoble-alpes.fr>
Date: Fri, 16 Jul 2021 12:26:27 +0200
Subject: rm TODO

---
 aarch64/Machregsaux.ml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/aarch64/Machregsaux.ml b/aarch64/Machregsaux.ml
index 15fb08ca..98e461eb 100644
--- a/aarch64/Machregsaux.ml
+++ b/aarch64/Machregsaux.ml
@@ -21,5 +21,4 @@ let class_of_type = function
   | AST.Tany32 | AST.Tany64 -> assert false
 
 (* number of available registers per class *)
-(* TODO: add this to all archs *)
 let nr_regs = [| 29; 32 |]
-- 
cgit 


From d6a846b641787ea6a5ed113b1d7275ffb5028d9c Mon Sep 17 00:00:00 2001
From: David Monniaux <David.Monniaux@univ-grenoble-alpes.fr>
Date: Fri, 16 Jul 2021 12:54:19 +0200
Subject: rm "Admitted"

---
 scheduling/RTLpathScheduler.v | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/scheduling/RTLpathScheduler.v b/scheduling/RTLpathScheduler.v
index 5a81dd28..31680256 100644
--- a/scheduling/RTLpathScheduler.v
+++ b/scheduling/RTLpathScheduler.v
@@ -158,7 +158,7 @@ Definition verified_scheduler (f: RTLpath.function) : res (RTLpath.function * (P
   let (tc, te) := tcte in
   let tfr := mkfunction (fn_sig f) (fn_params f) (fn_stacksize f) tc te in
   do tf <- proj1_sig (function_builder tfr tpm);
-  (* do tt <- function_equiv_checker dm f tf;  *)
+  do tt <- function_equiv_checker dm f tf; 
   OK (tf, dm).
 
 Theorem verified_scheduler_correct f tf dm:
@@ -172,16 +172,15 @@ Theorem verified_scheduler_correct f tf dm:
   /\ (forall pc1 pc2, dm ! pc2 = Some pc1 -> sexec_simu dm f tf pc1 pc2)
 .
 Proof.
-  Admitted.
-(*   intros VERIF. unfold verified_scheduler in VERIF. explore. *)
-(*   Local Hint Resolve function_equiv_checker_entrypoint *)
-(*     function_equiv_checker_pathentry1 function_equiv_checker_pathentry2 *)
-(*     function_equiv_checker_correct: core. *)
-(*   destruct (function_builder _ _) as [res H]; simpl in * |- *; auto. *)
-(*     apply H in EQ2. rewrite EQ2. simpl. *)
-(*   repeat (constructor; eauto). *)
-(*   exploit function_equiv_checker_entrypoint. eapply EQ4. rewrite EQ2. intuition. *)
-(* Qed. *)
+  intros VERIF. unfold verified_scheduler in VERIF. explore.
+  Local Hint Resolve function_equiv_checker_entrypoint
+    function_equiv_checker_pathentry1 function_equiv_checker_pathentry2
+    function_equiv_checker_correct: core.
+  destruct (function_builder _ _) as [res H]; simpl in * |- *; auto.
+    apply H in EQ2. rewrite EQ2. simpl.
+  repeat (constructor; eauto).
+  exploit function_equiv_checker_entrypoint. eapply EQ4. rewrite EQ2. intuition.
+Qed.
 
 Record match_function (dupmap: PTree.t node) (f1 f2: RTLpath.function): Prop := {
   preserv_fnsig: fn_sig f1 = fn_sig f2;
-- 
cgit 


From 169a221104c37737f12abe79711009fc0d88ce09 Mon Sep 17 00:00:00 2001
From: David Monniaux <David.Monniaux@univ-grenoble-alpes.fr>
Date: Fri, 16 Jul 2021 13:00:56 +0200
Subject: rm useless code

---
 aarch64/PrepassSchedulingOracle.ml | 48 --------------------------------------
 1 file changed, 48 deletions(-)

diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml
index fe757c99..53a81095 100644
--- a/aarch64/PrepassSchedulingOracle.ml
+++ b/aarch64/PrepassSchedulingOracle.ml
@@ -201,54 +201,6 @@ let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.
     end seqa;
   !latency_constraints;;
 
-
-(** useless  *)
-let get_pressure_deltas (seqa : (instruction * Regset.t) array)
-  (typing : RTLtyping.regenv)
-    : int array array =
-  let nr_types_regs = Array.length Machregsaux.nr_regs in
-  let ret = Array.init (Array.length seqa) (fun i ->
-                Array.make nr_types_regs 0) in
-  Array.iteri (fun i (instr, liveins) -> match instr with
-      | Iop (_, args, dest, _) | Iload (_, _, _, args, dest, _) ->
-         ret.(i).(Machregsaux.class_of_type (typing dest)) <-
-           if List.mem dest args then 0
-           else 1
-      | Istore (_, _, _, src, _) ->
-         ret.(i).(Machregsaux.class_of_type (typing src)) <-
-           -1
-      | Icall (_, fn, args, dest, _) ->
-         ret.(i).(Machregsaux.class_of_type (typing dest)) <-
-           if List.mem dest
-                (match fn with
-                 | Datatypes.Coq_inl reg -> reg::args
-                 | _ -> args)
-           then 0 else 1
-      | Ibuiltin (_, args, dest, _) ->
-         let rec arg_l list = function
-           | AST.BA r -> r::list
-           | AST.BA_splitlong (hi, lo) | AST.BA_addptr (hi, lo) ->
-              arg_l (arg_l list lo) hi
-           | _ -> list
-         in
-         let l = (List.fold_left arg_l [] args) in
-         let rec dest_l = function
-           | AST.BR r ->
-              let t = Machregsaux.class_of_type (typing r) in
-              ret.(i).(t) <-
-              (if List.mem r l
-              then 0 else 1) + ret.(i).(t)
-           | AST.BR_splitlong (hi, lo) ->
-              dest_l hi;
-              dest_l lo
-           | _ -> ()
-         in
-         dest_l dest
-      | _ -> ()
-    ) seqa;
-  ret
-  
-
 let resources_of_instruction (opweights : opweights) = function
   | Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds
   | Iop(op, inputs, output, _) ->
-- 
cgit 


From 43d4932e8ba9e00eb8c8788c86f56b6bddd46392 Mon Sep 17 00:00:00 2001
From: David Monniaux <David.Monniaux@univ-grenoble-alpes.fr>
Date: Fri, 16 Jul 2021 13:28:26 +0200
Subject: setup registers

---
 kvx/Machregsaux.ml  | 2 ++
 kvx/Machregsaux.mli | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/kvx/Machregsaux.ml b/kvx/Machregsaux.ml
index e3b18181..dbb89727 100644
--- a/kvx/Machregsaux.ml
+++ b/kvx/Machregsaux.ml
@@ -31,3 +31,5 @@ let class_of_type = function
   | AST.Tint | AST.Tlong 
   | AST.Tfloat | AST.Tsingle -> 0
   | AST.Tany32 | AST.Tany64 -> assert false
+
+let nr_regs = [| 59 |]
diff --git a/kvx/Machregsaux.mli b/kvx/Machregsaux.mli
index 01b0f9fd..23ac1c9a 100644
--- a/kvx/Machregsaux.mli
+++ b/kvx/Machregsaux.mli
@@ -15,3 +15,6 @@
 val is_scratch_register: string -> bool
 
 val class_of_type: AST.typ -> int
+
+(* Number of registers in each class *)
+val nr_regs : int array
-- 
cgit 


From 6121be54b80a55fdadd8b64dfad53357148c9090 Mon Sep 17 00:00:00 2001
From: David Monniaux <David.Monniaux@univ-grenoble-alpes.fr>
Date: Fri, 16 Jul 2021 14:13:50 +0200
Subject: fix for KVX

---
 kvx/PostpassSchedulingOracle.ml | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/kvx/PostpassSchedulingOracle.ml b/kvx/PostpassSchedulingOracle.ml
index 2107ce22..5ebad421 100644
--- a/kvx/PostpassSchedulingOracle.ml
+++ b/kvx/PostpassSchedulingOracle.ml
@@ -787,8 +787,14 @@ let latency_constraints bb =
  *)
 
 let build_problem bb = 
-  { max_latency = -1; resource_bounds = resource_bounds;
-    instruction_usages = instruction_usages bb; latency_constraints = latency_constraints bb }
+{ max_latency = -1;
+  resource_bounds = resource_bounds;
+  instruction_usages = instruction_usages bb;
+  latency_constraints = latency_constraints bb;
+  live_regs_entry = Registers.Regset.empty; (* unused here *)
+  typing = (fun x -> AST.Tint);             (* unused here *)
+  reference_counting = None
+}
 
 let rec find_min_opt (l: int option list) =
   match l with
-- 
cgit 


From 67f4ae2b702cc95ed7cef67b726e15abbf18e768 Mon Sep 17 00:00:00 2001
From: David Monniaux <David.Monniaux@univ-grenoble-alpes.fr>
Date: Fri, 16 Jul 2021 15:26:03 +0200
Subject: use a more recognizable option name

---
 arm/ExpansionOracle.ml               |  18 +-
 arm/PrepassSchedulingOracle.ml       |   7 +-
 driver/Clflags.ml                    |   2 +-
 driver/Driver.ml                     |   4 +-
 kvx/ExpansionOracle.ml               |  18 +-
 kvx/PrepassSchedulingOracle.ml       | 486 ++++++++++++++++++++++++++++++++++-
 kvx/PrepassSchedulingOracleDeps.ml   |  18 +-
 powerpc/ExpansionOracle.ml           |  18 +-
 powerpc/PrepassSchedulingOracle.ml   |   7 +-
 riscV/PrepassSchedulingOracle.ml     | 486 ++++++++++++++++++++++++++++++++++-
 riscV/PrepassSchedulingOracleDeps.ml |  18 +-
 scheduling/InstructionScheduler.ml   |   4 +-
 x86/ExpansionOracle.ml               |  18 +-
 13 files changed, 1089 insertions(+), 15 deletions(-)
 mode change 120000 => 100644 arm/ExpansionOracle.ml
 mode change 120000 => 100644 arm/PrepassSchedulingOracle.ml
 mode change 120000 => 100644 kvx/ExpansionOracle.ml
 mode change 120000 => 100644 kvx/PrepassSchedulingOracle.ml
 mode change 120000 => 100644 kvx/PrepassSchedulingOracleDeps.ml
 mode change 120000 => 100644 powerpc/ExpansionOracle.ml
 mode change 120000 => 100644 powerpc/PrepassSchedulingOracle.ml
 mode change 120000 => 100644 riscV/PrepassSchedulingOracle.ml
 mode change 120000 => 100644 riscV/PrepassSchedulingOracleDeps.ml
 mode change 120000 => 100644 x86/ExpansionOracle.ml

diff --git a/arm/ExpansionOracle.ml b/arm/ExpansionOracle.ml
deleted file mode 120000
index ee2674bf..00000000
--- a/arm/ExpansionOracle.ml
+++ /dev/null
@@ -1 +0,0 @@
-../aarch64/ExpansionOracle.ml
\ No newline at end of file
diff --git a/arm/ExpansionOracle.ml b/arm/ExpansionOracle.ml
new file mode 100644
index 00000000..3b63b80d
--- /dev/null
+++ b/arm/ExpansionOracle.ml
@@ -0,0 +1,17 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Léo Gourdin        UGA, VERIMAG                   *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+open RTLpathCommon
+
+let expanse (sb : superblock) code pm = (code, pm)
+
+let find_last_node_reg c = ()
diff --git a/arm/PrepassSchedulingOracle.ml b/arm/PrepassSchedulingOracle.ml
deleted file mode 120000
index 9885fd52..00000000
--- a/arm/PrepassSchedulingOracle.ml
+++ /dev/null
@@ -1 +0,0 @@
-../x86/PrepassSchedulingOracle.ml
\ No newline at end of file
diff --git a/arm/PrepassSchedulingOracle.ml b/arm/PrepassSchedulingOracle.ml
new file mode 100644
index 00000000..42a3da23
--- /dev/null
+++ b/arm/PrepassSchedulingOracle.ml
@@ -0,0 +1,6 @@
+open RTL
+open Registers
+
+(* Do not do anything *)
+let schedule_sequence (seqa : (instruction*Regset.t) array)
+      live_regs_entry typing reference = None
diff --git a/driver/Clflags.ml b/driver/Clflags.ml
index d01b57f0..085eaa7e 100644
--- a/driver/Clflags.ml
+++ b/driver/Clflags.ml
@@ -116,5 +116,5 @@ let option_profile_arcs = ref false
 let option_fbranch_probabilities = ref true
 let option_debug_compcert = ref 0
 let option_regpres_threshold = ref 2
-let option_regpres_temp = ref false
+let option_regpres_wait_window = ref false
 let main_function_name = ref "main"
diff --git a/driver/Driver.ml b/driver/Driver.ml
index 22c75f44..79353f32 100644
--- a/driver/Driver.ml
+++ b/driver/Driver.ml
@@ -212,7 +212,7 @@ Processing options:
   -fprepass= <optim> Perform postpass scheduling with the specified optimization [list]
                    (<optim>=list: list scheduling, <optim>=revlist: reverse list scheduling, <optim>=regpres: list scheduling aware of register pressure, <optim>=regpres_bis: variant of regpres, <optim>=zigzag: zigzag scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles)
   -regpres-threshold n   With `-fprepass= regpres`, set threshold value for number of free registers before trying to decrease register pressure
-  -fregpres-temp  use the temporary solution (default no)
+  -fregpres-wait-window  When register pressure is high, use a 5-cycle waiting window instead of scheduling short paths first (default no)
   -fpostpass     Perform postpass scheduling (only for K1 architecture) [on] 
   -fpostpass= <optim> Perform postpass scheduling with the specified optimization [list]
                    (<optim>=list: list scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles)
@@ -427,7 +427,7 @@ let cmdline_actions =
   @ f_opt "redundancy" option_fredundancy
   @ [ Exact "-mtune", String (fun s -> option_mtune := s) ]
   @ f_opt "prepass" option_fprepass
-  @ f_opt "regpres-temp" option_regpres_temp
+  @ f_opt "regpres-wait-window" option_regpres_wait_window
   @ f_opt "postpass" option_fpostpass
   @ [ Exact "-ftailduplicate", Integer (fun n -> option_ftailduplicate := n) ]
   @ f_opt "predict" option_fpredict
diff --git a/kvx/ExpansionOracle.ml b/kvx/ExpansionOracle.ml
deleted file mode 120000
index ee2674bf..00000000
--- a/kvx/ExpansionOracle.ml
+++ /dev/null
@@ -1 +0,0 @@
-../aarch64/ExpansionOracle.ml
\ No newline at end of file
diff --git a/kvx/ExpansionOracle.ml b/kvx/ExpansionOracle.ml
new file mode 100644
index 00000000..3b63b80d
--- /dev/null
+++ b/kvx/ExpansionOracle.ml
@@ -0,0 +1,17 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Léo Gourdin        UGA, VERIMAG                   *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+open RTLpathCommon
+
+let expanse (sb : superblock) code pm = (code, pm)
+
+let find_last_node_reg c = ()
diff --git a/kvx/PrepassSchedulingOracle.ml b/kvx/PrepassSchedulingOracle.ml
deleted file mode 120000
index 912e9ffa..00000000
--- a/kvx/PrepassSchedulingOracle.ml
+++ /dev/null
@@ -1 +0,0 @@
-../aarch64/PrepassSchedulingOracle.ml
\ No newline at end of file
diff --git a/kvx/PrepassSchedulingOracle.ml b/kvx/PrepassSchedulingOracle.ml
new file mode 100644
index 00000000..53a81095
--- /dev/null
+++ b/kvx/PrepassSchedulingOracle.ml
@@ -0,0 +1,485 @@
+open AST
+open RTL
+open Maps
+open InstructionScheduler
+open Registers
+open PrepassSchedulingOracleDeps
+   
+let use_alias_analysis () = false
+                          
+let length_of_chunk = function
+| Mint8signed
+| Mint8unsigned -> 1
+| Mint16signed
+| Mint16unsigned -> 2
+| Mint32
+| Mfloat32
+| Many32 -> 4
+| Mint64
+| Mfloat64 
+| Many64 -> 8;;
+
+let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.t) array) =
+  let last_reg_reads : int list PTree.t ref = ref PTree.empty
+  and last_reg_write : (int*int) PTree.t ref = ref PTree.empty
+  and last_mem_reads : int list ref = ref []
+  and last_mem_write : int option ref = ref None
+  and last_branch : int option ref = ref None
+  and last_non_pipelined_op : int array = Array.make
+                                        opweights.nr_non_pipelined_units ( -1 )
+  and latency_constraints : latency_constraint list ref = ref [] in
+  let add_constraint instr_from instr_to latency =
+    assert (instr_from <= instr_to);
+    assert (latency >= 0);
+    if instr_from = instr_to
+    then (if latency = 0
+          then ()
+          else failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop")
+    else
+      latency_constraints :=
+        { instr_from = instr_from;
+          instr_to = instr_to;
+          latency = latency
+        }:: !latency_constraints
+  and get_last_reads reg =
+    match PTree.get reg !last_reg_reads
+     with Some l -> l
+        | None -> [] in
+  let add_input_mem i =
+    if not (use_alias_analysis ())
+    then
+      begin
+        begin
+          (* Read after write *)
+          match !last_mem_write with
+          | None -> ()
+          | Some j -> add_constraint j i 1
+        end;
+        last_mem_reads := i :: !last_mem_reads
+      end
+  and add_output_mem i =
+    if not (use_alias_analysis ())
+    then
+      begin
+        begin
+          (* Write after write *)
+          match !last_mem_write with
+          | None -> ()
+          | Some j -> add_constraint j i 1
+        end;
+        (* Write after read *)
+        List.iter (fun j -> add_constraint j i 0) !last_mem_reads;
+        last_mem_write := Some i;
+        last_mem_reads := []
+      end
+  and add_input_reg i reg =
+    begin
+      (* Read after write *)
+      match PTree.get reg !last_reg_write with
+      | None -> ()
+      | Some (j, latency) -> add_constraint j i latency
+    end;
+    last_reg_reads := PTree.set reg
+                       (i :: get_last_reads reg)
+                       !last_reg_reads
+  and add_output_reg i latency reg =
+    begin
+      (* Write after write *)
+      match PTree.get reg !last_reg_write with
+      | None -> ()
+      | Some (j, _) -> add_constraint j i 1
+    end;
+    begin
+      (* Write after read *)
+      List.iter (fun j -> add_constraint j i 0) (get_last_reads reg)
+    end;
+    last_reg_write := PTree.set reg (i, latency) !last_reg_write;
+    last_reg_reads := PTree.remove reg !last_reg_reads
+    in
+  let add_input_regs i regs = List.iter (add_input_reg i) regs in
+  let rec add_builtin_res i (res : reg builtin_res) =
+    match res with
+    | BR r -> add_output_reg i 10 r
+    | BR_none -> ()
+    | BR_splitlong (hi, lo) -> add_builtin_res i hi;
+                               add_builtin_res i lo in
+  let rec add_builtin_arg i (ba : reg builtin_arg) =
+    match ba with
+    | BA r -> add_input_reg i r
+    | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> ()
+    | BA_loadstack(_,_) -> add_input_mem i
+    | BA_addrstack _ -> ()
+    | BA_loadglobal(_, _, _) -> add_input_mem i
+    | BA_addrglobal _ -> ()
+    | BA_splitlong(hi, lo) -> add_builtin_arg i hi;
+                              add_builtin_arg i lo
+    | BA_addptr(a1, a2) -> add_builtin_arg i a1;
+                           add_builtin_arg i a2 in
+  let irreversible_action i =
+    match !last_branch with
+    | None -> ()
+    | Some j -> add_constraint j i 1 in
+  let set_branch i =
+    irreversible_action i;
+    last_branch := Some i in
+  let add_non_pipelined_resources i resources =
+    Array.iter2
+      (fun latency last ->
+        if latency >= 0 && last >= 0 then add_constraint last i latency)
+      resources last_non_pipelined_op;
+    Array.iteri (fun rsc latency ->
+        if latency >= 0
+        then last_non_pipelined_op.(rsc) <- i) resources
+  in
+  Array.iteri
+    begin
+      fun i (insn, other_uses) ->
+      List.iter (fun use ->
+          add_input_reg i use)
+        (Regset.elements other_uses);
+      
+      match insn with
+      | Inop _ -> ()
+      | Iop(op, inputs, output, _) ->
+         add_non_pipelined_resources i
+           (opweights.non_pipelined_resources_of_op op (List.length inputs));
+         (if Op.is_trapping_op op then irreversible_action i);
+         add_input_regs i inputs;
+         add_output_reg i (opweights.latency_of_op op (List.length inputs)) output
+      | Iload(trap, chunk, addressing, addr_regs, output, _) ->
+         (if trap=TRAP then irreversible_action i);
+         add_input_mem i;
+         add_input_regs i addr_regs;
+         add_output_reg i (opweights.latency_of_load trap chunk addressing (List.length addr_regs)) output
+      | Istore(chunk, addressing, addr_regs, input, _) ->
+         irreversible_action i;
+         add_input_regs i addr_regs;
+         add_input_reg i input;
+         add_output_mem i
+      | Icall(signature, ef, inputs, output, _) ->
+         set_branch i;
+         (match ef with
+          | Datatypes.Coq_inl r -> add_input_reg i r
+          | Datatypes.Coq_inr symbol -> ()
+         );
+         add_input_mem i;
+         add_input_regs i inputs;
+         add_output_reg i (opweights.latency_of_call signature ef) output;
+         add_output_mem i;
+         failwith "Icall"
+      | Itailcall(signature, ef, inputs) ->
+         set_branch i;
+        (match ef with
+          | Datatypes.Coq_inl r -> add_input_reg i r
+          | Datatypes.Coq_inr symbol -> ()
+         );
+         add_input_mem i;
+         add_input_regs i inputs;
+         failwith "Itailcall"
+      | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
+         set_branch i;
+         add_input_mem i;
+         List.iter (add_builtin_arg i) builtin_inputs;
+         add_builtin_res i builtin_output;
+         add_output_mem i;
+         failwith "Ibuiltin"
+      | Icond(cond, inputs, _, _, _) ->
+         set_branch i;
+         add_input_mem i;
+         add_input_regs i inputs
+      | Ijumptable(input, _) ->
+         set_branch i;
+         add_input_reg i input;
+         failwith "Ijumptable"
+      | Ireturn(Some input) ->
+         set_branch i;
+         add_input_reg i input;
+         failwith "Ireturn"
+      | Ireturn(None) ->
+         set_branch i;
+         failwith "Ireturn none"
+    end seqa;
+  !latency_constraints;;
+
+let resources_of_instruction (opweights : opweights) = function
+  | Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds
+  | Iop(op, inputs, output, _) ->
+     opweights.resources_of_op op (List.length inputs)
+  | Iload(trap, chunk, addressing, addr_regs, output, _) ->
+     opweights.resources_of_load trap chunk addressing (List.length addr_regs)
+  | Istore(chunk, addressing, addr_regs, input, _) ->
+     opweights.resources_of_store chunk addressing (List.length addr_regs)
+  | Icall(signature, ef, inputs, output, _) ->
+     opweights.resources_of_call signature ef
+  | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
+     opweights.resources_of_builtin ef
+  | Icond(cond, args, _, _ , _) ->
+     opweights.resources_of_cond cond (List.length args)
+  | Itailcall _ | Ijumptable _ | Ireturn _ -> opweights.pipelined_resource_bounds
+    
+let print_sequence pp (seqa : instruction array) =
+  Array.iteri (
+      fun i (insn : instruction) ->
+      PrintRTL.print_instruction pp (i, insn)) seqa;;
+
+type unique_id = int
+               
+type 'a symbolic_term_node =
+  | STop of Op.operation * 'a list
+  | STinitial_reg of int
+  | STother of int;;
+
+type symbolic_term = {
+    hash_id : unique_id;
+    hash_ct : symbolic_term symbolic_term_node
+  };;
+
+let rec print_term channel term =
+  match term.hash_ct with
+  | STop(op, args) ->
+     PrintOp.print_operation print_term channel (op, args)
+  | STinitial_reg n -> Printf.fprintf channel "x%d" n
+  | STother n -> Printf.fprintf channel "y%d" n;;
+
+type symbolic_term_table = {
+    st_table : (unique_id symbolic_term_node, symbolic_term) Hashtbl.t;
+    mutable st_next_id : unique_id };;
+
+let hash_init () = {
+    st_table = Hashtbl.create 20;
+    st_next_id = 0
+  };;
+
+let ground_to_id = function
+  | STop(op, l) -> STop(op, List.map (fun t -> t.hash_id) l)
+  | STinitial_reg r -> STinitial_reg r
+  | STother i -> STother i;;
+
+let hash_node (table : symbolic_term_table) (term : symbolic_term symbolic_term_node) : symbolic_term =
+  let grounded = ground_to_id term in
+  match Hashtbl.find_opt table.st_table grounded with
+  | Some x -> x
+  | None ->
+     let term' = { hash_id = table.st_next_id;
+                   hash_ct = term } in
+     (if table.st_next_id = max_int then failwith "hash: max_int");
+     table.st_next_id <- table.st_next_id + 1;
+     Hashtbl.add table.st_table grounded term';
+     term';;
+
+type access = {
+    base : symbolic_term;
+    offset : int64;
+    length : int
+  };;
+
+let term_equal a b = (a.hash_id = b.hash_id);;
+
+let access_of_addressing get_reg chunk addressing args =
+  match addressing, args with
+  | (Op.Aindexed ofs), [reg] -> Some
+     { base = get_reg reg;
+       offset = Camlcoq.camlint64_of_ptrofs ofs;
+       length = length_of_chunk chunk
+     }
+  | _, _ -> None ;;
+(* TODO: global *)
+
+let symbolic_execution (seqa : instruction array) =
+  let regs = ref PTree.empty
+  and table = hash_init() in
+  let assign reg term = regs := PTree.set reg term !regs
+  and hash term = hash_node table term in
+  let get_reg reg =
+    match PTree.get reg !regs with
+    | None -> hash (STinitial_reg (Camlcoq.P.to_int reg))
+    | Some x -> x in
+  let targets = Array.make (Array.length seqa) None in
+  Array.iteri
+    begin
+      fun i insn ->
+      match insn with
+      | Iop(Op.Omove, [input], output, _) ->
+         assign output (get_reg input)
+      | Iop(op, inputs, output, _) ->
+         assign output (hash (STop(op, List.map get_reg inputs)))
+
+      | Iload(trap, chunk, addressing, args, output, _) ->
+         let access = access_of_addressing get_reg chunk addressing args in
+         targets.(i) <- access;
+         assign output (hash (STother(i)))
+        
+      | Icall(_, _, _, output, _)
+      | Ibuiltin(_, _, BR output, _) -> 
+         assign output (hash (STother(i)))
+        
+      | Istore(chunk, addressing, args, va, _) ->
+         let access = access_of_addressing get_reg chunk addressing args in
+         targets.(i) <- access
+                                          
+      | Inop _ -> ()
+      | Ibuiltin(_, _, BR_none, _) -> ()
+      | Ibuiltin(_, _, BR_splitlong _, _) -> failwith "BR_splitlong"
+
+      | Itailcall (_, _, _)
+      |Icond (_, _, _, _, _)
+      |Ijumptable (_, _)
+      |Ireturn _ -> ()
+    end seqa;
+  targets;;
+
+let print_access channel = function
+  | None -> Printf.fprintf channel "any"
+  | Some x -> Printf.fprintf channel "%a + %Ld" print_term x.base x.offset;;
+
+let print_targets channel seqa =
+  let targets = symbolic_execution seqa in
+  Array.iteri
+    (fun i insn ->
+      match insn with
+      | Iload _ -> Printf.fprintf channel "%d: load %a\n"
+                      i print_access targets.(i) 
+      | Istore _ -> Printf.fprintf channel "%d: store %a\n"
+                       i print_access targets.(i)
+      | _ -> ()
+    ) seqa;;
+
+let may_overlap a0 b0 =
+  match a0, b0 with
+  | (None, _)  | (_ , None) -> true
+  | (Some a), (Some b) ->
+     if term_equal a.base b.base
+     then (max a.offset b.offset) <
+          (min (Int64.add (Int64.of_int a.length) a.offset)
+               (Int64.add (Int64.of_int b.length) b.offset))
+     else match a.base.hash_ct, b.base.hash_ct with
+          | STop(Op.Oaddrsymbol(ida, ofsa),[]),
+            STop(Op.Oaddrsymbol(idb, ofsb),[]) ->
+             (ida=idb) &&
+               let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa)
+               and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in
+               (max ao bo) <
+               (min (Int64.add (Int64.of_int a.length) ao)
+                  (Int64.add (Int64.of_int b.length) bo))
+          | STop(Op.Oaddrstack _, []),
+            STop(Op.Oaddrsymbol _, [])
+          | STop(Op.Oaddrsymbol _, []),
+            STop(Op.Oaddrstack _, []) -> false
+          | STop(Op.Oaddrstack(ofsa),[]),
+            STop(Op.Oaddrstack(ofsb),[]) ->
+               let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa)
+               and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in
+               (max ao bo) <
+               (min (Int64.add (Int64.of_int a.length) ao)
+                  (Int64.add (Int64.of_int b.length) bo))
+          | _ -> true;;
+
+(*
+(* TODO suboptimal quadratic algorithm *)
+let get_alias_dependencies seqa =
+  let targets = symbolic_execution seqa
+  and deps = ref [] in
+  let add_constraint instr_from instr_to latency =
+    deps := { instr_from = instr_from;
+              instr_to = instr_to;
+              latency = latency
+            }:: !deps in
+  for i=0 to (Array.length seqa)-1
+  do
+    for j=0 to i-1
+    do
+      match seqa.(j), seqa.(i) with
+      | (Istore _), ((Iload _) | (Istore _)) ->
+         if may_overlap targets.(j) targets.(i)
+         then add_constraint j i 1
+      | (Iload _), (Istore _) ->
+         if may_overlap targets.(j) targets.(i)
+         then add_constraint j i 0
+      | (Istore _ | Iload _), (Icall _ | Ibuiltin _)
+      | (Icall _ | Ibuiltin _), (Icall _ | Ibuiltin _ | Iload _ | Istore _) ->
+         add_constraint j i 1
+      | (Inop _ | Iop _), _
+      | _, (Inop _ | Iop _)
+      | (Iload _), (Iload _) -> ()
+    done
+  done;
+  !deps;;
+ *)
+
+let define_problem (opweights : opweights) (live_entry_regs : Regset.t)
+      (typing : RTLtyping.regenv) reference_counting seqa =
+  let simple_deps = get_simple_dependencies opweights seqa in
+  { max_latency = -1;
+    resource_bounds = opweights.pipelined_resource_bounds;
+    live_regs_entry = live_entry_regs;
+    typing = typing;
+    reference_counting = Some reference_counting;
+      instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa);
+    latency_constraints =
+      (* if (use_alias_analysis ())
+      then (get_alias_dependencies seqa) @ simple_deps
+      else *) simple_deps };;
+
+let zigzag_scheduler problem early_ones =
+  let nr_instructions = get_nr_instructions problem in
+  assert(nr_instructions = (Array.length early_ones));
+  match list_scheduler problem with
+  | Some fwd_schedule ->
+     let fwd_makespan = fwd_schedule.((Array.length fwd_schedule) - 1) in
+     let constraints' = ref problem.latency_constraints in
+     Array.iteri (fun i is_early ->
+         if is_early then
+           constraints' :=  {
+             instr_from = i;
+             instr_to = nr_instructions ;
+             latency = fwd_makespan - fwd_schedule.(i) } ::!constraints' )
+       early_ones;
+     validated_scheduler reverse_list_scheduler
+       { problem with latency_constraints = !constraints' }
+  | None -> None;;
+  
+let prepass_scheduler_by_name name problem early_ones =
+  match name with
+  | "zigzag" -> zigzag_scheduler problem early_ones
+  | _ -> scheduler_by_name name problem
+  
+let schedule_sequence (seqa : (instruction*Regset.t) array)
+      (live_regs_entry : Registers.Regset.t)
+      (typing : RTLtyping.regenv)
+      reference =
+  let opweights = OpWeights.get_opweights () in
+  try
+    if (Array.length seqa) <= 1
+    then None
+    else
+      begin
+      let nr_instructions = Array.length seqa in
+      (if !Clflags.option_debug_compcert > 6
+       then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa));
+      let problem = define_problem opweights live_regs_entry
+                      typing reference seqa in
+      (if !Clflags.option_debug_compcert > 7
+       then (print_sequence stdout (Array.map fst seqa);
+             print_problem stdout problem));
+      match prepass_scheduler_by_name
+              (!Clflags.option_fprepass_sched)
+              problem
+              (Array.map (fun (ins, _) ->
+                   match ins with
+                   | Icond _ -> true
+                   | _ -> false) seqa) with
+      | None -> Printf.printf "no solution in prepass scheduling\n";
+                None
+      | Some solution ->
+         let positions = Array.init nr_instructions (fun i -> i) in
+         Array.sort (fun i j ->
+             let si = solution.(i) and sj = solution.(j) in
+             if si < sj then -1
+             else if si > sj then 1
+             else i - j) positions;
+         Some positions
+    end
+  with (Failure s) ->
+    Printf.printf "failure in prepass scheduling: %s\n" s;
+    None;;
+                                                            
diff --git a/kvx/PrepassSchedulingOracleDeps.ml b/kvx/PrepassSchedulingOracleDeps.ml
deleted file mode 120000
index 1e955b85..00000000
--- a/kvx/PrepassSchedulingOracleDeps.ml
+++ /dev/null
@@ -1 +0,0 @@
-../aarch64/PrepassSchedulingOracleDeps.ml
\ No newline at end of file
diff --git a/kvx/PrepassSchedulingOracleDeps.ml b/kvx/PrepassSchedulingOracleDeps.ml
new file mode 100644
index 00000000..8d10d406
--- /dev/null
+++ b/kvx/PrepassSchedulingOracleDeps.ml
@@ -0,0 +1,17 @@
+type called_function = (Registers.reg, AST.ident) Datatypes.sum
+  
+type opweights =
+  {
+    pipelined_resource_bounds : int array;
+    nr_non_pipelined_units : int;
+    latency_of_op : Op.operation -> int -> int;
+    resources_of_op : Op.operation -> int -> int array;
+    non_pipelined_resources_of_op : Op.operation -> int -> int array;
+    latency_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int;
+    resources_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int array;
+    resources_of_store : AST.memory_chunk -> Op.addressing -> int -> int array;
+    resources_of_cond : Op.condition -> int -> int array;
+    latency_of_call : AST.signature -> called_function -> int;
+    resources_of_call : AST.signature -> called_function -> int array;
+    resources_of_builtin : AST.external_function -> int array
+  };;
diff --git a/powerpc/ExpansionOracle.ml b/powerpc/ExpansionOracle.ml
deleted file mode 120000
index ee2674bf..00000000
--- a/powerpc/ExpansionOracle.ml
+++ /dev/null
@@ -1 +0,0 @@
-../aarch64/ExpansionOracle.ml
\ No newline at end of file
diff --git a/powerpc/ExpansionOracle.ml b/powerpc/ExpansionOracle.ml
new file mode 100644
index 00000000..3b63b80d
--- /dev/null
+++ b/powerpc/ExpansionOracle.ml
@@ -0,0 +1,17 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Léo Gourdin        UGA, VERIMAG                   *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+open RTLpathCommon
+
+let expanse (sb : superblock) code pm = (code, pm)
+
+let find_last_node_reg c = ()
diff --git a/powerpc/PrepassSchedulingOracle.ml b/powerpc/PrepassSchedulingOracle.ml
deleted file mode 120000
index 9885fd52..00000000
--- a/powerpc/PrepassSchedulingOracle.ml
+++ /dev/null
@@ -1 +0,0 @@
-../x86/PrepassSchedulingOracle.ml
\ No newline at end of file
diff --git a/powerpc/PrepassSchedulingOracle.ml b/powerpc/PrepassSchedulingOracle.ml
new file mode 100644
index 00000000..42a3da23
--- /dev/null
+++ b/powerpc/PrepassSchedulingOracle.ml
@@ -0,0 +1,6 @@
+open RTL
+open Registers
+
+(* Do not do anything *)
+let schedule_sequence (seqa : (instruction*Regset.t) array)
+      live_regs_entry typing reference = None
diff --git a/riscV/PrepassSchedulingOracle.ml b/riscV/PrepassSchedulingOracle.ml
deleted file mode 120000
index 912e9ffa..00000000
--- a/riscV/PrepassSchedulingOracle.ml
+++ /dev/null
@@ -1 +0,0 @@
-../aarch64/PrepassSchedulingOracle.ml
\ No newline at end of file
diff --git a/riscV/PrepassSchedulingOracle.ml b/riscV/PrepassSchedulingOracle.ml
new file mode 100644
index 00000000..53a81095
--- /dev/null
+++ b/riscV/PrepassSchedulingOracle.ml
@@ -0,0 +1,485 @@
+open AST
+open RTL
+open Maps
+open InstructionScheduler
+open Registers
+open PrepassSchedulingOracleDeps
+   
+let use_alias_analysis () = false
+                          
+let length_of_chunk = function
+| Mint8signed
+| Mint8unsigned -> 1
+| Mint16signed
+| Mint16unsigned -> 2
+| Mint32
+| Mfloat32
+| Many32 -> 4
+| Mint64
+| Mfloat64 
+| Many64 -> 8;;
+
+let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.t) array) =
+  let last_reg_reads : int list PTree.t ref = ref PTree.empty
+  and last_reg_write : (int*int) PTree.t ref = ref PTree.empty
+  and last_mem_reads : int list ref = ref []
+  and last_mem_write : int option ref = ref None
+  and last_branch : int option ref = ref None
+  and last_non_pipelined_op : int array = Array.make
+                                        opweights.nr_non_pipelined_units ( -1 )
+  and latency_constraints : latency_constraint list ref = ref [] in
+  let add_constraint instr_from instr_to latency =
+    assert (instr_from <= instr_to);
+    assert (latency >= 0);
+    if instr_from = instr_to
+    then (if latency = 0
+          then ()
+          else failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop")
+    else
+      latency_constraints :=
+        { instr_from = instr_from;
+          instr_to = instr_to;
+          latency = latency
+        }:: !latency_constraints
+  and get_last_reads reg =
+    match PTree.get reg !last_reg_reads
+     with Some l -> l
+        | None -> [] in
+  let add_input_mem i =
+    if not (use_alias_analysis ())
+    then
+      begin
+        begin
+          (* Read after write *)
+          match !last_mem_write with
+          | None -> ()
+          | Some j -> add_constraint j i 1
+        end;
+        last_mem_reads := i :: !last_mem_reads
+      end
+  and add_output_mem i =
+    if not (use_alias_analysis ())
+    then
+      begin
+        begin
+          (* Write after write *)
+          match !last_mem_write with
+          | None -> ()
+          | Some j -> add_constraint j i 1
+        end;
+        (* Write after read *)
+        List.iter (fun j -> add_constraint j i 0) !last_mem_reads;
+        last_mem_write := Some i;
+        last_mem_reads := []
+      end
+  and add_input_reg i reg =
+    begin
+      (* Read after write *)
+      match PTree.get reg !last_reg_write with
+      | None -> ()
+      | Some (j, latency) -> add_constraint j i latency
+    end;
+    last_reg_reads := PTree.set reg
+                       (i :: get_last_reads reg)
+                       !last_reg_reads
+  and add_output_reg i latency reg =
+    begin
+      (* Write after write *)
+      match PTree.get reg !last_reg_write with
+      | None -> ()
+      | Some (j, _) -> add_constraint j i 1
+    end;
+    begin
+      (* Write after read *)
+      List.iter (fun j -> add_constraint j i 0) (get_last_reads reg)
+    end;
+    last_reg_write := PTree.set reg (i, latency) !last_reg_write;
+    last_reg_reads := PTree.remove reg !last_reg_reads
+    in
+  let add_input_regs i regs = List.iter (add_input_reg i) regs in
+  let rec add_builtin_res i (res : reg builtin_res) =
+    match res with
+    | BR r -> add_output_reg i 10 r
+    | BR_none -> ()
+    | BR_splitlong (hi, lo) -> add_builtin_res i hi;
+                               add_builtin_res i lo in
+  let rec add_builtin_arg i (ba : reg builtin_arg) =
+    match ba with
+    | BA r -> add_input_reg i r
+    | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> ()
+    | BA_loadstack(_,_) -> add_input_mem i
+    | BA_addrstack _ -> ()
+    | BA_loadglobal(_, _, _) -> add_input_mem i
+    | BA_addrglobal _ -> ()
+    | BA_splitlong(hi, lo) -> add_builtin_arg i hi;
+                              add_builtin_arg i lo
+    | BA_addptr(a1, a2) -> add_builtin_arg i a1;
+                           add_builtin_arg i a2 in
+  let irreversible_action i =
+    match !last_branch with
+    | None -> ()
+    | Some j -> add_constraint j i 1 in
+  let set_branch i =
+    irreversible_action i;
+    last_branch := Some i in
+  let add_non_pipelined_resources i resources =
+    Array.iter2
+      (fun latency last ->
+        if latency >= 0 && last >= 0 then add_constraint last i latency)
+      resources last_non_pipelined_op;
+    Array.iteri (fun rsc latency ->
+        if latency >= 0
+        then last_non_pipelined_op.(rsc) <- i) resources
+  in
+  Array.iteri
+    begin
+      fun i (insn, other_uses) ->
+      List.iter (fun use ->
+          add_input_reg i use)
+        (Regset.elements other_uses);
+      
+      match insn with
+      | Inop _ -> ()
+      | Iop(op, inputs, output, _) ->
+         add_non_pipelined_resources i
+           (opweights.non_pipelined_resources_of_op op (List.length inputs));
+         (if Op.is_trapping_op op then irreversible_action i);
+         add_input_regs i inputs;
+         add_output_reg i (opweights.latency_of_op op (List.length inputs)) output
+      | Iload(trap, chunk, addressing, addr_regs, output, _) ->
+         (if trap=TRAP then irreversible_action i);
+         add_input_mem i;
+         add_input_regs i addr_regs;
+         add_output_reg i (opweights.latency_of_load trap chunk addressing (List.length addr_regs)) output
+      | Istore(chunk, addressing, addr_regs, input, _) ->
+         irreversible_action i;
+         add_input_regs i addr_regs;
+         add_input_reg i input;
+         add_output_mem i
+      | Icall(signature, ef, inputs, output, _) ->
+         set_branch i;
+         (match ef with
+          | Datatypes.Coq_inl r -> add_input_reg i r
+          | Datatypes.Coq_inr symbol -> ()
+         );
+         add_input_mem i;
+         add_input_regs i inputs;
+         add_output_reg i (opweights.latency_of_call signature ef) output;
+         add_output_mem i;
+         failwith "Icall"
+      | Itailcall(signature, ef, inputs) ->
+         set_branch i;
+        (match ef with
+          | Datatypes.Coq_inl r -> add_input_reg i r
+          | Datatypes.Coq_inr symbol -> ()
+         );
+         add_input_mem i;
+         add_input_regs i inputs;
+         failwith "Itailcall"
+      | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
+         set_branch i;
+         add_input_mem i;
+         List.iter (add_builtin_arg i) builtin_inputs;
+         add_builtin_res i builtin_output;
+         add_output_mem i;
+         failwith "Ibuiltin"
+      | Icond(cond, inputs, _, _, _) ->
+         set_branch i;
+         add_input_mem i;
+         add_input_regs i inputs
+      | Ijumptable(input, _) ->
+         set_branch i;
+         add_input_reg i input;
+         failwith "Ijumptable"
+      | Ireturn(Some input) ->
+         set_branch i;
+         add_input_reg i input;
+         failwith "Ireturn"
+      | Ireturn(None) ->
+         set_branch i;
+         failwith "Ireturn none"
+    end seqa;
+  !latency_constraints;;
+
+let resources_of_instruction (opweights : opweights) = function
+  | Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds
+  | Iop(op, inputs, output, _) ->
+     opweights.resources_of_op op (List.length inputs)
+  | Iload(trap, chunk, addressing, addr_regs, output, _) ->
+     opweights.resources_of_load trap chunk addressing (List.length addr_regs)
+  | Istore(chunk, addressing, addr_regs, input, _) ->
+     opweights.resources_of_store chunk addressing (List.length addr_regs)
+  | Icall(signature, ef, inputs, output, _) ->
+     opweights.resources_of_call signature ef
+  | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
+     opweights.resources_of_builtin ef
+  | Icond(cond, args, _, _ , _) ->
+     opweights.resources_of_cond cond (List.length args)
+  | Itailcall _ | Ijumptable _ | Ireturn _ -> opweights.pipelined_resource_bounds
+    
+let print_sequence pp (seqa : instruction array) =
+  Array.iteri (
+      fun i (insn : instruction) ->
+      PrintRTL.print_instruction pp (i, insn)) seqa;;
+
+type unique_id = int
+               
+type 'a symbolic_term_node =
+  | STop of Op.operation * 'a list
+  | STinitial_reg of int
+  | STother of int;;
+
+type symbolic_term = {
+    hash_id : unique_id;
+    hash_ct : symbolic_term symbolic_term_node
+  };;
+
+let rec print_term channel term =
+  match term.hash_ct with
+  | STop(op, args) ->
+     PrintOp.print_operation print_term channel (op, args)
+  | STinitial_reg n -> Printf.fprintf channel "x%d" n
+  | STother n -> Printf.fprintf channel "y%d" n;;
+
+type symbolic_term_table = {
+    st_table : (unique_id symbolic_term_node, symbolic_term) Hashtbl.t;
+    mutable st_next_id : unique_id };;
+
+let hash_init () = {
+    st_table = Hashtbl.create 20;
+    st_next_id = 0
+  };;
+
+let ground_to_id = function
+  | STop(op, l) -> STop(op, List.map (fun t -> t.hash_id) l)
+  | STinitial_reg r -> STinitial_reg r
+  | STother i -> STother i;;
+
+let hash_node (table : symbolic_term_table) (term : symbolic_term symbolic_term_node) : symbolic_term =
+  let grounded = ground_to_id term in
+  match Hashtbl.find_opt table.st_table grounded with
+  | Some x -> x
+  | None ->
+     let term' = { hash_id = table.st_next_id;
+                   hash_ct = term } in
+     (if table.st_next_id = max_int then failwith "hash: max_int");
+     table.st_next_id <- table.st_next_id + 1;
+     Hashtbl.add table.st_table grounded term';
+     term';;
+
+type access = {
+    base : symbolic_term;
+    offset : int64;
+    length : int
+  };;
+
+let term_equal a b = (a.hash_id = b.hash_id);;
+
+let access_of_addressing get_reg chunk addressing args =
+  match addressing, args with
+  | (Op.Aindexed ofs), [reg] -> Some
+     { base = get_reg reg;
+       offset = Camlcoq.camlint64_of_ptrofs ofs;
+       length = length_of_chunk chunk
+     }
+  | _, _ -> None ;;
+(* TODO: global *)
+
+let symbolic_execution (seqa : instruction array) =
+  let regs = ref PTree.empty
+  and table = hash_init() in
+  let assign reg term = regs := PTree.set reg term !regs
+  and hash term = hash_node table term in
+  let get_reg reg =
+    match PTree.get reg !regs with
+    | None -> hash (STinitial_reg (Camlcoq.P.to_int reg))
+    | Some x -> x in
+  let targets = Array.make (Array.length seqa) None in
+  Array.iteri
+    begin
+      fun i insn ->
+      match insn with
+      | Iop(Op.Omove, [input], output, _) ->
+         assign output (get_reg input)
+      | Iop(op, inputs, output, _) ->
+         assign output (hash (STop(op, List.map get_reg inputs)))
+
+      | Iload(trap, chunk, addressing, args, output, _) ->
+         let access = access_of_addressing get_reg chunk addressing args in
+         targets.(i) <- access;
+         assign output (hash (STother(i)))
+        
+      | Icall(_, _, _, output, _)
+      | Ibuiltin(_, _, BR output, _) -> 
+         assign output (hash (STother(i)))
+        
+      | Istore(chunk, addressing, args, va, _) ->
+         let access = access_of_addressing get_reg chunk addressing args in
+         targets.(i) <- access
+                                          
+      | Inop _ -> ()
+      | Ibuiltin(_, _, BR_none, _) -> ()
+      | Ibuiltin(_, _, BR_splitlong _, _) -> failwith "BR_splitlong"
+
+      | Itailcall (_, _, _)
+      |Icond (_, _, _, _, _)
+      |Ijumptable (_, _)
+      |Ireturn _ -> ()
+    end seqa;
+  targets;;
+
+let print_access channel = function
+  | None -> Printf.fprintf channel "any"
+  | Some x -> Printf.fprintf channel "%a + %Ld" print_term x.base x.offset;;
+
+let print_targets channel seqa =
+  let targets = symbolic_execution seqa in
+  Array.iteri
+    (fun i insn ->
+      match insn with
+      | Iload _ -> Printf.fprintf channel "%d: load %a\n"
+                      i print_access targets.(i) 
+      | Istore _ -> Printf.fprintf channel "%d: store %a\n"
+                       i print_access targets.(i)
+      | _ -> ()
+    ) seqa;;
+
+let may_overlap a0 b0 =
+  match a0, b0 with
+  | (None, _)  | (_ , None) -> true
+  | (Some a), (Some b) ->
+     if term_equal a.base b.base
+     then (max a.offset b.offset) <
+          (min (Int64.add (Int64.of_int a.length) a.offset)
+               (Int64.add (Int64.of_int b.length) b.offset))
+     else match a.base.hash_ct, b.base.hash_ct with
+          | STop(Op.Oaddrsymbol(ida, ofsa),[]),
+            STop(Op.Oaddrsymbol(idb, ofsb),[]) ->
+             (ida=idb) &&
+               let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa)
+               and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in
+               (max ao bo) <
+               (min (Int64.add (Int64.of_int a.length) ao)
+                  (Int64.add (Int64.of_int b.length) bo))
+          | STop(Op.Oaddrstack _, []),
+            STop(Op.Oaddrsymbol _, [])
+          | STop(Op.Oaddrsymbol _, []),
+            STop(Op.Oaddrstack _, []) -> false
+          | STop(Op.Oaddrstack(ofsa),[]),
+            STop(Op.Oaddrstack(ofsb),[]) ->
+               let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa)
+               and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in
+               (max ao bo) <
+               (min (Int64.add (Int64.of_int a.length) ao)
+                  (Int64.add (Int64.of_int b.length) bo))
+          | _ -> true;;
+
+(*
+(* TODO suboptimal quadratic algorithm *)
+let get_alias_dependencies seqa =
+  let targets = symbolic_execution seqa
+  and deps = ref [] in
+  let add_constraint instr_from instr_to latency =
+    deps := { instr_from = instr_from;
+              instr_to = instr_to;
+              latency = latency
+            }:: !deps in
+  for i=0 to (Array.length seqa)-1
+  do
+    for j=0 to i-1
+    do
+      match seqa.(j), seqa.(i) with
+      | (Istore _), ((Iload _) | (Istore _)) ->
+         if may_overlap targets.(j) targets.(i)
+         then add_constraint j i 1
+      | (Iload _), (Istore _) ->
+         if may_overlap targets.(j) targets.(i)
+         then add_constraint j i 0
+      | (Istore _ | Iload _), (Icall _ | Ibuiltin _)
+      | (Icall _ | Ibuiltin _), (Icall _ | Ibuiltin _ | Iload _ | Istore _) ->
+         add_constraint j i 1
+      | (Inop _ | Iop _), _
+      | _, (Inop _ | Iop _)
+      | (Iload _), (Iload _) -> ()
+    done
+  done;
+  !deps;;
+ *)
+
+let define_problem (opweights : opweights) (live_entry_regs : Regset.t)
+      (typing : RTLtyping.regenv) reference_counting seqa =
+  let simple_deps = get_simple_dependencies opweights seqa in
+  { max_latency = -1;
+    resource_bounds = opweights.pipelined_resource_bounds;
+    live_regs_entry = live_entry_regs;
+    typing = typing;
+    reference_counting = Some reference_counting;
+      instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa);
+    latency_constraints =
+      (* if (use_alias_analysis ())
+      then (get_alias_dependencies seqa) @ simple_deps
+      else *) simple_deps };;
+
+let zigzag_scheduler problem early_ones =
+  let nr_instructions = get_nr_instructions problem in
+  assert(nr_instructions = (Array.length early_ones));
+  match list_scheduler problem with
+  | Some fwd_schedule ->
+     let fwd_makespan = fwd_schedule.((Array.length fwd_schedule) - 1) in
+     let constraints' = ref problem.latency_constraints in
+     Array.iteri (fun i is_early ->
+         if is_early then
+           constraints' :=  {
+             instr_from = i;
+             instr_to = nr_instructions ;
+             latency = fwd_makespan - fwd_schedule.(i) } ::!constraints' )
+       early_ones;
+     validated_scheduler reverse_list_scheduler
+       { problem with latency_constraints = !constraints' }
+  | None -> None;;
+  
+let prepass_scheduler_by_name name problem early_ones =
+  match name with
+  | "zigzag" -> zigzag_scheduler problem early_ones
+  | _ -> scheduler_by_name name problem
+  
+let schedule_sequence (seqa : (instruction*Regset.t) array)
+      (live_regs_entry : Registers.Regset.t)
+      (typing : RTLtyping.regenv)
+      reference =
+  let opweights = OpWeights.get_opweights () in
+  try
+    if (Array.length seqa) <= 1
+    then None
+    else
+      begin
+      let nr_instructions = Array.length seqa in
+      (if !Clflags.option_debug_compcert > 6
+       then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa));
+      let problem = define_problem opweights live_regs_entry
+                      typing reference seqa in
+      (if !Clflags.option_debug_compcert > 7
+       then (print_sequence stdout (Array.map fst seqa);
+             print_problem stdout problem));
+      match prepass_scheduler_by_name
+              (!Clflags.option_fprepass_sched)
+              problem
+              (Array.map (fun (ins, _) ->
+                   match ins with
+                   | Icond _ -> true
+                   | _ -> false) seqa) with
+      | None -> Printf.printf "no solution in prepass scheduling\n";
+                None
+      | Some solution ->
+         let positions = Array.init nr_instructions (fun i -> i) in
+         Array.sort (fun i j ->
+             let si = solution.(i) and sj = solution.(j) in
+             if si < sj then -1
+             else if si > sj then 1
+             else i - j) positions;
+         Some positions
+    end
+  with (Failure s) ->
+    Printf.printf "failure in prepass scheduling: %s\n" s;
+    None;;
+                                                            
diff --git a/riscV/PrepassSchedulingOracleDeps.ml b/riscV/PrepassSchedulingOracleDeps.ml
deleted file mode 120000
index 1e955b85..00000000
--- a/riscV/PrepassSchedulingOracleDeps.ml
+++ /dev/null
@@ -1 +0,0 @@
-../aarch64/PrepassSchedulingOracleDeps.ml
\ No newline at end of file
diff --git a/riscV/PrepassSchedulingOracleDeps.ml b/riscV/PrepassSchedulingOracleDeps.ml
new file mode 100644
index 00000000..8d10d406
--- /dev/null
+++ b/riscV/PrepassSchedulingOracleDeps.ml
@@ -0,0 +1,17 @@
+type called_function = (Registers.reg, AST.ident) Datatypes.sum
+  
+type opweights =
+  {
+    pipelined_resource_bounds : int array;
+    nr_non_pipelined_units : int;
+    latency_of_op : Op.operation -> int -> int;
+    resources_of_op : Op.operation -> int -> int array;
+    non_pipelined_resources_of_op : Op.operation -> int -> int array;
+    latency_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int;
+    resources_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int array;
+    resources_of_store : AST.memory_chunk -> Op.addressing -> int -> int array;
+    resources_of_cond : Op.condition -> int -> int array;
+    latency_of_call : AST.signature -> called_function -> int;
+    resources_of_call : AST.signature -> called_function -> int array;
+    resources_of_builtin : AST.external_function -> int array
+  };;
diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml
index f823ccca..0203d9c8 100644
--- a/scheduling/InstructionScheduler.ml
+++ b/scheduling/InstructionScheduler.ml
@@ -526,7 +526,7 @@ let reg_pres_scheduler (problem : problem) : solution option =
                (vector_subtract usages.(maybe) current_resources;
                 result := maybe)
              else
-               if not !Clflags.option_regpres_temp
+               if not !Clflags.option_regpres_wait_window
                then
                  (InstrSet.iter (fun ins ->
                       if vector_less_equal usages.(ins) current_resources &&
@@ -565,7 +565,7 @@ let reg_pres_scheduler (problem : problem) : solution option =
              if !csr_b && !Clflags.option_debug_compcert > 6 then
                DebugPrint.debug "REGPRES: high pres class %d\n" i;
              csr_b := false;
-             if !Clflags.option_regpres_temp then
+             if !Clflags.option_regpres_wait_window then
                cnt := 0;
              List.iter (fun (r,b) ->
                  if b then
diff --git a/x86/ExpansionOracle.ml b/x86/ExpansionOracle.ml
deleted file mode 120000
index ee2674bf..00000000
--- a/x86/ExpansionOracle.ml
+++ /dev/null
@@ -1 +0,0 @@
-../aarch64/ExpansionOracle.ml
\ No newline at end of file
diff --git a/x86/ExpansionOracle.ml b/x86/ExpansionOracle.ml
new file mode 100644
index 00000000..3b63b80d
--- /dev/null
+++ b/x86/ExpansionOracle.ml
@@ -0,0 +1,17 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Léo Gourdin        UGA, VERIMAG                   *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+open RTLpathCommon
+
+let expanse (sb : superblock) code pm = (code, pm)
+
+let find_last_node_reg c = ()
-- 
cgit 


From 2ff766a18432fd75739abab0b5741ded6b67a2a5 Mon Sep 17 00:00:00 2001
From: David Monniaux <David.Monniaux@univ-grenoble-alpes.fr>
Date: Fri, 16 Jul 2021 15:29:25 +0200
Subject: activate register pressure by default

---
 driver/Clflags.ml | 2 +-
 driver/Driver.ml  | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/driver/Clflags.ml b/driver/Clflags.ml
index 085eaa7e..25bd2c78 100644
--- a/driver/Clflags.ml
+++ b/driver/Clflags.ml
@@ -51,7 +51,7 @@ let option_flooprotate = ref 0 (* rotate the innermost loops to have the conditi
 let option_mtune = ref ""
                 
 let option_fprepass = ref true
-let option_fprepass_sched = ref "list"
+let option_fprepass_sched = ref "regpres"
 
 let option_fpostpass = ref true
 let option_fpostpass_sched = ref "list"
diff --git a/driver/Driver.ml b/driver/Driver.ml
index 79353f32..3f5a4bd9 100644
--- a/driver/Driver.ml
+++ b/driver/Driver.ml
@@ -298,9 +298,9 @@ let num_input_files = ref 0
 let cmdline_actions =
   let f_opt name ref =
     [Exact("-f" ^ name), Set ref; Exact("-fno-" ^ name), Unset ref] in
-  let f_opt_str name ref strref =
+  let f_opt_str name default ref strref =
     [Exact("-f" ^ name ^ "="), String 
-      (fun s -> (strref := (if s == "" then "list" else s)); ref := true)
+      (fun s -> (strref := (if s == "" then default else s)); ref := true)
      ] in
   let f_str name strref default =
     [Exact("-f" ^ name ^ "="), String 
@@ -435,8 +435,8 @@ let cmdline_actions =
   @ [ Exact "-funrollbody", Integer (fun n -> option_funrollbody := n) ]
   @ [ Exact "-flooprotate", Integer (fun n -> option_flooprotate := n) ]
   @ f_opt "tracelinearize" option_ftracelinearize
-  @ f_opt_str "prepass" option_fprepass option_fprepass_sched
-  @ f_opt_str "postpass" option_fpostpass option_fpostpass_sched
+  @ f_opt_str "prepass" "regpress" option_fprepass option_fprepass_sched
+  @ f_opt_str "postpass" "list" option_fpostpass option_fpostpass_sched
   @ f_opt "inline" option_finline
   @ f_opt "inline-functions-called-once" option_finline_functions_called_once
   @ f_opt "globaladdrtmp" option_fglobaladdrtmp
-- 
cgit 


From cf033ec29391d5358dea1d3b25da1738957478c4 Mon Sep 17 00:00:00 2001
From: David Monniaux <David.Monniaux@univ-grenoble-alpes.fr>
Date: Fri, 16 Jul 2021 18:01:03 +0200
Subject: comment for authors

---
 aarch64/PrepassSchedulingOracle.ml | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml
index 53a81095..e09eea13 100644
--- a/aarch64/PrepassSchedulingOracle.ml
+++ b/aarch64/PrepassSchedulingOracle.ml
@@ -1,3 +1,16 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Sylvain Boulmé     Grenoble-INP, VERIMAG          *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*           Cyril Six          Kalray                         *)
+(*           Léo Gourdin        UGA, VERIMAG                   *)
+(*           Nicolas Nardino    ENS-Lyon, VERIMAG              *)
+(*                                                             *)
+(*                                                             *)
+(* *************************************************************)
+
 open AST
 open RTL
 open Maps
-- 
cgit 


From cb6103e684ff89a0493d6b2e3af2317df04fede3 Mon Sep 17 00:00:00 2001
From: David Monniaux <David.Monniaux@univ-grenoble-alpes.fr>
Date: Mon, 19 Jul 2021 13:23:40 +0200
Subject: debugprint flag not to true

---
 scheduling/InstructionScheduler.ml | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml
index 0203d9c8..0eadd5a4 100644
--- a/scheduling/InstructionScheduler.ml
+++ b/scheduling/InstructionScheduler.ml
@@ -349,8 +349,6 @@ let _ = fun x -> priority_list_scheduler INSTRUCTION_ORDER x;;
 
 (* A scheduler sensitive to register pressure *)
 let reg_pres_scheduler (problem : problem) : solution option =
-  DebugPrint.debug_flag := true;
-
   let nr_instructions = get_nr_instructions problem in
 
   if !Clflags.option_debug_compcert > 6 then
@@ -615,11 +613,9 @@ let reg_pres_scheduler (problem : problem) : solution option =
         if target_time > !final_time then
           final_time := target_time) predecessors.(nr_instructions);
     times.(nr_instructions) <- !final_time;
-    DebugPrint.debug_flag := false;
     Some times
   with Exit ->
     DebugPrint.debug "reg_pres_sched failed\n";
-    DebugPrint.debug_flag := false;
     None
               
 ;;
@@ -628,7 +624,6 @@ let reg_pres_scheduler (problem : problem) : solution option =
 (********************************************************************)
 
 let reg_pres_scheduler_bis (problem : problem) : solution option =
-  DebugPrint.debug_flag := true;
   Printf.printf "\nNEW\n\n";
   let nr_instructions = get_nr_instructions problem in
   let successors = get_successors problem
@@ -814,11 +809,9 @@ let reg_pres_scheduler_bis (problem : problem) : solution option =
         if target_time > !final_time then
           final_time := target_time) predecessors.(nr_instructions);
     times.(nr_instructions) <- !final_time;
-    DebugPrint.debug_flag := false;
     Some times
   with Exit ->
     DebugPrint.debug "reg_pres_sched failed\n";
-    DebugPrint.debug_flag := false;
     None
               
 ;;
-- 
cgit 


From ad72ffe9c2d7839cfe85926264565d59daf87a3c Mon Sep 17 00:00:00 2001
From: David Monniaux <David.Monniaux@univ-grenoble-alpes.fr>
Date: Mon, 19 Jul 2021 13:24:46 +0200
Subject: disable printing debug info

---
 scheduling/InstructionScheduler.ml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml
index 0eadd5a4..5bd969ca 100644
--- a/scheduling/InstructionScheduler.ml
+++ b/scheduling/InstructionScheduler.ml
@@ -624,7 +624,7 @@ let reg_pres_scheduler (problem : problem) : solution option =
 (********************************************************************)
 
 let reg_pres_scheduler_bis (problem : problem) : solution option =
-  Printf.printf "\nNEW\n\n";
+  (* Printf.printf "\nNEW\n\n"; *)
   let nr_instructions = get_nr_instructions problem in
   let successors = get_successors problem
   and predecessors = get_predecessors problem
-- 
cgit 


From 650c134f915d0b2e799c76c87ffd2ed09c782dcc Mon Sep 17 00:00:00 2001
From: David Monniaux <David.Monniaux@univ-grenoble-alpes.fr>
Date: Mon, 19 Jul 2021 13:26:41 +0200
Subject: use regpres_bis not regpres

---
 driver/Clflags.ml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/driver/Clflags.ml b/driver/Clflags.ml
index 085eaa7e..134c222f 100644
--- a/driver/Clflags.ml
+++ b/driver/Clflags.ml
@@ -51,7 +51,7 @@ let option_flooprotate = ref 0 (* rotate the innermost loops to have the conditi
 let option_mtune = ref ""
                 
 let option_fprepass = ref true
-let option_fprepass_sched = ref "list"
+let option_fprepass_sched = ref "regpres_bis"
 
 let option_fpostpass = ref true
 let option_fpostpass_sched = ref "list"
-- 
cgit 


From a86c0c659666a9a154ae4d27fee1c4cad3dd26b7 Mon Sep 17 00:00:00 2001
From: "nicolas.nardino" <nicolas.nardino@ens-lyon.fr>
Date: Mon, 19 Jul 2021 15:57:57 +0200
Subject: start fixing

---
 scheduling/InstructionScheduler.ml | 90 ++++++++++++++++----------------------
 1 file changed, 38 insertions(+), 52 deletions(-)

diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml
index 0203d9c8..3e82336b 100644
--- a/scheduling/InstructionScheduler.ml
+++ b/scheduling/InstructionScheduler.ml
@@ -353,8 +353,8 @@ let reg_pres_scheduler (problem : problem) : solution option =
 
   let nr_instructions = get_nr_instructions problem in
 
-  if !Clflags.option_debug_compcert > 6 then
-    DebugPrint.debug "SCHEDULING_SUPERBLOCK %d\n" nr_instructions;
+  (* if !Clflags.option_debug_compcert > 6 then *)
+  DebugPrint.debug "\nSCHEDULING_SUPERBLOCK %d\n" nr_instructions;
 
   let successors = get_successors problem
   and predecessors = get_predecessors problem
@@ -495,32 +495,22 @@ let reg_pres_scheduler (problem : problem) : solution option =
 
   let attempt_scheduling ready usages =
     let result = ref (-1) in
+    DebugPrint.debug "\n\nREADY: ";
+    InstrSet.iter (fun i -> DebugPrint.debug "%d " i) ready;
+    DebugPrint.debug "\n\n";
     try
       Array.iteri (fun i avlregs ->
-          (* print_string "avlregs: ";
-           * print_int i;
-           * print_string " ";
-           * print_int avlregs;
-           * print_newline ();
-           * print_string "live regs: ";
-           * print_int (Hashtbl.length live_regs);
-           * print_newline ();
-           * flush stdout; *)
-          if avlregs <= regs_thresholds.(i)
+          DebugPrint.debug "avlregs: %d %d\nlive regs: %d\n"
+            i avlregs (Hashtbl.length live_regs); 
+          if !cnt < 5 && avlregs <= regs_thresholds.(i)
           then (
             csr_b := true;
             let maybe = InstrSet.sched_CSR i ready usages in
-            (* print_string "maybe\n";
-             * print_int maybe;
-             * print_newline ();
-             * flush stdout; *)
-            (if maybe > 0 &&
+            DebugPrint.debug "maybe %d\n" maybe;
+            (if maybe >= 0 &&
                   let delta = 
                     List.fold_left (fold_delta i) 0 mentions.(maybe) in
-                  (* print_string "delta ";
-                   * print_int delta;
-                   * print_newline ();
-                   * flush stdout; *)
+                  DebugPrint.debug "delta %d\n" delta;
                   delta > 0
              then
                (vector_subtract usages.(maybe) current_resources;
@@ -558,33 +548,30 @@ let reg_pres_scheduler (problem : problem) : solution option =
               problem.instruction_usages with
       | -1 -> advance_time()
       | i -> (assert(times.(i) < 0);
-            ((* print_string "INSTR ISSUED: ";
-              * print_int i;
-              * print_newline ();
-              * flush stdout; *)
-             if !csr_b && !Clflags.option_debug_compcert > 6 then
-               DebugPrint.debug "REGPRES: high pres class %d\n" i;
-             csr_b := false;
-             if !Clflags.option_regpres_wait_window then
-               cnt := 0;
-             List.iter (fun (r,b) ->
-                 if b then
-                   (match Hashtbl.find_opt counts r with
-                    | None -> assert false
-                    | Some (t, n) ->
-                       Hashtbl.remove counts r;
-                       if n = 1 then
-                         (Hashtbl.remove live_regs r;
-                          available_regs.(t)
-                          <- available_regs.(t) + 1))
-                 else
-                   let t = class_r r in
-                   match Hashtbl.find_opt live_regs r with
-                   | None -> (Hashtbl.add live_regs r t;
-                             available_regs.(t)
-                             <- available_regs.(t) - 1)
-                   | Some i -> ()
-               ) mentions.(i));
+             (DebugPrint.debug "INSTR ISSUED: %d\n" i;
+              if !csr_b && !Clflags.option_debug_compcert > 6 then
+                DebugPrint.debug "REGPRES: high pres class %d\n" i;
+              csr_b := false;
+              (* if !Clflags.option_regpres_wait_window then *)
+              cnt := 0;
+              List.iter (fun (r,b) ->
+                  if b then
+                    (match Hashtbl.find_opt counts r with
+                     | None -> assert false
+                     | Some (t, n) ->
+                        Hashtbl.remove counts r;
+                        if n = 1 then
+                          (Hashtbl.remove live_regs r;
+                           available_regs.(t)
+                           <- available_regs.(t) + 1))
+                  else
+                    let t = class_r r in
+                    match Hashtbl.find_opt live_regs r with
+                    | None -> (Hashtbl.add live_regs r t;
+                              available_regs.(t)
+                              <- available_regs.(t) - 1)
+                    | Some i -> ()
+                ) mentions.(i));
              times.(i) <- !current_time;
              ready.(!current_time)
              <- InstrSet.remove i (ready.(!current_time));
@@ -595,8 +582,8 @@ let reg_pres_scheduler (problem : problem) : solution option =
                    | to_time ->
                       ((* DebugPrint.debug "TO TIME %d : %d\n" to_time
                         *   (Array.length ready); *)
-                      ready.(to_time)
-                      <- InstrSet.add instr_to ready.(to_time))
+                       ready.(to_time)
+                       <- InstrSet.add instr_to ready.(to_time))
                ) successors.(i);
              successors.(i) <- []
             )
@@ -605,8 +592,7 @@ let reg_pres_scheduler (problem : problem) : solution option =
   try
     let final_time = ref (-1) in
     for i = 0 to nr_instructions - 1 do
-      (* print_int i;
-       * flush stdout; *)
+      DebugPrint.debug "%d " i;
       (if times.(i) < 0 then raise Exit);
       (if !final_time < times.(i) + 1 then final_time := times.(i) + 1)
     done;
-- 
cgit 


From ee2112a9bacc246e0434a19fc93aab335fd56ddd Mon Sep 17 00:00:00 2001
From: "nicolas.nardino" <nicolas.nardino@ens-lyon.fr>
Date: Mon, 19 Jul 2021 18:09:27 +0200
Subject: Fix scheduling fails (forgot a case)

---
 scheduling/InstructionScheduler.ml | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml
index 3e82336b..9c5c674d 100644
--- a/scheduling/InstructionScheduler.ml
+++ b/scheduling/InstructionScheduler.ml
@@ -349,13 +349,14 @@ let _ = fun x -> priority_list_scheduler INSTRUCTION_ORDER x;;
 
 (* A scheduler sensitive to register pressure *)
 let reg_pres_scheduler (problem : problem) : solution option =
-  DebugPrint.debug_flag := true;
+  (* DebugPrint.debug_flag := true; *)
 
   let nr_instructions = get_nr_instructions problem in
 
-  (* if !Clflags.option_debug_compcert > 6 then *)
-  DebugPrint.debug "\nSCHEDULING_SUPERBLOCK %d\n" nr_instructions;
-
+  if !Clflags.option_debug_compcert > 6 then
+    (Printf.eprintf "\nSCHEDULING_SUPERBLOCK %d\n" nr_instructions;
+     flush stderr);
+  
   let successors = get_successors problem
   and predecessors = get_predecessors problem
   and times = Array.make (nr_instructions+1) (-1) in
@@ -524,7 +525,8 @@ let reg_pres_scheduler (problem : problem) : solution option =
                       then result := ins
                     ) ready;
                   if !result <> -1 then
-                    vector_subtract usages.(!result) current_resources)
+                    vector_subtract usages.(!result) current_resources;
+                  incr cnt)
                else
                  (incr cnt)
             );
@@ -550,7 +552,8 @@ let reg_pres_scheduler (problem : problem) : solution option =
       | i -> (assert(times.(i) < 0);
              (DebugPrint.debug "INSTR ISSUED: %d\n" i;
               if !csr_b && !Clflags.option_debug_compcert > 6 then
-                DebugPrint.debug "REGPRES: high pres class %d\n" i;
+                (Printf.eprintf "REGPRES: high pres class %d\n" i;
+                 flush stderr);
               csr_b := false;
               (* if !Clflags.option_regpres_wait_window then *)
               cnt := 0;
@@ -601,11 +604,11 @@ let reg_pres_scheduler (problem : problem) : solution option =
         if target_time > !final_time then
           final_time := target_time) predecessors.(nr_instructions);
     times.(nr_instructions) <- !final_time;
-    DebugPrint.debug_flag := false;
+    (* DebugPrint.debug_flag := false; *)
     Some times
   with Exit ->
     DebugPrint.debug "reg_pres_sched failed\n";
-    DebugPrint.debug_flag := false;
+    (* DebugPrint.debug_flag := false; *)
     None
               
 ;;
-- 
cgit 


From 1fbe45e2d1f02ef6e8fb6fe7545728a744e047b8 Mon Sep 17 00:00:00 2001
From: Sylvain Boulmé <sylvain.boulme@univ-grenoble-alpes.fr>
Date: Sat, 24 Jul 2021 11:49:14 +0200
Subject: remove default_notrap_load_value

---
 backend/RTLTunnelingproof.v | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/RTLTunnelingproof.v b/backend/RTLTunnelingproof.v
index 14a2c037..51ef0f7a 100644
--- a/backend/RTLTunnelingproof.v
+++ b/backend/RTLTunnelingproof.v
@@ -531,7 +531,7 @@ Proof.
     + eapply exec_Iload_notrap1.
       * exploit instruction_type_preserved; (simpl; eauto)||congruence.
       * rewrite <- H1. apply eval_addressing_preserved. apply symbols_preserved.
-    + apply match_states_intro; try assumption. apply set_reg_lessdef. unfold default_notrap_load_value. apply Val.lessdef_undef. apply RS.
+    + apply match_states_intro; try assumption. apply set_reg_lessdef. apply Val.lessdef_undef. apply RS.
   - (* Iload NOTRAP2 *)
     exploit eval_addressing_lessdef; try eassumption.
     apply reglist_lessdef; apply RS.
-- 
cgit