From 932f7f853fdad81c2bdc9db42ed87d106db5762f Mon Sep 17 00:00:00 2001
From: David Monniaux <david.monniaux@univ-grenoble-alpes.fr>
Date: Wed, 16 Sep 2020 10:17:27 +0200
Subject: first opweights, bogus weights

---
 aarch64/OpWeights.ml | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 aarch64/OpWeights.ml

(limited to 'aarch64')

diff --git a/aarch64/OpWeights.ml b/aarch64/OpWeights.ml
new file mode 100644
index 00000000..3662ef1c
--- /dev/null
+++ b/aarch64/OpWeights.ml
@@ -0,0 +1,19 @@
+open Op;;
+let resource_bounds = [| 1 |];;
+
+
+let latency_of_op (op : operation) (nargs : int) = 1;;
+
+let resources_of_op (op : operation) (nargs : int) = [| 1 |];;
+
+let resources_of_cond (cond : condition) (nargs : int) = [| 1 |];;
+
+let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3;;
+let latency_of_call _ _ = 6;;
+
+let resources_of_load trap chunk addressing nargs = [| 1 |];;
+
+let resources_of_store chunk addressing nargs = [| 1 |];;
+
+let resources_of_call _ _ = resource_bounds;;
+let resources_of_builtin _ = resource_bounds;;
-- 
cgit 


From d01887944e21d33c869b6b2ffc2e1dc6c5a701cd Mon Sep 17 00:00:00 2001
From: David Monniaux <david.monniaux@univ-grenoble-alpes.fr>
Date: Tue, 29 Sep 2020 18:53:58 +0200
Subject: attempt at latencies for Cortex A53

---
 aarch64/OpWeights.ml | 149 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 147 insertions(+), 2 deletions(-)

(limited to 'aarch64')

diff --git a/aarch64/OpWeights.ml b/aarch64/OpWeights.ml
index 3662ef1c..471161b7 100644
--- a/aarch64/OpWeights.ml
+++ b/aarch64/OpWeights.ml
@@ -1,8 +1,153 @@
 open Op;;
 let resource_bounds = [| 1 |];;
+let nr_non_pipelined_units = 1;;
 
-
-let latency_of_op (op : operation) (nargs : int) = 1;;
+let latency_of_op (op : operation) (nargs : int) =
+  match op with
+  | Omove 
+  | Ointconst _
+  | Olongconst _
+  | Ofloatconst _
+  | Osingleconst _
+  | Oaddrsymbol _
+  | Oaddrstack _ -> 1
+  | Oshift _ -> 2
+  | Oadd -> 1
+  | Oaddshift _ -> 2
+  | Oaddimm _
+  | Oneg -> 1                   
+  | Onegshift _ -> 2
+  | Osub -> 1
+  | Osubshift _ -> 2
+  | Omul
+  | Omuladd  
+  | Omulsub -> 4
+  | Odiv
+  | Odivu -> 29
+  | Oand -> 1
+  | Oandshift _ -> 2
+  | Oandimm _ -> 1
+  | Oor -> 1
+  | Oorshift _ -> 2
+  | Oorimm _ -> 1
+  | Oxor -> 1
+  | Oxorshift _ -> 2
+  | Oxorimm _ -> 1
+  | Onot -> 1
+  | Onotshift _ -> 2
+  | Obic -> 1
+  | Obicshift _ -> 2
+  | Oorn -> 1
+  | Oornshift _ -> 2
+  | Oeqv -> 1
+  | Oeqvshift _ -> 2
+  | Oshl 
+  | Oshr
+  | Oshru -> 2
+  | Oshrximm _ -> 6
+  | Ozext _
+  | Osext _ -> 1
+  | Oshlzext _
+  | Oshlsext _
+  | Ozextshr _
+  | Osextshr _ -> 2
+              
+(* 64-bit integer arithmetic *)
+  | Oshiftl _ -> 2
+  | Oextend _ -> 1
+  | Omakelong
+  | Olowlong 
+  | Ohighlong 
+  | Oaddl -> 1
+  | Oaddlshift _
+  | Oaddlext _ -> 2
+  | Oaddlimm _
+  | Onegl -> 1         
+  | Oneglshift _ -> 2
+  | Osubl -> 1
+  | Osublshift _
+  | Osublext _ -> 2
+  | Omull
+  | Omulladd 
+  | Omullsub 
+  | Omullhs 
+  | Omullhu -> 4
+  | Odivl -> 50
+  | Odivlu -> 50
+  | Oandl -> 1
+  | Oandlshift _ -> 2
+  | Oandlimm _
+  | Oorl -> 1 
+  | Oorlshift _ -> 2
+  | Oorlimm _
+  | Oxorl -> 1
+  | Oxorlshift _ -> 2
+  | Oxorlimm _
+  | Onotl -> 1
+  | Onotlshift _ -> 2
+  | Obicl -> 1
+  | Obiclshift _ -> 2
+  | Oornl -> 1
+  | Oornlshift _ -> 2
+  | Oeqvl -> 1
+  | Oeqvlshift _ -> 2
+  | Oshll 
+  | Oshrl
+  | Oshrlu -> 2
+  | Oshrlximm _ -> 6
+  | Ozextl _
+  | Osextl _ -> 1
+  | Oshllzext _
+  | Oshllsext _
+  | Ozextshrl _
+  | Osextshrl _ -> 2
+                 
+(* 64-bit floating-point arithmetic *)
+  | Onegf                                               (* r [rd = - r1] *)
+  | Oabsf                                               (* r [rd = abs(r1)] *)
+  | Oaddf                                               (* r [rd = r1 + r2] *)
+  | Osubf                                               (* r [rd = r1 - r2] *)
+  | Omulf                                               (* r [rd = r1 * r2] *)
+  | Odivf                                               (* r [rd = r1 / r2] *)
+(* 32-bit floating-point arithmetic *)
+  | Onegfs                                              (* r [rd = - r1] *)
+  | Oabsfs                                              (* r [rd = abs(r1)] *)
+  | Oaddfs                                              (* r [rd = r1 + r2] *)
+  | Osubfs                                              (* r [rd = r1 - r2] *)
+  | Omulfs                                              (* r [rd = r1 * r2] *)
+  | Odivfs                                              (* r [rd = r1 / r2] *)
+  | Osingleoffloat                                      (* r [rd] is [r1] truncated to single-precision float *)
+  | Ofloatofsingle                                      (* r [rd] is [r1] extended to double-precision float *)
+(*  Conversions between int and float *)
+  | Ointoffloat                                         (* r [rd = signed_int_of_float64(r1)] *)
+  | Ointuoffloat                                        (* r [rd = unsigned_int_of_float64(r1)] *)
+  | Ofloatofint                                         (* r [rd = float64_of_signed_int(r1)] *)
+  | Ofloatofintu                                        (* r [rd = float64_of_unsigned_int(r1)] *)
+  | Ointofsingle                                        (* r [rd = signed_int_of_float32(r1)] *)
+  | Ointuofsingle                                       (* r [rd = unsigned_int_of_float32(r1)] *)
+  | Osingleofint                                        (* r [rd = float32_of_signed_int(r1)] *)
+  | Osingleofintu                                       (* r [rd = float32_of_unsigned_int(r1)] *)
+  | Olongoffloat                                        (* r [rd = signed_long_of_float64(r1)] *)
+  | Olonguoffloat                                       (* r [rd = unsigned_long_of_float64(r1)] *)
+  | Ofloatoflong                                        (* r [rd = float64_of_signed_long(r1)] *)
+  | Ofloatoflongu                                       (* r [rd = float64_of_unsigned_long(r1)] *)
+  | Olongofsingle                                       (* r [rd = signed_long_of_float32(r1)] *)
+  | Olonguofsingle                                      (* r [rd = unsigned_long_of_float32(r1)] *)
+  | Osingleoflong                                       (* r [rd = float32_of_signed_long(r1)] *)
+  | Osingleoflongu                                      (* r [rd = float32_of_unsigned_int(r1)] *)
+    -> 6
+  (*  Boolean tests *)
+  | Ocmp cmp | Osel (cmp, _) ->
+     (match cmp with
+     | Ccompf _                              (* r FP comparison *)
+     | Cnotcompf _                           (* r negation of an FP comparison *)
+     | Ccompfzero _                          (* r comparison with 0.0 *)
+     | Cnotcompfzero _                       (* r negation of comparison with 0.0 *)
+     | Ccompfs _                             (* r FP comparison *)
+     | Cnotcompfs _                          (* r negation of an FP comparison *)
+     | Ccompfszero _                         (* r equal to 0.0 *)
+     | Cnotcompfszero _                     (* r not equal to 0.0 *) -> 6
+     | _ -> 1);;
 
 let resources_of_op (op : operation) (nargs : int) = [| 1 |];;
 
-- 
cgit 


From 1f0c8cf821d310e405f3ad6327870fe3321ad6e6 Mon Sep 17 00:00:00 2001
From: David Monniaux <david.monniaux@univ-grenoble-alpes.fr>
Date: Tue, 29 Sep 2020 19:19:02 +0200
Subject: try to model resources

---
 aarch64/OpWeights.ml | 169 +++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 164 insertions(+), 5 deletions(-)

(limited to 'aarch64')

diff --git a/aarch64/OpWeights.ml b/aarch64/OpWeights.ml
index 471161b7..d19f34af 100644
--- a/aarch64/OpWeights.ml
+++ b/aarch64/OpWeights.ml
@@ -1,5 +1,5 @@
 open Op;;
-let resource_bounds = [| 1 |];;
+let resource_bounds = [| 2; 2; 1; 1 |];; (* instr ; ALU ; MAC; LSU *)
 let nr_non_pipelined_units = 1;;
 
 let latency_of_op (op : operation) (nargs : int) =
@@ -149,16 +149,175 @@ let latency_of_op (op : operation) (nargs : int) =
      | Cnotcompfszero _                     (* r not equal to 0.0 *) -> 6
      | _ -> 1);;
 
-let resources_of_op (op : operation) (nargs : int) = [| 1 |];;
+let resources_of_op (op : operation) (nargs : int) =
+  match op with
+  | Omove 
+  | Ointconst _
+  | Olongconst _
+  | Ofloatconst _
+  | Osingleconst _
+  | Oaddrsymbol _
+  | Oaddrstack _
+(* 32-bit integer arithmetic *)
+  | Oshift _
+  | Oadd 
+  | Oaddshift _
+  | Oaddimm _
+  | Oneg         
+  | Onegshift _
+  | Osub
+  | Osubshift _ -> [| 1 ; 1; 0; 0 |]
+  | Omul 
+  | Omuladd
+  | Omulsub -> [| 1; 1; 1; 0 |]
+  | Odiv 
+  | Odivu -> [| 1; 0; 0; 0 |]
+  | Oand 
+  | Oandshift _
+  | Oandimm _
+  | Oor
+  | Oorshift _
+  | Oorimm _
+  | Oxor
+  | Oxorshift _
+  | Oxorimm _
+  | Onot
+  | Onotshift _
+  | Obic
+  | Obicshift _
+  | Oorn
+  | Oornshift _
+  | Oeqv
+  | Oeqvshift _
+  | Oshl
+  | Oshr
+  | Oshru
+  | Oshrximm _
+  | Ozext _
+  | Osext  _
+  | Oshlzext _
+  | Oshlsext _
+  | Ozextshr _
+  | Osextshr _
+  
+(* 64-bit integer arithmetic *)
+  | Oshiftl _
+  | Oextend _
+  | Omakelong
+  | Olowlong 
+  | Ohighlong
+  | Oaddl 
+  | Oaddlshift _
+  | Oaddlext _
+  | Oaddlimm _
+  | Onegl        
+  | Oneglshift _
+  | Osubl
+  | Osublshift _
+  | Osublext _ -> [| 1 ; 1 ; 0; 0 |]
+  | Omull 
+  | Omulladd 
+  | Omullsub
+  | Omullhs 
+  | Omullhu -> [| 1 ; 1 ; 1; 0 |]
+  | Odivl 
+  | Odivlu -> [| 1; 0; 0; 0 |]
+  | Oandl 
+  | Oandlshift _
+  | Oandlimm _
+  | Oorl
+  | Oorlshift _
+  | Oorlimm _
+  | Oxorl
+  | Oxorlshift _
+  | Oxorlimm _
+  | Onotl
+  | Onotlshift _
+  | Obicl
+  | Obiclshift _
+  | Oornl
+  | Oornlshift _
+  | Oeqvl
+  | Oeqvlshift _
+  | Oshll
+  | Oshrl
+  | Oshrlu 
+  | Oshrlximm _
+  | Ozextl _
+  | Osextl _
+  | Oshllzext _
+  | Oshllsext _
+  | Ozextshrl _
+  | Osextshrl _ -> [| 1; 1; 0; 0 |]
+(* 64-bit floating-point arithmetic *)
+  | Onegf                                               (* r [rd = - r1] *)
+  | Oabsf                                               (* r [rd = abs(r1)] *)
+  | Oaddf                                               (* r [rd = r1 + r2] *)
+  | Osubf                                               (* r [rd = r1 - r2] *)
+  | Omulf                                               (* r [rd = r1 * r2] *)
+  | Odivf                                               (* r [rd = r1 / r2] *)
+(* 32-bit floating-point arithmetic *)
+  | Onegfs                                              (* r [rd = - r1] *)
+  | Oabsfs                                              (* r [rd = abs(r1)] *)
+  | Oaddfs                                              (* r [rd = r1 + r2] *)
+  | Osubfs                                              (* r [rd = r1 - r2] *)
+  | Omulfs                                              (* r [rd = r1 * r2] *)
+  | Odivfs                                              (* r [rd = r1 / r2] *)
+  | Osingleoffloat                                      (* r [rd] is [r1] truncated to single-precision float *)
+  | Ofloatofsingle                                      (* r [rd] is [r1] extended to double-precision float *)
+(*  Conversions between int and float *)
+  | Ointoffloat                                         (* r [rd = signed_int_of_float64(r1)] *)
+  | Ointuoffloat                                        (* r [rd = unsigned_int_of_float64(r1)] *)
+  | Ofloatofint                                         (* r [rd = float64_of_signed_int(r1)] *)
+  | Ofloatofintu                                        (* r [rd = float64_of_unsigned_int(r1)] *)
+  | Ointofsingle                                        (* r [rd = signed_int_of_float32(r1)] *)
+  | Ointuofsingle                                       (* r [rd = unsigned_int_of_float32(r1)] *)
+  | Osingleofint                                        (* r [rd = float32_of_signed_int(r1)] *)
+  | Osingleofintu                                       (* r [rd = float32_of_unsigned_int(r1)] *)
+  | Olongoffloat                                        (* r [rd = signed_long_of_float64(r1)] *)
+  | Olonguoffloat                                       (* r [rd = unsigned_long_of_float64(r1)] *)
+  | Ofloatoflong                                        (* r [rd = float64_of_signed_long(r1)] *)
+  | Ofloatoflongu                                       (* r [rd = float64_of_unsigned_long(r1)] *)
+  | Olongofsingle                                       (* r [rd = signed_long_of_float32(r1)] *)
+  | Olonguofsingle                                      (* r [rd = unsigned_long_of_float32(r1)] *)
+  | Osingleoflong                                       (* r [rd = float32_of_signed_long(r1)] *)
+  | Osingleoflongu                                      (* r [rd = float32_of_unsigned_int(r1)] *)
+    -> [| 1 ; 1; 1; 0 |]
+     
+(* Boolean tests *)
+  | Ocmp cmp | Osel (cmp, _) ->
+     (match cmp with
+     | Ccompf _                              (* r FP comparison *)
+     | Cnotcompf _                           (* r negation of an FP comparison *)
+     | Ccompfzero _                          (* r comparison with 0.0 *)
+     | Cnotcompfzero _                       (* r negation of comparison with 0.0 *)
+     | Ccompfs _                             (* r FP comparison *)
+     | Cnotcompfs _                          (* r negation of an FP comparison *)
+     | Ccompfszero _                         (* r equal to 0.0 *)
+     | Cnotcompfszero _                     (* r not equal to 0.0 *) ->
+        [| 1; 1; 1; 0 |]
+     | _ -> [| 1; 1; 0; 0 |] );;
+  
 
-let resources_of_cond (cond : condition) (nargs : int) = [| 1 |];;
+let resources_of_cond (cmp : condition) (nargs : int) =
+      (match cmp with
+     | Ccompf _                              (* r FP comparison *)
+     | Cnotcompf _                           (* r negation of an FP comparison *)
+     | Ccompfzero _                          (* r comparison with 0.0 *)
+     | Cnotcompfzero _                       (* r negation of comparison with 0.0 *)
+     | Ccompfs _                             (* r FP comparison *)
+     | Cnotcompfs _                          (* r negation of an FP comparison *)
+     | Ccompfszero _                         (* r equal to 0.0 *)
+     | Cnotcompfszero _                     (* r not equal to 0.0 *) ->
+        [| 1; 1; 1; 0 |]
+     | _ -> [| 1; 1; 0; 0 |] );;
 
 let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3;;
 let latency_of_call _ _ = 6;;
 
-let resources_of_load trap chunk addressing nargs = [| 1 |];;
+let resources_of_load trap chunk addressing nargs = [| 1; 0; 0; 1 |];;
 
-let resources_of_store chunk addressing nargs = [| 1 |];;
+let resources_of_store chunk addressing nargs = [| 1; 0; 0; 1 |];;
 
 let resources_of_call _ _ = resource_bounds;;
 let resources_of_builtin _ = resource_bounds;;
-- 
cgit 


From f6b0bfa541f69b4a563ac99a864284939067e994 Mon Sep 17 00:00:00 2001
From: David Monniaux <david.monniaux@univ-grenoble-alpes.fr>
Date: Tue, 29 Sep 2020 19:35:36 +0200
Subject: attempt at separating the divisions

---
 aarch64/OpWeights.ml | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'aarch64')

diff --git a/aarch64/OpWeights.ml b/aarch64/OpWeights.ml
index d19f34af..01a1a553 100644
--- a/aarch64/OpWeights.ml
+++ b/aarch64/OpWeights.ml
@@ -298,6 +298,11 @@ let resources_of_op (op : operation) (nargs : int) =
         [| 1; 1; 1; 0 |]
      | _ -> [| 1; 1; 0; 0 |] );;
   
+let non_pipelined_resources_of_op (op : operation) (nargs : int) =
+  match op with
+  | Odiv | Odivu -> [| 29 |]
+  | Odivl | Odivlu -> [| 50 |]
+  | _ -> [| -1 |];;
 
 let resources_of_cond (cmp : condition) (nargs : int) =
       (match cmp with
-- 
cgit 


From 827bdabf1242720979848cf473263a54fcf212f5 Mon Sep 17 00:00:00 2001
From: David Monniaux <david.monniaux@univ-grenoble-alpes.fr>
Date: Tue, 29 Sep 2020 20:29:05 +0200
Subject: floating-point division uses the divisor

---
 aarch64/OpWeights.ml | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'aarch64')

diff --git a/aarch64/OpWeights.ml b/aarch64/OpWeights.ml
index 01a1a553..1b48bc0f 100644
--- a/aarch64/OpWeights.ml
+++ b/aarch64/OpWeights.ml
@@ -108,14 +108,12 @@ let latency_of_op (op : operation) (nargs : int) =
   | Oaddf                                               (* r [rd = r1 + r2] *)
   | Osubf                                               (* r [rd = r1 - r2] *)
   | Omulf                                               (* r [rd = r1 * r2] *)
-  | Odivf                                               (* r [rd = r1 / r2] *)
 (* 32-bit floating-point arithmetic *)
   | Onegfs                                              (* r [rd = - r1] *)
   | Oabsfs                                              (* r [rd = abs(r1)] *)
   | Oaddfs                                              (* r [rd = r1 + r2] *)
   | Osubfs                                              (* r [rd = r1 - r2] *)
   | Omulfs                                              (* r [rd = r1 * r2] *)
-  | Odivfs                                              (* r [rd = r1 / r2] *)
   | Osingleoffloat                                      (* r [rd] is [r1] truncated to single-precision float *)
   | Ofloatofsingle                                      (* r [rd] is [r1] extended to double-precision float *)
 (*  Conversions between int and float *)
@@ -136,6 +134,8 @@ let latency_of_op (op : operation) (nargs : int) =
   | Osingleoflong                                       (* r [rd = float32_of_signed_long(r1)] *)
   | Osingleoflongu                                      (* r [rd = float32_of_unsigned_int(r1)] *)
     -> 6
+  | Odivf -> 50                                         (* r [rd = r1 / r2] *)
+  | Odivfs -> 20
   (*  Boolean tests *)
   | Ocmp cmp | Osel (cmp, _) ->
      (match cmp with
@@ -255,7 +255,7 @@ let resources_of_op (op : operation) (nargs : int) =
   | Oaddf                                               (* r [rd = r1 + r2] *)
   | Osubf                                               (* r [rd = r1 - r2] *)
   | Omulf                                               (* r [rd = r1 * r2] *)
-  | Odivf                                               (* r [rd = r1 / r2] *)
+  | Odivf
 (* 32-bit floating-point arithmetic *)
   | Onegfs                                              (* r [rd = - r1] *)
   | Oabsfs                                              (* r [rd = abs(r1)] *)
@@ -301,7 +301,8 @@ let resources_of_op (op : operation) (nargs : int) =
 let non_pipelined_resources_of_op (op : operation) (nargs : int) =
   match op with
   | Odiv | Odivu -> [| 29 |]
-  | Odivl | Odivlu -> [| 50 |]
+  | Odivfs -> [| 20 |]
+  | Odivl | Odivlu | Odivf -> [| 50 |]
   | _ -> [| -1 |];;
 
 let resources_of_cond (cmp : condition) (nargs : int) =
-- 
cgit 


From c455f69d66b186414c8bb1c5cd28ce8f29e965aa Mon Sep 17 00:00:00 2001
From: David Monniaux <david.monniaux@univ-grenoble-alpes.fr>
Date: Wed, 30 Sep 2020 12:00:33 +0200
Subject: AArch64 division no longer "traps"

---
 aarch64/Asmgenproof1.v     |  87 +++++++++++++++++++++++++++++++-
 aarch64/ConstpropOpproof.v | 121 +++++++++++++++++++++++++++++++--------------
 aarch64/Op.v               |  64 ++++++++++++------------
 aarch64/SelectLongproof.v  |  10 ++--
 aarch64/SelectOpproof.v    |  12 +++--
 aarch64/ValueAOp.v         |   8 +--
 6 files changed, 221 insertions(+), 81 deletions(-)

(limited to 'aarch64')

diff --git a/aarch64/Asmgenproof1.v b/aarch64/Asmgenproof1.v
index 0e36bd05..35f1f2d7 100644
--- a/aarch64/Asmgenproof1.v
+++ b/aarch64/Asmgenproof1.v
@@ -881,7 +881,40 @@ Proof.
     split. subst v; Simpl.
     split; intros; Simpl.
 Qed.
- 
+
+
+Lemma exec_shrx32_none: forall (rd r1: ireg) (n: int) k (rs: regset) m,
+  Val.shrx rs#r1 (Vint n) = None ->
+  r1 <> X16 ->
+  (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
+  exists rs',
+     exec_straight ge fn (shrx32 rd r1 n k) rs m k rs' m
+  /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
+  /\ rs' # RA = rs # RA.
+Proof.
+  unfold shrx32; intros.
+  destruct (Int.eq n Int.zero) eqn:E.
+- econstructor; split. apply exec_straight_one; [simpl;eauto|auto]. 
+  split.
+  + intros. Simpl.
+  + Simpl.
+- generalize (Int.eq_spec n Int.one).
+  destruct (Int.eq n Int.one); intro ONE.
+  * subst n.
+    econstructor; split. eapply exec_straight_two.
+    all: cbn; auto.
+    split.
+    ** intros.
+       destruct (Val.add _ _); cbn; Simpl.
+    ** Simpl.
+  * econstructor; split. eapply exec_straight_three.
+    all: cbn; auto.
+    split.
+    ** intros.
+       destruct (Val.shr _ _); cbn; Simpl.
+    ** Simpl.
+Qed.
+
 Lemma exec_shrx64: forall (rd r1: ireg) (n: int) k v (rs: regset) m,
   Val.shrxl rs#r1 (Vint n) = Some v ->
   r1 <> X16 ->
@@ -918,6 +951,38 @@ Proof.
     split; intros; Simpl.
 Qed.
 
+Lemma exec_shrx64_none: forall (rd r1: ireg) (n: int) k (rs: regset) m,
+  Val.shrxl rs#r1 (Vint n) = None ->
+  r1 <> X16 ->
+  (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
+  exists rs',
+     exec_straight ge fn (shrx64 rd r1 n k) rs m k rs' m
+  /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
+  /\ rs' # RA = rs # RA.
+Proof.
+  unfold shrx64; intros.
+  destruct (Int.eq n Int.zero) eqn:E.
+- econstructor; split. apply exec_straight_one; [simpl;eauto|auto]. 
+  split.
+  + intros. Simpl.
+  + Simpl.
+- generalize (Int.eq_spec n Int.one).
+  destruct (Int.eq n Int.one); intro ONE.
+  * subst n.
+    econstructor; split. eapply exec_straight_two.
+    all: cbn; auto.
+    split.
+    ** intros.
+       destruct (Val.addl _ _); cbn; Simpl.
+    ** Simpl.
+  * econstructor; split. eapply exec_straight_three.
+    all: cbn; auto.
+    split.
+    ** intros.
+       destruct (Val.shrl _ _); cbn; Simpl.
+    ** Simpl.
+Qed.
+
 (** Condition bits *)
 
 Lemma compare_int_spec: forall rs v1 v2 m,
@@ -1660,10 +1725,19 @@ Local Transparent Val.add.
   TranslOpBase.
   destruct (eval_shift s (rs x0) a); auto. simpl. rewrite Int.or_zero_l; auto.
 - (* shrx *)
-  exploit (exec_shrx32 x x0 n); eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption.
+  destruct (Val.shrx (rs x0) (Vint n)) eqn:TOTAL.
+  {
+    exploit (exec_shrx32 x x0 n); eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption.
   intros (rs' & A & B & C & D).
   econstructor; split. eexact A. split. rewrite B; auto.
   split; auto.
+  }
+  exploit (exec_shrx32_none x x0 n); eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption.
+  intros (rs' & A & B & C).
+  econstructor; split. { eexact A. }
+  split. { cbn. constructor. }
+  split; auto.           
+  
 - (* zero-ext *)
   TranslOpBase.
   destruct (rs x0); auto; simpl. rewrite Int.shl_zero. auto.
@@ -1736,9 +1810,18 @@ Local Transparent Val.add.
   TranslOpBase.
   destruct (eval_shiftl s (rs x0) a); auto. simpl. rewrite Int64.or_zero_l; auto.
 - (* shrx *)
+  destruct (Val.shrxl (rs x0) (Vint n)) eqn:TOTAL.
+  {
   exploit (exec_shrx64 x x0 n); eauto with asmgen.
   apply (ireg_of_not_RA'' res); eassumption. intros (rs' & A & B & C & D ).
   econstructor; split. eexact A. split. rewrite B; auto. auto.
+  }
+  exploit (exec_shrx64_none x x0 n); eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption.
+  intros (rs' & A & B & C).
+  econstructor; split. { eexact A. }
+  split. { cbn. constructor. }
+  split; auto.           
+
 - (* zero-ext-l *)
   TranslOpBase.
   destruct (rs x0); auto; simpl. rewrite Int64.shl'_zero. auto.
diff --git a/aarch64/ConstpropOpproof.v b/aarch64/ConstpropOpproof.v
index deab7cd4..c777062c 100644
--- a/aarch64/ConstpropOpproof.v
+++ b/aarch64/ConstpropOpproof.v
@@ -335,40 +335,63 @@ Qed.
 
 Lemma make_divimm_correct:
   forall n r1 r2 v,
-  Val.divs e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divs e#r1 e#r2) = v ->
   e#r2 = Vint n ->
   let (op, args) := make_divimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divimm.
-  predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H.
-  destruct (e#r1) eqn:?;
-    try (rewrite Val.divs_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto);
-    inv H; auto.
-  destruct (Int.is_power2 n) eqn:?.
-  destruct (Int.ltu i (Int.repr 31)) eqn:?.
-  exists v; split; auto. simpl. eapply Val.divs_pow2; eauto. congruence.
-  exists v; auto.
-  exists v; auto.
+  predSpec Int.eq Int.eq_spec n Int.one; intros; subst; rewrite H0.
+  { destruct (e # r1) eqn:Er1.
+    all: try (cbn; exists (e # r1); split; auto; fail).
+    rewrite Val.divs_one.
+    cbn.
+    rewrite Er1.
+    exists (Vint i); split; auto.
+ }
+ destruct (Int.is_power2 n) eqn:Power2.
+ {
+    destruct (Int.ltu i (Int.repr 31)) eqn:iLT31.
+    {
+      cbn.
+      exists (Val.maketotal (Val.shrx e # r1 (Vint i))); split; auto.
+      destruct (Val.divs e # r1 (Vint n)) eqn:DIVS; cbn; auto.
+      rewrite Val.divs_pow2 with (y:=v) (n:=n).
+      cbn.
+      all: auto.
+    }
+    exists (Val.maketotal (Val.divs e # r1 (Vint n))); split; cbn; auto; congruence.
+ }
+ exists (Val.maketotal (Val.divs e # r1 (Vint n))); split; cbn; auto; congruence.
 Qed.
 
+
 Lemma make_divuimm_correct:
   forall n r1 r2 v,
-  Val.divu e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divu e#r1 e#r2) = v ->
   e#r2 = Vint n ->
   let (op, args) := make_divuimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divuimm.
-  predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H.
-  destruct (e#r1) eqn:?;
-    try (rewrite Val.divu_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto);
-    inv H; auto.
-  destruct (Int.is_power2 n) eqn:?.
-  econstructor; split. simpl; eauto.
-  rewrite mk_amount32_eq by (eapply Int.is_power2_range; eauto).
-  rewrite H0 in H. erewrite Val.divu_pow2 by eauto. auto.
-  exists v; auto.
+  predSpec Int.eq Int.eq_spec n Int.one; intros; subst; rewrite H0.
+  { destruct (e # r1) eqn:Er1.
+    all: try (cbn; exists (e # r1); split; auto; fail).
+    rewrite Val.divu_one.
+    cbn.
+    rewrite Er1.
+    exists (Vint i); split; auto.
+ }
+ destruct (Int.is_power2 n) eqn:Power2.
+ {
+   cbn.
+   rewrite mk_amount32_eq by (eapply Int.is_power2_range; eauto).
+   exists (Val.shru e # r1 (Vint i)); split; auto.
+   destruct (Val.divu e # r1 (Vint n)) eqn:DIVU; cbn; auto.
+   rewrite Val.divu_pow2 with (y:=v) (n:=n).
+   all: auto.
+ }
+ exists (Val.maketotal (Val.divu e # r1 (Vint n))); split; cbn; auto; congruence.
 Qed.
 
 Lemma make_andimm_correct:
@@ -503,34 +526,60 @@ Qed.
 
 Lemma make_divlimm_correct:
   forall n r1 r2 v,
-  Val.divls e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divls e#r1 e#r2) = v ->
   e#r2 = Vlong n ->
   let (op, args) := make_divlimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divlimm.
-  destruct (Int64.is_power2' n) eqn:?. destruct (Int.ltu i (Int.repr 63)) eqn:?.
-  rewrite H0 in H. econstructor; split. simpl; eauto. eapply Val.divls_pow2; eauto. auto.
-  exists v; auto.
-  exists v; auto.
+ destruct (Int64.is_power2' n) eqn:Power2.
+ {
+    destruct (Int.ltu i (Int.repr 63)) eqn:iLT63.
+    {
+      cbn.
+      exists (Val.maketotal (Val.shrxl e # r1 (Vint i))); split; auto.
+      rewrite H0 in H.
+      destruct (Val.divls e # r1 (Vlong n)) eqn:DIVS; cbn in H; auto.
+      {
+        subst v0.
+        rewrite Val.divls_pow2 with (y:=v) (n:=n).
+        cbn.
+        all: auto.
+      }
+      subst. auto.
+    }
+    cbn. subst. rewrite H0.
+    exists (Val.maketotal (Val.divls e # r1 (Vlong n))); split; auto.
+ }
+ cbn. subst. rewrite H0.
+ exists (Val.maketotal (Val.divls e # r1 (Vlong n))); split; auto.
 Qed.
 
+
 Lemma make_divluimm_correct:
   forall n r1 r2 v,
-  Val.divlu e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divlu e#r1 e#r2) = v ->
   e#r2 = Vlong n ->
   let (op, args) := make_divluimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divluimm.
   destruct (Int64.is_power2' n) eqn:?.
+  {
   econstructor; split. simpl; eauto.
-  rewrite mk_amount64_eq by (eapply Int64.is_power2'_range; eauto).
-  rewrite H0 in H. destruct (e#r1); inv H. destruct (Int64.eq n Int64.zero); inv H2.
-  simpl.
-  erewrite Int64.is_power2'_range by eauto.    
-  erewrite Int64.divu_pow2' by eauto.  auto. 
-  exists v; auto.
+  rewrite H0 in H. destruct (e#r1); inv H.
+  all: cbn; auto.
+  {
+    rewrite mk_amount64_eq by (eapply Int64.is_power2'_range; eauto).
+    destruct (Int64.eq n Int64.zero); cbn; auto.
+    erewrite Int64.is_power2'_range by eauto.    
+    erewrite Int64.divu_pow2' by eauto.  auto.
+  }
+  }
+  exists v; split; auto.
+  cbn.
+  rewrite H.
+  reflexivity.
 Qed.
 
 Lemma make_andlimm_correct:
@@ -679,10 +728,10 @@ Proof.
   InvApproxRegs; SimplVM; inv H0. apply make_mulimm_correct; auto.
 - (* divs *)
   assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto.
-  apply make_divimm_correct; auto.
+  apply make_divimm_correct; auto. congruence.
 - (* divu *)
   assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto.
-  apply make_divuimm_correct; auto.
+  apply make_divuimm_correct; auto. congruence.
 - (* and 1 *)
   rewrite Val.and_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andimm_correct; auto.
 - (* and 2 *)
@@ -745,10 +794,10 @@ Proof.
   InvApproxRegs; SimplVM; inv H0. apply make_mullimm_correct; auto.
 - (* divl *)
   assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto.
-  apply make_divlimm_correct; auto.
+  apply make_divlimm_correct; auto. congruence.
 - (* divlu *)
   assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto.
-  apply make_divluimm_correct; auto.
+  apply make_divluimm_correct; auto. congruence.
 - (* andl 1 *)
   rewrite Val.andl_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andlimm_correct; auto.
 - (* andl 2 *)
diff --git a/aarch64/Op.v b/aarch64/Op.v
index afc25aa6..30f806d3 100644
--- a/aarch64/Op.v
+++ b/aarch64/Op.v
@@ -386,8 +386,8 @@ Definition eval_operation
   | Omul, v1 :: v2 :: nil => Some (Val.mul v1 v2)
   | Omuladd, v1 :: v2 :: v3 :: nil => Some (Val.add v1 (Val.mul v2 v3))
   | Omulsub, v1 :: v2 :: v3 :: nil => Some (Val.sub v1 (Val.mul v2 v3))
-  | Odiv, v1 :: v2 :: nil => Val.divs v1 v2
-  | Odivu, v1 :: v2 :: nil => Val.divu v1 v2
+  | Odiv, v1 :: v2 :: nil => Some (Val.maketotal (Val.divs v1 v2))
+  | Odivu, v1 :: v2 :: nil => Some (Val.maketotal (Val.divu v1 v2))
   | Oand, v1 :: v2 :: nil => Some (Val.and v1 v2)
   | Oandshift s a, v1 :: v2 :: nil => Some (Val.and v1 (eval_shift s v2 a))
   | Oandimm n, v1 :: nil => Some (Val.and v1 (Vint n))
@@ -408,7 +408,7 @@ Definition eval_operation
   | Oshl, v1 :: v2 :: nil => Some (Val.shl v1 v2)
   | Oshr, v1 :: v2 :: nil => Some (Val.shr v1 v2)
   | Oshru, v1 :: v2 :: nil => Some (Val.shru v1 v2)
-  | Oshrximm n, v1::nil => Val.shrx v1 (Vint n)
+  | Oshrximm n, v1::nil => Some (Val.maketotal (Val.shrx v1 (Vint n)))
   | Ozext s, v1 :: nil => Some (Val.zero_ext s v1)
   | Osext s, v1 :: nil => Some (Val.sign_ext s v1)
   | Oshlzext s a, v1 :: nil => Some (Val.shl (Val.zero_ext s v1) (Vint a))
@@ -435,8 +435,8 @@ Definition eval_operation
   | Omullsub, v1 :: v2 :: v3 :: nil => Some (Val.subl v1 (Val.mull v2 v3))
   | Omullhs, v1::v2::nil => Some (Val.mullhs v1 v2)
   | Omullhu, v1::v2::nil => Some (Val.mullhu v1 v2)
-  | Odivl, v1 :: v2 :: nil => Val.divls v1 v2
-  | Odivlu, v1 :: v2 :: nil => Val.divlu v1 v2
+  | Odivl, v1 :: v2 :: nil => Some (Val.maketotal (Val.divls v1 v2))
+  | Odivlu, v1 :: v2 :: nil => Some (Val.maketotal (Val.divlu v1 v2))
   | Oandl, v1 :: v2 :: nil => Some (Val.andl v1 v2)
   | Oandlshift s a, v1 :: v2 :: nil => Some (Val.andl v1 (eval_shiftl s v2 a))
   | Oandlimm n, v1 :: nil => Some (Val.andl v1 (Vlong n))
@@ -457,7 +457,7 @@ Definition eval_operation
   | Oshll, v1 :: v2 :: nil => Some (Val.shll v1 v2)
   | Oshrl, v1 :: v2 :: nil => Some (Val.shrl v1 v2)
   | Oshrlu, v1 :: v2 :: nil => Some (Val.shrlu v1 v2)
-  | Oshrlximm n, v1::nil => Val.shrxl v1 (Vint n)
+  | Oshrlximm n, v1::nil => Some (Val.maketotal (Val.shrxl v1 (Vint n)))
   | Ozextl s, v1 :: nil => Some (Val.zero_ext_l s v1)
   | Osextl s, v1 :: nil => Some (Val.sign_ext_l s v1)
   | Oshllzext s a, v1 :: nil => Some (Val.shll (Val.zero_ext_l s v1) (Vint a))
@@ -788,10 +788,10 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0... destruct v1...
   - apply type_add.
   - apply type_sub.
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2...
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int.eq i0 Int.zero); inv H2...
+  - destruct v0; destruct v1; cbn in *; trivial.
+    destruct (_  || _); trivial...
+  - destruct v0; destruct v1; cbn in *; trivial.
+    destruct (Int.eq i0 Int.zero); constructor.
   - destruct v0... destruct v1...
   - destruct v0... destruct (eval_shift s v1 a)...
   - destruct v0...
@@ -812,7 +812,8 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
-  - destruct v0; simpl in H0; try discriminate. destruct (Int.ltu n (Int.repr 31)); inv H0...
+  - destruct v0; cbn; trivial.
+    destruct (Int.ltu n (Int.repr 31)); cbn; trivial.
   - destruct v0...
   - destruct v0...
   - destruct (Val.zero_ext s v0)... simpl; rewrite a32_range... 
@@ -843,10 +844,10 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - apply type_subl.
   - destruct v0... destruct v1...
   - destruct v0... destruct v1...
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int64.eq i0 Int64.zero || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2...
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int64.eq i0 Int64.zero); inv H2...
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (_ || _); cbn; trivial.
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (Int64.eq i0 Int64.zero); cbn; trivial.
   - destruct v0... destruct v1...
   - destruct v0... destruct (eval_shiftl s v1 a)...
   - destruct v0...
@@ -867,7 +868,8 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
-  - destruct v0; simpl in H0; try discriminate. destruct (Int.ltu n (Int.repr 63)); inv H0...
+  - destruct v0; cbn; trivial.
+    destruct (Int.ltu n (Int.repr 63)); cbn; trivial.
   - destruct v0...
   - destruct v0...
   - destruct (Val.zero_ext_l s v0)... simpl; rewrite a64_range... 
@@ -1409,12 +1411,12 @@ Proof.
   - apply Val.add_inject; auto. inv H2; inv H3; simpl; auto.
   - apply Val.sub_inject; auto. inv H2; inv H3; simpl; auto.
   (* div, divu *)
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int.eq i0 Int.zero
-              || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2.
-    TrivialExists.
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int.eq i0 Int.zero); inv H2. TrivialExists.
+  - inv H4; inv H2; trivial. cbn.
+    destruct (_ || _); cbn;
+    constructor.
+  - inv H4; inv H2; trivial. cbn.
+    destruct (Int.eq i0 Int.zero); cbn;
+    constructor.
   (* and*)
   - inv H4; inv H2; simpl; auto. 
   - generalize (eval_shift_inject s a H2); intros J; inv H4; inv J; simpl; auto.
@@ -1446,8 +1448,8 @@ Proof.
   (* shru *)
   - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int.iwordsize); auto.
   (* shrx *)
-  - inv H4; simpl in H1; try discriminate. simpl.
-    destruct (Int.ltu n (Int.repr 31)); inv H1. TrivialExists.
+  - inv H4; cbn; trivial.
+    destruct (Int.ltu n (Int.repr 31)); inv H; cbn; trivial.
   (* shift-ext *)
   - inv H4; simpl; auto.
   - inv H4; simpl; auto.
@@ -1482,12 +1484,10 @@ Proof.
   - inv H4; inv H2; simpl; auto.
   - inv H4; inv H2; simpl; auto.
   (* divl, divlu *)
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int64.eq i0 Int64.zero
-              || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2.
-    TrivialExists.
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int64.eq i0 Int64.zero); inv H2. TrivialExists.
+  - inv H4; inv H2; cbn; trivial.
+    destruct (_ || _); cbn; trivial.
+  - inv H4; inv H2; cbn; trivial.
+    destruct (Int64.eq i0 Int64.zero); cbn; trivial.
   (* andl *)
   - inv H4; inv H2; simpl; auto. 
   - generalize (eval_shiftl_inject s a H2); intros J; inv H4; inv J; simpl; auto.
@@ -1519,8 +1519,8 @@ Proof.
   (* shrlu *)
   - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
   (* shrlx *)
-  - inv H4; simpl in H1; try discriminate. simpl.
-    destruct (Int.ltu n (Int.repr 63)); inv H1. TrivialExists.
+  - inv H4; cbn; trivial.
+    destruct (Int.ltu n (Int.repr 63)); inv H; cbn; trivial.
   (* shift-ext *)
   - inv H4; simpl; auto.
   - inv H4; simpl; auto.
diff --git a/aarch64/SelectLongproof.v b/aarch64/SelectLongproof.v
index 60dc1a12..c1847638 100644
--- a/aarch64/SelectLongproof.v
+++ b/aarch64/SelectLongproof.v
@@ -559,25 +559,29 @@ Qed.
 Theorem eval_divls_base: partial_binary_constructor_sound divls_base Val.divls.
 Proof.
   red; intros; unfold divls_base; TrivialExists.
+  cbn. rewrite H1. reflexivity.
 Qed.
 
 Theorem eval_modls_base: partial_binary_constructor_sound modls_base Val.modls.
 Proof.
   red; intros; unfold modls_base, modl_aux.
   exploit Val.modls_divls; eauto. intros (q & A & B). subst z.
-  TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+  TrivialExists. repeat (econstructor; eauto with evalexpr).
+  rewrite A. reflexivity.
 Qed.
 
 Theorem eval_divlu_base: partial_binary_constructor_sound divlu_base Val.divlu.
 Proof.
   red; intros; unfold divlu_base; TrivialExists.
+  cbn. rewrite H1. reflexivity.
 Qed.
 
 Theorem eval_modlu_base: partial_binary_constructor_sound modlu_base Val.modlu.
 Proof.
   red; intros; unfold modlu_base, modl_aux.
   exploit Val.modlu_divlu; eauto. intros (q & A & B). subst z.
-  TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+  TrivialExists. repeat (econstructor; eauto with evalexpr).
+  rewrite A. reflexivity.
 Qed.
 
 Theorem eval_shrxlimm:
@@ -592,7 +596,7 @@ Proof.
   destruct x; simpl in H0; try discriminate.
   change (Int.ltu Int.zero (Int.repr 63)) with true in H0; inv H0.
   rewrite Int64.shrx'_zero. auto.
-- TrivialExists.
+- TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 (** General shifts *)
diff --git a/aarch64/SelectOpproof.v b/aarch64/SelectOpproof.v
index 3379cbd8..c7898193 100644
--- a/aarch64/SelectOpproof.v
+++ b/aarch64/SelectOpproof.v
@@ -666,7 +666,8 @@ Theorem eval_divs_base:
   Val.divs x y = Some z ->
   exists v, eval_expr ge sp e m le (divs_base a b) v /\ Val.lessdef z v.
 Proof.
-  intros; unfold divs_base; TrivialExists.
+  intros; unfold divs_base; TrivialExists; cbn.
+  rewrite H1. reflexivity.
 Qed.
 
 Theorem eval_mods_base:
@@ -678,7 +679,8 @@ Theorem eval_mods_base:
 Proof.
   intros; unfold mods_base, mod_aux.
   exploit Val.mods_divs; eauto. intros (q & A & B). subst z.
-  TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+  TrivialExists. repeat (econstructor; eauto with evalexpr).
+  cbn. rewrite A. reflexivity.
 Qed.
 
 Theorem eval_divu_base:
@@ -689,6 +691,7 @@ Theorem eval_divu_base:
   exists v, eval_expr ge sp e m le (divu_base a b) v /\ Val.lessdef z v.
 Proof.
   intros; unfold divu_base; TrivialExists.
+  cbn. rewrite H1. reflexivity.
 Qed.
 
 Theorem eval_modu_base:
@@ -700,7 +703,8 @@ Theorem eval_modu_base:
 Proof.
   intros; unfold modu_base, mod_aux.
   exploit Val.modu_divu; eauto. intros (q & A & B). subst z.
-  TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+  TrivialExists. repeat (econstructor; eauto with evalexpr).
+  rewrite A. reflexivity.
 Qed.
 
 Theorem eval_shrximm:
@@ -715,7 +719,7 @@ Proof.
   destruct x; simpl in H0; try discriminate.
   change (Int.ltu Int.zero (Int.repr 31)) with true in H0; inv H0.
   rewrite Int.shrx_zero by (compute; auto). auto.
-- TrivialExists.
+- TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 (** General shifts *)
diff --git a/aarch64/ValueAOp.v b/aarch64/ValueAOp.v
index e0d98c85..d379bbe8 100644
--- a/aarch64/ValueAOp.v
+++ b/aarch64/ValueAOp.v
@@ -96,8 +96,8 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   | Omul, v1::v2::nil => mul v1 v2
   | Omuladd, v1::v2::v3::nil => add v1 (mul v2 v3)
   | Omulsub, v1::v2::v3::nil => sub v1 (mul v2 v3)
-  | Odiv, v1::v2::nil => divs v1 v2
-  | Odivu, v1::v2::nil => divu v1 v2
+  | Odiv, v1::v2::nil => divs_total v1 v2
+  | Odivu, v1::v2::nil => divu_total v1 v2
   | Oand, v1::v2::nil => and v1 v2
   | Oandshift s a, v1::v2::nil => and v1 (eval_static_shift s v2 a)
   | Oandimm n, v1::nil => and v1 (I n)
@@ -145,8 +145,8 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   | Omullsub, v1::v2::v3::nil => subl v1 (mull v2 v3)
   | Omullhs, v1::v2::nil => mullhs v1 v2
   | Omullhu, v1::v2::nil => mullhu v1 v2
-  | Odivl, v1::v2::nil => divls v1 v2
-  | Odivlu, v1::v2::nil => divlu v1 v2
+  | Odivl, v1::v2::nil => divls_total v1 v2
+  | Odivlu, v1::v2::nil => divlu_total v1 v2
   | Oandl, v1::v2::nil => andl v1 v2
   | Oandlshift s a, v1::v2::nil => andl v1 (eval_static_shiftl s v2 a)
   | Oandlimm n, v1::nil => andl v1 (L n)
-- 
cgit 


From 154529f64b96ca0e4cef8c4aeff6a1cfb8210e91 Mon Sep 17 00:00:00 2001
From: David Monniaux <david.monniaux@univ-grenoble-alpes.fr>
Date: Wed, 30 Sep 2020 14:07:40 +0200
Subject: non trapping

---
 aarch64/Op.v | 2 --
 1 file changed, 2 deletions(-)

(limited to 'aarch64')

diff --git a/aarch64/Op.v b/aarch64/Op.v
index 30f806d3..ef28dd6d 100644
--- a/aarch64/Op.v
+++ b/aarch64/Op.v
@@ -926,8 +926,6 @@ Qed.
 
 Definition is_trapping_op (op : operation) :=
   match op with
-  | Odiv | Odivu | Odivl | Odivlu
-  | Oshrximm _  | Oshrlximm _
   | Ointoffloat | Ointuoffloat
   | Ointofsingle | Ointuofsingle
   | Ofloatofint | Ofloatofintu
-- 
cgit 


From 244159573aa20a0a74897edb5897ded1bd16cd66 Mon Sep 17 00:00:00 2001
From: David Monniaux <david.monniaux@univ-grenoble-alpes.fr>
Date: Wed, 30 Sep 2020 14:42:28 +0200
Subject: non trapping op

---
 aarch64/Op.v              | 113 ++++++++++++++++++++--------------------------
 aarch64/SelectLongproof.v |  16 +++----
 aarch64/SelectOpproof.v   |  16 +++----
 aarch64/ValueAOp.v        |  16 +++----
 4 files changed, 73 insertions(+), 88 deletions(-)

(limited to 'aarch64')

diff --git a/aarch64/Op.v b/aarch64/Op.v
index ef28dd6d..0a29ff3e 100644
--- a/aarch64/Op.v
+++ b/aarch64/Op.v
@@ -481,22 +481,22 @@ Definition eval_operation
 
   | Osingleoffloat, v1::nil => Some (Val.singleoffloat v1)
   | Ofloatofsingle, v1::nil => Some (Val.floatofsingle v1)
-  | Ointoffloat, v1::nil => Val.intoffloat v1
-  | Ointuoffloat, v1::nil => Val.intuoffloat v1
-  | Ofloatofint, v1::nil => Val.floatofint v1
-  | Ofloatofintu, v1::nil => Val.floatofintu v1
-  | Ointofsingle, v1::nil => Val.intofsingle v1
-  | Ointuofsingle, v1::nil => Val.intuofsingle v1
-  | Osingleofint, v1::nil => Val.singleofint v1
-  | Osingleofintu, v1::nil => Val.singleofintu v1
-  | Olongoffloat, v1::nil => Val.longoffloat v1
-  | Olonguoffloat, v1::nil => Val.longuoffloat v1
-  | Ofloatoflong, v1::nil => Val.floatoflong v1
-  | Ofloatoflongu, v1::nil => Val.floatoflongu v1
-  | Olongofsingle, v1::nil => Val.longofsingle v1
-  | Olonguofsingle, v1::nil => Val.longuofsingle v1
-  | Osingleoflong, v1::nil => Val.singleoflong v1
-  | Osingleoflongu, v1::nil => Val.singleoflongu v1
+  | Ointoffloat, v1::nil => Some (Val.maketotal (Val.intoffloat v1))
+  | Ointuoffloat, v1::nil => Some (Val.maketotal (Val.intuoffloat v1))
+  | Ofloatofint, v1::nil => Some (Val.maketotal (Val.floatofint v1))
+  | Ofloatofintu, v1::nil => Some (Val.maketotal (Val.floatofintu v1))
+  | Ointofsingle, v1::nil => Some (Val.maketotal (Val.intofsingle v1))
+  | Ointuofsingle, v1::nil => Some (Val.maketotal (Val.intuofsingle v1))
+  | Osingleofint, v1::nil => Some (Val.maketotal (Val.singleofint v1))
+  | Osingleofintu, v1::nil => Some (Val.maketotal (Val.singleofintu v1))
+  | Olongoffloat, v1::nil => Some (Val.maketotal (Val.longoffloat v1))
+  | Olonguoffloat, v1::nil => Some (Val.maketotal (Val.longuoffloat v1))
+  | Ofloatoflong, v1::nil => Some (Val.maketotal (Val.floatoflong v1))
+  | Ofloatoflongu, v1::nil => Some (Val.maketotal (Val.floatoflongu v1))
+  | Olongofsingle, v1::nil => Some (Val.maketotal (Val.longofsingle v1))
+  | Olonguofsingle, v1::nil => Some (Val.maketotal (Val.longuofsingle v1))
+  | Osingleoflong, v1::nil => Some (Val.maketotal (Val.singleoflong v1))
+  | Osingleoflongu, v1::nil => Some (Val.maketotal (Val.singleoflongu v1))
 
   | Ocmp c, _ => Some (Val.of_optbool (eval_condition c vl m))
   | Osel c ty, v1::v2::vl => Some(Val.select (eval_condition c vl m) v1 v2 ty)
@@ -895,29 +895,29 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0...
   - destruct v0...
   (* intoffloat, intuoffloat *)
-  - destruct v0; simpl in H0; inv H0. destruct (Float.to_int f); inv H2...
-  - destruct v0; simpl in H0; inv H0. destruct (Float.to_intu f); inv H2...
+  - destruct v0; cbn; trivial. destruct (Float.to_int f); cbn; trivial.
+  - destruct v0; cbn; trivial. destruct (Float.to_intu f); cbn; trivial.
   (* floatofint, floatofintu *)
-  - destruct v0; simpl in H0; inv H0...
-  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
   (* intofsingle, intuofsingle *)
-  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_int f); inv H2...
-  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_intu f); inv H2...
+  - destruct v0; cbn; trivial. destruct (Float32.to_int f); cbn; trivial.
+  - destruct v0; cbn; trivial. destruct (Float32.to_intu f); cbn; trivial.
   (* singleofint, singleofintu *)
-  - destruct v0; simpl in H0; inv H0...
-  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
   (* longoffloat, longuoffloat *)
-  - destruct v0; simpl in H0; inv H0. destruct (Float.to_long f); inv H2...
-  - destruct v0; simpl in H0; inv H0. destruct (Float.to_longu f); inv H2...
+  - destruct v0; cbn; trivial. destruct (Float.to_long f); cbn; trivial.
+  - destruct v0; cbn; trivial. destruct (Float.to_longu f); cbn; trivial.
   (* floatoflong, floatoflongu *)
-  - destruct v0; simpl in H0; inv H0...
-  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
   (* longofsingle, longuofsingle *)
-  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_long f); inv H2...
-  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_longu f); inv H2...
+  - destruct v0; cbn; trivial. destruct (Float32.to_long f); cbn; trivial.
+  - destruct v0; cbn; trivial. destruct (Float32.to_longu f); cbn; trivial.
   (* singleoflong, singleoflongu *)
-  - destruct v0; simpl in H0; inv H0...
-  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
   (* cmp *)
   - destruct (eval_condition cond vl m) as [[]|]...
   - unfold Val.select. destruct (eval_condition cond vl m). apply Val.normalize_type. exact I.
@@ -926,14 +926,7 @@ Qed.
 
 Definition is_trapping_op (op : operation) :=
   match op with
-  | Ointoffloat | Ointuoffloat
-  | Ointofsingle | Ointuofsingle
-  | Ofloatofint | Ofloatofintu
-  | Osingleofint | Osingleofintu
-  | Olongoffloat | Olonguoffloat
-  | Olongofsingle | Olonguofsingle
-  | Ofloatoflong | Ofloatoflongu
-  | Osingleoflong | Osingleoflongu => true
+  | Omove => false
   | _ => false
   end.
                 
@@ -1549,37 +1542,29 @@ Proof.
   - inv H4; simpl; auto.
   - inv H4; simpl; auto.
   (* intoffloat, intuoffloat *)
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_int f0); simpl in H2; inv H2.
-    exists (Vint i); auto.
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_intu f0); simpl in H2; inv H2.
-    exists (Vint i); auto.
+  - inv H4; cbn; trivial. destruct (Float.to_int f0); cbn; trivial.
+  - inv H4; cbn; trivial. destruct (Float.to_intu f0); cbn; trivial.
   (* floatofint, floatofintu *)
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; cbn; trivial.
+  - inv H4; cbn; trivial.
   (* intofsingle, intuofsingle *)
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_int f0); simpl in H2; inv H2.
-    exists (Vint i); auto.
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_intu f0); simpl in H2; inv H2.
-    exists (Vint i); auto.
+  - inv H4; cbn; trivial. destruct (Float32.to_int f0); cbn; trivial.
+  - inv H4; cbn; trivial. destruct (Float32.to_intu f0); cbn; trivial.
   (* singleofint, singleofintu *)
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; cbn; trivial.
+  - inv H4; cbn; trivial.
   (* longoffloat, longuoffloat *)
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_long f0); simpl in H2; inv H2.
-    exists (Vlong i); auto.
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_longu f0); simpl in H2; inv H2.
-    exists (Vlong i); auto.
+  - inv H4; cbn; trivial. destruct (Float.to_long f0); cbn; trivial.
+  - inv H4; cbn; trivial. destruct (Float.to_longu f0); cbn; trivial.
   (* floatoflong, floatoflongu *)
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; cbn; trivial.
+  - inv H4; cbn; trivial.
   (* longofsingle, longuofsingle *)
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_long f0); simpl in H2; inv H2.
-    exists (Vlong i); auto.
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_longu f0); simpl in H2; inv H2.
-    exists (Vlong i); auto.
+  - inv H4; cbn; trivial. destruct (Float32.to_long f0); cbn; trivial.
+  - inv H4; cbn; trivial. destruct (Float32.to_longu f0); cbn; trivial.
   (* singleoflong, singleoflongu *)
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; cbn; trivial.
+  - inv H4; cbn; trivial.
   (* cmp, sel *)
   - subst v1. destruct (eval_condition cond vl1 m1) eqn:?.
     exploit eval_condition_inj; eauto. intros EQ; rewrite EQ.
diff --git a/aarch64/SelectLongproof.v b/aarch64/SelectLongproof.v
index c1847638..513ee9bd 100644
--- a/aarch64/SelectLongproof.v
+++ b/aarch64/SelectLongproof.v
@@ -730,42 +730,42 @@ Qed.
 
 Theorem eval_longoffloat: partial_unary_constructor_sound longoffloat Val.longoffloat.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_longuoffloat: partial_unary_constructor_sound longuoffloat Val.longuoffloat.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_floatoflong: partial_unary_constructor_sound floatoflong Val.floatoflong.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists.  cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_floatoflongu: partial_unary_constructor_sound floatoflongu Val.floatoflongu.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_longofsingle: partial_unary_constructor_sound longofsingle Val.longofsingle.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_longuofsingle: partial_unary_constructor_sound longuofsingle Val.longuofsingle.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_singleoflong: partial_unary_constructor_sound singleoflong Val.singleoflong.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_singleoflongu: partial_unary_constructor_sound singleoflongu Val.singleoflongu.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 End CMCONSTR.
diff --git a/aarch64/SelectOpproof.v b/aarch64/SelectOpproof.v
index c7898193..9ce7a8bf 100644
--- a/aarch64/SelectOpproof.v
+++ b/aarch64/SelectOpproof.v
@@ -932,7 +932,7 @@ Theorem eval_intoffloat:
   Val.intoffloat x = Some y ->
   exists v, eval_expr ge sp e m le (intoffloat a) v /\ Val.lessdef y v.
 Proof.
-  intros; TrivialExists.
+  intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_floatofint:
@@ -943,7 +943,7 @@ Theorem eval_floatofint:
 Proof.
   intros until y; unfold floatofint. case (floatofint_match a); intros; InvEval.
 - TrivialExists.
-- TrivialExists.
+- TrivialExists. cbn.  rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_intuoffloat:
@@ -952,7 +952,7 @@ Theorem eval_intuoffloat:
   Val.intuoffloat x = Some y ->
   exists v, eval_expr ge sp e m le (intuoffloat a) v /\ Val.lessdef y v.
 Proof.
-  intros; TrivialExists.
+  intros; TrivialExists.  cbn.  rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_floatofintu:
@@ -963,7 +963,7 @@ Theorem eval_floatofintu:
 Proof.
   intros until y; unfold floatofintu. case (floatofintu_match a); intros; InvEval.
 - TrivialExists.
-- TrivialExists.
+- TrivialExists. cbn.  rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_intofsingle:
@@ -972,7 +972,7 @@ Theorem eval_intofsingle:
   Val.intofsingle x = Some y ->
   exists v, eval_expr ge sp e m le (intofsingle a) v /\ Val.lessdef y v.
 Proof.
-  intros; TrivialExists.
+  intros; TrivialExists. cbn.  rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_singleofint:
@@ -983,7 +983,7 @@ Theorem eval_singleofint:
 Proof.
   intros until y; unfold singleofint. case (singleofint_match a); intros; InvEval.
 - TrivialExists.
-- TrivialExists.
+- TrivialExists. cbn.  rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_intuofsingle:
@@ -992,7 +992,7 @@ Theorem eval_intuofsingle:
   Val.intuofsingle x = Some y ->
   exists v, eval_expr ge sp e m le (intuofsingle a) v /\ Val.lessdef y v.
 Proof.
-  intros; TrivialExists.
+  intros; TrivialExists.  cbn.  rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_singleofintu:
@@ -1003,7 +1003,7 @@ Theorem eval_singleofintu:
 Proof.
   intros until y; unfold singleofintu. case (singleofintu_match a); intros; InvEval.
 - TrivialExists.
-- TrivialExists.
+- TrivialExists.  cbn.  rewrite H0. reflexivity.
 Qed.
 
 (** Selection *)
diff --git a/aarch64/ValueAOp.v b/aarch64/ValueAOp.v
index d379bbe8..e6a60d4e 100644
--- a/aarch64/ValueAOp.v
+++ b/aarch64/ValueAOp.v
@@ -191,20 +191,20 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
 
   | Osingleoffloat, v1::nil => singleoffloat v1
   | Ofloatofsingle, v1::nil => floatofsingle v1
-  | Ointoffloat, v1::nil => intoffloat v1
-  | Ointuoffloat, v1::nil => intuoffloat v1
+  | Ointoffloat, v1::nil => intoffloat_total v1
+  | Ointuoffloat, v1::nil => intuoffloat_total v1
   | Ofloatofint, v1::nil => floatofint v1
   | Ofloatofintu, v1::nil => floatofintu v1
-  | Ointofsingle, v1::nil => intofsingle v1
-  | Ointuofsingle, v1::nil => intuofsingle v1
+  | Ointofsingle, v1::nil => intofsingle_total v1
+  | Ointuofsingle, v1::nil => intuofsingle_total v1
   | Osingleofint, v1::nil => singleofint v1
   | Osingleofintu, v1::nil => singleofintu v1
-  | Olongoffloat, v1::nil => longoffloat v1
-  | Olonguoffloat, v1::nil => longuoffloat v1
+  | Olongoffloat, v1::nil => longoffloat_total v1
+  | Olonguoffloat, v1::nil => longuoffloat_total v1
   | Ofloatoflong, v1::nil => floatoflong v1
   | Ofloatoflongu, v1::nil => floatoflongu v1
-  | Olongofsingle, v1::nil => longofsingle v1
-  | Olonguofsingle, v1::nil => longuofsingle v1
+  | Olongofsingle, v1::nil => longofsingle_total v1
+  | Olonguofsingle, v1::nil => longuofsingle_total v1
   | Osingleoflong, v1::nil => singleoflong v1
   | Osingleoflongu, v1::nil => singleoflongu v1
 
-- 
cgit 


From 825b77fe8b4eb0919564e51cfaae69a6dfae24e3 Mon Sep 17 00:00:00 2001
From: David Monniaux <david.monniaux@univ-grenoble-alpes.fr>
Date: Fri, 2 Oct 2020 21:54:34 +0200
Subject: so that all architectures compile

---
 aarch64/PrepassSchedulingOracle.ml | 473 +++++++++++++++++++++++++++++++++++++
 1 file changed, 473 insertions(+)
 create mode 100644 aarch64/PrepassSchedulingOracle.ml

(limited to 'aarch64')

diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml
new file mode 100644
index 00000000..25083bcd
--- /dev/null
+++ b/aarch64/PrepassSchedulingOracle.ml
@@ -0,0 +1,473 @@
+open AST
+open RTL
+open Maps
+open InstructionScheduler
+open OpWeights
+open Registers
+
+let use_alias_analysis () = false (* always false here; if it returned true, add_input_mem/add_output_mem in get_simple_dependencies would record nothing *)
+                          
+let length_of_chunk = function (* maps a memory chunk kind to its width: 1, 2, 4 or 8 *)
+| Mint8signed
+| Mint8unsigned -> 1 (* 8-bit chunks give 1, so the unit is bytes *)
+| Mint16signed
+| Mint16unsigned -> 2
+| Mint32
+| Mfloat32
+| Many32 -> 4
+| Mint64
+| Mfloat64 
+| Many64 -> 8;;
+
+let get_simple_dependencies (seqa : (instruction*Regset.t) array) = (* walks seqa in index order and returns the accumulated latency_constraint list *)
+  let last_reg_reads : int list PTree.t ref = ref PTree.empty (* per register: indices of the instructions that read it since its last recorded write *)
+  and last_reg_write : (int*int) PTree.t ref = ref PTree.empty (* per register: (index of last writer, latency given to add_output_reg) *)
+  and last_mem_reads : int list ref = ref [] (* indices of memory reads since the last memory write *)
+  and last_mem_write : int option ref = ref None (* index of the last memory write, if any *)
+  and last_branch : int option ref = ref None (* index of the last instruction passed to set_branch, if any *)
+  and last_non_pipelined_op : int array = Array.make
+                                        nr_non_pipelined_units ( -1 ) (* per non-pipelined unit: index of its last user, -1 when none yet *)
+  and latency_constraints : latency_constraint list ref = ref [] in (* result accumulator, most recently added constraint first *)
+  let add_constraint instr_from instr_to latency = (* records that instr_to is constrained by instr_from with the given latency *)
+    assert (instr_from <= instr_to);
+    assert (latency >= 0);
+    if instr_from = instr_to
+    then (if latency = 0
+          then () (* a zero-latency constraint of an instruction on itself is dropped *)
+          else failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop") (* only reachable with latency > 0, given the assert above *)
+    else
+      latency_constraints :=
+        { instr_from = instr_from;
+          instr_to = instr_to;
+          latency = latency
+        }:: !latency_constraints
+  and get_last_reads reg = (* readers recorded for reg, or the empty list when there is no entry *)
+    match PTree.get reg !last_reg_reads
+     with Some l -> l
+        | None -> [] in
+  let add_input_mem i = (* instruction i reads memory; does nothing when use_alias_analysis () is true *)
+    if not (use_alias_analysis ())
+    then
+      begin
+        begin
+          (* Read after write *)
+          match !last_mem_write with
+          | None -> ()
+          | Some j -> add_constraint j i 1
+        end;
+        last_mem_reads := i :: !last_mem_reads
+      end
+  and add_output_mem i = (* instruction i writes memory; does nothing when use_alias_analysis () is true *)
+    if not (use_alias_analysis ())
+    then
+      begin
+        begin
+          (* Write after write *)
+          match !last_mem_write with
+          | None -> ()
+          | Some j -> add_constraint j i 1
+        end;
+        (* Write after read *)
+        List.iter (fun j -> add_constraint j i 0) !last_mem_reads;
+        last_mem_write := Some i;
+        last_mem_reads := [] (* earlier reads are now ordered before this write, so the list restarts *)
+      end
+  and add_input_reg i reg = (* instruction i reads register reg *)
+    begin
+      (* Read after write *)
+      match PTree.get reg !last_reg_write with
+      | None -> ()
+      | Some (j, latency) -> add_constraint j i latency (* uses the latency stored when the writer was recorded *)
+    end;
+    last_reg_reads := PTree.set reg
+                       (i :: get_last_reads reg)
+                       !last_reg_reads
+  and add_output_reg i latency reg = (* instruction i writes register reg; latency is stored for later readers *)
+    begin
+      (* Write after write *)
+      match PTree.get reg !last_reg_write with
+      | None -> ()
+      | Some (j, _) -> add_constraint j i 1
+    end;
+    begin
+      (* Write after read *)
+      List.iter (fun j -> add_constraint j i 0) (get_last_reads reg)
+    end;
+    last_reg_write := PTree.set reg (i, latency) !last_reg_write;
+    last_reg_reads := PTree.remove reg !last_reg_reads
+    in
+  let add_input_regs i regs = List.iter (add_input_reg i) regs in
+  let rec add_builtin_res i (res : reg builtin_res) = (* marks every register of a builtin result as written by i *)
+    match res with
+    | BR r -> add_output_reg i 10 r (* builtin results get a hard-coded latency of 10 *)
+    | BR_none -> ()
+    | BR_splitlong (hi, lo) -> add_builtin_res i hi;
+                               add_builtin_res i lo in
+  let rec add_builtin_arg i (ba : reg builtin_arg) = (* marks the registers and memory that a builtin argument reads *)
+    match ba with
+    | BA r -> add_input_reg i r
+    | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> ()
+    | BA_loadstack(_,_) -> add_input_mem i
+    | BA_addrstack _ -> ()
+    | BA_loadglobal(_, _, _) -> add_input_mem i
+    | BA_addrglobal _ -> ()
+    | BA_splitlong(hi, lo) -> add_builtin_arg i hi;
+                              add_builtin_arg i lo
+    | BA_addptr(a1, a2) -> add_builtin_arg i a1;
+                           add_builtin_arg i a2 in
+  let irreversible_action i = (* constrains i to come at least 1 after the last recorded branch, if any *)
+    match !last_branch with
+    | None -> ()
+    | Some j -> add_constraint j i 1 in
+  let set_branch i = (* i is itself ordered after the previous branch, then becomes the last branch *)
+    irreversible_action i;
+    last_branch := Some i in
+  let add_non_pipelined_resources i resources = (* resources.(u) >= 0 means i uses unit u; Array.iter2 raises Invalid_argument if the two arrays differ in length *)
+    Array.iter2
+      (fun latency last ->
+        if latency >= 0 && last >= 0 then add_constraint last i latency) (* order i after the previous user of the same unit *)
+      resources last_non_pipelined_op;
+    Array.iteri (fun rsc latency ->
+        if latency >= 0
+        then last_non_pipelined_op.(rsc) <- i) resources (* i becomes the last user of each unit it uses *)
+  in
+  Array.iteri
+    begin
+      fun i (insn, other_uses) ->
+      List.iter (fun use ->
+          add_input_reg i use)
+        (Regset.elements other_uses); (* every register in the attached set counts as read by instruction i *)
+      (* then record the dependencies specific to the instruction kind *)
+      match insn with
+      | Inop _ -> ()
+      | Iop(op, inputs, output, _) ->
+         add_non_pipelined_resources i
+           (non_pipelined_resources_of_op op (List.length inputs));
+         (if Op.is_trapping_op op then irreversible_action i);
+         add_input_regs i inputs;
+         add_output_reg i (latency_of_op op (List.length inputs)) output
+      | Iload(trap, chunk, addressing, addr_regs, output, _) ->
+         (if trap=TRAP then irreversible_action i); (* only loads marked TRAP are ordered after the last branch *)
+         add_input_mem i;
+         add_input_regs i addr_regs;
+         add_output_reg i (latency_of_load trap chunk addressing (List.length addr_regs)) output
+      | Istore(chunk, addressing, addr_regs, input, _) ->
+         irreversible_action i;
+         add_input_regs i addr_regs;
+         add_input_reg i input;
+         add_output_mem i
+      | Icall(signature, ef, inputs, output, _) ->
+         set_branch i;
+         (match ef with
+          | Datatypes.Coq_inl r -> add_input_reg i r
+          | Datatypes.Coq_inr symbol -> ()
+         );
+         add_input_mem i;
+         add_input_regs i inputs;
+         add_output_reg i (latency_of_call signature ef) output;
+         add_output_mem i;
+         failwith "Icall" (* always raises Failure after recording; NOTE(review): presumably calls are not expected inside seqa - confirm *)
+      | Itailcall(signature, ef, inputs) ->
+         set_branch i;
+        (match ef with
+          | Datatypes.Coq_inl r -> add_input_reg i r
+          | Datatypes.Coq_inr symbol -> ()
+         );
+         add_input_mem i;
+         add_input_regs i inputs;
+         failwith "Itailcall" (* always raises Failure for this instruction kind *)
+      | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
+         set_branch i;
+         add_input_mem i;
+         List.iter (add_builtin_arg i) builtin_inputs;
+         add_builtin_res i builtin_output;
+         add_output_mem i;
+         failwith "Ibuiltin" (* always raises Failure for this instruction kind *)
+      | Icond(cond, inputs, _, _, _) ->
+         set_branch i;
+         add_input_mem i; (* a conditional branch is also recorded as a memory read *)
+         add_input_regs i inputs
+      | Ijumptable(input, _) ->
+         set_branch i;
+         add_input_reg i input;
+         failwith "Ijumptable" (* always raises Failure for this instruction kind *)
+      | Ireturn(Some input) ->
+         set_branch i;
+         add_input_reg i input;
+         failwith "Ireturn" (* always raises Failure for this instruction kind *)
+      | Ireturn(None) ->
+         set_branch i;
+         failwith "Ireturn none" (* always raises Failure for this instruction kind *)
+    end seqa;
+  !latency_constraints;;
+
+let resources_of_instruction = function
+  | Inop _ -> Array.map (fun _ -> 0) resource_bounds
+  | Iop(op, inputs, output, _) -> resources_of_op op (List.length inputs)
+  | Iload(trap, chunk, addressing, addr_regs, output, _) ->
+     resources_of_load trap chunk addressing (List.length addr_regs)
+  | Istore(chunk, addressing, addr_regs, input, _) ->
+     resources_of_store chunk addressing (List.length addr_regs)
+  | Icall(signature, ef, inputs, output, _) ->
+     resources_of_call signature ef
+  | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
+     resources_of_builtin ef
+  | Icond(cond, args, _, _ , _) ->
+     resources_of_cond cond (List.length args)
+  | Itailcall _ | Ijumptable _ | Ireturn _ -> resource_bounds
+    
+let print_sequence pp (seqa : instruction array) =
+  Array.iteri (
+      fun i (insn : instruction) ->
+      PrintRTL.print_instruction pp (i, insn)) seqa;;
+
+type unique_id = int (* number identifying a hash-consed term; handed out by hash_node *)
+               
+type 'a symbolic_term_node = (* shape of a symbolic value; 'a is the type of sub-terms *)
+  | STop of Op.operation * 'a list (* an operation applied to argument terms *)
+  | STinitial_reg of int (* register read before any write; the int is Camlcoq.P.to_int of the register *)
+  | STother of int;; (* opaque result of a load, call or builtin; the int is the instruction index *)
+
+type symbolic_term = { (* hash-consed term: a node together with its number *)
+    hash_id : unique_id; (* the only thing term_equal compares *)
+    hash_ct : symbolic_term symbolic_term_node (* the node, with full sub-terms *)
+  };;
+
+(* Prints a term: "x<n>" for an initial register, "y<n>" for an opaque value,
+   and PrintOp's rendering (with recursively printed arguments) for an operation. *)
+let rec print_term channel { hash_ct = node; _ } =
+  match node with STinitial_reg reg_nr -> Printf.fprintf channel "x%d" reg_nr
+  | STother site -> Printf.fprintf channel "y%d" site
+  | STop(operation, operands) -> PrintOp.print_operation print_term channel (operation, operands);;
+
+type symbolic_term_table = { (* hash-consing state threaded through hash_node *)
+    st_table : (unique_id symbolic_term_node, symbolic_term) Hashtbl.t; (* key: node with sub-terms replaced by their ids *)
+    mutable st_next_id : unique_id };; (* id given to the next new term *)
+
+(* Fresh hash-consing state: empty table, ids start at 0. *)
+let hash_init () : symbolic_term_table =
+  let st_table = Hashtbl.create 20 in
+  { st_table; st_next_id = 0 };;
+
+(* Key under which a node is stored in st_table: the same node with every sub-term replaced by its hash_id. *)
+let ground_to_id node = match node with
+  | STinitial_reg reg -> STinitial_reg reg | STother site -> STother site
+  | STop(operation, subterms) -> STop(operation, List.map (fun sub -> sub.hash_id) subterms);;
+
+(* Hash-consing constructor: returns the term already registered for [term]'s
+   key, or registers a new one carrying the next free id.
+   Fails with "hash: max_int" when the id counter has reached max_int. *)
+let hash_node (table : symbolic_term_table) (term : symbolic_term symbolic_term_node) : symbolic_term =
+  let key = ground_to_id term in
+  match Hashtbl.find_opt table.st_table key with Some known -> known | None ->
+     let fresh_id = table.st_next_id in
+     if fresh_id = max_int then failwith "hash: max_int";
+     table.st_next_id <- fresh_id + 1;
+     let fresh = { hash_id = fresh_id; hash_ct = term } in
+     Hashtbl.add table.st_table key fresh; fresh;;
+
+type access = { (* memory area touched by a load or store, relative to a symbolic base *)
+    base : symbolic_term; (* symbolic value of the address register *)
+    offset : int64; (* constant displacement taken from Op.Aindexed *)
+    length : int (* length_of_chunk of the accessed chunk *)
+  };;
+
+let term_equal (a : symbolic_term) (b : symbolic_term) : bool = a.hash_id = b.hash_id;; (* compares ids only *)
+
+(* Describes the memory area reached through [addressing] applied to [args].
+   Only Op.Aindexed with exactly one register is understood; anything else
+   yields None, i.e. "unknown target".  TODO: global *)
+let access_of_addressing get_reg chunk addressing args =
+  match addressing, args with
+  | Op.Aindexed displacement, [base_reg] ->
+     Some { base = get_reg base_reg; length = length_of_chunk chunk;
+            offset = Camlcoq.camlint64_of_ptrofs displacement }
+  | _, _ -> None ;;
+
+let symbolic_execution (seqa : instruction array) = (* one pass over seqa; result.(i) = access of load/store i, None elsewhere or if unknown *)
+  let regs = ref PTree.empty (* symbolic value of every register assigned so far *)
+  and table = hash_init() in
+  let assign reg term = regs := PTree.set reg term !regs
+  and hash term = hash_node table term in
+  let get_reg reg =
+    match PTree.get reg !regs with
+    | None -> hash (STinitial_reg (Camlcoq.P.to_int reg)) (* never assigned: stands for its value on entry *)
+    | Some x -> x in
+  let targets = Array.make (Array.length seqa) None in
+  Array.iteri
+    begin
+      fun i insn ->
+      match insn with
+      | Iop(Op.Omove, [input], output, _) -> (* a move propagates the source term unchanged *)
+         assign output (get_reg input)
+      | Iop(op, inputs, output, _) ->
+         assign output (hash (STop(op, List.map get_reg inputs)))
+      (* a load records where it reads; the loaded value itself is opaque *)
+      | Iload(trap, chunk, addressing, args, output, _) ->
+         let access = access_of_addressing get_reg chunk addressing args in
+         targets.(i) <- access;
+         assign output (hash (STother(i)))
+      (* results of calls and builtins are opaque values tagged with the instruction index *)
+      | Icall(_, _, _, output, _)
+      | Ibuiltin(_, _, BR output, _) -> 
+         assign output (hash (STother(i)))
+      (* a store only records where it writes; no register changes *)
+      | Istore(chunk, addressing, args, va, _) ->
+         let access = access_of_addressing get_reg chunk addressing args in
+         targets.(i) <- access
+      (* instructions below assign no register that is tracked here *)
+      | Inop _ -> ()
+      | Ibuiltin(_, _, BR_none, _) -> ()
+      | Ibuiltin(_, _, BR_splitlong _, _) -> failwith "BR_splitlong"
+      (* control-flow instructions: nothing to record *)
+      | Itailcall (_, _, _)
+      |Icond (_, _, _, _, _)
+      |Ijumptable (_, _)
+      |Ireturn _ -> ()
+    end seqa;
+  targets;;
+
+let print_access channel target = (* "any" for an unknown access, otherwise "<base term> + <offset>" *)
+  match target with None -> Printf.fprintf channel "any"
+  | Some { base; offset; _ } -> Printf.fprintf channel "%a + %Ld" print_term base offset;;
+
+(* Debugging aid: runs symbolic_execution on [seqa] and prints one line per
+   load or store, showing the access computed for it. *)
+let print_targets channel seqa =
+  let targets = symbolic_execution seqa in
+  let report position insn =
+    let target = targets.(position) in
+    match insn with
+    | Istore _ -> Printf.fprintf channel "%d: store %a\n" position print_access target
+    | Iload _ -> Printf.fprintf channel "%d: load %a\n" position print_access target
+    | _ -> () in
+  Array.iteri report seqa;;
+
+(* Conservative aliasing test: [true] unless the two accesses provably touch
+   disjoint memory.  [None] stands for an access whose target is unknown. *)
+let may_overlap a0 b0 =
+  (* The ranges [ao, ao + a.length) and [bo, bo + b.length) intersect iff the
+     larger start lies strictly before the smaller end. *)
+  let ranges_intersect a ao b bo =
+    (max ao bo) <
+      (min (Int64.add (Int64.of_int a.length) ao)
+         (Int64.add (Int64.of_int b.length) bo))
+  (* Offset of [x] once the constant carried by its base is folded in. *)
+  and shifted x ofs = Int64.add x.offset (Camlcoq.camlint64_of_ptrofs ofs) in
+  match a0, b0 with
+  | (None, _)  | (_ , None) -> true
+  | (Some a), (Some b) ->
+     if term_equal a.base b.base
+     then ranges_intersect a a.offset b b.offset
+     else match a.base.hash_ct, b.base.hash_ct with
+          | STop(Op.Oaddrsymbol(ida, ofsa),[]),
+            STop(Op.Oaddrsymbol(idb, ofsb),[]) ->
+             (* different symbols are treated as disjoint *)
+             (ida=idb) &&
+               ranges_intersect a (shifted a ofsa) b (shifted b ofsb)
+          | STop(Op.Oaddrstack _, []), STop(Op.Oaddrsymbol _, [])
+          | STop(Op.Oaddrsymbol _, []), STop(Op.Oaddrstack _, []) ->
+             false (* a stack address is treated as disjoint from any symbol *)
+          | STop(Op.Oaddrstack(ofsa),[]),
+            STop(Op.Oaddrstack(ofsb),[]) ->
+             ranges_intersect a (shifted a ofsa) b (shifted b ofsb)
+          | _ -> true;;
+
+(*
+(* TODO suboptimal quadratic algorithm *)
+let get_alias_dependencies seqa =
+  let targets = symbolic_execution seqa
+  and deps = ref [] in
+  let add_constraint instr_from instr_to latency =
+    deps := { instr_from = instr_from;
+              instr_to = instr_to;
+              latency = latency
+            }:: !deps in
+  for i=0 to (Array.length seqa)-1
+  do
+    for j=0 to i-1
+    do
+      match seqa.(j), seqa.(i) with
+      | (Istore _), ((Iload _) | (Istore _)) ->
+         if may_overlap targets.(j) targets.(i)
+         then add_constraint j i 1
+      | (Iload _), (Istore _) ->
+         if may_overlap targets.(j) targets.(i)
+         then add_constraint j i 0
+      | (Istore _ | Iload _), (Icall _ | Ibuiltin _)
+      | (Icall _ | Ibuiltin _), (Icall _ | Ibuiltin _ | Iload _ | Istore _) ->
+         add_constraint j i 1
+      | (Inop _ | Iop _), _
+      | _, (Inop _ | Iop _)
+      | (Iload _), (Iload _) -> ()
+    done
+  done;
+  !deps;;
+ *)
+
+let define_problem seqa =
+  let simple_deps = get_simple_dependencies seqa in
+  { max_latency = -1;
+    resource_bounds = OpWeights.resource_bounds;
+    instruction_usages = Array.map resources_of_instruction (Array.map fst seqa);
+    latency_constraints =
+      (* if (use_alias_analysis ())
+      then (get_alias_dependencies seqa) @ simple_deps
+      else *) simple_deps };;
+
+(* Runs list_scheduler first; if it succeeds, runs reverse_list_scheduler
+   (through validated_scheduler) on the same problem extended with one extra
+   constraint per instruction i flagged in [early_ones]: from i to index
+   nr_instructions, with latency (last entry of the forward schedule minus
+   the forward schedule's entry for i).  None if the forward pass fails. *)
+let zigzag_scheduler problem early_ones =
+  let nr_instructions = get_nr_instructions problem in
+  assert(nr_instructions = (Array.length early_ones));
+  match list_scheduler problem with None -> None | Some forward ->
+     let makespan = forward.((Array.length forward) - 1)
+     and pinned = ref problem.latency_constraints in
+     for i = 0 to (Array.length early_ones) - 1 do
+       if early_ones.(i) then
+         pinned := { instr_from = i; instr_to = nr_instructions;
+                     latency = makespan - forward.(i) } :: !pinned
+     done;
+     validated_scheduler reverse_list_scheduler { problem with latency_constraints = !pinned };;
+  
+let prepass_scheduler_by_name name problem early_ones =
+  match name with
+  | "zigzag" -> zigzag_scheduler problem early_ones
+  | _ -> scheduler_by_name name problem
+  
+let schedule_sequence (seqa : (instruction*Regset.t) array) =
+  try
+    if (Array.length seqa) <= 1
+    then None
+    else
+      begin
+      let nr_instructions = Array.length seqa in
+      Printf.printf "prepass scheduling length = %d\n" (Array.length seqa);
+      let problem = define_problem seqa in
+      print_sequence stdout (Array.map fst seqa);
+      print_problem stdout problem;
+      match prepass_scheduler_by_name
+              (!Clflags.option_fprepass_sched)
+              problem
+              (Array.map (fun (ins, _) ->
+                   match ins with
+                   | Icond _ -> true
+                   | _ -> false) seqa) with
+      | None -> Printf.printf "no solution in prepass scheduling\n";
+                None
+      | Some solution ->
+         let positions = Array.init nr_instructions (fun i -> i) in
+         Array.sort (fun i j ->
+             let si = solution.(i) and sj = solution.(j) in
+             if si < sj then -1
+             else if si > sj then 1
+             else i - j) positions;
+         Some positions
+    end
+  with (Failure s) ->
+    Printf.printf "failure in prepass scheduling: %s\n" s;
+    None;;
+                                                            
-- 
cgit 


From 1deadbf2715e435f2599c415c629ad0f042d40a6 Mon Sep 17 00:00:00 2001
From: David Monniaux <david.monniaux@univ-grenoble-alpes.fr>
Date: Mon, 19 Oct 2020 18:20:28 +0200
Subject: op_valid_pointer_eq for aarch64

---
 aarch64/Op.v | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'aarch64')

diff --git a/aarch64/Op.v b/aarch64/Op.v
index 0a29ff3e..f2a8e6fb 100644
--- a/aarch64/Op.v
+++ b/aarch64/Op.v
@@ -1202,6 +1202,20 @@ Proof.
   rewrite (cond_depends_on_memory_correct cond args m1 m2 H). auto.
 Qed.
 
+Lemma op_valid_pointer_eq: (* memories agreeing on Mem.valid_pointer give the same eval_operation result *)
+  forall (F V: Type) (ge: Genv.t F V) sp op args m1 m2,
+  (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) ->
+  eval_operation ge sp op args m1 = eval_operation ge sp op args m2.
+Proof.
+  intros until m2. destruct op eqn:OP; simpl; try congruence. (* two goals survive; both carry a condition *)
+  - intros MEM; destruct cond; simpl; try congruence; (* case split on the condition, then on the argument list *)
+    repeat (destruct args; simpl; try congruence);
+    erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+  - intro MEM; destruct cond; simpl; try congruence; (* same script as the previous goal *)
+      repeat (destruct args; simpl; try congruence);
+      erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+Qed.
+
 (** Global variables mentioned in an operation or addressing mode *)
 
 Definition globals_addressing (addr: addressing) : list ident :=
-- 
cgit 


From bc80528de5dfbc864c611e23691ddd96f15dfdc7 Mon Sep 17 00:00:00 2001
From: David Monniaux <david.monniaux@univ-grenoble-alpes.fr>
Date: Thu, 22 Oct 2020 12:28:16 +0200
Subject: prefix all calls to OpWeights as preparation to using a structure

---
 aarch64/PrepassSchedulingOracle.ml | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

(limited to 'aarch64')

diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml
index 25083bcd..63fdec17 100644
--- a/aarch64/PrepassSchedulingOracle.ml
+++ b/aarch64/PrepassSchedulingOracle.ml
@@ -2,7 +2,6 @@ open AST
 open RTL
 open Maps
 open InstructionScheduler
-open OpWeights
 open Registers
 
 let use_alias_analysis () = false
@@ -26,7 +25,7 @@ let get_simple_dependencies (seqa : (instruction*Regset.t) array) =
   and last_mem_write : int option ref = ref None
   and last_branch : int option ref = ref None
   and last_non_pipelined_op : int array = Array.make
-                                        nr_non_pipelined_units ( -1 )
+                                        OpWeights.nr_non_pipelined_units ( -1 )
   and latency_constraints : latency_constraint list ref = ref [] in
   let add_constraint instr_from instr_to latency =
     assert (instr_from <= instr_to);
@@ -142,15 +141,15 @@ let get_simple_dependencies (seqa : (instruction*Regset.t) array) =
       | Inop _ -> ()
       | Iop(op, inputs, output, _) ->
          add_non_pipelined_resources i
-           (non_pipelined_resources_of_op op (List.length inputs));
+           (OpWeights.non_pipelined_resources_of_op op (List.length inputs));
          (if Op.is_trapping_op op then irreversible_action i);
          add_input_regs i inputs;
-         add_output_reg i (latency_of_op op (List.length inputs)) output
+         add_output_reg i (OpWeights.latency_of_op op (List.length inputs)) output
       | Iload(trap, chunk, addressing, addr_regs, output, _) ->
          (if trap=TRAP then irreversible_action i);
          add_input_mem i;
          add_input_regs i addr_regs;
-         add_output_reg i (latency_of_load trap chunk addressing (List.length addr_regs)) output
+         add_output_reg i (OpWeights.latency_of_load trap chunk addressing (List.length addr_regs)) output
       | Istore(chunk, addressing, addr_regs, input, _) ->
          irreversible_action i;
          add_input_regs i addr_regs;
@@ -164,7 +163,7 @@ let get_simple_dependencies (seqa : (instruction*Regset.t) array) =
          );
          add_input_mem i;
          add_input_regs i inputs;
-         add_output_reg i (latency_of_call signature ef) output;
+         add_output_reg i (OpWeights.latency_of_call signature ef) output;
          add_output_mem i;
          failwith "Icall"
       | Itailcall(signature, ef, inputs) ->
@@ -202,19 +201,20 @@ let get_simple_dependencies (seqa : (instruction*Regset.t) array) =
   !latency_constraints;;
 
 let resources_of_instruction = function
-  | Inop _ -> Array.map (fun _ -> 0) resource_bounds
-  | Iop(op, inputs, output, _) -> resources_of_op op (List.length inputs)
+  | Inop _ -> Array.map (fun _ -> 0) OpWeights.resource_bounds
+  | Iop(op, inputs, output, _) ->
+     OpWeights.resources_of_op op (List.length inputs)
   | Iload(trap, chunk, addressing, addr_regs, output, _) ->
-     resources_of_load trap chunk addressing (List.length addr_regs)
+     OpWeights.resources_of_load trap chunk addressing (List.length addr_regs)
   | Istore(chunk, addressing, addr_regs, input, _) ->
-     resources_of_store chunk addressing (List.length addr_regs)
+     OpWeights.resources_of_store chunk addressing (List.length addr_regs)
   | Icall(signature, ef, inputs, output, _) ->
-     resources_of_call signature ef
+     OpWeights.resources_of_call signature ef
   | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
-     resources_of_builtin ef
+     OpWeights.resources_of_builtin ef
   | Icond(cond, args, _, _ , _) ->
-     resources_of_cond cond (List.length args)
-  | Itailcall _ | Ijumptable _ | Ireturn _ -> resource_bounds
+     OpWeights.resources_of_cond cond (List.length args)
+  | Itailcall _ | Ijumptable _ | Ireturn _ -> OpWeights.resource_bounds
     
 let print_sequence pp (seqa : instruction array) =
   Array.iteri (
-- 
cgit 


From 1740571e67f4c9384aea706132b534411d24b36c Mon Sep 17 00:00:00 2001
From: David Monniaux <david.monniaux@univ-grenoble-alpes.fr>
Date: Thu, 22 Oct 2020 13:26:35 +0200
Subject: allow changing target cpu

---
 aarch64/PrepassSchedulingOracle.ml     | 44 ++++++++++++++++++----------------
 aarch64/PrepassSchedulingOracleDeps.ml | 17 +++++++++++++
 2 files changed, 40 insertions(+), 21 deletions(-)
 create mode 100644 aarch64/PrepassSchedulingOracleDeps.ml

(limited to 'aarch64')

diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml
index 63fdec17..358ee97b 100644
--- a/aarch64/PrepassSchedulingOracle.ml
+++ b/aarch64/PrepassSchedulingOracle.ml
@@ -3,7 +3,8 @@ open RTL
 open Maps
 open InstructionScheduler
 open Registers
-
+open PrepassSchedulingOracleDeps
+   
 let use_alias_analysis () = false
                           
 let length_of_chunk = function
@@ -18,14 +19,14 @@ let length_of_chunk = function
 | Mfloat64 
 | Many64 -> 8;;
 
-let get_simple_dependencies (seqa : (instruction*Regset.t) array) =
+let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.t) array) =
   let last_reg_reads : int list PTree.t ref = ref PTree.empty
   and last_reg_write : (int*int) PTree.t ref = ref PTree.empty
   and last_mem_reads : int list ref = ref []
   and last_mem_write : int option ref = ref None
   and last_branch : int option ref = ref None
   and last_non_pipelined_op : int array = Array.make
-                                        OpWeights.nr_non_pipelined_units ( -1 )
+                                        opweights.nr_non_pipelined_units ( -1 )
   and latency_constraints : latency_constraint list ref = ref [] in
   let add_constraint instr_from instr_to latency =
     assert (instr_from <= instr_to);
@@ -141,15 +142,15 @@ let get_simple_dependencies (seqa : (instruction*Regset.t) array) =
       | Inop _ -> ()
       | Iop(op, inputs, output, _) ->
          add_non_pipelined_resources i
-           (OpWeights.non_pipelined_resources_of_op op (List.length inputs));
+           (opweights.non_pipelined_resources_of_op op (List.length inputs));
          (if Op.is_trapping_op op then irreversible_action i);
          add_input_regs i inputs;
-         add_output_reg i (OpWeights.latency_of_op op (List.length inputs)) output
+         add_output_reg i (opweights.latency_of_op op (List.length inputs)) output
       | Iload(trap, chunk, addressing, addr_regs, output, _) ->
          (if trap=TRAP then irreversible_action i);
          add_input_mem i;
          add_input_regs i addr_regs;
-         add_output_reg i (OpWeights.latency_of_load trap chunk addressing (List.length addr_regs)) output
+         add_output_reg i (opweights.latency_of_load trap chunk addressing (List.length addr_regs)) output
       | Istore(chunk, addressing, addr_regs, input, _) ->
          irreversible_action i;
          add_input_regs i addr_regs;
@@ -163,7 +164,7 @@ let get_simple_dependencies (seqa : (instruction*Regset.t) array) =
          );
          add_input_mem i;
          add_input_regs i inputs;
-         add_output_reg i (OpWeights.latency_of_call signature ef) output;
+         add_output_reg i (opweights.latency_of_call signature ef) output;
          add_output_mem i;
          failwith "Icall"
       | Itailcall(signature, ef, inputs) ->
@@ -200,21 +201,21 @@ let get_simple_dependencies (seqa : (instruction*Regset.t) array) =
     end seqa;
   !latency_constraints;;
 
-let resources_of_instruction = function
-  | Inop _ -> Array.map (fun _ -> 0) OpWeights.resource_bounds
+let resources_of_instruction (opweights : opweights) = function
+  | Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds
   | Iop(op, inputs, output, _) ->
-     OpWeights.resources_of_op op (List.length inputs)
+     opweights.resources_of_op op (List.length inputs)
   | Iload(trap, chunk, addressing, addr_regs, output, _) ->
-     OpWeights.resources_of_load trap chunk addressing (List.length addr_regs)
+     opweights.resources_of_load trap chunk addressing (List.length addr_regs)
   | Istore(chunk, addressing, addr_regs, input, _) ->
-     OpWeights.resources_of_store chunk addressing (List.length addr_regs)
+     opweights.resources_of_store chunk addressing (List.length addr_regs)
   | Icall(signature, ef, inputs, output, _) ->
-     OpWeights.resources_of_call signature ef
+     opweights.resources_of_call signature ef
   | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
-     OpWeights.resources_of_builtin ef
+     opweights.resources_of_builtin ef
   | Icond(cond, args, _, _ , _) ->
-     OpWeights.resources_of_cond cond (List.length args)
-  | Itailcall _ | Ijumptable _ | Ireturn _ -> OpWeights.resource_bounds
+     opweights.resources_of_cond cond (List.length args)
+  | Itailcall _ | Ijumptable _ | Ireturn _ -> opweights.pipelined_resource_bounds
     
 let print_sequence pp (seqa : instruction array) =
   Array.iteri (
@@ -405,11 +406,11 @@ let get_alias_dependencies seqa =
   !deps;;
  *)
 
-let define_problem seqa =
-  let simple_deps = get_simple_dependencies seqa in
+let define_problem (opweights : opweights) seqa =
+  let simple_deps = get_simple_dependencies opweights seqa in
   { max_latency = -1;
-    resource_bounds = OpWeights.resource_bounds;
-    instruction_usages = Array.map resources_of_instruction (Array.map fst seqa);
+    resource_bounds = opweights.pipelined_resource_bounds;
+    instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa);
     latency_constraints =
       (* if (use_alias_analysis ())
       then (get_alias_dependencies seqa) @ simple_deps
@@ -439,6 +440,7 @@ let prepass_scheduler_by_name name problem early_ones =
   | _ -> scheduler_by_name name problem
   
 let schedule_sequence (seqa : (instruction*Regset.t) array) =
+  let opweights = OpWeights.get_opweights () in
   try
     if (Array.length seqa) <= 1
     then None
@@ -446,7 +448,7 @@ let schedule_sequence (seqa : (instruction*Regset.t) array) =
       begin
       let nr_instructions = Array.length seqa in
       Printf.printf "prepass scheduling length = %d\n" (Array.length seqa);
-      let problem = define_problem seqa in
+      let problem = define_problem opweights seqa in
       print_sequence stdout (Array.map fst seqa);
       print_problem stdout problem;
       match prepass_scheduler_by_name
diff --git a/aarch64/PrepassSchedulingOracleDeps.ml b/aarch64/PrepassSchedulingOracleDeps.ml
new file mode 100644
index 00000000..8d10d406
--- /dev/null
+++ b/aarch64/PrepassSchedulingOracleDeps.ml
@@ -0,0 +1,17 @@
+type called_function = (Registers.reg, AST.ident) Datatypes.sum (* callee of a call: a register or a symbol *)
+  
+type opweights = (* cost-model record handed to the prepass scheduling oracle *)
+  {
+    pipelined_resource_bounds : int array; (* becomes the scheduling problem's resource_bounds *)
+    nr_non_pipelined_units : int; (* length of the oracle's last_non_pipelined_op array *)
+    latency_of_op : Op.operation -> int -> int; (* int argument: number of input registers *)
+    resources_of_op : Op.operation -> int -> int array; (* same arguments as latency_of_op *)
+    non_pipelined_resources_of_op : Op.operation -> int -> int array; (* same arguments as latency_of_op *)
+    latency_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int; (* int argument: number of address registers *)
+    resources_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int array; (* same arguments as latency_of_load *)
+    resources_of_store : AST.memory_chunk -> Op.addressing -> int -> int array; (* int argument: number of address registers *)
+    resources_of_cond : Op.condition -> int -> int array; (* int argument: number of condition arguments *)
+    latency_of_call : AST.signature -> called_function -> int;
+    resources_of_call : AST.signature -> called_function -> int array;
+    resources_of_builtin : AST.external_function -> int array
+  };;
-- 
cgit 


From 3486eb50d289456b79de168e1ab410306305cf2c Mon Sep 17 00:00:00 2001
From: David Monniaux <david.monniaux@univ-grenoble-alpes.fr>
Date: Thu, 22 Oct 2020 13:47:17 +0200
Subject: new OpWeights for aarch64

---
 aarch64/OpWeights.ml | 660 ++++++++++++++++++++++++++-------------------------
 1 file changed, 342 insertions(+), 318 deletions(-)

(limited to 'aarch64')

diff --git a/aarch64/OpWeights.ml b/aarch64/OpWeights.ml
index 1b48bc0f..5cdd002c 100644
--- a/aarch64/OpWeights.ml
+++ b/aarch64/OpWeights.ml
@@ -1,329 +1,353 @@
 open Op;;
-let resource_bounds = [| 2; 2; 1; 1 |];; (* instr ; ALU ; MAC; LSU *)
-let nr_non_pipelined_units = 1;;
+open PrepassSchedulingOracleDeps;;
 
-let latency_of_op (op : operation) (nargs : int) =
-  match op with
-  | Omove 
-  | Ointconst _
-  | Olongconst _
-  | Ofloatconst _
-  | Osingleconst _
-  | Oaddrsymbol _
-  | Oaddrstack _ -> 1
-  | Oshift _ -> 2
-  | Oadd -> 1
-  | Oaddshift _ -> 2
-  | Oaddimm _
-  | Oneg -> 1                   
-  | Onegshift _ -> 2
-  | Osub -> 1
-  | Osubshift _ -> 2
-  | Omul
-  | Omuladd  
-  | Omulsub -> 4
-  | Odiv
-  | Odivu -> 29
-  | Oand -> 1
-  | Oandshift _ -> 2
-  | Oandimm _ -> 1
-  | Oor -> 1
-  | Oorshift _ -> 2
-  | Oorimm _ -> 1
-  | Oxor -> 1
-  | Oxorshift _ -> 2
-  | Oxorimm _ -> 1
-  | Onot -> 1
-  | Onotshift _ -> 2
-  | Obic -> 1
-  | Obicshift _ -> 2
-  | Oorn -> 1
-  | Oornshift _ -> 2
-  | Oeqv -> 1
-  | Oeqvshift _ -> 2
-  | Oshl 
-  | Oshr
-  | Oshru -> 2
-  | Oshrximm _ -> 6
-  | Ozext _
-  | Osext _ -> 1
-  | Oshlzext _
-  | Oshlsext _
-  | Ozextshr _
-  | Osextshr _ -> 2
-              
-(* 64-bit integer arithmetic *)
-  | Oshiftl _ -> 2
-  | Oextend _ -> 1
-  | Omakelong
-  | Olowlong 
-  | Ohighlong 
-  | Oaddl -> 1
-  | Oaddlshift _
-  | Oaddlext _ -> 2
-  | Oaddlimm _
-  | Onegl -> 1         
-  | Oneglshift _ -> 2
-  | Osubl -> 1
-  | Osublshift _
-  | Osublext _ -> 2
-  | Omull
-  | Omulladd 
-  | Omullsub 
-  | Omullhs 
-  | Omullhu -> 4
-  | Odivl -> 50
-  | Odivlu -> 50
-  | Oandl -> 1
-  | Oandlshift _ -> 2
-  | Oandlimm _
-  | Oorl -> 1 
-  | Oorlshift _ -> 2
-  | Oorlimm _
-  | Oxorl -> 1
-  | Oxorlshift _ -> 2
-  | Oxorlimm _
-  | Onotl -> 1
-  | Onotlshift _ -> 2
-  | Obicl -> 1
-  | Obiclshift _ -> 2
-  | Oornl -> 1
-  | Oornlshift _ -> 2
-  | Oeqvl -> 1
-  | Oeqvlshift _ -> 2
-  | Oshll 
-  | Oshrl
-  | Oshrlu -> 2
-  | Oshrlximm _ -> 6
-  | Ozextl _
-  | Osextl _ -> 1
-  | Oshllzext _
-  | Oshllsext _
-  | Ozextshrl _
-  | Osextshrl _ -> 2
-                 
-(* 64-bit floating-point arithmetic *)
-  | Onegf                                               (* r [rd = - r1] *)
-  | Oabsf                                               (* r [rd = abs(r1)] *)
-  | Oaddf                                               (* r [rd = r1 + r2] *)
-  | Osubf                                               (* r [rd = r1 - r2] *)
-  | Omulf                                               (* r [rd = r1 * r2] *)
+module Cortex_A53=
+  struct
+    let resource_bounds = [| 2; 2; 1; 1 |];; (* instr ; ALU ; MAC; LSU *)
+    let nr_non_pipelined_units = 1;;
+
+    let latency_of_op (op : operation) (nargs : int) =
+      match op with
+      | Omove 
+        | Ointconst _
+        | Olongconst _
+        | Ofloatconst _
+        | Osingleconst _
+        | Oaddrsymbol _
+        | Oaddrstack _ -> 1
+      | Oshift _ -> 2
+      | Oadd -> 1
+      | Oaddshift _ -> 2
+      | Oaddimm _
+        | Oneg -> 1                   
+      | Onegshift _ -> 2
+      | Osub -> 1
+      | Osubshift _ -> 2
+      | Omul
+        | Omuladd  
+        | Omulsub -> 4
+      | Odiv
+        | Odivu -> 29
+      | Oand -> 1
+      | Oandshift _ -> 2
+      | Oandimm _ -> 1
+      | Oor -> 1
+      | Oorshift _ -> 2
+      | Oorimm _ -> 1
+      | Oxor -> 1
+      | Oxorshift _ -> 2
+      | Oxorimm _ -> 1
+      | Onot -> 1
+      | Onotshift _ -> 2
+      | Obic -> 1
+      | Obicshift _ -> 2
+      | Oorn -> 1
+      | Oornshift _ -> 2
+      | Oeqv -> 1
+      | Oeqvshift _ -> 2
+      | Oshl 
+        | Oshr
+        | Oshru -> 2
+      | Oshrximm _ -> 6
+      | Ozext _
+        | Osext _ -> 1
+      | Oshlzext _
+        | Oshlsext _
+        | Ozextshr _
+        | Osextshr _ -> 2
+                      
+      (* 64-bit integer arithmetic *)
+      | Oshiftl _ -> 2
+      | Oextend _ -> 1
+      | Omakelong
+        | Olowlong 
+        | Ohighlong 
+        | Oaddl -> 1
+      | Oaddlshift _
+        | Oaddlext _ -> 2
+      | Oaddlimm _
+        | Onegl -> 1         
+      | Oneglshift _ -> 2
+      | Osubl -> 1
+      | Osublshift _
+        | Osublext _ -> 2
+      | Omull
+        | Omulladd 
+        | Omullsub 
+        | Omullhs 
+        | Omullhu -> 4
+      | Odivl -> 50
+      | Odivlu -> 50
+      | Oandl -> 1
+      | Oandlshift _ -> 2
+      | Oandlimm _
+        | Oorl -> 1 
+      | Oorlshift _ -> 2
+      | Oorlimm _
+        | Oxorl -> 1
+      | Oxorlshift _ -> 2
+      | Oxorlimm _
+        | Onotl -> 1
+      | Onotlshift _ -> 2
+      | Obicl -> 1
+      | Obiclshift _ -> 2
+      | Oornl -> 1
+      | Oornlshift _ -> 2
+      | Oeqvl -> 1
+      | Oeqvlshift _ -> 2
+      | Oshll 
+        | Oshrl
+        | Oshrlu -> 2
+      | Oshrlximm _ -> 6
+      | Ozextl _
+        | Osextl _ -> 1
+      | Oshllzext _
+        | Oshllsext _
+        | Ozextshrl _
+        | Osextshrl _ -> 2
+                       
+      (* 64-bit floating-point arithmetic *)
+      | Onegf                                               (* r [rd = - r1] *)
+        | Oabsf                                               (* r [rd = abs(r1)] *)
+        | Oaddf                                               (* r [rd = r1 + r2] *)
+        | Osubf                                               (* r [rd = r1 - r2] *)
+        | Omulf                                               (* r [rd = r1 * r2] *)
 (* 32-bit floating-point arithmetic *)
-  | Onegfs                                              (* r [rd = - r1] *)
-  | Oabsfs                                              (* r [rd = abs(r1)] *)
-  | Oaddfs                                              (* r [rd = r1 + r2] *)
-  | Osubfs                                              (* r [rd = r1 - r2] *)
-  | Omulfs                                              (* r [rd = r1 * r2] *)
-  | Osingleoffloat                                      (* r [rd] is [r1] truncated to single-precision float *)
-  | Ofloatofsingle                                      (* r [rd] is [r1] extended to double-precision float *)
+        | Onegfs                                              (* r [rd = - r1] *)
+        | Oabsfs                                              (* r [rd = abs(r1)] *)
+        | Oaddfs                                              (* r [rd = r1 + r2] *)
+        | Osubfs                                              (* r [rd = r1 - r2] *)
+        | Omulfs                                              (* r [rd = r1 * r2] *)
+        | Osingleoffloat                                      (* r [rd] is [r1] truncated to single-precision float *)
+        | Ofloatofsingle                                      (* r [rd] is [r1] extended to double-precision float *)
 (*  Conversions between int and float *)
-  | Ointoffloat                                         (* r [rd = signed_int_of_float64(r1)] *)
-  | Ointuoffloat                                        (* r [rd = unsigned_int_of_float64(r1)] *)
-  | Ofloatofint                                         (* r [rd = float64_of_signed_int(r1)] *)
-  | Ofloatofintu                                        (* r [rd = float64_of_unsigned_int(r1)] *)
-  | Ointofsingle                                        (* r [rd = signed_int_of_float32(r1)] *)
-  | Ointuofsingle                                       (* r [rd = unsigned_int_of_float32(r1)] *)
-  | Osingleofint                                        (* r [rd = float32_of_signed_int(r1)] *)
-  | Osingleofintu                                       (* r [rd = float32_of_unsigned_int(r1)] *)
-  | Olongoffloat                                        (* r [rd = signed_long_of_float64(r1)] *)
-  | Olonguoffloat                                       (* r [rd = unsigned_long_of_float64(r1)] *)
-  | Ofloatoflong                                        (* r [rd = float64_of_signed_long(r1)] *)
-  | Ofloatoflongu                                       (* r [rd = float64_of_unsigned_long(r1)] *)
-  | Olongofsingle                                       (* r [rd = signed_long_of_float32(r1)] *)
-  | Olonguofsingle                                      (* r [rd = unsigned_long_of_float32(r1)] *)
-  | Osingleoflong                                       (* r [rd = float32_of_signed_long(r1)] *)
-  | Osingleoflongu                                      (* r [rd = float32_of_unsigned_int(r1)] *)
-    -> 6
-  | Odivf -> 50                                         (* r [rd = r1 / r2] *)
-  | Odivfs -> 20
+        | Ointoffloat                                         (* r [rd = signed_int_of_float64(r1)] *)
+        | Ointuoffloat                                        (* r [rd = unsigned_int_of_float64(r1)] *)
+        | Ofloatofint                                         (* r [rd = float64_of_signed_int(r1)] *)
+        | Ofloatofintu                                        (* r [rd = float64_of_unsigned_int(r1)] *)
+        | Ointofsingle                                        (* r [rd = signed_int_of_float32(r1)] *)
+        | Ointuofsingle                                       (* r [rd = unsigned_int_of_float32(r1)] *)
+        | Osingleofint                                        (* r [rd = float32_of_signed_int(r1)] *)
+        | Osingleofintu                                       (* r [rd = float32_of_unsigned_int(r1)] *)
+        | Olongoffloat                                        (* r [rd = signed_long_of_float64(r1)] *)
+        | Olonguoffloat                                       (* r [rd = unsigned_long_of_float64(r1)] *)
+        | Ofloatoflong                                        (* r [rd = float64_of_signed_long(r1)] *)
+        | Ofloatoflongu                                       (* r [rd = float64_of_unsigned_long(r1)] *)
+        | Olongofsingle                                       (* r [rd = signed_long_of_float32(r1)] *)
+        | Olonguofsingle                                      (* r [rd = unsigned_long_of_float32(r1)] *)
+        | Osingleoflong                                       (* r [rd = float32_of_signed_long(r1)] *)
+        | Osingleoflongu                                      (* r [rd = float32_of_unsigned_long(r1)] *)
+        -> 6
+      | Odivf -> 50                                         (* r [rd = r1 / r2] *)
+      | Odivfs -> 20
   (*  Boolean tests *)
-  | Ocmp cmp | Osel (cmp, _) ->
-     (match cmp with
-     | Ccompf _                              (* r FP comparison *)
-     | Cnotcompf _                           (* r negation of an FP comparison *)
-     | Ccompfzero _                          (* r comparison with 0.0 *)
-     | Cnotcompfzero _                       (* r negation of comparison with 0.0 *)
-     | Ccompfs _                             (* r FP comparison *)
-     | Cnotcompfs _                          (* r negation of an FP comparison *)
-     | Ccompfszero _                         (* r equal to 0.0 *)
-     | Cnotcompfszero _                     (* r not equal to 0.0 *) -> 6
-     | _ -> 1);;
-
-let resources_of_op (op : operation) (nargs : int) =
-  match op with
-  | Omove 
-  | Ointconst _
-  | Olongconst _
-  | Ofloatconst _
-  | Osingleconst _
-  | Oaddrsymbol _
-  | Oaddrstack _
-(* 32-bit integer arithmetic *)
-  | Oshift _
-  | Oadd 
-  | Oaddshift _
-  | Oaddimm _
-  | Oneg         
-  | Onegshift _
-  | Osub
-  | Osubshift _ -> [| 1 ; 1; 0; 0 |]
-  | Omul 
-  | Omuladd
-  | Omulsub -> [| 1; 1; 1; 0 |]
-  | Odiv 
-  | Odivu -> [| 1; 0; 0; 0 |]
-  | Oand 
-  | Oandshift _
-  | Oandimm _
-  | Oor
-  | Oorshift _
-  | Oorimm _
-  | Oxor
-  | Oxorshift _
-  | Oxorimm _
-  | Onot
-  | Onotshift _
-  | Obic
-  | Obicshift _
-  | Oorn
-  | Oornshift _
-  | Oeqv
-  | Oeqvshift _
-  | Oshl
-  | Oshr
-  | Oshru
-  | Oshrximm _
-  | Ozext _
-  | Osext  _
-  | Oshlzext _
-  | Oshlsext _
-  | Ozextshr _
-  | Osextshr _
+      | Ocmp cmp | Osel (cmp, _) ->
+         (match cmp with
+          | Ccompf _                              (* r FP comparison *)
+            | Cnotcompf _                           (* r negation of an FP comparison *)
+            | Ccompfzero _                          (* r comparison with 0.0 *)
+            | Cnotcompfzero _                       (* r negation of comparison with 0.0 *)
+            | Ccompfs _                             (* r FP comparison *)
+            | Cnotcompfs _                          (* r negation of an FP comparison *)
+            | Ccompfszero _                         (* r equal to 0.0 *)
+            | Cnotcompfszero _                     (* r not equal to 0.0 *) -> 6
+          | _ -> 1);;
+    
+    let resources_of_op (op : operation) (nargs : int) =
+      match op with
+      | Omove 
+        | Ointconst _
+        | Olongconst _
+        | Ofloatconst _
+        | Osingleconst _
+        | Oaddrsymbol _
+        | Oaddrstack _
+        (* 32-bit integer arithmetic *)
+        | Oshift _
+        | Oadd 
+        | Oaddshift _
+        | Oaddimm _
+        | Oneg         
+        | Onegshift _
+        | Osub
+        | Osubshift _ -> [| 1 ; 1; 0; 0 |]
+      | Omul 
+        | Omuladd
+        | Omulsub -> [| 1; 1; 1; 0 |]
+      | Odiv 
+        | Odivu -> [| 1; 0; 0; 0 |]
+      | Oand 
+        | Oandshift _
+        | Oandimm _
+        | Oor
+        | Oorshift _
+        | Oorimm _
+        | Oxor
+        | Oxorshift _
+        | Oxorimm _
+        | Onot
+        | Onotshift _
+        | Obic
+        | Obicshift _
+        | Oorn
+        | Oornshift _
+        | Oeqv
+        | Oeqvshift _
+        | Oshl
+        | Oshr
+        | Oshru
+        | Oshrximm _
+        | Ozext _
+        | Osext  _
+        | Oshlzext _
+        | Oshlsext _
+        | Ozextshr _
+        | Osextshr _
   
 (* 64-bit integer arithmetic *)
-  | Oshiftl _
-  | Oextend _
-  | Omakelong
-  | Olowlong 
-  | Ohighlong
-  | Oaddl 
-  | Oaddlshift _
-  | Oaddlext _
-  | Oaddlimm _
-  | Onegl        
-  | Oneglshift _
-  | Osubl
-  | Osublshift _
-  | Osublext _ -> [| 1 ; 1 ; 0; 0 |]
-  | Omull 
-  | Omulladd 
-  | Omullsub
-  | Omullhs 
-  | Omullhu -> [| 1 ; 1 ; 1; 0 |]
-  | Odivl 
-  | Odivlu -> [| 1; 0; 0; 0 |]
-  | Oandl 
-  | Oandlshift _
-  | Oandlimm _
-  | Oorl
-  | Oorlshift _
-  | Oorlimm _
-  | Oxorl
-  | Oxorlshift _
-  | Oxorlimm _
-  | Onotl
-  | Onotlshift _
-  | Obicl
-  | Obiclshift _
-  | Oornl
-  | Oornlshift _
-  | Oeqvl
-  | Oeqvlshift _
-  | Oshll
-  | Oshrl
-  | Oshrlu 
-  | Oshrlximm _
-  | Ozextl _
-  | Osextl _
-  | Oshllzext _
-  | Oshllsext _
-  | Ozextshrl _
-  | Osextshrl _ -> [| 1; 1; 0; 0 |]
-(* 64-bit floating-point arithmetic *)
-  | Onegf                                               (* r [rd = - r1] *)
-  | Oabsf                                               (* r [rd = abs(r1)] *)
-  | Oaddf                                               (* r [rd = r1 + r2] *)
-  | Osubf                                               (* r [rd = r1 - r2] *)
-  | Omulf                                               (* r [rd = r1 * r2] *)
-  | Odivf
-(* 32-bit floating-point arithmetic *)
-  | Onegfs                                              (* r [rd = - r1] *)
-  | Oabsfs                                              (* r [rd = abs(r1)] *)
-  | Oaddfs                                              (* r [rd = r1 + r2] *)
-  | Osubfs                                              (* r [rd = r1 - r2] *)
-  | Omulfs                                              (* r [rd = r1 * r2] *)
-  | Odivfs                                              (* r [rd = r1 / r2] *)
-  | Osingleoffloat                                      (* r [rd] is [r1] truncated to single-precision float *)
-  | Ofloatofsingle                                      (* r [rd] is [r1] extended to double-precision float *)
+        | Oshiftl _
+        | Oextend _
+        | Omakelong
+        | Olowlong 
+        | Ohighlong
+        | Oaddl 
+        | Oaddlshift _
+        | Oaddlext _
+        | Oaddlimm _
+        | Onegl        
+        | Oneglshift _
+        | Osubl
+        | Osublshift _
+        | Osublext _ -> [| 1 ; 1 ; 0; 0 |]
+      | Omull 
+        | Omulladd 
+        | Omullsub
+        | Omullhs 
+        | Omullhu -> [| 1 ; 1 ; 1; 0 |]
+      | Odivl 
+        | Odivlu -> [| 1; 0; 0; 0 |]
+      | Oandl 
+        | Oandlshift _
+        | Oandlimm _
+        | Oorl
+        | Oorlshift _
+        | Oorlimm _
+        | Oxorl
+        | Oxorlshift _
+        | Oxorlimm _
+        | Onotl
+        | Onotlshift _
+        | Obicl
+        | Obiclshift _
+        | Oornl
+        | Oornlshift _
+        | Oeqvl
+        | Oeqvlshift _
+        | Oshll
+        | Oshrl
+        | Oshrlu 
+        | Oshrlximm _
+        | Ozextl _
+        | Osextl _
+        | Oshllzext _
+        | Oshllsext _
+        | Ozextshrl _
+        | Osextshrl _ -> [| 1; 1; 0; 0 |]
+      (* 64-bit floating-point arithmetic *)
+      | Onegf                                               (* r [rd = - r1] *)
+        | Oabsf                                               (* r [rd = abs(r1)] *)
+        | Oaddf                                               (* r [rd = r1 + r2] *)
+        | Osubf                                               (* r [rd = r1 - r2] *)
+        | Omulf                                               (* r [rd = r1 * r2] *)
+        | Odivf
+        (* 32-bit floating-point arithmetic *)
+        | Onegfs                                              (* r [rd = - r1] *)
+        | Oabsfs                                              (* r [rd = abs(r1)] *)
+        | Oaddfs                                              (* r [rd = r1 + r2] *)
+        | Osubfs                                              (* r [rd = r1 - r2] *)
+        | Omulfs                                              (* r [rd = r1 * r2] *)
+        | Odivfs                                              (* r [rd = r1 / r2] *)
+        | Osingleoffloat                                      (* r [rd] is [r1] truncated to single-precision float *)
+        | Ofloatofsingle                                      (* r [rd] is [r1] extended to double-precision float *)
 (*  Conversions between int and float *)
-  | Ointoffloat                                         (* r [rd = signed_int_of_float64(r1)] *)
-  | Ointuoffloat                                        (* r [rd = unsigned_int_of_float64(r1)] *)
-  | Ofloatofint                                         (* r [rd = float64_of_signed_int(r1)] *)
-  | Ofloatofintu                                        (* r [rd = float64_of_unsigned_int(r1)] *)
-  | Ointofsingle                                        (* r [rd = signed_int_of_float32(r1)] *)
-  | Ointuofsingle                                       (* r [rd = unsigned_int_of_float32(r1)] *)
-  | Osingleofint                                        (* r [rd = float32_of_signed_int(r1)] *)
-  | Osingleofintu                                       (* r [rd = float32_of_unsigned_int(r1)] *)
-  | Olongoffloat                                        (* r [rd = signed_long_of_float64(r1)] *)
-  | Olonguoffloat                                       (* r [rd = unsigned_long_of_float64(r1)] *)
-  | Ofloatoflong                                        (* r [rd = float64_of_signed_long(r1)] *)
-  | Ofloatoflongu                                       (* r [rd = float64_of_unsigned_long(r1)] *)
-  | Olongofsingle                                       (* r [rd = signed_long_of_float32(r1)] *)
-  | Olonguofsingle                                      (* r [rd = unsigned_long_of_float32(r1)] *)
-  | Osingleoflong                                       (* r [rd = float32_of_signed_long(r1)] *)
-  | Osingleoflongu                                      (* r [rd = float32_of_unsigned_int(r1)] *)
-    -> [| 1 ; 1; 1; 0 |]
-     
+        | Ointoffloat                                         (* r [rd = signed_int_of_float64(r1)] *)
+        | Ointuoffloat                                        (* r [rd = unsigned_int_of_float64(r1)] *)
+        | Ofloatofint                                         (* r [rd = float64_of_signed_int(r1)] *)
+        | Ofloatofintu                                        (* r [rd = float64_of_unsigned_int(r1)] *)
+        | Ointofsingle                                        (* r [rd = signed_int_of_float32(r1)] *)
+        | Ointuofsingle                                       (* r [rd = unsigned_int_of_float32(r1)] *)
+        | Osingleofint                                        (* r [rd = float32_of_signed_int(r1)] *)
+        | Osingleofintu                                       (* r [rd = float32_of_unsigned_int(r1)] *)
+        | Olongoffloat                                        (* r [rd = signed_long_of_float64(r1)] *)
+        | Olonguoffloat                                       (* r [rd = unsigned_long_of_float64(r1)] *)
+        | Ofloatoflong                                        (* r [rd = float64_of_signed_long(r1)] *)
+        | Ofloatoflongu                                       (* r [rd = float64_of_unsigned_long(r1)] *)
+        | Olongofsingle                                       (* r [rd = signed_long_of_float32(r1)] *)
+        | Olonguofsingle                                      (* r [rd = unsigned_long_of_float32(r1)] *)
+        | Osingleoflong                                       (* r [rd = float32_of_signed_long(r1)] *)
+        | Osingleoflongu                                      (* r [rd = float32_of_unsigned_long(r1)] *)
+        -> [| 1 ; 1; 1; 0 |]
+         
 (* Boolean tests *)
-  | Ocmp cmp | Osel (cmp, _) ->
-     (match cmp with
-     | Ccompf _                              (* r FP comparison *)
-     | Cnotcompf _                           (* r negation of an FP comparison *)
-     | Ccompfzero _                          (* r comparison with 0.0 *)
-     | Cnotcompfzero _                       (* r negation of comparison with 0.0 *)
-     | Ccompfs _                             (* r FP comparison *)
-     | Cnotcompfs _                          (* r negation of an FP comparison *)
-     | Ccompfszero _                         (* r equal to 0.0 *)
-     | Cnotcompfszero _                     (* r not equal to 0.0 *) ->
-        [| 1; 1; 1; 0 |]
-     | _ -> [| 1; 1; 0; 0 |] );;
-  
-let non_pipelined_resources_of_op (op : operation) (nargs : int) =
-  match op with
-  | Odiv | Odivu -> [| 29 |]
-  | Odivfs -> [| 20 |]
-  | Odivl | Odivlu | Odivf -> [| 50 |]
-  | _ -> [| -1 |];;
-
-let resources_of_cond (cmp : condition) (nargs : int) =
+      | Ocmp cmp | Osel (cmp, _) ->
+         (match cmp with
+          | Ccompf _                              (* r FP comparison *)
+            | Cnotcompf _                           (* r negation of an FP comparison *)
+            | Ccompfzero _                          (* r comparison with 0.0 *)
+            | Cnotcompfzero _                       (* r negation of comparison with 0.0 *)
+            | Ccompfs _                             (* r FP comparison *)
+            | Cnotcompfs _                          (* r negation of an FP comparison *)
+            | Ccompfszero _                         (* r equal to 0.0 *)
+            | Cnotcompfszero _                     (* r not equal to 0.0 *) ->
+             [| 1; 1; 1; 0 |]
+          | _ -> [| 1; 1; 0; 0 |] );;
+    
+    let non_pipelined_resources_of_op (op : operation) (nargs : int) = (* one-element array per operation; [nargs] is not used *)
+      match op with
+      | Odiv | Odivu -> [| 29 |] (* 32-bit integer division *)
+      | Odivfs -> [| 20 |] (* same figure as the Odivfs latency in latency_of_op *)
+      | Odivl | Odivlu | Odivf -> [| 50 |] (* 64-bit integer division and double-precision division *)
+      | _ -> [| -1 |];; (* NOTE(review): -1 presumably marks "does not use the non-pipelined unit"; confirm against the scheduler *)
+    
+    let resources_of_cond (cmp : condition) (nargs : int) =
       (match cmp with
-     | Ccompf _                              (* r FP comparison *)
-     | Cnotcompf _                           (* r negation of an FP comparison *)
-     | Ccompfzero _                          (* r comparison with 0.0 *)
-     | Cnotcompfzero _                       (* r negation of comparison with 0.0 *)
-     | Ccompfs _                             (* r FP comparison *)
-     | Cnotcompfs _                          (* r negation of an FP comparison *)
-     | Ccompfszero _                         (* r equal to 0.0 *)
-     | Cnotcompfszero _                     (* r not equal to 0.0 *) ->
-        [| 1; 1; 1; 0 |]
-     | _ -> [| 1; 1; 0; 0 |] );;
-
-let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3;;
-let latency_of_call _ _ = 6;;
-
-let resources_of_load trap chunk addressing nargs = [| 1; 0; 0; 1 |];;
-
-let resources_of_store chunk addressing nargs = [| 1; 0; 0; 1 |];;
+       | Ccompf _                              (* r FP comparison *)
+         | Cnotcompf _                           (* r negation of an FP comparison *)
+         | Ccompfzero _                          (* r comparison with 0.0 *)
+         | Cnotcompfzero _                       (* r negation of comparison with 0.0 *)
+         | Ccompfs _                             (* r FP comparison *)
+         | Cnotcompfs _                          (* r negation of an FP comparison *)
+         | Ccompfszero _                         (* r equal to 0.0 *)
+         | Cnotcompfszero _                     (* r not equal to 0.0 *) ->
+          [| 1; 1; 1; 0 |]
+       | _ -> [| 1; 1; 0; 0 |] );;
+    
+    let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3;; (* constant: every load gets latency 3, whatever the arguments *)
+    let latency_of_call _ _ = 6;; (* constant: every call gets latency 6 *)
+    
+    let resources_of_load trap chunk addressing nargs = [| 1; 0; 0; 1 |];; (* constant usage vector; arguments are ignored *)
+    
+    let resources_of_store chunk addressing nargs = [| 1; 0; 0; 1 |];; (* same usage vector as loads *)
+    
+    let resources_of_call _ _ = resource_bounds;; (* NOTE(review): returns the bounds array itself, presumably to claim every unit; confirm *)
+    let resources_of_builtin _ = resource_bounds;; (* same treatment as calls *)
+  end;;
 
-let resources_of_call _ _ = resource_bounds;;
-let resources_of_builtin _ = resource_bounds;;
+let get_opweights () : opweights = (* builds the opweights record selected by !Clflags.option_mtune *)
+  match !Clflags.option_mtune with
+  | "cortex-a53" | "cortex-a35" | "" -> (* all three map to the Cortex_A53 tables; NOTE(review): "" is presumably the value when no -mtune is given; confirm in Clflags *)
+     {
+       pipelined_resource_bounds = Cortex_A53.resource_bounds;
+       nr_non_pipelined_units = Cortex_A53.nr_non_pipelined_units;
+       latency_of_op = Cortex_A53.latency_of_op;
+       resources_of_op = Cortex_A53.resources_of_op;
+       non_pipelined_resources_of_op = Cortex_A53.non_pipelined_resources_of_op;
+       latency_of_load = Cortex_A53.latency_of_load;
+       resources_of_load = Cortex_A53.resources_of_load;
+       resources_of_store = Cortex_A53.resources_of_store;
+       resources_of_cond = Cortex_A53.resources_of_cond;
+       latency_of_call = Cortex_A53.latency_of_call;
+       resources_of_call = Cortex_A53.resources_of_call;
+       resources_of_builtin = Cortex_A53.resources_of_builtin
+     }
+  | xxx -> failwith (Printf.sprintf "unknown -mtune: %s" xxx);; (* any other value raises Failure carrying the offending name *)
-- 
cgit 


From 0fe569d24b99a34fb3b9ad6c0cb86876cc86a31d Mon Sep 17 00:00:00 2001
From: David Monniaux <david.monniaux@univ-grenoble-alpes.fr>
Date: Wed, 4 Nov 2020 09:10:00 +0100
Subject: disable debug printing in scheduler

---
 aarch64/PrepassSchedulingOracle.ml | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'aarch64')

diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml
index 358ee97b..2c3eb14f 100644
--- a/aarch64/PrepassSchedulingOracle.ml
+++ b/aarch64/PrepassSchedulingOracle.ml
@@ -447,10 +447,12 @@ let schedule_sequence (seqa : (instruction*Regset.t) array) =
     else
       begin
       let nr_instructions = Array.length seqa in
-      Printf.printf "prepass scheduling length = %d\n" (Array.length seqa);
+      (if !Clflags.option_debug_compcert > 6
+       then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa));
       let problem = define_problem opweights seqa in
-      print_sequence stdout (Array.map fst seqa);
-      print_problem stdout problem;
+      (if !Clflags.option_debug_compcert > 7
+       then (print_sequence stdout (Array.map fst seqa);
+             print_problem stdout problem));
       match prepass_scheduler_by_name
               (!Clflags.option_fprepass_sched)
               problem
-- 
cgit