From 37cebaabf65fe3961b9932c6582d15b3b676cefe Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@college-de-france.fr>
Date: Mon, 17 Jun 2019 09:43:34 +0200
Subject: Perform constant propagation and strength reduction on conditional
 moves

A conditional move whose condition is statically known becomes a regular move.
Otherwise, the condition can sometimes be simplified by strength reduction.
---
 x86/ConstpropOp.vp     | 12 +++++++++++-
 x86/ConstpropOpproof.v | 26 +++++++++++++++++++++++++-
 2 files changed, 36 insertions(+), 2 deletions(-)

(limited to 'x86')

diff --git a/x86/ConstpropOp.vp b/x86/ConstpropOp.vp
index f59b9dba..ada8d54a 100644
--- a/x86/ConstpropOp.vp
+++ b/x86/ConstpropOp.vp
@@ -16,7 +16,7 @@
 Require Import Coqlib Compopts.
 Require Import AST Integers Floats.
 Require Import Op Registers.
-Require Import ValueDomain.
+Require Import ValueDomain ValueAOp.
 
 (** * Converting known values to constants *)
 
@@ -98,6 +98,15 @@ Nondetfunction make_cmp (c: condition) (args: list reg) (vl: list aval) :=
       make_cmp_base c args vl
   end.
 
+Definition make_select (c: condition) (ty: typ)
+                       (r1 r2: reg) (args: list reg) (vl: list aval) :=
+  match resolve_branch (eval_static_condition c vl) with
+  | Some b => (Omove, (if b then r1 else r2) :: nil)
+  | None =>
+      let (c', args') := cond_strength_reduction c args vl in
+      (Osel c' ty, r1 :: r2 :: args')
+  end.
+
 (** For addressing modes, we need to distinguish
 - reductions that produce pointers (i.e. that produce [Aglobal], [Ainstack], [Abased] and [Abasedscaled] addressing modes), which are valid only if the pointer size is right;
 - other reductions (producing [Aindexed] or [Aindexed2] modes), which are valid independently of the pointer size.
@@ -416,6 +425,7 @@ Nondetfunction op_strength_reduction
       let (addr', args') := addr_strength_reduction_64 addr args vl in
       (Oleal addr', args')
   | Ocmp c, args, vl => make_cmp c args vl
+  | Osel c ty, r1 :: r2 :: args, v1 :: v2 :: vl => make_select c ty r1 r2 args vl
   | Omulf, r1 :: r2 :: nil, v1 :: F n2 :: nil => make_mulfimm n2 r1 r1 r2
   | Omulf, r1 :: r2 :: nil, F n1 :: v2 :: nil => make_mulfimm n1 r2 r1 r2
   | Omulfs, r1 :: r2 :: nil, v1 :: FS n2 :: nil => make_mulfsimm n2 r1 r1 r2
diff --git a/x86/ConstpropOpproof.v b/x86/ConstpropOpproof.v
index 3bb0f3cd..6d2df9c1 100644
--- a/x86/ConstpropOpproof.v
+++ b/x86/ConstpropOpproof.v
@@ -14,7 +14,7 @@
 
 Require Import Coqlib Compopts.
 Require Import Integers Floats Values Memory Globalenvs Events.
-Require Import Op Registers RTL ValueDomain.
+Require Import Op Registers RTL ValueDomain ValueAOp ValueAnalysis.
 Require Import ConstpropOp.
 
 Section STRENGTH_REDUCTION.
@@ -371,6 +371,28 @@ Proof.
 - apply make_cmp_base_correct; auto.
 Qed.
 
+Lemma make_select_correct:
+  forall c ty r1 r2 args vl,
+  vl = map (fun r => AE.get r ae) args ->
+  let (op', args') := make_select c ty r1 r2 args vl in
+  exists v, eval_operation ge (Vptr sp Ptrofs.zero) op' e##args' m = Some v
+         /\ Val.lessdef (Val.select (eval_condition c e##args m) e#r1 e#r2 ty) v.
+Proof.
+  unfold make_select; intros.
+  destruct (resolve_branch (eval_static_condition c vl)) as [b|] eqn:RB.
+- exists (if b then e#r1 else e#r2); split.
++ simpl. destruct b; auto.
++ destruct (eval_condition c e##args m) as [b'|] eqn:EC; simpl; auto.
+  assert (b = b').
+  { eapply resolve_branch_sound; eauto. 
+    rewrite <- EC. apply eval_static_condition_sound with bc. 
+    subst vl. exact (aregs_sound _ _ _ args MATCH). }
+  subst b'. apply Val.lessdef_normalize.
+- generalize (cond_strength_reduction_correct c args vl H).
+  destruct (cond_strength_reduction c args vl) as [cond' args']; intros EQ.
+  econstructor; split. simpl; eauto. rewrite EQ; auto.
+Qed.
+
 Lemma make_addimm_correct:
   forall n r,
   let (op, args) := make_addimm n r in
@@ -905,6 +927,8 @@ Proof.
   auto.
 (* cond *)
   inv H0. apply make_cmp_correct; auto.
+(* select *)
+  inv H0. apply make_select_correct; congruence.
 (* mulf *)
   InvApproxRegs; SimplVM; inv H0. rewrite <- H2. apply make_mulfimm_correct; auto.
   InvApproxRegs; SimplVM; inv H0. fold (Val.mulf (Vfloat n1) e#r2).
-- 
cgit 


From ed0cfe4b38208f15763f2d1c0372c441a320ea56 Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@college-de-france.fr>
Date: Mon, 17 Jun 2019 14:22:22 +0200
Subject: Extended asm: print register names according to their types

When printing an extended asm code fragment, placeholders %n
are replaced by register names.

Currently we ignore the fact that some assemblers use different
register names depending on the width of the data that resides
in the register.

For example, x86_64 uses %rax for a 64-bit quantity and %eax for
a 32-bit quantity, but CompCert always prints %rax in extended asm
statements.  This is problematic if we want to use 32-bit integer
instructions in extended asm, e.g.

             int x, y;
             asm("addl %1, %0", "=r"(x), "r"(y));
produces
             addl %rax, %rdx

which is syntactically incorrect.

Another example is ARM FP registers: D0 is a double-precision float,
but S0 is a single-precision float.

This commit partially solves this issue by taking into account the
Cminor type of the asm parameter when printing the corresponding register.
Continuing the previous example,

             int x, y;
             asm("addl %1, %0", "=r"(x), "r"(y));
now produces
             addl %eax, %edx

This is not perfect yet: we use Cminor types, because this is all we
have at hand, and not source C types, hence "char" and "short" parameters
are still printed like "int" parameters, which is not good for x86.
(I.e. we produce %eax where GCC might have produced %al or %ax.)
We'll leave this issue open.
---
 x86/TargetPrinter.ml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'x86')

diff --git a/x86/TargetPrinter.ml b/x86/TargetPrinter.ml
index 3025d2e7..cd54e08b 100644
--- a/x86/TargetPrinter.ml
+++ b/x86/TargetPrinter.ml
@@ -62,8 +62,8 @@ let ireg64 oc r = output_string oc (int64_reg_name r)
 let ireg = if Archi.ptr64 then ireg64 else ireg32
 let freg oc r = output_string oc (float_reg_name r)
 
-let preg oc = function
-  | IR r -> ireg oc r
+let preg_asm oc ty = function
+  | IR r -> if ty = Tlong then ireg64 oc r else ireg32 oc r
   | FR r -> freg oc r
   | _    -> assert false
 
@@ -826,7 +826,7 @@ module Target(System: SYSTEM):TARGET =
                                (P.to_int kind) (extern_atom txt) args
           | EF_inline_asm(txt, sg, clob) ->
               fprintf oc "%s begin inline assembly\n\t" comment;
-              print_inline_asm preg oc (camlstring_of_coqstring txt) sg args res;
+              print_inline_asm preg_asm oc (camlstring_of_coqstring txt) sg args res;
               fprintf oc "%s end inline assembly\n" comment
           | _ ->
               assert false
-- 
cgit 


From fb20aab431a768299118ed30822af59cab13325e Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@college-de-france.fr>
Date: Tue, 2 Jul 2019 14:55:31 +0200
Subject: Remove the cparser/Builtins module

Move its definitions to modules C (the type `builtins`) and Env
(the operations that deal with the initial environment).

Reasons for the refactoring:
1- The name "Builtins" will soon be reused for a Coq module
2- `Env.initial()` makes more sense than `Builtins.environment()`.
---
 x86/CBuiltins.ml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'x86')

diff --git a/x86/CBuiltins.ml b/x86/CBuiltins.ml
index 69a2eb64..6fb8b697 100644
--- a/x86/CBuiltins.ml
+++ b/x86/CBuiltins.ml
@@ -26,10 +26,10 @@ let (va_list_type, va_list_scalar, size_va_list) =
     (TPtr(TVoid [], []), true, 4)
 
 let builtins = {
-  Builtins.typedefs = [
+  builtin_typedefs = [
     "__builtin_va_list", va_list_type;
   ];
-  Builtins.functions = [
+  builtin_functions = [
     (* Integer arithmetic *)
     "__builtin_bswap64",
       (TInt(IULongLong, []), [TInt(IULongLong, [])], false);
-- 
cgit 


From d08b080747225160b80c3f04bdfd9cd67550b425 Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@college-de-france.fr>
Date: Tue, 2 Jul 2019 16:25:40 +0200
Subject: Give formal semantics to some built-in functions and run-time
 functions

This commit adds mechanisms to
- recognize certain built-in and run-time functions by name and signature;
- associate semantics to these functions, as a partial function from
  list of values to values;
- interpret external calls to these functions according to this semantics
  (pure function from values to values, memory unchanged, no observable
  events in the trace);
- external calls to unknown built-in and run-time functions remain
  interpreted as generating observable events and possibly changing
  memory, like before.

The description of the built-ins is split into a target-independent
part (in common/Builtins0.v) and a target-specific part (in
$ARCH/Builtins1.v).

Instruction selection uses the new mechanism in order to
- recognize some built-in functions and turn them into operations
  of the target processor.  Currently, this is done for
  __builtin_sel and __builtin_fabs; more to come.
- remove the axioms about int64 helper functions from the standard
  library.  More precisely, the behavior of these functions is
  still axiomatized, but now it is specified using the more general
  machinery introduced in this commit, rather than ad-hoc axioms
  in backend/SplitLongproof.

The only built-ins currently described are __builtin_fsqrt (for all platforms)
and __builtin_fmin / __builtin_fmax (for x86).  More built-ins will be
added later.
---
 x86/Builtins1.v     | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 x86/SelectOp.vp     |  7 ++++++-
 x86/SelectOpproof.v | 24 +++++++++++++++---------
 3 files changed, 75 insertions(+), 10 deletions(-)
 create mode 100644 x86/Builtins1.v

(limited to 'x86')

diff --git a/x86/Builtins1.v b/x86/Builtins1.v
new file mode 100644
index 00000000..6103cc4c
--- /dev/null
+++ b/x86/Builtins1.v
@@ -0,0 +1,54 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, Collège de France and Inria Paris            *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the GNU General Public License as published by  *)
+(*  the Free Software Foundation, either version 2 of the License, or  *)
+(*  (at your option) any later version.  This file is also distributed *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Platform-specific built-in functions *)
+
+Require Import String Coqlib.
+Require Import AST Integers Floats Values.
+Require Import Builtins0.
+
+Inductive platform_builtin : Type :=
+  | BI_fmin
+  | BI_fmax.
+
+Local Open Scope string_scope.
+
+Definition platform_builtin_table : list (string * platform_builtin) :=
+     ("__builtin_fmin", BI_fmin)
+  :: ("__builtin_fmax", BI_fmax)
+  :: nil.
+
+Definition platform_builtin_sig (b: platform_builtin) : signature :=
+  match b with
+  | BI_fmin | BI_fmax =>
+      mksignature (Tfloat :: Tfloat :: nil) (Some Tfloat) cc_default
+  end.
+
+Definition platform_builtin_sem (b: platform_builtin) : builtin_sem (proj_sig_res (platform_builtin_sig b)) :=
+  match b with
+  | BI_fmin =>
+      mkbuiltin_n2t Tfloat Tfloat Tfloat
+        (fun f1 f2 => match Float.compare f1 f2 with
+                      | Some Eq | Some Lt => f1
+                      | Some Gt | None => f2
+                      end)
+  | BI_fmax =>
+      mkbuiltin_n2t Tfloat Tfloat Tfloat
+        (fun f1 f2 => match Float.compare f1 f2 with
+                      | Some Eq | Some Gt => f1
+                      | Some Lt | None => f2
+                      end)
+  end.
+
diff --git a/x86/SelectOp.vp b/x86/SelectOp.vp
index cf0f37ec..378590ce 100644
--- a/x86/SelectOp.vp
+++ b/x86/SelectOp.vp
@@ -38,7 +38,7 @@
 
 Require Import Coqlib.
 Require Import Compopts.
-Require Import AST Integers Floats.
+Require Import AST Integers Floats Builtins.
 Require Import Op CminorSel.
 
 Local Open Scope cminorsel_scope.
@@ -568,3 +568,8 @@ Nondetfunction builtin_arg (e: expr) :=
   | Eload chunk (Ainstack ofs) Enil => BA_loadstack chunk ofs
   | _ => BA e
   end.
+
+(** Platform-specific known builtins *)
+
+Definition platform_builtin (b: platform_builtin) (args: exprlist) : option expr :=
+  None.
diff --git a/x86/SelectOpproof.v b/x86/SelectOpproof.v
index b412ccf7..821a54e8 100644
--- a/x86/SelectOpproof.v
+++ b/x86/SelectOpproof.v
@@ -13,15 +13,9 @@
 (** Correctness of instruction selection for operators *)
 
 Require Import Coqlib.
-Require Import AST.
-Require Import Integers.
-Require Import Floats.
-Require Import Values.
-Require Import Memory.
-Require Import Globalenvs.
-Require Import Cminor.
-Require Import Op.
-Require Import CminorSel.
+Require Import AST Integers Floats.
+Require Import Values Memory Builtins Globalenvs.
+Require Import Cminor Op CminorSel.
 Require Import SelectOp.
 
 Local Open Scope cminorsel_scope.
@@ -1010,4 +1004,16 @@ Proof.
 - constructor; auto.
 Qed.
 
+(** Platform-specific known builtins *)
+
+Theorem eval_platform_builtin:
+  forall bf al a vl v le,
+  platform_builtin bf al = Some a ->
+  eval_exprlist ge sp e m le al vl ->
+  platform_builtin_sem bf vl = Some v ->
+  exists v', eval_expr ge sp e m le a v' /\ Val.lessdef v v'.
+Proof.
+  intros. discriminate.
+Qed.
+
 End CMCONSTR.
-- 
cgit 


From 822a6a12316aa043eea7f6aed4d730bc10a73d7b Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@college-de-france.fr>
Date: Wed, 10 Jul 2019 15:43:03 +0200
Subject: x86_64: branchless implementation of floatofintu and intuoffloat
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The implementation uses float <-> signed 64-bit integer conversion
instructions, and is both efficient and branchless.

Based on a suggestion by Rémi Hutin.
---
 x86/SelectOp.vp     | 27 +++++++++++++++++----------
 x86/SelectOpproof.v | 16 ++++++++++++----
 2 files changed, 29 insertions(+), 14 deletions(-)

(limited to 'x86')

diff --git a/x86/SelectOp.vp b/x86/SelectOp.vp
index 378590ce..31be8c32 100644
--- a/x86/SelectOp.vp
+++ b/x86/SelectOp.vp
@@ -40,6 +40,7 @@ Require Import Coqlib.
 Require Import Compopts.
 Require Import AST Integers Floats Builtins.
 Require Import Op CminorSel.
+Require Archi.
 
 Local Open Scope cminorsel_scope.
 
@@ -498,21 +499,27 @@ Nondetfunction floatofint (e: expr) :=
   end.
 
 Definition intuoffloat (e: expr) :=
-  Elet e
-    (Elet (Eop (Ofloatconst (Float.of_intu Float.ox8000_0000)) Enil)
-      (Econdition (CEcond (Ccompf Clt) (Eletvar 1 ::: Eletvar 0 ::: Enil))
-        (intoffloat (Eletvar 1))
-        (addimm Float.ox8000_0000 (intoffloat (subf (Eletvar 1) (Eletvar 0))))))%nat.
+  if Archi.splitlong then
+    Elet e
+      (Elet (Eop (Ofloatconst (Float.of_intu Float.ox8000_0000)) Enil)
+        (Econdition (CEcond (Ccompf Clt) (Eletvar 1 ::: Eletvar 0 ::: Enil))
+          (intoffloat (Eletvar 1))
+          (addimm Float.ox8000_0000 (intoffloat (subf (Eletvar 1) (Eletvar 0))))))%nat
+  else
+    Eop Olowlong (Eop Olongoffloat (e ::: Enil) ::: Enil).
 
 Nondetfunction floatofintu (e: expr) :=
   match e with
   | Eop (Ointconst n) Enil => Eop (Ofloatconst (Float.of_intu n)) Enil
   | _ =>
-    let f := Eop (Ofloatconst (Float.of_intu Float.ox8000_0000)) Enil in
-    Elet e
-      (Econdition (CEcond (Ccompuimm Clt Float.ox8000_0000) (Eletvar O ::: Enil))
-        (floatofint (Eletvar O))
-        (addf (floatofint (addimm (Int.neg Float.ox8000_0000) (Eletvar O))) f))
+    if Archi.splitlong then
+      let f := Eop (Ofloatconst (Float.of_intu Float.ox8000_0000)) Enil in
+      Elet e
+        (Econdition (CEcond (Ccompuimm Clt Float.ox8000_0000) (Eletvar O ::: Enil))
+          (floatofint (Eletvar O))
+          (addf (floatofint (addimm (Int.neg Float.ox8000_0000) (Eletvar O))) f))
+    else
+      Eop Ofloatoflong (Eop Ocast32unsigned (e ::: Enil) ::: Enil)
   end.
 
 Definition intofsingle (e: expr) := Eop Ointofsingle (e ::: Enil).
diff --git a/x86/SelectOpproof.v b/x86/SelectOpproof.v
index 821a54e8..a1bb0703 100644
--- a/x86/SelectOpproof.v
+++ b/x86/SelectOpproof.v
@@ -828,7 +828,8 @@ Proof.
   intros. destruct x; simpl in H0; try discriminate.
   destruct (Float.to_intu f) as [n|] eqn:?; simpl in H0; inv H0.
   exists (Vint n); split; auto. unfold intuoffloat.
-  set (im := Int.repr Int.half_modulus).
+  destruct Archi.splitlong.
+- set (im := Int.repr Int.half_modulus).
   set (fm := Float.of_intu im).
   assert (eval_expr ge sp e m (Vfloat fm :: Vfloat f :: le) (Eletvar (S O)) (Vfloat f)).
     constructor. auto.
@@ -855,6 +856,11 @@ Proof.
   rewrite Int.add_neg_zero in A4.
   rewrite Int.add_zero in A4.
   auto.
+- apply Float.to_intu_to_long in Heqo. repeat econstructor. eauto.
+  simpl. rewrite Heqo; reflexivity.
+  simpl. unfold Int64.loword. rewrite Int64.unsigned_repr, Int.repr_unsigned; auto.
+  assert (Int.modulus < Int64.max_unsigned) by reflexivity. 
+  generalize (Int.unsigned_range n); omega.
 Qed.
 
 Theorem eval_floatofintu:
@@ -864,10 +870,11 @@ Theorem eval_floatofintu:
   exists v, eval_expr ge sp e m le (floatofintu a) v /\ Val.lessdef y v.
 Proof.
   intros until y; unfold floatofintu. case (floatofintu_match a); intros.
-  InvEval. TrivialExists.
-  destruct x; simpl in H0; try discriminate. inv H0.
+- InvEval. TrivialExists.
+- destruct x; simpl in H0; try discriminate. inv H0.
   exists (Vfloat (Float.of_intu i)); split; auto.
-  econstructor. eauto.
+  destruct Archi.splitlong.
++ econstructor. eauto.
   set (fm := Float.of_intu Float.ox8000_0000).
   assert (eval_expr ge sp e m (Vint i :: le) (Eletvar O) (Vint i)).
     constructor. auto.
@@ -883,6 +890,7 @@ Proof.
   constructor. EvalOp. simpl; eauto. constructor. simpl; eauto.
   fold fm. rewrite Float.of_intu_of_int_2; auto.
   rewrite Int.sub_add_opp. auto.
++ rewrite Float.of_intu_of_long. repeat econstructor. eauto. reflexivity. 
 Qed.
 
 Theorem eval_intofsingle:
-- 
cgit