From 37cebaabf65fe3961b9932c6582d15b3b676cefe Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@college-de-france.fr>
Date: Mon, 17 Jun 2019 09:43:34 +0200
Subject: Perform constant propagation and strength reduction on conditional
 moves

A conditional move whose condition is statically known becomes a regular move.
Otherwise, the condition can sometimes be simplified by strength reduction.
---
 arm/ConstpropOp.vp     | 12 +++++++++++-
 arm/ConstpropOpproof.v | 26 +++++++++++++++++++++++++-
 2 files changed, 36 insertions(+), 2 deletions(-)

(limited to 'arm')

diff --git a/arm/ConstpropOp.vp b/arm/ConstpropOp.vp
index d62240ef..8555d3aa 100644
--- a/arm/ConstpropOp.vp
+++ b/arm/ConstpropOp.vp
@@ -20,7 +20,7 @@ Require Import Integers.
 Require Import Floats.
 Require Import Op.
 Require Import Registers.
-Require Import ValueDomain.
+Require Import ValueDomain ValueAOp.
 
 (** * Converting known values to constants *)
 
@@ -131,6 +131,15 @@ Nondetfunction make_cmp (c: condition) (args: list reg) (vl: list aval) :=
       make_cmp_base c args vl
   end.
 
+Definition make_select (c: condition) (ty: typ)
+                       (r1 r2: reg) (args: list reg) (vl: list aval) :=
+  match resolve_branch (eval_static_condition c vl) with
+  | Some b => (Omove, (if b then r1 else r2) :: nil)
+  | None =>
+      let (c', args') := cond_strength_reduction c args vl in
+      (Osel c' ty, r1 :: r2 :: args')
+  end.
+
 Definition make_addimm (n: int) (r: reg) :=
   if Int.eq n Int.zero
   then (Omove, r :: nil)
@@ -284,6 +293,7 @@ Nondetfunction op_strength_reduction
   | Oshr, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shrimm n2 r1 r2
   | Oshru, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shruimm n2 r1 r2
   | Ocmp c, args, vl => make_cmp c args vl
+  | Osel c ty, r1 :: r2 :: args, v1 :: v2 :: vl => make_select c ty r1 r2 args vl
   | Omulf, r1 :: r2 :: nil, v1 :: F n2 :: nil => make_mulfimm n2 r1 r1 r2
   | Omulf, r1 :: r2 :: nil, F n1 :: v2 :: nil => make_mulfimm n1 r2 r1 r2
   | Omulfs, r1 :: r2 :: nil, v1 :: FS n2 :: nil => make_mulfsimm n2 r1 r1 r2
diff --git a/arm/ConstpropOpproof.v b/arm/ConstpropOpproof.v
index 079ba2be..a4f5c29c 100644
--- a/arm/ConstpropOpproof.v
+++ b/arm/ConstpropOpproof.v
@@ -24,7 +24,7 @@ Require Import Events.
 Require Import Op.
 Require Import Registers.
 Require Import RTL.
-Require Import ValueDomain.
+Require Import ValueDomain ValueAOp ValueAnalysis.
 Require Import ConstpropOp.
 
 Local Transparent Archi.ptr64.
@@ -234,6 +234,28 @@ Proof.
 - apply make_cmp_base_correct; auto.
 Qed.
 
+Lemma make_select_correct:
+  forall c ty r1 r2 args vl,
+  vl = map (fun r => AE.get r ae) args ->
+  let (op', args') := make_select c ty r1 r2 args vl in
+  exists v, eval_operation ge (Vptr sp Ptrofs.zero) op' rs##args' m = Some v
+         /\ Val.lessdef (Val.select (eval_condition c rs##args m) rs#r1 rs#r2 ty) v.
+Proof.
+  unfold make_select; intros.
+  destruct (resolve_branch (eval_static_condition c vl)) as [b|] eqn:RB.
+- exists (if b then rs#r1 else rs#r2); split.
++ simpl. destruct b; auto.
++ destruct (eval_condition c rs##args m) as [b'|] eqn:EC; simpl; auto.
+  assert (b = b').
+  { eapply resolve_branch_sound; eauto. 
+    rewrite <- EC. apply eval_static_condition_sound with bc. 
+    subst vl. exact (aregs_sound _ _ _ args MATCH). }
+  subst b'. apply Val.lessdef_normalize.
+- generalize (cond_strength_reduction_correct c args vl H).
+  destruct (cond_strength_reduction c args vl) as [cond' args']; intros EQ.
+  econstructor; split. simpl; eauto. rewrite EQ; auto.
+Qed.
+
 Lemma make_addimm_correct:
   forall n r,
   let (op, args) := make_addimm n r in
@@ -616,6 +638,8 @@ Proof.
   InvApproxRegs; SimplVM. inv H0. apply make_shruimm_correct; auto.
 (* cmp *)
   inv H0. apply make_cmp_correct; auto.
+(* select *)
+  inv H0. apply make_select_correct; congruence.
 (* mulf *)
   InvApproxRegs; SimplVM; inv H0. rewrite <- H2. apply make_mulfimm_correct; auto.
   InvApproxRegs; SimplVM; inv H0. fold (Val.mulf (Vfloat n1) rs#r2).
-- 
cgit 


From ed0cfe4b38208f15763f2d1c0372c441a320ea56 Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@college-de-france.fr>
Date: Mon, 17 Jun 2019 14:22:22 +0200
Subject: Extended asm: print register names according to their types

When printing an extended asm code fragment, placeholders %n
are replaced by register names.

Currently we ignore the fact that some assemblers use different
register names depending on the width of the data that resides
in the register.

For example, x86_64 uses %rax for a 64-bit quantity and %eax for
a 32-bit quantity, but CompCert always prints %rax in extended asm
statements.  This is problematic if we want to use 32-bit integer
instructions in extended asm, e.g.

             int x, y;
             asm("addl %1, %0", "=r"(x), "r"(y));
produces
             addl %rax, %rdx

which is syntactically incorrect.

Another example is ARM FP registers: D0 is a double-precision float,
but S0 is a single-precision float.

This commit partially solves this issue by taking into account the
Cminor type of the asm parameter when printing the corresponding register.
Continuing the previous example,

             int x, y;
             asm("addl %1, %0", "=r"(x), "r"(y));
now produces
             addl %eax, %edx

This is not perfect yet: we use Cminor types, because this is all we
have at hand, and not source C types, hence "char" and "short" parameters
are still printed like "int" parameters, which is not good for x86.
(I.e. we produce %eax where GCC might have produced %al or %ax.)
We'll leave this issue open.
---
 arm/TargetPrinter.ml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arm')

diff --git a/arm/TargetPrinter.ml b/arm/TargetPrinter.ml
index 3a0814e1..03e06a65 100644
--- a/arm/TargetPrinter.ml
+++ b/arm/TargetPrinter.ml
@@ -113,9 +113,9 @@ struct
   let freg_single oc r = output_string oc (single_float_reg_name r)
   let freg_param_single oc r = output_string oc (single_param_reg_name r)
 
-  let preg oc = function
+  let preg_asm oc ty = function
     | IR r -> ireg oc r
-    | FR r -> freg oc r
+    | FR r -> if ty = Tsingle then freg_single oc r else freg oc r
     | _    -> assert false
 
   (* In Thumb2 mode, some arithmetic instructions have shorter encodings
@@ -480,7 +480,7 @@ struct
             (P.to_int kind) (extern_atom txt) args
         | EF_inline_asm(txt, sg, clob) ->
           fprintf oc "%s begin inline assembly\n\t" comment;
-          print_inline_asm preg oc (camlstring_of_coqstring txt) sg args res;
+          print_inline_asm preg_asm oc (camlstring_of_coqstring txt) sg args res;
           fprintf oc "%s end inline assembly\n" comment
         | _ ->
           assert false
-- 
cgit 


From 83deaa4b3a164423254008d8594de99edc491c3b Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@college-de-france.fr>
Date: Thu, 11 Jul 2019 10:56:23 +0200
Subject: Revised specification of NaN payload behavior

When an FP arithmetic instruction produces a NaN result, the payload
of this NaN depends on the architecture.

Before, the payload behavior was specified by 3 architecture-dependent
parameters: `Archi.choose_binop_pl_64` and `Archi.choose_binop_pl_32`
and `Archi.fpu_results_default_qNaN`.  This was adequate for
two-argument operations, but doesn't extend to FMA.

In preparation for FMA support, this commit generalizes the `Archi.choose`
functions from two arguments to any number of arguments.  In passing,
`Archi.fpu_results_default_qNaN` is no longer needed.
---
 arm/Archi.v | 56 +++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 39 insertions(+), 17 deletions(-)

(limited to 'arm')

diff --git a/arm/Archi.v b/arm/Archi.v
index 39a424ec..e78ff6ce 100644
--- a/arm/Archi.v
+++ b/arm/Archi.v
@@ -16,7 +16,7 @@
 
 (** Architecture-dependent parameters for ARM *)
 
-Require Import ZArith.
+Require Import ZArith List.
 (*From Flocq*)
 Require Import Binary Bits.
 
@@ -34,30 +34,52 @@ Proof.
   unfold splitlong, ptr64; congruence.
 Qed.
 
-Definition default_nan_64 : { x : binary64 | is_nan _ _ x = true } :=
-  exist _ (B754_nan 53 1024 false (iter_nat 51 _ xO xH) (eq_refl true)) (eq_refl true).
+Definition default_nan_64 := (false, iter_nat 51 _ xO xH).
+Definition default_nan_32 := (false, iter_nat 22 _ xO xH).
+
+(** Choose the first signaling NaN, if any;
+    otherwise choose the first NaN;
+    otherwise use default. *)
+
+Definition choose_nan (is_signaling: positive -> bool) 
+                      (default: bool * positive)
+                      (l0: list (bool * positive)) : bool * positive :=
+  let fix choose_snan (l1: list (bool * positive)) :=
+    match l1 with
+    | nil =>
+        match l0 with nil => default | n :: _ => n end
+    | ((s, p) as n) :: l1 =>
+        if is_signaling p then n else choose_snan l1
+    end
+  in choose_snan l0.
+
+Lemma choose_nan_idem: forall is_signaling default n,
+  choose_nan is_signaling default (n :: n :: nil) =
+  choose_nan is_signaling default (n :: nil).
+Proof.
+  intros. destruct n as [s p]; unfold choose_nan; simpl.
+  destruct (is_signaling p); auto. 
+Qed.
 
-Definition choose_binop_pl_64 (pl1 pl2 : positive) :=
-  (** Choose second NaN if pl2 is sNaN but pl1 is qNan.
-      In all other cases, choose first NaN *)
-  (Pos.testbit pl1 51 && negb (Pos.testbit pl2 51))%bool.
+Definition choose_nan_64 :=
+  choose_nan (fun p => negb (Pos.testbit p 51)) default_nan_64.
 
-Definition default_nan_32 : { x : binary32 | is_nan _ _ x = true } :=
-  exist _ (B754_nan 24 128 false (iter_nat 22 _ xO xH) (eq_refl true)) (eq_refl true).
+Definition choose_nan_32 :=
+  choose_nan (fun p => negb (Pos.testbit p 22)) default_nan_32.
 
-Definition choose_binop_pl_32 (pl1 pl2 : positive) :=
-  (** Choose second NaN if pl2 is sNaN but pl1 is qNan.
-      In all other cases, choose first NaN *)
-  (Pos.testbit pl1 22 && negb (Pos.testbit pl2 22))%bool.
+Lemma choose_nan_64_idem: forall n,
+  choose_nan_64 (n :: n :: nil) = choose_nan_64 (n :: nil).
+Proof. intros; apply choose_nan_idem. Qed.
 
-Definition fpu_returns_default_qNaN := false.
+Lemma choose_nan_32_idem: forall n,
+  choose_nan_32 (n :: n :: nil) = choose_nan_32 (n :: nil).
+Proof. intros; apply choose_nan_idem. Qed.
 
 Definition float_of_single_preserves_sNaN := false.
 
 Global Opaque ptr64 big_endian splitlong
-              default_nan_64 choose_binop_pl_64
-              default_nan_32 choose_binop_pl_32
-              fpu_returns_default_qNaN
+              default_nan_64 choose_nan_64
+              default_nan_32 choose_nan_32
               float_of_single_preserves_sNaN.
 
 (** Which ABI to use: either the standard ARM EABI with floats passed
-- 
cgit 


From 8b0de52ffa302298abe8d0d6e3b6ed339a2354ba Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@college-de-france.fr>
Date: Fri, 12 Jul 2019 11:42:25 +0200
Subject: Add floating-point square root and fused multiply-add

We just lift the corresponding functions from Flocq and add
the computation of NaN payloads.

NaN payloads for FMA are described in the ARM and RISC-V specifications,
and were determined experimentally for x86 and for Power.
---
 arm/Archi.v | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arm')

diff --git a/arm/Archi.v b/arm/Archi.v
index e78ff6ce..16d6c71d 100644
--- a/arm/Archi.v
+++ b/arm/Archi.v
@@ -75,11 +75,16 @@ Lemma choose_nan_32_idem: forall n,
   choose_nan_32 (n :: n :: nil) = choose_nan_32 (n :: nil).
 Proof. intros; apply choose_nan_idem. Qed.
 
+Definition fma_order {A: Type} (x y z: A) := (z, x, y).
+
+Definition fma_invalid_mul_is_nan := true.
+
 Definition float_of_single_preserves_sNaN := false.
 
 Global Opaque ptr64 big_endian splitlong
               default_nan_64 choose_nan_64
               default_nan_32 choose_nan_32
+              fma_order fma_invalid_mul_is_nan
               float_of_single_preserves_sNaN.
 
 (** Which ABI to use: either the standard ARM EABI with floats passed
-- 
cgit 


From fb20aab431a768299118ed30822af59cab13325e Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@college-de-france.fr>
Date: Tue, 2 Jul 2019 14:55:31 +0200
Subject: Remove the cparser/Builtins module

Move its definitions to modules C (the type `builtins`) and Env
(the operations that deal with the initial environment).

Reasons for the refactoring:
1- The name "Builtins" will soon be reused for a Coq module
2- `Env.initial()` makes more sense than `Builtins.environment()`.
---
 arm/CBuiltins.ml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arm')

diff --git a/arm/CBuiltins.ml b/arm/CBuiltins.ml
index ec4f4aaa..d6a1ea35 100644
--- a/arm/CBuiltins.ml
+++ b/arm/CBuiltins.ml
@@ -18,10 +18,10 @@
 open C
 
 let builtins = {
-  Builtins.typedefs = [
+  builtin_typedefs = [
     "__builtin_va_list", TPtr(TVoid [], [])
   ];
-  Builtins.functions = [
+  builtin_functions = [
     (* Integer arithmetic *)
     "__builtin_clz",
       (TInt(IInt, []), [TInt(IUInt, [])], false);
-- 
cgit 


From d08b080747225160b80c3f04bdfd9cd67550b425 Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@college-de-france.fr>
Date: Tue, 2 Jul 2019 16:25:40 +0200
Subject: Give formal semantics to some built-in functions and run-time
 functions

This commit adds mechanisms to
- recognize certain built-in and run-time functions by name and signature;
- associate semantics to these functions, as a partial function from
  list of values to values;
- interpret external calls to these functions according to this semantics
  (pure function from values to values, memory unchanged, no observable
  events in the trace);
- external calls to unknown built-in and run-time functions remain
  interpreted as generating observable events and possibly changing
  memory, like before.

The description of the built-ins is split into a target-independent
part (in common/Builtins0.v) and a target-specific part (in
$ARCH/Builtins1.v).

Instruction selection uses the new mechanism in order to
- recognize some built-in functions and turn them into operations
  of the target processor.  Currently, this is done for
  __builtin_sel and __builtin_fabs; more to come.
- remove the axioms about int64 helper functions from the standard
  library.  More precisely, the behavior of these functions is
  still axiomatized, but now it is specified using the more general
  machinery introduced in this commit, rather than ad-hoc axioms
  in backend/SplitLongproof.

The only built-ins currently described are __builtin_fsqrt (for all platforms)
and __builtin_fmin / __builtin_fmax (for x86).  More built-ins will be
added later.
---
 arm/Builtins1.v     | 33 +++++++++++++++++++++++++++++++++
 arm/SelectOp.vp     | 12 +++++++-----
 arm/SelectOpproof.v | 25 +++++++++++++++----------
 3 files changed, 55 insertions(+), 15 deletions(-)
 create mode 100644 arm/Builtins1.v

(limited to 'arm')

diff --git a/arm/Builtins1.v b/arm/Builtins1.v
new file mode 100644
index 00000000..f6e643d2
--- /dev/null
+++ b/arm/Builtins1.v
@@ -0,0 +1,33 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, Collège de France and Inria Paris            *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the GNU General Public License as published by  *)
+(*  the Free Software Foundation, either version 2 of the License, or  *)
+(*  (at your option) any later version.  This file is also distributed *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Platform-specific built-in functions *)
+
+Require Import String Coqlib.
+Require Import AST Integers Floats Values.
+Require Import Builtins0.
+
+Inductive platform_builtin : Type := .
+
+Local Open Scope string_scope.
+
+Definition platform_builtin_table : list (string * platform_builtin) :=
+  nil.
+
+Definition platform_builtin_sig (b: platform_builtin) : signature :=
+  match b with end.
+
+Definition platform_builtin_sem (b: platform_builtin) : builtin_sem (proj_sig_res (platform_builtin_sig b)) :=
+  match b with end.
diff --git a/arm/SelectOp.vp b/arm/SelectOp.vp
index d04832d6..1220abc4 100644
--- a/arm/SelectOp.vp
+++ b/arm/SelectOp.vp
@@ -38,11 +38,8 @@
 
 Require Import Coqlib.
 Require Import Compopts.
-Require Import AST.
-Require Import Integers.
-Require Import Floats.
-Require Import Op.
-Require Import CminorSel.
+Require Import AST Integers Floats Builtins.
+Require Import Op CminorSel.
 
 Local Open Scope cminorsel_scope.
 
@@ -518,3 +515,8 @@ Nondetfunction builtin_arg (e: expr) :=
   | Eop (Oaddimm n) (e1:::Enil) => BA_addptr (BA e1) (BA_int n)
   | _ => BA e
   end.
+
+(** Platform-specific known builtins *)
+
+Definition platform_builtin (b: platform_builtin) (args: exprlist) : option expr :=
+  None.
diff --git a/arm/SelectOpproof.v b/arm/SelectOpproof.v
index 8b546971..5d54d94f 100644
--- a/arm/SelectOpproof.v
+++ b/arm/SelectOpproof.v
@@ -13,16 +13,9 @@
 (** Correctness of instruction selection for operators *)
 
 Require Import Coqlib.
-Require Import Maps.
-Require Import AST.
-Require Import Integers.
-Require Import Floats.
-Require Import Values.
-Require Import Memory.
-Require Import Globalenvs.
-Require Import Cminor.
-Require Import Op.
-Require Import CminorSel.
+Require Import AST Integers Floats.
+Require Import Values Memory Builtins Globalenvs.
+Require Import Cminor Op CminorSel.
 Require Import SelectOp.
 
 Local Open Scope cminorsel_scope.
@@ -909,4 +902,16 @@ Proof.
 - constructor; auto.
 Qed.
 
+(** Platform-specific known builtins *)
+
+Theorem eval_platform_builtin:
+  forall bf al a vl v le,
+  platform_builtin bf al = Some a ->
+  eval_exprlist ge sp e m le al vl ->
+  platform_builtin_sem bf vl = Some v ->
+  exists v', eval_expr ge sp e m le a v' /\ Val.lessdef v v'.
+Proof.
+  intros. discriminate.
+Qed.
+
 End CMCONSTR.
-- 
cgit