Merge branch 'aarch64-peephole-tofix' into aarch64-peephole

author: Léo Gourdin <leo.gourdin@univ-grenoble-alpes.fr> 2020-12-20 14:33:44 +0100
committer: Léo Gourdin <leo.gourdin@univ-grenoble-alpes.fr> 2020-12-20 14:33:44 +0100
commit: a23b977c5e100c1fd24d47d99b7e860c0bcf64ae (patch)
tree: dc555a7401a74790f629aae02bb919834e1faf94 /aarch64
parent: a71ed69400fbd8f6533a32c117e7063f6b083049 (diff)
parent: 373ad4a6efcb6cd0ecd30e7c131640b9783f1269 (diff)
download: compcert-kvx-a23b977c5e100c1fd24d47d99b7e860c0bcf64ae.tar.gz
compcert-kvx-a23b977c5e100c1fd24d47d99b7e860c0bcf64ae.zip
10 files changed, 63 insertions, 54 deletions
diff --git a/aarch64/Archi.v b/aarch64/Archi.v
index 7d7b6887..7f39d1fa 100644
--- a/aarch64/Archi.v
+++ b/aarch64/Archi.v
@@ -6,15 +6,17 @@
 (*                                                                     *)
 (*  Copyright Institut National de Recherche en Informatique et en     *)
 (*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the GNU General Public License as published by  *)
+(*  the Free Software Foundation, either version 2 of the License, or  *)
+(*  (at your option) any later version.  This file is also distributed *)
 (*  under the terms of the INRIA Non-Commercial License Agreement.     *)
 (*                                                                     *)
 (* *********************************************************************)
 
 (** Architecture-dependent parameters for AArch64 *)
 
+From Flocq Require Import Binary Bits.
 Require Import ZArith List.
-(*From Flocq*)
-Require Import Binary Bits.
 
 Definition ptr64 := true.
 
diff --git a/aarch64/Asm.v b/aarch64/Asm.v
index 5d9cf601..30bac2a4 100644
--- a/aarch64/Asm.v
+++ b/aarch64/Asm.v
@@ -255,6 +255,7 @@ Inductive instruction: Type :=
   | Pclz (sz: isize) (rd r1: ireg)                                    (**r count leading zero bits *)
   | Prev (sz: isize) (rd r1: ireg)                                    (**r reverse bytes *)
   | Prev16 (sz: isize) (rd r1: ireg)                                  (**r reverse bytes in each 16-bit word *)
+  | Prbit (sz: isize) (rd r1: ireg)                                   (**r reverse bits *)
   (** Conditional data processing *)
   | Pcsel (rd: ireg) (r1 r2: ireg) (c: testcond)                      (**r int conditional move *)
   | Pcset (rd: ireg) (c: testcond)                                    (**r set to 1/0 if cond is true/false *)
@@ -300,6 +301,8 @@ Inductive instruction: Type :=
   | Pfmsub (sz: fsize) (rd r1 r2 r3: freg)                            (**r [rd = r3 - r1 * r2] *)
   | Pfnmadd (sz: fsize) (rd r1 r2 r3: freg)                           (**r [rd = - r3 - r1 * r2] *)
   | Pfnmsub (sz: fsize) (rd r1 r2 r3: freg)                           (**r [rd = - r3 + r1 * r2] *)
+  | Pfmax (sz: fsize) (rd r1 r2: freg)                                (**r maximum *)
+  | Pfmin (sz: fsize) (rd r1 r2: freg)                                (**r minimum *)
   (** Floating-point comparison *)
   | Pfcmp (sz: fsize) (r1 r2: freg)                                   (**r compare [r1] and [r2] *)
   | Pfcmp0 (sz: fsize) (r1: freg)                                     (**r compare [r1] and [+0.0] *)
@@ -1304,7 +1307,7 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       | Vint n =>
           match list_nth_z tbl (Int.unsigned n) with
           | None => Stuck
-          | Some lbl => goto_label f lbl (rs#X16 <- Vundef #X17 <- Vundef) m
+          | Some lbl => goto_label f lbl (rs#X16 <- Vundef) m
           end
       | _ => Stuck
       end
@@ -1324,11 +1327,14 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
   | Pclz _ _ _
   | Prev _ _ _
   | Prev16 _ _ _
+  | Prbit _ _ _
   | Pfsqrt _ _ _
   | Pfmadd _ _ _ _ _
   | Pfmsub _ _ _ _ _
   | Pfnmadd _ _ _ _ _
   | Pfnmsub _ _ _ _ _
+  | Pfmax _ _ _ _
+  | Pfmin _ _ _ _
   | Pcfi_adjust _ 
   | Pcfi_rel_offset _ =>
       Stuck
@@ -1351,7 +1357,7 @@ Inductive step: state -> trace -> state -> Prop :=
       external_call ef ge vargs m t vres m' ->
       rs' = nextinstr
               (set_res res vres
-                (undef_regs (map preg_of (destroyed_by_builtin ef)) rs)) ->
+                (undef_regs (DR X16 :: DR X30 :: map preg_of (destroyed_by_builtin ef)) rs)) ->
       step (State rs m) t (State rs' m')
   | exec_step_external:
       forall b ef args res rs m t rs' m',
diff --git a/aarch64/Asmblock.v b/aarch64/Asmblock.v
index a4decae7..58817776 100644
--- a/aarch64/Asmblock.v
+++ b/aarch64/Asmblock.v
@@ -640,7 +640,7 @@ Definition exec_cfi (f: function) (cfi: cf_instruction) (rs: regset) (m: mem) :
       match (rs#X16 <- Vundef)#r with
       | Vint n =>
           SOME lbl <- list_nth_z tbl (Int.unsigned n) IN
-          goto_label f lbl (rs#X16 <- Vundef #X17 <- Vundef) m
+          goto_label f lbl (rs#X16 <- Vundef) m
       | _ => Stuck
       end
   end.
@@ -951,7 +951,7 @@ Inductive exec_exit (f: function) size_b (rs: regset) (m: mem): (option control)
       external_call ef ge vargs m t vres m' ->
       rs' = incrPC size_b
               (set_res (map_builtin_res DR res) vres
-                (undef_regs (map preg_of (destroyed_by_builtin ef)) rs)) ->
+                (undef_regs (DR (IR X16) :: DR (IR X30) :: map preg_of (destroyed_by_builtin ef)) rs)) ->
       exec_exit f size_b rs m (Some (Pbuiltin ef args res)) t rs' m'
       .
 
diff --git a/aarch64/Asmblockdeps.v b/aarch64/Asmblockdeps.v
index 1ad18889..670a7d06 100644
--- a/aarch64/Asmblockdeps.v
+++ b/aarch64/Asmblockdeps.v
@@ -39,8 +39,10 @@ Require Import Events.
 
 Require Import Lia.
 
-Open Scope impure.
+Import ListNotations.
+Local Open Scope list_scope.
 
+Open Scope impure.
 (** auxiliary treatments of builtins *)
 
 Definition is_builtin(ex: option control): bool :=
@@ -1049,8 +1051,7 @@ Definition trans_control (ctl: control) : inst :=
   | Ptbz sz r n lbl => [(#PC, Op (Control (Otbz sz n lbl)) (PReg(#r) @ PReg(#PC) @ Enil))]
   | Pbtbl r tbl => [(#X16, Op (Constant Vundef) Enil);
                     (#PC, Op (Control (Obtbl tbl)) (PReg(#r) @ PReg(#PC) @ Enil));
-                    (#X16, Op (Constant Vundef) Enil);
-                    (#X17, Op (Constant Vundef) Enil)]
+                    (#X16, Op (Constant Vundef) Enil)]
   | Pbuiltin ef args res => []
   end.
 
@@ -1921,10 +1922,9 @@ Proof.
       try rewrite 2Pregmap.gso, Pregmap.gss; destruct (Val.offset_ptr (rsr PC) (Ptrofs.repr sz));
       try reflexivity; discriminate_ppos. Simpl sr.
       destruct (PregEq.eq X16 rr); [ subst; Simpl_update |];
-      destruct (PregEq.eq X17 rr); [ subst; Simpl_update |];
       destruct (PregEq.eq PC rr); [ subst; Simpl_update |].
       rewrite !Pregmap.gso; auto;
-      apply ppos_discr in n0; apply ppos_discr in n1; apply ppos_discr in n2;
+      apply ppos_discr in n0; apply ppos_discr in n1;
       rewrite !assign_diff; auto.
 Qed.
 
@@ -2042,11 +2042,11 @@ Proof.
 Qed.
 
 Lemma incrPC_undef_regs_commut l : forall d rs,
-  incrPC d (undef_regs (map preg_of l) rs) = undef_regs (map preg_of l) (incrPC d rs).
+  incrPC d (undef_regs l rs) = undef_regs l (incrPC d rs).
 Proof.
   induction l; simpl; auto.
   intros. rewrite IHl. unfold incrPC.
-  destruct (PregEq.eq (preg_of a) PC).
+  destruct (PregEq.eq a PC).
   - rewrite e. rewrite Pregmap.gss.
     simpl. apply f_equal. unfold Pregmap.set.
     apply functional_extensionality. intros x.
@@ -2054,7 +2054,9 @@ Proof.
   - rewrite Pregmap.gso; auto.
     apply f_equal. unfold Pregmap.set.
     apply functional_extensionality. intros x.
-    destruct (PregEq.eq x PC); subst; auto.
+    destruct (PregEq.eq x PC).
+    + subst. destruct a; simpl; auto. congruence.
+    + auto.
 Qed.
 
 Lemma bblock_simu_reduce:
diff --git a/aarch64/Asmexpand.ml b/aarch64/Asmexpand.ml
index 1a02b019..8187e077 100644
--- a/aarch64/Asmexpand.ml
+++ b/aarch64/Asmexpand.ml
@@ -337,7 +337,7 @@ let expand_builtin_inline name args res =
   | "__builtin_bswap16", [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
      emit (Prev16(W, res, a1));
      emit (Pandimm(W, res, RR0 res, Z.of_uint 0xFFFF))
-  (* Count leading zeros and leading sign bits *)
+  (* Count leading zeros, leading sign bits, trailing zeros *)
   | "__builtin_clz",  [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
      emit (Pclz(W, res, a1))
   | ("__builtin_clzl" | "__builtin_clzll"),  [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
@@ -346,10 +346,16 @@ let expand_builtin_inline name args res =
      emit (Pcls(W, res, a1))
   | ("__builtin_clsl" | "__builtin_clsll"),  [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
      emit (Pcls(X, res, a1))
+  | "__builtin_ctz",  [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
+     emit (Prbit(W, res, a1));
+     emit (Pclz(W, res, res))
+  | ("__builtin_ctzl" | "__builtin_ctzll"),  [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
+     emit (Prbit(X, res, a1));
+     emit (Pclz(X, res, res))
  (* Float arithmetic *)
   | "__builtin_fabs",  [BA(DR(FR a1))], BR(DR(FR res)) ->
      emit (Pfabs(D, res, a1))
-  | "__builtin_fsqrt",  [BA(DR(FR a1))], BR(DR(FR res)) ->
+  | ("__builtin_fsqrt" | "__builtin_sqrt"),  [BA(DR(FR a1))], BR(DR(FR res)) ->
      emit (Pfsqrt(D, res, a1))
   | "__builtin_fmadd", [BA(DR(FR a1)); BA(DR(FR a2)); BA(DR(FR a3))], BR(DR(FR res)) ->
       emit (Pfmadd(D, res, a1, a2, a3))
@@ -359,6 +365,10 @@ let expand_builtin_inline name args res =
       emit (Pfnmadd(D, res, a1, a2, a3))
   | "__builtin_fnmsub", [BA(DR(FR a1)); BA(DR(FR a2)); BA(DR(FR a3))], BR(DR(FR res)) ->
       emit (Pfnmsub(D, res, a1, a2, a3))
+  | "__builtin_fmax", [BA(DR(FR a1)); BA(DR(FR a2))], BR(DR(FR res)) ->
+      emit (Pfmax (D, res, a1, a2))
+  | "__builtin_fmin", [BA(DR(FR a1)); BA(DR(FR a2))], BR(DR(FR res)) ->
+      emit (Pfmin (D, res, a1, a2))
   (* Vararg *)
   | "__builtin_va_start", [BA(DR(IR(RR1 a)))], _ ->
       expand_builtin_va_start a
diff --git a/aarch64/Asmgenproof.v b/aarch64/Asmgenproof.v
index a91ec285..19821509 100644
--- a/aarch64/Asmgenproof.v
+++ b/aarch64/Asmgenproof.v
@@ -1936,28 +1936,25 @@ Proof.
     rewrite <- (label_pos_preserved f); auto.
     inversion MATCHI; subst.
     destruct label_pos; next_stuck_cong.
-    destruct (((incrPC (Ptrofs.repr (size bb)) rs1) # X16 <- Vundef) # X17 <- Vundef PC) eqn:INCRPC; next_stuck_cong.
+    destruct ((incrPC (Ptrofs.repr (size bb)) rs1) # X16 <- Vundef PC) eqn:INCRPC; next_stuck_cong.
     inversion H0; auto. repeat (econstructor; eauto).
     rewrite !Pregmap.gso; try congruence.
     rewrite <- AGPC.
     unfold incrPC in *.
     destruct (rs1 PC) eqn:EQRS1; simpl in *; try discriminate.
-    replace (((rs2 # X16 <- Vundef) # X17 <- Vundef) # PC <- (Vptr b0 (Ptrofs.repr z))) with
-      ((((rs1 # PC <- (Vptr b0 (Ptrofs.add i1 (Ptrofs.repr (size bb))))) # X16 <-
-      Vundef) # X17 <- Vundef) # PC <- (Vptr b (Ptrofs.repr z))); auto.
+    replace ((rs2 # X16 <- Vundef) # PC <- (Vptr b0 (Ptrofs.repr z))) with
+      (((rs1 # PC <- (Vptr b0 (Ptrofs.add i1 (Ptrofs.repr (size bb))))) # X16 <-
+      Vundef) # PC <- (Vptr b (Ptrofs.repr z))); auto.
     eapply functional_extensionality; intros x.
     destruct (PregEq.eq x PC); subst.
     + rewrite Pregmap.gso in INCRPC; try congruence.
-      rewrite Pregmap.gso in INCRPC; try congruence.
       rewrite Pregmap.gss in INCRPC.
       rewrite !Pregmap.gss in *; congruence.
     + rewrite Pregmap.gso; auto.
       rewrite (Pregmap.gso (i := x) (j := PC)); auto.
-      destruct (PregEq.eq x X17); subst.
+      destruct (PregEq.eq x X16); subst.
       * rewrite !Pregmap.gss; auto.
-      * rewrite !(Pregmap.gso (i := x) (j:= X17)); auto. destruct (PregEq.eq x X16); subst.
-        -- rewrite !Pregmap.gss; auto.
-        -- rewrite !Pregmap.gso; auto.
+      * rewrite !Pregmap.gso; auto.
 Qed.
 
 Lemma last_instruction_cannot_be_label bb:
@@ -2188,9 +2185,16 @@ Proof.
           reflexivity. }
         apply set_builtin_res_dont_move_pc_gen.
         -- erewrite !set_builtin_map_not_pc.
-           erewrite !undef_regs_other_2.
-           rewrite HPC; auto. all: rewrite preg_notin_charact; intros; try discriminate.
+           erewrite !undef_regs_other.
+           rewrite HPC; auto.
+           all: intros; simpl in *; destruct H3 as [HX16 | [HX30 | HDES]]; subst; try discriminate;
+           exploit list_in_map_inv; eauto; intros [mr [A B]]; subst; discriminate.
         -- intros. eapply undef_reg_preserved; eauto.
+           intros. destruct (PregEq.eq X16 r0); destruct (PregEq.eq X30 r0); subst.
+           rewrite Pregmap.gso, Pregmap.gss; try congruence.
+           do 2 (rewrite Pregmap.gso, Pregmap.gss; try discriminate; auto).
+           rewrite 2Pregmap.gss; auto.
+           rewrite !Pregmap.gso; auto.
 Qed.
 
 Lemma exec_exit_simulation_star b ofs f bb s2 t rs m rs' m': forall
diff --git a/aarch64/CBuiltins.ml b/aarch64/CBuiltins.ml
index fdc1372d..e2a9c87a 100644
--- a/aarch64/CBuiltins.ml
+++ b/aarch64/CBuiltins.ml
@@ -6,6 +6,9 @@
 (*                                                                     *)
 (*  Copyright Institut National de Recherche en Informatique et en     *)
 (*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the GNU General Public License as published by  *)
+(*  the Free Software Foundation, either version 2 of the License, or  *)
+(*  (at your option) any later version.  This file is also distributed *)
 (*  under the terms of the INRIA Non-Commercial License Agreement.     *)
 (*                                                                     *)
 (* *********************************************************************)
@@ -29,14 +32,6 @@ let builtins = {
     "__builtin_fence",
       (TVoid [], [], false);
     (* Integer arithmetic *)
-    "__builtin_bswap64",
-      (TInt(IULongLong, []), [TInt(IULongLong, [])], false);
-    "__builtin_clz",
-      (TInt(IInt, []), [TInt(IUInt, [])], false);
-    "__builtin_clzl",
-      (TInt(IInt, []), [TInt(IULong, [])], false);
-    "__builtin_clzll",
-      (TInt(IInt, []), [TInt(IULongLong, [])], false);
     "__builtin_cls",
       (TInt(IInt, []), [TInt(IInt, [])], false);
     "__builtin_clsl",
diff --git a/aarch64/Machregsaux.ml b/aarch64/Machregsaux.ml
index f13a9ff5..41db3bd4 100644
--- a/aarch64/Machregsaux.ml
+++ b/aarch64/Machregsaux.ml
@@ -12,28 +12,9 @@
 
 (** Auxiliary functions on machine registers *)
 
-open Camlcoq
-open Machregs
-
-let register_names : (mreg, string) Hashtbl.t = Hashtbl.create 31
-
-let _ =
-  List.iter
-    (fun (s, r) -> Hashtbl.add register_names r (camlstring_of_coqstring s))
-    Machregs.register_names
-
 let is_scratch_register s =
   s = "X16" || s = "x16" || s = "X30" || s = "x30"
 
-
-let name_of_register r =
-  try Some (Hashtbl.find register_names r) with Not_found -> None
-
-let register_by_name s =
-  Machregs.register_by_name (coqstring_uppercase_ascii_of_camlstring s)
-
-let can_reserve_register r = Conventions1.is_callee_save r
-                           
 let class_of_type = function
   | AST.Tint | AST.Tlong -> 0
   | AST.Tfloat | AST.Tsingle -> 1
diff --git a/aarch64/TargetPrinter.ml b/aarch64/TargetPrinter.ml
index 8c3daabe..40e4a182 100644
--- a/aarch64/TargetPrinter.ml
+++ b/aarch64/TargetPrinter.ml
@@ -388,6 +388,8 @@ module Target (*: TARGET*) =
         fprintf oc "	rev	%a, %a\n" ireg (sz, rd) ireg (sz, r1)
     | Prev16(sz, rd, r1) ->
         fprintf oc "	rev16	%a, %a\n" ireg (sz, rd) ireg (sz, r1)
+    | Prbit(sz, rd, r1) ->
+        fprintf oc "	rbit	%a, %a\n" ireg (sz, rd) ireg (sz, r1)
     (* Conditional data processing *)
     | Pcsel(rd, r1, r2, c) ->
         fprintf oc "	csel	%a, %a, %a, %s\n" xreg rd xreg r1 xreg r2 (condition_name c)
@@ -478,6 +480,10 @@ module Target (*: TARGET*) =
         fprintf oc "	fnmadd	%a, %a, %a, %a\n" freg (sz, rd) freg (sz, r1) freg (sz, r2) freg (sz, r3)
     | Pfnmsub(sz, rd, r1, r2, r3) ->
         fprintf oc "	fnmsub	%a, %a, %a, %a\n" freg (sz, rd) freg (sz, r1) freg (sz, r2) freg (sz, r3)
+    | Pfmax (sz, rd, r1, r2) ->
+        fprintf oc "	fmax	%a, %a, %a\n" freg (sz, rd) freg (sz, r1) freg (sz, r2)
+    | Pfmin (sz, rd, r1, r2) ->
+        fprintf oc "	fmin	%a, %a, %a\n" freg (sz, rd) freg (sz, r1) freg (sz, r2)
     (* Floating-point comparison *)
     | Pfcmp(sz, r1, r2) ->
         fprintf oc "	fcmp	%a, %a\n" freg (sz, r1) freg (sz, r2)
diff --git a/aarch64/extractionMachdep.v b/aarch64/extractionMachdep.v
index 6815bc59..69edeb55 100644
--- a/aarch64/extractionMachdep.v
+++ b/aarch64/extractionMachdep.v
@@ -6,6 +6,9 @@
 (*                                                                     *)
 (*  Copyright Institut National de Recherche en Informatique et en     *)
 (*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the GNU General Public License as published by  *)
+(*  the Free Software Foundation, either version 2 of the License, or  *)
+(*  (at your option) any later version.  This file is also distributed *)
 (*  under the terms of the INRIA Non-Commercial License Agreement.     *)
 (*                                                                     *)
 (* *********************************************************************)
author	Léo Gourdin <leo.gourdin@univ-grenoble-alpes.fr>	2020-12-20 14:33:44 +0100
committer	Léo Gourdin <leo.gourdin@univ-grenoble-alpes.fr>	2020-12-20 14:33:44 +0100
commit	a23b977c5e100c1fd24d47d99b7e860c0bcf64ae (patch)
tree	dc555a7401a74790f629aae02bb919834e1faf94 /aarch64
parent	a71ed69400fbd8f6533a32c117e7063f6b083049 (diff)
parent	373ad4a6efcb6cd0ecd30e7c131640b9783f1269 (diff)
download	compcert-kvx-a23b977c5e100c1fd24d47d99b7e860c0bcf64ae.tar.gz compcert-kvx-a23b977c5e100c1fd24d47d99b7e860c0bcf64ae.zip