aboutsummaryrefslogtreecommitdiffstats
path: root/powerpc
diff options
context:
space:
mode:
authorDavid Monniaux <david.monniaux@univ-grenoble-alpes.fr>2019-06-03 20:09:19 +0200
committerDavid Monniaux <david.monniaux@univ-grenoble-alpes.fr>2019-06-03 20:11:48 +0200
commit677771949bd62f7f5bbcf99bba6b6f816f07a6c2 (patch)
tree597b2ccc5867bc2bbb083c4e13f792671b2042c1 /powerpc
parent36e64ee96ded0c94c83da6fb12202c276e66ba45 (diff)
parentb7e0d70de2ace6f0a22f9f65cc244d875ee48496 (diff)
downloadcompcert-kvx-677771949bd62f7f5bbcf99bba6b6f816f07a6c2.tar.gz
compcert-kvx-677771949bd62f7f5bbcf99bba6b6f816f07a6c2.zip
Merge branch 'if-conversion' of https://github.com/AbsInt/CompCert into mppa-if-conversion
Diffstat (limited to 'powerpc')
-rw-r--r--powerpc/Archi.v25
-rw-r--r--powerpc/Asm.v16
-rw-r--r--powerpc/AsmToJSON.ml1
-rw-r--r--powerpc/Asmexpand.ml77
-rw-r--r--powerpc/Asmgen.v67
-rw-r--r--powerpc/Asmgenproof.v37
-rw-r--r--powerpc/Asmgenproof1.v230
-rw-r--r--powerpc/Machregs.v6
-rw-r--r--powerpc/NeedOp.v5
-rw-r--r--powerpc/Op.v43
-rw-r--r--powerpc/PrintOp.ml4
-rw-r--r--powerpc/SelectLongproof.v10
-rw-r--r--powerpc/SelectOp.vp14
-rw-r--r--powerpc/SelectOpproof.v21
-rw-r--r--powerpc/TargetPrinter.ml20
-rw-r--r--powerpc/ValueAOp.v2
16 files changed, 470 insertions, 108 deletions
diff --git a/powerpc/Archi.v b/powerpc/Archi.v
index 5d11aad1..d792e4fe 100644
--- a/powerpc/Archi.v
+++ b/powerpc/Archi.v
@@ -17,8 +17,8 @@
(** Architecture-dependent parameters for PowerPC *)
Require Import ZArith.
-Require Import Fappli_IEEE.
-Require Import Fappli_IEEE_bits.
+(*From Flocq*)
+Require Import Binary Bits.
Definition ptr64 := false.
@@ -37,21 +37,24 @@ Proof.
reflexivity.
Qed.
-Program Definition default_pl_64 : bool * nan_pl 53 :=
- (false, iter_nat 51 _ xO xH).
+Definition default_nan_64 : { x : binary64 | is_nan _ _ x = true } :=
+ exist _ (B754_nan 53 1024 false (iter_nat 51 _ xO xH) (eq_refl true)) (eq_refl true).
-Definition choose_binop_pl_64 (s1: bool) (pl1: nan_pl 53) (s2: bool) (pl2: nan_pl 53) :=
+Definition choose_binop_pl_64 (pl1 pl2 : positive) :=
false. (**r always choose first NaN *)
-Program Definition default_pl_32 : bool * nan_pl 24 :=
- (false, iter_nat 22 _ xO xH).
+Definition default_nan_32 : { x : binary32 | is_nan _ _ x = true } :=
+ exist _ (B754_nan 24 128 false (iter_nat 22 _ xO xH) (eq_refl true)) (eq_refl true).
-Definition choose_binop_pl_32 (s1: bool) (pl1: nan_pl 24) (s2: bool) (pl2: nan_pl 24) :=
+Definition choose_binop_pl_32 (pl1 pl2 : positive) :=
false. (**r always choose first NaN *)
+Definition fpu_returns_default_qNaN := false.
+
Definition float_of_single_preserves_sNaN := true.
Global Opaque ptr64 big_endian splitlong
- default_pl_64 choose_binop_pl_64
- default_pl_32 choose_binop_pl_32
- float_of_single_preserves_sNaN. \ No newline at end of file
+ default_nan_64 choose_binop_pl_64
+ default_nan_32 choose_binop_pl_32
+ fpu_returns_default_qNaN
+ float_of_single_preserves_sNaN.
diff --git a/powerpc/Asm.v b/powerpc/Asm.v
index ad24f563..b9300fd7 100644
--- a/powerpc/Asm.v
+++ b/powerpc/Asm.v
@@ -231,6 +231,7 @@ Inductive instruction : Type :=
| Pfres: freg -> freg -> instruction (**r approximate inverse *)
| Pfsel: freg -> freg -> freg -> freg -> instruction (**r FP conditional move *)
| Pisel: ireg -> ireg -> ireg -> crbit -> instruction (**r integer select *)
+ | Pfsel_gen: freg -> freg -> freg -> crbit -> instruction (**r floating point select *)
| Pisync: instruction (**r ISYNC barrier *)
| Picbi: ireg -> ireg -> instruction (**r instruction cache invalidate *)
| Picbtls: int -> ireg -> ireg -> instruction (**r instruction cache block touch and lock set *)
@@ -860,6 +861,20 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
Next (nextinstr (rs#rd <- (Val.subf rs#r1 rs#r2))) m
| Pfsubs rd r1 r2 =>
Next (nextinstr (rs#rd <- (Val.subfs rs#r1 rs#r2))) m
+ | Pisel rd r1 r2 bit =>
+ let v :=
+ match rs#(reg_of_crbit bit) with
+ | Vint n => if Int.eq n Int.zero then rs#r2 else rs#r1
+ | _ => Vundef
+ end in
+ Next (nextinstr (rs #rd <- v #GPR0 <- Vundef)) m
+ | Pfsel_gen rd r1 r2 bit =>
+ let v :=
+ match rs#(reg_of_crbit bit) with
+ | Vint n => if Int.eq n Int.zero then rs#r2 else rs#r1
+ | _ => Vundef
+ end in
+ Next (nextinstr (rs #rd <- v #GPR0 <- Vundef)) m
| Plbz rd cst r1 =>
load1 Mint8unsigned rd cst r1 rs m
| Plbzx rd r1 r2 =>
@@ -1073,7 +1088,6 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
| Pfrsqrte _ _
| Pfres _ _
| Pfsel _ _ _ _
- | Pisel _ _ _ _
| Plwarx _ _ _
| Plwbrx _ _ _
| Picbi _ _
diff --git a/powerpc/AsmToJSON.ml b/powerpc/AsmToJSON.ml
index ee3eaca8..99c51e43 100644
--- a/powerpc/AsmToJSON.ml
+++ b/powerpc/AsmToJSON.ml
@@ -228,6 +228,7 @@ let pp_instructions pp ic =
| Pfres (fr1,fr2) -> instruction pp "Pfres" [Freg fr1; Freg fr2]
| Pfsel (fr1,fr2,fr3,fr4) -> instruction pp "Pfsel" [Freg fr1; Freg fr2; Freg fr3; Freg fr4]
| Pisel (ir1,ir2,ir3,cr) -> instruction pp "Pisel" [Ireg ir1; Ireg ir2; Ireg ir3; Crbit cr]
+ | Pfsel_gen _ -> assert false (* Should not occur *)
| Picbi (ir1,ir2) -> instruction pp "Picbi" [Ireg ir1; Ireg ir2]
| Picbtls (n,ir1,ir2) -> instruction pp "Picbtls" [Constant (Cint n);Ireg ir1; Ireg ir2]
| Pisync -> instruction pp "Pisync" []
diff --git a/powerpc/Asmexpand.ml b/powerpc/Asmexpand.ml
index 49a0d237..415b6651 100644
--- a/powerpc/Asmexpand.ml
+++ b/powerpc/Asmexpand.ml
@@ -36,12 +36,14 @@ let _2 = coqint_of_camlint 2l
let _4 = coqint_of_camlint 4l
let _6 = coqint_of_camlint 6l
let _8 = coqint_of_camlint 8l
+let _16 = coqint_of_camlint 16l
let _31 = coqint_of_camlint 31l
let _32 = coqint_of_camlint 32l
let _64 = coqint_of_camlint 64l
let _m1 = coqint_of_camlint (-1l)
let _m4 = coqint_of_camlint (-4l)
let _m8 = coqint_of_camlint (-8l)
+let _m16 = coqint_of_camlint (-16l)
let _0L = Integers.Int64.zero
let _32L = coqint_of_camlint64 32L
@@ -56,6 +58,15 @@ let emit_loadimm r n =
let emit_addimm rd rs n =
List.iter emit (Asmgen.addimm rd rs n [])
+(* Numbering of bits in the CR register *)
+
+let num_crbit = function
+ | CRbit_0 -> 0
+ | CRbit_1 -> 1
+ | CRbit_2 -> 2
+ | CRbit_3 -> 3
+ | CRbit_6 -> 6
+
(* Handling of annotations *)
let expand_annot_val kind txt targ args res =
@@ -77,8 +88,6 @@ let expand_annot_val kind txt targ args res =
So, use 64-bit accesses only if alignment >= 4.
Note that lfd and stfd cannot trap on ill-formed floats. *)
-
-
let offset_in_range ofs =
Int.eq (Asmgen.high_s ofs) _0
@@ -410,10 +419,21 @@ let expand_builtin_va_start r =
let expand_int64_arith conflict rl fn =
if conflict then (fn GPR0; emit (Pmr(rl, GPR0))) else fn rl
-(* Expansion of integer conditional moves (__builtin_*sel) *)
+(* Expansion of integer conditional moves (__builtin_*sel and Pisel) *)
(* The generated code works equally well with 32-bit integer registers
and with 64-bit integer registers. *)
+let expand_integer_cond_move_1 a2 a3 res =
+ (* GPR0 is -1 (all ones) if condition is true, 0 if it is false *)
+ if res <> a3 then begin
+ emit (Pand_ (res, a2, GPR0));
+ emit (Pandc (GPR0, a3, GPR0))
+ end else begin
+ emit (Pandc (res, a3, GPR0));
+ emit (Pand_ (GPR0, a2, GPR0))
+ end;
+ emit (Por (res, res, GPR0))
+
let expand_integer_cond_move a1 a2 a3 res =
if a2 = a3 then
emit (Pmr (res, a2))
@@ -423,15 +443,37 @@ let expand_integer_cond_move a1 a2 a3 res =
end else begin
(* a1 has type _Bool, hence it is 0 or 1 *)
emit (Psubfic (GPR0, a1, Cint _0));
- (* r0 = -1 (all ones) if a1 is true, r0 = 0 if a1 is false *)
- if res <> a3 then begin
- emit (Pand_ (res, a2, GPR0));
- emit (Pandc (GPR0, a3, GPR0))
- end else begin
- emit (Pandc (res, a3, GPR0));
- emit (Pand_ (GPR0, a2, GPR0))
- end;
- emit (Por (res, res, GPR0))
+ expand_integer_cond_move_1 a2 a3 res
+ end
+
+
+(* Expansion of floating point conditional moves (Pfcmove) *)
+
+let expand_float_cond_move bit a2 a3 res =
+ emit (Pmfcr GPR0);
+ emit (Prlwinm(GPR0, GPR0, Z.of_uint (4 + num_crbit bit), _8));
+ emit (Pstfdu (a3, Cint (_m16), GPR1));
+ emit (Pcfi_adjust _16);
+ emit (Pstfd (a2, Cint (_8), GPR1));
+ emit (Plfdx (res, GPR1, GPR0));
+ emit (Paddi (GPR1, GPR1, (Cint _16)));
+ emit (Pcfi_adjust _m16)
+
+
+
+(* Symmetrically, we emulate the "isel" instruction on PPC processors
+ that do not have it. *)
+
+let expand_isel bit a2 a3 res =
+ if a2 = a3 then
+ emit (Pmr (res, a2))
+ else if eref then
+ emit (Pisel (res, a2, a3, bit))
+ else begin
+ emit (Pmfcr GPR0);
+ emit (Prlwinm(GPR0, GPR0, Z.of_uint (1 + num_crbit bit), _1));
+ emit (Psubfic (GPR0, GPR0, Cint _0));
+ expand_integer_cond_move_1 a2 a3 res
end
(* Convert integer constant into GPR with corresponding number *)
@@ -772,13 +814,6 @@ let set_cr6 sg =
(* Expand instructions *)
-let num_crbit = function
- | CRbit_0 -> 0
- | CRbit_1 -> 1
- | CRbit_2 -> 2
- | CRbit_3 -> 3
- | CRbit_6 -> 6
-
let expand_instruction instr =
match instr with
| Pallocframe(sz, ofs,retofs) ->
@@ -874,6 +909,10 @@ let expand_instruction instr =
emit (Pcfi_adjust _m8);
| Pfxdp(r1, r2) ->
if r1 <> r2 then emit(Pfmr(r1, r2))
+ | Pisel(rd, r1, r2, bit) ->
+ expand_isel bit r1 r2 rd
+ | Pfsel_gen (rd, r1, r2, bit) ->
+ expand_float_cond_move bit r1 r2 rd
| Plmake(r1, rhi, rlo) ->
if r1 = rlo then
emit (Prldimi(r1, rhi, _32L, upper32))
diff --git a/powerpc/Asmgen.v b/powerpc/Asmgen.v
index 8c296f0a..a686414a 100644
--- a/powerpc/Asmgen.v
+++ b/powerpc/Asmgen.v
@@ -125,17 +125,35 @@ Definition rolm (r1 r2: ireg) (amount mask: int) (k: code) :=
Definition low64_u (n: int64) := Int64.zero_ext 16 n.
Definition low64_s (n: int64) := Int64.sign_ext 16 n.
-Definition loadimm64 (r: ireg) (n: int64) (k: code) :=
+Definition loadimm64_32s (r: ireg) (n: int64) (k: code) :=
let lo_u := low64_u n in
let lo_s := low64_s n in
- let hi_s := Int64.sign_ext 16 (Int64.shr n (Int64.repr 16)) in
+ let hi_s := low64_s (Int64.shr n (Int64.repr 16)) in
if Int64.eq n lo_s then
Paddi64 r GPR0 n :: k
- else if Int64.eq n (Int64.or (Int64.shl hi_s (Int64.repr 16)) lo_u) then
- Paddis64 r GPR0 hi_s :: Pori64 r r lo_u :: k
+ else
+ Paddis64 r GPR0 hi_s :: Pori64 r r lo_u :: k.
+
+Definition loadimm64 (r: ireg) (n: int64) (k: code) :=
+ if Int64.eq n (Int64.sign_ext 32 n) then
+ loadimm64_32s r n k
else
Pldi r n :: k.
+(** [loadimm64_notemp] is a variant of [loadimm64] that uses no temporary
+ register. The code it produces is larger and slower than the code
+ produced by [loadimm64], but it is sometimes useful to preserve all registers
+ except the destination. *)
+
+Definition loadimm64_notemp (r: ireg) (n: int64) (k: code) :=
+ if Int64.eq n (Int64.sign_ext 32 n) then
+ loadimm64_32s r n k
+ else
+ loadimm64_32s r (Int64.shru n (Int64.repr 32))
+ (Prldinm r r (Int.repr 32) (Int64.shl Int64.mone (Int64.repr 32)) ::
+ Poris64 r r (low64_u (Int64.shru n (Int64.repr 16))) ::
+ Pori64 r r (low64_u n) :: k).
+
Definition opimm64 (insn2: ireg -> ireg -> ireg -> instruction)
(insn1: ireg -> ireg -> int64 -> instruction)
(r1 r2: ireg) (n: int64) (ok: bool) (k: code) :=
@@ -261,18 +279,14 @@ Definition transl_cond
do r1 <- ireg_of a1;
if Int64.eq n (low64_s n) then
OK (Pcmpdi r1 n :: k)
- else if ireg_eq r1 GPR12 then
- OK (Pmr GPR0 GPR12 :: loadimm64 GPR12 n (Pcmpd GPR0 GPR12 :: k))
else
- OK (loadimm64 GPR0 n (Pcmpd r1 GPR0 :: k))
+ OK (loadimm64_notemp GPR0 n (Pcmpd r1 GPR0 :: k))
| Ccompluimm c n, a1 :: nil =>
do r1 <- ireg_of a1;
if Int64.eq n (low64_u n) then
OK (Pcmpldi r1 n :: k)
- else if ireg_eq r1 GPR12 then
- OK (Pmr GPR0 GPR12 :: loadimm64 GPR12 n (Pcmpld GPR0 GPR12 :: k))
else
- OK (loadimm64 GPR0 n (Pcmpld r1 GPR0 :: k))
+ OK (loadimm64_notemp GPR0 n (Pcmpld r1 GPR0 :: k))
| _, _ =>
Error(msg "Asmgen.transl_cond")
end.
@@ -390,6 +404,28 @@ Definition transl_cond_op
else Pxori r' r' (Cint Int.one) :: k)
end.
+(** Translation of a select operation *)
+
+Definition transl_select_op
+ (cond: condition) (args: list mreg) (r1 r2 rd: ireg) (k: code) :=
+ if ireg_eq r1 r2 then
+ OK (Pmr rd r1 :: k)
+ else
+ (let p := crbit_for_cond cond in
+ let r1' := if snd p then r1 else r2 in
+ let r2' := if snd p then r2 else r1 in
+ transl_cond cond args (Pisel rd r1' r2' (fst p) :: k)).
+
+Definition transl_fselect_op
+ (cond: condition) (args: list mreg) (r1 r2 rd: freg) (k: code) :=
+ if freg_eq r1 r2 then
+ OK (Pfmr rd r1 :: k)
+ else
+ (let p := crbit_for_cond cond in
+ let r1' := if snd p then r1 else r2 in
+ let r2' := if snd p then r2 else r1 in
+ transl_cond cond args (Pfsel_gen rd r1' r2' (fst p) :: k)).
+
(** Translation of the arithmetic operation [r <- op(args)].
The corresponding instructions are prepended to [k]. *)
@@ -596,6 +632,17 @@ Definition transl_op
do r1 <- ireg_of a1; do r <- ireg_of res; OK (Plhi r r1 :: k)
| Ocmp cmp, _ =>
transl_cond_op cmp args res k
+ | Osel cmp ty, a1 :: a2 :: args =>
+ match preg_of res with
+ | IR r1 =>
+ do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r <- ireg_of res;
+ transl_select_op cmp args r1 r2 r k
+ | FR r =>
+ do r1 <- freg_of a1; do r2 <- freg_of a2; do r <- freg_of res;
+ transl_fselect_op cmp args r1 r2 r k
+ | _ =>
+ Error (msg "Asmgen.Osel")
+ end
(*c PPC64 operations *)
| Olongconst n, nil =>
do r <- ireg_of res; OK (loadimm64 r n k)
diff --git a/powerpc/Asmgenproof.v b/powerpc/Asmgenproof.v
index 8ad28aea..d653633c 100644
--- a/powerpc/Asmgenproof.v
+++ b/powerpc/Asmgenproof.v
@@ -179,14 +179,28 @@ Proof.
Qed.
Hint Resolve rolm_label: labels.
+Remark loadimm64_32s_label:
+ forall r n k, tail_nolabel k (loadimm64_32s r n k).
+Proof.
+ unfold loadimm64_32s; intros. destruct Int64.eq; TailNoLabel.
+Qed.
+Hint Resolve loadimm64_32s_label: labels.
+
Remark loadimm64_label:
forall r n k, tail_nolabel k (loadimm64 r n k).
Proof.
- unfold loadimm64; intros.
- destruct Int64.eq. TailNoLabel. destruct Int64.eq; TailNoLabel.
+ unfold loadimm64; intros. destruct Int64.eq; TailNoLabel.
Qed.
Hint Resolve loadimm64_label: labels.
+Remark loadimm64_notemp_label:
+ forall r n k, tail_nolabel k (loadimm64_notemp r n k).
+Proof.
+ unfold loadimm64_notemp; intros. destruct Int64.eq; TailNoLabel.
+ eapply tail_nolabel_trans; TailNoLabel.
+Qed.
+Hint Resolve loadimm64_notemp_label: labels.
+
Remark loadind_label:
forall base ofs ty dst k c,
loadind base ofs ty dst k = OK c -> tail_nolabel k c.
@@ -234,6 +248,24 @@ Proof.
destruct (snd (crbit_for_cond c0)); TailNoLabel.
Qed.
+Remark transl_select_op_label:
+ forall cond args r1 r2 rd k c,
+ transl_select_op cond args r1 r2 rd k = OK c -> tail_nolabel k c.
+Proof.
+ unfold transl_select_op; intros. destruct (ireg_eq r1 r2).
+ TailNoLabel.
+ eapply tail_nolabel_trans. eapply transl_cond_label; eauto. TailNoLabel.
+Qed.
+
+Remark transl_fselect_op_label:
+ forall cond args r1 r2 rd k c,
+ transl_fselect_op cond args r1 r2 rd k = OK c -> tail_nolabel k c.
+Proof.
+ unfold transl_fselect_op; intros. destruct (freg_eq r1 r2).
+ TailNoLabel.
+ eapply tail_nolabel_trans. eapply transl_cond_label; eauto. TailNoLabel.
+Qed.
+
Remark transl_op_label:
forall op args r k c,
transl_op op args r k = OK c -> tail_nolabel k c.
@@ -261,6 +293,7 @@ Opaque Int.eq.
destruct Int64.eq. TailNoLabel.
destruct ireg_eq; [apply tail_nolabel_cons; unfold nolabel;auto|]; eapply tail_nolabel_trans; TailNoLabel.
- eapply transl_cond_op_label; eauto.
+- destruct (preg_of r); monadInv H. eapply transl_select_op_label; eauto. eapply transl_fselect_op_label; eauto.
Qed.
Remark transl_memory_access_label:
diff --git a/powerpc/Asmgenproof1.v b/powerpc/Asmgenproof1.v
index c18757b2..884d5366 100644
--- a/powerpc/Asmgenproof1.v
+++ b/powerpc/Asmgenproof1.v
@@ -16,6 +16,7 @@ Require Import Coqlib.
Require Import Errors.
Require Import Maps.
Require Import AST.
+Require Import Zbits.
Require Import Integers.
Require Import Floats.
Require Import Values.
@@ -80,13 +81,13 @@ Proof.
unfold Int.modu, Int.zero. decEq.
change 0 with (0 mod 65536).
change (Int.unsigned (Int.repr 65536)) with 65536.
- apply Int.eqmod_mod_eq. omega.
- unfold x, low_s. eapply Int.eqmod_trans.
- apply Int.eqmod_divides with Int.modulus.
+ apply eqmod_mod_eq. omega.
+ unfold x, low_s. eapply eqmod_trans.
+ apply eqmod_divides with Int.modulus.
unfold Int.sub. apply Int.eqm_unsigned_repr_l. apply Int.eqm_refl.
exists 65536. compute; auto.
replace 0 with (Int.unsigned n - Int.unsigned n) by omega.
- apply Int.eqmod_sub. apply Int.eqmod_refl. apply Int.eqmod_sign_ext'.
+ apply eqmod_sub. apply eqmod_refl. apply Int.eqmod_sign_ext'.
compute; auto.
rewrite H0 in H. rewrite Int.add_zero in H.
rewrite <- H. unfold x. rewrite Int.sub_add_opp. rewrite Int.add_assoc.
@@ -531,6 +532,40 @@ Qed.
(** Load int64 constant. *)
+Lemma loadimm64_32s_correct:
+ forall r n k rs m,
+ exists rs',
+ exec_straight ge fn (loadimm64_32s r n k) rs m k rs' m
+ /\ rs'#r = Vlong (Int64.sign_ext 32 n)
+ /\ forall r': preg, r' <> r -> r' <> PC -> rs'#r' = rs#r'.
+Proof.
+ unfold loadimm64_32s; intros. predSpec Int64.eq Int64.eq_spec n (low64_s n).
+ - econstructor; split; [|split].
+ + apply exec_straight_one. simpl; eauto. auto.
+ + Simpl. rewrite Int64.add_zero_l. rewrite H. unfold low64_s.
+ rewrite Int64.sign_ext_widen by omega. auto.
+ + intros; Simpl.
+ - econstructor; split; [|split].
+ + eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto.
+ + Simpl. simpl. f_equal. rewrite Int64.add_zero_l.
+ apply Int64.same_bits_eq; intros. assert (Int64.zwordsize = 64) by auto.
+ rewrite Int64.bits_or, Int64.bits_shl by auto.
+ unfold low64_s, low64_u.
+ rewrite Int64.bits_zero_ext by omega.
+ change (Int64.unsigned (Int64.repr 16)) with 16.
+ destruct (zlt i 16).
+ * rewrite Int64.bits_sign_ext by omega. rewrite zlt_true by omega. auto.
+ * rewrite ! Int64.bits_sign_ext by omega. rewrite orb_false_r.
+ destruct (zlt i 32).
+ ** rewrite zlt_true by omega. rewrite Int64.bits_shr by omega.
+ change (Int64.unsigned (Int64.repr 16)) with 16.
+ rewrite zlt_true by omega. f_equal; omega.
+ ** rewrite zlt_false by omega. rewrite Int64.bits_shr by omega.
+ change (Int64.unsigned (Int64.repr 16)) with 16.
+ reflexivity.
+ + intros; Simpl.
+Qed.
+
Lemma loadimm64_correct:
forall r n k rs m,
exists rs',
@@ -539,20 +574,78 @@ Lemma loadimm64_correct:
/\ forall r': preg, r' <> r -> r' <> GPR12 -> r' <> PC -> rs'#r' = rs#r'.
Proof.
intros. unfold loadimm64.
- set (hi_s := Int64.sign_ext 16 (Int64.shr n (Int64.repr 16))).
- set (hi' := Int64.shl hi_s (Int64.repr 16)).
- destruct (Int64.eq n (low64_s n)).
- (* addi *)
- econstructor; split. apply exec_straight_one. simpl; eauto. auto.
- rewrite Int64.add_zero_l. intuition Simpl.
- (* addis + ori *)
- predSpec Int64.eq Int64.eq_spec n (Int64.or hi' (low64_u n)).
- econstructor; split. eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto.
- split. Simpl. rewrite Int64.add_zero_l. simpl; f_equal; auto.
- intros. Simpl.
- (* ldi *)
- econstructor; split. apply exec_straight_one. simpl; eauto. auto.
- intuition Simpl.
+ predSpec Int64.eq Int64.eq_spec n (Int64.sign_ext 32 n).
+ - destruct (loadimm64_32s_correct r n k rs m) as (rs' & A & B & C).
+ exists rs'; intuition auto. congruence.
+ - econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+ intuition Simpl.
+Qed.
+
+(** Alternate load int64 immediate *)
+
+Lemma loadimm64_notemp_correct:
+ forall r n k rs m,
+ exists rs',
+ exec_straight ge fn (loadimm64_notemp r n k) rs m k rs' m
+ /\ rs'#r = Vlong n
+ /\ forall r': preg, r' <> r -> r' <> PC -> rs'#r' = rs#r'.
+Proof.
+ intros. unfold loadimm64_notemp.
+ predSpec Int64.eq Int64.eq_spec n (Int64.sign_ext 32 n).
+- destruct (loadimm64_32s_correct r n k rs m) as (rs' & A & B & C).
+ exists rs'; intuition auto. congruence.
+- set (n2 := Int64.shru n (Int64.repr 32)).
+ set (n1 := Int64.zero_ext 16 (Int64.shru n (Int64.repr 16))).
+ set (n0 := Int64.zero_ext 16 n).
+ set (mi := Int64.shl n1 (Int64.repr 16)).
+ set (hi := Int64.shl (Int64.sign_ext 32 n2) (Int64.repr 32)).
+ assert (HI: forall i, 0 <= i < Int64.zwordsize ->
+ Int64.testbit hi i = if zlt i 32 then false else Int64.testbit n i).
+ { intros. unfold hi. assert (Int64.zwordsize = 64) by auto.
+ rewrite Int64.bits_shl by auto.
+ change (Int64.unsigned (Int64.repr 32)) with 32.
+ destruct (zlt i 32); auto.
+ rewrite Int64.bits_sign_ext by omega.
+ rewrite zlt_true by omega.
+ unfold n2. rewrite Int64.bits_shru by omega.
+ change (Int64.unsigned (Int64.repr 32)) with 32.
+ rewrite zlt_true by omega. f_equal; omega.
+ }
+ assert (MI: forall i, 0 <= i < Int64.zwordsize ->
+ Int64.testbit mi i =
+ if zlt i 16 then false
+ else if zlt i 32 then Int64.testbit n i else false).
+ { intros. unfold mi. assert (Int64.zwordsize = 64) by auto.
+ rewrite Int64.bits_shl by auto.
+ change (Int64.unsigned (Int64.repr 16)) with 16.
+ destruct (zlt i 16); auto.
+ unfold n1. rewrite Int64.bits_zero_ext by omega.
+ rewrite Int64.bits_shru by omega.
+ destruct (zlt i 32).
+ rewrite zlt_true by omega.
+ change (Int64.unsigned (Int64.repr 16)) with 16.
+ rewrite zlt_true by omega. f_equal; omega.
+ rewrite zlt_false by omega. auto.
+ }
+ assert (EQ: Int64.or (Int64.or hi mi) n0 = n).
+ { apply Int64.same_bits_eq; intros.
+ rewrite ! Int64.bits_or by auto.
+ unfold n0; rewrite Int64.bits_zero_ext by omega.
+ rewrite HI, MI by auto.
+ destruct (zlt i 16).
+ rewrite zlt_true by omega. auto.
+ destruct (zlt i 32); rewrite ! orb_false_r; auto.
+ }
+ edestruct (loadimm64_32s_correct r n2) as (rs' & A & B & C).
+ econstructor; split; [|split].
+ + eapply exec_straight_trans. eexact A.
+ eapply exec_straight_three.
+ simpl. rewrite B. simpl; auto.
+ simpl; auto.
+ simpl; auto.
+ reflexivity. reflexivity. reflexivity.
+ + Simpl. simpl. f_equal. rewrite <- Int64.shl_rolm by auto. exact EQ.
+ + intros; Simpl.
Qed.
(** Add int64 immediate. *)
@@ -889,7 +982,7 @@ Lemma transl_cond_correct_1:
(if snd (crbit_for_cond cond)
then Val.of_optbool (eval_condition cond (map rs (map preg_of args)) m)
else Val.notbool (Val.of_optbool (eval_condition cond (map rs (map preg_of args)) m)))
- /\ forall r, important_preg r = true -> preg_notin r (destroyed_by_cond cond) -> rs'#r = rs#r.
+ /\ forall r, important_preg r = true -> rs'#r = rs#r.
Proof.
intros.
Opaque Int.eq.
@@ -991,20 +1084,12 @@ Opaque Int.eq.
auto with asmgen.
- (* Ccomplimm *)
rewrite <- Val.notbool_negb_3. rewrite <- Val.negate_cmpl_bool.
- destruct (Int64.eq i (low64_s i)); [|destruct (ireg_eq x GPR12)]; inv EQ0.
+ destruct (Int64.eq i (low64_s i)); inv EQ0.
+ destruct (compare_slong_spec rs (rs x) (Vlong i)) as [A [B [C D]]].
econstructor; split.
apply exec_straight_one. simpl; reflexivity. reflexivity.
split. case c0; simpl; auto. auto with asmgen.
-+ destruct (loadimm64_correct GPR12 i (Pcmpd GPR0 GPR12 :: k) (nextinstr (rs#GPR0 <- (rs#GPR12))) m) as [rs1 [EX1 [RES1 OTH1]]].
- destruct (compare_slong_spec rs1 (rs GPR12) (Vlong i)) as [A [B [C D]]].
- assert (SAME: rs1 GPR0 = rs GPR12) by (apply OTH1; eauto with asmgen).
- econstructor; split.
- eapply exec_straight_step. simpl;reflexivity. reflexivity.
- eapply exec_straight_trans. eexact EX1. eapply exec_straight_one. simpl;reflexivity. reflexivity.
- split. rewrite RES1, SAME. destruct c0; simpl; auto.
- simpl; intros. rewrite RES1, SAME. rewrite D by eauto with asmgen. rewrite OTH1 by eauto with asmgen. Simpl.
-+ destruct (loadimm64_correct GPR0 i (Pcmpd x GPR0 :: k) rs m) as [rs1 [EX1 [RES1 OTH1]]].
++ destruct (loadimm64_notemp_correct GPR0 i (Pcmpd x GPR0 :: k) rs m) as [rs1 [EX1 [RES1 OTH1]]].
destruct (compare_slong_spec rs1 (rs x) (Vlong i)) as [A [B [C D]]].
assert (SAME: rs1 x = rs x) by (apply OTH1; eauto with asmgen).
econstructor; split.
@@ -1013,20 +1098,12 @@ Opaque Int.eq.
simpl; intros. rewrite RES1, SAME. rewrite D; eauto with asmgen.
- (* Ccompluimm *)
rewrite <- Val.notbool_negb_3. rewrite <- Val.negate_cmplu_bool.
- destruct (Int64.eq i (low64_u i)); [|destruct (ireg_eq x GPR12)]; inv EQ0.
+ destruct (Int64.eq i (low64_u i)); inv EQ0.
+ destruct (compare_ulong_spec rs m (rs x) (Vlong i)) as [A [B [C D]]].
econstructor; split.
apply exec_straight_one. simpl; reflexivity. reflexivity.
split. case c0; simpl; auto. auto with asmgen.
-+ destruct (loadimm64_correct GPR12 i (Pcmpld GPR0 GPR12 :: k) (nextinstr (rs#GPR0 <- (rs#GPR12))) m) as [rs1 [EX1 [RES1 OTH1]]].
- destruct (compare_ulong_spec rs1 m (rs GPR12) (Vlong i)) as [A [B [C D]]].
- assert (SAME: rs1 GPR0 = rs GPR12) by (apply OTH1; eauto with asmgen).
- econstructor; split.
- eapply exec_straight_step. simpl;reflexivity. reflexivity.
- eapply exec_straight_trans. eexact EX1. eapply exec_straight_one. simpl;reflexivity. reflexivity.
- split. rewrite RES1, SAME. destruct c0; simpl; auto.
- simpl; intros. rewrite RES1, SAME. rewrite D by eauto with asmgen. rewrite OTH1 by eauto with asmgen. Simpl.
-+ destruct (loadimm64_correct GPR0 i (Pcmpld x GPR0 :: k) rs m) as [rs1 [EX1 [RES1 OTH1]]].
++ destruct (loadimm64_notemp_correct GPR0 i (Pcmpld x GPR0 :: k) rs m) as [rs1 [EX1 [RES1 OTH1]]].
destruct (compare_ulong_spec rs1 m (rs x) (Vlong i)) as [A [B [C D]]].
assert (SAME: rs1 x = rs x) by (apply OTH1; eauto with asmgen).
econstructor; split.
@@ -1045,7 +1122,7 @@ Lemma transl_cond_correct_2:
(if snd (crbit_for_cond cond)
then Val.of_bool b
else Val.notbool (Val.of_bool b))
- /\ forall r, important_preg r = true -> preg_notin r (destroyed_by_cond cond) -> rs'#r = rs#r.
+ /\ forall r, important_preg r = true -> rs'#r = rs#r.
Proof.
intros.
replace (Val.of_bool b)
@@ -1072,7 +1149,8 @@ Proof.
exploit transl_cond_correct_2. eauto.
eapply eval_condition_lessdef. eapply preg_vals; eauto. eauto. eauto.
intros [rs' [A [B C]]].
- exists rs'; split. eauto. split. auto. apply agree_undef_regs with rs; auto. intros r D.
+ exists rs'; split. eauto. split. auto.
+ apply agree_undef_regs with rs; auto. intros r D E.
apply C. apply important_data_preg_1; auto.
Qed.
@@ -1180,6 +1258,64 @@ Proof.
intuition Simpl.
rewrite RES1. destruct (eval_condition c rs ## (preg_of ## rl) m). destruct b; auto. auto.
Qed.
+
+Lemma transl_select_op_correct:
+ forall cond args ty r1 r2 rd k rs m c,
+ transl_select_op cond args r1 r2 rd k = OK c ->
+ important_preg rd = true -> important_preg r1 = true -> important_preg r2 = true ->
+ exists rs',
+ exec_straight ge fn c rs m k rs' m
+ /\ Val.lessdef (Val.select (eval_condition cond (map rs (map preg_of args)) m) rs#r1 rs#r2 ty) rs'#rd
+ /\ forall r, important_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+ intros until c. intros TR IMP1 IMP2 IMP3.
+ unfold transl_select_op in TR.
+ destruct (ireg_eq r1 r2).
+ - inv TR. econstructor; split; [|split].
+ + apply exec_straight_one. simpl; eauto. auto.
+ + Simpl. destruct (eval_condition cond rs ## (preg_of ## args) m) as [[]|]; simpl; auto using Val.lessdef_normalize.
+ + intros; Simpl.
+ - destruct (transl_cond_correct_1 cond args _ rs m _ TR) as (rs1 & A & B & C).
+ set (bit := fst (crbit_for_cond cond)) in *.
+ set (dir := snd (crbit_for_cond cond)) in *.
+ set (ob := eval_condition cond rs##(preg_of##args) m) in *.
+ econstructor; split; [|split].
+ + eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto.
+ reflexivity.
+ + Simpl.
+ rewrite <- (C r1), <- (C r2) by auto.
+ rewrite B. destruct dir; destruct ob as [[]|]; simpl; auto using Val.lessdef_normalize.
+ + intros. Simpl.
+Qed.
+
+Lemma transl_fselect_op_correct:
+ forall cond args ty r1 r2 rd k rs m c,
+ transl_fselect_op cond args r1 r2 rd k = OK c ->
+ important_preg rd = true -> important_preg r1 = true -> important_preg r2 = true ->
+ exists rs',
+ exec_straight ge fn c rs m k rs' m
+ /\ Val.lessdef (Val.select (eval_condition cond (map rs (map preg_of args)) m) rs#r1 rs#r2 ty) rs'#rd
+ /\ forall r, important_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+ intros until c. intros TR IMP1 IMP2 IMP3.
+ unfold transl_fselect_op in TR.
+ destruct (freg_eq r1 r2).
+ - inv TR. econstructor; split; [|split].
+ + apply exec_straight_one. simpl; eauto. auto.
+ + Simpl. destruct (eval_condition cond rs ## (preg_of ## args) m) as [[]|]; simpl; auto using Val.lessdef_normalize.
+ + intros; Simpl.
+ - destruct (transl_cond_correct_1 cond args _ rs m _ TR) as (rs1 & A & B & C).
+ set (bit := fst (crbit_for_cond cond)) in *.
+ set (dir := snd (crbit_for_cond cond)) in *.
+ set (ob := eval_condition cond rs##(preg_of##args) m) in *.
+ econstructor; split; [|split].
+ + eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto.
+ reflexivity.
+ + Simpl.
+ rewrite <- (C r1), <- (C r2) by auto.
+ rewrite B. destruct dir; destruct ob as [[]|]; simpl; auto using Val.lessdef_normalize.
+ + intros. Simpl.
+Qed.
(** Translation of arithmetic operations. *)
@@ -1377,6 +1513,18 @@ Opaque Val.add.
(* Ocmp *)
- destruct (transl_cond_op_correct c0 args res k rs m c) as [rs' [A [B C]]]; auto.
exists rs'; auto with asmgen.
+ (* Osel *)
+- assert (X: forall mr r, ireg_of mr = OK r -> important_preg r = true).
+ { intros. apply ireg_of_eq in H0. apply important_data_preg_1. rewrite <- H0.
+ auto with asmgen. }
+ assert (Y: forall mr r, freg_of mr = OK r -> important_preg r = true).
+ { intros. apply freg_of_eq in H0. apply important_data_preg_1. rewrite <- H0.
+ auto with asmgen. }
+ destruct (preg_of res) eqn:RES; monadInv H; rewrite <- RES.
+ + rewrite (ireg_of_eq _ _ EQ), (ireg_of_eq _ _ EQ0), (ireg_of_eq _ _ EQ1) in *.
+ destruct (transl_select_op_correct _ _ t _ _ _ _ rs m _ EQ3) as (rs' & A & B & C); eauto.
+ + rewrite (freg_of_eq _ _ EQ), (freg_of_eq _ _ EQ0), (freg_of_eq _ _ EQ1) in *.
+ destruct (transl_fselect_op_correct _ _ t _ _ _ _ rs m _ EQ3) as (rs' & A & B & C); eauto.
Qed.
Lemma transl_op_correct:
diff --git a/powerpc/Machregs.v b/powerpc/Machregs.v
index 53d99e2f..e7c8758b 100644
--- a/powerpc/Machregs.v
+++ b/powerpc/Machregs.v
@@ -159,11 +159,7 @@ Definition register_by_name (s: string) : option mreg :=
(** ** Destroyed registers, preferred registers *)
-Definition destroyed_by_cond (cond: condition): list mreg :=
- match cond with
- | Ccomplimm _ _ | Ccompluimm _ _ => R12 :: nil
- | _ => nil
- end.
+Definition destroyed_by_cond (cond: condition): list mreg := nil.
Definition destroyed_by_op (op: operation): list mreg :=
match op with
diff --git a/powerpc/NeedOp.v b/powerpc/NeedOp.v
index 9a579cc5..5ea09bd8 100644
--- a/powerpc/NeedOp.v
+++ b/powerpc/NeedOp.v
@@ -65,6 +65,7 @@ Definition needs_of_operation (op: operation) (nv: nval): list nval :=
| Ofloatofwords | Omakelong => op2 (default nv)
| Olowlong | Ohighlong => op1 (default nv)
| Ocmp c => needs_of_condition c
+ | Osel c ty => nv :: nv :: needs_of_condition c
end.
Definition operation_is_redundant (op: operation) (nv: nval): bool :=
@@ -147,6 +148,10 @@ Proof.
erewrite needs_of_condition_sound by eauto.
subst v; simpl. auto with na.
subst v; auto with na.
+- destruct (eval_condition c args m) as [b|] eqn:EC.
+ erewrite needs_of_condition_sound by eauto.
+ apply select_sound; auto.
+ simpl; auto with na.
Qed.
Lemma operation_is_redundant_sound:
diff --git a/powerpc/Op.v b/powerpc/Op.v
index e6f942c1..0f082c1f 100644
--- a/powerpc/Op.v
+++ b/powerpc/Op.v
@@ -150,8 +150,9 @@ Inductive operation : Type :=
| Olowlong: operation (**r [rd = low-word(r1)] *)
| Ohighlong: operation (**r [rd = high-word(r1)] *)
(*c Boolean tests: *)
- | Ocmp: condition -> operation. (**r [rd = 1] if condition holds, [rd = 0] otherwise. *)
-
+ | Ocmp: condition -> operation (**r [rd = 1] if condition holds, [rd = 0] otherwise. *)
+ | Osel: condition -> typ -> operation.
+ (**r [rd = rs1] if condition holds, [rd = rs2] otherwise. *)
(** Addressing modes. [r1], [r2], etc, are the arguments to the
addressing. *)
@@ -173,7 +174,7 @@ Proof.
Defined.
Definition beq_operation: forall (x y: operation), bool.
- generalize Int.eq_dec Int64.eq_dec Ptrofs.eq_dec ident_eq Float.eq_dec Float32.eq_dec eq_condition; boolean_equality.
+ generalize Int.eq_dec Int64.eq_dec Ptrofs.eq_dec ident_eq Float.eq_dec Float32.eq_dec typ_eq eq_condition; boolean_equality.
Defined.
Definition eq_operation (x y: operation): {x=y} + {x<>y}.
@@ -306,6 +307,7 @@ Definition eval_operation
| Olowlong, v1::nil => Some(Val.loword v1)
| Ohighlong, v1::nil => Some(Val.hiword v1)
| Ocmp c, _ => Some(Val.of_optbool (eval_condition c vl m))
+ | Osel c ty, v1::v2::vl => Some(Val.select (eval_condition c vl m) v1 v2 ty)
| _, _ => None
end.
@@ -455,6 +457,7 @@ Definition type_of_operation (op: operation) : list typ * typ :=
| Olowlong => (Tlong :: nil, Tint)
| Ohighlong => (Tlong :: nil, Tint)
| Ocmp c => (type_of_condition c, Tint)
+ | Osel c ty => (ty :: ty :: type_of_condition c, ty)
end.
Definition type_of_addressing (addr: addressing) : list typ :=
@@ -575,6 +578,7 @@ Proof with (try exact I; try reflexivity).
destruct v0...
destruct v0...
destruct (eval_condition c vl m); simpl... destruct b...
+ unfold Val.select. destruct (eval_condition c vl m). apply Val.normalize_type. exact I.
Qed.
End SOUNDNESS.
@@ -727,22 +731,40 @@ Definition is_trivial_op (op: operation) : bool :=
(** Operations that depend on the memory state. *)
+Definition condition_depends_on_memory (c: condition) : bool :=
+ match c with
+ | Ccompu _ => true
+ | Ccompuimm _ _ => true
+ | Ccomplu _ => Archi.ppc64
+ | Ccompluimm _ _ => Archi.ppc64
+ | _ => false
+ end.
+
Definition op_depends_on_memory (op: operation) : bool :=
match op with
- | Ocmp (Ccompu _) => true
- | Ocmp (Ccompuimm _ _) => true
- | Ocmp (Ccomplu _) => Archi.ppc64
- | Ocmp (Ccompluimm _ _) => Archi.ppc64
+ | Ocmp c => condition_depends_on_memory c
+ | Osel c ty => condition_depends_on_memory c
| _ => false
end.
+Lemma condition_depends_on_memory_correct:
+ forall c args m1 m2,
+ condition_depends_on_memory c = false ->
+ eval_condition c args m1 = eval_condition c args m2.
+Proof.
+ intros. destruct c; simpl; auto; discriminate.
+Qed.
+
Lemma op_depends_on_memory_correct:
forall (F V: Type) (ge: Genv.t F V) sp op args m1 m2,
op_depends_on_memory op = false ->
eval_operation ge sp op args m1 = eval_operation ge sp op args m2.
Proof.
- intros until m2. destruct op; simpl; try congruence. unfold eval_condition.
- destruct c; simpl; auto; try discriminate.
+ intros until m2. destruct op; simpl; try congruence; intros C.
+- f_equal; f_equal; apply condition_depends_on_memory_correct; auto.
+- destruct args; auto. destruct args; auto.
+ rewrite (condition_depends_on_memory_correct c args m1 m2 C).
+ auto.
Qed.
(** Global variables mentioned in an operation or addressing mode *)
@@ -989,6 +1011,9 @@ Proof.
exploit eval_condition_inj; eauto. intros EQ; rewrite EQ.
destruct b; simpl; constructor.
simpl; constructor.
+ apply Val.select_inject; auto.
+ destruct (eval_condition c vl1 m1) eqn:?; auto.
+ right; symmetry; eapply eval_condition_inj; eauto.
Qed.
Lemma eval_addressing_inj:
diff --git a/powerpc/PrintOp.ml b/powerpc/PrintOp.ml
index cffaafdb..8d7f17ab 100644
--- a/powerpc/PrintOp.ml
+++ b/powerpc/PrintOp.ml
@@ -110,6 +110,10 @@ let print_operation reg pp = function
| Olowlong, [r1] -> fprintf pp "lowlong(%a)" reg r1
| Ohighlong, [r1] -> fprintf pp "highlong(%a)" reg r1
| Ocmp c, args -> print_condition reg pp (c, args)
+ | Osel (c, ty), r1::r2::args ->
+ fprintf pp "%a ?%s %a : %a"
+ (print_condition reg) (c, args)
+ (PrintAST.name_of_type ty) reg r1 reg r2
| Olongconst n, [] -> fprintf pp "%LdL" (camlint64_of_coqint n)
| Ocast32signed, [r1] -> fprintf pp "int32signed(%a)" reg r1
| Ocast32unsigned, [r1] -> fprintf pp "int32unsigned(%a)" reg r1
diff --git a/powerpc/SelectLongproof.v b/powerpc/SelectLongproof.v
index b4e48596..eba071eb 100644
--- a/powerpc/SelectLongproof.v
+++ b/powerpc/SelectLongproof.v
@@ -12,7 +12,7 @@
(** Correctness of instruction selection for 64-bit integer operations *)
-Require Import String Coqlib Maps Integers Floats Errors.
+Require Import String Coqlib Maps Zbits Integers Floats Errors.
Require Archi.
Require Import AST Values Memory Globalenvs Events.
Require Import Cminor Op CminorSel.
@@ -222,11 +222,11 @@ Proof.
change (Int64.unsigned Int64.iwordsize) with 64.
f_equal.
rewrite Int.unsigned_repr.
- apply Int.eqmod_mod_eq. omega.
- apply Int.eqmod_trans with a.
- apply Int.eqmod_divides with Int.modulus. apply Int.eqm_sym. apply Int.eqm_unsigned_repr.
+ apply eqmod_mod_eq. omega.
+ apply eqmod_trans with a.
+ apply eqmod_divides with Int.modulus. apply Int.eqm_sym. apply Int.eqm_unsigned_repr.
exists (two_p (32-6)); auto.
- apply Int.eqmod_divides with Int64.modulus. apply Int64.eqm_unsigned_repr.
+ apply eqmod_divides with Int64.modulus. apply Int64.eqm_unsigned_repr.
exists (two_p (64-6)); auto.
assert (0 <= Int.unsigned (Int.repr a) mod 64 < 64) by (apply Z_mod_lt; omega).
assert (64 < Int.max_unsigned) by (compute; auto).
diff --git a/powerpc/SelectOp.vp b/powerpc/SelectOp.vp
index 478ce251..b1cac124 100644
--- a/powerpc/SelectOp.vp
+++ b/powerpc/SelectOp.vp
@@ -44,6 +44,7 @@ Require Import Floats.
Require Import Op.
Require Import CminorSel.
Require Import OpHelpers.
+Require Archi.
Local Open Scope cminorsel_scope.
@@ -517,6 +518,19 @@ Definition singleofintu (e: expr) :=
Definition singleoffloat (e: expr) := Eop Osingleoffloat (e ::: Enil).
Definition floatofsingle (e: expr) := Eop Ofloatofsingle (e ::: Enil).
+(** ** Selection *)
+
+Definition select (ty: typ) (cond: condition) (args: exprlist) (e1 e2: expr) :=
+ if match ty with
+ | Tint => true
+ | Tfloat => true
+ | Tsingle => true
+ | Tlong => Archi.ppc64
+ | _ => false
+ end
+ then Some (Eop (Osel cond ty) (e1 ::: e2 ::: args))
+ else None.
+
(** ** Recognition of addressing modes for load and store operations *)
Definition can_use_Aindexed2 (chunk: memory_chunk): bool :=
diff --git a/powerpc/SelectOpproof.v b/powerpc/SelectOpproof.v
index 00b91e70..92852d36 100644
--- a/powerpc/SelectOpproof.v
+++ b/powerpc/SelectOpproof.v
@@ -1004,6 +1004,27 @@ Proof.
exists (Val.singleoffloat v); split. EvalOp. inv D; auto.
Qed.
+Theorem eval_select:
+ forall le ty cond al vl a1 v1 a2 v2 a b,
+ select ty cond al a1 a2 = Some a ->
+ eval_exprlist ge sp e m le al vl ->
+ eval_expr ge sp e m le a1 v1 ->
+ eval_expr ge sp e m le a2 v2 ->
+ eval_condition cond vl m = Some b ->
+
+ exists v,
+ eval_expr ge sp e m le a v
+ /\ Val.lessdef (Val.select (Some b) v1 v2 ty) v.
+Proof.
+ unfold select; intros.
+ destruct (match ty with Tint => true | Tfloat => true | Tsingle => true | Tlong => Archi.ppc64 | _ => false end); inv H.
+ exists (Val.select (Some b) v1 v2 ty); split.
+ apply eval_Eop with (v1 :: v2 :: vl).
+ constructor; auto. constructor; auto.
+ simpl. rewrite H3; auto.
+ auto.
+Qed.
+
Theorem eval_addressing:
forall le chunk a v b ofs,
eval_expr ge sp e m le a v ->
diff --git a/powerpc/TargetPrinter.ml b/powerpc/TargetPrinter.ml
index c1aaa55d..a1338561 100644
--- a/powerpc/TargetPrinter.ml
+++ b/powerpc/TargetPrinter.ml
@@ -118,13 +118,22 @@ module Linux_System : SYSTEM =
let name_of_section = function
| Section_text -> ".text"
| Section_data i ->
- if i then ".data" else "COMM"
+ if i then
+ ".data"
+ else
+ common_section ~sec:".section .bss" ()
| Section_small_data i ->
- if i then ".section .sdata,\"aw\",@progbits" else "COMM"
+ if i then
+ ".section .sdata,\"aw\",@progbits"
+ else
+ common_section ~sec:".section .sbss,\"aw\",@nobits" ()
| Section_const i ->
- if i then ".rodata" else "COMM"
+ if i || (not !Clflags.option_fcommon) then ".rodata" else "COMM"
| Section_small_const i ->
- if i then ".section .sdata2,\"a\",@progbits" else "COMM"
+ if i || (not !Clflags.option_fcommon) then
+ ".section .sdata2,\"a\",@progbits"
+ else
+ "COMM"
| Section_string -> ".rodata"
| Section_literal -> ".section .rodata.cst8,\"aM\",@progbits,8"
| Section_jumptable -> ".text"
@@ -209,7 +218,7 @@ module Diab_System : SYSTEM =
let name_of_section = function
| Section_text -> ".text"
- | Section_data i -> if i then ".data" else "COMM"
+ | Section_data i -> if i then ".data" else common_section ()
| Section_small_data i -> if i then ".sdata" else ".sbss"
| Section_const _ -> ".text"
| Section_small_const _ -> ".sdata2"
@@ -604,6 +613,7 @@ module Target (System : SYSTEM):TARGET =
fprintf oc " fsel %a, %a, %a, %a\n" freg r1 freg r2 freg r3 freg r4
| Pisel (r1,r2,r3,cr) ->
fprintf oc " isel %a, %a, %a, %a\n" ireg r1 ireg r2 ireg r3 crbit cr
+ | Pfsel_gen _ -> assert false
| Picbi (r1,r2) ->
fprintf oc " icbi %a, %a\n" ireg r1 ireg r2
| Picbtls (n,r1,r2) ->
diff --git a/powerpc/ValueAOp.v b/powerpc/ValueAOp.v
index f7f65e9e..a270d857 100644
--- a/powerpc/ValueAOp.v
+++ b/powerpc/ValueAOp.v
@@ -141,6 +141,7 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
| Olowlong, v1::nil => loword v1
| Ohighlong, v1::nil => hiword v1
| Ocmp c, _ => of_optbool (eval_static_condition c vl)
+ | Osel c ty, v1::v2::vl => select (eval_static_condition c vl) v1 v2
| _, _ => Vbot
end.
@@ -211,6 +212,7 @@ Proof.
apply rolml_sound; auto.
apply floatofwords_sound; auto.
apply of_optbool_sound. eapply eval_static_condition_sound; eauto.
+ apply select_sound; auto. eapply eval_static_condition_sound; eauto.
Qed.
End SOUNDNESS.