From d002919334e83904447957f666f0d48704c5e55b Mon Sep 17 00:00:00 2001 From: Xavier Leroy Date: Sat, 13 Apr 2019 10:11:05 +0200 Subject: Emulate the "isel" instruction on non-EREF PPC processors On non-EREF processors it expands to instructions that destroy GPR0. Reflect this in the Asm semantics for Pisel. --- powerpc/Asm.v | 2 +- powerpc/Asmexpand.ml | 58 +++++++++++++++++++++++++++++++++----------------- powerpc/Asmgenproof1.v | 4 ++-- 3 files changed, 42 insertions(+), 22 deletions(-) diff --git a/powerpc/Asm.v b/powerpc/Asm.v index 748058c4..e821c6c4 100644 --- a/powerpc/Asm.v +++ b/powerpc/Asm.v @@ -866,7 +866,7 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out | Vint n => if Int.eq n Int.zero then rs#r2 else rs#r1 | _ => Vundef end in - Next (nextinstr (rs#rd <- v)) m + Next (nextinstr (rs #rd <- v #GPR0 <- Vundef)) m | Plbz rd cst r1 => load1 Mint8unsigned rd cst r1 rs m | Plbzx rd r1 r2 => diff --git a/powerpc/Asmexpand.ml b/powerpc/Asmexpand.ml index 49a0d237..5903ab0e 100644 --- a/powerpc/Asmexpand.ml +++ b/powerpc/Asmexpand.ml @@ -56,6 +56,15 @@ let emit_loadimm r n = let emit_addimm rd rs n = List.iter emit (Asmgen.addimm rd rs n []) +(* Numbering of bits in the CR register *) + +let num_crbit = function + | CRbit_0 -> 0 + | CRbit_1 -> 1 + | CRbit_2 -> 2 + | CRbit_3 -> 3 + | CRbit_6 -> 6 + (* Handling of annotations *) let expand_annot_val kind txt targ args res = @@ -77,8 +86,6 @@ let expand_annot_val kind txt targ args res = So, use 64-bit accesses only if alignment >= 4. Note that lfd and stfd cannot trap on ill-formed floats. *) - - let offset_in_range ofs = Int.eq (Asmgen.high_s ofs) _0 @@ -410,10 +417,21 @@ let expand_builtin_va_start r = let expand_int64_arith conflict rl fn = if conflict then (fn GPR0; emit (Pmr(rl, GPR0))) else fn rl -(* Expansion of integer conditional moves (__builtin_*sel) *) +(* Expansion of integer conditional moves (__builtin_*sel and Pisel) *) (* The generated code works equally well with 32-bit integer registers and with 64-bit integer registers. *) +let expand_integer_cond_move_1 a2 a3 res = + (* GPR0 is -1 (all ones) if condition is true, 0 if it is false *) + if res <> a3 then begin + emit (Pand_ (res, a2, GPR0)); + emit (Pandc (GPR0, a3, GPR0)) + end else begin + emit (Pandc (res, a3, GPR0)); + emit (Pand_ (GPR0, a2, GPR0)) + end; + emit (Por (res, res, GPR0)) + let expand_integer_cond_move a1 a2 a3 res = if a2 = a3 then emit (Pmr (res, a2)) @@ -423,15 +441,22 @@ let expand_integer_cond_move a1 a2 a3 res = end else begin (* a1 has type _Bool, hence it is 0 or 1 *) emit (Psubfic (GPR0, a1, Cint _0)); - (* r0 = -1 (all ones) if a1 is true, r0 = 0 if a1 is false *) - if res <> a3 then begin - emit (Pand_ (res, a2, GPR0)); - emit (Pandc (GPR0, a3, GPR0)) - end else begin - emit (Pandc (res, a3, GPR0)); - emit (Pand_ (GPR0, a2, GPR0)) - end; - emit (Por (res, res, GPR0)) + expand_integer_cond_move_1 a2 a3 res + end + +(* Symmetrically, we emulate the "isel" instruction on PPC processors + that do not have it. *) + +let expand_isel bit a2 a3 res = + if a2 = a3 then + emit (Pmr (res, a2)) + else if eref then + emit (Pisel (res, a2, a3, bit)) + else begin + emit (Pmfcr GPR0); + emit (Prlwinm(GPR0, GPR0, Z.of_uint (1 + num_crbit bit), _1)); + emit (Psubfic (GPR0, GPR0, Cint _0)); + expand_integer_cond_move_1 a2 a3 res end (* Convert integer constant into GPR with corresponding number *) @@ -772,13 +797,6 @@ let set_cr6 sg = (* Expand instructions *) -let num_crbit = function - | CRbit_0 -> 0 - | CRbit_1 -> 1 - | CRbit_2 -> 2 - | CRbit_3 -> 3 - | CRbit_6 -> 6 - let expand_instruction instr = match instr with | Pallocframe(sz, ofs,retofs) -> @@ -874,6 +892,8 @@ let expand_instruction instr = emit (Pcfi_adjust _m8); | Pfxdp(r1, r2) -> if r1 <> r2 then emit(Pfmr(r1, r2)) + | Pisel(rd, r1, r2, bit) -> + expand_isel bit r1 r2 rd | Plmake(r1, rhi, rlo) -> if r1 = rlo then emit (Prldimi(r1, rhi, _32L, upper32)) diff --git a/powerpc/Asmgenproof1.v b/powerpc/Asmgenproof1.v index 8c9fd2bd..2e609900 100644 --- a/powerpc/Asmgenproof1.v +++ b/powerpc/Asmgenproof1.v @@ -1262,13 +1262,13 @@ Qed. Lemma transl_select_op_correct: forall cond args ty r1 r2 rd k rs m c, transl_select_op cond args r1 r2 rd k = OK c -> - important_preg r1 = true -> important_preg r2 = true -> + important_preg rd = true -> important_preg r1 = true -> important_preg r2 = true -> exists rs', exec_straight ge fn c rs m k rs' m /\ Val.lessdef (Val.select (eval_condition cond (map rs (map preg_of args)) m) rs#r1 rs#r2 ty) rs'#rd /\ forall r, important_preg r = true -> r <> rd -> rs'#r = rs#r. Proof. - intros until c. intros TR IMP1 IMP2. + intros until c. intros TR IMP1 IMP2 IMP3. unfold transl_select_op in TR. destruct (ireg_eq r1 r2). - inv TR. econstructor; split; [|split]. -- cgit