aboutsummaryrefslogtreecommitdiffstats
path: root/kvx/FPDivision32.v
diff options
context:
space:
mode:
Diffstat (limited to 'kvx/FPDivision32.v')
-rw-r--r--kvx/FPDivision32.v883
1 files changed, 883 insertions, 0 deletions
diff --git a/kvx/FPDivision32.v b/kvx/FPDivision32.v
new file mode 100644
index 00000000..5a7b536f
--- /dev/null
+++ b/kvx/FPDivision32.v
@@ -0,0 +1,883 @@
+From Flocq Require Import Core Digits Operations Round Bracket Sterbenz
+ Binary Round_odd Bits.
+Require Archi.
+Require Import Coqlib.
+Require Import Compopts.
+Require Import AST.
+Require Import Integers.
+Require Import Floats.
+Require Import Op.
+Require Import CminorSel.
+Require Import OpHelpers.
+Require Import ExtFloats.
+Require Import DecBoolOps.
+Require Import Chunks.
+Require Import Builtins.
+Require Import Values Globalenvs.
+Require Compopts.
+Require Import Psatz.
+Require Import IEEE754_extra.
+
+From Gappa Require Import Gappa_tactic.
+
+Local Open Scope cminorsel_scope.
+
+Definition approx_inv b :=
+ let invb_s := Eop Oinvfs ((Eop Osingleofintu ((Eletvar 0%nat):::Enil)):::Enil) in
+ let invb_d := Eop Ofloatofsingle (invb_s ::: Enil) in
+ let b_d := Eop Ofloatoflongu ((Eop Ocast32unsigned ((Eletvar 1%nat):::Enil)):::Enil) in
+ let invb_d_var := Eletvar (0%nat) in
+ let one := Eop (Ofloatconst ExtFloat.one) Enil in
+ let alpha := Eop Ofmsubf (one ::: invb_d_var ::: b_d ::: Enil) in
+ let x := Eop Ofmaddf (invb_d_var ::: alpha ::: invb_d_var ::: Enil) in
+ Elet b (Elet invb_d x).
+
+Definition approx_inv_thresh := (1/17179869184)%R.
+
+(*
+Lemma BofZ_exact_zero:
+ forall (prec emax : Z) (prec_gt_0_ : Prec_gt_0 prec)
+ (Hmax : (prec < emax)%Z),
+ B2R (BofZ prec emax 0%Z (Hmax := Hmax)) = 0%R /\
+ is_finite (BofZ prec emax 0%Z (Hmax := Hmax)) = true /\
+ Bsign prec emax (BofZ prec emax 0%Z (Hmax := Hmax)) = false.
+Proof.
+ intros.
+ apply BofZ_exact.
+ pose proof (Z.pow_nonneg 2 prec).
+ lia.
+Qed.
+ *)
+
+Lemma Rabs_relax:
+ forall b b' (INEQ : (b < b')%R) x,
+ (-b <= x <= b)%R -> (Rabs x < b')%R.
+Proof.
+ intros.
+ apply Rabs_lt.
+ lra.
+Qed.
+
+Theorem approx_inv_correct :
+ forall (ge : genv) (sp: val) cmenv memenv
+ (le : letenv) (expr_b : expr) (b : int)
+ (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vint b))
+ (b_nz : ((Int.unsigned b) > 0)%Z),
+ exists f : float,
+ eval_expr ge sp cmenv memenv le (approx_inv expr_b) (Vfloat f) /\
+ is_finite _ _ f = true /\ (Rabs((B2R _ _ f) - (1 / IZR (Int.unsigned b))) <= approx_inv_thresh)%R.
+Proof.
+ intros. unfold approx_inv.
+ econstructor. constructor.
+ { repeat econstructor.
+ { eassumption. }
+ { reflexivity. } }
+ set (invb_d := (Float.of_single (ExtFloat32.inv (Float32.of_intu b)))).
+ set (b' := Int.unsigned b) in *.
+ pose proof (Int.unsigned_range b) as RANGE.
+ fold b' in RANGE.
+ change Int.modulus with 4294967296%Z in RANGE.
+ assert (0 <= b' <= Int64.max_unsigned)%Z as b'RANGE.
+ { change Int64.max_unsigned with 18446744073709551615%Z.
+ lia. }
+ assert (1 <= IZR b' <= 4294967295)%R as RANGE'.
+ { split.
+ { apply IZR_le. lia. }
+ apply IZR_le. lia.
+ }
+ cbn.
+
+ set (b_d := (Float.of_longu (Int64.repr b'))) in *.
+ Local Transparent Float.of_longu.
+ unfold Float.of_longu in b_d.
+
+ assert(SILLY : (- 2 ^ 24 <= 1 <= 2 ^ 24)%Z) by lia.
+ destruct (BofZ_exact 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1 SILLY) as (C0E & C0F & _).
+ clear SILLY.
+
+ assert(SILLY : (- 2 ^ 53 <= 1 <= 2 ^ 53)%Z) by lia.
+ destruct (BofZ_exact 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1 SILLY) as (C9E & C9F & _).
+ clear SILLY.
+
+ pose proof (BofZ_correct 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) b') as C1.
+ rewrite Rlt_bool_true in C1; cycle 1.
+ { clear C1. cbn.
+ eapply (Rabs_relax (IZR 4294967296)).
+ lra.
+ gappa.
+ }
+ rewrite Zlt_bool_false in C1 by lia.
+ destruct C1 as (C1E & C1F & _).
+
+ Local Transparent Float32.of_intu Float32.of_int Float32.div.
+ unfold ExtFloat32.inv, ExtFloat32.one, Float32.of_intu, Float32.of_int, Float32.div in invb_d.
+ fold b' in invb_d.
+ change (Int.signed (Int.repr 1%Z)) with 1%Z in invb_d.
+ pose proof (Bdiv_correct 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) Float32.binop_nan mode_NE
+ (BofZ 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1)
+ (BofZ 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) b')) as C2.
+ rewrite Rlt_bool_true in C2; cycle 1.
+ { clear C2. rewrite C1E.
+ apply (Rabs_relax (bpow radix2 10)).
+ { cbn; lra. }
+ unfold F2R. cbn. unfold F2R. cbn.
+ gappa.
+ }
+ assert (B2R 24 128 (BofZ 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) b') <> 0%R) as NONZ.
+ { clear C2.
+ rewrite C1E.
+ cbn.
+ assert (1 <= round radix2 (FLT_exp (-149) 24) ZnearestE (IZR b'))%R by gappa.
+ lra.
+ }
+ destruct (C2 NONZ) as (C2E & C2F & _).
+ clear C2 NONZ.
+ Local Transparent Float.of_single.
+ unfold Float.of_single in invb_d.
+ pose proof (Bconv_correct 24 128 53 1024 (@eq_refl Datatypes.comparison Lt)
+ (@eq_refl Datatypes.comparison Lt) Float.of_single_nan mode_NE
+ (Bdiv 24 128 (@eq_refl Datatypes.comparison Lt)
+ (@eq_refl Datatypes.comparison Lt) Float32.binop_nan mode_NE
+ (BofZ 24 128 (@eq_refl Datatypes.comparison Lt)
+ (@eq_refl Datatypes.comparison Lt) 1)
+ (BofZ 24 128 (@eq_refl Datatypes.comparison Lt)
+ (@eq_refl Datatypes.comparison Lt) b'))) as C3.
+ fold invb_d in C3.
+ rewrite Rlt_bool_true in C3; cycle 1.
+ { clear C3.
+ rewrite C2E.
+ rewrite C1E.
+ rewrite C0E.
+ apply (Rabs_relax (bpow radix2 10)).
+ { apply bpow_lt; lia. }
+ cbn.
+ gappa.
+ }
+ change (is_finite 24 128 (BofZ 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1)) with true in C2F.
+ destruct (C3 C2F) as (C3E & C3F & _).
+ clear C3.
+ unfold Float.fma.
+ assert (is_finite _ _ (Float.neg invb_d) = true) as invb_d_F.
+ { Local Transparent Float.neg.
+ unfold Float.neg.
+ rewrite is_finite_Bopp.
+ assumption.
+ }
+
+ assert(SILLY : (- 2 ^ 53 <= b' <= 2 ^ 53)%Z) by lia.
+ destruct (BofZ_exact 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) b' SILLY) as (C4E & C4F & _).
+ clear SILLY.
+
+ assert (is_finite 53 1024 b_d = true) as b_d_F.
+ { unfold b_d.
+ rewrite Int64.unsigned_repr by lia.
+ assumption.
+ }
+
+ assert (is_finite 53 1024 ExtFloat.one = true) as one_F by reflexivity.
+
+ pose proof (Bfma_correct 53 1024 (@eq_refl Datatypes.comparison Lt)
+ (@eq_refl Datatypes.comparison Lt) Float.fma_nan mode_NE
+ (Float.neg invb_d) b_d ExtFloat.one invb_d_F b_d_F one_F) as C5.
+
+ rewrite Rlt_bool_true in C5; cycle 1.
+ { clear C5.
+ unfold Float.neg.
+ rewrite B2R_Bopp.
+ rewrite C3E.
+ rewrite C2E.
+ rewrite C0E.
+ rewrite C1E.
+ unfold ExtFloat.one.
+ change (Float.of_int (Int.repr 1)) with (BofZ 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1).
+ rewrite C9E.
+ unfold b_d.
+ rewrite Int64.unsigned_repr by lia.
+ rewrite C4E.
+ apply (Rabs_relax (bpow radix2 10)).
+ { apply bpow_lt; lia. }
+ cbn.
+ gappa.
+ }
+ destruct C5 as (C5E & C5F & _).
+
+ pose proof (Bfma_correct 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) Float.fma_nan mode_NE
+ (Bfma 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) Float.fma_nan mode_NE
+ (Float.neg invb_d) b_d ExtFloat.one) invb_d invb_d C5F C3F C3F) as C6.
+ rewrite Rlt_bool_true in C6; cycle 1.
+ { clear C6.
+ rewrite C3E.
+ rewrite C2E.
+ rewrite C1E.
+ rewrite C0E.
+ rewrite C5E.
+ unfold Float.neg.
+ rewrite B2R_Bopp.
+ rewrite C3E.
+ rewrite C2E.
+ rewrite C0E.
+ rewrite C1E.
+ unfold b_d.
+ rewrite Int64.unsigned_repr by lia.
+ rewrite C4E.
+ unfold ExtFloat.one.
+ change (Float.of_int (Int.repr 1)) with (BofZ 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1).
+ rewrite C9E.
+ apply (Rabs_relax (bpow radix2 10)).
+ { apply bpow_lt; lia. }
+ cbn.
+ gappa.
+ }
+ destruct C6 as (C6E & C6F & _).
+ split.
+ { exact C6F. }
+ rewrite C6E.
+ rewrite C5E.
+ rewrite C3E.
+ rewrite C2E.
+ rewrite C1E.
+ rewrite C0E.
+ unfold Float.neg.
+ rewrite B2R_Bopp.
+ unfold ExtFloat.one.
+ Local Transparent Float.of_int.
+ unfold Float.of_int.
+ rewrite (Int.signed_repr 1) by (cbn ; lia).
+ rewrite C9E.
+ rewrite C3E.
+ rewrite C2E.
+ rewrite C0E.
+ rewrite C1E.
+ unfold b_d.
+ rewrite Int64.unsigned_repr by lia.
+ rewrite C4E.
+ cbn.
+ set (rd := round radix2 (FLT_exp (-1074) 53) ZnearestE) in *.
+ set (rs := round radix2 (FLT_exp (-149) 24) ZnearestE) in *.
+ set (bi := IZR b') in *.
+ set (invb0 := rd (rs (1/ rs bi))%R) in *.
+ set (alpha := (- invb0 * bi + 1)%R) in *.
+ set (alpha' := ((1/bi - rd (rs (1/ rs bi)))/(1/bi))%R) in *.
+ assert (alpha = alpha')%R as expand_alpha.
+ {
+ unfold alpha, alpha', invb0.
+ field.
+ lra.
+ }
+ assert(-1/2097152 <= alpha' <= 1/2097152)%R as alpha_BOUND.
+ { unfold alpha', rd, rs.
+ gappa.
+ }
+ set (delta := (rd (rd alpha * invb0 + invb0) - (alpha * invb0 + invb0))%R).
+ assert(-1/1125899906842624 <= delta <= 1/1125899906842624)%R as delta_BOUND.
+ { unfold delta, invb0. rewrite expand_alpha. unfold rd, rs.
+ gappa.
+ }
+ replace (rd (rd alpha * invb0 + invb0) - 1/bi)%R with
+ (delta + ((alpha * invb0 + invb0)-1/bi))%R by (unfold delta; ring).
+ replace (alpha * invb0 + invb0 - 1 / bi)%R with (alpha * (invb0 - 1/bi) + (alpha * (1/bi) + invb0 - 1 / bi))%R by ring.
+ replace (alpha * (1 / bi) + invb0 - 1 / bi)%R with 0%R; cycle 1.
+ { unfold alpha.
+ field.
+ lra.
+ }
+ rewrite expand_alpha.
+ unfold invb0, rd, rs, approx_inv_thresh.
+ apply Rabs_le.
+ gappa.
+Qed.
+
+Definition fp_divu32 a b :=
+ let a_var := Eletvar (1%nat) in
+ let b_var := Eletvar (0%nat) in
+ let a_d := Eop Ofloatoflongu ((Eop Ocast32unsigned (a_var ::: Enil)) ::: Enil) in
+ let qr := Eop Olonguoffloat_ne ((Eop Omulf (a_d:::(approx_inv b_var):::Enil)):::Enil) in
+ let qr_var := Eletvar 0%nat in
+ let rem := Eop Omsubl ((Eop Ocast32unsigned ((Eletvar (2%nat)):::Enil)):::
+ qr_var :::
+ (Eop Ocast32unsigned ((Eletvar (1%nat)):::Enil)):::Enil) in
+ let qr_m1 := Eop (Oaddlimm (Int64.repr (-1)%Z)) (qr_var:::Enil) in
+ let cases := Eop (Osel (Ccompl0 Clt) Tlong)
+ (qr_m1 ::: qr_var ::: rem ::: Enil) in (* (Elet qr cases) *)
+ Eop Olowlong ((Elet a (Elet (lift b) (Elet qr cases))) ::: Enil).
+
+Open Scope Z.
+
+Definition div_approx_reals (a b : Z) (x : R) :=
+ let q:=ZnearestE x in
+ let r:=a-q*b in
+ if r <? 0
+ then q-1
+ else q.
+
+Lemma floor_ball1:
+ forall x : R, forall y : Z,
+ (Rabs (x - IZR y) < 1)%R -> Zfloor x = (y-1)%Z \/ Zfloor x = y.
+Proof.
+ intros x y BALL.
+ apply Rabs_lt_inv in BALL.
+ case (Rcompare_spec x (IZR y)); intro CMP.
+ - left. apply Zfloor_imp.
+ ring_simplify (y-1+1).
+ rewrite minus_IZR. lra.
+ - subst.
+ rewrite Zfloor_IZR. right. reflexivity.
+ - right. apply Zfloor_imp.
+ rewrite plus_IZR. lra.
+Qed.
+
+Theorem div_approx_reals_correct:
+ forall a b : Z, forall x : R,
+ b > 0 ->
+ (Rabs (x - IZR a/ IZR b) < 1/2)%R ->
+ div_approx_reals a b x = (a/b)%Z.
+Proof.
+ intros a b x bPOS GAP.
+ assert (0 < IZR b)%R by (apply IZR_lt ; lia).
+ unfold div_approx_reals.
+ pose proof (Znearest_imp2 (fun x => negb (Z.even x)) x) as NEAR.
+ assert (Rabs (IZR (ZnearestE x) - IZR a/ IZR b) < 1)%R as BALL.
+ { pose proof (Rabs_triang (IZR (ZnearestE x) - x)
+ (x - IZR a/ IZR b)) as TRI.
+ ring_simplify (IZR (ZnearestE x) - x + (x - IZR a / IZR b))%R in TRI.
+ lra.
+ }
+ clear GAP NEAR.
+ rewrite Rabs_minus_sym in BALL.
+ pose proof (floor_ball1 _ _ BALL) as FLOOR.
+ clear BALL.
+ rewrite Zfloor_div in FLOOR by lia.
+ pose proof (Z_div_mod_eq_full a b) as DIV_MOD.
+ assert (0 < b) as bPOS' by lia.
+ pose proof (Z.mod_pos_bound a b bPOS') as MOD_BOUNDS.
+ case Z.ltb_spec; intro; destruct FLOOR; lia.
+Qed.
+
+Opaque approx_inv.
+
+Theorem fp_divu32_correct :
+ forall (ge : genv) (sp: val) cmenv memenv
+ (le : letenv) (expr_a expr_b : expr) (a b : int)
+ (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vint a))
+ (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vint b))
+ (b_nz : (Int.unsigned b > 0)%Z),
+ eval_expr ge sp cmenv memenv le (fp_divu32 expr_a expr_b)
+ (Vint (Int.divu a b)).
+Proof.
+ intros.
+ assert (eval_expr ge sp cmenv memenv (Vint b :: Vint a :: le)
+ (Eletvar 0) (Vint b)) as EVAL_b'.
+ { constructor. reflexivity. }
+ destruct (approx_inv_correct ge sp cmenv memenv (Vint b :: Vint a :: le) (Eletvar 0) b EVAL_b' b_nz) as (inv_b & inv_b_eval & inv_b_finite & inv_b_correct).
+ unfold fp_divu32.
+ repeat econstructor.
+ { exact EVAL_a. }
+ { apply eval_lift. exact EVAL_b. }
+ exact inv_b_eval.
+ cbn. f_equal.
+ rewrite <- Float.of_intu_of_longu.
+ unfold Float.to_longu_ne.
+ rewrite ZofB_ne_range_correct by lia.
+ set (prod := (Float.mul (Float.of_intu a) inv_b)).
+ pose proof (Int.unsigned_range a) as a_range.
+ pose proof (Int.unsigned_range b) as b_range.
+ change Int.modulus with 4294967296 in a_range.
+ change Int.modulus with 4294967296 in b_range.
+ set (prod' := (B2R _ _ prod)).
+ set (prod_r := ZnearestE prod') in *.
+
+ Local Transparent Float.mul Float.of_intu.
+ unfold Float.mul, Float.of_intu in prod.
+ set (a' := Int.unsigned a) in *.
+ set (b' := Int.unsigned b) in *.
+ assert (IZR_a' : (0 <= IZR a' <= 4294967295)%R).
+ { split; apply IZR_le; lia. }
+ assert (IZR_b' : (1 <= IZR b' <= 4294967295)%R).
+ { split; apply IZR_le; lia. }
+ assert (SILLY : -2^53 <= a' <= 2^53).
+ { cbn; lia. }
+ destruct (BofZ_exact 53 1024 (@eq_refl _ Lt) (@eq_refl _ Lt) a' SILLY) as (C0E & C0F & _).
+ clear SILLY.
+ pose proof (Bmult_correct 53 1024 (@eq_refl _ Lt) (@eq_refl _ Lt) Float.binop_nan mode_NE
+ (BofZ 53 1024 (@eq_refl _ Lt) (@eq_refl _ Lt)a') inv_b) as C1.
+ set (inv_b_r := B2R 53 1024 inv_b) in *.
+ assert (INV_RANGE : (1/4294967296 <= 1/IZR b' <= 1)%R).
+ { split; unfold Rdiv.
+ - apply Rmult_le_compat_l. lra.
+ apply Rinv_le. apply IZR_lt. lia.
+ apply IZR_le. lia.
+ - replace 1%R with (1 / 1)%R at 2 by field.
+ apply Rmult_le_compat_l. lra.
+ apply Rinv_le. apply IZR_lt. lia.
+ apply IZR_le. lia.
+ }
+ apply Rabs_def2b in inv_b_correct.
+ rewrite Rlt_bool_true in C1; cycle 1.
+ { clear C1.
+ rewrite C0E.
+ apply (Rabs_relax (bpow radix2 64)).
+ { apply bpow_lt. lia. }
+ replace inv_b_r with (1 / IZR b' + (inv_b_r - 1 / IZR b'))%R by ring.
+ set (delta := (inv_b_r - 1 / IZR b')%R) in *.
+ unfold approx_inv_thresh in inv_b_correct.
+ cbn.
+ assert (b'_RANGE : (1 <= IZR b' <= 4294967295)%R).
+ { split; apply IZR_le; lia.
+ }
+ assert (a'_RANGE : (0 <= IZR a' <= 4294967295)%R).
+ { split; apply IZR_le; lia.
+ }
+ gappa.
+ }
+ rewrite C0F in C1.
+ cbn in C1.
+ rewrite C0E in C1.
+ rewrite inv_b_finite in C1.
+ fold prod in C1.
+ fold prod' in C1.
+ destruct C1 as (C1E & C1F & _).
+ rewrite C1F. cbn.
+
+ assert(prod'_range : (0 <= prod' <= 17179869181/4)%R).
+ {
+ rewrite C1E.
+ replace inv_b_r with (1/IZR b' + (inv_b_r - 1 / IZR b'))%R by ring.
+ assert (a'_RANGE : (0 <= IZR a' <= 4294967295)%R).
+ { split; apply IZR_le; lia.
+ }
+ unfold approx_inv_thresh in inv_b_correct.
+ set (true_inv := (1 / IZR b')%R) in *.
+ set (delta := (inv_b_r - true_inv)%R) in *.
+ gappa.
+ }
+
+ assert(0 <= prod_r <= 4294967295) as prod_r_range.
+ { unfold prod_r.
+ rewrite <- (Znearest_IZR (fun x => negb (Z.even x)) 0).
+ replace 4294967295 with (ZnearestE (17179869181 / 4)%R); cycle 1.
+ { apply Znearest_imp.
+ apply Rabs_lt.
+ lra.
+ }
+ split; apply Znearest_le; lra.
+ }
+
+ replace (_ && _ ) with true; cycle 1.
+ {
+ symmetry.
+ rewrite andb_true_iff.
+ split; apply Zle_imp_le_bool; lia.
+ }
+ cbn.
+ f_equal.
+ unfold Int.divu.
+ assert(Rabs
+ (round radix2 (FLT_exp (-1074) 53) ZnearestE (IZR a' * inv_b_r) - (IZR a' * inv_b_r)) <= 1/512)%R as R1 by gappa.
+
+ assert ( (Rabs (B2R 53 1024 prod - IZR (Int.unsigned a) / IZR (Int.unsigned b)) < 1 / 2)%R) as NEAR.
+ {
+ unfold prod.
+ pose proof (Bmult_correct 53 1024 (@eq_refl _ Lt) (@eq_refl _ Lt) Float.binop_nan mode_NE (BofZ 53 1024 (@eq_refl _ Lt) (@eq_refl _ Lt) a') inv_b) as C2.
+ rewrite C0E in C2.
+ rewrite Rlt_bool_true in C2; cycle 1.
+ { clear C2.
+ apply (Rabs_relax (bpow radix2 64)).
+ { apply bpow_lt. reflexivity. }
+ cbn.
+ fold inv_b_r.
+ replace inv_b_r with (1 / IZR b' + (inv_b_r - 1 / IZR b'))%R by ring.
+ set (delta := (inv_b_r - 1 / IZR b')%R) in *.
+ unfold approx_inv_thresh in *.
+ gappa.
+ }
+ destruct C2 as (C2E & C2F & _).
+ rewrite C2E.
+ fold inv_b_r a' b'.
+ replace ((round radix2 (FLT_exp (3 - 1024 - 53) 53) (round_mode mode_NE) (IZR a' * inv_b_r)) -
+ (IZR a' / IZR b'))%R with
+ (((round radix2 (FLT_exp (3 - 1024 - 53) 53) (round_mode mode_NE) (IZR a' * inv_b_r)) -
+ (IZR a' * inv_b_r)) +
+ (IZR a' * (inv_b_r - 1 / IZR b')))%R by (field ; lra).
+ set(delta := (inv_b_r - 1 / IZR b')%R) in *.
+ cbn.
+ unfold approx_inv_thresh in *.
+ assert (Rabs(IZR a' * delta) <= 3/8)%R as R2.
+ { apply Rabs_le.
+ split; nra.
+ }
+ rewrite <- C1E.
+ rewrite <- C1E in R1.
+ pose proof (Rabs_triang (prod' - IZR a' * inv_b_r) (IZR a' * delta))%R.
+ lra.
+ }
+ pose proof (div_approx_reals_correct (Int.unsigned a) (Int.unsigned b) prod' b_nz NEAR) as DIV_CORRECT.
+ rewrite <- DIV_CORRECT.
+
+ unfold div_approx_reals in *.
+ fold a' b' prod' prod_r.
+ unfold Int64.mul.
+ rewrite Int64.unsigned_repr by (cbn; lia).
+ rewrite Int64.unsigned_repr by (cbn; lia).
+ unfold Int64.sub.
+ rewrite Int64.unsigned_repr by (cbn; lia).
+ rewrite Int64.unsigned_repr by (cbn; nia).
+ assert (FMA_RANGE : Int64.min_signed <= a' - prod_r * b' <= Int64.max_signed).
+ { cbn.
+ unfold prod_r.
+ rewrite <- C1E in R1.
+ assert (IZR (-9223372036854775808) <= IZR (a' - ZnearestE prod' * b') <= IZR 9223372036854775807)%R.
+ 2: split; apply le_IZR; lra.
+ rewrite minus_IZR.
+ rewrite mult_IZR.
+ replace (IZR (ZnearestE prod')) with (prod' + (IZR (ZnearestE prod') - prod'))%R by ring.
+ pose proof (Znearest_imp2 (fun x => negb (Z.even x)) prod') as R2.
+ set (delta1 := (IZR (ZnearestE prod') - prod')%R) in *.
+ replace prod' with ((prod' - IZR a' * inv_b_r) + IZR a' * (inv_b_r - 1 / IZR b')
+ + IZR a' / IZR b')%R by (field; lra).
+ set (delta2 := (inv_b_r - 1 / IZR b')%R) in *.
+ set (delta3 := (prod' - IZR a' * inv_b_r)%R) in *.
+ replace (IZR a' - (delta3 + IZR a' * delta2 + IZR a' / IZR b' + delta1) * IZR b')%R with
+ (- (delta3 + IZR a' * delta2 + delta1) * IZR b')%R by (field; lra).
+ unfold approx_inv_thresh in *.
+ assert (Rabs(IZR a' * delta2) <= 1/4)%R as R4.
+ { apply Rabs_le.
+ split;
+ nra. }
+ set (delta4 := (IZR a' * delta2)%R) in *.
+ apply Rabs_def2b in R1.
+ apply Rabs_def2b in R2.
+ apply Rabs_def2b in R4.
+ split; nra.
+ }
+ fold a' b' prod_r in DIV_CORRECT.
+
+ pose proof (Zlt_cases (a' - prod_r * b') 0) as CMP.
+ destruct (Z.ltb (a' - prod_r * b') 0).
+ - replace (Int64.lt (Int64.repr (a' - prod_r * b')) Int64.zero) with true; cycle 1.
+ { unfold Int64.lt.
+ change (Int64.signed Int64.zero) with 0.
+ rewrite Int64.signed_repr by exact FMA_RANGE.
+ rewrite zlt_true by lia.
+ reflexivity.
+ }
+ cbn.
+ f_equal.
+ rewrite Int64.add_signed.
+ rewrite (Int64.signed_repr prod_r) by (cbn ; lia).
+ rewrite Int64.signed_repr by (cbn ; lia).
+ unfold Int64.loword.
+ rewrite Int64.unsigned_repr. reflexivity.
+ split.
+ 2: cbn; lia.
+ replace (prod_r + (-1)) with (prod_r - 1) by lia.
+ rewrite DIV_CORRECT.
+ apply Z.div_pos; lia.
+
+ - replace (Int64.lt (Int64.repr (a' - prod_r * b')) Int64.zero) with false; cycle 1.
+ { unfold Int64.lt.
+ change (Int64.signed Int64.zero) with 0.
+ rewrite Int64.signed_repr by exact FMA_RANGE.
+ rewrite zlt_false by lia.
+ reflexivity.
+ }
+ cbn.
+ unfold Int64.loword.
+ rewrite Int64.unsigned_repr by (cbn; lia).
+ reflexivity.
+Qed.
+
+Definition e_msubl a b c := Eop Omsub (a ::: b ::: c ::: Enil).
+Definition fp_modu32 a b := Elet a (Elet (lift b) (e_msubl (Eletvar 1%nat) (Eletvar 0%nat)
+ (fp_divu32 (Eletvar 1%nat) (Eletvar 0%nat)))).
+
+Theorem fp_modu32_correct :
+ forall (ge : genv) (sp: val) cmenv memenv
+ (le : letenv) (expr_a expr_b : expr) (a b : int)
+ (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vint a))
+ (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vint b))
+ (b_nz : (Int.unsigned b > 0)%Z),
+ eval_expr ge sp cmenv memenv le (fp_modu32 expr_a expr_b)
+ (Vint (Int.modu a b)).
+Proof.
+ intros.
+ rewrite Int.modu_divu; cycle 1.
+ { intro Z.
+ subst.
+ rewrite Int.unsigned_zero in b_nz.
+ lia.
+ }
+ unfold fp_modu32.
+ Local Opaque fp_divu32.
+ repeat (econstructor + apply eval_lift + eassumption).
+ { apply fp_divu32_correct;
+ repeat (econstructor + apply eval_lift + eassumption).
+ }
+ cbn.
+ rewrite Int.mul_commut.
+ reflexivity.
+Qed.
+
+Definition e_is_neg a := Eop (Ocmp (Ccompimm Clt Int.zero)) (a ::: Enil).
+Definition e_xorw a b := Eop Oxor (a ::: b ::: Enil).
+Definition e_ite ty c vc v1 v2 := Eop (Osel c ty) (v1 ::: v2 ::: vc ::: Enil).
+Definition e_neg a := Eop Oneg (a ::: Enil).
+Definition e_abs a := Eop (Oabsdiffimm Int.zero) (a ::: Enil).
+
+Definition fp_divs32 a b :=
+ Elet a (Elet (lift b)
+ (Elet (fp_divu32 (e_abs (Eletvar (1%nat))) (e_abs (Eletvar (0%nat))))
+ (e_ite Tint (Ccompu0 Cne) (e_xorw (e_is_neg (Eletvar 2%nat))
+ (e_is_neg (Eletvar 1%nat)))
+ (e_neg (Eletvar 0%nat)) (Eletvar 0%nat)))).
+
+Lemma nonneg_signed_unsigned:
+ forall x (x_NONNEG : Int.signed x >= 0),
+ (Int.signed x) = (Int.unsigned x).
+Proof.
+ intros.
+ pose proof (Int.unsigned_range x).
+ unfold Int.signed in *.
+ destruct zlt. reflexivity.
+ change Int.modulus with 4294967296%Z in *.
+ change Int.half_modulus with 2147483648%Z in *.
+ lia.
+Qed.
+
+Lemma int_min_signed_unsigned :
+ (- Int.min_signed < Int.max_unsigned)%Z.
+Proof.
+ reflexivity.
+Qed.
+
+Lemma int_divs_divu :
+ forall a b
+ (b_NOT0 : Int.signed b <> 0),
+ Int.divs a b = if xorb (Int.lt a Int.zero)
+ (Int.lt b Int.zero)
+ then Int.neg (Int.divu (ExtValues.int_abs a)
+ (ExtValues.int_abs b))
+ else Int.divu (ExtValues.int_abs a) (ExtValues.int_abs b).
+Proof.
+ intros.
+ unfold Int.divs, Int.divu, Int.lt, ExtValues.int_abs.
+ pose proof (Int.signed_range a) as a_RANGE.
+ pose proof (Int.signed_range b) as b_RANGE.
+ change (Int.signed Int.zero) with 0%Z.
+ destruct zlt.
+ - cbn. rewrite (Z.abs_neq (Int.signed a)) by lia.
+ rewrite (Int.unsigned_repr (- Int.signed a)); cycle 1.
+ { pose proof int_min_signed_unsigned. lia. }
+
+ destruct zlt.
+ + rewrite (Z.abs_neq (Int.signed b)) by lia.
+ rewrite Int.unsigned_repr ; cycle 1.
+ { pose proof int_min_signed_unsigned. lia. }
+ rewrite <- (Z.opp_involutive (Int.signed b)) at 1.
+ rewrite Z.quot_opp_r by lia.
+ rewrite <- (Z.opp_involutive (Int.signed a)) at 1.
+ rewrite Z.quot_opp_l by lia.
+ rewrite Z.quot_div_nonneg by lia.
+ rewrite Z.opp_involutive.
+ reflexivity.
+
+ + rewrite (Z.abs_eq (Int.signed b)) by lia.
+ rewrite Int.unsigned_repr ; cycle 1.
+ { pose proof Int.max_signed_unsigned. lia. }
+ rewrite <- (Z.opp_involutive (Int.signed a)) at 1.
+ rewrite Z.quot_opp_l by lia.
+ rewrite Z.quot_div_nonneg by lia.
+ rewrite Int.neg_repr.
+ reflexivity.
+
+ - cbn. rewrite (Z.abs_eq (Int.signed a)) by lia.
+ rewrite (Int.unsigned_repr (Int.signed a)); cycle 1.
+ { pose proof Int.max_signed_unsigned. lia. }
+ destruct zlt.
+ + rewrite (Z.abs_neq (Int.signed b)) by lia.
+ rewrite Int.unsigned_repr ; cycle 1.
+ { pose proof int_min_signed_unsigned. lia. }
+ rewrite Int.neg_repr.
+ rewrite <- (Z.opp_involutive (Int.signed b)) at 1.
+ rewrite Z.quot_opp_r by lia.
+ rewrite Z.quot_div_nonneg by lia.
+ reflexivity.
+
+ + rewrite (Z.abs_eq (Int.signed b)) by lia.
+ rewrite Int.unsigned_repr ; cycle 1.
+ { pose proof Int.max_signed_unsigned. lia. }
+ rewrite Z.quot_div_nonneg by lia.
+ reflexivity.
+Qed.
+
+Lemma nonzero_unsigned_signed :
+ forall b, Int.unsigned b > 0 -> Int.signed b <> 0.
+Proof.
+ intros b GT EQ.
+ rewrite Int.unsigned_signed in GT.
+ unfold Int.lt in GT.
+ rewrite Int.signed_zero in GT.
+ destruct zlt in GT; lia.
+Qed.
+
+Theorem fp_divs32_correct :
+ forall (ge : genv) (sp: val) cmenv memenv
+ (le : letenv) (expr_a expr_b : expr) (a b : int)
+ (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vint a))
+ (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vint b))
+ (b_nz : (Int.unsigned b > 0)%Z),
+ eval_expr ge sp cmenv memenv le (fp_divs32 expr_a expr_b)
+ (Vint (Int.divs a b)).
+Proof.
+ intros.
+ unfold fp_divs32.
+ Local Opaque fp_divu32.
+ repeat (econstructor + apply eval_lift + eassumption).
+ apply fp_divu32_correct.
+ all: repeat (econstructor + apply eval_lift + eassumption).
+ { unfold ExtValues.int_absdiff, ExtValues.Z_abs_diff.
+ rewrite Int.signed_zero. rewrite Z.sub_0_r.
+ rewrite Int.unsigned_repr.
+ { pose proof (nonzero_unsigned_signed b b_nz).
+ lia.
+ }
+ pose proof Int.max_signed_unsigned.
+ pose proof int_min_signed_unsigned.
+ pose proof (Int.signed_range b).
+ lia.
+ }
+ cbn.
+ rewrite int_divs_divu ; cycle 1.
+ { apply nonzero_unsigned_signed. assumption. }
+ unfold Int.lt, ExtValues.int_abs, ExtValues.int_absdiff, ExtValues.Z_abs_diff.
+ change (Int.signed Int.zero) with 0%Z.
+ repeat rewrite Z.sub_0_r.
+ destruct zlt; destruct zlt; reflexivity.
+Qed.
+
+Lemma int_mods_modu :
+ forall a b
+ (b_NOT0 : Int.signed b <> 0),
+ Int.mods a b = if Int.lt a Int.zero
+ then Int.neg (Int.modu (ExtValues.int_abs a)
+ (ExtValues.int_abs b))
+ else Int.modu (ExtValues.int_abs a) (ExtValues.int_abs b).
+Proof.
+ intros.
+ unfold Int.mods, Int.modu, Int.lt, ExtValues.int_abs.
+ pose proof (Int.signed_range a) as a_RANGE.
+ pose proof (Int.signed_range b) as b_RANGE.
+ change (Int.signed Int.zero) with 0%Z.
+ destruct zlt.
+ - cbn. rewrite (Z.abs_neq (Int.signed a)) by lia.
+ rewrite (Int.unsigned_repr (- Int.signed a)); cycle 1.
+ { pose proof int_min_signed_unsigned. lia. }
+
+ destruct (zlt (Int.signed b) 0%Z).
+ + rewrite (Z.abs_neq (Int.signed b)) by lia.
+ rewrite Int.unsigned_repr ; cycle 1.
+ { pose proof int_min_signed_unsigned. lia. }
+ rewrite <- (Z.opp_involutive (Int.signed b)) at 1.
+ rewrite Z.rem_opp_r by lia.
+ rewrite <- (Z.opp_involutive (Int.signed a)) at 1.
+ rewrite Z.rem_opp_l by lia.
+ rewrite Z.rem_mod_nonneg by lia.
+ rewrite Int.neg_repr.
+ reflexivity.
+
+ + rewrite (Z.abs_eq (Int.signed b)) by lia.
+ rewrite Int.unsigned_repr ; cycle 1.
+ { pose proof Int.max_signed_unsigned. lia. }
+ rewrite <- (Z.opp_involutive (Int.signed a)) at 1.
+ rewrite Z.rem_opp_l by lia.
+ rewrite Z.rem_mod_nonneg by lia.
+ rewrite Int.neg_repr.
+ reflexivity.
+
+ - cbn. rewrite (Z.abs_eq (Int.signed a)) by lia.
+ rewrite (Int.unsigned_repr (Int.signed a)); cycle 1.
+ { pose proof Int.max_signed_unsigned. lia. }
+ destruct (zlt (Int.signed b) 0%Z).
+ + rewrite (Z.abs_neq (Int.signed b)) by lia.
+ rewrite Int.unsigned_repr ; cycle 1.
+ { pose proof int_min_signed_unsigned. lia. }
+ rewrite <- (Z.opp_involutive (Int.signed b)) at 1.
+ rewrite Z.rem_opp_r by lia.
+ rewrite Z.rem_mod_nonneg by lia.
+ reflexivity.
+
+ + rewrite (Z.abs_eq (Int.signed b)) by lia.
+ rewrite Int.unsigned_repr ; cycle 1.
+ { pose proof Int.max_signed_unsigned. lia. }
+ rewrite Z.rem_mod_nonneg by lia.
+ reflexivity.
+Qed.
+
+Definition fp_mods32z a b :=
+ Elet a (Elet (lift b)
+ (Elet (fp_modu32 (e_abs (Eletvar (1%nat))) (e_abs (Eletvar (0%nat))))
+ (e_ite Tint (Ccomp0 Clt) (Eletvar 2%nat)
+ (e_neg (Eletvar 0%nat)) (Eletvar 0%nat)))).
+
+Theorem fp_mods32z_correct :
+ forall (ge : genv) (sp: val) cmenv memenv
+ (le : letenv) (expr_a expr_b : expr) (a b : int)
+ (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vint a))
+ (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vint b))
+ (b_nz : (Int.unsigned b > 0)%Z),
+ eval_expr ge sp cmenv memenv le (fp_mods32z expr_a expr_b)
+ (Vint (Int.mods a b)).
+Proof.
+ intros.
+ unfold fp_mods32z.
+ Local Opaque fp_modu32.
+ repeat (econstructor + apply eval_lift + eassumption).
+ apply fp_modu32_correct.
+ all: repeat (econstructor + apply eval_lift + eassumption).
+ { unfold ExtValues.int_absdiff, ExtValues.Z_abs_diff.
+ rewrite Int.signed_zero. rewrite Z.sub_0_r.
+ rewrite Int.unsigned_repr.
+ { pose proof (nonzero_unsigned_signed b b_nz).
+ lia.
+ }
+ pose proof Int.max_signed_unsigned.
+ pose proof int_min_signed_unsigned.
+ pose proof (Int.signed_range b).
+ lia.
+ }
+ cbn.
+ rewrite int_mods_modu ; cycle 1.
+ { apply nonzero_unsigned_signed. assumption. }
+ unfold Int.lt, ExtValues.int_abs, ExtValues.int_absdiff, ExtValues.Z_abs_diff.
+ change (Int.signed Int.zero) with 0%Z.
+ repeat rewrite Z.sub_0_r.
+ destruct zlt; reflexivity.
+Qed.
+
+Definition e_msub a b c := Eop Omsub (a ::: b ::: c ::: Enil).
+
+Definition fp_mods32 a b :=
+ Elet a (Elet (lift b)
+ (Elet (fp_divs32 (Eletvar (1%nat)) (Eletvar (0%nat)))
+ (e_msub (Eletvar 2%nat) (Eletvar 1%nat) (Eletvar 0%nat)))).
+
+Theorem fp_mods32_correct :
+ forall (ge : genv) (sp: val) cmenv memenv
+ (le : letenv) (expr_a expr_b : expr) (a b : int)
+ (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vint a))
+ (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vint b))
+ (b_nz : (Int.unsigned b > 0)%Z),
+ eval_expr ge sp cmenv memenv le (fp_mods32 expr_a expr_b)
+ (Vint (Int.mods a b)).
+Proof.
+ intros.
+ rewrite Int.mods_divs.
+ unfold fp_mods32.
+ Local Opaque fp_divs32.
+ repeat (econstructor + apply eval_lift + eassumption).
+ { apply fp_divs32_correct;
+ repeat (econstructor + apply eval_lift + eassumption).
+ }
+ cbn.
+ rewrite Int.mul_commut.
+ reflexivity.
+Qed.