1 files changed, 883 insertions, 0 deletions
diff --git a/kvx/FPDivision32.v b/kvx/FPDivision32.v
new file mode 100644
index 00000000..5a7b536f
--- /dev/null
+++ b/kvx/FPDivision32.v
@@ -0,0 +1,883 @@
+From Flocq Require Import Core Digits Operations Round Bracket Sterbenz
+                          Binary Round_odd Bits.
+Require Archi.
+Require Import Coqlib.
+Require Import Compopts.
+Require Import AST.
+Require Import Integers.
+Require Import Floats.
+Require Import Op.
+Require Import CminorSel.
+Require Import OpHelpers.
+Require Import ExtFloats.
+Require Import DecBoolOps.
+Require Import Chunks.
+Require Import Builtins.
+Require Import Values Globalenvs.
+Require Compopts.
+Require Import Psatz.
+Require Import IEEE754_extra.
+
+From Gappa Require Import Gappa_tactic.
+
+Local Open Scope cminorsel_scope.
+    
+Definition approx_inv b :=
+  let invb_s := Eop Oinvfs ((Eop Osingleofintu ((Eletvar 0%nat):::Enil)):::Enil) in
+  let invb_d := Eop Ofloatofsingle (invb_s ::: Enil) in
+  let b_d := Eop Ofloatoflongu ((Eop Ocast32unsigned ((Eletvar 1%nat):::Enil)):::Enil) in
+  let invb_d_var := Eletvar (0%nat) in
+  let one := Eop (Ofloatconst ExtFloat.one) Enil in
+  let alpha := Eop Ofmsubf (one ::: invb_d_var ::: b_d ::: Enil) in
+  let x := Eop Ofmaddf (invb_d_var ::: alpha ::: invb_d_var ::: Enil) in
+  Elet b (Elet invb_d x).
+
+Definition approx_inv_thresh := (1/17179869184)%R.
+
+(*
+Lemma BofZ_exact_zero:
+  forall (prec emax : Z) (prec_gt_0_ : Prec_gt_0 prec)
+         (Hmax : (prec < emax)%Z),
+  B2R (BofZ prec emax 0%Z (Hmax := Hmax)) = 0%R /\
+  is_finite (BofZ prec emax 0%Z (Hmax := Hmax)) = true /\
+    Bsign prec emax (BofZ prec emax 0%Z (Hmax := Hmax)) = false.
+Proof.
+  intros.
+  apply BofZ_exact.
+  pose proof (Z.pow_nonneg 2 prec).
+  lia.
+Qed.
+ *)
+
+Lemma Rabs_relax:
+  forall b b' (INEQ : (b < b')%R) x,
+    (-b <= x <= b)%R -> (Rabs x < b')%R.
+Proof.
+  intros.
+  apply Rabs_lt.
+  lra.
+Qed.
+
+Theorem approx_inv_correct :
+  forall (ge : genv)  (sp: val) cmenv memenv
+         (le : letenv) (expr_b : expr) (b : int)
+         (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vint b))
+         (b_nz : ((Int.unsigned b) > 0)%Z),
+    exists f : float,
+      eval_expr ge sp cmenv memenv le (approx_inv expr_b) (Vfloat f) /\
+        is_finite _ _ f = true /\ (Rabs((B2R _ _ f) - (1 / IZR (Int.unsigned b))) <= approx_inv_thresh)%R.
+Proof.
+  intros. unfold approx_inv.
+  econstructor. constructor.
+  { repeat econstructor.
+    { eassumption. }
+    { reflexivity. } }
+  set (invb_d := (Float.of_single (ExtFloat32.inv (Float32.of_intu b)))).
+  set (b' := Int.unsigned b) in *. 
+  pose proof (Int.unsigned_range b) as RANGE.
+  fold b' in RANGE.
+  change  Int.modulus with  4294967296%Z in RANGE.
+  assert (0 <= b' <= Int64.max_unsigned)%Z as b'RANGE.
+  { change Int64.max_unsigned with 18446744073709551615%Z.
+    lia. }
+  assert (1 <= IZR b' <= 4294967295)%R as RANGE'.
+  { split.
+    { apply IZR_le. lia. }
+    apply IZR_le. lia.
+  }
+  cbn.
+  
+  set (b_d := (Float.of_longu (Int64.repr b'))) in *.
+  Local Transparent Float.of_longu.
+  unfold Float.of_longu in b_d.
+
+  assert(SILLY : (- 2 ^ 24 <= 1 <= 2 ^ 24)%Z) by lia.
+  destruct (BofZ_exact 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1 SILLY) as (C0E & C0F & _).
+  clear SILLY.
+
+  assert(SILLY : (- 2 ^ 53 <= 1 <= 2 ^ 53)%Z) by lia.
+  destruct (BofZ_exact 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1 SILLY) as (C9E & C9F & _).
+  clear SILLY.
+  
+  pose proof (BofZ_correct 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) b') as C1.
+  rewrite Rlt_bool_true in C1; cycle 1.
+  { clear C1. cbn.
+    eapply (Rabs_relax (IZR 4294967296)).
+    lra.
+    gappa.
+  }
+  rewrite Zlt_bool_false in C1 by lia.
+  destruct C1 as (C1E & C1F & _).
+  
+  Local Transparent Float32.of_intu Float32.of_int Float32.div.
+  unfold ExtFloat32.inv, ExtFloat32.one, Float32.of_intu, Float32.of_int, Float32.div in invb_d.
+  fold b' in invb_d.
+  change (Int.signed (Int.repr 1%Z)) with 1%Z in invb_d.
+  pose proof (Bdiv_correct 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) Float32.binop_nan mode_NE
+                      (BofZ 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1)
+                      (BofZ 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) b')) as C2.
+  rewrite Rlt_bool_true in C2; cycle 1.
+  { clear C2. rewrite C1E.
+    apply (Rabs_relax (bpow radix2 10)).
+    { cbn; lra. }
+    unfold F2R. cbn. unfold F2R. cbn.
+    gappa.
+  }
+  assert (B2R 24 128 (BofZ 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) b') <> 0%R) as NONZ.
+  { clear C2.
+    rewrite C1E.
+    cbn.
+    assert (1 <= round radix2 (FLT_exp (-149) 24) ZnearestE (IZR b'))%R by gappa.
+    lra.
+  }
+  destruct (C2 NONZ) as (C2E & C2F & _).
+  clear C2 NONZ.
+  Local Transparent Float.of_single.
+  unfold Float.of_single in invb_d.
+  pose proof (Bconv_correct 24 128 53 1024 (@eq_refl Datatypes.comparison Lt)
+              (@eq_refl Datatypes.comparison Lt) Float.of_single_nan mode_NE
+              (Bdiv 24 128 (@eq_refl Datatypes.comparison Lt)
+                 (@eq_refl Datatypes.comparison Lt) Float32.binop_nan mode_NE
+                 (BofZ 24 128 (@eq_refl Datatypes.comparison Lt)
+                    (@eq_refl Datatypes.comparison Lt) 1)
+                 (BofZ 24 128 (@eq_refl Datatypes.comparison Lt)
+                       (@eq_refl Datatypes.comparison Lt) b'))) as C3.
+  fold invb_d in C3.
+  rewrite Rlt_bool_true in C3; cycle 1.
+  { clear C3.
+    rewrite C2E.
+    rewrite C1E.
+    rewrite C0E.
+    apply (Rabs_relax (bpow radix2 10)).
+    { apply bpow_lt; lia. }
+    cbn.
+    gappa.
+  }
+  change (is_finite 24 128 (BofZ 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1)) with true in C2F.
+  destruct (C3 C2F) as (C3E & C3F & _).
+  clear C3.
+  unfold Float.fma.
+  assert (is_finite _ _ (Float.neg invb_d) = true) as invb_d_F.
+  { Local Transparent Float.neg.
+    unfold Float.neg.
+    rewrite is_finite_Bopp.
+    assumption.
+  }
+
+  assert(SILLY : (- 2 ^ 53 <= b' <= 2 ^ 53)%Z) by lia.
+  destruct (BofZ_exact 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) b' SILLY) as (C4E & C4F & _).
+  clear SILLY.
+
+  assert (is_finite 53 1024 b_d = true) as b_d_F.
+  { unfold b_d.
+    rewrite Int64.unsigned_repr by lia.
+    assumption.
+  }
+  
+  assert (is_finite 53 1024 ExtFloat.one = true) as one_F by reflexivity.
+  
+  pose proof (Bfma_correct 53 1024 (@eq_refl Datatypes.comparison Lt)
+          (@eq_refl Datatypes.comparison Lt) Float.fma_nan mode_NE
+          (Float.neg invb_d) b_d ExtFloat.one invb_d_F b_d_F one_F) as C5.
+  
+  rewrite Rlt_bool_true in C5; cycle 1.
+  { clear C5.
+    unfold Float.neg.
+    rewrite B2R_Bopp.
+    rewrite C3E.
+    rewrite C2E.
+    rewrite C0E.
+    rewrite C1E.
+    unfold ExtFloat.one.
+    change  (Float.of_int (Int.repr 1)) with (BofZ 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1).
+    rewrite C9E.
+    unfold b_d.
+    rewrite Int64.unsigned_repr by lia.
+    rewrite C4E.
+    apply (Rabs_relax (bpow radix2 10)).
+    { apply bpow_lt; lia. }
+    cbn.
+    gappa.
+  }  
+  destruct C5 as (C5E & C5F & _).
+
+  pose proof (Bfma_correct 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) Float.fma_nan mode_NE
+       (Bfma 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) Float.fma_nan mode_NE 
+             (Float.neg invb_d) b_d ExtFloat.one) invb_d invb_d C5F C3F C3F) as C6.
+  rewrite Rlt_bool_true in C6; cycle 1.
+  { clear C6.
+    rewrite C3E.
+    rewrite C2E.
+    rewrite C1E.
+    rewrite C0E.
+    rewrite C5E.
+    unfold Float.neg.
+    rewrite B2R_Bopp.
+    rewrite C3E.
+    rewrite C2E.
+    rewrite C0E.
+    rewrite C1E.
+    unfold b_d.
+    rewrite Int64.unsigned_repr by lia.
+    rewrite C4E.
+    unfold ExtFloat.one.
+    change  (Float.of_int (Int.repr 1)) with (BofZ 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1).
+    rewrite C9E.
+    apply (Rabs_relax (bpow radix2 10)).
+    { apply bpow_lt; lia. }
+    cbn.
+    gappa.
+  }
+  destruct C6 as (C6E & C6F & _).
+  split.
+  { exact C6F. }
+  rewrite C6E.
+  rewrite C5E.
+  rewrite C3E.
+  rewrite C2E.
+  rewrite C1E.
+  rewrite C0E.
+  unfold Float.neg.
+  rewrite B2R_Bopp.
+  unfold ExtFloat.one.
+  Local Transparent Float.of_int.
+  unfold Float.of_int.
+  rewrite (Int.signed_repr 1) by (cbn ; lia).
+  rewrite C9E.
+  rewrite C3E.
+  rewrite C2E.
+  rewrite C0E.
+  rewrite C1E.
+  unfold b_d.
+  rewrite Int64.unsigned_repr by lia.
+  rewrite C4E.
+  cbn.
+  set (rd := round radix2 (FLT_exp (-1074) 53) ZnearestE) in *.
+  set (rs := round radix2 (FLT_exp (-149) 24) ZnearestE) in *.
+  set (bi := IZR b') in *.
+  set (invb0 := rd (rs (1/ rs bi))%R) in *.
+  set (alpha := (- invb0 * bi + 1)%R) in *.
+  set (alpha' := ((1/bi - rd (rs (1/ rs bi)))/(1/bi))%R) in *.
+  assert (alpha = alpha')%R as expand_alpha.
+  {
+    unfold alpha, alpha', invb0.
+    field.
+    lra.
+  }
+  assert(-1/2097152 <= alpha' <= 1/2097152)%R as alpha_BOUND.
+  { unfold alpha', rd, rs.
+    gappa.
+  }
+  set (delta := (rd (rd alpha * invb0 + invb0) - (alpha * invb0 + invb0))%R).
+  assert(-1/1125899906842624 <= delta <= 1/1125899906842624)%R as delta_BOUND.
+  { unfold delta, invb0. rewrite expand_alpha. unfold rd, rs.
+    gappa.
+  }
+  replace (rd (rd alpha * invb0 + invb0) - 1/bi)%R with
+    (delta + ((alpha * invb0 + invb0)-1/bi))%R by (unfold delta; ring).
+  replace (alpha * invb0 + invb0 - 1 / bi)%R with (alpha * (invb0 - 1/bi) + (alpha * (1/bi) + invb0 - 1 / bi))%R by ring.
+  replace (alpha * (1 / bi) + invb0 - 1 / bi)%R with 0%R; cycle 1.
+  { unfold alpha.
+    field.
+    lra.
+  }
+  rewrite expand_alpha.
+  unfold invb0, rd, rs, approx_inv_thresh.
+  apply Rabs_le.
+  gappa.
+Qed.
+
+Definition fp_divu32 a b :=
+  let a_var := Eletvar (1%nat) in
+  let b_var := Eletvar (0%nat) in
+  let a_d := Eop Ofloatoflongu ((Eop Ocast32unsigned (a_var ::: Enil)) ::: Enil) in
+  let qr := Eop Olonguoffloat_ne ((Eop Omulf (a_d:::(approx_inv b_var):::Enil)):::Enil) in
+  let qr_var := Eletvar 0%nat in
+  let rem := Eop Omsubl ((Eop Ocast32unsigned ((Eletvar (2%nat)):::Enil)):::
+                         qr_var :::
+                         (Eop Ocast32unsigned ((Eletvar (1%nat)):::Enil)):::Enil) in
+  let qr_m1 := Eop (Oaddlimm (Int64.repr (-1)%Z)) (qr_var:::Enil) in
+  let cases := Eop (Osel (Ccompl0 Clt) Tlong)
+                   (qr_m1 ::: qr_var ::: rem ::: Enil) in (* (Elet qr cases) *)
+  Eop Olowlong ((Elet a (Elet (lift b) (Elet qr cases))) ::: Enil).
+
+Open Scope Z.
+
+Definition div_approx_reals (a b : Z) (x : R) :=
+    let q:=ZnearestE x in
+    let r:=a-q*b in
+    if r <? 0
+    then q-1
+    else q.
+
+Lemma floor_ball1:
+  forall x : R, forall y : Z,
+    (Rabs (x - IZR y) < 1)%R -> Zfloor x = (y-1)%Z \/ Zfloor x = y.
+Proof.
+  intros x y BALL.
+  apply Rabs_lt_inv in BALL.
+  case (Rcompare_spec x (IZR y)); intro CMP.
+  - left. apply Zfloor_imp.
+    ring_simplify (y-1+1).
+    rewrite minus_IZR. lra.
+  - subst.
+    rewrite Zfloor_IZR. right. reflexivity.
+  - right. apply Zfloor_imp.
+    rewrite plus_IZR. lra.
+Qed.
+
+Theorem div_approx_reals_correct:
+  forall a b : Z, forall x : R,
+    b > 0 ->
+    (Rabs (x - IZR a/ IZR b) < 1/2)%R ->
+    div_approx_reals a b x = (a/b)%Z.
+Proof.                    
+  intros a b x bPOS GAP.
+  assert (0 < IZR b)%R by (apply IZR_lt ; lia).
+  unfold div_approx_reals.
+  pose proof (Znearest_imp2 (fun x => negb (Z.even x)) x) as NEAR.
+  assert (Rabs (IZR (ZnearestE x) - IZR a/ IZR b) < 1)%R as BALL.
+  { pose proof (Rabs_triang (IZR (ZnearestE x) - x)
+                            (x - IZR a/ IZR b)) as TRI.
+    ring_simplify (IZR (ZnearestE x) - x + (x - IZR a / IZR b))%R in TRI.
+    lra.
+  }
+  clear GAP NEAR.
+  rewrite Rabs_minus_sym in BALL.
+  pose proof (floor_ball1 _ _ BALL) as FLOOR.
+  clear BALL.
+  rewrite Zfloor_div in FLOOR by lia.
+  pose proof (Z_div_mod_eq_full a b) as DIV_MOD.
+  assert (0 < b) as bPOS' by lia.
+  pose proof (Z.mod_pos_bound a b bPOS') as MOD_BOUNDS.
+  case Z.ltb_spec; intro; destruct FLOOR; lia.
+Qed.
+
+Opaque approx_inv.
+
+Theorem fp_divu32_correct :
+  forall (ge : genv)  (sp: val) cmenv memenv
+         (le : letenv) (expr_a expr_b : expr) (a b : int)
+         (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vint a))
+         (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vint b))
+         (b_nz : (Int.unsigned b > 0)%Z),
+  eval_expr ge sp cmenv memenv le (fp_divu32 expr_a expr_b)
+            (Vint (Int.divu a b)).
+Proof.
+  intros.
+  assert (eval_expr ge sp cmenv memenv (Vint b :: Vint a :: le)
+                    (Eletvar 0) (Vint b)) as EVAL_b'.
+  { constructor. reflexivity. }
+  destruct (approx_inv_correct ge sp cmenv memenv (Vint b :: Vint a :: le) (Eletvar 0) b EVAL_b' b_nz) as (inv_b & inv_b_eval & inv_b_finite & inv_b_correct).
+  unfold fp_divu32.
+  repeat econstructor.
+  { exact EVAL_a. }
+  { apply eval_lift. exact EVAL_b. }
+  exact inv_b_eval.
+  cbn. f_equal.
+  rewrite <- Float.of_intu_of_longu.
+  unfold Float.to_longu_ne.
+  rewrite ZofB_ne_range_correct by lia.
+  set (prod := (Float.mul (Float.of_intu a) inv_b)).
+  pose proof (Int.unsigned_range a) as a_range.
+  pose proof (Int.unsigned_range b) as b_range.
+  change Int.modulus with 4294967296 in a_range.
+  change Int.modulus with 4294967296 in b_range.
+  set (prod' := (B2R _ _ prod)).
+  set (prod_r := ZnearestE prod') in *.
+
+  Local Transparent Float.mul Float.of_intu.
+  unfold Float.mul, Float.of_intu in prod.
+  set (a' := Int.unsigned a) in *.
+  set (b' := Int.unsigned b) in *.
+  assert (IZR_a' : (0 <= IZR a' <= 4294967295)%R).
+  { split; apply IZR_le; lia. }
+  assert (IZR_b' : (1 <= IZR b' <= 4294967295)%R).
+  { split; apply IZR_le; lia. }
+  assert (SILLY : -2^53 <= a' <= 2^53).
+  { cbn; lia. }
+  destruct (BofZ_exact 53 1024 (@eq_refl _ Lt) (@eq_refl _ Lt) a' SILLY) as (C0E  & C0F & _).
+  clear SILLY.
+  pose proof (Bmult_correct  53 1024 (@eq_refl _ Lt) (@eq_refl _ Lt) Float.binop_nan mode_NE
+                               (BofZ 53 1024  (@eq_refl _ Lt) (@eq_refl _ Lt)a') inv_b) as C1.
+  set (inv_b_r :=  B2R 53 1024 inv_b) in *.
+  assert (INV_RANGE : (1/4294967296 <= 1/IZR b' <= 1)%R).
+  { split; unfold Rdiv.
+    - apply Rmult_le_compat_l. lra.
+      apply Rinv_le. apply IZR_lt. lia.
+      apply IZR_le. lia.
+    - replace 1%R with (1 / 1)%R at 2 by field.
+      apply Rmult_le_compat_l. lra.
+      apply Rinv_le. apply IZR_lt. lia.
+      apply IZR_le. lia.
+  }
+  apply Rabs_def2b in inv_b_correct.
+  rewrite Rlt_bool_true in C1; cycle 1.
+  { clear C1.
+    rewrite C0E.
+    apply (Rabs_relax (bpow radix2 64)).
+    { apply bpow_lt. lia. }
+    replace inv_b_r with (1 / IZR b' +  (inv_b_r - 1 / IZR b'))%R by ring.
+    set (delta := (inv_b_r - 1 / IZR b')%R) in *.
+    unfold approx_inv_thresh in inv_b_correct.
+    cbn.
+    assert (b'_RANGE : (1 <= IZR b' <= 4294967295)%R).
+    { split; apply IZR_le; lia.
+    }
+    assert (a'_RANGE : (0 <= IZR a' <= 4294967295)%R).
+    { split; apply IZR_le; lia.
+    }
+    gappa.
+  }
+  rewrite C0F in C1.
+  cbn in C1.
+  rewrite C0E in C1.
+  rewrite inv_b_finite in C1.
+  fold prod in C1.
+  fold prod' in C1.
+  destruct C1 as (C1E & C1F & _).
+  rewrite C1F. cbn.
+
+  assert(prod'_range : (0 <= prod' <= 17179869181/4)%R).
+  {
+    rewrite C1E.
+    replace inv_b_r with (1/IZR b' + (inv_b_r - 1 / IZR b'))%R by ring.
+    assert (a'_RANGE : (0 <= IZR a' <= 4294967295)%R).
+    { split; apply IZR_le; lia.
+    }
+    unfold approx_inv_thresh in inv_b_correct.
+    set (true_inv := (1 / IZR b')%R) in *.
+    set (delta := (inv_b_r - true_inv)%R) in *.
+    gappa.
+  }
+
+  assert(0 <= prod_r <= 4294967295) as prod_r_range.
+  { unfold prod_r.
+    rewrite <- (Znearest_IZR  (fun x => negb (Z.even x)) 0).
+    replace 4294967295 with (ZnearestE (17179869181 / 4)%R); cycle 1.
+    { apply Znearest_imp.
+      apply Rabs_lt.
+      lra.
+    }
+    split; apply Znearest_le; lra.
+  }
+
+  replace (_ && _ ) with true; cycle 1.
+  {
+    symmetry.
+    rewrite andb_true_iff.
+    split; apply Zle_imp_le_bool; lia.
+  }
+  cbn.
+  f_equal.
+  unfold Int.divu.
+  assert(Rabs
+             (round radix2 (FLT_exp (-1074) 53) ZnearestE (IZR a' * inv_b_r) - (IZR a' * inv_b_r)) <= 1/512)%R as R1 by gappa.
+
+  assert ( (Rabs (B2R 53 1024 prod - IZR (Int.unsigned a) / IZR (Int.unsigned b)) < 1 / 2)%R) as NEAR.
+  {
+    unfold prod.
+    pose proof (Bmult_correct 53 1024 (@eq_refl _ Lt) (@eq_refl _ Lt) Float.binop_nan mode_NE (BofZ 53 1024 (@eq_refl _ Lt) (@eq_refl _ Lt) a') inv_b) as C2.
+    rewrite C0E in C2.
+    rewrite Rlt_bool_true in C2; cycle 1.
+    { clear C2.
+      apply (Rabs_relax (bpow radix2 64)).
+      { apply bpow_lt. reflexivity. }
+      cbn.
+      fold inv_b_r.
+      replace inv_b_r with (1 / IZR b' + (inv_b_r - 1 / IZR b'))%R by ring.
+      set (delta := (inv_b_r - 1 / IZR b')%R) in *.
+      unfold approx_inv_thresh in *.
+      gappa.
+    }
+    destruct C2 as (C2E & C2F & _).
+    rewrite C2E.
+    fold inv_b_r a' b'.
+    replace ((round radix2 (FLT_exp (3 - 1024 - 53) 53) (round_mode mode_NE) (IZR a' * inv_b_r)) -
+               (IZR a' / IZR b'))%R with
+      (((round radix2 (FLT_exp (3 - 1024 - 53) 53) (round_mode mode_NE) (IZR a' * inv_b_r)) -
+          (IZR a' * inv_b_r)) +
+         (IZR a' * (inv_b_r - 1 / IZR b')))%R by (field ; lra).
+    set(delta :=  (inv_b_r - 1 / IZR b')%R) in *.
+    cbn. 
+    unfold approx_inv_thresh in *.
+    assert (Rabs(IZR a' * delta) <= 3/8)%R as R2.
+    { apply Rabs_le.
+      split; nra.
+    }
+    rewrite <- C1E.
+    rewrite <- C1E in R1.
+    pose proof (Rabs_triang (prod' - IZR a' * inv_b_r)  (IZR a' * delta))%R.
+    lra.
+  }
+  pose proof (div_approx_reals_correct (Int.unsigned a) (Int.unsigned b) prod' b_nz NEAR) as DIV_CORRECT.
+  rewrite <- DIV_CORRECT.
+
+  unfold div_approx_reals in *.
+  fold a' b' prod' prod_r.
+  unfold Int64.mul.
+  rewrite Int64.unsigned_repr by (cbn; lia).
+  rewrite Int64.unsigned_repr by (cbn; lia).
+  unfold Int64.sub.
+  rewrite Int64.unsigned_repr by (cbn; lia).
+  rewrite Int64.unsigned_repr by (cbn; nia).
+  assert (FMA_RANGE : Int64.min_signed <= a' - prod_r * b' <= Int64.max_signed).
+  { cbn.
+    unfold prod_r.
+    rewrite <- C1E in R1.
+    assert (IZR (-9223372036854775808) <= IZR (a' - ZnearestE prod' * b') <= IZR 9223372036854775807)%R.
+    2: split; apply le_IZR; lra.
+    rewrite minus_IZR.
+    rewrite mult_IZR.
+    replace (IZR (ZnearestE prod')) with (prod' + (IZR (ZnearestE prod') - prod'))%R by ring.
+    pose proof (Znearest_imp2 (fun x => negb (Z.even x)) prod') as R2.
+    set (delta1 := (IZR (ZnearestE prod') - prod')%R) in *.
+    replace prod' with ((prod' - IZR a' * inv_b_r) + IZR a' * (inv_b_r - 1 / IZR b')
+                        + IZR a' / IZR b')%R by (field; lra).
+    set (delta2 :=  (inv_b_r - 1 / IZR b')%R) in *.
+    set (delta3 := (prod' - IZR a' * inv_b_r)%R) in *.
+    replace (IZR a' - (delta3 + IZR a' * delta2 + IZR a' / IZR b' + delta1) * IZR b')%R with
+      (- (delta3 + IZR a' * delta2 + delta1) * IZR b')%R by (field; lra).
+    unfold approx_inv_thresh in *.
+    assert (Rabs(IZR a' * delta2) <= 1/4)%R as R4.
+    { apply Rabs_le.
+      split; 
+        nra. }
+    set (delta4 := (IZR a' * delta2)%R) in *.
+    apply Rabs_def2b in R1.
+    apply Rabs_def2b in R2.
+    apply Rabs_def2b in R4.
+    split; nra.
+  }
+  fold a' b' prod_r in DIV_CORRECT.
+
+  pose proof (Zlt_cases (a' - prod_r * b') 0) as CMP.
+  destruct (Z.ltb (a' - prod_r * b') 0).
+  - replace (Int64.lt (Int64.repr (a' - prod_r * b')) Int64.zero) with true; cycle 1.
+    { unfold Int64.lt.
+      change (Int64.signed Int64.zero) with 0.
+      rewrite Int64.signed_repr by exact FMA_RANGE.
+      rewrite zlt_true by lia.
+      reflexivity.
+    }
+    cbn.
+    f_equal.
+    rewrite Int64.add_signed.
+    rewrite (Int64.signed_repr prod_r) by (cbn ; lia).
+    rewrite Int64.signed_repr by (cbn ; lia).
+    unfold Int64.loword.
+    rewrite Int64.unsigned_repr. reflexivity.
+    split.
+    2: cbn; lia.
+    replace (prod_r + (-1)) with (prod_r - 1) by lia.
+    rewrite DIV_CORRECT.
+    apply Z.div_pos; lia.
+    
+  - replace (Int64.lt (Int64.repr (a' - prod_r * b')) Int64.zero) with false; cycle 1.
+    { unfold Int64.lt.
+      change (Int64.signed Int64.zero) with 0.
+      rewrite Int64.signed_repr by exact FMA_RANGE.
+      rewrite zlt_false by lia.
+      reflexivity.
+    }
+    cbn.
+    unfold Int64.loword.
+    rewrite Int64.unsigned_repr by (cbn; lia).
+    reflexivity.
+Qed.
+
+Definition e_msubl a b c := Eop Omsub (a ::: b ::: c ::: Enil).
+Definition fp_modu32 a b := Elet a (Elet (lift b) (e_msubl (Eletvar 1%nat) (Eletvar 0%nat)
+                                                    (fp_divu32 (Eletvar 1%nat) (Eletvar 0%nat)))).
+
+Theorem fp_modu32_correct :
+  forall (ge : genv)  (sp: val) cmenv memenv
+         (le : letenv) (expr_a expr_b : expr) (a b : int)
+         (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vint a))
+         (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vint b))
+         (b_nz : (Int.unsigned b > 0)%Z),
+  eval_expr ge sp cmenv memenv le (fp_modu32 expr_a expr_b)
+            (Vint (Int.modu a b)).
+Proof.
+  intros.
+  rewrite Int.modu_divu; cycle 1.
+  { intro Z.
+    subst.
+    rewrite Int.unsigned_zero in b_nz.
+    lia.
+  }
+  unfold fp_modu32.
+  Local Opaque fp_divu32.
+  repeat (econstructor + apply eval_lift + eassumption).
+  { apply fp_divu32_correct;
+    repeat (econstructor + apply eval_lift + eassumption).
+  }
+  cbn.
+  rewrite Int.mul_commut.
+  reflexivity.
+Qed.
+
+Definition e_is_neg a := Eop (Ocmp (Ccompimm Clt Int.zero)) (a ::: Enil).
+Definition e_xorw a b := Eop Oxor (a ::: b ::: Enil).
+Definition e_ite ty c vc v1 v2 := Eop (Osel c ty) (v1 ::: v2 ::: vc ::: Enil).
+Definition e_neg a := Eop Oneg (a ::: Enil).
+Definition e_abs a := Eop (Oabsdiffimm Int.zero) (a ::: Enil).
+
+Definition fp_divs32 a b :=
+  Elet a (Elet (lift b)
+    (Elet (fp_divu32 (e_abs (Eletvar (1%nat))) (e_abs (Eletvar (0%nat))))
+          (e_ite Tint (Ccompu0 Cne) (e_xorw (e_is_neg (Eletvar 2%nat))
+                                            (e_is_neg (Eletvar 1%nat)))
+                 (e_neg (Eletvar 0%nat)) (Eletvar 0%nat)))).
+
+Lemma nonneg_signed_unsigned:
+  forall x (x_NONNEG : Int.signed x >= 0),
+    (Int.signed x) = (Int.unsigned x).
+Proof.
+  intros.
+  pose proof (Int.unsigned_range x).
+  unfold Int.signed in *.
+  destruct zlt. reflexivity.
+  change Int.modulus with 4294967296%Z in *.
+  change Int.half_modulus with 2147483648%Z in *.
+  lia.
+Qed.
+
+Lemma int_min_signed_unsigned :
+  (- Int.min_signed < Int.max_unsigned)%Z.
+Proof.
+  reflexivity.
+Qed.
+
+Lemma int_divs_divu :
+  forall a b
+    (b_NOT0 : Int.signed b <> 0),
+    Int.divs a b = if xorb (Int.lt a Int.zero)
+                           (Int.lt b Int.zero)
+                   then Int.neg (Int.divu (ExtValues.int_abs a)
+                                          (ExtValues.int_abs b))
+                   else Int.divu (ExtValues.int_abs a) (ExtValues.int_abs b).
+Proof.
+  intros.
+  unfold Int.divs, Int.divu, Int.lt, ExtValues.int_abs.
+  pose proof (Int.signed_range a) as a_RANGE.
+  pose proof (Int.signed_range b) as b_RANGE.
+  change (Int.signed Int.zero) with 0%Z.
+  destruct zlt.
+  - cbn. rewrite (Z.abs_neq (Int.signed a)) by lia.
+    rewrite (Int.unsigned_repr (- Int.signed a)); cycle 1.
+    { pose proof int_min_signed_unsigned. lia. }
+
+    destruct zlt.
+    + rewrite (Z.abs_neq (Int.signed b)) by lia.
+      rewrite Int.unsigned_repr ; cycle 1.
+      { pose proof int_min_signed_unsigned. lia. }
+      rewrite <- (Z.opp_involutive (Int.signed b)) at 1.
+      rewrite Z.quot_opp_r by lia.
+      rewrite <- (Z.opp_involutive (Int.signed a)) at 1.
+      rewrite Z.quot_opp_l by lia.
+      rewrite Z.quot_div_nonneg by lia.
+      rewrite Z.opp_involutive.
+      reflexivity.
+      
+    + rewrite (Z.abs_eq (Int.signed b)) by lia.
+      rewrite Int.unsigned_repr ; cycle 1.
+      { pose proof Int.max_signed_unsigned. lia. }
+      rewrite <- (Z.opp_involutive (Int.signed a)) at 1.
+      rewrite Z.quot_opp_l by lia.
+      rewrite Z.quot_div_nonneg by lia.
+      rewrite Int.neg_repr.
+      reflexivity.
+    
+  - cbn. rewrite (Z.abs_eq (Int.signed a)) by lia.
+    rewrite (Int.unsigned_repr (Int.signed a)); cycle 1.
+    { pose proof Int.max_signed_unsigned. lia. }
+    destruct zlt.
+    + rewrite (Z.abs_neq (Int.signed b)) by lia.
+      rewrite Int.unsigned_repr ; cycle 1.
+      { pose proof int_min_signed_unsigned. lia. }
+      rewrite Int.neg_repr.
+      rewrite <- (Z.opp_involutive (Int.signed b)) at 1.
+      rewrite Z.quot_opp_r by lia.
+      rewrite Z.quot_div_nonneg by lia.
+      reflexivity.
+      
+    + rewrite (Z.abs_eq (Int.signed b)) by lia.
+      rewrite Int.unsigned_repr ; cycle 1.
+      { pose proof Int.max_signed_unsigned. lia. }
+      rewrite Z.quot_div_nonneg by lia.
+      reflexivity.
+Qed.
+
+Lemma nonzero_unsigned_signed :
+  forall b, Int.unsigned b > 0 -> Int.signed b <> 0.
+Proof.
+  intros b GT EQ.
+  rewrite Int.unsigned_signed in GT.
+  unfold Int.lt in GT.
+  rewrite Int.signed_zero in GT.
+  destruct zlt in GT; lia.
+Qed.
+
+Theorem fp_divs32_correct :
+  forall (ge : genv)  (sp: val) cmenv memenv
+         (le : letenv) (expr_a expr_b : expr) (a b : int)
+         (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vint a))
+         (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vint b))
+         (b_nz : (Int.unsigned b > 0)%Z),
+  eval_expr ge sp cmenv memenv le (fp_divs32 expr_a expr_b)
+            (Vint (Int.divs a b)).
+Proof.
+  intros.
+  unfold fp_divs32.
+  Local Opaque fp_divu32.
+  repeat (econstructor + apply eval_lift + eassumption).
+  apply fp_divu32_correct.
+  all: repeat (econstructor + apply eval_lift + eassumption).
+  { unfold ExtValues.int_absdiff, ExtValues.Z_abs_diff.
+    rewrite Int.signed_zero. rewrite Z.sub_0_r.
+    rewrite Int.unsigned_repr.
+    { pose proof (nonzero_unsigned_signed b b_nz).
+      lia.
+    }
+    pose proof Int.max_signed_unsigned.
+    pose proof int_min_signed_unsigned.
+    pose proof (Int.signed_range b).
+    lia.
+  }
+  cbn.
+  rewrite int_divs_divu ; cycle 1.
+  { apply nonzero_unsigned_signed. assumption. }
+  unfold Int.lt, ExtValues.int_abs, ExtValues.int_absdiff, ExtValues.Z_abs_diff.
+  change (Int.signed Int.zero) with 0%Z.
+  repeat rewrite Z.sub_0_r.
+  destruct zlt; destruct zlt; reflexivity.
+Qed.
+
+Lemma int_mods_modu :
+  forall a b
+    (b_NOT0 : Int.signed b <> 0),
+    Int.mods a b = if Int.lt a Int.zero
+                   then Int.neg (Int.modu (ExtValues.int_abs a)
+                                          (ExtValues.int_abs b))
+                   else Int.modu (ExtValues.int_abs a) (ExtValues.int_abs b).
+Proof.
+  intros.
+  unfold Int.mods, Int.modu, Int.lt, ExtValues.int_abs.
+  pose proof (Int.signed_range a) as a_RANGE.
+  pose proof (Int.signed_range b) as b_RANGE.
+  change (Int.signed Int.zero) with 0%Z.
+  destruct zlt.
+  - cbn. rewrite (Z.abs_neq (Int.signed a)) by lia.
+    rewrite (Int.unsigned_repr (- Int.signed a)); cycle 1.
+    { pose proof int_min_signed_unsigned. lia. }
+
+    destruct (zlt (Int.signed b) 0%Z).
+    + rewrite (Z.abs_neq (Int.signed b)) by lia.
+      rewrite Int.unsigned_repr ; cycle 1.
+      { pose proof int_min_signed_unsigned. lia. }
+      rewrite <- (Z.opp_involutive (Int.signed b)) at 1.
+      rewrite Z.rem_opp_r by lia.
+      rewrite <- (Z.opp_involutive (Int.signed a)) at 1.
+      rewrite Z.rem_opp_l by lia.
+      rewrite Z.rem_mod_nonneg by lia.
+      rewrite Int.neg_repr.
+      reflexivity.
+      
+    + rewrite (Z.abs_eq (Int.signed b)) by lia.
+      rewrite Int.unsigned_repr ; cycle 1.
+      { pose proof Int.max_signed_unsigned. lia. }
+      rewrite <- (Z.opp_involutive (Int.signed a)) at 1.
+      rewrite Z.rem_opp_l by lia.
+      rewrite Z.rem_mod_nonneg by lia.
+      rewrite Int.neg_repr.
+      reflexivity.
+    
+  - cbn. rewrite (Z.abs_eq (Int.signed a)) by lia.
+    rewrite (Int.unsigned_repr (Int.signed a)); cycle 1.
+    { pose proof Int.max_signed_unsigned. lia. }
+    destruct (zlt (Int.signed b) 0%Z).
+    + rewrite (Z.abs_neq (Int.signed b)) by lia.
+      rewrite Int.unsigned_repr ; cycle 1.
+      { pose proof int_min_signed_unsigned. lia. }
+      rewrite <- (Z.opp_involutive (Int.signed b)) at 1.
+      rewrite Z.rem_opp_r by lia.
+      rewrite Z.rem_mod_nonneg by lia.
+      reflexivity.
+      
+    + rewrite (Z.abs_eq (Int.signed b)) by lia.
+      rewrite Int.unsigned_repr ; cycle 1.
+      { pose proof Int.max_signed_unsigned. lia. }
+      rewrite Z.rem_mod_nonneg by lia.
+      reflexivity.
+Qed.
+
+Definition fp_mods32z a b :=
+  Elet a (Elet (lift b)
+    (Elet (fp_modu32 (e_abs (Eletvar (1%nat))) (e_abs (Eletvar (0%nat))))
+          (e_ite Tint (Ccomp0 Clt) (Eletvar 2%nat)
+                 (e_neg (Eletvar 0%nat)) (Eletvar 0%nat)))).
+
+Theorem fp_mods32z_correct :
+  forall (ge : genv)  (sp: val) cmenv memenv
+         (le : letenv) (expr_a expr_b : expr) (a b : int)
+         (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vint a))
+         (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vint b))
+         (b_nz : (Int.unsigned b > 0)%Z),
+  eval_expr ge sp cmenv memenv le (fp_mods32z expr_a expr_b)
+            (Vint (Int.mods a b)).
+Proof.
+  intros.
+  unfold fp_mods32z.
+  Local Opaque fp_modu32.
+  repeat (econstructor + apply eval_lift + eassumption).
+  apply fp_modu32_correct.
+  all: repeat (econstructor + apply eval_lift + eassumption).
+  { unfold ExtValues.int_absdiff, ExtValues.Z_abs_diff.
+    rewrite Int.signed_zero. rewrite Z.sub_0_r.
+    rewrite Int.unsigned_repr.
+    { pose proof (nonzero_unsigned_signed b b_nz).
+      lia.
+    }
+    pose proof Int.max_signed_unsigned.
+    pose proof int_min_signed_unsigned.
+    pose proof (Int.signed_range b).
+    lia.
+  }
+  cbn.
+  rewrite int_mods_modu ; cycle 1.
+  { apply nonzero_unsigned_signed. assumption. }
+  unfold Int.lt, ExtValues.int_abs, ExtValues.int_absdiff, ExtValues.Z_abs_diff.
+  change (Int.signed Int.zero) with 0%Z.
+  repeat rewrite Z.sub_0_r.
+  destruct zlt; reflexivity. 
+Qed.
+
+Definition e_msub a b c := Eop Omsub (a ::: b ::: c ::: Enil).
+
+Definition fp_mods32 a b :=
+  Elet a (Elet (lift b)
+    (Elet (fp_divs32 (Eletvar (1%nat)) (Eletvar (0%nat)))
+          (e_msub (Eletvar 2%nat) (Eletvar 1%nat) (Eletvar 0%nat)))).
+
+Theorem fp_mods32_correct :
+  forall (ge : genv)  (sp: val) cmenv memenv
+         (le : letenv) (expr_a expr_b : expr) (a b : int)
+         (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vint a))
+         (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vint b))
+         (b_nz : (Int.unsigned b > 0)%Z),
+  eval_expr ge sp cmenv memenv le (fp_mods32 expr_a expr_b)
+            (Vint (Int.mods a b)).
+Proof.
+  intros.
+  rewrite Int.mods_divs.
+  unfold fp_mods32.
+  Local Opaque fp_divs32.
+  repeat (econstructor + apply eval_lift + eassumption).
+  { apply fp_divs32_correct;
+    repeat (econstructor + apply eval_lift + eassumption).
+  }
+  cbn.
+  rewrite Int.mul_commut.
+  reflexivity.
+Qed.