5 files changed, 44 insertions, 618 deletions
diff --git a/kvx/Builtins1.v b/kvx/Builtins1.v
index de0d9885..163dcbd8 100644
--- a/kvx/Builtins1.v
+++ b/kvx/Builtins1.v
@@ -27,7 +27,8 @@ Inductive platform_builtin : Type :=
 | BI_fma
 | BI_fmaf
 | BI_lround_ne
-| BI_luround_ne.
+| BI_luround_ne
+| BI_fp_udiv32.
 
 Local Open Scope string_scope.
 
@@ -40,6 +41,7 @@ Definition platform_builtin_table : list (string * platform_builtin) :=
   :: ("__builtin_fmaf", BI_fmaf)
   :: ("__builtin_lround_ne", BI_lround_ne)
   :: ("__builtin_luround_ne", BI_luround_ne)
+  :: ("__builtin_fp_udiv32", BI_fp_udiv32)
   :: nil.
 
 Definition platform_builtin_sig (b: platform_builtin) : signature :=
@@ -54,6 +56,8 @@ Definition platform_builtin_sig (b: platform_builtin) : signature :=
       mksignature (Tsingle :: Tsingle :: Tsingle :: nil) Tsingle cc_default
   | BI_lround_ne | BI_luround_ne =>
       mksignature (Tfloat :: nil) Tlong cc_default
+  | BI_fp_udiv32 =>
+      mksignature (Tint :: Tint :: nil) Tint cc_default
   end.
 
 Definition platform_builtin_sem (b: platform_builtin) : builtin_sem (sig_res (platform_builtin_sig b)) :=
@@ -66,4 +70,8 @@ Definition platform_builtin_sem (b: platform_builtin) : builtin_sem (sig_res (pl
   | BI_fmaf => mkbuiltin_n3t Tsingle Tsingle Tsingle Tsingle Float32.fma
   | BI_lround_ne => mkbuiltin_n1p Tfloat Tlong Float.to_long_ne
   | BI_luround_ne => mkbuiltin_n1p Tfloat Tlong Float.to_longu_ne
+  | BI_fp_udiv32 => mkbuiltin_n2p Tint Tint Tint
+       (fun n1 n2 => if Int.eq n2 Int.zero
+                     then None
+                     else Some (Int.divu n1 n2))
   end.
diff --git a/kvx/CBuiltins.ml b/kvx/CBuiltins.ml
index f2b7b09e..fad86d3c 100644
--- a/kvx/CBuiltins.ml
+++ b/kvx/CBuiltins.ml
@@ -136,7 +136,9 @@ let builtins = {
     "__builtin_lround_ne",
       (TInt(ILong, []), [TFloat(FDouble, [])], false);
     "__builtin_luround_ne",
-      (TInt(IULong, []), [TFloat(FDouble, [])], false);
+    (TInt(IULong, []), [TFloat(FDouble, [])], false);
+    "__builtin_fp_udiv32",
+    (TInt(IUInt, []), [TInt(IUInt, []); TInt(IUInt, [])], false);
 ]
 }
 
diff --git a/kvx/FPDivision.v b/kvx/FPDivision.v
deleted file mode 100644
index 0ba9a39f..00000000
--- a/kvx/FPDivision.v
+++ /dev/null
@@ -1,615 +0,0 @@
-From Flocq Require Import Core Digits Operations Round Bracket Sterbenz
-                          Binary Round_odd Bits.
-Require Archi.
-Require Import Coqlib.
-Require Import Compopts.
-Require Import AST.
-Require Import Integers.
-Require Import Floats.
-Require Import Op.
-Require Import CminorSel.
-Require Import OpHelpers.
-Require Import ExtFloats.
-Require Import DecBoolOps.
-Require Import Chunks.
-Require Import Builtins.
-Require Import Values Globalenvs.
-Require Compopts.
-Require Import Psatz.
-Require Import IEEE754_extra.
-
-From Gappa Require Import Gappa_tactic.
-
-Local Open Scope cminorsel_scope.
-
-Local Open Scope string_scope.
-Local Open Scope error_monad_scope.
-
-Definition approx_inv b :=
-  let invb_s := Eop Oinvfs ((Eop Osingleofintu ((Eletvar 0%nat):::Enil)):::Enil) in
-  let invb_d := Eop Ofloatofsingle (invb_s ::: Enil) in
-  let b_d := Eop Ofloatoflongu ((Eop Ocast32unsigned ((Eletvar 1%nat):::Enil)):::Enil) in
-  let invb_d_var := Eletvar (0%nat) in
-  let one := Eop (Ofloatconst ExtFloat.one) Enil in
-  let alpha := Eop Ofmsubf (one ::: invb_d_var ::: b_d ::: Enil) in
-  let x := Eop Ofmaddf (invb_d_var ::: alpha ::: invb_d_var ::: Enil) in
-  Elet b (Elet invb_d x).
-
-Definition approx_inv_thresh := (1/17179869184)%R.
-
-(*
-Lemma BofZ_exact_zero:
-  forall (prec emax : Z) (prec_gt_0_ : Prec_gt_0 prec)
-         (Hmax : (prec < emax)%Z),
-  B2R (BofZ prec emax 0%Z (Hmax := Hmax)) = 0%R /\
-  is_finite (BofZ prec emax 0%Z (Hmax := Hmax)) = true /\
-    Bsign prec emax (BofZ prec emax 0%Z (Hmax := Hmax)) = false.
-Proof.
-  intros.
-  apply BofZ_exact.
-  pose proof (Z.pow_nonneg 2 prec).
-  lia.
-Qed.
- *)
-
-Lemma Rabs_relax:
-  forall b b' (INEQ : (b < b')%R) x,
-    (-b <= x <= b)%R -> (Rabs x < b')%R.
-Proof.
-  intros.
-  apply Rabs_lt.
-  lra.
-Qed.
-
-Theorem approx_inv_correct :
-  forall (ge : genv)  (sp: val) cmenv memenv
-         (le : letenv) (expr_b : expr) (b : int)
-         (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vint b))
-         (b_nz : ((Int.unsigned b) > 0)%Z),
-    exists f : float,
-      eval_expr ge sp cmenv memenv le (approx_inv expr_b) (Vfloat f) /\
-        is_finite _ _ f = true /\ (Rabs((B2R _ _ f) - (1 / IZR (Int.unsigned b))) <= approx_inv_thresh)%R.
-Proof.
-  intros. unfold approx_inv.
-  econstructor. constructor.
-  { repeat econstructor.
-    { eassumption. }
-    { reflexivity. } }
-  set (invb_d := (Float.of_single (ExtFloat32.inv (Float32.of_intu b)))).
-  set (b' := Int.unsigned b) in *. 
-  pose proof (Int.unsigned_range b) as RANGE.
-  fold b' in RANGE.
-  change  Int.modulus with  4294967296%Z in RANGE.
-  assert (0 <= b' <= Int64.max_unsigned)%Z as b'RANGE.
-  { change Int64.max_unsigned with 18446744073709551615%Z.
-    lia. }
-  assert (1 <= IZR b' <= 4294967295)%R as RANGE'.
-  { split.
-    { apply IZR_le. lia. }
-    apply IZR_le. lia.
-  }
-  cbn.
-  
-  set (b_d := (Float.of_longu (Int64.repr b'))) in *.
-  Local Transparent Float.of_longu.
-  unfold Float.of_longu in b_d.
-
-  assert(SILLY : (- 2 ^ 24 <= 1 <= 2 ^ 24)%Z) by lia.
-  destruct (BofZ_exact 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1 SILLY) as (C0E & C0F & _).
-  clear SILLY.
-
-  assert(SILLY : (- 2 ^ 53 <= 1 <= 2 ^ 53)%Z) by lia.
-  destruct (BofZ_exact 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1 SILLY) as (C9E & C9F & _).
-  clear SILLY.
-  
-  pose proof (BofZ_correct 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) b') as C1.
-  rewrite Rlt_bool_true in C1; cycle 1.
-  { clear C1. cbn.
-    eapply (Rabs_relax (IZR 4294967296)).
-    lra.
-    gappa.
-  }
-  rewrite Zlt_bool_false in C1 by lia.
-  destruct C1 as (C1E & C1F & _).
-  
-  Local Transparent Float32.of_intu Float32.of_int Float32.div.
-  unfold ExtFloat32.inv, ExtFloat32.one, Float32.of_intu, Float32.of_int, Float32.div in invb_d.
-  fold b' in invb_d.
-  Check BofZ.
-  change (Int.signed (Int.repr 1%Z)) with 1%Z in invb_d.
-  pose proof (Bdiv_correct 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) Float32.binop_nan mode_NE
-                      (BofZ 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1)
-                      (BofZ 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) b')) as C2.
-  rewrite Rlt_bool_true in C2; cycle 1.
-  { clear C2. rewrite C1E.
-    apply (Rabs_relax (bpow radix2 10)).
-    { cbn; lra. }
-    unfold F2R. cbn. unfold F2R. cbn.
-    gappa.
-  }
-  assert (B2R 24 128 (BofZ 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) b') <> 0%R) as NONZ.
-  { clear C2.
-    rewrite C1E.
-    cbn.
-    assert (1 <= round radix2 (FLT_exp (-149) 24) ZnearestE (IZR b'))%R by gappa.
-    lra.
-  }
-  destruct (C2 NONZ) as (C2E & C2F & _).
-  clear C2 NONZ.
-  Local Transparent Float.of_single.
-  unfold Float.of_single in invb_d.
-  pose proof (Bconv_correct 24 128 53 1024 (@eq_refl Datatypes.comparison Lt)
-              (@eq_refl Datatypes.comparison Lt) Float.of_single_nan mode_NE
-              (Bdiv 24 128 (@eq_refl Datatypes.comparison Lt)
-                 (@eq_refl Datatypes.comparison Lt) Float32.binop_nan mode_NE
-                 (BofZ 24 128 (@eq_refl Datatypes.comparison Lt)
-                    (@eq_refl Datatypes.comparison Lt) 1)
-                 (BofZ 24 128 (@eq_refl Datatypes.comparison Lt)
-                       (@eq_refl Datatypes.comparison Lt) b'))) as C3.
-  fold invb_d in C3.
-  rewrite Rlt_bool_true in C3; cycle 1.
-  { clear C3.
-    rewrite C2E.
-    rewrite C1E.
-    rewrite C0E.
-    apply (Rabs_relax (bpow radix2 10)).
-    { apply bpow_lt; lia. }
-    cbn.
-    gappa.
-  }
-  change (is_finite 24 128 (BofZ 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1)) with true in C2F.
-  destruct (C3 C2F) as (C3E & C3F & _).
-  clear C3.
-  unfold Float.fma.
-  assert (is_finite _ _ (Float.neg invb_d) = true) as invb_d_F.
-  { Local Transparent Float.neg.
-    unfold Float.neg.
-    rewrite is_finite_Bopp.
-    assumption.
-  }
-
-  assert(SILLY : (- 2 ^ 53 <= b' <= 2 ^ 53)%Z) by lia.
-  destruct (BofZ_exact 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) b' SILLY) as (C4E & C4F & _).
-  clear SILLY.
-
-  assert (is_finite 53 1024 b_d = true) as b_d_F.
-  { unfold b_d.
-    rewrite Int64.unsigned_repr by lia.
-    assumption.
-  }
-  
-  assert (is_finite 53 1024 ExtFloat.one = true) as one_F by reflexivity.
-  
-  pose proof (Bfma_correct 53 1024 (@eq_refl Datatypes.comparison Lt)
-          (@eq_refl Datatypes.comparison Lt) Float.fma_nan mode_NE
-          (Float.neg invb_d) b_d ExtFloat.one invb_d_F b_d_F one_F) as C5.
-  
-  rewrite Rlt_bool_true in C5; cycle 1.
-  { clear C5.
-    unfold Float.neg.
-    rewrite B2R_Bopp.
-    rewrite C3E.
-    rewrite C2E.
-    rewrite C0E.
-    rewrite C1E.
-    unfold ExtFloat.one.
-    change  (Float.of_int (Int.repr 1)) with (BofZ 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1).
-    rewrite C9E.
-    unfold b_d.
-    rewrite Int64.unsigned_repr by lia.
-    rewrite C4E.
-    apply (Rabs_relax (bpow radix2 10)).
-    { apply bpow_lt; lia. }
-    cbn.
-    gappa.
-  }  
-  destruct C5 as (C5E & C5F & _).
-
-  pose proof (Bfma_correct 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) Float.fma_nan mode_NE
-       (Bfma 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) Float.fma_nan mode_NE 
-             (Float.neg invb_d) b_d ExtFloat.one) invb_d invb_d C5F C3F C3F) as C6.
-  rewrite Rlt_bool_true in C6; cycle 1.
-  { clear C6.
-    rewrite C3E.
-    rewrite C2E.
-    rewrite C1E.
-    rewrite C0E.
-    rewrite C5E.
-    unfold Float.neg.
-    rewrite B2R_Bopp.
-    rewrite C3E.
-    rewrite C2E.
-    rewrite C0E.
-    rewrite C1E.
-    unfold b_d.
-    rewrite Int64.unsigned_repr by lia.
-    rewrite C4E.
-    unfold ExtFloat.one.
-    change  (Float.of_int (Int.repr 1)) with (BofZ 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1).
-    rewrite C9E.
-    apply (Rabs_relax (bpow radix2 10)).
-    { apply bpow_lt; lia. }
-    cbn.
-    gappa.
-  }
-  destruct C6 as (C6E & C6F & _).
-  split.
-  { exact C6F. }
-  rewrite C6E.
-  rewrite C5E.
-  rewrite C3E.
-  rewrite C2E.
-  rewrite C1E.
-  rewrite C0E.
-  unfold Float.neg.
-  rewrite B2R_Bopp.
-  unfold ExtFloat.one.
-  Local Transparent Float.of_int.
-  unfold Float.of_int.
-  rewrite (Int.signed_repr 1) by (cbn ; lia).
-  rewrite C9E.
-  rewrite C3E.
-  rewrite C2E.
-  rewrite C0E.
-  rewrite C1E.
-  unfold b_d.
-  rewrite Int64.unsigned_repr by lia.
-  rewrite C4E.
-  cbn.
-  set (rd := round radix2 (FLT_exp (-1074) 53) ZnearestE) in *.
-  set (rs := round radix2 (FLT_exp (-149) 24) ZnearestE) in *.
-  set (bi := IZR b') in *.
-  set (invb0 := rd (rs (1/ rs bi))%R) in *.
-  set (alpha := (- invb0 * bi + 1)%R) in *.
-  set (alpha' := ((1/bi - rd (rs (1/ rs bi)))/(1/bi))%R) in *.
-  assert (alpha = alpha')%R as expand_alpha.
-  {
-    unfold alpha, alpha', invb0.
-    field.
-    lra.
-  }
-  assert(-1/2097152 <= alpha' <= 1/2097152)%R as alpha_BOUND.
-  { unfold alpha', rd, rs.
-    gappa.
-  }
-  set (delta := (rd (rd alpha * invb0 + invb0) - (alpha * invb0 + invb0))%R).
-  assert(-1/1125899906842624 <= delta <= 1/1125899906842624)%R as delta_BOUND.
-  { unfold delta, invb0. rewrite expand_alpha. unfold rd, rs.
-    gappa.
-  }
-  replace (rd (rd alpha * invb0 + invb0) - 1/bi)%R with
-    (delta + ((alpha * invb0 + invb0)-1/bi))%R by (unfold delta; ring).
-  replace (alpha * invb0 + invb0 - 1 / bi)%R with (alpha * (invb0 - 1/bi) + (alpha * (1/bi) + invb0 - 1 / bi))%R by ring.
-  replace (alpha * (1 / bi) + invb0 - 1 / bi)%R with 0%R; cycle 1.
-  { unfold alpha.
-    field.
-    lra.
-  }
-  rewrite expand_alpha.
-  unfold invb0, rd, rs, approx_inv_thresh.
-  apply Rabs_le.
-  gappa.
-Qed.
-
-Definition approx_div a b :=
-  let a_var := Eletvar (1%nat) in
-  let b_var := Eletvar (0%nat) in
-  let a_d := Eop Ofloatoflongu ((Eop Ocast32unsigned (a_var ::: Enil)) ::: Enil) in
-  let qr := Eop Olonguoffloat_ne ((Eop Omulf (a_d:::(approx_inv b_var):::Enil)):::Enil) in
-  let qr_var := Eletvar 0%nat in
-  let rem := Eop Omsubl ((Eop Ocast32unsigned ((Eletvar (2%nat)):::Enil)):::
-                         qr_var :::
-                         (Eop Ocast32unsigned ((Eletvar (1%nat)):::Enil)):::Enil) in
-  let qr_m1 := Eop (Oaddlimm (Int64.repr (-1)%Z)) (qr_var:::Enil) in
-  let cases := Eop (Osel (Ccompl0 Clt) Tlong)
-                   (qr_m1 ::: qr_var ::: rem ::: Enil) in (* (Elet qr cases) *)
-  Eop Olowlong ((Elet a (Elet (lift b) (Elet qr cases))) ::: Enil).
-
-Open Scope Z.
-
-Definition div_approx_reals (a b : Z) (x : R) :=
-    let q:=ZnearestE x in
-    let r:=a-q*b in
-    if r <? 0
-    then q-1
-    else q.
-
-Lemma floor_ball1:
-  forall x : R, forall y : Z,
-    (Rabs (x - IZR y) < 1)%R -> Zfloor x = (y-1)%Z \/ Zfloor x = y.
-Proof.
-  intros x y BALL.
-  apply Rabs_lt_inv in BALL.
-  case (Rcompare_spec x (IZR y)); intro CMP.
-  - left. apply Zfloor_imp.
-    ring_simplify (y-1+1).
-    rewrite minus_IZR. lra.
-  - subst.
-    rewrite Zfloor_IZR. right. reflexivity.
-  - right. apply Zfloor_imp.
-    rewrite plus_IZR. lra.
-Qed.
-
-Theorem div_approx_reals_correct:
-  forall a b : Z, forall x : R,
-    b > 0 ->
-    (Rabs (x - IZR a/ IZR b) < 1/2)%R ->
-    div_approx_reals a b x = (a/b)%Z.
-Proof.                    
-  intros a b x bPOS GAP.
-  assert (0 < IZR b)%R by (apply IZR_lt ; lia).
-  unfold div_approx_reals.
-  pose proof (Znearest_imp2 (fun x => negb (Z.even x)) x) as NEAR.
-  assert (Rabs (IZR (ZnearestE x) - IZR a/ IZR b) < 1)%R as BALL.
-  { pose proof (Rabs_triang (IZR (ZnearestE x) - x)
-                            (x - IZR a/ IZR b)) as TRI.
-    ring_simplify (IZR (ZnearestE x) - x + (x - IZR a / IZR b))%R in TRI.
-    lra.
-  }
-  clear GAP NEAR.
-  rewrite Rabs_minus_sym in BALL.
-  pose proof (floor_ball1 _ _ BALL) as FLOOR.
-  clear BALL.
-  rewrite Zfloor_div in FLOOR by lia.
-  pose proof (Z_div_mod_eq_full a b) as DIV_MOD.
-  assert (0 < b) as bPOS' by lia.
-  pose proof (Z.mod_pos_bound a b bPOS') as MOD_BOUNDS.
-  case Z.ltb_spec; intro; destruct FLOOR; lia.
-Qed.
-
-Opaque approx_inv.
-
-Search ({_ <= _} + {_ > _})%Z.
-Theorem Znearest_le
-  : forall choice (x y : R), (x <= y)%R -> Znearest choice x <= Znearest choice y.
-Proof.
-  intros.
-  destruct (Z_le_gt_dec (Znearest choice x) (Znearest choice y)) as [LE | GT].
-  assumption.
-  exfalso.
-  assert (1 <= IZR (Znearest choice x) - IZR(Znearest choice y))%R as GAP.
-  { rewrite <- minus_IZR.
-    apply IZR_le.
-    lia.
-  }
-  pose proof (Znearest_imp2 choice x) as Rx.
-  pose proof (Znearest_imp2 choice y) as Ry.
-  apply Rabs_def2b in Rx.
-  apply Rabs_def2b in Ry.
-  assert (x = y) by lra.
-  subst y.
-  lia.
-Qed.
-
-Theorem approx_div_correct :
-  forall (ge : genv)  (sp: val) cmenv memenv
-         (le : letenv) (expr_a expr_b : expr) (a b : int)
-         (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vint a))
-         (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vint b))
-         (b_nz : (Int.unsigned b > 0)%Z),
-  eval_expr ge sp cmenv memenv le (approx_div expr_a expr_b)
-            (Vint (Int.divu a b)).
-Proof.
-  intros.
-  assert (eval_expr ge sp cmenv memenv (Vint b :: Vint a :: le)
-                    (Eletvar 0) (Vint b)) as EVAL_b'.
-  { constructor. reflexivity. }
-  destruct (approx_inv_correct ge sp cmenv memenv (Vint b :: Vint a :: le) (Eletvar 0) b EVAL_b' b_nz) as (inv_b & inv_b_eval & inv_b_finite & inv_b_correct).
-  unfold approx_div.
-  repeat econstructor.
-  { exact EVAL_a. }
-  { apply eval_lift. exact EVAL_b. }
-  exact inv_b_eval.
-  cbn. f_equal.
-  rewrite <- Float.of_intu_of_longu.
-  unfold Float.to_longu_ne.
-  rewrite ZofB_ne_range_correct by lia.
-  set (prod := (Float.mul (Float.of_intu a) inv_b)).
-  pose proof (Int.unsigned_range a) as a_range.
-  pose proof (Int.unsigned_range b) as b_range.
-  change Int.modulus with 4294967296 in a_range.
-  change Int.modulus with 4294967296 in b_range.
-  set (prod' := (B2R _ _ prod)).
-  set (prod_r := ZnearestE prod') in *.
-
-  Local Transparent Float.mul Float.of_intu.
-  unfold Float.mul, Float.of_intu in prod.
-  set (a' := Int.unsigned a) in *.
-  set (b' := Int.unsigned b) in *.
-  assert (IZR_a' : (0 <= IZR a' <= 4294967295)%R).
-  { split; apply IZR_le; lia. }
-  assert (IZR_b' : (1 <= IZR b' <= 4294967295)%R).
-  { split; apply IZR_le; lia. }
-  assert (SILLY : -2^53 <= a' <= 2^53).
-  { cbn; lia. }
-  destruct (BofZ_exact 53 1024 (@eq_refl _ Lt) (@eq_refl _ Lt) a' SILLY) as (C0E  & C0F & _).
-  clear SILLY.
-  pose proof (Bmult_correct  53 1024 (@eq_refl _ Lt) (@eq_refl _ Lt) Float.binop_nan mode_NE
-                               (BofZ 53 1024  (@eq_refl _ Lt) (@eq_refl _ Lt)a') inv_b) as C1.
-  set (inv_b_r :=  B2R 53 1024 inv_b) in *.
-  assert (INV_RANGE : (1/4294967296 <= 1/IZR b' <= 1)%R).
-  { split; unfold Rdiv.
-    - apply Rmult_le_compat_l. lra.
-      apply Rinv_le. apply IZR_lt. lia.
-      apply IZR_le. lia.
-    - replace 1%R with (1 / 1)%R at 2 by field.
-      apply Rmult_le_compat_l. lra.
-      apply Rinv_le. apply IZR_lt. lia.
-      apply IZR_le. lia.
-  }
-  apply Rabs_def2b in inv_b_correct.
-  rewrite Rlt_bool_true in C1; cycle 1.
-  { clear C1.
-    rewrite C0E.
-    apply (Rabs_relax (bpow radix2 64)).
-    { apply bpow_lt. lia. }
-    replace inv_b_r with (1 / IZR b' +  (inv_b_r - 1 / IZR b'))%R by ring.
-    set (delta := (inv_b_r - 1 / IZR b')%R) in *.
-    unfold approx_inv_thresh in inv_b_correct.
-    cbn.
-    assert (b'_RANGE : (1 <= IZR b' <= 4294967295)%R).
-    { split; apply IZR_le; lia.
-    }
-    assert (a'_RANGE : (0 <= IZR a' <= 4294967295)%R).
-    { split; apply IZR_le; lia.
-    }
-    gappa.
-  }
-  rewrite C0F in C1.
-  cbn in C1.
-  rewrite C0E in C1.
-  rewrite inv_b_finite in C1.
-  fold prod in C1.
-  fold prod' in C1.
-  destruct C1 as (C1E & C1F & _).
-  rewrite C1F. cbn.
-
-  assert(prod'_range : (0 <= prod' <= 17179869181/4)%R).
-  {
-    rewrite C1E.
-    replace inv_b_r with (1/IZR b' + (inv_b_r - 1 / IZR b'))%R by ring.
-    assert (a'_RANGE : (0 <= IZR a' <= 4294967295)%R).
-    { split; apply IZR_le; lia.
-    }
-    unfold approx_inv_thresh in inv_b_correct.
-    set (true_inv := (1 / IZR b')%R) in *.
-    set (delta := (inv_b_r - true_inv)%R) in *.
-    gappa.
-  }
-
-  assert(0 <= prod_r <= 4294967295) as prod_r_range.
-  { unfold prod_r.
-    rewrite <- (Znearest_IZR  (fun x => negb (Z.even x)) 0).
-    replace 4294967295 with (ZnearestE (17179869181 / 4)%R); cycle 1.
-    { apply Znearest_imp.
-      apply Rabs_lt.
-      lra.
-    }
-    split; apply Znearest_le; lra.
-  }
-
-  replace (_ && _ ) with true; cycle 1.
-  {
-    symmetry.
-    rewrite andb_true_iff.
-    split; apply Zle_imp_le_bool; lia.
-  }
-  cbn.
-  f_equal.
-  unfold Int.divu.
-  assert(Rabs
-             (round radix2 (FLT_exp (-1074) 53) ZnearestE (IZR a' * inv_b_r) - (IZR a' * inv_b_r)) <= 1/512)%R as R1 by gappa.
-
-  assert ( (Rabs (B2R 53 1024 prod - IZR (Int.unsigned a) / IZR (Int.unsigned b)) < 1 / 2)%R) as NEAR.
-  {
-    unfold prod.
-    pose proof (Bmult_correct 53 1024 (@eq_refl _ Lt) (@eq_refl _ Lt) Float.binop_nan mode_NE (BofZ 53 1024 (@eq_refl _ Lt) (@eq_refl _ Lt) a') inv_b) as C2.
-    rewrite C0E in C2.
-    rewrite Rlt_bool_true in C2; cycle 1.
-    { clear C2.
-      apply (Rabs_relax (bpow radix2 64)).
-      { apply bpow_lt. reflexivity. }
-      cbn.
-      fold inv_b_r.
-      replace inv_b_r with (1 / IZR b' + (inv_b_r - 1 / IZR b'))%R by ring.
-      set (delta := (inv_b_r - 1 / IZR b')%R) in *.
-      unfold approx_inv_thresh in *.
-      gappa.
-    }
-    destruct C2 as (C2E & C2F & _).
-    rewrite C2E.
-    fold inv_b_r a' b'.
-    replace ((round radix2 (FLT_exp (3 - 1024 - 53) 53) (round_mode mode_NE) (IZR a' * inv_b_r)) -
-               (IZR a' / IZR b'))%R with
-      (((round radix2 (FLT_exp (3 - 1024 - 53) 53) (round_mode mode_NE) (IZR a' * inv_b_r)) -
-          (IZR a' * inv_b_r)) +
-         (IZR a' * (inv_b_r - 1 / IZR b')))%R by (field ; lra).
-    set(delta :=  (inv_b_r - 1 / IZR b')%R) in *.
-    cbn. 
-    unfold approx_inv_thresh in *.
-    assert (Rabs(IZR a' * delta) <= 3/8)%R as R2.
-    { apply Rabs_le.
-      split; nra.
-    }
-    rewrite <- C1E.
-    rewrite <- C1E in R1.
-    pose proof (Rabs_triang (prod' - IZR a' * inv_b_r)  (IZR a' * delta))%R.
-    lra.
-  }
-  pose proof (div_approx_reals_correct (Int.unsigned a) (Int.unsigned b) prod' b_nz NEAR) as DIV_CORRECT.
-  rewrite <- DIV_CORRECT.
-
-  unfold div_approx_reals in *.
-  fold a' b' prod' prod_r.
-  unfold Int64.mul.
-  rewrite Int64.unsigned_repr by (cbn; lia).
-  rewrite Int64.unsigned_repr by (cbn; lia).
-  unfold Int64.sub.
-  rewrite Int64.unsigned_repr by (cbn; lia).
-  rewrite Int64.unsigned_repr by (cbn; nia).
-  assert (FMA_RANGE : Int64.min_signed <= a' - prod_r * b' <= Int64.max_signed).
-  { cbn.
-    unfold prod_r.
-    rewrite <- C1E in R1.
-    assert (IZR (-9223372036854775808) <= IZR (a' - ZnearestE prod' * b') <= IZR 9223372036854775807)%R.
-    2: split; apply le_IZR; lra.
-    rewrite minus_IZR.
-    rewrite mult_IZR.
-    replace (IZR (ZnearestE prod')) with (prod' + (IZR (ZnearestE prod') - prod'))%R by ring.
-    pose proof (Znearest_imp2 (fun x => negb (Z.even x)) prod') as R2.
-    set (delta1 := (IZR (ZnearestE prod') - prod')%R) in *.
-    replace prod' with ((prod' - IZR a' * inv_b_r) + IZR a' * (inv_b_r - 1 / IZR b')
-                        + IZR a' / IZR b')%R by (field; lra).
-    set (delta2 :=  (inv_b_r - 1 / IZR b')%R) in *.
-    set (delta3 := (prod' - IZR a' * inv_b_r)%R) in *.
-    replace (IZR a' - (delta3 + IZR a' * delta2 + IZR a' / IZR b' + delta1) * IZR b')%R with
-      (- (delta3 + IZR a' * delta2 + delta1) * IZR b')%R by (field; lra).
-    unfold approx_inv_thresh in *.
-    assert (Rabs(IZR a' * delta2) <= 1/4)%R as R4.
-    { apply Rabs_le.
-      split; 
-        nra. }
-    set (delta4 := (IZR a' * delta2)%R) in *.
-    apply Rabs_def2b in R1.
-    apply Rabs_def2b in R2.
-    apply Rabs_def2b in R4.
-    split; nra.
-  }
-  fold a' b' prod_r in DIV_CORRECT.
-
-  pose proof (Zlt_cases (a' - prod_r * b') 0) as CMP.
-  destruct (Z.ltb (a' - prod_r * b') 0).
-  - replace (Int64.lt (Int64.repr (a' - prod_r * b')) Int64.zero) with true; cycle 1.
-    { unfold Int64.lt.
-      change (Int64.signed Int64.zero) with 0.
-      rewrite Int64.signed_repr by exact FMA_RANGE.
-      rewrite zlt_true by lia.
-      reflexivity.
-    }
-    cbn.
-    f_equal.
-    rewrite Int64.add_signed.
-    rewrite (Int64.signed_repr prod_r) by (cbn ; lia).
-    rewrite Int64.signed_repr by (cbn ; lia).
-    unfold Int64.loword.
-    rewrite Int64.unsigned_repr. reflexivity.
-    split.
-    2: cbn; lia.
-    replace (prod_r + (-1)) with (prod_r - 1) by lia.
-    rewrite DIV_CORRECT.
-    apply Z.div_pos; lia.
-    
-  - replace (Int64.lt (Int64.repr (a' - prod_r * b')) Int64.zero) with false; cycle 1.
-    { unfold Int64.lt.
-      change (Int64.signed Int64.zero) with 0.
-      rewrite Int64.signed_repr by exact FMA_RANGE.
-      rewrite zlt_false by lia.
-      reflexivity.
-    }
-    cbn.
-    unfold Int64.loword.
-    rewrite Int64.unsigned_repr by (cbn; lia).
-    reflexivity.
-Qed.
-    
-    
diff --git a/kvx/SelectOp.vp b/kvx/SelectOp.vp
index f529907d..70941c48 100644
--- a/kvx/SelectOp.vp
+++ b/kvx/SelectOp.vp
@@ -742,6 +742,8 @@ Nondetfunction gen_fmaf args :=
   | _ => None
   end.
 
+Require FPDivision32.
+
 Definition platform_builtin (b: platform_builtin) (args: exprlist) : option expr :=
   match b with
   | BI_fmin => Some (Eop Ominf args)
@@ -752,6 +754,10 @@ Definition platform_builtin (b: platform_builtin) (args: exprlist) : option expr
   | BI_fmaf => gen_fmaf args
   | BI_lround_ne => Some (Eop Olongoffloat_ne args)
   | BI_luround_ne => Some (Eop Olonguoffloat_ne args)
+  | BI_fp_udiv32 => (match args with
+                     | a:::b:::Enil => Some (FPDivision32.fp_divu32 a b)
+                     | _ => None
+                     end)
   end.
 End SELECT.
 
diff --git a/kvx/SelectOpproof.v b/kvx/SelectOpproof.v
index a374ec54..658bf0b3 100644
--- a/kvx/SelectOpproof.v
+++ b/kvx/SelectOpproof.v
@@ -1883,6 +1883,9 @@ Proof.
       destruct v2; simpl; trivial.
 Qed.
 
+
+Require FPDivision32.
+
 Theorem eval_platform_builtin:
   forall bf al a vl v le,
   platform_builtin bf al = Some a ->
@@ -1903,7 +1906,29 @@ Proof.
   - cbn in *;
       destruct vl; trivial. destruct vl; trivial.
       destruct v0; try discriminate;
-      cbn; rewrite H0; reflexivity.
+        cbn; rewrite H0; reflexivity.
+  - cbn in *.
+    intro EVAL.
+    inv EVAL. discriminate.
+    inv H0. discriminate.
+    inv H2. 2: discriminate.
+    inv Heval.
+    intro EVAL.
+    destruct v1; try discriminate.
+    destruct v0; try discriminate.
+    unfold Int.eq in EVAL.
+    change (Int.unsigned Int.zero) with 0 in EVAL.
+    unfold zeq in EVAL.
+    destruct (Z.eq_dec (Int.unsigned i0) 0) as [EQ | NEQ].
+    { discriminate. }
+    exists (Vint (Int.divu i i0)). split.
+    {
+      apply FPDivision32.fp_divu32_correct; auto.
+      pose proof (Int.unsigned_range i0).
+      lia.
+    }
+    inv EVAL.
+    constructor.
 Qed.
 
 End CMCONSTR.