aboutsummaryrefslogtreecommitdiffstats
path: root/kvx
diff options
context:
space:
mode:
Diffstat (limited to 'kvx')
-rw-r--r--kvx/Archi.v2
-rw-r--r--kvx/Asm.v12
-rw-r--r--kvx/Asmblockdeps.v4
-rw-r--r--kvx/Asmblockgen.v12
-rw-r--r--kvx/Asmvliw.v9
-rw-r--r--kvx/Builtins1.v18
-rw-r--r--kvx/CBuiltins.ml6
-rw-r--r--kvx/ExtFloats.v5
-rw-r--r--kvx/ExtIEEE754.v12
-rw-r--r--kvx/ExtValues.v65
-rw-r--r--kvx/ExtZ.v12
-rw-r--r--kvx/FPDivision32.v588
-rw-r--r--kvx/FPDivision64.v2053
-rw-r--r--kvx/NeedOp.v2
-rw-r--r--kvx/Op.v30
-rw-r--r--kvx/PostpassSchedulingOracle.ml19
-rw-r--r--kvx/SelectOp.vp8
-rw-r--r--kvx/SelectOpproof.v33
-rw-r--r--kvx/TargetPrinter.ml8
-rw-r--r--kvx/ValueAOp.v4
20 files changed, 2889 insertions, 13 deletions
diff --git a/kvx/Archi.v b/kvx/Archi.v
index 6d59a3d1..b1739421 100644
--- a/kvx/Archi.v
+++ b/kvx/Archi.v
@@ -16,7 +16,7 @@
(** Architecture-dependent parameters for MPPA KVX. Mostly copied from the Risc-V backend *)
Require Import ZArith List.
-Require Import Binary Bits.
+From Flocq Require Import Binary Bits.
Definition ptr64 := true.
diff --git a/kvx/Asm.v b/kvx/Asm.v
index fd20316c..333854cf 100644
--- a/kvx/Asm.v
+++ b/kvx/Asm.v
@@ -163,6 +163,8 @@ Inductive instruction : Type :=
| Pfloatuwrnsz (rd rs: ireg) (**r Floating Point Conversion from integer (u32 -> 32) *)
| Pfloatudrnsz (rd rs: ireg) (**r Floating Point Conversion from unsigned integer (64 bits) *)
| Pfloatdrnsz (rd rs: ireg) (**r Floating Point Conversion from integer (64 bits) *)
+
+ (* round to zero *)
| Pfixedwrzz (rd rs: ireg) (**r Integer conversion from floating point *)
| Pfixeduwrzz (rd rs: ireg) (**r Integer conversion from floating point (f32 -> 32 bits unsigned *)
| Pfixeddrzz (rd rs: ireg) (**r Integer conversion from floating point (i64 -> 64 bits) *)
@@ -170,6 +172,12 @@ Inductive instruction : Type :=
| Pfixedudrzz (rd rs: ireg) (**r unsigned Integer conversion from floating point (u64 -> 64 bits) *)
| Pfixedudrzz_i32 (rd rs: ireg) (**r unsigned Integer conversion from floating point (u32 -> 64 bits) *)
+ (* round to nearest, prefer even numbers *)
+ | Pfixedwrne (rd rs: ireg) (**r Integer conversion from floating point *)
+ | Pfixeduwrne (rd rs: ireg) (**r Integer conversion from floating point (f32 -> 32 bits unsigned *)
+ | Pfixeddrne (rd rs: ireg) (**r Integer conversion from floating point (i64 -> 64 bits) *)
+ | Pfixedudrne (rd rs: ireg) (**r unsigned Integer conversion from floating point (u64 -> 64 bits) *)
+
(** Arith RI32 *)
| Pmake (rd: ireg) (imm: int) (**r load immediate *)
@@ -352,6 +360,10 @@ Definition basic_to_instruction (b: basic) :=
| PArithRR Asmvliw.Pfixedudrzz rd rs => Pfixedudrzz rd rs
| PArithRR Asmvliw.Pfixeddrzz_i32 rd rs => Pfixeddrzz_i32 rd rs
| PArithRR Asmvliw.Pfixedudrzz_i32 rd rs => Pfixedudrzz_i32 rd rs
+ | PArithRR Asmvliw.Pfixedwrne rd rs => Pfixedwrne rd rs
+ | PArithRR Asmvliw.Pfixeduwrne rd rs => Pfixeduwrne rd rs
+ | PArithRR Asmvliw.Pfixeddrne rd rs => Pfixeddrne rd rs
+ | PArithRR Asmvliw.Pfixedudrne rd rs => Pfixedudrne rd rs
(* RI32 *)
| PArithRI32 Asmvliw.Pmake rd imm => Pmake rd imm
diff --git a/kvx/Asmblockdeps.v b/kvx/Asmblockdeps.v
index a9786e0a..ac0b359e 100644
--- a/kvx/Asmblockdeps.v
+++ b/kvx/Asmblockdeps.v
@@ -1537,6 +1537,10 @@ Definition string_of_name_rr (n: arith_name_rr): pstring :=
| Pfixedudrzz => "Pfixedudrzz"
| Pfixeddrzz_i32 => "Pfixeddrzz_i32"
| Pfixedudrzz_i32 => "Pfixedudrzz_i32"
+ | Pfixedwrne => "Pfixedwrne"
+ | Pfixeduwrne => "Pfixeduwrne"
+ | Pfixeddrne => "Pfixeddrne"
+ | Pfixedudrne => "Pfixedudrne"
end.
Definition string_of_name_ri32 (n: arith_name_ri32): pstring :=
diff --git a/kvx/Asmblockgen.v b/kvx/Asmblockgen.v
index ab827b1c..a8f1a045 100644
--- a/kvx/Asmblockgen.v
+++ b/kvx/Asmblockgen.v
@@ -799,6 +799,12 @@ Definition transl_op
| Ointuofsingle, a1 :: nil =>
do rd <- ireg_of res; do rs <- freg_of a1;
OK (Pfixeduwrzz rd rs ::i k)
+ | Ointofsingle_ne, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- freg_of a1;
+ OK (Pfixedwrne rd rs ::i k)
+ | Ointuofsingle_ne, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- freg_of a1;
+ OK (Pfixeduwrne rd rs ::i k)
| Olongoffloat, a1 :: nil =>
do rd <- ireg_of res; do rs <- freg_of a1;
OK (Pfixeddrzz rd rs ::i k)
@@ -811,6 +817,12 @@ Definition transl_op
| Olonguoffloat, a1 :: nil =>
do rd <- ireg_of res; do rs <- freg_of a1;
OK (Pfixedudrzz rd rs ::i k)
+ | Olongoffloat_ne, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- freg_of a1;
+ OK (Pfixeddrne rd rs ::i k)
+ | Olonguoffloat_ne, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- freg_of a1;
+ OK (Pfixedudrne rd rs ::i k)
| Ofloatofsingle, a1 :: nil =>
do rd <- freg_of res; do rs <- freg_of a1;
diff --git a/kvx/Asmvliw.v b/kvx/Asmvliw.v
index 45b230e6..6d60445a 100644
--- a/kvx/Asmvliw.v
+++ b/kvx/Asmvliw.v
@@ -402,7 +402,10 @@ Inductive arith_name_rr : Type :=
| Pfixedudrzz (**r Integer conversion from floating point (float -> unsigned long) *)
| Pfixeddrzz_i32 (**r Integer conversion from floating point (float -> int) *)
| Pfixedudrzz_i32 (**r Integer conversion from floating point (float -> unsigned int) *)
-.
+ | Pfixedwrne (**r Integer conversion from floating point *)
+ | Pfixeduwrne (**r Integer conversion from floating point (f32 -> 32 bits unsigned *)
+ | Pfixeddrne (**r Integer conversion from floating point (i64 -> 64 bits) *)
+ | Pfixedudrne. (**r unsigned Integer conversion from floating point (u64 -> 64 bits) *)
Inductive arith_name_ri32 : Type :=
| Pmake (**r load immediate *)
@@ -955,6 +958,10 @@ Definition arith_eval_rr n v :=
| Pfixedudrzz => Val.maketotal (Val.longuoffloat v)
| Pfixeddrzz_i32 => Val.maketotal (Val.intoffloat v)
| Pfixedudrzz_i32 => Val.maketotal (Val.intuoffloat v)
+ | Pfixedudrne => Val.maketotal (Val.longuoffloat_ne v)
+ | Pfixeddrne => Val.maketotal (Val.longoffloat_ne v)
+ | Pfixeduwrne => Val.maketotal (Val.intuofsingle_ne v)
+ | Pfixedwrne => Val.maketotal (Val.intofsingle_ne v)
end.
Definition arith_eval_ri32 n i :=
diff --git a/kvx/Builtins1.v b/kvx/Builtins1.v
index 441345bf..163dcbd8 100644
--- a/kvx/Builtins1.v
+++ b/kvx/Builtins1.v
@@ -25,7 +25,10 @@ Inductive platform_builtin : Type :=
| BI_fminf
| BI_fmaxf
| BI_fma
-| BI_fmaf.
+| BI_fmaf
+| BI_lround_ne
+| BI_luround_ne
+| BI_fp_udiv32.
Local Open Scope string_scope.
@@ -36,6 +39,9 @@ Definition platform_builtin_table : list (string * platform_builtin) :=
:: ("__builtin_fmaxf", BI_fmaxf)
:: ("__builtin_fma", BI_fma)
:: ("__builtin_fmaf", BI_fmaf)
+ :: ("__builtin_lround_ne", BI_lround_ne)
+ :: ("__builtin_luround_ne", BI_luround_ne)
+ :: ("__builtin_fp_udiv32", BI_fp_udiv32)
:: nil.
Definition platform_builtin_sig (b: platform_builtin) : signature :=
@@ -48,6 +54,10 @@ Definition platform_builtin_sig (b: platform_builtin) : signature :=
mksignature (Tfloat :: Tfloat :: Tfloat :: nil) Tfloat cc_default
| BI_fmaf =>
mksignature (Tsingle :: Tsingle :: Tsingle :: nil) Tsingle cc_default
+ | BI_lround_ne | BI_luround_ne =>
+ mksignature (Tfloat :: nil) Tlong cc_default
+ | BI_fp_udiv32 =>
+ mksignature (Tint :: Tint :: nil) Tint cc_default
end.
Definition platform_builtin_sem (b: platform_builtin) : builtin_sem (sig_res (platform_builtin_sig b)) :=
@@ -58,4 +68,10 @@ Definition platform_builtin_sem (b: platform_builtin) : builtin_sem (sig_res (pl
| BI_fmaxf => mkbuiltin_n2t Tsingle Tsingle Tsingle ExtFloat32.max
| BI_fma => mkbuiltin_n3t Tfloat Tfloat Tfloat Tfloat Float.fma
| BI_fmaf => mkbuiltin_n3t Tsingle Tsingle Tsingle Tsingle Float32.fma
+ | BI_lround_ne => mkbuiltin_n1p Tfloat Tlong Float.to_long_ne
+ | BI_luround_ne => mkbuiltin_n1p Tfloat Tlong Float.to_longu_ne
+ | BI_fp_udiv32 => mkbuiltin_n2p Tint Tint Tint
+ (fun n1 n2 => if Int.eq n2 Int.zero
+ then None
+ else Some (Int.divu n1 n2))
end.
diff --git a/kvx/CBuiltins.ml b/kvx/CBuiltins.ml
index 7398e0f4..fad86d3c 100644
--- a/kvx/CBuiltins.ml
+++ b/kvx/CBuiltins.ml
@@ -133,6 +133,12 @@ let builtins = {
"__builtin_fmaf",
(TFloat(FFloat, []),
[TFloat(FFloat, []); TFloat(FFloat, []); TFloat(FFloat, [])], false);
+ "__builtin_lround_ne",
+ (TInt(ILong, []), [TFloat(FDouble, [])], false);
+ "__builtin_luround_ne",
+ (TInt(IULong, []), [TFloat(FDouble, [])], false);
+ "__builtin_fp_udiv32",
+ (TInt(IUInt, []), [TInt(IUInt, []); TInt(IUInt, [])], false);
]
}
diff --git a/kvx/ExtFloats.v b/kvx/ExtFloats.v
index b08503a5..332d3e3d 100644
--- a/kvx/ExtFloats.v
+++ b/kvx/ExtFloats.v
@@ -13,6 +13,8 @@
(* *)
(* *************************************************************)
+From Flocq Require Import Core Digits Operations Round Bracket Sterbenz
+ Binary Round_odd.
Require Import Floats Integers ZArith.
Module ExtFloat.
@@ -30,6 +32,8 @@ Definition max (x : float) (y : float) : float :=
| Some Eq | Some Gt => x
| Some Lt | None => y
end.
+
+Definition one := Float.of_int (Int.repr (1%Z)).
End ExtFloat.
Module ExtFloat32.
@@ -50,5 +54,4 @@ Definition max (x : float32) (y : float32) : float32 :=
Definition one := Float32.of_int (Int.repr (1%Z)).
Definition inv (x : float32) : float32 :=
Float32.div one x.
-
End ExtFloat32.
diff --git a/kvx/ExtIEEE754.v b/kvx/ExtIEEE754.v
new file mode 100644
index 00000000..095fd0cc
--- /dev/null
+++ b/kvx/ExtIEEE754.v
@@ -0,0 +1,12 @@
+Require Import Coq.ZArith.Zdiv.
+
+Open Scope Z.
+
+Definition Zdiv_ne (a b : Z) :=
+ let q := Z.div a b in
+ let q1 := Z.succ q in
+ match Z.compare (a-b*q) (b*q1-a) with
+ | Lt => q
+ | Gt => q1
+ | Eq => (if Z.even q then q else q1)
+ end.
diff --git a/kvx/ExtValues.v b/kvx/ExtValues.v
index b4e14898..c478f70b 100644
--- a/kvx/ExtValues.v
+++ b/kvx/ExtValues.v
@@ -754,3 +754,68 @@ Definition fmaddfs := triple_op_single (fun f1 f2 f3 => Float32.fma f2 f3 f1).
Definition fmsubf := triple_op_float (fun f1 f2 f3 => Float.fma (Float.neg f2) f3 f1).
Definition fmsubfs := triple_op_single (fun f1 f2 f3 => Float32.fma (Float32.neg f2) f3 f1).
+
+From Flocq Require Import Core Digits Operations Round Bracket Sterbenz
+ Binary Round_odd.
+Require Import IEEE754_extra Zdiv Psatz Floats ExtFloats.
+
+Definition div_approx_reals (a b : Z) (x : R) :=
+ let q:=ZnearestE x in
+ let r:=a-q*b in
+ if r <? 0
+ then q-1
+ else q.
+
+Lemma floor_ball1:
+ forall x : R, forall y : Z,
+ (Rabs (x - IZR y) < 1)%R -> Zfloor x = (y-1)%Z \/ Zfloor x = y.
+Proof.
+ intros x y BALL.
+ apply Rabs_lt_inv in BALL.
+ case (Rcompare_spec x (IZR y)); intro CMP.
+ - left. apply Zfloor_imp.
+ ring_simplify (y-1+1).
+ rewrite minus_IZR. lra.
+ - subst.
+ rewrite Zfloor_IZR. right. reflexivity.
+ - right. apply Zfloor_imp.
+ rewrite plus_IZR. lra.
+Qed.
+
+Theorem div_approx_reals_correct:
+ forall a b : Z, forall x : R,
+ b > 0 ->
+ (Rabs (x - IZR a/ IZR b) < 1/2)%R ->
+ div_approx_reals a b x = (a/b)%Z.
+Proof.
+ intros a b x bPOS GAP.
+ assert (0 < IZR b)%R by (apply IZR_lt ; lia).
+ unfold div_approx_reals.
+ pose proof (Znearest_imp2 (fun x => negb (Z.even x)) x) as NEAR.
+ assert (Rabs (IZR (ZnearestE x) - IZR a/ IZR b) < 1)%R as BALL.
+ { pose proof (Rabs_triang (IZR (ZnearestE x) - x)
+ (x - IZR a/ IZR b)) as TRI.
+ ring_simplify (IZR (ZnearestE x) - x + (x - IZR a / IZR b))%R in TRI.
+ lra.
+ }
+ clear GAP NEAR.
+ rewrite Rabs_minus_sym in BALL.
+ pose proof (floor_ball1 _ _ BALL) as FLOOR.
+ clear BALL.
+ rewrite Zfloor_div in FLOOR by lia.
+ pose proof (Z_div_mod_eq_full a b) as DIV_MOD.
+ assert (0 < b) as bPOS' by lia.
+ pose proof (Z.mod_pos_bound a b bPOS') as MOD_BOUNDS.
+ case Z.ltb_spec; intro; destruct FLOOR; lia.
+Qed.
+
+Definition my_div (a b : val) :=
+ let b_d := Val.maketotal (Val.floatofintu b) in
+ let invb_d := Val.floatofsingle (invfs (Val.maketotal (Val.singleofintu b))) in
+ let alpha := fmsubf (Vfloat ExtFloat.one) invb_d b_d in
+ let x := fmaddf invb_d alpha invb_d in
+ Val.mulf (Val.maketotal (Val.floatofintu a)) x.
+
+(*
+Compute (my_div (Vint (Int.repr 3)) (Vint (Int.repr 5))).
+*)
diff --git a/kvx/ExtZ.v b/kvx/ExtZ.v
new file mode 100644
index 00000000..095fd0cc
--- /dev/null
+++ b/kvx/ExtZ.v
@@ -0,0 +1,12 @@
+Require Import Coq.ZArith.Zdiv.
+
+Open Scope Z.
+
+Definition Zdiv_ne (a b : Z) :=
+ let q := Z.div a b in
+ let q1 := Z.succ q in
+ match Z.compare (a-b*q) (b*q1-a) with
+ | Lt => q
+ | Gt => q1
+ | Eq => (if Z.even q then q else q1)
+ end.
diff --git a/kvx/FPDivision32.v b/kvx/FPDivision32.v
new file mode 100644
index 00000000..5cda1077
--- /dev/null
+++ b/kvx/FPDivision32.v
@@ -0,0 +1,588 @@
+From Flocq Require Import Core Digits Operations Round Bracket Sterbenz
+ Binary Round_odd Bits.
+Require Archi.
+Require Import Coqlib.
+Require Import Compopts.
+Require Import AST.
+Require Import Integers.
+Require Import Floats.
+Require Import Op.
+Require Import CminorSel.
+Require Import OpHelpers.
+Require Import ExtFloats.
+Require Import DecBoolOps.
+Require Import Chunks.
+Require Import Builtins.
+Require Import Values Globalenvs.
+Require Compopts.
+Require Import Psatz.
+Require Import IEEE754_extra.
+
+From Gappa Require Import Gappa_tactic.
+
+Local Open Scope cminorsel_scope.
+
+Definition approx_inv b :=
+ let invb_s := Eop Oinvfs ((Eop Osingleofintu ((Eletvar 0%nat):::Enil)):::Enil) in
+ let invb_d := Eop Ofloatofsingle (invb_s ::: Enil) in
+ let b_d := Eop Ofloatoflongu ((Eop Ocast32unsigned ((Eletvar 1%nat):::Enil)):::Enil) in
+ let invb_d_var := Eletvar (0%nat) in
+ let one := Eop (Ofloatconst ExtFloat.one) Enil in
+ let alpha := Eop Ofmsubf (one ::: invb_d_var ::: b_d ::: Enil) in
+ let x := Eop Ofmaddf (invb_d_var ::: alpha ::: invb_d_var ::: Enil) in
+ Elet b (Elet invb_d x).
+
+Definition approx_inv_thresh := (1/17179869184)%R.
+
+(*
+Lemma BofZ_exact_zero:
+ forall (prec emax : Z) (prec_gt_0_ : Prec_gt_0 prec)
+ (Hmax : (prec < emax)%Z),
+ B2R (BofZ prec emax 0%Z (Hmax := Hmax)) = 0%R /\
+ is_finite (BofZ prec emax 0%Z (Hmax := Hmax)) = true /\
+ Bsign prec emax (BofZ prec emax 0%Z (Hmax := Hmax)) = false.
+Proof.
+ intros.
+ apply BofZ_exact.
+ pose proof (Z.pow_nonneg 2 prec).
+ lia.
+Qed.
+ *)
+
+Lemma Rabs_relax:
+ forall b b' (INEQ : (b < b')%R) x,
+ (-b <= x <= b)%R -> (Rabs x < b')%R.
+Proof.
+ intros.
+ apply Rabs_lt.
+ lra.
+Qed.
+
+Theorem approx_inv_correct :
+ forall (ge : genv) (sp: val) cmenv memenv
+ (le : letenv) (expr_b : expr) (b : int)
+ (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vint b))
+ (b_nz : ((Int.unsigned b) > 0)%Z),
+ exists f : float,
+ eval_expr ge sp cmenv memenv le (approx_inv expr_b) (Vfloat f) /\
+ is_finite _ _ f = true /\ (Rabs((B2R _ _ f) - (1 / IZR (Int.unsigned b))) <= approx_inv_thresh)%R.
+Proof.
+ intros. unfold approx_inv.
+ econstructor. constructor.
+ { repeat econstructor.
+ { eassumption. }
+ { reflexivity. } }
+ set (invb_d := (Float.of_single (ExtFloat32.inv (Float32.of_intu b)))).
+ set (b' := Int.unsigned b) in *.
+ pose proof (Int.unsigned_range b) as RANGE.
+ fold b' in RANGE.
+ change Int.modulus with 4294967296%Z in RANGE.
+ assert (0 <= b' <= Int64.max_unsigned)%Z as b'RANGE.
+ { change Int64.max_unsigned with 18446744073709551615%Z.
+ lia. }
+ assert (1 <= IZR b' <= 4294967295)%R as RANGE'.
+ { split.
+ { apply IZR_le. lia. }
+ apply IZR_le. lia.
+ }
+ cbn.
+
+ set (b_d := (Float.of_longu (Int64.repr b'))) in *.
+ Local Transparent Float.of_longu.
+ unfold Float.of_longu in b_d.
+
+ assert(SILLY : (- 2 ^ 24 <= 1 <= 2 ^ 24)%Z) by lia.
+ destruct (BofZ_exact 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1 SILLY) as (C0E & C0F & _).
+ clear SILLY.
+
+ assert(SILLY : (- 2 ^ 53 <= 1 <= 2 ^ 53)%Z) by lia.
+ destruct (BofZ_exact 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1 SILLY) as (C9E & C9F & _).
+ clear SILLY.
+
+ pose proof (BofZ_correct 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) b') as C1.
+ rewrite Rlt_bool_true in C1; cycle 1.
+ { clear C1. cbn.
+ eapply (Rabs_relax (IZR 4294967296)).
+ lra.
+ gappa.
+ }
+ rewrite Zlt_bool_false in C1 by lia.
+ destruct C1 as (C1E & C1F & _).
+
+ Local Transparent Float32.of_intu Float32.of_int Float32.div.
+ unfold ExtFloat32.inv, ExtFloat32.one, Float32.of_intu, Float32.of_int, Float32.div in invb_d.
+ fold b' in invb_d.
+ change (Int.signed (Int.repr 1%Z)) with 1%Z in invb_d.
+ pose proof (Bdiv_correct 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) Float32.binop_nan mode_NE
+ (BofZ 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1)
+ (BofZ 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) b')) as C2.
+ rewrite Rlt_bool_true in C2; cycle 1.
+ { clear C2. rewrite C1E.
+ apply (Rabs_relax (bpow radix2 10)).
+ { cbn; lra. }
+ unfold F2R. cbn. unfold F2R. cbn.
+ gappa.
+ }
+ assert (B2R 24 128 (BofZ 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) b') <> 0%R) as NONZ.
+ { clear C2.
+ rewrite C1E.
+ cbn.
+ assert (1 <= round radix2 (FLT_exp (-149) 24) ZnearestE (IZR b'))%R by gappa.
+ lra.
+ }
+ destruct (C2 NONZ) as (C2E & C2F & _).
+ clear C2 NONZ.
+ Local Transparent Float.of_single.
+ unfold Float.of_single in invb_d.
+ pose proof (Bconv_correct 24 128 53 1024 (@eq_refl Datatypes.comparison Lt)
+ (@eq_refl Datatypes.comparison Lt) Float.of_single_nan mode_NE
+ (Bdiv 24 128 (@eq_refl Datatypes.comparison Lt)
+ (@eq_refl Datatypes.comparison Lt) Float32.binop_nan mode_NE
+ (BofZ 24 128 (@eq_refl Datatypes.comparison Lt)
+ (@eq_refl Datatypes.comparison Lt) 1)
+ (BofZ 24 128 (@eq_refl Datatypes.comparison Lt)
+ (@eq_refl Datatypes.comparison Lt) b'))) as C3.
+ fold invb_d in C3.
+ rewrite Rlt_bool_true in C3; cycle 1.
+ { clear C3.
+ rewrite C2E.
+ rewrite C1E.
+ rewrite C0E.
+ apply (Rabs_relax (bpow radix2 10)).
+ { apply bpow_lt; lia. }
+ cbn.
+ gappa.
+ }
+ change (is_finite 24 128 (BofZ 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1)) with true in C2F.
+ destruct (C3 C2F) as (C3E & C3F & _).
+ clear C3.
+ unfold Float.fma.
+ assert (is_finite _ _ (Float.neg invb_d) = true) as invb_d_F.
+ { Local Transparent Float.neg.
+ unfold Float.neg.
+ rewrite is_finite_Bopp.
+ assumption.
+ }
+
+ assert(SILLY : (- 2 ^ 53 <= b' <= 2 ^ 53)%Z) by lia.
+ destruct (BofZ_exact 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) b' SILLY) as (C4E & C4F & _).
+ clear SILLY.
+
+ assert (is_finite 53 1024 b_d = true) as b_d_F.
+ { unfold b_d.
+ rewrite Int64.unsigned_repr by lia.
+ assumption.
+ }
+
+ assert (is_finite 53 1024 ExtFloat.one = true) as one_F by reflexivity.
+
+ pose proof (Bfma_correct 53 1024 (@eq_refl Datatypes.comparison Lt)
+ (@eq_refl Datatypes.comparison Lt) Float.fma_nan mode_NE
+ (Float.neg invb_d) b_d ExtFloat.one invb_d_F b_d_F one_F) as C5.
+
+ rewrite Rlt_bool_true in C5; cycle 1.
+ { clear C5.
+ unfold Float.neg.
+ rewrite B2R_Bopp.
+ rewrite C3E.
+ rewrite C2E.
+ rewrite C0E.
+ rewrite C1E.
+ unfold ExtFloat.one.
+ change (Float.of_int (Int.repr 1)) with (BofZ 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1).
+ rewrite C9E.
+ unfold b_d.
+ rewrite Int64.unsigned_repr by lia.
+ rewrite C4E.
+ apply (Rabs_relax (bpow radix2 10)).
+ { apply bpow_lt; lia. }
+ cbn.
+ gappa.
+ }
+ destruct C5 as (C5E & C5F & _).
+
+ pose proof (Bfma_correct 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) Float.fma_nan mode_NE
+ (Bfma 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) Float.fma_nan mode_NE
+ (Float.neg invb_d) b_d ExtFloat.one) invb_d invb_d C5F C3F C3F) as C6.
+ rewrite Rlt_bool_true in C6; cycle 1.
+ { clear C6.
+ rewrite C3E.
+ rewrite C2E.
+ rewrite C1E.
+ rewrite C0E.
+ rewrite C5E.
+ unfold Float.neg.
+ rewrite B2R_Bopp.
+ rewrite C3E.
+ rewrite C2E.
+ rewrite C0E.
+ rewrite C1E.
+ unfold b_d.
+ rewrite Int64.unsigned_repr by lia.
+ rewrite C4E.
+ unfold ExtFloat.one.
+ change (Float.of_int (Int.repr 1)) with (BofZ 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1).
+ rewrite C9E.
+ apply (Rabs_relax (bpow radix2 10)).
+ { apply bpow_lt; lia. }
+ cbn.
+ gappa.
+ }
+ destruct C6 as (C6E & C6F & _).
+ split.
+ { exact C6F. }
+ rewrite C6E.
+ rewrite C5E.
+ rewrite C3E.
+ rewrite C2E.
+ rewrite C1E.
+ rewrite C0E.
+ unfold Float.neg.
+ rewrite B2R_Bopp.
+ unfold ExtFloat.one.
+ Local Transparent Float.of_int.
+ unfold Float.of_int.
+ rewrite (Int.signed_repr 1) by (cbn ; lia).
+ rewrite C9E.
+ rewrite C3E.
+ rewrite C2E.
+ rewrite C0E.
+ rewrite C1E.
+ unfold b_d.
+ rewrite Int64.unsigned_repr by lia.
+ rewrite C4E.
+ cbn.
+ set (rd := round radix2 (FLT_exp (-1074) 53) ZnearestE) in *.
+ set (rs := round radix2 (FLT_exp (-149) 24) ZnearestE) in *.
+ set (bi := IZR b') in *.
+ set (invb0 := rd (rs (1/ rs bi))%R) in *.
+ set (alpha := (- invb0 * bi + 1)%R) in *.
+ set (alpha' := ((1/bi - rd (rs (1/ rs bi)))/(1/bi))%R) in *.
+ assert (alpha = alpha')%R as expand_alpha.
+ {
+ unfold alpha, alpha', invb0.
+ field.
+ lra.
+ }
+ assert(-1/2097152 <= alpha' <= 1/2097152)%R as alpha_BOUND.
+ { unfold alpha', rd, rs.
+ gappa.
+ }
+ set (delta := (rd (rd alpha * invb0 + invb0) - (alpha * invb0 + invb0))%R).
+ assert(-1/1125899906842624 <= delta <= 1/1125899906842624)%R as delta_BOUND.
+ { unfold delta, invb0. rewrite expand_alpha. unfold rd, rs.
+ gappa.
+ }
+ replace (rd (rd alpha * invb0 + invb0) - 1/bi)%R with
+ (delta + ((alpha * invb0 + invb0)-1/bi))%R by (unfold delta; ring).
+ replace (alpha * invb0 + invb0 - 1 / bi)%R with (alpha * (invb0 - 1/bi) + (alpha * (1/bi) + invb0 - 1 / bi))%R by ring.
+ replace (alpha * (1 / bi) + invb0 - 1 / bi)%R with 0%R; cycle 1.
+ { unfold alpha.
+ field.
+ lra.
+ }
+ rewrite expand_alpha.
+ unfold invb0, rd, rs, approx_inv_thresh.
+ apply Rabs_le.
+ gappa.
+Qed.
+
+Definition fp_divu32 a b :=
+ let a_var := Eletvar (1%nat) in
+ let b_var := Eletvar (0%nat) in
+ let a_d := Eop Ofloatoflongu ((Eop Ocast32unsigned (a_var ::: Enil)) ::: Enil) in
+ let qr := Eop Olonguoffloat_ne ((Eop Omulf (a_d:::(approx_inv b_var):::Enil)):::Enil) in
+ let qr_var := Eletvar 0%nat in
+ let rem := Eop Omsubl ((Eop Ocast32unsigned ((Eletvar (2%nat)):::Enil)):::
+ qr_var :::
+ (Eop Ocast32unsigned ((Eletvar (1%nat)):::Enil)):::Enil) in
+ let qr_m1 := Eop (Oaddlimm (Int64.repr (-1)%Z)) (qr_var:::Enil) in
+ let cases := Eop (Osel (Ccompl0 Clt) Tlong)
+ (qr_m1 ::: qr_var ::: rem ::: Enil) in (* (Elet qr cases) *)
+ Eop Olowlong ((Elet a (Elet (lift b) (Elet qr cases))) ::: Enil).
+
+Open Scope Z.
+
+Definition div_approx_reals (a b : Z) (x : R) :=
+ let q:=ZnearestE x in
+ let r:=a-q*b in
+ if r <? 0
+ then q-1
+ else q.
+
+Lemma floor_ball1:
+ forall x : R, forall y : Z,
+ (Rabs (x - IZR y) < 1)%R -> Zfloor x = (y-1)%Z \/ Zfloor x = y.
+Proof.
+ intros x y BALL.
+ apply Rabs_lt_inv in BALL.
+ case (Rcompare_spec x (IZR y)); intro CMP.
+ - left. apply Zfloor_imp.
+ ring_simplify (y-1+1).
+ rewrite minus_IZR. lra.
+ - subst.
+ rewrite Zfloor_IZR. right. reflexivity.
+ - right. apply Zfloor_imp.
+ rewrite plus_IZR. lra.
+Qed.
+
+Theorem div_approx_reals_correct:
+ forall a b : Z, forall x : R,
+ b > 0 ->
+ (Rabs (x - IZR a/ IZR b) < 1/2)%R ->
+ div_approx_reals a b x = (a/b)%Z.
+Proof.
+ intros a b x bPOS GAP.
+ assert (0 < IZR b)%R by (apply IZR_lt ; lia).
+ unfold div_approx_reals.
+ pose proof (Znearest_imp2 (fun x => negb (Z.even x)) x) as NEAR.
+ assert (Rabs (IZR (ZnearestE x) - IZR a/ IZR b) < 1)%R as BALL.
+ { pose proof (Rabs_triang (IZR (ZnearestE x) - x)
+ (x - IZR a/ IZR b)) as TRI.
+ ring_simplify (IZR (ZnearestE x) - x + (x - IZR a / IZR b))%R in TRI.
+ lra.
+ }
+ clear GAP NEAR.
+ rewrite Rabs_minus_sym in BALL.
+ pose proof (floor_ball1 _ _ BALL) as FLOOR.
+ clear BALL.
+ rewrite Zfloor_div in FLOOR by lia.
+ pose proof (Z_div_mod_eq_full a b) as DIV_MOD.
+ assert (0 < b) as bPOS' by lia.
+ pose proof (Z.mod_pos_bound a b bPOS') as MOD_BOUNDS.
+ case Z.ltb_spec; intro; destruct FLOOR; lia.
+Qed.
+
+Opaque approx_inv.
+
+Theorem fp_divu32_correct :
+ forall (ge : genv) (sp: val) cmenv memenv
+ (le : letenv) (expr_a expr_b : expr) (a b : int)
+ (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vint a))
+ (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vint b))
+ (b_nz : (Int.unsigned b > 0)%Z),
+ eval_expr ge sp cmenv memenv le (fp_divu32 expr_a expr_b)
+ (Vint (Int.divu a b)).
+Proof.
+ intros.
+ assert (eval_expr ge sp cmenv memenv (Vint b :: Vint a :: le)
+ (Eletvar 0) (Vint b)) as EVAL_b'.
+ { constructor. reflexivity. }
+ destruct (approx_inv_correct ge sp cmenv memenv (Vint b :: Vint a :: le) (Eletvar 0) b EVAL_b' b_nz) as (inv_b & inv_b_eval & inv_b_finite & inv_b_correct).
+ unfold fp_divu32.
+ repeat econstructor.
+ { exact EVAL_a. }
+ { apply eval_lift. exact EVAL_b. }
+ exact inv_b_eval.
+ cbn. f_equal.
+ rewrite <- Float.of_intu_of_longu.
+ unfold Float.to_longu_ne.
+ rewrite ZofB_ne_range_correct by lia.
+ set (prod := (Float.mul (Float.of_intu a) inv_b)).
+ pose proof (Int.unsigned_range a) as a_range.
+ pose proof (Int.unsigned_range b) as b_range.
+ change Int.modulus with 4294967296 in a_range.
+ change Int.modulus with 4294967296 in b_range.
+ set (prod' := (B2R _ _ prod)).
+ set (prod_r := ZnearestE prod') in *.
+
+ Local Transparent Float.mul Float.of_intu.
+ unfold Float.mul, Float.of_intu in prod.
+ set (a' := Int.unsigned a) in *.
+ set (b' := Int.unsigned b) in *.
+ assert (IZR_a' : (0 <= IZR a' <= 4294967295)%R).
+ { split; apply IZR_le; lia. }
+ assert (IZR_b' : (1 <= IZR b' <= 4294967295)%R).
+ { split; apply IZR_le; lia. }
+ assert (SILLY : -2^53 <= a' <= 2^53).
+ { cbn; lia. }
+ destruct (BofZ_exact 53 1024 (@eq_refl _ Lt) (@eq_refl _ Lt) a' SILLY) as (C0E & C0F & _).
+ clear SILLY.
+ pose proof (Bmult_correct 53 1024 (@eq_refl _ Lt) (@eq_refl _ Lt) Float.binop_nan mode_NE
+ (BofZ 53 1024 (@eq_refl _ Lt) (@eq_refl _ Lt)a') inv_b) as C1.
+ set (inv_b_r := B2R 53 1024 inv_b) in *.
+ assert (INV_RANGE : (1/4294967296 <= 1/IZR b' <= 1)%R).
+ { split; unfold Rdiv.
+ - apply Rmult_le_compat_l. lra.
+ apply Rinv_le. apply IZR_lt. lia.
+ apply IZR_le. lia.
+ - replace 1%R with (1 / 1)%R at 2 by field.
+ apply Rmult_le_compat_l. lra.
+ apply Rinv_le. apply IZR_lt. lia.
+ apply IZR_le. lia.
+ }
+ apply Rabs_def2b in inv_b_correct.
+ rewrite Rlt_bool_true in C1; cycle 1.
+ { clear C1.
+ rewrite C0E.
+ apply (Rabs_relax (bpow radix2 64)).
+ { apply bpow_lt. lia. }
+ replace inv_b_r with (1 / IZR b' + (inv_b_r - 1 / IZR b'))%R by ring.
+ set (delta := (inv_b_r - 1 / IZR b')%R) in *.
+ unfold approx_inv_thresh in inv_b_correct.
+ cbn.
+ assert (b'_RANGE : (1 <= IZR b' <= 4294967295)%R).
+ { split; apply IZR_le; lia.
+ }
+ assert (a'_RANGE : (0 <= IZR a' <= 4294967295)%R).
+ { split; apply IZR_le; lia.
+ }
+ gappa.
+ }
+ rewrite C0F in C1.
+ cbn in C1.
+ rewrite C0E in C1.
+ rewrite inv_b_finite in C1.
+ fold prod in C1.
+ fold prod' in C1.
+ destruct C1 as (C1E & C1F & _).
+ rewrite C1F. cbn.
+
+ assert(prod'_range : (0 <= prod' <= 17179869181/4)%R).
+ {
+ rewrite C1E.
+ replace inv_b_r with (1/IZR b' + (inv_b_r - 1 / IZR b'))%R by ring.
+ assert (a'_RANGE : (0 <= IZR a' <= 4294967295)%R).
+ { split; apply IZR_le; lia.
+ }
+ unfold approx_inv_thresh in inv_b_correct.
+ set (true_inv := (1 / IZR b')%R) in *.
+ set (delta := (inv_b_r - true_inv)%R) in *.
+ gappa.
+ }
+
+ assert(0 <= prod_r <= 4294967295) as prod_r_range.
+ { unfold prod_r.
+ rewrite <- (Znearest_IZR (fun x => negb (Z.even x)) 0).
+ replace 4294967295 with (ZnearestE (17179869181 / 4)%R); cycle 1.
+ { apply Znearest_imp.
+ apply Rabs_lt.
+ lra.
+ }
+ split; apply Znearest_le; lra.
+ }
+
+ replace (_ && _ ) with true; cycle 1.
+ {
+ symmetry.
+ rewrite andb_true_iff.
+ split; apply Zle_imp_le_bool; lia.
+ }
+ cbn.
+ f_equal.
+ unfold Int.divu.
+ assert(Rabs
+ (round radix2 (FLT_exp (-1074) 53) ZnearestE (IZR a' * inv_b_r) - (IZR a' * inv_b_r)) <= 1/512)%R as R1 by gappa.
+
+ assert ( (Rabs (B2R 53 1024 prod - IZR (Int.unsigned a) / IZR (Int.unsigned b)) < 1 / 2)%R) as NEAR.
+ {
+ unfold prod.
+ pose proof (Bmult_correct 53 1024 (@eq_refl _ Lt) (@eq_refl _ Lt) Float.binop_nan mode_NE (BofZ 53 1024 (@eq_refl _ Lt) (@eq_refl _ Lt) a') inv_b) as C2.
+ rewrite C0E in C2.
+ rewrite Rlt_bool_true in C2; cycle 1.
+ { clear C2.
+ apply (Rabs_relax (bpow radix2 64)).
+ { apply bpow_lt. reflexivity. }
+ cbn.
+ fold inv_b_r.
+ replace inv_b_r with (1 / IZR b' + (inv_b_r - 1 / IZR b'))%R by ring.
+ set (delta := (inv_b_r - 1 / IZR b')%R) in *.
+ unfold approx_inv_thresh in *.
+ gappa.
+ }
+ destruct C2 as (C2E & C2F & _).
+ rewrite C2E.
+ fold inv_b_r a' b'.
+ replace ((round radix2 (FLT_exp (3 - 1024 - 53) 53) (round_mode mode_NE) (IZR a' * inv_b_r)) -
+ (IZR a' / IZR b'))%R with
+ (((round radix2 (FLT_exp (3 - 1024 - 53) 53) (round_mode mode_NE) (IZR a' * inv_b_r)) -
+ (IZR a' * inv_b_r)) +
+ (IZR a' * (inv_b_r - 1 / IZR b')))%R by (field ; lra).
+ set(delta := (inv_b_r - 1 / IZR b')%R) in *.
+ cbn.
+ unfold approx_inv_thresh in *.
+ assert (Rabs(IZR a' * delta) <= 3/8)%R as R2.
+ { apply Rabs_le.
+ split; nra.
+ }
+ rewrite <- C1E.
+ rewrite <- C1E in R1.
+ pose proof (Rabs_triang (prod' - IZR a' * inv_b_r) (IZR a' * delta))%R.
+ lra.
+ }
+ pose proof (div_approx_reals_correct (Int.unsigned a) (Int.unsigned b) prod' b_nz NEAR) as DIV_CORRECT.
+ rewrite <- DIV_CORRECT.
+
+ unfold div_approx_reals in *.
+ fold a' b' prod' prod_r.
+ unfold Int64.mul.
+ rewrite Int64.unsigned_repr by (cbn; lia).
+ rewrite Int64.unsigned_repr by (cbn; lia).
+ unfold Int64.sub.
+ rewrite Int64.unsigned_repr by (cbn; lia).
+ rewrite Int64.unsigned_repr by (cbn; nia).
+ assert (FMA_RANGE : Int64.min_signed <= a' - prod_r * b' <= Int64.max_signed).
+ { cbn.
+ unfold prod_r.
+ rewrite <- C1E in R1.
+ assert (IZR (-9223372036854775808) <= IZR (a' - ZnearestE prod' * b') <= IZR 9223372036854775807)%R.
+ 2: split; apply le_IZR; lra.
+ rewrite minus_IZR.
+ rewrite mult_IZR.
+ replace (IZR (ZnearestE prod')) with (prod' + (IZR (ZnearestE prod') - prod'))%R by ring.
+ pose proof (Znearest_imp2 (fun x => negb (Z.even x)) prod') as R2.
+ set (delta1 := (IZR (ZnearestE prod') - prod')%R) in *.
+ replace prod' with ((prod' - IZR a' * inv_b_r) + IZR a' * (inv_b_r - 1 / IZR b')
+ + IZR a' / IZR b')%R by (field; lra).
+ set (delta2 := (inv_b_r - 1 / IZR b')%R) in *.
+ set (delta3 := (prod' - IZR a' * inv_b_r)%R) in *.
+ replace (IZR a' - (delta3 + IZR a' * delta2 + IZR a' / IZR b' + delta1) * IZR b')%R with
+ (- (delta3 + IZR a' * delta2 + delta1) * IZR b')%R by (field; lra).
+ unfold approx_inv_thresh in *.
+ assert (Rabs(IZR a' * delta2) <= 1/4)%R as R4.
+ { apply Rabs_le.
+ split;
+ nra. }
+ set (delta4 := (IZR a' * delta2)%R) in *.
+ apply Rabs_def2b in R1.
+ apply Rabs_def2b in R2.
+ apply Rabs_def2b in R4.
+ split; nra.
+ }
+ fold a' b' prod_r in DIV_CORRECT.
+
+ pose proof (Zlt_cases (a' - prod_r * b') 0) as CMP.
+ destruct (Z.ltb (a' - prod_r * b') 0).
+ - replace (Int64.lt (Int64.repr (a' - prod_r * b')) Int64.zero) with true; cycle 1.
+ { unfold Int64.lt.
+ change (Int64.signed Int64.zero) with 0.
+ rewrite Int64.signed_repr by exact FMA_RANGE.
+ rewrite zlt_true by lia.
+ reflexivity.
+ }
+ cbn.
+ f_equal.
+ rewrite Int64.add_signed.
+ rewrite (Int64.signed_repr prod_r) by (cbn ; lia).
+ rewrite Int64.signed_repr by (cbn ; lia).
+ unfold Int64.loword.
+ rewrite Int64.unsigned_repr. reflexivity.
+ split.
+ 2: cbn; lia.
+ replace (prod_r + (-1)) with (prod_r - 1) by lia.
+ rewrite DIV_CORRECT.
+ apply Z.div_pos; lia.
+
+ - replace (Int64.lt (Int64.repr (a' - prod_r * b')) Int64.zero) with false; cycle 1.
+ { unfold Int64.lt.
+ change (Int64.signed Int64.zero) with 0.
+ rewrite Int64.signed_repr by exact FMA_RANGE.
+ rewrite zlt_false by lia.
+ reflexivity.
+ }
+ cbn.
+ unfold Int64.loword.
+ rewrite Int64.unsigned_repr by (cbn; lia).
+ reflexivity.
+Qed.
+
diff --git a/kvx/FPDivision64.v b/kvx/FPDivision64.v
new file mode 100644
index 00000000..4e91b853
--- /dev/null
+++ b/kvx/FPDivision64.v
@@ -0,0 +1,2053 @@
+(*
+This needs a special gappa script
+
+#!/bin/sh
+/home/monniaux/.opam/4.12.0+flambda/bin/gappa -Eprecision=100 "$@"
+
+in PATH before the normal gappa
+ *)
+
+From Flocq Require Import Core Digits Operations Round Bracket Sterbenz
+ Binary Round_odd Bits.
+Require Archi.
+Require Import Coqlib.
+Require Import Compopts.
+Require Import AST.
+Require Import Integers.
+Require Import Floats.
+Require Import Op.
+Require Import CminorSel.
+Require Import OpHelpers.
+Require Import ExtFloats.
+Require Import DecBoolOps.
+Require Import Chunks.
+Require Import Builtins.
+Require Import Values Globalenvs.
+Require Compopts.
+Require Import Psatz.
+Require Import IEEE754_extra.
+Require Import Memory.
+
+From Gappa Require Import Gappa_tactic.
+
+
+Lemma Znearest_lub :
+ forall choice (n : Z) (x : R), (IZR n <= x)%R -> (n <= Znearest choice x)%Z.
+Proof.
+ intros until x. intro BND.
+ pose proof (Zfloor_lub n x BND).
+ pose proof (Znearest_ge_floor choice x).
+ lia.
+Qed.
+
+Lemma Znearest_glb :
+ forall choice (n : Z) (x : R), (x <= IZR n)%R -> (Znearest choice x <= n)%Z.
+Proof.
+ intros until x. intro BND.
+ pose proof (Zceil_glb n x BND).
+ pose proof (Znearest_le_ceil choice x).
+ lia.
+Qed.
+
+Definition approx_inv_longu b :=
+ let invb_s := ExtValues.invfs (Val.maketotal (Val.singleoflongu b)) in
+ let invb_d := Val.floatofsingle invb_s in
+ let b_d := Val.maketotal (Val.floatoflongu b) in
+ let one := Vfloat (ExtFloat.one) in
+ let alpha := ExtValues.fmsubf one invb_d b_d in
+ ExtValues.fmaddf invb_d alpha invb_d.
+
+Lemma Rabs_relax:
+ forall b b' (INEQ : (b < b')%R) x,
+ (-b <= x <= b)%R -> (Rabs x < b')%R.
+Proof.
+ intros.
+ apply Rabs_lt.
+ lra.
+Qed.
+
+Definition approx_inv_thresh := (25/2251799813685248)%R.
+(* 1.11022302462516e-14 *)
+
+Theorem approx_inv_longu_correct_abs :
+ forall b,
+ (0 < Int64.unsigned b)%Z ->
+ exists (f : float),
+ (approx_inv_longu (Vlong b)) = Vfloat f /\
+ is_finite _ _ f = true /\ (Rabs((B2R _ _ f) - (1 / IZR (Int64.unsigned b))) <= approx_inv_thresh)%R.
+Proof.
+ intros b NONZ.
+ unfold approx_inv_longu.
+ cbn.
+ econstructor.
+ split.
+ reflexivity.
+ Local Transparent Float.neg Float.of_single Float32.of_longu Float32.div Float.of_longu Float32.of_int Float.of_int.
+ unfold Float.fma, Float.neg, Float.of_single, Float32.of_longu, ExtFloat32.inv, Float32.div, Float.of_longu, ExtFloat32.one, Float32.of_int, ExtFloat.one, Float.of_int.
+ set (re := (@eq_refl Datatypes.comparison Lt)).
+ change (Int.signed (Int.repr 1)) with 1%Z.
+ set (b' := Int64.unsigned b) in *.
+ pose proof (Int64.unsigned_range b) as RANGE.
+ change Int64.modulus with 18446744073709551616%Z in RANGE.
+ assert(1 <= IZR b' <= 18446744073709551616)%R as RANGE'.
+ { split; apply IZR_le; lia.
+ }
+
+ assert (-16777216 <= 1 <= 16777216)%Z as SILLY by lia.
+ destruct (BofZ_exact 24 128 re re 1 SILLY) as (C0R & C0F & _).
+ clear SILLY.
+ set (one_s := (BofZ 24 128 re re 1)) in *.
+
+ pose proof (BofZ_correct 24 128 re re b') as C1.
+ cbn in C1.
+ rewrite Rlt_bool_true in C1; cycle 1.
+ { clear C1.
+ eapply (Rabs_relax (IZR 18446744073709551616)).
+ lra.
+ set (b'' := IZR b') in *.
+ gappa.
+ }
+ destruct C1 as (C1R & C1F & _).
+ set (b_s := (BofZ 24 128 re re b')) in *.
+
+ assert(1 <= B2R 24 128 b_s <= 18446744073709551616)%R as b_s_RANGE.
+ { rewrite C1R.
+ gappa.
+ }
+ assert(B2R 24 128 b_s <> 0)%R as b_s_NONZ by lra.
+
+ pose proof (Bdiv_correct 24 128 re re Float32.binop_nan mode_NE one_s b_s b_s_NONZ) as C2.
+ rewrite Rlt_bool_true in C2; cycle 1.
+ { clear C2.
+ apply Rabs_relax with (b := 1%R).
+ { cbn; lra. }
+ rewrite C0R.
+ set (r_b_s := B2R 24 128 b_s) in *.
+ cbn.
+ gappa.
+ }
+
+ destruct C2 as (C2R & C2F & _).
+ set (invb_s := (Bdiv 24 128 re re Float32.binop_nan mode_NE one_s b_s)) in *.
+ rewrite C0F in C2F.
+
+ assert ((1/18446744073709551616 <= B2R 24 128 invb_s <= 1)%R) as invb_s_RANGE.
+ { rewrite C2R.
+ set (r_b_s := B2R 24 128 b_s) in *.
+ rewrite C0R.
+ cbn.
+ gappa.
+ }
+
+ pose proof (Bconv_correct 24 128 53 1024 re re Float.of_single_nan mode_NE invb_s C2F) as C3.
+ rewrite Rlt_bool_true in C3; cycle 1.
+ { clear C3.
+ set (r_invb_s := (B2R 24 128 invb_s)) in *.
+ apply Rabs_relax with (b := 1%R).
+ { replace 1%R with (bpow radix2 0)%R by reflexivity.
+ apply bpow_lt.
+ lia.
+ }
+ cbn.
+ gappa.
+ }
+
+ destruct C3 as (C3R & C3F & _).
+ set (invb_d := (Bconv 24 128 53 1024 re re Float.of_single_nan mode_NE invb_s)) in *.
+ assert ((1/18446744073709551616 <= B2R 53 1024 invb_d <= 1)%R) as invb_d_RANGE.
+ {
+ rewrite C3R.
+ set (r_invb_s := B2R 24 128 invb_s) in *.
+ cbn.
+ gappa.
+ }
+
+ pose proof (is_finite_Bopp 53 1024 Float.neg_nan invb_d) as opp_finite.
+ rewrite C3F in opp_finite.
+
+ pose proof (BofZ_correct 53 1024 re re 1) as C4.
+ rewrite Rlt_bool_true in C4; cycle 1.
+ { clear C4.
+ cbn.
+ eapply (Rabs_relax (IZR 18446744073709551616)).
+ lra.
+ set (b'' := IZR b') in *.
+ gappa.
+ }
+ destruct C4 as (C4R & C4F & _).
+
+ pose proof (BofZ_correct 53 1024 re re b') as C5.
+ cbn in C5.
+ rewrite Rlt_bool_true in C5; cycle 1.
+ { clear C5.
+ eapply (Rabs_relax (IZR 18446744073709551616)).
+ lra.
+ set (b'' := IZR b') in *.
+ gappa.
+ }
+ destruct C5 as (C5R & C5F & _).
+ set (b_d := (BofZ 53 1024 re re b')) in *.
+
+ assert(1 <= B2R 53 1024 b_d <= 18446744073709551616)%R as b_d_RANGE.
+ { rewrite C5R.
+ gappa.
+ }
+
+ pose proof (Bfma_correct 53 1024 re re Float.fma_nan mode_NE
+ (Bopp 53 1024 Float.neg_nan invb_d) (BofZ 53 1024 re re b')
+ (BofZ 53 1024 re re 1) opp_finite C5F C4F) as C6.
+ rewrite Rlt_bool_true in C6; cycle 1.
+ { clear C6.
+ rewrite C4R.
+ rewrite B2R_Bopp.
+ cbn.
+ eapply (Rabs_relax (IZR 18446744073709551616)).
+ { lra. }
+ fold invb_d.
+ fold b_d.
+ set (r_invb_d := B2R 53 1024 invb_d) in *.
+ set (r_b_d := B2R 53 1024 b_d) in *.
+ gappa.
+ }
+ fold b_d in C6.
+ destruct C6 as (C6R & C6F & _).
+
+ pose proof (Bfma_correct 53 1024 re re Float.fma_nan mode_NE
+ (Bfma 53 1024 re re Float.fma_nan mode_NE
+ (Bopp 53 1024 Float.neg_nan invb_d) b_d (BofZ 53 1024 re re 1))
+ invb_d invb_d C6F C3F C3F) as C7.
+ rewrite Rlt_bool_true in C7; cycle 1.
+ { clear C7.
+ rewrite C6R.
+ rewrite B2R_Bopp.
+ eapply (Rabs_relax (bpow radix2 64)).
+ { apply bpow_lt. lia. }
+ rewrite C4R.
+ cbn.
+ set (r_invb_d := B2R 53 1024 invb_d) in *.
+ set (r_b_d := B2R 53 1024 b_d) in *.
+ gappa.
+ }
+ destruct C7 as (C7R & C7F & _).
+
+ split. assumption.
+ rewrite C7R.
+ rewrite C6R.
+ rewrite C5R.
+ rewrite C4R.
+ rewrite B2R_Bopp.
+ rewrite C3R.
+ rewrite C2R.
+ rewrite C1R.
+ rewrite C0R.
+ cbn.
+ set(b1 := IZR b') in *.
+ replace (round radix2 (FLT_exp (-1074) 53) ZnearestE 1) with 1%R by gappa.
+ set (bd := round radix2 (FLT_exp (-1074) 53) ZnearestE b1).
+ set (x0 := round radix2 (FLT_exp (-1074) 53) ZnearestE
+ (round radix2 (FLT_exp (-149) 24) ZnearestE
+ (1 / round radix2 (FLT_exp (-149) 24) ZnearestE b1))).
+ set (alpha0 := (- x0 * bd + 1)%R).
+ set (y1 := (round radix2 (FLT_exp (-1074) 53) ZnearestE alpha0 * x0 + x0)%R).
+ set (x1 := round radix2 (FLT_exp (-1074) 53) ZnearestE y1).
+ replace (x1 - 1/b1)%R with ((y1-1/b1)+(x1-y1))%R by ring.
+
+ assert(alpha0 = -((x0-1/bd)/(1/bd)))%R as alpha0_EQ.
+ { unfold alpha0.
+ field.
+ unfold bd.
+ gappa.
+ }
+ assert(y1-1/b1 = ((round radix2 (FLT_exp (-1074) 53) ZnearestE alpha0)
+ - alpha0) * x0
+ + alpha0*(x0-1/b1) - ((bd-b1)/b1) * x0)%R as y1_EQ.
+ { unfold y1, alpha0.
+ field.
+ lra.
+ }
+ assert(Rabs alpha0 <= 257/2147483648)%R as alpha0_ABS.
+ { rewrite alpha0_EQ.
+ unfold x0, bd.
+ gappa.
+ }
+ assert (Rabs (x0 - 1 / b1) <= 3/33554432)%R as x0_delta_ABS.
+ { unfold x0.
+ gappa.
+ }
+ set (x0_delta := (x0 - 1 / b1)%R) in *.
+ assert (Rabs ((bd - b1) / b1) <= 1/9007199254740992)%R as bd_delta_ABS.
+ { unfold bd.
+ gappa.
+ }
+ set (bd_delta := ((bd - b1) / b1)%R) in *.
+ set (rnd_alpha0_delta := (round radix2 (FLT_exp (-1074) 53) ZnearestE alpha0 - alpha0)%R) in *.
+ assert (Rabs rnd_alpha0_delta <= 1/75557863725914323419136)%R as rnd_alpha0_delta_ABS.
+ { unfold rnd_alpha0_delta.
+ gappa.
+ }
+ assert (1/18446744073709551616 <= x0 <= 1)%R as x0_RANGE.
+ { unfold x0.
+ gappa.
+ }
+ assert (Rabs (y1 - 1 / b1) <= 49/4503599627370496)%R as y1_delta_ABS.
+ { rewrite y1_EQ.
+ gappa.
+ }
+ assert (Rabs(x1 - y1) <= 1/9007199254740992)%R as x1_delta_ABS.
+ { unfold x1.
+ gappa.
+ }
+ set (y1_delta := (y1 - 1 / b1)%R) in *.
+ set (x1_delta := (x1-y1)%R) in *.
+ unfold approx_inv_thresh.
+ gappa.
+Qed.
+
+Local Notation "'rd'" := (round radix2 (FLT_exp (-1074) 53) ZnearestE).
+Local Notation "'rs'" := (round radix2 (FLT_exp (-149) 24) ZnearestE).
+
+Definition approx_inv_rel_thresh := (1049/72057594037927936)%R.
+Theorem approx_inv_longu_correct_rel :
+ forall b,
+ (0 < Int64.unsigned b)%Z ->
+ exists (f : float),
+ (approx_inv_longu (Vlong b)) = Vfloat f /\
+ is_finite _ _ f = true /\ (Rabs(IZR (Int64.unsigned b) * (B2R _ _ f) - 1) <= approx_inv_rel_thresh)%R.
+Proof.
+ intros b NONZ.
+ unfold approx_inv_longu.
+ cbn.
+ econstructor.
+ split.
+ reflexivity.
+ Local Transparent Float.neg Float.of_single Float32.of_longu Float32.div Float.of_longu Float32.of_int Float.of_int.
+ unfold Float.fma, Float.neg, Float.of_single, Float32.of_longu, ExtFloat32.inv, Float32.div, Float.of_longu, ExtFloat32.one, Float32.of_int, ExtFloat.one, Float.of_int.
+ set (re := (@eq_refl Datatypes.comparison Lt)).
+ change (Int.signed (Int.repr 1)) with 1%Z.
+ set (b' := Int64.unsigned b) in *.
+ pose proof (Int64.unsigned_range b) as RANGE.
+ change Int64.modulus with 18446744073709551616%Z in RANGE.
+ assert(1 <= IZR b' <= 18446744073709551616)%R as RANGE'.
+ { split; apply IZR_le; lia.
+ }
+
+ assert (-16777216 <= 1 <= 16777216)%Z as SILLY by lia.
+ destruct (BofZ_exact 24 128 re re 1 SILLY) as (C0R & C0F & _).
+ clear SILLY.
+ set (one_s := (BofZ 24 128 re re 1)) in *.
+
+ pose proof (BofZ_correct 24 128 re re b') as C1.
+ cbn in C1.
+ rewrite Rlt_bool_true in C1; cycle 1.
+ { clear C1.
+ eapply (Rabs_relax (IZR 18446744073709551616)).
+ lra.
+ set (b'' := IZR b') in *.
+ gappa.
+ }
+ destruct C1 as (C1R & C1F & _).
+ set (b_s := (BofZ 24 128 re re b')) in *.
+
+ assert(1 <= B2R 24 128 b_s <= 18446744073709551616)%R as b_s_RANGE.
+ { rewrite C1R.
+ gappa.
+ }
+ assert(B2R 24 128 b_s <> 0)%R as b_s_NONZ by lra.
+
+ pose proof (Bdiv_correct 24 128 re re Float32.binop_nan mode_NE one_s b_s b_s_NONZ) as C2.
+ rewrite Rlt_bool_true in C2; cycle 1.
+ { clear C2.
+ apply Rabs_relax with (b := 1%R).
+ { cbn; lra. }
+ rewrite C0R.
+ set (r_b_s := B2R 24 128 b_s) in *.
+ cbn.
+ gappa.
+ }
+
+ destruct C2 as (C2R & C2F & _).
+ set (invb_s := (Bdiv 24 128 re re Float32.binop_nan mode_NE one_s b_s)) in *.
+ rewrite C0F in C2F.
+
+ assert ((1/18446744073709551616 <= B2R 24 128 invb_s <= 1)%R) as invb_s_RANGE.
+ { rewrite C2R.
+ set (r_b_s := B2R 24 128 b_s) in *.
+ rewrite C0R.
+ cbn.
+ gappa.
+ }
+
+ pose proof (Bconv_correct 24 128 53 1024 re re Float.of_single_nan mode_NE invb_s C2F) as C3.
+ rewrite Rlt_bool_true in C3; cycle 1.
+ { clear C3.
+ set (r_invb_s := (B2R 24 128 invb_s)) in *.
+ apply Rabs_relax with (b := 1%R).
+ { replace 1%R with (bpow radix2 0)%R by reflexivity.
+ apply bpow_lt.
+ lia.
+ }
+ cbn.
+ gappa.
+ }
+
+ destruct C3 as (C3R & C3F & _).
+ set (invb_d := (Bconv 24 128 53 1024 re re Float.of_single_nan mode_NE invb_s)) in *.
+ assert ((1/18446744073709551616 <= B2R 53 1024 invb_d <= 1)%R) as invb_d_RANGE.
+ {
+ rewrite C3R.
+ set (r_invb_s := B2R 24 128 invb_s) in *.
+ cbn.
+ gappa.
+ }
+
+ pose proof (is_finite_Bopp 53 1024 Float.neg_nan invb_d) as opp_finite.
+ rewrite C3F in opp_finite.
+
+ pose proof (BofZ_correct 53 1024 re re 1) as C4.
+ rewrite Rlt_bool_true in C4; cycle 1.
+ { clear C4.
+ cbn.
+ eapply (Rabs_relax (IZR 18446744073709551616)).
+ lra.
+ set (b'' := IZR b') in *.
+ gappa.
+ }
+ destruct C4 as (C4R & C4F & _).
+
+ pose proof (BofZ_correct 53 1024 re re b') as C5.
+ cbn in C5.
+ rewrite Rlt_bool_true in C5; cycle 1.
+ { clear C5.
+ eapply (Rabs_relax (IZR 18446744073709551616)).
+ lra.
+ set (b'' := IZR b') in *.
+ gappa.
+ }
+ destruct C5 as (C5R & C5F & _).
+ set (b_d := (BofZ 53 1024 re re b')) in *.
+
+ assert(1 <= B2R 53 1024 b_d <= 18446744073709551616)%R as b_d_RANGE.
+ { rewrite C5R.
+ gappa.
+ }
+
+ pose proof (Bfma_correct 53 1024 re re Float.fma_nan mode_NE
+ (Bopp 53 1024 Float.neg_nan invb_d) (BofZ 53 1024 re re b')
+ (BofZ 53 1024 re re 1) opp_finite C5F C4F) as C6.
+ rewrite Rlt_bool_true in C6; cycle 1.
+ { clear C6.
+ rewrite C4R.
+ rewrite B2R_Bopp.
+ cbn.
+ eapply (Rabs_relax (IZR 18446744073709551616)).
+ { lra. }
+ fold invb_d.
+ fold b_d.
+ set (r_invb_d := B2R 53 1024 invb_d) in *.
+ set (r_b_d := B2R 53 1024 b_d) in *.
+ gappa.
+ }
+ fold b_d in C6.
+ destruct C6 as (C6R & C6F & _).
+
+ pose proof (Bfma_correct 53 1024 re re Float.fma_nan mode_NE
+ (Bfma 53 1024 re re Float.fma_nan mode_NE
+ (Bopp 53 1024 Float.neg_nan invb_d) b_d (BofZ 53 1024 re re 1))
+ invb_d invb_d C6F C3F C3F) as C7.
+ rewrite Rlt_bool_true in C7; cycle 1.
+ { clear C7.
+ rewrite C6R.
+ rewrite B2R_Bopp.
+ eapply (Rabs_relax (bpow radix2 64)).
+ { apply bpow_lt. lia. }
+ rewrite C4R.
+ cbn.
+ set (r_invb_d := B2R 53 1024 invb_d) in *.
+ set (r_b_d := B2R 53 1024 b_d) in *.
+ gappa.
+ }
+ destruct C7 as (C7R & C7F & _).
+
+ split. assumption.
+ rewrite C7R.
+ rewrite C6R.
+ rewrite C5R.
+ rewrite C4R.
+ rewrite B2R_Bopp.
+ rewrite C3R.
+ rewrite C2R.
+ rewrite C1R.
+ rewrite C0R.
+ cbn.
+ set(b1 := IZR b') in *.
+
+ replace (rd 1) with 1%R by gappa.
+ replace (rd (rs (1 / rs b1))) with
+ ((((rd (rs (1 / rs b1)) - (/b1))/(/b1))+1)*(/ b1))%R ; cycle 1.
+ { field. lra. }
+ set (er0 := ((rd (rs (1 / rs b1)) - (/b1))/(/b1))%R).
+ replace (rd b1) with ((((rd b1) - b1)/b1 + 1) * b1)%R; cycle 1.
+ { field. lra. }
+ set (er1 := (((rd b1) - b1)/b1)%R).
+ replace (- ((er0 + 1) * / b1) * ((er1 + 1) * b1) + 1)%R
+ with (1 - (er0 + 1)*(er1 + 1))%R ; cycle 1.
+ { field. lra. }
+ set (z0 := (1 - (er0 + 1) * (er1 + 1))%R).
+ assert (Rabs er0 <= 257/2147483648)%R as er0_ABS.
+ { unfold er0.
+ gappa.
+ }
+ assert (Rabs er1 <= 1/9007199254740992)%R as er1_ABS.
+ { unfold er1.
+ gappa.
+ }
+ replace (rd z0) with ((rd(z0)-z0)+z0)%R by ring.
+ set (ea0 := (rd(z0)-z0)%R).
+ assert (Rabs ea0 <= 1/75557863725914323419136)%R as ea0_ABS.
+ { unfold ea0. unfold z0.
+ gappa.
+ }
+ set (z1 := ((ea0 + z0) * ((er0 + 1) * / b1) + (er0 + 1) * / b1)%R).
+ replace (rd z1) with ((((rd z1)-z1)/z1+1)*z1)%R; cycle 1.
+ { field.
+ unfold z1.
+ unfold z0.
+ gappa.
+ }
+ set (er2 := ((rd z1 - z1) / z1)%R).
+ assert (Rabs er2 <= 1/9007199254740992)%R as er2_ABS.
+ { unfold er2.
+ unfold z1, z0.
+ gappa.
+ }
+ unfold z1, z0.
+ replace (b1 *
+ ((er2 + 1) *
+ ((ea0 + (1 - (er0 + 1) * (er1 + 1))) * ((er0 + 1) * / b1) +
+ (er0 + 1) * / b1)) - 1)%R
+ with (-er0*er0*er1*er2 - er0*er0*er1 + ea0*er0*er2 - er0*er0*er2 - 2*er0*er1*er2 + ea0*er0 - er0*er0 - 2*er0*er1 + ea0*er2 - er1*er2 + ea0 - er1 + er2)%R; cycle 1.
+ { field. lra. }
+ unfold approx_inv_rel_thresh.
+ gappa.
+Qed.
+
+Definition step1_real_inv_longu b :=
+ let invb_s := ExtValues.invfs (Val.maketotal (Val.singleoflongu b)) in
+ Val.floatofsingle invb_s.
+
+Definition step1_real_inv_thresh := (3/33554432)%R.
+(* 8.94069671630859e-8 *)
+
+Theorem step1_real_inv_longu_correct :
+ forall b,
+ (0 < Int64.unsigned b)%Z ->
+ exists (f : float),
+ (step1_real_inv_longu (Vlong b)) = Vfloat f /\
+ (B2R _ _ f) = (rd (rs (1 / rs (IZR (Int64.unsigned b))))) /\
+ is_finite _ _ f = true /\
+ Bsign _ _ f = false.
+Proof.
+ intros b NONZ.
+ unfold step1_real_inv_longu.
+ cbn.
+ econstructor.
+ split.
+ reflexivity.
+ Local Transparent Float.neg Float.of_single Float32.of_longu Float32.div Float.of_longu Float32.of_int Float.of_int.
+ unfold Float.fma, Float.neg, Float.of_single, Float32.of_longu, ExtFloat32.inv, Float32.div, Float.of_longu, ExtFloat32.one, Float32.of_int, ExtFloat.one, Float.of_int.
+ set (re := (@eq_refl Datatypes.comparison Lt)).
+ change (Int.signed (Int.repr 1)) with 1%Z.
+ set (b' := Int64.unsigned b) in *.
+ pose proof (Int64.unsigned_range b) as RANGE.
+ change Int64.modulus with 18446744073709551616%Z in RANGE.
+ assert(1 <= IZR b' <= 18446744073709551616)%R as RANGE'.
+ { split; apply IZR_le; lia.
+ }
+
+ assert (-16777216 <= 1 <= 16777216)%Z as SILLY by lia.
+ destruct (BofZ_exact 24 128 re re 1 SILLY) as (C0R & C0F & _).
+ clear SILLY.
+ set (one_s := (BofZ 24 128 re re 1)) in *.
+
+ pose proof (BofZ_correct 24 128 re re b') as C1.
+ cbn in C1.
+ rewrite Rlt_bool_true in C1; cycle 1.
+ { clear C1.
+ eapply (Rabs_relax (IZR 18446744073709551616)).
+ lra.
+ set (b'' := IZR b') in *.
+ gappa.
+ }
+ rewrite (Zlt_bool_false b' 0) in C1 by lia.
+ destruct C1 as (C1R & C1F & C1S).
+ set (b_s := (BofZ 24 128 re re b')) in *.
+
+ assert(1 <= B2R 24 128 b_s <= 18446744073709551616)%R as b_s_RANGE.
+ { rewrite C1R.
+ gappa.
+ }
+ assert(B2R 24 128 b_s <> 0)%R as b_s_NONZ by lra.
+
+ pose proof (Bdiv_correct 24 128 re re Float32.binop_nan mode_NE one_s b_s b_s_NONZ) as C2.
+ rewrite Rlt_bool_true in C2; cycle 1.
+ { clear C2.
+ apply Rabs_relax with (b := 1%R).
+ { cbn; lra. }
+ rewrite C0R.
+ set (r_b_s := B2R 24 128 b_s) in *.
+ cbn.
+ gappa.
+ }
+ rewrite C1R in C2.
+ destruct C2 as (C2R & C2F & C2Sz).
+ rewrite C1S in C2Sz.
+ change (xorb _ _) with false in C2Sz.
+ set (invb_s := (Bdiv 24 128 re re Float32.binop_nan mode_NE one_s b_s)) in *.
+ rewrite C0F in C2F.
+ assert (is_nan 24 128 invb_s = false) as NAN.
+ { apply is_finite_not_is_nan.
+ assumption.
+ }
+ pose proof (C2Sz NAN) as C2S.
+ clear C2Sz.
+
+ assert ((1/18446744073709551616 <= B2R 24 128 invb_s <= 1)%R) as invb_s_RANGE.
+ { rewrite C2R.
+ set (r_b_s := B2R 24 128 b_s) in *.
+ rewrite C0R.
+ cbn.
+ gappa.
+ }
+
+ pose proof (Bconv_correct 24 128 53 1024 re re Float.of_single_nan mode_NE invb_s C2F) as C3.
+ rewrite Rlt_bool_true in C3; cycle 1.
+ { clear C3.
+ set (r_invb_s := (B2R 24 128 invb_s)) in *.
+ apply Rabs_relax with (b := 1%R).
+ { replace 1%R with (bpow radix2 0)%R by reflexivity.
+ apply bpow_lt.
+ lia.
+ }
+ cbn.
+ gappa.
+ }
+ destruct C3 as (C3R & C3F & C3S).
+ set (invb_d := (Bconv 24 128 53 1024 re re Float.of_single_nan mode_NE invb_s)) in *.
+ assert ((1/18446744073709551616 <= B2R 53 1024 invb_d <= 1)%R) as invb_d_RANGE.
+ {
+ rewrite C3R.
+ set (r_invb_s := B2R 24 128 invb_s) in *.
+ cbn.
+ gappa.
+ }
+ rewrite C2S in C3S.
+ rewrite C2R in C3R.
+ rewrite C0R in C3R.
+
+ auto.
+Qed.
+
+Theorem step1_real_inv_longu_correct1 :
+ forall b,
+ (Int64.unsigned b = 1%Z) ->
+ exists f,
+ (step1_real_inv_longu (Vlong b)) = Vfloat f /\
+ (B2R _ _ f) = 1%R /\
+ is_finite _ _ f = true /\
+ Bsign _ _ f = false.
+Proof.
+ intros b EQ1.
+ assert (0 < Int64.unsigned b)%Z as b_RANGE by lia.
+ destruct (step1_real_inv_longu_correct b b_RANGE) as (f & C1E & C1R & C1F & C1S).
+ rewrite EQ1 in C1R.
+ exists f.
+ repeat split; try assumption.
+ rewrite C1R.
+ gappa.
+Qed.
+
+Lemma Bsign_false_nonneg:
+ forall prec emax f,
+ (Bsign prec emax f) = false -> (0 <= (B2R prec emax f))%R.
+Proof.
+ intros until f. intro SIGN.
+ destruct f.
+ 1, 2, 3: cbn; lra.
+ cbn.
+ apply F2R_ge_0.
+ cbn.
+ cbn in SIGN.
+ rewrite SIGN.
+ cbn.
+ lia.
+Qed.
+
+Lemma Znearest_IZR_le :
+ forall rnd n x, (IZR n <= x)%R -> (n <= Znearest rnd x)%Z.
+Proof.
+ intros until x. intro ORDER.
+ pose proof (Znearest_ge_floor rnd x).
+ pose proof (Zfloor_le _ _ ORDER) as KK.
+ rewrite Zfloor_IZR in KK.
+ lia.
+Qed.
+
+Lemma Znearest_le_IZR :
+ forall rnd n x, (x <= IZR n)%R -> (Znearest rnd x <= n)%Z.
+Proof.
+ intros until x. intro ORDER.
+ pose proof (Znearest_le_ceil rnd x).
+ pose proof (Zceil_le _ _ ORDER) as KK.
+ rewrite Zceil_IZR in KK.
+ lia.
+Qed.
+
+Definition step1_real_div_longu a b :=
+ Val.mulf (Val.maketotal (Val.floatoflongu a)) (step1_real_inv_longu b).
+
+Definition step1_div_longu a b :=
+ Val.maketotal (Val.longuoffloat_ne (step1_real_div_longu a b)).
+
+Definition step1_real_quotient (a b : R) :=
+ rd ((rd (a)) * (rd (rs (1 / rs (b))))).
+
+Theorem step1_real_div_longu_correct:
+ forall a b,
+ (1 < Int64.unsigned b)%Z ->
+ exists (q : float),
+ (step1_real_div_longu (Vlong a) (Vlong b)) = Vfloat q /\
+ (B2R _ _ q) = step1_real_quotient (IZR (Int64.unsigned a))
+ (IZR (Int64.unsigned b)) /\
+ is_finite _ _ q = true /\
+ Bsign _ _ q = false.
+Proof.
+ intros a b b_NON01.
+ assert (0 < Int64.unsigned b)%Z as b_NON0 by lia.
+ destruct (step1_real_inv_longu_correct b b_NON0) as (f & C1E & C1R & C1F & C1S).
+ unfold step1_real_div_longu.
+ rewrite C1E.
+ cbn.
+ set (b' := Int64.unsigned b) in *.
+ Local Transparent Float.mul.
+ unfold Float.mul, Float.of_longu.
+ econstructor.
+ split. reflexivity.
+ set (a' := Int64.unsigned a) in *.
+ set (re := (@eq_refl Datatypes.comparison Lt)).
+
+ pose proof (Int64.unsigned_range a) as a_RANGE.
+ change Int64.modulus with 18446744073709551616%Z in a_RANGE.
+ assert (0 <= IZR a' <= 18446744073709551615)%R as IZR_a_RANGE.
+ { split; apply IZR_le; lia. }
+ pose proof (Int64.unsigned_range b) as b_RANGE.
+ change Int64.modulus with 18446744073709551616%Z in b_RANGE.
+ assert (2 <= IZR b' <= 18446744073709551615)%R as IZR_b_RANGE.
+ { split; apply IZR_le; lia. }
+
+ pose proof (BofZ_correct 53 1024 re re a') as C2.
+ rewrite Rlt_bool_true in C2; cycle 1.
+ { clear C2.
+ apply Rabs_relax with (b := bpow radix2 64).
+ { apply bpow_lt. lia. }
+ cbn.
+ gappa.
+ }
+ destruct C2 as (C2R & C2F & C2S).
+ rewrite Zlt_bool_false in C2S by lia.
+
+ pose proof (Bmult_correct 53 1024 re re Float.binop_nan mode_NE (BofZ 53 1024 re re a') f) as C3.
+ rewrite C1S in C3.
+ rewrite C2S in C3.
+ rewrite C1F in C3.
+ rewrite C2F in C3.
+ rewrite C1R in C3.
+ rewrite C2R in C3.
+ rewrite Rlt_bool_true in C3; cycle 1.
+ { apply Rabs_relax with (b := bpow radix2 64).
+ { apply bpow_lt ; lia. }
+ cbn.
+ gappa.
+ }
+ cbn in C3.
+ destruct C3 as (C3R & C3F & C3Sz).
+ assert (is_nan 53 1024
+ (Bmult 53 1024 re re Float.binop_nan mode_NE
+ (BofZ 53 1024 re re a') f) = false) as NAN.
+ { apply is_finite_not_is_nan.
+ assumption. }
+ pose proof (C3Sz NAN) as C3S.
+ clear NAN C3Sz.
+
+ auto.
+Qed.
+
+Definition smallb_thresh := 4398046511104%Z.
+
+Definition smallb_approx_real_range := 2200000000000%R.
+Lemma step1_smallb_real :
+ forall a b
+ (a_RANGE : (1 <= a <= 18446744073709551615)%R)
+ (b_RANGE : (1 <= b <= IZR smallb_thresh)%R),
+ (Rabs((step1_real_quotient a b) * b - a) <= smallb_approx_real_range)%R.
+Proof.
+ intros.
+ unfold smallb_thresh in b_RANGE.
+ unfold smallb_approx_real_range.
+ unfold step1_real_quotient.
+ set (q := ((rd (a)) * (rd (rs (1 / rs (b)))))%R) in *.
+ replace ((rd q) *b - a)%R with
+ ((rd(q)-q)/q * rd(a) * (1 + (rd (rs (1 / rs (b))) - 1/b)/(1/b)) +
+ (rd (a)) * ((rd (rs (1 / rs (b))) - 1 / b) / (1/b)) +
+ (rd(a) - a))%R; cycle 1.
+ { unfold q.
+ field.
+ split. lra.
+ split. gappa.
+ gappa.
+ }
+ unfold q.
+ gappa.
+Qed.
+
+Lemma step1_div_longu_a0 :
+ forall b,
+ (0 < Int64.unsigned b)%Z ->
+ (step1_div_longu (Vlong Int64.zero) (Vlong b)) = Vlong Int64.zero.
+Proof.
+ intros b b_NOT0.
+ unfold step1_div_longu.
+ unfold step1_real_div_longu.
+ destruct (step1_real_inv_longu_correct b b_NOT0) as
+ (f & C1E & C1R & C1F & C1S).
+ rewrite C1E.
+ cbn.
+ unfold Float.to_longu_ne, Float.of_longu, Float.mul.
+ rewrite Int64.unsigned_zero.
+ set (re := (@eq_refl Datatypes.comparison Lt)).
+ assert (- 2 ^ 53 <= 0 <= 2 ^ 53)%Z as SILLY by lia.
+ destruct (BofZ_exact 53 1024 re re 0 SILLY) as (C2R & C2F & C2S).
+
+ pose proof (Bmult_correct 53 1024 re re Float.binop_nan mode_NE
+ (BofZ 53 1024 re re 0) f) as C3.
+ rewrite C1F in C3.
+ rewrite C2F in C3.
+ rewrite C1S in C3.
+ rewrite C2S in C3.
+ rewrite Z.ltb_irrefl in C3.
+ rewrite Rlt_bool_true in C3; cycle 1.
+ { clear C3.
+ apply Rabs_relax with (b := bpow radix2 64).
+ { apply bpow_lt. lia. }
+ cbn.
+ rewrite Rmult_0_l.
+ gappa.
+ }
+ rewrite C2R in C3.
+ rewrite Rmult_0_l in C3.
+ destruct C3 as (C3R & C3F & C3Sz).
+ change (true && true) with true in C3F.
+ change (xorb false false) with false in C3Sz.
+ assert (is_nan 53 1024
+ (Bmult 53 1024 re re Float.binop_nan mode_NE
+ (BofZ 53 1024 re re 0) f) = false) as NAN.
+ { apply is_finite_not_is_nan.
+ assumption.
+ }
+ pose proof (C3Sz NAN) as C3S.
+ clear NAN C3Sz.
+ pose proof ((ZofB_ne_range_correct 53 1024
+ (Bmult 53 1024 re re Float.binop_nan mode_NE
+ (BofZ 53 1024 re re 0) f) 0 Int64.max_unsigned)) as C4.
+ rewrite C3R in C4.
+ replace (round radix2 (FLT_exp (3 - 1024 - 53) 53) (round_mode mode_NE) 0)
+ with 0%R in C4 by (cbn ; gappa).
+ rewrite Znearest_IZR in C4.
+ cbn zeta in C4.
+ rewrite Z.leb_refl in C4.
+ change (0 <=? Int64.max_unsigned)%Z with true in C4.
+ rewrite andb_true_r in C4.
+ rewrite andb_true_r in C4.
+ rewrite C3F in C4.
+ rewrite C4.
+ reflexivity.
+Qed.
+
+Lemma step1_div_longu_correct_anyb :
+ forall a b
+ (b_NOT01 : (1 < Int64.unsigned b)%Z),
+ exists (q : int64),
+ (step1_div_longu (Vlong a) (Vlong b)) = Vlong q.
+Proof.
+ intros.
+
+ pose proof (Int64.unsigned_range a) as a_RANGE.
+ pose proof (Int64.unsigned_range b) as b_RANGE.
+ change Int64.modulus with 18446744073709551616%Z in *.
+ set (a' := Int64.unsigned a) in *.
+ set (b' := Int64.unsigned b) in *.
+ assert (0 <= IZR a' <= 18446744073709551615)%R as a_RANGE'.
+ { split; apply IZR_le; lia. }
+ assert (2 <= IZR b' <= 18446744073709551615)%R as b_RANGE'.
+ { split; apply IZR_le; lia. }
+
+ assert (0 < b')%Z as b_NOT0 by lia.
+
+ destruct (Z_le_gt_dec a' 0).
+ { assert (a' = 0%Z) as ZERO by lia.
+ replace a with Int64.zero; cycle 1.
+ {
+ unfold a' in ZERO.
+ unfold Int64.zero.
+ rewrite <- ZERO.
+ apply Int64.repr_unsigned.
+ }
+ exists Int64.zero.
+ apply step1_div_longu_a0.
+ exact b_NOT0.
+ }
+
+ unfold step1_div_longu.
+ unfold step1_real_div_longu.
+ destruct (step1_real_inv_longu_correct b b_NOT0) as (f & C1E & C1R & C1F & C1S).
+ rewrite C1E.
+ cbn.
+ unfold Float.of_longu, Float.mul, Float.to_longu_ne.
+ set (re := (@eq_refl Datatypes.comparison Lt)).
+ fold a'.
+ fold b' in C1R.
+ pose proof (BofZ_correct 53 1024 re re a') as C2.
+ rewrite Rlt_bool_true in C2; cycle 1.
+ { clear C2.
+ apply Rabs_relax with (b := bpow radix2 64).
+ { apply bpow_lt. lia. }
+ cbn.
+ gappa.
+ }
+ cbn in C2.
+ destruct C2 as (C2R & C2F & C2S).
+ pose proof (Bmult_correct 53 1024 re re Float.binop_nan mode_NE
+ (BofZ 53 1024 re re a') f) as C3.
+ rewrite C2R in C3.
+ rewrite C2F in C3.
+ rewrite C2S in C3.
+ rewrite C1R in C3.
+ rewrite C1F in C3.
+ rewrite C1S in C3.
+ rewrite Rlt_bool_true in C3; cycle 1.
+ { clear C3.
+ apply Rabs_relax with (b := bpow radix2 64).
+ { apply bpow_lt. lia. }
+ cbn.
+ gappa.
+ }
+ cbn in C3.
+ destruct C3 as (C3R & C3F & _).
+ pose proof (ZofB_ne_range_correct 53 1024
+ (Bmult 53 1024 re re Float.binop_nan mode_NE
+ (BofZ 53 1024 re re a') f) 0 Int64.max_unsigned) as C4.
+ rewrite C3R in C4.
+ rewrite C3F in C4.
+ cbn zeta in C4.
+ rewrite Zle_bool_true in C4 ; cycle 1.
+ { clear C4.
+ apply Znearest_lub.
+ gappa.
+ }
+ rewrite Zle_bool_true in C4 ; cycle 1.
+ { clear C4.
+ apply Znearest_glb.
+ cbn.
+ gappa.
+ }
+ rewrite C4.
+ cbn.
+ eauto.
+Qed.
+
+Definition smallb_approx_range := 4400000000000%Z.
+Lemma step1_div_longu_correct :
+ forall a b,
+ (1 < Int64.unsigned b <= smallb_thresh)%Z ->
+ exists (q : int64),
+ (step1_div_longu (Vlong a) (Vlong b)) = Vlong q /\
+ (Z.abs (Int64.unsigned a - Int64.unsigned b*Int64.unsigned q) <= smallb_approx_range)%Z.
+Proof.
+ intros a b b_RANGE.
+
+ pose proof (Int64.unsigned_range a) as a_RANGE.
+ change Int64.modulus with 18446744073709551616%Z in a_RANGE.
+ set (a' := Int64.unsigned a) in *.
+ set (b' := Int64.unsigned b) in *.
+
+ destruct (Z_le_gt_dec a' 0).
+ { assert (a' = 0%Z) as ZERO by lia.
+ exists Int64.zero.
+ rewrite ZERO.
+ rewrite Int64.unsigned_zero.
+ replace (Z.abs (0 - b' * 0))%Z with 0%Z by lia.
+ replace a with Int64.zero; cycle 1.
+ {
+ unfold a' in ZERO.
+ unfold Int64.zero.
+ rewrite <- ZERO.
+ apply Int64.repr_unsigned.
+ }
+ split.
+ { apply step1_div_longu_a0.
+ lia.
+ }
+ unfold smallb_approx_range.
+ lia.
+ }
+
+ unfold step1_div_longu.
+ assert (1 < b')%Z as b_NOT01 by lia.
+ destruct (step1_real_div_longu_correct a b b_NOT01) as (q & C1E & C1R & C1F & C1S).
+ rewrite C1E. cbn.
+ unfold Float.to_longu_ne.
+ pose proof (ZofB_ne_range_correct 53 1024 q 0 Int64.max_unsigned) as C2.
+ rewrite C1F in C2.
+
+
+ assert (1 <= IZR a' <= 18446744073709551615)%R as a_RANGE'.
+ { split; apply IZR_le; lia. }
+ assert (2 <= IZR b' <= IZR smallb_thresh)%R as b_RANGE'.
+ { split; apply IZR_le; lia. }
+ assert (1 <= IZR b' <= IZR smallb_thresh)%R as b_RANGE'' by lra.
+ pose proof (step1_smallb_real (IZR a') (IZR b') a_RANGE' b_RANGE'') as DELTA.
+ fold a' in C1R.
+ fold b' in C1R.
+ rewrite <- C1R in DELTA.
+
+ assert (0 <= B2R _ _ q)%R as q_NONNEG.
+ { apply Bsign_false_nonneg. assumption. }
+ cbn in C2.
+ rewrite Zle_bool_true in C2; cycle 1.
+ { apply Znearest_IZR_le. assumption. }
+ assert (B2R _ _ q <= 9223376000000000000)%R as q_SMALL.
+ { replace (B2R _ _ q) with
+ ((IZR a' / IZR b') + (B2R _ _ q * IZR b' - IZR a') / IZR b')%R; cycle 1.
+ { field. lra. }
+ unfold smallb_approx_real_range in DELTA.
+ unfold smallb_thresh in b_RANGE'.
+ set (y := (B2R 53 1024 q * IZR b' - IZR a')%R) in *.
+ gappa.
+ }
+ rewrite Zle_bool_true in C2; cycle 1.
+ { apply Znearest_le_IZR. lra. }
+ cbn in C2.
+
+ change Int64.max_unsigned with 18446744073709551615%Z.
+ rewrite C2.
+ cbn.
+
+ econstructor. split. reflexivity.
+ rewrite Int64.unsigned_repr; cycle 1.
+ { split.
+ - apply Znearest_IZR_le. lra.
+ - apply Znearest_le_IZR.
+ change Int64.max_unsigned with 18446744073709551615%Z.
+ lra.
+ }
+ apply le_IZR.
+ rewrite abs_IZR.
+ unfold smallb_approx_real_range, smallb_approx_range, smallb_thresh in *.
+ rewrite minus_IZR.
+ rewrite mult_IZR.
+ set (q_r := B2R 53 1024 q) in *.
+ assert (Rabs (IZR (ZnearestE q_r) - q_r) <= / 2)%R as NEAR
+ by apply Znearest_imp2.
+ set (q_i := IZR (ZnearestE q_r)) in *.
+ replace (IZR a' - IZR b' * q_i)%R with
+ (-(IZR b' * (q_i - q_r)) - (q_r * IZR b' - IZR a'))%R by ring.
+ set (delta1 := (q_i - q_r)%R) in *.
+ set (delta2 := (q_r * IZR b' - IZR a')%R) in *.
+ gappa.
+Qed.
+
+Lemma find_quotient:
+ forall (a b : Z)
+ (b_POS : (0 < b)%Z)
+ (qr : R)
+ (GAP : (Rabs (IZR a / IZR b - qr) < /2)%R),
+ (a / b)%Z =
+ let q := ZnearestE qr in
+ if (b*q >? a)%Z
+ then (q-1)%Z
+ else q.
+Proof.
+ intros.
+ set (q := ZnearestE qr).
+ cbn zeta.
+ set (b' := IZR b) in *.
+ set (a' := IZR a) in *.
+ assert (1 <= b')%R as b_POS'.
+ { apply IZR_le.
+ lia.
+ }
+
+ pose proof (Znearest_imp2 (fun x : Z => negb (Z.even x)) qr) as ROUND.
+ fold q in ROUND.
+ set (q' := IZR q) in *.
+
+ pose proof (Rabs_triang (a' / b' - qr)
+ (qr - q'))%R as TRIANGLE.
+ replace ((a' / b' - qr) + (qr - q'))%R with
+ (a' / b' - q')%R in TRIANGLE by ring.
+ rewrite <- Rabs_Ropp in ROUND.
+ replace (- (q' - qr))%R with (qr - q')%R in ROUND by ring.
+ assert (Z.abs (a - b*q) < b)%Z as DELTA.
+ { apply lt_IZR.
+ rewrite <- Rabs_Zabs.
+ rewrite minus_IZR.
+ rewrite mult_IZR.
+ fold a' q' b'.
+ apply Rmult_lt_reg_r with (r := (/b')%R).
+ { apply Rinv_0_lt_compat. lra. }
+ rewrite Rinv_r by lra.
+ replace (/ b')%R with (/ Rabs(b'))%R ; cycle 1.
+ { f_equal.
+ apply Rabs_pos_eq. lra. }
+ rewrite <- Rabs_Rinv by lra.
+ rewrite <- Rabs_mult.
+ replace ((a' - b' * q') * / b')%R with (a'/b' - q')%R by (field ; lra).
+ lra.
+ }
+
+ pose proof (Zgt_cases (b * q) a)%Z as CASE.
+ destruct (_ >? _)%Z.
+ { apply Zdiv_unique with (b := (a - (q-1)*b)%Z).
+ ring.
+ split; lia.
+ }
+
+ apply Zdiv_unique with (b := (a - q*b)%Z).
+ ring.
+ split; lia.
+Qed.
+
+Definition step2_real_div_long a b :=
+ Val.mulf (Val.maketotal (Val.floatoflong a)) (approx_inv_longu b).
+
+Definition smalla_thresh := 34184372088832%Z.
+
+Lemma step2_real_div_long_smalla_correct :
+ forall (a b : int64)
+ (a_SMALL : (Z.abs (Int64.signed a) <= smalla_thresh)%Z)
+ (b_NOT0 : (0 < Int64.unsigned b)%Z),
+ exists (q : float),
+ (step2_real_div_long (Vlong a) (Vlong b)) = Vfloat q /\
+ (Rabs ((B2R _ _ q) - (IZR (Int64.signed a)) / (IZR (Int64.unsigned b))) <= (32767/65536))%R /\
+ is_finite _ _ q = true.
+Proof.
+ intros.
+ unfold step2_real_div_long.
+ destruct (approx_inv_longu_correct_rel b b_NOT0) as (f & C0E & C0F & C0R).
+ rewrite C0E.
+ econstructor.
+ split. reflexivity.
+ Local Transparent Float.of_long.
+ unfold Float.mul, Float.of_long.
+ set (re := (@eq_refl Datatypes.comparison Lt)) in *.
+ pose proof (Int64.unsigned_range b) as b_RANGE.
+ change Int64.modulus with 18446744073709551616%Z in b_RANGE.
+ set (a' := Int64.signed a) in *.
+ set (b' := Int64.unsigned b) in *.
+ assert (1 <= IZR b' <= 18446744073709551615)%R as b_RANGE'.
+ { split; apply IZR_le; lia.
+ }
+ assert(Rabs (IZR a') <= IZR smalla_thresh)%R as a_RANGE'.
+ { rewrite Rabs_Zabs.
+ apply IZR_le.
+ assumption.
+ }
+ assert (- 2 ^ 53 <= a' <= 2 ^ 53)%Z as SILLY.
+ { unfold smalla_thresh in a_SMALL.
+ apply Z.abs_le.
+ lia.
+ }
+ destruct (BofZ_exact 53 1024 re re (Int64.signed a) SILLY) as (C1R & C1F & C1S).
+ fold a' in C1R, C1F, C1S.
+ pose proof (Bmult_correct 53 1024 re re Float.binop_nan mode_NE (BofZ 53 1024 re re a') f) as C2.
+ rewrite Rlt_bool_true in C2 ; cycle 1.
+ { clear C2.
+ apply Rabs_relax with (b := bpow radix2 53).
+ { apply bpow_lt. lia. }
+ cbn.
+ rewrite C1R.
+ unfold approx_inv_rel_thresh in C0R.
+ replace (B2R 53 1024 f) with
+ ((1/IZR b') * ((IZR b' * B2R 53 1024 f - 1) + 1))%R ; cycle 1.
+ { field. lra. }
+ unfold smalla_thresh in *.
+ gappa.
+ }
+ rewrite C0F in C2.
+ rewrite C1R in C2.
+ rewrite C1F in C2.
+ rewrite C1S in C2.
+ cbn in C2.
+ destruct C2 as (C2R & C2F & _).
+ split.
+ 2: exact C2F.
+ rewrite C2R.
+ replace (IZR a' * (B2R 53 1024 f))%R with
+ ((IZR a'/IZR b') * ((IZR b' * B2R 53 1024 f - 1) + 1))%R ; cycle 1.
+ { field. lra. }
+ set (delta1 := (IZR b' * B2R 53 1024 f - 1)%R) in *.
+ set (q1 := (IZR a' / IZR b' * (delta1 + 1))%R).
+ replace (rd q1) with (((rd q1) - q1) + q1)%R by ring.
+ set (delta2 := ((rd q1) - q1)%R).
+ unfold q1.
+ replace (delta2 + IZR a' / IZR b' * (delta1 + 1) - IZR a' / IZR b')%R with
+ (delta2 + (IZR a' / IZR b') * delta1)%R by ring.
+ unfold delta2.
+ unfold q1.
+ unfold approx_inv_rel_thresh in *.
+ unfold smalla_thresh in *.
+ gappa.
+Qed.
+
+Definition step2_div_long' a b :=
+ Val.maketotal (Val.longoffloat_ne (step2_real_div_long a b)).
+
+Definition step2_div_long a b :=
+ let q := step2_div_long' a b in
+ Val.select (Val.cmpl_bool Cgt (Val.subl (Val.mull q b) a) (Vlong Int64.zero))
+ (Val.addl q (Vlong Int64.mone)) q Tlong.
+
+Lemma function_ite :
+ forall {A B : Type} (fn : A->B) (b : bool) (x y: A),
+ fn (if b then x else y) = (if b then fn x else fn y).
+Proof.
+ intros.
+ destruct b; reflexivity.
+Qed.
+
+Lemma normalize_ite :
+ forall ty (b : bool) x y,
+ Val.normalize (if b then x else y) ty =
+ (if b then Val.normalize x ty else Val.normalize y ty).
+Proof.
+ intros.
+ destruct b; reflexivity.
+Qed.
+
+
+Lemma int64_mul_signed_unsigned:
+ forall x y : int64,
+ Int64.mul x y = Int64.repr (Int64.signed x * Int64.unsigned y).
+Proof.
+ intros.
+ unfold Int64.mul.
+ apply Int64.eqm_samerepr.
+ apply Int64.eqm_mult.
+ - apply Int64.eqm_sym.
+ apply Int64.eqm_signed_unsigned.
+ - apply Int64.eqm_refl.
+Qed.
+
+Lemma int64_eqm_signed_repr:
+ forall z : Z, Int64.eqm z (Int64.signed (Int64.repr z)).
+Proof.
+ intros.
+ apply Int64.eqm_trans with (y := Int64.unsigned (Int64.repr z)).
+ - apply Int64.eqm_unsigned_repr.
+ - apply Int64.eqm_sym.
+ apply Int64.eqm_signed_unsigned.
+Qed.
+
+Lemma signed_repr_sub:
+ forall x y,
+ Int64.repr (Int64.signed (Int64.repr x) - y) =
+ Int64.repr (x - y).
+Proof.
+ intros.
+ apply Int64.eqm_samerepr.
+ apply Int64.eqm_sub.
+ - apply Int64.eqm_sym.
+ apply int64_eqm_signed_repr.
+ - apply Int64.eqm_refl.
+Qed.
+
+Lemma step2_div_long_smalla_correct :
+ forall a b
+ (a_SMALL : (Z.abs (Int64.signed a) <= smalla_thresh)%Z)
+ (b_NOT0 : (0 < Int64.unsigned b)%Z)
+ (b_NOT_VERY_BIG : (Int64.unsigned b <= Int64.max_signed)%Z),
+ step2_div_long (Vlong a) (Vlong b) = Vlong (Int64.repr (Int64.signed a / Int64.unsigned b))%Z.
+Proof.
+ intros.
+ pose proof (Int64.unsigned_range b) as b_RANGE.
+ change Int64.modulus with 18446744073709551616%Z in b_RANGE.
+ set (a' := (Int64.signed a)) in *.
+ set (b' := (Int64.unsigned b)) in *.
+ assert (Rabs (IZR a') <= IZR smalla_thresh)%R as a_RANGE'.
+ { rewrite Rabs_Zabs.
+ apply IZR_le.
+ assumption.
+ }
+ assert (1 <= IZR b' <= 18446744073709551615)%R as b_RANGE'.
+ { split; apply IZR_le; lia.
+ }
+ destruct (step2_real_div_long_smalla_correct a b a_SMALL b_NOT0) as (q & C1R & C1E & C1F).
+ fold a' b' in C1E.
+ assert ((Int64.min_signed <=? ZnearestE (B2R 53 1024 q))=true)%Z as q_LOW.
+ { apply Zle_imp_le_bool.
+ change Int64.min_signed with (-9223372036854775808)%Z.
+ apply Znearest_lub.
+ set (q' := B2R 53 1024 q) in *.
+ replace q' with (IZR a' / IZR b' + (q' - IZR a' / IZR b'))%R by ring.
+ unfold smalla_thresh in a_RANGE'.
+ gappa.
+ }
+ assert ((ZnearestE (B2R 53 1024 q) <=? Int64.max_signed)=true)%Z as q_HIGH.
+ { apply Zle_imp_le_bool.
+ change Int64.max_signed with (9223372036854775807)%Z.
+ apply Znearest_glb.
+ set (q' := B2R 53 1024 q) in *.
+ replace q' with (IZR a' / IZR b' + (q' - IZR a' / IZR b'))%R by ring.
+ unfold smalla_thresh in a_RANGE'.
+ gappa.
+ }
+ unfold step2_div_long, step2_div_long'.
+ rewrite C1R.
+ cbn.
+ unfold Float.to_long_ne.
+ rewrite (ZofB_ne_range_correct _ _ q Int64.min_signed Int64.max_signed).
+ rewrite C1F.
+ rewrite q_LOW.
+ rewrite q_HIGH.
+ cbn.
+ rewrite normalize_ite.
+ cbn.
+ rewrite <- (function_ite Vlong).
+ f_equal.
+ unfold Int64.lt.
+ set (q' := B2R 53 1024 q) in *.
+ fold a'.
+ assert (Int64.signed (Int64.repr (ZnearestE q')) = ZnearestE q') as q_SIGNED.
+ { apply Int64.signed_repr.
+ split; lia.
+ }
+ rewrite Int64.add_signed.
+ rewrite q_SIGNED.
+ rewrite Int64.signed_mone.
+ rewrite Int64.signed_zero.
+ rewrite <- (function_ite Int64.repr).
+ f_equal.
+ replace (ZnearestE q' + -1)%Z with (ZnearestE q' - 1)%Z by ring.
+
+ set (q'' := (ZnearestE q')) in *.
+ fold a'.
+ rewrite int64_mul_signed_unsigned.
+ rewrite q_SIGNED.
+ fold b'.
+
+ rewrite Int64.sub_signed.
+ fold a'.
+ rewrite signed_repr_sub.
+
+ assert ((Rabs (IZR a' / IZR b' - q') < / 2)%R) as HALF.
+ { replace (IZR a' / IZR b' - q')%R with
+ (-(q' - IZR a' / IZR b'))%R by ring.
+ rewrite Rabs_Ropp.
+ lra.
+ }
+ pose proof (find_quotient a' b' b_NOT0 q' HALF) as QUOTIENT.
+ fold q'' in QUOTIENT.
+ cbn zeta in QUOTIENT.
+
+ assert (b' <> 0)%Z as NONZ by lia.
+ pose proof (Zmod_eq_full a' b' NONZ) as MOD.
+ assert (b' > 0)%Z as b_GT0 by lia.
+ pose proof (Z_mod_lt a' b' b_GT0) as MOD_LT.
+ destruct (Z_lt_dec a' (b' * q'')) as [LT | GE].
+ { replace (b' * q'' >? a')%Z with true in QUOTIENT by lia.
+ replace q'' with (1 + (a' / b'))%Z by lia.
+ replace ((1 + a' / b') * b' - a')%Z
+ with (-(a' - a' / b' * b')+b')%Z by ring.
+ rewrite <- MOD.
+ rewrite Int64.signed_repr; cycle 1.
+ { change Int64.min_signed with (-9223372036854775808)%Z in *.
+ change Int64.max_signed with (9223372036854775807)%Z in *.
+ lia.
+ }
+ rewrite zlt_true by lia.
+ ring.
+ }
+ replace (b' * q'' >? a')%Z with false in QUOTIENT by lia.
+ rewrite <- QUOTIENT.
+ replace (a' / b' * b' - a')%Z with (-(a' - a' / b' * b'))%Z by ring.
+ rewrite <- MOD.
+ rewrite Int64.signed_repr ; cycle 1.
+ { change Int64.min_signed with (-9223372036854775808)%Z in *.
+ change Int64.max_signed with (9223372036854775807)%Z in *.
+ lia.
+ }
+ rewrite zlt_false by lia.
+ reflexivity.
+Qed.
+
+Definition twostep_div_longu a b :=
+ let q1 := step1_div_longu a b in
+ let q2 := step2_div_long (Val.subl a (Val.mull b q1)) b in
+ Val.addl q1 q2.
+
+Lemma unsigned_repr_sub :
+ forall a b,
+ Int64.repr (a - b) = Int64.repr (a - Int64.unsigned (Int64.repr b)).
+Proof.
+ intros.
+ apply Int64.eqm_samerepr.
+ apply Int64.eqm_sub.
+ - apply Int64.eqm_refl.
+ - apply Int64.eqm_unsigned_repr.
+Qed.
+
+Lemma unsigned_repr_add :
+ forall a b,
+ Int64.repr (a + b) = Int64.repr (a + Int64.unsigned (Int64.repr b)).
+Proof.
+ intros.
+ apply Int64.eqm_samerepr.
+ apply Int64.eqm_add.
+ - apply Int64.eqm_refl.
+ - apply Int64.eqm_unsigned_repr.
+Qed.
+
+Lemma twostep_div_longu_smallb_correct :
+ forall a b
+ (b_RANGE : (1 < Int64.unsigned b <= smallb_thresh)%Z),
+ (twostep_div_longu (Vlong a) (Vlong b)) =
+ (Val.maketotal (Val.divlu (Vlong a) (Vlong b))).
+Proof.
+ intros.
+ unfold twostep_div_longu.
+ destruct (step1_div_longu_correct a b b_RANGE) as (q1 & C1R & C1E).
+ rewrite C1R.
+ set (q1' := Int64.unsigned q1) in *.
+ set (b' := Int64.unsigned b) in *.
+ set (a' := Int64.unsigned a) in *.
+ assert ( Z.abs (Int64.signed (Int64.sub a (Int64.mul b q1))) <= smalla_thresh)%Z as r1_SMALL.
+ { unfold smalla_thresh, smallb_approx_range in *.
+ unfold Int64.sub, Int64.mul.
+ fold q1' b' a'.
+ rewrite <- unsigned_repr_sub.
+ rewrite Int64.signed_repr ; cycle 1.
+ { change Int64.min_signed with (-9223372036854775808)%Z.
+ change Int64.max_signed with (9223372036854775807)%Z.
+ lia.
+ }
+ lia.
+ }
+ assert (0 < b')%Z as b_NOT0 by lia.
+ assert (b' <= Int64.max_signed)%Z as b_NOTBIG.
+ { change Int64.max_signed with (9223372036854775807)%Z.
+ unfold smallb_thresh in b_RANGE.
+ lia.
+ }
+ cbn.
+ rewrite (step2_div_long_smalla_correct (Int64.sub a (Int64.mul b q1)) b r1_SMALL b_NOT0 b_NOTBIG).
+ unfold Int64.add, Int64.sub, Int64.mul, Int64.divu.
+ fold q1' b' a'.
+ rewrite <- unsigned_repr_sub.
+ rewrite <- unsigned_repr_add.
+ rewrite Int64.signed_repr ; cycle 1.
+ {
+ change Int64.min_signed with (-9223372036854775808)%Z.
+ change Int64.max_signed with (9223372036854775807)%Z.
+ unfold smallb_approx_range in *.
+ lia.
+ }
+ rewrite Z.add_comm.
+ rewrite <- Z.div_add by lia.
+ replace (a' - b' * q1' + q1' * b')%Z with a' by ring.
+ rewrite Int64.eq_false ; cycle 1.
+ { intro Z. unfold b' in b_NOT0. rewrite Z in b_NOT0.
+ rewrite Int64.unsigned_zero in b_NOT0.
+ lia.
+ }
+ reflexivity.
+Qed.
+
+
+Lemma step2_real_div_long_bigb_correct :
+ forall (a b : int64)
+ (b_BIG : ((Int64.unsigned b) > smallb_thresh)%Z),
+ exists (q : float),
+ (step2_real_div_long (Vlong a) (Vlong b)) = Vfloat q /\
+ (Rabs ((B2R _ _ q) - (IZR (Int64.signed a)) / (IZR (Int64.unsigned b))) <= (32767/65536))%R /\
+ is_finite _ _ q = true.
+Proof.
+ intros.
+ unfold step2_real_div_long.
+ assert (0 < Int64.unsigned b)%Z as b_NOT0 by (unfold smallb_thresh in *; lia).
+ destruct (approx_inv_longu_correct_rel b b_NOT0) as (f & C0E & C0F & C0R).
+ rewrite C0E.
+ econstructor.
+ split. reflexivity.
+ Local Transparent Float.of_long.
+ unfold Float.mul, Float.of_long.
+ set (re := (@eq_refl Datatypes.comparison Lt)) in *.
+ pose proof (Int64.unsigned_range b) as b_RANGE.
+ change Int64.modulus with 18446744073709551616%Z in b_RANGE.
+ pose proof (Int64.signed_range a) as a_RANGE.
+ set (a' := Int64.signed a) in *.
+ set (b' := Int64.unsigned b) in *.
+ assert (IZR (1 + smallb_thresh) <= IZR b' <= 18446744073709551615)%R as b_RANGE'.
+ { split; apply IZR_le; lia.
+ }
+ assert(IZR Int64.min_signed <= IZR a' <= IZR Int64.max_signed)%R as a_RANGE'.
+ { split; apply IZR_le; lia.
+ }
+ change Int64.min_signed with (-9223372036854775808)%Z in a_RANGE'.
+ change Int64.max_signed with (9223372036854775807)%Z in a_RANGE'.
+ pose proof (BofZ_correct 53 1024 re re a') as C1.
+ rewrite Rlt_bool_true in C1 ; cycle 1.
+ { clear C1.
+ apply Rabs_relax with (b := bpow radix2 64).
+ { apply bpow_lt; lia. }
+ cbn.
+ gappa.
+ }
+ cbn in C1.
+ destruct C1 as (C1R & C1F & C1S).
+
+ unfold smallb_thresh in b_RANGE'; cbn in b_RANGE'.
+
+ pose proof (Bmult_correct 53 1024 re re Float.binop_nan mode_NE (BofZ 53 1024 re re a') f) as C2.
+ rewrite Rlt_bool_true in C2 ; cycle 1.
+ { clear C2.
+ apply Rabs_relax with (b := bpow radix2 53).
+ { apply bpow_lt. lia. }
+ cbn.
+ rewrite C1R.
+ unfold approx_inv_rel_thresh in C0R.
+ replace (B2R 53 1024 f) with
+ ((1/IZR b') * ((IZR b' * B2R 53 1024 f - 1) + 1))%R ; cycle 1.
+ { field. lra. }
+ gappa.
+ }
+ rewrite C0F in C2.
+ rewrite C1R in C2.
+ rewrite C1F in C2.
+ rewrite C1S in C2.
+ cbn in C2.
+ destruct C2 as (C2R & C2F & _).
+ split.
+ 2: exact C2F.
+ rewrite C2R.
+ set (f' := (B2R 53 1024 f)) in *.
+ replace (rd(rd (IZR a') * f') - IZR a' / IZR b')%R with
+ ((rd(rd (IZR a') * f') - IZR a' * f') + IZR a' / IZR b' * (IZR b' * f' - 1))%R ; cycle 1.
+ { field. lra. }
+ unfold approx_inv_rel_thresh in *.
+ gappa.
+Qed.
+
+Lemma step2_div_long_bigb_correct :
+ forall a b
+ (b_BIG : ((Int64.unsigned b) > smallb_thresh)%Z)
+ (b_NOT_TOO_BIG : ((Int64.unsigned b) <= Int64.max_signed)%Z),
+ step2_div_long (Vlong a) (Vlong b) = Vlong (Int64.repr (Int64.signed a / Int64.unsigned b))%Z.
+Proof.
+ intros.
+ pose proof (Int64.unsigned_range b) as b_RANGE.
+ change Int64.modulus with 18446744073709551616%Z in b_RANGE.
+ pose proof (Int64.signed_range a) as a_RANGE.
+ set (a' := (Int64.signed a)) in *.
+ set (b' := (Int64.unsigned b)) in *.
+ assert (IZR (1 + smallb_thresh) <= IZR b' <= 18446744073709551615)%R as b_RANGE'.
+ { split; apply IZR_le; lia.
+ }
+ assert(IZR Int64.min_signed <= IZR a' <= IZR Int64.max_signed)%R as a_RANGE'.
+ { split; apply IZR_le; lia.
+ }
+ unfold smallb_thresh in *; cbn in b_RANGE'.
+ change Int64.min_signed with (-9223372036854775808)%Z in *.
+ change Int64.max_signed with (9223372036854775807)%Z in *.
+ assert (0 < b')%Z as b_NOT0 by lia.
+
+ destruct (step2_real_div_long_bigb_correct a b b_BIG) as (q & C1R & C1E & C1F).
+ fold a' b' in C1E.
+ assert ((Int64.min_signed <=? ZnearestE (B2R 53 1024 q))=true)%Z as q_LOW.
+ { apply Zle_imp_le_bool.
+ change Int64.min_signed with (-9223372036854775808)%Z.
+ apply Znearest_lub.
+ set (q' := B2R 53 1024 q) in *.
+ replace q' with (IZR a' / IZR b' + (q' - IZR a' / IZR b'))%R by ring.
+ gappa.
+ }
+ assert ((ZnearestE (B2R 53 1024 q) <=? Int64.max_signed)=true)%Z as q_HIGH.
+ { apply Zle_imp_le_bool.
+ change Int64.max_signed with (9223372036854775807)%Z.
+ apply Znearest_glb.
+ set (q' := B2R 53 1024 q) in *.
+ replace q' with (IZR a' / IZR b' + (q' - IZR a' / IZR b'))%R by ring.
+ gappa.
+ }
+ unfold step2_div_long, step2_div_long'.
+ rewrite C1R.
+ cbn.
+ unfold Float.to_long_ne.
+ rewrite (ZofB_ne_range_correct _ _ q Int64.min_signed Int64.max_signed).
+ rewrite C1F.
+ rewrite q_LOW.
+ rewrite q_HIGH.
+ cbn.
+ rewrite normalize_ite.
+ cbn.
+ rewrite <- (function_ite Vlong).
+ f_equal.
+ unfold Int64.lt.
+ set (q' := B2R 53 1024 q) in *.
+ fold a'.
+ assert (Int64.signed (Int64.repr (ZnearestE q')) = ZnearestE q') as q_SIGNED.
+ { apply Int64.signed_repr.
+ split; lia.
+ }
+ rewrite Int64.add_signed.
+ rewrite q_SIGNED.
+ rewrite Int64.signed_mone.
+ rewrite Int64.signed_zero.
+ rewrite <- (function_ite Int64.repr).
+ f_equal.
+ replace (ZnearestE q' + -1)%Z with (ZnearestE q' - 1)%Z by ring.
+
+ set (q'' := (ZnearestE q')) in *.
+ fold a'.
+ rewrite int64_mul_signed_unsigned.
+ rewrite q_SIGNED.
+ fold b'.
+
+ rewrite Int64.sub_signed.
+ fold a'.
+ rewrite signed_repr_sub.
+
+ assert ((Rabs (IZR a' / IZR b' - q') < / 2)%R) as HALF.
+ { replace (IZR a' / IZR b' - q')%R with
+ (-(q' - IZR a' / IZR b'))%R by ring.
+ rewrite Rabs_Ropp.
+ lra.
+ }
+ pose proof (find_quotient a' b' b_NOT0 q' HALF) as QUOTIENT.
+ fold q'' in QUOTIENT.
+ cbn zeta in QUOTIENT.
+
+ assert (b' <> 0)%Z as NONZ by lia.
+ pose proof (Zmod_eq_full a' b' NONZ) as MOD.
+ assert (b' > 0)%Z as b_GT0 by lia.
+ pose proof (Z_mod_lt a' b' b_GT0) as MOD_LT.
+ destruct (Z_lt_dec a' (b' * q'')) as [LT | GE].
+ { replace (b' * q'' >? a')%Z with true in QUOTIENT by lia.
+ replace q'' with (1 + (a' / b'))%Z by lia.
+ replace ((1 + a' / b') * b' - a')%Z
+ with (-(a' - a' / b' * b')+b')%Z by ring.
+ rewrite <- MOD.
+ rewrite Int64.signed_repr; cycle 1.
+ { change Int64.min_signed with (-9223372036854775808)%Z in *.
+ change Int64.max_signed with (9223372036854775807)%Z in *.
+ lia.
+ }
+ rewrite zlt_true by lia.
+ ring.
+ }
+ replace (b' * q'' >? a')%Z with false in QUOTIENT by lia.
+ rewrite <- QUOTIENT.
+ replace (a' / b' * b' - a')%Z with (-(a' - a' / b' * b'))%Z by ring.
+ rewrite <- MOD.
+ rewrite Int64.signed_repr ; cycle 1.
+ { change Int64.min_signed with (-9223372036854775808)%Z in *.
+ change Int64.max_signed with (9223372036854775807)%Z in *.
+ lia.
+ }
+ rewrite zlt_false by lia.
+ reflexivity.
+Qed.
+
+Definition step2_real_div_longu a b :=
+ Val.mulf (Val.maketotal (Val.floatoflongu a)) (approx_inv_longu b).
+
+Definition step2_div_longu' a b :=
+ Val.maketotal (Val.longuoffloat_ne (step2_real_div_longu a b)).
+
+Definition step2_div_longu a b :=
+ let q := step2_div_longu' a b in
+ Val.select (Val.cmpl_bool Cgt (Val.subl (Val.mull q b) a) (Vlong Int64.zero))
+ (Val.addl q (Vlong Int64.mone)) q Tlong.
+
+Lemma step2_real_div_longu_bigb_correct :
+ forall (a b : int64)
+ (b_BIG : ((Int64.unsigned b) > smallb_thresh)%Z),
+ exists (q : float),
+ (step2_real_div_longu (Vlong a) (Vlong b)) = Vfloat q /\
+ (Rabs ((B2R _ _ q) - (IZR (Int64.unsigned a)) / (IZR (Int64.unsigned b))) <= (32767/65536))%R /\
+ is_finite _ _ q = true.
+Proof.
+ intros.
+ unfold step2_real_div_longu.
+ assert (0 < Int64.unsigned b)%Z as b_NOT0 by (unfold smallb_thresh in *; lia).
+ destruct (approx_inv_longu_correct_rel b b_NOT0) as (f & C0E & C0F & C0R).
+ rewrite C0E.
+ econstructor.
+ split. reflexivity.
+ Local Transparent Float.of_longu.
+ unfold Float.mul, Float.of_longu.
+ set (re := (@eq_refl Datatypes.comparison Lt)) in *.
+ pose proof (Int64.unsigned_range b) as b_RANGE.
+ pose proof (Int64.unsigned_range a) as a_RANGE.
+ change Int64.modulus with 18446744073709551616%Z in *.
+ set (a' := Int64.unsigned a) in *.
+ set (b' := Int64.unsigned b) in *.
+ assert (IZR (1 + smallb_thresh) <= IZR b' <= 18446744073709551615)%R as b_RANGE'.
+ { split; apply IZR_le; lia.
+ }
+ assert(0 <= IZR a' <= 18446744073709551615)%R as a_RANGE'.
+ { split; apply IZR_le; lia.
+ }
+ pose proof (BofZ_correct 53 1024 re re a') as C1.
+ rewrite Rlt_bool_true in C1 ; cycle 1.
+ { clear C1.
+ apply Rabs_relax with (b := bpow radix2 64).
+ { apply bpow_lt; lia. }
+ cbn.
+ gappa.
+ }
+ cbn in C1.
+ destruct C1 as (C1R & C1F & C1S).
+
+ unfold smallb_thresh in b_RANGE'; cbn in b_RANGE'.
+
+ pose proof (Bmult_correct 53 1024 re re Float.binop_nan mode_NE (BofZ 53 1024 re re a') f) as C2.
+ rewrite Rlt_bool_true in C2 ; cycle 1.
+ { clear C2.
+ apply Rabs_relax with (b := bpow radix2 53).
+ { apply bpow_lt. lia. }
+ cbn.
+ rewrite C1R.
+ unfold approx_inv_rel_thresh in C0R.
+ replace (B2R 53 1024 f) with
+ ((1/IZR b') * ((IZR b' * B2R 53 1024 f - 1) + 1))%R ; cycle 1.
+ { field. lra. }
+ gappa.
+ }
+ rewrite C0F in C2.
+ rewrite C1R in C2.
+ rewrite C1F in C2.
+ rewrite C1S in C2.
+ cbn in C2.
+ destruct C2 as (C2R & C2F & _).
+ split.
+ 2: exact C2F.
+ rewrite C2R.
+ set (f' := (B2R 53 1024 f)) in *.
+ replace (rd(rd (IZR a') * f') - IZR a' / IZR b')%R with
+ ((rd(rd (IZR a') * f') - IZR a' * f') + IZR a' / IZR b' * (IZR b' * f' - 1))%R ; cycle 1.
+ { field. lra. }
+ unfold approx_inv_rel_thresh in *.
+ gappa.
+Qed.
+
+Lemma repr_unsigned_mul:
+ forall a b,
+ (Int64.repr (Int64.unsigned (Int64.repr a) * b)) = Int64.repr (a * b).
+Proof.
+ intros.
+ apply Int64.eqm_samerepr.
+ apply Int64.eqm_mult.
+ - apply Int64.eqm_sym. apply Int64.eqm_unsigned_repr.
+ - apply Int64.eqm_refl.
+Qed.
+
+Lemma repr_unsigned_sub:
+ forall a b,
+ (Int64.repr (Int64.unsigned (Int64.repr a) - b)) = Int64.repr (a - b).
+Proof.
+ intros.
+ apply Int64.eqm_samerepr.
+ apply Int64.eqm_sub.
+ - apply Int64.eqm_sym. apply Int64.eqm_unsigned_repr.
+ - apply Int64.eqm_refl.
+Qed.
+
+Lemma repr_unsigned_add:
+ forall a b,
+ (Int64.repr (Int64.unsigned (Int64.repr a) + b)) = Int64.repr (a + b).
+Proof.
+ intros.
+ apply Int64.eqm_samerepr.
+ apply Int64.eqm_add.
+ - apply Int64.eqm_sym. apply Int64.eqm_unsigned_repr.
+ - apply Int64.eqm_refl.
+Qed.
+
+Lemma step2_div_longu_bigb_correct :
+ forall a b
+ (b_BIG : ((Int64.unsigned b) > smallb_thresh)%Z)
+ (b_NOT_TOO_BIG : ((Int64.unsigned b) <= Int64.max_signed)%Z),
+ step2_div_longu (Vlong a) (Vlong b) = Vlong (Int64.repr (Int64.unsigned a / Int64.unsigned b))%Z.
+Proof.
+ intros.
+ pose proof (Int64.unsigned_range b) as b_RANGE.
+ pose proof (Int64.unsigned_range a) as a_RANGE.
+ change Int64.modulus with 18446744073709551616%Z in *.
+ set (a' := (Int64.unsigned a)) in *.
+ set (b' := (Int64.unsigned b)) in *.
+ assert (IZR (1 + smallb_thresh) <= IZR b' <= 18446744073709551615)%R as b_RANGE'.
+ { split; apply IZR_le; lia.
+ }
+ assert(0 <= IZR a' <= 18446744073709551615)%R as a_RANGE'.
+ { split; apply IZR_le; lia.
+ }
+ unfold smallb_thresh in *; cbn in b_RANGE'.
+ assert (0 < b')%Z as b_NOT0 by lia.
+
+ destruct (step2_real_div_longu_bigb_correct a b b_BIG) as (q & C1R & C1E & C1F).
+ fold a' b' in C1E.
+
+ assert ((0 <=? ZnearestE (B2R 53 1024 q))=true)%Z as q_LOW.
+ { apply Zle_imp_le_bool.
+ set (q' := B2R 53 1024 q) in *.
+ assert (-32767 / 65536 <= q')%R as LOWROUND.
+ { replace q' with (IZR a' / IZR b' + (q' - IZR a' / IZR b'))%R by ring.
+ gappa.
+ }
+ destruct (Rcase_abs q').
+ { replace (ZnearestE q') with 0%Z. lia.
+ symmetry.
+ apply Znearest_imp.
+ apply Rabs_lt.
+ split; lra.
+ }
+ apply Znearest_lub.
+ lra.
+ }
+ assert ((ZnearestE (B2R 53 1024 q) <=? Int64.max_unsigned)=true)%Z as q_HIGH.
+ { apply Zle_imp_le_bool.
+ change Int64.max_unsigned with (18446744073709551615)%Z.
+ apply Znearest_glb.
+ set (q' := B2R 53 1024 q) in *.
+ replace q' with (IZR a' / IZR b' + (q' - IZR a' / IZR b'))%R by ring.
+ gappa.
+ }
+
+ unfold step2_div_longu, step2_div_longu'.
+ rewrite C1R.
+ cbn.
+ unfold Float.to_longu_ne.
+ rewrite (ZofB_ne_range_correct _ _ q _ _).
+ rewrite C1F.
+ rewrite q_LOW.
+ rewrite q_HIGH.
+ cbn.
+ rewrite normalize_ite.
+ cbn.
+ rewrite <- (function_ite Vlong).
+ f_equal.
+ unfold Int64.lt.
+ set (q' := B2R 53 1024 q) in *.
+ fold a'.
+ rewrite Int64.signed_zero.
+ set (q'' := (ZnearestE q')) in *.
+ assert ((Rabs (IZR a' / IZR b' - q') < / 2)%R) as HALF.
+ { replace (IZR a' / IZR b' - q')%R with
+ (-(q' - IZR a' / IZR b'))%R by ring.
+ rewrite Rabs_Ropp.
+ lra.
+ }
+ pose proof (find_quotient a' b' b_NOT0 q' HALF) as QUOTIENT.
+ fold q'' in QUOTIENT.
+ cbn zeta in QUOTIENT.
+
+ assert (b' <> 0)%Z as NONZ by lia.
+ pose proof (Zmod_eq_full a' b' NONZ) as MOD.
+ assert (b' > 0)%Z as b_GT0 by lia.
+ pose proof (Z_mod_lt a' b' b_GT0) as MOD_LT.
+ destruct (Z_lt_dec a' (b' * q'')) as [LT | GE].
+ { replace (b' * q'' >? a')%Z with true in QUOTIENT by lia.
+ unfold Int64.sub, Int64.mul.
+ fold a' b'.
+ replace q'' with (1 + a'/b')%Z by lia.
+ rewrite repr_unsigned_mul.
+ rewrite repr_unsigned_sub.
+
+ replace ((1 + a' / b') * b' - a')%Z with (b' - (a' - a' / b' * b'))%Z by ring.
+ rewrite <- MOD.
+ rewrite Int64.signed_repr ; cycle 1.
+ { change Int64.max_signed with 9223372036854775807%Z in *.
+ change Int64.min_signed with (-9223372036854775808)%Z in *.
+ lia.
+ }
+ rewrite zlt_true by lia.
+ replace q'' with (1 + (a' / b'))%Z by lia.
+ apply Int64.eqm_samerepr.
+ apply Int64.eqm_trans with (y := ((1 + a' / b') + (-1))%Z).
+ { apply Int64.eqm_add.
+ apply Int64.eqm_sym.
+ apply Int64.eqm_unsigned_repr.
+ rewrite Int64.unsigned_mone.
+ replace (-1)%Z with (0 - 1)%Z by ring.
+ apply Int64.eqm_add.
+ exists 1%Z.
+ lia.
+ apply Int64.eqm_refl.
+ }
+ replace (1 + a' / b' + -1)%Z with (a'/b')%Z by ring.
+ apply Int64.eqm_refl.
+ }
+ replace (b' * q'' >? a')%Z with false in QUOTIENT by lia.
+ rewrite <- QUOTIENT.
+ unfold Int64.sub, Int64.mul.
+ fold a' b'.
+ rewrite repr_unsigned_mul.
+ rewrite repr_unsigned_sub.
+ replace (a' / b' * b' - a')%Z with (- (a' mod b'))%Z by lia.
+ rewrite Int64.signed_repr ; cycle 1.
+ { change Int64.max_signed with 9223372036854775807%Z in *.
+ change Int64.min_signed with (-9223372036854775808)%Z in *.
+ lia.
+ }
+ rewrite zlt_false by lia.
+ reflexivity.
+Qed.
+
+Definition anystep_div_longu a b m :=
+ Val.select (Val.cmplu_bool (Mem.valid_pointer m) Cgt b (Vlong (Int64.repr smallb_thresh)))
+ (step2_div_longu a b)
+ (twostep_div_longu a b) Tlong.
+
+Lemma anystep_div_longu_correct :
+ forall a b m
+ (b_RANGE : (1 < Int64.unsigned b <= Int64.max_signed)%Z),
+ anystep_div_longu (Vlong a) (Vlong b) m = Vlong (Int64.repr (Int64.unsigned a / Int64.unsigned b))%Z.
+Proof.
+ intros.
+ unfold anystep_div_longu.
+ set (a' := Int64.unsigned a).
+ set (b' := Int64.unsigned b).
+ assert (0 < b')%Z as b_NOT0 by lia.
+ cbn.
+ unfold Int64.ltu.
+ fold b'.
+ rewrite Int64.unsigned_repr; cycle 1.
+ { unfold smallb_thresh.
+ change Int64.max_unsigned with 18446744073709551615%Z.
+ lia.
+ }
+ destruct zlt.
+ { rewrite step2_div_longu_bigb_correct by lia.
+ reflexivity.
+ }
+ rewrite twostep_div_longu_smallb_correct by lia.
+ cbn.
+ rewrite Int64.eq_false ; cycle 1.
+ { intro Z. unfold b' in b_NOT0. rewrite Z in b_NOT0.
+ rewrite Int64.unsigned_zero in b_NOT0.
+ lia.
+ }
+ reflexivity.
+Qed.
+
+
+Definition full_div_longu a b m :=
+ let is_big := Val.cmpl_bool Clt b (Vlong Int64.zero) in
+ let is_one := Val.cmplu_bool (Mem.valid_pointer m) Cle b (Vlong Int64.one) in
+ let is_special := Val.or (Val.of_optbool is_big) (Val.of_optbool is_one) in
+ let if_big := Val.select (Val.cmplu_bool (Mem.valid_pointer m) Clt a b)
+ (Vlong Int64.zero) (Vlong Int64.one) Tlong in
+ let if_special := Val.select is_big if_big a Tlong in
+ Val.select (Val.cmp_bool Cne is_special (Vint Int.zero))
+ if_special
+ (anystep_div_longu a b m) Tlong.
+
+Lemma one_bigb_div :
+ forall a b
+ (b_RANGE : (9223372036854775808 <= b < 18446744073709551616)%Z)
+ (ORDER : (b <= a < 18446744073709551616)%Z),
+ (a / b = 1)%Z.
+Proof.
+ intros.
+ assert (((a - b) / b) = 0)%Z as ZERO.
+ { apply Zdiv_small. lia.
+ }
+ replace a with (1 * b + (a - b))%Z by ring.
+ rewrite Z.div_add_l by lia.
+ rewrite ZERO.
+ ring.
+Qed.
+
+Lemma full_div_longu_correct :
+ forall a b m
+ (b_NOT0 : (0 < Int64.unsigned b)%Z),
+ full_div_longu (Vlong a) (Vlong b) m = Vlong (Int64.repr (Int64.unsigned a / Int64.unsigned b))%Z.
+Proof.
+
+ Local Opaque anystep_div_longu.
+ intros.
+ unfold full_div_longu.
+ cbn.
+ unfold Int64.lt, Int64.ltu.
+ pose proof (Int64.unsigned_range a).
+ pose proof (Int64.unsigned_range b).
+ set (a' := Int64.unsigned a) in *.
+ set (b' := Int64.unsigned b) in *.
+ rewrite Int64.signed_zero.
+ rewrite Int64.unsigned_one.
+ destruct zlt.
+ { replace (Val.cmp_bool Cne
+ (Val.or Vtrue
+ (if negb (if zlt 1 b' then true else false) then Vtrue else Vfalse))
+ (Vint Int.zero)) with (Some true) ; cycle 1.
+ { destruct zlt; reflexivity.
+ }
+ cbn.
+ destruct zlt; cbn.
+ { rewrite Zdiv_small by lia.
+ reflexivity.
+ }
+ rewrite one_bigb_div.
+ reflexivity.
+ {
+ change Int64.modulus with 18446744073709551616%Z in *.
+ split. 2: lia.
+ unfold b'.
+ rewrite Int64.unsigned_signed.
+ unfold Int64.lt.
+ rewrite Int64.signed_zero.
+ rewrite zlt_true by lia.
+ pose proof (Int64.signed_range b).
+ change Int64.min_signed with (-9223372036854775808)%Z in *.
+ change Int64.max_signed with (9223372036854775807)%Z in *.
+ change Int64.modulus with 18446744073709551616%Z in *.
+ lia.
+ }
+ change Int64.modulus with 18446744073709551616%Z in *.
+ lia.
+ }
+ destruct zlt; cbn.
+ { change (negb (Int.eq (Int.or Int.zero Int.zero) Int.zero)) with false.
+ cbn.
+ rewrite anystep_div_longu_correct.
+ reflexivity.
+
+ change Int64.modulus with 18446744073709551616%Z in *.
+ split. lia.
+ rewrite Int64.unsigned_signed.
+ unfold Int64.lt.
+ rewrite Int64.signed_zero.
+ rewrite zlt_false by lia.
+ pose proof (Int64.signed_range b).
+ change Int64.min_signed with (-9223372036854775808)%Z in *.
+ change Int64.max_signed with (9223372036854775807)%Z in *.
+ change Int64.modulus with 18446744073709551616%Z in *.
+ lia.
+ }
+ change (negb (Int.eq (Int.or Int.zero Int.one) Int.zero)) with true.
+ cbn.
+ replace b' with 1%Z by lia.
+ rewrite Z.div_1_r.
+ unfold a'.
+ rewrite Int64.repr_unsigned.
+ reflexivity.
+Qed.
diff --git a/kvx/NeedOp.v b/kvx/NeedOp.v
index 4578b4e8..455af70e 100644
--- a/kvx/NeedOp.v
+++ b/kvx/NeedOp.v
@@ -131,6 +131,8 @@ Definition needs_of_operation (op: operation) (nv: nval): list nval :=
| Olongoffloat | Olonguoffloat | Ofloatoflong | Ofloatoflongu => op1 (default nv)
| Ointofsingle | Ointuofsingle | Osingleofint | Osingleofintu => op1 (default nv)
| Olongofsingle | Olonguofsingle | Osingleoflong | Osingleoflongu => op1 (default nv)
+ | Ointofsingle_ne | Ointuofsingle_ne => op1 (default nv)
+ | Olongoffloat_ne | Olonguoffloat_ne => op1 (default nv)
| Ocmp c => needs_of_condition c
| Oextfz _ _ | Oextfs _ _ | Oextfzl _ _ | Oextfsl _ _ => op1 (default nv)
| Oinsf _ _ | Oinsfl _ _ => op2 (default nv)
diff --git a/kvx/Op.v b/kvx/Op.v
index 4458adb3..8549fc38 100644
--- a/kvx/Op.v
+++ b/kvx/Op.v
@@ -209,6 +209,11 @@ Inductive operation : Type :=
| Olonguofsingle (**r [rd = unsigned_long_of_float32(r1)] *)
| Osingleoflong (**r [rd = float32_of_signed_long(r1)] *)
| Osingleoflongu (**r [rd = float32_of_unsigned_int(r1)] *)
+ | Ointofsingle_ne (**r [rd = signed_int_of_float64(r1)] *)
+ | Ointuofsingle_ne (**r [rd = unsigned_int_of_float64(r1)] *)
+ | Olongoffloat_ne (**r [rd = signed_long_of_float64(r1)] *)
+ | Olonguoffloat_ne (**r [rd = unsigned_long_of_float64(r1)] *)
+
(*c Boolean tests: *)
| Ocmp (cond: condition) (**r [rd = 1] if condition holds, [rd = 0] otherwise. *)
| Oextfz (stop : Z) (start : Z)
@@ -466,6 +471,11 @@ Definition eval_operation
| Olonguofsingle, v1::nil => Some (Val.maketotal (Val.longuofsingle v1))
| Osingleoflong, v1::nil => Some (Val.maketotal (Val.singleoflong v1))
| Osingleoflongu, v1::nil => Some (Val.maketotal (Val.singleoflongu v1))
+ | Ointofsingle_ne, v1::nil => Some (Val.maketotal (Val.intofsingle_ne v1))
+ | Ointuofsingle_ne, v1::nil => Some (Val.maketotal (Val.intuofsingle_ne v1))
+ | Olongoffloat_ne, v1::nil => Some (Val.maketotal (Val.longoffloat_ne v1))
+ | Olonguoffloat_ne, v1::nil => Some (Val.maketotal (Val.longuoffloat_ne v1))
+
| Ocmp c, _ => Some (Val.of_optbool (eval_condition c vl m))
| (Oextfz stop start), v0::nil => Some (extfz stop start v0)
| (Oextfs stop start), v0::nil => Some (extfs stop start v0)
@@ -683,6 +693,12 @@ Definition type_of_operation (op: operation) : list typ * typ :=
| Olonguofsingle => (Tsingle :: nil, Tlong)
| Osingleoflong => (Tlong :: nil, Tsingle)
| Osingleoflongu => (Tlong :: nil, Tsingle)
+
+ | Ointofsingle_ne => (Tsingle :: nil, Tint)
+ | Ointuofsingle_ne => (Tsingle :: nil, Tint)
+ | Olongoffloat_ne => (Tfloat :: nil, Tlong)
+ | Olonguoffloat_ne => (Tfloat :: nil, Tlong)
+
| Ocmp c => (type_of_condition c, Tint)
| Oextfz _ _ | Oextfs _ _ => (Tint :: nil, Tint)
| Oextfzl _ _ | Oextfsl _ _ => (Tlong :: nil, Tlong)
@@ -980,6 +996,12 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
(* singleoflong, singleoflongu *)
- destruct v0; cbn...
- destruct v0; cbn...
+ (* intofsingle_ne, intuofsingle_ne *)
+ - destruct v0; cbn... destruct (Float32.to_int_ne f); cbn; trivial.
+ - destruct v0; cbn... destruct (Float32.to_intu_ne f); cbn; trivial.
+ (* longoffloat_ne, longuoffloat_ne *)
+ - destruct v0; cbn... destruct (Float.to_long_ne f); cbn; trivial.
+ - destruct v0; cbn... destruct (Float.to_longu_ne f); cbn; trivial.
(* cmp *)
- destruct (eval_condition cond vl m)... destruct b...
(* extfz *)
@@ -1669,6 +1691,14 @@ Proof.
(* singleoflong, singleoflongu *)
- inv H4; cbn; auto.
- inv H4; cbn; auto.
+
+ (* intofsingle_ne, intuofsingle_ne *)
+ - inv H4; cbn; auto. destruct (Float32.to_int_ne f0); cbn; auto.
+ - inv H4; cbn; auto. destruct (Float32.to_intu_ne f0); cbn; auto.
+ (* longoffloat_ne, longuoffloat_ne *)
+ - inv H4; cbn; auto. destruct (Float.to_long_ne f0); cbn; auto.
+ - inv H4; cbn; auto. destruct (Float.to_longu_ne f0); cbn; auto.
+
(* cmp *)
- subst v1. destruct (eval_condition cond vl1 m1) eqn:?.
exploit eval_condition_inj; eauto. intros EQ; rewrite EQ.
diff --git a/kvx/PostpassSchedulingOracle.ml b/kvx/PostpassSchedulingOracle.ml
index 3f4520a6..f53da14b 100644
--- a/kvx/PostpassSchedulingOracle.ml
+++ b/kvx/PostpassSchedulingOracle.ml
@@ -47,7 +47,8 @@ type real_instruction =
| Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw
| Fmind | Fminw | Fmaxd | Fmaxw | Finvw
| Ffmaw | Ffmad | Ffmsw | Ffmsd
- | Fnarrowdw | Fwidenlwd | Floatwz | Floatuwz | Floatdz | Floatudz | Fixedwz | Fixeduwz | Fixeddz | Fixedudz
+ | Fnarrowdw | Fwidenlwd | Floatwz | Floatuwz | Floatdz | Floatudz
+ | Fixedw | Fixeduw | Fixedd | Fixedud
| Fcompw | Fcompd
type ab_inst_rec = {
@@ -86,12 +87,12 @@ let arith_rr_real = function
| Pfloatuwrnsz -> Floatuwz
| Pfloatudrnsz -> Floatudz
| Pfloatdrnsz -> Floatdz
- | Pfixedwrzz -> Fixedwz
- | Pfixeduwrzz -> Fixeduwz
- | Pfixeddrzz -> Fixeddz
- | Pfixedudrzz -> Fixedudz
- | Pfixeddrzz_i32 -> Fixeddz
- | Pfixedudrzz_i32 -> Fixedudz
+ | Pfixedwrzz | Pfixedwrne -> Fixedw
+ | Pfixeduwrzz | Pfixeduwrne -> Fixeduw
+ | Pfixeddrzz | Pfixeddrne -> Fixedd
+ | Pfixedudrzz| Pfixedudrne -> Fixedud
+ | Pfixeddrzz_i32 -> Fixedd
+ | Pfixedudrzz_i32 -> Fixedud
let arith_rrr_real = function
| Pcompw it -> Compw
@@ -643,7 +644,7 @@ let rec_to_usage r =
(* TODO: check *)
| Srsw | Srsd | Rorw -> (match encoding with None | Some U6 -> alu_lite | _ -> raise InvalidEncoding)
| Extfz | Extfs | Insf -> (match encoding with None -> alu_lite | _ -> raise InvalidEncoding)
- | Fixeduwz | Fixedwz | Floatwz | Floatuwz | Fixeddz | Fixedudz | Floatdz | Floatudz -> mau
+ | Fixeduw | Fixedw | Floatwz | Floatuwz | Fixedd | Fixedud | Floatdz | Floatudz -> mau
| Lbs | Lbz | Lhs | Lhz | Lws | Ld | Lq | Lo ->
(match encoding with None | Some U6 | Some S10 -> lsu_auxw
| Some U27L5 | Some U27L10 -> lsu_auxw_x
@@ -681,7 +682,7 @@ let real_inst_to_latency = function
| Extfs | Extfz | Insf | Fcompw | Fcompd | Cmoved | Addxw | Addxd
| Fmind | Fmaxd | Fminw | Fmaxw
-> 1
- | Floatwz | Floatuwz | Fixeduwz | Fixedwz | Floatdz | Floatudz | Fixeddz | Fixedudz -> 4
+ | Floatwz | Floatuwz | Fixeduw | Fixedw | Floatdz | Floatudz | Fixedd | Fixedud -> 4
| Mulw | Muld | Maddw | Maddd | Msbfw | Msbfd -> 2 (* FIXME - WORST CASE. If it's S10 then it's only 1 *)
| Lbs | Lbz | Lhs | Lhz | Lws | Ld | Lq | Lo -> 3
| Sb | Sh | Sw | Sd | Sq | So -> 1 (* See kvx-Optimization.pdf page 19 *)
diff --git a/kvx/SelectOp.vp b/kvx/SelectOp.vp
index 4e1087f9..70941c48 100644
--- a/kvx/SelectOp.vp
+++ b/kvx/SelectOp.vp
@@ -742,6 +742,8 @@ Nondetfunction gen_fmaf args :=
| _ => None
end.
+Require FPDivision32.
+
Definition platform_builtin (b: platform_builtin) (args: exprlist) : option expr :=
match b with
| BI_fmin => Some (Eop Ominf args)
@@ -750,6 +752,12 @@ Definition platform_builtin (b: platform_builtin) (args: exprlist) : option expr
| BI_fmaxf => Some (Eop Omaxfs args)
| BI_fma => gen_fma args
| BI_fmaf => gen_fmaf args
+ | BI_lround_ne => Some (Eop Olongoffloat_ne args)
+ | BI_luround_ne => Some (Eop Olonguoffloat_ne args)
+ | BI_fp_udiv32 => (match args with
+ | a:::b:::Enil => Some (FPDivision32.fp_divu32 a b)
+ | _ => None
+ end)
end.
End SELECT.
diff --git a/kvx/SelectOpproof.v b/kvx/SelectOpproof.v
index 0ede1e2d..658bf0b3 100644
--- a/kvx/SelectOpproof.v
+++ b/kvx/SelectOpproof.v
@@ -1883,6 +1883,9 @@ Proof.
destruct v2; simpl; trivial.
Qed.
+
+Require FPDivision32.
+
Theorem eval_platform_builtin:
forall bf al a vl v le,
platform_builtin bf al = Some a ->
@@ -1896,6 +1899,36 @@ Proof.
repeat (try econstructor; try eassumption)).
- apply eval_fma; assumption.
- apply eval_fmaf; assumption.
+ - cbn in *;
+ destruct vl; trivial. destruct vl; trivial.
+ destruct v0; try discriminate;
+ cbn; rewrite H0; reflexivity.
+ - cbn in *;
+ destruct vl; trivial. destruct vl; trivial.
+ destruct v0; try discriminate;
+ cbn; rewrite H0; reflexivity.
+ - cbn in *.
+ intro EVAL.
+ inv EVAL. discriminate.
+ inv H0. discriminate.
+ inv H2. 2: discriminate.
+ inv Heval.
+ intro EVAL.
+ destruct v1; try discriminate.
+ destruct v0; try discriminate.
+ unfold Int.eq in EVAL.
+ change (Int.unsigned Int.zero) with 0 in EVAL.
+ unfold zeq in EVAL.
+ destruct (Z.eq_dec (Int.unsigned i0) 0) as [EQ | NEQ].
+ { discriminate. }
+ exists (Vint (Int.divu i i0)). split.
+ {
+ apply FPDivision32.fp_divu32_correct; auto.
+ pose proof (Int.unsigned_range i0).
+ lia.
+ }
+ inv EVAL.
+ constructor.
Qed.
End CMCONSTR.
diff --git a/kvx/TargetPrinter.ml b/kvx/TargetPrinter.ml
index 9e2e3776..01733858 100644
--- a/kvx/TargetPrinter.ml
+++ b/kvx/TargetPrinter.ml
@@ -586,6 +586,14 @@ module Target (*: TARGET*) =
fprintf oc " fixedd.rz %a = %a, 0\n" ireg rd ireg rs
| Pfixedudrzz(rd, rs) | Pfixedudrzz_i32(rd, rs) ->
fprintf oc " fixedud.rz %a = %a, 0\n" ireg rd ireg rs
+ | Pfixedudrne(rd, rs) ->
+ fprintf oc " fixedud.rn %a = %a, 0\n" ireg rd ireg rs
+ | Pfixeddrne(rd, rs) ->
+ fprintf oc " fixedd.rn %a = %a, 0\n" ireg rd ireg rs
+ | Pfixeduwrne(rd, rs) ->
+ fprintf oc " fixeduw.rn %a = %a, 0\n" ireg rd ireg rs
+ | Pfixedwrne(rd, rs) ->
+ fprintf oc " fixedw.rn %a = %a, 0\n" ireg rd ireg rs
(* Arith RI32 instructions *)
| Pmake (rd, imm) ->
diff --git a/kvx/ValueAOp.v b/kvx/ValueAOp.v
index 87554258..4909a25b 100644
--- a/kvx/ValueAOp.v
+++ b/kvx/ValueAOp.v
@@ -298,6 +298,10 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
| Olonguofsingle, v1::nil => longuofsingle_total v1
| Osingleoflong, v1::nil => singleoflong v1
| Osingleoflongu, v1::nil => singleoflongu v1
+ | Ointofsingle_ne, v1::nil => intofsingle_ne_total v1
+ | Ointuofsingle_ne, v1::nil => intuofsingle_ne_total v1
+ | Olongoffloat_ne, v1::nil => longoffloat_ne_total v1
+ | Olonguoffloat_ne, v1::nil => longuoffloat_ne_total v1
| Ocmp c, _ => of_optbool (eval_static_condition c vl)
| (Oextfz stop start), v0::nil => eval_static_extfz stop start v0
| (Oextfs stop start), v0::nil => eval_static_extfs stop start v0