diff options
author | David Monniaux <David.Monniaux@univ-grenoble-alpes.fr> | 2022-03-07 16:21:13 +0100 |
---|---|---|
committer | David Monniaux <David.Monniaux@univ-grenoble-alpes.fr> | 2022-03-07 16:21:13 +0100 |
commit | 25e82e849de35eaef24412b468d3a36c72f4fcb6 (patch) | |
tree | e6abc778dfa37ac5df55c8b0926ed681b9c04f04 /kvx | |
parent | ab776cd94e000d07c4d14521a8d0c635d3b8412c (diff) | |
parent | 2d9138547d93c32c0ec5ae54b4afc022f5c434ff (diff) | |
download | compcert-kvx-25e82e849de35eaef24412b468d3a36c72f4fcb6.tar.gz compcert-kvx-25e82e849de35eaef24412b468d3a36c72f4fcb6.zip |
Merge remote-tracking branch 'origin/kvx_fp_division' into kvx-work
Diffstat (limited to 'kvx')
-rw-r--r-- | kvx/Asm.v | 12 | ||||
-rw-r--r-- | kvx/Asmblockdeps.v | 4 | ||||
-rw-r--r-- | kvx/Asmblockgen.v | 12 | ||||
-rw-r--r-- | kvx/Asmvliw.v | 9 | ||||
-rw-r--r-- | kvx/Builtins1.v | 72 | ||||
-rw-r--r-- | kvx/CBuiltins.ml | 20 | ||||
-rw-r--r-- | kvx/ExtFloats.v | 5 | ||||
-rw-r--r-- | kvx/ExtIEEE754.v | 12 | ||||
-rw-r--r-- | kvx/ExtValues.v | 61 | ||||
-rw-r--r-- | kvx/ExtZ.v | 12 | ||||
-rw-r--r-- | kvx/FPDivision32.v | 883 | ||||
-rw-r--r-- | kvx/FPDivision64.v | 2670 | ||||
-rw-r--r-- | kvx/NeedOp.v | 2 | ||||
-rw-r--r-- | kvx/Op.v | 30 | ||||
-rw-r--r-- | kvx/PostpassSchedulingOracle.ml | 8 | ||||
-rw-r--r-- | kvx/SelectOp.vp | 36 | ||||
-rw-r--r-- | kvx/SelectOpproof.v | 186 | ||||
-rw-r--r-- | kvx/TargetPrinter.ml | 8 | ||||
-rw-r--r-- | kvx/ValueAOp.v | 4 |
19 files changed, 4040 insertions, 6 deletions
@@ -163,6 +163,8 @@ Inductive instruction : Type := | Pfloatuwrnsz (rd rs: ireg) (**r Floating Point Conversion from integer (u32 -> 32) *) | Pfloatudrnsz (rd rs: ireg) (**r Floating Point Conversion from unsigned integer (64 bits) *) | Pfloatdrnsz (rd rs: ireg) (**r Floating Point Conversion from integer (64 bits) *) + + (* round to zero *) | Pfixedwrzz (rd rs: ireg) (**r Integer conversion from floating point *) | Pfixeduwrzz (rd rs: ireg) (**r Integer conversion from floating point (f32 -> 32 bits unsigned *) | Pfixeddrzz (rd rs: ireg) (**r Integer conversion from floating point (i64 -> 64 bits) *) @@ -170,6 +172,12 @@ Inductive instruction : Type := | Pfixedudrzz (rd rs: ireg) (**r unsigned Integer conversion from floating point (u64 -> 64 bits) *) | Pfixedudrzz_i32 (rd rs: ireg) (**r unsigned Integer conversion from floating point (u32 -> 64 bits) *) + (* round to nearest, prefer even numbers *) + | Pfixedwrne (rd rs: ireg) (**r Integer conversion from floating point *) + | Pfixeduwrne (rd rs: ireg) (**r Integer conversion from floating point (f32 -> 32 bits unsigned *) + | Pfixeddrne (rd rs: ireg) (**r Integer conversion from floating point (i64 -> 64 bits) *) + | Pfixedudrne (rd rs: ireg) (**r unsigned Integer conversion from floating point (u64 -> 64 bits) *) + (** Arith RI32 *) | Pmake (rd: ireg) (imm: int) (**r load immediate *) @@ -357,6 +365,10 @@ Definition basic_to_instruction (b: basic) := | PArithRR Asmvliw.Pfixedudrzz rd rs => Pfixedudrzz rd rs | PArithRR Asmvliw.Pfixeddrzz_i32 rd rs => Pfixeddrzz_i32 rd rs | PArithRR Asmvliw.Pfixedudrzz_i32 rd rs => Pfixedudrzz_i32 rd rs + | PArithRR Asmvliw.Pfixedwrne rd rs => Pfixedwrne rd rs + | PArithRR Asmvliw.Pfixeduwrne rd rs => Pfixeduwrne rd rs + | PArithRR Asmvliw.Pfixeddrne rd rs => Pfixeddrne rd rs + | PArithRR Asmvliw.Pfixedudrne rd rs => Pfixedudrne rd rs (* RI32 *) | PArithRI32 Asmvliw.Pmake rd imm => Pmake rd imm diff --git a/kvx/Asmblockdeps.v b/kvx/Asmblockdeps.v index b1895ee6..78a766ee 100644 --- a/kvx/Asmblockdeps.v +++ b/kvx/Asmblockdeps.v @@ -1537,6 +1537,10 @@ Definition string_of_name_rr (n: arith_name_rr): pstring := | Pfixedudrzz => "Pfixedudrzz" | Pfixeddrzz_i32 => "Pfixeddrzz_i32" | Pfixedudrzz_i32 => "Pfixedudrzz_i32" + | Pfixedwrne => "Pfixedwrne" + | Pfixeduwrne => "Pfixeduwrne" + | Pfixeddrne => "Pfixeddrne" + | Pfixedudrne => "Pfixedudrne" end. Definition string_of_name_ri32 (n: arith_name_ri32): pstring := diff --git a/kvx/Asmblockgen.v b/kvx/Asmblockgen.v index fd41dfdd..07b31b11 100644 --- a/kvx/Asmblockgen.v +++ b/kvx/Asmblockgen.v @@ -811,6 +811,12 @@ Definition transl_op | Ointuofsingle, a1 :: nil => do rd <- ireg_of res; do rs <- freg_of a1; OK (Pfixeduwrzz rd rs ::i k) + | Ointofsingle_ne, a1 :: nil => + do rd <- ireg_of res; do rs <- freg_of a1; + OK (Pfixedwrne rd rs ::i k) + | Ointuofsingle_ne, a1 :: nil => + do rd <- ireg_of res; do rs <- freg_of a1; + OK (Pfixeduwrne rd rs ::i k) | Olongoffloat, a1 :: nil => do rd <- ireg_of res; do rs <- freg_of a1; OK (Pfixeddrzz rd rs ::i k) @@ -823,6 +829,12 @@ Definition transl_op | Olonguoffloat, a1 :: nil => do rd <- ireg_of res; do rs <- freg_of a1; OK (Pfixedudrzz rd rs ::i k) + | Olongoffloat_ne, a1 :: nil => + do rd <- ireg_of res; do rs <- freg_of a1; + OK (Pfixeddrne rd rs ::i k) + | Olonguoffloat_ne, a1 :: nil => + do rd <- ireg_of res; do rs <- freg_of a1; + OK (Pfixedudrne rd rs ::i k) | Ofloatofsingle, a1 :: nil => do rd <- freg_of res; do rs <- freg_of a1; diff --git a/kvx/Asmvliw.v b/kvx/Asmvliw.v index 0ce2ed69..3fa184c6 100644 --- a/kvx/Asmvliw.v +++ b/kvx/Asmvliw.v @@ -402,7 +402,10 @@ Inductive arith_name_rr : Type := | Pfixedudrzz (**r Integer conversion from floating point (float -> unsigned long) *) | Pfixeddrzz_i32 (**r Integer conversion from floating point (float -> int) *) | Pfixedudrzz_i32 (**r Integer conversion from floating point (float -> unsigned int) *) -. + | Pfixedwrne (**r Integer conversion from floating point *) + | Pfixeduwrne (**r Integer conversion from floating point (f32 -> 32 bits unsigned *) + | Pfixeddrne (**r Integer conversion from floating point (i64 -> 64 bits) *) + | Pfixedudrne. (**r unsigned Integer conversion from floating point (u64 -> 64 bits) *) Inductive arith_name_ri32 : Type := | Pmake (**r load immediate *) @@ -960,6 +963,10 @@ Definition arith_eval_rr n v := | Pfixedudrzz => Val.maketotal (Val.longuoffloat v) | Pfixeddrzz_i32 => Val.maketotal (Val.intoffloat v) | Pfixedudrzz_i32 => Val.maketotal (Val.intuoffloat v) + | Pfixedudrne => Val.maketotal (Val.longuoffloat_ne v) + | Pfixeddrne => Val.maketotal (Val.longoffloat_ne v) + | Pfixeduwrne => Val.maketotal (Val.intuofsingle_ne v) + | Pfixedwrne => Val.maketotal (Val.intofsingle_ne v) end. Definition arith_eval_ri32 n i := diff --git a/kvx/Builtins1.v b/kvx/Builtins1.v index b5fc7459..5536e58c 100644 --- a/kvx/Builtins1.v +++ b/kvx/Builtins1.v @@ -26,6 +26,16 @@ Inductive platform_builtin : Type := | BI_fmaxf | BI_fma | BI_fmaf +| BI_lround_ne +| BI_luround_ne +| BI_fp_udiv32 +| BI_fp_udiv64 +| BI_fp_umod32 +| BI_fp_umod64 +| BI_fp_sdiv32 +| BI_fp_sdiv64 +| BI_fp_smod32 +| BI_fp_smod64 | BI_abs | BI_absl. @@ -38,6 +48,16 @@ Definition platform_builtin_table : list (string * platform_builtin) := :: ("__builtin_fmaxf", BI_fmaxf) :: ("__builtin_fma", BI_fma) :: ("__builtin_fmaf", BI_fmaf) + :: ("__builtin_lround_ne", BI_lround_ne) + :: ("__builtin_luround_ne", BI_luround_ne) + :: ("__builtin_fp_udiv32", BI_fp_udiv32) + :: ("__builtin_fp_udiv64", BI_fp_udiv64) + :: ("__builtin_fp_umod32", BI_fp_umod32) + :: ("__builtin_fp_umod64", BI_fp_umod64) + :: ("__builtin_fp_sdiv32", BI_fp_sdiv32) + :: ("__builtin_fp_sdiv64", BI_fp_sdiv64) + :: ("__builtin_fp_smod32", BI_fp_smod32) + :: ("__builtin_fp_smod64", BI_fp_smod64) :: ("__builtin_abs", BI_abs) :: ("__builtin_absl", BI_absl) :: nil. @@ -52,6 +72,24 @@ Definition platform_builtin_sig (b: platform_builtin) : signature := mksignature (Tfloat :: Tfloat :: Tfloat :: nil) Tfloat cc_default | BI_fmaf => mksignature (Tsingle :: Tsingle :: Tsingle :: nil) Tsingle cc_default + | BI_lround_ne | BI_luround_ne => + mksignature (Tfloat :: nil) Tlong cc_default + | BI_fp_udiv32 => + mksignature (Tint :: Tint :: nil) Tint cc_default + | BI_fp_udiv64 => + mksignature (Tlong :: Tlong :: nil) Tlong cc_default + | BI_fp_umod32 => + mksignature (Tint :: Tint :: nil) Tint cc_default + | BI_fp_umod64 => + mksignature (Tlong :: Tlong :: nil) Tlong cc_default + | BI_fp_sdiv32 => + mksignature (Tint :: Tint :: nil) Tint cc_default + | BI_fp_sdiv64 => + mksignature (Tlong :: Tlong :: nil) Tlong cc_default + | BI_fp_smod32 => + mksignature (Tint :: Tint :: nil) Tint cc_default + | BI_fp_smod64 => + mksignature (Tlong :: Tlong :: nil) Tlong cc_default | BI_abs => mksignature (Tint :: nil) Tint cc_default | BI_absl => @@ -66,6 +104,40 @@ Definition platform_builtin_sem (b: platform_builtin) : builtin_sem (sig_res (pl | BI_fmaxf => mkbuiltin_n2t Tsingle Tsingle Tsingle ExtFloat32.max | BI_fma => mkbuiltin_n3t Tfloat Tfloat Tfloat Tfloat Float.fma | BI_fmaf => mkbuiltin_n3t Tsingle Tsingle Tsingle Tsingle Float32.fma + | BI_lround_ne => mkbuiltin_n1p Tfloat Tlong Float.to_long_ne + | BI_luround_ne => mkbuiltin_n1p Tfloat Tlong Float.to_longu_ne + | BI_fp_udiv32 => mkbuiltin_n2p Tint Tint Tint + (fun n1 n2 => if Int.eq n2 Int.zero + then None + else Some (Int.divu n1 n2)) + | BI_fp_udiv64 => mkbuiltin_n2p Tlong Tlong Tlong + (fun n1 n2 => if Int64.eq n2 Int64.zero + then None + else Some (Int64.divu n1 n2)) + | BI_fp_umod32 => mkbuiltin_n2p Tint Tint Tint + (fun n1 n2 => if Int.eq n2 Int.zero + then None + else Some (Int.modu n1 n2)) + | BI_fp_umod64 => mkbuiltin_n2p Tlong Tlong Tlong + (fun n1 n2 => if Int64.eq n2 Int64.zero + then None + else Some (Int64.modu n1 n2)) + | BI_fp_sdiv32 => mkbuiltin_n2p Tint Tint Tint + (fun n1 n2 => if Int.eq n2 Int.zero + then None + else Some (Int.divs n1 n2)) + | BI_fp_sdiv64 => mkbuiltin_n2p Tlong Tlong Tlong + (fun n1 n2 => if Int64.eq n2 Int64.zero + then None + else Some (Int64.divs n1 n2)) + | BI_fp_smod32 => mkbuiltin_n2p Tint Tint Tint + (fun n1 n2 => if Int.eq n2 Int.zero + then None + else Some (Int.mods n1 n2)) + | BI_fp_smod64 => mkbuiltin_n2p Tlong Tlong Tlong + (fun n1 n2 => if Int64.eq n2 Int64.zero + then None + else Some (Int64.mods n1 n2)) | BI_abs => mkbuiltin_n1t Tint Tint ExtValues.int_abs | BI_absl => mkbuiltin_n1t Tlong Tlong ExtValues.long_abs end. diff --git a/kvx/CBuiltins.ml b/kvx/CBuiltins.ml index 4d016453..c0b69adf 100644 --- a/kvx/CBuiltins.ml +++ b/kvx/CBuiltins.ml @@ -133,6 +133,26 @@ let builtins = { "__builtin_fmaf", (TFloat(FFloat, []), [TFloat(FFloat, []); TFloat(FFloat, []); TFloat(FFloat, [])], false); + "__builtin_lround_ne", + (TInt(ILong, []), [TFloat(FDouble, [])], false); + "__builtin_luround_ne", + (TInt(IULong, []), [TFloat(FDouble, [])], false); + "__builtin_fp_udiv32", + (TInt(IUInt, []), [TInt(IUInt, []); TInt(IUInt, [])], false); + "__builtin_fp_udiv64", + (TInt(IULong, []), [TInt(IULong, []); TInt(IULong, [])], false); + "__builtin_fp_umod32", + (TInt(IUInt, []), [TInt(IUInt, []); TInt(IUInt, [])], false); + "__builtin_fp_umod64", + (TInt(IULong, []), [TInt(IULong, []); TInt(IULong, [])], false); + "__builtin_fp_sdiv32", + (TInt(IInt, []), [TInt(IInt, []); TInt(IInt, [])], false); + "__builtin_fp_sdiv64", + (TInt(ILong, []), [TInt(ILong, []); TInt(ILong, [])], false); + "__builtin_fp_smod32", + (TInt(IInt, []), [TInt(IInt, []); TInt(IInt, [])], false); + "__builtin_fp_smod64", + (TInt(ILong, []), [TInt(ILong, []); TInt(ILong, [])], false); "__builtin_abs", (TInt(IInt, []), [TInt(IInt, [])], false); "__builtin_absl", diff --git a/kvx/ExtFloats.v b/kvx/ExtFloats.v index b08503a5..332d3e3d 100644 --- a/kvx/ExtFloats.v +++ b/kvx/ExtFloats.v @@ -13,6 +13,8 @@ (* *) (* *************************************************************) +From Flocq Require Import Core Digits Operations Round Bracket Sterbenz + Binary Round_odd. Require Import Floats Integers ZArith. Module ExtFloat. @@ -30,6 +32,8 @@ Definition max (x : float) (y : float) : float := | Some Eq | Some Gt => x | Some Lt | None => y end. + +Definition one := Float.of_int (Int.repr (1%Z)). End ExtFloat. Module ExtFloat32. @@ -50,5 +54,4 @@ Definition max (x : float32) (y : float32) : float32 := Definition one := Float32.of_int (Int.repr (1%Z)). Definition inv (x : float32) : float32 := Float32.div one x. - End ExtFloat32. diff --git a/kvx/ExtIEEE754.v b/kvx/ExtIEEE754.v new file mode 100644 index 00000000..095fd0cc --- /dev/null +++ b/kvx/ExtIEEE754.v @@ -0,0 +1,12 @@ +Require Import Coq.ZArith.Zdiv. + +Open Scope Z. + +Definition Zdiv_ne (a b : Z) := + let q := Z.div a b in + let q1 := Z.succ q in + match Z.compare (a-b*q) (b*q1-a) with + | Lt => q + | Gt => q1 + | Eq => (if Z.even q then q else q1) + end. diff --git a/kvx/ExtValues.v b/kvx/ExtValues.v index c493f708..f08c1157 100644 --- a/kvx/ExtValues.v +++ b/kvx/ExtValues.v @@ -755,6 +755,67 @@ Definition fmaddfs := triple_op_single (fun f1 f2 f3 => Float32.fma f2 f3 f1). Definition fmsubf := triple_op_float (fun f1 f2 f3 => Float.fma (Float.neg f2) f3 f1). Definition fmsubfs := triple_op_single (fun f1 f2 f3 => Float32.fma (Float32.neg f2) f3 f1). +From Flocq Require Import Core Digits Operations Round Bracket Sterbenz + Binary Round_odd. +Require Import IEEE754_extra Zdiv Psatz Floats ExtFloats. + +Definition div_approx_reals (a b : Z) (x : R) := + let q:=ZnearestE x in + let r:=a-q*b in + if r <? 0 + then q-1 + else q. + +Lemma floor_ball1: + forall x : R, forall y : Z, + (Rabs (x - IZR y) < 1)%R -> Zfloor x = (y-1)%Z \/ Zfloor x = y. +Proof. + intros x y BALL. + apply Rabs_lt_inv in BALL. + case (Rcompare_spec x (IZR y)); intro CMP. + - left. apply Zfloor_imp. + ring_simplify (y-1+1). + rewrite minus_IZR. lra. + - subst. + rewrite Zfloor_IZR. right. reflexivity. + - right. apply Zfloor_imp. + rewrite plus_IZR. lra. +Qed. + +Theorem div_approx_reals_correct: + forall a b : Z, forall x : R, + b > 0 -> + (Rabs (x - IZR a/ IZR b) < 1/2)%R -> + div_approx_reals a b x = (a/b)%Z. +Proof. + intros a b x bPOS GAP. + assert (0 < IZR b)%R by (apply IZR_lt ; lia). + unfold div_approx_reals. + pose proof (Znearest_imp2 (fun x => negb (Z.even x)) x) as NEAR. + assert (Rabs (IZR (ZnearestE x) - IZR a/ IZR b) < 1)%R as BALL. + { pose proof (Rabs_triang (IZR (ZnearestE x) - x) + (x - IZR a/ IZR b)) as TRI. + ring_simplify (IZR (ZnearestE x) - x + (x - IZR a / IZR b))%R in TRI. + lra. + } + clear GAP NEAR. + rewrite Rabs_minus_sym in BALL. + pose proof (floor_ball1 _ _ BALL) as FLOOR. + clear BALL. + rewrite Zfloor_div in FLOOR by lia. + pose proof (Z_div_mod_eq_full a b) as DIV_MOD. + assert (0 < b) as bPOS' by lia. + pose proof (Z.mod_pos_bound a b bPOS') as MOD_BOUNDS. + case Z.ltb_spec; intro; destruct FLOOR; lia. +Qed. + +Definition my_div (a b : val) := + let b_d := Val.maketotal (Val.floatofintu b) in + let invb_d := Val.floatofsingle (invfs (Val.maketotal (Val.singleofintu b))) in + let alpha := fmsubf (Vfloat ExtFloat.one) invb_d b_d in + let x := fmaddf invb_d alpha invb_d in + Val.mulf (Val.maketotal (Val.floatofintu a)) x. + Definition int_abs i1 := Int.repr (Z.abs (Int.signed i1)). Definition long_abs i1 := Int64.repr (Z.abs (Int64.signed i1)). diff --git a/kvx/ExtZ.v b/kvx/ExtZ.v new file mode 100644 index 00000000..095fd0cc --- /dev/null +++ b/kvx/ExtZ.v @@ -0,0 +1,12 @@ +Require Import Coq.ZArith.Zdiv. + +Open Scope Z. + +Definition Zdiv_ne (a b : Z) := + let q := Z.div a b in + let q1 := Z.succ q in + match Z.compare (a-b*q) (b*q1-a) with + | Lt => q + | Gt => q1 + | Eq => (if Z.even q then q else q1) + end. diff --git a/kvx/FPDivision32.v b/kvx/FPDivision32.v new file mode 100644 index 00000000..5a7b536f --- /dev/null +++ b/kvx/FPDivision32.v @@ -0,0 +1,883 @@ +From Flocq Require Import Core Digits Operations Round Bracket Sterbenz + Binary Round_odd Bits. +Require Archi. +Require Import Coqlib. +Require Import Compopts. +Require Import AST. +Require Import Integers. +Require Import Floats. +Require Import Op. +Require Import CminorSel. +Require Import OpHelpers. +Require Import ExtFloats. +Require Import DecBoolOps. +Require Import Chunks. +Require Import Builtins. +Require Import Values Globalenvs. +Require Compopts. +Require Import Psatz. +Require Import IEEE754_extra. + +From Gappa Require Import Gappa_tactic. + +Local Open Scope cminorsel_scope. + +Definition approx_inv b := + let invb_s := Eop Oinvfs ((Eop Osingleofintu ((Eletvar 0%nat):::Enil)):::Enil) in + let invb_d := Eop Ofloatofsingle (invb_s ::: Enil) in + let b_d := Eop Ofloatoflongu ((Eop Ocast32unsigned ((Eletvar 1%nat):::Enil)):::Enil) in + let invb_d_var := Eletvar (0%nat) in + let one := Eop (Ofloatconst ExtFloat.one) Enil in + let alpha := Eop Ofmsubf (one ::: invb_d_var ::: b_d ::: Enil) in + let x := Eop Ofmaddf (invb_d_var ::: alpha ::: invb_d_var ::: Enil) in + Elet b (Elet invb_d x). + +Definition approx_inv_thresh := (1/17179869184)%R. + +(* +Lemma BofZ_exact_zero: + forall (prec emax : Z) (prec_gt_0_ : Prec_gt_0 prec) + (Hmax : (prec < emax)%Z), + B2R (BofZ prec emax 0%Z (Hmax := Hmax)) = 0%R /\ + is_finite (BofZ prec emax 0%Z (Hmax := Hmax)) = true /\ + Bsign prec emax (BofZ prec emax 0%Z (Hmax := Hmax)) = false. +Proof. + intros. + apply BofZ_exact. + pose proof (Z.pow_nonneg 2 prec). + lia. +Qed. + *) + +Lemma Rabs_relax: + forall b b' (INEQ : (b < b')%R) x, + (-b <= x <= b)%R -> (Rabs x < b')%R. +Proof. + intros. + apply Rabs_lt. + lra. +Qed. + +Theorem approx_inv_correct : + forall (ge : genv) (sp: val) cmenv memenv + (le : letenv) (expr_b : expr) (b : int) + (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vint b)) + (b_nz : ((Int.unsigned b) > 0)%Z), + exists f : float, + eval_expr ge sp cmenv memenv le (approx_inv expr_b) (Vfloat f) /\ + is_finite _ _ f = true /\ (Rabs((B2R _ _ f) - (1 / IZR (Int.unsigned b))) <= approx_inv_thresh)%R. +Proof. + intros. unfold approx_inv. + econstructor. constructor. + { repeat econstructor. + { eassumption. } + { reflexivity. } } + set (invb_d := (Float.of_single (ExtFloat32.inv (Float32.of_intu b)))). + set (b' := Int.unsigned b) in *. + pose proof (Int.unsigned_range b) as RANGE. + fold b' in RANGE. + change Int.modulus with 4294967296%Z in RANGE. + assert (0 <= b' <= Int64.max_unsigned)%Z as b'RANGE. + { change Int64.max_unsigned with 18446744073709551615%Z. + lia. } + assert (1 <= IZR b' <= 4294967295)%R as RANGE'. + { split. + { apply IZR_le. lia. } + apply IZR_le. lia. + } + cbn. + + set (b_d := (Float.of_longu (Int64.repr b'))) in *. + Local Transparent Float.of_longu. + unfold Float.of_longu in b_d. + + assert(SILLY : (- 2 ^ 24 <= 1 <= 2 ^ 24)%Z) by lia. + destruct (BofZ_exact 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1 SILLY) as (C0E & C0F & _). + clear SILLY. + + assert(SILLY : (- 2 ^ 53 <= 1 <= 2 ^ 53)%Z) by lia. + destruct (BofZ_exact 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1 SILLY) as (C9E & C9F & _). + clear SILLY. + + pose proof (BofZ_correct 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) b') as C1. + rewrite Rlt_bool_true in C1; cycle 1. + { clear C1. cbn. + eapply (Rabs_relax (IZR 4294967296)). + lra. + gappa. + } + rewrite Zlt_bool_false in C1 by lia. + destruct C1 as (C1E & C1F & _). + + Local Transparent Float32.of_intu Float32.of_int Float32.div. + unfold ExtFloat32.inv, ExtFloat32.one, Float32.of_intu, Float32.of_int, Float32.div in invb_d. + fold b' in invb_d. + change (Int.signed (Int.repr 1%Z)) with 1%Z in invb_d. + pose proof (Bdiv_correct 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) Float32.binop_nan mode_NE + (BofZ 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1) + (BofZ 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) b')) as C2. + rewrite Rlt_bool_true in C2; cycle 1. + { clear C2. rewrite C1E. + apply (Rabs_relax (bpow radix2 10)). + { cbn; lra. } + unfold F2R. cbn. unfold F2R. cbn. + gappa. + } + assert (B2R 24 128 (BofZ 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) b') <> 0%R) as NONZ. + { clear C2. + rewrite C1E. + cbn. + assert (1 <= round radix2 (FLT_exp (-149) 24) ZnearestE (IZR b'))%R by gappa. + lra. + } + destruct (C2 NONZ) as (C2E & C2F & _). + clear C2 NONZ. + Local Transparent Float.of_single. + unfold Float.of_single in invb_d. + pose proof (Bconv_correct 24 128 53 1024 (@eq_refl Datatypes.comparison Lt) + (@eq_refl Datatypes.comparison Lt) Float.of_single_nan mode_NE + (Bdiv 24 128 (@eq_refl Datatypes.comparison Lt) + (@eq_refl Datatypes.comparison Lt) Float32.binop_nan mode_NE + (BofZ 24 128 (@eq_refl Datatypes.comparison Lt) + (@eq_refl Datatypes.comparison Lt) 1) + (BofZ 24 128 (@eq_refl Datatypes.comparison Lt) + (@eq_refl Datatypes.comparison Lt) b'))) as C3. + fold invb_d in C3. + rewrite Rlt_bool_true in C3; cycle 1. + { clear C3. + rewrite C2E. + rewrite C1E. + rewrite C0E. + apply (Rabs_relax (bpow radix2 10)). + { apply bpow_lt; lia. } + cbn. + gappa. + } + change (is_finite 24 128 (BofZ 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1)) with true in C2F. + destruct (C3 C2F) as (C3E & C3F & _). + clear C3. + unfold Float.fma. + assert (is_finite _ _ (Float.neg invb_d) = true) as invb_d_F. + { Local Transparent Float.neg. + unfold Float.neg. + rewrite is_finite_Bopp. + assumption. + } + + assert(SILLY : (- 2 ^ 53 <= b' <= 2 ^ 53)%Z) by lia. + destruct (BofZ_exact 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) b' SILLY) as (C4E & C4F & _). + clear SILLY. + + assert (is_finite 53 1024 b_d = true) as b_d_F. + { unfold b_d. + rewrite Int64.unsigned_repr by lia. + assumption. + } + + assert (is_finite 53 1024 ExtFloat.one = true) as one_F by reflexivity. + + pose proof (Bfma_correct 53 1024 (@eq_refl Datatypes.comparison Lt) + (@eq_refl Datatypes.comparison Lt) Float.fma_nan mode_NE + (Float.neg invb_d) b_d ExtFloat.one invb_d_F b_d_F one_F) as C5. + + rewrite Rlt_bool_true in C5; cycle 1. + { clear C5. + unfold Float.neg. + rewrite B2R_Bopp. + rewrite C3E. + rewrite C2E. + rewrite C0E. + rewrite C1E. + unfold ExtFloat.one. + change (Float.of_int (Int.repr 1)) with (BofZ 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1). + rewrite C9E. + unfold b_d. + rewrite Int64.unsigned_repr by lia. + rewrite C4E. + apply (Rabs_relax (bpow radix2 10)). + { apply bpow_lt; lia. } + cbn. + gappa. + } + destruct C5 as (C5E & C5F & _). + + pose proof (Bfma_correct 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) Float.fma_nan mode_NE + (Bfma 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) Float.fma_nan mode_NE + (Float.neg invb_d) b_d ExtFloat.one) invb_d invb_d C5F C3F C3F) as C6. + rewrite Rlt_bool_true in C6; cycle 1. + { clear C6. + rewrite C3E. + rewrite C2E. + rewrite C1E. + rewrite C0E. + rewrite C5E. + unfold Float.neg. + rewrite B2R_Bopp. + rewrite C3E. + rewrite C2E. + rewrite C0E. + rewrite C1E. + unfold b_d. + rewrite Int64.unsigned_repr by lia. + rewrite C4E. + unfold ExtFloat.one. + change (Float.of_int (Int.repr 1)) with (BofZ 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1). + rewrite C9E. + apply (Rabs_relax (bpow radix2 10)). + { apply bpow_lt; lia. } + cbn. + gappa. + } + destruct C6 as (C6E & C6F & _). + split. + { exact C6F. } + rewrite C6E. + rewrite C5E. + rewrite C3E. + rewrite C2E. + rewrite C1E. + rewrite C0E. + unfold Float.neg. + rewrite B2R_Bopp. + unfold ExtFloat.one. + Local Transparent Float.of_int. + unfold Float.of_int. + rewrite (Int.signed_repr 1) by (cbn ; lia). + rewrite C9E. + rewrite C3E. + rewrite C2E. + rewrite C0E. + rewrite C1E. + unfold b_d. + rewrite Int64.unsigned_repr by lia. + rewrite C4E. + cbn. + set (rd := round radix2 (FLT_exp (-1074) 53) ZnearestE) in *. + set (rs := round radix2 (FLT_exp (-149) 24) ZnearestE) in *. + set (bi := IZR b') in *. + set (invb0 := rd (rs (1/ rs bi))%R) in *. + set (alpha := (- invb0 * bi + 1)%R) in *. + set (alpha' := ((1/bi - rd (rs (1/ rs bi)))/(1/bi))%R) in *. + assert (alpha = alpha')%R as expand_alpha. + { + unfold alpha, alpha', invb0. + field. + lra. + } + assert(-1/2097152 <= alpha' <= 1/2097152)%R as alpha_BOUND. + { unfold alpha', rd, rs. + gappa. + } + set (delta := (rd (rd alpha * invb0 + invb0) - (alpha * invb0 + invb0))%R). + assert(-1/1125899906842624 <= delta <= 1/1125899906842624)%R as delta_BOUND. + { unfold delta, invb0. rewrite expand_alpha. unfold rd, rs. + gappa. + } + replace (rd (rd alpha * invb0 + invb0) - 1/bi)%R with + (delta + ((alpha * invb0 + invb0)-1/bi))%R by (unfold delta; ring). + replace (alpha * invb0 + invb0 - 1 / bi)%R with (alpha * (invb0 - 1/bi) + (alpha * (1/bi) + invb0 - 1 / bi))%R by ring. + replace (alpha * (1 / bi) + invb0 - 1 / bi)%R with 0%R; cycle 1. + { unfold alpha. + field. + lra. + } + rewrite expand_alpha. + unfold invb0, rd, rs, approx_inv_thresh. + apply Rabs_le. + gappa. +Qed. + +Definition fp_divu32 a b := + let a_var := Eletvar (1%nat) in + let b_var := Eletvar (0%nat) in + let a_d := Eop Ofloatoflongu ((Eop Ocast32unsigned (a_var ::: Enil)) ::: Enil) in + let qr := Eop Olonguoffloat_ne ((Eop Omulf (a_d:::(approx_inv b_var):::Enil)):::Enil) in + let qr_var := Eletvar 0%nat in + let rem := Eop Omsubl ((Eop Ocast32unsigned ((Eletvar (2%nat)):::Enil))::: + qr_var ::: + (Eop Ocast32unsigned ((Eletvar (1%nat)):::Enil)):::Enil) in + let qr_m1 := Eop (Oaddlimm (Int64.repr (-1)%Z)) (qr_var:::Enil) in + let cases := Eop (Osel (Ccompl0 Clt) Tlong) + (qr_m1 ::: qr_var ::: rem ::: Enil) in (* (Elet qr cases) *) + Eop Olowlong ((Elet a (Elet (lift b) (Elet qr cases))) ::: Enil). + +Open Scope Z. + +Definition div_approx_reals (a b : Z) (x : R) := + let q:=ZnearestE x in + let r:=a-q*b in + if r <? 0 + then q-1 + else q. + +Lemma floor_ball1: + forall x : R, forall y : Z, + (Rabs (x - IZR y) < 1)%R -> Zfloor x = (y-1)%Z \/ Zfloor x = y. +Proof. + intros x y BALL. + apply Rabs_lt_inv in BALL. + case (Rcompare_spec x (IZR y)); intro CMP. + - left. apply Zfloor_imp. + ring_simplify (y-1+1). + rewrite minus_IZR. lra. + - subst. + rewrite Zfloor_IZR. right. reflexivity. + - right. apply Zfloor_imp. + rewrite plus_IZR. lra. +Qed. + +Theorem div_approx_reals_correct: + forall a b : Z, forall x : R, + b > 0 -> + (Rabs (x - IZR a/ IZR b) < 1/2)%R -> + div_approx_reals a b x = (a/b)%Z. +Proof. + intros a b x bPOS GAP. + assert (0 < IZR b)%R by (apply IZR_lt ; lia). + unfold div_approx_reals. + pose proof (Znearest_imp2 (fun x => negb (Z.even x)) x) as NEAR. + assert (Rabs (IZR (ZnearestE x) - IZR a/ IZR b) < 1)%R as BALL. + { pose proof (Rabs_triang (IZR (ZnearestE x) - x) + (x - IZR a/ IZR b)) as TRI. + ring_simplify (IZR (ZnearestE x) - x + (x - IZR a / IZR b))%R in TRI. + lra. + } + clear GAP NEAR. + rewrite Rabs_minus_sym in BALL. + pose proof (floor_ball1 _ _ BALL) as FLOOR. + clear BALL. + rewrite Zfloor_div in FLOOR by lia. + pose proof (Z_div_mod_eq_full a b) as DIV_MOD. + assert (0 < b) as bPOS' by lia. + pose proof (Z.mod_pos_bound a b bPOS') as MOD_BOUNDS. + case Z.ltb_spec; intro; destruct FLOOR; lia. +Qed. + +Opaque approx_inv. + +Theorem fp_divu32_correct : + forall (ge : genv) (sp: val) cmenv memenv + (le : letenv) (expr_a expr_b : expr) (a b : int) + (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vint a)) + (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vint b)) + (b_nz : (Int.unsigned b > 0)%Z), + eval_expr ge sp cmenv memenv le (fp_divu32 expr_a expr_b) + (Vint (Int.divu a b)). +Proof. + intros. + assert (eval_expr ge sp cmenv memenv (Vint b :: Vint a :: le) + (Eletvar 0) (Vint b)) as EVAL_b'. + { constructor. reflexivity. } + destruct (approx_inv_correct ge sp cmenv memenv (Vint b :: Vint a :: le) (Eletvar 0) b EVAL_b' b_nz) as (inv_b & inv_b_eval & inv_b_finite & inv_b_correct). + unfold fp_divu32. + repeat econstructor. + { exact EVAL_a. } + { apply eval_lift. exact EVAL_b. } + exact inv_b_eval. + cbn. f_equal. + rewrite <- Float.of_intu_of_longu. + unfold Float.to_longu_ne. + rewrite ZofB_ne_range_correct by lia. + set (prod := (Float.mul (Float.of_intu a) inv_b)). + pose proof (Int.unsigned_range a) as a_range. + pose proof (Int.unsigned_range b) as b_range. + change Int.modulus with 4294967296 in a_range. + change Int.modulus with 4294967296 in b_range. + set (prod' := (B2R _ _ prod)). + set (prod_r := ZnearestE prod') in *. + + Local Transparent Float.mul Float.of_intu. + unfold Float.mul, Float.of_intu in prod. + set (a' := Int.unsigned a) in *. + set (b' := Int.unsigned b) in *. + assert (IZR_a' : (0 <= IZR a' <= 4294967295)%R). + { split; apply IZR_le; lia. } + assert (IZR_b' : (1 <= IZR b' <= 4294967295)%R). + { split; apply IZR_le; lia. } + assert (SILLY : -2^53 <= a' <= 2^53). + { cbn; lia. } + destruct (BofZ_exact 53 1024 (@eq_refl _ Lt) (@eq_refl _ Lt) a' SILLY) as (C0E & C0F & _). + clear SILLY. + pose proof (Bmult_correct 53 1024 (@eq_refl _ Lt) (@eq_refl _ Lt) Float.binop_nan mode_NE + (BofZ 53 1024 (@eq_refl _ Lt) (@eq_refl _ Lt)a') inv_b) as C1. + set (inv_b_r := B2R 53 1024 inv_b) in *. + assert (INV_RANGE : (1/4294967296 <= 1/IZR b' <= 1)%R). + { split; unfold Rdiv. + - apply Rmult_le_compat_l. lra. + apply Rinv_le. apply IZR_lt. lia. + apply IZR_le. lia. + - replace 1%R with (1 / 1)%R at 2 by field. + apply Rmult_le_compat_l. lra. + apply Rinv_le. apply IZR_lt. lia. + apply IZR_le. lia. + } + apply Rabs_def2b in inv_b_correct. + rewrite Rlt_bool_true in C1; cycle 1. + { clear C1. + rewrite C0E. + apply (Rabs_relax (bpow radix2 64)). + { apply bpow_lt. lia. } + replace inv_b_r with (1 / IZR b' + (inv_b_r - 1 / IZR b'))%R by ring. + set (delta := (inv_b_r - 1 / IZR b')%R) in *. + unfold approx_inv_thresh in inv_b_correct. + cbn. + assert (b'_RANGE : (1 <= IZR b' <= 4294967295)%R). + { split; apply IZR_le; lia. + } + assert (a'_RANGE : (0 <= IZR a' <= 4294967295)%R). + { split; apply IZR_le; lia. + } + gappa. + } + rewrite C0F in C1. + cbn in C1. + rewrite C0E in C1. + rewrite inv_b_finite in C1. + fold prod in C1. + fold prod' in C1. + destruct C1 as (C1E & C1F & _). + rewrite C1F. cbn. + + assert(prod'_range : (0 <= prod' <= 17179869181/4)%R). + { + rewrite C1E. + replace inv_b_r with (1/IZR b' + (inv_b_r - 1 / IZR b'))%R by ring. + assert (a'_RANGE : (0 <= IZR a' <= 4294967295)%R). + { split; apply IZR_le; lia. + } + unfold approx_inv_thresh in inv_b_correct. + set (true_inv := (1 / IZR b')%R) in *. + set (delta := (inv_b_r - true_inv)%R) in *. + gappa. + } + + assert(0 <= prod_r <= 4294967295) as prod_r_range. + { unfold prod_r. + rewrite <- (Znearest_IZR (fun x => negb (Z.even x)) 0). + replace 4294967295 with (ZnearestE (17179869181 / 4)%R); cycle 1. + { apply Znearest_imp. + apply Rabs_lt. + lra. + } + split; apply Znearest_le; lra. + } + + replace (_ && _ ) with true; cycle 1. + { + symmetry. + rewrite andb_true_iff. + split; apply Zle_imp_le_bool; lia. + } + cbn. + f_equal. + unfold Int.divu. + assert(Rabs + (round radix2 (FLT_exp (-1074) 53) ZnearestE (IZR a' * inv_b_r) - (IZR a' * inv_b_r)) <= 1/512)%R as R1 by gappa. + + assert ( (Rabs (B2R 53 1024 prod - IZR (Int.unsigned a) / IZR (Int.unsigned b)) < 1 / 2)%R) as NEAR. + { + unfold prod. + pose proof (Bmult_correct 53 1024 (@eq_refl _ Lt) (@eq_refl _ Lt) Float.binop_nan mode_NE (BofZ 53 1024 (@eq_refl _ Lt) (@eq_refl _ Lt) a') inv_b) as C2. + rewrite C0E in C2. + rewrite Rlt_bool_true in C2; cycle 1. + { clear C2. + apply (Rabs_relax (bpow radix2 64)). + { apply bpow_lt. reflexivity. } + cbn. + fold inv_b_r. + replace inv_b_r with (1 / IZR b' + (inv_b_r - 1 / IZR b'))%R by ring. + set (delta := (inv_b_r - 1 / IZR b')%R) in *. + unfold approx_inv_thresh in *. + gappa. + } + destruct C2 as (C2E & C2F & _). + rewrite C2E. + fold inv_b_r a' b'. + replace ((round radix2 (FLT_exp (3 - 1024 - 53) 53) (round_mode mode_NE) (IZR a' * inv_b_r)) - + (IZR a' / IZR b'))%R with + (((round radix2 (FLT_exp (3 - 1024 - 53) 53) (round_mode mode_NE) (IZR a' * inv_b_r)) - + (IZR a' * inv_b_r)) + + (IZR a' * (inv_b_r - 1 / IZR b')))%R by (field ; lra). + set(delta := (inv_b_r - 1 / IZR b')%R) in *. + cbn. + unfold approx_inv_thresh in *. + assert (Rabs(IZR a' * delta) <= 3/8)%R as R2. + { apply Rabs_le. + split; nra. + } + rewrite <- C1E. + rewrite <- C1E in R1. + pose proof (Rabs_triang (prod' - IZR a' * inv_b_r) (IZR a' * delta))%R. + lra. + } + pose proof (div_approx_reals_correct (Int.unsigned a) (Int.unsigned b) prod' b_nz NEAR) as DIV_CORRECT. + rewrite <- DIV_CORRECT. + + unfold div_approx_reals in *. + fold a' b' prod' prod_r. + unfold Int64.mul. + rewrite Int64.unsigned_repr by (cbn; lia). + rewrite Int64.unsigned_repr by (cbn; lia). + unfold Int64.sub. + rewrite Int64.unsigned_repr by (cbn; lia). + rewrite Int64.unsigned_repr by (cbn; nia). + assert (FMA_RANGE : Int64.min_signed <= a' - prod_r * b' <= Int64.max_signed). + { cbn. + unfold prod_r. + rewrite <- C1E in R1. + assert (IZR (-9223372036854775808) <= IZR (a' - ZnearestE prod' * b') <= IZR 9223372036854775807)%R. + 2: split; apply le_IZR; lra. + rewrite minus_IZR. + rewrite mult_IZR. + replace (IZR (ZnearestE prod')) with (prod' + (IZR (ZnearestE prod') - prod'))%R by ring. + pose proof (Znearest_imp2 (fun x => negb (Z.even x)) prod') as R2. + set (delta1 := (IZR (ZnearestE prod') - prod')%R) in *. + replace prod' with ((prod' - IZR a' * inv_b_r) + IZR a' * (inv_b_r - 1 / IZR b') + + IZR a' / IZR b')%R by (field; lra). + set (delta2 := (inv_b_r - 1 / IZR b')%R) in *. + set (delta3 := (prod' - IZR a' * inv_b_r)%R) in *. + replace (IZR a' - (delta3 + IZR a' * delta2 + IZR a' / IZR b' + delta1) * IZR b')%R with + (- (delta3 + IZR a' * delta2 + delta1) * IZR b')%R by (field; lra). + unfold approx_inv_thresh in *. + assert (Rabs(IZR a' * delta2) <= 1/4)%R as R4. + { apply Rabs_le. + split; + nra. } + set (delta4 := (IZR a' * delta2)%R) in *. + apply Rabs_def2b in R1. + apply Rabs_def2b in R2. + apply Rabs_def2b in R4. + split; nra. + } + fold a' b' prod_r in DIV_CORRECT. + + pose proof (Zlt_cases (a' - prod_r * b') 0) as CMP. + destruct (Z.ltb (a' - prod_r * b') 0). + - replace (Int64.lt (Int64.repr (a' - prod_r * b')) Int64.zero) with true; cycle 1. + { unfold Int64.lt. + change (Int64.signed Int64.zero) with 0. + rewrite Int64.signed_repr by exact FMA_RANGE. + rewrite zlt_true by lia. + reflexivity. + } + cbn. + f_equal. + rewrite Int64.add_signed. + rewrite (Int64.signed_repr prod_r) by (cbn ; lia). + rewrite Int64.signed_repr by (cbn ; lia). + unfold Int64.loword. + rewrite Int64.unsigned_repr. reflexivity. + split. + 2: cbn; lia. + replace (prod_r + (-1)) with (prod_r - 1) by lia. + rewrite DIV_CORRECT. + apply Z.div_pos; lia. + + - replace (Int64.lt (Int64.repr (a' - prod_r * b')) Int64.zero) with false; cycle 1. + { unfold Int64.lt. + change (Int64.signed Int64.zero) with 0. + rewrite Int64.signed_repr by exact FMA_RANGE. + rewrite zlt_false by lia. + reflexivity. + } + cbn. + unfold Int64.loword. + rewrite Int64.unsigned_repr by (cbn; lia). + reflexivity. +Qed. + +Definition e_msubl a b c := Eop Omsub (a ::: b ::: c ::: Enil). +Definition fp_modu32 a b := Elet a (Elet (lift b) (e_msubl (Eletvar 1%nat) (Eletvar 0%nat) + (fp_divu32 (Eletvar 1%nat) (Eletvar 0%nat)))). + +Theorem fp_modu32_correct : + forall (ge : genv) (sp: val) cmenv memenv + (le : letenv) (expr_a expr_b : expr) (a b : int) + (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vint a)) + (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vint b)) + (b_nz : (Int.unsigned b > 0)%Z), + eval_expr ge sp cmenv memenv le (fp_modu32 expr_a expr_b) + (Vint (Int.modu a b)). +Proof. + intros. + rewrite Int.modu_divu; cycle 1. + { intro Z. + subst. + rewrite Int.unsigned_zero in b_nz. + lia. + } + unfold fp_modu32. + Local Opaque fp_divu32. + repeat (econstructor + apply eval_lift + eassumption). + { apply fp_divu32_correct; + repeat (econstructor + apply eval_lift + eassumption). + } + cbn. + rewrite Int.mul_commut. + reflexivity. +Qed. + +Definition e_is_neg a := Eop (Ocmp (Ccompimm Clt Int.zero)) (a ::: Enil). +Definition e_xorw a b := Eop Oxor (a ::: b ::: Enil). +Definition e_ite ty c vc v1 v2 := Eop (Osel c ty) (v1 ::: v2 ::: vc ::: Enil). +Definition e_neg a := Eop Oneg (a ::: Enil). +Definition e_abs a := Eop (Oabsdiffimm Int.zero) (a ::: Enil). + +Definition fp_divs32 a b := + Elet a (Elet (lift b) + (Elet (fp_divu32 (e_abs (Eletvar (1%nat))) (e_abs (Eletvar (0%nat)))) + (e_ite Tint (Ccompu0 Cne) (e_xorw (e_is_neg (Eletvar 2%nat)) + (e_is_neg (Eletvar 1%nat))) + (e_neg (Eletvar 0%nat)) (Eletvar 0%nat)))). + +Lemma nonneg_signed_unsigned: + forall x (x_NONNEG : Int.signed x >= 0), + (Int.signed x) = (Int.unsigned x). +Proof. + intros. + pose proof (Int.unsigned_range x). + unfold Int.signed in *. + destruct zlt. reflexivity. + change Int.modulus with 4294967296%Z in *. + change Int.half_modulus with 2147483648%Z in *. + lia. +Qed. + +Lemma int_min_signed_unsigned : + (- Int.min_signed < Int.max_unsigned)%Z. +Proof. + reflexivity. +Qed. + +Lemma int_divs_divu : + forall a b + (b_NOT0 : Int.signed b <> 0), + Int.divs a b = if xorb (Int.lt a Int.zero) + (Int.lt b Int.zero) + then Int.neg (Int.divu (ExtValues.int_abs a) + (ExtValues.int_abs b)) + else Int.divu (ExtValues.int_abs a) (ExtValues.int_abs b). +Proof. + intros. + unfold Int.divs, Int.divu, Int.lt, ExtValues.int_abs. + pose proof (Int.signed_range a) as a_RANGE. + pose proof (Int.signed_range b) as b_RANGE. + change (Int.signed Int.zero) with 0%Z. + destruct zlt. + - cbn. rewrite (Z.abs_neq (Int.signed a)) by lia. + rewrite (Int.unsigned_repr (- Int.signed a)); cycle 1. + { pose proof int_min_signed_unsigned. lia. } + + destruct zlt. + + rewrite (Z.abs_neq (Int.signed b)) by lia. + rewrite Int.unsigned_repr ; cycle 1. + { pose proof int_min_signed_unsigned. lia. } + rewrite <- (Z.opp_involutive (Int.signed b)) at 1. + rewrite Z.quot_opp_r by lia. + rewrite <- (Z.opp_involutive (Int.signed a)) at 1. + rewrite Z.quot_opp_l by lia. + rewrite Z.quot_div_nonneg by lia. + rewrite Z.opp_involutive. + reflexivity. + + + rewrite (Z.abs_eq (Int.signed b)) by lia. + rewrite Int.unsigned_repr ; cycle 1. + { pose proof Int.max_signed_unsigned. lia. } + rewrite <- (Z.opp_involutive (Int.signed a)) at 1. + rewrite Z.quot_opp_l by lia. + rewrite Z.quot_div_nonneg by lia. + rewrite Int.neg_repr. + reflexivity. + + - cbn. rewrite (Z.abs_eq (Int.signed a)) by lia. + rewrite (Int.unsigned_repr (Int.signed a)); cycle 1. + { pose proof Int.max_signed_unsigned. lia. } + destruct zlt. + + rewrite (Z.abs_neq (Int.signed b)) by lia. + rewrite Int.unsigned_repr ; cycle 1. + { pose proof int_min_signed_unsigned. lia. } + rewrite Int.neg_repr. + rewrite <- (Z.opp_involutive (Int.signed b)) at 1. + rewrite Z.quot_opp_r by lia. + rewrite Z.quot_div_nonneg by lia. + reflexivity. + + + rewrite (Z.abs_eq (Int.signed b)) by lia. + rewrite Int.unsigned_repr ; cycle 1. + { pose proof Int.max_signed_unsigned. lia. } + rewrite Z.quot_div_nonneg by lia. + reflexivity. +Qed. + +Lemma nonzero_unsigned_signed : + forall b, Int.unsigned b > 0 -> Int.signed b <> 0. +Proof. + intros b GT EQ. + rewrite Int.unsigned_signed in GT. + unfold Int.lt in GT. + rewrite Int.signed_zero in GT. + destruct zlt in GT; lia. +Qed. + +Theorem fp_divs32_correct : + forall (ge : genv) (sp: val) cmenv memenv + (le : letenv) (expr_a expr_b : expr) (a b : int) + (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vint a)) + (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vint b)) + (b_nz : (Int.unsigned b > 0)%Z), + eval_expr ge sp cmenv memenv le (fp_divs32 expr_a expr_b) + (Vint (Int.divs a b)). +Proof. + intros. + unfold fp_divs32. + Local Opaque fp_divu32. + repeat (econstructor + apply eval_lift + eassumption). + apply fp_divu32_correct. + all: repeat (econstructor + apply eval_lift + eassumption). + { unfold ExtValues.int_absdiff, ExtValues.Z_abs_diff. + rewrite Int.signed_zero. rewrite Z.sub_0_r. + rewrite Int.unsigned_repr. + { pose proof (nonzero_unsigned_signed b b_nz). + lia. + } + pose proof Int.max_signed_unsigned. + pose proof int_min_signed_unsigned. + pose proof (Int.signed_range b). + lia. + } + cbn. + rewrite int_divs_divu ; cycle 1. + { apply nonzero_unsigned_signed. assumption. } + unfold Int.lt, ExtValues.int_abs, ExtValues.int_absdiff, ExtValues.Z_abs_diff. + change (Int.signed Int.zero) with 0%Z. + repeat rewrite Z.sub_0_r. + destruct zlt; destruct zlt; reflexivity. +Qed. + +Lemma int_mods_modu : + forall a b + (b_NOT0 : Int.signed b <> 0), + Int.mods a b = if Int.lt a Int.zero + then Int.neg (Int.modu (ExtValues.int_abs a) + (ExtValues.int_abs b)) + else Int.modu (ExtValues.int_abs a) (ExtValues.int_abs b). +Proof. + intros. + unfold Int.mods, Int.modu, Int.lt, ExtValues.int_abs. + pose proof (Int.signed_range a) as a_RANGE. + pose proof (Int.signed_range b) as b_RANGE. + change (Int.signed Int.zero) with 0%Z. + destruct zlt. + - cbn. rewrite (Z.abs_neq (Int.signed a)) by lia. + rewrite (Int.unsigned_repr (- Int.signed a)); cycle 1. + { pose proof int_min_signed_unsigned. lia. } + + destruct (zlt (Int.signed b) 0%Z). + + rewrite (Z.abs_neq (Int.signed b)) by lia. + rewrite Int.unsigned_repr ; cycle 1. + { pose proof int_min_signed_unsigned. lia. } + rewrite <- (Z.opp_involutive (Int.signed b)) at 1. + rewrite Z.rem_opp_r by lia. + rewrite <- (Z.opp_involutive (Int.signed a)) at 1. + rewrite Z.rem_opp_l by lia. + rewrite Z.rem_mod_nonneg by lia. + rewrite Int.neg_repr. + reflexivity. + + + rewrite (Z.abs_eq (Int.signed b)) by lia. + rewrite Int.unsigned_repr ; cycle 1. + { pose proof Int.max_signed_unsigned. lia. } + rewrite <- (Z.opp_involutive (Int.signed a)) at 1. + rewrite Z.rem_opp_l by lia. + rewrite Z.rem_mod_nonneg by lia. + rewrite Int.neg_repr. + reflexivity. + + - cbn. rewrite (Z.abs_eq (Int.signed a)) by lia. + rewrite (Int.unsigned_repr (Int.signed a)); cycle 1. + { pose proof Int.max_signed_unsigned. lia. } + destruct (zlt (Int.signed b) 0%Z). + + rewrite (Z.abs_neq (Int.signed b)) by lia. + rewrite Int.unsigned_repr ; cycle 1. + { pose proof int_min_signed_unsigned. lia. } + rewrite <- (Z.opp_involutive (Int.signed b)) at 1. + rewrite Z.rem_opp_r by lia. + rewrite Z.rem_mod_nonneg by lia. + reflexivity. + + + rewrite (Z.abs_eq (Int.signed b)) by lia. + rewrite Int.unsigned_repr ; cycle 1. + { pose proof Int.max_signed_unsigned. lia. } + rewrite Z.rem_mod_nonneg by lia. + reflexivity. +Qed. + +Definition fp_mods32z a b := + Elet a (Elet (lift b) + (Elet (fp_modu32 (e_abs (Eletvar (1%nat))) (e_abs (Eletvar (0%nat)))) + (e_ite Tint (Ccomp0 Clt) (Eletvar 2%nat) + (e_neg (Eletvar 0%nat)) (Eletvar 0%nat)))). + +Theorem fp_mods32z_correct : + forall (ge : genv) (sp: val) cmenv memenv + (le : letenv) (expr_a expr_b : expr) (a b : int) + (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vint a)) + (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vint b)) + (b_nz : (Int.unsigned b > 0)%Z), + eval_expr ge sp cmenv memenv le (fp_mods32z expr_a expr_b) + (Vint (Int.mods a b)). +Proof. + intros. + unfold fp_mods32z. + Local Opaque fp_modu32. + repeat (econstructor + apply eval_lift + eassumption). + apply fp_modu32_correct. + all: repeat (econstructor + apply eval_lift + eassumption). + { unfold ExtValues.int_absdiff, ExtValues.Z_abs_diff. + rewrite Int.signed_zero. rewrite Z.sub_0_r. + rewrite Int.unsigned_repr. + { pose proof (nonzero_unsigned_signed b b_nz). + lia. + } + pose proof Int.max_signed_unsigned. + pose proof int_min_signed_unsigned. + pose proof (Int.signed_range b). + lia. + } + cbn. + rewrite int_mods_modu ; cycle 1. + { apply nonzero_unsigned_signed. assumption. } + unfold Int.lt, ExtValues.int_abs, ExtValues.int_absdiff, ExtValues.Z_abs_diff. + change (Int.signed Int.zero) with 0%Z. + repeat rewrite Z.sub_0_r. + destruct zlt; reflexivity. +Qed. + +Definition e_msub a b c := Eop Omsub (a ::: b ::: c ::: Enil). + +Definition fp_mods32 a b := + Elet a (Elet (lift b) + (Elet (fp_divs32 (Eletvar (1%nat)) (Eletvar (0%nat))) + (e_msub (Eletvar 2%nat) (Eletvar 1%nat) (Eletvar 0%nat)))). + +Theorem fp_mods32_correct : + forall (ge : genv) (sp: val) cmenv memenv + (le : letenv) (expr_a expr_b : expr) (a b : int) + (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vint a)) + (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vint b)) + (b_nz : (Int.unsigned b > 0)%Z), + eval_expr ge sp cmenv memenv le (fp_mods32 expr_a expr_b) + (Vint (Int.mods a b)). +Proof. + intros. + rewrite Int.mods_divs. + unfold fp_mods32. + Local Opaque fp_divs32. + repeat (econstructor + apply eval_lift + eassumption). + { apply fp_divs32_correct; + repeat (econstructor + apply eval_lift + eassumption). + } + cbn. + rewrite Int.mul_commut. + reflexivity. +Qed. diff --git a/kvx/FPDivision64.v b/kvx/FPDivision64.v new file mode 100644 index 00000000..298831a0 --- /dev/null +++ b/kvx/FPDivision64.v @@ -0,0 +1,2670 @@ +(* +This needs a special gappa script + +#!/bin/sh +/home/monniaux/.opam/4.12.0+flambda/bin/gappa -Eprecision=100 "$@" + +in PATH before the normal gappa + *) + +From Flocq Require Import Core Digits Operations Round Bracket Sterbenz + Binary Round_odd Bits. +Require Archi. +Require Import Coqlib. +Require Import Compopts. +Require Import AST. +Require Import Integers. +Require Import Floats. +Require Import Op. +Require Import CminorSel. +Require Import OpHelpers. +Require Import ExtFloats. +Require Import DecBoolOps. +Require Import Chunks. +Require Import Builtins. +Require Import Values Globalenvs. +Require Compopts. +Require Import Psatz. +Require Import IEEE754_extra. +Require Import Memory. + +From Gappa Require Import Gappa_tactic. + +Definition approx_inv_longu b := + let invb_s := ExtValues.invfs (Val.singleoffloat (Val.maketotal (Val.floatoflongu b))) in + let invb_d := Val.floatofsingle invb_s in + let b_d := Val.maketotal (Val.floatoflongu b) in + let one := Vfloat (ExtFloat.one) in + let alpha := ExtValues.fmsubf one invb_d b_d in + ExtValues.fmaddf invb_d alpha invb_d. + +Lemma Rabs_relax: + forall b b' (INEQ : (b < b')%R) x, + (-b <= x <= b)%R -> (Rabs x < b')%R. +Proof. + intros. + apply Rabs_lt. + lra. +Qed. + +Local Notation "'rd'" := (round radix2 (FLT_exp (-1074) 53) ZnearestE). +Local Notation "'rs'" := (round radix2 (FLT_exp (-149) 24) ZnearestE). + +Definition approx_inv_rel_thresh := (1049/72057594037927936)%R. +Theorem approx_inv_longu_correct_rel : + forall b, + (0 < Int64.unsigned b)%Z -> + exists (f : float), + (approx_inv_longu (Vlong b)) = Vfloat f /\ + is_finite _ _ f = true /\ (Rabs(IZR (Int64.unsigned b) * (B2R _ _ f) - 1) <= approx_inv_rel_thresh)%R. +Proof. + intros b NONZ. + unfold approx_inv_longu. + cbn. + econstructor. + split. + reflexivity. + Local Transparent Float.neg Float.of_single Float32.of_longu Float32.div Float.of_longu Float32.of_int Float.of_int Float.to_single. + unfold Float.fma, Float.neg, Float.of_single, Float32.of_longu, ExtFloat32.inv, Float32.div, Float.of_longu, ExtFloat32.one, Float32.of_int, ExtFloat.one, Float.of_int, Float.to_single. + set (re := (@eq_refl Datatypes.comparison Lt)). + change (Int.signed (Int.repr 1)) with 1%Z. + set (b' := Int64.unsigned b) in *. + pose proof (Int64.unsigned_range b) as RANGE. + change Int64.modulus with 18446744073709551616%Z in RANGE. + assert(1 <= IZR b' <= 18446744073709551616)%R as RANGE'. + { split; apply IZR_le; lia. + } + + assert (-16777216 <= 1 <= 16777216)%Z as SILLY by lia. + destruct (BofZ_exact 24 128 re re 1 SILLY) as (C0R & C0F & _). + clear SILLY. + set (one_s := (BofZ 24 128 re re 1)) in *. + + pose proof (BofZ_correct 53 1024 re re b') as C5. + rewrite Rlt_bool_true in C5; cycle 1. + { clear C5. + eapply (Rabs_relax (bpow radix2 64)). + { apply bpow_lt. lia. } + cbn. + gappa. + } + cbn in C5. + destruct C5 as (C5R & C5F & C5S). + set (b_d := (BofZ 53 1024 re re b')) in *. + + pose proof (Bconv_correct 53 1024 24 128 re re Float.to_single_nan mode_NE b_d C5F) as C1. + rewrite Rlt_bool_true in C1; cycle 1. + { clear C1. + apply (Rabs_relax (bpow radix2 64)). + { apply bpow_lt. lia. } + rewrite C5R. + cbn. + gappa. + } + cbn in C1. + destruct C1 as (C1R & C1F & C1S). + set (b_s := (Bconv 53 1024 24 128 re re Float.to_single_nan mode_NE b_d)) in *. + assert(1 <= B2R 24 128 b_s <= 18446744073709551616)%R as b_s_RANGE. + { rewrite C1R. + rewrite C5R. + cbn. + gappa. + } + assert(B2R 24 128 b_s <> 0)%R as b_s_NONZ by lra. + + pose proof (Bdiv_correct 24 128 re re Float32.binop_nan mode_NE one_s b_s b_s_NONZ) as C2. + rewrite Rlt_bool_true in C2; cycle 1. + { clear C2. + apply Rabs_relax with (b := 1%R). + { cbn; lra. } + rewrite C0R. + set (r_b_s := B2R 24 128 b_s) in *. + cbn. + gappa. + } + + destruct C2 as (C2R & C2F & _). + set (invb_s := (Bdiv 24 128 re re Float32.binop_nan mode_NE one_s b_s)) in *. + rewrite C0F in C2F. + + assert ((1/18446744073709551616 <= B2R 24 128 invb_s <= 1)%R) as invb_s_RANGE. + { rewrite C2R. + set (r_b_s := B2R 24 128 b_s) in *. + rewrite C0R. + cbn. + gappa. + } + + pose proof (Bconv_correct 24 128 53 1024 re re Float.of_single_nan mode_NE invb_s C2F) as C3. + rewrite Rlt_bool_true in C3; cycle 1. + { clear C3. + set (r_invb_s := (B2R 24 128 invb_s)) in *. + apply Rabs_relax with (b := 1%R). + { replace 1%R with (bpow radix2 0)%R by reflexivity. + apply bpow_lt. + lia. + } + cbn. + gappa. + } + + destruct C3 as (C3R & C3F & _). + set (invb_d := (Bconv 24 128 53 1024 re re Float.of_single_nan mode_NE invb_s)) in *. + assert ((1/18446744073709551616 <= B2R 53 1024 invb_d <= 1)%R) as invb_d_RANGE. + { + rewrite C3R. + set (r_invb_s := B2R 24 128 invb_s) in *. + cbn. + gappa. + } + + pose proof (is_finite_Bopp 53 1024 Float.neg_nan invb_d) as opp_finite. + rewrite C3F in opp_finite. + + pose proof (BofZ_correct 53 1024 re re 1) as C4. + rewrite Rlt_bool_true in C4; cycle 1. + { clear C4. + cbn. + eapply (Rabs_relax (IZR 18446744073709551616)). + lra. + set (b'' := IZR b') in *. + gappa. + } + destruct C4 as (C4R & C4F & _). + + assert(1 <= B2R 53 1024 b_d <= 18446744073709551616)%R as b_d_RANGE. + { rewrite C5R. + gappa. + } + + pose proof (Bfma_correct 53 1024 re re Float.fma_nan mode_NE + (Bopp 53 1024 Float.neg_nan invb_d) (BofZ 53 1024 re re b') + (BofZ 53 1024 re re 1) opp_finite C5F C4F) as C6. + rewrite Rlt_bool_true in C6; cycle 1. + { clear C6. + rewrite C4R. + rewrite B2R_Bopp. + cbn. + eapply (Rabs_relax (IZR 18446744073709551616)). + { lra. } + fold invb_d. + fold b_d. + set (r_invb_d := B2R 53 1024 invb_d) in *. + set (r_b_d := B2R 53 1024 b_d) in *. + gappa. + } + fold b_d in C6. + destruct C6 as (C6R & C6F & _). + + pose proof (Bfma_correct 53 1024 re re Float.fma_nan mode_NE + (Bfma 53 1024 re re Float.fma_nan mode_NE + (Bopp 53 1024 Float.neg_nan invb_d) b_d (BofZ 53 1024 re re 1)) + invb_d invb_d C6F C3F C3F) as C7. + rewrite Rlt_bool_true in C7; cycle 1. + { clear C7. + rewrite C6R. + rewrite B2R_Bopp. + eapply (Rabs_relax (bpow radix2 64)). + { apply bpow_lt. lia. } + rewrite C4R. + cbn. + set (r_invb_d := B2R 53 1024 invb_d) in *. + set (r_b_d := B2R 53 1024 b_d) in *. + gappa. + } + destruct C7 as (C7R & C7F & _). + + split. assumption. + rewrite C7R. + rewrite C6R. + rewrite C5R. + rewrite C4R. + rewrite B2R_Bopp. + rewrite C3R. + rewrite C2R. + rewrite C1R. + rewrite C5R. + rewrite C0R. + cbn. + set(b1 := IZR b') in *. + + replace (rd 1) with 1%R by gappa. + replace (rd (rs (1 / rs (rd b1)))) with + ((((rd (rs (1 / rs (rd b1))) - (/b1))/(/b1))+1)*(/ b1))%R ; cycle 1. + { field. lra. } + set (er0 := ((rd (rs (1 / rs (rd b1))) - (/b1))/(/b1))%R). + replace (rd b1) with ((((rd b1) - b1)/b1 + 1) * b1)%R; cycle 1. + { field. lra. } + set (er1 := (((rd b1) - b1)/b1)%R). + replace (- ((er0 + 1) * / b1) * ((er1 + 1) * b1) + 1)%R + with (1 - (er0 + 1)*(er1 + 1))%R ; cycle 1. + { field. lra. } + set (z0 := (1 - (er0 + 1) * (er1 + 1))%R). + assert (Rabs er0 <= 257/2147483648)%R as er0_ABS. + { unfold er0. + gappa. + } + assert (Rabs er1 <= 1/9007199254740992)%R as er1_ABS. + { unfold er1. + gappa. + } + replace (rd z0) with ((rd(z0)-z0)+z0)%R by ring. + set (ea0 := (rd(z0)-z0)%R). + assert (Rabs ea0 <= 1/75557863725914323419136)%R as ea0_ABS. + { unfold ea0. unfold z0. + gappa. + } + set (z1 := ((ea0 + z0) * ((er0 + 1) * / b1) + (er0 + 1) * / b1)%R). + replace (rd z1) with ((((rd z1)-z1)/z1+1)*z1)%R; cycle 1. + { field. + unfold z1. + unfold z0. + gappa. + } + set (er2 := ((rd z1 - z1) / z1)%R). + assert (Rabs er2 <= 1/9007199254740992)%R as er2_ABS. + { unfold er2. + unfold z1, z0. + gappa. + } + unfold z1, z0. + replace (b1 * + ((er2 + 1) * + ((ea0 + (1 - (er0 + 1) * (er1 + 1))) * ((er0 + 1) * / b1) + + (er0 + 1) * / b1)) - 1)%R + with (-er0*er0*er1*er2 - er0*er0*er1 + ea0*er0*er2 - er0*er0*er2 - 2*er0*er1*er2 + ea0*er0 - er0*er0 - 2*er0*er1 + ea0*er2 - er1*er2 + ea0 - er1 + er2)%R; cycle 1. + { field. lra. } + unfold approx_inv_rel_thresh. + gappa. +Qed. + +Definition step1_real_inv_longu b := + let invb_s := ExtValues.invfs (Val.singleoffloat (Val.maketotal (Val.floatoflongu b))) in + Val.floatofsingle invb_s. + +Definition step1_real_inv_thresh := (3/33554432)%R. +(* 8.94069671630859e-8 *) + +Theorem step1_real_inv_longu_correct : + forall b, + (0 < Int64.unsigned b)%Z -> + exists (f : float), + (step1_real_inv_longu (Vlong b)) = Vfloat f /\ + (B2R _ _ f) = (rd (rs (1 / rs (rd (IZR (Int64.unsigned b)))))) /\ + is_finite _ _ f = true /\ + Bsign _ _ f = false. +Proof. + intros b NONZ. + unfold step1_real_inv_longu. + cbn. + econstructor. + split. + reflexivity. + Local Transparent Float.neg Float.of_single Float32.of_longu Float32.div Float.of_longu Float32.of_int Float.of_int Float.to_single. + unfold Float.fma, Float.neg, Float.of_single, Float32.of_longu, ExtFloat32.inv, Float32.div, Float.of_longu, ExtFloat32.one, Float32.of_int, ExtFloat.one, Float.of_int, Float.to_single. + set (re := (@eq_refl Datatypes.comparison Lt)). + change (Int.signed (Int.repr 1)) with 1%Z. + set (b' := Int64.unsigned b) in *. + pose proof (Int64.unsigned_range b) as RANGE. + change Int64.modulus with 18446744073709551616%Z in RANGE. + assert(1 <= IZR b' <= 18446744073709551616)%R as RANGE'. + { split; apply IZR_le; lia. + } + + assert (-16777216 <= 1 <= 16777216)%Z as SILLY by lia. + destruct (BofZ_exact 24 128 re re 1 SILLY) as (C0R & C0F & C0S). + clear SILLY. + set (one_s := (BofZ 24 128 re re 1)) in *. + + pose proof (BofZ_correct 53 1024 re re b') as C0'. + rewrite Rlt_bool_true in C0'; cycle 1. + { apply (Rabs_relax (bpow radix2 64)). + { apply bpow_lt. lia. } + cbn. + gappa. + } + cbn in C0'. + destruct C0' as (C0'R & C0'F & C0'S). + set (b_d := (BofZ 53 1024 re re b')) in *. + + pose proof (Bconv_correct 53 1024 24 128 re re Float.to_single_nan mode_NE b_d C0'F) as C1. + rewrite C0'R in C1. + rewrite C0'S in C1. + rewrite Rlt_bool_true in C1; cycle 1. + { clear C1. + eapply (Rabs_relax (bpow radix2 64)). + { apply bpow_lt. lia. } + cbn. + gappa. + } + destruct C1 as (C1R & C1F & C1S). + set (b_s := (Bconv 53 1024 24 128 re re Float.to_single_nan mode_NE b_d)) in *. + + assert(1 <= B2R 24 128 b_s <= 18446744073709551616)%R as b_s_RANGE. + { rewrite C1R. + cbn. + gappa. + } + assert(B2R 24 128 b_s <> 0)%R as b_s_NONZ by lra. + + pose proof (Bdiv_correct 24 128 re re Float32.binop_nan mode_NE one_s b_s b_s_NONZ) as C2. + rewrite Rlt_bool_true in C2; cycle 1. + { clear C2. + apply Rabs_relax with (b := 1%R). + { cbn; lra. } + rewrite C0R. + set (r_b_s := B2R 24 128 b_s) in *. + cbn. + gappa. + } + rewrite C1R in C2. + rewrite C1S in C2. + rewrite C0S in C2. + destruct C2 as (C2R & C2F & C2Sz). + change (1 <? 0)%Z with false in C2Sz. + replace (b' <? 0)%Z with false in C2Sz by lia. + change (xorb false false) with false in C2Sz. + set (invb_s := (Bdiv 24 128 re re Float32.binop_nan mode_NE one_s b_s)) in *. + rewrite C0F in C2F. + assert (is_nan 24 128 invb_s = false) as NAN. + { apply is_finite_not_is_nan. + assumption. + } + pose proof (C2Sz NAN) as C2S. + clear C2Sz. + + assert ((1/18446744073709551616 <= B2R 24 128 invb_s <= 1)%R) as invb_s_RANGE. + { rewrite C2R. + set (r_b_s := B2R 24 128 b_s) in *. + rewrite C0R. + cbn. + gappa. + } + + pose proof (Bconv_correct 24 128 53 1024 re re Float.of_single_nan mode_NE invb_s C2F) as C3. + rewrite Rlt_bool_true in C3; cycle 1. + { clear C3. + set (r_invb_s := (B2R 24 128 invb_s)) in *. + apply Rabs_relax with (b := 1%R). + { replace 1%R with (bpow radix2 0)%R by reflexivity. + apply bpow_lt. + lia. + } + cbn. + gappa. + } + destruct C3 as (C3R & C3F & C3S). + set (invb_d := (Bconv 24 128 53 1024 re re Float.of_single_nan mode_NE invb_s)) in *. + assert ((1/18446744073709551616 <= B2R 53 1024 invb_d <= 1)%R) as invb_d_RANGE. + { + rewrite C3R. + set (r_invb_s := B2R 24 128 invb_s) in *. + cbn. + gappa. + } + rewrite C2S in C3S. + rewrite C2R in C3R. + rewrite C0R in C3R. + + auto. +Qed. + +Theorem step1_real_inv_longu_correct1 : + forall b, + (Int64.unsigned b = 1%Z) -> + exists f, + (step1_real_inv_longu (Vlong b)) = Vfloat f /\ + (B2R _ _ f) = 1%R /\ + is_finite _ _ f = true /\ + Bsign _ _ f = false. +Proof. + intros b EQ1. + assert (0 < Int64.unsigned b)%Z as b_RANGE by lia. + destruct (step1_real_inv_longu_correct b b_RANGE) as (f & C1E & C1R & C1F & C1S). + rewrite EQ1 in C1R. + exists f. + repeat split; try assumption. + rewrite C1R. + gappa. +Qed. + +Lemma Bsign_false_nonneg: + forall prec emax f, + (Bsign prec emax f) = false -> (0 <= (B2R prec emax f))%R. +Proof. + intros until f. intro SIGN. + destruct f. + 1, 2, 3: cbn; lra. + cbn. + apply F2R_ge_0. + cbn. + cbn in SIGN. + rewrite SIGN. + cbn. + lia. +Qed. + +Lemma Znearest_IZR_le : + forall rnd n x, (IZR n <= x)%R -> (n <= Znearest rnd x)%Z. +Proof. + intros until x. intro ORDER. + pose proof (Znearest_ge_floor rnd x). + pose proof (Zfloor_le _ _ ORDER) as KK. + rewrite Zfloor_IZR in KK. + lia. +Qed. + +Lemma Znearest_le_IZR : + forall rnd n x, (x <= IZR n)%R -> (Znearest rnd x <= n)%Z. +Proof. + intros until x. intro ORDER. + pose proof (Znearest_le_ceil rnd x). + pose proof (Zceil_le _ _ ORDER) as KK. + rewrite Zceil_IZR in KK. + lia. +Qed. + +Definition step1_real_div_longu a b := + Val.mulf (Val.maketotal (Val.floatoflongu a)) (step1_real_inv_longu b). + +Definition step1_div_longu a b := + Val.maketotal (Val.longuoffloat_ne (step1_real_div_longu a b)). + +Definition step1_real_quotient (a b : R) := + rd ((rd (a)) * (rd (rs (1 / rs (rd (b)))))). + +Theorem step1_real_div_longu_correct: + forall a b, + (1 < Int64.unsigned b)%Z -> + exists (q : float), + (step1_real_div_longu (Vlong a) (Vlong b)) = Vfloat q /\ + (B2R _ _ q) = step1_real_quotient (IZR (Int64.unsigned a)) + (IZR (Int64.unsigned b)) /\ + is_finite _ _ q = true /\ + Bsign _ _ q = false. +Proof. + intros a b b_NON01. + assert (0 < Int64.unsigned b)%Z as b_NON0 by lia. + destruct (step1_real_inv_longu_correct b b_NON0) as (f & C1E & C1R & C1F & C1S). + unfold step1_real_div_longu. + rewrite C1E. + cbn. + set (b' := Int64.unsigned b) in *. + Local Transparent Float.mul. + unfold Float.mul, Float.of_longu. + econstructor. + split. reflexivity. + set (a' := Int64.unsigned a) in *. + set (re := (@eq_refl Datatypes.comparison Lt)). + + pose proof (Int64.unsigned_range a) as a_RANGE. + change Int64.modulus with 18446744073709551616%Z in a_RANGE. + assert (0 <= IZR a' <= 18446744073709551615)%R as IZR_a_RANGE. + { split; apply IZR_le; lia. } + pose proof (Int64.unsigned_range b) as b_RANGE. + change Int64.modulus with 18446744073709551616%Z in b_RANGE. + assert (2 <= IZR b' <= 18446744073709551615)%R as IZR_b_RANGE. + { split; apply IZR_le; lia. } + + pose proof (BofZ_correct 53 1024 re re a') as C2. + rewrite Rlt_bool_true in C2; cycle 1. + { clear C2. + apply Rabs_relax with (b := bpow radix2 64). + { apply bpow_lt. lia. } + cbn. + gappa. + } + destruct C2 as (C2R & C2F & C2S). + rewrite Zlt_bool_false in C2S by lia. + + pose proof (Bmult_correct 53 1024 re re Float.binop_nan mode_NE (BofZ 53 1024 re re a') f) as C3. + rewrite C1S in C3. + rewrite C2S in C3. + rewrite C1F in C3. + rewrite C2F in C3. + rewrite C1R in C3. + rewrite C2R in C3. + rewrite Rlt_bool_true in C3; cycle 1. + { apply Rabs_relax with (b := bpow radix2 64). + { apply bpow_lt ; lia. } + cbn. + gappa. + } + cbn in C3. + destruct C3 as (C3R & C3F & C3Sz). + assert (is_nan 53 1024 + (Bmult 53 1024 re re Float.binop_nan mode_NE + (BofZ 53 1024 re re a') f) = false) as NAN. + { apply is_finite_not_is_nan. + assumption. } + pose proof (C3Sz NAN) as C3S. + clear NAN C3Sz. + + auto. +Qed. + +Definition smallb_thresh := 4398046511104%Z. + +Definition smallb_approx_real_range := 2200000000000%R. +Lemma step1_smallb_real : + forall a b + (a_RANGE : (1 <= a <= 18446744073709551615)%R) + (b_RANGE : (1 <= b <= IZR smallb_thresh)%R), + (Rabs((step1_real_quotient a b) * b - a) <= smallb_approx_real_range)%R. +Proof. + intros. + unfold smallb_thresh in b_RANGE. + unfold smallb_approx_real_range. + unfold step1_real_quotient. + set (q := ((rd (a)) * (rd (rs (1 / rs (rd b)))))%R) in *. + replace ((rd q) *b - a)%R with + ((rd(q)-q)/q * rd(a) * (1 + (rd (rs (1 / rs (rd b))) - 1/b)/(1/b)) + + (rd (a)) * ((rd (rs (1 / rs (rd b))) - 1 / b) / (1/b)) + + (rd(a) - a))%R; cycle 1. + { unfold q. + field. + split. lra. + split. gappa. + gappa. + } + unfold q. + gappa. +Qed. + +Lemma step1_div_longu_a0 : + forall b, + (0 < Int64.unsigned b)%Z -> + (step1_div_longu (Vlong Int64.zero) (Vlong b)) = Vlong Int64.zero. +Proof. + intros b b_NOT0. + unfold step1_div_longu. + unfold step1_real_div_longu. + destruct (step1_real_inv_longu_correct b b_NOT0) as + (f & C1E & C1R & C1F & C1S). + rewrite C1E. + cbn. + unfold Float.to_longu_ne, Float.of_longu, Float.mul. + rewrite Int64.unsigned_zero. + set (re := (@eq_refl Datatypes.comparison Lt)). + assert (- 2 ^ 53 <= 0 <= 2 ^ 53)%Z as SILLY by lia. + destruct (BofZ_exact 53 1024 re re 0 SILLY) as (C2R & C2F & C2S). + + pose proof (Bmult_correct 53 1024 re re Float.binop_nan mode_NE + (BofZ 53 1024 re re 0) f) as C3. + rewrite C1F in C3. + rewrite C2F in C3. + rewrite C1S in C3. + rewrite C2S in C3. + rewrite Z.ltb_irrefl in C3. + rewrite Rlt_bool_true in C3; cycle 1. + { clear C3. + apply Rabs_relax with (b := bpow radix2 64). + { apply bpow_lt. lia. } + cbn. + rewrite Rmult_0_l. + gappa. + } + rewrite C2R in C3. + rewrite Rmult_0_l in C3. + destruct C3 as (C3R & C3F & C3Sz). + change (true && true) with true in C3F. + change (xorb false false) with false in C3Sz. + assert (is_nan 53 1024 + (Bmult 53 1024 re re Float.binop_nan mode_NE + (BofZ 53 1024 re re 0) f) = false) as NAN. + { apply is_finite_not_is_nan. + assumption. + } + pose proof (C3Sz NAN) as C3S. + clear NAN C3Sz. + pose proof ((ZofB_ne_range_correct 53 1024 + (Bmult 53 1024 re re Float.binop_nan mode_NE + (BofZ 53 1024 re re 0) f) 0 Int64.max_unsigned)) as C4. + rewrite C3R in C4. + replace (round radix2 (FLT_exp (3 - 1024 - 53) 53) (round_mode mode_NE) 0) + with 0%R in C4 by (cbn ; gappa). + rewrite Znearest_IZR in C4. + cbn zeta in C4. + rewrite Z.leb_refl in C4. + change (0 <=? Int64.max_unsigned)%Z with true in C4. + rewrite andb_true_r in C4. + rewrite andb_true_r in C4. + rewrite C3F in C4. + rewrite C4. + reflexivity. +Qed. + +Lemma step1_div_longu_correct_anyb : + forall a b + (b_NOT01 : (1 < Int64.unsigned b)%Z), + exists (q : int64), + (step1_div_longu (Vlong a) (Vlong b)) = Vlong q. +Proof. + intros. + + pose proof (Int64.unsigned_range a) as a_RANGE. + pose proof (Int64.unsigned_range b) as b_RANGE. + change Int64.modulus with 18446744073709551616%Z in *. + set (a' := Int64.unsigned a) in *. + set (b' := Int64.unsigned b) in *. + assert (0 <= IZR a' <= 18446744073709551615)%R as a_RANGE'. + { split; apply IZR_le; lia. } + assert (2 <= IZR b' <= 18446744073709551615)%R as b_RANGE'. + { split; apply IZR_le; lia. } + + assert (0 < b')%Z as b_NOT0 by lia. + + destruct (Z_le_gt_dec a' 0). + { assert (a' = 0%Z) as ZERO by lia. + replace a with Int64.zero; cycle 1. + { + unfold a' in ZERO. + unfold Int64.zero. + rewrite <- ZERO. + apply Int64.repr_unsigned. + } + exists Int64.zero. + apply step1_div_longu_a0. + exact b_NOT0. + } + + unfold step1_div_longu. + unfold step1_real_div_longu. + destruct (step1_real_inv_longu_correct b b_NOT0) as (f & C1E & C1R & C1F & C1S). + rewrite C1E. + cbn. + unfold Float.of_longu, Float.mul, Float.to_longu_ne. + set (re := (@eq_refl Datatypes.comparison Lt)). + fold a'. + fold b' in C1R. + pose proof (BofZ_correct 53 1024 re re a') as C2. + rewrite Rlt_bool_true in C2; cycle 1. + { clear C2. + apply Rabs_relax with (b := bpow radix2 64). + { apply bpow_lt. lia. } + cbn. + gappa. + } + cbn in C2. + destruct C2 as (C2R & C2F & C2S). + pose proof (Bmult_correct 53 1024 re re Float.binop_nan mode_NE + (BofZ 53 1024 re re a') f) as C3. + rewrite C2R in C3. + rewrite C2F in C3. + rewrite C2S in C3. + rewrite C1R in C3. + rewrite C1F in C3. + rewrite C1S in C3. + rewrite Rlt_bool_true in C3; cycle 1. + { clear C3. + apply Rabs_relax with (b := bpow radix2 64). + { apply bpow_lt. lia. } + cbn. + gappa. + } + cbn in C3. + destruct C3 as (C3R & C3F & _). + pose proof (ZofB_ne_range_correct 53 1024 + (Bmult 53 1024 re re Float.binop_nan mode_NE + (BofZ 53 1024 re re a') f) 0 Int64.max_unsigned) as C4. + rewrite C3R in C4. + rewrite C3F in C4. + cbn zeta in C4. + rewrite Zle_bool_true in C4 ; cycle 1. + { clear C4. + apply Znearest_lub. + gappa. + } + rewrite Zle_bool_true in C4 ; cycle 1. + { clear C4. + apply Znearest_glb. + cbn. + gappa. + } + rewrite C4. + cbn. + eauto. +Qed. + +Definition smallb_approx_range := 4400000000000%Z. +Lemma step1_div_longu_correct : + forall a b, + (1 < Int64.unsigned b <= smallb_thresh)%Z -> + exists (q : int64), + (step1_div_longu (Vlong a) (Vlong b)) = Vlong q /\ + (Z.abs (Int64.unsigned a - Int64.unsigned b*Int64.unsigned q) <= smallb_approx_range)%Z. +Proof. + intros a b b_RANGE. + + pose proof (Int64.unsigned_range a) as a_RANGE. + change Int64.modulus with 18446744073709551616%Z in a_RANGE. + set (a' := Int64.unsigned a) in *. + set (b' := Int64.unsigned b) in *. + + destruct (Z_le_gt_dec a' 0). + { assert (a' = 0%Z) as ZERO by lia. + exists Int64.zero. + rewrite ZERO. + rewrite Int64.unsigned_zero. + replace (Z.abs (0 - b' * 0))%Z with 0%Z by lia. + replace a with Int64.zero; cycle 1. + { + unfold a' in ZERO. + unfold Int64.zero. + rewrite <- ZERO. + apply Int64.repr_unsigned. + } + split. + { apply step1_div_longu_a0. + lia. + } + unfold smallb_approx_range. + lia. + } + + unfold step1_div_longu. + assert (1 < b')%Z as b_NOT01 by lia. + destruct (step1_real_div_longu_correct a b b_NOT01) as (q & C1E & C1R & C1F & C1S). + rewrite C1E. cbn. + unfold Float.to_longu_ne. + pose proof (ZofB_ne_range_correct 53 1024 q 0 Int64.max_unsigned) as C2. + rewrite C1F in C2. + + + assert (1 <= IZR a' <= 18446744073709551615)%R as a_RANGE'. + { split; apply IZR_le; lia. } + assert (2 <= IZR b' <= IZR smallb_thresh)%R as b_RANGE'. + { split; apply IZR_le; lia. } + assert (1 <= IZR b' <= IZR smallb_thresh)%R as b_RANGE'' by lra. + pose proof (step1_smallb_real (IZR a') (IZR b') a_RANGE' b_RANGE'') as DELTA. + fold a' in C1R. + fold b' in C1R. + rewrite <- C1R in DELTA. + + assert (0 <= B2R _ _ q)%R as q_NONNEG. + { apply Bsign_false_nonneg. assumption. } + cbn in C2. + rewrite Zle_bool_true in C2; cycle 1. + { apply Znearest_IZR_le. assumption. } + assert (B2R _ _ q <= 9223376000000000000)%R as q_SMALL. + { replace (B2R _ _ q) with + ((IZR a' / IZR b') + (B2R _ _ q * IZR b' - IZR a') / IZR b')%R; cycle 1. + { field. lra. } + unfold smallb_approx_real_range in DELTA. + unfold smallb_thresh in b_RANGE'. + set (y := (B2R 53 1024 q * IZR b' - IZR a')%R) in *. + gappa. + } + rewrite Zle_bool_true in C2; cycle 1. + { apply Znearest_le_IZR. lra. } + cbn in C2. + + change Int64.max_unsigned with 18446744073709551615%Z. + rewrite C2. + cbn. + + econstructor. split. reflexivity. + rewrite Int64.unsigned_repr; cycle 1. + { split. + - apply Znearest_IZR_le. lra. + - apply Znearest_le_IZR. + change Int64.max_unsigned with 18446744073709551615%Z. + lra. + } + apply le_IZR. + rewrite abs_IZR. + unfold smallb_approx_real_range, smallb_approx_range, smallb_thresh in *. + rewrite minus_IZR. + rewrite mult_IZR. + set (q_r := B2R 53 1024 q) in *. + assert (Rabs (IZR (ZnearestE q_r) - q_r) <= / 2)%R as NEAR + by apply Znearest_imp2. + set (q_i := IZR (ZnearestE q_r)) in *. + replace (IZR a' - IZR b' * q_i)%R with + (-(IZR b' * (q_i - q_r)) - (q_r * IZR b' - IZR a'))%R by ring. + set (delta1 := (q_i - q_r)%R) in *. + set (delta2 := (q_r * IZR b' - IZR a')%R) in *. + gappa. +Qed. + +Lemma le_IZR3 : + forall n m p : Z, (IZR n <= IZR m <= IZR p)%R -> (n <= m <= p)%Z. +Proof. + intros ; split ; apply le_IZR ; lra. +Qed. + +Definition mostb_thresh := 18446740000000000000%Z. +Lemma step1_div_longu_correct_mostb : + forall a b, + (1 < Int64.unsigned b <= mostb_thresh)%Z -> + exists (q : int64), + (step1_div_longu (Vlong a) (Vlong b)) = Vlong q /\ + (Int64.min_signed <= (Int64.unsigned a - Int64.unsigned b*Int64.unsigned q) <= Int64.max_signed)%Z. +Proof. + intros a b b_RANGE. + + pose proof (Int64.unsigned_range a) as a_RANGE. + change Int64.modulus with 18446744073709551616%Z in a_RANGE. + set (a' := Int64.unsigned a) in *. + set (b' := Int64.unsigned b) in *. + + destruct (Z_le_gt_dec a' 0). + { assert (a' = 0%Z) as ZERO by lia. + exists Int64.zero. + rewrite ZERO. + rewrite Int64.unsigned_zero. + replace (Z.abs (0 - b' * 0))%Z with 0%Z by lia. + replace a with Int64.zero; cycle 1. + { + unfold a' in ZERO. + unfold Int64.zero. + rewrite <- ZERO. + apply Int64.repr_unsigned. + } + split. + { apply step1_div_longu_a0. + lia. + } + change Int64.min_signed with (-9223372036854775808)%Z. + change Int64.max_signed with ( 9223372036854775807)%Z. + lia. + } + + unfold step1_div_longu. + assert (1 < b')%Z as b_NOT01 by lia. + destruct (step1_real_div_longu_correct a b b_NOT01) as (q & C1E & C1R & C1F & C1S). + rewrite C1E. cbn. + unfold Float.to_longu_ne. + pose proof (ZofB_ne_range_correct 53 1024 q 0 Int64.max_unsigned) as C2. + rewrite C1F in C2. + + + assert (1 <= IZR a' <= 18446744073709551615)%R as a_RANGE'. + { split; apply IZR_le; lia. } + assert (2 <= IZR b' <= IZR mostb_thresh)%R as b_RANGE'. + { split; apply IZR_le; lia. } + assert (1 <= IZR b' <= IZR mostb_thresh)%R as b_RANGE'' by lra. + cbn zeta in C2. + rewrite C2. + cbn. + rewrite C1R. + unfold step1_real_quotient. + fold a' b'. + unfold mostb_thresh in *. + + rewrite Zle_bool_true ; cycle 1. + { apply Znearest_IZR_le. + gappa. + } + rewrite Zle_bool_true ; cycle 1. + { apply Znearest_le_IZR. + gappa. + } + cbn. + econstructor; split. reflexivity. + set (q_r := (rd (rd (IZR a') * rd (rs (1 / rs ( rd (IZR b'))))))%R). + assert (Rabs (IZR (ZnearestE q_r) - q_r) <= /2)%R as NEAR by apply Znearest_imp2. + set (delta1 := (IZR (ZnearestE q_r) - q_r)%R) in NEAR. + apply le_IZR3. + rewrite minus_IZR. + rewrite mult_IZR. + rewrite Int64.unsigned_repr ; cycle 1. + { split. + - apply Znearest_IZR_le. unfold q_r. + gappa. + - apply Znearest_le_IZR. unfold q_r. + change Int64.max_unsigned with 18446744073709551615%Z. + gappa. + } + replace (IZR (ZnearestE q_r)) with ((IZR (ZnearestE q_r) - q_r) + q_r)%R by ring. + fold delta1. + unfold q_r. + set (a1 := IZR a') in *. + set (b1 := IZR b') in *. + replace (rd (rd a1 * rd (rs (1 / rs (rd b1)))))%R with + ((((rd (rd a1 * rd (rs (1 / rs (rd b1))))-(a1 * (1 / b1))) / (a1 * (1 / b1)))+1) * (a1 / b1))%R; + cycle 1. + { field. lra. } + set (delta2 := ((rd (rd a1 * rd (rs (1 / rs (rd b1))))-(a1 * (1 / b1))) / (a1 * (1 / b1)))%R) in *. + (* assert (Rabs (delta2) <= 1/4194304)%R. + { unfold delta2. gappa. } *) + replace (a1 - b1 * (delta1 + (delta2 + 1) * (a1 / b1)))%R with + (-b1*delta1 - a1*delta2)%R; cycle 1. + { field. lra. } + unfold delta2. + gappa. +Qed. + +Lemma find_quotient: + forall (a b : Z) + (b_POS : (0 < b)%Z) + (qr : R) + (GAP : (Rabs (IZR a / IZR b - qr) < /2)%R), + (a / b)%Z = + let q := ZnearestE qr in + if (b*q >? a)%Z + then (q-1)%Z + else q. +Proof. + intros. + set (q := ZnearestE qr). + cbn zeta. + set (b' := IZR b) in *. + set (a' := IZR a) in *. + assert (1 <= b')%R as b_POS'. + { apply IZR_le. + lia. + } + + pose proof (Znearest_imp2 (fun x : Z => negb (Z.even x)) qr) as ROUND. + fold q in ROUND. + set (q' := IZR q) in *. + + pose proof (Rabs_triang (a' / b' - qr) + (qr - q'))%R as TRIANGLE. + replace ((a' / b' - qr) + (qr - q'))%R with + (a' / b' - q')%R in TRIANGLE by ring. + rewrite <- Rabs_Ropp in ROUND. + replace (- (q' - qr))%R with (qr - q')%R in ROUND by ring. + assert (Z.abs (a - b*q) < b)%Z as DELTA. + { apply lt_IZR. + rewrite <- Rabs_Zabs. + rewrite minus_IZR. + rewrite mult_IZR. + fold a' q' b'. + apply Rmult_lt_reg_r with (r := (/b')%R). + { apply Rinv_0_lt_compat. lra. } + rewrite Rinv_r by lra. + replace (/ b')%R with (/ Rabs(b'))%R ; cycle 1. + { f_equal. + apply Rabs_pos_eq. lra. } + rewrite <- Rabs_Rinv by lra. + rewrite <- Rabs_mult. + replace ((a' - b' * q') * / b')%R with (a'/b' - q')%R by (field ; lra). + lra. + } + + pose proof (Zgt_cases (b * q) a)%Z as CASE. + destruct (_ >? _)%Z. + { apply Zdiv_unique with (b := (a - (q-1)*b)%Z). + ring. + split; lia. + } + + apply Zdiv_unique with (b := (a - q*b)%Z). + ring. + split; lia. +Qed. + +Definition step2_real_div_long a b := + Val.mulf (Val.maketotal (Val.floatoflong a)) (approx_inv_longu b). + +Definition smalla_thresh := 34184372088832%Z. + +Lemma step2_real_div_long_smalla_correct : + forall (a b : int64) + (a_SMALL : (Z.abs (Int64.signed a) <= smalla_thresh)%Z) + (b_NOT0 : (0 < Int64.unsigned b)%Z), + exists (q : float), + (step2_real_div_long (Vlong a) (Vlong b)) = Vfloat q /\ + (Rabs ((B2R _ _ q) - (IZR (Int64.signed a)) / (IZR (Int64.unsigned b))) <= (32767/65536))%R /\ + is_finite _ _ q = true. +Proof. + intros. + unfold step2_real_div_long. + destruct (approx_inv_longu_correct_rel b b_NOT0) as (f & C0E & C0F & C0R). + rewrite C0E. + econstructor. + split. reflexivity. + Local Transparent Float.of_long. + unfold Float.mul, Float.of_long. + set (re := (@eq_refl Datatypes.comparison Lt)) in *. + pose proof (Int64.unsigned_range b) as b_RANGE. + change Int64.modulus with 18446744073709551616%Z in b_RANGE. + set (a' := Int64.signed a) in *. + set (b' := Int64.unsigned b) in *. + assert (1 <= IZR b' <= 18446744073709551615)%R as b_RANGE'. + { split; apply IZR_le; lia. + } + assert(Rabs (IZR a') <= IZR smalla_thresh)%R as a_RANGE'. + { rewrite Rabs_Zabs. + apply IZR_le. + assumption. + } + assert (- 2 ^ 53 <= a' <= 2 ^ 53)%Z as SILLY. + { unfold smalla_thresh in a_SMALL. + apply Z.abs_le. + lia. + } + destruct (BofZ_exact 53 1024 re re (Int64.signed a) SILLY) as (C1R & C1F & C1S). + fold a' in C1R, C1F, C1S. + pose proof (Bmult_correct 53 1024 re re Float.binop_nan mode_NE (BofZ 53 1024 re re a') f) as C2. + rewrite Rlt_bool_true in C2 ; cycle 1. + { clear C2. + apply Rabs_relax with (b := bpow radix2 53). + { apply bpow_lt. lia. } + cbn. + rewrite C1R. + unfold approx_inv_rel_thresh in C0R. + replace (B2R 53 1024 f) with + ((1/IZR b') * ((IZR b' * B2R 53 1024 f - 1) + 1))%R ; cycle 1. + { field. lra. } + unfold smalla_thresh in *. + gappa. + } + rewrite C0F in C2. + rewrite C1R in C2. + rewrite C1F in C2. + rewrite C1S in C2. + cbn in C2. + destruct C2 as (C2R & C2F & _). + split. + 2: exact C2F. + rewrite C2R. + replace (IZR a' * (B2R 53 1024 f))%R with + ((IZR a'/IZR b') * ((IZR b' * B2R 53 1024 f - 1) + 1))%R ; cycle 1. + { field. lra. } + set (delta1 := (IZR b' * B2R 53 1024 f - 1)%R) in *. + set (q1 := (IZR a' / IZR b' * (delta1 + 1))%R). + replace (rd q1) with (((rd q1) - q1) + q1)%R by ring. + set (delta2 := ((rd q1) - q1)%R). + unfold q1. + replace (delta2 + IZR a' / IZR b' * (delta1 + 1) - IZR a' / IZR b')%R with + (delta2 + (IZR a' / IZR b') * delta1)%R by ring. + unfold delta2. + unfold q1. + unfold approx_inv_rel_thresh in *. + unfold smalla_thresh in *. + gappa. +Qed. + +Definition step2_div_long' a b := + Val.maketotal (Val.longoffloat_ne (step2_real_div_long a b)). + +Definition step2_div_long a b := + let q := step2_div_long' a b in + Val.select (Val.cmpl_bool Clt (Val.subl a (Val.mull q b)) (Vlong Int64.zero)) + (Val.addl q (Vlong Int64.mone)) q Tlong. + +Lemma function_ite : + forall {A B : Type} (fn : A->B) (b : bool) (x y: A), + fn (if b then x else y) = (if b then fn x else fn y). +Proof. + intros. + destruct b; reflexivity. +Qed. + +Lemma normalize_ite : + forall ty (b : bool) x y, + Val.normalize (if b then x else y) ty = + (if b then Val.normalize x ty else Val.normalize y ty). +Proof. + intros. + destruct b; reflexivity. +Qed. + + +Lemma int64_mul_signed_unsigned: + forall x y : int64, + Int64.mul x y = Int64.repr (Int64.signed x * Int64.unsigned y). +Proof. + intros. + unfold Int64.mul. + apply Int64.eqm_samerepr. + apply Int64.eqm_mult. + - apply Int64.eqm_sym. + apply Int64.eqm_signed_unsigned. + - apply Int64.eqm_refl. +Qed. + +Lemma int64_eqm_signed_repr: + forall z : Z, Int64.eqm z (Int64.signed (Int64.repr z)). +Proof. + intros. + apply Int64.eqm_trans with (y := Int64.unsigned (Int64.repr z)). + - apply Int64.eqm_unsigned_repr. + - apply Int64.eqm_sym. + apply Int64.eqm_signed_unsigned. +Qed. + +Lemma signed_repr_sub: + forall x y, + Int64.repr (Int64.signed (Int64.repr x) - y) = + Int64.repr (x - y). +Proof. + intros. + apply Int64.eqm_samerepr. + apply Int64.eqm_sub. + - apply Int64.eqm_sym. + apply int64_eqm_signed_repr. + - apply Int64.eqm_refl. +Qed. + +Lemma signed_repr_sub2: + forall x y, + Int64.repr (x - Int64.signed (Int64.repr y)) = + Int64.repr (x - y). +Proof. + intros. + apply Int64.eqm_samerepr. + apply Int64.eqm_sub. + - apply Int64.eqm_refl. + - apply Int64.eqm_sym. + apply int64_eqm_signed_repr. +Qed. + +Lemma step2_div_long_smalla_correct : + forall a b + (a_SMALL : (Z.abs (Int64.signed a) <= smalla_thresh)%Z) + (b_NOT0 : (0 < Int64.unsigned b)%Z) + (b_NOT_VERY_BIG : (Int64.unsigned b <= Int64.max_signed)%Z), + step2_div_long (Vlong a) (Vlong b) = Vlong (Int64.repr (Int64.signed a / Int64.unsigned b))%Z. +Proof. + intros. + pose proof (Int64.unsigned_range b) as b_RANGE. + change Int64.modulus with 18446744073709551616%Z in b_RANGE. + set (a' := (Int64.signed a)) in *. + set (b' := (Int64.unsigned b)) in *. + assert (Rabs (IZR a') <= IZR smalla_thresh)%R as a_RANGE'. + { rewrite Rabs_Zabs. + apply IZR_le. + assumption. + } + assert (1 <= IZR b' <= 18446744073709551615)%R as b_RANGE'. + { split; apply IZR_le; lia. + } + destruct (step2_real_div_long_smalla_correct a b a_SMALL b_NOT0) as (q & C1R & C1E & C1F). + fold a' b' in C1E. + assert ((Int64.min_signed <=? ZnearestE (B2R 53 1024 q))=true)%Z as q_LOW. + { apply Zle_imp_le_bool. + change Int64.min_signed with (-9223372036854775808)%Z. + apply Znearest_lub. + set (q' := B2R 53 1024 q) in *. + replace q' with (IZR a' / IZR b' + (q' - IZR a' / IZR b'))%R by ring. + unfold smalla_thresh in a_RANGE'. + gappa. + } + assert ((ZnearestE (B2R 53 1024 q) <=? Int64.max_signed)=true)%Z as q_HIGH. + { apply Zle_imp_le_bool. + change Int64.max_signed with (9223372036854775807)%Z. + apply Znearest_glb. + set (q' := B2R 53 1024 q) in *. + replace q' with (IZR a' / IZR b' + (q' - IZR a' / IZR b'))%R by ring. + unfold smalla_thresh in a_RANGE'. + gappa. + } + unfold step2_div_long, step2_div_long'. + rewrite C1R. + cbn. + unfold Float.to_long_ne. + rewrite (ZofB_ne_range_correct _ _ q Int64.min_signed Int64.max_signed). + rewrite C1F. + rewrite q_LOW. + rewrite q_HIGH. + cbn. + rewrite normalize_ite. + cbn. + rewrite <- (function_ite Vlong). + f_equal. + unfold Int64.lt. + set (q' := B2R 53 1024 q) in *. + fold a'. + assert (Int64.signed (Int64.repr (ZnearestE q')) = ZnearestE q') as q_SIGNED. + { apply Int64.signed_repr. + split; lia. + } + rewrite Int64.add_signed. + rewrite q_SIGNED. + rewrite Int64.signed_mone. + rewrite Int64.signed_zero. + rewrite <- (function_ite Int64.repr). + f_equal. + replace (ZnearestE q' + -1)%Z with (ZnearestE q' - 1)%Z by ring. + + set (q'' := (ZnearestE q')) in *. + fold a'. + rewrite int64_mul_signed_unsigned. + rewrite q_SIGNED. + fold b'. + + rewrite Int64.sub_signed. + fold a'. + rewrite signed_repr_sub2. + + assert ((Rabs (IZR a' / IZR b' - q') < / 2)%R) as HALF. + { replace (IZR a' / IZR b' - q')%R with + (-(q' - IZR a' / IZR b'))%R by ring. + rewrite Rabs_Ropp. + lra. + } + pose proof (find_quotient a' b' b_NOT0 q' HALF) as QUOTIENT. + fold q'' in QUOTIENT. + cbn zeta in QUOTIENT. + + assert (b' <> 0)%Z as NONZ by lia. + pose proof (Zmod_eq_full a' b' NONZ) as MOD. + assert (b' > 0)%Z as b_GT0 by lia. + pose proof (Z_mod_lt a' b' b_GT0) as MOD_LT. + destruct (Z_lt_dec a' (b' * q'')) as [LT | GE]. + { replace (b' * q'' >? a')%Z with true in QUOTIENT by lia. + replace q'' with (1 + (a' / b'))%Z by lia. + replace (a' - (1 + a' / b') * b')%Z + with ((a' - a' / b' * b')-b')%Z by ring. + rewrite <- MOD. + rewrite Int64.signed_repr; cycle 1. + { change Int64.min_signed with (-9223372036854775808)%Z in *. + change Int64.max_signed with (9223372036854775807)%Z in *. + lia. + } + rewrite zlt_true by lia. + ring. + } + replace (b' * q'' >? a')%Z with false in QUOTIENT by lia. + rewrite <- QUOTIENT. + replace (a' / b' * b' - a')%Z with (-(a' - a' / b' * b'))%Z by ring. + rewrite <- MOD. + rewrite Int64.signed_repr ; cycle 1. + { change Int64.min_signed with (-9223372036854775808)%Z in *. + change Int64.max_signed with (9223372036854775807)%Z in *. + lia. + } + rewrite zlt_false by lia. + reflexivity. +Qed. + +Definition twostep_div_longu a b := + let q1 := step1_div_longu a b in + let q2 := step2_div_long (Val.subl a (Val.mull b q1)) b in + Val.addl q1 q2. + +Lemma unsigned_repr_sub : + forall a b, + Int64.repr (a - b) = Int64.repr (a - Int64.unsigned (Int64.repr b)). +Proof. + intros. + apply Int64.eqm_samerepr. + apply Int64.eqm_sub. + - apply Int64.eqm_refl. + - apply Int64.eqm_unsigned_repr. +Qed. + +Lemma unsigned_repr_add : + forall a b, + Int64.repr (a + b) = Int64.repr (a + Int64.unsigned (Int64.repr b)). +Proof. + intros. + apply Int64.eqm_samerepr. + apply Int64.eqm_add. + - apply Int64.eqm_refl. + - apply Int64.eqm_unsigned_repr. +Qed. + +Lemma twostep_div_longu_smallb_correct : + forall a b + (b_RANGE : (1 < Int64.unsigned b <= smallb_thresh)%Z), + (twostep_div_longu (Vlong a) (Vlong b)) = + (Val.maketotal (Val.divlu (Vlong a) (Vlong b))). +Proof. + intros. + unfold twostep_div_longu. + destruct (step1_div_longu_correct a b b_RANGE) as (q1 & C1R & C1E). + rewrite C1R. + set (q1' := Int64.unsigned q1) in *. + set (b' := Int64.unsigned b) in *. + set (a' := Int64.unsigned a) in *. + assert ( Z.abs (Int64.signed (Int64.sub a (Int64.mul b q1))) <= smalla_thresh)%Z as r1_SMALL. + { unfold smalla_thresh, smallb_approx_range in *. + unfold Int64.sub, Int64.mul. + fold q1' b' a'. + rewrite <- unsigned_repr_sub. + rewrite Int64.signed_repr ; cycle 1. + { change Int64.min_signed with (-9223372036854775808)%Z. + change Int64.max_signed with (9223372036854775807)%Z. + lia. + } + lia. + } + assert (0 < b')%Z as b_NOT0 by lia. + assert (b' <= Int64.max_signed)%Z as b_NOTBIG. + { change Int64.max_signed with (9223372036854775807)%Z. + unfold smallb_thresh in b_RANGE. + lia. + } + cbn. + rewrite (step2_div_long_smalla_correct (Int64.sub a (Int64.mul b q1)) b r1_SMALL b_NOT0 b_NOTBIG). + unfold Int64.add, Int64.sub, Int64.mul, Int64.divu. + fold q1' b' a'. + rewrite <- unsigned_repr_sub. + rewrite <- unsigned_repr_add. + rewrite Int64.signed_repr ; cycle 1. + { + change Int64.min_signed with (-9223372036854775808)%Z. + change Int64.max_signed with (9223372036854775807)%Z. + unfold smallb_approx_range in *. + lia. + } + rewrite Z.add_comm. + rewrite <- Z.div_add by lia. + replace (a' - b' * q1' + q1' * b')%Z with a' by ring. + rewrite Int64.eq_false ; cycle 1. + { intro Z. unfold b' in b_NOT0. rewrite Z in b_NOT0. + rewrite Int64.unsigned_zero in b_NOT0. + lia. + } + reflexivity. +Qed. + + +Lemma step2_real_div_long_bigb_correct : + forall (a b : int64) + (b_BIG : ((Int64.unsigned b) > smallb_thresh)%Z), + exists (q : float), + (step2_real_div_long (Vlong a) (Vlong b)) = Vfloat q /\ + (Rabs ((B2R _ _ q) - (IZR (Int64.signed a)) / (IZR (Int64.unsigned b))) <= (32767/65536))%R /\ + is_finite _ _ q = true. +Proof. + intros. + unfold step2_real_div_long. + assert (0 < Int64.unsigned b)%Z as b_NOT0 by (unfold smallb_thresh in *; lia). + destruct (approx_inv_longu_correct_rel b b_NOT0) as (f & C0E & C0F & C0R). + rewrite C0E. + econstructor. + split. reflexivity. + Local Transparent Float.of_long. + unfold Float.mul, Float.of_long. + set (re := (@eq_refl Datatypes.comparison Lt)) in *. + pose proof (Int64.unsigned_range b) as b_RANGE. + change Int64.modulus with 18446744073709551616%Z in b_RANGE. + pose proof (Int64.signed_range a) as a_RANGE. + set (a' := Int64.signed a) in *. + set (b' := Int64.unsigned b) in *. + assert (IZR (1 + smallb_thresh) <= IZR b' <= 18446744073709551615)%R as b_RANGE'. + { split; apply IZR_le; lia. + } + assert(IZR Int64.min_signed <= IZR a' <= IZR Int64.max_signed)%R as a_RANGE'. + { split; apply IZR_le; lia. + } + change Int64.min_signed with (-9223372036854775808)%Z in a_RANGE'. + change Int64.max_signed with (9223372036854775807)%Z in a_RANGE'. + pose proof (BofZ_correct 53 1024 re re a') as C1. + rewrite Rlt_bool_true in C1 ; cycle 1. + { clear C1. + apply Rabs_relax with (b := bpow radix2 64). + { apply bpow_lt; lia. } + cbn. + gappa. + } + cbn in C1. + destruct C1 as (C1R & C1F & C1S). + + unfold smallb_thresh in b_RANGE'; cbn in b_RANGE'. + + pose proof (Bmult_correct 53 1024 re re Float.binop_nan mode_NE (BofZ 53 1024 re re a') f) as C2. + rewrite Rlt_bool_true in C2 ; cycle 1. + { clear C2. + apply Rabs_relax with (b := bpow radix2 53). + { apply bpow_lt. lia. } + cbn. + rewrite C1R. + unfold approx_inv_rel_thresh in C0R. + replace (B2R 53 1024 f) with + ((1/IZR b') * ((IZR b' * B2R 53 1024 f - 1) + 1))%R ; cycle 1. + { field. lra. } + gappa. + } + rewrite C0F in C2. + rewrite C1R in C2. + rewrite C1F in C2. + rewrite C1S in C2. + cbn in C2. + destruct C2 as (C2R & C2F & _). + split. + 2: exact C2F. + rewrite C2R. + set (f' := (B2R 53 1024 f)) in *. + replace (rd(rd (IZR a') * f') - IZR a' / IZR b')%R with + ((rd(rd (IZR a') * f') - IZR a' * f') + IZR a' / IZR b' * (IZR b' * f' - 1))%R ; cycle 1. + { field. lra. } + unfold approx_inv_rel_thresh in *. + gappa. +Qed. + +Lemma step2_div_long_bigb_correct : + forall a b + (b_BIG : ((Int64.unsigned b) > smallb_thresh)%Z) + (b_NOT_TOO_BIG : ((Int64.unsigned b) <= Int64.max_signed)%Z), + step2_div_long (Vlong a) (Vlong b) = Vlong (Int64.repr (Int64.signed a / Int64.unsigned b))%Z. +Proof. + intros. + pose proof (Int64.unsigned_range b) as b_RANGE. + change Int64.modulus with 18446744073709551616%Z in b_RANGE. + pose proof (Int64.signed_range a) as a_RANGE. + set (a' := (Int64.signed a)) in *. + set (b' := (Int64.unsigned b)) in *. + assert (IZR (1 + smallb_thresh) <= IZR b' <= 18446744073709551615)%R as b_RANGE'. + { split; apply IZR_le; lia. + } + assert(IZR Int64.min_signed <= IZR a' <= IZR Int64.max_signed)%R as a_RANGE'. + { split; apply IZR_le; lia. + } + unfold smallb_thresh in *; cbn in b_RANGE'. + change Int64.min_signed with (-9223372036854775808)%Z in *. + change Int64.max_signed with (9223372036854775807)%Z in *. + assert (0 < b')%Z as b_NOT0 by lia. + + destruct (step2_real_div_long_bigb_correct a b b_BIG) as (q & C1R & C1E & C1F). + fold a' b' in C1E. + assert ((Int64.min_signed <=? ZnearestE (B2R 53 1024 q))=true)%Z as q_LOW. + { apply Zle_imp_le_bool. + change Int64.min_signed with (-9223372036854775808)%Z. + apply Znearest_lub. + set (q' := B2R 53 1024 q) in *. + replace q' with (IZR a' / IZR b' + (q' - IZR a' / IZR b'))%R by ring. + gappa. + } + assert ((ZnearestE (B2R 53 1024 q) <=? Int64.max_signed)=true)%Z as q_HIGH. + { apply Zle_imp_le_bool. + change Int64.max_signed with (9223372036854775807)%Z. + apply Znearest_glb. + set (q' := B2R 53 1024 q) in *. + replace q' with (IZR a' / IZR b' + (q' - IZR a' / IZR b'))%R by ring. + gappa. + } + unfold step2_div_long, step2_div_long'. + rewrite C1R. + cbn. + unfold Float.to_long_ne. + rewrite (ZofB_ne_range_correct _ _ q Int64.min_signed Int64.max_signed). + rewrite C1F. + rewrite q_LOW. + rewrite q_HIGH. + cbn. + rewrite normalize_ite. + cbn. + rewrite <- (function_ite Vlong). + f_equal. + unfold Int64.lt. + set (q' := B2R 53 1024 q) in *. + fold a'. + assert (Int64.signed (Int64.repr (ZnearestE q')) = ZnearestE q') as q_SIGNED. + { apply Int64.signed_repr. + split; lia. + } + rewrite Int64.add_signed. + rewrite q_SIGNED. + rewrite Int64.signed_mone. + rewrite Int64.signed_zero. + rewrite <- (function_ite Int64.repr). + f_equal. + replace (ZnearestE q' + -1)%Z with (ZnearestE q' - 1)%Z by ring. + + set (q'' := (ZnearestE q')) in *. + fold a'. + rewrite int64_mul_signed_unsigned. + rewrite q_SIGNED. + fold b'. + + rewrite Int64.sub_signed. + fold a'. + rewrite signed_repr_sub2. + + assert ((Rabs (IZR a' / IZR b' - q') < / 2)%R) as HALF. + { replace (IZR a' / IZR b' - q')%R with + (-(q' - IZR a' / IZR b'))%R by ring. + rewrite Rabs_Ropp. + lra. + } + pose proof (find_quotient a' b' b_NOT0 q' HALF) as QUOTIENT. + fold q'' in QUOTIENT. + cbn zeta in QUOTIENT. + + assert (b' <> 0)%Z as NONZ by lia. + pose proof (Zmod_eq_full a' b' NONZ) as MOD. + assert (b' > 0)%Z as b_GT0 by lia. + pose proof (Z_mod_lt a' b' b_GT0) as MOD_LT. + destruct (Z_lt_dec a' (b' * q'')) as [LT | GE]. + { replace (b' * q'' >? a')%Z with true in QUOTIENT by lia. + replace q'' with (1 + (a' / b'))%Z by lia. + replace (a' - (1 + a' / b') * b')%Z + with ((a' - a' / b' * b')-b')%Z by ring. + rewrite <- MOD. + rewrite Int64.signed_repr; cycle 1. + { change Int64.min_signed with (-9223372036854775808)%Z in *. + change Int64.max_signed with (9223372036854775807)%Z in *. + lia. + } + rewrite zlt_true by lia. + ring. + } + replace (b' * q'' >? a')%Z with false in QUOTIENT by lia. + rewrite <- QUOTIENT. + replace (a' / b' * b' - a')%Z with (-(a' - a' / b' * b'))%Z by ring. + rewrite <- MOD. + rewrite Int64.signed_repr ; cycle 1. + { change Int64.min_signed with (-9223372036854775808)%Z in *. + change Int64.max_signed with (9223372036854775807)%Z in *. + lia. + } + rewrite zlt_false by lia. + reflexivity. +Qed. + +Definition step2_real_div_longu a b := + Val.mulf (Val.maketotal (Val.floatoflongu a)) (approx_inv_longu b). + +Definition step2_div_longu' a b := + Val.maketotal (Val.longuoffloat_ne (step2_real_div_longu a b)). + +Definition step2_div_longu a b := + let q := step2_div_longu' a b in + Val.select (Val.cmpl_bool Cgt (Val.subl (Val.mull q b) a) (Vlong Int64.zero)) + (Val.addl q (Vlong Int64.mone)) q Tlong. + +Lemma step2_real_div_longu_bigb_correct : + forall (a b : int64) + (b_BIG : ((Int64.unsigned b) > smallb_thresh)%Z), + exists (q : float), + (step2_real_div_longu (Vlong a) (Vlong b)) = Vfloat q /\ + (Rabs ((B2R _ _ q) - (IZR (Int64.unsigned a)) / (IZR (Int64.unsigned b))) <= (32767/65536))%R /\ + is_finite _ _ q = true. +Proof. + intros. + unfold step2_real_div_longu. + assert (0 < Int64.unsigned b)%Z as b_NOT0 by (unfold smallb_thresh in *; lia). + destruct (approx_inv_longu_correct_rel b b_NOT0) as (f & C0E & C0F & C0R). + rewrite C0E. + econstructor. + split. reflexivity. + Local Transparent Float.of_longu. + unfold Float.mul, Float.of_longu. + set (re := (@eq_refl Datatypes.comparison Lt)) in *. + pose proof (Int64.unsigned_range b) as b_RANGE. + pose proof (Int64.unsigned_range a) as a_RANGE. + change Int64.modulus with 18446744073709551616%Z in *. + set (a' := Int64.unsigned a) in *. + set (b' := Int64.unsigned b) in *. + assert (IZR (1 + smallb_thresh) <= IZR b' <= 18446744073709551615)%R as b_RANGE'. + { split; apply IZR_le; lia. + } + assert(0 <= IZR a' <= 18446744073709551615)%R as a_RANGE'. + { split; apply IZR_le; lia. + } + pose proof (BofZ_correct 53 1024 re re a') as C1. + rewrite Rlt_bool_true in C1 ; cycle 1. + { clear C1. + apply Rabs_relax with (b := bpow radix2 64). + { apply bpow_lt; lia. } + cbn. + gappa. + } + cbn in C1. + destruct C1 as (C1R & C1F & C1S). + + unfold smallb_thresh in b_RANGE'; cbn in b_RANGE'. + + pose proof (Bmult_correct 53 1024 re re Float.binop_nan mode_NE (BofZ 53 1024 re re a') f) as C2. + rewrite Rlt_bool_true in C2 ; cycle 1. + { clear C2. + apply Rabs_relax with (b := bpow radix2 53). + { apply bpow_lt. lia. } + cbn. + rewrite C1R. + unfold approx_inv_rel_thresh in C0R. + replace (B2R 53 1024 f) with + ((1/IZR b') * ((IZR b' * B2R 53 1024 f - 1) + 1))%R ; cycle 1. + { field. lra. } + gappa. + } + rewrite C0F in C2. + rewrite C1R in C2. + rewrite C1F in C2. + rewrite C1S in C2. + cbn in C2. + destruct C2 as (C2R & C2F & _). + split. + 2: exact C2F. + rewrite C2R. + set (f' := (B2R 53 1024 f)) in *. + replace (rd(rd (IZR a') * f') - IZR a' / IZR b')%R with + ((rd(rd (IZR a') * f') - IZR a' * f') + IZR a' / IZR b' * (IZR b' * f' - 1))%R ; cycle 1. + { field. lra. } + unfold approx_inv_rel_thresh in *. + gappa. +Qed. + +Lemma repr_unsigned_mul: + forall a b, + (Int64.repr (Int64.unsigned (Int64.repr a) * b)) = Int64.repr (a * b). +Proof. + intros. + apply Int64.eqm_samerepr. + apply Int64.eqm_mult. + - apply Int64.eqm_sym. apply Int64.eqm_unsigned_repr. + - apply Int64.eqm_refl. +Qed. + +Lemma repr_unsigned_sub: + forall a b, + (Int64.repr (Int64.unsigned (Int64.repr a) - b)) = Int64.repr (a - b). +Proof. + intros. + apply Int64.eqm_samerepr. + apply Int64.eqm_sub. + - apply Int64.eqm_sym. apply Int64.eqm_unsigned_repr. + - apply Int64.eqm_refl. +Qed. + +Lemma repr_unsigned_add: + forall a b, + (Int64.repr (Int64.unsigned (Int64.repr a) + b)) = Int64.repr (a + b). +Proof. + intros. + apply Int64.eqm_samerepr. + apply Int64.eqm_add. + - apply Int64.eqm_sym. apply Int64.eqm_unsigned_repr. + - apply Int64.eqm_refl. +Qed. + +Lemma step2_div_longu_bigb_correct : + forall a b + (b_BIG : ((Int64.unsigned b) > smallb_thresh)%Z) + (b_NOT_TOO_BIG : ((Int64.unsigned b) <= Int64.max_signed)%Z), + step2_div_longu (Vlong a) (Vlong b) = Vlong (Int64.repr (Int64.unsigned a / Int64.unsigned b))%Z. +Proof. + intros. + pose proof (Int64.unsigned_range b) as b_RANGE. + pose proof (Int64.unsigned_range a) as a_RANGE. + change Int64.modulus with 18446744073709551616%Z in *. + set (a' := (Int64.unsigned a)) in *. + set (b' := (Int64.unsigned b)) in *. + assert (IZR (1 + smallb_thresh) <= IZR b' <= 18446744073709551615)%R as b_RANGE'. + { split; apply IZR_le; lia. + } + assert(0 <= IZR a' <= 18446744073709551615)%R as a_RANGE'. + { split; apply IZR_le; lia. + } + unfold smallb_thresh in *; cbn in b_RANGE'. + assert (0 < b')%Z as b_NOT0 by lia. + + destruct (step2_real_div_longu_bigb_correct a b b_BIG) as (q & C1R & C1E & C1F). + fold a' b' in C1E. + + assert ((0 <=? ZnearestE (B2R 53 1024 q))=true)%Z as q_LOW. + { apply Zle_imp_le_bool. + set (q' := B2R 53 1024 q) in *. + assert (-32767 / 65536 <= q')%R as LOWROUND. + { replace q' with (IZR a' / IZR b' + (q' - IZR a' / IZR b'))%R by ring. + gappa. + } + destruct (Rcase_abs q'). + { replace (ZnearestE q') with 0%Z. lia. + symmetry. + apply Znearest_imp. + apply Rabs_lt. + split; lra. + } + apply Znearest_lub. + lra. + } + assert ((ZnearestE (B2R 53 1024 q) <=? Int64.max_unsigned)=true)%Z as q_HIGH. + { apply Zle_imp_le_bool. + change Int64.max_unsigned with (18446744073709551615)%Z. + apply Znearest_glb. + set (q' := B2R 53 1024 q) in *. + replace q' with (IZR a' / IZR b' + (q' - IZR a' / IZR b'))%R by ring. + gappa. + } + + unfold step2_div_longu, step2_div_longu'. + rewrite C1R. + cbn. + unfold Float.to_longu_ne. + rewrite (ZofB_ne_range_correct _ _ q _ _). + rewrite C1F. + rewrite q_LOW. + rewrite q_HIGH. + cbn. + rewrite normalize_ite. + cbn. + rewrite <- (function_ite Vlong). + f_equal. + unfold Int64.lt. + set (q' := B2R 53 1024 q) in *. + fold a'. + rewrite Int64.signed_zero. + set (q'' := (ZnearestE q')) in *. + assert ((Rabs (IZR a' / IZR b' - q') < / 2)%R) as HALF. + { replace (IZR a' / IZR b' - q')%R with + (-(q' - IZR a' / IZR b'))%R by ring. + rewrite Rabs_Ropp. + lra. + } + pose proof (find_quotient a' b' b_NOT0 q' HALF) as QUOTIENT. + fold q'' in QUOTIENT. + cbn zeta in QUOTIENT. + + assert (b' <> 0)%Z as NONZ by lia. + pose proof (Zmod_eq_full a' b' NONZ) as MOD. + assert (b' > 0)%Z as b_GT0 by lia. + pose proof (Z_mod_lt a' b' b_GT0) as MOD_LT. + destruct (Z_lt_dec a' (b' * q'')) as [LT | GE]. + { replace (b' * q'' >? a')%Z with true in QUOTIENT by lia. + unfold Int64.sub, Int64.mul. + fold a' b'. + replace q'' with (1 + a'/b')%Z by lia. + rewrite repr_unsigned_mul. + rewrite repr_unsigned_sub. + + replace ((1 + a' / b') * b' - a')%Z with (b' - (a' - a' / b' * b'))%Z by ring. + rewrite <- MOD. + rewrite Int64.signed_repr ; cycle 1. + { change Int64.max_signed with 9223372036854775807%Z in *. + change Int64.min_signed with (-9223372036854775808)%Z in *. + lia. + } + rewrite zlt_true by lia. + replace q'' with (1 + (a' / b'))%Z by lia. + apply Int64.eqm_samerepr. + apply Int64.eqm_trans with (y := ((1 + a' / b') + (-1))%Z). + { apply Int64.eqm_add. + apply Int64.eqm_sym. + apply Int64.eqm_unsigned_repr. + rewrite Int64.unsigned_mone. + replace (-1)%Z with (0 - 1)%Z by ring. + apply Int64.eqm_add. + exists 1%Z. + lia. + apply Int64.eqm_refl. + } + replace (1 + a' / b' + -1)%Z with (a'/b')%Z by ring. + apply Int64.eqm_refl. + } + replace (b' * q'' >? a')%Z with false in QUOTIENT by lia. + rewrite <- QUOTIENT. + unfold Int64.sub, Int64.mul. + fold a' b'. + rewrite repr_unsigned_mul. + rewrite repr_unsigned_sub. + replace (a' / b' * b' - a')%Z with (- (a' mod b'))%Z by lia. + rewrite Int64.signed_repr ; cycle 1. + { change Int64.max_signed with 9223372036854775807%Z in *. + change Int64.min_signed with (-9223372036854775808)%Z in *. + lia. + } + rewrite zlt_false by lia. + reflexivity. +Qed. + +Lemma one_bigb_div : + forall a b + (b_RANGE : (9223372036854775808 <= b < 18446744073709551616)%Z) + (ORDER : (b <= a < 18446744073709551616)%Z), + (a / b = 1)%Z. +Proof. + intros. + assert (((a - b) / b) = 0)%Z as ZERO. + { apply Zdiv_small. lia. + } + replace a with (1 * b + (a - b))%Z by ring. + rewrite Z.div_add_l by lia. + rewrite ZERO. + ring. +Qed. + +Lemma repr_unsigned_sub2: + forall a b, + (Int64.repr (a - Int64.unsigned (Int64.repr b))) = Int64.repr (a - b). +Proof. + intros. + apply Int64.eqm_samerepr. + apply Int64.eqm_sub. + - apply Int64.eqm_refl. + - apply Int64.eqm_sym. apply Int64.eqm_unsigned_repr. +Qed. + +Lemma repr_unsigned_add2: + forall a b, + (Int64.repr (a + Int64.unsigned (Int64.repr b))) = Int64.repr (a + b). +Proof. + intros. + apply Int64.eqm_samerepr. + apply Int64.eqm_add. + - apply Int64.eqm_refl. + - apply Int64.eqm_sym. apply Int64.eqm_unsigned_repr. +Qed. + +Lemma twostep_div_longu_mostb_correct : + forall a b + (b_RANGE : (1 < Int64.unsigned b <= Int64.max_signed)%Z), + (twostep_div_longu (Vlong a) (Vlong b)) = + (Val.maketotal (Val.divlu (Vlong a) (Vlong b))). +Proof. + intros. + destruct (Z_le_gt_dec (Int64.unsigned b) smallb_thresh). + { apply twostep_div_longu_smallb_correct. lia. } + set (a' := Int64.unsigned a). + set (b' := Int64.unsigned b). + assert (0 < b')%Z as b_NOT0 by lia. + cbn. + rewrite Int64.eq_false ; cycle 1. + { intro Z. unfold b' in b_NOT0. rewrite Z in b_NOT0. + rewrite Int64.unsigned_zero in b_NOT0. + lia. + } + cbn. + + unfold twostep_div_longu. + assert (1 < Int64.unsigned b <= mostb_thresh)%Z as MOST_B. + { unfold mostb_thresh. + change Int64.max_signed with 9223372036854775807%Z in b_RANGE. + lia. + } + destruct (step1_div_longu_correct_mostb a b MOST_B) as + (q & step1_REW & step1_RANGE). + rewrite step1_REW. + cbn. + rewrite step2_div_long_bigb_correct; cycle 1. + 1, 2: lia. + f_equal. + + unfold Int64.sub, Int64.mul. + rewrite repr_unsigned_sub2. + rewrite Int64.signed_repr by lia. + unfold Int64.add, Int64.divu. + fold a' b'. + set (q' := Int64.unsigned q) in *. + rewrite repr_unsigned_add2. + rewrite <- Z.div_add_l by lia. + f_equal. f_equal. + ring. +Qed. + +Definition full2_div_longu a b m := + let is_big := Val.cmpl_bool Clt b (Vlong Int64.zero) in + let is_one := Val.cmplu_bool (Mem.valid_pointer m) Cle b (Vlong Int64.one) in + let is_special := Val.or (Val.of_optbool is_big) (Val.of_optbool is_one) in + let if_big := Val.longofintu (Val.of_optbool (Val.cmplu_bool (Mem.valid_pointer m) Cge a b)) in + let if_special := Val.select is_big if_big a Tlong in + Val.select (Val.cmp_bool Cne is_special (Vint Int.zero)) + if_special + (twostep_div_longu a b) Tlong. + +Lemma full2_div_longu_correct : + forall a b m + (b_NOT0 : (0 < Int64.unsigned b)%Z), + full2_div_longu (Vlong a) (Vlong b) m = Vlong (Int64.repr (Int64.unsigned a / Int64.unsigned b))%Z. +Proof. + + Local Opaque twostep_div_longu. + intros. + unfold full2_div_longu. + cbn. + unfold Int64.lt, Int64.ltu. + pose proof (Int64.unsigned_range a). + pose proof (Int64.unsigned_range b). + set (a' := Int64.unsigned a) in *. + set (b' := Int64.unsigned b) in *. + rewrite Int64.signed_zero. + rewrite Int64.unsigned_one. + destruct zlt. + { replace (Val.cmp_bool Cne + (Val.or Vtrue + (if negb (if zlt 1 b' then true else false) then Vtrue else Vfalse)) + (Vint Int.zero)) with (Some true) ; cycle 1. + { destruct zlt; reflexivity. + } + cbn. + destruct zlt; cbn. + { rewrite Zdiv_small by lia. + reflexivity. + } + rewrite one_bigb_div. + reflexivity. + { + change Int64.modulus with 18446744073709551616%Z in *. + split. 2: lia. + unfold b'. + rewrite Int64.unsigned_signed. + unfold Int64.lt. + rewrite Int64.signed_zero. + rewrite zlt_true by lia. + pose proof (Int64.signed_range b). + change Int64.min_signed with (-9223372036854775808)%Z in *. + change Int64.max_signed with (9223372036854775807)%Z in *. + change Int64.modulus with 18446744073709551616%Z in *. + lia. + } + change Int64.modulus with 18446744073709551616%Z in *. + lia. + } + destruct zlt; cbn. + { change (negb (Int.eq (Int.or Int.zero Int.zero) Int.zero)) with false. + cbn. + rewrite twostep_div_longu_mostb_correct. + { + cbn. + unfold Int64.eq. + fold b'. + rewrite Int64.unsigned_zero. + rewrite zeq_false by lia. + reflexivity. + } + + change Int64.modulus with 18446744073709551616%Z in *. + split. lia. + rewrite Int64.unsigned_signed. + unfold Int64.lt. + rewrite Int64.signed_zero. + rewrite zlt_false by lia. + pose proof (Int64.signed_range b). + change Int64.min_signed with (-9223372036854775808)%Z in *. + change Int64.max_signed with (9223372036854775807)%Z in *. + change Int64.modulus with 18446744073709551616%Z in *. + lia. + } + change (negb (Int.eq (Int.or Int.zero Int.one) Int.zero)) with true. + cbn. + replace b' with 1%Z by lia. + rewrite Z.div_1_r. + unfold a'. + rewrite Int64.repr_unsigned. + reflexivity. +Qed. + +Definition step2_mod_long a b := + let q := step2_div_long' a b in + let r := Val.subl a (Val.mull q b) in + Val.select (Val.cmpl_bool Clt r (Vlong Int64.zero)) + (Val.addl r b) r Tlong. + +Definition twostep_mod_longu a b := + let q1 := step1_div_longu a b in + step2_mod_long (Val.subl a (Val.mull b q1)) b. + +Lemma vlong_eq: forall a b, (Vlong a) = (Vlong b) -> a = b. +Proof. + intros a b EQ. + congruence. +Qed. + +Lemma move_repr_in_mod : + forall a b c, + Int64.repr (a - b * c)%Z = + Int64.repr (a - b * Int64.unsigned (Int64.repr c))%Z. +Proof. + intros. + apply Int64.eqm_samerepr. + auto 10 with ints. +Qed. + +Lemma twostep_mod_longu_mostb_correct : + forall a b + (b_RANGE : (1 < Int64.unsigned b <= Int64.max_signed)%Z), + (twostep_mod_longu (Vlong a) (Vlong b)) = + (Val.maketotal (Val.modlu (Vlong a) (Vlong b))). +Proof. + intros. + Local Transparent twostep_div_longu. + pose proof (twostep_div_longu_mostb_correct a b b_RANGE) as div_correct. + unfold twostep_div_longu, twostep_mod_longu, step2_div_long, step2_mod_long in *. + set (q1 := (step1_div_longu (Vlong a) (Vlong b))) in *. + set (q2 := (step2_div_long' (Val.subl (Vlong a) (Val.mull (Vlong b) q1)) (Vlong b))) in *. + cbn in div_correct. + cbn. + unfold Int64.eq in *. + change (Int64.unsigned Int64.zero) with 0%Z in *. + rewrite zeq_false by lia. + rewrite zeq_false in div_correct by lia. + cbn in div_correct. + cbn. + destruct q1 as [ | | q1l | | | ] ; cbn in *; try discriminate. + destruct q2 as [ | | q2l | | | ] ; cbn in *; try discriminate. + rewrite <- (function_ite Vlong). + rewrite <- (function_ite Vlong) in div_correct. + cbn. cbn in div_correct. + unfold Int64.lt, Int64.sub, Int64.mul, Int64.add, Int64.divu, Int64.modu in *. + set (a' := Int64.unsigned a) in *. + set (b' := Int64.unsigned b) in *. + set (q1' := Int64.unsigned q1l) in *. + set (q2' := Int64.unsigned q2l) in *. + change (Int64.signed Int64.zero) with 0%Z in *. + replace (Int64.repr + (Int64.unsigned + (Int64.repr (a' - Int64.unsigned (Int64.repr (b' * q1')))) - + Int64.unsigned (Int64.repr (q2' * b')))) + with (Int64.repr (a' - (b' * q1') - (q2' * b')))%Z in * ; cycle 1. + { + apply Int64.eqm_samerepr. + auto 10 with ints. + } + replace (a' - b' * q1' - q2' * b')%Z with (a' - b' * (q1' + q2'))%Z in * by ring. + f_equal. + apply vlong_eq in div_correct. + rewrite Z.mod_eq by lia. + rewrite (move_repr_in_mod a' b' (a' / b'))%Z. + rewrite <- div_correct. + clear div_correct. + rewrite <- (move_repr_in_mod a' b')%Z. + + destruct zlt as [NEG | POS]. + 2: reflexivity. + rewrite repr_unsigned_add. + replace (a' - b' * (q1' + q2') + b')%Z with (a' - b' * (q1' + q2' - 1))%Z by ring. + apply Int64.eqm_samerepr. + assert (Int64.eqm (Int64.unsigned (Int64.repr (q2' + Int64.unsigned Int64.mone))) + (q2' -1))%Z as EQM. + { apply Int64.eqm_trans with (y := (q2' + Int64.unsigned Int64.mone)%Z). + apply Int64.eqm_sym. + apply Int64.eqm_unsigned_repr. + apply Int64.eqm_add. + apply Int64.eqm_refl. + exists (1)%Z. + reflexivity. + } + replace (q1' + q2' - 1)%Z with (q1' + (q2' - 1))%Z by ring. + auto with ints. +Qed. + +Definition full2_mod_longu a b m := + let is_big := Val.cmpl_bool Clt b (Vlong Int64.zero) in + let is_one := Val.cmplu_bool (Mem.valid_pointer m) Cle b (Vlong Int64.one) in + let is_special := Val.or (Val.of_optbool is_big) (Val.of_optbool is_one) in + let if_big := Val.select (Val.cmplu_bool (Mem.valid_pointer m) Cge a b) (Val.subl a b) a Tlong in + let if_special := Val.select is_big if_big (Vlong Int64.zero) Tlong in + Val.select (Val.cmp_bool Cne is_special (Vint Int.zero)) + if_special + (twostep_mod_longu a b) Tlong. + +Lemma full2_mod_longu_correct : + forall a b m + (b_NOT0 : (0 < Int64.unsigned b)%Z), + full2_mod_longu (Vlong a) (Vlong b) m = Vlong (Int64.repr ((Int64.unsigned a) mod (Int64.unsigned b)))%Z. +Proof. + + Local Opaque twostep_mod_longu. + intros. + unfold full2_mod_longu. + cbn. + unfold Int64.lt, Int64.ltu. + pose proof (Int64.unsigned_range a). + pose proof (Int64.unsigned_range b). + set (a' := Int64.unsigned a) in *. + set (b' := Int64.unsigned b) in *. + rewrite Int64.signed_zero. + rewrite Int64.unsigned_one. + destruct zlt. + { replace (Val.cmp_bool Cne + (Val.or Vtrue + (if negb (if zlt 1 b' then true else false) then Vtrue else Vfalse)) + (Vint Int.zero)) with (Some true) ; cycle 1. + { destruct zlt; reflexivity. + } + cbn. + rewrite Z.mod_eq by lia. + + destruct zlt; cbn. + { rewrite Zdiv_small by lia. + replace (a' - b' * 0)%Z with a' by ring. + unfold a'. + rewrite Int64.repr_unsigned. + reflexivity. + } + rewrite one_bigb_div. + { unfold Int64.sub. + fold a' b'. + repeat f_equal. ring. + } + { + change Int64.modulus with 18446744073709551616%Z in *. + split. 2: lia. + unfold b'. + rewrite Int64.unsigned_signed. + unfold Int64.lt. + rewrite Int64.signed_zero. + rewrite zlt_true by lia. + pose proof (Int64.signed_range b). + change Int64.min_signed with (-9223372036854775808)%Z in *. + change Int64.max_signed with (9223372036854775807)%Z in *. + change Int64.modulus with 18446744073709551616%Z in *. + lia. + } + change Int64.modulus with 18446744073709551616%Z in *. + lia. + } + destruct zlt; cbn. + { change (negb (Int.eq (Int.or Int.zero Int.zero) Int.zero)) with false. + cbn. + rewrite twostep_mod_longu_mostb_correct. + { + cbn. + unfold Int64.eq. + fold b'. + rewrite Int64.unsigned_zero. + rewrite zeq_false by lia. + reflexivity. + } + + change Int64.modulus with 18446744073709551616%Z in *. + split. lia. + rewrite Int64.unsigned_signed. + unfold Int64.lt. + rewrite Int64.signed_zero. + rewrite zlt_false by lia. + pose proof (Int64.signed_range b). + change Int64.min_signed with (-9223372036854775808)%Z in *. + change Int64.max_signed with (9223372036854775807)%Z in *. + change Int64.modulus with 18446744073709551616%Z in *. + lia. + } + change (negb (Int.eq (Int.or Int.zero Int.one) Int.zero)) with true. + cbn. + replace b' with 1%Z by lia. + rewrite Z.mod_1_r. + reflexivity. +Qed. + +Open Scope cminorsel_scope. +Definition e_invfs a := Eop Oinvfs (a ::: Enil). +Definition e_float_of_longu a := Eop Ofloatoflongu (a ::: Enil). +Definition e_float_of_long a := Eop Ofloatoflong (a ::: Enil). +Definition e_float_of_single a := Eop Ofloatofsingle (a ::: Enil). +Definition e_single_of_float a := Eop Osingleoffloat (a ::: Enil). +Definition e_long_of_float_ne a := Eop Olongoffloat_ne (a ::: Enil). +Definition e_longu_of_float_ne a := Eop Olonguoffloat_ne (a ::: Enil). +Definition e_mulf a b := Eop Omulf (a ::: b ::: Enil). +Definition e_float_const c := Eop (Ofloatconst c) Enil. +Definition e_fmaddf a b c := Eop Ofmaddf (a ::: b ::: c ::: Enil). +Definition e_fmsubf a b c := Eop Ofmsubf (a ::: b ::: c ::: Enil). +Definition e_addlimm a b := Eop (Oaddlimm b) (a ::: Enil). +Definition e_msubl a b c := Eop Omsubl (a ::: b ::: c ::: Enil). +Definition e_ite ty c vc v1 v2 := Eop (Osel c ty) (v1 ::: v2 ::: vc ::: Enil). +Definition e_cmplimm c v n := Eop (Ocmp (Ccomplimm c n)) (v ::: Enil). +Definition e_cmpluimm c v n := Eop (Ocmp (Ccompluimm c n)) (v ::: Enil). +Definition e_addl a b := Eop Oaddl (a ::: b ::: Enil). +Definition e_or a b := Eop Oor (a ::: b ::: Enil). +Definition e_cast32unsigned a := Eop Ocast32unsigned (a ::: Enil). +Definition e_cmplu c a b := Eop (Ocmp (Ccomplu c)) (a ::: b ::: Enil). + +Definition a_var1 := Eletvar (4%nat). +Definition a_d_var1 := Eletvar (3%nat). +Definition b_var1 := Eletvar (2%nat). +Definition b_d_var1 := Eletvar (1%nat). +Definition binv_d_var1 := Eletvar (0%nat). + +Definition e_setup1 a b rest := + Elet a (Elet (e_float_of_longu (Eletvar 0%nat)) + (Elet (lift (lift b)) (Elet (e_float_of_longu (Eletvar 0%nat)) + (Elet (e_float_of_single (e_invfs (e_single_of_float (Eletvar 0%nat)))) + rest)))). +Definition e_step1 := e_longu_of_float_ne (e_mulf a_d_var1 binv_d_var1). + +Lemma e_step1_correct : + forall (ge : genv) (sp: val) cmenv memenv (le : letenv) + (expr_a : expr) (a : int64) (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vlong a)) + (expr_b : expr) (b : int64) (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vlong b)), + (eval_expr ge sp cmenv memenv le (e_setup1 expr_a expr_b (e_step1)) + (step1_div_longu (Vlong a) (Vlong b))). +Proof. + intros. + unfold e_setup1, step1_div_longu. + repeat econstructor. + { eassumption. } + { cbn. apply eval_lift. apply eval_lift. eassumption. } +Qed. + +Definition e_setup2 a b rest := (e_setup1 a b (Elet e_step1 rest)). + +Definition a_var2 := Eletvar (5%nat). +Definition a_d_var2 := Eletvar (4%nat). +Definition b_var2 := Eletvar (3%nat). +Definition b_d_var2 := Eletvar (2%nat). +Definition binv_d_var2 := Eletvar (1%nat). +Definition step1_var2 := Eletvar (0%nat). + +Definition e_step2 := e_msubl a_var2 b_var2 step1_var2. + +Definition e_setup3 a b rest := (e_setup2 a b (Elet e_step2 rest)). + +Definition a_var3 := Eletvar (6%nat). +Definition a_d_var3 := Eletvar (5%nat). +Definition b_var3 := Eletvar (4%nat). +Definition b_d_var3 := Eletvar (3%nat). +Definition binv_d_var3 := Eletvar (2%nat). +Definition step1_var3 := Eletvar (1%nat). +Definition step2_var3 := Eletvar (0%nat). + +Definition e_step3 := + e_long_of_float_ne + (e_mulf (e_float_of_long step2_var3) + (e_fmaddf + binv_d_var3 + (e_fmsubf (e_float_const ExtFloat.one) + binv_d_var3 + b_d_var3 ) + binv_d_var3)). + +Lemma e_step3_correct : + forall (ge : genv) (sp: val) cmenv memenv (le : letenv) + (expr_a : expr) (a : int64) (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vlong a)) + (expr_b : expr) (b : int64) (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vlong b)), + (eval_expr ge sp cmenv memenv le (e_setup3 expr_a expr_b (e_step3)) + (step2_div_long' (Val.subl (Vlong a) (Val.mull (Vlong b) (step1_div_longu (Vlong a) (Vlong b)))) (Vlong b))). +Proof. +intros. +unfold e_setup2, e_setup1, e_step2, step2_div_long', step2_real_div_long, approx_inv_longu. +repeat (econstructor + apply eval_lift + eassumption). +Qed. + +Definition e_setup4 a b rest := (e_setup3 a b (Elet e_step3 rest)). + +Definition a_var4 := Eletvar (7%nat). +Definition a_d_var4 := Eletvar (6%nat). +Definition b_var4 := Eletvar (5%nat). +Definition b_d_var4 := Eletvar (4%nat). +Definition binv_d_var4 := Eletvar (3%nat). +Definition step1_var4 := Eletvar (2%nat). +Definition step2_var4 := Eletvar (1%nat). +Definition step3_var4 := Eletvar (0%nat). + +Definition e_step4 := + e_ite Tlong (Ccompl0 Clt) (e_msubl step2_var4 step3_var4 b_var4) + (e_addlimm step3_var4 Int64.mone) step3_var4. + +Lemma e_step4_correct : + forall (ge : genv) (sp: val) cmenv memenv (le : letenv) + (expr_a : expr) (a : int64) (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vlong a)) + (expr_b : expr) (b : int64) (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vlong b)), + (eval_expr ge sp cmenv memenv le (e_setup4 expr_a expr_b (e_step4)) + (step2_div_long (Val.subl (Vlong a) (Val.mull (Vlong b) (step1_div_longu (Vlong a) (Vlong b)))) (Vlong b))). +Proof. +intros. +unfold e_setup2, e_setup1, e_step2, step2_div_long, step2_div_long', step2_real_div_long, approx_inv_longu, step1_div_longu. +repeat (econstructor + apply eval_lift + eassumption). +Qed. + +Definition e_setup5 a b rest := (e_setup4 a b (Elet e_step4 rest)). + +Definition a_var5 := Eletvar (8%nat). +Definition a_d_var5 := Eletvar (7%nat). +Definition b_var5 := Eletvar (6%nat). +Definition b_d_var5 := Eletvar (5%nat). +Definition binv_d_var5 := Eletvar (4%nat). +Definition step1_var5 := Eletvar (3%nat). +Definition step2_var5 := Eletvar (2%nat). +Definition step3_var5 := Eletvar (1%nat). +Definition step4_var5 := Eletvar (0%nat). + +Definition e_step5 := e_addl step1_var5 step4_var5. + +Lemma e_step5_correct : + forall (ge : genv) (sp: val) cmenv memenv (le : letenv) + (expr_a : expr) (a : int64) (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vlong a)) + (expr_b : expr) (b : int64) (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vlong b)), + (eval_expr ge sp cmenv memenv le (e_setup5 expr_a expr_b (e_step5)) + (twostep_div_longu (Vlong a) (Vlong b))). +Proof. + intros. + Local Transparent twostep_div_longu. + repeat unfold e_setup2, e_setup1, e_step2, step2_div_long, step2_div_long', step2_real_div_long, approx_inv_longu, step1_div_longu, twostep_div_longu. +repeat (econstructor + apply eval_lift + eassumption). +Qed. + +Definition e_setup6 a b rest := (e_setup5 a b (Elet e_step5 rest)). + +Definition a_var6 := Eletvar (9%nat). +Definition a_d_var6 := Eletvar (8%nat). +Definition b_var6 := Eletvar (7%nat). +Definition b_d_var6 := Eletvar (6%nat). +Definition binv_d_var6 := Eletvar (5%nat). +Definition step1_var6 := Eletvar (4%nat). +Definition step2_var6 := Eletvar (3%nat). +Definition step3_var6 := Eletvar (2%nat). +Definition step4_var6 := Eletvar (1%nat). +Definition twostep_var6 := Eletvar (0%nat). + +Definition e_step6 := e_cmplimm Clt b_var6 Int64.zero. + +Definition e_setup7 a b rest := e_setup6 a b (Elet e_step6 rest). + +Definition a_var7 := Eletvar (10%nat). +Definition a_d_var7 := Eletvar (9%nat). +Definition b_var7 := Eletvar (8%nat). +Definition b_d_var7 := Eletvar (7%nat). +Definition binv_d_var7 := Eletvar (6%nat). +Definition step1_var7 := Eletvar (5%nat). +Definition step2_var7 := Eletvar (5%nat). +Definition step3_var7 := Eletvar (3%nat). +Definition step4_var7 := Eletvar (2%nat). +Definition twostep_var7 := Eletvar (1%nat). +Definition is_big_var7 := Eletvar (0%nat). + +Definition e_is_one := e_cmpluimm Cle b_var7 Int64.one. +Definition e_is_special := e_or is_big_var7 e_is_one. +Definition e_if_big := e_cast32unsigned (e_cmplu Cge a_var7 b_var7). +Definition e_if_special := e_ite Tlong (Ccompu0 Cne) is_big_var7 e_if_big a_var7. +Definition e_step7 := e_ite Tlong (Ccompu0 Cne) e_is_special e_if_special twostep_var7. + +Lemma e_step7_correct : + forall (ge : genv) (sp: val) cmenv memenv (le : letenv) + (expr_a : expr) (a : int64) (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vlong a)) + (expr_b : expr) (b : int64) (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vlong b)), + (eval_expr ge sp cmenv memenv le (e_setup7 expr_a expr_b (e_step7)) + (full2_div_longu (Vlong a) (Vlong b) memenv)). +Proof. + intros. + Local Transparent full2_div_longu. + repeat unfold e_setup2, e_setup1, e_step2, step2_div_long, step2_div_long', step2_real_div_long, approx_inv_longu, step1_div_longu, twostep_div_longu, full2_div_longu. + repeat (econstructor + apply eval_lift + eassumption). + cbn. + repeat f_equal. + destruct (Int64.lt b Int64.zero); cbn; change (Int.eq Int.one Int.zero) with false; change (Int.eq Int.zero Int.zero) with true; cbn; reflexivity. +Qed. + +Definition fp_divu64 a b := e_setup7 a b e_step7. + +Theorem fp_divu64_correct : + forall (ge : genv) (sp: val) cmenv memenv + (le : letenv) (expr_a expr_b : expr) (a b : int64) + (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vlong a)) + (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vlong b)) + (b_nz : (Int64.unsigned b > 0)%Z), + eval_expr ge sp cmenv memenv le (fp_divu64 expr_a expr_b) + (Vlong (Int64.divu a b)). +Proof. + intros. + unfold Int64.divu. + rewrite <- full2_div_longu_correct with (m := memenv) by lia. + apply e_step7_correct; assumption. +Qed. + +Definition fp_modu64 a b := Elet a (Elet (lift b) (e_msubl (Eletvar 1%nat) (Eletvar 0%nat) + (fp_divu64 (Eletvar 1%nat) (Eletvar 0%nat)))). + +Theorem fp_modu64_correct : + forall (ge : genv) (sp: val) cmenv memenv + (le : letenv) (expr_a expr_b : expr) (a b : int64) + (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vlong a)) + (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vlong b)) + (b_nz : (Int64.unsigned b > 0)%Z), + eval_expr ge sp cmenv memenv le (fp_modu64 expr_a expr_b) + (Vlong (Int64.modu a b)). +Proof. + intros. + rewrite Int64.modu_divu; cycle 1. + { intro Z. + subst. + rewrite Int64.unsigned_zero in b_nz. + lia. + } + unfold fp_modu64. + Local Opaque fp_divu64. + repeat (econstructor + apply eval_lift + eassumption). + { apply fp_divu64_correct; + repeat (econstructor + apply eval_lift + eassumption). + } + cbn. + rewrite Int64.mul_commut. + reflexivity. +Qed. + +Definition e_is_negl a := Eop (Ocmp (Ccomplimm Clt Int64.zero)) (a ::: Enil). +Definition e_xorw a b := Eop Oxor (a ::: b ::: Enil). +Definition e_negl a := Eop Onegl (a ::: Enil). +Definition e_absl a := Eop (Oabsdifflimm Int64.zero) (a ::: Enil). + +Definition fp_divs64 a b := + Elet a (Elet (lift b) + (Elet (fp_divu64 (e_absl (Eletvar (1%nat))) (e_absl (Eletvar (0%nat)))) + (e_ite Tlong (Ccompu0 Cne) (e_xorw (e_is_negl (Eletvar 2%nat)) + (e_is_negl (Eletvar 1%nat))) + (e_negl (Eletvar 0%nat)) (Eletvar 0%nat)))). + +Lemma nonneg_signed_unsigned: + forall x (x_NONNEG : (Int64.signed x >= 0)%Z), + (Int64.signed x) = (Int64.unsigned x). +Proof. + intros. + pose proof (Int64.unsigned_range x). + unfold Int64.signed in *. + destruct zlt. reflexivity. + change Int64.modulus with 18446744073709551616%Z in *. + change Int64.half_modulus with 9223372036854775808%Z in *. + lia. +Qed. + +Lemma long_min_signed_unsigned : + (- Int64.min_signed < Int64.max_unsigned)%Z. +Proof. + reflexivity. +Qed. + +Lemma long_divs_divu : + forall a b + (b_NOT0 : (Int64.signed b <> 0)%Z), + Int64.divs a b = if xorb (Int64.lt a Int64.zero) + (Int64.lt b Int64.zero) + then Int64.neg (Int64.divu (ExtValues.long_abs a) + (ExtValues.long_abs b)) + else Int64.divu (ExtValues.long_abs a) (ExtValues.long_abs b). +Proof. + intros. + unfold Int64.divs, Int64.divu, Int64.lt, ExtValues.long_abs. + pose proof (Int64.signed_range a) as a_RANGE. + pose proof (Int64.signed_range b) as b_RANGE. + change (Int64.signed Int64.zero) with 0%Z. + destruct zlt. + - cbn. rewrite (Z.abs_neq (Int64.signed a)) by lia. + rewrite (Int64.unsigned_repr (- Int64.signed a)); cycle 1. + { pose proof long_min_signed_unsigned. lia. } + + destruct zlt. + + rewrite (Z.abs_neq (Int64.signed b)) by lia. + rewrite Int64.unsigned_repr ; cycle 1. + { pose proof long_min_signed_unsigned. lia. } + rewrite <- (Z.opp_involutive (Int64.signed b)) at 1. + rewrite Z.quot_opp_r by lia. + rewrite <- (Z.opp_involutive (Int64.signed a)) at 1. + rewrite Z.quot_opp_l by lia. + rewrite Z.quot_div_nonneg by lia. + rewrite Z.opp_involutive. + reflexivity. + + + rewrite (Z.abs_eq (Int64.signed b)) by lia. + rewrite Int64.unsigned_repr ; cycle 1. + { pose proof Int64.max_signed_unsigned. lia. } + rewrite <- (Z.opp_involutive (Int64.signed a)) at 1. + rewrite Z.quot_opp_l by lia. + rewrite Z.quot_div_nonneg by lia. + rewrite Int64.neg_repr. + reflexivity. + + - cbn. rewrite (Z.abs_eq (Int64.signed a)) by lia. + rewrite (Int64.unsigned_repr (Int64.signed a)); cycle 1. + { pose proof Int64.max_signed_unsigned. lia. } + destruct zlt. + + rewrite (Z.abs_neq (Int64.signed b)) by lia. + rewrite Int64.unsigned_repr ; cycle 1. + { pose proof long_min_signed_unsigned. lia. } + rewrite Int64.neg_repr. + rewrite <- (Z.opp_involutive (Int64.signed b)) at 1. + rewrite Z.quot_opp_r by lia. + rewrite Z.quot_div_nonneg by lia. + reflexivity. + + + rewrite (Z.abs_eq (Int64.signed b)) by lia. + rewrite Int64.unsigned_repr ; cycle 1. + { pose proof Int64.max_signed_unsigned. lia. } + rewrite Z.quot_div_nonneg by lia. + reflexivity. +Qed. + +Lemma nonzero_unsigned_signed : + forall b, (Int64.unsigned b > 0 -> Int64.signed b <> 0)%Z. +Proof. + intros b GT EQ. + rewrite Int64.unsigned_signed in GT. + unfold Int64.lt in GT. + rewrite Int64.signed_zero in GT. + destruct zlt in GT; lia. +Qed. + +Theorem fp_divs64_correct : + forall (ge : genv) (sp: val) cmenv memenv + (le : letenv) (expr_a expr_b : expr) (a b : int64) + (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vlong a)) + (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vlong b)) + (b_nz : (Int64.unsigned b > 0)%Z), + eval_expr ge sp cmenv memenv le (fp_divs64 expr_a expr_b) + (Vlong (Int64.divs a b)). +Proof. + intros. + unfold fp_divs64. + Local Opaque fp_divu64. + repeat (econstructor + apply eval_lift + eassumption). + apply fp_divu64_correct. + all: repeat (econstructor + apply eval_lift + eassumption). + { unfold ExtValues.long_absdiff, ExtValues.Z_abs_diff. + rewrite Int64.signed_zero. rewrite Z.sub_0_r. + rewrite Int64.unsigned_repr. + { pose proof (nonzero_unsigned_signed b b_nz). + lia. + } + pose proof Int64.max_signed_unsigned. + pose proof long_min_signed_unsigned. + pose proof (Int64.signed_range b). + lia. + } + cbn. + rewrite long_divs_divu ; cycle 1. + { apply nonzero_unsigned_signed. assumption. } + unfold Int64.lt, ExtValues.long_abs, ExtValues.long_absdiff, ExtValues.Z_abs_diff. + change (Int64.signed Int64.zero) with 0%Z. + repeat rewrite Z.sub_0_r. + destruct zlt; destruct zlt; reflexivity. +Qed. + +Lemma long_mods_modu : + forall a b + (b_NOT0 : (Int64.signed b <> 0)%Z), + Int64.mods a b = if Int64.lt a Int64.zero + then Int64.neg (Int64.modu (ExtValues.long_abs a) + (ExtValues.long_abs b)) + else Int64.modu (ExtValues.long_abs a) (ExtValues.long_abs b). +Proof. + intros. + unfold Int64.mods, Int64.modu, Int64.lt, ExtValues.long_abs. + pose proof (Int64.signed_range a) as a_RANGE. + pose proof (Int64.signed_range b) as b_RANGE. + change (Int64.signed Int64.zero) with 0%Z. + destruct zlt. + - cbn. rewrite (Z.abs_neq (Int64.signed a)) by lia. + rewrite (Int64.unsigned_repr (- Int64.signed a)); cycle 1. + { pose proof long_min_signed_unsigned. lia. } + + destruct (zlt (Int64.signed b) 0%Z). + + rewrite (Z.abs_neq (Int64.signed b)) by lia. + rewrite Int64.unsigned_repr ; cycle 1. + { pose proof long_min_signed_unsigned. lia. } + rewrite <- (Z.opp_involutive (Int64.signed b)) at 1. + rewrite Z.rem_opp_r by lia. + rewrite <- (Z.opp_involutive (Int64.signed a)) at 1. + rewrite Z.rem_opp_l by lia. + rewrite Z.rem_mod_nonneg by lia. + rewrite Int64.neg_repr. + reflexivity. + + + rewrite (Z.abs_eq (Int64.signed b)) by lia. + rewrite Int64.unsigned_repr ; cycle 1. + { pose proof Int64.max_signed_unsigned. lia. } + rewrite <- (Z.opp_involutive (Int64.signed a)) at 1. + rewrite Z.rem_opp_l by lia. + rewrite Z.rem_mod_nonneg by lia. + rewrite Int64.neg_repr. + reflexivity. + + - cbn. rewrite (Z.abs_eq (Int64.signed a)) by lia. + rewrite (Int64.unsigned_repr (Int64.signed a)); cycle 1. + { pose proof Int64.max_signed_unsigned. lia. } + destruct (zlt (Int64.signed b) 0%Z). + + rewrite (Z.abs_neq (Int64.signed b)) by lia. + rewrite Int64.unsigned_repr ; cycle 1. + { pose proof long_min_signed_unsigned. lia. } + rewrite <- (Z.opp_involutive (Int64.signed b)) at 1. + rewrite Z.rem_opp_r by lia. + rewrite Z.rem_mod_nonneg by lia. + reflexivity. + + + rewrite (Z.abs_eq (Int64.signed b)) by lia. + rewrite Int64.unsigned_repr ; cycle 1. + { pose proof Int64.max_signed_unsigned. lia. } + rewrite Z.rem_mod_nonneg by lia. + reflexivity. +Qed. + +Definition fp_mods64z a b := + Elet a (Elet (lift b) + (Elet (fp_modu64 (e_absl (Eletvar (1%nat))) (e_absl (Eletvar (0%nat)))) + (e_ite Tlong (Ccompl0 Clt) (Eletvar 2%nat) + (e_negl (Eletvar 0%nat)) (Eletvar 0%nat)))). + +Theorem fp_mods64z_correct : + forall (ge : genv) (sp: val) cmenv memenv + (le : letenv) (expr_a expr_b : expr) (a b : int64) + (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vlong a)) + (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vlong b)) + (b_nz : (Int64.unsigned b > 0)%Z), + eval_expr ge sp cmenv memenv le (fp_mods64z expr_a expr_b) + (Vlong (Int64.mods a b)). +Proof. + intros. + unfold fp_mods64z. + Local Opaque fp_modu64. + repeat (econstructor + apply eval_lift + eassumption). + apply fp_modu64_correct. + all: repeat (econstructor + apply eval_lift + eassumption). + { unfold ExtValues.long_absdiff, ExtValues.Z_abs_diff. + rewrite Int64.signed_zero. rewrite Z.sub_0_r. + rewrite Int64.unsigned_repr. + { pose proof (nonzero_unsigned_signed b b_nz). + lia. + } + pose proof Int64.max_signed_unsigned. + pose proof long_min_signed_unsigned. + pose proof (Int64.signed_range b). + lia. + } + cbn. + rewrite long_mods_modu ; cycle 1. + { apply nonzero_unsigned_signed. assumption. } + unfold Int64.lt, ExtValues.long_abs, ExtValues.long_absdiff, ExtValues.Z_abs_diff. + change (Int64.signed Int64.zero) with 0%Z. + repeat rewrite Z.sub_0_r. + destruct zlt; reflexivity. +Qed. + +Definition fp_mods64 a b := + Elet a (Elet (lift b) + (Elet (fp_divs64 (Eletvar (1%nat)) (Eletvar (0%nat))) + (e_msubl (Eletvar 2%nat) (Eletvar 1%nat) (Eletvar 0%nat)))). + +Theorem fp_mods64_correct : + forall (ge : genv) (sp: val) cmenv memenv + (le : letenv) (expr_a expr_b : expr) (a b : int64) + (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vlong a)) + (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vlong b)) + (b_nz : (Int64.unsigned b > 0)%Z), + eval_expr ge sp cmenv memenv le (fp_mods64 expr_a expr_b) + (Vlong (Int64.mods a b)). +Proof. + intros. + rewrite Int64.mods_divs. + unfold fp_mods64. + Local Opaque fp_divs64. + repeat (econstructor + apply eval_lift + eassumption). + { apply fp_divs64_correct; + repeat (econstructor + apply eval_lift + eassumption). + } + cbn. + rewrite Int64.mul_commut. + reflexivity. +Qed. diff --git a/kvx/NeedOp.v b/kvx/NeedOp.v index cc86fc88..5a3b7190 100644 --- a/kvx/NeedOp.v +++ b/kvx/NeedOp.v @@ -131,6 +131,8 @@ Definition needs_of_operation (op: operation) (nv: nval): list nval := | Olongoffloat | Olonguoffloat | Ofloatoflong | Ofloatoflongu => op1 (default nv) | Ointofsingle | Ointuofsingle | Osingleofint | Osingleofintu => op1 (default nv) | Olongofsingle | Olonguofsingle | Osingleoflong | Osingleoflongu => op1 (default nv) + | Ointofsingle_ne | Ointuofsingle_ne => op1 (default nv) + | Olongoffloat_ne | Olonguoffloat_ne => op1 (default nv) | Ocmp c => needs_of_condition c | Oextfz _ _ | Oextfs _ _ | Oextfzl _ _ | Oextfsl _ _ => op1 (default nv) | Oinsf _ _ | Oinsfl _ _ => op2 (default nv) @@ -209,6 +209,11 @@ Inductive operation : Type := | Olonguofsingle (**r [rd = unsigned_long_of_float32(r1)] *) | Osingleoflong (**r [rd = float32_of_signed_long(r1)] *) | Osingleoflongu (**r [rd = float32_of_unsigned_int(r1)] *) + | Ointofsingle_ne (**r [rd = signed_int_of_float64(r1)] *) + | Ointuofsingle_ne (**r [rd = unsigned_int_of_float64(r1)] *) + | Olongoffloat_ne (**r [rd = signed_long_of_float64(r1)] *) + | Olonguoffloat_ne (**r [rd = unsigned_long_of_float64(r1)] *) + (*c Boolean tests: *) | Ocmp (cond: condition) (**r [rd = 1] if condition holds, [rd = 0] otherwise. *) | Oextfz (stop : Z) (start : Z) @@ -470,6 +475,11 @@ Definition eval_operation | Olonguofsingle, v1::nil => Some (Val.maketotal (Val.longuofsingle v1)) | Osingleoflong, v1::nil => Some (Val.maketotal (Val.singleoflong v1)) | Osingleoflongu, v1::nil => Some (Val.maketotal (Val.singleoflongu v1)) + | Ointofsingle_ne, v1::nil => Some (Val.maketotal (Val.intofsingle_ne v1)) + | Ointuofsingle_ne, v1::nil => Some (Val.maketotal (Val.intuofsingle_ne v1)) + | Olongoffloat_ne, v1::nil => Some (Val.maketotal (Val.longoffloat_ne v1)) + | Olonguoffloat_ne, v1::nil => Some (Val.maketotal (Val.longuoffloat_ne v1)) + | Ocmp c, _ => Some (Val.of_optbool (eval_condition c vl m)) | (Oextfz stop start), v0::nil => Some (extfz stop start v0) | (Oextfs stop start), v0::nil => Some (extfs stop start v0) @@ -691,6 +701,12 @@ Definition type_of_operation (op: operation) : list typ * typ := | Olonguofsingle => (Tsingle :: nil, Tlong) | Osingleoflong => (Tlong :: nil, Tsingle) | Osingleoflongu => (Tlong :: nil, Tsingle) + + | Ointofsingle_ne => (Tsingle :: nil, Tint) + | Ointuofsingle_ne => (Tsingle :: nil, Tint) + | Olongoffloat_ne => (Tfloat :: nil, Tlong) + | Olonguoffloat_ne => (Tfloat :: nil, Tlong) + | Ocmp c => (type_of_condition c, Tint) | Oextfz _ _ | Oextfs _ _ => (Tint :: nil, Tint) | Oextfzl _ _ | Oextfsl _ _ => (Tlong :: nil, Tlong) @@ -992,6 +1008,12 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type). (* singleoflong, singleoflongu *) - destruct v0; cbn... - destruct v0; cbn... + (* intofsingle_ne, intuofsingle_ne *) + - destruct v0; cbn... destruct (Float32.to_int_ne f); cbn; trivial. + - destruct v0; cbn... destruct (Float32.to_intu_ne f); cbn; trivial. + (* longoffloat_ne, longuoffloat_ne *) + - destruct v0; cbn... destruct (Float.to_long_ne f); cbn; trivial. + - destruct v0; cbn... destruct (Float.to_longu_ne f); cbn; trivial. (* cmp *) - destruct (eval_condition cond vl m)... destruct b... (* extfz *) @@ -1686,6 +1708,14 @@ Proof. (* singleoflong, singleoflongu *) - inv H4; cbn; auto. - inv H4; cbn; auto. + + (* intofsingle_ne, intuofsingle_ne *) + - inv H4; cbn; auto. destruct (Float32.to_int_ne f0); cbn; auto. + - inv H4; cbn; auto. destruct (Float32.to_intu_ne f0); cbn; auto. + (* longoffloat_ne, longuoffloat_ne *) + - inv H4; cbn; auto. destruct (Float.to_long_ne f0); cbn; auto. + - inv H4; cbn; auto. destruct (Float.to_longu_ne f0); cbn; auto. + (* cmp *) - subst v1. destruct (eval_condition cond vl1 m1) eqn:?. exploit eval_condition_inj; eauto. intros EQ; rewrite EQ. diff --git a/kvx/PostpassSchedulingOracle.ml b/kvx/PostpassSchedulingOracle.ml index 3eb0b95f..c729c72d 100644 --- a/kvx/PostpassSchedulingOracle.ml +++ b/kvx/PostpassSchedulingOracle.ml @@ -88,10 +88,10 @@ let arith_rr_real = function | Pfloatuwrnsz -> Floatuwz | Pfloatudrnsz -> Floatudz | Pfloatdrnsz -> Floatdz - | Pfixedwrzz -> Fixedw - | Pfixeduwrzz -> Fixeduw - | Pfixeddrzz -> Fixedd - | Pfixedudrzz -> Fixedud + | Pfixedwrzz | Pfixedwrne -> Fixedw + | Pfixeduwrzz | Pfixeduwrne -> Fixeduw + | Pfixeddrzz | Pfixeddrne -> Fixedd + | Pfixedudrzz | Pfixedudrne -> Fixedud | Pfixeddrzz_i32 -> Fixedd | Pfixedudrzz_i32 -> Fixedud diff --git a/kvx/SelectOp.vp b/kvx/SelectOp.vp index f243089d..5225a71c 100644 --- a/kvx/SelectOp.vp +++ b/kvx/SelectOp.vp @@ -760,6 +760,8 @@ Definition gen_absl args := | _ => None end. +Require FPDivision32 FPDivision64. + Definition platform_builtin (b: platform_builtin) (args: exprlist) : option expr := match b with | BI_fmin => Some (Eop Ominf args) @@ -768,6 +770,40 @@ Definition platform_builtin (b: platform_builtin) (args: exprlist) : option expr | BI_fmaxf => Some (Eop Omaxfs args) | BI_fma => gen_fma args | BI_fmaf => gen_fmaf args + | BI_lround_ne => Some (Eop Olongoffloat_ne args) + | BI_luround_ne => Some (Eop Olonguoffloat_ne args) + | BI_fp_udiv32 => (match args with + | a:::b:::Enil => Some (FPDivision32.fp_divu32 a b) + | _ => None + end) + | BI_fp_udiv64 => (match args with + | a:::b:::Enil => Some (FPDivision64.fp_divu64 a b) + | _ => None + end) + | BI_fp_umod32 => (match args with + | a:::b:::Enil => Some (FPDivision32.fp_modu32 a b) + | _ => None + end) + | BI_fp_umod64 => (match args with + | a:::b:::Enil => Some (FPDivision64.fp_modu64 a b) + | _ => None + end) + | BI_fp_sdiv32 => (match args with + | a:::b:::Enil => Some (FPDivision32.fp_divs32 a b) + | _ => None + end) + | BI_fp_sdiv64 => (match args with + | a:::b:::Enil => Some (FPDivision64.fp_divs64 a b) + | _ => None + end) + | BI_fp_smod32 => (match args with + | a:::b:::Enil => Some (FPDivision32.fp_mods32 a b) + | _ => None + end) + | BI_fp_smod64 => (match args with + | a:::b:::Enil => Some (FPDivision64.fp_mods64 a b) + | _ => None + end) | BI_abs => gen_abs args | BI_absl => gen_absl args end. diff --git a/kvx/SelectOpproof.v b/kvx/SelectOpproof.v index 4ddf6ece..19393091 100644 --- a/kvx/SelectOpproof.v +++ b/kvx/SelectOpproof.v @@ -1935,6 +1935,8 @@ Proof. constructor. Qed. +Require FPDivision32. + Theorem eval_platform_builtin: forall bf al a vl v le, platform_builtin bf al = Some a -> @@ -1948,6 +1950,190 @@ Proof. repeat (try econstructor; try eassumption)). - apply eval_fma; assumption. - apply eval_fmaf; assumption. + - cbn in *; + destruct vl; trivial. destruct vl; trivial. + destruct v0; try discriminate; + cbn; rewrite H0; reflexivity. + - cbn in *; + destruct vl; trivial. destruct vl; trivial. + destruct v0; try discriminate; + cbn; rewrite H0; reflexivity. + - cbn in *. + intro EVAL. + inv EVAL. discriminate. + inv H0. discriminate. + inv H2. 2: discriminate. + inv Heval. + intro EVAL. + destruct v1; try discriminate. + destruct v0; try discriminate. + unfold Int.eq in EVAL. + change (Int.unsigned Int.zero) with 0 in EVAL. + unfold zeq in EVAL. + destruct (Z.eq_dec (Int.unsigned i0) 0) as [EQ | NEQ]. + { discriminate. } + exists (Vint (Int.divu i i0)). split. + { + apply FPDivision32.fp_divu32_correct; auto. + pose proof (Int.unsigned_range i0). + lia. + } + inv EVAL. + constructor. + - cbn in *. + intro EVAL. + inv EVAL. discriminate. + inv H0. discriminate. + inv H2. 2: discriminate. + inv Heval. + intro EVAL. + destruct v1; try discriminate. + destruct v0; try discriminate. + unfold Int64.eq in EVAL. + change (Int64.unsigned Int64.zero) with 0 in EVAL. + unfold zeq in EVAL. + destruct (Z.eq_dec (Int64.unsigned i0) 0) as [EQ | NEQ]. + { discriminate. } + exists (Vlong (Int64.divu i i0)). split. + { + apply FPDivision64.fp_divu64_correct; auto. + pose proof (Int64.unsigned_range i0). + lia. + } + inv EVAL. + constructor. + - cbn in *. + intro EVAL. + inv EVAL. discriminate. + inv H0. discriminate. + inv H2. 2: discriminate. + inv Heval. + intro EVAL. + destruct v1; try discriminate. + destruct v0; try discriminate. + unfold Int.eq in EVAL. + change (Int.unsigned Int.zero) with 0 in EVAL. + unfold zeq in EVAL. + destruct (Z.eq_dec (Int.unsigned i0) 0) as [EQ | NEQ]. + { discriminate. } + exists (Vint (Int.modu i i0)). split. + { + apply FPDivision32.fp_modu32_correct; auto. + pose proof (Int.unsigned_range i0). + lia. + } + inv EVAL. + constructor. + - cbn in *. + intro EVAL. + inv EVAL. discriminate. + inv H0. discriminate. + inv H2. 2: discriminate. + inv Heval. + intro EVAL. + destruct v1; try discriminate. + destruct v0; try discriminate. + unfold Int64.eq in EVAL. + change (Int64.unsigned Int64.zero) with 0 in EVAL. + unfold zeq in EVAL. + destruct (Z.eq_dec (Int64.unsigned i0) 0) as [EQ | NEQ]. + { discriminate. } + exists (Vlong (Int64.modu i i0)). split. + { + apply FPDivision64.fp_modu64_correct; auto. + pose proof (Int64.unsigned_range i0). + lia. + } + inv EVAL. + constructor. + - cbn in *. + intro EVAL. + inv EVAL. discriminate. + inv H0. discriminate. + inv H2. 2: discriminate. + inv Heval. + intro EVAL. + destruct v1; try discriminate. + destruct v0; try discriminate. + unfold Int.eq in EVAL. + change (Int.unsigned Int.zero) with 0 in EVAL. + unfold zeq in EVAL. + destruct (Z.eq_dec (Int.unsigned i0) 0) as [EQ | NEQ]. + { discriminate. } + exists (Vint (Int.divs i i0)). split. + { + apply FPDivision32.fp_divs32_correct; auto. + pose proof (Int.unsigned_range i0). + lia. + } + inv EVAL. + constructor. + - cbn in *. + intro EVAL. + inv EVAL. discriminate. + inv H0. discriminate. + inv H2. 2: discriminate. + inv Heval. + intro EVAL. + destruct v1; try discriminate. + destruct v0; try discriminate. + unfold Int64.eq in EVAL. + change (Int64.unsigned Int64.zero) with 0 in EVAL. + unfold zeq in EVAL. + destruct (Z.eq_dec (Int64.unsigned i0) 0) as [EQ | NEQ]. + { discriminate. } + exists (Vlong (Int64.divs i i0)). split. + { + apply FPDivision64.fp_divs64_correct; auto. + pose proof (Int64.unsigned_range i0). + lia. + } + inv EVAL. + constructor. + - cbn in *. + intro EVAL. + inv EVAL. discriminate. + inv H0. discriminate. + inv H2. 2: discriminate. + inv Heval. + intro EVAL. + destruct v1; try discriminate. + destruct v0; try discriminate. + unfold Int.eq in EVAL. + change (Int.unsigned Int.zero) with 0 in EVAL. + unfold zeq in EVAL. + destruct (Z.eq_dec (Int.unsigned i0) 0) as [EQ | NEQ]. + { discriminate. } + exists (Vint (Int.mods i i0)). split. + { + apply FPDivision32.fp_mods32_correct; auto. + pose proof (Int.unsigned_range i0). + lia. + } + inv EVAL. + constructor. + - cbn in *. + intro EVAL. + inv EVAL. discriminate. + inv H0. discriminate. + inv H2. 2: discriminate. + inv Heval. + intro EVAL. + destruct v1; try discriminate. + destruct v0; try discriminate. + unfold Int64.eq in EVAL. + change (Int64.unsigned Int64.zero) with 0 in EVAL. + unfold zeq in EVAL. + destruct (Z.eq_dec (Int64.unsigned i0) 0) as [EQ | NEQ]. + { discriminate. } + exists (Vlong (Int64.mods i i0)). split. + { + apply FPDivision64.fp_mods64_correct; auto. + pose proof (Int64.unsigned_range i0). + lia. + } + inv EVAL. + constructor. - apply eval_abs; assumption. - apply eval_absl; assumption. Qed. diff --git a/kvx/TargetPrinter.ml b/kvx/TargetPrinter.ml index 8a311dbb..88143bfa 100644 --- a/kvx/TargetPrinter.ml +++ b/kvx/TargetPrinter.ml @@ -586,6 +586,14 @@ module Target (*: TARGET*) = fprintf oc " fixedd.rz %a = %a, 0\n" ireg rd ireg rs | Pfixedudrzz(rd, rs) | Pfixedudrzz_i32(rd, rs) -> fprintf oc " fixedud.rz %a = %a, 0\n" ireg rd ireg rs + | Pfixedudrne(rd, rs) -> + fprintf oc " fixedud.rn %a = %a, 0\n" ireg rd ireg rs + | Pfixeddrne(rd, rs) -> + fprintf oc " fixedd.rn %a = %a, 0\n" ireg rd ireg rs + | Pfixeduwrne(rd, rs) -> + fprintf oc " fixeduw.rn %a = %a, 0\n" ireg rd ireg rs + | Pfixedwrne(rd, rs) -> + fprintf oc " fixedw.rn %a = %a, 0\n" ireg rd ireg rs (* Arith RI32 instructions *) | Pmake (rd, imm) -> diff --git a/kvx/ValueAOp.v b/kvx/ValueAOp.v index ddfe94f3..52658b43 100644 --- a/kvx/ValueAOp.v +++ b/kvx/ValueAOp.v @@ -304,6 +304,10 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval := | Olonguofsingle, v1::nil => longuofsingle_total v1 | Osingleoflong, v1::nil => singleoflong v1 | Osingleoflongu, v1::nil => singleoflongu v1 + | Ointofsingle_ne, v1::nil => intofsingle_ne_total v1 + | Ointuofsingle_ne, v1::nil => intuofsingle_ne_total v1 + | Olongoffloat_ne, v1::nil => longoffloat_ne_total v1 + | Olonguoffloat_ne, v1::nil => longuoffloat_ne_total v1 | Ocmp c, _ => of_optbool (eval_static_condition c vl) | (Oextfz stop start), v0::nil => eval_static_extfz stop start v0 | (Oextfs stop start), v0::nil => eval_static_extfs stop start v0 |