From 57780f0ec50350d7158c242ed77047ec6cbf5791 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Thu, 13 Jan 2022 20:52:22 +0100 Subject: relative bound of error --- kvx/FPDivision64.v | 233 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 229 insertions(+), 4 deletions(-) (limited to 'kvx') diff --git a/kvx/FPDivision64.v b/kvx/FPDivision64.v index fc2050bb..f9557335 100644 --- a/kvx/FPDivision64.v +++ b/kvx/FPDivision64.v @@ -49,7 +49,7 @@ Qed. Definition approx_inv_thresh := (25/2251799813685248)%R. (* 1.11022302462516e-14 *) -Theorem approx_inv_longu_correct : +Theorem approx_inv_longu_correct_abs : forall b, (0 < Int64.unsigned b)%Z -> exists (f : float), @@ -283,6 +283,234 @@ Proof. gappa. Qed. +Local Notation "'rd'" := (round radix2 (FLT_exp (-1074) 53) ZnearestE). +Local Notation "'rs'" := (round radix2 (FLT_exp (-149) 24) ZnearestE). + +Definition approx_inv_rel_thresh := (1049/72057594037927936)%R. +Theorem approx_inv_longu_correct_rel : + forall b, + (0 < Int64.unsigned b)%Z -> + exists (f : float), + (approx_inv_longu (Vlong b)) = Vfloat f /\ + is_finite _ _ f = true /\ (Rabs(IZR (Int64.unsigned b) * (B2R _ _ f) - 1) <= approx_inv_rel_thresh)%R. +Proof. + intros b NONZ. + unfold approx_inv_longu. + cbn. + econstructor. + split. + reflexivity. + Local Transparent Float.neg Float.of_single Float32.of_longu Float32.div Float.of_longu Float32.of_int Float.of_int. + unfold Float.fma, Float.neg, Float.of_single, Float32.of_longu, ExtFloat32.inv, Float32.div, Float.of_longu, ExtFloat32.one, Float32.of_int, ExtFloat.one, Float.of_int. + set (re := (@eq_refl Datatypes.comparison Lt)). + change (Int.signed (Int.repr 1)) with 1%Z. + set (b' := Int64.unsigned b) in *. + pose proof (Int64.unsigned_range b) as RANGE. + change Int64.modulus with 18446744073709551616%Z in RANGE. + assert(1 <= IZR b' <= 18446744073709551616)%R as RANGE'. + { split; apply IZR_le; lia. + } + + assert (-16777216 <= 1 <= 16777216)%Z as SILLY by lia. + destruct (BofZ_exact 24 128 re re 1 SILLY) as (C0R & C0F & _). + clear SILLY. + set (one_s := (BofZ 24 128 re re 1)) in *. + + pose proof (BofZ_correct 24 128 re re b') as C1. + cbn in C1. + rewrite Rlt_bool_true in C1; cycle 1. + { clear C1. + eapply (Rabs_relax (IZR 18446744073709551616)). + lra. + set (b'' := IZR b') in *. + gappa. + } + destruct C1 as (C1R & C1F & _). + set (b_s := (BofZ 24 128 re re b')) in *. + + assert(1 <= B2R 24 128 b_s <= 18446744073709551616)%R as b_s_RANGE. + { rewrite C1R. + gappa. + } + assert(B2R 24 128 b_s <> 0)%R as b_s_NONZ by lra. + + pose proof (Bdiv_correct 24 128 re re Float32.binop_nan mode_NE one_s b_s b_s_NONZ) as C2. + rewrite Rlt_bool_true in C2; cycle 1. + { clear C2. + apply Rabs_relax with (b := 1%R). + { cbn; lra. } + rewrite C0R. + set (r_b_s := B2R 24 128 b_s) in *. + cbn. + gappa. + } + + destruct C2 as (C2R & C2F & _). + set (invb_s := (Bdiv 24 128 re re Float32.binop_nan mode_NE one_s b_s)) in *. + rewrite C0F in C2F. + + assert ((1/18446744073709551616 <= B2R 24 128 invb_s <= 1)%R) as invb_s_RANGE. + { rewrite C2R. + set (r_b_s := B2R 24 128 b_s) in *. + rewrite C0R. + cbn. + gappa. + } + + pose proof (Bconv_correct 24 128 53 1024 re re Float.of_single_nan mode_NE invb_s C2F) as C3. + rewrite Rlt_bool_true in C3; cycle 1. + { clear C3. + set (r_invb_s := (B2R 24 128 invb_s)) in *. + apply Rabs_relax with (b := 1%R). + { replace 1%R with (bpow radix2 0)%R by reflexivity. + apply bpow_lt. + lia. + } + cbn. + gappa. + } + + destruct C3 as (C3R & C3F & _). + set (invb_d := (Bconv 24 128 53 1024 re re Float.of_single_nan mode_NE invb_s)) in *. + assert ((1/18446744073709551616 <= B2R 53 1024 invb_d <= 1)%R) as invb_d_RANGE. + { + rewrite C3R. + set (r_invb_s := B2R 24 128 invb_s) in *. + cbn. + gappa. + } + + pose proof (is_finite_Bopp 53 1024 Float.neg_nan invb_d) as opp_finite. + rewrite C3F in opp_finite. + + pose proof (BofZ_correct 53 1024 re re 1) as C4. + rewrite Rlt_bool_true in C4; cycle 1. + { clear C4. + cbn. + eapply (Rabs_relax (IZR 18446744073709551616)). + lra. + set (b'' := IZR b') in *. + gappa. + } + destruct C4 as (C4R & C4F & _). + + pose proof (BofZ_correct 53 1024 re re b') as C5. + cbn in C5. + rewrite Rlt_bool_true in C5; cycle 1. + { clear C5. + eapply (Rabs_relax (IZR 18446744073709551616)). + lra. + set (b'' := IZR b') in *. + gappa. + } + destruct C5 as (C5R & C5F & _). + set (b_d := (BofZ 53 1024 re re b')) in *. + + assert(1 <= B2R 53 1024 b_d <= 18446744073709551616)%R as b_d_RANGE. + { rewrite C5R. + gappa. + } + + pose proof (Bfma_correct 53 1024 re re Float.fma_nan mode_NE + (Bopp 53 1024 Float.neg_nan invb_d) (BofZ 53 1024 re re b') + (BofZ 53 1024 re re 1) opp_finite C5F C4F) as C6. + rewrite Rlt_bool_true in C6; cycle 1. + { clear C6. + rewrite C4R. + rewrite B2R_Bopp. + cbn. + eapply (Rabs_relax (IZR 18446744073709551616)). + { lra. } + fold invb_d. + fold b_d. + set (r_invb_d := B2R 53 1024 invb_d) in *. + set (r_b_d := B2R 53 1024 b_d) in *. + gappa. + } + fold b_d in C6. + destruct C6 as (C6R & C6F & _). + + pose proof (Bfma_correct 53 1024 re re Float.fma_nan mode_NE + (Bfma 53 1024 re re Float.fma_nan mode_NE + (Bopp 53 1024 Float.neg_nan invb_d) b_d (BofZ 53 1024 re re 1)) + invb_d invb_d C6F C3F C3F) as C7. + rewrite Rlt_bool_true in C7; cycle 1. + { clear C7. + rewrite C6R. + rewrite B2R_Bopp. + eapply (Rabs_relax (bpow radix2 64)). + { apply bpow_lt. lia. } + rewrite C4R. + cbn. + set (r_invb_d := B2R 53 1024 invb_d) in *. + set (r_b_d := B2R 53 1024 b_d) in *. + gappa. + } + destruct C7 as (C7R & C7F & _). + + split. assumption. + rewrite C7R. + rewrite C6R. + rewrite C5R. + rewrite C4R. + rewrite B2R_Bopp. + rewrite C3R. + rewrite C2R. + rewrite C1R. + rewrite C0R. + cbn. + set(b1 := IZR b') in *. + + replace (rd 1) with 1%R by gappa. + replace (rd (rs (1 / rs b1))) with + ((((rd (rs (1 / rs b1)) - (/b1))/(/b1))+1)*(/ b1))%R ; cycle 1. + { field. lra. } + set (er0 := ((rd (rs (1 / rs b1)) - (/b1))/(/b1))%R). + replace (rd b1) with ((((rd b1) - b1)/b1 + 1) * b1)%R; cycle 1. + { field. lra. } + set (er1 := (((rd b1) - b1)/b1)%R). + replace (- ((er0 + 1) * / b1) * ((er1 + 1) * b1) + 1)%R + with (1 - (er0 + 1)*(er1 + 1))%R ; cycle 1. + { field. lra. } + set (z0 := (1 - (er0 + 1) * (er1 + 1))%R). + assert (Rabs er0 <= 257/2147483648)%R as er0_ABS. + { unfold er0. + gappa. + } + assert (Rabs er1 <= 1/9007199254740992)%R as er1_ABS. + { unfold er1. + gappa. + } + replace (rd z0) with ((rd(z0)-z0)+z0)%R by ring. + set (ea0 := (rd(z0)-z0)%R). + assert (Rabs ea0 <= 1/75557863725914323419136)%R as ea0_ABS. + { unfold ea0. unfold z0. + gappa. + } + set (z1 := ((ea0 + z0) * ((er0 + 1) * / b1) + (er0 + 1) * / b1)%R). + replace (rd z1) with ((((rd z1)-z1)/z1+1)*z1)%R; cycle 1. + { field. + unfold z1. + unfold z0. + gappa. + } + set (er2 := ((rd z1 - z1) / z1)%R). + assert (Rabs er2 <= 1/9007199254740992)%R as er2_ABS. + { unfold er2. + unfold z1, z0. + gappa. + } + unfold z1, z0. + replace (b1 * + ((er2 + 1) * + ((ea0 + (1 - (er0 + 1) * (er1 + 1))) * ((er0 + 1) * / b1) + + (er0 + 1) * / b1)) - 1)%R + with (-er0*er0*er1*er2 - er0*er0*er1 + ea0*er0*er2 - er0*er0*er2 - 2*er0*er1*er2 + ea0*er0 - er0*er0 - 2*er0*er1 + ea0*er2 - er1*er2 + ea0 - er1 + er2)%R; cycle 1. + { field. lra. } + unfold approx_inv_rel_thresh. + gappa. +Qed. + Definition step1_real_inv_longu b := let invb_s := ExtValues.invfs (Val.maketotal (Val.singleoflongu b)) in Val.floatofsingle invb_s. @@ -290,9 +518,6 @@ Definition step1_real_inv_longu b := Definition step1_real_inv_thresh := (3/33554432)%R. (* 8.94069671630859e-8 *) -Local Notation "'rd'" := (round radix2 (FLT_exp (-1074) 53) ZnearestE). -Local Notation "'rs'" := (round radix2 (FLT_exp (-149) 24) ZnearestE). - Theorem step1_real_inv_longu_correct : forall b, (0 < Int64.unsigned b)%Z -> -- cgit