diff options
author | David Monniaux <David.Monniaux@univ-grenoble-alpes.fr> | 2022-03-07 16:21:13 +0100 |
---|---|---|
committer | David Monniaux <David.Monniaux@univ-grenoble-alpes.fr> | 2022-03-07 16:21:13 +0100 |
commit | 25e82e849de35eaef24412b468d3a36c72f4fcb6 (patch) | |
tree | e6abc778dfa37ac5df55c8b0926ed681b9c04f04 | |
parent | ab776cd94e000d07c4d14521a8d0c635d3b8412c (diff) | |
parent | 2d9138547d93c32c0ec5ae54b4afc022f5c434ff (diff) | |
download | compcert-kvx-25e82e849de35eaef24412b468d3a36c72f4fcb6.tar.gz compcert-kvx-25e82e849de35eaef24412b468d3a36c72f4fcb6.zip |
Merge remote-tracking branch 'origin/kvx_fp_division' into kvx-work
-rw-r--r-- | .gitlab-ci.yml | 16 | ||||
-rw-r--r-- | INSTALL.md | 16 | ||||
-rw-r--r-- | Makefile | 13 | ||||
-rw-r--r-- | backend/ValueDomain.v | 156 | ||||
-rw-r--r-- | common/Values.v | 48 | ||||
-rwxr-xr-x | config_kvx.sh | 2 | ||||
-rwxr-xr-x | configure | 3 | ||||
-rw-r--r-- | flocq.sh | 2 | ||||
-rw-r--r-- | kvx/Asm.v | 12 | ||||
-rw-r--r-- | kvx/Asmblockdeps.v | 4 | ||||
-rw-r--r-- | kvx/Asmblockgen.v | 12 | ||||
-rw-r--r-- | kvx/Asmvliw.v | 9 | ||||
-rw-r--r-- | kvx/Builtins1.v | 72 | ||||
-rw-r--r-- | kvx/CBuiltins.ml | 20 | ||||
-rw-r--r-- | kvx/ExtFloats.v | 5 | ||||
-rw-r--r-- | kvx/ExtIEEE754.v | 12 | ||||
-rw-r--r-- | kvx/ExtValues.v | 61 | ||||
-rw-r--r-- | kvx/ExtZ.v | 12 | ||||
-rw-r--r-- | kvx/FPDivision32.v | 883 | ||||
-rw-r--r-- | kvx/FPDivision64.v | 2670 | ||||
-rw-r--r-- | kvx/NeedOp.v | 2 | ||||
-rw-r--r-- | kvx/Op.v | 30 | ||||
-rw-r--r-- | kvx/PostpassSchedulingOracle.ml | 8 | ||||
-rw-r--r-- | kvx/SelectOp.vp | 36 | ||||
-rw-r--r-- | kvx/SelectOpproof.v | 186 | ||||
-rw-r--r-- | kvx/TargetPrinter.ml | 8 | ||||
-rw-r--r-- | kvx/ValueAOp.v | 4 | ||||
-rw-r--r-- | lib/Floats.v | 18 | ||||
-rw-r--r-- | lib/IEEE754_extra.v | 416 | ||||
-rw-r--r-- | test/monniaux/division/compare_timings.c | 67 | ||||
l--------- | test/monniaux/division/cycles.h | 1 |
31 files changed, 4784 insertions, 20 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c8ccedb8..c730a00e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -266,19 +266,21 @@ build_rv32: build_kvx: stage: build - image: coqorg/coq:8.12.2-ocaml-4.11.2-flambda + image: coqorg/coq:8.13.2-ocaml-4.11.2-flambda before_script: - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update - - sudo apt-get -y install sshpass openssh-client libzip4 lttng-tools liblttng-ctl-dev liblttng-ust-dev babeltrace libzip4 + - sudo apt-get -y install sshpass openssh-client libzip4 lttng-tools liblttng-ctl-dev liblttng-ust-dev babeltrace libzip4 bison flex libmpfr-dev libboost-dev + # bison, flex, libmpfr-dev, libboost-dev for gappa - ./.download_from_Kalray.sh - (cd download ; rm -f *dkms*.deb *eclipse*.deb *llvm*.deb *board-mgmt* *oce-host* *pocl* *flash-util* *barebox* *-kann-* *-kaf-* *-stb-* *-opencv* *-eigen* *-task* *-blis* *-lz4*) - sudo dpkg -i download/*.deb - rm -rf download - eval `opam config env` - - opam update - - opam install -y menhir + - opam update && opam pin add coq-flocq 3.4.0 --no-action + - opam install -y menhir gappa coq-gappa coq-flocq script: - - source /opt/kalray/accesscore/kalray.sh && ./config_kvx.sh + - rm -rf flocq + - source /opt/kalray/accesscore/kalray.sh && ./config_kvx.sh --use-external-Flocq - source /opt/kalray/accesscore/kalray.sh && make -j "$NJOBS" - make -j "$NJOBS" clightgen - source /opt/kalray/accesscore/kalray.sh && make -C test CCOMPOPTS=-static SIMU='kvx-cluster -- ' EXECUTE='kvx-cluster -- ' all test @@ -325,9 +327,9 @@ pages: # TODO: change to "deploy" when "build" succeeds (or integrate with "buil - if: '$CI_COMMIT_BRANCH == "master"' when: always -build_aarch64_coq13: +build_aarch64_coq14: stage: build - image: coqorg/coq:8.13.2-ocaml-4.11.2-flambda + image: coqorg/coq:8.14.1-ocaml-4.12.0-flambda before_script: - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update - sudo apt-get -y install 
gcc-aarch64-linux-gnu qemu-user @@ -34,11 +34,23 @@ Install dependencies available through opam ``` opam install coq menhir ``` - Note: it may happen that a newer version of Coq is not supported yet. You may downgrade to solve the problem: ``` -opam pin add coq 8.12.2 # example of Coq version +opam pin add coq 8.13.2 # example of Coq version +``` + +### For Kalray KVX +On this platform, we also need Gappa installed. +You may need to first install some development packages. On Ubuntu: +``` +apt install bison flex libmpfr-dev libboost-dev +``` + +This install Gappa and Flocq: +``` +opam pin add coq-flocq 3.4.0 --no-action +opam install gappa coq-gappa coq-flocq ``` ## Compilation @@ -33,7 +33,7 @@ BACKENDLIB?=Asmgenproof0.v Asmgenproof1.v DIRS := lib lib/Impure common $(ARCHDIRS) scheduling backend cfrontend driver \ export cparser -RECDIRS:=lib common $(ARCHDIRS) scheduling backend cfrontend driver flocq exportclight \ +RECDIRS:=lib common $(ARCHDIRS) scheduling backend cfrontend driver exportclight \ cparser COQINCLUDES := $(foreach d, $(DIRS), -R $(d) compcert.$(subst /,.,$d)) @@ -77,7 +77,7 @@ COQCOPTS ?= \ cparser/Parser.vo: COQCOPTS += -w -deprecated-instance-without-locality -COQC="$(COQBIN)coqc" -q $(COQINCLUDES) $(COQCOPTS) +COQC=PATH=tools:$$PATH "$(COQBIN)coqc" -q $(COQINCLUDES) $(COQCOPTS) COQDEP="$(COQBIN)coqdep" $(COQINCLUDES) COQDOC="$(COQBIN)coqdoc" COQEXEC="$(COQBIN)coqtop" $(COQINCLUDES) -batch -load-vernac-source @@ -297,10 +297,17 @@ else ocamlc -o tools/modorder str.cma tools/modorder.ml endif +tools/gappa: + echo "#!/bin/sh" > $@ + echo -n "exec " >> $@ + which gappa | tr -d '\n' >> $@ + echo ' -Eprecision=100 "$$@"' >> $@ + chmod a+rx $@ + latexdoc: cd doc; $(COQDOC) --latex -o doc/doc.tex -g $(FILES) -%.vo: %.v +%.vo: %.v tools/gappa @rm -f doc/$(*F).glob @echo "COQC $*.v" @$(COQC) -dump-glob doc/$(*F).glob $*.v diff --git a/backend/ValueDomain.v b/backend/ValueDomain.v index 7a55147b..fcc70ac8 100644 --- a/backend/ValueDomain.v +++ 
b/backend/ValueDomain.v @@ -2269,6 +2269,24 @@ Proof. inv H; simpl; auto with va. rewrite E; constructor. Qed. +Definition intofsingle_ne (x: aval) := + match x with + | FS f => + match Float32.to_int_ne f with + | Some i => I i + | None => if va_strict tt then Vbot else ntop + end + | _ => ntop1 x + end. + +Lemma intofsingle_ne_sound: + forall v x w, vmatch v x -> Val.intofsingle_ne v = Some w -> vmatch w (intofsingle_ne x). +Proof. + unfold Val.intofsingle_ne; intros. destruct v; try discriminate. + destruct (Float32.to_int_ne f) as [i|] eqn:E; simpl in H0; inv H0. + inv H; simpl; auto with va. rewrite E; constructor. +Qed. + Definition intuofsingle (x: aval) := match x with | FS f => @@ -2287,6 +2305,24 @@ Proof. inv H; simpl; auto with va. rewrite E; constructor. Qed. +Definition intuofsingle_ne (x: aval) := + match x with + | FS f => + match Float32.to_intu_ne f with + | Some i => I i + | None => if va_strict tt then Vbot else ntop + end + | _ => ntop1 x + end. + +Lemma intuofsingle_ne_sound: + forall v x w, vmatch v x -> Val.intuofsingle_ne v = Some w -> vmatch w (intuofsingle_ne x). +Proof. + unfold Val.intuofsingle_ne; intros. destruct v; try discriminate. + destruct (Float32.to_intu_ne f) as [i|] eqn:E; simpl in H0; inv H0. + inv H; simpl; auto with va. rewrite E; constructor. +Qed. + Definition singleofint (x: aval) := match x with | I i => FS(Float32.of_int i) @@ -2349,6 +2385,42 @@ Proof. inv H; simpl; auto with va. rewrite E; constructor. Qed. +Definition longoffloat_ne (x: aval) := + match x with + | F f => + match Float.to_long_ne f with + | Some i => L i + | None => if va_strict tt then Vbot else ntop + end + | _ => ntop1 x + end. + +Lemma longoffloat_ne_sound: + forall v x w, vmatch v x -> Val.longoffloat_ne v = Some w -> vmatch w (longoffloat_ne x). +Proof. + unfold Val.longoffloat_ne; intros. destruct v; try discriminate. + destruct (Float.to_long_ne f) as [i|] eqn:E; simpl in H0; inv H0. + inv H; simpl; auto with va. rewrite E; constructor. 
+Qed. + +Definition longuoffloat_ne (x: aval) := + match x with + | F f => + match Float.to_longu_ne f with + | Some i => L i + | None => if va_strict tt then Vbot else ntop + end + | _ => ntop1 x + end. + +Lemma longuoffloat_ne_sound: + forall v x w, vmatch v x -> Val.longuoffloat_ne v = Some w -> vmatch w (longuoffloat_ne x). +Proof. + unfold Val.longuoffloat_ne; intros. destruct v; try discriminate. + destruct (Float.to_longu_ne f) as [i|] eqn:E; simpl in H0; inv H0. + inv H; simpl; auto with va. rewrite E; constructor. +Qed. + Definition floatoflong (x: aval) := match x with | L i => F(Float.of_long i) @@ -2566,6 +2638,46 @@ Definition longuofsingle_total (x: aval) := | _ => ntop1 x end. +Definition intofsingle_ne_total (x: aval) := + match x with + | FS f => + match Float32.to_int_ne f with + | Some i => I i + | None => ntop + end + | _ => ntop1 x + end. + +Definition intuofsingle_ne_total (x: aval) := + match x with + | FS f => + match Float32.to_intu_ne f with + | Some i => I i + | None => ntop + end + | _ => ntop1 x + end. + +Definition longoffloat_ne_total (x: aval) := + match x with + | F f => + match Float.to_long_ne f with + | Some i => L i + | None => ntop + end + | _ => ntop1 x + end. + +Definition longuoffloat_ne_total (x: aval) := + match x with + | F f => + match Float.to_longu_ne f with + | Some i => L i + | None => ntop + end + | _ => ntop1 x + end. + Lemma intoffloat_total_sound: forall v x (MATCH : vmatch v x), @@ -2606,6 +2718,26 @@ Proof. all: destruct (Float32.to_intu f) as [i|] eqn:E; simpl; [auto with va | constructor]. Qed. +Lemma intofsingle_ne_total_sound: + forall v x + (MATCH : vmatch v x), + vmatch (Val.maketotal (Val.intofsingle_ne v)) (intofsingle_ne_total x). +Proof. + unfold Val.intofsingle_ne, intofsingle_ne_total. intros. + inv MATCH; simpl in *; try constructor. + all: destruct (Float32.to_int_ne f) as [i|] eqn:E; simpl; [auto with va | constructor]. +Qed. 
+ +Lemma intuofsingle_ne_total_sound: + forall v x + (MATCH : vmatch v x), + vmatch (Val.maketotal (Val.intuofsingle_ne v)) (intuofsingle_ne_total x). +Proof. + unfold Val.intofsingle, intofsingle_total. intros. + inv MATCH; simpl in *; try constructor. + all: destruct (Float32.to_intu_ne f) as [i|] eqn:E; simpl; [auto with va | constructor]. +Qed. + Lemma singleofint_total_sound: forall v x, vmatch v x -> vmatch (Val.maketotal (Val.singleofint v)) (singleofint x). @@ -2648,6 +2780,26 @@ Proof. all: destruct (Float.to_longu f) as [i|] eqn:E; simpl; [auto with va | constructor]. Qed. +Lemma longoffloat_ne_total_sound: + forall v x + (MATCH : vmatch v x), + vmatch (Val.maketotal (Val.longoffloat_ne v)) (longoffloat_ne_total x). +Proof. + unfold Val.longoffloat_ne, longoffloat_ne_total. intros. + inv MATCH; simpl in *; try constructor. + all: destruct (Float.to_long_ne f) as [i|] eqn:E; simpl; [auto with va | constructor]. +Qed. + +Lemma longuoffloat_ne_total_sound: + forall v x + (MATCH : vmatch v x), + vmatch (Val.maketotal (Val.longuoffloat_ne v)) (longuoffloat_ne_total x). +Proof. + unfold Val.longoffloat_ne, longoffloat_ne_total. intros. + inv MATCH; simpl in *; try constructor. + all: destruct (Float.to_longu_ne f) as [i|] eqn:E; simpl; [auto with va | constructor]. +Qed. 
+ Lemma longofsingle_total_sound: forall v x (MATCH : vmatch v x), @@ -5200,6 +5352,10 @@ Global Hint Resolve cnot_sound symbol_address_sound intofsingle_sound intuofsingle_sound singleofint_sound singleofintu_sound longoffloat_sound longuoffloat_sound floatoflong_sound floatoflongu_sound longofsingle_sound longuofsingle_sound singleoflong_sound singleoflongu_sound + intofsingle_ne_sound intuofsingle_ne_sound + longoffloat_ne_sound longuoffloat_ne_sound + intofsingle_ne_total_sound intuofsingle_ne_total_sound + longoffloat_ne_total_sound longuoffloat_ne_total_sound longofwords_sound loword_sound hiword_sound intoffloat_total_sound intuoffloat_total_sound diff --git a/common/Values.v b/common/Values.v index 87ebea00..cf5a1255 100644 --- a/common/Values.v +++ b/common/Values.v @@ -244,6 +244,18 @@ Definition intuoffloat (v: val) : option val := | _ => None end. +Definition intoffloat_ne (v: val) : option val := + match v with + | Vfloat f => option_map Vint (Float.to_int_ne f) + | _ => None + end. + +Definition intuoffloat_ne (v: val) : option val := + match v with + | Vfloat f => option_map Vint (Float.to_intu_ne f) + | _ => None + end. + Definition floatofint (v: val) : option val := match v with | Vint n => Some (Vfloat (Float.of_int n)) @@ -268,6 +280,18 @@ Definition intuofsingle (v: val) : option val := | _ => None end. +Definition intofsingle_ne (v: val) : option val := + match v with + | Vsingle f => option_map Vint (Float32.to_int_ne f) + | _ => None + end. + +Definition intuofsingle_ne (v: val) : option val := + match v with + | Vsingle f => option_map Vint (Float32.to_intu_ne f) + | _ => None + end. + Definition singleofint (v: val) : option val := match v with | Vint n => Some (Vsingle (Float32.of_int n)) @@ -623,6 +647,30 @@ Definition longuofsingle (v: val) : option val := | _ => None end. +Definition longoffloat_ne (v: val) : option val := + match v with + | Vfloat f => option_map Vlong (Float.to_long_ne f) + | _ => None + end. 
+ +Definition longuoffloat_ne (v: val) : option val := + match v with + | Vfloat f => option_map Vlong (Float.to_longu_ne f) + | _ => None + end. + +Definition longofsingle_ne (v: val) : option val := + match v with + | Vsingle f => option_map Vlong (Float32.to_long_ne f) + | _ => None + end. + +Definition longuofsingle_ne (v: val) : option val := + match v with + | Vsingle f => option_map Vlong (Float32.to_longu_ne f) + | _ => None + end. + Definition floatoflong (v: val) : option val := match v with | Vlong n => Some (Vfloat (Float.of_long n)) diff --git a/config_kvx.sh b/config_kvx.sh index 9040c23b..289f6f0a 100755 --- a/config_kvx.sh +++ b/config_kvx.sh @@ -1 +1 @@ -exec ./config_simple.sh kvx-cos "$@" +exec ./config_simple.sh kvx-cos -use-external-Flocq "$@" @@ -823,7 +823,8 @@ BACKENDLIB=Machblock.v Machblockgen.v Machblockgenproof.v\\ Asmblock.v Asmblockgen.v Asmblockgenproof0.v Asmblockgenproof1.v Asmblockgenproof.v Asmvliw.v Asmblockprops.v\\ FPExtra.v \\ ForwardSimulationBlock.v PostpassScheduling.v PostpassSchedulingproof.v\\ - Asmblockdeps.v DecBoolOps.v Chunks.v Peephole.v ExtValues.v ExtFloats.v\\ + Asmblockdeps.v DecBoolOps.v Chunks.v Peephole.v\\ + FPDivision32.v FPDivision64.v ExtValues.v ExtFloats.v\\ AbstractBasicBlocksDef.v SeqSimuTheory.v ImpSimuTest.v Parallelizability.v EOF fi diff --git a/flocq.sh b/flocq.sh new file mode 100644 index 00000000..64b7534e --- /dev/null +++ b/flocq.sh @@ -0,0 +1,2 @@ +COQINCLUDES="-R /home/monniaux/.opam/4.12.0+flambda/lib/coq/user-contrib/Flocq flocq" +export COQINCLUDES @@ -163,6 +163,8 @@ Inductive instruction : Type := | Pfloatuwrnsz (rd rs: ireg) (**r Floating Point Conversion from integer (u32 -> 32) *) | Pfloatudrnsz (rd rs: ireg) (**r Floating Point Conversion from unsigned integer (64 bits) *) | Pfloatdrnsz (rd rs: ireg) (**r Floating Point Conversion from integer (64 bits) *) + + (* round to zero *) | Pfixedwrzz (rd rs: ireg) (**r Integer conversion from floating point *) | Pfixeduwrzz (rd rs: ireg) 
(**r Integer conversion from floating point (f32 -> 32 bits unsigned *) | Pfixeddrzz (rd rs: ireg) (**r Integer conversion from floating point (i64 -> 64 bits) *) @@ -170,6 +172,12 @@ Inductive instruction : Type := | Pfixedudrzz (rd rs: ireg) (**r unsigned Integer conversion from floating point (u64 -> 64 bits) *) | Pfixedudrzz_i32 (rd rs: ireg) (**r unsigned Integer conversion from floating point (u32 -> 64 bits) *) + (* round to nearest, prefer even numbers *) + | Pfixedwrne (rd rs: ireg) (**r Integer conversion from floating point *) + | Pfixeduwrne (rd rs: ireg) (**r Integer conversion from floating point (f32 -> 32 bits unsigned *) + | Pfixeddrne (rd rs: ireg) (**r Integer conversion from floating point (i64 -> 64 bits) *) + | Pfixedudrne (rd rs: ireg) (**r unsigned Integer conversion from floating point (u64 -> 64 bits) *) + (** Arith RI32 *) | Pmake (rd: ireg) (imm: int) (**r load immediate *) @@ -357,6 +365,10 @@ Definition basic_to_instruction (b: basic) := | PArithRR Asmvliw.Pfixedudrzz rd rs => Pfixedudrzz rd rs | PArithRR Asmvliw.Pfixeddrzz_i32 rd rs => Pfixeddrzz_i32 rd rs | PArithRR Asmvliw.Pfixedudrzz_i32 rd rs => Pfixedudrzz_i32 rd rs + | PArithRR Asmvliw.Pfixedwrne rd rs => Pfixedwrne rd rs + | PArithRR Asmvliw.Pfixeduwrne rd rs => Pfixeduwrne rd rs + | PArithRR Asmvliw.Pfixeddrne rd rs => Pfixeddrne rd rs + | PArithRR Asmvliw.Pfixedudrne rd rs => Pfixedudrne rd rs (* RI32 *) | PArithRI32 Asmvliw.Pmake rd imm => Pmake rd imm diff --git a/kvx/Asmblockdeps.v b/kvx/Asmblockdeps.v index b1895ee6..78a766ee 100644 --- a/kvx/Asmblockdeps.v +++ b/kvx/Asmblockdeps.v @@ -1537,6 +1537,10 @@ Definition string_of_name_rr (n: arith_name_rr): pstring := | Pfixedudrzz => "Pfixedudrzz" | Pfixeddrzz_i32 => "Pfixeddrzz_i32" | Pfixedudrzz_i32 => "Pfixedudrzz_i32" + | Pfixedwrne => "Pfixedwrne" + | Pfixeduwrne => "Pfixeduwrne" + | Pfixeddrne => "Pfixeddrne" + | Pfixedudrne => "Pfixedudrne" end. 
Definition string_of_name_ri32 (n: arith_name_ri32): pstring := diff --git a/kvx/Asmblockgen.v b/kvx/Asmblockgen.v index fd41dfdd..07b31b11 100644 --- a/kvx/Asmblockgen.v +++ b/kvx/Asmblockgen.v @@ -811,6 +811,12 @@ Definition transl_op | Ointuofsingle, a1 :: nil => do rd <- ireg_of res; do rs <- freg_of a1; OK (Pfixeduwrzz rd rs ::i k) + | Ointofsingle_ne, a1 :: nil => + do rd <- ireg_of res; do rs <- freg_of a1; + OK (Pfixedwrne rd rs ::i k) + | Ointuofsingle_ne, a1 :: nil => + do rd <- ireg_of res; do rs <- freg_of a1; + OK (Pfixeduwrne rd rs ::i k) | Olongoffloat, a1 :: nil => do rd <- ireg_of res; do rs <- freg_of a1; OK (Pfixeddrzz rd rs ::i k) @@ -823,6 +829,12 @@ Definition transl_op | Olonguoffloat, a1 :: nil => do rd <- ireg_of res; do rs <- freg_of a1; OK (Pfixedudrzz rd rs ::i k) + | Olongoffloat_ne, a1 :: nil => + do rd <- ireg_of res; do rs <- freg_of a1; + OK (Pfixeddrne rd rs ::i k) + | Olonguoffloat_ne, a1 :: nil => + do rd <- ireg_of res; do rs <- freg_of a1; + OK (Pfixedudrne rd rs ::i k) | Ofloatofsingle, a1 :: nil => do rd <- freg_of res; do rs <- freg_of a1; diff --git a/kvx/Asmvliw.v b/kvx/Asmvliw.v index 0ce2ed69..3fa184c6 100644 --- a/kvx/Asmvliw.v +++ b/kvx/Asmvliw.v @@ -402,7 +402,10 @@ Inductive arith_name_rr : Type := | Pfixedudrzz (**r Integer conversion from floating point (float -> unsigned long) *) | Pfixeddrzz_i32 (**r Integer conversion from floating point (float -> int) *) | Pfixedudrzz_i32 (**r Integer conversion from floating point (float -> unsigned int) *) -. + | Pfixedwrne (**r Integer conversion from floating point *) + | Pfixeduwrne (**r Integer conversion from floating point (f32 -> 32 bits unsigned *) + | Pfixeddrne (**r Integer conversion from floating point (i64 -> 64 bits) *) + | Pfixedudrne. 
(**r unsigned Integer conversion from floating point (u64 -> 64 bits) *) Inductive arith_name_ri32 : Type := | Pmake (**r load immediate *) @@ -960,6 +963,10 @@ Definition arith_eval_rr n v := | Pfixedudrzz => Val.maketotal (Val.longuoffloat v) | Pfixeddrzz_i32 => Val.maketotal (Val.intoffloat v) | Pfixedudrzz_i32 => Val.maketotal (Val.intuoffloat v) + | Pfixedudrne => Val.maketotal (Val.longuoffloat_ne v) + | Pfixeddrne => Val.maketotal (Val.longoffloat_ne v) + | Pfixeduwrne => Val.maketotal (Val.intuofsingle_ne v) + | Pfixedwrne => Val.maketotal (Val.intofsingle_ne v) end. Definition arith_eval_ri32 n i := diff --git a/kvx/Builtins1.v b/kvx/Builtins1.v index b5fc7459..5536e58c 100644 --- a/kvx/Builtins1.v +++ b/kvx/Builtins1.v @@ -26,6 +26,16 @@ Inductive platform_builtin : Type := | BI_fmaxf | BI_fma | BI_fmaf +| BI_lround_ne +| BI_luround_ne +| BI_fp_udiv32 +| BI_fp_udiv64 +| BI_fp_umod32 +| BI_fp_umod64 +| BI_fp_sdiv32 +| BI_fp_sdiv64 +| BI_fp_smod32 +| BI_fp_smod64 | BI_abs | BI_absl. @@ -38,6 +48,16 @@ Definition platform_builtin_table : list (string * platform_builtin) := :: ("__builtin_fmaxf", BI_fmaxf) :: ("__builtin_fma", BI_fma) :: ("__builtin_fmaf", BI_fmaf) + :: ("__builtin_lround_ne", BI_lround_ne) + :: ("__builtin_luround_ne", BI_luround_ne) + :: ("__builtin_fp_udiv32", BI_fp_udiv32) + :: ("__builtin_fp_udiv64", BI_fp_udiv64) + :: ("__builtin_fp_umod32", BI_fp_umod32) + :: ("__builtin_fp_umod64", BI_fp_umod64) + :: ("__builtin_fp_sdiv32", BI_fp_sdiv32) + :: ("__builtin_fp_sdiv64", BI_fp_sdiv64) + :: ("__builtin_fp_smod32", BI_fp_smod32) + :: ("__builtin_fp_smod64", BI_fp_smod64) :: ("__builtin_abs", BI_abs) :: ("__builtin_absl", BI_absl) :: nil. 
@@ -52,6 +72,24 @@ Definition platform_builtin_sig (b: platform_builtin) : signature := mksignature (Tfloat :: Tfloat :: Tfloat :: nil) Tfloat cc_default | BI_fmaf => mksignature (Tsingle :: Tsingle :: Tsingle :: nil) Tsingle cc_default + | BI_lround_ne | BI_luround_ne => + mksignature (Tfloat :: nil) Tlong cc_default + | BI_fp_udiv32 => + mksignature (Tint :: Tint :: nil) Tint cc_default + | BI_fp_udiv64 => + mksignature (Tlong :: Tlong :: nil) Tlong cc_default + | BI_fp_umod32 => + mksignature (Tint :: Tint :: nil) Tint cc_default + | BI_fp_umod64 => + mksignature (Tlong :: Tlong :: nil) Tlong cc_default + | BI_fp_sdiv32 => + mksignature (Tint :: Tint :: nil) Tint cc_default + | BI_fp_sdiv64 => + mksignature (Tlong :: Tlong :: nil) Tlong cc_default + | BI_fp_smod32 => + mksignature (Tint :: Tint :: nil) Tint cc_default + | BI_fp_smod64 => + mksignature (Tlong :: Tlong :: nil) Tlong cc_default | BI_abs => mksignature (Tint :: nil) Tint cc_default | BI_absl => @@ -66,6 +104,40 @@ Definition platform_builtin_sem (b: platform_builtin) : builtin_sem (sig_res (pl | BI_fmaxf => mkbuiltin_n2t Tsingle Tsingle Tsingle ExtFloat32.max | BI_fma => mkbuiltin_n3t Tfloat Tfloat Tfloat Tfloat Float.fma | BI_fmaf => mkbuiltin_n3t Tsingle Tsingle Tsingle Tsingle Float32.fma + | BI_lround_ne => mkbuiltin_n1p Tfloat Tlong Float.to_long_ne + | BI_luround_ne => mkbuiltin_n1p Tfloat Tlong Float.to_longu_ne + | BI_fp_udiv32 => mkbuiltin_n2p Tint Tint Tint + (fun n1 n2 => if Int.eq n2 Int.zero + then None + else Some (Int.divu n1 n2)) + | BI_fp_udiv64 => mkbuiltin_n2p Tlong Tlong Tlong + (fun n1 n2 => if Int64.eq n2 Int64.zero + then None + else Some (Int64.divu n1 n2)) + | BI_fp_umod32 => mkbuiltin_n2p Tint Tint Tint + (fun n1 n2 => if Int.eq n2 Int.zero + then None + else Some (Int.modu n1 n2)) + | BI_fp_umod64 => mkbuiltin_n2p Tlong Tlong Tlong + (fun n1 n2 => if Int64.eq n2 Int64.zero + then None + else Some (Int64.modu n1 n2)) + | BI_fp_sdiv32 => mkbuiltin_n2p Tint Tint Tint + (fun 
n1 n2 => if Int.eq n2 Int.zero + then None + else Some (Int.divs n1 n2)) + | BI_fp_sdiv64 => mkbuiltin_n2p Tlong Tlong Tlong + (fun n1 n2 => if Int64.eq n2 Int64.zero + then None + else Some (Int64.divs n1 n2)) + | BI_fp_smod32 => mkbuiltin_n2p Tint Tint Tint + (fun n1 n2 => if Int.eq n2 Int.zero + then None + else Some (Int.mods n1 n2)) + | BI_fp_smod64 => mkbuiltin_n2p Tlong Tlong Tlong + (fun n1 n2 => if Int64.eq n2 Int64.zero + then None + else Some (Int64.mods n1 n2)) | BI_abs => mkbuiltin_n1t Tint Tint ExtValues.int_abs | BI_absl => mkbuiltin_n1t Tlong Tlong ExtValues.long_abs end. diff --git a/kvx/CBuiltins.ml b/kvx/CBuiltins.ml index 4d016453..c0b69adf 100644 --- a/kvx/CBuiltins.ml +++ b/kvx/CBuiltins.ml @@ -133,6 +133,26 @@ let builtins = { "__builtin_fmaf", (TFloat(FFloat, []), [TFloat(FFloat, []); TFloat(FFloat, []); TFloat(FFloat, [])], false); + "__builtin_lround_ne", + (TInt(ILong, []), [TFloat(FDouble, [])], false); + "__builtin_luround_ne", + (TInt(IULong, []), [TFloat(FDouble, [])], false); + "__builtin_fp_udiv32", + (TInt(IUInt, []), [TInt(IUInt, []); TInt(IUInt, [])], false); + "__builtin_fp_udiv64", + (TInt(IULong, []), [TInt(IULong, []); TInt(IULong, [])], false); + "__builtin_fp_umod32", + (TInt(IUInt, []), [TInt(IUInt, []); TInt(IUInt, [])], false); + "__builtin_fp_umod64", + (TInt(IULong, []), [TInt(IULong, []); TInt(IULong, [])], false); + "__builtin_fp_sdiv32", + (TInt(IInt, []), [TInt(IInt, []); TInt(IInt, [])], false); + "__builtin_fp_sdiv64", + (TInt(ILong, []), [TInt(ILong, []); TInt(ILong, [])], false); + "__builtin_fp_smod32", + (TInt(IInt, []), [TInt(IInt, []); TInt(IInt, [])], false); + "__builtin_fp_smod64", + (TInt(ILong, []), [TInt(ILong, []); TInt(ILong, [])], false); "__builtin_abs", (TInt(IInt, []), [TInt(IInt, [])], false); "__builtin_absl", diff --git a/kvx/ExtFloats.v b/kvx/ExtFloats.v index b08503a5..332d3e3d 100644 --- a/kvx/ExtFloats.v +++ b/kvx/ExtFloats.v @@ -13,6 +13,8 @@ (* *) (* 
*************************************************************) +From Flocq Require Import Core Digits Operations Round Bracket Sterbenz + Binary Round_odd. Require Import Floats Integers ZArith. Module ExtFloat. @@ -30,6 +32,8 @@ Definition max (x : float) (y : float) : float := | Some Eq | Some Gt => x | Some Lt | None => y end. + +Definition one := Float.of_int (Int.repr (1%Z)). End ExtFloat. Module ExtFloat32. @@ -50,5 +54,4 @@ Definition max (x : float32) (y : float32) : float32 := Definition one := Float32.of_int (Int.repr (1%Z)). Definition inv (x : float32) : float32 := Float32.div one x. - End ExtFloat32. diff --git a/kvx/ExtIEEE754.v b/kvx/ExtIEEE754.v new file mode 100644 index 00000000..095fd0cc --- /dev/null +++ b/kvx/ExtIEEE754.v @@ -0,0 +1,12 @@ +Require Import Coq.ZArith.Zdiv. + +Open Scope Z. + +Definition Zdiv_ne (a b : Z) := + let q := Z.div a b in + let q1 := Z.succ q in + match Z.compare (a-b*q) (b*q1-a) with + | Lt => q + | Gt => q1 + | Eq => (if Z.even q then q else q1) + end. diff --git a/kvx/ExtValues.v b/kvx/ExtValues.v index c493f708..f08c1157 100644 --- a/kvx/ExtValues.v +++ b/kvx/ExtValues.v @@ -755,6 +755,67 @@ Definition fmaddfs := triple_op_single (fun f1 f2 f3 => Float32.fma f2 f3 f1). Definition fmsubf := triple_op_float (fun f1 f2 f3 => Float.fma (Float.neg f2) f3 f1). Definition fmsubfs := triple_op_single (fun f1 f2 f3 => Float32.fma (Float32.neg f2) f3 f1). +From Flocq Require Import Core Digits Operations Round Bracket Sterbenz + Binary Round_odd. +Require Import IEEE754_extra Zdiv Psatz Floats ExtFloats. + +Definition div_approx_reals (a b : Z) (x : R) := + let q:=ZnearestE x in + let r:=a-q*b in + if r <? 0 + then q-1 + else q. + +Lemma floor_ball1: + forall x : R, forall y : Z, + (Rabs (x - IZR y) < 1)%R -> Zfloor x = (y-1)%Z \/ Zfloor x = y. +Proof. + intros x y BALL. + apply Rabs_lt_inv in BALL. + case (Rcompare_spec x (IZR y)); intro CMP. + - left. apply Zfloor_imp. + ring_simplify (y-1+1). + rewrite minus_IZR. lra. 
+ - subst. + rewrite Zfloor_IZR. right. reflexivity. + - right. apply Zfloor_imp. + rewrite plus_IZR. lra. +Qed. + +Theorem div_approx_reals_correct: + forall a b : Z, forall x : R, + b > 0 -> + (Rabs (x - IZR a/ IZR b) < 1/2)%R -> + div_approx_reals a b x = (a/b)%Z. +Proof. + intros a b x bPOS GAP. + assert (0 < IZR b)%R by (apply IZR_lt ; lia). + unfold div_approx_reals. + pose proof (Znearest_imp2 (fun x => negb (Z.even x)) x) as NEAR. + assert (Rabs (IZR (ZnearestE x) - IZR a/ IZR b) < 1)%R as BALL. + { pose proof (Rabs_triang (IZR (ZnearestE x) - x) + (x - IZR a/ IZR b)) as TRI. + ring_simplify (IZR (ZnearestE x) - x + (x - IZR a / IZR b))%R in TRI. + lra. + } + clear GAP NEAR. + rewrite Rabs_minus_sym in BALL. + pose proof (floor_ball1 _ _ BALL) as FLOOR. + clear BALL. + rewrite Zfloor_div in FLOOR by lia. + pose proof (Z_div_mod_eq_full a b) as DIV_MOD. + assert (0 < b) as bPOS' by lia. + pose proof (Z.mod_pos_bound a b bPOS') as MOD_BOUNDS. + case Z.ltb_spec; intro; destruct FLOOR; lia. +Qed. + +Definition my_div (a b : val) := + let b_d := Val.maketotal (Val.floatofintu b) in + let invb_d := Val.floatofsingle (invfs (Val.maketotal (Val.singleofintu b))) in + let alpha := fmsubf (Vfloat ExtFloat.one) invb_d b_d in + let x := fmaddf invb_d alpha invb_d in + Val.mulf (Val.maketotal (Val.floatofintu a)) x. + Definition int_abs i1 := Int.repr (Z.abs (Int.signed i1)). Definition long_abs i1 := Int64.repr (Z.abs (Int64.signed i1)). diff --git a/kvx/ExtZ.v b/kvx/ExtZ.v new file mode 100644 index 00000000..095fd0cc --- /dev/null +++ b/kvx/ExtZ.v @@ -0,0 +1,12 @@ +Require Import Coq.ZArith.Zdiv. + +Open Scope Z. + +Definition Zdiv_ne (a b : Z) := + let q := Z.div a b in + let q1 := Z.succ q in + match Z.compare (a-b*q) (b*q1-a) with + | Lt => q + | Gt => q1 + | Eq => (if Z.even q then q else q1) + end. 
diff --git a/kvx/FPDivision32.v b/kvx/FPDivision32.v new file mode 100644 index 00000000..5a7b536f --- /dev/null +++ b/kvx/FPDivision32.v @@ -0,0 +1,883 @@ +From Flocq Require Import Core Digits Operations Round Bracket Sterbenz + Binary Round_odd Bits. +Require Archi. +Require Import Coqlib. +Require Import Compopts. +Require Import AST. +Require Import Integers. +Require Import Floats. +Require Import Op. +Require Import CminorSel. +Require Import OpHelpers. +Require Import ExtFloats. +Require Import DecBoolOps. +Require Import Chunks. +Require Import Builtins. +Require Import Values Globalenvs. +Require Compopts. +Require Import Psatz. +Require Import IEEE754_extra. + +From Gappa Require Import Gappa_tactic. + +Local Open Scope cminorsel_scope. + +Definition approx_inv b := + let invb_s := Eop Oinvfs ((Eop Osingleofintu ((Eletvar 0%nat):::Enil)):::Enil) in + let invb_d := Eop Ofloatofsingle (invb_s ::: Enil) in + let b_d := Eop Ofloatoflongu ((Eop Ocast32unsigned ((Eletvar 1%nat):::Enil)):::Enil) in + let invb_d_var := Eletvar (0%nat) in + let one := Eop (Ofloatconst ExtFloat.one) Enil in + let alpha := Eop Ofmsubf (one ::: invb_d_var ::: b_d ::: Enil) in + let x := Eop Ofmaddf (invb_d_var ::: alpha ::: invb_d_var ::: Enil) in + Elet b (Elet invb_d x). + +Definition approx_inv_thresh := (1/17179869184)%R. + +(* +Lemma BofZ_exact_zero: + forall (prec emax : Z) (prec_gt_0_ : Prec_gt_0 prec) + (Hmax : (prec < emax)%Z), + B2R (BofZ prec emax 0%Z (Hmax := Hmax)) = 0%R /\ + is_finite (BofZ prec emax 0%Z (Hmax := Hmax)) = true /\ + Bsign prec emax (BofZ prec emax 0%Z (Hmax := Hmax)) = false. +Proof. + intros. + apply BofZ_exact. + pose proof (Z.pow_nonneg 2 prec). + lia. +Qed. + *) + +Lemma Rabs_relax: + forall b b' (INEQ : (b < b')%R) x, + (-b <= x <= b)%R -> (Rabs x < b')%R. +Proof. + intros. + apply Rabs_lt. + lra. +Qed. 
+ +Theorem approx_inv_correct : + forall (ge : genv) (sp: val) cmenv memenv + (le : letenv) (expr_b : expr) (b : int) + (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vint b)) + (b_nz : ((Int.unsigned b) > 0)%Z), + exists f : float, + eval_expr ge sp cmenv memenv le (approx_inv expr_b) (Vfloat f) /\ + is_finite _ _ f = true /\ (Rabs((B2R _ _ f) - (1 / IZR (Int.unsigned b))) <= approx_inv_thresh)%R. +Proof. + intros. unfold approx_inv. + econstructor. constructor. + { repeat econstructor. + { eassumption. } + { reflexivity. } } + set (invb_d := (Float.of_single (ExtFloat32.inv (Float32.of_intu b)))). + set (b' := Int.unsigned b) in *. + pose proof (Int.unsigned_range b) as RANGE. + fold b' in RANGE. + change Int.modulus with 4294967296%Z in RANGE. + assert (0 <= b' <= Int64.max_unsigned)%Z as b'RANGE. + { change Int64.max_unsigned with 18446744073709551615%Z. + lia. } + assert (1 <= IZR b' <= 4294967295)%R as RANGE'. + { split. + { apply IZR_le. lia. } + apply IZR_le. lia. + } + cbn. + + set (b_d := (Float.of_longu (Int64.repr b'))) in *. + Local Transparent Float.of_longu. + unfold Float.of_longu in b_d. + + assert(SILLY : (- 2 ^ 24 <= 1 <= 2 ^ 24)%Z) by lia. + destruct (BofZ_exact 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1 SILLY) as (C0E & C0F & _). + clear SILLY. + + assert(SILLY : (- 2 ^ 53 <= 1 <= 2 ^ 53)%Z) by lia. + destruct (BofZ_exact 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1 SILLY) as (C9E & C9F & _). + clear SILLY. + + pose proof (BofZ_correct 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) b') as C1. + rewrite Rlt_bool_true in C1; cycle 1. + { clear C1. cbn. + eapply (Rabs_relax (IZR 4294967296)). + lra. + gappa. + } + rewrite Zlt_bool_false in C1 by lia. + destruct C1 as (C1E & C1F & _). + + Local Transparent Float32.of_intu Float32.of_int Float32.div. + unfold ExtFloat32.inv, ExtFloat32.one, Float32.of_intu, Float32.of_int, Float32.div in invb_d. 
+ fold b' in invb_d. + change (Int.signed (Int.repr 1%Z)) with 1%Z in invb_d. + pose proof (Bdiv_correct 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) Float32.binop_nan mode_NE + (BofZ 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1) + (BofZ 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) b')) as C2. + rewrite Rlt_bool_true in C2; cycle 1. + { clear C2. rewrite C1E. + apply (Rabs_relax (bpow radix2 10)). + { cbn; lra. } + unfold F2R. cbn. unfold F2R. cbn. + gappa. + } + assert (B2R 24 128 (BofZ 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) b') <> 0%R) as NONZ. + { clear C2. + rewrite C1E. + cbn. + assert (1 <= round radix2 (FLT_exp (-149) 24) ZnearestE (IZR b'))%R by gappa. + lra. + } + destruct (C2 NONZ) as (C2E & C2F & _). + clear C2 NONZ. + Local Transparent Float.of_single. + unfold Float.of_single in invb_d. + pose proof (Bconv_correct 24 128 53 1024 (@eq_refl Datatypes.comparison Lt) + (@eq_refl Datatypes.comparison Lt) Float.of_single_nan mode_NE + (Bdiv 24 128 (@eq_refl Datatypes.comparison Lt) + (@eq_refl Datatypes.comparison Lt) Float32.binop_nan mode_NE + (BofZ 24 128 (@eq_refl Datatypes.comparison Lt) + (@eq_refl Datatypes.comparison Lt) 1) + (BofZ 24 128 (@eq_refl Datatypes.comparison Lt) + (@eq_refl Datatypes.comparison Lt) b'))) as C3. + fold invb_d in C3. + rewrite Rlt_bool_true in C3; cycle 1. + { clear C3. + rewrite C2E. + rewrite C1E. + rewrite C0E. + apply (Rabs_relax (bpow radix2 10)). + { apply bpow_lt; lia. } + cbn. + gappa. + } + change (is_finite 24 128 (BofZ 24 128 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1)) with true in C2F. + destruct (C3 C2F) as (C3E & C3F & _). + clear C3. + unfold Float.fma. + assert (is_finite _ _ (Float.neg invb_d) = true) as invb_d_F. + { Local Transparent Float.neg. + unfold Float.neg. + rewrite is_finite_Bopp. + assumption. 
+ } + + assert(SILLY : (- 2 ^ 53 <= b' <= 2 ^ 53)%Z) by lia. + destruct (BofZ_exact 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) b' SILLY) as (C4E & C4F & _). + clear SILLY. + + assert (is_finite 53 1024 b_d = true) as b_d_F. + { unfold b_d. + rewrite Int64.unsigned_repr by lia. + assumption. + } + + assert (is_finite 53 1024 ExtFloat.one = true) as one_F by reflexivity. + + pose proof (Bfma_correct 53 1024 (@eq_refl Datatypes.comparison Lt) + (@eq_refl Datatypes.comparison Lt) Float.fma_nan mode_NE + (Float.neg invb_d) b_d ExtFloat.one invb_d_F b_d_F one_F) as C5. + + rewrite Rlt_bool_true in C5; cycle 1. + { clear C5. + unfold Float.neg. + rewrite B2R_Bopp. + rewrite C3E. + rewrite C2E. + rewrite C0E. + rewrite C1E. + unfold ExtFloat.one. + change (Float.of_int (Int.repr 1)) with (BofZ 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1). + rewrite C9E. + unfold b_d. + rewrite Int64.unsigned_repr by lia. + rewrite C4E. + apply (Rabs_relax (bpow radix2 10)). + { apply bpow_lt; lia. } + cbn. + gappa. + } + destruct C5 as (C5E & C5F & _). + + pose proof (Bfma_correct 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) Float.fma_nan mode_NE + (Bfma 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) Float.fma_nan mode_NE + (Float.neg invb_d) b_d ExtFloat.one) invb_d invb_d C5F C3F C3F) as C6. + rewrite Rlt_bool_true in C6; cycle 1. + { clear C6. + rewrite C3E. + rewrite C2E. + rewrite C1E. + rewrite C0E. + rewrite C5E. + unfold Float.neg. + rewrite B2R_Bopp. + rewrite C3E. + rewrite C2E. + rewrite C0E. + rewrite C1E. + unfold b_d. + rewrite Int64.unsigned_repr by lia. + rewrite C4E. + unfold ExtFloat.one. + change (Float.of_int (Int.repr 1)) with (BofZ 53 1024 (@eq_refl Datatypes.comparison Lt) (@eq_refl Datatypes.comparison Lt) 1). + rewrite C9E. + apply (Rabs_relax (bpow radix2 10)). + { apply bpow_lt; lia. } + cbn. + gappa. 
+ } + destruct C6 as (C6E & C6F & _). + split. + { exact C6F. } + rewrite C6E. + rewrite C5E. + rewrite C3E. + rewrite C2E. + rewrite C1E. + rewrite C0E. + unfold Float.neg. + rewrite B2R_Bopp. + unfold ExtFloat.one. + Local Transparent Float.of_int. + unfold Float.of_int. + rewrite (Int.signed_repr 1) by (cbn ; lia). + rewrite C9E. + rewrite C3E. + rewrite C2E. + rewrite C0E. + rewrite C1E. + unfold b_d. + rewrite Int64.unsigned_repr by lia. + rewrite C4E. + cbn. + set (rd := round radix2 (FLT_exp (-1074) 53) ZnearestE) in *. + set (rs := round radix2 (FLT_exp (-149) 24) ZnearestE) in *. + set (bi := IZR b') in *. + set (invb0 := rd (rs (1/ rs bi))%R) in *. + set (alpha := (- invb0 * bi + 1)%R) in *. + set (alpha' := ((1/bi - rd (rs (1/ rs bi)))/(1/bi))%R) in *. + assert (alpha = alpha')%R as expand_alpha. + { + unfold alpha, alpha', invb0. + field. + lra. + } + assert(-1/2097152 <= alpha' <= 1/2097152)%R as alpha_BOUND. + { unfold alpha', rd, rs. + gappa. + } + set (delta := (rd (rd alpha * invb0 + invb0) - (alpha * invb0 + invb0))%R). + assert(-1/1125899906842624 <= delta <= 1/1125899906842624)%R as delta_BOUND. + { unfold delta, invb0. rewrite expand_alpha. unfold rd, rs. + gappa. + } + replace (rd (rd alpha * invb0 + invb0) - 1/bi)%R with + (delta + ((alpha * invb0 + invb0)-1/bi))%R by (unfold delta; ring). + replace (alpha * invb0 + invb0 - 1 / bi)%R with (alpha * (invb0 - 1/bi) + (alpha * (1/bi) + invb0 - 1 / bi))%R by ring. + replace (alpha * (1 / bi) + invb0 - 1 / bi)%R with 0%R; cycle 1. + { unfold alpha. + field. + lra. + } + rewrite expand_alpha. + unfold invb0, rd, rs, approx_inv_thresh. + apply Rabs_le. + gappa. +Qed. 
+ +Definition fp_divu32 a b := + let a_var := Eletvar (1%nat) in + let b_var := Eletvar (0%nat) in + let a_d := Eop Ofloatoflongu ((Eop Ocast32unsigned (a_var ::: Enil)) ::: Enil) in + let qr := Eop Olonguoffloat_ne ((Eop Omulf (a_d:::(approx_inv b_var):::Enil)):::Enil) in + let qr_var := Eletvar 0%nat in + let rem := Eop Omsubl ((Eop Ocast32unsigned ((Eletvar (2%nat)):::Enil))::: + qr_var ::: + (Eop Ocast32unsigned ((Eletvar (1%nat)):::Enil)):::Enil) in + let qr_m1 := Eop (Oaddlimm (Int64.repr (-1)%Z)) (qr_var:::Enil) in + let cases := Eop (Osel (Ccompl0 Clt) Tlong) + (qr_m1 ::: qr_var ::: rem ::: Enil) in (* (Elet qr cases) *) + Eop Olowlong ((Elet a (Elet (lift b) (Elet qr cases))) ::: Enil). + +Open Scope Z. + +Definition div_approx_reals (a b : Z) (x : R) := + let q:=ZnearestE x in + let r:=a-q*b in + if r <? 0 + then q-1 + else q. + +Lemma floor_ball1: + forall x : R, forall y : Z, + (Rabs (x - IZR y) < 1)%R -> Zfloor x = (y-1)%Z \/ Zfloor x = y. +Proof. + intros x y BALL. + apply Rabs_lt_inv in BALL. + case (Rcompare_spec x (IZR y)); intro CMP. + - left. apply Zfloor_imp. + ring_simplify (y-1+1). + rewrite minus_IZR. lra. + - subst. + rewrite Zfloor_IZR. right. reflexivity. + - right. apply Zfloor_imp. + rewrite plus_IZR. lra. +Qed. + +Theorem div_approx_reals_correct: + forall a b : Z, forall x : R, + b > 0 -> + (Rabs (x - IZR a/ IZR b) < 1/2)%R -> + div_approx_reals a b x = (a/b)%Z. +Proof. + intros a b x bPOS GAP. + assert (0 < IZR b)%R by (apply IZR_lt ; lia). + unfold div_approx_reals. + pose proof (Znearest_imp2 (fun x => negb (Z.even x)) x) as NEAR. + assert (Rabs (IZR (ZnearestE x) - IZR a/ IZR b) < 1)%R as BALL. + { pose proof (Rabs_triang (IZR (ZnearestE x) - x) + (x - IZR a/ IZR b)) as TRI. + ring_simplify (IZR (ZnearestE x) - x + (x - IZR a / IZR b))%R in TRI. + lra. + } + clear GAP NEAR. + rewrite Rabs_minus_sym in BALL. + pose proof (floor_ball1 _ _ BALL) as FLOOR. + clear BALL. + rewrite Zfloor_div in FLOOR by lia. 
+ pose proof (Z_div_mod_eq_full a b) as DIV_MOD. + assert (0 < b) as bPOS' by lia. + pose proof (Z.mod_pos_bound a b bPOS') as MOD_BOUNDS. + case Z.ltb_spec; intro; destruct FLOOR; lia. +Qed. + +Opaque approx_inv. + +Theorem fp_divu32_correct : + forall (ge : genv) (sp: val) cmenv memenv + (le : letenv) (expr_a expr_b : expr) (a b : int) + (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vint a)) + (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vint b)) + (b_nz : (Int.unsigned b > 0)%Z), + eval_expr ge sp cmenv memenv le (fp_divu32 expr_a expr_b) + (Vint (Int.divu a b)). +Proof. + intros. + assert (eval_expr ge sp cmenv memenv (Vint b :: Vint a :: le) + (Eletvar 0) (Vint b)) as EVAL_b'. + { constructor. reflexivity. } + destruct (approx_inv_correct ge sp cmenv memenv (Vint b :: Vint a :: le) (Eletvar 0) b EVAL_b' b_nz) as (inv_b & inv_b_eval & inv_b_finite & inv_b_correct). + unfold fp_divu32. + repeat econstructor. + { exact EVAL_a. } + { apply eval_lift. exact EVAL_b. } + exact inv_b_eval. + cbn. f_equal. + rewrite <- Float.of_intu_of_longu. + unfold Float.to_longu_ne. + rewrite ZofB_ne_range_correct by lia. + set (prod := (Float.mul (Float.of_intu a) inv_b)). + pose proof (Int.unsigned_range a) as a_range. + pose proof (Int.unsigned_range b) as b_range. + change Int.modulus with 4294967296 in a_range. + change Int.modulus with 4294967296 in b_range. + set (prod' := (B2R _ _ prod)). + set (prod_r := ZnearestE prod') in *. + + Local Transparent Float.mul Float.of_intu. + unfold Float.mul, Float.of_intu in prod. + set (a' := Int.unsigned a) in *. + set (b' := Int.unsigned b) in *. + assert (IZR_a' : (0 <= IZR a' <= 4294967295)%R). + { split; apply IZR_le; lia. } + assert (IZR_b' : (1 <= IZR b' <= 4294967295)%R). + { split; apply IZR_le; lia. } + assert (SILLY : -2^53 <= a' <= 2^53). + { cbn; lia. } + destruct (BofZ_exact 53 1024 (@eq_refl _ Lt) (@eq_refl _ Lt) a' SILLY) as (C0E & C0F & _). + clear SILLY. 
+ pose proof (Bmult_correct 53 1024 (@eq_refl _ Lt) (@eq_refl _ Lt) Float.binop_nan mode_NE + (BofZ 53 1024 (@eq_refl _ Lt) (@eq_refl _ Lt)a') inv_b) as C1. + set (inv_b_r := B2R 53 1024 inv_b) in *. + assert (INV_RANGE : (1/4294967296 <= 1/IZR b' <= 1)%R). + { split; unfold Rdiv. + - apply Rmult_le_compat_l. lra. + apply Rinv_le. apply IZR_lt. lia. + apply IZR_le. lia. + - replace 1%R with (1 / 1)%R at 2 by field. + apply Rmult_le_compat_l. lra. + apply Rinv_le. apply IZR_lt. lia. + apply IZR_le. lia. + } + apply Rabs_def2b in inv_b_correct. + rewrite Rlt_bool_true in C1; cycle 1. + { clear C1. + rewrite C0E. + apply (Rabs_relax (bpow radix2 64)). + { apply bpow_lt. lia. } + replace inv_b_r with (1 / IZR b' + (inv_b_r - 1 / IZR b'))%R by ring. + set (delta := (inv_b_r - 1 / IZR b')%R) in *. + unfold approx_inv_thresh in inv_b_correct. + cbn. + assert (b'_RANGE : (1 <= IZR b' <= 4294967295)%R). + { split; apply IZR_le; lia. + } + assert (a'_RANGE : (0 <= IZR a' <= 4294967295)%R). + { split; apply IZR_le; lia. + } + gappa. + } + rewrite C0F in C1. + cbn in C1. + rewrite C0E in C1. + rewrite inv_b_finite in C1. + fold prod in C1. + fold prod' in C1. + destruct C1 as (C1E & C1F & _). + rewrite C1F. cbn. + + assert(prod'_range : (0 <= prod' <= 17179869181/4)%R). + { + rewrite C1E. + replace inv_b_r with (1/IZR b' + (inv_b_r - 1 / IZR b'))%R by ring. + assert (a'_RANGE : (0 <= IZR a' <= 4294967295)%R). + { split; apply IZR_le; lia. + } + unfold approx_inv_thresh in inv_b_correct. + set (true_inv := (1 / IZR b')%R) in *. + set (delta := (inv_b_r - true_inv)%R) in *. + gappa. + } + + assert(0 <= prod_r <= 4294967295) as prod_r_range. + { unfold prod_r. + rewrite <- (Znearest_IZR (fun x => negb (Z.even x)) 0). + replace 4294967295 with (ZnearestE (17179869181 / 4)%R); cycle 1. + { apply Znearest_imp. + apply Rabs_lt. + lra. + } + split; apply Znearest_le; lra. + } + + replace (_ && _ ) with true; cycle 1. + { + symmetry. + rewrite andb_true_iff. 
+ split; apply Zle_imp_le_bool; lia. + } + cbn. + f_equal. + unfold Int.divu. + assert(Rabs + (round radix2 (FLT_exp (-1074) 53) ZnearestE (IZR a' * inv_b_r) - (IZR a' * inv_b_r)) <= 1/512)%R as R1 by gappa. + + assert ( (Rabs (B2R 53 1024 prod - IZR (Int.unsigned a) / IZR (Int.unsigned b)) < 1 / 2)%R) as NEAR. + { + unfold prod. + pose proof (Bmult_correct 53 1024 (@eq_refl _ Lt) (@eq_refl _ Lt) Float.binop_nan mode_NE (BofZ 53 1024 (@eq_refl _ Lt) (@eq_refl _ Lt) a') inv_b) as C2. + rewrite C0E in C2. + rewrite Rlt_bool_true in C2; cycle 1. + { clear C2. + apply (Rabs_relax (bpow radix2 64)). + { apply bpow_lt. reflexivity. } + cbn. + fold inv_b_r. + replace inv_b_r with (1 / IZR b' + (inv_b_r - 1 / IZR b'))%R by ring. + set (delta := (inv_b_r - 1 / IZR b')%R) in *. + unfold approx_inv_thresh in *. + gappa. + } + destruct C2 as (C2E & C2F & _). + rewrite C2E. + fold inv_b_r a' b'. + replace ((round radix2 (FLT_exp (3 - 1024 - 53) 53) (round_mode mode_NE) (IZR a' * inv_b_r)) - + (IZR a' / IZR b'))%R with + (((round radix2 (FLT_exp (3 - 1024 - 53) 53) (round_mode mode_NE) (IZR a' * inv_b_r)) - + (IZR a' * inv_b_r)) + + (IZR a' * (inv_b_r - 1 / IZR b')))%R by (field ; lra). + set(delta := (inv_b_r - 1 / IZR b')%R) in *. + cbn. + unfold approx_inv_thresh in *. + assert (Rabs(IZR a' * delta) <= 3/8)%R as R2. + { apply Rabs_le. + split; nra. + } + rewrite <- C1E. + rewrite <- C1E in R1. + pose proof (Rabs_triang (prod' - IZR a' * inv_b_r) (IZR a' * delta))%R. + lra. + } + pose proof (div_approx_reals_correct (Int.unsigned a) (Int.unsigned b) prod' b_nz NEAR) as DIV_CORRECT. + rewrite <- DIV_CORRECT. + + unfold div_approx_reals in *. + fold a' b' prod' prod_r. + unfold Int64.mul. + rewrite Int64.unsigned_repr by (cbn; lia). + rewrite Int64.unsigned_repr by (cbn; lia). + unfold Int64.sub. + rewrite Int64.unsigned_repr by (cbn; lia). + rewrite Int64.unsigned_repr by (cbn; nia). + assert (FMA_RANGE : Int64.min_signed <= a' - prod_r * b' <= Int64.max_signed). + { cbn. 
+ unfold prod_r. + rewrite <- C1E in R1. + assert (IZR (-9223372036854775808) <= IZR (a' - ZnearestE prod' * b') <= IZR 9223372036854775807)%R. + 2: split; apply le_IZR; lra. + rewrite minus_IZR. + rewrite mult_IZR. + replace (IZR (ZnearestE prod')) with (prod' + (IZR (ZnearestE prod') - prod'))%R by ring. + pose proof (Znearest_imp2 (fun x => negb (Z.even x)) prod') as R2. + set (delta1 := (IZR (ZnearestE prod') - prod')%R) in *. + replace prod' with ((prod' - IZR a' * inv_b_r) + IZR a' * (inv_b_r - 1 / IZR b') + + IZR a' / IZR b')%R by (field; lra). + set (delta2 := (inv_b_r - 1 / IZR b')%R) in *. + set (delta3 := (prod' - IZR a' * inv_b_r)%R) in *. + replace (IZR a' - (delta3 + IZR a' * delta2 + IZR a' / IZR b' + delta1) * IZR b')%R with + (- (delta3 + IZR a' * delta2 + delta1) * IZR b')%R by (field; lra). + unfold approx_inv_thresh in *. + assert (Rabs(IZR a' * delta2) <= 1/4)%R as R4. + { apply Rabs_le. + split; + nra. } + set (delta4 := (IZR a' * delta2)%R) in *. + apply Rabs_def2b in R1. + apply Rabs_def2b in R2. + apply Rabs_def2b in R4. + split; nra. + } + fold a' b' prod_r in DIV_CORRECT. + + pose proof (Zlt_cases (a' - prod_r * b') 0) as CMP. + destruct (Z.ltb (a' - prod_r * b') 0). + - replace (Int64.lt (Int64.repr (a' - prod_r * b')) Int64.zero) with true; cycle 1. + { unfold Int64.lt. + change (Int64.signed Int64.zero) with 0. + rewrite Int64.signed_repr by exact FMA_RANGE. + rewrite zlt_true by lia. + reflexivity. + } + cbn. + f_equal. + rewrite Int64.add_signed. + rewrite (Int64.signed_repr prod_r) by (cbn ; lia). + rewrite Int64.signed_repr by (cbn ; lia). + unfold Int64.loword. + rewrite Int64.unsigned_repr. reflexivity. + split. + 2: cbn; lia. + replace (prod_r + (-1)) with (prod_r - 1) by lia. + rewrite DIV_CORRECT. + apply Z.div_pos; lia. + + - replace (Int64.lt (Int64.repr (a' - prod_r * b')) Int64.zero) with false; cycle 1. + { unfold Int64.lt. + change (Int64.signed Int64.zero) with 0. + rewrite Int64.signed_repr by exact FMA_RANGE. 
+ rewrite zlt_false by lia. + reflexivity. + } + cbn. + unfold Int64.loword. + rewrite Int64.unsigned_repr by (cbn; lia). + reflexivity. +Qed. + +Definition e_msubl a b c := Eop Omsub (a ::: b ::: c ::: Enil). +Definition fp_modu32 a b := Elet a (Elet (lift b) (e_msubl (Eletvar 1%nat) (Eletvar 0%nat) + (fp_divu32 (Eletvar 1%nat) (Eletvar 0%nat)))). + +Theorem fp_modu32_correct : + forall (ge : genv) (sp: val) cmenv memenv + (le : letenv) (expr_a expr_b : expr) (a b : int) + (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vint a)) + (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vint b)) + (b_nz : (Int.unsigned b > 0)%Z), + eval_expr ge sp cmenv memenv le (fp_modu32 expr_a expr_b) + (Vint (Int.modu a b)). +Proof. + intros. + rewrite Int.modu_divu; cycle 1. + { intro Z. + subst. + rewrite Int.unsigned_zero in b_nz. + lia. + } + unfold fp_modu32. + Local Opaque fp_divu32. + repeat (econstructor + apply eval_lift + eassumption). + { apply fp_divu32_correct; + repeat (econstructor + apply eval_lift + eassumption). + } + cbn. + rewrite Int.mul_commut. + reflexivity. +Qed. + +Definition e_is_neg a := Eop (Ocmp (Ccompimm Clt Int.zero)) (a ::: Enil). +Definition e_xorw a b := Eop Oxor (a ::: b ::: Enil). +Definition e_ite ty c vc v1 v2 := Eop (Osel c ty) (v1 ::: v2 ::: vc ::: Enil). +Definition e_neg a := Eop Oneg (a ::: Enil). +Definition e_abs a := Eop (Oabsdiffimm Int.zero) (a ::: Enil). + +Definition fp_divs32 a b := + Elet a (Elet (lift b) + (Elet (fp_divu32 (e_abs (Eletvar (1%nat))) (e_abs (Eletvar (0%nat)))) + (e_ite Tint (Ccompu0 Cne) (e_xorw (e_is_neg (Eletvar 2%nat)) + (e_is_neg (Eletvar 1%nat))) + (e_neg (Eletvar 0%nat)) (Eletvar 0%nat)))). + +Lemma nonneg_signed_unsigned: + forall x (x_NONNEG : Int.signed x >= 0), + (Int.signed x) = (Int.unsigned x). +Proof. + intros. + pose proof (Int.unsigned_range x). + unfold Int.signed in *. + destruct zlt. reflexivity. + change Int.modulus with 4294967296%Z in *. + change Int.half_modulus with 2147483648%Z in *. 
+ lia. +Qed. + +Lemma int_min_signed_unsigned : + (- Int.min_signed < Int.max_unsigned)%Z. +Proof. + reflexivity. +Qed. + +Lemma int_divs_divu : + forall a b + (b_NOT0 : Int.signed b <> 0), + Int.divs a b = if xorb (Int.lt a Int.zero) + (Int.lt b Int.zero) + then Int.neg (Int.divu (ExtValues.int_abs a) + (ExtValues.int_abs b)) + else Int.divu (ExtValues.int_abs a) (ExtValues.int_abs b). +Proof. + intros. + unfold Int.divs, Int.divu, Int.lt, ExtValues.int_abs. + pose proof (Int.signed_range a) as a_RANGE. + pose proof (Int.signed_range b) as b_RANGE. + change (Int.signed Int.zero) with 0%Z. + destruct zlt. + - cbn. rewrite (Z.abs_neq (Int.signed a)) by lia. + rewrite (Int.unsigned_repr (- Int.signed a)); cycle 1. + { pose proof int_min_signed_unsigned. lia. } + + destruct zlt. + + rewrite (Z.abs_neq (Int.signed b)) by lia. + rewrite Int.unsigned_repr ; cycle 1. + { pose proof int_min_signed_unsigned. lia. } + rewrite <- (Z.opp_involutive (Int.signed b)) at 1. + rewrite Z.quot_opp_r by lia. + rewrite <- (Z.opp_involutive (Int.signed a)) at 1. + rewrite Z.quot_opp_l by lia. + rewrite Z.quot_div_nonneg by lia. + rewrite Z.opp_involutive. + reflexivity. + + + rewrite (Z.abs_eq (Int.signed b)) by lia. + rewrite Int.unsigned_repr ; cycle 1. + { pose proof Int.max_signed_unsigned. lia. } + rewrite <- (Z.opp_involutive (Int.signed a)) at 1. + rewrite Z.quot_opp_l by lia. + rewrite Z.quot_div_nonneg by lia. + rewrite Int.neg_repr. + reflexivity. + + - cbn. rewrite (Z.abs_eq (Int.signed a)) by lia. + rewrite (Int.unsigned_repr (Int.signed a)); cycle 1. + { pose proof Int.max_signed_unsigned. lia. } + destruct zlt. + + rewrite (Z.abs_neq (Int.signed b)) by lia. + rewrite Int.unsigned_repr ; cycle 1. + { pose proof int_min_signed_unsigned. lia. } + rewrite Int.neg_repr. + rewrite <- (Z.opp_involutive (Int.signed b)) at 1. + rewrite Z.quot_opp_r by lia. + rewrite Z.quot_div_nonneg by lia. + reflexivity. + + + rewrite (Z.abs_eq (Int.signed b)) by lia. 
+ rewrite Int.unsigned_repr ; cycle 1. + { pose proof Int.max_signed_unsigned. lia. } + rewrite Z.quot_div_nonneg by lia. + reflexivity. +Qed. + +Lemma nonzero_unsigned_signed : + forall b, Int.unsigned b > 0 -> Int.signed b <> 0. +Proof. + intros b GT EQ. + rewrite Int.unsigned_signed in GT. + unfold Int.lt in GT. + rewrite Int.signed_zero in GT. + destruct zlt in GT; lia. +Qed. + +Theorem fp_divs32_correct : + forall (ge : genv) (sp: val) cmenv memenv + (le : letenv) (expr_a expr_b : expr) (a b : int) + (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vint a)) + (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vint b)) + (b_nz : (Int.unsigned b > 0)%Z), + eval_expr ge sp cmenv memenv le (fp_divs32 expr_a expr_b) + (Vint (Int.divs a b)). +Proof. + intros. + unfold fp_divs32. + Local Opaque fp_divu32. + repeat (econstructor + apply eval_lift + eassumption). + apply fp_divu32_correct. + all: repeat (econstructor + apply eval_lift + eassumption). + { unfold ExtValues.int_absdiff, ExtValues.Z_abs_diff. + rewrite Int.signed_zero. rewrite Z.sub_0_r. + rewrite Int.unsigned_repr. + { pose proof (nonzero_unsigned_signed b b_nz). + lia. + } + pose proof Int.max_signed_unsigned. + pose proof int_min_signed_unsigned. + pose proof (Int.signed_range b). + lia. + } + cbn. + rewrite int_divs_divu ; cycle 1. + { apply nonzero_unsigned_signed. assumption. } + unfold Int.lt, ExtValues.int_abs, ExtValues.int_absdiff, ExtValues.Z_abs_diff. + change (Int.signed Int.zero) with 0%Z. + repeat rewrite Z.sub_0_r. + destruct zlt; destruct zlt; reflexivity. +Qed. + +Lemma int_mods_modu : + forall a b + (b_NOT0 : Int.signed b <> 0), + Int.mods a b = if Int.lt a Int.zero + then Int.neg (Int.modu (ExtValues.int_abs a) + (ExtValues.int_abs b)) + else Int.modu (ExtValues.int_abs a) (ExtValues.int_abs b). +Proof. + intros. + unfold Int.mods, Int.modu, Int.lt, ExtValues.int_abs. + pose proof (Int.signed_range a) as a_RANGE. + pose proof (Int.signed_range b) as b_RANGE. 
+ change (Int.signed Int.zero) with 0%Z. + destruct zlt. + - cbn. rewrite (Z.abs_neq (Int.signed a)) by lia. + rewrite (Int.unsigned_repr (- Int.signed a)); cycle 1. + { pose proof int_min_signed_unsigned. lia. } + + destruct (zlt (Int.signed b) 0%Z). + + rewrite (Z.abs_neq (Int.signed b)) by lia. + rewrite Int.unsigned_repr ; cycle 1. + { pose proof int_min_signed_unsigned. lia. } + rewrite <- (Z.opp_involutive (Int.signed b)) at 1. + rewrite Z.rem_opp_r by lia. + rewrite <- (Z.opp_involutive (Int.signed a)) at 1. + rewrite Z.rem_opp_l by lia. + rewrite Z.rem_mod_nonneg by lia. + rewrite Int.neg_repr. + reflexivity. + + + rewrite (Z.abs_eq (Int.signed b)) by lia. + rewrite Int.unsigned_repr ; cycle 1. + { pose proof Int.max_signed_unsigned. lia. } + rewrite <- (Z.opp_involutive (Int.signed a)) at 1. + rewrite Z.rem_opp_l by lia. + rewrite Z.rem_mod_nonneg by lia. + rewrite Int.neg_repr. + reflexivity. + + - cbn. rewrite (Z.abs_eq (Int.signed a)) by lia. + rewrite (Int.unsigned_repr (Int.signed a)); cycle 1. + { pose proof Int.max_signed_unsigned. lia. } + destruct (zlt (Int.signed b) 0%Z). + + rewrite (Z.abs_neq (Int.signed b)) by lia. + rewrite Int.unsigned_repr ; cycle 1. + { pose proof int_min_signed_unsigned. lia. } + rewrite <- (Z.opp_involutive (Int.signed b)) at 1. + rewrite Z.rem_opp_r by lia. + rewrite Z.rem_mod_nonneg by lia. + reflexivity. + + + rewrite (Z.abs_eq (Int.signed b)) by lia. + rewrite Int.unsigned_repr ; cycle 1. + { pose proof Int.max_signed_unsigned. lia. } + rewrite Z.rem_mod_nonneg by lia. + reflexivity. +Qed. + +Definition fp_mods32z a b := + Elet a (Elet (lift b) + (Elet (fp_modu32 (e_abs (Eletvar (1%nat))) (e_abs (Eletvar (0%nat)))) + (e_ite Tint (Ccomp0 Clt) (Eletvar 2%nat) + (e_neg (Eletvar 0%nat)) (Eletvar 0%nat)))). 
+ +Theorem fp_mods32z_correct : + forall (ge : genv) (sp: val) cmenv memenv + (le : letenv) (expr_a expr_b : expr) (a b : int) + (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vint a)) + (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vint b)) + (b_nz : (Int.unsigned b > 0)%Z), + eval_expr ge sp cmenv memenv le (fp_mods32z expr_a expr_b) + (Vint (Int.mods a b)). +Proof. + intros. + unfold fp_mods32z. + Local Opaque fp_modu32. + repeat (econstructor + apply eval_lift + eassumption). + apply fp_modu32_correct. + all: repeat (econstructor + apply eval_lift + eassumption). + { unfold ExtValues.int_absdiff, ExtValues.Z_abs_diff. + rewrite Int.signed_zero. rewrite Z.sub_0_r. + rewrite Int.unsigned_repr. + { pose proof (nonzero_unsigned_signed b b_nz). + lia. + } + pose proof Int.max_signed_unsigned. + pose proof int_min_signed_unsigned. + pose proof (Int.signed_range b). + lia. + } + cbn. + rewrite int_mods_modu ; cycle 1. + { apply nonzero_unsigned_signed. assumption. } + unfold Int.lt, ExtValues.int_abs, ExtValues.int_absdiff, ExtValues.Z_abs_diff. + change (Int.signed Int.zero) with 0%Z. + repeat rewrite Z.sub_0_r. + destruct zlt; reflexivity. +Qed. + +Definition e_msub a b c := Eop Omsub (a ::: b ::: c ::: Enil). + +Definition fp_mods32 a b := + Elet a (Elet (lift b) + (Elet (fp_divs32 (Eletvar (1%nat)) (Eletvar (0%nat))) + (e_msub (Eletvar 2%nat) (Eletvar 1%nat) (Eletvar 0%nat)))). + +Theorem fp_mods32_correct : + forall (ge : genv) (sp: val) cmenv memenv + (le : letenv) (expr_a expr_b : expr) (a b : int) + (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vint a)) + (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vint b)) + (b_nz : (Int.unsigned b > 0)%Z), + eval_expr ge sp cmenv memenv le (fp_mods32 expr_a expr_b) + (Vint (Int.mods a b)). +Proof. + intros. + rewrite Int.mods_divs. + unfold fp_mods32. + Local Opaque fp_divs32. + repeat (econstructor + apply eval_lift + eassumption). 
+ { apply fp_divs32_correct; + repeat (econstructor + apply eval_lift + eassumption). + } + cbn. + rewrite Int.mul_commut. + reflexivity. +Qed. diff --git a/kvx/FPDivision64.v b/kvx/FPDivision64.v new file mode 100644 index 00000000..298831a0 --- /dev/null +++ b/kvx/FPDivision64.v @@ -0,0 +1,2670 @@ +(* +This needs a special gappa script + +#!/bin/sh +/home/monniaux/.opam/4.12.0+flambda/bin/gappa -Eprecision=100 "$@" + +in PATH before the normal gappa + *) + +From Flocq Require Import Core Digits Operations Round Bracket Sterbenz + Binary Round_odd Bits. +Require Archi. +Require Import Coqlib. +Require Import Compopts. +Require Import AST. +Require Import Integers. +Require Import Floats. +Require Import Op. +Require Import CminorSel. +Require Import OpHelpers. +Require Import ExtFloats. +Require Import DecBoolOps. +Require Import Chunks. +Require Import Builtins. +Require Import Values Globalenvs. +Require Compopts. +Require Import Psatz. +Require Import IEEE754_extra. +Require Import Memory. + +From Gappa Require Import Gappa_tactic. + +Definition approx_inv_longu b := + let invb_s := ExtValues.invfs (Val.singleoffloat (Val.maketotal (Val.floatoflongu b))) in + let invb_d := Val.floatofsingle invb_s in + let b_d := Val.maketotal (Val.floatoflongu b) in + let one := Vfloat (ExtFloat.one) in + let alpha := ExtValues.fmsubf one invb_d b_d in + ExtValues.fmaddf invb_d alpha invb_d. + +Lemma Rabs_relax: + forall b b' (INEQ : (b < b')%R) x, + (-b <= x <= b)%R -> (Rabs x < b')%R. +Proof. + intros. + apply Rabs_lt. + lra. +Qed. + +Local Notation "'rd'" := (round radix2 (FLT_exp (-1074) 53) ZnearestE). +Local Notation "'rs'" := (round radix2 (FLT_exp (-149) 24) ZnearestE). + +Definition approx_inv_rel_thresh := (1049/72057594037927936)%R. 
+Theorem approx_inv_longu_correct_rel : + forall b, + (0 < Int64.unsigned b)%Z -> + exists (f : float), + (approx_inv_longu (Vlong b)) = Vfloat f /\ + is_finite _ _ f = true /\ (Rabs(IZR (Int64.unsigned b) * (B2R _ _ f) - 1) <= approx_inv_rel_thresh)%R. +Proof. + intros b NONZ. + unfold approx_inv_longu. + cbn. + econstructor. + split. + reflexivity. + Local Transparent Float.neg Float.of_single Float32.of_longu Float32.div Float.of_longu Float32.of_int Float.of_int Float.to_single. + unfold Float.fma, Float.neg, Float.of_single, Float32.of_longu, ExtFloat32.inv, Float32.div, Float.of_longu, ExtFloat32.one, Float32.of_int, ExtFloat.one, Float.of_int, Float.to_single. + set (re := (@eq_refl Datatypes.comparison Lt)). + change (Int.signed (Int.repr 1)) with 1%Z. + set (b' := Int64.unsigned b) in *. + pose proof (Int64.unsigned_range b) as RANGE. + change Int64.modulus with 18446744073709551616%Z in RANGE. + assert(1 <= IZR b' <= 18446744073709551616)%R as RANGE'. + { split; apply IZR_le; lia. + } + + assert (-16777216 <= 1 <= 16777216)%Z as SILLY by lia. + destruct (BofZ_exact 24 128 re re 1 SILLY) as (C0R & C0F & _). + clear SILLY. + set (one_s := (BofZ 24 128 re re 1)) in *. + + pose proof (BofZ_correct 53 1024 re re b') as C5. + rewrite Rlt_bool_true in C5; cycle 1. + { clear C5. + eapply (Rabs_relax (bpow radix2 64)). + { apply bpow_lt. lia. } + cbn. + gappa. + } + cbn in C5. + destruct C5 as (C5R & C5F & C5S). + set (b_d := (BofZ 53 1024 re re b')) in *. + + pose proof (Bconv_correct 53 1024 24 128 re re Float.to_single_nan mode_NE b_d C5F) as C1. + rewrite Rlt_bool_true in C1; cycle 1. + { clear C1. + apply (Rabs_relax (bpow radix2 64)). + { apply bpow_lt. lia. } + rewrite C5R. + cbn. + gappa. + } + cbn in C1. + destruct C1 as (C1R & C1F & C1S). + set (b_s := (Bconv 53 1024 24 128 re re Float.to_single_nan mode_NE b_d)) in *. + assert(1 <= B2R 24 128 b_s <= 18446744073709551616)%R as b_s_RANGE. + { rewrite C1R. + rewrite C5R. + cbn. + gappa. 
+ } + assert(B2R 24 128 b_s <> 0)%R as b_s_NONZ by lra. + + pose proof (Bdiv_correct 24 128 re re Float32.binop_nan mode_NE one_s b_s b_s_NONZ) as C2. + rewrite Rlt_bool_true in C2; cycle 1. + { clear C2. + apply Rabs_relax with (b := 1%R). + { cbn; lra. } + rewrite C0R. + set (r_b_s := B2R 24 128 b_s) in *. + cbn. + gappa. + } + + destruct C2 as (C2R & C2F & _). + set (invb_s := (Bdiv 24 128 re re Float32.binop_nan mode_NE one_s b_s)) in *. + rewrite C0F in C2F. + + assert ((1/18446744073709551616 <= B2R 24 128 invb_s <= 1)%R) as invb_s_RANGE. + { rewrite C2R. + set (r_b_s := B2R 24 128 b_s) in *. + rewrite C0R. + cbn. + gappa. + } + + pose proof (Bconv_correct 24 128 53 1024 re re Float.of_single_nan mode_NE invb_s C2F) as C3. + rewrite Rlt_bool_true in C3; cycle 1. + { clear C3. + set (r_invb_s := (B2R 24 128 invb_s)) in *. + apply Rabs_relax with (b := 1%R). + { replace 1%R with (bpow radix2 0)%R by reflexivity. + apply bpow_lt. + lia. + } + cbn. + gappa. + } + + destruct C3 as (C3R & C3F & _). + set (invb_d := (Bconv 24 128 53 1024 re re Float.of_single_nan mode_NE invb_s)) in *. + assert ((1/18446744073709551616 <= B2R 53 1024 invb_d <= 1)%R) as invb_d_RANGE. + { + rewrite C3R. + set (r_invb_s := B2R 24 128 invb_s) in *. + cbn. + gappa. + } + + pose proof (is_finite_Bopp 53 1024 Float.neg_nan invb_d) as opp_finite. + rewrite C3F in opp_finite. + + pose proof (BofZ_correct 53 1024 re re 1) as C4. + rewrite Rlt_bool_true in C4; cycle 1. + { clear C4. + cbn. + eapply (Rabs_relax (IZR 18446744073709551616)). + lra. + set (b'' := IZR b') in *. + gappa. + } + destruct C4 as (C4R & C4F & _). + + assert(1 <= B2R 53 1024 b_d <= 18446744073709551616)%R as b_d_RANGE. + { rewrite C5R. + gappa. + } + + pose proof (Bfma_correct 53 1024 re re Float.fma_nan mode_NE + (Bopp 53 1024 Float.neg_nan invb_d) (BofZ 53 1024 re re b') + (BofZ 53 1024 re re 1) opp_finite C5F C4F) as C6. + rewrite Rlt_bool_true in C6; cycle 1. + { clear C6. + rewrite C4R. + rewrite B2R_Bopp. + cbn. 
+ eapply (Rabs_relax (IZR 18446744073709551616)). + { lra. } + fold invb_d. + fold b_d. + set (r_invb_d := B2R 53 1024 invb_d) in *. + set (r_b_d := B2R 53 1024 b_d) in *. + gappa. + } + fold b_d in C6. + destruct C6 as (C6R & C6F & _). + + pose proof (Bfma_correct 53 1024 re re Float.fma_nan mode_NE + (Bfma 53 1024 re re Float.fma_nan mode_NE + (Bopp 53 1024 Float.neg_nan invb_d) b_d (BofZ 53 1024 re re 1)) + invb_d invb_d C6F C3F C3F) as C7. + rewrite Rlt_bool_true in C7; cycle 1. + { clear C7. + rewrite C6R. + rewrite B2R_Bopp. + eapply (Rabs_relax (bpow radix2 64)). + { apply bpow_lt. lia. } + rewrite C4R. + cbn. + set (r_invb_d := B2R 53 1024 invb_d) in *. + set (r_b_d := B2R 53 1024 b_d) in *. + gappa. + } + destruct C7 as (C7R & C7F & _). + + split. assumption. + rewrite C7R. + rewrite C6R. + rewrite C5R. + rewrite C4R. + rewrite B2R_Bopp. + rewrite C3R. + rewrite C2R. + rewrite C1R. + rewrite C5R. + rewrite C0R. + cbn. + set(b1 := IZR b') in *. + + replace (rd 1) with 1%R by gappa. + replace (rd (rs (1 / rs (rd b1)))) with + ((((rd (rs (1 / rs (rd b1))) - (/b1))/(/b1))+1)*(/ b1))%R ; cycle 1. + { field. lra. } + set (er0 := ((rd (rs (1 / rs (rd b1))) - (/b1))/(/b1))%R). + replace (rd b1) with ((((rd b1) - b1)/b1 + 1) * b1)%R; cycle 1. + { field. lra. } + set (er1 := (((rd b1) - b1)/b1)%R). + replace (- ((er0 + 1) * / b1) * ((er1 + 1) * b1) + 1)%R + with (1 - (er0 + 1)*(er1 + 1))%R ; cycle 1. + { field. lra. } + set (z0 := (1 - (er0 + 1) * (er1 + 1))%R). + assert (Rabs er0 <= 257/2147483648)%R as er0_ABS. + { unfold er0. + gappa. + } + assert (Rabs er1 <= 1/9007199254740992)%R as er1_ABS. + { unfold er1. + gappa. + } + replace (rd z0) with ((rd(z0)-z0)+z0)%R by ring. + set (ea0 := (rd(z0)-z0)%R). + assert (Rabs ea0 <= 1/75557863725914323419136)%R as ea0_ABS. + { unfold ea0. unfold z0. + gappa. + } + set (z1 := ((ea0 + z0) * ((er0 + 1) * / b1) + (er0 + 1) * / b1)%R). + replace (rd z1) with ((((rd z1)-z1)/z1+1)*z1)%R; cycle 1. + { field. + unfold z1. 
+ unfold z0. + gappa. + } + set (er2 := ((rd z1 - z1) / z1)%R). + assert (Rabs er2 <= 1/9007199254740992)%R as er2_ABS. + { unfold er2. + unfold z1, z0. + gappa. + } + unfold z1, z0. + replace (b1 * + ((er2 + 1) * + ((ea0 + (1 - (er0 + 1) * (er1 + 1))) * ((er0 + 1) * / b1) + + (er0 + 1) * / b1)) - 1)%R + with (-er0*er0*er1*er2 - er0*er0*er1 + ea0*er0*er2 - er0*er0*er2 - 2*er0*er1*er2 + ea0*er0 - er0*er0 - 2*er0*er1 + ea0*er2 - er1*er2 + ea0 - er1 + er2)%R; cycle 1. + { field. lra. } + unfold approx_inv_rel_thresh. + gappa. +Qed. + +Definition step1_real_inv_longu b := + let invb_s := ExtValues.invfs (Val.singleoffloat (Val.maketotal (Val.floatoflongu b))) in + Val.floatofsingle invb_s. + +Definition step1_real_inv_thresh := (3/33554432)%R. +(* 8.94069671630859e-8 *) + +Theorem step1_real_inv_longu_correct : + forall b, + (0 < Int64.unsigned b)%Z -> + exists (f : float), + (step1_real_inv_longu (Vlong b)) = Vfloat f /\ + (B2R _ _ f) = (rd (rs (1 / rs (rd (IZR (Int64.unsigned b)))))) /\ + is_finite _ _ f = true /\ + Bsign _ _ f = false. +Proof. + intros b NONZ. + unfold step1_real_inv_longu. + cbn. + econstructor. + split. + reflexivity. + Local Transparent Float.neg Float.of_single Float32.of_longu Float32.div Float.of_longu Float32.of_int Float.of_int Float.to_single. + unfold Float.fma, Float.neg, Float.of_single, Float32.of_longu, ExtFloat32.inv, Float32.div, Float.of_longu, ExtFloat32.one, Float32.of_int, ExtFloat.one, Float.of_int, Float.to_single. + set (re := (@eq_refl Datatypes.comparison Lt)). + change (Int.signed (Int.repr 1)) with 1%Z. + set (b' := Int64.unsigned b) in *. + pose proof (Int64.unsigned_range b) as RANGE. + change Int64.modulus with 18446744073709551616%Z in RANGE. + assert(1 <= IZR b' <= 18446744073709551616)%R as RANGE'. + { split; apply IZR_le; lia. + } + + assert (-16777216 <= 1 <= 16777216)%Z as SILLY by lia. + destruct (BofZ_exact 24 128 re re 1 SILLY) as (C0R & C0F & C0S). + clear SILLY. + set (one_s := (BofZ 24 128 re re 1)) in *. 
+ + pose proof (BofZ_correct 53 1024 re re b') as C0'. + rewrite Rlt_bool_true in C0'; cycle 1. + { apply (Rabs_relax (bpow radix2 64)). + { apply bpow_lt. lia. } + cbn. + gappa. + } + cbn in C0'. + destruct C0' as (C0'R & C0'F & C0'S). + set (b_d := (BofZ 53 1024 re re b')) in *. + + pose proof (Bconv_correct 53 1024 24 128 re re Float.to_single_nan mode_NE b_d C0'F) as C1. + rewrite C0'R in C1. + rewrite C0'S in C1. + rewrite Rlt_bool_true in C1; cycle 1. + { clear C1. + eapply (Rabs_relax (bpow radix2 64)). + { apply bpow_lt. lia. } + cbn. + gappa. + } + destruct C1 as (C1R & C1F & C1S). + set (b_s := (Bconv 53 1024 24 128 re re Float.to_single_nan mode_NE b_d)) in *. + + assert(1 <= B2R 24 128 b_s <= 18446744073709551616)%R as b_s_RANGE. + { rewrite C1R. + cbn. + gappa. + } + assert(B2R 24 128 b_s <> 0)%R as b_s_NONZ by lra. + + pose proof (Bdiv_correct 24 128 re re Float32.binop_nan mode_NE one_s b_s b_s_NONZ) as C2. + rewrite Rlt_bool_true in C2; cycle 1. + { clear C2. + apply Rabs_relax with (b := 1%R). + { cbn; lra. } + rewrite C0R. + set (r_b_s := B2R 24 128 b_s) in *. + cbn. + gappa. + } + rewrite C1R in C2. + rewrite C1S in C2. + rewrite C0S in C2. + destruct C2 as (C2R & C2F & C2Sz). + change (1 <? 0)%Z with false in C2Sz. + replace (b' <? 0)%Z with false in C2Sz by lia. + change (xorb false false) with false in C2Sz. + set (invb_s := (Bdiv 24 128 re re Float32.binop_nan mode_NE one_s b_s)) in *. + rewrite C0F in C2F. + assert (is_nan 24 128 invb_s = false) as NAN. + { apply is_finite_not_is_nan. + assumption. + } + pose proof (C2Sz NAN) as C2S. + clear C2Sz. + + assert ((1/18446744073709551616 <= B2R 24 128 invb_s <= 1)%R) as invb_s_RANGE. + { rewrite C2R. + set (r_b_s := B2R 24 128 b_s) in *. + rewrite C0R. + cbn. + gappa. + } + + pose proof (Bconv_correct 24 128 53 1024 re re Float.of_single_nan mode_NE invb_s C2F) as C3. + rewrite Rlt_bool_true in C3; cycle 1. + { clear C3. + set (r_invb_s := (B2R 24 128 invb_s)) in *. 
+ apply Rabs_relax with (b := 1%R). + { replace 1%R with (bpow radix2 0)%R by reflexivity. + apply bpow_lt. + lia. + } + cbn. + gappa. + } + destruct C3 as (C3R & C3F & C3S). + set (invb_d := (Bconv 24 128 53 1024 re re Float.of_single_nan mode_NE invb_s)) in *. + assert ((1/18446744073709551616 <= B2R 53 1024 invb_d <= 1)%R) as invb_d_RANGE. + { + rewrite C3R. + set (r_invb_s := B2R 24 128 invb_s) in *. + cbn. + gappa. + } + rewrite C2S in C3S. + rewrite C2R in C3R. + rewrite C0R in C3R. + + auto. +Qed. + +Theorem step1_real_inv_longu_correct1 : + forall b, + (Int64.unsigned b = 1%Z) -> + exists f, + (step1_real_inv_longu (Vlong b)) = Vfloat f /\ + (B2R _ _ f) = 1%R /\ + is_finite _ _ f = true /\ + Bsign _ _ f = false. +Proof. + intros b EQ1. + assert (0 < Int64.unsigned b)%Z as b_RANGE by lia. + destruct (step1_real_inv_longu_correct b b_RANGE) as (f & C1E & C1R & C1F & C1S). + rewrite EQ1 in C1R. + exists f. + repeat split; try assumption. + rewrite C1R. + gappa. +Qed. + +Lemma Bsign_false_nonneg: + forall prec emax f, + (Bsign prec emax f) = false -> (0 <= (B2R prec emax f))%R. +Proof. + intros until f. intro SIGN. + destruct f. + 1, 2, 3: cbn; lra. + cbn. + apply F2R_ge_0. + cbn. + cbn in SIGN. + rewrite SIGN. + cbn. + lia. +Qed. + +Lemma Znearest_IZR_le : + forall rnd n x, (IZR n <= x)%R -> (n <= Znearest rnd x)%Z. +Proof. + intros until x. intro ORDER. + pose proof (Znearest_ge_floor rnd x). + pose proof (Zfloor_le _ _ ORDER) as KK. + rewrite Zfloor_IZR in KK. + lia. +Qed. + +Lemma Znearest_le_IZR : + forall rnd n x, (x <= IZR n)%R -> (Znearest rnd x <= n)%Z. +Proof. + intros until x. intro ORDER. + pose proof (Znearest_le_ceil rnd x). + pose proof (Zceil_le _ _ ORDER) as KK. + rewrite Zceil_IZR in KK. + lia. +Qed. + +Definition step1_real_div_longu a b := + Val.mulf (Val.maketotal (Val.floatoflongu a)) (step1_real_inv_longu b). + +Definition step1_div_longu a b := + Val.maketotal (Val.longuoffloat_ne (step1_real_div_longu a b)). 
+ +Definition step1_real_quotient (a b : R) := + rd ((rd (a)) * (rd (rs (1 / rs (rd (b)))))). + +Theorem step1_real_div_longu_correct: + forall a b, + (1 < Int64.unsigned b)%Z -> + exists (q : float), + (step1_real_div_longu (Vlong a) (Vlong b)) = Vfloat q /\ + (B2R _ _ q) = step1_real_quotient (IZR (Int64.unsigned a)) + (IZR (Int64.unsigned b)) /\ + is_finite _ _ q = true /\ + Bsign _ _ q = false. +Proof. + intros a b b_NON01. + assert (0 < Int64.unsigned b)%Z as b_NON0 by lia. + destruct (step1_real_inv_longu_correct b b_NON0) as (f & C1E & C1R & C1F & C1S). + unfold step1_real_div_longu. + rewrite C1E. + cbn. + set (b' := Int64.unsigned b) in *. + Local Transparent Float.mul. + unfold Float.mul, Float.of_longu. + econstructor. + split. reflexivity. + set (a' := Int64.unsigned a) in *. + set (re := (@eq_refl Datatypes.comparison Lt)). + + pose proof (Int64.unsigned_range a) as a_RANGE. + change Int64.modulus with 18446744073709551616%Z in a_RANGE. + assert (0 <= IZR a' <= 18446744073709551615)%R as IZR_a_RANGE. + { split; apply IZR_le; lia. } + pose proof (Int64.unsigned_range b) as b_RANGE. + change Int64.modulus with 18446744073709551616%Z in b_RANGE. + assert (2 <= IZR b' <= 18446744073709551615)%R as IZR_b_RANGE. + { split; apply IZR_le; lia. } + + pose proof (BofZ_correct 53 1024 re re a') as C2. + rewrite Rlt_bool_true in C2; cycle 1. + { clear C2. + apply Rabs_relax with (b := bpow radix2 64). + { apply bpow_lt. lia. } + cbn. + gappa. + } + destruct C2 as (C2R & C2F & C2S). + rewrite Zlt_bool_false in C2S by lia. + + pose proof (Bmult_correct 53 1024 re re Float.binop_nan mode_NE (BofZ 53 1024 re re a') f) as C3. + rewrite C1S in C3. + rewrite C2S in C3. + rewrite C1F in C3. + rewrite C2F in C3. + rewrite C1R in C3. + rewrite C2R in C3. + rewrite Rlt_bool_true in C3; cycle 1. + { apply Rabs_relax with (b := bpow radix2 64). + { apply bpow_lt ; lia. } + cbn. + gappa. + } + cbn in C3. + destruct C3 as (C3R & C3F & C3Sz). 
+ assert (is_nan 53 1024 + (Bmult 53 1024 re re Float.binop_nan mode_NE + (BofZ 53 1024 re re a') f) = false) as NAN. + { apply is_finite_not_is_nan. + assumption. } + pose proof (C3Sz NAN) as C3S. + clear NAN C3Sz. + + auto. +Qed. + +Definition smallb_thresh := 4398046511104%Z. + +Definition smallb_approx_real_range := 2200000000000%R. +Lemma step1_smallb_real : + forall a b + (a_RANGE : (1 <= a <= 18446744073709551615)%R) + (b_RANGE : (1 <= b <= IZR smallb_thresh)%R), + (Rabs((step1_real_quotient a b) * b - a) <= smallb_approx_real_range)%R. +Proof. + intros. + unfold smallb_thresh in b_RANGE. + unfold smallb_approx_real_range. + unfold step1_real_quotient. + set (q := ((rd (a)) * (rd (rs (1 / rs (rd b)))))%R) in *. + replace ((rd q) *b - a)%R with + ((rd(q)-q)/q * rd(a) * (1 + (rd (rs (1 / rs (rd b))) - 1/b)/(1/b)) + + (rd (a)) * ((rd (rs (1 / rs (rd b))) - 1 / b) / (1/b)) + + (rd(a) - a))%R; cycle 1. + { unfold q. + field. + split. lra. + split. gappa. + gappa. + } + unfold q. + gappa. +Qed. + +Lemma step1_div_longu_a0 : + forall b, + (0 < Int64.unsigned b)%Z -> + (step1_div_longu (Vlong Int64.zero) (Vlong b)) = Vlong Int64.zero. +Proof. + intros b b_NOT0. + unfold step1_div_longu. + unfold step1_real_div_longu. + destruct (step1_real_inv_longu_correct b b_NOT0) as + (f & C1E & C1R & C1F & C1S). + rewrite C1E. + cbn. + unfold Float.to_longu_ne, Float.of_longu, Float.mul. + rewrite Int64.unsigned_zero. + set (re := (@eq_refl Datatypes.comparison Lt)). + assert (- 2 ^ 53 <= 0 <= 2 ^ 53)%Z as SILLY by lia. + destruct (BofZ_exact 53 1024 re re 0 SILLY) as (C2R & C2F & C2S). + + pose proof (Bmult_correct 53 1024 re re Float.binop_nan mode_NE + (BofZ 53 1024 re re 0) f) as C3. + rewrite C1F in C3. + rewrite C2F in C3. + rewrite C1S in C3. + rewrite C2S in C3. + rewrite Z.ltb_irrefl in C3. + rewrite Rlt_bool_true in C3; cycle 1. + { clear C3. + apply Rabs_relax with (b := bpow radix2 64). + { apply bpow_lt. lia. } + cbn. + rewrite Rmult_0_l. + gappa. 
+ } + rewrite C2R in C3. + rewrite Rmult_0_l in C3. + destruct C3 as (C3R & C3F & C3Sz). + change (true && true) with true in C3F. + change (xorb false false) with false in C3Sz. + assert (is_nan 53 1024 + (Bmult 53 1024 re re Float.binop_nan mode_NE + (BofZ 53 1024 re re 0) f) = false) as NAN. + { apply is_finite_not_is_nan. + assumption. + } + pose proof (C3Sz NAN) as C3S. + clear NAN C3Sz. + pose proof ((ZofB_ne_range_correct 53 1024 + (Bmult 53 1024 re re Float.binop_nan mode_NE + (BofZ 53 1024 re re 0) f) 0 Int64.max_unsigned)) as C4. + rewrite C3R in C4. + replace (round radix2 (FLT_exp (3 - 1024 - 53) 53) (round_mode mode_NE) 0) + with 0%R in C4 by (cbn ; gappa). + rewrite Znearest_IZR in C4. + cbn zeta in C4. + rewrite Z.leb_refl in C4. + change (0 <=? Int64.max_unsigned)%Z with true in C4. + rewrite andb_true_r in C4. + rewrite andb_true_r in C4. + rewrite C3F in C4. + rewrite C4. + reflexivity. +Qed. + +Lemma step1_div_longu_correct_anyb : + forall a b + (b_NOT01 : (1 < Int64.unsigned b)%Z), + exists (q : int64), + (step1_div_longu (Vlong a) (Vlong b)) = Vlong q. +Proof. + intros. + + pose proof (Int64.unsigned_range a) as a_RANGE. + pose proof (Int64.unsigned_range b) as b_RANGE. + change Int64.modulus with 18446744073709551616%Z in *. + set (a' := Int64.unsigned a) in *. + set (b' := Int64.unsigned b) in *. + assert (0 <= IZR a' <= 18446744073709551615)%R as a_RANGE'. + { split; apply IZR_le; lia. } + assert (2 <= IZR b' <= 18446744073709551615)%R as b_RANGE'. + { split; apply IZR_le; lia. } + + assert (0 < b')%Z as b_NOT0 by lia. + + destruct (Z_le_gt_dec a' 0). + { assert (a' = 0%Z) as ZERO by lia. + replace a with Int64.zero; cycle 1. + { + unfold a' in ZERO. + unfold Int64.zero. + rewrite <- ZERO. + apply Int64.repr_unsigned. + } + exists Int64.zero. + apply step1_div_longu_a0. + exact b_NOT0. + } + + unfold step1_div_longu. + unfold step1_real_div_longu. + destruct (step1_real_inv_longu_correct b b_NOT0) as (f & C1E & C1R & C1F & C1S). 
+ rewrite C1E. + cbn. + unfold Float.of_longu, Float.mul, Float.to_longu_ne. + set (re := (@eq_refl Datatypes.comparison Lt)). + fold a'. + fold b' in C1R. + pose proof (BofZ_correct 53 1024 re re a') as C2. + rewrite Rlt_bool_true in C2; cycle 1. + { clear C2. + apply Rabs_relax with (b := bpow radix2 64). + { apply bpow_lt. lia. } + cbn. + gappa. + } + cbn in C2. + destruct C2 as (C2R & C2F & C2S). + pose proof (Bmult_correct 53 1024 re re Float.binop_nan mode_NE + (BofZ 53 1024 re re a') f) as C3. + rewrite C2R in C3. + rewrite C2F in C3. + rewrite C2S in C3. + rewrite C1R in C3. + rewrite C1F in C3. + rewrite C1S in C3. + rewrite Rlt_bool_true in C3; cycle 1. + { clear C3. + apply Rabs_relax with (b := bpow radix2 64). + { apply bpow_lt. lia. } + cbn. + gappa. + } + cbn in C3. + destruct C3 as (C3R & C3F & _). + pose proof (ZofB_ne_range_correct 53 1024 + (Bmult 53 1024 re re Float.binop_nan mode_NE + (BofZ 53 1024 re re a') f) 0 Int64.max_unsigned) as C4. + rewrite C3R in C4. + rewrite C3F in C4. + cbn zeta in C4. + rewrite Zle_bool_true in C4 ; cycle 1. + { clear C4. + apply Znearest_lub. + gappa. + } + rewrite Zle_bool_true in C4 ; cycle 1. + { clear C4. + apply Znearest_glb. + cbn. + gappa. + } + rewrite C4. + cbn. + eauto. +Qed. + +Definition smallb_approx_range := 4400000000000%Z. +Lemma step1_div_longu_correct : + forall a b, + (1 < Int64.unsigned b <= smallb_thresh)%Z -> + exists (q : int64), + (step1_div_longu (Vlong a) (Vlong b)) = Vlong q /\ + (Z.abs (Int64.unsigned a - Int64.unsigned b*Int64.unsigned q) <= smallb_approx_range)%Z. +Proof. + intros a b b_RANGE. + + pose proof (Int64.unsigned_range a) as a_RANGE. + change Int64.modulus with 18446744073709551616%Z in a_RANGE. + set (a' := Int64.unsigned a) in *. + set (b' := Int64.unsigned b) in *. + + destruct (Z_le_gt_dec a' 0). + { assert (a' = 0%Z) as ZERO by lia. + exists Int64.zero. + rewrite ZERO. + rewrite Int64.unsigned_zero. + replace (Z.abs (0 - b' * 0))%Z with 0%Z by lia. 
+ replace a with Int64.zero; cycle 1. + { + unfold a' in ZERO. + unfold Int64.zero. + rewrite <- ZERO. + apply Int64.repr_unsigned. + } + split. + { apply step1_div_longu_a0. + lia. + } + unfold smallb_approx_range. + lia. + } + + unfold step1_div_longu. + assert (1 < b')%Z as b_NOT01 by lia. + destruct (step1_real_div_longu_correct a b b_NOT01) as (q & C1E & C1R & C1F & C1S). + rewrite C1E. cbn. + unfold Float.to_longu_ne. + pose proof (ZofB_ne_range_correct 53 1024 q 0 Int64.max_unsigned) as C2. + rewrite C1F in C2. + + + assert (1 <= IZR a' <= 18446744073709551615)%R as a_RANGE'. + { split; apply IZR_le; lia. } + assert (2 <= IZR b' <= IZR smallb_thresh)%R as b_RANGE'. + { split; apply IZR_le; lia. } + assert (1 <= IZR b' <= IZR smallb_thresh)%R as b_RANGE'' by lra. + pose proof (step1_smallb_real (IZR a') (IZR b') a_RANGE' b_RANGE'') as DELTA. + fold a' in C1R. + fold b' in C1R. + rewrite <- C1R in DELTA. + + assert (0 <= B2R _ _ q)%R as q_NONNEG. + { apply Bsign_false_nonneg. assumption. } + cbn in C2. + rewrite Zle_bool_true in C2; cycle 1. + { apply Znearest_IZR_le. assumption. } + assert (B2R _ _ q <= 9223376000000000000)%R as q_SMALL. + { replace (B2R _ _ q) with + ((IZR a' / IZR b') + (B2R _ _ q * IZR b' - IZR a') / IZR b')%R; cycle 1. + { field. lra. } + unfold smallb_approx_real_range in DELTA. + unfold smallb_thresh in b_RANGE'. + set (y := (B2R 53 1024 q * IZR b' - IZR a')%R) in *. + gappa. + } + rewrite Zle_bool_true in C2; cycle 1. + { apply Znearest_le_IZR. lra. } + cbn in C2. + + change Int64.max_unsigned with 18446744073709551615%Z. + rewrite C2. + cbn. + + econstructor. split. reflexivity. + rewrite Int64.unsigned_repr; cycle 1. + { split. + - apply Znearest_IZR_le. lra. + - apply Znearest_le_IZR. + change Int64.max_unsigned with 18446744073709551615%Z. + lra. + } + apply le_IZR. + rewrite abs_IZR. + unfold smallb_approx_real_range, smallb_approx_range, smallb_thresh in *. + rewrite minus_IZR. + rewrite mult_IZR. 
+ set (q_r := B2R 53 1024 q) in *. + assert (Rabs (IZR (ZnearestE q_r) - q_r) <= / 2)%R as NEAR + by apply Znearest_imp2. + set (q_i := IZR (ZnearestE q_r)) in *. + replace (IZR a' - IZR b' * q_i)%R with + (-(IZR b' * (q_i - q_r)) - (q_r * IZR b' - IZR a'))%R by ring. + set (delta1 := (q_i - q_r)%R) in *. + set (delta2 := (q_r * IZR b' - IZR a')%R) in *. + gappa. +Qed. + +Lemma le_IZR3 : + forall n m p : Z, (IZR n <= IZR m <= IZR p)%R -> (n <= m <= p)%Z. +Proof. + intros ; split ; apply le_IZR ; lra. +Qed. + +Definition mostb_thresh := 18446740000000000000%Z. +Lemma step1_div_longu_correct_mostb : + forall a b, + (1 < Int64.unsigned b <= mostb_thresh)%Z -> + exists (q : int64), + (step1_div_longu (Vlong a) (Vlong b)) = Vlong q /\ + (Int64.min_signed <= (Int64.unsigned a - Int64.unsigned b*Int64.unsigned q) <= Int64.max_signed)%Z. +Proof. + intros a b b_RANGE. + + pose proof (Int64.unsigned_range a) as a_RANGE. + change Int64.modulus with 18446744073709551616%Z in a_RANGE. + set (a' := Int64.unsigned a) in *. + set (b' := Int64.unsigned b) in *. + + destruct (Z_le_gt_dec a' 0). + { assert (a' = 0%Z) as ZERO by lia. + exists Int64.zero. + rewrite ZERO. + rewrite Int64.unsigned_zero. + replace (Z.abs (0 - b' * 0))%Z with 0%Z by lia. + replace a with Int64.zero; cycle 1. + { + unfold a' in ZERO. + unfold Int64.zero. + rewrite <- ZERO. + apply Int64.repr_unsigned. + } + split. + { apply step1_div_longu_a0. + lia. + } + change Int64.min_signed with (-9223372036854775808)%Z. + change Int64.max_signed with ( 9223372036854775807)%Z. + lia. + } + + unfold step1_div_longu. + assert (1 < b')%Z as b_NOT01 by lia. + destruct (step1_real_div_longu_correct a b b_NOT01) as (q & C1E & C1R & C1F & C1S). + rewrite C1E. cbn. + unfold Float.to_longu_ne. + pose proof (ZofB_ne_range_correct 53 1024 q 0 Int64.max_unsigned) as C2. + rewrite C1F in C2. + + + assert (1 <= IZR a' <= 18446744073709551615)%R as a_RANGE'. + { split; apply IZR_le; lia. 
} + assert (2 <= IZR b' <= IZR mostb_thresh)%R as b_RANGE'. + { split; apply IZR_le; lia. } + assert (1 <= IZR b' <= IZR mostb_thresh)%R as b_RANGE'' by lra. + cbn zeta in C2. + rewrite C2. + cbn. + rewrite C1R. + unfold step1_real_quotient. + fold a' b'. + unfold mostb_thresh in *. + + rewrite Zle_bool_true ; cycle 1. + { apply Znearest_IZR_le. + gappa. + } + rewrite Zle_bool_true ; cycle 1. + { apply Znearest_le_IZR. + gappa. + } + cbn. + econstructor; split. reflexivity. + set (q_r := (rd (rd (IZR a') * rd (rs (1 / rs ( rd (IZR b'))))))%R). + assert (Rabs (IZR (ZnearestE q_r) - q_r) <= /2)%R as NEAR by apply Znearest_imp2. + set (delta1 := (IZR (ZnearestE q_r) - q_r)%R) in NEAR. + apply le_IZR3. + rewrite minus_IZR. + rewrite mult_IZR. + rewrite Int64.unsigned_repr ; cycle 1. + { split. + - apply Znearest_IZR_le. unfold q_r. + gappa. + - apply Znearest_le_IZR. unfold q_r. + change Int64.max_unsigned with 18446744073709551615%Z. + gappa. + } + replace (IZR (ZnearestE q_r)) with ((IZR (ZnearestE q_r) - q_r) + q_r)%R by ring. + fold delta1. + unfold q_r. + set (a1 := IZR a') in *. + set (b1 := IZR b') in *. + replace (rd (rd a1 * rd (rs (1 / rs (rd b1)))))%R with + ((((rd (rd a1 * rd (rs (1 / rs (rd b1))))-(a1 * (1 / b1))) / (a1 * (1 / b1)))+1) * (a1 / b1))%R; + cycle 1. + { field. lra. } + set (delta2 := ((rd (rd a1 * rd (rs (1 / rs (rd b1))))-(a1 * (1 / b1))) / (a1 * (1 / b1)))%R) in *. + (* assert (Rabs (delta2) <= 1/4194304)%R. + { unfold delta2. gappa. } *) + replace (a1 - b1 * (delta1 + (delta2 + 1) * (a1 / b1)))%R with + (-b1*delta1 - a1*delta2)%R; cycle 1. + { field. lra. } + unfold delta2. + gappa. +Qed. + +Lemma find_quotient: + forall (a b : Z) + (b_POS : (0 < b)%Z) + (qr : R) + (GAP : (Rabs (IZR a / IZR b - qr) < /2)%R), + (a / b)%Z = + let q := ZnearestE qr in + if (b*q >? a)%Z + then (q-1)%Z + else q. +Proof. + intros. + set (q := ZnearestE qr). + cbn zeta. + set (b' := IZR b) in *. + set (a' := IZR a) in *. + assert (1 <= b')%R as b_POS'. 
+ { apply IZR_le. + lia. + } + + pose proof (Znearest_imp2 (fun x : Z => negb (Z.even x)) qr) as ROUND. + fold q in ROUND. + set (q' := IZR q) in *. + + pose proof (Rabs_triang (a' / b' - qr) + (qr - q'))%R as TRIANGLE. + replace ((a' / b' - qr) + (qr - q'))%R with + (a' / b' - q')%R in TRIANGLE by ring. + rewrite <- Rabs_Ropp in ROUND. + replace (- (q' - qr))%R with (qr - q')%R in ROUND by ring. + assert (Z.abs (a - b*q) < b)%Z as DELTA. + { apply lt_IZR. + rewrite <- Rabs_Zabs. + rewrite minus_IZR. + rewrite mult_IZR. + fold a' q' b'. + apply Rmult_lt_reg_r with (r := (/b')%R). + { apply Rinv_0_lt_compat. lra. } + rewrite Rinv_r by lra. + replace (/ b')%R with (/ Rabs(b'))%R ; cycle 1. + { f_equal. + apply Rabs_pos_eq. lra. } + rewrite <- Rabs_Rinv by lra. + rewrite <- Rabs_mult. + replace ((a' - b' * q') * / b')%R with (a'/b' - q')%R by (field ; lra). + lra. + } + + pose proof (Zgt_cases (b * q) a)%Z as CASE. + destruct (_ >? _)%Z. + { apply Zdiv_unique with (b := (a - (q-1)*b)%Z). + ring. + split; lia. + } + + apply Zdiv_unique with (b := (a - q*b)%Z). + ring. + split; lia. +Qed. + +Definition step2_real_div_long a b := + Val.mulf (Val.maketotal (Val.floatoflong a)) (approx_inv_longu b). + +Definition smalla_thresh := 34184372088832%Z. + +Lemma step2_real_div_long_smalla_correct : + forall (a b : int64) + (a_SMALL : (Z.abs (Int64.signed a) <= smalla_thresh)%Z) + (b_NOT0 : (0 < Int64.unsigned b)%Z), + exists (q : float), + (step2_real_div_long (Vlong a) (Vlong b)) = Vfloat q /\ + (Rabs ((B2R _ _ q) - (IZR (Int64.signed a)) / (IZR (Int64.unsigned b))) <= (32767/65536))%R /\ + is_finite _ _ q = true. +Proof. + intros. + unfold step2_real_div_long. + destruct (approx_inv_longu_correct_rel b b_NOT0) as (f & C0E & C0F & C0R). + rewrite C0E. + econstructor. + split. reflexivity. + Local Transparent Float.of_long. + unfold Float.mul, Float.of_long. + set (re := (@eq_refl Datatypes.comparison Lt)) in *. + pose proof (Int64.unsigned_range b) as b_RANGE. 
+ change Int64.modulus with 18446744073709551616%Z in b_RANGE. + set (a' := Int64.signed a) in *. + set (b' := Int64.unsigned b) in *. + assert (1 <= IZR b' <= 18446744073709551615)%R as b_RANGE'. + { split; apply IZR_le; lia. + } + assert(Rabs (IZR a') <= IZR smalla_thresh)%R as a_RANGE'. + { rewrite Rabs_Zabs. + apply IZR_le. + assumption. + } + assert (- 2 ^ 53 <= a' <= 2 ^ 53)%Z as SILLY. + { unfold smalla_thresh in a_SMALL. + apply Z.abs_le. + lia. + } + destruct (BofZ_exact 53 1024 re re (Int64.signed a) SILLY) as (C1R & C1F & C1S). + fold a' in C1R, C1F, C1S. + pose proof (Bmult_correct 53 1024 re re Float.binop_nan mode_NE (BofZ 53 1024 re re a') f) as C2. + rewrite Rlt_bool_true in C2 ; cycle 1. + { clear C2. + apply Rabs_relax with (b := bpow radix2 53). + { apply bpow_lt. lia. } + cbn. + rewrite C1R. + unfold approx_inv_rel_thresh in C0R. + replace (B2R 53 1024 f) with + ((1/IZR b') * ((IZR b' * B2R 53 1024 f - 1) + 1))%R ; cycle 1. + { field. lra. } + unfold smalla_thresh in *. + gappa. + } + rewrite C0F in C2. + rewrite C1R in C2. + rewrite C1F in C2. + rewrite C1S in C2. + cbn in C2. + destruct C2 as (C2R & C2F & _). + split. + 2: exact C2F. + rewrite C2R. + replace (IZR a' * (B2R 53 1024 f))%R with + ((IZR a'/IZR b') * ((IZR b' * B2R 53 1024 f - 1) + 1))%R ; cycle 1. + { field. lra. } + set (delta1 := (IZR b' * B2R 53 1024 f - 1)%R) in *. + set (q1 := (IZR a' / IZR b' * (delta1 + 1))%R). + replace (rd q1) with (((rd q1) - q1) + q1)%R by ring. + set (delta2 := ((rd q1) - q1)%R). + unfold q1. + replace (delta2 + IZR a' / IZR b' * (delta1 + 1) - IZR a' / IZR b')%R with + (delta2 + (IZR a' / IZR b') * delta1)%R by ring. + unfold delta2. + unfold q1. + unfold approx_inv_rel_thresh in *. + unfold smalla_thresh in *. + gappa. +Qed. + +Definition step2_div_long' a b := + Val.maketotal (Val.longoffloat_ne (step2_real_div_long a b)). 
+ +Definition step2_div_long a b := + let q := step2_div_long' a b in + Val.select (Val.cmpl_bool Clt (Val.subl a (Val.mull q b)) (Vlong Int64.zero)) + (Val.addl q (Vlong Int64.mone)) q Tlong. + +Lemma function_ite : + forall {A B : Type} (fn : A->B) (b : bool) (x y: A), + fn (if b then x else y) = (if b then fn x else fn y). +Proof. + intros. + destruct b; reflexivity. +Qed. + +Lemma normalize_ite : + forall ty (b : bool) x y, + Val.normalize (if b then x else y) ty = + (if b then Val.normalize x ty else Val.normalize y ty). +Proof. + intros. + destruct b; reflexivity. +Qed. + + +Lemma int64_mul_signed_unsigned: + forall x y : int64, + Int64.mul x y = Int64.repr (Int64.signed x * Int64.unsigned y). +Proof. + intros. + unfold Int64.mul. + apply Int64.eqm_samerepr. + apply Int64.eqm_mult. + - apply Int64.eqm_sym. + apply Int64.eqm_signed_unsigned. + - apply Int64.eqm_refl. +Qed. + +Lemma int64_eqm_signed_repr: + forall z : Z, Int64.eqm z (Int64.signed (Int64.repr z)). +Proof. + intros. + apply Int64.eqm_trans with (y := Int64.unsigned (Int64.repr z)). + - apply Int64.eqm_unsigned_repr. + - apply Int64.eqm_sym. + apply Int64.eqm_signed_unsigned. +Qed. + +Lemma signed_repr_sub: + forall x y, + Int64.repr (Int64.signed (Int64.repr x) - y) = + Int64.repr (x - y). +Proof. + intros. + apply Int64.eqm_samerepr. + apply Int64.eqm_sub. + - apply Int64.eqm_sym. + apply int64_eqm_signed_repr. + - apply Int64.eqm_refl. +Qed. + +Lemma signed_repr_sub2: + forall x y, + Int64.repr (x - Int64.signed (Int64.repr y)) = + Int64.repr (x - y). +Proof. + intros. + apply Int64.eqm_samerepr. + apply Int64.eqm_sub. + - apply Int64.eqm_refl. + - apply Int64.eqm_sym. + apply int64_eqm_signed_repr. +Qed. 
+ +Lemma step2_div_long_smalla_correct : + forall a b + (a_SMALL : (Z.abs (Int64.signed a) <= smalla_thresh)%Z) + (b_NOT0 : (0 < Int64.unsigned b)%Z) + (b_NOT_VERY_BIG : (Int64.unsigned b <= Int64.max_signed)%Z), + step2_div_long (Vlong a) (Vlong b) = Vlong (Int64.repr (Int64.signed a / Int64.unsigned b))%Z. +Proof. + intros. + pose proof (Int64.unsigned_range b) as b_RANGE. + change Int64.modulus with 18446744073709551616%Z in b_RANGE. + set (a' := (Int64.signed a)) in *. + set (b' := (Int64.unsigned b)) in *. + assert (Rabs (IZR a') <= IZR smalla_thresh)%R as a_RANGE'. + { rewrite Rabs_Zabs. + apply IZR_le. + assumption. + } + assert (1 <= IZR b' <= 18446744073709551615)%R as b_RANGE'. + { split; apply IZR_le; lia. + } + destruct (step2_real_div_long_smalla_correct a b a_SMALL b_NOT0) as (q & C1R & C1E & C1F). + fold a' b' in C1E. + assert ((Int64.min_signed <=? ZnearestE (B2R 53 1024 q))=true)%Z as q_LOW. + { apply Zle_imp_le_bool. + change Int64.min_signed with (-9223372036854775808)%Z. + apply Znearest_lub. + set (q' := B2R 53 1024 q) in *. + replace q' with (IZR a' / IZR b' + (q' - IZR a' / IZR b'))%R by ring. + unfold smalla_thresh in a_RANGE'. + gappa. + } + assert ((ZnearestE (B2R 53 1024 q) <=? Int64.max_signed)=true)%Z as q_HIGH. + { apply Zle_imp_le_bool. + change Int64.max_signed with (9223372036854775807)%Z. + apply Znearest_glb. + set (q' := B2R 53 1024 q) in *. + replace q' with (IZR a' / IZR b' + (q' - IZR a' / IZR b'))%R by ring. + unfold smalla_thresh in a_RANGE'. + gappa. + } + unfold step2_div_long, step2_div_long'. + rewrite C1R. + cbn. + unfold Float.to_long_ne. + rewrite (ZofB_ne_range_correct _ _ q Int64.min_signed Int64.max_signed). + rewrite C1F. + rewrite q_LOW. + rewrite q_HIGH. + cbn. + rewrite normalize_ite. + cbn. + rewrite <- (function_ite Vlong). + f_equal. + unfold Int64.lt. + set (q' := B2R 53 1024 q) in *. + fold a'. + assert (Int64.signed (Int64.repr (ZnearestE q')) = ZnearestE q') as q_SIGNED. + { apply Int64.signed_repr. 
+ split; lia. + } + rewrite Int64.add_signed. + rewrite q_SIGNED. + rewrite Int64.signed_mone. + rewrite Int64.signed_zero. + rewrite <- (function_ite Int64.repr). + f_equal. + replace (ZnearestE q' + -1)%Z with (ZnearestE q' - 1)%Z by ring. + + set (q'' := (ZnearestE q')) in *. + fold a'. + rewrite int64_mul_signed_unsigned. + rewrite q_SIGNED. + fold b'. + + rewrite Int64.sub_signed. + fold a'. + rewrite signed_repr_sub2. + + assert ((Rabs (IZR a' / IZR b' - q') < / 2)%R) as HALF. + { replace (IZR a' / IZR b' - q')%R with + (-(q' - IZR a' / IZR b'))%R by ring. + rewrite Rabs_Ropp. + lra. + } + pose proof (find_quotient a' b' b_NOT0 q' HALF) as QUOTIENT. + fold q'' in QUOTIENT. + cbn zeta in QUOTIENT. + + assert (b' <> 0)%Z as NONZ by lia. + pose proof (Zmod_eq_full a' b' NONZ) as MOD. + assert (b' > 0)%Z as b_GT0 by lia. + pose proof (Z_mod_lt a' b' b_GT0) as MOD_LT. + destruct (Z_lt_dec a' (b' * q'')) as [LT | GE]. + { replace (b' * q'' >? a')%Z with true in QUOTIENT by lia. + replace q'' with (1 + (a' / b'))%Z by lia. + replace (a' - (1 + a' / b') * b')%Z + with ((a' - a' / b' * b')-b')%Z by ring. + rewrite <- MOD. + rewrite Int64.signed_repr; cycle 1. + { change Int64.min_signed with (-9223372036854775808)%Z in *. + change Int64.max_signed with (9223372036854775807)%Z in *. + lia. + } + rewrite zlt_true by lia. + ring. + } + replace (b' * q'' >? a')%Z with false in QUOTIENT by lia. + rewrite <- QUOTIENT. + replace (a' / b' * b' - a')%Z with (-(a' - a' / b' * b'))%Z by ring. + rewrite <- MOD. + rewrite Int64.signed_repr ; cycle 1. + { change Int64.min_signed with (-9223372036854775808)%Z in *. + change Int64.max_signed with (9223372036854775807)%Z in *. + lia. + } + rewrite zlt_false by lia. + reflexivity. +Qed. + +Definition twostep_div_longu a b := + let q1 := step1_div_longu a b in + let q2 := step2_div_long (Val.subl a (Val.mull b q1)) b in + Val.addl q1 q2. 

(* [Int64.repr] absorbs an inner [unsigned (repr _)] on the second operand
   of a subtraction: both sides are congruent modulo 2^64. *)
Lemma unsigned_repr_sub :
  forall a b,
    Int64.repr (a - b) = Int64.repr (a - Int64.unsigned (Int64.repr b)).
Proof.
  intros.
  apply Int64.eqm_samerepr.
  apply Int64.eqm_sub.
  - apply Int64.eqm_refl.
  - apply Int64.eqm_unsigned_repr.
Qed.

(* Same absorption property for addition. *)
Lemma unsigned_repr_add :
  forall a b,
    Int64.repr (a + b) = Int64.repr (a + Int64.unsigned (Int64.repr b)).
Proof.
  intros.
  apply Int64.eqm_samerepr.
  apply Int64.eqm_add.
  - apply Int64.eqm_refl.
  - apply Int64.eqm_unsigned_repr.
Qed.

(* Correctness of the two-step unsigned 64-bit division for small divisors:
   when 1 < b <= smallb_thresh, the step-1 approximate quotient leaves a
   remainder small enough (|r1| <= smalla_thresh) for the exact step-2
   correction, so the composition agrees with [Val.divlu]. *)
Lemma twostep_div_longu_smallb_correct :
  forall a b
    (b_RANGE : (1 < Int64.unsigned b <= smallb_thresh)%Z),
    (twostep_div_longu (Vlong a) (Vlong b)) =
      (Val.maketotal (Val.divlu (Vlong a) (Vlong b))).
Proof.
  intros.
  unfold twostep_div_longu.
  destruct (step1_div_longu_correct a b b_RANGE) as (q1 & C1R & C1E).
  rewrite C1R.
  set (q1' := Int64.unsigned q1) in *.
  set (b' := Int64.unsigned b) in *.
  set (a' := Int64.unsigned a) in *.
  (* The step-1 remainder a - b*q1 fits the signed range and is small. *)
  assert ( Z.abs (Int64.signed (Int64.sub a (Int64.mul b q1))) <= smalla_thresh)%Z as r1_SMALL.
  { unfold smalla_thresh, smallb_approx_range in *.
    unfold Int64.sub, Int64.mul.
    fold q1' b' a'.
    rewrite <- unsigned_repr_sub.
    rewrite Int64.signed_repr ; cycle 1.
    { change Int64.min_signed with (-9223372036854775808)%Z.
      change Int64.max_signed with (9223372036854775807)%Z.
      lia.
    }
    lia.
  }
  assert (0 < b')%Z as b_NOT0 by lia.
  assert (b' <= Int64.max_signed)%Z as b_NOTBIG.
  { change Int64.max_signed with (9223372036854775807)%Z.
    unfold smallb_thresh in b_RANGE.
    lia.
  }
  cbn.
  (* Step 2 computes the exact quotient of the small remainder by b. *)
  rewrite (step2_div_long_smalla_correct (Int64.sub a (Int64.mul b q1)) b r1_SMALL b_NOT0 b_NOTBIG).
  unfold Int64.add, Int64.sub, Int64.mul, Int64.divu.
  fold q1' b' a'.
  rewrite <- unsigned_repr_sub.
  rewrite <- unsigned_repr_add.
  rewrite Int64.signed_repr ; cycle 1.
  {
    change Int64.min_signed with (-9223372036854775808)%Z.
    change Int64.max_signed with (9223372036854775807)%Z.
    unfold smallb_approx_range in *.
    lia.
  }
  (* q1 + (a - b*q1)/b = a/b, by the division-addition law. *)
  rewrite Z.add_comm.
  rewrite <- Z.div_add by lia.
  replace (a' - b' * q1' + q1' * b')%Z with a' by ring.
  rewrite Int64.eq_false ; cycle 1.
  { intro Z. unfold b' in b_NOT0. rewrite Z in b_NOT0.
    rewrite Int64.unsigned_zero in b_NOT0.
    lia.
  }
  reflexivity.
Qed.


(* For large divisors (b > smallb_thresh), the floating-point product
   a * approx(1/b) is within 32767/65536 (< 1/2) of the real quotient
   a/b (signed a, unsigned b), and the result is a finite double. *)
Lemma step2_real_div_long_bigb_correct :
  forall (a b : int64)
    (b_BIG : ((Int64.unsigned b) > smallb_thresh)%Z),
  exists (q : float),
    (step2_real_div_long (Vlong a) (Vlong b)) = Vfloat q /\
    (Rabs ((B2R _ _ q) - (IZR (Int64.signed a)) / (IZR (Int64.unsigned b))) <= (32767/65536))%R /\
    is_finite _ _ q = true.
Proof.
  intros.
  unfold step2_real_div_long.
  assert (0 < Int64.unsigned b)%Z as b_NOT0 by (unfold smallb_thresh in *; lia).
  destruct (approx_inv_longu_correct_rel b b_NOT0) as (f & C0E & C0F & C0R).
  rewrite C0E.
  econstructor.
  split. reflexivity.
  Local Transparent Float.of_long.
  unfold Float.mul, Float.of_long.
  set (re := (@eq_refl Datatypes.comparison Lt)) in *.
  pose proof (Int64.unsigned_range b) as b_RANGE.
  change Int64.modulus with 18446744073709551616%Z in b_RANGE.
  pose proof (Int64.signed_range a) as a_RANGE.
  set (a' := Int64.signed a) in *.
  set (b' := Int64.unsigned b) in *.
  assert (IZR (1 + smallb_thresh) <= IZR b' <= 18446744073709551615)%R as b_RANGE'.
  { split; apply IZR_le; lia.
  }
  assert(IZR Int64.min_signed <= IZR a' <= IZR Int64.max_signed)%R as a_RANGE'.
  { split; apply IZR_le; lia.
  }
  change Int64.min_signed with (-9223372036854775808)%Z in a_RANGE'.
  change Int64.max_signed with (9223372036854775807)%Z in a_RANGE'.
  (* Conversion of a' to binary64 rounds correctly (no overflow: |a'| < 2^64). *)
  pose proof (BofZ_correct 53 1024 re re a') as C1.
  rewrite Rlt_bool_true in C1 ; cycle 1.
  { clear C1.
    apply Rabs_relax with (b := bpow radix2 64).
    { apply bpow_lt; lia. }
    cbn.
    gappa.
  }
  cbn in C1.
  destruct C1 as (C1R & C1F & C1S).

  unfold smallb_thresh in b_RANGE'; cbn in b_RANGE'.

  (* Multiplication of the rounded a' by the inverse approximation does not
     overflow; bound the magnitude through f ~ 1/b'. *)
  pose proof (Bmult_correct 53 1024 re re Float.binop_nan mode_NE (BofZ 53 1024 re re a') f) as C2.
  rewrite Rlt_bool_true in C2 ; cycle 1.
  { clear C2.
    apply Rabs_relax with (b := bpow radix2 53).
    { apply bpow_lt. lia. }
    cbn.
    rewrite C1R.
    unfold approx_inv_rel_thresh in C0R.
    replace (B2R 53 1024 f) with
      ((1/IZR b') * ((IZR b' * B2R 53 1024 f - 1) + 1))%R ; cycle 1.
    { field. lra. }
    gappa.
  }
  rewrite C0F in C2.
  rewrite C1R in C2.
  rewrite C1F in C2.
  rewrite C1S in C2.
  cbn in C2.
  destruct C2 as (C2R & C2F & _).
  split.
  2: exact C2F.
  rewrite C2R.
  set (f' := (B2R 53 1024 f)) in *.
  (* Split the total error into the rounding error of the product and the
     relative error of the inverse approximation, then let gappa bound it. *)
  replace (rd(rd (IZR a') * f') - IZR a' / IZR b')%R with
    ((rd(rd (IZR a') * f') - IZR a' * f') + IZR a' / IZR b' * (IZR b' * f' - 1))%R ; cycle 1.
  { field. lra. }
  unfold approx_inv_rel_thresh in *.
  gappa.
Qed.

(* For large divisors, rounding the FP quotient to nearest-even and applying
   the one-ulp correction (subtract 1 when b*q > a) yields the exact
   truncated quotient signed(a) / unsigned(b). *)
Lemma step2_div_long_bigb_correct :
  forall a b
    (b_BIG : ((Int64.unsigned b) > smallb_thresh)%Z)
    (b_NOT_TOO_BIG : ((Int64.unsigned b) <= Int64.max_signed)%Z),
    step2_div_long (Vlong a) (Vlong b) = Vlong (Int64.repr (Int64.signed a / Int64.unsigned b))%Z.
Proof.
  intros.
  pose proof (Int64.unsigned_range b) as b_RANGE.
  change Int64.modulus with 18446744073709551616%Z in b_RANGE.
  pose proof (Int64.signed_range a) as a_RANGE.
  set (a' := (Int64.signed a)) in *.
  set (b' := (Int64.unsigned b)) in *.
  assert (IZR (1 + smallb_thresh) <= IZR b' <= 18446744073709551615)%R as b_RANGE'.
  { split; apply IZR_le; lia.
  }
  assert(IZR Int64.min_signed <= IZR a' <= IZR Int64.max_signed)%R as a_RANGE'.
  { split; apply IZR_le; lia.
  }
  unfold smallb_thresh in *; cbn in b_RANGE'.
  change Int64.min_signed with (-9223372036854775808)%Z in *.
  change Int64.max_signed with (9223372036854775807)%Z in *.
  assert (0 < b')%Z as b_NOT0 by lia.

  destruct (step2_real_div_long_bigb_correct a b b_BIG) as (q & C1R & C1E & C1F).
  fold a' b' in C1E.
  (* The rounded quotient fits the signed 64-bit range (both bounds via gappa). *)
  assert ((Int64.min_signed <=? ZnearestE (B2R 53 1024 q))=true)%Z as q_LOW.
  { apply Zle_imp_le_bool.
    change Int64.min_signed with (-9223372036854775808)%Z.
    apply Znearest_lub.
    set (q' := B2R 53 1024 q) in *.
    replace q' with (IZR a' / IZR b' + (q' - IZR a' / IZR b'))%R by ring.
    gappa.
  }
  assert ((ZnearestE (B2R 53 1024 q) <=? Int64.max_signed)=true)%Z as q_HIGH.
  { apply Zle_imp_le_bool.
    change Int64.max_signed with (9223372036854775807)%Z.
    apply Znearest_glb.
    set (q' := B2R 53 1024 q) in *.
    replace q' with (IZR a' / IZR b' + (q' - IZR a' / IZR b'))%R by ring.
    gappa.
  }
  unfold step2_div_long, step2_div_long'.
  rewrite C1R.
  cbn.
  unfold Float.to_long_ne.
  rewrite (ZofB_ne_range_correct _ _ q Int64.min_signed Int64.max_signed).
  rewrite C1F.
  rewrite q_LOW.
  rewrite q_HIGH.
  cbn.
  rewrite normalize_ite.
  cbn.
  rewrite <- (function_ite Vlong).
  f_equal.
  unfold Int64.lt.
  set (q' := B2R 53 1024 q) in *.
  fold a'.
  assert (Int64.signed (Int64.repr (ZnearestE q')) = ZnearestE q') as q_SIGNED.
  { apply Int64.signed_repr.
    split; lia.
  }
  rewrite Int64.add_signed.
  rewrite q_SIGNED.
  rewrite Int64.signed_mone.
  rewrite Int64.signed_zero.
  rewrite <- (function_ite Int64.repr).
  f_equal.
  replace (ZnearestE q' + -1)%Z with (ZnearestE q' - 1)%Z by ring.

  set (q'' := (ZnearestE q')) in *.
  fold a'.
  rewrite int64_mul_signed_unsigned.
  rewrite q_SIGNED.
  fold b'.

  rewrite Int64.sub_signed.
  fold a'.
  rewrite signed_repr_sub2.

  (* The FP estimate is within 1/2 of the real quotient, so its nearest
     rounding q'' is either a'/b' or a'/b' + 1 (see [find_quotient]). *)
  assert ((Rabs (IZR a' / IZR b' - q') < / 2)%R) as HALF.
  { replace (IZR a' / IZR b' - q')%R with
      (-(q' - IZR a' / IZR b'))%R by ring.
    rewrite Rabs_Ropp.
    lra.
  }
  pose proof (find_quotient a' b' b_NOT0 q' HALF) as QUOTIENT.
  fold q'' in QUOTIENT.
  cbn zeta in QUOTIENT.

  assert (b' <> 0)%Z as NONZ by lia.
  pose proof (Zmod_eq_full a' b' NONZ) as MOD.
  assert (b' > 0)%Z as b_GT0 by lia.
  pose proof (Z_mod_lt a' b' b_GT0) as MOD_LT.
  (* Case split: whether the candidate quotient overshoots (needs -1). *)
  destruct (Z_lt_dec a' (b' * q'')) as [LT | GE].
  { replace (b' * q'' >? a')%Z with true in QUOTIENT by lia.
    replace q'' with (1 + (a' / b'))%Z by lia.
    replace (a' - (1 + a' / b') * b')%Z
      with ((a' - a' / b' * b')-b')%Z by ring.
    rewrite <- MOD.
    rewrite Int64.signed_repr; cycle 1.
    { change Int64.min_signed with (-9223372036854775808)%Z in *.
      change Int64.max_signed with (9223372036854775807)%Z in *.
      lia.
    }
    rewrite zlt_true by lia.
    ring.
  }
  replace (b' * q'' >? a')%Z with false in QUOTIENT by lia.
  rewrite <- QUOTIENT.
  replace (a' / b' * b' - a')%Z with (-(a' - a' / b' * b'))%Z by ring.
  rewrite <- MOD.
  rewrite Int64.signed_repr ; cycle 1.
  { change Int64.min_signed with (-9223372036854775808)%Z in *.
    change Int64.max_signed with (9223372036854775807)%Z in *.
    lia.
  }
  rewrite zlt_false by lia.
  reflexivity.
Qed.

(* Unsigned analogues of the step-2 construction: real-valued FP estimate,
   its nearest-even rounding, and the corrected quotient. *)
Definition step2_real_div_longu a b :=
  Val.mulf (Val.maketotal (Val.floatoflongu a)) (approx_inv_longu b).

Definition step2_div_longu' a b :=
  Val.maketotal (Val.longuoffloat_ne (step2_real_div_longu a b)).

(* Subtract 1 from the rounded quotient q when q*b > a (overshoot). *)
Definition step2_div_longu a b :=
  let q := step2_div_longu' a b in
  Val.select (Val.cmpl_bool Cgt (Val.subl (Val.mull q b) a) (Vlong Int64.zero))
    (Val.addl q (Vlong Int64.mone)) q Tlong.

(* Unsigned counterpart of [step2_real_div_long_bigb_correct]:
   same 32767/65536 error bound, with a taken unsigned. *)
Lemma step2_real_div_longu_bigb_correct :
  forall (a b : int64)
    (b_BIG : ((Int64.unsigned b) > smallb_thresh)%Z),
  exists (q : float),
    (step2_real_div_longu (Vlong a) (Vlong b)) = Vfloat q /\
    (Rabs ((B2R _ _ q) - (IZR (Int64.unsigned a)) / (IZR (Int64.unsigned b))) <= (32767/65536))%R /\
    is_finite _ _ q = true.
Proof.
  intros.
  unfold step2_real_div_longu.
  assert (0 < Int64.unsigned b)%Z as b_NOT0 by (unfold smallb_thresh in *; lia).
  destruct (approx_inv_longu_correct_rel b b_NOT0) as (f & C0E & C0F & C0R).
  rewrite C0E.
  econstructor.
  split. reflexivity.
  Local Transparent Float.of_longu.
  unfold Float.mul, Float.of_longu.
  set (re := (@eq_refl Datatypes.comparison Lt)) in *.
  pose proof (Int64.unsigned_range b) as b_RANGE.
  pose proof (Int64.unsigned_range a) as a_RANGE.
  change Int64.modulus with 18446744073709551616%Z in *.
  set (a' := Int64.unsigned a) in *.
  set (b' := Int64.unsigned b) in *.
  assert (IZR (1 + smallb_thresh) <= IZR b' <= 18446744073709551615)%R as b_RANGE'.
  { split; apply IZR_le; lia.
  }
  assert(0 <= IZR a' <= 18446744073709551615)%R as a_RANGE'.
  { split; apply IZR_le; lia.
  }
  pose proof (BofZ_correct 53 1024 re re a') as C1.
  rewrite Rlt_bool_true in C1 ; cycle 1.
  { clear C1.
    apply Rabs_relax with (b := bpow radix2 64).
    { apply bpow_lt; lia. }
    cbn.
    gappa.
  }
  cbn in C1.
  destruct C1 as (C1R & C1F & C1S).

  unfold smallb_thresh in b_RANGE'; cbn in b_RANGE'.

  pose proof (Bmult_correct 53 1024 re re Float.binop_nan mode_NE (BofZ 53 1024 re re a') f) as C2.
  rewrite Rlt_bool_true in C2 ; cycle 1.
  { clear C2.
    apply Rabs_relax with (b := bpow radix2 53).
    { apply bpow_lt. lia. }
    cbn.
    rewrite C1R.
    unfold approx_inv_rel_thresh in C0R.
    replace (B2R 53 1024 f) with
      ((1/IZR b') * ((IZR b' * B2R 53 1024 f - 1) + 1))%R ; cycle 1.
    { field. lra. }
    gappa.
  }
  rewrite C0F in C2.
  rewrite C1R in C2.
  rewrite C1F in C2.
  rewrite C1S in C2.
  cbn in C2.
  destruct C2 as (C2R & C2F & _).
  split.
  2: exact C2F.
  rewrite C2R.
  set (f' := (B2R 53 1024 f)) in *.
  (* Same error decomposition as in the signed case. *)
  replace (rd(rd (IZR a') * f') - IZR a' / IZR b')%R with
    ((rd(rd (IZR a') * f') - IZR a' * f') + IZR a' / IZR b' * (IZR b' * f' - 1))%R ; cycle 1.
  { field. lra. }
  unfold approx_inv_rel_thresh in *.
  gappa.
Qed.

(* [Int64.repr] absorbs an inner [unsigned (repr _)] on the first operand
   of a multiplication. *)
Lemma repr_unsigned_mul:
  forall a b,
    (Int64.repr (Int64.unsigned (Int64.repr a) * b)) = Int64.repr (a * b).
Proof.
  intros.
  apply Int64.eqm_samerepr.
  apply Int64.eqm_mult.
  - apply Int64.eqm_sym. apply Int64.eqm_unsigned_repr.
  - apply Int64.eqm_refl.
Qed.

(* Same absorption on the first operand of a subtraction. *)
Lemma repr_unsigned_sub:
  forall a b,
    (Int64.repr (Int64.unsigned (Int64.repr a) - b)) = Int64.repr (a - b).
Proof.
  intros.
  apply Int64.eqm_samerepr.
  apply Int64.eqm_sub.
  - apply Int64.eqm_sym. apply Int64.eqm_unsigned_repr.
  - apply Int64.eqm_refl.
Qed.

(* Same absorption on the first operand of an addition. *)
Lemma repr_unsigned_add:
  forall a b,
    (Int64.repr (Int64.unsigned (Int64.repr a) + b)) = Int64.repr (a + b).
Proof.
  intros.
  apply Int64.eqm_samerepr.
  apply Int64.eqm_add.
  - apply Int64.eqm_sym. apply Int64.eqm_unsigned_repr.
  - apply Int64.eqm_refl.
Qed.

(* Unsigned counterpart of [step2_div_long_bigb_correct]: for large divisors,
   nearest-even rounding of the FP quotient plus the one-ulp correction gives
   the exact unsigned quotient. *)
Lemma step2_div_longu_bigb_correct :
  forall a b
    (b_BIG : ((Int64.unsigned b) > smallb_thresh)%Z)
    (b_NOT_TOO_BIG : ((Int64.unsigned b) <= Int64.max_signed)%Z),
    step2_div_longu (Vlong a) (Vlong b) = Vlong (Int64.repr (Int64.unsigned a / Int64.unsigned b))%Z.
Proof.
  intros.
  pose proof (Int64.unsigned_range b) as b_RANGE.
  pose proof (Int64.unsigned_range a) as a_RANGE.
  change Int64.modulus with 18446744073709551616%Z in *.
  set (a' := (Int64.unsigned a)) in *.
  set (b' := (Int64.unsigned b)) in *.
  assert (IZR (1 + smallb_thresh) <= IZR b' <= 18446744073709551615)%R as b_RANGE'.
  { split; apply IZR_le; lia.
  }
  assert(0 <= IZR a' <= 18446744073709551615)%R as a_RANGE'.
  { split; apply IZR_le; lia.
  }
  unfold smallb_thresh in *; cbn in b_RANGE'.
  assert (0 < b')%Z as b_NOT0 by lia.

  destruct (step2_real_div_longu_bigb_correct a b b_BIG) as (q & C1R & C1E & C1F).
  fold a' b' in C1E.

  (* Lower bound: the estimate may dip slightly below 0 (by < 1/2), but its
     nearest-even rounding is still >= 0. *)
  assert ((0 <=? ZnearestE (B2R 53 1024 q))=true)%Z as q_LOW.
  { apply Zle_imp_le_bool.
    set (q' := B2R 53 1024 q) in *.
    assert (-32767 / 65536 <= q')%R as LOWROUND.
    { replace q' with (IZR a' / IZR b' + (q' - IZR a' / IZR b'))%R by ring.
      gappa.
    }
    destruct (Rcase_abs q').
    { replace (ZnearestE q') with 0%Z. lia.
      symmetry.
      apply Znearest_imp.
      apply Rabs_lt.
      split; lra.
    }
    apply Znearest_lub.
    lra.
  }
  assert ((ZnearestE (B2R 53 1024 q) <=? Int64.max_unsigned)=true)%Z as q_HIGH.
  { apply Zle_imp_le_bool.
    change Int64.max_unsigned with (18446744073709551615)%Z.
    apply Znearest_glb.
    set (q' := B2R 53 1024 q) in *.
    replace q' with (IZR a' / IZR b' + (q' - IZR a' / IZR b'))%R by ring.
    gappa.
  }

  unfold step2_div_longu, step2_div_longu'.
  rewrite C1R.
  cbn.
  unfold Float.to_longu_ne.
  rewrite (ZofB_ne_range_correct _ _ q _ _).
  rewrite C1F.
  rewrite q_LOW.
  rewrite q_HIGH.
  cbn.
  rewrite normalize_ite.
  cbn.
  rewrite <- (function_ite Vlong).
  f_equal.
  unfold Int64.lt.
  set (q' := B2R 53 1024 q) in *.
  fold a'.
  rewrite Int64.signed_zero.
  set (q'' := (ZnearestE q')) in *.
  (* Within 1/2 of the real quotient: q'' is a'/b' or a'/b' + 1. *)
  assert ((Rabs (IZR a' / IZR b' - q') < / 2)%R) as HALF.
  { replace (IZR a' / IZR b' - q')%R with
      (-(q' - IZR a' / IZR b'))%R by ring.
    rewrite Rabs_Ropp.
    lra.
  }
  pose proof (find_quotient a' b' b_NOT0 q' HALF) as QUOTIENT.
  fold q'' in QUOTIENT.
  cbn zeta in QUOTIENT.

  assert (b' <> 0)%Z as NONZ by lia.
  pose proof (Zmod_eq_full a' b' NONZ) as MOD.
  assert (b' > 0)%Z as b_GT0 by lia.
  pose proof (Z_mod_lt a' b' b_GT0) as MOD_LT.
  (* Overshoot case: q'' = a'/b' + 1, correction subtracts 1 (adds mone). *)
  destruct (Z_lt_dec a' (b' * q'')) as [LT | GE].
  { replace (b' * q'' >? a')%Z with true in QUOTIENT by lia.
    unfold Int64.sub, Int64.mul.
    fold a' b'.
    replace q'' with (1 + a'/b')%Z by lia.
    rewrite repr_unsigned_mul.
    rewrite repr_unsigned_sub.

    replace ((1 + a' / b') * b' - a')%Z with (b' - (a' - a' / b' * b'))%Z by ring.
    rewrite <- MOD.
    rewrite Int64.signed_repr ; cycle 1.
    { change Int64.max_signed with 9223372036854775807%Z in *.
      change Int64.min_signed with (-9223372036854775808)%Z in *.
      lia.
    }
    rewrite zlt_true by lia.
    replace q'' with (1 + (a' / b'))%Z by lia.
    apply Int64.eqm_samerepr.
    apply Int64.eqm_trans with (y := ((1 + a' / b') + (-1))%Z).
    { apply Int64.eqm_add.
      apply Int64.eqm_sym.
      apply Int64.eqm_unsigned_repr.
      rewrite Int64.unsigned_mone.
      replace (-1)%Z with (0 - 1)%Z by ring.
      apply Int64.eqm_add.
      exists 1%Z.
      lia.
      apply Int64.eqm_refl.
    }
    replace (1 + a' / b' + -1)%Z with (a'/b')%Z by ring.
    apply Int64.eqm_refl.
  }
  (* Exact case: q'' = a'/b', no correction triggered. *)
  replace (b' * q'' >? a')%Z with false in QUOTIENT by lia.
  rewrite <- QUOTIENT.
  unfold Int64.sub, Int64.mul.
  fold a' b'.
  rewrite repr_unsigned_mul.
  rewrite repr_unsigned_sub.
  replace (a' / b' * b' - a')%Z with (- (a' mod b'))%Z by lia.
  rewrite Int64.signed_repr ; cycle 1.
  { change Int64.max_signed with 9223372036854775807%Z in *.
    change Int64.min_signed with (-9223372036854775808)%Z in *.
    lia.
  }
  rewrite zlt_false by lia.
  reflexivity.
Qed.

(* When b has its top bit set (b >= 2^63) and b <= a < 2^64, the quotient
   is exactly 1. Used for the "big divisor" special path. *)
Lemma one_bigb_div :
  forall a b
    (b_RANGE : (9223372036854775808 <= b < 18446744073709551616)%Z)
    (ORDER : (b <= a < 18446744073709551616)%Z),
    (a / b = 1)%Z.
Proof.
  intros.
  assert (((a - b) / b) = 0)%Z as ZERO.
  { apply Zdiv_small. lia.
  }
  replace a with (1 * b + (a - b))%Z by ring.
  rewrite Z.div_add_l by lia.
  rewrite ZERO.
  ring.
Qed.

(* Mirror images of [unsigned_repr_sub]/[unsigned_repr_add]: the inner
   [unsigned (repr _)] is removed rather than introduced. *)
Lemma repr_unsigned_sub2:
  forall a b,
    (Int64.repr (a - Int64.unsigned (Int64.repr b))) = Int64.repr (a - b).
Proof.
  intros.
  apply Int64.eqm_samerepr.
  apply Int64.eqm_sub.
  - apply Int64.eqm_refl.
  - apply Int64.eqm_sym. apply Int64.eqm_unsigned_repr.
Qed.

Lemma repr_unsigned_add2:
  forall a b,
    (Int64.repr (a + Int64.unsigned (Int64.repr b))) = Int64.repr (a + b).
Proof.
  intros.
  apply Int64.eqm_samerepr.
  apply Int64.eqm_add.
  - apply Int64.eqm_refl.
  - apply Int64.eqm_sym. apply Int64.eqm_unsigned_repr.
Qed.

(* The two-step division is correct for the whole range 1 < b <= max_signed:
   small b goes through [twostep_div_longu_smallb_correct]; big b combines
   the step-1 mostb estimate with the exact big-b step 2. *)
Lemma twostep_div_longu_mostb_correct :
  forall a b
    (b_RANGE : (1 < Int64.unsigned b <= Int64.max_signed)%Z),
    (twostep_div_longu (Vlong a) (Vlong b)) =
      (Val.maketotal (Val.divlu (Vlong a) (Vlong b))).
Proof.
  intros.
  destruct (Z_le_gt_dec (Int64.unsigned b) smallb_thresh).
  { apply twostep_div_longu_smallb_correct. lia. }
  set (a' := Int64.unsigned a).
  set (b' := Int64.unsigned b).
  assert (0 < b')%Z as b_NOT0 by lia.
  cbn.
  rewrite Int64.eq_false ; cycle 1.
  { intro Z. unfold b' in b_NOT0. rewrite Z in b_NOT0.
    rewrite Int64.unsigned_zero in b_NOT0.
    lia.
  }
  cbn.

  unfold twostep_div_longu.
  assert (1 < Int64.unsigned b <= mostb_thresh)%Z as MOST_B.
  { unfold mostb_thresh.
    change Int64.max_signed with 9223372036854775807%Z in b_RANGE.
    lia.
  }
  destruct (step1_div_longu_correct_mostb a b MOST_B) as
    (q & step1_REW & step1_RANGE).
  rewrite step1_REW.
  cbn.
  rewrite step2_div_long_bigb_correct; cycle 1.
  1, 2: lia.
  f_equal.

  unfold Int64.sub, Int64.mul.
  rewrite repr_unsigned_sub2.
  rewrite Int64.signed_repr by lia.
  unfold Int64.add, Int64.divu.
  fold a' b'.
  set (q' := Int64.unsigned q) in *.
  rewrite repr_unsigned_add2.
  (* q1 + (a - b*q1)/b = a/b, again via Z.div_add_l. *)
  rewrite <- Z.div_add_l by lia.
  f_equal. f_equal.
  ring.
Qed.

(* Full unsigned division: dispatch on special cases before the two-step path.
   - b with top bit set ("is_big"): quotient is (a >= b) as 0/1;
   - b <= 1 ("is_one"): quotient is a itself (b = 0 excluded by callers);
   - otherwise: the verified two-step division. *)
Definition full2_div_longu a b m :=
  let is_big := Val.cmpl_bool Clt b (Vlong Int64.zero) in
  let is_one := Val.cmplu_bool (Mem.valid_pointer m) Cle b (Vlong Int64.one) in
  let is_special := Val.or (Val.of_optbool is_big) (Val.of_optbool is_one) in
  let if_big := Val.longofintu (Val.of_optbool (Val.cmplu_bool (Mem.valid_pointer m) Cge a b)) in
  let if_special := Val.select is_big if_big a Tlong in
  Val.select (Val.cmp_bool Cne is_special (Vint Int.zero))
    if_special
    (twostep_div_longu a b) Tlong.

(* [full2_div_longu] computes the exact unsigned quotient for any b > 0. *)
Lemma full2_div_longu_correct :
  forall a b m
    (b_NOT0 : (0 < Int64.unsigned b)%Z),
    full2_div_longu (Vlong a) (Vlong b) m = Vlong (Int64.repr (Int64.unsigned a / Int64.unsigned b))%Z.
Proof.

  Local Opaque twostep_div_longu.
  intros.
  unfold full2_div_longu.
  cbn.
  unfold Int64.lt, Int64.ltu.
  pose proof (Int64.unsigned_range a).
  pose proof (Int64.unsigned_range b).
  set (a' := Int64.unsigned a) in *.
  set (b' := Int64.unsigned b) in *.
  rewrite Int64.signed_zero.
  rewrite Int64.unsigned_one.
  destruct zlt.
  (* Case b >= 2^63: quotient is 0 or 1 depending on a >= b. *)
  { replace (Val.cmp_bool Cne
               (Val.or Vtrue
                  (if negb (if zlt 1 b' then true else false) then Vtrue else Vfalse))
               (Vint Int.zero)) with (Some true) ; cycle 1.
    { destruct zlt; reflexivity.
    }
    cbn.
    destruct zlt; cbn.
    { rewrite Zdiv_small by lia.
      reflexivity.
    }
    rewrite one_bigb_div.
    reflexivity.
    {
      change Int64.modulus with 18446744073709551616%Z in *.
      split. 2: lia.
      unfold b'.
      rewrite Int64.unsigned_signed.
      unfold Int64.lt.
      rewrite Int64.signed_zero.
      rewrite zlt_true by lia.
      pose proof (Int64.signed_range b).
      change Int64.min_signed with (-9223372036854775808)%Z in *.
      change Int64.max_signed with (9223372036854775807)%Z in *.
      change Int64.modulus with 18446744073709551616%Z in *.
      lia.
    }
    change Int64.modulus with 18446744073709551616%Z in *.
    lia.
  }
  destruct zlt; cbn.
  (* Case 1 < b <= max_signed: two-step division applies. *)
  { change (negb (Int.eq (Int.or Int.zero Int.zero) Int.zero)) with false.
    cbn.
    rewrite twostep_div_longu_mostb_correct.
    {
      cbn.
      unfold Int64.eq.
      fold b'.
      rewrite Int64.unsigned_zero.
      rewrite zeq_false by lia.
      reflexivity.
    }

    change Int64.modulus with 18446744073709551616%Z in *.
    split. lia.
    rewrite Int64.unsigned_signed.
    unfold Int64.lt.
    rewrite Int64.signed_zero.
    rewrite zlt_false by lia.
    pose proof (Int64.signed_range b).
    change Int64.min_signed with (-9223372036854775808)%Z in *.
    change Int64.max_signed with (9223372036854775807)%Z in *.
    change Int64.modulus with 18446744073709551616%Z in *.
    lia.
  }
  (* Case b = 1 (b = 0 excluded by b_NOT0): a / 1 = a. *)
  change (negb (Int.eq (Int.or Int.zero Int.one) Int.zero)) with true.
  cbn.
  replace b' with 1%Z by lia.
  rewrite Z.div_1_r.
  unfold a'.
  rewrite Int64.repr_unsigned.
  reflexivity.
Qed.

(* Remainder from the step-2 quotient: r = a - q*b, corrected by +b when the
   uncorrected quotient overshot (r < 0). *)
Definition step2_mod_long a b :=
  let q := step2_div_long' a b in
  let r := Val.subl a (Val.mull q b) in
  Val.select (Val.cmpl_bool Clt r (Vlong Int64.zero))
    (Val.addl r b) r Tlong.

(* Two-step unsigned remainder: step-1 quotient estimate, then the
   signed step-2 remainder of the residual. *)
Definition twostep_mod_longu a b :=
  let q1 := step1_div_longu a b in
  step2_mod_long (Val.subl a (Val.mull b q1)) b.

(* Injectivity of the [Vlong] constructor. *)
Lemma vlong_eq: forall a b, (Vlong a) = (Vlong b) -> a = b.
Proof.
  intros a b EQ.
  congruence.
Qed.

(* Inside [Int64.repr], a multiplicand may be reduced modulo 2^64. *)
Lemma move_repr_in_mod :
  forall a b c,
    Int64.repr (a - b * c)%Z =
      Int64.repr (a - b * Int64.unsigned (Int64.repr c))%Z.
Proof.
  intros.
  apply Int64.eqm_samerepr.
  auto 10 with ints.
Qed.

(* The two-step remainder agrees with [Val.modlu] on 1 < b <= max_signed.
   Derived from the already-proved quotient correctness
   ([twostep_div_longu_mostb_correct]) via r = a - b*q. *)
Lemma twostep_mod_longu_mostb_correct :
  forall a b
    (b_RANGE : (1 < Int64.unsigned b <= Int64.max_signed)%Z),
    (twostep_mod_longu (Vlong a) (Vlong b)) =
      (Val.maketotal (Val.modlu (Vlong a) (Vlong b))).
Proof.
  intros.
  Local Transparent twostep_div_longu.
  pose proof (twostep_div_longu_mostb_correct a b b_RANGE) as div_correct.
  unfold twostep_div_longu, twostep_mod_longu, step2_div_long, step2_mod_long in *.
  set (q1 := (step1_div_longu (Vlong a) (Vlong b))) in *.
  set (q2 := (step2_div_long' (Val.subl (Vlong a) (Val.mull (Vlong b) q1)) (Vlong b))) in *.
  cbn in div_correct.
  cbn.
  unfold Int64.eq in *.
  change (Int64.unsigned Int64.zero) with 0%Z in *.
  rewrite zeq_false by lia.
  rewrite zeq_false in div_correct by lia.
  cbn in div_correct.
  cbn.
  (* Both intermediate quotients must be Vlong, else div_correct is absurd. *)
  destruct q1 as [ | | q1l | | | ] ; cbn in *; try discriminate.
  destruct q2 as [ | | q2l | | | ] ; cbn in *; try discriminate.
  rewrite <- (function_ite Vlong).
  rewrite <- (function_ite Vlong) in div_correct.
  cbn. cbn in div_correct.
  unfold Int64.lt, Int64.sub, Int64.mul, Int64.add, Int64.divu, Int64.modu in *.
  set (a' := Int64.unsigned a) in *.
  set (b' := Int64.unsigned b) in *.
  set (q1' := Int64.unsigned q1l) in *.
  set (q2' := Int64.unsigned q2l) in *.
  change (Int64.signed Int64.zero) with 0%Z in *.
  (* Collapse the nested repr/unsigned chain into one modular expression. *)
  replace (Int64.repr
             (Int64.unsigned
                (Int64.repr (a' - Int64.unsigned (Int64.repr (b' * q1')))) -
                Int64.unsigned (Int64.repr (q2' * b'))))
    with (Int64.repr (a' - (b' * q1') - (q2' * b')))%Z in * ; cycle 1.
  {
    apply Int64.eqm_samerepr.
    auto 10 with ints.
  }
  replace (a' - b' * q1' - q2' * b')%Z with (a' - b' * (q1' + q2'))%Z in * by ring.
  f_equal.
  apply vlong_eq in div_correct.
  (* a mod b = a - b * (a/b); substitute the proven quotient q1+q2. *)
  rewrite Z.mod_eq by lia.
  rewrite (move_repr_in_mod a' b' (a' / b'))%Z.
  rewrite <- div_correct.
  clear div_correct.
  rewrite <- (move_repr_in_mod a' b')%Z.

  destruct zlt as [NEG | POS].
  2: reflexivity.
  (* Negative-remainder branch: adding b matches the quotient's -1 correction. *)
  rewrite repr_unsigned_add.
  replace (a' - b' * (q1' + q2') + b')%Z with (a' - b' * (q1' + q2' - 1))%Z by ring.
  apply Int64.eqm_samerepr.
  assert (Int64.eqm (Int64.unsigned (Int64.repr (q2' + Int64.unsigned Int64.mone)))
            (q2' -1))%Z as EQM.
  { apply Int64.eqm_trans with (y := (q2' + Int64.unsigned Int64.mone)%Z).
    apply Int64.eqm_sym.
    apply Int64.eqm_unsigned_repr.
    apply Int64.eqm_add.
    apply Int64.eqm_refl.
    exists (1)%Z.
    reflexivity.
  }
  replace (q1' + q2' - 1)%Z with (q1' + (q2' - 1))%Z by ring.
  auto with ints.
Qed.

(* Full unsigned remainder, with the same special-case dispatch as
   [full2_div_longu]:
   - b with top bit set: remainder is a - b when a >= b, else a;
   - b <= 1: remainder is 0;
   - otherwise: the verified two-step remainder. *)
Definition full2_mod_longu a b m :=
  let is_big := Val.cmpl_bool Clt b (Vlong Int64.zero) in
  let is_one := Val.cmplu_bool (Mem.valid_pointer m) Cle b (Vlong Int64.one) in
  let is_special := Val.or (Val.of_optbool is_big) (Val.of_optbool is_one) in
  let if_big := Val.select (Val.cmplu_bool (Mem.valid_pointer m) Cge a b) (Val.subl a b) a Tlong in
  let if_special := Val.select is_big if_big (Vlong Int64.zero) Tlong in
  Val.select (Val.cmp_bool Cne is_special (Vint Int.zero))
    if_special
    (twostep_mod_longu a b) Tlong.

(* [full2_mod_longu] computes the exact unsigned remainder for any b > 0. *)
Lemma full2_mod_longu_correct :
  forall a b m
    (b_NOT0 : (0 < Int64.unsigned b)%Z),
    full2_mod_longu (Vlong a) (Vlong b) m = Vlong (Int64.repr ((Int64.unsigned a) mod (Int64.unsigned b)))%Z.
Proof.

  Local Opaque twostep_mod_longu.
  intros.
  unfold full2_mod_longu.
  cbn.
  unfold Int64.lt, Int64.ltu.
  pose proof (Int64.unsigned_range a).
  pose proof (Int64.unsigned_range b).
  set (a' := Int64.unsigned a) in *.
  set (b' := Int64.unsigned b) in *.
  rewrite Int64.signed_zero.
  rewrite Int64.unsigned_one.
  destruct zlt.
  (* Case b >= 2^63. *)
  { replace (Val.cmp_bool Cne
               (Val.or Vtrue
                  (if negb (if zlt 1 b' then true else false) then Vtrue else Vfalse))
               (Vint Int.zero)) with (Some true) ; cycle 1.
    { destruct zlt; reflexivity.
    }
    cbn.
    rewrite Z.mod_eq by lia.

    destruct zlt; cbn.
    { rewrite Zdiv_small by lia.
      replace (a' - b' * 0)%Z with a' by ring.
      unfold a'.
      rewrite Int64.repr_unsigned.
      reflexivity.
    }
    rewrite one_bigb_div.
    { unfold Int64.sub.
      fold a' b'.
      repeat f_equal. ring.
    }
    {
      change Int64.modulus with 18446744073709551616%Z in *.
      split. 2: lia.
      unfold b'.
      rewrite Int64.unsigned_signed.
      unfold Int64.lt.
      rewrite Int64.signed_zero.
      rewrite zlt_true by lia.
      pose proof (Int64.signed_range b).
      change Int64.min_signed with (-9223372036854775808)%Z in *.
      change Int64.max_signed with (9223372036854775807)%Z in *.
      change Int64.modulus with 18446744073709551616%Z in *.
      lia.
    }
    change Int64.modulus with 18446744073709551616%Z in *.
    lia.
  }
  destruct zlt; cbn.
  (* Case 1 < b <= max_signed: two-step remainder applies. *)
  { change (negb (Int.eq (Int.or Int.zero Int.zero) Int.zero)) with false.
    cbn.
    rewrite twostep_mod_longu_mostb_correct.
    {
      cbn.
      unfold Int64.eq.
      fold b'.
      rewrite Int64.unsigned_zero.
      rewrite zeq_false by lia.
      reflexivity.
    }

    change Int64.modulus with 18446744073709551616%Z in *.
    split. lia.
    rewrite Int64.unsigned_signed.
    unfold Int64.lt.
    rewrite Int64.signed_zero.
    rewrite zlt_false by lia.
    pose proof (Int64.signed_range b).
    change Int64.min_signed with (-9223372036854775808)%Z in *.
    change Int64.max_signed with (9223372036854775807)%Z in *.
    change Int64.modulus with 18446744073709551616%Z in *.
    lia.
  }
  (* Case b = 1: a mod 1 = 0. *)
  change (negb (Int.eq (Int.or Int.zero Int.one) Int.zero)) with true.
  cbn.
  replace b' with 1%Z by lia.
  rewrite Z.mod_1_r.
  reflexivity.
Qed.

Open Scope cminorsel_scope.
(* Smart constructors for the CminorSel expressions implementing the
   division sequence (kvx operators). *)
Definition e_invfs a := Eop Oinvfs (a ::: Enil).
Definition e_float_of_longu a := Eop Ofloatoflongu (a ::: Enil).
Definition e_float_of_long a := Eop Ofloatoflong (a ::: Enil).
Definition e_float_of_single a := Eop Ofloatofsingle (a ::: Enil).
Definition e_single_of_float a := Eop Osingleoffloat (a ::: Enil).
Definition e_long_of_float_ne a := Eop Olongoffloat_ne (a ::: Enil).
Definition e_longu_of_float_ne a := Eop Olonguoffloat_ne (a ::: Enil).
Definition e_mulf a b := Eop Omulf (a ::: b ::: Enil).
(* More CminorSel smart constructors (continued). *)
Definition e_float_const c := Eop (Ofloatconst c) Enil.
Definition e_fmaddf a b c := Eop Ofmaddf (a ::: b ::: c ::: Enil).
Definition e_fmsubf a b c := Eop Ofmsubf (a ::: b ::: c ::: Enil).
Definition e_addlimm a b := Eop (Oaddlimm b) (a ::: Enil).
Definition e_msubl a b c := Eop Omsubl (a ::: b ::: c ::: Enil).
Definition e_ite ty c vc v1 v2 := Eop (Osel c ty) (v1 ::: v2 ::: vc ::: Enil).
Definition e_cmplimm c v n := Eop (Ocmp (Ccomplimm c n)) (v ::: Enil).
Definition e_cmpluimm c v n := Eop (Ocmp (Ccompluimm c n)) (v ::: Enil).
Definition e_addl a b := Eop Oaddl (a ::: b ::: Enil).
Definition e_or a b := Eop Oor (a ::: b ::: Enil).
Definition e_cast32unsigned a := Eop Ocast32unsigned (a ::: Enil).
Definition e_cmplu c a b := Eop (Ocmp (Ccomplu c)) (a ::: b ::: Enil).

(* De Bruijn indices of the 5 values bound by [e_setup1], innermost last:
   a, float(a), b, float(b), approx 1/b. *)
Definition a_var1 := Eletvar (4%nat).
Definition a_d_var1 := Eletvar (3%nat).
Definition b_var1 := Eletvar (2%nat).
Definition b_d_var1 := Eletvar (1%nat).
Definition binv_d_var1 := Eletvar (0%nat).

(* Bind a, its double conversion, b, its double conversion, and the
   single-precision reciprocal estimate of b, then evaluate [rest]. *)
Definition e_setup1 a b rest :=
  Elet a (Elet (e_float_of_longu (Eletvar 0%nat))
            (Elet (lift (lift b)) (Elet (e_float_of_longu (Eletvar 0%nat))
                                    (Elet (e_float_of_single (e_invfs (e_single_of_float (Eletvar 0%nat))))
                                       rest)))).
(* Step 1: unsigned rounding of float(a) * approx(1/b). *)
Definition e_step1 := e_longu_of_float_ne (e_mulf a_d_var1 binv_d_var1).

(* [e_step1] under [e_setup1] evaluates to [step1_div_longu a b]. *)
Lemma e_step1_correct :
  forall (ge : genv) (sp: val) cmenv memenv (le : letenv)
    (expr_a : expr) (a : int64) (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vlong a))
    (expr_b : expr) (b : int64) (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vlong b)),
    (eval_expr ge sp cmenv memenv le (e_setup1 expr_a expr_b (e_step1))
       (step1_div_longu (Vlong a) (Vlong b))).
Proof.
  intros.
  unfold e_setup1, step1_div_longu.
  repeat econstructor.
  { eassumption. }
  { cbn. apply eval_lift. apply eval_lift. eassumption. }
Qed.

(* Extend the environment with the step-1 result (now at index 0). *)
Definition e_setup2 a b rest := (e_setup1 a b (Elet e_step1 rest)).

(* Indices after e_setup2: previous bindings shifted up by one. *)
Definition a_var2 := Eletvar (5%nat).
Definition a_d_var2 := Eletvar (4%nat).
Definition b_var2 := Eletvar (3%nat).
Definition b_d_var2 := Eletvar (2%nat).
Definition binv_d_var2 := Eletvar (1%nat).
Definition step1_var2 := Eletvar (0%nat).

(* Step 2: residual a - b * q1 (multiply-subtract). *)
Definition e_step2 := e_msubl a_var2 b_var2 step1_var2.

(* Extend the environment with the step-2 residual. *)
Definition e_setup3 a b rest := (e_setup2 a b (Elet e_step2 rest)).

(* Indices after e_setup3. *)
Definition a_var3 := Eletvar (6%nat).
Definition a_d_var3 := Eletvar (5%nat).
Definition b_var3 := Eletvar (4%nat).
Definition b_d_var3 := Eletvar (3%nat).
Definition binv_d_var3 := Eletvar (2%nat).
Definition step1_var3 := Eletvar (1%nat).
Definition step2_var3 := Eletvar (0%nat).

(* Step 3: signed rounding of float(residual) times a Newton-refined
   inverse: binv + binv * (1 - binv * b), computed with fused FMA ops. *)
Definition e_step3 :=
  e_long_of_float_ne
    (e_mulf (e_float_of_long step2_var3)
       (e_fmaddf
          binv_d_var3
          (e_fmsubf (e_float_const ExtFloat.one)
             binv_d_var3
             b_d_var3 )
          binv_d_var3)).

(* [e_step3] under [e_setup3] evaluates to the uncorrected step-2 quotient
   of the residual. *)
Lemma e_step3_correct :
  forall (ge : genv) (sp: val) cmenv memenv (le : letenv)
    (expr_a : expr) (a : int64) (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vlong a))
    (expr_b : expr) (b : int64) (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vlong b)),
    (eval_expr ge sp cmenv memenv le (e_setup3 expr_a expr_b (e_step3))
       (step2_div_long' (Val.subl (Vlong a) (Val.mull (Vlong b) (step1_div_longu (Vlong a) (Vlong b)))) (Vlong b))).
Proof.
  intros.
  unfold e_setup2, e_setup1, e_step2, step2_div_long', step2_real_div_long, approx_inv_longu.
  repeat (econstructor + apply eval_lift + eassumption).
Qed.

(* Extend the environment with the step-3 quotient. *)
Definition e_setup4 a b rest := (e_setup3 a b (Elet e_step3 rest)).

(* Indices after e_setup4. *)
Definition a_var4 := Eletvar (7%nat).
Definition a_d_var4 := Eletvar (6%nat).
Definition b_var4 := Eletvar (5%nat).
Definition b_d_var4 := Eletvar (4%nat).
Definition binv_d_var4 := Eletvar (3%nat).
Definition step1_var4 := Eletvar (2%nat).
Definition step2_var4 := Eletvar (1%nat).
Definition step3_var4 := Eletvar (0%nat).

(* Step 4: one-ulp correction — subtract 1 from q when residual - q*b < 0. *)
Definition e_step4 :=
  e_ite Tlong (Ccompl0 Clt) (e_msubl step2_var4 step3_var4 b_var4)
    (e_addlimm step3_var4 Int64.mone) step3_var4.

(* [e_step4] under [e_setup4] evaluates to the corrected step-2 quotient. *)
Lemma e_step4_correct :
  forall (ge : genv) (sp: val) cmenv memenv (le : letenv)
    (expr_a : expr) (a : int64) (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vlong a))
    (expr_b : expr) (b : int64) (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vlong b)),
    (eval_expr ge sp cmenv memenv le (e_setup4 expr_a expr_b (e_step4))
       (step2_div_long (Val.subl (Vlong a) (Val.mull (Vlong b) (step1_div_longu (Vlong a) (Vlong b)))) (Vlong b))).
Proof.
  intros.
  unfold e_setup2, e_setup1, e_step2, step2_div_long, step2_div_long', step2_real_div_long, approx_inv_longu, step1_div_longu.
  repeat (econstructor + apply eval_lift + eassumption).
Qed.

(* Extend the environment with the step-4 corrected quotient. *)
Definition e_setup5 a b rest := (e_setup4 a b (Elet e_step4 rest)).

(* Indices after e_setup5. *)
Definition a_var5 := Eletvar (8%nat).
Definition a_d_var5 := Eletvar (7%nat).
Definition b_var5 := Eletvar (6%nat).
Definition b_d_var5 := Eletvar (5%nat).
Definition binv_d_var5 := Eletvar (4%nat).
Definition step1_var5 := Eletvar (3%nat).
Definition step2_var5 := Eletvar (2%nat).
Definition step3_var5 := Eletvar (1%nat).
Definition step4_var5 := Eletvar (0%nat).

(* Step 5: final quotient q1 + q2. *)
Definition e_step5 := e_addl step1_var5 step4_var5.

(* [e_step5] under [e_setup5] evaluates to the full two-step quotient. *)
Lemma e_step5_correct :
  forall (ge : genv) (sp: val) cmenv memenv (le : letenv)
    (expr_a : expr) (a : int64) (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vlong a))
    (expr_b : expr) (b : int64) (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vlong b)),
    (eval_expr ge sp cmenv memenv le (e_setup5 expr_a expr_b (e_step5))
       (twostep_div_longu (Vlong a) (Vlong b))).
Proof.
  intros.
  Local Transparent twostep_div_longu.
  repeat unfold e_setup2, e_setup1, e_step2, step2_div_long, step2_div_long', step2_real_div_long, approx_inv_longu, step1_div_longu, twostep_div_longu.
  repeat (econstructor + apply eval_lift + eassumption).
Qed.

(* Extend the environment with the two-step quotient. *)
Definition e_setup6 a b rest := (e_setup5 a b (Elet e_step5 rest)).

(* Indices after e_setup6. *)
Definition a_var6 := Eletvar (9%nat).
Definition a_d_var6 := Eletvar (8%nat).
Definition b_var6 := Eletvar (7%nat).
Definition b_d_var6 := Eletvar (6%nat).
Definition binv_d_var6 := Eletvar (5%nat).
Definition step1_var6 := Eletvar (4%nat).
Definition step2_var6 := Eletvar (3%nat).
Definition step3_var6 := Eletvar (2%nat).
Definition step4_var6 := Eletvar (1%nat).
Definition twostep_var6 := Eletvar (0%nat).

(* Step 6: the "b has its top bit set" test (b < 0 as a signed compare). *)
Definition e_step6 := e_cmplimm Clt b_var6 Int64.zero.

(* Extend the environment with the is_big flag. *)
Definition e_setup7 a b rest := e_setup6 a b (Elet e_step6 rest).

(* Indices after e_setup7. *)
Definition a_var7 := Eletvar (10%nat).
Definition a_d_var7 := Eletvar (9%nat).
Definition b_var7 := Eletvar (8%nat).
Definition b_d_var7 := Eletvar (7%nat).
Definition binv_d_var7 := Eletvar (6%nat).
Definition step1_var7 := Eletvar (5%nat).
(* NOTE(review): same index as step1_var7 — likely meant 4%nat by analogy
   with the other levels; unused in the definitions below, so harmless.
   Confirm before reusing. *)
Definition step2_var7 := Eletvar (5%nat).
Definition step3_var7 := Eletvar (3%nat).
Definition step4_var7 := Eletvar (2%nat).
Definition twostep_var7 := Eletvar (1%nat).
Definition is_big_var7 := Eletvar (0%nat).

(* Special-case dispatch mirroring [full2_div_longu]. *)
Definition e_is_one := e_cmpluimm Cle b_var7 Int64.one.
Definition e_is_special := e_or is_big_var7 e_is_one.
Definition e_if_big := e_cast32unsigned (e_cmplu Cge a_var7 b_var7).
Definition e_if_special := e_ite Tlong (Ccompu0 Cne) is_big_var7 e_if_big a_var7.
Definition e_step7 := e_ite Tlong (Ccompu0 Cne) e_is_special e_if_special twostep_var7.

(* [e_step7] under [e_setup7] evaluates to [full2_div_longu a b memenv]. *)
Lemma e_step7_correct :
  forall (ge : genv) (sp: val) cmenv memenv (le : letenv)
    (expr_a : expr) (a : int64) (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vlong a))
    (expr_b : expr) (b : int64) (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vlong b)),
    (eval_expr ge sp cmenv memenv le (e_setup7 expr_a expr_b (e_step7))
       (full2_div_longu (Vlong a) (Vlong b) memenv)).
Proof.
  intros.
  Local Transparent full2_div_longu.
  repeat unfold e_setup2, e_setup1, e_step2, step2_div_long, step2_div_long', step2_real_div_long, approx_inv_longu, step1_div_longu, twostep_div_longu, full2_div_longu.
  repeat (econstructor + apply eval_lift + eassumption).
  cbn.
  repeat f_equal.
  destruct (Int64.lt b Int64.zero); cbn; change (Int.eq Int.one Int.zero) with false; change (Int.eq Int.zero Int.zero) with true; cbn; reflexivity.
Qed.

(* Complete CminorSel expression for unsigned 64-bit division. *)
Definition fp_divu64 a b := e_setup7 a b e_step7.

(* [fp_divu64] computes [Int64.divu a b] for any b > 0 (b = 0 excluded). *)
Theorem fp_divu64_correct :
  forall (ge : genv) (sp: val) cmenv memenv
    (le : letenv) (expr_a expr_b : expr) (a b : int64)
    (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vlong a))
    (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vlong b))
    (b_nz : (Int64.unsigned b > 0)%Z),
    eval_expr ge sp cmenv memenv le (fp_divu64 expr_a expr_b)
      (Vlong (Int64.divu a b)).
Proof.
  intros.
  unfold Int64.divu.
  rewrite <- full2_div_longu_correct with (m := memenv) by lia.
  apply e_step7_correct; assumption.
Qed.

(* Unsigned modulo as a mod b = a - b * (a / b), via multiply-subtract. *)
Definition fp_modu64 a b := Elet a (Elet (lift b) (e_msubl (Eletvar 1%nat) (Eletvar 0%nat)
                                            (fp_divu64 (Eletvar 1%nat) (Eletvar 0%nat)))).

(* [fp_modu64] computes [Int64.modu a b] for any b > 0. *)
Theorem fp_modu64_correct :
  forall (ge : genv) (sp: val) cmenv memenv
    (le : letenv) (expr_a expr_b : expr) (a b : int64)
    (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vlong a))
    (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vlong b))
    (b_nz : (Int64.unsigned b > 0)%Z),
    eval_expr ge sp cmenv memenv le (fp_modu64 expr_a expr_b)
      (Vlong (Int64.modu a b)).
Proof.
  intros.
  rewrite Int64.modu_divu; cycle 1.
  { intro Z.
    subst.
    rewrite Int64.unsigned_zero in b_nz.
    lia.
  }
  unfold fp_modu64.
  Local Opaque fp_divu64.
  repeat (econstructor + apply eval_lift + eassumption).
  { apply fp_divu64_correct;
      repeat (econstructor + apply eval_lift + eassumption).
  }
  cbn.
  rewrite Int64.mul_commut.
  reflexivity.
Qed.

(* Helpers for the signed variant: sign test, xor of sign bits, negation,
   and absolute value (abs-diff against zero). *)
Definition e_is_negl a := Eop (Ocmp (Ccomplimm Clt Int64.zero)) (a ::: Enil).
Definition e_xorw a b := Eop Oxor (a ::: b ::: Enil).
Definition e_negl a := Eop Onegl (a ::: Enil).
Definition e_absl a := Eop (Oabsdifflimm Int64.zero) (a ::: Enil).
+ +Definition fp_divs64 a b := + Elet a (Elet (lift b) + (Elet (fp_divu64 (e_absl (Eletvar (1%nat))) (e_absl (Eletvar (0%nat)))) + (e_ite Tlong (Ccompu0 Cne) (e_xorw (e_is_negl (Eletvar 2%nat)) + (e_is_negl (Eletvar 1%nat))) + (e_negl (Eletvar 0%nat)) (Eletvar 0%nat)))). + +Lemma nonneg_signed_unsigned: + forall x (x_NONNEG : (Int64.signed x >= 0)%Z), + (Int64.signed x) = (Int64.unsigned x). +Proof. + intros. + pose proof (Int64.unsigned_range x). + unfold Int64.signed in *. + destruct zlt. reflexivity. + change Int64.modulus with 18446744073709551616%Z in *. + change Int64.half_modulus with 9223372036854775808%Z in *. + lia. +Qed. + +Lemma long_min_signed_unsigned : + (- Int64.min_signed < Int64.max_unsigned)%Z. +Proof. + reflexivity. +Qed. + +Lemma long_divs_divu : + forall a b + (b_NOT0 : (Int64.signed b <> 0)%Z), + Int64.divs a b = if xorb (Int64.lt a Int64.zero) + (Int64.lt b Int64.zero) + then Int64.neg (Int64.divu (ExtValues.long_abs a) + (ExtValues.long_abs b)) + else Int64.divu (ExtValues.long_abs a) (ExtValues.long_abs b). +Proof. + intros. + unfold Int64.divs, Int64.divu, Int64.lt, ExtValues.long_abs. + pose proof (Int64.signed_range a) as a_RANGE. + pose proof (Int64.signed_range b) as b_RANGE. + change (Int64.signed Int64.zero) with 0%Z. + destruct zlt. + - cbn. rewrite (Z.abs_neq (Int64.signed a)) by lia. + rewrite (Int64.unsigned_repr (- Int64.signed a)); cycle 1. + { pose proof long_min_signed_unsigned. lia. } + + destruct zlt. + + rewrite (Z.abs_neq (Int64.signed b)) by lia. + rewrite Int64.unsigned_repr ; cycle 1. + { pose proof long_min_signed_unsigned. lia. } + rewrite <- (Z.opp_involutive (Int64.signed b)) at 1. + rewrite Z.quot_opp_r by lia. + rewrite <- (Z.opp_involutive (Int64.signed a)) at 1. + rewrite Z.quot_opp_l by lia. + rewrite Z.quot_div_nonneg by lia. + rewrite Z.opp_involutive. + reflexivity. + + + rewrite (Z.abs_eq (Int64.signed b)) by lia. + rewrite Int64.unsigned_repr ; cycle 1. + { pose proof Int64.max_signed_unsigned. lia. 
} + rewrite <- (Z.opp_involutive (Int64.signed a)) at 1. + rewrite Z.quot_opp_l by lia. + rewrite Z.quot_div_nonneg by lia. + rewrite Int64.neg_repr. + reflexivity. + + - cbn. rewrite (Z.abs_eq (Int64.signed a)) by lia. + rewrite (Int64.unsigned_repr (Int64.signed a)); cycle 1. + { pose proof Int64.max_signed_unsigned. lia. } + destruct zlt. + + rewrite (Z.abs_neq (Int64.signed b)) by lia. + rewrite Int64.unsigned_repr ; cycle 1. + { pose proof long_min_signed_unsigned. lia. } + rewrite Int64.neg_repr. + rewrite <- (Z.opp_involutive (Int64.signed b)) at 1. + rewrite Z.quot_opp_r by lia. + rewrite Z.quot_div_nonneg by lia. + reflexivity. + + + rewrite (Z.abs_eq (Int64.signed b)) by lia. + rewrite Int64.unsigned_repr ; cycle 1. + { pose proof Int64.max_signed_unsigned. lia. } + rewrite Z.quot_div_nonneg by lia. + reflexivity. +Qed. + +Lemma nonzero_unsigned_signed : + forall b, (Int64.unsigned b > 0 -> Int64.signed b <> 0)%Z. +Proof. + intros b GT EQ. + rewrite Int64.unsigned_signed in GT. + unfold Int64.lt in GT. + rewrite Int64.signed_zero in GT. + destruct zlt in GT; lia. +Qed. + +Theorem fp_divs64_correct : + forall (ge : genv) (sp: val) cmenv memenv + (le : letenv) (expr_a expr_b : expr) (a b : int64) + (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vlong a)) + (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vlong b)) + (b_nz : (Int64.unsigned b > 0)%Z), + eval_expr ge sp cmenv memenv le (fp_divs64 expr_a expr_b) + (Vlong (Int64.divs a b)). +Proof. + intros. + unfold fp_divs64. + Local Opaque fp_divu64. + repeat (econstructor + apply eval_lift + eassumption). + apply fp_divu64_correct. + all: repeat (econstructor + apply eval_lift + eassumption). + { unfold ExtValues.long_absdiff, ExtValues.Z_abs_diff. + rewrite Int64.signed_zero. rewrite Z.sub_0_r. + rewrite Int64.unsigned_repr. + { pose proof (nonzero_unsigned_signed b b_nz). + lia. + } + pose proof Int64.max_signed_unsigned. + pose proof long_min_signed_unsigned. + pose proof (Int64.signed_range b). 
+ lia. + } + cbn. + rewrite long_divs_divu ; cycle 1. + { apply nonzero_unsigned_signed. assumption. } + unfold Int64.lt, ExtValues.long_abs, ExtValues.long_absdiff, ExtValues.Z_abs_diff. + change (Int64.signed Int64.zero) with 0%Z. + repeat rewrite Z.sub_0_r. + destruct zlt; destruct zlt; reflexivity. +Qed. + +Lemma long_mods_modu : + forall a b + (b_NOT0 : (Int64.signed b <> 0)%Z), + Int64.mods a b = if Int64.lt a Int64.zero + then Int64.neg (Int64.modu (ExtValues.long_abs a) + (ExtValues.long_abs b)) + else Int64.modu (ExtValues.long_abs a) (ExtValues.long_abs b). +Proof. + intros. + unfold Int64.mods, Int64.modu, Int64.lt, ExtValues.long_abs. + pose proof (Int64.signed_range a) as a_RANGE. + pose proof (Int64.signed_range b) as b_RANGE. + change (Int64.signed Int64.zero) with 0%Z. + destruct zlt. + - cbn. rewrite (Z.abs_neq (Int64.signed a)) by lia. + rewrite (Int64.unsigned_repr (- Int64.signed a)); cycle 1. + { pose proof long_min_signed_unsigned. lia. } + + destruct (zlt (Int64.signed b) 0%Z). + + rewrite (Z.abs_neq (Int64.signed b)) by lia. + rewrite Int64.unsigned_repr ; cycle 1. + { pose proof long_min_signed_unsigned. lia. } + rewrite <- (Z.opp_involutive (Int64.signed b)) at 1. + rewrite Z.rem_opp_r by lia. + rewrite <- (Z.opp_involutive (Int64.signed a)) at 1. + rewrite Z.rem_opp_l by lia. + rewrite Z.rem_mod_nonneg by lia. + rewrite Int64.neg_repr. + reflexivity. + + + rewrite (Z.abs_eq (Int64.signed b)) by lia. + rewrite Int64.unsigned_repr ; cycle 1. + { pose proof Int64.max_signed_unsigned. lia. } + rewrite <- (Z.opp_involutive (Int64.signed a)) at 1. + rewrite Z.rem_opp_l by lia. + rewrite Z.rem_mod_nonneg by lia. + rewrite Int64.neg_repr. + reflexivity. + + - cbn. rewrite (Z.abs_eq (Int64.signed a)) by lia. + rewrite (Int64.unsigned_repr (Int64.signed a)); cycle 1. + { pose proof Int64.max_signed_unsigned. lia. } + destruct (zlt (Int64.signed b) 0%Z). + + rewrite (Z.abs_neq (Int64.signed b)) by lia. + rewrite Int64.unsigned_repr ; cycle 1. 
+ { pose proof long_min_signed_unsigned. lia. } + rewrite <- (Z.opp_involutive (Int64.signed b)) at 1. + rewrite Z.rem_opp_r by lia. + rewrite Z.rem_mod_nonneg by lia. + reflexivity. + + + rewrite (Z.abs_eq (Int64.signed b)) by lia. + rewrite Int64.unsigned_repr ; cycle 1. + { pose proof Int64.max_signed_unsigned. lia. } + rewrite Z.rem_mod_nonneg by lia. + reflexivity. +Qed. + +Definition fp_mods64z a b := + Elet a (Elet (lift b) + (Elet (fp_modu64 (e_absl (Eletvar (1%nat))) (e_absl (Eletvar (0%nat)))) + (e_ite Tlong (Ccompl0 Clt) (Eletvar 2%nat) + (e_negl (Eletvar 0%nat)) (Eletvar 0%nat)))). + +Theorem fp_mods64z_correct : + forall (ge : genv) (sp: val) cmenv memenv + (le : letenv) (expr_a expr_b : expr) (a b : int64) + (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vlong a)) + (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vlong b)) + (b_nz : (Int64.unsigned b > 0)%Z), + eval_expr ge sp cmenv memenv le (fp_mods64z expr_a expr_b) + (Vlong (Int64.mods a b)). +Proof. + intros. + unfold fp_mods64z. + Local Opaque fp_modu64. + repeat (econstructor + apply eval_lift + eassumption). + apply fp_modu64_correct. + all: repeat (econstructor + apply eval_lift + eassumption). + { unfold ExtValues.long_absdiff, ExtValues.Z_abs_diff. + rewrite Int64.signed_zero. rewrite Z.sub_0_r. + rewrite Int64.unsigned_repr. + { pose proof (nonzero_unsigned_signed b b_nz). + lia. + } + pose proof Int64.max_signed_unsigned. + pose proof long_min_signed_unsigned. + pose proof (Int64.signed_range b). + lia. + } + cbn. + rewrite long_mods_modu ; cycle 1. + { apply nonzero_unsigned_signed. assumption. } + unfold Int64.lt, ExtValues.long_abs, ExtValues.long_absdiff, ExtValues.Z_abs_diff. + change (Int64.signed Int64.zero) with 0%Z. + repeat rewrite Z.sub_0_r. + destruct zlt; reflexivity. +Qed. + +Definition fp_mods64 a b := + Elet a (Elet (lift b) + (Elet (fp_divs64 (Eletvar (1%nat)) (Eletvar (0%nat))) + (e_msubl (Eletvar 2%nat) (Eletvar 1%nat) (Eletvar 0%nat)))). 
+ +Theorem fp_mods64_correct : + forall (ge : genv) (sp: val) cmenv memenv + (le : letenv) (expr_a expr_b : expr) (a b : int64) + (EVAL_a : eval_expr ge sp cmenv memenv le expr_a (Vlong a)) + (EVAL_b : eval_expr ge sp cmenv memenv le expr_b (Vlong b)) + (b_nz : (Int64.unsigned b > 0)%Z), + eval_expr ge sp cmenv memenv le (fp_mods64 expr_a expr_b) + (Vlong (Int64.mods a b)). +Proof. + intros. + rewrite Int64.mods_divs. + unfold fp_mods64. + Local Opaque fp_divs64. + repeat (econstructor + apply eval_lift + eassumption). + { apply fp_divs64_correct; + repeat (econstructor + apply eval_lift + eassumption). + } + cbn. + rewrite Int64.mul_commut. + reflexivity. +Qed. diff --git a/kvx/NeedOp.v b/kvx/NeedOp.v index cc86fc88..5a3b7190 100644 --- a/kvx/NeedOp.v +++ b/kvx/NeedOp.v @@ -131,6 +131,8 @@ Definition needs_of_operation (op: operation) (nv: nval): list nval := | Olongoffloat | Olonguoffloat | Ofloatoflong | Ofloatoflongu => op1 (default nv) | Ointofsingle | Ointuofsingle | Osingleofint | Osingleofintu => op1 (default nv) | Olongofsingle | Olonguofsingle | Osingleoflong | Osingleoflongu => op1 (default nv) + | Ointofsingle_ne | Ointuofsingle_ne => op1 (default nv) + | Olongoffloat_ne | Olonguoffloat_ne => op1 (default nv) | Ocmp c => needs_of_condition c | Oextfz _ _ | Oextfs _ _ | Oextfzl _ _ | Oextfsl _ _ => op1 (default nv) | Oinsf _ _ | Oinsfl _ _ => op2 (default nv) @@ -209,6 +209,11 @@ Inductive operation : Type := | Olonguofsingle (**r [rd = unsigned_long_of_float32(r1)] *) | Osingleoflong (**r [rd = float32_of_signed_long(r1)] *) | Osingleoflongu (**r [rd = float32_of_unsigned_int(r1)] *) + | Ointofsingle_ne (**r [rd = signed_int_of_float64(r1)] *) + | Ointuofsingle_ne (**r [rd = unsigned_int_of_float64(r1)] *) + | Olongoffloat_ne (**r [rd = signed_long_of_float64(r1)] *) + | Olonguoffloat_ne (**r [rd = unsigned_long_of_float64(r1)] *) + (*c Boolean tests: *) | Ocmp (cond: condition) (**r [rd = 1] if condition holds, [rd = 0] otherwise. 
*) | Oextfz (stop : Z) (start : Z) @@ -470,6 +475,11 @@ Definition eval_operation | Olonguofsingle, v1::nil => Some (Val.maketotal (Val.longuofsingle v1)) | Osingleoflong, v1::nil => Some (Val.maketotal (Val.singleoflong v1)) | Osingleoflongu, v1::nil => Some (Val.maketotal (Val.singleoflongu v1)) + | Ointofsingle_ne, v1::nil => Some (Val.maketotal (Val.intofsingle_ne v1)) + | Ointuofsingle_ne, v1::nil => Some (Val.maketotal (Val.intuofsingle_ne v1)) + | Olongoffloat_ne, v1::nil => Some (Val.maketotal (Val.longoffloat_ne v1)) + | Olonguoffloat_ne, v1::nil => Some (Val.maketotal (Val.longuoffloat_ne v1)) + | Ocmp c, _ => Some (Val.of_optbool (eval_condition c vl m)) | (Oextfz stop start), v0::nil => Some (extfz stop start v0) | (Oextfs stop start), v0::nil => Some (extfs stop start v0) @@ -691,6 +701,12 @@ Definition type_of_operation (op: operation) : list typ * typ := | Olonguofsingle => (Tsingle :: nil, Tlong) | Osingleoflong => (Tlong :: nil, Tsingle) | Osingleoflongu => (Tlong :: nil, Tsingle) + + | Ointofsingle_ne => (Tsingle :: nil, Tint) + | Ointuofsingle_ne => (Tsingle :: nil, Tint) + | Olongoffloat_ne => (Tfloat :: nil, Tlong) + | Olonguoffloat_ne => (Tfloat :: nil, Tlong) + | Ocmp c => (type_of_condition c, Tint) | Oextfz _ _ | Oextfs _ _ => (Tint :: nil, Tint) | Oextfzl _ _ | Oextfsl _ _ => (Tlong :: nil, Tlong) @@ -992,6 +1008,12 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type). (* singleoflong, singleoflongu *) - destruct v0; cbn... - destruct v0; cbn... + (* intofsingle_ne, intuofsingle_ne *) + - destruct v0; cbn... destruct (Float32.to_int_ne f); cbn; trivial. + - destruct v0; cbn... destruct (Float32.to_intu_ne f); cbn; trivial. + (* longoffloat_ne, longuoffloat_ne *) + - destruct v0; cbn... destruct (Float.to_long_ne f); cbn; trivial. + - destruct v0; cbn... destruct (Float.to_longu_ne f); cbn; trivial. (* cmp *) - destruct (eval_condition cond vl m)... destruct b... (* extfz *) @@ -1686,6 +1708,14 @@ Proof. 
(* singleoflong, singleoflongu *) - inv H4; cbn; auto. - inv H4; cbn; auto. + + (* intofsingle_ne, intuofsingle_ne *) + - inv H4; cbn; auto. destruct (Float32.to_int_ne f0); cbn; auto. + - inv H4; cbn; auto. destruct (Float32.to_intu_ne f0); cbn; auto. + (* longoffloat_ne, longuoffloat_ne *) + - inv H4; cbn; auto. destruct (Float.to_long_ne f0); cbn; auto. + - inv H4; cbn; auto. destruct (Float.to_longu_ne f0); cbn; auto. + (* cmp *) - subst v1. destruct (eval_condition cond vl1 m1) eqn:?. exploit eval_condition_inj; eauto. intros EQ; rewrite EQ. diff --git a/kvx/PostpassSchedulingOracle.ml b/kvx/PostpassSchedulingOracle.ml index 3eb0b95f..c729c72d 100644 --- a/kvx/PostpassSchedulingOracle.ml +++ b/kvx/PostpassSchedulingOracle.ml @@ -88,10 +88,10 @@ let arith_rr_real = function | Pfloatuwrnsz -> Floatuwz | Pfloatudrnsz -> Floatudz | Pfloatdrnsz -> Floatdz - | Pfixedwrzz -> Fixedw - | Pfixeduwrzz -> Fixeduw - | Pfixeddrzz -> Fixedd - | Pfixedudrzz -> Fixedud + | Pfixedwrzz | Pfixedwrne -> Fixedw + | Pfixeduwrzz | Pfixeduwrne -> Fixeduw + | Pfixeddrzz | Pfixeddrne -> Fixedd + | Pfixedudrzz | Pfixedudrne -> Fixedud | Pfixeddrzz_i32 -> Fixedd | Pfixedudrzz_i32 -> Fixedud diff --git a/kvx/SelectOp.vp b/kvx/SelectOp.vp index f243089d..5225a71c 100644 --- a/kvx/SelectOp.vp +++ b/kvx/SelectOp.vp @@ -760,6 +760,8 @@ Definition gen_absl args := | _ => None end. +Require FPDivision32 FPDivision64. 
+ Definition platform_builtin (b: platform_builtin) (args: exprlist) : option expr := match b with | BI_fmin => Some (Eop Ominf args) @@ -768,6 +770,40 @@ Definition platform_builtin (b: platform_builtin) (args: exprlist) : option expr | BI_fmaxf => Some (Eop Omaxfs args) | BI_fma => gen_fma args | BI_fmaf => gen_fmaf args + | BI_lround_ne => Some (Eop Olongoffloat_ne args) + | BI_luround_ne => Some (Eop Olonguoffloat_ne args) + | BI_fp_udiv32 => (match args with + | a:::b:::Enil => Some (FPDivision32.fp_divu32 a b) + | _ => None + end) + | BI_fp_udiv64 => (match args with + | a:::b:::Enil => Some (FPDivision64.fp_divu64 a b) + | _ => None + end) + | BI_fp_umod32 => (match args with + | a:::b:::Enil => Some (FPDivision32.fp_modu32 a b) + | _ => None + end) + | BI_fp_umod64 => (match args with + | a:::b:::Enil => Some (FPDivision64.fp_modu64 a b) + | _ => None + end) + | BI_fp_sdiv32 => (match args with + | a:::b:::Enil => Some (FPDivision32.fp_divs32 a b) + | _ => None + end) + | BI_fp_sdiv64 => (match args with + | a:::b:::Enil => Some (FPDivision64.fp_divs64 a b) + | _ => None + end) + | BI_fp_smod32 => (match args with + | a:::b:::Enil => Some (FPDivision32.fp_mods32 a b) + | _ => None + end) + | BI_fp_smod64 => (match args with + | a:::b:::Enil => Some (FPDivision64.fp_mods64 a b) + | _ => None + end) | BI_abs => gen_abs args | BI_absl => gen_absl args end. diff --git a/kvx/SelectOpproof.v b/kvx/SelectOpproof.v index 4ddf6ece..19393091 100644 --- a/kvx/SelectOpproof.v +++ b/kvx/SelectOpproof.v @@ -1935,6 +1935,8 @@ Proof. constructor. Qed. +Require FPDivision32. + Theorem eval_platform_builtin: forall bf al a vl v le, platform_builtin bf al = Some a -> @@ -1948,6 +1950,190 @@ Proof. repeat (try econstructor; try eassumption)). - apply eval_fma; assumption. - apply eval_fmaf; assumption. + - cbn in *; + destruct vl; trivial. destruct vl; trivial. + destruct v0; try discriminate; + cbn; rewrite H0; reflexivity. + - cbn in *; + destruct vl; trivial. 
destruct vl; trivial. + destruct v0; try discriminate; + cbn; rewrite H0; reflexivity. + - cbn in *. + intro EVAL. + inv EVAL. discriminate. + inv H0. discriminate. + inv H2. 2: discriminate. + inv Heval. + intro EVAL. + destruct v1; try discriminate. + destruct v0; try discriminate. + unfold Int.eq in EVAL. + change (Int.unsigned Int.zero) with 0 in EVAL. + unfold zeq in EVAL. + destruct (Z.eq_dec (Int.unsigned i0) 0) as [EQ | NEQ]. + { discriminate. } + exists (Vint (Int.divu i i0)). split. + { + apply FPDivision32.fp_divu32_correct; auto. + pose proof (Int.unsigned_range i0). + lia. + } + inv EVAL. + constructor. + - cbn in *. + intro EVAL. + inv EVAL. discriminate. + inv H0. discriminate. + inv H2. 2: discriminate. + inv Heval. + intro EVAL. + destruct v1; try discriminate. + destruct v0; try discriminate. + unfold Int64.eq in EVAL. + change (Int64.unsigned Int64.zero) with 0 in EVAL. + unfold zeq in EVAL. + destruct (Z.eq_dec (Int64.unsigned i0) 0) as [EQ | NEQ]. + { discriminate. } + exists (Vlong (Int64.divu i i0)). split. + { + apply FPDivision64.fp_divu64_correct; auto. + pose proof (Int64.unsigned_range i0). + lia. + } + inv EVAL. + constructor. + - cbn in *. + intro EVAL. + inv EVAL. discriminate. + inv H0. discriminate. + inv H2. 2: discriminate. + inv Heval. + intro EVAL. + destruct v1; try discriminate. + destruct v0; try discriminate. + unfold Int.eq in EVAL. + change (Int.unsigned Int.zero) with 0 in EVAL. + unfold zeq in EVAL. + destruct (Z.eq_dec (Int.unsigned i0) 0) as [EQ | NEQ]. + { discriminate. } + exists (Vint (Int.modu i i0)). split. + { + apply FPDivision32.fp_modu32_correct; auto. + pose proof (Int.unsigned_range i0). + lia. + } + inv EVAL. + constructor. + - cbn in *. + intro EVAL. + inv EVAL. discriminate. + inv H0. discriminate. + inv H2. 2: discriminate. + inv Heval. + intro EVAL. + destruct v1; try discriminate. + destruct v0; try discriminate. + unfold Int64.eq in EVAL. + change (Int64.unsigned Int64.zero) with 0 in EVAL. 
+ unfold zeq in EVAL. + destruct (Z.eq_dec (Int64.unsigned i0) 0) as [EQ | NEQ]. + { discriminate. } + exists (Vlong (Int64.modu i i0)). split. + { + apply FPDivision64.fp_modu64_correct; auto. + pose proof (Int64.unsigned_range i0). + lia. + } + inv EVAL. + constructor. + - cbn in *. + intro EVAL. + inv EVAL. discriminate. + inv H0. discriminate. + inv H2. 2: discriminate. + inv Heval. + intro EVAL. + destruct v1; try discriminate. + destruct v0; try discriminate. + unfold Int.eq in EVAL. + change (Int.unsigned Int.zero) with 0 in EVAL. + unfold zeq in EVAL. + destruct (Z.eq_dec (Int.unsigned i0) 0) as [EQ | NEQ]. + { discriminate. } + exists (Vint (Int.divs i i0)). split. + { + apply FPDivision32.fp_divs32_correct; auto. + pose proof (Int.unsigned_range i0). + lia. + } + inv EVAL. + constructor. + - cbn in *. + intro EVAL. + inv EVAL. discriminate. + inv H0. discriminate. + inv H2. 2: discriminate. + inv Heval. + intro EVAL. + destruct v1; try discriminate. + destruct v0; try discriminate. + unfold Int64.eq in EVAL. + change (Int64.unsigned Int64.zero) with 0 in EVAL. + unfold zeq in EVAL. + destruct (Z.eq_dec (Int64.unsigned i0) 0) as [EQ | NEQ]. + { discriminate. } + exists (Vlong (Int64.divs i i0)). split. + { + apply FPDivision64.fp_divs64_correct; auto. + pose proof (Int64.unsigned_range i0). + lia. + } + inv EVAL. + constructor. + - cbn in *. + intro EVAL. + inv EVAL. discriminate. + inv H0. discriminate. + inv H2. 2: discriminate. + inv Heval. + intro EVAL. + destruct v1; try discriminate. + destruct v0; try discriminate. + unfold Int.eq in EVAL. + change (Int.unsigned Int.zero) with 0 in EVAL. + unfold zeq in EVAL. + destruct (Z.eq_dec (Int.unsigned i0) 0) as [EQ | NEQ]. + { discriminate. } + exists (Vint (Int.mods i i0)). split. + { + apply FPDivision32.fp_mods32_correct; auto. + pose proof (Int.unsigned_range i0). + lia. + } + inv EVAL. + constructor. + - cbn in *. + intro EVAL. + inv EVAL. discriminate. + inv H0. discriminate. + inv H2. 
2: discriminate. + inv Heval. + intro EVAL. + destruct v1; try discriminate. + destruct v0; try discriminate. + unfold Int64.eq in EVAL. + change (Int64.unsigned Int64.zero) with 0 in EVAL. + unfold zeq in EVAL. + destruct (Z.eq_dec (Int64.unsigned i0) 0) as [EQ | NEQ]. + { discriminate. } + exists (Vlong (Int64.mods i i0)). split. + { + apply FPDivision64.fp_mods64_correct; auto. + pose proof (Int64.unsigned_range i0). + lia. + } + inv EVAL. + constructor. - apply eval_abs; assumption. - apply eval_absl; assumption. Qed. diff --git a/kvx/TargetPrinter.ml b/kvx/TargetPrinter.ml index 8a311dbb..88143bfa 100644 --- a/kvx/TargetPrinter.ml +++ b/kvx/TargetPrinter.ml @@ -586,6 +586,14 @@ module Target (*: TARGET*) = fprintf oc " fixedd.rz %a = %a, 0\n" ireg rd ireg rs | Pfixedudrzz(rd, rs) | Pfixedudrzz_i32(rd, rs) -> fprintf oc " fixedud.rz %a = %a, 0\n" ireg rd ireg rs + | Pfixedudrne(rd, rs) -> + fprintf oc " fixedud.rn %a = %a, 0\n" ireg rd ireg rs + | Pfixeddrne(rd, rs) -> + fprintf oc " fixedd.rn %a = %a, 0\n" ireg rd ireg rs + | Pfixeduwrne(rd, rs) -> + fprintf oc " fixeduw.rn %a = %a, 0\n" ireg rd ireg rs + | Pfixedwrne(rd, rs) -> + fprintf oc " fixedw.rn %a = %a, 0\n" ireg rd ireg rs (* Arith RI32 instructions *) | Pmake (rd, imm) -> diff --git a/kvx/ValueAOp.v b/kvx/ValueAOp.v index ddfe94f3..52658b43 100644 --- a/kvx/ValueAOp.v +++ b/kvx/ValueAOp.v @@ -304,6 +304,10 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval := | Olonguofsingle, v1::nil => longuofsingle_total v1 | Osingleoflong, v1::nil => singleoflong v1 | Osingleoflongu, v1::nil => singleoflongu v1 + | Ointofsingle_ne, v1::nil => intofsingle_ne_total v1 + | Ointuofsingle_ne, v1::nil => intuofsingle_ne_total v1 + | Olongoffloat_ne, v1::nil => longoffloat_ne_total v1 + | Olonguoffloat_ne, v1::nil => longuoffloat_ne_total v1 | Ocmp c, _ => of_optbool (eval_static_condition c vl) | (Oextfz stop start), v0::nil => eval_static_extfz stop start v0 | (Oextfs stop start), v0::nil => 
eval_static_extfs stop start v0 diff --git a/lib/Floats.v b/lib/Floats.v index 9ee5302d..b12c6585 100644 --- a/lib/Floats.v +++ b/lib/Floats.v @@ -360,6 +360,15 @@ Definition to_long (f:float): option int64 := (**r conversion to signed 64-bit i Definition to_longu (f:float): option int64 := (**r conversion to unsigned 64-bit int *) option_map Int64.repr (ZofB_range _ _ f 0 Int64.max_unsigned). +Definition to_int_ne (f:float): option int := (**r conversion to signed 32-bit int *) + option_map Int.repr (ZofB_ne_range _ _ f Int.min_signed Int.max_signed). +Definition to_intu_ne (f:float): option int := (**r conversion to unsigned 32-bit int *) + option_map Int.repr (ZofB_ne_range _ _ f 0 Int.max_unsigned). +Definition to_long_ne (f:float): option int64 := (**r conversion to signed 64-bit int *) + option_map Int64.repr (ZofB_ne_range _ _ f Int64.min_signed Int64.max_signed). +Definition to_longu_ne (f:float): option int64 := (**r conversion to unsigned 64-bit int *) + option_map Int64.repr (ZofB_ne_range _ _ f 0 Int64.max_unsigned). + Definition of_int (n:int): float := (**r conversion from signed 32-bit int *) BofZ 53 1024 __ __ (Int.signed n). Definition of_intu (n:int): float:= (**r conversion from unsigned 32-bit int *) @@ -1136,6 +1145,15 @@ Definition to_long (f:float32): option int64 := (**r conversion to signed 64-bit Definition to_longu (f:float32): option int64 := (**r conversion to unsigned 64-bit int *) option_map Int64.repr (ZofB_range _ _ f 0 Int64.max_unsigned). +Definition to_int_ne (f:float32): option int := (**r conversion to signed 32-bit int *) + option_map Int.repr (ZofB_ne_range _ _ f Int.min_signed Int.max_signed). +Definition to_intu_ne (f:float32): option int := (**r conversion to unsigned 32-bit int *) + option_map Int.repr (ZofB_ne_range _ _ f 0 Int.max_unsigned). +Definition to_long_ne (f:float32): option int64 := (**r conversion to signed 64-bit int *) + option_map Int64.repr (ZofB_ne_range _ _ f Int64.min_signed Int64.max_signed). 
+Definition to_longu_ne (f:float32): option int64 := (**r conversion to unsigned 64-bit int *) + option_map Int64.repr (ZofB_ne_range _ _ f 0 Int64.max_unsigned). + Definition of_int (n:int): float32 := (**r conversion from signed 32-bit int to single-precision float *) BofZ 24 128 __ __ (Int.signed n). Definition of_intu (n:int): float32 := (**r conversion from unsigned 32-bit int to single-precision float *) diff --git a/lib/IEEE754_extra.v b/lib/IEEE754_extra.v index b0d1944e..35feb29d 100644 --- a/lib/IEEE754_extra.v +++ b/lib/IEEE754_extra.v @@ -24,8 +24,206 @@ Require Import Psatz. Require Import Bool. Require Import Eqdep_dec. +Require Import Coq.Logic.FunctionalExtensionality. + Local Open Scope Z_scope. + +Lemma Znearest_lub : + forall choice (n : Z) (x : R), (IZR n <= x)%R -> (n <= Znearest choice x)%Z. +Proof. + intros until x. intro BND. + pose proof (Zfloor_lub n x BND). + pose proof (Znearest_ge_floor choice x). + lia. +Qed. + +Lemma Znearest_glb : + forall choice (n : Z) (x : R), (x <= IZR n)%R -> (Znearest choice x <= n)%Z. +Proof. + intros until x. intro BND. + pose proof (Zceil_glb n x BND). + pose proof (Znearest_le_ceil choice x). + lia. +Qed. + +Lemma Znearest_IZR : + forall choice n, (Znearest choice (IZR n)) = n. +Proof. + intros. + unfold Znearest. + case Rcompare_spec ; intro ORDER. + - apply Zfloor_IZR. + - destruct choice. + + apply Zceil_IZR. + + apply Zfloor_IZR. + - apply Zceil_IZR. +Qed. + +Lemma ZnearestE_IZR: + forall n, (ZnearestE (IZR n)) = n. +Proof. + apply Znearest_IZR. +Qed. + +Lemma Zfloor_opp : + forall x : R, (Zfloor (- x)) = - (Zceil x). +Proof. + unfold Zceil, Zfloor. + intro x. + rewrite Z.opp_involutive. + reflexivity. +Qed. + +Lemma Zceil_opp : + forall x : R, (Zceil (- x)) = - (Zfloor x). +Proof. + unfold Zceil, Zfloor. + intro x. + rewrite Ropp_involutive. + reflexivity. +Qed. + +Lemma ZnearestE_opp + : forall x : R, ZnearestE (- x) = - ZnearestE x. +Proof. + intro. + rewrite Znearest_opp. + f_equal. + f_equal. 
+ apply functional_extensionality. + intro. + rewrite Z.even_opp. + fold (Z.succ x0). + rewrite Z.even_succ. + f_equal. + apply Z.negb_odd. +Qed. + +Lemma Zceil_non_floor: + forall x : R, (x > IZR(Zfloor x))%R -> Zceil x = Z.succ(Zfloor x). +Proof. + intros x BETWEEN. + unfold Z.succ. + apply Zceil_imp. + split. + { rewrite minus_IZR. + rewrite plus_IZR. + lra. + } + rewrite plus_IZR. + pose proof (Zfloor_ub x). + lra. +Qed. + +(** more complicated way of proving +Lemma Zceil_non_ceil: + forall x : R, (x < IZR(Zceil x))%R -> Zceil x = Z.succ(Zfloor x). +Proof. + intros x BETWEEN. + unfold Z.succ. + cut (Zfloor x = (Zceil x) - 1). { intros; lia. } + apply Zfloor_imp. + split. + { rewrite minus_IZR. + pose proof (Zceil_lb x). + lra. + } + rewrite plus_IZR. + rewrite minus_IZR. + lra. +Qed. + +Lemma ZnearestE_opp + : forall x : R, ZnearestE (- x) = - ZnearestE x. +Proof. + intro x. + unfold ZnearestE. + case (Rcompare_spec (x - IZR (Zfloor x)) (/ 2)); intro CMP. + - pose proof (Zfloor_lb x) as LB. + destruct (Rcompare_spec x (IZR (Zfloor x))) as [ ABSURD | EXACT | INEXACT]. + lra. + { set (n := Zfloor x) in *. + rewrite EXACT. + rewrite <- opp_IZR. + rewrite Zfloor_IZR. + rewrite opp_IZR. + rewrite Rcompare_Lt by lra. + reflexivity. + } + rewrite Rcompare_Gt. + { apply Zceil_opp. } + rewrite Zfloor_opp. + rewrite opp_IZR. + rewrite Zceil_non_floor by assumption. + unfold Z.succ. + rewrite plus_IZR. + lra. + - rewrite Rcompare_Eq. + { rewrite Zceil_opp. + rewrite Zfloor_opp. + rewrite Z.even_opp. + rewrite Zceil_non_floor by lra. + rewrite Z.even_succ. + rewrite Z.negb_odd. + destruct (Z.even (Zfloor x)); reflexivity. + } + rewrite Zfloor_opp. + rewrite opp_IZR. + ring_simplify. + rewrite Zceil_non_floor by lra. + unfold Z.succ. + rewrite plus_IZR. + lra. + - rewrite Rcompare_Lt. + { apply Zfloor_opp. } + rewrite Zfloor_opp. + rewrite opp_IZR. + rewrite Zceil_non_floor by lra. + unfold Z.succ. + rewrite plus_IZR. + lra. +Qed. 
+ *) + +Lemma Znearest_imp2: + forall choice x, (Rabs (IZR (Znearest choice x) - x) <= /2)%R. +Proof. + intros. + unfold Znearest. + pose proof (Zfloor_lb x) as FL. + pose proof (Zceil_ub x) as CU. + pose proof (Zceil_non_floor x) as NF. + case Rcompare_spec; intro CMP; apply Rabs_le; split; try lra. + - destruct choice; lra. + - destruct choice. 2: lra. + rewrite NF. 2: lra. + unfold Z.succ. rewrite plus_IZR. lra. + - rewrite NF. 2: lra. + unfold Z.succ. rewrite plus_IZR. lra. +Qed. + +Theorem Znearest_le + : forall choice (x y : R), (x <= y)%R -> Znearest choice x <= Znearest choice y. +Proof. + intros. + destruct (Z_le_gt_dec (Znearest choice x) (Znearest choice y)) as [LE | GT]. + assumption. + exfalso. + assert (1 <= IZR (Znearest choice x) - IZR(Znearest choice y))%R as GAP. + { rewrite <- minus_IZR. + apply IZR_le. + lia. + } + pose proof (Znearest_imp2 choice x) as Rx. + pose proof (Znearest_imp2 choice y) as Ry. + apply Rabs_le_inv in Rx. + apply Rabs_le_inv in Ry. + assert (x = y) by lra. + subst y. + lia. +Qed. + Section Extra_ops. (** [prec] is the number of bits of the mantissa including the implicit one. @@ -879,6 +1077,224 @@ Proof. unfold ZofB_range. rewrite D. rewrite Zle_bool_true by lia. rewrite Zle_bool_true by lia. auto. Qed. +(** ZofB_ne : convert float to integer, round to nearest *) + +Definition Zdiv_ne (a b : Z) := + let q := Z.div a b in + let q1 := Z.succ q in + match Z.compare (a-b*q) (b*q1-a) with + | Lt => q + | Gt => q1 + | Eq => (if Z.even q then q else q1) + end. + +Definition ZofB_ne (f: binary_float): option Z := + match f with + | B754_finite _ _ s m (Zpos e) _ => Some (cond_Zopp s (Zpos m) * Z.pow_pos radix2 e)%Z + | B754_finite _ _ s m 0 _ => Some (cond_Zopp s (Zpos m)) + | B754_finite _ _ s m (Zneg e) _ => Some (cond_Zopp s (Zdiv_ne (Zpos m) (Z.pow_pos radix2 e)))%Z + | B754_zero _ _ _ => Some 0%Z + | _ => None + end. + +Ltac field_simplify_den := field_simplify ; [idtac | lra]. 
+(* Helper tactic: reduce a goal (0 < x / y)%R to (0 < x)%R via
+   Rdiv_lt_0_compat, discharging the denominator side goal with lra. *)
+Ltac Rdiv_lt_0_den := apply Rdiv_lt_0_compat ; [idtac | lra].
+
+(* Rewrite database l_IZR: applied right-to-left it gathers real
+   arithmetic over IZR terms back into IZR of one integer expression,
+   so that IZR_lt / IZR_le followed by lia can close the goal. *)
+Hint Rewrite <- plus_IZR minus_IZR opp_IZR mult_IZR : l_IZR.
+Ltac l_IZR := autorewrite with l_IZR.
+
+(* Characterization of ZofB_ne: on a finite float it yields the integer
+   nearest to the real value with ties broken to even (ZnearestE); on
+   infinities and NaNs it yields None. *)
+Theorem ZofB_ne_correct:
+ forall f,
+ ZofB_ne f = if is_finite _ _ f then Some (ZnearestE (B2R _ _ f)) else None.
+Proof.
+ destruct f as [s|s|s p H|s m e H]; simpl; auto.
+- f_equal. symmetry. apply (ZnearestE_IZR 0).
+- destruct e; f_equal.
+ + unfold F2R; cbn. rewrite Rmult_1_r. rewrite ZnearestE_IZR. auto.
+ + unfold F2R; cbn. rewrite <- mult_IZR. rewrite ZnearestE_IZR. auto.
+ + unfold F2R; cbn. rewrite IZR_cond_Zopp. rewrite <- cond_Ropp_mult_l.
+ assert (EQ: forall x, ZnearestE (cond_Ropp s x) = cond_Zopp s (ZnearestE x)).
+ { intros. destruct s; cbn; auto. apply ZnearestE_opp. }
+ rewrite EQ. f_equal.
+ generalize (Zpower_pos_gt_0 2 p (eq_refl _)); intros.
+ set (p2p := (Z.pow_pos 2 p)) in *.
+ set (zm := Z.pos m) in *.
+ assert (p2p > 0) as POS by lia.
+ assert (0 < IZR p2p)%R as POS2.
+ { apply IZR_lt. assumption. }
+ unfold Zdiv_ne, Z.succ in *.
+ (* case split from Z.compare_spec: equal / less / greater *)
+ case Z.compare_spec; intro CMP.
+ * pose proof (Z_div_mod_eq_full zm p2p) as DECOMPOSE.
+ destruct (Z_mod_lt zm p2p POS) as [MOD1 MOD2].
+ set (q := zm / p2p) in *.
+ set (r := zm mod p2p) in *.
+ rewrite inbetween_int_NE with (m := q) (l := loc_Inexact Eq).
+ { cbn. unfold cond_incr.
+ destruct Z.even; reflexivity.
+ }
+ constructor.
+ split.
+ ** assert (0 < IZR zm / IZR p2p - IZR q)%R.
+ 2: lra.
+ field_simplify_den.
+ Rdiv_lt_0_den.
+ l_IZR.
+ apply IZR_lt.
+ lia.
+ ** assert (0 < IZR (q + 1) - (IZR zm * / IZR p2p))%R.
+ 2: lra.
+ field_simplify_den.
+ Rdiv_lt_0_den.
+ l_IZR.
+ apply IZR_lt.
+ lia.
+ ** apply Rcompare_Eq.
+ assert ((IZR q + IZR (q + 1))/2 - (IZR zm * / IZR p2p) = 0)%R; [idtac|lra].
+ field_simplify_den.
+ l_IZR.
+ replace (q * p2p + (q + 1) * p2p - 2 * zm) with 0 by lia.
+ field. apply IZR_neq. lia.
+ * symmetry.
+ apply Znearest_imp with (n := zm / p2p).
+ apply Rabs_lt. split.
+ ** pose proof (Z_mult_div_ge zm p2p POS).
+ assert (0 <= IZR zm * / IZR p2p - IZR (zm / p2p))%R.
+ 2: lra.
+ field_simplify_den.
+ apply Rmult_le_pos.
+ { l_IZR.
+ apply IZR_le.
+ lia.
+ }
+ assert (0 < / IZR p2p)%R.
+ 2: lra.
+ apply Rinv_0_lt_compat. assumption.
+ ** assert (0 < 2*(IZR p2p * IZR (zm / p2p) - IZR zm) + (IZR p2p))%R as LT.
+ { l_IZR.
+ apply IZR_lt.
+ lia. }
+ assert (0 < -(IZR zm * / IZR p2p - IZR (zm / p2p) - / 2))%R as GT.
+ 2: lra.
+ field_simplify_den.
+ Rdiv_lt_0_den.
+ lra.
+ * symmetry.
+ apply Znearest_imp.
+ apply Rabs_lt. split.
+ ** assert (0 < (IZR zm - IZR p2p * IZR (zm / p2p)) - (IZR p2p * (IZR (zm / p2p) + 1) - IZR zm))%R.
+ { ring_simplify.
+ l_IZR.
+ apply IZR_lt.
+ lia.
+ }
+ assert (0 < (/ 2) + IZR zm * / IZR p2p - IZR (zm / p2p + 1))%R.
+ 2: lra.
+ field_simplify_den.
+ Rdiv_lt_0_den.
+ rewrite plus_IZR.
+ lra.
+ ** assert (0 < IZR (zm / p2p + 1) - (IZR zm * / IZR p2p))%R.
+ 2: lra.
+ field_simplify_den.
+ Rdiv_lt_0_den.
+ l_IZR.
+ apply IZR_lt.
+ pose proof (Z_div_mod_eq_full zm p2p) as DECOMPOSE.
+ ring_simplify.
+ set (q := (zm / p2p)) in *.
+ pose proof (Z_mod_lt zm p2p POS) as MOD.
+ lia.
+Qed.
+
+(* A finite float that converts to n under nearest-even rounding lies
+   within one half of n. *)
+Theorem ZofB_ne_ball:
+ forall f n, ZofB_ne f = Some n -> (IZR n-1/2 <= B2R _ _ f <= IZR n+1/2)%R.
+Proof.
+ intros. rewrite ZofB_ne_correct in H. destruct (is_finite prec emax f) eqn:FIN; inversion H.
+ pose proof (Znearest_imp2 (fun x => negb (Z.even x)) (B2R prec emax f)) as ABS.
+ pose proof (Rabs_le_inv _ _ ABS).
+ lra.
+Qed.
+
+(* NOTE(review): the following exact-subtraction lemma (Sterbenz-style)
+   is deliberately left commented out in this patch; kept for reference. *)
+(*
+Theorem ZofB_ne_minus:
+ forall minus_nan m f p q,
+ ZofB_ne f = Some p -> 0 <= p < 2*q -> q <= 2^prec -> (IZR q <= B2R _ _ f)%R ->
+ ZofB_ne (Bminus _ _ _ Hmax minus_nan m f (BofZ q)) = Some (p - q).
+Proof.
+ intros.
+ assert (Q: -2^prec <= q <= 2^prec).
+ { split; auto. generalize (Zpower_ge_0 radix2 prec); simpl; lia. }
+ assert (RANGE: (IZR p -1/2 <= B2R _ _ f <= IZR p + 1/2)%R) by ( apply ZofB_ne_ball; auto ).
+ rewrite ZofB_ne_correct in H. destruct (is_finite prec emax f) eqn:FIN; try discriminate.
+ assert (PQ2: (IZR p + 1 <= IZR q * 2)%R).
+ { l_IZR. apply IZR_le. lia. }
+ assert (EXACT: round radix2 fexp (round_mode m) (B2R _ _ f - IZR q)%R = (B2R _ _ f - IZR q)%R).
+ { apply round_generic. apply valid_rnd_round_mode.
+ apply sterbenz_aux. now apply FLT_exp_valid. apply FLT_exp_monotone. apply generic_format_B2R.
+ apply integer_representable_n. auto. lra. }
+ destruct (BofZ_exact q Q) as (A & B & C).
+ generalize (Bminus_correct _ _ _ Hmax minus_nan m f (BofZ q) FIN B).
+ rewrite Rlt_bool_true.
+- fold emin; fold fexp. intros (D & E & F).
+ rewrite ZofB_ne_correct. rewrite E. rewrite D. rewrite A. rewrite EXACT.
+ inversion H. f_equal.
+ rewrite ! Ztrunc_floor. apply Zfloor_minus.
+ lra. lra.
+- rewrite A. fold emin; fold fexp. rewrite EXACT.
+ apply Rle_lt_trans with (bpow radix2 prec).
+ apply Rle_trans with (IZR q). apply Rabs_le. lra.
+ rewrite <- IZR_Zpower. apply IZR_le; auto. red in prec_gt_0_; lia.
+ apply bpow_lt. auto.
+Qed.
+ *)
+
+(* Nearest-even conversion with a range check: Some z only when
+   zmin <= z <= zmax, None otherwise. *)
+Definition ZofB_ne_range (f: binary_float) (zmin zmax: Z): option Z :=
+ match ZofB_ne f with
+ | None => None
+ | Some z => if Z.leb zmin z && Z.leb z zmax then Some z else None
+ end.
+
+(* ZofB_ne_range expressed via ZnearestE and boolean range tests. *)
+Theorem ZofB_ne_range_correct:
+ forall f min max,
+ let n := ZnearestE (B2R _ _ f) in
+ ZofB_ne_range f min max =
+ if is_finite _ _ f && Z.leb min n && Z.leb n max then Some n else None.
+Proof.
+ intros. unfold ZofB_ne_range. rewrite ZofB_ne_correct. fold n.
+ destruct (is_finite prec emax f); auto.
+Qed.
+
+(* Inversion: a successful ZofB_ne_range gives both bounds and the
+   underlying ZofB_ne result. *)
+Lemma ZofB_ne_range_inversion:
+ forall f min max n,
+ ZofB_ne_range f min max = Some n ->
+ min <= n /\ n <= max /\ ZofB_ne f = Some n.
+Proof.
+ intros. rewrite ZofB_ne_range_correct in H. rewrite ZofB_ne_correct.
+ destruct (is_finite prec emax f); try discriminate.
+ set (n1 := ZnearestE (B2R _ _ f)) in *.
+ destruct (min <=? n1) eqn:MIN; try discriminate.
+ destruct (n1 <=? max) eqn:MAX; try discriminate.
+ simpl in H. inversion H. subst n.
+ split. apply Zle_bool_imp_le; auto.
+ split. apply Zle_bool_imp_le; auto.
+ auto.
+Qed.
+
+
+(* NOTE(review): range version of the subtraction lemma, commented out;
+   it depends on the (also commented-out) ZofB_ne_minus, and its proof
+   still says "unfold ZofB_range" instead of ZofB_ne_range — to be
+   fixed before re-enabling. *)
+(*
+Theorem ZofB_ne_range_minus:
+ forall minus_nan m f p q,
+ ZofB_ne_range f 0 (2 * q - 1) = Some p -> q <= 2^prec -> (IZR q <= B2R _ _ f)%R ->
+ ZofB_ne_range (Bminus _ _ _ Hmax minus_nan m f (BofZ q)) (-q) (q - 1) = Some (p - q).
+Proof.
+ intros. destruct (ZofB_ne_range_inversion _ _ _ _ H) as (A & B & C).
+ set (f' := Bminus prec emax prec_gt_0_ Hmax minus_nan m f (BofZ q)).
+ assert (D: ZofB_ne f' = Some (p - q)).
+ { apply ZofB_ne_minus. auto. lia. auto. auto. }
+ unfold ZofB_range. rewrite D. rewrite Zle_bool_true by lia. rewrite Zle_bool_true by lia. auto.
+Qed.
+ *)
+
 (** ** Algebraic identities *)
 
 (** Commutativity of addition and multiplication *)
diff --git a/test/monniaux/division/compare_timings.c b/test/monniaux/division/compare_timings.c
new file mode 100644
index 00000000..15195d3e
--- /dev/null
+++ b/test/monniaux/division/compare_timings.c
@@ -0,0 +1,67 @@
+/* Micro-benchmark: compare native 64-bit unsigned division (a/b) with
+   the FP-based __builtin_fp_udiv64.  cycles.h supplies cycle_t,
+   PRcycle, cycle_count_config() and get_cycle(). */
+#include <stdint.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include "cycles.h"
+
+/* CHECKS(mode, quotient) defines void checks_<mode>(): 10000 iterations
+   computing `quotient` on drifting operands a, b; the quotients are
+   folded into a printed checksum so the divisions cannot be optimized
+   away (and the two modes should print identical checksums). */
+#define CHECKS(mode, quotient) \
+void checks_##mode() { \
+ uint64_t checksum=UINT64_C(0), \
+ a=UINT64_C(0x10000000000), \
+ b=UINT64_C(0x1000); \
+ for(int i=0; i<10000; i++) { \
+ uint64_t q = (quotient); \
+ a += UINT64_C(0x36667); \
+ b += UINT64_C(0x13); \
+ checksum += q; \
+ } \
+ printf("checksum = %" PRIx64 "\n", checksum); \
+}
+
+/* CHECKS2(mode, quotient): same total work (2 x 5000 divisions) but
+   with two divisions per loop body, to observe how back-to-back
+   divisions schedule. */
+#define CHECKS2(mode, quotient) \
+void checks2_##mode() { \
+ uint64_t checksum=UINT64_C(0), \
+ a=UINT64_C(0x10000000000), \
+ b=UINT64_C(0x1000); \
+ for(int i=0; i<5000; i++) { \
+ uint64_t q = (quotient); \
+ a += UINT64_C(0x36667); \
+ b += UINT64_C(0x13); \
+ checksum += q; \
+ q = (quotient); \
+ a += UINT64_C(0x36667); \
+ b += UINT64_C(0x13); \
+ checksum += q; \
+ } \
+ printf("checksum = %" PRIx64 "\n", checksum); \
+}
+
+/* Instantiate each benchmark for both division implementations. */
+CHECKS(normal, a/b)
+CHECKS(fp, __builtin_fp_udiv64(a, b))
+
+CHECKS2(normal, a/b)
+CHECKS2(fp, __builtin_fp_udiv64(a, b))
+
+/* Time each variant with the cycle counter and report. */
+int main() {
+ cycle_t start, stop;
+ cycle_count_config();
+
+ start = get_cycle();
+ checks_normal();
+ stop = get_cycle();
+ printf("normal division: %" PRcycle " cycles\n", stop-start);
+
+ start = get_cycle();
+ checks_fp();
+ stop = get_cycle();
+ printf("fp division: %" PRcycle " cycles\n", stop-start);
+
+ start = get_cycle();
+ checks2_normal();
+ stop = get_cycle();
+ printf("normal division x2: %" PRcycle " cycles\n", stop-start);
+
+ start = get_cycle();
+ checks2_fp();
+ stop = get_cycle();
+ printf("fp division x2: %" PRcycle " cycles\n", stop-start);
+}
diff --git a/test/monniaux/division/cycles.h b/test/monniaux/division/cycles.h
new file mode 120000
index 00000000..84e54d21
--- /dev/null
+++ b/test/monniaux/division/cycles.h
@@ -0,0 +1 @@
+../cycles.h
\ No newline at end of file |