From 2a0168fea37b68ad14e2cb60bf215111e49d4870 Mon Sep 17 00:00:00 2001
From: xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>
Date: Wed, 23 Jul 2014 08:54:56 +0000
Subject: Merge of "newspilling" branch: - Support single-precision floats as
 first-class values - Introduce chunks Many32, Many64 and types Tany32, Tany64
 to   support saving and restoring registers without knowing   the exact types
 (int/single/float) of their contents, just   their sizes. - Memory model:
 generalize the opaque encoding of pointers to   apply to any value, not just
 pointers, if chunks Many32/Many64   are selected. - More properties of FP
 arithmetic proved.

git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2537 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
---
 .depend                        |   23 +-
 Makefile                       |    3 +-
 arm/Archi.v                    |   18 +-
 arm/Asm.v                      |   79 +-
 arm/Asmgen.v                   |   95 +-
 arm/Asmgenproof.v              |    9 +-
 arm/Asmgenproof1.v             |  255 +++--
 arm/ConstpropOp.vp             |   30 +-
 arm/ConstpropOpproof.v         |   68 +-
 arm/Machregs.v                 |   28 +-
 arm/NeedOp.v                   |    9 +-
 arm/Op.v                       |  112 +-
 arm/PrintAsm.ml                |  121 +-
 arm/SelectOp.vp                |   34 +-
 arm/SelectOpproof.v            |   76 ++
 arm/ValueAOp.v                 |   17 +
 arm/eabi/Conventions1.v        |   52 +-
 arm/hardfloat/Conventions1.v   |   97 +-
 backend/Allocation.v           |  116 +-
 backend/Allocproof.v           |  227 ++--
 backend/CMtypecheck.ml         |   27 +-
 backend/CSE.v                  |    3 +-
 backend/CSEproof.v             |    1 -
 backend/Cminor.v               |   62 +-
 backend/Constprop.v            |    1 +
 backend/Constpropproof.v       |    2 +
 backend/IRC.ml                 |   53 +-
 backend/Inliningproof.v        |    1 +
 backend/Lineartyping.v         |  786 +++----------
 backend/Locations.v            |   61 +-
 backend/NeedDomain.v           |  101 +-
 backend/PrintCminor.ml         |   24 +-
 backend/PrintXTL.ml            |    2 +
 backend/RTLtyping.v            |   88 +-
 backend/Regalloc.ml            |   60 +-
 backend/SelectDiv.vp           |   12 +
 backend/SelectDivproof.v       |   16 +
 backend/SelectLong.vp          |    4 +
 backend/SelectLongproof.v      |   28 +
 backend/Selection.v            |   15 +
 backend/Selectionproof.v       |   15 +
 backend/Stacking.v             |    8 +-
 backend/Stackingproof.v        |  111 +-
 backend/ValueAnalysis.v        |    4 +-
 backend/ValueDomain.v          |  317 +++--
 cfrontend/C2C.ml               |   18 +-
 cfrontend/Cexec.v              |   19 +-
 cfrontend/Clight.v             |    6 +-
 cfrontend/Cminorgen.v          |    2 +
 cfrontend/Cminorgenproof.v     |   24 +-
 cfrontend/Cop.v                |  274 +++--
 cfrontend/Csharpminor.v        |    4 +-
 cfrontend/Cshmgen.v            |  110 +-
 cfrontend/Cshmgenproof.v       |  131 ++-
 cfrontend/Ctypes.v             |   21 +-
 cfrontend/Initializers.v       |    4 +-
 cfrontend/Initializersproof.v  |   21 +-
 cfrontend/PrintClight.ml       |    4 +
 cfrontend/PrintCsyntax.ml      |    2 +
 cfrontend/SimplExpr.v          |    3 +
 cfrontend/SimplExprspec.v      |    4 +
 cfrontend/SimplLocals.v        |    5 +-
 cfrontend/SimplLocalsproof.v   |   57 +-
 common/AST.v                   |   68 +-
 common/Events.v                |   15 +-
 common/Globalenvs.v            |    6 +-
 common/Memdata.v               |  529 +++++----
 common/Memory.v                |  325 +++---
 common/Memtype.v               |   20 +-
 common/PrintAST.ml             |    4 +
 common/Values.v                |  139 ++-
 driver/Interp.ml               |    2 +-
 exportclight/ExportClight.ml   |    6 +-
 flocq/Appli/Fappli_IEEE_bits.v |   51 +
 ia32/Archi.v                   |   15 +-
 ia32/Asm.v                     |   99 +-
 ia32/Asmgen.v                  |  101 +-
 ia32/Asmgenproof.v             |   15 +-
 ia32/Asmgenproof1.v            |  213 +++-
 ia32/ConstpropOp.vp            |   14 +-
 ia32/ConstpropOpproof.v        |   56 +-
 ia32/Machregs.v                |   30 +-
 ia32/NeedOp.v                  |   10 +-
 ia32/Op.v                      |  134 ++-
 ia32/PrintAsm.ml               |  103 +-
 ia32/PrintOp.ml                |    2 +
 ia32/SelectOp.vp               |   35 +-
 ia32/SelectOpproof.v           |  102 +-
 ia32/Unusedglob1.ml            |    6 +-
 ia32/ValueAOp.v                |   13 +
 ia32/standard/Conventions1.v   |   74 +-
 lib/Camlcoq.ml                 |    9 +-
 lib/Fappli_IEEE_extra.v        | 1506 ++++++++++++++++++++++++
 lib/Floats.v                   | 2482 ++++++++++++++++------------------------
 powerpc/Archi.v                |   14 +-
 powerpc/Asm.v                  |   60 +-
 powerpc/Asmgen.v               |   79 +-
 powerpc/Asmgenproof.v          |   14 +-
 powerpc/Asmgenproof1.v         |  135 +--
 powerpc/ConstpropOp.vp         |   14 +-
 powerpc/ConstpropOpproof.v     |   56 +-
 powerpc/Machregs.v             |   37 +-
 powerpc/NeedOp.v               |    8 +-
 powerpc/Op.v                   |  137 +--
 powerpc/PrintAsm.ml            |   77 +-
 powerpc/SelectOp.vp            |   29 +-
 powerpc/SelectOpproof.v        |  124 +-
 powerpc/Unusedglob1.ml         |    4 +
 powerpc/ValueAOp.v             |    9 +
 powerpc/eabi/Conventions1.v    |   64 +-
 test/c/fftsp.c                 |   12 +-
 test/regression/NaNs.c         |   48 +-
 test/spass/Makefile.bak        |   13 -
 113 files changed, 6831 insertions(+), 4267 deletions(-)
 create mode 100644 lib/Fappli_IEEE_extra.v
 delete mode 100644 test/spass/Makefile.bak

diff --git a/.depend b/.depend
index 0529e871..5c27a2cb 100644
--- a/.depend
+++ b/.depend
@@ -8,7 +8,8 @@ lib/Ordered.vo lib/Ordered.glob lib/Ordered.v.beautified: lib/Ordered.v lib/Coql
 lib/Iteration.vo lib/Iteration.glob lib/Iteration.v.beautified: lib/Iteration.v lib/Axioms.vo lib/Coqlib.vo lib/Wfsimpl.vo
 lib/Integers.vo lib/Integers.glob lib/Integers.v.beautified: lib/Integers.v lib/Coqlib.vo
 $(ARCH)/Archi.vo $(ARCH)/Archi.glob $(ARCH)/Archi.v.beautified: $(ARCH)/Archi.v flocq/Appli/Fappli_IEEE.vo flocq/Appli/Fappli_IEEE_bits.vo
-lib/Floats.vo lib/Floats.glob lib/Floats.v.beautified: lib/Floats.v lib/Axioms.vo lib/Coqlib.vo lib/Integers.vo flocq/Appli/Fappli_IEEE.vo flocq/Appli/Fappli_IEEE_bits.vo flocq/Core/Fcore.vo flocq/Calc/Fcalc_round.vo flocq/Calc/Fcalc_bracket.vo flocq/Prop/Fprop_Sterbenz.vo $(ARCH)/Archi.vo
+lib/Fappli_IEEE_extra.vo lib/Fappli_IEEE_extra.glob lib/Fappli_IEEE_extra.v.beautified: lib/Fappli_IEEE_extra.v flocq/Core/Fcore.vo flocq/Core/Fcore_digits.vo flocq/Calc/Fcalc_digits.vo flocq/Calc/Fcalc_ops.vo flocq/Calc/Fcalc_round.vo flocq/Calc/Fcalc_bracket.vo flocq/Prop/Fprop_Sterbenz.vo flocq/Appli/Fappli_IEEE.vo flocq/Appli/Fappli_rnd_odd.vo flocq/Appli/Fappli_IEEE_bits.vo
+lib/Floats.vo lib/Floats.glob lib/Floats.v.beautified: lib/Floats.v lib/Coqlib.vo lib/Integers.vo flocq/Appli/Fappli_IEEE.vo flocq/Appli/Fappli_IEEE_bits.vo lib/Fappli_IEEE_extra.vo flocq/Core/Fcore.vo $(ARCH)/Archi.vo
 lib/Parmov.vo lib/Parmov.glob lib/Parmov.v.beautified: lib/Parmov.v lib/Axioms.vo lib/Coqlib.vo
 lib/UnionFind.vo lib/UnionFind.glob lib/UnionFind.v.beautified: lib/UnionFind.v lib/Coqlib.vo
 lib/Wfsimpl.vo lib/Wfsimpl.glob lib/Wfsimpl.v.beautified: lib/Wfsimpl.v lib/Axioms.vo
@@ -29,13 +30,13 @@ common/Switch.vo common/Switch.glob common/Switch.v.beautified: common/Switch.v
 common/Determinism.vo common/Determinism.glob common/Determinism.v.beautified: common/Determinism.v lib/Coqlib.vo common/AST.vo lib/Integers.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo common/Behaviors.vo
 common/Unityping.vo common/Unityping.glob common/Unityping.v.beautified: common/Unityping.v lib/Coqlib.vo lib/Maps.vo common/Errors.vo
 backend/Cminor.vo backend/Cminor.glob backend/Cminor.v.beautified: backend/Cminor.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Events.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Smallstep.vo common/Switch.vo
-$(ARCH)/Op.vo $(ARCH)/Op.glob $(ARCH)/Op.v.beautified: $(ARCH)/Op.v lib/Coqlib.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Events.vo
+$(ARCH)/Op.vo $(ARCH)/Op.glob $(ARCH)/Op.v.beautified: $(ARCH)/Op.v lib/Axioms.vo lib/Coqlib.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Events.vo
 backend/CminorSel.vo backend/CminorSel.glob backend/CminorSel.v.beautified: backend/CminorSel.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo common/Events.vo common/Values.vo common/Memory.vo backend/Cminor.vo $(ARCH)/Op.vo common/Globalenvs.vo common/Switch.vo common/Smallstep.vo
 $(ARCH)/SelectOp.vo $(ARCH)/SelectOp.glob $(ARCH)/SelectOp.v.beautified: $(ARCH)/SelectOp.v lib/Coqlib.vo driver/Compopts.vo common/AST.vo lib/Integers.vo lib/Floats.vo $(ARCH)/Op.vo backend/CminorSel.vo
 backend/SelectDiv.vo backend/SelectDiv.glob backend/SelectDiv.v.beautified: backend/SelectDiv.v lib/Coqlib.vo driver/Compopts.vo common/AST.vo lib/Integers.vo lib/Floats.vo $(ARCH)/Op.vo backend/CminorSel.vo $(ARCH)/SelectOp.vo
 backend/SelectLong.vo backend/SelectLong.glob backend/SelectLong.v.beautified: backend/SelectLong.v lib/Coqlib.vo common/AST.vo lib/Integers.vo lib/Floats.vo $(ARCH)/Op.vo backend/CminorSel.vo $(ARCH)/SelectOp.vo common/Errors.vo
 backend/Selection.vo backend/Selection.glob backend/Selection.v.beautified: backend/Selection.v lib/Coqlib.vo common/AST.vo common/Errors.vo lib/Integers.vo common/Globalenvs.vo backend/Cminor.vo $(ARCH)/Op.vo backend/CminorSel.vo $(ARCH)/SelectOp.vo backend/SelectDiv.vo backend/SelectLong.vo
-$(ARCH)/SelectOpproof.vo $(ARCH)/SelectOpproof.glob $(ARCH)/SelectOpproof.v.beautified: $(ARCH)/SelectOpproof.v lib/Coqlib.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo backend/Cminor.vo $(ARCH)/Op.vo backend/CminorSel.vo $(ARCH)/SelectOp.vo
+$(ARCH)/SelectOpproof.vo $(ARCH)/SelectOpproof.glob $(ARCH)/SelectOpproof.v.beautified: $(ARCH)/SelectOpproof.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo backend/Cminor.vo $(ARCH)/Op.vo backend/CminorSel.vo $(ARCH)/SelectOp.vo
 backend/SelectDivproof.vo backend/SelectDivproof.glob backend/SelectDivproof.v.beautified: backend/SelectDivproof.v lib/Coqlib.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Events.vo backend/Cminor.vo $(ARCH)/Op.vo backend/CminorSel.vo $(ARCH)/SelectOp.vo $(ARCH)/SelectOpproof.vo backend/SelectDiv.vo
 backend/SelectLongproof.vo backend/SelectLongproof.glob backend/SelectLongproof.v.beautified: backend/SelectLongproof.v lib/Coqlib.vo common/AST.vo common/Errors.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Events.vo backend/Cminor.vo $(ARCH)/Op.vo backend/CminorSel.vo $(ARCH)/SelectOp.vo $(ARCH)/SelectOpproof.vo backend/SelectLong.vo
 backend/Selectionproof.vo backend/Selectionproof.glob backend/Selectionproof.v.beautified: backend/Selectionproof.v lib/Coqlib.vo lib/Maps.vo common/AST.vo common/Errors.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo backend/Cminor.vo $(ARCH)/Op.vo backend/CminorSel.vo $(ARCH)/SelectOp.vo backend/SelectDiv.vo backend/SelectLong.vo backend/Selection.vo $(ARCH)/SelectOpproof.vo backend/SelectDivproof.vo backend/SelectLongproof.vo
@@ -59,20 +60,20 @@ $(ARCH)/ValueAOp.vo $(ARCH)/ValueAOp.glob $(ARCH)/ValueAOp.v.beautified: $(ARCH)
 backend/ValueAnalysis.vo backend/ValueAnalysis.glob backend/ValueAnalysis.v.beautified: backend/ValueAnalysis.v lib/Coqlib.vo lib/Maps.vo driver/Compopts.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Events.vo lib/Lattice.vo backend/Kildall.vo backend/Registers.vo $(ARCH)/Op.vo backend/RTL.vo backend/ValueDomain.vo $(ARCH)/ValueAOp.vo backend/Liveness.vo lib/Axioms.vo
 $(ARCH)/ConstpropOp.vo $(ARCH)/ConstpropOp.glob $(ARCH)/ConstpropOp.v.beautified: $(ARCH)/ConstpropOp.v lib/Coqlib.vo driver/Compopts.vo common/AST.vo lib/Integers.vo lib/Floats.vo $(ARCH)/Op.vo backend/Registers.vo backend/ValueDomain.vo
 backend/Constprop.vo backend/Constprop.glob backend/Constprop.v.beautified: backend/Constprop.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo lib/Lattice.vo backend/Kildall.vo backend/Liveness.vo backend/ValueDomain.vo $(ARCH)/ValueAOp.vo backend/ValueAnalysis.vo $(ARCH)/ConstpropOp.vo
-$(ARCH)/ConstpropOpproof.vo $(ARCH)/ConstpropOpproof.glob $(ARCH)/ConstpropOpproof.v.beautified: $(ARCH)/ConstpropOpproof.v lib/Coqlib.vo driver/Compopts.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Events.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/ValueDomain.vo $(ARCH)/ConstpropOp.vo
+$(ARCH)/ConstpropOpproof.vo $(ARCH)/ConstpropOpproof.glob $(ARCH)/ConstpropOpproof.v.beautified: $(ARCH)/ConstpropOpproof.v lib/Coqlib.vo driver/Compopts.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Events.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/ValueDomain.vo $(ARCH)/ConstpropOp.vo
 backend/Constpropproof.vo backend/Constpropproof.glob backend/Constpropproof.v.beautified: backend/Constpropproof.v lib/Coqlib.vo lib/Maps.vo driver/Compopts.vo common/AST.vo lib/Integers.vo common/Values.vo common/Events.vo common/Memory.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo lib/Lattice.vo backend/Kildall.vo backend/ValueDomain.vo $(ARCH)/ValueAOp.vo backend/ValueAnalysis.vo $(ARCH)/ConstpropOp.vo backend/Constprop.vo $(ARCH)/ConstpropOpproof.vo
 backend/CSEdomain.vo backend/CSEdomain.glob backend/CSEdomain.v.beautified: backend/CSEdomain.v lib/Coqlib.vo lib/Maps.vo common/AST.vo common/Values.vo common/Memory.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo
 $(ARCH)/CombineOp.vo $(ARCH)/CombineOp.glob $(ARCH)/CombineOp.v.beautified: $(ARCH)/CombineOp.v lib/Coqlib.vo common/AST.vo lib/Integers.vo $(ARCH)/Op.vo backend/CSEdomain.vo
 backend/CSE.vo backend/CSE.glob backend/CSE.v.beautified: backend/CSE.v lib/Coqlib.vo lib/Maps.vo common/Errors.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/ValueDomain.vo backend/ValueAnalysis.vo backend/CSEdomain.vo backend/Kildall.vo $(ARCH)/CombineOp.vo
-$(ARCH)/CombineOpproof.vo $(ARCH)/CombineOpproof.glob $(ARCH)/CombineOpproof.v.beautified: $(ARCH)/CombineOpproof.v lib/Coqlib.vo lib/Integers.vo common/Values.vo common/Memory.vo $(ARCH)/Op.vo backend/RTL.vo backend/CSEdomain.vo $(ARCH)/CombineOp.vo
+$(ARCH)/CombineOpproof.vo $(ARCH)/CombineOpproof.glob $(ARCH)/CombineOpproof.v.beautified: $(ARCH)/CombineOpproof.v lib/Coqlib.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/CSEdomain.vo $(ARCH)/CombineOp.vo
 backend/CSEproof.vo backend/CSEproof.glob backend/CSEproof.v.beautified: backend/CSEproof.v lib/Coqlib.vo lib/Maps.vo common/AST.vo common/Errors.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/Kildall.vo backend/ValueDomain.vo $(ARCH)/ValueAOp.vo backend/ValueAnalysis.vo backend/CSEdomain.vo $(ARCH)/CombineOp.vo $(ARCH)/CombineOpproof.vo backend/CSE.vo
 backend/NeedDomain.vo backend/NeedDomain.glob backend/NeedDomain.v.beautified: backend/NeedDomain.v lib/Coqlib.vo lib/Maps.vo lib/IntvSets.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Events.vo lib/Lattice.vo backend/Registers.vo backend/ValueDomain.vo $(ARCH)/Op.vo backend/RTL.vo
 $(ARCH)/NeedOp.vo $(ARCH)/NeedOp.glob $(ARCH)/NeedOp.v.beautified: $(ARCH)/NeedOp.v lib/Coqlib.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo $(ARCH)/Op.vo backend/NeedDomain.vo backend/RTL.vo
 backend/Deadcode.vo backend/Deadcode.glob backend/Deadcode.v.beautified: backend/Deadcode.v lib/Coqlib.vo common/Errors.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Memory.vo backend/Registers.vo $(ARCH)/Op.vo backend/RTL.vo lib/Lattice.vo backend/Kildall.vo backend/ValueDomain.vo backend/ValueAnalysis.vo backend/NeedDomain.vo $(ARCH)/NeedOp.vo
 backend/Deadcodeproof.vo backend/Deadcodeproof.glob backend/Deadcodeproof.v.beautified: backend/Deadcodeproof.v lib/Coqlib.vo common/Errors.vo lib/Maps.vo lib/IntvSets.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Events.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo lib/Lattice.vo backend/Kildall.vo backend/ValueDomain.vo backend/ValueAnalysis.vo backend/NeedDomain.vo $(ARCH)/NeedOp.vo backend/Deadcode.vo
-$(ARCH)/Machregs.vo $(ARCH)/Machregs.glob $(ARCH)/Machregs.v.beautified: $(ARCH)/Machregs.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo $(ARCH)/Op.vo
+$(ARCH)/Machregs.vo $(ARCH)/Machregs.glob $(ARCH)/Machregs.v.beautified: $(ARCH)/Machregs.v lib/Coqlib.vo lib/Maps.vo common/AST.vo $(ARCH)/Op.vo
 backend/Locations.vo backend/Locations.glob backend/Locations.v.beautified: backend/Locations.v lib/Coqlib.vo lib/Maps.vo lib/Ordered.vo common/AST.vo common/Values.vo $(ARCH)/Machregs.vo
-$(ARCH)/$(VARIANT)/Conventions1.vo $(ARCH)/$(VARIANT)/Conventions1.glob $(ARCH)/$(VARIANT)/Conventions1.v.beautified: $(ARCH)/$(VARIANT)/Conventions1.v lib/Coqlib.vo common/AST.vo backend/Locations.vo
+$(ARCH)/$(VARIANT)/Conventions1.vo $(ARCH)/$(VARIANT)/Conventions1.glob $(ARCH)/$(VARIANT)/Conventions1.v.beautified: $(ARCH)/$(VARIANT)/Conventions1.v lib/Coqlib.vo common/AST.vo common/Events.vo backend/Locations.vo
 backend/Conventions.vo backend/Conventions.glob backend/Conventions.v.beautified: backend/Conventions.v lib/Coqlib.vo common/AST.vo backend/Locations.vo $(ARCH)/$(VARIANT)/Conventions1.vo
 backend/LTL.vo backend/LTL.glob backend/LTL.v.beautified: backend/LTL.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo common/Values.vo common/Events.vo common/Memory.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/Conventions.vo
 backend/Allocation.vo backend/Allocation.glob backend/Allocation.v.beautified: backend/Allocation.v lib/FSetAVLplus.vo $(ARCH)/Archi.vo lib/Coqlib.vo lib/Ordered.vo common/Errors.vo lib/Maps.vo lib/Lattice.vo common/AST.vo lib/Integers.vo common/Memdata.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/Kildall.vo backend/Locations.vo backend/Conventions.vo backend/RTLtyping.vo backend/LTL.vo
@@ -80,7 +81,7 @@ backend/Allocproof.vo backend/Allocproof.glob backend/Allocproof.v.beautified: b
 backend/Tunneling.vo backend/Tunneling.glob backend/Tunneling.v.beautified: backend/Tunneling.v lib/Coqlib.vo lib/Maps.vo lib/UnionFind.vo common/AST.vo backend/LTL.vo
 backend/Tunnelingproof.vo backend/Tunnelingproof.glob backend/Tunnelingproof.v.beautified: backend/Tunnelingproof.v lib/Coqlib.vo lib/Maps.vo lib/UnionFind.vo common/AST.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTL.vo backend/Tunneling.vo
 backend/Linear.vo backend/Linear.glob backend/Linear.v.beautified: backend/Linear.v lib/Coqlib.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTL.vo backend/Conventions.vo
-backend/Lineartyping.vo backend/Lineartyping.glob backend/Lineartyping.v.beautified: backend/Lineartyping.v lib/Coqlib.vo lib/Ordered.vo lib/Maps.vo lib/Iteration.vo common/AST.vo lib/Integers.vo common/Values.vo common/Events.vo $(ARCH)/Op.vo $(ARCH)/Machregs.vo backend/Locations.vo backend/Conventions.vo backend/LTL.vo backend/Linear.vo common/Globalenvs.vo common/Memory.vo
+backend/Lineartyping.vo backend/Lineartyping.glob backend/Lineartyping.v.beautified: backend/Lineartyping.v lib/Coqlib.vo common/AST.vo lib/Integers.vo common/Values.vo common/Globalenvs.vo common/Memory.vo common/Events.vo $(ARCH)/Op.vo $(ARCH)/Machregs.vo backend/Locations.vo backend/Conventions.vo backend/LTL.vo backend/Linear.vo
 backend/Linearize.vo backend/Linearize.glob backend/Linearize.v.beautified: backend/Linearize.v lib/Coqlib.vo lib/Maps.vo lib/Ordered.vo common/AST.vo common/Errors.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTL.vo backend/Linear.vo backend/Kildall.vo lib/Lattice.vo
 backend/Linearizeproof.vo backend/Linearizeproof.glob backend/Linearizeproof.v.beautified: backend/Linearizeproof.v lib/Coqlib.vo lib/Maps.vo lib/Ordered.vo lib/Lattice.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Errors.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/LTL.vo backend/Linear.vo backend/Linearize.vo
 backend/CleanupLabels.vo backend/CleanupLabels.glob backend/CleanupLabels.v.beautified: backend/CleanupLabels.v lib/Coqlib.vo lib/Ordered.vo backend/Linear.vo
@@ -91,10 +92,10 @@ $(ARCH)/$(VARIANT)/Stacklayout.vo $(ARCH)/$(VARIANT)/Stacklayout.glob $(ARCH)/$(
 backend/Stacking.vo backend/Stacking.glob backend/Stacking.v.beautified: backend/Stacking.v lib/Coqlib.vo common/Errors.vo common/AST.vo lib/Integers.vo $(ARCH)/Op.vo backend/Locations.vo backend/Linear.vo backend/Bounds.vo backend/Mach.vo backend/Conventions.vo $(ARCH)/$(VARIANT)/Stacklayout.vo backend/Lineartyping.vo
 backend/Stackingproof.vo backend/Stackingproof.glob backend/Stackingproof.v.beautified: backend/Stackingproof.v lib/Coqlib.vo common/Errors.vo common/AST.vo lib/Integers.vo common/Values.vo $(ARCH)/Op.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo backend/Locations.vo backend/LTL.vo backend/Linear.vo backend/Lineartyping.vo backend/Mach.vo backend/Bounds.vo backend/Conventions.vo $(ARCH)/$(VARIANT)/Stacklayout.vo backend/Stacking.vo
 $(ARCH)/Asm.vo $(ARCH)/Asm.glob $(ARCH)/Asm.v.beautified: $(ARCH)/Asm.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo backend/Locations.vo $(ARCH)/$(VARIANT)/Stacklayout.vo backend/Conventions.vo
-$(ARCH)/Asmgen.vo $(ARCH)/Asmgen.glob $(ARCH)/Asmgen.v.beautified: $(ARCH)/Asmgen.v lib/Coqlib.vo common/Errors.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Memdata.vo $(ARCH)/Op.vo backend/Locations.vo backend/Mach.vo $(ARCH)/Asm.vo
+$(ARCH)/Asmgen.vo $(ARCH)/Asmgen.glob $(ARCH)/Asmgen.v.beautified: $(ARCH)/Asmgen.v lib/Coqlib.vo common/Errors.vo common/AST.vo lib/Integers.vo lib/Floats.vo $(ARCH)/Op.vo backend/Locations.vo backend/Mach.vo $(ARCH)/Asm.vo
 backend/Asmgenproof0.vo backend/Asmgenproof0.glob backend/Asmgenproof0.v.beautified: backend/Asmgenproof0.v lib/Coqlib.vo lib/Intv.vo common/AST.vo common/Errors.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Events.vo common/Smallstep.vo backend/Locations.vo backend/Mach.vo $(ARCH)/Asm.vo $(ARCH)/Asmgen.vo backend/Conventions.vo
-$(ARCH)/Asmgenproof1.vo $(ARCH)/Asmgenproof1.glob $(ARCH)/Asmgenproof1.v.beautified: $(ARCH)/Asmgenproof1.v lib/Coqlib.vo common/AST.vo common/Errors.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo $(ARCH)/Op.vo backend/Locations.vo backend/Mach.vo $(ARCH)/Asm.vo $(ARCH)/Asmgen.vo backend/Asmgenproof0.vo backend/Conventions.vo
-$(ARCH)/Asmgenproof.vo $(ARCH)/Asmgenproof.glob $(ARCH)/Asmgenproof.v.beautified: $(ARCH)/Asmgenproof.v lib/Coqlib.vo common/Errors.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/Mach.vo backend/Conventions.vo $(ARCH)/Asm.vo $(ARCH)/Asmgen.vo backend/Asmgenproof0.vo $(ARCH)/Asmgenproof1.vo
+$(ARCH)/Asmgenproof1.vo $(ARCH)/Asmgenproof1.glob $(ARCH)/Asmgenproof1.v.beautified: $(ARCH)/Asmgenproof1.v lib/Coqlib.vo common/Errors.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo $(ARCH)/Op.vo backend/Locations.vo backend/Mach.vo $(ARCH)/Asm.vo $(ARCH)/Asmgen.vo backend/Conventions.vo backend/Asmgenproof0.vo
+$(ARCH)/Asmgenproof.vo $(ARCH)/Asmgenproof.glob $(ARCH)/Asmgenproof.v.beautified: $(ARCH)/Asmgenproof.v lib/Coqlib.vo lib/Maps.vo common/Errors.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/Conventions.vo backend/Mach.vo $(ARCH)/Asm.vo $(ARCH)/Asmgen.vo backend/Asmgenproof0.vo $(ARCH)/Asmgenproof1.vo
 cfrontend/Ctypes.vo cfrontend/Ctypes.glob cfrontend/Ctypes.v.beautified: cfrontend/Ctypes.v lib/Coqlib.vo common/AST.vo common/Errors.vo $(ARCH)/Archi.vo
 cfrontend/Cop.vo cfrontend/Cop.glob cfrontend/Cop.v.beautified: cfrontend/Cop.v lib/Coqlib.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo cfrontend/Ctypes.vo
 cfrontend/Csyntax.vo cfrontend/Csyntax.glob cfrontend/Csyntax.v.beautified: cfrontend/Csyntax.v lib/Coqlib.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/AST.vo cfrontend/Ctypes.vo cfrontend/Cop.vo
diff --git a/Makefile b/Makefile
index 9cad0e83..6bbf114b 100644
--- a/Makefile
+++ b/Makefile
@@ -66,7 +66,8 @@ FLOCQ=\
 # General-purpose libraries (in lib/)
 
 LIB=Axioms.v Coqlib.v Intv.v Maps.v Heaps.v Lattice.v Ordered.v \
-  Iteration.v Integers.v Archi.v Floats.v Parmov.v UnionFind.v Wfsimpl.v \
+  Iteration.v Integers.v Archi.v Fappli_IEEE_extra.v Floats.v \
+  Parmov.v UnionFind.v Wfsimpl.v \
   Postorder.v FSetAVLplus.v IntvSets.v
 
 # Parts common to the front-ends and the back-end (in common/)
diff --git a/arm/Archi.v b/arm/Archi.v
index e6935413..5657f31d 100644
--- a/arm/Archi.v
+++ b/arm/Archi.v
@@ -25,12 +25,24 @@ Definition big_endian := false.
 Notation align_int64 := 8%Z (only parsing).
 Notation align_float64 := 8%Z (only parsing).
 
-Program Definition default_pl : bool * nan_pl 53 := (false, nat_iter 51 xO xH).
+Program Definition default_pl_64 : bool * nan_pl 53 :=
+  (false, nat_iter 51 xO xH).
 
-Definition choose_binop_pl (s1: bool) (pl1: nan_pl 53) (s2: bool) (pl2: nan_pl 53) :=
+Definition choose_binop_pl_64 (s1: bool) (pl1: nan_pl 53) (s2: bool) (pl2: nan_pl 53) :=
   (** Choose second NaN if pl2 is sNaN but pl1 is qNan.
       In all other cases, choose first NaN *)
   (Pos.testbit (proj1_sig pl1) 51 &&
    negb (Pos.testbit (proj1_sig pl2) 51))%bool.
 
-Global Opaque big_endian default_pl choose_binop_pl.
+Program Definition default_pl_32 : bool * nan_pl 24 :=
+  (false, nat_iter 22 xO xH).
+
+Definition choose_binop_pl_32 (s1: bool) (pl1: nan_pl 24) (s2: bool) (pl2: nan_pl 24) :=
+  (** Choose second NaN if pl2 is sNaN but pl1 is qNan.
+      In all other cases, choose first NaN *)
+  (Pos.testbit (proj1_sig pl1) 22 &&
+   negb (Pos.testbit (proj1_sig pl2) 22))%bool.
+
+Global Opaque big_endian
+              default_pl_64 choose_binop_pl_64
+              default_pl_32 choose_binop_pl_32.
diff --git a/arm/Asm.v b/arm/Asm.v
index 9d3ba5b9..f054db04 100644
--- a/arm/Asm.v
+++ b/arm/Asm.v
@@ -143,6 +143,7 @@ Inductive instruction : Type :=
   | Pcmp: ireg -> shift_op -> instruction         (**r integer comparison *)
   | Peor: ireg -> ireg -> shift_op -> instruction (**r bitwise exclusive or *)
   | Pldr: ireg -> ireg -> shift_addr -> instruction (**r int32 load *)
+  | Pldr_a: ireg -> ireg -> shift_addr -> instruction (**r any32 load to int register *)
   | Pldrb: ireg -> ireg -> shift_addr -> instruction (**r unsigned int8 load *)
   | Pldrh: ireg -> ireg -> shift_addr -> instruction (**r unsigned int16 load *)
   | Pldrsb: ireg -> ireg -> shift_addr -> instruction (**r signed int8 load *)
@@ -155,6 +156,7 @@ Inductive instruction : Type :=
   | Porr: ireg -> ireg -> shift_op -> instruction  (**r bitwise or *)
   | Prsb: ireg -> ireg -> shift_op -> instruction  (**r integer reverse subtraction *)
   | Pstr: ireg -> ireg -> shift_addr -> instruction (**r int32 store *)
+  | Pstr_a: ireg -> ireg -> shift_addr -> instruction (**r any32 store from int register *)
   | Pstrb: ireg -> ireg -> shift_addr -> instruction (**r int8 store *)
   | Pstrh: ireg -> ireg -> shift_addr -> instruction (**r int16 store *)
   | Psdiv: instruction                              (**r signed division *)
@@ -172,15 +174,31 @@ Inductive instruction : Type :=
   | Pfsubd: freg -> freg -> freg -> instruction     (**r float subtraction *)
   | Pflid: freg -> float -> instruction             (**r load float constant *)
   | Pfcmpd: freg -> freg -> instruction             (**r float comparison *)
-  | Pfcmpzd: freg -> instruction             (**r float comparison with 0.0 *)
+  | Pfcmpzd: freg -> instruction                    (**r float comparison with 0.0 *)
   | Pfsitod: freg -> ireg -> instruction            (**r signed int to float *)
   | Pfuitod: freg -> ireg -> instruction            (**r unsigned int to float *)
   | Pftosizd: ireg -> freg -> instruction           (**r float to signed int *)
   | Pftouizd: ireg -> freg -> instruction           (**r float to unsigned int *)
-  | Pfcvtsd: freg -> freg -> instruction            (**r round to singled precision *)
+  | Pfabss: freg -> freg -> instruction             (**r float absolute value *)
+  | Pfnegs: freg -> freg -> instruction             (**r float opposite *)
+  | Pfadds: freg -> freg -> freg -> instruction     (**r float addition *)
+  | Pfdivs: freg -> freg -> freg -> instruction     (**r float division *)
+  | Pfmuls: freg -> freg -> freg -> instruction     (**r float multiplication *)
+  | Pfsubs: freg -> freg -> freg -> instruction     (**r float subtraction *)
+  | Pflis: freg -> float32 -> instruction           (**r load float constant *)
+  | Pfcmps: freg -> freg -> instruction             (**r float comparison *)
+  | Pfcmpzs: freg -> instruction                    (**r float comparison with 0.0 *)
+  | Pfsitos: freg -> ireg -> instruction            (**r signed int to float *)
+  | Pfuitos: freg -> ireg -> instruction            (**r unsigned int to float *)
+  | Pftosizs: ireg -> freg -> instruction           (**r float to signed int *)
+  | Pftouizs: ireg -> freg -> instruction           (**r float to unsigned int *)
+  | Pfcvtsd: freg -> freg -> instruction            (**r round to single precision *)
+  | Pfcvtds: freg -> freg -> instruction            (**r expand to double precision *)
   | Pfldd: freg -> ireg -> int -> instruction       (**r float64 load *)
+  | Pfldd_a: freg -> ireg -> int -> instruction     (**r any64 load to FP reg *)
   | Pflds: freg -> ireg -> int -> instruction       (**r float32 load *)
   | Pfstd: freg -> ireg -> int -> instruction       (**r float64 store *)
+  | Pfstd_a: freg -> ireg -> int -> instruction     (**r any64 store from FP reg *)
   | Pfsts: freg -> ireg -> int -> instruction       (**r float32 store *)
 
   (* Pseudo-instructions *)
@@ -392,7 +410,7 @@ Definition compare_int (rs: regset) (v1 v2: val) (m: mem) :=
     #CC <- (Val.cmpu (Mem.valid_pointer m) Cge v1 v2)
     #CV <- (Val.sub_overflow v1 v2).
 
-(** Semantics of [fcmpd] instruction:
+(** Semantics of [fcmp] instructions:
 <<
 ==	N=0 Z=1 C=1 V=0
 <	N=1 Z=0 C=0 V=0
@@ -415,6 +433,20 @@ Definition compare_float (rs: regset) (v1 v2: val) :=
         #CV <- Vundef
   end.
 
+Definition compare_float32 (rs: regset) (v1 v2: val) :=
+  match v1, v2 with
+  | Vsingle f1, Vsingle f2 =>
+      rs#CN <- (Val.of_bool (Float32.cmp Clt f1 f2))
+        #CZ <- (Val.of_bool (Float32.cmp Ceq f1 f2))
+        #CC <- (Val.of_bool (negb (Float32.cmp Clt f1 f2)))
+        #CV <- (Val.of_bool (negb (Float32.cmp Ceq f1 f2 || Float32.cmp Clt f1 f2 || Float32.cmp Cgt f1 f2)))
+  | _, _ =>
+      rs#CN <- Vundef
+        #CZ <- Vundef
+        #CC <- Vundef
+        #CV <- Vundef
+  end.
+
 (** Testing a condition *)
 
 Definition eval_testcond (c: testcond) (rs: regset) : option bool :=
@@ -523,6 +555,8 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       Next (nextinstr (rs#r1 <- (Val.xor rs#r2 (eval_shift_op so rs)))) m
   | Pldr r1 r2 sa => 
       exec_load Mint32 (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m
+  | Pldr_a r1 r2 sa => 
+      exec_load Many32 (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m
   | Pldrb r1 r2 sa => 
       exec_load Mint8unsigned (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m
   | Pldrh r1 r2 sa => 
@@ -551,6 +585,8 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       Next (nextinstr (rs#r1 <- (Val.sub (eval_shift_op so rs) rs#r2))) m
   | Pstr r1 r2 sa => 
       exec_store Mint32 (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m
+  | Pstr_a r1 r2 sa => 
+      exec_store Many32 (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m
   | Pstrb r1 r2 sa => 
       exec_store Mint8unsigned (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m
   | Pstrh r1 r2 sa => 
@@ -606,19 +642,48 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       Next (nextinstr (rs #FR6 <- Vundef #r1 <- (Val.maketotal (Val.intoffloat rs#r2)))) m
   | Pftouizd r1 r2 =>
       Next (nextinstr (rs #FR6 <- Vundef #r1 <- (Val.maketotal (Val.intuoffloat rs#r2)))) m
+  | Pfabss r1 r2 => 
+      Next (nextinstr (rs#r1 <- (Val.absfs rs#r2))) m
+  | Pfnegs r1 r2 => 
+      Next (nextinstr (rs#r1 <- (Val.negfs rs#r2))) m
+  | Pfadds r1 r2 r3 =>     
+      Next (nextinstr (rs#r1 <- (Val.addfs rs#r2 rs#r3))) m
+  | Pfdivs r1 r2 r3 =>     
+      Next (nextinstr (rs#r1 <- (Val.divfs rs#r2 rs#r3))) m
+  | Pfmuls r1 r2 r3 =>     
+      Next (nextinstr (rs#r1 <- (Val.mulfs rs#r2 rs#r3))) m
+  | Pfsubs r1 r2 r3 =>     
+      Next (nextinstr (rs#r1 <- (Val.subfs rs#r2 rs#r3))) m
+  | Pflis r1 f =>            
+      Next (nextinstr (rs#r1 <- (Vsingle f))) m
+  | Pfcmps r1 r2 =>              
+      Next (nextinstr (compare_float32 rs rs#r1 rs#r2)) m
+  | Pfcmpzs r1 =>              
+      Next (nextinstr (compare_float32 rs rs#r1 (Vsingle Float32.zero))) m
+  | Pfsitos r1 r2 =>
+      Next (nextinstr (rs#r1 <- (Val.maketotal (Val.singleofint rs#r2)))) m
+  | Pfuitos r1 r2 =>
+      Next (nextinstr (rs#r1 <- (Val.maketotal (Val.singleofintu rs#r2)))) m
+  | Pftosizs r1 r2 =>
+      Next (nextinstr (rs #FR6 <- Vundef #r1 <- (Val.maketotal (Val.intofsingle rs#r2)))) m
+  | Pftouizs r1 r2 =>
+      Next (nextinstr (rs #FR6 <- Vundef #r1 <- (Val.maketotal (Val.intuofsingle rs#r2)))) m
   | Pfcvtsd r1 r2 =>
       Next (nextinstr (rs#r1 <- (Val.singleoffloat rs#r2))) m
+  | Pfcvtds r1 r2 =>
+      Next (nextinstr (rs#r1 <- (Val.floatofsingle rs#r2))) m
   | Pfldd r1 r2 n =>
       exec_load Mfloat64 (Val.add rs#r2 (Vint n)) r1 rs m
+  | Pfldd_a r1 r2 n =>
+      exec_load Many64 (Val.add rs#r2 (Vint n)) r1 rs m
   | Pflds r1 r2 n =>
       exec_load Mfloat32 (Val.add rs#r2 (Vint n)) r1 rs m
   | Pfstd r1 r2 n =>      
       exec_store Mfloat64 (Val.add rs#r2 (Vint n)) r1 rs m
+  | Pfstd_a r1 r2 n =>      
+      exec_store Many64 (Val.add rs#r2 (Vint n)) r1 rs m
   | Pfsts r1 r2 n =>
-      match exec_store Mfloat32 (Val.add rs#r2 (Vint n)) r1 rs m with
-      | Next rs' m' => Next (rs'#FR6 <- Vundef) m'
-      | Stuck => Stuck
-      end
+      exec_store Mfloat32 (Val.add rs#r2 (Vint n)) r1 rs m
   (* Pseudo-instructions *)
   | Pallocframe sz pos =>             
       let (m1, stk) := Mem.alloc m 0 sz in
diff --git a/arm/Asmgen.v b/arm/Asmgen.v
index 2513a5e8..fa4faa60 100644
--- a/arm/Asmgen.v
+++ b/arm/Asmgen.v
@@ -185,6 +185,18 @@ Definition transl_cond
   | Cnotcompfzero cmp, a1 :: nil =>
       do r1 <- freg_of a1;
       OK (Pfcmpzd r1 :: k)
+  | Ccompfs cmp, a1 :: a2 :: nil =>
+      do r1 <- freg_of a1; do r2 <- freg_of a2;
+      OK (Pfcmps r1 r2 :: k)
+  | Cnotcompfs cmp, a1 :: a2 :: nil =>
+      do r1 <- freg_of a1; do r2 <- freg_of a2;
+      OK (Pfcmps r1 r2 :: k)
+  | Ccompfszero cmp, a1 :: nil =>
+      do r1 <- freg_of a1;
+      OK (Pfcmpzs r1 :: k)
+  | Cnotcompfszero cmp, a1 :: nil =>
+      do r1 <- freg_of a1;
+      OK (Pfcmpzs r1 :: k)
   | _, _ =>
       Error(msg "Asmgen.transl_cond")
   end.
@@ -241,6 +253,10 @@ Definition cond_for_cond (cond: condition) :=
   | Cnotcompf cmp => cond_for_float_not_cmp cmp
   | Ccompfzero cmp => cond_for_float_cmp cmp
   | Cnotcompfzero cmp => cond_for_float_not_cmp cmp
+  | Ccompfs cmp => cond_for_float_cmp cmp
+  | Cnotcompfs cmp => cond_for_float_not_cmp cmp
+  | Ccompfszero cmp => cond_for_float_cmp cmp
+  | Cnotcompfszero cmp => cond_for_float_not_cmp cmp
   end.
 
 (** Translation of the arithmetic operation [r <- op(args)].
@@ -261,6 +277,9 @@ Definition transl_op
   | Ofloatconst f, nil =>
       do r <- freg_of res;
       OK (Pflid r f :: k)
+  | Osingleconst f, nil =>
+      do r <- freg_of res;
+      OK (Pflis r f :: k)
   | Oaddrsymbol s ofs, nil =>
       do r <- ireg_of res;
       OK (Ploadsymbol r s ofs :: k)
@@ -400,9 +419,30 @@ Definition transl_op
   | Odivf, a1 :: a2 :: nil =>
       do r <- freg_of res; do r1 <- freg_of a1; do r2 <- freg_of a2;
       OK (Pfdivd r r1 r2 :: k)
+  | Onegfs, a1 :: nil =>
+      do r <- freg_of res; do r1 <- freg_of a1;
+      OK (Pfnegs r r1 :: k)
+  | Oabsfs, a1 :: nil =>
+      do r <- freg_of res; do r1 <- freg_of a1;
+      OK (Pfabss r r1 :: k)
+  | Oaddfs, a1 :: a2 :: nil =>
+      do r <- freg_of res; do r1 <- freg_of a1; do r2 <- freg_of a2;
+      OK (Pfadds r r1 r2 :: k)
+  | Osubfs, a1 :: a2 :: nil =>
+      do r <- freg_of res; do r1 <- freg_of a1; do r2 <- freg_of a2;
+      OK (Pfsubs r r1 r2 :: k)
+  | Omulfs, a1 :: a2 :: nil =>
+      do r <- freg_of res; do r1 <- freg_of a1; do r2 <- freg_of a2;
+      OK (Pfmuls r r1 r2 :: k)
+  | Odivfs, a1 :: a2 :: nil =>
+      do r <- freg_of res; do r1 <- freg_of a1; do r2 <- freg_of a2;
+      OK (Pfdivs r r1 r2 :: k)
   | Osingleoffloat, a1 :: nil =>
       do r <- freg_of res; do r1 <- freg_of a1;
       OK (Pfcvtsd r r1 :: k)
+  | Ofloatofsingle, a1 :: nil =>
+      do r <- freg_of res; do r1 <- freg_of a1;
+      OK (Pfcvtds r r1 :: k)
   | Ointoffloat, a1 :: nil =>
       do r <- ireg_of res; do r1 <- freg_of a1;
       OK (Pftosizd r r1 :: k)
@@ -415,6 +455,18 @@ Definition transl_op
   | Ofloatofintu, a1 :: nil =>
       do r <- freg_of res; do r1 <- ireg_of a1;
       OK (Pfuitod r r1 :: k)
+  | Ointofsingle, a1 :: nil =>
+      do r <- ireg_of res; do r1 <- freg_of a1;
+      OK (Pftosizs r r1 :: k)
+  | Ointuofsingle, a1 :: nil =>
+      do r <- ireg_of res; do r1 <- freg_of a1;
+      OK (Pftouizs r r1 :: k)
+  | Osingleofint, a1 :: nil =>
+      do r <- freg_of res; do r1 <- ireg_of a1;
+      OK (Pfsitos r r1 :: k)
+  | Osingleofintu, a1 :: nil =>
+      do r <- freg_of res; do r1 <- ireg_of a1;
+      OK (Pfuitos r r1 :: k)
   | Ocmp cmp, _ =>
       do r <- ireg_of res;
       transl_cond cmp args
@@ -440,31 +492,34 @@ Definition loadind_int (base: ireg) (ofs: int) (dst: ireg) (k: code) :=
   indexed_memory_access (fun base n => Pldr dst base (SAimm n)) mk_immed_mem_word base ofs k.
 
 Definition loadind (base: ireg) (ofs: int) (ty: typ) (dst: mreg) (k: code) :=
-  match ty with
-  | Tint =>
-      do r <- ireg_of dst; OK (loadind_int base ofs r k)
-  | Tfloat =>
-      do r <- freg_of dst;
-      OK (indexed_memory_access (Pfldd r) mk_immed_mem_float base ofs k)
-  | Tsingle =>
-      do r <- freg_of dst;
+  match ty, preg_of dst with
+  | Tint, IR r =>
+      OK (indexed_memory_access (fun base n => Pldr r base (SAimm n)) mk_immed_mem_word base ofs k)
+  | Tany32, IR r =>
+      OK (indexed_memory_access (fun base n => Pldr_a r base (SAimm n)) mk_immed_mem_word base ofs k)
+  | Tsingle, FR r =>
       OK (indexed_memory_access (Pflds r) mk_immed_mem_float base ofs k)
-  | Tlong =>
+  | Tfloat, FR r =>
+      OK (indexed_memory_access (Pfldd r) mk_immed_mem_float base ofs k)
+  | Tany64, FR r =>
+      OK (indexed_memory_access (Pfldd_a r) mk_immed_mem_float base ofs k)
+  | _, _ =>
       Error (msg "Asmgen.loadind")
   end.
 
 Definition storeind (src: mreg) (base: ireg) (ofs: int) (ty: typ) (k: code) :=
-  match ty with
-  | Tint =>
-      do r <- ireg_of src;
+  match ty, preg_of src with
+  | Tint, IR r =>
       OK (indexed_memory_access (fun base n => Pstr r base (SAimm n)) mk_immed_mem_word base ofs k)
-  | Tfloat =>
-      do r <- freg_of src;
-      OK (indexed_memory_access (Pfstd r) mk_immed_mem_float base ofs k)
-  | Tsingle =>
-      do r <- freg_of src;
+  | Tany32, IR r =>
+      OK (indexed_memory_access (fun base n => Pstr_a r base (SAimm n)) mk_immed_mem_word base ofs k)
+  | Tsingle, FR r =>
       OK (indexed_memory_access (Pfsts r) mk_immed_mem_float base ofs k)
-  | Tlong =>
+  | Tfloat, FR r =>
+      OK (indexed_memory_access (Pfstd r) mk_immed_mem_float base ofs k)
+  | Tany64, FR r =>
+      OK (indexed_memory_access (Pfstd_a r) mk_immed_mem_float base ofs k)
+  | _, _ =>
       Error (msg "Asmgen.storeind")
   end.
 
@@ -546,7 +601,7 @@ Definition transl_load (chunk: memory_chunk) (addr: addressing)
       transl_memory_access_float Pflds mk_immed_mem_float dst addr args k
   | Mfloat64 =>
       transl_memory_access_float Pfldd mk_immed_mem_float dst addr args k
-  | Mint64 =>
+  | _ =>
       Error (msg "Asmgen.transl_load")
   end.
 
@@ -567,7 +622,7 @@ Definition transl_store (chunk: memory_chunk) (addr: addressing)
       transl_memory_access_float Pfsts mk_immed_mem_float src addr args k
   | Mfloat64 =>
       transl_memory_access_float Pfstd mk_immed_mem_float src addr args k
-  | Mint64 =>
+  | _ =>
       Error (msg "Asmgen.transl_store")
   end.
 
diff --git a/arm/Asmgenproof.v b/arm/Asmgenproof.v
index cfe4f542..341f6a01 100644
--- a/arm/Asmgenproof.v
+++ b/arm/Asmgenproof.v
@@ -237,16 +237,15 @@ Hint Resolve indexed_memory_access_label.
 Remark loadind_label:
   forall base ofs ty dst k c, loadind base ofs ty dst k = OK c -> tail_nolabel k c.
 Proof.
-  intros. destruct ty; monadInv H.
-  unfold loadind_int; TailNoLabel.
-  TailNoLabel.
-  TailNoLabel.
+  unfold loadind, loadind_int; intros;
+  destruct ty, (preg_of dst); inv H; TailNoLabel.
 Qed.
 
 Remark storeind_label:
   forall base ofs ty src k c, storeind src base ofs ty k = OK c -> tail_nolabel k c.
 Proof.
-  intros. destruct ty; monadInv H; TailNoLabel.
+  unfold storeind; intros;
+  destruct ty, (preg_of src); inv H; TailNoLabel.
 Qed.
 
 Remark transl_cond_label:
diff --git a/arm/Asmgenproof1.v b/arm/Asmgenproof1.v
index 3e002175..a0d67526 100644
--- a/arm/Asmgenproof1.v
+++ b/arm/Asmgenproof1.v
@@ -405,22 +405,6 @@ Proof.
   split. Simpl. intros; Simpl.
 Qed.
 
-(*
-Lemma loadind_float_correct:
-  forall (base: ireg) ofs dst (rs: regset) m v k,
-  Mem.loadv Mfloat64al32 m (Val.add rs#base (Vint ofs)) = Some v ->
-  exists rs',
-     exec_straight ge fn (loadind_float base ofs dst k) rs m k rs' m
-  /\ rs'#dst = v
-  /\ forall r, r <> PC -> r <> IR14 -> r <> dst -> rs'#r = rs#r.
-Proof.
-  intros; unfold loadind_float. apply indexed_memory_access_correct; intros.
-  econstructor; split. 
-  apply exec_straight_one. simpl. unfold exec_load. rewrite H0; rewrite H; eauto. auto.
-  split. Simpl. intros; Simpl.
-Qed.
-*)
-
 Lemma loadind_correct:
   forall (base: ireg) ofs ty dst k c (rs: regset) m v,
   loadind base ofs ty dst k = OK c ->
@@ -430,56 +414,32 @@ Lemma loadind_correct:
   /\ rs'#(preg_of dst) = v
   /\ forall r, r <> PC -> r <> IR14 -> r <> preg_of dst -> rs'#r = rs#r.
 Proof.
-  unfold loadind; intros.
-  destruct ty; monadInv H.
+  unfold loadind; intros. destruct ty; destruct (preg_of dst); inv H; simpl in H0.
 - (* int *)
-  erewrite ireg_of_eq by eauto. apply loadind_int_correct; auto. 
+  apply loadind_int_correct; auto. 
 - (* float *)
-  erewrite freg_of_eq by eauto. simpl in H0.
   apply indexed_memory_access_correct; intros.
   econstructor; split. 
   apply exec_straight_one. simpl. unfold exec_load. rewrite H. rewrite H0. eauto. auto. 
   split. Simpl. intros; Simpl.
 - (* single *)
-  erewrite freg_of_eq by eauto. simpl in H0.
   apply indexed_memory_access_correct; intros.
   econstructor; split. 
   apply exec_straight_one. simpl. unfold exec_load. rewrite H. rewrite H0. eauto. auto. 
   split. Simpl. intros; Simpl.
-Qed.
-
-(** Indexed memory stores. *)
-
-(*
-Lemma storeind_int_correct:
-  forall (base: ireg) ofs (src: ireg) (rs: regset) m m' k,
-  Mem.storev Mint32 m (Val.add rs#base (Vint ofs)) (rs#src) = Some m' ->
-  src <> IR14 ->
-  exists rs',
-     exec_straight ge fn (storeind_int src base ofs k) rs m k rs' m'
-  /\ forall r, r <> PC -> r <> IR14 -> rs'#r = rs#r.
-Proof.
-  intros; unfold storeind_int. apply indexed_memory_access_correct; intros.
+- (* any32 *)
+  apply indexed_memory_access_correct; intros.
   econstructor; split. 
-  apply exec_straight_one. simpl. unfold exec_store. 
-  rewrite H1. rewrite H2; auto with asmgen. rewrite H; eauto. auto.
-  intros; Simpl.
-Qed.
-
-Lemma storeind_float_correct:
-  forall (base: ireg) ofs (src: freg) (rs: regset) m m' k,
-  Mem.storev Mfloat64al32 m (Val.add rs#base (Vint ofs)) (rs#src) = Some m' ->
-  exists rs',
-     exec_straight ge fn (storeind_float src base ofs k) rs m k rs' m'
-  /\ forall r, r <> PC -> r <> IR14 -> rs'#r = rs#r.
-Proof.
-  intros; unfold storeind_float. apply indexed_memory_access_correct; intros.
+  apply exec_straight_one. simpl. unfold exec_load. rewrite H. rewrite H0. eauto. auto. 
+  split. Simpl. intros; Simpl.
+- (* any64 *)
+  apply indexed_memory_access_correct; intros.
   econstructor; split. 
-  apply exec_straight_one. simpl. unfold exec_store. 
-  rewrite H0. rewrite H1; auto with asmgen. rewrite H; eauto. auto.
-  intros; Simpl.
+  apply exec_straight_one. simpl. unfold exec_load. rewrite H. rewrite H0. eauto. auto. 
+  split. Simpl. intros; Simpl.
 Qed.
-*)
+
+(** Indexed memory stores. *)
 
 Lemma storeind_correct:
   forall (base: ireg) ofs ty src k c (rs: regset) m m',
@@ -490,29 +450,38 @@ Lemma storeind_correct:
   /\ forall r, r <> PC -> r <> IR14 -> preg_notin r (destroyed_by_setstack ty) -> rs'#r = rs#r.
 Proof.
   unfold storeind; intros.
-  destruct ty; monadInv H; simpl in H0.
+  assert (NOT14: preg_of src <> IR IR14) by eauto with asmgen.
+  destruct ty; destruct (preg_of src); inv H; simpl in H0.
 - (* int *)
-  erewrite ireg_of_eq  in H0 by eauto.
   apply indexed_memory_access_correct; intros.
   econstructor; split. 
   apply exec_straight_one. simpl. unfold exec_store. 
-  rewrite H. rewrite H1; auto with asmgen. rewrite H0; eauto.
-  assert (IR x <> IR IR14) by eauto with asmgen. congruence.
-  auto. intros; Simpl.
+  rewrite H. rewrite H1; auto with asmgen. rewrite H0; eauto. auto.
+  intros; Simpl.
 - (* float *)
-  erewrite freg_of_eq  in H0 by eauto.
   apply indexed_memory_access_correct; intros.
   econstructor; split. 
   apply exec_straight_one. simpl. unfold exec_store. 
-  rewrite H. rewrite H1; auto with asmgen. rewrite H0; eauto.
-  auto. intros; Simpl.
+  rewrite H. rewrite H1; auto with asmgen. rewrite H0; eauto. auto.
+  intros; Simpl.
 - (* single *)
-  erewrite freg_of_eq  in H0 by eauto.
   apply indexed_memory_access_correct; intros.
   econstructor; split. 
   apply exec_straight_one. simpl. unfold exec_store. 
-  rewrite H. rewrite H1; auto with asmgen. rewrite H0; eauto.
-  auto. intros; Simpl.
+  rewrite H. rewrite H1; auto with asmgen. rewrite H0; eauto. auto.
+  intros; Simpl.
+- (* any32 *)
+  apply indexed_memory_access_correct; intros.
+  econstructor; split. 
+  apply exec_straight_one. simpl. unfold exec_store. 
+  rewrite H. rewrite H1; auto with asmgen. rewrite H0; eauto. auto.
+  intros; Simpl.
+- (* any64 *)
+  apply indexed_memory_access_correct; intros.
+  econstructor; split. 
+  apply exec_straight_one. simpl. unfold exec_store. 
+  rewrite H. rewrite H1; auto with asmgen. rewrite H0; eauto. auto.
+  intros; Simpl.
 Qed.
 
 (** Translation of shift immediates *)
@@ -775,6 +744,112 @@ Proof.
   exfalso; eapply Float.cmp_lt_gt_false; eauto.
 Qed.
 
+Lemma compare_float32_spec:
+  forall rs f1 f2,
+  let rs1 := nextinstr (compare_float32 rs (Vsingle f1) (Vsingle f2)) in
+     rs1#CN = Val.of_bool (Float32.cmp Clt f1 f2)
+  /\ rs1#CZ = Val.of_bool (Float32.cmp Ceq f1 f2)
+  /\ rs1#CC = Val.of_bool (negb (Float32.cmp Clt f1 f2))
+  /\ rs1#CV = Val.of_bool (negb (Float32.cmp Ceq f1 f2 || Float32.cmp Clt f1 f2 || Float32.cmp Cgt f1 f2)).
+Proof.
+  intros. intuition.
+Qed.
+
+Lemma compare_float32_inv:
+  forall rs v1 v2,
+  let rs1 := nextinstr (compare_float32 rs v1 v2) in
+  forall r', data_preg r' = true -> rs1#r' = rs#r'.
+Proof.
+  intros. unfold rs1, compare_float32.
+  assert (nextinstr (rs#CN <- Vundef #CZ <- Vundef #CC <- Vundef #CV <- Vundef) r' = rs r').
+  { repeat Simplif. }
+  destruct v1; destruct v2; auto.
+  repeat Simplif. 
+Qed.
+
+Lemma compare_float32_nextpc:
+  forall rs v1 v2,
+  nextinstr (compare_float32 rs v1 v2) PC = Val.add (rs PC) Vone.
+Proof.
+  intros. unfold compare_float32. destruct v1; destruct v2; reflexivity.
+Qed.
+
+Lemma cond_for_float32_cmp_correct:
+  forall c n1 n2 rs,
+  eval_testcond (cond_for_float_cmp c)
+                (nextinstr (compare_float32 rs (Vsingle n1) (Vsingle n2))) =
+  Some(Float32.cmp c n1 n2).
+Proof.
+  intros.
+  generalize (compare_float32_spec rs n1 n2). 
+  set (rs' := nextinstr (compare_float32 rs (Vsingle n1) (Vsingle n2))).
+  intros [A [B [C D]]].
+  unfold eval_testcond. rewrite A; rewrite B; rewrite C; rewrite D.
+  destruct c; simpl.
+(* eq *)
+  destruct (Float32.cmp Ceq n1 n2); auto.
+(* ne *)
+  rewrite Float32.cmp_ne_eq. destruct (Float32.cmp Ceq n1 n2); auto.
+(* lt *)
+  destruct (Float32.cmp Clt n1 n2); auto.
+(* le *)
+  rewrite Float32.cmp_le_lt_eq. 
+  destruct (Float32.cmp Clt n1 n2); destruct (Float32.cmp Ceq n1 n2); auto.
+(* gt *)
+  destruct (Float32.cmp Ceq n1 n2) eqn:EQ;
+  destruct (Float32.cmp Clt n1 n2) eqn:LT;
+  destruct (Float32.cmp Cgt n1 n2) eqn:GT; auto.
+  exfalso; eapply Float32.cmp_lt_gt_false; eauto.
+  exfalso; eapply Float32.cmp_gt_eq_false; eauto.
+  exfalso; eapply Float32.cmp_lt_gt_false; eauto.
+(* ge *)
+  rewrite Float32.cmp_ge_gt_eq. 
+  destruct (Float32.cmp Ceq n1 n2) eqn:EQ;
+  destruct (Float32.cmp Clt n1 n2) eqn:LT;
+  destruct (Float32.cmp Cgt n1 n2) eqn:GT; auto.
+  exfalso; eapply Float32.cmp_lt_eq_false; eauto.
+  exfalso; eapply Float32.cmp_lt_eq_false; eauto.
+  exfalso; eapply Float32.cmp_lt_gt_false; eauto.
+Qed.
+
+Lemma cond_for_float32_not_cmp_correct:
+  forall c n1 n2 rs,
+  eval_testcond (cond_for_float_not_cmp c)
+                (nextinstr (compare_float32 rs (Vsingle n1) (Vsingle n2)))=
+  Some(negb(Float32.cmp c n1 n2)).
+Proof.
+  intros.
+  generalize (compare_float32_spec rs n1 n2).
+  set (rs' := nextinstr (compare_float32 rs (Vsingle n1) (Vsingle n2))).
+  intros [A [B [C D]]].
+  unfold eval_testcond. rewrite A; rewrite B; rewrite C; rewrite D.
+  destruct c; simpl.
+(* eq *)
+  destruct (Float32.cmp Ceq n1 n2); auto.
+(* ne *)
+  rewrite Float32.cmp_ne_eq. destruct (Float32.cmp Ceq n1 n2); auto.
+(* lt *)
+  destruct (Float32.cmp Clt n1 n2); auto.
+(* le *)
+  rewrite Float32.cmp_le_lt_eq. 
+  destruct (Float32.cmp Clt n1 n2) eqn:LT; destruct (Float32.cmp Ceq n1 n2) eqn:EQ; auto.
+(* gt *)
+  destruct (Float32.cmp Ceq n1 n2) eqn:EQ;
+  destruct (Float32.cmp Clt n1 n2) eqn:LT;
+  destruct (Float32.cmp Cgt n1 n2) eqn:GT; auto.
+  exfalso; eapply Float32.cmp_lt_gt_false; eauto.
+  exfalso; eapply Float32.cmp_gt_eq_false; eauto.
+  exfalso; eapply Float32.cmp_lt_gt_false; eauto.
+(* ge *)
+  rewrite Float32.cmp_ge_gt_eq. 
+  destruct (Float32.cmp Ceq n1 n2) eqn:EQ;
+  destruct (Float32.cmp Clt n1 n2) eqn:LT;
+  destruct (Float32.cmp Cgt n1 n2) eqn:GT; auto.
+  exfalso; eapply Float32.cmp_lt_eq_false; eauto.
+  exfalso; eapply Float32.cmp_lt_eq_false; eauto.
+  exfalso; eapply Float32.cmp_lt_gt_false; eauto.
+Qed.
+
 Ltac ArgsInv :=
   repeat (match goal with
   | [ H: Error _ = OK _ |- _ ] => discriminate
@@ -881,6 +956,37 @@ Local Opaque compare_float. simpl. apply cond_for_float_not_cmp_correct.
 Local Opaque compare_float. simpl. apply cond_for_float_not_cmp_correct.
   exact I.
   apply compare_float_inv.
+- (* Ccompfs *)
+  econstructor.
+  split. apply exec_straight_one. simpl. eauto. apply compare_float32_nextpc.
+  split. destruct (Val.cmpfs_bool c0 (rs x) (rs x0)) eqn:CMP; auto.
+  destruct (rs x); try discriminate. destruct (rs x0); try discriminate.
+  simpl in CMP. inv CMP. apply cond_for_float32_cmp_correct.
+  apply compare_float32_inv.
+- (* Cnotcompfs *)
+  econstructor.
+  split. apply exec_straight_one. simpl. eauto. apply compare_float32_nextpc.
+  split. destruct (Val.cmpfs_bool c0 (rs x) (rs x0)) eqn:CMP; auto.
+  destruct (rs x); try discriminate. destruct (rs x0); try discriminate.
+  simpl in CMP. inv CMP.
+Local Opaque compare_float32. simpl. apply cond_for_float32_not_cmp_correct.
+  exact I.
+  apply compare_float32_inv.
+- (* Ccompfszero *)
+  econstructor.
+  split. apply exec_straight_one. simpl. eauto. apply compare_float32_nextpc.
+  split. destruct (Val.cmpfs_bool c0 (rs x) (Vsingle Float32.zero)) eqn:CMP; auto.
+  destruct (rs x); try discriminate.
+  simpl in CMP. inv CMP. apply cond_for_float32_cmp_correct.
+  apply compare_float32_inv.
+- (* Cnotcompfzero *)
+  econstructor.
+  split. apply exec_straight_one. simpl. eauto. apply compare_float32_nextpc.
+  split. destruct (Val.cmpfs_bool c0 (rs x) (Vsingle Float32.zero)) eqn:CMP; auto.
+  destruct (rs x); try discriminate. simpl in CMP. inv CMP.
+  simpl. apply cond_for_float32_not_cmp_correct.
+  exact I.
+  apply compare_float32_inv.
 Qed.
 
 (** Translation of arithmetic operations. *)
@@ -1039,6 +1145,18 @@ Transparent destroyed_by_op.
   (* floatofintu *)
   econstructor; split. apply exec_straight_one; simpl. rewrite H0; simpl. eauto. auto.
   intuition Simpl.
+  (* intofsingle *)
+  econstructor; split. apply exec_straight_one; simpl. rewrite H0; simpl. eauto. auto.
+  simpl. intuition Simpl.
+  (* intuofsingle *)
+  econstructor; split. apply exec_straight_one; simpl. rewrite H0; simpl. eauto. auto.
+  simpl. intuition Simpl.
+  (* singleofint *)
+  econstructor; split. apply exec_straight_one; simpl. rewrite H0; simpl. eauto. auto.
+  intuition Simpl.
+  (* singleofintu *)
+  econstructor; split. apply exec_straight_one; simpl. rewrite H0; simpl. eauto. auto.
+  intuition Simpl.
   (* Ocmp *)
   contradiction.
 Qed.
@@ -1234,6 +1352,8 @@ Proof.
   discriminate.
   eapply transl_load_float_correct; eauto.
   eapply transl_load_float_correct; eauto.
+  discriminate.
+  discriminate.
 Qed.
 
 Lemma transl_store_correct:
@@ -1256,13 +1376,10 @@ Proof.
 - eapply transl_store_int_correct; eauto.
 - eapply transl_store_int_correct; eauto.
 - discriminate.
-- unfold transl_memory_access_float in H. monadInv H. rewrite (freg_of_eq _ _ EQ) in *. 
-  eapply transl_memory_access_correct; eauto. 
-  intros. econstructor; split. apply exec_straight_one.
-  simpl. unfold exec_store. rewrite H. rewrite H2; eauto with asmgen. 
-  rewrite H1. eauto. auto. intros. Simpl.
-  simpl; auto.
 - eapply transl_store_float_correct; eauto.
+- eapply transl_store_float_correct; eauto.
+- discriminate.
+- discriminate.
 Qed.
 
 End CONSTRUCTORS.
diff --git a/arm/ConstpropOp.vp b/arm/ConstpropOp.vp
index 2b658a4e..4f4bf5aa 100644
--- a/arm/ConstpropOp.vp
+++ b/arm/ConstpropOp.vp
@@ -70,6 +70,22 @@ Nondetfunction cond_strength_reduction
       if Float.eq_dec n2 Float.zero
       then (Cnotcompfzero c, r1 :: nil)
       else (cond, args)
+  | Ccompfs c, r1 :: r2 :: nil, FS n1 :: v2 :: nil =>
+      if Float32.eq_dec n1 Float32.zero
+      then (Ccompfszero (swap_comparison c), r2 :: nil)
+      else (cond, args)
+  | Ccompfs c, r1 :: r2 :: nil, v1 :: FS n2 :: nil =>
+      if Float32.eq_dec n2 Float32.zero
+      then (Ccompfszero c, r1 :: nil)
+      else (cond, args)
+  | Cnotcompfs c, r1 :: r2 :: nil, FS n1 :: v2 :: nil =>
+      if Float32.eq_dec n1 Float32.zero
+      then (Cnotcompfszero (swap_comparison c), r2 :: nil)
+      else (cond, args)
+  | Cnotcompfs c, r1 :: r2 :: nil, v1 :: FS n2 :: nil =>
+      if Float32.eq_dec n2 Float32.zero
+      then (Cnotcompfszero c, r1 :: nil)
+      else (cond, args)
   | _, _, _ => 
       (cond, args)
   end.
@@ -164,18 +180,19 @@ Definition make_xorimm (n: int) (r: reg) :=
   else (Oxorimm n, r :: nil).
 
 Definition make_mulfimm (n: float) (r r1 r2: reg) :=
-  if Float.eq_dec n (Float.floatofint (Int.repr 2))
+  if Float.eq_dec n (Float.of_int (Int.repr 2))
   then (Oaddf, r :: r :: nil)
   else (Omulf, r1 :: r2 :: nil).
 
+Definition make_mulfsimm (n: float32) (r r1 r2: reg) :=
+  if Float32.eq_dec n (Float32.of_int (Int.repr 2))
+  then (Oaddfs, r :: r :: nil)
+  else (Omulfs, r1 :: r2 :: nil).
+
 Definition make_cast8signed (r: reg) (a: aval) :=
   if vincl a (Sgn 8) then (Omove, r :: nil) else (Ocast8signed, r :: nil).
 Definition make_cast16signed (r: reg) (a: aval) :=
   if vincl a (Sgn 16) then (Omove, r :: nil) else (Ocast16signed, r :: nil).
-Definition make_singleoffloat (r: reg) (a: aval) :=
-  if vincl a Fsingle && generate_float_constants tt
-  then (Omove, r :: nil)
-  else (Osingleoffloat, r :: nil).
 
 Nondetfunction op_strength_reduction 
               (op: operation) (args: list reg) (vl: list aval) :=
@@ -207,10 +224,11 @@ Nondetfunction op_strength_reduction
   | Oshl, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shlimm n2 r1 r2
   | Oshr, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shrimm n2 r1 r2
   | Oshru, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shruimm n2 r1 r2
-  | Osingleoffloat, r1 :: nil, v1 :: nil => make_singleoffloat r1 v1
   | Ocmp c, args, vl => make_cmp c args vl
   | Omulf, r1 :: r2 :: nil, v1 :: F n2 :: nil => make_mulfimm n2 r1 r1 r2
   | Omulf, r1 :: r2 :: nil, F n1 :: v2 :: nil => make_mulfimm n1 r2 r1 r2
+  | Omulfs, r1 :: r2 :: nil, v1 :: FS n2 :: nil => make_mulfsimm n2 r1 r1 r2
+  | Omulfs, r1 :: r2 :: nil, FS n1 :: v2 :: nil => make_mulfsimm n1 r2 r1 r2
   | _, _, _ => (op, args)
   end.
 
diff --git a/arm/ConstpropOpproof.v b/arm/ConstpropOpproof.v
index 00ea8bc0..597c9602 100644
--- a/arm/ConstpropOpproof.v
+++ b/arm/ConstpropOpproof.v
@@ -80,6 +80,10 @@ Ltac SimplVM :=
       let E := fresh in
       assert (E: v = Vfloat n) by (inversion H; auto);
       rewrite E in *; clear H; SimplVM
+  | [ H: vmatch _ ?v (FS ?n) |- _ ] =>
+      let E := fresh in
+      assert (E: v = Vsingle n) by (inversion H; auto);
+      rewrite E in *; clear H; SimplVM
   | [ H: vmatch _ ?v (Ptr(Gl ?id ?ofs)) |- _ ] =>
       let E := fresh in
       assert (E: Val.lessdef v (Genv.symbol_address ge id ofs)) by (eapply vmatch_ptr_gl; eauto); 
@@ -123,6 +127,18 @@ Proof.
 - destruct (Float.eq_dec n2 Float.zero); simpl; auto.
   subst n2; auto.
   rewrite H1; auto. 
+- destruct (Float32.eq_dec n1 Float32.zero).
+  subst n1. simpl. destruct (rs#r2); simpl; auto. rewrite Float32.cmp_swap. auto.
+  simpl. rewrite H1; auto. 
+- destruct (Float32.eq_dec n2 Float32.zero).
+  subst n2. simpl. auto.
+  simpl. rewrite H1; auto.
+- destruct (Float32.eq_dec n1 Float32.zero).
+  subst n1. simpl. destruct (rs#r2); simpl; auto. rewrite Float32.cmp_swap. auto.
+  simpl. rewrite H1; auto. 
+- destruct (Float32.eq_dec n2 Float32.zero); simpl; auto.
+  subst n2; auto.
+  rewrite H1; auto. 
 - auto.
 Qed.
 
@@ -335,7 +351,7 @@ Lemma make_mulfimm_correct:
   exists v, eval_operation ge (Vptr sp Int.zero) op rs##args m = Some v /\ Val.lessdef (Val.mulf rs#r1 rs#r2) v.
 Proof.
   intros; unfold make_mulfimm. 
-  destruct (Float.eq_dec n (Float.floatofint (Int.repr 2))); intros. 
+  destruct (Float.eq_dec n (Float.of_int (Int.repr 2))); intros. 
   simpl. econstructor; split. eauto. rewrite H; subst n.
   destruct (rs#r1); simpl; auto. rewrite Float.mul2_add; auto. 
   simpl. econstructor; split; eauto. 
@@ -348,13 +364,40 @@ Lemma make_mulfimm_correct_2:
   exists v, eval_operation ge (Vptr sp Int.zero) op rs##args m = Some v /\ Val.lessdef (Val.mulf rs#r1 rs#r2) v.
 Proof.
   intros; unfold make_mulfimm. 
-  destruct (Float.eq_dec n (Float.floatofint (Int.repr 2))); intros. 
+  destruct (Float.eq_dec n (Float.of_int (Int.repr 2))); intros. 
   simpl. econstructor; split. eauto. rewrite H; subst n.
   destruct (rs#r2); simpl; auto. rewrite Float.mul2_add; auto. 
   rewrite Float.mul_commut; auto. 
   simpl. econstructor; split; eauto. 
 Qed.
 
+Lemma make_mulfsimm_correct:
+  forall n r1 r2,
+  rs#r2 = Vsingle n ->
+  let (op, args) := make_mulfsimm n r1 r1 r2 in
+  exists v, eval_operation ge (Vptr sp Int.zero) op rs##args m = Some v /\ Val.lessdef (Val.mulfs rs#r1 rs#r2) v.
+Proof.
+  intros; unfold make_mulfsimm. 
+  destruct (Float32.eq_dec n (Float32.of_int (Int.repr 2))); intros. 
+  simpl. econstructor; split. eauto. rewrite H; subst n.
+  destruct (rs#r1); simpl; auto. rewrite Float32.mul2_add; auto. 
+  simpl. econstructor; split; eauto. 
+Qed.
+
+Lemma make_mulfsimm_correct_2:
+  forall n r1 r2,
+  rs#r1 = Vsingle n ->
+  let (op, args) := make_mulfsimm n r2 r1 r2 in
+  exists v, eval_operation ge (Vptr sp Int.zero) op rs##args m = Some v /\ Val.lessdef (Val.mulfs rs#r1 rs#r2) v.
+Proof.
+  intros; unfold make_mulfsimm. 
+  destruct (Float32.eq_dec n (Float32.of_int (Int.repr 2))); intros. 
+  simpl. econstructor; split. eauto. rewrite H; subst n.
+  destruct (rs#r2); simpl; auto. rewrite Float32.mul2_add; auto. 
+  rewrite Float32.mul_commut; auto. 
+  simpl. econstructor; split; eauto. 
+Qed.
+
 Lemma make_cast8signed_correct:
   forall r x,
   vmatch bc rs#r x ->
@@ -383,21 +426,6 @@ Proof.
   econstructor; split; simpl; eauto.
 Qed.
 
-Lemma make_singleoffloat_correct:
-  forall r x,
-  vmatch bc rs#r x ->
-  let (op, args) := make_singleoffloat r x in
-  exists v, eval_operation ge (Vptr sp Int.zero) op rs##args m = Some v /\ Val.lessdef (Val.singleoffloat rs#r) v.
-Proof.
-  intros; unfold make_singleoffloat. 
-  destruct (vincl x Fsingle && generate_float_constants tt) eqn:INCL. 
-  InvBooleans. exists rs#r; split; auto. 
-  assert (V: vmatch bc rs#r Fsingle).
-  { eapply vmatch_ge; eauto. apply vincl_ge; auto. }
-  inv V; simpl; auto. rewrite Float.singleoffloat_of_single by auto. auto.
-  econstructor; split; simpl; eauto.
-Qed.
-
 Lemma op_strength_reduction_correct:
   forall op args vl v,
   vl = map (fun r => AE.get r ae) args ->
@@ -459,14 +487,16 @@ Proof.
   InvApproxRegs; SimplVM. inv H0. apply make_shrimm_correct; auto.
 (* shru *)
   InvApproxRegs; SimplVM. inv H0. apply make_shruimm_correct; auto.
-(* singleoffloat *)
-  InvApproxRegs; SimplVM; inv H0. apply make_singleoffloat_correct; auto.
 (* cmp *)
   inv H0. apply make_cmp_correct; auto.
 (* mulf *)
   InvApproxRegs; SimplVM; inv H0. rewrite <- H2. apply make_mulfimm_correct; auto.
   InvApproxRegs; SimplVM; inv H0. fold (Val.mulf (Vfloat n1) rs#r2).
   rewrite <- H2. apply make_mulfimm_correct_2; auto.
+(* mulfs *)
+  InvApproxRegs; SimplVM; inv H0. rewrite <- H2. apply make_mulfsimm_correct; auto.
+  InvApproxRegs; SimplVM; inv H0. fold (Val.mulfs (Vsingle n1) rs#r2).
+  rewrite <- H2. apply make_mulfsimm_correct_2; auto.
 (* default *)
   exists v; auto.
 Qed.
diff --git a/arm/Machregs.v b/arm/Machregs.v
index b55259be..791ccbb6 100644
--- a/arm/Machregs.v
+++ b/arm/Machregs.v
@@ -44,14 +44,10 @@ Global Opaque mreg_eq.
 
 Definition mreg_type (r: mreg): typ :=
   match r with
-  | R0 => Tint  | R1 => Tint  | R2 => Tint  | R3 => Tint
-  | R4 => Tint  | R5 => Tint  | R6 => Tint  | R7 => Tint
-  | R8 => Tint  | R9 => Tint  | R10 => Tint | R11 => Tint
-  | R12 => Tint
-  | F0 => Tfloat | F1 => Tfloat | F2 => Tfloat | F3 => Tfloat
-  | F4 => Tfloat| F5 => Tfloat  | F6 => Tfloat | F7 => Tfloat
-  | F8 => Tfloat | F9 => Tfloat | F10 => Tfloat | F11 => Tfloat
-  | F12 => Tfloat | F13 => Tfloat | F14 => Tfloat | F15 => Tfloat
+  | R0  | R1  | R2  | R3   | R4  | R5  | R6  | R7 
+  | R8  | R9  | R10 | R11  | R12 => Tany32
+  | F0  | F1  | F2  | F3   | F4 | F5   | F6  | F7 
+  | F8  | F9  | F10  | F11 | F12  | F13  | F14  | F15 => Tany64
   end.
 
 Open Scope positive_scope.
@@ -84,18 +80,14 @@ Definition is_stack_reg (r: mreg) : bool := false.
 Definition destroyed_by_op (op: operation): list mreg :=
   match op with
   | Odiv | Odivu => R0 :: R1 :: R2 :: R3 :: R12 :: nil
-  | Ointoffloat | Ointuoffloat => F6 :: nil
+  | Ointoffloat | Ointuoffloat | Ointofsingle | Ointuofsingle => F6 :: nil
   | _ => nil
   end.
 
 Definition destroyed_by_load (chunk: memory_chunk) (addr: addressing): list mreg :=
   nil.
 
-Definition destroyed_by_store (chunk: memory_chunk) (addr: addressing): list mreg :=
-  match chunk with
-  | Mfloat32 => F6 :: nil
-  | _ => nil
-  end.
+Definition destroyed_by_store (chunk: memory_chunk) (addr: addressing): list mreg := nil.
 
 Definition destroyed_by_cond (cond: condition): list mreg :=
   nil.
@@ -106,16 +98,10 @@ Definition destroyed_by_jumptable: list mreg :=
 Definition destroyed_by_builtin (ef: external_function): list mreg :=
   match ef with
   | EF_memcpy sz al => if zle sz 32 then nil else R2 :: R3 :: R12 :: nil
-  | EF_vstore Mfloat32 => F6 :: nil
-  | EF_vstore_global Mfloat32 _ _ => F6 :: nil
   | _ => nil
   end.
 
-Definition destroyed_by_setstack (ty: typ): list mreg :=
-  match ty with
-  | Tsingle => F6 :: nil
-  | _ => nil
-  end.
+Definition destroyed_by_setstack (ty: typ): list mreg := nil.
 
 Definition destroyed_at_function_entry: list mreg :=
   R12 :: nil.
diff --git a/arm/NeedOp.v b/arm/NeedOp.v
index 3fb0d721..e91ea64d 100644
--- a/arm/NeedOp.v
+++ b/arm/NeedOp.v
@@ -42,6 +42,7 @@ Definition needs_of_operation (op: operation) (nv: nval): list nval :=
   | Omove => nv::nil
   | Ointconst n => nil
   | Ofloatconst n => nil
+  | Osingleconst n => nil
   | Oaddrsymbol id ofs => nil
   | Oaddrstack ofs => nil
   | Ocast8signed => op1 (sign_ext 8 nv)
@@ -74,8 +75,11 @@ Definition needs_of_operation (op: operation) (nv: nval): list nval :=
   | Oshrximm _ => op1 (default nv)
   | Onegf | Oabsf => op1 (default nv)
   | Oaddf | Osubf | Omulf | Odivf => op2 (default nv)
-  | Osingleoffloat => op1 (singleoffloat nv)
+  | Onegfs | Oabsfs => op1 (default nv)
+  | Oaddfs | Osubfs | Omulfs | Odivfs => op2 (default nv)
+  | Ofloatofsingle | Osingleoffloat => op1 (default nv)
   | Ointoffloat | Ointuoffloat | Ofloatofint | Ofloatofintu => op1 (default nv)
+  | Ointofsingle | Ointuofsingle | Osingleofint | Osingleofintu => op1 (default nv)
   | Omakelong => op2 (default nv)
   | Olowlong | Ohighlong => op1 (default nv)
   | Ocmp c => needs_of_condition c
@@ -87,7 +91,6 @@ Definition operation_is_redundant (op: operation) (nv: nval): bool :=
   | Ocast16signed => sign_ext_redundant 16 nv
   | Oandimm n => andimm_redundant nv n
   | Oorimm n => orimm_redundant nv n
-  | Osingleoffloat => singleoffloat_redundant nv
   | _ => false
   end.
 
@@ -180,7 +183,6 @@ Proof.
 - apply notint_sound; auto. 
 - apply notint_sound. apply needs_of_shift_sound; auto.
 - apply needs_of_shift_sound; auto. 
-- apply singleoffloat_sound; auto. 
 Qed.
 
 Lemma operation_is_redundant_sound:
@@ -195,7 +197,6 @@ Proof.
 - apply sign_ext_redundant_sound; auto. omega.
 - apply andimm_redundant_sound; auto. 
 - apply orimm_redundant_sound; auto.
-- apply singleoffloat_redundant_sound; auto.
 Qed.
 
 End SOUNDNESS.
diff --git a/arm/Op.v b/arm/Op.v
index b50a7b00..e7971f0b 100644
--- a/arm/Op.v
+++ b/arm/Op.v
@@ -57,11 +57,14 @@ Inductive condition : Type :=
   | Ccompushift: comparison -> shift -> condition (**r unsigned integer comparison *)
   | Ccompimm: comparison -> int -> condition (**r signed integer comparison with a constant *)
   | Ccompuimm: comparison -> int -> condition (**r unsigned integer comparison with a constant *)
-  | Ccompf: comparison -> condition     (**r floating-point comparison *)
+  | Ccompf: comparison -> condition     (**r 64-bit floating-point comparison *)
   | Cnotcompf: comparison -> condition (**r negation of a floating-point comparison *)
   | Ccompfzero: comparison -> condition     (**r floating-point comparison with 0.0 *)
-  | Cnotcompfzero: comparison -> condition. (**r negation of a floating-point comparison with 0.0 *)
-
+  | Cnotcompfzero: comparison -> condition  (**r negation of a floating-point comparison with 0.0 *)
+  | Ccompfs: comparison -> condition    (**r 32-bit floating-point comparison *)
+  | Cnotcompfs: comparison -> condition (**r negation of a floating-point comparison *)
+  | Ccompfszero: comparison -> condition     (**r floating-point comparison with 0.0 *)
+  | Cnotcompfszero: comparison -> condition.  (**r negation of a floating-point comparison with 0.0 *)
 
 (** Arithmetic and logical operations.  In the descriptions, [rd] is the
   result of the operation and [r1], [r2], etc, are the arguments. *)
@@ -69,7 +72,8 @@ Inductive condition : Type :=
 Inductive operation : Type :=
   | Omove: operation                    (**r [rd = r1] *)
   | Ointconst: int -> operation         (**r [rd] is set to the given integer constant *)
-  | Ofloatconst: float -> operation     (**r [rd] is set to the given float constant *)
+  | Ofloatconst: float -> operation     (**r [rd] is set to the given 64-bit float constant *)
+  | Osingleconst: float32 -> operation  (**r [rd] is set to the given 32-bit float constant *)
   | Oaddrsymbol: ident -> int -> operation (**r [rd] is set to the the address of the symbol plus the offset *)
   | Oaddrstack: int -> operation        (**r [rd] is set to the stack pointer plus the given offset *)
 (*c Integer arithmetic: *)
@@ -113,12 +117,23 @@ Inductive operation : Type :=
   | Osubf: operation                    (**r [rd = r1 - r2] *)
   | Omulf: operation                    (**r [rd = r1 * r2] *)
   | Odivf: operation                    (**r [rd = r1 / r2] *)
+  | Onegfs: operation                   (**r [rd = - r1] *)
+  | Oabsfs: operation                   (**r [rd = abs(r1)] *)
+  | Oaddfs: operation                   (**r [rd = r1 + r2] *)
+  | Osubfs: operation                   (**r [rd = r1 - r2] *)
+  | Omulfs: operation                   (**r [rd = r1 * r2] *)
+  | Odivfs: operation                   (**r [rd = r1 / r2] *)
   | Osingleoffloat: operation           (**r [rd] is [r1] truncated to single-precision float *)
+  | Ofloatofsingle: operation           (**r [rd] is [r1] expanded to double-precision float *)
 (*c Conversions between int and float: *)
   | Ointoffloat: operation              (**r [rd = signed_int_of_float(r1)] *)
   | Ointuoffloat: operation             (**r [rd = unsigned_int_of_float(r1)] *)
   | Ofloatofint: operation              (**r [rd = float_of_signed_int(r1)] *)
   | Ofloatofintu: operation             (**r [rd = float_of_unsigned_int(r1)] *)
+  | Ointofsingle: operation              (**r [rd = signed_int_of_single(r1)] *)
+  | Ointuofsingle: operation             (**r [rd = unsigned_int_of_single(r1)] *)
+  | Osingleofint: operation              (**r [rd = single_of_signed_int(r1)] *)
+  | Osingleofintu: operation             (**r [rd = single_of_unsigned_int(r1)] *)
 (*c Manipulating 64-bit integers: *)
   | Omakelong: operation                (**r [rd = r1 << 32 | r2] *)
   | Olowlong: operation                 (**r [rd = low-word(r1)] *)
@@ -160,6 +175,7 @@ Definition eq_operation (x y: operation): {x=y} + {x<>y}.
 Proof.
   generalize Int.eq_dec; intro.
   generalize Float.eq_dec; intro.
+  generalize Float32.eq_dec; intro.
   assert (forall (x y: ident), {x=y}+{x<>y}). exact peq.
   generalize eq_shift; intro.
   generalize eq_condition; intro.
@@ -203,6 +219,10 @@ Definition eval_condition (cond: condition) (vl: list val) (m: mem):
   | Cnotcompf c, v1 :: v2 :: nil => option_map negb (Val.cmpf_bool c v1 v2)
   | Ccompfzero c, v1 :: nil => Val.cmpf_bool c v1 (Vfloat Float.zero)
   | Cnotcompfzero c, v1 :: nil => option_map negb (Val.cmpf_bool c v1 (Vfloat Float.zero))
+  | Ccompfs c, v1 :: v2 :: nil => Val.cmpfs_bool c v1 v2
+  | Cnotcompfs c, v1 :: v2 :: nil => option_map negb (Val.cmpfs_bool c v1 v2)
+  | Ccompfszero c, v1 :: nil => Val.cmpfs_bool c v1 (Vsingle Float32.zero)
+  | Cnotcompfszero c, v1 :: nil => option_map negb (Val.cmpfs_bool c v1 (Vsingle Float32.zero))
   | _, _ => None
   end.
 
@@ -213,6 +233,7 @@ Definition eval_operation
   | Omove, v1::nil => Some v1
   | Ointconst n, nil => Some (Vint n)
   | Ofloatconst n, nil => Some (Vfloat n)
+  | Osingleconst n, nil => Some (Vsingle n)
   | Oaddrsymbol s ofs, nil => Some (Genv.symbol_address genv s ofs)
   | Oaddrstack ofs, nil => Some (Val.add sp (Vint ofs))
   | Ocast8signed, v1::nil => Some (Val.sign_ext 8 v1)
@@ -254,11 +275,22 @@ Definition eval_operation
   | Osubf, v1::v2::nil => Some(Val.subf v1 v2)
   | Omulf, v1::v2::nil => Some(Val.mulf v1 v2)
   | Odivf, v1::v2::nil => Some(Val.divf v1 v2)
+  | Onegfs, v1::nil => Some(Val.negfs v1)
+  | Oabsfs, v1::nil => Some(Val.absfs v1)
+  | Oaddfs, v1::v2::nil => Some(Val.addfs v1 v2)
+  | Osubfs, v1::v2::nil => Some(Val.subfs v1 v2)
+  | Omulfs, v1::v2::nil => Some(Val.mulfs v1 v2)
+  | Odivfs, v1::v2::nil => Some(Val.divfs v1 v2)
   | Osingleoffloat, v1::nil => Some(Val.singleoffloat v1)
+  | Ofloatofsingle, v1::nil => Some(Val.floatofsingle v1)
   | Ointoffloat, v1::nil => Val.intoffloat v1
   | Ointuoffloat, v1::nil => Val.intuoffloat v1
   | Ofloatofint, v1::nil => Val.floatofint v1
   | Ofloatofintu, v1::nil => Val.floatofintu v1
+  | Ointofsingle, v1::nil => Val.intofsingle v1
+  | Ointuofsingle, v1::nil => Val.intuofsingle v1
+  | Osingleofint, v1::nil => Val.singleofint v1
+  | Osingleofintu, v1::nil => Val.singleofintu v1
   | Omakelong, v1::v2::nil => Some(Val.longofwords v1 v2)
   | Olowlong, v1::nil => Some(Val.loword v1)
   | Ohighlong, v1::nil => Some(Val.hiword v1)
@@ -281,8 +313,6 @@ Ltac FuncInv :=
   match goal with
   | H: (match ?x with nil => _ | _ :: _ => _ end = Some _) |- _ =>
       destruct x; simpl in H; try discriminate; FuncInv
-  | H: (match ?v with Vundef => _ | Vint _ => _ | Vfloat _ => _ | Vptr _ _ => _ end = Some _) |- _ =>
-      destruct v; simpl in H; try discriminate; FuncInv
   | H: (Some _ = Some _) |- _ =>
       injection H; intros; clear H; FuncInv
   | _ =>
@@ -303,13 +333,18 @@ Definition type_of_condition (c: condition) : list typ :=
   | Cnotcompf _ => Tfloat :: Tfloat :: nil
   | Ccompfzero _ => Tfloat :: nil
   | Cnotcompfzero _ => Tfloat :: nil
+  | Ccompfs _ => Tsingle :: Tsingle :: nil
+  | Cnotcompfs _ => Tsingle :: Tsingle :: nil
+  | Ccompfszero _ => Tsingle :: nil
+  | Cnotcompfszero _ => Tsingle :: nil
   end.
 
 Definition type_of_operation (op: operation) : list typ * typ :=
   match op with
   | Omove => (nil, Tint)   (* treated specially *)
   | Ointconst _ => (nil, Tint)
-  | Ofloatconst f => (nil, if Float.is_single_dec f then Tsingle else Tfloat)
+  | Ofloatconst f => (nil, Tfloat)
+  | Osingleconst f => (nil, Tsingle)
   | Oaddrsymbol _ _ => (nil, Tint)
   | Oaddrstack _ => (nil, Tint)
   | Ocast8signed => (Tint :: nil, Tint)
@@ -351,11 +386,22 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   | Osubf => (Tfloat :: Tfloat :: nil, Tfloat)
   | Omulf => (Tfloat :: Tfloat :: nil, Tfloat)
   | Odivf => (Tfloat :: Tfloat :: nil, Tfloat)
+  | Onegfs => (Tsingle :: nil, Tsingle)
+  | Oabsfs => (Tsingle :: nil, Tsingle)
+  | Oaddfs => (Tsingle :: Tsingle :: nil, Tsingle)
+  | Osubfs => (Tsingle :: Tsingle :: nil, Tsingle)
+  | Omulfs => (Tsingle :: Tsingle :: nil, Tsingle)
+  | Odivfs => (Tsingle :: Tsingle :: nil, Tsingle)
   | Osingleoffloat => (Tfloat :: nil, Tsingle)
+  | Ofloatofsingle => (Tsingle :: nil, Tfloat)
   | Ointoffloat => (Tfloat :: nil, Tint)
   | Ointuoffloat => (Tfloat :: nil, Tint)
   | Ofloatofint => (Tint :: nil, Tfloat)
   | Ofloatofintu => (Tint :: nil, Tfloat)
+  | Ointofsingle => (Tsingle :: nil, Tint)
+  | Ointuofsingle => (Tsingle :: nil, Tint)
+  | Osingleofint => (Tint :: nil, Tsingle)
+  | Osingleofintu => (Tint :: nil, Tsingle)
   | Omakelong => (Tint :: Tint :: nil, Tlong)
   | Olowlong => (Tlong :: nil, Tint)
   | Ohighlong => (Tlong :: nil, Tint)
@@ -390,7 +436,6 @@ Proof with (try exact I).
   intros.
   destruct op; simpl; simpl in H0; FuncInv; try subst v...
   congruence.
-  destruct (Float.is_single_dec f); red; auto. 
   unfold Genv.symbol_address. destruct (Genv.find_symbol genv i)...
   destruct sp...
   destruct v0...
@@ -433,9 +478,20 @@ Proof with (try exact I).
   destruct v0; destruct v1...
   destruct v0; destruct v1...
   destruct v0; destruct v1...
-  destruct v0... simpl. apply Float.singleoffloat_is_single.  
-  destruct v0; simpl in H0; inv H0. destruct (Float.intoffloat f); simpl in H2; inv H2...
-  destruct v0; simpl in H0; inv H0. destruct (Float.intuoffloat f); simpl in H2; inv H2...
+  destruct v0...
+  destruct v0...
+  destruct v0; destruct v1...
+  destruct v0; destruct v1...
+  destruct v0; destruct v1...
+  destruct v0; destruct v1...
+  destruct v0...
+  destruct v0...
+  destruct v0; simpl in H0; inv H0. destruct (Float.to_int f); simpl in H2; inv H2...
+  destruct v0; simpl in H0; inv H0. destruct (Float.to_intu f); simpl in H2; inv H2...
+  destruct v0; simpl in H0; inv H0...
+  destruct v0; simpl in H0; inv H0...
+  destruct v0; simpl in H0; inv H0. destruct (Float32.to_int f); simpl in H2; inv H2...
+  destruct v0; simpl in H0; inv H0. destruct (Float32.to_intu f); simpl in H2; inv H2...
   destruct v0; simpl in H0; inv H0...
   destruct v0; simpl in H0; inv H0...
   destruct v0; destruct v1...
@@ -503,6 +559,10 @@ Definition negate_condition (cond: condition): condition :=
   | Cnotcompf c => Ccompf c
   | Ccompfzero c => Cnotcompfzero c
   | Cnotcompfzero c => Ccompfzero c
+  | Ccompfs c => Cnotcompfs c
+  | Cnotcompfs c => Ccompfs c
+  | Ccompfszero c => Cnotcompfszero c
+  | Cnotcompfszero c => Ccompfszero c
   end.
 
 Lemma eval_negate_condition:
@@ -520,6 +580,10 @@ Proof.
   repeat (destruct vl; auto). destruct (Val.cmpf_bool c v v0); auto. destruct b; auto.
   repeat (destruct vl; auto). 
   repeat (destruct vl; auto). destruct (Val.cmpf_bool c v (Vfloat Float.zero)); auto. destruct b; auto.
+  repeat (destruct vl; auto). 
+  repeat (destruct vl; auto). destruct (Val.cmpfs_bool c v v0); auto. destruct b; auto.
+  repeat (destruct vl; auto). 
+  repeat (destruct vl; auto). destruct (Val.cmpfs_bool c v (Vsingle Float32.zero)); auto. destruct b; auto.
 Qed.
 
 (** Shifting stack-relative references.  This is used in [Stacking]. *)
@@ -772,6 +836,8 @@ Ltac InvInject :=
       inv H; InvInject
   | [ H: val_inject _ (Vfloat _) _ |- _ ] =>
       inv H; InvInject
+  | [ H: val_inject _ (Vsingle _) _ |- _ ] =>
+      inv H; InvInject
   | [ H: val_inject _ (Vptr _ _) _ |- _ ] =>
       inv H; InvInject
   | [ H: val_list_inject _ nil _ |- _ ] =>
@@ -804,6 +870,10 @@ Proof.
   inv H3; inv H2; simpl in H0; inv H0; auto.
   inv H3; simpl in H0; inv H0; auto.
   inv H3; simpl in H0; inv H0; auto.
+  inv H3; inv H2; simpl in H0; inv H0; auto.
+  inv H3; inv H2; simpl in H0; inv H0; auto.
+  inv H3; simpl in H0; inv H0; auto.
+  inv H3; simpl in H0; inv H0; auto.
 Qed.
 
 Ltac TrivialExists :=
@@ -869,11 +939,27 @@ Proof.
   inv H4; inv H2; simpl; auto.
   inv H4; inv H2; simpl; auto.
   inv H4; inv H2; simpl; auto.
+
+  inv H4; simpl; auto.
+  inv H4; simpl; auto.
+  inv H4; inv H2; simpl; auto.
+  inv H4; inv H2; simpl; auto.
+  inv H4; inv H2; simpl; auto.
+  inv H4; inv H2; simpl; auto.
+
   inv H4; simpl; auto.
+  inv H4; simpl; auto.
+
+  inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_int f0); simpl in H2; inv H2.
+  exists (Vint i); auto.
+  inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_intu f0); simpl in H2; inv H2.
+  exists (Vint i); auto.
+  inv H4; simpl in *; inv H1. TrivialExists.
+  inv H4; simpl in *; inv H1. TrivialExists.
 
-  inv H4; simpl in H1; inv H1. simpl. destruct (Float.intoffloat f0); simpl in H2; inv H2.
+  inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_int f0); simpl in H2; inv H2.
   exists (Vint i); auto.
-  inv H4; simpl in H1; inv H1. simpl. destruct (Float.intuoffloat f0); simpl in H2; inv H2.
+  inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_intu f0); simpl in H2; inv H2.
   exists (Vint i); auto.
   inv H4; simpl in *; inv H1. TrivialExists.
   inv H4; simpl in *; inv H1. TrivialExists.
diff --git a/arm/PrintAsm.ml b/arm/PrintAsm.ml
index 7e6827ef..2ad91142 100644
--- a/arm/PrintAsm.ml
+++ b/arm/PrintAsm.ml
@@ -161,9 +161,10 @@ let distance_to_emit_constants () =
 (* Associate labels to floating-point constants and to symbols *)
 
 let float_labels = (Hashtbl.create 39 : (int64, int) Hashtbl.t)
+let float32_labels = (Hashtbl.create 39 : (int32, int) Hashtbl.t)
 
 let label_float f =
-  let bf = camlint64_of_coqint(Floats.Float.bits_of_double f) in
+  let bf = camlint64_of_coqint(Floats.Float.to_bits f) in
   try
     Hashtbl.find float_labels bf
   with Not_found ->
@@ -173,6 +174,17 @@ let label_float f =
     max_pos_constants := min !max_pos_constants (!currpos + 1024);
     lbl'
 
+let label_float32 f =
+  let bf = camlint_of_coqint(Floats.Float32.to_bits f) in
+  try
+    Hashtbl.find float32_labels bf
+  with Not_found ->
+    let lbl' = new_label() in
+    Hashtbl.add float32_labels bf lbl';
+    size_constants := !size_constants + 4;
+    max_pos_constants := min !max_pos_constants (!currpos + 1024);
+    lbl'
+
 let symbol_labels =
   (Hashtbl.create 39 : (ident * Integers.Int.int, int) Hashtbl.t)
 
@@ -188,6 +200,7 @@ let label_symbol id ofs =
 
 let reset_constants () =
   Hashtbl.clear float_labels;
+  Hashtbl.clear float32_labels;
   Hashtbl.clear symbol_labels;
   size_constants := 0;
   max_pos_constants := max_int
@@ -200,6 +213,10 @@ let emit_constants oc =
       and bflo = Int64.logand bf 0xFFFF_FFFFL in
       fprintf oc ".L%d:	.word	0x%Lx, 0x%Lx\n" lbl bflo bfhi)
     float_labels;
+  Hashtbl.iter
+    (fun bf lbl ->
+      fprintf oc ".L%d:	.word	0x%lx\n" lbl bf)
+    float32_labels;
   Hashtbl.iter
     (fun (id, ofs) lbl ->
       fprintf oc ".L%d:	.word	%a\n"
@@ -348,8 +365,7 @@ let print_builtin_vload_common oc chunk args res =
         fprintf oc "	ldr	%a, [%a, #0]\n" ireg res2 ireg addr
       end; 2
   | Mfloat32, [IR addr], [FR res] ->
-      fprintf oc "	flds	%a, [%a, #0]\n" freg_single res ireg addr;
-      fprintf oc "	fcvtds	%a, %a\n" freg res freg_single res; 2
+      fprintf oc "	flds	%a, [%a, #0]\n" freg_single res ireg addr; 1
   | Mfloat64, [IR addr], [FR res] ->
       fprintf oc "	fldd	%a, [%a, #0]\n" freg res ireg addr; 1
   | _ ->
@@ -379,8 +395,7 @@ let print_builtin_vstore_common oc chunk args =
       fprintf oc "	str	%a, [%a, #0]\n" ireg src2 ireg addr;
       fprintf oc "	str	%a, [%a, #4]\n" ireg src1 ireg addr; 2
   | Mfloat32, [IR  addr; FR src] ->
-      fprintf oc "	fcvtsd	%a, %a\n" freg_single FR6 freg src;
-      fprintf oc "	fsts	%a, [%a, #0]\n" freg_single FR6 ireg addr; 2
+      fprintf oc "	fsts	%a, [%a, #0]\n" freg_single src ireg addr; 1
   | Mfloat64, [IR  addr; FR src] ->
       fprintf oc "	fstd	%a, [%a, #0]\n" freg src ireg addr; 1
   | _ ->
@@ -409,11 +424,11 @@ let align n a = (n + a - 1) land (-a)
 let rec next_arg_location ir ofs = function
   | [] ->
       Int32.of_int (ir * 4 + ofs)
-  | (Tint | Tsingle) :: l ->
+  | (Tint | Tsingle | Tany32) :: l ->
       if ir < 4
       then next_arg_location (ir + 1) ofs l
       else next_arg_location ir (ofs + 4) l
-  | (Tfloat | Tlong) :: l ->
+  | (Tfloat | Tlong | Tany64) :: l ->
       if ir < 3
       then next_arg_location (align ir 2 + 2) ofs l
       else next_arg_location ir (align ofs 8 + 8) l
@@ -525,11 +540,9 @@ module FixupEABI = struct
 
   let fixup_single oc dir f i =
     match dir with
-    | Incoming ->     (* f <- i; f <- double_of_single f *)
-        fprintf oc "	fmsr	%a, %a\n" freg_single f ireg i;
-        fprintf oc "	fcvtds	%a, %a\n" freg f freg_single f
-    | Outgoing ->     (* f <- single_of_double f; i <- f *)
-        fprintf oc "	fcvtsd	%a, %a\n" freg_single f freg f;
+    | Incoming ->     (* f <- i *)
+        fprintf oc "	fmsr	%a, %a\n" freg_single f ireg i
+    | Outgoing ->     (* i <- f *)
         fprintf oc "	fmrs	%a, %a\n" ireg i freg_single f
 
   let fixup_conventions oc dir tyl =
@@ -537,11 +550,11 @@ module FixupEABI = struct
       if i >= 4 then 0 else
         match tyl with
         | [] -> 0
-        | Tint :: tyl' ->
+        | (Tint | Tany32) :: tyl' ->
             fixup (i+1) tyl'
         | Tlong :: tyl' ->
             fixup (((i + 1) land (-2)) + 2) tyl'
-        | Tfloat :: tyl' ->
+        | (Tfloat | Tany64) :: tyl' ->
             let i = (i + 1) land (-2) in
             if i >= 4 then 0 else begin
               fixup_double oc dir (freg_param i) (ireg_param i) (ireg_param (i+1));
@@ -549,7 +562,7 @@ module FixupEABI = struct
             end
         | Tsingle :: tyl' ->
             fixup_single oc dir (freg_param i) (ireg_param i);
-            2 + fixup (i+1) tyl'
+            1 + fixup (i+1) tyl'
     in fixup 0 tyl
 
   let fixup_arguments oc dir sg =
@@ -577,8 +590,8 @@ module FixupHF = struct
   let rec fixup_actions used fr tyl =
     match tyl with
     | [] -> []
-    | (Tint | Tlong) :: tyl' -> fixup_actions used fr tyl'
-    | Tfloat :: tyl' ->
+    | (Tint | Tlong | Tany32) :: tyl' -> fixup_actions used fr tyl'
+    | (Tfloat | Tany64) :: tyl' ->
         if fr >= 8 then [] else begin
           let dr = find_double used 0 in
           assert (dr < 8);
@@ -599,7 +612,7 @@ module FixupHF = struct
         1 + fixup_outgoing oc act
       end
   | (fr, Single, sr) :: act ->
-        fprintf oc "	fcvtsd	s%d, d%d\n" sr fr;
+        fprintf oc "	fcpys	s%d, s%d\n" sr (2*fr);
         1 + fixup_outgoing oc act
 
   let rec fixup_incoming oc = function
@@ -612,8 +625,10 @@ module FixupHF = struct
       end
   | (fr, Single, sr) :: act ->
       let n = fixup_incoming oc act in
-      fprintf oc "	fcvtds	d%d, s%d\n" fr sr;
+      if fr = sr then n else begin
+        fprintf oc "	fcpys	s%d, s%d\n" (2*fr) sr;
         1 + n
+      end
 
   let fixup_arguments oc dir sg =
     if sg.sig_cc.cc_vararg then
@@ -628,14 +643,8 @@ module FixupHF = struct
   let fixup_result oc dir sg =
     if sg.sig_cc.cc_vararg then
       FixupEABI.fixup_result oc dir sg
-    else begin
-      match proj_sig_res sg, dir with
-      | Tsingle, Outgoing ->
-          fprintf oc "	fcvtsd	s0, d0\n"; 1
-      | Tsingle, Incoming ->
-          fprintf oc "	fcvtds	d0, s0\n"; 1
-      | _ -> 0
-    end
+    else
+      0
 end
 
 let (fixup_arguments, fixup_result) =
@@ -703,7 +712,7 @@ let print_instruction oc = function
       fprintf oc "	cmp	%a, %a\n" ireg r1 shift_op so; 1
   | Peor(r1, r2, so) ->
       fprintf oc "	eor	%a, %a, %a\n" ireg r1 ireg r2 shift_op so; 1
-  | Pldr(r1, r2, sa) ->
+  | Pldr(r1, r2, sa) | Pldr_a(r1, r2, sa) ->
       fprintf oc "	ldr	%a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa; 1
   | Pldrb(r1, r2, sa) ->
       fprintf oc "	ldrb	%a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa; 1
@@ -727,7 +736,7 @@ let print_instruction oc = function
       fprintf oc "	orr	%a, %a, %a\n" ireg r1 ireg r2 shift_op so; 1
   | Prsb(r1, r2, so) ->
       fprintf oc "	rsb	%a, %a, %a\n" ireg r1 ireg r2 shift_op so; 1
-  | Pstr(r1, r2, sa) ->
+  | Pstr(r1, r2, sa) | Pstr_a(r1, r2, sa) ->
       fprintf oc "	str	%a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa;
       begin match r1, r2, sa with
       | IR14, IR13, SAimm n -> cfi_rel_offset oc "lr" (camlint_of_coqint n)
@@ -786,19 +795,53 @@ let print_instruction oc = function
   | Pftouizd(r1, r2) ->
       fprintf oc "	ftouizd	%a, %a\n" freg_single FR6 freg r2;
       fprintf oc "	fmrs	%a, %a\n" ireg r1 freg_single FR6; 2
+  | Pfabss(r1, r2) ->
+      fprintf oc "	fabss	%a, %a\n" freg_single r1 freg_single r2; 1
+  | Pfnegs(r1, r2) ->
+      fprintf oc "	fnegs	%a, %a\n" freg_single r1 freg_single r2; 1
+  | Pfadds(r1, r2, r3) ->
+      fprintf oc "	fadds	%a, %a, %a\n" freg_single r1 freg_single r2 freg_single r3; 1
+  | Pfdivs(r1, r2, r3) ->
+      fprintf oc "	fdivs	%a, %a, %a\n" freg_single r1 freg_single r2 freg_single r3; 1
+  | Pfmuls(r1, r2, r3) ->
+      fprintf oc "	fmuls	%a, %a, %a\n" freg_single r1 freg_single r2 freg_single r3; 1
+  | Pfsubs(r1, r2, r3) ->
+      fprintf oc "	fsubs	%a, %a, %a\n" freg_single r1 freg_single r2 freg_single r3; 1
+  | Pflis(r1, f) ->
+      (* We could make good use of the fconsts instruction, but it's available
+         in VFD v3 and up, not in v1 nor v2 *)
+      let lbl = label_float32 f in
+      fprintf oc "	flds	%a, .L%d @ %.12g\n" freg_single r1 lbl (camlfloat_of_coqfloat32 f); 1
+  | Pfcmps(r1, r2) ->
+      fprintf oc "	fcmps	%a, %a\n" freg_single r1 freg_single r2;
+      fprintf oc "	fmstat\n"; 2
+  | Pfcmpzs(r1) ->
+      fprintf oc "	fcmpzs	%a\n" freg_single r1;
+      fprintf oc "	fmstat\n"; 2
+  | Pfsitos(r1, r2) ->
+      fprintf oc "	fmsr	%a, %a\n" freg_single r1 ireg r2;
+      fprintf oc "	fsitos	%a, %a\n" freg_single r1 freg_single r1; 2
+  | Pfuitos(r1, r2) ->
+      fprintf oc "	fmsr	%a, %a\n" freg_single r1 ireg r2;
+      fprintf oc "	fuitos	%a, %a\n" freg_single r1 freg_single r1; 2
+  | Pftosizs(r1, r2) ->
+      fprintf oc "	ftosizs	%a, %a\n" freg_single FR6 freg_single r2;
+      fprintf oc "	fmrs	%a, %a\n" ireg r1 freg_single FR6; 2
+  | Pftouizs(r1, r2) ->
+      fprintf oc "	ftouizs	%a, %a\n" freg_single FR6 freg_single r2;
+      fprintf oc "	fmrs	%a, %a\n" ireg r1 freg_single FR6; 2
   | Pfcvtsd(r1, r2) ->
-      fprintf oc "	fcvtsd	%a, %a\n" freg_single r1 freg r2;
-      fprintf oc "	fcvtds	%a, %a\n" freg r1 freg_single r1; 2
-  | Pfldd(r1, r2, n) ->
+      fprintf oc "	fcvtsd	%a, %a\n" freg_single r1 freg r2; 1
+  | Pfcvtds(r1, r2) ->
+      fprintf oc "	fcvtds	%a, %a\n" freg r1 freg_single r2; 1
+  | Pfldd(r1, r2, n) | Pfldd_a(r1, r2, n) ->
       fprintf oc "	fldd	%a, [%a, #%a]\n" freg r1 ireg r2 coqint n; 1
   | Pflds(r1, r2, n) ->
-      fprintf oc "	flds	%a, [%a, #%a]\n" freg_single r1 ireg r2 coqint n;
-      fprintf oc "	fcvtds	%a, %a\n" freg r1 freg_single r1; 2
-  | Pfstd(r1, r2, n) ->
+      fprintf oc "	flds	%a, [%a, #%a]\n" freg_single r1 ireg r2 coqint n; 1
+  | Pfstd(r1, r2, n) | Pfstd_a(r1, r2, n) ->
       fprintf oc "	fstd	%a, [%a, #%a]\n" freg r1 ireg r2 coqint n; 1
   | Pfsts(r1, r2, n) ->
-      fprintf oc "	fcvtsd	%a, %a\n" freg_single FR6 freg r1;
-      fprintf oc "	fsts	%a, [%a, #%a]\n" freg_single FR6 ireg r2 coqint n; 2
+      fprintf oc "	fsts	%a, [%a, #%a]\n" freg_single r1 ireg r2 coqint n; 1
   (* Pseudo-instructions *)
   | Pallocframe(sz, ofs) ->
       fprintf oc "	mov	r12, sp\n";
@@ -927,10 +970,10 @@ let print_init oc = function
   | Init_int64 n ->
       fprintf oc "	.quad	%Ld\n" (camlint64_of_coqint n)
   | Init_float32 n ->
-      fprintf oc "	.word	0x%lx %s %.15g \n" (camlint_of_coqint (Floats.Float.bits_of_single n))
+      fprintf oc "	.word	0x%lx %s %.15g \n" (camlint_of_coqint (Floats.Float32.to_bits n))
 	comment (camlfloat_of_coqfloat n)
   | Init_float64 n ->
-      fprintf oc "	.quad	%Ld %s %.18g\n" (camlint64_of_coqint (Floats.Float.bits_of_double n))
+      fprintf oc "	.quad	%Ld %s %.18g\n" (camlint64_of_coqint (Floats.Float.to_bits n))
 	comment (camlfloat_of_coqfloat n)
   | Init_space n ->
       if Z.gt n Z.zero then
diff --git a/arm/SelectOp.vp b/arm/SelectOp.vp
index e4005c92..ad8a9454 100644
--- a/arm/SelectOp.vp
+++ b/arm/SelectOp.vp
@@ -321,6 +321,12 @@ Definition addf (e1 e2: expr) :=  Eop Oaddf (e1 ::: e2 ::: Enil).
 Definition subf (e1 e2: expr) :=  Eop Osubf (e1 ::: e2 ::: Enil).
 Definition mulf (e1 e2: expr) :=  Eop Omulf (e1 ::: e2 ::: Enil).
 
+Definition negfs (e: expr) :=  Eop Onegfs (e ::: Enil).
+Definition absfs (e: expr) :=  Eop Oabsfs (e ::: Enil).
+Definition addfs (e1 e2: expr) :=  Eop Oaddfs (e1 ::: e2 ::: Enil).
+Definition subfs (e1 e2: expr) :=  Eop Osubfs (e1 ::: e2 ::: Enil).
+Definition mulfs (e1 e2: expr) :=  Eop Omulfs (e1 ::: e2 ::: Enil).
+
 (** ** Comparisons *)
 
 Nondetfunction compimm (default: comparison -> int -> condition)
@@ -370,6 +376,9 @@ Nondetfunction compu (c: comparison) (e1: expr) (e2: expr) :=
 Definition compf (c: comparison) (e1: expr) (e2: expr) :=
   Eop (Ocmp (Ccompf c)) (e1 ::: e2 ::: Enil).
 
+Definition compfs (c: comparison) (e1: expr) (e2: expr) :=
+  Eop (Ocmp (Ccompfs c)) (e1 ::: e2 ::: Enil).
+
 (** ** Integer conversions *)
 
 Definition cast8unsigned (e: expr) := andimm (Int.repr 255) e.
@@ -391,21 +400,38 @@ Nondetfunction cast16signed (e: expr) :=
 (** ** Floating-point conversions *)
 
 Definition singleoffloat (e: expr) := Eop Osingleoffloat (e ::: Enil).
+Definition floatofsingle (e: expr) := Eop Ofloatofsingle (e ::: Enil).
+
 Definition intoffloat (e: expr) := Eop Ointoffloat (e ::: Enil).
 Definition intuoffloat (e: expr) := Eop Ointuoffloat (e ::: Enil).
 
 Nondetfunction floatofint (e: expr) :=
   match e with
-  | Eop (Ointconst n) Enil => Eop (Ofloatconst (Float.floatofint n)) Enil
+  | Eop (Ointconst n) Enil => Eop (Ofloatconst (Float.of_int n)) Enil
   | _ => Eop Ofloatofint (e ::: Enil)
   end.
 
 Nondetfunction floatofintu (e: expr) :=
   match e with
-  | Eop (Ointconst n) Enil => Eop (Ofloatconst (Float.floatofintu n)) Enil
+  | Eop (Ointconst n) Enil => Eop (Ofloatconst (Float.of_intu n)) Enil
   | _ => Eop Ofloatofintu (e ::: Enil)
   end.
 
+Definition intofsingle (e: expr) := Eop Ointofsingle (e ::: Enil).
+Definition intuofsingle (e: expr) := Eop Ointuofsingle (e ::: Enil).
+
+Nondetfunction singleofint (e: expr) :=
+  match e with
+  | Eop (Ointconst n) Enil => Eop (Osingleconst (Float32.of_int n)) Enil
+  | _ => Eop Osingleofint (e ::: Enil)
+  end.
+
+Nondetfunction singleofintu (e: expr) :=
+  match e with
+  | Eop (Ointconst n) Enil => Eop (Osingleconst (Float32.of_intu n)) Enil
+  | _ => Eop Osingleofintu (e ::: Enil)
+  end.
+
 (** ** Recognition of addressing modes for load and store operations *)
 
 (** We do not recognize the [Aindexed2] and [Aindexed2shift] modes
@@ -424,6 +450,8 @@ Definition can_use_Aindexed2 (chunk: memory_chunk): bool :=
   | Mint64 => false
   | Mfloat32 => false
   | Mfloat64 => false
+  | Many32 => true
+  | Many64 => false
   end.
 
 Definition can_use_Aindexed2shift (chunk: memory_chunk): bool :=
@@ -436,6 +464,8 @@ Definition can_use_Aindexed2shift (chunk: memory_chunk): bool :=
   | Mint64 => false
   | Mfloat32 => false
   | Mfloat64 => false
+  | Many32 => true
+  | Many64 => false
   end.
 
 Nondetfunction addressing (chunk: memory_chunk) (e: expr) :=
diff --git a/arm/SelectOpproof.v b/arm/SelectOpproof.v
index 1dd2c200..c68d2277 100644
--- a/arm/SelectOpproof.v
+++ b/arm/SelectOpproof.v
@@ -598,6 +598,31 @@ Proof.
   red; intros; TrivialExists.
 Qed.
 
+Theorem eval_negfs: unary_constructor_sound negfs Val.negfs.
+Proof.
+  red; intros. TrivialExists. 
+Qed.
+
+Theorem eval_absfs: unary_constructor_sound absfs Val.absfs.
+Proof.
+  red; intros. TrivialExists. 
+Qed.
+
+Theorem eval_addfs: binary_constructor_sound addfs Val.addfs.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+ 
+Theorem eval_subfs: binary_constructor_sound subfs Val.subfs.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+
+Theorem eval_mulfs: binary_constructor_sound mulfs Val.mulfs.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+
 Section COMP_IMM.
 
 Variable default: comparison -> int -> condition.
@@ -693,6 +718,12 @@ Proof.
   intros; red; intros. unfold compf. TrivialExists.
 Qed.
 
+Theorem eval_compfs:
+  forall c, binary_constructor_sound (compfs c) (Val.cmpfs c).
+Proof.
+  intros; red; intros. unfold compfs. TrivialExists.
+Qed.
+
 Theorem eval_cast8signed: unary_constructor_sound cast8signed (Val.sign_ext 8).
 Proof.
   red; intros until x. unfold cast8signed; case (cast8signed_match a); intros.
@@ -724,6 +755,11 @@ Proof.
   red; intros. unfold singleoffloat. TrivialExists.
 Qed.
 
+Theorem eval_floatofsingle: unary_constructor_sound floatofsingle Val.floatofsingle.
+Proof.
+  red; intros. unfold floatofsingle. TrivialExists.
+Qed.
+
 Theorem eval_intoffloat:
   forall le a x y,
   eval_expr ge sp e m le a x ->
@@ -764,6 +800,46 @@ Proof.
   TrivialExists.
 Qed.
 
+Theorem eval_intofsingle:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.intofsingle x = Some y ->
+  exists v, eval_expr ge sp e m le (intofsingle a) v /\ Val.lessdef y v.
+Proof.
+  intros; unfold intofsingle. TrivialExists. 
+Qed.
+
+Theorem eval_singleofint:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.singleofint x = Some y ->
+  exists v, eval_expr ge sp e m le (singleofint a) v /\ Val.lessdef y v.
+Proof.
+  intros until y; unfold singleofint. case (singleofint_match a); intros.
+  InvEval. simpl in H0. TrivialExists. 
+  TrivialExists. 
+Qed.
+
+Theorem eval_intuofsingle:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.intuofsingle x = Some y ->
+  exists v, eval_expr ge sp e m le (intuofsingle a) v /\ Val.lessdef y v.
+Proof.
+  intros; unfold intuofsingle. TrivialExists. 
+Qed.
+
+Theorem eval_singleofintu:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.singleofintu x = Some y ->
+  exists v, eval_expr ge sp e m le (singleofintu a) v /\ Val.lessdef y v.
+Proof.
+  intros until y; unfold singleofintu. case (singleofintu_match a); intros.
+  InvEval. simpl in H0. TrivialExists. 
+  TrivialExists.
+Qed.
+
 Theorem eval_addressing:
   forall le chunk a v b ofs,
   eval_expr ge sp e m le a v ->
diff --git a/arm/ValueAOp.v b/arm/ValueAOp.v
index b3123618..a14d6b98 100644
--- a/arm/ValueAOp.v
+++ b/arm/ValueAOp.v
@@ -44,6 +44,10 @@ Definition eval_static_condition (cond: condition) (vl: list aval): abool :=
   | Cnotcompf c, v1 :: v2 :: nil => cnot (cmpf_bool c v1 v2)
   | Ccompfzero c, v1 :: nil => cmpf_bool c v1 (F Float.zero)
   | Cnotcompfzero c, v1 :: nil => cnot (cmpf_bool c v1 (F Float.zero))
+  | Ccompfs c, v1 :: v2 :: nil => cmpfs_bool c v1 v2
+  | Cnotcompfs c, v1 :: v2 :: nil => cnot (cmpfs_bool c v1 v2)
+  | Ccompfszero c, v1 :: nil => cmpfs_bool c v1 (FS Float32.zero)
+  | Cnotcompfszero c, v1 :: nil => cnot (cmpfs_bool c v1 (FS Float32.zero))
   | _, _ => Bnone
   end.
 
@@ -61,6 +65,7 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   | Omove, v1::nil => v1
   | Ointconst n, nil => I n
   | Ofloatconst n, nil => if propagate_float_constants tt then F n else ftop
+  | Osingleconst n, nil => if propagate_float_constants tt then FS n else ftop
   | Oaddrsymbol id ofs, nil => Ptr (Gl id ofs)
   | Oaddrstack ofs, nil => Ptr (Stk ofs)
   | Ocast8signed, v1 :: nil => sign_ext 8 v1
@@ -102,11 +107,22 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   | Osubf, v1::v2::nil => subf v1 v2
   | Omulf, v1::v2::nil => mulf v1 v2
   | Odivf, v1::v2::nil => divf v1 v2
+  | Onegfs, v1::nil => negfs v1
+  | Oabsfs, v1::nil => absfs v1
+  | Oaddfs, v1::v2::nil => addfs v1 v2
+  | Osubfs, v1::v2::nil => subfs v1 v2
+  | Omulfs, v1::v2::nil => mulfs v1 v2
+  | Odivfs, v1::v2::nil => divfs v1 v2
   | Osingleoffloat, v1::nil => singleoffloat v1
+  | Ofloatofsingle, v1::nil => floatofsingle v1
   | Ointoffloat, v1::nil => intoffloat v1
   | Ointuoffloat, v1::nil => intuoffloat v1
   | Ofloatofint, v1::nil => floatofint v1
   | Ofloatofintu, v1::nil => floatofintu v1
+  | Ointofsingle, v1::nil => intofsingle v1
+  | Ointuofsingle, v1::nil => intuofsingle v1
+  | Osingleofint, v1::nil => singleofint v1
+  | Osingleofintu, v1::nil => singleofintu v1
   | Omakelong, v1::v2::nil => longofwords v1 v2
   | Olowlong, v1::nil => loword v1
   | Ohighlong, v1::nil => hiword v1
@@ -185,6 +201,7 @@ Proof.
   unfold eval_operation, eval_static_operation; intros;
   destruct op; InvHyps; eauto with va.
   destruct (propagate_float_constants tt); constructor.
+  destruct (propagate_float_constants tt); constructor.
   rewrite Int.add_zero_l; eauto with va.
   fold (Val.sub (Vint i) a1). auto with va.
   apply of_optbool_sound. eapply eval_static_condition_sound; eauto. 
diff --git a/arm/eabi/Conventions1.v b/arm/eabi/Conventions1.v
index c02af1aa..c26d29ee 100644
--- a/arm/eabi/Conventions1.v
+++ b/arm/eabi/Conventions1.v
@@ -15,6 +15,7 @@
 
 Require Import Coqlib.
 Require Import AST.
+Require Import Events.
 Require Import Locations.
 
 (** * Classification of machine registers *)
@@ -178,13 +179,13 @@ Proof.
 Qed.
 
 Lemma int_callee_save_type:
-  forall r, In r int_callee_save_regs -> mreg_type r = Tint.
+  forall r, In r int_callee_save_regs -> mreg_type r = Tany32.
 Proof.
   intro. simpl; ElimOrEq; reflexivity.
 Qed.
 
 Lemma float_callee_save_type:
-  forall r, In r float_callee_save_regs -> mreg_type r = Tfloat.
+  forall r, In r float_callee_save_regs -> mreg_type r = Tany64.
 Proof.
   intro. simpl; ElimOrEq; reflexivity.
 Qed.
@@ -235,12 +236,21 @@ Qed.
 Definition loc_result (s: signature) : list mreg :=
   match s.(sig_res) with
   | None => R0 :: nil
-  | Some Tint => R0 :: nil
-  | Some (Tfloat | Tsingle) => F0 :: nil
+  | Some (Tint | Tany32) => R0 :: nil
+  | Some (Tfloat | Tsingle | Tany64) => F0 :: nil
   | Some Tlong => R1 :: R0 :: nil
   end.
 
-(** The result location is a caller-save register or a temporary *)
+(** The result registers have types compatible with that given in the signature. *)
+
+Lemma loc_result_type:
+  forall sig,
+  subtype_list (proj_sig_res' sig) (map mreg_type (loc_result sig)) = true.
+Proof.
+  intros. unfold proj_sig_res', loc_result. destruct (sig_res sig) as [[]|]; auto.
+Qed.
+
+(** The result locations are caller-save registers *)
 
 Lemma loc_result_caller_save:
   forall (s: signature) (r: mreg), 
@@ -292,12 +302,12 @@ Definition sreg_param (n: Z) : mreg :=
 Fixpoint loc_arguments_rec (tyl: list typ) (ofs: Z) {struct tyl} : list loc :=
   match tyl with
   | nil => nil
-  | Tint :: tys =>
-      (if zle 0 ofs then S Outgoing ofs Tint else R (ireg_param ofs))
+  | (Tint | Tany32) as ty :: tys =>
+      (if zle 0 ofs then S Outgoing ofs ty else R (ireg_param ofs))
       :: loc_arguments_rec tys (ofs + 1)
-  | Tfloat :: tys =>
+  | (Tfloat | Tany64) as ty :: tys =>
       let ofs := align ofs 2 in
-      (if zle 0 ofs then S Outgoing ofs Tfloat else R (freg_param ofs))
+      (if zle 0 ofs then S Outgoing ofs ty else R (freg_param ofs))
       :: loc_arguments_rec tys (ofs + 2)
   | Tsingle :: tys =>
       (if zle 0 ofs then S Outgoing ofs Tsingle else R (sreg_param ofs))
@@ -321,8 +331,8 @@ Definition loc_arguments (s: signature) : list loc :=
 Fixpoint size_arguments_rec (tyl: list typ) (ofs: Z) {struct tyl} : Z :=
   match tyl with
   | nil => ofs
-  | (Tint | Tsingle) :: tys => size_arguments_rec tys (ofs + 1)
-  | (Tfloat | Tlong) :: tys => size_arguments_rec tys (align ofs 2 + 2)
+  | (Tint | Tsingle | Tany32) :: tys => size_arguments_rec tys (ofs + 1)
+  | (Tfloat | Tlong | Tany64) :: tys => size_arguments_rec tys (align ofs 2 + 2)
   end.
 
 Definition size_arguments (s: signature) : Z :=
@@ -404,6 +414,19 @@ Proof.
   split. omega. split. omega. congruence.
   apply sreg_param_caller_save.
   exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega.
+- (* Tany32 *)
+  destruct H.
+  subst l. destruct (zle 0 ofs).
+  split. omega. split. omega. congruence.
+  apply ireg_param_caller_save.
+  exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega.
+- (* Tany64 *)
+  assert (ofs <= align ofs 2) by (apply align_le; omega).
+  destruct H.
+  subst l. destruct (zle 0 (align ofs 2)).
+  split. omega. split. auto. congruence.
+  apply freg_param_caller_save.
+  exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega.
 Qed.
 
 Lemma loc_arguments_acceptable:
@@ -433,6 +456,9 @@ Proof.
   assert (ofs <= align ofs 2) by (apply align_le; omega).
   apply Zle_trans with (align ofs 2 + 2); auto; omega.
   apply Zle_trans with (ofs + 1); auto; omega.
+  apply Zle_trans with (ofs + 1); auto; omega.
+  assert (ofs <= align ofs 2) by (apply align_le; omega).
+  apply Zle_trans with (align ofs 2 + 2); auto; omega.
 Qed.
 
 Lemma size_arguments_above:
@@ -474,6 +500,10 @@ Proof.
     eapply Zle_trans. 2: apply H0. simpl typesize; omega. omega.
   - (* Tsingle *)
     destruct H1; auto. destruct (zle 0 ofs0); inv H1. apply H0. omega. 
+  - (* Tany32 *)
+    destruct H1; auto. destruct (zle 0 ofs0); inv H1. apply H0. omega. 
+  - (* Tany64 *)
+    destruct H1; auto. destruct (zle 0 (align ofs0 2)); inv H1. apply H0. omega.
   }
   unfold size_arguments. apply H1. auto.  
 Qed.
diff --git a/arm/hardfloat/Conventions1.v b/arm/hardfloat/Conventions1.v
index e3875e7d..40a761cc 100644
--- a/arm/hardfloat/Conventions1.v
+++ b/arm/hardfloat/Conventions1.v
@@ -15,6 +15,7 @@
 
 Require Import Coqlib.
 Require Import AST.
+Require Import Events.
 Require Import Locations.
 
 (** * Classification of machine registers *)
@@ -178,13 +179,13 @@ Proof.
 Qed.
 
 Lemma int_callee_save_type:
-  forall r, In r int_callee_save_regs -> mreg_type r = Tint.
+  forall r, In r int_callee_save_regs -> mreg_type r = Tany32.
 Proof.
   intro. simpl; ElimOrEq; reflexivity.
 Qed.
 
 Lemma float_callee_save_type:
-  forall r, In r float_callee_save_regs -> mreg_type r = Tfloat.
+  forall r, In r float_callee_save_regs -> mreg_type r = Tany64.
 Proof.
   intro. simpl; ElimOrEq; reflexivity.
 Qed.
@@ -235,12 +236,21 @@ Qed.
 Definition loc_result (s: signature) : list mreg :=
   match s.(sig_res) with
   | None => R0 :: nil
-  | Some Tint => R0 :: nil
-  | Some (Tfloat | Tsingle) => F0 :: nil
+  | Some (Tint | Tany32) => R0 :: nil
+  | Some (Tfloat | Tsingle | Tany64) => F0 :: nil
   | Some Tlong => R1 :: R0 :: nil
   end.
 
-(** The result location is a caller-save register or a temporary *)
+(** The result registers have types compatible with that given in the signature. *)
+
+Lemma loc_result_type:
+  forall sig,
+  subtype_list (proj_sig_res' sig) (map mreg_type (loc_result sig)) = true.
+Proof.
+  intros. unfold proj_sig_res', loc_result. destruct (sig_res sig) as [[]|]; auto.
+Qed.
+
+(** The result locations are caller-save registers *)
 
 Lemma loc_result_caller_save:
   forall (s: signature) (r: mreg), 
@@ -286,15 +296,15 @@ Fixpoint loc_arguments_rec
      (tyl: list typ) (ir fr ofs: Z) {struct tyl} : list loc :=
   match tyl with
   | nil => nil
-  | Tint :: tys =>
+  | (Tint | Tany32) as ty :: tys =>
       if zlt ir 4
       then R (ireg_param ir) :: loc_arguments_rec tys (ir + 1) fr ofs
-      else S Outgoing ofs Tint :: loc_arguments_rec tys ir fr (ofs + 1)
-  | Tfloat :: tys =>
+      else S Outgoing ofs ty :: loc_arguments_rec tys ir fr (ofs + 1)
+  | (Tfloat | Tany64) as ty :: tys =>
       if zlt fr 8
       then R (freg_param fr) :: loc_arguments_rec tys ir (fr + 1) ofs
       else let ofs := align ofs 2 in
-           S Outgoing ofs Tfloat :: loc_arguments_rec tys ir fr (ofs + 2)
+           S Outgoing ofs ty :: loc_arguments_rec tys ir fr (ofs + 2)
   | Tsingle :: tys =>
       if zlt fr 8
       then R (freg_param fr) :: loc_arguments_rec tys ir (fr + 1) ofs
@@ -325,12 +335,12 @@ Fixpoint loc_arguments_vararg
      (tyl: list typ) (ofs: Z) {struct tyl} : list loc :=
   match tyl with
   | nil => nil
-  | Tint :: tys =>
-      (if zlt ofs 0 then R (ireg_param (ofs + 4)) else S Outgoing ofs Tint)
+  | (Tint|Tany32) as ty :: tys =>
+      (if zlt ofs 0 then R (ireg_param (ofs + 4)) else S Outgoing ofs ty)
       :: loc_arguments_vararg tys (ofs + 1)
-  | Tfloat :: tys =>
+  | (Tfloat|Tany64) as ty :: tys =>
       let ofs := align ofs 2 in
-      (if zlt ofs 0 then R (freg_param (ofs + 4)) else S Outgoing ofs Tfloat)
+      (if zlt ofs 0 then R (freg_param (ofs + 4)) else S Outgoing ofs ty)
       :: loc_arguments_vararg tys (ofs + 2)
   | Tsingle :: tys =>
       (if zlt ofs 0 then R (freg_param (ofs + 4)) else S Outgoing ofs Tsingle)
@@ -356,11 +366,11 @@ Definition loc_arguments (s: signature) : list loc :=
 Fixpoint size_arguments_rec (tyl: list typ) (ir fr ofs: Z) {struct tyl} : Z :=
   match tyl with
   | nil => ofs
-  | Tint :: tys =>
+  | (Tint|Tany32) :: tys =>
       if zlt ir 4
       then size_arguments_rec tys (ir + 1) fr ofs
       else size_arguments_rec tys ir fr (ofs + 1)
-  | Tfloat :: tys =>
+  | (Tfloat|Tany64) :: tys =>
       if zlt fr 8
       then size_arguments_rec tys ir (fr + 1) ofs
       else size_arguments_rec tys ir fr (align ofs 2 + 2)
@@ -378,8 +388,8 @@ Fixpoint size_arguments_rec (tyl: list typ) (ir fr ofs: Z) {struct tyl} : Z :=
 Fixpoint size_arguments_vararg (tyl: list typ) (ofs: Z) {struct tyl} : Z :=
   match tyl with
   | nil => Zmax 0 ofs
-  | (Tint | Tsingle) :: tys => size_arguments_vararg tys (ofs + 1)
-  | (Tfloat | Tlong) :: tys => size_arguments_vararg tys (align ofs 2 + 2)
+  | (Tint | Tsingle | Tany32) :: tys => size_arguments_vararg tys (ofs + 1)
+  | (Tfloat | Tlong | Tany64) :: tys => size_arguments_vararg tys (align ofs 2 + 2)
   end.
 
 Definition size_arguments (s: signature) : Z :=
@@ -469,6 +479,19 @@ Proof.
   eapply IHtyl; eauto.
   subst. split; [omega | congruence].
   eapply INCR. eapply IHtyl; eauto. omega.
+- (* any32 *)
+  destruct (zlt ir 4); destruct H.
+  subst. left; apply ireg_param_in_params.
+  eapply IHtyl; eauto.
+  subst. split; [omega | congruence].
+  eapply INCR. eapply IHtyl; eauto. omega.
+- (* any64 *)
+  destruct (zlt fr 8); destruct H.
+  subst. right; apply freg_param_in_params.
+  eapply IHtyl; eauto.
+  subst. split. apply Zle_ge. apply align_le. omega. congruence.
+  eapply INCR. eapply IHtyl; eauto.
+  apply Zle_trans with (align ofs 2). apply align_le; omega. omega.
 Qed.
 
 Remark loc_arguments_vararg_charact:
@@ -530,6 +553,20 @@ Proof.
   right; apply freg_param_in_params.
   split. xomega. congruence.
   eapply INCR. eapply IHtyl; eauto. omega.
+- (* any32 *)
+  destruct H. 
+  destruct (zlt ofs 0); subst l. 
+  left; apply ireg_param_in_params.
+  split. xomega. congruence.
+  eapply INCR. eapply IHtyl; eauto. omega.
+- (* any64 *)
+  set (ofs' := align ofs 2) in *.
+  assert (ofs <= ofs') by (apply align_le; omega).
+  destruct H.
+  destruct (zlt ofs' 0); subst l.
+  right; apply freg_param_in_params.
+  split. xomega. congruence.
+  eapply INCR. eapply IHtyl; eauto. omega.
 Qed.
 
 Lemma loc_arguments_acceptable:
@@ -569,6 +606,10 @@ Proof.
   apply Zle_trans with (align ofs0 2 + 2); auto; omega.
   destruct (zlt fr 8); eauto.
   apply Zle_trans with (ofs0 + 1); eauto. omega.
+  destruct (zlt ir 4); eauto. apply Zle_trans with (ofs0 + 1); auto; omega.
+  destruct (zlt fr 8); eauto.
+  apply Zle_trans with (align ofs0 2). apply align_le; omega.
+  apply Zle_trans with (align ofs0 2 + 2); auto; omega.
 Qed.
 
 Remark size_arguments_vararg_above:
@@ -582,6 +623,8 @@ Proof.
   assert (ofs0 <= align ofs0 2) by (apply align_le; omega). xomega.
   assert (ofs0 <= align ofs0 2) by (apply align_le; omega). xomega.
   xomega.
+  xomega.
+  assert (ofs0 <= align ofs0 2) by (apply align_le; omega). xomega.
 Qed.
 
 Lemma size_arguments_above:
@@ -626,6 +669,18 @@ Proof.
   eauto.
   inv H. apply size_arguments_rec_above.
   eauto.
+- (* any32 *)
+  destruct (zlt ir 4); destruct H.
+  discriminate.
+  eauto.
+  inv H. apply size_arguments_rec_above. 
+  eauto.
+- (* any64 *)
+  destruct (zlt fr 8); destruct H.
+  discriminate.
+  eauto. 
+  inv H. apply size_arguments_rec_above. 
+  eauto.
 Qed.
 
 Lemma loc_arguments_vararg_bounded:
@@ -656,6 +711,14 @@ Proof.
   destruct H. 
   destruct (zlt ofs0 0); inv H. apply size_arguments_vararg_above. 
   eauto.
+- (* any32 *)
+  destruct H. 
+  destruct (zlt ofs0 0); inv H. apply size_arguments_vararg_above. 
+  eauto.
+- (* any64 *)
+  destruct H.
+  destruct (zlt (align ofs0 2) 0); inv H. apply size_arguments_vararg_above.
+  eauto.
 Qed.
 
 Lemma loc_arguments_bounded:
diff --git a/backend/Allocation.v b/backend/Allocation.v
index f4fcd6e4..919843b5 100644
--- a/backend/Allocation.v
+++ b/backend/Allocation.v
@@ -609,6 +609,20 @@ Definition subst_loc (l1 l2: loc) (e: eqs) : option eqs :=
      (EqSet2.elements_between (select_loc_l l1) (select_loc_h l1) (eqs2 e))
      (Some e).
 
+(** [loc_type_compat env l e] checks that for all equations [r = l] in [e],
+  the type [env r] of [r] is compatible with the type of [l]. *)
+
+Definition sel_type (k: equation_kind) (ty: typ) : typ :=
+  match k with
+  | Full => ty
+  | Low | High => Tint
+  end.
+
+Definition loc_type_compat (env: regenv) (l: loc) (e: eqs) : bool :=
+  EqSet2.for_all_between
+    (fun q => subtype (sel_type (ekind q) (env (ereg q))) (Loc.type l))
+    (select_loc_l l) (select_loc_h l) (eqs2 e).
+
 (** [add_equations [r1...rN] [m1...mN] e] adds to [e] the [N] equations
     [ri = R mi [Full]].  Return [None] if the two lists have different lengths.
 *)
@@ -759,18 +773,25 @@ Definition destroyed_by_move (src dst: loc) :=
   | _, _ => destroyed_by_op Omove
   end.
 
+Definition well_typed_move (env: regenv) (dst: loc) (e: eqs) : bool :=
+  match dst with
+  | R r => true
+  | S sl ofs ty => loc_type_compat env dst e
+  end.
+
 (** Simulate the effect of a sequence of moves [mv] on a set of
   equations [e].  The set [e] is the equations that must hold
   after the sequence of moves.  Return the set of equations that
   must hold before the sequence of moves.  Return [None] if the
   set of equations [e] cannot hold after the sequence of moves. *)
 
-Fixpoint track_moves (mv: moves) (e: eqs) : option eqs :=
+Fixpoint track_moves (env: regenv) (mv: moves) (e: eqs) : option eqs :=
   match mv with
   | nil => Some e
   | (src, dst) :: mv =>
-      do e1 <- track_moves mv e;
+      do e1 <- track_moves env mv e;
       assertion (can_undef_except dst (destroyed_by_move src dst)) e1;
+      assertion (well_typed_move env dst e1);
       subst_loc dst src e1
   end.
 
@@ -803,89 +824,90 @@ Definition kind_second_word := if Archi.big_endian then Low else High.
   equations that must hold "before" these instructions, or [None] if
   impossible. *)
 
-Definition transfer_aux (f: RTL.function) (shape: block_shape) (e: eqs) : option eqs :=
+Definition transfer_aux (f: RTL.function) (env: regenv)
+                        (shape: block_shape) (e: eqs) : option eqs :=
   match shape with
   | BSnop mv s =>
-      track_moves mv e
+      track_moves env mv e
   | BSmove src dst mv s =>
-      track_moves mv (subst_reg dst src e)
+      track_moves env mv (subst_reg dst src e)
   | BSmakelong src1 src2 dst mv s =>
       let e1 := subst_reg_kind dst High src1 Full e in
       let e2 := subst_reg_kind dst Low src2 Full e1 in
       assertion (reg_unconstrained dst e2);
-      track_moves mv e2
+      track_moves env mv e2
   | BSlowlong src dst mv s =>
       let e1 := subst_reg_kind dst Full src Low e in
       assertion (reg_unconstrained dst e1);
-      track_moves mv e1
+      track_moves env mv e1
   | BShighlong src dst mv s =>
       let e1 := subst_reg_kind dst Full src High e in
       assertion (reg_unconstrained dst e1);
-      track_moves mv e1
+      track_moves env mv e1
   | BSop op args res mv1 args' res' mv2 s =>
-      do e1 <- track_moves mv2 e;
+      do e1 <- track_moves env mv2 e;
       do e2 <- transfer_use_def args res args' res' (destroyed_by_op op) e1;
-      track_moves mv1 e2
+      track_moves env mv1 e2
   | BSopdead op args res mv s =>
       assertion (reg_unconstrained res e);
-      track_moves mv e
+      track_moves env mv e
   | BSload chunk addr args dst mv1 args' dst' mv2 s =>
-      do e1 <- track_moves mv2 e;
+      do e1 <- track_moves env mv2 e;
       do e2 <- transfer_use_def args dst args' dst' (destroyed_by_load chunk addr) e1;
-      track_moves mv1 e2
+      track_moves env mv1 e2
   | BSload2 addr addr' args dst mv1 args1' dst1' mv2 args2' dst2' mv3 s =>
-      do e1 <- track_moves mv3 e;
+      do e1 <- track_moves env mv3 e;
       let e2 := remove_equation (Eq kind_second_word dst (R dst2')) e1 in
       assertion (loc_unconstrained (R dst2') e2);
       assertion (can_undef (destroyed_by_load Mint32 addr') e2);
       do e3 <- add_equations args args2' e2;
-      do e4 <- track_moves mv2 e3;
+      do e4 <- track_moves env mv2 e3;
       let e5 := remove_equation (Eq kind_first_word dst (R dst1')) e4 in
       assertion (loc_unconstrained (R dst1') e5);
       assertion (can_undef (destroyed_by_load Mint32 addr) e5);
       assertion (reg_unconstrained dst e5);
       do e6 <- add_equations args args1' e5;
-      track_moves mv1 e6
+      track_moves env mv1 e6
   | BSload2_1 addr args dst mv1 args' dst' mv2 s =>
-      do e1 <- track_moves mv2 e;
+      do e1 <- track_moves env mv2 e;
       let e2 := remove_equation (Eq kind_first_word dst (R dst')) e1 in
       assertion (reg_loc_unconstrained dst (R dst') e2);
       assertion (can_undef (destroyed_by_load Mint32 addr) e2);
       do e3 <- add_equations args args' e2;
-      track_moves mv1 e3
+      track_moves env mv1 e3
   | BSload2_2 addr addr' args dst mv1 args' dst' mv2 s =>
-      do e1 <- track_moves mv2 e;
+      do e1 <- track_moves env mv2 e;
       let e2 := remove_equation (Eq kind_second_word dst (R dst')) e1 in
       assertion (reg_loc_unconstrained dst (R dst') e2);
       assertion (can_undef (destroyed_by_load Mint32 addr') e2);
       do e3 <- add_equations args args' e2;
-      track_moves mv1 e3
+      track_moves env mv1 e3
   | BSloaddead chunk addr args dst mv s =>
       assertion (reg_unconstrained dst e);
-      track_moves mv e
+      track_moves env mv e
   | BSstore chunk addr args src mv args' src' s =>
       assertion (can_undef (destroyed_by_store chunk addr) e);
       do e1 <- add_equations (src :: args) (src' :: args') e;
-      track_moves mv e1
+      track_moves env mv e1
   | BSstore2 addr addr' args src mv1 args1' src1' mv2 args2' src2' s =>
       assertion (can_undef (destroyed_by_store Mint32 addr') e);
       do e1 <- add_equations args args2' 
                   (add_equation (Eq kind_second_word src (R src2')) e);
-      do e2 <- track_moves mv2 e1;
+      do e2 <- track_moves env mv2 e1;
       assertion (can_undef (destroyed_by_store Mint32 addr) e2);
       do e3 <- add_equations args args1' 
                   (add_equation (Eq kind_first_word src (R src1')) e2);
-      track_moves mv1 e3
+      track_moves env mv1 e3
   | BScall sg ros args res mv1 ros' mv2 s =>
       let args' := loc_arguments sg in
       let res' := map R (loc_result sg) in
-      do e1 <- track_moves mv2 e;
+      do e1 <- track_moves env mv2 e;
       do e2 <- remove_equations_res res (sig_res sg) res' e1;
       assertion (forallb (fun l => reg_loc_unconstrained res l e2) res');
       assertion (no_caller_saves e2);
       do e3 <- add_equation_ros ros ros' e2;
       do e4 <- add_equations_args args (sig_args sg) args' e3;
-      track_moves mv1 e4
+      track_moves env mv1 e4
   | BStailcall sg ros args mv1 ros' =>
       let args' := loc_arguments sg in
       assertion (tailcall_is_possible sg);
@@ -893,9 +915,9 @@ Definition transfer_aux (f: RTL.function) (shape: block_shape) (e: eqs) : option
       assertion (ros_compatible_tailcall ros');
       do e1 <- add_equation_ros ros ros' empty_eqs;
       do e2 <- add_equations_args args (sig_args sg) args' e1;
-      track_moves mv1 e2
+      track_moves env mv1 e2
   | BSbuiltin ef args res mv1 args' res' mv2 s =>
-      do e1 <- track_moves mv2 e;
+      do e1 <- track_moves env mv2 e;
       let args' := map R args' in
       let res' := map R res' in
       do e2 <- remove_equations_res res (sig_res (ef_sig ef)) res' e1;
@@ -903,23 +925,23 @@ Definition transfer_aux (f: RTL.function) (shape: block_shape) (e: eqs) : option
       assertion (forallb (fun l => loc_unconstrained l e2) res');
       assertion (can_undef (destroyed_by_builtin ef) e2);
       do e3 <- add_equations_args args (sig_args (ef_sig ef)) args' e2;
-      track_moves mv1 e3
+      track_moves env mv1 e3
   | BSannot txt typ args res mv1 args' s =>
       do e1 <- add_equations_args args (annot_args_typ typ) args' e;
-      track_moves mv1 e1
+      track_moves env mv1 e1
   | BScond cond args mv args' s1 s2 =>
       assertion (can_undef (destroyed_by_cond cond) e);
       do e1 <- add_equations args args' e;
-      track_moves mv e1
+      track_moves env mv e1
   | BSjumptable arg mv arg' tbl =>
       assertion (can_undef destroyed_by_jumptable e);
-      track_moves mv (add_equation (Eq Full arg (R arg')) e)
+      track_moves env mv (add_equation (Eq Full arg (R arg')) e)
   | BSreturn None mv =>
-      track_moves mv empty_eqs
+      track_moves env mv empty_eqs
   | BSreturn (Some arg) mv =>
       let arg' := map R (loc_result (RTL.fn_sig f)) in
       do e1 <- add_equations_res arg (sig_res (RTL.fn_sig f)) arg' empty_eqs;
-      track_moves mv e1
+      track_moves env mv e1
   end.
 
 (** The main transfer function for the dataflow analysis.  Like [transfer_aux],
@@ -927,7 +949,7 @@ Definition transfer_aux (f: RTL.function) (shape: block_shape) (e: eqs) : option
   equations that must hold "after".  It also handles error propagation
   and reporting. *)
 
-Definition transfer (f: RTL.function) (shapes: PTree.t block_shape)
+Definition transfer (f: RTL.function) (env: regenv) (shapes: PTree.t block_shape)
                     (pc: node) (after: res eqs) : res eqs :=
   match after with
   | Error _ => after
@@ -935,7 +957,7 @@ Definition transfer (f: RTL.function) (shapes: PTree.t block_shape)
       match shapes!pc with
       | None => Error(MSG "At PC " :: POS pc :: MSG ": unmatched block" :: nil)
       | Some shape =>
-          match transfer_aux f shape e with
+          match transfer_aux f env shape e with
           | None => Error(MSG "At PC " :: POS pc :: MSG ": invalid register allocation" :: nil)
           | Some e' => OK e'
           end
@@ -1083,8 +1105,8 @@ Definition successors_block_shape (bsh: block_shape) : list node :=
   | BSreturn optarg mv => nil
   end.
 
-Definition analyze (f: RTL.function) (bsh: PTree.t block_shape) :=
-  DS.fixpoint_allnodes bsh successors_block_shape (transfer f bsh).
+Definition analyze (f: RTL.function) (env: regenv) (bsh: PTree.t block_shape) :=
+  DS.fixpoint_allnodes bsh successors_block_shape (transfer f env bsh).
 
 (** * Validating and translating functions and programs *)
 
@@ -1118,9 +1140,9 @@ Function compat_entry (rparams: list reg) (tys: list typ) (lparams: list loc) (e
   and stack size. *)
 
 Definition check_entrypoints_aux (rtl: RTL.function) (ltl: LTL.function)
-                                 (e1: eqs) : option unit :=
+                                 (env: regenv) (e1: eqs) : option unit :=
   do mv <- pair_entrypoints rtl ltl;
-  do e2 <- track_moves mv e1;
+  do e2 <- track_moves env mv e1;
   assertion (compat_entry (RTL.fn_params rtl)
                           (sig_args (RTL.fn_sig rtl))
                           (loc_parameters (RTL.fn_sig rtl)) e2);
@@ -1133,10 +1155,10 @@ Local Close Scope option_monad_scope.
 Local Open Scope error_monad_scope.
 
 Definition check_entrypoints (rtl: RTL.function) (ltl: LTL.function)
-                             (bsh: PTree.t block_shape)
+                             (env: regenv) (bsh: PTree.t block_shape)
                              (a: PMap.t LEq.t): res unit :=
-  do e1 <- transfer rtl bsh (RTL.fn_entrypoint rtl) a!!(RTL.fn_entrypoint rtl);
-  match check_entrypoints_aux rtl ltl e1 with
+  do e1 <- transfer rtl env bsh (RTL.fn_entrypoint rtl) a!!(RTL.fn_entrypoint rtl);
+  match check_entrypoints_aux rtl ltl env e1 with
   | None => Error (msg "invalid register allocation at entry point")
   | Some _ => OK tt
   end.
@@ -1145,11 +1167,11 @@ Definition check_entrypoints (rtl: RTL.function) (ltl: LTL.function)
   a source RTL function and an LTL function generated by the external
   register allocator. *)
 
-Definition check_function (rtl: RTL.function) (ltl: LTL.function) : res unit :=
+Definition check_function (rtl: RTL.function) (ltl: LTL.function) (env: regenv): res unit :=
   let bsh := pair_codes rtl ltl in
-  match analyze rtl bsh with
+  match analyze rtl env bsh with
   | None => Error (msg "allocation analysis diverges")
-  | Some a => check_entrypoints rtl ltl bsh a
+  | Some a => check_entrypoints rtl ltl env bsh a
   end.
 
 (** [regalloc] is the external register allocator.  It is written in OCaml
@@ -1165,7 +1187,7 @@ Definition transf_function (f: RTL.function) : res LTL.function :=
   | OK env =>
       match regalloc f with
       | Error m => Error m
-      | OK tf => do x <- check_function f tf; OK tf
+      | OK tf => do x <- check_function f tf env; OK tf
       end
   end.
 
diff --git a/backend/Allocproof.v b/backend/Allocproof.v
index 93038783..588a674e 100644
--- a/backend/Allocproof.v
+++ b/backend/Allocproof.v
@@ -451,7 +451,7 @@ Lemma add_equations_args_lessdef:
   forall rs ls rl tyl ll e e',
   add_equations_args rl tyl ll e = Some e' ->
   satisf rs ls e' ->
-  Val.has_type_list (rs##rl) (normalize_list tyl) ->
+  Val.has_type_list (rs##rl) tyl ->
   Val.lessdef_list (rs##rl) (decode_longs tyl (map ls ll)).
 Proof.
   intros until e'. functional induction (add_equations_args rl tyl ll e); simpl; intros.
@@ -969,7 +969,6 @@ Proof.
   split. apply eqs_same; auto. auto. 
 Qed.
 
-(*
 Lemma loc_type_compat_charact:
   forall env l e q,
   loc_type_compat env l e = true ->
@@ -1009,8 +1008,7 @@ Proof.
   destruct (rs#r); exact I.
   eelim Loc.diff_not_eq. eexact A. auto.
 Qed.
-*)
-(*
+
 Remark val_lessdef_normalize:
   forall v v' ty,
   Val.has_type v ty -> Val.lessdef v v' ->
@@ -1018,19 +1016,23 @@ Remark val_lessdef_normalize:
 Proof.
   intros. inv H0. rewrite Val.load_result_same; auto. auto. 
 Qed.
-*)
 
 Lemma subst_loc_satisf:
   forall env src dst rs ls e e',
   subst_loc dst src e = Some e' ->
-  (*well_typed_move env dst e = true ->*)
+  well_typed_move env dst e = true ->
   wt_regset env rs ->
   satisf rs ls e' ->
   satisf rs (Locmap.set dst (ls src) ls) e.
 Proof.
   intros; red; intros.
   exploit in_subst_loc; eauto. intros [[A B] | [A B]].
-  subst dst. rewrite Locmap.gss. apply (H1 _ B).
+  subst dst. rewrite Locmap.gss.
+  destruct q as [k r l]; simpl in *.
+  exploit well_typed_move_charact; eauto.
+  destruct l as [mr | sl ofs ty]; intros.
+  apply (H2 _ B). 
+  apply val_lessdef_normalize; auto. apply (H2 _ B). 
   rewrite Locmap.gso; auto.
 Qed.
 
@@ -1077,18 +1079,22 @@ Proof.
 Qed.
 
 Lemma subst_loc_undef_satisf:
-  forall src dst rs ls ml e e',
+  forall env src dst rs ls ml e e',
   subst_loc dst src e = Some e' ->
-  (*well_typed_move env dst e = true ->*)
+  well_typed_move env dst e = true ->
   can_undef_except dst ml e = true ->
-  (*wt_regset env rs ->*)
+  wt_regset env rs ->
   satisf rs ls e' ->
   satisf rs (Locmap.set dst (ls src) (undef_regs ml ls)) e.
 Proof.
   intros; red; intros.
   exploit in_subst_loc; eauto. intros [[A B] | [A B]].
   subst dst. rewrite Locmap.gss.
-  destruct q as [k r l]; simpl in *. apply (H1 _ B). 
+  destruct q as [k r l]; simpl in *.
+  exploit well_typed_move_charact; eauto.
+  destruct l as [mr | sl ofs ty]; intros.
+  apply (H3 _ B). 
+  apply val_lessdef_normalize; auto. apply (H3 _ B). 
   rewrite Locmap.gso; auto. rewrite undef_regs_outside. eauto. 
   eapply can_undef_except_sound; eauto. apply Loc.diff_sym; auto.
 Qed.
@@ -1327,30 +1333,45 @@ Proof.
   auto. congruence.
 Qed.
 
+Lemma loadv_int64_split:
+  forall m a v,
+  Mem.loadv Mint64 m a = Some v ->
+  exists v1 v2,
+     Mem.loadv Mint32 m a = Some (if Archi.big_endian then v1 else v2)
+  /\ Mem.loadv  Mint32 m (Val.add a (Vint (Int.repr 4))) = Some (if Archi.big_endian then v2 else v1)
+  /\ Val.lessdef (Val.hiword v) v1
+  /\ Val.lessdef (Val.loword v) v2.
+Proof.
+  intros. exploit Mem.loadv_int64_split; eauto. intros (v1 & v2 & A & B & C).
+  exists v1, v2. split; auto. split; auto. 
+  inv C; auto. destruct v1, v2; simpl; auto. 
+  rewrite Int64.hi_ofwords, Int64.lo_ofwords; auto. 
+Qed.
+
 (** * Properties of the dataflow analysis *)
 
 Lemma analyze_successors:
-  forall f bsh an pc bs s e,
-  analyze f bsh = Some an ->
+  forall f env bsh an pc bs s e,
+  analyze f env bsh = Some an ->
   bsh!pc = Some bs ->
   In s (successors_block_shape bs) ->
   an!!pc = OK e ->
-  exists e', transfer f bsh s an!!s = OK e' /\ EqSet.Subset e' e.
+  exists e', transfer f env bsh s an!!s = OK e' /\ EqSet.Subset e' e.
 Proof.
   unfold analyze; intros. exploit DS.fixpoint_allnodes_solution; eauto.
-  rewrite H2. unfold DS.L.ge. destruct (transfer f bsh s an#s); intros.
+  rewrite H2. unfold DS.L.ge. destruct (transfer f env bsh s an#s); intros.
   exists e0; auto. 
   contradiction.
 Qed.
 
 Lemma satisf_successors:
-  forall f bsh an pc bs s e rs ls,
-  analyze f bsh = Some an ->
+  forall f env bsh an pc bs s e rs ls,
+  analyze f env bsh = Some an ->
   bsh!pc = Some bs ->
   In s (successors_block_shape bs) ->
   an!!pc = OK e ->
   satisf rs ls e ->
-  exists e', transfer f bsh s an!!s = OK e' /\ satisf rs ls e'.
+  exists e', transfer f env bsh s an!!s = OK e' /\ satisf rs ls e'.
 Proof.
   intros. exploit analyze_successors; eauto. intros [e' [A B]]. 
   exists e'; split; auto. eapply satisf_incr; eauto.
@@ -1360,12 +1381,13 @@ Qed.
 
 Inductive transf_function_spec (f: RTL.function) (tf: LTL.function) : Prop :=
   | transf_function_spec_intro:  
-      forall an mv k e1 e2,
-      analyze f (pair_codes f tf) = Some an ->
+      forall env an mv k e1 e2,
+      wt_function f env ->
+      analyze f env (pair_codes f tf) = Some an ->
       (LTL.fn_code tf)!(LTL.fn_entrypoint tf) = Some(expand_moves mv (Lbranch (RTL.fn_entrypoint f) :: k)) ->
       wf_moves mv ->
-      transfer f (pair_codes f tf) (RTL.fn_entrypoint f) an!!(RTL.fn_entrypoint f) = OK e1 ->
-      track_moves mv e1 = Some e2 ->
+      transfer f env (pair_codes f tf) (RTL.fn_entrypoint f) an!!(RTL.fn_entrypoint f) = OK e1 ->
+      track_moves env mv e1 = Some e2 ->
       compat_entry (RTL.fn_params f) (sig_args (RTL.fn_sig f)) (loc_parameters (fn_sig tf)) e2 = true ->
       can_undef destroyed_at_function_entry e2 = true ->
       RTL.fn_stacksize f = LTL.fn_stacksize tf ->
@@ -1380,33 +1402,36 @@ Proof.
   unfold transf_function; intros.
   destruct (type_function f) as [env|] eqn:TY; try discriminate.
   destruct (regalloc f); try discriminate.
-  destruct (check_function f f0) as [] eqn:?; inv H.
+  destruct (check_function f f0 env) as [] eqn:?; inv H.
   unfold check_function in Heqr. 
-  destruct (analyze f (pair_codes f tf)) as [an|] eqn:?; try discriminate.
+  destruct (analyze f env (pair_codes f tf)) as [an|] eqn:?; try discriminate.
   monadInv Heqr. 
-  destruct (check_entrypoints_aux f tf x) as [y|] eqn:?; try discriminate.
+  destruct (check_entrypoints_aux f tf env x) as [y|] eqn:?; try discriminate.
   unfold check_entrypoints_aux, pair_entrypoints in Heqo0. MonadInv.
   exploit extract_moves_sound; eauto. intros [A B]. subst b.
   exploit check_succ_sound; eauto. intros [k EQ1]. subst b0.
-  econstructor; eauto. congruence. 
+  econstructor; eauto. eapply type_function_correct; eauto. congruence. 
 Qed.
 
 Lemma invert_code:
-  forall f tf pc i opte e,
+  forall f env tf pc i opte e,
+  wt_function f env ->
   (RTL.fn_code f)!pc = Some i ->
-  transfer f (pair_codes f tf) pc opte = OK e ->
+  transfer f env (pair_codes f tf) pc opte = OK e ->
   exists eafter, exists bsh, exists bb,
   opte = OK eafter /\ 
   (pair_codes f tf)!pc = Some bsh /\
   (LTL.fn_code tf)!pc = Some bb /\
   expand_block_shape bsh i bb /\
-  transfer_aux f bsh eafter = Some e.
+  transfer_aux f env bsh eafter = Some e /\
+  wt_instr f env i.
 Proof.
-  intros. destruct opte as [eafter|]; simpl in H0; try discriminate. exists eafter.
+  intros. destruct opte as [eafter|]; simpl in H1; try discriminate. exists eafter.
   destruct (pair_codes f tf)!pc as [bsh|] eqn:?; try discriminate. exists bsh. 
   exploit matching_instr_block; eauto. intros [bb [A B]].
-  destruct (transfer_aux f bsh eafter) as [e1|] eqn:?; inv H0. 
-  exists bb. tauto. 
+  destruct (transfer_aux f env bsh eafter) as [e1|] eqn:?; inv H1. 
+  exists bb. exploit wt_instr_at; eauto.
+  tauto. 
 Qed.
 
 (** * Semantic preservation *)
@@ -1480,10 +1505,11 @@ Proof.
 Qed.
 
 Lemma exec_moves:
-  forall mv rs s f sp bb m e e' ls,
-  track_moves mv e = Some e' ->
+  forall mv env rs s f sp bb m e e' ls,
+  track_moves env mv e = Some e' ->
   wf_moves mv ->
   satisf rs ls e' ->
+  wt_regset env rs ->
   exists ls',
     star step tge (Block s f sp (expand_moves mv bb) ls m)
                E0 (Block s f sp bb ls' m)
@@ -1495,7 +1521,7 @@ Opaque destroyed_by_op.
 - unfold expand_moves; simpl. inv H. exists ls; split. apply star_refl. auto.
   (* step *)
 - destruct a as [src dst]. unfold expand_moves. simpl. 
-  destruct (track_moves mv e) as [e1|] eqn:?; MonadInv.
+  destruct (track_moves env mv e) as [e1|] eqn:?; MonadInv.
   assert (wf_moves mv). red; intros. apply H0; auto with coqlib. 
   destruct src as [rsrc | ssrc]; destruct dst as [rdst | sdst].
   (* reg-reg *)
@@ -1521,13 +1547,17 @@ Inductive match_stackframes: list RTL.stackframe -> list LTL.stackframe -> signa
       sg.(sig_res) = Some Tint ->
       match_stackframes nil nil sg
   | match_stackframes_cons:
-      forall res f sp pc rs s tf bb ls ts sg an e
+      forall res f sp pc rs s tf bb ls ts sg an e env
         (STACKS: match_stackframes s ts (fn_sig tf))
         (FUN: transf_function f = OK tf)
-        (ANL: analyze f (pair_codes f tf) = Some an)
-        (EQ: transfer f (pair_codes f tf) pc an!!pc = OK e)
+        (ANL: analyze f env (pair_codes f tf) = Some an)
+        (EQ: transfer f env (pair_codes f tf) pc an!!pc = OK e)
+        (WTF: wt_function f env)
+        (WTRS: wt_regset env rs)
+        (WTRES: env res = proj_sig_res sg)
         (STEPS: forall v ls1 m,
            Val.lessdef_list (encode_long (sig_res sg) v) (map ls1 (map R (loc_result sg))) ->
+           Val.has_type v (env res) ->
            agree_callee_save ls ls1 ->
            exists ls2,
            star LTL.step tge (Block ts tf sp bb ls1 m)
@@ -1540,13 +1570,15 @@ Inductive match_stackframes: list RTL.stackframe -> list LTL.stackframe -> signa
 
 Inductive match_states: RTL.state -> LTL.state -> Prop :=
   | match_states_intro:
-      forall s f sp pc rs m ts tf ls m' an e
+      forall s f sp pc rs m ts tf ls m' an e env
         (STACKS: match_stackframes s ts (fn_sig tf))
         (FUN: transf_function f = OK tf)
-        (ANL: analyze f (pair_codes f tf) = Some an)
-        (EQ: transfer f (pair_codes f tf) pc an!!pc = OK e)
+        (ANL: analyze f env (pair_codes f tf) = Some an)
+        (EQ: transfer f env (pair_codes f tf) pc an!!pc = OK e)
         (SAT: satisf rs ls e)
-        (MEM: Mem.extends m m'),
+        (MEM: Mem.extends m m')
+        (WTF: wt_function f env)
+        (WTRS: wt_regset env rs),
       match_states (RTL.State s f sp pc rs m)
                    (LTL.State ts tf sp pc ls m')
   | match_states_call:
@@ -1555,7 +1587,8 @@ Inductive match_states: RTL.state -> LTL.state -> Prop :=
         (FUN: transf_fundef f = OK tf)
         (ARGS: Val.lessdef_list args (decode_longs (sig_args (funsig tf)) (map ls (loc_arguments (funsig tf)))))
         (AG: agree_callee_save (parent_locset ts) ls)
-        (MEM: Mem.extends m m'),
+        (MEM: Mem.extends m m')
+        (WTARGS: Val.has_type_list args (sig_args (funsig tf))),
       match_states (RTL.Callstate s f args m)
                    (LTL.Callstate ts tf ls m')
   | match_states_return:
@@ -1563,7 +1596,8 @@ Inductive match_states: RTL.state -> LTL.state -> Prop :=
         (STACKS: match_stackframes s ts sg)
         (RES: Val.lessdef_list (encode_long (sig_res sg) res) (map ls (map R (loc_result sg))))
         (AG: agree_callee_save (parent_locset ts) ls)
-        (MEM: Mem.extends m m'),
+        (MEM: Mem.extends m m')
+        (WTRES: Val.has_type res (proj_sig_res sg)),
       match_states (RTL.Returnstate s res m)
                    (LTL.Returnstate ts ls m').
 
@@ -1576,13 +1610,14 @@ Proof.
   intros. inv H. 
   constructor. congruence.
   econstructor; eauto.
+  unfold proj_sig_res in *. rewrite H0; auto. 
   intros. unfold loc_result in H; rewrite H0 in H; eauto. 
 Qed.
 
 Ltac UseShape :=
   match goal with
-  | [ CODE: (RTL.fn_code _)!_ = Some _, EQ: transfer _ _ _ _ = OK _ |- _ ] =>
-      destruct (invert_code _ _ _ _ _ _ CODE EQ) as (eafter & bsh & bb & AFTER & BSH & TCODE & EBS & TR); 
+  | [ WT: wt_function _ _, CODE: (RTL.fn_code _)!_ = Some _, EQ: transfer _ _ _ _ _ = OK _ |- _ ] =>
+      destruct (invert_code _ _ _ _ _ _ _ WT CODE EQ) as (eafter & bsh & bb & AFTER & BSH & TCODE & EBS & TR & WTI); 
       inv EBS; unfold transfer_aux in TR; MonadInv
   end.
 
@@ -1626,7 +1661,8 @@ Proof.
   econstructor; eauto. 
 
 (* op move *)
-- simpl in H0. inv H0.  
+- generalize (wt_exec_Iop _ _ _ _ _ _ _ _ _ _ _ WTI H0 WTRS). intros WTRS'.
+  simpl in H0. inv H0.  
   exploit (exec_moves mv); eauto. intros [ls1 [X Y]]. 
   econstructor; split.
   eapply plus_left. econstructor; eauto. 
@@ -1637,7 +1673,8 @@ Proof.
   econstructor; eauto.
 
 (* op makelong *)
-- simpl in H0. inv H0. 
+- generalize (wt_exec_Iop _ _ _ _ _ _ _ _ _ _ _ WTI H0 WTRS). intros WTRS'.
+  simpl in H0. inv H0. 
   exploit (exec_moves mv); eauto. intros [ls1 [X Y]]. 
   econstructor; split.
   eapply plus_left. econstructor; eauto. 
@@ -1649,7 +1686,8 @@ Proof.
   econstructor; eauto.
 
 (* op lowlong *)
-- simpl in H0. inv H0. 
+- generalize (wt_exec_Iop _ _ _ _ _ _ _ _ _ _ _ WTI H0 WTRS). intros WTRS'.
+  simpl in H0. inv H0. 
   exploit (exec_moves mv); eauto. intros [ls1 [X Y]]. 
   econstructor; split.
   eapply plus_left. econstructor; eauto. 
@@ -1661,7 +1699,8 @@ Proof.
   econstructor; eauto.
 
 (* op highlong *)
-- simpl in H0. inv H0. 
+- generalize (wt_exec_Iop _ _ _ _ _ _ _ _ _ _ _ WTI H0 WTRS). intros WTRS'.
+  simpl in H0. inv H0. 
   exploit (exec_moves mv); eauto. intros [ls1 [X Y]]. 
   econstructor; split.
   eapply plus_left. econstructor; eauto. 
@@ -1673,7 +1712,8 @@ Proof.
   econstructor; eauto.
 
 (* op regular *)
-- exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
+- generalize (wt_exec_Iop _ _ _ _ _ _ _ _ _ _ _ WTI H0 WTRS). intros WTRS'.
+  exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
   exploit transfer_use_def_satisf; eauto. intros [X Y].
   exploit eval_operation_lessdef; eauto. intros [v' [F G]]. 
   exploit (exec_moves mv2); eauto. intros [ls2 [A2 B2]].
@@ -1697,9 +1737,11 @@ Proof.
   eapply reg_unconstrained_satisf; eauto. 
   intros [enext [U V]]. 
   econstructor; eauto.
+  eapply wt_exec_Iop; eauto. 
 
 (* load regular *)
-- exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
+- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
+  exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
   exploit transfer_use_def_satisf; eauto. intros [X Y].
   exploit eval_addressing_lessdef; eauto. intros [a' [F G]].
   exploit Mem.loadv_extends; eauto. intros [v' [P Q]]. 
@@ -1715,7 +1757,8 @@ Proof.
   econstructor; eauto.
 
 (* load pair *)
-- exploit Mem.loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V12).
+- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
+  exploit loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V1 & V2).
   set (v2' := if Archi.big_endian then v2 else v1) in *.
   set (v1' := if Archi.big_endian then v1 else v2) in *.
   exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
@@ -1729,15 +1772,12 @@ Proof.
     eapply reg_unconstrained_satisf. eauto. 
     eapply add_equations_satisf; eauto. assumption.
     rewrite Regmap.gss. apply Val.lessdef_trans with v1'; auto. 
-    subst v. unfold v1', kind_first_word.
-    destruct Archi.big_endian; apply val_hiword_longofwords || apply val_loword_longofwords.
   }
   exploit (exec_moves mv2); eauto. intros [ls3 [A3 B3]]. 
   assert (LD3: Val.lessdef_list rs##args (reglist ls3 args2')).
   { replace (rs##args) with ((rs#dst<-v)##args). 
     eapply add_equations_lessdef; eauto. 
     apply list_map_exten; intros. rewrite Regmap.gso; auto.
-    exploit wt_instr_inv; eauto. intros (env & WTI & WTRS). 
     eapply addressing_not_long; eauto. 
   }
   exploit eval_addressing_lessdef. eexact LD3.
@@ -1747,9 +1787,7 @@ Proof.
   assert (SAT4: satisf (rs#dst <- v) ls4 e0).
   { eapply loc_unconstrained_satisf. eapply can_undef_satisf; eauto. 
     eapply add_equations_satisf; eauto. assumption.
-    apply Val.lessdef_trans with v2'; auto. 
-    rewrite Regmap.gss. subst v. unfold v2', kind_second_word.
-    destruct Archi.big_endian; apply val_hiword_longofwords || apply val_loword_longofwords.
+    rewrite Regmap.gss. apply Val.lessdef_trans with v2'; auto.
   }
   exploit (exec_moves mv3); eauto. intros [ls5 [A5 B5]].
   econstructor; split.
@@ -1769,7 +1807,8 @@ Proof.
   econstructor; eauto.
 
 (* load first word of a pair *)
-- exploit Mem.loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V12).
+- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
+  exploit loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V1 & V2).
   set (v2' := if Archi.big_endian then v2 else v1) in *.
   set (v1' := if Archi.big_endian then v1 else v2) in *.
   exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
@@ -1781,8 +1820,6 @@ Proof.
   assert (SAT2: satisf (rs#dst <- v) ls2 e0).
   { eapply parallel_assignment_satisf; eauto.
     apply Val.lessdef_trans with v1'; auto. 
-    subst v. unfold v1', kind_first_word.
-    destruct Archi.big_endian; apply val_hiword_longofwords || apply val_loword_longofwords.
     eapply can_undef_satisf. eauto. eapply add_equations_satisf; eauto.
   }
   exploit (exec_moves mv2); eauto. intros [ls3 [A3 B3]]. 
@@ -1799,7 +1836,8 @@ Proof.
   econstructor; eauto.
 
 (* load second word of a pair *)
-- exploit Mem.loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V12).
+- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
+  exploit loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V1 & V2).
   set (v2' := if Archi.big_endian then v2 else v1) in *.
   set (v1' := if Archi.big_endian then v1 else v2) in *.
   exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
@@ -1812,8 +1850,6 @@ Proof.
   assert (SAT2: satisf (rs#dst <- v) ls2 e0).
   { eapply parallel_assignment_satisf; eauto.
     apply Val.lessdef_trans with v2'; auto. 
-    subst v. unfold v2', kind_second_word.
-    destruct Archi.big_endian; apply val_hiword_longofwords || apply val_loword_longofwords.
     eapply can_undef_satisf. eauto. eapply add_equations_satisf; eauto.
   }
   exploit (exec_moves mv2); eauto. intros [ls3 [A3 B3]]. 
@@ -1839,6 +1875,7 @@ Proof.
   eapply reg_unconstrained_satisf; eauto. 
   intros [enext [U V]]. 
   econstructor; eauto.
+  eapply wt_exec_Iload; eauto.
 
 (* store *)
 - exploit exec_moves; eauto. intros [ls1 [X Y]].
@@ -1918,19 +1955,21 @@ Proof.
   exploit analyze_successors; eauto. simpl. left; eauto. intros [enext [U V]].
   econstructor; eauto.
   econstructor; eauto.
+  inv WTI. congruence.
   intros. exploit (exec_moves mv2). eauto. eauto. 
   eapply function_return_satisf with (v := v) (ls_before := ls1) (ls_after := ls0); eauto.
   eapply add_equation_ros_satisf; eauto. 
   eapply add_equations_args_satisf; eauto. 
   congruence.
+  apply wt_regset_assign; auto. 
   intros [ls2 [A2 B2]].
   exists ls2; split. 
   eapply star_right. eexact A2. constructor. traceEq.
   apply satisf_incr with eafter; auto. 
   rewrite SIG. eapply add_equations_args_lessdef; eauto.
-  exploit wt_instr_inv; eauto. intros (env & WTI & WTRS). 
   inv WTI. rewrite <- H7. apply wt_regset_list; auto. 
-  simpl. red; auto.
+  simpl. red; auto. 
+  inv WTI. rewrite SIG. rewrite <- H7. apply wt_regset_list; auto. 
 
 (* tailcall *)
 - set (sg := RTL.funsig fd) in *.
@@ -1951,15 +1990,15 @@ Proof.
   eapply match_stackframes_change_sig; eauto. rewrite SIG. rewrite e0. decEq.
   destruct (transf_function_inv _ _ FUN); auto.
   rewrite SIG. rewrite return_regs_arg_values; auto. eapply add_equations_args_lessdef; eauto.
-  exploit wt_instr_inv; eauto. intros (env & WTI & WTRS). 
   inv WTI. rewrite <- H6. apply wt_regset_list; auto. 
   apply return_regs_agree_callee_save.
+  rewrite SIG. inv WTI. rewrite <- H6. apply wt_regset_list; auto. 
 
 (* builtin *)
-- exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
+- assert (WTRS': wt_regset env (rs#res <- v)) by (eapply wt_exec_Ibuiltin; eauto).
+  exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
   exploit external_call_mem_extends; eauto.
   eapply add_equations_args_lessdef; eauto.
-  exploit wt_instr_inv; eauto. intros (env & WTI & WTRS). 
   inv WTI. rewrite <- H4. apply wt_regset_list; auto. 
   intros [v' [m'' [F [G [J K]]]]].
   assert (E: map ls1 (map R args') = reglist ls1 args').
@@ -1990,7 +2029,6 @@ Proof.
 (* annot *)
 - exploit (exec_moves mv); eauto. intros [ls1 [A1 B1]]. 
   exploit external_call_mem_extends; eauto. eapply add_equations_args_lessdef; eauto.
-  exploit wt_instr_inv; eauto. intros (env & WTI & WTRS). 
   inv WTI. simpl in H4. rewrite <- H4. apply wt_regset_list; auto. 
   intros [v' [m'' [F [G [J K]]]]].
   assert (v = Vundef). red in H0; inv H0. auto.
@@ -2009,6 +2047,7 @@ Proof.
   econstructor; eauto.
   change (destroyed_by_builtin (EF_annot txt typ)) with (@nil mreg). 
   simpl. subst v. assumption.
+  apply wt_regset_assign; auto. subst v. constructor. 
 
 (* cond *)
 - exploit (exec_moves mv); eauto. intros [ls1 [A1 B1]].
@@ -2040,43 +2079,44 @@ Proof.
 
 (* return *)
 - destruct (transf_function_inv _ _ FUN). 
-  exploit Mem.free_parallel_extends; eauto. rewrite H9. intros [m'' [P Q]].
-  destruct or as [r|]; MonadInv.
-  (* with an argument *)
-+ exploit (exec_moves mv); eauto. intros [ls1 [A1 B1]].
+  exploit Mem.free_parallel_extends; eauto. rewrite H10. intros [m'' [P Q]].
+  inv WTI; MonadInv.
++ (* without an argument *)
+  exploit (exec_moves mv); eauto. intros [ls1 [A1 B1]].
   econstructor; split.
   eapply plus_left. econstructor; eauto. 
   eapply star_right. eexact A1.
   econstructor. eauto. eauto. traceEq.
-  simpl. econstructor; eauto. rewrite <- H10.
-  replace (map (return_regs (parent_locset ts) ls1) (map R (loc_result (RTL.fn_sig f))))
-     with (map ls1 (map R (loc_result (RTL.fn_sig f)))).
-  eapply add_equations_res_lessdef; eauto.
-  rewrite !list_map_compose. apply list_map_exten; intros.
-  unfold return_regs. apply pred_dec_true. eapply loc_result_caller_save; eauto.
+  simpl. econstructor; eauto.
+  unfold encode_long, loc_result. rewrite <- H11; rewrite H13. simpl; auto.
   apply return_regs_agree_callee_save.
-
-  (* without an argument *)
-+ assert (SG: f.(RTL.fn_sig).(sig_res) = None).
-  { exploit wt_instr_inv; eauto. intros (env & WTI & WTRS). 
-    inv WTI; auto. }
+  constructor.
++ (* with an argument *)
   exploit (exec_moves mv); eauto. intros [ls1 [A1 B1]].
   econstructor; split.
   eapply plus_left. econstructor; eauto. 
   eapply star_right. eexact A1.
   econstructor. eauto. eauto. traceEq.
-  simpl. econstructor; eauto.
-  unfold encode_long, loc_result. rewrite <- H10. rewrite SG. simpl; auto.
+  simpl. econstructor; eauto. rewrite <- H11.
+  replace (map (return_regs (parent_locset ts) ls1) (map R (loc_result (RTL.fn_sig f))))
+     with (map ls1 (map R (loc_result (RTL.fn_sig f)))).
+  eapply add_equations_res_lessdef; eauto.
+  rewrite !list_map_compose. apply list_map_exten; intros.
+  unfold return_regs. apply pred_dec_true. eapply loc_result_caller_save; eauto.
   apply return_regs_agree_callee_save.
+  unfold proj_sig_res. rewrite <- H11; rewrite H13. apply WTRS. 
 
 (* internal function *)
 - monadInv FUN. simpl in *.
   destruct (transf_function_inv _ _ EQ). 
-  exploit Mem.alloc_extends; eauto. apply Zle_refl. rewrite H7; apply Zle_refl. 
+  exploit Mem.alloc_extends; eauto. apply Zle_refl. rewrite H8; apply Zle_refl. 
   intros [m'' [U V]].
+  assert (WTRS: wt_regset env (init_regs args (fn_params f))).
+  { apply wt_init_regs. inv H0. rewrite wt_params. rewrite H9. auto. }
   exploit (exec_moves mv). eauto. eauto.
     eapply can_undef_satisf; eauto. eapply compat_entry_satisf; eauto. 
-    rewrite call_regs_param_values. rewrite H8. eexact ARGS.
+    rewrite call_regs_param_values. rewrite H9. eexact ARGS.
+    exact WTRS.
   intros [ls1 [A B]].
   econstructor; split.
   eapply plus_left. econstructor; eauto. 
@@ -2108,13 +2148,15 @@ Proof.
   exploit list_in_map_inv; eauto. intros [r [A B]]; subst l'. 
   destruct l; simpl; auto. red; intros; subst r0; elim H0.
   eapply loc_result_caller_save; eauto.
- 
+  simpl. eapply external_call_well_typed; eauto. 
+
 (* return *)
 - inv STACKS.
-  exploit STEPS; eauto. intros [ls2 [A B]].
+  exploit STEPS; eauto. rewrite WTRES0; auto. intros [ls2 [A B]].
   econstructor; split.
   eapply plus_left. constructor. eexact A. traceEq.
   econstructor; eauto.
+  apply wt_regset_assign; auto. rewrite WTRES0; auto.
 Qed.
 
 Lemma initial_states_simulation:
@@ -2135,6 +2177,7 @@ Proof.
   rewrite SIG; rewrite H3; simpl; auto.
   red; auto.
   apply Mem.extends_refl.
+  rewrite SIG, H3. constructor.
 Qed.
 
 Lemma final_states_simulation:
diff --git a/backend/CMtypecheck.ml b/backend/CMtypecheck.ml
index 5e46d769..02c3f210 100644
--- a/backend/CMtypecheck.ml
+++ b/backend/CMtypecheck.ml
@@ -34,12 +34,16 @@ let tint = Base Tint
 let tfloat = Base Tfloat
 let tlong = Base Tlong
 let tsingle = Base Tsingle
+let tany32 = Base Tany32
+let tany64 = Base Tany64
 
 let ty_of_typ = function
   | Tint -> tint
   | Tfloat -> tfloat
   | Tlong -> tlong
-  | Tsingle -> tfloat (* should be tsingle when supported *)
+  | Tsingle -> tsingle
+  | Tany32 -> tany32
+  | Tany64 -> tany64
 
 let ty_of_sig_args tyl = List.map ty_of_typ tyl
 
@@ -88,6 +92,7 @@ let name_of_comparison = function
 let type_constant = function
   | Ointconst _ -> tint
   | Ofloatconst _ -> tfloat
+  | Osingleconst _ -> tsingle
   | Olongconst _ -> tlong
   | Oaddrsymbol _ -> tint
   | Oaddrstack _ -> tint
@@ -101,11 +106,18 @@ let type_unary_operation = function
   | Onotint -> tint, tint
   | Onegf -> tfloat, tfloat
   | Oabsf -> tfloat, tfloat
-  | Osingleoffloat -> tfloat, tfloat
+  | Onegfs -> tsingle, tsingle
+  | Oabsfs -> tsingle, tsingle
+  | Osingleoffloat -> tfloat, tsingle
+  | Ofloatofsingle -> tsingle, tfloat
   | Ointoffloat -> tfloat, tint
   | Ointuoffloat -> tfloat, tint
   | Ofloatofint -> tint, tfloat
   | Ofloatofintu -> tint, tfloat
+  | Ointofsingle -> tsingle, tint
+  | Ointuofsingle -> tsingle, tint
+  | Osingleofint -> tint, tsingle
+  | Osingleofintu -> tint, tsingle
   | Onegl -> tlong, tlong
   | Onotl -> tlong, tlong
   | Ointoflong -> tlong, tint
@@ -115,6 +127,8 @@ let type_unary_operation = function
   | Olonguoffloat -> tfloat, tlong
   | Ofloatoflong -> tlong, tfloat
   | Ofloatoflongu -> tlong, tfloat
+  | Olongofsingle -> tsingle, tlong
+  | Olonguofsingle -> tsingle, tlong
   | Osingleoflong -> tlong, tfloat
   | Osingleoflongu -> tlong, tfloat
 
@@ -136,6 +150,10 @@ let type_binary_operation = function
   | Osubf -> tfloat, tfloat, tfloat
   | Omulf -> tfloat, tfloat, tfloat
   | Odivf -> tfloat, tfloat, tfloat
+  | Oaddfs -> tsingle, tsingle, tsingle
+  | Osubfs -> tsingle, tsingle, tsingle
+  | Omulfs -> tsingle, tsingle, tsingle
+  | Odivfs -> tsingle, tsingle, tsingle
   | Oaddl -> tlong, tlong, tlong
   | Osubl -> tlong, tlong, tlong
   | Omull -> tlong, tlong, tlong
@@ -152,6 +170,7 @@ let type_binary_operation = function
   | Ocmp _ -> tint, tint, tint
   | Ocmpu _ -> tint, tint, tint
   | Ocmpf _ -> tfloat, tfloat, tint
+  | Ocmpfs _ -> tsingle, tsingle, tint
   | Ocmpl _ -> tlong, tlong, tint
   | Ocmplu _ -> tlong, tlong, tint
 
@@ -166,8 +185,10 @@ let type_chunk = function
   | Mint16unsigned -> tint
   | Mint32 -> tint
   | Mint64 -> tlong
-  | Mfloat32 -> tfloat
+  | Mfloat32 -> tsingle
   | Mfloat64 -> tfloat
+  | Many32 -> tany32
+  | Many64 -> tany64
 
 let name_of_chunk = PrintAST.name_of_chunk
 
diff --git a/backend/CSE.v b/backend/CSE.v
index 373acce8..fa229871 100644
--- a/backend/CSE.v
+++ b/backend/CSE.v
@@ -293,7 +293,6 @@ Definition store_normalized_range (chunk: memory_chunk) : aval :=
   | Mint8unsigned => Uns 8
   | Mint16signed => Sgn 16
   | Mint16unsigned => Uns 16
-  | Mfloat32 => Fsingle
   | _ => Vtop
   end.
 
@@ -380,7 +379,7 @@ Variable n: numbering.
 Fixpoint reduce_rec (niter: nat) (op: A) (args: list valnum) : option(A * list reg) :=
   match niter with
   | O => None
-  | S niter' =>
+  | Datatypes.S niter' =>
       match f (fun v => find_valnum_num v n.(num_eqs)) op args with
       | None => None
       | Some(op', args') =>
diff --git a/backend/CSEproof.v b/backend/CSEproof.v
index 317fb82f..af138f83 100644
--- a/backend/CSEproof.v
+++ b/backend/CSEproof.v
@@ -497,7 +497,6 @@ Proof.
 - inv H. rewrite is_uns_zero_ext in H3 by omega. rewrite H3; auto.
 - inv H. rewrite is_sgn_sign_ext in H3 by omega. rewrite H3; auto.
 - inv H. rewrite is_uns_zero_ext in H3 by omega. rewrite H3; auto.
-- inv H. rewrite Float.singleoffloat_of_single by auto. auto.
 Qed.
 
 Lemma add_store_result_hold:
diff --git a/backend/Cminor.v b/backend/Cminor.v
index aaf75107..bf20de29 100644
--- a/backend/Cminor.v
+++ b/backend/Cminor.v
@@ -35,7 +35,8 @@ Require Import Switch.
 
 Inductive constant : Type :=
   | Ointconst: int -> constant     (**r integer constant *)
-  | Ofloatconst: float -> constant (**r floating-point constant *)
+  | Ofloatconst: float -> constant (**r double-precision floating-point constant *)
+  | Osingleconst: float32 -> constant (**r single-precision floating-point constant *)
   | Olongconst: int64 -> constant  (**r long integer constant *)
   | Oaddrsymbol: ident -> int -> constant (**r address of the symbol plus the offset *)
   | Oaddrstack: int -> constant.   (**r stack pointer plus the given offset *)
@@ -47,22 +48,31 @@ Inductive unary_operation : Type :=
   | Ocast16signed: unary_operation         (**r 16-bit sign extension *)
   | Onegint: unary_operation               (**r integer opposite *)
   | Onotint: unary_operation               (**r bitwise complement  *)
-  | Onegf: unary_operation                 (**r float opposite *)
-  | Oabsf: unary_operation                 (**r float absolute value *)
+  | Onegf: unary_operation                 (**r float64 opposite *)
+  | Oabsf: unary_operation                 (**r float64 absolute value *)
+  | Onegfs: unary_operation                (**r float32 opposite *)
+  | Oabsfs: unary_operation                (**r float32 absolute value *)
   | Osingleoffloat: unary_operation        (**r float truncation to float32 *)
-  | Ointoffloat: unary_operation           (**r signed integer to float *)
-  | Ointuoffloat: unary_operation          (**r unsigned integer to float *)
-  | Ofloatofint: unary_operation           (**r float to signed integer *)
-  | Ofloatofintu: unary_operation          (**r float to unsigned integer *)
+  | Ofloatofsingle: unary_operation        (**r float extension to float64 *)
+  | Ointoffloat: unary_operation           (**r signed integer to float64 *)
+  | Ointuoffloat: unary_operation          (**r unsigned integer to float64 *)
+  | Ofloatofint: unary_operation           (**r float64 to signed integer *)
+  | Ofloatofintu: unary_operation          (**r float64 to unsigned integer *)
+  | Ointofsingle: unary_operation          (**r signed integer to float32 *)
+  | Ointuofsingle: unary_operation         (**r unsigned integer to float32 *)
+  | Osingleofint: unary_operation          (**r float32 to signed integer *)
+  | Osingleofintu: unary_operation        (**r float32 to unsigned integer *)
   | Onegl: unary_operation                 (**r long integer opposite *)
   | Onotl: unary_operation                 (**r long bitwise complement *)
   | Ointoflong: unary_operation            (**r long to int *)
   | Olongofint: unary_operation            (**r signed int to long *)
   | Olongofintu: unary_operation           (**r unsigned int to long *)
-  | Olongoffloat: unary_operation          (**r float to signed long *)
-  | Olonguoffloat: unary_operation         (**r float to unsigned long *)
-  | Ofloatoflong: unary_operation          (**r signed long to float *)
-  | Ofloatoflongu: unary_operation         (**r unsigned long to float *)
+  | Olongoffloat: unary_operation          (**r float64 to signed long *)
+  | Olonguoffloat: unary_operation         (**r float64 to unsigned long *)
+  | Ofloatoflong: unary_operation          (**r signed long to float64 *)
+  | Ofloatoflongu: unary_operation         (**r unsigned long to float64 *)
+  | Olongofsingle: unary_operation         (**r float32 to signed long *)
+  | Olonguofsingle: unary_operation        (**r float32 to unsigned long *)
   | Osingleoflong: unary_operation         (**r signed long to float32 *)
   | Osingleoflongu: unary_operation.       (**r unsigned long to float32 *)
 
@@ -80,10 +90,14 @@ Inductive binary_operation : Type :=
   | Oshl: binary_operation                 (**r integer left shift *)
   | Oshr: binary_operation                 (**r integer right signed shift *)
   | Oshru: binary_operation                (**r integer right unsigned shift *)
-  | Oaddf: binary_operation                (**r float addition *)
-  | Osubf: binary_operation                (**r float subtraction *)
-  | Omulf: binary_operation                (**r float multiplication *)
-  | Odivf: binary_operation                (**r float division *)
+  | Oaddf: binary_operation                (**r float64 addition *)
+  | Osubf: binary_operation                (**r float64 subtraction *)
+  | Omulf: binary_operation                (**r float64 multiplication *)
+  | Odivf: binary_operation                (**r float64 division *)
+  | Oaddfs: binary_operation               (**r float32 addition *)
+  | Osubfs: binary_operation               (**r float32 subtraction *)
+  | Omulfs: binary_operation               (**r float32 multiplication *)
+  | Odivfs: binary_operation               (**r float32 division *)
   | Oaddl: binary_operation                (**r long addition *)
   | Osubl: binary_operation                (**r long subtraction *)
   | Omull: binary_operation                (**r long multiplication *)
@@ -99,7 +113,8 @@ Inductive binary_operation : Type :=
   | Oshrlu: binary_operation               (**r long right unsigned shift *)
   | Ocmp: comparison -> binary_operation   (**r integer signed comparison *)
   | Ocmpu: comparison -> binary_operation  (**r integer unsigned comparison *)
-  | Ocmpf: comparison -> binary_operation  (**r float comparison *)
+  | Ocmpf: comparison -> binary_operation  (**r float64 comparison *)
+  | Ocmpfs: comparison -> binary_operation (**r float32 comparison *)
   | Ocmpl: comparison -> binary_operation  (**r long signed comparison *)
   | Ocmplu: comparison -> binary_operation. (**r long unsigned comparison *)
 
@@ -240,6 +255,7 @@ Definition eval_constant (sp: val) (cst: constant) : option val :=
   match cst with
   | Ointconst n => Some (Vint n)
   | Ofloatconst n => Some (Vfloat n)
+  | Osingleconst n => Some (Vsingle n)
   | Olongconst n => Some (Vlong n)
   | Oaddrsymbol s ofs =>
       Some(match Genv.find_symbol ge s with
@@ -258,11 +274,18 @@ Definition eval_unop (op: unary_operation) (arg: val) : option val :=
   | Onotint => Some (Val.notint arg)
   | Onegf => Some (Val.negf arg)
   | Oabsf => Some (Val.absf arg)
+  | Onegfs => Some (Val.negfs arg)
+  | Oabsfs => Some (Val.absfs arg)
   | Osingleoffloat => Some (Val.singleoffloat arg)
+  | Ofloatofsingle => Some (Val.floatofsingle arg)
   | Ointoffloat => Val.intoffloat arg
   | Ointuoffloat => Val.intuoffloat arg
   | Ofloatofint => Val.floatofint arg
   | Ofloatofintu => Val.floatofintu arg
+  | Ointofsingle => Val.intofsingle arg
+  | Ointuofsingle => Val.intuofsingle arg
+  | Osingleofint => Val.singleofint arg
+  | Osingleofintu => Val.singleofintu arg
   | Onegl => Some (Val.negl arg)
   | Onotl => Some (Val.notl arg)
   | Ointoflong => Some (Val.loword arg)
@@ -272,6 +295,8 @@ Definition eval_unop (op: unary_operation) (arg: val) : option val :=
   | Olonguoffloat => Val.longuoffloat arg
   | Ofloatoflong => Val.floatoflong arg
   | Ofloatoflongu => Val.floatoflongu arg
+  | Olongofsingle => Val.longofsingle arg
+  | Olonguofsingle => Val.longuofsingle arg
   | Osingleoflong => Val.singleoflong arg
   | Osingleoflongu => Val.singleoflongu arg
   end.
@@ -296,6 +321,10 @@ Definition eval_binop
   | Osubf => Some (Val.subf arg1 arg2)
   | Omulf => Some (Val.mulf arg1 arg2)
   | Odivf => Some (Val.divf arg1 arg2)
+  | Oaddfs => Some (Val.addfs arg1 arg2)
+  | Osubfs => Some (Val.subfs arg1 arg2)
+  | Omulfs => Some (Val.mulfs arg1 arg2)
+  | Odivfs => Some (Val.divfs arg1 arg2)
   | Oaddl => Some (Val.addl arg1 arg2)
   | Osubl => Some (Val.subl arg1 arg2)
   | Omull => Some (Val.mull arg1 arg2)
@@ -312,6 +341,7 @@ Definition eval_binop
   | Ocmp c => Some (Val.cmp c arg1 arg2)
   | Ocmpu c => Some (Val.cmpu (Mem.valid_pointer m) c arg1 arg2)
   | Ocmpf c => Some (Val.cmpf c arg1 arg2)
+  | Ocmpfs c => Some (Val.cmpfs c arg1 arg2)
   | Ocmpl c => Val.cmpl c arg1 arg2
   | Ocmplu c => Val.cmplu c arg1 arg2
   end.
diff --git a/backend/Constprop.v b/backend/Constprop.v
index d2c4d447..bdfc4b17 100644
--- a/backend/Constprop.v
+++ b/backend/Constprop.v
@@ -75,6 +75,7 @@ Definition const_for_result (a: aval) : option operation :=
   match a with
   | I n => Some(Ointconst n)
   | F n => if Compopts.generate_float_constants tt then Some(Ofloatconst n) else None
+  | FS n => if Compopts.generate_float_constants tt then Some(Osingleconst n) else None
   | Ptr(Gl symb ofs) => Some(Oaddrsymbol symb ofs)
   | Ptr(Stk ofs) => Some(Oaddrstack ofs)
   | _ => None
diff --git a/backend/Constpropproof.v b/backend/Constpropproof.v
index ecae5dc6..b79c721e 100644
--- a/backend/Constpropproof.v
+++ b/backend/Constpropproof.v
@@ -159,6 +159,8 @@ Proof.
   inv H. inv H0. exists (Vint n); auto.
 - (* float *)
   destruct (Compopts.generate_float_constants tt); inv H. inv H0. exists (Vfloat f); auto.
+- (* single *)
+  destruct (Compopts.generate_float_constants tt); inv H. inv H0. exists (Vsingle f); auto.
 - (* pointer *)
   destruct p; try discriminate.
   + (* global *)
diff --git a/backend/IRC.ml b/backend/IRC.ml
index 6cb17e39..dcd8624a 100644
--- a/backend/IRC.ml
+++ b/backend/IRC.ml
@@ -35,7 +35,7 @@ type node =
   { ident: int;                         (*r unique identifier *)
     typ: typ;                           (*r its type *)
     var: var;                           (*r the XTL variable it comes from *)
-    regclass: int;                      (*r identifier of register class *)
+    mutable regclass: int;              (*r identifier of register class *)
     mutable accesses: int;              (*r number of defs and uses *)
     mutable spillcost: float;           (*r estimated cost of spilling *)
     mutable adjlist: node list;         (*r all nodes it interferes with *)
@@ -239,15 +239,20 @@ type graph = {
 
 (* Register classes and reserved registers *)
 
-let num_register_classes = 2
+(* We have two main register classes:
+     0 for integer registers
+     1 for floating-point registers
+   plus a third pseudo-class 2 that has no registers and forces
+   stack allocation.  XTL variables are mapped to classes 0 and 1
+   according to their types.  A variable can be forced into class 2
+   by giving it a negative spill cost. *)
 
 let class_of_type = function
-  | Tint -> 0
-  | Tfloat | Tsingle -> 1
+  | Tint | Tany32 -> 0
+  | Tfloat | Tsingle | Tany64 -> 1
   | Tlong -> assert false
 
-let type_of_class c =
-  if c = 0 then Tint else Tfloat
+let no_spill_class = 2
 
 let reserved_registers = ref ([]: mreg list)
 
@@ -267,14 +272,19 @@ let init costs =
   and float_callee_save = remove_reserved float_callee_save_regs in
   {
     caller_save_registers =
-      [| Array.of_list int_caller_save; Array.of_list float_caller_save |];
+      [| Array.of_list int_caller_save;
+         Array.of_list float_caller_save;
+         [||] |];
     callee_save_registers =
-      [| Array.of_list int_callee_save; Array.of_list float_callee_save |];
+      [| Array.of_list int_callee_save;
+         Array.of_list float_callee_save;
+         [||] |];
     num_available_registers =
       [| List.length int_caller_save + List.length int_callee_save;
-         List.length float_caller_save + List.length float_callee_save |];
+         List.length float_caller_save + List.length float_callee_save;
+         0 |];
     start_points =
-      [| 0; 0 |];
+      [| 0; 0; 0 |];
     allocatable_registers =
       int_caller_save @ int_callee_save @ float_caller_save @ float_callee_save;
     stats_of_reg = costs;
@@ -303,10 +313,13 @@ let newNodeOfReg g r ty =
   let st = g.stats_of_reg r in
   g.nextIdent <- g.nextIdent + 1;
   { ident = g.nextIdent; typ = ty; 
-    var = V(r, ty); regclass = class_of_type ty;
+    var = V(r, ty);
+    regclass = if st.cost >= 0 then class_of_type ty else no_spill_class;
     accesses = st.usedefs;
     spillcost = weightedSpillCost st;
-    adjlist = []; degree = 0; movelist = []; extra_adj = []; extra_pref = [];
+    adjlist = [];
+    degree = if st.cost >= 0 then 0 else 1;
+    movelist = []; extra_adj = []; extra_pref = [];
     alias = None;
     color = None;
     nstate = Initial;
@@ -382,11 +395,19 @@ let recordExtraPref n1 n2 =
       mnext = DLinkMove.dummy; mprev = DLinkMove.dummy } in
   n1.extra_pref <- m :: n1.extra_pref
 
+let recordExtraPref2 n1 n2 =
+  let m =
+    { src = n1; dst = n2; mstate = FrozenMoves;
+      mnext = DLinkMove.dummy; mprev = DLinkMove.dummy } in
+  n1.extra_pref <- m :: n1.extra_pref;
+  n2.extra_pref <- m :: n2.extra_pref
+
 let addMovePref g n1 n2 =
-  assert (n1.regclass = n2.regclass);
   match n1.color, n2.color with
   | None, None ->
-      recordMove g n1 n2
+      if n1.regclass = n2.regclass
+      then recordMove g n1 n2
+      else recordExtraPref2 n1 n2
   | Some (R mr1), None ->
       if List.mem mr1 g.allocatable_registers then recordMove g n1 n2
   | None, Some (R mr2) ->
@@ -866,7 +887,7 @@ let assign_color g n =
           n.color <- Some loc
       | None ->
           (* Last, pick a Local stack slot *)
-          n.color <- Some (find_slot slot_conflicts (type_of_class n.regclass))
+          n.color <- Some (find_slot slot_conflicts n.typ)
 
 (* Extract the location of a variable *)
 
@@ -884,7 +905,7 @@ let location_of_var g v =
         match ty with
         | Tint -> R dummy_int_reg
         | Tfloat | Tsingle -> R dummy_float_reg
-        | Tlong -> assert false
+        | Tlong | Tany32 | Tany64 -> assert false
 
 (* The exported interface *)
 
diff --git a/backend/Inliningproof.v b/backend/Inliningproof.v
index 8e20442d..2564a736 100644
--- a/backend/Inliningproof.v
+++ b/backend/Inliningproof.v
@@ -691,6 +691,7 @@ Proof.
   apply Zone_divide.
   apply Zone_divide.
   apply H2; omega.
+  apply H2; omega.
 Qed.
 
 (** Preservation by external calls *)
diff --git a/backend/Lineartyping.v b/backend/Lineartyping.v
index b08fe878..ccf17e1e 100644
--- a/backend/Lineartyping.v
+++ b/backend/Lineartyping.v
@@ -12,15 +12,12 @@
 
 (** Type-checking Linear code. *)
 
-Require Import FSets.
-Require FSetAVL.
 Require Import Coqlib.
-Require Import Ordered.
-Require Import Maps.
-Require Import Iteration.
 Require Import AST.
 Require Import Integers.
 Require Import Values.
+Require Import Globalenvs.
+Require Import Memory.
 Require Import Events.
 Require Import Op.
 Require Import Machregs.
@@ -29,172 +26,20 @@ Require Import Conventions.
 Require Import LTL.
 Require Import Linear.
 
-(** The typechecker for Linear enforce several properties useful for
-  the proof of the [Stacking] pass:
-- for each instruction, the type of the result register or slot
-  agrees with the type of values produced by the instruction;
-- correctness conditions on the offsets of stack slots
-  accessed through [Lgetstack] and [Lsetstack] Linear instructions.
-*)
-
-(** * Tracking the flow of single-precision floats *)
-
-Module Locset := FSetAVL.Make(OrderedLoc).
-
-(** At each program point, we infer a set of locations that are
-  guaranteed to contain single-precision floats.  [None] means
-  that the program point is not reachable. *)
-
-Definition singlefloats := option Locset.t.
-
-Definition FSbot : singlefloats := None.
-Definition FStop : singlefloats := Some Locset.empty.
-
-Definition setreg (fs: singlefloats) (r: mreg) (t: typ) :=
-  match fs with
-  | None => None
-  | Some s =>
-      Some(if typ_eq t Tsingle then Locset.add (R r) s else Locset.remove (R r) s)
-  end.
-
-Fixpoint setregs (fs: singlefloats) (rl: list mreg) (tl: list typ) :=
-  match rl, tl with
-  | nil, nil => fs
-  | r1 :: rs, t1 :: ts => setregs (setreg fs r1 t1) rs ts
-  | _, _ => fs
-  end.
-
-Definition copyloc (fs: singlefloats) (dst src: loc) :=
-  match fs with
-  | None => None
-  | Some s =>
-      Some(if Locset.mem src s then Locset.add dst s else Locset.remove dst s)
-  end.
-
-Definition destroyed_at_call_regs :=
-  List.fold_right (fun r fs => Locset.add (R r) fs) Locset.empty destroyed_at_call.
-
-Definition callregs (fs: singlefloats) :=
-  match fs with
-  | None => None
-  | Some s => Some (Locset.diff s destroyed_at_call_regs)
-  end.
-
-(** The forward dataflow analysis below records [singlefloats] sets
-  at every label.  Sets at other program points are recomputed when
-  needed. *)
-
-Definition labelmap := PTree.t Locset.t.
-
-(** [update_label lbl fs lm] updates the label map [lm] to reflect the
-  fact that the [singlefloats] set [fs] can flow into label [lbl].
-  It returns the set after the label, an updated label map, and a
-  boolean indicating whether the label map changed. *)
-
-Definition update_label (lbl: label) (fs: singlefloats) (lm: labelmap) : 
-                                              singlefloats * labelmap * bool :=
-  match fs, PTree.get lbl lm with
-  | None, None => (None, lm, false)
-  | None, Some s => (Some s, lm, false)
-  | Some s, None => (Some s, PTree.set lbl s lm, true)
-  | Some s1, Some s2 =>
-      if Locset.subset s2 s1
-      then (Some s2, lm, false)
-      else let s := Locset.inter s1 s2 in (Some s, PTree.set lbl s lm, true)
-  end.
-
-(** [update_labels] is similar, for a list of labels (coming from a 
-    [Ljumptable] instruction). *)
-
-Fixpoint update_labels (lbls: list label) (fs: singlefloats) (lm: labelmap): labelmap * bool :=
-  match lbls with
-  | nil => (lm, false)
-  | lbl1 :: lbls =>
-      let '(_, lm1, changed1) := update_label lbl1 fs lm in
-      let '(lm2, changed2) := update_labels lbls fs lm1 in
-      (lm2, changed1 || changed2)
-  end.
-
-(** One pass of forward analysis over the code [c].  Returns an updated
-  label map and a boolean indicating whether the label map changed. *)
-
-Fixpoint ana_code (lm: labelmap) (ch: bool) (fs: singlefloats) (c: code) : labelmap * bool :=
-  match c with
-  | nil => (lm, ch)
-  | Lgetstack sl ofs ty rd :: c =>
-      ana_code lm ch (copyloc fs (R rd) (S sl ofs ty)) c
-  | Lsetstack rs sl ofs ty :: c =>
-      ana_code lm ch (copyloc fs (S sl ofs ty) (R rs)) c
-  | Lop op args dst :: c =>
-      match is_move_operation op args with
-      | Some src => ana_code lm ch (copyloc fs (R dst) (R src)) c
-      | None     => ana_code lm ch (setreg fs dst (snd (type_of_operation op))) c
-      end
-  | Lload chunk addr args dst :: c =>
-      ana_code lm ch (setreg fs dst (type_of_chunk chunk)) c
-  | Lstore chunk addr args src :: c =>
-      ana_code lm ch fs c
-  | Lcall sg ros :: c =>
-      ana_code lm ch (callregs fs) c
-  | Ltailcall sg ros :: c =>
-      ana_code lm ch None c
-  | Lbuiltin ef args res :: c =>
-      ana_code lm ch (setregs fs res (proj_sig_res' (ef_sig ef))) c
-  | Lannot ef args :: c =>
-      ana_code lm ch fs c
-  | Llabel lbl :: c =>
-      let '(fs1, lm1, ch1) := update_label lbl fs lm in
-      ana_code lm1 (ch || ch1) fs1 c
-  | Lgoto lbl :: c =>
-      let '(_, lm1, ch1) := update_label lbl fs lm in
-      ana_code lm1 (ch || ch1) None c
-  | Lcond cond args lbl :: c =>
-      let '(_, lm1, ch1) := update_label lbl fs lm in
-      ana_code lm1 (ch || ch1) fs c
-  | Ljumptable r lbls :: c =>
-      let '(lm1, ch1) := update_labels lbls fs lm in
-      ana_code lm1 (ch || ch1) None c
-  | Lreturn :: c =>
-      ana_code lm ch None c
-  end.
-
-(** Iterating [ana_code] until the label map is stable. *)
-
-Definition ana_iter (c: code) (lm: labelmap) : labelmap + labelmap :=
-  let '(lm1, ch) := ana_code lm false FStop c in
-  if ch then inr _ lm1 else inl _ lm.
-
-Definition ana_function (f: function): option labelmap :=
-  PrimIter.iterate _ _ (ana_iter f.(fn_code)) (PTree.empty _).
-
-(** * The type-checker *)
-
-(** The typing rules are presented as boolean-valued functions so that we
+(** The rules are presented as boolean-valued functions so that we
   get an executable type-checker for free. *)
 
 Section WT_INSTR.
 
 Variable funct: function.
 
-Variable lm: labelmap.
-
-Definition FSmem (l: loc) (fs: singlefloats) : bool :=
-  match fs with None => true | Some s => Locset.mem l s end.
-
-Definition loc_type (fs: singlefloats) (l: loc) :=
-  let ty := Loc.type l in
-  if typ_eq ty Tfloat && FSmem l fs then Tsingle else ty.
-
 Definition slot_valid (sl: slot) (ofs: Z) (ty: typ): bool :=
   match sl with
   | Local => zle 0 ofs
   | Outgoing => zle 0 ofs
   | Incoming => In_dec Loc.eq (S Incoming ofs ty) (loc_parameters funct.(fn_sig))
   end &&
-  match ty with
-  | Tint | Tfloat | Tsingle => true
-  | Tlong => false
-  end.
+  match ty with Tlong => false | _ => true end.
 
 Definition slot_writable (sl: slot) : bool :=
   match sl with
@@ -210,475 +55,169 @@ Definition loc_valid (l: loc) : bool :=
   | S _ _ _ => false
   end.
 
-Fixpoint wt_code (fs: singlefloats) (c: code) : bool :=
-  match c with
-  | nil => true
-  | Lgetstack sl ofs ty rd :: c =>
-      subtype ty (mreg_type rd) &&
-      slot_valid sl ofs ty &&
-      wt_code (copyloc fs (R rd) (S sl ofs ty)) c
-  | Lsetstack rs sl ofs ty :: c =>
-      subtype (loc_type fs (R rs)) ty &&
-      slot_valid sl ofs ty && slot_writable sl &&
-      wt_code (copyloc fs (S sl ofs ty) (R rs)) c
-  | Lop op args dst :: c =>
+Definition wt_instr (i: instruction) : bool :=
+  match i with
+  | Lgetstack sl ofs ty r =>
+      subtype ty (mreg_type r) && slot_valid sl ofs ty
+  | Lsetstack r sl ofs ty =>
+      slot_valid sl ofs ty && slot_writable sl
+  | Lop op args res =>
       match is_move_operation op args with
-      | Some src =>
-          typ_eq (mreg_type src) (mreg_type dst) &&
-          wt_code (copyloc fs (R dst) (R src)) c
-      | None =>
-          let (ty_args, ty_res) := type_of_operation op in
-          subtype ty_res (mreg_type dst) &&
-          wt_code (setreg fs dst ty_res) c
+      | Some arg =>
+          subtype (mreg_type arg) (mreg_type res)
+      | None => 
+          let (targs, tres) := type_of_operation op in
+          subtype tres (mreg_type res)
       end
-  | Lload chunk addr args dst :: c =>
-      subtype (type_of_chunk chunk) (mreg_type dst) &&
-      wt_code (setreg fs dst (type_of_chunk chunk)) c
-  | Lstore chunk addr args src :: c =>
-      wt_code fs c
-  | Lcall sg ros :: c =>
-      wt_code (callregs fs) c
-  | Ltailcall sg ros :: c =>
-      zeq (size_arguments sg) 0 &&
-      wt_code None c
-  | Lbuiltin ef args res :: c =>
-      let ty_res := proj_sig_res' (ef_sig ef) in
-      subtype_list ty_res (map mreg_type res) &&
-      wt_code (setregs fs res ty_res) c
-  | Lannot ef args :: c =>
-      forallb loc_valid args &&
-      wt_code fs c
-  | Llabel lbl :: c =>
-      wt_code lm!lbl c
-  | Lgoto lbl :: c =>
-      wt_code None c
-  | Lcond cond args lbl :: c =>
-      wt_code fs c
-  | Ljumptable r lbls :: c =>
-      wt_code None c
-  | Lreturn :: c =>
-      wt_code None c
+  | Lload chunk addr args dst =>
+      subtype (type_of_chunk chunk) (mreg_type dst)
+  | Ltailcall sg ros =>
+      zeq (size_arguments sg) 0
+  | Lbuiltin ef args res =>
+      subtype_list (proj_sig_res' (ef_sig ef)) (map mreg_type res) 
+  | Lannot ef args =>
+      forallb loc_valid args
+  | _ =>
+      true
   end.
 
 End WT_INSTR.
 
-Definition wt_funcode (f: function) (lm: labelmap) : bool :=
-  wt_code f lm FStop f.(fn_code).
+Definition wt_code (f: function) (c: code) : bool :=
+  forallb (wt_instr f) c.
 
 Definition wt_function (f: function) : bool :=
-  match ana_function f with
-  | None => false
-  | Some lm => wt_funcode f lm
-  end.
-
-(** * Properties of the static analysis *)
-
-Inductive FSincl: singlefloats -> singlefloats -> Prop :=
-  | FSincl_none: forall fs,
-      FSincl None fs
-  | FSincl_subset: forall s1 s2,
-      Locset.Subset s2 s1 -> FSincl (Some s1) (Some s2).
-
-Lemma update_label_false:
-  forall lbl fs lm fs' lm',
-  update_label lbl fs lm = (fs', lm', false) ->
-  FSincl fs lm!lbl /\ fs' = lm!lbl /\ lm' = lm.
-Proof.
-  unfold update_label; intros. 
-  destruct fs as [s1|]; destruct lm!lbl as [s2|].
-- destruct (Locset.subset s2 s1) eqn:S; inv H.
-  intuition. constructor. apply Locset.subset_2; auto.
-- inv H.
-- inv H. intuition. constructor. 
-- inv H. intuition. constructor.
-Qed.
-
-Lemma update_labels_false:
-  forall fs lbls lm lm',
-  update_labels lbls fs lm = (lm', false) ->
-  (forall lbl, In lbl lbls -> FSincl fs lm!lbl) /\ lm' = lm.
-Proof.
-  induction lbls; simpl; intros. 
-- inv H. tauto. 
-- destruct (update_label a fs lm) as [[fs1 lm1] changed1] eqn:UL.
-  destruct (update_labels lbls fs lm1) as [lm2 changed2] eqn:ULS.
-  inv H. apply orb_false_iff in H2. destruct H2; subst.
-  exploit update_label_false; eauto. intros (A & B & C).
-  exploit IHlbls; eauto. intros (D & E). subst.
-  split. intros. destruct H. congruence. auto.
-  auto.
-Qed.
-
-Lemma ana_code_false:
-  forall lm' c lm ch fs, ana_code lm ch fs c = (lm', false) -> ch = false /\ lm' = lm.
-Proof.
-  induction c; simpl; intros.
-- inv H; auto. 
-- destruct a; try (eapply IHc; eauto; fail).
-  + destruct (is_move_operation o l); eapply IHc; eauto.
-  + destruct (update_label l fs lm) as [[fs1 lm1] ch1] eqn:UL.
-    exploit IHc; eauto. intros [A B]. apply orb_false_iff in A; destruct A; subst.
-    exploit update_label_false; eauto. intros (C & D & E).
-    auto.
-  + destruct (update_label l fs lm) as [[fs1 lm1] ch1] eqn:UL.
-    exploit IHc; eauto. intros [A B]. apply orb_false_iff in A; destruct A; subst.
-    exploit update_label_false; eauto. intros (C & D & E). 
-    auto.
-  + destruct (update_label l0 fs lm) as [[fs1 lm1] ch1] eqn:UL.
-    exploit IHc; eauto. intros [A B]. apply orb_false_iff in A; destruct A; subst.
-    exploit update_label_false; eauto. intros (C & D & E). 
-    auto.
-  + destruct (update_labels l fs lm) as [lm1 ch1] eqn:UL.
-    exploit IHc; eauto. intros [A B]. apply orb_false_iff in A; destruct A; subst.
-    exploit update_labels_false; eauto. intros (C & D); subst.
-    auto.
-Qed.
-
-Lemma ana_function_inv:
-  forall f lm,
-  ana_function f = Some lm -> ana_code lm false FStop f.(fn_code) = (lm, false).
-Proof.
-  intros. unfold ana_function in H.
-  eapply PrimIter.iterate_prop with
-    (Q := fun lm => ana_code lm false FStop (fn_code f) = (lm, false))
-    (P := fun (lm: labelmap) => True); eauto.
-  intros. unfold ana_iter.
-  destruct (ana_code a false FStop (fn_code f)) as (lm1, ch1) eqn:ANA.
-  destruct ch1. auto. exploit ana_code_false; eauto. intros [A B]. congruence. 
-Qed.
-
-Remark wt_ana_code_cons:
-  forall f lm fs i c,
-  ana_code lm false fs (i :: c) = (lm, false) ->
-  wt_code f lm fs (i :: c) = true ->
-  exists fs', ana_code lm false fs' c = (lm, false) /\ wt_code f lm fs' c = true.
-Proof.
-  simpl; intros; destruct i; InvBooleans; try (econstructor; split; eassumption).
-- destruct (is_move_operation o l).
-  InvBooleans; econstructor; eauto.
-  destruct (type_of_operation o); InvBooleans; econstructor; eauto.
-- destruct (update_label l fs lm) as [[fs1 lm1] ch1] eqn:UL. 
-  exploit ana_code_false; eauto. intros [A B]; subst.
-  exploit update_label_false; eauto. intros (C & D & E); subst.
-  econstructor; eauto.
-- destruct (update_label l fs lm) as [[fs1 lm1] ch1] eqn:UL. 
-  exploit ana_code_false; eauto. intros [A B]; subst.
-  econstructor; eauto.
-- destruct (update_label l0 fs lm) as [[fs1 lm1] ch1] eqn:UL. 
-  exploit ana_code_false; eauto. intros [A B]; subst.
-  econstructor; eauto.
-- destruct (update_labels l fs lm) as [lm1 ch1] eqn:UL. 
-  exploit ana_code_false; eauto. intros [A B]; subst.
-  econstructor; eauto.
-Qed.
-
-Lemma wt_find_label_rec:
-  forall f lm lbl c' c fs,
-  find_label lbl c = Some c' ->
-  ana_code lm false fs c = (lm, false) ->
-  wt_code f lm fs c = true ->
-  ana_code lm false (PTree.get lbl lm) c' = (lm, false) /\ wt_code f lm (PTree.get lbl lm) c' = true.
-Proof.
-  induction c; intros.
-- discriminate.
-- simpl in H. specialize (is_label_correct lbl a). destruct (is_label lbl a); intros IL.
-  + subst a. inv H. simpl in *. 
-    destruct (update_label lbl fs lm) as [[fs1 lm1] ch1] eqn:UL.
-    exploit ana_code_false; eauto. intros [A B]; subst.
-    exploit update_label_false; eauto. intros (C & D & E). subst.
-    InvBooleans. auto.
-  + exploit wt_ana_code_cons; eauto. intros (fs' & A & B).
-    eapply IHc; eauto. 
-Qed.
+  wt_code f f.(fn_code).
 
-Lemma wt_find_label:
-  forall f lm lbl c,
-  ana_function f = Some lm ->
-  wt_funcode f lm = true ->
-  find_label lbl f.(fn_code) = Some c ->
-  ana_code lm false (PTree.get lbl lm) c = (lm, false) /\ wt_code f lm (PTree.get lbl lm) c = true.
-Proof.
-  intros. eapply wt_find_label_rec; eauto. apply ana_function_inv; auto. 
-Qed.
+(** Typing the run-time state. *)
 
-(** * Soundness of the type system *)
-
-Require Import Values.
-Require Import Globalenvs.
-Require Import Memory.
-
-Module LSF := FSetFacts.Facts(Locset).
-
-(** ** Typing the run-time state *)
-
-Inductive wt_locset: singlefloats -> locset -> Prop :=
-  | wt_locset_intro: forall s ls
-      (TY: forall l, Val.has_type (ls l) (Loc.type l))
-      (SINGLE: forall l, Locset.In l s -> Val.has_type (ls l) Tsingle),
-    wt_locset (Some s) ls.
-
-Lemma wt_mreg:
-  forall fs ls r, wt_locset fs ls -> Val.has_type (ls (R r)) (mreg_type r).
-Proof.
-  intros. inv H. apply (TY (R r)). 
-Qed.
-
-Lemma wt_loc_type:
-  forall fs ls l, wt_locset fs ls -> Val.has_type (ls l) (loc_type fs l).
-Proof.
-  intros. inv H. unfold loc_type, FSmem.
-  destruct (typ_eq (Loc.type l) Tfloat); simpl; auto.
-  destruct (Locset.mem l s) eqn:MEM; auto. 
-  apply SINGLE. apply Locset.mem_2; auto.
-Qed.
-
-Lemma loc_type_subtype:
-  forall fs l, subtype (loc_type fs l) (Loc.type l) = true.
-Proof.
-  unfold loc_type; intros. destruct (typ_eq (Loc.type l) Tfloat); simpl.
-  rewrite e. destruct (FSmem l fs); auto. 
-  destruct (Loc.type l); auto.
-Qed.
-
-Lemma wt_locset_top:
-  forall ls,
-  (forall l, Val.has_type (ls l) (Loc.type l)) -> 
-  wt_locset FStop ls.
-Proof.
-  intros; constructor; intros. 
-  auto.
-  eelim Locset.empty_1; eauto.
-Qed.
-
-Lemma wt_locset_mon:
-  forall fs1 fs2 ls,
-  FSincl fs1 fs2 -> wt_locset fs1 ls -> wt_locset fs2 ls.
-Proof.
-  intros. inv H0; inv H. constructor; intros; auto. 
-Qed.
+Definition wt_locset (ls: locset) : Prop :=
+  forall l, Val.has_type (ls l) (Loc.type l).
 
 Lemma wt_setreg:
-  forall fs ls r v ty,
-  Val.has_type v ty -> subtype ty (mreg_type r) = true -> wt_locset fs ls ->
-  wt_locset (setreg fs r ty) (Locmap.set (R r) v ls).
-Proof.
-  intros. inv H1. constructor; intros. 
-- unfold Locmap.set. destruct (Loc.eq (R r) l).
-  subst l; simpl. eapply Val.has_subtype; eauto.
-  destruct (Loc.diff_dec (R r) l); simpl; auto.
-- unfold Locmap.set. destruct (Loc.eq (R r) l).
-  destruct (typ_eq ty Tsingle). congruence. 
-  subst l. rewrite LSF.remove_iff in H1. intuition.
-  destruct (Loc.diff_dec (R r) l); simpl; auto.
-  apply SINGLE. 
-  destruct (typ_eq ty Tsingle).
-  rewrite LSF.add_iff in H1; intuition.
-  rewrite LSF.remove_iff in H1; intuition.
-Qed.
-
-Lemma wt_setregs:
-  forall vl tyl rl fs rs,
-  Val.has_type_list vl tyl ->
-  subtype_list tyl (map mreg_type rl) = true ->
-  wt_locset fs rs ->
-  wt_locset (setregs fs rl tyl) (Locmap.setlist (map R rl) vl rs).
-Proof.
-  induction vl; simpl; intros.
-- destruct tyl; try contradiction. destruct rl; try discriminate. 
-  simpl. auto.
-- destruct tyl as [ | ty tyl]; try contradiction. destruct H.
-  destruct rl as [ | r rl]; simpl in H0; try discriminate. InvBooleans.
-  simpl. eapply IHvl; eauto. eapply wt_setreg; eauto. 
-Qed.
-
-Lemma undef_regs_type:
-  forall ty l rl ls,
-  Val.has_type (ls l) ty -> Val.has_type (undef_regs rl ls l) ty.
+  forall ls r v,
+  Val.has_type v (mreg_type r) -> wt_locset ls -> wt_locset (Locmap.set (R r) v ls).
 Proof.
-  induction rl; simpl; intros.
-- auto.
-- unfold Locmap.set. destruct (Loc.eq (R a) l). red; auto. 
-  destruct (Loc.diff_dec (R a) l); auto. red; auto.
+  intros; red; intros.
+  unfold Locmap.set. 
+  destruct (Loc.eq (R r) l).
+  subst l; auto.
+  destruct (Loc.diff_dec (R r) l). auto. red. auto.
 Qed.
 
-Lemma wt_copyloc_gen:
-  forall fs ls src dst temps,
-  Val.has_type (ls src) (Loc.type dst) ->
-  wt_locset fs ls ->
-  wt_locset (copyloc fs dst src) (Locmap.set dst (ls src) (undef_regs temps ls)).
+Lemma wt_setstack:
+  forall ls sl ofs ty v,
+  wt_locset ls -> wt_locset (Locmap.set (S sl ofs ty) v ls).
 Proof.
-  intros. inversion H0; subst. constructor; intros. 
-- unfold Locmap.set. destruct (Loc.eq dst l).
-  subst l. auto.
-  destruct (Loc.diff_dec dst l); simpl; auto.
-  apply undef_regs_type; auto.
-- unfold Locmap.set. destruct (Loc.eq dst l).
-  subst l. destruct (Locset.mem src s) eqn:E.
-  apply SINGLE. apply Locset.mem_2; auto. 
-  rewrite LSF.remove_iff in H1. intuition.
-  destruct (Loc.diff_dec dst l); simpl; auto.
-  apply undef_regs_type; auto.
-  apply SINGLE.
-  destruct (Locset.mem src s).
-  rewrite LSF.add_iff in H1. intuition.
-  rewrite LSF.remove_iff in H1. intuition.
-Qed.
-
-Lemma wt_copyloc:
-  forall fs ls src dst temps,
-  subtype (Loc.type src) (Loc.type dst) = true ->
-  wt_locset fs ls ->
-  wt_locset (copyloc fs dst src) (Locmap.set dst (ls src) (undef_regs temps ls)).
-Proof.
-  intros. eapply wt_copyloc_gen; eauto.
-  eapply Val.has_subtype; eauto. inv H0; auto. 
+  intros; red; intros.
+  unfold Locmap.set. 
+  destruct (Loc.eq (S sl ofs ty) l).
+  subst l. simpl. 
+  generalize (Val.load_result_type (chunk_of_type ty) v). 
+  replace (type_of_chunk (chunk_of_type ty)) with ty. auto.
+  destruct ty; reflexivity.
+  destruct (Loc.diff_dec (S sl ofs ty) l). auto. red. auto.
 Qed.
 
 Lemma wt_undef_regs:
-  forall fs ls temps, wt_locset fs ls -> wt_locset fs (undef_regs temps ls).
+  forall rs ls, wt_locset ls -> wt_locset (undef_regs rs ls).
 Proof.
-  intros. inv H; constructor; intros.
-- apply undef_regs_type; auto. 
-- apply undef_regs_type; auto. 
+  induction rs; simpl; intros. auto. apply wt_setreg; auto. red; auto.
 Qed.
 
 Lemma wt_call_regs:
-  forall fs ls, wt_locset fs ls -> wt_locset FStop (call_regs ls).
-Proof.
-  intros. inv H. apply wt_locset_top; intros. 
-  unfold call_regs. 
-  destruct l; auto. 
-  destruct sl; try exact I. 
-  change (Loc.type (S Incoming pos ty)) with (Loc.type (S Outgoing pos ty)); auto.
-Qed.
-
-Remark destroyed_at_call_regs_charact:
-  forall l,
-  Locset.In l destroyed_at_call_regs <->
-  match l with R r => In r destroyed_at_call | S _ _ _ => False end.
+  forall ls, wt_locset ls -> wt_locset (call_regs ls).
 Proof.
-  intros. unfold destroyed_at_call_regs. generalize destroyed_at_call. 
-  induction l0; simpl.
-- rewrite LSF.empty_iff. destruct l; tauto.
-- rewrite LSF.add_iff. rewrite IHl0. destruct l; intuition congruence.
+  intros; red; intros. unfold call_regs. destruct l. auto.
+  destruct sl.
+  red; auto.
+  change (Loc.type (S Incoming pos ty)) with (Loc.type (S Outgoing pos ty)). auto.
+  red; auto.
 Qed.
 
 Lemma wt_return_regs:
-  forall fs caller fs' callee,
-  wt_locset fs caller -> wt_locset fs' callee ->
-  wt_locset (callregs fs) (return_regs caller callee).
+  forall caller callee,
+  wt_locset caller -> wt_locset callee -> wt_locset (return_regs caller callee).
 Proof.
-  intros. inv H; inv H0; constructor; intros.
-- unfold return_regs. destruct l; auto. 
+  intros; red; intros.
+  unfold return_regs. destruct l; auto.
   destruct (in_dec mreg_eq r destroyed_at_call); auto.
-- unfold callregs. 
-  rewrite LSF.diff_iff in H. rewrite destroyed_at_call_regs_charact in H. destruct H.
-  unfold return_regs. destruct l.
-+ destruct (in_dec mreg_eq r destroyed_at_call). tauto. auto. 
-+ auto.
 Qed.
 
 Lemma wt_init:
-  forall s, wt_locset (Some s) (Locmap.init Vundef).
+  wt_locset (Locmap.init Vundef).
 Proof.
-  intros; constructor; intros; simpl; auto.
+  red; intros. unfold Locmap.init. red; auto.
 Qed.
 
-Lemma callregs_setregs_result:
-  forall sg fs,
-  FSincl (setregs fs (loc_result sg) (proj_sig_res' sg)) (callregs fs).
+Lemma wt_setlist:
+  forall vl rl rs,
+  Val.has_type_list vl (map mreg_type rl) ->
+  wt_locset rs ->
+  wt_locset (Locmap.setlist (map R rl) vl rs).
 Proof.
-  assert (X: forall rl tyl, setregs None rl tyl = None).
-  {
-    induction rl; destruct tyl; simpl; auto. 
-  }
-  assert (Y: forall rl s tyl,
-             exists s', setregs (Some s) rl tyl = Some s'
-                     /\ forall l, Locset.In l s -> ~In l (map R rl) -> Locset.In l s').
-  {
-    induction rl; simpl; intros.
-  - exists s; split. destruct tyl; auto. tauto.
-  - destruct tyl. exists s; tauto. 
-    destruct (IHrl (if typ_eq t Tsingle
-                    then Locset.add (R a) s
-                    else Locset.remove (R a) s) tyl) as (s1 & A & B).
-    exists s1; split; auto. intros. apply B.
-    destruct (typ_eq t Tsingle). 
-    rewrite LSF.add_iff. tauto. 
-    rewrite LSF.remove_iff. tauto.
-    tauto.
-  }
-  intros. destruct fs as [s|]; simpl.
-  - destruct (Y (loc_result sg) s (proj_sig_res' sg)) as (s' & A & B).
-    rewrite A. constructor. red; intros.
-    rewrite LSF.diff_iff in H. destruct H. apply B. auto. 
-    red; intros. exploit list_in_map_inv; eauto. intros (r & U & V).
-    subst a. elim H0. rewrite destroyed_at_call_regs_charact. 
-    eapply loc_result_caller_save; eauto.
-  - rewrite X. constructor.
+  induction vl; destruct rl; simpl; intros; try contradiction.
+  auto.
+  destruct H. apply IHvl; auto. apply wt_setreg; auto. 
+Qed.
+
+Lemma wt_find_label:
+  forall f lbl c,
+  wt_function f = true ->
+  find_label lbl f.(fn_code) = Some c ->
+  wt_code f c = true.
+Proof.
+  unfold wt_function; intros until c. generalize (fn_code f). induction c0; simpl; intros.
+  discriminate.
+  InvBooleans. destruct (is_label lbl a). 
+  congruence.
+  auto.
 Qed.
 
+(** Soundness of the type system *)
+
 Definition wt_fundef (fd: fundef) :=
   match fd with
   | Internal f => wt_function f = true
   | External ef => True
   end.
 
-Inductive wt_callstack: list stackframe -> singlefloats -> Prop :=
-  | wt_callstack_nil: forall s,
-      wt_callstack nil (Some s)
-  | wt_callstack_cons: forall f sp rs c s fs lm fs0 fs1
-        (WTSTK: wt_callstack s fs0)
-        (ANF: ana_function f = Some lm)
-        (WTF: wt_funcode f lm = true)
-        (ANC: ana_code lm false (callregs fs1) c = (lm, false))
-        (WTC: wt_code f lm (callregs fs1) c = true)
-        (WTRS: wt_locset fs rs)
-        (INCL: FSincl (callregs fs) (callregs fs1)),
-      wt_callstack (Stackframe f sp rs c :: s) fs.
+Inductive wt_callstack: list stackframe -> Prop :=
+  | wt_callstack_nil:
+      wt_callstack nil
+  | wt_callstack_cons: forall f sp rs c s
+        (WTSTK: wt_callstack s)
+        (WTF: wt_function f = true)
+        (WTC: wt_code f c = true)
+        (WTRS: wt_locset rs),
+      wt_callstack (Stackframe f sp rs c :: s).
 
 Lemma wt_parent_locset:
-  forall s fs, wt_callstack s fs -> wt_locset fs (parent_locset s).
+  forall s, wt_callstack s -> wt_locset (parent_locset s).
 Proof.
   induction 1; simpl.
 - apply wt_init.
 - auto.
 Qed.
 
-Lemma wt_callstack_change_fs:
-  forall s fs, wt_callstack s fs -> wt_callstack s (callregs fs).
-Proof.
-  induction 1.
-- constructor. 
-- econstructor; eauto. 
-  apply wt_locset_mon with fs; auto.
-  + destruct fs; simpl; constructor.
-    red; intros. eapply Locset.diff_1; eauto. 
-  + inv INCL; simpl; constructor.
-    destruct fs; simpl in H0; inv H0. 
-    red; intros. exploit H2; eauto. rewrite ! LSF.diff_iff. tauto.
-Qed.
-
 Inductive wt_state: state -> Prop :=
-  | wt_regular_state: forall s f sp c rs m lm fs fs0
-        (WTSTK: wt_callstack s fs0)
-        (ANF: ana_function f = Some lm)
-        (WTF: wt_funcode f lm = true)
-        (ANC: ana_code lm false fs c = (lm, false))
-        (WTC: wt_code f lm fs c = true)
-        (WTRS: wt_locset fs rs),
+  | wt_regular_state: forall s f sp c rs m
+        (WTSTK: wt_callstack s )
+        (WTF: wt_function f = true)
+        (WTC: wt_code f c = true)
+        (WTRS: wt_locset rs),
       wt_state (State s f sp c rs m)
-  | wt_call_state: forall s fd rs m fs
-        (WTSTK: wt_callstack s fs)
+  | wt_call_state: forall s fd rs m
+        (WTSTK: wt_callstack s)
         (WTFD: wt_fundef fd)
-        (WTRS: wt_locset fs rs),
+        (WTRS: wt_locset rs),
       wt_state (Callstate s fd rs m)
-  | wt_return_state: forall s rs m fs
-        (WTSTK: wt_callstack s fs)
-        (WTRS: wt_locset (callregs fs) rs),
+  | wt_return_state: forall s rs m
+        (WTSTK: wt_callstack s)
+        (WTRS: wt_locset rs),
       wt_state (Returnstate s rs m).
 
-(** ** Preservation of state typing by transitions *)
+(** Preservation of state typing by transitions *)
 
 Section SOUNDNESS.
 
@@ -709,129 +248,89 @@ Proof.
 - (* getstack *)
   simpl in *; InvBooleans. 
   econstructor; eauto.
-  apply wt_copyloc; auto.
+  eapply wt_setreg; eauto. eapply Val.has_subtype; eauto. apply WTRS. 
+  apply wt_undef_regs; auto.
 - (* setstack *)
   simpl in *; InvBooleans. 
   econstructor; eauto.
-  apply wt_copyloc_gen; auto.
-  eapply Val.has_subtype; eauto. eapply wt_loc_type; eauto.
+  apply wt_setstack. apply wt_undef_regs; auto.
 - (* op *)
   simpl in *. destruct (is_move_operation op args) as [src | ] eqn:ISMOVE.
   + (* move *)
     InvBooleans. exploit is_move_operation_correct; eauto. intros [EQ1 EQ2]; subst. 
     simpl in H. inv H.
-    econstructor; eauto. 
-    apply wt_copyloc; auto. simpl. rewrite H0. 
-    destruct (mreg_type res); auto.
+    econstructor; eauto. apply wt_setreg. eapply Val.has_subtype; eauto. apply WTRS. 
+    apply wt_undef_regs; auto.
   + (* other ops *) 
     destruct (type_of_operation op) as [ty_args ty_res] eqn:TYOP. InvBooleans.
     econstructor; eauto.
-    apply wt_setreg; auto. 
-    change ty_res with (snd (ty_args, ty_res)). rewrite <- TYOP. 
-    eapply type_of_operation_sound; eauto. 
+    apply wt_setreg; auto. eapply Val.has_subtype; eauto. 
+    change ty_res with (snd (ty_args, ty_res)). rewrite <- TYOP. eapply type_of_operation_sound; eauto. 
     red; intros; subst op. simpl in ISMOVE. 
     destruct args; try discriminate. destruct args; discriminate. 
     apply wt_undef_regs; auto.
 - (* load *)
   simpl in *; InvBooleans. 
   econstructor; eauto.
-  apply wt_setreg. 
+  apply wt_setreg. eapply Val.has_subtype; eauto. 
   destruct a; simpl in H0; try discriminate. eapply Mem.load_type; eauto.
-  auto. 
   apply wt_undef_regs; auto.
 - (* store *)
   simpl in *; InvBooleans. 
-  econstructor; eauto.
+  econstructor. eauto. eauto. eauto. 
   apply wt_undef_regs; auto.
 - (* call *)
   simpl in *; InvBooleans.
   econstructor; eauto. econstructor; eauto.
-  destruct (callregs fs); constructor. red; auto. 
   eapply wt_find_function; eauto.
 - (* tailcall *)
   simpl in *; InvBooleans.
-  econstructor. apply wt_callstack_change_fs; eauto. 
+  econstructor; eauto. 
   eapply wt_find_function; eauto.
-  eapply wt_return_regs. apply wt_parent_locset; auto. eauto.
+  apply wt_return_regs; auto. apply wt_parent_locset; auto. 
 - (* builtin *)
   simpl in *; InvBooleans.
   econstructor; eauto.
-  apply wt_setregs. 
-  eapply external_call_well_typed'; eauto.
-  auto.
+  apply wt_setlist. 
+  eapply Val.has_subtype_list; eauto. eapply external_call_well_typed'; eauto. 
   apply wt_undef_regs; auto.
 - (* annot *)
   simpl in *; InvBooleans.
   econstructor; eauto. 
 - (* label *)
-  simpl in *.
-  destruct (update_label lbl fs lm) as [[fs1 lm1] ch1] eqn:UL.
-  exploit ana_code_false; eauto. intros [A B]; subst.
-  exploit update_label_false; eauto. intros (A & B & C); subst.
-  econstructor; eauto.
-  eapply wt_locset_mon; eauto.
+  simpl in *. econstructor; eauto.
 - (* goto *)
-  simpl in *.
-  destruct (update_label lbl fs lm) as [[fs1 lm1] ch1] eqn:UL.
-  exploit ana_code_false; eauto. intros [A B]; subst.
-  exploit update_label_false; eauto. intros (A & B & C); subst.
-  exploit wt_find_label; eauto. intros [P Q].
-  econstructor; eauto. 
-  eapply wt_locset_mon; eauto.
+  simpl in *. econstructor; eauto. eapply wt_find_label; eauto. 
 - (* cond branch, taken *)
-  simpl in *.
-  destruct (update_label lbl fs lm) as [[fs1 lm1] ch1] eqn:UL.
-  exploit ana_code_false; eauto. intros [A B]; subst.
-  exploit update_label_false; eauto. intros (A & B & C); subst.
-  exploit wt_find_label; eauto. intros [P Q].
-  econstructor; eauto. 
-  eapply wt_locset_mon. eauto. 
+  simpl in *. econstructor. auto. auto. eapply wt_find_label; eauto.
   apply wt_undef_regs; auto.
 - (* cond branch, not taken *)
-  simpl in *.
-  destruct (update_label lbl fs lm) as [[fs1 lm1] ch1] eqn:UL.
-  exploit ana_code_false; eauto. intros [A B]; subst.
-  exploit update_label_false; eauto. intros (A & B & C); subst.
-  econstructor. eauto. eauto. eauto. eauto. eauto.  
+  simpl in *. econstructor. auto. auto. auto. 
   apply wt_undef_regs; auto.
 - (* jumptable *)
-  simpl in *.
-  destruct (update_labels tbl fs lm) as [lm1 ch1] eqn:UL.
-  exploit ana_code_false; eauto. intros [A B]; subst.
-  exploit update_labels_false; eauto. intros (A & B); subst.
-  exploit wt_find_label; eauto. intros [P Q].
-  econstructor; eauto.
-  apply wt_undef_regs. eapply wt_locset_mon; eauto. apply A. eapply list_nth_z_in; eauto. 
+  simpl in *. econstructor. auto. auto. eapply wt_find_label; eauto.
+  apply wt_undef_regs; auto.
 - (* return *)
-  econstructor. eauto. 
-  eapply wt_return_regs. 
-  apply wt_parent_locset; auto.
-  eauto.
+  simpl in *. InvBooleans. 
+  econstructor; eauto.
+  apply wt_return_regs; auto. apply wt_parent_locset; auto.
 - (* internal function *)
-  simpl in WTFD. unfold wt_function in WTFD.
-  destruct (ana_function f) as [lm|] eqn:ANF; try discriminate.
-  econstructor. eauto. eauto. eauto. apply ana_function_inv; auto. exact WTFD. 
-  apply wt_undef_regs. eapply wt_call_regs; eauto. 
+  simpl in WTFD.
+  econstructor. eauto. eauto. eauto.
+  apply wt_undef_regs. apply wt_call_regs. auto. 
 - (* external function *)
-  econstructor. eauto. 
-  eapply wt_locset_mon.
-  eapply callregs_setregs_result. 
-  eapply wt_setregs.
-  eapply external_call_well_typed'; eauto.
-  unfold proj_sig_res', loc_result. destruct (sig_res (ef_sig ef) )as [[] | ]; auto.
-  auto.
+  econstructor. auto. apply wt_setlist; auto.
+  eapply Val.has_subtype_list. apply loc_result_type. eapply external_call_well_typed'; eauto.
 - (* return *)
-  inv WTSTK. econstructor; eauto. 
-  apply wt_locset_mon with (callregs fs); auto. 
+  inv WTSTK. econstructor; eauto.
 Qed.
 
 Theorem wt_initial_state:
   forall S, initial_state prog S -> wt_state S.
 Proof.
-  induction 1. econstructor. 
-  apply wt_callstack_nil with (s := Locset.empty). 
+  induction 1. econstructor. constructor. 
   unfold ge0 in H1. exploit Genv.find_funct_ptr_inversion; eauto.
-  intros [id IN]. eapply wt_prog; eauto. 
+  intros [id IN]. eapply wt_prog; eauto.
   apply wt_init.
 Qed.
 
@@ -850,10 +349,9 @@ Qed.
 Lemma wt_state_setstack:
   forall s f sp sl ofs ty r c rs m,
   wt_state (State s f sp (Lsetstack r sl ofs ty :: c) rs m) ->
-  Val.has_type (rs (R r)) ty /\ slot_valid f sl ofs ty = true /\ slot_writable sl = true.
+  slot_valid f sl ofs ty = true /\ slot_writable sl = true.
 Proof.
-  intros. inv H. simpl in WTC; InvBooleans. intuition. 
-  eapply Val.has_subtype; eauto. eapply wt_loc_type; eauto. 
+  intros. inv H. simpl in WTC; InvBooleans. intuition.
 Qed.
 
 Lemma wt_state_tailcall:
@@ -877,5 +375,5 @@ Lemma wt_callstate_wt_regs:
   wt_state (Callstate s f rs m) ->
   forall r, Val.has_type (rs (R r)) (mreg_type r).
 Proof.
-  intros. inv H. eapply wt_mreg; eauto.
+  intros. inv H. apply WTRS. 
 Qed.
diff --git a/backend/Locations.v b/backend/Locations.v
index 96f1eba1..5674b93a 100644
--- a/backend/Locations.v
+++ b/backend/Locations.v
@@ -69,7 +69,14 @@ Defined.
 Open Scope Z_scope.
 
 Definition typesize (ty: typ) : Z :=
-  match ty with Tint => 1 | Tlong => 2 | Tfloat => 2 | Tsingle => 1 end.
+  match ty with
+  | Tint => 1
+  | Tlong => 2
+  | Tfloat => 2
+  | Tsingle => 1
+  | Tany32 => 1
+  | Tany64 => 2
+  end.
 
 Lemma typesize_pos:
   forall (ty: typ), typesize ty > 0.
@@ -301,16 +308,29 @@ Module Locmap.
 
   Definition set (l: loc) (v: val) (m: t) : t :=
     fun (p: loc) =>
-      if Loc.eq l p then v
-      else if Loc.diff_dec l p then m p
+      if Loc.eq l p then
+        match l with R r => v | S sl ofs ty => Val.load_result (chunk_of_type ty) v end
+      else if Loc.diff_dec l p then
+        m p
       else Vundef.
 
   Lemma gss: forall l v m,
-    (set l v m) l = v.
+    (set l v m) l = 
+    match l with R r => v | S sl ofs ty => Val.load_result (chunk_of_type ty) v end.
   Proof.
     intros. unfold set. apply dec_eq_true.
   Qed.
 
+  Lemma gss_reg: forall r v m, (set (R r) v m) (R r) = v.
+  Proof.
+    intros. unfold set. rewrite dec_eq_true. auto.
+  Qed.
+
+  Lemma gss_typed: forall l v m, Val.has_type v (Loc.type l) -> (set l v m) l = v.
+  Proof.
+    intros. rewrite gss. destruct l. auto. apply Val.load_result_same; auto. 
+  Qed.
+
   Lemma gso: forall l v m p, Loc.diff l p -> (set l v m) p = m p.
   Proof.
     intros. unfold set. destruct (Loc.eq l p).
@@ -336,10 +356,11 @@ Module Locmap.
   Proof.
     assert (P: forall ll l m, m l = Vundef -> (undef ll m) l = Vundef).
       induction ll; simpl; intros. auto. apply IHll. 
-      unfold set. destruct (Loc.eq a l). auto.
+      unfold set. destruct (Loc.eq a l).
+      destruct a. auto. destruct ty; reflexivity. 
       destruct (Loc.diff_dec a l); auto.
     induction ll; simpl; intros. contradiction. 
-    destruct H. apply P. subst a. apply gss.  
+    destruct H. apply P. subst a. apply gss_typed. exact I. 
     auto.
   Qed.
 
@@ -364,10 +385,12 @@ Module IndexedTyp <: INDEXED_TYPE.
   Definition t := typ.
   Definition index (x: t) :=
     match x with
-    | Tint => 1%positive
-    | Tsingle => 2%positive
-    | Tfloat => 3%positive
-    | Tlong => 4%positive
+    | Tany32 => 1%positive
+    | Tint => 2%positive
+    | Tsingle => 3%positive
+    | Tany64 => 4%positive
+    | Tfloat => 5%positive
+    | Tlong => 6%positive
     end.
   Lemma index_inj: forall x y, index x = index y -> x = y.
   Proof. destruct x; destruct y; simpl; congruence. Qed.
@@ -456,7 +479,7 @@ Module OrderedLoc <: OrderedType.
   Definition diff_low_bound (l: loc) : loc :=
     match l with
     | R mr => l
-    | S sl ofs ty => S sl (ofs - 1) Tfloat
+    | S sl ofs ty => S sl (ofs - 1) Tany64
     end.
 
   Definition diff_high_bound (l: loc) : loc :=
@@ -480,9 +503,9 @@ Module OrderedLoc <: OrderedType.
         destruct H. right.
         destruct H0. right. generalize (RANGE ty'); omega. 
         destruct H0. 
-        assert (ty' = Tint \/ ty' = Tsingle). 
+        assert (ty' = Tint \/ ty' = Tsingle \/ ty' = Tany32). 
         { unfold OrderedTyp.lt in H1. destruct ty'; auto; compute in H1; congruence. }
-        right. destruct H2; subst ty'; simpl typesize; omega. 
+        right. destruct H2 as [E|[E|E]]; subst ty'; simpl typesize; omega. 
       + destruct H. left. apply OrderedSlot.lt_not_eq; auto.
         destruct H. right.
         destruct H0. left; omega.
@@ -502,13 +525,13 @@ Module OrderedLoc <: OrderedType.
       destruct H. contradiction. 
       destruct H.
       right; right; split; auto. left; omega. 
-      left; right; split; auto. destruct ty'; simpl in *.
-      destruct (zlt ofs' (ofs - 1)). left; auto. 
-      right; split. omega. compute. auto. 
+      left; right; split; auto.
+      assert (EITHER: typesize ty' = 1 /\ OrderedTyp.lt ty' Tany64 \/ typesize ty' = 2).
+      { destruct ty'; compute; auto. }
+      destruct (zlt ofs' (ofs - 1)). left; auto.
+      destruct EITHER as [[P Q] | P].
+      right; split; auto. omega.
       left; omega. 
-      left; omega.
-      destruct (zlt ofs' (ofs - 1)). left; auto. 
-      right; split. omega. compute. auto. 
   Qed.
 
 End OrderedLoc.
diff --git a/backend/NeedDomain.v b/backend/NeedDomain.v
index f050c726..73b6831a 100644
--- a/backend/NeedDomain.v
+++ b/backend/NeedDomain.v
@@ -33,7 +33,6 @@ Require Import RTL.
 Inductive nval : Type :=
   | Nothing              (**r value is entirely unused *)
   | I (m: int)           (**r only need the bits that are 1 in [m] *)
-  | Fsingle              (**r only need the value after conversion to single float *)
   | All.                 (**r every bit of the value is used *)
 
 Definition eq_nval (x y: nval) : {x=y} + {x<>y}.
@@ -56,12 +55,6 @@ Fixpoint vagree (v w: val) (x: nval) {struct x}: Prop :=
       | Vint p, _ => False
       | _, _ => True                                 
       end
-  | Fsingle =>
-      match v, w with
-      | Vfloat f, Vfloat g => Float.singleoffloat f = Float.singleoffloat g
-      | Vfloat _, _ => False
-      | _, _ => True
-      end
   | All => Val.lessdef v w
   end.
 
@@ -115,9 +108,7 @@ Inductive nge: nval -> nval -> Prop :=
   | nge_all: forall x, nge x Nothing
   | nge_int: forall m1 m2,
       (forall i, 0 <= i < Int.zwordsize -> Int.testbit m2 i = true -> Int.testbit m1 i = true) ->
-      nge (I m1) (I m2)
-  | nge_single:
-      nge Fsingle Fsingle.
+      nge (I m1) (I m2).
 
 Lemma nge_refl: forall x, nge x x.
 Proof.
@@ -145,7 +136,6 @@ Definition nlub (x y: nval) : nval :=
   | Nothing, _ => y
   | _, Nothing => x
   | I m1, I m2 => I (Int.or m1 m2)
-  | Fsingle, Fsingle => Fsingle
   | _, _ => All
   end.
 
@@ -388,14 +378,14 @@ Ltac InvAgree :=
   auto || exact Logic.I ||
   match goal with
   | [ H: False |- _ ] => contradiction
-  | [ H: match ?v with Vundef => _ | Vint _ => _ | Vlong _ => _ | Vfloat _ => _ | Vptr _ _ => _ end |- _ ] => destruct v
+  | [ H: match ?v with Vundef => _ | Vint _ => _ | Vlong _ => _ | Vfloat _ => _ | Vsingle _ => _ | Vptr _ _ => _ end |- _ ] => destruct v
   end).
 
 (** And immediate, or immediate *)
 
 Definition andimm (x: nval) (n: int) :=
   match x with
-  | Nothing | Fsingle => Nothing
+  | Nothing => Nothing
   | I m => I (Int.and m n)
   | All => I n
   end.
@@ -408,13 +398,12 @@ Proof.
   unfold andimm; intros; destruct x; simpl in *; unfold Val.and.
 - auto.
 - InvAgree. apply iagree_and; auto.
-- destruct v; destruct w; tauto.
 - InvAgree. rewrite iagree_and_eq in H. rewrite H; auto.
 Qed.
 
 Definition orimm (x: nval) (n: int) :=
   match x with
-  | Nothing | Fsingle => Nothing
+  | Nothing => Nothing
   | I m => I (Int.and m (Int.not n))
   | _ => I (Int.not n)
   end.
@@ -427,21 +416,16 @@ Proof.
   unfold orimm; intros; destruct x; simpl in *.
 - auto.
 - unfold Val.or; InvAgree. apply iagree_or; auto.
-- destruct v; destruct w; tauto.
 - InvAgree. simpl. apply Val.lessdef_same. f_equal. apply iagree_mone. 
   apply iagree_or. rewrite Int.and_commut. rewrite Int.and_mone. auto.
 Qed.
 
 (** Bitwise operations: and, or, xor, not *)
 
-Definition bitwise (x: nval) :=
-  match x with
-  | Fsingle => Nothing
-  | _ => x
-  end.
+Definition bitwise (x: nval) := x.
 
 Remark bitwise_idem: forall nv, bitwise (bitwise nv) = bitwise nv.
-Proof. destruct nv; auto. Qed.
+Proof. auto. Qed.
 
 Lemma vagree_bitwise_binop:
   forall f,
@@ -456,7 +440,6 @@ Proof.
   unfold bitwise; intros. destruct x; simpl in *.
 - auto. 
 - InvAgree. 
-- destruct v1; auto. destruct v2; auto. 
 - inv H0; auto. inv H1; auto. destruct w1; auto.
 Qed.
 
@@ -489,7 +472,7 @@ Qed.
 
 Definition shlimm (x: nval) (n: int) :=
   match x with
-  | Nothing | Fsingle => Nothing
+  | Nothing => Nothing
   | I m => I (Int.shru m n)
   | All => I (Int.shru Int.mone n)
   end.
@@ -504,14 +487,13 @@ Proof.
   destruct x; simpl in *.
 - auto.
 - InvAgree. apply iagree_shl; auto.
-- destruct v; destruct w; auto.
 - InvAgree. apply Val.lessdef_same. f_equal. apply iagree_mone. apply iagree_shl; auto.
 - destruct v; auto with na.
 Qed.
 
 Definition shruimm (x: nval) (n: int) :=
   match x with
-  | Nothing | Fsingle => Nothing
+  | Nothing => Nothing
   | I m => I (Int.shl m n)
   | All => I (Int.shl Int.mone n)
   end.
@@ -526,14 +508,13 @@ Proof.
   destruct x; simpl in *.
 - auto.
 - InvAgree. apply iagree_shru; auto.
-- destruct v; destruct w; auto.
 - InvAgree. apply Val.lessdef_same. f_equal. apply iagree_mone. apply iagree_shru; auto.
 - destruct v; auto with na.
 Qed.
 
 Definition shrimm (x: nval) (n: int) :=
   match x with
-  | Nothing | Fsingle => Nothing
+  | Nothing => Nothing
   | I m => I (let m' := Int.shl m n in
               if Int.eq_dec (Int.shru m' n) m
               then m'
@@ -554,14 +535,13 @@ Proof.
   destruct (Int.eq_dec (Int.shru (Int.shl m n) n) m).
   apply iagree_shr_1; auto.
   apply iagree_shr; auto.
-- destruct v; destruct w; auto.
 - InvAgree. apply Val.lessdef_same. f_equal. apply iagree_mone. apply iagree_shr. auto.
 - destruct v; auto with na.
 Qed.
 
 Definition rolm (x: nval) (amount mask: int) :=
   match x with
-  | Nothing | Fsingle => Nothing
+  | Nothing => Nothing
   | I m => I (Int.ror (Int.and m mask) amount)
   | _ => I (Int.ror mask amount)
   end.
@@ -575,7 +555,6 @@ Proof.
 - auto.
 - unfold Val.rolm; InvAgree. unfold Int.rolm. 
   apply iagree_and. apply iagree_rol. auto. 
-- unfold Val.rolm; destruct v, w; auto.
 - unfold Val.rolm; InvAgree. apply Val.lessdef_same. f_equal. apply iagree_mone. 
   unfold Int.rolm. apply iagree_and. apply iagree_rol. rewrite Int.and_commut. 
   rewrite Int.and_mone. auto.
@@ -583,7 +562,7 @@ Qed.
 
 Definition ror (x: nval) (amount: int) :=
   match x with
-  | Nothing | Fsingle => Nothing
+  | Nothing => Nothing
   | I m => I (Int.rol m amount)
   | All => All
   end.
@@ -598,7 +577,6 @@ Proof.
   destruct x; simpl in *.
 - auto.
 - InvAgree. apply iagree_ror; auto. 
-- destruct v, w; auto.
 - inv H; auto.
 - destruct v; auto with na.
 Qed.
@@ -608,7 +586,7 @@ Qed.
 
 Definition modarith (x: nval) :=
   match x with
-  | Nothing | Fsingle => Nothing
+  | Nothing => Nothing
   | I m => I (complete_mask m)
   | All => All
   end.
@@ -621,7 +599,6 @@ Proof.
   unfold modarith; intros. destruct x; simpl in *. 
 - auto.
 - unfold Val.add; InvAgree. apply eqmod_iagree. apply Int.eqmod_add; apply iagree_eqmod; auto. 
-- unfold Val.add; destruct v1, w1; auto; destruct v2, w2; auto.
 - inv H; auto. inv H0; auto. destruct w1; auto. 
 Qed.
 
@@ -638,7 +615,6 @@ Proof.
   unfold mul, add; intros. destruct x; simpl in *. 
 - auto.
 - unfold Val.mul; InvAgree. apply eqmod_iagree. apply Int.eqmod_mult; apply iagree_eqmod; auto. 
-- unfold Val.mul; destruct v1, w1; auto; destruct v2, w2; auto.
 - inv H; auto. inv H0; auto. destruct w1; auto. 
 Qed.
 
@@ -651,7 +627,6 @@ Proof.
 - auto.
 - unfold Val.neg; InvAgree. 
   apply eqmod_iagree. apply Int.eqmod_neg. apply iagree_eqmod; auto. 
-- destruct v, w; simpl; auto. 
 - inv H; simpl; auto.
 Qed.
 
@@ -659,7 +634,7 @@ Qed.
 
 Definition zero_ext (n: Z) (x: nval) :=
   match x with
-  | Nothing | Fsingle => Nothing
+  | Nothing => Nothing
   | I m => I (Int.zero_ext n m)
   | All => I (Int.zero_ext n Int.mone)
   end.
@@ -677,7 +652,6 @@ Proof.
   red; intros. autorewrite with ints; try omega. 
   destruct (zlt i1 n); auto. apply H; auto.
   autorewrite with ints; try omega. rewrite zlt_true; auto.
-- unfold Val.zero_ext; destruct v; destruct w; auto.
 - unfold Val.zero_ext; InvAgree; auto. apply Val.lessdef_same. f_equal. 
   Int.bit_solve; try omega. destruct (zlt i1 n); auto. apply H; auto. 
   autorewrite with ints; try omega. apply zlt_true; auto.
@@ -685,7 +659,7 @@ Qed.
 
 Definition sign_ext (n: Z) (x: nval) :=
   match x with
-  | Nothing | Fsingle => Nothing
+  | Nothing => Nothing
   | I m => I (Int.or (Int.zero_ext n m) (Int.shl Int.one (Int.repr (n - 1))))
   | All => I (Int.zero_ext n Int.mone)
   end.
@@ -710,7 +684,6 @@ Proof.
   right. rewrite Int.unsigned_repr. rewrite zlt_false by omega. 
   replace (n - 1 - (n - 1)) with 0 by omega. reflexivity. 
   generalize Int.wordsize_max_unsigned; omega.
-- unfold Val.sign_ext; destruct v; destruct w; auto.
 - unfold Val.sign_ext; InvAgree; auto. apply Val.lessdef_same. f_equal. 
   Int.bit_solve; try omega.
   set (j := if zlt i1 n then i1 else n - 1).
@@ -721,31 +694,12 @@ Proof.
   unfold j. destruct (zlt i1 n); omega.
 Qed.
 
-Definition singleoffloat (x: nval) :=
-  match x with
-  | Nothing | I _ => Nothing
-  | Fsingle | All => Fsingle
-  end.
-
-Lemma singleoffloat_sound:
-  forall v w x,
-  vagree v w (singleoffloat x) ->
-  vagree (Val.singleoffloat v) (Val.singleoffloat w) x.
-Proof.
-  unfold singleoffloat; intros. destruct x; simpl in *. 
-- auto.
-- unfold Val.singleoffloat; destruct v, w; auto. 
-- unfold Val.singleoffloat; InvAgree. congruence.
-- unfold Val.singleoffloat; InvAgree; auto. rewrite H; auto. 
-Qed.
-
 (** The needs of a memory store concerning the value being stored. *)
 
 Definition store_argument (chunk: memory_chunk) :=
   match chunk with
   | Mint8signed | Mint8unsigned => I (Int.repr 255)
   | Mint16signed | Mint16unsigned => I (Int.repr 65535)
-  | Mfloat32 => Fsingle
   | _ => All
   end.
 
@@ -781,10 +735,9 @@ Proof.
   change 16 with (Int.size (Int.repr 65535)). apply iagree_eqmod; auto.
 - apply encode_val_inject. rewrite val_inject_id; auto.
 - apply encode_val_inject. rewrite val_inject_id; auto.
-- InvAgree. apply SAME. simpl.
-  rewrite <- (Float.bits_of_singleoffloat f).
-  rewrite <- (Float.bits_of_singleoffloat f0).
-  congruence. 
+- apply encode_val_inject. rewrite val_inject_id; auto.
+- apply encode_val_inject. rewrite val_inject_id; auto.
+- apply encode_val_inject. rewrite val_inject_id; auto.
 - apply encode_val_inject. rewrite val_inject_id; auto.
 Qed.
 
@@ -803,8 +756,6 @@ Proof.
   auto. compute; auto.
 - apply zero_ext_sound with (v := Vint i) (w := Vint i0) (x := All) (n := 16).
   auto. omega.
-- apply singleoffloat_sound with (v := Vfloat f) (w := Vfloat f0) (x := All).
-  auto. 
 Qed.
 
 (** The needs of a comparison *)
@@ -1034,24 +985,6 @@ Proof.
   rewrite Int.bits_zero_ext in H3 by omega. rewrite zlt_false in H3 by auto. discriminate.
 Qed.
 
-Definition singleoffloat_redundant (x: nval) :=
-  match x with
-  | Nothing => true
-  | Fsingle => true
-  | _ => false
-  end.
-
-Lemma singleoffloat_redundant_sound:
-  forall v w x,
-  singleoffloat_redundant x = true ->
-  vagree v w (singleoffloat x) ->
-  vagree (Val.singleoffloat v) w x.
-Proof.
-  unfold singleoffloat; intros. destruct x; try discriminate.
-- auto.
-- simpl in *; InvAgree. simpl. rewrite Float.singleoffloat_idem; auto. 
-Qed.
-
 (** * Neededness for register environments *)
 
 Module NVal <: SEMILATTICE.
diff --git a/backend/PrintCminor.ml b/backend/PrintCminor.ml
index 4c53c5ea..51a45b27 100644
--- a/backend/PrintCminor.ml
+++ b/backend/PrintCminor.ml
@@ -31,10 +31,10 @@ let rec precedence = function
   | Evar _ -> (16, NA)
   | Econst _ -> (16, NA)
   | Eunop _ -> (15, RtoL)
-  | Ebinop((Omul|Odiv|Odivu|Omod|Omodu|Omulf|Odivf|Omull|Odivl|Odivlu|Omodl|Omodlu), _, _) -> (13, LtoR)
-  | Ebinop((Oadd|Osub|Oaddf|Osubf|Oaddl|Osubl), _, _) -> (12, LtoR)
+  | Ebinop((Omul|Odiv|Odivu|Omod|Omodu|Omulf|Odivf|Omulfs|Odivfs|Omull|Odivl|Odivlu|Omodl|Omodlu), _, _) -> (13, LtoR)
+  | Ebinop((Oadd|Osub|Oaddf|Osubf|Oaddfs|Osubfs|Oaddl|Osubl), _, _) -> (12, LtoR)
   | Ebinop((Oshl|Oshr|Oshru|Oshll|Oshrl|Oshrlu), _, _) -> (11, LtoR)
-  | Ebinop((Ocmp _|Ocmpu _|Ocmpf _|Ocmpl _|Ocmplu _), _, _) -> (10, LtoR)
+  | Ebinop((Ocmp _|Ocmpu _|Ocmpf _|Ocmpfs _|Ocmpl _|Ocmplu _), _, _) -> (10, LtoR)
   | Ebinop((Oand|Oandl), _, _) -> (8, LtoR)
   | Ebinop((Oxor|Oxorl), _, _) -> (7, LtoR)
   | Ebinop((Oor|Oorl), _, _) -> (6, LtoR)
@@ -55,11 +55,18 @@ let name_of_unop = function
   | Onotint -> "~"
   | Onegf -> "-f"
   | Oabsf -> "absf"
+  | Onegfs -> "-s"
+  | Oabsfs -> "abss"
   | Osingleoffloat -> "float32"
+  | Ofloatofsingle -> "float64"
   | Ointoffloat -> "intoffloat"
   | Ointuoffloat -> "intuoffloat"
   | Ofloatofint -> "floatofint"
   | Ofloatofintu -> "floatofintu"
+  | Ointofsingle -> "intofsingle"
+  | Ointuofsingle -> "intuofsingle"
+  | Osingleofint -> "singleofint"
+  | Osingleofintu -> "singleofintu"
   | Onegl -> "-l"
   | Onotl -> "~l"
   | Ointoflong -> "intoflong"
@@ -69,6 +76,8 @@ let name_of_unop = function
   | Olonguoffloat -> "longuoffloat"
   | Ofloatoflong -> "floatoflong"
   | Ofloatoflongu -> "floatoflongu"
+  | Olongofsingle -> "longofsingle"
+  | Olonguofsingle -> "longuofsingle"
   | Osingleoflong -> "singleoflong"
   | Osingleoflongu -> "singleoflongu"
 
@@ -98,6 +107,10 @@ let name_of_binop = function
   | Osubf -> "-f"
   | Omulf -> "*f"
   | Odivf -> "/f"
+  | Oaddfs -> "+s"
+  | Osubfs -> "-s"
+  | Omulfs -> "*s"
+  | Odivfs -> "/s"
   | Oaddl -> "+l"
   | Osubl -> "-l"
   | Omull -> "*l"
@@ -114,6 +127,7 @@ let name_of_binop = function
   | Ocmp c -> comparison_name c
   | Ocmpu c -> comparison_name c ^ "u"
   | Ocmpf c -> comparison_name c ^ "f"
+  | Ocmpfs c -> comparison_name c ^ "s"
   | Ocmpl c -> comparison_name c ^ "l"
   | Ocmplu c -> comparison_name c ^ "lu"
 
@@ -135,6 +149,8 @@ let rec expr p (prec, e) =
       fprintf p "%ld" (camlint_of_coqint n)
   | Econst(Ofloatconst f) ->
       fprintf p "%F" (camlfloat_of_coqfloat f)
+  | Econst(Osingleconst f) ->
+      fprintf p "%Ff" (camlfloat_of_coqfloat32 f)
   | Econst(Olongconst n) ->
       fprintf p "%LdLL" (camlint64_of_coqint n)
   | Econst(Oaddrsymbol(id, ofs)) ->
@@ -171,6 +187,8 @@ let name_of_type = function
   | Tfloat -> "float"
   | Tlong -> "long"
   | Tsingle -> "single"
+  | Tany32 -> "any32"
+  | Tany64 -> "any64"
 
 let print_sig p sg =
   List.iter
diff --git a/backend/PrintXTL.ml b/backend/PrintXTL.ml
index ff458091..08ccf129 100644
--- a/backend/PrintXTL.ml
+++ b/backend/PrintXTL.ml
@@ -34,6 +34,8 @@ let short_name_of_type = function
   | Tfloat -> 'f'
   | Tlong -> 'l'
   | Tsingle -> 's'
+  | Tany32 -> 'w'
+  | Tany64 -> 'd'
 
 let loc pp = function
   | R r -> mreg pp r
diff --git a/backend/RTLtyping.v b/backend/RTLtyping.v
index e8ae7ae0..5042c775 100644
--- a/backend/RTLtyping.v
+++ b/backend/RTLtyping.v
@@ -36,8 +36,6 @@ Require Import Conventions.
   is very simple, consisting of the four types [Tint] (for integers
   and pointers), [Tfloat] (for double-precision floats), [Tlong]
   (for 64-bit integers) and [Tsingle] (for single-precision floats).
-  At the RTL level, we simplify things further by equating [Tsingle]
-  with [Tfloat].
 
   Additionally, we impose that each pseudo-register has the same type
   throughout the function.  This requirement helps with register allocation,
@@ -57,11 +55,6 @@ Require Import Conventions.
   which can work over both integers and floats.
 *)
 
-Definition normalize (ty: typ) : typ :=
-  match ty with Tsingle => Tfloat | _ => ty end.
-
-Definition normalize_list (tyl: list typ) : list typ := map normalize tyl.
-
 Definition regenv := reg -> typ.
 
 Section WT_INSTR.
@@ -86,39 +79,39 @@ Inductive wt_instr : instruction -> Prop :=
       forall op args res s,
       op <> Omove ->
       map env args = fst (type_of_operation op) ->
-      env res = normalize (snd (type_of_operation op)) ->
+      env res = snd (type_of_operation op) ->
       valid_successor s ->
       wt_instr (Iop op args res s)
   | wt_Iload:
       forall chunk addr args dst s,
       map env args = type_of_addressing addr ->
-      env dst = normalize (type_of_chunk chunk) ->
+      env dst = type_of_chunk chunk ->
       valid_successor s ->
       wt_instr (Iload chunk addr args dst s)
   | wt_Istore:
       forall chunk addr args src s,
       map env args = type_of_addressing addr ->
-      env src = type_of_chunk_use chunk ->
+      env src = type_of_chunk chunk ->
       valid_successor s ->
       wt_instr (Istore chunk addr args src s)
   | wt_Icall:
       forall sig ros args res s,
       match ros with inl r => env r = Tint | inr s => True end ->
-      map env args = normalize_list sig.(sig_args) ->
-      env res = normalize (proj_sig_res sig) ->
+      map env args = sig.(sig_args) ->
+      env res = proj_sig_res sig ->
       valid_successor s ->
       wt_instr (Icall sig ros args res s)
   | wt_Itailcall:
       forall sig ros args,
       match ros with inl r => env r = Tint | inr s => True end ->
-      map env args = normalize_list sig.(sig_args) ->
+      map env args = sig.(sig_args) ->
       sig.(sig_res) = funct.(fn_sig).(sig_res) ->
       tailcall_possible sig ->
       wt_instr (Itailcall sig ros args)
   | wt_Ibuiltin:
       forall ef args res s,
-      map env args = normalize_list (ef_sig ef).(sig_args) ->
-      env res = normalize (proj_sig_res (ef_sig ef)) ->
+      map env args = (ef_sig ef).(sig_args) ->
+      env res = proj_sig_res (ef_sig ef) ->
       valid_successor s ->
       wt_instr (Ibuiltin ef args res s)
   | wt_Icond:
@@ -139,7 +132,7 @@ Inductive wt_instr : instruction -> Prop :=
   | wt_Ireturn_some:
       forall arg ty,
       funct.(fn_sig).(sig_res) = Some ty ->
-      env arg = normalize ty ->
+      env arg = ty ->
       wt_instr (Ireturn (Some arg)).
 
 End WT_INSTR.
@@ -152,7 +145,7 @@ End WT_INSTR.
 Record wt_function (f: function) (env: regenv): Prop :=
   mk_wt_function {
     wt_params:
-      map env f.(fn_params) = normalize_list f.(fn_sig).(sig_args);
+      map env f.(fn_params) = f.(fn_sig).(sig_args);
     wt_norepet:
       list_norepet f.(fn_params);
     wt_instrs:
@@ -231,23 +224,23 @@ Definition type_instr (e: S.typenv) (i: instruction) : res S.typenv :=
         end
       else
        (let (targs, tres) := type_of_operation op in
-        do e1 <- S.set_list e args targs; S.set e1 res (normalize tres))
+        do e1 <- S.set_list e args targs; S.set e1 res tres)
   | Iload chunk addr args dst s =>
       do x <- check_successor s;
       do e1 <- S.set_list e args (type_of_addressing addr);
-      S.set e1 dst (normalize (type_of_chunk chunk))
+      S.set e1 dst (type_of_chunk chunk)
   | Istore chunk addr args src s =>
       do x <- check_successor s;
       do e1 <- S.set_list e args (type_of_addressing addr);
-      S.set e1 src (type_of_chunk_use chunk)
+      S.set e1 src (type_of_chunk chunk)
   | Icall sig ros args res s =>
       do x <- check_successor s;
       do e1 <- type_ros e ros;
-      do e2 <- S.set_list e1 args (normalize_list sig.(sig_args));
-      S.set e2 res (normalize (proj_sig_res sig))
+      do e2 <- S.set_list e1 args sig.(sig_args);
+      S.set e2 res (proj_sig_res sig)
   | Itailcall sig ros args =>
       do e1 <- type_ros e ros;
-      do e2 <- S.set_list e1 args (normalize_list sig.(sig_args));
+      do e2 <- S.set_list e1 args sig.(sig_args);
       if opt_typ_eq sig.(sig_res) f.(fn_sig).(sig_res) then
         if tailcall_is_possible sig
         then OK e2
@@ -256,8 +249,8 @@ Definition type_instr (e: S.typenv) (i: instruction) : res S.typenv :=
   | Ibuiltin ef args res s =>
       let sig := ef_sig ef in
       do x <- check_successor s;
-      do e1 <- S.set_list e args (normalize_list sig.(sig_args));
-      S.set e1 res (normalize (proj_sig_res sig))
+      do e1 <- S.set_list e args sig.(sig_args);
+      S.set e1 res (proj_sig_res sig)
  | Icond cond args s1 s2 =>
       do x1 <- check_successor s1;
       do x2 <- check_successor s2;
@@ -271,7 +264,7 @@ Definition type_instr (e: S.typenv) (i: instruction) : res S.typenv :=
   | Ireturn optres =>
       match optres, f.(fn_sig).(sig_res) with
       | None, None => OK e
-      | Some r, Some t => S.set e r (normalize t)
+      | Some r, Some t => S.set e r t
       | _, _ => Error(msg "bad return")
       end
   end.
@@ -297,7 +290,7 @@ Definition check_params_norepet (params: list reg): res unit :=
 
 Definition type_function : res regenv :=
   do e1 <- type_code S.initial;
-  do e2 <- S.set_list e1 f.(fn_params) (normalize_list f.(fn_sig).(sig_args));
+  do e2 <- S.set_list e1 f.(fn_params) f.(fn_sig).(sig_args);
   do te <- S.solve e2;
   do x1 <- check_params_norepet f.(fn_params);
   do x2 <- check_successor f.(fn_entrypoint);
@@ -597,7 +590,7 @@ Proof.
   intros. destruct H. 
   destruct (type_code_complete te S.initial) as (e1 & A & B).
   auto. apply S.satisf_initial. 
-  destruct (S.set_list_complete te f.(fn_params) (normalize_list f.(fn_sig).(sig_args)) e1) as (e2 & C & D); auto.
+  destruct (S.set_list_complete te f.(fn_params) f.(fn_sig).(sig_args) e1) as (e2 & C & D); auto.
   destruct (S.solve_complete te e2) as (te' & E); auto.
   exists te'; unfold type_function.
   rewrite A; simpl. rewrite C; simpl. rewrite E; simpl. 
@@ -637,23 +630,6 @@ Proof.
   apply H.
 Qed.
 
-Lemma normalize_subtype:
-  forall ty, subtype ty (normalize ty) = true.
-Proof.
-  intros. destruct ty; reflexivity.
-Qed.
-
-Lemma wt_regset_assign2:
-  forall env rs v r ty,
-  wt_regset env rs ->
-  Val.has_type v ty ->
-  env r = normalize ty ->
-  wt_regset env (rs#r <- v).
-Proof.
-  intros. eapply wt_regset_assign; eauto.
-  rewrite H1. eapply Val.has_subtype; eauto. apply normalize_subtype.
-Qed.
-
 Lemma wt_regset_list:
   forall env rs,
   wt_regset env rs ->
@@ -684,9 +660,8 @@ Proof.
   intros. inv H. 
   simpl in H0. inv H0. apply wt_regset_assign; auto.
   rewrite H4; auto.
-  eapply wt_regset_assign2; auto.
-  eapply type_of_operation_sound; eauto.
-  auto.
+  eapply wt_regset_assign; auto.
+  rewrite H8. eapply type_of_operation_sound; eauto.
 Qed.
 
 Lemma wt_exec_Iload:
@@ -697,8 +672,7 @@ Lemma wt_exec_Iload:
   wt_regset env (rs#dst <- v).
 Proof.
   intros. destruct a; simpl in H0; try discriminate. inv H.
-  eapply wt_regset_assign2; eauto.
-  eapply Mem.load_type; eauto.
+  eapply wt_regset_assign; eauto. rewrite H8; eapply Mem.load_type; eauto.
 Qed.
 
 Lemma wt_exec_Ibuiltin:
@@ -709,8 +683,8 @@ Lemma wt_exec_Ibuiltin:
   wt_regset env (rs#res <- vres).
 Proof.
   intros. inv H. 
-  eapply wt_regset_assign2; eauto. 
-  eapply external_call_well_typed; eauto.
+  eapply wt_regset_assign; eauto. 
+  rewrite H7; eapply external_call_well_typed; eauto.
 Qed.
 
 Lemma wt_instr_at:
@@ -728,7 +702,7 @@ Inductive wt_stackframes: list stackframe -> signature -> Prop :=
       forall s res f sp pc rs env sg,
       wt_function f env ->
       wt_regset env rs ->
-      env res = normalize (proj_sig_res sg) ->
+      env res = proj_sig_res sg ->
       wt_stackframes s (fn_sig f) ->
       wt_stackframes (Stackframe res f sp pc rs :: s) sg.
 
@@ -743,12 +717,12 @@ Inductive wt_state: state -> Prop :=
       forall s f args m,
       wt_stackframes s (funsig f) ->
       wt_fundef f ->
-      Val.has_type_list args (normalize_list (sig_args (funsig f))) ->
+      Val.has_type_list args (sig_args (funsig f)) ->
       wt_state (Callstate s f args m)
   | wt_state_return:
       forall s v m sg,
       wt_stackframes s sg ->
-      Val.has_type v (normalize (proj_sig_res sg)) ->
+      Val.has_type v (proj_sig_res sg) ->
       wt_state (Returnstate s v m).
 
 Remark wt_stackframes_change_sig:
@@ -814,15 +788,13 @@ Proof.
   econstructor; eauto.
   (* Ireturn *)
   econstructor; eauto. 
-  inv WTI; simpl. auto. unfold proj_sig_res; rewrite H2. rewrite <- H3. auto. 
+  inv WTI; simpl. auto. unfold proj_sig_res; rewrite H2. auto. 
   (* internal function *)
   simpl in *. inv H5.
   econstructor; eauto.
   inv H1. apply wt_init_regs; auto. rewrite wt_params0. auto. 
   (* external function *)
   econstructor; eauto. simpl.  
-  change (Val.has_type res (normalize (proj_sig_res (ef_sig ef)))).
-  eapply Val.has_subtype. apply normalize_subtype. 
   eapply external_call_well_typed; eauto.
   (* return *)
   inv H1. econstructor; eauto.
diff --git a/backend/Regalloc.ml b/backend/Regalloc.ml
index b736f291..687cbbd3 100644
--- a/backend/Regalloc.ml
+++ b/backend/Regalloc.ml
@@ -92,7 +92,14 @@ let rec constrain_regs vl cl =
   | v1 :: vl', None :: cl' -> v1 :: constrain_regs vl' cl'
 
 let move v1 v2 k =
-  if v1 = v2 then k else Xmove(v1, v2) :: k
+  if v1 = v2 then
+    k
+  else if is_stack_reg v1 then begin
+    let t = new_temp (typeof v2) in Xmove(v1, t) :: Xmove(t, v2) :: k
+  end else if is_stack_reg v2 then begin
+    let t = new_temp (typeof v1) in Xmove(v1, t) :: Xmove(t, v2) :: k
+  end else
+    Xmove(v1, v2) :: k
 
 let rec movelist vl1 vl2 k =
   match vl1, vl2 with
@@ -104,7 +111,7 @@ let xparmove srcs dsts k =
   assert (List.length srcs = List.length dsts);
   match srcs, dsts with
   | [], [] -> k
-  | [src], [dst] -> Xmove(src, dst) :: k
+  | [src], [dst] -> move src dst k
   | _, _ -> Xparmove(srcs, dsts, new_temp Tint, new_temp Tfloat) :: k
 
 (* Return the XTL basic block corresponding to the given RTL instruction.
@@ -365,9 +372,15 @@ let spill_costs f =
     | L l -> ()
     | V(r, ty) ->
         let st = get_stats r in
-        let c1 = st.cost + amount in
-        let c2 = if c1 >= 0 then c1 else max_int (* overflow *) in
-        st.cost <- c2;
+        if st.cost < 0 then
+          (* the variable must be spilled, don't change its cost *)
+          assert (amount < max_int)
+        else begin
+          (* saturating addition *)
+          let c1 = st.cost + amount in
+          let c2 = if c1 >= 0 then c1 else max_int (* overflow *) in
+          st.cost <- c2
+        end;           
         st.usedefs <- st.usedefs + uses in
 
   let charge_list amount uses vl =
@@ -376,9 +389,23 @@ let spill_costs f =
   let charge_ros amount ros =
     match ros with Coq_inl v -> charge amount 1 v | Coq_inr id -> () in
 
+  let force_stack_allocation v =
+    match v with
+    | L l -> ()
+    | V(r, ty) ->
+        let st = get_stats r in
+        assert (st.cost < max_int);
+        st.cost <- (-1) in
+
   let charge_instr = function
     | Xmove(src, dst) ->
-        charge 1 1 src; charge 1 1 dst
+        if is_stack_reg src then
+          force_stack_allocation dst
+        else if is_stack_reg dst then
+          force_stack_allocation src
+        else begin
+          charge 1 1 src; charge 1 1 dst
+        end
     | Xreload(src, dst) ->
         charge 1 1 src; charge max_int 1 dst
         (* dest must not be spilled! *)
@@ -491,10 +518,13 @@ let add_interfs_instr g instr live =
             add_interfs_list g (vmreg mr) srcs;
             IRC.add_interf g (vmreg mr) ftmp)
           (destroyed_by_setstack Tsingle)
+  | Xop(Ofloatofsingle, arg1::_, res) when Configuration.arch = "powerpc" ->
+      add_interfs_def g res live;
+      IRC.add_pref g arg1 res
   | Xop(op, args, res) ->
       begin match is_two_address op args with
       | None ->
-          add_interfs_def g res live;
+          add_interfs_def g res live
       | Some(arg1, argl) ->
           (* Treat as "res := arg1; res := op(res, argl)" *)
           add_interfs_def g res live;
@@ -502,7 +532,7 @@ let add_interfs_instr g instr live =
           add_interfs_move g arg1 res
             (vset_addlist (res :: argl) (VSet.remove res live))
       end;
-      add_interfs_destroyed g (VSet.remove res live) (destroyed_by_op op);
+      add_interfs_destroyed g (VSet.remove res live) (destroyed_by_op op)
   | Xload(chunk, addr, args, dst) ->
       add_interfs_def g dst live;
       add_interfs_destroyed g (VSet.remove dst live) 
@@ -576,10 +606,16 @@ let tospill_instr alloc instr ts =
       then ts
       else VSet.add src (VSet.add dst ts)
   | Xreload(src, dst) ->
-      assert (is_reg alloc dst);
+      if not (is_reg alloc dst) then begin
+        printf "Error: %a was spilled\n" PrintXTL.var dst;
+        assert false
+      end;
       ts
   | Xspill(src, dst) ->
-      assert (is_reg alloc src);
+      if not (is_reg alloc src) then begin
+        printf "Error: %a was spilled\n" PrintXTL.var src;
+        assert false
+      end;
       ts
   | Xparmove(srcs, dsts, itmp, ftmp) ->
       assert (is_reg alloc itmp && is_reg alloc ftmp);
@@ -834,7 +870,9 @@ let make_parmove srcs dsts itmp ftmp k =
   assert (Array.length dst = n);
   let status = Array.make n To_move in
   let temp_for =
-    function Tint -> itmp | (Tfloat|Tsingle) -> ftmp | Tlong -> assert false in
+    function (Tint|Tany32) -> itmp
+           | (Tfloat|Tsingle|Tany64) -> ftmp
+           | Tlong -> assert false in
   let code = ref [] in
   let add_move s d =
     match s, d with
diff --git a/backend/SelectDiv.vp b/backend/SelectDiv.vp
index 938ce5d8..a275a850 100644
--- a/backend/SelectDiv.vp
+++ b/backend/SelectDiv.vp
@@ -182,4 +182,16 @@ Nondetfunction divf (e1: expr) (e2: expr) :=
   | _ => Eop Odivf (e1 ::: e2 ::: Enil)
   end.
 
+Definition divfsimm (e: expr) (n: float32) :=
+  match Float32.exact_inverse n with
+  | Some n' => Eop Omulfs (e ::: Eop (Osingleconst n') Enil ::: Enil)
+  | None    => Eop Odivfs (e ::: Eop (Osingleconst n) Enil ::: Enil)
+  end.
+
+Nondetfunction divfs (e1: expr) (e2: expr) :=
+  match e2 with
+  | Eop (Osingleconst n2) Enil => divfsimm e1 n2
+  | _ => Eop Odivfs (e1 ::: e2 ::: Enil)
+  end.
+
 
diff --git a/backend/SelectDivproof.v b/backend/SelectDivproof.v
index 9228cde4..d4bd4f5c 100644
--- a/backend/SelectDivproof.v
+++ b/backend/SelectDivproof.v
@@ -570,4 +570,20 @@ Proof.
 - TrivialExists.
 Qed.
 
+Theorem eval_divfs:
+  forall le a b x y,
+  eval_expr ge sp e m le a x ->
+  eval_expr ge sp e m le b y ->
+  exists v, eval_expr ge sp e m le (divfs a b) v /\ Val.lessdef (Val.divfs x y) v.
+Proof.
+  intros until y. unfold divfs. destruct (divfs_match b); intros.
+- unfold divfsimm. destruct (Float32.exact_inverse n2) as [n2' | ] eqn:EINV.
+  + inv H0. inv H4. simpl in H6. inv H6. econstructor; split.
+    EvalOp. constructor. eauto. constructor. EvalOp. simpl; eauto. constructor. 
+    simpl; eauto. 
+    destruct x; simpl; auto. erewrite Float32.div_mul_inverse; eauto. 
+  + TrivialExists. 
+- TrivialExists.
+Qed.
+
 End CMCONSTRS.
diff --git a/backend/SelectLong.vp b/backend/SelectLong.vp
index 0c1cbb3a..ab4ab8c4 100644
--- a/backend/SelectLong.vp
+++ b/backend/SelectLong.vp
@@ -138,6 +138,10 @@ Definition floatoflong (arg: expr) :=
   Eexternal hf.(i64_stod) sig_l_f (arg ::: Enil).
 Definition floatoflongu (arg: expr) :=
   Eexternal hf.(i64_utod) sig_l_f (arg ::: Enil).
+Definition longofsingle (arg: expr) := 
+  longoffloat (floatofsingle arg).
+Definition longuofsingle (arg: expr) :=
+  longuoffloat (floatofsingle arg).
 Definition singleoflong (arg: expr) :=
   Eexternal hf.(i64_stof) sig_l_s (arg ::: Enil).
 Definition singleoflongu (arg: expr) :=
diff --git a/backend/SelectLongproof.v b/backend/SelectLongproof.v
index ec0dd2cd..c7c7ab2d 100644
--- a/backend/SelectLongproof.v
+++ b/backend/SelectLongproof.v
@@ -435,6 +435,34 @@ Proof.
   auto.
 Qed.
 
+Theorem eval_longofsingle:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.longofsingle x = Some y ->
+  exists v, eval_expr ge sp e m le (longofsingle hf a) v /\ Val.lessdef y v.
+Proof.
+  intros; unfold longofsingle.
+  destruct x; simpl in H0; inv H0. destruct (Float32.to_long f) as [n|] eqn:EQ; simpl in H2; inv H2.
+  exploit eval_floatofsingle; eauto. intros (v & A & B). simpl in B. inv B.
+  apply Float32.to_long_double in EQ. 
+  eapply eval_longoffloat; eauto. simpl.
+  change (Float.of_single f) with (Float32.to_double f); rewrite EQ; auto.
+Qed.
+
+Theorem eval_longuofsingle:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.longuofsingle x = Some y ->
+  exists v, eval_expr ge sp e m le (longuofsingle hf a) v /\ Val.lessdef y v.
+Proof.
+  intros; unfold longuofsingle.
+  destruct x; simpl in H0; inv H0. destruct (Float32.to_longu f) as [n|] eqn:EQ; simpl in H2; inv H2.
+  exploit eval_floatofsingle; eauto. intros (v & A & B). simpl in B. inv B.
+  apply Float32.to_longu_double in EQ. 
+  eapply eval_longuoffloat; eauto. simpl.
+  change (Float.of_single f) with (Float32.to_double f); rewrite EQ; auto.
+Qed.
+
 Theorem eval_singleoflong:
   forall le a x y,
   eval_expr ge sp e m le a x ->
diff --git a/backend/Selection.v b/backend/Selection.v
index f62a888e..9bd37ef5 100644
--- a/backend/Selection.v
+++ b/backend/Selection.v
@@ -70,6 +70,7 @@ Definition sel_constant (cst: Cminor.constant) : expr :=
   match cst with
   | Cminor.Ointconst n => Eop (Ointconst n) Enil
   | Cminor.Ofloatconst f => Eop (Ofloatconst f) Enil
+  | Cminor.Osingleconst f => Eop (Osingleconst f) Enil
   | Cminor.Olongconst n => longconst n
   | Cminor.Oaddrsymbol id ofs => addrsymbol id ofs
   | Cminor.Oaddrstack ofs => addrstack ofs
@@ -85,11 +86,18 @@ Definition sel_unop (op: Cminor.unary_operation) (arg: expr) : expr :=
   | Cminor.Onotint => notint arg
   | Cminor.Onegf => negf arg
   | Cminor.Oabsf => absf arg
+  | Cminor.Onegfs => negfs arg
+  | Cminor.Oabsfs => absfs arg
   | Cminor.Osingleoffloat => singleoffloat arg
+  | Cminor.Ofloatofsingle => floatofsingle arg
   | Cminor.Ointoffloat => intoffloat arg
   | Cminor.Ointuoffloat => intuoffloat arg
   | Cminor.Ofloatofint => floatofint arg
   | Cminor.Ofloatofintu => floatofintu arg
+  | Cminor.Ointofsingle => intofsingle arg
+  | Cminor.Ointuofsingle => intuofsingle arg
+  | Cminor.Osingleofint => singleofint arg
+  | Cminor.Osingleofintu => singleofintu arg
   | Cminor.Onegl => negl hf arg
   | Cminor.Onotl => notl arg
   | Cminor.Ointoflong => intoflong arg
@@ -99,6 +107,8 @@ Definition sel_unop (op: Cminor.unary_operation) (arg: expr) : expr :=
   | Cminor.Olonguoffloat => longuoffloat hf arg
   | Cminor.Ofloatoflong => floatoflong hf arg
   | Cminor.Ofloatoflongu => floatoflongu hf arg
+  | Cminor.Olongofsingle => longofsingle hf arg
+  | Cminor.Olonguofsingle => longuofsingle hf arg
   | Cminor.Osingleoflong => singleoflong hf arg
   | Cminor.Osingleoflongu => singleoflongu hf arg
   end.
@@ -122,6 +132,10 @@ Definition sel_binop (op: Cminor.binary_operation) (arg1 arg2: expr) : expr :=
   | Cminor.Osubf => subf arg1 arg2
   | Cminor.Omulf => mulf arg1 arg2
   | Cminor.Odivf => divf arg1 arg2
+  | Cminor.Oaddfs => addfs arg1 arg2
+  | Cminor.Osubfs => subfs arg1 arg2
+  | Cminor.Omulfs => mulfs arg1 arg2
+  | Cminor.Odivfs => divfs arg1 arg2
   | Cminor.Oaddl => addl hf arg1 arg2
   | Cminor.Osubl => subl hf arg1 arg2
   | Cminor.Omull => mull hf arg1 arg2
@@ -138,6 +152,7 @@ Definition sel_binop (op: Cminor.binary_operation) (arg1 arg2: expr) : expr :=
   | Cminor.Ocmp c => comp c arg1 arg2
   | Cminor.Ocmpu c => compu c arg1 arg2
   | Cminor.Ocmpf c => compf c arg1 arg2
+  | Cminor.Ocmpfs c => compfs c arg1 arg2
   | Cminor.Ocmpl c => cmpl c arg1 arg2
   | Cminor.Ocmplu c => cmplu c arg1 arg2
   end.
diff --git a/backend/Selectionproof.v b/backend/Selectionproof.v
index 175b025c..55977b47 100644
--- a/backend/Selectionproof.v
+++ b/backend/Selectionproof.v
@@ -191,11 +191,18 @@ Proof.
   apply eval_notint; auto.
   apply eval_negf; auto.
   apply eval_absf; auto.
+  apply eval_negfs; auto.
+  apply eval_absfs; auto.
   apply eval_singleoffloat; auto.
+  apply eval_floatofsingle; auto.
   eapply eval_intoffloat; eauto.
   eapply eval_intuoffloat; eauto.
   eapply eval_floatofint; eauto.
   eapply eval_floatofintu; eauto.
+  eapply eval_intofsingle; eauto.
+  eapply eval_intuofsingle; eauto.
+  eapply eval_singleofint; eauto.
+  eapply eval_singleofintu; eauto.
   eapply eval_negl; eauto.
   eapply eval_notl; eauto.
   eapply eval_intoflong; eauto.
@@ -205,6 +212,8 @@ Proof.
   eapply eval_longuoffloat; eauto.
   eapply eval_floatoflong; eauto.
   eapply eval_floatoflongu; eauto.
+  eapply eval_longofsingle; eauto.
+  eapply eval_longuofsingle; eauto.
   eapply eval_singleoflong; eauto.
   eapply eval_singleoflongu; eauto.
 Qed.
@@ -234,6 +243,10 @@ Proof.
   apply eval_subf; auto.
   apply eval_mulf; auto.
   apply eval_divf; auto.
+  apply eval_addfs; auto.
+  apply eval_subfs; auto.
+  apply eval_mulfs; auto.
+  apply eval_divfs; auto.
   eapply eval_addl; eauto.
   eapply eval_subl; eauto.
   eapply eval_mull; eauto.
@@ -250,6 +263,7 @@ Proof.
   apply eval_comp; auto.
   apply eval_compu; auto.
   apply eval_compf; auto.
+  apply eval_compfs; auto.
   exists v; split; auto. eapply eval_cmpl; eauto.
   exists v; split; auto. eapply eval_cmplu; eauto.
 Qed.
@@ -377,6 +391,7 @@ Proof.
   destruct cst; simpl in *; inv H. 
   exists (Vint i); split; auto. econstructor. constructor. auto. 
   exists (Vfloat f); split; auto. econstructor. constructor. auto.
+  exists (Vsingle f); split; auto. econstructor. constructor. auto. 
   exists (Val.longofwords (Vint (Int64.hiword i)) (Vint (Int64.loword i))); split.
   eapply eval_Eop. constructor. EvalOp. simpl; eauto. constructor. EvalOp. simpl; eauto. constructor. auto.
   simpl. rewrite Int64.ofwords_recompose. auto.
diff --git a/backend/Stacking.v b/backend/Stacking.v
index f7c16d14..4ee43bb1 100644
--- a/backend/Stacking.v
+++ b/backend/Stacking.v
@@ -73,12 +73,12 @@ Definition save_callee_save_regs
 Definition save_callee_save_int (fe: frame_env) :=
   save_callee_save_regs 
     fe_num_int_callee_save index_int_callee_save FI_saved_int
-    Tint fe int_callee_save_regs.
+    Tany32 fe int_callee_save_regs.
 
 Definition save_callee_save_float (fe: frame_env) :=
   save_callee_save_regs
     fe_num_float_callee_save index_float_callee_save FI_saved_float 
-    Tfloat fe float_callee_save_regs.
+    Tany64 fe float_callee_save_regs.
 
 Definition save_callee_save (fe: frame_env) (k: Mach.code) :=
   save_callee_save_int fe (save_callee_save_float fe k).
@@ -103,12 +103,12 @@ Definition restore_callee_save_regs
 Definition restore_callee_save_int (fe: frame_env) :=
   restore_callee_save_regs 
     fe_num_int_callee_save index_int_callee_save FI_saved_int
-    Tint fe int_callee_save_regs.
+    Tany32 fe int_callee_save_regs.
 
 Definition restore_callee_save_float (fe: frame_env) :=
   restore_callee_save_regs
     fe_num_float_callee_save index_float_callee_save FI_saved_float 
-    Tfloat fe float_callee_save_regs.
+    Tany64 fe float_callee_save_regs.
 
 Definition restore_callee_save (fe: frame_env) (k: Mach.code) :=
   restore_callee_save_int fe (restore_callee_save_float fe k).
diff --git a/backend/Stackingproof.v b/backend/Stackingproof.v
index b69f1ec2..28b155ab 100644
--- a/backend/Stackingproof.v
+++ b/backend/Stackingproof.v
@@ -132,8 +132,8 @@ Definition type_of_index (idx: frame_index) :=
   | FI_retaddr => Tint
   | FI_local x ty => ty
   | FI_arg x ty => ty
-  | FI_saved_int x => Tint
-  | FI_saved_float x => Tfloat
+  | FI_saved_int x => Tany32
+  | FI_saved_float x => Tany64
   end.
 
 (** Non-overlap between the memory areas corresponding to two
@@ -194,8 +194,8 @@ Proof.
   destruct idx1; destruct idx2;
   simpl in V1; simpl in V2; repeat InvIndexValid; simpl in DIFF;
   unfold offset_of_index, type_of_index;
+  change (AST.typesize Tany32) with 4; change (AST.typesize Tany64) with 8;
   change (AST.typesize Tint) with 4;
-  change (AST.typesize Tfloat) with 8;
   omega.
 Qed.
 
@@ -211,8 +211,8 @@ Proof.
   destruct idx;
   simpl in V; repeat InvIndexValid;
   unfold offset_of_index, type_of_index;
+  change (AST.typesize Tany32) with 4; change (AST.typesize Tany64) with 8;
   change (AST.typesize Tint) with 4;
-  change (AST.typesize Tfloat) with 8;
   omega.
 Qed.
 
@@ -254,6 +254,17 @@ Proof.
   auto with align_4.
 Qed.
 
+Lemma offset_of_index_aligned_2:
+  forall idx, index_valid idx ->
+  (align_chunk (chunk_of_type (type_of_index idx)) | offset_of_index fe idx).
+Proof.
+  intros. replace (align_chunk (chunk_of_type (type_of_index idx))) with 4.
+  apply offset_of_index_aligned.
+  assert (type_of_index idx <> Tlong) by
+    (destruct idx; simpl; simpl in H; intuition congruence).
+  destruct (type_of_index idx); auto; congruence.
+Qed.
+
 Lemma fe_stack_data_aligned:
   (8 | fe_stack_data fe).
 Proof.
@@ -271,7 +282,7 @@ Lemma index_local_valid:
 Proof.
   unfold slot_within_bounds, slot_valid, index_valid; intros. 
   InvBooleans. 
-  split. destruct ty; congruence. auto.
+  split. destruct ty; auto || discriminate. auto.
 Qed.
 
 Lemma index_arg_valid:
@@ -281,7 +292,7 @@ Lemma index_arg_valid:
 Proof.
   unfold slot_within_bounds, slot_valid, index_valid; intros. 
   InvBooleans. 
-  split. destruct ty; congruence. auto.
+  split. destruct ty; auto || discriminate. auto. 
 Qed.
 
 Lemma index_saved_int_valid:
@@ -322,8 +333,8 @@ Proof.
   AddPosProps.
   destruct idx; simpl in V; repeat InvIndexValid;
   unfold offset_of_index, type_of_index;
+  change (AST.typesize Tany32) with 4; change (AST.typesize Tany64) with 8;
   change (AST.typesize Tint) with 4;
-  change (AST.typesize Tfloat) with 8;
   omega.
 Qed.
 
@@ -414,7 +425,7 @@ Proof.
   intros. exploit gss_index_contains_base; eauto. intros [v' [A B]].
   assert (v' = v).
     destruct v; destruct (type_of_index idx); simpl in *;
-    try contradiction; auto. rewrite Floats.Float.singleoffloat_of_single in B; auto. 
+    try contradiction; auto.
   subst v'. auto.
 Qed.
 
@@ -479,11 +490,7 @@ Proof.
   rewrite size_type_chunk. 
   apply Mem.range_perm_implies with Freeable; auto with mem.
   apply offset_of_index_perm; auto.
-  replace (align_chunk (chunk_of_type (type_of_index idx))) with 4.
-  apply offset_of_index_aligned; auto.
-  assert (type_of_index idx <> Tlong).
-    destruct idx; simpl in *; tauto || congruence.
-  destruct (type_of_index idx); reflexivity || congruence. 
+  apply offset_of_index_aligned_2; auto.
   exists m'; auto.
 Qed.
 
@@ -514,7 +521,8 @@ Proof.
   intros. exploit gss_index_contains_base; eauto. intros [v'' [A B]].
   exists v''; split; auto.
   inv H2; destruct (type_of_index idx); simpl in *; try contradiction; subst; auto.
-  rewrite Floats.Float.singleoffloat_of_single; auto. 
+  econstructor; eauto.
+  econstructor; eauto.
   econstructor; eauto.
 Qed.
 
@@ -529,6 +537,8 @@ Proof.
   exists v''; split; auto.
   inv H1; destruct (type_of_index idx); simpl in *; try contradiction; subst; auto. 
   econstructor; eauto.
+  econstructor; eauto.
+  econstructor; eauto.
 Qed.
 
 Lemma gso_index_contains_inj:
@@ -576,11 +586,7 @@ Proof.
   rewrite size_type_chunk.
   apply Mem.range_perm_implies with Freeable; auto with mem.
   apply offset_of_index_perm; auto. 
-  replace (align_chunk (chunk_of_type (type_of_index idx))) with 4. 
-  apply offset_of_index_aligned.
-  assert (type_of_index idx <> Tlong).
-    destruct idx; simpl in *; tauto || congruence.
-  destruct (type_of_index idx); reflexivity || congruence. 
+  apply offset_of_index_aligned_2; auto.
   intros [v C]. 
   exists v; split; auto. constructor; auto. 
 Qed.
@@ -844,18 +850,17 @@ Lemma agree_frame_set_local:
   forall j ls ls0 m sp m' sp' parent retaddr ofs ty v v' m'',
   agree_frame j ls ls0 m sp m' sp' parent retaddr ->
   slot_within_bounds b Local ofs ty -> slot_valid f Local ofs ty = true ->
-  Val.has_type v ty ->
   val_inject j v v' ->
   Mem.store (chunk_of_type ty) m' sp' (offset_of_index fe (FI_local ofs ty)) v' = Some m'' ->
   agree_frame j (Locmap.set (S Local ofs ty) v ls) ls0 m sp m'' sp' parent retaddr.
 Proof.
   intros. inv H. 
-  change (chunk_of_type ty) with (chunk_of_type (type_of_index (FI_local ofs ty))) in H4.
+  change (chunk_of_type ty) with (chunk_of_type (type_of_index (FI_local ofs ty))) in H3.
   constructor; auto; intros.
 (* local *)
   unfold Locmap.set.
   destruct (Loc.eq (S Local ofs ty) (S Local ofs0 ty0)).
-  inv e. eapply gss_index_contains_inj; eauto with stacking.
+  inv e. eapply gss_index_contains_inj'; eauto with stacking.
   destruct (Loc.diff_dec (S Local ofs ty) (S Local ofs0 ty0)).
   eapply gso_index_contains_inj. eauto. eauto with stacking. eauto. 
   simpl. simpl in d. intuition.
@@ -884,20 +889,19 @@ Lemma agree_frame_set_outgoing:
   forall j ls ls0 m sp m' sp' parent retaddr ofs ty v v' m'',
   agree_frame j ls ls0 m sp m' sp' parent retaddr ->
   slot_within_bounds b Outgoing ofs ty -> slot_valid f Outgoing ofs ty = true ->
-  Val.has_type v ty ->
   val_inject j v v' ->
   Mem.store (chunk_of_type ty) m' sp' (offset_of_index fe (FI_arg ofs ty)) v' = Some m'' ->
   agree_frame j (Locmap.set (S Outgoing ofs ty) v ls) ls0 m sp m'' sp' parent retaddr.
 Proof.
   intros. inv H. 
-  change (chunk_of_type ty) with (chunk_of_type (type_of_index (FI_arg ofs ty))) in H4.
+  change (chunk_of_type ty) with (chunk_of_type (type_of_index (FI_arg ofs ty))) in H3.
   constructor; auto; intros.
 (* local *)
   rewrite Locmap.gso. eapply gso_index_contains_inj; eauto with stacking. red; auto.
   red; left; congruence.
 (* outgoing *)
   unfold Locmap.set. destruct (Loc.eq (S Outgoing ofs ty) (S Outgoing ofs0 ty0)).
-  inv e. eapply gss_index_contains_inj; eauto with stacking.
+  inv e. eapply gss_index_contains_inj'; eauto with stacking.
   destruct (Loc.diff_dec (S Outgoing ofs ty) (S Outgoing ofs0 ty0)).
   eapply gso_index_contains_inj; eauto with stacking.
   red. red in d. intuition. 
@@ -1145,12 +1149,6 @@ Proof.
   apply check_mreg_list_incl; compute; auto.
 Qed.
 
-Remark destroyed_by_move_at_function_entry:
-  incl (destroyed_by_op Omove) destroyed_at_function_entry.
-Proof.
-  apply check_mreg_list_incl; compute; auto.
-Qed.
-
 Remark temp_for_parent_frame_caller_save:
   In temp_for_parent_frame destroyed_at_call.
 Proof.
@@ -1164,6 +1162,12 @@ Hint Resolve destroyed_by_op_caller_save destroyed_by_load_caller_save
     destroyed_by_cond_caller_save destroyed_by_jumptable_caller_save
     destroyed_at_function_entry_caller_save: stacking.
 
+Remark destroyed_by_setstack_function_entry:
+  forall ty, incl (destroyed_by_setstack ty) destroyed_at_function_entry.
+Proof.
+  destruct ty; apply check_mreg_list_incl; compute; auto. 
+Qed.
+
 Remark transl_destroyed_by_op:
   forall op e, destroyed_by_op (transl_op e op) = destroyed_by_op op.
 Proof.
@@ -1367,12 +1371,12 @@ Lemma save_callee_save_correct:
   /\ agree_regs j ls rs'.
 Proof.
   intros.
-  assert (UNDEF: forall r, In r (destroyed_by_op Omove) -> ls (R r) = Vundef).
-    intros. unfold ls. apply LTL_undef_regs_same. apply destroyed_by_move_at_function_entry; auto.
+  assert (UNDEF: forall r ty, In r (destroyed_by_setstack ty) -> ls (R r) = Vundef).
+    intros. unfold ls. apply LTL_undef_regs_same. eapply destroyed_by_setstack_function_entry; eauto.
   exploit (save_callee_save_regs_correct 
              fe_num_int_callee_save
              index_int_callee_save
-             FI_saved_int Tint
+             FI_saved_int Tany32
              j cs fb sp int_callee_save_regs ls).
   intros. apply index_int_callee_save_inj; auto. 
   intros. simpl. split. apply Zge_le. apply index_int_callee_save_pos; auto. assumption.
@@ -1380,8 +1384,7 @@ Proof.
   intros; congruence.
   intros; simpl. destruct idx; auto. congruence.
   intros. apply int_callee_save_type. auto.
-Local Transparent destroyed_by_setstack.
-  simpl. tauto.
+  eauto.
   auto.
   apply incl_refl. 
   apply int_callee_save_norepet.
@@ -1391,7 +1394,7 @@ Local Transparent destroyed_by_setstack.
   exploit (save_callee_save_regs_correct 
              fe_num_float_callee_save
              index_float_callee_save
-             FI_saved_float Tfloat
+             FI_saved_float Tany64
              j cs fb sp float_callee_save_regs ls).
   intros. apply index_float_callee_save_inj; auto. 
   intros. simpl. split. apply Zge_le. apply index_float_callee_save_pos; auto. assumption.
@@ -1399,7 +1402,7 @@ Local Transparent destroyed_by_setstack.
   intros; congruence.
   intros; simpl. destruct idx; auto. congruence.
   intros. apply float_callee_save_type. auto.
-  simpl. tauto. 
+  eauto.
   auto. 
   apply incl_refl. 
   apply float_callee_save_norepet.
@@ -1462,6 +1465,16 @@ Proof.
   left. apply Plt_ne; auto.
 Qed.
 
+Lemma undef_regs_type:
+  forall ty l rl ls,
+  Val.has_type (ls l) ty -> Val.has_type (LTL.undef_regs rl ls l) ty.
+Proof.
+  induction rl; simpl; intros.
+- auto.
+- unfold Locmap.set. destruct (Loc.eq (R a) l). red; auto. 
+  destruct (Loc.diff_dec (R a) l); auto. red; auto.
+Qed.
+
 (** As a corollary of the previous lemmas, we obtain the following
   correctness theorem for the execution of a function prologue
   (allocation of the frame + saving of the link and return address +
@@ -1732,7 +1745,7 @@ Proof.
              fe_num_int_callee_save
              index_int_callee_save
              FI_saved_int
-             Tint
+             Tany32
              int_callee_save_regs
              j cs fb sp' ls0 m'); auto.
   intros. unfold mreg_within_bounds. split; intros.
@@ -1750,7 +1763,7 @@ Proof.
              fe_num_float_callee_save
              index_float_callee_save
              FI_saved_float
-             Tfloat
+             Tany64
              float_callee_save_regs
              j cs fb sp' ls0 m'); auto.
   intros. unfold mreg_within_bounds. split; intros.
@@ -2318,8 +2331,8 @@ Proof.
   unfold parent_sp.
   assert (slot_valid f Outgoing pos ty = true).
     exploit loc_arguments_acceptable; eauto. intros [A B]. 
-    unfold slot_valid. unfold proj_sumbool. rewrite zle_true. 
-    destruct ty; reflexivity || congruence. omega.
+    unfold slot_valid. unfold proj_sumbool. rewrite zle_true by omega.
+    destruct ty; auto; congruence.
   assert (slot_within_bounds (function_bounds f) Outgoing pos ty).
     eauto.
   exploit agree_outgoing; eauto. intros [v [A B]].
@@ -2525,7 +2538,7 @@ Proof.
   apply agree_frame_set_reg; auto.
 
 - (* Lsetstack *)
-  exploit wt_state_setstack; eauto. intros (VTY & SV & SW). 
+  exploit wt_state_setstack; eauto. intros (SV & SW). 
   set (idx := match sl with
               | Local => FI_local ofs ty
               | Incoming => FI_link (*dummy*)
@@ -2552,15 +2565,15 @@ Proof.
     omega.
   apply match_stacks_change_mach_mem with m'; auto.
   eauto with mem. eauto with mem. intros. rewrite <- H1; eapply Mem.load_store_other; eauto. left; apply Plt_ne; auto. 
-  eauto. eauto. auto. 
+  eauto. eauto. 
   apply agree_regs_set_slot. apply agree_regs_undef_regs; auto. 
   destruct sl.
   + eapply agree_frame_set_local. eapply agree_frame_undef_locs; eauto.
-    apply destroyed_by_setstack_caller_save. auto. auto. auto. apply AGREGS. 
+    apply destroyed_by_setstack_caller_save. auto. auto. auto.
     assumption. 
   + simpl in SW; discriminate.
   + eapply agree_frame_set_outgoing. eapply agree_frame_undef_locs; eauto.
-    apply destroyed_by_setstack_caller_save. auto. auto. auto. apply AGREGS.
+    apply destroyed_by_setstack_caller_save. auto. auto. auto.
     assumption.
   + eauto with coqlib.
 
@@ -2613,13 +2626,15 @@ Proof.
   apply plus_one. econstructor. 
   instantiate (1 := a'). rewrite <- A. apply eval_addressing_preserved. exact symbols_preserved.
   eexact C. eauto.
-  econstructor; eauto with coqlib.
+  econstructor. eauto.
   eapply match_stacks_parallel_stores. eexact MINJ. eexact B. eauto. eauto. auto. 
+  eauto. eauto. 
   rewrite transl_destroyed_by_store. 
   apply agree_regs_undef_regs; auto. 
   apply agree_frame_undef_locs; auto.
   eapply agree_frame_parallel_stores; eauto.
-  apply destroyed_by_store_caller_save. 
+  apply destroyed_by_store_caller_save.
+  eauto with coqlib.
 
 - (* Lcall *)
   exploit find_function_translated; eauto. intros [bf [tf' [A [B C]]]].
diff --git a/backend/ValueAnalysis.v b/backend/ValueAnalysis.v
index e293028d..813944d6 100644
--- a/backend/ValueAnalysis.v
+++ b/backend/ValueAnalysis.v
@@ -166,7 +166,7 @@ Definition store_init_data (ab: ablock) (p: Z) (id: init_data) : ablock :=
   | Init_int32 n => ablock_store Mint32 ab p (I n)
   | Init_int64 n => ablock_store Mint64 ab p (L n)
   | Init_float32 n => ablock_store Mfloat32 ab p
-                        (if propagate_float_constants tt then F n else ftop)
+                        (if propagate_float_constants tt then FS n else ftop)
   | Init_float64 n => ablock_store Mfloat64 ab p
                         (if propagate_float_constants tt then F n else ftop)
   | Init_addrof symb ofs => ablock_store Mint32 ab p (Ptr (Gl symb ofs))
@@ -922,7 +922,7 @@ Proof.
     eapply VMTOP; eauto.
   - exploit BC'INV; eauto. intros (b'' & delta & J'). 
     exploit Mem.loadbytes_inject. eexact IMEM. eauto. eauto. intros (bytes & A & B).
-    inv B. inv H3. eapply PMTOP; eauto. 
+    inv B. inv H3. inv H7. eapply PMTOP; eauto. 
   }
   (* Conclusions *)
   exists bc'; splitall.
diff --git a/backend/ValueDomain.v b/backend/ValueDomain.v
index c8431bb7..75dd9d20 100644
--- a/backend/ValueDomain.v
+++ b/backend/ValueDomain.v
@@ -509,13 +509,13 @@ Inductive aval : Type :=
   | Sgn (n: Z)
   | L (n: int64)
   | F (f: float)
-  | Fsingle
+  | FS (f: float32)
   | Ptr (p: aptr)
   | Ifptr (p: aptr).
 
 Definition eq_aval: forall (v1 v2: aval), {v1=v2} + {v1<>v2}.
 Proof.
-  intros. generalize zeq Int.eq_dec Int64.eq_dec Float.eq_dec eq_aptr; intros.
+  intros. generalize zeq Int.eq_dec Int64.eq_dec Float.eq_dec Float32.eq_dec eq_aptr; intros.
   decide equality. 
 Defined.
 
@@ -537,14 +537,14 @@ Inductive vmatch : val -> aval -> Prop :=
   | vmatch_Sgn_undef: forall n, vmatch Vundef (Sgn n)
   | vmatch_l: forall i, vmatch (Vlong i) (L i)
   | vmatch_f: forall f, vmatch (Vfloat f) (F f)
-  | vmatch_single: forall f, Float.is_single f -> vmatch (Vfloat f) Fsingle
-  | vmatch_single_undef: vmatch Vundef Fsingle
+  | vmatch_s: forall f, vmatch (Vsingle f) (FS f)
   | vmatch_ptr: forall b ofs p, pmatch b ofs p -> vmatch (Vptr b ofs) (Ptr p)
   | vmatch_ptr_undef: forall p, vmatch Vundef (Ptr p)
   | vmatch_ifptr_undef: forall p, vmatch Vundef (Ifptr p)
   | vmatch_ifptr_i: forall i p, vmatch (Vint i) (Ifptr p)
   | vmatch_ifptr_l: forall i p, vmatch (Vlong i) (Ifptr p)
   | vmatch_ifptr_f: forall f p, vmatch (Vfloat f) (Ifptr p)
+  | vmatch_ifptr_s: forall f p, vmatch (Vsingle f) (Ifptr p)
   | vmatch_ifptr_p: forall b ofs p, pmatch b ofs p -> vmatch (Vptr b ofs) (Ifptr p).
 
 Lemma vmatch_ifptr:
@@ -569,11 +569,14 @@ Proof. constructor. Qed.
 Lemma vmatch_ftop: forall f, vmatch (Vfloat f) ftop.
 Proof. intros; constructor. Qed.
 
+Lemma vmatch_ftop_single: forall f, vmatch (Vsingle f) ftop.
+Proof. intros; constructor. Qed.
+
 Lemma vmatch_undef_ftop: vmatch Vundef ftop.
 Proof. constructor. Qed.    
 
 Hint Constructors vmatch : va.
-Hint Resolve vmatch_itop vmatch_undef_itop vmatch_ftop vmatch_undef_ftop : va.
+Hint Resolve vmatch_itop vmatch_undef_itop vmatch_ftop vmatch_ftop_single vmatch_undef_ftop : va.
 
 (* Some properties about [is_uns] and [is_sgn]. *)
 
@@ -770,22 +773,21 @@ Inductive vge: aval -> aval -> Prop :=
   | vge_i: forall i, vge (I i) (I i)
   | vge_l: forall i, vge (L i) (L i)
   | vge_f: forall f, vge (F f) (F f)
+  | vge_s: forall f, vge (FS f) (FS f)
   | vge_uns_i: forall n i, 0 <= n -> is_uns n i -> vge (Uns n) (I i)
   | vge_uns_uns: forall n1 n2, n1 >= n2 -> vge (Uns n1) (Uns n2)
   | vge_sgn_i: forall n i, 0 < n -> is_sgn n i -> vge (Sgn n) (I i)
   | vge_sgn_sgn: forall n1 n2, n1 >= n2 -> vge (Sgn n1) (Sgn n2)
   | vge_sgn_uns: forall n1 n2, n1 > n2 -> vge (Sgn n1) (Uns n2)
-  | vge_single_f: forall f, Float.is_single f -> vge Fsingle (F f)
-  | vge_single: vge Fsingle Fsingle
   | vge_p_p: forall p q, pge p q -> vge (Ptr p) (Ptr q)
   | vge_ip_p: forall p q, pge p q -> vge (Ifptr p) (Ptr q)
   | vge_ip_ip: forall p q, pge p q -> vge (Ifptr p) (Ifptr q)
   | vge_ip_i: forall p i, vge (Ifptr p) (I i)
   | vge_ip_l: forall p i, vge (Ifptr p) (L i)
   | vge_ip_f: forall p f, vge (Ifptr p) (F f)
+  | vge_ip_s: forall p f, vge (Ifptr p) (FS f)
   | vge_ip_uns: forall p n, vge (Ifptr p) (Uns n)
-  | vge_ip_sgn: forall p n, vge (Ifptr p) (Sgn n)
-  | vge_ip_single: forall p, vge (Ifptr p) Fsingle.
+  | vge_ip_sgn: forall p n, vge (Ifptr p) (Sgn n).
 
 Hint Constructors vge : va.
 
@@ -836,12 +838,9 @@ Definition vlub (v w: aval) : aval :=
   | Sgn n1, Uns n2 => sgn (Z.max n1 (n2 + 1))
   | Sgn n1, Sgn n2 => sgn (Z.max n1 n2)
   | F f1, F f2 =>
-      if Float.eq_dec f1 f2 then v else
-      if Float.is_single_dec f1 && Float.is_single_dec f2 then Fsingle else ftop
-  | F f, Fsingle | Fsingle, F f =>
-      if Float.is_single_dec f then Fsingle else ftop
-  | Fsingle, Fsingle =>
-      Fsingle
+      if Float.eq_dec f1 f2 then v else ftop
+  | FS f1, FS f2 =>
+      if Float32.eq_dec f1 f2 then v else ftop
   | L i1, L i2 =>
       if Int64.eq i1 i2 then v else ltop
   | Ptr p1, Ptr p2 => Ptr(plub p1 p2)
@@ -865,8 +864,8 @@ Proof.
 - f_equal; apply Z.max_comm.
 - f_equal; apply Z.max_comm.
 - rewrite Int64.eq_sym. predSpec Int64.eq Int64.eq_spec n0 n; congruence.
-- rewrite dec_eq_sym. destruct (Float.eq_dec f0 f). congruence. 
-  rewrite andb_comm. auto.
+- rewrite dec_eq_sym. destruct (Float.eq_dec f0 f). congruence. auto.
+- rewrite dec_eq_sym. destruct (Float32.eq_dec f0 f). congruence. auto.
 - f_equal; apply plub_comm.
 - f_equal; apply plub_comm.
 - f_equal; apply plub_comm.
@@ -937,12 +936,8 @@ Proof.
 - eapply vge_trans. apply vge_sgn_sgn'. eauto with va.
 - eapply vge_trans. apply vge_sgn_sgn'. eauto with va.
 - destruct (Int64.eq n n0); constructor.
-- destruct (Float.eq_dec f f0). constructor.
-  destruct (Float.is_single_dec f && Float.is_single_dec f0) eqn:FS.
-  InvBooleans. auto with va.
-  constructor.
-- destruct (Float.is_single_dec f); constructor; auto. 
-- destruct (Float.is_single_dec f); constructor; auto.
+- destruct (Float.eq_dec f f0); constructor.
+- destruct (Float32.eq_dec f f0); constructor.
 - constructor; apply pge_lub_l; auto.
 - constructor; apply pge_lub_l; auto.
 - constructor; apply pge_lub_l; auto.
@@ -1043,8 +1038,7 @@ Definition vincl (v w: aval) : bool :=
   | Sgn n, Sgn m => zle n m
   | L i, L j => Int64.eq_dec i j
   | F i, F j => Float.eq_dec i j
-  | F i, Fsingle => Float.is_single_dec i
-  | Fsingle, Fsingle => true
+  | FS i, FS j => Float32.eq_dec i j
   | Ptr p, Ptr q => pincl p q
   | Ptr p, Ifptr q => pincl p q
   | Ifptr p, Ifptr q => pincl p q
@@ -1063,7 +1057,7 @@ Proof.
   InvBooleans. constructor; auto with va.
   InvBooleans. subst; auto with va.
   InvBooleans. subst; auto with va.
-  InvBooleans. auto with va.
+  InvBooleans. subst; auto with va.
   constructor; apply pincl_ge; auto.
   constructor; apply pincl_ge; auto.
   constructor; apply pincl_ge; auto.
@@ -1154,6 +1148,28 @@ Proof.
   intros. unfold binop_float; inv H; auto with va; inv H0; auto with va.
 Qed.
 
+Definition unop_single (sem: float32 -> float32) (x: aval) :=
+  match x with FS n => FS (sem n) | _ => ftop end.
+
+Lemma unop_single_sound:
+  forall sem v x,
+  vmatch v x ->
+  vmatch (match v with Vsingle i => Vsingle(sem i) | _ => Vundef end) (unop_single sem x).
+Proof.
+  intros. unfold unop_single; inv H; auto with va.
+Qed.
+
+Definition binop_single (sem: float32 -> float32 -> float32) (x y: aval) :=
+  match x, y with FS n, FS m => FS (sem n m) | _, _ => ftop end.
+
+Lemma binop_single_sound:
+  forall sem v x w y,
+  vmatch v x -> vmatch w y ->
+  vmatch (match v, w with Vsingle i, Vsingle j => Vsingle(sem i j) | _, _ => Vundef end) (binop_single sem x y).
+Proof.
+  intros. unfold binop_single; inv H; auto with va; inv H0; auto with va.
+Qed.
+
 (** Logical operations *)
 
 Definition shl (v w: aval) :=
@@ -1636,6 +1652,42 @@ Lemma divf_sound:
   forall v x w y, vmatch v x -> vmatch w y -> vmatch (Val.divf v w) (divf x y).
 Proof (binop_float_sound Float.div).
 
+Definition negfs := unop_single Float32.neg.
+
+Lemma negfs_sound: 
+  forall v x, vmatch v x -> vmatch (Val.negfs v) (negfs x).
+Proof (unop_single_sound Float32.neg).
+
+Definition absfs := unop_single Float32.abs.
+
+Lemma absfs_sound: 
+  forall v x, vmatch v x -> vmatch (Val.absfs v) (absfs x).
+Proof (unop_single_sound Float32.abs).
+
+Definition addfs := binop_single Float32.add.
+
+Lemma addfs_sound: 
+  forall v x w y, vmatch v x -> vmatch w y -> vmatch (Val.addfs v w) (addfs x y).
+Proof (binop_single_sound Float32.add).
+
+Definition subfs := binop_single Float32.sub.
+
+Lemma subfs_sound: 
+  forall v x w y, vmatch v x -> vmatch w y -> vmatch (Val.subfs v w) (subfs x y).
+Proof (binop_single_sound Float32.sub).
+
+Definition mulfs := binop_single Float32.mul.
+
+Lemma mulfs_sound: 
+  forall v x w y, vmatch v x -> vmatch w y -> vmatch (Val.mulfs v w) (mulfs x y).
+Proof (binop_single_sound Float32.mul).
+
+Definition divfs := binop_single Float32.div.
+
+Lemma divfs_sound: 
+  forall v x w y, vmatch v x -> vmatch w y -> vmatch (Val.divfs v w) (divfs x y).
+Proof (binop_single_sound Float32.div).
+
 (** Conversions *)
 
 Definition zero_ext (nbits: Z) (v: aval) :=
@@ -1683,23 +1735,38 @@ Qed.
 
 Definition singleoffloat (v: aval) :=
   match v with
-  | F f => F (Float.singleoffloat f)
-  | _   => Fsingle
+  | F f => FS (Float.to_single f)
+  | _   => ftop
   end.
 
 Lemma singleoffloat_sound:
   forall v x, vmatch v x -> vmatch (Val.singleoffloat v) (singleoffloat x).
 Proof.
-  intros. 
-  assert (DEFAULT: vmatch (Val.singleoffloat v) Fsingle).
-  { destruct v; constructor. apply Float.singleoffloat_is_single. }
+  intros.  
+  assert (DEFAULT: vmatch (Val.singleoffloat v) ftop).
+  { destruct v; constructor. }
+  destruct x; auto. inv H. constructor. 
+Qed.
+
+Definition floatofsingle (v: aval) :=
+  match v with
+  | FS f => F (Float.of_single f)
+  | _   => ftop
+  end.
+
+Lemma floatofsingle_sound:
+  forall v x, vmatch v x -> vmatch (Val.floatofsingle v) (floatofsingle x).
+Proof.
+  intros.  
+  assert (DEFAULT: vmatch (Val.floatofsingle v) ftop).
+  { destruct v; constructor. }
   destruct x; auto. inv H. constructor. 
 Qed.
 
 Definition intoffloat (x: aval) :=
   match x with
   | F f =>
-      match Float.intoffloat f with
+      match Float.to_int f with
       | Some i => I i
       | None => if va_strict tt then Vbot else itop
       end
@@ -1710,14 +1777,14 @@ Lemma intoffloat_sound:
   forall v x w, vmatch v x -> Val.intoffloat v = Some w -> vmatch w (intoffloat x).
 Proof.
   unfold Val.intoffloat; intros. destruct v; try discriminate. 
-  destruct (Float.intoffloat f) as [i|] eqn:E; simpl in H0; inv H0.
+  destruct (Float.to_int f) as [i|] eqn:E; simpl in H0; inv H0.
   inv H; simpl; auto with va. rewrite E; constructor. 
 Qed.
 
 Definition intuoffloat (x: aval) :=
   match x with
   | F f =>
-      match Float.intuoffloat f with
+      match Float.to_intu f with
       | Some i => I i
       | None => if va_strict tt then Vbot else itop
       end
@@ -1728,13 +1795,13 @@ Lemma intuoffloat_sound:
   forall v x w, vmatch v x -> Val.intuoffloat v = Some w -> vmatch w (intuoffloat x).
 Proof.
   unfold Val.intuoffloat; intros. destruct v; try discriminate. 
-  destruct (Float.intuoffloat f) as [i|] eqn:E; simpl in H0; inv H0.
+  destruct (Float.to_intu f) as [i|] eqn:E; simpl in H0; inv H0.
   inv H; simpl; auto with va. rewrite E; constructor. 
 Qed.
 
 Definition floatofint (x: aval) :=
   match x with
-  | I i => F(Float.floatofint i)
+  | I i => F(Float.of_int i)
   | _   => ftop
   end.
 
@@ -1747,7 +1814,7 @@ Qed.
 
 Definition floatofintu (x: aval) :=
   match x with
-  | I i => F(Float.floatofintu i)
+  | I i => F(Float.of_intu i)
   | _   => ftop
   end.
 
@@ -1758,6 +1825,68 @@ Proof.
   inv H; simpl; auto with va.
 Qed.
 
+Definition intofsingle (x: aval) :=
+  match x with
+  | FS f =>
+      match Float32.to_int f with
+      | Some i => I i
+      | None => if va_strict tt then Vbot else itop
+      end
+  | _ => itop
+  end.
+
+Lemma intofsingle_sound:
+  forall v x w, vmatch v x -> Val.intofsingle v = Some w -> vmatch w (intofsingle x).
+Proof.
+  unfold Val.intofsingle; intros. destruct v; try discriminate. 
+  destruct (Float32.to_int f) as [i|] eqn:E; simpl in H0; inv H0.
+  inv H; simpl; auto with va. rewrite E; constructor. 
+Qed.
+
+Definition intuofsingle (x: aval) :=
+  match x with
+  | FS f =>
+      match Float32.to_intu f with
+      | Some i => I i
+      | None => if va_strict tt then Vbot else itop
+      end
+  | _ => itop
+  end.
+
+Lemma intuofsingle_sound:
+  forall v x w, vmatch v x -> Val.intuofsingle v = Some w -> vmatch w (intuofsingle x).
+Proof.
+  unfold Val.intuofsingle; intros. destruct v; try discriminate. 
+  destruct (Float32.to_intu f) as [i|] eqn:E; simpl in H0; inv H0.
+  inv H; simpl; auto with va. rewrite E; constructor. 
+Qed.
+
+Definition singleofint (x: aval) :=
+  match x with
+  | I i => FS(Float32.of_int i)
+  | _   => ftop
+  end.
+
+Lemma singleofint_sound:
+  forall v x w, vmatch v x -> Val.singleofint v = Some w -> vmatch w (singleofint x).
+Proof.
+  unfold Val.singleofint; intros. destruct v; inv H0.
+  inv H; simpl; auto with va.
+Qed.
+
+Definition singleofintu (x: aval) :=
+  match x with
+  | I i => FS(Float32.of_intu i)
+  | _   => ftop
+  end.
+
+Lemma singleofintu_sound:
+  forall v x w, vmatch v x -> Val.singleofintu v = Some w -> vmatch w (singleofintu x).
+Proof.
+  unfold Val.singleofintu; intros. destruct v; inv H0.
+  inv H; simpl; auto with va.
+Qed.
+
 Definition floatofwords (x y: aval) :=
   match x, y with
   | I i, I j => F(Float.from_words i j)
@@ -1862,6 +1991,18 @@ Proof.
   intros. inv H; try constructor; inv H0; constructor. 
 Qed.
 
+Definition cmpfs_bool (c: comparison) (v w: aval) : abool :=
+  match v, w with
+  | FS f1, FS f2 => Just (Float32.cmp c f1 f2)
+  | _, _ => Btop
+  end.
+
+Lemma cmpfs_bool_sound:
+  forall c v w x y, vmatch v x -> vmatch w y -> cmatch (Val.cmpfs_bool c v w) (cmpfs_bool c x y).
+Proof.
+  intros. inv H; try constructor; inv H0; constructor. 
+Qed.
+
 Definition maskzero (x: aval) (mask: int) : abool :=
   match x with
   | I i => Just (Int.eq (Int.and i mask) Int.zero)
@@ -1939,9 +2080,10 @@ Definition vnormalize (chunk: memory_chunk) (v: aval) :=
   | Mint16unsigned, _ => Uns 16
   | Mint32, (I _ | Ptr _ | Ifptr _) => v
   | Mint64, L _ => v
-  | Mfloat32, F f => F (Float.singleoffloat f)
-  | Mfloat32, _ => Fsingle
+  | Mfloat32, FS f => v
   | Mfloat64, F f => v
+  | Many32, (I _ | Ptr _ | Ifptr _ | FS _) => v
+  | Many64, _ => v
   | _, _ => Ifptr Pbot
   end.
 
@@ -1963,12 +2105,10 @@ Proof.
 - constructor. omega. apply is_zero_ext_uns; auto with va.
 - constructor. xomega. apply is_sign_ext_sgn; auto with va. apply Z.min_case; auto with va.
 - constructor. omega. apply is_zero_ext_uns; auto with va.
-- constructor. apply Float.singleoffloat_is_single.
 - constructor. omega. apply is_sign_ext_sgn; auto with va.
 - constructor. omega. apply is_zero_ext_uns; auto with va.
 - constructor. omega. apply is_sign_ext_sgn; auto with va.
 - constructor. omega. apply is_zero_ext_uns; auto with va.
-- constructor. apply Float.singleoffloat_is_single.
 Qed.
 
 Lemma vnormalize_cast:
@@ -1992,9 +2132,13 @@ Proof.
 - (* int64 *)
   destruct v; try contradiction; constructor.
 - (* float32 *)
-  rewrite H2. destruct v; simpl; constructor. apply Float.singleoffloat_is_single. 
+  destruct v; try contradiction; constructor.
 - (* float64 *)
   destruct v; try contradiction; constructor.
+- (* any32 *)
+  auto.
+- (* any64 *)
+  auto.
 Qed.
 
 Lemma vnormalize_monotone:
@@ -2024,12 +2168,10 @@ Proof.
 - constructor; auto with va. apply is_zero_ext_uns; auto with va.
 - destruct (zlt n2 8); constructor; auto with va.
 - destruct (zlt n2 16); constructor; auto with va.
-- constructor. apply Float.singleoffloat_is_single.
 - constructor; auto with va. apply is_sign_ext_sgn; auto with va.
 - constructor; auto with va. apply is_zero_ext_uns; auto with va.
 - constructor; auto with va. apply is_sign_ext_sgn; auto with va.
 - constructor; auto with va. apply is_zero_ext_uns; auto with va.
-- constructor. apply Float.singleoffloat_is_single.
 - destruct (zlt n 8); constructor; auto with va.
 - destruct (zlt n 16); constructor; auto with va.
 Qed.
@@ -2064,6 +2206,8 @@ Definition chunk_compat (chunk chunk': memory_chunk) : bool :=
   | Mfloat32, Mfloat32 => true
   | Mint64, Mint64 => true
   | Mfloat64, Mfloat64 => true
+  | Many32, Many32 => true
+  | Many64, Many64 => true
   | _, _ => false
   end.
 
@@ -2148,7 +2292,7 @@ Definition ablock_storebytes_anywhere (ab: ablock) (p: aptr) :=
 
 Definition smatch (m: mem) (b: block) (p: aptr) : Prop :=
   (forall chunk ofs v, Mem.load chunk m b ofs = Some v -> vmatch v (Ifptr p))
-/\(forall ofs b' ofs' i, Mem.loadbytes m b ofs 1 = Some (Pointer b' ofs' i :: nil) -> pmatch b' ofs' p).
+/\(forall ofs b' ofs' q i, Mem.loadbytes m b ofs 1 = Some (Fragment (Vptr b' ofs') q i :: nil) -> pmatch b' ofs' p).
 
 Remark loadbytes_load_ext:
   forall b m m',
@@ -2219,10 +2363,10 @@ Proof.
 Qed.
 
 Lemma smatch_loadbytes:
-  forall m b p b' ofs' i n ofs bytes,
+  forall m b p b' ofs' q i n ofs bytes,
   Mem.loadbytes m b ofs n = Some bytes ->
   smatch m b p ->
-  In (Pointer b' ofs' i) bytes ->
+  In (Fragment (Vptr b' ofs') q i) bytes ->
   pmatch b' ofs' p.
 Proof.
   intros. exploit In_loadbytes; eauto. intros (ofs1 & A & B).
@@ -2251,11 +2395,11 @@ Proof.
 Qed.
 
 Lemma storebytes_provenance:
-  forall m b ofs bytes m' b' ofs' b'' ofs'' i,
+  forall m b ofs bytes m' b' ofs' b'' ofs'' q i,
   Mem.storebytes m b ofs bytes = Some m' ->
-  Mem.loadbytes m' b' ofs' 1 = Some (Pointer b'' ofs'' i :: nil) ->
-  In (Pointer b'' ofs'' i) bytes
-  \/ Mem.loadbytes m b' ofs' 1 = Some (Pointer b'' ofs'' i :: nil).
+  Mem.loadbytes m' b' ofs' 1 = Some (Fragment (Vptr b'' ofs'') q i :: nil) ->
+  In (Fragment (Vptr b'' ofs'') q i) bytes
+  \/ Mem.loadbytes m b' ofs' 1 = Some (Fragment (Vptr b'' ofs'') q i :: nil).
 Proof.
   intros. 
   assert (EITHER:
@@ -2275,26 +2419,24 @@ Proof.
 Qed.
 
 Lemma store_provenance:
-  forall chunk m b ofs v m' b' ofs' b'' ofs'' i,
+  forall chunk m b ofs v m' b' ofs' b'' ofs'' q i,
   Mem.store chunk m b ofs v = Some m' ->
-  Mem.loadbytes m' b' ofs' 1 = Some (Pointer b'' ofs'' i :: nil) ->
-  v = Vptr b'' ofs'' /\ chunk = Mint32
-  \/ Mem.loadbytes m b' ofs' 1 = Some (Pointer b'' ofs'' i :: nil).
+  Mem.loadbytes m' b' ofs' 1 = Some (Fragment (Vptr b'' ofs'') q i :: nil) ->
+  v = Vptr b'' ofs'' /\ (chunk = Mint32 \/ chunk = Many32 \/ chunk = Many64)
+  \/ Mem.loadbytes m b' ofs' 1 = Some (Fragment (Vptr b'' ofs'') q i :: nil).
 Proof.
   intros. exploit storebytes_provenance; eauto. eapply Mem.store_storebytes; eauto. 
   intros [A|A]; auto. left.
-  assert (IN_ENC_BYTES: forall bl, ~In (Pointer b'' ofs'' i) (inj_bytes bl)).
-  {
-    induction bl; simpl. tauto. red; intros; elim IHbl. destruct H1. congruence. auto. 
-  }
-  assert (IN_REP_UNDEF: forall n, ~In (Pointer b'' ofs'' i) (list_repeat n Undef)).
-  {
-    intros; red; intros. exploit in_list_repeat; eauto. congruence.
-  }
-  unfold encode_val in A; destruct chunk, v;
-  try (eelim IN_ENC_BYTES; eassumption);
-  try (eelim IN_REP_UNDEF; eassumption).
-  simpl in A. split; auto. intuition congruence.
+  generalize (encode_val_shape chunk v). intros ENC; inv ENC.
+- split; auto. rewrite <- H1 in A; destruct A.
+  + congruence.
+  + exploit H5; eauto. intros (j & P & Q); congruence.
+- rewrite <- H1 in A; destruct A. 
+  + congruence.
+  + exploit H3; eauto. intros [byte P]; congruence. 
+- rewrite <- H1 in A; destruct A.
+  + congruence.
+  + exploit H2; eauto. congruence.
 Qed.
 
 Lemma smatch_store:
@@ -2307,7 +2449,7 @@ Proof.
   intros. destruct H0 as [A B]. split.
 - intros chunk' ofs' v' LOAD. destruct v'; auto with va.
   exploit Mem.load_pointer_store; eauto. 
-  intros [(P & Q & R & S & T) | DISJ]. 
+  intros [(P & Q & R & S) | DISJ]. 
 + subst. apply vmatch_vplub_l. auto.
 + apply vmatch_vplub_r. apply A with (chunk := chunk') (ofs := ofs').
   rewrite <- LOAD. symmetry. eapply Mem.load_store_other; eauto.
@@ -2325,17 +2467,20 @@ Lemma smatch_storebytes:
   forall m b ofs bytes m' b' p p',
   Mem.storebytes m b ofs bytes = Some m' ->
   smatch m b' p ->
-  (forall b' ofs' i, In (Pointer b' ofs' i) bytes -> pmatch b' ofs' p') ->
+  (forall b' ofs' q i, In (Fragment (Vptr b' ofs') q i) bytes -> pmatch b' ofs' p') ->
   smatch m' b' (plub p' p).
 Proof.
   intros. destruct H0 as [A B]. split.
 - intros. apply vmatch_ifptr. intros bx ofsx EQ; subst v. 
   exploit Mem.load_loadbytes; eauto. intros (bytes' & P & Q).
-  exploit decode_val_pointer_inv; eauto. intros [U V]. 
-  subst chunk bytes'. 
-  exploit In_loadbytes; eauto.
-  instantiate (1 := Pointer bx ofsx 3%nat). simpl; auto.
-  intros (ofs' & X & Y).
+  destruct bytes' as [ | byte1' bytes']. 
+  exploit Mem.loadbytes_length; eauto. intros. destruct chunk; discriminate.
+  generalize (decode_val_shape chunk byte1' bytes'). rewrite <- Q. 
+  intros DEC; inv DEC; try contradiction.
+  assert (v = Vptr bx ofsx).
+  { destruct H5 as [E|[E|E]]; rewrite E in H4; destruct v; simpl in H4; congruence. }
+  exploit In_loadbytes; eauto. eauto with coqlib. 
+  intros (ofs' & X & Y). subst v. 
   exploit storebytes_provenance; eauto. intros [Z | Z].
   apply pmatch_lub_l. eauto. 
   apply pmatch_lub_r. eauto. 
@@ -2530,10 +2675,10 @@ Proof.
 Qed.
 
 Lemma ablock_loadbytes_sound:
-  forall m b ab b' ofs' i n ofs bytes,
+  forall m b ab b' ofs' q i n ofs bytes,
   Mem.loadbytes m b ofs n = Some bytes ->
   bmatch m b ab ->
-  In (Pointer b' ofs' i) bytes ->
+  In (Fragment (Vptr b' ofs') q i) bytes ->
   pmatch b' ofs' (ablock_loadbytes ab).
 Proof.
   intros. destruct H0. eapply smatch_loadbytes; eauto. 
@@ -2542,7 +2687,7 @@ Qed.
 Lemma ablock_storebytes_anywhere_sound:
   forall m b ofs bytes p m' b' ab,
   Mem.storebytes m b ofs bytes = Some m' ->
-  (forall b' ofs' i, In (Pointer b' ofs' i) bytes -> pmatch b' ofs' p) ->
+  (forall b' ofs' q i, In (Fragment (Vptr b' ofs') q i) bytes -> pmatch b' ofs' p) ->
   bmatch m b' ab ->
   bmatch m' b' (ablock_storebytes_anywhere ab p).
 Proof.
@@ -2566,7 +2711,7 @@ Lemma ablock_storebytes_sound:
   forall m b ofs bytes m' p ab sz,
   Mem.storebytes m b ofs bytes = Some m' ->
   length bytes = nat_of_Z sz ->
-  (forall b' ofs' i, In (Pointer b' ofs' i) bytes -> pmatch b' ofs' p) ->
+  (forall b' ofs' q i, In (Fragment (Vptr b' ofs') q i) bytes -> pmatch b' ofs' p) ->
   bmatch m b ab ->
   bmatch m' b (ablock_storebytes ab p ofs sz).
 Proof.
@@ -3060,7 +3205,7 @@ Theorem loadbytes_sound:
   romatch m rm ->
   mmatch m am ->
   pmatch b ofs p ->
-  forall  b' ofs' i, In (Pointer b' ofs' i) bytes -> pmatch b' ofs' (loadbytes am rm p).
+  forall  b' ofs' q i, In (Fragment (Vptr b' ofs') q i) bytes -> pmatch b' ofs' (loadbytes am rm p).
 Proof.
   intros. unfold loadbytes; inv H2.
 - (* Gl id ofs *)
@@ -3093,7 +3238,7 @@ Theorem storebytes_sound:
   mmatch m am ->
   pmatch b ofs p ->
   length bytes = nat_of_Z sz ->
-  (forall b' ofs' i, In (Pointer b' ofs' i) bytes -> pmatch b' ofs' q) ->
+  (forall b' ofs' qt i, In (Fragment (Vptr b' ofs') qt i) bytes -> pmatch b' ofs' q) ->
   mmatch m' (storebytes am p sz q).
 Proof.
   intros until q; intros STORE MM PM LENGTH BYTES.
@@ -3403,7 +3548,7 @@ Proof.
     unfold Mem.loadbytes. rewrite pred_dec_true. reflexivity. 
     red; intros. replace ofs0 with ofs by omega. auto.
   }
-  destruct mv; econstructor.
+  destruct mv; econstructor. destruct v; econstructor. 
   apply inj_of_bc_valid.
   assert (PM: pmatch bc b i Ptop).
   { exploit mmatch_top; eauto. intros [P Q].
@@ -3456,7 +3601,7 @@ Proof.
     - exploit Mem.load_inject. eauto. eauto. apply inj_of_bc_valid; auto. 
       intros (v' & A & B). eapply vmatch_inj_top; eauto. 
     - exploit Mem.loadbytes_inject. eauto. eauto. apply inj_of_bc_valid; auto. 
-      intros (bytes' & A & B). inv B. inv H4. eapply pmatch_inj_top; eauto. 
+      intros (bytes' & A & B). inv B. inv H4. inv H8. eapply pmatch_inj_top; eauto. 
   }
   constructor; simpl; intros. 
   - apply ablock_init_sound. apply SM. congruence.
@@ -3701,7 +3846,11 @@ Hint Resolve cnot_sound symbol_address_sound
        divs_sound divu_sound mods_sound modu_sound shrx_sound
        negf_sound absf_sound
        addf_sound subf_sound mulf_sound divf_sound
-       zero_ext_sound sign_ext_sound singleoffloat_sound
+       negfs_sound absfs_sound
+       addfs_sound subfs_sound mulfs_sound divfs_sound
+       zero_ext_sound sign_ext_sound singleoffloat_sound floatofsingle_sound
        intoffloat_sound intuoffloat_sound floatofint_sound floatofintu_sound
+       intofsingle_sound intuofsingle_sound singleofint_sound singleofintu_sound
        longofwords_sound loword_sound hiword_sound 
-       cmpu_bool_sound cmp_bool_sound cmpf_bool_sound maskzero_sound : va.
+       cmpu_bool_sound cmp_bool_sound cmpf_bool_sound cmpfs_bool_sound
+       maskzero_sound : va.
diff --git a/cfrontend/C2C.ml b/cfrontend/C2C.ml
index d389d0a8..e7d83377 100644
--- a/cfrontend/C2C.ml
+++ b/cfrontend/C2C.ml
@@ -441,7 +441,13 @@ let z_of_str hex str fst =
 let convertFloat f kind =
   let mant = z_of_str f.C.hex (f.C.intPart ^ f.C.fracPart) 0 in
   match mant with
-    | Z.Z0 -> Float.zero
+    | Z.Z0 ->
+      begin match kind with
+      | FFloat ->
+	  Vsingle (Float.to_single Float.zero)
+      | FDouble | FLongDouble ->
+	  Vfloat Float.zero
+      end
     | Z.Zpos mant ->
 
       let sgExp = match f.C.exp.[0] with '+' | '-' -> true | _ -> false in
@@ -454,10 +460,10 @@ let convertFloat f kind =
       let base = P.of_int (if f.C.hex then 2 else 10) in
 
       begin match kind with
-	| FFloat ->
-	  Float.build_from_parsed32 base mant exp
-	| FDouble | FLongDouble ->
-	  Float.build_from_parsed64 base mant exp
+      | FFloat ->
+	  Vsingle (Float32.from_parsed base mant exp)
+      | FDouble | FLongDouble ->
+	  Vfloat (Float.from_parsed base mant exp)
       end
 
     | Z.Zneg _ -> assert false
@@ -482,7 +488,7 @@ let rec convertExpr env e =
   | C.EConst(C.CFloat(f, k)) ->
       if k = C.FLongDouble && not !Clflags.option_flongdouble then
         unsupported "'long double' floating-point literal";
-      Eval(Vfloat(convertFloat f k), ty)
+      Eval(convertFloat f k, ty)
   | C.EConst(C.CStr s) ->
       let ty = typeStringLiteral s in
       Evalof(Evar(name_for_string_literal env s, ty), ty)
diff --git a/cfrontend/Cexec.v b/cfrontend/Cexec.v
index b41902c9..eea1997e 100644
--- a/cfrontend/Cexec.v
+++ b/cfrontend/Cexec.v
@@ -104,7 +104,7 @@ Definition eventval_of_val (v: val) (t: typ) : option eventval :=
   match v, t with
   | Vint i, AST.Tint => Some (EVint i)
   | Vfloat f, AST.Tfloat => Some (EVfloat f)
-  | Vfloat f, AST.Tsingle => if Float.is_single_dec f then Some (EVfloatsingle f) else None
+  | Vsingle f, AST.Tsingle => Some (EVsingle f)
   | Vlong n, AST.Tlong => Some (EVlong n)
   | Vptr b ofs, AST.Tint => do id <- Genv.invert_symbol ge b; Some (EVptr_global id ofs)
   | _, _ => None
@@ -124,7 +124,7 @@ Definition val_of_eventval (ev: eventval) (t: typ) : option val :=
   match ev, t with
   | EVint i, AST.Tint => Some (Vint i)
   | EVfloat f, AST.Tfloat => Some (Vfloat f)
-  | EVfloatsingle f, AST.Tsingle => if Float.is_single_dec f then Some (Vfloat f) else None
+  | EVsingle f, AST.Tsingle => Some (Vsingle f)
   | EVlong n, AST.Tlong => Some (Vlong n)
   | EVptr_global id ofs, AST.Tint => do b <- Genv.find_symbol ge id; Some (Vptr b ofs)
   | _, _ => None
@@ -133,11 +133,7 @@ Definition val_of_eventval (ev: eventval) (t: typ) : option val :=
 Lemma eventval_of_val_sound:
   forall v t ev, eventval_of_val v t = Some ev -> eventval_match ge ev t v.
 Proof.
-  intros. destruct v; destruct t; simpl in H; inv H.
-  constructor.
-  constructor.
-  constructor.
-  destruct (Float.is_single_dec f); inv H1. constructor; auto.
+  intros. destruct v; destruct t; simpl in H; inv H; try constructor.
   destruct (Genv.invert_symbol ge b) as [id|] eqn:?; inv H1. 
   constructor. apply Genv.invert_find_symbol; auto.
 Qed.
@@ -146,7 +142,6 @@ Lemma eventval_of_val_complete:
   forall ev t v, eventval_match ge ev t v -> eventval_of_val v t = Some ev.
 Proof.
   induction 1; simpl; auto.
-  rewrite pred_dec_true; auto.
   rewrite (Genv.find_invert_symbol _ _ H). auto. 
 Qed.
 
@@ -170,11 +165,7 @@ Qed.
 Lemma val_of_eventval_sound:
   forall ev t v, val_of_eventval ev t = Some v -> eventval_match ge ev t v.
 Proof.
-  intros. destruct ev; destruct t; simpl in H; inv H.
-  constructor.
-  constructor.
-  constructor.
-  destruct (Float.is_single_dec f); inv H1. constructor; auto.
+  intros. destruct ev; destruct t; simpl in H; inv H; try constructor.
   destruct (Genv.find_symbol ge i) as [b|] eqn:?; inv H1.
   constructor. auto.
 Qed.
@@ -182,7 +173,7 @@ Qed.
 Lemma val_of_eventval_complete:
   forall ev t v, eventval_match ge ev t v -> val_of_eventval ev t = Some v.
 Proof.
-  induction 1; simpl; auto. rewrite pred_dec_true; auto. rewrite H; auto.
+  induction 1; simpl; auto. rewrite H; auto.
 Qed.
 
 (** Volatile memory accesses. *)
diff --git a/cfrontend/Clight.v b/cfrontend/Clight.v
index f2ba2405..d9fb6501 100644
--- a/cfrontend/Clight.v
+++ b/cfrontend/Clight.v
@@ -47,7 +47,8 @@ Require Import Cop.
 
 Inductive expr : Type :=
   | Econst_int: int -> type -> expr       (**r integer literal *)
-  | Econst_float: float -> type -> expr   (**r float literal *)
+  | Econst_float: float -> type -> expr   (**r double float literal *)
+  | Econst_single: float32 -> type -> expr (**r single float literal *)
   | Econst_long: int64 -> type -> expr    (**r long integer literal *)
   | Evar: ident -> type -> expr           (**r variable *)
   | Etempvar: ident -> type -> expr       (**r temporary variable *)
@@ -69,6 +70,7 @@ Definition typeof (e: expr) : type :=
   match e with
   | Econst_int _ ty => ty
   | Econst_float _ ty => ty
+  | Econst_single _ ty => ty
   | Econst_long _ ty => ty
   | Evar _ ty => ty
   | Etempvar _ ty => ty
@@ -352,6 +354,8 @@ Inductive eval_expr: expr -> val -> Prop :=
       eval_expr (Econst_int i ty) (Vint i)
   | eval_Econst_float:   forall f ty,
       eval_expr (Econst_float f ty) (Vfloat f)
+  | eval_Econst_single:   forall f ty,
+      eval_expr (Econst_single f ty) (Vsingle f)
   | eval_Econst_long:   forall i ty,
       eval_expr (Econst_long i ty) (Vlong i)
   | eval_Etempvar:  forall id ty v,
diff --git a/cfrontend/Cminorgen.v b/cfrontend/Cminorgen.v
index 8ecf4989..ae6ec56b 100644
--- a/cfrontend/Cminorgen.v
+++ b/cfrontend/Cminorgen.v
@@ -70,6 +70,8 @@ Definition transl_constant (cst: Csharpminor.constant): constant :=
       Ointconst n
   | Csharpminor.Ofloatconst n =>
       Ofloatconst n
+  | Csharpminor.Osingleconst n =>
+      Osingleconst n
   | Csharpminor.Olongconst n =>
       Olongconst n
   end.
diff --git a/cfrontend/Cminorgenproof.v b/cfrontend/Cminorgenproof.v
index dba445df..3bf790ca 100644
--- a/cfrontend/Cminorgenproof.v
+++ b/cfrontend/Cminorgenproof.v
@@ -1299,8 +1299,15 @@ Proof.
   inv H; inv H0; simpl; TrivialExists.
   inv H; inv H0; simpl; TrivialExists.
   inv H; inv H0; simpl; TrivialExists.
-  inv H0; simpl in H; inv H. simpl. destruct (Float.intoffloat f0); simpl in *; inv H1. TrivialExists.
-  inv H0; simpl in H; inv H. simpl. destruct (Float.intuoffloat f0); simpl in *; inv H1. TrivialExists.
+  inv H; inv H0; simpl; TrivialExists.
+  inv H; inv H0; simpl; TrivialExists.
+  inv H; inv H0; simpl; TrivialExists.
+  inv H0; simpl in H; inv H. simpl. destruct (Float.to_int f0); simpl in *; inv H1. TrivialExists.
+  inv H0; simpl in H; inv H. simpl. destruct (Float.to_intu f0); simpl in *; inv H1. TrivialExists.
+  inv H0; simpl in H; inv H. simpl. TrivialExists.
+  inv H0; simpl in H; inv H. simpl. TrivialExists.
+  inv H0; simpl in H; inv H. simpl. destruct (Float32.to_int f0); simpl in *; inv H1. TrivialExists.
+  inv H0; simpl in H; inv H. simpl. destruct (Float32.to_intu f0); simpl in *; inv H1. TrivialExists.
   inv H0; simpl in H; inv H. simpl. TrivialExists.
   inv H0; simpl in H; inv H. simpl. TrivialExists.
   inv H; inv H0; simpl; TrivialExists.
@@ -1308,10 +1315,12 @@ Proof.
   inv H; inv H0; simpl; TrivialExists.
   inv H; inv H0; simpl; TrivialExists.
   inv H; inv H0; simpl; TrivialExists.
-  inv H0; simpl in H; inv H. simpl. destruct (Float.longoffloat f0); simpl in *; inv H1. TrivialExists.
-  inv H0; simpl in H; inv H. simpl. destruct (Float.longuoffloat f0); simpl in *; inv H1. TrivialExists.
+  inv H0; simpl in H; inv H. simpl. destruct (Float.to_long f0); simpl in *; inv H1. TrivialExists.
+  inv H0; simpl in H; inv H. simpl. destruct (Float.to_longu f0); simpl in *; inv H1. TrivialExists.
   inv H0; simpl in H; inv H. simpl. TrivialExists.
   inv H0; simpl in H; inv H. simpl. TrivialExists.
+  inv H0; simpl in H; inv H. simpl. destruct (Float32.to_long f0); simpl in *; inv H1. TrivialExists.
+  inv H0; simpl in H; inv H. simpl. destruct (Float32.to_longu f0); simpl in *; inv H1. TrivialExists.
   inv H0; simpl in H; inv H. simpl. TrivialExists.
   inv H0; simpl in H; inv H. simpl. TrivialExists.
 Qed.
@@ -1361,6 +1370,10 @@ Proof.
   inv H; inv H0; inv H1; TrivialExists.
   inv H; inv H0; inv H1; TrivialExists.
   inv H; inv H0; inv H1; TrivialExists.
+  inv H; inv H0; inv H1; TrivialExists.
+  inv H; inv H0; inv H1; TrivialExists.
+  inv H; inv H0; inv H1; TrivialExists.
+  inv H; inv H0; inv H1; TrivialExists.
   inv H0; try discriminate; inv H1; try discriminate. simpl in *. 
     destruct (Int64.eq i0 Int64.zero
       || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H; TrivialExists.
@@ -1392,6 +1405,8 @@ Proof.
   simpl; auto.
 (* cmpf *)
   inv H; inv H0; inv H1; TrivialExists. apply val_inject_val_of_optbool.
+(* cmpfs *)
+  inv H; inv H0; inv H1; TrivialExists. apply val_inject_val_of_optbool.
 (* cmpl *)
   unfold Val.cmpl in *. inv H0; inv H1; simpl in H; inv H.
   econstructor; split. simpl; eauto. apply val_inject_val_of_bool.
@@ -1470,6 +1485,7 @@ Proof.
   destruct cst; simpl; intros; inv H. 
   exists (Vint i); auto.
   exists (Vfloat f0); auto.
+  exists (Vsingle f0); auto.
   exists (Vlong i); auto.
 Qed.
 
diff --git a/cfrontend/Cop.v b/cfrontend/Cop.v
index 83fe7726..ff43cbd5 100644
--- a/cfrontend/Cop.v
+++ b/cfrontend/Cop.v
@@ -74,17 +74,25 @@ Inductive incr_or_decr : Type := Incr | Decr.
 Inductive classify_cast_cases : Type :=
   | cast_case_neutral                   (**r int|pointer -> int32|pointer *)
   | cast_case_i2i (sz2:intsize) (si2:signedness)   (**r int -> int *)
-  | cast_case_f2f (sz2:floatsize)                  (**r float -> float *)
-  | cast_case_i2f (si1:signedness) (sz2:floatsize) (**r int -> float *)
-  | cast_case_f2i (sz2:intsize) (si2:signedness)   (**r float -> int *)
+  | cast_case_f2f                                  (**r double -> double *)
+  | cast_case_s2s                                  (**r single -> single *)
+  | cast_case_f2s                                  (**r double -> single *)
+  | cast_case_s2f                                  (**r single -> double *)
+  | cast_case_i2f (si1: signedness)                (**r int -> double *)
+  | cast_case_i2s (si1: signedness)                (**r int -> single *)
+  | cast_case_f2i (sz2:intsize) (si2:signedness)   (**r double -> int *)
+  | cast_case_s2i (sz2:intsize) (si2:signedness)   (**r single -> int *)
   | cast_case_l2l                       (**r long -> long *)
   | cast_case_i2l (si1: signedness)     (**r int -> long *)
   | cast_case_l2i (sz2: intsize) (si2: signedness) (**r long -> int *)
-  | cast_case_l2f (si1: signedness) (sz2: floatsize) (**r long -> float *)
-  | cast_case_f2l (si2: signedness)     (**r float -> long *)
-  | cast_case_f2bool                    (**r float -> bool *)
-  | cast_case_l2bool                    (**r long -> bool *)
-  | cast_case_p2bool                    (**r pointer -> bool *)
+  | cast_case_l2f (si1: signedness)                (**r long -> double *)
+  | cast_case_l2s (si1: signedness)                (**r long -> single *)
+  | cast_case_f2l (si2:signedness)                 (**r double -> long *)
+  | cast_case_s2l (si2:signedness)                 (**r single -> long *)
+  | cast_case_f2bool                               (**r double -> bool *)
+  | cast_case_s2bool                               (**r single -> bool *)
+  | cast_case_l2bool                               (**r long -> bool *)
+  | cast_case_p2bool                               (**r pointer -> bool *)
   | cast_case_struct (id1: ident) (fld1: fieldlist) (id2: ident) (fld2: fieldlist) (**r struct -> struct *)
   | cast_case_union (id1: ident) (fld1: fieldlist) (id2: ident) (fld2: fieldlist) (**r union -> union *)
   | cast_case_void                                 (**r any -> void *)
@@ -93,19 +101,27 @@ Inductive classify_cast_cases : Type :=
 Definition classify_cast (tfrom tto: type) : classify_cast_cases :=
   match tto, tfrom with
   | Tint I32 si2 _, (Tint _ _ _ | Tpointer _ _ | Tcomp_ptr _ _ | Tarray _ _ _ | Tfunction _ _ _) => cast_case_neutral
-  | Tint IBool _ _, Tfloat _ _ => cast_case_f2bool
+  | Tint IBool _ _, Tfloat F64 _ => cast_case_f2bool
+  | Tint IBool _ _, Tfloat F32 _ => cast_case_s2bool
   | Tint IBool _ _, (Tpointer _ _ | Tcomp_ptr _ _ | Tarray _ _ _ | Tfunction _ _ _) => cast_case_p2bool
   | Tint sz2 si2 _, Tint sz1 si1 _ => cast_case_i2i sz2 si2
-  | Tint sz2 si2 _, Tfloat sz1 _ => cast_case_f2i sz2 si2
-  | Tfloat sz2 _, Tfloat sz1 _ => cast_case_f2f sz2
-  | Tfloat sz2 _, Tint sz1 si1 _ => cast_case_i2f si1 sz2
+  | Tint sz2 si2 _, Tfloat F64 _ => cast_case_f2i sz2 si2
+  | Tint sz2 si2 _, Tfloat F32 _ => cast_case_s2i sz2 si2
+  | Tfloat F64 _, Tfloat F64 _ => cast_case_f2f
+  | Tfloat F32 _, Tfloat F32 _ => cast_case_s2s
+  | Tfloat F64 _, Tfloat F32 _ => cast_case_s2f
+  | Tfloat F32 _, Tfloat F64 _ => cast_case_f2s
+  | Tfloat F64 _, Tint sz1 si1 _ => cast_case_i2f si1
+  | Tfloat F32 _, Tint sz1 si1 _ => cast_case_i2s si1
   | (Tpointer _ _ | Tcomp_ptr _ _), (Tint _ _ _ | Tpointer _ _ | Tcomp_ptr _ _ | Tarray _ _ _ | Tfunction _ _ _) => cast_case_neutral
   | Tlong _ _, Tlong _ _ => cast_case_l2l
   | Tlong _ _, Tint sz1 si1 _ => cast_case_i2l si1
   | Tint IBool _ _, Tlong _ _ => cast_case_l2bool
   | Tint sz2 si2 _, Tlong _ _ => cast_case_l2i sz2 si2
-  | Tlong si2 _, Tfloat sz1 _ => cast_case_f2l si2
-  | Tfloat sz2 _, Tlong si1 _ => cast_case_l2f si1 sz2
+  | Tlong si2 _, Tfloat F64 _ => cast_case_f2l si2
+  | Tlong si2 _, Tfloat F32 _ => cast_case_s2l si2
+  | Tfloat F64 _, Tlong si1 _ => cast_case_l2f si1
+  | Tfloat F32 _, Tlong si1 _ => cast_case_l2s si1
   | (Tpointer _ _ | Tcomp_ptr _ _), Tlong _ _ => cast_case_l2i I32 Unsigned
   | Tlong si2 _, (Tpointer _ _ | Tcomp_ptr _ _ | Tarray _ _ _ | Tfunction _ _ _) => cast_case_i2l si2
   | Tstruct id2 fld2 _, Tstruct id1 fld1 _ => cast_case_struct id1 fld1 id2 fld2
@@ -128,24 +144,28 @@ Definition cast_int_int (sz: intsize) (sg: signedness) (i: int) : int :=
   | IBool, _ => if Int.eq i Int.zero then Int.zero else Int.one
   end.
 
-Definition cast_int_float (si: signedness) (sz: floatsize) (i: int) : float :=
-  match si, sz with
-  | Signed, F64 => Float.floatofint i
-  | Unsigned, F64 => Float.floatofintu i
-  | Signed, F32 => Float.singleofint i
-  | Unsigned, F32 => Float.singleofintu i
+Definition cast_int_float (si: signedness) (i: int) : float :=
+  match si with
+  | Signed => Float.of_int i
+  | Unsigned => Float.of_intu i
   end.
 
 Definition cast_float_int (si : signedness) (f: float) : option int :=
   match si with
-  | Signed => Float.intoffloat f
-  | Unsigned => Float.intuoffloat f
+  | Signed => Float.to_int f
+  | Unsigned => Float.to_intu f
   end.
 
-Definition cast_float_float (sz: floatsize) (f: float) : float :=
-  match sz with
-  | F32 => Float.singleoffloat f
-  | F64 => f
+Definition cast_int_single (si: signedness) (i: int) : float32 :=
+  match si with
+  | Signed => Float32.of_int i
+  | Unsigned => Float32.of_intu i
+  end.
+
+Definition cast_single_int (si : signedness) (f: float32) : option int :=
+  match si with
+  | Signed => Float32.to_int f
+  | Unsigned => Float32.to_intu f
   end.
 
 Definition cast_int_long (si: signedness) (i: int) : int64 :=
@@ -154,18 +174,28 @@ Definition cast_int_long (si: signedness) (i: int) : int64 :=
   | Unsigned => Int64.repr (Int.unsigned i)
   end.
 
-Definition cast_long_float (si: signedness) (sz: floatsize) (i: int64) : float :=
-  match si, sz with
-  | Signed, F64 => Float.floatoflong i
-  | Unsigned, F64 => Float.floatoflongu i
-  | Signed, F32 => Float.singleoflong i
-  | Unsigned, F32 => Float.singleoflongu i
+Definition cast_long_float (si: signedness) (i: int64) : float :=
+  match si with
+  | Signed => Float.of_long i
+  | Unsigned => Float.of_longu i
+  end.
+
+Definition cast_long_single (si: signedness) (i: int64) : float32 :=
+  match si with
+  | Signed => Float32.of_long i
+  | Unsigned => Float32.of_longu i
   end.
 
 Definition cast_float_long (si : signedness) (f: float) : option int64 :=
   match si with
-  | Signed => Float.longoffloat f
-  | Unsigned => Float.longuoffloat f
+  | Signed => Float.to_long f
+  | Unsigned => Float.to_longu f
+  end.
+
+Definition cast_single_long (si : signedness) (f: float32) : option int64 :=
+  match si with
+  | Signed => Float32.to_long f
+  | Unsigned => Float32.to_longu f
   end.
 
 Definition sem_cast (v: val) (t1 t2: type) : option val :=
@@ -180,14 +210,34 @@ Definition sem_cast (v: val) (t1 t2: type) : option val :=
       | Vint i => Some (Vint (cast_int_int sz2 si2 i))
       | _ => None
       end
-  | cast_case_f2f sz2 =>
+  | cast_case_f2f =>
+      match v with
+      | Vfloat f => Some (Vfloat f)
+      | _ => None
+      end
+  | cast_case_s2s =>
+      match v with
+      | Vsingle f => Some (Vsingle f)
+      | _ => None
+      end
+  | cast_case_s2f =>
       match v with
-      | Vfloat f => Some (Vfloat (cast_float_float sz2 f))
+      | Vsingle f => Some (Vfloat (Float.of_single f))
       | _ => None
       end
-  | cast_case_i2f si1 sz2 =>
+  | cast_case_f2s =>
       match v with
-      | Vint i => Some (Vfloat (cast_int_float si1 sz2 i))
+      | Vfloat f => Some (Vsingle (Float.to_single f))
+      | _ => None
+      end
+  | cast_case_i2f si1 =>
+      match v with
+      | Vint i => Some (Vfloat (cast_int_float si1 i))
+      | _ => None
+      end
+  | cast_case_i2s si1 =>
+      match v with
+      | Vint i => Some (Vsingle (cast_int_single si1 i))
       | _ => None
       end
   | cast_case_f2i sz2 si2 =>
@@ -199,12 +249,27 @@ Definition sem_cast (v: val) (t1 t2: type) : option val :=
           end
       | _ => None
       end
+  | cast_case_s2i sz2 si2 =>
+      match v with
+      | Vsingle f =>
+          match cast_single_int si2 f with
+          | Some i => Some (Vint (cast_int_int sz2 si2 i))
+          | None => None
+          end
+      | _ => None
+      end
   | cast_case_f2bool =>
       match v with
       | Vfloat f =>
           Some(Vint(if Float.cmp Ceq f Float.zero then Int.zero else Int.one))
       | _ => None
       end
+  | cast_case_s2bool =>
+      match v with
+      | Vsingle f =>
+          Some(Vint(if Float32.cmp Ceq f Float32.zero then Int.zero else Int.one))
+      | _ => None
+      end
   | cast_case_p2bool =>
       match v with
       | Vint i => Some (Vint (cast_int_int IBool Signed i))
@@ -232,9 +297,14 @@ Definition sem_cast (v: val) (t1 t2: type) : option val :=
           Some(Vint(if Int64.eq n Int64.zero then Int.zero else Int.one))
       | _ => None
       end
-  | cast_case_l2f si1 sz2 =>
+  | cast_case_l2f si1 =>
+      match v with
+      | Vlong i => Some (Vfloat (cast_long_float si1 i))
+      | _ => None
+      end
+  | cast_case_l2s si1 =>
       match v with
-      | Vlong i => Some (Vfloat (cast_long_float si1 sz2 i))
+      | Vlong i => Some (Vsingle (cast_long_single si1 i))
       | _ => None
       end
   | cast_case_f2l si2 =>
@@ -246,6 +316,15 @@ Definition sem_cast (v: val) (t1 t2: type) : option val :=
           end
       | _ => None
       end
+  | cast_case_s2l si2 =>
+      match v with
+      | Vsingle f =>
+          match cast_single_long si2 f with
+          | Some i => Some (Vlong i)
+          | None => None
+          end
+      | _ => None
+      end
   | cast_case_struct id1 fld1 id2 fld2 =>
       match v with
       | Vptr b ofs =>
@@ -271,7 +350,8 @@ Definition sem_cast (v: val) (t1 t2: type) : option val :=
 
 Inductive classify_bool_cases : Type :=
   | bool_case_i                           (**r integer *)
-  | bool_case_f                           (**r float *)
+  | bool_case_f                           (**r double float *)
+  | bool_case_s                           (**r single float *)
   | bool_case_p                           (**r pointer *)
   | bool_case_l                           (**r long *)
   | bool_default.
@@ -280,7 +360,8 @@ Definition classify_bool (ty: type) : classify_bool_cases :=
   match typeconv ty with
   | Tint _ _ _ => bool_case_i
   | Tpointer _ _ | Tcomp_ptr _ _ => bool_case_p
-  | Tfloat _ _ => bool_case_f
+  | Tfloat F64 _ => bool_case_f
+  | Tfloat F32 _ => bool_case_s
   | Tlong _ _ => bool_case_l
   | _ => bool_default
   end.
@@ -302,6 +383,11 @@ Definition bool_val (v: val) (t: type) : option bool :=
       | Vfloat f => Some (negb (Float.cmp Ceq f Float.zero))
       | _ => None
       end
+  | bool_case_s =>
+      match v with
+      | Vsingle f => Some (negb (Float32.cmp Ceq f Float32.zero))
+      | _ => None
+      end
   | bool_case_p =>
       match v with
       | Vint n => Some (negb (Int.eq n Int.zero))
@@ -333,6 +419,11 @@ Definition sem_notbool (v: val) (ty: type) : option val :=
       | Vfloat f => Some (Val.of_bool (Float.cmp Ceq f Float.zero))
       | _ => None
       end
+  | bool_case_s =>
+      match v with
+      | Vsingle f => Some (Val.of_bool (Float32.cmp Ceq f Float32.zero))
+      | _ => None
+      end
   | bool_case_p =>
       match v with
       | Vint n => Some (Val.of_bool (Int.eq n Int.zero))
@@ -351,7 +442,8 @@ Definition sem_notbool (v: val) (ty: type) : option val :=
 
 Inductive classify_neg_cases : Type :=
   | neg_case_i(s: signedness)              (**r int *)
-  | neg_case_f(sz: floatsize)              (**r float *)
+  | neg_case_f                             (**r double float *)
+  | neg_case_s                             (**r single float *)
   | neg_case_l(s: signedness)              (**r long *)
   | neg_default.
 
@@ -359,7 +451,8 @@ Definition classify_neg (ty: type) : classify_neg_cases :=
   match ty with
   | Tint I32 Unsigned _ => neg_case_i Unsigned
   | Tint _ _ _ => neg_case_i Signed
-  | Tfloat sz _ => neg_case_f sz
+  | Tfloat F64 _ => neg_case_f
+  | Tfloat F32 _ => neg_case_s
   | Tlong si _ => neg_case_l si
   | _ => neg_default
   end.
@@ -371,11 +464,16 @@ Definition sem_neg (v: val) (ty: type) : option val :=
       | Vint n => Some (Vint (Int.neg n))
       | _ => None
       end
-  | neg_case_f sz =>
+  | neg_case_f =>
       match v with
       | Vfloat f => Some (Vfloat (Float.neg f))
       | _ => None
       end
+  | neg_case_s =>
+      match v with
+      | Vsingle f => Some (Vsingle (Float32.neg f))
+      | _ => None
+      end
   | neg_case_l sg =>
       match v with
       | Vlong n => Some (Vlong (Int64.neg n))
@@ -388,17 +486,22 @@ Definition sem_absfloat (v: val) (ty: type) : option val :=
   match classify_neg ty with
   | neg_case_i sg =>
       match v with
-      | Vint n => Some (Vfloat (Float.abs (cast_int_float sg F64 n)))
+      | Vint n => Some (Vfloat (Float.abs (cast_int_float sg n)))
       | _ => None
       end
-  | neg_case_f sz =>
+  | neg_case_f =>
       match v with
       | Vfloat f => Some (Vfloat (Float.abs f))
       | _ => None
       end
+  | neg_case_s =>
+      match v with
+      | Vsingle f => Some (Vfloat (Float.abs (Float.of_single f)))
+      | _ => None
+      end
   | neg_case_l sg =>
       match v with
-      | Vlong n => Some (Vfloat (Float.abs (cast_long_float sg F64 n)))
+      | Vlong n => Some (Vfloat (Float.abs (cast_long_float sg n)))
       | _ => None
       end
   | neg_default => None
@@ -446,7 +549,8 @@ Definition sem_notint (v: val) (ty: type): option val :=
 Inductive binarith_cases: Type :=
   | bin_case_i (s: signedness)         (**r at int type *)
   | bin_case_l (s: signedness)         (**r at long int type *)
-  | bin_case_f (sz: floatsize)         (**r at float type *)
+  | bin_case_f                         (**r at double float type *)
+  | bin_case_s                         (**r at single float type *)
   | bin_default.                       (**r error *)
 
 Definition classify_binarith (ty1: type) (ty2: type) : binarith_cases :=
@@ -458,31 +562,24 @@ Definition classify_binarith (ty1: type) (ty2: type) : binarith_cases :=
   | Tlong _ _, Tlong _ _ => bin_case_l Unsigned
   | Tlong sg _, Tint _ _ _ => bin_case_l sg
   | Tint _ _ _, Tlong sg _ => bin_case_l sg
-  | Tfloat F32 _, Tfloat F32 _ => bin_case_f F32
-  | Tfloat _ _, Tfloat _ _ => bin_case_f F64
-  | Tfloat sz _, (Tint _ _ _ | Tlong _ _) => bin_case_f sz
-  | (Tint _ _ _ | Tlong _ _), Tfloat sz _ => bin_case_f sz
+  | Tfloat F32 _, Tfloat F32 _ => bin_case_s
+  | Tfloat _ _, Tfloat _ _ => bin_case_f
+  | Tfloat F64 _, (Tint _ _ _ | Tlong _ _) => bin_case_f
+  | (Tint _ _ _ | Tlong _ _), Tfloat F64 _ => bin_case_f
+  | Tfloat F32 _, (Tint _ _ _ | Tlong _ _) => bin_case_s
+  | (Tint _ _ _ | Tlong _ _), Tfloat F32 _ => bin_case_s
   | _, _ => bin_default
   end.
 
-(** The static type of the result. *)
-
-Definition binarith_result_type (c: binarith_cases) : option type :=
-  match c with
-  | bin_case_i sg => Some(Tint I32 sg noattr)
-  | bin_case_l sg => Some(Tlong sg noattr)
-  | bin_case_f sz => Some(Tfloat sz noattr)
-  | bin_default   => None
-  end.
-
-(** The type at which the computation is done.  Both arguments are
-  converted to this type before the actual computation.  *)
+(** The static type of the result. Both arguments are converted to this type
+    before the actual computation. *)
 
 Definition binarith_type (c: binarith_cases) : type :=
   match c with
   | bin_case_i sg => Tint I32 sg noattr
   | bin_case_l sg => Tlong sg noattr
-  | bin_case_f sz => Tfloat F64 noattr
+  | bin_case_f    => Tfloat F64 noattr
+  | bin_case_s    => Tfloat F32 noattr
   | bin_default   => Tvoid
   end.
 
@@ -490,6 +587,7 @@ Definition sem_binarith
     (sem_int: signedness -> int -> int -> option val)
     (sem_long: signedness -> int64 -> int64 -> option val)
     (sem_float: float -> float -> option val)
+    (sem_single: float32 -> float32 -> option val)
     (v1: val) (t1: type) (v2: val) (t2: type) : option val :=
   let c := classify_binarith t1 t2 in
   let t := binarith_type c in
@@ -505,11 +603,16 @@ Definition sem_binarith
       | Vint n1, Vint n2 => sem_int sg n1 n2
       | _,  _ => None
       end
-  | bin_case_f sz =>
+  | bin_case_f =>
       match v1', v2' with
       | Vfloat n1, Vfloat n2 => sem_float n1 n2
       | _,  _ => None
       end
+  | bin_case_s =>
+      match v1', v2' with
+      | Vsingle n1, Vsingle n2 => sem_single n1 n2
+      | _,  _ => None
+      end
   | bin_case_l sg =>
       match v1', v2' with
       | Vlong n1, Vlong n2 => sem_long sg n1 n2
@@ -569,6 +672,7 @@ Definition sem_add (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
         (fun sg n1 n2 => Some(Vint(Int.add n1 n2)))
         (fun sg n1 n2 => Some(Vlong(Int64.add n1 n2)))
         (fun n1 n2 => Some(Vfloat(Float.add n1 n2)))
+        (fun n1 n2 => Some(Vsingle(Float32.add n1 n2)))
         v1 t1 v2 t2
   end.
 
@@ -617,6 +721,7 @@ Definition sem_sub (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
         (fun sg n1 n2 => Some(Vint(Int.sub n1 n2)))
         (fun sg n1 n2 => Some(Vlong(Int64.sub n1 n2)))
         (fun n1 n2 => Some(Vfloat(Float.sub n1 n2)))
+        (fun n1 n2 => Some(Vsingle(Float32.sub n1 n2)))
         v1 t1 v2 t2
   end.
  
@@ -627,6 +732,7 @@ Definition sem_mul (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
     (fun sg n1 n2 => Some(Vint(Int.mul n1 n2)))
     (fun sg n1 n2 => Some(Vlong(Int64.mul n1 n2)))
     (fun n1 n2 => Some(Vfloat(Float.mul n1 n2)))
+    (fun n1 n2 => Some(Vsingle(Float32.mul n1 n2)))
     v1 t1 v2 t2.
 
 Definition sem_div (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
@@ -652,6 +758,7 @@ Definition sem_div (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
           then None else Some(Vlong(Int64.divu n1 n2))
       end)
     (fun n1 n2 => Some(Vfloat(Float.div n1 n2)))
+    (fun n1 n2 => Some(Vsingle(Float32.div n1 n2)))
     v1 t1 v2 t2.
 
 Definition sem_mod (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
@@ -677,6 +784,7 @@ Definition sem_mod (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
           then None else Some(Vlong(Int64.modu n1 n2))
       end)
     (fun n1 n2 => None)
+    (fun n1 n2 => None)
     v1 t1 v2 t2.
 
 Definition sem_and (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
@@ -684,6 +792,7 @@ Definition sem_and (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
     (fun sg n1 n2 => Some(Vint(Int.and n1 n2)))
     (fun sg n1 n2 => Some(Vlong(Int64.and n1 n2)))
     (fun n1 n2 => None)
+    (fun n1 n2 => None)
     v1 t1 v2 t2.
 
 Definition sem_or (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
@@ -691,6 +800,7 @@ Definition sem_or (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
     (fun sg n1 n2 => Some(Vint(Int.or n1 n2)))
     (fun sg n1 n2 => Some(Vlong(Int64.or n1 n2)))
     (fun n1 n2 => None)
+    (fun n1 n2 => None)
     v1 t1 v2 t2.
 
 Definition sem_xor (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
@@ -698,6 +808,7 @@ Definition sem_xor (v1:val) (t1:type) (v2: val) (t2:type) : option val :=
     (fun sg n1 n2 => Some(Vint(Int.xor n1 n2)))
     (fun sg n1 n2 => Some(Vlong(Int64.xor n1 n2)))
     (fun n1 n2 => None)
+    (fun n1 n2 => None)
     v1 t1 v2 t2.
 
 (** *** Shifts *)
@@ -818,6 +929,8 @@ Definition sem_cmp (c:comparison)
             Some(Val.of_bool(match sg with Signed => Int64.cmp c n1 n2 | Unsigned => Int64.cmpu c n1 n2 end)))
         (fun n1 n2 =>
             Some(Val.of_bool(Float.cmp c n1 n2)))
+        (fun n1 n2 =>
+            Some(Val.of_bool(Float32.cmp c n1 n2)))
         v1 t1 v2 t2
   end.
 
@@ -946,7 +1059,9 @@ Proof.
   inv H0; inv H; TrivialInject.
 - econstructor; eauto. 
 - destruct (cast_float_int si2 f0); inv H1; TrivialInject.
+- destruct (cast_single_int si2 f0); inv H1; TrivialInject.
 - destruct (cast_float_long si2 f0); inv H1; TrivialInject.
+- destruct (cast_single_long si2 f0); inv H1; TrivialInject.
 - destruct (ident_eq id1 id2 && fieldlist_eq fld1 fld2); inv H2; TrivialInject. econstructor; eauto.
 - destruct (ident_eq id1 id2 && fieldlist_eq fld1 fld2); inv H2; TrivialInject. econstructor; eauto.
 - econstructor; eauto.
@@ -976,18 +1091,19 @@ Definition optval_self_injects (ov: option val) : Prop :=
   end.
 
 Remark sem_binarith_inject:
-  forall sem_int sem_long sem_float v1 t1 v2 t2 v v1' v2',
-  sem_binarith sem_int sem_long sem_float v1 t1 v2 t2 = Some v ->
+  forall sem_int sem_long sem_float sem_single v1 t1 v2 t2 v v1' v2',
+  sem_binarith sem_int sem_long sem_float sem_single v1 t1 v2 t2 = Some v ->
   val_inject f v1 v1' -> val_inject f v2 v2' ->
   (forall sg n1 n2, optval_self_injects (sem_int sg n1 n2)) ->
   (forall sg n1 n2, optval_self_injects (sem_long sg n1 n2)) ->
   (forall n1 n2, optval_self_injects (sem_float n1 n2)) ->
-  exists v', sem_binarith sem_int sem_long sem_float v1' t1 v2' t2 = Some v' /\ val_inject f v v'.
+  (forall n1 n2, optval_self_injects (sem_single n1 n2)) ->
+  exists v', sem_binarith sem_int sem_long sem_float sem_single v1' t1 v2' t2 = Some v' /\ val_inject f v v'.
 Proof.
   intros. 
   assert (SELF: forall ov v, ov = Some v -> optval_self_injects ov -> val_inject f v v).
   {
-    intros. subst ov; simpl in H6. destruct v0; contradiction || constructor.
+    intros. subst ov; simpl in H7. destruct v0; contradiction || constructor.
   }
   unfold sem_binarith in *.
   set (c := classify_binarith t1 t2) in *.
@@ -1093,6 +1209,7 @@ Proof.
             || Int64.eq n1 (Int64.repr Int64.min_signed) && Int64.eq n2 Int64.mone); exact I.
   destruct (Int64.eq n2 Int64.zero); exact I.
   exact I.
+  exact I.
 - (* mod *)
   eapply sem_binarith_inject; eauto; intros.
   destruct sg.
@@ -1104,6 +1221,7 @@ Proof.
             || Int64.eq n1 (Int64.repr Int64.min_signed) && Int64.eq n2 Int64.mone); exact I.
   destruct (Int64.eq n2 Int64.zero); exact I.
   exact I.
+  exact I.
 - (* and *)
   eapply sem_binarith_inject; eauto; intros; exact I.
 - (* or *)
@@ -1168,7 +1286,8 @@ Proof.
     match t with
     | Tint _ _ _ => bool_case_i
     | Tpointer _ _ | Tcomp_ptr _ _ | Tarray _ _ _ | Tfunction _ _ _ => bool_case_p
-    | Tfloat _ _ => bool_case_f
+    | Tfloat F64 _ => bool_case_f
+    | Tfloat F32 _ => bool_case_s
     | Tlong _ _ => bool_case_l
     | _ => bool_default
     end).
@@ -1178,7 +1297,14 @@ Proof.
   unfold bool_val. rewrite A. unfold sem_cast. destruct t; simpl; auto; destruct v; auto.
   destruct (Int.eq i0 Int.zero); auto.
   destruct (Int64.eq i Int64.zero); auto.
+  destruct f; auto.
+  destruct f; auto.
+  destruct f; auto.
+  destruct f; auto.
   destruct (Float.cmp Ceq f0 Float.zero); auto.
+  destruct f; auto.
+  destruct (Float32.cmp Ceq f0 Float32.zero); auto.
+  destruct f; auto.
   destruct (Int.eq i Int.zero); auto. 
   destruct (Int.eq i Int.zero); auto. 
   destruct (Int.eq i Int.zero); auto. 
@@ -1360,8 +1486,8 @@ Qed.
 
 Lemma classify_binarith_arithmetic_conversion:
   forall t1 t2,
-  binarith_result_type (classify_binarith (proj_type t1) (proj_type t2)) =
-  Some (proj_type (usual_arithmetic_conversion t1 t2)).
+  binarith_type (classify_binarith (proj_type t1) (proj_type t2)) =
+  proj_type (usual_arithmetic_conversion t1 t2).
 Proof.
   destruct t1; destruct t2; try reflexivity.
 - destruct it; destruct it0; reflexivity.
diff --git a/cfrontend/Csharpminor.v b/cfrontend/Csharpminor.v
index 4b2e915c..d37fa819 100644
--- a/cfrontend/Csharpminor.v
+++ b/cfrontend/Csharpminor.v
@@ -33,7 +33,8 @@ Require Import Smallstep.
 
 Inductive constant : Type :=
   | Ointconst: int -> constant          (**r integer constant *)
-  | Ofloatconst: float -> constant      (**r floating-point constant *)
+  | Ofloatconst: float -> constant      (**r double-precision floating-point constant *)
+  | Osingleconst: float32 -> constant   (**r single-precision floating-point constant *)
   | Olongconst: int64 -> constant.      (**r long integer constant *)
 
 Definition unary_operation : Type := Cminor.unary_operation.
@@ -253,6 +254,7 @@ Definition eval_constant (cst: constant) : option val :=
   match cst with
   | Ointconst n => Some (Vint n)
   | Ofloatconst n => Some (Vfloat n)
+  | Osingleconst n => Some (Vsingle n)
   | Olongconst n => Some (Vlong n)
   end.
 
diff --git a/cfrontend/Cshmgen.v b/cfrontend/Cshmgen.v
index 685fa71b..cedfd8bf 100644
--- a/cfrontend/Cshmgen.v
+++ b/cfrontend/Cshmgen.v
@@ -50,16 +50,25 @@ Open Local Scope error_monad_scope.
 
 Definition make_intconst (n: int) :=  Econst (Ointconst n).
 
+Definition make_longconst (f: int64) :=  Econst (Olongconst f).
+
 Definition make_floatconst (f: float) :=  Econst (Ofloatconst f).
 
-Definition make_longconst (f: int64) :=  Econst (Olongconst f).
+Definition make_singleconst (f: float32) :=  Econst (Osingleconst f).
 
-Definition make_floatofint (e: expr) (sg: signedness) (sz: floatsize) :=
-  match sg, sz with
-  | Signed, F64 => Eunop Ofloatofint e
-  | Unsigned, F64 => Eunop Ofloatofintu e
-  | Signed, F32 => Eunop Osingleoffloat (Eunop Ofloatofint e)
-  | Unsigned, F32 => Eunop Osingleoffloat (Eunop Ofloatofintu e)
+Definition make_singleoffloat (e: expr) := Eunop Osingleoffloat e.
+Definition make_floatofsingle (e: expr) := Eunop Ofloatofsingle e.
+
+Definition make_floatofint (e: expr) (sg: signedness) :=
+  match sg with
+  | Signed => Eunop Ofloatofint e
+  | Unsigned => Eunop Ofloatofintu e
+  end.
+
+Definition make_singleofint (e: expr) (sg: signedness) :=
+  match sg with
+  | Signed => Eunop Osingleofint e
+  | Unsigned => Eunop Osingleofintu e
   end.
 
 Definition make_intoffloat (e: expr) (sg: signedness) :=
@@ -68,18 +77,28 @@ Definition make_intoffloat (e: expr) (sg: signedness) :=
   | Unsigned => Eunop Ointuoffloat e
   end.
 
+Definition make_intofsingle (e: expr) (sg: signedness) :=
+  match sg with
+  | Signed => Eunop Ointofsingle e
+  | Unsigned => Eunop Ointuofsingle e
+  end.
+
 Definition make_longofint (e: expr) (sg: signedness) :=
   match sg with
   | Signed => Eunop Olongofint e
   | Unsigned => Eunop Olongofintu e
   end.
 
-Definition make_floatoflong (e: expr) (sg: signedness) (sz: floatsize) :=
-  match sg, sz with
-  | Signed, F64 => Eunop Ofloatoflong e
-  | Unsigned, F64 => Eunop Ofloatoflongu e
-  | Signed, F32 => Eunop Osingleoflong e
-  | Unsigned, F32 => Eunop Osingleoflongu e
+Definition make_floatoflong (e: expr) (sg: signedness) :=
+  match sg with
+  | Signed => Eunop Ofloatoflong e
+  | Unsigned => Eunop Ofloatoflongu e
+  end.
+
+Definition make_singleoflong (e: expr) (sg: signedness) :=
+  match sg with
+  | Signed => Eunop Osingleoflong e
+  | Unsigned => Eunop Osingleoflongu e
   end.
 
 Definition make_longoffloat (e: expr) (sg: signedness) :=
@@ -88,11 +107,18 @@ Definition make_longoffloat (e: expr) (sg: signedness) :=
   | Unsigned => Eunop Olonguoffloat e
   end.
 
+Definition make_longofsingle (e: expr) (sg: signedness) :=
+  match sg with
+  | Signed => Eunop Olongofsingle e
+  | Unsigned => Eunop Olonguofsingle e
+  end.
+
 Definition make_cmp_ne_zero (e: expr) :=
   match e with
   | Ebinop (Ocmp c) e1 e2 => e
   | Ebinop (Ocmpu c) e1 e2 => e
   | Ebinop (Ocmpf c) e1 e2 => e
+  | Ebinop (Ocmpfs c) e1 e2 => e
   | Ebinop (Ocmpl c) e1 e2 => e
   | Ebinop (Ocmplu c) e1 e2 => e
   | _ => Ebinop (Ocmp Cne) e (make_intconst Int.zero)
@@ -111,25 +137,27 @@ Definition make_cast_int (e: expr) (sz: intsize) (si: signedness) :=
   | IBool, _ => make_cmp_ne_zero e
   end.
 
-Definition make_cast_float (e: expr) (sz: floatsize) :=
-  match sz with
-  | F32 => Eunop Osingleoffloat e
-  | F64 => e
-  end.
-
 Definition make_cast (from to: type) (e: expr) :=
   match classify_cast from to with
   | cast_case_neutral => OK e
   | cast_case_i2i sz2 si2 => OK (make_cast_int e sz2 si2)
-  | cast_case_f2f sz2 => OK (make_cast_float e sz2)
-  | cast_case_i2f si1 sz2 => OK (make_floatofint e si1 sz2)
+  | cast_case_f2f => OK e
+  | cast_case_s2s => OK e
+  | cast_case_f2s => OK (make_singleoffloat e)
+  | cast_case_s2f => OK (make_floatofsingle e)
+  | cast_case_i2f si1 => OK (make_floatofint e si1)
+  | cast_case_i2s si1 => OK (make_singleofint e si1)
   | cast_case_f2i sz2 si2 => OK (make_cast_int (make_intoffloat e si2) sz2 si2)
+  | cast_case_s2i sz2 si2 => OK (make_cast_int (make_intofsingle e si2) sz2 si2)
   | cast_case_l2l => OK e
   | cast_case_i2l si1 => OK (make_longofint e si1)
   | cast_case_l2i sz2 si2 => OK (make_cast_int (Eunop Ointoflong e) sz2 si2)
-  | cast_case_l2f si1 sz2 => OK (make_floatoflong e si1 sz2)
+  | cast_case_l2f si1 => OK (make_floatoflong e si1)
+  | cast_case_l2s si1 => OK (make_singleoflong e si1)
   | cast_case_f2l si2 => OK (make_longoffloat e si2)
+  | cast_case_s2l si2 => OK (make_longofsingle e si2)
   | cast_case_f2bool => OK (Ebinop (Ocmpf Cne) e (make_floatconst Float.zero))
+  | cast_case_s2bool => OK (Ebinop (Ocmpfs Cne) e (make_singleconst Float32.zero))
   | cast_case_l2bool => OK (Ebinop (Ocmpl Cne) e (make_longconst Int64.zero))
   | cast_case_p2bool => OK (Ebinop (Ocmpu Cne) e (make_intconst Int.zero))
   | cast_case_struct id1 fld1 id2 fld2 => OK e
@@ -145,6 +173,7 @@ Definition make_boolean (e: expr) (ty: type) :=
   match classify_bool ty with
   | bool_case_i => make_cmp_ne_zero e
   | bool_case_f => Ebinop (Ocmpf Cne) e (make_floatconst Float.zero)
+  | bool_case_s => Ebinop (Ocmpfs Cne) e (make_singleconst Float32.zero)
   | bool_case_p => Ebinop (Ocmpu Cne) e (make_intconst Int.zero)
   | bool_case_l => Ebinop (Ocmpl Cne) e (make_longconst Int64.zero)
   | bool_default => e   (**r should not happen *)
@@ -156,6 +185,7 @@ Definition make_notbool (e: expr) (ty: type) :=
   match classify_bool ty with
   | bool_case_i => OK (Ebinop (Ocmp Ceq) e (make_intconst Int.zero))
   | bool_case_f => OK (Ebinop (Ocmpf Ceq) e (make_floatconst Float.zero))
+  | bool_case_s => OK (Ebinop (Ocmpfs Ceq) e (make_singleconst Float32.zero))
   | bool_case_p => OK (Ebinop (Ocmpu Ceq) e (make_intconst Int.zero))
   | bool_case_l => OK (Ebinop (Ocmpl Ceq) e (make_longconst Int64.zero))
   | _ => Error (msg "Cshmgen.make_notbool")
@@ -164,16 +194,18 @@ Definition make_notbool (e: expr) (ty: type) :=
 Definition make_neg (e: expr) (ty: type) :=
   match classify_neg ty with
   | neg_case_i _ => OK (Eunop Onegint e)
-  | neg_case_f _ => OK (Eunop Onegf e)
+  | neg_case_f => OK (Eunop Onegf e)
+  | neg_case_s => OK (Eunop Onegfs e)
   | neg_case_l _ => OK (Eunop Onegl e)
   | _ => Error (msg "Cshmgen.make_neg")
   end.
 
 Definition make_absfloat (e: expr) (ty: type) :=
   match classify_neg ty with
-  | neg_case_i sg => OK (Eunop Oabsf (make_floatofint e sg F64))
-  | neg_case_f _ => OK (Eunop Oabsf e)
-  | neg_case_l sg => OK (Eunop Oabsf (make_floatoflong e sg F64))
+  | neg_case_i sg => OK (Eunop Oabsf (make_floatofint e sg))
+  | neg_case_f => OK (Eunop Oabsf e)
+  | neg_case_s => OK (Eunop Oabsf (make_floatofsingle e))
+  | neg_case_l sg => OK (Eunop Oabsf (make_floatoflong e sg))
   | _ => Error (msg "Cshmgen.make_absfloat")
   end.
 
@@ -186,7 +218,7 @@ Definition make_notint (e: expr) (ty: type) :=
 
 (** Binary operators *)
 
-Definition make_binarith (iop iopu fop lop lopu: binary_operation)
+Definition make_binarith (iop iopu fop sop lop lopu: binary_operation)
                          (e1: expr) (ty1: type) (e2: expr) (ty2: type) :=
   let c := classify_binarith ty1 ty2 in
   let ty := binarith_type c in
@@ -195,7 +227,8 @@ Definition make_binarith (iop iopu fop lop lopu: binary_operation)
   match c with
   | bin_case_i Signed => OK (Ebinop iop e1' e2')
   | bin_case_i Unsigned => OK (Ebinop iopu e1' e2')
-  | bin_case_f _ => OK (Ebinop fop e1' e2')
+  | bin_case_f => OK (Ebinop fop e1' e2')
+  | bin_case_s => OK (Ebinop sop e1' e2')
   | bin_case_l Signed => OK (Ebinop lop e1' e2')
   | bin_case_l Unsigned => OK (Ebinop lopu e1' e2')
   | bin_default => Error (msg "Cshmgen.make_binarith")
@@ -216,7 +249,7 @@ Definition make_add (e1: expr) (ty1: type) (e2: expr) (ty2: type) :=
       let n := make_intconst (Int.repr (Ctypes.sizeof ty)) in
       OK (Ebinop Oadd e2 (Ebinop Omul n (Eunop Ointoflong e1)))
   | add_default =>
-      make_binarith Oadd Oadd Oaddf Oaddl Oaddl e1 ty1 e2 ty2
+      make_binarith Oadd Oadd Oaddf Oaddfs Oaddl Oaddl e1 ty1 e2 ty2
   end.
 
 Definition make_sub (e1: expr) (ty1: type) (e2: expr) (ty2: type) :=
@@ -231,14 +264,14 @@ Definition make_sub (e1: expr) (ty1: type) (e2: expr) (ty2: type) :=
       let n := make_intconst (Int.repr (Ctypes.sizeof ty)) in
       OK (Ebinop Osub e1 (Ebinop Omul n (Eunop Ointoflong e2)))
   | sub_default =>
-      make_binarith Osub Osub Osubf Osubl Osubl e1 ty1 e2 ty2
+      make_binarith Osub Osub Osubf Osubfs Osubl Osubl e1 ty1 e2 ty2
   end.
 
 Definition make_mul (e1: expr) (ty1: type) (e2: expr) (ty2: type) :=
-  make_binarith Omul Omul Omulf Omull Omull e1 ty1 e2 ty2.
+  make_binarith Omul Omul Omulf Omulfs Omull Omull e1 ty1 e2 ty2.
 
 Definition make_div (e1: expr) (ty1: type) (e2: expr) (ty2: type) :=
-  make_binarith Odiv Odivu Odivf Odivl Odivlu e1 ty1 e2 ty2.
+  make_binarith Odiv Odivu Odivf Odivfs Odivl Odivlu e1 ty1 e2 ty2.
 
 Definition make_binarith_int (iop iopu lop lopu: binary_operation)
                              (e1: expr) (ty1: type) (e2: expr) (ty2: type) :=
@@ -251,7 +284,7 @@ Definition make_binarith_int (iop iopu lop lopu: binary_operation)
   | bin_case_i Unsigned => OK (Ebinop iopu e1' e2')
   | bin_case_l Signed => OK (Ebinop lop e1' e2')
   | bin_case_l Unsigned => OK (Ebinop lopu e1' e2')
-  | bin_case_f _ | bin_default => Error (msg "Cshmgen.make_binarith_int")
+  | bin_case_f | bin_case_s | bin_default => Error (msg "Cshmgen.make_binarith_int")
   end.
 
 Definition make_mod (e1: expr) (ty1: type) (e2: expr) (ty2: type) :=
@@ -294,7 +327,9 @@ Definition make_cmp (c: comparison) (e1: expr) (ty1: type) (e2: expr) (ty2: type
   | cmp_case_pl => OK (Ebinop (Ocmpu c) e1 (Eunop Ointoflong e2))
   | cmp_case_lp => OK (Ebinop (Ocmpu c) (Eunop Ointoflong e1) e2)
   | cmp_default =>
-      make_binarith (Ocmp c) (Ocmpu c) (Ocmpf c) (Ocmpl c) (Ocmplu c) e1 ty1 e2 ty2
+      make_binarith
+        (Ocmp c) (Ocmpu c) (Ocmpf c) (Ocmpfs c) (Ocmpl c) (Ocmplu c)
+        e1 ty1 e2 ty2
   end.
 
 (** [make_load addr ty_res] loads a value of type [ty_res] from
@@ -374,6 +409,8 @@ Fixpoint transl_expr (a: Clight.expr) {struct a} : res expr :=
       OK(make_intconst n)
   | Clight.Econst_float n _ =>
       OK(make_floatconst n)
+  | Clight.Econst_single n _ =>
+      OK(make_singleconst n)
   | Clight.Econst_long n _ =>
       OK(make_longconst n)
   | Clight.Evar id ty =>
@@ -454,8 +491,9 @@ Fixpoint transl_arglist (al: list Clight.expr) (tyl: typelist)
   | a1 :: a2, Tnil =>
       (* Tolerance for calls to K&R or variadic functions *)
       do ta1 <- transl_expr a1;
+      do ta1' <- make_cast (typeof a1) (default_argument_conversion (typeof a1)) ta1;
       do ta2 <- transl_arglist a2 Tnil;
-      OK (ta1 :: ta2)
+      OK (ta1' :: ta2)
   | _, _ =>
       Error(msg "Cshmgen.transl_arglist: arity mismatch")
   end.
@@ -470,7 +508,7 @@ Fixpoint typlist_of_arglist (al: list Clight.expr) (tyl: typelist)
       typ_of_type ty1 :: typlist_of_arglist a2 ty2
   | a1 :: a2, Tnil =>
       (* Tolerance for calls to K&R or variadic functions *)
-      typ_of_type_default (typeof a1) :: typlist_of_arglist a2 Tnil
+      typ_of_type (default_argument_conversion (typeof a1)) :: typlist_of_arglist a2 Tnil
   end.
 
 (** * Translation of statements *)
diff --git a/cfrontend/Cshmgenproof.v b/cfrontend/Cshmgenproof.v
index 15c4e4cd..a977e0f4 100644
--- a/cfrontend/Cshmgenproof.v
+++ b/cfrontend/Cshmgenproof.v
@@ -159,6 +159,13 @@ Proof.
   intros. unfold make_floatconst. econstructor. reflexivity. 
 Qed.
 
+Lemma make_singleconst_correct:
+  forall n e le m,
+  eval_expr ge e le m (make_singleconst n) (Vsingle n).
+Proof.
+  intros. unfold make_singleconst. econstructor. reflexivity. 
+Qed.
+
 Lemma make_longconst_correct:
   forall n e le m,
   eval_expr ge e le m (make_longconst n) (Vlong n).
@@ -166,62 +173,35 @@ Proof.
   intros. unfold make_floatconst. econstructor. reflexivity. 
 Qed.
 
-Lemma make_floatofint_correct:
-  forall a n sg sz e le m,
-  eval_expr ge e le m a (Vint n) ->
-  eval_expr ge e le m (make_floatofint a sg sz) (Vfloat(cast_int_float sg sz n)).
+Lemma make_singleoffloat_correct:
+  forall a n e le m,
+  eval_expr ge e le m a (Vfloat n) ->
+  eval_expr ge e le m (make_singleoffloat a) (Vsingle (Float.to_single n)).
 Proof.
-  intros. unfold make_floatofint, cast_int_float.
-  destruct sz.
-  destruct sg. 
-  rewrite Float.singleofint_floatofint. econstructor. econstructor; eauto. simpl; reflexivity. auto.
-  rewrite Float.singleofintu_floatofintu. econstructor. econstructor; eauto. simpl; reflexivity. auto.
-  destruct sg; econstructor; eauto. 
+  intros. econstructor. eauto. auto.
 Qed.
 
-Lemma make_intoffloat_correct:
-  forall e le m a sg f i,
-  eval_expr ge e le m a (Vfloat f) ->
-  cast_float_int sg f = Some i ->
-  eval_expr ge e le m (make_intoffloat a sg) (Vint i).
+Lemma make_floatofsingle_correct:
+  forall a n e le m,
+  eval_expr ge e le m a (Vsingle n) ->
+  eval_expr ge e le m (make_floatofsingle a) (Vfloat (Float.of_single n)).
 Proof.
-  unfold cast_float_int, make_intoffloat; intros.
-  destruct sg; econstructor; eauto; simpl; rewrite H0; auto.
+  intros. econstructor. eauto. auto.
 Qed.
 
-Lemma make_longofint_correct:
+Lemma make_floatofint_correct:
   forall a n sg e le m,
   eval_expr ge e le m a (Vint n) ->
-  eval_expr ge e le m (make_longofint a sg) (Vlong(cast_int_long sg n)).
+  eval_expr ge e le m (make_floatofint a sg) (Vfloat(cast_int_float sg n)).
 Proof.
-  intros. unfold make_longofint, cast_int_long. 
+  intros. unfold make_floatofint, cast_int_float.
   destruct sg; econstructor; eauto. 
 Qed.
 
-Lemma make_floatoflong_correct:
-  forall a n sg sz e le m,
-  eval_expr ge e le m a (Vlong n) ->
-  eval_expr ge e le m (make_floatoflong a sg sz) (Vfloat(cast_long_float sg sz n)).
-Proof.
-  intros. unfold make_floatoflong, cast_int_long. 
-  destruct sg; destruct sz; econstructor; eauto. 
-Qed.
-
-Lemma make_longoffloat_correct:
-  forall e le m a sg f i,
-  eval_expr ge e le m a (Vfloat f) ->
-  cast_float_long sg f = Some i ->
-  eval_expr ge e le m (make_longoffloat a sg) (Vlong i).
-Proof.
-  unfold cast_float_long, make_longoffloat; intros.
-  destruct sg; econstructor; eauto; simpl; rewrite H0; auto.
-Qed.
-
 Hint Resolve make_intconst_correct make_floatconst_correct make_longconst_correct
-             make_floatofint_correct make_intoffloat_correct
-             make_longofint_correct
-             make_floatoflong_correct make_longoffloat_correct
-             eval_Eunop eval_Ebinop: cshm.
+             make_singleconst_correct make_singleoffloat_correct make_floatofsingle_correct
+             make_floatofint_correct: cshm.
+Hint Constructors eval_expr eval_exprlist: cshm.
 Hint Extern 2 (@eq trace _ _) => traceEq: cshm.
 
 Lemma make_cmp_ne_zero_correct:
@@ -248,6 +228,9 @@ Proof.
   inv H. econstructor; eauto. rewrite H6. decEq. decEq. 
   simpl in H6. inv H6. unfold Val.cmp in H0. eauto.
   inv H. econstructor; eauto. rewrite H6. decEq. decEq. 
+  simpl in H6. unfold Val.cmpfs in H6.
+  destruct (Val.cmpfs_bool c v1 v2) as [[]|]; inv H6; reflexivity. 
+  inv H. econstructor; eauto. rewrite H6. decEq. decEq. 
   simpl in H6. unfold Val.cmpl in H6.
   destruct (Val.cmpl_bool c v1 v2) as [[]|]; inv H6; reflexivity. 
   inv H. econstructor; eauto. rewrite H6. decEq. decEq. 
@@ -268,16 +251,7 @@ Proof.
   apply make_cmp_ne_zero_correct; auto.
 Qed.
 
-Lemma make_cast_float_correct:
-  forall e le m a n sz,
-  eval_expr ge e le m a (Vfloat n) ->
-  eval_expr ge e le m (make_cast_float a sz) (Vfloat (cast_float_float sz n)).
-Proof.
-  intros. unfold make_cast_float, cast_float_float. 
-  destruct sz. eauto with cshm. auto.
-Qed.
-
-Hint Resolve make_cast_int_correct make_cast_float_correct: cshm.
+Hint Resolve make_cast_int_correct: cshm.
 
 Lemma make_cast_correct:
   forall e le m a b v ty1 ty2 v',
@@ -288,14 +262,40 @@ Lemma make_cast_correct:
 Proof.
   intros. unfold make_cast, sem_cast in *;
   destruct (classify_cast ty1 ty2); inv H; destruct v; inv H1; eauto with cshm.
+  (* single -> int *)
+  unfold make_singleofint, cast_int_float. destruct si1; eauto with cshm. 
   (* float -> int *)
-  destruct (cast_float_int si2 f) as [i|] eqn:E; inv H2. eauto with cshm.
+  destruct (cast_float_int si2 f) as [i|] eqn:E; inv H2.
+  apply make_cast_int_correct. 
+  unfold cast_float_int in E. unfold make_intoffloat.
+  destruct si2; econstructor; eauto; simpl; rewrite E; auto.
+  (* single -> int *)
+  destruct (cast_single_int si2 f) as [i|] eqn:E; inv H2.
+  apply make_cast_int_correct. 
+  unfold cast_single_int in E. unfold make_intofsingle.
+  destruct si2; econstructor; eauto with cshm; simpl; rewrite E; auto.
+  (* long -> int *)
+  unfold make_longofint, cast_int_long. destruct si1; eauto with cshm.
+  (* long -> float *)
+  unfold make_floatoflong, cast_long_float. destruct si1; eauto with cshm.
+  (* long -> single *)
+  unfold make_singleoflong, cast_long_single. destruct si1; eauto with cshm.
   (* float -> long *)
-  destruct (cast_float_long si2 f) as [i|] eqn:E; inv H2. eauto with cshm.
+  destruct (cast_float_long si2 f) as [i|] eqn:E; inv H2.
+  unfold cast_float_long in E. unfold make_longoffloat.
+  destruct si2; econstructor; eauto; simpl; rewrite E; auto.
+  (* single -> long *)
+  destruct (cast_single_long si2 f) as [i|] eqn:E; inv H2.
+  unfold cast_single_long in E. unfold make_longofsingle.
+  destruct si2; econstructor; eauto with cshm; simpl; rewrite E; auto.
   (* float -> bool *)
   econstructor; eauto with cshm.
   simpl. unfold Val.cmpf, Val.cmpf_bool. rewrite Float.cmp_ne_eq.
   destruct (Float.cmp Ceq f Float.zero); auto.
+  (* single -> bool *)
+  econstructor; eauto with cshm.
+  simpl. unfold Val.cmpfs, Val.cmpfs_bool. rewrite Float32.cmp_ne_eq.
+  destruct (Float32.cmp Ceq f Float32.zero); auto.
   (* long -> bool *)
   econstructor; eauto with cshm.
   simpl. unfold Val.cmpl, Val.cmpl_bool, Int64.cmp.
@@ -327,6 +327,10 @@ Proof.
   econstructor; split. econstructor; eauto with cshm. simpl. eauto. 
   unfold Val.cmpf, Val.cmpf_bool. simpl. rewrite <- Float.cmp_ne_eq. 
   destruct (Float.cmp Cne f Float.zero); constructor. 
+(* single *)
+  econstructor; split. econstructor; eauto with cshm. simpl. eauto. 
+  unfold Val.cmpfs, Val.cmpfs_bool. simpl. rewrite <- Float32.cmp_ne_eq. 
+  destruct (Float32.cmp Cne f Float32.zero); constructor. 
 (* pointer *)
   econstructor; split. econstructor; eauto with cshm. simpl. eauto. 
   unfold Val.cmpu, Val.cmpu_bool. simpl.
@@ -357,6 +361,9 @@ Lemma make_absfloat_correct:
 Proof.
   unfold sem_absfloat, make_absfloat; intros until m; intros SEM MAKE EV1;
   destruct (classify_neg tya); inv MAKE; destruct va; inv SEM; eauto with cshm.
+  unfold make_floatoflong, cast_long_float. destruct s.
+  econstructor. econstructor; simpl; eauto. simpl; eauto. simpl; eauto. 
+  econstructor. econstructor; simpl; eauto. simpl; eauto. simpl; eauto. 
 Qed.
 
 Lemma make_notbool_correct:
@@ -396,7 +403,8 @@ Section MAKE_BIN.
 Variable sem_int: signedness -> int -> int -> option val.
 Variable sem_long: signedness -> int64 -> int64 -> option val.
 Variable sem_float: float -> float -> option val.
-Variables iop iopu fop lop lopu: binary_operation.
+Variable sem_single: float32 -> float32 -> option val.
+Variables iop iopu fop sop lop lopu: binary_operation.
 
 Hypothesis iop_ok:
   forall x y m, eval_binop iop (Vint x) (Vint y) m = sem_int Signed x y.
@@ -408,11 +416,13 @@ Hypothesis lopu_ok:
   forall x y m, eval_binop lopu (Vlong x) (Vlong y) m = sem_long Unsigned x y.
 Hypothesis fop_ok:
   forall x y m, eval_binop fop (Vfloat x) (Vfloat y) m = sem_float x y.
+Hypothesis sop_ok:
+  forall x y m, eval_binop sop (Vsingle x) (Vsingle y) m = sem_single x y.
 
 Lemma make_binarith_correct:
   binary_constructor_correct
-    (make_binarith iop iopu fop lop lopu)
-    (sem_binarith sem_int sem_long sem_float).
+    (make_binarith iop iopu fop sop lop lopu)
+    (sem_binarith sem_int sem_long sem_float sem_single).
 Proof.
   red; unfold make_binarith, sem_binarith;
   intros until m; intros SEM MAKE EV1 EV2.
@@ -429,12 +439,13 @@ Proof.
 - destruct s; inv H0; econstructor; eauto with cshm. 
   rewrite lop_ok; auto. rewrite lopu_ok; auto.
 - erewrite <- fop_ok in SEM; eauto with cshm.
+- erewrite <- sop_ok in SEM; eauto with cshm.
 Qed.
 
 Lemma make_binarith_int_correct:
   binary_constructor_correct
     (make_binarith_int iop iopu lop lopu)
-    (sem_binarith sem_int sem_long (fun x y => None)).
+    (sem_binarith sem_int sem_long (fun x y => None) (fun x y => None)).
 Proof.
   red; unfold make_binarith_int, sem_binarith;
   intros until m; intros SEM MAKE EV1 EV2.
@@ -930,6 +941,8 @@ Proof.
   apply make_intconst_correct.
 (* const float *)
   apply make_floatconst_correct.
+(* const single *)
+  apply make_singleconst_correct.
 (* const long *)
   apply make_longconst_correct.
 (* temp var *)
diff --git a/cfrontend/Ctypes.v b/cfrontend/Ctypes.v
index 41d0dcbf..c437a6bc 100644
--- a/cfrontend/Ctypes.v
+++ b/cfrontend/Ctypes.v
@@ -196,6 +196,18 @@ Definition typeconv (ty: type) : type :=
   | _                   => remove_attributes ty
   end.
 
+(** Default conversion for arguments to an unprototyped or variadic function.
+  Like [typeconv] but also converts single floats to double floats. *)
+
+Definition default_argument_conversion (ty: type) : type :=
+  match ty with
+  | Tint (I8 | I16 | IBool) _ _ => Tint I32 Signed noattr
+  | Tfloat _ _          => Tfloat F64 noattr
+  | Tarray t sz a       => Tpointer t noattr
+  | Tfunction _ _ _     => Tpointer ty noattr
+  | _                   => remove_attributes ty
+  end.
+
 (** * Operations over types *)
 
 (** Alignment of a type, in bytes. *)
@@ -723,12 +735,3 @@ Fixpoint typlist_of_typelist (tl: typelist) : list AST.typ :=
 Definition signature_of_type (args: typelist) (res: type) (cc: calling_convention): signature :=
   mksignature (typlist_of_typelist args) (opttyp_of_type res) cc.
 
-(** Like [typ_of_type], but apply default argument promotion. *)
-
-Definition typ_of_type_default (t: type) : AST.typ :=
-  match t with
-  | Tfloat _ _ => AST.Tfloat
-  | Tlong _ _ => AST.Tlong
-  | _ => AST.Tint
-  end.
-
diff --git a/cfrontend/Initializers.v b/cfrontend/Initializers.v
index 4054f6e1..3454ddcf 100644
--- a/cfrontend/Initializers.v
+++ b/cfrontend/Initializers.v
@@ -55,7 +55,7 @@ Fixpoint constval (a: expr) : res val :=
   match a with
   | Eval v ty =>
       match v with
-      | Vint _ | Vfloat _ | Vlong _ => OK v
+      | Vint _ | Vfloat _ | Vsingle _ | Vlong _ => OK v
       | Vptr _ _ | Vundef => Error(msg "illegal constant")
       end
   | Evalof l ty =>
@@ -156,7 +156,7 @@ Definition transl_init_single (ty: type) (a: expr) : res init_data :=
   | Vint n, Tpointer _ _ => OK(Init_int32 n)
   | Vint n, Tcomp_ptr _ _ => OK(Init_int32 n)
   | Vlong n, Tlong _ _ => OK(Init_int64 n)
-  | Vfloat f, Tfloat F32 _ => OK(Init_float32 f)
+  | Vsingle f, Tfloat F32 _ => OK(Init_float32 f)
   | Vfloat f, Tfloat F64 _ => OK(Init_float64 f)
   | Vptr id ofs, Tint I32 sg _ => OK(Init_addrof id ofs)
   | Vptr id ofs, Tpointer _ _ => OK(Init_addrof id ofs)
diff --git a/cfrontend/Initializersproof.v b/cfrontend/Initializersproof.v
index f11901d7..16004fc0 100644
--- a/cfrontend/Initializersproof.v
+++ b/cfrontend/Initializersproof.v
@@ -506,6 +506,9 @@ Proof.
   (* float *)
   destruct ty; try discriminate. 
   destruct f1; inv EQ2; simpl in H2; inv H2; assumption.
+  (* single *)
+  destruct ty; try discriminate. 
+  destruct f1; inv EQ2; simpl in H2; inv H2; assumption.
   (* pointer *)
   unfold inj in H.
   assert (data = Init_addrof b1 ofs1 /\ chunk = Mint32).
@@ -525,20 +528,18 @@ Lemma transl_init_single_size:
   Genv.init_data_size data = sizeof ty.
 Proof.
   intros. monadInv H. destruct x0. 
-  monadInv EQ2.
-  destruct ty; try discriminate. 
+- monadInv EQ2.
+- destruct ty; try discriminate. 
   destruct i0; inv EQ2; auto.
   inv EQ2; auto.
   inv EQ2; auto.
-  destruct ty; inv EQ2; auto.
-  destruct ty; try discriminate.
+- destruct ty; inv EQ2; auto.
+- destruct ty; try discriminate.
   destruct f0; inv EQ2; auto.
-  destruct ty; try discriminate.
-  destruct i0; auto.
-  inv EQ2.
-  inv EQ2.
-  inv EQ2; auto.
-  inv EQ2.
+- destruct ty; try discriminate.
+  destruct f0; inv EQ2; auto.
+- destruct ty; try discriminate.
+  destruct i0; inv EQ2; auto.
   inv EQ2; auto.
   inv EQ2; auto.
 Qed.
diff --git a/cfrontend/PrintClight.ml b/cfrontend/PrintClight.ml
index 49705c46..c5a6e6e1 100644
--- a/cfrontend/PrintClight.ml
+++ b/cfrontend/PrintClight.ml
@@ -43,6 +43,7 @@ let rec precedence = function
   | Efield _ -> (16, LtoR)
   | Econst_int _ -> (16, NA)
   | Econst_float _ -> (16, NA)
+  | Econst_single _ -> (16, NA)
   | Econst_long _ -> (16, NA)
   | Eunop _ -> (15, RtoL)
   | Eaddrof _ -> (15, RtoL)
@@ -82,6 +83,8 @@ let rec expr p (prec, e) =
       fprintf p "%ld" (camlint_of_coqint n)
   | Econst_float(f, _) ->
       fprintf p "%F" (camlfloat_of_coqfloat f)
+  | Econst_single(f, _) ->
+      fprintf p "%Ff" (camlfloat_of_coqfloat32 f)
   | Econst_long(n, Tlong(Unsigned, _)) ->
       fprintf p "%LuLLU" (camlint64_of_coqint n)
   | Econst_long(n, _) ->
@@ -269,6 +272,7 @@ let rec collect_expr e =
   match e with
   | Econst_int _ -> ()
   | Econst_float _ -> ()
+  | Econst_single _ -> ()
   | Econst_long _ -> ()
   | Evar _ -> ()
   | Etempvar _ -> ()
diff --git a/cfrontend/PrintCsyntax.ml b/cfrontend/PrintCsyntax.ml
index 9441d71e..4e0ee7f3 100644
--- a/cfrontend/PrintCsyntax.ml
+++ b/cfrontend/PrintCsyntax.ml
@@ -191,6 +191,8 @@ let print_typed_value p v ty =
       fprintf p "%ld" (camlint_of_coqint n)
   | Vfloat f, _ ->
       fprintf p "%F" (camlfloat_of_coqfloat f)
+  | Vsingle f, _ ->
+      fprintf p "%Ff" (camlfloat_of_coqfloat32 f)
   | Vlong n, Tlong(Unsigned, _) ->
       fprintf p "%LuLLU" (camlint64_of_coqint n)
   | Vlong n, _ ->
diff --git a/cfrontend/SimplExpr.v b/cfrontend/SimplExpr.v
index f9aa8dbf..05d9c860 100644
--- a/cfrontend/SimplExpr.v
+++ b/cfrontend/SimplExpr.v
@@ -116,6 +116,7 @@ Function eval_simpl_expr (a: expr) : option val :=
   match a with
   | Econst_int n _ => Some(Vint n)
   | Econst_float n _ => Some(Vfloat n)
+  | Econst_single n _ => Some(Vsingle n)
   | Econst_long n _ => Some(Vlong n)
   | Ecast b ty => 
       match eval_simpl_expr b with
@@ -242,6 +243,8 @@ Fixpoint transl_expr (dst: destination) (a: Csyntax.expr) : mon (list statement
       ret (finish dst nil (Econst_int n ty))
   | Csyntax.Eval (Vfloat n) ty =>
       ret (finish dst nil (Econst_float n ty))
+  | Csyntax.Eval (Vsingle n) ty =>
+      ret (finish dst nil (Econst_single n ty))
   | Csyntax.Eval (Vlong n) ty =>
       ret (finish dst nil (Econst_long n ty))
   | Csyntax.Eval _ ty =>
diff --git a/cfrontend/SimplExprspec.v b/cfrontend/SimplExprspec.v
index 3dae7ab1..1ef2e76d 100644
--- a/cfrontend/SimplExprspec.v
+++ b/cfrontend/SimplExprspec.v
@@ -799,6 +799,10 @@ Opaque makeif.
     constructor. auto. intros; constructor.
     constructor.
     constructor. auto. intros; constructor.
+  intros. destruct dst; simpl in *; inv H2. 
+    constructor. auto. intros; constructor.
+    constructor.
+    constructor. auto. intros; constructor.
 (* var *)
   monadInv H; econstructor; split; auto with gensym. UseFinish. constructor.
 (* field *)
diff --git a/cfrontend/SimplLocals.v b/cfrontend/SimplLocals.v
index 29565089..9c529280 100644
--- a/cfrontend/SimplLocals.v
+++ b/cfrontend/SimplLocals.v
@@ -45,7 +45,8 @@ Definition make_cast (a: expr) (tto: type) : expr :=
   match classify_cast (typeof a) tto with
   | cast_case_neutral => a
   | cast_case_i2i I32 _ => a
-  | cast_case_f2f F64 => a
+  | cast_case_f2f => a
+  | cast_case_s2s => a
   | cast_case_l2l => a
   | cast_case_struct _ _ _ _ => a
   | cast_case_union _ _ _ _ => a
@@ -59,6 +60,7 @@ Fixpoint simpl_expr (cenv: compilenv) (a: expr) : expr :=
   match a with
   | Econst_int _ _ => a
   | Econst_float _ _ => a
+  | Econst_single _ _ => a
   | Econst_long _ _ => a
   | Evar id ty => if VSet.mem id cenv then Etempvar id ty else Evar id ty
   | Etempvar id ty => Etempvar id ty
@@ -157,6 +159,7 @@ Fixpoint addr_taken_expr (a: expr): VSet.t :=
   match a with
   | Econst_int _ _ => VSet.empty
   | Econst_float _ _ => VSet.empty
+  | Econst_single _ _ => VSet.empty
   | Econst_long _ _ => VSet.empty
   | Evar id ty => VSet.empty
   | Etempvar id ty => VSet.empty
diff --git a/cfrontend/SimplLocalsproof.v b/cfrontend/SimplLocalsproof.v
index 6eec8cce..6024de4d 100644
--- a/cfrontend/SimplLocalsproof.v
+++ b/cfrontend/SimplLocalsproof.v
@@ -189,9 +189,10 @@ Inductive val_casted: val -> type -> Prop :=
   | val_casted_int: forall sz si attr n,
       cast_int_int sz si n = n ->
       val_casted (Vint n) (Tint sz si attr)
-  | val_casted_float: forall sz attr n,
-      cast_float_float sz n = n ->
-      val_casted (Vfloat n) (Tfloat sz attr)
+  | val_casted_float: forall attr n,
+       val_casted (Vfloat n) (Tfloat F64 attr)
+  | val_casted_single: forall attr n,
+       val_casted (Vsingle n) (Tfloat F32 attr)
   | val_casted_long: forall si attr n,
       val_casted (Vlong n) (Tlong si attr)
   | val_casted_ptr_ptr: forall b ofs ty attr,
@@ -220,14 +221,6 @@ Proof.
   destruct (Int.eq i Int.zero); auto.
 Qed.
 
-Remark cast_float_float_idem:
-  forall sz f, cast_float_float sz (cast_float_float sz f) = cast_float_float sz f.
-Proof.
-  intros; destruct sz; simpl.
-  apply Float.singleoffloat_idem; auto.
-  auto.
-Qed.
-
 Lemma cast_val_is_casted:
   forall v ty ty' v', sem_cast v ty ty' = Some v' -> val_casted v' ty'.
 Proof.
@@ -235,28 +228,32 @@ Proof.
 (* void *)
   constructor.
 (* int *)
-  destruct i; destruct ty; simpl in H; try discriminate; destruct v; inv H.
+  destruct i; destruct ty; simpl in H; try (destruct f); try discriminate; destruct v; inv H.
   constructor. apply (cast_int_int_idem I8 s).
   constructor. apply (cast_int_int_idem I8 s).
-  destruct (cast_float_int s f0); inv H1.   constructor. apply (cast_int_int_idem I8 s). 
+  destruct (cast_single_int s f); inv H1. constructor. apply (cast_int_int_idem I8 s).
+  destruct (cast_float_int s f); inv H1. constructor. apply (cast_int_int_idem I8 s).
   constructor. apply (cast_int_int_idem I16 s).
   constructor. apply (cast_int_int_idem I16 s).
-  destruct (cast_float_int s f0); inv H1.   constructor. apply (cast_int_int_idem I16 s). 
+  destruct (cast_single_int s f); inv H1. constructor. apply (cast_int_int_idem I16 s).
+  destruct (cast_float_int s f); inv H1. constructor. apply (cast_int_int_idem I16 s).
   constructor. auto.
   constructor.
   constructor. auto. 
-  destruct (cast_float_int s f0); inv H1. constructor. auto.
-  constructor. auto.
-  constructor.
+  destruct (cast_single_int s f); inv H1. constructor. auto.
+  destruct (cast_float_int s f); inv H1. constructor; auto.
   constructor; auto.
   constructor.
   constructor; auto.
+  constructor.
   constructor; auto.
+  constructor.
   constructor; auto.
-  constructor; auto.
+  constructor.
   constructor. simpl. destruct (Int.eq i0 Int.zero); auto.
   constructor. simpl. destruct (Int64.eq i Int64.zero); auto.
-  constructor. simpl. destruct (Float.cmp Ceq f0 Float.zero); auto.
+  constructor. simpl. destruct (Float32.cmp Ceq f Float32.zero); auto.
+  constructor. simpl. destruct (Float.cmp Ceq f Float.zero); auto.
   constructor. simpl. destruct (Int.eq i Int.zero); auto.
   constructor; auto.
   constructor. simpl. destruct (Int.eq i Int.zero); auto.
@@ -266,23 +263,17 @@ Proof.
   constructor. simpl. destruct (Int.eq i0 Int.zero); auto.
   constructor; auto.
 (* long *)
-  destruct ty; try discriminate.
+  destruct ty; try (destruct f); try discriminate.
   destruct v; inv H. constructor.
   destruct v; inv H. constructor.
-  destruct v; try discriminate. destruct (cast_float_long s f0); inv H. constructor.
+  destruct v; try discriminate. destruct (cast_single_long s f); inv H. constructor.
+  destruct v; try discriminate. destruct (cast_float_long s f); inv H. constructor.
   destruct v; inv H. constructor.
   destruct v; inv H. constructor.
   destruct v; inv H. constructor.
   destruct v; inv H. constructor.
 (* float *)
-  destruct ty; simpl in H; try discriminate; destruct v; inv H.
-  constructor. unfold cast_float_float, cast_int_float.
-  destruct f; destruct s; auto.
-  rewrite Float.singleofint_floatofint. apply Float.singleoffloat_idem.
-  rewrite Float.singleofintu_floatofintu. apply Float.singleoffloat_idem.
-  constructor. unfold cast_float_float, cast_long_float.
-  destruct f; destruct s; auto. apply Float.singleoflong_idem. apply Float.singleoflongu_idem.
-  constructor. apply cast_float_float_idem.
+  destruct f; destruct ty; simpl in H; try (destruct f); try discriminate; destruct v; inv H; constructor.
 (* pointer *)
   destruct ty; simpl in H; try discriminate; destruct v; inv H; try constructor.
 (* impossible cases *)
@@ -310,7 +301,8 @@ Proof.
   clear H1. inv H0. auto.
   inversion H0; clear H0; subst chunk. simpl in *. 
   destruct (Int.eq n Int.zero); subst n; reflexivity.
-  destruct sz; inversion H0; clear H0; subst chunk; simpl in *; congruence.
+  inv H0; auto.
+  inv H0; auto.
   inv H0; auto.
   inv H0; auto.
   inv H0; auto.
@@ -327,7 +319,6 @@ Lemma cast_val_casted:
 Proof.
   intros. inversion H; clear H; subst v ty; unfold sem_cast; simpl; auto.
   destruct sz; congruence.
-  congruence.
   unfold proj_sumbool; repeat rewrite dec_eq_true; auto.
   unfold proj_sumbool; repeat rewrite dec_eq_true; auto.
 Qed.
@@ -373,7 +364,8 @@ Proof.
   destruct (classify_cast (typeof a) tto); auto.
   destruct v1; inv H0; auto.
   destruct sz2; auto. destruct v1; inv H0; auto.
-  destruct sz2; auto. destruct v1; inv H0; auto.
+  destruct v1; inv H0; auto.
+  destruct v1; inv H0; auto.
   destruct v1; inv H0; auto.
   destruct v1; try discriminate.
   destruct (ident_eq id1 id2 && fieldlist_eq fld1 fld2); inv H0; auto.
@@ -1416,6 +1408,7 @@ Proof.
 (* const *)
   exists (Vint i); split; auto. constructor.
   exists (Vfloat f0); split; auto. constructor.
+  exists (Vsingle f0); split; auto. constructor.
   exists (Vlong i); split; auto. constructor.
 (* tempvar *)
   exploit me_temps; eauto. intros [[tv [A B]] C]. 
diff --git a/common/AST.v b/common/AST.v
index 29d14524..1c46389e 100644
--- a/common/AST.v
+++ b/common/AST.v
@@ -35,22 +35,15 @@ Definition ident_eq := peq.
 
 Parameter ident_of_string : String.string -> ident.
 
-(** The intermediate languages are weakly typed, using only four
-  types: [Tint] for 32-bit integers and pointers, [Tfloat] for 64-bit
-  floating-point numbers, [Tlong] for 64-bit integers, [Tsingle]
-  for 32-bit floating-point numbers. *)
+(** The intermediate languages are weakly typed, using the following types: *)
 
 Inductive typ : Type :=
-  | Tint
-  | Tfloat
-  | Tlong
-  | Tsingle.
-
-Definition typesize (ty: typ) : Z :=
-  match ty with Tint => 4 | Tfloat => 8 | Tlong => 8 | Tsingle => 4 end.
-
-Lemma typesize_pos: forall ty, typesize ty > 0.
-Proof. destruct ty; simpl; omega. Qed.
+  | Tint                (**r 32-bit integers or pointers *)
+  | Tfloat              (**r 64-bit double-precision floats *)
+  | Tlong               (**r 64-bit integers *)
+  | Tsingle             (**r 32-bit single-precision floats *)
+  | Tany32              (**r any 32-bit value *)
+  | Tany64.             (**r any 64-bit value, i.e. any value *)
 
 Lemma typ_eq: forall (t1 t2: typ), {t1=t2} + {t1<>t2}.
 Proof. decide equality. Defined.
@@ -62,8 +55,22 @@ Definition opt_typ_eq: forall (t1 t2: option typ), {t1=t2} + {t1<>t2}
 Definition list_typ_eq: forall (l1 l2: list typ), {l1=l2} + {l1<>l2}
                      := list_eq_dec typ_eq.
 
-(** All values of type [Tsingle] are also of type [Tfloat].  This
-  corresponds to the following subtyping relation over types. *)
+Definition typesize (ty: typ) : Z :=
+  match ty with
+  | Tint => 4
+  | Tfloat => 8
+  | Tlong => 8
+  | Tsingle => 4
+  | Tany32 => 4
+  | Tany64 => 8
+  end.
+
+Lemma typesize_pos: forall ty, typesize ty > 0.
+Proof. destruct ty; simpl; omega. Qed.
+
+(** All values of size 32 bits are also of type [Tany32].  All values
+  are of type [Tany64].  This corresponds to the following subtyping
+  relation over types. *)
 
 Definition subtype (ty1 ty2: typ) : bool :=
   match ty1, ty2 with
@@ -71,7 +78,8 @@ Definition subtype (ty1 ty2: typ) : bool :=
   | Tlong, Tlong => true
   | Tfloat, Tfloat => true
   | Tsingle, Tsingle => true
-  | Tsingle, Tfloat => true
+  | (Tint | Tsingle | Tany32), Tany32 => true
+  | _, Tany64 => true
   | _, _ => false
   end.
 
@@ -133,7 +141,9 @@ Inductive memory_chunk : Type :=
   | Mint32          (**r 32-bit integer, or pointer *)
   | Mint64          (**r 64-bit integer *)
   | Mfloat32        (**r 32-bit single-precision float *)
-  | Mfloat64.        (**r 64-bit double-precision float *)
+  | Mfloat64        (**r 64-bit double-precision float *)
+  | Many32          (**r any value that fits in 32 bits *)
+  | Many64.         (**r any value *)
 
 Definition chunk_eq: forall (c1 c2: memory_chunk), {c1=c2} + {c1<>c2}.
 Proof. decide equality. Defined.
@@ -151,18 +161,8 @@ Definition type_of_chunk (c: memory_chunk) : typ :=
   | Mint64 => Tlong
   | Mfloat32 => Tsingle
   | Mfloat64 => Tfloat
-  end.
-
-Definition type_of_chunk_use (c: memory_chunk) : typ :=
-  match c with
-  | Mint8signed => Tint
-  | Mint8unsigned => Tint
-  | Mint16signed => Tint
-  | Mint16unsigned => Tint
-  | Mint32 => Tint
-  | Mint64 => Tlong
-  | Mfloat32 => Tfloat
-  | Mfloat64 => Tfloat
+  | Many32 => Tany32
+  | Many64 => Tany64
   end.
 
 (** The chunk that is appropriate to store and reload a value of
@@ -174,6 +174,8 @@ Definition chunk_of_type (ty: typ) :=
   | Tfloat => Mfloat64
   | Tlong => Mint64
   | Tsingle => Mfloat32
+  | Tany32 => Many32
+  | Tany64 => Many64
   end.
 
 (** Initialization data for global variables. *)
@@ -183,7 +185,7 @@ Inductive init_data: Type :=
   | Init_int16: int -> init_data
   | Init_int32: int -> init_data
   | Init_int64: int64 -> init_data
-  | Init_float32: float -> init_data
+  | Init_float32: float32 -> init_data
   | Init_float64: float -> init_data
   | Init_space: Z -> init_data
   | Init_addrof: ident -> int -> init_data.  (**r address of symbol + offset *)
@@ -586,9 +588,9 @@ Definition ef_sig (ef: external_function): signature :=
   | EF_external name sg => sg
   | EF_builtin name sg => sg
   | EF_vload chunk => mksignature (Tint :: nil) (Some (type_of_chunk chunk)) cc_default
-  | EF_vstore chunk => mksignature (Tint :: type_of_chunk_use chunk :: nil) None cc_default
+  | EF_vstore chunk => mksignature (Tint :: type_of_chunk chunk :: nil) None cc_default
   | EF_vload_global chunk _ _ => mksignature nil (Some (type_of_chunk chunk)) cc_default
-  | EF_vstore_global chunk _ _ => mksignature (type_of_chunk_use chunk :: nil) None cc_default
+  | EF_vstore_global chunk _ _ => mksignature (type_of_chunk chunk :: nil) None cc_default
   | EF_malloc => mksignature (Tint :: nil) (Some Tint) cc_default
   | EF_free => mksignature (Tint :: nil) None cc_default
   | EF_memcpy sz al => mksignature (Tint :: Tint :: nil) None cc_default
diff --git a/common/Events.v b/common/Events.v
index 48cd91e4..5eee93a7 100644
--- a/common/Events.v
+++ b/common/Events.v
@@ -57,7 +57,7 @@ Inductive eventval: Type :=
   | EVint: int -> eventval
   | EVlong: int64 -> eventval
   | EVfloat: float -> eventval
-  | EVfloatsingle: float -> eventval
+  | EVsingle: float32 -> eventval
   | EVptr_global: ident -> int -> eventval.
 
 Inductive event: Type :=
@@ -274,8 +274,7 @@ Inductive eventval_match: eventval -> typ -> val -> Prop :=
   | ev_match_float: forall f,
       eventval_match (EVfloat f) Tfloat (Vfloat f)
   | ev_match_single: forall f,
-      Float.is_single f ->
-      eventval_match (EVfloatsingle f) Tsingle (Vfloat f)
+      eventval_match (EVsingle f) Tsingle (Vsingle f)
   | ev_match_ptr: forall id b ofs,
       Genv.find_symbol ge id = Some b ->
       eventval_match (EVptr_global id ofs) Tint (Vptr b ofs).
@@ -388,7 +387,7 @@ Definition eventval_valid (ev: eventval) : Prop :=
   | EVint _ => True
   | EVlong _ => True
   | EVfloat _ => True
-  | EVfloatsingle f => Float.is_single f
+  | EVsingle _ => True
   | EVptr_global id ofs => exists b, Genv.find_symbol ge id = Some b
   end.
 
@@ -397,7 +396,7 @@ Definition eventval_type (ev: eventval) : typ :=
   | EVint _ => Tint
   | EVlong _ => Tlong
   | EVfloat _ => Tfloat
-  | EVfloatsingle _ => Tsingle
+  | EVsingle _ => Tsingle
   | EVptr_global id ofs => Tint
   end.
 
@@ -412,7 +411,7 @@ Proof.
   destruct H1 as [b EQ]. exists (Vptr b i1); constructor; auto.
   exists (Vlong i0); constructor.
   exists (Vfloat f1); constructor.
-  exists (Vfloat f1); constructor; auto.
+  exists (Vsingle f1); constructor; auto.
   exists (Vint i); constructor.
   destruct H1 as [b' EQ]. exists (Vptr b' i0); constructor; auto.
 Qed.
@@ -967,7 +966,7 @@ Qed.
 Lemma volatile_store_ok:
   forall chunk,
   extcall_properties (volatile_store_sem chunk) 
-                     (mksignature (Tint :: type_of_chunk_use chunk :: nil) None cc_default).
+                     (mksignature (Tint :: type_of_chunk chunk :: nil) None cc_default).
 Proof.
   intros; constructor; intros.
 (* well typed *)
@@ -1022,7 +1021,7 @@ Qed.
 Lemma volatile_store_global_ok:
   forall chunk id ofs,
   extcall_properties (volatile_store_global_sem chunk id ofs) 
-                     (mksignature (type_of_chunk_use chunk :: nil) None cc_default).
+                     (mksignature (type_of_chunk chunk :: nil) None cc_default).
 Proof.
   intros; constructor; intros.
 (* well typed *)
diff --git a/common/Globalenvs.v b/common/Globalenvs.v
index bd38fa3d..a34a05dc 100644
--- a/common/Globalenvs.v
+++ b/common/Globalenvs.v
@@ -516,7 +516,7 @@ Definition store_init_data (m: mem) (b: block) (p: Z) (id: init_data) : option m
   | Init_int16 n => Mem.store Mint16unsigned m b p (Vint n)
   | Init_int32 n => Mem.store Mint32 m b p (Vint n)
   | Init_int64 n => Mem.store Mint64 m b p (Vlong n)
-  | Init_float32 n => Mem.store Mfloat32 m b p (Vfloat n)
+  | Init_float32 n => Mem.store Mfloat32 m b p (Vsingle n)
   | Init_float64 n => Mem.store Mfloat64 m b p (Vfloat n)
   | Init_addrof symb ofs =>
       match find_symbol ge symb with
@@ -848,7 +848,7 @@ Fixpoint load_store_init_data (m: mem) (b: block) (p: Z) (il: list init_data) {s
       Mem.load Mint64 m b p = Some(Vlong n)
       /\ load_store_init_data m b (p + 8) il'
   | Init_float32 n :: il' =>
-      Mem.load Mfloat32 m b p = Some(Vfloat(Float.singleoffloat n))
+      Mem.load Mfloat32 m b p = Some(Vsingle n)
       /\ load_store_init_data m b (p + 4) il'
   | Init_float64 n :: il' =>
       Mem.load Mfloat64 m b p = Some(Vfloat n)
@@ -895,7 +895,7 @@ Proof.
   eapply (A Mint16unsigned (Vint i)); eauto.
   eapply (A Mint32 (Vint i)); eauto.
   eapply (A Mint64 (Vlong i)); eauto.
-  eapply (A Mfloat32 (Vfloat f)); eauto.
+  eapply (A Mfloat32 (Vsingle f)); eauto.
   eapply (A Mfloat64 (Vfloat f)); eauto.
   inv Heqo. red; intros. transitivity (Mem.load chunk m1 b p0). 
   eapply store_init_data_list_outside; eauto. right. simpl. xomega. 
diff --git a/common/Memdata.v b/common/Memdata.v
index 1b74705f..96278a29 100644
--- a/common/Memdata.v
+++ b/common/Memdata.v
@@ -39,6 +39,8 @@ Definition size_chunk (chunk: memory_chunk) : Z :=
   | Mint64 => 8
   | Mfloat32 => 4
   | Mfloat64 => 8
+  | Many32 => 4
+  | Many64 => 8
   end.
 
 Lemma size_chunk_pos:
@@ -86,6 +88,8 @@ Definition align_chunk (chunk: memory_chunk) : Z :=
   | Mint64 => 8
   | Mfloat32 => 4
   | Mfloat64 => 4
+  | Many32 => 4
+  | Many64 => 4
   end.
 
 Lemma align_chunk_pos:
@@ -112,12 +116,27 @@ Proof.
         | exists 8; reflexivity ].
 Qed.
 
+Inductive quantity : Type := Q32 | Q64.
+
+Definition quantity_eq (q1 q2: quantity) : {q1 = q2} + {q1 <> q2}.
+Proof. decide equality. Defined.
+Global Opaque quantity_eq.
+
+Definition size_quantity_nat (q: quantity) :=
+  match q with Q32 => 4%nat | Q64 => 8%nat end.
+
+Lemma size_quantity_nat_pos:
+  forall q, exists n, size_quantity_nat q = S n.
+Proof.
+  intros. destruct q; [exists 3%nat | exists 7%nat]; auto.
+Qed.
+
 (** * Memory values *)
 
 (** A ``memory value'' is a byte-sized quantity that describes the current
   content of a memory cell.  It can be either:
 - a concrete 8-bit integer;
-- a byte-sized fragment of an opaque pointer;
+- a byte-sized fragment of an opaque value;
 - the special constant [Undef] that represents uninitialized memory.
 *)
 
@@ -126,7 +145,7 @@ Qed.
 Inductive memval: Type :=
   | Undef: memval
   | Byte: byte -> memval
-  | Pointer: block -> int -> nat -> memval.
+  | Fragment: val -> quantity -> nat -> memval.
 
 (** * Encoding and decoding integers *)
 
@@ -311,25 +330,28 @@ Proof.
   simpl. decEq. auto.
 Qed.
 
-Fixpoint inj_pointer (n: nat) (b: block) (ofs: int) {struct n}: list memval :=
+Fixpoint inj_value_rec (n: nat) (v: val) (q: quantity) {struct n}: list memval :=
   match n with
   | O => nil
-  | S m => Pointer b ofs m :: inj_pointer m b ofs
+  | S m => Fragment v q m :: inj_value_rec m v q
   end.
 
-Fixpoint check_pointer (n: nat) (b: block) (ofs: int) (vl: list memval) 
+Definition inj_value (q: quantity) (v: val): list memval :=
+  inj_value_rec (size_quantity_nat q) v q.
+
+Fixpoint check_value (n: nat) (v: val) (q: quantity) (vl: list memval) 
                        {struct n} : bool :=
   match n, vl with
   | O, nil => true
-  | S m, Pointer b' ofs' m' :: vl' =>
-      eq_block b b' && Int.eq_dec ofs ofs' && beq_nat m m' && check_pointer m b ofs vl'
+  | S m, Fragment v' q' m' :: vl' =>
+      Val.eq v v' && quantity_eq q q' && beq_nat m m' && check_value m v q vl'
   | _, _ => false
   end.
 
-Definition proj_pointer (vl: list memval) : val :=
+Definition proj_value (q: quantity) (vl: list memval) : val :=
   match vl with
-  | Pointer b ofs n :: vl' =>
-      if check_pointer 4%nat b ofs vl then Vptr b ofs else Vundef
+  | Fragment v q' n :: vl' =>
+      if check_value (size_quantity_nat q) v q vl then v else Vundef
   | _ => Vundef
   end.
 
@@ -338,10 +360,12 @@ Definition encode_val (chunk: memory_chunk) (v: val) : list memval :=
   | Vint n, (Mint8signed | Mint8unsigned) => inj_bytes (encode_int 1%nat (Int.unsigned n))
   | Vint n, (Mint16signed | Mint16unsigned) => inj_bytes (encode_int 2%nat (Int.unsigned n))
   | Vint n, Mint32 => inj_bytes (encode_int 4%nat (Int.unsigned n))
-  | Vptr b ofs, Mint32 => inj_pointer 4%nat b ofs
+  | Vptr b ofs, Mint32 => inj_value Q32 v
   | Vlong n, Mint64 => inj_bytes (encode_int 8%nat (Int64.unsigned n))
-  | Vfloat n, Mfloat32 => inj_bytes (encode_int 4%nat (Int.unsigned (Float.bits_of_single n)))
-  | Vfloat n, Mfloat64 => inj_bytes (encode_int 8%nat (Int64.unsigned (Float.bits_of_double n)))
+  | Vsingle n, Mfloat32 => inj_bytes (encode_int 4%nat (Int.unsigned (Float32.to_bits n)))
+  | Vfloat n, Mfloat64 => inj_bytes (encode_int 8%nat (Int64.unsigned (Float.to_bits n)))
+  | _, Many32 => inj_value Q32 v
+  | _, Many64 => inj_value Q64 v
   | _, _ => list_repeat (size_chunk_nat chunk) Undef
   end.
 
@@ -355,12 +379,15 @@ Definition decode_val (chunk: memory_chunk) (vl: list memval) : val :=
       | Mint16unsigned => Vint(Int.zero_ext 16 (Int.repr (decode_int bl)))
       | Mint32 => Vint(Int.repr(decode_int bl))
       | Mint64 => Vlong(Int64.repr(decode_int bl))
-      | Mfloat32 => Vfloat(Float.single_of_bits (Int.repr (decode_int bl)))
-      | Mfloat64 => Vfloat(Float.double_of_bits (Int64.repr (decode_int bl)))
+      | Mfloat32 => Vsingle(Float32.of_bits (Int.repr (decode_int bl)))
+      | Mfloat64 => Vfloat(Float.of_bits (Int64.repr (decode_int bl)))
+      | Many32 => Vundef
+      | Many64 => Vundef
       end
   | None =>
       match chunk with
-      | Mint32 => proj_pointer vl
+      | Mint32 | Many32 => Val.load_result chunk (proj_value Q32 vl)
+      | Many64 => Val.load_result chunk (proj_value Q64 vl)
       | _ => Vundef
       end
   end.
@@ -374,14 +401,40 @@ Proof.
         | rewrite length_inj_bytes; apply encode_int_length ].
 Qed.
 
-Lemma check_inj_pointer:
-  forall b ofs n, check_pointer n b ofs (inj_pointer n b ofs) = true.
+Lemma check_inj_value:
+  forall v q n, check_value n v q (inj_value_rec n v q) = true.
 Proof.
   induction n; simpl. auto. 
   unfold proj_sumbool. rewrite dec_eq_true. rewrite dec_eq_true.  
   rewrite <- beq_nat_refl. simpl; auto.
 Qed.
 
+Lemma proj_inj_value:
+  forall q v, proj_value q (inj_value q v) = v.
+Proof.
+  intros. unfold proj_value, inj_value. destruct (size_quantity_nat_pos q) as [n EQ]. 
+  rewrite EQ at 1. simpl. rewrite check_inj_value. auto.
+Qed.
+
+Remark in_inj_value:
+  forall mv v q, In mv (inj_value q v) -> exists n, mv = Fragment v q n.
+Proof.
+Local Transparent inj_value.
+  unfold inj_value; intros until q. generalize (size_quantity_nat q). induction n; simpl; intros.
+  contradiction.
+  destruct H. exists n; auto. eauto. 
+Qed.
+
+Lemma proj_inj_value_mismatch:
+  forall q1 q2 v, q1 <> q2 -> proj_value q1 (inj_value q2 v) = Vundef.
+Proof.
+  intros. unfold proj_value. destruct (inj_value q2 v) eqn:V. auto. destruct m; auto.
+  destruct (in_inj_value (Fragment v0 q n) v q2) as [n' EQ]. 
+  rewrite V; auto with coqlib. inv EQ.
+  destruct (size_quantity_nat_pos q1) as [p EQ1]; rewrite EQ1; simpl.
+  unfold proj_sumbool. rewrite dec_eq_true. rewrite dec_eq_false by congruence. auto.
+Qed.
+
 Definition decode_encode_val (v1: val) (chunk1 chunk2: memory_chunk) (v2: val) : Prop :=
   match v1, chunk1, chunk2 with
   | Vundef, _, _ => v2 = Vundef
@@ -394,21 +447,30 @@ Definition decode_encode_val (v1: val) (chunk1 chunk2: memory_chunk) (v2: val) :
   | Vint n, Mint16signed, Mint16unsigned => v2 = Vint(Int.zero_ext 16 n)
   | Vint n, Mint16unsigned, Mint16unsigned => v2 = Vint(Int.zero_ext 16 n)
   | Vint n, Mint32, Mint32 => v2 = Vint n
-  | Vint n, Mint32, Mfloat32 => v2 = Vfloat(Float.single_of_bits n)
-  | Vint n, (Mint64 | Mfloat32 | Mfloat64), _ => v2 = Vundef
+  | Vint n, Many32, (Mint32 | Many32) => v2 = Vint n
+  | Vint n, Mint32, Mfloat32 => v2 = Vsingle(Float32.of_bits n)
+  | Vint n, Many64, Many64 => v2 = Vint n
+  | Vint n, (Mint64 | Mfloat32 | Mfloat64 | Many64), _ => v2 = Vundef
   | Vint n, _, _ => True (**r nothing meaningful to say about v2 *)
-  | Vptr b ofs, Mint32, Mint32 => v2 = Vptr b ofs
+  | Vptr b ofs, (Mint32 | Many32), (Mint32 | Many32) => v2 = Vptr b ofs
+  | Vptr b ofs, Many64, Many64 => v2 = Vptr b ofs
   | Vptr b ofs, _, _ => v2 = Vundef
   | Vlong n, Mint64, Mint64 => v2 = Vlong n
-  | Vlong n, Mint64, Mfloat64 => v2 = Vfloat(Float.double_of_bits n)
-  | Vlong n, (Mint8signed|Mint8unsigned|Mint16signed|Mint16unsigned|Mint32|Mfloat32|Mfloat64), _ => v2 = Vundef
+  | Vlong n, Mint64, Mfloat64 => v2 = Vfloat(Float.of_bits n)
+  | Vlong n, Many64, Many64 => v2 = Vlong n
+  | Vlong n, (Mint8signed|Mint8unsigned|Mint16signed|Mint16unsigned|Mint32|Mfloat32|Mfloat64|Many32), _ => v2 = Vundef
   | Vlong n, _, _ => True (**r nothing meaningful to say about v2 *)
-  | Vfloat f, Mfloat32, Mfloat32 => v2 = Vfloat(Float.singleoffloat f)
-  | Vfloat f, Mfloat32, Mint32 => v2 = Vint(Float.bits_of_single f)
   | Vfloat f, Mfloat64, Mfloat64 => v2 = Vfloat f
-  | Vfloat f, (Mint8signed|Mint8unsigned|Mint16signed|Mint16unsigned|Mint32|Mint64), _ => v2 = Vundef
-  | Vfloat f, Mfloat64, Mint64 => v2 = Vlong(Float.bits_of_double f)
+  | Vfloat f, Mfloat64, Mint64 => v2 = Vlong(Float.to_bits f)
+  | Vfloat f, Many64, Many64 => v2 = Vfloat f
+  | Vfloat f, (Mint8signed|Mint8unsigned|Mint16signed|Mint16unsigned|Mint32|Mfloat32|Mint64|Many32), _ => v2 = Vundef
   | Vfloat f, _, _ => True   (* nothing interesting to say about v2 *)
+  | Vsingle f, Mfloat32, Mfloat32 => v2 = Vsingle f
+  | Vsingle f, Mfloat32, Mint32 => v2 = Vint(Float32.to_bits f)
+  | Vsingle f, Many32, Many32 => v2 = Vsingle f
+  | Vsingle f, Many64, Many64 => v2 = Vsingle f
+  | Vsingle f, (Mint8signed|Mint8unsigned|Mint16signed|Mint16unsigned|Mint32|Mint64|Mfloat64|Many64), _ => v2 = Vundef
+  | Vsingle f, _, _ => True (* nothing interesting to say about v2 *)
   end.
 
 Remark decode_val_undef:
@@ -417,14 +479,23 @@ Proof.
   intros. unfold decode_val. simpl. destruct chunk; auto.
 Qed.
 
+Remark proj_bytes_inj_value:
+  forall q v, proj_bytes (inj_value q v) = None.
+Proof.
+  intros. destruct q; reflexivity. 
+Qed.
+
 Lemma decode_encode_val_general:
   forall v chunk1 chunk2,
   decode_encode_val v chunk1 chunk2 (decode_val chunk2 (encode_val chunk1 v)).
 Proof.
-Opaque inj_pointer.
+Opaque inj_value.
   intros.
-  destruct v; destruct chunk1; simpl; try (apply decode_val_undef);
-  destruct chunk2; unfold decode_val; auto; try (rewrite proj_inj_bytes).
+  destruct v; destruct chunk1 eqn:C1; simpl; try (apply decode_val_undef);
+  destruct chunk2 eqn:C2; unfold decode_val; auto; 
+  try (rewrite proj_inj_bytes); try (rewrite proj_bytes_inj_value);
+  try (rewrite proj_inj_value); try (rewrite proj_inj_value_mismatch by congruence);
+  auto.
   rewrite decode_encode_int_1. decEq. apply Int.sign_ext_zero_ext. omega.
   rewrite decode_encode_int_1. decEq. apply Int.zero_ext_idem. omega.
   rewrite decode_encode_int_1. decEq. apply Int.sign_ext_zero_ext. omega.
@@ -437,14 +508,10 @@ Opaque inj_pointer.
   rewrite decode_encode_int_4. auto.
   rewrite decode_encode_int_8. auto.
   rewrite decode_encode_int_8. auto.
-  rewrite decode_encode_int_4. auto.
-  rewrite decode_encode_int_4. decEq. apply Float.single_of_bits_of_single.
   rewrite decode_encode_int_8. auto.
-  rewrite decode_encode_int_8. decEq. apply Float.double_of_bits_of_double.
-  change (proj_bytes (inj_pointer 4 b i)) with (@None (list byte)). simpl. 
-  unfold proj_pointer. generalize (check_inj_pointer b i 4%nat).
-Transparent inj_pointer. 
-  simpl. intros EQ; rewrite EQ; auto.
+  rewrite decode_encode_int_8. decEq. apply Float.of_to_bits.
+  rewrite decode_encode_int_4. auto.
+  rewrite decode_encode_int_4. decEq. apply Float32.of_to_bits.
 Qed.
 
 Lemma decode_encode_val_similar:
@@ -465,12 +532,8 @@ Lemma decode_val_type:
 Proof.
   intros. unfold decode_val. 
   destruct (proj_bytes cl). 
-  destruct chunk; simpl; auto. apply Float.single_of_bits_is_single.
   destruct chunk; simpl; auto.
-  unfold proj_pointer. destruct cl; try (exact I).
-  destruct m; try (exact I).
-  destruct (check_pointer 4%nat b i (Pointer b i n :: cl));
-  exact I.
+  destruct chunk; exact I || apply Val.load_result_type.
 Qed.
 
 Lemma encode_val_int8_signed_unsigned:
@@ -518,7 +581,6 @@ Lemma decode_val_cast:
   | Mint8unsigned => v = Val.zero_ext 8 v
   | Mint16signed => v = Val.sign_ext 16 v
   | Mint16unsigned => v = Val.zero_ext 16 v
-  | Mfloat32 => v = Val.singleoffloat v
   | _ => True
   end.
 Proof.
@@ -527,145 +589,132 @@ Proof.
   unfold Val.zero_ext. rewrite Int.zero_ext_idem; auto. omega. 
   unfold Val.sign_ext. rewrite Int.sign_ext_idem; auto. omega.
   unfold Val.zero_ext. rewrite Int.zero_ext_idem; auto. omega.
-  simpl. rewrite Float.singleoffloat_of_bits. auto.
 Qed.
 
 (** Pointers cannot be forged. *)
 
-Definition memval_valid_first (mv: memval) : Prop :=
-  match mv with
-  | Pointer b ofs n => n = 3%nat
-  | _ => True
-  end.
-
-Definition memval_valid_cont (mv: memval) : Prop :=
-  match mv with
-  | Pointer b ofs n => n <> 3%nat
-  | _ => True
+Definition quantity_chunk (chunk: memory_chunk) :=
+  match chunk with
+  | Mint64 | Mfloat64 | Many64 => Q64
+  | _ => Q32
   end.
 
-Inductive encoding_shape: list memval -> Prop :=
-  | encoding_shape_intro: forall mv1 mvl,
-      memval_valid_first mv1 ->
-      (forall mv, In mv mvl -> memval_valid_cont mv) ->
-      encoding_shape (mv1 :: mvl).
-
-Lemma encode_val_shape:
-  forall chunk v, encoding_shape (encode_val chunk v).
+Inductive shape_encoding (chunk: memory_chunk) (v: val): list memval -> Prop :=
+  | shape_encoding_f: forall q i mvl,
+      (chunk = Mint32 \/ chunk = Many32 \/ chunk = Many64) ->
+      q = quantity_chunk chunk ->
+      S i = size_quantity_nat q ->
+      (forall mv, In mv mvl -> exists j, mv = Fragment v q j /\ S j <> size_quantity_nat q) ->
+      shape_encoding chunk v (Fragment v q i :: mvl)
+  | shape_encoding_b: forall b mvl,
+      match v with Vint _ => True | Vlong _ => True | Vfloat _ => True | Vsingle _ => True | _ => False end ->
+      (forall mv, In mv mvl -> exists b', mv = Byte b') ->
+      shape_encoding chunk v (Byte b :: mvl)
+  | shape_encoding_u: forall mvl,
+      (forall mv, In mv mvl -> mv = Undef) ->
+      shape_encoding chunk v (Undef :: mvl).
+
+Lemma encode_val_shape: forall chunk v, shape_encoding chunk v (encode_val chunk v).
 Proof.
   intros. 
-  destruct (size_chunk_nat_pos chunk) as [sz1 EQ].
-  assert (A: encoding_shape (list_repeat (size_chunk_nat chunk) Undef)).
-    rewrite EQ; simpl; constructor. exact I. 
-    intros. replace mv with Undef. exact I. symmetry; eapply in_list_repeat; eauto.
-  assert (B: forall bl, length bl = size_chunk_nat chunk ->
-          encoding_shape (inj_bytes bl)).
-    intros. destruct bl; simpl in *. congruence. 
-    constructor. exact I. unfold inj_bytes. intros.
-    exploit list_in_map_inv; eauto. intros [x [C D]]. subst mv. exact I.
-  destruct v; auto; destruct chunk; simpl; auto; try (apply B; apply encode_int_length).
-  constructor. red. auto.
-  simpl; intros. intuition; subst mv; red; simpl; congruence.
-Qed.
-
-Lemma check_pointer_inv:
-  forall b ofs n mv,
-  check_pointer n b ofs mv = true -> mv = inj_pointer n b ofs.
-Proof.
-  induction n; destruct mv; simpl. 
-  auto.
-  congruence.
-  congruence.
-  destruct m; try congruence. intro. 
-  destruct (andb_prop _ _ H). destruct (andb_prop _ _ H0). 
-  destruct (andb_prop _ _ H2).
-  decEq. decEq. symmetry; eapply proj_sumbool_true; eauto.
-  symmetry; eapply proj_sumbool_true; eauto.
-  symmetry; apply beq_nat_true; auto.
-  auto.
+  destruct (size_chunk_nat_pos chunk) as [sz EQ]. 
+  assert (A: forall mv q n,
+             (n < size_quantity_nat q)%nat ->
+             In mv (inj_value_rec n v q) ->
+             exists j, mv = Fragment v q j /\ S j <> size_quantity_nat q).
+  {
+    induction n; simpl; intros. contradiction. destruct H0. 
+    exists n; split; auto. omega. apply IHn; auto. omega. 
+  } 
+  assert (B: forall q,
+             q = quantity_chunk chunk -> 
+             (chunk = Mint32 \/ chunk = Many32 \/ chunk = Many64) ->
+             shape_encoding chunk v (inj_value q v)).
+  {
+Local Transparent inj_value.
+    intros. unfold inj_value. destruct (size_quantity_nat_pos q) as [sz' EQ']. 
+    rewrite EQ'. simpl. constructor; auto.
+    intros; eapply A; eauto. omega. 
+  }
+  assert (C: forall bl,
+             match v with Vint _ => True | Vlong _ => True | Vfloat _ => True | Vsingle _ => True | _ => False end ->
+             length (inj_bytes bl) = size_chunk_nat chunk ->
+             shape_encoding chunk v (inj_bytes bl)).
+  {
+    intros. destruct bl as [|b1 bl]. simpl in H0; congruence. simpl. 
+    constructor; auto. unfold inj_bytes; intros. exploit list_in_map_inv; eauto. 
+    intros (b & P & Q); exists b; auto.
+  }
+  assert (D: shape_encoding chunk v (list_repeat (size_chunk_nat chunk) Undef)).
+  {
+    intros. rewrite EQ; simpl; constructor; auto. 
+    intros. eapply in_list_repeat; eauto. 
+  }
+  generalize (encode_val_length chunk v). intros LEN. 
+  unfold encode_val; unfold encode_val in LEN; destruct v; destruct chunk; (apply B || apply C || apply D); auto; red; auto.
 Qed.
 
-Inductive decoding_shape: list memval -> Prop :=
-  | decoding_shape_intro: forall mv1 mvl,
-      memval_valid_first mv1 -> mv1 <> Undef ->
-      (forall mv, In mv mvl -> memval_valid_cont mv /\ mv <> Undef) ->
-      decoding_shape (mv1 :: mvl).
-
-Lemma decode_val_shape:
-  forall chunk mvl,
-  List.length mvl = size_chunk_nat chunk ->
-  decode_val chunk mvl = Vundef \/ decoding_shape mvl.
+Inductive shape_decoding (chunk: memory_chunk): list memval -> val -> Prop :=
+  | shape_decoding_f: forall v q i mvl,
+      (chunk = Mint32 \/ chunk = Many32 \/ chunk = Many64) ->
+      q = quantity_chunk chunk ->
+      S i = size_quantity_nat q ->
+      (forall mv, In mv mvl -> exists j, mv = Fragment v q j /\ S j <> size_quantity_nat q) ->
+      shape_decoding chunk (Fragment v q i :: mvl) (Val.load_result chunk v)
+  | shape_decoding_b: forall b mvl v,
+      match v with Vint _ => True | Vlong _ => True | Vfloat _ => True | Vsingle _ => True |  _ => False end ->
+      (forall mv, In mv mvl -> exists b', mv = Byte b') ->
+      shape_decoding chunk (Byte b :: mvl) v
+  | shape_decoding_u: forall mvl,
+      shape_decoding chunk mvl Vundef.
+
+Lemma decode_val_shape: forall chunk mv1 mvl,
+  shape_decoding chunk (mv1 :: mvl) (decode_val chunk (mv1 :: mvl)).
 Proof.
-  intros. destruct (size_chunk_nat_pos chunk) as [sz EQ]. 
+  intros. 
+  assert (A: forall mv mvs bs, proj_bytes mvs = Some bs -> In mv mvs ->
+                               exists b, mv = Byte b).
+  {
+    induction mvs; simpl; intros. 
+    contradiction.
+    destruct a; try discriminate. destruct H0. exists i; auto. 
+    destruct (proj_bytes mvs); try discriminate. eauto. 
+  }
+  assert (B: forall v q mv n mvs,
+             check_value n v q mvs = true -> In mv mvs -> (n < size_quantity_nat q)%nat ->
+             exists j, mv = Fragment v q j /\ S j <> size_quantity_nat q).
+  {
+    induction n; destruct mvs; simpl; intros; try discriminate.
+    contradiction.
+    destruct m; try discriminate. InvBooleans. apply beq_nat_true in H4. subst. 
+    destruct H0. subst mv. exists n0; split; auto. omega. 
+    eapply IHn; eauto. omega.  
+  }
+  assert (U: forall mvs, shape_decoding chunk mvs (Val.load_result chunk Vundef)).
+  {
+    intros. replace (Val.load_result chunk Vundef) with Vundef. constructor. 
+    destruct chunk; auto.
+  }
+  assert (C: forall q, size_quantity_nat q = size_chunk_nat chunk ->
+             (chunk = Mint32 \/ chunk = Many32 \/ chunk = Many64) ->
+             shape_decoding chunk (mv1 :: mvl) (Val.load_result chunk (proj_value q (mv1 :: mvl)))).
+  {
+    intros. unfold proj_value. destruct mv1; auto.
+    destruct (size_quantity_nat_pos q) as [sz EQ]. rewrite EQ. 
+    simpl. unfold proj_sumbool. rewrite dec_eq_true.
+    destruct (quantity_eq q q0); auto. 
+    destruct (beq_nat sz n) eqn:EQN; auto.
+    destruct (check_value sz v q mvl) eqn:CHECK; auto. 
+    simpl. apply beq_nat_true in EQN. subst n q0. constructor. auto. 
+    destruct H0 as [E|[E|E]]; subst chunk; destruct q; auto || discriminate.
+    congruence. 
+    intros. eapply B; eauto. omega. 
+  }
   unfold decode_val. 
-  caseEq (proj_bytes mvl).
-  intros bl PROJ. right. exploit inj_proj_bytes; eauto. intros. subst mvl.
-  destruct bl; simpl in H. congruence. simpl. constructor. 
-  red; auto. congruence.
-  unfold inj_bytes; intros. exploit list_in_map_inv; eauto. intros [b [A B]]. 
-  subst mv. split. red; auto. congruence.
-  intros. destruct chunk; auto. unfold proj_pointer.
-  destruct mvl; auto. destruct m; auto. 
-  caseEq (check_pointer 4%nat b i (Pointer b i n :: mvl)); auto.
-  intros. right. exploit check_pointer_inv; eauto. simpl; intros; inv H2.
-  constructor. red. auto. congruence. 
-  simpl; intros. intuition; subst mv; simpl; congruence.
-Qed.
-
-Lemma encode_val_pointer_inv:
-  forall chunk v b ofs n mvl,
-  encode_val chunk v = Pointer b ofs n :: mvl ->
-  chunk = Mint32 /\ v = Vptr b ofs /\ mvl = inj_pointer 3%nat b ofs.
-Proof.
-  intros until mvl.
-  assert (A: list_repeat (size_chunk_nat chunk) Undef = Pointer b ofs n :: mvl ->
-            chunk = Mint32 /\ v = Vptr b ofs /\ mvl = inj_pointer 3 b ofs).
-    intros. destruct (size_chunk_nat_pos chunk) as [sz SZ]. rewrite SZ in H. simpl in H. discriminate.
-  assert (B: forall bl, length bl <> 0%nat -> inj_bytes bl = Pointer b ofs n :: mvl ->
-            chunk = Mint32 /\ v = Vptr b ofs /\ mvl = inj_pointer 3 b ofs).
-    intros. destruct bl; simpl in *; congruence.
-  unfold encode_val; destruct v; destruct chunk;
-  (apply A; assumption) ||
-  (apply B; rewrite encode_int_length; congruence) || idtac.
-  simpl. intros EQ; inv EQ; auto. 
-Qed.
-
-Lemma decode_val_pointer_inv:
-  forall chunk mvl b ofs,
-  decode_val chunk mvl = Vptr b ofs ->
-  chunk = Mint32 /\ mvl = inj_pointer 4%nat b ofs.
-Proof.
-  intros until ofs; unfold decode_val.
-  destruct (proj_bytes mvl). 
-  destruct chunk; congruence.
-  destruct chunk; try congruence.
-  unfold proj_pointer. destruct mvl. congruence. destruct m; try congruence.
-  case_eq (check_pointer 4%nat b0 i (Pointer b0 i n :: mvl)); intros.
-  inv H0. split; auto. apply check_pointer_inv; auto. 
-  congruence.
-Qed.
-
-Inductive pointer_encoding_shape: list memval -> Prop :=
-  | pointer_encoding_shape_intro: forall mv1 mvl,
-      ~memval_valid_cont mv1 ->
-      (forall mv, In mv mvl -> ~memval_valid_first mv) ->
-      pointer_encoding_shape (mv1 :: mvl).
-
-Lemma encode_pointer_shape:
-  forall b ofs, pointer_encoding_shape (encode_val Mint32 (Vptr b ofs)).
-Proof.
-  intros. simpl. constructor.
-  unfold memval_valid_cont. red; intro. elim H. auto. 
-  unfold memval_valid_first. simpl; intros; intuition; subst mv; congruence.
-Qed.
-
-Lemma decode_pointer_shape:
-  forall chunk mvl b ofs,
-  decode_val chunk mvl = Vptr b ofs ->
-  chunk = Mint32 /\ pointer_encoding_shape mvl.
-Proof.
-  intros. exploit decode_val_pointer_inv; eauto. intros [A B].
-  split; auto. subst mvl. apply encode_pointer_shape. 
+  destruct (proj_bytes (mv1 :: mvl)) as [bl|] eqn:PB. 
+  exploit (A mv1); eauto with coqlib. intros [b1 EQ1]; subst mv1. 
+  destruct chunk; (apply shape_decoding_u || apply shape_decoding_b); eauto with coqlib.
+  destruct chunk; (apply shape_decoding_u || apply C); auto. 
 Qed.
 
 (** * Compatibility with memory injections *)
@@ -675,18 +724,17 @@ Qed.
 Inductive memval_inject (f: meminj): memval -> memval -> Prop :=
   | memval_inject_byte:
       forall n, memval_inject f (Byte n) (Byte n)
-  | memval_inject_ptr:
-      forall b1 ofs1 b2 ofs2 delta n,
-      f b1 = Some (b2, delta) ->
-      ofs2 = Int.add ofs1 (Int.repr delta) ->
-      memval_inject f (Pointer b1 ofs1 n) (Pointer b2 ofs2 n)
+  | memval_inject_frag:
+      forall v1 v2 q n,
+      val_inject f v1 v2 ->
+      memval_inject f (Fragment v1 q n) (Fragment v2 q n)
   | memval_inject_undef:
       forall mv, memval_inject f Undef mv.
 
 Lemma memval_inject_incr:
   forall f f' v1 v2, memval_inject f v1 v2 -> inject_incr f f' -> memval_inject f' v1 v2.
 Proof.
-  intros. inv H; econstructor. rewrite (H0 _ _ _ H1). reflexivity. auto.
+  intros. inv H; econstructor. eapply val_inject_incr; eauto. 
 Qed.
 
 (** [decode_val], applied to lists of memory values that are pairwise
@@ -706,38 +754,33 @@ Proof.
   congruence.
 Qed.
 
-Lemma check_pointer_inject:
+Lemma check_value_inject:
   forall f vl vl',
   list_forall2 (memval_inject f) vl vl' ->
-  forall n b ofs b' delta,
-  check_pointer n b ofs vl = true ->
-  f b = Some(b', delta) ->
-  check_pointer n b' (Int.add ofs (Int.repr delta)) vl' = true.
+  forall v v' q n,
+  check_value n v q vl = true ->
+  val_inject f v v' -> v <> Vundef ->
+  check_value n v' q vl' = true.
 Proof.
   induction 1; intros; destruct n; simpl in *; auto. 
   inv H; auto.
-  destruct (andb_prop _ _ H1). destruct (andb_prop _ _ H). 
-  destruct (andb_prop _ _ H5). 
-  assert (n = n0) by (apply beq_nat_true; auto).
-  assert (b = b0) by (eapply proj_sumbool_true; eauto). 
-  assert (ofs = ofs1) by (eapply proj_sumbool_true; eauto).
-  subst. rewrite H3 in H2; inv H2. 
-  unfold proj_sumbool. rewrite dec_eq_true. rewrite dec_eq_true. 
-  rewrite <- beq_nat_refl. simpl. eauto. 
-  congruence.
+  InvBooleans. assert (n = n0) by (apply beq_nat_true; auto). subst v1 q0 n0.
+  replace v2 with v'. 
+  unfold proj_sumbool; rewrite ! dec_eq_true. rewrite <- beq_nat_refl. simpl; eauto. 
+  inv H2; try discriminate; inv H4; congruence.
+  discriminate.
 Qed.
 
-Lemma proj_pointer_inject:
-  forall f vl1 vl2,
+Lemma proj_value_inject:
+  forall f q vl1 vl2,
   list_forall2 (memval_inject f) vl1 vl2 ->
-  val_inject f (proj_pointer vl1) (proj_pointer vl2).
+  val_inject f (proj_value q vl1) (proj_value q vl2).
 Proof.
-  intros. unfold proj_pointer.
+  intros. unfold proj_value. 
   inversion H; subst. auto. inversion H0; subst; auto.
-  case_eq (check_pointer 4%nat b0 ofs1 (Pointer b0 ofs1 n :: al)); intros.
-  exploit check_pointer_inject. eexact H. eauto. eauto. 
-  intro. rewrite H4. econstructor; eauto. 
-  constructor.
+  destruct (check_value (size_quantity_nat q) v1 q (Fragment v1 q0 n :: al)) eqn:B; auto.
+  destruct (Val.eq v1 Vundef). subst; auto. 
+  erewrite check_value_inject by eauto. auto.
 Qed.
 
 Lemma proj_bytes_not_inject:
@@ -754,9 +797,9 @@ Proof.
   auto.
 Qed.
 
-Lemma check_pointer_undef:
-  forall n b ofs vl,
-  In Undef vl -> check_pointer n b ofs vl = false.
+Lemma check_value_undef:
+  forall n q v vl,
+  In Undef vl -> check_value n v q vl = false.
 Proof.
   induction n; intros; simpl. 
   destruct vl. elim H. auto.
@@ -765,12 +808,12 @@ Proof.
   rewrite IHn; auto. apply andb_false_r. 
 Qed.
 
-Lemma proj_pointer_undef:
-  forall vl, In Undef vl -> proj_pointer vl = Vundef.
+Lemma proj_value_undef:
+  forall q vl, In Undef vl -> proj_value q vl = Vundef.
 Proof.
-  intros; unfold proj_pointer.
+  intros; unfold proj_value.
   destruct vl; auto. destruct m; auto. 
-  rewrite check_pointer_undef. auto. auto.
+  rewrite check_value_undef. auto. auto.
 Qed.
 
 Theorem decode_val_inject:
@@ -779,13 +822,20 @@ Theorem decode_val_inject:
   val_inject f (decode_val chunk vl1) (decode_val chunk vl2).
 Proof.
   intros. unfold decode_val. 
-  case_eq (proj_bytes vl1); intros. 
-  exploit proj_bytes_inject; eauto. intros. rewrite H1. 
+  destruct (proj_bytes vl1) as [bl1|] eqn:PB1.
+  exploit proj_bytes_inject; eauto. intros PB2. rewrite PB2. 
   destruct chunk; constructor.
+  assert (A: forall q fn,
+     val_inject f (Val.load_result chunk (proj_value q vl1))
+                  (match proj_bytes vl2 with
+                   | Some bl => fn bl
+                   | None => Val.load_result chunk (proj_value q vl2)
+                   end)).
+  { intros. destruct (proj_bytes vl2) as [bl2|] eqn:PB2.
+    rewrite proj_value_undef. destruct chunk; auto. eapply proj_bytes_not_inject; eauto. congruence.
+    apply val_load_result_inject. apply proj_value_inject; auto.
+  }
   destruct chunk; auto.
-  case_eq (proj_bytes vl2); intros.
-  rewrite proj_pointer_undef. auto. eapply proj_bytes_not_inject; eauto. congruence.
-  apply proj_pointer_inject; auto.
 Qed.
 
 (** Symmetrically, [encode_val], applied to values related by [val_inject],
@@ -805,6 +855,13 @@ Proof.
   induction vl; simpl; constructor; auto. constructor. 
 Qed.  
 
+Lemma repeat_Undef_inject_encode_val:
+  forall f chunk v,
+  list_forall2 (memval_inject f) (list_repeat (size_chunk_nat chunk) Undef) (encode_val chunk v).
+Proof.
+  intros. rewrite <- (encode_val_length chunk v). apply repeat_Undef_inject_any.
+Qed.
+
 Lemma repeat_Undef_inject_self:
   forall f n,
   list_forall2 (memval_inject f) (list_repeat n Undef) (list_repeat n Undef).
@@ -812,19 +869,24 @@ Proof.
   induction n; simpl; constructor; auto. constructor.
 Qed.  
 
+Lemma inj_value_inject:
+  forall f v1 v2 q, val_inject f v1 v2 -> list_forall2 (memval_inject f) (inj_value q v1) (inj_value q v2).
+Proof.
+  intros.
+Local Transparent inj_value.
+  unfold inj_value. generalize (size_quantity_nat q). induction n; simpl; constructor; auto.
+  constructor; auto. 
+Qed.
+
 Theorem encode_val_inject:
   forall f v1 v2 chunk,
   val_inject f v1 v2 ->
   list_forall2 (memval_inject f) (encode_val chunk v1) (encode_val chunk v2).
 Proof.
-  intros. inv H; simpl.
-  destruct chunk; apply inj_bytes_inject || apply repeat_Undef_inject_self.
-  destruct chunk; apply inj_bytes_inject || apply repeat_Undef_inject_self.
-  destruct chunk; apply inj_bytes_inject || apply repeat_Undef_inject_self.
-  destruct chunk; try (apply repeat_Undef_inject_self). 
-  repeat econstructor; eauto. 
-  replace (size_chunk_nat chunk) with (length (encode_val chunk v2)).
-  apply repeat_Undef_inject_any. apply encode_val_length. 
+  intros. inversion H; subst; simpl; destruct chunk;
+  auto using inj_bytes_inject, inj_value_inject, repeat_Undef_inject_self, repeat_Undef_inject_encode_val.
+  unfold encode_val. destruct v2; apply inj_value_inject; auto.
+  unfold encode_val. destruct v2; apply inj_value_inject; auto.
 Qed.
 
 Definition memval_lessdef: memval -> memval -> Prop := memval_inject inject_id.
@@ -832,8 +894,7 @@ Definition memval_lessdef: memval -> memval -> Prop := memval_inject inject_id.
 Lemma memval_lessdef_refl:
   forall mv, memval_lessdef mv mv.
 Proof.
-  red. destruct mv; econstructor.
-  unfold inject_id; reflexivity. rewrite Int.add_zero; auto. 
+  red. destruct mv; econstructor. apply val_inject_id. auto.
 Qed.
 
 (** [memval_inject] and compositions *)
@@ -845,9 +906,8 @@ Lemma memval_inject_compose:
 Proof.
   intros. inv H.
   inv H0. constructor.
-  inv H0. econstructor. 
-  unfold compose_meminj; rewrite H1; rewrite H5; eauto. 
-  rewrite Int.add_assoc. decEq. unfold Int.add. apply Int.eqm_samerepr. auto with ints.
+  inv H0. econstructor.
+  eapply val_inject_compose; eauto. 
   constructor.
 Qed. 
 
@@ -905,28 +965,22 @@ Qed.
 Lemma decode_val_int64:
   forall l1 l2,
   length l1 = 4%nat -> length l2 = 4%nat ->
-  decode_val Mint64 (l1 ++ l2) =
-  Val.longofwords (decode_val Mint32 (if Archi.big_endian then l1 else l2))
-                  (decode_val Mint32 (if Archi.big_endian then l2 else l1)).
+  Val.lessdef
+    (decode_val Mint64 (l1 ++ l2))
+    (Val.longofwords (decode_val Mint32 (if Archi.big_endian then l1 else l2))
+                     (decode_val Mint32 (if Archi.big_endian then l2 else l1))).
 Proof.
   intros. unfold decode_val.
-  assert (PP: forall vl, match proj_pointer vl with Vundef => True | Vptr _ _ => True | _ => False end).
-    intros. unfold proj_pointer. destruct vl; auto. destruct m; auto. 
-    destruct (check_pointer 4 b i (Pointer b i n :: vl)); auto. 
-  assert (PP1: forall vl v2, Val.longofwords (proj_pointer vl) v2 = Vundef).
-    intros. generalize (PP vl). destruct (proj_pointer vl); reflexivity || contradiction.
-  assert (PP2: forall v1 vl, Val.longofwords v1 (proj_pointer vl) = Vundef).
-    intros. destruct v1; simpl; auto. 
-    generalize (PP vl). destruct (proj_pointer vl); reflexivity || contradiction.
   rewrite proj_bytes_append. 
-  destruct (proj_bytes l1) as [b1|] eqn:B1; destruct (proj_bytes l2) as [b2|] eqn:B2.
-- exploit length_proj_bytes. eexact B1. rewrite H; intro L1.
+  destruct (proj_bytes l1) as [b1|] eqn:B1; destruct (proj_bytes l2) as [b2|] eqn:B2; auto.
+  exploit length_proj_bytes. eexact B1. rewrite H; intro L1.
   exploit length_proj_bytes. eexact B2. rewrite H0; intro L2.
   assert (UR: forall l, length l = 4%nat -> Int.unsigned (Int.repr (int_of_bytes l)) = int_of_bytes l).
     intros. apply Int.unsigned_repr. 
     generalize (int_of_bytes_range l). rewrite H1. 
     change (two_p (Z.of_nat 4 * 8)) with (Int.max_unsigned + 1). 
     omega.
+  apply Val.lessdef_same.
   unfold decode_int, rev_if_be. destruct Archi.big_endian; rewrite B1; rewrite B2.
   + rewrite <- (rev_length b1) in L1. 
     rewrite <- (rev_length b2) in L2.
@@ -938,9 +992,6 @@ Proof.
   + unfold Val.longofwords. f_equal. rewrite Int64.ofwords_add. f_equal.
     rewrite !UR by auto. rewrite int_of_bytes_append. 
     rewrite L1. change (Z.of_nat 4 * 8) with 32. ring.
-- destruct Archi.big_endian; rewrite B1; rewrite B2; auto.
-- destruct Archi.big_endian; rewrite B1; rewrite B2; auto.
-- destruct Archi.big_endian; rewrite B1; rewrite B2; auto.
 Qed.
 
 Lemma bytes_of_int_append:
diff --git a/common/Memory.v b/common/Memory.v
index 9afdfd39..e45df664 100644
--- a/common/Memory.v
+++ b/common/Memory.v
@@ -690,7 +690,6 @@ Theorem load_cast:
   | Mint8unsigned => v = Val.zero_ext 8 v
   | Mint16signed => v = Val.sign_ext 16 v
   | Mint16unsigned => v = Val.zero_ext 16 v
-  | Mfloat32 => v = Val.singleoffloat v
   | _ => True
   end.
 Proof.
@@ -727,24 +726,6 @@ Proof.
   rewrite pred_dec_false; auto.
 Qed.
 
-(*
-Theorem load_float64al32:
-  forall m b ofs v,
-  load Mfloat64 m b ofs = Some v -> load Mfloat64al32 m b ofs = Some v.
-Proof.
-  unfold load; intros. destruct (valid_access_dec m Mfloat64 b ofs Readable); try discriminate.
-  rewrite pred_dec_true. assumption. 
-  apply valid_access_compat with Mfloat64; auto. simpl; omega. 
-Qed.
-
-Theorem loadv_float64al32:
-  forall m a v,
-  loadv Mfloat64 m a = Some v -> loadv Mfloat64al32 m a = Some v.
-Proof.
-  unfold loadv; intros. destruct a; auto. apply load_float64al32; auto.
-Qed.
-*)
-
 (** ** Properties related to [loadbytes] *)
 
 Theorem range_perm_loadbytes:
@@ -896,7 +877,7 @@ Theorem load_int64_split:
   exists v1 v2,
      load Mint32 m b ofs = Some (if Archi.big_endian then v1 else v2)
   /\ load Mint32 m b (ofs + 4) = Some (if Archi.big_endian then v2 else v1)
-  /\ v = Val.longofwords v1 v2.
+  /\ Val.lessdef v (Val.longofwords v1 v2).
 Proof.
   intros. 
   exploit load_valid_access; eauto. intros [A B]. simpl in *.
@@ -927,7 +908,7 @@ Theorem loadv_int64_split:
   exists v1 v2,
      loadv Mint32 m a = Some (if Archi.big_endian then v1 else v2)
   /\ loadv  Mint32 m (Val.add a (Vint (Int.repr 4))) = Some (if Archi.big_endian then v2 else v1)
-  /\ v = Val.longofwords v1 v2.
+  /\ Val.lessdef v (Val.longofwords v1 v2).
 Proof.
   intros. destruct a; simpl in H; try discriminate.
   exploit load_int64_split; eauto. intros (v1 & v2 & L1 & L2 & EQ).
@@ -1138,18 +1119,17 @@ Proof.
   red; intro; elim n0; red; intros; eauto with mem.
 Qed.
 
-Lemma setN_property:
-  forall (P: memval -> Prop) vl p q c,
-  (forall v, In v vl -> P v) ->
+Lemma setN_in:
+  forall vl p q c,
   p <= q < p + Z_of_nat (length vl) ->
-  P(ZMap.get q (setN vl p c)).
+  In (ZMap.get q (setN vl p c)) vl.
 Proof.
   induction vl; intros.
-  simpl in H0. omegaContradiction.
-  simpl length in H0. rewrite inj_S in H0. simpl. 
+  simpl in H. omegaContradiction.
+  simpl length in H. rewrite inj_S in H. simpl. 
   destruct (zeq p q). subst q. rewrite setN_outside. rewrite ZMap.gss. 
   auto with coqlib. omega.
-  apply IHvl. auto with coqlib. omega.
+  right. apply IHvl. omega.
 Qed.
 
 Lemma getN_in:
@@ -1164,84 +1144,114 @@ Proof.
   right. apply IHn. omega. 
 Qed.
 
+End STORE.
+
+Local Hint Resolve perm_store_1 perm_store_2: mem.
+Local Hint Resolve store_valid_block_1 store_valid_block_2: mem.
+Local Hint Resolve store_valid_access_1 store_valid_access_2
+             store_valid_access_3: mem.
+
+Lemma load_store_overlap:
+  forall chunk m1 b ofs v m2 chunk' ofs' v',
+  store chunk m1 b ofs v = Some m2 ->
+  load chunk' m2 b ofs' = Some v' ->
+  ofs' + size_chunk chunk' > ofs ->
+  ofs + size_chunk chunk > ofs' ->
+  exists mv1 mvl mv1' mvl',
+      shape_encoding chunk v (mv1 :: mvl)
+  /\  shape_decoding chunk' (mv1' :: mvl') v'
+  /\  (   (ofs' = ofs /\ mv1' = mv1)
+       \/ (ofs' > ofs /\ In mv1' mvl)
+       \/ (ofs' < ofs /\ In mv1 mvl')).
+Proof.
+  intros. 
+  exploit load_result; eauto. erewrite store_mem_contents by eauto; simpl.
+  rewrite PMap.gss. 
+  set (c := (mem_contents m1)#b). intros V'. 
+  destruct (size_chunk_nat_pos chunk) as [sz SIZE]. 
+  destruct (size_chunk_nat_pos chunk') as [sz' SIZE'].
+  destruct (encode_val chunk v) as [ | mv1 mvl] eqn:ENC.
+  generalize (encode_val_length chunk v); rewrite ENC; simpl; congruence.
+  set (c' := setN (mv1::mvl) ofs c) in *.
+  exists mv1, mvl, (ZMap.get ofs' c'), (getN sz' (ofs' + 1) c').
+  split. rewrite <- ENC. apply encode_val_shape.
+  split. rewrite V', SIZE'. apply decode_val_shape. 
+  destruct (zeq ofs' ofs).
+- subst ofs'. left; split. auto. unfold c'. simpl. 
+  rewrite setN_outside by omega. apply ZMap.gss.
+- right. destruct (zlt ofs ofs').
+(* If ofs < ofs':  the load reads (at ofs') a continuation byte from the write.
+       ofs   ofs'   ofs+|chunk|
+        [-------------------]       write
+             [-------------------]  read
+*)
++ left; split. omega. unfold c'. simpl. apply setN_in. 
+  assert (Z.of_nat (length (mv1 :: mvl)) = size_chunk chunk).
+  { rewrite <- ENC; rewrite encode_val_length. rewrite size_chunk_conv; auto. }
+  simpl length in H3. rewrite inj_S in H3. omega.
+(* If ofs > ofs':  the load reads (at ofs) the first byte from the write.
+       ofs'   ofs   ofs'+|chunk'|
+               [-------------------]  write
+         [----------------]           read
+*)
++ right; split. omega. replace mv1 with (ZMap.get ofs c'). 
+  apply getN_in. 
+  assert (size_chunk chunk' = Zsucc (Z.of_nat sz')).
+  { rewrite size_chunk_conv. rewrite SIZE'. rewrite inj_S; auto. }
+  omega.
+  unfold c'. simpl. rewrite setN_outside by omega. apply ZMap.gss.
+Qed.
+
+Definition compat_pointer_chunks (chunk1 chunk2: memory_chunk) : Prop :=
+  match chunk1, chunk2 with
+  | (Mint32 | Many32), (Mint32 | Many32) => True
+  | Many64, Many64 => True
+  | _, _ => False
+  end.
+
+Lemma compat_pointer_chunks_true:
+  forall chunk1 chunk2,
+  (chunk1 = Mint32 \/ chunk1 = Many32 \/ chunk1 = Many64) ->
+  (chunk2 = Mint32 \/ chunk2 = Many32 \/ chunk2 = Many64) ->
+  quantity_chunk chunk1 = quantity_chunk chunk2 ->
+  compat_pointer_chunks chunk1 chunk2.
+Proof.
+  intros. destruct H as [P|[P|P]]; destruct H0 as [Q|[Q|Q]];
+  subst; red; auto; discriminate.
+Qed.
+
 Theorem load_pointer_store:
-  forall chunk' b' ofs' v_b v_o,
+  forall chunk m1 b ofs v m2 chunk' b' ofs' v_b v_o,
+  store chunk m1 b ofs v = Some m2 ->
   load chunk' m2 b' ofs' = Some(Vptr v_b v_o) ->
-  (chunk = Mint32 /\ v = Vptr v_b v_o /\ chunk' = Mint32 /\ b' = b /\ ofs' = ofs)
+  (v = Vptr v_b v_o /\ compat_pointer_chunks chunk chunk' /\ b' = b /\ ofs' = ofs)
   \/ (b' <> b \/ ofs' + size_chunk chunk' <= ofs \/ ofs + size_chunk chunk <= ofs').
 Proof.
-  intros. exploit load_result; eauto. rewrite store_mem_contents; simpl. 
-  rewrite PMap.gsspec. destruct (peq b' b); auto. subst b'. intro DEC.
+  intros.
+  destruct (peq b' b); auto. subst b'.
   destruct (zle (ofs' + size_chunk chunk') ofs); auto.
   destruct (zle (ofs + size_chunk chunk) ofs'); auto.
-  destruct (size_chunk_nat_pos chunk) as [sz SZ].
-  destruct (size_chunk_nat_pos chunk') as [sz' SZ'].
-  exploit decode_pointer_shape; eauto. intros [CHUNK' PSHAPE]. clear CHUNK'.
-  generalize (encode_val_shape chunk v). intro VSHAPE.  
-  set (c := m1.(mem_contents)#b) in *.
-  set (c' := setN (encode_val chunk v) ofs c) in *.
-  destruct (zeq ofs ofs').
-
-(* 1.  ofs = ofs':  must be same chunks and same value *)
-  subst ofs'. inv VSHAPE. 
-  exploit decode_val_pointer_inv; eauto. intros [A B].
-  subst chunk'. simpl in B. inv B.
-  generalize H4. unfold c'. rewrite <- H0. simpl. 
-  rewrite setN_outside; try omega. rewrite ZMap.gss. intros.
-  exploit (encode_val_pointer_inv chunk v v_b v_o). 
-  rewrite <- H0. subst mv1. eauto. intros [C [D E]].
-  left; auto.
-
-  destruct (zlt ofs ofs').
-
-(* 2. ofs < ofs':
-
-      ofs   ofs'   ofs+|chunk|
-       [-------------------]       write
-            [-------------------]  read
-
-   The byte at ofs' satisfies memval_valid_cont (consequence of write).
-   For the read to return a pointer, it must satisfy ~memval_valid_cont. 
-*)
-  elimtype False.
-  assert (~memval_valid_cont (ZMap.get ofs' c')).
-    rewrite SZ' in PSHAPE. simpl in PSHAPE. inv PSHAPE. auto.
-  assert (memval_valid_cont (ZMap.get ofs' c')).
-    inv VSHAPE. unfold c'. rewrite <- H1. simpl. 
-    apply setN_property. auto.
-    assert (length mvl = sz). 
-      generalize (encode_val_length chunk v). rewrite <- H1. rewrite SZ. 
-      simpl; congruence.
-    rewrite H4. rewrite size_chunk_conv in *. omega.
-  contradiction.
-
-(* 3. ofs > ofs':
-
-      ofs'   ofs   ofs'+|chunk'|
-              [-------------------]  write
-        [----------------]           read
-
-   The byte at ofs satisfies memval_valid_first (consequence of write).
-   For the read to return a pointer, it must satisfy ~memval_valid_first.
-*)
-  elimtype False.
-  assert (memval_valid_first (ZMap.get ofs c')).
-    inv VSHAPE. unfold c'. rewrite <- H0. simpl. 
-    rewrite setN_outside. rewrite ZMap.gss. auto. omega.
-  assert (~memval_valid_first (ZMap.get ofs c')).
-    rewrite SZ' in PSHAPE. simpl in PSHAPE. inv PSHAPE. 
-    apply H4. apply getN_in. rewrite size_chunk_conv in *.
-    rewrite SZ' in *. rewrite inj_S in *. omega.
-  contradiction.
+  exploit load_store_overlap; eauto. 
+  intros (mv1 & mvl & mv1' & mvl' & ENC & DEC & CASES).
+  inv DEC; try contradiction.
+  destruct CASES as [(A & B) | [(A & B) | (A & B)]].
+- (* Same offset *)
+  subst. inv ENC. 
+  assert (chunk = Mint32 \/ chunk = Many32 \/ chunk = Many64)
+  by (destruct chunk; auto || contradiction). 
+  left; split. rewrite H3.
+  destruct H4 as [P|[P|P]]; subst chunk'; destruct v0; simpl in H3; congruence.
+  split. apply compat_pointer_chunks_true; auto.
+  auto.
+- (* ofs' > ofs *)
+  inv ENC. 
+  + exploit H10; eauto. intros (j & P & Q). inv P. congruence.
+  + exploit H8; eauto. intros (n & P); congruence.
+  + exploit H2; eauto. congruence.
+- (* ofs' < ofs *)
+  exploit H7; eauto. intros (j & P & Q). subst mv1. inv ENC. congruence.
 Qed.
 
-End STORE.
-
-Local Hint Resolve perm_store_1 perm_store_2: mem.
-Local Hint Resolve store_valid_block_1 store_valid_block_2: mem.
-Local Hint Resolve store_valid_access_1 store_valid_access_2
-             store_valid_access_3: mem.
-
 Theorem load_store_pointer_overlap:
   forall chunk m1 b ofs v_b v_o m2 chunk' ofs' v,
   store chunk m1 b ofs (Vptr v_b v_o) = Some m2 ->
@@ -1251,102 +1261,37 @@ Theorem load_store_pointer_overlap:
   ofs + size_chunk chunk > ofs' ->
   v = Vundef.
 Proof.
-  intros.
-  exploit store_mem_contents; eauto. intro ST.
-  exploit load_result; eauto. intro LD.
-  rewrite LD; clear LD.
-Opaque encode_val.
-  rewrite ST; simpl.
-  rewrite PMap.gss.
-  set (c := m1.(mem_contents)#b).
-  set (c' := setN (encode_val chunk (Vptr v_b v_o)) ofs c).
-  destruct (decode_val_shape chunk' (getN (size_chunk_nat chunk') ofs' c'))
-  as [OK | VSHAPE].
-  apply getN_length. 
-  exact OK.
-  elimtype False.
-  destruct (size_chunk_nat_pos chunk) as [sz SZ]. 
-  destruct (size_chunk_nat_pos chunk') as [sz' SZ']. 
-  assert (ENC: encode_val chunk (Vptr v_b v_o) = list_repeat (size_chunk_nat chunk) Undef
-               \/ pointer_encoding_shape (encode_val chunk (Vptr v_b v_o))).
-  destruct chunk; try (left; reflexivity). 
-  right. apply encode_pointer_shape. 
-  assert (GET: getN (size_chunk_nat chunk) ofs c' = encode_val chunk (Vptr v_b v_o)).
-  unfold c'. rewrite <- (encode_val_length chunk (Vptr v_b v_o)). 
-  apply getN_setN_same.
-  destruct (zlt ofs ofs').
-
-(* ofs < ofs':
-
-      ofs   ofs'   ofs+|chunk|
-       [-------------------]       write
-            [-------------------]  read
-
-   The byte at ofs' is Undef or not memval_valid_first (because write of pointer).
-   The byte at ofs' must be memval_valid_first and not Undef (otherwise load returns Vundef).
-*)
-  assert (memval_valid_first (ZMap.get ofs' c') /\ ZMap.get ofs' c' <> Undef).
-    rewrite SZ' in VSHAPE. simpl in VSHAPE. inv VSHAPE. auto.
-  assert (~memval_valid_first (ZMap.get ofs' c') \/ ZMap.get ofs' c' = Undef).
-    unfold c'. destruct ENC.
-    right. apply setN_property. rewrite H5. intros. eapply in_list_repeat; eauto.
-    rewrite encode_val_length. rewrite <- size_chunk_conv. omega.
-    left. revert H5. rewrite <- GET. rewrite SZ. simpl. intros. inv H5.
-    apply setN_property. apply H9. rewrite getN_length.
-    rewrite size_chunk_conv in H3. rewrite SZ in H3. rewrite inj_S in H3. omega. 
-  intuition. 
-
-(* ofs > ofs':
-
-      ofs'   ofs   ofs'+|chunk'|
-              [-------------------]  write
-        [----------------]           read
-
-   The byte at ofs is Undef or not memval_valid_cont (because write of pointer).
-   The byte at ofs must be memval_valid_cont and not Undef (otherwise load returns Vundef).
-*)
-  assert (memval_valid_cont (ZMap.get ofs c') /\ ZMap.get ofs c' <> Undef).
-    rewrite SZ' in VSHAPE. simpl in VSHAPE. inv VSHAPE. 
-    apply H8. apply getN_in. rewrite size_chunk_conv in H2. 
-    rewrite SZ' in H2. rewrite inj_S in H2. omega. 
-  assert (~memval_valid_cont (ZMap.get ofs c') \/ ZMap.get ofs c' = Undef).
-    elim ENC. 
-    rewrite <- GET. rewrite SZ. simpl. intros. right; congruence.
-    rewrite <- GET. rewrite SZ. simpl. intros. inv H5. auto.
-  intuition.
+  intros. 
+  exploit load_store_overlap; eauto. 
+  intros (mv1 & mvl & mv1' & mvl' & ENC & DEC & CASES).
+  destruct CASES as [(A & B) | [(A & B) | (A & B)]].
+- congruence.
+- inv ENC. 
+  + exploit H9; eauto. intros (j & P & Q). subst mv1'. inv DEC. congruence. auto.
+  + contradiction.
+  + exploit H5; eauto. intros; subst. inv DEC; auto.
+- inv DEC. 
+  + exploit H10; eauto. intros (j & P & Q). subst mv1. inv ENC. congruence. 
+  + exploit H8; eauto. intros (n & P). subst mv1. inv ENC. contradiction. 
+  + auto.
 Qed.
 
 Theorem load_store_pointer_mismatch:
   forall chunk m1 b ofs v_b v_o m2 chunk' v,
   store chunk m1 b ofs (Vptr v_b v_o) = Some m2 ->
   load chunk' m2 b ofs = Some v ->
-  chunk <> Mint32 \/ chunk' <> Mint32 ->
+  ~compat_pointer_chunks chunk chunk' ->
   v = Vundef.
 Proof.
   intros.
-  exploit store_mem_contents; eauto. intro ST.
-  exploit load_result; eauto. intro LD.
-  rewrite LD; clear LD.
-Opaque encode_val.
-  rewrite ST; simpl.
-  rewrite PMap.gss. 
-  set (c1 := m1.(mem_contents)#b).
-  set (e := encode_val chunk (Vptr v_b v_o)).
-  destruct (size_chunk_nat_pos chunk) as [sz SZ].
-  destruct (size_chunk_nat_pos chunk') as [sz' SZ'].
-  assert (match e with
-          | Undef :: _ => True
-          | Pointer _ _ _ :: _ => chunk = Mint32
-          | _ => False
-          end).
-Transparent encode_val.
-  unfold e, encode_val. rewrite SZ. destruct chunk; simpl; auto.
-  destruct e as [ | e1 el]. contradiction.
-  rewrite SZ'. simpl. rewrite setN_outside. rewrite ZMap.gss. 
-  destruct e1; try contradiction. 
-  destruct chunk'; auto. 
-  destruct chunk'; auto. intuition.
-  omega.
+  exploit load_store_overlap; eauto. 
+  generalize (size_chunk_pos chunk'); omega.
+  generalize (size_chunk_pos chunk); omega.
+  intros (mv1 & mvl & mv1' & mvl' & ENC & DEC & CASES).
+  destruct CASES as [(A & B) | [(A & B) | (A & B)]]; try omegaContradiction.
+  inv ENC; inv DEC; auto.
+- elim H1. apply compat_pointer_chunks_true; auto.
+- contradiction.
 Qed.
 
 Lemma store_similar_chunks:
@@ -1403,16 +1348,6 @@ Theorem store_int16_sign_ext:
   store Mint16signed m b ofs (Vint n).
 Proof. intros. apply store_similar_chunks. apply encode_val_int16_sign_ext. auto. Qed.
 
-Theorem store_float32_truncate:
-  forall m b ofs n,
-  store Mfloat32 m b ofs (Vfloat (Float.singleoffloat n)) =
-  store Mfloat32 m b ofs (Vfloat n).
-Proof.
-  intros. apply store_similar_chunks. simpl. decEq.
-  repeat rewrite encode_float32_eq. rewrite Float.bits_of_singleoffloat. auto.
-  auto.
-Qed.
-
 (*
 Theorem store_float64al32:
   forall m b ofs v m',
diff --git a/common/Memtype.v b/common/Memtype.v
index 37ab33c2..1ab1cb73 100644
--- a/common/Memtype.v
+++ b/common/Memtype.v
@@ -310,7 +310,6 @@ Axiom load_cast:
   | Mint8unsigned => v = Val.zero_ext 8 v
   | Mint16signed => v = Val.sign_ext 16 v
   | Mint16unsigned => v = Val.zero_ext 16 v
-  | Mfloat32 => v = Val.singleoffloat v
   | _ => True
   end.
 
@@ -443,6 +442,13 @@ Axiom load_store_other:
 
 (** Integrity of pointer values. *)
 
+Definition compat_pointer_chunks (chunk1 chunk2: memory_chunk) : Prop :=
+  match chunk1, chunk2 with
+  | (Mint32 | Many32), (Mint32 | Many32) => True
+  | Many64, Many64 => True
+  | _, _ => False
+  end.
+
 Axiom load_store_pointer_overlap:
   forall chunk m1 b ofs v_b v_o m2 chunk' ofs' v,
   store chunk m1 b ofs (Vptr v_b v_o) = Some m2 ->
@@ -455,13 +461,13 @@ Axiom load_store_pointer_mismatch:
   forall chunk m1 b ofs v_b v_o m2 chunk' v,
   store chunk m1 b ofs (Vptr v_b v_o) = Some m2 ->
   load chunk' m2 b ofs = Some v ->
-  chunk <> Mint32 \/ chunk' <> Mint32 ->
+  ~compat_pointer_chunks chunk chunk' ->
   v = Vundef.
 Axiom load_pointer_store:
-  forall chunk m1 b ofs v m2, store chunk m1 b ofs v = Some m2 ->
-  forall chunk' b' ofs' v_b v_o,
+  forall chunk m1 b ofs v m2 chunk' b' ofs' v_b v_o,
+  store chunk m1 b ofs v = Some m2 ->
   load chunk' m2 b' ofs' = Some(Vptr v_b v_o) ->
-  (chunk = Mint32 /\ v = Vptr v_b v_o /\ chunk' = Mint32 /\ b' = b /\ ofs' = ofs)
+  (v = Vptr v_b v_o /\ compat_pointer_chunks chunk chunk' /\ b' = b /\ ofs' = ofs)
   \/ (b' <> b \/ ofs' + size_chunk chunk' <= ofs \/ ofs + size_chunk chunk <= ofs').
 
 (** Load-store properties for [loadbytes]. *)
@@ -500,10 +506,6 @@ Axiom store_int16_sign_ext:
   forall m b ofs n,
   store Mint16signed m b ofs (Vint (Int.sign_ext 16 n)) =
   store Mint16signed m b ofs (Vint n).
-Axiom store_float32_truncate:
-  forall m b ofs n,
-  store Mfloat32 m b ofs (Vfloat (Float.singleoffloat n)) =
-  store Mfloat32 m b ofs (Vfloat n).
 
 (** ** Properties of [storebytes]. *)
 
diff --git a/common/PrintAST.ml b/common/PrintAST.ml
index 329c0c79..c0eab04f 100644
--- a/common/PrintAST.ml
+++ b/common/PrintAST.ml
@@ -21,6 +21,8 @@ let name_of_type = function
   | Tfloat -> "float"
   | Tlong -> "long"
   | Tsingle -> "single"
+  | Tany32 -> "any32"
+  | Tany64 -> "any64"
 
 let name_of_chunk = function
   | Mint8signed -> "int8s"
@@ -31,6 +33,8 @@ let name_of_chunk = function
   | Mint64 -> "int64"
   | Mfloat32 -> "float32"
   | Mfloat64 -> "float64"
+  | Many32 -> "any32"
+  | Many64 -> "any64"
 
 let name_of_external = function
   | EF_external(name, sg) -> sprintf "extern %S" (extern_atom name)
diff --git a/common/Values.v b/common/Values.v
index 5ac9f3ee..a12fb636 100644
--- a/common/Values.v
+++ b/common/Values.v
@@ -38,6 +38,7 @@ Inductive val: Type :=
   | Vint: int -> val
   | Vlong: int64 -> val
   | Vfloat: float -> val
+  | Vsingle: float32 -> val
   | Vptr: block -> int -> val.
 
 Definition Vzero: val := Vint Int.zero.
@@ -55,14 +56,28 @@ Definition Vfalse: val := Vint Int.zero.
 
 Module Val.
 
+Definition eq (x y: val): {x=y} + {x<>y}.
+Proof.
+  decide equality. 
+  apply Int.eq_dec.
+  apply Int64.eq_dec.
+  apply Float.eq_dec.
+  apply Float32.eq_dec.
+  apply Int.eq_dec.
+  apply eq_block.
+Defined.
+Global Opaque eq.
+
 Definition has_type (v: val) (t: typ) : Prop :=
   match v, t with
   | Vundef, _ => True
   | Vint _, Tint => True
   | Vlong _, Tlong => True
   | Vfloat _, Tfloat => True
-  | Vfloat f, Tsingle => Float.is_single f
+  | Vsingle _, Tsingle => True
   | Vptr _ _, Tint => True
+  | (Vint _ | Vptr _ _ | Vsingle _), Tany32 => True
+  | _, Tany64 => True
   | _, _ => False
   end.
 
@@ -83,8 +98,8 @@ Lemma has_subtype:
   forall ty1 ty2 v,
   subtype ty1 ty2 = true -> has_type v ty1 -> has_type v ty2.
 Proof.
-  intros. destruct ty1; destruct ty2; simpl in H; discriminate || assumption || idtac.
-  unfold has_type in *. destruct v; auto. 
+  intros. destruct ty1; destruct ty2; simpl in H; discriminate || assumption || idtac;
+  unfold has_type in *; destruct v; auto.
 Qed.
 
 Lemma has_subtype_list:
@@ -126,30 +141,66 @@ Definition absf (v: val) : val :=
   | _ => Vundef
   end.
 
+Definition negfs (v: val) : val :=
+  match v with
+  | Vsingle f => Vsingle (Float32.neg f)
+  | _ => Vundef
+  end.
+
+Definition absfs (v: val) : val :=
+  match v with
+  | Vsingle f => Vsingle (Float32.abs f)
+  | _ => Vundef
+  end.
+
 Definition maketotal (ov: option val) : val :=
   match ov with Some v => v | None => Vundef end.
 
 Definition intoffloat (v: val) : option val :=
   match v with
-  | Vfloat f => option_map Vint (Float.intoffloat f)
+  | Vfloat f => option_map Vint (Float.to_int f)
   | _ => None
   end.
 
 Definition intuoffloat (v: val) : option val :=
   match v with
-  | Vfloat f => option_map Vint (Float.intuoffloat f)
+  | Vfloat f => option_map Vint (Float.to_intu f)
   | _ => None
   end.
 
 Definition floatofint (v: val) : option val :=
   match v with
-  | Vint n => Some (Vfloat (Float.floatofint n))
+  | Vint n => Some (Vfloat (Float.of_int n))
   | _ => None
   end.
 
 Definition floatofintu (v: val) : option val :=
   match v with
-  | Vint n => Some (Vfloat (Float.floatofintu n))
+  | Vint n => Some (Vfloat (Float.of_intu n))
+  | _ => None
+  end.
+
+Definition intofsingle (v: val) : option val :=
+  match v with
+  | Vsingle f => option_map Vint (Float32.to_int f)
+  | _ => None
+  end.
+
+Definition intuofsingle (v: val) : option val :=
+  match v with
+  | Vsingle f => option_map Vint (Float32.to_intu f)
+  | _ => None
+  end.
+
+Definition singleofint (v: val) : option val :=
+  match v with
+  | Vint n => Some (Vsingle (Float32.of_int n))
+  | _ => None
+  end.
+
+Definition singleofintu (v: val) : option val :=
+  match v with
+  | Vint n => Some (Vsingle (Float32.of_intu n))
   | _ => None
   end.
 
@@ -195,7 +246,13 @@ Definition sign_ext (nbits: Z) (v: val) : val :=
 
 Definition singleoffloat (v: val) : val :=
   match v with
-  | Vfloat f => Vfloat(Float.singleoffloat f)
+  | Vfloat f => Vsingle (Float.to_single f)
+  | _ => Vundef
+  end.
+
+Definition floatofsingle (v: val) : val :=
+  match v with
+  | Vsingle f => Vfloat (Float.of_single f)
   | _ => Vundef
   end.
 
@@ -394,6 +451,30 @@ Definition floatofwords (v1 v2: val) : val :=
   | _, _ => Vundef
   end.
 
+Definition addfs (v1 v2: val): val :=
+  match v1, v2 with
+  | Vsingle f1, Vsingle f2 => Vsingle(Float32.add f1 f2)
+  | _, _ => Vundef
+  end.
+
+Definition subfs (v1 v2: val): val :=
+  match v1, v2 with
+  | Vsingle f1, Vsingle f2 => Vsingle(Float32.sub f1 f2)
+  | _, _ => Vundef
+  end.
+
+Definition mulfs (v1 v2: val): val :=
+  match v1, v2 with
+  | Vsingle f1, Vsingle f2 => Vsingle(Float32.mul f1 f2)
+  | _, _ => Vundef
+  end.
+
+Definition divfs (v1 v2: val): val :=
+  match v1, v2 with
+  | Vsingle f1, Vsingle f2 => Vsingle(Float32.div f1 f2)
+  | _, _ => Vundef
+  end.
+
 (** Operations on 64-bit integers *)
 
 Definition longofwords (v1 v2: val) : val :=
@@ -440,37 +521,49 @@ Definition longofintu (v: val) : val :=
 
 Definition longoffloat (v: val) : option val :=
   match v with
-  | Vfloat f => option_map Vlong (Float.longoffloat f)
+  | Vfloat f => option_map Vlong (Float.to_long f)
   | _ => None
   end.
 
 Definition longuoffloat (v: val) : option val :=
   match v with
-  | Vfloat f => option_map Vlong (Float.longuoffloat f)
+  | Vfloat f => option_map Vlong (Float.to_longu f)
+  | _ => None
+  end.
+
+Definition longofsingle (v: val) : option val :=
+  match v with
+  | Vsingle f => option_map Vlong (Float32.to_long f)
+  | _ => None
+  end.
+
+Definition longuofsingle (v: val) : option val :=
+  match v with
+  | Vsingle f => option_map Vlong (Float32.to_longu f)
   | _ => None
   end.
 
 Definition floatoflong (v: val) : option val :=
   match v with
-  | Vlong n => Some (Vfloat (Float.floatoflong n))
+  | Vlong n => Some (Vfloat (Float.of_long n))
   | _ => None
   end.
 
 Definition floatoflongu (v: val) : option val :=
   match v with
-  | Vlong n => Some (Vfloat (Float.floatoflongu n))
+  | Vlong n => Some (Vfloat (Float.of_longu n))
   | _ => None
   end.
 
 Definition singleoflong (v: val) : option val :=
   match v with
-  | Vlong n => Some (Vfloat (Float.singleoflong n))
+  | Vlong n => Some (Vsingle (Float32.of_long n))
   | _ => None
   end.
 
 Definition singleoflongu (v: val) : option val :=
   match v with
-  | Vlong n => Some (Vfloat (Float.singleoflongu n))
+  | Vlong n => Some (Vsingle (Float32.of_longu n))
   | _ => None
   end.
 
@@ -625,6 +718,12 @@ Definition cmpf_bool (c: comparison) (v1 v2: val): option bool :=
   | _, _ => None
   end.
 
+Definition cmpfs_bool (c: comparison) (v1 v2: val): option bool :=
+  match v1, v2 with
+  | Vsingle f1, Vsingle f2 => Some (Float32.cmp c f1 f2)
+  | _, _ => None
+  end.
+
 Definition cmpl_bool (c: comparison) (v1 v2: val): option bool :=
   match v1, v2 with
   | Vlong n1, Vlong n2 => Some (Int64.cmp c n1 n2)
@@ -649,6 +748,9 @@ Definition cmpu (c: comparison) (v1 v2: val): val :=
 Definition cmpf (c: comparison) (v1 v2: val): val :=
   of_optbool (cmpf_bool c v1 v2).
 
+Definition cmpfs (c: comparison) (v1 v2: val): val :=
+  of_optbool (cmpfs_bool c v1 v2).
+
 Definition cmpl (c: comparison) (v1 v2: val): option val :=
   option_map of_bool (cmpl_bool c v1 v2).
 
@@ -681,22 +783,23 @@ Definition load_result (chunk: memory_chunk) (v: val) :=
   | Mint32, Vint n => Vint n
   | Mint32, Vptr b ofs => Vptr b ofs
   | Mint64, Vlong n => Vlong n
-  | Mfloat32, Vfloat f => Vfloat(Float.singleoffloat f)
+  | Mfloat32, Vsingle f => Vsingle f
   | Mfloat64, Vfloat f => Vfloat f
+  | Many32, (Vint _ | Vptr _ _ | Vsingle _) => v
+  | Many64, _ => v
   | _, _ => Vundef
   end.
 
 Lemma load_result_type:
   forall chunk v, has_type (load_result chunk v) (type_of_chunk chunk).
 Proof.
-  intros. destruct chunk; destruct v; simpl; auto. apply Float.singleoffloat_is_single.
+  intros. destruct chunk; destruct v; simpl; auto.
 Qed.
 
 Lemma load_result_same:
   forall v ty, has_type v ty -> load_result (chunk_of_type ty) v = v.
 Proof.
   unfold has_type; intros. destruct v; destruct ty; try contradiction; auto.
-  simpl. rewrite Float.singleoffloat_of_single; auto. 
 Qed.
 
 (** Theorems on arithmetic operations. *)
@@ -1388,6 +1491,8 @@ Inductive val_inject (mi: meminj): val -> val -> Prop :=
       forall i, val_inject mi (Vlong i) (Vlong i)
   | val_inject_float:
       forall f, val_inject mi (Vfloat f) (Vfloat f)
+  | val_inject_single:
+      forall f, val_inject mi (Vsingle f) (Vsingle f)
   | val_inject_ptr:
       forall b1 ofs1 b2 ofs2 delta,
       mi b1 = Some (b2, delta) ->
diff --git a/driver/Interp.ml b/driver/Interp.ml
index af657890..db4537cb 100644
--- a/driver/Interp.ml
+++ b/driver/Interp.ml
@@ -46,7 +46,7 @@ let print_id_ofs p (id, ofs) =
 let print_eventval p = function
   | EVint n -> fprintf p "%ld" (camlint_of_coqint n)
   | EVfloat f -> fprintf p "%F" (camlfloat_of_coqfloat f)
-  | EVfloatsingle f -> fprintf p "%F" (camlfloat_of_coqfloat f)
+  | EVsingle f -> fprintf p "%F" (camlfloat_of_coqfloat32 f)
   | EVlong n -> fprintf p "%LdLL" (camlint64_of_coqint n)
   | EVptr_global(id, ofs) -> fprintf p "&%a" print_id_ofs (id, ofs)
 
diff --git a/exportclight/ExportClight.ml b/exportclight/ExportClight.ml
index 409abcfa..02f1249c 100644
--- a/exportclight/ExportClight.ml
+++ b/exportclight/ExportClight.ml
@@ -121,10 +121,10 @@ let coqint p n =
   else fprintf p "(Int.repr (%ld))" n
 
 let coqfloat p n =
-  let n = camlint64_of_coqint(Floats.Float.bits_of_double n) in
+  let n = camlint64_of_coqint(Floats.Float.to_bits n) in
   if n >= 0L
-  then fprintf p "(Float.double_of_bits (Int64.repr %Ld))" n
-  else fprintf p "(Float.double_of_bits (Int64.repr (%Ld)))" n
+  then fprintf p "(Float.of_bits (Int64.repr %Ld))" n
+  else fprintf p "(Float.of_bits (Int64.repr (%Ld)))" n
 
 let coqint64 p n =
   let n = camlint64_of_coqint n in
diff --git a/flocq/Appli/Fappli_IEEE_bits.v b/flocq/Appli/Fappli_IEEE_bits.v
index a41fba98..937e8d43 100644
--- a/flocq/Appli/Fappli_IEEE_bits.v
+++ b/flocq/Appli/Fappli_IEEE_bits.v
@@ -248,6 +248,57 @@ discriminate.
 now apply Zlt_0_le_0_pred.
 Qed.
 
+Theorem bits_of_binary_float_range:
+  forall x, (0 <= bits_of_binary_float x < 2^(mw+ew+1))%Z.
+Proof.
+  intros. 
+Local Open Scope Z_scope.
+  assert (J: forall s m e,
+          0 <= m < 2^mw -> 0 <= e < 2^ew ->
+          0 <= join_bits s m e < 2^(mw+ew+1)).
+  {
+    intros. unfold join_bits. 
+    set (se := (if s then 2 ^ ew else 0) + e).
+    assert (0 <= se < 2^(ew+1)).
+    { rewrite (Zpower_plus radix2) by omega. change (radix2^1) with 2. simpl.
+      unfold se. destruct s; omega. }
+    assert (0 <= se * 2^mw <= (2^(ew+1) - 1) * 2^mw).
+    { split. apply Zmult_gt_0_le_0_compat; omega.
+      apply Zmult_le_compat_r; omega. }
+    rewrite Z.mul_sub_distr_r in H2.
+    replace (mw + ew + 1) with ((ew + 1) + mw) by omega. 
+    rewrite (Zpower_plus radix2) by omega. simpl. omega.
+  }
+  assert (D: forall p n, Z.of_nat (S (digits2_Pnat p)) <= n ->
+             0 <= Z.pos p < 2^n).
+  {
+    intros.  
+    generalize (digits2_Pnat_correct p). simpl. rewrite ! Zpower_nat_Z. intros [A B].
+    split. zify; omega. eapply Zlt_le_trans. eassumption. 
+    apply (Zpower_le radix2); auto.
+  } 
+  destruct x; unfold bits_of_binary_float.
+- apply J; omega.
+- apply J; omega. 
+- destruct n as [pl pl_range]. apply Z.ltb_lt in pl_range.
+  apply J. apply D. unfold prec, Z_of_nat' in pl_range; omega. omega.
+- unfold bounded in e0. apply Bool.andb_true_iff in e0; destruct e0 as [A B].
+  apply Z.leb_le in B.
+  unfold canonic_mantissa, FLT_exp in A. apply Zeq_bool_eq in A. 
+  assert (G: Z.of_nat (S (digits2_Pnat m)) <= prec) by (zify; omega).
+  assert (M: emin <= e) by (unfold emin; zify; omega). 
+  generalize (Zle_bool_spec (2^mw) (Z.pos m)); intro SPEC; inversion SPEC.
+  + apply J. 
+    * split. omega. generalize (D _ _ G). unfold prec. 
+      rewrite (Zpower_plus radix2) by omega.
+      change (radix2^1) with 2. simpl radix_val. omega.
+    * split. omega. unfold emin. replace (2^ew) with (2 * emax). omega.
+      symmetry. replace ew with (1 + (ew - 1)) by omega. 
+      apply (Zpower_plus radix2); omega.
+  + apply J. zify; omega. omega. 
+Local Close Scope Z_scope.
+Qed.
+
 Definition binary_float_of_bits_aux x :=
   let '(sx, mx, ex) := split_bits x in
   if Zeq_bool ex 0 then
diff --git a/ia32/Archi.v b/ia32/Archi.v
index a2e136c3..fff25cfc 100644
--- a/ia32/Archi.v
+++ b/ia32/Archi.v
@@ -25,10 +25,19 @@ Definition big_endian := false.
 Notation align_int64 := 4%Z (only parsing).
 Notation align_float64 := 4%Z (only parsing).
 
-Program Definition default_pl : bool * nan_pl 53 := 
+Program Definition default_pl_64 : bool * nan_pl 53 := 
   (true, nat_iter 51 xO xH).
 
-Definition choose_binop_pl (s1: bool) (pl1: nan_pl 53) (s2: bool) (pl2: nan_pl 53) :=
+Definition choose_binop_pl_64 (s1: bool) (pl1: nan_pl 53) (s2: bool) (pl2: nan_pl 53) :=
   false.                        (**r always choose first NaN *)
 
-Global Opaque big_endian default_pl choose_binop_pl.
+Program Definition default_pl_32 : bool * nan_pl 24 := 
+  (true, nat_iter 22 xO xH).
+
+Definition choose_binop_pl_32 (s1: bool) (pl1: nan_pl 24) (s2: bool) (pl2: nan_pl 24) :=
+  false.                        (**r always choose first NaN *)
+
+Global Opaque big_endian
+              default_pl_64 choose_binop_pl_64
+              default_pl_32 choose_binop_pl_32.
+
diff --git a/ia32/Asm.v b/ia32/Asm.v
index 2e5b8b9d..15f80e42 100644
--- a/ia32/Asm.v
+++ b/ia32/Asm.v
@@ -118,10 +118,13 @@ Inductive instruction: Type :=
   | Pmovsd_fi (rd: freg) (n: float)     (**r (pseudo-instruction) *)
   | Pmovsd_fm (rd: freg) (a: addrmode)
   | Pmovsd_mf (a: addrmode) (r1: freg)
-  | Pfld_f (r1: freg)                   (**r [fld] from XMM register (pseudo) *)
-  | Pfld_m (a: addrmode)                (**r [fld] from memory *)
-  | Pfstp_f (rd: freg)                  (**r [fstp] to XMM register (pseudo) *)
-  | Pfstp_m (a: addrmode)               (**r [fstp] to memory *)
+  | Pmovss_fi (rd: freg) (n: float32)   (**r [movss] (single 32-bit float) *)
+  | Pmovss_fm (rd: freg) (a: addrmode)
+  | Pmovss_mf (a: addrmode) (r1: freg)
+  | Pfldl_m (a: addrmode)               (**r [fld] double precision *)
+  | Pfstpl_m (a: addrmode)              (**r [fstp] double precision *)
+  | Pflds_m (a: addrmode)               (**r [fld] simple precision *)
+  | Pfstps_m (a: addrmode)              (**r [fstp] simple precision *)
   | Pxchg_rr (r1: ireg) (r2: ireg)      (**r register-register exchange *)
   (** Moves with conversion *)
   | Pmovb_mr (a: addrmode) (rs: ireg)   (**r [mov] (8-bit int) *)
@@ -134,11 +137,12 @@ Inductive instruction: Type :=
   | Pmovzw_rm (rd: ireg) (a: addrmode)
   | Pmovsw_rr (rd: ireg) (rs: ireg)     (**r [movsw] (16-bit sign-extension) *)
   | Pmovsw_rm (rd: ireg) (a: addrmode)
-  | Pcvtss2sd_fm (rd: freg) (a: addrmode) (**r [cvtss2sd] (single float load) *)
-  | Pcvtsd2ss_ff (rd: freg) (r1: freg)  (**r conversion to single (pseudo) *)
-  | Pcvtsd2ss_mf (a: addrmode) (r1: freg) (**r [cvtsd2ss] (single float store *)
+  | Pcvtsd2ss_ff (rd: freg) (r1: freg)  (**r conversion to single float *)
+  | Pcvtss2sd_ff (rd: freg) (r1: freg)  (**r conversion to double float *)
   | Pcvttsd2si_rf (rd: ireg) (r1: freg) (**r double to signed int *)
   | Pcvtsi2sd_fr (rd: freg) (r1: ireg)  (**r signed int to double *)
+  | Pcvttss2si_rf (rd: ireg) (r1: freg) (**r single to signed int *)
+  | Pcvtsi2ss_fr (rd: freg) (r1: ireg)  (**r signed int to single *)
   (** Integer arithmetic *)
   | Plea (rd: ireg) (a: addrmode)
   | Pneg (rd: ireg)
@@ -180,6 +184,14 @@ Inductive instruction: Type :=
   | Pabsd (rd: freg)
   | Pcomisd_ff (r1 r2: freg)
   | Pxorpd_f (rd: freg)	              (**r [xor] with self = set to zero *)
+  | Padds_ff (rd: freg) (r1: freg)
+  | Psubs_ff (rd: freg) (r1: freg)
+  | Pmuls_ff (rd: freg) (r1: freg)
+  | Pdivs_ff (rd: freg) (r1: freg)
+  | Pnegs (rd: freg)
+  | Pabss (rd: freg)
+  | Pcomiss_ff (r1 r2: freg)
+  | Pxorps_f (rd: freg)	              (**r [xor] with self = set to zero *)
   (** Branches and calls *)
   | Pjmp_l (l: label)
   | Pjmp_s (symb: ident) (sg: signature)
@@ -190,6 +202,11 @@ Inductive instruction: Type :=
   | Pcall_s (symb: ident) (sg: signature)
   | Pcall_r (r: ireg) (sg: signature)
   | Pret
+  (** Saving and restoring registers *)
+  | Pmov_rm_a (rd: ireg) (a: addrmode)  (**r like [Pmov_rm], using [Many32] chunk *)
+  | Pmov_mr_a (a: addrmode) (rs: ireg)  (**r like [Pmov_mr], using [Many32] chunk *)
+  | Pmovsd_fm_a (rd: freg) (a: addrmode) (**r like [Pmovsd_fm], using [Many64] chunk *)
+  | Pmovsd_mf_a (a: addrmode) (r1: freg) (**r like [Pmovsd_mf], using [Many64] chunk *)
   (** Pseudo-instructions *)
   | Plabel(l: label)
   | Pallocframe(sz: Z)(ofs_ra ofs_link: int)
@@ -330,6 +347,18 @@ Definition compare_floats (vx vy: val) (rs: regset) : regset :=
       undef_regs (CR ZF :: CR CF :: CR PF :: CR SF :: CR OF :: nil) rs
   end.
 
+Definition compare_floats32 (vx vy: val) (rs: regset) : regset :=
+  match vx, vy with
+  | Vsingle x, Vsingle y =>
+      rs #ZF  <- (Val.of_bool (negb (Float32.cmp Cne x y)))
+         #CF  <- (Val.of_bool (negb (Float32.cmp Cge x y)))
+         #PF  <- (Val.of_bool (negb (Float32.cmp Ceq x y || Float32.cmp Clt x y || Float32.cmp Cgt x y)))
+         #SF  <- Vundef
+         #OF  <- Vundef
+  | _, _ =>
+      undef_regs (CR ZF :: CR CF :: CR PF :: CR SF :: CR OF :: nil) rs
+  end.
+
 (** Testing a condition *)
 
 Definition eval_testcond (c: testcond) (rs: regset) : option bool :=
@@ -484,14 +513,20 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       exec_load Mfloat64 m a rs rd
   | Pmovsd_mf a r1 =>
       exec_store Mfloat64 m a rs r1 nil
-  | Pfld_f r1 =>
-      Next (nextinstr (rs#ST0 <- (rs r1))) m
-  | Pfld_m a =>
+  | Pmovss_fi rd n =>
+      Next (nextinstr (rs#rd <- (Vsingle n))) m
+  | Pmovss_fm rd a =>
+      exec_load Mfloat32 m a rs rd
+  | Pmovss_mf a r1 =>
+      exec_store Mfloat32 m a rs r1 nil
+  | Pfldl_m a =>
       exec_load Mfloat64 m a rs ST0
-  | Pfstp_f rd =>
-      Next (nextinstr (rs#rd <- (rs ST0) #ST0 <- Vundef)) m
-  | Pfstp_m a =>
+  | Pfstpl_m a =>
       exec_store Mfloat64 m a rs ST0 (ST0 :: nil)
+  | Pflds_m a =>
+      exec_load Mfloat32 m a rs ST0
+  | Pfstps_m a =>
+      exec_store Mfloat32 m a rs ST0 (ST0 :: nil)
   | Pxchg_rr r1 r2 =>
       Next (nextinstr (rs#r1 <- (rs r2) #r2 <- (rs r1))) m
   (** Moves with conversion *)
@@ -515,16 +550,18 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       Next (nextinstr (rs#rd <- (Val.sign_ext 16 rs#r1))) m
   | Pmovsw_rm rd a =>
       exec_load Mint16signed m a rs rd
-  | Pcvtss2sd_fm rd a =>
-      exec_load Mfloat32 m a rs rd
   | Pcvtsd2ss_ff rd r1 =>
       Next (nextinstr (rs#rd <- (Val.singleoffloat rs#r1))) m
-  | Pcvtsd2ss_mf a r1 =>
-      exec_store Mfloat32 m a rs r1 (FR XMM7 :: nil)
+  | Pcvtss2sd_ff rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.floatofsingle rs#r1))) m
   | Pcvttsd2si_rf rd r1 =>
       Next (nextinstr (rs#rd <- (Val.maketotal (Val.intoffloat rs#r1)))) m
   | Pcvtsi2sd_fr rd r1 =>
       Next (nextinstr (rs#rd <- (Val.maketotal (Val.floatofint rs#r1)))) m
+  | Pcvttss2si_rf rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.maketotal (Val.intofsingle rs#r1)))) m
+  | Pcvtsi2ss_fr rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.maketotal (Val.singleofint rs#r1)))) m
   (** Integer arithmetic *)
   | Plea rd a =>
       Next (nextinstr (rs#rd <- (eval_addrmode a rs))) m
@@ -604,7 +641,7 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       end
   | Psetcc c rd =>
       Next (nextinstr (rs#rd <- (Val.of_optbool (eval_testcond c rs)))) m
-  (** Arithmetic operations over floats *)
+  (** Arithmetic operations over double-precision floats *)
   | Paddd_ff rd r1 =>
       Next (nextinstr (rs#rd <- (Val.addf rs#rd rs#r1))) m
   | Psubd_ff rd r1 =>
@@ -621,6 +658,23 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       Next (nextinstr (compare_floats (rs r1) (rs r2) rs)) m
   | Pxorpd_f rd =>
       Next (nextinstr_nf (rs#rd <- (Vfloat Float.zero))) m
+  (** Arithmetic operations over single-precision floats *)
+  | Padds_ff rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.addfs rs#rd rs#r1))) m
+  | Psubs_ff rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.subfs rs#rd rs#r1))) m
+  | Pmuls_ff rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.mulfs rs#rd rs#r1))) m
+  | Pdivs_ff rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.divfs rs#rd rs#r1))) m
+  | Pnegs rd =>
+      Next (nextinstr (rs#rd <- (Val.negfs rs#rd))) m
+  | Pabss rd =>
+      Next (nextinstr (rs#rd <- (Val.absfs rs#rd))) m
+  | Pcomiss_ff r1 r2 =>
+      Next (nextinstr (compare_floats32 (rs r1) (rs r2) rs)) m
+  | Pxorps_f rd =>
+      Next (nextinstr_nf (rs#rd <- (Vsingle Float32.zero))) m
   (** Branches and calls *)
   | Pjmp_l lbl =>
       goto_label f lbl rs m
@@ -655,6 +709,15 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       Next (rs#RA <- (Val.add rs#PC Vone) #PC <- (rs r)) m
   | Pret =>
       Next (rs#PC <- (rs#RA)) m
+  (** Saving and restoring registers *)
+  | Pmov_rm_a rd a =>
+      exec_load Many32 m a rs rd
+  | Pmov_mr_a a r1 =>
+      exec_store Many32 m a rs r1 nil
+  | Pmovsd_fm_a rd a =>
+      exec_load Many64 m a rs rd
+  | Pmovsd_mf_a a r1 =>
+      exec_store Many64 m a rs r1 nil
   (** Pseudo-instructions *)
   | Plabel lbl =>
       Next (nextinstr rs) m
diff --git a/ia32/Asmgen.v b/ia32/Asmgen.v
index f92d72c0..9c0a76e0 100644
--- a/ia32/Asmgen.v
+++ b/ia32/Asmgen.v
@@ -50,8 +50,6 @@ Definition mk_mov (rd rs: preg) (k: code) : res code :=
   match rd, rs with
   | IR rd, IR rs => OK (Pmov_rr rd rs :: k)
   | FR rd, FR rs => OK (Pmovsd_ff rd rs :: k)
-  | ST0, FR rs => OK (Pfld_f rs :: k)
-  | FR rd, ST0 => OK (Pfstp_f rd :: k)
   | _, _ => Error(msg "Asmgen.mk_mov")
   end.
 
@@ -90,42 +88,30 @@ Definition mk_smallstore (sto: addrmode -> ireg ->instruction)
   else
     OK (Pmov_rr EAX rs :: sto addr EAX :: k).
 
-(** Accessing slots in the stack frame.  *)
+(** Accessing slots in the stack frame. *)
 
 Definition loadind (base: ireg) (ofs: int) (ty: typ) (dst: mreg) (k: code) :=
-  match ty with
-  | Tint =>
-      do r <- ireg_of dst;
-      OK (Pmov_rm r (Addrmode (Some base) None (inl _ ofs)) :: k)
-  | Tfloat =>
-      match preg_of dst with
-      | FR r => OK (Pmovsd_fm r (Addrmode (Some base) None (inl _ ofs)) :: k)
-      | ST0  => OK (Pfld_m (Addrmode (Some base) None (inl _ ofs)) :: k)
-      | _ => Error (msg "Asmgen.loadind")
-      end
-  | Tsingle =>
-      do r <- freg_of dst;
-      OK (Pcvtss2sd_fm r (Addrmode (Some base) None (inl _ ofs)) :: k)
-  | Tlong =>
-      Error (msg "Asmgen.loadind")
+  match ty, preg_of dst with
+  | Tint, IR r => OK (Pmov_rm r (Addrmode (Some base) None (inl _ ofs)) :: k)
+  | Tsingle, FR r => OK (Pmovss_fm r (Addrmode (Some base) None (inl _ ofs)) :: k)
+  | Tsingle, ST0  => OK (Pflds_m (Addrmode (Some base) None (inl _ ofs)) :: k)
+  | Tfloat, FR r => OK (Pmovsd_fm r (Addrmode (Some base) None (inl _ ofs)) :: k)
+  | Tfloat, ST0  => OK (Pfldl_m (Addrmode (Some base) None (inl _ ofs)) :: k)
+  | Tany32, IR r => OK (Pmov_rm_a r (Addrmode (Some base) None (inl _ ofs)) :: k)
+  | Tany64, FR r => OK (Pmovsd_fm_a r (Addrmode (Some base) None (inl _ ofs)) :: k)
+  | _, _ => Error (msg "Asmgen.loadind")
   end.
 
 Definition storeind (src: mreg) (base: ireg) (ofs: int) (ty: typ) (k: code) :=
-  match ty with
-  | Tint =>
-      do r <- ireg_of src;
-      OK (Pmov_mr (Addrmode (Some base) None (inl _ ofs)) r :: k)
-  | Tfloat =>
-      match preg_of src with
-      | FR r => OK (Pmovsd_mf (Addrmode (Some base) None (inl _ ofs)) r :: k)
-      | ST0  => OK (Pfstp_m (Addrmode (Some base) None (inl _ ofs)) :: k)
-      | _ => Error (msg "Asmgen.loadind")
-      end
-  | Tsingle =>
-      do r <- freg_of src;
-      OK (Pcvtsd2ss_mf (Addrmode (Some base) None (inl _ ofs)) r :: k)
-  | Tlong =>
-      Error (msg "Asmgen.storeind")
+  match ty, preg_of src with
+  | Tint, IR r => OK (Pmov_mr (Addrmode (Some base) None (inl _ ofs)) r :: k)
+  | Tsingle, FR r => OK (Pmovss_mf (Addrmode (Some base) None (inl _ ofs)) r :: k)
+  | Tsingle, ST0 => OK (Pfstps_m (Addrmode (Some base) None (inl _ ofs)) :: k)
+  | Tfloat, FR r => OK (Pmovsd_mf (Addrmode (Some base) None (inl _ ofs)) r :: k)
+  | Tfloat, ST0  => OK (Pfstpl_m (Addrmode (Some base) None (inl _ ofs)) :: k)
+  | Tany32, IR r => OK (Pmov_mr_a (Addrmode (Some base) None (inl _ ofs)) r :: k)
+  | Tany64, FR r => OK (Pmovsd_mf_a (Addrmode (Some base) None (inl _ ofs)) r :: k)
+  | _, _ => Error (msg "Asmgen.storeind")
   end.
 
 (** Translation of addressing modes *)
@@ -163,6 +149,12 @@ Definition floatcomp (cmp: comparison) (r1 r2: freg) : instruction :=
   | Ceq | Cne | Cgt | Cge => Pcomisd_ff r1 r2
   end.
 
+Definition floatcomp32 (cmp: comparison) (r1 r2: freg) : instruction :=
+  match cmp with
+  | Clt | Cle => Pcomiss_ff r2 r1
+  | Ceq | Cne | Cgt | Cge => Pcomiss_ff r1 r2
+  end.
+
 (** Translation of a condition.  Prepends to [k] the instructions
   that evaluate the condition and leave its boolean result in bits
   of the condition register. *)
@@ -183,6 +175,10 @@ Definition transl_cond
       do r1 <- freg_of a1; do r2 <- freg_of a2; OK (floatcomp cmp r1 r2 :: k)
   | Cnotcompf cmp, a1 :: a2 :: nil =>
       do r1 <- freg_of a1; do r2 <- freg_of a2; OK (floatcomp cmp r1 r2 :: k)
+  | Ccompfs cmp, a1 :: a2 :: nil =>
+      do r1 <- freg_of a1; do r2 <- freg_of a2; OK (floatcomp32 cmp r1 r2 :: k)
+  | Cnotcompfs cmp, a1 :: a2 :: nil =>
+      do r1 <- freg_of a1; do r2 <- freg_of a2; OK (floatcomp32 cmp r1 r2 :: k)
   | Cmaskzero n, a1 :: nil =>
       do r1 <- ireg_of a1; OK (Ptest_ri r1 n :: k)
   | Cmasknotzero n, a1 :: nil =>
@@ -224,7 +220,7 @@ Definition testcond_for_condition (cond: condition) : extcond :=
   | Ccompu c => Cond_base(testcond_for_unsigned_comparison c)
   | Ccompimm c n => Cond_base(testcond_for_signed_comparison c)
   | Ccompuimm c n => Cond_base(testcond_for_unsigned_comparison c)
-  | Ccompf c =>
+  | Ccompf c | Ccompfs c =>
       match c with
       | Ceq => Cond_and Cond_np Cond_e
       | Cne => Cond_or Cond_p Cond_ne
@@ -233,7 +229,7 @@ Definition testcond_for_condition (cond: condition) : extcond :=
       | Cgt => Cond_base Cond_a
       | Cge => Cond_base Cond_ae
       end
-  | Cnotcompf c =>
+  | Cnotcompf c | Cnotcompfs c =>
       match c with
       | Ceq => Cond_or Cond_p Cond_ne
       | Cne => Cond_and Cond_np Cond_e
@@ -288,6 +284,9 @@ Definition transl_op
   | Ofloatconst f, nil =>
       do r <- freg_of res; 
       OK ((if Float.eq_dec f Float.zero then Pxorpd_f r else Pmovsd_fi r f) :: k)
+  | Osingleconst f, nil =>
+      do r <- freg_of res; 
+      OK ((if Float32.eq_dec f Float32.zero then Pxorps_f r else Pmovss_fi r f) :: k)
   | Oindirectsymbol id, nil =>
       do r <- ireg_of res;
       OK (Pmov_ra r id :: k)
@@ -412,12 +411,36 @@ Definition transl_op
   | Odivf, a1 :: a2 :: nil =>
       assertion (mreg_eq a1 res);
       do r <- freg_of res; do r2 <- freg_of a2; OK (Pdivd_ff r r2 :: k)
+  | Onegfs, a1 :: nil =>
+      assertion (mreg_eq a1 res);
+      do r <- freg_of res; OK (Pnegs r :: k)
+  | Oabsfs, a1 :: nil =>
+      assertion (mreg_eq a1 res);
+      do r <- freg_of res; OK (Pabss r :: k)
+  | Oaddfs, a1 :: a2 :: nil =>
+      assertion (mreg_eq a1 res);
+      do r <- freg_of res; do r2 <- freg_of a2; OK (Padds_ff r r2 :: k)
+  | Osubfs, a1 :: a2 :: nil =>
+      assertion (mreg_eq a1 res);
+      do r <- freg_of res; do r2 <- freg_of a2; OK (Psubs_ff r r2 :: k)
+  | Omulfs, a1 :: a2 :: nil =>
+      assertion (mreg_eq a1 res);
+      do r <- freg_of res; do r2 <- freg_of a2; OK (Pmuls_ff r r2 :: k)
+  | Odivfs, a1 :: a2 :: nil =>
+      assertion (mreg_eq a1 res);
+      do r <- freg_of res; do r2 <- freg_of a2; OK (Pdivs_ff r r2 :: k)
   | Osingleoffloat, a1 :: nil =>
       do r <- freg_of res; do r1 <- freg_of a1; OK (Pcvtsd2ss_ff r r1 :: k)
+  | Ofloatofsingle, a1 :: nil =>
+      do r <- freg_of res; do r1 <- freg_of a1; OK (Pcvtss2sd_ff r r1 :: k)
   | Ointoffloat, a1 :: nil =>
       do r <- ireg_of res; do r1 <- freg_of a1; OK (Pcvttsd2si_rf r r1 :: k)
   | Ofloatofint, a1 :: nil =>
       do r <- freg_of res; do r1 <- ireg_of a1; OK (Pcvtsi2sd_fr r r1 :: k)
+  | Ointofsingle, a1 :: nil =>
+      do r <- ireg_of res; do r1 <- freg_of a1; OK (Pcvttss2si_rf r r1 :: k)
+  | Osingleofint, a1 :: nil =>
+      do r <- freg_of res; do r1 <- ireg_of a1; OK (Pcvtsi2ss_fr r r1 :: k)
   | Ocmp c, args =>
       do r <- ireg_of res;
       transl_cond c args (mk_setcc (testcond_for_condition c) r k)
@@ -443,10 +466,10 @@ Definition transl_load (chunk: memory_chunk)
   | Mint32 =>
       do r <- ireg_of dest; OK(Pmov_rm r am :: k)
   | Mfloat32 =>
-      do r <- freg_of dest; OK(Pcvtss2sd_fm r am :: k)
+      do r <- freg_of dest; OK(Pmovss_fm r am :: k)
   | Mfloat64 =>
       do r <- freg_of dest; OK(Pmovsd_fm r am :: k)
-  | Mint64 =>
+  | _ =>
       Error (msg "Asmgen.transl_load")
   end.
 
@@ -462,10 +485,10 @@ Definition transl_store (chunk: memory_chunk)
   | Mint32 =>
       do r <- ireg_of src; OK(Pmov_mr am r :: k)
   | Mfloat32 =>
-      do r <- freg_of src; OK(Pcvtsd2ss_mf am r :: k)
+      do r <- freg_of src; OK(Pmovss_mf am r :: k)
   | Mfloat64 =>
       do r <- freg_of src; OK(Pmovsd_mf am r :: k)
-  | Mint64 =>
+  | _ =>
       Error (msg "Asmgen.transl_store")
   end.
 
diff --git a/ia32/Asmgenproof.v b/ia32/Asmgenproof.v
index 881375f0..eba710a1 100644
--- a/ia32/Asmgenproof.v
+++ b/ia32/Asmgenproof.v
@@ -168,11 +168,7 @@ Remark loadind_label:
   loadind base ofs ty dst k = OK c ->
   tail_nolabel k c.
 Proof.
-  unfold loadind; intros. destruct ty.
-  TailNoLabel.
-  destruct (preg_of dst); TailNoLabel.
-  discriminate.
-  TailNoLabel.
+  unfold loadind; intros. destruct ty; try discriminate; destruct (preg_of dst); TailNoLabel.
 Qed.
 
 Remark storeind_label:
@@ -180,11 +176,7 @@ Remark storeind_label:
   storeind src base ofs ty k = OK c ->
   tail_nolabel k c.
 Proof.
-  unfold storeind; intros. destruct ty.
-  TailNoLabel.
-  destruct (preg_of src); TailNoLabel.
-  discriminate.
-  TailNoLabel.
+  unfold storeind; intros. destruct ty; try discriminate; destruct (preg_of src); TailNoLabel.
 Qed.
 
 Remark mk_setcc_base_label:
@@ -220,6 +212,8 @@ Proof.
   destruct (Int.eq_dec i Int.zero); TailNoLabel.
   destruct c0; simpl; TailNoLabel.
   destruct c0; simpl; TailNoLabel.
+  destruct c0; simpl; TailNoLabel.
+  destruct c0; simpl; TailNoLabel.
 Qed.
 
 Remark transl_op_label:
@@ -230,6 +224,7 @@ Proof.
   unfold transl_op; intros. destruct op; TailNoLabel.
   destruct (Int.eq_dec i Int.zero); TailNoLabel.
   destruct (Float.eq_dec f Float.zero); TailNoLabel.
+  destruct (Float32.eq_dec f Float32.zero); TailNoLabel.
   eapply tail_nolabel_trans. eapply transl_cond_label; eauto. eapply mk_setcc_label.  
 Qed.
 
diff --git a/ia32/Asmgenproof1.v b/ia32/Asmgenproof1.v
index b3c815b6..7d71d1a2 100644
--- a/ia32/Asmgenproof1.v
+++ b/ia32/Asmgenproof1.v
@@ -107,7 +107,7 @@ Lemma mk_mov_correct:
   exists rs2,
      exec_straight ge fn c rs1 m k rs2 m
   /\ rs2#rd = rs1#rs
-  /\ forall r, data_preg r = true -> r <> ST0 -> r <> rd -> rs2#r = rs1#r.
+  /\ forall r, data_preg r = true -> r <> rd -> rs2#r = rs1#r.
 Proof.
   unfold mk_mov; intros. 
   destruct rd; try (monadInv H); destruct rs; monadInv H.
@@ -117,12 +117,6 @@ Proof.
 (* movd *)
   econstructor. split. apply exec_straight_one. simpl. eauto. auto. 
   split. Simplifs. intros; Simplifs.
-(* getfp0 *)
-  econstructor. split. apply exec_straight_one. simpl. eauto. auto. 
-  split. Simplifs. intros; Simplifs.
-(* setfp0 *)
-  econstructor. split. apply exec_straight_one. simpl. eauto. auto. 
-  split. Simplifs. intros; Simplifs.
 Qed.
 
 (** Properties of division *)
@@ -288,27 +282,10 @@ Proof.
   set (addr := Addrmode (Some base) None (inl (ident * int) ofs)) in *.
   assert (eval_addrmode ge addr rs = Val.add rs#base (Vint ofs)).
     unfold addr. simpl. rewrite Int.add_commut; rewrite Int.add_zero; auto. 
-  destruct ty; simpl in H0.
-  (* int *)
-  monadInv H.
-  rewrite (ireg_of_eq _ _ EQ). econstructor.
-  split. apply exec_straight_one. simpl. unfold exec_load. rewrite H1. rewrite H0.
-  eauto. auto.
-  intuition Simplifs.
-  (* float *)
-  exists (nextinstr_nf (rs#(preg_of dst) <- v)).
-  split. destruct (preg_of dst); inv H; apply exec_straight_one; simpl; auto.
-  unfold exec_load. rewrite H1; rewrite H0; auto. 
-  unfold exec_load. rewrite H1; rewrite H0; auto.
-  intuition Simplifs.
-  (* long *)
-  inv H.
-  (* single *)
-  monadInv H.
-  rewrite (freg_of_eq _ _ EQ). econstructor.
-  split. apply exec_straight_one. simpl. unfold exec_load. rewrite H1. rewrite H0.
-  eauto. auto.
-  intuition Simplifs.
+  exists (nextinstr_nf (rs#(preg_of dst) <- v)); split.
+- destruct ty; try discriminate; destruct (preg_of dst); inv H; simpl in H0;
+  apply exec_straight_one; auto; simpl; unfold exec_load; rewrite H1, H0; auto.
+- intuition Simplifs.
 Qed.
 
 Lemma storeind_correct:
@@ -319,33 +296,15 @@ Lemma storeind_correct:
      exec_straight ge fn c rs m k rs' m'
   /\ forall r, data_preg r = true -> preg_notin r (destroyed_by_setstack ty) -> rs'#r = rs#r.
 Proof.
+Local Transparent destroyed_by_setstack.
   unfold storeind; intros.
   set (addr := Addrmode (Some base) None (inl (ident * int) ofs)) in *.
   assert (eval_addrmode ge addr rs = Val.add rs#base (Vint ofs)).
     unfold addr. simpl. rewrite Int.add_commut; rewrite Int.add_zero; auto. 
-  destruct ty; simpl in H0.
-  (* int *)
-  monadInv H.
-  rewrite (ireg_of_eq _ _ EQ) in H0. econstructor.
-  split. apply exec_straight_one. simpl. unfold exec_store. rewrite H1. rewrite H0.
-  eauto. auto. 
-  intros; Simplifs.
-  (* float *)
-  destruct (preg_of src); inv H.
-  econstructor; split. apply exec_straight_one.
-  simpl. unfold exec_store. rewrite H1; rewrite H0. eauto. auto.
-  intros. apply nextinstr_nf_inv1; auto.
-  econstructor; split. apply exec_straight_one.
-  simpl. unfold exec_store. rewrite H1; rewrite H0. eauto. auto.
-  intros. simpl. Simplifs.
-  (* long *)
-  inv H.
-  (* single *)
-  monadInv H.
-  rewrite (freg_of_eq _ _ EQ) in H0. econstructor.
-  split. apply exec_straight_one. simpl. unfold exec_store. rewrite H1. rewrite H0.
-  simpl. eauto. auto.
-  intros. destruct H2. Simplifs.
+  destruct ty; try discriminate; destruct (preg_of src); inv H; simpl in H0;
+  (econstructor; split;
+  [apply exec_straight_one; [simpl; unfold exec_store; rewrite H1, H0; eauto|auto]
+  |simpl; intros; unfold undef_regs; repeat Simplifs]).
 Qed.
 
 (** Translation of addressing modes *)
@@ -546,6 +505,21 @@ Proof.
   intros. Simplifs.
 Qed.
 
+Lemma compare_floats32_spec:
+  forall rs n1 n2,
+  let rs' := nextinstr (compare_floats32 (Vsingle n1) (Vsingle n2) rs) in
+     rs'#ZF = Val.of_bool (negb (Float32.cmp Cne n1 n2))
+  /\ rs'#CF = Val.of_bool (negb (Float32.cmp Cge n1 n2))
+  /\ rs'#PF = Val.of_bool (negb (Float32.cmp Ceq n1 n2 || Float32.cmp Clt n1 n2 || Float32.cmp Cgt n1 n2))
+  /\ (forall r, data_preg r = true -> rs'#r = rs#r).
+Proof.
+  intros. unfold rs'; unfold compare_floats32.
+  split. auto. 
+  split. auto.
+  split. auto.
+  intros. Simplifs.
+Qed.
+
 Definition eval_extcond (xc: extcond) (rs: regset) : option bool :=
   match xc with
   | Cond_base c =>
@@ -664,8 +638,104 @@ Proof.
   destruct (Float.cmp Cge n1 n2); auto.
 Qed.
 
+Lemma testcond_for_float32_comparison_correct:
+  forall c n1 n2 rs,
+  eval_extcond (testcond_for_condition (Ccompfs c))
+               (nextinstr (compare_floats32 (Vsingle (swap_floats c n1 n2))
+                                            (Vsingle (swap_floats c n2 n1)) rs)) =
+  Some(Float32.cmp c n1 n2).
+Proof.
+  intros.
+  generalize (compare_floats32_spec rs (swap_floats c n1 n2) (swap_floats c n2 n1)).
+  set (rs' := nextinstr (compare_floats32 (Vsingle (swap_floats c n1 n2))
+                                        (Vsingle (swap_floats c n2 n1)) rs)).
+  intros [A [B [C D]]].
+  unfold eval_extcond, eval_testcond. rewrite A; rewrite B; rewrite C.
+  destruct c; simpl.
+(* eq *)
+  rewrite Float32.cmp_ne_eq.
+  caseEq (Float32.cmp Ceq n1 n2); intros.
+  auto.
+  simpl. destruct (Float32.cmp Clt n1 n2 || Float32.cmp Cgt n1 n2); auto.
+(* ne *)
+  rewrite Float32.cmp_ne_eq.
+  caseEq (Float32.cmp Ceq n1 n2); intros.
+  auto.
+  simpl. destruct (Float32.cmp Clt n1 n2 || Float32.cmp Cgt n1 n2); auto.
+(* lt *)
+  rewrite <- (Float32.cmp_swap Cge n1 n2).
+  rewrite <- (Float32.cmp_swap Cne n1 n2).
+  simpl.
+  rewrite Float32.cmp_ne_eq. rewrite Float32.cmp_le_lt_eq.
+  caseEq (Float32.cmp Clt n1 n2); intros; simpl.
+  caseEq (Float32.cmp Ceq n1 n2); intros; simpl. 
+  elimtype False. eapply Float32.cmp_lt_eq_false; eauto. 
+  auto. 
+  destruct (Float32.cmp Ceq n1 n2); auto.
+(* le *)
+  rewrite <- (Float32.cmp_swap Cge n1 n2). simpl.
+  destruct (Float32.cmp Cle n1 n2); auto.
+(* gt *)
+  rewrite Float32.cmp_ne_eq. rewrite Float32.cmp_ge_gt_eq. 
+  caseEq (Float32.cmp Cgt n1 n2); intros; simpl.
+  caseEq (Float32.cmp Ceq n1 n2); intros; simpl. 
+  elimtype False. eapply Float32.cmp_gt_eq_false; eauto. 
+  auto. 
+  destruct (Float32.cmp Ceq n1 n2); auto.
+(* ge *)
+  destruct (Float32.cmp Cge n1 n2); auto.
+Qed.
+
+Lemma testcond_for_neg_float32_comparison_correct:
+  forall c n1 n2 rs,
+  eval_extcond (testcond_for_condition (Cnotcompfs c))
+               (nextinstr (compare_floats32 (Vsingle (swap_floats c n1 n2))
+                                            (Vsingle (swap_floats c n2 n1)) rs)) =
+  Some(negb(Float32.cmp c n1 n2)).
+Proof.
+  intros.
+  generalize (compare_floats32_spec rs (swap_floats c n1 n2) (swap_floats c n2 n1)).
+  set (rs' := nextinstr (compare_floats32 (Vsingle (swap_floats c n1 n2))
+                                          (Vsingle (swap_floats c n2 n1)) rs)).
+  intros [A [B [C D]]].
+  unfold eval_extcond, eval_testcond. rewrite A; rewrite B; rewrite C.
+  destruct c; simpl.
+(* eq *)
+  rewrite Float32.cmp_ne_eq.
+  caseEq (Float32.cmp Ceq n1 n2); intros.
+  auto.
+  simpl. destruct (Float32.cmp Clt n1 n2 || Float32.cmp Cgt n1 n2); auto.
+(* ne *)
+  rewrite Float32.cmp_ne_eq.
+  caseEq (Float32.cmp Ceq n1 n2); intros.
+  auto.
+  simpl. destruct (Float32.cmp Clt n1 n2 || Float32.cmp Cgt n1 n2); auto.
+(* lt *)
+  rewrite <- (Float32.cmp_swap Cge n1 n2).
+  rewrite <- (Float32.cmp_swap Cne n1 n2).
+  simpl.
+  rewrite Float32.cmp_ne_eq. rewrite Float32.cmp_le_lt_eq.
+  caseEq (Float32.cmp Clt n1 n2); intros; simpl.
+  caseEq (Float32.cmp Ceq n1 n2); intros; simpl. 
+  elimtype False. eapply Float32.cmp_lt_eq_false; eauto.
+  auto. 
+  destruct (Float32.cmp Ceq n1 n2); auto.
+(* le *)
+  rewrite <- (Float32.cmp_swap Cge n1 n2). simpl.
+  destruct (Float32.cmp Cle n1 n2); auto.
+(* gt *)
+  rewrite Float32.cmp_ne_eq. rewrite Float32.cmp_ge_gt_eq. 
+  caseEq (Float32.cmp Cgt n1 n2); intros; simpl.
+  caseEq (Float32.cmp Ceq n1 n2); intros; simpl. 
+  elimtype False. eapply Float32.cmp_gt_eq_false; eauto. 
+  auto. 
+  destruct (Float32.cmp Ceq n1 n2); auto.
+(* ge *)
+  destruct (Float32.cmp Cge n1 n2); auto.
+Qed.
+
 Remark swap_floats_commut:
-  forall c x y, swap_floats c (Vfloat x) (Vfloat y) = Vfloat (swap_floats c x y).
+  forall (A B: Type) c (f: A -> B) x y, swap_floats c (f x) (f y) = f (swap_floats c x y).
 Proof.
   intros. destruct c; auto. 
 Qed.
@@ -679,7 +749,18 @@ Proof.
   assert (DFL: undef_regs (CR ZF :: CR CF :: CR PF :: CR SF :: CR OF :: nil) rs r = rs r).
     simpl. Simplifs. 
   unfold compare_floats; destruct vx; destruct vy; auto. Simplifs. 
-Qed.  
+Qed.
+
+Remark compare_floats32_inv:
+  forall vx vy rs r,
+  r <> CR ZF -> r <> CR CF -> r <> CR PF -> r <> CR SF -> r <> CR OF ->
+  compare_floats32 vx vy rs r = rs r.
+Proof.
+  intros. 
+  assert (DFL: undef_regs (CR ZF :: CR CF :: CR PF :: CR SF :: CR OF :: nil) rs r = rs r).
+    simpl. Simplifs. 
+  unfold compare_floats32; destruct vx; destruct vy; auto. Simplifs. 
+Qed.
 
 Lemma transl_cond_correct:
   forall cond args k c rs m,
@@ -740,6 +821,24 @@ Proof.
   split. destruct (rs x); destruct (rs x0); simpl; auto.
   repeat rewrite swap_floats_commut. apply testcond_for_neg_float_comparison_correct.
   intros. Simplifs. apply compare_floats_inv; auto with asmgen. 
+(* compfs *)
+  simpl. rewrite (freg_of_eq _ _ EQ). rewrite (freg_of_eq _ _ EQ1).
+  exists (nextinstr (compare_floats32 (swap_floats c0 (rs x) (rs x0)) (swap_floats c0 (rs x0) (rs x)) rs)).
+  split. apply exec_straight_one. 
+  destruct c0; simpl; auto.
+  unfold nextinstr. rewrite Pregmap.gss. rewrite compare_floats32_inv; auto with asmgen. 
+  split. destruct (rs x); destruct (rs x0); simpl; auto.
+  repeat rewrite swap_floats_commut. apply testcond_for_float32_comparison_correct.
+  intros. Simplifs. apply compare_floats32_inv; auto with asmgen. 
+(* notcompfs *)
+  simpl. rewrite (freg_of_eq _ _ EQ). rewrite (freg_of_eq _ _ EQ1).
+  exists (nextinstr (compare_floats32 (swap_floats c0 (rs x) (rs x0)) (swap_floats c0 (rs x0) (rs x)) rs)).
+  split. apply exec_straight_one. 
+  destruct c0; simpl; auto.
+  unfold nextinstr. rewrite Pregmap.gss. rewrite compare_floats32_inv; auto with asmgen. 
+  split. destruct (rs x); destruct (rs x0); simpl; auto.
+  repeat rewrite swap_floats_commut. apply testcond_for_neg_float32_comparison_correct.
+  intros. Simplifs. apply compare_floats32_inv; auto with asmgen. 
 (* maskzero *)
   simpl. rewrite (ireg_of_eq _ _ EQ).
   econstructor. split. apply exec_straight_one. simpl; eauto. auto.
@@ -909,11 +1008,13 @@ Transparent destroyed_by_op.
   destruct op; simpl in TR; ArgsInv; simpl in EV; try (inv EV); try (apply SAME; TranslOp; fail).
 (* move *)
   exploit mk_mov_correct; eauto. intros [rs2 [A [B C]]]. 
-  apply SAME. exists rs2. split. eauto. split. simpl. auto. intros. destruct H; auto.
+  apply SAME. exists rs2. eauto. 
 (* intconst *)
   apply SAME. destruct (Int.eq_dec i Int.zero). subst i. TranslOp. TranslOp.
 (* floatconst *)
   apply SAME. destruct (Float.eq_dec f Float.zero). subst f. TranslOp. TranslOp.
+(* singleconst *)
+  apply SAME. destruct (Float32.eq_dec f Float32.zero). subst f. TranslOp. TranslOp.
 (* cast8signed *)
   apply SAME. eapply mk_intconv_correct; eauto.
 (* cast8unsigned *)
@@ -963,6 +1064,10 @@ Transparent destroyed_by_op.
   apply SAME. TranslOp. rewrite H0; auto.
 (* floatofint *)
   apply SAME. TranslOp. rewrite H0; auto.
+(* intofsingle *)
+  apply SAME. TranslOp. rewrite H0; auto.
+(* singleofint *)
+  apply SAME. TranslOp. rewrite H0; auto.
 (* condition *)
   exploit transl_cond_correct; eauto. intros [rs2 [P [Q R]]].
   exploit mk_setcc_correct; eauto. intros [rs3 [S [T U]]].
diff --git a/ia32/ConstpropOp.vp b/ia32/ConstpropOp.vp
index b27f405a..8c7f01fa 100644
--- a/ia32/ConstpropOp.vp
+++ b/ia32/ConstpropOp.vp
@@ -174,10 +174,15 @@ Definition make_moduimm n (r1 r2: reg) :=
   end.
 
 Definition make_mulfimm (n: float) (r r1 r2: reg) :=
-  if Float.eq_dec n (Float.floatofint (Int.repr 2))
+  if Float.eq_dec n (Float.of_int (Int.repr 2))
   then (Oaddf, r :: r :: nil)
   else (Omulf, r1 :: r2 :: nil).
 
+Definition make_mulfsimm (n: float32) (r r1 r2: reg) :=
+  if Float32.eq_dec n (Float32.of_int (Int.repr 2))
+  then (Oaddfs, r :: r :: nil)
+  else (Omulfs, r1 :: r2 :: nil).
+
 Definition make_cast8signed (r: reg) (a: aval) :=
   if vincl a (Sgn 8) then (Omove, r :: nil) else (Ocast8signed, r :: nil).
 Definition make_cast8unsigned (r: reg) (a: aval) :=
@@ -186,10 +191,6 @@ Definition make_cast16signed (r: reg) (a: aval) :=
   if vincl a (Sgn 16) then (Omove, r :: nil) else (Ocast16signed, r :: nil).
 Definition make_cast16unsigned (r: reg) (a: aval) :=
   if vincl a (Uns 16) then (Omove, r :: nil) else (Ocast16unsigned, r :: nil).
-Definition make_singleoffloat (r: reg) (a: aval) :=
-  if vincl a Fsingle && generate_float_constants tt
-  then (Omove, r :: nil)
-  else (Osingleoffloat, r :: nil).
 
 Nondetfunction op_strength_reduction 
               (op: operation) (args: list reg) (vl: list aval) :=
@@ -217,9 +218,10 @@ Nondetfunction op_strength_reduction
   | Olea addr, args, vl =>
       let (addr', args') := addr_strength_reduction addr args vl in
       (Olea addr', args')
-  | Osingleoffloat, r1 :: nil, v1 :: nil => make_singleoffloat r1 v1
   | Ocmp c, args, vl => make_cmp c args vl
   | Omulf, r1 :: r2 :: nil, v1 :: F n2 :: nil => make_mulfimm n2 r1 r1 r2
   | Omulf, r1 :: r2 :: nil, F n1 :: v2 :: nil => make_mulfimm n1 r2 r1 r2
+  | Omulfs, r1 :: r2 :: nil, v1 :: FS n2 :: nil => make_mulfsimm n2 r1 r1 r2
+  | Omulfs, r1 :: r2 :: nil, FS n1 :: v2 :: nil => make_mulfsimm n1 r2 r1 r2
   | _, _, _ => (op, args)
   end.
diff --git a/ia32/ConstpropOpproof.v b/ia32/ConstpropOpproof.v
index d9eea2bc..6adb26fe 100644
--- a/ia32/ConstpropOpproof.v
+++ b/ia32/ConstpropOpproof.v
@@ -77,6 +77,10 @@ Ltac SimplVM :=
       let E := fresh in
       assert (E: v = Vfloat n) by (inversion H; auto);
       rewrite E in *; clear H; SimplVM
+  | [ H: vmatch _ ?v (FS ?n) |- _ ] =>
+      let E := fresh in
+      assert (E: v = Vsingle n) by (inversion H; auto);
+      rewrite E in *; clear H; SimplVM
   | [ H: vmatch _ ?v (Ptr(Gl ?id ?ofs)) |- _ ] =>
       let E := fresh in
       assert (E: Val.lessdef v (Genv.symbol_address ge id ofs)) by (eapply vmatch_ptr_gl; eauto); 
@@ -368,7 +372,7 @@ Lemma make_mulfimm_correct:
   exists v, eval_operation ge (Vptr sp Int.zero) op e##args m = Some v /\ Val.lessdef (Val.mulf e#r1 e#r2) v.
 Proof.
   intros; unfold make_mulfimm. 
-  destruct (Float.eq_dec n (Float.floatofint (Int.repr 2))); intros. 
+  destruct (Float.eq_dec n (Float.of_int (Int.repr 2))); intros. 
   simpl. econstructor; split. eauto. rewrite H; subst n.
   destruct (e#r1); simpl; auto. rewrite Float.mul2_add; auto. 
   simpl. econstructor; split; eauto. 
@@ -381,13 +385,40 @@ Lemma make_mulfimm_correct_2:
   exists v, eval_operation ge (Vptr sp Int.zero) op e##args m = Some v /\ Val.lessdef (Val.mulf e#r1 e#r2) v.
 Proof.
   intros; unfold make_mulfimm. 
-  destruct (Float.eq_dec n (Float.floatofint (Int.repr 2))); intros. 
+  destruct (Float.eq_dec n (Float.of_int (Int.repr 2))); intros. 
   simpl. econstructor; split. eauto. rewrite H; subst n.
   destruct (e#r2); simpl; auto. rewrite Float.mul2_add; auto. 
   rewrite Float.mul_commut; auto. 
   simpl. econstructor; split; eauto. 
 Qed.
 
+Lemma make_mulfsimm_correct:
+  forall n r1 r2,
+  e#r2 = Vsingle n ->
+  let (op, args) := make_mulfsimm n r1 r1 r2 in
+  exists v, eval_operation ge (Vptr sp Int.zero) op e##args m = Some v /\ Val.lessdef (Val.mulfs e#r1 e#r2) v.
+Proof.
+  intros; unfold make_mulfsimm. 
+  destruct (Float32.eq_dec n (Float32.of_int (Int.repr 2))); intros. 
+  simpl. econstructor; split. eauto. rewrite H; subst n.
+  destruct (e#r1); simpl; auto. rewrite Float32.mul2_add; auto. 
+  simpl. econstructor; split; eauto. 
+Qed.
+
+Lemma make_mulfsimm_correct_2:
+  forall n r1 r2,
+  e#r1 = Vsingle n ->
+  let (op, args) := make_mulfsimm n r2 r1 r2 in
+  exists v, eval_operation ge (Vptr sp Int.zero) op e##args m = Some v /\ Val.lessdef (Val.mulfs e#r1 e#r2) v.
+Proof.
+  intros; unfold make_mulfsimm. 
+  destruct (Float32.eq_dec n (Float32.of_int (Int.repr 2))); intros. 
+  simpl. econstructor; split. eauto. rewrite H; subst n.
+  destruct (e#r2); simpl; auto. rewrite Float32.mul2_add; auto. 
+  rewrite Float32.mul_commut; auto. 
+  simpl. econstructor; split; eauto. 
+Qed.
+
 Lemma make_cast8signed_correct:
   forall r x,
   vmatch bc e#r x ->
@@ -444,21 +475,6 @@ Proof.
   econstructor; split; simpl; eauto.
 Qed.
 
-Lemma make_singleoffloat_correct:
-  forall r x,
-  vmatch bc e#r x ->
-  let (op, args) := make_singleoffloat r x in
-  exists v, eval_operation ge (Vptr sp Int.zero) op e##args m = Some v /\ Val.lessdef (Val.singleoffloat e#r) v.
-Proof.
-  intros; unfold make_singleoffloat.
-  destruct (vincl x Fsingle && generate_float_constants tt) eqn:INCL. 
-  InvBooleans. exists e#r; split; auto. 
-  assert (V: vmatch bc e#r Fsingle).
-  { eapply vmatch_ge; eauto. apply vincl_ge; auto. }
-  inv V; simpl; auto. rewrite Float.singleoffloat_of_single by auto. auto.
-  econstructor; split; simpl; eauto.
-Qed.
-
 Lemma op_strength_reduction_correct:
   forall op args vl v,
   vl = map (fun r => AE.get r ae) args ->
@@ -510,14 +526,16 @@ Proof.
   exploit addr_strength_reduction_correct; eauto. 
   destruct (addr_strength_reduction addr args0 vl0) as [addr' args'].
   auto.
-(* singleoffloat *)
-  InvApproxRegs; SimplVM; inv H0. apply make_singleoffloat_correct; auto.
 (* cond *)
   inv H0. apply make_cmp_correct; auto.
 (* mulf *)
   InvApproxRegs; SimplVM; inv H0. rewrite <- H2. apply make_mulfimm_correct; auto.
   InvApproxRegs; SimplVM; inv H0. fold (Val.mulf (Vfloat n1) e#r2).
   rewrite <- H2. apply make_mulfimm_correct_2; auto.
+(* mulfs *)
+  InvApproxRegs; SimplVM; inv H0. rewrite <- H2. apply make_mulfsimm_correct; auto.
+  InvApproxRegs; SimplVM; inv H0. fold (Val.mulfs (Vsingle n1) e#r2).
+  rewrite <- H2. apply make_mulfsimm_correct_2; auto.
 (* default *)
   exists v; auto.
 Qed.
diff --git a/ia32/Machregs.v b/ia32/Machregs.v
index da80a6ed..a9f2b6c4 100644
--- a/ia32/Machregs.v
+++ b/ia32/Machregs.v
@@ -43,11 +43,8 @@ Global Opaque mreg_eq.
 
 Definition mreg_type (r: mreg): typ :=
   match r with
-  | AX => Tint | BX => Tint | CX => Tint | DX => Tint
-  | SI => Tint | DI => Tint | BP => Tint
-  | X0 => Tfloat | X1 => Tfloat | X2 => Tfloat | X3 => Tfloat
-  | X4 => Tfloat | X5 => Tfloat | X6 => Tfloat | X7 => Tfloat
-  | FP0 => Tfloat
+  | AX | BX | CX | DX | SI | DI | BP => Tany32
+  | X0 | X1 | X2 | X3 | X4 | X5 | X6 | X7 | FP0 => Tany64
   end.
 
 Local Open Scope positive_scope.
@@ -76,7 +73,6 @@ Definition is_stack_reg (r: mreg) : bool :=
 
 Definition destroyed_by_op (op: operation): list mreg :=
   match op with
-  | Omove => FP0 :: nil
   | Ocast8signed | Ocast8unsigned | Ocast16signed | Ocast16unsigned => AX :: nil
   | Omulhs => AX :: DX :: nil
   | Omulhu => AX :: DX :: nil
@@ -95,9 +91,7 @@ Definition destroyed_by_load (chunk: memory_chunk) (addr: addressing): list mreg
 Definition destroyed_by_store (chunk: memory_chunk) (addr: addressing): list mreg :=
   match chunk with
   | Mint8signed | Mint8unsigned => AX :: CX :: nil
-  | Mint16signed | Mint16unsigned | Mint32 | Mint64 => nil
-  | Mfloat32 => X7 :: nil
-  | Mfloat64 => FP0 :: nil
+  | _ => nil
   end.
 
 Definition destroyed_by_cond (cond: condition): list mreg :=
@@ -116,9 +110,7 @@ Definition destroyed_by_builtin (ef: external_function): list mreg :=
   | EF_memcpy sz al =>
       if zle sz 32 then CX :: X7 :: nil else CX :: SI :: DI :: nil
   | EF_vstore (Mint8unsigned|Mint8signed) => AX :: CX :: nil
-  | EF_vstore Mfloat32 => X7 :: nil
   | EF_vstore_global (Mint8unsigned|Mint8signed) _ _ => AX :: nil
-  | EF_vstore_global Mfloat32 _ _ => X7 :: nil
   | EF_builtin id sg =>
       if ident_eq id builtin_write16_reversed
       || ident_eq id builtin_write32_reversed
@@ -127,12 +119,12 @@ Definition destroyed_by_builtin (ef: external_function): list mreg :=
   end.
 
 Definition destroyed_at_function_entry: list mreg :=
-  DX :: FP0 :: nil.   (* must include destroyed_by_op Omove *)
+  (* must include [destroyed_by_setstack ty] *)
+  DX :: FP0 :: nil.
 
 Definition destroyed_by_setstack (ty: typ): list mreg :=
   match ty with
-  | Tfloat => FP0 :: nil
-  | Tsingle => X7 :: FP0 :: nil
+  | Tfloat | Tsingle => FP0 :: nil
   | _ => nil
   end.
 
@@ -190,6 +182,7 @@ Definition two_address_op (op: operation) : bool :=
   | Omove => false
   | Ointconst _ => false
   | Ofloatconst _ => false
+  | Osingleconst _ => false
   | Oindirectsymbol _ => false
   | Ocast8signed => false
   | Ocast8unsigned => false
@@ -228,9 +221,18 @@ Definition two_address_op (op: operation) : bool :=
   | Osubf => true
   | Omulf => true
   | Odivf => true
+  | Onegfs => true
+  | Oabsfs => true
+  | Oaddfs => true
+  | Osubfs => true
+  | Omulfs => true
+  | Odivfs => true
   | Osingleoffloat => false
+  | Ofloatofsingle => false
   | Ointoffloat => false
   | Ofloatofint => false
+  | Ointofsingle => false
+  | Osingleofint => false
   | Omakelong => false
   | Olowlong => false
   | Ohighlong => false
diff --git a/ia32/NeedOp.v b/ia32/NeedOp.v
index 2c8698f9..52b9fcbe 100644
--- a/ia32/NeedOp.v
+++ b/ia32/NeedOp.v
@@ -37,6 +37,7 @@ Definition needs_of_operation (op: operation) (nv: nval): list nval :=
   | Omove => op1 nv
   | Ointconst n => nil
   | Ofloatconst n => nil
+  | Osingleconst n => nil
   | Oindirectsymbol id => nil
   | Ocast8signed => op1 (sign_ext 8 nv)
   | Ocast8unsigned => op1 (zero_ext 8 nv)
@@ -66,8 +67,10 @@ Definition needs_of_operation (op: operation) (nv: nval): list nval :=
   | Olea addr => needs_of_addressing addr nv
   | Onegf | Oabsf => op1 (default nv)
   | Oaddf | Osubf | Omulf | Odivf => op2 (default nv)
-  | Osingleoffloat => op1 (singleoffloat nv)
-  | Ointoffloat | Ofloatofint => op1 (default nv)
+  | Onegfs | Oabsfs => op1 (default nv)
+  | Oaddfs | Osubfs | Omulfs | Odivfs => op2 (default nv)
+  | Osingleoffloat | Ofloatofsingle => op1 (default nv)
+  | Ointoffloat | Ofloatofint | Ointofsingle | Osingleofint => op1 (default nv)
   | Omakelong => op2 (default nv)
   | Olowlong | Ohighlong => op1 (default nv)
   | Ocmp c => needs_of_condition c
@@ -81,7 +84,6 @@ Definition operation_is_redundant (op: operation) (nv: nval): bool :=
   | Ocast16unsigned => zero_ext_redundant 16 nv
   | Oandimm n => andimm_redundant nv n
   | Oorimm n => orimm_redundant nv n
-  | Osingleoffloat => singleoffloat_redundant nv
   | _ => false
   end.
 
@@ -165,7 +167,6 @@ Proof.
 - apply shruimm_sound; auto. 
 - apply ror_sound; auto. 
 - eapply needs_of_addressing_sound; eauto.
-- apply singleoffloat_sound; auto. 
 - destruct (eval_condition c args m) as [b|] eqn:EC; simpl in H2. 
   erewrite needs_of_condition_sound by eauto.
   subst v; simpl. auto with na.
@@ -186,7 +187,6 @@ Proof.
 - apply zero_ext_redundant_sound; auto. omega.
 - apply andimm_redundant_sound; auto. 
 - apply orimm_redundant_sound; auto.
-- apply singleoffloat_redundant_sound; auto.
 Qed.
 
 End SOUNDNESS.
diff --git a/ia32/Op.v b/ia32/Op.v
index e46c7400..14e4cbb1 100644
--- a/ia32/Op.v
+++ b/ia32/Op.v
@@ -42,8 +42,10 @@ Inductive condition : Type :=
   | Ccompu: comparison -> condition     (**r unsigned integer comparison *)
   | Ccompimm: comparison -> int -> condition (**r signed integer comparison with a constant *)
   | Ccompuimm: comparison -> int -> condition  (**r unsigned integer comparison with a constant *)
-  | Ccompf: comparison -> condition     (**r floating-point comparison *)
+  | Ccompf: comparison -> condition     (**r 64-bit floating-point comparison *)
   | Cnotcompf: comparison -> condition  (**r negation of a floating-point comparison *)
+  | Ccompfs: comparison -> condition    (**r 32-bit floating-point comparison *)
+  | Cnotcompfs: comparison -> condition (**r negation of a floating-point comparison *)
   | Cmaskzero: int -> condition         (**r test [(arg & constant) == 0] *)
   | Cmasknotzero: int -> condition.     (**r test [(arg & constant) != 0] *)
 
@@ -68,6 +70,7 @@ Inductive operation : Type :=
   | Omove: operation                    (**r [rd = r1] *)
   | Ointconst: int -> operation         (**r [rd] is set to the given integer constant *)
   | Ofloatconst: float -> operation     (**r [rd] is set to the given float constant *)
+  | Osingleconst: float32 -> operation  (**r [rd] is set to the given float constant *)
   | Oindirectsymbol: ident -> operation (**r [rd] is set to the address of the symbol *)
 (*c Integer arithmetic: *)
   | Ocast8signed: operation             (**r [rd] is 8-bit sign extension of [r1] *)
@@ -108,10 +111,19 @@ Inductive operation : Type :=
   | Osubf: operation                    (**r [rd = r1 - r2] *)
   | Omulf: operation                    (**r [rd = r1 * r2] *)
   | Odivf: operation                    (**r [rd = r1 / r2] *)
+  | Onegfs: operation                   (**r [rd = - r1] *)
+  | Oabsfs: operation                   (**r [rd = abs(r1)] *)
+  | Oaddfs: operation                   (**r [rd = r1 + r2] *)
+  | Osubfs: operation                   (**r [rd = r1 - r2] *)
+  | Omulfs: operation                   (**r [rd = r1 * r2] *)
+  | Odivfs: operation                   (**r [rd = r1 / r2] *)
   | Osingleoffloat: operation           (**r [rd] is [r1] truncated to single-precision float *)
+  | Ofloatofsingle: operation           (**r [rd] is [r1] extended to double-precision float *)
 (*c Conversions between int and float: *)
-  | Ointoffloat: operation              (**r [rd = signed_int_of_float(r1)] *)
-  | Ofloatofint: operation              (**r [rd = float_of_signed_int(r1)] *)
+  | Ointoffloat: operation              (**r [rd = signed_int_of_float64(r1)] *)
+  | Ofloatofint: operation              (**r [rd = float64_of_signed_int(r1)] *)
+  | Ointofsingle: operation             (**r [rd = signed_int_of_float32(r1)] *)
+  | Osingleofint: operation             (**r [rd = float32_of_signed_int(r1)] *)
 (*c Manipulating 64-bit integers: *)
   | Omakelong: operation                (**r [rd = r1 << 32 | r2] *)
   | Olowlong: operation                 (**r [rd = low-word(r1)] *)
@@ -145,6 +157,7 @@ Definition eq_operation (x y: operation): {x=y} + {x<>y}.
 Proof.
   generalize Int.eq_dec; intro.
   generalize Float.eq_dec; intro.
+  generalize Float32.eq_dec; intro.
   generalize Int64.eq_dec; intro.
   decide equality.
   apply peq.
@@ -169,6 +182,8 @@ Definition eval_condition (cond: condition) (vl: list val) (m: mem): option bool
   | Ccompuimm c n, v1 :: nil => Val.cmpu_bool (Mem.valid_pointer m) c v1 (Vint n)
   | Ccompf c, v1 :: v2 :: nil => Val.cmpf_bool c v1 v2
   | Cnotcompf c, v1 :: v2 :: nil => option_map negb (Val.cmpf_bool c v1 v2)
+  | Ccompfs c, v1 :: v2 :: nil => Val.cmpfs_bool c v1 v2
+  | Cnotcompfs c, v1 :: v2 :: nil => option_map negb (Val.cmpfs_bool c v1 v2)
   | Cmaskzero n, v1 :: nil => Val.maskzero_bool v1 n
   | Cmasknotzero n, v1 :: nil => option_map negb (Val.maskzero_bool v1 n)
   | _, _ => None
@@ -204,6 +219,7 @@ Definition eval_operation
   | Omove, v1::nil => Some v1
   | Ointconst n, nil => Some (Vint n)
   | Ofloatconst n, nil => Some (Vfloat n)
+  | Osingleconst n, nil => Some (Vsingle n)
   | Oindirectsymbol id, nil => Some (Genv.symbol_address genv id Int.zero)
   | Ocast8signed, v1 :: nil => Some (Val.sign_ext 8 v1)
   | Ocast8unsigned, v1 :: nil => Some (Val.zero_ext 8 v1)
@@ -243,9 +259,18 @@ Definition eval_operation
   | Osubf, v1::v2::nil => Some(Val.subf v1 v2)
   | Omulf, v1::v2::nil => Some(Val.mulf v1 v2)
   | Odivf, v1::v2::nil => Some(Val.divf v1 v2)
+  | Onegfs, v1::nil => Some(Val.negfs v1)
+  | Oabsfs, v1::nil => Some(Val.absfs v1)
+  | Oaddfs, v1::v2::nil => Some(Val.addfs v1 v2)
+  | Osubfs, v1::v2::nil => Some(Val.subfs v1 v2)
+  | Omulfs, v1::v2::nil => Some(Val.mulfs v1 v2)
+  | Odivfs, v1::v2::nil => Some(Val.divfs v1 v2)
   | Osingleoffloat, v1::nil => Some(Val.singleoffloat v1)
+  | Ofloatofsingle, v1::nil => Some(Val.floatofsingle v1)
   | Ointoffloat, v1::nil => Val.intoffloat v1
   | Ofloatofint, v1::nil => Val.floatofint v1
+  | Ointofsingle, v1::nil => Val.intofsingle v1
+  | Osingleofint, v1::nil => Val.singleofint v1
   | Omakelong, v1::v2::nil => Some(Val.longofwords v1 v2)
   | Olowlong, v1::nil => Some(Val.loword v1)
   | Ohighlong, v1::nil => Some(Val.hiword v1)
@@ -275,6 +300,8 @@ Definition type_of_condition (c: condition) : list typ :=
   | Ccompuimm _ _ => Tint :: nil
   | Ccompf _ => Tfloat :: Tfloat :: nil
   | Cnotcompf _ => Tfloat :: Tfloat :: nil
+  | Ccompfs _ => Tsingle :: Tsingle :: nil
+  | Cnotcompfs _ => Tsingle :: Tsingle :: nil
   | Cmaskzero _ => Tint :: nil
   | Cmasknotzero _ => Tint :: nil
   end.
@@ -295,7 +322,8 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   match op with
   | Omove => (nil, Tint)   (* treated specially *)
   | Ointconst _ => (nil, Tint)
-  | Ofloatconst f => (nil, if Float.is_single_dec f then Tsingle else Tfloat)
+  | Ofloatconst f => (nil, Tfloat)
+  | Osingleconst f => (nil, Tsingle)
   | Oindirectsymbol _ => (nil, Tint)
   | Ocast8signed => (Tint :: nil, Tint)
   | Ocast8unsigned => (Tint :: nil, Tint)
@@ -334,9 +362,18 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   | Osubf => (Tfloat :: Tfloat :: nil, Tfloat)
   | Omulf => (Tfloat :: Tfloat :: nil, Tfloat)
   | Odivf => (Tfloat :: Tfloat :: nil, Tfloat)
+  | Onegfs => (Tsingle :: nil, Tsingle)
+  | Oabsfs => (Tsingle :: nil, Tsingle)
+  | Oaddfs => (Tsingle :: Tsingle :: nil, Tsingle)
+  | Osubfs => (Tsingle :: Tsingle :: nil, Tsingle)
+  | Omulfs => (Tsingle :: Tsingle :: nil, Tsingle)
+  | Odivfs => (Tsingle :: Tsingle :: nil, Tsingle)
   | Osingleoffloat => (Tfloat :: nil, Tsingle)
+  | Ofloatofsingle => (Tsingle :: nil, Tfloat)
   | Ointoffloat => (Tfloat :: nil, Tint)
   | Ofloatofint => (Tint :: nil, Tfloat)
+  | Ointofsingle => (Tsingle :: nil, Tint)
+  | Osingleofint => (Tint :: nil, Tsingle)
   | Omakelong => (Tint :: Tint :: nil, Tlong)
   | Olowlong => (Tlong :: nil, Tint)
   | Ohighlong => (Tlong :: nil, Tint)
@@ -380,7 +417,8 @@ Proof with (try exact I).
   destruct op; simpl in H0; FuncInv; subst; simpl.
   congruence.
   exact I.
-  destruct (Float.is_single_dec f); auto. 
+  exact I.
+  exact I.
   unfold Genv.symbol_address; destruct (Genv.find_symbol genv i)...
   destruct v0...
   destruct v0...
@@ -422,8 +460,17 @@ Proof with (try exact I).
   destruct v0; destruct v1...
   destruct v0; destruct v1...
   destruct v0; destruct v1...
-  destruct v0... apply Float.singleoffloat_is_single.
-  destruct v0; simpl in H0; inv H0. destruct (Float.intoffloat f); inv H2...
+  destruct v0...
+  destruct v0...
+  destruct v0; destruct v1...
+  destruct v0; destruct v1...
+  destruct v0; destruct v1...
+  destruct v0; destruct v1...
+  destruct v0...
+  destruct v0...
+  destruct v0; simpl in H0; inv H0. destruct (Float.to_int f); inv H2...
+  destruct v0; simpl in H0; inv H0...
+  destruct v0; simpl in H0; inv H0. destruct (Float32.to_int f); inv H2...
   destruct v0; simpl in H0; inv H0...
   destruct v0; destruct v1...
   destruct v0...
@@ -467,6 +514,8 @@ Definition negate_condition (cond: condition): condition :=
   | Ccompuimm c n => Ccompuimm (negate_comparison c) n
   | Ccompf c => Cnotcompf c
   | Cnotcompf c => Ccompf c
+  | Ccompfs c => Cnotcompfs c
+  | Cnotcompfs c => Ccompfs c
   | Cmaskzero n => Cmasknotzero n
   | Cmasknotzero n => Cmaskzero n
   end.
@@ -482,6 +531,8 @@ Proof.
   repeat (destruct vl; auto). apply Val.negate_cmpu_bool.
   repeat (destruct vl; auto). 
   repeat (destruct vl; auto). destruct (Val.cmpf_bool c v v0) as [[]|]; auto.
+  repeat (destruct vl; auto). 
+  repeat (destruct vl; auto). destruct (Val.cmpfs_bool c v v0) as [[]|]; auto.
   destruct vl; auto. destruct vl; auto.
   destruct vl; auto. destruct vl; auto. destruct (Val.maskzero_bool v i) as [[]|]; auto.
 Qed.
@@ -608,61 +659,6 @@ Proof.
   destruct c; simpl; try congruence. reflexivity. 
 Qed.
 
-(** Checking whether two addressings, applied to the same arguments, produce
-  separated memory addresses.  Used in [CSE]. *)
-
-Definition addressing_separated (chunk1: memory_chunk) (addr1: addressing)
-                               (chunk2: memory_chunk) (addr2: addressing) : bool :=
-  match addr1, addr2 with
-  | Aindexed ofs1, Aindexed ofs2 => 
-      Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2)
-  | Aglobal s1 ofs1, Aglobal s2 ofs2 => 
-      if ident_eq s1 s2 then Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2) else true
-  | Abased s1 ofs1, Abased s2 ofs2 => 
-      if ident_eq s1 s2 then Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2) else true
-  | Ainstack ofs1, Ainstack ofs2 =>
-      Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2)
-  | _, _ => false
-  end.
-
-Lemma addressing_separated_sound:
-  forall (F V: Type) (ge: Genv.t F V) sp chunk1 addr1 chunk2 addr2 vl b1 n1 b2 n2,
-  addressing_separated chunk1 addr1 chunk2 addr2 = true ->
-  eval_addressing ge sp addr1 vl = Some(Vptr b1 n1) ->
-  eval_addressing ge sp addr2 vl = Some(Vptr b2 n2) ->
-  b1 <> b2 \/ Int.unsigned n1 + size_chunk chunk1 <= Int.unsigned n2 \/ Int.unsigned n2 + size_chunk chunk2 <= Int.unsigned n1.
-Proof.
-  unfold addressing_separated; intros.
-  generalize (size_chunk_pos chunk1) (size_chunk_pos chunk2); intros SZ1 SZ2.
-  destruct addr1; destruct addr2; try discriminate; simpl in *; FuncInv.
-(* Aindexed *)
-  destruct v; simpl in *; inv H1; inv H2.
-  right. apply Int.no_overlap_sound; auto. 
-(* Aglobal *)
-  unfold Genv.symbol_address in *. 
-  destruct (Genv.find_symbol ge i1) eqn:?; inv H2.
-  destruct (Genv.find_symbol ge i) eqn:?; inv H1.
-  destruct (ident_eq i i1). subst.
-  replace (Int.unsigned n1) with (Int.unsigned (Int.add Int.zero n1)).
-  replace (Int.unsigned n2) with (Int.unsigned (Int.add Int.zero n2)).
-  right. apply Int.no_overlap_sound; auto. 
-  rewrite Int.add_commut; rewrite Int.add_zero; auto.
-  rewrite Int.add_commut; rewrite Int.add_zero; auto.
-  left. red; intros; elim n. subst. eapply Genv.genv_vars_inj; eauto.
-(* Abased *)
-  unfold Genv.symbol_address in *. 
-  destruct (Genv.find_symbol ge i1) eqn:?; simpl in *; try discriminate.
-  destruct v; inv H2.
-  destruct (Genv.find_symbol ge i) eqn:?; inv H1.
-  destruct (ident_eq i i1). subst.
-  rewrite (Int.add_commut i0 i3). rewrite (Int.add_commut i2 i3).
-  right. apply Int.no_overlap_sound; auto. 
-  left. red; intros; elim n. subst. eapply Genv.genv_vars_inj; eauto.
-(* Ainstack *)
-  destruct sp; simpl in *; inv H1; inv H2.
-  right. apply Int.no_overlap_sound; auto. 
-Qed.
-
 (** * Invariance and compatibility properties. *)
 
 (** [eval_operation] and [eval_addressing] depend on a global environment
@@ -770,6 +766,8 @@ Proof.
   eauto 3 using val_cmpu_bool_inject, Mem.valid_pointer_implies.
   inv H3; inv H2; simpl in H0; inv H0; auto.
   inv H3; inv H2; simpl in H0; inv H0; auto.
+  inv H3; inv H2; simpl in H0; inv H0; auto.
+  inv H3; inv H2; simpl in H0; inv H0; auto.
   inv H3; try discriminate; auto.
   inv H3; try discriminate; auto.
 Qed.
@@ -853,7 +851,17 @@ Proof.
   inv H4; inv H2; simpl; auto.
   inv H4; inv H2; simpl; auto.
   inv H4; simpl; auto.
-  inv H4; simpl in H1; inv H1. simpl. destruct (Float.intoffloat f0); simpl in H2; inv H2.
+  inv H4; simpl; auto.
+  inv H4; inv H2; simpl; auto.
+  inv H4; inv H2; simpl; auto.
+  inv H4; inv H2; simpl; auto.
+  inv H4; inv H2; simpl; auto.
+  inv H4; simpl; auto.
+  inv H4; simpl; auto.
+  inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_int f0); simpl in H2; inv H2.
+  exists (Vint i); auto.
+  inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_int f0); simpl in H2; inv H2.
   exists (Vint i); auto.
   inv H4; simpl in H1; inv H1. simpl. TrivialExists.
   inv H4; inv H2; simpl; auto.
diff --git a/ia32/PrintAsm.ml b/ia32/PrintAsm.ml
index 33e19f73..67fb80c1 100644
--- a/ia32/PrintAsm.ml
+++ b/ia32/PrintAsm.ml
@@ -353,7 +353,7 @@ let print_builtin_vload_common oc chunk addr res =
         fprintf oc "	movl	%a, %a\n" addressing addr ireg res2
       end
   | Mfloat32, [FR res] ->
-      fprintf oc "	cvtss2sd %a, %a\n" addressing addr freg res
+      fprintf oc "	movss	%a, %a\n" addressing addr freg res
   | Mfloat64, [FR res] ->
       fprintf oc "	movsd	%a, %a\n" addressing addr freg res
   | _ ->
@@ -394,8 +394,7 @@ let print_builtin_vstore_common oc chunk addr src tmp =
       fprintf oc "	movl	%a, %a\n" ireg src2 addressing addr;
       fprintf oc "	movl	%a, %a\n" ireg src1 addressing addr'
   | Mfloat32, [FR src] ->
-      fprintf oc "	cvtsd2ss %a, %%xmm7\n" freg src;
-      fprintf oc "	movss	%%xmm7, %a\n" addressing addr
+      fprintf oc "	movss	%a, %a\n" freg src addressing addr
   | Mfloat64, [FR src] ->
       fprintf oc "	movsd	%a, %a\n" freg src addressing addr
   | _ ->
@@ -541,7 +540,8 @@ let print_builtin_inline oc name args res =
 
 (* Printing of instructions *)
 
-let float_literals : (int * int64) list ref = ref []
+let float64_literals : (int * int64) list ref = ref []
+let float32_literals : (int * int32) list ref = ref []
 let jumptables : (int * label list) list ref = ref []
 let indirect_symbols : StringSet.t ref = ref StringSet.empty
 
@@ -560,39 +560,38 @@ let print_instruction oc = function
         fprintf oc "	movl	L%a$non_lazy_ptr, %a\n" raw_symbol id ireg rd
       end else
         fprintf oc "	movl	$%a, %a\n" symbol id ireg rd
-  | Pmov_rm(rd, a) ->
+  | Pmov_rm(rd, a) | Pmov_rm_a(rd, a) ->
       fprintf oc "	movl	%a, %a\n" addressing a ireg rd
-  | Pmov_mr(a, r1) ->
+  | Pmov_mr(a, r1) | Pmov_mr_a(a, r1) ->
       fprintf oc "	movl	%a, %a\n" ireg r1 addressing a
   | Pmovsd_ff(rd, r1) ->
       fprintf oc "	movapd	%a, %a\n" freg r1 freg rd
   | Pmovsd_fi(rd, n) ->
-      let b = camlint64_of_coqint (Floats.Float.bits_of_double n) in
+      let b = camlint64_of_coqint (Floats.Float.to_bits n) in
       let lbl = new_label() in
       fprintf oc "	movsd	%a, %a %s %.18g\n" label lbl freg rd comment (camlfloat_of_coqfloat n);
-      float_literals := (lbl, b) :: !float_literals
+      float64_literals := (lbl, b) :: !float64_literals
   | Pmovsd_fm(rd, a) ->
       fprintf oc "	movsd	%a, %a\n" addressing a freg rd
   | Pmovsd_mf(a, r1) ->
       fprintf oc "	movsd	%a, %a\n" freg r1 addressing a
-  | Pfld_f(r1) ->
-      fprintf oc "	subl	$8, %%esp\n";
-      cfi_adjust oc 8l;
-      fprintf oc "	movsd	%a, 0(%%esp)\n" freg r1;
-      fprintf oc "	fldl	0(%%esp)\n";
-      fprintf oc "	addl	$8, %%esp\n";
-      cfi_adjust oc (-8l)
-  | Pfld_m(a) ->
+  | Pmovss_fi(rd, n) ->
+      let b = camlint_of_coqint (Floats.Float32.to_bits n) in
+      let lbl = new_label() in
+      fprintf oc "	movss	%a, %a %s %.18g\n" label lbl freg rd comment (camlfloat_of_coqfloat32 n);
+      float32_literals := (lbl, b) :: !float32_literals
+  | Pmovss_fm(rd, a) ->
+      fprintf oc "	movss	%a, %a\n" addressing a freg rd
+  | Pmovss_mf(a, r1) ->
+      fprintf oc "	movss	%a, %a\n" freg r1 addressing a
+  | Pfldl_m(a) ->
       fprintf oc "	fldl	%a\n" addressing a
-  | Pfstp_f(rd) ->
-      fprintf oc "	subl	$8, %%esp\n";
-      cfi_adjust oc 8l;
-      fprintf oc "	fstpl	0(%%esp)\n";
-      fprintf oc "	movsd	0(%%esp), %a\n" freg rd;
-      fprintf oc "	addl	$8, %%esp\n";
-      cfi_adjust oc (-8l)
-  | Pfstp_m(a) ->
+  | Pfstpl_m(a) ->
       fprintf oc "	fstpl	%a\n" addressing a
+  | Pflds_m(a) ->
+      fprintf oc "	flds	%a\n" addressing a
+  | Pfstps_m(a) ->
+      fprintf oc "	fstps	%a\n" addressing a
   | Pxchg_rr(r1, r2) ->
       fprintf oc "	xchgl	%a, %a\n" ireg r1 ireg r2
   (** Moves with conversion *)
@@ -616,18 +615,18 @@ let print_instruction oc = function
       fprintf oc "	movswl	%a, %a\n" ireg16 r1 ireg rd
   | Pmovsw_rm(rd, a) ->
       fprintf oc "	movswl	%a, %a\n" addressing a ireg rd
-  | Pcvtss2sd_fm(rd, a) ->
-      fprintf oc "	cvtss2sd %a, %a\n" addressing a freg rd
   | Pcvtsd2ss_ff(rd, r1) ->
-      fprintf oc "	cvtsd2ss %a, %a\n" freg r1 freg rd;
-      fprintf oc "	cvtss2sd %a, %a\n" freg rd freg rd
-  | Pcvtsd2ss_mf(a, r1) ->
-      fprintf oc "	cvtsd2ss %a, %%xmm7\n" freg r1;
-      fprintf oc "	movss	%%xmm7, %a\n" addressing a
+      fprintf oc "	cvtsd2ss %a, %a\n" freg r1 freg rd
+  | Pcvtss2sd_ff(rd, r1) ->
+      fprintf oc "	cvtss2sd %a, %a\n" freg r1 freg rd
   | Pcvttsd2si_rf(rd, r1) ->
       fprintf oc "	cvttsd2si %a, %a\n" freg r1 ireg rd
   | Pcvtsi2sd_fr(rd, r1) ->
       fprintf oc "	cvtsi2sd %a, %a\n" ireg r1 freg rd
+  | Pcvttss2si_rf(rd, r1) ->
+      fprintf oc "	cvttss2si %a, %a\n" freg r1 ireg rd
+  | Pcvtsi2ss_fr(rd, r1) ->
+      fprintf oc "	cvtsi2ss %a, %a\n" ireg r1 freg rd
   (** Arithmetic and logical operations over integers *)
   | Plea(rd, a) ->
       fprintf oc "	leal	%a, %a\n" addressing a ireg rd
@@ -713,6 +712,24 @@ let print_instruction oc = function
       fprintf oc "	comisd	%a, %a\n" freg r2 freg r1
   | Pxorpd_f (rd) ->
       fprintf oc "	xorpd	%a, %a\n" freg rd freg rd
+  | Padds_ff(rd, r1) ->
+      fprintf oc "	addss	%a, %a\n" freg r1 freg rd
+  | Psubs_ff(rd, r1) ->
+      fprintf oc "	subss	%a, %a\n" freg r1 freg rd
+  | Pmuls_ff(rd, r1) ->
+      fprintf oc "	mulss	%a, %a\n" freg r1 freg rd
+  | Pdivs_ff(rd, r1) ->
+      fprintf oc "	divss	%a, %a\n" freg r1 freg rd
+  | Pnegs (rd) ->
+      need_masks := true;
+      fprintf oc "	xorpd	%a, %a\n" raw_symbol "__negs_mask" freg rd
+  | Pabss (rd) ->
+      need_masks := true;
+      fprintf oc "	andpd	%a, %a\n" raw_symbol "__abss_mask" freg rd
+  | Pcomiss_ff(r1, r2) ->
+      fprintf oc "	comiss	%a, %a\n" freg r2 freg r1
+  | Pxorps_f (rd) ->
+      fprintf oc "	xorpd	%a, %a\n" freg rd freg rd
   (** Branches and calls *)
   | Pjmp_l(l) ->
       fprintf oc "	jmp	%a\n" label (transl_label l)
@@ -785,8 +802,10 @@ let print_instruction oc = function
           assert false
       end
 
-let print_literal oc (lbl, n) =
+let print_literal64 oc (lbl, n) =
   fprintf oc "%a:	.quad	0x%Lx\n" label lbl n
+let print_literal32 oc (lbl, n) =
+  fprintf oc "%a:	.long	0x%lx\n" label lbl n
 
 let print_jumptable oc (lbl, tbl) =
   fprintf oc "%a:" label lbl;
@@ -796,7 +815,7 @@ let print_jumptable oc (lbl, tbl) =
 
 let print_function oc name fn =
   Hashtbl.clear current_function_labels;
-  float_literals := [];
+  float64_literals := []; float32_literals := [];
   jumptables := [];
   current_function_sig := fn.fn_sig;
   let (text, lit, jmptbl) =
@@ -818,11 +837,13 @@ let print_function oc name fn =
     fprintf oc "	.type	%a, @function\n" symbol name;
     fprintf oc "	.size	%a, . - %a\n" symbol name symbol name
   end;
-  if !float_literals <> [] then begin
+  if !float64_literals <> [] || !float32_literals <> [] then begin
     section oc lit;
     print_align oc 8;
-    List.iter (print_literal oc) !float_literals;
-    float_literals := []
+    List.iter (print_literal64 oc) !float64_literals;
+    float64_literals := [];
+    List.iter (print_literal32 oc) !float32_literals;
+    float32_literals := []
   end;
   if !jumptables <> [] then begin
     section oc jmptbl;
@@ -842,11 +863,11 @@ let print_init oc = function
       fprintf oc "	.quad	%Ld\n" (camlint64_of_coqint n)
   | Init_float32 n ->
       fprintf oc "	.long	%ld %s %.18g\n"
-	(camlint_of_coqint (Floats.Float.bits_of_single n))
+	(camlint_of_coqint (Floats.Float32.to_bits n))
 	comment (camlfloat_of_coqfloat n)
   | Init_float64 n ->
       fprintf oc "	.quad	%Ld %s %.18g\n"
-	(camlint64_of_coqint (Floats.Float.bits_of_double n))
+	(camlint64_of_coqint (Floats.Float.to_bits n))
 	comment (camlfloat_of_coqfloat n)
   | Init_space n ->
       if Z.gt n Z.zero then
@@ -917,7 +938,11 @@ let print_program oc p =
     fprintf oc "%a:	.quad   0x8000000000000000, 0\n"
                raw_symbol "__negd_mask";
     fprintf oc "%a:	.quad   0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF\n"
-               raw_symbol "__absd_mask"
+               raw_symbol "__absd_mask";
+    fprintf oc "%a:	.long   0x80000000, 0, 0, 0\n"
+               raw_symbol "__negs_mask";
+    fprintf oc "%a:	.long   0x7FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+               raw_symbol "__abss_mask"
   end;
   if target = MacOS then begin
     fprintf oc "	.section __IMPORT,__pointers,non_lazy_symbol_pointers\n";
diff --git a/ia32/PrintOp.ml b/ia32/PrintOp.ml
index 193779e5..fb9a7cc5 100644
--- a/ia32/PrintOp.ml
+++ b/ia32/PrintOp.ml
@@ -64,6 +64,7 @@ let print_operation reg pp = function
   | Omove, [r1] -> reg pp r1
   | Ointconst n, [] -> fprintf pp "%ld" (camlint_of_coqint n)
   | Ofloatconst n, [] -> fprintf pp "%F" (camlfloat_of_coqfloat n)
+  | Osingleconst n, [] -> fprintf pp "%Ff" (camlfloat_of_coqfloat32 n)
   | Oindirectsymbol id, [] -> fprintf pp "&%s" (extern_atom id)
   | Ocast8signed, [r1] -> fprintf pp "int8signed(%a)" reg r1
   | Ocast8unsigned, [r1] -> fprintf pp "int8unsigned(%a)" reg r1
@@ -100,6 +101,7 @@ let print_operation reg pp = function
   | Omulf, [r1;r2] -> fprintf pp "%a *f %a" reg r1 reg r2
   | Odivf, [r1;r2] -> fprintf pp "%a /f %a" reg r1 reg r2
   | Osingleoffloat, [r1] -> fprintf pp "singleoffloat(%a)" reg r1
+  | Ofloatofsingle, [r1] -> fprintf pp "floatofsingle(%a)" reg r1
   | Ointoffloat, [r1] -> fprintf pp "intoffloat(%a)" reg r1
   | Ofloatofint, [r1] -> fprintf pp "floatofint(%a)" reg r1
   | Omakelong, [r1;r2] -> fprintf pp "makelong(%a,%a)" reg r1 reg r2
diff --git a/ia32/SelectOp.vp b/ia32/SelectOp.vp
index 214608e4..b6aef725 100644
--- a/ia32/SelectOp.vp
+++ b/ia32/SelectOp.vp
@@ -346,6 +346,12 @@ Definition addf (e1 e2: expr) :=  Eop Oaddf (e1 ::: e2 ::: Enil).
 Definition subf (e1 e2: expr) :=  Eop Osubf (e1 ::: e2 ::: Enil).
 Definition mulf (e1 e2: expr) :=  Eop Omulf (e1 ::: e2 ::: Enil).
 
+Definition negfs (e: expr) :=  Eop Onegfs (e ::: Enil).
+Definition absfs (e: expr) :=  Eop Oabsfs (e ::: Enil).
+Definition addfs (e1 e2: expr) :=  Eop Oaddfs (e1 ::: e2 ::: Enil).
+Definition subfs (e1 e2: expr) :=  Eop Osubfs (e1 ::: e2 ::: Enil).
+Definition mulfs (e1 e2: expr) :=  Eop Omulfs (e1 ::: e2 ::: Enil).
+
 (** ** Comparisons *)
 
 Nondetfunction compimm (default: comparison -> int -> condition)
@@ -405,6 +411,9 @@ Nondetfunction compu (c: comparison) (e1: expr) (e2: expr) :=
 Definition compf (c: comparison) (e1: expr) (e2: expr) :=
   Eop (Ocmp (Ccompf c)) (e1 ::: e2 ::: Enil).
 
+Definition compfs (c: comparison) (e1: expr) (e2: expr) :=
+  Eop (Ocmp (Ccompfs c)) (e1 ::: e2 ::: Enil).
+
 (** ** Integer conversions *)
 
 Nondetfunction cast8unsigned (e: expr) := 
@@ -446,32 +455,50 @@ Nondetfunction cast16signed (e: expr) :=
 (** Floating-point conversions *)
 
 Definition singleoffloat (e: expr) := Eop Osingleoffloat (e ::: Enil).
+Definition floatofsingle (e: expr) := Eop Ofloatofsingle (e ::: Enil).
+
 Definition intoffloat (e: expr) := Eop Ointoffloat (e ::: Enil).
 
 Nondetfunction floatofint (e: expr) :=
   match e with
-  | Eop (Ointconst n) Enil => Eop (Ofloatconst (Float.floatofint n)) Enil
+  | Eop (Ointconst n) Enil => Eop (Ofloatconst (Float.of_int n)) Enil
   | _ => Eop Ofloatofint (e ::: Enil)
   end.
 
 Definition intuoffloat (e: expr) :=
   Elet e
-    (Elet (Eop (Ofloatconst (Float.floatofintu Float.ox8000_0000)) Enil)
+    (Elet (Eop (Ofloatconst (Float.of_intu Float.ox8000_0000)) Enil)
       (Econdition (CEcond (Ccompf Clt) (Eletvar 1 ::: Eletvar 0 ::: Enil))
         (intoffloat (Eletvar 1))
         (addimm Float.ox8000_0000 (intoffloat (subf (Eletvar 1) (Eletvar 0))))))%nat.
 
 Nondetfunction floatofintu (e: expr) :=
   match e with
-  | Eop (Ointconst n) Enil => Eop (Ofloatconst (Float.floatofintu n)) Enil
+  | Eop (Ointconst n) Enil => Eop (Ofloatconst (Float.of_intu n)) Enil
   | _ =>
-    let f := Eop (Ofloatconst (Float.floatofintu Float.ox8000_0000)) Enil in
+    let f := Eop (Ofloatconst (Float.of_intu Float.ox8000_0000)) Enil in
     Elet e
       (Econdition (CEcond (Ccompuimm Clt Float.ox8000_0000) (Eletvar O ::: Enil))
         (floatofint (Eletvar O))
         (addf (floatofint (addimm (Int.neg Float.ox8000_0000) (Eletvar O))) f))
   end.
 
+Definition intofsingle (e: expr) := Eop Ointofsingle (e ::: Enil).
+
+Nondetfunction singleofint (e: expr) :=
+  match e with
+  | Eop (Ointconst n) Enil => Eop (Osingleconst (Float32.of_int n)) Enil
+  | _ => Eop Osingleofint (e ::: Enil)
+  end.
+
+Definition intuofsingle (e: expr) := intuoffloat (floatofsingle e).
+
+Nondetfunction singleofintu (e: expr) :=
+  match e with
+  | Eop (Ointconst n) Enil => Eop (Osingleconst (Float32.of_intu n)) Enil
+  | _ => singleoffloat (floatofintu e)
+  end.
+
 (** ** Addressing modes *)
 
 Nondetfunction addressing (chunk: memory_chunk) (e: expr) :=
diff --git a/ia32/SelectOpproof.v b/ia32/SelectOpproof.v
index 0fd6f7b8..50688621 100644
--- a/ia32/SelectOpproof.v
+++ b/ia32/SelectOpproof.v
@@ -567,6 +567,31 @@ Proof.
   red; intros; TrivialExists.
 Qed.
 
+Theorem eval_negfs: unary_constructor_sound negfs Val.negfs.
+Proof.
+  red; intros. TrivialExists. 
+Qed.
+
+Theorem eval_absfs: unary_constructor_sound absfs Val.absfs.
+Proof.
+  red; intros. TrivialExists. 
+Qed.
+
+Theorem eval_addfs: binary_constructor_sound addfs Val.addfs.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+ 
+Theorem eval_subfs: binary_constructor_sound subfs Val.subfs.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+
+Theorem eval_mulfs: binary_constructor_sound mulfs Val.mulfs.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+
 Section COMP_IMM.
 
 Variable default: comparison -> int -> condition.
@@ -674,6 +699,12 @@ Proof.
   intros; red; intros. unfold compf. TrivialExists.
 Qed.
 
+Theorem eval_compfs:
+  forall c, binary_constructor_sound (compfs c) (Val.cmpfs c).
+Proof.
+  intros; red; intros. unfold compfs. TrivialExists.
+Qed.
+
 Theorem eval_cast8signed: unary_constructor_sound cast8signed (Val.sign_ext 8).
 Proof.
   red; intros until x. unfold cast8signed. case (cast8signed_match a); intros; InvEval.
@@ -711,6 +742,11 @@ Proof.
   red; intros. unfold singleoffloat. TrivialExists.
 Qed.
 
+Theorem eval_floatofsingle: unary_constructor_sound floatofsingle Val.floatofsingle.
+Proof.
+  red; intros. unfold floatofsingle. TrivialExists.
+Qed.
+
 Theorem eval_intoffloat:
   forall le a x y,
   eval_expr ge sp e m le a x ->
@@ -738,10 +774,10 @@ Theorem eval_intuoffloat:
   exists v, eval_expr ge sp e m le (intuoffloat a) v /\ Val.lessdef y v.
 Proof.
   intros. destruct x; simpl in H0; try discriminate.
-  destruct (Float.intuoffloat f) as [n|] eqn:?; simpl in H0; inv H0.
+  destruct (Float.to_intu f) as [n|] eqn:?; simpl in H0; inv H0.
   exists (Vint n); split; auto. unfold intuoffloat.
   set (im := Int.repr Int.half_modulus).
-  set (fm := Float.floatofintu im).
+  set (fm := Float.of_intu im).
   assert (eval_expr ge sp e m (Vfloat fm :: Vfloat f :: le) (Eletvar (S O)) (Vfloat f)).
     constructor. auto.
   assert (eval_expr ge sp e m (Vfloat fm :: Vfloat f :: le) (Eletvar O) (Vfloat fm)).
@@ -751,9 +787,9 @@ Proof.
   eapply eval_Econdition with (va := Float.cmp Clt f fm).
   eauto with evalexpr.
   destruct (Float.cmp Clt f fm) eqn:?.
-  exploit Float.intuoffloat_intoffloat_1; eauto. intro EQ.
+  exploit Float.to_intu_to_int_1; eauto. intro EQ.
   EvalOp. simpl. rewrite EQ; auto.
-  exploit Float.intuoffloat_intoffloat_2; eauto. 
+  exploit Float.to_intu_to_int_2; eauto. 
   change Float.ox8000_0000 with im. fold fm. intro EQ.
   set (t2 := subf (Eletvar (S O)) (Eletvar O)).
   set (t3 := intoffloat t2).
@@ -778,25 +814,75 @@ Proof.
   intros until y; unfold floatofintu. case (floatofintu_match a); intros.
   InvEval. TrivialExists.
   destruct x; simpl in H0; try discriminate. inv H0.
-  exists (Vfloat (Float.floatofintu i)); split; auto.
+  exists (Vfloat (Float.of_intu i)); split; auto.
   econstructor. eauto. 
-  set (fm := Float.floatofintu Float.ox8000_0000).
+  set (fm := Float.of_intu Float.ox8000_0000).
   assert (eval_expr ge sp e m (Vint i :: le) (Eletvar O) (Vint i)).
     constructor. auto. 
   eapply eval_Econdition with (va := Int.ltu i Float.ox8000_0000).
   eauto with evalexpr.
   destruct (Int.ltu i Float.ox8000_0000) eqn:?.
-  rewrite Float.floatofintu_floatofint_1; auto.
+  rewrite Float.of_intu_of_int_1; auto.
   unfold floatofint. EvalOp. 
   exploit (eval_addimm (Int.neg Float.ox8000_0000) (Vint i :: le) (Eletvar 0)); eauto.
   simpl. intros [v [A B]]. inv B. 
   unfold addf. EvalOp. 
   constructor. unfold floatofint. EvalOp. simpl; eauto. 
   constructor. EvalOp. simpl; eauto. constructor. simpl; eauto. 
-  fold fm. rewrite Float.floatofintu_floatofint_2; auto.
+  fold fm. rewrite Float.of_intu_of_int_2; auto.
   rewrite Int.sub_add_opp. auto.
 Qed.
 
+Theorem eval_intofsingle:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.intofsingle x = Some y ->
+  exists v, eval_expr ge sp e m le (intofsingle a) v /\ Val.lessdef y v.
+Proof.
+  intros; unfold intofsingle. TrivialExists. 
+Qed.
+
+Theorem eval_singleofint:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.singleofint x = Some y ->
+  exists v, eval_expr ge sp e m le (singleofint a) v /\ Val.lessdef y v.
+Proof.
+  intros until y; unfold singleofint. case (singleofint_match a); intros; InvEval.
+  TrivialExists.
+  TrivialExists. 
+Qed.
+
+Theorem eval_intuofsingle:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.intuofsingle x = Some y ->
+  exists v, eval_expr ge sp e m le (intuofsingle a) v /\ Val.lessdef y v.
+Proof.
+  intros. destruct x; simpl in H0; try discriminate.
+  destruct (Float32.to_intu f) as [n|] eqn:?; simpl in H0; inv H0.
+  unfold intuofsingle. apply eval_intuoffloat with (Vfloat (Float.of_single f)).
+  unfold floatofsingle. EvalOp. 
+  simpl. change (Float.of_single f) with (Float32.to_double f). 
+  erewrite Float32.to_intu_double; eauto. auto. 
+Qed.
+
+Theorem eval_singleofintu:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.singleofintu x = Some y ->
+  exists v, eval_expr ge sp e m le (singleofintu a) v /\ Val.lessdef y v.
+Proof.
+  intros until y; unfold singleofintu. case (singleofintu_match a); intros.
+  InvEval. TrivialExists.
+  destruct x; simpl in H0; try discriminate. inv H0.
+  exploit eval_floatofintu. eauto. simpl. reflexivity. 
+  intros (v & A & B).
+  exists (Val.singleoffloat v); split.
+  unfold singleoffloat; EvalOp.
+  inv B; simpl. rewrite Float32.of_intu_double. auto. 
+Qed.
+
 Theorem eval_addressing:
   forall le chunk a v b ofs,
   eval_expr ge sp e m le a v ->
diff --git a/ia32/Unusedglob1.ml b/ia32/Unusedglob1.ml
index 8332a30c..eb0298bb 100644
--- a/ia32/Unusedglob1.ml
+++ b/ia32/Unusedglob1.ml
@@ -29,12 +29,14 @@ let referenced_builtin ef =
 
 let referenced_instr = function
   | Pmov_rm (_, a) | Pmov_mr (a, _)
+  | Pmov_rm_a (_, a) | Pmov_mr_a (a, _)
   | Pmovsd_fm (_, a) | Pmovsd_mf(a, _)
-  | Pfld_m a | Pfstp_m a
+  | Pmovss_fm (_, a) | Pmovss_mf(a, _)
+  | Pfldl_m a | Pflds_m a | Pfstpl_m a | Pfstps_m a
   | Pmovb_mr (a, _) | Pmovw_mr (a, _)
   | Pmovzb_rm (_, a) | Pmovsb_rm (_, a)
   | Pmovzw_rm (_, a) | Pmovsw_rm (_, a)
-  | Pcvtss2sd_fm (_, a) | Pcvtsd2ss_mf (a, _) | Plea (_, a) -> referenced_addr a
+  | Plea (_, a) -> referenced_addr a
   | Pjmp_s(s, _) -> [s]
   | Pcall_s(s, _) -> [s]
   | Pbuiltin(ef, args, res) -> referenced_builtin ef
diff --git a/ia32/ValueAOp.v b/ia32/ValueAOp.v
index 58b945f4..874c2be3 100644
--- a/ia32/ValueAOp.v
+++ b/ia32/ValueAOp.v
@@ -32,6 +32,8 @@ Definition eval_static_condition (cond: condition) (vl: list aval): abool :=
   | Ccompuimm c n, v1 :: nil => cmpu_bool c v1 (I n)
   | Ccompf c, v1 :: v2 :: nil => cmpf_bool c v1 v2
   | Cnotcompf c, v1 :: v2 :: nil => cnot (cmpf_bool c v1 v2)
+  | Ccompfs c, v1 :: v2 :: nil => cmpfs_bool c v1 v2
+  | Cnotcompfs c, v1 :: v2 :: nil => cnot (cmpfs_bool c v1 v2)
   | Cmaskzero n, v1 :: nil => maskzero v1 n
   | Cmasknotzero n, v1 :: nil => cnot (maskzero v1 n)
   | _, _ => Bnone
@@ -55,6 +57,7 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   | Omove, v1::nil => v1
   | Ointconst n, nil => I n
   | Ofloatconst n, nil => if propagate_float_constants tt then F n else ftop
+  | Osingleconst n, nil => if propagate_float_constants tt then FS n else ftop
   | Oindirectsymbol id, nil => Ptr (Gl id Int.zero)
   | Ocast8signed, v1 :: nil => sign_ext 8 v1
   | Ocast8unsigned, v1 :: nil => zero_ext 8 v1
@@ -93,9 +96,18 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   | Osubf, v1::v2::nil => subf v1 v2
   | Omulf, v1::v2::nil => mulf v1 v2
   | Odivf, v1::v2::nil => divf v1 v2
+  | Onegfs, v1::nil => negfs v1
+  | Oabsfs, v1::nil => absfs v1
+  | Oaddfs, v1::v2::nil => addfs v1 v2
+  | Osubfs, v1::v2::nil => subfs v1 v2
+  | Omulfs, v1::v2::nil => mulfs v1 v2
+  | Odivfs, v1::v2::nil => divfs v1 v2
   | Osingleoffloat, v1::nil => singleoffloat v1
+  | Ofloatofsingle, v1::nil => floatofsingle v1
   | Ointoffloat, v1::nil => intoffloat v1
   | Ofloatofint, v1::nil => floatofint v1
+  | Ointofsingle, v1::nil => intofsingle v1
+  | Osingleofint, v1::nil => singleofint v1
   | Omakelong, v1::v2::nil => longofwords v1 v2
   | Olowlong, v1::nil => loword v1
   | Ohighlong, v1::nil => hiword v1
@@ -164,6 +176,7 @@ Proof.
   unfold eval_operation, eval_static_operation; intros;
   destruct op; InvHyps; eauto with va.
   destruct (propagate_float_constants tt); constructor.
+  destruct (propagate_float_constants tt); constructor.
   eapply eval_static_addressing_sound; eauto.
   apply of_optbool_sound. eapply eval_static_condition_sound; eauto. 
 Qed.
diff --git a/ia32/standard/Conventions1.v b/ia32/standard/Conventions1.v
index e097e855..d1f7acd0 100644
--- a/ia32/standard/Conventions1.v
+++ b/ia32/standard/Conventions1.v
@@ -15,6 +15,7 @@
 
 Require Import Coqlib.
 Require Import AST.
+Require Import Events.
 Require Import Locations.
 
 (** * Classification of machine registers *)
@@ -161,13 +162,13 @@ Proof.
 Qed.
 
 Lemma int_callee_save_type:
-  forall r, In r int_callee_save_regs -> mreg_type r = Tint.
+  forall r, In r int_callee_save_regs -> mreg_type r = Tany32.
 Proof.
   intro. simpl; ElimOrEq; reflexivity.
 Qed.
 
 Lemma float_callee_save_type:
-  forall r, In r float_callee_save_regs -> mreg_type r = Tfloat.
+  forall r, In r float_callee_save_regs -> mreg_type r = Tany64.
 Proof.
   intro. simpl; ElimOrEq; reflexivity.
 Qed.
@@ -219,25 +220,20 @@ Qed.
 Definition loc_result (s: signature) : list mreg :=
   match s.(sig_res) with
   | None => AX :: nil
-  | Some Tint => AX :: nil
+  | Some (Tint | Tany32) => AX :: nil
   | Some (Tfloat | Tsingle) => FP0 :: nil
+  | Some Tany64 => X0 :: nil
   | Some Tlong => DX :: AX :: nil
   end.
 
-(*
-(** The result location has the type stated in the signature. *)
+(** The result registers have types compatible with that given in the signature. *)
 
 Lemma loc_result_type:
   forall sig,
-  mreg_type (loc_result sig) =
-  match sig.(sig_res) with None => Tint | Some ty => ty end.
+  subtype_list (proj_sig_res' sig) (map mreg_type (loc_result sig)) = true.
 Proof.
-  intros; unfold loc_result.
-  destruct (sig_res sig). 
-  destruct t; reflexivity.
-  reflexivity.
+  intros. unfold proj_sig_res', loc_result. destruct (sig_res sig) as [[]|]; auto.
 Qed.
-*)
 
 (** The result locations are caller-save registers *)
 
@@ -246,9 +242,9 @@ Lemma loc_result_caller_save:
   In r (loc_result s) -> In r destroyed_at_call.
 Proof.
   intros.
-  assert (r = AX \/ r = DX \/ r = FP0).
-    unfold loc_result in H. destruct (sig_res s); [destruct t|idtac]; simpl in H; intuition.
-  destruct H0 as [A | [A | A]]; subst r; simpl; OrEq.
+  assert (r = AX \/ r = DX \/ r = FP0 \/ r = X0).
+    unfold loc_result in H. destruct (sig_res s) as [[]|]; simpl in H; intuition.
+  destruct H0 as [A | [A | [A | A]]]; subst r; simpl; OrEq.
 Qed.
 
 (** ** Location of function arguments *)
@@ -263,6 +259,8 @@ Fixpoint loc_arguments_rec
   | Tfloat :: tys => S Outgoing ofs Tfloat :: loc_arguments_rec tys (ofs + 2)
   | Tsingle :: tys => S Outgoing ofs Tsingle :: loc_arguments_rec tys (ofs + 1)
   | Tlong :: tys => S Outgoing (ofs + 1) Tint :: S Outgoing ofs Tint :: loc_arguments_rec tys (ofs + 2)
+  | Tany32 :: tys => S Outgoing ofs Tany32 :: loc_arguments_rec tys (ofs + 1)
+  | Tany64 :: tys => S Outgoing ofs Tany64 :: loc_arguments_rec tys (ofs + 2)
   end.
 
 (** [loc_arguments s] returns the list of locations where to store arguments
@@ -303,21 +301,18 @@ Remark loc_arguments_rec_charact:
   end.
 Proof.
   induction tyl; simpl loc_arguments_rec; intros.
-  destruct H.
-  destruct a.
-- destruct H. subst l. split. omega. congruence.
-  exploit IHtyl; eauto.
-  destruct l; auto. destruct sl; auto. intuition omega.
-- destruct H. subst l. split. omega. congruence.
-  exploit IHtyl; eauto.
-  destruct l; auto. destruct sl; auto. intuition omega.
-- destruct H. subst l; split; [omega|congruence].
-  destruct H. subst l; split; [omega|congruence].
-  exploit IHtyl; eauto.
-  destruct l; auto. destruct sl; auto. intuition omega.
-- destruct H. subst l. split. omega. congruence.
-  exploit IHtyl; eauto.
-  destruct l; auto. destruct sl; auto. intuition omega.
+- destruct H.
+- assert (REC: forall ofs1, In l (loc_arguments_rec tyl ofs1) -> ofs1 > ofs -> 
+               match l with
+               | R _ => False
+               | S Local _ _ => False
+               | S Incoming _ _ => False
+               | S Outgoing ofs' ty => ofs' >= ofs /\ ty <> Tlong
+               end).
+  { intros. exploit IHtyl; eauto. destruct l; auto. destruct sl; intuition omega
+. }
+  destruct a; simpl in H; repeat (destruct H);
+  ((eapply REC; eauto; omega) || (split; [omega|congruence])).
 Qed.
 
 Lemma loc_arguments_acceptable:
@@ -357,16 +352,15 @@ Lemma loc_arguments_bounded:
 Proof.
   intros until ty. unfold loc_arguments, size_arguments. generalize (sig_args s) 0.
   induction l; simpl; intros.
-  destruct H.
-  destruct a.
-  destruct H. inv H. apply size_arguments_rec_above. auto.
-  destruct H. inv H. apply size_arguments_rec_above. auto.
-  destruct H. inv H.
+- contradiction.
+- Ltac decomp :=
+  match goal with
+  | [ H: _ \/ _ |- _ ] => destruct H; decomp
+  | [ H: S _ _ _ = S _ _ _ |- _ ] => inv H
+  | _ => idtac
+  end.
+  destruct a; simpl in H; decomp; auto; try apply size_arguments_rec_above.
   simpl typesize. replace (z + 1 + 1) with (z + 2) by omega. apply size_arguments_rec_above.
-  destruct H. inv H. 
-  simpl typesize. apply Zle_trans with (ofs + 2). omega. apply size_arguments_rec_above. 
-  auto.
-  destruct H. inv H. apply size_arguments_rec_above. auto.
+  simpl typesize. apply Zle_trans with (ofs + 2). omega. apply size_arguments_rec_above.
 Qed.
 
-
diff --git a/lib/Camlcoq.ml b/lib/Camlcoq.ml
index 8d6fd243..dd896367 100644
--- a/lib/Camlcoq.ml
+++ b/lib/Camlcoq.ml
@@ -320,9 +320,14 @@ let coqstring_of_camlstring s =
 (* Floats *)
 
 let coqfloat_of_camlfloat f =
-  Float.double_of_bits(coqint_of_camlint64(Int64.bits_of_float f))
+  Float.of_bits(coqint_of_camlint64(Int64.bits_of_float f))
 let camlfloat_of_coqfloat f =
-  Int64.float_of_bits(camlint64_of_coqint(Float.bits_of_double f))
+  Int64.float_of_bits(camlint64_of_coqint(Float.to_bits f))
+
+let coqfloat32_of_camlfloat f =
+  Float32.of_bits(coqint_of_camlint(Int32.bits_of_float f))
+let camlfloat_of_coqfloat32 f =
+  Int32.float_of_bits(camlint_of_coqint(Float32.to_bits f))
 
 (* Int31 *)
 
diff --git a/lib/Fappli_IEEE_extra.v b/lib/Fappli_IEEE_extra.v
new file mode 100644
index 00000000..5194a644
--- /dev/null
+++ b/lib/Fappli_IEEE_extra.v
@@ -0,0 +1,1506 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*          Jacques-Henri Jourdan, INRIA Paris-Rocquencourt            *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the GNU General Public License as published by  *)
+(*  the Free Software Foundation, either version 2 of the License, or  *)
+(*  (at your option) any later version.  This file is also distributed *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Additional operations and proofs about IEEE-754 binary
+    floating-point numbers, on top of the Flocq library. *)
+
+Require Import Psatz.
+Require Import Bool.
+Require Import Eqdep_dec.
+Require Import Fcore.
+Require Import Fcore_digits.
+Require Import Fcalc_digits.
+Require Import Fcalc_ops.
+Require Import Fcalc_round.
+Require Import Fcalc_bracket.
+Require Import Fprop_Sterbenz.
+Require Import Fappli_IEEE.
+Require Import Fappli_rnd_odd.
+
+Local Open Scope Z_scope.
+
+Section Extra_ops.
+
+(** [prec] is the number of bits of the mantissa including the implicit one.
+    [emax] is the exponent of the infinities.
+    Typically p=24 and emax = 128 in single precision. *)
+
+Variable prec emax : Z.
+Context (prec_gt_0_ : Prec_gt_0 prec).
+Let emin := (3 - emax - prec)%Z.
+Let fexp := FLT_exp emin prec.
+Hypothesis Hmax : (prec < emax)%Z.
+Let binary_float := binary_float prec emax.
+
+(** Remarks on [is_finite] *)
+
+Remark is_finite_not_is_nan:
+  forall (f: binary_float), is_finite _ _ f = true -> is_nan _ _ f = false.
+Proof.
+  destruct f; reflexivity || discriminate.
+Qed.
+
+Remark is_finite_strict_finite:
+  forall (f: binary_float), is_finite_strict _ _ f = true -> is_finite _ _ f = true.
+Proof.
+  destruct f; reflexivity || discriminate.
+Qed.
+
+(** Digression on FP numbers that cannot be [-0.0]. *)
+
+Definition is_finite_pos0 (f: binary_float) : bool :=
+  match f with
+  | B754_zero s => negb s
+  | B754_infinity _ => false
+  | B754_nan _ _ => false
+  | B754_finite _ _ _ _ => true
+  end.
+
+Lemma Bsign_pos0:
+  forall x, is_finite_pos0 x = true -> Bsign _ _ x = Rlt_bool (B2R _ _ x) 0%R.
+Proof.
+  intros. destruct x as [ [] | | | [] ex mx Bx ]; try discriminate; simpl.
+- rewrite Rlt_bool_false; auto. lra.
+- rewrite Rlt_bool_true; auto. apply F2R_lt_0_compat. compute; auto.
+- rewrite Rlt_bool_false; auto. 
+  assert ((F2R (Float radix2 (Z.pos ex) mx) > 0)%R) by 
+    ( apply F2R_gt_0_compat; compute; auto ).
+  lra.
+Qed.
+
+Theorem B2R_inj_pos0:
+  forall x y,
+  is_finite_pos0 x = true -> is_finite_pos0 y = true ->
+  B2R _ _ x = B2R _ _ y ->
+  x = y.
+Proof.
+  intros. apply B2R_Bsign_inj.
+  destruct x; reflexivity||discriminate.
+  destruct y; reflexivity||discriminate.
+  auto.
+  rewrite ! Bsign_pos0 by auto. rewrite H1; auto.
+Qed.
+
+(** ** Decidable equality *)
+
+Definition Beq_dec: forall (f1 f2: binary_float), {f1 = f2} + {f1 <> f2}.
+Proof.
+  assert (UIP_bool: forall (b1 b2: bool) (e e': b1 = b2), e = e').
+  { intros. apply UIP_dec. decide equality. }
+  Ltac try_not_eq := try solve [right; congruence].
+  destruct f1 as [| |? []|], f2 as [| |? []|];
+  try destruct b; try destruct b0;
+  try solve [left; auto]; try_not_eq.
+  destruct (positive_eq_dec x x0); try_not_eq;
+    subst; left; f_equal; f_equal; apply UIP_bool.
+  destruct (positive_eq_dec x x0); try_not_eq;
+    subst; left; f_equal; f_equal; apply UIP_bool.
+  destruct (positive_eq_dec m m0); try_not_eq;
+  destruct (Z_eq_dec e e1); try solve [right; intro H; inversion H; congruence];
+  subst; left; f_equal; apply UIP_bool. 
+  destruct (positive_eq_dec m m0); try_not_eq;
+  destruct (Z_eq_dec e e1); try solve [right; intro H; inversion H; congruence];
+  subst; left; f_equal; apply UIP_bool. 
+Defined.
+
+(** ** Comparison *)
+
+(** [Some c] means ordered as per [c]; [None] means unordered. *)
+
+Definition Bcompare (f1 f2: binary_float): option comparison :=
+  match f1, f2 with
+    | B754_nan _ _,_ | _,B754_nan _ _ => None
+    | B754_infinity true, B754_infinity true
+    | B754_infinity false, B754_infinity false => Some Eq
+    | B754_infinity true, _ => Some Lt
+    | B754_infinity false, _ => Some Gt
+    | _, B754_infinity true => Some Gt
+    | _, B754_infinity false => Some Lt
+    | B754_finite true _ _ _, B754_zero _ => Some Lt
+    | B754_finite false _ _ _, B754_zero _ => Some Gt
+    | B754_zero _, B754_finite true _ _ _ => Some Gt
+    | B754_zero _, B754_finite false _ _ _ => Some Lt
+    | B754_zero _, B754_zero _ => Some Eq
+    | B754_finite s1 m1 e1 _, B754_finite s2 m2 e2 _ =>
+      match s1, s2 with
+        | true, false => Some Lt
+        | false, true => Some Gt
+        | false, false =>
+          match Zcompare e1 e2 with
+            | Lt => Some Lt
+            | Gt => Some Gt
+            | Eq => Some (Pcompare m1 m2 Eq)
+          end
+        | true, true =>
+          match Zcompare e1 e2 with
+            | Lt => Some Gt
+            | Gt => Some Lt
+            | Eq => Some (CompOpp (Pcompare m1 m2 Eq))
+          end
+      end
+  end.
+
+Theorem Bcompare_finite_correct:
+  forall f1 f2,
+  is_finite _ _ f1 = true -> is_finite _ _ f2 = true ->
+  Bcompare f1 f2 = Some (Rcompare (B2R _ _ f1) (B2R _ _ f2)).
+Proof.
+  Ltac apply_Rcompare :=
+    match goal with
+      | [ |- Some Lt = Some (Rcompare _ _) ] => f_equal; symmetry; apply Rcompare_Lt
+      | [ |- Some Eq = Some (Rcompare _ _) ] => f_equal; symmetry; apply Rcompare_Eq
+      | [ |- Some Gt = Some (Rcompare _ _) ] => f_equal; symmetry; apply Rcompare_Gt
+    end.
+  unfold Bcompare; intros.
+  destruct f1, f2; try discriminate; unfold B2R, F2R, Fnum, Fexp, cond_Zopp;
+    try (replace 0%R with (Z2R 0 * bpow radix2 e)%R by (simpl Z2R; ring);
+         rewrite Rcompare_mult_r by (apply bpow_gt_0); rewrite Rcompare_Z2R).
+  apply_Rcompare; reflexivity.
+  destruct b0; reflexivity.
+  destruct b; reflexivity.
+  clear H H0.
+  apply andb_prop in e0; destruct e0; apply (canonic_canonic_mantissa _ _ false) in H.
+  apply andb_prop in e2; destruct e2; apply (canonic_canonic_mantissa _ _ false) in H1.
+  pose proof (Zcompare_spec e e1); unfold canonic, Fexp in H1, H.
+  assert (forall m1 m2 e1 e2,
+    let x := (Z2R (Zpos m1) * bpow radix2 e1)%R in
+    let y := (Z2R (Zpos m2) * bpow radix2 e2)%R in
+    (canonic_exp radix2 fexp x < canonic_exp radix2 fexp y)%Z -> (x < y)%R).
+  {  
+  intros; apply Rnot_le_lt; intro; apply (ln_beta_le radix2) in H5.
+  unfold canonic_exp in H4. apply (fexp_monotone prec emax) in H5.
+  unfold fexp, emin in H4. omega.
+  apply Rmult_gt_0_compat; [apply (Z2R_lt 0); reflexivity|now apply bpow_gt_0].
+  }
+  assert (forall m1 m2 e1 e2, (Z2R (- Zpos m1) * bpow radix2 e1 < Z2R (Zpos m2) * bpow radix2 e2)%R).
+  {
+  intros; apply (Rlt_trans _ 0%R).
+  replace 0%R with (0*bpow radix2 e0)%R by ring; apply Rmult_lt_compat_r;
+    [apply bpow_gt_0; reflexivity|now apply (Z2R_lt _ 0)].
+  apply Rmult_gt_0_compat; [apply (Z2R_lt 0); reflexivity|now apply bpow_gt_0].
+  }
+  destruct b, b0; try (now apply_Rcompare; apply H5); inversion H3;
+    try (apply_Rcompare; apply H4; rewrite H, H1 in H7; assumption);
+    try (apply_Rcompare; do 2 rewrite Z2R_opp, Ropp_mult_distr_l_reverse;
+      apply Ropp_lt_contravar; apply H4; rewrite H, H1 in H7; assumption);
+    rewrite H7, Rcompare_mult_r, Rcompare_Z2R by (apply bpow_gt_0); reflexivity.
+Qed.
+
+Theorem Bcompare_swap:
+  forall x y,
+  Bcompare y x = match Bcompare x y with Some c => Some (CompOpp c) | None => None end.
+Proof.
+  intros. 
+  destruct x as [ ? | [] | ? ? | [] mx ex Bx ];
+  destruct y as [ ? | [] | ? ? | [] my ey By ]; simpl; auto.
+- rewrite <- (Zcompare_antisym ex ey). destruct (ex ?= ey)%Z; auto.
+  simpl. f_equal; f_equal. symmetry. apply Pcompare_antisym. 
+- rewrite <- (Zcompare_antisym ex ey). destruct (ex ?= ey)%Z; auto. 
+  simpl. f_equal. symmetry. apply Pcompare_antisym.
+Qed.
+
+(** ** Absolute value *)
+
+Definition Babs abs_nan (x: binary_float) : binary_float :=
+  match x with
+  | B754_nan sx plx =>
+      let '(sres, plres) := abs_nan sx plx in B754_nan _ _ sres plres
+  | B754_infinity sx => B754_infinity _ _ false
+  | B754_finite sx mx ex Hx => B754_finite _ _ false mx ex Hx
+  | B754_zero sx => B754_zero _ _ false
+  end.
+
+Theorem B2R_Babs :
+  forall abs_nan x,
+  B2R _ _ (Babs abs_nan x) = Rabs (B2R _ _ x).
+Proof.
+  intros abs_nan [sx|sx|sx plx|sx mx ex Hx]; apply sym_eq ; try apply Rabs_R0.
+  simpl. destruct abs_nan. simpl. apply Rabs_R0.
+  simpl. rewrite <- F2R_abs. destruct sx; auto. 
+Qed.
+
+Theorem is_finite_Babs :
+  forall abs_nan x,
+  is_finite _ _ (Babs abs_nan x) = is_finite _ _ x.
+Proof.
+  intros abs_nan [| | |] ; try easy.
+  intros s pl.
+  simpl.
+  now case abs_nan.
+Qed.
+
+Theorem sign_Babs:
+  forall abs_nan x,
+  is_nan _ _ x = false ->
+  Bsign _ _ (Babs abs_nan x) = false.
+Proof.
+  intros abs_nan [| | |]; reflexivity || discriminate.
+Qed.
+
+Theorem Babs_idempotent :
+  forall abs_nan (x: binary_float),
+  is_nan _ _ x = false ->
+  Babs abs_nan (Babs abs_nan x) = Babs abs_nan x.
+Proof.
+  now intros abs_nan [sx|sx|sx plx|sx mx ex Hx] ; auto.
+Qed.
+
+Theorem Babs_opp:
+  forall abs_nan opp_nan x,
+  is_nan _ _ x = false ->
+  Babs abs_nan (Bopp _ _ opp_nan x) = Babs abs_nan x.
+Proof.
+  intros abs_nan opp_nan [| | |]; reflexivity || discriminate.
+Qed.
+
+(** ** Conversion from an integer to a FP number *)
+
+(** Integers that can be represented exactly as FP numbers. *)
+
+Definition integer_representable (n: Z): Prop :=
+  Z.abs n <= 2^emax - 2^(emax - prec) /\ generic_format radix2 fexp (Z2R n).
+
+Let int_upper_bound_eq: 2^emax - 2^(emax - prec) = (2^prec - 1) * 2^(emax - prec).
+Proof.
+  red in prec_gt_0_.
+  ring_simplify. rewrite <- (Zpower_plus radix2) by omega. f_equal. f_equal. omega.
+Qed.
+
+Lemma integer_representable_n2p:
+  forall n p,
+  -2^prec < n < 2^prec -> 0 <= p -> p <= emax - prec ->
+  integer_representable (n * 2^p).
+Proof.
+  intros; split. 
+- red in prec_gt_0_. replace (Z.abs (n * 2^p)) with (Z.abs n * 2^p).
+  rewrite int_upper_bound_eq.
+  apply Zmult_le_compat. zify; omega. apply (Zpower_le radix2); omega. 
+  zify; omega. apply (Zpower_ge_0 radix2). 
+  rewrite Z.abs_mul. f_equal. rewrite Z.abs_eq. auto. apply (Zpower_ge_0 radix2). 
+- apply generic_format_FLT. exists (Float radix2 n p).
+  unfold F2R; simpl.
+  split. rewrite <- Z2R_Zpower by auto. apply Z2R_mult. 
+  split. zify; omega. 
+  unfold emin; red in prec_gt_0_; omega.
+Qed.
+
+Lemma integer_representable_2p:
+  forall p,
+  0 <= p <= emax - 1 ->
+  integer_representable (2^p).
+Proof.
+  intros; split. 
+- red in prec_gt_0_. 
+  rewrite Z.abs_eq by (apply (Zpower_ge_0 radix2)). 
+  apply Zle_trans with (2^(emax-1)). 
+  apply (Zpower_le radix2); omega.
+  assert (2^emax = 2^(emax-1)*2).
+  { change 2 with (2^1) at 3. rewrite <- (Zpower_plus radix2) by omega. 
+    f_equal. omega. }
+  assert (2^(emax - prec) <= 2^(emax - 1)).
+  { apply (Zpower_le radix2). omega. }
+  omega.
+- red in prec_gt_0_.
+  apply generic_format_FLT. exists (Float radix2 1 p).
+  unfold F2R; simpl.
+  split. rewrite Rmult_1_l. rewrite <- Z2R_Zpower. auto. omega. 
+  split. change 1 with (2^0). apply (Zpower_lt radix2). omega. auto.
+  unfold emin; omega.
+Qed.
+
+Lemma integer_representable_opp:
+  forall n, integer_representable n -> integer_representable (-n).
+Proof.
+  intros n (A & B); split. rewrite Z.abs_opp. auto. 
+  rewrite Z2R_opp. apply generic_format_opp; auto.
+Qed.
+
+Lemma integer_representable_n2p_wide:
+  forall n p,
+  -2^prec <= n <= 2^prec -> 0 <= p -> p < emax - prec ->
+  integer_representable (n * 2^p).
+Proof.
+  intros. red in prec_gt_0_.
+  destruct (Z.eq_dec n (2^prec)); [idtac | destruct (Z.eq_dec n (-2^prec))].
+- rewrite e. rewrite <- (Zpower_plus radix2) by omega.
+  apply integer_representable_2p. omega.  
+- rewrite e. rewrite <- Zopp_mult_distr_l. apply integer_representable_opp. 
+  rewrite <- (Zpower_plus radix2) by omega.
+  apply integer_representable_2p. omega.  
+- apply integer_representable_n2p; omega.
+Qed.
+
+Lemma integer_representable_n:
+  forall n, -2^prec <= n <= 2^prec -> integer_representable n.
+Proof.
+  red in prec_gt_0_. intros.
+  replace n with (n * 2^0) by (change (2^0) with 1; ring).
+  apply integer_representable_n2p_wide. auto. omega. omega. 
+Qed.
+
+Lemma round_int_no_overflow:
+  forall n,
+  Z.abs n <= 2^emax - 2^(emax-prec) ->
+  (Rabs (round radix2 fexp (round_mode mode_NE) (Z2R n)) < bpow radix2 emax)%R.
+Proof.
+  intros. red in prec_gt_0_.
+  rewrite <- round_NE_abs. 
+  apply Rle_lt_trans with (Z2R (2^emax - 2^(emax-prec))).
+  apply round_le_generic. apply fexp_correct; auto. apply valid_rnd_N.
+  apply generic_format_FLT. exists (Float radix2 (2^prec-1) (emax-prec)).
+  rewrite int_upper_bound_eq. unfold F2R; simpl.
+  split. rewrite <- Z2R_Zpower by omega. rewrite <- Z2R_mult. auto. 
+  split. assert (0 < 2^prec) by (apply (Zpower_gt_0 radix2); omega). zify; omega.
+  unfold emin; omega.
+  rewrite <- Z2R_abs. apply Z2R_le. auto.
+  rewrite <- Z2R_Zpower by omega. apply Z2R_lt. simpl. 
+  assert (0 < 2^(emax-prec)) by (apply (Zpower_gt_0 radix2); omega).
+  omega.
+  apply fexp_correct. auto. 
+Qed.
+
+(** Conversion from an integer.  Round to nearest. *)
+
+Definition BofZ (n: Z) : binary_float :=
+  binary_normalize prec emax prec_gt_0_ Hmax mode_NE n 0 false.
+
+Theorem BofZ_correct:
+  forall n,
+  if Rlt_bool (Rabs (round radix2 fexp (round_mode mode_NE) (Z2R n))) (bpow radix2 emax)
+  then
+    B2R prec emax (BofZ n) = round radix2 fexp (round_mode mode_NE) (Z2R n) /\
+    is_finite _ _ (BofZ n) = true /\
+    Bsign prec emax (BofZ n) = Zlt_bool n 0
+  else
+    B2FF prec emax (BofZ n) = binary_overflow prec emax mode_NE (Zlt_bool n 0).
+Proof.
+  intros. 
+  generalize (binary_normalize_correct prec emax prec_gt_0_ Hmax mode_NE n 0 false).
+  fold emin; fold fexp; fold (BofZ n). 
+  replace (F2R {| Fnum := n; Fexp := 0 |}) with (Z2R n). 
+  destruct Rlt_bool. 
+- intros (A & B & C). split; [|split].
+  + auto.
+  + auto.
+  + rewrite C. change 0%R with (Z2R 0). rewrite Rcompare_Z2R. 
+    unfold Zlt_bool. auto.
+- intros A; rewrite A. f_equal. change 0%R with (Z2R 0). 
+  generalize (Zlt_bool_spec n 0); intros SPEC; inversion SPEC.
+  apply Rlt_bool_true; apply Z2R_lt; auto.
+  apply Rlt_bool_false; apply Z2R_le; auto.
+- unfold F2R; simpl. ring.
+Qed.
+
+Theorem BofZ_finite:
+  forall n,
+  Z.abs n <= 2^emax - 2^(emax-prec) ->
+  B2R _ _ (BofZ n) = round radix2 fexp (round_mode mode_NE) (Z2R n)
+  /\ is_finite _ _ (BofZ n) = true
+  /\ Bsign _ _ (BofZ n) = Zlt_bool n 0%Z.
+Proof.
+  intros.
+  generalize (BofZ_correct n). rewrite Rlt_bool_true. auto. 
+  apply round_int_no_overflow; auto.
+Qed.
+
+Theorem BofZ_representable:
+  forall n,
+  integer_representable n ->
+  B2R _ _ (BofZ n) = Z2R n
+  /\ is_finite _ _ (BofZ n) = true
+  /\ Bsign _ _ (BofZ n) = (n <? 0).
+Proof.
+  intros. destruct H as (P & Q). destruct (BofZ_finite n) as (A & B & C). auto.
+  intuition. rewrite A. apply round_generic. apply valid_rnd_round_mode. auto. 
+Qed.
+
+Theorem BofZ_exact:
+  forall n,
+  -2^prec <= n <= 2^prec ->
+  B2R _ _ (BofZ n) = Z2R n
+  /\ is_finite _ _ (BofZ n) = true
+  /\ Bsign _ _ (BofZ n) = Zlt_bool n 0%Z.
+Proof.
+  intros. apply BofZ_representable. apply integer_representable_n; auto.
+Qed.
+
+Lemma BofZ_finite_pos0:
+  forall n,
+  Z.abs n <= 2^emax - 2^(emax-prec) -> is_finite_pos0 (BofZ n) = true.
+Proof.
+  intros. 
+  generalize (binary_normalize_correct prec emax prec_gt_0_ Hmax mode_NE n 0 false).
+  fold emin; fold fexp; fold (BofZ n). 
+  replace (F2R {| Fnum := n; Fexp := 0 |}) with (Z2R n) by
+    (unfold F2R; simpl; ring).
+  rewrite Rlt_bool_true by (apply round_int_no_overflow; auto).
+  intros (A & B & C).
+  destruct (BofZ n); auto; try discriminate. 
+  simpl in *. rewrite C. change 0%R with (Z2R 0). rewrite Rcompare_Z2R. 
+  generalize (Zcompare_spec n 0); intros SPEC; inversion SPEC; auto.
+  assert ((round radix2 fexp ZnearestE (Z2R n) <= -1)%R).
+  { change (-1)%R with (Z2R (-1)).
+    apply round_le_generic. apply fexp_correct. auto. apply valid_rnd_N.
+    apply (integer_representable_opp 1). 
+    apply (integer_representable_2p 0). 
+    red in prec_gt_0_; omega.
+    apply Z2R_le; omega.
+  }
+  lra.
+Qed.
+
+Lemma BofZ_finite_equal:
+  forall x y,
+  Z.abs x <= 2^emax - 2^(emax-prec) ->
+  Z.abs y <= 2^emax - 2^(emax-prec) ->
+  B2R _ _ (BofZ x) = B2R _ _ (BofZ y) ->
+  BofZ x = BofZ y.
+Proof.
+  intros. apply B2R_inj_pos0; auto; apply BofZ_finite_pos0; auto.
+Qed.
+
+(** Commutation properties with addition, subtraction, multiplication. *)
+
+Theorem BofZ_plus:
+  forall nan p q,  
+  integer_representable p -> integer_representable q ->
+  Bplus _ _ _ Hmax nan mode_NE (BofZ p) (BofZ q) = BofZ (p + q).
+Proof.
+  intros. 
+  destruct (BofZ_representable p) as (A & B & C); auto. 
+  destruct (BofZ_representable q) as (D & E & F); auto.
+  generalize (Bplus_correct _ _ _ Hmax nan mode_NE (BofZ p) (BofZ q) B E).
+  fold emin; fold fexp.
+  rewrite A, D. rewrite <- Z2R_plus. 
+  generalize (BofZ_correct (p + q)). destruct Rlt_bool.
+- intros (P & Q & R) (U & V & W).
+  apply B2R_Bsign_inj; auto.
+  rewrite P, U; auto.
+  rewrite R, W, C, F.
+  change 0%R with (Z2R 0). rewrite Rcompare_Z2R. unfold Zlt_bool at 3.
+  generalize (Zcompare_spec (p + q) 0); intros SPEC; inversion SPEC; auto.
+  assert (EITHER: 0 <= p \/ 0 <= q) by omega. 
+  destruct EITHER; [apply andb_false_intro1 | apply andb_false_intro2];
+  apply Zlt_bool_false; auto.
+- intros P (U & V). 
+  apply B2FF_inj. 
+  rewrite P, U, C. f_equal. rewrite C, F in V. 
+  generalize (Zlt_bool_spec p 0) (Zlt_bool_spec q 0). rewrite <- V. 
+  intros SPEC1 SPEC2; inversion SPEC1; inversion SPEC2; try congruence; symmetry.
+  apply Zlt_bool_true; omega.
+  apply Zlt_bool_false; omega.
+Qed.
+
+Theorem BofZ_minus:
+  forall nan p q,  
+  integer_representable p -> integer_representable q ->
+  Bminus _ _ _ Hmax nan mode_NE (BofZ p) (BofZ q) = BofZ (p - q).
+Proof.
+  intros. 
+  destruct (BofZ_representable p) as (A & B & C); auto. 
+  destruct (BofZ_representable q) as (D & E & F); auto.
+  generalize (Bminus_correct _ _ _ Hmax nan mode_NE (BofZ p) (BofZ q) B E).
+  fold emin; fold fexp.
+  rewrite A, D. rewrite <- Z2R_minus. 
+  generalize (BofZ_correct (p - q)). destruct Rlt_bool.
+- intros (P & Q & R) (U & V & W).
+  apply B2R_Bsign_inj; auto.
+  rewrite P, U; auto.
+  rewrite R, W, C, F.
+  change 0%R with (Z2R 0). rewrite Rcompare_Z2R. unfold Zlt_bool at 3.
+  generalize (Zcompare_spec (p - q) 0); intros SPEC; inversion SPEC; auto.
+  assert (EITHER: 0 <= p \/ q < 0) by omega. 
+  destruct EITHER; [apply andb_false_intro1 | apply andb_false_intro2].
+  rewrite Zlt_bool_false; auto.
+  rewrite Zlt_bool_true; auto.
+- intros P (U & V). 
+  apply B2FF_inj. 
+  rewrite P, U, C. f_equal. rewrite C, F in V. 
+  generalize (Zlt_bool_spec p 0) (Zlt_bool_spec q 0). rewrite V. 
+  intros SPEC1 SPEC2; inversion SPEC1; inversion SPEC2; symmetry.
+  rewrite <- H3 in H1; discriminate.
+  apply Zlt_bool_true; omega.
+  apply Zlt_bool_false; omega.
+  rewrite <- H3 in H1; discriminate.
+Qed.
+
+Theorem BofZ_mult:
+  forall nan p q,  
+  integer_representable p -> integer_representable q ->
+  0 < q ->
+  Bmult _ _ _ Hmax nan mode_NE (BofZ p) (BofZ q) = BofZ (p * q).
+Proof.
+  intros. 
+  assert (SIGN: xorb (p <? 0) (q <? 0) = (p * q <? 0)).
+  {
+    rewrite (Zlt_bool_false q) by omega.
+    generalize (Zlt_bool_spec p 0); intros SPEC; inversion SPEC; simpl; symmetry.
+    apply Zlt_bool_true. rewrite Z.mul_comm. apply Z.mul_pos_neg; omega.
+    apply Zlt_bool_false. apply Zsame_sign_imp; omega. 
+  }
+  destruct (BofZ_representable p) as (A & B & C); auto. 
+  destruct (BofZ_representable q) as (D & E & F); auto.
+  generalize (Bmult_correct _ _ _ Hmax nan mode_NE (BofZ p) (BofZ q)).
+  fold emin; fold fexp.
+  rewrite A, B, C, D, E, F. rewrite <- Z2R_mult.
+  generalize (BofZ_correct (p * q)). destruct Rlt_bool.
+- intros (P & Q & R) (U & V & W).
+  apply B2R_Bsign_inj; auto.
+  rewrite P, U; auto.
+  rewrite R, W; auto.
+  apply is_finite_not_is_nan; auto.
+- intros P U.
+  apply B2FF_inj. rewrite P, U. f_equal. auto.
+Qed.
+
+Theorem BofZ_mult_2p:
+  forall nan x p,
+  Z.abs x <= 2^emax - 2^(emax-prec) ->
+  2^prec <= Z.abs x ->
+  0 <= p <= emax - 1 ->
+  Bmult _ _ _ Hmax nan mode_NE (BofZ x) (BofZ (2^p)) = BofZ (x * 2^p).
+Proof.
+  intros.
+  destruct (Z.eq_dec x 0). 
+- subst x. apply BofZ_mult.
+    apply integer_representable_n. 
+    generalize (Zpower_ge_0 radix2 prec). simpl; omega.
+    apply integer_representable_2p. auto. 
+    apply (Zpower_gt_0 radix2).
+    omega.
+- assert (Z2R x <> 0%R) by (apply (Z2R_neq _ _ n)).
+  destruct (BofZ_finite x H) as (A & B & C).
+  destruct (BofZ_representable (2^p)) as (D & E & F). 
+    apply integer_representable_2p. auto.
+  assert (canonic_exp radix2 fexp (Z2R (x * 2^p)) =
+          canonic_exp radix2 fexp (Z2R x) + p).
+  {
+    unfold canonic_exp, fexp. rewrite Z2R_mult. 
+    change (2^p) with (radix2^p). rewrite Z2R_Zpower by omega. 
+    rewrite ln_beta_mult_bpow by auto.
+    assert (prec + 1 <= ln_beta radix2 (Z2R x)).
+    { rewrite <- (ln_beta_abs radix2 (Z2R x)). 
+      rewrite <- (ln_beta_bpow radix2 prec). 
+      apply ln_beta_le. 
+      apply bpow_gt_0. rewrite <- Z2R_Zpower by (red in prec_gt_0_;omega).
+      rewrite <- Z2R_abs. apply Z2R_le; auto. }
+    unfold FLT_exp.
+    unfold emin; red in prec_gt_0_; zify; omega.
+  }
+  assert (forall m, round radix2 fexp m (Z2R x) * Z2R (2^p) =
+                    round radix2 fexp m (Z2R (x * 2^p)))%R.
+  {
+    intros. unfold round, scaled_mantissa. rewrite H3. 
+    rewrite Z2R_mult. rewrite Z.opp_add_distr. rewrite bpow_plus. 
+    set (a := Z2R x); set (b := bpow radix2 (- canonic_exp radix2 fexp a)).
+    replace (a * Z2R (2^p) * (b * bpow radix2 (-p)))%R with (a * b)%R.
+    unfold F2R; simpl. rewrite Rmult_assoc. f_equal. 
+    rewrite bpow_plus.  f_equal. apply (Z2R_Zpower radix2). omega. 
+    transitivity ((a * b) * (Z2R (2^p) * bpow radix2 (-p)))%R.
+    rewrite (Z2R_Zpower radix2). rewrite <- bpow_plus.  
+    replace (p + -p) with 0 by omega. change (bpow radix2 0) with 1%R. ring.
+    omega. 
+    ring.
+  }
+  assert (forall m x,
+    round radix2 fexp (round_mode m) (round radix2 fexp (round_mode m) x) =
+    round radix2 fexp (round_mode m) x).
+  {
+    intros. apply round_generic. apply valid_rnd_round_mode. 
+    apply generic_format_round.  apply fexp_correct; auto. 
+    apply valid_rnd_round_mode. 
+  }
+  assert (xorb (x <? 0) (2^p <? 0) = (x * 2^p <? 0)).
+  {
+    assert (0 < 2^p) by (apply (Zpower_gt_0 radix2); omega).
+    rewrite (Zlt_bool_false (2^p)) by omega. rewrite xorb_false_r.
+    symmetry. generalize (Zlt_bool_spec x 0); intros SPEC; inversion SPEC.
+    apply Zlt_bool_true. apply Z.mul_neg_pos; auto.
+    apply Zlt_bool_false. apply Z.mul_nonneg_nonneg; omega.
+  }
+  generalize (Bmult_correct _ _ _ Hmax nan mode_NE (BofZ x) (BofZ (2^p)))
+             (BofZ_correct (x * 2^p)).
+  fold emin; fold fexp. rewrite A, B, C, D, E, F, H4, H5.
+  destruct Rlt_bool.
++ intros (P & Q & R) (U & V & W).
+  apply B2R_Bsign_inj; auto.
+  rewrite P, U. auto.
+  rewrite R, W. auto.
+  apply is_finite_not_is_nan; auto.
++ intros P U. 
+  apply B2FF_inj. rewrite P, U. f_equal; auto.
+Qed.
+
+(** Rounding to odd the argument of [BofZ]. *)
+
+Lemma round_odd_flt:
+  forall prec' emin' x choice,
+  prec > 1 -> prec' > 1 -> prec' >= prec + 2 -> emin' <= emin - 2 ->
+  round radix2 fexp (Znearest choice) (round radix2 (FLT_exp emin' prec') Zrnd_odd x) =
+  round radix2 fexp (Znearest choice) x.
+Proof.
+  intros. apply round_odd_prop. auto. apply fexp_correct; auto.
+  apply exists_NE_FLT. right; omega. 
+  apply FLT_exp_valid. red; omega. 
+  apply exists_NE_FLT. right; omega.
+  unfold fexp, FLT_exp; intros. zify; omega.
+Qed.
+
+Corollary round_odd_fix:
+  forall x p choice,
+  prec > 1 ->
+  0 <= p ->
+  (bpow radix2 (prec + p + 1) <= Rabs x)%R ->
+  round radix2 fexp (Znearest choice) (round radix2 (FIX_exp p) Zrnd_odd x) =
+  round radix2 fexp (Znearest choice) x.
+Proof.
+  intros. destruct (Req_EM_T x 0%R). 
+- subst x. rewrite round_0. auto. apply valid_rnd_odd. 
+- set (prec' := ln_beta radix2 x - p).
+  set (emin' := emin - 2).
+  assert (PREC: ln_beta radix2 (bpow radix2 (prec + p + 1)) <= ln_beta radix2 x).
+  { rewrite <- (ln_beta_abs radix2 x).
+    apply ln_beta_le; auto. apply bpow_gt_0. }
+  rewrite ln_beta_bpow in PREC. 
+  assert (CANON: canonic_exp radix2 (FLT_exp emin' prec') x =
+                 canonic_exp radix2 (FIX_exp p) x).
+  {
+    unfold canonic_exp, FLT_exp, FIX_exp.
+    replace (ln_beta radix2 x - prec') with p by (unfold prec'; omega).
+    apply Z.max_l. unfold emin', emin. red in prec_gt_0_; omega. 
+  }
+  assert (RND: round radix2 (FIX_exp p) Zrnd_odd x =
+               round radix2 (FLT_exp emin' prec') Zrnd_odd x).
+  {
+    unfold round, scaled_mantissa. rewrite CANON. auto. 
+  }
+  rewrite RND. 
+  apply round_odd_flt. auto. 
+  unfold prec'. red in prec_gt_0_; omega. 
+  unfold prec'. omega.
+  unfold emin'. omega.
+Qed.
+
+Definition int_round_odd (x: Z) (p: Z) :=
+  (if Z.eqb (x mod 2^p) 0 || Z.odd (x / 2^p) then x / 2^p else x / 2^p + 1) * 2^p.
+
+Lemma Zrnd_odd_int:
+  forall n p, 0 <= p ->
+  Zrnd_odd (Z2R n * bpow radix2 (-p)) * 2^p =
+  int_round_odd n p.
+Proof.
+  intros. 
+  assert (0 < 2^p) by (apply (Zpower_gt_0 radix2); omega).
+  assert (n = (n / 2^p) * 2^p + n mod 2^p) by (rewrite Zmult_comm; apply Z.div_mod; omega).
+  assert (0 <= n mod 2^p < 2^p) by (apply Z_mod_lt; omega).
+  unfold int_round_odd. set (q := n / 2^p) in *; set (r := n mod 2^p) in *.
+  f_equal.
+  pose proof (bpow_gt_0 radix2 (-p)).
+  assert (bpow radix2 p * bpow radix2 (-p) = 1)%R.
+  { rewrite <- bpow_plus. replace (p + -p) with 0 by omega. auto. }
+  assert (Z2R n * bpow radix2 (-p) = Z2R q + Z2R r * bpow radix2 (-p))%R.
+  { rewrite H1. rewrite Z2R_plus, Z2R_mult. 
+    change (Z2R (2^p)) with (Z2R (radix2^p)).
+    rewrite Z2R_Zpower by omega. ring_simplify.
+    rewrite Rmult_assoc. rewrite H4. ring. }
+  assert (0 <= Z2R r < bpow radix2 p)%R.
+  { split. change 0%R with (Z2R 0). apply Z2R_le; omega. 
+    rewrite <- Z2R_Zpower by omega. apply Z2R_lt; tauto. }
+  assert (0 <= Z2R r * bpow radix2 (-p) < 1)%R.
+  { generalize (bpow_gt_0 radix2 (-p)). intros. 
+    split. apply Rmult_le_pos; lra. 
+    rewrite <- H4. apply Rmult_lt_compat_r. auto. tauto. }
+  assert (Zfloor (Z2R n * bpow radix2 (-p)) = q).
+  { apply Zfloor_imp. rewrite H5. rewrite Z2R_plus. change (Z2R 1) with 1%R. lra. }
+  unfold Zrnd_odd. destruct Req_EM_T.
+- assert (Z2R r * bpow radix2 (-p) = 0)%R.
+  { rewrite H8 in e. rewrite e in H5. lra. }
+  apply Rmult_integral in H9. destruct H9; [ | lra ].
+  apply (eq_Z2R r 0) in H9. apply <- Z.eqb_eq in H9. rewrite H9. assumption.
+- assert (Z2R r * bpow radix2 (-p) <> 0)%R.
+  { rewrite H8 in n0. lra. }
+  destruct (Z.eqb r 0) eqn:RZ.
+  apply Z.eqb_eq in RZ. rewrite RZ in H9. change (Z2R 0) with 0%R in H9.
+  rewrite Rmult_0_l in H9. congruence.
+  rewrite Zceil_floor_neq by lra. rewrite H8. 
+  change Zeven with Z.even. rewrite Zodd_even_bool. destruct (Z.even q); auto.
+Qed.
+
+Lemma int_round_odd_le:
+  forall p x y, 0 <= p ->
+  x <= y -> int_round_odd x p <= int_round_odd y p.
+Proof.
+  intros. 
+  assert (Zrnd_odd (Z2R x * bpow radix2 (-p)) <= Zrnd_odd (Z2R y * bpow radix2 (-p))).
+  { apply Zrnd_le. apply valid_rnd_odd. apply Rmult_le_compat_r. apply bpow_ge_0. 
+    apply Z2R_le; auto. }
+  rewrite <- ! Zrnd_odd_int by auto. 
+  apply Zmult_le_compat_r. auto. apply (Zpower_ge_0 radix2). 
+Qed.
+
+Lemma int_round_odd_exact:
+  forall p x, 0 <= p ->
+  (2^p | x) -> int_round_odd x p = x.
+Proof.
+  intros. unfold int_round_odd. apply Znumtheory.Zdivide_mod in H0.
+  rewrite H0. simpl. rewrite Zmult_comm. symmetry. apply Z_div_exact_2.
+  apply Zlt_gt. apply (Zpower_gt_0 radix2). auto. auto. 
+Qed.
+
+Theorem BofZ_round_odd:
+  forall x p,
+  prec > 1 ->
+  Z.abs x <= 2^emax - 2^(emax-prec) ->
+  0 <= p <= emax - prec ->
+  2^(prec + p + 1) <= Z.abs x ->
+  BofZ x = BofZ (int_round_odd x p).
+Proof.
+  intros x p PREC XRANGE PRANGE XGE.
+  assert (DIV: (2^p | 2^emax - 2^(emax - prec))).
+  { rewrite int_upper_bound_eq. apply Z.divide_mul_r.
+    exists (2^(emax - prec - p)). red in prec_gt_0_. 
+    rewrite <- (Zpower_plus radix2) by omega. f_equal; omega. }
+  assert (YRANGE: Z.abs (int_round_odd x p) <= 2^emax - 2^(emax-prec)).
+  { apply Z.abs_le. split.
+    replace (-(2^emax - 2^(emax-prec))) with (int_round_odd (-(2^emax - 2^(emax-prec))) p).
+    apply int_round_odd_le; zify; omega.
+    apply int_round_odd_exact. omega. apply Z.divide_opp_r. auto. 
+    replace (2^emax - 2^(emax-prec)) with (int_round_odd (2^emax - 2^(emax-prec)) p).
+    apply int_round_odd_le; zify; omega.
+    apply int_round_odd_exact. omega. auto. } 
+  destruct (BofZ_finite x XRANGE) as (X1 & X2 & X3).
+  destruct (BofZ_finite (int_round_odd x p) YRANGE) as (Y1 & Y2 & Y3).
+  apply BofZ_finite_equal; auto.
+  rewrite X1, Y1.
+  assert (Z2R (int_round_odd x p) = round radix2 (FIX_exp p) Zrnd_odd (Z2R x)).
+  {
+     unfold round, scaled_mantissa, canonic_exp, FIX_exp.
+     rewrite <- Zrnd_odd_int by omega.
+     unfold F2R; simpl. rewrite Z2R_mult. f_equal. apply (Z2R_Zpower radix2). omega.
+  }
+  rewrite H. symmetry. apply round_odd_fix. auto. omega. 
+  rewrite <- Z2R_Zpower. rewrite <- Z2R_abs. apply Z2R_le; auto. 
+  red in prec_gt_0_; omega.
+Qed.
+
+Lemma int_round_odd_shifts:
+  forall x p, 0 <= p ->
+  int_round_odd x p =
+  Z.shiftl (if Z.eqb (x mod 2^p) 0 then Z.shiftr x p else Z.lor (Z.shiftr x p) 1) p.
+Proof.
+  intros.
+  unfold int_round_odd. rewrite Z.shiftl_mul_pow2 by auto. f_equal.
+  rewrite Z.shiftr_div_pow2 by auto. 
+  destruct (x mod 2^p =? 0) eqn:E. auto. 
+  assert (forall n, (if Z.odd n then n else n + 1) = Z.lor n 1).
+  { destruct n; simpl; auto.
+    destruct p0; auto. 
+    destruct p0; auto. induction p0; auto. }
+  simpl. apply H0. 
+Qed.
+
+Lemma int_round_odd_bits:
+  forall x y p, 0 <= p ->
+  (forall i, 0 <= i < p -> Z.testbit y i = false) ->
+  Z.testbit y p = (if Z.eqb (x mod 2^p) 0 then Z.testbit x p else true) ->
+  (forall i, p < i -> Z.testbit y i = Z.testbit x i) ->
+  int_round_odd x p = y.
+Proof.
+  intros until p; intros PPOS BELOW AT ABOVE. 
+  rewrite int_round_odd_shifts by auto. 
+  apply Z.bits_inj'. intros. 
+  generalize (Zcompare_spec n p); intros SPEC; inversion SPEC.
+- rewrite BELOW by auto. apply Z.shiftl_spec_low; auto. 
+- subst n. rewrite AT. rewrite Z.shiftl_spec_high by omega.
+  replace (p - p) with 0 by omega.
+  destruct (x mod 2^p =? 0).
+  + rewrite Z.shiftr_spec by omega. f_equal; omega. 
+  + rewrite Z.lor_spec. apply orb_true_r. 
+- rewrite ABOVE by auto.  rewrite Z.shiftl_spec_high by omega.
+  destruct (x mod 2^p =? 0).
+  rewrite Z.shiftr_spec by omega. f_equal; omega.
+  rewrite Z.lor_spec, Z.shiftr_spec by omega. 
+  change 1 with (Z.ones 1). rewrite Z.ones_spec_high by omega. rewrite orb_false_r.
+  f_equal; omega.
+Qed.
+
+(** ** Conversion from a FP number to an integer *)
+
+(** Always rounds toward zero. *)
+
+Definition ZofB (f: binary_float): option Z :=
+  match f with
+    | B754_finite s m (Zpos e) _ => Some (cond_Zopp s (Zpos m) * Zpower_pos radix2 e)%Z
+    | B754_finite s m 0 _ => Some (cond_Zopp s (Zpos m))
+    | B754_finite s m (Zneg e) _ => Some (cond_Zopp s (Zpos m / Zpower_pos radix2 e))%Z
+    | B754_zero _ => Some 0%Z
+    | _ => None
+  end.
+
+Theorem ZofB_correct:
+  forall f,
+  ZofB f = if is_finite _ _ f then Some (Ztrunc (B2R _ _ f)) else None.
+Proof.
+  destruct f; simpl; auto.
+- f_equal. symmetry. apply (Ztrunc_Z2R 0). 
+- destruct e; f_equal. 
+  + unfold F2R; simpl. rewrite Rmult_1_r. rewrite Ztrunc_Z2R. auto.
+  + unfold F2R; simpl. rewrite <- Z2R_mult. rewrite Ztrunc_Z2R. auto.
+  + unfold F2R; simpl. rewrite Z2R_cond_Zopp. rewrite <- cond_Ropp_mult_l. 
+    assert (EQ: forall x, Ztrunc (cond_Ropp b x) = cond_Zopp b (Ztrunc x)).
+    {
+      intros. destruct b; simpl; auto. apply Ztrunc_opp.
+    }
+    rewrite EQ. f_equal. 
+    generalize (Zpower_pos_gt_0 2 p (refl_equal _)); intros.
+    rewrite Ztrunc_floor. symmetry. apply Zfloor_div. omega. 
+    apply Rmult_le_pos. apply (Z2R_le 0). compute; congruence.
+    apply Rlt_le. apply Rinv_0_lt_compat. apply (Z2R_lt 0). auto.
+Qed.
+
+(** Interval properties. *)
+
+Remark Ztrunc_range_pos:
+  forall x, 0 < Ztrunc x -> (Z2R (Ztrunc x) <= x < Z2R (Ztrunc x + 1)%Z)%R.
+Proof.
+  intros.  
+  rewrite Ztrunc_floor. split. apply Zfloor_lb. rewrite Z2R_plus. apply Zfloor_ub.
+  generalize (Rle_bool_spec 0%R x). intros RLE; inversion RLE; subst; clear RLE.
+  auto.
+  rewrite Ztrunc_ceil in H by lra. unfold Zceil in H.
+  assert (-x < 0)%R.
+  { apply Rlt_le_trans with (Z2R (Zfloor (-x)) + 1)%R. apply Zfloor_ub.  
+    change 0%R with (Z2R 0). change 1%R with (Z2R 1). rewrite <- Z2R_plus.
+    apply Z2R_le. omega. }
+  lra.
+Qed.
+
+Remark Ztrunc_range_zero:
+  forall x, Ztrunc x = 0 -> (-1 < x < 1)%R.
+Proof.
+  intros; generalize (Rle_bool_spec 0%R x). intros RLE; inversion RLE; subst; clear RLE.
+- rewrite Ztrunc_floor in H by auto. split. 
+  + apply Rlt_le_trans with 0%R; auto. rewrite <- Ropp_0. apply Ropp_lt_contravar. apply Rlt_0_1. 
+  + replace 1%R with (Z2R (Zfloor x) + 1)%R. apply Zfloor_ub. rewrite H. simpl. apply Rplus_0_l. 
+- rewrite Ztrunc_ceil in H by (apply Rlt_le; auto). split. 
+  + apply Ropp_lt_cancel. rewrite Ropp_involutive. 
+    replace 1%R with (Z2R (Zfloor (-x)) + 1)%R. apply Zfloor_ub. 
+    unfold Zceil in H. replace (Zfloor (-x)) with 0 by omega. simpl. apply Rplus_0_l. 
+  + apply Rlt_le_trans with 0%R; auto. apply Rle_0_1. 
+Qed.
+
+Theorem ZofB_range_pos:
+  forall f n, ZofB f = Some n -> 0 < n -> (Z2R n <= B2R _ _ f < Z2R (n + 1)%Z)%R.
+Proof.
+  intros. rewrite ZofB_correct in H. destruct (is_finite prec emax f) eqn:FIN; inversion H.
+  apply Ztrunc_range_pos. congruence.
+Qed.
+
+Theorem ZofB_range_neg:
+  forall f n, ZofB f = Some n -> n < 0 -> (Z2R (n - 1)%Z < B2R _ _ f <= Z2R n)%R.
+Proof.
+  intros. rewrite ZofB_correct in H. destruct (is_finite prec emax f) eqn:FIN; inversion H.
+  set (x := B2R prec emax f) in *. set (y := (-x)%R). 
+  assert (A: (Z2R (Ztrunc y) <= y < Z2R (Ztrunc y + 1)%Z)%R).
+  { apply Ztrunc_range_pos. unfold y. rewrite Ztrunc_opp. omega. }
+  destruct A as [B C]. 
+  unfold y in B, C. rewrite Ztrunc_opp in B, C. 
+  replace (- Ztrunc x + 1) with (- (Ztrunc x - 1)) in C by omega.
+  rewrite Z2R_opp in B, C. lra. 
+Qed.
+
+Theorem ZofB_range_zero:
+  forall f, ZofB f = Some 0 -> (-1 < B2R _ _ f < 1)%R.
+Proof.
+  intros. rewrite ZofB_correct in H. destruct (is_finite prec emax f) eqn:FIN; inversion H.
+  apply Ztrunc_range_zero. auto.
+Qed.
+
+Theorem ZofB_range_nonneg:
+  forall f n, ZofB f = Some n -> 0 <= n -> (-1 < B2R _ _ f < Z2R (n + 1)%Z)%R.
+Proof.
+  intros. destruct (Z.eq_dec n 0). 
+- subst n. apply ZofB_range_zero. auto. 
+- destruct (ZofB_range_pos f n) as (A & B). auto. omega. 
+  split; auto. apply Rlt_le_trans with (Z2R 0). simpl; lra. 
+  apply Rle_trans with (Z2R n); auto. apply Z2R_le; auto. 
+Qed.
+
+(** For representable integers, [ZofB] is left inverse of [BofZ]. *)
+
+Theorem ZofBofZ_exact:
+  forall n, integer_representable n -> ZofB (BofZ n) = Some n.
+Proof.
+  intros. destruct (BofZ_representable n H) as (A & B & C).
+  rewrite ZofB_correct. rewrite A, B. f_equal. apply Ztrunc_Z2R. 
+Qed.
+
+(** Compatibility with subtraction *)
+
+Remark Zfloor_minus:
+  forall x n, Zfloor (x - Z2R n) = Zfloor x - n.
+Proof.
+  intros. apply Zfloor_imp. replace (Zfloor x - n + 1) with ((Zfloor x + 1) - n) by omega. 
+  rewrite ! Z2R_minus. unfold Rminus. split. 
+  apply Rplus_le_compat_r. apply Zfloor_lb. 
+  apply Rplus_lt_compat_r. rewrite Z2R_plus. apply Zfloor_ub.
+Qed.
+
+Theorem ZofB_minus:
+  forall minus_nan m f p q,
+  ZofB f = Some p -> 0 <= p < 2*q -> q <= 2^prec -> (Z2R q <= B2R _ _ f)%R ->
+  ZofB (Bminus _ _ _ Hmax minus_nan m f (BofZ q)) = Some (p - q).
+Proof.
+  intros.
+  assert (Q: -2^prec <= q <= 2^prec). 
+  { split; auto.  generalize (Zpower_ge_0 radix2 prec); simpl; omega. }
+  assert (RANGE: (-1 < B2R _ _ f < Z2R (p + 1)%Z)%R) by (apply ZofB_range_nonneg; auto; omega). 
+  rewrite ZofB_correct in H. destruct (is_finite prec emax f) eqn:FIN; try discriminate.
+  assert (PQ2: (Z2R (p + 1) <= Z2R q * 2)%R). 
+  { change 2%R with (Z2R 2). rewrite <- Z2R_mult. apply Z2R_le. omega. }
+  assert (EXACT: round radix2 fexp (round_mode m) (B2R _ _ f - Z2R q)%R = (B2R _ _ f - Z2R q)%R).
+  { apply round_generic. apply valid_rnd_round_mode. 
+    apply sterbenz_aux. apply FLT_exp_monotone. apply generic_format_B2R. 
+    apply integer_representable_n. auto. lra. }
+  destruct (BofZ_exact q Q) as (A & B & C). 
+  generalize (Bminus_correct _ _ _ Hmax minus_nan m f (BofZ q) FIN B).
+  rewrite Rlt_bool_true.
+- fold emin; fold fexp. intros (D & E & F).
+  rewrite ZofB_correct. rewrite E. rewrite D. rewrite A. rewrite EXACT. 
+  inversion H. f_equal. rewrite ! Ztrunc_floor. apply Zfloor_minus.
+  lra. lra. 
+- rewrite A. fold emin; fold fexp. rewrite EXACT.
+  apply Rle_lt_trans with (bpow radix2 prec). 
+  apply Rle_trans with (Z2R q). apply Rabs_le. lra.
+  rewrite <- Z2R_Zpower. apply Z2R_le; auto. red in prec_gt_0_; omega.
+  apply bpow_lt. auto.
+Qed.
+
+(** A variant of [ZofB] that bounds the range of representable integers. *)
+
+Definition ZofB_range (f: binary_float) (zmin zmax: Z): option Z :=
+  match ZofB f with
+  | None => None
+  | Some z => if Zle_bool zmin z && Zle_bool z zmax then Some z else None
+  end.
+
+Theorem ZofB_range_correct:
+  forall f min max,
+  let n := Ztrunc (B2R _ _ f) in
+  ZofB_range f min max =
+  if is_finite _ _ f && Zle_bool min n && Zle_bool n max then Some n else None.
+Proof.
+  intros. unfold ZofB_range. rewrite ZofB_correct. fold n. 
+  destruct (is_finite prec emax f); auto. 
+Qed.
+
+Lemma ZofB_range_inversion:
+  forall f min max n,
+  ZofB_range f min max = Some n ->
+  min <= n /\ n <= max /\ ZofB f = Some n.
+Proof.
+  intros. rewrite ZofB_range_correct in H. rewrite ZofB_correct.  
+  destruct (is_finite prec emax f); try discriminate. 
+  set (n1 := Ztrunc (B2R _ _ f)) in *.
+  destruct (min <=? n1) eqn:MIN; try discriminate.
+  destruct (n1 <=? max) eqn:MAX; try discriminate.
+  simpl in H. inversion H. subst n. 
+  split. apply Zle_bool_imp_le; auto. 
+  split. apply Zle_bool_imp_le; auto.
+  auto.
+Qed.
+
+Theorem ZofB_range_minus:
+  forall minus_nan m f p q,
+  ZofB_range f 0 (2 * q - 1) = Some p -> q <= 2^prec -> (Z2R q <= B2R _ _ f)%R ->
+  ZofB_range (Bminus _ _ _ Hmax minus_nan m f (BofZ q)) (-q) (q - 1) = Some (p - q).
+Proof.
+  intros. destruct (ZofB_range_inversion _ _ _ _ H) as (A & B & C).
+  set (f' := Bminus prec emax prec_gt_0_ Hmax minus_nan m f (BofZ q)).
+  assert (D: ZofB f' = Some (p - q)).
+  { apply ZofB_minus. auto. omega. auto. auto. }
+  unfold ZofB_range. rewrite D. rewrite Zle_bool_true by omega. rewrite Zle_bool_true by omega. auto.
+Qed.
+
+(** ** Algebraic identities *)
+
+(** Commutativity of addition and multiplication *)
+
+Theorem Bplus_commut:
+  forall plus_nan mode (x y: binary_float),
+  plus_nan x y = plus_nan y x ->
+  Bplus _ _ _ Hmax plus_nan mode x y = Bplus _ _ _ Hmax plus_nan mode y x.
+Proof.
+  intros until y; intros NAN. 
+  pose proof (Bplus_correct _ _ _ Hmax plus_nan mode x y). 
+  pose proof (Bplus_correct _ _ _ Hmax plus_nan mode y x).
+  unfold Bplus in *; destruct x; destruct y; auto.
+- rewrite (eqb_sym b0 b). destruct (eqb b b0) eqn:EQB; auto. 
+  f_equal; apply eqb_prop; auto.
+- rewrite NAN; auto.
+- rewrite (eqb_sym b0 b). destruct (eqb b b0) eqn:EQB. 
+  f_equal; apply eqb_prop; auto.
+  rewrite NAN; auto. 
+- rewrite NAN; auto.
+- rewrite NAN; auto.
+- rewrite NAN; auto.
+- rewrite NAN; auto.
+- rewrite NAN; auto.
+- rewrite NAN; auto.
+- generalize (H (refl_equal _) (refl_equal _)); clear H.
+  generalize (H0 (refl_equal _) (refl_equal _)); clear H0.
+  fold emin. fold fexp. 
+  set (x := B754_finite prec emax b0 m0 e1 e2). set (rx := B2R _ _ x). 
+  set (y := B754_finite prec emax b m e e0). set (ry := B2R _ _ y).
+  rewrite (Rplus_comm ry rx). destruct Rlt_bool.
+  + intros (A1 & A2 & A3) (B1 & B2 & B3).
+    apply B2R_Bsign_inj; auto. rewrite <- B1 in A1. auto. 
+    rewrite Z.add_comm. rewrite Z.min_comm. auto.
+  + intros (A1 & A2) (B1 & B2). apply B2FF_inj. rewrite B2 in B1. rewrite <- B1 in A1. auto. 
+Qed.
+
+Theorem Bmult_commut:
+  forall mult_nan mode (x y: binary_float),
+  mult_nan x y = mult_nan y x ->
+  Bmult _ _ _ Hmax mult_nan mode x y = Bmult _ _ _ Hmax mult_nan mode y x.
+Proof.
+  intros until y; intros NAN. 
+  pose proof (Bmult_correct _ _ _ Hmax mult_nan mode x y). 
+  pose proof (Bmult_correct _ _ _ Hmax mult_nan mode y x).
+  unfold Bmult in *; destruct x; destruct y; auto.
+- rewrite (xorb_comm b0 b); auto.
+- rewrite NAN; auto.
+- rewrite NAN; auto.
+- rewrite (xorb_comm b0 b); auto.
+- rewrite NAN; auto.
+- rewrite (xorb_comm b0 b); auto.
+- rewrite NAN; auto.
+- rewrite (xorb_comm b0 b); auto.
+- rewrite NAN; auto.
+- rewrite NAN; auto.
+- rewrite NAN; auto.
+- rewrite NAN; auto.
+- rewrite (xorb_comm b0 b); auto.
+- rewrite (xorb_comm b0 b); auto.
+- rewrite NAN; auto.
+- revert H H0. fold emin. fold fexp. 
+  set (x := B754_finite prec emax b0 m0 e1 e2). set (rx := B2R _ _ x). 
+  set (y := B754_finite prec emax b m e e0). set (ry := B2R _ _ y).
+  rewrite (Rmult_comm ry rx). destruct Rlt_bool.
+  + intros (A1 & A2 & A3) (B1 & B2 & B3).
+    apply B2R_Bsign_inj; auto. rewrite <- B1 in A1. auto. 
+    rewrite ! Bsign_FF2B. f_equal. f_equal. apply xorb_comm. apply Pos.mul_comm. apply Z.add_comm.
+  + intros A B. apply B2FF_inj. etransitivity. eapply A. rewrite xorb_comm. auto. 
+Qed.
+
+(** Multiplication by 2 is diagonal addition. *)
+
+Theorem Bmult2_Bplus:
+  forall plus_nan mult_nan mode (f: binary_float),
+  (forall (x y: binary_float),
+   is_nan _ _ x = true -> is_finite _ _ y = true -> plus_nan x x = mult_nan x y) ->
+  Bplus _ _ _ Hmax plus_nan mode f f = Bmult _ _ _ Hmax mult_nan mode f (BofZ 2%Z).
+Proof.
+  intros until f; intros NAN.
+  destruct (BofZ_representable 2) as (A & B & C).
+  apply (integer_representable_2p 1). red in prec_gt_0_; omega. 
+  pose proof (Bmult_correct _ _ _ Hmax mult_nan mode f (BofZ 2%Z)). fold emin in H.
+  rewrite A, B, C in H. rewrite xorb_false_r in H.  
+  destruct (is_finite _ _ f) eqn:FIN.
+- pose proof (Bplus_correct _ _ _ Hmax plus_nan mode f f FIN FIN). fold emin in H0. 
+  assert (EQ: (B2R prec emax f * Z2R 2%Z = B2R prec emax f + B2R prec emax f)%R).
+  { change (Z2R 2%Z) with 2%R. ring. }
+  rewrite <- EQ in H0. destruct Rlt_bool. 
+  + destruct H0 as (P & Q & R). destruct H as (S & T & U). 
+    apply B2R_Bsign_inj; auto.
+    rewrite P, S. auto.
+    rewrite R, U.  
+    replace 0%R with (0 * Z2R 2%Z)%R by ring. rewrite Rcompare_mult_r. 
+    rewrite andb_diag, orb_diag. destruct f; try discriminate; simpl. 
+    rewrite Rcompare_Eq by auto. destruct mode; auto. 
+    replace 0%R with (@F2R radix2 {| Fnum := 0%Z; Fexp := e |}).
+    rewrite Rcompare_F2R. destruct b; auto. 
+    unfold F2R. simpl. ring. 
+    change 0%R with (Z2R 0%Z). apply Z2R_lt. omega. 
+    destruct (Bmult prec emax prec_gt_0_ Hmax mult_nan mode f (BofZ 2)); reflexivity || discriminate.
+  + destruct H0 as (P & Q). apply B2FF_inj. rewrite P, H. auto.
+- destruct f; try discriminate.
+  + simpl Bplus. rewrite eqb_true. destruct (BofZ 2) eqn:B2; try discriminate; simpl in *.
+    assert ((0 = 2)%Z) by (apply eq_Z2R; auto). discriminate.
+    subst b0. rewrite xorb_false_r. auto. 
+    auto.
+  + unfold Bplus, Bmult. rewrite <- NAN by auto. auto.
+Qed.
+
+(** Divisions that can be turned into multiplications by an inverse *)
+
+Definition Bexact_inverse_mantissa := Z.iter (prec - 1) xO xH.
+
+Remark Bexact_inverse_mantissa_value:
+  Zpos Bexact_inverse_mantissa = 2 ^ (prec - 1).
+Proof.
+  assert (REC: forall n, Z.pos (nat_iter n xO xH) = 2 ^ (Z.of_nat n)).
+  { induction n. reflexivity. 
+    simpl nat_iter. transitivity (2 * Z.pos (nat_iter n xO xH)). reflexivity. 
+    rewrite inj_S. rewrite IHn. unfold Z.succ. rewrite Zpower_plus by omega. 
+    change (2 ^ 1) with 2. ring. }
+  red in prec_gt_0_.
+  unfold Bexact_inverse_mantissa. rewrite iter_nat_of_Z by omega. rewrite REC.
+  rewrite Zabs2Nat.id_abs. rewrite Z.abs_eq by omega. auto.
+Qed.
+
+Remark Bexact_inverse_mantissa_digits2_Pnat:
+  digits2_Pnat Bexact_inverse_mantissa = Z.to_nat (prec - 1).
+Proof.
+  assert (DIGITS: forall n, digits2_Pnat (nat_iter n xO xH) = n).
+  { induction n; simpl. auto. congruence. }
+  red in prec_gt_0_.
+  unfold Bexact_inverse_mantissa. rewrite iter_nat_of_Z by omega. rewrite DIGITS.
+  apply Zabs2Nat.abs_nat_nonneg. omega.
+Qed.
+
+Remark bounded_Bexact_inverse:
+  forall e,
+  emin <= e <= emax - prec <-> bounded prec emax Bexact_inverse_mantissa e = true.
+Proof.
+  intros. unfold bounded, canonic_mantissa. rewrite andb_true_iff. 
+  rewrite <- Zeq_is_eq_bool. rewrite <- Zle_is_le_bool. 
+  rewrite Bexact_inverse_mantissa_digits2_Pnat. 
+  rewrite inj_S. red in prec_gt_0_. rewrite Z2Nat.id by omega. 
+  split. 
+- intros; split. unfold FLT_exp. unfold emin in H. zify; omega. omega.
+- intros [A B]. unfold FLT_exp in A. unfold emin. zify; omega.
+Qed.
+
+Program Definition Bexact_inverse (f: binary_float) : option binary_float :=
+  match f with
+  | B754_finite s m e B =>
+      if positive_eq_dec m Bexact_inverse_mantissa then
+      let e' := -e - (prec - 1) * 2 in
+      if Z_le_dec emin e' then
+      if Z_le_dec e' emax then
+        Some(B754_finite _ _ s m e' _)
+      else None else None else None
+  | _ => None
+  end.
+Next Obligation.
+  rewrite <- bounded_Bexact_inverse in B. rewrite <- bounded_Bexact_inverse. 
+  unfold emin in *. omega.
+Qed.
+
+Lemma Bexact_inverse_correct:
+  forall f f', Bexact_inverse f = Some f' ->
+  is_finite_strict _ _ f = true
+  /\ is_finite_strict _ _ f' = true
+  /\ B2R _ _ f' = (/ B2R _ _ f)%R
+  /\ B2R _ _ f <> 0%R
+  /\ Bsign _ _ f' = Bsign _ _ f.
+Proof with (try discriminate).
+  intros f f' EI. unfold Bexact_inverse in EI. destruct f...
+  destruct (Pos.eq_dec m Bexact_inverse_mantissa)...
+  set (e' := -e - (prec - 1) * 2) in *.
+  destruct (Z_le_dec emin e')...
+  destruct (Z_le_dec e' emax)...
+  inversion EI; clear EI; subst f' m.
+  split. auto. split. auto. split. unfold B2R. rewrite Bexact_inverse_mantissa_value.
+  unfold F2R; simpl. rewrite Z2R_cond_Zopp.
+  rewrite <- ! cond_Ropp_mult_l.
+  red in prec_gt_0_. 
+  replace (Z2R (2 ^ (prec - 1))) with (bpow radix2 (prec - 1))
+  by (symmetry; apply (Z2R_Zpower radix2); omega).
+  rewrite <- ! bpow_plus.
+  replace (prec - 1 + e') with (- (prec - 1 + e)) by (unfold e'; omega). 
+  rewrite bpow_opp. unfold cond_Ropp; destruct b; auto. 
+  rewrite Ropp_inv_permute. auto. apply Rgt_not_eq. apply bpow_gt_0.
+  split. simpl. red; intros. apply F2R_eq_0_reg in H. destruct b; simpl in H; discriminate.
+  auto.
+Qed.
+
+Theorem Bdiv_mult_inverse:
+  forall div_nan mult_nan mode x y z,
+  (forall (x y z: binary_float),
+   is_nan _ _ x = true -> is_finite _ _ y = true -> is_finite _ _ z = true ->
+   div_nan x y = mult_nan x z) ->
+  Bexact_inverse y = Some z ->
+  Bdiv _ _ _ Hmax div_nan mode x y = Bmult _ _ _ Hmax mult_nan mode x z.
+Proof.
+  intros until z; intros NAN; intros. destruct (Bexact_inverse_correct _ _ H) as (A & B & C & D & E).
+  pose proof (Bmult_correct _ _ _ Hmax mult_nan mode x z).
+  fold emin in H0. fold fexp in H0. 
+  pose proof (Bdiv_correct _ _ _ Hmax div_nan mode x y D).
+  fold emin in H1. fold fexp in H1.
+  unfold Rdiv in H1. rewrite <- C in H1. 
+  destruct (is_finite _ _ x) eqn:FINX.
+- destruct Rlt_bool. 
+  + destruct H0 as (P & Q & R). destruct H1 as (S & T & U). 
+    apply B2R_Bsign_inj; auto.
+    rewrite Q. simpl. apply is_finite_strict_finite; auto. 
+    rewrite P, S. auto. 
+    rewrite R, U, E. auto. 
+    apply is_finite_not_is_nan; auto. 
+    apply is_finite_not_is_nan. rewrite Q. simpl. apply is_finite_strict_finite; auto.  + apply B2FF_inj. rewrite H0, H1. rewrite E. auto. 
+- destruct y; try discriminate. destruct z; try discriminate.
+  destruct x; try discriminate; simpl.
+  + simpl in E; congruence.
+  + erewrite NAN; eauto. 
+Qed.
+
+End Extra_ops.
+
+(** ** Conversions between two FP formats *)
+
+Section Conversions.
+
+Variable prec1 emax1 prec2 emax2 : Z.
+Context (prec1_gt_0_ : Prec_gt_0 prec1) (prec2_gt_0_ : Prec_gt_0 prec2).
+Let emin1 := (3 - emax1 - prec1)%Z.
+Let fexp1 := FLT_exp emin1 prec1.
+Let emin2 := (3 - emax2 - prec2)%Z.
+Let fexp2 := FLT_exp emin2 prec2.
+Hypothesis Hmax1 : (prec1 < emax1)%Z.
+Hypothesis Hmax2 : (prec2 < emax2)%Z.
+Let binary_float1 := binary_float prec1 emax1.
+Let binary_float2 := binary_float prec2 emax2.
+
+Definition Bconv (conv_nan: bool -> nan_pl prec1 -> bool * nan_pl prec2) (md: mode) (f: binary_float1) : binary_float2 :=
+  match f with
+    | B754_nan s pl => let '(s, pl) := conv_nan s pl in B754_nan _ _ s pl
+    | B754_infinity s => B754_infinity _ _ s
+    | B754_zero s => B754_zero _ _ s
+    | B754_finite s m e _ => binary_normalize _ _ _ Hmax2 md (cond_Zopp s (Zpos m)) e s
+  end.
+
+Theorem Bconv_correct:
+  forall conv_nan m f,
+  is_finite _ _ f = true ->
+  if Rlt_bool (Rabs (round radix2 fexp2 (round_mode m) (B2R _ _ f))) (bpow radix2 emax2)
+  then
+     B2R _ _ (Bconv conv_nan m f) = round radix2 fexp2 (round_mode m) (B2R _ _ f)
+  /\ is_finite _ _ (Bconv conv_nan m f) = true
+  /\ Bsign _ _ (Bconv conv_nan m f) = Bsign _ _ f
+  else
+     B2FF _ _ (Bconv conv_nan m f) = binary_overflow prec2 emax2 m (Bsign _ _ f).
+Proof.
+  intros. destruct f; try discriminate.
+- simpl. rewrite round_0. rewrite Rabs_R0. rewrite Rlt_bool_true. auto. 
+  apply bpow_gt_0. apply valid_rnd_round_mode. 
+- generalize (binary_normalize_correct _ _ _ Hmax2 m (cond_Zopp b (Zpos m0)) e b).
+  fold emin2; fold fexp2. simpl. destruct Rlt_bool. 
+  + intros (A & B & C). split. auto. split. auto. rewrite C. 
+    destruct b; simpl. 
+    rewrite Rcompare_Lt. auto. apply F2R_lt_0_compat. simpl. compute; auto. 
+    rewrite Rcompare_Gt. auto. apply F2R_gt_0_compat. simpl. compute; auto.
+  + intros A. rewrite A. f_equal. destruct b.
+    apply Rlt_bool_true. apply F2R_lt_0_compat. simpl. compute; auto. 
+    apply Rlt_bool_false. apply Rlt_le. apply Rgt_lt. apply F2R_gt_0_compat. simpl. compute; auto.
+Qed.
+
+(** Converting a finite FP number to higher or equal precision preserves its value. *)
+
+Theorem Bconv_widen_exact:
+  (prec2 >= prec1)%Z -> (emax2 >= emax1)%Z ->
+  forall conv_nan m f,
+  is_finite _ _ f = true ->
+     B2R _ _ (Bconv conv_nan m f) = B2R _ _ f
+  /\ is_finite _ _ (Bconv conv_nan m f) = true
+  /\ Bsign _ _ (Bconv conv_nan m f) = Bsign _ _ f.
+Proof.
+  intros PREC EMAX; intros. generalize (Bconv_correct conv_nan m f H).
+  assert (LT: (Rabs (B2R _ _ f) < bpow radix2 emax2)%R).
+  {
+    destruct f; try discriminate; simpl. 
+    rewrite Rabs_R0. apply bpow_gt_0. 
+    apply Rlt_le_trans with (bpow radix2 emax1).
+    rewrite F2R_cond_Zopp. rewrite abs_cond_Ropp. rewrite <- F2R_Zabs. simpl Z.abs.
+    eapply bounded_lt_emax; eauto.
+    apply bpow_le. omega.
+  }
+  assert (EQ: round radix2 fexp2 (round_mode m) (B2R prec1 emax1 f) = B2R prec1 emax1 f).
+  {
+    apply round_generic. apply valid_rnd_round_mode. eapply generic_inclusion_le.
+    5: apply generic_format_B2R. apply fexp_correct; auto. apply fexp_correct; auto. 
+    instantiate (1 := emax2). intros. unfold fexp2, FLT_exp. unfold emin2. zify; omega.
+    apply Rlt_le; auto.
+  }
+  rewrite EQ. rewrite Rlt_bool_true by auto. auto. 
+Qed.
+
+(** Conversion from integers and change of format *)
+
+Theorem Bconv_BofZ:
+  forall conv_nan n,
+  integer_representable prec1 emax1 n ->
+  Bconv conv_nan mode_NE (BofZ prec1 emax1 _ Hmax1 n) = BofZ prec2 emax2 _ Hmax2 n.
+Proof.
+  intros. 
+  destruct (BofZ_representable _ _ _ Hmax1 n H) as (A & B & C). 
+  set (f := BofZ prec1 emax1 prec1_gt_0_ Hmax1 n) in *.
+  generalize (Bconv_correct conv_nan mode_NE f B).
+  unfold BofZ. 
+  generalize (binary_normalize_correct _ _ _ Hmax2 mode_NE n 0 false). 
+  fold emin2; fold fexp2. rewrite A. 
+  replace (F2R {| Fnum := n; Fexp := 0 |}) with (Z2R n).
+  destruct Rlt_bool. 
+- intros (P & Q & R) (D & E & F). apply B2R_Bsign_inj; auto. 
+  congruence. rewrite F, C, R. change 0%R with (Z2R 0). rewrite Rcompare_Z2R. 
+  unfold Zlt_bool. auto. 
+- intros P Q. apply B2FF_inj. rewrite P, Q. rewrite C. f_equal. change 0%R with (Z2R 0).
+  generalize (Zlt_bool_spec n 0); intros LT; inversion LT.
+  rewrite Rlt_bool_true; auto. apply Z2R_lt; auto.
+  rewrite Rlt_bool_false; auto. apply Z2R_le; auto.
+- unfold F2R; simpl. rewrite Rmult_1_r. auto.
+Qed.
+
+(** Change of format (to higher precision) and conversion to integer. *)
+
+Theorem ZofB_Bconv:
+  prec2 >= prec1 -> emax2 >= emax1 ->
+  forall conv_nan m f n,
+  ZofB _ _ f = Some n -> ZofB _ _ (Bconv conv_nan m f) = Some n.
+Proof.
+  intros. rewrite ZofB_correct in H1. destruct (is_finite _ _ f) eqn:FIN; inversion H1.
+  destruct (Bconv_widen_exact H H0 conv_nan m f) as (A & B & C). auto. 
+  rewrite ZofB_correct. rewrite B. rewrite A. auto.
+Qed.
+
+Theorem ZofB_range_Bconv:
+  forall min1 max1 min2 max2,
+  prec2 >= prec1 -> emax2 >= emax1 -> min2 <= min1 -> max1 <= max2 ->
+  forall conv_nan m f n,
+  ZofB_range _ _ f min1 max1 = Some n ->
+  ZofB_range _ _ (Bconv conv_nan m f) min2 max2 = Some n.
+Proof.
+  intros. 
+  destruct (ZofB_range_inversion _ _ _ _ _ _ H3) as (A & B & C).
+  unfold ZofB_range. erewrite ZofB_Bconv by eauto. 
+  rewrite ! Zle_bool_true by omega. auto.
+Qed.
+
+(** Change of format (to higher precision) and comparison. *)
+
+Theorem Bcompare_Bconv_widen:
+  prec2 >= prec1 -> emax2 >= emax1 ->
+  forall conv_nan m x y,
+  Bcompare _ _ (Bconv conv_nan m x) (Bconv conv_nan m y) = Bcompare _ _ x y.
+Proof.
+  intros. destruct (is_finite _ _ x && is_finite _ _ y) eqn:FIN.
+- apply andb_true_iff in FIN. destruct FIN. 
+  destruct (Bconv_widen_exact H H0 conv_nan m x H1) as (A & B & C).
+  destruct (Bconv_widen_exact H H0 conv_nan m y H2) as (D & E & F).
+  rewrite ! Bcompare_finite_correct by auto. rewrite A, D. auto.
+- generalize (Bconv_widen_exact H H0 conv_nan m x)
+             (Bconv_widen_exact H H0 conv_nan m y); intros P Q.
+  destruct x, y; try discriminate; simpl in P, Q; simpl;
+  repeat (match goal with |- context [conv_nan ?b ?pl] => destruct (conv_nan b pl) end);
+  auto.
+  destruct Q as (D & E & F); auto. 
+  destruct (binary_normalize prec2 emax2 prec2_gt_0_ Hmax2 m (cond_Zopp b0 (Z.pos m0)) e b0);
+  discriminate || reflexivity.
+  destruct P as (A & B & C); auto. 
+  destruct (binary_normalize prec2 emax2 prec2_gt_0_ Hmax2 m (cond_Zopp b (Z.pos m0)) e b);
+  try discriminate; simpl. destruct b; auto. destruct b, b1; auto. 
+  destruct P as (A & B & C); auto. 
+  destruct (binary_normalize prec2 emax2 prec2_gt_0_ Hmax2 m (cond_Zopp b (Z.pos m0)) e b);
+  try discriminate; simpl. destruct b; auto.
+  destruct b, b2; auto.
+Qed.
+
+End Conversions.
+
+Section Compose_Conversions.
+
+Variable prec1 emax1 prec2 emax2 : Z.
+Context (prec1_gt_0_ : Prec_gt_0 prec1) (prec2_gt_0_ : Prec_gt_0 prec2).
+Let emin1 := (3 - emax1 - prec1)%Z.
+Let fexp1 := FLT_exp emin1 prec1.
+Let emin2 := (3 - emax2 - prec2)%Z.
+Let fexp2 := FLT_exp emin2 prec2.
+Hypothesis Hmax1 : (prec1 < emax1)%Z.
+Hypothesis Hmax2 : (prec2 < emax2)%Z.
+Let binary_float1 := binary_float prec1 emax1.
+Let binary_float2 := binary_float prec2 emax2.
+
+(** Converting to a higher precision then down to the original format 
+    is the identity. *)
+Theorem Bconv_narrow_widen:
+  prec2 >= prec1 -> emax2 >= emax1 ->
+  forall narrow_nan widen_nan m f, 
+  is_nan _ _ f = false ->
+  Bconv prec2 emax2 prec1 emax1 _ Hmax1 narrow_nan m (Bconv prec1 emax1 prec2 emax2 _ Hmax2 widen_nan m f) = f.
+Proof.
+  intros. destruct (is_finite _ _ f) eqn:FIN. 
+- assert (EQ: round radix2 fexp1 (round_mode m) (B2R prec1 emax1 f) = B2R prec1 emax1 f).
+  { apply round_generic. apply valid_rnd_round_mode. apply generic_format_B2R. }
+  generalize (Bconv_widen_exact _ _ _ _ _ _ Hmax2 H H0 widen_nan m f FIN).
+  set (f' := Bconv prec1 emax1 prec2 emax2 _ Hmax2 widen_nan m f). 
+  intros (A & B & C).
+  generalize (Bconv_correct _ _ _ _ _ Hmax1 narrow_nan m f' B).
+  fold emin1. fold fexp1. rewrite A, C, EQ. rewrite Rlt_bool_true. 
+  intros (D & E & F). 
+  apply B2R_Bsign_inj; auto.
+  destruct f; try discriminate; simpl.
+  rewrite Rabs_R0. apply bpow_gt_0. 
+  rewrite F2R_cond_Zopp. rewrite abs_cond_Ropp. rewrite <- F2R_Zabs. simpl Z.abs.
+  eapply bounded_lt_emax; eauto.
+- destruct f; try discriminate. simpl. auto.
+Qed.
+
+End Compose_Conversions.
+
+(** Specialization to binary32 and binary64 formats. *)
+
+Require Import Fappli_IEEE_bits.
+
+Section B3264.
+
+Let prec32 : (0 < 24)%Z.
+apply refl_equal.
+Qed.
+
+Let emax32 : (24 < 128)%Z.
+apply refl_equal.
+Qed.
+
+Let prec64 : (0 < 53)%Z.
+apply refl_equal.
+Qed.
+
+Let emax64 : (53 < 1024)%Z.
+apply refl_equal.
+Qed.
+
+Definition b32_abs : (bool -> nan_pl 24 -> bool * nan_pl 24) -> binary32 -> binary32 := Babs 24 128.
+Definition b32_eq_dec : forall (f1 f2: binary32), {f1=f2} + {f1<>f2} := Beq_dec 24 128.
+Definition b32_compare : binary32 -> binary32 -> option comparison := Bcompare 24 128.
+Definition b32_of_Z : Z -> binary32 := BofZ 24 128 prec32 emax32.
+Definition b32_to_Z : binary32 -> option Z := ZofB 24 128.
+Definition b32_to_Z_range : binary32 -> Z -> Z -> option Z := ZofB_range 24 128.
+Definition b32_exact_inverse : binary32 -> option binary32 := Bexact_inverse 24 128 prec32.
+
+Definition b64_abs : (bool -> nan_pl 53 -> bool * nan_pl 53) -> binary64 -> binary64 := Babs 53 1024.
+Definition b64_eq_dec : forall (f1 f2: binary64), {f1=f2} + {f1<>f2} := Beq_dec 53 1024.
+Definition b64_compare : binary64 -> binary64 -> option comparison := Bcompare 53 1024.
+Definition b64_of_Z : Z -> binary64 := BofZ 53 1024 prec64 emax64.
+Definition b64_to_Z : binary64 -> option Z := ZofB 53 1024.
+Definition b64_to_Z_range : binary64 -> Z -> Z -> option Z := ZofB_range 53 1024.
+Definition b64_exact_inverse : binary64 -> option binary64 := Bexact_inverse 53 1024 prec64.
+
+Definition b64_of_b32 : (bool -> nan_pl 24 -> bool * nan_pl 53) -> mode -> binary32 -> binary64 :=
+  Bconv 24 128 53 1024 prec32 prec64.
+Definition b32_of_b64 : (bool -> nan_pl 53 -> bool * nan_pl 24) -> mode -> binary64 -> binary32 :=
+  Bconv 53 1024 24 128 prec64 prec32.
+
+End B3264.
diff --git a/lib/Floats.v b/lib/Floats.v
index 35009d82..bbc2a927 100644
--- a/lib/Floats.v
+++ b/lib/Floats.v
@@ -16,47 +16,121 @@
 
 (** Formalization of floating-point numbers, using the Flocq library. *)
 
-Require Import Axioms.
 Require Import Coqlib.
 Require Import Integers.
 Require Import Fappli_IEEE.
 Require Import Fappli_IEEE_bits.
+Require Import Fappli_IEEE_extra.
 Require Import Fcore.
-Require Import Fcalc_round.
-Require Import Fcalc_bracket.
-Require Import Fprop_Sterbenz.
 Require Import Program.
 Require Archi.
 
 Close Scope R_scope.
 
-Definition float := binary64. (**r the type of IEE754 doubles *)
+Definition float := binary64. (**r the type of IEE754 double-precision FP numbers *)
+Definition float32 := binary32. (**r the type of IEE754 single-precision FP numbers *)
+
+(** Boolean-valued comparisons *)
+
+Definition cmp_of_comparison (c: comparison) (x: option Datatypes.comparison) : bool :=
+  match c with
+  | Ceq =>
+      match x with Some Eq => true | _ => false end
+  | Cne =>
+      match x with Some Eq => false | _ => true end
+  | Clt =>
+      match x with Some Lt => true | _ => false end
+  | Cle =>
+      match x with Some(Lt|Eq) => true | _ => false end
+  | Cgt =>
+      match x with Some Gt => true | _ => false end
+  | Cge =>
+      match x with Some(Gt|Eq) => true | _ => false end
+  end.
+
+Lemma cmp_of_comparison_swap:
+  forall c x,
+  cmp_of_comparison (swap_comparison c) x =
+  cmp_of_comparison c (match x with None => None | Some x => Some (CompOpp x) end).
+Proof.
+  intros. destruct c; destruct x as [[]|]; reflexivity. 
+Qed.
+
+Lemma cmp_of_comparison_ne_eq:
+  forall x, cmp_of_comparison Cne x = negb (cmp_of_comparison Ceq x).
+Proof.
+  intros. destruct x as [[]|]; reflexivity. 
+Qed.
+
+Lemma cmp_of_comparison_lt_eq_false:
+  forall x, cmp_of_comparison Clt x = true -> cmp_of_comparison Ceq x = true -> False.
+Proof.
+  destruct x as [[]|]; simpl; intros; discriminate.
+Qed.
+
+Lemma cmp_of_comparison_le_lt_eq:
+  forall x, cmp_of_comparison Cle x = cmp_of_comparison Clt x || cmp_of_comparison Ceq x.
+Proof.
+  destruct x as [[]|]; reflexivity.
+Qed.
+
+Lemma cmp_of_comparison_gt_eq_false:
+  forall x, cmp_of_comparison Cgt x = true -> cmp_of_comparison Ceq x = true -> False.
+Proof.
+  destruct x as [[]|]; simpl; intros; discriminate.
+Qed.
+
+Lemma cmp_of_comparison_ge_gt_eq:
+  forall x, cmp_of_comparison Cge x = cmp_of_comparison Cgt x || cmp_of_comparison Ceq x.
+Proof.
+  destruct x as [[]|]; reflexivity.
+Qed.
+
+Lemma cmp_of_comparison_lt_gt_false:
+  forall x, cmp_of_comparison Clt x = true -> cmp_of_comparison Cgt x = true -> False.
+Proof.
+  destruct x as [[]|]; simpl; intros; discriminate.
+Qed.
+
+(** Function used to parse floats *)
+
+Program Definition build_from_parsed
+  (prec:Z) (emax:Z) (prec_gt_0 : Prec_gt_0 prec) (Hmax:prec < emax)
+  (base:positive) (intPart:positive) (expPart:Z) :=
+  match expPart return _ with
+    | Z0 =>
+      binary_normalize prec emax prec_gt_0 Hmax mode_NE (Zpos intPart) Z0 false
+    | Zpos p =>
+      binary_normalize prec emax prec_gt_0 Hmax mode_NE ((Zpos intPart) * Zpower_pos (Zpos base) p) Z0 false
+    | Zneg p =>
+      let exp := Zpower_pos (Zpos base) p in
+      match exp return 0 < exp -> _ with
+        | Zneg _ | Z0 => _
+        | Zpos p =>
+          fun _ =>
+          FF2B prec emax _ (proj1 (Bdiv_correct_aux prec emax prec_gt_0 Hmax mode_NE false intPart Z0 false p Z0))
+      end _
+  end.
+Next Obligation.
+  apply Zpower_pos_gt_0. reflexivity.
+Qed.
+
+Local Notation __ := (refl_equal Datatypes.Lt).
+
+Local Hint Extern 1 (Prec_gt_0 _) => exact (refl_equal Datatypes.Lt).
+Local Hint Extern 1 (_ < _) => exact (refl_equal Datatypes.Lt).
+
+(** * Double-precision FP numbers *)
 
 Module Float.
 
-Definition zero: float := B754_zero _ _ false. (**r the float [+0.0] *)
+(** ** NaN payload manipulations *)
 
-Definition eq_dec: forall (f1 f2: float), {f1 = f2} + {f1 <> f2}.
-Proof.
-  Ltac try_not_eq := try solve [right; congruence].
-  destruct f1 as [| |? []|], f2 as [| |? []|];
-  try destruct b; try destruct b0;
-  try solve [left; auto]; try_not_eq.
-  destruct (positive_eq_dec x x0); try_not_eq;
-    subst; left; f_equal; f_equal; apply proof_irr.
-  destruct (positive_eq_dec x x0); try_not_eq;
-    subst; left; f_equal; f_equal; apply proof_irr.
-  destruct (positive_eq_dec m m0); try_not_eq;
-  destruct (Z_eq_dec e e1); try solve [right; intro H; inv H; congruence];
-  subst; left; rewrite (proof_irr e0 e2); auto.
-  destruct (positive_eq_dec m m0); try_not_eq;
-  destruct (Z_eq_dec e e1); try solve [right; intro H; inv H; congruence];
-  subst; left; rewrite (proof_irr e0 e2); auto.
-Defined.
+(** The following definitions are not part of the IEEE754 standard but
+    apply to all architectures supported by CompCert. *)
+
+(** Transform a Nan payload to a quiet Nan payload. *)
 
-(* Transform a Nan payload to a quiet Nan payload.
-   This is not part of the IEEE754 standard, but shared between all
-   architectures of Compcert. *)
 Program Definition transform_quiet_pl (pl:nan_pl 53) : nan_pl 53 :=
   Pos.lor pl (nat_iter 51 xO xH).
 Next Obligation.
@@ -91,39 +165,10 @@ Proof.
   simpl. apply lor_idempotent.
 Qed.
 
-(** Arithmetic operations *)
-
-(* The Nan payload operations for neg and abs is not part of the IEEE754
-   standard, but shared between all architectures of Compcert. *)
-Definition neg_pl (s:bool) (pl:nan_pl 53) := (negb s, pl).
-Definition abs_pl (s:bool) (pl:nan_pl 53) := (false, pl).
-
-Definition neg: float -> float := b64_opp neg_pl. (**r opposite (change sign) *)
-Definition abs (x: float): float := (**r absolute value (set sign to [+]) *)
-  match x with
-  | B754_nan s pl => let '(s, pl) := abs_pl s pl in B754_nan _ _ s pl
-  | B754_infinity _ => B754_infinity _ _ false
-  | B754_finite _ m e H => B754_finite _ _ false m e H
-  | B754_zero _ => B754_zero _ _ false
-  end.
-
-Definition binary_normalize64 (m e:Z) (s:bool): float :=
-  binary_normalize 53 1024 eq_refl eq_refl mode_NE m e s.
+(** Nan payload operations for single <-> double conversions. *)
 
-Definition binary_normalize64_correct (m e:Z) (s:bool) :=
-  binary_normalize_correct 53 1024 eq_refl eq_refl mode_NE m e s.
-Global Opaque binary_normalize64_correct.
-
-Definition binary_normalize32 (m e:Z) (s:bool) : binary32 :=
-  binary_normalize 24 128 eq_refl eq_refl mode_NE m e s.
-
-Definition binary_normalize32_correct (m e:Z) (s:bool) :=
-  binary_normalize_correct 24 128 eq_refl eq_refl mode_NE m e s.
-Global Opaque binary_normalize32_correct.
-
-(* The Nan payload operations for single <-> double conversions are not part of
-   the IEEE754 standard, but shared between all architectures of Compcert. *)
-Definition floatofbinary32_pl (s:bool) (pl:nan_pl 24) : (bool * nan_pl 53).
+Definition of_single_pl (s:bool) (pl:nan_pl 24) : (bool * nan_pl 53).
+Proof.
 Proof.
   refine (s, transform_quiet_pl (exist _ (Pos.shiftl_nat (proj1_sig pl) 29) _)).
   abstract (
@@ -133,7 +178,8 @@ Proof.
     zify; omega).
 Defined.
 
-Definition binary32offloat_pl (s:bool) (pl:nan_pl 53) : (bool * nan_pl 24).
+Definition to_single_pl (s:bool) (pl:nan_pl 53) : (bool * nan_pl 24).
+Proof.
 Proof.
   refine (s, exist _ (Pos.shiftr_nat (proj1_sig (transform_quiet_pl pl)) 29) _).
   abstract (
@@ -144,128 +190,14 @@ Proof.
     rewrite !H, <- !NPeano.Nat.sub_add_distr; zify; omega).
 Defined.
 
-Definition floatofbinary32 (f: binary32) : float := (**r single precision embedding in double precision *)
-  match f with
-    | B754_nan s pl => let '(s, pl) := floatofbinary32_pl s pl in B754_nan _ _ s pl
-    | B754_infinity s => B754_infinity _ _ s
-    | B754_zero s => B754_zero _ _ s
-    | B754_finite s m e _ =>
-      binary_normalize64 (cond_Zopp s (Zpos m)) e s
-  end.
-
-Definition binary32offloat (f: float) : binary32 := (**r conversion to single precision *)
-  match f with
-    | B754_nan s pl => let '(s, pl) := binary32offloat_pl s pl in B754_nan _ _ s pl
-    | B754_infinity s => B754_infinity _ _ s
-    | B754_zero s => B754_zero _ _ s
-    | B754_finite s m e _ =>
-      binary_normalize32 (cond_Zopp s (Zpos m)) e s
-  end.
-
-Definition singleoffloat (f: float): float := (**r conversion to single precision, embedded in double *)
-  floatofbinary32 (binary32offloat f).
-
-Definition Zoffloat (f:float): option Z := (**r conversion to Z *)
-  match f with
-    | B754_finite s m (Zpos e) _ => Some (cond_Zopp s (Zpos m) * Zpower_pos radix2 e)
-    | B754_finite s m 0 _ => Some (cond_Zopp s (Zpos m))
-    | B754_finite s m (Zneg e) _ => Some (cond_Zopp s (Zpos m / Zpower_pos radix2 e))
-    | B754_zero _ => Some 0
-    | _ => None
-  end.
-
-Definition intoffloat (f:float): option int := (**r conversion to signed 32-bit int *)
-  match Zoffloat f with
-    | Some n =>
-      if Zle_bool Int.min_signed n && Zle_bool n Int.max_signed then
-        Some (Int.repr n)
-      else
-        None
-    | None => None
-  end.
-
-Definition intuoffloat (f:float): option int := (**r conversion to unsigned 32-bit int *)
-  match Zoffloat f with
-    | Some n =>
-      if Zle_bool 0 n && Zle_bool n Int.max_unsigned then
-        Some (Int.repr n)
-      else
-        None
-    | None => None
-  end.
-
-Definition longoffloat (f:float): option int64 := (**r conversion to signed 64-bit int *)
-  match Zoffloat f with
-    | Some n =>
-      if Zle_bool Int64.min_signed n && Zle_bool n Int64.max_signed then
-        Some (Int64.repr n)
-      else
-        None
-    | None => None
-  end.
-
-Definition longuoffloat (f:float): option int64 := (**r conversion to unsigned 64-bit int *)
-  match Zoffloat f with
-    | Some n =>
-      if Zle_bool 0 n && Zle_bool n Int64.max_unsigned then
-        Some (Int64.repr n)
-      else
-        None
-    | None => None
-  end.
-
-(* Functions used to parse floats *)
-Program Definition build_from_parsed
-  (prec:Z) (emax:Z) (prec_gt_0 :Prec_gt_0 prec) (Hmax:prec < emax)
-  (base:positive) (intPart:positive) (expPart:Z) :=
-  match expPart return _ with
-    | Z0 =>
-      binary_normalize prec emax prec_gt_0 Hmax mode_NE (Zpos intPart) Z0 false
-    | Zpos p =>
-      binary_normalize prec emax prec_gt_0 Hmax mode_NE ((Zpos intPart) * Zpower_pos (Zpos base) p) Z0 false
-    | Zneg p =>
-      let exp := Zpower_pos (Zpos base) p in
-      match exp return 0 < exp -> _ with
-        | Zneg _ | Z0 => _
-        | Zpos p =>
-          fun _ =>
-          FF2B prec emax _ (proj1 (Bdiv_correct_aux prec emax prec_gt_0 Hmax mode_NE false intPart Z0 false p Z0))
-      end _
-  end.
-Next Obligation.
-apply Zpower_pos_gt_0.
-reflexivity.
-Qed.
-
-Definition build_from_parsed64 (base:positive) (intPart:positive) (expPart:Z) : float :=
-  build_from_parsed 53 1024 eq_refl eq_refl  base intPart expPart.
+(** NaN payload operations for opposite and absolute value. *)
 
-Definition build_from_parsed32 (base:positive) (intPart:positive) (expPart:Z) : float :=
-  floatofbinary32 (build_from_parsed 24 128 eq_refl eq_refl  base intPart expPart).
-
-Definition floatofint (n:int): float := (**r conversion from signed 32-bit int *)
-  binary_normalize64 (Int.signed n) 0 false.
-Definition floatofintu (n:int): float:= (**r conversion from unsigned 32-bit int *)
-  binary_normalize64 (Int.unsigned n) 0 false.
-
-Definition floatoflong (n:int64): float := (**r conversion from signed 64-bit int *)
-  binary_normalize64 (Int64.signed n) 0 false.
-Definition floatoflongu (n:int64): float:= (**r conversion from unsigned 64-bit int *)
-  binary_normalize64 (Int64.unsigned n) 0 false.
-
-Definition singleofint (n:int): float := (**r conversion from signed 32-bit int to single-precision float *)
-  floatofbinary32 (binary_normalize32 (Int.signed n) 0 false).
-Definition singleofintu (n:int): float:= (**r conversion from unsigned 32-bit int to single-precision float *)
-  floatofbinary32 (binary_normalize32 (Int.unsigned n) 0 false).
-
-Definition singleoflong (n:int64): float := (**r conversion from signed 64-bit int to single-precision float *)
-  floatofbinary32 (binary_normalize32 (Int64.signed n) 0 false).
-Definition singleoflongu (n:int64): float:= (**r conversion from unsigned 64-bit int to single-precision float *)
-  floatofbinary32 (binary_normalize32 (Int64.unsigned n) 0 false).
+Definition neg_pl (s:bool) (pl:nan_pl 53) := (negb s, pl).
+Definition abs_pl (s:bool) (pl:nan_pl 53) := (false, pl).
 
-(* The Nan payload operations for two-argument arithmetic operations are not part of
-   the IEEE754 standard, but all architectures of Compcert share a similar
-   NaN behavior, parameterized by:
+(** The NaN payload operations for two-argument arithmetic operations
+   are not part of the IEEE754 standard, but all architectures of
+   Compcert share a similar NaN behavior, parameterized by:
 - a "default" payload which occurs when an operation generates a NaN from
   non-NaN arguments;
 - a choice function determining which of the payload arguments to choose,
@@ -274,78 +206,67 @@ Definition singleoflongu (n:int64): float:= (**r conversion from unsigned 64-bit
 Definition binop_pl (x y: binary64) : bool*nan_pl 53 :=
   match x, y with
   | B754_nan s1 pl1, B754_nan s2 pl2 =>
-      if Archi.choose_binop_pl s1 pl1 s2 pl2
+      if Archi.choose_binop_pl_64 s1 pl1 s2 pl2
       then (s2, transform_quiet_pl pl2)
       else (s1, transform_quiet_pl pl1)
   | B754_nan s1 pl1, _ => (s1, transform_quiet_pl pl1)
   | _, B754_nan s2 pl2 => (s2, transform_quiet_pl pl2)
-  | _, _ => Archi.default_pl
+  | _, _ => Archi.default_pl_64
   end.
 
+(** ** Operations over double-precision floats *)
+
+Definition zero: float := B754_zero _ _ false. (**r the float [+0.0] *)
+
+Definition eq_dec: forall (f1 f2: float), {f1 = f2} + {f1 <> f2} := b64_eq_dec.
+
+(** Arithmetic operations *)
+
+Definition neg: float -> float := b64_opp neg_pl. (**r opposite (change sign) *)
+Definition abs: float -> float := b64_abs abs_pl. (**r absolute value (set sign to [+]) *)
 Definition add: float -> float -> float := b64_plus binop_pl mode_NE. (**r addition *)
 Definition sub: float -> float -> float := b64_minus binop_pl mode_NE. (**r subtraction *)
 Definition mul: float -> float -> float := b64_mult binop_pl mode_NE. (**r multiplication *)
 Definition div: float -> float -> float := b64_div binop_pl mode_NE. (**r division *)
+Definition cmp (c:comparison) (f1 f2: float) : bool := (**r comparison *)
+  cmp_of_comparison c (b64_compare f1 f2).
 
-Definition order_float (f1 f2:float): option Datatypes.comparison :=
-  match f1, f2 with
-    | B754_nan _ _,_ | _,B754_nan _ _ => None
-    | B754_infinity true, B754_infinity true
-    | B754_infinity false, B754_infinity false => Some Eq
-    | B754_infinity true, _ => Some Lt
-    | B754_infinity false, _ => Some Gt
-    | _, B754_infinity true => Some Gt
-    | _, B754_infinity false => Some Lt
-    | B754_finite true _ _ _, B754_zero _ => Some Lt
-    | B754_finite false _ _ _, B754_zero _ => Some Gt
-    | B754_zero _, B754_finite true _ _ _ => Some Gt
-    | B754_zero _, B754_finite false _ _ _ => Some Lt
-    | B754_zero _, B754_zero _ => Some Eq
-    | B754_finite s1 m1 e1 _, B754_finite s2 m2 e2 _ =>
-      match s1, s2 with
-        | true, false => Some Lt
-        | false, true => Some Gt
-        | false, false =>
-          match Zcompare e1 e2 with
-            | Lt => Some Lt
-            | Gt => Some Gt
-            | Eq => Some (Pcompare m1 m2 Eq)
-          end
-        | true, true =>
-          match Zcompare e1 e2 with
-            | Lt => Some Gt
-            | Gt => Some Lt
-            | Eq => Some (CompOpp (Pcompare m1 m2 Eq))
-          end
-      end
-  end.
+(** Conversions *)
 
-Definition cmp (c:comparison) (f1 f2:float) : bool := (**r comparison *)
-  match c with
-  | Ceq =>
-      match order_float f1 f2 with Some Eq => true | _ => false end
-  | Cne =>
-      match order_float f1 f2 with Some Eq => false | _ => true end
-  | Clt =>
-      match order_float f1 f2 with Some Lt => true | _ => false end
-  | Cle =>
-      match order_float f1 f2 with Some(Lt|Eq) => true | _ => false end
-  | Cgt =>
-      match order_float f1 f2 with Some Gt => true | _ => false end
-  | Cge =>
-      match order_float f1 f2 with Some(Gt|Eq) => true | _ => false end
-  end.
+Definition of_single: float32 -> float := b64_of_b32 of_single_pl mode_NE.
+Definition to_single: float -> float32 := b32_of_b64 to_single_pl mode_NE.
+
+Definition to_int (f:float): option int := (**r conversion to signed 32-bit int *)
+  option_map Int.repr (b64_to_Z_range f Int.min_signed Int.max_signed).
+Definition to_intu (f:float): option int := (**r conversion to unsigned 32-bit int *)
+  option_map Int.repr (b64_to_Z_range f 0 Int.max_unsigned).
+Definition to_long (f:float): option int64 := (**r conversion to signed 64-bit int *)
+  option_map Int64.repr (b64_to_Z_range f Int64.min_signed Int64.max_signed).
+Definition to_longu (f:float): option int64 := (**r conversion to unsigned 64-bit int *)
+  option_map Int64.repr (b64_to_Z_range f 0 Int64.max_unsigned).
+
+Definition of_int (n:int): float := (**r conversion from signed 32-bit int *)
+  b64_of_Z (Int.signed n).
+Definition of_intu (n:int): float:= (**r conversion from unsigned 32-bit int *)
+  b64_of_Z (Int.unsigned n).
+
+Definition of_long (n:int64): float := (**r conversion from signed 64-bit int *)
+  b64_of_Z (Int64.signed n).
+Definition of_longu (n:int64): float:= (**r conversion from unsigned 64-bit int *)
+  b64_of_Z (Int64.unsigned n).
+
+Definition from_parsed (base:positive) (intPart:positive) (expPart:Z) : float :=
+  build_from_parsed 53 1024 __ __ base intPart expPart.
 
 (** Conversions between floats and their concrete in-memory representation
-    as a sequence of 64 bits (double precision) or 32 bits (single precision). *)
+    as a sequence of 64 bits. *)
 
-Definition bits_of_double (f: float): int64 := Int64.repr (bits_of_b64 f).
-Definition double_of_bits (b: int64): float := b64_of_bits (Int64.unsigned b).
+Definition to_bits (f: float): int64 := Int64.repr (bits_of_b64 f).
+Definition of_bits (b: int64): float := b64_of_bits (Int64.unsigned b).
 
-Definition bits_of_single (f: float) : int := Int.repr (bits_of_b32 (binary32offloat f)).
-Definition single_of_bits (b: int): float := floatofbinary32 (b32_of_bits (Int.unsigned b)).
+Definition from_words (hi lo: int) : float := of_bits (Int64.ofwords hi lo).
 
-Definition from_words (hi lo: int) : float := double_of_bits (Int64.ofwords hi lo).
+(** ** Properties *)
 
 (** Below are the only properties of floating-point arithmetic that we
   rely on in the compiler proof. *)
@@ -362,75 +283,37 @@ Ltac smart_omega :=
   compute_this Int.min_signed; compute_this Int.max_signed;
   compute_this Int64.modulus; compute_this Int64.half_modulus;
   compute_this Int64.max_unsigned;
-  compute_this (Zpower_pos 2 1024); compute_this (Zpower_pos 2 53); compute_this (Zpower_pos 2 52);
+  compute_this (Zpower_pos 2 1024); compute_this (Zpower_pos 2 53); compute_this (Zpower_pos 2 52); compute_this (Zpower_pos 2 32);
   zify; omega.
 
-Lemma floatofbinary32_exact :
-  forall f, is_finite_strict _ _ f = true ->
-    is_finite_strict _ _ (floatofbinary32 f) = true /\ B2R _ _ f = B2R _ _ (floatofbinary32 f).
-Proof.
-  destruct f as [ | | |s m e]; try discriminate; intro.
-  pose proof (binary_normalize64_correct (cond_Zopp s (Zpos m)) e s).
-  match goal with [H0:if Rlt_bool (Rabs ?x) _ then _ else _ |- _ /\ ?y = _] => assert (x=y)%R end.
-  apply round_generic; [now apply valid_rnd_round_mode|].
-  apply (generic_inclusion_ln_beta _ (FLT_exp (3 - 128 - 24) 24)).
-  intro; eapply Zle_trans; [apply Zle_max_compat_l | apply Zle_max_compat_r]; omega.
-  apply generic_format_canonic; apply canonic_canonic_mantissa; apply (proj1 (andb_prop _ _ e0)).
-  rewrite H1, Rlt_bool_true in H0; intuition; unfold floatofbinary32, binary_normalize64.
-  match goal with [ |- _ _ _ ?x = true ] => destruct x end; try discriminate.
-  symmetry in H2; apply F2R_eq_0_reg in H2; destruct s; discriminate.
-  reflexivity.
-  eapply Rlt_trans.
-  unfold B2R; rewrite <- F2R_Zabs, abs_cond_Zopp; eapply bounded_lt_emax; now apply e0.
-  now apply bpow_lt.
-Qed.
-
-Lemma binary32offloatofbinary32_num :
-  forall f, is_nan _ _ f = false ->
-            binary32offloat (floatofbinary32 f) = f.
-Proof.
-  intros f Hnan; pose proof (floatofbinary32_exact f); destruct f as [ | | |s m e]; try reflexivity.
-  discriminate.
-  specialize (H eq_refl); destruct H.
-  destruct (floatofbinary32 (B754_finite 24 128 s m e e0)) as [ | | |s1 m1 e1]; try discriminate.
-  unfold binary32offloat.
-  pose proof (binary_normalize32_correct (cond_Zopp s1 (Zpos m1)) e1 s1).
-  unfold B2R at 2 in H0; cbv iota zeta beta in H0; rewrite <- H0, round_generic in H1.
-  rewrite Rlt_bool_true in H1.
-  unfold binary_normalize32.
-  apply B2R_inj; intuition; match goal with [|- _ _ _ ?f = true] => destruct f end; try discriminate.
-  symmetry in H2; apply F2R_eq_0_reg in H2; destruct s; discriminate.
-  reflexivity.
-  unfold B2R; rewrite <- F2R_Zabs, abs_cond_Zopp; eapply bounded_lt_emax; apply e0.
-  now apply valid_rnd_round_mode.
-  now apply generic_format_B2R.
-Qed.
-
-Lemma floatofbinary32offloatofbinary32_pl:
+Lemma of_single_to_single_pl:
   forall s pl,
-    prod_rect (fun _ => _) floatofbinary32_pl (prod_rect (fun _ => _) binary32offloat_pl (floatofbinary32_pl s pl)) = floatofbinary32_pl s pl.
+    prod_rect (fun _ => _) of_single_pl (prod_rect (fun _ => _) to_single_pl (of_single_pl s pl)) = of_single_pl s pl.
 Proof.
-  destruct pl. unfold binary32offloat_pl, floatofbinary32_pl.
+  destruct pl. unfold of_single_pl, to_single_pl.
   unfold transform_quiet_pl, proj1_sig. simpl.
   f_equal. apply nan_payload_fequal.
   unfold Pos.shiftr_nat. simpl.
   rewrite !lor_idempotent. reflexivity.
 Qed.
 
-Lemma floatofbinary32offloatofbinary32 :
-  forall f, floatofbinary32 (binary32offloat (floatofbinary32 f)) = floatofbinary32 f.
+Lemma of_single_to_single_of_single:
+  forall f, of_single (to_single (of_single f)) = of_single f.
 Proof.
-  destruct f; try (rewrite binary32offloatofbinary32_num; tauto).
-  unfold floatofbinary32, binary32offloat.
-  rewrite <- floatofbinary32offloatofbinary32_pl at 2.
+  intros. unfold of_single, to_single, b64_of_b32, b32_of_b64.
+  destruct (is_nan _ _ f) eqn:ISNAN.
+- destruct f; try discriminate. 
+  unfold Bconv. 
+  rewrite <- of_single_to_single_pl at 2.
   reflexivity.
+- rewrite (Bconv_narrow_widen 24); auto; omega.
 Qed.
 
-Lemma binary32offloatofbinary32offloat_pl:
+Lemma to_single_of_single_pl:
   forall s pl,
-    prod_rect (fun _ => _) binary32offloat_pl (prod_rect  (fun _ => _) floatofbinary32_pl (binary32offloat_pl s pl)) = binary32offloat_pl s pl.
+    prod_rect (fun _ => _) to_single_pl (prod_rect  (fun _ => _) of_single_pl (to_single_pl s pl)) = to_single_pl s pl.
 Proof.
-  destruct pl. unfold binary32offloat_pl, floatofbinary32_pl. unfold prod_rect.
+  destruct pl. unfold of_single_pl, to_single_pl. unfold prod_rect.
   f_equal. apply nan_payload_fequal.
   rewrite transform_quiet_pl_idempotent.
   unfold transform_quiet_pl, proj1_sig.
@@ -444,293 +327,132 @@ Proof.
   rewrite !nat_iter_succ_r with (f:=Pos.div2). auto.
 Qed.
 
-Lemma binary32offloatofbinary32offloat :
-  forall f, binary32offloat (floatofbinary32 (binary32offloat f)) = binary32offloat f.
+Lemma to_single_of_single_to_single:
+  forall f, to_single (of_single (to_single f)) = to_single f.
 Proof.
-  destruct f; try (rewrite binary32offloatofbinary32_num; simpl; tauto).
-  unfold floatofbinary32, binary32offloat.
-  rewrite <- binary32offloatofbinary32offloat_pl at 2.
+  intros. unfold to_single, of_single, b32_of_b64, b64_of_b32.
+  destruct (is_nan _ _ f) eqn:ISNAN.
+- destruct f; try discriminate. 
+  unfold Bconv. 
+  rewrite <- to_single_of_single_pl at 2.
   reflexivity.
-  rewrite binary32offloatofbinary32_num; simpl. auto.
-  unfold binary_normalize32.
-  pose proof (binary_normalize32_correct (cond_Zopp b (Z.pos m)) e b).
-  destruct binary_normalize; auto. simpl in H.
-  destruct Rlt_bool in H. intuition.
-  unfold binary_overflow in H. destruct n.
-  destruct overflow_to_inf in H; discriminate.
+- rewrite (Bconv_narrow_widen 24); auto. omega. omega. 
+  set (f' := Bconv 53 1024 24 128 __ __ to_single_pl mode_NE f).
+  destruct f; try discriminate; try reflexivity. 
+  exploit (Bconv_correct 53 1024 24 128 __ __ to_single_pl mode_NE
+     (B754_finite 53 1024 b m e e0)). auto. 
+  destruct Rlt_bool.
+  intros (A & B & C). apply is_finite_not_is_nan; auto.
+  fold f'. intros A. destruct f'; auto.
+  simpl in A. unfold binary_overflow in A.
+  destruct overflow_to_inf in A; destruct n; discriminate.
 Qed.
 
-Theorem singleoffloat_idem:
-  forall f, singleoffloat (singleoffloat f) = singleoffloat f.
-Proof.
-  intros; unfold singleoffloat; rewrite binary32offloatofbinary32offloat; reflexivity.
-Qed.
+(** Commutativity properties of addition and multiplication. *)
 
-Theorem singleoflong_idem:
-  forall n, singleoffloat (singleoflong n) = singleoflong n.
+Theorem add_commut:
+  forall x y, is_nan _ _ x = false \/ is_nan _ _ y = false -> add x y = add y x.
 Proof.
-  intros; unfold singleoffloat, singleoflong. rewrite floatofbinary32offloatofbinary32; reflexivity.
+  intros. apply Bplus_commut.
+  destruct x, y; try reflexivity. simpl in H. intuition congruence.
 Qed.
 
-Theorem singleoflongu_idem:
-  forall n, singleoffloat (singleoflongu n) = singleoflongu n.
+Theorem mul_commut:
+  forall x y, is_nan _ _ x = false \/ is_nan _ _ y = false -> mul x y = mul y x.
 Proof.
-  intros; unfold singleoffloat, singleoflongu. rewrite floatofbinary32offloatofbinary32; reflexivity.
+  intros. apply Bmult_commut.
+  destruct x, y; try reflexivity. simpl in H. intuition congruence.
 Qed.
 
-Definition is_single (f: float) : Prop := exists s, f = floatofbinary32 s.
-
-Theorem singleoffloat_is_single:
-  forall f, is_single (singleoffloat f).
-Proof.
-  intros. exists (binary32offloat f); auto.
-Qed.
+(** Multiplication by 2 is diagonal addition. *)
 
-Theorem singleoffloat_of_single:
-  forall f, is_single f -> singleoffloat f = f.
+Theorem mul2_add:
+  forall f, add f f = mul f (of_int (Int.repr 2%Z)).
 Proof.
-  intros. destruct H as [s EQ]. subst f. unfold singleoffloat.
-  apply floatofbinary32offloatofbinary32.
+  intros. apply Bmult2_Bplus. 
+  intros. destruct x; try discriminate. simpl. 
+  transitivity (b, transform_quiet_pl n). 
+  destruct Archi.choose_binop_pl_64; auto. 
+  destruct y; auto || discriminate.
 Qed.
 
-Theorem is_single_dec: forall f, {is_single f} + {~is_single f}.
-Proof.
-  intros. case (eq_dec (singleoffloat f) f); intros.
-  unfold singleoffloat in e. left. exists (binary32offloat f). auto.
-  right; red; intros; elim n. apply singleoffloat_of_single; auto.
-Defined.
+(** Divisions that can be turned into multiplication by an inverse. *)
 
-(** Commutativity properties of addition and multiplication. *)
+Definition exact_inverse : float -> option float := b64_exact_inverse. 
 
-Theorem add_commut:
-  forall x y, is_nan _ _ x = false \/ is_nan _ _ y = false -> add x y = add y x.
-Proof.
-  intros x y NAN. unfold add, b64_plus. 
-  pose proof (Bplus_correct 53 1024 eq_refl eq_refl binop_pl mode_NE x y).
-  pose proof (Bplus_correct 53 1024 eq_refl eq_refl binop_pl mode_NE y x).
-  unfold Bplus in *; destruct x; destruct y; auto.
-- rewrite (eqb_sym b0 b). destruct (eqb b b0) eqn:EQB; auto. f_equal; apply eqb_prop; auto.
-- rewrite (eqb_sym b0 b). destruct (eqb b b0) eqn:EQB.
-  f_equal; apply eqb_prop; auto.
-  auto.
-- simpl in NAN; intuition congruence.
-- exploit H; auto. clear H. exploit H0; auto. clear H0. 
-  set (x := B754_finite 53 1024 b0 m0 e1 e2). 
-  set (rx := B2R 53 1024 x).
-  set (y := B754_finite 53 1024 b m e e0).
-  set (ry := B2R 53 1024 y).
-  rewrite (Rplus_comm ry rx). destruct Rlt_bool. 
-  intros (A1 & A2 & A3) (B1 & B2 & B3).
-  apply B2R_Bsign_inj; auto. rewrite <- B1 in A1. auto. 
-  rewrite Z.add_comm. rewrite Z.min_comm. auto.
-  intros (A1 & A2) (B1 & B2). apply B2FF_inj. rewrite B2 in B1. rewrite <- B1 in A1. auto. 
-Qed.
-
-Theorem mul_commut:
-  forall x y, is_nan _ _ x = false \/ is_nan _ _ y = false -> mul x y = mul y x.
+Theorem div_mul_inverse:
+  forall x y z, exact_inverse y = Some z -> div x y = mul x z.
 Proof.
-  intros x y NAN. unfold mul, b64_mult. 
-  pose proof (Bmult_correct 53 1024 eq_refl eq_refl binop_pl mode_NE x y).
-  pose proof (Bmult_correct 53 1024 eq_refl eq_refl binop_pl mode_NE y x).
-  unfold Bmult in *; destruct x; destruct y; auto.
-- f_equal. apply xorb_comm. 
-- f_equal. apply xorb_comm. 
-- f_equal. apply xorb_comm. 
-- f_equal. apply xorb_comm. 
-- simpl in NAN. intuition congruence.
-- f_equal. apply xorb_comm. 
-- f_equal. apply xorb_comm. 
-- set (x := B754_finite 53 1024 b0 m0 e1 e2) in *. 
-  set (rx := B2R 53 1024 x) in *.
-  set (y := B754_finite 53 1024 b m e e0) in *.
-  set (ry := B2R 53 1024 y) in *.
-  rewrite (Rmult_comm ry rx) in *. destruct Rlt_bool. 
-  destruct H as (A1 & A2 & A3); destruct H0 as (B1 & B2 & B3).
-  apply B2R_Bsign_inj; auto. rewrite <- B1 in A1. auto. 
-  rewrite ! Bsign_FF2B. f_equal. f_equal. apply xorb_comm. apply Pos.mul_comm. apply Z.add_comm.
-  apply B2FF_inj. etransitivity. eapply H. rewrite xorb_comm. auto. 
+  intros. apply Bdiv_mult_inverse; auto. 
+  intros. destruct x0; try discriminate. simpl. 
+  transitivity (b, transform_quiet_pl n). 
+  destruct y0; reflexivity || discriminate.
+  destruct z0; reflexivity || discriminate.
 Qed.
 
 (** Properties of comparisons. *)
 
-Theorem order_float_finite_correct:
-  forall f1 f2, is_finite _ _ f1 = true -> is_finite _ _ f2 = true ->
-    match order_float f1 f2 with
-      | Some c => Rcompare (B2R _ _ f1) (B2R _ _ f2) = c
-      | None => False
-    end.
-Proof.
-  Ltac apply_Rcompare :=
-    match goal with
-      | [ |- Rcompare _ _ = Lt ] => apply Rcompare_Lt
-      | [ |- Rcompare _ _ = Eq ] => apply Rcompare_Eq
-      | [ |- Rcompare _ _ = Gt ] => apply Rcompare_Gt
-    end.
-  unfold order_float; intros.
-  destruct f1, f2; try discriminate; unfold B2R, F2R, Fnum, Fexp, cond_Zopp;
-    try (replace 0%R with (Z2R 0 * bpow radix2 e)%R by (simpl Z2R; ring);
-         rewrite Rcompare_mult_r by (apply bpow_gt_0); rewrite Rcompare_Z2R).
-  apply_Rcompare; reflexivity.
-  destruct b0; reflexivity.
-  destruct b; reflexivity.
-  clear H H0.
-  apply andb_prop in e0; destruct e0; apply (canonic_canonic_mantissa _ _ false) in H.
-  apply andb_prop in e2; destruct e2; apply (canonic_canonic_mantissa _ _ false) in H1.
-  pose proof (Zcompare_spec e e1); unfold canonic, Fexp in H1, H.
-  assert (forall m1 m2 e1 e2,
-    let x := (Z2R (Zpos m1) * bpow radix2 e1)%R in
-    let y := (Z2R (Zpos m2) * bpow radix2 e2)%R in
-    canonic_exp radix2 (FLT_exp (3-1024-53) 53) x < canonic_exp radix2 (FLT_exp (3-1024-53) 53) y -> (x < y)%R).
-  intros; apply Rnot_le_lt; intro; apply (ln_beta_le radix2) in H5.
-  apply (fexp_monotone 53 1024) in H5; unfold canonic_exp in H4; omega.
-  apply Rmult_gt_0_compat; [apply (Z2R_lt 0); reflexivity|now apply bpow_gt_0].
-  assert (forall m1 m2 e1 e2, (Z2R (- Zpos m1) * bpow radix2 e1 < Z2R (Zpos m2) * bpow radix2 e2)%R).
-  intros; apply (Rlt_trans _ 0%R).
-  replace 0%R with (0*bpow radix2 e0)%R by ring; apply Rmult_lt_compat_r;
-    [apply bpow_gt_0; reflexivity|now apply (Z2R_lt _ 0)].
-  apply Rmult_gt_0_compat; [apply (Z2R_lt 0); reflexivity|now apply bpow_gt_0].
-  destruct b, b0; try (now apply_Rcompare; apply H5); inversion H3;
-    try (apply_Rcompare; apply H4; rewrite H, H1 in H7; assumption);
-    try (apply_Rcompare; do 2 rewrite Z2R_opp, Ropp_mult_distr_l_reverse;
-      apply Ropp_lt_contravar; apply H4; rewrite H, H1 in H7; assumption);
-    rewrite H7, Rcompare_mult_r, Rcompare_Z2R by (apply bpow_gt_0); reflexivity.
-Qed.
-
 Theorem cmp_swap:
-  forall c x y, Float.cmp (swap_comparison c) x y = Float.cmp c y x.
+  forall c x y, cmp (swap_comparison c) x y = cmp c y x.
 Proof.
-  destruct c, x, y; simpl; try destruct b; try destruct b0; try reflexivity;
-  rewrite <- (Zcompare_antisym e e1); destruct (e ?= e1); try reflexivity;
-  change Eq with (CompOpp Eq); rewrite <- (Pcompare_antisym m m0 Eq);
-    simpl; destruct (Pcompare m m0 Eq); reflexivity.
+  unfold cmp, b64_compare; intros. rewrite (Bcompare_swap _ _ x y).
+  apply cmp_of_comparison_swap. 
 Qed.
 
 Theorem cmp_ne_eq:
   forall f1 f2, cmp Cne f1 f2 = negb (cmp Ceq f1 f2).
 Proof.
-  unfold cmp; intros; destruct (order_float f1 f2) as [ [] | ]; reflexivity.
+  intros; apply cmp_of_comparison_ne_eq. 
 Qed.
 
 Theorem cmp_lt_eq_false:
   forall f1 f2, cmp Clt f1 f2 = true -> cmp Ceq f1 f2 = true -> False.
 Proof.
-  unfold cmp; intros; destruct (order_float f1 f2) as [ [] | ]; discriminate.
+  intros f1 f2; apply cmp_of_comparison_lt_eq_false.
 Qed.
 
 Theorem cmp_le_lt_eq:
   forall f1 f2, cmp Cle f1 f2 = cmp Clt f1 f2 || cmp Ceq f1 f2.
 Proof.
-  unfold cmp; intros; destruct (order_float f1 f2) as [ [] | ]; reflexivity.
+  intros f1 f2; apply cmp_of_comparison_le_lt_eq.
 Qed.
 
-Corollary cmp_gt_eq_false:
+Theorem cmp_gt_eq_false:
   forall x y, cmp Cgt x y = true -> cmp Ceq x y = true -> False.
 Proof.
-  intros; rewrite <- cmp_swap in H; rewrite <- cmp_swap in H0;
-  eapply cmp_lt_eq_false; now eauto.
+  intros f1 f2; apply cmp_of_comparison_gt_eq_false.
 Qed.
 
-Corollary cmp_ge_gt_eq:
+Theorem cmp_ge_gt_eq:
   forall f1 f2, cmp Cge f1 f2 = cmp Cgt f1 f2 || cmp Ceq f1 f2.
 Proof.
-  intros.
-  change Cge with (swap_comparison Cle); change Cgt with (swap_comparison Clt);
-    change Ceq with (swap_comparison Ceq).
-  repeat rewrite cmp_swap.
-  now apply cmp_le_lt_eq.
+  intros f1 f2; apply cmp_of_comparison_ge_gt_eq.
 Qed.
 
 Theorem cmp_lt_gt_false:
   forall f1 f2, cmp Clt f1 f2 = true -> cmp Cgt f1 f2 = true -> False.
 Proof.
-  unfold cmp; intros; destruct (order_float f1 f2) as [ [] | ]; discriminate.
+  intros f1 f2; apply cmp_of_comparison_lt_gt_false.
 Qed.
 
 (** Properties of conversions to/from in-memory representation.
-  The double-precision conversions are bijective (one-to-one).
-  The single-precision conversions lose precision exactly
-  as described by [singleoffloat] rounding. *)
+  The conversions are bijective (one-to-one). *)
 
-Theorem double_of_bits_of_double:
-  forall f, double_of_bits (bits_of_double f) = f.
+Theorem of_to_bits:
+  forall f, of_bits (to_bits f) = f.
 Proof.
-  intros; unfold double_of_bits, bits_of_double, bits_of_b64, b64_of_bits.
+  intros; unfold of_bits, to_bits, bits_of_b64, b64_of_bits.
   rewrite Int64.unsigned_repr, binary_float_of_bits_of_binary_float; [reflexivity|].
-  destruct f.
-  simpl; try destruct b; vm_compute; split; congruence.
-  simpl; try destruct b; vm_compute; split; congruence.
-  destruct n as [p Hp].
-  simpl. rewrite Z.ltb_lt in Hp.
-  apply Zlt_succ_le with (m:=52) in Hp.
-  apply Zpower_le with (r:=radix2) in Hp.
-  edestruct Fcore_digits.digits2_Pnat_correct.
-  rewrite Zpower_nat_Z in H0.
-  eapply Z.lt_le_trans in Hp; eauto.
-  unfold join_bits; destruct b.
-  compute_this ((2 ^ 11 + 2047) * 2 ^ 52). smart_omega.
-  compute_this ((0 + 2047) * 2 ^ 52). smart_omega.
-  unfold bits_of_binary_float, join_bits.
-  destruct (andb_prop _ _ e0); apply Zle_bool_imp_le in H0; apply Zeq_bool_eq in H; unfold FLT_exp in H.
-  match goal with [H:Zmax ?x ?y = e|-_] => pose proof (Zle_max_l x y); pose proof (Zle_max_r x y) end.
-  rewrite H, Fcalc_digits.Z_of_nat_S_digits2_Pnat in *.
-  lapply (Fcalc_digits.Zpower_gt_Zdigits radix2 53 (Zpos m)). intro.
-  unfold radix2, radix_val, Zabs in H3.
-  pose proof (Zle_bool_spec (2 ^ 52) (Zpos m)).
-  assert (Zpos m > 0); [vm_compute; exact eq_refl|].
-  compute_this (2^11); compute_this (2^(11-1)).
-  inversion H4; fold (2^52) in *; destruct H6; destruct b; now smart_omega.
-  change Fcalc_digits.radix2 with radix2 in H1; omega.
-Qed.
-
-Theorem single_of_bits_of_single:
-  forall f, single_of_bits (bits_of_single f) = singleoffloat f.
-Proof.
-  intros; unfold single_of_bits, bits_of_single, bits_of_b32, b32_of_bits.
-  rewrite Int.unsigned_repr, binary_float_of_bits_of_binary_float; [reflexivity|].
-  destruct (binary32offloat f).
-  simpl; try destruct b; vm_compute; split; congruence.
-  simpl; try destruct b; vm_compute; split; congruence.
-  destruct n as [p Hp].
-  simpl. rewrite Z.ltb_lt in Hp.
-  apply Zlt_succ_le with (m:=23) in Hp.
-  apply Zpower_le with (r:=radix2) in Hp.
-  edestruct Fcore_digits.digits2_Pnat_correct.
-  rewrite Zpower_nat_Z in H0.
-  eapply Z.lt_le_trans in Hp; eauto.
-  compute_this (radix2^23).
-  unfold join_bits; destruct b.
-  compute_this ((2 ^ 8 + 255) * 2 ^ 23). smart_omega.
-  compute_this ((0 + 255) * 2 ^ 23). smart_omega.
-  unfold bits_of_binary_float, join_bits.
-  destruct (andb_prop _ _ e0); apply Zle_bool_imp_le in H0; apply Zeq_bool_eq in H.
-  unfold FLT_exp in H.
-  match goal with [H:Zmax ?x ?y = e|-_] => pose proof (Zle_max_l x y); pose proof (Zle_max_r x y) end.
-  rewrite H, Fcalc_digits.Z_of_nat_S_digits2_Pnat in *.
-  lapply (Fcalc_digits.Zpower_gt_Zdigits radix2 24 (Zpos m)). intro.
-  unfold radix2, radix_val, Zabs in H3.
-  pose proof (Zle_bool_spec (2 ^ 23) (Zpos m)).
-  compute_this (2^23); compute_this (2^24); compute_this (2^8); compute_this (2^(8-1)).
-  assert (Zpos m > 0); [exact eq_refl|].
-  inversion H4; destruct b; now smart_omega.
-  change Fcalc_digits.radix2 with radix2 in H1; omega.
+  generalize (bits_of_binary_float_range 52 11 __ __ f).
+  change (2^(52+11+1)) with (Int64.max_unsigned + 1). omega. 
 Qed.
 
-Theorem bits_of_singleoffloat:
-  forall f, bits_of_single (singleoffloat f) = bits_of_single f.
+Theorem to_of_bits:
+  forall b, to_bits (of_bits b) = b.
 Proof.
-  intro; unfold singleoffloat, bits_of_single; rewrite binary32offloatofbinary32offloat; reflexivity.
-Qed.
-
-Theorem singleoffloat_of_bits:
-  forall b, singleoffloat (single_of_bits b) = single_of_bits b.
-Proof.
-  intro; unfold singleoffloat, single_of_bits; rewrite floatofbinary32offloatofbinary32; reflexivity.
-Qed.
-
-Theorem single_of_bits_is_single:
-  forall b, is_single (single_of_bits b).
-Proof.
-  intros. exists (b32_of_bits (Int.unsigned b)); auto.
+  intros; unfold of_bits, to_bits, bits_of_b64, b64_of_bits.
+  rewrite bits_of_binary_float_of_bits. apply Int64.repr_unsigned.
+  apply Int64.unsigned_range. 
 Qed.
 
 (** Conversions between floats and unsigned ints can be defined
@@ -740,279 +462,103 @@ Qed.
 
 Definition ox8000_0000 := Int.repr Int.half_modulus.  (**r [0x8000_0000] *)
 
-Lemma round_exact:
-  forall n, -2^53 < n < 2^53 ->
-    round radix2 (FLT_exp (3 - 1024 - 53) 53)
-      (round_mode mode_NE) (Z2R n) = Z2R n.
-Proof.
-  intros; rewrite round_generic; [reflexivity|now apply valid_rnd_round_mode|].
-  apply generic_format_FLT; exists (Float radix2 n 0).
-  unfold F2R, Fnum, Fexp, bpow; rewrite Rmult_1_r; intuition.
-  pose proof (Zabs_spec n); now smart_omega.
-Qed.
-
-Lemma binary_normalize64_exact:
-  forall n, -2^53 < n < 2^53 ->
-    B2R _ _ (binary_normalize64 n 0 false) = Z2R n /\
-    is_finite _ _ (binary_normalize64 n 0 false) = true.
-Proof.
-  intros; pose proof (binary_normalize64_correct n 0 false).
-  unfold F2R, Fnum, Fexp, bpow in H0; rewrite Rmult_1_r, round_exact, Rlt_bool_true in H0; try now intuition.
-  rewrite <- Z2R_abs; apply Z2R_lt; pose proof (Zabs_spec n); now smart_omega.
-Qed.
-
-Theorem floatofintu_floatofint_1:
+Theorem of_intu_of_int_1:
   forall x,
   Int.ltu x ox8000_0000 = true ->
-  floatofintu x = floatofint x.
+  of_intu x = of_int x.
 Proof.
-  unfold floatofintu, floatofint, Int.signed, Int.ltu; intro.
+  unfold of_intu, of_int, Int.signed, Int.ltu; intro.
   change (Int.unsigned ox8000_0000) with Int.half_modulus.
   destruct (zlt (Int.unsigned x) Int.half_modulus); now intuition.
 Qed.
 
-Theorem floatofintu_floatofint_2:
+Theorem of_intu_of_int_2:
   forall x,
   Int.ltu x ox8000_0000 = false ->
-  floatofintu x = add (floatofint (Int.sub x ox8000_0000))
-                      (floatofintu ox8000_0000).
+  of_intu x = add (of_int (Int.sub x ox8000_0000)) (of_intu ox8000_0000).
 Proof.
-  unfold floatofintu, floatofint, Int.signed, Int.ltu, Int.sub; intros.
-  pose proof (Int.unsigned_range x).
-  compute_this (Int.unsigned ox8000_0000).
-  destruct (zlt (Int.unsigned x) 2147483648); try  discriminate.
-  rewrite Int.unsigned_repr by smart_omega.
-  destruct (zlt ((Int.unsigned x) - 2147483648) Int.half_modulus).
-  unfold add, b64_plus.
-  match goal with [|- _ = Bplus _ _ _ _ _ _ ?x ?y] =>
-    pose proof (Bplus_correct 53 1024 eq_refl eq_refl binop_pl mode_NE x y) end.
-  do 2 rewrite (fun x H => proj1 (binary_normalize64_exact x H)) in H1 by smart_omega.
-  do 2 rewrite (fun x H => proj2 (binary_normalize64_exact x H)) in H1 by smart_omega.
-  rewrite <- Z2R_plus, round_exact in H1 by smart_omega.
-  rewrite Rlt_bool_true in H1;
-    replace (Int.unsigned x - 2147483648 + 2147483648) with (Int.unsigned x) in * by ring.
-  apply B2R_inj.
-  destruct (binary_normalize64_exact (Int.unsigned x)); [now smart_omega|].
-  match goal with [|- _ _ _ ?f = _] => destruct f end; intuition.
-  exfalso; simpl in H2; change 0%R with (Z2R 0) in H2; apply eq_Z2R in H2; omega.
-  try (change (53 ?= 1024) with Lt in H1).  (* for Coq 8.4 *)
-  simpl Zcompare in *.
-  match goal with [|- _ _ _ ?f = _] => destruct f end; intuition.
-  exfalso; simpl in H0; change 0%R with (Z2R 0) in H0; apply eq_Z2R in H0; omega.
-  rewrite (fun x H => proj1 (binary_normalize64_exact x H)) by smart_omega; now intuition.
-  rewrite <- Z2R_Zpower, <- Z2R_abs by omega; apply Z2R_lt;
-    pose proof (Zabs_spec (Int.unsigned x)); now smart_omega.
-  exfalso; now smart_omega.
+  unfold add, b64_plus, of_intu, of_int, b64_of_Z; intros.
+  set (y := Int.sub x ox8000_0000).
+  pose proof (Int.unsigned_range x); pose proof (Int.signed_range y).
+  assert (Ry: integer_representable 53 1024 (Int.signed y)).
+  { apply integer_representable_n; auto; smart_omega. }
+  assert (R8: integer_representable 53 1024 (Int.unsigned ox8000_0000)).
+  { apply integer_representable_2p with (p := 31);auto; smart_omega. }
+  rewrite BofZ_plus by auto.
+  f_equal. 
+  unfold Int.ltu in H. destruct zlt in H; try discriminate.
+  unfold y, Int.sub. rewrite Int.signed_repr. omega.
+  compute_this (Int.unsigned ox8000_0000); smart_omega.
 Qed.
 
-Lemma Zoffloat_correct:
-  forall f,
-    match Zoffloat f with
-      | Some n =>
-        is_finite _ _ f = true /\
-        Z2R n = round radix2 (FIX_exp 0) (round_mode mode_ZR) (B2R _ _ f)
-      | None =>
-        is_finite _ _ f = false
-    end.
-Proof.
-  destruct f; try now intuition.
-  simpl B2R. rewrite round_0. now intuition. now apply valid_rnd_round_mode.
-  destruct e. split. reflexivity.
-  rewrite round_generic. symmetry. now apply Rmult_1_r.
-  now apply valid_rnd_round_mode.
-  apply generic_format_FIX. exists (Float radix2 (cond_Zopp b (Zpos m)) 0). split; reflexivity.
-  split; [reflexivity|].
-  rewrite round_generic, Z2R_mult, Z2R_Zpower_pos, <- bpow_powerRZ;
-    [reflexivity|now apply valid_rnd_round_mode|apply generic_format_F2R; discriminate].
-  rewrite (inbetween_float_ZR_sign _ _ _ ((Zpos m) / Zpower_pos radix2 p)
-    (new_location (Zpower_pos radix2 p) (Zpos m mod Zpower_pos radix2 p) loc_Exact)).
-  unfold B2R, F2R, Fnum, Fexp, canonic_exp, bpow, FIX_exp, Zoffloat, radix2, radix_val.
-  pose proof (Rlt_bool_spec (Z2R (cond_Zopp b (Zpos m)) * / Z2R (Zpower_pos 2 p)) 0).
-  inversion H; rewrite <- (Rmult_0_l (bpow radix2 (Zneg p))) in H1.
-  apply Rmult_lt_reg_r in H1. apply (lt_Z2R _ 0) in H1.
-  destruct b; [split; [|ring_simplify];reflexivity|discriminate].
-  now apply bpow_gt_0.
-  apply Rmult_le_reg_r in H1. apply (le_Z2R 0) in H1.
-  destruct b; [destruct H1|split; [|ring_simplify]]; reflexivity.
-  now apply (bpow_gt_0 radix2 (Zneg p)).
-  unfold canonic_exp, FIX_exp; replace 0 with (Zneg p + Zpos p) by apply Zplus_opp_r.
-  apply (inbetween_float_new_location radix2 _ _ _ _ (Zpos p)); [reflexivity|].
-  apply inbetween_Exact; unfold B2R, F2R, Fnum, Fexp; destruct b.
-  rewrite  Rabs_left; [simpl; ring_simplify; reflexivity|].
-  replace 0%R with (0*(bpow radix2 (Zneg p)))%R by ring; apply Rmult_gt_compat_r.
-  now apply bpow_gt_0.
-  apply (Z2R_lt _ 0); reflexivity.
-  apply Rabs_right; replace 0%R with (0*(bpow radix2 (Zneg p)))%R by ring; apply Rgt_ge.
-  apply Rmult_gt_compat_r; [now apply bpow_gt_0|apply (Z2R_lt 0); reflexivity].
-Qed.
-
-Theorem intoffloat_correct:
-  forall f,
-    match intoffloat f with
-      | Some n =>
-        is_finite _ _ f = true /\
-        Z2R (Int.signed n) = round radix2 (FIX_exp 0) (round_mode mode_ZR) (B2R _ _ f)
-      | None =>
-        is_finite _ _ f = false \/
-        (B2R _ _ f <= Z2R (Zpred Int.min_signed)\/
-        Z2R (Zsucc Int.max_signed) <= B2R _ _ f)%R
-    end.
-Proof.
-  intro; pose proof (Zoffloat_correct f); unfold intoffloat; destruct (Zoffloat f).
-  pose proof (Zle_bool_spec Int.min_signed z); pose proof (Zle_bool_spec z Int.max_signed). 
-  compute_this Int.min_signed; compute_this Int.max_signed; destruct H.
-  inversion H0; [inversion H1|].
-  rewrite <- (Int.signed_repr z) in H2 by smart_omega; split; assumption.
-  right; right; eapply Rle_trans; [apply Z2R_le; apply Zlt_le_succ; now apply H6|].
-  rewrite H2, round_ZR_pos.
-  unfold round, scaled_mantissa, canonic_exp, FIX_exp, F2R, Fnum, Fexp; simpl bpow.
-  do 2 rewrite Rmult_1_r; now apply Zfloor_lb.
-  apply Rnot_lt_le; intro; apply Rlt_le in H7; apply (round_le radix2 (FIX_exp 0) (round_mode mode_ZR)) in H7;
-    rewrite <- H2, round_0 in H7; [apply (le_Z2R _ 0) in H7; now smart_omega|now apply valid_rnd_round_mode].
-  right; left; eapply Rle_trans; [|apply (Z2R_le z); simpl; omega].
-  rewrite H2, round_ZR_neg.
-  unfold round, scaled_mantissa, canonic_exp, FIX_exp, F2R, Fnum, Fexp; simpl bpow.
-  do 2 rewrite Rmult_1_r; now apply Zceil_ub.
-  apply Rnot_lt_le; intro; apply Rlt_le in H5; apply (round_le radix2 (FIX_exp 0) (round_mode mode_ZR)) in H5.
-  rewrite <- H2, round_0 in H5; [apply (le_Z2R 0) in H5; omega|now apply valid_rnd_round_mode].
-  left; assumption.
-Qed.
-
-Theorem intuoffloat_correct:
-  forall f,
-    match intuoffloat f with
-      | Some n =>
-        is_finite _ _ f = true /\
-        Z2R (Int.unsigned n) = round radix2 (FIX_exp 0) (round_mode mode_ZR) (B2R _ _ f)
-      | None =>
-        is_finite _ _ f = false \/
-        (B2R _ _ f <= -1 \/
-        Z2R (Zsucc Int.max_unsigned) <= B2R _ _ f)%R
-    end.
-Proof.
-  intro; pose proof (Zoffloat_correct f); unfold intuoffloat; destruct (Zoffloat f).
-  pose proof (Zle_bool_spec 0 z); pose proof (Zle_bool_spec z Int.max_unsigned).
-  compute_this Int.max_unsigned; destruct H.
-  inversion H0. inversion H1.
-  rewrite <- (Int.unsigned_repr z) in H2 by smart_omega; split; assumption.
-  right; right; eapply Rle_trans; [apply Z2R_le; apply Zlt_le_succ; now apply H6|].
-  rewrite H2, round_ZR_pos.
-  unfold round, scaled_mantissa, canonic_exp, FIX_exp, F2R, Fnum, Fexp; simpl bpow;
-    do 2 rewrite Rmult_1_r; now apply Zfloor_lb.
-  apply Rnot_lt_le; intro; apply Rlt_le in H7; eapply (round_le radix2 (FIX_exp 0) (round_mode mode_ZR)) in H7;
-    rewrite <- H2, round_0 in H7; [apply (le_Z2R _ 0) in H7; now smart_omega|now apply valid_rnd_round_mode].
-  right; left; eapply Rle_trans; [|change (-1)%R with (Z2R (-1)); apply (Z2R_le z); omega].
-  rewrite H2, round_ZR_neg; unfold round, scaled_mantissa, canonic_exp, FIX_exp, F2R, Fnum, Fexp; simpl bpow.
-  do 2 rewrite Rmult_1_r; now apply Zceil_ub.
-  apply Rnot_lt_le; intro; apply Rlt_le in H5; apply (round_le radix2 (FIX_exp 0) (round_mode mode_ZR)) in H5;
-    rewrite <- H2, round_0 in H5; [apply (le_Z2R 0) in H5; omega|now apply valid_rnd_round_mode].
-  left; assumption.
-Qed.
-
-Lemma intuoffloat_interval:
-  forall f n,
-    intuoffloat f = Some n ->
-    (-1 < B2R _ _ f < Z2R (Zsucc Int.max_unsigned))%R.
-Proof.
-  intro; pose proof (intuoffloat_correct f); destruct (intuoffloat f); try discriminate; destruct H.
-  destruct f; try discriminate; intros.
-  simpl B2R; change 0%R with (Z2R 0); change (-1)%R with (Z2R (-1)); split; apply Z2R_lt; reflexivity.
-  pose proof (Int.unsigned_range i).
-  unfold round, scaled_mantissa, B2R, F2R, Fnum, Fexp in H0 |- *; simpl bpow in H0; do 2 rewrite Rmult_1_r in H0;
-    apply eq_Z2R in H0.
-  split; apply Rnot_le_lt; intro.
-  rewrite Ztrunc_ceil in H0;
-    [apply Zceil_le in H3; change (-1)%R with (Z2R (-1)) in H3; rewrite Zceil_Z2R in H3; omega|].
-  eapply Rle_trans; [now apply H3|apply (Z2R_le (-1) 0); discriminate].
-  rewrite Ztrunc_floor in H0; [apply Zfloor_le in H3; rewrite Zfloor_Z2R in H3; now smart_omega|].
-  eapply Rle_trans; [|now apply H3]; apply (Z2R_le 0); discriminate.
-Qed.
-
-Theorem intuoffloat_intoffloat_1:
+Theorem to_intu_to_int_1:
   forall x n,
-  cmp Clt x (floatofintu ox8000_0000) = true ->
-  intuoffloat x = Some n ->
-  intoffloat x = Some n.
-Proof.
-  intros; unfold cmp in H; pose proof (order_float_finite_correct x (floatofintu ox8000_0000)).
-  destruct (order_float x (floatofintu ox8000_0000)); try destruct c; try discriminate.
-  pose proof (intuoffloat_correct x); rewrite H0 in H2; destruct H2.
-  specialize (H1 H2 eq_refl); pose proof (intoffloat_correct x); destruct (intoffloat x).
-  f_equal; rewrite <- (proj2 H4) in H3; apply eq_Z2R in H3.
-  pose proof (eq_refl (Int.repr (Int.unsigned n))); rewrite H3 in H5 at 1.
-  rewrite Int.repr_signed, Int.repr_unsigned in H5; assumption.
-  destruct H4; [rewrite H2 in H4; discriminate|].
-  apply intuoffloat_interval in H0; exfalso; destruct H0, H4.
-  eapply Rlt_le_trans in H0; [|now apply H4]; apply (lt_Z2R (-1)) in H0; discriminate.
-  apply Rcompare_Lt_inv in H1; eapply Rle_lt_trans in H1; [|now apply H4].
-  unfold floatofintu in H1; rewrite (fun x H => proj1 (binary_normalize64_exact x H)) in H1;
-    [apply lt_Z2R in H1; discriminate|split; reflexivity].
-Qed.
-
-Lemma Zfloor_minus :
-  forall x n, Zfloor(x-Z2R n) = Zfloor(x)-n.
-Proof.
-  intros; apply Zfloor_imp; replace (Zfloor x - n + 1) with (Zfloor x + 1 - n) by ring; do 2 rewrite Z2R_minus.
-  split;
-    [apply Rplus_le_compat_r; now apply Zfloor_lb|
-     apply Rplus_lt_compat_r; rewrite Z2R_plus; now apply Zfloor_ub].
-Qed.
-
-Theorem intuoffloat_intoffloat_2:
+  cmp Clt x (of_intu ox8000_0000) = true ->
+  to_intu x = Some n ->
+  to_int x = Some n.
+Proof.
+  intros. unfold to_intu in H0. 
+  destruct (b64_to_Z_range x 0 Int.max_unsigned) as [p|] eqn:E; simpl in H0; inv H0.
+  unfold b64_to_Z_range in E. exploit ZofB_range_inversion; eauto. intros (A & B & C).
+  unfold to_int, b64_to_Z_range. unfold ZofB_range. rewrite C. 
+  rewrite Zle_bool_true by smart_omega. rewrite Zle_bool_true; auto.
+  exploit (BofZ_exact 53 1024 __ __ (Int.unsigned ox8000_0000)).
+  vm_compute; intuition congruence.
+  set (y := of_intu ox8000_0000) in *.
+  change (BofZ 53 1024 eq_refl eq_refl (Int.unsigned ox8000_0000)) with y.
+  intros (EQy & FINy & SIGNy).
+  assert (FINx: is_finite _ _ x = true).
+  { rewrite ZofB_correct in C. destruct (is_finite _ _ x) eqn:FINx; congruence. }
+  destruct (zeq p 0). 
+  subst p; smart_omega.
+  destruct (ZofB_range_pos 53 1024 __ __ x p C) as [P Q]. omega.
+  assert (CMP: b64_compare x y = Some Lt). 
+  { unfold cmp, cmp_of_comparison in H. destruct (b64_compare x y) as [[]|]; auto; discriminate. }
+  unfold b64_compare in CMP. rewrite Bcompare_finite_correct in CMP by auto. 
+  inv CMP. apply Rcompare_Lt_inv in H1. rewrite EQy in H1.
+  assert (p < Int.unsigned ox8000_0000).
+  { apply lt_Z2R. eapply Rle_lt_trans; eauto. }
+  change Int.max_signed with (Int.unsigned ox8000_0000 - 1). omega. 
+Qed.
+
+Theorem to_intu_to_int_2:
   forall x n,
-  cmp Clt x (floatofintu ox8000_0000) = false ->
-  intuoffloat x = Some n ->
-  intoffloat (sub x (floatofintu ox8000_0000)) = Some (Int.sub n ox8000_0000).
-Proof.
-  assert (B2R _ _ (floatofintu ox8000_0000) = Z2R (Int.unsigned ox8000_0000)).
-  apply (fun x H => proj1 (binary_normalize64_exact x H)); split; reflexivity.
-  intros; unfold cmp in H0; pose proof (order_float_finite_correct x (floatofintu ox8000_0000)).
-  destruct (order_float x (floatofintu ox8000_0000)); try destruct c; try discriminate;
-  pose proof (intuoffloat_correct x); rewrite H1 in H3; destruct H3; specialize (H2 H3 eq_refl).
-  apply Rcompare_Eq_inv in H2; apply B2R_inj in H2.
-  subst x; vm_compute in H1; injection H1; intro; subst n; vm_compute; reflexivity.
-  destruct x; try discriminate H3;
-    [rewrite H in H2; simpl B2R in H2; apply (eq_Z2R 0) in H2; discriminate|reflexivity].
-  reflexivity.
-  rewrite H in H2; apply Rcompare_Gt_inv in H2; pose proof (intuoffloat_interval _ _ H1).
-  unfold sub, b64_minus.
-  exploit (Bminus_correct 53 1024 eq_refl eq_refl binop_pl mode_NE x (floatofintu ox8000_0000)); [assumption|reflexivity|]; intro.
-  rewrite H, round_generic in H6.
-  match goal with [H6:if Rlt_bool ?x ?y then _ else _|-_] =>
-    pose proof (Rlt_bool_spec x y); destruct (Rlt_bool x y) end.
-  destruct H6 as [? []].
-  match goal with [|- _ ?y = _] => pose proof (intoffloat_correct y); destruct (intoffloat y) end.
-  destruct H10.
-  f_equal; rewrite <- (Int.repr_signed i); unfold Int.sub; f_equal; apply eq_Z2R. 
-  rewrite Z2R_minus, H11, H4.
-  unfold round, scaled_mantissa, F2R, Fexp, Fnum, round_mode; simpl bpow; repeat rewrite Rmult_1_r;
-    rewrite <- Z2R_minus; f_equal.
-  rewrite (Ztrunc_floor (B2R _ _ x)), <- Zfloor_minus, <- Ztrunc_floor;
-    [f_equal; assumption|apply Rle_0_minus; left; assumption|].
-  left; eapply Rlt_trans; [|now apply H2]; apply (Z2R_lt 0); reflexivity.
-  try (change (0 ?= 53) with Lt in H6,H8).  (* for Coq 8.4 *)
-  try (change (53 ?= 1024) with Lt in H6,H8).  (* for Coq 8.4 *)
-  exfalso; simpl Zcompare in H6, H8; rewrite H6, H8 in H10.
-  destruct H10 as [|[]]; [discriminate|..].
-  eapply Rle_trans in H10; [|apply Rle_0_minus; left; assumption]; apply (le_Z2R 0) in H10; apply H10; reflexivity.
-  eapply Rle_lt_trans in H10; [|apply Rplus_lt_compat_r; now apply (proj2 H5)].
-  rewrite <- Z2R_opp, <- Z2R_plus in H10; apply lt_Z2R in H10; discriminate.
-  exfalso; inversion H7; rewrite Rabs_right in H8.
-  eapply Rle_lt_trans in H8. apply Rle_not_lt in H8; [assumption|apply (bpow_le _ 31); discriminate].
-  change (bpow radix2 31) with (Z2R(Zsucc Int.max_unsigned - Int.unsigned ox8000_0000)); rewrite Z2R_minus.
-  apply Rplus_lt_compat_r; exact (proj2 H5).
-  apply Rle_ge; apply Rle_0_minus; left; assumption.
-  now apply valid_rnd_round_mode.
-  apply Fprop_Sterbenz.sterbenz_aux; [now apply fexp_monotone|now apply generic_format_B2R| |].
-  rewrite <- H; now apply generic_format_B2R.
-  destruct H5; split; left; assumption.
-  now destruct H2.
+  cmp Clt x (of_intu ox8000_0000) = false ->
+  to_intu x = Some n ->
+  to_int (sub x (of_intu ox8000_0000)) = Some (Int.sub n ox8000_0000).
+Proof.
+  intros. unfold to_intu in H0. 
+  destruct (b64_to_Z_range x 0 Int.max_unsigned) as [p|] eqn:E; simpl in H0; inv H0.
+  unfold b64_to_Z_range in E. exploit ZofB_range_inversion; eauto. intros (A & B & C).
+  exploit (BofZ_exact 53 1024 __ __ (Int.unsigned ox8000_0000)).
+  vm_compute; intuition congruence.
+  set (y := of_intu ox8000_0000) in *.
+  change (BofZ 53 1024 __ __ (Int.unsigned ox8000_0000)) with y.
+  intros (EQy & FINy & SIGNy).
+  assert (FINx: is_finite _ _ x = true).
+  { rewrite ZofB_correct in C. destruct (is_finite _ _ x) eqn:FINx; congruence. }
+  assert (GE: (B2R _ _ x >= Z2R (Int.unsigned ox8000_0000))%R).
+  { rewrite <- EQy. unfold cmp, cmp_of_comparison, b64_compare in H.
+    rewrite Bcompare_finite_correct in H by auto. 
+    destruct (Rcompare (B2R 53 1024 x) (B2R 53 1024 y)) eqn:CMP.
+    apply Req_ge; apply Rcompare_Eq_inv; auto.
+    discriminate.
+    apply Rgt_ge; apply Rcompare_Gt_inv; auto. 
+  } 
+  assert (EQ: b64_to_Z_range (sub x y) Int.min_signed Int.max_signed = Some (p - Int.unsigned ox8000_0000)).
+  {
+    apply ZofB_range_minus. exact E. 
+    compute_this (Int.unsigned ox8000_0000). smart_omega.
+    apply Rge_le; auto.
+  } 
+  unfold to_int; rewrite EQ. simpl. f_equal. unfold Int.sub. f_equal. f_equal. 
+  symmetry; apply Int.unsigned_repr. omega.
 Qed.
 
 (** Conversions from ints to floats can be defined as bitwise manipulations
   over the in-memory representation.  This is what the PowerPC port does.
   The trick is that [from_words 0x4330_0000 x] is the float
-  [2^52 + floatofintu x]. *)
+  [2^52 + of_intu x]. *)
 
 Definition ox4330_0000 := Int.repr 1127219200.        (**r [0x4330_0000] *)
 
@@ -1032,55 +578,42 @@ Qed.
 
 Lemma from_words_value:
   forall x,
-    B2R _ _ (from_words ox4330_0000 x) =
-    (bpow radix2 52 + Z2R (Int.unsigned x))%R /\
-    is_finite _ _ (from_words ox4330_0000 x) = true.
+     B2R _ _ (from_words ox4330_0000 x) = (bpow radix2 52 + Z2R (Int.unsigned x))%R
+  /\ is_finite _ _ (from_words ox4330_0000 x) = true
+  /\ Bsign _ _ (from_words ox4330_0000 x) = false.
 Proof.
-  intros; unfold from_words, double_of_bits, b64_of_bits, binary_float_of_bits.
-  rewrite B2R_FF2B. rewrite is_finite_FF2B.
+  intros; unfold from_words, of_bits, b64_of_bits, binary_float_of_bits.
+  rewrite B2R_FF2B, is_finite_FF2B, Bsign_FF2B.
   unfold binary_float_of_bits_aux; rewrite split_bits_or; simpl; pose proof (Int.unsigned_range x).
   destruct (Int.unsigned x + Zpower_pos 2 52) eqn:?.
   exfalso; now smart_omega.
-  simpl; rewrite <- Heqz;  unfold F2R; simpl.
-  rewrite <- (Z2R_plus 4503599627370496), Rmult_1_r.
-  split; [f_equal; compute_this (Zpower_pos 2 52); ring | reflexivity].
-  assert (Zneg p < 0) by reflexivity.
+  simpl; rewrite <- Heqz;  unfold F2R; simpl. split; auto.
+  rewrite <- (Z2R_plus 4503599627370496), Rmult_1_r. f_equal. rewrite Zplus_comm. auto.
   exfalso; now smart_omega.
 Qed.
 
-Theorem floatofintu_from_words:
-  forall x,
-  floatofintu x =
-    sub (from_words ox4330_0000 x) (from_words ox4330_0000 Int.zero).
+Lemma from_words_eq:
+  forall x, from_words ox4330_0000 x = BofZ 53 1024 __ __ (2^52 + Int.unsigned x).
 Proof.
-  intros; destruct (Int.eq_dec x Int.zero); [subst; vm_compute; reflexivity|].
-  assert (Int.unsigned x <> 0).
-  intro; destruct n; rewrite <- (Int.repr_unsigned x), H; reflexivity.
+  intros. 
   pose proof (Int.unsigned_range x).
-  pose proof (binary_normalize64_exact (Int.unsigned x)). destruct H1; [smart_omega|].
-  unfold floatofintu, sub, b64_minus.
-  match goal with [|- _ = Bminus _ _ _ _ _ _ ?x ?y] =>
-    pose proof (Bminus_correct 53 1024 eq_refl eq_refl binop_pl mode_NE x y) end.
-  apply (fun f x y => f x y) in H3; try apply (fun x => proj2 (from_words_value x)).
-  do 2 rewrite (fun x => proj1 (from_words_value x)) in H3.
-  rewrite Int.unsigned_zero in H3.
-  replace (bpow radix2 52 + Z2R (Int.unsigned x) -
-    (bpow radix2 52 + Z2R 0))%R with (Z2R (Int.unsigned x)) in H3 by (simpl; ring).
-  rewrite round_exact in H3 by smart_omega.
-  match goal with [H3:if Rlt_bool ?x ?y then _ else _ |- _] =>
-    pose proof (Rlt_bool_spec x y); destruct (Rlt_bool x y) end; destruct H3 as [? []].
-  try (change (53 ?= 1024) with Lt in H3,H5).  (* for Coq 8.4 *)
-  simpl Zcompare in *; apply B2R_inj;
-    try match goal with [H':B2R _ _ ?f = _ , H'':is_finite _ _ ?f = true |- is_finite_strict _ _ ?f = true] => 
-      destruct f; [
-        simpl in H'; change 0%R with (Z2R 0) in H'; apply eq_Z2R in H'; now destruct (H (eq_sym H')) | 
-        discriminate H'' | discriminate H'' | reflexivity
-      ]
-    end.
-  rewrite H3; assumption.
-  inversion H4; change (bpow radix2 1024) with (Z2R (radix2 ^ 1024)) in H5; rewrite <- Z2R_abs in H5.
-  apply le_Z2R in H5; pose proof (Zabs_spec (Int.unsigned x));
-    exfalso; now smart_omega.
+  destruct (from_words_value x) as (A & B & C).
+  destruct (BofZ_exact 53 1024 __ __ (2^52 + Int.unsigned x)) as (D & E & F).
+  smart_omega.
+  apply B2R_Bsign_inj; auto.
+  rewrite A, D. rewrite Z2R_plus. auto. 
+  rewrite C, F. symmetry. apply Zlt_bool_false. smart_omega. 
+Qed.
+
+Theorem of_intu_from_words:
+  forall x,
+  of_intu x = sub (from_words ox4330_0000 x) (from_words ox4330_0000 Int.zero).
+Proof.
+  intros. pose proof (Int.unsigned_range x).
+  rewrite ! from_words_eq. unfold sub, b64_minus. rewrite BofZ_minus. 
+  unfold of_intu, b64_of_Z. f_equal. rewrite Int.unsigned_zero. omega.
+  apply integer_representable_n; auto; smart_omega.
+  apply integer_representable_n; auto; rewrite Int.unsigned_zero; smart_omega.
 Qed.
 
 Lemma ox8000_0000_signed_unsigned:
@@ -1095,337 +628,19 @@ Proof.
   apply Int.eqm_add; [now apply Int.eqm_refl|exists 1;reflexivity].
 Qed.
 
-Theorem floatofint_from_words:
+Theorem of_int_from_words:
   forall x,
-  floatofint x =
-    sub (from_words ox4330_0000 (Int.add x ox8000_0000))
-        (from_words ox4330_0000 ox8000_0000).
-Proof.
-Local Transparent Int.repr Int64.repr.
-  intros; destruct (Int.eq_dec x Int.zero); [subst; vm_compute; reflexivity|].
-  assert (Int.signed x <> 0).
-  intro; destruct n; rewrite <- (Int.repr_signed x), H; reflexivity.
-  pose proof (Int.signed_range x).
-  pose proof (binary_normalize64_exact (Int.signed x)); destruct H1; [now smart_omega|].
-  unfold floatofint, sub, b64_minus.
-  match goal with [|- _ = Bminus _ _ _ _ _ _ ?x ?y] =>
-    pose proof (Bminus_correct 53 1024 eq_refl eq_refl binop_pl mode_NE x y) end.
-  apply (fun f x y => f x y) in H3; try apply (fun x => proj2 (from_words_value x)).
-  do 2 rewrite (fun x => proj1 (from_words_value x)) in H3.
-  replace (bpow radix2 52 + Z2R (Int.unsigned (Int.add x ox8000_0000)) -
-    (bpow radix2 52 + Z2R (Int.unsigned ox8000_0000)))%R with (Z2R (Int.signed x)) in H3
-  by (rewrite ox8000_0000_signed_unsigned; rewrite Z2R_plus; simpl; ring).
-  rewrite round_exact in H3 by smart_omega.
-  match goal with [H3:if Rlt_bool ?x ?y then _ else _ |- _] =>
-    pose proof (Rlt_bool_spec x y); destruct (Rlt_bool x y) end; destruct H3 as [? []].
-  try (change (0 ?= 53) with Lt in H3,H5).  (* for Coq 8.4 *)
-  try (change (53 ?= 1024) with Lt in H3,H5).  (* for Coq 8.4 *)
-  simpl Zcompare in *; apply B2R_inj;
-    try match goal with [H':B2R _ _ ?f = _ , H'':is_finite _ _ ?f = true |- is_finite_strict _ _ ?f = true] => 
-      destruct f; [
-        simpl in H'; change 0%R with (Z2R 0) in H'; apply eq_Z2R in H'; now destruct (H (eq_sym H')) | 
-        discriminate H'' | discriminate H'' | reflexivity
-      ]
-    end.
-  rewrite H3; assumption.
-  inversion H4; unfold bpow in H5; rewrite <- Z2R_abs in H5;
-    apply le_Z2R in H5; pose proof (Zabs_spec (Int.signed x)); exfalso; now smart_omega.
-Qed.
-
-(** Conversions from 32-bit integers to single-precision floats can
-  be decomposed into a conversion to a double-precision float,
-  followed by a [singleoffloat] normalization.  No double rounding occurs. *)
-
-Lemma is_finite_strict_ge_1:
-  forall (f: binary32),
-  is_finite _ _ f = true ->
-  (1 <= Rabs (B2R _ _ f))%R ->
-  is_finite_strict _ _ f = true.
-Proof.
-  intros. destruct f; auto. simpl in H0.
-  change 0%R with (Z2R 0) in H0.
-  change 1%R with (Z2R 1) in H0.
-  rewrite <- Z2R_abs in H0.
-  exploit le_Z2R; eauto.
-Qed.
-
-Lemma single_float_of_int:
-  forall n,
-  -2^53 < n < 2^53 ->
-  singleoffloat (binary_normalize64 n 0 false) = floatofbinary32 (binary_normalize32 n 0 false).
-Proof.
-  intros. unfold singleoffloat. f_equal.
-  assert (EITHER: n = 0 \/ Z.abs n > 0) by (destruct n; compute; auto).
-  destruct EITHER as [EQ|GT].
-  subst n; reflexivity.
-  exploit binary_normalize64_exact; eauto. intros [A B].
-  destruct (binary_normalize64 n 0 false) as [ | | | s m e] eqn:B64; simpl in *.
-- assert (0 = n) by (apply eq_Z2R; auto). subst n. simpl in GT. omegaContradiction.
-- discriminate.
-- discriminate.
-- set (n1 := cond_Zopp s (Z.pos m)) in *.
-  generalize (binary_normalize32_correct n1 e s).
-  fold (binary_normalize32 n1 e s). intros C.
-  generalize (binary_normalize32_correct n 0 false).
-  fold (binary_normalize32 n 0 false). intros D.
-  assert (A': @F2R radix2 {| Fnum := n; Fexp := 0 |} = Z2R n).
-  { unfold F2R. apply Rmult_1_r. }
-  rewrite A in C. rewrite A' in D.
-  destruct (Rlt_bool
-         (Rabs
-            (round radix2 (FLT_exp (3 - 128 - 24) 24) (round_mode mode_NE)
-               (Z2R n))) (bpow radix2 128)).
-+ destruct C as [C1 [C2 _]]; destruct D as [D1 [D2 _]].
-  assert (1 <= Rabs (round radix2 (FLT_exp (3 - 128 - 24) 24) (round_mode mode_NE) (Z2R n)))%R.
-  { apply abs_round_ge_generic.
-    apply fexp_correct. red. omega.
-    apply valid_rnd_round_mode.
-    apply generic_format_bpow with (e := 0). compute. congruence.
-    rewrite <- Z2R_abs. change 1%R with (Z2R 1). apply Z2R_le. omega. }
-  apply B2R_inj.
-  apply is_finite_strict_ge_1; auto. rewrite C1; auto.
-  apply is_finite_strict_ge_1; auto. rewrite D1; auto.
-  congruence.
-+ apply B2FF_inj. congruence.
-Qed.
-
-Theorem singleofint_floatofint:
-  forall n, singleofint n = singleoffloat (floatofint n).
-Proof.
-  intros. symmetry. apply single_float_of_int.
-  generalize (Int.signed_range n). smart_omega.
-Qed.
-
-Theorem singleofintu_floatofintu:
-  forall n, singleofintu n = singleoffloat (floatofintu n).
-Proof.
-  intros. symmetry. apply single_float_of_int.
-  generalize (Int.unsigned_range n). smart_omega.
-Qed.
-
-Theorem mul2_add:
-  forall f, add f f = mul f (floatofint (Int.repr 2%Z)).
-Proof.
-  intros. unfold add, b64_plus, mul, b64_mult.
-  destruct (is_finite_strict _ _ f) eqn:EQFINST.
-  - assert (EQFIN:is_finite _ _ f = true) by (destruct f; simpl in *; congruence).
-    pose proof (Bplus_correct 53 1024 eq_refl eq_refl binop_pl mode_NE f f EQFIN EQFIN).
-    pose proof (Bmult_correct 53 1024 eq_refl eq_refl binop_pl mode_NE f
-                              (floatofint (Int.repr 2%Z))).
-    rewrite <- double, Rmult_comm in H.
-    replace (B2R 53 1024 (floatofint (Int.repr 2))) with 2%R in H0 by (compute; field).
-    destruct Rlt_bool.
-    + destruct H0 as [? []], H as [? []].
-      rewrite EQFIN in H1.
-      apply B2R_Bsign_inj; auto.
-      etransitivity. apply H. symmetry. apply H0.
-      etransitivity. apply H4. symmetry. etransitivity. apply H2.
-      destruct Bmult; try reflexivity; discriminate.
-      simpl. rewrite xorb_false_r.
-      erewrite <- Rmult_0_l, Rcompare_mult_r.
-      destruct f; try discriminate EQFINST.
-      simpl. unfold F2R.
-      erewrite <- Rmult_0_l, Rcompare_mult_r.
-      rewrite Rcompare_Z2R with (y:=0).
-      destruct b; reflexivity.
-      apply bpow_gt_0.
-      apply (Z2R_lt 0 2). omega.
-    + destruct H.
-      apply B2FF_inj.
-      etransitivity. apply H.
-      symmetry. etransitivity. apply H0.
-      f_equal. destruct Bsign; reflexivity.
-  - destruct f as [[]|[]| |]; try discriminate; simpl. 
-    auto. auto. auto. auto.
-    destruct (Archi.choose_binop_pl b n b n); auto.
-Qed.
-
-Program Definition pow2_float (b:bool) (e:Z) (H:-1023 < e < 1023) : float :=
-  B754_finite _ _ b (nat_iter 52 xO xH) (e-52) _.
-Next Obligation.
-  unfold Fappli_IEEE.bounded, canonic_mantissa.
-  rewrite andb_true_iff, Zle_bool_true by omega. split; auto.
-  apply Zeq_bool_true. unfold FLT_exp. simpl Z.of_nat.
-  apply Z.max_case_strong; omega.
-Qed.
-
-Theorem mul_div_pow2:
-  forall b e f H H',
-    mul f (pow2_float b e H) = div f (pow2_float b (-e) H').
-Proof.
-  intros. unfold mul, b64_mult, div, b64_div.
-  pose proof (Bmult_correct 53 1024 eq_refl eq_refl binop_pl mode_NE f (pow2_float b e H)).
-  pose proof (Bdiv_correct 53 1024 eq_refl eq_refl binop_pl mode_NE f (pow2_float b (-e) H')).
-  lapply H1. clear H1. intro.
-  change (is_finite 53 1024 (pow2_float b e H)) with true in H0.
-  unfold Rdiv in H1.
-  replace (/ B2R 53 1024 (pow2_float b (-e) H'))%R
-    with (B2R 53 1024 (pow2_float b e H)) in H1.
-  destruct (is_finite _ _ f) eqn:EQFIN.
-  - destruct Rlt_bool.
-    + destruct H0 as [? []], H1 as [? []].
-      apply B2R_Bsign_inj; auto.
-      etransitivity. apply H0. symmetry. apply H1.
-      etransitivity. apply H3. destruct Bmult; try discriminate H2; reflexivity.
-      symmetry. etransitivity. apply H5. destruct Bdiv; try discriminate H4; reflexivity.
-      reflexivity.
-    + apply B2FF_inj.
-      etransitivity. apply H0. symmetry. etransitivity. apply H1.
-      reflexivity.
-  - destruct f; try discriminate EQFIN; auto. 
-  - simpl.
-    assert ((4503599627370496 * bpow radix2 (e - 52))%R =
-            (/ (4503599627370496 * bpow radix2 (- e - 52)))%R).
-    { etransitivity. symmetry. apply (bpow_plus radix2 52).
-      symmetry. etransitivity. apply f_equal. symmetry. apply (bpow_plus radix2 52).
-      rewrite <- bpow_opp. f_equal. ring. }
-    destruct b. unfold cond_Zopp.
-    rewrite !F2R_Zopp, <- Ropp_inv_permute. f_equal. auto.
-    intro. apply F2R_eq_0_reg in H3. omega.
-    apply H2.
-  - simpl. intro. apply F2R_eq_0_reg in H2.
-    destruct b; simpl in H2; omega.
-Qed.
-
-Definition exact_inverse_mantissa := nat_iter 52 xO xH.
-
-Program Definition exact_inverse (f: float) : option float :=
-  match f with
-  | B754_finite s m e B =>
-      if peq m exact_inverse_mantissa then
-      if zlt (-1023) (e + 52) then
-      if zlt (e + 52) 1023 then
-        Some(B754_finite _ _ s m (-e - 104) _)
-      else None else None else None
-  | _ => None
-  end.
-Next Obligation.
-  unfold Fappli_IEEE.bounded, canonic_mantissa. apply andb_true_iff; split.
-  simpl Z.of_nat. apply Zeq_bool_true. unfold FLT_exp. apply Z.max_case_strong; omega.
-  apply Zle_bool_true. omega.  
-Qed.
-
-Remark B754_finite_eq:
-  forall s1 m1 e1 B1 s2 m2 e2 B2,
-  s1 = s2 -> m1 = m2 -> e1 = e2 ->
-  B754_finite _ _ s1 m1 e1 B1 = (B754_finite _ _ s2 m2 e2 B2 : float).
+  of_int x = sub (from_words ox4330_0000 (Int.add x ox8000_0000))
+                 (from_words ox4330_0000 ox8000_0000).
 Proof.
-  intros. subst. f_equal. apply proof_irrelevance. 
-Qed.
-
-Theorem div_mul_inverse:
-  forall x y z, exact_inverse y = Some z -> div x y = mul x z.
-Proof with (try discriminate).
-  unfold exact_inverse; intros. destruct y...
-  destruct (peq m exact_inverse_mantissa)...
-  destruct (zlt (-1023) (e + 52))...
-  destruct (zlt (e + 52) 1023)...
-  inv H.
-  set (n := - e - 52).
-  assert (RNG1: -1023 < n < 1023) by (unfold n; omega).
-  assert (RNG2: -1023 < -n < 1023) by (unfold n; omega).
-  symmetry. 
-  transitivity (mul x (pow2_float b n RNG1)).
-  f_equal. apply B754_finite_eq; auto. unfold n; omega.
-  transitivity (div x (pow2_float b (-n) RNG2)).
-  apply mul_div_pow2. 
-  f_equal. apply B754_finite_eq; auto. unfold n; omega.
-Qed.
-
-Theorem floatoflongu_decomp:
-  forall l, floatoflongu l =
-    add (mul (floatofintu (Int64.hiword l)) (pow2_float false 32 (conj eq_refl eq_refl)))
-        (floatofintu (Int64.loword l)).
-Proof.
-  intros.
-  unfold floatofintu.
-  pose proof (Int.unsigned_range (Int64.loword l)).
-  pose proof (Int.unsigned_range (Int64.hiword l)).
-  pose proof  (Int64.unsigned_range l).
-  compute_this Int.modulus.
-  destruct (binary_normalize64_exact (Int.unsigned (Int64.loword l)));
-    [compute_this (2 ^ 53); omega|].
-  destruct (binary_normalize64_exact (Int.unsigned (Int64.hiword l)));
-    [compute_this (2 ^ 53); omega|].
-  unfold mul, b64_mult.
-  pose proof (Bmult_correct 53 1024 eq_refl eq_refl binop_pl mode_NE
-                            (binary_normalize64 (Int.unsigned (Int64.hiword l)) 0 false)
-                            (pow2_float false 32 (conj eq_refl eq_refl))).
-  rewrite H4 in H6.
-  replace (B2R 53 1024 (pow2_float false 32 (conj eq_refl eq_refl)))
-  with (Z2R 4294967296)%R in H6 by (compute; field).
-  rewrite <- Z2R_mult in H6.
-  rewrite round_generic in H6.
-  - rewrite Rlt_bool_true in H6.
-    + rewrite H5 in H6.
-      destruct H6 as [? [? ?]].
-      { unfold add, b64_plus.
-        pose proof (Bplus_correct 53 1024 eq_refl eq_refl binop_pl mode_NE
-                      (Bmult 53 1024 eq_refl eq_refl binop_pl mode_NE
-                         (binary_normalize64 (Int.unsigned (Int64.hiword l)) 0 false)
-                            (pow2_float false 32 (conj eq_refl eq_refl)))
-                            (binary_normalize64 (Int.unsigned (Int64.loword l)) 0 false) H7 H3).
-        rewrite H6, H2, <- Z2R_plus in H9.
-        change 4294967296 with (two_p 32) in H9.
-        rewrite <- Int64.ofwords_add', Int64.ofwords_recompose in H9.
-        assert (Rabs (round radix2 (FLT_exp (3 - 1024 - 53) 53)
-                            (round_mode mode_NE) (Z2R (Int64.unsigned l))) <
-                bpow radix2 1024)%R.
-        { rewrite <- round_NE_abs by (apply fexp_correct; reflexivity).
-          eapply Rle_lt_trans with (Z2R (two_p 64)). 2:apply Z2R_lt; reflexivity.
-          erewrite <- round_generic.
-          - apply round_le. apply fexp_correct; reflexivity.
-            apply (valid_rnd_round_mode mode_NE).
-            rewrite <- Z2R_abs. apply Z2R_le. change (two_p 64) with Int64.modulus. zify; omega.
-          - apply (valid_rnd_round_mode mode_NE).
-          - apply (generic_format_bpow radix2 _ 64). compute. discriminate. }
-        rewrite Rlt_bool_true in H9 by auto.
-        unfold floatoflongu, binary_normalize64.
-        pose proof (binary_normalize64_correct (Int64.unsigned l) 0 false).
-        replace (F2R (beta:=radix2) {| Fnum := Int64.unsigned l; Fexp := 0 |})
-        with (Z2R (Int64.unsigned l)) in H11
-        by (unfold F2R, Fexp, Fnum, bpow; field).
-        rewrite Rlt_bool_true in H11 by auto.
-        destruct (Int64.eq_dec l Int64.zero). subst. reflexivity.
-        destruct H11, H9.
-        assert (1 <= round radix2 (FLT_exp (3 - 1024 - 53) 53) (round_mode mode_NE)
-                           (Z2R (Int64.unsigned l)))%R.
-        { erewrite <- round_generic with (x:=1%R).
-          apply round_le. apply fexp_correct. reflexivity. apply valid_rnd_round_mode.
-          assert (Int64.unsigned l <> 0).
-          { contradict n. rewrite <- (Int64.repr_unsigned l), n. auto. }
-          apply (Z2R_le 1). omega.
-          apply valid_rnd_round_mode.
-          apply (generic_format_bpow _ _ 0). compute. discriminate. }
-        unfold binary_normalize64 in *.
-        apply B2R_inj.
-        + destruct H12, (binary_normalize 53 1024 eq_refl eq_refl mode_NE (Int64.unsigned l) 0 false); try discriminate.
-          unfold B2R in H11. rewrite <- H11 in H14. apply (le_Z2R 1 0) in H14. omega.
-          auto.
-        + destruct H13; match goal with Hf0:is_finite _ _ ?f0 = true,
-                                            Hf1:B2R _ _ ?f1 = _ |-
-                                        is_finite_strict _ _ ?f = true =>
-                                        change f0 with f in Hf0; change f1 with f in Hf1;
-                                        destruct f
-                        end; try discriminate.
-          unfold B2R in H9. rewrite <- H9 in H14. apply (le_Z2R 1 0) in H14. omega.
-          auto.
-        + rewrite H11. symmetry. apply H9. }
-    + rewrite <- Z2R_abs.
-      apply (Z2R_lt _ (radix2 ^ 1024)).
-      compute_this (radix2 ^ 1024); zify; omega.
-  - apply valid_rnd_round_mode.
-  - destruct (Z.eq_dec (Int.unsigned (Int64.hiword l)) 0).
-    rewrite e. apply generic_format_0.
-    apply generic_format_FLT_FLX.
-    + apply Rle_trans with (bpow radix2 0). apply bpow_le. omega.
-      rewrite <- Z2R_abs. apply (Z2R_le 1).
-      clear - n. zify; omega.
-    + apply generic_format_FLX.
-      eexists {| Fnum := Int.unsigned (Int64.hiword l); Fexp := 32 |}.
-      unfold F2R, Fnum, Fexp. split.
-      rewrite Z2R_mult. auto.
-      compute_this (radix2 ^ 53). zify; omega.
+  intros. 
+  pose proof (Int.signed_range x).
+  rewrite ! from_words_eq. rewrite ox8000_0000_signed_unsigned.
+  change (Int.unsigned ox8000_0000) with Int.half_modulus.
+  unfold sub, b64_minus. rewrite BofZ_minus. 
+  unfold of_int, b64_of_Z. f_equal. omega.
+  apply integer_representable_n; auto; smart_omega.
+  apply integer_representable_n; auto; smart_omega.
 Qed.
 
 Definition ox4530_0000 := Int.repr 1160773632.        (**r [0x4530_0000] *)
@@ -1446,359 +661,694 @@ Qed.
 
 Lemma from_words_value':
   forall x,
-    B2R _ _ (from_words ox4530_0000 x) =
-    (bpow radix2 84 + Z2R (Int.unsigned x * two_p 32))%R /\
-    is_finite _ _ (from_words ox4530_0000 x) = true.
+     B2R _ _ (from_words ox4530_0000 x) = (bpow radix2 84 + Z2R (Int.unsigned x * two_p 32))%R
+  /\ is_finite _ _ (from_words ox4530_0000 x) = true
+  /\ Bsign _ _ (from_words ox4530_0000 x) = false.
 Proof.
-  intros; unfold from_words, double_of_bits, b64_of_bits, binary_float_of_bits.
-  rewrite B2R_FF2B. rewrite is_finite_FF2B.
+  intros; unfold from_words, of_bits, b64_of_bits, binary_float_of_bits.
+  rewrite B2R_FF2B, is_finite_FF2B, Bsign_FF2B.
   unfold binary_float_of_bits_aux; rewrite split_bits_or'; simpl; pose proof (Int.unsigned_range x).
   destruct (Int.unsigned x + Zpower_pos 2 52) eqn:?.
   exfalso; now smart_omega.
-  simpl; rewrite <- Heqz;  unfold F2R; simpl.
+  simpl; rewrite <- Heqz;  unfold F2R; simpl. split; auto.
   rewrite <- (Z2R_plus 19342813113834066795298816), <- (Z2R_mult _ 4294967296).
-  split; [f_equal; compute_this (Zpower_pos 2 52);
-          compute_this (two_power_pos 32); ring | reflexivity].
+  f_equal; compute_this (Zpower_pos 2 52); compute_this (two_power_pos 32); ring.
   assert (Zneg p < 0) by reflexivity.
   exfalso; now smart_omega.
 Qed.
 
-Theorem floatoflongu_from_words:
+Lemma from_words_eq':
+  forall x, from_words ox4530_0000 x = BofZ 53 1024 __ __ (2^84 + Int.unsigned x * 2^32).
+Proof.
+  intros. 
+  pose proof (Int.unsigned_range x).
+  destruct (from_words_value' x) as (A & B & C).
+  destruct (BofZ_representable 53 1024 __ __ (2^84 + Int.unsigned x * 2^32)) as (D & E & F).
+  replace (2^84 + Int.unsigned x * 2^32)
+    with  ((2^52 + Int.unsigned x) * 2^32) by ring. 
+  apply integer_representable_n2p; auto. smart_omega. omega. omega.
+  apply B2R_Bsign_inj; auto.
+  rewrite A, D. rewrite <- Z2R_Zpower by omega. rewrite <- Z2R_plus. auto. 
+  rewrite C, F. symmetry. apply Zlt_bool_false. 
+  compute_this (2^84); compute_this (2^32); omega.
+Qed.
+
+Theorem of_longu_from_words:
   forall l,
-  floatoflongu l =
+  of_longu l =
     add (sub (from_words ox4530_0000 (Int64.hiword l))
              (from_words ox4530_0000 (Int.repr (two_p 20))))
         (from_words ox4330_0000 (Int64.loword l)).
 Proof.
   intros.
-  pose proof  (Int64.unsigned_range l).
+  pose proof (Int64.unsigned_range l).
   pose proof (Int.unsigned_range (Int64.hiword l)).
-  destruct (from_words_value (Int64.loword l)).
-  destruct (from_words_value' (Int64.hiword l)).
-  destruct (from_words_value' (Int.repr (two_p 20))).
-  unfold sub, b64_minus.
-  pose proof (Bminus_correct 53 1024 eq_refl eq_refl binop_pl mode_NE
-                             (from_words ox4530_0000 (Int64.hiword l))
-                             (from_words ox4530_0000 (Int.repr (two_p 20))) H4 H6).
-  rewrite round_generic in H7.
-  - rewrite H3, H5 in H7.
-    replace (bpow radix2 84 + Z2R (Int.unsigned (Int64.hiword l) * two_p 32) -
-             (bpow radix2 84 + Z2R (Int.unsigned (Int.repr (two_p 20)) * two_p 32)))%R
-    with (Z2R (Int.unsigned (Int64.hiword l) * two_p 32 - two_p 52)) in H7.
-    + rewrite Rlt_bool_true in H7.
-      * { destruct H7 as [? []].
-          unfold floatoflongu, binary_normalize64.
-          pose proof (binary_normalize64_correct (Int64.unsigned l) 0 false).
-          replace (F2R (beta:=radix2) {| Fnum := Int64.unsigned l; Fexp := 0 |})
-          with (Z2R (Int64.unsigned l)) in H10
-          by (unfold F2R, Fexp, Fnum, bpow; field).
-          assert (Rabs (round radix2 (FLT_exp (3 - 1024 - 53) 53)
-                              (round_mode mode_NE) (Z2R (Int64.unsigned l))) <
-                  bpow radix2 1024)%R.
-          { rewrite <- round_NE_abs by (apply fexp_correct; reflexivity).
-          eapply Rle_lt_trans with (Z2R (two_p 64)). 2:apply Z2R_lt; reflexivity.
-          erewrite <- round_generic.
-            - apply round_le. apply fexp_correct; reflexivity.
-              apply (valid_rnd_round_mode mode_NE).
-              rewrite <- Z2R_abs. apply Z2R_le. change (two_p 64) with Int64.modulus. zify; omega.
-            - apply (valid_rnd_round_mode mode_NE).
-            - apply (generic_format_bpow radix2 _ 64). compute. discriminate. }
-          rewrite Rlt_bool_true in H10 by auto.
-          unfold add, b64_plus.
-          pose proof (Bplus_correct 53 1024 eq_refl eq_refl binop_pl mode_NE
-                                    (Bminus 53 1024 eq_refl eq_refl binop_pl mode_NE
-                                            (from_words ox4530_0000 (Int64.hiword l))
-                                            (from_words ox4530_0000 (Int.repr (two_p 20))))
-                                    (from_words ox4330_0000 (Int64.loword l)) H8 H2).
-          change (bpow radix2 52) with (Z2R (two_p 52)) in H1.
-          rewrite H7, H1, <- !Z2R_plus in H12.
-          replace (Int.unsigned (Int64.hiword l) * two_p 32 - two_p 52 +
-                   (two_p 52 + Int.unsigned (Int64.loword l)))
-          with (Int.unsigned (Int64.hiword l) * two_p 32 + Int.unsigned (Int64.loword l))
-            in H12 by ring.
-          rewrite <- Int64.ofwords_add', Int64.ofwords_recompose, Rlt_bool_true in H12 by auto.
-          destruct (Z.eq_dec (Int64.unsigned l) 0).
-          - apply (f_equal Int64.repr) in e. rewrite Int64.repr_unsigned in e.
-            subst. reflexivity.
-          - destruct H12 as [? []], H10 as [? []].
-            assert (1 <= Rabs (round radix2 (FLT_exp (3 - 1024 - 53) 53)
-                                (round_mode mode_NE) (Z2R (Int64.unsigned l)))) %R.
-            { rewrite <- round_NE_abs, <- Z2R_abs by (apply fexp_correct; reflexivity).
-              erewrite <- round_generic with (x := 1%R). 
-              3:eapply (generic_format_bpow _ _ 0).
-              apply round_le, (Z2R_le 1).
-              apply fexp_correct; reflexivity. apply (valid_rnd_round_mode mode_NE).
-              zify; omega.
-              apply (valid_rnd_round_mode mode_NE).
-              compute; discriminate. }
-            eapply B2R_inj.
-            + destruct binary_normalize; try discriminate H15.
-              unfold B2R in H10. rewrite <- H10, Rabs_R0 in H17. apply (le_Z2R 1 0) in H17. omega.
-              auto.
-            + match goal with Hf0:is_finite _ _ ?f0 = true,
-                                            Hf1:B2R _ _ ?f1 = _ |-
-                                        is_finite_strict _ _ ?f = true =>
-                                        change f0 with f in Hf0; change f1 with f in Hf1;
-                                        destruct f
-              end; try discriminate H13.
-              unfold B2R in H12. rewrite <- H12, Rabs_R0 in H17. apply (le_Z2R 1 0) in H17. omega.
-              auto.
-            + etransitivity; eauto. }
-      * rewrite <- Z2R_abs. apply (Z2R_lt _ (2^1024)).
-        compute_this Int.modulus; compute_this (two_p 32);
-        compute_this (two_p 52); compute_this (2^1024).
-        clear - H0. zify; omega.
-    + rewrite Z2R_minus, Int.unsigned_repr, <- two_p_is_exp, !Z2R_mult.
-      ring_simplify. reflexivity.
-      omega. omega. compute; split; discriminate.
-  - apply valid_rnd_round_mode.
-  - apply sterbenz.
-    + apply FLT_exp_monotone.
-    + apply generic_format_B2R.
-    + apply generic_format_B2R.
-    + rewrite H3, H5, Int.unsigned_repr by (compute; split; discriminate).
-      unfold bpow. rewrite <- !Z2R_plus, <- (Z2R_mult 2).
-      compute_this (Z.pow_pos radix2 84);
-      compute_this (two_p 20 * two_p 32); compute_this (two_p 32);
-      compute_this (Int.modulus).
-      change (19342813113834066795298816 + 4503599627370496)
-      with (9671406559168833211334656 * 2).
-      unfold Rdiv. rewrite Z2R_mult, Rmult_assoc, Rinv_r, Rmult_1_r by (apply (Z2R_neq 2 0); omega).
-      split; apply Z2R_le; omega.
-Qed.
-
-Theorem floatoflong_decomp:
-  forall l, floatoflong l =
-    add (mul (floatofint (Int64.hiword l)) (pow2_float false 32 (conj eq_refl eq_refl)))
-        (floatofintu (Int64.loword l)).
-Proof.
-  intros.
-  unfold floatofintu, floatofint.
-  destruct (binary_normalize64_exact (Int.signed (Int64.hiword l))).
-  { pose proof (Int.signed_range (Int64.hiword l)).
-    revert H. generalize (Int.signed (Int64.hiword l)).
-    change (forall z : Z, -2147483648 <= z <= 2147483647 -> - 9007199254740992 < z < 9007199254740992).
-    intros. omega. }
-  destruct (binary_normalize64_exact (Int.unsigned (Int64.loword l))).
-  { pose proof (Int.unsigned_range (Int64.loword l)).
-    revert H1. generalize (Int.unsigned (Int64.loword l)).
-    change (forall z : Z, 0 <= z < 4294967296 -> - 9007199254740992 < z < 9007199254740992).
-    intros. omega. }
-  unfold mul, b64_mult.
-  pose proof (Bmult_correct 53 1024 eq_refl eq_refl binop_pl mode_NE
-                            (binary_normalize64 (Int.signed (Int64.hiword l)) 0 false)
-                            (pow2_float false 32 (conj eq_refl eq_refl))).
-  rewrite H in H3.
-  remember (B2R 53 1024 (pow2_float false 32 (conj eq_refl eq_refl))).
-  compute in Heqr.
-  change 4503599627370496%R with (Z2R (1048576*4294967296)) in Heqr.
-  change 1048576%R with (Z2R 1048576) in Heqr.
-  rewrite Z2R_mult in Heqr.
-  assert (r = Z2R 4294967296).
-  { rewrite Heqr. field. change 0%R with (Z2R 0). intro. apply eq_Z2R in H4. discriminate. }
-  clear Heqr. subst.
-  pose proof (Int.signed_range (Int64.hiword l)).
-  change Int.min_signed with (-2147483648) in H4.
-  change Int.max_signed with 2147483647 in H4.
-  rewrite <- Z2R_mult in H3.
-  rewrite round_generic in H3.
-  - destruct (Rlt_bool_spec (Rabs (Z2R (Int.signed (Int64.hiword l) * 4294967296)))
-                            (bpow radix2 1024)).
-    + rewrite H0 in H3.
-      destruct H3 as [? [? ?]].
-      { unfold add, b64_plus.
-        pose proof (Bplus_correct 53 1024 eq_refl eq_refl binop_pl mode_NE
-                      (Bmult 53 1024 eq_refl eq_refl binop_pl mode_NE
-                         (binary_normalize64 (Int.signed (Int64.hiword l)) 0 false)
-                            (pow2_float false 32 (conj eq_refl eq_refl)))
-                            (binary_normalize64 (Int.unsigned (Int64.loword l)) 0 false) H6 H2).
-        rewrite H3, H1, <- Z2R_plus in H8.
-        change 4294967296 with (two_p 32) in H8.
-        rewrite <- Int64.ofwords_add'', Int64.ofwords_recompose in H8.
-        destruct (Rlt_bool_spec
-                    (Rabs
-                       (round radix2 (FLT_exp (3 - 1024 - 53) 53)
-                              (round_mode mode_NE) (Z2R (Int64.signed l))))
-                    (bpow radix2 1024)).
-        - unfold floatoflong. unfold binary_normalize64.
-          pose proof (binary_normalize64_correct (Int64.signed l) 0 false).
-          replace (F2R (beta:=radix2) {| Fnum := Int64.signed l; Fexp := 0 |})
-          with (Z2R (Int64.signed l)) in H10
-          by (unfold F2R, Fexp, Fnum, bpow; field).
-          rewrite Rlt_bool_true in H10 by auto.
-          destruct (Int64.eq_dec l Int64.zero). subst. reflexivity.
-          destruct H10, H8.
-          assert (1 <= round radix2 (FLT_exp (3 - 1024 - 53) 53) (round_mode mode_NE)
-                             (Z2R (Zabs (Int64.signed l))))%R.
-          { erewrite <- round_generic with (x:=1%R).
-            apply round_le. apply fexp_correct. reflexivity. apply valid_rnd_round_mode.
-            assert (Int64.signed l <> 0).
-            { contradict n. rewrite <- (Int64.repr_signed l), n. auto. }
-            change 1%R with (Z2R 1). apply Z2R_le.
-            zify. omega. apply valid_rnd_round_mode.
-            apply (generic_format_bpow _ _ 0). compute. discriminate. }
-          rewrite Z2R_abs in H13.
-          rewrite round_NE_abs in H13 by (apply fexp_correct; reflexivity).
-          change ZnearestE with (round_mode mode_NE) in H13.
-          unfold binary_normalize64 in *.
-          apply B2R_inj.
-          + destruct H11, (binary_normalize 53 1024 eq_refl eq_refl mode_NE (Int64.signed l) 0 false); try discriminate.
-            unfold B2R in H10. rewrite <- H10, Rabs_R0 in H13. apply (le_Z2R 1 0) in H13. omega.
-            auto.
-          + destruct H12; match goal with Hf0:is_finite _ _ ?f0 = true,
-                            Hf1:B2R _ _ ?f1 = _ |-
-                            is_finite_strict _ _ ?f = true =>
-                            change f0 with f in Hf0; change f1 with f in Hf1;
-                            destruct f
-            end; try discriminate.
-            unfold B2R in H8. rewrite <- H8, Rabs_R0 in H13. apply (le_Z2R 1 0) in H13. omega.
-            auto.
-          + rewrite H10. symmetry. apply H8.
-        - exfalso.
-          eapply Rle_trans with (r3:=round radix2 (FLT_exp (3 - 1024 - 53) 53)
-                                           (round_mode mode_NE) (bpow radix2 64)) in H9.
-          rewrite round_generic in H9.
-          + eapply le_bpow in H9. omega.
-          + apply valid_rnd_round_mode.
-          + apply generic_format_bpow. compute. discriminate.
-          + rewrite <- round_NE_abs. 2:apply fexp_correct; reflexivity.
-            apply round_le. apply fexp_correct; reflexivity. apply valid_rnd_round_mode.
-            rewrite <- Z2R_abs. change (bpow radix2 64)%R with (Z2R Int64.modulus).
-            apply Z2R_le.
-            destruct (Int64.signed_range l).
-            assert (-Int64.modulus < Int64.min_signed) by reflexivity.
-            assert (Int64.max_signed < Int64.modulus) by reflexivity.
-            zify. omega. }
-    + exfalso.
-      rewrite <- Z2R_abs in H5.
-      change (bpow radix2 1024) with (Z2R (radix2 ^ 1024)) in H5.
-      apply le_Z2R in H5. assert (radix2 ^ 1024 < 18446744073709551616) by (zify; omega).
-      discriminate.
-  - apply valid_rnd_round_mode.
-  - destruct (Z.eq_dec (Int.signed (Int64.hiword l)) 0).
-    rewrite e. apply generic_format_0.
-    apply generic_format_FLT_FLX.
-    + apply Rle_trans with (bpow radix2 0). apply bpow_le. omega.
-      change (bpow radix2 0) with (Z2R 1). rewrite <- Z2R_abs. apply Z2R_le.
-      clear - n H4. zify; omega.
-    + apply generic_format_FLX.
-      eexists {| Fnum := Int.signed (Int64.hiword l); Fexp := 32 |}.
-      unfold F2R, Fnum, Fexp. split.
-      rewrite Z2R_mult. auto.
-      change (radix2 ^ 53) with 9007199254740992.
-      clear -n H4. zify; omega.
-Qed.
-
-Theorem floatoflong_from_words:
+  pose proof (Int.unsigned_range (Int64.loword l)).
+  rewrite ! from_words_eq, ! from_words_eq'. 
+  set (p20 := Int.unsigned (Int.repr (two_p 20))).
+  set (x := Int64.unsigned l) in *;
+  set (xl := Int.unsigned (Int64.loword l)) in *;
+  set (xh := Int.unsigned (Int64.hiword l)) in *.
+  unfold sub, b64_minus. rewrite BofZ_minus.
+  replace (2^84 + xh * 2^32 - (2^84 + p20 * 2^32))
+     with ((xh - p20) * 2^32) by ring.
+  unfold add, b64_plus. rewrite BofZ_plus. 
+  unfold of_longu, b64_of_Z. f_equal. 
+  rewrite <- (Int64.ofwords_recompose l) at 1. rewrite Int64.ofwords_add'.
+  fold xh; fold xl. compute_this (two_p 32); compute_this p20; ring.
+  apply integer_representable_n2p; auto.
+  compute_this p20; smart_omega. omega. omega.
+  apply integer_representable_n; auto; smart_omega. 
+  replace (2^84 + xh * 2^32) with ((2^52 + xh) * 2^32) by ring.
+  apply integer_representable_n2p; auto. smart_omega. omega. omega.
+  change (2^84 + p20 * 2^32) with ((2^52 + 1048576) * 2^32).
+  apply integer_representable_n2p; auto. omega. omega. 
+Qed.
+
+Theorem of_long_from_words:
   forall l,
-  floatoflong l =
+  of_long l =
     add (sub (from_words ox4530_0000 (Int.add (Int64.hiword l) ox8000_0000))
              (from_words ox4530_0000 (Int.repr (two_p 20+two_p 31))))
         (from_words ox4330_0000 (Int64.loword l)).
 Proof.
   intros.
-  pose proof  (Int64.signed_range l);
-  compute_this (Int64.min_signed); compute_this (Int64.max_signed).
-  pose proof (Int.unsigned_range (Int.add (Int64.hiword l) ox8000_0000)).
-  destruct (from_words_value (Int64.loword l)).
-  destruct (from_words_value' (Int.add (Int64.hiword l) ox8000_0000)).
-  destruct (from_words_value' (Int.repr (two_p 20+two_p 31))).
-  unfold sub, b64_minus.
-  pose proof (Bminus_correct 53 1024 eq_refl eq_refl binop_pl mode_NE
-                             (from_words ox4530_0000 (Int.add (Int64.hiword l) ox8000_0000))
-                             (from_words ox4530_0000 (Int.repr (two_p 20+two_p 31))) H4 H6).
-  rewrite round_generic in H7.
-  - rewrite H3, H5, ox8000_0000_signed_unsigned in H7.
-    replace (bpow radix2 84 + Z2R ((Int.signed (Int64.hiword l) + Int.half_modulus) * two_p 32) -
-             (bpow radix2 84 + Z2R (Int.unsigned (Int.repr (two_p 20+two_p 31)) * two_p 32)))%R
-    with (Z2R (Int.unsigned (Int.add (Int64.hiword l) ox8000_0000) * two_p 32 -two_p 52-two_p 63)) in H7.
-    + rewrite Rlt_bool_true in H7.
-      * { destruct H7 as [? []].
-          unfold floatoflong, binary_normalize64.
-          pose proof (binary_normalize64_correct (Int64.signed l) 0 false).
-          replace (F2R (beta:=radix2) {| Fnum := Int64.signed l; Fexp := 0 |})
-          with (Z2R (Int64.signed l)) in H10
-          by (unfold F2R, Fexp, Fnum, bpow; field).
-          assert (Rabs (round radix2 (FLT_exp (3 - 1024 - 53) 53)
-                              (round_mode mode_NE) (Z2R (Int64.signed l))) <
-                  bpow radix2 1024)%R.
-          { rewrite <- round_NE_abs by (apply fexp_correct; reflexivity).
-          eapply Rle_lt_trans with (Z2R (two_p 64)). 2:apply Z2R_lt; reflexivity.
-          erewrite <- round_generic.
-            - apply round_le. apply fexp_correct; reflexivity.
-              apply (valid_rnd_round_mode mode_NE).
-              rewrite <- Z2R_abs. apply Z2R_le.
-              compute_this (two_p 64). zify; omega.
-            - apply (valid_rnd_round_mode mode_NE).
-            - apply (generic_format_bpow radix2 _ 64). compute. discriminate. }
-          rewrite Rlt_bool_true in H10 by auto.
-          unfold add, b64_plus.
-          pose proof (Bplus_correct 53 1024 eq_refl eq_refl binop_pl mode_NE
-                                    (Bminus 53 1024 eq_refl eq_refl binop_pl mode_NE
-                                            (from_words ox4530_0000 (Int.add (Int64.hiword l) ox8000_0000))
-                                            (from_words ox4530_0000 (Int.repr (two_p 20 + two_p 31))))
-                                    (from_words ox4330_0000 (Int64.loword l)) H8 H2).
-          change (bpow radix2 52) with (Z2R (two_p 52)) in H1.
-          rewrite H7, H1, <- !Z2R_plus, ox8000_0000_signed_unsigned in H12.
-          change (two_p 63) with (Int.half_modulus * two_p 32) in H12.
-          replace ((Int.signed (Int64.hiword l) + Int.half_modulus) *
-                       two_p 32 - two_p 52 - (Int.half_modulus * two_p 32) +
-                       (two_p 52 + Int.unsigned (Int64.loword l)))
-          with (Int.signed (Int64.hiword l) * two_p 32 + Int.unsigned (Int64.loword l))
-            in H12 by ring.
-          rewrite <- Int64.ofwords_add'', Int64.ofwords_recompose, Rlt_bool_true in H12 by auto.
-          destruct (Z.eq_dec (Int64.signed l) 0).
-          - apply (f_equal Int64.repr) in e. rewrite Int64.repr_signed in e.
-            subst. reflexivity.
-          - destruct H12 as [? []], H10 as [? []].
-            assert (1 <= Rabs (round radix2 (FLT_exp (3 - 1024 - 53) 53)
-                                (round_mode mode_NE) (Z2R (Int64.signed l)))) %R.
-            { rewrite <- round_NE_abs, <- Z2R_abs by (apply fexp_correct; reflexivity).
-              erewrite <- round_generic with (x := 1%R). 
-              3:eapply (generic_format_bpow _ _ 0).
-              apply round_le, (Z2R_le 1).
-              apply fexp_correct; reflexivity. apply (valid_rnd_round_mode mode_NE).
-              zify; omega.
-              apply (valid_rnd_round_mode mode_NE).
-              compute; discriminate. }
-            eapply B2R_inj.
-            + destruct binary_normalize; try discriminate H15.
-              unfold B2R in H10. rewrite <- H10, Rabs_R0 in H17. apply (le_Z2R 1 0) in H17. omega.
-              auto.
-            + match goal with Hf0:is_finite _ _ ?f0 = true,
-                                            Hf1:B2R _ _ ?f1 = _ |-
-                                        is_finite_strict _ _ ?f = true =>
-                                        change f0 with f in Hf0; change f1 with f in Hf1;
-                                        destruct f
-              end; try discriminate H13.
-              unfold B2R in H12. rewrite <- H12, Rabs_R0 in H17. apply (le_Z2R 1 0) in H17. omega.
-              auto.
-            + etransitivity; eauto. }
-      * rewrite <- Z2R_abs. apply (Z2R_lt _ (2^1024)).
-        compute_this Int.modulus; compute_this (two_p 32);
-        compute_this (two_p 52); compute_this (two_p 63); compute_this (2^1024).
-        clear - H0. zify; omega.
-    + rewrite ox8000_0000_signed_unsigned, !Z2R_minus.
-      compute_this (Z2R (Int.unsigned (Int.repr (two_p 20 + two_p 31)) * two_p 32)).
-      compute_this (Z2R (two_p 52)). compute_this (Z2R (two_p 63)). ring.
-  - apply valid_rnd_round_mode.
-  - apply sterbenz.
-    + apply FLT_exp_monotone.
-    + apply generic_format_B2R.
-    + apply generic_format_B2R.
-    + rewrite H3, H5, Int.unsigned_repr by (compute; split; discriminate).
-      unfold bpow. rewrite <- !Z2R_plus, <- (Z2R_mult 2).
-      compute_this (Z.pow_pos radix2 84); compute_this (Z.pow_pos radix2 84);
-      compute_this ((two_p 20 + two_p 31) * two_p 32); compute_this (two_p 32);
-      compute_this (Int.modulus).
-      change (19342813113834066795298816 + 9227875636482146304)
-      with (9671411170854851638722560 * 2).
-      unfold Rdiv. rewrite Z2R_mult, Rmult_assoc, Rinv_r, Rmult_1_r by (apply (Z2R_neq 2 0); omega).
-      split; apply Z2R_le; omega.
+  pose proof (Int64.signed_range l).
+  pose proof (Int.signed_range (Int64.hiword l)).
+  pose proof (Int.unsigned_range (Int64.loword l)).
+  rewrite ! from_words_eq, ! from_words_eq'. 
+  set (p := Int.unsigned (Int.repr (two_p 20 + two_p 31))).
+  set (x := Int64.signed l) in *;
+  set (xl := Int.unsigned (Int64.loword l)) in *;
+  set (xh := Int.signed (Int64.hiword l)) in *.
+  rewrite ox8000_0000_signed_unsigned. fold xh. 
+  unfold sub, b64_minus. rewrite BofZ_minus.
+  replace (2^84 + (xh + Int.half_modulus) * 2^32 - (2^84 + p * 2^32))
+     with ((xh - 2^20) * 2^32) 
+       by (compute_this p; compute_this Int.half_modulus; ring).
+  unfold add, b64_plus. rewrite BofZ_plus. 
+  unfold of_long, b64_of_Z. f_equal. 
+  rewrite <- (Int64.ofwords_recompose l) at 1. rewrite Int64.ofwords_add''.
+  fold xh; fold xl. compute_this (two_p 32); ring. 
+  apply integer_representable_n2p; auto.
+  compute_this (2^20); smart_omega. omega. omega.
+  apply integer_representable_n; auto; smart_omega.
+  replace (2^84 + (xh + Int.half_modulus) * 2^32)
+     with ((2^52 + xh + Int.half_modulus) * 2^32) 
+       by (compute_this Int.half_modulus; ring).
+  apply integer_representable_n2p; auto. smart_omega. omega. omega.
+  change (2^84 + p * 2^32) with ((2^52 + p) * 2^32).
+  apply integer_representable_n2p; auto. 
+  compute_this p; smart_omega. omega.
 Qed.
 
-Global Opaque
-  zero eq_dec neg abs singleoffloat intoffloat intuoffloat floatofint floatofintu
-  add sub mul div cmp bits_of_double double_of_bits bits_of_single single_of_bits from_words.
+(** Conversions from 64-bit integers can be expressed in terms of
+  conversions from their 32-bit halves. *)
+
+Theorem of_longu_decomp:
+  forall l,
+  of_longu l = add (mul (of_intu (Int64.hiword l)) (b64_of_Z (2^32)))
+                   (of_intu (Int64.loword l)).
+Proof.
+  intros.
+  unfold of_longu, of_intu, b64_of_Z, add, mul, b64_plus, b64_mult.
+  pose proof (Int.unsigned_range (Int64.loword l)).
+  pose proof (Int.unsigned_range (Int64.hiword l)).
+  pose proof (Int64.unsigned_range l).
+  set (x := Int64.unsigned l) in *.
+  set (yl := Int.unsigned (Int64.loword l)) in *.
+  set (yh := Int.unsigned (Int64.hiword l)) in *.
+  assert (DECOMP: x = yh * 2^32 + yl).
+  { unfold x. rewrite <- (Int64.ofwords_recompose l). apply Int64.ofwords_add'. }
+  rewrite BofZ_mult. rewrite BofZ_plus. rewrite DECOMP; auto.
+  apply integer_representable_n2p; auto. smart_omega. omega. omega. 
+  apply integer_representable_n; auto; smart_omega.
+  apply integer_representable_n; auto; smart_omega.
+  apply integer_representable_n; auto; smart_omega.
+  compute; auto.
+Qed.
+
+Theorem of_long_decomp:
+  forall l,
+  of_long l = add (mul (of_int (Int64.hiword l)) (b64_of_Z (2^32)))
+                  (of_intu (Int64.loword l)).
+Proof.
+  intros.
+  unfold of_long, of_int, of_intu, b64_of_Z, add, mul, b64_plus, b64_mult.
+  pose proof (Int.unsigned_range (Int64.loword l)).
+  pose proof (Int.signed_range (Int64.hiword l)).
+  pose proof (Int64.signed_range l).
+  set (x := Int64.signed l) in *.
+  set (yl := Int.unsigned (Int64.loword l)) in *.
+  set (yh := Int.signed (Int64.hiword l)) in *.
+  assert (DECOMP: x = yh * 2^32 + yl).
+  { unfold x. rewrite <- (Int64.ofwords_recompose l), Int64.ofwords_add''. auto. }
+  rewrite BofZ_mult. rewrite BofZ_plus. rewrite DECOMP; auto.
+  apply integer_representable_n2p; auto. smart_omega. omega. omega.
+  apply integer_representable_n; auto; smart_omega.
+  apply integer_representable_n; auto; smart_omega.
+  apply integer_representable_n; auto. compute; intuition congruence.
+  compute; auto.
+Qed.
+
+(** Conversions from unsigned longs can be expressed in terms of conversions from signed longs.
+    If the unsigned long is too big, a round-to-odd must be performed on it
+    to avoid double rounding. *)
+
+Theorem of_longu_of_long_1:
+  forall x,
+  Int64.ltu x (Int64.repr Int64.half_modulus) = true ->
+  of_longu x = of_long x.
+Proof.
+  unfold of_longu, of_long, Int64.signed, Int64.ltu; intro.
+  change (Int64.unsigned (Int64.repr Int64.half_modulus)) with Int64.half_modulus.
+  destruct (zlt (Int64.unsigned x) Int64.half_modulus); now intuition.
+Qed.
+
+Theorem of_longu_of_long_2:
+  forall x,
+  Int64.ltu x (Int64.repr Int64.half_modulus) = false ->
+  of_longu x = mul (of_long (Int64.or (Int64.shru x Int64.one)
+                                      (Int64.and x Int64.one)))
+                   (of_int (Int.repr 2)).
+Proof.
+  intros. change (of_int (Int.repr 2)) with (BofZ 53 1024 __ __ (2^1)).
+  pose proof (Int64.unsigned_range x).
+  unfold Int64.ltu in H. 
+  change (Int64.unsigned (Int64.repr Int64.half_modulus)) with (2^63) in H.
+  destruct (zlt (Int64.unsigned x) (2^63)); inv H.
+  assert (Int64.modulus <= 2^1024 - 2^(1024-53)) by (vm_compute; intuition congruence).
+  set (n := Int64.or (Int64.shru x Int64.one) (Int64.and x Int64.one)).
+  assert (NB: forall i, 0 <= i < 64 ->
+              Int64.testbit n i =
+                if zeq i 0 then Int64.testbit x 1 || Int64.testbit x 0
+                else if zeq i 63 then false else Int64.testbit x (i + 1)).
+  { intros; unfold n; autorewrite with ints; auto. rewrite Int64.unsigned_one.
+    rewrite Int64.bits_one. compute_this Int64.zwordsize. 
+    destruct (zeq i 0); simpl proj_sumbool.
+    rewrite zlt_true by omega. rewrite andb_true_r. subst i; auto. 
+    rewrite andb_false_r, orb_false_r. 
+    destruct (zeq i 63). subst i. apply zlt_false; omega.
+    apply zlt_true; omega. }
+  assert (NB2: forall i, 0 <= i ->
+               Z.testbit (Int64.signed n * 2^1) i =
+               if zeq i 0 then false else
+               if zeq i 1 then Int64.testbit x 1 || Int64.testbit x 0 else
+               Int64.testbit x i).
+  { intros. rewrite Z.mul_pow2_bits by omega. destruct (zeq i 0).
+    apply Z.testbit_neg_r; omega.
+    rewrite Int64.bits_signed by omega. compute_this Int64.zwordsize. 
+    destruct (zlt (i-1) 64). 
+    rewrite NB by omega. destruct (zeq i 1).
+    subst. rewrite dec_eq_true by auto. auto.
+    rewrite dec_eq_false by omega. destruct (zeq (i - 1) 63). 
+    symmetry. apply Int64.bits_above. compute_this Int64.zwordsize; omega. 
+    f_equal; omega.
+    rewrite NB by omega. rewrite dec_eq_false by omega. rewrite dec_eq_true by auto. 
+    rewrite dec_eq_false by omega. symmetry. apply Int64.bits_above. compute_this Int64.zwordsize; omega. 
+  }
+  assert (EQ: Int64.signed n * 2 = int_round_odd (Int64.unsigned x) 1).
+  {
+  symmetry. apply (int_round_odd_bits 53 1024). omega.
+  intros. rewrite NB2 by omega. replace i with 0 by omega. auto. 
+  rewrite NB2 by omega. rewrite dec_eq_false by omega. rewrite dec_eq_true. 
+  rewrite orb_comm. unfold Int64.testbit. change (2^1) with 2.
+  destruct (Z.testbit (Int64.unsigned x) 0) eqn:B0;
+  [rewrite Z.testbit_true in B0 by omega|rewrite Z.testbit_false in B0 by omega];
+  change (2^0) with 1 in B0; rewrite Zdiv_1_r in B0; rewrite B0; auto.
+  intros. rewrite NB2 by omega. rewrite ! dec_eq_false by omega. auto.
+  }
+  unfold mul, of_long, of_longu, b64_mult, b64_of_Z. 
+  rewrite BofZ_mult_2p. 
+- change (2^1) with 2. rewrite EQ. apply BofZ_round_odd with (p := 1).
++ omega.
++ apply Zle_trans with Int64.modulus; trivial. smart_omega.
++ omega.
++ apply Zle_trans with (2^63). compute; intuition congruence. xomega.
+- apply Zle_trans with Int64.modulus; trivial.
+  pose proof (Int64.signed_range n).
+  compute_this Int64.min_signed; compute_this Int64.max_signed;
+  compute_this Int64.modulus; xomega.
+- assert (2^63 <= int_round_odd (Int64.unsigned x) 1).
+  { change (2^63) with (int_round_odd (2^63) 1). apply (int_round_odd_le 0 0); omega. }
+  rewrite <- EQ in H1. compute_this (2^63). compute_this (2^53). xomega. 
+- omega.
+Qed.
 
 End Float.
+
+(** * Single-precision FP numbers *)
+
+Module Float32.
+
+(** ** NaN payload manipulations *)
+
+Program Definition transform_quiet_pl (pl:nan_pl 24) : nan_pl 24 :=
+  Pos.lor pl (nat_iter 22 xO xH).
+Next Obligation.
+  destruct pl.
+  simpl. rewrite Z.ltb_lt in *.
+  assert (forall x, S (Fcore_digits.digits2_Pnat x) = Pos.to_nat (Pos.size x)).
+  { induction x0; simpl; auto; rewrite IHx0; zify; omega. }
+  fold (Z.of_nat (S (Fcore_digits.digits2_Pnat (Pos.lor x 4194304)))).
+  rewrite H, positive_nat_Z, Psize_log_inf, <- Zlog2_log_inf in *. clear H.
+  change (Z.pos (Pos.lor x 4194304)) with (Z.lor (Z.pos x) 4194304).
+  rewrite Z.log2_lor by (zify; omega).
+  apply Z.max_case. auto. simpl. omega.
+Qed.
+
+Lemma transform_quiet_pl_idempotent:
+  forall pl, transform_quiet_pl (transform_quiet_pl pl) = transform_quiet_pl pl.
+Proof.
+  intros []; simpl; intros. apply Float.nan_payload_fequal.
+  simpl. apply Float.lor_idempotent.
+Qed.
+
+Definition neg_pl (s:bool) (pl:nan_pl 24) := (negb s, pl).
+Definition abs_pl (s:bool) (pl:nan_pl 24) := (false, pl).
+
+Definition binop_pl (x y: binary32) : bool*nan_pl 24 :=
+  match x, y with
+  | B754_nan s1 pl1, B754_nan s2 pl2 =>
+      if Archi.choose_binop_pl_32 s1 pl1 s2 pl2
+      then (s2, transform_quiet_pl pl2)
+      else (s1, transform_quiet_pl pl1)
+  | B754_nan s1 pl1, _ => (s1, transform_quiet_pl pl1)
+  | _, B754_nan s2 pl2 => (s2, transform_quiet_pl pl2)
+  | _, _ => Archi.default_pl_32
+  end.
+
+(** ** Operations over single-precision floats *)
+
+Definition zero: float32 := B754_zero _ _ false. (**r the float [+0.0] *)
+
+Definition eq_dec: forall (f1 f2: float32), {f1 = f2} + {f1 <> f2} := b32_eq_dec.
+
+(** Arithmetic operations *)
+
+Definition neg: float32 -> float32 := b32_opp neg_pl. (**r opposite (change sign) *)
+Definition abs: float32 -> float32 := b32_abs abs_pl. (**r absolute value (set sign to [+]) *)
+Definition add: float32 -> float32 -> float32 := b32_plus binop_pl mode_NE. (**r addition *)
+Definition sub: float32 -> float32 -> float32 := b32_minus binop_pl mode_NE. (**r subtraction *)
+Definition mul: float32 -> float32 -> float32 := b32_mult binop_pl mode_NE. (**r multiplication *)
+Definition div: float32 -> float32 -> float32 := b32_div binop_pl mode_NE. (**r division *)
+Definition cmp (c:comparison) (f1 f2: float32) : bool := (**r comparison *)
+  cmp_of_comparison c (b32_compare f1 f2).
+
+(** Conversions *)
+
+Definition of_double : float -> float32 := Float.to_single.
+Definition to_double : float32 -> float := Float.of_single.
+
+Definition to_int (f:float32): option int := (**r conversion to signed 32-bit int *)
+  option_map Int.repr (b32_to_Z_range f Int.min_signed Int.max_signed).
+Definition to_intu (f:float32): option int := (**r conversion to unsigned 32-bit int *)
+  option_map Int.repr (b32_to_Z_range f 0 Int.max_unsigned).
+Definition to_long (f:float32): option int64 := (**r conversion to signed 64-bit int *)
+  option_map Int64.repr (b32_to_Z_range f Int64.min_signed Int64.max_signed).
+Definition to_longu (f:float32): option int64 := (**r conversion to unsigned 64-bit int *)
+  option_map Int64.repr (b32_to_Z_range f 0 Int64.max_unsigned).
+
+Definition of_int (n:int): float32 := (**r conversion from signed 32-bit int to single-precision float *)
+  b32_of_Z (Int.signed n).
+Definition of_intu (n:int): float32 := (**r conversion from unsigned 32-bit int to single-precision float *)
+  b32_of_Z (Int.unsigned n).
+
+Definition of_long (n:int64): float32 := (**r conversion from signed 64-bit int to single-precision float *)
+  b32_of_Z (Int64.signed n).
+Definition of_longu (n:int64): float32 := (**r conversion from unsigned 64-bit int to single-precision float *)
+  b32_of_Z (Int64.unsigned n).
+
+Definition from_parsed (base:positive) (intPart:positive) (expPart:Z) : float32 :=
+  build_from_parsed 24 128 __ __ base intPart expPart.
+
+(** Conversions between floats and their concrete in-memory representation
+    as a sequence of 32 bits. *)
+
+Definition to_bits (f: float32) : int := Int.repr (bits_of_b32 f).
+Definition of_bits (b: int): float32 := b32_of_bits (Int.unsigned b).
+
+(** ** Properties *)
+
+(** Commutativity properties of addition and multiplication. *)
+
+Theorem add_commut:
+  forall x y, is_nan _ _ x = false \/ is_nan _ _ y = false -> add x y = add y x.
+Proof.
+  intros. apply Bplus_commut.
+  destruct x, y; try reflexivity. simpl in H. intuition congruence.
+Qed.
+
+Theorem mul_commut:
+  forall x y, is_nan _ _ x = false \/ is_nan _ _ y = false -> mul x y = mul y x.
+Proof.
+  intros. apply Bmult_commut.
+  destruct x, y; try reflexivity. simpl in H. intuition congruence.
+Qed.
+
+(** Multiplication by 2 is diagonal addition. *)
+
+Theorem mul2_add:
+  forall f, add f f = mul f (of_int (Int.repr 2%Z)).
+Proof.
+  intros. apply Bmult2_Bplus. 
+  intros. destruct x; try discriminate. simpl. 
+  transitivity (b, transform_quiet_pl n). 
+  destruct Archi.choose_binop_pl_32; auto. 
+  destruct y; auto || discriminate.
+Qed.
+
+(** Divisions that can be turned into multiplication by an inverse. *)
+
+Definition exact_inverse : float32 -> option float32 := b32_exact_inverse. 
+
+Theorem div_mul_inverse:
+  forall x y z, exact_inverse y = Some z -> div x y = mul x z.
+Proof.
+  intros. apply Bdiv_mult_inverse; auto. 
+  intros. destruct x0; try discriminate. simpl. 
+  transitivity (b, transform_quiet_pl n). 
+  destruct y0; reflexivity || discriminate.
+  destruct z0; reflexivity || discriminate.
+Qed.
+
+(** Properties of comparisons. *)
+
+Theorem cmp_swap:
+  forall c x y, cmp (swap_comparison c) x y = cmp c y x.
+Proof.
+  unfold cmp, b32_compare; intros. rewrite (Bcompare_swap _ _ x y).
+  apply cmp_of_comparison_swap. 
+Qed.
+
+Theorem cmp_ne_eq:
+  forall f1 f2, cmp Cne f1 f2 = negb (cmp Ceq f1 f2).
+Proof.
+  intros; apply cmp_of_comparison_ne_eq. 
+Qed.
+
+Theorem cmp_lt_eq_false:
+  forall f1 f2, cmp Clt f1 f2 = true -> cmp Ceq f1 f2 = true -> False.
+Proof.
+  intros f1 f2; apply cmp_of_comparison_lt_eq_false.
+Qed.
+
+Theorem cmp_le_lt_eq:
+  forall f1 f2, cmp Cle f1 f2 = cmp Clt f1 f2 || cmp Ceq f1 f2.
+Proof.
+  intros f1 f2; apply cmp_of_comparison_le_lt_eq.
+Qed.
+
+Theorem cmp_gt_eq_false:
+  forall x y, cmp Cgt x y = true -> cmp Ceq x y = true -> False.
+Proof.
+  intros f1 f2; apply cmp_of_comparison_gt_eq_false.
+Qed.
+
+Theorem cmp_ge_gt_eq:
+  forall f1 f2, cmp Cge f1 f2 = cmp Cgt f1 f2 || cmp Ceq f1 f2.
+Proof.
+  intros f1 f2; apply cmp_of_comparison_ge_gt_eq.
+Qed.
+
+Theorem cmp_lt_gt_false:
+  forall f1 f2, cmp Clt f1 f2 = true -> cmp Cgt f1 f2 = true -> False.
+Proof.
+  intros f1 f2; apply cmp_of_comparison_lt_gt_false.
+Qed.
+
+Theorem cmp_double:
+  forall f1 f2 c, cmp c f1 f2 = Float.cmp c (to_double f1) (to_double f2).
+Proof.
+  unfold cmp, Float.cmp; intros. f_equal. symmetry. apply Bcompare_Bconv_widen.
+  red; omega. omega. omega.
+Qed.
+
+(** Properties of conversions to/from in-memory representation.
+  The conversions are bijective (one-to-one). *)
+
+Theorem of_to_bits:
+  forall f, of_bits (to_bits f) = f.
+Proof.
+  intros; unfold of_bits, to_bits, bits_of_b32, b32_of_bits.
+  rewrite Int.unsigned_repr, binary_float_of_bits_of_binary_float; [reflexivity|].
+  generalize (bits_of_binary_float_range 23 8 __ __ f).
+  change (2^(23+8+1)) with (Int.max_unsigned + 1). omega.
+Qed.
+
+Theorem to_of_bits:
+  forall b, to_bits (of_bits b) = b.
+Proof.
+  intros; unfold of_bits, to_bits, bits_of_b32, b32_of_bits.
+  rewrite bits_of_binary_float_of_bits. apply Int.repr_unsigned.
+  apply Int.unsigned_range. 
+Qed.
+
+(** Conversions from 32-bit integers to single-precision floats can
+  be decomposed into a conversion to a double-precision float,
+  followed by a [Float32.of_double] conversion.  No double rounding occurs. *)
+
+Theorem of_int_double:
+  forall n, of_int n = of_double (Float.of_int n).
+Proof.
+  intros. symmetry. apply Bconv_BofZ. 
+  apply integer_representable_n; auto. generalize (Int.signed_range n); Float.smart_omega. 
+Qed.
+
+Theorem of_intu_double:
+  forall n, of_intu n = of_double (Float.of_intu n).
+Proof.
+  intros. symmetry. apply Bconv_BofZ.
+  apply integer_representable_n; auto. generalize (Int.unsigned_range n); Float.smart_omega. 
+Qed.
+
+(** Conversion of single-precision floats to integers can be decomposed
+  into a [Float32.to_double] extension, followed by a double-precision-to-int
+  conversion. *)
+
+Theorem to_int_double:
+  forall f n, to_int f = Some n -> Float.to_int (to_double f) = Some n.
+Proof.
+  intros.
+  unfold to_int in H.
+  destruct (b32_to_Z_range f Int.min_signed Int.max_signed) as [n'|] eqn:E; inv H.
+  unfold Float.to_int, to_double, Float.of_single, b64_to_Z_range, b64_of_b32. 
+  erewrite ZofB_range_Bconv; eauto. auto. omega. omega. omega. omega. 
+Qed.
+
+Theorem to_intu_double:
+  forall f n, to_intu f = Some n -> Float.to_intu (to_double f) = Some n.
+Proof.
+  intros.
+  unfold to_intu in H.
+  destruct (b32_to_Z_range f 0 Int.max_unsigned) as [n'|] eqn:E; inv H.
+  unfold Float.to_intu, to_double, Float.of_single, b64_to_Z_range, b64_of_b32. 
+  erewrite ZofB_range_Bconv; eauto. auto. omega. omega. omega. omega. 
+Qed.
+
+Theorem to_long_double:
+  forall f n, to_long f = Some n -> Float.to_long (to_double f) = Some n.
+Proof.
+  intros.
+  unfold to_long in H.
+  destruct (b32_to_Z_range f Int64.min_signed Int64.max_signed) as [n'|] eqn:E; inv H.
+  unfold Float.to_long, to_double, Float.of_single, b64_to_Z_range, b64_of_b32. 
+  erewrite ZofB_range_Bconv; eauto. auto. omega. omega. omega. omega. 
+Qed.
+
+Theorem to_longu_double:
+  forall f n, to_longu f = Some n -> Float.to_longu (to_double f) = Some n.
+Proof.
+  intros.
+  unfold to_longu in H.
+  destruct (b32_to_Z_range f 0 Int64.max_unsigned) as [n'|] eqn:E; inv H.
+  unfold Float.to_longu, to_double, Float.of_single, b64_to_Z_range, b64_of_b32. 
+  erewrite ZofB_range_Bconv; eauto. auto. omega. omega. omega. omega. 
+Qed.
+
+(** Conversions from 64-bit integers to single-precision floats can be expressed
+  as conversion to a double-precision float followed by a [Float32.of_double] conversion.
+  To avoid double rounding when the integer is large (above [2^53]), a round
+  to odd must be performed on the integer before conversion to double-precision float. *)
+
+Lemma int_round_odd_plus:
+  forall p n, 0 <= p ->
+  int_round_odd n p = Z.land (Z.lor n (Z.land n (2^p-1) + (2^p-1))) (-(2^p)).
+Proof.
+  intros.
+  assert (POS: 0 < 2^p) by (apply (Zpower_gt_0 radix2); auto). 
+  assert (A: Z.land n (2^p-1) = n mod 2^p).
+  { rewrite <- Z.land_ones by auto. f_equal. rewrite Z.ones_equiv. omega. }
+  rewrite A.
+  assert (B: 0 <= n mod 2^p < 2^p).
+  { apply Z_mod_lt. omega. }
+  set (m := n mod 2^p + (2^p-1)) in *.
+  assert (C: m / 2^p = if zeq (n mod 2^p) 0 then 0 else 1).
+  { unfold m. destruct (zeq (n mod 2^p) 0).
+    rewrite e. apply Zdiv_small. omega. 
+    eapply Zdiv_unique with (n mod 2^p - 1). ring. omega. }
+  assert (D: Z.testbit m p = if zeq (n mod 2^p) 0 then false else true).
+  { destruct (zeq (n mod 2^p) 0).
+    apply Z.testbit_false; auto. rewrite C; auto.
+    apply Z.testbit_true; auto. rewrite C; auto. }
+  assert (E: forall i, p < i -> Z.testbit m i = false).
+  { intros. apply Z.testbit_false. omega. 
+    replace (m / 2^i) with 0. auto. symmetry. apply Zdiv_small. 
+    unfold m. split. omega. apply Zlt_le_trans with (2 * 2^p). omega. 
+    change 2 with (2^1) at 1. rewrite <- (Zpower_plus radix2) by omega. 
+    apply Zpower_le. omega. }
+  assert (F: forall i, 0 <= i -> Z.testbit (-2^p) i = if zlt i p then false else true).
+  { intros. rewrite Z.bits_opp by auto. rewrite <- Z.ones_equiv.
+    destruct (zlt i p). 
+    rewrite Z.ones_spec_low by omega. auto.
+    rewrite Z.ones_spec_high by omega. auto. }
+  apply int_round_odd_bits; auto. 
+  - intros. rewrite Z.land_spec, F, zlt_true by omega. apply andb_false_r. 
+  - rewrite Z.land_spec, Z.lor_spec, D, F, zlt_false, andb_true_r by omega.
+    destruct (Z.eqb (n mod 2^p) 0) eqn:Z. 
+    rewrite Z.eqb_eq in Z. rewrite Z, zeq_true. apply orb_false_r. 
+    rewrite Z.eqb_neq in Z. rewrite zeq_false by auto. apply orb_true_r. 
+  - intros. rewrite Z.land_spec, Z.lor_spec, E, F, zlt_false, andb_true_r by omega.
+    apply orb_false_r.
+Qed.
+
+Lemma of_long_round_odd:
+  forall n conv_nan,
+  2^36 <= Z.abs n < 2^64 ->
+  b32_of_Z n = b32_of_b64 conv_nan mode_NE (b64_of_Z (Z.land (Z.lor n ((Z.land n 2047) + 2047)) (-2048))).
+Proof.
+  intros. rewrite <- (int_round_odd_plus 11) by omega.
+  assert (-2^64 <= int_round_odd n 11). 
+  { change (-2^64) with (int_round_odd (-2^64) 11). apply (int_round_odd_le 0 0); xomega. }
+  assert (int_round_odd n 11 <= 2^64). 
+  { change (2^64) with (int_round_odd (2^64) 11). apply (int_round_odd_le 0 0); xomega. }
+  unfold b32_of_Z, b32_of_b64, b64_of_Z. 
+  rewrite Bconv_BofZ. 
+  apply BofZ_round_odd with (p := 11).
+  omega.
+  apply Zle_trans with (2^64). omega. compute; intuition congruence.
+  omega.
+  exact (proj1 H). 
+  unfold int_round_odd. apply integer_representable_n2p_wide. auto. omega. 
+  unfold int_round_odd in H0, H1. 
+  split; (apply Zmult_le_reg_r with (2^11); [compute; auto | assumption]).
+  omega.
+  omega.
+Qed.
+
+Theorem of_longu_double_1:
+  forall n,
+  Int64.unsigned n <= 2^53 ->
+  of_longu n = of_double (Float.of_longu n).
+Proof.
+  intros. symmetry; apply Bconv_BofZ. apply integer_representable_n; auto. 
+  pose proof (Int64.unsigned_range n); omega.
+Qed.
+
+Theorem of_longu_double_2:
+  forall n,
+  2^36 <= Int64.unsigned n ->
+  of_longu n = of_double (Float.of_longu 
+                           (Int64.and (Int64.or n 
+                                                (Int64.add (Int64.and n (Int64.repr 2047))
+                                                           (Int64.repr 2047)))
+                                      (Int64.repr (-2048)))).
+Proof.
+  intros.
+  pose proof (Int64.unsigned_range n). 
+  unfold of_longu. erewrite of_long_round_odd.
+  unfold of_double, Float.to_single. instantiate (1 := Float.to_single_pl). 
+  f_equal. unfold Float.of_longu. f_equal.
+  set (n' := Z.land (Z.lor (Int64.unsigned n) (Z.land (Int64.unsigned n) 2047 + 2047)) (-2048)).
+  assert (int_round_odd (Int64.unsigned n) 11 = n') by (apply int_round_odd_plus; omega).
+  assert (0 <= n'). 
+  { rewrite <- H1. change 0 with (int_round_odd 0 11). apply (int_round_odd_le 0 0); omega. }
+  assert (n' < Int64.modulus). 
+  { apply Zle_lt_trans with (int_round_odd (Int64.modulus - 1) 11). 
+    rewrite <- H1. apply (int_round_odd_le 0 0); omega. 
+    compute; auto. }
+  rewrite <- (Int64.unsigned_repr n') by (unfold Int64.max_unsigned; omega).
+  f_equal. Int64.bit_solve. rewrite Int64.testbit_repr by auto. unfold n'.
+  rewrite Z.land_spec, Z.lor_spec. f_equal. f_equal. 
+  unfold Int64.testbit. rewrite Int64.add_unsigned.
+  fold (Int64.testbit (Int64.repr
+        (Int64.unsigned (Int64.and n (Int64.repr 2047)) +
+         Int64.unsigned (Int64.repr 2047))) i).
+  rewrite Int64.testbit_repr by auto. f_equal. f_equal. unfold Int64.and.
+  symmetry. apply Int64.unsigned_repr. change 2047 with (Z.ones 11).
+  rewrite Z.land_ones by omega. 
+  exploit (Z_mod_lt (Int64.unsigned n) (2^11)). compute; auto. 
+  assert (2^11 < Int64.max_unsigned) by (compute; auto). omega. 
+  apply Int64.same_bits_eqm; auto. exists (-1); auto.
+  split. xomega. change (2^64) with Int64.modulus. xomega. 
+Qed.
+
+Theorem of_long_double_1:
+  forall n,
+  Z.abs (Int64.signed n) <= 2^53 ->
+  of_long n = of_double (Float.of_long n).
+Proof.
+  intros. symmetry; apply Bconv_BofZ. apply integer_representable_n; auto. xomega. 
+Qed.
+
+Theorem of_long_double_2:
+  forall n,
+  2^36 <= Z.abs (Int64.signed n) ->
+  of_long n = of_double (Float.of_long
+                           (Int64.and (Int64.or n 
+                                                (Int64.add (Int64.and n (Int64.repr 2047))
+                                                           (Int64.repr 2047)))
+                                      (Int64.repr (-2048)))).
+Proof.
+  intros.
+  pose proof (Int64.signed_range n). 
+  unfold of_long. erewrite of_long_round_odd.
+  unfold of_double, Float.to_single. instantiate (1 := Float.to_single_pl). 
+  f_equal. unfold Float.of_long. f_equal.
+  set (n' := Z.land (Z.lor (Int64.signed n) (Z.land (Int64.signed n) 2047 + 2047)) (-2048)).
+  assert (int_round_odd (Int64.signed n) 11 = n') by (apply int_round_odd_plus; omega).
+  assert (Int64.min_signed <= n'). 
+  { rewrite <- H1. change Int64.min_signed with (int_round_odd Int64.min_signed 11). apply (int_round_odd_le 0 0); omega. }
+  assert (n' <= Int64.max_signed).
+  { apply Zle_trans with (int_round_odd Int64.max_signed 11). 
+    rewrite <- H1. apply (int_round_odd_le 0 0); omega. 
+    compute; intuition congruence. }
+  rewrite <- (Int64.signed_repr n') by omega.
+  f_equal. Int64.bit_solve. rewrite Int64.testbit_repr by auto. unfold n'.
+  rewrite Z.land_spec, Z.lor_spec. f_equal. f_equal.
+  rewrite Int64.bits_signed by omega. rewrite zlt_true by omega. auto. 
+  unfold Int64.testbit. rewrite Int64.add_unsigned.
+  fold (Int64.testbit (Int64.repr
+        (Int64.unsigned (Int64.and n (Int64.repr 2047)) +
+         Int64.unsigned (Int64.repr 2047))) i).
+  rewrite Int64.testbit_repr by auto. f_equal. f_equal. unfold Int64.and.
+  change (Int64.unsigned (Int64.repr 2047)) with 2047. 
+  change 2047 with (Z.ones 11). rewrite ! Z.land_ones by omega.
+  rewrite Int64.unsigned_repr. apply Int64.eqmod_mod_eq. 
+  apply Zlt_gt. apply (Zpower_gt_0 radix2); omega.
+  apply Int64.eqmod_divides with (2^64). apply Int64.eqm_signed_unsigned. 
+  exists (2^(64-11)); auto.
+  exploit (Z_mod_lt (Int64.unsigned n) (2^11)). compute; auto. 
+  assert (2^11 < Int64.max_unsigned) by (compute; auto). omega. 
+  apply Int64.same_bits_eqm; auto. exists (-1); auto.
+  split. auto. assert (-2^64 < Int64.min_signed) by (compute; auto). 
+  assert (Int64.max_signed < 2^64) by (compute; auto).
+  xomega.
+Qed.
+
+End Float32.
+
+Global Opaque
+  Float.zero Float.eq_dec Float.neg Float.abs Float.of_single Float.to_single
+  Float.of_int Float.of_intu Float.of_long Float.of_longu
+  Float.to_int Float.to_intu Float.to_long Float.to_longu
+  Float.add Float.sub Float.mul Float.div Float.cmp
+  Float.to_bits Float.of_bits Float.from_words.
+
+Global Opaque
+  Float32.zero Float32.eq_dec Float32.neg Float32.abs
+  Float32.of_int Float32.of_intu Float32.of_long Float32.of_longu
+  Float32.to_int Float32.to_intu Float32.to_long Float32.to_longu
+  Float32.add Float32.sub Float32.mul Float32.div Float32.cmp
+  Float32.to_bits Float32.of_bits.
+
diff --git a/powerpc/Archi.v b/powerpc/Archi.v
index 0b871d93..070f7cc9 100644
--- a/powerpc/Archi.v
+++ b/powerpc/Archi.v
@@ -25,10 +25,18 @@ Definition big_endian := true.
 Notation align_int64 := 8%Z (only parsing).
 Notation align_float64 := 8%Z (only parsing).
 
-Program Definition default_pl : bool * nan_pl 53 := 
+Program Definition default_pl_64 : bool * nan_pl 53 := 
   (false, nat_iter 51 xO xH).
 
-Definition choose_binop_pl (s1: bool) (pl1: nan_pl 53) (s2: bool) (pl2: nan_pl 53) :=
+Definition choose_binop_pl_64 (s1: bool) (pl1: nan_pl 53) (s2: bool) (pl2: nan_pl 53) :=
   false.                        (**r always choose first NaN *)
 
-Global Opaque big_endian default_pl choose_binop_pl.
+Program Definition default_pl_32 : bool * nan_pl 24 := 
+  (false, nat_iter 22 xO xH).
+
+Definition choose_binop_pl_32 (s1: bool) (pl1: nan_pl 24) (s2: bool) (pl2: nan_pl 24) :=
+  false.                        (**r always choose first NaN *)
+
+Global Opaque big_endian
+              default_pl_64 choose_binop_pl_64
+              default_pl_32 choose_binop_pl_32.
diff --git a/powerpc/Asm.v b/powerpc/Asm.v
index aba78d45..a7e5eafe 100644
--- a/powerpc/Asm.v
+++ b/powerpc/Asm.v
@@ -165,20 +165,29 @@ Inductive instruction : Type :=
   | Pextsh: ireg -> ireg -> instruction                       (**r 16-bit sign extension *)
   | Pfreeframe: Z -> int -> instruction                       (**r deallocate stack frame and restore previous frame *)
   | Pfabs: freg -> freg -> instruction                        (**r float absolute value *)
+  | Pfabss: freg -> freg -> instruction                       (**r float absolute value *)
   | Pfadd: freg -> freg -> freg -> instruction                (**r float addition *)
+  | Pfadds: freg -> freg -> freg -> instruction               (**r float addition *)
   | Pfcmpu: freg -> freg -> instruction                       (**r float comparison *)
   | Pfcti: ireg -> freg -> instruction                        (**r float-to-signed-int conversion *)
   | Pfdiv: freg -> freg -> freg -> instruction                (**r float division *)
+  | Pfdivs: freg -> freg -> freg -> instruction               (**r float division *)
   | Pfmake: freg -> ireg -> ireg -> instruction               (**r build a float from 2 ints *)
   | Pfmr: freg -> freg -> instruction                         (**r float move *)
   | Pfmul: freg -> freg -> freg -> instruction                (**r float multiply *)
+  | Pfmuls: freg -> freg -> freg -> instruction               (**r float multiply *)
   | Pfneg: freg -> freg -> instruction                        (**r float negation *)
+  | Pfnegs: freg -> freg -> instruction                       (**r float negation *)
   | Pfrsp: freg -> freg -> instruction                        (**r float round to single precision *)
+  | Pfxdp: freg -> freg -> instruction                        (**r float extend to double precision (pseudo) *)
   | Pfsub: freg -> freg -> freg -> instruction                (**r float subtraction *)
+  | Pfsubs: freg -> freg -> freg -> instruction               (**r float subtraction *)
   | Plbz: ireg -> constant -> ireg -> instruction             (**r load 8-bit unsigned int *)
   | Plbzx: ireg -> ireg -> ireg -> instruction                (**r same, with 2 index regs *)
   | Plfd: freg -> constant -> ireg -> instruction             (**r load 64-bit float *)
   | Plfdx: freg -> ireg -> ireg -> instruction                (**r same, with 2 index regs *)
+  | Plfd_a: freg -> constant -> ireg -> instruction           (**r load 64-bit quantity to float reg *)
+  | Plfdx_a: freg -> ireg -> ireg -> instruction              (**r same, with 2 index regs *)
   | Plfs: freg -> constant -> ireg -> instruction             (**r load 32-bit float *)
   | Plfsx: freg -> ireg -> ireg -> instruction                (**r same, with 2 index regs *)
   | Plha: ireg -> constant -> ireg -> instruction             (**r load 16-bit signed int *)
@@ -186,8 +195,11 @@ Inductive instruction : Type :=
   | Plhz: ireg -> constant -> ireg -> instruction             (**r load 16-bit unsigned int *)
   | Plhzx: ireg -> ireg -> ireg -> instruction                (**r same, with 2 index regs *)
   | Plfi: freg -> float -> instruction                        (**r load float constant *)
+  | Plfis: freg -> float32 -> instruction                     (**r load float constant *)
   | Plwz: ireg -> constant -> ireg -> instruction             (**r load 32-bit int *)
   | Plwzx: ireg -> ireg -> ireg -> instruction                (**r same, with 2 index regs *)
+  | Plwz_a: ireg -> constant -> ireg -> instruction           (**r load 32-bit quantity to int reg *)
+  | Plwzx_a: ireg -> ireg -> ireg -> instruction              (**r same, with 2 index regs *)
   | Pmfcrbit: ireg -> crbit -> instruction                    (**r move condition bit to reg *)
   | Pmflr: ireg -> instruction                                (**r move LR to reg *)
   | Pmr: ireg -> ireg -> instruction                          (**r integer move *)
@@ -196,7 +208,7 @@ Inductive instruction : Type :=
   | Pmulli: ireg -> ireg -> constant -> instruction           (**r integer multiply immediate *)
   | Pmullw: ireg -> ireg -> ireg -> instruction               (**r integer multiply *)
   | Pmulhw: ireg -> ireg -> ireg -> instruction               (**r multiply high signed *)
-  | Pmulhwu: ireg -> ireg -> ireg -> instruction               (**r multiply high signed *)
+  | Pmulhwu: ireg -> ireg -> ireg -> instruction              (**r multiply high signed *)
   | Pnand: ireg -> ireg -> ireg -> instruction                (**r bitwise not-and *)
   | Pnor: ireg -> ireg -> ireg -> instruction                 (**r bitwise not-or *)
   | Por: ireg -> ireg -> ireg -> instruction                  (**r bitwise or *)
@@ -213,12 +225,16 @@ Inductive instruction : Type :=
   | Pstbx: ireg -> ireg -> ireg -> instruction                (**r same, with 2 index regs *)
   | Pstfd: freg -> constant -> ireg -> instruction            (**r store 64-bit float *)
   | Pstfdx: freg -> ireg -> ireg -> instruction               (**r same, with 2 index regs *)
+  | Pstfd_a: freg -> constant -> ireg -> instruction          (**r store 64-bit quantity from float reg *)
+  | Pstfdx_a: freg -> ireg -> ireg -> instruction             (**r same, with 2 index regs *)
   | Pstfs: freg -> constant -> ireg -> instruction            (**r store 32-bit float *)
   | Pstfsx: freg -> ireg -> ireg -> instruction               (**r same, with 2 index regs *)
   | Psth: ireg -> constant -> ireg -> instruction             (**r store 16-bit int *)
   | Psthx: ireg -> ireg -> ireg -> instruction                (**r same, with 2 index regs *)
   | Pstw: ireg -> constant -> ireg -> instruction             (**r store 32-bit int *)
   | Pstwx: ireg -> ireg -> ireg -> instruction                (**r same, with 2 index regs *)
+  | Pstw_a: ireg -> constant -> ireg -> instruction           (**r store 32-bit quantity from int reg *)
+  | Pstwx_a: ireg -> ireg -> ireg -> instruction              (**r same, with 2 index regs *)
   | Psubfc: ireg -> ireg -> ireg -> instruction               (**r reversed integer subtraction *)
   | Psubfe: ireg -> ireg -> ireg -> instruction               (**r reversed integer subtraction with carry *)
   | Psubfic: ireg -> ireg -> constant -> instruction          (**r integer subtraction from immediate *)
@@ -645,26 +661,40 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       end
   | Pfabs rd r1 =>
       Next (nextinstr (rs#rd <- (Val.absf rs#r1))) m
+  | Pfabss rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.absfs rs#r1))) m
   | Pfadd rd r1 r2 =>
       Next (nextinstr (rs#rd <- (Val.addf rs#r1 rs#r2))) m
+  | Pfadds rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.addfs rs#r1 rs#r2))) m
   | Pfcmpu r1 r2 =>
       Next (nextinstr (compare_float rs rs#r1 rs#r2)) m
   | Pfcti rd r1 =>
       Next (nextinstr (rs#FPR13 <- Vundef #rd <- (Val.maketotal (Val.intoffloat rs#r1)))) m
   | Pfdiv rd r1 r2 =>
       Next (nextinstr (rs#rd <- (Val.divf rs#r1 rs#r2))) m
+  | Pfdivs rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.divfs rs#r1 rs#r2))) m
   | Pfmake rd r1 r2 =>
       Next (nextinstr (rs#rd <- (Val.floatofwords rs#r1 rs#r2))) m
   | Pfmr rd r1 =>
       Next (nextinstr (rs#rd <- (rs#r1))) m
   | Pfmul rd r1 r2 =>
       Next (nextinstr (rs#rd <- (Val.mulf rs#r1 rs#r2))) m
+  | Pfmuls rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.mulfs rs#r1 rs#r2))) m
   | Pfneg rd r1 =>
       Next (nextinstr (rs#rd <- (Val.negf rs#r1))) m
+  | Pfnegs rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.negfs rs#r1))) m
   | Pfrsp rd r1 =>
       Next (nextinstr (rs#rd <- (Val.singleoffloat rs#r1))) m
+  | Pfxdp rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.floatofsingle rs#r1))) m
   | Pfsub rd r1 r2 =>
       Next (nextinstr (rs#rd <- (Val.subf rs#r1 rs#r2))) m
+  | Pfsubs rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.subfs rs#r1 rs#r2))) m
   | Plbz rd cst r1 =>
       load1 Mint8unsigned rd cst r1 rs m
   | Plbzx rd r1 r2 =>
@@ -673,6 +703,10 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       load1 Mfloat64 rd cst r1 rs m
   | Plfdx rd r1 r2 =>
       load2 Mfloat64 rd r1 r2 rs m
+  | Plfd_a rd cst r1 =>
+      load1 Many64 rd cst r1 rs m
+  | Plfdx_a rd r1 r2 =>
+      load2 Many64 rd r1 r2 rs m
   | Plfs rd cst r1 =>
       load1 Mfloat32 rd cst r1 rs m
   | Plfsx rd r1 r2 =>
@@ -687,10 +721,16 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       load2 Mint16unsigned rd r1 r2 rs m
   | Plfi rd f =>
       Next (nextinstr (rs #GPR12 <- Vundef #rd <- (Vfloat f))) m
+  | Plfis rd f =>
+      Next (nextinstr (rs #GPR12 <- Vundef #rd <- (Vsingle f))) m
   | Plwz rd cst r1 =>
       load1 Mint32 rd cst r1 rs m
   | Plwzx rd r1 r2 =>
       load2 Mint32 rd r1 r2 rs m
+  | Plwz_a rd cst r1 =>
+      load1 Many32 rd cst r1 rs m
+  | Plwzx_a rd r1 r2 =>
+      load2 Many32 rd r1 r2 rs m
   | Pmfcrbit rd bit =>
       Next (nextinstr (rs#rd <- (rs#(reg_of_crbit bit)))) m
   | Pmflr rd =>
@@ -742,16 +782,14 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       store1 Mfloat64 rd cst r1 rs m
   | Pstfdx rd r1 r2 =>
       store2 Mfloat64 rd r1 r2 rs m
+  | Pstfd_a rd cst r1 =>
+      store1 Many64 rd cst r1 rs m
+  | Pstfdx_a rd r1 r2 =>
+      store2 Many64 rd r1 r2 rs m
   | Pstfs rd cst r1 =>
-      match store1 Mfloat32 rd cst r1 rs m with
-      | Next rs' m' => Next (rs'#FPR13 <- Vundef) m'
-      | Stuck => Stuck
-      end
+      store1 Mfloat32 rd cst r1 rs m
   | Pstfsx rd r1 r2 =>
-      match store2 Mfloat32 rd r1 r2 rs m with
-      | Next rs' m' => Next (rs'#FPR13 <- Vundef) m'
-      | Stuck => Stuck
-      end
+      store2 Mfloat32 rd r1 r2 rs m
   | Psth rd cst r1 =>
       store1 Mint16unsigned rd cst r1 rs m
   | Psthx rd r1 r2 =>
@@ -760,6 +798,10 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       store1 Mint32 rd cst r1 rs m
   | Pstwx rd r1 r2 =>
       store2 Mint32 rd r1 r2 rs m
+  | Pstw_a rd cst r1 =>
+      store1 Many32 rd cst r1 rs m
+  | Pstwx_a rd r1 r2 =>
+      store2 Many32 rd r1 r2 rs m
   | Psubfc rd r1 r2 =>
       Next (nextinstr (rs#rd <- (Val.sub rs#r2 rs#r1)
                        #CARRY <- (Val.add_carry rs#r2 (Val.notint rs#r1) Vone))) m
diff --git a/powerpc/Asmgen.v b/powerpc/Asmgen.v
index 6b66686e..5ca770d1 100644
--- a/powerpc/Asmgen.v
+++ b/powerpc/Asmgen.v
@@ -122,40 +122,32 @@ Definition rolm (r1 r2: ireg) (amount mask: int) (k: code) :=
 
 (** Accessing slots in the stack frame.  *)
 
+Definition accessind {A: Type}
+       (instr1: A -> constant -> ireg -> instruction)
+       (instr2: A -> ireg -> ireg -> instruction)
+       (base: ireg) (ofs: int) (r: A) (k: code) :=
+  if Int.eq (high_s ofs) Int.zero 
+  then instr1 r (Cint ofs) base :: k
+  else loadimm GPR0 ofs (instr2 r base GPR0 :: k).
+
 Definition loadind (base: ireg) (ofs: int) (ty: typ) (dst: mreg) (k: code) :=
-  match ty with
-  | Tint =>
-      do r <- ireg_of dst;
-      OK (if Int.eq (high_s ofs) Int.zero then
-            Plwz r (Cint ofs) base :: k
-          else
-            loadimm GPR0 ofs (Plwzx r base GPR0 :: k))
-  | Tfloat =>
-      do r <- freg_of dst;
-      OK (if Int.eq (high_s ofs) Int.zero then
-            Plfd r (Cint ofs) base :: k
-          else
-            loadimm GPR0 ofs (Plfdx r base GPR0 :: k))
-  | Tlong | Tsingle =>
-      Error (msg "Asmgen.loadind")
+  match ty, preg_of dst with
+  | Tint, IR r => OK(accessind Plwz Plwzx base ofs r k)
+  | Tany32, IR r => OK(accessind Plwz_a Plwzx_a base ofs r k)
+  | Tsingle, FR r => OK(accessind Plfs Plfsx base ofs r k)
+  | Tfloat, FR r => OK(accessind Plfd Plfdx base ofs r k)
+  | Tany64, FR r => OK(accessind Plfd_a Plfdx_a base ofs r k)
+  | _, _ => Error (msg "Asmgen.loadind")
   end.
 
 Definition storeind (src: mreg) (base: ireg) (ofs: int) (ty: typ) (k: code) :=
-  match ty with
-  | Tint =>
-      do r <- ireg_of src;
-      OK (if Int.eq (high_s ofs) Int.zero then
-            Pstw r (Cint ofs) base :: k
-          else
-            loadimm GPR0 ofs (Pstwx r base GPR0 :: k))
-  | Tfloat =>
-      do r <- freg_of src;
-      OK (if Int.eq (high_s ofs) Int.zero then
-            Pstfd r (Cint ofs) base :: k
-          else
-            loadimm GPR0 ofs (Pstfdx r base GPR0 :: k))
-  | Tlong | Tsingle =>
-      Error (msg "Asmgen.storeind")
+  match ty, preg_of src with
+  | Tint, IR r => OK(accessind Pstw Pstwx base ofs r k)
+  | Tany32, IR r => OK(accessind Pstw_a Pstwx_a base ofs r k)
+  | Tsingle, FR r => OK(accessind Pstfs Pstfsx base ofs r k)
+  | Tfloat, FR r => OK(accessind Pstfd Pstfdx base ofs r k)
+  | Tany64, FR r => OK(accessind Pstfd_a Pstfdx_a base ofs r k)
+  | _, _ => Error (msg "Asmgen.storeind")
   end.
 
 (** Constructor for a floating-point comparison.  The PowerPC has
@@ -340,6 +332,8 @@ Definition transl_op
       do r <- ireg_of res; OK (loadimm r n k)
   | Ofloatconst f, nil =>
       do r <- freg_of res; OK (Plfi r f :: k)
+  | Osingleconst f, nil =>
+      do r <- freg_of res; OK (Plfis r f :: k)
   | Oaddrsymbol s ofs, nil =>
       do r <- ireg_of res;
       OK (if symbol_is_small_data s ofs then
@@ -477,9 +471,30 @@ Definition transl_op
   | Odivf, a1 :: a2 :: nil =>
       do r1 <- freg_of a1; do r2 <- freg_of a2; do r <- freg_of res;
       OK (Pfdiv r r1 r2 :: k)
+  | Onegfs, a1 :: nil =>
+      do r1 <- freg_of a1; do r <- freg_of res;
+      OK (Pfnegs r r1 :: k)
+  | Oabsfs, a1 :: nil =>
+      do r1 <- freg_of a1; do r <- freg_of res;
+      OK (Pfabss r r1 :: k)
+  | Oaddfs, a1 :: a2 :: nil =>
+      do r1 <- freg_of a1; do r2 <- freg_of a2; do r <- freg_of res;
+      OK (Pfadds r r1 r2 :: k)
+  | Osubfs, a1 :: a2 :: nil =>
+      do r1 <- freg_of a1; do r2 <- freg_of a2; do r <- freg_of res;
+      OK (Pfsubs r r1 r2 :: k)
+  | Omulfs, a1 :: a2 :: nil =>
+      do r1 <- freg_of a1; do r2 <- freg_of a2; do r <- freg_of res;
+      OK (Pfmuls r r1 r2 :: k)
+  | Odivfs, a1 :: a2 :: nil =>
+      do r1 <- freg_of a1; do r2 <- freg_of a2; do r <- freg_of res;
+      OK (Pfdivs r r1 r2 :: k)
   | Osingleoffloat, a1 :: nil =>
       do r1 <- freg_of a1; do r <- freg_of res;
       OK (Pfrsp r r1 :: k)
+  | Ofloatofsingle, a1 :: nil =>
+      do r1 <- freg_of a1; do r <- freg_of res;
+      OK (Pfxdp r r1 :: k)
   | Ointoffloat, a1 :: nil =>
       do r1 <- freg_of a1; do r <- ireg_of res;
       OK (Pfcti r r1 :: k)
@@ -566,7 +581,7 @@ Definition transl_load (chunk: memory_chunk) (addr: addressing)
   | Mfloat64 =>
       do r <- freg_of dst;
       transl_memory_access (Plfd r) (Plfdx r) addr args GPR12 k
-  | Mint64 =>
+  | _ =>
       Error (msg "Asmgen.transl_load")
   end.
 
@@ -589,7 +604,7 @@ Definition transl_store (chunk: memory_chunk) (addr: addressing)
   | Mfloat64 =>
       do r <- freg_of src;
       transl_memory_access (Pstfd r) (Pstfdx r) addr args temp k
-  | Mint64 =>
+  | _ =>
       Error (msg "Asmgen.transl_store")
   end.
 
diff --git a/powerpc/Asmgenproof.v b/powerpc/Asmgenproof.v
index 879d7524..913fb50a 100644
--- a/powerpc/Asmgenproof.v
+++ b/powerpc/Asmgenproof.v
@@ -196,17 +196,21 @@ Remark loadind_label:
   forall base ofs ty dst k c,
   loadind base ofs ty dst k = OK c -> tail_nolabel k c.
 Proof.
-  unfold loadind; intros.
-  destruct ty; destruct (Int.eq (high_s ofs) Int.zero);
+  unfold loadind, accessind; intros.
+  destruct ty; try discriminate;
+  destruct (preg_of dst); try discriminate;
+  destruct (Int.eq (high_s ofs) Int.zero);
   TailNoLabel; eapply tail_nolabel_trans; TailNoLabel.
 Qed.
 
 Remark storeind_label:
   forall base ofs ty src k c,
-  storeind base src ofs ty k = OK c -> tail_nolabel k c.
+  storeind src base ofs ty k = OK c -> tail_nolabel k c.
 Proof.
-  unfold storeind; intros.
-  destruct ty; destruct (Int.eq (high_s ofs) Int.zero);
+  unfold storeind, accessind; intros.
+  destruct ty; try discriminate;
+  destruct (preg_of src); try discriminate;
+  destruct (Int.eq (high_s ofs) Int.zero);
   TailNoLabel; eapply tail_nolabel_trans; TailNoLabel.
 Qed.
 
diff --git a/powerpc/Asmgenproof1.v b/powerpc/Asmgenproof1.v
index cfeb8232..e1ab9a15 100644
--- a/powerpc/Asmgenproof1.v
+++ b/powerpc/Asmgenproof1.v
@@ -435,6 +435,36 @@ Qed.
 
 (** Indexed memory loads. *)
 
+Lemma accessind_load_correct:
+  forall (A: Type) (inj: A -> preg)
+       (instr1: A -> constant -> ireg -> instruction)
+       (instr2: A -> ireg -> ireg -> instruction)
+       (base: ireg) ofs rx chunk v (rs: regset) m k,
+  (forall rs m r1 cst r2,
+   exec_instr ge fn (instr1 r1 cst r2) rs m = load1 ge chunk (inj r1) cst r2 rs m) ->
+  (forall rs m r1 r2 r3,
+   exec_instr ge fn (instr2 r1 r2 r3) rs m = load2 chunk (inj r1) r2 r3 rs m) ->
+  Mem.loadv chunk m (Val.add rs#base (Vint ofs)) = Some v ->
+  base <> GPR0 -> inj rx <> PC ->
+  exists rs',
+     exec_straight ge fn (accessind instr1 instr2 base ofs rx k) rs m k rs' m
+  /\ rs'#(inj rx) = v
+  /\ forall r, r <> PC -> r <> inj rx -> r <> GPR0 -> rs'#r = rs#r.
+Proof.
+  intros. unfold accessind. destruct (Int.eq (high_s ofs) Int.zero).
+- econstructor; split. apply exec_straight_one. 
+  rewrite H. unfold load1. rewrite gpr_or_zero_not_zero by auto. simpl.
+  rewrite H1. eauto. unfold nextinstr. repeat Simplif.
+  split. unfold nextinstr. repeat Simplif. 
+  intros. repeat Simplif. 
+- exploit (loadimm_correct GPR0 ofs); eauto. intros [rs' [P [Q R]]].
+  econstructor; split. eapply exec_straight_trans. eexact P.
+  apply exec_straight_one. rewrite H0. unfold load2. rewrite Q, R by auto with asmgen. 
+  rewrite H1. reflexivity. unfold nextinstr. repeat Simplif.
+  split. repeat Simplif.
+  intros. repeat Simplif.
+Qed.
+
 Lemma loadind_correct:
   forall (base: ireg) ofs ty dst k (rs: regset) m v c,
   loadind base ofs ty dst k = OK c ->
@@ -445,38 +475,44 @@ Lemma loadind_correct:
   /\ rs'#(preg_of dst) = v
   /\ forall r, r <> PC -> r <> preg_of dst -> r <> GPR0 -> rs'#r = rs#r.
 Proof.
-Opaque Int.eq.
-  unfold loadind; intros. destruct ty; monadInv H; simpl in H0.
-(* integer *)
-  rewrite (ireg_of_eq _ _ EQ).
-  destruct (Int.eq (high_s ofs) Int.zero).
-  (* one load *)
-  econstructor; split. apply exec_straight_one. simpl.
-  unfold load1. rewrite gpr_or_zero_not_zero; auto. simpl. rewrite H0. eauto. auto.
-  intuition Simpl.
-  (* loadimm + load *)
-  exploit (loadimm_correct GPR0 ofs); eauto. intros [rs' [A [B C]]]. 
-  exists (nextinstr (rs'#x <- v)); split.
-  eapply exec_straight_trans. eexact A. apply exec_straight_one; auto. 
-  simpl. unfold load2. rewrite C; auto with asmgen. rewrite B. rewrite H0. auto. 
-  intuition Simpl. 
-(* float *)
-  rewrite (freg_of_eq _ _ EQ).
-  destruct (Int.eq (high_s ofs) Int.zero).
-  (* one load *)
-  econstructor; split. apply exec_straight_one. simpl.
-  unfold load1. rewrite gpr_or_zero_not_zero; auto. simpl. rewrite H0. eauto. auto.
-  intuition Simpl.
-  (* loadimm + load *)
-  exploit (loadimm_correct GPR0 ofs); eauto. intros [rs' [A [B C]]]. 
-  exists (nextinstr (rs'#x <- v)); split.
-  eapply exec_straight_trans. eexact A. apply exec_straight_one; auto. 
-  simpl. unfold load2. rewrite C; auto with asmgen. rewrite B. rewrite H0. auto. 
-  intuition Simpl. 
+  unfold loadind; intros. destruct ty; try discriminate; destruct (preg_of dst); inv H; simpl in H0.
+  apply accessind_load_correct with (inj := IR) (chunk := Mint32); auto with asmgen.
+  apply accessind_load_correct with (inj := FR) (chunk := Mfloat64); auto with asmgen.
+  apply accessind_load_correct with (inj := FR) (chunk := Mfloat32); auto with asmgen.
+  apply accessind_load_correct with (inj := IR) (chunk := Many32); auto with asmgen.
+  apply accessind_load_correct with (inj := FR) (chunk := Many64); auto with asmgen.
 Qed.
 
 (** Indexed memory stores. *)
 
+Lemma accessind_store_correct:
+  forall (A: Type) (inj: A -> preg)
+       (instr1: A -> constant -> ireg -> instruction)
+       (instr2: A -> ireg -> ireg -> instruction)
+       (base: ireg) ofs rx chunk (rs: regset) m m' k,
+  (forall rs m r1 cst r2,
+   exec_instr ge fn (instr1 r1 cst r2) rs m = store1 ge chunk (inj r1) cst r2 rs m) ->
+  (forall rs m r1 r2 r3,
+   exec_instr ge fn (instr2 r1 r2 r3) rs m = store2 chunk (inj r1) r2 r3 rs m) ->
+  Mem.storev chunk m (Val.add rs#base (Vint ofs)) (rs (inj rx)) = Some m' ->
+  base <> GPR0 -> inj rx <> PC -> inj rx <> GPR0 ->
+  exists rs',
+     exec_straight ge fn (accessind instr1 instr2 base ofs rx k) rs m k rs' m'
+  /\ forall r, r <> PC -> r <> GPR0 -> rs'#r = rs#r.
+Proof.
+  intros. unfold accessind. destruct (Int.eq (high_s ofs) Int.zero).
+- econstructor; split. apply exec_straight_one. 
+  rewrite H. unfold store1. rewrite gpr_or_zero_not_zero by auto. simpl.
+  rewrite H1. eauto. unfold nextinstr. repeat Simplif.
+  intros. repeat Simplif. 
+- exploit (loadimm_correct GPR0 ofs); eauto. intros [rs' [P [Q R]]].
+  econstructor; split. eapply exec_straight_trans. eexact P.
+  apply exec_straight_one. rewrite H0. unfold store2.
+  rewrite Q. rewrite R by auto with asmgen. rewrite R by auto. 
+  rewrite H1. reflexivity. unfold nextinstr. repeat Simplif.
+  intros. repeat Simplif.
+Qed.
+
 Lemma storeind_correct:
   forall (base: ireg) ofs ty src k (rs: regset) m m' c,
   storeind src base ofs ty k = OK c ->
@@ -488,33 +524,12 @@ Lemma storeind_correct:
 Proof.
   unfold storeind; intros.
   assert (preg_of src <> GPR0) by auto with asmgen.
-  destruct ty; monadInv H; simpl in H0.
-(* integer *)
-  rewrite (ireg_of_eq _ _ EQ) in *.
-  destruct (Int.eq (high_s ofs) Int.zero).
-  (* one store *)
-  econstructor; split. apply exec_straight_one. simpl.
-  unfold store1. rewrite gpr_or_zero_not_zero; auto. simpl. rewrite H0. eauto. auto.
-  intros; Simpl.
-  (* loadimm + store *)
-  exploit (loadimm_correct GPR0 ofs); eauto. intros [rs' [A [B C]]]. 
-  exists (nextinstr rs'); split.
-  eapply exec_straight_trans. eexact A. apply exec_straight_one; auto. 
-  simpl. unfold store2. rewrite B. rewrite ! C; auto with asmgen. rewrite H0. auto. 
-  intuition Simpl. 
-(* float *)
-  rewrite (freg_of_eq _ _ EQ) in *.
-  destruct (Int.eq (high_s ofs) Int.zero).
-  (* one store *)
-  econstructor; split. apply exec_straight_one. simpl.
-  unfold store1. rewrite gpr_or_zero_not_zero; auto. simpl. rewrite H0. eauto. auto.
-  intuition Simpl.
-  (* loadimm + store *)
-  exploit (loadimm_correct GPR0 ofs); eauto. intros [rs' [A [B C]]]. 
-  exists (nextinstr rs'); split. 
-  eapply exec_straight_trans. eexact A. apply exec_straight_one; auto. 
-  simpl. unfold store2. rewrite B. rewrite ! C; auto with asmgen. rewrite H0. auto. 
-  intuition Simpl. 
+  destruct ty; try discriminate; destruct (preg_of src) ; inv H; simpl in H0.
+  apply accessind_store_correct with (inj := IR) (chunk := Mint32); auto with asmgen.
+  apply accessind_store_correct with (inj := FR) (chunk := Mfloat64); auto with asmgen.
+  apply accessind_store_correct with (inj := FR) (chunk := Mfloat32); auto with asmgen.
+  apply accessind_store_correct with (inj := IR) (chunk := Many32); auto with asmgen.
+  apply accessind_store_correct with (inj := FR) (chunk := Many64); auto with asmgen.
 Qed.
 
 (** Float comparisons. *)
@@ -1210,15 +1225,7 @@ Local Transparent destroyed_by_store.
 - (* Mint32 *)
   eapply BASE; eauto; erewrite ireg_of_eq by eauto; auto.
 - (* Mfloat32 *)
-  rewrite (freg_of_eq _ _ EQ) in H1.
-  eapply transl_memory_access_correct. eauto. eauto. eauto. 
-  intros. econstructor; split. apply exec_straight_one. 
-  simpl. unfold store1. rewrite H. rewrite H2; auto with asmgen. rewrite H1. eauto. auto. 
-Local Transparent destroyed_by_store.
-  simpl; intros. destruct H5 as [A [B C]]. Simpl. apply H2; auto with asmgen. destruct TEMP0; congruence.
-  intros. econstructor; split. apply exec_straight_one. 
-  simpl. unfold store2. rewrite H. rewrite H2; auto with asmgen. rewrite H1. eauto. auto. 
-  simpl; intros. destruct H5 as [A [B C]]. Simpl. apply H2; auto with asmgen. destruct TEMP0; congruence.
+  eapply BASE; eauto; erewrite freg_of_eq by eauto; auto.
 - (* Mfloat64 *)
   eapply BASE; eauto; erewrite freg_of_eq by eauto; auto.
 Qed.
diff --git a/powerpc/ConstpropOp.vp b/powerpc/ConstpropOp.vp
index e1e19601..bba0fad4 100644
--- a/powerpc/ConstpropOp.vp
+++ b/powerpc/ConstpropOp.vp
@@ -136,18 +136,19 @@ Definition make_xorimm (n: int) (r: reg) :=
   else (Oxorimm n, r :: nil).
 
 Definition make_mulfimm (n: float) (r r1 r2: reg) :=
-  if Float.eq_dec n (Float.floatofint (Int.repr 2))
+  if Float.eq_dec n (Float.of_int (Int.repr 2))
   then (Oaddf, r :: r :: nil)
   else (Omulf, r1 :: r2 :: nil).
 
+Definition make_mulfsimm (n: float32) (r r1 r2: reg) :=
+  if Float32.eq_dec n (Float32.of_int (Int.repr 2))
+  then (Oaddfs, r :: r :: nil)
+  else (Omulfs, r1 :: r2 :: nil).
+
 Definition make_cast8signed (r: reg) (a: aval) :=
   if vincl a (Sgn 8) then (Omove, r :: nil) else (Ocast8signed, r :: nil).
 Definition make_cast16signed (r: reg) (a: aval) :=
   if vincl a (Sgn 16) then (Omove, r :: nil) else (Ocast16signed, r :: nil).
-Definition make_singleoffloat (r: reg) (a: aval) :=
-  if vincl a Fsingle && generate_float_constants tt
-  then (Omove, r :: nil)
-  else (Osingleoffloat, r :: nil).
 
 Nondetfunction op_strength_reduction 
               (op: operation) (args: list reg) (vl: list aval) :=
@@ -174,10 +175,11 @@ Nondetfunction op_strength_reduction
   | Oshl, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shlimm n2 r1 r2
   | Oshr, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shrimm n2 r1 r2
   | Oshru, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shruimm n2 r1 r2
-  | Osingleoffloat, r1 :: nil, v1 :: nil => make_singleoffloat r1 v1
   | Ocmp c, args, vl => make_cmp c args vl
   | Omulf, r1 :: r2 :: nil, v1 :: F n2 :: nil => make_mulfimm n2 r1 r1 r2
   | Omulf, r1 :: r2 :: nil, F n1 :: v2 :: nil => make_mulfimm n1 r2 r1 r2
+  | Omulfs, r1 :: r2 :: nil, v1 :: FS n2 :: nil => make_mulfsimm n2 r1 r1 r2
+  | Omulfs, r1 :: r2 :: nil, FS n1 :: v2 :: nil => make_mulfsimm n1 r2 r1 r2
   | _, _, _ => (op, args)
   end.
 
diff --git a/powerpc/ConstpropOpproof.v b/powerpc/ConstpropOpproof.v
index 584865ad..8498868a 100644
--- a/powerpc/ConstpropOpproof.v
+++ b/powerpc/ConstpropOpproof.v
@@ -80,6 +80,10 @@ Ltac SimplVM :=
       let E := fresh in
       assert (E: v = Vfloat n) by (inversion H; auto);
       rewrite E in *; clear H; SimplVM
+  | [ H: vmatch _ ?v (FS ?n) |- _ ] =>
+      let E := fresh in
+      assert (E: v = Vsingle n) by (inversion H; auto);
+      rewrite E in *; clear H; SimplVM
   | [ H: vmatch _ ?v (Ptr(Gl ?id ?ofs)) |- _ ] =>
       let E := fresh in
       assert (E: Val.lessdef v (Genv.symbol_address ge id ofs)) by (eapply vmatch_ptr_gl; eauto); 
@@ -315,7 +319,7 @@ Lemma make_mulfimm_correct:
   exists v, eval_operation ge (Vptr sp Int.zero) op rs##args m = Some v /\ Val.lessdef (Val.mulf rs#r1 rs#r2) v.
 Proof.
   intros; unfold make_mulfimm. 
-  destruct (Float.eq_dec n (Float.floatofint (Int.repr 2))); intros. 
+  destruct (Float.eq_dec n (Float.of_int (Int.repr 2))); intros. 
   simpl. econstructor; split. eauto. rewrite H; subst n.
   destruct (rs#r1); simpl; auto. rewrite Float.mul2_add; auto. 
   simpl. econstructor; split; eauto. 
@@ -328,13 +332,40 @@ Lemma make_mulfimm_correct_2:
   exists v, eval_operation ge (Vptr sp Int.zero) op rs##args m = Some v /\ Val.lessdef (Val.mulf rs#r1 rs#r2) v.
 Proof.
   intros; unfold make_mulfimm. 
-  destruct (Float.eq_dec n (Float.floatofint (Int.repr 2))); intros. 
+  destruct (Float.eq_dec n (Float.of_int (Int.repr 2))); intros. 
   simpl. econstructor; split. eauto. rewrite H; subst n.
   destruct (rs#r2); simpl; auto. rewrite Float.mul2_add; auto. 
   rewrite Float.mul_commut; auto. 
   simpl. econstructor; split; eauto. 
 Qed.
 
+Lemma make_mulfsimm_correct:
+  forall n r1 r2,
+  rs#r2 = Vsingle n ->
+  let (op, args) := make_mulfsimm n r1 r1 r2 in
+  exists v, eval_operation ge (Vptr sp Int.zero) op rs##args m = Some v /\ Val.lessdef (Val.mulfs rs#r1 rs#r2) v.
+Proof.
+  intros; unfold make_mulfsimm. 
+  destruct (Float32.eq_dec n (Float32.of_int (Int.repr 2))); intros. 
+  simpl. econstructor; split. eauto. rewrite H; subst n.
+  destruct (rs#r1); simpl; auto. rewrite Float32.mul2_add; auto. 
+  simpl. econstructor; split; eauto. 
+Qed.
+
+Lemma make_mulfsimm_correct_2:
+  forall n r1 r2,
+  rs#r1 = Vsingle n ->
+  let (op, args) := make_mulfsimm n r2 r1 r2 in
+  exists v, eval_operation ge (Vptr sp Int.zero) op rs##args m = Some v /\ Val.lessdef (Val.mulfs rs#r1 rs#r2) v.
+Proof.
+  intros; unfold make_mulfsimm. 
+  destruct (Float32.eq_dec n (Float32.of_int (Int.repr 2))); intros. 
+  simpl. econstructor; split. eauto. rewrite H; subst n.
+  destruct (rs#r2); simpl; auto. rewrite Float32.mul2_add; auto. 
+  rewrite Float32.mul_commut; auto. 
+  simpl. econstructor; split; eauto. 
+Qed.
+
 Lemma make_cast8signed_correct:
   forall r x,
   vmatch bc rs#r x ->
@@ -363,21 +394,6 @@ Proof.
   econstructor; split; simpl; eauto.
 Qed.
 
-Lemma make_singleoffloat_correct:
-  forall r x,
-  vmatch bc rs#r x ->
-  let (op, args) := make_singleoffloat r x in
-  exists v, eval_operation ge (Vptr sp Int.zero) op rs##args m = Some v /\ Val.lessdef (Val.singleoffloat rs#r) v.
-Proof.
-  intros; unfold make_singleoffloat. 
-  destruct (vincl x Fsingle && generate_float_constants tt) eqn:INCL. 
-  InvBooleans. exists rs#r; split; auto. 
-  assert (V: vmatch bc rs#r Fsingle).
-  { eapply vmatch_ge; eauto. apply vincl_ge; auto. }
-  inv V; simpl; auto. rewrite Float.singleoffloat_of_single by auto. auto.
-  econstructor; split; simpl; eauto.
-Qed.
-
 Lemma op_strength_reduction_correct:
   forall op args vl v,
   vl = map (fun r => AE.get r ae) args ->
@@ -424,14 +440,16 @@ Proof.
   InvApproxRegs; SimplVM; inv H0. apply make_shrimm_correct; auto.
 (* shru *)
   InvApproxRegs; SimplVM; inv H0. apply make_shruimm_correct; auto.
-(* singleoffloat *)
-  InvApproxRegs; SimplVM; inv H0. apply make_singleoffloat_correct; auto.
 (* cmp *)
   inv H0. apply make_cmp_correct; auto.
 (* mulf *)
   InvApproxRegs; SimplVM; inv H0. rewrite <- H2. apply make_mulfimm_correct; auto.
   InvApproxRegs; SimplVM; inv H0. fold (Val.mulf (Vfloat n1) rs#r2).
   rewrite <- H2. apply make_mulfimm_correct_2; auto.
+(* mulfs *)
+  InvApproxRegs; SimplVM; inv H0. rewrite <- H2. apply make_mulfsimm_correct; auto.
+  InvApproxRegs; SimplVM; inv H0. fold (Val.mulfs (Vsingle n1) rs#r2).
+  rewrite <- H2. apply make_mulfsimm_correct_2; auto.
 (* default *)
   exists v; auto.
 Qed.
diff --git a/powerpc/Machregs.v b/powerpc/Machregs.v
index baad496d..f7ed7793 100644
--- a/powerpc/Machregs.v
+++ b/powerpc/Machregs.v
@@ -54,23 +54,13 @@ Global Opaque mreg_eq.
 
 Definition mreg_type (r: mreg): typ :=
   match r with
-  | R3 => Tint  | R4 => Tint  | R5 => Tint  | R6 => Tint
-  | R7 => Tint  | R8 => Tint  | R9 => Tint  | R10 => Tint
-  | R11 => Tint | R12 => Tint
-  | R14 => Tint | R15 => Tint | R16 => Tint
-  | R17 => Tint | R18 => Tint | R19 => Tint | R20 => Tint
-  | R21 => Tint | R22 => Tint | R23 => Tint | R24 => Tint
-  | R25 => Tint | R26 => Tint | R27 => Tint | R28 => Tint
-  | R29 => Tint | R30 => Tint | R31 => Tint
-  | F0 => Tfloat
-  | F1 => Tfloat  | F2 => Tfloat  | F3 => Tfloat  | F4 => Tfloat
-  | F5 => Tfloat  | F6 => Tfloat  | F7 => Tfloat  | F8 => Tfloat
-  | F9 => Tfloat  | F10 => Tfloat | F11 => Tfloat | F12 => Tfloat
-  | F13 => Tfloat | F14 => Tfloat | F15 => Tfloat
-  | F16 => Tfloat | F17 => Tfloat | F18 => Tfloat | F19 => Tfloat
-  | F20 => Tfloat | F21 => Tfloat | F22 => Tfloat | F23 => Tfloat
-  | F24 => Tfloat | F25 => Tfloat | F26 => Tfloat | F27 => Tfloat
-  | F28 => Tfloat | F29 => Tfloat | F30 => Tfloat | F31 => Tfloat
+  | R3  | R4  | R5  | R6  | R7  | R8  | R9  | R10  | R11 | R12
+  | R14 | R15 | R16 | R17 | R18 | R19 | R20 | R21 | R22 | R23 | R24
+  | R25 | R26 | R27 | R28 | R29 | R30 | R31 => Tany32
+  | F0  | F1  | F2  | F3  | F4  | F5  | F6  | F7
+  | F8  | F9  | F10 | F11 | F12 | F13 | F14 | F15
+  | F16 | F17 | F18 | F19 | F20 | F21 | F22 | F23
+  | F24 | F25 | F26 | F27 | F28 | F29 | F30 | F31 => Tany64
   end.
 
 Open Scope positive_scope.
@@ -112,6 +102,7 @@ Definition is_stack_reg (r: mreg) : bool := false.
 Definition destroyed_by_op (op: operation): list mreg :=
   match op with
   | Ofloatconst _ => R12 :: nil
+  | Osingleconst _ => R12 :: nil
   | Ointoffloat => F13 :: nil
   | _ => nil
   end.
@@ -120,10 +111,7 @@ Definition destroyed_by_load (chunk: memory_chunk) (addr: addressing): list mreg
   R12 :: nil.
 
 Definition destroyed_by_store (chunk: memory_chunk) (addr: addressing): list mreg :=
-  match chunk with
-  | Mfloat32 => R11 :: R12 :: F13 :: nil
-  | _ => R11 :: R12 :: nil
-  end.
+  R11 :: R12 :: nil.
 
 Definition destroyed_by_cond (cond: condition): list mreg :=
   nil.
@@ -135,21 +123,16 @@ Definition destroyed_by_builtin (ef: external_function): list mreg :=
   match ef with
   | EF_builtin _ _ => F13 :: nil
   | EF_vload _ => nil
-  | EF_vstore Mfloat32 => F13 :: nil
   | EF_vstore _ => nil
   | EF_vload_global _ _ _ => R11 :: nil
   | EF_vstore_global Mint64 _ _ => R10 :: R11 :: R12 :: nil
-  | EF_vstore_global Mfloat32 _ _ => R11 :: R12 :: F13 :: nil
   | EF_vstore_global _ _ _ => R11 :: R12 :: nil
   | EF_memcpy _ _ => R11 :: R12 :: F13 :: nil
   | _ => nil
   end.
 
 Definition destroyed_by_setstack (ty: typ): list mreg :=
-  match ty with
-  | Tsingle => F13 :: nil
-  | _ => nil
-  end.
+  nil.
 
 Definition destroyed_at_function_entry: list mreg :=
   nil.
diff --git a/powerpc/NeedOp.v b/powerpc/NeedOp.v
index d43af3c8..e1307492 100644
--- a/powerpc/NeedOp.v
+++ b/powerpc/NeedOp.v
@@ -25,6 +25,7 @@ Definition needs_of_operation (op: operation) (nv: nval): list nval :=
   | Omove => op1 nv
   | Ointconst n => nil
   | Ofloatconst n => nil
+  | Osingleconst n => nil
   | Oaddrsymbol id ofs => nil
   | Oaddrstack ofs => nil
   | Ocast8signed => op1 (sign_ext 8 nv)
@@ -52,7 +53,9 @@ Definition needs_of_operation (op: operation) (nv: nval): list nval :=
   | Oroli amount mask => op1 (default nv)
   | Onegf | Oabsf => op1 (default nv)
   | Oaddf | Osubf | Omulf | Odivf => op2 (default nv)
-  | Osingleoffloat => op1 (singleoffloat nv)
+  | Onegfs | Oabsfs => op1 (default nv)
+  | Oaddfs | Osubfs | Omulfs | Odivfs => op2 (default nv)
+  | Osingleoffloat | Ofloatofsingle => op1 (default nv)
   | Ointoffloat => op1 (default nv)
   | Ofloatofwords | Omakelong => op2 (default nv)
   | Olowlong | Ohighlong => op1 (default nv)
@@ -66,7 +69,6 @@ Definition operation_is_redundant (op: operation) (nv: nval): bool :=
   | Oandimm n => andimm_redundant nv n
   | Oorimm n => orimm_redundant nv n
   | Orolm amount mask => rolm_redundant nv amount mask
-  | Osingleoffloat => singleoffloat_redundant nv
   | _ => false
   end.
 
@@ -136,7 +138,6 @@ Proof.
 - apply or_sound; auto. apply notint_sound; rewrite bitwise_idem; auto.
 - apply shrimm_sound; auto.
 - apply rolm_sound; auto. 
-- apply singleoffloat_sound; auto. 
 - destruct (eval_condition c args m) as [b|] eqn:EC; simpl in H2. 
   erewrite needs_of_condition_sound by eauto.
   subst v; simpl. auto with na.
@@ -156,7 +157,6 @@ Proof.
 - apply andimm_redundant_sound; auto. 
 - apply orimm_redundant_sound; auto.
 - apply rolm_redundant_sound; auto.
-- apply singleoffloat_redundant_sound; auto.
 Qed.
 
 End SOUNDNESS.
diff --git a/powerpc/Op.v b/powerpc/Op.v
index 17cf0725..dbec2755 100644
--- a/powerpc/Op.v
+++ b/powerpc/Op.v
@@ -54,6 +54,7 @@ Inductive operation : Type :=
   | Omove: operation                    (**r [rd = r1] *)
   | Ointconst: int -> operation         (**r [rd] is set to the given integer constant *)
   | Ofloatconst: float -> operation     (**r [rd] is set to the given float constant *)
+  | Osingleconst: float32 -> operation  (**r [rd] is set to the given float constant *)
   | Oaddrsymbol: ident -> int -> operation (**r [rd] is set to the the address of the symbol plus the offset *)
   | Oaddrstack: int -> operation        (**r [rd] is set to the stack pointer plus the given offset *)
 (*c Integer arithmetic: *)
@@ -96,7 +97,14 @@ Inductive operation : Type :=
   | Osubf: operation                    (**r [rd = r1 - r2] *)
   | Omulf: operation                    (**r [rd = r1 * r2] *)
   | Odivf: operation                    (**r [rd = r1 / r2] *)
+  | Onegfs: operation                   (**r [rd = - r1] *)
+  | Oabsfs: operation                   (**r [rd = abs(r1)] *)
+  | Oaddfs: operation                   (**r [rd = r1 + r2] *)
+  | Osubfs: operation                   (**r [rd = r1 - r2] *)
+  | Omulfs: operation                   (**r [rd = r1 * r2] *)
+  | Odivfs: operation                   (**r [rd = r1 / r2] *)
   | Osingleoffloat: operation           (**r [rd] is [r1] truncated to single-precision float *)
+  | Ofloatofsingle: operation           (**r [rd] is [r1] extended to double-precision float *)
 (*c Conversions between int and float: *)
   | Ointoffloat: operation              (**r [rd = signed_int_of_float(r1)] *)
   | Ofloatofwords: operation            (**r [rd = float_of_words(r1,r2)] *)
@@ -130,7 +138,7 @@ Defined.
 Definition eq_operation (x y: operation): {x=y} + {x<>y}.
 Proof.
   generalize Int.eq_dec; intro.
-  generalize Float.eq_dec; intro.
+  generalize Float.eq_dec Float32.eq_dec; intros.
   assert (forall (x y: ident), {x=y}+{x<>y}). exact peq.
   generalize eq_condition; intro.
   decide equality.
@@ -172,6 +180,7 @@ Definition eval_operation
   | Omove, v1::nil => Some v1
   | Ointconst n, nil => Some (Vint n)
   | Ofloatconst n, nil => Some (Vfloat n)
+  | Osingleconst n, nil => Some (Vsingle n)
   | Oaddrsymbol s ofs, nil => Some (Genv.symbol_address genv s ofs) 
   | Oaddrstack ofs, nil => Some (Val.add sp (Vint ofs))
   | Ocast8signed, v1::nil => Some (Val.sign_ext 8 v1)
@@ -213,7 +222,14 @@ Definition eval_operation
   | Osubf, v1::v2::nil => Some(Val.subf v1 v2)
   | Omulf, v1::v2::nil => Some(Val.mulf v1 v2)
   | Odivf, v1::v2::nil => Some(Val.divf v1 v2)
+  | Onegfs, v1::nil => Some(Val.negfs v1)
+  | Oabsfs, v1::nil => Some(Val.absfs v1)
+  | Oaddfs, v1::v2::nil => Some(Val.addfs v1 v2)
+  | Osubfs, v1::v2::nil => Some(Val.subfs v1 v2)
+  | Omulfs, v1::v2::nil => Some(Val.mulfs v1 v2)
+  | Odivfs, v1::v2::nil => Some(Val.divfs v1 v2)
   | Osingleoffloat, v1::nil => Some(Val.singleoffloat v1)
+  | Ofloatofsingle, v1::nil => Some(Val.floatofsingle v1)
   | Ointoffloat, v1::nil => Val.intoffloat v1
   | Ofloatofwords, v1::v2::nil => Some(Val.floatofwords v1 v2)
   | Omakelong, v1::v2::nil => Some(Val.longofwords v1 v2)
@@ -265,7 +281,8 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   match op with
   | Omove => (nil, Tint)   (* treated specially *)
   | Ointconst _ => (nil, Tint)
-  | Ofloatconst f => (nil, if Float.is_single_dec f then Tsingle else Tfloat)
+  | Ofloatconst f => (nil, Tfloat)
+  | Osingleconst f => (nil, Tsingle)
   | Oaddrsymbol _ _ => (nil, Tint)
   | Oaddrstack _ => (nil, Tint)
   | Ocast8signed => (Tint :: nil, Tint)
@@ -306,7 +323,14 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   | Osubf => (Tfloat :: Tfloat :: nil, Tfloat)
   | Omulf => (Tfloat :: Tfloat :: nil, Tfloat)
   | Odivf => (Tfloat :: Tfloat :: nil, Tfloat)
+  | Onegfs => (Tsingle :: nil, Tsingle)
+  | Oabsfs => (Tsingle :: nil, Tsingle)
+  | Oaddfs => (Tsingle :: Tsingle :: nil, Tsingle)
+  | Osubfs => (Tsingle :: Tsingle :: nil, Tsingle)
+  | Omulfs => (Tsingle :: Tsingle :: nil, Tsingle)
+  | Odivfs => (Tsingle :: Tsingle :: nil, Tsingle)
   | Osingleoffloat => (Tfloat :: nil, Tsingle)
+  | Ofloatofsingle => (Tsingle :: nil, Tfloat)
   | Ointoffloat => (Tfloat :: nil, Tint)
   | Ofloatofwords => (Tint :: Tint :: nil, Tfloat)
   | Omakelong => (Tint :: Tint :: nil, Tlong)
@@ -343,7 +367,8 @@ Proof with (try exact I).
   destruct op; simpl in H0; FuncInv; subst; simpl.
   congruence.
   exact I.
-  destruct (Float.is_single_dec f); auto. 
+  auto.
+  auto.
   unfold Genv.symbol_address. destruct (Genv.find_symbol genv i)...
   destruct sp...
   destruct v0...
@@ -386,8 +411,15 @@ Proof with (try exact I).
   destruct v0; destruct v1...
   destruct v0; destruct v1...
   destruct v0; destruct v1...
-  destruct v0... simpl. apply Float.singleoffloat_is_single.
-  destruct v0; simpl in H0; inv H0. destruct (Float.intoffloat f); inv H2...
+  destruct v0...
+  destruct v0...
+  destruct v0; destruct v1...
+  destruct v0; destruct v1...
+  destruct v0; destruct v1...
+  destruct v0; destruct v1...
+  destruct v0...
+  destruct v0...
+  destruct v0; simpl in H0; inv H0. destruct (Float.to_int f); inv H2...
   destruct v0; destruct v1...
   destruct v0; destruct v1...
   destruct v0...
@@ -521,35 +553,6 @@ Proof.
   rewrite Val.add_assoc. auto. 
 Qed.
 
-(** Transformation of addressing modes with two operands or more
-  into an equivalent arithmetic operation.  This is used in the [Reload]
-  pass when a store instruction cannot be reloaded directly because
-  it runs out of temporary registers. *)
-
-(** For the PowerPC, there is only one binary addressing mode: [Aindexed2].
-  The corresponding operation is [Oadd]. *)
-
-Definition op_for_binary_addressing (addr: addressing) : operation := Oadd.
-
-Lemma eval_op_for_binary_addressing:
-  forall (F V: Type) (ge: Genv.t F V) sp addr args v m,
-  (length args >= 2)%nat ->
-  eval_addressing ge sp addr args = Some v ->
-  eval_operation ge sp (op_for_binary_addressing addr) args m = Some v.
-Proof.
-  intros.
-  destruct addr; simpl in H0; FuncInv; simpl in H; try omegaContradiction.
-  simpl; congruence.
-Qed.
-
-Lemma type_op_for_binary_addressing:
-  forall addr,
-  (length (type_of_addressing addr) >= 2)%nat ->
-  type_of_operation (op_for_binary_addressing addr) = (type_of_addressing addr, Tint).
-Proof.
-  intros. destruct addr; simpl in H; reflexivity || omegaContradiction.
-Qed.
-
 (** Operations that are so cheap to recompute that CSE should not factor them out. *)
 
 Definition is_trivial_op (op: operation) : bool :=
@@ -578,61 +581,6 @@ Proof.
   destruct c; simpl; auto; discriminate. 
 Qed.
 
-(** Checking whether two addressings, applied to the same arguments, produce
-  separated memory addresses.  Used in [CSE]. *)
-
-Definition addressing_separated (chunk1: memory_chunk) (addr1: addressing)
-                               (chunk2: memory_chunk) (addr2: addressing) : bool :=
-  match addr1, addr2 with
-  | Aindexed ofs1, Aindexed ofs2 => 
-      Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2)
-  | Aglobal s1 ofs1, Aglobal s2 ofs2 => 
-      if ident_eq s1 s2 then Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2) else true
-  | Abased s1 ofs1, Abased s2 ofs2 => 
-      if ident_eq s1 s2 then Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2) else true
-  | Ainstack ofs1, Ainstack ofs2 =>
-      Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2)
-  | _, _ => false
-  end.
-
-Lemma addressing_separated_sound:
-  forall (F V: Type) (ge: Genv.t F V) sp chunk1 addr1 chunk2 addr2 vl b1 n1 b2 n2,
-  addressing_separated chunk1 addr1 chunk2 addr2 = true ->
-  eval_addressing ge sp addr1 vl = Some(Vptr b1 n1) ->
-  eval_addressing ge sp addr2 vl = Some(Vptr b2 n2) ->
-  b1 <> b2 \/ Int.unsigned n1 + size_chunk chunk1 <= Int.unsigned n2 \/ Int.unsigned n2 + size_chunk chunk2 <= Int.unsigned n1.
-Proof.
-  unfold addressing_separated; intros.
-  generalize (size_chunk_pos chunk1) (size_chunk_pos chunk2); intros SZ1 SZ2.
-  destruct addr1; destruct addr2; try discriminate; simpl in *; FuncInv.
-(* Aindexed *)
-  destruct v; simpl in *; inv H1; inv H2.
-  right. apply Int.no_overlap_sound; auto. 
-(* Aglobal *)
-  unfold Genv.symbol_address in *. 
-  destruct (Genv.find_symbol ge i1) eqn:?; inv H2.
-  destruct (Genv.find_symbol ge i) eqn:?; inv H1.
-  destruct (ident_eq i i1). subst.
-  replace (Int.unsigned n1) with (Int.unsigned (Int.add Int.zero n1)).
-  replace (Int.unsigned n2) with (Int.unsigned (Int.add Int.zero n2)).
-  right. apply Int.no_overlap_sound; auto. 
-  rewrite Int.add_commut; rewrite Int.add_zero; auto.
-  rewrite Int.add_commut; rewrite Int.add_zero; auto.
-  left. red; intros; elim n. subst. eapply Genv.genv_vars_inj; eauto.
-(* Abased *)
-  unfold Genv.symbol_address in *. 
-  destruct (Genv.find_symbol ge i1) eqn:?; simpl in *; try discriminate.
-  destruct v; inv H2.
-  destruct (Genv.find_symbol ge i) eqn:?; inv H1.
-  destruct (ident_eq i i1). subst.
-  rewrite (Int.add_commut i0 i3). rewrite (Int.add_commut i2 i3).
-  right. apply Int.no_overlap_sound; auto. 
-  left. red; intros; elim n. subst. eapply Genv.genv_vars_inj; eauto.
-(* Ainstack *)
-  destruct sp; simpl in *; inv H1; inv H2.
-  right. apply Int.no_overlap_sound; auto. 
-Qed.
-
 (** * Invariance and compatibility properties. *)
 
 (** [eval_operation] and [eval_addressing] depend on a global environment
@@ -719,6 +667,8 @@ Ltac InvInject :=
       inv H; InvInject
   | [ H: val_inject _ (Vfloat _) _ |- _ ] =>
       inv H; InvInject
+  | [ H: val_inject _ (Vsingle _) _ |- _ ] =>
+      inv H; InvInject
   | [ H: val_inject _ (Vptr _ _) _ |- _ ] =>
       inv H; InvInject
   | [ H: val_list_inject _ nil _ |- _ ] =>
@@ -806,7 +756,14 @@ Proof.
   inv H4; inv H2; simpl; auto.
   inv H4; inv H2; simpl; auto.
   inv H4; simpl; auto.
-  inv H4; simpl in H1; inv H1. simpl. destruct (Float.intoffloat f0); simpl in H2; inv H2.
+  inv H4; simpl; auto.
+  inv H4; inv H2; simpl; auto.
+  inv H4; inv H2; simpl; auto.
+  inv H4; inv H2; simpl; auto.
+  inv H4; inv H2; simpl; auto.
+  inv H4; simpl; auto.
+  inv H4; simpl; auto.
+  inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_int f0); simpl in H2; inv H2.
   exists (Vint i); auto.
   inv H4; inv H2; simpl; auto.
   inv H4; inv H2; simpl; auto.
diff --git a/powerpc/PrintAsm.ml b/powerpc/PrintAsm.ml
index b90b9f21..e3f07244 100644
--- a/powerpc/PrintAsm.ml
+++ b/powerpc/PrintAsm.ml
@@ -395,11 +395,11 @@ let print_builtin_vload_common oc chunk base offset res =
       fprintf oc "	lhz	%a, %a(%a)\n" ireg res constant offset ireg base
   | Mint16signed, IR res ->
       fprintf oc "	lha	%a, %a(%a)\n" ireg res constant offset ireg base
-  | Mint32, IR res ->
+  | (Mint32 | Many32), IR res ->
       fprintf oc "	lwz	%a, %a(%a)\n" ireg res constant offset ireg base
   | Mfloat32, FR res ->
       fprintf oc "	lfs	%a, %a(%a)\n" freg res constant offset ireg base
-  | Mfloat64, FR res ->
+  | (Mfloat64 | Many64), FR res ->
       fprintf oc "	lfd	%a, %a(%a)\n" freg res constant offset ireg base
   (* Mint64 is special-cased below *)
   | _ ->
@@ -451,12 +451,11 @@ let print_builtin_vstore_common oc chunk base offset src =
       fprintf oc "	stb	%a, %a(%a)\n" ireg src constant offset ireg base
   | (Mint16signed | Mint16unsigned), IR src ->
       fprintf oc "	sth	%a, %a(%a)\n" ireg src constant offset ireg base
-  | Mint32, IR src ->
+  | (Mint32 | Many32), IR src ->
       fprintf oc "	stw	%a, %a(%a)\n" ireg src constant offset ireg base
   | Mfloat32, FR src ->
-      fprintf oc "	frsp	%a, %a\n" freg FPR13 freg src;
-      fprintf oc "	stfs	%a, %a(%a)\n" freg FPR13 constant offset ireg base
-  | Mfloat64, FR src ->
+      fprintf oc "	stfs	%a, %a(%a)\n" freg src constant offset ireg base
+  | (Mfloat64 | Many64), FR src ->
       fprintf oc "	stfd	%a, %a(%a)\n" freg src constant offset ireg base
   (* Mint64 is special-cased below *)
   | _ ->
@@ -512,11 +511,11 @@ let align n a = (n + a - 1) land (-a)
 let rec next_arg_locations ir fr ofs = function
   | [] ->
       (ir, fr, ofs)
-  | Tint :: l ->
+  | (Tint | Tany32) :: l ->
       if ir < 8
       then next_arg_locations (ir + 1) fr ofs l
       else next_arg_locations ir fr (ofs + 4) l
-  | (Tfloat | Tsingle) :: l ->
+  | (Tfloat | Tsingle | Tany64) :: l ->
       if fr < 8
       then next_arg_locations ir (fr + 1) ofs l
       else next_arg_locations ir fr (align ofs 8 + 8) l
@@ -676,6 +675,7 @@ let short_cond_branch tbl pc lbl_dest =
 (* Printing of instructions *)
 
 let float_literals : (int * int64) list ref = ref []
+let float32_literals : (int * int32) list ref = ref []
 let jumptables : (int * label list) list ref = ref []
 
 let print_instruction oc tbl pc fallthrough = function
@@ -804,10 +804,12 @@ let print_instruction oc tbl pc fallthrough = function
         fprintf oc "	addi	%a, %a, %ld\n" ireg GPR1 ireg GPR1 sz
       else
         fprintf oc "	lwz	%a, %ld(%a)\n" ireg GPR1 ofs ireg GPR1
-  | Pfabs(r1, r2) ->
+  | Pfabs(r1, r2) | Pfabss(r1, r2) ->
       fprintf oc "	fabs	%a, %a\n" freg r1 freg r2
   | Pfadd(r1, r2, r3) ->
       fprintf oc "	fadd	%a, %a, %a\n" freg r1 freg r2 freg r3
+  | Pfadds(r1, r2, r3) ->
+      fprintf oc "	fadds	%a, %a, %a\n" freg r1 freg r2 freg r3
   | Pfcmpu(r1, r2) ->
       fprintf oc "	fcmpu	%a, %a, %a\n" creg 0 freg r1 freg r2
   | Pfcti(r1, r2) ->
@@ -821,6 +823,8 @@ let print_instruction oc tbl pc fallthrough = function
       fprintf oc "%s end pseudoinstr fcti\n" comment
   | Pfdiv(r1, r2, r3) ->
       fprintf oc "	fdiv	%a, %a, %a\n" freg r1 freg r2 freg r3
+  | Pfdivs(r1, r2, r3) ->
+      fprintf oc "	fdivs	%a, %a, %a\n" freg r1 freg r2 freg r3
   | Pfmake(rd, r1, r2) ->
       fprintf oc "%s begin pseudoinstr %a = fmake(%a, %a)\n"
               comment freg rd ireg r1 ireg r2;
@@ -835,25 +839,37 @@ let print_instruction oc tbl pc fallthrough = function
       fprintf oc "	fmr	%a, %a\n" freg r1 freg r2
   | Pfmul(r1, r2, r3) ->
       fprintf oc "	fmul	%a, %a, %a\n" freg r1 freg r2 freg r3
-  | Pfneg(r1, r2) ->
+  | Pfmuls(r1, r2, r3) ->
+      fprintf oc "	fmuls	%a, %a, %a\n" freg r1 freg r2 freg r3
+  | Pfneg(r1, r2) | Pfnegs(r1, r2) ->
       fprintf oc "	fneg	%a, %a\n" freg r1 freg r2
   | Pfrsp(r1, r2) ->
       fprintf oc "	frsp	%a, %a\n" freg r1 freg r2
+  | Pfxdp(r1, r2) ->
+      if r1 <> r2 then
+        fprintf oc "	fmr	%a, %a\n" freg r1 freg r2
   | Pfsub(r1, r2, r3) ->
       fprintf oc "	fsub	%a, %a, %a\n" freg r1 freg r2 freg r3
+  | Pfsubs(r1, r2, r3) ->
+      fprintf oc "	fsubs	%a, %a, %a\n" freg r1 freg r2 freg r3
   | Plbz(r1, c, r2) ->
       fprintf oc "	lbz	%a, %a(%a)\n" ireg r1 constant c ireg r2
   | Plbzx(r1, r2, r3) ->
       fprintf oc "	lbzx	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
-  | Plfd(r1, c, r2) ->
+  | Plfd(r1, c, r2)  | Plfd_a(r1, c, r2) ->
       fprintf oc "	lfd	%a, %a(%a)\n" freg r1 constant c ireg r2
-  | Plfdx(r1, r2, r3) ->
+  | Plfdx(r1, r2, r3) | Plfdx_a(r1, r2, r3) ->
       fprintf oc "	lfdx	%a, %a, %a\n" freg r1 ireg r2 ireg r3
   | Plfi(r1, c) ->
       let lbl = new_label() in
       fprintf oc "	addis	%a, 0, %a\n" ireg GPR12 label_high lbl;
       fprintf oc "	lfd	%a, %a(%a) %s %.18g\n" freg r1 label_low lbl ireg GPR12 comment (camlfloat_of_coqfloat c);
-      float_literals := (lbl, camlint64_of_coqint (Floats.Float.bits_of_double c)) :: !float_literals;
+      float_literals := (lbl, camlint64_of_coqint (Floats.Float.to_bits c)) :: !float_literals;
+  | Plfis(r1, c) ->
+      let lbl = new_label() in
+      fprintf oc "	addis	%a, 0, %a\n" ireg GPR12 label_high lbl;
+      fprintf oc "	lfs	%a, %a(%a) %s %.18g\n" freg r1 label_low lbl ireg GPR12 comment (camlfloat_of_coqfloat32 c);
+      float32_literals := (lbl, camlint_of_coqint (Floats.Float32.to_bits c)) :: !float32_literals;
   | Plfs(r1, c, r2) ->
       fprintf oc "	lfs	%a, %a(%a)\n" freg r1 constant c ireg r2
   | Plfsx(r1, r2, r3) ->
@@ -866,9 +882,9 @@ let print_instruction oc tbl pc fallthrough = function
       fprintf oc "	lhz	%a, %a(%a)\n" ireg r1 constant c ireg r2
   | Plhzx(r1, r2, r3) ->
       fprintf oc "	lhzx	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
-  | Plwz(r1, c, r2) ->
+  | Plwz(r1, c, r2) | Plwz_a(r1, c, r2) ->
       fprintf oc "	lwz	%a, %a(%a)\n" ireg r1 constant c ireg r2
-  | Plwzx(r1, r2, r3) ->
+  | Plwzx(r1, r2, r3) | Plwzx_a(r1, r2, r3) ->
       fprintf oc "	lwzx	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
   | Pmfcrbit(r1, bit) ->
       fprintf oc "	mfcr	%a\n" ireg r1;
@@ -924,23 +940,21 @@ let print_instruction oc tbl pc fallthrough = function
       fprintf oc "	stb	%a, %a(%a)\n" ireg r1 constant c ireg r2
   | Pstbx(r1, r2, r3) ->
       fprintf oc "	stbx	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
-  | Pstfd(r1, c, r2) ->
+  | Pstfd(r1, c, r2) | Pstfd_a(r1, c, r2) ->
       fprintf oc "	stfd	%a, %a(%a)\n" freg r1 constant c ireg r2
-  | Pstfdx(r1, r2, r3) ->
+  | Pstfdx(r1, r2, r3) | Pstfdx_a(r1, r2, r3) ->
       fprintf oc "	stfdx	%a, %a, %a\n" freg r1 ireg r2 ireg r3
   | Pstfs(r1, c, r2) ->
-      fprintf oc "	frsp	%a, %a\n" freg FPR13 freg r1;
-      fprintf oc "	stfs	%a, %a(%a)\n" freg FPR13 constant c ireg r2
+      fprintf oc "	stfs	%a, %a(%a)\n" freg r1 constant c ireg r2
   | Pstfsx(r1, r2, r3) ->
-      fprintf oc "	frsp	%a, %a\n" freg FPR13 freg r1;
-      fprintf oc "	stfsx	%a, %a, %a\n" freg FPR13 ireg r2 ireg r3
+      fprintf oc "	stfsx	%a, %a, %a\n" freg r1 ireg r2 ireg r3
   | Psth(r1, c, r2) ->
       fprintf oc "	sth	%a, %a(%a)\n" ireg r1 constant c ireg r2
   | Psthx(r1, r2, r3) ->
       fprintf oc "	sthx	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
-  | Pstw(r1, c, r2) ->
+  | Pstw(r1, c, r2) | Pstw_a(r1, c, r2) ->
       fprintf oc "	stw	%a, %a(%a)\n" ireg r1 constant c ireg r2
-  | Pstwx(r1, r2, r3) ->
+  | Pstwx(r1, r2, r3) | Pstwx_a(r1, r2, r3) ->
       fprintf oc "	stwx	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
   | Psubfc(r1, r2, r3) ->
       fprintf oc "	subfc	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
@@ -1061,11 +1075,14 @@ let rec print_instructions oc tbl pc fallthrough = function
 
 (* Print the code for a function *)
 
-let print_literal oc (lbl, n) =
+let print_literal64 oc (lbl, n) =
   let nlo = Int64.to_int32 n
   and nhi = Int64.to_int32(Int64.shift_right_logical n 32) in
   fprintf oc "%a:	.long	0x%lx, 0x%lx\n" label lbl nhi nlo
 
+let print_literal32 oc (lbl, n) =
+  fprintf oc "%a:	.long	0x%lx\n" label lbl n
+
 let print_jumptable oc (lbl, tbl) =
   fprintf oc "%a:" label lbl;
   List.iter
@@ -1075,6 +1092,7 @@ let print_jumptable oc (lbl, tbl) =
 let print_function oc name fn =
   Hashtbl.clear current_function_labels;
   float_literals := [];
+  float32_literals := [];
   jumptables := [];
   current_function_sig := fn.fn_sig;
   let (text, lit, jmptbl) =
@@ -1095,11 +1113,12 @@ let print_function oc name fn =
   cfi_endproc oc;
   fprintf oc "	.type	%a, @function\n" symbol name;
   fprintf oc "	.size	%a, . - %a\n" symbol name symbol name;
-  if !float_literals <> [] then begin
+  if !float_literals <> [] || !float32_literals <> [] then begin
     section oc lit;
     fprintf oc "	.balign 8\n";
-    List.iter (print_literal oc) !float_literals;
-    float_literals := []
+    List.iter (print_literal64 oc) !float_literals;
+    List.iter (print_literal32 oc) !float32_literals;
+    float_literals := []; float32_literals := []
   end;
   if !jumptables <> [] then begin
     section oc jmptbl;
@@ -1131,10 +1150,10 @@ let print_init oc = function
                  (Int64.logand b 0xFFFFFFFFL)
   | Init_float32 n ->
       fprintf oc "	.long	0x%lx %s %.18g\n"
-                 (camlint_of_coqint (Floats.Float.bits_of_single n))
+                 (camlint_of_coqint (Floats.Float32.to_bits n))
                  comment (camlfloat_of_coqfloat n)
   | Init_float64 n ->
-      let b = camlint64_of_coqint (Floats.Float.bits_of_double n) in
+      let b = camlint64_of_coqint (Floats.Float.to_bits n) in
       fprintf oc "	.long	0x%Lx, 0x%Lx %s %.18g\n"
                  (Int64.shift_right_logical b 32)
                  (Int64.logand b 0xFFFFFFFFL)
diff --git a/powerpc/SelectOp.vp b/powerpc/SelectOp.vp
index 371a08a4..70b1feb6 100644
--- a/powerpc/SelectOp.vp
+++ b/powerpc/SelectOp.vp
@@ -374,6 +374,12 @@ Definition addf (e1 e2: expr) :=  Eop Oaddf (e1 ::: e2 ::: Enil).
 Definition subf (e1 e2: expr) :=  Eop Osubf (e1 ::: e2 ::: Enil).
 Definition mulf (e1 e2: expr) :=  Eop Omulf (e1 ::: e2 ::: Enil).
 
+Definition negfs (e: expr) :=  Eop Onegfs (e ::: Enil).
+Definition absfs (e: expr) :=  Eop Oabsfs (e ::: Enil).
+Definition addfs (e1 e2: expr) :=  Eop Oaddfs (e1 ::: e2 ::: Enil).
+Definition subfs (e1 e2: expr) :=  Eop Osubfs (e1 ::: e2 ::: Enil).
+Definition mulfs (e1 e2: expr) :=  Eop Omulfs (e1 ::: e2 ::: Enil).
+
 (** ** Comparisons *)
 
 Nondetfunction compimm (default: comparison -> int -> condition)
@@ -433,6 +439,10 @@ Nondetfunction compu (c: comparison) (e1: expr) (e2: expr) :=
 Definition compf (c: comparison) (e1: expr) (e2: expr) :=
   Eop (Ocmp (Ccompf c)) (e1 ::: e2 ::: Enil).
 
+Definition compfs (c: comparison) (e1: expr) (e2: expr) :=
+  Eop (Ocmp (Ccompf c)) (Eop Ofloatofsingle (e1 ::: Enil) :::
+                         Eop Ofloatofsingle (e2 ::: Enil) ::: Enil).
+
 (** ** Integer conversions *)
 
 Definition cast8unsigned (e: expr) := andimm (Int.repr 255) e.
@@ -457,7 +467,7 @@ Definition intoffloat (e: expr) := Eop Ointoffloat (e ::: Enil).
 
 Definition intuoffloat (e: expr) :=
   Elet e
-  (Elet (Eop (Ofloatconst (Float.floatofintu Float.ox8000_0000)) Enil)
+  (Elet (Eop (Ofloatconst (Float.of_intu Float.ox8000_0000)) Enil)
     (Econdition (CEcond (Ccompf Clt) (Eletvar 1 ::: Eletvar 0 ::: Enil))
       (intoffloat (Eletvar 1))
       (addimm Float.ox8000_0000 (intoffloat (subf (Eletvar 1) (Eletvar 0))))))%nat.
@@ -465,7 +475,7 @@ Definition intuoffloat (e: expr) :=
 Nondetfunction floatofintu (e: expr) :=
   match e with
   | Eop (Ointconst n) Enil =>
-      Eop (Ofloatconst (Float.floatofintu n)) Enil
+      Eop (Ofloatconst (Float.of_intu n)) Enil
   | _ =>
       subf (Eop Ofloatofwords (Eop (Ointconst Float.ox4330_0000) Enil ::: e ::: Enil))
            (Eop (Ofloatconst (Float.from_words Float.ox4330_0000 Int.zero)) Enil)
@@ -474,14 +484,27 @@ Nondetfunction floatofintu (e: expr) :=
 Nondetfunction floatofint (e: expr) :=
   match e with
   | Eop (Ointconst n) Enil =>
-      Eop (Ofloatconst (Float.floatofint n)) Enil
+      Eop (Ofloatconst (Float.of_int n)) Enil
   | _ =>
       subf (Eop Ofloatofwords (Eop (Ointconst Float.ox4330_0000) Enil
                                ::: addimm Float.ox8000_0000 e ::: Enil))
            (Eop (Ofloatconst (Float.from_words Float.ox4330_0000 Float.ox8000_0000)) Enil)
   end.
 
+Definition intofsingle (e: expr) :=
+  intoffloat (Eop Ofloatofsingle (e ::: Enil)).
+
+Definition singleofint (e: expr) :=
+  Eop Osingleoffloat (floatofint e ::: Enil).
+
+Definition intuofsingle (e: expr) :=
+  intuoffloat (Eop Ofloatofsingle (e ::: Enil)).
+
+Definition singleofintu (e: expr) :=
+  Eop Osingleoffloat (floatofintu e ::: Enil).
+
 Definition singleoffloat (e: expr) := Eop Osingleoffloat (e ::: Enil).
+Definition floatofsingle (e: expr) := Eop Ofloatofsingle (e ::: Enil).
 
 (** ** Recognition of addressing modes for load and store operations *)
 
diff --git a/powerpc/SelectOpproof.v b/powerpc/SelectOpproof.v
index cb48d51a..8311b82c 100644
--- a/powerpc/SelectOpproof.v
+++ b/powerpc/SelectOpproof.v
@@ -639,6 +639,31 @@ Proof.
   red; intros; TrivialExists.
 Qed.
 
+Theorem eval_negfs: unary_constructor_sound negfs Val.negfs.
+Proof.
+  red; intros. TrivialExists. 
+Qed.
+
+Theorem eval_absfs: unary_constructor_sound absfs Val.absfs.
+Proof.
+  red; intros. TrivialExists. 
+Qed.
+
+Theorem eval_addfs: binary_constructor_sound addfs Val.addfs.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+ 
+Theorem eval_subfs: binary_constructor_sound subfs Val.subfs.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+
+Theorem eval_mulfs: binary_constructor_sound mulfs Val.mulfs.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+
 Section COMP_IMM.
 
 Variable default: comparison -> int -> condition.
@@ -746,6 +771,18 @@ Proof.
   intros; red; intros. unfold compf. TrivialExists.
 Qed.
 
+Theorem eval_compfs:
+  forall c, binary_constructor_sound (compfs c) (Val.cmpfs c).
+Proof.
+  intros; red; intros. unfold compfs.
+  replace (Val.cmpfs c x y) with
+          (Val.cmpf c (Val.floatofsingle x) (Val.floatofsingle y)).
+  TrivialExists. constructor. EvalOp. simpl; reflexivity. 
+  constructor. EvalOp. simpl; reflexivity. constructor.
+  auto. 
+  destruct x; auto. destruct y; auto. unfold Val.cmpf, Val.cmpfs; simpl. 
+  rewrite Float32.cmp_double. auto. 
+Qed.
 
 Theorem eval_cast8signed: unary_constructor_sound cast8signed (Val.sign_ext 8).
 Proof.
@@ -778,6 +815,11 @@ Proof.
   red; intros. unfold singleoffloat. TrivialExists.
 Qed.
 
+Theorem eval_floatofsingle: unary_constructor_sound floatofsingle Val.floatofsingle.
+Proof.
+  red; intros. unfold floatofsingle. TrivialExists.
+Qed.
+
 Theorem eval_intoffloat:
   forall le a x y,
   eval_expr ge sp e m le a x ->
@@ -794,10 +836,10 @@ Theorem eval_intuoffloat:
   exists v, eval_expr ge sp e m le (intuoffloat a) v /\ Val.lessdef y v.
 Proof.
   intros. destruct x; simpl in H0; try discriminate.
-  destruct (Float.intuoffloat f) as [n|] eqn:?; simpl in H0; inv H0.
+  destruct (Float.to_intu f) as [n|] eqn:?; simpl in H0; inv H0.
   exists (Vint n); split; auto. unfold intuoffloat.
   set (im := Int.repr Int.half_modulus).
-  set (fm := Float.floatofintu im).
+  set (fm := Float.of_intu im).
   assert (eval_expr ge sp e m (Vfloat fm :: Vfloat f :: le) (Eletvar (S O)) (Vfloat f)).
     constructor. auto.
   assert (eval_expr ge sp e m (Vfloat fm :: Vfloat f :: le) (Eletvar O) (Vfloat fm)).
@@ -807,9 +849,9 @@ Proof.
   eapply eval_Econdition with (va := Float.cmp Clt f fm).
   eauto with evalexpr.
   destruct (Float.cmp Clt f fm) eqn:?.
-  exploit Float.intuoffloat_intoffloat_1; eauto. intro EQ.
+  exploit Float.to_intu_to_int_1; eauto. intro EQ.
   EvalOp. simpl. rewrite EQ; auto.
-  exploit Float.intuoffloat_intoffloat_2; eauto. 
+  exploit Float.to_intu_to_int_2; eauto. 
   change Float.ox8000_0000 with im. fold fm. intro EQ.
   set (t2 := subf (Eletvar (S O)) (Eletvar O)).
   set (t3 := intoffloat t2).
@@ -834,7 +876,7 @@ Proof.
   intros until y. unfold floatofint. destruct (floatofint_match a); intros.
   InvEval. TrivialExists. 
   rename e0 into a. destruct x; simpl in H0; inv H0.
-  exists (Vfloat (Float.floatofint i)); split; auto.
+  exists (Vfloat (Float.of_int i)); split; auto.
   set (t1 := addimm Float.ox8000_0000 a).
   set (t2 := Eop Ofloatofwords (Eop (Ointconst Float.ox4330_0000) Enil ::: t1 ::: Enil)).
   set (t3 := Eop (Ofloatconst (Float.from_words Float.ox4330_0000 Float.ox8000_0000)) Enil).
@@ -844,7 +886,7 @@ Proof.
   unfold t2. EvalOp. constructor. EvalOp. simpl; eauto. constructor. eauto. constructor. 
   unfold eval_operation. eauto. 
   instantiate (2 := t3). unfold t3. EvalOp. simpl; eauto.  
-  intros [v2 [A2 B2]]. simpl in B2. inv B2. rewrite Float.floatofint_from_words. auto.
+  intros [v2 [A2 B2]]. simpl in B2. inv B2. rewrite Float.of_int_from_words. auto.
 Qed.
 
 Theorem eval_floatofintu:
@@ -856,7 +898,7 @@ Proof.
   intros until y. unfold floatofintu. destruct (floatofintu_match a); intros.
   InvEval. TrivialExists.
   rename e0 into a. destruct x; simpl in H0; inv H0.
-  exists (Vfloat (Float.floatofintu i)); split; auto.
+  exists (Vfloat (Float.of_intu i)); split; auto.
   unfold floatofintu.
   set (t2 := Eop Ofloatofwords (Eop (Ointconst Float.ox4330_0000) Enil ::: a ::: Enil)).
   set (t3 := Eop (Ofloatconst (Float.from_words Float.ox4330_0000 Int.zero)) Enil).
@@ -864,7 +906,73 @@ Proof.
   unfold t2. EvalOp. constructor. EvalOp. simpl; eauto. constructor. eauto. constructor. 
   unfold eval_operation. eauto. 
   instantiate (2 := t3). unfold t3. EvalOp. simpl; eauto.
-  intros [v2 [A2 B2]]. simpl in B2. inv B2. rewrite Float.floatofintu_from_words. auto.
+  intros [v2 [A2 B2]]. simpl in B2. inv B2. rewrite Float.of_intu_from_words. auto.
+Qed.
+
+Theorem eval_intofsingle:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.intofsingle x = Some y ->
+  exists v, eval_expr ge sp e m le (intofsingle a) v /\ Val.lessdef y v.
+Proof.
+  intros; unfold intofsingle. 
+  assert (Val.intoffloat (Val.floatofsingle x) = Some y).
+  { destruct x; simpl in H0; try discriminate.
+    destruct (Float32.to_int f) eqn:F; inv H0. 
+    apply Float32.to_int_double in F. 
+    simpl. unfold Float32.to_double in F; rewrite F; auto.  
+  }
+  apply eval_intoffloat with (Val.floatofsingle x); auto. EvalOp. 
+Qed.
+
+Theorem eval_singleofint:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.singleofint x = Some y ->
+  exists v, eval_expr ge sp e m le (singleofint a) v /\ Val.lessdef y v.
+Proof.
+  intros. unfold singleofint.
+  assert (exists z, Val.floatofint x = Some z /\ y = Val.singleoffloat z).
+  {
+    destruct x; inv H0. simpl. exists (Vfloat (Float.of_int i)); simpl; split; auto.
+    f_equal. apply Float32.of_int_double. 
+  }
+  destruct H1 as (z & A & B). subst y. 
+  exploit eval_floatofint; eauto. intros (v & C & D). 
+  exists (Val.singleoffloat v); split. EvalOp. inv D; auto. 
+Qed.
+
+Theorem eval_intuofsingle:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.intuofsingle x = Some y ->
+  exists v, eval_expr ge sp e m le (intuofsingle a) v /\ Val.lessdef y v.
+Proof.
+  intros; unfold intuofsingle. 
+  assert (Val.intuoffloat (Val.floatofsingle x) = Some y).
+  { destruct x; simpl in H0; try discriminate.
+    destruct (Float32.to_intu f) eqn:F; inv H0. 
+    apply Float32.to_intu_double in F. 
+    simpl. unfold Float32.to_double in F; rewrite F; auto.  
+  }
+  apply eval_intuoffloat with (Val.floatofsingle x); auto. EvalOp. 
+Qed.
+
+Theorem eval_singleofintu:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.singleofintu x = Some y ->
+  exists v, eval_expr ge sp e m le (singleofintu a) v /\ Val.lessdef y v.
+Proof.
+  intros. unfold singleofintu.
+  assert (exists z, Val.floatofintu x = Some z /\ y = Val.singleoffloat z).
+  {
+    destruct x; inv H0. simpl. exists (Vfloat (Float.of_intu i)); simpl; split; auto.
+    f_equal. apply Float32.of_intu_double. 
+  }
+  destruct H1 as (z & A & B). subst y. 
+  exploit eval_floatofintu; eauto. intros (v & C & D). 
+  exists (Val.singleoffloat v); split. EvalOp. inv D; auto. 
 Qed.
 
 Theorem eval_addressing:
diff --git a/powerpc/Unusedglob1.ml b/powerpc/Unusedglob1.ml
index 49c0774e..2d3efe39 100644
--- a/powerpc/Unusedglob1.ml
+++ b/powerpc/Unusedglob1.ml
@@ -42,18 +42,22 @@ let referenced_instr = function
   | Pcmpwi(_, c)
   | Plbz(_, c, _)
   | Plfd(_, c, _)
+  | Plfd_a(_, c, _)
   | Plfs(_, c, _)
   | Plha(_, c, _)
   | Plhz(_, c, _)
   | Plwz(_, c, _)
+  | Plwz_a(_, c, _)
   | Pmulli(_, _, c)
   | Pori(_, _, c)
   | Poris(_, _, c)
   | Pstb(_, c, _)
   | Pstfd(_, c, _)
+  | Pstfd_a(_, c, _)
   | Pstfs(_, c, _)
   | Psth(_, c, _) 
   | Pstw(_, c, _) 
+  | Pstw_a(_, c, _) 
   | Psubfic(_, _, c)
   | Pxori(_, _, c)
   | Pxoris(_, _, c) -> referenced_constant c
diff --git a/powerpc/ValueAOp.v b/powerpc/ValueAOp.v
index 77463f4b..a5a1db80 100644
--- a/powerpc/ValueAOp.v
+++ b/powerpc/ValueAOp.v
@@ -52,6 +52,7 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   | Omove, v1::nil => v1
   | Ointconst n, nil => I n
   | Ofloatconst n, nil => if propagate_float_constants tt then F n else ftop
+  | Osingleconst n, nil => if propagate_float_constants tt then FS n else ftop
   | Oaddrsymbol id ofs, nil => Ptr (Gl id ofs)
   | Oaddrstack ofs, nil => Ptr (Stk ofs)
   | Ocast8signed, v1 :: nil => sign_ext 8 v1
@@ -92,7 +93,14 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   | Osubf, v1::v2::nil => subf v1 v2
   | Omulf, v1::v2::nil => mulf v1 v2
   | Odivf, v1::v2::nil => divf v1 v2
+  | Onegfs, v1::nil => negfs v1
+  | Oabsfs, v1::nil => absfs v1
+  | Oaddfs, v1::v2::nil => addfs v1 v2
+  | Osubfs, v1::v2::nil => subfs v1 v2
+  | Omulfs, v1::v2::nil => mulfs v1 v2
+  | Odivfs, v1::v2::nil => divfs v1 v2
   | Osingleoffloat, v1::nil => singleoffloat v1
+  | Ofloatofsingle, v1::nil => floatofsingle v1
   | Ointoffloat, v1::nil => intoffloat v1
   | Ofloatofwords, v1::v2::nil => floatofwords v1 v2
   | Omakelong, v1::v2::nil => longofwords v1 v2
@@ -163,6 +171,7 @@ Proof.
   unfold eval_operation, eval_static_operation; intros;
   destruct op; InvHyps; eauto with va.
   destruct (propagate_float_constants tt); constructor.
+  destruct (propagate_float_constants tt); constructor.
   rewrite Int.add_zero_l; eauto with va.
   fold (Val.sub (Vint i) a1). auto with va.
   apply floatofwords_sound; auto. 
diff --git a/powerpc/eabi/Conventions1.v b/powerpc/eabi/Conventions1.v
index 2db1f737..866e73d6 100644
--- a/powerpc/eabi/Conventions1.v
+++ b/powerpc/eabi/Conventions1.v
@@ -15,6 +15,7 @@
 
 Require Import Coqlib.
 Require Import AST.
+Require Import Events.
 Require Import Locations.
 
 (** * Classification of machine registers *)
@@ -180,13 +181,13 @@ Proof.
 Qed.
 
 Lemma int_callee_save_type:
-  forall r, In r int_callee_save_regs -> mreg_type r = Tint.
+  forall r, In r int_callee_save_regs -> mreg_type r = Tany32.
 Proof.
   intro. simpl; ElimOrEq; reflexivity.
 Qed.
 
 Lemma float_callee_save_type:
-  forall r, In r float_callee_save_regs -> mreg_type r = Tfloat.
+  forall r, In r float_callee_save_regs -> mreg_type r = Tany64.
 Proof.
   intro. simpl; ElimOrEq; reflexivity.
 Qed.
@@ -239,12 +240,21 @@ Qed.
 Definition loc_result (s: signature) : list mreg :=
   match s.(sig_res) with
   | None => R3 :: nil
-  | Some Tint => R3 :: nil
-  | Some (Tfloat | Tsingle) => F1 :: nil
+  | Some (Tint | Tany32) => R3 :: nil
+  | Some (Tfloat | Tsingle | Tany64) => F1 :: nil
   | Some Tlong => R3 :: R4 :: nil
   end.
 
-(** The result location is a caller-save register *)
+(** The result registers have types compatible with that given in the signature. *)
+
+Lemma loc_result_type:
+  forall sig,
+  subtype_list (proj_sig_res' sig) (map mreg_type (loc_result sig)) = true.
+Proof.
+  intros. unfold proj_sig_res', loc_result. destruct (sig_res sig) as [[]|]; auto.
+Qed.
+
+(** The result locations are caller-save registers *)
 
 Lemma loc_result_caller_save:
   forall (s: signature) (r: mreg), 
@@ -278,18 +288,18 @@ Fixpoint loc_arguments_rec
     (tyl: list typ) (ir fr ofs: Z) {struct tyl} : list loc :=
   match tyl with
   | nil => nil
-  | Tint :: tys =>
+  | (Tint | Tany32) as ty :: tys =>
       match list_nth_z int_param_regs ir with
       | None =>
-          S Outgoing ofs Tint :: loc_arguments_rec tys ir fr (ofs + 1)
+          S Outgoing ofs ty :: loc_arguments_rec tys ir fr (ofs + 1)
       | Some ireg =>
           R ireg :: loc_arguments_rec tys (ir + 1) fr ofs
       end
-  | (Tfloat | Tsingle) :: tys =>
+  | (Tfloat | Tsingle | Tany64) as ty :: tys =>
       match list_nth_z float_param_regs fr with
       | None =>
           let ofs := align ofs 2 in
-          S Outgoing ofs Tfloat :: loc_arguments_rec tys ir fr (ofs + 2)
+          S Outgoing ofs ty :: loc_arguments_rec tys ir fr (ofs + 2)
       | Some freg =>
           R freg :: loc_arguments_rec tys ir (fr + 1) ofs
       end
@@ -316,12 +326,12 @@ Definition loc_arguments (s: signature) : list loc :=
 Fixpoint size_arguments_rec (tyl: list typ) (ir fr ofs: Z) {struct tyl} : Z :=
   match tyl with
   | nil => ofs
-  | Tint :: tys =>
+  | (Tint | Tany32) :: tys =>
       match list_nth_z int_param_regs ir with
       | None => size_arguments_rec tys ir fr (ofs + 1)
       | Some ireg => size_arguments_rec tys (ir + 1) fr ofs
       end
-  | (Tfloat | Tsingle) :: tys =>
+  | (Tfloat | Tsingle | Tany64) :: tys =>
       match list_nth_z float_param_regs fr with
       | None => size_arguments_rec tys ir fr (align ofs 2 + 2)
       | Some freg => size_arguments_rec tys ir (fr + 1) ofs
@@ -393,7 +403,7 @@ Opaque list_nth_z.
   destruct H. subst. split. omega. congruence.
   exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega.
   assert (ofs <= align ofs 2) by (apply align_le; omega). 
-  destruct H. subst. split. omega. congruence. 
+  destruct H. subst. split. omega. congruence.
   destruct H. subst. split. omega. congruence.
   exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega. 
 - (* single *)
@@ -404,6 +414,20 @@ Opaque list_nth_z.
   exploit IHtyl; eauto. destruct l; auto. destruct sl; auto.
   assert (ofs <= align ofs 2) by (apply align_le; omega). 
   intuition omega.
+- (* any32 *)
+  destruct (list_nth_z int_param_regs ir) as [r|] eqn:E; destruct H.
+  subst. left. eapply list_nth_z_in; eauto.
+  eapply IHtyl; eauto.
+  subst. split. omega. congruence.
+  exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega.
+- (* any64 *)
+  destruct (list_nth_z float_param_regs fr) as [r|] eqn:E; destruct H. 
+  subst. right. eapply list_nth_z_in; eauto.
+  eapply IHtyl; eauto.
+  subst. split. apply Zle_ge. apply align_le. omega. congruence.
+  exploit IHtyl; eauto. destruct l; auto. destruct sl; auto.
+  assert (ofs <= align ofs 2) by (apply align_le; omega). 
+  intuition omega.
 Qed.
 
 Lemma loc_arguments_acceptable:
@@ -441,6 +465,10 @@ Proof.
   destruct (list_nth_z float_param_regs fr); eauto.
   apply Zle_trans with (align ofs0 2). apply align_le; omega.
   apply Zle_trans with (align ofs0 2 + 2); auto; omega.
+  destruct (list_nth_z int_param_regs ir); eauto. apply Zle_trans with (ofs0 + 1); auto; omega.
+  destruct (list_nth_z float_param_regs fr); eauto.
+  apply Zle_trans with (align ofs0 2). apply align_le; omega.
+  apply Zle_trans with (align ofs0 2 + 2); auto; omega.
 Qed.
 
 Lemma size_arguments_above:
@@ -490,6 +518,18 @@ Proof.
   transitivity (align ofs0 2 + 2). simpl; omega. eauto.  apply size_arguments_rec_above.
   eauto.
 - (* single *)
+  destruct (list_nth_z float_param_regs fr); destruct H0. 
+  congruence.
+  eauto.
+  inv H0. transitivity (align ofs0 2 + 2). simpl; omega. apply size_arguments_rec_above.
+  eauto.
+- (* any32 *)
+  destruct (list_nth_z int_param_regs ir); destruct H0. 
+  congruence.
+  eauto.
+  inv H0. apply size_arguments_rec_above.
+  eauto.
+- (* any64 *)
   destruct (list_nth_z float_param_regs fr); destruct H0. 
   congruence.
   eauto.
diff --git a/test/c/fftsp.c b/test/c/fftsp.c
index 42ae905e..3c7c23c3 100644
--- a/test/c/fftsp.c
+++ b/test/c/fftsp.c
@@ -52,17 +52,17 @@ int dfft(float x[], float y[], int np)
   for (k = 1;  k <= m-1; k++ ) {
     n2 = n2 / 2; 
     n4 = n2 / 4; 
-    e  = tpi / (double)n2; 
+    e  = tpi / (float)n2; 
     a  = 0.0;
     
     for (j = 1; j<= n4 ; j++) {
-      a3 = 3.0 * a; 
+      a3 = 3.0f * a; 
       cc1 = cosf(a); 
       ss1 = sinf(a);
       
       cc3 = cosf(a3); 
       ss3 = sinf(a3); 
-      a = e * (double)j; 
+      a = e * (float)j; 
       is = j; 
       id = 2 * n2;
 	  
@@ -162,7 +162,7 @@ int main(int argc, char ** argv)
   xi = calloc(np, sizeof(float));
   pxr = xr;
   pxi = xi;
-  *pxr = (enp - 1.0) * 0.5;
+  *pxr = (enp - 1.0) * 0.5f;
   *pxi = 0.0;
   n2 = np / 2;  
   *(pxr+n2) = -0.5;
@@ -171,8 +171,8 @@ int main(int argc, char ** argv)
     j = np - i;
     *(pxr+i) = -0.5;
     *(pxr+j) = -0.5;
-    z = t * (double)i;  
-    y = -0.5*(cosf(z)/sinf(z));
+    z = t * (float)i;  
+    y = -0.5f*(cosf(z)/sinf(z));
     *(pxi+i) =  y;
     *(pxi+j) = -y;
   }
diff --git a/test/regression/NaNs.c b/test/regression/NaNs.c
index 45f1e7fb..618ce295 100644
--- a/test/regression/NaNs.c
+++ b/test/regression/NaNs.c
@@ -32,17 +32,17 @@ inline float single_of_bits(u32 i)
   u.i = i; return u.f;
 }
 
-volatile double val[8];
-
 char * valname[8] = {
   "+0", "-0", "+inf", "-inf",
   "snan(5)", "qnan(6)", "snan(-9)", "qnan(-1)"
 };
 
-int main()
+void test64(void)
 {
+  volatile double val[8];
   int i, j;
 
+  printf("--- Double precision\n");
   val[0] = 0.0;
   val[1] = - val[0];
   val[2] = double_of_bits(0x7FF0000000000000);
@@ -56,10 +56,7 @@ int main()
   for (i = 0; i < 8; i++) {
     printf("opp(%s) = 0x%016llx\n", valname[i], bits_of_double(- val[i]));
     printf("single(%s) = 0x%08x\n", valname[i], bits_of_single((float)(val[i])));
-#if 0
-    /* The reference interpreter doesn't support __builtin_fabs */
     printf("abs(%s) = 0x%016llx\n", valname[i], bits_of_double(__builtin_fabs(val[i])));
-#endif
     for (j = 0; j < 8; j++) {
       printf("%s + %s = 0x%016llx\n",
              valname[i], valname[j], bits_of_double(val[i] + val[j]));
@@ -71,5 +68,44 @@ int main()
              valname[i], valname[j], bits_of_double(val[i] / val[j]));
     }
   }
+}
+
+void test32(void)
+{
+  volatile float val[8];
+  int i, j;
+
+  printf("--- Single precision\n");
+  val[0] = 0.0f;
+  val[1] = - val[0];
+  val[2] = single_of_bits(0x7F800000);
+  val[3] = - val[2];
+
+  val[4] = single_of_bits(0x7F800005);
+  val[5] = single_of_bits(0x7FC00006);
+  val[6] = single_of_bits(0xFF800009);
+  val[7] = single_of_bits(0xFFC00001);
+
+  for (i = 0; i < 8; i++) {
+    printf("opp(%s) = 0x%08x\n", valname[i], bits_of_single(- val[i]));
+    printf("double(%s) = 0x%016llx\n", valname[i], bits_of_double((double)(val[i])));
+    printf("abs(%s) = 0x%08x\n", valname[i], bits_of_single(__builtin_fabs(val[i])));
+    for (j = 0; j < 8; j++) {
+      printf("%s + %s = 0x%08x\n",
+             valname[i], valname[j], bits_of_single(val[i] + val[j]));
+      printf("%s - %s = 0x%08x\n",
+             valname[i], valname[j], bits_of_single(val[i] - val[j]));
+      printf("%s * %s = 0x%08x\n",
+             valname[i], valname[j], bits_of_single(val[i] * val[j]));
+      printf("%s / %s = 0x%08x\n",
+             valname[i], valname[j], bits_of_single(val[i] / val[j]));
+    }
+  }
+}
+
+int main(void)
+{
+  test64();
+  test32();
   return 0;
 }
diff --git a/test/spass/Makefile.bak b/test/spass/Makefile.bak
deleted file mode 100644
index 320f6221..00000000
--- a/test/spass/Makefile.bak
+++ /dev/null
@@ -1,13 +0,0 @@
-LEVEL = ../../..
-PROG = SPASS
-
-CPPFLAGS = -DCLOCK_NO_TIMING -fno-strict-aliasing -w
-LDFLAGS  = -lm
-
-ifdef SMALL_PROBLEM_SIZE
-RUN_OPTIONS="$(PROJ_SRC_DIR)/small_problem.dfg"
-else
-RUN_OPTIONS="$(PROJ_SRC_DIR)/problem.dfg"
-endif
-
-include ../../Makefile.multisrc
-- 
cgit