Merge branch 'master' into merge_master_8.13.1

PARTIAL MERGE (PARTLY BROKEN). See unsolved conflicts in: aarch64/TO_MERGE and riscV/TO_MERGE WARNING: interface of va_args and assembly sections have changed
author: Sylvain Boulmé <sylvain.boulme@univ-grenoble-alpes.fr> 2021-03-23 19:12:19 +0100
committer: Sylvain Boulmé <sylvain.boulme@univ-grenoble-alpes.fr> 2021-03-23 19:12:19 +0100
commit: dcb523736e82d72b03fa8d055bf74472dba7345c (patch)
tree: 71e797c92d45dca509527043d233c51b2ed8fc86 /aarch64
parent: 3e953ef41f736ed5b7db699b1adf21d46cb5b8db (diff)
parent: 6bf310dd678285dc193798e89fc2c441d8430892 (diff)
download: compcert-kvx-dcb523736e82d72b03fa8d055bf74472dba7345c.tar.gz
compcert-kvx-dcb523736e82d72b03fa8d055bf74472dba7345c.zip
17 files changed, 3156 insertions, 216 deletions
diff --git a/aarch64/Asm.v b/aarch64/Asm.v
index 067d32fb..e5111220 100644
--- a/aarch64/Asm.v
+++ b/aarch64/Asm.v
@@ -1398,7 +1398,7 @@ Ltac Equalities :=
   split. auto. intros. destruct B; auto. subst. auto.
 - (* trace length *)
   red; intros. inv H; simpl.
-  omega.
+  lia.
   eapply external_call_trace_length; eauto.
   eapply external_call_trace_length; eauto.
 - (* initial states *)
diff --git a/aarch64/Asmexpand.ml b/aarch64/Asmexpand.ml
index 8187e077..6863b967 100644
--- a/aarch64/Asmexpand.ml
+++ b/aarch64/Asmexpand.ml
@@ -47,17 +47,28 @@ let expand_storeptr (src: ireg) (base: iregsp) ofs =
 (* Determine the number of int registers, FP registers, and stack locations
    used to pass the fixed parameters. *)
 
+let align n a = (n + a - 1) land (-a)
+
+let typesize = function
+  | Tint | Tany32 | Tsingle -> 4
+  | Tlong | Tany64 | Tfloat -> 8
+
+let reserve_stack stk ty =
+  match Archi.abi with
+  | Archi.AAPCS64 -> stk + 8
+  | Archi.Apple -> align stk (typesize ty) + typesize ty
+
 let rec next_arg_locations ir fr stk = function
   | [] ->
       (ir, fr, stk)
-  | (Tint | Tlong | Tany32 | Tany64) :: l ->
+  | (Tint | Tlong | Tany32 | Tany64 as ty) :: l ->
       if ir < 8
       then next_arg_locations (ir + 1) fr stk l
-      else next_arg_locations ir fr (stk + 8) l
-  | (Tfloat | Tsingle) :: l ->
+      else next_arg_locations ir fr (reserve_stack stk ty) l
+  | (Tfloat | Tsingle as ty) :: l ->
       if fr < 8
       then next_arg_locations ir (fr + 1) stk l
-      else next_arg_locations ir fr (stk + 8) l
+      else next_arg_locations ir fr (reserve_stack stk ty) l
 
 (* Allocate memory on the stack and use it to save the registers
    used for parameter passing.  As an optimization, do not save
@@ -86,6 +97,8 @@ let save_parameter_registers ir fr =
     emit (Pstrd(float_param_regs.(i), ADimm(XSP, Z.of_uint pos)))
   done
 
+let current_function_stacksize = ref 0L
+
 (* Initialize a va_list as per va_start.
    Register r points to the following struct:
 
@@ -98,11 +111,7 @@ let save_parameter_registers ir fr =
    }
 *)
 
-let current_function_stacksize = ref 0L
-
-let expand_builtin_va_start r =
-  if not (is_current_function_variadic ()) then
-    invalid_arg "Fatal error: va_start used in non-vararg function";
+let expand_builtin_va_start_aapcs64 r =
   let (ir, fr, stk) =
     next_arg_locations 0 0 0 (get_current_function_args ()) in
   let stack_ofs = Int64.(add !current_function_stacksize (of_int stk))
@@ -127,6 +136,25 @@ let expand_builtin_va_start r =
   expand_loadimm32 X16 (coqint_of_camlint (Int32.of_int vr_offs));
   emit (Pstrw(X16, ADimm(RR1 r, coqint_of_camlint64 28L)))
 
+(* In macOS, va_list is just a pointer (char * ) and all variadic arguments
+   are passed on the stack. *)
+
+let expand_builtin_va_start_apple r =
+  let (ir, fr, stk) =
+    next_arg_locations 0 0 0 (get_current_function_args ()) in
+  let stk = align stk 8 in
+  let stack_ofs = Int64.(add !current_function_stacksize (of_int stk)) in
+  (* *va = sp + stack_ofs *)
+  expand_addimm64 (RR1 X16) XSP (coqint_of_camlint64 stack_ofs);
+  emit (Pstrx(X16, ADimm(RR1 r, coqint_of_camlint64 0L)))
+
+let expand_builtin_va_start r =
+  if not (is_current_function_variadic ()) then
+    invalid_arg "Fatal error: va_start used in non-vararg function";
+  match Archi.abi with
+  | Archi.AAPCS64 -> expand_builtin_va_start_aapcs64 r
+  | Archi.Apple   -> expand_builtin_va_start_apple r
+
 (* Handling of annotations *)
 
 let expand_annot_val kind txt targ args res =
@@ -382,7 +410,7 @@ let expand_instruction instr =
   match instr with
   | Pallocframe (sz, ofs) ->
       emit (Pmov (RR1 X29, XSP));
-      if is_current_function_variadic() then begin
+      if is_current_function_variadic() && Archi.abi = Archi.AAPCS64 then begin
         let (ir, fr, _) =
           next_arg_locations 0 0 0 (get_current_function_args ()) in
         save_parameter_registers ir fr;
diff --git a/aarch64/CBuiltins.ml b/aarch64/CBuiltins.ml
index e2a9c87a..4cfb7edf 100644
--- a/aarch64/CBuiltins.ml
+++ b/aarch64/CBuiltins.ml
@@ -17,16 +17,28 @@
 
 open C
 
-(* va_list is a struct of size 32 and alignment 8, passed by reference *)
+(* AAPCS64:
+     va_list is a struct of size 32 and alignment 8, passed by reference
+   Apple:
+     va_list is a pointer (size 8, alignment 8), passed by reference *)
 
-let va_list_type = TArray(TInt(IULong, []), Some 4L, [])
-let size_va_list = 32
-let va_list_scalar = false
+let (va_list_type, size_va_list, va_list_scalar) =
+  match Archi.abi with
+  | Archi.AAPCS64 -> (TArray(TInt(IULong, []), Some 4L, []), 32, false)
+  | Archi.Apple   -> (TPtr(TVoid [], []), 8, true)
+
+(* Some macOS headers use the GCC built-in types "__int128_t" and
+   "__uint128_t" unconditionally.  Provide a dummy definition. *)
+
+let int128_type = TArray(TInt(IULong, []), Some 2L, [])
 
 let builtins = {
-  builtin_typedefs = [
-    "__builtin_va_list", va_list_type
-  ];
+  builtin_typedefs =
+  [ "__builtin_va_list", va_list_type ] @
+  (if Configuration.system = "macos" then
+  [ "__int128_t", int128_type;
+    "__uint128_t", int128_type ]
+  else []);
   builtin_functions = [
     (* Synchronization *)
     "__builtin_fence",
diff --git a/aarch64/ConstpropOp.vp b/aarch64/ConstpropOp.vp
index c0a2c6bf..f2d17a51 100644
--- a/aarch64/ConstpropOp.vp
+++ b/aarch64/ConstpropOp.vp
@@ -13,11 +13,11 @@
 (** Strength reduction for operators and conditions.
     This is the machine-dependent part of [Constprop]. *)
 
-Require Archi.
 Require Import Coqlib Compopts.
 Require Import AST Integers Floats.
 Require Import Op Registers.
 Require Import ValueDomain ValueAOp.
+Require SelectOp.
 
 (** * Converting known values to constants *)
 
@@ -375,7 +375,7 @@ Nondetfunction op_strength_reduction
 Nondetfunction addr_strength_reduction
                 (addr: addressing) (args: list reg) (vl: list aval) :=
   match addr, args, vl with
-  | Aindexed n, r1 :: nil, Ptr(Gl symb n1) :: nil =>
+  | Aindexed n, r1 :: nil, Ptr(Gl symb n1) :: nil ?? negb (SelectOp.symbol_is_relocatable symb) =>
       (Aglobal symb (Ptrofs.add n1 (Ptrofs.of_int64 n)), nil)
   | Aindexed n, r1 :: nil, Ptr(Stk n1) :: nil =>
       (Ainstack (Ptrofs.add n1 (Ptrofs.of_int64 n)), nil)
diff --git a/aarch64/ConstpropOpproof.v b/aarch64/ConstpropOpproof.v
index c777062c..24498aa4 100644
--- a/aarch64/ConstpropOpproof.v
+++ b/aarch64/ConstpropOpproof.v
@@ -414,7 +414,7 @@ Proof.
   Int.bit_solve. destruct (zlt i0 n0).
   replace (Int.testbit n i0) with (negb (Int.testbit Int.zero i0)).
   rewrite Int.bits_zero. simpl. rewrite andb_true_r. auto.
-  rewrite <- EQ. rewrite Int.bits_zero_ext by omega. rewrite zlt_true by auto.
+  rewrite <- EQ. rewrite Int.bits_zero_ext by lia. rewrite zlt_true by auto.
   rewrite Int.bits_not by auto. apply negb_involutive.
   rewrite H6 by auto. auto.
   econstructor; split; eauto. auto.
diff --git a/aarch64/Conventions1.v b/aarch64/Conventions1.v
index efda835d..f401458c 100644
--- a/aarch64/Conventions1.v
+++ b/aarch64/Conventions1.v
@@ -24,7 +24,12 @@ Require Archi.
 - Caller-save registers that can be modified during a function call.
 
   We follow the Procedure Call Standard for the ARM 64-bit Architecture
-  (AArch64) document: R19-R28 and F8-F15 are callee-save. *)
+  (AArch64) document: R19-R28 and F8-F15 are callee-save.
+
+  X16 is reserved as a temporary for asm generation.
+  X18 is reserved as the platform register.
+  X29 is reserved as the frame pointer register.
+  X30 is reserved as the return address register. *)
 
 Definition is_callee_save (r: mreg): bool :=
   match r with
@@ -154,9 +159,23 @@ Qed.
 (**
 - The first 8 integer arguments are passed in registers [R0...R7].
 - The first 8 FP arguments are passed in registers [F0...F7].
-- Extra arguments are passed on the stack, in [Outgoing] slots of size
-  64 bits (2 words), consecutively assigned, starting at word offset 0.
-**)
+- Extra arguments are passed on the stack, in [Outgoing] slots,
+  consecutively assigned, starting at word offset 0.
+
+In the standard AAPCS64, all stack slots are 8-byte wide (2 words).
+
+In the Apple variant, a stack slot has the size of the type of the
+corresponding argument, and is aligned accordingly.  We use 8-byte
+slots (2 words) for C types [long] and [double], and 4-byte slots
+(1 word) for C types [int] and [float].  For full conformance, we should
+use 1-byte slots for [char] types and 2-byte slots for [short] types,
+but this cannot be expressed in CompCert's type algebra, so we
+incorrectly use 4-byte slots.
+
+Concerning variable arguments to vararg functions:
+- In the AAPCS64 standard, they are passed like regular, fixed arguments.
+- In the Apple variant, they are always passed on stack, in 8-byte slots.
+*)
 
 Definition int_param_regs :=
   R0  :: R1  :: R2  :: R3  :: R4  :: R5  :: R6  :: R7 :: nil.
@@ -164,31 +183,70 @@ Definition int_param_regs :=
 Definition float_param_regs :=
   F0  :: F1  :: F2  :: F3  :: F4  :: F5  :: F6  :: F7 :: nil.
 
+Definition stack_arg (ty: typ) (ir fr ofs: Z)
+                     (rec: Z -> Z -> Z -> list (rpair loc)) :=
+  match Archi.abi with
+  | Archi.AAPCS64 =>
+      let ofs := align ofs 2 in
+      One (S Outgoing ofs ty) :: rec ir fr (ofs + 2)
+  | Archi.Apple =>
+      let ofs := align ofs (typesize ty) in
+      One (S Outgoing ofs ty) :: rec ir fr (ofs + typesize ty)
+  end.
+
+Definition int_arg (ty: typ) (ir fr ofs: Z)
+                   (rec: Z -> Z -> Z -> list (rpair loc)) :=
+  match list_nth_z int_param_regs ir with
+  | None =>
+      stack_arg ty ir fr ofs rec
+  | Some ireg =>
+      One (R ireg) :: rec (ir + 1) fr ofs
+  end.
+
+Definition float_arg (ty: typ) (ir fr ofs: Z)
+                     (rec: Z -> Z -> Z -> list (rpair loc)) :=
+  match list_nth_z float_param_regs fr with
+  | None =>
+      stack_arg ty ir fr ofs rec
+  | Some freg =>
+      One (R freg) :: rec ir (fr + 1) ofs
+  end.
+
+Fixpoint loc_arguments_stack (tyl: list typ) (ofs: Z) {struct tyl} : list (rpair loc) :=
+  match tyl with
+  | nil => nil
+  | ty :: tys => One (S Outgoing ofs Tany64) :: loc_arguments_stack tys (ofs + 2)
+  end.
+
 Fixpoint loc_arguments_rec
-     (tyl: list typ) (ir fr ofs: Z) {struct tyl} : list (rpair loc) :=
+     (tyl: list typ) (fixed ir fr ofs: Z) {struct tyl} : list (rpair loc) :=
   match tyl with
   | nil => nil
-  | (Tint | Tlong | Tany32 | Tany64) as ty :: tys =>
-      match list_nth_z int_param_regs ir with
-      | None =>
-          One (S Outgoing ofs ty) :: loc_arguments_rec tys ir fr (ofs + 2)
-      | Some ireg =>
-          One (R ireg) :: loc_arguments_rec tys (ir + 1) fr ofs
-      end
-  | (Tfloat | Tsingle) as ty :: tys =>
-      match list_nth_z float_param_regs fr with
-      | None =>
-          One (S Outgoing ofs ty) :: loc_arguments_rec tys ir fr (ofs + 2)
-      | Some freg =>
-          One (R freg) :: loc_arguments_rec tys ir (fr + 1) ofs
+  | ty :: tys =>
+      if zle fixed 0 then loc_arguments_stack tyl (align ofs 2) else
+      match ty with
+      | Tint | Tlong | Tany32 | Tany64 =>
+          int_arg ty ir fr ofs (loc_arguments_rec tys (fixed - 1))
+      | Tfloat | Tsingle =>
+          float_arg ty ir fr ofs (loc_arguments_rec tys (fixed - 1))
       end
   end.
 
+(** Number of fixed arguments for a function with signature [s].
+    For AAPCS64, all arguments are treated as fixed, even for a vararg
+    function. *)
+
+Definition fixed_arguments (s: signature) : Z :=
+  match Archi.abi, s.(sig_cc).(cc_vararg) with
+  | Archi.Apple, Some n => n
+  | _, _ => list_length_z s.(sig_args)
+  end.
+
 (** [loc_arguments s] returns the list of locations where to store arguments
   when calling a function with signature [s].  *)
 
 Definition loc_arguments (s: signature) : list (rpair loc) :=
-  loc_arguments_rec s.(sig_args) 0 0 0.
+  loc_arguments_rec s.(sig_args) (fixed_arguments s) 0 0 0.
 
 (** Argument locations are either caller-save registers or [Outgoing]
   stack slots at nonnegative offsets. *)
@@ -200,49 +258,73 @@ Definition loc_argument_acceptable (l: loc) : Prop :=
   | _ => False
   end.
 
-Definition loc_argument_charact (ofs: Z) (l: loc) : Prop :=
-  match l with
-  | R r => In r int_param_regs \/ In r float_param_regs
-  | S Outgoing ofs' ty => ofs' >= ofs /\ (2 | ofs')
-  | _ => False
-  end.
-
-Remark loc_arguments_rec_charact:
-  forall tyl ir fr ofs p,
-  In p (loc_arguments_rec tyl ir fr ofs) -> (2 | ofs) -> forall_rpair (loc_argument_charact ofs) p.
+Lemma loc_arguments_rec_charact:
+  forall tyl fixed ri rf ofs p,
+  ofs >= 0 ->
+  In p (loc_arguments_rec tyl fixed ri rf ofs) -> forall_rpair loc_argument_acceptable p.
 Proof.
-  assert (X: forall ofs1 ofs2 l, loc_argument_charact ofs2 l -> ofs1 <= ofs2 -> loc_argument_charact ofs1 l).
-  { destruct l; simpl; intros; auto. destruct sl; auto. intuition omega. }
-  assert (Y: forall ofs1 ofs2 p, forall_rpair (loc_argument_charact ofs2) p -> ofs1 <= ofs2 -> forall_rpair (loc_argument_charact ofs1) p).
-  { destruct p; simpl; intuition eauto. }
-  assert (Z: forall ofs, (2 | ofs) -> (2 | ofs + 2)).
-  { intros. apply Z.divide_add_r; auto. apply Z.divide_refl. }
-Opaque list_nth_z.
-  induction tyl; simpl loc_arguments_rec; intros.
-- contradiction.
-- assert (A: forall ty, In p
-      match list_nth_z int_param_regs ir with
-      | Some ireg => One (R ireg) :: loc_arguments_rec tyl (ir + 1) fr ofs
-      | None => One (S Outgoing ofs ty) :: loc_arguments_rec tyl ir fr (ofs + 2)
-      end ->
-      forall_rpair (loc_argument_charact ofs) p).
-  { intros. destruct (list_nth_z int_param_regs ir) as [r|] eqn:E; destruct H1.
-    subst. left. eapply list_nth_z_in; eauto.
-    eapply IHtyl; eauto.
-    subst. split. omega. assumption.
-    eapply Y; eauto. omega. }
-  assert (B: forall ty, In p
-      match list_nth_z float_param_regs fr with
-      | Some ireg => One (R ireg) :: loc_arguments_rec tyl ir (fr + 1) ofs
-      | None => One (S Outgoing ofs ty) :: loc_arguments_rec tyl ir fr (ofs + 2)
-      end ->
-      forall_rpair (loc_argument_charact ofs) p).
-  { intros. destruct (list_nth_z float_param_regs fr) as [r|] eqn:E; destruct H1.
-    subst. right. eapply list_nth_z_in; eauto.
-    eapply IHtyl; eauto.
-    subst. split. omega. assumption.
-    eapply Y; eauto. omega. }
-  destruct a; eauto.
+  set (OK := fun (l: list (rpair loc)) =>
+             forall p, In p l -> forall_rpair loc_argument_acceptable p).
+  set (OKF := fun (f: Z -> Z -> Z -> list (rpair loc)) =>
+              forall ri rf ofs, ofs >= 0 -> OK (f ri rf ofs)).
+  assert (CSI: forall r, In r int_param_regs -> is_callee_save r = false).
+  { decide_goal. }
+  assert (CSF: forall r, In r float_param_regs -> is_callee_save r = false).
+  { decide_goal. }
+  assert (ALP: forall ofs ty, ofs >= 0 -> align ofs (typesize ty) >= 0).
+  { intros. 
+    assert (ofs <= align ofs (typesize ty)) by (apply align_le; apply typesize_pos).
+    lia. }
+  assert (ALD: forall ofs ty, ofs >= 0 -> (typealign ty | align ofs (typesize ty))).
+  { intros. apply Z.divide_trans with (typesize ty). apply typealign_typesize. apply align_divides. apply typesize_pos. }
+  assert (ALP2: forall ofs, ofs >= 0 -> align ofs 2 >= 0).
+  { intros. 
+    assert (ofs <= align ofs 2) by (apply align_le; lia).
+    lia. }
+  assert (ALD2: forall ofs ty, ofs >= 0 -> (typealign ty | align ofs 2)).
+  { intros. eapply Z.divide_trans with 2.
+    exists (2 / typealign ty). destruct ty; reflexivity.
+    apply align_divides. lia. }
+  assert (STK: forall tyl ofs,
+               ofs >= 0 -> OK (loc_arguments_stack tyl ofs)).
+  { induction tyl as [ | ty tyl]; intros ofs OO; red; simpl; intros.
+  - contradiction.
+  - destruct H.
+    + subst p. split. auto. simpl. apply Z.divide_1_l.
+    + apply IHtyl with (ofs := ofs + 2). lia. auto.
+  }  
+  assert (A: forall ty ri rf ofs f,
+             OKF f -> ofs >= 0 -> OK (stack_arg ty ri rf ofs f)).
+  { intros until f; intros OF OO; red; unfold stack_arg; intros.
+    destruct Archi.abi; destruct H.
+  - subst p; simpl; auto.
+  - eapply OF; [|eauto]. apply ALP2 in OO. lia.
+  - subst p; simpl; auto.  
+  - eapply OF; [|eauto]. apply (ALP ofs ty) in OO. generalize (typesize_pos ty). lia.
+  }
+  assert (B: forall ty ri rf ofs f,
+             OKF f -> ofs >= 0 -> OK (int_arg ty ri rf ofs f)).
+  { intros until f; intros OF OO; red; unfold int_arg; intros.
+    destruct (list_nth_z int_param_regs ri) as [r|] eqn:NTH; [destruct H|].
+  - subst p; simpl. apply CSI. eapply list_nth_z_in; eauto. 
+  - eapply OF; eauto. 
+  - eapply A; eauto.
+  }
+  assert (C: forall ty ri rf ofs f,
+             OKF f -> ofs >= 0 -> OK (float_arg ty ri rf ofs f)).
+  { intros until f; intros OF OO; red; unfold float_arg; intros.
+    destruct (list_nth_z float_param_regs rf) as [r|] eqn:NTH; [destruct H|].
+  - subst p; simpl. apply CSF. eapply list_nth_z_in; eauto.
+  - eapply OF; eauto.
+  - eapply A; eauto.
+  }
+  cut (forall tyl fixed ri rf ofs, ofs >= 0 -> OK (loc_arguments_rec tyl fixed ri rf ofs)).
+  unfold OK. eauto.
+  induction tyl as [ | ty1 tyl]; intros until ofs; intros OO; simpl.
+- red; simpl; tauto.
+- destruct (zle fixed  0).
+  + apply (STK (ty1 :: tyl)); auto. 
+  + unfold OKF in *; destruct ty1; eauto.
 Qed.
 
 Lemma loc_arguments_acceptable:
@@ -250,19 +332,10 @@ Lemma loc_arguments_acceptable:
   In p (loc_arguments s) -> forall_rpair loc_argument_acceptable p.
 Proof.
   unfold loc_arguments; intros.
-  assert (A: forall r, In r int_param_regs -> is_callee_save r = false) by decide_goal.
-  assert (B: forall r, In r float_param_regs -> is_callee_save r = false) by decide_goal.
-  assert (X: forall l, loc_argument_charact 0 l -> loc_argument_acceptable l).
-  { unfold loc_argument_charact, loc_argument_acceptable.
-    destruct l as [r | [] ofs ty]; auto.  intros [C|C]; auto.
-    intros [C D]. split; auto. apply Z.divide_trans with 2; auto.
-    exists (2 / typealign ty); destruct ty; reflexivity.
-  }
-  exploit loc_arguments_rec_charact; eauto using Z.divide_0_r.
-  unfold forall_rpair; destruct p; intuition auto.
+  eapply loc_arguments_rec_charact; eauto. lia.
 Qed.
 
-Hint Resolve loc_arguments_acceptable: locs.
+Global Hint Resolve loc_arguments_acceptable: locs.
 
 Lemma loc_arguments_main:
   loc_arguments signature_main = nil.
@@ -270,16 +343,29 @@ Proof.
   unfold loc_arguments; reflexivity.
 Qed.
 
-(** ** Normalization of function results *)
+(** ** Normalization of function results and parameters *)
 
 (** According to the AAPCS64 ABI specification, "padding bits" in the return
-    value of a function have unpredictable values and must be ignored.
-    Consequently, we force normalization of return values of small integer
-    types (8- and 16-bit integers), so that the top bits (the "padding bits")
-    are proper sign- or zero-extensions of the small integer value. *)
+    value of a function or in a function parameter have unpredictable
+    values and must be ignored.  Consequently, we force normalization
+    of return values and of function parameters when they have small
+    integer types (8- and 16-bit integers), so that the top bits (the
+    "padding bits") are proper sign- or zero-extensions of the small
+    integer value.
+
+    The Apple variant of the AAPCS64 requires the callee to return a normalized
+    value, and the caller to pass normalized parameters, hence no
+    normalization is needed.
+ *)
 
 Definition return_value_needs_normalization (t: rettype) : bool :=
-  match t with
-  | Tint8signed | Tint8unsigned | Tint16signed | Tint16unsigned => true
-  | _ => false
+  match Archi.abi with
+  | Archi.Apple => false
+  | Archi.AAPCS64 =>
+      match t with
+      | Tint8signed | Tint8unsigned | Tint16signed | Tint16unsigned => true
+      | _ => false
+      end
   end.
+
+Definition parameter_needs_normalization := return_value_needs_normalization.
diff --git a/aarch64/Op.v b/aarch64/Op.v
index 40f6ebf0..4c0dfb72 100644
--- a/aarch64/Op.v
+++ b/aarch64/Op.v
@@ -985,25 +985,25 @@ End SHIFT_AMOUNT.
 Program Definition mk_amount32 (n: int): amount32 :=
   {| a32_amount := Int.zero_ext 5 n |}.
 Next Obligation.
-  apply mk_amount_range. omega. reflexivity.
+  apply mk_amount_range. lia. reflexivity.
 Qed.
 
 Lemma mk_amount32_eq: forall n,
   Int.ltu n Int.iwordsize = true -> a32_amount (mk_amount32 n) = n.
 Proof.
-  intros. eapply mk_amount_eq; eauto. omega. reflexivity.
+  intros. eapply mk_amount_eq; eauto. lia. reflexivity.
 Qed.
 
 Program Definition mk_amount64 (n: int): amount64 :=
   {| a64_amount := Int.zero_ext 6 n |}.
 Next Obligation.
-  apply mk_amount_range. omega. reflexivity.
+  apply mk_amount_range. lia. reflexivity.
 Qed.
 
 Lemma mk_amount64_eq: forall n,
   Int.ltu n Int64.iwordsize' = true -> a64_amount (mk_amount64 n) = n.
 Proof.
-  intros. eapply mk_amount_eq; eauto. omega. reflexivity.
+  intros. eapply mk_amount_eq; eauto. lia. reflexivity.
 Qed.
  
 (** Recognition of move operations. *)
diff --git a/aarch64/SelectLongproof.v b/aarch64/SelectLongproof.v
index 513ee9bd..0984943c 100644
--- a/aarch64/SelectLongproof.v
+++ b/aarch64/SelectLongproof.v
@@ -228,8 +228,8 @@ Proof.
   intros. unfold Int.ltu; apply zlt_true.
   apply Int.ltu_inv in H. apply Int.ltu_inv in H0.
   change (Int.unsigned Int64.iwordsize') with Int64.zwordsize in *.
-  unfold Int.sub; rewrite Int.unsigned_repr. omega.
-  assert (Int64.zwordsize < Int.max_unsigned) by reflexivity. omega.
+  unfold Int.sub; rewrite Int.unsigned_repr. lia.
+  assert (Int64.zwordsize < Int.max_unsigned) by reflexivity. lia.
 Qed.
 
 Theorem eval_shrluimm:
@@ -245,13 +245,13 @@ Local Opaque Int64.zwordsize.
 + destruct (Int.ltu n a) eqn:L2.
 * assert (L3: Int.ltu (Int.sub a n) Int64.iwordsize' = true).
   { apply sub_shift_amount; auto using a64_range.
-    apply Int.ltu_inv in L2. omega. }
+    apply Int.ltu_inv in L2. lia. }
   econstructor; split. EvalOp.
   destruct v1; simpl; auto. rewrite mk_amount64_eq, L3, a64_range by auto.
   simpl. rewrite L. rewrite Int64.shru'_shl', L2 by auto using a64_range. auto.
 * assert (L3: Int.ltu (Int.sub n a) Int64.iwordsize' = true).
   { apply sub_shift_amount; auto using a64_range.
-    unfold Int.ltu in L2. destruct zlt in L2; discriminate || omega. }
+    unfold Int.ltu in L2. destruct zlt in L2; discriminate || lia. }
   econstructor; split. EvalOp.
   destruct v1; simpl; auto. rewrite mk_amount64_eq, L3, a64_range by auto.
   simpl. rewrite L. rewrite Int64.shru'_shl', L2 by auto using a64_range. auto.
@@ -264,11 +264,11 @@ Local Opaque Int64.zwordsize.
 * econstructor; split. EvalOp. rewrite mk_amount64_eq by auto.
   destruct v1; simpl; auto. rewrite ! L; simpl.
   set (s' := s - Int.unsigned n).
-  replace s with (s' + Int.unsigned n) by (unfold s'; omega).
-  rewrite Int64.shru'_zero_ext. auto. unfold s'; omega.
+  replace s with (s' + Int.unsigned n) by (unfold s'; lia).
+  rewrite Int64.shru'_zero_ext. auto. unfold s'; lia.
 * econstructor; split. EvalOp.
   destruct v1; simpl; auto. rewrite ! L; simpl.
-  rewrite Int64.shru'_zero_ext_0 by omega. auto. 
+  rewrite Int64.shru'_zero_ext_0 by lia. auto. 
 + econstructor; eauto using eval_shrluimm_base.
 - intros; TrivialExists.
 Qed.
@@ -293,13 +293,13 @@ Proof.
 + destruct (Int.ltu n a) eqn:L2.
 * assert (L3: Int.ltu (Int.sub a n) Int64.iwordsize' = true).
   { apply sub_shift_amount; auto using a64_range.
-    apply Int.ltu_inv in L2. omega. }
+    apply Int.ltu_inv in L2. lia. }
   econstructor; split. EvalOp.
   destruct v1; simpl; auto. rewrite mk_amount64_eq, L3, a64_range by auto.
   simpl. rewrite L. rewrite Int64.shr'_shl', L2 by auto using a64_range. auto.
 * assert (L3: Int.ltu (Int.sub n a) Int64.iwordsize' = true).
   { apply sub_shift_amount; auto using a64_range.
-    unfold Int.ltu in L2. destruct zlt in L2; discriminate || omega. }
+    unfold Int.ltu in L2. destruct zlt in L2; discriminate || lia. }
   econstructor; split. EvalOp.
   destruct v1; simpl; auto. rewrite mk_amount64_eq, L3, a64_range by auto.
   simpl. rewrite L. rewrite Int64.shr'_shl', L2 by auto using a64_range. auto.
@@ -312,8 +312,8 @@ Proof.
 * InvBooleans. econstructor; split. EvalOp. rewrite mk_amount64_eq by auto.
   destruct v1; simpl; auto. rewrite ! L; simpl.
   set (s' := s - Int.unsigned n).
-  replace s with (s' + Int.unsigned n) by (unfold s'; omega).
-  rewrite Int64.shr'_sign_ext. auto. unfold s'; omega. unfold s'; omega.
+  replace s with (s' + Int.unsigned n) by (unfold s'; lia).
+  rewrite Int64.shr'_sign_ext. auto. unfold s'; lia. unfold s'; lia.
 * econstructor; split; [|eauto]. apply eval_shrlimm_base; auto. EvalOp.
 + econstructor; eauto using eval_shrlimm_base.
 - intros; TrivialExists.
@@ -395,7 +395,7 @@ Proof.
 - TrivialExists.
 - destruct (zlt (Int.unsigned a0) sz).
 + econstructor; split. EvalOp. destruct v1; simpl; auto. rewrite a64_range; simpl.
-  apply Val.lessdef_same. f_equal. rewrite Int64.shl'_zero_ext by omega. f_equal. omega.
+  apply Val.lessdef_same. f_equal. rewrite Int64.shl'_zero_ext by lia. f_equal. lia.
 + TrivialExists.
 - TrivialExists.
 Qed.
diff --git a/aarch64/SelectOp.vp b/aarch64/SelectOp.vp
index 67575fdb..7f73d592 100644
--- a/aarch64/SelectOp.vp
+++ b/aarch64/SelectOp.vp
@@ -540,10 +540,18 @@ Definition select (ty: typ) (cond: condition) (args: exprlist) (e1 e2: expr) :=
 
 (** ** Recognition of addressing modes for load and store operations *)
 
+(** Some symbols are relocatable (e.g. external symbols in macOS)
+    and cannot be used with [Aglobal] addressing mode. *)
+
+Parameter symbol_is_relocatable: ident -> bool.
+
 Nondetfunction addressing (chunk: memory_chunk) (e: expr) :=
   match e with
   | Eop (Oaddrstack n) Enil => (Ainstack n, Enil)
-  | Eop (Oaddrsymbol id ofs) Enil => (Aglobal id ofs, Enil)
+  | Eop (Oaddrsymbol id ofs) Enil =>
+      if symbol_is_relocatable id
+      then (Aindexed (Ptrofs.to_int64 ofs), Eop (Oaddrsymbol id Ptrofs.zero) Enil ::: Enil)
+      else (Aglobal id ofs, Enil)
   | Eop (Oaddlimm n) (e1:::Enil) => (Aindexed n, e1:::Enil)
   | Eop (Oaddlshift Slsl a) (e1:::e2:::Enil) => (Aindexed2shift a, e1:::e2:::Enil)
   | Eop (Oaddlext x a) (e1:::e2:::Enil) => (Aindexed2ext x a, e1:::e2:::Enil)
diff --git a/aarch64/SelectOpproof.v b/aarch64/SelectOpproof.v
index 9ce7a8bf..dfa4c598 100644
--- a/aarch64/SelectOpproof.v
+++ b/aarch64/SelectOpproof.v
@@ -248,8 +248,8 @@ Remark sub_shift_amount:
 Proof.
   intros. unfold Int.ltu; apply zlt_true. rewrite Int.unsigned_repr_wordsize.
   apply Int.ltu_iwordsize_inv in H. apply Int.ltu_iwordsize_inv in H0.
-  unfold Int.sub; rewrite Int.unsigned_repr. omega.
-  generalize Int.wordsize_max_unsigned; omega.
+  unfold Int.sub; rewrite Int.unsigned_repr. lia.
+  generalize Int.wordsize_max_unsigned; lia.
 Qed.
 
 Theorem eval_shruimm:
@@ -265,13 +265,13 @@ Local Opaque Int.zwordsize.
 + destruct (Int.ltu n a) eqn:L2.
 * assert (L3: Int.ltu (Int.sub a n) Int.iwordsize = true).
   { apply sub_shift_amount; auto using a32_range.
-    apply Int.ltu_inv in L2. omega. }
+    apply Int.ltu_inv in L2. lia. }
   econstructor; split. EvalOp.
   destruct v1; simpl; auto. rewrite mk_amount32_eq, L3, a32_range by auto.
   simpl. rewrite L. rewrite Int.shru_shl, L2 by auto using a32_range. auto.
 * assert (L3: Int.ltu (Int.sub n a) Int.iwordsize = true).
   { apply sub_shift_amount; auto using a32_range.
-    unfold Int.ltu in L2. destruct zlt in L2; discriminate || omega. }
+    unfold Int.ltu in L2. destruct zlt in L2; discriminate || lia. }
   econstructor; split. EvalOp.
   destruct v1; simpl; auto. rewrite mk_amount32_eq, L3, a32_range by auto.
   simpl. rewrite L. rewrite Int.shru_shl, L2 by auto using a32_range. auto.
@@ -284,11 +284,11 @@ Local Opaque Int.zwordsize.
 * econstructor; split. EvalOp. rewrite mk_amount32_eq by auto.
   destruct v1; simpl; auto. rewrite ! L; simpl.
   set (s' := s - Int.unsigned n).
-  replace s with (s' + Int.unsigned n) by (unfold s'; omega).
-  rewrite Int.shru_zero_ext. auto. unfold s'; omega.
+  replace s with (s' + Int.unsigned n) by (unfold s'; lia).
+  rewrite Int.shru_zero_ext. auto. unfold s'; lia.
 * econstructor; split. EvalOp.
   destruct v1; simpl; auto. rewrite ! L; simpl.
-  rewrite Int.shru_zero_ext_0 by omega. auto. 
+  rewrite Int.shru_zero_ext_0 by lia. auto. 
 + econstructor; eauto using eval_shruimm_base.
 - intros; TrivialExists.
 Qed.
@@ -313,13 +313,13 @@ Proof.
 + destruct (Int.ltu n a) eqn:L2.
 * assert (L3: Int.ltu (Int.sub a n) Int.iwordsize = true).
   { apply sub_shift_amount; auto using a32_range.
-    apply Int.ltu_inv in L2. omega. }
+    apply Int.ltu_inv in L2. lia. }
   econstructor; split. EvalOp.
   destruct v1; simpl; auto. rewrite mk_amount32_eq, L3, a32_range by auto.
   simpl. rewrite L. rewrite Int.shr_shl, L2 by auto using a32_range. auto.
 * assert (L3: Int.ltu (Int.sub n a) Int.iwordsize = true).
   { apply sub_shift_amount; auto using a32_range.
-    unfold Int.ltu in L2. destruct zlt in L2; discriminate || omega. }
+    unfold Int.ltu in L2. destruct zlt in L2; discriminate || lia. }
   econstructor; split. EvalOp.
   destruct v1; simpl; auto. rewrite mk_amount32_eq, L3, a32_range by auto.
   simpl. rewrite L. rewrite Int.shr_shl, L2 by auto using a32_range. auto.
@@ -332,8 +332,8 @@ Proof.
 * InvBooleans. econstructor; split. EvalOp. rewrite mk_amount32_eq by auto.
   destruct v1; simpl; auto. rewrite ! L; simpl.
   set (s' := s - Int.unsigned n).
-  replace s with (s' + Int.unsigned n) by (unfold s'; omega).
-  rewrite Int.shr_sign_ext. auto. unfold s'; omega. unfold s'; omega.
+  replace s with (s' + Int.unsigned n) by (unfold s'; lia).
+  rewrite Int.shr_sign_ext. auto. unfold s'; lia. unfold s'; lia.
 * econstructor; split; [|eauto]. apply eval_shrimm_base; auto. EvalOp.
 + econstructor; eauto using eval_shrimm_base. 
 - intros; TrivialExists.
@@ -404,20 +404,20 @@ Proof.
   change (Int.ltu (Int.repr 32) Int64.iwordsize') with true; simpl.
   apply Val.lessdef_same. f_equal. 
   transitivity (Int.repr (Z.shiftr (Int.signed i * Int.signed i0) 32)).
-  unfold Int.mulhs; f_equal. rewrite Zshiftr_div_two_p by omega. reflexivity.
+  unfold Int.mulhs; f_equal. rewrite Zshiftr_div_two_p by lia. reflexivity.
   apply Int.same_bits_eq; intros n N.
   change Int.zwordsize with 32 in *.
-  assert (N1: 0 <= n < 64) by omega.
+  assert (N1: 0 <= n < 64) by lia.
   rewrite Int64.bits_loword by auto.
   rewrite Int64.bits_shr' by auto.
   change (Int.unsigned (Int.repr 32)) with 32. change Int64.zwordsize with 64.
-  rewrite zlt_true by omega.
+  rewrite zlt_true by lia.
   rewrite Int.testbit_repr by auto. 
-  unfold Int64.mul. rewrite Int64.testbit_repr by (change Int64.zwordsize with 64; omega).
+  unfold Int64.mul. rewrite Int64.testbit_repr by (change Int64.zwordsize with 64; lia).
   transitivity (Z.testbit (Int.signed i * Int.signed i0) (n + 32)).
-  rewrite Z.shiftr_spec by omega. auto.
+  rewrite Z.shiftr_spec by lia. auto.
   apply Int64.same_bits_eqm. apply Int64.eqm_mult; apply Int64.eqm_unsigned_repr. 
-  change Int64.zwordsize with 64; omega.
+  change Int64.zwordsize with 64; lia.
 Qed.
 
 Theorem eval_mulhu: binary_constructor_sound mulhu Val.mulhu.
@@ -430,20 +430,20 @@ Proof.
   change (Int.ltu (Int.repr 32) Int64.iwordsize') with true; simpl.
   apply Val.lessdef_same. f_equal. 
   transitivity (Int.repr (Z.shiftr (Int.unsigned i * Int.unsigned i0) 32)).
-  unfold Int.mulhu; f_equal. rewrite Zshiftr_div_two_p by omega. reflexivity.
+  unfold Int.mulhu; f_equal. rewrite Zshiftr_div_two_p by lia. reflexivity.
   apply Int.same_bits_eq; intros n N.
   change Int.zwordsize with 32 in *.
-  assert (N1: 0 <= n < 64) by omega.
+  assert (N1: 0 <= n < 64) by lia.
   rewrite Int64.bits_loword by auto.
   rewrite Int64.bits_shru' by auto.
   change (Int.unsigned (Int.repr 32)) with 32. change Int64.zwordsize with 64.
-  rewrite zlt_true by omega.
+  rewrite zlt_true by lia.
   rewrite Int.testbit_repr by auto. 
-  unfold Int64.mul. rewrite Int64.testbit_repr by (change Int64.zwordsize with 64; omega).
+  unfold Int64.mul. rewrite Int64.testbit_repr by (change Int64.zwordsize with 64; lia).
   transitivity (Z.testbit (Int.unsigned i * Int.unsigned i0) (n + 32)).
-  rewrite Z.shiftr_spec by omega. auto.
+  rewrite Z.shiftr_spec by lia. auto.
   apply Int64.same_bits_eqm. apply Int64.eqm_mult; apply Int64.eqm_unsigned_repr. 
-  change Int64.zwordsize with 64; omega.
+  change Int64.zwordsize with 64; lia.
 Qed.
 
 (** Integer conversions *)
@@ -456,7 +456,7 @@ Proof.
 - TrivialExists.
 - destruct (zlt (Int.unsigned a0) sz).
 + econstructor; split. EvalOp. destruct v1; simpl; auto. rewrite a32_range; simpl.
-  apply Val.lessdef_same. f_equal. rewrite Int.shl_zero_ext by omega. f_equal. omega.
+  apply Val.lessdef_same. f_equal. rewrite Int.shl_zero_ext by lia. f_equal. lia.
 + TrivialExists.
 - TrivialExists.
 Qed.
@@ -469,29 +469,29 @@ Proof.
 - TrivialExists.
 - destruct (zlt (Int.unsigned a0) sz).
 + econstructor; split. EvalOp. destruct v1; simpl; auto. rewrite a32_range; simpl.
-  apply Val.lessdef_same. f_equal. rewrite Int.shl_sign_ext by omega. f_equal. omega.
+  apply Val.lessdef_same. f_equal. rewrite Int.shl_sign_ext by lia. f_equal. lia.
 + TrivialExists.
 - TrivialExists.
 Qed.
 
 Theorem eval_cast8signed: unary_constructor_sound cast8signed (Val.sign_ext 8).
 Proof.
-  apply eval_sign_ext; omega. 
+  apply eval_sign_ext; lia. 
 Qed.
 
 Theorem eval_cast8unsigned: unary_constructor_sound cast8unsigned (Val.zero_ext 8).
 Proof.
-  apply eval_zero_ext; omega.
+  apply eval_zero_ext; lia.
 Qed.
 
 Theorem eval_cast16signed: unary_constructor_sound cast16signed (Val.sign_ext 16).
 Proof.
-  apply eval_sign_ext; omega. 
+  apply eval_sign_ext; lia. 
 Qed.
 
 Theorem eval_cast16unsigned: unary_constructor_sound cast16unsigned (Val.zero_ext 16).
 Proof.
-  apply eval_zero_ext; omega.
+  apply eval_zero_ext; lia.
 Qed.
 
 (** Bitwise not, and, or, xor *)
@@ -1038,7 +1038,13 @@ Theorem eval_addressing:
 Proof.
   intros until v. unfold addressing; case (addressing_match a); intros; InvEval.
 - econstructor; split. EvalOp. simpl; auto.
-- econstructor; split. EvalOp. simpl; auto.
+- destruct (symbol_is_relocatable id).
+  + exists (Genv.symbol_address ge id Ptrofs.zero :: nil); split.
+    constructor. EvalOp. constructor.
+    simpl. rewrite <- Genv.shift_symbol_address_64 by auto.
+    rewrite Ptrofs.of_int64_to_int64, Ptrofs.add_zero_l by auto.
+    auto.
+  + econstructor; split. EvalOp. simpl; auto.
 - econstructor; split. EvalOp. simpl. 
   destruct v1; try discriminate. rewrite <- H; auto.
 - econstructor; split. EvalOp. simpl. congruence.
diff --git a/aarch64/Stacklayout.v b/aarch64/Stacklayout.v
index 86ba9f45..cdbc64d5 100644
--- a/aarch64/Stacklayout.v
+++ b/aarch64/Stacklayout.v
@@ -67,13 +67,13 @@ Local Opaque Z.add Z.mul sepconj range.
   set (ostkdata := align (ol + 4 * b.(bound_local)) 8).
   change (size_chunk Mptr) with 8.
   generalize b.(bound_local_pos) b.(bound_outgoing_pos) b.(bound_stack_data_pos); intros.
-  assert (0 <= 4 * b.(bound_outgoing)) by omega.
-  assert (4 * b.(bound_outgoing) <= olink) by (apply align_le; omega).
-  assert (olink + 8 <= oretaddr) by (unfold oretaddr; omega).
-  assert (oretaddr + 8 <= ocs) by (unfold ocs; omega).
+  assert (0 <= 4 * b.(bound_outgoing)) by lia.
+  assert (4 * b.(bound_outgoing) <= olink) by (apply align_le; lia).
+  assert (olink + 8 <= oretaddr) by (unfold oretaddr; lia).
+  assert (oretaddr + 8 <= ocs) by (unfold ocs; lia).
   assert (ocs <= size_callee_save_area b ocs) by (apply size_callee_save_area_incr). 
-  assert (size_callee_save_area b ocs <= ol) by (apply align_le; omega).
-  assert (ol + 4 * b.(bound_local) <= ostkdata) by (apply align_le; omega).
+  assert (size_callee_save_area b ocs <= ol) by (apply align_le; lia).
+  assert (ol + 4 * b.(bound_local) <= ostkdata) by (apply align_le; lia).
 (* Reorder as:
      outgoing
      back link
@@ -86,11 +86,11 @@ Local Opaque Z.add Z.mul sepconj range.
   rewrite sep_swap45.
 (* Apply range_split and range_split2 repeatedly *)
   unfold fe_ofs_arg.
-  apply range_split_2. fold olink; omega. omega.
-  apply range_split. omega.
-  apply range_split. omega.
-  apply range_split_2. fold ol. omega. omega.
-  apply range_drop_right with ostkdata. omega.
+  apply range_split_2. fold olink; lia. lia.
+  apply range_split. lia.
+  apply range_split. lia.
+  apply range_split_2. fold ol. lia. lia.
+  apply range_drop_right with ostkdata. lia.
   eapply sep_drop2. eexact H.
 Qed.
 
@@ -106,14 +106,14 @@ Proof.
   set (ol :=  align (size_callee_save_area b ocs) 8).
   set (ostkdata := align (ol + 4 * b.(bound_local)) 8).
   generalize b.(bound_local_pos) b.(bound_outgoing_pos) b.(bound_stack_data_pos); intros.
-  assert (0 <= 4 * b.(bound_outgoing)) by omega.
-  assert (4 * b.(bound_outgoing) <= olink) by (apply align_le; omega).
-  assert (olink + 8 <= oretaddr) by (unfold oretaddr; omega).
-  assert (oretaddr + 8 <= ocs) by (unfold ocs; omega).
+  assert (0 <= 4 * b.(bound_outgoing)) by lia.
+  assert (4 * b.(bound_outgoing) <= olink) by (apply align_le; lia).
+  assert (olink + 8 <= oretaddr) by (unfold oretaddr; lia).
+  assert (oretaddr + 8 <= ocs) by (unfold ocs; lia).
   assert (ocs <= size_callee_save_area b ocs) by (apply size_callee_save_area_incr). 
-  assert (size_callee_save_area b ocs <= ol) by (apply align_le; omega).
-  assert (ol + 4 * b.(bound_local) <= ostkdata) by (apply align_le; omega).
-  split. omega. apply align_le. omega. 
+  assert (size_callee_save_area b ocs <= ol) by (apply align_le; lia).
+  assert (ol + 4 * b.(bound_local) <= ostkdata) by (apply align_le; lia).
+  split. lia. apply align_le. lia. 
 Qed.
 
 Lemma frame_env_aligned:
@@ -133,8 +133,8 @@ Proof.
   set (ostkdata := align (ol + 4 * b.(bound_local)) 8).
   change (align_chunk Mptr) with 8.
   split. apply Z.divide_0_r.
-  split. apply align_divides; omega.
-  split. apply align_divides; omega.
-  split. apply align_divides; omega.
-  apply Z.divide_add_r. apply align_divides; omega. apply Z.divide_refl.
+  split. apply align_divides; lia.
+  split. apply align_divides; lia.
+  split. apply align_divides; lia.
+  apply Z.divide_add_r. apply align_divides; lia. apply Z.divide_refl.
 Qed.
diff --git a/aarch64/Archi.v b/aarch64/TO_MERGE/Archi.v
index 7f39d1fa..eb022db9 100644
--- a/aarch64/Archi.v
+++ b/aarch64/TO_MERGE/Archi.v
@@ -85,8 +85,16 @@ Global Opaque ptr64 big_endian splitlong
               fma_order fma_invalid_mul_is_nan
               float_of_single_preserves_sNaN.
 
-(** Whether to generate position-independent code or not *)
+(** Which ABI to implement *)
 
+<<<<<<< HEAD
 Parameter pic_code: unit -> bool.
 
 Definition has_notrap_loads := false.
+=======
+Inductive abi_kind: Type :=
+  | AAPCS64 (**r ARM's standard as used in Linux and other ELF platforms *)
+  | Apple.  (**r the variant used in macOS and iOS *)
+
+Parameter abi: abi_kind.
+>>>>>>> master
diff --git a/aarch64/Asmgen.v b/aarch64/TO_MERGE/Asmgen.v
index 45205158..c8e48b40 100644
--- a/aarch64/Asmgen.v
+++ b/aarch64/TO_MERGE/Asmgen.v
@@ -17,8 +17,120 @@
 
 Require Import Recdef Coqlib Zwf Zbits.
 Require Import Errors AST Integers Floats Op.
+<<<<<<< HEAD
 Require Import Locations Compopts.
 Require Import Mach Asm Asmblock Asmblockgen Machblockgen PostpassScheduling.
+=======
+Require Import Locations Mach Asm.
+Require SelectOp.
+
+Local Open Scope string_scope.
+Local Open Scope list_scope.
+Local Open Scope error_monad_scope.
+
+(** Alignment check for symbols *)
+
+Parameter symbol_is_aligned : ident -> Z -> bool.
+(** [symbol_is_aligned id sz] checks whether the symbol [id] is [sz] aligned *)
+
+(** Extracting integer or float registers. *)
+
+Definition ireg_of (r: mreg) : res ireg :=
+  match preg_of r with IR mr => OK mr | _ => Error(msg "Asmgen.ireg_of") end.
+
+Definition freg_of (r: mreg) : res freg :=
+  match preg_of r with FR mr => OK mr | _ => Error(msg "Asmgen.freg_of") end.
+
+(** Recognition of immediate arguments for logical integer operations.*)
+
+(** Valid immediate arguments are repetitions of a bit pattern [B]
+  of length [e] = 2, 4, 8, 16, 32 or 64.
+  The bit pattern [B] must be of the form [0*1*0*] or [1*0*1*]
+  but must not be all zeros or all ones. *)
+
+(** The following automaton recognizes [0*1*0*|1*0*1*].
+<<
+               0          1          0
+              / \        / \        / \
+              \ /        \ /        \ /
+        -0--> [B] --1--> [D] --0--> [F]
+       /
+     [A]
+       \
+        -1--> [C] --0--> [E] --1--> [G]
+              / \        / \        / \
+              \ /        \ /        \ /
+               1          0          1
+>>
+*)
+
+Module Automaton.
+
+Inductive state : Type := SA | SB | SC | SD | SE | SF | SG | Sbad.
+
+Definition start := SA.
+
+Definition next (s: state) (b: bool) :=
+  match s, b with
+    | SA,false => SB      | SA,true => SC
+    | SB,false => SB      | SB,true => SD
+    | SC,false => SE      | SC,true => SC
+    | SD,false => SF      | SD,true => SD
+    | SE,false => SE      | SE,true => SG
+    | SF,false => SF      | SF,true => Sbad
+    | SG,false => Sbad    | SG,true => SG
+    | Sbad,_ => Sbad
+  end.
+
+Definition accepting (s: state) :=
+  match s with
+  | SA | SB | SC | SD | SE | SF | SG => true
+  | Sbad => false
+  end.
+
+Fixpoint run (len: nat) (s: state) (x: Z) : bool :=
+  match len with
+  | Datatypes.O => accepting s
+  | Datatypes.S len => run len (next s (Z.odd x)) (Z.div2 x)
+  end.
+
+End Automaton.
+
+(** The following function determines the candidate length [e],
+    ensuring that [x] is a repetition [BB...B] 
+    of a bit pattern [B] of length [e]. *)
+
+Definition logical_imm_length (x: Z) (sixtyfour: bool) : nat :=
+  (** [test n] checks that the low [2n] bits of [x] are of the
+      form [BB], that is, two occurrences of the same [n] bits *)
+  let test (n: Z) : bool :=
+    Z.eqb (Zzero_ext n x) (Zzero_ext n (Z.shiftr x n)) in
+  (** If [test n] fails, we know that the candidate length [e] is
+      at least [2n].  Hence we test with decreasing values of [n]:
+      32, 16, 8, 4, 2. *)
+  if sixtyfour && negb (test 32) then 64%nat
+  else if negb (test 16) then 32%nat
+  else if negb (test 8) then 16%nat
+  else if negb (test 4) then 8%nat
+  else if negb (test 2) then 4%nat
+  else 2%nat.
+
+(** A valid logical immediate is 
+- neither [0] nor [-1];
+- composed of a repetition [BBBBB] of a bit-pattern [B] of length [e]
+- the low [e] bits of the number, that is, [B], match [0*1*0*] or [1*0*1*].
+*)
+
+Definition is_logical_imm32 (x: int) : bool :=
+  negb (Int.eq x Int.zero) && negb (Int.eq x Int.mone) &&
+  Automaton.run (logical_imm_length (Int.unsigned x) false)
+                Automaton.start (Int.unsigned x).
+
+Definition is_logical_imm64 (x: int64) : bool :=
+  negb (Int64.eq x Int64.zero) && negb (Int64.eq x Int64.mone) &&
+  Automaton.run (logical_imm_length (Int64.unsigned x) true)
+                Automaton.start (Int64.unsigned x).
+>>>>>>> master
 
 
 Local Open Scope error_monad_scope.
@@ -82,7 +194,86 @@ Definition addimm64 (rd r1: iregsp) (n: int64) (k: code) : code :=
   else if Int64.eq m (Int64.zero_ext 24 m) then
     addimm_aux (Asm.Psubimm X) rd r1 (Int64.unsigned m) k
   else if Int64.lt n Int64.zero then
+<<<<<<< HEAD
     loadimm64 X16 m (Asm.Psubext rd r1 X16 (EOuxtx Int.zero) :: k)
+=======
+    loadimm64 X16 m (Psubext rd r1 X16 (EOuxtx Int.zero) :: k)
+  else
+    loadimm64 X16 n (Paddext rd r1 X16 (EOuxtx Int.zero) :: k).
+
+(** Logical immediate *)
+
+Definition logicalimm32
+              (insn1: ireg -> ireg0 -> Z -> instruction)
+              (insn2: ireg -> ireg0 -> ireg -> shift_op -> instruction)
+              (rd r1: ireg) (n: int) (k: code) : code :=
+  if is_logical_imm32 n
+  then insn1 rd r1 (Int.unsigned n) :: k
+  else loadimm32 X16 n (insn2 rd r1 X16 SOnone :: k).
+
+Definition logicalimm64
+              (insn1: ireg -> ireg0 -> Z -> instruction)
+              (insn2: ireg -> ireg0 -> ireg -> shift_op -> instruction)
+              (rd r1: ireg) (n: int64) (k: code) : code :=
+  if is_logical_imm64 n
+  then insn1 rd r1 (Int64.unsigned n) :: k
+  else loadimm64 X16 n (insn2 rd r1 X16 SOnone :: k).
+
+(** Sign- or zero-extended arithmetic *)
+
+Definition transl_extension (ex: extension) (a: int) : extend_op :=
+  match ex with Xsgn32 => EOsxtw a | Xuns32 => EOuxtw a end.
+
+Definition move_extended_base
+              (rd: ireg) (r1: ireg) (ex: extension) (k: code) : code :=
+  match ex with
+  | Xsgn32 => Pcvtsw2x rd r1 :: k
+  | Xuns32 => Pcvtuw2x rd r1 :: k
+  end.
+
+Definition move_extended
+              (rd: ireg) (r1: ireg) (ex: extension) (a: int) (k: code) : code :=
+  if Int.eq a Int.zero then
+    move_extended_base rd r1 ex k
+  else
+    move_extended_base rd r1 ex (Padd X rd XZR rd (SOlsl a) :: k).
+
+Definition arith_extended 
+              (insnX: iregsp -> iregsp -> ireg -> extend_op -> instruction)
+              (insnS: ireg -> ireg0 -> ireg -> shift_op -> instruction)
+              (rd r1 r2: ireg) (ex: extension) (a: int) (k: code) : code :=
+  if Int.ltu a (Int.repr 5) then
+    insnX rd r1 r2 (transl_extension ex a) :: k
+  else
+    move_extended_base X16 r2 ex (insnS rd r1 X16 (SOlsl a) :: k).
+
+(** Extended right shift *)
+
+Definition shrx32 (rd r1: ireg) (n: int) (k: code) : code :=
+  if Int.eq n Int.zero then
+    Pmov rd r1 :: k
+  else
+    Porr W X16 XZR r1 (SOasr (Int.repr 31)) ::
+    Padd W X16 r1 X16 (SOlsr (Int.sub Int.iwordsize n)) ::
+    Porr W rd XZR X16 (SOasr n) :: k.
+
+Definition shrx64 (rd r1: ireg) (n: int) (k: code) : code :=
+  if Int.eq n Int.zero then
+    Pmov rd r1 :: k
+  else
+    Porr X X16 XZR r1 (SOasr (Int.repr 63)) ::
+    Padd X X16 r1 X16 (SOlsr (Int.sub Int64.iwordsize' n)) ::
+    Porr X rd XZR X16 (SOasr n) :: k.
+
+(** Load the address [id + ofs] in [rd] *)
+
+Definition loadsymbol (rd: ireg) (id: ident) (ofs: ptrofs) (k: code) : code :=
+  if SelectOp.symbol_is_relocatable id then
+    if Ptrofs.eq ofs Ptrofs.zero then
+      Ploadsymbol rd id :: k
+    else
+      Ploadsymbol rd id :: addimm64 rd rd (Ptrofs.to_int64 ofs) k
+>>>>>>> master
   else
     loadimm64 X16 n (Asm.Paddext rd r1 X16 (EOuxtx Int.zero) :: k).
 
@@ -374,6 +565,7 @@ Definition basic_to_instruction (b: basic) : res Asm.instruction :=
   | Pnop                   => OK (Asm.Pnop)
   end.
 
+<<<<<<< HEAD
 Definition cf_instruction_to_instruction (cfi: cf_instruction) : Asm.instruction :=
   match cfi with
   | Pb l => Asm.Pb l
@@ -388,6 +580,59 @@ Definition cf_instruction_to_instruction (cfi: cf_instruction) : Asm.instruction
   | Ptbnz sz r n lbl => Asm.Ptbnz sz r n lbl
   | Ptbz sz r n lbl => Asm.Ptbz sz r n lbl
   | Pbtbl r1 tbl => Asm.Pbtbl r1 tbl
+=======
+(** Translation of addressing modes *)
+
+Definition offset_representable (sz: Z) (ofs: int64) : bool :=
+  let isz := Int64.repr sz in
+  (** either unscaled 9-bit signed *)
+  Int64.eq ofs (Int64.sign_ext 9 ofs) ||
+  (** or scaled 12-bit unsigned *)
+  (Int64.eq (Int64.modu ofs isz) Int64.zero
+   && Int64.ltu ofs (Int64.shl isz (Int64.repr 12))).
+ 
+Definition transl_addressing (sz: Z) (addr: Op.addressing) (args: list mreg)
+                             (insn: Asm.addressing -> instruction) (k: code) : res code :=
+  match addr, args with
+  | Aindexed ofs, a1 :: nil =>
+      do r1 <- ireg_of a1;
+       if offset_representable sz ofs then
+        OK (insn (ADimm r1 ofs) :: k)
+      else
+        OK (loadimm64 X16 ofs (insn (ADreg r1 X16) :: k))
+  | Aindexed2, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (insn (ADreg r1 r2) :: k)
+  | Aindexed2shift a, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      if Int.eq a Int.zero then
+        OK (insn (ADreg r1 r2) :: k)
+      else if Int.eq (Int.shl Int.one a) (Int.repr sz) then
+        OK (insn (ADlsl r1 r2 a) :: k)
+      else
+        OK (Padd X X16 r1 r2 (SOlsl a) :: insn (ADimm X16 Int64.zero) :: k)
+  | Aindexed2ext x a, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      if Int.eq a Int.zero || Int.eq (Int.shl Int.one a) (Int.repr sz) then
+        OK (insn (match x with Xsgn32 => ADsxt r1 r2 a
+                             | Xuns32 => ADuxt r1 r2 a end) :: k)
+      else
+        OK (arith_extended Paddext (Padd X) X16 r1 r2 x a
+                           (insn (ADimm X16 Int64.zero) :: k))
+  | Aglobal id ofs, nil =>
+      assertion (negb (SelectOp.symbol_is_relocatable id));
+      if Ptrofs.eq (Ptrofs.modu ofs (Ptrofs.repr sz)) Ptrofs.zero && symbol_is_aligned id sz
+      then OK (Padrp X16 id ofs :: insn (ADadr X16 id ofs) :: k)
+      else OK (loadsymbol X16 id ofs (insn (ADimm X16 Int64.zero) :: k))
+  | Ainstack ofs, nil =>
+      let ofs := Ptrofs.to_int64 ofs in
+      if offset_representable sz ofs then
+        OK (insn (ADimm XSP ofs) :: k)
+      else
+        OK (loadimm64 X16 ofs (insn (ADreg XSP X16) :: k))
+  | _, _ =>
+      Error(msg "Asmgen.transl_addressing")
+>>>>>>> master
   end.
 
 Definition control_to_instruction (c: control) :=
diff --git a/aarch64/Asmgenproof.v b/aarch64/TO_MERGE/Asmgenproof.v
index d27b3f8c..8af013fd 100644
--- a/aarch64/Asmgenproof.v
+++ b/aarch64/TO_MERGE/Asmgenproof.v
@@ -209,6 +209,7 @@ Definition max_pos (f : Asm.function) := list_length_z f.(Asm.fn_code).
 Lemma functions_bound_max_pos: forall fb f tf,
   Genv.find_funct_ptr ge fb = Some (Internal f) ->
   transf_function f = OK tf ->
+<<<<<<< HEAD
   max_pos tf <= Ptrofs.max_unsigned.
 Proof.
   intros fb f tf FINDf TRANSf.
@@ -222,6 +223,66 @@ Proof.
     assert (Asm.fn_code tf = c) as H. { inversion TRANSf as (H'); auto. }
     rewrite H; lia.
 Qed.
+=======
+  Genv.find_funct_ptr tge fb = Some (Internal tf).
+Proof.
+  intros. exploit functions_translated; eauto. intros [tf' [A B]].
+  monadInv B. rewrite H0 in EQ; inv EQ; auto.
+Qed.
+
+(** * Properties of control flow *)
+
+Lemma transf_function_no_overflow:
+  forall f tf,
+  transf_function f = OK tf -> list_length_z tf.(fn_code) <= Ptrofs.max_unsigned.
+Proof.
+  intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (list_length_z x.(fn_code))); inv EQ0.
+  lia.
+Qed.
+
+Lemma exec_straight_exec:
+  forall fb f c ep tf tc c' rs m rs' m',
+  transl_code_at_pc ge (rs PC) fb f c ep tf tc ->
+  exec_straight tge tf tc rs m c' rs' m' ->
+  plus step tge (State rs m) E0 (State rs' m').
+Proof.
+  intros. inv H.
+  eapply exec_straight_steps_1; eauto.
+  eapply transf_function_no_overflow; eauto.
+  eapply functions_transl; eauto.
+Qed.
+
+Lemma exec_straight_at:
+  forall fb f c ep tf tc c' ep' tc' rs m rs' m',
+  transl_code_at_pc ge (rs PC) fb f c ep tf tc ->
+  transl_code f c' ep' = OK tc' ->
+  exec_straight tge tf tc rs m tc' rs' m' ->
+  transl_code_at_pc ge (rs' PC) fb f c' ep' tf tc'.
+Proof.
+  intros. inv H.
+  exploit exec_straight_steps_2; eauto.
+  eapply transf_function_no_overflow; eauto.
+  eapply functions_transl; eauto.
+  intros [ofs' [PC' CT']].
+  rewrite PC'. constructor; auto.
+Qed.
+
+(** The following lemmas show that the translation from Mach to Asm
+  preserves labels, in the sense that the following diagram commutes:
+<<
+                          translation
+        Mach code ------------------------ Asm instr sequence
+            |                                          |
+            | Mach.find_label lbl       find_label lbl |
+            |                                          |
+            v                                          v
+        Mach code tail ------------------- Asm instr seq tail
+                          translation
+>>
+  The proof demands many boring lemmas showing that Asm constructor
+  functions do not introduce new labels.
+*)
+>>>>>>> master
 
 Lemma one_le_max_unsigned:
   1 <= Ptrofs.max_unsigned.
@@ -366,9 +427,16 @@ Lemma unfold_cdr bb bbs tc:
   unfold (bb :: bbs) = OK tc ->
   exists tc', unfold bbs = OK tc'.
 Proof.
+<<<<<<< HEAD
   intros; exploit unfold_car_cdr; eauto. intros (_ & ? & _ & ? & _).
   eexists; eauto.
 Qed.
+=======
+  intros; unfold loadsymbol.
+  destruct (SelectOp.symbol_is_relocatable id); TailNoLabel. destruct Ptrofs.eq; TailNoLabel.
+Qed. 
+Hint Resolve loadsymbol_label: labels.
+>>>>>>> master
 
 Lemma unfold_car bb bbs tc:
   unfold (bb :: bbs) = OK tc ->
@@ -1114,6 +1182,7 @@ Proof.
   * eapply ptrofs_nextinstr_agree; eauto.
 Qed.
 
+<<<<<<< HEAD
 Lemma store_rs_a_preserved n rs1 m1 rs1' m1' rs2 m2 v chk a: forall
   (BOUNDED: 0 <= n <= Ptrofs.max_unsigned)
   (MATCHI: match_internal n (State rs1 m1) (State rs2 m2))
@@ -1132,6 +1201,33 @@ Proof.
     * eapply ptrofs_nextinstr_agree; subst; eauto.
   + next_stuck_cong.
 Qed.
+=======
+Lemma find_label_goto_label:
+  forall f tf lbl rs m c' b ofs,
+  Genv.find_funct_ptr ge b = Some (Internal f) ->
+  transf_function f = OK tf ->
+  rs PC = Vptr b ofs ->
+  Mach.find_label lbl f.(Mach.fn_code) = Some c' ->
+  exists tc', exists rs',
+     goto_label tf lbl rs m = Next rs' m
+  /\ transl_code_at_pc ge (rs' PC) b f c' false tf tc'
+  /\ forall r, r <> PC -> rs'#r = rs#r.
+Proof.
+  intros. exploit (transl_find_label lbl f tf); eauto. rewrite H2.
+  intros [tc [A B]].
+  exploit label_pos_code_tail; eauto. instantiate (1 := 0).
+  intros [pos' [P [Q R]]].
+  exists tc; exists (rs#PC <- (Vptr b (Ptrofs.repr pos'))).
+  split. unfold goto_label. rewrite P. rewrite H1. auto.
+  split. rewrite Pregmap.gss. constructor; auto.
+  rewrite Ptrofs.unsigned_repr. replace (pos' - 0) with pos' in Q.
+  auto. lia.
+  generalize (transf_function_no_overflow _ _ H0). lia.
+  intros. apply Pregmap.gso; auto.
+Qed.
+
+(** Existence of return addresses *)
+>>>>>>> master
 
 Lemma store_double_preserved n rs1 m1 rs1' m1' rs2 m2 v1 v2 chk1 chk2 a: forall
   (BOUNDED: 0 <= n <= Ptrofs.max_unsigned)
@@ -2045,6 +2141,7 @@ Fixpoint preg_notin (r: preg) (rl: list mreg) : Prop :=
   | r1 :: rl => r <> preg_of r1 /\ preg_notin r rl
   end.
 
+<<<<<<< HEAD
 Remark preg_notin_charact:
   forall r rl,
   preg_notin r rl <-> (forall mr, In mr rl -> r <> preg_of mr).
@@ -2056,6 +2153,378 @@ Proof.
   rewrite IHrl. split.
   intros [A B]. intros. destruct H. congruence. auto.
   auto.
+=======
+Remark preg_of_not_X29: forall r, negb (mreg_eq r R29) = true -> IR X29 <> preg_of r.
+Proof.
+  intros. change (IR X29) with (preg_of R29). red; intros.
+  exploit preg_of_injective; eauto. intros; subst r; discriminate.
+Qed.
+
+Lemma sp_val': forall ms sp rs, agree ms sp rs -> sp = rs XSP.
+Proof.
+  intros. eapply sp_val; eauto. 
+Qed. 
+
+(** This is the simulation diagram.  We prove it by case analysis on the Mach transition. *)
+
+Theorem step_simulation:
+  forall S1 t S2, Mach.step return_address_offset ge S1 t S2 ->
+  forall S1' (MS: match_states S1 S1'),
+  (exists S2', plus step tge S1' t S2' /\ match_states S2 S2')
+  \/ (measure S2 < measure S1 /\ t = E0 /\ match_states S2 S1')%nat.
+Proof.
+  induction 1; intros; inv MS.
+
+- (* Mlabel *)
+  left; eapply exec_straight_steps; eauto; intros.
+  monadInv TR. econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  split. apply agree_nextinstr; auto. simpl; congruence.
+
+- (* Mgetstack *)
+  unfold load_stack in H.
+  exploit Mem.loadv_extends; eauto. intros [v' [A B]].
+  rewrite (sp_val _ _ _ AG) in A.
+  left; eapply exec_straight_steps; eauto. intros. simpl in TR.
+  exploit loadind_correct; eauto with asmgen. intros [rs' [P [Q R]]].
+  exists rs'; split. eauto.
+  split. eapply agree_set_mreg; eauto with asmgen. congruence.
+  simpl; congruence.
+
+- (* Msetstack *)
+  unfold store_stack in H.
+  assert (Val.lessdef (rs src) (rs0 (preg_of src))) by (eapply preg_val; eauto).
+  exploit Mem.storev_extends; eauto. intros [m2' [A B]].
+  left; eapply exec_straight_steps; eauto.
+  rewrite (sp_val _ _ _ AG) in A. intros. simpl in TR.
+  exploit storeind_correct; eauto with asmgen. intros [rs' [P Q]].
+  exists rs'; split. eauto.
+  split. eapply agree_undef_regs; eauto with asmgen.
+  simpl; intros. rewrite Q; auto with asmgen.
+
+- (* Mgetparam *)
+  assert (f0 = f) by congruence; subst f0.
+  unfold load_stack in *.
+  exploit Mem.loadv_extends. eauto. eexact H0. auto.
+  intros [parent' [A B]]. rewrite (sp_val' _ _ _ AG) in A.
+  exploit lessdef_parent_sp; eauto. clear B; intros B; subst parent'.
+  exploit Mem.loadv_extends. eauto. eexact H1. auto.
+  intros [v' [C D]].
+Opaque loadind.
+  left; eapply exec_straight_steps; eauto; intros. monadInv TR. 
+  destruct ep.
+(* X30 contains parent *)
+  exploit loadind_correct. eexact EQ.
+  instantiate (2 := rs0). simpl; rewrite DXP; eauto. simpl; congruence.
+  intros [rs1 [P [Q R]]].
+  exists rs1; split. eauto.
+  split. eapply agree_set_mreg. eapply agree_set_mreg; eauto. congruence. auto with asmgen.
+  simpl; intros. rewrite R; auto with asmgen.
+  apply preg_of_not_X29; auto.
+(* X30 does not contain parent *)
+  exploit loadptr_correct. eexact A. simpl; congruence. intros [rs1 [P [Q R]]].
+  exploit loadind_correct. eexact EQ. instantiate (2 := rs1). simpl; rewrite Q. eauto. simpl; congruence.
+  intros [rs2 [S [T U]]].
+  exists rs2; split. eapply exec_straight_trans; eauto.
+  split. eapply agree_set_mreg. eapply agree_set_mreg. eauto. eauto.
+  instantiate (1 := rs1#X29 <- (rs2#X29)). intros.
+  rewrite Pregmap.gso; auto with asmgen.
+  congruence.
+  intros. unfold Pregmap.set. destruct (PregEq.eq r' X29). congruence. auto with asmgen.
+  simpl; intros. rewrite U; auto with asmgen.
+  apply preg_of_not_X29; auto.
+
+- (* Mop *)
+  assert (eval_operation tge sp op (map rs args) m = Some v).
+  { rewrite <- H. apply eval_operation_preserved. exact symbols_preserved. }
+  exploit eval_operation_lessdef. eapply preg_vals; eauto. eauto. eexact H0.
+  intros [v' [A B]]. rewrite (sp_val _ _ _ AG) in A.
+  left; eapply exec_straight_steps; eauto; intros. simpl in TR.
+  exploit transl_op_correct; eauto. intros [rs2 [P [Q R]]].
+  exists rs2; split. eauto. split.
+  apply agree_set_undef_mreg with rs0; auto. 
+  apply Val.lessdef_trans with v'; auto.
+  simpl; intros. InvBooleans. 
+  rewrite R; auto. apply preg_of_not_X29; auto.
+Local Transparent destroyed_by_op.
+  destruct op; try exact I; simpl; congruence.
+
+- (* Mload *)
+  assert (Op.eval_addressing tge sp addr (map rs args) = Some a).
+  { rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved. }
+  exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1.
+  intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A.
+  exploit Mem.loadv_extends; eauto. intros [v' [C D]].
+  left; eapply exec_straight_steps; eauto; intros. simpl in TR.
+  exploit transl_load_correct; eauto. intros [rs2 [P [Q R]]].
+  exists rs2; split. eauto.
+  split. eapply agree_set_undef_mreg; eauto. congruence.
+  simpl; congruence.
+
+- (* Mstore *)
+  assert (Op.eval_addressing tge sp addr (map rs args) = Some a).
+  { rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved. }
+  exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1.
+  intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A.
+  assert (Val.lessdef (rs src) (rs0 (preg_of src))) by (eapply preg_val; eauto).
+  exploit Mem.storev_extends; eauto. intros [m2' [C D]].
+  left; eapply exec_straight_steps; eauto.
+  intros. simpl in TR. exploit transl_store_correct; eauto. intros [rs2 [P Q]].
+  exists rs2; split. eauto.
+  split. eapply agree_undef_regs; eauto with asmgen.
+  simpl; congruence.
+
+- (* Mcall *)
+  assert (f0 = f) by congruence.  subst f0.
+  inv AT.
+  assert (NOOV: list_length_z tf.(fn_code) <= Ptrofs.max_unsigned).
+  { eapply transf_function_no_overflow; eauto. }
+  destruct ros as [rf|fid]; simpl in H; monadInv H5.
++ (* Indirect call *)
+  assert (rs rf = Vptr f' Ptrofs.zero).
+  { destruct (rs rf); try discriminate.
+    revert H; predSpec Ptrofs.eq Ptrofs.eq_spec i Ptrofs.zero; intros; congruence. }
+  assert (rs0 x0 = Vptr f' Ptrofs.zero).
+  { exploit ireg_val; eauto. rewrite H5; intros LD; inv LD; auto. }
+  generalize (code_tail_next_int _ _ _ _ NOOV H6). intro CT1.
+  assert (TCA: transl_code_at_pc ge (Vptr fb (Ptrofs.add ofs Ptrofs.one)) fb f c false tf x).
+  {  econstructor; eauto. }
+  exploit return_address_offset_correct; eauto. intros; subst ra.
+  left; econstructor; split.
+  apply plus_one. eapply exec_step_internal. Simpl. rewrite <- H2; simpl; eauto.
+  eapply functions_transl; eauto. eapply find_instr_tail; eauto.
+  simpl. eauto.
+  econstructor; eauto.
+  econstructor; eauto.
+  eapply agree_sp_def; eauto.
+  simpl. eapply agree_exten; eauto. intros. Simpl.
+  Simpl. rewrite <- H2. auto.
++ (* Direct call *)
+  generalize (code_tail_next_int _ _ _ _ NOOV H6). intro CT1.
+  assert (TCA: transl_code_at_pc ge (Vptr fb (Ptrofs.add ofs Ptrofs.one)) fb f c false tf x).
+    econstructor; eauto.
+  exploit return_address_offset_correct; eauto. intros; subst ra.
+  left; econstructor; split.
+  apply plus_one. eapply exec_step_internal. eauto.
+  eapply functions_transl; eauto. eapply find_instr_tail; eauto.
+  simpl. unfold Genv.symbol_address. rewrite symbols_preserved. rewrite H. eauto.
+  econstructor; eauto.
+  econstructor; eauto.
+  eapply agree_sp_def; eauto.
+  simpl. eapply agree_exten; eauto. intros. Simpl.
+  Simpl. rewrite <- H2. auto.
+
+- (* Mtailcall *)
+  assert (f0 = f) by congruence.  subst f0.
+  inversion AT; subst.
+  assert (NOOV: list_length_z tf.(fn_code) <= Ptrofs.max_unsigned).
+  { eapply transf_function_no_overflow; eauto. }
+  exploit Mem.loadv_extends. eauto. eexact H1. auto. simpl. intros [parent' [A B]].
+  destruct ros as [rf|fid]; simpl in H; monadInv H7.
++ (* Indirect call *)
+  assert (rs rf = Vptr f' Ptrofs.zero).
+  { destruct (rs rf); try discriminate.
+    revert H; predSpec Ptrofs.eq Ptrofs.eq_spec i Ptrofs.zero; intros; congruence. }
+  assert (rs0 x0 = Vptr f' Ptrofs.zero).
+  { exploit ireg_val; eauto. rewrite H7; intros LD; inv LD; auto. }
+  exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z). 
+  exploit exec_straight_steps_2; eauto using functions_transl.                      
+  intros (ofs' & P & Q).
+  left; econstructor; split.
+  (* execution *)
+  eapply plus_right'. eapply exec_straight_exec; eauto.
+  econstructor. eexact P. eapply functions_transl; eauto. eapply find_instr_tail. eexact Q.
+  simpl. reflexivity.
+  traceEq.
+  (* match states *)
+  econstructor; eauto.
+  apply agree_set_other; auto with asmgen.
+  Simpl. rewrite Z by (rewrite <- (ireg_of_eq _ _ EQ1); eauto with asmgen). assumption. 
++ (* Direct call *)
+  exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z). 
+  exploit exec_straight_steps_2; eauto using functions_transl.                      
+  intros (ofs' & P & Q).
+  left; econstructor; split.
+  (* execution *)
+  eapply plus_right'. eapply exec_straight_exec; eauto.
+  econstructor. eexact P. eapply functions_transl; eauto. eapply find_instr_tail. eexact Q.
+  simpl. reflexivity.
+  traceEq.
+  (* match states *)
+  econstructor; eauto.
+  apply agree_set_other; auto with asmgen.
+  Simpl. unfold Genv.symbol_address. rewrite symbols_preserved. rewrite H. auto.
+
+- (* Mbuiltin *)
+  inv AT. monadInv H4.
+  exploit functions_transl; eauto. intro FN.
+  generalize (transf_function_no_overflow _ _ H3); intro NOOV.
+  exploit builtin_args_match; eauto. intros [vargs' [P Q]].
+  exploit external_call_mem_extends; eauto.
+  intros [vres' [m2' [A [B [C D]]]]].
+  left. econstructor; split. apply plus_one.
+  eapply exec_step_builtin. eauto. eauto.
+  eapply find_instr_tail; eauto.
+  erewrite <- sp_val by eauto.
+  eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
+  eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  eauto.
+  econstructor; eauto.
+  instantiate (2 := tf); instantiate (1 := x).
+  unfold nextinstr. rewrite Pregmap.gss.
+  rewrite set_res_other. rewrite undef_regs_other.
+  rewrite <- H1. simpl. econstructor; eauto.
+  eapply code_tail_next_int; eauto.
+  simpl; intros. destruct H4. congruence. destruct H4. congruence.
+  exploit list_in_map_inv; eauto. intros (mr & U & V). subst.
+  auto with asmgen.
+  auto with asmgen.
+  apply agree_nextinstr. eapply agree_set_res; auto.
+  eapply agree_undef_regs; eauto. intros.
+  simpl. rewrite undef_regs_other_2; auto. Simpl.
+  congruence.
+
+- (* Mgoto *)
+  assert (f0 = f) by congruence. subst f0.
+  inv AT. monadInv H4.
+  exploit find_label_goto_label; eauto. intros [tc' [rs' [GOTO [AT2 INV]]]].
+  left; exists (State rs' m'); split.
+  apply plus_one. econstructor; eauto.
+  eapply functions_transl; eauto.
+  eapply find_instr_tail; eauto.
+  simpl; eauto.
+  econstructor; eauto.
+  eapply agree_exten; eauto with asmgen.
+  congruence.
+
+- (* Mcond true *)
+  assert (f0 = f) by congruence. subst f0.
+  exploit eval_condition_lessdef. eapply preg_vals; eauto. eauto. eauto. intros EC.
+  left; eapply exec_straight_opt_steps_goto; eauto.
+  intros. simpl in TR.
+  exploit transl_cond_branch_correct; eauto. intros (rs' & jmp & A & B & C).
+  exists jmp; exists k; exists rs'.
+  split. eexact A. 
+  split. apply agree_exten with rs0; auto with asmgen.
+  exact B. 
+
+- (* Mcond false *)
+  exploit eval_condition_lessdef. eapply preg_vals; eauto. eauto. eauto. intros EC.
+  left; eapply exec_straight_steps; eauto. intros. simpl in TR.
+  exploit transl_cond_branch_correct; eauto. intros (rs' & jmp & A & B & C).
+  econstructor; split.
+  eapply exec_straight_opt_right. eexact A. apply exec_straight_one. eexact B. auto.
+  split. apply agree_exten with rs0; auto. intros. Simpl.
+  simpl; congruence.
+
+- (* Mjumptable *)
+  assert (f0 = f) by congruence. subst f0.
+  inv AT. monadInv H6.
+  exploit functions_transl; eauto. intro FN.
+  generalize (transf_function_no_overflow _ _ H5); intro NOOV.
+  exploit find_label_goto_label. eauto. eauto.
+  instantiate (2 := rs0#X16 <- Vundef).
+  Simpl. eauto.
+  eauto.
+  intros [tc' [rs' [A [B C]]]].
+  exploit ireg_val; eauto. rewrite H. intros LD; inv LD.
+  left; econstructor; split.
+  apply plus_one. econstructor; eauto.
+  eapply find_instr_tail; eauto.
+  simpl. Simpl. rewrite <- H9. unfold Mach.label in H0; unfold label; rewrite H0. eexact A.
+  econstructor; eauto.
+  eapply agree_undef_regs; eauto.
+  simpl. intros. rewrite C; auto with asmgen. Simpl.
+  congruence.
+
+- (* Mreturn *)
+  assert (f0 = f) by congruence. subst f0.
+  inversion AT; subst. simpl in H6; monadInv H6.
+  assert (NOOV: list_length_z tf.(fn_code) <= Ptrofs.max_unsigned).
+    eapply transf_function_no_overflow; eauto.
+  exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z).
+  exploit exec_straight_steps_2; eauto using functions_transl.                      
+  intros (ofs' & P & Q).
+  left; econstructor; split.
+  (* execution *)
+  eapply plus_right'. eapply exec_straight_exec; eauto.
+  econstructor. eexact P. eapply functions_transl; eauto. eapply find_instr_tail. eexact Q.
+  simpl. reflexivity.
+  traceEq.
+  (* match states *)
+  econstructor; eauto.
+  apply agree_set_other; auto with asmgen.
+
+- (* internal function *)
+
+  exploit functions_translated; eauto. intros [tf [A B]]. monadInv B.
+  generalize EQ; intros EQ'. monadInv EQ'.
+  destruct (zlt Ptrofs.max_unsigned (list_length_z x0.(fn_code))); inversion EQ1. clear EQ1. subst x0.
+  unfold store_stack in *.
+  exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl.
+  intros [m1' [C D]].
+  exploit Mem.storev_extends. eexact D. eexact H1. eauto. eauto.
+  intros [m2' [F G]].
+  simpl chunk_of_type in F.
+  exploit Mem.storev_extends. eexact G. eexact H2. eauto. eauto.
+  intros [m3' [P Q]].
+  change (chunk_of_type Tptr) with Mint64 in *.
+  (* Execution of function prologue *)
+  monadInv EQ0. rewrite transl_code'_transl_code in EQ1.
+  set (tfbody := Pallocframe (fn_stacksize f) (fn_link_ofs f) ::
+                 storeptr RA XSP (fn_retaddr_ofs f) x0) in *.
+  set (tf := {| fn_sig := Mach.fn_sig f; fn_code := tfbody |}) in *.
+  set (rs2 := nextinstr (rs0#X29 <- (parent_sp s) #SP <- sp #X16 <- Vundef)).
+  exploit (storeptr_correct tge tf XSP (fn_retaddr_ofs f) RA x0 m2' m3' rs2).
+    simpl preg_of_iregsp. change (rs2 X30) with (rs0 X30). rewrite ATLR. 
+    change (rs2 X2) with sp. eexact P. 
+    simpl; congruence. congruence.
+  intros (rs3 & U & V).
+  assert (EXEC_PROLOGUE:
+            exec_straight tge tf
+              tf.(fn_code) rs0 m'
+              x0 rs3 m3').
+  { change (fn_code tf) with tfbody; unfold tfbody.
+    apply exec_straight_step with rs2 m2'.
+    unfold exec_instr. rewrite C. fold sp.
+    rewrite <- (sp_val _ _ _ AG). rewrite F. reflexivity.
+    reflexivity. 
+    eexact U. }
+  exploit exec_straight_steps_2; eauto using functions_transl. lia. constructor.
+  intros (ofs' & X & Y).                    
+  left; exists (State rs3 m3'); split.
+  eapply exec_straight_steps_1; eauto. lia. constructor.
+  econstructor; eauto.
+  rewrite X; econstructor; eauto. 
+  apply agree_exten with rs2; eauto with asmgen.
+  unfold rs2. 
+  apply agree_nextinstr. apply agree_set_other; auto with asmgen.
+  apply agree_change_sp with (parent_sp s). 
+  apply agree_undef_regs with rs0. auto.
+Local Transparent destroyed_at_function_entry. simpl.
+  simpl; intros; Simpl.
+  unfold sp; congruence.
+  intros. rewrite V by auto with asmgen. reflexivity.
+
+- (* external function *)
+  exploit functions_translated; eauto.
+  intros [tf [A B]]. simpl in B. inv B.
+  exploit extcall_arguments_match; eauto.
+  intros [args' [C D]].
+  exploit external_call_mem_extends; eauto.
+  intros [res' [m2' [P [Q [R S]]]]].
+  left; econstructor; split.
+  apply plus_one. eapply exec_step_external; eauto.
+  eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  econstructor; eauto.
+  unfold loc_external_result. apply agree_set_other; auto. apply agree_set_pair; auto.
+  apply agree_undef_caller_save_regs; auto. 
+
+- (* return *)
+  inv STACKS. simpl in *.
+  right. split. lia. split. auto.
+  rewrite <- ATPC in H5.
+  econstructor; eauto. congruence.
+>>>>>>> master
 Qed.
 
 Lemma undef_regs_other_2:
diff --git a/aarch64/TO_MERGE/Asmgenproof1.v b/aarch64/TO_MERGE/Asmgenproof1.v
new file mode 100644
index 00000000..93c1f1ed
--- /dev/null
+++ b/aarch64/TO_MERGE/Asmgenproof1.v
@@ -0,0 +1,1836 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*         Xavier Leroy, Collège de France and INRIA Paris             *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Correctness proof for AArch64 code generation: auxiliary results. *)
+
+Require Import Recdef Coqlib Zwf Zbits.
+Require Import Maps Errors AST Integers Floats Values Memory Globalenvs.
+Require Import Op Locations Mach Asm Conventions.
+Require Import Asmgen.
+Require Import Asmgenproof0.
+
+Local Transparent Archi.ptr64.
+
+(** Properties of registers *)
+
+Lemma preg_of_iregsp_not_PC: forall r, preg_of_iregsp r <> PC.
+Proof.
+  destruct r; simpl; congruence.
+Qed.
+Global Hint Resolve preg_of_iregsp_not_PC: asmgen.
+
+Lemma preg_of_not_X16: forall r, preg_of r <> X16.
+Proof.
+  destruct r; simpl; congruence.
+Qed.
+
+Lemma ireg_of_not_X16: forall r x, ireg_of r = OK x -> x <> X16.
+Proof.
+  unfold ireg_of; intros. destruct (preg_of r) eqn:E; inv H.
+  red; intros; subst x. elim (preg_of_not_X16 r); auto.
+Qed.
+
+Lemma ireg_of_not_X16': forall r x, ireg_of r = OK x -> IR x <> IR X16.
+Proof.
+  intros. apply ireg_of_not_X16 in H. congruence.
+Qed.
+
+Global Hint Resolve preg_of_not_X16 ireg_of_not_X16 ireg_of_not_X16': asmgen.
+
+(** Useful simplification tactic *)
+
+
+Ltac Simplif :=
+  ((rewrite nextinstr_inv by eauto with asmgen)
+  || (rewrite nextinstr_inv1 by eauto with asmgen)
+  || (rewrite Pregmap.gss)
+  || (rewrite nextinstr_pc)
+  || (rewrite Pregmap.gso by eauto with asmgen)); auto with asmgen.
+
+Ltac Simpl := repeat Simplif.
+
+(** * Correctness of ARM constructor functions *)
+
+Section CONSTRUCTORS.
+
+Variable ge: genv.
+Variable fn: function.
+
+(** Decomposition of integer literals *)
+
+Inductive wf_decomposition: list (Z * Z) -> Prop :=
+  | wf_decomp_nil:
+      wf_decomposition nil
+  | wf_decomp_cons: forall m n p l,
+      n = Zzero_ext 16 m -> 0 <= p -> wf_decomposition l ->
+      wf_decomposition ((n, p) :: l).
+
+Lemma decompose_int_wf:
+  forall N n p, 0 <= p -> wf_decomposition (decompose_int N n p).
+Proof.
+Local Opaque Zzero_ext.
+  induction N as [ | N]; simpl; intros.
+- constructor.
+- set (frag := Zzero_ext 16 (Z.shiftr n p)) in *. destruct (Z.eqb frag 0).
++ apply IHN. lia.
++ econstructor. reflexivity. lia. apply IHN; lia. 
+Qed.
+
+Fixpoint recompose_int (accu: Z) (l: list (Z * Z)) : Z :=
+  match l with
+  | nil => accu
+  | (n, p) :: l => recompose_int (Zinsert accu n p 16) l
+  end.
+
+Lemma decompose_int_correct:
+  forall N n p accu,
+  0 <= p ->
+  (forall i, p <= i -> Z.testbit accu i = false) ->
+  (forall i, 0 <= i < p + Z.of_nat N * 16 ->
+   Z.testbit (recompose_int accu (decompose_int N n p)) i =
+   if zlt i p then Z.testbit accu i else Z.testbit n i).
+Proof.
+  induction N as [ | N]; intros until accu; intros PPOS ABOVE i RANGE.
+- simpl. rewrite zlt_true; auto. extlia.
+- rewrite inj_S in RANGE. simpl.
+  set (frag := Zzero_ext 16 (Z.shiftr n p)).
+  assert (FRAG: forall i, p <= i < p + 16 -> Z.testbit n i = Z.testbit frag (i - p)).
+  { unfold frag; intros. rewrite Zzero_ext_spec by lia. rewrite zlt_true by lia.
+    rewrite Z.shiftr_spec by lia. f_equal; lia. }
+  destruct (Z.eqb_spec frag 0).
++ rewrite IHN.
+* destruct (zlt i p). rewrite zlt_true by lia. auto.
+  destruct (zlt i (p + 16)); auto.
+  rewrite ABOVE by lia. rewrite FRAG by lia. rewrite e, Z.testbit_0_l. auto.
+* lia.
+* intros; apply ABOVE; lia.
+* extlia.
++ simpl. rewrite IHN.
+* destruct (zlt i (p + 16)).
+** rewrite Zinsert_spec by lia. unfold proj_sumbool.
+   rewrite zlt_true by lia.
+   destruct (zlt i p).
+   rewrite zle_false by lia. auto.
+   rewrite zle_true by lia. simpl. symmetry; apply FRAG; lia.
+** rewrite Z.ldiff_spec, Z.shiftl_spec by lia.
+   change 65535 with (two_p 16 - 1). rewrite Ztestbit_two_p_m1 by lia.
+   rewrite zlt_false by lia. rewrite zlt_false by lia. apply andb_true_r. 
+* lia.
+* intros. rewrite Zinsert_spec by lia. unfold proj_sumbool.
+  rewrite zle_true by lia. rewrite zlt_false by lia. simpl.
+  apply ABOVE. lia.
+* extlia.
+Qed.
+
+Corollary decompose_int_eqmod: forall N n,
+  eqmod (two_power_nat (N * 16)%nat) (recompose_int 0 (decompose_int N n 0)) n.
+Proof.
+  intros; apply eqmod_same_bits; intros.
+  rewrite decompose_int_correct. apply zlt_false; lia. 
+  lia. intros; apply Z.testbit_0_l. extlia.
+Qed.
+
+Corollary decompose_notint_eqmod: forall N n,
+  eqmod (two_power_nat (N * 16)%nat)
+        (Z.lnot (recompose_int 0 (decompose_int N (Z.lnot n) 0))) n.
+Proof.
+  intros; apply eqmod_same_bits; intros.
+  rewrite Z.lnot_spec, decompose_int_correct.
+  rewrite zlt_false by lia. rewrite Z.lnot_spec by lia. apply negb_involutive.
+  lia. intros; apply Z.testbit_0_l. extlia. lia.
+Qed.
+
+Lemma negate_decomposition_wf:
+  forall l, wf_decomposition l -> wf_decomposition (negate_decomposition l).
+Proof.
+  induction 1; simpl; econstructor; auto.
+  instantiate (1 := (Z.lnot m)).
+  apply equal_same_bits; intros.
+  rewrite H. change 65535 with (two_p 16 - 1).
+  rewrite Z.lxor_spec, !Zzero_ext_spec, Z.lnot_spec, Ztestbit_two_p_m1 by lia.
+  destruct (zlt i 16).
+  apply xorb_true_r.
+  auto.
+Qed.
+
+Lemma Zinsert_eqmod:
+  forall n x1 x2 y p l, 0 <= p -> 0 <= l ->
+  eqmod (two_power_nat n) x1 x2 ->
+  eqmod (two_power_nat n) (Zinsert x1 y p l) (Zinsert x2 y p l).
+Proof.
+  intros. apply eqmod_same_bits; intros. rewrite ! Zinsert_spec by lia.
+  destruct (zle p i && zlt i (p + l)); auto.
+  apply same_bits_eqmod with n; auto.
+Qed.
+
+Lemma Zinsert_0_l:
+  forall y p l,
+  0 <= p -> 0 <= l ->
+  Z.shiftl (Zzero_ext l y) p = Zinsert 0 (Zzero_ext l y) p l.
+Proof.
+  intros. apply equal_same_bits; intros.
+  rewrite Zinsert_spec by lia. unfold proj_sumbool.
+  destruct (zlt i p); [rewrite zle_false by lia|rewrite zle_true by lia]; simpl.
+- rewrite Z.testbit_0_l, Z.shiftl_spec_low by auto. auto.
+- rewrite Z.shiftl_spec by lia. 
+  destruct (zlt i (p + l)); auto.
+  rewrite Zzero_ext_spec, zlt_false, Z.testbit_0_l by lia. auto.
+Qed.
+
+Lemma recompose_int_negated:
+  forall l, wf_decomposition l ->
+  forall accu, recompose_int (Z.lnot accu) (negate_decomposition l) = Z.lnot (recompose_int accu l).
+Proof.
+  induction 1; intros accu; simpl.
+- auto.
+- rewrite <- IHwf_decomposition. f_equal. apply equal_same_bits; intros. 
+  rewrite Z.lnot_spec, ! Zinsert_spec, Z.lxor_spec, Z.lnot_spec by lia.
+  unfold proj_sumbool.
+  destruct (zle p i); simpl; auto.
+  destruct (zlt i (p + 16)); simpl; auto.
+  change 65535 with (two_p 16 - 1).
+  rewrite Ztestbit_two_p_m1 by lia. rewrite zlt_true by lia.
+  apply xorb_true_r. 
+Qed.
+
+Lemma exec_loadimm_k_w:
+  forall (rd: ireg) k m l,
+  wf_decomposition l ->
+  forall (rs: regset) accu,
+  rs#rd = Vint (Int.repr accu) ->
+  exists rs',
+     exec_straight_opt ge fn (loadimm_k W rd l k) rs m k rs' m
+  /\ rs'#rd = Vint (Int.repr (recompose_int accu l))
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+  induction 1; intros rs accu ACCU; simpl.
+- exists rs; split. apply exec_straight_opt_refl. auto.
+- destruct (IHwf_decomposition
+                (nextinstr (rs#rd <- (insert_in_int rs#rd n p 16)))
+                (Zinsert accu n p 16))
+  as (rs' & P & Q & R).
+  Simpl. rewrite ACCU. simpl. f_equal. apply Int.eqm_samerepr. 
+  apply Zinsert_eqmod. auto. lia. apply Int.eqm_sym; apply Int.eqm_unsigned_repr.
+  exists rs'; split.
+  eapply exec_straight_opt_step_opt. simpl; eauto. auto. exact P.
+  split. exact Q. intros; Simpl. rewrite R by auto. Simpl.
+Qed.
+
+Lemma exec_loadimm_z_w:
+  forall rd l k rs m,
+  wf_decomposition l ->
+  exists rs',
+     exec_straight ge fn (loadimm_z W rd l k) rs m k rs' m
+  /\ rs'#rd = Vint (Int.repr (recompose_int 0 l))
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+  unfold loadimm_z; destruct 1.
+- econstructor; split.
+  apply exec_straight_one. simpl; eauto. auto.
+  split. Simpl.
+  intros; Simpl.
+- set (accu0 := Zinsert 0 n p 16).
+  set (rs1 := nextinstr (rs#rd <- (Vint (Int.repr accu0)))).
+  destruct (exec_loadimm_k_w rd k m l H1 rs1 accu0) as (rs2 & P & Q & R); auto.
+  unfold rs1; Simpl.
+  exists rs2; split.
+  eapply exec_straight_opt_step; eauto.
+  simpl. unfold rs1. do 5 f_equal. unfold accu0. rewrite H. apply Zinsert_0_l; lia.
+  reflexivity.
+  split. exact Q. 
+  intros. rewrite R by auto. unfold rs1; Simpl.
+Qed.
+
+Lemma exec_loadimm_n_w:
+  forall rd l k rs m,
+  wf_decomposition l ->
+  exists rs',
+     exec_straight ge fn (loadimm_n W rd l k) rs m k rs' m
+  /\ rs'#rd = Vint (Int.repr (Z.lnot (recompose_int 0 l)))
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+  unfold loadimm_n; destruct 1.
+- econstructor; split.
+  apply exec_straight_one. simpl; eauto. auto.
+  split. Simpl. 
+  intros; Simpl.
+- set (accu0 := Z.lnot (Zinsert 0 n p 16)).
+  set (rs1 := nextinstr (rs#rd <- (Vint (Int.repr accu0)))).
+  destruct (exec_loadimm_k_w rd k m (negate_decomposition l) 
+                                    (negate_decomposition_wf l H1)
+                                    rs1 accu0) as (rs2 & P & Q & R).
+  unfold rs1; Simpl.
+  exists rs2; split.
+  eapply exec_straight_opt_step; eauto.
+  simpl. unfold rs1. do 5 f_equal.
+  unfold accu0. f_equal. rewrite H. apply Zinsert_0_l; lia.
+  reflexivity.  
+  split. unfold accu0 in Q; rewrite recompose_int_negated in Q by auto. exact Q.
+  intros. rewrite R by auto. unfold rs1; Simpl.
+Qed.
+
+Lemma exec_loadimm32:
+  forall rd n k rs m,
+  exists rs',
+     exec_straight ge fn (loadimm32 rd n k) rs m k rs' m
+  /\ rs'#rd = Vint n
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+  unfold loadimm32, loadimm; intros.
+  destruct (is_logical_imm32 n).
+- econstructor; split.
+  apply exec_straight_one. simpl; eauto. auto.
+  split. Simpl. rewrite Int.repr_unsigned, Int.or_zero_l; auto.
+  intros; Simpl.
+- set (dz := decompose_int 2%nat (Int.unsigned n) 0).
+  set (dn := decompose_int 2%nat (Z.lnot (Int.unsigned n)) 0).
+  assert (A: Int.repr (recompose_int 0 dz) = n).
+  { transitivity (Int.repr (Int.unsigned n)).
+    apply Int.eqm_samerepr. apply decompose_int_eqmod. 
+    apply Int.repr_unsigned. }
+  assert (B: Int.repr (Z.lnot (recompose_int 0 dn)) = n).
+  { transitivity (Int.repr (Int.unsigned n)).
+    apply Int.eqm_samerepr. apply decompose_notint_eqmod. 
+    apply Int.repr_unsigned. }
+  destruct Nat.leb.
++ rewrite <- A. apply exec_loadimm_z_w. apply decompose_int_wf; lia.
++ rewrite <- B. apply exec_loadimm_n_w. apply decompose_int_wf; lia.
+Qed.
+
+Lemma exec_loadimm_k_x:
+  forall (rd: ireg) k m l,
+  wf_decomposition l ->
+  forall (rs: regset) accu,
+  rs#rd = Vlong (Int64.repr accu) ->
+  exists rs',
+     exec_straight_opt ge fn (loadimm_k X rd l k) rs m k rs' m
+  /\ rs'#rd = Vlong (Int64.repr (recompose_int accu l))
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+  induction 1; intros rs accu ACCU; simpl.
+- exists rs; split. apply exec_straight_opt_refl. auto.
+- destruct (IHwf_decomposition
+                (nextinstr (rs#rd <- (insert_in_long rs#rd n p 16)))
+                (Zinsert accu n p 16))
+  as (rs' & P & Q & R).
+  Simpl. rewrite ACCU. simpl. f_equal. apply Int64.eqm_samerepr. 
+  apply Zinsert_eqmod. auto. lia. apply Int64.eqm_sym; apply Int64.eqm_unsigned_repr.
+  exists rs'; split.
+  eapply exec_straight_opt_step_opt. simpl; eauto. auto. exact P.
+  split. exact Q. intros; Simpl. rewrite R by auto. Simpl.
+Qed.
+
+Lemma exec_loadimm_z_x:
+  forall rd l k rs m,
+  wf_decomposition l ->
+  exists rs',
+     exec_straight ge fn (loadimm_z X rd l k) rs m k rs' m
+  /\ rs'#rd = Vlong (Int64.repr (recompose_int 0 l))
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+  unfold loadimm_z; destruct 1.
+- econstructor; split.
+  apply exec_straight_one. simpl; eauto. auto.
+  split. Simpl.
+  intros; Simpl.
+- set (accu0 := Zinsert 0 n p 16).
+  set (rs1 := nextinstr (rs#rd <- (Vlong (Int64.repr accu0)))).
+  destruct (exec_loadimm_k_x rd k m l H1 rs1 accu0) as (rs2 & P & Q & R); auto.
+  unfold rs1; Simpl.
+  exists rs2; split.
+  eapply exec_straight_opt_step; eauto.
+  simpl. unfold rs1. do 5 f_equal. unfold accu0. rewrite H. apply Zinsert_0_l; lia.
+  reflexivity.
+  split. exact Q. 
+  intros. rewrite R by auto. unfold rs1; Simpl.
+Qed.
+
+Lemma exec_loadimm_n_x:
+  forall rd l k rs m,
+  wf_decomposition l ->
+  exists rs',
+     exec_straight ge fn (loadimm_n X rd l k) rs m k rs' m
+  /\ rs'#rd = Vlong (Int64.repr (Z.lnot (recompose_int 0 l)))
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+  unfold loadimm_n; destruct 1.
+- econstructor; split.
+  apply exec_straight_one. simpl; eauto. auto.
+  split. Simpl. 
+  intros; Simpl.
+- set (accu0 := Z.lnot (Zinsert 0 n p 16)).
+  set (rs1 := nextinstr (rs#rd <- (Vlong (Int64.repr accu0)))).
+  destruct (exec_loadimm_k_x rd k m (negate_decomposition l) 
+                                    (negate_decomposition_wf l H1)
+                                    rs1 accu0) as (rs2 & P & Q & R).
+  unfold rs1; Simpl.
+  exists rs2; split.
+  eapply exec_straight_opt_step; eauto.
+  simpl. unfold rs1. do 5 f_equal.
+  unfold accu0. f_equal. rewrite H. apply Zinsert_0_l; lia.
+  reflexivity.  
+  split. unfold accu0 in Q; rewrite recompose_int_negated in Q by auto. exact Q.
+  intros. rewrite R by auto. unfold rs1; Simpl.
+Qed.
+
+Lemma exec_loadimm64:
+  forall rd n k rs m,
+  exists rs',
+     exec_straight ge fn (loadimm64 rd n k) rs m k rs' m
+  /\ rs'#rd = Vlong n
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+  unfold loadimm64, loadimm; intros.
+  destruct (is_logical_imm64 n).
+- econstructor; split.
+  apply exec_straight_one. simpl; eauto. auto.
+  split. Simpl. rewrite Int64.repr_unsigned, Int64.or_zero_l; auto.
+  intros; Simpl.
+- set (dz := decompose_int 4%nat (Int64.unsigned n) 0).
+  set (dn := decompose_int 4%nat (Z.lnot (Int64.unsigned n)) 0).
+  assert (A: Int64.repr (recompose_int 0 dz) = n).
+  { transitivity (Int64.repr (Int64.unsigned n)).
+    apply Int64.eqm_samerepr. apply decompose_int_eqmod. 
+    apply Int64.repr_unsigned. }
+  assert (B: Int64.repr (Z.lnot (recompose_int 0 dn)) = n).
+  { transitivity (Int64.repr (Int64.unsigned n)).
+    apply Int64.eqm_samerepr. apply decompose_notint_eqmod. 
+    apply Int64.repr_unsigned. }
+  destruct Nat.leb.
++ rewrite <- A. apply exec_loadimm_z_x. apply decompose_int_wf; lia.
++ rewrite <- B. apply exec_loadimm_n_x. apply decompose_int_wf; lia.
+Qed.
+
+(** Add immediate *)
+
+Lemma exec_addimm_aux_32:
+  forall (insn: iregsp -> iregsp -> Z -> instruction) (sem: val -> val -> val),
+  (forall rd r1 n rs m,
+    exec_instr ge fn (insn rd r1 n) rs m =
+      Next (nextinstr (rs#rd <- (sem rs#r1 (Vint (Int.repr n))))) m) ->
+  (forall v n1 n2, sem (sem v (Vint n1)) (Vint n2) = sem v (Vint (Int.add n1 n2))) ->
+  forall rd r1 n k rs m,
+  exists rs',
+     exec_straight ge fn (addimm_aux insn rd r1 (Int.unsigned n) k) rs m k rs' m
+  /\ rs'#rd = sem rs#r1 (Vint n)
+  /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+  intros insn sem SEM ASSOC; intros. unfold addimm_aux.
+  set (nlo := Zzero_ext 12 (Int.unsigned n)). set (nhi := Int.unsigned n - nlo).
+  assert (E: Int.unsigned n = nhi + nlo) by (unfold nhi; lia).
+  rewrite <- (Int.repr_unsigned n).
+  destruct (Z.eqb_spec nhi 0); [|destruct (Z.eqb_spec nlo 0)].
+- econstructor; split. apply exec_straight_one. apply SEM. Simpl. 
+  split. Simpl. do 3 f_equal; lia.
+  intros; Simpl.
+- econstructor; split. apply exec_straight_one. apply SEM. Simpl. 
+  split. Simpl. do 3 f_equal; lia.
+  intros; Simpl.
+- econstructor; split. eapply exec_straight_two.
+  apply SEM. apply SEM. Simpl. Simpl.
+  split. Simpl. rewrite ASSOC. do 2 f_equal. apply Int.eqm_samerepr.
+  rewrite E. auto with ints.
+  intros; Simpl.
+Qed.
+
+Lemma exec_addimm32:
+  forall rd r1 n k rs m,
+  r1 <> X16 ->
+  exists rs',
+     exec_straight ge fn (addimm32 rd r1 n k) rs m k rs' m
+  /\ rs'#rd = Val.add rs#r1 (Vint n)
+  /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+  intros. unfold addimm32. set (nn := Int.neg n).
+  destruct (Int.eq n (Int.zero_ext 24 n)); [| destruct (Int.eq nn (Int.zero_ext 24 nn))].
+- apply exec_addimm_aux_32 with (sem := Val.add). auto. intros; apply Val.add_assoc. 
+- rewrite <- Val.sub_opp_add.
+  apply exec_addimm_aux_32 with (sem := Val.sub). auto.
+  intros. rewrite ! Val.sub_add_opp, Val.add_assoc. rewrite Int.neg_add_distr. auto.
+- destruct (Int.lt n Int.zero).
++ rewrite <- Val.sub_opp_add; fold nn.
+  edestruct (exec_loadimm32 X16 nn) as (rs1 & A & B & C).
+  econstructor; split.
+  eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. auto.
+  split. Simpl. rewrite B, C; eauto with asmgen.
+  intros; Simpl.
++ edestruct (exec_loadimm32 X16 n) as (rs1 & A & B & C).
+  econstructor; split.
+  eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. auto.
+  split. Simpl. rewrite B, C; eauto with asmgen.
+  intros; Simpl.
+Qed.
+
+Lemma exec_addimm_aux_64:
+  forall (insn: iregsp -> iregsp -> Z -> instruction) (sem: val -> val -> val),
+  (forall rd r1 n rs m,
+    exec_instr ge fn (insn rd r1 n) rs m =
+      Next (nextinstr (rs#rd <- (sem rs#r1 (Vlong (Int64.repr n))))) m) ->
+  (forall v n1 n2, sem (sem v (Vlong n1)) (Vlong n2) = sem v (Vlong (Int64.add n1 n2))) ->
+  forall rd r1 n k rs m,
+  exists rs',
+     exec_straight ge fn (addimm_aux insn rd r1 (Int64.unsigned n) k) rs m k rs' m
+  /\ rs'#rd = sem rs#r1 (Vlong n)
+  /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+  intros insn sem SEM ASSOC; intros. unfold addimm_aux.
+  set (nlo := Zzero_ext 12 (Int64.unsigned n)). set (nhi := Int64.unsigned n - nlo).
+  assert (E: Int64.unsigned n = nhi + nlo) by (unfold nhi; lia).
+  rewrite <- (Int64.repr_unsigned n).
+  destruct (Z.eqb_spec nhi 0); [|destruct (Z.eqb_spec nlo 0)].
+- econstructor; split. apply exec_straight_one. apply SEM. Simpl. 
+  split. Simpl. do 3 f_equal; lia.
+  intros; Simpl.
+- econstructor; split. apply exec_straight_one. apply SEM. Simpl. 
+  split. Simpl. do 3 f_equal; lia.
+  intros; Simpl.
+- econstructor; split. eapply exec_straight_two.
+  apply SEM. apply SEM. Simpl. Simpl.
+  split. Simpl. rewrite ASSOC. do 2 f_equal. apply Int64.eqm_samerepr.
+  rewrite E. auto with ints.
+  intros; Simpl.
+Qed.
+
+Lemma exec_addimm64:
+  forall rd r1 n k rs m,
+  preg_of_iregsp r1 <> X16 ->
+  exists rs',
+     exec_straight ge fn (addimm64 rd r1 n k) rs m k rs' m
+  /\ rs'#rd = Val.addl rs#r1 (Vlong n)
+  /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+  intros. 
+  unfold addimm64. set (nn := Int64.neg n).
+  destruct (Int64.eq n (Int64.zero_ext 24 n)); [| destruct (Int64.eq nn (Int64.zero_ext 24 nn))].
+- apply exec_addimm_aux_64 with (sem := Val.addl). auto. intros; apply Val.addl_assoc. 
+- rewrite <- Val.subl_opp_addl.
+  apply exec_addimm_aux_64 with (sem := Val.subl). auto.
+  intros. rewrite ! Val.subl_addl_opp, Val.addl_assoc. rewrite Int64.neg_add_distr. auto.
+- destruct (Int64.lt n Int64.zero).
++ rewrite <- Val.subl_opp_addl; fold nn.
+  edestruct (exec_loadimm64 X16 nn) as (rs1 & A & B & C).
+  econstructor; split.
+  eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. Simpl. 
+  split. Simpl. rewrite B, C; eauto with asmgen. simpl. rewrite Int64.shl'_zero. auto.
+  intros; Simpl.
++ edestruct (exec_loadimm64 X16 n) as (rs1 & A & B & C).
+  econstructor; split.
+  eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. Simpl. 
+  split. Simpl. rewrite B, C; eauto with asmgen. simpl. rewrite Int64.shl'_zero. auto.
+  intros; Simpl.
+Qed.
+
+(** Logical immediate *)
+
+Lemma exec_logicalimm32:
+  forall (insn1: ireg -> ireg0 -> Z -> instruction)
+         (insn2: ireg -> ireg0 -> ireg -> shift_op -> instruction)
+         (sem: val -> val -> val),
+  (forall rd r1 n rs m,
+    exec_instr ge fn (insn1 rd r1 n) rs m =
+      Next (nextinstr (rs#rd <- (sem rs##r1 (Vint (Int.repr n))))) m) ->
+  (forall rd r1 r2 s rs m,
+    exec_instr ge fn (insn2 rd r1 r2 s) rs m =
+      Next (nextinstr (rs#rd <- (sem rs##r1 (eval_shift_op_int rs#r2 s)))) m) ->
+  forall rd r1 n k rs m,
+  r1 <> X16 ->
+  exists rs',
+     exec_straight ge fn (logicalimm32 insn1 insn2 rd r1 n k) rs m k rs' m
+  /\ rs'#rd = sem rs#r1 (Vint n)
+  /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+  intros until sem; intros SEM1 SEM2; intros. unfold logicalimm32.
+  destruct (is_logical_imm32 n).
+- econstructor; split. 
+  apply exec_straight_one. apply SEM1. reflexivity. 
+  split. Simpl. rewrite Int.repr_unsigned; auto. intros; Simpl.
+- edestruct (exec_loadimm32 X16 n) as (rs1 & A & B & C).
+  econstructor; split.
+  eapply exec_straight_trans. eexact A.  
+  apply exec_straight_one. apply SEM2. reflexivity.
+  split. Simpl. f_equal; auto. apply C; auto with asmgen.
+  intros; Simpl. 
+Qed.
+
+Lemma exec_logicalimm64:
+  forall (insn1: ireg -> ireg0 -> Z -> instruction)
+         (insn2: ireg -> ireg0 -> ireg -> shift_op -> instruction)
+         (sem: val -> val -> val),
+  (forall rd r1 n rs m,
+    exec_instr ge fn (insn1 rd r1 n) rs m =
+      Next (nextinstr (rs#rd <- (sem rs###r1 (Vlong (Int64.repr n))))) m) ->
+  (forall rd r1 r2 s rs m,
+    exec_instr ge fn (insn2 rd r1 r2 s) rs m =
+      Next (nextinstr (rs#rd <- (sem rs###r1 (eval_shift_op_long rs#r2 s)))) m) ->
+  forall rd r1 n k rs m,
+  r1 <> X16 ->
+  exists rs',
+     exec_straight ge fn (logicalimm64 insn1 insn2 rd r1 n k) rs m k rs' m
+  /\ rs'#rd = sem rs#r1 (Vlong n)
+  /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+  intros until sem; intros SEM1 SEM2; intros. unfold logicalimm64.
+  destruct (is_logical_imm64 n).
+- econstructor; split. 
+  apply exec_straight_one. apply SEM1. reflexivity. 
+  split. Simpl. rewrite Int64.repr_unsigned. auto. intros; Simpl.
+- edestruct (exec_loadimm64 X16 n) as (rs1 & A & B & C).
+  econstructor; split.
+  eapply exec_straight_trans. eexact A.  
+  apply exec_straight_one. apply SEM2. reflexivity.
+  split. Simpl. f_equal; auto. apply C; auto with asmgen.
+  intros; Simpl. 
+Qed.
+
+(** Load address of symbol *)
+
+Lemma exec_loadsymbol: forall rd s ofs k rs m,
+  rd <> X16 \/ SelectOp.symbol_is_relocatable s = false ->
+  exists rs',
+     exec_straight ge fn (loadsymbol rd s ofs k) rs m k rs' m
+  /\ rs'#rd = Genv.symbol_address ge s ofs
+  /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+  unfold loadsymbol; intros. destruct (SelectOp.symbol_is_relocatable s).
+- predSpec Ptrofs.eq Ptrofs.eq_spec ofs Ptrofs.zero.
++ subst ofs. econstructor; split.
+  apply exec_straight_one; [simpl; eauto | reflexivity].
+  split. Simpl. intros; Simpl.
++ exploit exec_addimm64. instantiate (1 := rd). simpl. destruct H; congruence.
+  intros (rs1 & A & B & C).
+  econstructor; split.
+  econstructor. simpl; eauto. auto. eexact A. 
+  split. simpl in B; rewrite B. Simpl. 
+  rewrite <- Genv.shift_symbol_address_64 by auto.
+  rewrite Ptrofs.add_zero_l, Ptrofs.of_int64_to_int64 by auto. auto.
+  intros. rewrite C by auto. Simpl.
+- econstructor; split.
+  eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto.
+  split. Simpl. rewrite symbol_high_low; auto. 
+  intros; Simpl.
+Qed.
+
+(** Shifted operands *)
+
+Remark transl_shift_not_none:
+  forall s a, transl_shift s a <> SOnone.
+Proof.
+  destruct s; intros; simpl; congruence.
+Qed.
+
+Remark or_zero_eval_shift_op_int:
+  forall v s, s <> SOnone -> Val.or (Vint Int.zero) (eval_shift_op_int v s) = eval_shift_op_int v s.
+Proof.
+  intros; destruct s; try congruence; destruct v; auto; simpl;
+  destruct (Int.ltu n Int.iwordsize); auto; rewrite Int.or_zero_l; auto.
+Qed.
+
+Remark or_zero_eval_shift_op_long:
+  forall v s, s <> SOnone -> Val.orl (Vlong Int64.zero) (eval_shift_op_long v s) = eval_shift_op_long v s.
+Proof.
+  intros; destruct s; try congruence; destruct v; auto; simpl;
+  destruct (Int.ltu n Int64.iwordsize'); auto; rewrite Int64.or_zero_l; auto.
+Qed.
+
+Remark add_zero_eval_shift_op_long:
+  forall v s, s <> SOnone -> Val.addl (Vlong Int64.zero) (eval_shift_op_long v s) = eval_shift_op_long v s.
+Proof.
+  intros; destruct s; try congruence; destruct v; auto; simpl;
+  destruct (Int.ltu n Int64.iwordsize'); auto; rewrite Int64.add_zero_l; auto.
+Qed.
+
+Lemma transl_eval_shift: forall s v (a: amount32),
+  eval_shift_op_int v (transl_shift s a) = eval_shift s v a.
+Proof.
+  intros. destruct s; simpl; auto.
+Qed.
+
+Lemma transl_eval_shift': forall s v (a: amount32),
+  Val.or (Vint Int.zero) (eval_shift_op_int v (transl_shift s a)) = eval_shift s v a.
+Proof.
+  intros. rewrite or_zero_eval_shift_op_int by (apply transl_shift_not_none).
+  apply transl_eval_shift.
+Qed.
+
+Lemma transl_eval_shiftl: forall s v (a: amount64),
+  eval_shift_op_long v (transl_shift s a) = eval_shiftl s v a.
+Proof.
+  intros. destruct s; simpl; auto.
+Qed.
+
+Lemma transl_eval_shiftl': forall s v (a: amount64),
+  Val.orl (Vlong Int64.zero) (eval_shift_op_long v (transl_shift s a)) = eval_shiftl s v a.
+Proof.
+  intros. rewrite or_zero_eval_shift_op_long by (apply transl_shift_not_none).
+  apply transl_eval_shiftl.
+Qed.
+
+Lemma transl_eval_shiftl'': forall s v (a: amount64),
+  Val.addl (Vlong Int64.zero) (eval_shift_op_long v (transl_shift s a)) = eval_shiftl s v a.
+Proof.
+  intros. rewrite add_zero_eval_shift_op_long by (apply transl_shift_not_none).
+  apply transl_eval_shiftl.
+Qed.
+
+(** Zero- and Sign- extensions *)
+
+Lemma exec_move_extended_base: forall rd r1 ex k rs m,
+  exists rs',
+     exec_straight ge fn (move_extended_base rd r1 ex k) rs m k rs' m
+  /\ rs' rd = match ex with Xsgn32 => Val.longofint rs#r1 | Xuns32 => Val.longofintu rs#r1 end
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+  unfold move_extended_base; destruct ex; econstructor;
+  (split; [apply exec_straight_one; [simpl;eauto|auto] | split; [Simpl|intros;Simpl]]).
+Qed.
+
+Lemma exec_move_extended: forall rd r1 ex (a: amount64) k rs m,
+  exists rs',
+     exec_straight ge fn (move_extended rd r1 ex a k) rs m k rs' m
+  /\ rs' rd = Op.eval_extend ex rs#r1 a
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+  unfold move_extended; intros. predSpec Int.eq Int.eq_spec a Int.zero.
+- exploit (exec_move_extended_base rd r1 ex). intros (rs' & A & B & C).
+  exists rs'; split. eexact A. split. unfold Op.eval_extend. rewrite H. rewrite B.
+  destruct ex, (rs r1); simpl; auto; rewrite Int64.shl'_zero; auto.
+  auto.
+- Local Opaque Val.addl.
+  exploit (exec_move_extended_base rd r1 ex). intros (rs' & A & B & C).
+  econstructor; split.
+  eapply exec_straight_trans. eexact A. apply exec_straight_one.
+  unfold exec_instr. change (SOlsl a) with (transl_shift Slsl a). rewrite transl_eval_shiftl''. eauto. auto.
+  split. Simpl. rewrite B. auto. 
+  intros; Simpl.
+Qed.
+
+Lemma exec_arith_extended:
+  forall (sem: val -> val -> val)
+         (insnX: iregsp -> iregsp -> ireg -> extend_op -> instruction)
+         (insnS: ireg -> ireg0 -> ireg -> shift_op -> instruction),
+  (forall rd r1 r2 x rs m,
+    exec_instr ge fn (insnX rd r1 r2 x) rs m =
+      Next (nextinstr (rs#rd <- (sem rs#r1 (eval_extend rs#r2 x)))) m) ->
+  (forall rd r1 r2 s rs m,
+    exec_instr ge fn (insnS rd r1 r2 s) rs m =
+      Next (nextinstr (rs#rd <- (sem rs###r1 (eval_shift_op_long rs#r2 s)))) m) ->
+  forall (rd r1 r2: ireg) (ex: extension) (a: amount64) (k: code) rs m,
+  r1 <> X16 ->
+  exists rs',
+     exec_straight ge fn (arith_extended insnX insnS rd r1 r2 ex a k) rs m k rs' m
+  /\ rs'#rd = sem rs#r1 (Op.eval_extend ex rs#r2 a)
+  /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+  intros sem insnX insnS EX ES; intros. unfold arith_extended. destruct (Int.ltu a (Int.repr 5)).
+- econstructor; split. 
+  apply exec_straight_one. rewrite EX; eauto. auto.
+  split. Simpl. f_equal. destruct ex; auto.
+  intros; Simpl.
+- exploit (exec_move_extended_base X16 r2 ex). intros (rs' & A & B & C).
+  econstructor; split.
+  eapply exec_straight_trans. eexact A. apply exec_straight_one. 
+  rewrite ES. eauto. auto.
+  split. Simpl. unfold ir0x. rewrite C by eauto with asmgen. f_equal. 
+  rewrite B. destruct ex; auto.
+  intros; Simpl.
+Qed. 
+
+(** Extended right shift *)
+
+Lemma exec_shrx32: forall (rd r1: ireg) (n: int) k v (rs: regset) m,
+  Val.shrx rs#r1 (Vint n) = Some v ->
+  r1 <> X16 ->
+  exists rs',
+     exec_straight ge fn (shrx32 rd r1 n k) rs m k rs' m
+  /\ rs'#rd = v
+  /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+  unfold shrx32; intros. apply Val.shrx_shr_2 in H.
+  destruct (Int.eq n Int.zero) eqn:E.
+- econstructor; split. apply exec_straight_one; [simpl;eauto|auto]. 
+  split. Simpl. subst v; auto. intros; Simpl.
+- econstructor; split. eapply exec_straight_three.
+  unfold exec_instr. rewrite or_zero_eval_shift_op_int by congruence. eauto.
+  simpl; eauto.
+  unfold exec_instr. rewrite or_zero_eval_shift_op_int by congruence. eauto.
+  auto. auto. auto.
+  split. subst v; Simpl. intros; Simpl.
+Qed.
+ 
+Lemma exec_shrx64: forall (rd r1: ireg) (n: int) k v (rs: regset) m,
+  Val.shrxl rs#r1 (Vint n) = Some v ->
+  r1 <> X16 ->
+  exists rs',
+     exec_straight ge fn (shrx64 rd r1 n k) rs m k rs' m
+  /\ rs'#rd = v
+  /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+  unfold shrx64; intros. apply Val.shrxl_shrl_2 in H.
+  destruct (Int.eq n Int.zero) eqn:E.
+- econstructor; split. apply exec_straight_one; [simpl;eauto|auto]. 
+  split. Simpl. subst v; auto. intros; Simpl.
+- econstructor; split. eapply exec_straight_three.
+  unfold exec_instr. rewrite or_zero_eval_shift_op_long by congruence. eauto.
+  simpl; eauto.
+  unfold exec_instr. rewrite or_zero_eval_shift_op_long by congruence. eauto.
+  auto. auto. auto.
+  split. subst v; Simpl. intros; Simpl.
+Qed.
+
+(** Condition bits *)
+
+Lemma compare_int_spec: forall rs v1 v2 m,
+  let rs' := compare_int rs v1 v2 m in
+     rs'#CN = (Val.negative (Val.sub v1 v2))
+  /\ rs'#CZ = (Val.cmpu (Mem.valid_pointer m) Ceq v1 v2)
+  /\ rs'#CC = (Val.cmpu (Mem.valid_pointer m) Cge v1 v2)
+  /\ rs'#CV = (Val.sub_overflow v1 v2).
+Proof.
+  intros; unfold rs'; auto.
+Qed.
+
+Lemma eval_testcond_compare_sint: forall c v1 v2 b rs m,
+  Val.cmp_bool c v1 v2 = Some b ->
+  eval_testcond (cond_for_signed_cmp c) (compare_int rs v1 v2 m) = Some b.
+Proof.
+  intros. generalize (compare_int_spec rs v1 v2 m). 
+  set (rs' := compare_int rs v1 v2 m). intros (B & C & D & E).
+  unfold eval_testcond; rewrite B, C, D, E.
+  destruct v1; try discriminate; destruct v2; try discriminate.
+  simpl in H; inv H.
+  unfold Val.cmpu; simpl. destruct c; simpl.
+- destruct (Int.eq i i0); auto.
+- destruct (Int.eq i i0); auto.
+- rewrite Int.lt_sub_overflow. destruct (Int.lt i i0); auto.
+- rewrite Int.lt_sub_overflow, Int.not_lt.
+  destruct (Int.eq i i0), (Int.lt i i0); auto.
+- rewrite Int.lt_sub_overflow, (Int.lt_not i). 
+  destruct (Int.eq i i0), (Int.lt i i0); auto.
+- rewrite Int.lt_sub_overflow. destruct (Int.lt i i0); auto.
+Qed.
+
+Lemma eval_testcond_compare_uint: forall c v1 v2 b rs m,
+  Val.cmpu_bool (Mem.valid_pointer m) c v1 v2 = Some b ->
+  eval_testcond (cond_for_unsigned_cmp c) (compare_int rs v1 v2 m) = Some b.
+Proof.
+  intros. generalize (compare_int_spec rs v1 v2 m). 
+  set (rs' := compare_int rs v1 v2 m). intros (B & C & D & E).
+  unfold eval_testcond; rewrite B, C, D, E.
+  destruct v1; try discriminate; destruct v2; try discriminate.
+  simpl in H; inv H.
+  unfold Val.cmpu; simpl. destruct c; simpl.
+- destruct (Int.eq i i0); auto.
+- destruct (Int.eq i i0); auto.
+- destruct (Int.ltu i i0); auto.
+- rewrite (Int.not_ltu i). destruct (Int.eq i i0), (Int.ltu i i0); auto.
+- rewrite (Int.ltu_not i). destruct (Int.eq i i0), (Int.ltu i i0); auto.
+- destruct (Int.ltu i i0); auto.
+Qed.
+
+Lemma compare_long_spec: forall rs v1 v2 m,
+  let rs' := compare_long rs v1 v2 m in
+     rs'#CN = (Val.negativel (Val.subl v1 v2))
+  /\ rs'#CZ = (Val.maketotal (Val.cmplu (Mem.valid_pointer m) Ceq v1 v2))
+  /\ rs'#CC = (Val.maketotal (Val.cmplu (Mem.valid_pointer m) Cge v1 v2))
+  /\ rs'#CV = (Val.subl_overflow v1 v2).
+Proof.
+  intros; unfold rs'; auto.
+Qed.
+
+Remark int64_sub_overflow:
+  forall x y,
+  Int.xor (Int.repr (Int64.unsigned (Int64.sub_overflow x y Int64.zero)))
+          (Int.repr (Int64.unsigned (Int64.negative (Int64.sub x y)))) =
+  (if Int64.lt x y then Int.one else Int.zero).
+Proof.
+  intros.
+  transitivity (Int.repr (Int64.unsigned (if Int64.lt x y then Int64.one else Int64.zero))).
+  rewrite <- (Int64.lt_sub_overflow x y).
+  unfold Int64.sub_overflow, Int64.negative.
+  set (s := Int64.signed x - Int64.signed y - Int64.signed Int64.zero).
+  destruct (zle Int64.min_signed s && zle s Int64.max_signed);
+  destruct (Int64.lt (Int64.sub x y) Int64.zero);
+  auto.
+  destruct (Int64.lt x y); auto.
+Qed.
+
+Lemma eval_testcond_compare_slong: forall c v1 v2 b rs m,
+  Val.cmpl_bool c v1 v2 = Some b ->
+  eval_testcond (cond_for_signed_cmp c) (compare_long rs v1 v2 m) = Some b.
+Proof.
+  intros. generalize (compare_long_spec rs v1 v2 m). 
+  set (rs' := compare_long rs v1 v2 m). intros (B & C & D & E).
+  unfold eval_testcond; rewrite B, C, D, E.
+  destruct v1; try discriminate; destruct v2; try discriminate.
+  simpl in H; inv H.
+  unfold Val.cmplu; simpl. destruct c; simpl.
+- destruct (Int64.eq i i0); auto.
+- destruct (Int64.eq i i0); auto.
+- rewrite int64_sub_overflow. destruct (Int64.lt i i0); auto.
+- rewrite int64_sub_overflow, Int64.not_lt.
+  destruct (Int64.eq i i0), (Int64.lt i i0); auto.
+- rewrite int64_sub_overflow, (Int64.lt_not i). 
+  destruct (Int64.eq i i0), (Int64.lt i i0); auto.
+- rewrite int64_sub_overflow. destruct (Int64.lt i i0); auto.
+Qed.
+
+Lemma eval_testcond_compare_ulong: forall c v1 v2 b rs m,
+  Val.cmplu_bool (Mem.valid_pointer m) c v1 v2 = Some b ->
+  eval_testcond (cond_for_unsigned_cmp c) (compare_long rs v1 v2 m) = Some b.
+Proof.
+  intros. generalize (compare_long_spec rs v1 v2 m). 
+  set (rs' := compare_long rs v1 v2 m). intros (B & C & D & E).
+  unfold eval_testcond; rewrite B, C, D, E; unfold Val.cmplu.
+  destruct v1; try discriminate; destruct v2; try discriminate; simpl in H.
+- (* int-int *)
+  inv H. destruct c; simpl.
++ destruct (Int64.eq i i0); auto.
++ destruct (Int64.eq i i0); auto.
++ destruct (Int64.ltu i i0); auto.
++ rewrite (Int64.not_ltu i). destruct (Int64.eq i i0), (Int64.ltu i i0); auto.
++ rewrite (Int64.ltu_not i). destruct (Int64.eq i i0), (Int64.ltu i i0); auto.
++ destruct (Int64.ltu i i0); auto.
+- (* int-ptr *)
+  simpl.
+  destruct (Int64.eq i Int64.zero &&
+            (Mem.valid_pointer m b0 (Ptrofs.unsigned i0)
+              || Mem.valid_pointer m b0 (Ptrofs.unsigned i0 - 1))); try discriminate.
+  destruct c; simpl in H; inv H; reflexivity.
+- (* ptr-int *)
+  simpl.
+  destruct (Int64.eq i0 Int64.zero &&
+            (Mem.valid_pointer m b0 (Ptrofs.unsigned i)
+              || Mem.valid_pointer m b0 (Ptrofs.unsigned i - 1))); try discriminate.
+  destruct c; simpl in H; inv H; reflexivity.
+- (* ptr-ptr *)
+  simpl. 
+  destruct (eq_block b0 b1).
++ destruct ((Mem.valid_pointer m b0 (Ptrofs.unsigned i)
+             || Mem.valid_pointer m b0 (Ptrofs.unsigned i - 1)) &&
+            (Mem.valid_pointer m b1 (Ptrofs.unsigned i0)
+             || Mem.valid_pointer m b1 (Ptrofs.unsigned i0 - 1)));
+  inv H.
+  destruct c; simpl.
+* destruct (Ptrofs.eq i i0); auto.
+* destruct (Ptrofs.eq i i0); auto.
+* destruct (Ptrofs.ltu i i0); auto.
+* rewrite (Ptrofs.not_ltu i). destruct (Ptrofs.eq i i0), (Ptrofs.ltu i i0); auto.
+* rewrite (Ptrofs.ltu_not i). destruct (Ptrofs.eq i i0), (Ptrofs.ltu i i0); auto.
+* destruct (Ptrofs.ltu i i0); auto.
++ destruct (Mem.valid_pointer m b0 (Ptrofs.unsigned i) &&
+            Mem.valid_pointer m b1 (Ptrofs.unsigned i0)); try discriminate.
+  destruct c; simpl in H; inv H; reflexivity.
+Qed.
+
+Lemma compare_float_spec: forall rs f1 f2,
+  let rs' := compare_float rs (Vfloat f1) (Vfloat f2) in
+     rs'#CN = (Val.of_bool (Float.cmp Clt f1 f2))
+  /\ rs'#CZ = (Val.of_bool (Float.cmp Ceq f1 f2))
+  /\ rs'#CC = (Val.of_bool (negb (Float.cmp Clt f1 f2)))
+  /\ rs'#CV = (Val.of_bool (negb (Float.ordered f1 f2))).
+Proof.
+  intros; auto.
+Qed.
+
+Lemma eval_testcond_compare_float: forall c v1 v2 b rs,
+  Val.cmpf_bool c v1 v2 = Some b ->
+  eval_testcond (cond_for_float_cmp c) (compare_float rs v1 v2) = Some b.
+Proof.
+  intros. destruct v1; try discriminate; destruct v2; simpl in H; inv H. 
+  generalize (compare_float_spec rs f f0). 
+  set (rs' := compare_float rs (Vfloat f) (Vfloat f0)).
+  intros (B & C & D & E).
+  unfold eval_testcond; rewrite B, C, D, E.
+Local Transparent Float.cmp Float.ordered.
+  unfold Float.cmp, Float.ordered;
+  destruct c; destruct (Float.compare f f0) as [[]|]; reflexivity.
+Qed.
+
+Lemma eval_testcond_compare_not_float: forall c v1 v2 b rs,
+  option_map negb (Val.cmpf_bool c v1 v2) = Some b ->
+  eval_testcond (cond_for_float_not_cmp c) (compare_float rs v1 v2) = Some b.
+Proof.
+  intros. destruct v1; try discriminate; destruct v2; simpl in H; inv H.
+  generalize (compare_float_spec rs f f0). 
+  set (rs' := compare_float rs (Vfloat f) (Vfloat f0)).
+  intros (B & C & D & E).
+  unfold eval_testcond; rewrite B, C, D, E.
+Local Transparent Float.cmp Float.ordered.
+  unfold Float.cmp, Float.ordered;
+  destruct c; destruct (Float.compare f f0) as [[]|]; reflexivity.
+Qed.
+
+Lemma compare_single_spec: forall rs f1 f2,
+  let rs' := compare_single rs (Vsingle f1) (Vsingle f2) in
+     rs'#CN = (Val.of_bool (Float32.cmp Clt f1 f2))
+  /\ rs'#CZ = (Val.of_bool (Float32.cmp Ceq f1 f2))
+  /\ rs'#CC = (Val.of_bool (negb (Float32.cmp Clt f1 f2)))
+  /\ rs'#CV = (Val.of_bool (negb (Float32.ordered f1 f2))).
+Proof.
+  intros; auto.
+Qed.
+
+Lemma eval_testcond_compare_single: forall c v1 v2 b rs,
+  Val.cmpfs_bool c v1 v2 = Some b ->
+  eval_testcond (cond_for_float_cmp c) (compare_single rs v1 v2) = Some b.
+Proof.
+  intros. destruct v1; try discriminate; destruct v2; simpl in H; inv H. 
+  generalize (compare_single_spec rs f f0). 
+  set (rs' := compare_single rs (Vsingle f) (Vsingle f0)).
+  intros (B & C & D & E).
+  unfold eval_testcond; rewrite B, C, D, E.
+Local Transparent Float32.cmp Float32.ordered.
+  unfold Float32.cmp, Float32.ordered;
+  destruct c; destruct (Float32.compare f f0) as [[]|]; reflexivity.
+Qed.
+
+Lemma eval_testcond_compare_not_single: forall c v1 v2 b rs,
+  option_map negb (Val.cmpfs_bool c v1 v2) = Some b ->
+  eval_testcond (cond_for_float_not_cmp c) (compare_single rs v1 v2) = Some b.
+Proof.
+  intros. destruct v1; try discriminate; destruct v2; simpl in H; inv H.
+  generalize (compare_single_spec rs f f0). 
+  set (rs' := compare_single rs (Vsingle f) (Vsingle f0)).
+  intros (B & C & D & E).
+  unfold eval_testcond; rewrite B, C, D, E.
+Local Transparent Float32.cmp Float32.ordered.
+  unfold Float32.cmp, Float32.ordered;
+  destruct c; destruct (Float32.compare f f0) as [[]|]; reflexivity.
+Qed.
+
+Remark compare_float_inv: forall rs v1 v2 r,
+  match r with CR _ => False | _ => True end ->
+  (nextinstr (compare_float rs v1 v2))#r = (nextinstr rs)#r.
+Proof.
+  intros; unfold compare_float.
+  destruct r; try contradiction; destruct v1; auto; destruct v2; auto.
+Qed.
+
+Remark compare_single_inv: forall rs v1 v2 r,
+  match r with CR _ => False | _ => True end ->
+  (nextinstr (compare_single rs v1 v2))#r = (nextinstr rs)#r.
+Proof.
+  intros; unfold compare_single.
+  destruct r; try contradiction; destruct v1; auto; destruct v2; auto.
+Qed.
+
+(** Translation of conditionals *)
+
+Ltac ArgsInv :=
+  repeat (match goal with
+  | [ H: Error _ = OK _ |- _ ] => discriminate
+  | [ H: match ?args with nil => _ | _ :: _ => _ end = OK _ |- _ ] => destruct args
+  | [ H: bind _ _ = OK _ |- _ ] => monadInv H
+  | [ H: match _ with left _ => _ | right _ => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv
+  | [ H: match _ with true => _ | false => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv
+  end);
+  subst;
+  repeat (match goal with
+  | [ H: ireg_of _ = OK _ |- _ ] => simpl in *; rewrite (ireg_of_eq _ _ H) in *
+  | [ H: freg_of _ = OK _ |- _ ] => simpl in *; rewrite (freg_of_eq _ _ H) in *
+  end).
+
+Lemma transl_cond_correct:
+  forall cond args k c rs m,
+  transl_cond cond args k = OK c ->
+  exists rs',
+     exec_straight ge fn c rs m k rs' m
+  /\ (forall b,
+      eval_condition cond (map rs (map preg_of args)) m = Some b ->
+      eval_testcond (cond_for_cond cond) rs' = Some b)
+  /\ forall r, data_preg r = true -> rs'#r = rs#r.
+Proof.
+  intros until m; intros TR. destruct cond; simpl in TR; ArgsInv.
+- (* Ccomp *)
+  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  split; intros. apply eval_testcond_compare_sint; auto. 
+  destruct r; reflexivity || discriminate.
+- (* Ccompu *)
+  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  split; intros. apply eval_testcond_compare_uint; auto. 
+  destruct r; reflexivity || discriminate.
+- (* Ccompimm *)
+  destruct (is_arith_imm32 n); [|destruct (is_arith_imm32 (Int.neg n))].
++ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  split; intros. rewrite Int.repr_unsigned. apply eval_testcond_compare_sint; auto. 
+  destruct r; reflexivity || discriminate.
++ econstructor; split.
+  apply exec_straight_one. simpl. rewrite Int.repr_unsigned, Int.neg_involutive. eauto. auto.
+  split; intros. apply eval_testcond_compare_sint; auto. 
+  destruct r; reflexivity || discriminate.
++ exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C).
+  econstructor; split.
+  eapply exec_straight_trans. eexact A. apply exec_straight_one.
+  simpl. rewrite B, C by eauto with asmgen. eauto. auto.
+  split; intros. apply eval_testcond_compare_sint; auto. 
+  transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+- (* Ccompuimm *)
+  destruct (is_arith_imm32 n); [|destruct (is_arith_imm32 (Int.neg n))].
++ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  split; intros. rewrite Int.repr_unsigned. apply eval_testcond_compare_uint; auto. 
+  destruct r; reflexivity || discriminate.
++ econstructor; split.
+  apply exec_straight_one. simpl. rewrite Int.repr_unsigned, Int.neg_involutive. eauto. auto.
+  split; intros. apply eval_testcond_compare_uint; auto. 
+  destruct r; reflexivity || discriminate.
++ exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C).
+  econstructor; split.
+  eapply exec_straight_trans. eexact A. apply exec_straight_one.
+  simpl. rewrite B, C by eauto with asmgen. eauto. auto.
+  split; intros. apply eval_testcond_compare_uint; auto. 
+  transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+- (* Ccompshift *)
+  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  split; intros. rewrite transl_eval_shift. apply eval_testcond_compare_sint; auto. 
+  destruct r; reflexivity || discriminate.
+- (* Ccompushift *)
+  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  split; intros. rewrite transl_eval_shift. apply eval_testcond_compare_uint; auto. 
+  destruct r; reflexivity || discriminate.
+- (* Cmaskzero *)
+  destruct (is_logical_imm32 n).
++ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  split; intros. rewrite Int.repr_unsigned. apply (eval_testcond_compare_sint Ceq); auto.
+  destruct r; reflexivity || discriminate.
++ exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C).
+  econstructor; split.
+  eapply exec_straight_trans. eexact A.
+  apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto.
+  split; intros. apply (eval_testcond_compare_sint Ceq); auto.
+  transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+- (* Cmasknotzero *)
+  destruct (is_logical_imm32 n).
++ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  split; intros. rewrite Int.repr_unsigned. apply (eval_testcond_compare_sint Cne); auto.
+  destruct r; reflexivity || discriminate.
++ exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C).
+  econstructor; split.
+  eapply exec_straight_trans. eexact A.
+  apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto.
+  split; intros. apply (eval_testcond_compare_sint Cne); auto.
+  transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+- (* Ccompl *)
+  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  split; intros. apply eval_testcond_compare_slong; auto. 
+  destruct r; reflexivity || discriminate.
+- (* Ccomplu *)
+  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  split; intros. apply eval_testcond_compare_ulong; auto. 
+  destruct r; reflexivity || discriminate.
+- (* Ccomplimm *)
+  destruct (is_arith_imm64 n); [|destruct (is_arith_imm64 (Int64.neg n))].
++ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  split; intros. rewrite Int64.repr_unsigned. apply eval_testcond_compare_slong; auto. 
+  destruct r; reflexivity || discriminate.
++ econstructor; split.
+  apply exec_straight_one. simpl. rewrite Int64.repr_unsigned, Int64.neg_involutive. eauto. auto.
+  split; intros. apply eval_testcond_compare_slong; auto. 
+  destruct r; reflexivity || discriminate.
++ exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C).
+  econstructor; split.
+  eapply exec_straight_trans. eexact A. apply exec_straight_one.
+  simpl. rewrite B, C by eauto with asmgen. eauto. auto.
+  split; intros. apply eval_testcond_compare_slong; auto. 
+  transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+- (* Ccompluimm *)
+  destruct (is_arith_imm64 n); [|destruct (is_arith_imm64 (Int64.neg n))].
++ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  split; intros. rewrite Int64.repr_unsigned. apply eval_testcond_compare_ulong; auto. 
+  destruct r; reflexivity || discriminate.
++ econstructor; split.
+  apply exec_straight_one. simpl. rewrite Int64.repr_unsigned, Int64.neg_involutive. eauto. auto.
+  split; intros. apply eval_testcond_compare_ulong; auto. 
+  destruct r; reflexivity || discriminate.
++ exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C).
+  econstructor; split.
+  eapply exec_straight_trans. eexact A. apply exec_straight_one.
+  simpl. rewrite B, C by eauto with asmgen. eauto. auto.
+  split; intros. apply eval_testcond_compare_ulong; auto. 
+  transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+- (* Ccomplshift *)
+  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  split; intros. rewrite transl_eval_shiftl. apply eval_testcond_compare_slong; auto. 
+  destruct r; reflexivity || discriminate.
+- (* Ccomplushift *)
+  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  split; intros. rewrite transl_eval_shiftl. apply eval_testcond_compare_ulong; auto. 
+  destruct r; reflexivity || discriminate.
+- (* Cmasklzero *)
+  destruct (is_logical_imm64 n).
++ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  split; intros. rewrite Int64.repr_unsigned. apply (eval_testcond_compare_slong Ceq); auto.
+  destruct r; reflexivity || discriminate.
++ exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C).
+  econstructor; split.
+  eapply exec_straight_trans. eexact A.
+  apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto.
+  split; intros. apply (eval_testcond_compare_slong Ceq); auto.
+  transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+- (* Cmasknotzero *)
+  destruct (is_logical_imm64 n).
++ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  split; intros. rewrite Int64.repr_unsigned. apply (eval_testcond_compare_slong Cne); auto.
+  destruct r; reflexivity || discriminate.
++ exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C).
+  econstructor; split.
+  eapply exec_straight_trans. eexact A.
+  apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto.
+  split; intros. apply (eval_testcond_compare_slong Cne); auto.
+  transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+- (* Ccompf *)
+  econstructor; split. apply exec_straight_one. simpl; eauto.
+  rewrite compare_float_inv; auto.
+  split; intros. apply eval_testcond_compare_float; auto.
+  destruct r; discriminate || rewrite compare_float_inv; auto.
+- (* Cnotcompf *)
+  econstructor; split. apply exec_straight_one. simpl; eauto.
+  rewrite compare_float_inv; auto.
+  split; intros. apply eval_testcond_compare_not_float; auto.
+  destruct r; discriminate || rewrite compare_float_inv; auto.
+- (* Ccompfzero *)
+  econstructor; split. apply exec_straight_one. simpl; eauto.
+  rewrite compare_float_inv; auto.
+  split; intros. apply eval_testcond_compare_float; auto.
+  destruct r; discriminate || rewrite compare_float_inv; auto.
+- (* Cnotcompfzero *)
+  econstructor; split. apply exec_straight_one. simpl; eauto.
+  rewrite compare_float_inv; auto.
+  split; intros. apply eval_testcond_compare_not_float; auto.
+  destruct r; discriminate || rewrite compare_float_inv; auto.
+- (* Ccompfs *)
+  econstructor; split. apply exec_straight_one. simpl; eauto.
+  rewrite compare_single_inv; auto.
+  split; intros. apply eval_testcond_compare_single; auto.
+  destruct r; discriminate || rewrite compare_single_inv; auto.
+- (* Cnotcompfs *)
+  econstructor; split. apply exec_straight_one. simpl; eauto.
+  rewrite compare_single_inv; auto.
+  split; intros. apply eval_testcond_compare_not_single; auto.
+  destruct r; discriminate || rewrite compare_single_inv; auto.
+- (* Ccompfszero *)
+  econstructor; split. apply exec_straight_one. simpl; eauto.
+  rewrite compare_single_inv; auto.
+  split; intros. apply eval_testcond_compare_single; auto.
+  destruct r; discriminate || rewrite compare_single_inv; auto.
+- (* Cnotcompfszero *)
+  econstructor; split. apply exec_straight_one. simpl; eauto.
+  rewrite compare_single_inv; auto.
+  split; intros. apply eval_testcond_compare_not_single; auto.
+  destruct r; discriminate || rewrite compare_single_inv; auto.
+Qed.
+
+(** Translation of conditional branches *)
+
+Lemma transl_cond_branch_correct:
+  forall cond args lbl k c rs m b,
+  transl_cond_branch cond args lbl k = OK c ->
+  eval_condition cond (map rs (map preg_of args)) m = Some b ->
+  exists rs' insn,
+     exec_straight_opt ge fn c rs m (insn :: k) rs' m
+  /\ exec_instr ge fn insn rs' m =
+         (if b then goto_label fn lbl rs' m else Next (nextinstr rs') m)
+  /\ forall r, data_preg r = true -> rs'#r = rs#r.
+Proof.
+  intros until b; intros TR EV.
+  assert (DFL:
+    transl_cond_branch_default cond args lbl k = OK c ->
+    exists rs' insn,
+       exec_straight_opt ge fn c rs m (insn :: k) rs' m
+    /\ exec_instr ge fn insn rs' m =
+         (if b then goto_label fn lbl rs' m else Next (nextinstr rs') m)
+    /\ forall r, data_preg r = true -> rs'#r = rs#r).
+  {
+    unfold transl_cond_branch_default; intros.
+    exploit transl_cond_correct; eauto. intros (rs' & A & B & C).
+    exists rs', (Pbc (cond_for_cond cond) lbl); split.
+    apply exec_straight_opt_intro. eexact A.
+    split; auto. simpl. rewrite (B b) by auto. auto. 
+  }
+Local Opaque transl_cond transl_cond_branch_default.
+  destruct args as [ | a1 args]; simpl in TR; auto.
+  destruct args as [ | a2 args]; simpl in TR; auto.
+  destruct cond; simpl in TR; auto.
+- (* Ccompimm *)
+  destruct c0; auto; destruct (Int.eq n Int.zero) eqn:N0; auto; 
+  apply Int.same_if_eq in N0; subst n; ArgsInv.
++ (* Ccompimm Cne 0 *)
+  do 2 econstructor; split.
+  apply exec_straight_opt_refl.
+  split; auto. simpl. destruct (rs x); simpl in EV; inv EV. simpl. auto.
++ (* Ccompimm Ceq 0 *)
+  do 2 econstructor; split.
+  apply exec_straight_opt_refl.
+  split; auto. simpl. destruct (rs x); simpl in EV; inv EV. simpl. destruct (Int.eq i Int.zero); auto.
+- (* Ccompuimm *)
+  destruct c0; auto; destruct (Int.eq n Int.zero) eqn:N0; auto;
+  apply Int.same_if_eq in N0; subst n; ArgsInv.
++ (* Ccompuimm Cne 0 *)
+  do 2 econstructor; split.
+  apply exec_straight_opt_refl.
+  split; auto. simpl. rewrite EV. auto.
++ (* Ccompuimm Ceq 0 *)
+  do 2 econstructor; split.
+  apply exec_straight_opt_refl.
+  split; auto. simpl. rewrite (Val.negate_cmpu_bool (Mem.valid_pointer m) Cne), EV. destruct b; auto.
+- (* Cmaskzero *)
+  destruct (Int.is_power2 n) as [bit|] eqn:P2; auto. ArgsInv.
+  do 2 econstructor; split.
+  apply exec_straight_opt_refl.
+  split; auto. simpl.
+  erewrite <- Int.mul_pow2, Int.mul_commut, Int.mul_one by eauto.
+  rewrite (Val.negate_cmp_bool Ceq), EV. destruct b; auto.
+- (* Cmasknotzero *)
+  destruct (Int.is_power2 n) as [bit|] eqn:P2; auto. ArgsInv.
+  do 2 econstructor; split.
+  apply exec_straight_opt_refl.
+  split; auto. simpl.
+  erewrite <- Int.mul_pow2, Int.mul_commut, Int.mul_one by eauto.
+  rewrite EV. auto.
+- (* Ccomplimm *)
+  destruct c0; auto; destruct (Int64.eq n Int64.zero) eqn:N0; auto; 
+  apply Int64.same_if_eq in N0; subst n; ArgsInv.
++ (* Ccomplimm Cne 0 *)
+  do 2 econstructor; split.
+  apply exec_straight_opt_refl.
+  split; auto. simpl. destruct (rs x); simpl in EV; inv EV. simpl. auto.
++ (* Ccomplimm Ceq 0 *)
+  do 2 econstructor; split.
+  apply exec_straight_opt_refl.
+  split; auto. simpl. destruct (rs x); simpl in EV; inv EV. simpl. destruct (Int64.eq i Int64.zero); auto.
+- (* Ccompluimm *)
+  destruct c0; auto; destruct (Int64.eq n Int64.zero) eqn:N0; auto;
+  apply Int64.same_if_eq in N0; subst n; ArgsInv.
++ (* Ccompluimm Cne 0 *)
+  do 2 econstructor; split.
+  apply exec_straight_opt_refl.
+  split; auto. simpl. rewrite EV. auto.
++ (* Ccompluimm Ceq 0 *)
+  do 2 econstructor; split.
+  apply exec_straight_opt_refl.
+  split; auto. simpl. rewrite (Val.negate_cmplu_bool (Mem.valid_pointer m) Cne), EV. destruct b; auto.
+- (* Cmasklzero *)
+  destruct (Int64.is_power2' n) as [bit|] eqn:P2; auto. ArgsInv.
+  do 2 econstructor; split.
+  apply exec_straight_opt_refl.
+  split; auto. simpl.
+  erewrite <- Int64.mul_pow2', Int64.mul_commut, Int64.mul_one by eauto.
+  rewrite (Val.negate_cmpl_bool Ceq), EV. destruct b; auto.
+- (* Cmasklnotzero *)
+  destruct (Int64.is_power2' n) as [bit|] eqn:P2; auto. ArgsInv.
+  do 2 econstructor; split.
+  apply exec_straight_opt_refl.
+  split; auto. simpl.
+  erewrite <- Int64.mul_pow2', Int64.mul_commut, Int64.mul_one by eauto.
+  rewrite EV. auto.
+Qed.
+
+(** Translation of arithmetic operations *)
+
+Ltac SimplEval H :=
+  match type of H with
+  | Some _ = None _ => discriminate
+  | Some _ = Some _ => inv H
+  | ?a = Some ?b => let A := fresh in assert (A: Val.maketotal a = b) by (rewrite H; reflexivity)
+end.
+
+Ltac TranslOpSimpl :=
+  econstructor; split;
+  [ apply exec_straight_one; [simpl; eauto | reflexivity]
+  | split; [ rewrite ? transl_eval_shift, ? transl_eval_shiftl;
+             apply Val.lessdef_same; Simpl; fail
+           | intros; Simpl; fail ] ].
+
+Ltac TranslOpBase :=
+  econstructor; split;
+  [ apply exec_straight_one; [simpl; eauto | reflexivity]
+  | split; [ rewrite ? transl_eval_shift, ? transl_eval_shiftl; Simpl
+           | intros; Simpl; fail ] ].
+
+Lemma transl_op_correct:
+  forall op args res k (rs: regset) m v c,
+  transl_op op args res k = OK c ->
+  eval_operation ge (rs#SP) op (map rs (map preg_of args)) m = Some v ->
+  exists rs',
+     exec_straight ge fn c rs m k rs' m
+  /\ Val.lessdef v rs'#(preg_of res)
+  /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r.
+Proof.
+Local Opaque Int.eq Int64.eq Val.add Val.addl Int.zwordsize Int64.zwordsize.
+  intros until c; intros TR EV.
+  unfold transl_op in TR; destruct op; ArgsInv; simpl in EV; SimplEval EV; try TranslOpSimpl.
+- (* move *)
+  destruct (preg_of res) eqn:RR; try discriminate; destruct (preg_of m0) eqn:R1; inv TR.
++ TranslOpSimpl.
++ TranslOpSimpl.
+- (* intconst *)
+  exploit exec_loadimm32. intros (rs' & A & B & C).
+  exists rs'; split. eexact A. split. rewrite B; auto. intros; auto with asmgen.
+- (* longconst *)
+  exploit exec_loadimm64. intros (rs' & A & B & C).
+  exists rs'; split. eexact A. split. rewrite B; auto. intros; auto with asmgen.
+- (* floatconst *)
+  destruct (Float.eq_dec n Float.zero).
++ subst n. TranslOpSimpl. 
++ TranslOpSimpl.
+- (* singleconst *)
+  destruct (Float32.eq_dec n Float32.zero).
++ subst n. TranslOpSimpl. 
++ TranslOpSimpl.
+- (* loadsymbol *)
+  exploit (exec_loadsymbol x id ofs). eauto with asmgen. intros (rs' & A & B & C).
+  exists rs'; split. eexact A. split. rewrite B; auto. auto.
+- (* addrstack *)
+  exploit (exec_addimm64 x XSP (Ptrofs.to_int64 ofs)). simpl; eauto with asmgen.
+  intros (rs' & A & B & C).
+  exists rs'; split. eexact A. split. simpl in B; rewrite B.
+Local Transparent Val.addl.
+  destruct (rs SP); simpl; auto. rewrite Ptrofs.of_int64_to_int64 by auto. auto.
+  auto.
+- (* shift *)
+  rewrite <- transl_eval_shift'. TranslOpSimpl.
+- (* addimm *)
+  exploit (exec_addimm32 x x0 n). eauto with asmgen. intros (rs' & A & B & C).
+  exists rs'; split. eexact A. split. rewrite B; auto. auto.
+- (* mul *)
+  TranslOpBase.
+Local Transparent Val.add.
+  destruct (rs x0); auto; destruct (rs x1); auto. simpl. rewrite Int.add_zero_l; auto.
+- (* andimm *)
+  exploit (exec_logicalimm32 (Pandimm W) (Pand W)). 
+  intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.
+  intros (rs' & A & B & C). 
+  exists rs'; split. eexact A. split. rewrite B; auto. auto.
+- (* orimm *)
+  exploit (exec_logicalimm32 (Porrimm W) (Porr W)). 
+  intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.
+  intros (rs' & A & B & C). 
+  exists rs'; split. eexact A. split. rewrite B; auto. auto.
+- (* xorimm *)
+  exploit (exec_logicalimm32 (Peorimm W) (Peor W)). 
+  intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.
+  intros (rs' & A & B & C). 
+  exists rs'; split. eexact A. split. rewrite B; auto. auto.
+- (* not *)
+  TranslOpBase.
+  destruct (rs x0); auto. simpl. rewrite Int.or_zero_l; auto.
+- (* notshift *)
+  TranslOpBase.
+  destruct (eval_shift s (rs x0) a); auto. simpl. rewrite Int.or_zero_l; auto.
+- (* shrx *)
+  exploit (exec_shrx32 x x0 n); eauto with asmgen. intros (rs' & A & B & C).
+  econstructor; split. eexact A. split. rewrite B; auto. auto.
+- (* zero-ext *)
+  TranslOpBase.
+  destruct (rs x0); auto; simpl. rewrite Int.shl_zero. auto.
+- (* sign-ext *)
+  TranslOpBase.
+  destruct (rs x0); auto; simpl. rewrite Int.shl_zero. auto.
+- (* shlzext *)
+  TranslOpBase.
+  destruct (rs x0); simpl; auto. rewrite <- Int.shl_zero_ext_min; auto using a32_range.
+- (* shlsext *)
+  TranslOpBase.
+  destruct (rs x0); simpl; auto. rewrite <- Int.shl_sign_ext_min; auto using a32_range.
+- (* zextshr *)
+  TranslOpBase.
+  destruct (rs x0); simpl; auto. rewrite ! a32_range; simpl. rewrite <- Int.zero_ext_shru_min; auto using a32_range.
+- (* sextshr *)
+  TranslOpBase.
+  destruct (rs x0); simpl; auto. rewrite ! a32_range; simpl. rewrite <- Int.sign_ext_shr_min; auto using a32_range.
+- (* shiftl *)
+  rewrite <- transl_eval_shiftl'. TranslOpSimpl.
+- (* extend *)
+  exploit (exec_move_extended x0 x1 x a k). intros (rs' & A & B & C).
+  econstructor; split. eexact A. 
+  split. rewrite B; auto. eauto with asmgen.
+- (* addext *)
+  exploit (exec_arith_extended Val.addl Paddext (Padd X)).
+  auto. auto. instantiate (1 := x1). eauto with asmgen. intros (rs' & A & B & C).
+  econstructor; split. eexact A. split. rewrite B; auto. auto.
+- (* addlimm *)
+  exploit (exec_addimm64 x x0 n). simpl. generalize (ireg_of_not_X16 _ _ EQ1). congruence.
+  intros (rs' & A & B & C).
+  exists rs'; split. eexact A. split. simpl in B; rewrite B; auto. auto.
+- (* subext *)
+  exploit (exec_arith_extended Val.subl Psubext (Psub X)).
+  auto. auto. instantiate (1 := x1). eauto with asmgen. intros (rs' & A & B & C).
+  econstructor; split. eexact A. split. rewrite B; auto. auto.
+- (* mull *)
+  TranslOpBase.
+  destruct (rs x0); auto; destruct (rs x1); auto. simpl. rewrite Int64.add_zero_l; auto.
+- (* andlimm *)
+  exploit (exec_logicalimm64 (Pandimm X) (Pand X)). 
+  intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.
+  intros (rs' & A & B & C). 
+  exists rs'; split. eexact A. split. rewrite B; auto. auto.
+- (* orlimm *)
+  exploit (exec_logicalimm64 (Porrimm X) (Porr X)). 
+  intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.
+  intros (rs' & A & B & C). 
+  exists rs'; split. eexact A. split. rewrite B; auto. auto.
+- (* xorlimm *)
+  exploit (exec_logicalimm64 (Peorimm X) (Peor X)). 
+  intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.
+  intros (rs' & A & B & C). 
+  exists rs'; split. eexact A. split. rewrite B; auto. auto.
+- (* notl *)
+  TranslOpBase.
+  destruct (rs x0); auto. simpl. rewrite Int64.or_zero_l; auto.
+- (* notlshift *)
+  TranslOpBase.
+  destruct (eval_shiftl s (rs x0) a); auto. simpl. rewrite Int64.or_zero_l; auto.
+- (* shrx *)
+  exploit (exec_shrx64 x x0 n); eauto with asmgen. intros (rs' & A & B & C).
+  econstructor; split. eexact A. split. rewrite B; auto. auto.
+- (* zero-ext-l *)
+  TranslOpBase.
+  destruct (rs x0); auto; simpl. rewrite Int64.shl'_zero. auto.
+- (* sign-ext-l *)
+  TranslOpBase.
+  destruct (rs x0); auto; simpl. rewrite Int64.shl'_zero. auto.
+- (* shllzext *)
+  TranslOpBase.
+  destruct (rs x0); simpl; auto. rewrite <- Int64.shl'_zero_ext_min; auto using a64_range.
+- (* shllsext *)
+  TranslOpBase.
+  destruct (rs x0); simpl; auto. rewrite <- Int64.shl'_sign_ext_min; auto using a64_range.
+- (* zextshrl *)
+  TranslOpBase.
+  destruct (rs x0); simpl; auto. rewrite ! a64_range; simpl. rewrite <- Int64.zero_ext_shru'_min; auto using a64_range.
+- (* sextshrl *)
+  TranslOpBase.
+  destruct (rs x0); simpl; auto. rewrite ! a64_range; simpl. rewrite <- Int64.sign_ext_shr'_min; auto using a64_range.
+- (* condition *)
+  exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C).
+  econstructor; split.
+  eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto. auto.
+  split. Simpl. destruct (eval_condition cond (map rs (map preg_of args)) m) as [b|]; simpl in *.
+  rewrite (B b) by auto. auto. 
+  auto.
+  intros; Simpl.
+- (* select *)
+  destruct (preg_of res) eqn:RES; monadInv TR.
+  + (* integer *)
+    generalize (ireg_of_eq _ _ EQ) (ireg_of_eq _ _ EQ1); intros E1 E2; rewrite E1, E2.
+    exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C).
+    econstructor; split.
+    eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto. auto.
+    split. Simpl. destruct (eval_condition cond (map rs (map preg_of args)) m) as [b|]; simpl in *.
+    rewrite (B b) by auto. rewrite !C. apply Val.lessdef_normalize.
+    rewrite <- E2; auto with asmgen. rewrite <- E1; auto with asmgen.
+    auto.
+    intros; Simpl.
+  + (* FP *)
+    generalize (freg_of_eq _ _ EQ) (freg_of_eq _ _ EQ1); intros E1 E2; rewrite E1, E2.
+    exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C).
+    econstructor; split.
+    eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto. auto.
+    split. Simpl. destruct (eval_condition cond (map rs (map preg_of args)) m) as [b|]; simpl in *.
+    rewrite (B b) by auto. rewrite !C. apply Val.lessdef_normalize.
+    rewrite <- E2; auto with asmgen. rewrite <- E1; auto with asmgen.
+    auto.
+    intros; Simpl.
+Qed.
+
+(** Translation of addressing modes, loads, stores *)
+
+Lemma transl_addressing_correct:
+  forall sz addr args (insn: Asm.addressing -> instruction) k (rs: regset) m c b o,
+  transl_addressing sz addr args insn k = OK c ->
+  Op.eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some (Vptr b o) ->
+  exists ad rs',
+     exec_straight_opt ge fn c rs m (insn ad :: k) rs' m
+  /\ Asm.eval_addressing ge ad rs' = Vptr b o
+  /\ forall r, data_preg r = true -> rs' r = rs r.
+Proof.
+  intros until o; intros TR EV.
+  unfold transl_addressing in TR; destruct addr; ArgsInv; SimplEval EV.
+- (* Aindexed *)
+  destruct (offset_representable sz ofs); inv EQ0.
++ econstructor; econstructor; split. apply exec_straight_opt_refl.
+  auto.
++ exploit (exec_loadimm64 X16 ofs). intros (rs' & A & B & C).
+  econstructor; exists rs'; split. apply exec_straight_opt_intro; eexact A.
+  split. simpl. rewrite B, C by eauto with asmgen. auto.
+  eauto with asmgen.
+- (* Aindexed2 *)
+  econstructor; econstructor; split. apply exec_straight_opt_refl.
+  auto.
+- (* Aindexed2shift *)
+  destruct (Int.eq a Int.zero) eqn:E; [|destruct (Int.eq (Int.shl Int.one a) (Int.repr sz))]; inv EQ2.
++ apply Int.same_if_eq in E. rewrite E.
+  econstructor; econstructor; split. apply exec_straight_opt_refl.
+  split; auto. simpl.
+  rewrite Val.addl_commut in H0. destruct (rs x0); try discriminate.
+  unfold Val.shll. rewrite Int64.shl'_zero. auto.
++ econstructor; econstructor; split. apply exec_straight_opt_refl.
+  auto. 
++ econstructor; econstructor; split.
+  apply exec_straight_opt_intro. apply exec_straight_one. simpl; eauto. auto.
+  split. simpl. Simpl. rewrite H0. simpl. rewrite Ptrofs.add_zero. auto.
+  intros; Simpl.
+- (* Aindexed2ext *)
+  destruct (Int.eq a Int.zero || Int.eq (Int.shl Int.one a) (Int.repr sz)); inv EQ2.
++ econstructor; econstructor; split. apply exec_straight_opt_refl.
+  split; auto. destruct x; auto.
++ exploit (exec_arith_extended Val.addl Paddext (Padd X)); auto.
+  instantiate (1 := x0). eauto with asmgen.
+  intros (rs' & A & B & C).
+  econstructor; exists rs'; split.
+  apply exec_straight_opt_intro. eexact A. 
+  split. simpl. rewrite B. rewrite Val.addl_assoc. f_equal.
+  unfold Op.eval_extend; destruct x, (rs x1); simpl; auto; rewrite ! a64_range;
+  simpl; rewrite Int64.add_zero; auto.
+  intros. apply C; eauto with asmgen.
+- (* Aglobal *)
+  destruct (Ptrofs.eq (Ptrofs.modu ofs (Ptrofs.repr sz)) Ptrofs.zero && symbol_is_aligned id sz); inv TR.
++ econstructor; econstructor; split.
+  apply exec_straight_opt_intro. apply exec_straight_one. simpl; eauto. auto.
+  split. simpl. Simpl. rewrite symbol_high_low. simpl in EV. congruence.
+  intros; Simpl.
++ exploit (exec_loadsymbol X16 id ofs). auto. intros (rs' & A & B & C).
+  econstructor; exists rs'; split.
+  apply exec_straight_opt_intro. eexact A.
+  split. simpl. 
+  rewrite B. rewrite <- Genv.shift_symbol_address_64, Ptrofs.add_zero by auto. 
+  simpl in EV. congruence. 
+  auto with asmgen.
+- (* Ainstrack *)
+  assert (E: Val.addl (rs SP) (Vlong (Ptrofs.to_int64 ofs)) = Vptr b o).
+  { simpl in EV. inv EV. destruct (rs SP); simpl in H1; inv H1. simpl. 
+    rewrite Ptrofs.of_int64_to_int64 by auto. auto. }   
+  destruct (offset_representable sz (Ptrofs.to_int64 ofs)); inv TR.
++ econstructor; econstructor; split. apply exec_straight_opt_refl.
+  auto.
++ exploit (exec_loadimm64 X16 (Ptrofs.to_int64 ofs)). intros (rs' & A & B & C).
+  econstructor; exists rs'; split.
+  apply exec_straight_opt_intro. eexact A.
+  split. simpl. rewrite B, C by eauto with asmgen. auto.
+  auto with asmgen.
+Qed.
+
+Lemma transl_load_correct:
+  forall chunk addr args dst k c (rs: regset) m vaddr v,
+  transl_load chunk addr args dst k = OK c ->
+  Op.eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some vaddr ->
+  Mem.loadv chunk m vaddr = Some v ->
+  exists rs',
+     exec_straight ge fn c rs m k rs' m
+  /\ rs'#(preg_of dst) = v
+  /\ forall r, data_preg r = true -> r <> preg_of dst -> rs' r = rs r.
+Proof.
+  intros. destruct vaddr; try discriminate. 
+  assert (A: exists sz insn,
+                transl_addressing sz addr args insn k = OK c
+             /\ (forall ad rs', exec_instr ge fn (insn ad) rs' m =
+                              exec_load ge chunk (fun v => v) ad (preg_of dst) rs' m)).
+  {
+    destruct chunk; monadInv H;
+    try rewrite (ireg_of_eq _ _ EQ); try rewrite (freg_of_eq _ _ EQ);
+    do 2 econstructor; (split; [eassumption|auto]).
+  }
+  destruct A as (sz & insn & B & C).
+  exploit transl_addressing_correct. eexact B. eexact H0. intros (ad & rs' & P & Q & R).
+  assert (X: exec_load ge chunk (fun v => v) ad (preg_of dst) rs' m =
+             Next (nextinstr (rs'#(preg_of dst) <- v)) m).
+  { unfold exec_load. rewrite Q, H1. auto. }
+  econstructor; split.
+  eapply exec_straight_opt_right. eexact P.
+  apply exec_straight_one. rewrite C, X; eauto. Simpl. 
+  split. Simpl. intros; Simpl.
+Qed.
+
+Lemma transl_store_correct:
+  forall chunk addr args src k c (rs: regset) m vaddr m',
+  transl_store chunk addr args src k = OK c ->
+  Op.eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some vaddr ->
+  Mem.storev chunk m vaddr rs#(preg_of src) = Some m' ->
+  exists rs',
+     exec_straight ge fn c rs m k rs' m'
+  /\ forall r, data_preg r = true -> rs' r = rs r.
+Proof.
+  intros. destruct vaddr; try discriminate. 
+  set (chunk' := match chunk with Mint8signed => Mint8unsigned
+                                | Mint16signed => Mint16unsigned
+                                | _ => chunk end).
+  assert (A: exists sz insn,
+                transl_addressing sz addr args insn k = OK c
+             /\ (forall ad rs', exec_instr ge fn (insn ad) rs' m =
+                              exec_store ge chunk' ad rs'#(preg_of src) rs' m)).
+  {
+    unfold chunk'; destruct chunk; monadInv H;
+    try rewrite (ireg_of_eq _ _ EQ); try rewrite (freg_of_eq _ _ EQ);
+    do 2 econstructor; (split; [eassumption|auto]).
+  }
+  destruct A as (sz & insn & B & C).
+  exploit transl_addressing_correct. eexact B. eexact H0. intros (ad & rs' & P & Q & R).
+  assert (X: Mem.storev chunk' m (Vptr b i) rs#(preg_of src) = Some m').
+  { rewrite <- H1. unfold chunk'. destruct chunk; auto; simpl; symmetry.
+    apply Mem.store_signed_unsigned_8.
+    apply Mem.store_signed_unsigned_16. }
+  assert (Y: exec_store ge chunk' ad rs'#(preg_of src) rs' m =
+             Next (nextinstr rs') m').
+  { unfold exec_store. rewrite Q, R, X by auto with asmgen. auto. }
+  econstructor; split.
+  eapply exec_straight_opt_right. eexact P.
+  apply exec_straight_one. rewrite C, Y; eauto. Simpl. 
+  intros; Simpl.
+Qed.
+
+(** Translation of indexed memory accesses *)
+
+Lemma indexed_memory_access_correct: forall insn sz (base: iregsp) ofs k (rs: regset) m b i,
+  preg_of_iregsp base <> IR X16 ->
+  Val.offset_ptr rs#base ofs = Vptr b i ->
+  exists ad rs',
+     exec_straight_opt ge fn (indexed_memory_access insn sz base ofs k) rs m (insn ad :: k) rs' m
+  /\ Asm.eval_addressing ge ad rs' = Vptr b i
+  /\ forall r, r <> PC -> r <> X16 -> rs' r = rs r.
+Proof.
+  unfold indexed_memory_access; intros.
+  assert (Val.addl rs#base (Vlong (Ptrofs.to_int64 ofs)) = Vptr b i).
+  { destruct (rs base); try discriminate. simpl in *. rewrite Ptrofs.of_int64_to_int64 by auto. auto. }
+  destruct offset_representable.
+- econstructor; econstructor; split. apply exec_straight_opt_refl. auto. 
+- exploit (exec_loadimm64 X16); eauto. intros (rs' & A & B & C).
+  econstructor; econstructor; split. apply exec_straight_opt_intro; eexact A.
+  split. simpl. rewrite B, C by eauto with asmgen. auto. auto.
+Qed.
+
+Lemma loadptr_correct: forall (base: iregsp) ofs dst k m v (rs: regset),
+  Mem.loadv Mint64 m (Val.offset_ptr rs#base ofs) = Some v ->
+  preg_of_iregsp base <> IR X16 ->
+  exists rs',
+     exec_straight ge fn (loadptr base ofs dst k) rs m k rs' m
+  /\ rs'#dst = v
+  /\ forall r, r <> PC -> r <> X16 -> r <> dst -> rs' r = rs r.
+Proof.
+  intros. 
+  destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate.
+  exploit indexed_memory_access_correct; eauto. intros (ad & rs' & A & B & C). 
+  econstructor; split.
+  eapply exec_straight_opt_right. eexact A.
+  apply exec_straight_one. simpl. unfold exec_load. rewrite B, H. eauto. auto.
+  split. Simpl. intros; Simpl.
+Qed.
+
+Lemma storeptr_correct: forall (base: iregsp) ofs (src: ireg) k m m' (rs: regset),
+  Mem.storev Mint64 m (Val.offset_ptr rs#base ofs) rs#src = Some m' ->
+  preg_of_iregsp base <> IR X16 ->
+  src <> X16 ->
+  exists rs',
+     exec_straight ge fn (storeptr src base ofs k) rs m k rs' m'
+  /\ forall r, r <> PC -> r <> X16 -> rs' r = rs r.
+Proof.
+  intros. 
+  destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate.
+  exploit indexed_memory_access_correct; eauto. intros (ad & rs' & A & B & C). 
+  econstructor; split.
+  eapply exec_straight_opt_right. eexact A.
+  apply exec_straight_one. simpl. unfold exec_store. rewrite B, C, H by eauto with asmgen. eauto. auto.
+  intros; Simpl.
+Qed.
+
+Lemma loadind_correct: forall (base: iregsp) ofs ty dst k c (rs: regset) m v,
+  loadind base ofs ty dst k = OK c ->
+  Mem.loadv (chunk_of_type ty) m (Val.offset_ptr rs#base ofs) = Some v ->
+  preg_of_iregsp base <> IR X16 ->
+  exists rs',
+     exec_straight ge fn c rs m k rs' m
+  /\ rs'#(preg_of dst) = v
+  /\ forall r, data_preg r = true -> r <> preg_of dst -> rs' r = rs r.
+Proof.
+  intros. 
+  destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate.
+  assert (X: exists sz insn,
+                c = indexed_memory_access insn sz base ofs k
+             /\ (forall ad rs', exec_instr ge fn (insn ad) rs' m =
+                              exec_load ge (chunk_of_type ty) (fun v => v) ad (preg_of dst) rs' m)).
+  {
+    unfold loadind in H; destruct ty; destruct (preg_of dst); inv H; do 2 econstructor; eauto.
+  }
+  destruct X as (sz & insn & EQ & SEM). subst c.
+  exploit indexed_memory_access_correct; eauto. intros (ad & rs' & A & B & C). 
+  econstructor; split.
+  eapply exec_straight_opt_right. eexact A.
+  apply exec_straight_one. rewrite SEM. unfold exec_load. rewrite B, H0. eauto. Simpl.
+  split. Simpl. intros; Simpl.
+Qed.
+
+Lemma storeind_correct: forall (base: iregsp) ofs ty src k c (rs: regset) m m',
+  storeind src base ofs ty k = OK c ->
+  Mem.storev (chunk_of_type ty) m (Val.offset_ptr rs#base ofs) rs#(preg_of src) = Some m' ->
+  preg_of_iregsp base <> IR X16 ->
+  exists rs',
+     exec_straight ge fn c rs m k rs' m'
+  /\ forall r, data_preg r = true -> rs' r = rs r.
+Proof.
+  intros. 
+  destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate.
+  assert (X: exists sz insn,
+                c = indexed_memory_access insn sz base ofs k
+             /\ (forall ad rs', exec_instr ge fn (insn ad) rs' m =
+                              exec_store ge (chunk_of_type ty) ad rs'#(preg_of src) rs' m)).
+  {
+    unfold storeind in H; destruct ty; destruct (preg_of src); inv H; do 2 econstructor; eauto.
+  }
+  destruct X as (sz & insn & EQ & SEM). subst c.
+  exploit indexed_memory_access_correct; eauto. intros (ad & rs' & A & B & C). 
+  econstructor; split.
+  eapply exec_straight_opt_right. eexact A.
+  apply exec_straight_one. rewrite SEM.
+  unfold exec_store. rewrite B, C, H0 by eauto with asmgen. eauto.
+  Simpl.
+  intros; Simpl.
+Qed.
+
+Lemma make_epilogue_correct:
+  forall ge0 f m stk soff cs m' ms rs k tm,
+  load_stack m (Vptr stk soff) Tptr f.(fn_link_ofs) = Some (parent_sp cs) ->
+  load_stack m (Vptr stk soff) Tptr f.(fn_retaddr_ofs) = Some (parent_ra cs) ->
+  Mem.free m stk 0 f.(fn_stacksize) = Some m' ->
+  agree ms (Vptr stk soff) rs ->
+  Mem.extends m tm ->
+  match_stack ge0 cs ->
+  exists rs', exists tm',
+     exec_straight ge fn (make_epilogue f k) rs tm k rs' tm'
+  /\ agree ms (parent_sp cs) rs'
+  /\ Mem.extends m' tm'
+  /\ rs'#RA = parent_ra cs
+  /\ rs'#SP = parent_sp cs
+  /\ (forall r, r <> PC -> r <> SP -> r <> X30 -> r <> X16 -> rs'#r = rs#r).
+Proof.
+  intros until tm; intros LP LRA FREE AG MEXT MCS.
+  exploit Mem.loadv_extends. eauto. eexact LP. auto. simpl. intros (parent' & LP' & LDP').
+  exploit Mem.loadv_extends. eauto. eexact LRA. auto. simpl. intros (ra' & LRA' & LDRA').
+  exploit lessdef_parent_sp; eauto. intros EQ; subst parent'; clear LDP'.
+  exploit lessdef_parent_ra; eauto. intros EQ; subst ra'; clear LDRA'.
+  exploit Mem.free_parallel_extends; eauto. intros (tm' & FREE' & MEXT').
+  unfold make_epilogue. 
+  exploit (loadptr_correct XSP (fn_retaddr_ofs f)).
+    instantiate (2 := rs). simpl. rewrite <- (sp_val _ _ _ AG). simpl. eexact LRA'. simpl; congruence.
+  intros (rs1 & A1 & B1 & C1).
+  econstructor; econstructor; split.
+  eapply exec_straight_trans. eexact A1. apply exec_straight_one. simpl. 
+    simpl; rewrite (C1 SP) by auto with asmgen. rewrite <- (sp_val _ _ _ AG). simpl; rewrite LP'. 
+    rewrite FREE'. eauto. auto. 
+  split. apply agree_nextinstr. apply agree_set_other; auto.
+  apply agree_change_sp with (Vptr stk soff).
+  apply agree_exten with rs; auto. intros; apply C1; auto with asmgen.
+  eapply parent_sp_def; eauto.
+  split. auto.
+  split. Simpl. 
+  split. Simpl. 
+  intros. Simpl.
+Qed.
+
+End CONSTRUCTORS.
diff --git a/aarch64/TargetPrinter.ml b/aarch64/TO_MERGE/TargetPrinter.ml
index 9ec1d563..bc4279a0 100644
--- a/aarch64/TargetPrinter.ml
+++ b/aarch64/TO_MERGE/TargetPrinter.ml
@@ -21,19 +21,147 @@ open AisAnnot
 open PrintAsmaux
 open Fileinfo
 
+<<<<<<< HEAD
 (* Module containing the printing functions *)
 
 module Target (*: TARGET*) =
-  struct
-
-(* Basic printing functions *)
+=======
+(* Recognition of FP numbers that are supported by the fmov #imm instructions:
+   "a normalized binary floating point encoding with 1 sign bit,
+    4 bits of fraction and a 3-bit exponent"
+*)
+
+let is_immediate_float64 bits =
+  let exp = (Int64.(to_int (shift_right_logical bits 52)) land 0x7FF) - 1023 in
+  let mant = Int64.logand bits 0xF_FFFF_FFFF_FFFFL in
+  exp >= -3 && exp <= 4 && Int64.logand mant 0xF_0000_0000_0000L = mant
+
+let is_immediate_float32 bits =
+  let exp = (Int32.(to_int (shift_right_logical bits 23)) land 0xFF) - 127 in
+  let mant = Int32.logand bits 0x7F_FFFFl in
+  exp >= -3 && exp <= 4 && Int32.logand mant 0x78_0000l = mant
+
+(* Naming and printing registers *)
+
+let intsz oc (sz, n) =
+  match sz with X -> coqint64 oc n | W -> coqint oc n
+
+let xreg_name = function
+  | X0 -> "x0"   | X1 -> "x1"   | X2 -> "x2"   | X3 -> "x3"
+  | X4 -> "x4"   | X5 -> "x5"   | X6 -> "x6"   | X7 -> "x7"
+  | X8 -> "x8"   | X9 -> "x9"   | X10 -> "x10" | X11 -> "x11"
+  | X12 -> "x12" | X13 -> "x13" | X14 -> "x14" | X15 -> "x15"
+  | X16 -> "x16" | X17 -> "x17" | X18 -> "x18" | X19 -> "x19"
+  | X20 -> "x20" | X21 -> "x21" | X22 -> "x22" | X23 -> "x23"
+  | X24 -> "x24" | X25 -> "x25" | X26 -> "x26" | X27 -> "x27"
+  | X28 -> "x28" | X29 -> "x29" | X30 -> "x30"
+
+let wreg_name = function
+  | X0 -> "w0"   | X1 -> "w1"   | X2 -> "w2"   | X3 -> "w3"
+  | X4 -> "w4"   | X5 -> "w5"   | X6 -> "w6"   | X7 -> "w7"
+  | X8 -> "w8"   | X9 -> "w9"   | X10 -> "w10" | X11 -> "w11"
+  | X12 -> "w12" | X13 -> "w13" | X14 -> "w14" | X15 -> "w15"
+  | X16 -> "w16" | X17 -> "w17" | X18 -> "w18" | X19 -> "w19"
+  | X20 -> "w20" | X21 -> "w21" | X22 -> "w22" | X23 -> "w23"
+  | X24 -> "w24" | X25 -> "w25" | X26 -> "w26" | X27 -> "w27"
+  | X28 -> "w28" | X29 -> "w29" | X30 -> "w30"
+
+let xreg0_name = function RR0 r -> xreg_name r | XZR -> "xzr"
+let wreg0_name = function RR0 r -> wreg_name r | XZR -> "wzr"
+
+let xregsp_name = function RR1 r -> xreg_name r | XSP -> "sp"
+let wregsp_name = function RR1 r -> wreg_name r | XSP -> "wsp"
+
+let dreg_name = function
+| D0 -> "d0"   | D1 -> "d1"   | D2 -> "d2"   | D3 -> "d3"
+| D4 -> "d4"   | D5 -> "d5"   | D6 -> "d6"   | D7 -> "d7"
+| D8 -> "d8"   | D9 -> "d9"   | D10 -> "d10" | D11 -> "d11"
+| D12 -> "d12" | D13 -> "d13" | D14 -> "d14" | D15 -> "d15"
+| D16 -> "d16" | D17 -> "d17" | D18 -> "d18" | D19 -> "d19"
+| D20 -> "d20" | D21 -> "d21" | D22 -> "d22" | D23 -> "d23"
+| D24 -> "d24" | D25 -> "d25" | D26 -> "d26" | D27 -> "d27"
+| D28 -> "d28" | D29 -> "d29" | D30 -> "d30" | D31 -> "d31"
+
+let sreg_name = function
+| D0 -> "s0"   | D1 -> "s1"   | D2 -> "s2"   | D3 -> "s3"
+| D4 -> "s4"   | D5 -> "s5"   | D6 -> "s6"   | D7 -> "s7"
+| D8 -> "s8"   | D9 -> "s9"   | D10 -> "s10" | D11 -> "s11"
+| D12 -> "s12" | D13 -> "s13" | D14 -> "s14" | D15 -> "s15"
+| D16 -> "s16" | D17 -> "s17" | D18 -> "s18" | D19 -> "s19"
+| D20 -> "s20" | D21 -> "s21" | D22 -> "s22" | D23 -> "s23"
+| D24 -> "s24" | D25 -> "s25" | D26 -> "s26" | D27 -> "s27"
+| D28 -> "s28" | D29 -> "s29" | D30 -> "s30" | D31 -> "s31"
+
+let xreg oc r = output_string oc (xreg_name r)
+let wreg oc r = output_string oc (wreg_name r)
+let ireg oc (sz, r) =
+  output_string oc (match sz with X -> xreg_name r | W -> wreg_name r)
+
+let xreg0 oc r = output_string oc (xreg0_name r)
+let wreg0 oc r = output_string oc (wreg0_name r)
+let ireg0 oc (sz, r) =
+  output_string oc (match sz with X -> xreg0_name r | W -> wreg0_name r)
+
+let xregsp oc r = output_string oc (xregsp_name r)
+let iregsp oc (sz, r) =
+  output_string oc (match sz with X -> xregsp_name r | W -> wregsp_name r)
+
+let dreg oc r = output_string oc (dreg_name r)
+let sreg oc r = output_string oc (sreg_name r)
+let freg oc (sz, r) =
+  output_string oc (match sz with D -> dreg_name r | S -> sreg_name r)
+
+let preg_asm oc ty = function
+  | IR r -> if ty = Tint then wreg oc r else xreg oc r
+  | FR r -> if ty = Tsingle then sreg oc r else dreg oc r
+  | _    -> assert false
+
+let preg_annot = function
+  | IR r -> xreg_name r
+  | FR r -> dreg_name r
+  | _ -> assert false
+
+(* Base-2 log of a Caml integer *)
+let rec log2 n =
+  assert (n > 0);
+  if n = 1 then 0 else 1 + log2 (n lsr 1)
+
+(* System dependent printer functions *)
+
+module type SYSTEM =
+  sig
+    val comment: string
+    val raw_symbol: out_channel -> string -> unit
+    val symbol: out_channel -> P.t -> unit
+    val symbol_offset_high: out_channel -> P.t * Z.t -> unit
+    val symbol_offset_low: out_channel -> P.t * Z.t -> unit
+    val label: out_channel -> int -> unit
+    val label_high: out_channel -> int -> unit
+    val label_low: out_channel -> int -> unit
+    val load_symbol_address: out_channel -> ireg -> P.t -> unit
+    val name_of_section: section_name -> string
+    val print_fun_info:  out_channel -> P.t -> unit
+    val print_var_info: out_channel -> P.t -> unit
+    val print_comm_decl: out_channel -> P.t -> Z.t -> int -> unit
+    val print_lcomm_decl: out_channel -> P.t -> Z.t -> int -> unit
+  end
 
+module ELF_System : SYSTEM =
+>>>>>>> master
+  struct
     let comment = "//"
-
-    let symbol        = elf_symbol
-    let symbol_offset = elf_symbol_offset
-    let label         = elf_label
-
+    let raw_symbol = output_string
+    let symbol = elf_symbol
+    let symbol_offset_high = elf_symbol_offset
+    let symbol_offset_low oc id_ofs =
+      fprintf oc "#:lo12:%a" elf_symbol_offset id_ofs
+
+    let label = elf_label
+    let label_high = elf_label
+    let label_low oc lbl =
+      fprintf oc "#:lo12:%a" elf_label lbl
+
+<<<<<<< HEAD
     let print_label oc lbl = label oc (transl_label lbl)
 
     let intsz oc (sz, n) =
@@ -122,8 +250,18 @@ module Target (*: TARGET*) =
          failwith "_Thread_local unsupported on this platform"
       | Section_data(i, false) | Section_small_data i ->
           if i then ".data" else common_section ()
+=======
+    let load_symbol_address oc rd id =
+      fprintf oc "	adrp	%a, :got:%a\n" xreg rd symbol id;
+      fprintf oc "	ldr	%a, [%a, #:got_lo12:%a]\n" xreg rd xreg rd symbol id
+
+    let name_of_section = function
+      | Section_text         -> ".text"
+      | Section_data i | Section_small_data i ->
+          variable_section ~sec:".data" ~bss:".bss" i
+>>>>>>> master
       | Section_const i | Section_small_const i ->
-          if i || (not !Clflags.option_fcommon) then ".section	.rodata" else "COMM"
+          variable_section ~sec:".section	.rodata" i
       | Section_string       -> ".section	.rodata"
       | Section_literal      -> ".section	.rodata"
       | Section_jumptable    -> ".section	.rodata"
@@ -138,6 +276,94 @@ module Target (*: TARGET*) =
             s (if wr then "w" else "") (if ex then "x" else "")
       | Section_ais_annotation -> sprintf ".section	\"__compcert_ais_annotations\",\"\",@note"
 
+    let print_fun_info = elf_print_fun_info
+    let print_var_info = elf_print_var_info
+
+    let print_comm_decl oc name sz al =
+      fprintf oc "	.comm	%a, %s, %d\n" symbol name (Z.to_string sz) al
+
+    let print_lcomm_decl oc name sz al =
+      fprintf oc "	.local	%a\n" symbol name;
+      print_comm_decl oc name sz al
+    
+  end
+
+module MacOS_System : SYSTEM =
+  struct
+    let comment = ";"
+
+    let raw_symbol oc s =
+      fprintf oc "_%s" s
+
+    let symbol oc symb =
+      raw_symbol oc (extern_atom symb)
+
+    let symbol_offset_gen kind oc (id, ofs) =
+      fprintf oc "%a@%s" symbol id kind;
+      let ofs = camlint64_of_ptrofs ofs in
+      if ofs <> 0L then fprintf oc " + %Ld" ofs
+
+    let symbol_offset_high = symbol_offset_gen "PAGE"
+    let symbol_offset_low = symbol_offset_gen "PAGEOFF"
+
+    let label oc lbl =
+      fprintf oc "L%d" lbl
+
+    let label_high oc lbl =
+      fprintf oc "%a@PAGE" label lbl
+    let label_low oc lbl =
+      fprintf oc "%a@PAGEOFF" label lbl
+
+    let load_symbol_address oc rd id =
+      fprintf oc "	adrp	%a, %a@GOTPAGE\n" xreg rd symbol id;
+      fprintf oc "	ldr	%a, [%a, %a@GOTPAGEOFF]\n" xreg rd xreg rd symbol id
+
+    let name_of_section = function
+      | Section_text -> ".text"
+      | Section_data i | Section_small_data i ->
+          variable_section ~sec:".data" i
+      | Section_const i  | Section_small_const i ->
+          variable_section ~sec:".const" ~reloc:".const_data" i
+      | Section_string -> ".const"
+      | Section_literal -> ".const"
+      | Section_jumptable -> ".text"
+      | Section_user(s, wr, ex) ->
+          sprintf ".section	\"%s\", %s, %s"
+            (if wr then "__DATA" else "__TEXT") s
+            (if ex then "regular, pure_instructions" else "regular")
+      | Section_debug_info _ ->	".section	__DWARF,__debug_info,regular,debug"
+      | Section_debug_loc  -> ".section	__DWARF,__debug_loc,regular,debug"
+      | Section_debug_line _ -> ".section	__DWARF,__debug_line,regular,debug"
+      | Section_debug_str -> ".section	__DWARF,__debug_str,regular,debug"
+      | Section_debug_ranges -> ".section	__DWARF,__debug_ranges,regular,debug"
+      | Section_debug_abbrev -> ".section	__DWARF,__debug_abbrev,regular,debug"
+      | Section_ais_annotation -> assert false (* Not supported under MacOS *)
+
+    let print_fun_info _ _ = ()
+    let print_var_info _ _ = ()
+
+    let print_comm_decl oc name sz al =
+      fprintf oc "	.comm	%a, %s, %d\n"
+                 symbol name (Z.to_string sz) (log2 al)
+
+    let print_lcomm_decl oc name sz al =
+      fprintf oc "	.lcomm	%a, %s, %d\n"
+                 symbol name (Z.to_string sz) (log2 al)
+
+  end
+
+(* Module containing the printing functions *)
+
+module Target(System: SYSTEM): TARGET =
+  struct
+    include System
+
+(* Basic printing functions *)
+
+    let print_label oc lbl = label oc (transl_label lbl)
+
+(* Names of sections *)
+
     let section oc sec =
       fprintf oc "	%s\n" (name_of_section sec)
 
@@ -193,7 +419,7 @@ module Target (*: TARGET*) =
     | ADlsl(base, r, n) -> fprintf oc "[%a, %a, lsl #%a]" xregsp base xreg r coqint n
     | ADsxt(base, r, n) -> fprintf oc "[%a, %a, sxtw #%a]" xregsp base wreg r coqint n
     | ADuxt(base, r, n) -> fprintf oc "[%a, %a, uxtw #%a]" xregsp base wreg r coqint n
-    | ADadr(base, id, ofs) -> fprintf oc "[%a, #:lo12:%a]" xregsp base symbol_offset (id, ofs)
+    | ADadr(base, id, ofs) -> fprintf oc "[%a, %a]" xregsp base symbol_offset_low (id, ofs)
     | ADpostincr(base, n) -> fprintf oc "[%a], #%a" xregsp base coqint64 n
 
 (* Print a shifted operand *)
@@ -204,15 +430,15 @@ module Target (*: TARGET*) =
     | SOasr n -> fprintf oc ", asr #%a" coqint n
     | SOror n -> fprintf oc ", ror #%a" coqint n
 
-(* Print a sign- or zero-extended operand *)
-    let extendop oc = function
-    | EOsxtb n -> fprintf oc ", sxtb #%a" coqint n
-    | EOsxth n -> fprintf oc ", sxth #%a" coqint n
-    | EOsxtw n -> fprintf oc ", sxtw #%a" coqint n
-    | EOuxtb n -> fprintf oc ", uxtb #%a" coqint n
-    | EOuxth n -> fprintf oc ", uxth #%a" coqint n
-    | EOuxtw n -> fprintf oc ", uxtw #%a" coqint n
-    | EOuxtx n -> fprintf oc ", uxtx #%a" coqint n
+(* Print a sign- or zero-extended register operand *)
+    let regextend oc = function
+    | (r, EOsxtb n) -> fprintf oc "%a, sxtb #%a" wreg r coqint n
+    | (r, EOsxth n) -> fprintf oc "%a, sxth #%a" wreg r coqint n
+    | (r, EOsxtw n) -> fprintf oc "%a, sxtw #%a" wreg r coqint n
+    | (r, EOuxtb n) -> fprintf oc "%a, uxtb #%a" wreg r coqint n
+    | (r, EOuxth n) -> fprintf oc "%a, uxth #%a" wreg r coqint n
+    | (r, EOuxtw n) -> fprintf oc "%a, uxtw #%a" wreg r coqint n
+    | (r, EOuxtx n) -> fprintf oc "%a, uxtx #%a" xreg r coqint n
 
     let next_profiling_label =
       let atomic_incr_counter = ref 0 in
@@ -325,9 +551,9 @@ module Target (*: TARGET*) =
         fprintf oc "	movk	%a, #%d, lsl #%d\n" ireg (sz, rd) (Z.to_int n) (Z.to_int pos)
     (* PC-relative addressing *)
     | Padrp(rd, id, ofs) ->
-        fprintf oc "	adrp	%a, %a\n" xreg rd symbol_offset (id, ofs)
+        fprintf oc "	adrp	%a, %a\n" xreg rd symbol_offset_high (id, ofs)
     | Paddadr(rd, r1, id, ofs) ->
-        fprintf oc "	add	%a, %a, #:lo12:%a\n" xreg rd xreg r1 symbol_offset (id, ofs)
+        fprintf oc "	add	%a, %a, %a\n" xreg rd xreg r1 symbol_offset_low (id, ofs)
     (* Bit-field operations *)
     | Psbfiz(sz, rd, r1, r, s) ->
         fprintf oc "	sbfiz	%a, %a, %a, %d\n" ireg (sz, rd) ireg (sz, r1) coqint r (Z.to_int s)
@@ -348,13 +574,13 @@ module Target (*: TARGET*) =
         fprintf oc "	cmn	%a, %a%a\n" ireg0 (sz, r1) ireg (sz, r2) shiftop s
     (* Integer arithmetic, extending register *)
     | Paddext(rd, r1, r2, x) ->
-        fprintf oc "	add	%a, %a, %a%a\n" xregsp rd xregsp r1 wreg r2 extendop x
+        fprintf oc "	add	%a, %a, %a\n" xregsp rd xregsp r1 regextend (r2, x)
     | Psubext(rd, r1, r2, x) ->
-        fprintf oc "	sub	%a, %a, %a%a\n" xregsp rd xregsp r1 wreg r2 extendop x
+        fprintf oc "	sub	%a, %a, %a\n" xregsp rd xregsp r1 regextend (r2, x)
     | Pcmpext(r1, r2, x) ->
-        fprintf oc "	cmp	%a, %a%a\n" xreg r1 wreg r2 extendop x
+        fprintf oc "	cmp	%a, %a\n" xreg r1 regextend (r2, x)
     | Pcmnext(r1, r2, x) ->
-        fprintf oc "	cmn	%a, %a%a\n" xreg r1 wreg r2 extendop x
+        fprintf oc "	cmn	%a, %a\n" xreg r1 regextend (r2, x)
     (* Logical, shifted register *)
     | Pand(sz, rd, r1, r2, s) ->
         fprintf oc "	and	%a, %a, %a%a\n" ireg (sz, rd) ireg0 (sz, r1) ireg (sz, r2) shiftop s
@@ -434,8 +660,8 @@ module Target (*: TARGET*) =
           fprintf oc "	fmov	%a, #%.7f\n" dreg rd (Int64.float_of_bits d)
         else begin
           let lbl = label_literal64 d in
-          fprintf oc "	adrp	x16, %a\n" label lbl;
-          fprintf oc "	ldr	%a, [x16, #:lo12:%a] %s %.18g\n" dreg rd label lbl comment (Int64.float_of_bits d)
+          fprintf oc "	adrp	x16, %a\n" label_high lbl;
+          fprintf oc "	ldr	%a, [x16, %a] %s %.18g\n" dreg rd label_low lbl comment (Int64.float_of_bits d)
         end
     | Pfmovimms(rd, f) ->
         let d = camlint_of_coqint (Floats.Float32.to_bits f) in
@@ -443,8 +669,8 @@ module Target (*: TARGET*) =
           fprintf oc "	fmov	%a, #%.7f\n" sreg rd (Int32.float_of_bits d)
         else begin
           let lbl = label_literal32 d in
-          fprintf oc "	adrp	x16, %a\n" label lbl;
-          fprintf oc "	ldr	%a, [x16, #:lo12:%a] %s %.18g\n" sreg rd label lbl comment (Int32.float_of_bits d)
+          fprintf oc "	adrp	x16, %a\n" label_high lbl;
+          fprintf oc "	ldr	%a, [x16, %a] %s %.18g\n" sreg rd label_low lbl comment (Int32.float_of_bits d)
         end
     | Pfmovi(D, rd, r1) ->
         fprintf oc "	fmov	%a, %a\n" dreg rd xreg0 r1
@@ -511,8 +737,7 @@ module Target (*: TARGET*) =
     | Plabel lbl ->
         fprintf oc "%a:\n" print_label lbl
     | Ploadsymbol(rd, id) ->
-        fprintf oc "	adrp	%a, :got:%a\n" xreg rd symbol id;
-        fprintf oc "	ldr	%a, [%a, #:got_lo12:%a]\n" xreg rd xreg rd symbol id
+        load_symbol_address oc rd id
     | Pcvtsw2x(rd, r1) ->
         fprintf oc "	sxtw	%a, %a\n" xreg rd wreg r1
     | Pcvtuw2x(rd, r1) ->
@@ -577,19 +802,12 @@ module Target (*: TARGET*) =
           jumptables := []
         end
 
-    let print_fun_info = elf_print_fun_info
-
     let print_optional_fun_info _ = ()
 
-    let print_var_info = elf_print_var_info
-
     let print_comm_symb oc sz name align =
-      if C2C.atom_is_static name then
-        fprintf oc "	.local	%a\n" symbol name;
-        fprintf oc "	.comm	%a, %s, %d\n"
-        symbol name
-        (Z.to_string sz)
-        align
+      if C2C.atom_is_static name
+      then print_lcomm_decl oc name sz align
+      else print_comm_decl oc name sz align
 
     let print_instructions oc fn =
       current_function_sig := fn.fn_sig;
@@ -627,7 +845,7 @@ module Target (*: TARGET*) =
         section oc Section_text;
       end
 
-    let default_falignment = 2
+    let default_falignment = 4
 
     let cfi_startproc oc = ()
     let cfi_endproc oc = ()
@@ -635,4 +853,10 @@ module Target (*: TARGET*) =
   end
 
 let sel_target () =
-  (module Target:TARGET)
+  let module S =
+    (val (match Configuration.system with
+          | "linux" -> (module ELF_System : SYSTEM)
+          | "macos" -> (module MacOS_System : SYSTEM)
+          | _ -> invalid_arg ("System " ^ Configuration.system ^ " not supported"))
+     : SYSTEM) in
+  (module Target(S) : TARGET)
diff --git a/aarch64/extractionMachdep.v b/aarch64/TO_MERGE/extractionMachdep.v
index 69edeb55..947fa38b 100644
--- a/aarch64/extractionMachdep.v
+++ b/aarch64/TO_MERGE/extractionMachdep.v
@@ -15,13 +15,31 @@
 
 (* Additional extraction directives specific to the AArch64 port *)
 
-Require Archi Asm.
+Require Archi Asm Asmgen SelectOp.
 
 (* Archi *)
 
-Extract Constant Archi.pic_code => "fun () -> false".  (* for the time being *)
+Extract Constant Archi.abi =>
+  "match Configuration.abi with
+    | ""apple"" -> Apple
+    | _ -> AAPCS64".
+  
+(* SelectOp *)
+
+Extract Constant SelectOp.symbol_is_relocatable =>
+  "match Configuration.system with
+    | ""macos"" -> C2C.atom_is_extern
+    | _ -> (fun _ -> false)".
 
 (* Asm *)
+
 Extract Constant Asm.symbol_low => "fun _ _ _ -> assert false".
 Extract Constant Asm.symbol_high => "fun _ _ _ -> assert false".
+<<<<<<< HEAD
 Extract Constant Asmblockgen.symbol_is_aligned => "C2C.atom_is_aligned".
+=======
+
+(* Asmgen *)
+
+Extract Constant Asmgen.symbol_is_aligned => "C2C.atom_is_aligned".
+>>>>>>> master
author	Sylvain Boulmé <sylvain.boulme@univ-grenoble-alpes.fr>	2021-03-23 19:12:19 +0100
committer	Sylvain Boulmé <sylvain.boulme@univ-grenoble-alpes.fr>	2021-03-23 19:12:19 +0100
commit	dcb523736e82d72b03fa8d055bf74472dba7345c (patch)
tree	71e797c92d45dca509527043d233c51b2ed8fc86 /aarch64
parent	3e953ef41f736ed5b7db699b1adf21d46cb5b8db (diff)
parent	6bf310dd678285dc193798e89fc2c441d8430892 (diff)
download	compcert-kvx-dcb523736e82d72b03fa8d055bf74472dba7345c.tar.gz compcert-kvx-dcb523736e82d72b03fa8d055bf74472dba7345c.zip