diff options
author | Léo Gourdin <leo.gourdin@univ-grenoble-alpes.fr> | 2021-06-10 18:28:26 +0200 |
---|---|---|
committer | Léo Gourdin <leo.gourdin@univ-grenoble-alpes.fr> | 2021-06-10 18:28:26 +0200 |
commit | f16fa31ec9cc90da750c8cc10f447023962cd153 (patch) | |
tree | 28eed4d4b5bc964907f20332d1eed470a393d07b /aarch64 | |
parent | 485a4c0dd450e65745c83e59acdb40b42058e556 (diff) | |
parent | d703ae1ad5e1fcdc63e07b2a50a3e8576a11e61e (diff) | |
download | compcert-kvx-f16fa31ec9cc90da750c8cc10f447023962cd153.tar.gz compcert-kvx-f16fa31ec9cc90da750c8cc10f447023962cd153.zip |
Merge branch 'kvx-work' into BTL
Diffstat (limited to 'aarch64')
-rw-r--r-- | aarch64/Archi.v | 17 | ||||
-rw-r--r-- | aarch64/Asm.v | 2 | ||||
-rw-r--r-- | aarch64/Asmblock.v | 3 | ||||
-rw-r--r-- | aarch64/Asmblockgenproof.v | 15 | ||||
-rw-r--r-- | aarch64/Asmblockgenproof0.v | 43 | ||||
-rw-r--r-- | aarch64/Asmexpand.ml | 52 | ||||
-rw-r--r-- | aarch64/Asmgen.v | 1 | ||||
-rw-r--r-- | aarch64/Asmgenproof1.v | 1836 | ||||
-rw-r--r-- | aarch64/Builtins1.v | 9 | ||||
-rw-r--r-- | aarch64/CBuiltins.ml | 35 | ||||
-rw-r--r-- | aarch64/ConstpropOp.vp | 4 | ||||
-rw-r--r-- | aarch64/ConstpropOpproof.v | 2 | ||||
-rw-r--r-- | aarch64/Conventions1.v | 246 | ||||
-rw-r--r-- | aarch64/Op.v | 8 | ||||
-rw-r--r-- | aarch64/PeepholeOracle.ml | 11 | ||||
-rw-r--r-- | aarch64/PostpassSchedulingproof.v | 3 | ||||
-rw-r--r-- | aarch64/SelectLongproof.v | 24 | ||||
-rw-r--r-- | aarch64/SelectOp.vp | 10 | ||||
-rw-r--r-- | aarch64/SelectOpproof.v | 66 | ||||
-rw-r--r-- | aarch64/Stacklayout.v | 44 | ||||
-rw-r--r-- | aarch64/TargetPrinter.ml | 368 | ||||
-rw-r--r-- | aarch64/extractionMachdep.v | 25 |
22 files changed, 2473 insertions, 351 deletions
diff --git a/aarch64/Archi.v b/aarch64/Archi.v index 7f39d1fa..378ca0d1 100644 --- a/aarch64/Archi.v +++ b/aarch64/Archi.v @@ -6,10 +6,11 @@ (* *) (* Copyright Institut National de Recherche en Informatique et en *) (* Automatique. All rights reserved. This file is distributed *) -(* under the terms of the GNU General Public License as published by *) -(* the Free Software Foundation, either version 2 of the License, or *) -(* (at your option) any later version. This file is also distributed *) -(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* under the terms of the GNU Lesser General Public License as *) +(* published by the Free Software Foundation, either version 2.1 of *) +(* the License, or (at your option) any later version. *) +(* This file is also distributed under the terms of the *) +(* INRIA Non-Commercial License Agreement. *) (* *) (* *********************************************************************) @@ -85,8 +86,14 @@ Global Opaque ptr64 big_endian splitlong fma_order fma_invalid_mul_is_nan float_of_single_preserves_sNaN. -(** Whether to generate position-independent code or not *) +(** Which ABI to implement *) Parameter pic_code: unit -> bool. Definition has_notrap_loads := false. + +Inductive abi_kind: Type := + | AAPCS64 (**r ARM's standard as used in Linux and other ELF platforms *) + | Apple. (**r the variant used in macOS and iOS *) + +Parameter abi: abi_kind. diff --git a/aarch64/Asm.v b/aarch64/Asm.v index 067d32fb..e5111220 100644 --- a/aarch64/Asm.v +++ b/aarch64/Asm.v @@ -1398,7 +1398,7 @@ Ltac Equalities := split. auto. intros. destruct B; auto. subst. auto. - (* trace length *) red; intros. inv H; simpl. - omega. + lia. eapply external_call_trace_length; eauto. eapply external_call_trace_length; eauto. - (* initial states *) diff --git a/aarch64/Asmblock.v b/aarch64/Asmblock.v index c606002a..073f1f4c 100644 --- a/aarch64/Asmblock.v +++ b/aarch64/Asmblock.v @@ -37,6 +37,7 @@ Require Import Values Memory Events Globalenvs Smallstep. Require Import Locations Conventions. Require Stacklayout. Require Import OptionMonad Asm. +Require Import Lia. Require Export Asm. Local Open Scope option_monad_scope. @@ -437,7 +438,7 @@ Qed. Lemma size_positive (b:bblock): size b > 0. Proof. unfold size. destruct b as [hd bdy ex cor]. cbn. - destruct ex; destruct bdy; try (apply to_nat_pos; rewrite Nat2Z.id; cbn; omega); + destruct ex; destruct bdy; try (apply to_nat_pos; rewrite Nat2Z.id; cbn; lia); unfold non_empty_bblockb in cor; simpl in cor. inversion cor. Qed. diff --git a/aarch64/Asmblockgenproof.v b/aarch64/Asmblockgenproof.v index 6f7d39fa..11219928 100644 --- a/aarch64/Asmblockgenproof.v +++ b/aarch64/Asmblockgenproof.v @@ -19,6 +19,7 @@ Require Import Integers Floats AST Linking. Require Import Values Memory Events Globalenvs Smallstep. Require Import Op Locations Machblock Conventions Asmblock IterList. Require Import Asmblockgen Asmblockgenproof0 Asmblockgenproof1 Asmblockprops. +Require Import Lia. Module MB := Machblock. Module AB := Asmblock. @@ -71,7 +72,7 @@ Lemma transf_function_no_overflow: transf_function f = OK tf -> size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned. Proof. intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (size_blocks x.(fn_blocks))); inv EQ0. - omega. + lia. Qed. Hypothesis symbol_high_low: forall (id: ident) (ofs: ptrofs), @@ -298,8 +299,8 @@ Proof. split. unfold goto_label. rewrite P. rewrite H1. auto. split. rewrite Pregmap.gss. constructor; auto. rewrite Ptrofs.unsigned_repr. replace (pos' - 0) with pos' in Q. - auto. omega. - generalize (transf_function_no_overflow _ _ H0). omega. + auto. lia. + generalize (transf_function_no_overflow _ _ H0). lia. intros. apply Pregmap.gso; auto. Qed. @@ -389,7 +390,7 @@ Lemma mbsize_eqz: Proof. intros. destruct bb as [hd bdy ex]; simpl in *. unfold mbsize in H. remember (length _) as a. remember (length_opt _) as b. - assert (a = 0%nat) by omega. assert (b = 0%nat) by omega. subst. clear H. + assert (a = 0%nat) by lia. assert (b = 0%nat) by lia. subst. clear H. inv H0. inv H1. destruct bdy; destruct ex; auto. all: try discriminate. Qed. @@ -1452,11 +1453,11 @@ Proof. rewrite Pregmap.gso; auto. rewrite V; auto. } destruct EXEC_PROLOGUE as (rs3' & EXEC_PROLOGUE & Heqrs3'). exploit exec_straight_steps_2; eauto using functions_transl. - simpl fn_blocks. simpl fn_blocks in g. omega. constructor. + simpl fn_blocks. simpl fn_blocks in g. lia. constructor. intros (ofs' & X & Y). left; exists (State rs3' m3'); split. eapply exec_straight_steps_1; eauto. - simpl fn_blocks. simpl fn_blocks in g. omega. + simpl fn_blocks. simpl fn_blocks in g. lia. constructor. econstructor; eauto. rewrite X; econstructor; eauto. @@ -1495,7 +1496,7 @@ Local Transparent destroyed_at_function_entry. - (* return *) inv MS. inv STACKS. simpl in *. - right. split. omega. split. auto. + right. split. lia. split. auto. rewrite <- ATPC in H5. econstructor; eauto. congruence. Qed. diff --git a/aarch64/Asmblockgenproof0.v b/aarch64/Asmblockgenproof0.v index 03d863a3..004cfd5c 100644 --- a/aarch64/Asmblockgenproof0.v +++ b/aarch64/Asmblockgenproof0.v @@ -38,6 +38,7 @@ Require Import Asmblockgen. Require Import Conventions1. Require Import Axioms. Require Import Asmblockprops. +Require Import Lia. Module MB:=Machblock. Module AB:=Asmblock. @@ -395,7 +396,7 @@ Inductive code_tail: Z -> bblocks -> bblocks -> Prop := Lemma code_tail_pos: forall pos c1 c2, code_tail pos c1 c2 -> pos >= 0. Proof. - induction 1. omega. generalize (size_positive bi); intros; omega. + induction 1. lia. generalize (size_positive bi); intros; lia. Qed. Lemma find_bblock_tail: @@ -405,10 +406,10 @@ Lemma find_bblock_tail: Proof. induction c1; simpl; intros. inversion H. - destruct (zlt pos 0). generalize (code_tail_pos _ _ _ H); intro; omega. + destruct (zlt pos 0). generalize (code_tail_pos _ _ _ H); intro; lia. destruct (zeq pos 0). subst pos. - inv H. auto. generalize (size_positive a) (code_tail_pos _ _ _ H4). intro; omega. - inv H. congruence. replace (pos0 + size a - size a) with pos0 by omega. + inv H. auto. generalize (size_positive a) (code_tail_pos _ _ _ H4). intro; lia. + inv H. congruence. replace (pos0 + size a - size a) with pos0 by lia. eauto. Qed. @@ -422,13 +423,13 @@ Proof. induction 1; intros. - subst; eauto. - replace (pos + size bi + size bi0) with ((pos + size bi0) + size bi); eauto. - omega. + lia. Qed. Lemma size_blocks_pos c: 0 <= size_blocks c. Proof. - induction c as [| a l ]; simpl; try omega. - generalize (size_positive a); omega. + induction c as [| a l ]; simpl; try lia. + generalize (size_positive a); lia. Qed. Remark code_tail_positive: @@ -436,15 +437,15 @@ Remark code_tail_positive: code_tail ofs fn c -> 0 <= ofs. Proof. induction 1; intros; simpl. - - omega. - - generalize (size_positive bi). omega. + - lia. + - generalize (size_positive bi). lia. Qed. Remark code_tail_size: forall fn ofs c, code_tail ofs fn c -> size_blocks fn = ofs + size_blocks c. Proof. - induction 1; intros; simpl; try omega. + induction 1; intros; simpl; try lia. Qed. Remark code_tail_bounds fn ofs c: @@ -453,7 +454,7 @@ Proof. intro H; exploit code_tail_size; eauto. generalize (code_tail_positive _ _ _ H), (size_blocks_pos c). - omega. + lia. Qed. Local Hint Resolve code_tail_next: core. @@ -470,8 +471,8 @@ Proof. intros. rewrite Ptrofs.add_unsigned, Ptrofs.unsigned_repr. - rewrite Ptrofs.unsigned_repr; eauto. - omega. - - rewrite Ptrofs.unsigned_repr; omega. + lia. + - rewrite Ptrofs.unsigned_repr; lia. Qed. (** The [find_label] function returns the code tail starting at the @@ -505,12 +506,12 @@ Proof. simpl; intros until c'. case (is_label lbl a). - intros. inv H. exists pos. split; auto. split. - replace (pos - pos) with 0 by omega. constructor. constructor; try omega. - generalize (size_blocks_pos c). generalize (size_positive a). omega. + replace (pos - pos) with 0 by lia. constructor. constructor; try lia. + generalize (size_blocks_pos c). generalize (size_positive a). lia. - intros. generalize (IHc (pos+size a) c' H). intros [pos' [A [B C]]]. exists pos'. split. auto. split. - replace (pos' - pos) with ((pos' - (pos + (size a))) + (size a)) by omega. - constructor. auto. generalize (size_positive a). omega. + replace (pos' - pos) with ((pos' - (pos + (size a))) + (size a)) by lia. + constructor. auto. generalize (size_positive a). lia. Qed. (** Predictor for return addresses in generated Asm code. @@ -589,7 +590,7 @@ Proof. exists (Ptrofs.repr ofs). red; intros. rewrite Ptrofs.unsigned_repr. congruence. exploit code_tail_bounds; eauto. - intros; apply transf_function_len in TF. omega. + intros; apply transf_function_len in TF. lia. + exists Ptrofs.zero; red; intros. congruence. Qed. @@ -613,7 +614,7 @@ Inductive transl_code_at_pc (ge: MB.genv): Remark code_tail_no_bigger: forall pos c1 c2, code_tail pos c1 c2 -> (length c2 <= length c1)%nat. Proof. - induction 1; simpl; omega. + induction 1; simpl; lia. Qed. Remark code_tail_unique: @@ -621,8 +622,8 @@ Remark code_tail_unique: code_tail pos fn c -> code_tail pos' fn c -> pos = pos'. Proof. induction fn; intros until pos'; intros ITA CT; inv ITA; inv CT; auto. - generalize (code_tail_no_bigger _ _ _ H3); simpl; intro; omega. - generalize (code_tail_no_bigger _ _ _ H3); simpl; intro; omega. + generalize (code_tail_no_bigger _ _ _ H3); simpl; intro; lia. + generalize (code_tail_no_bigger _ _ _ H3); simpl; intro; lia. f_equal. eauto. Qed. diff --git a/aarch64/Asmexpand.ml b/aarch64/Asmexpand.ml index 8187e077..828c96d6 100644 --- a/aarch64/Asmexpand.ml +++ b/aarch64/Asmexpand.ml @@ -47,17 +47,28 @@ let expand_storeptr (src: ireg) (base: iregsp) ofs = (* Determine the number of int registers, FP registers, and stack locations used to pass the fixed parameters. *) +let align n a = (n + a - 1) land (-a) + +let typesize = function + | Tint | Tany32 | Tsingle -> 4 + | Tlong | Tany64 | Tfloat -> 8 + +let reserve_stack stk ty = + match Archi.abi with + | Archi.AAPCS64 -> stk + 8 + | Archi.Apple -> align stk (typesize ty) + typesize ty + let rec next_arg_locations ir fr stk = function | [] -> (ir, fr, stk) - | (Tint | Tlong | Tany32 | Tany64) :: l -> + | (Tint | Tlong | Tany32 | Tany64 as ty) :: l -> if ir < 8 then next_arg_locations (ir + 1) fr stk l - else next_arg_locations ir fr (stk + 8) l - | (Tfloat | Tsingle) :: l -> + else next_arg_locations ir fr (reserve_stack stk ty) l + | (Tfloat | Tsingle as ty) :: l -> if fr < 8 then next_arg_locations ir (fr + 1) stk l - else next_arg_locations ir fr (stk + 8) l + else next_arg_locations ir fr (reserve_stack stk ty) l (* Allocate memory on the stack and use it to save the registers used for parameter passing. As an optimization, do not save @@ -86,6 +97,8 @@ let save_parameter_registers ir fr = emit (Pstrd(float_param_regs.(i), ADimm(XSP, Z.of_uint pos))) done +let current_function_stacksize = ref 0L + (* Initialize a va_list as per va_start. Register r points to the following struct: @@ -98,11 +111,7 @@ let save_parameter_registers ir fr = } *) -let current_function_stacksize = ref 0L - -let expand_builtin_va_start r = - if not (is_current_function_variadic ()) then - invalid_arg "Fatal error: va_start used in non-vararg function"; +let expand_builtin_va_start_aapcs64 r = let (ir, fr, stk) = next_arg_locations 0 0 0 (get_current_function_args ()) in let stack_ofs = Int64.(add !current_function_stacksize (of_int stk)) @@ -127,6 +136,25 @@ let expand_builtin_va_start r = expand_loadimm32 X16 (coqint_of_camlint (Int32.of_int vr_offs)); emit (Pstrw(X16, ADimm(RR1 r, coqint_of_camlint64 28L))) +(* In macOS, va_list is just a pointer (char * ) and all variadic arguments + are passed on the stack. *) + +let expand_builtin_va_start_apple r = + let (ir, fr, stk) = + next_arg_locations 0 0 0 (get_current_function_args ()) in + let stk = align stk 8 in + let stack_ofs = Int64.(add !current_function_stacksize (of_int stk)) in + (* *va = sp + stack_ofs *) + expand_addimm64 (RR1 X16) XSP (coqint_of_camlint64 stack_ofs); + emit (Pstrx(X16, ADimm(RR1 r, coqint_of_camlint64 0L))) + +let expand_builtin_va_start r = + if not (is_current_function_variadic ()) then + invalid_arg "Fatal error: va_start used in non-vararg function"; + match Archi.abi with + | Archi.AAPCS64 -> expand_builtin_va_start_aapcs64 r + | Archi.Apple -> expand_builtin_va_start_apple r + (* Handling of annotations *) let expand_annot_val kind txt targ args res = @@ -327,8 +355,12 @@ let expand_builtin_inline name args res = (* Synchronization *) | "__builtin_membar", [], _ -> () + (* No operation *) | "__builtin_nop", [], _ -> emit Pnop + (* Optimization hint *) + | "__builtin_unreachable", [], _ -> + () (* Byte swap *) | ("__builtin_bswap" | "__builtin_bswap32"), [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) -> emit (Prev(W, res, a1)) @@ -382,7 +414,7 @@ let expand_instruction instr = match instr with | Pallocframe (sz, ofs) -> emit (Pmov (RR1 X29, XSP)); - if is_current_function_variadic() then begin + if is_current_function_variadic() && Archi.abi = Archi.AAPCS64 then begin let (ir, fr, _) = next_arg_locations 0 0 0 (get_current_function_args ()) in save_parameter_registers ir fr; diff --git a/aarch64/Asmgen.v b/aarch64/Asmgen.v index 45205158..6bb791c4 100644 --- a/aarch64/Asmgen.v +++ b/aarch64/Asmgen.v @@ -20,7 +20,6 @@ Require Import Errors AST Integers Floats Op. Require Import Locations Compopts. Require Import Mach Asm Asmblock Asmblockgen Machblockgen PostpassScheduling. - Local Open Scope error_monad_scope. (** Functions called by the Asmexpand ocaml file, inspired and adapted from Asmblockgen.v *) diff --git a/aarch64/Asmgenproof1.v b/aarch64/Asmgenproof1.v new file mode 100644 index 00000000..93c1f1ed --- /dev/null +++ b/aarch64/Asmgenproof1.v @@ -0,0 +1,1836 @@ +(* *********************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Xavier Leroy, Collège de France and INRIA Paris *) +(* *) +(* Copyright Institut National de Recherche en Informatique et en *) +(* Automatique. All rights reserved. This file is distributed *) +(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* *) +(* *********************************************************************) + +(** Correctness proof for AArch64 code generation: auxiliary results. *) + +Require Import Recdef Coqlib Zwf Zbits. +Require Import Maps Errors AST Integers Floats Values Memory Globalenvs. +Require Import Op Locations Mach Asm Conventions. +Require Import Asmgen. +Require Import Asmgenproof0. + +Local Transparent Archi.ptr64. + +(** Properties of registers *) + +Lemma preg_of_iregsp_not_PC: forall r, preg_of_iregsp r <> PC. +Proof. + destruct r; simpl; congruence. +Qed. +Global Hint Resolve preg_of_iregsp_not_PC: asmgen. + +Lemma preg_of_not_X16: forall r, preg_of r <> X16. +Proof. + destruct r; simpl; congruence. +Qed. + +Lemma ireg_of_not_X16: forall r x, ireg_of r = OK x -> x <> X16. +Proof. + unfold ireg_of; intros. destruct (preg_of r) eqn:E; inv H. + red; intros; subst x. elim (preg_of_not_X16 r); auto. +Qed. + +Lemma ireg_of_not_X16': forall r x, ireg_of r = OK x -> IR x <> IR X16. +Proof. + intros. apply ireg_of_not_X16 in H. congruence. +Qed. + +Global Hint Resolve preg_of_not_X16 ireg_of_not_X16 ireg_of_not_X16': asmgen. + +(** Useful simplification tactic *) + + +Ltac Simplif := + ((rewrite nextinstr_inv by eauto with asmgen) + || (rewrite nextinstr_inv1 by eauto with asmgen) + || (rewrite Pregmap.gss) + || (rewrite nextinstr_pc) + || (rewrite Pregmap.gso by eauto with asmgen)); auto with asmgen. + +Ltac Simpl := repeat Simplif. + +(** * Correctness of ARM constructor functions *) + +Section CONSTRUCTORS. + +Variable ge: genv. +Variable fn: function. + +(** Decomposition of integer literals *) + +Inductive wf_decomposition: list (Z * Z) -> Prop := + | wf_decomp_nil: + wf_decomposition nil + | wf_decomp_cons: forall m n p l, + n = Zzero_ext 16 m -> 0 <= p -> wf_decomposition l -> + wf_decomposition ((n, p) :: l). + +Lemma decompose_int_wf: + forall N n p, 0 <= p -> wf_decomposition (decompose_int N n p). +Proof. +Local Opaque Zzero_ext. + induction N as [ | N]; simpl; intros. +- constructor. +- set (frag := Zzero_ext 16 (Z.shiftr n p)) in *. destruct (Z.eqb frag 0). ++ apply IHN. lia. ++ econstructor. reflexivity. lia. apply IHN; lia. +Qed. + +Fixpoint recompose_int (accu: Z) (l: list (Z * Z)) : Z := + match l with + | nil => accu + | (n, p) :: l => recompose_int (Zinsert accu n p 16) l + end. + +Lemma decompose_int_correct: + forall N n p accu, + 0 <= p -> + (forall i, p <= i -> Z.testbit accu i = false) -> + (forall i, 0 <= i < p + Z.of_nat N * 16 -> + Z.testbit (recompose_int accu (decompose_int N n p)) i = + if zlt i p then Z.testbit accu i else Z.testbit n i). +Proof. + induction N as [ | N]; intros until accu; intros PPOS ABOVE i RANGE. +- simpl. rewrite zlt_true; auto. extlia. +- rewrite inj_S in RANGE. simpl. + set (frag := Zzero_ext 16 (Z.shiftr n p)). + assert (FRAG: forall i, p <= i < p + 16 -> Z.testbit n i = Z.testbit frag (i - p)). + { unfold frag; intros. rewrite Zzero_ext_spec by lia. rewrite zlt_true by lia. + rewrite Z.shiftr_spec by lia. f_equal; lia. } + destruct (Z.eqb_spec frag 0). ++ rewrite IHN. +* destruct (zlt i p). rewrite zlt_true by lia. auto. + destruct (zlt i (p + 16)); auto. + rewrite ABOVE by lia. rewrite FRAG by lia. rewrite e, Z.testbit_0_l. auto. +* lia. +* intros; apply ABOVE; lia. +* extlia. ++ simpl. rewrite IHN. +* destruct (zlt i (p + 16)). +** rewrite Zinsert_spec by lia. unfold proj_sumbool. + rewrite zlt_true by lia. + destruct (zlt i p). + rewrite zle_false by lia. auto. + rewrite zle_true by lia. simpl. symmetry; apply FRAG; lia. +** rewrite Z.ldiff_spec, Z.shiftl_spec by lia. + change 65535 with (two_p 16 - 1). rewrite Ztestbit_two_p_m1 by lia. + rewrite zlt_false by lia. rewrite zlt_false by lia. apply andb_true_r. +* lia. +* intros. rewrite Zinsert_spec by lia. unfold proj_sumbool. + rewrite zle_true by lia. rewrite zlt_false by lia. simpl. + apply ABOVE. lia. +* extlia. +Qed. + +Corollary decompose_int_eqmod: forall N n, + eqmod (two_power_nat (N * 16)%nat) (recompose_int 0 (decompose_int N n 0)) n. +Proof. + intros; apply eqmod_same_bits; intros. + rewrite decompose_int_correct. apply zlt_false; lia. + lia. intros; apply Z.testbit_0_l. extlia. +Qed. + +Corollary decompose_notint_eqmod: forall N n, + eqmod (two_power_nat (N * 16)%nat) + (Z.lnot (recompose_int 0 (decompose_int N (Z.lnot n) 0))) n. +Proof. + intros; apply eqmod_same_bits; intros. + rewrite Z.lnot_spec, decompose_int_correct. + rewrite zlt_false by lia. rewrite Z.lnot_spec by lia. apply negb_involutive. + lia. intros; apply Z.testbit_0_l. extlia. lia. +Qed. + +Lemma negate_decomposition_wf: + forall l, wf_decomposition l -> wf_decomposition (negate_decomposition l). +Proof. + induction 1; simpl; econstructor; auto. + instantiate (1 := (Z.lnot m)). + apply equal_same_bits; intros. + rewrite H. change 65535 with (two_p 16 - 1). + rewrite Z.lxor_spec, !Zzero_ext_spec, Z.lnot_spec, Ztestbit_two_p_m1 by lia. + destruct (zlt i 16). + apply xorb_true_r. + auto. +Qed. + +Lemma Zinsert_eqmod: + forall n x1 x2 y p l, 0 <= p -> 0 <= l -> + eqmod (two_power_nat n) x1 x2 -> + eqmod (two_power_nat n) (Zinsert x1 y p l) (Zinsert x2 y p l). +Proof. + intros. apply eqmod_same_bits; intros. rewrite ! Zinsert_spec by lia. + destruct (zle p i && zlt i (p + l)); auto. + apply same_bits_eqmod with n; auto. +Qed. + +Lemma Zinsert_0_l: + forall y p l, + 0 <= p -> 0 <= l -> + Z.shiftl (Zzero_ext l y) p = Zinsert 0 (Zzero_ext l y) p l. +Proof. + intros. apply equal_same_bits; intros. + rewrite Zinsert_spec by lia. unfold proj_sumbool. + destruct (zlt i p); [rewrite zle_false by lia|rewrite zle_true by lia]; simpl. +- rewrite Z.testbit_0_l, Z.shiftl_spec_low by auto. auto. +- rewrite Z.shiftl_spec by lia. + destruct (zlt i (p + l)); auto. + rewrite Zzero_ext_spec, zlt_false, Z.testbit_0_l by lia. auto. +Qed. + +Lemma recompose_int_negated: + forall l, wf_decomposition l -> + forall accu, recompose_int (Z.lnot accu) (negate_decomposition l) = Z.lnot (recompose_int accu l). +Proof. + induction 1; intros accu; simpl. +- auto. +- rewrite <- IHwf_decomposition. f_equal. apply equal_same_bits; intros. + rewrite Z.lnot_spec, ! Zinsert_spec, Z.lxor_spec, Z.lnot_spec by lia. + unfold proj_sumbool. + destruct (zle p i); simpl; auto. + destruct (zlt i (p + 16)); simpl; auto. + change 65535 with (two_p 16 - 1). + rewrite Ztestbit_two_p_m1 by lia. rewrite zlt_true by lia. + apply xorb_true_r. +Qed. + +Lemma exec_loadimm_k_w: + forall (rd: ireg) k m l, + wf_decomposition l -> + forall (rs: regset) accu, + rs#rd = Vint (Int.repr accu) -> + exists rs', + exec_straight_opt ge fn (loadimm_k W rd l k) rs m k rs' m + /\ rs'#rd = Vint (Int.repr (recompose_int accu l)) + /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. +Proof. + induction 1; intros rs accu ACCU; simpl. +- exists rs; split. apply exec_straight_opt_refl. auto. +- destruct (IHwf_decomposition + (nextinstr (rs#rd <- (insert_in_int rs#rd n p 16))) + (Zinsert accu n p 16)) + as (rs' & P & Q & R). + Simpl. rewrite ACCU. simpl. f_equal. apply Int.eqm_samerepr. + apply Zinsert_eqmod. auto. lia. apply Int.eqm_sym; apply Int.eqm_unsigned_repr. + exists rs'; split. + eapply exec_straight_opt_step_opt. simpl; eauto. auto. exact P. + split. exact Q. intros; Simpl. rewrite R by auto. Simpl. +Qed. + +Lemma exec_loadimm_z_w: + forall rd l k rs m, + wf_decomposition l -> + exists rs', + exec_straight ge fn (loadimm_z W rd l k) rs m k rs' m + /\ rs'#rd = Vint (Int.repr (recompose_int 0 l)) + /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. +Proof. + unfold loadimm_z; destruct 1. +- econstructor; split. + apply exec_straight_one. simpl; eauto. auto. + split. Simpl. + intros; Simpl. +- set (accu0 := Zinsert 0 n p 16). + set (rs1 := nextinstr (rs#rd <- (Vint (Int.repr accu0)))). + destruct (exec_loadimm_k_w rd k m l H1 rs1 accu0) as (rs2 & P & Q & R); auto. + unfold rs1; Simpl. + exists rs2; split. + eapply exec_straight_opt_step; eauto. + simpl. unfold rs1. do 5 f_equal. unfold accu0. rewrite H. apply Zinsert_0_l; lia. + reflexivity. + split. exact Q. + intros. rewrite R by auto. unfold rs1; Simpl. +Qed. + +Lemma exec_loadimm_n_w: + forall rd l k rs m, + wf_decomposition l -> + exists rs', + exec_straight ge fn (loadimm_n W rd l k) rs m k rs' m + /\ rs'#rd = Vint (Int.repr (Z.lnot (recompose_int 0 l))) + /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. +Proof. + unfold loadimm_n; destruct 1. +- econstructor; split. + apply exec_straight_one. simpl; eauto. auto. + split. Simpl. + intros; Simpl. +- set (accu0 := Z.lnot (Zinsert 0 n p 16)). + set (rs1 := nextinstr (rs#rd <- (Vint (Int.repr accu0)))). + destruct (exec_loadimm_k_w rd k m (negate_decomposition l) + (negate_decomposition_wf l H1) + rs1 accu0) as (rs2 & P & Q & R). + unfold rs1; Simpl. + exists rs2; split. + eapply exec_straight_opt_step; eauto. + simpl. unfold rs1. do 5 f_equal. + unfold accu0. f_equal. rewrite H. apply Zinsert_0_l; lia. + reflexivity. + split. unfold accu0 in Q; rewrite recompose_int_negated in Q by auto. exact Q. + intros. rewrite R by auto. unfold rs1; Simpl. +Qed. + +Lemma exec_loadimm32: + forall rd n k rs m, + exists rs', + exec_straight ge fn (loadimm32 rd n k) rs m k rs' m + /\ rs'#rd = Vint n + /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. +Proof. + unfold loadimm32, loadimm; intros. + destruct (is_logical_imm32 n). +- econstructor; split. + apply exec_straight_one. simpl; eauto. auto. + split. Simpl. rewrite Int.repr_unsigned, Int.or_zero_l; auto. + intros; Simpl. +- set (dz := decompose_int 2%nat (Int.unsigned n) 0). + set (dn := decompose_int 2%nat (Z.lnot (Int.unsigned n)) 0). + assert (A: Int.repr (recompose_int 0 dz) = n). + { transitivity (Int.repr (Int.unsigned n)). + apply Int.eqm_samerepr. apply decompose_int_eqmod. + apply Int.repr_unsigned. } + assert (B: Int.repr (Z.lnot (recompose_int 0 dn)) = n). + { transitivity (Int.repr (Int.unsigned n)). + apply Int.eqm_samerepr. apply decompose_notint_eqmod. + apply Int.repr_unsigned. } + destruct Nat.leb. ++ rewrite <- A. apply exec_loadimm_z_w. apply decompose_int_wf; lia. ++ rewrite <- B. apply exec_loadimm_n_w. apply decompose_int_wf; lia. +Qed. + +Lemma exec_loadimm_k_x: + forall (rd: ireg) k m l, + wf_decomposition l -> + forall (rs: regset) accu, + rs#rd = Vlong (Int64.repr accu) -> + exists rs', + exec_straight_opt ge fn (loadimm_k X rd l k) rs m k rs' m + /\ rs'#rd = Vlong (Int64.repr (recompose_int accu l)) + /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. +Proof. + induction 1; intros rs accu ACCU; simpl. +- exists rs; split. apply exec_straight_opt_refl. auto. +- destruct (IHwf_decomposition + (nextinstr (rs#rd <- (insert_in_long rs#rd n p 16))) + (Zinsert accu n p 16)) + as (rs' & P & Q & R). + Simpl. rewrite ACCU. simpl. f_equal. apply Int64.eqm_samerepr. + apply Zinsert_eqmod. auto. lia. apply Int64.eqm_sym; apply Int64.eqm_unsigned_repr. + exists rs'; split. + eapply exec_straight_opt_step_opt. simpl; eauto. auto. exact P. + split. exact Q. intros; Simpl. rewrite R by auto. Simpl. +Qed. + +Lemma exec_loadimm_z_x: + forall rd l k rs m, + wf_decomposition l -> + exists rs', + exec_straight ge fn (loadimm_z X rd l k) rs m k rs' m + /\ rs'#rd = Vlong (Int64.repr (recompose_int 0 l)) + /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. +Proof. + unfold loadimm_z; destruct 1. +- econstructor; split. + apply exec_straight_one. simpl; eauto. auto. + split. Simpl. + intros; Simpl. +- set (accu0 := Zinsert 0 n p 16). + set (rs1 := nextinstr (rs#rd <- (Vlong (Int64.repr accu0)))). + destruct (exec_loadimm_k_x rd k m l H1 rs1 accu0) as (rs2 & P & Q & R); auto. + unfold rs1; Simpl. + exists rs2; split. + eapply exec_straight_opt_step; eauto. + simpl. unfold rs1. do 5 f_equal. unfold accu0. rewrite H. apply Zinsert_0_l; lia. + reflexivity. + split. exact Q. + intros. rewrite R by auto. unfold rs1; Simpl. +Qed. + +Lemma exec_loadimm_n_x: + forall rd l k rs m, + wf_decomposition l -> + exists rs', + exec_straight ge fn (loadimm_n X rd l k) rs m k rs' m + /\ rs'#rd = Vlong (Int64.repr (Z.lnot (recompose_int 0 l))) + /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. +Proof. + unfold loadimm_n; destruct 1. +- econstructor; split. + apply exec_straight_one. simpl; eauto. auto. + split. Simpl. + intros; Simpl. +- set (accu0 := Z.lnot (Zinsert 0 n p 16)). + set (rs1 := nextinstr (rs#rd <- (Vlong (Int64.repr accu0)))). + destruct (exec_loadimm_k_x rd k m (negate_decomposition l) + (negate_decomposition_wf l H1) + rs1 accu0) as (rs2 & P & Q & R). + unfold rs1; Simpl. + exists rs2; split. + eapply exec_straight_opt_step; eauto. + simpl. unfold rs1. do 5 f_equal. + unfold accu0. f_equal. rewrite H. apply Zinsert_0_l; lia. + reflexivity. + split. unfold accu0 in Q; rewrite recompose_int_negated in Q by auto. exact Q. + intros. rewrite R by auto. unfold rs1; Simpl. +Qed. + +Lemma exec_loadimm64: + forall rd n k rs m, + exists rs', + exec_straight ge fn (loadimm64 rd n k) rs m k rs' m + /\ rs'#rd = Vlong n + /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. +Proof. + unfold loadimm64, loadimm; intros. + destruct (is_logical_imm64 n). +- econstructor; split. + apply exec_straight_one. simpl; eauto. auto. + split. Simpl. rewrite Int64.repr_unsigned, Int64.or_zero_l; auto. + intros; Simpl. +- set (dz := decompose_int 4%nat (Int64.unsigned n) 0). + set (dn := decompose_int 4%nat (Z.lnot (Int64.unsigned n)) 0). + assert (A: Int64.repr (recompose_int 0 dz) = n). + { transitivity (Int64.repr (Int64.unsigned n)). + apply Int64.eqm_samerepr. apply decompose_int_eqmod. + apply Int64.repr_unsigned. } + assert (B: Int64.repr (Z.lnot (recompose_int 0 dn)) = n). + { transitivity (Int64.repr (Int64.unsigned n)). + apply Int64.eqm_samerepr. apply decompose_notint_eqmod. + apply Int64.repr_unsigned. } + destruct Nat.leb. ++ rewrite <- A. apply exec_loadimm_z_x. apply decompose_int_wf; lia. ++ rewrite <- B. apply exec_loadimm_n_x. apply decompose_int_wf; lia. +Qed. + +(** Add immediate *) + +Lemma exec_addimm_aux_32: + forall (insn: iregsp -> iregsp -> Z -> instruction) (sem: val -> val -> val), + (forall rd r1 n rs m, + exec_instr ge fn (insn rd r1 n) rs m = + Next (nextinstr (rs#rd <- (sem rs#r1 (Vint (Int.repr n))))) m) -> + (forall v n1 n2, sem (sem v (Vint n1)) (Vint n2) = sem v (Vint (Int.add n1 n2))) -> + forall rd r1 n k rs m, + exists rs', + exec_straight ge fn (addimm_aux insn rd r1 (Int.unsigned n) k) rs m k rs' m + /\ rs'#rd = sem rs#r1 (Vint n) + /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. +Proof. + intros insn sem SEM ASSOC; intros. unfold addimm_aux. + set (nlo := Zzero_ext 12 (Int.unsigned n)). set (nhi := Int.unsigned n - nlo). + assert (E: Int.unsigned n = nhi + nlo) by (unfold nhi; lia). + rewrite <- (Int.repr_unsigned n). + destruct (Z.eqb_spec nhi 0); [|destruct (Z.eqb_spec nlo 0)]. +- econstructor; split. apply exec_straight_one. apply SEM. Simpl. + split. Simpl. do 3 f_equal; lia. + intros; Simpl. +- econstructor; split. apply exec_straight_one. apply SEM. Simpl. + split. Simpl. do 3 f_equal; lia. + intros; Simpl. +- econstructor; split. eapply exec_straight_two. + apply SEM. apply SEM. Simpl. Simpl. + split. Simpl. rewrite ASSOC. do 2 f_equal. apply Int.eqm_samerepr. + rewrite E. auto with ints. + intros; Simpl. +Qed. + +Lemma exec_addimm32: + forall rd r1 n k rs m, + r1 <> X16 -> + exists rs', + exec_straight ge fn (addimm32 rd r1 n k) rs m k rs' m + /\ rs'#rd = Val.add rs#r1 (Vint n) + /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. +Proof. + intros. unfold addimm32. set (nn := Int.neg n). + destruct (Int.eq n (Int.zero_ext 24 n)); [| destruct (Int.eq nn (Int.zero_ext 24 nn))]. +- apply exec_addimm_aux_32 with (sem := Val.add). auto. intros; apply Val.add_assoc. +- rewrite <- Val.sub_opp_add. + apply exec_addimm_aux_32 with (sem := Val.sub). auto. + intros. rewrite ! Val.sub_add_opp, Val.add_assoc. rewrite Int.neg_add_distr. auto. +- destruct (Int.lt n Int.zero). ++ rewrite <- Val.sub_opp_add; fold nn. + edestruct (exec_loadimm32 X16 nn) as (rs1 & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. auto. + split. Simpl. rewrite B, C; eauto with asmgen. + intros; Simpl. ++ edestruct (exec_loadimm32 X16 n) as (rs1 & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. auto. + split. Simpl. rewrite B, C; eauto with asmgen. + intros; Simpl. +Qed. + +Lemma exec_addimm_aux_64: + forall (insn: iregsp -> iregsp -> Z -> instruction) (sem: val -> val -> val), + (forall rd r1 n rs m, + exec_instr ge fn (insn rd r1 n) rs m = + Next (nextinstr (rs#rd <- (sem rs#r1 (Vlong (Int64.repr n))))) m) -> + (forall v n1 n2, sem (sem v (Vlong n1)) (Vlong n2) = sem v (Vlong (Int64.add n1 n2))) -> + forall rd r1 n k rs m, + exists rs', + exec_straight ge fn (addimm_aux insn rd r1 (Int64.unsigned n) k) rs m k rs' m + /\ rs'#rd = sem rs#r1 (Vlong n) + /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. +Proof. + intros insn sem SEM ASSOC; intros. unfold addimm_aux. + set (nlo := Zzero_ext 12 (Int64.unsigned n)). set (nhi := Int64.unsigned n - nlo). + assert (E: Int64.unsigned n = nhi + nlo) by (unfold nhi; lia). + rewrite <- (Int64.repr_unsigned n). + destruct (Z.eqb_spec nhi 0); [|destruct (Z.eqb_spec nlo 0)]. +- econstructor; split. apply exec_straight_one. apply SEM. Simpl. + split. Simpl. do 3 f_equal; lia. + intros; Simpl. +- econstructor; split. apply exec_straight_one. apply SEM. Simpl. + split. Simpl. do 3 f_equal; lia. + intros; Simpl. +- econstructor; split. eapply exec_straight_two. + apply SEM. apply SEM. Simpl. Simpl. + split. Simpl. rewrite ASSOC. do 2 f_equal. apply Int64.eqm_samerepr. + rewrite E. auto with ints. + intros; Simpl. +Qed. + +Lemma exec_addimm64: + forall rd r1 n k rs m, + preg_of_iregsp r1 <> X16 -> + exists rs', + exec_straight ge fn (addimm64 rd r1 n k) rs m k rs' m + /\ rs'#rd = Val.addl rs#r1 (Vlong n) + /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. +Proof. + intros. + unfold addimm64. set (nn := Int64.neg n). + destruct (Int64.eq n (Int64.zero_ext 24 n)); [| destruct (Int64.eq nn (Int64.zero_ext 24 nn))]. +- apply exec_addimm_aux_64 with (sem := Val.addl). auto. intros; apply Val.addl_assoc. +- rewrite <- Val.subl_opp_addl. + apply exec_addimm_aux_64 with (sem := Val.subl). auto. + intros. rewrite ! Val.subl_addl_opp, Val.addl_assoc. rewrite Int64.neg_add_distr. auto. +- destruct (Int64.lt n Int64.zero). ++ rewrite <- Val.subl_opp_addl; fold nn. + edestruct (exec_loadimm64 X16 nn) as (rs1 & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. Simpl. + split. Simpl. rewrite B, C; eauto with asmgen. simpl. rewrite Int64.shl'_zero. auto. + intros; Simpl. ++ edestruct (exec_loadimm64 X16 n) as (rs1 & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. Simpl. + split. Simpl. rewrite B, C; eauto with asmgen. simpl. rewrite Int64.shl'_zero. auto. + intros; Simpl. +Qed. + +(** Logical immediate *) + +Lemma exec_logicalimm32: + forall (insn1: ireg -> ireg0 -> Z -> instruction) + (insn2: ireg -> ireg0 -> ireg -> shift_op -> instruction) + (sem: val -> val -> val), + (forall rd r1 n rs m, + exec_instr ge fn (insn1 rd r1 n) rs m = + Next (nextinstr (rs#rd <- (sem rs##r1 (Vint (Int.repr n))))) m) -> + (forall rd r1 r2 s rs m, + exec_instr ge fn (insn2 rd r1 r2 s) rs m = + Next (nextinstr (rs#rd <- (sem rs##r1 (eval_shift_op_int rs#r2 s)))) m) -> + forall rd r1 n k rs m, + r1 <> X16 -> + exists rs', + exec_straight ge fn (logicalimm32 insn1 insn2 rd r1 n k) rs m k rs' m + /\ rs'#rd = sem rs#r1 (Vint n) + /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. +Proof. + intros until sem; intros SEM1 SEM2; intros. unfold logicalimm32. + destruct (is_logical_imm32 n). +- econstructor; split. + apply exec_straight_one. apply SEM1. reflexivity. + split. Simpl. rewrite Int.repr_unsigned; auto. intros; Simpl. +- edestruct (exec_loadimm32 X16 n) as (rs1 & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. + apply exec_straight_one. apply SEM2. reflexivity. + split. Simpl. f_equal; auto. apply C; auto with asmgen. + intros; Simpl. +Qed. + +Lemma exec_logicalimm64: + forall (insn1: ireg -> ireg0 -> Z -> instruction) + (insn2: ireg -> ireg0 -> ireg -> shift_op -> instruction) + (sem: val -> val -> val), + (forall rd r1 n rs m, + exec_instr ge fn (insn1 rd r1 n) rs m = + Next (nextinstr (rs#rd <- (sem rs###r1 (Vlong (Int64.repr n))))) m) -> + (forall rd r1 r2 s rs m, + exec_instr ge fn (insn2 rd r1 r2 s) rs m = + Next (nextinstr (rs#rd <- (sem rs###r1 (eval_shift_op_long rs#r2 s)))) m) -> + forall rd r1 n k rs m, + r1 <> X16 -> + exists rs', + exec_straight ge fn (logicalimm64 insn1 insn2 rd r1 n k) rs m k rs' m + /\ rs'#rd = sem rs#r1 (Vlong n) + /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. +Proof. + intros until sem; intros SEM1 SEM2; intros. unfold logicalimm64. + destruct (is_logical_imm64 n). +- econstructor; split. + apply exec_straight_one. apply SEM1. reflexivity. + split. Simpl. rewrite Int64.repr_unsigned. auto. intros; Simpl. +- edestruct (exec_loadimm64 X16 n) as (rs1 & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. + apply exec_straight_one. apply SEM2. reflexivity. + split. Simpl. f_equal; auto. apply C; auto with asmgen. + intros; Simpl. +Qed. + +(** Load address of symbol *) + +Lemma exec_loadsymbol: forall rd s ofs k rs m, + rd <> X16 \/ SelectOp.symbol_is_relocatable s = false -> + exists rs', + exec_straight ge fn (loadsymbol rd s ofs k) rs m k rs' m + /\ rs'#rd = Genv.symbol_address ge s ofs + /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. +Proof. + unfold loadsymbol; intros. destruct (SelectOp.symbol_is_relocatable s). +- predSpec Ptrofs.eq Ptrofs.eq_spec ofs Ptrofs.zero. ++ subst ofs. econstructor; split. + apply exec_straight_one; [simpl; eauto | reflexivity]. + split. Simpl. intros; Simpl. ++ exploit exec_addimm64. instantiate (1 := rd). simpl. destruct H; congruence. + intros (rs1 & A & B & C). + econstructor; split. + econstructor. simpl; eauto. auto. eexact A. + split. simpl in B; rewrite B. Simpl. + rewrite <- Genv.shift_symbol_address_64 by auto. + rewrite Ptrofs.add_zero_l, Ptrofs.of_int64_to_int64 by auto. auto. + intros. rewrite C by auto. Simpl. +- econstructor; split. + eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto. + split. Simpl. rewrite symbol_high_low; auto. + intros; Simpl. +Qed. + +(** Shifted operands *) + +Remark transl_shift_not_none: + forall s a, transl_shift s a <> SOnone. +Proof. + destruct s; intros; simpl; congruence. +Qed. + +Remark or_zero_eval_shift_op_int: + forall v s, s <> SOnone -> Val.or (Vint Int.zero) (eval_shift_op_int v s) = eval_shift_op_int v s. +Proof. + intros; destruct s; try congruence; destruct v; auto; simpl; + destruct (Int.ltu n Int.iwordsize); auto; rewrite Int.or_zero_l; auto. +Qed. + +Remark or_zero_eval_shift_op_long: + forall v s, s <> SOnone -> Val.orl (Vlong Int64.zero) (eval_shift_op_long v s) = eval_shift_op_long v s. +Proof. + intros; destruct s; try congruence; destruct v; auto; simpl; + destruct (Int.ltu n Int64.iwordsize'); auto; rewrite Int64.or_zero_l; auto. +Qed. + +Remark add_zero_eval_shift_op_long: + forall v s, s <> SOnone -> Val.addl (Vlong Int64.zero) (eval_shift_op_long v s) = eval_shift_op_long v s. +Proof. + intros; destruct s; try congruence; destruct v; auto; simpl; + destruct (Int.ltu n Int64.iwordsize'); auto; rewrite Int64.add_zero_l; auto. +Qed. + +Lemma transl_eval_shift: forall s v (a: amount32), + eval_shift_op_int v (transl_shift s a) = eval_shift s v a. +Proof. + intros. destruct s; simpl; auto. +Qed. + +Lemma transl_eval_shift': forall s v (a: amount32), + Val.or (Vint Int.zero) (eval_shift_op_int v (transl_shift s a)) = eval_shift s v a. +Proof. + intros. rewrite or_zero_eval_shift_op_int by (apply transl_shift_not_none). + apply transl_eval_shift. +Qed. + +Lemma transl_eval_shiftl: forall s v (a: amount64), + eval_shift_op_long v (transl_shift s a) = eval_shiftl s v a. +Proof. + intros. destruct s; simpl; auto. +Qed. + +Lemma transl_eval_shiftl': forall s v (a: amount64), + Val.orl (Vlong Int64.zero) (eval_shift_op_long v (transl_shift s a)) = eval_shiftl s v a. +Proof. + intros. rewrite or_zero_eval_shift_op_long by (apply transl_shift_not_none). + apply transl_eval_shiftl. +Qed. + +Lemma transl_eval_shiftl'': forall s v (a: amount64), + Val.addl (Vlong Int64.zero) (eval_shift_op_long v (transl_shift s a)) = eval_shiftl s v a. +Proof. + intros. rewrite add_zero_eval_shift_op_long by (apply transl_shift_not_none). + apply transl_eval_shiftl. +Qed. + +(** Zero- and Sign- extensions *) + +Lemma exec_move_extended_base: forall rd r1 ex k rs m, + exists rs', + exec_straight ge fn (move_extended_base rd r1 ex k) rs m k rs' m + /\ rs' rd = match ex with Xsgn32 => Val.longofint rs#r1 | Xuns32 => Val.longofintu rs#r1 end + /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. +Proof. + unfold move_extended_base; destruct ex; econstructor; + (split; [apply exec_straight_one; [simpl;eauto|auto] | split; [Simpl|intros;Simpl]]). +Qed. + +Lemma exec_move_extended: forall rd r1 ex (a: amount64) k rs m, + exists rs', + exec_straight ge fn (move_extended rd r1 ex a k) rs m k rs' m + /\ rs' rd = Op.eval_extend ex rs#r1 a + /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. +Proof. + unfold move_extended; intros. predSpec Int.eq Int.eq_spec a Int.zero. +- exploit (exec_move_extended_base rd r1 ex). intros (rs' & A & B & C). + exists rs'; split. eexact A. split. unfold Op.eval_extend. rewrite H. rewrite B. + destruct ex, (rs r1); simpl; auto; rewrite Int64.shl'_zero; auto. + auto. +- Local Opaque Val.addl. + exploit (exec_move_extended_base rd r1 ex). intros (rs' & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. apply exec_straight_one. + unfold exec_instr. change (SOlsl a) with (transl_shift Slsl a). rewrite transl_eval_shiftl''. eauto. auto. + split. Simpl. rewrite B. auto. + intros; Simpl. +Qed. + +Lemma exec_arith_extended: + forall (sem: val -> val -> val) + (insnX: iregsp -> iregsp -> ireg -> extend_op -> instruction) + (insnS: ireg -> ireg0 -> ireg -> shift_op -> instruction), + (forall rd r1 r2 x rs m, + exec_instr ge fn (insnX rd r1 r2 x) rs m = + Next (nextinstr (rs#rd <- (sem rs#r1 (eval_extend rs#r2 x)))) m) -> + (forall rd r1 r2 s rs m, + exec_instr ge fn (insnS rd r1 r2 s) rs m = + Next (nextinstr (rs#rd <- (sem rs###r1 (eval_shift_op_long rs#r2 s)))) m) -> + forall (rd r1 r2: ireg) (ex: extension) (a: amount64) (k: code) rs m, + r1 <> X16 -> + exists rs', + exec_straight ge fn (arith_extended insnX insnS rd r1 r2 ex a k) rs m k rs' m + /\ rs'#rd = sem rs#r1 (Op.eval_extend ex rs#r2 a) + /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. +Proof. + intros sem insnX insnS EX ES; intros. unfold arith_extended. destruct (Int.ltu a (Int.repr 5)). +- econstructor; split. + apply exec_straight_one. rewrite EX; eauto. auto. + split. Simpl. f_equal. destruct ex; auto. + intros; Simpl. +- exploit (exec_move_extended_base X16 r2 ex). intros (rs' & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. apply exec_straight_one. + rewrite ES. eauto. auto. + split. Simpl. unfold ir0x. rewrite C by eauto with asmgen. f_equal. + rewrite B. destruct ex; auto. + intros; Simpl. +Qed. + +(** Extended right shift *) + +Lemma exec_shrx32: forall (rd r1: ireg) (n: int) k v (rs: regset) m, + Val.shrx rs#r1 (Vint n) = Some v -> + r1 <> X16 -> + exists rs', + exec_straight ge fn (shrx32 rd r1 n k) rs m k rs' m + /\ rs'#rd = v + /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. +Proof. + unfold shrx32; intros. apply Val.shrx_shr_2 in H. + destruct (Int.eq n Int.zero) eqn:E. +- econstructor; split. apply exec_straight_one; [simpl;eauto|auto]. + split. Simpl. subst v; auto. intros; Simpl. +- econstructor; split. eapply exec_straight_three. + unfold exec_instr. rewrite or_zero_eval_shift_op_int by congruence. eauto. + simpl; eauto. + unfold exec_instr. rewrite or_zero_eval_shift_op_int by congruence. eauto. + auto. auto. auto. + split. subst v; Simpl. intros; Simpl. +Qed. + +Lemma exec_shrx64: forall (rd r1: ireg) (n: int) k v (rs: regset) m, + Val.shrxl rs#r1 (Vint n) = Some v -> + r1 <> X16 -> + exists rs', + exec_straight ge fn (shrx64 rd r1 n k) rs m k rs' m + /\ rs'#rd = v + /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. +Proof. + unfold shrx64; intros. apply Val.shrxl_shrl_2 in H. + destruct (Int.eq n Int.zero) eqn:E. +- econstructor; split. apply exec_straight_one; [simpl;eauto|auto]. + split. Simpl. subst v; auto. intros; Simpl. +- econstructor; split. eapply exec_straight_three. + unfold exec_instr. rewrite or_zero_eval_shift_op_long by congruence. eauto. + simpl; eauto. + unfold exec_instr. rewrite or_zero_eval_shift_op_long by congruence. eauto. + auto. auto. auto. + split. subst v; Simpl. intros; Simpl. +Qed. + +(** Condition bits *) + +Lemma compare_int_spec: forall rs v1 v2 m, + let rs' := compare_int rs v1 v2 m in + rs'#CN = (Val.negative (Val.sub v1 v2)) + /\ rs'#CZ = (Val.cmpu (Mem.valid_pointer m) Ceq v1 v2) + /\ rs'#CC = (Val.cmpu (Mem.valid_pointer m) Cge v1 v2) + /\ rs'#CV = (Val.sub_overflow v1 v2). +Proof. + intros; unfold rs'; auto. +Qed. + +Lemma eval_testcond_compare_sint: forall c v1 v2 b rs m, + Val.cmp_bool c v1 v2 = Some b -> + eval_testcond (cond_for_signed_cmp c) (compare_int rs v1 v2 m) = Some b. +Proof. + intros. generalize (compare_int_spec rs v1 v2 m). + set (rs' := compare_int rs v1 v2 m). intros (B & C & D & E). + unfold eval_testcond; rewrite B, C, D, E. + destruct v1; try discriminate; destruct v2; try discriminate. + simpl in H; inv H. + unfold Val.cmpu; simpl. destruct c; simpl. +- destruct (Int.eq i i0); auto. +- destruct (Int.eq i i0); auto. +- rewrite Int.lt_sub_overflow. destruct (Int.lt i i0); auto. +- rewrite Int.lt_sub_overflow, Int.not_lt. + destruct (Int.eq i i0), (Int.lt i i0); auto. +- rewrite Int.lt_sub_overflow, (Int.lt_not i). + destruct (Int.eq i i0), (Int.lt i i0); auto. +- rewrite Int.lt_sub_overflow. destruct (Int.lt i i0); auto. +Qed. + +Lemma eval_testcond_compare_uint: forall c v1 v2 b rs m, + Val.cmpu_bool (Mem.valid_pointer m) c v1 v2 = Some b -> + eval_testcond (cond_for_unsigned_cmp c) (compare_int rs v1 v2 m) = Some b. +Proof. + intros. generalize (compare_int_spec rs v1 v2 m). + set (rs' := compare_int rs v1 v2 m). intros (B & C & D & E). + unfold eval_testcond; rewrite B, C, D, E. + destruct v1; try discriminate; destruct v2; try discriminate. + simpl in H; inv H. + unfold Val.cmpu; simpl. destruct c; simpl. +- destruct (Int.eq i i0); auto. +- destruct (Int.eq i i0); auto. +- destruct (Int.ltu i i0); auto. +- rewrite (Int.not_ltu i). destruct (Int.eq i i0), (Int.ltu i i0); auto. +- rewrite (Int.ltu_not i). destruct (Int.eq i i0), (Int.ltu i i0); auto. +- destruct (Int.ltu i i0); auto. +Qed. + +Lemma compare_long_spec: forall rs v1 v2 m, + let rs' := compare_long rs v1 v2 m in + rs'#CN = (Val.negativel (Val.subl v1 v2)) + /\ rs'#CZ = (Val.maketotal (Val.cmplu (Mem.valid_pointer m) Ceq v1 v2)) + /\ rs'#CC = (Val.maketotal (Val.cmplu (Mem.valid_pointer m) Cge v1 v2)) + /\ rs'#CV = (Val.subl_overflow v1 v2). +Proof. + intros; unfold rs'; auto. +Qed. + +Remark int64_sub_overflow: + forall x y, + Int.xor (Int.repr (Int64.unsigned (Int64.sub_overflow x y Int64.zero))) + (Int.repr (Int64.unsigned (Int64.negative (Int64.sub x y)))) = + (if Int64.lt x y then Int.one else Int.zero). +Proof. + intros. + transitivity (Int.repr (Int64.unsigned (if Int64.lt x y then Int64.one else Int64.zero))). + rewrite <- (Int64.lt_sub_overflow x y). + unfold Int64.sub_overflow, Int64.negative. + set (s := Int64.signed x - Int64.signed y - Int64.signed Int64.zero). + destruct (zle Int64.min_signed s && zle s Int64.max_signed); + destruct (Int64.lt (Int64.sub x y) Int64.zero); + auto. + destruct (Int64.lt x y); auto. +Qed. + +Lemma eval_testcond_compare_slong: forall c v1 v2 b rs m, + Val.cmpl_bool c v1 v2 = Some b -> + eval_testcond (cond_for_signed_cmp c) (compare_long rs v1 v2 m) = Some b. +Proof. + intros. generalize (compare_long_spec rs v1 v2 m). + set (rs' := compare_long rs v1 v2 m). intros (B & C & D & E). + unfold eval_testcond; rewrite B, C, D, E. + destruct v1; try discriminate; destruct v2; try discriminate. + simpl in H; inv H. + unfold Val.cmplu; simpl. destruct c; simpl. +- destruct (Int64.eq i i0); auto. +- destruct (Int64.eq i i0); auto. +- rewrite int64_sub_overflow. destruct (Int64.lt i i0); auto. +- rewrite int64_sub_overflow, Int64.not_lt. + destruct (Int64.eq i i0), (Int64.lt i i0); auto. +- rewrite int64_sub_overflow, (Int64.lt_not i). + destruct (Int64.eq i i0), (Int64.lt i i0); auto. +- rewrite int64_sub_overflow. destruct (Int64.lt i i0); auto. +Qed. + +Lemma eval_testcond_compare_ulong: forall c v1 v2 b rs m, + Val.cmplu_bool (Mem.valid_pointer m) c v1 v2 = Some b -> + eval_testcond (cond_for_unsigned_cmp c) (compare_long rs v1 v2 m) = Some b. +Proof. + intros. generalize (compare_long_spec rs v1 v2 m). + set (rs' := compare_long rs v1 v2 m). intros (B & C & D & E). + unfold eval_testcond; rewrite B, C, D, E; unfold Val.cmplu. + destruct v1; try discriminate; destruct v2; try discriminate; simpl in H. +- (* int-int *) + inv H. destruct c; simpl. ++ destruct (Int64.eq i i0); auto. ++ destruct (Int64.eq i i0); auto. ++ destruct (Int64.ltu i i0); auto. ++ rewrite (Int64.not_ltu i). destruct (Int64.eq i i0), (Int64.ltu i i0); auto. ++ rewrite (Int64.ltu_not i). destruct (Int64.eq i i0), (Int64.ltu i i0); auto. ++ destruct (Int64.ltu i i0); auto. +- (* int-ptr *) + simpl. + destruct (Int64.eq i Int64.zero && + (Mem.valid_pointer m b0 (Ptrofs.unsigned i0) + || Mem.valid_pointer m b0 (Ptrofs.unsigned i0 - 1))); try discriminate. + destruct c; simpl in H; inv H; reflexivity. +- (* ptr-int *) + simpl. + destruct (Int64.eq i0 Int64.zero && + (Mem.valid_pointer m b0 (Ptrofs.unsigned i) + || Mem.valid_pointer m b0 (Ptrofs.unsigned i - 1))); try discriminate. + destruct c; simpl in H; inv H; reflexivity. +- (* ptr-ptr *) + simpl. + destruct (eq_block b0 b1). ++ destruct ((Mem.valid_pointer m b0 (Ptrofs.unsigned i) + || Mem.valid_pointer m b0 (Ptrofs.unsigned i - 1)) && + (Mem.valid_pointer m b1 (Ptrofs.unsigned i0) + || Mem.valid_pointer m b1 (Ptrofs.unsigned i0 - 1))); + inv H. + destruct c; simpl. +* destruct (Ptrofs.eq i i0); auto. +* destruct (Ptrofs.eq i i0); auto. +* destruct (Ptrofs.ltu i i0); auto. +* rewrite (Ptrofs.not_ltu i). destruct (Ptrofs.eq i i0), (Ptrofs.ltu i i0); auto. +* rewrite (Ptrofs.ltu_not i). destruct (Ptrofs.eq i i0), (Ptrofs.ltu i i0); auto. +* destruct (Ptrofs.ltu i i0); auto. ++ destruct (Mem.valid_pointer m b0 (Ptrofs.unsigned i) && + Mem.valid_pointer m b1 (Ptrofs.unsigned i0)); try discriminate. + destruct c; simpl in H; inv H; reflexivity. +Qed. + +Lemma compare_float_spec: forall rs f1 f2, + let rs' := compare_float rs (Vfloat f1) (Vfloat f2) in + rs'#CN = (Val.of_bool (Float.cmp Clt f1 f2)) + /\ rs'#CZ = (Val.of_bool (Float.cmp Ceq f1 f2)) + /\ rs'#CC = (Val.of_bool (negb (Float.cmp Clt f1 f2))) + /\ rs'#CV = (Val.of_bool (negb (Float.ordered f1 f2))). +Proof. + intros; auto. +Qed. + +Lemma eval_testcond_compare_float: forall c v1 v2 b rs, + Val.cmpf_bool c v1 v2 = Some b -> + eval_testcond (cond_for_float_cmp c) (compare_float rs v1 v2) = Some b. +Proof. + intros. destruct v1; try discriminate; destruct v2; simpl in H; inv H. + generalize (compare_float_spec rs f f0). + set (rs' := compare_float rs (Vfloat f) (Vfloat f0)). + intros (B & C & D & E). + unfold eval_testcond; rewrite B, C, D, E. +Local Transparent Float.cmp Float.ordered. + unfold Float.cmp, Float.ordered; + destruct c; destruct (Float.compare f f0) as [[]|]; reflexivity. +Qed. + +Lemma eval_testcond_compare_not_float: forall c v1 v2 b rs, + option_map negb (Val.cmpf_bool c v1 v2) = Some b -> + eval_testcond (cond_for_float_not_cmp c) (compare_float rs v1 v2) = Some b. +Proof. + intros. destruct v1; try discriminate; destruct v2; simpl in H; inv H. + generalize (compare_float_spec rs f f0). + set (rs' := compare_float rs (Vfloat f) (Vfloat f0)). + intros (B & C & D & E). + unfold eval_testcond; rewrite B, C, D, E. +Local Transparent Float.cmp Float.ordered. + unfold Float.cmp, Float.ordered; + destruct c; destruct (Float.compare f f0) as [[]|]; reflexivity. +Qed. + +Lemma compare_single_spec: forall rs f1 f2, + let rs' := compare_single rs (Vsingle f1) (Vsingle f2) in + rs'#CN = (Val.of_bool (Float32.cmp Clt f1 f2)) + /\ rs'#CZ = (Val.of_bool (Float32.cmp Ceq f1 f2)) + /\ rs'#CC = (Val.of_bool (negb (Float32.cmp Clt f1 f2))) + /\ rs'#CV = (Val.of_bool (negb (Float32.ordered f1 f2))). +Proof. + intros; auto. +Qed. + +Lemma eval_testcond_compare_single: forall c v1 v2 b rs, + Val.cmpfs_bool c v1 v2 = Some b -> + eval_testcond (cond_for_float_cmp c) (compare_single rs v1 v2) = Some b. +Proof. + intros. destruct v1; try discriminate; destruct v2; simpl in H; inv H. + generalize (compare_single_spec rs f f0). + set (rs' := compare_single rs (Vsingle f) (Vsingle f0)). + intros (B & C & D & E). + unfold eval_testcond; rewrite B, C, D, E. +Local Transparent Float32.cmp Float32.ordered. + unfold Float32.cmp, Float32.ordered; + destruct c; destruct (Float32.compare f f0) as [[]|]; reflexivity. +Qed. + +Lemma eval_testcond_compare_not_single: forall c v1 v2 b rs, + option_map negb (Val.cmpfs_bool c v1 v2) = Some b -> + eval_testcond (cond_for_float_not_cmp c) (compare_single rs v1 v2) = Some b. +Proof. + intros. destruct v1; try discriminate; destruct v2; simpl in H; inv H. + generalize (compare_single_spec rs f f0). + set (rs' := compare_single rs (Vsingle f) (Vsingle f0)). + intros (B & C & D & E). + unfold eval_testcond; rewrite B, C, D, E. +Local Transparent Float32.cmp Float32.ordered. + unfold Float32.cmp, Float32.ordered; + destruct c; destruct (Float32.compare f f0) as [[]|]; reflexivity. +Qed. + +Remark compare_float_inv: forall rs v1 v2 r, + match r with CR _ => False | _ => True end -> + (nextinstr (compare_float rs v1 v2))#r = (nextinstr rs)#r. +Proof. + intros; unfold compare_float. + destruct r; try contradiction; destruct v1; auto; destruct v2; auto. +Qed. + +Remark compare_single_inv: forall rs v1 v2 r, + match r with CR _ => False | _ => True end -> + (nextinstr (compare_single rs v1 v2))#r = (nextinstr rs)#r. +Proof. + intros; unfold compare_single. + destruct r; try contradiction; destruct v1; auto; destruct v2; auto. +Qed. + +(** Translation of conditionals *) + +Ltac ArgsInv := + repeat (match goal with + | [ H: Error _ = OK _ |- _ ] => discriminate + | [ H: match ?args with nil => _ | _ :: _ => _ end = OK _ |- _ ] => destruct args + | [ H: bind _ _ = OK _ |- _ ] => monadInv H + | [ H: match _ with left _ => _ | right _ => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv + | [ H: match _ with true => _ | false => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv + end); + subst; + repeat (match goal with + | [ H: ireg_of _ = OK _ |- _ ] => simpl in *; rewrite (ireg_of_eq _ _ H) in * + | [ H: freg_of _ = OK _ |- _ ] => simpl in *; rewrite (freg_of_eq _ _ H) in * + end). + +Lemma transl_cond_correct: + forall cond args k c rs m, + transl_cond cond args k = OK c -> + exists rs', + exec_straight ge fn c rs m k rs' m + /\ (forall b, + eval_condition cond (map rs (map preg_of args)) m = Some b -> + eval_testcond (cond_for_cond cond) rs' = Some b) + /\ forall r, data_preg r = true -> rs'#r = rs#r. +Proof. + intros until m; intros TR. destruct cond; simpl in TR; ArgsInv. +- (* Ccomp *) + econstructor; split. apply exec_straight_one. simpl; eauto. auto. + split; intros. apply eval_testcond_compare_sint; auto. + destruct r; reflexivity || discriminate. +- (* Ccompu *) + econstructor; split. apply exec_straight_one. simpl; eauto. auto. + split; intros. apply eval_testcond_compare_uint; auto. + destruct r; reflexivity || discriminate. +- (* Ccompimm *) + destruct (is_arith_imm32 n); [|destruct (is_arith_imm32 (Int.neg n))]. ++ econstructor; split. apply exec_straight_one. simpl; eauto. auto. + split; intros. rewrite Int.repr_unsigned. apply eval_testcond_compare_sint; auto. + destruct r; reflexivity || discriminate. ++ econstructor; split. + apply exec_straight_one. simpl. rewrite Int.repr_unsigned, Int.neg_involutive. eauto. auto. + split; intros. apply eval_testcond_compare_sint; auto. + destruct r; reflexivity || discriminate. ++ exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. apply exec_straight_one. + simpl. rewrite B, C by eauto with asmgen. eauto. auto. + split; intros. apply eval_testcond_compare_sint; auto. + transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. +- (* Ccompuimm *) + destruct (is_arith_imm32 n); [|destruct (is_arith_imm32 (Int.neg n))]. ++ econstructor; split. apply exec_straight_one. simpl; eauto. auto. + split; intros. rewrite Int.repr_unsigned. apply eval_testcond_compare_uint; auto. + destruct r; reflexivity || discriminate. ++ econstructor; split. + apply exec_straight_one. simpl. rewrite Int.repr_unsigned, Int.neg_involutive. eauto. auto. + split; intros. apply eval_testcond_compare_uint; auto. + destruct r; reflexivity || discriminate. ++ exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. apply exec_straight_one. + simpl. rewrite B, C by eauto with asmgen. eauto. auto. + split; intros. apply eval_testcond_compare_uint; auto. + transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. +- (* Ccompshift *) + econstructor; split. apply exec_straight_one. simpl; eauto. auto. + split; intros. rewrite transl_eval_shift. apply eval_testcond_compare_sint; auto. + destruct r; reflexivity || discriminate. +- (* Ccompushift *) + econstructor; split. apply exec_straight_one. simpl; eauto. auto. + split; intros. rewrite transl_eval_shift. apply eval_testcond_compare_uint; auto. + destruct r; reflexivity || discriminate. +- (* Cmaskzero *) + destruct (is_logical_imm32 n). ++ econstructor; split. apply exec_straight_one. simpl; eauto. auto. + split; intros. rewrite Int.repr_unsigned. apply (eval_testcond_compare_sint Ceq); auto. + destruct r; reflexivity || discriminate. ++ exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. + apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto. + split; intros. apply (eval_testcond_compare_sint Ceq); auto. + transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. +- (* Cmasknotzero *) + destruct (is_logical_imm32 n). ++ econstructor; split. apply exec_straight_one. simpl; eauto. auto. + split; intros. rewrite Int.repr_unsigned. apply (eval_testcond_compare_sint Cne); auto. + destruct r; reflexivity || discriminate. ++ exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. + apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto. + split; intros. apply (eval_testcond_compare_sint Cne); auto. + transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. +- (* Ccompl *) + econstructor; split. apply exec_straight_one. simpl; eauto. auto. + split; intros. apply eval_testcond_compare_slong; auto. + destruct r; reflexivity || discriminate. +- (* Ccomplu *) + econstructor; split. apply exec_straight_one. simpl; eauto. auto. + split; intros. apply eval_testcond_compare_ulong; auto. + destruct r; reflexivity || discriminate. +- (* Ccomplimm *) + destruct (is_arith_imm64 n); [|destruct (is_arith_imm64 (Int64.neg n))]. ++ econstructor; split. apply exec_straight_one. simpl; eauto. auto. + split; intros. rewrite Int64.repr_unsigned. apply eval_testcond_compare_slong; auto. + destruct r; reflexivity || discriminate. ++ econstructor; split. + apply exec_straight_one. simpl. rewrite Int64.repr_unsigned, Int64.neg_involutive. eauto. auto. + split; intros. apply eval_testcond_compare_slong; auto. + destruct r; reflexivity || discriminate. ++ exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. apply exec_straight_one. + simpl. rewrite B, C by eauto with asmgen. eauto. auto. + split; intros. apply eval_testcond_compare_slong; auto. + transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. +- (* Ccompluimm *) + destruct (is_arith_imm64 n); [|destruct (is_arith_imm64 (Int64.neg n))]. ++ econstructor; split. apply exec_straight_one. simpl; eauto. auto. + split; intros. rewrite Int64.repr_unsigned. apply eval_testcond_compare_ulong; auto. + destruct r; reflexivity || discriminate. ++ econstructor; split. + apply exec_straight_one. simpl. rewrite Int64.repr_unsigned, Int64.neg_involutive. eauto. auto. + split; intros. apply eval_testcond_compare_ulong; auto. + destruct r; reflexivity || discriminate. ++ exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. apply exec_straight_one. + simpl. rewrite B, C by eauto with asmgen. eauto. auto. + split; intros. apply eval_testcond_compare_ulong; auto. + transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. +- (* Ccomplshift *) + econstructor; split. apply exec_straight_one. simpl; eauto. auto. + split; intros. rewrite transl_eval_shiftl. apply eval_testcond_compare_slong; auto. + destruct r; reflexivity || discriminate. +- (* Ccomplushift *) + econstructor; split. apply exec_straight_one. simpl; eauto. auto. + split; intros. rewrite transl_eval_shiftl. apply eval_testcond_compare_ulong; auto. + destruct r; reflexivity || discriminate. +- (* Cmasklzero *) + destruct (is_logical_imm64 n). ++ econstructor; split. apply exec_straight_one. simpl; eauto. auto. + split; intros. rewrite Int64.repr_unsigned. apply (eval_testcond_compare_slong Ceq); auto. + destruct r; reflexivity || discriminate. ++ exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. + apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto. + split; intros. apply (eval_testcond_compare_slong Ceq); auto. + transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. +- (* Cmasknotzero *) + destruct (is_logical_imm64 n). ++ econstructor; split. apply exec_straight_one. simpl; eauto. auto. + split; intros. rewrite Int64.repr_unsigned. apply (eval_testcond_compare_slong Cne); auto. + destruct r; reflexivity || discriminate. ++ exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. + apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto. + split; intros. apply (eval_testcond_compare_slong Cne); auto. + transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. +- (* Ccompf *) + econstructor; split. apply exec_straight_one. simpl; eauto. + rewrite compare_float_inv; auto. + split; intros. apply eval_testcond_compare_float; auto. + destruct r; discriminate || rewrite compare_float_inv; auto. +- (* Cnotcompf *) + econstructor; split. apply exec_straight_one. simpl; eauto. + rewrite compare_float_inv; auto. + split; intros. apply eval_testcond_compare_not_float; auto. + destruct r; discriminate || rewrite compare_float_inv; auto. +- (* Ccompfzero *) + econstructor; split. apply exec_straight_one. simpl; eauto. + rewrite compare_float_inv; auto. + split; intros. apply eval_testcond_compare_float; auto. + destruct r; discriminate || rewrite compare_float_inv; auto. +- (* Cnotcompfzero *) + econstructor; split. apply exec_straight_one. simpl; eauto. + rewrite compare_float_inv; auto. + split; intros. apply eval_testcond_compare_not_float; auto. + destruct r; discriminate || rewrite compare_float_inv; auto. +- (* Ccompfs *) + econstructor; split. apply exec_straight_one. simpl; eauto. + rewrite compare_single_inv; auto. + split; intros. apply eval_testcond_compare_single; auto. + destruct r; discriminate || rewrite compare_single_inv; auto. +- (* Cnotcompfs *) + econstructor; split. apply exec_straight_one. simpl; eauto. + rewrite compare_single_inv; auto. + split; intros. apply eval_testcond_compare_not_single; auto. + destruct r; discriminate || rewrite compare_single_inv; auto. +- (* Ccompfszero *) + econstructor; split. apply exec_straight_one. simpl; eauto. + rewrite compare_single_inv; auto. + split; intros. apply eval_testcond_compare_single; auto. + destruct r; discriminate || rewrite compare_single_inv; auto. +- (* Cnotcompfszero *) + econstructor; split. apply exec_straight_one. simpl; eauto. + rewrite compare_single_inv; auto. + split; intros. apply eval_testcond_compare_not_single; auto. + destruct r; discriminate || rewrite compare_single_inv; auto. +Qed. + +(** Translation of conditional branches *) + +Lemma transl_cond_branch_correct: + forall cond args lbl k c rs m b, + transl_cond_branch cond args lbl k = OK c -> + eval_condition cond (map rs (map preg_of args)) m = Some b -> + exists rs' insn, + exec_straight_opt ge fn c rs m (insn :: k) rs' m + /\ exec_instr ge fn insn rs' m = + (if b then goto_label fn lbl rs' m else Next (nextinstr rs') m) + /\ forall r, data_preg r = true -> rs'#r = rs#r. +Proof. + intros until b; intros TR EV. + assert (DFL: + transl_cond_branch_default cond args lbl k = OK c -> + exists rs' insn, + exec_straight_opt ge fn c rs m (insn :: k) rs' m + /\ exec_instr ge fn insn rs' m = + (if b then goto_label fn lbl rs' m else Next (nextinstr rs') m) + /\ forall r, data_preg r = true -> rs'#r = rs#r). + { + unfold transl_cond_branch_default; intros. + exploit transl_cond_correct; eauto. intros (rs' & A & B & C). + exists rs', (Pbc (cond_for_cond cond) lbl); split. + apply exec_straight_opt_intro. eexact A. + split; auto. simpl. rewrite (B b) by auto. auto. + } +Local Opaque transl_cond transl_cond_branch_default. + destruct args as [ | a1 args]; simpl in TR; auto. + destruct args as [ | a2 args]; simpl in TR; auto. + destruct cond; simpl in TR; auto. +- (* Ccompimm *) + destruct c0; auto; destruct (Int.eq n Int.zero) eqn:N0; auto; + apply Int.same_if_eq in N0; subst n; ArgsInv. ++ (* Ccompimm Cne 0 *) + do 2 econstructor; split. + apply exec_straight_opt_refl. + split; auto. simpl. destruct (rs x); simpl in EV; inv EV. simpl. auto. ++ (* Ccompimm Ceq 0 *) + do 2 econstructor; split. + apply exec_straight_opt_refl. + split; auto. simpl. destruct (rs x); simpl in EV; inv EV. simpl. destruct (Int.eq i Int.zero); auto. +- (* Ccompuimm *) + destruct c0; auto; destruct (Int.eq n Int.zero) eqn:N0; auto; + apply Int.same_if_eq in N0; subst n; ArgsInv. ++ (* Ccompuimm Cne 0 *) + do 2 econstructor; split. + apply exec_straight_opt_refl. + split; auto. simpl. rewrite EV. auto. ++ (* Ccompuimm Ceq 0 *) + do 2 econstructor; split. + apply exec_straight_opt_refl. + split; auto. simpl. rewrite (Val.negate_cmpu_bool (Mem.valid_pointer m) Cne), EV. destruct b; auto. +- (* Cmaskzero *) + destruct (Int.is_power2 n) as [bit|] eqn:P2; auto. ArgsInv. + do 2 econstructor; split. + apply exec_straight_opt_refl. + split; auto. simpl. + erewrite <- Int.mul_pow2, Int.mul_commut, Int.mul_one by eauto. + rewrite (Val.negate_cmp_bool Ceq), EV. destruct b; auto. +- (* Cmasknotzero *) + destruct (Int.is_power2 n) as [bit|] eqn:P2; auto. ArgsInv. + do 2 econstructor; split. + apply exec_straight_opt_refl. + split; auto. simpl. + erewrite <- Int.mul_pow2, Int.mul_commut, Int.mul_one by eauto. + rewrite EV. auto. +- (* Ccomplimm *) + destruct c0; auto; destruct (Int64.eq n Int64.zero) eqn:N0; auto; + apply Int64.same_if_eq in N0; subst n; ArgsInv. ++ (* Ccomplimm Cne 0 *) + do 2 econstructor; split. + apply exec_straight_opt_refl. + split; auto. simpl. destruct (rs x); simpl in EV; inv EV. simpl. auto. ++ (* Ccomplimm Ceq 0 *) + do 2 econstructor; split. + apply exec_straight_opt_refl. + split; auto. simpl. destruct (rs x); simpl in EV; inv EV. simpl. destruct (Int64.eq i Int64.zero); auto. +- (* Ccompluimm *) + destruct c0; auto; destruct (Int64.eq n Int64.zero) eqn:N0; auto; + apply Int64.same_if_eq in N0; subst n; ArgsInv. ++ (* Ccompluimm Cne 0 *) + do 2 econstructor; split. + apply exec_straight_opt_refl. + split; auto. simpl. rewrite EV. auto. ++ (* Ccompluimm Ceq 0 *) + do 2 econstructor; split. + apply exec_straight_opt_refl. + split; auto. simpl. rewrite (Val.negate_cmplu_bool (Mem.valid_pointer m) Cne), EV. destruct b; auto. +- (* Cmasklzero *) + destruct (Int64.is_power2' n) as [bit|] eqn:P2; auto. ArgsInv. + do 2 econstructor; split. + apply exec_straight_opt_refl. + split; auto. simpl. + erewrite <- Int64.mul_pow2', Int64.mul_commut, Int64.mul_one by eauto. + rewrite (Val.negate_cmpl_bool Ceq), EV. destruct b; auto. +- (* Cmasklnotzero *) + destruct (Int64.is_power2' n) as [bit|] eqn:P2; auto. ArgsInv. + do 2 econstructor; split. + apply exec_straight_opt_refl. + split; auto. simpl. + erewrite <- Int64.mul_pow2', Int64.mul_commut, Int64.mul_one by eauto. + rewrite EV. auto. +Qed. + +(** Translation of arithmetic operations *) + +Ltac SimplEval H := + match type of H with + | Some _ = None _ => discriminate + | Some _ = Some _ => inv H + | ?a = Some ?b => let A := fresh in assert (A: Val.maketotal a = b) by (rewrite H; reflexivity) +end. + +Ltac TranslOpSimpl := + econstructor; split; + [ apply exec_straight_one; [simpl; eauto | reflexivity] + | split; [ rewrite ? transl_eval_shift, ? transl_eval_shiftl; + apply Val.lessdef_same; Simpl; fail + | intros; Simpl; fail ] ]. + +Ltac TranslOpBase := + econstructor; split; + [ apply exec_straight_one; [simpl; eauto | reflexivity] + | split; [ rewrite ? transl_eval_shift, ? transl_eval_shiftl; Simpl + | intros; Simpl; fail ] ]. + +Lemma transl_op_correct: + forall op args res k (rs: regset) m v c, + transl_op op args res k = OK c -> + eval_operation ge (rs#SP) op (map rs (map preg_of args)) m = Some v -> + exists rs', + exec_straight ge fn c rs m k rs' m + /\ Val.lessdef v rs'#(preg_of res) + /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r. +Proof. +Local Opaque Int.eq Int64.eq Val.add Val.addl Int.zwordsize Int64.zwordsize. + intros until c; intros TR EV. + unfold transl_op in TR; destruct op; ArgsInv; simpl in EV; SimplEval EV; try TranslOpSimpl. +- (* move *) + destruct (preg_of res) eqn:RR; try discriminate; destruct (preg_of m0) eqn:R1; inv TR. ++ TranslOpSimpl. ++ TranslOpSimpl. +- (* intconst *) + exploit exec_loadimm32. intros (rs' & A & B & C). + exists rs'; split. eexact A. split. rewrite B; auto. intros; auto with asmgen. +- (* longconst *) + exploit exec_loadimm64. intros (rs' & A & B & C). + exists rs'; split. eexact A. split. rewrite B; auto. intros; auto with asmgen. +- (* floatconst *) + destruct (Float.eq_dec n Float.zero). ++ subst n. TranslOpSimpl. ++ TranslOpSimpl. +- (* singleconst *) + destruct (Float32.eq_dec n Float32.zero). ++ subst n. TranslOpSimpl. ++ TranslOpSimpl. +- (* loadsymbol *) + exploit (exec_loadsymbol x id ofs). eauto with asmgen. intros (rs' & A & B & C). + exists rs'; split. eexact A. split. rewrite B; auto. auto. +- (* addrstack *) + exploit (exec_addimm64 x XSP (Ptrofs.to_int64 ofs)). simpl; eauto with asmgen. + intros (rs' & A & B & C). + exists rs'; split. eexact A. split. simpl in B; rewrite B. +Local Transparent Val.addl. + destruct (rs SP); simpl; auto. rewrite Ptrofs.of_int64_to_int64 by auto. auto. + auto. +- (* shift *) + rewrite <- transl_eval_shift'. TranslOpSimpl. +- (* addimm *) + exploit (exec_addimm32 x x0 n). eauto with asmgen. intros (rs' & A & B & C). + exists rs'; split. eexact A. split. rewrite B; auto. auto. +- (* mul *) + TranslOpBase. +Local Transparent Val.add. + destruct (rs x0); auto; destruct (rs x1); auto. simpl. rewrite Int.add_zero_l; auto. +- (* andimm *) + exploit (exec_logicalimm32 (Pandimm W) (Pand W)). + intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. + intros (rs' & A & B & C). + exists rs'; split. eexact A. split. rewrite B; auto. auto. +- (* orimm *) + exploit (exec_logicalimm32 (Porrimm W) (Porr W)). + intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. + intros (rs' & A & B & C). + exists rs'; split. eexact A. split. rewrite B; auto. auto. +- (* xorimm *) + exploit (exec_logicalimm32 (Peorimm W) (Peor W)). + intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. + intros (rs' & A & B & C). + exists rs'; split. eexact A. split. rewrite B; auto. auto. +- (* not *) + TranslOpBase. + destruct (rs x0); auto. simpl. rewrite Int.or_zero_l; auto. +- (* notshift *) + TranslOpBase. + destruct (eval_shift s (rs x0) a); auto. simpl. rewrite Int.or_zero_l; auto. +- (* shrx *) + exploit (exec_shrx32 x x0 n); eauto with asmgen. intros (rs' & A & B & C). + econstructor; split. eexact A. split. rewrite B; auto. auto. +- (* zero-ext *) + TranslOpBase. + destruct (rs x0); auto; simpl. rewrite Int.shl_zero. auto. +- (* sign-ext *) + TranslOpBase. + destruct (rs x0); auto; simpl. rewrite Int.shl_zero. auto. +- (* shlzext *) + TranslOpBase. + destruct (rs x0); simpl; auto. rewrite <- Int.shl_zero_ext_min; auto using a32_range. +- (* shlsext *) + TranslOpBase. + destruct (rs x0); simpl; auto. rewrite <- Int.shl_sign_ext_min; auto using a32_range. +- (* zextshr *) + TranslOpBase. + destruct (rs x0); simpl; auto. rewrite ! a32_range; simpl. rewrite <- Int.zero_ext_shru_min; auto using a32_range. +- (* sextshr *) + TranslOpBase. + destruct (rs x0); simpl; auto. rewrite ! a32_range; simpl. rewrite <- Int.sign_ext_shr_min; auto using a32_range. +- (* shiftl *) + rewrite <- transl_eval_shiftl'. TranslOpSimpl. +- (* extend *) + exploit (exec_move_extended x0 x1 x a k). intros (rs' & A & B & C). + econstructor; split. eexact A. + split. rewrite B; auto. eauto with asmgen. +- (* addext *) + exploit (exec_arith_extended Val.addl Paddext (Padd X)). + auto. auto. instantiate (1 := x1). eauto with asmgen. intros (rs' & A & B & C). + econstructor; split. eexact A. split. rewrite B; auto. auto. +- (* addlimm *) + exploit (exec_addimm64 x x0 n). simpl. generalize (ireg_of_not_X16 _ _ EQ1). congruence. + intros (rs' & A & B & C). + exists rs'; split. eexact A. split. simpl in B; rewrite B; auto. auto. +- (* subext *) + exploit (exec_arith_extended Val.subl Psubext (Psub X)). + auto. auto. instantiate (1 := x1). eauto with asmgen. intros (rs' & A & B & C). + econstructor; split. eexact A. split. rewrite B; auto. auto. +- (* mull *) + TranslOpBase. + destruct (rs x0); auto; destruct (rs x1); auto. simpl. rewrite Int64.add_zero_l; auto. +- (* andlimm *) + exploit (exec_logicalimm64 (Pandimm X) (Pand X)). + intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. + intros (rs' & A & B & C). + exists rs'; split. eexact A. split. rewrite B; auto. auto. +- (* orlimm *) + exploit (exec_logicalimm64 (Porrimm X) (Porr X)). + intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. + intros (rs' & A & B & C). + exists rs'; split. eexact A. split. rewrite B; auto. auto. +- (* xorlimm *) + exploit (exec_logicalimm64 (Peorimm X) (Peor X)). + intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. + intros (rs' & A & B & C). + exists rs'; split. eexact A. split. rewrite B; auto. auto. +- (* notl *) + TranslOpBase. + destruct (rs x0); auto. simpl. rewrite Int64.or_zero_l; auto. +- (* notlshift *) + TranslOpBase. + destruct (eval_shiftl s (rs x0) a); auto. simpl. rewrite Int64.or_zero_l; auto. +- (* shrx *) + exploit (exec_shrx64 x x0 n); eauto with asmgen. intros (rs' & A & B & C). + econstructor; split. eexact A. split. rewrite B; auto. auto. +- (* zero-ext-l *) + TranslOpBase. + destruct (rs x0); auto; simpl. rewrite Int64.shl'_zero. auto. +- (* sign-ext-l *) + TranslOpBase. + destruct (rs x0); auto; simpl. rewrite Int64.shl'_zero. auto. +- (* shllzext *) + TranslOpBase. + destruct (rs x0); simpl; auto. rewrite <- Int64.shl'_zero_ext_min; auto using a64_range. +- (* shllsext *) + TranslOpBase. + destruct (rs x0); simpl; auto. rewrite <- Int64.shl'_sign_ext_min; auto using a64_range. +- (* zextshrl *) + TranslOpBase. + destruct (rs x0); simpl; auto. rewrite ! a64_range; simpl. rewrite <- Int64.zero_ext_shru'_min; auto using a64_range. +- (* sextshrl *) + TranslOpBase. + destruct (rs x0); simpl; auto. rewrite ! a64_range; simpl. rewrite <- Int64.sign_ext_shr'_min; auto using a64_range. +- (* condition *) + exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto. auto. + split. Simpl. destruct (eval_condition cond (map rs (map preg_of args)) m) as [b|]; simpl in *. + rewrite (B b) by auto. auto. + auto. + intros; Simpl. +- (* select *) + destruct (preg_of res) eqn:RES; monadInv TR. + + (* integer *) + generalize (ireg_of_eq _ _ EQ) (ireg_of_eq _ _ EQ1); intros E1 E2; rewrite E1, E2. + exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto. auto. + split. Simpl. destruct (eval_condition cond (map rs (map preg_of args)) m) as [b|]; simpl in *. + rewrite (B b) by auto. rewrite !C. apply Val.lessdef_normalize. + rewrite <- E2; auto with asmgen. rewrite <- E1; auto with asmgen. + auto. + intros; Simpl. + + (* FP *) + generalize (freg_of_eq _ _ EQ) (freg_of_eq _ _ EQ1); intros E1 E2; rewrite E1, E2. + exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto. auto. + split. Simpl. destruct (eval_condition cond (map rs (map preg_of args)) m) as [b|]; simpl in *. + rewrite (B b) by auto. rewrite !C. apply Val.lessdef_normalize. + rewrite <- E2; auto with asmgen. rewrite <- E1; auto with asmgen. + auto. + intros; Simpl. +Qed. + +(** Translation of addressing modes, loads, stores *) + +Lemma transl_addressing_correct: + forall sz addr args (insn: Asm.addressing -> instruction) k (rs: regset) m c b o, + transl_addressing sz addr args insn k = OK c -> + Op.eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some (Vptr b o) -> + exists ad rs', + exec_straight_opt ge fn c rs m (insn ad :: k) rs' m + /\ Asm.eval_addressing ge ad rs' = Vptr b o + /\ forall r, data_preg r = true -> rs' r = rs r. +Proof. + intros until o; intros TR EV. + unfold transl_addressing in TR; destruct addr; ArgsInv; SimplEval EV. +- (* Aindexed *) + destruct (offset_representable sz ofs); inv EQ0. ++ econstructor; econstructor; split. apply exec_straight_opt_refl. + auto. ++ exploit (exec_loadimm64 X16 ofs). intros (rs' & A & B & C). + econstructor; exists rs'; split. apply exec_straight_opt_intro; eexact A. + split. simpl. rewrite B, C by eauto with asmgen. auto. + eauto with asmgen. +- (* Aindexed2 *) + econstructor; econstructor; split. apply exec_straight_opt_refl. + auto. +- (* Aindexed2shift *) + destruct (Int.eq a Int.zero) eqn:E; [|destruct (Int.eq (Int.shl Int.one a) (Int.repr sz))]; inv EQ2. ++ apply Int.same_if_eq in E. rewrite E. + econstructor; econstructor; split. apply exec_straight_opt_refl. + split; auto. simpl. + rewrite Val.addl_commut in H0. destruct (rs x0); try discriminate. + unfold Val.shll. rewrite Int64.shl'_zero. auto. ++ econstructor; econstructor; split. apply exec_straight_opt_refl. + auto. ++ econstructor; econstructor; split. + apply exec_straight_opt_intro. apply exec_straight_one. simpl; eauto. auto. + split. simpl. Simpl. rewrite H0. simpl. rewrite Ptrofs.add_zero. auto. + intros; Simpl. +- (* Aindexed2ext *) + destruct (Int.eq a Int.zero || Int.eq (Int.shl Int.one a) (Int.repr sz)); inv EQ2. ++ econstructor; econstructor; split. apply exec_straight_opt_refl. + split; auto. destruct x; auto. ++ exploit (exec_arith_extended Val.addl Paddext (Padd X)); auto. + instantiate (1 := x0). eauto with asmgen. + intros (rs' & A & B & C). + econstructor; exists rs'; split. + apply exec_straight_opt_intro. eexact A. + split. simpl. rewrite B. rewrite Val.addl_assoc. f_equal. + unfold Op.eval_extend; destruct x, (rs x1); simpl; auto; rewrite ! a64_range; + simpl; rewrite Int64.add_zero; auto. + intros. apply C; eauto with asmgen. +- (* Aglobal *) + destruct (Ptrofs.eq (Ptrofs.modu ofs (Ptrofs.repr sz)) Ptrofs.zero && symbol_is_aligned id sz); inv TR. ++ econstructor; econstructor; split. + apply exec_straight_opt_intro. apply exec_straight_one. simpl; eauto. auto. + split. simpl. Simpl. rewrite symbol_high_low. simpl in EV. congruence. + intros; Simpl. ++ exploit (exec_loadsymbol X16 id ofs). auto. intros (rs' & A & B & C). + econstructor; exists rs'; split. + apply exec_straight_opt_intro. eexact A. + split. simpl. + rewrite B. rewrite <- Genv.shift_symbol_address_64, Ptrofs.add_zero by auto. + simpl in EV. congruence. + auto with asmgen. +- (* Ainstrack *) + assert (E: Val.addl (rs SP) (Vlong (Ptrofs.to_int64 ofs)) = Vptr b o). + { simpl in EV. inv EV. destruct (rs SP); simpl in H1; inv H1. simpl. + rewrite Ptrofs.of_int64_to_int64 by auto. auto. } + destruct (offset_representable sz (Ptrofs.to_int64 ofs)); inv TR. ++ econstructor; econstructor; split. apply exec_straight_opt_refl. + auto. ++ exploit (exec_loadimm64 X16 (Ptrofs.to_int64 ofs)). intros (rs' & A & B & C). + econstructor; exists rs'; split. + apply exec_straight_opt_intro. eexact A. + split. simpl. rewrite B, C by eauto with asmgen. auto. + auto with asmgen. +Qed. + +Lemma transl_load_correct: + forall chunk addr args dst k c (rs: regset) m vaddr v, + transl_load chunk addr args dst k = OK c -> + Op.eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some vaddr -> + Mem.loadv chunk m vaddr = Some v -> + exists rs', + exec_straight ge fn c rs m k rs' m + /\ rs'#(preg_of dst) = v + /\ forall r, data_preg r = true -> r <> preg_of dst -> rs' r = rs r. +Proof. + intros. destruct vaddr; try discriminate. + assert (A: exists sz insn, + transl_addressing sz addr args insn k = OK c + /\ (forall ad rs', exec_instr ge fn (insn ad) rs' m = + exec_load ge chunk (fun v => v) ad (preg_of dst) rs' m)). + { + destruct chunk; monadInv H; + try rewrite (ireg_of_eq _ _ EQ); try rewrite (freg_of_eq _ _ EQ); + do 2 econstructor; (split; [eassumption|auto]). + } + destruct A as (sz & insn & B & C). + exploit transl_addressing_correct. eexact B. eexact H0. intros (ad & rs' & P & Q & R). + assert (X: exec_load ge chunk (fun v => v) ad (preg_of dst) rs' m = + Next (nextinstr (rs'#(preg_of dst) <- v)) m). + { unfold exec_load. rewrite Q, H1. auto. } + econstructor; split. + eapply exec_straight_opt_right. eexact P. + apply exec_straight_one. rewrite C, X; eauto. Simpl. + split. Simpl. intros; Simpl. +Qed. + +Lemma transl_store_correct: + forall chunk addr args src k c (rs: regset) m vaddr m', + transl_store chunk addr args src k = OK c -> + Op.eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some vaddr -> + Mem.storev chunk m vaddr rs#(preg_of src) = Some m' -> + exists rs', + exec_straight ge fn c rs m k rs' m' + /\ forall r, data_preg r = true -> rs' r = rs r. +Proof. + intros. destruct vaddr; try discriminate. + set (chunk' := match chunk with Mint8signed => Mint8unsigned + | Mint16signed => Mint16unsigned + | _ => chunk end). + assert (A: exists sz insn, + transl_addressing sz addr args insn k = OK c + /\ (forall ad rs', exec_instr ge fn (insn ad) rs' m = + exec_store ge chunk' ad rs'#(preg_of src) rs' m)). + { + unfold chunk'; destruct chunk; monadInv H; + try rewrite (ireg_of_eq _ _ EQ); try rewrite (freg_of_eq _ _ EQ); + do 2 econstructor; (split; [eassumption|auto]). + } + destruct A as (sz & insn & B & C). + exploit transl_addressing_correct. eexact B. eexact H0. intros (ad & rs' & P & Q & R). + assert (X: Mem.storev chunk' m (Vptr b i) rs#(preg_of src) = Some m'). + { rewrite <- H1. unfold chunk'. destruct chunk; auto; simpl; symmetry. + apply Mem.store_signed_unsigned_8. + apply Mem.store_signed_unsigned_16. } + assert (Y: exec_store ge chunk' ad rs'#(preg_of src) rs' m = + Next (nextinstr rs') m'). + { unfold exec_store. rewrite Q, R, X by auto with asmgen. auto. } + econstructor; split. + eapply exec_straight_opt_right. eexact P. + apply exec_straight_one. rewrite C, Y; eauto. Simpl. + intros; Simpl. +Qed. + +(** Translation of indexed memory accesses *) + +Lemma indexed_memory_access_correct: forall insn sz (base: iregsp) ofs k (rs: regset) m b i, + preg_of_iregsp base <> IR X16 -> + Val.offset_ptr rs#base ofs = Vptr b i -> + exists ad rs', + exec_straight_opt ge fn (indexed_memory_access insn sz base ofs k) rs m (insn ad :: k) rs' m + /\ Asm.eval_addressing ge ad rs' = Vptr b i + /\ forall r, r <> PC -> r <> X16 -> rs' r = rs r. +Proof. + unfold indexed_memory_access; intros. + assert (Val.addl rs#base (Vlong (Ptrofs.to_int64 ofs)) = Vptr b i). + { destruct (rs base); try discriminate. simpl in *. rewrite Ptrofs.of_int64_to_int64 by auto. auto. } + destruct offset_representable. +- econstructor; econstructor; split. apply exec_straight_opt_refl. auto. +- exploit (exec_loadimm64 X16); eauto. intros (rs' & A & B & C). + econstructor; econstructor; split. apply exec_straight_opt_intro; eexact A. + split. simpl. rewrite B, C by eauto with asmgen. auto. auto. +Qed. + +Lemma loadptr_correct: forall (base: iregsp) ofs dst k m v (rs: regset), + Mem.loadv Mint64 m (Val.offset_ptr rs#base ofs) = Some v -> + preg_of_iregsp base <> IR X16 -> + exists rs', + exec_straight ge fn (loadptr base ofs dst k) rs m k rs' m + /\ rs'#dst = v + /\ forall r, r <> PC -> r <> X16 -> r <> dst -> rs' r = rs r. +Proof. + intros. + destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate. + exploit indexed_memory_access_correct; eauto. intros (ad & rs' & A & B & C). + econstructor; split. + eapply exec_straight_opt_right. eexact A. + apply exec_straight_one. simpl. unfold exec_load. rewrite B, H. eauto. auto. + split. Simpl. intros; Simpl. +Qed. + +Lemma storeptr_correct: forall (base: iregsp) ofs (src: ireg) k m m' (rs: regset), + Mem.storev Mint64 m (Val.offset_ptr rs#base ofs) rs#src = Some m' -> + preg_of_iregsp base <> IR X16 -> + src <> X16 -> + exists rs', + exec_straight ge fn (storeptr src base ofs k) rs m k rs' m' + /\ forall r, r <> PC -> r <> X16 -> rs' r = rs r. +Proof. + intros. + destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate. + exploit indexed_memory_access_correct; eauto. intros (ad & rs' & A & B & C). + econstructor; split. + eapply exec_straight_opt_right. eexact A. + apply exec_straight_one. simpl. unfold exec_store. rewrite B, C, H by eauto with asmgen. eauto. auto. + intros; Simpl. +Qed. + +Lemma loadind_correct: forall (base: iregsp) ofs ty dst k c (rs: regset) m v, + loadind base ofs ty dst k = OK c -> + Mem.loadv (chunk_of_type ty) m (Val.offset_ptr rs#base ofs) = Some v -> + preg_of_iregsp base <> IR X16 -> + exists rs', + exec_straight ge fn c rs m k rs' m + /\ rs'#(preg_of dst) = v + /\ forall r, data_preg r = true -> r <> preg_of dst -> rs' r = rs r. +Proof. + intros. + destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate. + assert (X: exists sz insn, + c = indexed_memory_access insn sz base ofs k + /\ (forall ad rs', exec_instr ge fn (insn ad) rs' m = + exec_load ge (chunk_of_type ty) (fun v => v) ad (preg_of dst) rs' m)). + { + unfold loadind in H; destruct ty; destruct (preg_of dst); inv H; do 2 econstructor; eauto. + } + destruct X as (sz & insn & EQ & SEM). subst c. + exploit indexed_memory_access_correct; eauto. intros (ad & rs' & A & B & C). + econstructor; split. + eapply exec_straight_opt_right. eexact A. + apply exec_straight_one. rewrite SEM. unfold exec_load. rewrite B, H0. eauto. Simpl. + split. Simpl. intros; Simpl. +Qed. + +Lemma storeind_correct: forall (base: iregsp) ofs ty src k c (rs: regset) m m', + storeind src base ofs ty k = OK c -> + Mem.storev (chunk_of_type ty) m (Val.offset_ptr rs#base ofs) rs#(preg_of src) = Some m' -> + preg_of_iregsp base <> IR X16 -> + exists rs', + exec_straight ge fn c rs m k rs' m' + /\ forall r, data_preg r = true -> rs' r = rs r. +Proof. + intros. + destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate. + assert (X: exists sz insn, + c = indexed_memory_access insn sz base ofs k + /\ (forall ad rs', exec_instr ge fn (insn ad) rs' m = + exec_store ge (chunk_of_type ty) ad rs'#(preg_of src) rs' m)). + { + unfold storeind in H; destruct ty; destruct (preg_of src); inv H; do 2 econstructor; eauto. + } + destruct X as (sz & insn & EQ & SEM). subst c. + exploit indexed_memory_access_correct; eauto. intros (ad & rs' & A & B & C). + econstructor; split. + eapply exec_straight_opt_right. eexact A. + apply exec_straight_one. rewrite SEM. + unfold exec_store. rewrite B, C, H0 by eauto with asmgen. eauto. + Simpl. + intros; Simpl. +Qed. + +Lemma make_epilogue_correct: + forall ge0 f m stk soff cs m' ms rs k tm, + load_stack m (Vptr stk soff) Tptr f.(fn_link_ofs) = Some (parent_sp cs) -> + load_stack m (Vptr stk soff) Tptr f.(fn_retaddr_ofs) = Some (parent_ra cs) -> + Mem.free m stk 0 f.(fn_stacksize) = Some m' -> + agree ms (Vptr stk soff) rs -> + Mem.extends m tm -> + match_stack ge0 cs -> + exists rs', exists tm', + exec_straight ge fn (make_epilogue f k) rs tm k rs' tm' + /\ agree ms (parent_sp cs) rs' + /\ Mem.extends m' tm' + /\ rs'#RA = parent_ra cs + /\ rs'#SP = parent_sp cs + /\ (forall r, r <> PC -> r <> SP -> r <> X30 -> r <> X16 -> rs'#r = rs#r). +Proof. + intros until tm; intros LP LRA FREE AG MEXT MCS. + exploit Mem.loadv_extends. eauto. eexact LP. auto. simpl. intros (parent' & LP' & LDP'). + exploit Mem.loadv_extends. eauto. eexact LRA. auto. simpl. intros (ra' & LRA' & LDRA'). + exploit lessdef_parent_sp; eauto. intros EQ; subst parent'; clear LDP'. + exploit lessdef_parent_ra; eauto. intros EQ; subst ra'; clear LDRA'. + exploit Mem.free_parallel_extends; eauto. intros (tm' & FREE' & MEXT'). + unfold make_epilogue. + exploit (loadptr_correct XSP (fn_retaddr_ofs f)). + instantiate (2 := rs). simpl. rewrite <- (sp_val _ _ _ AG). simpl. eexact LRA'. simpl; congruence. + intros (rs1 & A1 & B1 & C1). + econstructor; econstructor; split. + eapply exec_straight_trans. eexact A1. apply exec_straight_one. simpl. + simpl; rewrite (C1 SP) by auto with asmgen. rewrite <- (sp_val _ _ _ AG). simpl; rewrite LP'. + rewrite FREE'. eauto. auto. + split. apply agree_nextinstr. apply agree_set_other; auto. + apply agree_change_sp with (Vptr stk soff). + apply agree_exten with rs; auto. intros; apply C1; auto with asmgen. + eapply parent_sp_def; eauto. + split. auto. + split. Simpl. + split. Simpl. + intros. Simpl. +Qed. + +End CONSTRUCTORS. diff --git a/aarch64/Builtins1.v b/aarch64/Builtins1.v index 53c83d7e..cd6f8cc4 100644 --- a/aarch64/Builtins1.v +++ b/aarch64/Builtins1.v @@ -6,10 +6,11 @@ (* *) (* Copyright Institut National de Recherche en Informatique et en *) (* Automatique. All rights reserved. This file is distributed *) -(* under the terms of the GNU General Public License as published by *) -(* the Free Software Foundation, either version 2 of the License, or *) -(* (at your option) any later version. This file is also distributed *) -(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* under the terms of the GNU Lesser General Public License as *) +(* published by the Free Software Foundation, either version 2.1 of *) +(* the License, or (at your option) any later version. *) +(* This file is also distributed under the terms of the *) +(* INRIA Non-Commercial License Agreement. *) (* *) (* *********************************************************************) diff --git a/aarch64/CBuiltins.ml b/aarch64/CBuiltins.ml index e2a9c87a..80d66310 100644 --- a/aarch64/CBuiltins.ml +++ b/aarch64/CBuiltins.ml @@ -6,10 +6,11 @@ (* *) (* Copyright Institut National de Recherche en Informatique et en *) (* Automatique. All rights reserved. This file is distributed *) -(* under the terms of the GNU General Public License as published by *) -(* the Free Software Foundation, either version 2 of the License, or *) -(* (at your option) any later version. This file is also distributed *) -(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* under the terms of the GNU Lesser General Public License as *) +(* published by the Free Software Foundation, either version 2.1 of *) +(* the License, or (at your option) any later version. *) +(* This file is also distributed under the terms of the *) +(* INRIA Non-Commercial License Agreement. *) (* *) (* *********************************************************************) @@ -17,16 +18,28 @@ open C -(* va_list is a struct of size 32 and alignment 8, passed by reference *) +(* AAPCS64: + va_list is a struct of size 32 and alignment 8, passed by reference + Apple: + va_list is a pointer (size 8, alignment 8), passed by reference *) -let va_list_type = TArray(TInt(IULong, []), Some 4L, []) -let size_va_list = 32 -let va_list_scalar = false +let (va_list_type, size_va_list, va_list_scalar) = + match Archi.abi with + | Archi.AAPCS64 -> (TArray(TInt(IULong, []), Some 4L, []), 32, false) + | Archi.Apple -> (TPtr(TVoid [], []), 8, true) + +(* Some macOS headers use the GCC built-in types "__int128_t" and + "__uint128_t" unconditionally. Provide a dummy definition. *) + +let int128_type = TArray(TInt(IULong, []), Some 2L, []) let builtins = { - builtin_typedefs = [ - "__builtin_va_list", va_list_type - ]; + builtin_typedefs = + [ "__builtin_va_list", va_list_type ] @ + (if Configuration.system = "macos" then + [ "__int128_t", int128_type; + "__uint128_t", int128_type ] + else []); builtin_functions = [ (* Synchronization *) "__builtin_fence", diff --git a/aarch64/ConstpropOp.vp b/aarch64/ConstpropOp.vp index c0a2c6bf..f2d17a51 100644 --- a/aarch64/ConstpropOp.vp +++ b/aarch64/ConstpropOp.vp @@ -13,11 +13,11 @@ (** Strength reduction for operators and conditions. This is the machine-dependent part of [Constprop]. *) -Require Archi. Require Import Coqlib Compopts. Require Import AST Integers Floats. Require Import Op Registers. Require Import ValueDomain ValueAOp. +Require SelectOp. (** * Converting known values to constants *) @@ -375,7 +375,7 @@ Nondetfunction op_strength_reduction Nondetfunction addr_strength_reduction (addr: addressing) (args: list reg) (vl: list aval) := match addr, args, vl with - | Aindexed n, r1 :: nil, Ptr(Gl symb n1) :: nil => + | Aindexed n, r1 :: nil, Ptr(Gl symb n1) :: nil ?? negb (SelectOp.symbol_is_relocatable symb) => (Aglobal symb (Ptrofs.add n1 (Ptrofs.of_int64 n)), nil) | Aindexed n, r1 :: nil, Ptr(Stk n1) :: nil => (Ainstack (Ptrofs.add n1 (Ptrofs.of_int64 n)), nil) diff --git a/aarch64/ConstpropOpproof.v b/aarch64/ConstpropOpproof.v index c777062c..24498aa4 100644 --- a/aarch64/ConstpropOpproof.v +++ b/aarch64/ConstpropOpproof.v @@ -414,7 +414,7 @@ Proof. Int.bit_solve. destruct (zlt i0 n0). replace (Int.testbit n i0) with (negb (Int.testbit Int.zero i0)). rewrite Int.bits_zero. simpl. rewrite andb_true_r. auto. - rewrite <- EQ. rewrite Int.bits_zero_ext by omega. rewrite zlt_true by auto. + rewrite <- EQ. rewrite Int.bits_zero_ext by lia. rewrite zlt_true by auto. rewrite Int.bits_not by auto. apply negb_involutive. rewrite H6 by auto. auto. econstructor; split; eauto. auto. diff --git a/aarch64/Conventions1.v b/aarch64/Conventions1.v index efda835d..f401458c 100644 --- a/aarch64/Conventions1.v +++ b/aarch64/Conventions1.v @@ -24,7 +24,12 @@ Require Archi. - Caller-save registers that can be modified during a function call. We follow the Procedure Call Standard for the ARM 64-bit Architecture - (AArch64) document: R19-R28 and F8-F15 are callee-save. *) + (AArch64) document: R19-R28 and F8-F15 are callee-save. + + X16 is reserved as a temporary for asm generation. + X18 is reserved as the platform register. + X29 is reserved as the frame pointer register. + X30 is reserved as the return address register. *) Definition is_callee_save (r: mreg): bool := match r with @@ -154,9 +159,23 @@ Qed. (** - The first 8 integer arguments are passed in registers [R0...R7]. - The first 8 FP arguments are passed in registers [F0...F7]. -- Extra arguments are passed on the stack, in [Outgoing] slots of size - 64 bits (2 words), consecutively assigned, starting at word offset 0. -**) +- Extra arguments are passed on the stack, in [Outgoing] slots, + consecutively assigned, starting at word offset 0. + +In the standard AAPCS64, all stack slots are 8-byte wide (2 words). + +In the Apple variant, a stack slot has the size of the type of the +corresponding argument, and is aligned accordingly. We use 8-byte +slots (2 words) for C types [long] and [double], and 4-byte slots +(1 word) for C types [int] and [float]. For full conformance, we should +use 1-byte slots for [char] types and 2-byte slots for [short] types, +but this cannot be expressed in CompCert's type algebra, so we +incorrectly use 4-byte slots. + +Concerning variable arguments to vararg functions: +- In the AAPCS64 standard, they are passed like regular, fixed arguments. +- In the Apple variant, they are always passed on stack, in 8-byte slots. +*) Definition int_param_regs := R0 :: R1 :: R2 :: R3 :: R4 :: R5 :: R6 :: R7 :: nil. @@ -164,31 +183,70 @@ Definition int_param_regs := Definition float_param_regs := F0 :: F1 :: F2 :: F3 :: F4 :: F5 :: F6 :: F7 :: nil. +Definition stack_arg (ty: typ) (ir fr ofs: Z) + (rec: Z -> Z -> Z -> list (rpair loc)) := + match Archi.abi with + | Archi.AAPCS64 => + let ofs := align ofs 2 in + One (S Outgoing ofs ty) :: rec ir fr (ofs + 2) + | Archi.Apple => + let ofs := align ofs (typesize ty) in + One (S Outgoing ofs ty) :: rec ir fr (ofs + typesize ty) + end. + +Definition int_arg (ty: typ) (ir fr ofs: Z) + (rec: Z -> Z -> Z -> list (rpair loc)) := + match list_nth_z int_param_regs ir with + | None => + stack_arg ty ir fr ofs rec + | Some ireg => + One (R ireg) :: rec (ir + 1) fr ofs + end. + +Definition float_arg (ty: typ) (ir fr ofs: Z) + (rec: Z -> Z -> Z -> list (rpair loc)) := + match list_nth_z float_param_regs fr with + | None => + stack_arg ty ir fr ofs rec + | Some freg => + One (R freg) :: rec ir (fr + 1) ofs + end. + +Fixpoint loc_arguments_stack (tyl: list typ) (ofs: Z) {struct tyl} : list (rpair loc) := + match tyl with + | nil => nil + | ty :: tys => One (S Outgoing ofs Tany64) :: loc_arguments_stack tys (ofs + 2) + end. + Fixpoint loc_arguments_rec - (tyl: list typ) (ir fr ofs: Z) {struct tyl} : list (rpair loc) := + (tyl: list typ) (fixed ir fr ofs: Z) {struct tyl} : list (rpair loc) := match tyl with | nil => nil - | (Tint | Tlong | Tany32 | Tany64) as ty :: tys => - match list_nth_z int_param_regs ir with - | None => - One (S Outgoing ofs ty) :: loc_arguments_rec tys ir fr (ofs + 2) - | Some ireg => - One (R ireg) :: loc_arguments_rec tys (ir + 1) fr ofs - end - | (Tfloat | Tsingle) as ty :: tys => - match list_nth_z float_param_regs fr with - | None => - One (S Outgoing ofs ty) :: loc_arguments_rec tys ir fr (ofs + 2) - | Some freg => - One (R freg) :: loc_arguments_rec tys ir (fr + 1) ofs + | ty :: tys => + if zle fixed 0 then loc_arguments_stack tyl (align ofs 2) else + match ty with + | Tint | Tlong | Tany32 | Tany64 => + int_arg ty ir fr ofs (loc_arguments_rec tys (fixed - 1)) + | Tfloat | Tsingle => + float_arg ty ir fr ofs (loc_arguments_rec tys (fixed - 1)) end end. +(** Number of fixed arguments for a function with signature [s]. + For AAPCS64, all arguments are treated as fixed, even for a vararg + function. *) + +Definition fixed_arguments (s: signature) : Z := + match Archi.abi, s.(sig_cc).(cc_vararg) with + | Archi.Apple, Some n => n + | _, _ => list_length_z s.(sig_args) + end. + (** [loc_arguments s] returns the list of locations where to store arguments when calling a function with signature [s]. *) Definition loc_arguments (s: signature) : list (rpair loc) := - loc_arguments_rec s.(sig_args) 0 0 0. + loc_arguments_rec s.(sig_args) (fixed_arguments s) 0 0 0. (** Argument locations are either caller-save registers or [Outgoing] stack slots at nonnegative offsets. *) @@ -200,49 +258,73 @@ Definition loc_argument_acceptable (l: loc) : Prop := | _ => False end. -Definition loc_argument_charact (ofs: Z) (l: loc) : Prop := - match l with - | R r => In r int_param_regs \/ In r float_param_regs - | S Outgoing ofs' ty => ofs' >= ofs /\ (2 | ofs') - | _ => False - end. - -Remark loc_arguments_rec_charact: - forall tyl ir fr ofs p, - In p (loc_arguments_rec tyl ir fr ofs) -> (2 | ofs) -> forall_rpair (loc_argument_charact ofs) p. +Lemma loc_arguments_rec_charact: + forall tyl fixed ri rf ofs p, + ofs >= 0 -> + In p (loc_arguments_rec tyl fixed ri rf ofs) -> forall_rpair loc_argument_acceptable p. Proof. - assert (X: forall ofs1 ofs2 l, loc_argument_charact ofs2 l -> ofs1 <= ofs2 -> loc_argument_charact ofs1 l). - { destruct l; simpl; intros; auto. destruct sl; auto. intuition omega. } - assert (Y: forall ofs1 ofs2 p, forall_rpair (loc_argument_charact ofs2) p -> ofs1 <= ofs2 -> forall_rpair (loc_argument_charact ofs1) p). - { destruct p; simpl; intuition eauto. } - assert (Z: forall ofs, (2 | ofs) -> (2 | ofs + 2)). - { intros. apply Z.divide_add_r; auto. apply Z.divide_refl. } -Opaque list_nth_z. - induction tyl; simpl loc_arguments_rec; intros. -- contradiction. -- assert (A: forall ty, In p - match list_nth_z int_param_regs ir with - | Some ireg => One (R ireg) :: loc_arguments_rec tyl (ir + 1) fr ofs - | None => One (S Outgoing ofs ty) :: loc_arguments_rec tyl ir fr (ofs + 2) - end -> - forall_rpair (loc_argument_charact ofs) p). - { intros. destruct (list_nth_z int_param_regs ir) as [r|] eqn:E; destruct H1. - subst. left. eapply list_nth_z_in; eauto. - eapply IHtyl; eauto. - subst. split. omega. assumption. - eapply Y; eauto. omega. } - assert (B: forall ty, In p - match list_nth_z float_param_regs fr with - | Some ireg => One (R ireg) :: loc_arguments_rec tyl ir (fr + 1) ofs - | None => One (S Outgoing ofs ty) :: loc_arguments_rec tyl ir fr (ofs + 2) - end -> - forall_rpair (loc_argument_charact ofs) p). - { intros. destruct (list_nth_z float_param_regs fr) as [r|] eqn:E; destruct H1. - subst. right. eapply list_nth_z_in; eauto. - eapply IHtyl; eauto. - subst. split. omega. assumption. - eapply Y; eauto. omega. } - destruct a; eauto. + set (OK := fun (l: list (rpair loc)) => + forall p, In p l -> forall_rpair loc_argument_acceptable p). + set (OKF := fun (f: Z -> Z -> Z -> list (rpair loc)) => + forall ri rf ofs, ofs >= 0 -> OK (f ri rf ofs)). + assert (CSI: forall r, In r int_param_regs -> is_callee_save r = false). + { decide_goal. } + assert (CSF: forall r, In r float_param_regs -> is_callee_save r = false). + { decide_goal. } + assert (ALP: forall ofs ty, ofs >= 0 -> align ofs (typesize ty) >= 0). + { intros. + assert (ofs <= align ofs (typesize ty)) by (apply align_le; apply typesize_pos). + lia. } + assert (ALD: forall ofs ty, ofs >= 0 -> (typealign ty | align ofs (typesize ty))). + { intros. apply Z.divide_trans with (typesize ty). apply typealign_typesize. apply align_divides. apply typesize_pos. } + assert (ALP2: forall ofs, ofs >= 0 -> align ofs 2 >= 0). + { intros. + assert (ofs <= align ofs 2) by (apply align_le; lia). + lia. } + assert (ALD2: forall ofs ty, ofs >= 0 -> (typealign ty | align ofs 2)). + { intros. eapply Z.divide_trans with 2. + exists (2 / typealign ty). destruct ty; reflexivity. + apply align_divides. lia. } + assert (STK: forall tyl ofs, + ofs >= 0 -> OK (loc_arguments_stack tyl ofs)). + { induction tyl as [ | ty tyl]; intros ofs OO; red; simpl; intros. + - contradiction. + - destruct H. + + subst p. split. auto. simpl. apply Z.divide_1_l. + + apply IHtyl with (ofs := ofs + 2). lia. auto. + } + assert (A: forall ty ri rf ofs f, + OKF f -> ofs >= 0 -> OK (stack_arg ty ri rf ofs f)). + { intros until f; intros OF OO; red; unfold stack_arg; intros. + destruct Archi.abi; destruct H. + - subst p; simpl; auto. + - eapply OF; [|eauto]. apply ALP2 in OO. lia. + - subst p; simpl; auto. + - eapply OF; [|eauto]. apply (ALP ofs ty) in OO. generalize (typesize_pos ty). lia. + } + assert (B: forall ty ri rf ofs f, + OKF f -> ofs >= 0 -> OK (int_arg ty ri rf ofs f)). + { intros until f; intros OF OO; red; unfold int_arg; intros. + destruct (list_nth_z int_param_regs ri) as [r|] eqn:NTH; [destruct H|]. + - subst p; simpl. apply CSI. eapply list_nth_z_in; eauto. + - eapply OF; eauto. + - eapply A; eauto. + } + assert (C: forall ty ri rf ofs f, + OKF f -> ofs >= 0 -> OK (float_arg ty ri rf ofs f)). + { intros until f; intros OF OO; red; unfold float_arg; intros. + destruct (list_nth_z float_param_regs rf) as [r|] eqn:NTH; [destruct H|]. + - subst p; simpl. apply CSF. eapply list_nth_z_in; eauto. + - eapply OF; eauto. + - eapply A; eauto. + } + cut (forall tyl fixed ri rf ofs, ofs >= 0 -> OK (loc_arguments_rec tyl fixed ri rf ofs)). + unfold OK. eauto. + induction tyl as [ | ty1 tyl]; intros until ofs; intros OO; simpl. +- red; simpl; tauto. +- destruct (zle fixed 0). + + apply (STK (ty1 :: tyl)); auto. + + unfold OKF in *; destruct ty1; eauto. Qed. Lemma loc_arguments_acceptable: @@ -250,19 +332,10 @@ Lemma loc_arguments_acceptable: In p (loc_arguments s) -> forall_rpair loc_argument_acceptable p. Proof. unfold loc_arguments; intros. - assert (A: forall r, In r int_param_regs -> is_callee_save r = false) by decide_goal. - assert (B: forall r, In r float_param_regs -> is_callee_save r = false) by decide_goal. - assert (X: forall l, loc_argument_charact 0 l -> loc_argument_acceptable l). - { unfold loc_argument_charact, loc_argument_acceptable. - destruct l as [r | [] ofs ty]; auto. intros [C|C]; auto. - intros [C D]. split; auto. apply Z.divide_trans with 2; auto. - exists (2 / typealign ty); destruct ty; reflexivity. - } - exploit loc_arguments_rec_charact; eauto using Z.divide_0_r. - unfold forall_rpair; destruct p; intuition auto. + eapply loc_arguments_rec_charact; eauto. lia. Qed. -Hint Resolve loc_arguments_acceptable: locs. +Global Hint Resolve loc_arguments_acceptable: locs. Lemma loc_arguments_main: loc_arguments signature_main = nil. @@ -270,16 +343,29 @@ Proof. unfold loc_arguments; reflexivity. Qed. -(** ** Normalization of function results *) +(** ** Normalization of function results and parameters *) (** According to the AAPCS64 ABI specification, "padding bits" in the return - value of a function have unpredictable values and must be ignored. - Consequently, we force normalization of return values of small integer - types (8- and 16-bit integers), so that the top bits (the "padding bits") - are proper sign- or zero-extensions of the small integer value. *) + value of a function or in a function parameter have unpredictable + values and must be ignored. Consequently, we force normalization + of return values and of function parameters when they have small + integer types (8- and 16-bit integers), so that the top bits (the + "padding bits") are proper sign- or zero-extensions of the small + integer value. + + The Apple variant of the AAPCS64 requires the callee to return a normalized + value, and the caller to pass normalized parameters, hence no + normalization is needed. + *) Definition return_value_needs_normalization (t: rettype) : bool := - match t with - | Tint8signed | Tint8unsigned | Tint16signed | Tint16unsigned => true - | _ => false + match Archi.abi with + | Archi.Apple => false + | Archi.AAPCS64 => + match t with + | Tint8signed | Tint8unsigned | Tint16signed | Tint16unsigned => true + | _ => false + end end. + +Definition parameter_needs_normalization := return_value_needs_normalization. diff --git a/aarch64/Op.v b/aarch64/Op.v index 40f6ebf0..4c0dfb72 100644 --- a/aarch64/Op.v +++ b/aarch64/Op.v @@ -985,25 +985,25 @@ End SHIFT_AMOUNT. Program Definition mk_amount32 (n: int): amount32 := {| a32_amount := Int.zero_ext 5 n |}. Next Obligation. - apply mk_amount_range. omega. reflexivity. + apply mk_amount_range. lia. reflexivity. Qed. Lemma mk_amount32_eq: forall n, Int.ltu n Int.iwordsize = true -> a32_amount (mk_amount32 n) = n. Proof. - intros. eapply mk_amount_eq; eauto. omega. reflexivity. + intros. eapply mk_amount_eq; eauto. lia. reflexivity. Qed. Program Definition mk_amount64 (n: int): amount64 := {| a64_amount := Int.zero_ext 6 n |}. Next Obligation. - apply mk_amount_range. omega. reflexivity. + apply mk_amount_range. lia. reflexivity. Qed. Lemma mk_amount64_eq: forall n, Int.ltu n Int64.iwordsize' = true -> a64_amount (mk_amount64 n) = n. Proof. - intros. eapply mk_amount_eq; eauto. omega. reflexivity. + intros. eapply mk_amount_eq; eauto. lia. reflexivity. Qed. (** Recognition of move operations. *) diff --git a/aarch64/PeepholeOracle.ml b/aarch64/PeepholeOracle.ml index 18f41fed..2b214df4 100644 --- a/aarch64/PeepholeOracle.ml +++ b/aarch64/PeepholeOracle.ml @@ -401,9 +401,8 @@ let pair_rep_inv insta = * for one type of inst. Lists contains integers which * are the indices of insts in the main array "insta". *) init_symb_mem (); - for i = Array.length insta - 1 downto 1 do + for i = Array.length insta - 1 downto 0 do let h0 = insta.(i) in - let h1 = insta.(i - 1) in (* Here we need to update every symbolic memory according to the matched inst *) update_pot_rep_basic h0 insta (Ldr P32) true; update_pot_rep_basic h0 insta (Ldr P64) true; @@ -413,9 +412,9 @@ let pair_rep_inv insta = update_pot_rep_basic h0 insta (Str P64) true; update_pot_rep_basic h0 insta (Str P32f) true; update_pot_rep_basic h0 insta (Str P64f) true; - match (h0, h1) with + match h0 with (* Non-consecutive ldr *) - | PLoad (PLd_rd_a (ldi, rd1, ADimm (b1, n1))), _ -> + | PLoad (PLd_rd_a (ldi, rd1, ADimm (b1, n1))) -> if is_compat_load ldi then ( (* Search a previous compatible load *) let ld_t = get_load_pht ldi in @@ -445,7 +444,7 @@ let pair_rep_inv insta = (trans_ldi ldi, rd1, r, chunk_load ldi, c, ADimm (b, n1))))); Hashtbl.replace symb_mem ld_t pot_rep) (* Non-consecutive str *) - | PStore (PSt_rs_a (sti, rd1, ADimm (b1, n1))), _ -> + | PStore (PSt_rs_a (sti, rd1, ADimm (b1, n1))) -> if is_compat_store sti then ( (* Search a previous compatible store *) let st_t = get_store_pht sti in @@ -469,7 +468,7 @@ let pair_rep_inv insta = (trans_sti sti, rd1, r, chunk_store sti, c, ADimm (b, n1)))); Hashtbl.replace symb_mem st_t pot_rep (* Any other inst *)) - | i, _ -> ( + | i -> ( (* Clear list of candidates if there is a non supported store *) match i with PStore _ -> reset_str_symb_mem () | _ -> ()) done diff --git a/aarch64/PostpassSchedulingproof.v b/aarch64/PostpassSchedulingproof.v index 48840602..a5084b5f 100644 --- a/aarch64/PostpassSchedulingproof.v +++ b/aarch64/PostpassSchedulingproof.v @@ -21,6 +21,7 @@ Require Import Asmblockprops. Require Import PostpassScheduling. Require Import Asmblockgenproof. Require Import Axioms. +Require Import Lia. Local Open Scope error_monad_scope. @@ -171,7 +172,7 @@ Proof. induction tc. - intros. simpl in H. discriminate. - intros. simpl in *. destruct (is_label _ _) eqn:ISLBL. - + inv H. assert (k = k') by omega. subst. reflexivity. + + inv H. assert (k = k') by lia. subst. reflexivity. + pose (IHtc l p p' k (k' + size a)). repeat (rewrite Z.add_assoc in e). auto. Qed. diff --git a/aarch64/SelectLongproof.v b/aarch64/SelectLongproof.v index 513ee9bd..0984943c 100644 --- a/aarch64/SelectLongproof.v +++ b/aarch64/SelectLongproof.v @@ -228,8 +228,8 @@ Proof. intros. unfold Int.ltu; apply zlt_true. apply Int.ltu_inv in H. apply Int.ltu_inv in H0. change (Int.unsigned Int64.iwordsize') with Int64.zwordsize in *. - unfold Int.sub; rewrite Int.unsigned_repr. omega. - assert (Int64.zwordsize < Int.max_unsigned) by reflexivity. omega. + unfold Int.sub; rewrite Int.unsigned_repr. lia. + assert (Int64.zwordsize < Int.max_unsigned) by reflexivity. lia. Qed. Theorem eval_shrluimm: @@ -245,13 +245,13 @@ Local Opaque Int64.zwordsize. + destruct (Int.ltu n a) eqn:L2. * assert (L3: Int.ltu (Int.sub a n) Int64.iwordsize' = true). { apply sub_shift_amount; auto using a64_range. - apply Int.ltu_inv in L2. omega. } + apply Int.ltu_inv in L2. lia. } econstructor; split. EvalOp. destruct v1; simpl; auto. rewrite mk_amount64_eq, L3, a64_range by auto. simpl. rewrite L. rewrite Int64.shru'_shl', L2 by auto using a64_range. auto. * assert (L3: Int.ltu (Int.sub n a) Int64.iwordsize' = true). { apply sub_shift_amount; auto using a64_range. - unfold Int.ltu in L2. destruct zlt in L2; discriminate || omega. } + unfold Int.ltu in L2. destruct zlt in L2; discriminate || lia. } econstructor; split. EvalOp. destruct v1; simpl; auto. rewrite mk_amount64_eq, L3, a64_range by auto. simpl. rewrite L. rewrite Int64.shru'_shl', L2 by auto using a64_range. auto. @@ -264,11 +264,11 @@ Local Opaque Int64.zwordsize. * econstructor; split. EvalOp. rewrite mk_amount64_eq by auto. destruct v1; simpl; auto. rewrite ! L; simpl. set (s' := s - Int.unsigned n). - replace s with (s' + Int.unsigned n) by (unfold s'; omega). - rewrite Int64.shru'_zero_ext. auto. unfold s'; omega. + replace s with (s' + Int.unsigned n) by (unfold s'; lia). + rewrite Int64.shru'_zero_ext. auto. unfold s'; lia. * econstructor; split. EvalOp. destruct v1; simpl; auto. rewrite ! L; simpl. - rewrite Int64.shru'_zero_ext_0 by omega. auto. + rewrite Int64.shru'_zero_ext_0 by lia. auto. + econstructor; eauto using eval_shrluimm_base. - intros; TrivialExists. Qed. @@ -293,13 +293,13 @@ Proof. + destruct (Int.ltu n a) eqn:L2. * assert (L3: Int.ltu (Int.sub a n) Int64.iwordsize' = true). { apply sub_shift_amount; auto using a64_range. - apply Int.ltu_inv in L2. omega. } + apply Int.ltu_inv in L2. lia. } econstructor; split. EvalOp. destruct v1; simpl; auto. rewrite mk_amount64_eq, L3, a64_range by auto. simpl. rewrite L. rewrite Int64.shr'_shl', L2 by auto using a64_range. auto. * assert (L3: Int.ltu (Int.sub n a) Int64.iwordsize' = true). { apply sub_shift_amount; auto using a64_range. - unfold Int.ltu in L2. destruct zlt in L2; discriminate || omega. } + unfold Int.ltu in L2. destruct zlt in L2; discriminate || lia. } econstructor; split. EvalOp. destruct v1; simpl; auto. rewrite mk_amount64_eq, L3, a64_range by auto. simpl. rewrite L. rewrite Int64.shr'_shl', L2 by auto using a64_range. auto. @@ -312,8 +312,8 @@ Proof. * InvBooleans. econstructor; split. EvalOp. rewrite mk_amount64_eq by auto. destruct v1; simpl; auto. rewrite ! L; simpl. set (s' := s - Int.unsigned n). - replace s with (s' + Int.unsigned n) by (unfold s'; omega). - rewrite Int64.shr'_sign_ext. auto. unfold s'; omega. unfold s'; omega. + replace s with (s' + Int.unsigned n) by (unfold s'; lia). + rewrite Int64.shr'_sign_ext. auto. unfold s'; lia. unfold s'; lia. * econstructor; split; [|eauto]. apply eval_shrlimm_base; auto. EvalOp. + econstructor; eauto using eval_shrlimm_base. - intros; TrivialExists. @@ -395,7 +395,7 @@ Proof. - TrivialExists. - destruct (zlt (Int.unsigned a0) sz). + econstructor; split. EvalOp. destruct v1; simpl; auto. rewrite a64_range; simpl. - apply Val.lessdef_same. f_equal. rewrite Int64.shl'_zero_ext by omega. f_equal. omega. + apply Val.lessdef_same. f_equal. rewrite Int64.shl'_zero_ext by lia. f_equal. lia. + TrivialExists. - TrivialExists. Qed. diff --git a/aarch64/SelectOp.vp b/aarch64/SelectOp.vp index 67575fdb..7f73d592 100644 --- a/aarch64/SelectOp.vp +++ b/aarch64/SelectOp.vp @@ -540,10 +540,18 @@ Definition select (ty: typ) (cond: condition) (args: exprlist) (e1 e2: expr) := (** ** Recognition of addressing modes for load and store operations *) +(** Some symbols are relocatable (e.g. external symbols in macOS) + and cannot be used with [Aglobal] addressing mode. *) + +Parameter symbol_is_relocatable: ident -> bool. + Nondetfunction addressing (chunk: memory_chunk) (e: expr) := match e with | Eop (Oaddrstack n) Enil => (Ainstack n, Enil) - | Eop (Oaddrsymbol id ofs) Enil => (Aglobal id ofs, Enil) + | Eop (Oaddrsymbol id ofs) Enil => + if symbol_is_relocatable id + then (Aindexed (Ptrofs.to_int64 ofs), Eop (Oaddrsymbol id Ptrofs.zero) Enil ::: Enil) + else (Aglobal id ofs, Enil) | Eop (Oaddlimm n) (e1:::Enil) => (Aindexed n, e1:::Enil) | Eop (Oaddlshift Slsl a) (e1:::e2:::Enil) => (Aindexed2shift a, e1:::e2:::Enil) | Eop (Oaddlext x a) (e1:::e2:::Enil) => (Aindexed2ext x a, e1:::e2:::Enil) diff --git a/aarch64/SelectOpproof.v b/aarch64/SelectOpproof.v index 9ce7a8bf..dfa4c598 100644 --- a/aarch64/SelectOpproof.v +++ b/aarch64/SelectOpproof.v @@ -248,8 +248,8 @@ Remark sub_shift_amount: Proof. intros. unfold Int.ltu; apply zlt_true. rewrite Int.unsigned_repr_wordsize. apply Int.ltu_iwordsize_inv in H. apply Int.ltu_iwordsize_inv in H0. - unfold Int.sub; rewrite Int.unsigned_repr. omega. - generalize Int.wordsize_max_unsigned; omega. + unfold Int.sub; rewrite Int.unsigned_repr. lia. + generalize Int.wordsize_max_unsigned; lia. Qed. Theorem eval_shruimm: @@ -265,13 +265,13 @@ Local Opaque Int.zwordsize. + destruct (Int.ltu n a) eqn:L2. * assert (L3: Int.ltu (Int.sub a n) Int.iwordsize = true). { apply sub_shift_amount; auto using a32_range. - apply Int.ltu_inv in L2. omega. } + apply Int.ltu_inv in L2. lia. } econstructor; split. EvalOp. destruct v1; simpl; auto. rewrite mk_amount32_eq, L3, a32_range by auto. simpl. rewrite L. rewrite Int.shru_shl, L2 by auto using a32_range. auto. * assert (L3: Int.ltu (Int.sub n a) Int.iwordsize = true). { apply sub_shift_amount; auto using a32_range. - unfold Int.ltu in L2. destruct zlt in L2; discriminate || omega. } + unfold Int.ltu in L2. destruct zlt in L2; discriminate || lia. } econstructor; split. EvalOp. destruct v1; simpl; auto. rewrite mk_amount32_eq, L3, a32_range by auto. simpl. rewrite L. rewrite Int.shru_shl, L2 by auto using a32_range. auto. @@ -284,11 +284,11 @@ Local Opaque Int.zwordsize. * econstructor; split. EvalOp. rewrite mk_amount32_eq by auto. destruct v1; simpl; auto. rewrite ! L; simpl. set (s' := s - Int.unsigned n). - replace s with (s' + Int.unsigned n) by (unfold s'; omega). - rewrite Int.shru_zero_ext. auto. unfold s'; omega. + replace s with (s' + Int.unsigned n) by (unfold s'; lia). + rewrite Int.shru_zero_ext. auto. unfold s'; lia. * econstructor; split. EvalOp. destruct v1; simpl; auto. rewrite ! L; simpl. - rewrite Int.shru_zero_ext_0 by omega. auto. + rewrite Int.shru_zero_ext_0 by lia. auto. + econstructor; eauto using eval_shruimm_base. - intros; TrivialExists. Qed. @@ -313,13 +313,13 @@ Proof. + destruct (Int.ltu n a) eqn:L2. * assert (L3: Int.ltu (Int.sub a n) Int.iwordsize = true). { apply sub_shift_amount; auto using a32_range. - apply Int.ltu_inv in L2. omega. } + apply Int.ltu_inv in L2. lia. } econstructor; split. EvalOp. destruct v1; simpl; auto. rewrite mk_amount32_eq, L3, a32_range by auto. simpl. rewrite L. rewrite Int.shr_shl, L2 by auto using a32_range. auto. * assert (L3: Int.ltu (Int.sub n a) Int.iwordsize = true). { apply sub_shift_amount; auto using a32_range. - unfold Int.ltu in L2. destruct zlt in L2; discriminate || omega. } + unfold Int.ltu in L2. destruct zlt in L2; discriminate || lia. } econstructor; split. EvalOp. destruct v1; simpl; auto. rewrite mk_amount32_eq, L3, a32_range by auto. simpl. rewrite L. rewrite Int.shr_shl, L2 by auto using a32_range. auto. @@ -332,8 +332,8 @@ Proof. * InvBooleans. econstructor; split. EvalOp. rewrite mk_amount32_eq by auto. destruct v1; simpl; auto. rewrite ! L; simpl. set (s' := s - Int.unsigned n). - replace s with (s' + Int.unsigned n) by (unfold s'; omega). - rewrite Int.shr_sign_ext. auto. unfold s'; omega. unfold s'; omega. + replace s with (s' + Int.unsigned n) by (unfold s'; lia). + rewrite Int.shr_sign_ext. auto. unfold s'; lia. unfold s'; lia. * econstructor; split; [|eauto]. apply eval_shrimm_base; auto. EvalOp. + econstructor; eauto using eval_shrimm_base. - intros; TrivialExists. @@ -404,20 +404,20 @@ Proof. change (Int.ltu (Int.repr 32) Int64.iwordsize') with true; simpl. apply Val.lessdef_same. f_equal. transitivity (Int.repr (Z.shiftr (Int.signed i * Int.signed i0) 32)). - unfold Int.mulhs; f_equal. rewrite Zshiftr_div_two_p by omega. reflexivity. + unfold Int.mulhs; f_equal. rewrite Zshiftr_div_two_p by lia. reflexivity. apply Int.same_bits_eq; intros n N. change Int.zwordsize with 32 in *. - assert (N1: 0 <= n < 64) by omega. + assert (N1: 0 <= n < 64) by lia. rewrite Int64.bits_loword by auto. rewrite Int64.bits_shr' by auto. change (Int.unsigned (Int.repr 32)) with 32. change Int64.zwordsize with 64. - rewrite zlt_true by omega. + rewrite zlt_true by lia. rewrite Int.testbit_repr by auto. - unfold Int64.mul. rewrite Int64.testbit_repr by (change Int64.zwordsize with 64; omega). + unfold Int64.mul. rewrite Int64.testbit_repr by (change Int64.zwordsize with 64; lia). transitivity (Z.testbit (Int.signed i * Int.signed i0) (n + 32)). - rewrite Z.shiftr_spec by omega. auto. + rewrite Z.shiftr_spec by lia. auto. apply Int64.same_bits_eqm. apply Int64.eqm_mult; apply Int64.eqm_unsigned_repr. - change Int64.zwordsize with 64; omega. + change Int64.zwordsize with 64; lia. Qed. Theorem eval_mulhu: binary_constructor_sound mulhu Val.mulhu. @@ -430,20 +430,20 @@ Proof. change (Int.ltu (Int.repr 32) Int64.iwordsize') with true; simpl. apply Val.lessdef_same. f_equal. transitivity (Int.repr (Z.shiftr (Int.unsigned i * Int.unsigned i0) 32)). - unfold Int.mulhu; f_equal. rewrite Zshiftr_div_two_p by omega. reflexivity. + unfold Int.mulhu; f_equal. rewrite Zshiftr_div_two_p by lia. reflexivity. apply Int.same_bits_eq; intros n N. change Int.zwordsize with 32 in *. - assert (N1: 0 <= n < 64) by omega. + assert (N1: 0 <= n < 64) by lia. rewrite Int64.bits_loword by auto. rewrite Int64.bits_shru' by auto. change (Int.unsigned (Int.repr 32)) with 32. change Int64.zwordsize with 64. - rewrite zlt_true by omega. + rewrite zlt_true by lia. rewrite Int.testbit_repr by auto. - unfold Int64.mul. rewrite Int64.testbit_repr by (change Int64.zwordsize with 64; omega). + unfold Int64.mul. rewrite Int64.testbit_repr by (change Int64.zwordsize with 64; lia). transitivity (Z.testbit (Int.unsigned i * Int.unsigned i0) (n + 32)). - rewrite Z.shiftr_spec by omega. auto. + rewrite Z.shiftr_spec by lia. auto. apply Int64.same_bits_eqm. apply Int64.eqm_mult; apply Int64.eqm_unsigned_repr. - change Int64.zwordsize with 64; omega. + change Int64.zwordsize with 64; lia. Qed. (** Integer conversions *) @@ -456,7 +456,7 @@ Proof. - TrivialExists. - destruct (zlt (Int.unsigned a0) sz). + econstructor; split. EvalOp. destruct v1; simpl; auto. rewrite a32_range; simpl. - apply Val.lessdef_same. f_equal. rewrite Int.shl_zero_ext by omega. f_equal. omega. + apply Val.lessdef_same. f_equal. rewrite Int.shl_zero_ext by lia. f_equal. lia. + TrivialExists. - TrivialExists. Qed. @@ -469,29 +469,29 @@ Proof. - TrivialExists. - destruct (zlt (Int.unsigned a0) sz). + econstructor; split. EvalOp. destruct v1; simpl; auto. rewrite a32_range; simpl. - apply Val.lessdef_same. f_equal. rewrite Int.shl_sign_ext by omega. f_equal. omega. + apply Val.lessdef_same. f_equal. rewrite Int.shl_sign_ext by lia. f_equal. lia. + TrivialExists. - TrivialExists. Qed. Theorem eval_cast8signed: unary_constructor_sound cast8signed (Val.sign_ext 8). Proof. - apply eval_sign_ext; omega. + apply eval_sign_ext; lia. Qed. Theorem eval_cast8unsigned: unary_constructor_sound cast8unsigned (Val.zero_ext 8). Proof. - apply eval_zero_ext; omega. + apply eval_zero_ext; lia. Qed. Theorem eval_cast16signed: unary_constructor_sound cast16signed (Val.sign_ext 16). Proof. - apply eval_sign_ext; omega. + apply eval_sign_ext; lia. Qed. Theorem eval_cast16unsigned: unary_constructor_sound cast16unsigned (Val.zero_ext 16). Proof. - apply eval_zero_ext; omega. + apply eval_zero_ext; lia. Qed. (** Bitwise not, and, or, xor *) @@ -1038,7 +1038,13 @@ Theorem eval_addressing: Proof. intros until v. unfold addressing; case (addressing_match a); intros; InvEval. - econstructor; split. EvalOp. simpl; auto. -- econstructor; split. EvalOp. simpl; auto. +- destruct (symbol_is_relocatable id). + + exists (Genv.symbol_address ge id Ptrofs.zero :: nil); split. + constructor. EvalOp. constructor. + simpl. rewrite <- Genv.shift_symbol_address_64 by auto. + rewrite Ptrofs.of_int64_to_int64, Ptrofs.add_zero_l by auto. + auto. + + econstructor; split. EvalOp. simpl; auto. - econstructor; split. EvalOp. simpl. destruct v1; try discriminate. rewrite <- H; auto. - econstructor; split. EvalOp. simpl. congruence. diff --git a/aarch64/Stacklayout.v b/aarch64/Stacklayout.v index 86ba9f45..cdbc64d5 100644 --- a/aarch64/Stacklayout.v +++ b/aarch64/Stacklayout.v @@ -67,13 +67,13 @@ Local Opaque Z.add Z.mul sepconj range. set (ostkdata := align (ol + 4 * b.(bound_local)) 8). change (size_chunk Mptr) with 8. generalize b.(bound_local_pos) b.(bound_outgoing_pos) b.(bound_stack_data_pos); intros. - assert (0 <= 4 * b.(bound_outgoing)) by omega. - assert (4 * b.(bound_outgoing) <= olink) by (apply align_le; omega). - assert (olink + 8 <= oretaddr) by (unfold oretaddr; omega). - assert (oretaddr + 8 <= ocs) by (unfold ocs; omega). + assert (0 <= 4 * b.(bound_outgoing)) by lia. + assert (4 * b.(bound_outgoing) <= olink) by (apply align_le; lia). + assert (olink + 8 <= oretaddr) by (unfold oretaddr; lia). + assert (oretaddr + 8 <= ocs) by (unfold ocs; lia). assert (ocs <= size_callee_save_area b ocs) by (apply size_callee_save_area_incr). - assert (size_callee_save_area b ocs <= ol) by (apply align_le; omega). - assert (ol + 4 * b.(bound_local) <= ostkdata) by (apply align_le; omega). + assert (size_callee_save_area b ocs <= ol) by (apply align_le; lia). + assert (ol + 4 * b.(bound_local) <= ostkdata) by (apply align_le; lia). (* Reorder as: outgoing back link @@ -86,11 +86,11 @@ Local Opaque Z.add Z.mul sepconj range. rewrite sep_swap45. (* Apply range_split and range_split2 repeatedly *) unfold fe_ofs_arg. - apply range_split_2. fold olink; omega. omega. - apply range_split. omega. - apply range_split. omega. - apply range_split_2. fold ol. omega. omega. - apply range_drop_right with ostkdata. omega. + apply range_split_2. fold olink; lia. lia. + apply range_split. lia. + apply range_split. lia. + apply range_split_2. fold ol. lia. lia. + apply range_drop_right with ostkdata. lia. eapply sep_drop2. eexact H. Qed. @@ -106,14 +106,14 @@ Proof. set (ol := align (size_callee_save_area b ocs) 8). set (ostkdata := align (ol + 4 * b.(bound_local)) 8). generalize b.(bound_local_pos) b.(bound_outgoing_pos) b.(bound_stack_data_pos); intros. - assert (0 <= 4 * b.(bound_outgoing)) by omega. - assert (4 * b.(bound_outgoing) <= olink) by (apply align_le; omega). - assert (olink + 8 <= oretaddr) by (unfold oretaddr; omega). - assert (oretaddr + 8 <= ocs) by (unfold ocs; omega). + assert (0 <= 4 * b.(bound_outgoing)) by lia. + assert (4 * b.(bound_outgoing) <= olink) by (apply align_le; lia). + assert (olink + 8 <= oretaddr) by (unfold oretaddr; lia). + assert (oretaddr + 8 <= ocs) by (unfold ocs; lia). assert (ocs <= size_callee_save_area b ocs) by (apply size_callee_save_area_incr). - assert (size_callee_save_area b ocs <= ol) by (apply align_le; omega). - assert (ol + 4 * b.(bound_local) <= ostkdata) by (apply align_le; omega). - split. omega. apply align_le. omega. + assert (size_callee_save_area b ocs <= ol) by (apply align_le; lia). + assert (ol + 4 * b.(bound_local) <= ostkdata) by (apply align_le; lia). + split. lia. apply align_le. lia. Qed. Lemma frame_env_aligned: @@ -133,8 +133,8 @@ Proof. set (ostkdata := align (ol + 4 * b.(bound_local)) 8). change (align_chunk Mptr) with 8. split. apply Z.divide_0_r. - split. apply align_divides; omega. - split. apply align_divides; omega. - split. apply align_divides; omega. - apply Z.divide_add_r. apply align_divides; omega. apply Z.divide_refl. + split. apply align_divides; lia. + split. apply align_divides; lia. + split. apply align_divides; lia. + apply Z.divide_add_r. apply align_divides; lia. apply Z.divide_refl. Qed. diff --git a/aarch64/TargetPrinter.ml b/aarch64/TargetPrinter.ml index 53959152..1ca3be16 100644 --- a/aarch64/TargetPrinter.ml +++ b/aarch64/TargetPrinter.ml @@ -21,109 +21,137 @@ open AisAnnot open PrintAsmaux open Fileinfo -(* Module containing the printing functions *) +let intsz oc (sz, n) = + match sz with X -> coqint64 oc n | W -> coqint oc n + +let xreg_name = function + | X0 -> "x0" | X1 -> "x1" | X2 -> "x2" | X3 -> "x3" + | X4 -> "x4" | X5 -> "x5" | X6 -> "x6" | X7 -> "x7" + | X8 -> "x8" | X9 -> "x9" | X10 -> "x10" | X11 -> "x11" + | X12 -> "x12" | X13 -> "x13" | X14 -> "x14" | X15 -> "x15" + | X16 -> "x16" | X17 -> "x17" | X18 -> "x18" | X19 -> "x19" + | X20 -> "x20" | X21 -> "x21" | X22 -> "x22" | X23 -> "x23" + | X24 -> "x24" | X25 -> "x25" | X26 -> "x26" | X27 -> "x27" + | X28 -> "x28" | X29 -> "x29" | X30 -> "x30" + +let wreg_name = function + | X0 -> "w0" | X1 -> "w1" | X2 -> "w2" | X3 -> "w3" + | X4 -> "w4" | X5 -> "w5" | X6 -> "w6" | X7 -> "w7" + | X8 -> "w8" | X9 -> "w9" | X10 -> "w10" | X11 -> "w11" + | X12 -> "w12" | X13 -> "w13" | X14 -> "w14" | X15 -> "w15" + | X16 -> "w16" | X17 -> "w17" | X18 -> "w18" | X19 -> "w19" + | X20 -> "w20" | X21 -> "w21" | X22 -> "w22" | X23 -> "w23" + | X24 -> "w24" | X25 -> "w25" | X26 -> "w26" | X27 -> "w27" + | X28 -> "w28" | X29 -> "w29" | X30 -> "w30" + +let xreg0_name = function RR0 r -> xreg_name r | XZR -> "xzr" +let wreg0_name = function RR0 r -> wreg_name r | XZR -> "wzr" + +let xregsp_name = function RR1 r -> xreg_name r | XSP -> "sp" +let wregsp_name = function RR1 r -> wreg_name r | XSP -> "wsp" + +let dreg_name = function +| D0 -> "d0" | D1 -> "d1" | D2 -> "d2" | D3 -> "d3" +| D4 -> "d4" | D5 -> "d5" | D6 -> "d6" | D7 -> "d7" +| D8 -> "d8" | D9 -> "d9" | D10 -> "d10" | D11 -> "d11" +| D12 -> "d12" | D13 -> "d13" | D14 -> "d14" | D15 -> "d15" +| D16 -> "d16" | D17 -> "d17" | D18 -> "d18" | D19 -> "d19" +| D20 -> "d20" | D21 -> "d21" | D22 -> "d22" | D23 -> "d23" +| D24 -> "d24" | D25 -> "d25" | D26 -> "d26" | D27 -> "d27" +| D28 -> "d28" | D29 -> "d29" | D30 -> "d30" | D31 -> "d31" + +let sreg_name = function +| D0 -> "s0" | D1 -> "s1" | D2 -> "s2" | D3 -> "s3" +| D4 -> "s4" | D5 -> "s5" | D6 -> "s6" | D7 -> "s7" +| D8 -> "s8" | D9 -> "s9" | D10 -> "s10" | D11 -> "s11" +| D12 -> "s12" | D13 -> "s13" | D14 -> "s14" | D15 -> "s15" +| D16 -> "s16" | D17 -> "s17" | D18 -> "s18" | D19 -> "s19" +| D20 -> "s20" | D21 -> "s21" | D22 -> "s22" | D23 -> "s23" +| D24 -> "s24" | D25 -> "s25" | D26 -> "s26" | D27 -> "s27" +| D28 -> "s28" | D29 -> "s29" | D30 -> "s30" | D31 -> "s31" + +let xreg oc r = output_string oc (xreg_name r) +let wreg oc r = output_string oc (wreg_name r) +let ireg oc (sz, r) = + output_string oc (match sz with X -> xreg_name r | W -> wreg_name r) + +let xreg0 oc r = output_string oc (xreg0_name r) +let wreg0 oc r = output_string oc (wreg0_name r) +let ireg0 oc (sz, r) = + output_string oc (match sz with X -> xreg0_name r | W -> wreg0_name r) + +let xregsp oc r = output_string oc (xregsp_name r) +let iregsp oc (sz, r) = + output_string oc (match sz with X -> xregsp_name r | W -> wregsp_name r) + +let dreg oc r = output_string oc (dreg_name r) +let sreg oc r = output_string oc (sreg_name r) +let freg oc (sz, r) = + output_string oc (match sz with D -> dreg_name r | S -> sreg_name r) + +let preg_asm oc ty = function + | DR (IR (RR1 r)) -> if ty = Tint then wreg oc r else xreg oc r + | DR (FR r) -> if ty = Tsingle then sreg oc r else dreg oc r +| _ -> assert false + +let preg_annot = function + | DR (IR (RR1 r)) -> xreg_name r + | DR (FR r) -> dreg_name r + | _ -> assert false + +(* Base-2 log of a Caml integer *) +let rec log2 n = + assert (n > 0); + if n = 1 then 0 else 1 + log2 (n lsr 1) + +(* System dependent printer functions *) + +module type SYSTEM = + sig + val comment: string + val raw_symbol: out_channel -> string -> unit + val symbol: out_channel -> P.t -> unit + val symbol_offset_high: out_channel -> P.t * Z.t -> unit + val symbol_offset_low: out_channel -> P.t * Z.t -> unit + val label: out_channel -> int -> unit + val label_high: out_channel -> int -> unit + val label_low: out_channel -> int -> unit + val load_symbol_address: out_channel -> ireg -> P.t -> unit + val name_of_section: section_name -> string + val print_fun_info: out_channel -> P.t -> unit + val print_var_info: out_channel -> P.t -> unit + val print_comm_decl: out_channel -> P.t -> Z.t -> int -> unit + val print_lcomm_decl: out_channel -> P.t -> Z.t -> int -> unit + end -module Target (*: TARGET*) = +module ELF_System : SYSTEM = struct - -(* Basic printing functions *) - let comment = "//" + let raw_symbol = output_string + let symbol = elf_symbol + let symbol_offset_high = elf_symbol_offset + let symbol_offset_low oc id_ofs = + fprintf oc "#:lo12:%a" elf_symbol_offset id_ofs + + let label = elf_label + let label_high = elf_label + let label_low oc lbl = + fprintf oc "#:lo12:%a" elf_label lbl + + let load_symbol_address oc rd id = + fprintf oc " adrp %a, :got:%a\n" xreg rd symbol id; + fprintf oc " ldr %a, [%a, #:got_lo12:%a]\n" xreg rd xreg rd symbol id - let symbol = elf_symbol - let symbol_offset = elf_symbol_offset - let label = elf_label - - let print_label oc lbl = label oc (transl_label lbl) - - let intsz oc (sz, n) = - match sz with X -> coqint64 oc n | W -> coqint oc n - - let xreg_name = function - | X0 -> "x0" | X1 -> "x1" | X2 -> "x2" | X3 -> "x3" - | X4 -> "x4" | X5 -> "x5" | X6 -> "x6" | X7 -> "x7" - | X8 -> "x8" | X9 -> "x9" | X10 -> "x10" | X11 -> "x11" - | X12 -> "x12" | X13 -> "x13" | X14 -> "x14" | X15 -> "x15" - | X16 -> "x16" | X17 -> "x17" | X18 -> "x18" | X19 -> "x19" - | X20 -> "x20" | X21 -> "x21" | X22 -> "x22" | X23 -> "x23" - | X24 -> "x24" | X25 -> "x25" | X26 -> "x26" | X27 -> "x27" - | X28 -> "x28" | X29 -> "x29" | X30 -> "x30" - - let wreg_name = function - | X0 -> "w0" | X1 -> "w1" | X2 -> "w2" | X3 -> "w3" - | X4 -> "w4" | X5 -> "w5" | X6 -> "w6" | X7 -> "w7" - | X8 -> "w8" | X9 -> "w9" | X10 -> "w10" | X11 -> "w11" - | X12 -> "w12" | X13 -> "w13" | X14 -> "w14" | X15 -> "w15" - | X16 -> "w16" | X17 -> "w17" | X18 -> "w18" | X19 -> "w19" - | X20 -> "w20" | X21 -> "w21" | X22 -> "w22" | X23 -> "w23" - | X24 -> "w24" | X25 -> "w25" | X26 -> "w26" | X27 -> "w27" - | X28 -> "w28" | X29 -> "w29" | X30 -> "w30" - - let xreg0_name = function RR0 r -> xreg_name r | XZR -> "xzr" - let wreg0_name = function RR0 r -> wreg_name r | XZR -> "wzr" - - let xregsp_name = function RR1 r -> xreg_name r | XSP -> "sp" - let wregsp_name = function RR1 r -> wreg_name r | XSP -> "wsp" - - let dreg_name = function - | D0 -> "d0" | D1 -> "d1" | D2 -> "d2" | D3 -> "d3" - | D4 -> "d4" | D5 -> "d5" | D6 -> "d6" | D7 -> "d7" - | D8 -> "d8" | D9 -> "d9" | D10 -> "d10" | D11 -> "d11" - | D12 -> "d12" | D13 -> "d13" | D14 -> "d14" | D15 -> "d15" - | D16 -> "d16" | D17 -> "d17" | D18 -> "d18" | D19 -> "d19" - | D20 -> "d20" | D21 -> "d21" | D22 -> "d22" | D23 -> "d23" - | D24 -> "d24" | D25 -> "d25" | D26 -> "d26" | D27 -> "d27" - | D28 -> "d28" | D29 -> "d29" | D30 -> "d30" | D31 -> "d31" - - let sreg_name = function - | D0 -> "s0" | D1 -> "s1" | D2 -> "s2" | D3 -> "s3" - | D4 -> "s4" | D5 -> "s5" | D6 -> "s6" | D7 -> "s7" - | D8 -> "s8" | D9 -> "s9" | D10 -> "s10" | D11 -> "s11" - | D12 -> "s12" | D13 -> "s13" | D14 -> "s14" | D15 -> "s15" - | D16 -> "s16" | D17 -> "s17" | D18 -> "s18" | D19 -> "s19" - | D20 -> "s20" | D21 -> "s21" | D22 -> "s22" | D23 -> "s23" - | D24 -> "s24" | D25 -> "s25" | D26 -> "s26" | D27 -> "s27" - | D28 -> "s28" | D29 -> "s29" | D30 -> "s30" | D31 -> "s31" - - let xreg oc r = output_string oc (xreg_name r) - let wreg oc r = output_string oc (wreg_name r) - let ireg oc (sz, r) = - output_string oc (match sz with X -> xreg_name r | W -> wreg_name r) - - let xreg0 oc r = output_string oc (xreg0_name r) - let wreg0 oc r = output_string oc (wreg0_name r) - let ireg0 oc (sz, r) = - output_string oc (match sz with X -> xreg0_name r | W -> wreg0_name r) - - let xregsp oc r = output_string oc (xregsp_name r) - let iregsp oc (sz, r) = - output_string oc (match sz with X -> xregsp_name r | W -> wregsp_name r) - - let dreg oc r = output_string oc (dreg_name r) - let sreg oc r = output_string oc (sreg_name r) - let freg oc (sz, r) = - output_string oc (match sz with D -> dreg_name r | S -> sreg_name r) - - let preg_asm oc ty = function - | DR (IR (RR1 r)) -> if ty = Tint then wreg oc r else xreg oc r - | DR (FR r) -> if ty = Tsingle then sreg oc r else dreg oc r - | _ -> assert false - - let preg_annot = function - | DR (IR (RR1 r)) -> xreg_name r - | DR (FR r) -> dreg_name r - | _ -> assert false - -(* Names of sections *) + (* Names of sections *) let name_of_section = function | Section_text -> ".text" | Section_data(i, true) -> failwith "_Thread_local unsupported on this platform" | Section_data(i, false) | Section_small_data i -> - if i then ".data" else common_section () + variable_section ~sec:".data" ~bss:".bss" i | Section_const i | Section_small_const i -> - if i || (not !Clflags.option_fcommon) then ".section .rodata" else "COMM" + variable_section ~sec:".section .rodata" i | Section_string -> ".section .rodata" | Section_literal -> ".section .rodata" | Section_jumptable -> ".section .rodata" @@ -138,6 +166,96 @@ module Target (*: TARGET*) = s (if wr then "w" else "") (if ex then "x" else "") | Section_ais_annotation -> sprintf ".section \"__compcert_ais_annotations\",\"\",@note" + let print_fun_info = elf_print_fun_info + let print_var_info = elf_print_var_info + + let print_comm_decl oc name sz al = + fprintf oc " .comm %a, %s, %d\n" symbol name (Z.to_string sz) al + + let print_lcomm_decl oc name sz al = + fprintf oc " .local %a\n" symbol name; + print_comm_decl oc name sz al + + end + +module MacOS_System : SYSTEM = + struct + let comment = ";" + + let raw_symbol oc s = + fprintf oc "_%s" s + + let symbol oc symb = + raw_symbol oc (extern_atom symb) + + let symbol_offset_gen kind oc (id, ofs) = + fprintf oc "%a@%s" symbol id kind; + let ofs = camlint64_of_ptrofs ofs in + if ofs <> 0L then fprintf oc " + %Ld" ofs + + let symbol_offset_high = symbol_offset_gen "PAGE" + let symbol_offset_low = symbol_offset_gen "PAGEOFF" + + let label oc lbl = + fprintf oc "L%d" lbl + + let label_high oc lbl = + fprintf oc "%a@PAGE" label lbl + let label_low oc lbl = + fprintf oc "%a@PAGEOFF" label lbl + + let load_symbol_address oc rd id = + fprintf oc " adrp %a, %a@GOTPAGE\n" xreg rd symbol id; + fprintf oc " ldr %a, [%a, %a@GOTPAGEOFF]\n" xreg rd xreg rd symbol id + + let name_of_section = function + | Section_text -> ".text" + | Section_data(i, true) -> + failwith "_Thread_local unsupported on this platform" + | Section_data(i, false) | Section_small_data i -> + variable_section ~sec:".data" i + | Section_const i | Section_small_const i -> + variable_section ~sec:".const" ~reloc:".const_data" i + | Section_string -> ".const" + | Section_literal -> ".const" + | Section_jumptable -> ".text" + | Section_user(s, wr, ex) -> + sprintf ".section \"%s\", %s, %s" + (if wr then "__DATA" else "__TEXT") s + (if ex then "regular, pure_instructions" else "regular") + | Section_debug_info _ -> ".section __DWARF,__debug_info,regular,debug" + | Section_debug_loc -> ".section __DWARF,__debug_loc,regular,debug" + | Section_debug_line _ -> ".section __DWARF,__debug_line,regular,debug" + | Section_debug_str -> ".section __DWARF,__debug_str,regular,debug" + | Section_debug_ranges -> ".section __DWARF,__debug_ranges,regular,debug" + | Section_debug_abbrev -> ".section __DWARF,__debug_abbrev,regular,debug" + | Section_ais_annotation -> assert false (* Not supported under MacOS *) + + let print_fun_info _ _ = () + let print_var_info _ _ = () + + let print_comm_decl oc name sz al = + fprintf oc " .comm %a, %s, %d\n" + symbol name (Z.to_string sz) (log2 al) + + let print_lcomm_decl oc name sz al = + fprintf oc " .lcomm %a, %s, %d\n" + symbol name (Z.to_string sz) (log2 al) + + end + +(* Module containing the printing functions *) + +module Target(System: SYSTEM): TARGET = + struct + include System + +(* Basic printing functions *) + + let print_label oc lbl = label oc (transl_label lbl) + +(* Names of sections *) + let section oc sec = fprintf oc " %s\n" (name_of_section sec) @@ -193,7 +311,7 @@ module Target (*: TARGET*) = | ADlsl(base, r, n) -> fprintf oc "[%a, %a, lsl #%a]" xregsp base xreg r coqint n | ADsxt(base, r, n) -> fprintf oc "[%a, %a, sxtw #%a]" xregsp base wreg r coqint n | ADuxt(base, r, n) -> fprintf oc "[%a, %a, uxtw #%a]" xregsp base wreg r coqint n - | ADadr(base, id, ofs) -> fprintf oc "[%a, #:lo12:%a]" xregsp base symbol_offset (id, ofs) + | ADadr(base, id, ofs) -> fprintf oc "[%a, %a]" xregsp base symbol_offset_low (id, ofs) | ADpostincr(base, n) -> fprintf oc "[%a], #%a" xregsp base coqint64 n (* Print a shifted operand *) @@ -204,15 +322,15 @@ module Target (*: TARGET*) = | SOasr n -> fprintf oc ", asr #%a" coqint n | SOror n -> fprintf oc ", ror #%a" coqint n -(* Print a sign- or zero-extended operand *) - let extendop oc = function - | EOsxtb n -> fprintf oc ", sxtb #%a" coqint n - | EOsxth n -> fprintf oc ", sxth #%a" coqint n - | EOsxtw n -> fprintf oc ", sxtw #%a" coqint n - | EOuxtb n -> fprintf oc ", uxtb #%a" coqint n - | EOuxth n -> fprintf oc ", uxth #%a" coqint n - | EOuxtw n -> fprintf oc ", uxtw #%a" coqint n - | EOuxtx n -> fprintf oc ", uxtx #%a" coqint n +(* Print a sign- or zero-extended register operand *) + let regextend oc = function + | (r, EOsxtb n) -> fprintf oc "%a, sxtb #%a" wreg r coqint n + | (r, EOsxth n) -> fprintf oc "%a, sxth #%a" wreg r coqint n + | (r, EOsxtw n) -> fprintf oc "%a, sxtw #%a" wreg r coqint n + | (r, EOuxtb n) -> fprintf oc "%a, uxtb #%a" wreg r coqint n + | (r, EOuxth n) -> fprintf oc "%a, uxth #%a" wreg r coqint n + | (r, EOuxtw n) -> fprintf oc "%a, uxtw #%a" wreg r coqint n + | (r, EOuxtx n) -> fprintf oc "%a, uxtx #%a" xreg r coqint n let next_profiling_label = let atomic_incr_counter = ref 0 in @@ -325,9 +443,9 @@ module Target (*: TARGET*) = fprintf oc " movk %a, #%d, lsl #%d\n" ireg (sz, rd) (Z.to_int n) (Z.to_int pos) (* PC-relative addressing *) | Padrp(rd, id, ofs) -> - fprintf oc " adrp %a, %a\n" xreg rd symbol_offset (id, ofs) + fprintf oc " adrp %a, %a\n" xreg rd symbol_offset_high (id, ofs) | Paddadr(rd, r1, id, ofs) -> - fprintf oc " add %a, %a, #:lo12:%a\n" xreg rd xreg r1 symbol_offset (id, ofs) + fprintf oc " add %a, %a, %a\n" xreg rd xreg r1 symbol_offset_low (id, ofs) (* Bit-field operations *) | Psbfiz(sz, rd, r1, r, s) -> fprintf oc " sbfiz %a, %a, %a, %d\n" ireg (sz, rd) ireg (sz, r1) coqint r (Z.to_int s) @@ -348,13 +466,13 @@ module Target (*: TARGET*) = fprintf oc " cmn %a, %a%a\n" ireg0 (sz, r1) ireg (sz, r2) shiftop s (* Integer arithmetic, extending register *) | Paddext(rd, r1, r2, x) -> - fprintf oc " add %a, %a, %a%a\n" xregsp rd xregsp r1 wreg r2 extendop x + fprintf oc " add %a, %a, %a\n" xregsp rd xregsp r1 regextend (r2, x) | Psubext(rd, r1, r2, x) -> - fprintf oc " sub %a, %a, %a%a\n" xregsp rd xregsp r1 wreg r2 extendop x + fprintf oc " sub %a, %a, %a\n" xregsp rd xregsp r1 regextend (r2, x) | Pcmpext(r1, r2, x) -> - fprintf oc " cmp %a, %a%a\n" xreg r1 wreg r2 extendop x + fprintf oc " cmp %a, %a\n" xreg r1 regextend (r2, x) | Pcmnext(r1, r2, x) -> - fprintf oc " cmn %a, %a%a\n" xreg r1 wreg r2 extendop x + fprintf oc " cmn %a, %a\n" xreg r1 regextend (r2, x) (* Logical, shifted register *) | Pand(sz, rd, r1, r2, s) -> fprintf oc " and %a, %a, %a%a\n" ireg (sz, rd) ireg0 (sz, r1) ireg (sz, r2) shiftop s @@ -434,8 +552,8 @@ module Target (*: TARGET*) = fprintf oc " fmov %a, #%.7f\n" dreg rd (Int64.float_of_bits d) else begin let lbl = label_literal64 d in - fprintf oc " adrp x16, %a\n" label lbl; - fprintf oc " ldr %a, [x16, #:lo12:%a] %s %.18g\n" dreg rd label lbl comment (Int64.float_of_bits d) + fprintf oc " adrp x16, %a\n" label_high lbl; + fprintf oc " ldr %a, [x16, %a] %s %.18g\n" dreg rd label_low lbl comment (Int64.float_of_bits d) end | Pfmovimms(rd, f) -> let d = camlint_of_coqint (Floats.Float32.to_bits f) in @@ -443,8 +561,8 @@ module Target (*: TARGET*) = fprintf oc " fmov %a, #%.7f\n" sreg rd (Int32.float_of_bits d) else begin let lbl = label_literal32 d in - fprintf oc " adrp x16, %a\n" label lbl; - fprintf oc " ldr %a, [x16, #:lo12:%a] %s %.18g\n" sreg rd label lbl comment (Int32.float_of_bits d) + fprintf oc " adrp x16, %a\n" label_high lbl; + fprintf oc " ldr %a, [x16, %a] %s %.18g\n" sreg rd label_low lbl comment (Int32.float_of_bits d) end | Pfmovi(D, rd, r1) -> fprintf oc " fmov %a, %a\n" dreg rd xreg0 r1 @@ -511,8 +629,7 @@ module Target (*: TARGET*) = | Plabel lbl -> fprintf oc "%a:\n" print_label lbl | Ploadsymbol(rd, id) -> - fprintf oc " adrp %a, :got:%a\n" xreg rd symbol id; - fprintf oc " ldr %a, [%a, #:got_lo12:%a]\n" xreg rd xreg rd symbol id + load_symbol_address oc rd id | Pcvtsw2x(rd, r1) -> fprintf oc " sxtw %a, %a\n" xreg rd wreg r1 | Pcvtuw2x(rd, r1) -> @@ -577,19 +694,12 @@ module Target (*: TARGET*) = jumptables := [] end - let print_fun_info = elf_print_fun_info - let print_optional_fun_info _ = () - let print_var_info = elf_print_var_info - let print_comm_symb oc sz name align = - if C2C.atom_is_static name then - fprintf oc " .local %a\n" symbol name; - fprintf oc " .comm %a, %s, %d\n" - symbol name - (Z.to_string sz) - align + if C2C.atom_is_static name + then print_lcomm_decl oc name sz align + else print_comm_decl oc name sz align let print_instructions oc fn = current_function_sig := fn.fn_sig; @@ -627,7 +737,7 @@ module Target (*: TARGET*) = section oc Section_text; end - let default_falignment = 2 + let default_falignment = 4 let cfi_startproc oc = () let cfi_endproc oc = () @@ -635,4 +745,10 @@ module Target (*: TARGET*) = end let sel_target () = - (module Target:TARGET) + let module S = + (val (match Configuration.system with + | "linux" -> (module ELF_System : SYSTEM) + | "macos" -> (module MacOS_System : SYSTEM) + | _ -> invalid_arg ("System " ^ Configuration.system ^ " not supported")) + : SYSTEM) in + (module Target(S) : TARGET) diff --git a/aarch64/extractionMachdep.v b/aarch64/extractionMachdep.v index 69edeb55..0401d0fa 100644 --- a/aarch64/extractionMachdep.v +++ b/aarch64/extractionMachdep.v @@ -6,22 +6,37 @@ (* *) (* Copyright Institut National de Recherche en Informatique et en *) (* Automatique. All rights reserved. This file is distributed *) -(* under the terms of the GNU General Public License as published by *) -(* the Free Software Foundation, either version 2 of the License, or *) -(* (at your option) any later version. This file is also distributed *) -(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* under the terms of the GNU Lesser General Public License as *) +(* published by the Free Software Foundation, either version 2.1 of *) +(* the License, or (at your option) any later version. *) +(* This file is also distributed under the terms of the *) +(* INRIA Non-Commercial License Agreement. *) (* *) (* *********************************************************************) (* Additional extraction directives specific to the AArch64 port *) -Require Archi Asm. +Require Archi Asm Asmgen SelectOp. (* Archi *) + Extract Constant Archi.pic_code => "fun () -> false". (* for the time being *) +Extract Constant Archi.abi => + "match Configuration.abi with + | ""apple"" -> Apple + | _ -> AAPCS64". + +(* SelectOp *) + +Extract Constant SelectOp.symbol_is_relocatable => + "match Configuration.system with + | ""macos"" -> C2C.atom_is_extern + | _ -> (fun _ -> false)". + (* Asm *) + Extract Constant Asm.symbol_low => "fun _ _ _ -> assert false". Extract Constant Asm.symbol_high => "fun _ _ _ -> assert false". Extract Constant Asmblockgen.symbol_is_aligned => "C2C.atom_is_aligned". |