diff options
author | xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e> | 2010-06-29 08:27:14 +0000 |
---|---|---|
committer | xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e> | 2010-06-29 08:27:14 +0000 |
commit | 9c7c84cc40eaacc1e2c13091165785cddecba5ad (patch) | |
tree | 65eafe51ad284d88fd5a949e1b2a54cd272f9f91 /powerpc | |
parent | f4b416882955d9d91bca60f3eb35b95f4124a5be (diff) | |
download | compcert-kvx-9c7c84cc40eaacc1e2c13091165785cddecba5ad.tar.gz compcert-kvx-9c7c84cc40eaacc1e2c13091165785cddecba5ad.zip |
Support for inlined built-ins.
AST: add ef_inline flag to external functions.
Selection: recognize calls to inlined built-ins and inline them as Sbuiltin.
CminorSel to Asm: added Sbuiltin/Ibuiltin instruction.
PrintAsm: adapted expansion of builtins.
C2Clight: adapted detection of builtins.
Conventions: refactored in a machine-independent part (backend/Conventions)
and a machine-dependent part (ARCH/SYS/Conventions1).
git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@1356 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
Diffstat (limited to 'powerpc')
-rw-r--r-- | powerpc/Asm.v | 78 | ||||
-rw-r--r-- | powerpc/Asmgen.v | 2 | ||||
-rw-r--r-- | powerpc/Asmgenproof.v | 35 | ||||
-rw-r--r-- | powerpc/Asmgenproof1.v | 2 | ||||
-rw-r--r-- | powerpc/PrintAsm.ml | 230 | ||||
-rw-r--r-- | powerpc/eabi/Conventions1.v (renamed from powerpc/eabi/Conventions.v) | 212 | ||||
-rw-r--r-- | powerpc/macosx/Conventions1.v (renamed from powerpc/macosx/Conventions.v) | 218 |
7 files changed, 209 insertions, 568 deletions
diff --git a/powerpc/Asm.v b/powerpc/Asm.v index 21c237e6..9da58710 100644 --- a/powerpc/Asm.v +++ b/powerpc/Asm.v @@ -24,7 +24,7 @@ Require Import Globalenvs. Require Import Smallstep. Require Import Locations. Require Stacklayout. -Require Conventions. +Require Import Conventions. (** * Abstract syntax *) @@ -56,6 +56,34 @@ Proof. decide equality. Defined. Lemma freg_eq: forall (x y: freg), {x=y} + {x<>y}. Proof. decide equality. Defined. +(** The PowerPC has a great many registers, some general-purpose, some very + specific. We model only the following registers: *) + +Inductive preg: Type := + | IR: ireg -> preg (**r integer registers *) + | FR: freg -> preg (**r float registers *) + | PC: preg (**r program counter *) + | LR: preg (**r link register (return address) *) + | CTR: preg (**r count register, used for some branches *) + | CARRY: preg (**r carry bit of the status register *) + | CR0_0: preg (**r bit 0 of the condition register *) + | CR0_1: preg (**r bit 1 of the condition register *) + | CR0_2: preg (**r bit 2 of the condition register *) + | CR0_3: preg. (**r bit 3 of the condition register *) + +Coercion IR: ireg >-> preg. +Coercion FR: freg >-> preg. + +Lemma preg_eq: forall (x y: preg), {x=y} + {x<>y}. +Proof. decide equality. apply ireg_eq. apply freg_eq. Defined. + +Module PregEq. + Definition t := preg. + Definition eq := preg_eq. +End PregEq. + +Module Pregmap := EMap(PregEq). + (** Symbolic constants. Immediate operands to an arithmetic instruction or an indexed memory access can be either integer literals, or the low or high 16 bits of a symbolic reference (the address @@ -188,7 +216,8 @@ Inductive instruction : Type := | Pxor: ireg -> ireg -> ireg -> instruction (**r bitwise xor *) | Pxori: ireg -> ireg -> constant -> instruction (**r bitwise xor with immediate *) | Pxoris: ireg -> ireg -> constant -> instruction (**r bitwise xor with immediate high *) - | Plabel: label -> instruction. (**r define a code label *) + | Plabel: label -> instruction (**r define a code label *) + | Pbuiltin: external_function -> list preg -> preg -> instruction. (**r built-in *) (** The pseudo-instructions are the following: @@ -313,34 +342,6 @@ Definition program := AST.program fundef unit. (** * Operational semantics *) -(** The PowerPC has a great many registers, some general-purpose, some very - specific. We model only the following registers: *) - -Inductive preg: Type := - | IR: ireg -> preg (**r integer registers *) - | FR: freg -> preg (**r float registers *) - | PC: preg (**r program counter *) - | LR: preg (**r link register (return address) *) - | CTR: preg (**r count register, used for some branches *) - | CARRY: preg (**r carry bit of the status register *) - | CR0_0: preg (**r bit 0 of the condition register *) - | CR0_1: preg (**r bit 1 of the condition register *) - | CR0_2: preg (**r bit 2 of the condition register *) - | CR0_3: preg. (**r bit 3 of the condition register *) - -Coercion IR: ireg >-> preg. -Coercion FR: freg >-> preg. - -Lemma preg_eq: forall (x y: preg), {x=y} + {x<>y}. -Proof. decide equality. apply ireg_eq. apply freg_eq. Defined. - -Module PregEq. - Definition t := preg. - Definition eq := preg_eq. -End PregEq. - -Module Pregmap := EMap(PregEq). - (** The semantics operates over a single mapping from registers (type [preg]) to values. We maintain (but do not enforce) the convention that integer registers are mapped to values of @@ -788,6 +789,8 @@ Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome OK (nextinstr (rs#rd <- (Val.xor rs#r1 (const_high cst)))) m | Plabel lbl => OK (nextinstr rs) m + | Pbuiltin ef args res => + Error (**r treated specially below *) end. (** Translation of the LTL/Linear/Mach view of machine registers @@ -859,10 +862,10 @@ Inductive extcall_args (rs: regset) (m: mem): list loc -> list val -> Prop := Definition extcall_arguments (rs: regset) (m: mem) (sg: signature) (args: list val) : Prop := - extcall_args rs m (Conventions.loc_arguments sg) args. + extcall_args rs m (loc_arguments sg) args. Definition loc_external_result (sg: signature) : preg := - preg_of (Conventions.loc_result sg). + preg_of (loc_result sg). (** Execution of the instruction at [rs#PC]. *) @@ -877,13 +880,20 @@ Inductive step: state -> trace -> state -> Prop := find_instr (Int.unsigned ofs) c = Some i -> exec_instr c i rs m = OK rs' m' -> step (State rs m) E0 (State rs' m') + | exec_step_builtin: + forall b ofs c ef args res rs m t v m', + rs PC = Vptr b ofs -> + Genv.find_funct_ptr ge b = Some (Internal c) -> + find_instr (Int.unsigned ofs) c = Some (Pbuiltin ef args res) -> + external_call ef ge (map rs args) m t v m' -> + step (State rs m) t (State (nextinstr(rs # res <- v)) m') | exec_step_external: forall b ef args res rs m t rs' m', rs PC = Vptr b Int.zero -> Genv.find_funct_ptr ge b = Some (External ef) -> external_call ef ge args m t res m' -> - extcall_arguments rs m ef.(ef_sig) args -> - rs' = (rs#(loc_external_result ef.(ef_sig)) <- res + extcall_arguments rs m (ef_sig ef) args -> + rs' = (rs#(loc_external_result (ef_sig ef)) <- res #PC <- (rs LR)) -> step (State rs m) t (State rs' m'). diff --git a/powerpc/Asmgen.v b/powerpc/Asmgen.v index ca42d563..b1b1245b 100644 --- a/powerpc/Asmgen.v +++ b/powerpc/Asmgen.v @@ -494,6 +494,8 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction) (k: code) := Pmtlr GPR12 :: Pfreeframe (-f .(fn_framesize)) f.(fn_stacksize) f.(fn_link_ofs) :: Pbs symb :: k + | Mbuiltin ef args res => + Pbuiltin ef (map preg_of args) (preg_of res) :: k | Mlabel lbl => Plabel lbl :: k | Mgoto lbl => diff --git a/powerpc/Asmgenproof.v b/powerpc/Asmgenproof.v index fcbbbd73..ee2867e6 100644 --- a/powerpc/Asmgenproof.v +++ b/powerpc/Asmgenproof.v @@ -25,6 +25,7 @@ Require Import Globalenvs. Require Import Smallstep. Require Import Op. Require Import Locations. +Require Import Conventions. Require Import Mach. Require Import Machconcr. Require Import Machtyping. @@ -1063,6 +1064,37 @@ Proof. unfold rs5; auto with ppcgen. Qed. +Lemma exec_Mbuiltin_prop: + forall (s : list stackframe) (f : block) (sp : val) + (ms : Mach.regset) (m : mem) (ef : external_function) + (args : list mreg) (res : mreg) (b : list Mach.instruction) + (t : trace) (v : val) (m' : mem), + external_call ef ge ms ## args m t v m' -> + exec_instr_prop (Machconcr.State s f sp (Mbuiltin ef args res :: b) ms m) t + (Machconcr.State s f sp b (Regmap.set res v ms) m'). +Proof. + intros; red; intros; inv MS. + generalize (wt_function_instrs _ WTF _ (INCL _ (in_eq _ _))). + intro WTI. inv WTI. + inv AT. simpl in H3. + generalize (functions_transl _ _ FIND); intro FN. + generalize (functions_transl_no_overflow _ _ FIND); intro NOOV. + left. econstructor; split. apply plus_one. + eapply exec_step_builtin. eauto. eauto. + eapply find_instr_tail; eauto. + replace (rs##(preg_of##args)) with (ms##args). + eapply external_call_symbols_preserved; eauto. + exact symbols_preserved. exact varinfo_preserved. + rewrite list_map_compose. apply list_map_exten. intros. + symmetry. eapply preg_val; eauto. + econstructor; eauto with coqlib. + unfold nextinstr. rewrite Pregmap.gss. rewrite Pregmap.gso. + rewrite <- H0. simpl. constructor; auto. + eapply code_tail_next_int; eauto. + apply sym_not_equal. auto with ppcgen. + auto with ppcgen. +Qed. + Lemma exec_Mgoto_prop: forall (s : list stackframe) (fb : block) (f : function) (sp : val) (lbl : Mach.label) (c : list Mach.instruction) (ms : Mach.regset) @@ -1345,7 +1377,7 @@ Lemma exec_function_external_prop: Genv.find_funct_ptr ge fb = Some (External ef) -> external_call ef ge args m t0 res m' -> Machconcr.extcall_arguments ms m (parent_sp s) (ef_sig ef) args -> - ms' = Regmap.set (Conventions.loc_result (ef_sig ef)) res ms -> + ms' = Regmap.set (loc_result (ef_sig ef)) res ms -> exec_instr_prop (Machconcr.Callstate s fb ms m) t0 (Machconcr.Returnstate s ms' m'). Proof. @@ -1387,6 +1419,7 @@ Proof exec_Mstore_prop exec_Mcall_prop exec_Mtailcall_prop + exec_Mbuiltin_prop exec_Mgoto_prop exec_Mcond_true_prop exec_Mcond_false_prop diff --git a/powerpc/Asmgenproof1.v b/powerpc/Asmgenproof1.v index 60c49690..5ebde633 100644 --- a/powerpc/Asmgenproof1.v +++ b/powerpc/Asmgenproof1.v @@ -27,7 +27,7 @@ Require Import Machconcr. Require Import Machtyping. Require Import Asm. Require Import Asmgen. -Require Conventions. +Require Import Conventions. (** * Properties of low half/high half decomposition *) diff --git a/powerpc/PrintAsm.ml b/powerpc/PrintAsm.ml index 62e4536b..b3ccb20b 100644 --- a/powerpc/PrintAsm.ml +++ b/powerpc/PrintAsm.ml @@ -178,6 +178,11 @@ let creg oc r = | MacOS|Diab -> fprintf oc "cr%d" r | Linux -> fprintf oc "%d" r +let preg oc = function + | IR r -> ireg oc r + | FR r -> freg oc r + | _ -> assert false + (* Names of sections *) let name_of_section_MacOS = function @@ -256,7 +261,90 @@ let rec log2 n = assert (n > 0); if n = 1 then 0 else 1 + log2 (n lsr 1) -(* Built-in functions *) +(* Built-ins. They come in two flavors: + - inlined by the compiler: take their arguments in arbitrary + registers; preserve all registers except GPR12 and FPR13 + - inlined while printing asm code; take their arguments in + locations dictated by the calling conventions; preserve + callee-save regs only. *) + +let print_builtin_inlined oc name args res = + fprintf oc "%s begin builtin %s\n" comment name; + (* Can use as temporaries: GPR12, FPR13 *) + begin match name, args, res with + (* Volatile reads *) + | "__builtin_volatile_read_int8unsigned", [IR addr], IR res -> + fprintf oc " lbz %a, 0(%a)\n" ireg res ireg addr + | "__builtin_volatile_read_int8signed", [IR addr], IR res -> + fprintf oc " lbz %a, 0(%a)\n" ireg res ireg addr; + fprintf oc " extsb %a, %a\n" ireg res ireg res + | "__builtin_volatile_read_int16unsigned", [IR addr], IR res -> + fprintf oc " lhz %a, 0(%a)\n" ireg res ireg addr + | "__builtin_volatile_read_int16signed", [IR addr], IR res -> + fprintf oc " lha %a, 0(%a)\n" ireg res ireg addr + | ("__builtin_volatile_read_int32"|"__builtin_volatile_read_pointer"), [IR addr], IR res -> + fprintf oc " lwz %a, 0(%a)\n" ireg res ireg addr + | "__builtin_volatile_read_float32", [IR addr], FR res -> + fprintf oc " lfs %a, 0(%a)\n" freg res ireg addr + | "__builtin_volatile_read_float64", [IR addr], FR res -> + fprintf oc " lfd %a, 0(%a)\n" freg res ireg addr + (* Volatile writes *) + | ("__builtin_volatile_write_int8unsigned"|"__builtin_volatile_write_int8signed"), [IR addr; IR src], _ -> + fprintf oc " stb %a, 0(%a)\n" ireg src ireg addr + | ("__builtin_volatile_write_int16unsigned"|"__builtin_volatile_write_int16signed"), [IR addr; IR src], _ -> + fprintf oc " sth %a, 0(%a)\n" ireg src ireg addr + | ("__builtin_volatile_write_int32"|"__builtin_volatile_write_pointer"), [IR addr; IR src], _ -> + fprintf oc " stw %a, 0(%a)\n" ireg src ireg addr + | "__builtin_volatile_write_float32", [IR addr; FR src], _ -> + fprintf oc " frsp %a, %a\n" freg FPR13 freg src; + fprintf oc " stfs %a, 0(%a)\n" freg FPR13 ireg addr + | "__builtin_volatile_write_float64", [IR addr; FR src], _ -> + fprintf oc " stfd %a, 0(%a)\n" freg src ireg addr + (* Integer arithmetic *) + | "__builtin_mulhw", [IR a1; IR a2], IR res -> + fprintf oc " mulhw %a, %a, %a\n" ireg res ireg a1 ireg a2 + | "__builtin_mulhwu", [IR a1; IR a2], IR res -> + fprintf oc " mulhwu %a, %a, %a\n" ireg res ireg a1 ireg a2 + | "__builtin_cntlzw", [IR a1], IR res -> + fprintf oc " cntlzw %a, %a\n" ireg res ireg a1 + (* Float arithmetic *) + | "__builtin_fmadd", [FR a1; FR a2; FR a3], FR res -> + fprintf oc " fmadd %a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3 + | "__builtin_fmsub", [FR a1; FR a2; FR a3], FR res -> + fprintf oc " fmsub %a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3 + | "__builtin_fabs", [FR a1], FR res -> + fprintf oc " fabs %a, %a\n" freg res freg a1 + | "__builtin_fsqrt", [FR a1], FR res -> + fprintf oc " fsqrt %a, %a\n" freg res freg a1 + | "__builtin_frsqrte", [FR a1], FR res -> + fprintf oc " frsqrte %a, %a\n" freg res freg a1 + | "__builtin_fres", [FR a1], FR res -> + fprintf oc " fres %a, %a\n" freg res freg a1 + | "__builtin_fsel", [FR a1; FR a2; FR a3], FR res -> + fprintf oc " fsel %a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3 + (* Memory accesses *) + | "__builtin_read_int16_reversed", [IR a1], IR res -> + fprintf oc " lhbrx %a, %a, %a\n" ireg res ireg_or_zero GPR0 ireg a1 + | "__builtin_read_int32_reversed", [IR a1], IR res -> + fprintf oc " lwbrx %a, %a, %a\n" ireg res ireg_or_zero GPR0 ireg a1 + | "__builtin_write_int16_reversed", [IR a1; IR a2], _ -> + fprintf oc " sthbrx %a, %a, %a\n" ireg a2 ireg_or_zero GPR0 ireg a1 + | "__builtin_write_int32_reversed", [IR a1; IR a2], _ -> + fprintf oc " stwbrx %a, %a, %a\n" ireg a2 ireg_or_zero GPR0 ireg a1 + (* Synchronization *) + | "__builtin_eieio", [], _ -> + fprintf oc " eieio\n" + | "__builtin_sync", [], _ -> + fprintf oc " sync\n" + | "__builtin_isync", [], _ -> + fprintf oc " isync\n" + | "__builtin_trap", [], _ -> + fprintf oc " trap\n" + (* Catch-all *) + | _ -> + invalid_arg ("unrecognized builtin " ^ name) + end; + fprintf oc "%s end builtin %s\n" comment name let re_builtin_function = Str.regexp "__builtin_" @@ -264,121 +352,42 @@ let is_builtin_function s = Str.string_match re_builtin_function (extern_atom s) 0 let print_builtin_function oc s = - fprintf oc "%s begin builtin %s\n" comment (extern_atom s); - (* int args: GPR3, GPR4 float args: FPR1, FPR2, FPR3 - int res: GPR3 float res: FPR1 + fprintf oc "%s begin builtin function %s\n" comment (extern_atom s); + (* int args: GPR3, GPR4, GPR5 float args: FPR1, FPR2, FPR3 + int res: GPR3 float res: FPR1 Watch out for MacOSX/EABI incompatibility: functions that take some floats then some ints. There are none here. *) - begin match extern_atom s with - (* Volatile reads *) - | "__builtin_volatile_read_int8unsigned" -> - fprintf oc " lbz %a, 0(%a)\n" ireg GPR3 ireg GPR3 - | "__builtin_volatile_read_int8signed" -> - fprintf oc " lbz %a, 0(%a)\n" ireg GPR3 ireg GPR3; - fprintf oc " extsb %a, %a\n" ireg GPR3 ireg GPR3 - | "__builtin_volatile_read_int16unsigned" -> - fprintf oc " lhz %a, 0(%a)\n" ireg GPR3 ireg GPR3 - | "__builtin_volatile_read_int16signed" -> - fprintf oc " lha %a, 0(%a)\n" ireg GPR3 ireg GPR3 - | "__builtin_volatile_read_int32" - | "__builtin_volatile_read_pointer" -> - fprintf oc " lwz %a, 0(%a)\n" ireg GPR3 ireg GPR3 - | "__builtin_volatile_read_float32" -> - fprintf oc " lfs %a, 0(%a)\n" freg FPR1 ireg GPR3 - | "__builtin_volatile_read_float64" -> - fprintf oc " lfd %a, 0(%a)\n" freg FPR1 ireg GPR3 - (* Volatile writes *) - | "__builtin_volatile_write_int8unsigned" - | "__builtin_volatile_write_int8signed" -> - fprintf oc " stb %a, 0(%a)\n" ireg GPR4 ireg GPR3 - | "__builtin_volatile_write_int16unsigned" - | "__builtin_volatile_write_int16signed" -> - fprintf oc " sth %a, 0(%a)\n" ireg GPR4 ireg GPR3 - | "__builtin_volatile_write_int32" - | "__builtin_volatile_write_pointer" -> - fprintf oc " stw %a, 0(%a)\n" ireg GPR4 ireg GPR3 - | "__builtin_volatile_write_float32" -> - fprintf oc " frsp %a, %a\n" freg FPR1 freg FPR1; - fprintf oc " stfs %a, 0(%a)\n" freg FPR1 ireg GPR3 - | "__builtin_volatile_write_float64" -> - fprintf oc " stfd %a, 0(%a)\n" freg FPR1 ireg GPR3 (* Block copy *) + begin match extern_atom s with | "__builtin_memcpy" -> let lbl1 = new_label() in let lbl2 = new_label() in - fprintf oc " cmplwi %a, %a, 0\n" creg 0 ireg GPR5; - fprintf oc " beq %a, %a\n" creg 0 label lbl1; - fprintf oc " mtctr %a\n" ireg GPR5; - fprintf oc " addi %a, %a, -1\n" ireg GPR3 ireg GPR3; - fprintf oc " addi %a, %a, -1\n" ireg GPR4 ireg GPR4; - fprintf oc "%a: lbzu %a, 1(%a)\n" label lbl2 ireg GPR0 ireg GPR4; - fprintf oc " stbu %a, 1(%a)\n" ireg GPR0 ireg GPR3; - fprintf oc " bdnz %a\n" label lbl2; + fprintf oc " cmplwi %a, %a, 0\n" creg 0 ireg GPR5; + fprintf oc " beq %a, %a\n" creg 0 label lbl1; + fprintf oc " mtctr %a\n" ireg GPR5; + fprintf oc " addi %a, %a, -1\n" ireg GPR3 ireg GPR3; + fprintf oc " addi %a, %a, -1\n" ireg GPR4 ireg GPR4; + fprintf oc "%a: lbzu %a, 1(%a)\n" label lbl2 ireg GPR0 ireg GPR4; + fprintf oc " stbu %a, 1(%a)\n" ireg GPR0 ireg GPR3; + fprintf oc " bdnz %a\n" label lbl2; fprintf oc "%a:\n" label lbl1 | "__builtin_memcpy_words" -> let lbl1 = new_label() in let lbl2 = new_label() in - fprintf oc " rlwinm. %a, %a, 30, 2, 31\n" ireg GPR5 ireg GPR5; - fprintf oc " beq %a, %a\n" creg 0 label lbl1; - fprintf oc " mtctr %a\n" ireg GPR5; - fprintf oc " addi %a, %a, -4\n" ireg GPR3 ireg GPR3; - fprintf oc " addi %a, %a, -4\n" ireg GPR4 ireg GPR4; - fprintf oc "%a: lwzu %a, 4(%a)\n" label lbl2 ireg GPR0 ireg GPR4; - fprintf oc " stwu %a, 4(%a)\n" ireg GPR0 ireg GPR3; - fprintf oc " bdnz %a\n" label lbl2; + fprintf oc " rlwinm. %a, %a, 30, 2, 31\n" ireg GPR5 ireg GPR5; + fprintf oc " beq %a, %a\n" creg 0 label lbl1; + fprintf oc " mtctr %a\n" ireg GPR5; + fprintf oc " addi %a, %a, -4\n" ireg GPR3 ireg GPR3; + fprintf oc " addi %a, %a, -4\n" ireg GPR4 ireg GPR4; + fprintf oc "%a: lwzu %a, 4(%a)\n" label lbl2 ireg GPR0 ireg GPR4; + fprintf oc " stwu %a, 4(%a)\n" ireg GPR0 ireg GPR3; + fprintf oc " bdnz %a\n" label lbl2; fprintf oc "%a:\n" label lbl1 - (* Integer arithmetic *) - | "__builtin_mulhw" -> - fprintf oc " mulhw %a, %a, %a\n" ireg GPR3 ireg GPR3 ireg GPR4 - | "__builtin_mulhwu" -> - fprintf oc " mulhwu %a, %a, %a\n" ireg GPR3 ireg GPR3 ireg GPR4 - | "__builtin_cntlzw" -> - fprintf oc " cntlzw %a, %a\n" ireg GPR3 ireg GPR3 - (* Float arithmetic *) - | "__builtin_fmadd" -> - fprintf oc " fmadd %a, %a, %a, %a\n" - freg FPR1 freg FPR1 freg FPR2 freg FPR3 - | "__builtin_fmsub" -> - fprintf oc " fmsub %a, %a, %a, %a\n" - freg FPR1 freg FPR1 freg FPR2 freg FPR3 - | "__builtin_fabs" -> - fprintf oc " fabs %a, %a\n" freg FPR1 freg FPR1 - | "__builtin_fsqrt" -> - fprintf oc " fsqrt %a, %a\n" freg FPR1 freg FPR1 - | "__builtin_frsqrte" -> - fprintf oc " frsqrte %a, %a\n" freg FPR1 freg FPR1 - | "__builtin_fres" -> - fprintf oc " fres %a, %a\n" freg FPR1 freg FPR1 - | "__builtin_fsel" -> - fprintf oc " fsel %a, %a, %a, %a\n" - freg FPR1 freg FPR1 freg FPR2 freg FPR3 - (* Memory accesses *) - | "__builtin_read_int16_reversed" -> - fprintf oc " lhbrx %a, %a, %a\n" - ireg GPR3 ireg_or_zero GPR0 ireg GPR3 - | "__builtin_read_int32_reversed" -> - fprintf oc " lwbrx %a, %a, %a\n" - ireg GPR3 ireg_or_zero GPR0 ireg GPR3 - | "__builtin_write_int16_reversed" -> - fprintf oc " sthbrx %a, %a, %a\n" - ireg GPR4 ireg_or_zero GPR0 ireg GPR3 - | "__builtin_write_int32_reversed" -> - fprintf oc " stwbrx %a, %a, %a\n" - ireg GPR4 ireg_or_zero GPR0 ireg GPR3 - (* Synchronization *) - | "__builtin_eieio" -> - fprintf oc " eieio\n" - | "__builtin_sync" -> - fprintf oc " sync\n" - | "__builtin_isync" -> - fprintf oc " isync\n" - | "__builtin_trap" -> - fprintf oc " trap\n" (* Catch-all *) | s -> invalid_arg ("unrecognized builtin function " ^ s) end; - fprintf oc "%s end builtin %s\n" comment (extern_atom s) + fprintf oc "%s end builtin function %s\n" comment (extern_atom s) (* Printing of instructions *) @@ -450,10 +459,7 @@ let print_instruction oc labels = function fprintf oc " lwz %a, %a(%a)\n" ireg GPR12 label_low lbl ireg GPR12; fprintf oc " mtctr %a\n" ireg GPR12; fprintf oc " bctr\n"; - fprintf oc "%a:" label lbl; - List.iter - (fun l -> fprintf oc " .long %a\n" label (transl_label l)) - tbl; + jumptables := (lbl, tbl) :: !jumptables; fprintf oc "%s end pseudoinstr btbl\n" comment | Pcmplw(r1, r2) -> fprintf oc " cmplw %a, %a, %a\n" creg 0 ireg r1 ireg r2 @@ -660,6 +666,8 @@ let print_instruction oc labels = function | Plabel lbl -> if Labelset.mem lbl labels then fprintf oc "%a:\n" label (transl_label lbl) + | Pbuiltin(ef, args, res) -> + print_builtin_inlined oc (extern_atom ef.ef_id) args res let print_literal oc (lbl, n) = let nlo = Int64.to_int32 n @@ -814,7 +822,7 @@ let stub_function oc name ef = fprintf oc " .align 2\n"; fprintf oc "L%s$stub:\n" name; if Str.string_match re_variadic_stub name 0 - then variadic_stub oc name (Str.matched_group 1 name) ef.ef_sig.sig_args + then variadic_stub oc name (Str.matched_group 1 name) (ef_sig ef).sig_args else non_variadic_stub oc name let function_needs_stub name = true @@ -839,7 +847,7 @@ let stub_function oc name ef = let name = extern_atom name in (* Only variadic functions need a stub *) if Str.string_match re_variadic_stub name 0 - then variadic_stub oc name (Str.matched_group 1 name) ef.ef_sig.sig_args + then variadic_stub oc name (Str.matched_group 1 name) (ef_sig ef).sig_args let function_needs_stub name = Str.string_match re_variadic_stub (extern_atom name) 0 @@ -858,13 +866,15 @@ let stub_function = let print_fundef oc (Coq_pair(name, defn)) = match defn with - | Internal code -> print_function oc name code - | External ef -> if not(is_builtin_function name) then stub_function oc name ef + | Internal code -> + print_function oc name code + | External ef -> + if not (is_builtin_function name) then stub_function oc name ef let record_extfun (Coq_pair(name, defn)) = match defn with | Internal _ -> () - | External _ -> + | External _ -> if function_needs_stub name && not (is_builtin_function name) then stubbed_functions := IdentSet.add name !stubbed_functions diff --git a/powerpc/eabi/Conventions.v b/powerpc/eabi/Conventions1.v index fd3a4ebe..60eaaa5e 100644 --- a/powerpc/eabi/Conventions.v +++ b/powerpc/eabi/Conventions1.v @@ -229,65 +229,6 @@ Proof. unfold float_callee_save_regs; NoRepet. Qed. -(** * Acceptable locations for register allocation *) - -(** The following predicate describes the locations that can be assigned - to an RTL pseudo-register during register allocation: a non-temporary - machine register or a [Local] stack slot are acceptable. *) - -Definition loc_acceptable (l: loc) : Prop := - match l with - | R r => ~(In l temporaries) - | S (Local ofs ty) => ofs >= 0 - | S (Incoming _ _) => False - | S (Outgoing _ _) => False - end. - -Definition locs_acceptable (ll: list loc) : Prop := - forall l, In l ll -> loc_acceptable l. - -Lemma temporaries_not_acceptable: - forall l, loc_acceptable l -> Loc.notin l temporaries. -Proof. - unfold loc_acceptable; destruct l. - simpl. intuition congruence. - destruct s; try contradiction. - intro. simpl. tauto. -Qed. -Hint Resolve temporaries_not_acceptable: locs. - -Lemma locs_acceptable_disj_temporaries: - forall ll, locs_acceptable ll -> Loc.disjoint ll temporaries. -Proof. - intros. apply Loc.notin_disjoint. intros. - apply temporaries_not_acceptable. auto. -Qed. - -Lemma loc_acceptable_noteq_diff: - forall l1 l2, - loc_acceptable l1 -> l1 <> l2 -> Loc.diff l1 l2. -Proof. - unfold loc_acceptable, Loc.diff; destruct l1; destruct l2; - try (destruct s); try (destruct s0); intros; auto; try congruence. - case (zeq z z0); intro. - compare t t0; intro. - subst z0; subst t0; tauto. - tauto. tauto. - contradiction. contradiction. -Qed. - -Lemma loc_acceptable_notin_notin: - forall r ll, - loc_acceptable r -> - ~(In r ll) -> Loc.notin r ll. -Proof. - induction ll; simpl; intros. - auto. - split. apply loc_acceptable_noteq_diff. assumption. - apply sym_not_equal. tauto. - apply IHll. assumption. tauto. -Qed. - (** * Function calling conventions *) (** The functions in this section determine the locations (machine registers @@ -333,40 +274,18 @@ Proof. reflexivity. Qed. -(** The result location is acceptable. *) - -Lemma loc_result_acceptable: - forall sig, loc_acceptable (R (loc_result sig)). -Proof. - intros. unfold loc_acceptable. red. - unfold loc_result. destruct (sig_res sig). - destruct t; simpl; NotOrEq. - simpl; NotOrEq. -Qed. - -(** The result location is a caller-save register. *) +(** The result location is a caller-save register or a temporary *) Lemma loc_result_caller_save: - forall (s: signature), In (R (loc_result s)) destroyed_at_call. + forall (s: signature), + In (R (loc_result s)) destroyed_at_call \/ In (R (loc_result s)) temporaries. Proof. - intros; unfold loc_result. + intros; unfold loc_result. left; destruct (sig_res s). destruct t; simpl; OrEq. simpl; OrEq. Qed. -(** The result location is not a callee-save register. *) - -Lemma loc_result_not_callee_save: - forall (s: signature), - ~(In (loc_result s) int_callee_save_regs \/ In (loc_result s) float_callee_save_regs). -Proof. - intros. generalize (loc_result_caller_save s). - generalize (int_callee_save_not_destroyed (loc_result s)). - generalize (float_callee_save_not_destroyed (loc_result s)). - tauto. -Qed. - (** ** Location of function arguments *) (** The PowerPC EABI states the following convention for passing arguments @@ -635,24 +554,6 @@ Proof. ElimOrEq; intuition. Qed. -(** Callee-save registers do not overlap with argument locations. *) - -Lemma arguments_not_preserved: - forall sig l, - Loc.notin l destroyed_at_call -> loc_acceptable l -> - Loc.notin l (loc_arguments sig). -Proof. - intros. unfold loc_arguments. destruct l. - apply loc_arguments_rec_notin_reg. - generalize (Loc.notin_not_in _ _ H). intro; red; intro. - apply H1. generalize H2. simpl. ElimOrEq; OrEq. - generalize (Loc.notin_not_in _ _ H). intro; red; intro. - apply H1. generalize H2. simpl. ElimOrEq; OrEq. - destruct s; simpl in H0; try contradiction. - apply loc_arguments_rec_notin_local. -Qed. -Hint Resolve arguments_not_preserved: locs. - (** Argument locations agree in number with the function signature. *) Lemma loc_arguments_length: @@ -687,108 +588,3 @@ Proof. intro; simpl. ElimOrEq; reflexivity. intro; simpl. ElimOrEq; reflexivity. Qed. - -(** There is no partial overlap between an argument location and an - acceptable location: they are either identical or disjoint. *) - -Lemma no_overlap_arguments: - forall args sg, - locs_acceptable args -> - Loc.no_overlap args (loc_arguments sg). -Proof. - unfold Loc.no_overlap; intros. - generalize (H r H0). - generalize (loc_arguments_acceptable _ _ H1). - destruct s; destruct r; simpl. - intros. case (mreg_eq m0 m); intro. left; congruence. tauto. - intros. right; destruct s; auto. - intros. right. auto. - destruct s; try tauto. destruct s0; tauto. -Qed. - -(** Decide whether a tailcall is possible. *) - -Definition tailcall_is_possible (sg: signature) : bool := - let fix tcisp (l: list loc) := - match l with - | nil => true - | R _ :: l' => tcisp l' - | S _ :: l' => false - end - in tcisp (loc_arguments sg). - -Lemma tailcall_is_possible_correct: - forall s, tailcall_is_possible s = true -> tailcall_possible s. -Proof. - intro s. unfold tailcall_is_possible, tailcall_possible. - generalize (loc_arguments s). induction l; simpl; intros. - elim H0. - destruct a. - destruct H0. subst l0. auto. apply IHl. auto. auto. discriminate. -Qed. - -(** ** Location of function parameters *) - -(** A function finds the values of its parameter in the same locations - where its caller stored them, except that the stack-allocated arguments, - viewed as [Outgoing] slots by the caller, are accessed via [Incoming] - slots (at the same offsets and types) in the callee. *) - -Definition parameter_of_argument (l: loc) : loc := - match l with - | S (Outgoing n ty) => S (Incoming n ty) - | _ => l - end. - -Definition loc_parameters (s: signature) := - List.map parameter_of_argument (loc_arguments s). - -Lemma loc_parameters_type: - forall sig, List.map Loc.type (loc_parameters sig) = sig.(sig_args). -Proof. - intros. unfold loc_parameters. - rewrite list_map_compose. - rewrite <- loc_arguments_type. - apply list_map_exten. - intros. destruct x; simpl. auto. - destruct s; reflexivity. -Qed. - -Lemma loc_parameters_length: - forall sg, List.length (loc_parameters sg) = List.length sg.(sig_args). -Proof. - intros. unfold loc_parameters. rewrite list_length_map. - apply loc_arguments_length. -Qed. - -Lemma loc_parameters_not_temporaries: - forall sig, Loc.disjoint (loc_parameters sig) temporaries. -Proof. - intro; red; intros. - unfold loc_parameters in H. - elim (list_in_map_inv _ _ _ H). intros y [EQ IN]. - generalize (loc_arguments_not_temporaries sig y x2 IN H0). - subst x1. destruct x2. - destruct y; simpl. auto. destruct s; auto. - byContradiction. generalize H0. simpl. NotOrEq. -Qed. - -Lemma no_overlap_parameters: - forall params sg, - locs_acceptable params -> - Loc.no_overlap (loc_parameters sg) params. -Proof. - unfold Loc.no_overlap; intros. - unfold loc_parameters in H0. - elim (list_in_map_inv _ _ _ H0). intros t [EQ IN]. - rewrite EQ. - generalize (loc_arguments_acceptable _ _ IN). - generalize (H s H1). - destruct s; destruct t; simpl. - intros. case (mreg_eq m0 m); intro. left; congruence. tauto. - intros. right; destruct s; simpl; auto. - intros; right; auto. - destruct s; try tauto. destruct s0; try tauto. - intros; simpl. tauto. -Qed. - diff --git a/powerpc/macosx/Conventions.v b/powerpc/macosx/Conventions1.v index 1954c912..a5741e1c 100644 --- a/powerpc/macosx/Conventions.v +++ b/powerpc/macosx/Conventions1.v @@ -229,65 +229,6 @@ Proof. unfold float_callee_save_regs; NoRepet. Qed. -(** * Acceptable locations for register allocation *) - -(** The following predicate describes the locations that can be assigned - to an RTL pseudo-register during register allocation: a non-temporary - machine register or a [Local] stack slot are acceptable. *) - -Definition loc_acceptable (l: loc) : Prop := - match l with - | R r => ~(In l temporaries) - | S (Local ofs ty) => ofs >= 0 - | S (Incoming _ _) => False - | S (Outgoing _ _) => False - end. - -Definition locs_acceptable (ll: list loc) : Prop := - forall l, In l ll -> loc_acceptable l. - -Lemma temporaries_not_acceptable: - forall l, loc_acceptable l -> Loc.notin l temporaries. -Proof. - unfold loc_acceptable; destruct l. - simpl. intuition congruence. - destruct s; try contradiction. - intro. simpl. tauto. -Qed. -Hint Resolve temporaries_not_acceptable: locs. - -Lemma locs_acceptable_disj_temporaries: - forall ll, locs_acceptable ll -> Loc.disjoint ll temporaries. -Proof. - intros. apply Loc.notin_disjoint. intros. - apply temporaries_not_acceptable. auto. -Qed. - -Lemma loc_acceptable_noteq_diff: - forall l1 l2, - loc_acceptable l1 -> l1 <> l2 -> Loc.diff l1 l2. -Proof. - unfold loc_acceptable, Loc.diff; destruct l1; destruct l2; - try (destruct s); try (destruct s0); intros; auto; try congruence. - case (zeq z z0); intro. - compare t t0; intro. - subst z0; subst t0; tauto. - tauto. tauto. - contradiction. contradiction. -Qed. - -Lemma loc_acceptable_notin_notin: - forall r ll, - loc_acceptable r -> - ~(In r ll) -> Loc.notin r ll. -Proof. - induction ll; simpl; intros. - auto. - split. apply loc_acceptable_noteq_diff. assumption. - apply sym_not_equal. tauto. - apply IHll. assumption. tauto. -Qed. - (** * Function calling conventions *) (** The functions in this section determine the locations (machine registers @@ -333,40 +274,18 @@ Proof. reflexivity. Qed. -(** The result location is acceptable. *) - -Lemma loc_result_acceptable: - forall sig, loc_acceptable (R (loc_result sig)). -Proof. - intros. unfold loc_acceptable. red. - unfold loc_result. destruct (sig_res sig). - destruct t; simpl; NotOrEq. - simpl; NotOrEq. -Qed. - -(** The result location is a caller-save register. *) +(** The result location is a caller-save register or a temporary *) Lemma loc_result_caller_save: - forall (s: signature), In (R (loc_result s)) destroyed_at_call. + forall (s: signature), + In (R (loc_result s)) destroyed_at_call \/ In (R (loc_result s)) temporaries. Proof. - intros; unfold loc_result. + intros; unfold loc_result. left; destruct (sig_res s). destruct t; simpl; OrEq. simpl; OrEq. Qed. -(** The result location is not a callee-save register. *) - -Lemma loc_result_not_callee_save: - forall (s: signature), - ~(In (loc_result s) int_callee_save_regs \/ In (loc_result s) float_callee_save_regs). -Proof. - intros. generalize (loc_result_caller_save s). - generalize (int_callee_save_not_destroyed (loc_result s)). - generalize (float_callee_save_not_destroyed (loc_result s)). - tauto. -Qed. - (** ** Location of function arguments *) (** The PowerPC ABI states the following convention for passing arguments @@ -439,13 +358,6 @@ Fixpoint size_arguments_rec Definition size_arguments (s: signature) : Z := size_arguments_rec s.(sig_args) int_param_regs float_param_regs 8. -(** A tail-call is possible for a signature if the corresponding - arguments are all passed in registers. *) - -Definition tailcall_possible (s: signature) : Prop := - forall l, In l (loc_arguments s) -> - match l with R _ => True | S _ => False end. - (** Argument locations are either non-temporary registers or [Outgoing] stack slots at nonnegative offsets. *) @@ -641,24 +553,6 @@ Proof. ElimOrEq; intuition. Qed. -(** Callee-save registers do not overlap with argument locations. *) - -Lemma arguments_not_preserved: - forall sig l, - Loc.notin l destroyed_at_call -> loc_acceptable l -> - Loc.notin l (loc_arguments sig). -Proof. - intros. unfold loc_arguments. destruct l. - apply loc_arguments_rec_notin_reg. - generalize (Loc.notin_not_in _ _ H). intro; red; intro. - apply H1. generalize H2. simpl. ElimOrEq; OrEq. - generalize (Loc.notin_not_in _ _ H). intro; red; intro. - apply H1. generalize H2. simpl. ElimOrEq; OrEq. - destruct s; simpl in H0; try contradiction. - apply loc_arguments_rec_notin_local. -Qed. -Hint Resolve arguments_not_preserved: locs. - (** Argument locations agree in number with the function signature. *) Lemma loc_arguments_length: @@ -696,107 +590,3 @@ Proof. intro; simpl. ElimOrEq; reflexivity. Qed. -(** There is no partial overlap between an argument location and an - acceptable location: they are either identical or disjoint. *) - -Lemma no_overlap_arguments: - forall args sg, - locs_acceptable args -> - Loc.no_overlap args (loc_arguments sg). -Proof. - unfold Loc.no_overlap; intros. - generalize (H r H0). - generalize (loc_arguments_acceptable _ _ H1). - destruct s; destruct r; simpl. - intros. case (mreg_eq m0 m); intro. left; congruence. tauto. - intros. right; destruct s; auto. - intros. right. auto. - destruct s; try tauto. destruct s0; tauto. -Qed. - -(** Decide whether a tailcall is possible. *) - -Definition tailcall_is_possible (sg: signature) : bool := - let fix tcisp (l: list loc) := - match l with - | nil => true - | R _ :: l' => tcisp l' - | S _ :: l' => false - end - in tcisp (loc_arguments sg). - -Lemma tailcall_is_possible_correct: - forall s, tailcall_is_possible s = true -> tailcall_possible s. -Proof. - intro s. unfold tailcall_is_possible, tailcall_possible. - generalize (loc_arguments s). induction l; simpl; intros. - elim H0. - destruct a. - destruct H0. subst l0. auto. apply IHl. auto. auto. discriminate. -Qed. - -(** ** Location of function parameters *) - -(** A function finds the values of its parameter in the same locations - where its caller stored them, except that the stack-allocated arguments, - viewed as [Outgoing] slots by the caller, are accessed via [Incoming] - slots (at the same offsets and types) in the callee. *) - -Definition parameter_of_argument (l: loc) : loc := - match l with - | S (Outgoing n ty) => S (Incoming n ty) - | _ => l - end. - -Definition loc_parameters (s: signature) := - List.map parameter_of_argument (loc_arguments s). - -Lemma loc_parameters_type: - forall sig, List.map Loc.type (loc_parameters sig) = sig.(sig_args). -Proof. - intros. unfold loc_parameters. - rewrite list_map_compose. - rewrite <- loc_arguments_type. - apply list_map_exten. - intros. destruct x; simpl. auto. - destruct s; reflexivity. -Qed. - -Lemma loc_parameters_length: - forall sg, List.length (loc_parameters sg) = List.length sg.(sig_args). -Proof. - intros. unfold loc_parameters. rewrite list_length_map. - apply loc_arguments_length. -Qed. - -Lemma loc_parameters_not_temporaries: - forall sig, Loc.disjoint (loc_parameters sig) temporaries. -Proof. - intro; red; intros. - unfold loc_parameters in H. - elim (list_in_map_inv _ _ _ H). intros y [EQ IN]. - generalize (loc_arguments_not_temporaries sig y x2 IN H0). - subst x1. destruct x2. - destruct y; simpl. auto. destruct s; auto. - byContradiction. generalize H0. simpl. NotOrEq. -Qed. - -Lemma no_overlap_parameters: - forall params sg, - locs_acceptable params -> - Loc.no_overlap (loc_parameters sg) params. -Proof. - unfold Loc.no_overlap; intros. - unfold loc_parameters in H0. - elim (list_in_map_inv _ _ _ H0). intros t [EQ IN]. - rewrite EQ. - generalize (loc_arguments_acceptable _ _ IN). - generalize (H s H1). - destruct s; destruct t; simpl. - intros. case (mreg_eq m0 m); intro. left; congruence. tauto. - intros. right; destruct s; simpl; auto. - intros; right; auto. - destruct s; try tauto. destruct s0; try tauto. - intros; simpl. tauto. -Qed. - |