From 239cbd2ebab8814b11d7ef43c35a17ce56a7ba0b Mon Sep 17 00:00:00 2001 From: xleroy Date: Wed, 11 May 2011 11:47:38 +0000 Subject: cparser/StructAssign: always use __builtin_memcpy + alignment indication (simpler and globally more efficient) cfrontend/C2C.ml: specialization of __builtin_memcpy over size */PrintAsm.ml: revised expansion of __builtin_memcpy_* ia32/Asm.ml: typo in comment git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@1649 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e --- arm/PrintAsm.ml | 151 ++++++++++++++++++++++++++------------------- cfrontend/C2C.ml | 140 ++++++++++++++++++++++++++--------------- cparser/StructAssign.ml | 140 ++++++++++++++++------------------------- ia32/Asm.v | 2 +- ia32/PrintAsm.ml | 149 +++++++++++++++++++++++++++++--------------- powerpc/PrintAsm.ml | 161 ++++++++++++++++++++++++++++++++---------------- 6 files changed, 443 insertions(+), 300 deletions(-) diff --git a/arm/PrintAsm.ml b/arm/PrintAsm.ml index 8be92fd5..ed6d3f35 100644 --- a/arm/PrintAsm.ml +++ b/arm/PrintAsm.ml @@ -223,6 +223,63 @@ let call_helper oc fn dst arg1 arg2 = locations dictated by the calling conventions; preserve callee-save regs only. *) +(* Handling of __builtin_annotation *) + +let re_builtin_annotation = Str.regexp "__builtin_annotation_\"\\(.*\\)\"$" + +let re_annot_param = Str.regexp "%%\\|%[1-9][0-9]*" + +let print_annotation oc txt args res = + fprintf oc "%s annotation: " comment; + let print_fragment = function + | Str.Text s -> + output_string oc s + | Str.Delim "%%" -> + output_char oc '%' + | Str.Delim s -> + let n = int_of_string (String.sub s 1 (String.length s - 1)) in + try + preg oc (List.nth args (n-1)) + with Failure _ -> + fprintf oc "" s in + List.iter print_fragment (Str.full_split re_annot_param txt); + fprintf oc "\n"; + begin match args, res with + | [], _ -> 0 + | IR src :: _, IR dst -> + if dst = src then 0 else (fprintf oc " mr %a, %a\n" ireg dst ireg src; 1) + | FR src :: _, FR dst -> + if dst = src then 0 else (fprintf oc " fmr %a, %a\n" freg dst freg src; 1) + | _, _ -> assert false + end + +(* Handling of __builtin_memcpy_alX_szY *) + +let re_builtin_memcpy = + Str.regexp "__builtin_memcpy\\(_al\\([248]\\)\\)?_sz\\([0-9]+\\)$" + +(* The ARM has strict alignment constraints. *) + +let print_builtin_memcpy oc sz al dst src = + let rec copy ofs sz ninstr = + if sz >= 4 && al >= 4 then begin + fprintf oc " ldr %a, [%a, #%d]\n" ireg IR14 ireg src ofs; + fprintf oc " str %a, [%a, #%d]\n" ireg IR14 ireg dst ofs; + copy (ofs + 4) (sz - 4) (ninstr + 2) + end else if sz >= 2 && al >= 2 then begin + fprintf oc " ldrh %a, [%a, #%d]\n" ireg IR14 ireg src ofs; + fprintf oc " strh %a, [%a, #%d]\n" ireg IR14 ireg dst ofs; + copy (ofs + 2) (sz - 2) (ninstr + 2) + end else if sz >= 1 then begin + fprintf oc " ldrb %a, [%a, #%d]\n" ireg IR14 ireg src ofs; + fprintf oc " strb %a, [%a, #%d]\n" ireg IR14 ireg dst ofs; + copy (ofs + 1) (sz - 1) (ninstr + 2) + end else + ninstr in + copy 0 sz 0 + +(* Handling of compiler-inlined builtins *) + let print_builtin_inlined oc name args res = fprintf oc "%s begin %s\n" comment name; let n = match name, args, res with @@ -257,6 +314,15 @@ let print_builtin_inlined oc name args res = (* Float arithmetic *) | "__builtin_fabs", [FR a1], FR res -> fprintf oc " absd %a, %a\n" freg res freg a1; 1 + (* Inlined memcpy *) + | name, [IR dst; IR src], _ when Str.string_match re_builtin_memcpy name 0 -> + let sz = int_of_string (Str.matched_group 3 name) in + let al = try int_of_string (Str.matched_group 2 name) with Not_found -> 1 in + print_builtin_memcpy oc sz al dst src + (* Annotations *) + | name, args, res when Str.string_match re_builtin_annotation name 0 -> + let annot = Str.matched_group 1 name in + print_annotation oc annot args res (* Catch-all *) | _ -> invalid_arg ("unrecognized builtin " ^ name) @@ -264,11 +330,27 @@ let print_builtin_inlined oc name args res = fprintf oc "%s end %s\n" comment name; n +(* Handling of other builtins *) + let re_builtin_function = Str.regexp "__builtin_" let is_builtin_function s = Str.string_match re_builtin_function (extern_atom s) 0 +let print_memcpy oc load store sz log2sz = + let lbl1 = new_label() in + let lbl2 = new_label() in + if sz = 1 + then fprintf oc " cmp %a, #0\n" ireg IR2 + else fprintf oc " movs %a, %a, lsr #%d\n" ireg IR2 ireg IR2 log2sz; + fprintf oc " beq .L%d\n" lbl1; + fprintf oc ".L%d: %s %a, [%a], #%d\n" lbl2 load ireg IR3 ireg IR1 sz; + fprintf oc " subs %a, %a, #1\n" ireg IR2 ireg IR2; + fprintf oc " %s %a, [%a], #%d\n" store ireg IR3 ireg IR0 sz; + fprintf oc " bne .L%d\n" lbl2; + fprintf oc ".L%d:\n" lbl1; + 7 + let print_builtin_function oc s = fprintf oc "%s begin %s\n" comment (extern_atom s); (* int args: IR0-IR3 float args: FR0, FR1 @@ -276,35 +358,11 @@ let print_builtin_function oc s = let n = match extern_atom s with (* Block copy *) | "__builtin_memcpy" -> - let lbl1 = new_label() in - let lbl2 = new_label() in - fprintf oc " cmp %a, #0\n" ireg IR2; - fprintf oc " beq .L%d\n" lbl1; - fprintf oc ".L%d: ldrb %a, [%a], #1\n" lbl2 ireg IR3 ireg IR1; - fprintf oc " subs %a, %a, #1\n" ireg IR2 ireg IR2; - fprintf oc " strb %a, [%a], #1\n" ireg IR3 ireg IR0; - fprintf oc " bne .L%d\n" lbl2; - fprintf oc ".L%d:\n" lbl1; - 7 -(* - let lbl = new_label() in - fprintf oc " cmp %a, #0\n" ireg IR2; - fprintf oc "%a: ldrbne %a, [%a], #1\n" label lbl ireg IR3 ireg IR1; - fprintf oc " strbne %a, [%a], #1\n" ireg IR3 ireg IR0; - fprintf oc " subnes %a, %a, #1\n" ireg IR2 ireg IR2; - fprintf oc " bne %a\n" label lbl -*) - | "__builtin_memcpy_words" -> - let lbl1 = new_label() in - let lbl2 = new_label() in - fprintf oc " movs %a, %a, lsr #2\n" ireg IR2 ireg IR2; - fprintf oc " beq .L%d\n" lbl1; - fprintf oc ".L%d: ldr %a, [%a], #4\n" lbl2 ireg IR3 ireg IR1; - fprintf oc " subs %a, %a, #1\n" ireg IR2 ireg IR2; - fprintf oc " str %a, [%a], #4\n" ireg IR3 ireg IR0; - fprintf oc " bne .L%d\n" lbl2; - fprintf oc ".L%d:\n" lbl1; - 7 + print_memcpy oc "ldrb" "strb" 1 0 + | "__builtin_memcpy_al2" -> + print_memcpy oc "ldrh" "strh" 2 1 + | "__builtin_memcpy_al4" | "__builtin_memcpy_al8" -> + print_memcpy oc "ldr" "str" 4 2 (* Catch-all *) | s -> invalid_arg ("unrecognized builtin function " ^ s) @@ -312,35 +370,6 @@ let print_builtin_function oc s = fprintf oc "%s end %s\n" comment (extern_atom s); n -let re_builtin_annotation = Str.regexp "__builtin_annotation_\"\\(.*\\)\"$" - -let re_annot_param = Str.regexp "%%\\|%[1-9][0-9]*" - -let print_annotation oc txt args res = - fprintf oc "%s annotation: " comment; - let print_fragment = function - | Str.Text s -> - output_string oc s - | Str.Delim "%%" -> - output_char oc '%' - | Str.Delim s -> - let n = int_of_string (String.sub s 1 (String.length s - 1)) in - try - preg oc (List.nth args (n-1)) - with Failure _ -> - fprintf oc "" s in - List.iter print_fragment (Str.full_split re_annot_param txt); - fprintf oc "\n"; - begin match args, res with - | [], _ -> () - | IR src :: _, IR dst -> - if dst <> src then fprintf oc " mr %a, %a\n" ireg dst ireg src - | FR src :: _, FR dst -> - if dst <> src then fprintf oc " fmr %a, %a\n" freg dst freg src - | _, _ -> assert false - end; - 0 - (* Printing of instructions *) let shift_op oc = function @@ -504,9 +533,7 @@ let print_instruction oc = function 2 + List.length tbl | Pbuiltin(ef, args, res) -> let name = extern_atom ef.ef_id in - if Str.string_match re_builtin_annotation name 0 - then print_annotation oc (Str.matched_group 1 name) args res - else print_builtin_inlined oc name args res + print_builtin_inlined oc name args res let no_fallthrough = function | Pb _ -> true @@ -518,7 +545,7 @@ let rec print_instructions oc instrs = match instrs with | [] -> () | i :: il -> - let n = print_instruction oc labels i in + let n = print_instruction oc i in currpos := !currpos + n * 4; let d = distance_to_emit_constants() in if d < 256 && no_fallthrough i then diff --git a/cfrontend/C2C.ml b/cfrontend/C2C.ml index 1a6539ac..1ee63b85 100644 --- a/cfrontend/C2C.ml +++ b/cfrontend/C2C.ml @@ -111,7 +111,19 @@ let builtins_generic = { TPtr(TVoid [AConst], []); TInt(Cutil.size_t_ikind, [])], false); - "__builtin_memcpy_words", + "__builtin_memcpy_al2", + (TVoid [], + [TPtr(TVoid [], []); + TPtr(TVoid [AConst], []); + TInt(Cutil.size_t_ikind, [])], + false); + "__builtin_memcpy_al4", + (TVoid [], + [TPtr(TVoid [], []); + TPtr(TVoid [AConst], []); + TInt(Cutil.size_t_ikind, [])], + false); + "__builtin_memcpy_al8", (TVoid [], [TPtr(TVoid [], []); TPtr(TVoid [AConst], []); @@ -172,65 +184,89 @@ let globals_for_strings globs = (fun s id l -> global_for_string s id :: l) stringTable globs -(** ** Handling of stubs for variadic functions *) +(** ** Declaration of special external functions *) + +let special_externals_table : (string, fundef) Hashtbl.t = Hashtbl.create 47 -let stub_function_table = Hashtbl.create 47 +let register_special_external c_name int_name targs tres inline = + if not (Hashtbl.mem special_externals_table c_name) then begin + Hashtbl.add special_externals_table c_name + (External({ef_id = intern_string int_name; + ef_sig = signature_of_type targs tres; + ef_inline = inline}, + targs, tres)) + end + +let declare_special_externals k = + Hashtbl.fold + (fun c_name fd k -> + Datatypes.Coq_pair(intern_string c_name, fd) :: k) + special_externals_table k + +(** ** Handling of stubs for variadic functions *) let register_stub_function name tres targs = let rec letters_of_type = function | Tnil -> [] | Tcons(Tfloat _, tl) -> "f" :: letters_of_type tl | Tcons(_, tl) -> "i" :: letters_of_type tl in + let rec types_of_types = function + | Tnil -> Tnil + | Tcons(Tfloat _, tl) -> Tcons(Tfloat F64, types_of_types tl) + | Tcons(_, tl) -> Tcons(Tpointer Tvoid, types_of_types tl) in let stub_name = name ^ "$" ^ String.concat "" (letters_of_type targs) in - try - (stub_name, Hashtbl.find stub_function_table stub_name) - with Not_found -> - let rec types_of_types = function - | Tnil -> Tnil - | Tcons(Tfloat _, tl) -> Tcons(Tfloat F64, types_of_types tl) - | Tcons(_, tl) -> Tcons(Tpointer Tvoid, types_of_types tl) in - let stub_type = Tfunction (types_of_types targs, tres) in - Hashtbl.add stub_function_table stub_name stub_type; - (stub_name, stub_type) - -let declare_stub_function stub_name stub_type = - match stub_type with - | Tfunction(targs, tres) -> - Datatypes.Coq_pair(intern_string stub_name, - External({ ef_id = intern_string stub_name; - ef_sig = signature_of_type targs tres; - ef_inline = false }, - targs, tres)) - | _ -> assert false - -let declare_stub_functions k = - Hashtbl.fold (fun n i k -> declare_stub_function n i :: k) - stub_function_table k + let targs = types_of_types targs in + register_special_external stub_name stub_name targs tres false; + (stub_name, Tfunction (targs, tres)) (** ** Handling of annotations *) -let annot_function_list = ref [] let annot_function_next = ref 0 let register_annotation_function txt targs tres = incr annot_function_next; let fun_name = - intern_string - (Printf.sprintf "__builtin_annotation_%d" !annot_function_next) - and ext_ident = - intern_string - (Printf.sprintf "__builtin_annotation_\"%s\"" txt) in - annot_function_list := - Datatypes.Coq_pair(fun_name, - External({ ef_id = ext_ident; - ef_sig = signature_of_type targs tres; - ef_inline = true }, - targs, tres)) :: !annot_function_list; - Evalof(Evar(fun_name, Tfunction(targs, tres)), Tfunction(targs, tres)) - -let declare_annotation_functions k = - List.rev_append !annot_function_list k + Printf.sprintf "__builtin_annotation_%d" !annot_function_next + and int_name = + Printf.sprintf "__builtin_annotation_\"%s\"" txt in + register_special_external fun_name int_name targs tres true; + Evalof(Evar(intern_string fun_name, Tfunction(targs, tres)), + Tfunction(targs, tres)) + +(** ** Handling of inlined memcpy functions *) + +let alignof_pointed ty = + match ty with + | Tpointer ty' -> camlint_of_z (alignof ty') + | _ -> 1l (* safe default *) + +let register_inlined_memcpy basename sz = + let name = Printf.sprintf "%s_sz%ld" basename sz in + let targs = Tcons(Tpointer Tvoid, Tcons(Tpointer Tvoid, Tnil)) in + let tres = Tvoid in + register_special_external name name targs tres true; + Evalof(Evar(intern_string name, Tfunction(targs, tres)), + Tfunction(targs, tres)) + +let memcpy_inline_threshold = ref 64l + +let make_builtin_memcpy name fn args = + match args with + | Econs(dst, Econs(src, Econs(sz, Enil))) -> + let sz1 = + match Initializers.constval sz with + | CompcertErrors.OK(Vint n) -> Some (camlint_of_coqint n) + | _ -> None in + begin match sz1 with + | Some sz2 when sz2 <= !memcpy_inline_threshold -> + let fn = register_inlined_memcpy name sz2 in + Ecall(fn, Econs(dst, Econs(src, Enil)), Tvoid) + | _ -> + Ecall(fn, args, Tvoid) + end + | _ -> + Ecall(fn, args, Tvoid) (** ** Translation functions *) @@ -539,6 +575,13 @@ let rec convertExpr env e = ezero end + | C.ECall({edesc = C.EVar {name = ("__builtin_memcpy" + |"__builtin_memcpy_al2" + |"__builtin_memcpy_al4" + |"__builtin_memcpy_al8" as name)}} as fn, + args) -> + make_builtin_memcpy name (convertExpr env fn) (convertExprList env args) + | C.ECall(fn, args) -> match projFunType env fn.etyp with | None -> @@ -727,8 +770,8 @@ let convertFundef env fd = (** External function declaration *) let noninlined_builtin_functions = [ - "__builtin_memcpy"; - "__builtin_memcpy_words" + "__builtin_memcpy"; "__builtin_memcpy_al2"; + "__builtin_memcpy_al4"; "__builtin_memcpy_al8" ] let convertFundecl env (sto, id, ty, optinit) = @@ -889,17 +932,16 @@ let convertProgram p = stringNum := 0; Hashtbl.clear decl_atom; Hashtbl.clear stringTable; - Hashtbl.clear stub_function_table; + Hashtbl.clear special_externals_table; let p = Builtins.declarations() @ p in try let (funs1, vars1) = convertGlobdecls (translEnv Env.empty p) [] [] (cleanupGlobals p) in - let funs2 = declare_stub_functions funs1 in - let funs3 = declare_annotation_functions funs2 in + let funs2 = declare_special_externals funs1 in let vars2 = globals_for_strings vars1 in if !numErrors > 0 then None - else Some { AST.prog_funct = funs3; + else Some { AST.prog_funct = funs2; AST.prog_vars = vars2; AST.prog_main = intern_string "main" } with Env.Error msg -> diff --git a/cparser/StructAssign.ml b/cparser/StructAssign.ml index ae922670..edf88213 100644 --- a/cparser/StructAssign.ml +++ b/cparser/StructAssign.ml @@ -25,11 +25,6 @@ open Env open Errors open Transform -(* Max number of assignments that can be inlined. Above this threshold, - we call memcpy() instead. *) - -let maxsize = ref 8 - (* Finding appropriate memcpy functions *) let memcpy_decl = ref (None : ident option) @@ -46,11 +41,7 @@ let lookup_function env name = | (id, II_ident(sto, ty)) -> (id, ty) | (id, II_enum _) -> raise (Env.Error(Env.Unbound_identifier name)) -let memcpy_ident env = - try lookup_function env "__builtin_memcpy" - with Env.Error _ -> - try lookup_function env "memcpy" - with Env.Error _ -> +let default_memcpy () = match !memcpy_decl with | Some id -> (id, memcpy_type) @@ -59,9 +50,20 @@ let memcpy_ident env = memcpy_decl := Some id; (id, memcpy_type) -let memcpy_words_ident env = - try lookup_function env "__builtin_memcpy_words" - with Env.Error _ -> memcpy_ident env +let rec find_memcpy env = function + | [] -> + default_memcpy() + | name :: rem -> + try lookup_function env name with Env.Error _ -> find_memcpy env rem + +let memcpy_1_ident env = + find_memcpy env ["__builtin_memcpy"; "memcpy"] +let memcpy_2_ident env = + find_memcpy env ["__builtin_memcpy_al2"; "__builtin_memcpy"; "memcpy"] +let memcpy_4_ident env = + find_memcpy env ["__builtin_memcpy_al4"; "__builtin_memcpy"; "memcpy"] +let memcpy_8_ident env = + find_memcpy env ["__builtin_memcpy_al8"; "__builtin_memcpy"; "memcpy"] (* Smart constructor for "," expressions *) @@ -71,67 +73,31 @@ let comma e1 e2 = | _, EConst _ -> e1 | _, _ -> ecomma e1 e2 +(* Smart constructor for "&" expressions *) + +let rec addrof e = + match e.edesc with + | EBinop(Ocomma, e1, e2, _) -> ecomma e1 (addrof e2) + | _ -> eaddrof e + (* Translate an assignment [lhs = rhs] between composite types. - [lhs] and [rhs] must be pure, invariant l-values. *) + [lhs] and [rhs] must be l-values. *) let transf_assign env lhs rhs = - - let num_assign = ref 0 in - - let assign l r = - incr num_assign; - if !num_assign > !maxsize - then raise Exit - else eassign l r in - - let rec transf l r = - match unroll env l.etyp with - | TStruct(id, attr) -> - let ci = Env.find_struct env id in - transf_struct l r ci.ci_members - | TUnion(id, attr) -> - raise Exit - | TArray(ty_elt, Some sz, attr) -> - transf_array l r ty_elt 0L sz - | TArray(ty_elt, None, attr) -> - assert false - | _ -> - assign l r - - and transf_struct l r = function - | [] -> nullconst - | f :: fl -> - comma (transf {edesc = EUnop(Odot f.fld_name, l); etyp = f.fld_typ} - {edesc = EUnop(Odot f.fld_name, r); etyp = f.fld_typ}) - (transf_struct l r fl) - - and transf_array l r ty idx sz = - if idx >= sz then nullconst else begin - let e = intconst idx size_t_ikind in - comma (transf {edesc = EBinop(Oindex, l, e, ty); etyp = ty} - {edesc = EBinop(Oindex, r, e, ty); etyp = ty}) - (transf_array l r ty (Int64.add idx 1L) sz) - end - in - - try - transf lhs rhs - with Exit -> - let by_words = - match Cutil.alignof env lhs.etyp, Cutil.sizeof env lhs.etyp with - | Some al, Some sz -> - al mod !config.sizeof_ptr = 0 && sz mod !config.sizeof_ptr = 0 - | _, _-> - false in - let (ident, ty) = - if by_words - then memcpy_words_ident env - else memcpy_ident env in - let memcpy = {edesc = EVar(ident); etyp = ty} in - let e_lhs = {edesc = EUnop(Oaddrof, lhs); etyp = TPtr(lhs.etyp, [])} in - let e_rhs = {edesc = EUnop(Oaddrof, rhs); etyp = TPtr(rhs.etyp, [])} in - let e_size = {edesc = ESizeof(lhs.etyp); etyp = TInt(size_t_ikind, [])} in - {edesc = ECall(memcpy, [e_lhs; e_rhs; e_size]); etyp = TVoid[]} + let (al, sz) = + match Cutil.alignof env lhs.etyp, Cutil.sizeof env lhs.etyp with + | Some al, Some sz -> (al, sz) + | _, _ -> (1, 1) in + let (ident, ty) = + if al mod 8 = 0 && sz mod 8 = 0 then memcpy_8_ident env + else if al mod 4 = 0 && sz mod 4 = 0 then memcpy_4_ident env + else if al mod 2 = 0 && sz mod 2 = 0 then memcpy_2_ident env + else memcpy_1_ident env in + let memcpy = {edesc = EVar(ident); etyp = ty} in + let e_lhs = addrof lhs in + let e_rhs = addrof rhs in + let e_size = {edesc = ESizeof(lhs.etyp); etyp = TInt(size_t_ikind, [])} in + {edesc = ECall(memcpy, [e_lhs; e_rhs; e_size]); etyp = TVoid[]} (* Detect invariant l-values *) @@ -146,18 +112,14 @@ let rec invariant_lvalue env e = (* Bind a l-value to a temporary variable if it is not invariant. *) -let rec bind_lvalue env e fn = - match e.edesc with - | EBinop(Ocomma, e1, e2, _) -> - ecomma e1 (bind_lvalue env e2 fn) - | _ -> - if invariant_lvalue env e then - fn e - else begin - let tmp = new_temp (TPtr(e.etyp, [])) in - ecomma (eassign tmp (eaddrof e)) - (fn {edesc = EUnop(Oderef, tmp); etyp = e.etyp}) - end +let bind_lvalue env e fn = + if invariant_lvalue env e then + fn e + else begin + let tmp = new_temp (TPtr(e.etyp, [])) in + ecomma (eassign tmp (addrof e)) + (fn {edesc = EUnop(Oderef, tmp); etyp = e.etyp}) + end (* Transformation of expressions. *) @@ -166,10 +128,14 @@ type context = Val | Effects let rec transf_expr env ctx e = match e.edesc with | EBinop(Oassign, lhs, rhs, _) when is_composite_type env lhs.etyp -> - bind_lvalue env (transf_expr env Val lhs) (fun l -> - bind_lvalue env (transf_expr env Val rhs) (fun r -> - let e' = transf_assign env l r in - if ctx = Val then ecomma e' l else e')) + let lhs' = transf_expr env Val lhs in + let rhs' = transf_expr env Val rhs in + begin match ctx with + | Effects -> + transf_assign env lhs' rhs' + | Val -> + bind_lvalue env lhs' (fun l -> ecomma (transf_assign env l rhs') l) + end | EConst c -> e | ESizeof ty -> e | EVar x -> e diff --git a/ia32/Asm.v b/ia32/Asm.v index 7e6bde74..0c4a153c 100644 --- a/ia32/Asm.v +++ b/ia32/Asm.v @@ -10,7 +10,7 @@ (* *) (* *********************************************************************) -(** Abstract syntax and semantics for PowerPC assembly language *) +(** Abstract syntax and semantics for IA32 assembly language *) Require Import Coqlib. Require Import Maps. diff --git a/ia32/PrintAsm.ml b/ia32/PrintAsm.ml index a7a5f1af..35195b64 100644 --- a/ia32/PrintAsm.ml +++ b/ia32/PrintAsm.ml @@ -219,6 +219,74 @@ let need_masks = ref false locations dictated by the calling conventions; preserve callee-save regs only. *) +(* Handling of __builtin_annotation *) + +let re_builtin_annotation = Str.regexp "__builtin_annotation_\"\\(.*\\)\"$" + +let re_annot_param = Str.regexp "%%\\|%[1-9][0-9]*" + +let print_annotation oc txt args res = + fprintf oc "%s annotation: " comment; + let print_fragment = function + | Str.Text s -> + output_string oc s + | Str.Delim "%%" -> + output_char oc '%' + | Str.Delim s -> + let n = int_of_string (String.sub s 1 (String.length s - 1)) in + try + preg oc (List.nth args (n-1)) + with Failure _ -> + fprintf oc "" s in + List.iter print_fragment (Str.full_split re_annot_param txt); + fprintf oc "\n"; + match args, res with + | [], _ -> () + | IR src :: _, IR dst -> + if dst <> src then fprintf oc " movl %a, %a\n" ireg src ireg dst + | FR src :: _, FR dst -> + if dst <> src then fprintf oc " movsd %a, %a\n" freg src freg dst + | _, _ -> assert false + +(* Handling of __builtin_memcpy_alX_szY *) + +let re_builtin_memcpy = + Str.regexp "__builtin_memcpy\\(_al\\([248]\\)\\)?_sz\\([0-9]+\\)$" + +(* Unaligned memory accesses are quite fast on IA32, so use large + memory accesses regardless of alignment. *) + +let print_builtin_memcpy oc sz dst src = + let tmp = + if src <> ECX && dst <> ECX then ECX + else if src <> EDX && dst <> EDX then EDX + else EAX in + let rec copy ofs sz = + if sz >= 8 then begin + fprintf oc " movq %d(%a), %a\n" ofs ireg src freg XMM6; + fprintf oc " movq %a, %d(%a)\n" freg XMM6 ofs ireg dst; + copy (ofs + 8) (sz - 8) + end else if sz >= 4 then begin + fprintf oc " movd %d(%a), %a\n" ofs ireg src freg XMM6; + fprintf oc " movd %a, %d(%a)\n" freg XMM6 ofs ireg dst; + copy (ofs + 4) (sz - 4) + end else if sz >= 2 then begin + fprintf oc " movw %d(%a), %a\n" ofs ireg src ireg16 tmp; + fprintf oc " movw %a, %d(%a)\n" ireg16 tmp ofs ireg dst; + copy (ofs + 2) (sz - 2) + end else if sz >= 1 then begin + fprintf oc " movb %d(%a), %a\n" ofs ireg src ireg8 tmp; + fprintf oc " movb %a, %d(%a)\n" ireg8 tmp ofs ireg dst; + copy (ofs + 1) (sz - 1) + end in + if tmp = EAX && sz mod 4 <> 0 then + fprintf oc " movd %a, %a\n" ireg EAX freg XMM7; + copy 0 sz; + if tmp = EAX && sz mod 4 <> 0 then + fprintf oc " movd %a, %a\n" freg XMM7 ireg EAX + +(* Handling of compiler-inlined builtins *) + let print_builtin_inlined oc name args res = fprintf oc "%s begin builtin %s\n" comment name; begin match name, args, res with @@ -285,16 +353,43 @@ let print_builtin_inlined oc name args res = fprintf oc " movsd %a, %a\n" freg a1 freg res; fprintf oc " minsd %a, %a\n" freg a2 freg res end + (* Inlined memcpy *) + | name, [IR dst; IR src], _ when Str.string_match re_builtin_memcpy name 0 -> + let sz = int_of_string (Str.matched_group 3 name) in + print_builtin_memcpy oc sz dst src + (* Annotations *) + | name, args, res when Str.string_match re_builtin_annotation name 0 -> + let annot = Str.matched_group 1 name in + print_annotation oc annot args res + (* Catch-all *) | _ -> invalid_arg ("unrecognized builtin " ^ name) end; fprintf oc "%s end builtin %s\n" comment name +(* Handling of other builtins *) + let re_builtin_function = Str.regexp "__builtin_" let is_builtin_function s = Str.string_match re_builtin_function (extern_atom s) 0 +let print_memcpy oc sz = + fprintf oc " movl %%esi, %%eax\n"; (* Preserve esi, edi *) + fprintf oc " movl %%edi, %%edx\n"; + fprintf oc " movl 0(%%esp), %%edi\n"; (* Load args *) + fprintf oc " movl 4(%%esp), %%esi\n"; + fprintf oc " movl 8(%%esp), %%ecx\n"; + fprintf oc " shr $2, %%ecx\n"; + fprintf oc " rep movsl\n"; (* Copy by words *) + if sz < 4 then begin + fprintf oc " movl 8(%%esp), %%ecx\n"; + fprintf oc " andl $3, %%ecx\n"; + fprintf oc " rep movsb\n" (* Finish copy by bytes *) + end; + fprintf oc " movl %%eax, %%esi\n"; (* Restore esi, edi *) + fprintf oc " movl %%edx, %%edi\n" + let print_builtin_function oc s = fprintf oc "%s begin builtin function %s\n" comment (extern_atom s); (* arguments: on stack, starting at offset 0 *) @@ -302,57 +397,17 @@ let print_builtin_function oc s = begin match extern_atom s with (* Block copy *) | "__builtin_memcpy" -> - fprintf oc " movl %%esi, %%eax\n"; - fprintf oc " movl %%edi, %%edx\n"; - fprintf oc " movl 0(%%esp), %%edi\n"; - fprintf oc " movl 4(%%esp), %%esi\n"; - fprintf oc " movl 8(%%esp), %%ecx\n"; - fprintf oc " rep movsb\n"; - fprintf oc " movl %%eax, %%esi\n"; - fprintf oc " movl %%edx, %%edi\n" - | "__builtin_memcpy_words" -> - fprintf oc " movl %%esi, %%eax\n"; - fprintf oc " movl %%edi, %%edx\n"; - fprintf oc " movl 0(%%esp), %%edi\n"; - fprintf oc " movl 4(%%esp), %%esi\n"; - fprintf oc " movl 8(%%esp), %%ecx\n"; - fprintf oc " shr $2, %%ecx\n"; - fprintf oc " rep movsl\n"; - fprintf oc " movl %%eax, %%esi\n"; - fprintf oc " movl %%edx, %%edi\n" + print_memcpy oc 1 + | "__builtin_memcpy_al2" -> + print_memcpy oc 2 + | "__builtin_memcpy_al4" | "__builtin_memcpy_al8" -> + print_memcpy oc 4 (* Catch-all *) | s -> invalid_arg ("unrecognized builtin function " ^ s) end; fprintf oc "%s end builtin function %s\n" comment (extern_atom s) -let re_builtin_annotation = Str.regexp "__builtin_annotation_\"\\(.*\\)\"$" - -let re_annot_param = Str.regexp "%%\\|%[1-9][0-9]*" - -let print_annotation oc txt args res = - fprintf oc "%s annotation: " comment; - let print_fragment = function - | Str.Text s -> - output_string oc s - | Str.Delim "%%" -> - output_char oc '%' - | Str.Delim s -> - let n = int_of_string (String.sub s 1 (String.length s - 1)) in - try - preg oc (List.nth args (n-1)) - with Failure _ -> - fprintf oc "" s in - List.iter print_fragment (Str.full_split re_annot_param txt); - fprintf oc "\n"; - match args, res with - | [], _ -> () - | IR src :: _, IR dst -> - if dst <> src then fprintf oc " movl %a, %a\n" ireg src ireg dst - | FR src :: _, FR dst -> - if dst <> src then fprintf oc " movsd %a, %a\n" freg src freg dst - | _, _ -> assert false - (* Printing of instructions *) let float_literals : (int * int64) list ref = ref [] @@ -578,9 +633,7 @@ let print_instruction oc = function fprintf oc " addl $%ld, %%esp\n" sz | Pbuiltin(ef, args, res) -> let name = extern_atom ef.ef_id in - if Str.string_match re_builtin_annotation name 0 - then print_annotation oc (Str.matched_group 1 name) args res - else print_builtin_inlined oc name args res + print_builtin_inlined oc name args res let print_literal oc (lbl, n) = fprintf oc "%a: .quad 0x%Lx\n" label lbl n diff --git a/powerpc/PrintAsm.ml b/powerpc/PrintAsm.ml index daa7f90b..56e3c360 100644 --- a/powerpc/PrintAsm.ml +++ b/powerpc/PrintAsm.ml @@ -272,6 +272,69 @@ let rec log2 n = locations dictated by the calling conventions; preserve callee-save regs only. *) +(* Handling of __builtin_annotation *) + +let re_builtin_annotation = Str.regexp "__builtin_annotation_\"\\(.*\\)\"$" + +let re_annot_param = Str.regexp "%%\\|%[1-9][0-9]*" + +let print_annotation oc txt args res = + fprintf oc "%s annotation: " comment; + let print_fragment = function + | Str.Text s -> + output_string oc s + | Str.Delim "%%" -> + output_char oc '%' + | Str.Delim s -> + let n = int_of_string (String.sub s 1 (String.length s - 1)) in + try + preg oc (List.nth args (n-1)) + with Failure _ -> + fprintf oc "" s in + List.iter print_fragment (Str.full_split re_annot_param txt); + fprintf oc "\n"; + match args, res with + | [], _ -> () + | IR src :: _, IR dst -> + if dst <> src then fprintf oc " mr %a, %a\n" ireg dst ireg src + | FR src :: _, FR dst -> + if dst <> src then fprintf oc " fmr %a, %a\n" freg dst freg src + | _, _ -> assert false + +(* Handling of __builtin_memcpy_alX_szY *) + +let re_builtin_memcpy = + Str.regexp "__builtin_memcpy\\(_al\\([248]\\)\\)?_sz\\([0-9]+\\)$" + +(* On the PowerPC, unaligned accesses to 16- and 32-bit integers are + fast, but unaligned accesses to 64-bit floats can be slow + (not so much on G5, but clearly so on Power7). + So, use 64-bit accesses only if alignment >= 8. + Note that lfd and stfd cannot trap on ill-formed floats. *) + +let print_builtin_memcpy oc sz al dst src = + let rec copy ofs sz = + if sz >= 8 && al >= 8 then begin + fprintf oc " lfd %a, %d(%a)\n" freg FPR0 ofs ireg src; + fprintf oc " stfd %a, %d(%a)\n" freg FPR0 ofs ireg dst; + copy (ofs + 8) (sz - 8) + end else if sz >= 4 then begin + fprintf oc " lwz %a, %d(%a)\n" ireg GPR0 ofs ireg src; + fprintf oc " stw %a, %d(%a)\n" ireg GPR0 ofs ireg dst; + copy (ofs + 4) (sz - 4) + end else if sz >= 2 then begin + fprintf oc " lhz %a, %d(%a)\n" ireg GPR0 ofs ireg src; + fprintf oc " sth %a, %d(%a)\n" ireg GPR0 ofs ireg dst; + copy (ofs + 2) (sz - 2) + end else if sz >= 1 then begin + fprintf oc " lbz %a, %d(%a)\n" ireg GPR0 ofs ireg src; + fprintf oc " stb %a, %d(%a)\n" ireg GPR0 ofs ireg dst; + copy (ofs + 1) (sz - 1) + end in + copy 0 sz + +(* Handling of compiler-inlined builtins *) + let print_builtin_inlined oc name args res = fprintf oc "%s begin builtin %s\n" comment name; (* Can use as temporaries: GPR0, GPR11, GPR12, FPR0, FPR12, FPR13 *) @@ -344,17 +407,42 @@ let print_builtin_inlined oc name args res = fprintf oc " isync\n" | "__builtin_trap", [], _ -> fprintf oc " trap\n" + (* Inlined memcpy *) + | name, [IR dst; IR src], _ when Str.string_match re_builtin_memcpy name 0 -> + let sz = int_of_string (Str.matched_group 3 name) in + let al = try int_of_string (Str.matched_group 2 name) with Not_found -> 1 in + print_builtin_memcpy oc sz al dst src + (* Annotations *) + | name, args, res when Str.string_match re_builtin_annotation name 0 -> + let annot = Str.matched_group 1 name in + print_annotation oc annot args res (* Catch-all *) | _ -> invalid_arg ("unrecognized builtin " ^ name) end; fprintf oc "%s end builtin %s\n" comment name +(* Handling of other builtins *) + let re_builtin_function = Str.regexp "__builtin_" let is_builtin_function s = Str.string_match re_builtin_function (extern_atom s) 0 +let print_memcpy oc sz prepare load store = + let lbl1 = new_label() in + let lbl2 = new_label() in + prepare GPR5; + fprintf oc " beq %a, %a\n" creg 0 label lbl1; + fprintf oc " mtctr %a\n" ireg GPR5; + fprintf oc " addi %a, %a, -%d\n" ireg GPR3 ireg GPR3 sz; + fprintf oc " addi %a, %a, -%d\n" ireg GPR4 ireg GPR4 sz; + fprintf oc "%a:\n" label lbl2; + load GPR4; + store GPR3; + fprintf oc " bdnz %a\n" label lbl2; + fprintf oc "%a:\n" label lbl1 + let print_builtin_function oc s = fprintf oc "%s begin builtin function %s\n" comment (extern_atom s); (* int args: GPR3, GPR4, GPR5 float args: FPR1, FPR2, FPR3 @@ -364,62 +452,31 @@ let print_builtin_function oc s = (* Block copy *) begin match extern_atom s with | "__builtin_memcpy" -> - let lbl1 = new_label() in - let lbl2 = new_label() in - fprintf oc " cmplwi %a, %a, 0\n" creg 0 ireg GPR5; - fprintf oc " beq %a, %a\n" creg 0 label lbl1; - fprintf oc " mtctr %a\n" ireg GPR5; - fprintf oc " addi %a, %a, -1\n" ireg GPR3 ireg GPR3; - fprintf oc " addi %a, %a, -1\n" ireg GPR4 ireg GPR4; - fprintf oc "%a: lbzu %a, 1(%a)\n" label lbl2 ireg GPR0 ireg GPR4; - fprintf oc " stbu %a, 1(%a)\n" ireg GPR0 ireg GPR3; - fprintf oc " bdnz %a\n" label lbl2; - fprintf oc "%a:\n" label lbl1 - | "__builtin_memcpy_words" -> - let lbl1 = new_label() in - let lbl2 = new_label() in - fprintf oc " rlwinm. %a, %a, 30, 2, 31\n" ireg GPR5 ireg GPR5; - fprintf oc " beq %a, %a\n" creg 0 label lbl1; - fprintf oc " mtctr %a\n" ireg GPR5; - fprintf oc " addi %a, %a, -4\n" ireg GPR3 ireg GPR3; - fprintf oc " addi %a, %a, -4\n" ireg GPR4 ireg GPR4; - fprintf oc "%a: lwzu %a, 4(%a)\n" label lbl2 ireg GPR0 ireg GPR4; - fprintf oc " stwu %a, 4(%a)\n" ireg GPR0 ireg GPR3; - fprintf oc " bdnz %a\n" label lbl2; - fprintf oc "%a:\n" label lbl1 + print_memcpy oc 1 + (fun r -> fprintf oc " cmplwi %a, %a, 0\n" creg 0 ireg r) + (fun r -> fprintf oc " lbzu %a, 1(%a)\n" ireg GPR0 ireg r) + (fun r -> fprintf oc " stbu %a, 1(%a)\n" ireg GPR0 ireg r) + | "__builtin_memcpy_al2" -> + print_memcpy oc 2 + (fun r -> fprintf oc " rlwinm. %a, %a, 31, 1, 31\n" ireg r ireg r) + (fun r -> fprintf oc " lhzu %a, 2(%a)\n" ireg GPR0 ireg r) + (fun r -> fprintf oc " sthu %a, 2(%a)\n" ireg GPR0 ireg r) + | "__builtin_memcpy_al4" -> + print_memcpy oc 4 + (fun r -> fprintf oc " rlwinm. %a, %a, 30, 2, 31\n" ireg r ireg r) + (fun r -> fprintf oc " lwzu %a, 4(%a)\n" ireg GPR0 ireg r) + (fun r -> fprintf oc " stwu %a, 4(%a)\n" ireg GPR0 ireg r) + | "__builtin_memcpy_al8" -> + print_memcpy oc 8 + (fun r -> fprintf oc " rlwinm. %a, %a, 29, 3, 31\n" ireg r ireg r) + (fun r -> fprintf oc " lfdu %a, 8(%a)\n" freg FPR0 ireg r) + (fun r -> fprintf oc " stfdu %a, 8(%a)\n" freg FPR0 ireg r) (* Catch-all *) | s -> invalid_arg ("unrecognized builtin function " ^ s) end; fprintf oc "%s end builtin function %s\n" comment (extern_atom s) -let re_builtin_annotation = Str.regexp "__builtin_annotation_\"\\(.*\\)\"$" - -let re_annot_param = Str.regexp "%%\\|%[1-9][0-9]*" - -let print_annotation oc txt args res = - fprintf oc "%s annotation: " comment; - let print_fragment = function - | Str.Text s -> - output_string oc s - | Str.Delim "%%" -> - output_char oc '%' - | Str.Delim s -> - let n = int_of_string (String.sub s 1 (String.length s - 1)) in - try - preg oc (List.nth args (n-1)) - with Failure _ -> - fprintf oc "" s in - List.iter print_fragment (Str.full_split re_annot_param txt); - fprintf oc "\n"; - match args, res with - | [], _ -> () - | IR src :: _, IR dst -> - if dst <> src then fprintf oc " mr %a, %a\n" ireg dst ireg src - | FR src :: _, FR dst -> - if dst <> src then fprintf oc " fmr %a, %a\n" freg dst freg src - | _, _ -> assert false - (* Printing of instructions *) let float_literals : (int * int64) list ref = ref [] @@ -655,9 +712,7 @@ let print_instruction oc = function fprintf oc "%a:\n" label (transl_label lbl) | Pbuiltin(ef, args, res) -> let name = extern_atom ef.ef_id in - if Str.string_match re_builtin_annotation name 0 - then print_annotation oc (Str.matched_group 1 name) args res - else print_builtin_inlined oc name args res + print_builtin_inlined oc name args res let print_literal oc (lbl, n) = let nlo = Int64.to_int32 n -- cgit