aboutsummaryrefslogtreecommitdiffstats
path: root/powerpc/Asmexpand.ml
diff options
context:
space:
mode:
Diffstat (limited to 'powerpc/Asmexpand.ml')
-rw-r--r--powerpc/Asmexpand.ml452
1 files changed, 277 insertions, 175 deletions
diff --git a/powerpc/Asmexpand.ml b/powerpc/Asmexpand.ml
index ae4d694a..3e57cdbf 100644
--- a/powerpc/Asmexpand.ml
+++ b/powerpc/Asmexpand.ml
@@ -21,6 +21,7 @@ open Memdata
open Asm
open Asmexpandaux
+exception Error of string
(* Useful constants and helper functions *)
@@ -44,14 +45,14 @@ let emit_addimm rd rs n =
(* Handling of annotations *)
let expand_annot_val txt targ args res =
- emit (Pannot(EF_annot(txt, [targ]), List.map (fun r -> AA_base r) args));
+ emit (Pbuiltin(EF_annot(txt, [targ]), args, BR_none));
begin match args, res with
- | [IR src], [IR dst] ->
+ | [BA(IR src)], BR(IR dst) ->
if dst <> src then emit (Pmr(dst, src))
- | [FR src], [FR dst] ->
+ | [BA(FR src)], BR(FR dst) ->
if dst <> src then emit (Pfmr(dst, src))
| _, _ ->
- assert false
+ raise (Error "ill-formed __builtin_annot_val")
end
(* Handling of memcpy *)
@@ -62,34 +63,64 @@ let expand_annot_val txt targ args res =
So, use 64-bit accesses only if alignment >= 4.
Note that lfd and stfd cannot trap on ill-formed floats. *)
+let offset_in_range ofs =
+ Int.eq (Asmgen.high_s ofs) Int.zero
+
+let memcpy_small_arg sz arg tmp =
+ match arg with
+ | BA (IR r) ->
+ (r, _0)
+ | BA_addrstack ofs ->
+ if offset_in_range ofs
+ && offset_in_range (Int.add ofs (Int.repr (Z.of_uint sz)))
+ then (GPR1, ofs)
+ else begin emit_addimm tmp GPR1 ofs; (tmp, _0) end
+ | _ ->
+ assert false
+
let expand_builtin_memcpy_small sz al src dst =
- let rec copy ofs sz =
+ let (tsrc, tdst) =
+ if dst <> BA (IR GPR11) then (GPR11, GPR12) else (GPR12, GPR11) in
+ let (rsrc, osrc) = memcpy_small_arg sz src tsrc in
+ let (rdst, odst) = memcpy_small_arg sz dst tdst in
+ let rec copy osrc odst sz =
if sz >= 8 && al >= 4 && !Clflags.option_ffpu then begin
- emit (Plfd(FPR13, Cint ofs, src));
- emit (Pstfd(FPR13, Cint ofs, dst));
- copy (Int.add ofs _8) (sz - 8)
+ emit (Plfd(FPR13, Cint osrc, rsrc));
+ emit (Pstfd(FPR13, Cint odst, rdst));
+ copy (Int.add osrc _8) (Int.add odst _8) (sz - 8)
end else if sz >= 4 then begin
- emit (Plwz(GPR0, Cint ofs, src));
- emit (Pstw(GPR0, Cint ofs, dst));
- copy (Int.add ofs _4) (sz - 4)
+ emit (Plwz(GPR0, Cint osrc, rsrc));
+ emit (Pstw(GPR0, Cint odst, rdst));
+ copy (Int.add osrc _4) (Int.add odst _4) (sz - 4)
end else if sz >= 2 then begin
- emit (Plhz(GPR0, Cint ofs, src));
- emit (Psth(GPR0, Cint ofs, dst));
- copy (Int.add ofs _2) (sz - 2)
+ emit (Plhz(GPR0, Cint osrc, rsrc));
+ emit (Psth(GPR0, Cint odst, rdst));
+ copy (Int.add osrc _2) (Int.add odst _2) (sz - 2)
end else if sz >= 1 then begin
- emit (Plbz(GPR0, Cint ofs, src));
- emit (Pstb(GPR0, Cint ofs, dst));
- copy (Int.add ofs _1) (sz - 1)
+ emit (Plbz(GPR0, Cint osrc, rsrc));
+ emit (Pstb(GPR0, Cint odst, rdst));
+ copy (Int.add osrc _1) (Int.add odst _1) (sz - 1)
end in
- copy _0 sz
+ copy osrc odst sz
+
+let memcpy_big_arg arg tmp =
+ (* Set [tmp] to the value of [arg] minus 4 *)
+ match arg with
+ | BA (IR r) ->
+ emit (Paddi(tmp, r, Cint _m4))
+ | BA_addrstack ofs ->
+ emit_addimm tmp GPR1 (Int.add ofs _m4)
+ | _ ->
+ assert false
let expand_builtin_memcpy_big sz al src dst =
assert (sz >= 4);
emit_loadimm GPR0 (Z.of_uint (sz / 4));
emit (Pmtctr GPR0);
- let (s,d) = if dst <> GPR11 then (GPR11, GPR12) else (GPR12, GPR11) in
- emit (Paddi(s, src, Cint _m4));
- emit (Paddi(d, dst, Cint _m4));
+ let (s, d) =
+ if dst <> BA (IR GPR11) then (GPR11, GPR12) else (GPR12, GPR11) in
+ memcpy_big_arg src s;
+ memcpy_big_arg dst d;
let lbl = new_label() in
emit (Plabel lbl);
emit (Plwzu(GPR0, Cint _4, s));
@@ -109,7 +140,7 @@ let expand_builtin_memcpy_big sz al src dst =
let expand_builtin_memcpy sz al args =
let (dst, src) =
- match args with [IR d; IR s] -> (d, s) | _ -> assert false in
+ match args with [d; s] -> (d, s) | _ -> assert false in
if sz <= (if !Clflags.option_ffpu && al >= 4
then if !Clflags.option_Osize then 35 else 51
else if !Clflags.option_Osize then 19 else 27)
@@ -118,140 +149,131 @@ let expand_builtin_memcpy sz al args =
(* Handling of volatile reads and writes *)
-let expand_builtin_vload_common chunk base offset res =
+let offset_constant cst delta =
+ match cst with
+ | Cint n ->
+ let n' = Int.add n delta in
+ if offset_in_range n' then Some (Cint n') else None
+ | Csymbol_sda(id, ofs) ->
+ Some (Csymbol_sda(id, Int.add ofs delta))
+ | _ -> None
+
+let rec expand_builtin_vload_common chunk base offset res =
match chunk, res with
- | Mint8unsigned, IR res ->
+ | Mint8unsigned, BR(IR res) ->
emit (Plbz(res, offset, base))
- | Mint8signed, IR res ->
+ | Mint8signed, BR(IR res) ->
emit (Plbz(res, offset, base));
emit (Pextsb(res, res))
- | Mint16unsigned, IR res ->
+ | Mint16unsigned, BR(IR res) ->
emit (Plhz(res, offset, base))
- | Mint16signed, IR res ->
+ | Mint16signed, BR(IR res) ->
emit (Plha(res, offset, base))
- | (Mint32 | Many32), IR res ->
+ | (Mint32 | Many32), BR(IR res) ->
emit (Plwz(res, offset, base))
- | Mfloat32, FR res ->
+ | Mfloat32, BR(FR res) ->
emit (Plfs(res, offset, base))
- | (Mfloat64 | Many64), FR res ->
+ | (Mfloat64 | Many64), BR(FR res) ->
emit (Plfd(res, offset, base))
- (* Mint64 is special-cased below *)
- | _ ->
+ | Mint64, BR_splitlong(BR(IR hi), BR(IR lo)) ->
+ begin match offset_constant offset _4 with
+ | Some offset' ->
+ if hi <> base then begin
+ emit (Plwz(hi, offset, base));
+ emit (Plwz(lo, offset', base))
+ end else begin
+ emit (Plwz(lo, offset', base));
+ emit (Plwz(hi, offset, base))
+ end
+ | None ->
+ emit (Paddi(GPR11, base, offset));
+ expand_builtin_vload_common chunk GPR11 (Cint _0) res
+ end
+ | _, _ ->
assert false
let expand_builtin_vload chunk args res =
- begin match args, res with
- | [IR addr], [res] when chunk <> Mint64 ->
+ match args with
+ | [BA(IR addr)] ->
expand_builtin_vload_common chunk addr (Cint _0) res
- | [IR addr], [IR res1; IR res2] when chunk = Mint64 ->
- if addr <> res1 then begin
- emit (Plwz(res1, Cint _0, addr));
- emit (Plwz(res2, Cint _4, addr))
+ | [BA_addrstack ofs] ->
+ if offset_in_range ofs then
+ expand_builtin_vload_common chunk GPR1 (Cint ofs) res
+ else begin
+ emit_addimm GPR11 GPR1 ofs;
+ expand_builtin_vload_common chunk GPR11 (Cint _0) res
+ end
+ | [BA_addrglobal(id, ofs)] ->
+ if symbol_is_small_data id ofs then
+ expand_builtin_vload_common chunk GPR0 (Csymbol_sda(id, ofs)) res
+ else if symbol_is_rel_data id ofs then begin
+ emit (Paddis(GPR11, GPR0, Csymbol_rel_high(id, ofs)));
+ expand_builtin_vload_common chunk GPR11 (Csymbol_rel_low(id, ofs)) res
end else begin
- emit (Plwz(res2, Cint _4, addr));
- emit (Plwz(res1, Cint _0, addr))
+ emit (Paddis(GPR11, GPR0, Csymbol_high(id, ofs)));
+ expand_builtin_vload_common chunk GPR11 (Csymbol_low(id, ofs)) res
end
| _ ->
assert false
- end
-
-let expand_builtin_vload_global chunk id ofs args res =
- begin match res with
- | [res] when chunk <> Mint64 ->
- emit (Paddis(GPR11, GPR0, Csymbol_high(id, ofs)));
- expand_builtin_vload_common chunk GPR11 (Csymbol_low(id, ofs)) res
- | [IR res1; IR res2] when chunk = Mint64 ->
- emit (Paddis(res1, GPR0, Csymbol_high(id, ofs)));
- emit (Plwz(res1, Csymbol_low(id, ofs), res1));
- let ofs = Int.add ofs _4 in
- emit (Paddis(res2, GPR0, Csymbol_high(id, ofs)));
- emit (Plwz(res2, Csymbol_low(id, ofs), res2))
- | _ ->
- assert false
- end
-
-let expand_builtin_vload_sda chunk id ofs args res =
- begin match res with
- | [res] when chunk <> Mint64 ->
- expand_builtin_vload_common chunk GPR0 (Csymbol_sda(id, ofs)) res
- | [IR res1; IR res2] when chunk = Mint64 ->
- emit (Plwz(res1, Csymbol_sda(id, ofs), GPR0));
- let ofs = Int.add ofs _4 in
- emit (Plwz(res2, Csymbol_sda(id, ofs), GPR0))
- | _ ->
- assert false
- end
-let expand_builtin_vload_rel chunk id ofs args res =
- emit (Paddis(GPR11, GPR0, Csymbol_rel_high(id, ofs)));
- emit (Paddi(GPR11, GPR11, Csymbol_rel_low(id, ofs)));
- expand_builtin_vload chunk [IR GPR11] res
+let temp_for_vstore src =
+ let rl = AST.params_of_builtin_arg src in
+ if not (List.mem (IR GPR11) rl) then GPR11
+ else if not (List.mem (IR GPR12) rl) then GPR12
+ else GPR10
let expand_builtin_vstore_common chunk base offset src =
match chunk, src with
- | (Mint8signed | Mint8unsigned), IR src ->
+ | (Mint8signed | Mint8unsigned), BA(IR src) ->
emit (Pstb(src, offset, base))
- | (Mint16signed | Mint16unsigned), IR src ->
+ | (Mint16signed | Mint16unsigned), BA(IR src) ->
emit (Psth(src, offset, base))
- | (Mint32 | Many32), IR src ->
+ | (Mint32 | Many32), BA(IR src) ->
emit (Pstw(src, offset, base))
- | Mfloat32, FR src ->
+ | Mfloat32, BA(FR src) ->
emit (Pstfs(src, offset, base))
- | (Mfloat64 | Many64), FR src ->
+ | (Mfloat64 | Many64), BA(FR src) ->
emit (Pstfd(src, offset, base))
- (* Mint64 is special-cased below *)
- | _ ->
+ | Mint64, BA_splitlong(BA(IR hi), BA(IR lo)) ->
+ begin match offset_constant offset _4 with
+ | Some offset' ->
+ emit (Pstw(hi, offset, base));
+ emit (Pstw(lo, offset', base))
+ | None ->
+ let tmp = temp_for_vstore src in
+ emit (Paddi(tmp, base, offset));
+ emit (Pstw(hi, Cint _0, tmp));
+ emit (Pstw(lo, Cint _4, tmp))
+ end
+ | _, _ ->
assert false
let expand_builtin_vstore chunk args =
- begin match args with
- | [IR addr; src] when chunk <> Mint64 ->
+ match args with
+ | [BA(IR addr); src] ->
expand_builtin_vstore_common chunk addr (Cint _0) src
- | [IR addr; IR src1; IR src2] when chunk = Mint64 ->
- emit (Pstw(src1, Cint _0, addr));
- emit (Pstw(src2, Cint _4, addr))
- | _ ->
- assert false
- end
-
-let expand_builtin_vstore_global chunk id ofs args =
- begin match args with
- | [src] when chunk <> Mint64 ->
- let tmp = if src = IR GPR11 then GPR12 else GPR11 in
- emit (Paddis(tmp, GPR0, Csymbol_high(id, ofs)));
- expand_builtin_vstore_common chunk tmp (Csymbol_low(id, ofs)) src
- | [IR src1; IR src2] when chunk = Mint64 ->
- let tmp =
- if not (List.mem GPR12 [src1; src2]) then GPR12 else
- if not (List.mem GPR11 [src1; src2]) then GPR11 else GPR10 in
- emit (Paddis(tmp, GPR0, Csymbol_high(id, ofs)));
- emit (Pstw(src1, Csymbol_low(id, ofs), tmp));
- let ofs = Int.add ofs _4 in
- emit (Paddis(tmp, GPR0, Csymbol_high(id, ofs)));
- emit (Pstw(src2, Csymbol_low(id, ofs), tmp))
- | _ ->
- assert false
- end
-
-let expand_builtin_vstore_sda chunk id ofs args =
- begin match args with
- | [src] when chunk <> Mint64 ->
- expand_builtin_vstore_common chunk GPR0 (Csymbol_sda(id, ofs)) src
- | [IR src1; IR src2] when chunk = Mint64 ->
- emit (Pstw(src1, Csymbol_sda(id, ofs), GPR0));
- let ofs = Int.add ofs _4 in
- emit (Pstw(src2, Csymbol_sda(id, ofs), GPR0))
+ | [BA_addrstack ofs; src] ->
+ if offset_in_range ofs then
+ expand_builtin_vstore_common chunk GPR1 (Cint ofs) src
+ else begin
+ let tmp = temp_for_vstore src in
+ emit_addimm tmp GPR1 ofs;
+ expand_builtin_vstore_common chunk tmp (Cint _0) src
+ end
+ | [BA_addrglobal(id, ofs); src] ->
+ if symbol_is_small_data id ofs then
+ expand_builtin_vstore_common chunk GPR0 (Csymbol_sda(id, ofs)) src
+ else if symbol_is_rel_data id ofs then begin
+ let tmp = temp_for_vstore src in
+ emit (Paddis(tmp, GPR0, Csymbol_rel_high(id, ofs)));
+ expand_builtin_vstore_common chunk tmp (Csymbol_rel_low(id, ofs)) src
+ end else begin
+ let tmp = temp_for_vstore src in
+ emit (Paddis(tmp, GPR0, Csymbol_high(id, ofs)));
+ expand_builtin_vstore_common chunk tmp (Csymbol_low(id, ofs)) src
+ end
| _ ->
assert false
- end
-
-let expand_builtin_vstore_rel chunk id ofs args =
- let tmp =
- if not (List.mem (IR GPR12) args) then GPR12 else
- if not (List.mem (IR GPR11) args) then GPR11 else GPR10 in
- emit (Paddis(tmp, GPR0, Csymbol_rel_high(id, ofs)));
- emit (Paddi(tmp, tmp, Csymbol_rel_low(id, ofs)));
- expand_builtin_vstore chunk (IR tmp :: args)
(* Handling of varargs *)
@@ -302,49 +324,99 @@ let expand_builtin_va_start r =
let expand_int64_arith conflict rl fn =
if conflict then (fn GPR0; emit (Pmr(rl, GPR0))) else fn rl
+(* Handling of cache instructions *)
+
+(* Auxiliary function to generate address for the cache function *)
+let expand_builtin_cache_common addr f =
+ let add = match addr with
+ | BA (IR a1) -> a1
+ | BA_addrstack ofs ->
+ emit_addimm GPR11 GPR1 ofs;
+ GPR11
+ | BA_addrglobal(id, ofs) ->
+ if symbol_is_small_data id ofs then begin
+ emit (Paddi (GPR11, GPR0, Csymbol_sda(id, ofs)));
+ GPR11
+ end else if symbol_is_rel_data id ofs then begin
+ emit (Paddis(GPR11, GPR0, Csymbol_rel_high(id, ofs)));
+ emit (Paddi(GPR11, GPR11, Csymbol_rel_low(id, ofs)));
+ GPR11
+ end else begin
+ emit (Paddis(GPR11, GPR0, Csymbol_high(id, ofs)));
+ emit (Paddi (GPR11, GPR11, Csymbol_low(id, ofs)));
+ GPR11
+ end
+ | _ -> raise (Error "Argument is not an address") in
+ f add
+
+let expand_builtin_prefetch addr rw loc =
+ if not ((loc >= _0) && (loc <= _2)) then
+ raise (Error "the last argument of __builtin_prefetch must be a constant between 0 and 2");
+ let emit_prefetch_instr addr =
+ if Int.eq rw _0 then begin
+ emit (Pdcbt (loc,GPR0,addr));
+ end else if Int.eq rw _1 then begin
+ emit (Pdcbtst (loc,GPR0,addr));
+ end else
+ raise (Error "the second argument of __builtin_prefetch must be either 0 or 1")
+ in
+ expand_builtin_cache_common addr emit_prefetch_instr
+
+let expand_builtin_dcbtls addr loc =
+ if not ((loc == _0) || (loc = _2)) then
+ raise (Error "the second argument of __builtin_dcbtls must be a constant between 0 and 2");
+ let emit_inst addr = emit (Pdcbtls (loc,GPR0,addr)) in
+ expand_builtin_cache_common addr emit_inst
+
+let expand_builtin_icbtls addr loc =
+ if not ((loc == _0) || (loc = _2)) then
+ raise (Error "the second argument of __builtin_icbtls must be a constant between 0 and 2");
+ let emit_inst addr = emit (Picbtls (loc,GPR0,addr)) in
+ expand_builtin_cache_common addr emit_inst
+
(* Handling of compiler-inlined builtins *)
let expand_builtin_inline name args res =
(* Can use as temporaries: GPR0, FPR13 *)
match name, args, res with
(* Integer arithmetic *)
- | "__builtin_mulhw", [IR a1; IR a2], [IR res] ->
+ | "__builtin_mulhw", [BA(IR a1); BA(IR a2)], BR(IR res) ->
emit (Pmulhw(res, a1, a2))
- | "__builtin_mulhwu", [IR a1; IR a2], [IR res] ->
+ | "__builtin_mulhwu", [BA(IR a1); BA(IR a2)], BR(IR res) ->
emit (Pmulhwu(res, a1, a2))
- | "__builtin_clz", [IR a1], [IR res] ->
+ | "__builtin_clz", [BA(IR a1)], BR(IR res) ->
emit (Pcntlzw(res, a1))
- | ("__builtin_bswap" | "__builtin_bswap32"), [IR a1], [IR res] ->
+ | ("__builtin_bswap" | "__builtin_bswap32"), [BA(IR a1)], BR(IR res) ->
emit (Pstwu(a1, Cint _m8, GPR1));
emit (Pcfi_adjust _8);
emit (Plwbrx(res, GPR0, GPR1));
emit (Paddi(GPR1, GPR1, Cint _8));
emit (Pcfi_adjust _m8)
- | "__builtin_bswap16", [IR a1], [IR res] ->
+ | "__builtin_bswap16", [BA(IR a1)], BR(IR res) ->
emit (Prlwinm(GPR0, a1, _8, coqint_of_camlint 0x0000FF00l));
emit (Prlwinm(res, a1, coqint_of_camlint 24l,
coqint_of_camlint 0x000000FFl));
emit (Por(res, GPR0, res))
(* Float arithmetic *)
- | "__builtin_fmadd", [FR a1; FR a2; FR a3], [FR res] ->
+ | "__builtin_fmadd", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
emit (Pfmadd(res, a1, a2, a3))
- | "__builtin_fmsub", [FR a1; FR a2; FR a3], [FR res] ->
+ | "__builtin_fmsub", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
emit (Pfmsub(res, a1, a2, a3))
- | "__builtin_fnmadd", [FR a1; FR a2; FR a3], [FR res] ->
+ | "__builtin_fnmadd", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
emit (Pfnmadd(res, a1, a2, a3))
- | "__builtin_fnmsub", [FR a1; FR a2; FR a3], [FR res] ->
+ | "__builtin_fnmsub", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
emit (Pfnmsub(res, a1, a2, a3))
- | "__builtin_fabs", [FR a1], [FR res] ->
+ | "__builtin_fabs", [BA(FR a1)], BR(FR res) ->
emit (Pfabs(res, a1))
- | "__builtin_fsqrt", [FR a1], [FR res] ->
+ | "__builtin_fsqrt", [BA(FR a1)], BR(FR res) ->
emit (Pfsqrt(res, a1))
- | "__builtin_frsqrte", [FR a1], [FR res] ->
+ | "__builtin_frsqrte", [BA(FR a1)], BR(FR res) ->
emit (Pfrsqrte(res, a1))
- | "__builtin_fres", [FR a1], [FR res] ->
+ | "__builtin_fres", [BA(FR a1)], BR(FR res) ->
emit (Pfres(res, a1))
- | "__builtin_fsel", [FR a1; FR a2; FR a3], [FR res] ->
+ | "__builtin_fsel", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
emit (Pfsel(res, a1, a2, a3))
- | "__builtin_fcti", [FR a1], [IR res] ->
+ | "__builtin_fcti", [BA(FR a1)], BR(IR res) ->
emit (Pfctiw(FPR13, a1));
emit (Pstfdu(FPR13, Cint _m8, GPR1));
emit (Pcfi_adjust _8);
@@ -352,30 +424,36 @@ let expand_builtin_inline name args res =
emit (Paddi(GPR1, GPR1, Cint _8));
emit (Pcfi_adjust _m8)
(* 64-bit integer arithmetic *)
- | "__builtin_negl", [IR ah; IR al], [IR rh; IR rl] ->
+ | "__builtin_negl", [BA_splitlong(BA(IR ah), BA(IR al))],
+ BR_splitlong(BR(IR rh), BR(IR rl)) ->
expand_int64_arith (rl = ah) rl (fun rl ->
emit (Psubfic(rl, al, Cint _0));
emit (Psubfze(rh, ah)))
- | "__builtin_addl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] ->
+ | "__builtin_addl", [BA_splitlong(BA(IR ah), BA(IR al));
+ BA_splitlong(BA(IR bh), BA(IR bl))],
+ BR_splitlong(BR(IR rh), BR(IR rl)) ->
expand_int64_arith (rl = ah || rl = bh) rl (fun rl ->
emit (Paddc(rl, al, bl));
emit (Padde(rh, ah, bh)))
- | "__builtin_subl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] ->
+ | "__builtin_subl", [BA_splitlong(BA(IR ah), BA(IR al));
+ BA_splitlong(BA(IR bh), BA(IR bl))],
+ BR_splitlong(BR(IR rh), BR(IR rl)) ->
expand_int64_arith (rl = ah || rl = bh) rl (fun rl ->
emit (Psubfc(rl, bl, al));
emit (Psubfe(rh, bh, ah)))
- | "__builtin_mull", [IR a; IR b], [IR rh; IR rl] ->
+ | "__builtin_mull", [BA(IR a); BA(IR b)],
+ BR_splitlong(BR(IR rh), BR(IR rl)) ->
expand_int64_arith (rl = a || rl = b) rl (fun rl ->
emit (Pmullw(rl, a, b));
emit (Pmulhwu(rh, a, b)))
(* Memory accesses *)
- | "__builtin_read16_reversed", [IR a1], [IR res] ->
+ | "__builtin_read16_reversed", [BA(IR a1)], BR(IR res) ->
emit (Plhbrx(res, GPR0, a1))
- | "__builtin_read32_reversed", [IR a1], [IR res] ->
+ | "__builtin_read32_reversed", [BA(IR a1)], BR(IR res) ->
emit (Plwbrx(res, GPR0, a1))
- | "__builtin_write16_reversed", [IR a1; IR a2], _ ->
+ | "__builtin_write16_reversed", [BA(IR a1); BA(IR a2)], _ ->
emit (Psthbrx(a2, GPR0, a1))
- | "__builtin_write32_reversed", [IR a1; IR a2], _ ->
+ | "__builtin_write32_reversed", [BA(IR a1); BA(IR a2)], _ ->
emit (Pstwbrx(a2, GPR0, a1))
(* Synchronization *)
| "__builtin_membar", [], _ ->
@@ -388,20 +466,50 @@ let expand_builtin_inline name args res =
emit (Pisync)
| "__builtin_lwsync", [], _ ->
emit (Plwsync)
+ | "__builtin_mbar", [BA_int mo], _ ->
+ if not (mo = _0 || mo = _1) then
+ raise (Error "the argument of __builtin_mbar must be either 0 or 1");
+ emit (Pmbar mo)
+ | "__builin_mbar",_, _ ->
+ raise (Error "the argument of __builtin_mbar must be a constant");
| "__builtin_trap", [], _ ->
emit (Ptrap)
(* Vararg stuff *)
- | "__builtin_va_start", [IR a], _ ->
+ | "__builtin_va_start", [BA(IR a)], _ ->
expand_builtin_va_start a
- (* Catch-all *)
- | "__builtin_dcbf", [IR a1],_ ->
+ (* Cache control *)
+ | "__builtin_dcbf", [BA(IR a1)],_ ->
emit (Pdcbf (GPR0,a1))
- | "__builtin_dcbi", [IR a1],_ ->
+ | "__builtin_dcbi", [BA(IR a1)],_ ->
emit (Pdcbi (GPR0,a1))
- | "__builtin_icbi", [IR a1],_ ->
+ | "__builtin_icbi", [BA(IR a1)],_ ->
emit (Picbi(GPR0,a1))
+ | "__builtin_dcbtls", [a; BA_int loc],_ ->
+ expand_builtin_dcbtls a loc
+ | "__builtin_dcbtls",_,_ ->
+ raise (Error "the second argument of __builtin_dcbtls must be a constant")
+ | "__builtin_icbtls", [a; BA_int loc],_ ->
+ expand_builtin_icbtls a loc
+ | "__builtin_icbtls",_,_ ->
+ raise (Error "the second argument of __builtin_icbtls must be a constant")
+ | "__builtin_prefetch" , [a1 ;BA_int rw; BA_int loc],_ ->
+ expand_builtin_prefetch a1 rw loc
+ | "__builtin_prefetch" ,_,_ ->
+ raise (Error "the second and third argument of __builtin_prefetch must be a constant")
+ | "__builtin_dcbz",[BA (IR a1)],_ ->
+ emit (Pdcbz (GPR0,a1))
+ (* Special registers *)
+ | "__builtin_get_spr", [BA_int n], BR(IR res) ->
+ emit (Pmfspr(res, n))
+ | "__builtin_get_spr", _, _ ->
+ raise (Error "the argument of __builtin_get_spr must be a constant")
+ | "__builtin_set_spr", [BA_int n; BA(IR a1)], _ ->
+ emit (Pmtspr(n, a1))
+ | "__builtin_set_spr", _, _ ->
+ raise (Error "the first argument of __builtin_set_spr must be a constant")
+ (* Catch-all *)
| _ ->
- invalid_arg ("unrecognized builtin " ^ name)
+ raise (Error ("unrecognized builtin " ^ name))
(* Calls to variadic functions: condition bit 6 must be set
if at least one argument is a float; clear otherwise.
@@ -484,25 +592,11 @@ let expand_instruction instr =
expand_builtin_vload chunk args res
| EF_vstore chunk ->
expand_builtin_vstore chunk args
- | EF_vload_global(chunk, id, ofs) ->
- if symbol_is_small_data id ofs then
- expand_builtin_vload_sda chunk id ofs args res
- else if symbol_is_rel_data id ofs then
- expand_builtin_vload_rel chunk id ofs args res
- else
- expand_builtin_vload_global chunk id ofs args res
- | EF_vstore_global(chunk, id, ofs) ->
- if symbol_is_small_data id ofs then
- expand_builtin_vstore_sda chunk id ofs args
- else if symbol_is_rel_data id ofs then
- expand_builtin_vstore_rel chunk id ofs args
- else
- expand_builtin_vstore_global chunk id ofs args
| EF_memcpy(sz, al) ->
expand_builtin_memcpy (Z.to_int sz) (Z.to_int al) args
| EF_annot_val(txt, targ) ->
expand_annot_val txt targ args res
- | EF_inline_asm(txt, sg, clob) ->
+ | EF_annot _ | EF_debug _ | EF_inline_asm _ ->
emit instr
| _ ->
assert false
@@ -511,13 +605,21 @@ let expand_instruction instr =
emit instr
let expand_function fn =
- set_current_function fn;
- List.iter expand_instruction fn.fn_code;
- get_current_function ()
+ try
+ set_current_function fn;
+ List.iter expand_instruction fn.fn_code;
+ Errors.OK (get_current_function ())
+ with Error s ->
+ Errors.Error (Errors.msg (coqstring_of_camlstring s))
let expand_fundef = function
- | Internal f -> Internal (expand_function f)
- | External ef -> External ef
+ | Internal f ->
+ begin match expand_function f with
+ | Errors.OK tf -> Errors.OK (Internal tf)
+ | Errors.Error msg -> Errors.Error msg
+ end
+ | External ef ->
+ Errors.OK (External ef)
-let expand_program (p: Asm.program) : Asm.program =
- AST.transform_program expand_fundef p
+let expand_program (p: Asm.program) : Asm.program Errors.res =
+ AST.transform_partial_program expand_fundef p