diff options
Diffstat (limited to 'riscV/Asmexpand.ml')
-rw-r--r-- | riscV/Asmexpand.ml | 164 |
1 files changed, 157 insertions, 7 deletions
diff --git a/riscV/Asmexpand.ml b/riscV/Asmexpand.ml index 7e36abf8..3f9d3359 100644 --- a/riscV/Asmexpand.ml +++ b/riscV/Asmexpand.ml @@ -23,6 +23,7 @@ open Asm open Asmexpandaux open AST open Camlcoq +open Asmgen open! Integers exception Error of string @@ -44,11 +45,13 @@ let align n a = (n + a - 1) land (-a) (* Emit instruction sequences that set or offset a register by a constant. *) let expand_loadimm32 dst n = - List.iter emit (Asmgen.loadimm32 dst n []) + match make_immed32 n with + | Imm32_single imm -> emit (Paddiw (dst, X0, imm)) + | Imm32_pair (hi, lo) -> List.iter emit (load_hilo32 dst hi lo []) let expand_addptrofs dst src n = - List.iter emit (Asmgen.addptrofs dst src n []) + List.iter emit (addptrofs dst src n []) let expand_storeind_ptr src base ofs = - List.iter emit (Asmgen.storeind_ptr src base ofs []) + List.iter emit (storeind_ptr src base ofs []) (* Built-ins. They come in two flavors: - annotation statements: take their arguments in registers or stack @@ -394,6 +397,90 @@ let expand_bswap64 d s = emit (Psrlil(X31, X s, coqint_of_camlint 56l)); emit (Porl(d, X X1, X X31)) +(* Count leading zeros. Algorithm 5-7 from Hacker's Delight, + re-rolled as a loop to produce more compact code. *) + +let expand_clz ~sixtyfour ~splitlong = + (* Input: X in X5 or (X5, X6) if splitlong + Result: N in X7 + Temporaries: S in X8, Y in X9 *) + let lbl1 = new_label() in + let lbl2 = new_label() in + (* N := bitsize of X's type (32 or 64) *) + expand_loadimm32 X7 (coqint_of_camlint + (if sixtyfour || splitlong then 64l else 32l)); + (* S := initial shift amount (16 or 32) *) + expand_loadimm32 X8 (coqint_of_camlint (if sixtyfour then 32l else 16l)); + if splitlong then begin + (* if (Xhigh == 0) goto lbl1 *) + emit (Pbeqw(X X6, X0, lbl1)); + (* N := 32 *) + expand_loadimm32 X7 (coqint_of_camlint 32l); + (* X := Xhigh *) + emit (Pmv(X5, X6)) + end; + (* lbl1: *) + emit (Plabel lbl1); + (* Y := X >> S *) + emit (if sixtyfour then Psrll(X9, X X5, X X8) else Psrlw(X9, X X5, X X8)); + (* if (Y == 0) goto lbl2 *) + emit (if sixtyfour then Pbeql(X X9, X0, lbl2) else Pbeqw(X X9, X0, lbl2)); + (* N := N - S *) + emit (Psubw(X7, X X7, X X8)); + (* X := Y *) + emit (Pmv(X5, X9)); + (* lbl2: *) + emit (Plabel lbl2); + (* S := S / 2 *) + emit (Psrliw(X8, X X8, _1)); + (* if (S != 0) goto lbl1; *) + emit (Pbnew(X X8, X0, lbl1)); + (* N := N - X *) + emit (Psubw(X7, X X7, X X5)) + +(* Count trailing zeros. Algorithm 5-14 from Hacker's Delight, + re-rolled as a loop to produce more compact code. *) + +let expand_ctz ~sixtyfour ~splitlong = + (* Input: X in X6 or (X5, X6) if splitlong + Result: N in X7 + Temporaries: S in X8, Y in X9 *) + let lbl1 = new_label() in + let lbl2 = new_label() in + (* N := bitsize of X's type (32 or 64) *) + expand_loadimm32 X7 (coqint_of_camlint + (if sixtyfour || splitlong then 64l else 32l)); + (* S := initial shift amount (16 or 32) *) + expand_loadimm32 X8 (coqint_of_camlint (if sixtyfour then 32l else 16l)); + if splitlong then begin + (* if (Xlow == 0) goto lbl1 *) + emit (Pbeqw(X X5, X0, lbl1)); + (* N := 32 *) + expand_loadimm32 X7 (coqint_of_camlint 32l); + (* X := Xlow *) + emit (Pmv(X6, X5)) + end; + (* lbl1: *) + emit (Plabel lbl1); + (* Y := X >> S *) + emit (if sixtyfour then Pslll(X9, X X6, X X8) else Psllw(X9, X X6, X X8)); + (* if (Y == 0) goto lbl2 *) + emit (if sixtyfour then Pbeql(X X9, X0, lbl2) else Pbeqw(X X9, X0, lbl2)); + (* N := N - S *) + emit (Psubw(X7, X X7, X X8)); + (* X := Y *) + emit (Pmv(X6, X9)); + (* lbl2: *) + emit (Plabel lbl2); + (* S := S / 2 *) + emit (Psrliw(X8, X X8, _1)); + (* if (S != 0) goto lbl1; *) + emit (Pbnew(X X8, X0, lbl1)); + (* N := N - most significant bit of X *) + emit (if sixtyfour then Psrlil(X6, X X6, coqint_of_camlint 63l) + else Psrliw(X6, X X6, coqint_of_camlint 31l)); + emit (Psubw(X7, X X7, X X6)) + (* Handling of compiler-inlined builtins *) let expand_builtin_inline name args res = @@ -418,10 +505,33 @@ let expand_builtin_inline name args res = assert (ah = X6 && al = X5 && rh = X5 && rl = X6); expand_bswap32 X5 X5; expand_bswap32 X6 X6 + (* Count zeros *) + | "__builtin_clz", [BA(IR a)], BR(IR res) -> + assert (a = X5 && res = X7); + expand_clz ~sixtyfour:false ~splitlong:false + | "__builtin_clzl", [BA(IR a)], BR(IR res) -> + assert (a = X5 && res = X7); + expand_clz ~sixtyfour:Archi.ptr64 ~splitlong:false + | "__builtin_clzll", [BA(IR a)], BR(IR res) -> + assert (a = X5 && res = X7); + expand_clz ~sixtyfour:true ~splitlong:false + | "__builtin_clzll", [BA_splitlong(BA(IR ah), BA(IR al))], BR(IR res) -> + assert (al = X5 && ah = X6 && res = X7); + expand_clz ~sixtyfour:false ~splitlong:true + | "__builtin_ctz", [BA(IR a)], BR(IR res) -> + assert (a = X6 && res = X7); + expand_ctz ~sixtyfour:false ~splitlong:false + | "__builtin_ctzl", [BA(IR a)], BR(IR res) -> + assert (a = X6 && res = X7); + expand_ctz ~sixtyfour:Archi.ptr64 ~splitlong:false + | "__builtin_ctzll", [BA(IR a)], BR(IR res) -> + assert (a = X6 && res = X7); + expand_ctz ~sixtyfour:true ~splitlong:false + | "__builtin_ctzll", [BA_splitlong(BA(IR ah), BA(IR al))], BR(IR res) -> + assert (al = X5 && ah = X6 && res = X7); + expand_ctz ~sixtyfour:false ~splitlong:true (* Float arithmetic *) - | "__builtin_fabs", [BA(FR a1)], BR(FR res) -> - emit (Pfabsd(res, a1)) - | "__builtin_fsqrt", [BA(FR a1)], BR(FR res) -> + | ("__builtin_fsqrt" | "__builtin_sqrt"), [BA(FR a1)], BR(FR res) -> emit (Pfsqrtd(res, a1)) | "__builtin_fmadd", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) -> emit (Pfmaddd(res, a1, a2, a3)) @@ -475,9 +585,49 @@ let expand_builtin_inline name args res = raise (Error ("unrecognized builtin " ^ name)) (* Expansion of instructions *) - + let expand_instruction instr = match instr with + | Pselectl(rd, rb, rt, rf) -> + if not Archi.ptr64 + then failwith "Pselectl not available on RV32, only on RV64" + else + if ireg0_eq rt rf then + begin + if ireg0_eq (X rd) rt then + begin + end + else + begin + emit (Paddl(rd, X0, rt)) + end + end + else + if (ireg0_eq (X rd) rt) then + begin + emit (Psubl(X31, X0, rb)); + emit (Pandl(X31, X X31, rt)); + emit (Paddil(rd, rb, Int64.mone)); + emit (Pandl(rd, X rd, rf)); + emit (Porl(rd, X rd, X X31)) + end + else + if (ireg0_eq (X rd) rf) then + begin + emit (Paddil(X31, rb, Int64.mone)); + emit (Pandl(X31, X X31, rf)); + emit (Psubl(rd, X0, rb)); + emit (Pandl(rd, X rd, rt)); + emit (Porl(rd, X rd, X X31)) + end + else + begin + emit (Psubl(X31, X0, rb)); + emit (Paddil(rd, rb, Int64.mone)); + emit (Pandl(X31, X X31, rt)); + emit (Pandl(rd, X rd, rf)); + emit (Porl(rd, X rd, X X31)) + end | Pallocframe (sz, ofs) -> let sg = get_current_function_sig() in emit (Pmv (X30, X2)); |