From 5aef1e188ba1f6552f7c846cda6296ad47f04ba1 Mon Sep 17 00:00:00 2001 From: Bernhard Schommer Date: Thu, 20 Aug 2015 11:13:12 +0200 Subject: Added command line option to specify a linker command file for the linker. --- driver/Driver.ml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/driver/Driver.ml b/driver/Driver.ml index 2932e879..37e3b44c 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -468,6 +468,7 @@ Assembling options: Linking options: -l Link library -L Add to search path for libraries + -T Use as linker command file -Wl, Pass option to the linker Tracing options: -dparse Save C file after parsing and elaboration in .parse.c @@ -564,6 +565,10 @@ let cmdline_actions = (* Linking options *) Prefix "-l", Self push_linker_arg; Prefix "-L", Self push_linker_arg; + Exact "-T", String (fun s -> if Configuration.system = "diab" then + push_linker_arg ("-Wm "^s) + else + push_linker_arg ("-T "^s)); Prefix "-Wl,", Self push_linker_arg; (* Tracing options *) Exact "-dparse", Set option_dparse; -- cgit From 5f798720574bf9d694da271e3e8bf699a4726497 Mon Sep 17 00:00:00 2001 From: Xavier Leroy Date: Fri, 21 Aug 2015 09:55:47 +0200 Subject: Fixed bugs in asm expansion causing the test suite to fail. More bugs remain. --- ia32/Asmexpand.ml | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/ia32/Asmexpand.ml b/ia32/Asmexpand.ml index e07672a6..11c63469 100644 --- a/ia32/Asmexpand.ml +++ b/ia32/Asmexpand.ml @@ -78,19 +78,19 @@ let expand_builtin_memcpy_small sz al src dst = let rec copy ofs sz = if sz >= 8 && !Clflags.option_ffpu then begin emit (Pmovq_rm (XMM7,Addrmode (Some src, None, Coq_inl ofs))); - emit (Pmovq_mr (Addrmode (Some src, None, Coq_inl ofs),XMM7)); + emit (Pmovq_mr (Addrmode (Some dst, None, Coq_inl ofs),XMM7)); copy (Int.add ofs _8) (sz - 8) end else if sz >= 4 then begin emit (Pmov_rm (ECX,Addrmode (Some src, None, Coq_inl ofs))); - emit (Pmov_mr (Addrmode (Some src, None, Coq_inl ofs),ECX)); + emit (Pmov_mr (Addrmode (Some dst, None, Coq_inl ofs),ECX)); copy (Int.add ofs _4) (sz - 4) end else if sz >= 2 then begin emit (Pmovw_rm (ECX,Addrmode (Some src, None, Coq_inl ofs))); - emit (Pmovw_mr (Addrmode (Some src, None, Coq_inl ofs),ECX)); + emit (Pmovw_mr (Addrmode (Some dst, None, Coq_inl ofs),ECX)); copy (Int.add ofs _2) (sz - 2) end else if sz >= 1 then begin emit (Pmovb_rm (ECX,Addrmode (Some src, None, Coq_inl ofs))); - emit (Pmovb_mr (Addrmode (Some src, None, Coq_inl ofs),ECX)); + emit (Pmovb_mr (Addrmode (Some dst, None, Coq_inl ofs),ECX)); copy (Int.add ofs _1) (sz - 1) end in copy _0 sz @@ -226,7 +226,7 @@ let expand_builtin_inline name args res = (* Float arithmetic *) | "__builtin_fabs", [FR a1], [FR res] -> if a1 <> res then - emit (Pmovsd_ff (a1,res)); + emit (Pmovsd_ff (res,a1)); emit (Pabsd res) (* This ensures that need_masks is set to true *) | "__builtin_fsqrt", [FR a1], [FR res] -> emit (Psqrtsd (a1,res)) @@ -236,7 +236,7 @@ let expand_builtin_inline name args res = else if res = a2 then emit (Pmaxsd (a1,res)) else begin - emit (Pmovsd_ff (a1,res)); + emit (Pmovsd_ff (res,a1)); emit (Pmaxsd (a2,res)) end | "__builtin_fmin", [FR a1; FR a2], [FR res] -> @@ -245,7 +245,7 @@ let expand_builtin_inline name args res = else if res = a2 then emit (Pminsd (a1,res)) else begin - emit (Pmovsd_ff (a1,res)); + emit (Pmovsd_ff (res,a1)); emit (Pminsd (a2,res)) end | "__builtin_fmadd", [FR a1; FR a2; FR a3], [FR res] -> @@ -256,7 +256,7 @@ let expand_builtin_inline name args res = else if res = a3 then emit (Pfmadd231 (a2,a3,res)) else begin - emit (Pmovsd_ff (a2,res)); + emit (Pmovsd_ff (res,a2)); emit (Pfmadd231 (a1,a2,res)) end |"__builtin_fmsub", [FR a1; FR a2; FR a3], [FR res] -> @@ -267,7 +267,7 @@ let expand_builtin_inline name args res = else if res = a3 then emit (Pfmsub231 (a2,a3,res)) else begin - emit (Pmovsd_ff (a2,res)); + emit (Pmovsd_ff (res,a2)); emit (Pfmsub231 (a1,a2,res)) end | "__builtin_fnmadd", [FR a1; FR a2; FR a3], [FR res] -> @@ -278,7 +278,7 @@ let expand_builtin_inline name args res = else if res = a3 then emit (Pfnmadd231 (a2,a3,res)) else begin - emit (Pmovsd_ff (a2,res)); + emit (Pmovsd_ff (res,a2)); emit (Pfnmadd231 (a1,a2,res)) end |"__builtin_fnmsub", [FR a1; FR a2; FR a3], [FR res] -> @@ -289,7 +289,7 @@ let expand_builtin_inline name args res = else if res = a3 then emit (Pfnmsub231 (a2,a3,res)) else begin - emit (Pmovsd_ff (a2,res)); + emit (Pmovsd_ff (res,a2)); emit (Pfnmsub231 (a1,a2,res)) end (* 64-bit integer arithmetic *) @@ -304,7 +304,7 @@ let expand_builtin_inline name args res = emit (Padcl_rr (ECX,EDX)) | "__builtin_subl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] -> assert (ah = EDX && al = EAX && bh = ECX && bl = EBX && rh = EDX && rl = EAX); - emit (Psub_rr (EBX,EAX)); + emit (Psub_rr (EAX,EBX)); emit (Psbbl (ECX,EDX)) | "__builtin_mull", [IR a; IR b], [IR rh; IR rl] -> assert (a = EAX && b = EDX && rh = EDX && rl = EAX); @@ -322,9 +322,16 @@ let expand_builtin_inline name args res = let tmp = if a1 = ECX then EDX else ECX in let addr = Addrmode(Some a1,None,Coq_inl _0) in if a2 <> tmp then - emit (Pmov_rr (a2,tmp)); + emit (Pmov_rr (tmp,a2)); emit (Pxchg (tmp,tmp)); emit (Pmovw_mr (addr,tmp)) + | "__builtin_write32_reversed", [IR a1; IR a2], _ -> + let tmp = if a1 = ECX then EDX else ECX in + let addr = Addrmode(Some a1,None,Coq_inl _0) in + if a2 <> tmp then + emit (Pmov_rr (tmp,a2)); + emit (Pbswap tmp); + emit (Pmov_mr (addr,tmp)) (* Vararg stuff *) | "__builtin_va_start", [IR a], _ -> expand_builtin_va_start a -- cgit From 9174523f4791e2263f13866b1df1f5adc0cc3ec4 Mon Sep 17 00:00:00 2001 From: Xavier Leroy Date: Fri, 21 Aug 2015 10:51:08 +0200 Subject: Consistent naming of "P" instructions and consistent ordering of arguments according to Intel convention (instr destination, argument). --- ia32/Asm.v | 85 +++++++++++------------ ia32/Asmexpand.ml | 189 +++++++++++++++++++++++++------------------------- ia32/TargetPrinter.ml | 71 ++++++++++--------- 3 files changed, 171 insertions(+), 174 deletions(-) diff --git a/ia32/Asm.v b/ia32/Asm.v index b423b4fc..9e763f60 100644 --- a/ia32/Asm.v +++ b/ia32/Asm.v @@ -213,41 +213,41 @@ Inductive instruction: Type := | Pfreeframe(sz: Z)(ofs_ra ofs_link: int) | Pbuiltin(ef: external_function)(args: list preg)(res: list preg) | Pannot(ef: external_function)(args: list (annot_arg preg)) - | Padcl_ir (n: int) (r: ireg) - | Padcl_rr (r1: ireg) (r2: ireg) - | Paddl (r1: ireg) (r2: ireg) - | Paddl_mi (a: addrmode) (n: int) - | Paddl_ri (r1: ireg) (n: int) - | Pbsfl (r1: ireg) (r2: ireg) - | Pbslr (r1: ireg) (r2: ireg) - | Pbswap (r: ireg) + (** Instructions not generated by [Asmgen] *) + | Padc_ri (rd: ireg) (n: int) + | Padc_rr (rd: ireg) (r2: ireg) + | Padd_mi (a: addrmode) (n: int) + | Padd_ri (rd: ireg) (n: int) + | Padd_rr (rd: ireg) (r2: ireg) + | Pbsf (rd: ireg) (r1: ireg) + | Pbsr (rd: ireg) (r1: ireg) + | Pbswap (rd: ireg) + | Pbswap16 (rd: ireg) | Pcfi_adjust (n: int) - | Pfmadd132 (r1: freg) (r2: freg) (r3: freg) - | Pfmadd213 (r1: freg) (r2: freg) (r3: freg) - | Pfmadd231 (r1: freg) (r2: freg) (r3: freg) - | Pfmsub132 (r1: freg) (r2: freg) (r3: freg) - | Pfmsub213 (r1: freg) (r2: freg) (r3: freg) - | Pfmsub231 (r1: freg) (r2: freg) (r3: freg) - | Pfnmadd132 (r1: freg) (r2: freg) (r3: freg) - | Pfnmadd213 (r1: freg) (r2: freg) (r3: freg) - | Pfnmadd231 (r1: freg) (r2: freg) (r3: freg) - | Pfnmsub132 (r1: freg) (r2: freg) (r3: freg) - | Pfnmsub213 (r1: freg) (r2: freg) (r3: freg) - | Pfnmsub231 (r1: freg) (r2: freg) (r3: freg) - | Pmaxsd (r1: freg) (r2: freg) - | Pminsd (r1: freg) (r2: freg) - | Pmovb_rm (rs: ireg) (a: addrmode) - | Pmovq_rm (rs: freg) (a: addrmode) + | Pfmadd132 (rd: freg) (r2: freg) (r3: freg) + | Pfmadd213 (rd: freg) (r2: freg) (r3: freg) + | Pfmadd231 (rd: freg) (r2: freg) (r3: freg) + | Pfmsub132 (rd: freg) (r2: freg) (r3: freg) + | Pfmsub213 (rd: freg) (r2: freg) (r3: freg) + | Pfmsub231 (rd: freg) (r2: freg) (r3: freg) + | Pfnmadd132 (rd: freg) (r2: freg) (r3: freg) + | Pfnmadd213 (rd: freg) (r2: freg) (r3: freg) + | Pfnmadd231 (rd: freg) (r2: freg) (r3: freg) + | Pfnmsub132 (rd: freg) (r2: freg) (r3: freg) + | Pfnmsub213 (rd: freg) (r2: freg) (r3: freg) + | Pfnmsub231 (rd: freg) (r2: freg) (r3: freg) + | Pmaxsd (rd: freg) (r2: freg) + | Pminsd (rd: freg) (r2: freg) + | Pmovb_rm (rd: ireg) (a: addrmode) | Pmovq_mr (a: addrmode) (rs: freg) + | Pmovq_rm (rd: freg) (a: addrmode) | Pmovsb | Pmovsw - | Pmovw_rm (rs: ireg) (ad: addrmode) + | Pmovw_rm (rd: ireg) (ad: addrmode) | Prep_movsl - | Prolw_8 (r: ireg) - | Psbbl (r1: ireg) (r2: ireg) - | Psqrtsd (r1: freg) (r2: freg) - | Psubl_ri (r1: ireg) (n: int) - | Pxchg (r1: ireg) (r2: ireg). + | Psbb_rr (rd: ireg) (r2: ireg) + | Psqrtsd (rd: freg) (r1: freg) + | Psub_ri (rd: ireg) (n: int). Definition code := list instruction. Record function : Type := mkfunction { fn_sig: signature; fn_code: code }. @@ -784,16 +784,17 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out Stuck (**r treated specially below *) | Pannot ef args => Stuck (**r treated specially below *) - (** The following instructions and directives are not generated directly by Asmgen, - so we do not model them. *) - | Padcl_ir _ _ - | Padcl_rr _ _ - | Paddl _ _ - | Paddl_mi _ _ - | Paddl_ri _ _ + (** The following instructions and directives are not generated + directly by [Asmgen], so we do not model them. *) + | Padc_ri _ _ + | Padc_rr _ _ + | Padd_mi _ _ + | Padd_ri _ _ + | Padd_rr _ _ + | Pbsf _ _ + | Pbsr _ _ | Pbswap _ - | Pbsfl _ _ - | Pbslr _ _ + | Pbswap16 _ | Pcfi_adjust _ | Pfmadd132 _ _ _ | Pfmadd213 _ _ _ @@ -816,11 +817,9 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out | Pmovsw | Pmovw_rm _ _ | Prep_movsl - | Prolw_8 _ - | Psbbl _ _ + | Psbb_rr _ _ | Psqrtsd _ _ - | Psubl_ri _ _ - | Pxchg _ _ => Stuck + | Psub_ri _ _ => Stuck end. (** Translation of the LTL/Linear/Mach view of machine registers diff --git a/ia32/Asmexpand.ml b/ia32/Asmexpand.ml index 11c63469..177f6989 100644 --- a/ia32/Asmexpand.ml +++ b/ia32/Asmexpand.ml @@ -67,6 +67,16 @@ let expand_annot_val txt targ args res = if dst <> src then emit (Pmovsd_ff (dst,src)) | _, _ -> assert false +(* Operations on addressing modes *) + +let offset_addressing (Addrmode(base, ofs, cst)) delta = + Addrmode(base, ofs, + match cst with + | Coq_inl n -> Coq_inl(Int.add n delta) + | Coq_inr(id, n) -> Coq_inr(id, Int.add n delta)) + +let linear_addr reg ofs = Addrmode(Some reg, None, Coq_inl ofs) +let global_addr id ofs = Addrmode(None, None, Coq_inr(id, ofs)) (* Handling of memcpy *) @@ -77,20 +87,20 @@ let expand_builtin_memcpy_small sz al src dst = assert (src = EDX && dst = EAX); let rec copy ofs sz = if sz >= 8 && !Clflags.option_ffpu then begin - emit (Pmovq_rm (XMM7,Addrmode (Some src, None, Coq_inl ofs))); - emit (Pmovq_mr (Addrmode (Some dst, None, Coq_inl ofs),XMM7)); + emit (Pmovq_rm (XMM7, linear_addr src ofs)); + emit (Pmovq_mr (linear_addr dst ofs, XMM7)); copy (Int.add ofs _8) (sz - 8) end else if sz >= 4 then begin - emit (Pmov_rm (ECX,Addrmode (Some src, None, Coq_inl ofs))); - emit (Pmov_mr (Addrmode (Some dst, None, Coq_inl ofs),ECX)); + emit (Pmov_rm (ECX, linear_addr src ofs)); + emit (Pmov_mr (linear_addr dst ofs, ECX)); copy (Int.add ofs _4) (sz - 4) end else if sz >= 2 then begin - emit (Pmovw_rm (ECX,Addrmode (Some src, None, Coq_inl ofs))); - emit (Pmovw_mr (Addrmode (Some dst, None, Coq_inl ofs),ECX)); + emit (Pmovw_rm (ECX, linear_addr src ofs)); + emit (Pmovw_mr (linear_addr dst ofs, ECX)); copy (Int.add ofs _2) (sz - 2) end else if sz >= 1 then begin - emit (Pmovb_rm (ECX,Addrmode (Some src, None, Coq_inl ofs))); - emit (Pmovb_mr (Addrmode (Some dst, None, Coq_inl ofs),ECX)); + emit (Pmovb_rm (ECX, linear_addr src ofs)); + emit (Pmovb_mr (linear_addr dst ofs, ECX)); copy (Int.add ofs _1) (sz - 1) end in copy _0 sz @@ -111,12 +121,6 @@ let expand_builtin_memcpy sz al args = (* Handling of volatile reads and writes *) -let offset_addressing (Addrmode(base, ofs, cst)) delta = - Addrmode(base, ofs, - match cst with - | Coq_inl n -> Coq_inl(Integers.Int.add n delta) - | Coq_inr(id, n) -> Coq_inr(id, Integers.Int.add n delta)) - let expand_builtin_vload_common chunk addr res = match chunk, res with | Mint8unsigned, [IR res] -> @@ -130,7 +134,7 @@ let expand_builtin_vload_common chunk addr res = | Mint32, [IR res] -> emit (Pmov_rm (res,addr)) | Mint64, [IR res1; IR res2] -> - let addr' = offset_addressing addr (coqint_of_camlint 4l) in + let addr' = offset_addressing addr _4 in if not (Asmgen.addressing_mentions addr res2) then begin emit (Pmov_rm (res2,addr)); emit (Pmov_rm (res1,addr')) @@ -147,13 +151,11 @@ let expand_builtin_vload_common chunk addr res = let expand_builtin_vload chunk args res = match args with - | [IR addr] -> - expand_builtin_vload_common chunk (Addrmode (Some addr,None, Coq_inl _0)) res - | _ -> - assert false + | [IR addr] -> expand_builtin_vload_common chunk (linear_addr addr _0) res + | _ -> assert false let expand_builtin_vload_global chunk id ofs args res = - expand_builtin_vload_common chunk (Addrmode(None, None, Coq_inr(id,ofs))) res + expand_builtin_vload_common chunk (global_addr id ofs) res let expand_builtin_vstore_common chunk addr src tmp = match chunk, src with @@ -169,7 +171,7 @@ let expand_builtin_vstore_common chunk addr src tmp = | Mint32, [IR src] -> emit (Pmov_mr (addr,src)) | Mint64, [IR src1; IR src2] -> - let addr' = offset_addressing addr (coqint_of_camlint 4l) in + let addr' = offset_addressing addr _4 in emit (Pmov_mr (addr,src2)); emit (Pmov_mr (addr',src1)) | Mfloat32, [FR src] -> @@ -182,15 +184,13 @@ let expand_builtin_vstore_common chunk addr src tmp = let expand_builtin_vstore chunk args = match args with | IR addr :: src -> - expand_builtin_vstore_common chunk - (Addrmode(Some addr, None, Coq_inl Integers.Int.zero)) src + expand_builtin_vstore_common chunk (linear_addr addr _0) src (if addr = EAX then ECX else EAX) | _ -> assert false let expand_builtin_vstore_global chunk id ofs args = - expand_builtin_vstore_common chunk - (Addrmode(None, None, Coq_inr(id,ofs))) args EAX + expand_builtin_vstore_common chunk (global_addr id ofs) args EAX (* Handling of varargs *) @@ -202,8 +202,30 @@ let expand_builtin_va_start r = Int32.(add (add !PrintAsmaux.current_function_stacksize 4l) (mul 4l (Z.to_int32 (Conventions1.size_arguments !current_function.fn_sig)))) in - emit (Pmov_mr (Addrmode (Some r, None, Coq_inl _0),ESP)); - emit (Paddl_mi (Addrmode (Some r,None,Coq_inl _0),ofs)) + emit (Pmov_mr (linear_addr r _0, ESP)); + emit (Padd_mi (linear_addr r _0, ofs)) + +(* FMA operations *) + +(* vfmadd r1, r2, r3 performs r1 := ri * rj + rk + hence + vfmadd132 r1, r2, r3 performs r1 := r1 * r3 + r2 + vfmadd213 r1, r2, r3 performs r1 := r2 * r1 + r3 + vfmadd231 r1, r2, r3 performs r1 := r2 * r3 + r1 +*) + +let expand_fma args res i132 i213 i231 = + match args, res with + | [FR a1; FR a2; FR a3], [FR res] -> + if res = a1 then emit (i132 a1 a3 a2) (* a1 * a2 + a3 *) + else if res = a2 then emit (i213 a2 a1 a3) (* a1 * a2 + a3 *) + else if res = a3 then emit (i231 a3 a1 a2) (* a1 * a2 + a3 *) + else begin + emit (Pmovsd_ff(res, a3)); + emit (i231 res a1 a2) (* a1 * a2 + res *) + end + | _ -> + invalid_arg ("ill-formed fma builtin") (* Handling of compiler-inlined builtins *) @@ -217,95 +239,71 @@ let expand_builtin_inline name args res = | "__builtin_bswap16", [IR a1], [IR res] -> if a1 <> res then emit (Pmov_rr (res,a1)); - emit (Prolw_8 res) + emit (Pbswap16 res) | "__builtin_clz", [IR a1], [IR res] -> - emit (Pbslr (a1,res)); + emit (Pbsr (res,a1)); emit (Pxor_ri(res,coqint_of_camlint 31l)) | "__builtin_ctz", [IR a1], [IR res] -> - emit (Pbsfl (a1,res)) + emit (Pbsf (res,a1)) (* Float arithmetic *) | "__builtin_fabs", [FR a1], [FR res] -> if a1 <> res then emit (Pmovsd_ff (res,a1)); emit (Pabsd res) (* This ensures that need_masks is set to true *) | "__builtin_fsqrt", [FR a1], [FR res] -> - emit (Psqrtsd (a1,res)) + emit (Psqrtsd (res,a1)) | "__builtin_fmax", [FR a1; FR a2], [FR res] -> if res = a1 then - emit (Pmaxsd (a2,res)) + emit (Pmaxsd (res,a2)) else if res = a2 then - emit (Pmaxsd (a1,res)) + emit (Pmaxsd (res,a1)) else begin emit (Pmovsd_ff (res,a1)); - emit (Pmaxsd (a2,res)) + emit (Pmaxsd (res,a2)) end | "__builtin_fmin", [FR a1; FR a2], [FR res] -> if res = a1 then - emit (Pminsd (a2,res)) + emit (Pminsd (res,a2)) else if res = a2 then - emit (Pminsd (a1,res)) + emit (Pminsd (res,a1)) else begin emit (Pmovsd_ff (res,a1)); - emit (Pminsd (a2,res)) - end - | "__builtin_fmadd", [FR a1; FR a2; FR a3], [FR res] -> - if res = a1 then - emit (Pfmadd132 (a2,a3,res)) - else if res = a2 then - emit (Pfmadd213 (a2,a3,res)) - else if res = a3 then - emit (Pfmadd231 (a2,a3,res)) - else begin - emit (Pmovsd_ff (res,a2)); - emit (Pfmadd231 (a1,a2,res)) - end - |"__builtin_fmsub", [FR a1; FR a2; FR a3], [FR res] -> - if res = a1 then - emit (Pfmsub132 (a2,a3,res)) - else if res = a2 then - emit (Pfmsub213 (a2,a3,res)) - else if res = a3 then - emit (Pfmsub231 (a2,a3,res)) - else begin - emit (Pmovsd_ff (res,a2)); - emit (Pfmsub231 (a1,a2,res)) - end - | "__builtin_fnmadd", [FR a1; FR a2; FR a3], [FR res] -> - if res = a1 then - emit (Pfnmadd132 (a2,a3,res)) - else if res = a2 then - emit (Pfnmadd213 (a2,a3,res)) - else if res = a3 then - emit (Pfnmadd231 (a2,a3,res)) - else begin - emit (Pmovsd_ff (res,a2)); - emit (Pfnmadd231 (a1,a2,res)) - end - |"__builtin_fnmsub", [FR a1; FR a2; FR a3], [FR res] -> - if res = a1 then - emit (Pfnmsub132 (a2,a3,res)) - else if res = a2 then - emit (Pfnmsub213 (a2,a3,res)) - else if res = a3 then - emit (Pfnmsub231 (a2,a3,res)) - else begin - emit (Pmovsd_ff (res,a2)); - emit (Pfnmsub231 (a1,a2,res)) + emit (Pminsd (res,a2)) end + | "__builtin_fmadd", _, _ -> + expand_fma args res + (fun r1 r2 r3 -> Pfmadd132(r1, r2, r3)) + (fun r1 r2 r3 -> Pfmadd213(r1, r2, r3)) + (fun r1 r2 r3 -> Pfmadd231(r1, r2, r3)) + | "__builtin_fmsub", _, _ -> + expand_fma args res + (fun r1 r2 r3 -> Pfmsub132(r1, r2, r3)) + (fun r1 r2 r3 -> Pfmsub213(r1, r2, r3)) + (fun r1 r2 r3 -> Pfmsub231(r1, r2, r3)) + | "__builtin_fnmadd", _, _ -> + expand_fma args res + (fun r1 r2 r3 -> Pfnmadd132(r1, r2, r3)) + (fun r1 r2 r3 -> Pfnmadd213(r1, r2, r3)) + (fun r1 r2 r3 -> Pfnmadd231(r1, r2, r3)) + | "__builtin_fnmsub", _, _ -> + expand_fma args res + (fun r1 r2 r3 -> Pfnmsub132(r1, r2, r3)) + (fun r1 r2 r3 -> Pfnmsub213(r1, r2, r3)) + (fun r1 r2 r3 -> Pfnmsub231(r1, r2, r3)) (* 64-bit integer arithmetic *) | "__builtin_negl", [IR ah; IR al], [IR rh; IR rl] -> assert (ah = EDX && al = EAX && rh = EDX && rl = EAX); emit (Pneg EAX); - emit (Padcl_ir (_0,EDX)); + emit (Padc_ri (EDX,_0)); emit (Pneg EDX) | "__builtin_addl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] -> assert (ah = EDX && al = EAX && bh = ECX && bl = EBX && rh = EDX && rl = EAX); - emit (Paddl (EBX,EAX)); - emit (Padcl_rr (ECX,EDX)) + emit (Padd_rr (EAX,EBX)); + emit (Padc_rr (EDX,ECX)) | "__builtin_subl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] -> assert (ah = EDX && al = EAX && bh = ECX && bl = EBX && rh = EDX && rl = EAX); emit (Psub_rr (EAX,EBX)); - emit (Psbbl (ECX,EDX)) + emit (Psbb_rr (EDX,ECX)) | "__builtin_mull", [IR a; IR b], [IR rh; IR rl] -> assert (a = EAX && b = EDX && rh = EDX && rl = EAX); emit (Pmul_r EDX) @@ -313,18 +311,17 @@ let expand_builtin_inline name args res = | "__builtin_read16_reversed", [IR a1], [IR res] -> let addr = Addrmode(Some a1,None,Coq_inl _0) in emit (Pmovzw_rm (res,addr)); - emit (Prolw_8 res) + emit (Pbswap16 res) | "__builtin_read32_reversed", [IR a1], [IR res] -> let addr = Addrmode(Some a1,None,Coq_inl _0) in emit (Pmov_rm (res,addr)); emit (Pbswap res) | "__builtin_write16_reversed", [IR a1; IR a2], _ -> let tmp = if a1 = ECX then EDX else ECX in - let addr = Addrmode(Some a1,None,Coq_inl _0) in if a2 <> tmp then emit (Pmov_rr (tmp,a2)); - emit (Pxchg (tmp,tmp)); - emit (Pmovw_mr (addr,tmp)) + emit (Pbswap16 tmp); + emit (Pmovw_mr (linear_addr a1 _0,tmp)) | "__builtin_write32_reversed", [IR a1; IR a2], _ -> let tmp = if a1 = ECX then EDX else ECX in let addr = Addrmode(Some a1,None,Coq_inl _0) in @@ -348,17 +345,17 @@ let expand_instruction instr = match instr with | Pallocframe (sz, ofs_ra, ofs_link) -> let sz = sp_adjustment sz in - let addr = Addrmode(Some ESP,None,Coq_inl (coqint_of_camlint (Int32.add sz 4l))) in - let addr' = Addrmode (Some ESP, None, Coq_inl ofs_link) in + let addr = linear_addr ESP (coqint_of_camlint (Int32.add sz 4l)) in + let addr' = linear_addr ESP ofs_link in let sz' = coqint_of_camlint sz in - emit (Psubl_ri (ESP,sz')); + emit (Psub_ri (ESP,sz')); emit (Pcfi_adjust sz'); emit (Plea (EDX,addr)); emit (Pmov_mr (addr',EDX)); PrintAsmaux.current_function_stacksize := sz | Pfreeframe(sz, ofs_ra, ofs_link) -> let sz = sp_adjustment sz in - emit (Paddl_ri (ESP,coqint_of_camlint sz)) + emit (Padd_ri (ESP,coqint_of_camlint sz)) | Pbuiltin (ef,args, res) -> begin match ef with @@ -373,8 +370,10 @@ let expand_instruction instr = | EF_vstore_global(chunk, id, ofs) -> expand_builtin_vstore_global chunk id ofs args | EF_memcpy(sz, al) -> - expand_builtin_memcpy (Int32.to_int (camlint_of_coqint sz)) - (Int32.to_int (camlint_of_coqint al)) args + expand_builtin_memcpy + (Int32.to_int (camlint_of_coqint sz)) + (Int32.to_int (camlint_of_coqint al)) + args | EF_annot_val(txt, targ) -> expand_annot_val txt targ args res | EF_inline_asm(txt, sg, clob) -> diff --git a/ia32/TargetPrinter.ml b/ia32/TargetPrinter.ml index 18aacebf..13bc6826 100644 --- a/ia32/TargetPrinter.ml +++ b/ia32/TargetPrinter.ml @@ -580,77 +580,76 @@ module Target(System: SYSTEM):TARGET = end else begin fprintf oc " ret\n" end - (** Pseudo-instructions *) - | Padcl_ir (n,res) -> + (* Instructions produced by Asmexpand *) + | Padc_ri (res,n) -> fprintf oc " adcl $%ld, %a\n" (camlint_of_coqint n) ireg res; - | Padcl_rr (a1,res) -> + | Padc_rr (res,a1) -> fprintf oc " adcl %a, %a\n" ireg a1 ireg res; - | Paddl (a1,res) -> + | Padd_ri (res,n) -> + fprintf oc " addl $%ld, %a\n" (camlint_of_coqint n) ireg res + | Padd_rr (res,a1) -> fprintf oc " addl %a, %a\n" ireg a1 ireg res; - | Paddl_mi (addr,n) -> + | Padd_mi (addr,n) -> fprintf oc " addl $%ld, %a\n" (camlint_of_coqint n) addressing addr - | Paddl_ri (res,n) -> - fprintf oc " addl $%ld, %a\n" (camlint_of_coqint n) ireg res - | Pbsfl (a1,res) -> + | Pbsf (res,a1) -> fprintf oc " bsfl %a, %a\n" ireg a1 ireg res - | Pbslr (a1,res) -> + | Pbsr (res,a1) -> fprintf oc " bsrl %a, %a\n" ireg a1 ireg res | Pbswap res -> fprintf oc " bswap %a\n" ireg res + | Pbswap16 res -> + fprintf oc " rolw $8, %a\n" ireg16 res | Pcfi_adjust sz -> cfi_adjust oc (camlint_of_coqint sz) - | Pfmadd132 (a1,a2,res) -> + | Pfmadd132 (res,a1,a2) -> fprintf oc " vfmadd132sd %a, %a, %a\n" freg a1 freg a2 freg res - | Pfmadd213 (a1,a2,res) -> + | Pfmadd213 (res,a1,a2) -> fprintf oc " vfmadd213sd %a, %a, %a\n" freg a1 freg a2 freg res - | Pfmadd231 (a1,a2,res) -> + | Pfmadd231 (res,a1,a2) -> fprintf oc " vfmadd231sd %a, %a, %a\n" freg a1 freg a2 freg res - | Pfmsub132 (a1,a2,res) -> + | Pfmsub132 (res,a1,a2) -> fprintf oc " vfmsub132sd %a, %a, %a\n" freg a1 freg a2 freg res - | Pfmsub213 (a1,a2,res) -> + | Pfmsub213 (res,a1,a2) -> fprintf oc " vfmsub213sd %a, %a, %a\n" freg a1 freg a2 freg res - | Pfmsub231 (a1,a2,res) -> + | Pfmsub231 (res,a1,a2) -> fprintf oc " vfmsub231sd %a, %a, %a\n" freg a1 freg a2 freg res - | Pfnmadd132 (a1,a2,res) -> + | Pfnmadd132 (res,a1,a2) -> fprintf oc " vfnmadd132sd %a, %a, %a\n" freg a1 freg a2 freg res - | Pfnmadd213 (a1,a2,res) -> + | Pfnmadd213 (res,a1,a2) -> fprintf oc " vfnmadd213sd %a, %a, %a\n" freg a1 freg a2 freg res - | Pfnmadd231 (a1,a2,res) -> + | Pfnmadd231 (res,a1,a2) -> fprintf oc " vfnmadd231sd %a, %a, %a\n" freg a1 freg a2 freg res - | Pfnmsub132 (a1,a2,res) -> + | Pfnmsub132 (res,a1,a2) -> fprintf oc " vfnmsub132sd %a, %a, %a\n" freg a1 freg a2 freg res - | Pfnmsub213 (a1,a2,res) -> + | Pfnmsub213 (res,a1,a2) -> fprintf oc " vfnmsub213sd %a, %a, %a\n" freg a1 freg a2 freg res - | Pfnmsub231 (a1,a2,res) -> + | Pfnmsub231 (res,a1,a2) -> fprintf oc " vfnmsub231sd %a, %a, %a\n" freg a1 freg a2 freg res - | Pmaxsd (a1,res) -> + | Pmaxsd (res,a1) -> fprintf oc " maxsd %a, %a\n" freg a1 freg res - | Pminsd (a1,res) -> + | Pminsd (res,a1) -> fprintf oc " minsd %a, %a\n" freg a1 freg res - | Pmovb_rm (r1,a) -> - fprintf oc " movb %a, %a\n" addressing a ireg8 r1 + | Pmovb_rm (rd,a) -> + fprintf oc " movb %a, %a\n" addressing a ireg8 rd + | Pmovq_mr(a, rs) -> + fprintf oc " movq %a, %a\n" freg rs addressing a | Pmovq_rm(rd, a) -> fprintf oc " movq %a, %a\n" addressing a freg rd - | Pmovq_mr(a, r1) -> - fprintf oc " movq %a, %a\n" freg r1 addressing a | Pmovsb -> fprintf oc " movsb\n"; | Pmovsw -> fprintf oc " movsw\n"; - | Pmovw_rm (r1, a) -> - fprintf oc " movw %a, %a\n" addressing a ireg16 r1 + | Pmovw_rm (rd, a) -> + fprintf oc " movw %a, %a\n" addressing a ireg16 rd | Prep_movsl -> fprintf oc " rep movsl\n" - | Prolw_8 res -> - fprintf oc " rolw $8, %a\n" ireg16 res - | Psbbl (a1,res) -> + | Psbb_rr (res,a1) -> fprintf oc " sbbl %a, %a\n" ireg a1 ireg res - | Psqrtsd (a1,res) -> + | Psqrtsd (res,a1) -> fprintf oc " sqrtsd %a, %a\n" freg a1 freg res - | Psubl_ri (res,n) -> + | Psub_ri (res,n) -> fprintf oc " subl $%ld, %a\n" (camlint_of_coqint n) ireg res; - | Pxchg (a1,a2) -> - fprintf oc " xchg %a, %a\n" ireg8 a1 high_ireg8 a2; + (** Pseudo-instructions *) | Plabel(l) -> fprintf oc "%a:\n" label (transl_label l) | Pallocframe(sz, ofs_ra, ofs_link) -- cgit From d0319112f1fc01b648542e66eb1597ac7b14e49c Mon Sep 17 00:00:00 2001 From: Xavier Leroy Date: Fri, 21 Aug 2015 11:55:28 +0200 Subject: Fix bugs in Asmexpand.ml for ARM. --- arm/Asmexpand.ml | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/arm/Asmexpand.ml b/arm/Asmexpand.ml index b4508ace..907d3199 100644 --- a/arm/Asmexpand.ml +++ b/arm/Asmexpand.ml @@ -168,7 +168,7 @@ let expand_builtin_vload chunk args res = let expand_builtin_vload_global chunk id ofs args res = emit (Ploadsymbol (IR14,id,ofs)); - expand_builtin_vload_common chunk args res + expand_builtin_vload_common chunk (IR IR14 :: args) res let expand_builtin_vstore_common chunk args = match chunk, args with @@ -180,11 +180,11 @@ let expand_builtin_vstore_common chunk args = emit (Pstr (src,addr, SOimm _0)) | Mint64, [IR addr; IR src1; IR src2] -> emit (Pstr (src2,addr,SOimm _0)); - emit (Pstr (src1,addr,SOimm _0)) + emit (Pstr (src1,addr,SOimm _4)) | Mfloat32, [IR addr; FR src] -> - emit (Pfstd (src,addr,_0)) + emit (Pfsts (src,addr,_0)) | Mfloat64, [IR addr; FR src] -> - emit (Pfsts (src,addr,_0)); + emit (Pfstd (src,addr,_0)); | _ -> assert false @@ -193,7 +193,7 @@ let expand_builtin_vstore chunk args = let expand_builtin_vstore_global chunk id ofs args = emit (Ploadsymbol (IR14,id,ofs)); - expand_builtin_vstore_common chunk args + expand_builtin_vstore_common chunk (IR IR14 :: args) (* Handling of varargs *) @@ -269,10 +269,10 @@ let expand_builtin_inline name args res = | "__builtin_read32_reversed", [IR a1], [IR res] -> emit (Pldr (res,a1,SOimm _0)); emit (Prev (IR res,IR res)); - | "__builtin_write16_reverse", [IR a1; IR a2], _ -> + | "__builtin_write16_reversed", [IR a1; IR a2], _ -> emit (Prev16 (IR IR14, IR a2)); emit (Pstrh (IR14, a1, SOimm _0)) - | "__builtin_write32_reverse", [IR a1; IR a2], _ -> + | "__builtin_write32_reversed", [IR a1; IR a2], _ -> emit (Prev (IR IR14, IR a2)); emit (Pstr (IR14, a1, SOimm _0)) (* Synchronization *) @@ -301,7 +301,8 @@ let expand_instruction instr = end; expand_subimm IR13 IR13 sz; emit (Pcfi_adjust sz); - emit (Pstr (IR12,IR13,SOimm ofs)) + emit (Pstr (IR12,IR13,SOimm ofs)); + PrintAsmaux.current_function_stacksize := camlint_of_coqint sz | Pfreeframe (sz, ofs) -> let sz = if (!current_function).fn_sig.sig_cc.cc_vararg -- cgit From 44df927b7e562240ca7ecbb29be8b5b1881f3c05 Mon Sep 17 00:00:00 2001 From: Xavier Leroy Date: Fri, 21 Aug 2015 14:08:31 +0200 Subject: Asmexpand for ARM: fixed bug in Pfreeframe. Plus: update comments and credit Bernhard Schommer. --- arm/Asmexpand.ml | 6 +++--- ia32/Asmexpand.ml | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arm/Asmexpand.ml b/arm/Asmexpand.ml index 907d3199..ca30924c 100644 --- a/arm/Asmexpand.ml +++ b/arm/Asmexpand.ml @@ -3,6 +3,7 @@ (* The Compcert verified compiler *) (* *) (* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* Bernhard Schommer, AbsInt Angewandte Informatik GmbH *) (* *) (* Copyright Institut National de Recherche en Informatique et en *) (* Automatique. All rights reserved. This file is distributed *) @@ -11,8 +12,7 @@ (* *********************************************************************) (* Expanding built-ins and some pseudo-instructions by rewriting - of the ARM assembly code. Currently not everything done, this - expansion is performed on the fly in PrintAsm. *) + of the ARM assembly code. *) open Asm open Asmexpandaux @@ -310,7 +310,7 @@ let expand_instruction instr = else sz in if Asmgen.is_immed_arith sz then emit (Padd (IR13,IR13,SOimm sz)) - else emit (Pldr (IR13,IR13,SOimm sz)) + else emit (Pldr (IR13,IR13,SOimm ofs)) | Pbuiltin (ef,args,res) -> begin match ef with | EF_builtin (name,sg) -> diff --git a/ia32/Asmexpand.ml b/ia32/Asmexpand.ml index 177f6989..137b61b5 100644 --- a/ia32/Asmexpand.ml +++ b/ia32/Asmexpand.ml @@ -3,6 +3,7 @@ (* The Compcert verified compiler *) (* *) (* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* Bernhard Schommer, AbsInt Angewandte Informatik GmbH *) (* *) (* Copyright Institut National de Recherche en Informatique et en *) (* Automatique. All rights reserved. This file is distributed *) @@ -11,8 +12,7 @@ (* *********************************************************************) (* Expanding built-ins and some pseudo-instructions by rewriting - of the IA32 assembly code. Currently not done, this expansion - is performed on the fly in PrintAsm. *) + of the IA32 assembly code. *) open Asm open Asmexpandaux -- cgit From adeea8cdb4c880973fd9404659c09509e70197cd Mon Sep 17 00:00:00 2001 From: Xavier Leroy Date: Fri, 21 Aug 2015 14:14:53 +0200 Subject: Don't use strdup(), it is not ISO C99. --- test/c/knucleotide.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/c/knucleotide.c b/test/c/knucleotide.c index f7438926..6bd0e9e7 100644 --- a/test/c/knucleotide.c +++ b/test/c/knucleotide.c @@ -62,10 +62,11 @@ struct ht_node *ht_node_create(char *key) { perror("malloc ht_node"); exit(1); } - if ((newkey = (char *)strdup(key)) == 0) { + if ((newkey = malloc(strlen(key) + 1)) == 0) { perror("strdup newkey"); exit(1); } + strcpy(newkey, key); node->key = newkey; node->val = 0; node->next = (struct ht_node *)NULL; -- cgit From 806872b5a7c8dc4e69e7b36bd49019af2871c70e Mon Sep 17 00:00:00 2001 From: Xavier Leroy Date: Fri, 21 Aug 2015 14:17:21 +0200 Subject: Erase incomplete file .depend.extr if "make depend" fails. --- Makefile.extr | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Makefile.extr b/Makefile.extr index 4e17e904..1bb3eec8 100644 --- a/Makefile.extr +++ b/Makefile.extr @@ -162,10 +162,9 @@ clean: depend: $(GENERATED) @echo "Analyzing OCaml dependencies" - @$(OCAMLDEP) $(foreach d,$(DIRS),$(wildcard $(d)/*.mli $(d)/*.ml)) >.depend.extr - @$(OCAMLDEP) $(GENERATED) >> .depend.extr + @$(OCAMLDEP) $(foreach d,$(DIRS),$(wildcard $(d)/*.mli $(d)/*.ml)) $(GENERATED) >.depend.extr || { rm -f .depend.extr; exit 2; } ifneq ($(strip $(DIRS_P4)),) - @$(OCAMLDEP_P4) $(foreach d,$(DIRS_P4),$(wildcard $(d)/*.mli $(d)/*.ml)) >>.depend.extr + @$(OCAMLDEP_P4) $(foreach d,$(DIRS_P4),$(wildcard $(d)/*.mli $(d)/*.ml)) >>.depend.extr || { rm -f .depend.extr; exit 2; } endif -- cgit From c6567a3f0a16050fd04469fdcc7a575f81c0c8f4 Mon Sep 17 00:00:00 2001 From: Xavier Leroy Date: Fri, 21 Aug 2015 14:30:05 +0200 Subject: test/regression: test packedstruct1 only if unaligned accesses are supported. Also: exit on error when a test fails. --- test/Makefile | 2 +- test/c/Makefile | 2 +- test/compression/Makefile | 2 +- test/raytracer/Makefile | 2 +- test/regression/Makefile | 13 +++++++++++-- 5 files changed, 15 insertions(+), 6 deletions(-) diff --git a/test/Makefile b/test/Makefile index ab44be54..5aa115d8 100644 --- a/test/Makefile +++ b/test/Makefile @@ -4,7 +4,7 @@ all: for i in $(DIRS); do $(MAKE) -C $$i all; done test: - for i in $(DIRS); do $(MAKE) -C $$i test; done + set -e; for i in $(DIRS); do $(MAKE) -C $$i test; done bench: for i in $(DIRS); do $(MAKE) -C $$i bench; done diff --git a/test/c/Makefile b/test/c/Makefile index a81a9d5c..59a0d834 100644 --- a/test/c/Makefile +++ b/test/c/Makefile @@ -38,7 +38,7 @@ test: @for i in $(PROGS); do \ if ./$$i.compcert | cmp -s - Results/$$i; \ then echo "$$i: passed"; \ - else echo "$$i: FAILED"; \ + else echo "$$i: FAILED"; exit 2; \ fi; \ done diff --git a/test/compression/Makefile b/test/compression/Makefile index e35e1a1c..d951c08f 100644 --- a/test/compression/Makefile +++ b/test/compression/Makefile @@ -44,7 +44,7 @@ test: ./$$i -d -i $(TESTCOMPR) -o $(TESTEXPND); \ if cmp $(TESTFILE) $(TESTEXPND); \ then echo "$$i: passed"; \ - else echo "$$i: FAILED"; \ + else echo "$$i: FAILED"; exit 2; \ fi; \ done rm -f $(TESTCOMPR) $(TESTEXPND) diff --git a/test/raytracer/Makefile b/test/raytracer/Makefile index c6eb1907..1d4882bc 100644 --- a/test/raytracer/Makefile +++ b/test/raytracer/Makefile @@ -54,7 +54,7 @@ test: ./render < kal.gml @if cmp kal.ppm Results/kal.ppm; \ then echo "raytracer: passed"; \ - else echo "raytracer: FAILED"; \ + else echo "raytracer: FAILED"; exit 2; \ fi bench: diff --git a/test/regression/Makefile b/test/regression/Makefile index 00c80047..2f70c63a 100644 --- a/test/regression/Makefile +++ b/test/regression/Makefile @@ -23,9 +23,18 @@ TESTS=int32 int64 floats floats-basics \ TESTS_COMP=attribs1 bitfields1 bitfields2 bitfields3 bitfields4 \ bitfields5 bitfields6 bitfields7 bitfields8 \ - builtins-$(ARCH) packedstruct1 packedstruct2 alignas \ + builtins-$(ARCH) packedstruct2 alignas \ varargs1 varargs2 sections alias +# packedstruct1 makes unaligned memory accesses + +ifeq ($(ARCH),powerpc) +TESTS_COMP+=packedstruct1 +endif +ifeq ($(ARCH),ia32) +TESTS_COMP+=packedstruct1 +endif + # Can run, both in compiled mode and in interpreter mode, # but produce processor-dependent results, so no reference output in Results @@ -66,7 +75,7 @@ test: @for i in $(TESTS) $(TESTS_COMP); do \ if ./$$i.compcert | cmp -s - Results/$$i; \ then echo "$$i: passed"; \ - else echo "$$i: FAILED"; \ + else echo "$$i: FAILED"; exit 2; \ fi; \ done @for i in $(TESTS); do \ -- cgit