diff options
-rw-r--r-- | driver/Clflags.ml | 3 | ||||
-rw-r--r-- | driver/Driver.ml | 12 | ||||
-rw-r--r-- | mppa_k1c/SelectOp.vp | 12 | ||||
-rw-r--r-- | mppa_k1c/SelectOpproof.v | 18 | ||||
-rw-r--r-- | mppa_k1c/TargetPrinter.ml | 54 | ||||
-rw-r--r-- | runtime/Makefile | 6 | ||||
-rw-r--r-- | runtime/mppa_k1c/i32_divmod.S | 120 | ||||
-rw-r--r-- | runtime/mppa_k1c/i64_sdiv.c | 9 | ||||
-rw-r--r-- | runtime/mppa_k1c/i64_smod.c | 35 | ||||
-rw-r--r-- | runtime/mppa_k1c/i64_udiv.c | 6 | ||||
-rw-r--r-- | runtime/mppa_k1c/i64_udivmod.c | 2 | ||||
-rw-r--r-- | runtime/mppa_k1c/i64_umod.c | 6 | ||||
-rw-r--r-- | test/monniaux/division/sum_div.c | 18 |
13 files changed, 221 insertions, 80 deletions
diff --git a/driver/Clflags.ml b/driver/Clflags.ml index b1afab6f..fd5f0e68 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -71,3 +71,6 @@ let option_fglobaladdrtmp = ref false let option_fglobaladdroffset = ref false let option_fxsaddr = ref true let option_coalesce_mem = ref true + +let option_div_i32 = ref "stsud" +let option_div_i64 = ref "stsud" diff --git a/driver/Driver.ml b/driver/Driver.ml index cfafcaa3..314cf31c 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -265,9 +265,13 @@ let num_input_files = ref 0 let cmdline_actions = let f_opt name ref = [Exact("-f" ^ name), Set ref; Exact("-fno-" ^ name), Unset ref] in - let f_opt_str name ref strref = + let f_opt_str name ref strref default = [Exact("-f" ^ name ^ "="), String - (fun s -> (strref := (if s == "" then "list" else s)); ref := true) + (fun s -> (strref := (if s = "" then default else s)); ref := true) + ] in + let f_str name strref default = + [Exact("-f" ^ name ^ "="), String + (fun s -> (strref := (if s = "" then default else s))) ] in [ (* Getting help *) @@ -369,13 +373,15 @@ let cmdline_actions = @ f_opt "cse" option_fcse @ f_opt "redundancy" option_fredundancy @ f_opt "postpass" option_fpostpass - @ f_opt_str "postpass" option_fpostpass option_fpostpass_sched + @ f_opt_str "postpass" option_fpostpass option_fpostpass_sched "list" @ f_opt "inline" option_finline @ f_opt "inline-functions-called-once" option_finline_functions_called_once @ f_opt "globaladdrtmp" option_fglobaladdrtmp @ f_opt "globaladdroffset" option_fglobaladdroffset @ f_opt "xsaddr" option_fxsaddr @ f_opt "coalesce-mem" option_coalesce_mem + @ f_str "div-i32" option_div_i32 "stsud" + @ f_str "div-i64" option_div_i64 "stsud" (* Code generation options *) @ f_opt "fpu" option_ffpu @ f_opt "sse" option_ffpu (* backward compatibility *) diff --git a/mppa_k1c/SelectOp.vp b/mppa_k1c/SelectOp.vp index aac3010e..6adcebe5 100644 --- a/mppa_k1c/SelectOp.vp +++ b/mppa_k1c/SelectOp.vp @@ -423,18 +423,10 @@ Definition 
mods_base (e1: expr) (e2: expr) := Eexternal i32_smod sig_ii_i (e1 ::: e2 ::: Enil). Definition divu_base (e1: expr) (e2: expr) := - Eop Olowlong - ((Eexternal i64_udiv sig_ll_l - ((Eop Ocast32unsigned (e1 ::: Enil))::: - (Eop Ocast32unsigned (e2 ::: Enil))::: Enil)) - :::Enil). + Eexternal i32_udiv sig_ii_i (e1 ::: e2 ::: Enil). Definition modu_base (e1: expr) (e2: expr) := - Eop Olowlong - ((Eexternal i64_umod sig_ll_l - ((Eop Ocast32unsigned (e1 ::: Enil))::: - (Eop Ocast32unsigned (e2 ::: Enil))::: Enil)) - :::Enil). + Eexternal i32_umod sig_ii_i (e1 ::: e2 ::: Enil). Definition shrximm (e1: expr) (n2: int) := if Int.eq n2 Int.zero then e1 else Eop (Oshrximm n2) (e1:::Enil). diff --git a/mppa_k1c/SelectOpproof.v b/mppa_k1c/SelectOpproof.v index d22725d5..22eecfad 100644 --- a/mppa_k1c/SelectOpproof.v +++ b/mppa_k1c/SelectOpproof.v @@ -872,6 +872,12 @@ Theorem eval_divu_base: Val.divu x y = Some z -> exists v, eval_expr ge sp e m le (divu_base a b) v /\ Val.lessdef z v. Proof. + intros; unfold divu_base. + econstructor; split. eapply eval_helper_2; eauto. DeclHelper. UseHelper. auto. +Qed. + +(* For using 64-bit unsigned division for 32-bit + intros until z. intros Hax Hby Hdiv. unfold divu_base. pose proof (divu_is_divlu x y) as DIVU. @@ -891,7 +897,8 @@ Proof. } congruence. Qed. - + *) + Theorem eval_modu_base: forall le a b x y z, eval_expr ge sp e m le a x -> @@ -899,6 +906,12 @@ Theorem eval_modu_base: Val.modu x y = Some z -> exists v, eval_expr ge sp e m le (modu_base a b) v /\ Val.lessdef z v. Proof. + intros; unfold modu_base. + econstructor; split. eapply eval_helper_2; eauto. DeclHelper. UseHelper. auto. +Qed. + +(* for using 64-bit unsigned modulo for 32-bit + intros until z. intros Hax Hby Hmod. unfold modu_base. pose proof (modu_is_modlu x y) as MODU. @@ -918,7 +931,8 @@ Proof. } congruence. Qed. 
- + *) + Theorem eval_shrximm: forall le a n x z, eval_expr ge sp e m le a x -> diff --git a/mppa_k1c/TargetPrinter.ml b/mppa_k1c/TargetPrinter.ml index 96779517..2bdd0978 100644 --- a/mppa_k1c/TargetPrinter.ml +++ b/mppa_k1c/TargetPrinter.ml @@ -34,11 +34,57 @@ module Target (*: TARGET*) = let comment = "#" + type idiv_function_kind = + | Idiv_system + | Idiv_stsud + | Idiv_fp;; + + let idiv_function_kind = function + "stsud" -> Idiv_stsud + | "system" -> Idiv_system + | "fp" -> Idiv_fp + | _ -> failwith "unknown integer division kind";; + + let idiv_function_kind_32bit () = idiv_function_kind !Clflags.option_div_i32;; + let idiv_function_kind_64bit () = idiv_function_kind !Clflags.option_div_i64;; + let subst_symbol = function - "__compcert_i64_udiv" -> "__udivdi3" - | "__compcert_i64_sdiv" -> "__divdi3" - | "__compcert_i64_umod" -> "__umoddi3" - | "__compcert_i64_smod" -> "__moddi3" + "__compcert_i64_udiv" -> + (match idiv_function_kind_64bit () with + | Idiv_system | Idiv_fp -> "__udivdi3" + | Idiv_stsud -> "__compcert_i64_udiv_stsud") + | "__compcert_i64_sdiv" -> + (match idiv_function_kind_64bit() with + | Idiv_system | Idiv_fp -> "__divdi3" + | Idiv_stsud -> "__compcert_i64_sdiv_stsud") + | "__compcert_i64_umod" -> + (match idiv_function_kind_64bit() with + | Idiv_system | Idiv_fp -> "__umoddi3" + | Idiv_stsud -> "__compcert_i64_umod_stsud") + | "__compcert_i64_smod" -> + (match idiv_function_kind_64bit() with + | Idiv_system | Idiv_fp -> "__moddi3" + | Idiv_stsud -> "__compcert_i64_smod_stsud") + | "__compcert_i32_sdiv" as s -> + (match idiv_function_kind_32bit() with + | Idiv_system -> s + | Idiv_fp -> "__compcert_i32_sdiv_fp" + | Idiv_stsud -> "__compcert_i32_sdiv_stsud") + | "__compcert_i32_udiv" as s -> + (match idiv_function_kind_32bit() with + | Idiv_system -> s + | Idiv_fp -> "__compcert_i32_udiv_fp" + | Idiv_stsud -> "__compcert_i32_udiv_stsud") + | "__compcert_i32_smod" as s -> + (match idiv_function_kind_32bit() with + | Idiv_system -> s + | Idiv_fp -> 
"__compcert_i32_smod_fp" + | Idiv_stsud -> "__compcert_i32_smod_stsud") + | "__compcert_i32_umod" as s -> + (match idiv_function_kind_32bit() with + | Idiv_system -> s + | Idiv_fp -> "__compcert_i32_umod_fp" + | Idiv_stsud -> "__compcert_i32_umod_stsud") | "__compcert_f64_div" -> "__divdf3" | "__compcert_f32_div" -> "__divsf3" | x -> x;; diff --git a/runtime/Makefile b/runtime/Makefile index 1258d941..6bc3e7ea 100644 --- a/runtime/Makefile +++ b/runtime/Makefile @@ -23,9 +23,9 @@ OBJS=i64_dtou.o i64_utod.o i64_utof.o vararg.o else ifeq ($(ARCH),powerpc64) OBJS=i64_dtou.o i64_stof.o i64_utod.o i64_utof.o vararg.o else ifeq ($(ARCH),mppa_k1c) -OBJS=i64_umod.o i64_udiv.o i64_udivmod.o i64_sdiv.o i64_smod.o vararg.o\ - i64_dtos.o i64_dtou.o i64_utod.o i64_utof.o i64_stod.o i64_stof.o\ - i64_shl.o i64_shr.o +OBJS=i64_umod.o i64_udiv.o i64_udivmod.o i64_sdiv.o i64_smod.o \ + i64_udivmod_stsud.o i32_divmod.o \ + vararg.o # Missing: i64_utod.o i64_utof.o i64_stod.o i64_stof.o DOMAKE:=$(shell (cd mppa_k1c && make)) else diff --git a/runtime/mppa_k1c/i32_divmod.S b/runtime/mppa_k1c/i32_divmod.S new file mode 100644 index 00000000..d2b4e8d5 --- /dev/null +++ b/runtime/mppa_k1c/i32_divmod.S @@ -0,0 +1,120 @@ +/* K1C +32-bit unsigned/signed integer division/modulo (udiv5) + +D. Monniaux, CNRS, VERIMAG */ + + + .globl __compcert_i32_sdiv_fp +__compcert_i32_sdiv_fp: + compw.lt $r2 = $r0, 0 + compw.lt $r3 = $r1, 0 + absw $r0 = $r0 + absw $r1 = $r1 + ;; + xord $r2 = $r2, $r3 + make $r3 = 0 + goto __compcert_i32_divmod_fp + ;; + + .globl __compcert_i32_smod_fp +__compcert_i32_smod_fp: + compw.lt $r2 = $r0, 0 + absw $r0 = $r0 + absw $r1 = $r1 + make $r3 = 1 + goto __compcert_i32_divmod_fp + ;; + + .globl __compcert_i32_umod_fp +__compcert_i32_umod_fp: + make $r2 = 0 + make $r3 = 1 + goto __compcert_i32_divmod_fp + ;; + + .globl __compcert_i32_udiv_fp +__compcert_i32_udiv_fp: + make $r2 = 0 + make $r3 = 0 + ;; + +/* +r0 : a +r1 : b +r2 : negate result? +r3 : return mod? 
+*/ + + .globl __compcert_i32_divmod_fp +__compcert_i32_divmod_fp: + zxwd $r7 = $r1 + zxwd $r1 = $r0 +#ifndef NO_SHORTCUT + compw.ltu $r8 = $r0, $r1 + cb.weqz $r1? .ERR # return 0 if divide by 0 +#endif + ;; +# a in r1, b in r7 + floatud.rn.s $r5 = $r7, 0 +#ifndef NO_SHORTCUT + compd.eq $r8 = $r7, 1 + cb.wnez $r8? .LESS # shortcut if a < b +#endif + ;; +# b (double) in r5 + make $r6 = 0x3ff0000000000000 # 1.0 + fnarrowdw.rn.s $r11 = $r5 +# cb.wnez $r8, .RET1 # if b=1 + ;; +# b (single) in r11 + floatud.rn.s $r10 = $r1, 0 + finvw.rn.s $r11 = $r11 + ;; + fwidenlwd.s $r11 = $r11 + ;; +# invb0 in r11 + copyd $r9 = $r11 + ffmsd.rn.s $r6 = $r11, $r5 +# alpha in r6 + ;; + ffmad.rn.s $r9 = $r11, $r6 +# 1/b in r9 + ;; + fmuld.rn.s $r0 = $r10, $r9 +# a/b in r0 + ;; + fixedud.rn.s $r0 = $r0, 0 + ;; + msbfd $r1 = $r0, $r7 + ;; + addd $r6 = $r0, -1 + addd $r8 = $r1, $r7 + ;; + cmoved.dltz $r1? $r0 = $r6 + cmoved.dltz $r1? $r1 = $r8 + ;; + negw $r4 = $r0 + negw $r5 = $r1 + ;; + cmoved.wnez $r2? $r0 = $r4 + cmoved.wnez $r2? $r1 = $r5 + ;; +.END: + cmoved.wnez $r3? $r0 = $r1 + ret + ;; +#ifndef NO_SHORTCUT + +.LESS: + make $r0 = 0 + negw $r5 = $r1 + ;; + cmoved.wnez $r2? 
$r1 = $r5 + goto .END + ;; + +.ERR: + make $r0 = 0 + ret + ;; +#endif diff --git a/runtime/mppa_k1c/i64_sdiv.c b/runtime/mppa_k1c/i64_sdiv.c index 892aac07..df308736 100644 --- a/runtime/mppa_k1c/i64_sdiv.c +++ b/runtime/mppa_k1c/i64_sdiv.c @@ -1,16 +1,14 @@ extern long __divdi3 (long a, long b); -long i64_sdiv (long a, long b) -{ - return __divdi3 (a, b); -} - int i32_sdiv (int a, int b) { return __divdi3 (a, b); } +/* #define COMPCERT_FE_EXCEPT */ +#ifdef COMPCERT_FE_EXCEPT #ifdef __K1C_COS__ + #include <hal/cos_registers.h> #define K1_SFR_CS_IO_MASK COS_SFR_CS_IO_MASK #define K1_SFR_CS_DZ_MASK COS_SFR_CS_DZ_MASK @@ -34,3 +32,4 @@ int feclearexcept(int excepts) { __builtin_k1_wfxl(K1_SFR_CS, mask); return 0; } +#endif diff --git a/runtime/mppa_k1c/i64_smod.c b/runtime/mppa_k1c/i64_smod.c index 26ffb39b..3371eecf 100644 --- a/runtime/mppa_k1c/i64_smod.c +++ b/runtime/mppa_k1c/i64_smod.c @@ -1,40 +1,5 @@ -#if COMPLIQUE -unsigned long long -udivmoddi4(unsigned long long num, unsigned long long den, int modwanted); - -long long -i64_smod (long long a, long long b) -{ - int neg = 0; - long long res; - - if (a < 0) - { - a = -a; - neg = 1; - } - - if (b < 0) - b = -b; - - res = udivmoddi4 (a, b, 1); - - if (neg) - res = -res; - - return res; -} - -#else extern long __moddi3 (long a, long b); - -long i64_smod (long a, long b) -{ - return __moddi3 (a, b); -} - int i32_smod (int a, int b) { return __moddi3 (a, b); } -#endif diff --git a/runtime/mppa_k1c/i64_udiv.c b/runtime/mppa_k1c/i64_udiv.c index e69de29b..75f4bbf5 100644 --- a/runtime/mppa_k1c/i64_udiv.c +++ b/runtime/mppa_k1c/i64_udiv.c @@ -0,0 +1,6 @@ +extern unsigned long __udivdi3 (unsigned long a, unsigned long b); + +unsigned i32_udiv (unsigned a, unsigned b) +{ + return __udivdi3 (a, b); +} diff --git a/runtime/mppa_k1c/i64_udivmod.c b/runtime/mppa_k1c/i64_udivmod.c index 74b39874..ca48cd87 100644 --- a/runtime/mppa_k1c/i64_udivmod.c +++ b/runtime/mppa_k1c/i64_udivmod.c @@ -1,3 +1,4 @@ +#if 0 /* THIS IS THE 
PREVIOUS VERSION, USED ON BOSTAN AND ANDEY */ unsigned long long udivmoddi4(unsigned long long num, unsigned long long den, int modwanted) @@ -26,3 +27,4 @@ udivmoddi4(unsigned long long num, unsigned long long den, int modwanted) return modwanted ? r : q; } +#endif diff --git a/runtime/mppa_k1c/i64_umod.c b/runtime/mppa_k1c/i64_umod.c index e69de29b..59e35960 100644 --- a/runtime/mppa_k1c/i64_umod.c +++ b/runtime/mppa_k1c/i64_umod.c @@ -0,0 +1,6 @@ +extern unsigned long __umoddi3 (unsigned long a, unsigned long b); + +unsigned i32_umod (unsigned a, unsigned b) +{ + return __umoddi3 (a, b); +} diff --git a/test/monniaux/division/sum_div.c b/test/monniaux/division/sum_div.c deleted file mode 100644 index 87256922..00000000 --- a/test/monniaux/division/sum_div.c +++ /dev/null @@ -1,18 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include "../clock.h" - -int main(int argc, char **argv) { - unsigned modulus = argc < 2 ? 3371 : atoi(argv[1]); - clock_prepare(); - clock_start(); - unsigned total=0, total_mod=0; - for(int i=0; i<1000; i++) { - total += i; - total_mod = (total_mod + i)%modulus; - } - clock_stop(); - print_total_clock(); - printf("%u %u %d\n", total, total_mod, total%modulus == total_mod); - return 0; -} |