aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Monniaux <david.monniaux@univ-grenoble-alpes.fr>2019-04-02 09:03:25 +0200
committerDavid Monniaux <david.monniaux@univ-grenoble-alpes.fr>2019-04-02 09:03:25 +0200
commit8fb2d1a49443767ce353520ea045383430a2655e (patch)
treefc3f737acca869af517f213214f19f8bc78d50d3
parentff67146a6e1c7ee9c202b5e770706415f45a674d (diff)
parent629252b160fd4b909231bcad6edcf6f254aca0d6 (diff)
downloadcompcert-kvx-8fb2d1a49443767ce353520ea045383430a2655e.tar.gz
compcert-kvx-8fb2d1a49443767ce353520ea045383430a2655e.zip
Merge branch 'mppa-ternary' of gricad-gitlab.univ-grenoble-alpes.fr:sixcy/CompCert into mppa-ternary
-rwxr-xr-xconfigure2
-rw-r--r--mppa_k1c/Asm.v6
-rw-r--r--mppa_k1c/Asmblock.v21
-rw-r--r--mppa_k1c/Asmblockdeps.v522
-rw-r--r--mppa_k1c/Asmvliw.v346
-rw-r--r--mppa_k1c/PostpassScheduling.v7
-rw-r--r--mppa_k1c/PostpassSchedulingproof.v176
-rw-r--r--mppa_k1c/abstractbb/Parallelizability.v18
-rw-r--r--test/monniaux/bitsliced-aes/bs.c1
-rw-r--r--test/monniaux/bitsliced-aes/one_file/reduce/bitsliced-aes_compute.c1551
-rw-r--r--test/monniaux/bitsliced-aes/one_file/reduce/bitsliced-aes_main.c2
-rwxr-xr-xtest/monniaux/bitsliced-aes/one_file/reduce/compare.sh4
-rw-r--r--test/monniaux/ternary/ternary.c8
13 files changed, 1113 insertions, 1551 deletions
diff --git a/configure b/configure
index a3a2ea8c..5852205e 100755
--- a/configure
+++ b/configure
@@ -804,7 +804,7 @@ if [ "$arch" = "mppa_k1c" ]; then
cat >> Makefile.config <<EOF
ARCHDIRS=$arch $arch/lib $arch/abstractbb $arch/abstractbb/Impure
BACKENDLIB=Machblock.v Machblockgen.v Machblockgenproof.v\\
- Asmblock.v Asmblockgen.v Asmblockgenproof0.v Asmblockgenproof1.v Asmblockgenproof.v\\
+ Asmblock.v Asmblockgen.v Asmblockgenproof0.v Asmblockgenproof1.v Asmblockgenproof.v Asmvliw.v\\
ForwardSimulationBlock.v PostpassScheduling.v PostpassSchedulingproof.v\\
Asmblockdeps.v\\
AbstractBasicBlocksDef.v DepTreeTheory.v ImpDep.v Parallelizability.v\\
diff --git a/mppa_k1c/Asm.v b/mppa_k1c/Asm.v
index 1e022218..245804f3 100644
--- a/mppa_k1c/Asm.v
+++ b/mppa_k1c/Asm.v
@@ -30,7 +30,7 @@ Require Import Smallstep.
Require Import Locations.
Require Stacklayout.
Require Import Conventions.
-Require Import Asmblock.
+Require Import Asmvliw.
Require Import Linking.
Require Import Errors.
@@ -477,7 +477,7 @@ Definition program_proj (p: program) : Asmblock.program :=
End RELSEM.
-Definition semantics (p: program) := Asmblock.semantics (program_proj p).
+Definition semantics (p: program) := Asmvliw.semantics (program_proj p).
(** Determinacy of the [Asm] semantics. *)
@@ -608,7 +608,7 @@ Proof (Genv.senv_match TRANSF).
Theorem transf_program_correct:
- forward_simulation (Asmblock.semantics prog) (semantics tprog).
+ forward_simulation (Asmvliw.semantics prog) (semantics tprog).
Proof.
pose proof (match_program_transf prog tprog TRANSF) as TR.
subst. unfold semantics. rewrite transf_program_proj.
diff --git a/mppa_k1c/Asmblock.v b/mppa_k1c/Asmblock.v
index eb35ac99..2fe27143 100644
--- a/mppa_k1c/Asmblock.v
+++ b/mppa_k1c/Asmblock.v
@@ -178,7 +178,8 @@ Inductive ex_instruction : Type :=
| Pbtbl (r: ireg) (tbl: list label) (**r N-way branch through a jump table *) *)
| Pbuiltin: external_function -> list (builtin_arg preg)
- -> builtin_res preg -> ex_instruction (**r built-in function (pseudo) *).
+ -> builtin_res preg -> ex_instruction (**r built-in function (pseudo) *)
+.
(** FIXME: comment not up to date !
@@ -842,10 +843,10 @@ Section RELSEM.
set and memory state after execution of the instruction at [rs#PC],
or [Stuck] if the processor is stuck. *)
-Inductive outcome {rgset}: Type :=
- | Next (rs:rgset) (m:mem)
+Inductive outcome: Type :=
+ | Next (rs:regset) (m:mem)
| Stuck.
-Arguments outcome: clear implicits.
+(* Arguments outcome: clear implicits. *)
(** ** Arithmetic Expressions (including comparisons) *)
@@ -1323,7 +1324,7 @@ Definition store_chunk n :=
(** * basic instructions *)
-Definition exec_basic_instr (bi: basic) (rs: regset) (m: mem) : outcome regset :=
+Definition exec_basic_instr (bi: basic) (rs: regset) (m: mem) : outcome :=
match bi with
| PArith ai => Next (exec_arith_instr ai rs) m
@@ -1365,7 +1366,7 @@ Definition exec_basic_instr (bi: basic) (rs: regset) (m: mem) : outcome regset :
| Pnop => Next rs m
end.
-Fixpoint exec_body (body: list basic) (rs: regset) (m: mem): outcome regset :=
+Fixpoint exec_body (body: list basic) (rs: regset) (m: mem): outcome :=
match body with
| nil => Next rs m
| bi::body' =>
@@ -1425,7 +1426,7 @@ Fixpoint label_pos (lbl: label) (pos: Z) (lb: bblocks) {struct lb} : option Z :=
| b :: lb' => if is_label lbl b then Some pos else label_pos lbl (pos + (size b)) lb'
end.
-Definition goto_label (f: function) (lbl: label) (rs: regset) (m: mem) : outcome regset :=
+Definition goto_label (f: function) (lbl: label) (rs: regset) (m: mem) : outcome :=
match label_pos lbl 0 (fn_blocks f) with
| None => Stuck
| Some pos =>
@@ -1440,7 +1441,7 @@ Definition goto_label (f: function) (lbl: label) (rs: regset) (m: mem) : outcome
Warning: in m PC is assumed to be already pointing on the next instruction !
*)
-Definition eval_branch (f: function) (l: label) (rs: regset) (m: mem) (res: option bool) : outcome regset :=
+Definition eval_branch (f: function) (l: label) (rs: regset) (m: mem) (res: option bool) : outcome :=
match res with
| Some true => goto_label f l rs m
| Some false => Next rs m
@@ -1464,7 +1465,7 @@ Definition eval_branch (f: function) (l: label) (rs: regset) (m: mem) (res: opti
we generate cannot use those registers to hold values that must
survive the execution of the pseudo-instruction. *)
-Definition exec_control (f: function) (oc: option control) (rs: regset) (m: mem) : outcome regset :=
+Definition exec_control (f: function) (oc: option control) (rs: regset) (m: mem) : outcome :=
match oc with
| Some ic =>
(** Get/Set system registers *)
@@ -1504,7 +1505,7 @@ Definition exec_control (f: function) (oc: option control) (rs: regset) (m: mem)
| None => Next rs m
end.
-Definition exec_bblock (f: function) (b: bblock) (rs0: regset) (m: mem) : outcome regset :=
+Definition exec_bblock (f: function) (b: bblock) (rs0: regset) (m: mem) : outcome :=
match exec_body (body b) rs0 m with
| Next rs' m' =>
let rs1 := nextblock b rs' in exec_control f (exit b) rs1 m'
diff --git a/mppa_k1c/Asmblockdeps.v b/mppa_k1c/Asmblockdeps.v
index 7ea4c9ee..6d124556 100644
--- a/mppa_k1c/Asmblockdeps.v
+++ b/mppa_k1c/Asmblockdeps.v
@@ -594,10 +594,10 @@ Fixpoint trans_body (b: list basic) : list L.macro :=
| b :: lb => (trans_basic b) :: (trans_body lb)
end.
-Definition trans_pcincr (sz: Z) (k: L.macro) := [(#PC, Op (Control (OIncremPC sz)) (Name (#PC) @ Enil)) :: k].
+Definition trans_pcincr (sz: Z) (k: L.macro) := (#PC, Op (Control (OIncremPC sz)) (Name (#PC) @ Enil)) :: k.
Definition trans_block (b: Asmblock.bblock) : L.bblock :=
- trans_body (body b) ++ trans_pcincr (size b) (trans_exit (exit b)).
+ trans_body (body b) ++ (trans_pcincr (size b) (trans_exit (exit b)) :: nil).
Theorem trans_block_noheader_inv: forall bb, trans_block (no_header bb) = trans_block bb.
Proof.
@@ -855,7 +855,7 @@ Lemma forward_simu_control:
exec_control ge fn ex (nextblock b rs) m = Next rs2 m2 ->
match_states (State rs m) s ->
exists s',
- exec Ge (trans_pcincr (size b) (trans_exit ex)) s = Some s'
+ exec Ge (trans_pcincr (size b) (trans_exit ex) :: nil) s = Some s'
/\ match_states (State rs2 m2) s'.
Proof.
intros. destruct ex.
@@ -1032,7 +1032,7 @@ Lemma exec_trans_pcincr_exec:
forall rs m s b,
match_states (State rs m) s ->
exists s',
- exec Ge (trans_pcincr (size b) (trans_exit (exit b))) s = exec Ge [trans_exit (exit b)] s'
+ exec Ge (trans_pcincr (size b) (trans_exit (exit b)) :: nil) s = exec Ge [trans_exit (exit b)] s'
/\ match_states (State (nextblock b rs) m) s'.
Proof.
intros.
@@ -1564,3 +1564,517 @@ Module PChk := ParallelChecks L PosResourceSet.
Definition bblock_para_check (p: Asmblock.bblock) : bool :=
PChk.is_parallelizable (trans_block p).
+
+Require Import Asmvliw Permutation.
+Import PChk.
+
+Section SECT_PAR.
+
+Ltac Simplif :=
+ ((rewrite nextblock_inv by eauto with asmgen)
+ || (rewrite nextblock_inv1 by eauto with asmgen)
+ || (rewrite Pregmap.gss)
+ || (rewrite nextblock_pc)
+ || (rewrite Pregmap.gso by eauto with asmgen)
+ || (rewrite assign_diff by (auto; try discriminate; try (apply ppos_discr; try discriminate; congruence); try (apply ppos_pmem_discr);
+ try (apply not_eq_sym; apply ppos_discr; try discriminate; congruence); try (apply not_eq_sym; apply ppos_pmem_discr); auto))
+ || (rewrite assign_eq)
+ ); auto with asmgen.
+
+Ltac Simpl := repeat Simplif.
+
+Arguments Pos.add: simpl never.
+Arguments ppos: simpl never.
+
+Variable Ge: genv.
+
+Lemma trans_arith_par_correct ge fn rsr mr sr rsw mw sw rsw' mw' i:
+ Ge = Genv ge fn ->
+ match_states (State rsr mr) sr ->
+ match_states (State rsw mw) sw ->
+ parexec_arith_instr ge i rsr rsw = rsw' -> mw = mw' ->
+ exists sw',
+ macro_prun Ge (trans_arith i) sw sr sr = Some sw'
+ /\ match_states (State rsw' mw') sw'.
+Proof.
+ intros GENV MSR MSW PARARITH MWEQ. subst. inv MSR. inv MSW.
+ unfold parexec_arith_instr. destruct i.
+(* Ploadsymbol *)
+ - destruct i. eexists; split; [| split].
+ * simpl. reflexivity.
+ * Simpl.
+ * simpl. intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithRR *)
+ - eexists; split; [| split].
+ * simpl. rewrite (H0 rs). reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithRI32 *)
+ - eexists; split; [|split].
+ * simpl. reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithRI64 *)
+ - eexists; split; [|split].
+ * simpl. reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithRF32 *)
+ - eexists; split; [|split].
+ * simpl. reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithRF64 *)
+ - eexists; split; [|split].
+ * simpl. reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithRRR *)
+ - eexists; split; [|split].
+ * simpl. rewrite (H0 rs1). rewrite (H0 rs2). reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithRRI32 *)
+ - eexists; split; [|split].
+ * simpl. rewrite (H0 rs). reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithRRI64 *)
+ - eexists; split; [|split].
+ * simpl. rewrite (H0 rs). reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithARRR *)
+ - eexists; split; [|split].
+ * simpl. rewrite (H0 rd). rewrite (H0 rs1). rewrite (H0 rs2). reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithARRI32 *)
+ - eexists; split; [|split].
+ * simpl. rewrite (H0 rd). rewrite (H0 rs). reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithARRI64 *)
+ - eexists; split; [|split].
+ * simpl. rewrite (H0 rd). rewrite (H0 rs). reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+Qed.
+
+Theorem forward_simu_par_wio_basic ge fn rsr rsw mr mw sr sw bi rsw' mw':
+ Ge = Genv ge fn ->
+ match_states (State rsr mr) sr ->
+ match_states (State rsw mw) sw ->
+ parexec_basic_instr ge bi rsr rsw mr mw = Next rsw' mw' ->
+ exists sw',
+ macro_prun Ge (trans_basic bi) sw sr sr = Some sw'
+ /\ match_states (State rsw' mw') sw'.
+Proof.
+ intros GENV MSR MSW H.
+ destruct bi.
+(* Arith *)
+ - simpl in H. inversion H. subst rsw' mw'. simpl macro_prun. eapply trans_arith_par_correct; eauto.
+(* Load *)
+ - simpl in H. destruct i.
+ unfold parexec_load in H; destruct (eval_offset _ _) eqn:EVALOFF; try discriminate;
+ destruct (Mem.loadv _ _ _) eqn:MEML; try discriminate; inv H. inv MSR; inv MSW;
+ eexists; split; try split;
+ [ simpl; rewrite EVALOFF; rewrite H; pose (H0 ra); simpl in e; rewrite e; rewrite MEML; reflexivity|
+ Simpl|
+ intros rr; destruct rr; Simpl;
+ destruct (ireg_eq g rd); [
+ subst; Simpl|
+ Simpl; rewrite assign_diff; pose (H1 g); simpl in e; try assumption; Simpl; unfold ppos; apply not_eq_ireg_to_pos; assumption]].
+(* Store *)
+ - simpl in H. destruct i.
+ unfold parexec_store in H; destruct (eval_offset _ _) eqn:EVALOFF; try discriminate.
+ destruct (Mem.storev _ _ _ _) eqn:MEML; try discriminate. inv H; inv MSR; inv MSW.
+ eexists; split; try split.
+ * simpl. rewrite EVALOFF. rewrite H. rewrite (H0 ra). rewrite (H0 rs). rewrite MEML. reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+(* Allocframe *)
+ - simpl in H. destruct (Mem.alloc _ _ _) eqn:MEMAL. destruct (Mem.store _ _ _ _) eqn:MEMS; try discriminate.
+ inv H. inv MSR. inv MSW. eexists. split; try split.
+ * simpl. Simpl. rewrite (H0 GPR12). rewrite H. rewrite MEMAL. rewrite MEMS. Simpl.
+ rewrite H. rewrite MEMAL. rewrite MEMS. reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g GPR32); [| destruct (ireg_eq g GPR12); [| destruct (ireg_eq g GPR17)]]; subst; Simpl.
+(* Freeframe *)
+ - simpl in H. destruct (Mem.loadv _ _ _) eqn:MLOAD; try discriminate. destruct (rsr GPR12) eqn:SPeq; try discriminate.
+ destruct (Mem.free _ _ _ _) eqn:MFREE; try discriminate. inv H. inv MSR. inv MSW.
+ eexists. split; try split.
+ * simpl. rewrite (H0 GPR12). rewrite H. rewrite SPeq. rewrite MLOAD. rewrite MFREE.
+ Simpl. rewrite (H0 GPR12). rewrite SPeq. rewrite MLOAD. rewrite MFREE. reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl. destruct (ireg_eq g GPR32); [| destruct (ireg_eq g GPR12); [| destruct (ireg_eq g GPR17)]]; subst; Simpl.
+(* Pget *)
+ - simpl in H. destruct rs eqn:rseq; try discriminate. inv H. inv MSR. inv MSW.
+ eexists. split; try split. Simpl. intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* Pset *)
+ - simpl in H. destruct rd eqn:rdeq; try discriminate. inv H. inv MSR; inv MSW.
+ eexists. split; try split. Simpl. intros rr; destruct rr; Simpl.
+(* Pnop *)
+ - simpl in H. inv H. inv MSR. inv MSW. eexists. split; try split. assumption. assumption.
+Qed.
+
+Theorem forward_simu_par_body:
+ forall bdy ge fn rsr mr sr rsw mw sw rs' m',
+ Ge = Genv ge fn ->
+ match_states (State rsr mr) sr ->
+ match_states (State rsw mw) sw ->
+ parexec_wio_body ge bdy rsr rsw mr mw = Next rs' m' ->
+ exists s',
+ prun_iw Ge (trans_body bdy) sw sr = Some s'
+ /\ match_states (State rs' m') s'.
+Proof.
+ induction bdy.
+ - intros until m'. intros GENV MSR MSW WIO.
+ simpl in WIO. inv WIO. inv MSR. inv MSW.
+ eexists. split; [| split].
+ * simpl. reflexivity.
+ * assumption.
+ * assumption.
+ - intros until m'. intros GENV MSR MSW WIO.
+ simpl in WIO. destruct (parexec_basic_instr _ _ _ _ _ _) eqn:PARBASIC; try discriminate.
+ exploit forward_simu_par_wio_basic. 4: eapply PARBASIC. all: eauto.
+ intros (sw' & MPRUN & MS'). simpl prun_iw. rewrite MPRUN.
+ eapply IHbdy; eauto.
+Qed.
+
+Theorem forward_simu_par_control ge fn rsr rsw mr mw sr sw sz rs' ex m':
+ Ge = Genv ge fn ->
+ match_states (State rsr mr) sr ->
+ match_states (State rsw mw) sw ->
+ parexec_control ge fn ex (par_nextblock (Ptrofs.repr sz) rsr) (par_nextblock (Ptrofs.repr sz) rsw) mw = Next rs' m' ->
+ exists s',
+ macro_prun Ge (trans_pcincr sz (trans_exit ex)) sw sr sr = Some s'
+ /\ match_states (State rs' m') s'.
+Proof.
+ intros GENV MSR MSW H0.
+ simpl in *.
+ destruct ex.
+ - destruct c; destruct i; try discriminate.
+ all: try (inv H0; inv MSR; inv MSW; eexists; split; [| split]; [simpl; rewrite (H0 PC); reflexivity | Simpl | intros rr; destruct rr; unfold par_nextblock; Simpl]).
+
+ (* Pj_l *)
+ + simpl in H0. unfold par_goto_label in H0. destruct (label_pos _ _ _) eqn:LPOS; try discriminate. destruct (par_nextblock _ _ _) eqn:NB; try discriminate. inv H0.
+ inv MSR; inv MSW.
+ eexists; split; try split.
+ * simpl. rewrite (H0 PC). unfold goto_label_deps. rewrite LPOS. Simpl.
+ unfold par_nextblock in NB. rewrite Pregmap.gss in NB. rewrite NB. reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; unfold par_nextblock; Simpl.
+ (* Pcb *)
+ + simpl in H0. destruct (cmp_for_btest _) eqn:CFB. destruct o; try discriminate. destruct i.
+ ++ unfold par_eval_branch in H0. destruct (Val.cmp_bool _ _ _) eqn:VALCMP; try discriminate. destruct b.
+ +++ unfold par_goto_label in H0. destruct (label_pos _ _ _) eqn:LPOS; try discriminate. destruct (par_nextblock _ _ PC) eqn:NB; try discriminate.
+ inv H0. inv MSR; inv MSW. eexists; split; try split.
+ * simpl. rewrite (H0 PC).
+ rewrite CFB. Simpl. rewrite (H0 r).
+ unfold eval_branch_deps. unfold par_nextblock in VALCMP. rewrite Pregmap.gso in VALCMP; try discriminate. rewrite VALCMP.
+ unfold goto_label_deps. rewrite LPOS.
+ unfold par_nextblock in NB. rewrite Pregmap.gss in NB. rewrite NB. reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; unfold par_nextblock; Simpl.
+ +++ inv H0. inv MSR; inv MSW. eexists; split; try split.
+ * simpl. rewrite (H0 PC).
+ rewrite CFB. Simpl. rewrite (H0 r).
+ unfold eval_branch_deps. unfold par_nextblock in VALCMP. rewrite Pregmap.gso in VALCMP; try discriminate. rewrite VALCMP.
+ reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; unfold par_nextblock; Simpl.
+ ++ unfold par_eval_branch in H0. destruct (Val.cmpl_bool _ _ _) eqn:VALCMP; try discriminate. destruct b.
+ +++ unfold par_goto_label in H0. destruct (label_pos _ _ _) eqn:LPOS; try discriminate. destruct (par_nextblock _ _ PC) eqn:NB; try discriminate.
+ inv H0; inv MSR; inv MSW. eexists; split; try split.
+ * simpl. rewrite (H0 PC).
+ rewrite CFB. Simpl. rewrite (H0 r).
+ unfold eval_branch_deps. unfold par_nextblock in VALCMP. rewrite Pregmap.gso in VALCMP; try discriminate. rewrite VALCMP.
+ unfold goto_label_deps. rewrite LPOS.
+ unfold par_nextblock in NB. rewrite Pregmap.gss in NB. rewrite NB. reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; unfold par_nextblock; Simpl.
+ +++ inv H0. inv MSR; inv MSW. eexists; split; try split.
+ * simpl. rewrite (H0 PC).
+ rewrite CFB. Simpl. rewrite (H0 r).
+ unfold eval_branch_deps. unfold par_nextblock in VALCMP. rewrite Pregmap.gso in VALCMP; try discriminate. rewrite VALCMP.
+ reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; unfold par_nextblock; Simpl.
+ (* Pcbu *)
+ + simpl in H0. destruct (cmpu_for_btest _) eqn:CFB. destruct o; try discriminate. destruct i.
+ ++ unfold par_eval_branch in H0. destruct (Val_cmpu_bool _ _ _) eqn:VALCMP; try discriminate. destruct b.
+ +++ unfold par_goto_label in H0. destruct (label_pos _ _ _) eqn:LPOS; try discriminate. destruct (par_nextblock _ _ PC) eqn:NB; try discriminate.
+ inv H0. inv MSR; inv MSW. eexists; split; try split.
+ * simpl. rewrite (H0 PC).
+ rewrite CFB. Simpl. rewrite (H0 r).
+ unfold eval_branch_deps. unfold par_nextblock in VALCMP. rewrite Pregmap.gso in VALCMP; try discriminate. rewrite VALCMP.
+ unfold goto_label_deps. rewrite LPOS.
+ unfold par_nextblock in NB. rewrite Pregmap.gss in NB. rewrite NB. reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; unfold par_nextblock; Simpl.
+ +++ inv H0. inv MSR; inv MSW. eexists; split; try split.
+ * simpl. rewrite (H0 PC).
+ rewrite CFB. Simpl. rewrite (H0 r).
+ unfold eval_branch_deps. unfold par_nextblock in VALCMP. rewrite Pregmap.gso in VALCMP; try discriminate. rewrite VALCMP.
+ reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; unfold par_nextblock; Simpl.
+ ++ unfold par_eval_branch in H0. destruct (Val_cmplu_bool _ _ _) eqn:VALCMP; try discriminate. destruct b.
+ +++ unfold par_goto_label in H0. destruct (label_pos _ _ _) eqn:LPOS; try discriminate. destruct (par_nextblock _ _ PC) eqn:NB; try discriminate.
+ inv H0; inv MSR; inv MSW. eexists; split; try split.
+ * simpl. rewrite (H0 PC).
+ rewrite CFB. Simpl. rewrite (H0 r).
+ unfold eval_branch_deps. unfold par_nextblock in VALCMP. rewrite Pregmap.gso in VALCMP; try discriminate. rewrite VALCMP.
+ unfold goto_label_deps. rewrite LPOS.
+ unfold par_nextblock in NB. rewrite Pregmap.gss in NB. rewrite NB. reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; unfold par_nextblock; Simpl.
+ +++ inv H0. inv MSR; inv MSW. eexists; split; try split.
+ * simpl. rewrite (H0 PC).
+ rewrite CFB. Simpl. rewrite (H0 r).
+ unfold eval_branch_deps. unfold par_nextblock in VALCMP. rewrite Pregmap.gso in VALCMP; try discriminate. rewrite VALCMP.
+ reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; unfold par_nextblock; Simpl.
+ - simpl in *. inv MSR. inv MSW. inv H0. eexists. split.
+ rewrite (H1 PC). simpl. reflexivity.
+ split. Simpl.
+ intros rr. destruct rr; unfold par_nextblock; Simpl.
+Qed.
+
+Definition trans_block_aux bdy sz ex := (trans_body bdy) ++ (trans_pcincr sz (trans_exit ex) :: nil).
+
+(* Lemma put in Parallelizability.
+Lemma prun_iw_app_some:
+ forall c c' sr sw s' s'',
+ prun_iw Ge c sw sr = Some s' ->
+ prun_iw Ge c' s' sr = Some s'' ->
+ prun_iw Ge (c ++ c') sw sr = Some s''.
+Proof.
+ induction c.
+ - simpl. intros. congruence.
+ - intros. simpl in *. destruct (macro_prun _ _ _ _); auto. eapply IHc; eauto. discriminate.
+Qed.
+*)
+
+Theorem forward_simu_par_wio_bblock_aux ge fn rsr mr sr rsw mw sw bdy ex sz rs' m':
+ Ge = Genv ge fn ->
+ match_states (State rsr mr) sr ->
+ match_states (State rsw mw) sw ->
+ parexec_wio_bblock_aux ge fn bdy ex (Ptrofs.repr sz) rsr rsw mr mw = Next rs' m' ->
+ exists s',
+ prun_iw Ge (trans_block_aux bdy sz ex) sw sr = Some s'
+ /\ match_states (State rs' m') s'.
+Proof.
+ intros. unfold parexec_wio_bblock_aux in H2. unfold trans_block_aux.
+ destruct (parexec_wio_body _ _ _ _ _ _) eqn:WIOB; try discriminate.
+ exploit forward_simu_par_body. 4: eapply WIOB. all: eauto.
+ intros (s' & RUNB & MS').
+ exploit forward_simu_par_control. 4: eapply H2. all: eauto.
+ intros (s'' & RUNCTL & MS'').
+ eexists. split.
+ erewrite prun_iw_app_Some; eauto. unfold prun_iw. rewrite RUNCTL. reflexivity. eassumption.
+Qed.
+
+Theorem forward_simu_par_wio_bblock ge fn rsr rsw mr sr sw mw bdy1 bdy2 ex sz rs' m' rs2 m2:
+ Ge = Genv ge fn ->
+ match_states (State rsr mr) sr ->
+ match_states (State rsw mw) sw ->
+ parexec_wio_bblock_aux ge fn bdy1 ex (Ptrofs.repr sz) rsr rsw mr mw = Next rs' m' ->
+ parexec_wio_body ge bdy2 rsr rs' mr m' = Next rs2 m2 ->
+ exists s2',
+ prun_iw Ge ((trans_block_aux bdy1 sz ex)++(trans_body bdy2)) sw sr = Some s2'
+ /\ match_states (State rs2 m2) s2'.
+Proof.
+ intros. exploit forward_simu_par_wio_bblock_aux. 4: eapply H2. all: eauto.
+ intros (s' & RUNAUX & MS').
+ exploit forward_simu_par_body. 4: eapply H3. all: eauto.
+ intros (s'' & RUNBDY2 & MS'').
+ eexists. split.
+ erewrite prun_iw_app_Some; eauto. eassumption.
+Qed.
+
+Lemma trans_body_perserves_permutation bdy1 bdy2:
+ Permutation bdy1 bdy2 ->
+ Permutation (trans_body bdy1) (trans_body bdy2).
+Proof.
+ induction 1; simpl; econstructor; eauto.
+Qed.
+
+Lemma trans_body_app bdy1: forall bdy2,
+ trans_body (bdy1++bdy2) = (trans_body bdy1) ++ (trans_body bdy2).
+Proof.
+ induction bdy1; simpl; congruence.
+Qed.
+
+Theorem trans_block_perserves_permutation bdy1 bdy2 b:
+ Permutation (bdy1 ++ bdy2) (body b) ->
+ Permutation (trans_block b) ((trans_block_aux bdy1 (size b) (exit b))++(trans_body bdy2)).
+Proof.
+ intro H; unfold trans_block, trans_block_aux.
+ eapply perm_trans.
+ - eapply Permutation_app_tail.
+ apply trans_body_perserves_permutation.
+ apply Permutation_sym; eapply H.
+ - rewrite trans_body_app. rewrite <-! app_assoc.
+ apply Permutation_app_head.
+ apply Permutation_app_comm.
+Qed.
+
+Lemma forward_simu_par_wio_basic_Stuck ge fn rsr rsw mr mw sr sw bi:
+ Ge = Genv ge fn ->
+ match_states (State rsr mr) sr ->
+ match_states (State rsw mw) sw ->
+ parexec_basic_instr ge bi rsr rsw mr mw = Stuck ->
+ macro_prun Ge (trans_basic bi) sw sr sr = None.
+Admitted.
+
+Lemma forward_simu_par_body_Stuck bdy: forall ge fn rsr mr sr rsw mw sw,
+ Ge = Genv ge fn ->
+ match_states (State rsr mr) sr ->
+ match_states (State rsw mw) sw ->
+ parexec_wio_body ge bdy rsr rsw mr mw = Stuck ->
+ prun_iw Ge (trans_body bdy) sw sr = None.
+Proof.
+ induction bdy.
+ - intros until sw. intros GENV MSR MSW WIO.
+ simpl in WIO. inv WIO.
+ - intros until sw. intros GENV MSR MSW WIO.
+ simpl in WIO. destruct (parexec_basic_instr _ _ _ _ _ _) eqn:PARBASIC.
+ * exploit forward_simu_par_wio_basic. 4: eapply PARBASIC. all: eauto.
+ intros (sw' & MPRUN & MS'). simpl prun_iw. rewrite MPRUN.
+ eapply IHbdy; eauto.
+ * exploit forward_simu_par_wio_basic_Stuck. 4: eapply PARBASIC. all: eauto.
+ intros X; simpl; rewrite X; auto.
+Qed.
+
+Lemma forward_simu_par_control_Stuck ge fn rsr rsw mr mw sr sw sz ex:
+ Ge = Genv ge fn ->
+ match_states (State rsr mr) sr ->
+ match_states (State rsw mw) sw ->
+ parexec_control ge fn ex (par_nextblock (Ptrofs.repr sz) rsr) (par_nextblock (Ptrofs.repr sz) rsw) mw = Stuck ->
+ macro_prun Ge (trans_pcincr sz (trans_exit ex)) sw sr sr = None.
+Admitted.
+
+Lemma forward_simu_par_wio_stuck_bdy1 ge fn rs m s1' bdy1 sz ex:
+ Ge = Genv ge fn ->
+ match_states (State rs m) s1' ->
+ parexec_wio_bblock_aux ge fn bdy1 ex (Ptrofs.repr sz) rs rs m m = Stuck ->
+ prun_iw Ge ((trans_block_aux bdy1 sz ex)) s1' s1' = None.
+Proof.
+ unfold parexec_wio_bblock_aux, trans_block_aux; intros.
+ destruct (parexec_wio_body _ _ _ _ _ _) eqn:WIOB.
+ * exploit forward_simu_par_body. 4: eapply WIOB. all: eauto.
+ intros (s' & RUNB & MS').
+ erewrite prun_iw_app_Some; eauto.
+ exploit forward_simu_par_control_Stuck. 4: eauto. all: eauto.
+ simpl. intros X; rewrite X. auto.
+ * apply prun_iw_app_None. eapply forward_simu_par_body_Stuck. 4: eauto. all: eauto.
+Qed.
+
+Lemma forward_simu_par_wio_stuck_bdy2 ge fn rs m s1' bdy1 bdy2 sz ex m' rs':
+ Ge = Genv ge fn ->
+ match_states (State rs m) s1' ->
+ parexec_wio_bblock_aux ge fn bdy1 ex (Ptrofs.repr sz) rs rs m m = Next rs' m' ->
+ parexec_wio_body ge bdy2 rs rs' m m' = Stuck ->
+ prun_iw Ge ((trans_block_aux bdy1 sz ex)++(trans_body bdy2)) s1' s1'=None.
+Proof.
+ intros; exploit forward_simu_par_wio_bblock_aux. 4: eauto. all: eauto.
+ intros (s2' & X1 & X2).
+ erewrite prun_iw_app_Some; eauto.
+ eapply forward_simu_par_body_Stuck. 4: eauto. all: eauto.
+Qed.
+
+Theorem forward_simu_par:
+ forall rs1 m1 s1' b ge fn rs2 m2,
+ Ge = Genv ge fn ->
+ parexec_bblock ge fn b rs1 m1 (Next rs2 m2) ->
+ match_states (State rs1 m1) s1' ->
+ exists s2',
+ prun Ge (trans_block b) s1' (Some s2')
+ /\ match_states (State rs2 m2) s2'.
+Proof.
+ intros until m2. intros GENV PAREXEC MS.
+ inversion PAREXEC as (bdy1 & bdy2 & PERM & WIO).
+ exploit trans_block_perserves_permutation; eauto.
+ intros Perm.
+ remember (parexec_wio_bblock_aux _ _ _ _ _ _ _ _ _) as pwb.
+ destruct pwb; try congruence.
+ exploit forward_simu_par_wio_bblock; eauto. intros (s2' & PIW & MS').
+ unfold prun; simpl; eexists; split; eauto.
+Qed.
+
+Theorem forward_simu_par_stuck:
+ forall rs1 m1 s1' b ge fn,
+ Ge = Genv ge fn ->
+ parexec_bblock ge fn b rs1 m1 Stuck ->
+ match_states (State rs1 m1) s1' ->
+ prun Ge (trans_block b) s1' None.
+Proof.
+ intros until fn. intros GENV PAREXEC MS.
+ inversion PAREXEC as (bdy1 & bdy2 & PERM & WIO).
+ exploit trans_block_perserves_permutation; eauto.
+ intros Perm.
+ destruct (parexec_wio_bblock_aux _ _ _ _ _ _ _ _) eqn:WIOEXIT.
+ - econstructor; eauto. split. eapply forward_simu_par_wio_stuck_bdy2; eauto. auto.
+ - clear WIO. econstructor; eauto. split; eauto.
+ simpl; apply prun_iw_app_None; auto.
+ eapply forward_simu_par_wio_stuck_bdy1; eauto.
+Qed.
+
+Theorem forward_simu_par_alt:
+ forall rs1 m1 s1' b ge fn o2,
+ Ge = Genv ge fn ->
+ match_states (State rs1 m1) s1' ->
+ parexec_bblock ge fn b rs1 m1 o2 ->
+ exists o2',
+ prun Ge (trans_block b) s1' o2'
+ /\ match_outcome o2 o2'.
+Proof.
+ intros until o2. intros GENV MS PAREXEC. destruct o2 eqn:O2.
+ - exploit forward_simu_par; eauto. intros (s2' & PRUN & MS'). eexists. split. eassumption.
+ unfold match_outcome. eexists; split; auto.
+ - exploit forward_simu_par_stuck; eauto. intros. eexists; split; eauto.
+ constructor; auto.
+Qed.
+
+Lemma bblock_para_check_correct:
+ forall ge fn bb rs m rs' m' o,
+ Ge = Genv ge fn ->
+ exec_bblock ge fn bb rs m = Next rs' m' ->
+ bblock_para_check bb = true ->
+ parexec_bblock ge fn bb rs m o ->
+ o = Next rs' m'.
+Proof.
+ intros. unfold bblock_para_check in H1.
+ exploit forward_simu; eauto. eapply trans_state_match.
+ intros (s2' & EXEC & MS).
+ exploit forward_simu_par_alt. 2: apply (trans_state_match (State rs m)). all: eauto.
+ intros (o2' & PRUN & MO).
+ exploit parallelizable_correct. apply is_para_correct_aux. eassumption.
+ intro. eapply H3 in PRUN. clear H3. destruct o2'.
+ - inv PRUN. inv H3. unfold exec in EXEC. assert (x = s2') by congruence. subst. clear H.
+ assert (m0 = s2') by (apply functional_extensionality; auto). subst. clear H4.
+ destruct o; try discriminate. inv MO. inv H. assert (s2' = x) by congruence. subst.
+ exploit state_equiv. split. eapply MS. eapply H4. intros. inv H. reflexivity.
+ - unfold match_outcome in MO. destruct o.
+ + inv MO. inv H3. discriminate.
+ + clear MO. unfold exec in EXEC. rewrite EXEC in PRUN. discriminate.
+Qed.
+
+End SECT_PAR.
diff --git a/mppa_k1c/Asmvliw.v b/mppa_k1c/Asmvliw.v
new file mode 100644
index 00000000..a6e9f04b
--- /dev/null
+++ b/mppa_k1c/Asmvliw.v
@@ -0,0 +1,346 @@
+(* *********************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Xavier Leroy, INRIA Paris-Rocquencourt *)
+(* Prashanth Mundkur, SRI International *)
+(* *)
+(* Copyright Institut National de Recherche en Informatique et en *)
+(* Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the INRIA Non-Commercial License Agreement. *)
+(* *)
+(* The contributions by Prashanth Mundkur are reused and adapted *)
+(* under the terms of a Contributor License Agreement between *)
+(* SRI International and INRIA. *)
+(* *)
+(* *********************************************************************)
+
+(** Abstract syntax and semantics for K1c assembly language. *)
+
+Require Import Coqlib.
+Require Import Maps.
+Require Import AST.
+Require Import Integers.
+Require Import Floats.
+Require Import Values.
+Require Import Memory.
+Require Import Events.
+Require Import Globalenvs.
+Require Import Smallstep.
+Require Import Locations.
+Require Stacklayout.
+Require Import Conventions.
+Require Import Errors.
+Require Export Asmblock.
+Require Import Sorting.Permutation.
+
+Local Open Scope asm.
+
+Section RELSEM.
+
+(** Execution of arith instructions *)
+
+Variable ge: genv.
+
+(* TODO: on pourrait mettre ça dans Asmblock pour factoriser le code
+ en définissant
+ exec_arith_instr ai rs := parexec_arith_instr ai rs rs
+*)
+Definition parexec_arith_instr (ai: ar_instruction) (rsr rsw: regset): regset :=
+ match ai with
+ | PArithR n d => rsw#d <- (arith_eval_r ge n)
+
+ | PArithRR n d s => rsw#d <- (arith_eval_rr n rsr#s)
+
+ | PArithRI32 n d i => rsw#d <- (arith_eval_ri32 n i)
+ | PArithRI64 n d i => rsw#d <- (arith_eval_ri64 n i)
+ | PArithRF32 n d i => rsw#d <- (arith_eval_rf32 n i)
+ | PArithRF64 n d i => rsw#d <- (arith_eval_rf64 n i)
+
+ | PArithRRR n d s1 s2 => rsw#d <- (arith_eval_rrr n rsr#s1 rsr#s2)
+ | PArithRRI32 n d s i => rsw#d <- (arith_eval_rri32 n rsr#s i)
+ | PArithRRI64 n d s i => rsw#d <- (arith_eval_rri64 n rsr#s i)
+
+ | PArithARRR n d s1 s2 => rsw#d <- (arith_eval_arrr n rsr#d rsr#s1 rsr#s2)
+ | PArithARRI32 n d s i => rsw#d <- (arith_eval_arri32 n rsr#d rsr#s i)
+ | PArithARRI64 n d s i => rsw#d <- (arith_eval_arri64 n rsr#d rsr#s i)
+ end.
+
+(** * load/store *)
+
+(* TODO: factoriser ? *)
+Definition parexec_load (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem)
+ (d: ireg) (a: ireg) (ofs: offset) :=
+ match (eval_offset ge ofs) with
+ | OK ptr =>
+ match Mem.loadv chunk mr (Val.offset_ptr (rsr a) ptr) with
+ | None => Stuck
+ | Some v => Next (rsw#d <- v) mw
+ end
+ | _ => Stuck
+ end.
+
+Definition parexec_store (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem)
+ (s: ireg) (a: ireg) (ofs: offset) :=
+ match (eval_offset ge ofs) with
+ | OK ptr =>
+ match Mem.storev chunk mr (Val.offset_ptr (rsr a) ptr) (rsr s) with
+ | None => Stuck
+ | Some m' => Next rsw m'
+ end
+ | _ => Stuck
+ end.
+
+(* rem: parexec_store = exec_store *)
+
+(** * basic instructions *)
+
+(* TODO: factoriser ? *)
+Definition parexec_basic_instr (bi: basic) (rsr rsw: regset) (mr mw: mem) :=
+ match bi with
+ | PArith ai => Next (parexec_arith_instr ai rsr rsw) mw
+
+ | PLoadRRO n d a ofs => parexec_load (load_chunk n) rsr rsw mr mw d a ofs
+
+ | PStoreRRO n s a ofs => parexec_store (store_chunk n) rsr rsw mr mw s a ofs
+
+ | Pallocframe sz pos =>
+ let (mw, stk) := Mem.alloc mr 0 sz in
+ let sp := (Vptr stk Ptrofs.zero) in
+ match Mem.storev Mptr mw (Val.offset_ptr sp pos) rsr#SP with
+ | None => Stuck
+ | Some mw => Next (rsw #FP <- (rsr SP) #SP <- sp #RTMP <- Vundef) mw
+ end
+
+ | Pfreeframe sz pos =>
+ match Mem.loadv Mptr mr (Val.offset_ptr rsr#SP pos) with
+ | None => Stuck
+ | Some v =>
+ match rsr SP with
+ | Vptr stk ofs =>
+ match Mem.free mr stk 0 sz with
+ | None => Stuck
+ | Some mw => Next (rsw#SP <- v #RTMP <- Vundef) mw
+ end
+ | _ => Stuck
+ end
+ end
+ | Pget rd ra =>
+ match ra with
+ | RA => Next (rsw#rd <- (rsr#ra)) mw
+ | _ => Stuck
+ end
+ | Pset ra rd =>
+ match ra with
+ | RA => Next (rsw#ra <- (rsr#rd)) mw
+ | _ => Stuck
+ end
+ | Pnop => Next rsw mw
+end.
+
+(* parexec with writes-in-order *)
+Fixpoint parexec_wio_body (body: list basic) (rsr rsw: regset) (mr mw: mem) :=
+ match body with
+ | nil => Next rsw mw
+ | bi::body' =>
+ match parexec_basic_instr bi rsr rsw mr mw with
+ | Next rsw mw => parexec_wio_body body' rsr rsw mr mw
+ | Stuck => Stuck
+ end
+ end.
+
+(** Manipulations over the [PC] register: continuing with the next
+ instruction ([nextblock]) or branching to a label ([goto_label]). *)
+
+(* TODO: factoriser ? *)
+Definition par_nextblock size_b (rs: regset) :=
+ rs#PC <- (Val.offset_ptr rs#PC size_b).
+
+(* TODO: factoriser ? *)
+Definition par_goto_label (f: function) (lbl: label) (rsr rsw: regset) (mw: mem) :=
+ match label_pos lbl 0 (fn_blocks f) with
+ | None => Stuck
+ | Some pos =>
+ match rsr#PC with
+ | Vptr b ofs => Next (rsw#PC <- (Vptr b (Ptrofs.repr pos))) mw
+ | _ => Stuck
+ end
+ end.
+
+(** Evaluating a branch
+
+Warning: in m PC is assumed to be already pointing on the next instruction !
+
+*)
+(* TODO: factoriser ? *)
+Definition par_eval_branch (f: function) (l: label) (rsr rsw: regset) (mw: mem) (res: option bool) :=
+ match res with
+ | Some true => par_goto_label f l rsr rsw mw
+ | Some false => Next (rsw # PC <- (rsr PC)) mw
+ | None => Stuck
+ end.
+
+
+(** Execution of a single control-flow instruction [i] in initial state [rs] and
+ [m]. Return updated state.
+
+ As above: PC is assumed to be incremented on the next block before the control-flow instruction
+
+ For instructions that correspond tobuiltin
+ actual RISC-V instructions, the cases are straightforward
+ transliterations of the informal descriptions given in the RISC-V
+ user-mode specification. For pseudo-instructions, refer to the
+ informal descriptions given above.
+
+ Note that we set to [Vundef] the registers used as temporaries by
+ the expansions of the pseudo-instructions, so that the RISC-V code
+ we generate cannot use those registers to hold values that must
+ survive the execution of the pseudo-instruction. *)
+
+Definition parexec_control (f: function) (oc: option control) (rsr rsw: regset) (mw: mem) :=
+ match oc with
+ | Some ic =>
+(** Get/Set system registers *)
+ match ic with
+
+
+(** Branch Control Unit instructions *)
+ | Pret =>
+ Next (rsw#PC <- (rsr#RA)) mw
+ | Pcall s =>
+ Next (rsw#RA <- (rsr#PC) #PC <- (Genv.symbol_address ge s Ptrofs.zero)) mw
+ | Picall r =>
+ Next (rsw#RA <- (rsr#PC) #PC <- (rsr#r)) mw
+ | Pgoto s =>
+ Next (rsw#PC <- (Genv.symbol_address ge s Ptrofs.zero)) mw
+ | Pigoto r =>
+ Next (rsw#PC <- (rsr#r)) mw
+ | Pj_l l =>
+ par_goto_label f l rsr rsw mw
+ | Pcb bt r l =>
+ match cmp_for_btest bt with
+ | (Some c, Int) => par_eval_branch f l rsr rsw mw (Val.cmp_bool c rsr#r (Vint (Int.repr 0)))
+ | (Some c, Long) => par_eval_branch f l rsr rsw mw (Val.cmpl_bool c rsr#r (Vlong (Int64.repr 0)))
+ | (None, _) => Stuck
+ end
+ | Pcbu bt r l =>
+ match cmpu_for_btest bt with
+ | (Some c, Int) => par_eval_branch f l rsr rsw mw (Val_cmpu_bool c rsr#r (Vint (Int.repr 0)))
+ | (Some c, Long) => par_eval_branch f l rsr rsw mw (Val_cmplu_bool c rsr#r (Vlong (Int64.repr 0)))
+ | (None, _) => Stuck
+ end
+
+
+(** Pseudo-instructions *)
+ | Pbuiltin ef args res =>
+ Stuck (**r treated specially below *)
+ end
+ | None => Next (rsw#PC <- (rsr#PC)) mw
+end.
+
+
+Definition parexec_wio_bblock_aux (f: function) bdy ext size_b (rsr rsw: regset) (mr mw: mem): outcome :=
+ match parexec_wio_body bdy rsr rsw mr mw with
+ | Next rsw mw =>
+ let rsr := par_nextblock size_b rsr in
+ let rsw := par_nextblock size_b rsw in
+ parexec_control f ext rsr rsw mw
+ | Stuck => Stuck
+ end.
+
+Definition parexec_wio_bblock (f: function) (b: bblock) (rs: regset) (m: mem): outcome :=
+ parexec_wio_bblock_aux f (body b) (exit b) (Ptrofs.repr (size b)) rs rs m m.
+
+Definition parexec_bblock (f: function) (b: bblock) (rs: regset) (m: mem) (o: outcome): Prop :=
+ exists bdy1 bdy2, Permutation (bdy1++bdy2) (body b) /\
+ o=match parexec_wio_bblock_aux f bdy1 (exit b) (Ptrofs.repr (size b)) rs rs m m with
+ | Next rsw mw => parexec_wio_body bdy2 rs rsw m mw
+ | Stuck => Stuck
+ end.
+
+Lemma parexec_bblock_write_in_order f b rs m:
+ parexec_bblock f b rs m (parexec_wio_bblock f b rs m).
+Proof.
+ exists (body b). exists nil.
+ constructor 1.
+ - rewrite app_nil_r; auto.
+ - unfold parexec_wio_bblock.
+ destruct (parexec_wio_bblock_aux f _ _ _ _ _); simpl; auto.
+Qed.
+
+Inductive step: state -> trace -> state -> Prop :=
+ | exec_step_internal:
+ forall b ofs f bi rs m rs' m',
+ rs PC = Vptr b ofs ->
+ Genv.find_funct_ptr ge b = Some (Internal f) ->
+ find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bi ->
+ parexec_wio_bblock f bi rs m = Next rs' m' ->
+ (forall o, parexec_bblock f bi rs m o -> o=(Next rs' m')) ->
+ step (State rs m) E0 (State rs' m')
+ | exec_step_builtin:
+ forall b ofs f ef args res rs m vargs t vres rs' m' bi,
+ rs PC = Vptr b ofs ->
+ Genv.find_funct_ptr ge b = Some (Internal f) ->
+ find_bblock (Ptrofs.unsigned ofs) f.(fn_blocks) = Some bi ->
+ exit bi = Some (PExpand (Pbuiltin ef args res)) ->
+ eval_builtin_args ge rs (rs SP) m args vargs ->
+ external_call ef ge vargs m t vres m' ->
+ rs' = nextblock bi
+ (set_res res vres
+ (undef_regs (map preg_of (destroyed_by_builtin ef))
+ (rs#RTMP <- Vundef))) ->
+ step (State rs m) t (State rs' m')
+ | exec_step_external:
+ forall b ef args res rs m t rs' m',
+ rs PC = Vptr b Ptrofs.zero ->
+ Genv.find_funct_ptr ge b = Some (External ef) ->
+ external_call ef ge args m t res m' ->
+ extcall_arguments rs m (ef_sig ef) args ->
+ rs' = (set_pair (loc_external_result (ef_sig ef) ) res (undef_caller_save_regs rs))#PC <- (rs RA) ->
+ step (State rs m) t (State rs' m')
+ .
+
+End RELSEM.
+
+(** Execution of whole programs. *)
+
+Definition semantics (p: program) :=
+ Semantics step (initial_state p) final_state (Genv.globalenv p).
+
+Lemma semantics_determinate: forall p, determinate (semantics p).
+Proof.
+Ltac Equalities :=
+ match goal with
+ | [ H1: ?a = ?b, H2: ?a = ?c |- _ ] =>
+ rewrite H1 in H2; inv H2; Equalities
+ | _ => idtac
+ end.
+ intros; constructor; simpl; intros.
+- (* determ *)
+ inv H; inv H0; Equalities.
+ + split. constructor. auto.
+ + unfold parexec_wio_bblock, parexec_wio_bblock_aux in H4. destruct (parexec_wio_body _ _ _ _ _ _); try discriminate.
+ rewrite H10 in H4. discriminate.
+ + unfold parexec_wio_bblock, parexec_wio_bblock_aux in H11. destruct (parexec_wio_body _ _ _ _ _ _); try discriminate.
+ rewrite H4 in H11. discriminate.
+ + assert (vargs0 = vargs) by (eapply eval_builtin_args_determ; eauto). subst vargs0.
+ exploit external_call_determ. eexact H6. eexact H13. intros [A B].
+ split. auto. intros. destruct B; auto. subst. auto.
+ + assert (args0 = args) by (eapply extcall_arguments_determ; eauto). subst args0.
+ exploit external_call_determ. eexact H3. eexact H8. intros [A B].
+ split. auto. intros. destruct B; auto. subst. auto.
+- (* trace length *)
+ red; intros. inv H; simpl.
+ omega.
+ eapply external_call_trace_length; eauto.
+ eapply external_call_trace_length; eauto.
+- (* initial states *)
+ inv H; inv H0. f_equal. congruence.
+- (* final no step *)
+ assert (NOTNULL: forall b ofs, Vnullptr <> Vptr b ofs).
+ { intros; unfold Vnullptr; destruct Archi.ptr64; congruence. }
+ inv H. unfold Vzero in H0. red; intros; red; intros.
+ inv H; rewrite H0 in *; eelim NOTNULL; eauto.
+- (* final states *)
+ inv H; inv H0. congruence.
+Qed. \ No newline at end of file
diff --git a/mppa_k1c/PostpassScheduling.v b/mppa_k1c/PostpassScheduling.v
index 6700e684..ab4bc9c9 100644
--- a/mppa_k1c/PostpassScheduling.v
+++ b/mppa_k1c/PostpassScheduling.v
@@ -352,8 +352,9 @@ Definition verified_schedule_nob (bb : bblock) : res (list bblock) :=
do tbb <- concat_all lbb;
do sizecheck <- verify_size bb lbb;
do schedcheck <- verify_schedule bb' tbb;
- do parcheck <- verify_par lbb;
- stick_header_code (header bb) lbb.
+ do res <- stick_header_code (header bb) lbb;
+ do parcheck <- verify_par res;
+ OK res.
Lemma verified_schedule_nob_size:
forall bb lbb, verified_schedule_nob bb = OK lbb -> size bb = size_blocks lbb.
@@ -379,7 +380,7 @@ Lemma verified_schedule_nob_header:
/\ Forall (fun b => header b = nil) lbb.
Proof.
intros. split.
- - monadInv H. unfold stick_header_code in EQ4. destruct (hd_error _); try discriminate. inv EQ4.
+ - monadInv H. unfold stick_header_code in EQ2. destruct (hd_error _); try discriminate. inv EQ2.
simpl. reflexivity.
- apply verified_schedule_nob_no_header_in_middle in H. assumption.
Qed.
diff --git a/mppa_k1c/PostpassSchedulingproof.v b/mppa_k1c/PostpassSchedulingproof.v
index 33912203..dd485ea4 100644
--- a/mppa_k1c/PostpassSchedulingproof.v
+++ b/mppa_k1c/PostpassSchedulingproof.v
@@ -71,7 +71,7 @@ Lemma next_eq:
forall (rs rs': regset) m m',
rs = rs' -> m = m' -> Next rs m = Next rs' m'.
Proof.
- intros. congruence.
+ intros; apply f_equal2; auto.
Qed.
Lemma regset_double_set:
@@ -238,7 +238,7 @@ Proof.
rewrite <- Zplus_mod. auto.
Qed.
-Section PRESERVATION.
+Section PRESERVATION_ASMBLOCK.
Variables prog tprog: program.
Hypothesis TRANSL: match_prog prog tprog.
@@ -659,7 +659,7 @@ Proof.
eapply external_call_symbols_preserved; eauto. apply senv_preserved.
Qed.
-Theorem transf_program_correct:
+Theorem transf_program_correct_Asmblock:
forward_simulation (Asmblock.semantics prog) (Asmblock.semantics tprog).
Proof.
eapply forward_simulation_plus.
@@ -669,4 +669,174 @@ Proof.
- apply transf_step_correct.
Qed.
+End PRESERVATION_ASMBLOCK.
+
+
+
+
+Require Import Asmvliw.
+
+
+Lemma verified_par_checks_alls_bundles lb x: forall bundle,
+ verify_par lb = OK x ->
+ List.In bundle lb -> verify_par_bblock bundle = OK tt.
+Proof.
+ induction lb; simpl; try tauto.
+ intros bundle H; monadInv H.
+ destruct 1; subst; eauto.
+ destruct x0; auto.
+Qed.
+
+
+Lemma verified_schedule_nob_checks_alls_bundles bb lb bundle:
+ verified_schedule_nob bb = OK lb ->
+ List.In bundle lb -> verify_par_bblock bundle = OK tt.
+Proof.
+ unfold verified_schedule_nob. intros H;
+ monadInv H. destruct x3.
+ intros; eapply verified_par_checks_alls_bundles; eauto.
+Qed.
+
+Lemma verify_par_bblock_PExpand bb i:
+ exit bb = Some (PExpand i) -> verify_par_bblock bb = OK tt.
+Proof.
+ destruct bb as [h bdy ext H]; simpl.
+ intros; subst. destruct i.
+ generalize H.
+ rewrite <- AB.wf_bblock_refl in H.
+ destruct H as [H H0].
+ unfold AB.builtin_alone in H0. erewrite H0; eauto.
+Qed.
+
+Local Hint Resolve verified_schedule_nob_checks_alls_bundles.
+
+Lemma verified_schedule_checks_alls_bundles bb lb bundle:
+ verified_schedule bb = OK lb ->
+ List.In bundle lb -> verify_par_bblock bundle = OK tt.
+Proof.
+ unfold verified_schedule. remember (exit bb) as exb.
+ destruct exb as [c|]; eauto.
+ destruct c as [i|]; eauto.
+ destruct i; intros H. inversion_clear H; simpl.
+ intuition subst.
+ intros; eapply verify_par_bblock_PExpand; eauto.
+Qed.
+
+Lemma transf_blocks_checks_all_bundles lbb: forall lb bundle,
+ transf_blocks lbb = OK lb ->
+ List.In bundle lb -> verify_par_bblock bundle = OK tt.
+Proof.
+ induction lbb; simpl.
+ - intros lb bundle H; inversion_clear H. simpl; try tauto.
+ - intros lb bundle H0.
+ monadInv H0.
+ rewrite in_app. destruct 1; eauto.
+ eapply verified_schedule_checks_alls_bundles; eauto.
+Qed.
+
+Lemma find_bblock_Some_in lb:
+ forall ofs b, find_bblock ofs lb = Some b -> List.In b lb.
+Proof.
+ induction lb; simpl; try congruence.
+ intros ofs b.
+ destruct (zlt ofs 0); try congruence.
+ destruct (zeq ofs 0); eauto.
+ intros X; inversion X; eauto.
+Qed.
+
+Section PRESERVATION_ASMVLIW.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma all_bundles_are_checked b ofs f bundle:
+ Genv.find_funct_ptr (globalenv (Asmblock.semantics tprog)) b = Some (Internal f) ->
+ find_bblock ofs (fn_blocks f) = Some bundle ->
+ verify_par_bblock bundle = OK tt.
+Proof.
+ unfold match_prog, match_program in TRANSL.
+ unfold Genv.find_funct_ptr; simpl; intros X.
+ destruct (Genv.find_def_match_2 TRANSL b) as [|f0 y H]; try congruence.
+ destruct y as [tf0|]; try congruence.
+ inversion X as [H1]. subst. clear X.
+ remember (@Gfun fundef unit (Internal f)) as f2.
+ destruct H as [ctx' f1 f2 H0|]; try congruence.
+ inversion Heqf2 as [H2]. subst; clear Heqf2.
+ unfold transf_fundef, transf_partial_fundef in H.
+ destruct f1 as [f1|f1]; try congruence.
+ unfold transf_function, transl_function in H.
+ monadInv H. monadInv EQ.
+ destruct (zlt Ptrofs.max_unsigned (size_blocks (fn_blocks _))); simpl in *|-; try congruence.
+ injection EQ1; intros; subst.
+ monadInv EQ0. simpl in * |-.
+ intros; exploit transf_blocks_checks_all_bundles; eauto.
+ intros; eapply find_bblock_Some_in; eauto.
+Qed.
+
+Lemma checked_bundles_are_parexec_equiv f bundle rs rs' m m' o:
+ exec_bblock (globalenv (Asmblock.semantics tprog)) f bundle rs m = Next rs' m' ->
+ verify_par_bblock bundle = OK tt ->
+ parexec_bblock (globalenv (semantics tprog)) f bundle rs m o -> o = Next rs' m'.
+Proof.
+ intros. unfold verify_par_bblock in H0. destruct (Asmblockdeps.bblock_para_check _) eqn:BPC; try discriminate. clear H0.
+ simpl in H. simpl in H1.
+ eapply Asmblockdeps.bblock_para_check_correct; eauto.
+Qed.
+
+Lemma seqexec_parexec_equiv b ofs f bundle rs rs' m m' o:
+ Genv.find_funct_ptr (globalenv (Asmblock.semantics tprog)) b = Some (Internal f) ->
+ find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bundle ->
+ exec_bblock (globalenv (Asmblock.semantics tprog)) f bundle rs m = Next rs' m' ->
+ parexec_bblock (globalenv (semantics tprog)) f bundle rs m o -> o = Next rs' m'.
+Proof.
+ intros; eapply checked_bundles_are_parexec_equiv; eauto.
+ eapply all_bundles_are_checked; eauto.
+Qed.
+
+Lemma seqexec_parexec_wio b ofs f bundle rs rs' m m':
+ Genv.find_funct_ptr (globalenv (Asmblock.semantics tprog)) b = Some (Internal f) ->
+ find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bundle ->
+ exec_bblock (globalenv (Asmblock.semantics tprog)) f bundle rs m = Next rs' m' ->
+ parexec_wio_bblock (globalenv (semantics tprog)) f bundle rs m = Next rs' m'.
+Proof.
+ intros; erewrite <- seqexec_parexec_equiv; eauto.
+ eapply parexec_bblock_write_in_order.
+Qed.
+
+
+Theorem transf_program_correct_Asmvliw:
+ forward_simulation (Asmblock.semantics tprog) (Asmvliw.semantics tprog).
+Proof.
+ eapply forward_simulation_step with (match_states:=fun (s1:Asmblock.state) s2 => s1=s2); eauto.
+ - intros; subst; auto.
+ - intros s1 t s1' H s2 H0; subst; inversion H; clear H; subst; eexists; split; eauto.
+ + eapply exec_step_internal; eauto.
+ eapply seqexec_parexec_wio; eauto.
+ intros; eapply seqexec_parexec_equiv; eauto.
+ + eapply exec_step_builtin; eauto.
+ + eapply exec_step_external; eauto.
+Qed.
+
+End PRESERVATION_ASMVLIW.
+
+
+
+
+Section PRESERVATION.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Theorem transf_program_correct:
+ forward_simulation (Asmblock.semantics prog) (Asmvliw.semantics tprog).
+Proof.
+ eapply compose_forward_simulations.
+ eapply transf_program_correct_Asmblock; eauto.
+ eapply transf_program_correct_Asmvliw; eauto.
+Qed.
+
End PRESERVATION.
diff --git a/mppa_k1c/abstractbb/Parallelizability.v b/mppa_k1c/abstractbb/Parallelizability.v
index b6d1f142..065c0922 100644
--- a/mppa_k1c/abstractbb/Parallelizability.v
+++ b/mppa_k1c/abstractbb/Parallelizability.v
@@ -79,6 +79,24 @@ Proof.
+ intros H1; rewrite H1; simpl; auto.
Qed.
+Lemma prun_iw_app_None p1: forall m1 old p2,
+ prun_iw p1 m1 old = None ->
+ prun_iw (p1++p2) m1 old = None.
+Proof.
+ induction p1; simpl; try congruence.
+ intros; destruct (macro_prun _ _ _); simpl; auto.
+Qed.
+
+Lemma prun_iw_app_Some p1: forall m1 old m2 p2,
+ prun_iw p1 m1 old = Some m2 ->
+ prun_iw (p1++p2) m1 old = prun_iw p2 m2 old.
+Proof.
+ induction p1; simpl; try congruence.
+ intros; destruct (macro_prun _ _ _); simpl; auto.
+ congruence.
+Qed.
+
+
End PARALLEL.
End ParallelSemantics.
diff --git a/test/monniaux/bitsliced-aes/bs.c b/test/monniaux/bitsliced-aes/bs.c
index 8df53391..063f36f5 100644
--- a/test/monniaux/bitsliced-aes/bs.c
+++ b/test/monniaux/bitsliced-aes/bs.c
@@ -29,7 +29,6 @@ static inline word_t compcert_ternary(word_t x, word_t v0, word_t v1) {
#error "endianness not supported"
#endif
-
void bs_addroundkey(word_t * B, word_t * rk)
{
int i;
diff --git a/test/monniaux/bitsliced-aes/one_file/reduce/bitsliced-aes_compute.c b/test/monniaux/bitsliced-aes/one_file/reduce/bitsliced-aes_compute.c
index 44b88d1f..5294ff1d 100644
--- a/test/monniaux/bitsliced-aes/one_file/reduce/bitsliced-aes_compute.c
+++ b/test/monniaux/bitsliced-aes/one_file/reduce/bitsliced-aes_compute.c
@@ -1,1525 +1,32 @@
#include <stdint.h>
-#include <stdlib.h>
#include <string.h>
-#include <stdio.h>
-#include "/home/monniaux/work/Kalray/CompCert/test/monniaux/clock.h"
-
-#define EXIT1
-
-void aes_ecb_encrypt(uint8_t * outputb, uint8_t * inputb, size_t size, uint8_t * key);
-void aes_ecb_decrypt(uint8_t * outputb, uint8_t * inputb, size_t size, uint8_t * key);
-
-void aes_ctr_encrypt(uint8_t * outputb, uint8_t * inputb, size_t size, uint8_t * key, uint8_t * iv);
-#define aes_ctr_decrypt(outputb,inputb,size,key,iv) aes_ctr_encrypt(outputb,inputb,size,key,iv)
-
-#define BLOCK_SIZE 128
-#define KEY_SCHEDULE_SIZE 176
-#define WORD_SIZE 64
-#define BS_BLOCK_SIZE (BLOCK_SIZE * WORD_SIZE / 8)
-#define WORDS_PER_BLOCK (BLOCK_SIZE / WORD_SIZE)
-
-#if (WORD_SIZE==64)
- typedef uint64_t word_t;
- #define ONE 1ULL
- #define MUL_SHIFT 6
- #define WFMT "lx"
- #define WPAD "016"
- #define __builtin_bswap_wordsize(x) __builtin_bswap64(x)
-#elif (WORD_SIZE==32)
- typedef uint32_t word_t;
- #define ONE 1UL
- #define MUL_SHIFT 5
- #define WFMT "x"
- #define WPAD "08"
- #define __builtin_bswap_wordsize(x) __builtin_bswap32(x)
-#elif (WORD_SIZE==16)
- typedef uint16_t word_t;
- #define ONE 1
- #define MUL_SHIFT 4
- #define WFMT "hx"
- #define WPAD "04"
- #define __builtin_bswap_wordsize(x) __builtin_bswap16(x)
-#elif (WORD_SIZE==8)
- typedef uint8_t word_t;
- #define ONE 1
- #define MUL_SHIFT 3
- #define WFMT "hhx"
- #define WPAD "02"
- #define __builtin_bswap_wordsize(x) (x)
-#else
-#error "invalid word size"
-#endif
-
-void bs_transpose(word_t * blocks);
-void bs_transpose_rev(word_t * blocks);
-void bs_transpose_dst(word_t * transpose, word_t * blocks);
-
-void bs_sbox(word_t U[8]);
-void bs_sbox_rev(word_t U[8]);
-
-void bs_shiftrows(word_t * B);
-void bs_shiftrows_rev(word_t * B);
-
-void bs_mixcolumns(word_t * B);
-void bs_mixcolumns_rev(word_t * B);
-
-void bs_shiftmix(word_t * B);
-
-void bs_addroundkey(word_t * B, word_t * rk);
-void bs_apply_sbox(word_t * input);
-void bs_apply_sbox_rev(word_t * input);
-
-
-void expand_key(unsigned char *in);
-void bs_expand_key(word_t (* rk)[BLOCK_SIZE], uint8_t * key);
-
-void bs_cipher(word_t state[BLOCK_SIZE], word_t (* rk)[BLOCK_SIZE]);
-void bs_cipher_rev(word_t state[BLOCK_SIZE], word_t (* rk)[BLOCK_SIZE]);
-
-
-void dump_hex(uint8_t * h, int len);
-void dump_word(word_t * h, int len);
-void dump_block(word_t * h, int len);
-
-#define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
-#define MAX(X,Y) ((X) > (Y) ? (X) : (Y))
-
-void aes_ecb_encrypt(uint8_t * outputb, uint8_t * inputb, size_t size, uint8_t * key)
-{
- word_t input_space[BLOCK_SIZE];
- word_t rk[11][BLOCK_SIZE];
-
- memset(outputb,0,size);
- word_t * state = (word_t *)outputb;
-
- bs_expand_key(rk, key);
-
- while (size > 0)
- {
- if (size < BS_BLOCK_SIZE)
- {
- memset(input_space,0,BS_BLOCK_SIZE);
- memmove(input_space, inputb, size);
- bs_cipher(input_space,rk);
- memmove(outputb, input_space, size);
- size = 0;
- state += size;
- }
- else
- {
- memmove(state,inputb,BS_BLOCK_SIZE);
- bs_cipher(state,rk);
- size -= BS_BLOCK_SIZE;
- state += BS_BLOCK_SIZE;
- }
-
- }
-}
-
-void aes_ecb_decrypt(uint8_t * outputb, uint8_t * inputb, size_t size, uint8_t * key)
-{
- word_t input_space[BLOCK_SIZE];
- word_t rk[11][BLOCK_SIZE];
-
- memset(outputb,0,size);
- word_t * state = (word_t *)outputb;
-
- bs_expand_key(rk, key);
-
- while (size > 0)
- {
- if (size < BS_BLOCK_SIZE)
- {
- memset(input_space,0,BS_BLOCK_SIZE);
- memmove(input_space, inputb, size);
- bs_cipher_rev(input_space,rk);
- memmove(outputb, input_space, size);
- size = 0;
- state += size;
- }
- else
- {
- memmove(state,inputb,BS_BLOCK_SIZE);
- bs_cipher_rev(state,rk);
- size -= BS_BLOCK_SIZE;
- state += BS_BLOCK_SIZE;
- }
-
- }
-}
-
-static void INC_CTR(uint8_t * ctr, uint8_t i)
-{
- ctr += BLOCK_SIZE/8 - 1;
- uint8_t n = *(ctr);
- *ctr += i;
- while(*ctr < n)
- {
- ctr--;
- n = *ctr;
- (*ctr)++;
- }
-}
-
-void aes_ctr_encrypt(uint8_t * outputb, uint8_t * inputb, size_t size, uint8_t * key, uint8_t * iv)
-{
- word_t rk[11][BLOCK_SIZE];
- word_t ctr[BLOCK_SIZE];
- uint8_t iv_copy[BLOCK_SIZE/8];
-
- memset(outputb,0,size);
- memset(ctr,0,sizeof(ctr));
- memmove(iv_copy,iv,BLOCK_SIZE/8);
-
- word_t * state = (word_t *)outputb;
- bs_expand_key(rk, key);
-
- do
- {
- int chunk = MIN(size, BS_BLOCK_SIZE);
- int blocks = chunk / (BLOCK_SIZE/8);
- if (chunk % (BLOCK_SIZE/8))
- {
- blocks++;
- }
-
- int i;
- for (i = 0; i < blocks; i++)
- {
- memmove(ctr + (i * WORDS_PER_BLOCK), iv_copy, BLOCK_SIZE/8);
- INC_CTR(iv_copy,1);
- }
-
- bs_cipher(ctr, rk);
- size -= chunk;
-
- uint8_t * ctr_p = (uint8_t *) ctr;
- while(chunk--)
- {
- *outputb++ = *ctr_p++ ^ *inputb++;
- }
-
- }
- while(size);
-
-}
-
-void dump_hex(uint8_t * h, int len)
-{
- while(len--)
- printf("%02hhx",*h++);
- printf("\n");
-}
-
-void dump_word(word_t * h, int len)
-{
- while(len--)
- if ((len+1) % 8) printf("%" WPAD WFMT "\n",*h++);
- else printf("%d:\n%" WPAD WFMT "\n",128-len-1,*h++);
-
- printf("\n");
-}
-
-void dump_block(word_t * h, int len)
-{
- while(len-=2 >= 0)
- printf("%" WPAD WFMT"%" WPAD WFMT "\n",*h++,*h++);
- printf("\n");
-}
-
-static const uint8_t sbox[256] = {
- //0 1 2 3 4 5 6 7 8 9 A B C D E F
- 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
- 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
- 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
- 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
- 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
- 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
- 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
- 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
- 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
- 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
- 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
- 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
- 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
- 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
- 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
- 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 };
-
-static void rotate(unsigned char *in) {
- unsigned char a,c;
- a = in[0];
- for(c=0;c<3;c++)
- in[c] = in[c + 1];
- in[3] = a;
- return;
-}
-
-/* Calculate the rcon used in key expansion */
-static unsigned char rcon(unsigned char in) {
- unsigned char c=1;
- if(in == 0)
- return 0;
- while(in != 1) {
- unsigned char b;
- b = c & 0x80;
- c <<= 1;
- if(b == 0x80) {
- c ^= 0x1b;
- }
- in--;
- }
- return c;
-}
-
-/* This is the core key expansion, which, given a 4-byte value,
- * does some scrambling */
-static void schedule_core(unsigned char *in, unsigned char i) {
- char a;
- /* Rotate the input 8 bits to the left */
- rotate(in);
- /* Apply Rijndael's s-box on all 4 bytes */
- for(a = 0; a < 4; a++)
- in[a] = sbox[in[a]];
- /* On just the first byte, add 2^i to the byte */
- in[0] ^= rcon(i);
-}
-
-void expand_key(unsigned char *in) {
- unsigned char t[4];
- /* c is 16 because the first sub-key is the user-supplied key */
- unsigned char c = 16;
- unsigned char i = 1;
- unsigned char a;
-
- /* We need 11 sets of sixteen bytes each for 128-bit mode */
- while(c < 176) {
- /* Copy the temporary variable over from the last 4-byte
- * block */
- for(a = 0; a < 4; a++)
- t[a] = in[a + c - 4];
- /* Every four blocks (of four bytes),
- * do a complex calculation */
- if(c % 16 == 0) {
- schedule_core(t,i);
- i++;
- }
- for(a = 0; a < 4; a++) {
- in[c] = in[c - 16] ^ t[a];
- c++;
- }
- }
-}
-
-#if (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) ||\
- defined(__amd64__) || defined(__amd32__)|| defined(__amd16__)
-#define bs2le(x) (x)
-#define bs2be(x) (x)
-#elif (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) ||\
- (defined(__sparc__))
-#define bs2le(x) __builtin_bswap_wordsize(x)
-#define bs2be(x) __builtin_bswap_wordsize(x)
-#else
-#error "endianness not supported"
-#endif
-
-
-void bs_addroundkey(word_t * B, word_t * rk)
-{
- int i;
- for (i = 0; i < BLOCK_SIZE; i++)
- B[i] ^= rk[i];
-}
-
-void bs_apply_sbox(word_t * input)
-{
- int i;
- for(i=0; i < BLOCK_SIZE; i+=8)
- {
- bs_sbox(input+i);
- }
-}
-
-void bs_apply_sbox_rev(word_t * input)
-{
- int i;
- for(i=0; i < BLOCK_SIZE; i+=8)
- {
- bs_sbox_rev(input+i);
- }
-}
-
-/*July 2011*/
-/*Straight-line program for AES s box*/
-
-/*Input is U[0], U[1],...,U[7]*/
-/*Output is S[0], S[1],...,S[7]*/
-// http://cs-www.cs.yale.edu/homes/peralta/CircuitStuff/CMT.html
-void bs_sbox_rev(word_t U[8])
-{
- word_t W[8];
- word_t
- T1,T2,T3,T4,T5,T6,T8,
- T9,T10,T13,T14,T15,T16,
- T17,T18,T19,T20,T22,T23,T24,
- T25, T26, T27;
-
- word_t
- M1,M2,M3,M4,M5,M6,M7,M8,
- M9,M10,M11,M12,M13,M14,M15,
- M16,M17,M18,M19,M20,M21,M22,
- M23,M24,M25,M26,M27,M28,M29,
- M30,M31,M32,M33,M34,M35,M36,
- M37,M38,M39,M40,M41,M42,M43,
- M44,M45,M46,M47,M48,M49,M50,
- M51,M52,M53,M54,M55,M56,M57,
- M58,M59,M60,M61,M62,M63;
-
- word_t
- P0,P1,P2,P3,P4,P5,P6,P7,P8,
- P9,P10,P11,P12,P13,P14,
- P15,P16,P17,P18,P19,P20,
- P21,P22,P23,P24,P25,P26,
- P27,P28,P29;
-
- word_t Y5,
- R5, R13, R17, R18, R19;
-
-
- T23 = U[7] ^ U[4];
- T22 = ~(U[6] ^ U[4]);
- T2 = ~(U[7] ^ U[6]);
- T1 = U[4] ^ U[3];
- T24 = ~(U[3] ^ U[0]);
- R5 = U[1] ^ U[0];
- T8 = ~(U[6] ^ T23);
- T19 = T22 ^ R5;
- T9 = ~(U[0] ^ T1);
- T10 = T2 ^ T24;
- T13 = T2 ^ R5;
- T3 = T1 ^ R5;
- T25 = ~(U[5] ^ T1);
- R13 = U[6] ^ U[1];
- T17 = ~(U[5] ^ T19);
- T20 = T24 ^ R13;
- T4 = U[3] ^ T8;
- R17 = ~(U[5] ^ U[2]);
- R18 = ~(U[2] ^ U[1]);
- R19 = ~(U[5] ^ U[3]);
- Y5 = U[7] ^ R17;
- T6 = T22 ^ R17;
- T16 = R13 ^ R19;
- T27 = T1 ^ R18;
- T15 = T10 ^ T27;
- T14 = T10 ^ R18;
- T26 = T3 ^ T16;
- M1 = T13 & T6;
- M2 = T23 & T8;
- M3 = T14 ^ M1;
- M4 = T19 & Y5;
- M5 = M4 ^ M1;
- M6 = T3 & T16;
- M7 = T22 & T9;
- M8 = T26 ^ M6;
- M9 = T20 & T17;
- M10 = M9 ^ M6;
- M11 = T1 & T15;
- M12 = T4 & T27;
- M13 = M12 ^ M11;
- M14 = T2 & T10;
- M15 = M14 ^ M11;
- M16 = M3 ^ M2;
- M17 = M5 ^ T24;
- M18 = M8 ^ M7;
- M19 = M10 ^ M15;
- M20 = M16 ^ M13;
- M21 = M17 ^ M15;
- M22 = M18 ^ M13;
- M23 = M19 ^ T25;
- M24 = M22 ^ M23;
- M25 = M22 & M20;
- M26 = M21 ^ M25;
- M27 = M20 ^ M21;
- M28 = M23 ^ M25;
- M29 = M28 & M27;
- M30 = M26 & M24;
- M31 = M20 & M23;
- M32 = M27 & M31;
- M33 = M27 ^ M25;
- M34 = M21 & M22;
- M35 = M24 & M34;
- M36 = M24 ^ M25;
- M37 = M21 ^ M29;
- M38 = M32 ^ M33;
- M39 = M23 ^ M30;
- M40 = M35 ^ M36;
- M41 = M38 ^ M40;
- M42 = M37 ^ M39;
- M43 = M37 ^ M38;
- M44 = M39 ^ M40;
- M45 = M42 ^ M41;
- M46 = M44 & T6;
- M47 = M40 & T8;
- M48 = M39 & Y5;
- M49 = M43 & T16;
- M50 = M38 & T9;
- M51 = M37 & T17;
- M52 = M42 & T15;
- M53 = M45 & T27;
- M54 = M41 & T10;
- M55 = M44 & T13;
- M56 = M40 & T23;
- M57 = M39 & T19;
- M58 = M43 & T3;
- M59 = M38 & T22;
- M60 = M37 & T20;
- M61 = M42 & T1;
- M62 = M45 & T4;
- M63 = M41 & T2;
- P0 = M52 ^ M61;
- P1 = M58 ^ M59;
- P2 = M54 ^ M62;
- P3 = M47 ^ M50;
- P4 = M48 ^ M56;
- P5 = M46 ^ M51;
- P6 = M49 ^ M60;
- P7 = P0 ^ P1;
- P8 = M50 ^ M53;
- P9 = M55 ^ M63;
- P10 = M57 ^ P4;
- P11 = P0 ^ P3;
- P12 = M46 ^ M48;
- P13 = M49 ^ M51;
- P14 = M49 ^ M62;
- P15 = M54 ^ M59;
- P16 = M57 ^ M61;
- P17 = M58 ^ P2;
- P18 = M63 ^ P5;
- P19 = P2 ^ P3;
- P20 = P4 ^ P6;
- P22 = P2 ^ P7;
- P23 = P7 ^ P8;
- P24 = P5 ^ P7;
- P25 = P6 ^ P10;
- P26 = P9 ^ P11;
- P27 = P10 ^ P18;
- P28 = P11 ^ P25;
- P29 = P15 ^ P20;
- W[7] = P13 ^ P22;
- W[6] = P26 ^ P29;
- W[5] = P17 ^ P28;
- W[4] = P12 ^ P22;
- W[3] = P23 ^ P27;
- W[2] = P19 ^ P24;
- W[1] = P14 ^ P23;
- W[0] = P9 ^ P16;
-
- memmove(U,W,sizeof(W));
-}
-
-void bs_sbox(word_t U[8])
-{
- word_t S[8];
- word_t
- T1,T2,T3,T4,T5,T6,T7,T8,
- T9,T10,T11,T12,T13,T14,T15,T16,
- T17,T18,T19,T20,T21,T22,T23,T24,
- T25, T26, T27;
-
- word_t
- M1,M2,M3,M4,M5,M6,M7,M8,
- M9,M10,M11,M12,M13,M14,M15,
- M16,M17,M18,M19,M20,M21,M22,
- M23,M24,M25,M26,M27,M28,M29,
- M30,M31,M32,M33,M34,M35,M36,
- M37,M38,M39,M40,M41,M42,M43,
- M44,M45,M46,M47,M48,M49,M50,
- M51,M52,M53,M54,M55,M56,M57,
- M58,M59,M60,M61,M62,M63;
-
- word_t
- L0,L1,L2,L3,L4,L5,L6,L7,L8,
- L9,L10,L11,L12,L13,L14,
- L15,L16,L17,L18,L19,L20,
- L21,L22,L23,L24,L25,L26,
- L27,L28,L29;
-
- T1 = U[7] ^ U[4];
- T2 = U[7] ^ U[2];
- T3 = U[7] ^ U[1];
- T4 = U[4] ^ U[2];
- T5 = U[3] ^ U[1];
- T6 = T1 ^ T5;
- T7 = U[6] ^ U[5];
- T8 = U[0] ^ T6;
- T9 = U[0] ^ T7;
- T10 = T6 ^ T7;
- T11 = U[6] ^ U[2];
- T12 = U[5] ^ U[2];
- T13 = T3 ^ T4;
- T14 = T6 ^ T11;
- T15 = T5 ^ T11;
- T16 = T5 ^ T12;
- T17 = T9 ^ T16;
- T18 = U[4] ^ U[0];
- T19 = T7 ^ T18;
- T20 = T1 ^ T19;
- T21 = U[1] ^ U[0];
- T22 = T7 ^ T21;
- T23 = T2 ^ T22;
- T24 = T2 ^ T10;
- T25 = T20 ^ T17;
- T26 = T3 ^ T16;
- T27 = T1 ^ T12;
- M1 = T13 & T6;
- M2 = T23 & T8;
- M3 = T14 ^ M1;
- M4 = T19 & U[0];
- M5 = M4 ^ M1;
- M6 = T3 & T16;
- M7 = T22 & T9;
- M8 = T26 ^ M6;
- M9 = T20 & T17;
- M10 = M9 ^ M6;
- M11 = T1 & T15;
- M12 = T4 & T27;
- M13 = M12 ^ M11;
- M14 = T2 & T10;
- M15 = M14 ^ M11;
- M16 = M3 ^ M2;
- M17 = M5 ^ T24;
- M18 = M8 ^ M7;
- M19 = M10 ^ M15;
- M20 = M16 ^ M13;
- M21 = M17 ^ M15;
- M22 = M18 ^ M13;
- M23 = M19 ^ T25;
- M24 = M22 ^ M23;
- M25 = M22 & M20;
- M26 = M21 ^ M25;
- M27 = M20 ^ M21;
- M28 = M23 ^ M25;
- M29 = M28 & M27;
- M30 = M26 & M24;
- M31 = M20 & M23;
- M32 = M27 & M31;
- M33 = M27 ^ M25;
- M34 = M21 & M22;
- M35 = M24 & M34;
- M36 = M24 ^ M25;
- M37 = M21 ^ M29;
- M38 = M32 ^ M33;
- M39 = M23 ^ M30;
- M40 = M35 ^ M36;
- M41 = M38 ^ M40;
- M42 = M37 ^ M39;
- M43 = M37 ^ M38;
- M44 = M39 ^ M40;
- M45 = M42 ^ M41;
- M46 = M44 & T6;
- M47 = M40 & T8;
- M48 = M39 & U[0];
- M49 = M43 & T16;
- M50 = M38 & T9;
- M51 = M37 & T17;
- M52 = M42 & T15;
- M53 = M45 & T27;
- M54 = M41 & T10;
- M55 = M44 & T13;
- M56 = M40 & T23;
- M57 = M39 & T19;
- M58 = M43 & T3;
- M59 = M38 & T22;
- M60 = M37 & T20;
- M61 = M42 & T1;
- M62 = M45 & T4;
- M63 = M41 & T2;
- L0 = M61 ^ M62;
- L1 = M50 ^ M56;
- L2 = M46 ^ M48;
- L3 = M47 ^ M55;
- L4 = M54 ^ M58;
- L5 = M49 ^ M61;
- L6 = M62 ^ L5;
- L7 = M46 ^ L3;
- L8 = M51 ^ M59;
- L9 = M52 ^ M53;
- L10 = M53 ^ L4;
- L11 = M60 ^ L2;
- L12 = M48 ^ M51;
- L13 = M50 ^ L0;
- L14 = M52 ^ M61;
- L15 = M55 ^ L1;
- L16 = M56 ^ L0;
- L17 = M57 ^ L1;
- L18 = M58 ^ L8;
- L19 = M63 ^ L4;
- L20 = L0 ^ L1;
- L21 = L1 ^ L7;
- L22 = L3 ^ L12;
- L23 = L18 ^ L2;
- L24 = L15 ^ L9;
- L25 = L6 ^ L10;
- L26 = L7 ^ L9;
- L27 = L8 ^ L10;
- L28 = L11 ^ L14;
- L29 = L11 ^ L17;
- S[7] = L6 ^ L24;
- S[6] = ~(L16 ^ L26);
- S[5] = ~(L19 ^ L28);
- S[4] = L6 ^ L21;
- S[3] = L20 ^ L22;
- S[2] = L25 ^ L29;
- S[1] = ~(L13 ^ L27);
- S[0] = ~(L6 ^ L23);
-
- memmove(U,S,sizeof(S));
-}
-
-void bs_transpose(word_t * blocks)
-{
- word_t transpose[BLOCK_SIZE];
- memset(transpose, 0, sizeof(transpose));
- bs_transpose_dst(transpose,blocks);
- memmove(blocks,transpose,sizeof(transpose));
-}
-
-void bs_transpose_dst(word_t * transpose, word_t * blocks)
-{
- int i,k;
- word_t w;
- for(k=0; k < WORD_SIZE; k++)
- {
- int bitpos = ONE << k;
- for (i=0; i < WORDS_PER_BLOCK; i++)
- {
- w = bs2le(blocks[k * WORDS_PER_BLOCK + i]);
- int offset = i << MUL_SHIFT;
-
-#ifndef UNROLL_TRANSPOSE
- int j;
- for(j=0; j < WORD_SIZE; j++)
- {
- // TODO make const time
- transpose[offset + j] |= (w & (ONE << j)) ? bitpos : 0;
- }
-#else
-
- transpose[(offset)+ 0 ] |= (w & (ONE << 0 )) ? (bitpos) : 0;
- transpose[(offset)+ 1 ] |= (w & (ONE << 1 )) ? (bitpos) : 0;
- transpose[(offset)+ 2 ] |= (w & (ONE << 2 )) ? (bitpos) : 0;
- transpose[(offset)+ 3 ] |= (w & (ONE << 3 )) ? (bitpos) : 0;
- transpose[(offset)+ 4 ] |= (w & (ONE << 4 )) ? (bitpos) : 0;
- transpose[(offset)+ 5 ] |= (w & (ONE << 5 )) ? (bitpos) : 0;
- transpose[(offset)+ 6 ] |= (w & (ONE << 6 )) ? (bitpos) : 0;
- transpose[(offset)+ 7 ] |= (w & (ONE << 7 )) ? (bitpos) : 0;
-#if WORD_SIZE > 8
- transpose[(offset)+ 8 ] |= (w & (ONE << 8 )) ? (bitpos) : 0;
- transpose[(offset)+ 9 ] |= (w & (ONE << 9 )) ? (bitpos) : 0;
- transpose[(offset)+ 10] |= (w & (ONE << 10)) ? (bitpos) : 0;
- transpose[(offset)+ 11] |= (w & (ONE << 11)) ? (bitpos) : 0;
- transpose[(offset)+ 12] |= (w & (ONE << 12)) ? (bitpos) : 0;
- transpose[(offset)+ 13] |= (w & (ONE << 13)) ? (bitpos) : 0;
- transpose[(offset)+ 14] |= (w & (ONE << 14)) ? (bitpos) : 0;
- transpose[(offset)+ 15] |= (w & (ONE << 15)) ? (bitpos) : 0;
-#endif
-#if WORD_SIZE > 16
- transpose[(offset)+ 16] |= (w & (ONE << 16)) ? (bitpos) : 0;
- transpose[(offset)+ 17] |= (w & (ONE << 17)) ? (bitpos) : 0;
- transpose[(offset)+ 18] |= (w & (ONE << 18)) ? (bitpos) : 0;
- transpose[(offset)+ 19] |= (w & (ONE << 19)) ? (bitpos) : 0;
- transpose[(offset)+ 20] |= (w & (ONE << 20)) ? (bitpos) : 0;
- transpose[(offset)+ 21] |= (w & (ONE << 21)) ? (bitpos) : 0;
- transpose[(offset)+ 22] |= (w & (ONE << 22)) ? (bitpos) : 0;
- transpose[(offset)+ 23] |= (w & (ONE << 23)) ? (bitpos) : 0;
- transpose[(offset)+ 24] |= (w & (ONE << 24)) ? (bitpos) : 0;
- transpose[(offset)+ 25] |= (w & (ONE << 25)) ? (bitpos) : 0;
- transpose[(offset)+ 26] |= (w & (ONE << 26)) ? (bitpos) : 0;
- transpose[(offset)+ 27] |= (w & (ONE << 27)) ? (bitpos) : 0;
- transpose[(offset)+ 28] |= (w & (ONE << 28)) ? (bitpos) : 0;
- transpose[(offset)+ 29] |= (w & (ONE << 29)) ? (bitpos) : 0;
- transpose[(offset)+ 30] |= (w & (ONE << 30)) ? (bitpos) : 0;
- transpose[(offset)+ 31] |= (w & (ONE << 31)) ? (bitpos) : 0;
-#endif
-#if WORD_SIZE > 32
- transpose[(offset)+ 32] |= (w & (ONE << 32)) ? (bitpos) : 0;
- transpose[(offset)+ 33] |= (w & (ONE << 33)) ? (bitpos) : 0;
- transpose[(offset)+ 34] |= (w & (ONE << 34)) ? (bitpos) : 0;
- transpose[(offset)+ 35] |= (w & (ONE << 35)) ? (bitpos) : 0;
- transpose[(offset)+ 36] |= (w & (ONE << 36)) ? (bitpos) : 0;
- transpose[(offset)+ 37] |= (w & (ONE << 37)) ? (bitpos) : 0;
- transpose[(offset)+ 38] |= (w & (ONE << 38)) ? (bitpos) : 0;
- transpose[(offset)+ 39] |= (w & (ONE << 39)) ? (bitpos) : 0;
- transpose[(offset)+ 40] |= (w & (ONE << 40)) ? (bitpos) : 0;
- transpose[(offset)+ 41] |= (w & (ONE << 41)) ? (bitpos) : 0;
- transpose[(offset)+ 42] |= (w & (ONE << 42)) ? (bitpos) : 0;
- transpose[(offset)+ 43] |= (w & (ONE << 43)) ? (bitpos) : 0;
- transpose[(offset)+ 44] |= (w & (ONE << 44)) ? (bitpos) : 0;
- transpose[(offset)+ 45] |= (w & (ONE << 45)) ? (bitpos) : 0;
- transpose[(offset)+ 46] |= (w & (ONE << 46)) ? (bitpos) : 0;
- transpose[(offset)+ 47] |= (w & (ONE << 47)) ? (bitpos) : 0;
- transpose[(offset)+ 48] |= (w & (ONE << 48)) ? (bitpos) : 0;
- transpose[(offset)+ 49] |= (w & (ONE << 49)) ? (bitpos) : 0;
- transpose[(offset)+ 50] |= (w & (ONE << 50)) ? (bitpos) : 0;
- transpose[(offset)+ 51] |= (w & (ONE << 51)) ? (bitpos) : 0;
- transpose[(offset)+ 52] |= (w & (ONE << 52)) ? (bitpos) : 0;
- transpose[(offset)+ 53] |= (w & (ONE << 53)) ? (bitpos) : 0;
- transpose[(offset)+ 54] |= (w & (ONE << 54)) ? (bitpos) : 0;
- transpose[(offset)+ 55] |= (w & (ONE << 55)) ? (bitpos) : 0;
- transpose[(offset)+ 56] |= (w & (ONE << 56)) ? (bitpos) : 0;
- transpose[(offset)+ 57] |= (w & (ONE << 57)) ? (bitpos) : 0;
- transpose[(offset)+ 58] |= (w & (ONE << 58)) ? (bitpos) : 0;
- transpose[(offset)+ 59] |= (w & (ONE << 59)) ? (bitpos) : 0;
- transpose[(offset)+ 60] |= (w & (ONE << 60)) ? (bitpos) : 0;
- transpose[(offset)+ 61] |= (w & (ONE << 61)) ? (bitpos) : 0;
- transpose[(offset)+ 62] |= (w & (ONE << 62)) ? (bitpos) : 0;
- transpose[(offset)+ 63] |= (w & (ONE << 63)) ? (bitpos) : 0;
-#endif
-#endif
- // constant time:
- //transpose[(i<<MUL_SHIFT)+ j] |= (((int64_t)((w & (ONE << j)) << (WORD_SIZE-1-j)))>>(WORD_SIZE-1)) & (ONE<<k);
- }
- }
-}
-
-void bs_transpose_rev(word_t * blocks)
-{
- int i,k;
- word_t w;
- word_t transpose[BLOCK_SIZE];
- memset(transpose, 0, sizeof(transpose));
- for(k=0; k < BLOCK_SIZE; k++)
- {
- w = blocks[k];
- word_t bitpos = bs2be(ONE << (k % WORD_SIZE));
- word_t offset = k / WORD_SIZE;
-#ifndef UNROLL_TRANSPOSE
- int j;
- for(j=0; j < WORD_SIZE; j++)
- {
- word_t bit = (w & (ONE << j)) ? (ONE << (k % WORD_SIZE)) : 0;
- transpose[j * WORDS_PER_BLOCK + (offset)] |= bit;
- }
-#else
- transpose[0 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 0 )) ? bitpos : 0;
- transpose[1 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 1 )) ? bitpos : 0;
- transpose[2 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 2 )) ? bitpos : 0;
- transpose[3 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 3 )) ? bitpos : 0;
- transpose[4 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 4 )) ? bitpos : 0;
- transpose[5 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 5 )) ? bitpos : 0;
- transpose[6 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 6 )) ? bitpos : 0;
- transpose[7 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 7 )) ? bitpos : 0;
-#if WORD_SIZE > 8
- transpose[8 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 8 )) ? bitpos : 0;
- transpose[9 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 9 )) ? bitpos : 0;
- transpose[10 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 10)) ? bitpos : 0;
- transpose[11 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 11)) ? bitpos : 0;
- transpose[12 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 12)) ? bitpos : 0;
- transpose[13 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 13)) ? bitpos : 0;
- transpose[14 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 14)) ? bitpos : 0;
- transpose[15 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 15)) ? bitpos : 0;
-#endif
-#if WORD_SIZE > 16
- transpose[16 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 16)) ? bitpos : 0;
- transpose[17 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 17)) ? bitpos : 0;
- transpose[18 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 18)) ? bitpos : 0;
- transpose[19 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 19)) ? bitpos : 0;
- transpose[20 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 20)) ? bitpos : 0;
- transpose[21 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 21)) ? bitpos : 0;
- transpose[22 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 22)) ? bitpos : 0;
- transpose[23 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 23)) ? bitpos : 0;
- transpose[24 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 24)) ? bitpos : 0;
- transpose[25 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 25)) ? bitpos : 0;
- transpose[26 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 26)) ? bitpos : 0;
- transpose[27 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 27)) ? bitpos : 0;
- transpose[28 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 28)) ? bitpos : 0;
- transpose[29 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 29)) ? bitpos : 0;
- transpose[30 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 30)) ? bitpos : 0;
- transpose[31 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 31)) ? bitpos : 0;
-#endif
-#if WORD_SIZE > 32
- transpose[32 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 32)) ? bitpos : 0;
- transpose[33 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 33)) ? bitpos : 0;
- transpose[34 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 34)) ? bitpos : 0;
- transpose[35 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 35)) ? bitpos : 0;
- transpose[36 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 36)) ? bitpos : 0;
- transpose[37 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 37)) ? bitpos : 0;
- transpose[38 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 38)) ? bitpos : 0;
- transpose[39 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 39)) ? bitpos : 0;
- transpose[40 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 40)) ? bitpos : 0;
- transpose[41 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 41)) ? bitpos : 0;
- transpose[42 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 42)) ? bitpos : 0;
- transpose[43 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 43)) ? bitpos : 0;
- transpose[44 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 44)) ? bitpos : 0;
- transpose[45 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 45)) ? bitpos : 0;
- transpose[46 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 46)) ? bitpos : 0;
- transpose[47 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 47)) ? bitpos : 0;
- transpose[48 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 48)) ? bitpos : 0;
- transpose[49 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 49)) ? bitpos : 0;
- transpose[50 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 50)) ? bitpos : 0;
- transpose[51 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 51)) ? bitpos : 0;
- transpose[52 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 52)) ? bitpos : 0;
- transpose[53 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 53)) ? bitpos : 0;
- transpose[54 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 54)) ? bitpos : 0;
- transpose[55 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 55)) ? bitpos : 0;
- transpose[56 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 56)) ? bitpos : 0;
- transpose[57 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 57)) ? bitpos : 0;
- transpose[58 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 58)) ? bitpos : 0;
- transpose[59 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 59)) ? bitpos : 0;
- transpose[60 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 60)) ? bitpos : 0;
- transpose[61 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 61)) ? bitpos : 0;
- transpose[62 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 62)) ? bitpos : 0;
- transpose[63 * WORDS_PER_BLOCK + (offset )] |= (w & (ONE << 63)) ? bitpos : 0;
-#endif
-#endif
- }
- memmove(blocks,transpose,sizeof(transpose));
-}
-
-
-#define R0 0
-#define R1 8
-#define R2 16
-#define R3 24
-
-#define B0 0
-#define B1 32
-#define B2 64
-#define B3 96
-
-#define R0_shift (BLOCK_SIZE/4)*0
-#define R1_shift (BLOCK_SIZE/4)*1
-#define R2_shift (BLOCK_SIZE/4)*2
-#define R3_shift (BLOCK_SIZE/4)*3
-#define B_MOD (BLOCK_SIZE)
-
-
-void bs_shiftrows(word_t * B)
-{
- word_t Bp_space[BLOCK_SIZE];
- word_t * Bp = Bp_space;
- word_t * Br0 = B + 0;
- word_t * Br1 = B + 32;
- word_t * Br2 = B + 64;
- word_t * Br3 = B + 96;
- uint8_t offsetr0 = 0;
- uint8_t offsetr1 = 32;
- uint8_t offsetr2 = 64;
- uint8_t offsetr3 = 96;
-
-
- int i;
- for(i=0; i<4; i++)
- {
- Bp[B0 + 0] = Br0[0];
- Bp[B0 + 1] = Br0[1];
- Bp[B0 + 2] = Br0[2];
- Bp[B0 + 3] = Br0[3];
- Bp[B0 + 4] = Br0[4];
- Bp[B0 + 5] = Br0[5];
- Bp[B0 + 6] = Br0[6];
- Bp[B0 + 7] = Br0[7];
- Bp[B1 + 0] = Br1[0];
- Bp[B1 + 1] = Br1[1];
- Bp[B1 + 2] = Br1[2];
- Bp[B1 + 3] = Br1[3];
- Bp[B1 + 4] = Br1[4];
- Bp[B1 + 5] = Br1[5];
- Bp[B1 + 6] = Br1[6];
- Bp[B1 + 7] = Br1[7];
- Bp[B2 + 0] = Br2[0];
- Bp[B2 + 1] = Br2[1];
- Bp[B2 + 2] = Br2[2];
- Bp[B2 + 3] = Br2[3];
- Bp[B2 + 4] = Br2[4];
- Bp[B2 + 5] = Br2[5];
- Bp[B2 + 6] = Br2[6];
- Bp[B2 + 7] = Br2[7];
- Bp[B3 + 0] = Br3[0];
- Bp[B3 + 1] = Br3[1];
- Bp[B3 + 2] = Br3[2];
- Bp[B3 + 3] = Br3[3];
- Bp[B3 + 4] = Br3[4];
- Bp[B3 + 5] = Br3[5];
- Bp[B3 + 6] = Br3[6];
- Bp[B3 + 7] = Br3[7];
-
- offsetr0 = (offsetr0 + BLOCK_SIZE/16 + BLOCK_SIZE/4) & 0x7f;
- offsetr1 = (offsetr1 + BLOCK_SIZE/16 + BLOCK_SIZE/4) & 0x7f;
- offsetr2 = (offsetr2 + BLOCK_SIZE/16 + BLOCK_SIZE/4) & 0x7f;
- offsetr3 = (offsetr3 + BLOCK_SIZE/16 + BLOCK_SIZE/4) & 0x7f;
-
- Br0 = B + offsetr0;
- Br1 = B + offsetr1;
- Br2 = B + offsetr2;
- Br3 = B + offsetr3;
-
- Bp += 8;
- }
- memmove(B,Bp_space,sizeof(Bp_space));
-}
-
-
-void bs_shiftrows_rev(word_t * B)
-{
- word_t Bp_space[BLOCK_SIZE];
- word_t * Bp = Bp_space;
- word_t * Br0 = Bp + 0;
- word_t * Br1 = Bp + 32;
- word_t * Br2 = Bp + 64;
- word_t * Br3 = Bp + 96;
- uint8_t offsetr0 = 0;
- uint8_t offsetr1 = 32;
- uint8_t offsetr2 = 64;
- uint8_t offsetr3 = 96;
-
-
- int i;
- for(i=0; i<4; i++)
- {
- Br0[0] = B[B0 + 0];
- Br0[1] = B[B0 + 1];
- Br0[2] = B[B0 + 2];
- Br0[3] = B[B0 + 3];
- Br0[4] = B[B0 + 4];
- Br0[5] = B[B0 + 5];
- Br0[6] = B[B0 + 6];
- Br0[7] = B[B0 + 7];
- Br1[0] = B[B1 + 0];
- Br1[1] = B[B1 + 1];
- Br1[2] = B[B1 + 2];
- Br1[3] = B[B1 + 3];
- Br1[4] = B[B1 + 4];
- Br1[5] = B[B1 + 5];
- Br1[6] = B[B1 + 6];
- Br1[7] = B[B1 + 7];
- Br2[0] = B[B2 + 0];
- Br2[1] = B[B2 + 1];
- Br2[2] = B[B2 + 2];
- Br2[3] = B[B2 + 3];
- Br2[4] = B[B2 + 4];
- Br2[5] = B[B2 + 5];
- Br2[6] = B[B2 + 6];
- Br2[7] = B[B2 + 7];
- Br3[0] = B[B3 + 0];
- Br3[1] = B[B3 + 1];
- Br3[2] = B[B3 + 2];
- Br3[3] = B[B3 + 3];
- Br3[4] = B[B3 + 4];
- Br3[5] = B[B3 + 5];
- Br3[6] = B[B3 + 6];
- Br3[7] = B[B3 + 7];
-
- offsetr0 = (offsetr0 + BLOCK_SIZE/16 + BLOCK_SIZE/4) & 0x7f;
- offsetr1 = (offsetr1 + BLOCK_SIZE/16 + BLOCK_SIZE/4) & 0x7f;
- offsetr2 = (offsetr2 + BLOCK_SIZE/16 + BLOCK_SIZE/4) & 0x7f;
- offsetr3 = (offsetr3 + BLOCK_SIZE/16 + BLOCK_SIZE/4) & 0x7f;
-
- Br0 = Bp + offsetr0;
- Br1 = Bp + offsetr1;
- Br2 = Bp + offsetr2;
- Br3 = Bp + offsetr3;
-
- B += 8;
- }
- memmove(B - 8 * 4,Bp_space,sizeof(Bp_space));
-}
-
-
-#define A0 0
-#define A1 8
-#define A2 16
-#define A3 24
-
-// Does shift rows and mix columns in same step
-void bs_shiftmix(word_t * B)
-{
- word_t Bp_space[BLOCK_SIZE];
- word_t * Bp = Bp_space;
-
- word_t * Br0 = B + 0;
- word_t * Br1 = B + 32;
- word_t * Br2 = B + 64;
- word_t * Br3 = B + 96;
-
- uint8_t offsetr0 = 0;
- uint8_t offsetr1 = 32;
- uint8_t offsetr2 = 64;
- uint8_t offsetr3 = 96;
-
- Br0 = B + offsetr0;
- Br1 = B + offsetr1;
- Br2 = B + offsetr2;
- Br3 = B + offsetr3;
-
-
- int i;
- for (i = 0; i < 4; i++)
- {
- // B0
- // 2*A0 2*A1 A1 A2 A3
- word_t of =Br0[R0+7]^ Br1[R1+7];
- Bp[A0+0] = Br1[R1+0] ^ Br2[R2+0] ^ Br3[R3+0] ^ of;
- Bp[A0+1] = Br0[R0+0] ^ Br1[R1+0] ^ Br1[R1+1] ^ Br2[R2+1] ^ Br3[R3+1] ^ of;
- Bp[A0+2] = Br0[R0+1] ^ Br1[R1+1] ^ Br1[R1+2] ^ Br2[R2+2] ^ Br3[R3+2];
- Bp[A0+3] = Br0[R0+2] ^ Br1[R1+2] ^ Br1[R1+3] ^ Br2[R2+3] ^ Br3[R3+3] ^ of;
- Bp[A0+4] = Br0[R0+3] ^ Br1[R1+3] ^ Br1[R1+4] ^ Br2[R2+4] ^ Br3[R3+4] ^ of;
- Bp[A0+5] = Br0[R0+4] ^ Br1[R1+4] ^ Br1[R1+5] ^ Br2[R2+5] ^ Br3[R3+5];
- Bp[A0+6] = Br0[R0+5] ^ Br1[R1+5] ^ Br1[R1+6] ^ Br2[R2+6] ^ Br3[R3+6];
- Bp[A0+7] = Br0[R0+6] ^ Br1[R1+6] ^ Br1[R1+7] ^ Br2[R2+7] ^ Br3[R3+7];
-
- // A0 2*A1 2*A2 A2 A3
- of = Br1[R1+7] ^ Br2[R2+7];
- Bp[A1+0] = Br0[R0+0] ^ Br2[R2+0] ^ Br3[R3+0] ^ of;
- Bp[A1+1] = Br0[R0+1] ^ Br1[R1+0] ^ Br2[R2+0] ^ Br2[R2+1] ^ Br3[R3+1] ^ of;
- Bp[A1+2] = Br0[R0+2] ^ Br1[R1+1] ^ Br2[R2+1] ^ Br2[R2+2] ^ Br3[R3+2];
- Bp[A1+3] = Br0[R0+3] ^ Br1[R1+2] ^ Br2[R2+2] ^ Br2[R2+3] ^ Br3[R3+3] ^ of;
- Bp[A1+4] = Br0[R0+4] ^ Br1[R1+3] ^ Br2[R2+3] ^ Br2[R2+4] ^ Br3[R3+4] ^ of;
- Bp[A1+5] = Br0[R0+5] ^ Br1[R1+4] ^ Br2[R2+4] ^ Br2[R2+5] ^ Br3[R3+5];
- Bp[A1+6] = Br0[R0+6] ^ Br1[R1+5] ^ Br2[R2+5] ^ Br2[R2+6] ^ Br3[R3+6];
- Bp[A1+7] = Br0[R0+7] ^ Br1[R1+6] ^ Br2[R2+6] ^ Br2[R2+7] ^ Br3[R3+7];
-
- // A0 A1 2*A2 2*A3 A3
- of = Br2[R2+7] ^ Br3[R3+7];
- Bp[A2+0] = Br0[R0+0] ^ Br1[R1+0] ^ Br3[R3+0] ^ of;
- Bp[A2+1] = Br0[R0+1] ^ Br1[R1+1] ^ Br2[R2+0] ^ Br3[R3+0] ^ Br3[R3+1] ^ of;
- Bp[A2+2] = Br0[R0+2] ^ Br1[R1+2] ^ Br2[R2+1] ^ Br3[R3+1] ^ Br3[R3+2];
- Bp[A2+3] = Br0[R0+3] ^ Br1[R1+3] ^ Br2[R2+2] ^ Br3[R3+2] ^ Br3[R3+3] ^ of;
- Bp[A2+4] = Br0[R0+4] ^ Br1[R1+4] ^ Br2[R2+3] ^ Br3[R3+3] ^ Br3[R3+4] ^ of;
- Bp[A2+5] = Br0[R0+5] ^ Br1[R1+5] ^ Br2[R2+4] ^ Br3[R3+4] ^ Br3[R3+5];
- Bp[A2+6] = Br0[R0+6] ^ Br1[R1+6] ^ Br2[R2+5] ^ Br3[R3+5] ^ Br3[R3+6];
- Bp[A2+7] = Br0[R0+7] ^ Br1[R1+7] ^ Br2[R2+6] ^ Br3[R3+6] ^ Br3[R3+7];
-
- // A0 2*A0 A1 A2 2*A3
- of = Br0[R0+7] ^ Br3[R3+7];
- Bp[A3+0] = Br0[R0+0] ^ Br1[R1+0] ^ Br2[R2+0] ^ of;
- Bp[A3+1] = Br0[R0+1] ^ Br0[R0+0] ^ Br1[R1+1] ^ Br2[R2+1] ^ Br3[R3+0] ^ of;
- Bp[A3+2] = Br0[R0+2] ^ Br0[R0+1] ^ Br1[R1+2] ^ Br2[R2+2] ^ Br3[R3+1];
- Bp[A3+3] = Br0[R0+3] ^ Br0[R0+2] ^ Br1[R1+3] ^ Br2[R2+3] ^ Br3[R3+2] ^ of;
- Bp[A3+4] = Br0[R0+4] ^ Br0[R0+3] ^ Br1[R1+4] ^ Br2[R2+4] ^ Br3[R3+3] ^ of;
- Bp[A3+5] = Br0[R0+5] ^ Br0[R0+4] ^ Br1[R1+5] ^ Br2[R2+5] ^ Br3[R3+4];
- Bp[A3+6] = Br0[R0+6] ^ Br0[R0+5] ^ Br1[R1+6] ^ Br2[R2+6] ^ Br3[R3+5];
- Bp[A3+7] = Br0[R0+7] ^ Br0[R0+6] ^ Br1[R1+7] ^ Br2[R2+7] ^ Br3[R3+6];
-
- Bp += BLOCK_SIZE/4;
-
- offsetr0 = (offsetr0 + BLOCK_SIZE/4) & 0x7f;
- offsetr1 = (offsetr1 + BLOCK_SIZE/4) & 0x7f;
- offsetr2 = (offsetr2 + BLOCK_SIZE/4) & 0x7f;
- offsetr3 = (offsetr3 + BLOCK_SIZE/4) & 0x7f;
-
- Br0 = B + offsetr0;
- Br1 = B + offsetr1;
- Br2 = B + offsetr2;
- Br3 = B + offsetr3;
- }
-
- memmove(B,Bp_space,sizeof(Bp_space));
-}
-
-
-
-void bs_mixcolumns(word_t * B)
-{
- word_t Bp_space[BLOCK_SIZE];
- word_t * Bp = Bp_space;
- // to understand this, see
- // https://en.wikipedia.org/wiki/Rijndael_mix_columns
-
- int i = 0;
- for (; i < 4; i++)
- {
- // of = A0 ^ A1;
- // A0 = A0 ^ (0x1b & ((signed char)of>>7));
-
- //// 2 * A0
- // A0 = A0 ^ (A0 << 1)
-
- //// + 3 * A1
- // A0 = A0 ^ (A1)
- // A0 = A0 ^ (A1<<1)
-
- //// + A2 + A3
- // A0 = A0 ^ (A2)
- // A0 = A0 ^ (A3)
- // A0.7 A1.7
- word_t of = B[A0+7] ^ B[A1+7];
-
- // 2*A0 2*A1 A1 A2 A3
- Bp[A0+0] = B[A1+0] ^ B[A2+0] ^ B[A3+0] ^ of;
- Bp[A0+1] = B[A0+0] ^ B[A1+0] ^ B[A1+1] ^ B[A2+1] ^ B[A3+1] ^ of;
- Bp[A0+2] = B[A0+1] ^ B[A1+1] ^ B[A1+2] ^ B[A2+2] ^ B[A3+2];
- Bp[A0+3] = B[A0+2] ^ B[A1+2] ^ B[A1+3] ^ B[A2+3] ^ B[A3+3] ^ of;
- Bp[A0+4] = B[A0+3] ^ B[A1+3] ^ B[A1+4] ^ B[A2+4] ^ B[A3+4] ^ of;
- Bp[A0+5] = B[A0+4] ^ B[A1+4] ^ B[A1+5] ^ B[A2+5] ^ B[A3+5];
- Bp[A0+6] = B[A0+5] ^ B[A1+5] ^ B[A1+6] ^ B[A2+6] ^ B[A3+6];
- Bp[A0+7] = B[A0+6] ^ B[A1+6] ^ B[A1+7] ^ B[A2+7] ^ B[A3+7];
-
-
-
- // of = A1 ^ A2
- // A1 = A1 ^ (0x1b & ((signed char)of>>7));
-
- //// A0
- // A1 = A1 ^ (A0)
-
- //// + 2 * A1
- // A1 = A1 ^ (A1 << 1)
-
- //// + 3 * A2
- // A1 = A1 ^ (A2)
- // A1 = A1 ^ (A2<<1)
-
- //// + A3
- // A1 = A1 ^ (A3)
-
- of = B[A1+7] ^ B[A2+7];
-
- // A0 2*A1 2*A2 A2 A3
- Bp[A1+0] = B[A0+0] ^ B[A2+0] ^ B[A3+0] ^ of;
- Bp[A1+1] = B[A0+1] ^ B[A1+0] ^ B[A2+0] ^ B[A2+1] ^ B[A3+1] ^ of;
- Bp[A1+2] = B[A0+2] ^ B[A1+1] ^ B[A2+1] ^ B[A2+2] ^ B[A3+2];
- Bp[A1+3] = B[A0+3] ^ B[A1+2] ^ B[A2+2] ^ B[A2+3] ^ B[A3+3] ^ of;
- Bp[A1+4] = B[A0+4] ^ B[A1+3] ^ B[A2+3] ^ B[A2+4] ^ B[A3+4] ^ of;
- Bp[A1+5] = B[A0+5] ^ B[A1+4] ^ B[A2+4] ^ B[A2+5] ^ B[A3+5];
- Bp[A1+6] = B[A0+6] ^ B[A1+5] ^ B[A2+5] ^ B[A2+6] ^ B[A3+6];
- Bp[A1+7] = B[A0+7] ^ B[A1+6] ^ B[A2+6] ^ B[A2+7] ^ B[A3+7];
-
-
- // of = A2 ^ A3
- // A2 = A2 ^ (0x1b & ((signed char)of>>7));
-
- //// A0 + A1
- // A2 = A2 ^ (A0)
- // A2 = A2 ^ (A1)
-
- //// + 2 * A2
- // A2 = A2 ^ (A2 << 1)
-
- //// + 3 * A3
- // A2 = A2 ^ (A3)
- // A2 = A2 ^ (A3<<1)
-
-
- of = B[A2+7] ^ B[A3+7];
-
- // A0 A1 2*A2 2*A3 A3
- Bp[A2+0] = B[A0+0] ^ B[A1+0] ^ B[A3+0] ^ of;
- Bp[A2+1] = B[A0+1] ^ B[A1+1] ^ B[A2+0] ^ B[A3+0] ^ B[A3+1] ^ of;
- Bp[A2+2] = B[A0+2] ^ B[A1+2] ^ B[A2+1] ^ B[A3+1] ^ B[A3+2];
- Bp[A2+3] = B[A0+3] ^ B[A1+3] ^ B[A2+2] ^ B[A3+2] ^ B[A3+3] ^ of;
- Bp[A2+4] = B[A0+4] ^ B[A1+4] ^ B[A2+3] ^ B[A3+3] ^ B[A3+4] ^ of;
- Bp[A2+5] = B[A0+5] ^ B[A1+5] ^ B[A2+4] ^ B[A3+4] ^ B[A3+5];
- Bp[A2+6] = B[A0+6] ^ B[A1+6] ^ B[A2+5] ^ B[A3+5] ^ B[A3+6];
- Bp[A2+7] = B[A0+7] ^ B[A1+7] ^ B[A2+6] ^ B[A3+6] ^ B[A3+7];
-
-
- // A3 = A0 ^ A3
- // A3 = A3 ^ (0x1b & ((signed char)of>>7));
-
- //// 3 * A0
- // A3 = A3 ^ (A0)
- // A3 = A3 ^ (A0 << 1)
-
- //// + A1 + A2
- // A3 = A3 ^ A1
- // A3 = A3 ^ A2
-
- //// + 2 * A3
- // A3 = A3 ^ (A3<<1)
-
- of = B[A0+7] ^ B[A3+7];
-
- // 2*A0 A0 A1 A2 2*A3
- Bp[A3+0] = B[A0+0] ^ B[A1+0] ^ B[A2+0] ^ of;
- Bp[A3+1] = B[A0+1] ^ B[A0+0] ^ B[A1+1] ^ B[A2+1] ^ B[A3+0] ^ of;
- Bp[A3+2] = B[A0+2] ^ B[A0+1] ^ B[A1+2] ^ B[A2+2] ^ B[A3+1];
- Bp[A3+3] = B[A0+3] ^ B[A0+2] ^ B[A1+3] ^ B[A2+3] ^ B[A3+2] ^ of;
- Bp[A3+4] = B[A0+4] ^ B[A0+3] ^ B[A1+4] ^ B[A2+4] ^ B[A3+3] ^ of;
- Bp[A3+5] = B[A0+5] ^ B[A0+4] ^ B[A1+5] ^ B[A2+5] ^ B[A3+4];
- Bp[A3+6] = B[A0+6] ^ B[A0+5] ^ B[A1+6] ^ B[A2+6] ^ B[A3+5];
- Bp[A3+7] = B[A0+7] ^ B[A0+6] ^ B[A1+7] ^ B[A2+7] ^ B[A3+6];
-
-
- //
- Bp += BLOCK_SIZE/4;
- B += BLOCK_SIZE/4;
- }
-
-
- memmove(B - BLOCK_SIZE,Bp - BLOCK_SIZE,sizeof(Bp_space));
-}
-
-void bs_mixcolumns_rev(word_t * B)
-{
- // to understand this, see
- // https://en.wikipedia.org/wiki/Rijndael_mix_columns
- // TODO combine with shiftrows for performance on decryption
- word_t Bp_space[BLOCK_SIZE];
- word_t * Bp = Bp_space;
-
-
- int i = 0;
- for (; i < BLOCK_SIZE / 4; i += BLOCK_SIZE / 16)
- {
-
- //// state[i][0] = A0*0x0e + A1*0x0b + A2*0x0d + A3*0x09
- // overflow:
- /* A0 * 0b1110 */ /* A1 * 0b1011 */ /* A2 * 0b1101 */ /* A3 * 0b1001 */
- word_t of0 = ( (B[A0+7] ^ B[A0+6] ^ B[A0+5]) ^ (B[A1 + 7] ^ B[A1+5]) ^ (B[A2+6] ^ B[A2+5]) ^ ( B[A3+5] )); // 2 bit
- word_t of1 = ( (B[A0+7] ^ B[A0+6]) ^ ( B[A1+6]) ^ (B[A2+7] ^ B[A2+6]) ^ ( B[A3+6] )); // 3 bit
- word_t of2 = ( (B[A0+7]) ^ ( B[A1+7]) ^ ( B[A2+7]) ^ ( B[A3+7] )); // 4 bit
-
- // inverse:
- // 1110 1011 1101 1001
- // A0 = A0 * 14 + A1 * 11 + A2 * 13 + A3 * 9
- // A0 = A0 * (2+4+8) + A1 * (1+2+8) + A2 * (1+4+8) + A3 * (1+8)
-
- // (2*A0 + 4*A0 + 8*A0 ) + (8*A1 + 2*A1 + A1 ) + ( A2 + 4*A2 + 8*A2 ) + ( A3 + 8*A3)
- Bp[A0+0] = B[A1+0] ^ B[A2+0] ^ B[A3+0] ^ of0;
- Bp[A0+1] = B[A0+0] ^ B[A1+0] ^ B[A1+1] ^ B[A2+1] ^ B[A3+1] ^ of0 ^ of1;
- Bp[A0+2] = B[A0+1] ^ B[A0+0] ^ B[A1+1] ^ B[A1+2] ^ B[A2+2] ^ B[A2+0] ^ B[A3+2] ^ of1 ^ of2;
- Bp[A0+3] = B[A0+2] ^ B[A0+1] ^ B[A0+0] ^ B[A1+0] ^ B[A1+2] ^ B[A1+3] ^ B[A2+3] ^ B[A2+1] ^ B[A2+0] ^ B[A3+3] ^ B[A3+0] ^ of0 ^ of2;
- Bp[A0+4] = B[A0+3] ^ B[A0+2] ^ B[A0+1] ^ B[A1+1] ^ B[A1+3] ^ B[A1+4] ^ B[A2+4] ^ B[A2+2] ^ B[A2+1] ^ B[A3+4] ^ B[A3+1] ^ of0 ^ of1;
- Bp[A0+5] = B[A0+4] ^ B[A0+3] ^ B[A0+2] ^ B[A1+2] ^ B[A1+4] ^ B[A1+5] ^ B[A2+5] ^ B[A2+3] ^ B[A2+2] ^ B[A3+5] ^ B[A3+2] ^ of1 ^ of2;
- Bp[A0+6] = B[A0+5] ^ B[A0+4] ^ B[A0+3] ^ B[A1+3] ^ B[A1+5] ^ B[A1+6] ^ B[A2+6] ^ B[A2+4] ^ B[A2+3] ^ B[A3+6] ^ B[A3+3] ^ of2;
- Bp[A0+7] = B[A0+6] ^ B[A0+5] ^ B[A0+4] ^ B[A1+4] ^ B[A1+6] ^ B[A1+7] ^ B[A2+7] ^ B[A2+5] ^ B[A2+4] ^ B[A3+7] ^ B[A3+4];
-
-
-
- //// state[i][1] = A0*0x09 + A1*0xe + A2*0x0b + A3*0x0d
- // overflow:
- /* A0 * 0b1001 */ /* A1 * 0b1110 */ /* A2 * 0b101 1 */ /* A3 * 0b1101 */
- of0 = ( (B[A0+5]) ^ (B[A1+7] ^ B[A1+6] ^ B[A1+5]) ^ (B[A2 + 7] ^ B[A2+5]) ^ (B[A3+6] ^ B[A3+5])); // 2 bit
- of1 = ( (B[A0+6]) ^ (B[A1+7] ^ B[A1+6]) ^ ( B[A2+6]) ^ (B[A3+7] ^ B[A3+6])); // 3 bit
- of2 = ( (B[A0+7]) ^ (B[A1+7]) ^ ( B[A2+7]) ^ ( B[A3+7])); // 4 bit
-
- // inverse:
- // 1001 1110 1011 1101
- // A1 = A0 * 9 + A1 * 14 + A2 * 11 + A3 * 13
- // A1 = A0 * (1+8) + A1 * (2+4+8) + A2 * (1+2+8) + A3 * (1+4+8)
-
- // (1*A0 + 8*A0 ) +(2*A1 + 4*A1 + 8*A1 ) + (1*A2 + 2*A2 + 8*A2 ) + (1*A3 + 4*A3 + 8*A3)
- Bp[A1+0] = B[A0+0] ^ B[A2+0] ^ B[A3+0] ^ of0;
- Bp[A1+1] = B[A0+1] ^ B[A1+0] ^ B[A2+1] ^ B[A2+0] ^ B[A3+1] ^ of0 ^ of1;
- Bp[A1+2] = B[A0+2] ^ B[A1+1] ^ B[A1+0] ^ B[A2+2] ^ B[A2+1] ^ B[A3+2] ^ B[A3+0] ^ of1 ^ of2;
- Bp[A1+3] = B[A0+3] ^ B[A0+0] ^ B[A1+2] ^ B[A1+1] ^ B[A1+0] ^ B[A2+3] ^ B[A2+2] ^ B[A2+0] ^ B[A3+3] ^ B[A3+1] ^ B[A3+0] ^ of0 ^ of2;
- Bp[A1+4] = B[A0+4] ^ B[A0+1] ^ B[A1+3] ^ B[A1+2] ^ B[A1+1] ^ B[A2+4] ^ B[A2+3] ^ B[A2+1] ^ B[A3+4] ^ B[A3+2] ^ B[A3+1] ^ of0 ^ of1;
- Bp[A1+5] = B[A0+5] ^ B[A0+2] ^ B[A1+4] ^ B[A1+3] ^ B[A1+2] ^ B[A2+5] ^ B[A2+4] ^ B[A2+2] ^ B[A3+5] ^ B[A3+3] ^ B[A3+2] ^ of1 ^ of2;
- Bp[A1+6] = B[A0+6] ^ B[A0+3] ^ B[A1+5] ^ B[A1+4] ^ B[A1+3] ^ B[A2+6] ^ B[A2+5] ^ B[A2+3] ^ B[A3+6] ^ B[A3+4] ^ B[A3+3] ^ of2;
- Bp[A1+7] = B[A0+7] ^ B[A0+4] ^ B[A1+6] ^ B[A1+5] ^ B[A1+4] ^ B[A2+7] ^ B[A2+6] ^ B[A2+4] ^ B[A3+7] ^ B[A3+5] ^ B[A3+4];
-
-
- //// state[i][2] = A0*0x0d + A1*0x09 + A2*0x0e + A3*0x0b
- // overflow:
- /* A1 * 0b1001 */ /* A2 * 0b1110 */ /* A3 * 0b1011 */ /* A0 * 0b1101 */
- of0 = ( (B[A1+5]) ^ (B[A2+7] ^ B[A2+6] ^ B[A2+5]) ^ (B[A3 + 7] ^ B[A3+5]) ^ (B[A0+6] ^ B[A0+5])); // 2 bit
- of1 = ( (B[A1+6]) ^ (B[A2+7] ^ B[A2+6]) ^ ( B[A3+6]) ^ (B[A0+7] ^ B[A0+6])); // 3 bit
- of2 = ( (B[A1+7]) ^ (B[A2+7]) ^ ( B[A3+7]) ^ ( B[A0+7])); // 4 bit
-
- // inverse:
- // 1001 1110 1011 1101
- // A2 = A1 * 9 + A2 * 14 + A3 * 11 + A0 * 13
- // A2 = A1 * (1+8) + A2 * (2+4+8) + A3 * (1+2+8) + A0 * (1+4+8)
-
- // (1*A1 + 8*A1) + ( 2*A2 + 4*A2 + 8*A2) + (1*A3 2*A2 + 8*A2) + (1*A0 + 4*A0 + 8*A0)
- Bp[A2+0] = B[A1+0] ^ B[A3+0] ^ B[A0+0] ^ of0;
- Bp[A2+1] = B[A1+1] ^ B[A2+0] ^ B[A3+1] ^ B[A3+0] ^ B[A0+1] ^ of0 ^ of1;
- Bp[A2+2] = B[A1+2] ^ B[A2+1] ^ B[A2+0] ^ B[A3+2] ^ B[A3+1] ^ B[A0+2] ^ B[A0+0] ^ of1 ^ of2;
- Bp[A2+3] = B[A1+3] ^ B[A1+0] ^ B[A2+2] ^ B[A2+1] ^ B[A2+0] ^ B[A3+3] ^ B[A3+2] ^ B[A3+0] ^ B[A0+3] ^ B[A0+1] ^ B[A0+0] ^ of0 ^ of2;
- Bp[A2+4] = B[A1+4] ^ B[A1+1] ^ B[A2+3] ^ B[A2+2] ^ B[A2+1] ^ B[A3+4] ^ B[A3+3] ^ B[A3+1] ^ B[A0+4] ^ B[A0+2] ^ B[A0+1] ^ of0 ^ of1;
- Bp[A2+5] = B[A1+5] ^ B[A1+2] ^ B[A2+4] ^ B[A2+3] ^ B[A2+2] ^ B[A3+5] ^ B[A3+4] ^ B[A3+2] ^ B[A0+5] ^ B[A0+3] ^ B[A0+2] ^ of1 ^ of2;
- Bp[A2+6] = B[A1+6] ^ B[A1+3] ^ B[A2+5] ^ B[A2+4] ^ B[A2+3] ^ B[A3+6] ^ B[A3+5] ^ B[A3+3] ^ B[A0+6] ^ B[A0+4] ^ B[A0+3] ^ of2;
- Bp[A2+7] = B[A1+7] ^ B[A1+4] ^ B[A2+6] ^ B[A2+5] ^ B[A2+4] ^ B[A3+7] ^ B[A3+6] ^ B[A3+4] ^ B[A0+7] ^ B[A0+5] ^ B[A0+4];
-
-
-
- //// state[i][3] = A0*0x0b + A1*0x0d + A2*0x09 + A3*0x0e
- // overflow:
- /* A2 * 0b1001 */ /* A3 * 0b1110 */ /* A0 * 0b1011 */ /* A1 * 0b1101 */
- of0 = ( (B[A2+5]) ^ (B[A3+7] ^ B[A3+6] ^ B[A3+5]) ^ (B[A0 + 7] ^ B[A0+5]) ^ (B[A1+6] ^ B[A1+5])); // 2 bit
- of1 = ( (B[A2+6]) ^ (B[A3+7] ^ B[A3+6]) ^ ( B[A0+6]) ^ (B[A1+7] ^ B[A1+6])); // 3 bit
- of2 = ( (B[A2+7]) ^ (B[A3+7]) ^ ( B[A0+7]) ^ ( B[A1+7])); // 4 bit
-
- // inverse:
- // 1001 1110 1011 1101
- // A2 = A2 * 9 + A3 * 14 + A0 * 11 + A1 * 13
- // A2 = A2 * (1+8) + A3 * (2+4+8) + A0 * (1+2+8) + A1 * (1+4+8)
-
- // (1*A2 + 8*A2) + ( 2*A3 + 4*A3 + 8*A3) + (1*A0 2*A0 + 8*A0) + (1*A1 + 4*A1 + 8*A1)
- Bp[A3+0] = B[A2+0] ^ B[A0+0] ^ B[A1+0] ^ of0;
- Bp[A3+1] = B[A2+1] ^ B[A3+0] ^ B[A0+1] ^ B[A0+0] ^ B[A1+1] ^ of0 ^ of1;
- Bp[A3+2] = B[A2+2] ^ B[A3+1] ^ B[A3+0] ^ B[A0+2] ^ B[A0+1] ^ B[A1+2] ^ B[A1+0] ^ of1 ^ of2;
- Bp[A3+3] = B[A2+3] ^ B[A2+0] ^ B[A3+2] ^ B[A3+1] ^ B[A3+0] ^ B[A0+3] ^ B[A0+2] ^ B[A0+0] ^ B[A1+3] ^ B[A1+1] ^ B[A1+0] ^ of0 ^ of2;
- Bp[A3+4] = B[A2+4] ^ B[A2+1] ^ B[A3+3] ^ B[A3+2] ^ B[A3+1] ^ B[A0+4] ^ B[A0+3] ^ B[A0+1] ^ B[A1+4] ^ B[A1+2] ^ B[A1+1] ^ of0 ^ of1;
- Bp[A3+5] = B[A2+5] ^ B[A2+2] ^ B[A3+4] ^ B[A3+3] ^ B[A3+2] ^ B[A0+5] ^ B[A0+4] ^ B[A0+2] ^ B[A1+5] ^ B[A1+3] ^ B[A1+2] ^ of1 ^ of2;
- Bp[A3+6] = B[A2+6] ^ B[A2+3] ^ B[A3+5] ^ B[A3+4] ^ B[A3+3] ^ B[A0+6] ^ B[A0+5] ^ B[A0+3] ^ B[A1+6] ^ B[A1+4] ^ B[A1+3] ^ of2;
- Bp[A3+7] = B[A2+7] ^ B[A2+4] ^ B[A3+6] ^ B[A3+5] ^ B[A3+4] ^ B[A0+7] ^ B[A0+6] ^ B[A0+4] ^ B[A1+7] ^ B[A1+5] ^ B[A1+4];
-
- Bp += BLOCK_SIZE/4;
- B += BLOCK_SIZE/4;
- }
-
- memmove(B - BLOCK_SIZE, Bp - BLOCK_SIZE,sizeof(Bp_space));
-
-}
-
-void bs_expand_key(word_t (* rk)[BLOCK_SIZE], uint8_t * _key)
-{
- // TODO integrate this better
- uint8_t key[KEY_SCHEDULE_SIZE];
- memmove(key,_key,BLOCK_SIZE/8);
- expand_key(key);
-
- int i, j = 0, k, l;
- for (i = 0; i < KEY_SCHEDULE_SIZE; i += (BLOCK_SIZE/8))
- {
- memmove(rk[j], key + i, BLOCK_SIZE / 8);
-
- for (k = WORDS_PER_BLOCK; k < 128; k += WORDS_PER_BLOCK)
- {
- for (l = 0; l < WORDS_PER_BLOCK; l++)
- {
- rk[j][k + l] = rk[j][l];
- }
- }
- bs_transpose(rk[j]);
- j++;
- }
-
-}
-
-void bs_cipher(word_t state[BLOCK_SIZE], word_t (* rk)[BLOCK_SIZE])
-{
- int round;
- bs_transpose(state);
-
-
- bs_addroundkey(state,rk[0]);
- for (round = 1; round < 10; round++)
- {
- bs_apply_sbox(state);
- /*bs_shiftrows(state);*/
- /*bs_mixcolumns(state);*/
- bs_shiftmix(state);
- bs_addroundkey(state,rk[round]);
- }
- bs_apply_sbox(state);
- bs_shiftrows(state);
- bs_addroundkey(state,rk[10]);
- bs_transpose_rev(state);
-}
-
-void bs_cipher_rev(word_t state[BLOCK_SIZE], word_t (* rk)[BLOCK_SIZE])
-{
- int round;
- bs_transpose(state);
-
- bs_addroundkey(state,rk[10]);
- for (round = 9; round > 0; round--)
- {
- bs_shiftrows_rev(state);
- bs_apply_sbox_rev(state);
- bs_addroundkey(state,rk[round]);
- bs_mixcolumns_rev(state);
- }
- bs_shiftrows_rev(state);
- bs_apply_sbox_rev(state);
- bs_addroundkey(state,rk[0]);
-
- bs_transpose_rev(state);
-}
-
-void aes_ecb_test()
-{
- uint8_t key_vector[16] = "\x2b\x7e\x15\x16\x28\xae\xd2\xa6\xab\xf7\x15\x88\x09\xcf\x4f\x3c";
- uint8_t pt_vector[16] = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a";
- uint8_t ct_vector[16] = "\x3a\xd7\x7b\xb4\x0d\x7a\x36\x60\xa8\x9e\xca\xf3\x24\x66\xef\x97";
- uint8_t output[16];
- uint8_t input[16];
-
- printf("AES ECB\n");
-
- aes_ecb_encrypt(output, pt_vector,16,key_vector);
-
-
- printf("cipher text: \n");
- dump_hex(output, 16);
-
- aes_ecb_decrypt(input, output, 16, key_vector);
-
- printf("plain text: \n");
- dump_hex((uint8_t * )input,16);
-
- if (memcmp(pt_vector, input, 16) != 0)
- {
- fprintf(stderr,"error: decrypted ciphertext is not the same as the input plaintext\n");
- EXIT1;
- }
- else if (memcmp(ct_vector, output, 16) != 0)
- {
- fprintf(stderr,"error: ciphertext is not the same as the test vector\n");
- EXIT1;
- }
- else
- {
- printf("ECB passes test vector\n\n");
- }
-}
-
-void aes_ctr_test()
-{
-// Test vector from NIST for 4 input blocks
-#define AES_CTR_TESTS_BYTES 64
-
- uint8_t key_vector[16] =
- "\x2b\x7e\x15\x16\x28\xae\xd2\xa6\xab\xf7\x15\x88\x09\xcf\x4f\x3c";
-
- uint8_t iv_vector[16] =
- "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
-
- uint8_t pt_vector[AES_CTR_TESTS_BYTES] =
- "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
- "\xae\x2d\x8a\x57\x1e\x03\xac\x9c\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
- "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
- "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17\xad\x2b\x41\x7b\xe6\x6c\x37\x10"
- ;
-
- uint8_t ct_vector[AES_CTR_TESTS_BYTES] =
- "\x87\x4d\x61\x91\xb6\x20\xe3\x26\x1b\xef\x68\x64\x99\x0d\xb6\xce"
- "\x98\x06\xf6\x6b\x79\x70\xfd\xff\x86\x17\x18\x7b\xb9\xff\xfd\xff"
- "\x5a\xe4\xdf\x3e\xdb\xd5\xd3\x5e\x5b\x4f\x09\x02\x0d\xb0\x3e\xab"
- "\x1e\x03\x1d\xda\x2f\xbe\x03\xd1\x79\x21\x70\xa0\xf3\x00\x9c\xee"
- ;
-
- uint8_t output[AES_CTR_TESTS_BYTES];
- uint8_t input[AES_CTR_TESTS_BYTES];
-
- printf("AES CTR\n");
-
- aes_ctr_encrypt(output,pt_vector,AES_CTR_TESTS_BYTES,key_vector, iv_vector);
-
- printf("cipher text: \n");
- dump_hex(output,AES_CTR_TESTS_BYTES);
-
- aes_ctr_decrypt(input,output,AES_CTR_TESTS_BYTES,key_vector, iv_vector);
-
- printf("plain text: \n");
- dump_hex(input,AES_CTR_TESTS_BYTES);
-
- if (memcmp(pt_vector, input, AES_CTR_TESTS_BYTES) != 0)
- {
- fprintf(stderr,"error: decrypted ciphertext is not the same as the input plaintext\n");
- EXIT1;
- }
- else if (memcmp(ct_vector, output, AES_CTR_TESTS_BYTES) != 0)
- {
- fprintf(stderr,"error: ciphertext is not the same as the test vector\n");
- EXIT1;
- }
- else
- {
- printf("CTR passes test vector\n\n");
- }
-
+int i[1];
+int j, bs_transpose_dst_k, k, s, o;
+void a(int (*)[], uint8_t *);
+void b(uint8_t c, uint8_t d, size_t e, uint8_t f, uint8_t g) {
+ int l[1];
+ a(l, f);
+}
+void a(int (*l)[], uint8_t *m) {
+ for (; o < 76; o += 8) {
+ {
+ int *n = i;
+ bs_transpose_dst_k = 0;
+ for (; bs_transpose_dst_k < 64; bs_transpose_dst_k++) {
+ j = 0;
+ for (; j < 64; j++) {
+ k = &s;
+ n[j] = k & 1;
+ }
+ }
+ }
+ }
+}
+void aes_ecb_test() {}
+void aes_ctr_test() {
+ uint8_t p = "";
+ uint8_t q = "";
+ uint8_t r = "";
+ uint8_t output[4];
+ b(output, r, 4, p, q);
}
diff --git a/test/monniaux/bitsliced-aes/one_file/reduce/bitsliced-aes_main.c b/test/monniaux/bitsliced-aes/one_file/reduce/bitsliced-aes_main.c
index 71f67f0f..0d48b3b8 100644
--- a/test/monniaux/bitsliced-aes/one_file/reduce/bitsliced-aes_main.c
+++ b/test/monniaux/bitsliced-aes/one_file/reduce/bitsliced-aes_main.c
@@ -1,4 +1,4 @@
-#include "../../../clock.h"
+#include "/home/monniaux/work/Kalray/CompCert/test/monniaux/clock.h"
void aes_ecb_test(void);
void aes_ctr_test(void);
diff --git a/test/monniaux/bitsliced-aes/one_file/reduce/compare.sh b/test/monniaux/bitsliced-aes/one_file/reduce/compare.sh
index e98f4677..a21bb465 100755
--- a/test/monniaux/bitsliced-aes/one_file/reduce/compare.sh
+++ b/test/monniaux/bitsliced-aes/one_file/reduce/compare.sh
@@ -1,7 +1,7 @@
#!/bin/bash
ROOT=/home/monniaux/work/Kalray/CompCert
SRC=bitsliced-aes_compute.c
-MAIN=bitsliced-aes_main
+MAIN=/home/monniaux/work/Kalray/CompCert/test/monniaux/bitsliced-aes/one_file/reduce/bitsliced-aes_main
k1-mbr-gcc -Werror=implicit -Werror=uninitialized -O3 $SRC $ROOT/test/monniaux/clock.gcc.k1c.o $MAIN.gcc.k1c.o -o bitsliced-aes.gcc.k1c &&
$ROOT/ccomp -O3 -fno-unprototyped -O3 $SRC $ROOT/test/monniaux/clock.gcc.k1c.o $MAIN.gcc.k1c.o -o bitsliced-aes.ccomp.k1c &&
gcc -Werror=implicit -Werror=uninitialized -O3 $SRC $ROOT/test/monniaux/clock.gcc.host.o $MAIN.c -o bitsliced-aes.gcc.host &&
@@ -12,5 +12,5 @@ grep cycles ./bitsliced-aes.gcc.k1c.out > ./bitsliced-aes.gcc.k1c.cycles &&
grep cycles ./bitsliced-aes.ccomp.k1c.out > ./bitsliced-aes.ccomp.k1c.cycles &&
sed -i -e 's/cycles: //' ./bitsliced-aes.gcc.k1c.cycles &&
sed -i -e 's/cycles: //' ./bitsliced-aes.ccomp.k1c.cycles &&
-test $(cat ./bitsliced-aes.ccomp.k1c.cycles) -gt 300 &&
+test $(cat ./bitsliced-aes.gcc.k1c.cycles) -gt 100000 &&
test $(cat ./bitsliced-aes.ccomp.k1c.cycles) -gt $(expr 2 '*' $(cat ./bitsliced-aes.gcc.k1c.cycles))
diff --git a/test/monniaux/ternary/ternary.c b/test/monniaux/ternary/ternary.c
index 45201ff8..79025639 100644
--- a/test/monniaux/ternary/ternary.c
+++ b/test/monniaux/ternary/ternary.c
@@ -5,10 +5,16 @@
typedef uint32_t data;
+#if 0
+#define TERNARY(a, b, c) ((a) ? (b) : (c))
+#else
+#define TERNARY(a, b, c) (((-(a)) & (b)) | ((-1+(a)) & (c)))
+#endif
+
data silly_computation(void) {
data x = 1;
for(int i=0; i<10000; i++) {
- x = x * (((x & 0x100) != 0) ? 45561U : 337777U);
+ x = x * TERNARY(((x & 0x100) != 0), 45561U, 337777U);
}
return x;
}