aboutsummaryrefslogtreecommitdiffstats
path: root/scheduling/BTL.v
blob: 389d8d938b2ef4c58514e50309f03624ebc77ece (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
(** The BTL intermediate language: abstract syntax and semantics.

  BTL stands for "Block Transfer Language".

  Informally, a block is a piece of "loop-free" code, with a single entry-point,
  hence, such that transformation preserving locally the semantics of each block,
  preserve also globally the semantics of the function.

  a BTL function is a CFG where each node is such a block, represented by structured code.

  BTL gives a structured view of RTL code.
  It is dedicated to optimizations validated by "symbolic simulation" over blocks.


*)

Require Import Coqlib Maps.
Require Import AST Integers Values Events Memory Globalenvs Smallstep.
Require Import RTL Op Registers OptionMonad.
Require Import Lia.

Import ListNotations.

(** * Abstract syntax *)

Definition exit := node.  (* we may generalize this with register renamings at exit, 
                             like in "phi" nodes of SSA-form *)

(** final instructions (that stops block execution) *)
Inductive final: Type :=
  | Bgoto (succ:exit) (** No operation -- just branch to [succ]. *)
  | Breturn (res: option reg)
      (** terminates the execution of the current function.  It returns the value of the given
          register, or [Vundef] if none is given. *)
  | Bcall (sig: signature) (fn: reg + ident) (args:list reg) (dest:reg) (succ:exit)
      (** invokes the function determined by [fn] (either a function pointer found in a register or a
          function name), giving it the values of registers [args] as arguments.
          It stores the return value in [dest] and branches to [succ]. *)
  | Btailcall (sig:signature) (fn: reg + ident) (args: list reg)
      (**  performs a function invocation in tail-call position 
          (the current function terminates after the call, returning the result of the callee)  
      *)
  | Bbuiltin (ef:external_function) (args:list (builtin_arg reg)) (dest:builtin_res reg) (succ:exit)
      (** calls the built-in function identified by [ef], giving it the values of [args] as arguments.
          It stores the return value in [dest] and branches to [succ]. *)
  | Bjumptable (arg:reg) (tbl:list exit)
      (** [Bjumptable arg tbl] transitions to the node that is the [n]-th
          element of the list [tbl], where [n] is the unsigned integer value of register [arg]. *)
  .

(* instruction block *)
Inductive iblock: Type :=
(* final instructions that stops block execution *)
  | BF (fi: final)
(* basic instructions that continues block execution, except when aborting *)
  | Bnop (* nop instruction *)
  | Bop (op:operation) (args:list reg) (dest:reg) 
      (** performs the arithmetic operation [op] over the values of registers [args], stores the result in [dest] *)
  | Bload (trap:trapping_mode) (chunk:memory_chunk) (addr:addressing) (args:list reg) (dest:reg)
      (** loads a [chunk] quantity from the address determined by the addressing mode [addr] 
          and the values of the [args] registers, stores the quantity just read into [dest].
          If trap=NOTRAP, then failures lead to a default value written to [dest]. *)
  | Bstore (chunk:memory_chunk) (addr:addressing) (args:list reg) (src:reg)
      (** stores the value of register [src] in the [chunk] quantity at the
          the address determined by the addressing mode [addr] and the
          values of the [args] registers. *)
(* composed instructions *)
  | Bseq (b1 b2: iblock)
      (** starts by running [b1] and stops here if execution of [b1] has reached a final instruction or aborted
          or continue with [b2] otherwise *)
  | Bcond (cond:condition) (args:list reg) (ifso ifnot: iblock) (info:option bool)
      (** evaluates the boolean condition [cond] over the values of registers [args].  
          If the condition is true, it continues on [ifso].
          If the condition is false, it continues on [ifnot].
          [info] is a ghost field there to provide information relative to branch prediction. *)
  .
Coercion BF: final >-> iblock.


(** NB: - a RTL [(Inop pc)] ending a branch of block is encoded by [(Bseq Bnop (Bgoto pc))].
        - a RTL [(Inop pc)] in the middle of a branch is simply encoded by [Bnop].
        - the same trick appears for all "basic" instructions and [Icond].
*)



Record iblock_info := { 
  entry: iblock;
  input_regs: Regset.t (* extra liveness information for BTL functional semantics *)
}.

Definition code: Type := PTree.t iblock_info.

Record function: Type := mkfunction {
  fn_sig: signature;
  fn_params: list reg;
  fn_stacksize: Z;
  fn_code: code;
  fn_entrypoint: node
}.

(** A function description comprises a control-flow graph (CFG) [fn_code]
    (a partial finite mapping from nodes to instructions).  As in Cminor,
    [fn_sig] is the function signature and [fn_stacksize] the number of bytes
    for its stack-allocated activation record.  [fn_params] is the list
    of registers that are bound to the values of arguments at call time.
    [fn_entrypoint] is the node of the first instruction of the function
    in the CFG. *)

Definition fundef := AST.fundef function.

Definition program := AST.program fundef unit.

Definition funsig (fd: fundef) :=
  match fd with
  | Internal f => fn_sig f
  | External ef => ef_sig ef
  end.

(** * Operational semantics *)

Definition genv := Genv.t fundef unit.

(** The dynamic semantics of BTL is similar to RTL,
    except that the step of one instruction is generalized into the run of one [iblock].
*)

Inductive stackframe : Type :=
  | Stackframe:
      forall (res: reg)            (**r where to store the result *)
             (f: function)         (**r calling function *)
             (sp: val)             (**r stack pointer in calling function *)
             (succ: exit)          (**r program point in calling function *)
             (rs: regset),         (**r register state in calling function *)
      stackframe.

Inductive state : Type :=
  | State:
      forall (stack: list stackframe) (**r call stack *)
             (f: function)            (**r current function *)
             (sp: val)                (**r stack pointer *)
             (pc: node)               (**r current program point in [c] *)
             (rs: regset)             (**r register state *)
             (m: mem),                (**r memory state *)
      state
  | Callstate:
      forall (stack: list stackframe) (**r call stack *)
             (f: fundef)              (**r function to call *)
             (args: list val)         (**r arguments to the call *)
             (m: mem),                (**r memory state *)
      state
  | Returnstate:
      forall (stack: list stackframe) (**r call stack *)
             (v: val)                 (**r return value for the call *)
             (m: mem),                (**r memory state *)
      state.

(** outcome of a block execution *)
Record outcome := out {
   _rs: regset;
   _m: mem;
   _fin: option final 
}.

(* follows RTL semantics to set the result of builtin *)
Definition reg_builtin_res (res: builtin_res reg): option reg :=
  match res with
  | BR r => Some r
  | _ => None
  end.

Section RELSEM.

(** [step] (and in particular [final_step]) is parametrized by function [tr_exit] to transfer registers on each iblock exit.

    In particular, [tr_exit f lpc or rs] computes from [rs] the [rs'] on which the execution from iblock [pc] in [lpc] in [f] may start.

    Here, [or] is an optional register that will be assigned after exiting the iblock, but before entering in [pc]: e.g. the register set by a function call
    before entering in return address.

*)

Variable tr_exit: function -> list exit -> option reg -> regset -> regset.

Variable ge: genv.

Definition find_function (ros: reg + ident) (rs: regset) : option fundef :=
  match ros with
  | inl r => Genv.find_funct ge rs#r
  | inr symb =>
      match Genv.find_symbol ge symb with
      | None => None
      | Some b => Genv.find_funct_ptr ge b
      end
  end.

Local Open Scope option_monad_scope.

(* (* TODO: a new (hopefully simpler) scheme to support "NOTRAP" wrt current scheme of RTL *)

Inductive has_loaded sp rs m chunk addr args v: trapping_mode -> Prop :=
  | has_loaded_normal a trap
      (EVAL: eval_addressing ge sp addr rs##args = Some a)
      (LOAD: Mem.loadv chunk m a = Some v)
      : has_loaded sp rs m chunk addr args v trap
  | has_loaded_default
      (LOAD: forall a, eval_addressing ge sp addr rs##args = Some a -> Mem.loadv chunk m a = None)
      (DEFAULT: v = default_notrap_load_value chunk)
      : has_loaded sp rs m chunk addr args v NOTRAP 
  .

(* TODO: move this scheme in "Memory" module if this scheme is useful ! *)

*)

(** internal big-step execution of one iblock *)
Inductive iblock_istep sp: regset -> mem -> iblock -> regset -> mem -> option final -> Prop :=
  | exec_final rs m fin: iblock_istep sp rs m (BF fin) rs m (Some fin)
  | exec_nop rs m: iblock_istep sp rs m Bnop rs m None
  | exec_op rs m op args res v
     (EVAL: eval_operation ge sp op rs##args m = Some v)
     : iblock_istep sp rs m (Bop op args res) (rs#res <- v) m None
  | exec_load_TRAP rs m chunk addr args dst a v
     (EVAL: eval_addressing ge sp addr rs##args = Some a)
     (LOAD: Mem.loadv chunk m a = Some v)
     : iblock_istep sp rs m (Bload TRAP chunk addr args dst) (rs#dst <- v) m None
(* TODO: replace [exec_load_TRAP] above by this one. See "new scheme" for "NOTRAP" above
  | exec_load rs m trap chunk addr args dst v
      (LOAD: has_loaded sp rs m chunk addr args v trap)
     : iblock_istep sp rs m (Bload trap chunk addr args dst) (rs#dst <- v) m None
*)
  | exec_store rs m chunk addr args src a m' 
     (EVAL: eval_addressing ge sp addr rs##args = Some a)
     (STORE: Mem.storev chunk m a rs#src = Some m')
     : iblock_istep sp rs m (Bstore chunk addr args src) rs m' None
  | exec_seq_stop rs m b1 b2 rs' m' fin
     (EXEC: iblock_istep sp rs m b1 rs' m' (Some fin))
     : iblock_istep sp rs m (Bseq b1 b2) rs' m' (Some fin)
  | exec_seq_continue rs m b1 b2 rs1 m1 rs' m' ofin
     (EXEC1: iblock_istep sp rs m b1 rs1 m1 None)
     (EXEC2: iblock_istep sp rs1 m1 b2 rs' m' ofin)
     : iblock_istep sp rs m (Bseq b1 b2) rs' m' ofin
  | exec_cond rs m cond args ifso ifnot i b rs' m' ofin
     (EVAL: eval_condition cond rs##args m = Some b)
     (EXEC: iblock_istep sp rs m (if b then ifso else ifnot) rs' m' ofin)
     : iblock_istep sp rs m (Bcond cond args ifso ifnot i) rs' m' ofin
  .
Local Hint Constructors iblock_istep: core.

(** A functional variant of [iblock_istep_run] of [iblock_istep].
Lemma [iblock_istep_run_equiv] below provides a proof that "relation" [iblock_istep] is a "partial function".
*)
Fixpoint iblock_istep_run sp ib rs m: option outcome := 
  match ib with
  | BF fin =>
      Some {| _rs := rs; _m := m; _fin := Some fin |}
  (* basic instructions *)
  | Bnop =>
      Some {| _rs := rs; _m:= m; _fin := None |}
  | Bop op args res =>
      SOME v <- eval_operation ge sp op rs##args m IN
      Some {| _rs := rs#res <- v; _m:= m; _fin := None |}
  | Bload TRAP chunk addr args dst =>
      SOME a <- eval_addressing ge sp addr rs##args IN
      SOME v <- Mem.loadv chunk m a IN
      Some {| _rs := rs#dst <- v; _m:= m; _fin := None |}
  | Bload NOTRAP chunk addr args dst =>
      None (* TODO *)
  | Bstore chunk addr args src =>
      SOME a <- eval_addressing ge sp addr rs##args IN
      SOME m' <- Mem.storev chunk m a rs#src IN
      Some {| _rs := rs; _m:= m'; _fin := None |}
 (* composed instructions *)
  | Bseq b1 b2 =>
      SOME out1 <- iblock_istep_run sp b1 rs m IN
      match out1.(_fin) with
      | None => iblock_istep_run sp b2 out1.(_rs) out1.(_m)
      | _ => Some out1 (* stop execution on the 1st final instruction *)
      end
  | Bcond cond args ifso ifnot _ =>
      SOME b <- eval_condition cond rs##args m IN
      iblock_istep_run sp (if b then ifso else ifnot) rs m
  end.

Lemma iblock_istep_run_equiv sp rs m ib rs' m' ofin:
  iblock_istep sp rs m ib rs' m' ofin <-> iblock_istep_run sp ib rs m = Some {| _rs := rs'; _m:= m'; _fin := ofin |}.
Proof.
  constructor.
  - induction 1; simpl; try autodestruct; try_simplify_someHyps.
  - generalize rs m rs' m' ofin; clear rs m rs' m' ofin.
    induction ib; simpl; repeat (try autodestruct; try_simplify_someHyps).
    destruct o; try_simplify_someHyps; subst; eauto.
Qed.

Local Open Scope list_scope.

Inductive final_step stack f sp rs m: final -> trace -> state -> Prop :=
  | exec_Bgoto pc:
      final_step stack f sp rs m (Bgoto pc) E0
                 (State stack f sp pc (tr_exit f [pc] None rs) m)
  | exec_Breturn or stk m':
      sp = (Vptr stk Ptrofs.zero) ->
      Mem.free m stk 0 f.(fn_stacksize) = Some m' ->
      final_step stack f sp rs m (Breturn or)
        E0 (Returnstate stack (regmap_optget or Vundef rs) m')
  | exec_Bcall sig ros args res pc' fd:
      find_function ros rs = Some fd ->
      funsig fd = sig ->
      final_step stack f sp rs m (Bcall sig ros args res pc')
        E0 (Callstate (Stackframe res f sp pc' (tr_exit f [pc'] (Some res) rs) :: stack) fd rs##args m)
  | exec_Btailcall sig ros args stk m' fd:
      find_function ros rs = Some fd ->
      funsig fd = sig ->
      sp = (Vptr stk Ptrofs.zero) ->
      Mem.free m stk 0 f.(fn_stacksize) = Some m' ->
      final_step stack f sp rs m (Btailcall sig ros args)
        E0 (Callstate stack fd rs##args m')
  | exec_Bbuiltin ef args res pc' vargs t vres m':
      eval_builtin_args ge (fun r => rs#r) sp m args vargs ->
      external_call ef ge vargs m t vres m' ->
      final_step stack f sp rs m (Bbuiltin ef args res pc')
         t (State stack f sp pc' (regmap_setres res vres (tr_exit f [pc'] (reg_builtin_res res) rs)) m')
  | exec_Bjumptable arg tbl n pc':
      rs#arg = Vint n ->
      list_nth_z tbl (Int.unsigned n) = Some pc' ->
      final_step stack f sp rs m (Bjumptable arg tbl)
        E0 (State stack f sp pc' (tr_exit f tbl None rs) m)
.

(** big-step execution of one iblock *)
Definition iblock_step stack f sp rs m ib t s: Prop :=
  exists rs' m' fin, iblock_istep sp rs m ib rs' m' (Some fin) /\ final_step stack f sp rs' m' fin t s.

(** The transitions are presented as an inductive predicate
  [step ge st1 t st2], where [ge] is the global environment,
  [st1] the initial state, [st2] the final state, and [t] the trace
  of system calls performed during this transition. *)

Inductive step: state -> trace -> state -> Prop :=
  | exec_iblock stack ib f sp pc rs m t s
      (PC: (fn_code f)!pc = Some ib)
      (STEP: iblock_step stack f sp rs m ib.(entry) t s)
      :step (State stack f sp pc rs m) t s
  | exec_function_internal stack f args m m' stk
      (ALLOC: Mem.alloc m 0 f.(fn_stacksize) = (m', stk))
      :step (Callstate stack (Internal f) args m)
         E0 (State stack
                  f
                  (Vptr stk Ptrofs.zero)
                  f.(fn_entrypoint)
                  (init_regs args f.(fn_params))
                  m')
  | exec_function_external stack ef args res t m m'
      (EXTCALL: external_call ef ge args m t res m')
      :step (Callstate stack (External ef) args m)
          t (Returnstate stack res m')
  | exec_return stack res f sp pc rs vres m
      :step (Returnstate (Stackframe res f sp pc rs :: stack) vres m)
         E0 (State stack f sp pc (rs#res <- vres) m)
.

End RELSEM.

(** Execution of whole programs are described as sequences of transitions
  from an initial state to a final state.  An initial state is a [Callstate]
  corresponding to the invocation of the ``main'' function of the program
  without arguments and with an empty call stack. *)

Inductive initial_state (p: program): state -> Prop :=
  | initial_state_intro: forall b f m0,
      let ge := Genv.globalenv p in
      Genv.init_mem p = Some m0 ->
      Genv.find_symbol ge p.(prog_main) = Some b ->
      Genv.find_funct_ptr ge b = Some f ->
      funsig f = signature_main ->
      initial_state p (Callstate nil f nil m0).

(** A final state is a [Returnstate] with an empty call stack. *)

Inductive final_state: state -> int -> Prop :=
  | final_state_intro: forall r m,
      final_state (Returnstate nil (Vint r) m) r.

(** The "raw" CFG small-step semantics for a BTL program (e.g. close to RTL semantics) *)

(* tid = transfer_identity *)
Definition tid (_:function) (_:list exit) (_: option reg) (rs:regset) := rs.

Definition cfgsem (p: program) :=
  Semantics (step tid) (initial_state p) final_state (Genv.globalenv p).

(** The full "functional" small-step semantics for a BTL program.
    at each exit, we only transfer register in "input_regs" (i.e. "alive" registers).
*)
Definition transfer_regs (inputs: list reg) (rs: regset): regset :=
  init_regs (rs##inputs) inputs.

Lemma transfer_regs_inputs inputs rs r:
  List.In r inputs -> (transfer_regs inputs rs)#r = rs#r.
Proof.
  unfold transfer_regs; induction inputs; simpl; intuition subst.
  - rewrite Regmap.gss; auto.
  - destruct (Pos.eq_dec a r).
    + subst; rewrite Regmap.gss; auto.
    + rewrite Regmap.gso; auto.
Qed.

Lemma transfer_regs_dead inputs rs r:
  ~List.In r inputs -> (transfer_regs inputs rs)#r = Vundef.
Proof.
  unfold transfer_regs; induction inputs; simpl; intuition subst.
  - rewrite Regmap.gi; auto.
  - rewrite Regmap.gso; auto.
Qed.

Fixpoint union_inputs (f:function) (tbl: list exit): Regset.t :=
  match tbl with
  | nil => Regset.empty
  | pc::l => let rs:= union_inputs f l in
             match f.(fn_code)!pc with
             | None => rs
             | Some ib => Regset.union rs ib.(input_regs)
             end
  end.

(* TODO: lemma not yet used

Lemma union_inputs_In f tbl r:
  Regset.In r (union_inputs f tbl) -> (exists pc, List.In pc tbl /\ exists ib, f.(fn_code)!pc = Some ib /\ Regset.In r ib.(input_regs)).
Proof.
  induction tbl as [|pc l]; simpl; intros.
  - exploit Regset.empty_1; eauto. intuition.
  - destruct ((fn_code f) ! pc) eqn:ATpc.
    + exploit Regset.union_1; eauto.
      destruct 1 as [X1|X1].
      * destruct IHl as (pc' & H1 & H2); eauto.
      * eexists; intuition eauto.
    + destruct IHl as (pc' & H1 & H2); eauto.
Qed.

Lemma union_inputs_notIn f tbl r: 
   (forall pc ib, List.In pc tbl -> f.(fn_code)!pc = Some ib -> ~Regset.In r ib.(input_regs))
    -> ~Regset.In r (union_inputs f tbl).
Proof.
  induction tbl as [|pc l]; simpl; intuition eauto.
  - exploit Regset.empty_1; eauto.
  - destruct ((fn_code f) ! pc) eqn:ATpc; intuition eauto.
    exploit Regset.union_1; intuition eauto.
Qed.
*)

(* This function computes the union of the inputs associated to the exits
                   minus the optional register in [or] (typically the result of a call or a builtin).
   i.e. similar to [pre_output_regs] in RTLpath.
*)
Definition pre_inputs (f:function) (tbl: list exit) (or: option reg): Regset.t :=
 let rs := union_inputs f tbl in
 match or with 
 | Some r => Regset.remove r rs
 | None => rs
 end
 .

(* TODO: lemma pre_inputs_In + pre_inputs_notIn ? *)

Definition tr_inputs (f:function) (tbl: list exit) (or:option reg): regset -> regset 
  := transfer_regs (Regset.elements (pre_inputs f tbl or)).


(* TODO: move this elsewhere *)
Lemma SetoidList_InA_eq_equiv A (l: list A): forall x,
  SetoidList.InA (fun x y => x = y) x l <-> List.In x l.
Proof.
  intros x; split.
  - induction 1; simpl; eauto.
  - induction l; simpl; intuition.
Qed.

Lemma tr_pre_inputs f tbl or rs r:
  Regset.In r (pre_inputs f tbl or) -> (tr_inputs f tbl or rs)#r = rs#r.
Proof.
  intros; eapply transfer_regs_inputs.
  rewrite <- SetoidList_InA_eq_equiv.
  eapply Regset.elements_1; eauto.
Qed.

Lemma tr_inputs_dead f tbl or rs r:
  ~(Regset.In r (pre_inputs f tbl or)) -> (tr_inputs f tbl or rs)#r = Vundef.
Proof.
  intros X; eapply transfer_regs_dead; intuition eauto.
  eapply X. eapply Regset.elements_2. 
  rewrite -> SetoidList_InA_eq_equiv; eauto.
Qed.

Local Hint Resolve tr_pre_inputs Regset.mem_2 Regset.mem_1: core.

Lemma tr_inputs_get f tbl or rs r:
  (tr_inputs f tbl or rs)#r = if Regset.mem r (pre_inputs f tbl or) then rs#r else Vundef.
Proof.
  autodestruct; eauto.
  intros; apply tr_inputs_dead; eauto.
  intro X; exploit Regset.mem_1; eauto.
  congruence.
Qed.

(* TODO: not yet used
Lemma tr_inputs_ext f tbl or rs1 rs2:
  (forall r, Regset.In r (pre_inputs f tbl or) -> rs1#r = rs2#r) ->
  (forall r, (tr_inputs f tbl or rs1)#r = (tr_inputs f tbl or rs2)#r).
Proof.
  intros EQ r. rewrite !tr_inputs_get.
  autodestruct; auto.
Qed.
*)

Definition fsem (p: program) :=
  Semantics (step tr_inputs) (initial_state p) final_state (Genv.globalenv p).

Local Open Scope list_scope.

Definition poly_tr {A} (tr: function -> list exit -> option reg -> A) f (i: final): A := 
  match i with
  | Bgoto pc => tr f [pc] None
  | Bcall _ _ _ res pc => tr f [pc] (Some res)
  | Btailcall _ _ args => tr f [] None
  | Bbuiltin _ _ res pc => tr f [pc] (reg_builtin_res res)
  | Breturn _ => tr f [] None
  | Bjumptable _ tbl => tr f tbl None
  end.

Definition tr_regs: function -> final -> regset -> regset :=
  poly_tr tr_inputs.

(* TODO: NOT USEFUL ?
Definition liveout: function -> final -> Regset.t :=
  poly_tr pre_inputs.

Lemma tr_regs_liveout_equiv f fi : tr_regs f fi = transfer_regs (Regset.elements (liveout f fi)).
Proof.
  destruct fi; simpl; auto.
Qed.

Local Hint Resolve tr_inputs_get: core.

Lemma tr_regs_get f fi rs r: (tr_regs f fi rs)#r = if Regset.mem r (liveout f fi) then rs#r else Vundef.
Proof.
   Local Opaque pre_inputs.
   destruct fi; simpl; auto.
Qed.
*)

(* * Comparing BTL semantics modulo extensionality of [regset].

NB: This is at least used in BTL_SEtheory (and probably in the future BTL_SchedulerProof).

*)
Inductive equiv_stackframe: stackframe -> stackframe -> Prop :=
  | equiv_stackframe_intro res f sp pc (rs1 rs2: regset)
      (EQUIV: forall r, rs1#r = rs2#r)
      :equiv_stackframe (Stackframe res f sp pc rs1) (Stackframe res f sp pc rs2)
  .

Inductive equiv_state: state -> state -> Prop :=
  | State_equiv stk1 stk2 f sp pc rs1 m rs2
      (STACKS: list_forall2 equiv_stackframe stk1 stk2)
      (EQUIV: forall r, rs1#r = rs2#r)
      :equiv_state (State stk1 f sp pc rs1 m) (State stk2 f sp pc rs2 m)
  | Call_equiv stk1 stk2 f args m
      (STACKS: list_forall2 equiv_stackframe stk1 stk2)
      :equiv_state (Callstate stk1 f args m) (Callstate stk2 f args m)
  | Return_equiv stk1 stk2 v m
      (STACKS: list_forall2 equiv_stackframe stk1 stk2)
      :equiv_state (Returnstate stk1 v m) (Returnstate stk2 v m)
  .
Local Hint Constructors equiv_stackframe equiv_state: core.

Lemma equiv_stackframe_refl stf: equiv_stackframe stf stf.
Proof.
  destruct stf; eauto.
Qed.

Lemma equiv_stack_refl stk: list_forall2 equiv_stackframe stk stk.
Proof.
  Local Hint Resolve equiv_stackframe_refl: core.
  induction stk; simpl; constructor; auto.
Qed.
Local Hint Resolve equiv_stack_refl: core.

Lemma equiv_state_refl s: equiv_state s s.
Proof.
  induction s; simpl; constructor; auto.
Qed.

Lemma equiv_stackframe_trans stf1 stf2 stf3:
  equiv_stackframe stf1 stf2 -> equiv_stackframe stf2 stf3 -> equiv_stackframe stf1 stf3.
Proof.
  destruct 1; intros EQ; inv EQ; try econstructor; eauto.
  intros; eapply eq_trans; eauto.
Qed.
Local Hint Resolve equiv_stackframe_trans: core.

Lemma equiv_stack_trans stk1 stk2:
  list_forall2 equiv_stackframe stk1 stk2 -> 
  forall stk3, list_forall2 equiv_stackframe stk2 stk3 -> 
  list_forall2 equiv_stackframe stk1 stk3.
Proof.
  induction 1; intros stk3 EQ; inv EQ; econstructor; eauto.
Qed.

Lemma equiv_state_trans s1 s2 s3: equiv_state s1 s2 -> equiv_state s2 s3 -> equiv_state s1 s3.
Proof.
  Local Hint Resolve equiv_stack_trans: core.
  destruct 1; intros EQ; inv EQ; econstructor; eauto.
  intros; eapply eq_trans; eauto.
Qed.

Lemma regmap_setres_eq (rs rs': regset) res vres:
  (forall r, rs # r = rs' # r) ->
  forall r, (regmap_setres res vres rs) # r = (regmap_setres res vres rs') # r.
Proof.
  intros RSEQ r. destruct res; simpl; try congruence.
  destruct (peq x r).
  - subst. repeat (rewrite Regmap.gss). reflexivity.
  - repeat (rewrite Regmap.gso); auto.
Qed.

(* * Simulation of BTL fsem by BTL cfgsem: a lock-step less-def simulation.

We start by extending the previous [equiv_*] stuff for [Val.lessdef]: we need to also compare memories 
for Mem.extends, because Vundef values are propagated in memory through stores, allocation, etc.

*)

Inductive lessdef_stackframe: stackframe -> stackframe -> Prop :=
  | lessdef_stackframe_intro res f sp pc rs1 rs2
      (REGS: forall r, Val.lessdef rs1#r rs2#r)
      :lessdef_stackframe (Stackframe res f sp pc rs1) (Stackframe res f sp pc rs2)
  .

Inductive lessdef_state: state -> state -> Prop :=
  | State_lessdef stk1 stk2 f sp pc rs1 m1 rs2 m2
      (STACKS: list_forall2 lessdef_stackframe stk1 stk2)
      (REGS: forall r, Val.lessdef rs1#r rs2#r)
      (MEMS: Mem.extends m1 m2)
      :lessdef_state (State stk1 f sp pc rs1 m1) (State stk2 f sp pc rs2 m2)
  | Call_lessdef stk1 stk2 f args1 args2 m1 m2
      (STACKS: list_forall2 lessdef_stackframe stk1 stk2)
      (ARGS: Val.lessdef_list args1 args2)
      (MEMS: Mem.extends m1 m2)
      :lessdef_state (Callstate stk1 f args1 m1) (Callstate stk2 f args2 m2)
  | Return_lessdef stk1 stk2 v1 v2 m1 m2
      (STACKS: list_forall2 lessdef_stackframe stk1 stk2)
      (REGS: Val.lessdef v1 v2)
      (MEMS: Mem.extends m1 m2)
      :lessdef_state (Returnstate stk1 v1 m1) (Returnstate stk2 v2 m2)
  .
Local Hint Constructors lessdef_stackframe lessdef_state: core.

Lemma lessdef_stackframe_refl stf: lessdef_stackframe stf stf.
Proof.
  destruct stf; eauto.
Qed.

Local Hint Resolve lessdef_stackframe_refl: core.
Lemma lessdef_stack_refl stk: list_forall2 lessdef_stackframe stk stk.
Proof.
  induction stk; simpl; constructor; auto.
Qed.

Local Hint Resolve lessdef_stack_refl: core.

Lemma lessdef_list_refl args: Val.lessdef_list args args.
Proof.
  induction args; simpl; auto.
Qed.

Local Hint Resolve lessdef_list_refl Mem.extends_refl: core.

Lemma lessdef_state_refl s: lessdef_state s s.
Proof.
  induction s; simpl; constructor; auto.
Qed.

Local Hint Resolve Val.lessdef_trans: core.
Lemma lessdef_stackframe_trans stf1 stf2 stf3:
  lessdef_stackframe stf1 stf2 -> lessdef_stackframe stf2 stf3 -> lessdef_stackframe stf1 stf3.
Proof.
  destruct 1. intros LD; inv LD; try econstructor; eauto.
Qed.

Local Hint Resolve lessdef_stackframe_trans: core.
Lemma lessdef_stack_trans stk1 stk2:
  list_forall2 lessdef_stackframe stk1 stk2 -> 
  forall stk3, list_forall2 lessdef_stackframe stk2 stk3 -> 
  list_forall2 lessdef_stackframe stk1 stk3.
Proof.
  induction 1; intros stk3 LD; inv LD; econstructor; eauto.
Qed.

Local Hint Resolve lessdef_stack_trans Mem.extends_extends_compose Val.lessdef_list_trans: core.
Lemma lessdef_state_trans s1 s2 s3: lessdef_state s1 s2 -> lessdef_state s2 s3 -> lessdef_state s1 s3.
Proof.
  destruct 1; intros LD; inv LD; econstructor; eauto.
Qed.

Lemma init_regs_lessdef_preserv args1 args2
  (ARGS : Val.lessdef_list args1 args2)
  : forall rl r, Val.lessdef (init_regs args1 rl)#r (init_regs args2 rl)#r.
Proof.
  induction ARGS; simpl; auto.
  intros rl r1; destruct rl; simpl in *; auto.
  eapply set_reg_lessdef; eauto.
  intros r2; eauto.
Qed.
Local Hint Resolve init_regs_lessdef_preserv: core.

Lemma tr_inputs_lessdef f rs1 rs2 tbl or r
  (REGS: forall r, Val.lessdef rs1#r rs2#r)
  :Val.lessdef (tr_inputs f tbl or rs1)#r (tid f tbl or rs2)#r.
Proof.
  unfold tid; rewrite tr_inputs_get.
  autodestruct.
Qed.
Local Hint Resolve tr_inputs_lessdef: core.

Lemma find_function_lessdef ge ros rs1 rs2 fd
  (FIND: find_function ge ros rs1 = Some fd)
  (REGS: forall r, Val.lessdef rs1#r rs2#r)
  : find_function ge ros rs2 = Some fd.
Proof.
  destruct ros; simpl in *; auto.
  exploit Genv.find_funct_inv; eauto.
  intros (b & EQ).
  destruct (REGS r); try congruence.
Qed.
Local Hint Resolve find_function_lessdef regs_lessdef_regs: core.


Lemma fsem2cfgsem_ibistep_simu ge sp rs1 m1 ib rs1' m1' of
  (ISTEP: iblock_istep ge sp rs1 m1 ib rs1' m1' of)
  rs2 m2
  (REGS: forall r, Val.lessdef rs1#r rs2#r)
  (MEMS: Mem.extends m1 m2)
  : exists rs2' m2', iblock_istep_run ge sp ib rs2 m2 = Some (out rs2' m2' of) 
     /\ (forall r, Val.lessdef rs1'#r rs2'#r)
     /\ (Mem.extends m1' m2').
Proof.
  induction ISTEP; simpl; try_simplify_someHyps.
Admitted.

Local Hint Constructors final_step list_forall2: core.
Local Hint Unfold regs_lessdef: core.

Lemma fsem2cfgsem_finalstep_simu ge stk1 f sp rs1 m1 fin t s1 stk2 rs2 m2
  (FSTEP: final_step tr_inputs ge stk1 f sp rs1 m1 fin t s1)
  (STACKS: list_forall2 lessdef_stackframe stk1 stk2)
  (REGS: forall r, Val.lessdef rs1#r rs2#r)
  (MEMS: Mem.extends m1 m2)
  : exists s2, final_step tid ge stk2 f sp rs2 m2 fin t s2
               /\ lessdef_state s1 s2.
Proof.
  destruct FSTEP; try (eexists; split; simpl; econstructor; eauto; fail).
  - (* return *)
    exploit Mem.free_parallel_extends; eauto.
    intros (m2' & FREE & MEMS2).
    eexists; split; simpl; econstructor; eauto.
    destruct or; simpl; auto.
  - (* tailcall *)
    exploit Mem.free_parallel_extends; eauto.
    intros (m2' & FREE & MEMS2).
    eexists; split; simpl; econstructor; eauto.
  - (* builtin *)
    exploit (eval_builtin_args_lessdef (ge:=ge) (e1:=(fun r => rs1 # r)) (fun r => rs2 # r)); eauto.
    intros (vl2' & BARGS & VARGS).
    exploit external_call_mem_extends; eauto.
    intros (vres' & m2' & CALL2 & REGS2 & MEMS2 & UNCHANGED).
    eexists; split; simpl; econstructor; eauto.
    intros; apply set_res_lessdef; eauto.
  - (* jumptable *)
    eexists; split; simpl; econstructor; eauto.
    destruct (REGS arg); try congruence.
Qed.

Lemma fsem2cfgsem_ibstep_simu ge stk1 stk2 f sp rs1 m1 ib t s1 rs2 m2
  (STEP: iblock_step tr_inputs ge stk1 f sp rs1 m1 ib t s1)
  (STACKS : list_forall2 lessdef_stackframe stk1 stk2)
  (REGS: forall r, Val.lessdef rs1#r rs2#r)
  (MEMS : Mem.extends m1 m2)
  : exists s2, iblock_step tid ge stk2 f sp rs2 m2 ib t s2
               /\ lessdef_state s1 s2.
Proof.
  destruct STEP as (rs1' & m1' & fin & ISTEP & FSTEP).
  exploit fsem2cfgsem_ibistep_simu; eauto.
  intros (rs2' & m2' & ISTEP2 & REGS2 & MEMS2).
  rewrite <- iblock_istep_run_equiv in ISTEP2. clear ISTEP REGS MEMS.
  exploit fsem2cfgsem_finalstep_simu; eauto.
  intros (s2 & FSTEP2 & LESSDEF). clear FSTEP.
  unfold iblock_step; eexists; split; eauto.
Qed.

Local Hint Constructors step: core.

Lemma fsem2cfgsem_step_simu ge s1 t s1' s2
 (STEP: step tr_inputs ge s1 t s1')
 (REGS: lessdef_state s1 s2)
 :exists s2' : state,
   step tid ge s2 t s2' /\
   lessdef_state s1' s2'.
Proof.
  destruct STEP; inv REGS.
  - (* iblock *)
    intros; exploit fsem2cfgsem_ibstep_simu; eauto. clear STEP.
    intros (s2 & STEP2 & REGS2).
    eexists; split; eauto.
  - (* internal call *)
    exploit (Mem.alloc_extends m m2 0 (fn_stacksize f) stk m' 0 (fn_stacksize f)); eauto.
    1-2: lia.
    intros (m2' & ALLOC2 & MEMS2). clear ALLOC MEMS.
    eexists; split; econstructor; eauto.
  - (* external call *)
    exploit external_call_mem_extends; eauto.
    intros (vres' & m2' & CALL2 & REGS2 & MEMS2 & UNCHANGED). clear EXTCALL MEMS.
    eexists; split; econstructor; eauto.
  - (* return *)
    inversion STACKS as [|d1 d2 d3 d4 STF2 STK2]. subst.
    inv STF2.
    eexists; split; econstructor; eauto.
    intros; eapply set_reg_lessdef; eauto.
Qed.

Theorem fsem2cfgsem prog:
  forward_simulation (fsem prog) (cfgsem prog).
Proof.
  eapply forward_simulation_step with lessdef_state; simpl; auto.
  - destruct 1; intros; eexists; intuition eauto. econstructor; eauto.
  - intros s1 s2 r REGS FINAL; destruct FINAL.
     inv REGS; inv STACKS; inv REGS0.
     econstructor; eauto.
  - intros; eapply fsem2cfgsem_step_simu; eauto.
Qed.

(** TODO: est-ce qu'il faudrait déplacer ceci dans une librairie du genre "BTLlib" ? *)

(** Matching BTL and RTL code 

We should be able to define a single verifier able to prove a bisimulation between BTL and RTL code.

NB 1: the proof of BTL -> RTL (plus simulation) should be much easier than RTL -> BTL (star simulation).

NB 2: our scheme allows the BTL to duplicate some RTL code.
- in other words: RTL -> BTL allows tail duplication, loop unrolling, etc. Exactly like "Duplicate" on RTL.
- BTL -> RTL allows to "undo" some duplications (e.g. remove duplications that have not enabled interesting optimizations) !

Hence, in a sense, our verifier imitates the approach of Duplicate, where [dupmap] maps the BTL nodes to the RTL nodes.

The [match_*] definitions gives a "relational" specification of the verifier...
*)

Require Import Errors.

Inductive match_final_inst (dupmap: PTree.t node): final -> instruction -> Prop :=
(* TODO: it may be simplify the oracle for BTL -> RTL, but may makes the verifier more complex ?
  | mfi_goto pc pc':
      dupmap!pc = (Some pc') -> match_final_inst dupmap (Bgoto pc) (Inop pc')
*)
  | mfi_return or: match_final_inst dupmap (Breturn or) (Ireturn or)
  | mfi_call pc pc' s ri lr r:
      dupmap!pc = (Some pc') -> match_final_inst dupmap (Bcall s ri lr r pc) (Icall s ri lr r pc')
  | mfi_tailcall s ri lr:
      match_final_inst dupmap (Btailcall s ri lr) (Itailcall s ri lr)
  | mfi_builtin pc pc' ef la br:
      dupmap!pc = (Some pc') -> match_final_inst dupmap (Bbuiltin ef la br pc) (Ibuiltin ef la br pc')
  | mfi_jumptable ln ln' r:
      list_forall2 (fun pc pc' => (dupmap!pc = (Some pc'))) ln ln' ->
      match_final_inst dupmap (Bjumptable r ln) (Ijumptable r ln')
.

Inductive is_join_opt {A}: (option A) -> (option A) -> (option A) -> Prop :=
  | ijo_None_left o: is_join_opt None o o
  | ijo_None_right o: is_join_opt o None o
  | ijo_Some x: is_join_opt (Some x) (Some x) (Some x)
  .

(* [match_iblock dupmap cfg isfst pc ib opc] means that [ib] match a block in a RTL code starting at [pc], with:
   - [isfst] (in "input") indicates that no step in the surrounding block has been executed before entering [pc]
   - if [opc] (in "ouput") is [None], this means that all branches of the block ends on a final instruction
   - if [opc] is [Some pc'], this means that all branches of the block that do not exit, join on [pc'].
*)
Inductive match_iblock (dupmap: PTree.t node) (cfg: RTL.code): bool -> node -> iblock -> (option node) -> Prop :=
  | mib_BF isfst fi pc i:
      cfg!pc = Some i ->
      match_final_inst dupmap fi i ->
      match_iblock dupmap cfg isfst pc (BF fi) None
  | mib_nop isfst pc pc':
      cfg!pc = Some (Inop pc') ->
      match_iblock dupmap cfg isfst pc Bnop (Some pc')
  | mib_op isfst pc pc' op lr r:
      cfg!pc = Some (Iop op lr r pc') ->
      match_iblock dupmap cfg isfst pc (Bop op lr r) (Some pc')
  | mib_load isfst pc pc' m a lr r:
      cfg!pc = Some (Iload TRAP m a lr r pc') -> 
      match_iblock dupmap cfg isfst pc (Bload TRAP m a lr r) (Some pc')
  | mib_store isfst pc pc' m a lr r:
      cfg!pc = Some (Istore m a lr r pc') -> 
      match_iblock dupmap cfg isfst pc (Bstore m a lr r) (Some pc')
  | mib_exit pc pc': 
      dupmap!pc = (Some pc') ->
      match_iblock dupmap cfg false pc' (Bgoto pc) None
      (* NB: on RTL side, we exit the block by a "basic" instruction (or Icond). 
         Thus some step should have been executed before [pc'] in the RTL code *)
  | mib_seq_Some isfst b1 b2 pc1 pc2 opc:
      match_iblock dupmap cfg isfst pc1 b1 (Some pc2) ->
      match_iblock dupmap cfg false pc2 b2 opc ->
      match_iblock dupmap cfg isfst pc1 (Bseq b1 b2) opc
(*  | mib_seq_None isfst b1 b2 pc:
      match_iblock dupmap cfg isfst pc b1 None -> 
      match_iblock dupmap cfg isfst (Bseq b1 b2) pc None
      (* TODO: here [b2] is dead code ! Is this case really useful ? 
         The oracle could remove this dead code.
         And the verifier could fail if there is such dead code!
      *)
*)
  | mib_cond isfst c lr bso bnot pcso pcnot pc opc1 opc2 opc i i':
      cfg!pc = Some (Icond c lr pcso pcnot i') ->
      match_iblock dupmap cfg false pcso bso opc1 ->
      match_iblock dupmap cfg false pcnot bnot opc2 ->
      is_join_opt opc1 opc2 opc ->
      match_iblock dupmap cfg isfst pc (Bcond c lr bso bnot i) opc
  .

Definition match_cfg dupmap (cfg: code) (cfg':RTL.code): Prop :=
    forall pc pc', dupmap!pc = Some pc' ->
    exists ib, cfg!pc = Some ib /\ match_iblock dupmap cfg' true pc' ib.(entry) None.

(** Shared verifier between RTL -> BTL and BTL -> RTL *)

Local Open Scope error_monad_scope.

Definition verify_is_copy dupmap n n' :=
  match dupmap!n with
  | None => Error(msg "verify_is_copy None")
  | Some revn => match (Pos.compare n' revn) with Eq => OK tt | _ => Error(msg "verify_is_copy invalid map") end
  end.

Fixpoint verify_is_copy_list dupmap ln ln' :=
  match ln with
  | n::ln => match ln' with
             | n'::ln' => do _ <- verify_is_copy dupmap n n';
                          verify_is_copy_list dupmap ln ln'
             | nil => Error (msg "verify_is_copy_list: ln' bigger than ln") end
  | nil => match ln' with
          | n :: ln' => Error (msg "verify_is_copy_list: ln bigger than ln'")
          | nil => OK tt end
  end.

Lemma verify_is_copy_correct dupmap n n' tt:
  verify_is_copy dupmap n n' = OK tt ->
  dupmap ! n = Some n'.
Proof.
  unfold verify_is_copy; repeat autodestruct.
  intros NP H; destruct (_ ! n) eqn:REVM; [|inversion H].
  eapply Pos.compare_eq in NP. congruence.
Qed.
Local Hint Resolve verify_is_copy_correct: core.

Lemma verify_is_copy_list_correct dupmap ln: forall ln' tt,
  verify_is_copy_list dupmap ln ln' = OK tt ->
  list_forall2 (fun n n' => dupmap ! n = Some n') ln ln'.
Proof.
  induction ln.
  - intros. destruct ln'; monadInv H. constructor.
  - intros. destruct ln'; monadInv H. constructor; eauto.
Qed.

(* TODO Copied from duplicate, should we import ? *)
Lemma product_eq {A B: Type} :
  (forall (a b: A), {a=b} + {a<>b}) ->
  (forall (c d: B), {c=d} + {c<>d}) ->
  forall (x y: A+B), {x=y} + {x<>y}.
Proof.
  intros H H'. intros. decide equality.
Qed.

(* TODO Copied from duplicate, should we import ? *)
(** FIXME Ideally i would like to put this in AST.v but i get an "illegal application"
 * error when doing so *)
Remark builtin_arg_eq_pos: forall (a b: builtin_arg positive), {a=b} + {a<>b}.
Proof.
  intros.
  apply (builtin_arg_eq Pos.eq_dec).
Defined.
Global Opaque builtin_arg_eq_pos.

(* TODO Copied from duplicate, should we import ? *)
Remark builtin_res_eq_pos: forall (a b: builtin_res positive), {a=b} + {a<>b}.
Proof. intros. apply (builtin_res_eq Pos.eq_dec). Qed.
Global Opaque builtin_res_eq_pos.

Fixpoint verify_block (dupmap: PTree.t node) cfg isfst pc ib : res (option node) :=
  match ib with
  | BF fi =>
      match fi with
      | Bgoto pc1 =>
          do u <- verify_is_copy dupmap pc1 pc;
          if negb isfst then
            OK None
          else Error (msg "verify_block: isfst is true Bgoto")
      | Breturn or =>
          match cfg!pc with
          | Some (Ireturn or') =>
              if option_eq Pos.eq_dec or or' then OK None
              else Error (msg "verify_block: different opt reg in Breturn")
          | _ => Error (msg "verify_block: incorrect cfg Breturn")
          end
      | Bcall s ri lr r pc1 =>
          match cfg!pc with
          | Some (Icall s' ri' lr' r' pc2) =>
              do u <- verify_is_copy dupmap pc1 pc2;
              if (signature_eq s s') then
                if (product_eq Pos.eq_dec ident_eq ri ri') then
                  if (list_eq_dec Pos.eq_dec lr lr') then
                    if (Pos.eq_dec r r') then OK None
                    else Error (msg "verify_block: different r r' in Bcall")
                  else Error (msg "verify_block: different lr in Bcall")
                else Error (msg "verify_block: different ri in Bcall")
              else Error (msg "verify_block: different signatures in Bcall")
          | _ => Error (msg "verify_block: incorrect cfg Bcall")
          end
      | Btailcall s ri lr =>
          match cfg!pc with
          | Some (Itailcall s' ri' lr') =>
              if (signature_eq s s') then
                if (product_eq Pos.eq_dec ident_eq ri ri') then
                  if (list_eq_dec Pos.eq_dec lr lr') then OK None
                  else Error (msg "verify_block: different lr in Btailcall")
                else Error (msg "verify_block: different ri in Btailcall")
              else Error (msg "verify_block: different signatures in Btailcall")
          | _ => Error (msg "verify_block: incorrect cfg Btailcall")
          end
      | Bbuiltin ef la br pc1 =>
          match cfg!pc with
          | Some (Ibuiltin ef' la' br' pc2) =>
              do u <- verify_is_copy dupmap pc1 pc2;
              if (external_function_eq ef ef') then
                if (list_eq_dec builtin_arg_eq_pos la la') then
                  if (builtin_res_eq_pos br br') then OK None
                  else Error (msg "verify_block: different brr in Bbuiltin")
                else Error (msg "verify_block: different lbar in Bbuiltin")
              else Error (msg "verify_block: different ef in Bbuiltin")
          | _ => Error (msg "verify_block: incorrect cfg Bbuiltin")
          end
      | Bjumptable r ln =>
          match cfg!pc with
          | Some (Ijumptable r' ln') =>
              do u <- verify_is_copy_list dupmap ln ln';
              if (Pos.eq_dec r r') then OK None
              else Error (msg "verify_block: different r in Bjumptable")
          | _ => Error (msg "verify_block: incorrect cfg Bjumptable")
          end
      end
  | Bnop =>
      match cfg!pc with
      | Some (Inop pc') => OK (Some pc')
      | _ => Error (msg "verify_block: incorrect cfg Bnop")
      end
  | Bop op lr r =>
      match cfg!pc with
      | Some (Iop op' lr' r' pc') =>
          if (eq_operation op op') then
            if (list_eq_dec Pos.eq_dec lr lr') then
              if (Pos.eq_dec r r') then
                OK (Some pc')
              else Error (msg "verify_block: different r in Bop")
            else Error (msg "verify_block: different lr in Bop")
          else Error (msg "verify_block: different operations in Bop")
      | _ => Error (msg "verify_block: incorrect cfg Bop")
      end
  | Bload tm m a lr r =>
      match cfg!pc with
      | Some (Iload tm' m' a' lr' r' pc') =>
          if (trapping_mode_eq tm TRAP && trapping_mode_eq tm' TRAP) then
            if (chunk_eq m m') then
              if (eq_addressing a a') then
                if (list_eq_dec Pos.eq_dec lr lr') then
                  if (Pos.eq_dec r r') then
                    OK (Some pc')
                  else Error (msg "verify_block: different r in Bload")
                else Error (msg "verify_block: different lr in Bload")
              else Error (msg "verify_block: different addressing in Bload")
            else Error (msg "verify_block: different mchunk in Bload")
          else Error (msg "verify_block: NOTRAP trapping_mode unsupported in Bload")
      | _ => Error (msg "verify_block: incorrect cfg Bload")
      end
  | Bstore m a lr r =>
      match cfg!pc with
      | Some (Istore m' a' lr' r' pc') =>
          if (chunk_eq m m') then
            if (eq_addressing a a') then
              if (list_eq_dec Pos.eq_dec lr lr') then
                if (Pos.eq_dec r r') then OK (Some pc')
                else Error (msg "verify_block: different r in Bstore")
              else Error (msg "verify_block: different lr in Bstore")
            else Error (msg "verify_block: different addressing in Bstore")
          else Error (msg "verify_block: different mchunk in Bstore")
      | _ => Error (msg "verify_block: incorrect cfg Bstore")
      end
  | Bseq b1 b2 =>
      do opc <- verify_block dupmap cfg isfst pc b1;
      match opc with
      | Some pc' =>
          verify_block dupmap cfg false pc' b2
      | None => Error (msg "verify_block: None next pc in Bseq (deadcode)")
      end
  | Bcond c lr bso bnot i =>
      match cfg!pc with
      | Some (Icond c' lr' pcso pcnot i') =>
          if (list_eq_dec Pos.eq_dec lr lr') then
            if (eq_condition c c') then
              do opc1 <- verify_block dupmap cfg false pcso bso;
              do opc2 <- verify_block dupmap cfg false pcnot bnot;
              match opc1, opc2 with
              | None, o => OK o
              | o, None => OK o
              | Some x, Some x' =>
                  if Pos.eq_dec x x' then OK (Some x)
                  else Error (msg "verify_block: is_join_opt incorrect for Bcond")
              end
            else Error (msg "verify_block: incompatible conditions in Bcond")
          else Error (msg "verify_block: different lr in Bcond")
      | _ => Error (msg "verify_block: incorrect cfg Bcond")
      end
  end.

(* This property expresses that "relation" [match_iblock] is a partial function (see also [iblock_istep_run_equiv] above) *)
Lemma verify_block_correct dupmap cfg ib: forall pc isfst fin,
   verify_block dupmap cfg isfst pc ib = (OK fin) -> match_iblock dupmap cfg isfst pc ib fin.
Proof.
  induction ib; intros;
  try (unfold verify_block in H; destruct (cfg!pc) eqn:EQCFG; [ idtac | discriminate; fail ]).
  - (* BF *)
    destruct fi; unfold verify_block in H.
    + (* Bgoto *)
      monadInv H.
      destruct (isfst); simpl in EQ0; inv EQ0.
      eapply verify_is_copy_correct in EQ.
      constructor; assumption.
    + (* Breturn *)
      destruct (cfg!pc) eqn:EQCFG; try destruct i; try discriminate.
      destruct (option_eq _ _ _); try discriminate. inv H.
      eapply mib_BF; eauto. constructor.
    + (* Bcall *)
      destruct (cfg!pc) eqn:EQCFG; try destruct i; try discriminate.
      monadInv H.
      eapply verify_is_copy_correct in EQ.
      destruct (signature_eq _ _); try discriminate.
      destruct (product_eq _ _ _ _); try discriminate.
      destruct (list_eq_dec _ _ _); try discriminate.
      destruct (Pos.eq_dec _ _); try discriminate. subst.
      inv EQ0. eapply mib_BF; eauto. constructor; assumption.
    + (* Btailcall *)
      destruct (cfg!pc) eqn:EQCFG; try destruct i; try discriminate.
      destruct (signature_eq _ _); try discriminate.
      destruct (product_eq _ _ _ _); try discriminate.
      destruct (list_eq_dec _ _ _); try discriminate. subst.
      inv H. eapply mib_BF; eauto. constructor.
    + (* Bbuiltin *)
      destruct (cfg!pc) eqn:EQCFG; try destruct i; try discriminate.
      monadInv H.
      eapply verify_is_copy_correct in EQ.
      destruct (external_function_eq _ _); try discriminate.
      destruct (list_eq_dec _ _ _); try discriminate.
      destruct (builtin_res_eq_pos _ _); try discriminate. subst.
      inv EQ0. eapply mib_BF; eauto. constructor; assumption.
    + (* Bjumptable *)
      destruct (cfg!pc) eqn:EQCFG; try destruct i; try discriminate.
      monadInv H.
      eapply verify_is_copy_list_correct in EQ.
      destruct (Pos.eq_dec _ _); try discriminate. subst.
      inv EQ0. eapply mib_BF; eauto. constructor; assumption.
  - (* Bnop *)
    destruct i; inv H.
    constructor; assumption.
  - (* Bop *)
    destruct i; try discriminate.
    destruct (eq_operation _ _); try discriminate.
    destruct (list_eq_dec _ _ _); try discriminate.
    destruct (Pos.eq_dec _ _); try discriminate. inv H.
    constructor; assumption.
  - (* Bload *)
    destruct i; try discriminate.
    do 2 (destruct (trapping_mode_eq _ _); try discriminate).
    simpl in H.
    destruct (chunk_eq _ _); try discriminate.
    destruct (eq_addressing _ _); try discriminate.
    destruct (list_eq_dec _ _ _); try discriminate.
    destruct (Pos.eq_dec _ _); try discriminate. inv H.
    constructor; assumption.
  - (* Bstore *)
    destruct i; try discriminate.
    destruct (chunk_eq _ _); try discriminate.
    destruct (eq_addressing _ _); try discriminate.
    destruct (list_eq_dec _ _ _); try discriminate.
    destruct (Pos.eq_dec _ _); try discriminate. inv H.
    constructor; assumption.
  - (* Bseq *)
    monadInv H.
    destruct x; try discriminate.
    eapply mib_seq_Some.
    eapply IHib1; eauto.
    eapply IHib2; eauto.
  - (* Bcond *)
    destruct i; try discriminate.
    destruct (list_eq_dec _ _ _); try discriminate.
    destruct (eq_condition _ _); try discriminate.
    fold (verify_block dupmap cfg false n ib1) in H.
    fold (verify_block dupmap cfg false n0 ib2) in H.
    monadInv H.
    destruct x, x0; try destruct (Pos.eq_dec _ _); try discriminate.
    all: inv EQ2; eapply mib_cond; eauto; econstructor.
Qed.
Local Hint Resolve verify_block_correct: core.

Fixpoint verify_blocks dupmap (cfg: code) (cfg':RTL.code) l: res unit :=
  match l with
  | nil => OK tt
  | (pc, pc')::l =>
    match cfg!pc with
    | Some ib => do o <- verify_block dupmap cfg' true pc' ib.(entry);
                 match o with
                 | None => verify_blocks dupmap cfg cfg' l
                 | _ => Error(msg "verify_blocks.end")
                 end
    | _ => Error(msg "verify_blocks.entry")
    end
  end.

Definition verify_cfg dupmap (cfg: code) (cfg':RTL.code): res unit :=
  verify_blocks dupmap cfg cfg' (PTree.elements dupmap).

Lemma verify_cfg_correct dupmap cfg cfg' tt:
  verify_cfg dupmap cfg cfg' = OK tt ->
  match_cfg dupmap cfg cfg'.
Proof.
  unfold verify_cfg. 
  intros X pc pc' H; generalize X; clear X.
  exploit PTree.elements_correct; eauto.
  generalize tt pc pc' H; clear tt pc pc' H.
  generalize (PTree.elements dupmap).
  induction l as [|[pc1 pc1']l]; simpl.
  - tauto.
  - intros pc pc' DUP u H. 
    unfold bind. 
    repeat autodestruct.
    intros; subst.
    destruct H as [H|H]; eauto.
    inversion H; subst.
    eexists; split; eauto.
Qed.

Definition verify_function dupmap f f' : res unit :=
  do _ <- verify_is_copy dupmap (fn_entrypoint f) (RTL.fn_entrypoint f');
  verify_cfg dupmap (fn_code f) (RTL.fn_code f').

Definition is_goto (ib: iblock): bool :=
  match ib with
  | Bgoto _ => true
  | _ => false
  end.

Definition is_atom (ib: iblock): bool :=
 match ib with
 | Bseq _ _ | Bcond _ _ _ _ _ => false 
 | _ => true
 end.

(** Is expand property to only support atomic instructions on the left part of a Bseq *)
Inductive is_expand: iblock -> Prop :=
  | exp_Bseq ib1 ib2:
     is_atom ib1 = true ->
     is_expand ib2 -> 
     is_expand (Bseq ib1 ib2)
  | exp_Bcond cond args ib1 ib2 i:
     is_expand ib1 ->
     is_expand ib2 -> 
     is_expand (Bcond cond args ib1 ib2 i)
  | exp_others ib:
     is_atom ib = true ->
     is_expand ib
     .
Local Hint Constructors is_expand: core.

Fixpoint expand (ib: iblock) (k: option iblock): iblock :=
  match ib with
  | Bseq ib1 ib2 => expand ib1 (Some (expand ib2 k))
  | Bcond cond args ib1 ib2 i => 
     Bcond cond args (expand ib1 k) (expand ib2 k) i
  | BF fin => fin
  | ib => 
    match k with 
    | None => ib
    | Some rem => Bseq ib rem
    end
  end.

Lemma expand_correct ib: forall k,
  (match k with Some rem => is_expand rem | None => True end)
  -> is_expand (expand ib k).
Proof.
  induction ib; simpl; intros; try autodestruct; auto.
Qed.