MenhirLib/Interpreter.v


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465

(****************************************************************************)
(*                                                                          *)
(*                                   Menhir                                 *)
(*                                                                          *)
(*           Jacques-Henri Jourdan, CNRS, LRI, Université Paris Sud         *)
(*                                                                          *)
(*  Copyright Inria. All rights reserved. This file is distributed under    *)
(*  the terms of the GNU Lesser General Public License as published by the  *)
(*  Free Software Foundation, either version 3 of the License, or (at your  *)
(*  option) any later version, as described in the file LICENSE.            *)
(*                                                                          *)
(****************************************************************************)

From Coq Require Import List Syntax.
Import ListNotations.
From Coq.ssr Require Import ssreflect.
Require Automaton.
Require Import Alphabet Grammar Validator_safe.

Module Make(Import A:Automaton.T).
Module Import ValidSafe := Validator_safe.Make A.

(** A few helpers for dependent types. *)

(** Decidable propositions. *)
Class Decidable (P : Prop) := decide : {P} + {~P}.
Arguments decide _ {_}.

(** A [Comparable] type has decidable equality. *)
Global Instance comparable_decidable_eq T `{ComparableLeibnizEq T} (x y : T) :
  Decidable (x = y).
Proof.
  unfold Decidable.
  destruct (compare x y) eqn:EQ; [left; apply compare_eq; intuition | ..];
    right; intros ->; by rewrite compare_refl in EQ.
Defined.

Global Instance list_decidable_eq T :
  (forall x y : T, Decidable (x = y)) ->
  (forall l1 l2 : list T, Decidable (l1 = l2)).
Proof. unfold Decidable. decide equality. Defined.

Ltac subst_existT :=
  repeat
    match goal with
    | _ => progress subst
    | H : @existT ?A ?P ?x ?y1 = @existT ?A ?P ?x ?y2 |- _ =>
      let DEC := fresh in
      assert (DEC : forall u1 u2 : A, Decidable (u1 = u2)) by apply _;
      apply Eqdep_dec.inj_pair2_eq_dec in H; [|by apply DEC];
      clear DEC
    end.

(** The interpreter is written using dependent types. In order to
  avoid reducing proof terms while executing the parser, we thunk all
  the propositions behind an arrow.
  Note that thunkP is still in Prop so that it is erased by
  extraction.
 *)
Definition thunkP (P : Prop) : Prop := True -> P.

(** Sometimes, we actually need a reduced proof in a program (for
  example when using an equality to cast a value). In that case,
  instead of reducing the proof we already have, we reprove the
  assertion by using decidability. *)
Definition reprove {P} `{Decidable P} (p : thunkP P) : P :=
  match decide P with
  | left p => p
  | right np => False_ind _ (np (p I))
  end.

(** Combination of reprove with eq_rect. *)
Definition cast {T : Type} (F : T -> Type) {x y : T} (eq : thunkP (x = y))
                {DEC : unit -> Decidable (x = y)}:
                F x -> F y :=
  fun a => eq_rect x F a y (@reprove _ (DEC ()) eq).

Lemma cast_eq T F (x : T) (eq : thunkP (x = x)) `{forall x y, Decidable (x = y)} a :
  cast F eq a = a.
Proof. by rewrite /cast -Eqdep_dec.eq_rect_eq_dec. Qed.

(** Input buffers and operations on them. **)
CoInductive buffer : Type :=
  Buf_cons { buf_head : token; buf_tail : buffer }.

(* Note: Coq 8.12.0 wants a Declare Scope command,
   but this breaks compatibility with Coq < 8.10.
   Declare Scope buffer_scope. *)
Delimit Scope buffer_scope with buf.
Bind Scope buffer_scope with buffer.

Infix "::" := Buf_cons (at level 60, right associativity) : buffer_scope.

(** Concatenation of a list and an input buffer **)
Fixpoint app_buf (l:list token) (buf:buffer) :=
  match l with
    | nil => buf
    | cons t q => (t :: app_buf q buf)%buf
  end.
Infix "++" := app_buf (at level 60, right associativity) : buffer_scope.

Lemma app_buf_assoc (l1 l2:list token) (buf:buffer) :
  (l1 ++ (l2 ++ buf) = (l1 ++ l2) ++ buf)%buf.
Proof. induction l1 as [|?? IH]=>//=. rewrite IH //. Qed.

(** The type of a non initial state: the type of semantic values associated
   with the last symbol of this state. *)
Definition noninitstate_type state :=
  symbol_semantic_type (last_symb_of_non_init_state state).

(** The stack of the automaton : it can be either nil or contains a non
    initial state, a semantic value for the symbol associted with this state,
    and a nested stack. **)
Definition stack := list (sigT noninitstate_type). (* eg. list {state & state_type state} *)

Section Interpreter.

Hypothesis safe: safe.

(* Properties of the automaton deduced from safety validation. *)
Proposition shift_head_symbs: shift_head_symbs.
Proof. pose proof safe; unfold ValidSafe.safe in H; intuition. Qed.
Proposition goto_head_symbs: goto_head_symbs.
Proof. pose proof safe; unfold ValidSafe.safe in H; intuition. Qed.
Proposition shift_past_state: shift_past_state.
Proof. pose proof safe; unfold ValidSafe.safe in H; intuition. Qed.
Proposition goto_past_state: goto_past_state.
Proof. pose proof safe; unfold ValidSafe.safe in H; intuition. Qed.
Proposition reduce_ok: reduce_ok.
Proof. pose proof safe; unfold ValidSafe.safe in H; intuition. Qed.

Variable init : initstate.

(** The top state of a stack **)
Definition state_of_stack (stack:stack): state :=
  match stack with
    | [] => init
    | existT _ s _::_ => s
  end.

(** The stack of states of an automaton stack **)
Definition state_stack_of_stack (stack:stack) :=
  (List.map
    (fun cell:sigT noninitstate_type => singleton_state_pred (projT1 cell))
    stack ++ [singleton_state_pred init])%list.

(** The stack of symbols of an automaton stack **)
Definition symb_stack_of_stack (stack:stack) :=
  List.map
    (fun cell:sigT noninitstate_type => last_symb_of_non_init_state (projT1 cell))
    stack.

(** The stack invariant : it basically states that the assumptions on the
    states are true. **)
Inductive stack_invariant: stack -> Prop :=
  | stack_invariant_constr:
    forall stack,
      prefix      (head_symbs_of_state (state_of_stack stack))
                  (symb_stack_of_stack stack) ->
      prefix_pred (head_states_of_state (state_of_stack stack))
                  (state_stack_of_stack stack) ->
      stack_invariant_next stack ->
      stack_invariant stack
with stack_invariant_next: stack -> Prop :=
  | stack_invariant_next_nil:
      stack_invariant_next []
  | stack_invariant_next_cons:
    forall state_cur st stack_rec,
      stack_invariant stack_rec ->
      stack_invariant_next (existT _ state_cur st::stack_rec).

(** [pop] pops some symbols from the stack. It returns the popped semantic
    values using [sem_popped] as an accumulator and discards the popped
    states.**)
Fixpoint pop (symbols_to_pop:list symbol) {A:Type} (stk:stack) :
  thunkP (prefix symbols_to_pop (symb_stack_of_stack stk)) ->
  forall (action:arrows_right A (map symbol_semantic_type symbols_to_pop)),
    stack * A.
unshelve refine
  (match symbols_to_pop
      return
         (thunkP (prefix symbols_to_pop (symb_stack_of_stack stk))) ->
         forall (action:arrows_right A (map _ symbols_to_pop)), stack * A
   with
     | [] => fun _ action => (stk, action)
     | t::q => fun Hp action =>
       match stk
          return thunkP (prefix (t::q) (symb_stack_of_stack stk)) -> stack * A
       with
         | existT _ state_cur sem::stack_rec => fun Hp =>
           let sem_conv := cast symbol_semantic_type _ sem in
           pop q _ stack_rec _ (action sem_conv)
         | [] => fun Hp => False_rect _ _
       end Hp
   end).
Proof.
  - simpl in Hp. clear -Hp. abstract (intros _ ; specialize (Hp I); now inversion Hp).
  - clear -Hp. abstract (specialize (Hp I); now inversion Hp).
  - simpl in Hp. clear -Hp. abstract (intros _ ; specialize (Hp I); now inversion Hp).
Defined.

(* Equivalent declarative specification for pop, so that we avoid
   (part of) the dependent types nightmare. *)
Inductive pop_spec {A:Type} :
    forall (symbols_to_pop:list symbol) (stk : stack)
           (action : arrows_right A (map symbol_semantic_type symbols_to_pop))
           (stk' : stack) (sem : A),
      Prop :=
  | Nil_pop_spec stk sem : pop_spec [] stk sem stk sem
  | Cons_pop_spec symbols_to_pop st stk action sem stk' res :
      pop_spec symbols_to_pop stk (action sem) stk' res ->
      pop_spec (last_symb_of_non_init_state st::symbols_to_pop)
               (existT _ st sem :: stk) action stk' res.

Lemma pop_spec_ok {A:Type} symbols_to_pop stk Hp action stk' res:
  pop symbols_to_pop stk Hp action = (stk', res) <->
  pop_spec (A:=A) symbols_to_pop stk action stk' res.
Proof.
  revert stk Hp action.
  induction symbols_to_pop as [|t symbols_to_pop IH]=>stk Hp action /=.
  - split.
    + intros [= <- <-]. constructor.
    + intros H. inversion H. by subst_existT.
  - destruct stk as [|[st sem]]=>/=; [by destruct pop_subproof0|].
    remember (pop_subproof t symbols_to_pop stk st Hp) as EQ eqn:eq. clear eq.
    generalize EQ. revert Hp action. rewrite <-(EQ I)=>Hp action ?.
    rewrite cast_eq. rewrite IH. split.
    + intros. by constructor.
    + intros H. inversion H. by subst_existT.
Qed.


Lemma pop_preserves_invariant symbols_to_pop stk Hp A action :
  stack_invariant stk ->
  stack_invariant (fst (pop symbols_to_pop stk Hp (A:=A) action)).
Proof.
  revert stk Hp A action. induction symbols_to_pop as [|t q IH]=>//=.
  intros stk Hp A action Hi.
  destruct Hi as [stack Hp' Hpp [|state st stk']].
  - destruct pop_subproof0.
  - now apply IH.
Qed.

Lemma pop_state_valid symbols_to_pop stk Hp A action lpred :
  prefix_pred lpred (state_stack_of_stack stk) ->
  let stk' := fst (pop symbols_to_pop stk Hp (A:=A) action) in
  state_valid_after_pop (state_of_stack stk') symbols_to_pop lpred.
Proof.
  revert stk Hp A action lpred. induction symbols_to_pop as [|t q IH]=>/=.
  - intros stk Hp A a lpred Hpp. destruct lpred as [|pred lpred]; constructor.
    inversion Hpp as [|? lpred' ? pred' Himpl Hpp' eq1 eq2]; subst.
    specialize (Himpl (state_of_stack stk)).
    destruct (pred' (state_of_stack stk)) as [] eqn:Heqpred'=>//.
    destruct stk as [|[]]; simpl in *.
    + inversion eq2; subst; clear eq2.
      unfold singleton_state_pred in Heqpred'.
      now rewrite compare_refl in Heqpred'; discriminate.
    + inversion eq2; subst; clear eq2.
      unfold singleton_state_pred in Heqpred'.
      now rewrite compare_refl in Heqpred'; discriminate.
  - intros stk Hp A a lpred Hpp. destruct stk as [|[] stk]=>//=.
    + destruct pop_subproof0.
    + destruct lpred as [|pred lpred]; [by constructor|].
      constructor. apply IH. by inversion Hpp.
Qed.

(** [step_result] represents the result of one step of the automaton : it can
    fail, accept or progress. [Fail_sr] means that the input is incorrect.
    [Accept_sr] means that this is the last step of the automaton, and it
    returns the semantic value of the input word. [Progress_sr] means that
    some progress has been made, but new steps are needed in order to accept
    a word.

    For [Accept_sr] and [Progress_sr], the result contains the new input buffer.

    [Fail_sr] means that the input word is rejected by the automaton. It is
    different to [Err] (from the error monad), which mean that the automaton is
    bogus and has perfomed a forbidden action. **)
Inductive step_result :=
  | Fail_sr_full: state -> token -> step_result
  | Accept_sr: symbol_semantic_type (NT (start_nt init)) -> buffer -> step_result
  | Progress_sr: stack -> buffer -> step_result.


(** [reduce_step] does a reduce action :
   - pops some elements from the stack
   - execute the action of the production
   - follows the goto for the produced non terminal symbol **)
Definition reduce_step stk prod (buffer : buffer)
        (Hval : thunkP (valid_for_reduce (state_of_stack stk) prod))
        (Hi : thunkP (stack_invariant stk))
  : step_result.
refine
  ((let '(stk', sem) as ss := pop (prod_rhs_rev prod) stk _ (prod_action prod)
      return thunkP (state_valid_after_pop (state_of_stack (fst ss)) _
                               (head_states_of_state (state_of_stack stk))) -> _
    in fun Hval' =>
    match goto_table (state_of_stack stk') (prod_lhs prod) as goto
      return (thunkP (goto = None ->
                      match state_of_stack stk' with
                      | Init i => prod_lhs prod = start_nt i
                      | Ninit _ => False
                      end)) -> _
    with
    | Some (exist _ state_new e) => fun _ =>
      let sem := eq_rect _ _ sem _ e in
      Progress_sr (existT noninitstate_type state_new sem::stk') buffer
    | None => fun Hval =>
      let sem := cast symbol_semantic_type _ sem in
      Accept_sr sem buffer
    end (fun _ => _))
   (fun _ => pop_state_valid _ _ _ _ _ _ _)).
Proof.
  - clear -Hi Hval.
    abstract (intros _; destruct Hi=>//; eapply prefix_trans; [by apply Hval|eassumption]).
  - clear -Hval.
    abstract (intros _; f_equal; specialize (Hval I eq_refl); destruct stk' as [|[]]=>//).
  - simpl in Hval'. clear -Hval Hval'.
    abstract (move : Hval => /(_ I) [_ /(_ _ (Hval' I))] Hval2 Hgoto; by rewrite Hgoto in Hval2).
  - clear -Hi. abstract by destruct Hi.
Defined.

Lemma reduce_step_stack_invariant_preserved stk prod buffer Hv Hi stk' buffer':
  reduce_step stk prod buffer Hv Hi = Progress_sr stk' buffer' ->
  stack_invariant stk'.
Proof.
  unfold reduce_step.
  match goal with
  | |- context [pop ?symbols_to_pop stk ?Hp ?action] =>
    assert (Hi':=pop_preserves_invariant symbols_to_pop stk Hp _ action (Hi I));
    generalize (pop_state_valid symbols_to_pop stk Hp _ action)
  end.
  destruct pop as [stk0 sem]=>/=. simpl in Hi'. intros Hv'.
  assert (Hgoto1:=goto_head_symbs (state_of_stack stk0) (prod_lhs prod)).
  assert (Hgoto2:=goto_past_state (state_of_stack stk0) (prod_lhs prod)).
  match goal with | |- context [fun _ : True => ?X] => generalize X end.
  destruct goto_table as [[state_new e]|] eqn:EQgoto=>//.
  intros _ [= <- <-]. constructor=>/=.
  - constructor. eapply prefix_trans. apply Hgoto1. by destruct Hi'.
  - unfold state_stack_of_stack; simpl; constructor.
    + intros ?. by destruct singleton_state_pred.
    + eapply prefix_pred_trans. apply Hgoto2. by destruct Hi'.
  - by constructor.
Qed.

(** One step of parsing. **)
Definition step stk buffer (Hi : thunkP (stack_invariant stk)): step_result :=
  match action_table (state_of_stack stk) as a return
    thunkP
      match a return Prop with
      | Default_reduce_act prod => _
      | Lookahead_act awt => forall t : terminal,
        match awt t with
        | Reduce_act p => _
        | _ => True
        end
      end -> _
  with
  | Default_reduce_act prod => fun Hv =>
    reduce_step stk prod buffer Hv Hi
  | Lookahead_act awt => fun Hv =>
    match buf_head buffer with
    | tok =>
      match awt (token_term tok) as a return
        thunkP match a return Prop with Reduce_act p => _ | _ => _ end -> _
      with
      | Shift_act state_new e => fun _ =>
        let sem_conv := eq_rect _ symbol_semantic_type (token_sem tok) _ e in
        Progress_sr (existT noninitstate_type state_new sem_conv::stk)
                    (buf_tail buffer)
      | Reduce_act prod => fun Hv =>
        reduce_step stk prod buffer Hv Hi
      | Fail_act => fun _ =>
        Fail_sr_full (state_of_stack stk) tok
      end (fun _ => Hv I (token_term tok))
    end
  end (fun _ => reduce_ok _).

Lemma step_stack_invariant_preserved stk buffer Hi stk' buffer':
  step stk buffer Hi = Progress_sr stk' buffer' ->
  stack_invariant stk'.
Proof.
  unfold step.
  generalize (reduce_ok (state_of_stack stk))=>Hred.
  assert (Hshift1 := shift_head_symbs (state_of_stack stk)).
  assert (Hshift2 := shift_past_state (state_of_stack stk)).
  destruct action_table as [prod|awt]=>/=.
  - eauto using reduce_step_stack_invariant_preserved.
  - set (term := token_term (buf_head buffer)).
    generalize (Hred term). clear Hred. intros Hred.
    specialize (Hshift1 term). specialize (Hshift2 term).
    destruct (awt term) as [state_new e|prod|]=>//.
    + intros [= <- <-]. constructor=>/=.
      * constructor. eapply prefix_trans. apply Hshift1. by destruct Hi.
      * unfold state_stack_of_stack; simpl; constructor.
        -- intros ?. by destruct singleton_state_pred.
        -- eapply prefix_pred_trans. apply Hshift2. by destruct Hi.
      * constructor; by apply Hi.
    + eauto using reduce_step_stack_invariant_preserved.
Qed.

(** The parsing use a [nat] fuel parameter [log_n_steps], so that we
    do not have to prove terminaison, which is difficult.

    Note that [log_n_steps] is *not* the fuel in the conventionnal
    sense: this parameter contains the logarithm (in base 2) of the
    number of steps to perform. Hence, a value of, e.g., 50 will
    usually be enough to ensure termination. *)
Fixpoint parse_fix stk buffer (log_n_steps : nat) (Hi : thunkP (stack_invariant stk)):
  { sr : step_result |
    forall stk' buffer', sr = Progress_sr stk' buffer' -> stack_invariant stk' } :=
  match log_n_steps with
  | O => exist _ (step stk buffer Hi)
                 (step_stack_invariant_preserved _ _ Hi)
  | S log_n_steps =>
    match parse_fix stk buffer log_n_steps Hi with
    | exist _ (Progress_sr stk buffer) Hi' =>
      parse_fix stk buffer log_n_steps (fun _ => Hi' _ buffer eq_refl)
    | sr => sr
    end
  end.

(** The final result of a parsing is either a failure (the automaton
    has rejected the input word), either a timeout (the automaton has
    spent all the given [2^log_n_steps]), either a parsed semantic value
    with a rest of the input buffer.

    Note that we do not make parse_result depend on start_nt for the
    result type, so that this inductive is extracted without the use
    of Obj.t in OCaml.  **)
Inductive parse_result {A : Type} :=
  | Fail_pr_full: state -> token -> parse_result
  | Timeout_pr: parse_result
  | Parsed_pr: A -> buffer -> parse_result.
Global Arguments parse_result _ : clear implicits.

Definition parse (buffer : buffer) (log_n_steps : nat):
  parse_result (symbol_semantic_type (NT (start_nt init))).
refine (match proj1_sig (parse_fix [] buffer log_n_steps _) with
        | Fail_sr_full st tok => Fail_pr_full st tok
        | Accept_sr sem buffer' => Parsed_pr sem buffer'
        | Progress_sr _ _ => Timeout_pr
        end).
Proof.
  abstract (repeat constructor; intros; by destruct singleton_state_pred).
Defined.

End Interpreter.

Arguments Fail_sr_full {init} _ _.
Arguments Accept_sr {init} _ _.
Arguments Progress_sr {init} _ _.

(* These notations are provided for backwards compatibility with Coq code
 * from before the addition of the return information. They are used in the 
 * theorem statements. 
 *)
Notation Fail_sr := (Fail_sr_full _ _) (only parsing).
Notation Fail_pr := (Fail_pr_full _ _) (only parsing).

End Make.

Module Type T(A:Automaton.T).
  Include (Make A).
End T.