diff options
Diffstat (limited to 'test/monniaux/ocaml/byterun/lexing.c')
-rw-r--r-- | test/monniaux/ocaml/byterun/lexing.c | 233 |
1 files changed, 233 insertions, 0 deletions
diff --git a/test/monniaux/ocaml/byterun/lexing.c b/test/monniaux/ocaml/byterun/lexing.c new file mode 100644 index 00000000..b1049904 --- /dev/null +++ b/test/monniaux/ocaml/byterun/lexing.c @@ -0,0 +1,233 @@ +/**************************************************************************/ +/* */ +/* OCaml */ +/* */ +/* Xavier Leroy, projet Cristal, INRIA Rocquencourt */ +/* */ +/* Copyright 1996 Institut National de Recherche en Informatique et */ +/* en Automatique. */ +/* */ +/* All rights reserved. This file is distributed under the terms of */ +/* the GNU Lesser General Public License version 2.1, with the */ +/* special exception on linking described in the file LICENSE. */ +/* */ +/**************************************************************************/ + +#define CAML_INTERNALS + +/* The table-driven automaton for lexers generated by camllex. */ + +#include "caml/fail.h" +#include "caml/mlvalues.h" +#include "caml/stacks.h" + +struct lexer_buffer { + value refill_buff; + value lex_buffer; + value lex_buffer_len; + value lex_abs_pos; + value lex_start_pos; + value lex_curr_pos; + value lex_last_pos; + value lex_last_action; + value lex_eof_reached; + value lex_mem; + value lex_start_p; + value lex_curr_p; +}; + +struct lexing_table { + value lex_base; + value lex_backtrk; + value lex_default; + value lex_trans; + value lex_check; + value lex_base_code; + value lex_backtrk_code; + value lex_default_code; + value lex_trans_code; + value lex_check_code; + value lex_code; +}; + +#if defined(ARCH_BIG_ENDIAN) || SIZEOF_SHORT != 2 +#define Short(tbl,n) \ + (*((unsigned char *)((tbl) + (n) * 2)) + \ + (*((signed char *)((tbl) + (n) * 2 + 1)) << 8)) +#else +#define Short(tbl,n) (((short *)(tbl))[(n)]) +#endif + +CAMLprim value caml_lex_engine(struct lexing_table *tbl, value start_state, + struct lexer_buffer *lexbuf) +{ + int state, base, backtrk, c; + + state = Int_val(start_state); + if (state >= 0) { + /* First entry */ + lexbuf->lex_last_pos = lexbuf->lex_start_pos = lexbuf->lex_curr_pos; + lexbuf->lex_last_action = Val_int(-1); + } else { + /* Reentry after refill */ + state = -state - 1; + } + while(1) { + /* Lookup base address or action number for current state */ + base = Short(tbl->lex_base, state); + if (base < 0) return Val_int(-base-1); + /* See if it's a backtrack point */ + backtrk = Short(tbl->lex_backtrk, state); + if (backtrk >= 0) { + lexbuf->lex_last_pos = lexbuf->lex_curr_pos; + lexbuf->lex_last_action = Val_int(backtrk); + } + /* See if we need a refill */ + if (lexbuf->lex_curr_pos >= lexbuf->lex_buffer_len){ + if (lexbuf->lex_eof_reached == Val_bool (0)){ + return Val_int(-state - 1); + }else{ + c = 256; + } + }else{ + /* Read next input char */ + c = Byte_u(lexbuf->lex_buffer, Long_val(lexbuf->lex_curr_pos)); + lexbuf->lex_curr_pos += 2; + } + /* Determine next state */ + if (Short(tbl->lex_check, base + c) == state) + state = Short(tbl->lex_trans, base + c); + else + state = Short(tbl->lex_default, state); + /* If no transition on this char, return to last backtrack point */ + if (state < 0) { + lexbuf->lex_curr_pos = lexbuf->lex_last_pos; + if (lexbuf->lex_last_action == Val_int(-1)) { + caml_failwith("lexing: empty token"); + } else { + return lexbuf->lex_last_action; + } + }else{ + /* Erase the EOF condition only if the EOF pseudo-character was + consumed by the automaton (i.e. there was no backtrack above) + */ + if (c == 256) lexbuf->lex_eof_reached = Val_bool (0); + } + } +} + +/***********************************************/ +/* New lexer engine, with memory of positions */ +/***********************************************/ + +static void run_mem(char *pc, value mem, value curr_pos) { + for (;;) { + unsigned char dst, src ; + + dst = *pc++ ; + if (dst == 0xff) + return ; + src = *pc++ ; + if (src == 0xff) { + /* fprintf(stderr,"[%hhu] <- %d\n",dst,Int_val(curr_pos)) ;*/ + Field(mem,dst) = curr_pos ; + } else { + /* fprintf(stderr,"[%hhu] <- [%hhu]\n",dst,src) ; */ + Field(mem,dst) = Field(mem,src) ; + } + } +} + +static void run_tag(char *pc, value mem) { + for (;;) { + unsigned char dst, src ; + + dst = *pc++ ; + if (dst == 0xff) + return ; + src = *pc++ ; + if (src == 0xff) { + /* fprintf(stderr,"[%hhu] <- -1\n",dst) ; */ + Field(mem,dst) = Val_int(-1) ; + } else { + /* fprintf(stderr,"[%hhu] <- [%hhu]\n",dst,src) ; */ + Field(mem,dst) = Field(mem,src) ; + } + } +} + +CAMLprim value caml_new_lex_engine(struct lexing_table *tbl, value start_state, + struct lexer_buffer *lexbuf) +{ + int state, base, backtrk, c, pstate ; + state = Int_val(start_state); + if (state >= 0) { + /* First entry */ + lexbuf->lex_last_pos = lexbuf->lex_start_pos = lexbuf->lex_curr_pos; + lexbuf->lex_last_action = Val_int(-1); + } else { + /* Reentry after refill */ + state = -state - 1; + } + while(1) { + /* Lookup base address or action number for current state */ + base = Short(tbl->lex_base, state); + if (base < 0) { + int pc_off = Short(tbl->lex_base_code, state) ; + run_tag(Bp_val(tbl->lex_code) + pc_off, lexbuf->lex_mem); + /* fprintf(stderr,"Perform: %d\n",-base-1) ; */ + return Val_int(-base-1); + } + /* See if it's a backtrack point */ + backtrk = Short(tbl->lex_backtrk, state); + if (backtrk >= 0) { + int pc_off = Short(tbl->lex_backtrk_code, state); + run_tag(Bp_val(tbl->lex_code) + pc_off, lexbuf->lex_mem); + lexbuf->lex_last_pos = lexbuf->lex_curr_pos; + lexbuf->lex_last_action = Val_int(backtrk); + + } + /* See if we need a refill */ + if (lexbuf->lex_curr_pos >= lexbuf->lex_buffer_len){ + if (lexbuf->lex_eof_reached == Val_bool (0)){ + return Val_int(-state - 1); + }else{ + c = 256; + } + }else{ + /* Read next input char */ + c = Byte_u(lexbuf->lex_buffer, Long_val(lexbuf->lex_curr_pos)); + lexbuf->lex_curr_pos += 2; + } + /* Determine next state */ + pstate=state ; + if (Short(tbl->lex_check, base + c) == state) + state = Short(tbl->lex_trans, base + c); + else + state = Short(tbl->lex_default, state); + /* If no transition on this char, return to last backtrack point */ + if (state < 0) { + lexbuf->lex_curr_pos = lexbuf->lex_last_pos; + if (lexbuf->lex_last_action == Val_int(-1)) { + caml_failwith("lexing: empty token"); + } else { + return lexbuf->lex_last_action; + } + }else{ + /* If some transition, get and perform memory moves */ + int base_code = Short(tbl->lex_base_code, pstate) ; + int pc_off ; + if (Short(tbl->lex_check_code, base_code + c) == pstate) + pc_off = Short(tbl->lex_trans_code, base_code + c) ; + else + pc_off = Short(tbl->lex_default_code, pstate) ; + if (pc_off > 0) + run_mem(Bp_val(tbl->lex_code) + pc_off, lexbuf->lex_mem, + lexbuf->lex_curr_pos) ; + /* Erase the EOF condition only if the EOF pseudo-character was + consumed by the automaton (i.e. there was no backtrack above) + */ + if (c == 256) lexbuf->lex_eof_reached = Val_bool (0); + } + } +} |