aboutsummaryrefslogtreecommitdiffstats
path: root/test/monniaux/ocaml/byterun/lexing.c
diff options
context:
space:
mode:
Diffstat (limited to 'test/monniaux/ocaml/byterun/lexing.c')
-rw-r--r--test/monniaux/ocaml/byterun/lexing.c233
1 files changed, 233 insertions, 0 deletions
diff --git a/test/monniaux/ocaml/byterun/lexing.c b/test/monniaux/ocaml/byterun/lexing.c
new file mode 100644
index 00000000..b1049904
--- /dev/null
+++ b/test/monniaux/ocaml/byterun/lexing.c
@@ -0,0 +1,233 @@
+/**************************************************************************/
+/* */
+/* OCaml */
+/* */
+/* Xavier Leroy, projet Cristal, INRIA Rocquencourt */
+/* */
+/* Copyright 1996 Institut National de Recherche en Informatique et */
+/* en Automatique. */
+/* */
+/* All rights reserved. This file is distributed under the terms of */
+/* the GNU Lesser General Public License version 2.1, with the */
+/* special exception on linking described in the file LICENSE. */
+/* */
+/**************************************************************************/
+
+#define CAML_INTERNALS
+
+/* The table-driven automaton for lexers generated by camllex. */
+
+#include "caml/fail.h"
+#include "caml/mlvalues.h"
+#include "caml/stacks.h"
+
+struct lexer_buffer {
+ value refill_buff;
+ value lex_buffer;
+ value lex_buffer_len;
+ value lex_abs_pos;
+ value lex_start_pos;
+ value lex_curr_pos;
+ value lex_last_pos;
+ value lex_last_action;
+ value lex_eof_reached;
+ value lex_mem;
+ value lex_start_p;
+ value lex_curr_p;
+};
+
+struct lexing_table {
+ value lex_base;
+ value lex_backtrk;
+ value lex_default;
+ value lex_trans;
+ value lex_check;
+ value lex_base_code;
+ value lex_backtrk_code;
+ value lex_default_code;
+ value lex_trans_code;
+ value lex_check_code;
+ value lex_code;
+};
+
+#if defined(ARCH_BIG_ENDIAN) || SIZEOF_SHORT != 2
+#define Short(tbl,n) \
+ (*((unsigned char *)((tbl) + (n) * 2)) + \
+ (*((signed char *)((tbl) + (n) * 2 + 1)) << 8))
+#else
+#define Short(tbl,n) (((short *)(tbl))[(n)])
+#endif
+
+CAMLprim value caml_lex_engine(struct lexing_table *tbl, value start_state,
+ struct lexer_buffer *lexbuf)
+{
+ int state, base, backtrk, c;
+
+ state = Int_val(start_state);
+ if (state >= 0) {
+ /* First entry */
+ lexbuf->lex_last_pos = lexbuf->lex_start_pos = lexbuf->lex_curr_pos;
+ lexbuf->lex_last_action = Val_int(-1);
+ } else {
+ /* Reentry after refill */
+ state = -state - 1;
+ }
+ while(1) {
+ /* Lookup base address or action number for current state */
+ base = Short(tbl->lex_base, state);
+ if (base < 0) return Val_int(-base-1);
+ /* See if it's a backtrack point */
+ backtrk = Short(tbl->lex_backtrk, state);
+ if (backtrk >= 0) {
+ lexbuf->lex_last_pos = lexbuf->lex_curr_pos;
+ lexbuf->lex_last_action = Val_int(backtrk);
+ }
+ /* See if we need a refill */
+ if (lexbuf->lex_curr_pos >= lexbuf->lex_buffer_len){
+ if (lexbuf->lex_eof_reached == Val_bool (0)){
+ return Val_int(-state - 1);
+ }else{
+ c = 256;
+ }
+ }else{
+ /* Read next input char */
+ c = Byte_u(lexbuf->lex_buffer, Long_val(lexbuf->lex_curr_pos));
+ lexbuf->lex_curr_pos += 2;
+ }
+ /* Determine next state */
+ if (Short(tbl->lex_check, base + c) == state)
+ state = Short(tbl->lex_trans, base + c);
+ else
+ state = Short(tbl->lex_default, state);
+ /* If no transition on this char, return to last backtrack point */
+ if (state < 0) {
+ lexbuf->lex_curr_pos = lexbuf->lex_last_pos;
+ if (lexbuf->lex_last_action == Val_int(-1)) {
+ caml_failwith("lexing: empty token");
+ } else {
+ return lexbuf->lex_last_action;
+ }
+ }else{
+ /* Erase the EOF condition only if the EOF pseudo-character was
+ consumed by the automaton (i.e. there was no backtrack above)
+ */
+ if (c == 256) lexbuf->lex_eof_reached = Val_bool (0);
+ }
+ }
+}
+
+/***********************************************/
+/* New lexer engine, with memory of positions */
+/***********************************************/
+
+static void run_mem(char *pc, value mem, value curr_pos) {
+ for (;;) {
+ unsigned char dst, src ;
+
+ dst = *pc++ ;
+ if (dst == 0xff)
+ return ;
+ src = *pc++ ;
+ if (src == 0xff) {
+ /* fprintf(stderr,"[%hhu] <- %d\n",dst,Int_val(curr_pos)) ;*/
+ Field(mem,dst) = curr_pos ;
+ } else {
+ /* fprintf(stderr,"[%hhu] <- [%hhu]\n",dst,src) ; */
+ Field(mem,dst) = Field(mem,src) ;
+ }
+ }
+}
+
+static void run_tag(char *pc, value mem) {
+ for (;;) {
+ unsigned char dst, src ;
+
+ dst = *pc++ ;
+ if (dst == 0xff)
+ return ;
+ src = *pc++ ;
+ if (src == 0xff) {
+ /* fprintf(stderr,"[%hhu] <- -1\n",dst) ; */
+ Field(mem,dst) = Val_int(-1) ;
+ } else {
+ /* fprintf(stderr,"[%hhu] <- [%hhu]\n",dst,src) ; */
+ Field(mem,dst) = Field(mem,src) ;
+ }
+ }
+}
+
+CAMLprim value caml_new_lex_engine(struct lexing_table *tbl, value start_state,
+ struct lexer_buffer *lexbuf)
+{
+ int state, base, backtrk, c, pstate ;
+ state = Int_val(start_state);
+ if (state >= 0) {
+ /* First entry */
+ lexbuf->lex_last_pos = lexbuf->lex_start_pos = lexbuf->lex_curr_pos;
+ lexbuf->lex_last_action = Val_int(-1);
+ } else {
+ /* Reentry after refill */
+ state = -state - 1;
+ }
+ while(1) {
+ /* Lookup base address or action number for current state */
+ base = Short(tbl->lex_base, state);
+ if (base < 0) {
+ int pc_off = Short(tbl->lex_base_code, state) ;
+ run_tag(Bp_val(tbl->lex_code) + pc_off, lexbuf->lex_mem);
+ /* fprintf(stderr,"Perform: %d\n",-base-1) ; */
+ return Val_int(-base-1);
+ }
+ /* See if it's a backtrack point */
+ backtrk = Short(tbl->lex_backtrk, state);
+ if (backtrk >= 0) {
+ int pc_off = Short(tbl->lex_backtrk_code, state);
+ run_tag(Bp_val(tbl->lex_code) + pc_off, lexbuf->lex_mem);
+ lexbuf->lex_last_pos = lexbuf->lex_curr_pos;
+ lexbuf->lex_last_action = Val_int(backtrk);
+
+ }
+ /* See if we need a refill */
+ if (lexbuf->lex_curr_pos >= lexbuf->lex_buffer_len){
+ if (lexbuf->lex_eof_reached == Val_bool (0)){
+ return Val_int(-state - 1);
+ }else{
+ c = 256;
+ }
+ }else{
+ /* Read next input char */
+ c = Byte_u(lexbuf->lex_buffer, Long_val(lexbuf->lex_curr_pos));
+ lexbuf->lex_curr_pos += 2;
+ }
+ /* Determine next state */
+ pstate=state ;
+ if (Short(tbl->lex_check, base + c) == state)
+ state = Short(tbl->lex_trans, base + c);
+ else
+ state = Short(tbl->lex_default, state);
+ /* If no transition on this char, return to last backtrack point */
+ if (state < 0) {
+ lexbuf->lex_curr_pos = lexbuf->lex_last_pos;
+ if (lexbuf->lex_last_action == Val_int(-1)) {
+ caml_failwith("lexing: empty token");
+ } else {
+ return lexbuf->lex_last_action;
+ }
+ }else{
+ /* If some transition, get and perform memory moves */
+ int base_code = Short(tbl->lex_base_code, pstate) ;
+ int pc_off ;
+ if (Short(tbl->lex_check_code, base_code + c) == pstate)
+ pc_off = Short(tbl->lex_trans_code, base_code + c) ;
+ else
+ pc_off = Short(tbl->lex_default_code, pstate) ;
+ if (pc_off > 0)
+ run_mem(Bp_val(tbl->lex_code) + pc_off, lexbuf->lex_mem,
+ lexbuf->lex_curr_pos) ;
+ /* Erase the EOF condition only if the EOF pseudo-character was
+ consumed by the automaton (i.e. there was no backtrack above)
+ */
+ if (c == 256) lexbuf->lex_eof_reached = Val_bool (0);
+ }
+ }
+}