(* Provenance: lib/Tokenize.mll, introduced by commit
   16fc8b07cb8dbbabad0b665b9114925f4349cf38
   Author:  xleroy
   Date:    Thu, 5 Jun 2014 06:50:10 +0000
   Subject: Cleaner, more resilient parsing of pragmas.
   git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2507
               fca1b0fc-160b-0410-b1d3-a4f43f01ea2e *)

(* *********************************************************************)
(*                                                                     *)
(*              The Compcert verified compiler                         *)
(*                                                                     *)
(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
(*                                                                     *)
(*  Copyright Institut National de Recherche en Informatique et en     *)
(*  Automatique.  All rights reserved.  This file is distributed       *)
(*  under the terms of the GNU General Public License as published by  *)
(*  the Free Software Foundation, either version 2 of the License, or  *)
(*  (at your option) any later version.  This file is also distributed *)
(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
(*                                                                     *)
(* *********************************************************************)

(* Parse a string as a list of tokens *)

(* First character of a word token.  Digits are accepted here, so a
   number such as 42 is lexed as a single word token. *)
let identstart = [ '0'-'9' 'A'-'Z' 'a'-'z' '$' '_' ]

(* Following characters of a word token: the same set as [identstart],
   plus the dash and the dot. *)
let identcont = [ '0'-'9' 'A'-'Z' 'a'-'z' '$' '_' '-' '.' ]

(* Main rule.  [acc] holds the tokens read so far, most recent first;
   the list is put back in source order when end of input is reached. *)
rule tokenize acc = parse
  | eof
      { List.rev acc }
  (* Blanks separate tokens and are dropped.  The carriage return is
     part of the set so that input with CR LF line endings does not
     produce a spurious one-character token. *)
  | [' ' '\t' '\n' '\r'] +
      { tokenize acc lexbuf }
  (* Opening double quote: collect the contents in a fresh buffer. *)
  | "\""
      { tok_dquote acc (Buffer.create 16) lexbuf }
  (* Opening single quote: collect the contents in a fresh buffer. *)
  | "'"
      { tok_squote acc (Buffer.create 16) lexbuf }
  (* Word token, kept as written. *)
  | (identstart identcont*) as s
      { tokenize (s :: acc) lexbuf }
  (* Any other character is a token of length one. *)
  | _ as c
      { tokenize (String.make 1 c :: acc) lexbuf }

(* Contents of a double-quoted token.  [buf] accumulates the characters
   read so far.  The enclosing quotes are not part of the token. *)
and tok_dquote acc buf = parse
  (* Closing quote, or end of input: a missing closing quote is
     tolerated and the text read so far becomes the token.  An empty
     pair of quotes yields the empty string as a token. *)
  | "\"" | eof
      { tokenize (Buffer.contents buf :: acc) lexbuf }
  (* Backslash followed by t stands for a tab character. *)
  | "\\t"
      { Buffer.add_char buf '\t'; tok_dquote acc buf lexbuf }
  (* Backslash followed by n stands for a newline character. *)
  | "\\n"
      { Buffer.add_char buf '\n'; tok_dquote acc buf lexbuf }
  (* Backslash followed by a backslash or a double quote stands for
     that second character. *)
  | "\\" ([ '\\' '\"' ] as c)
      { Buffer.add_char buf c; tok_dquote acc buf lexbuf }
  (* Everything else is copied as is, including a backslash that does
     not start one of the escapes above. *)
  | _ as c
      { Buffer.add_char buf c; tok_dquote acc buf lexbuf }

(* Contents of a single-quoted token.  No escape sequence is recognized:
   every character up to the closing quote is copied as is. *)
and tok_squote acc buf = parse
  (* Closing quote, or end of input (missing closing quote tolerated). *)
  | "'" | eof
      { tokenize (Buffer.contents buf :: acc) lexbuf }
  | _ as c
      { Buffer.add_char buf c; tok_squote acc buf lexbuf }

{
(* [string s] splits [s] into tokens and returns them in the order in
   which they appear in [s]. *)
let string s =
  tokenize [] (Lexing.from_string s)
}