diff options
author | Yann Herklotz <ymherklotz@gmail.com> | 2018-03-23 03:12:12 +0000 |
---|---|---|
committer | Yann Herklotz <ymherklotz@gmail.com> | 2018-03-23 03:12:12 +0000 |
commit | 7c621378e4f3b6c9118b5591427faf8170c2faef (patch) | |
tree | fda43ed69d8ba90be5ce2b8462b120630f1f265a /FMark | |
parent | 566b92541201e685e02ba40dde225cf999578043 (diff) | |
download | FMark-7c621378e4f3b6c9118b5591427faf8170c2faef.tar.gz FMark-7c621378e4f3b6c9118b5591427faf8170c2faef.zip |
[html] Added HTMLLIT token to identify HTML blocks in parser
Diffstat (limited to 'FMark')
-rw-r--r-- | FMark/src/Common/Lexer/Lexer.fs | 20 | ||||
-rw-r--r-- | FMark/src/Common/Lexer/LexerTest.fs | 28 |
2 files changed, 24 insertions, 24 deletions
diff --git a/FMark/src/Common/Lexer/Lexer.fs b/FMark/src/Common/Lexer/Lexer.fs index 6fdb160..5e2c7d6 100644 --- a/FMark/src/Common/Lexer/Lexer.fs +++ b/FMark/src/Common/Lexer/Lexer.fs @@ -48,23 +48,23 @@ let nextToken state s = match s, state with | EscapedCharTok n, _ -> n, state | HTMLSingleton (s, r), Normal -> - (LITERAL s, r), Normal + (HTMLLIT s, r), Normal | HTMLStartTag (s, [t], r), Normal -> - (LITERAL s, r), InHTMLTag (t, 1) + (HTMLLIT s, r), InHTMLTag (t, 1) | HTMLStartTag (s, [t], r), InHTMLTag (tag, d) -> if t = tag then - (LITERAL s, r), InHTMLTag (tag, d+1) + (HTMLLIT s, r), InHTMLTag (tag, d+1) else - (LITERAL s, r), InHTMLTag (tag, d) + (HTMLLIT s, r), InHTMLTag (tag, d) | HTMLEndTag (s, [t], r), InHTMLTag (tag, d) -> if t = tag then - if d = 1 then (LITERAL s, r), Normal - else (LITERAL s, r), InHTMLTag (tag, d-1) - else (LITERAL s, r), InHTMLTag (tag, d) + if d = 1 then (HTMLLIT s, r), Normal + else (HTMLLIT s, r), InHTMLTag (tag, d-1) + else (HTMLLIT s, r), InHTMLTag (tag, d) | RegexMatch "^.+?(?=<)" (s, _, r), InHTMLTag (t, d) -> - (LITERAL s, r), InHTMLTag (t, d) + (HTMLLIT s, r), InHTMLTag (t, d) | RegexMatch "^.*" (s, _, r), InHTMLTag (t, d) -> - (LITERAL s, r), InHTMLTag (t, d) + (HTMLLIT s, r), InHTMLTag (t, d) | CharacterTok n, _ -> n, state | RegexMatch @"^\s+" (m, _, s), _ -> (replaceChars "\t" " " m @@ -102,7 +102,7 @@ let returnTokens = function | _, InCodeBlock (s, l) -> [CODEBLOCK (s, l); ENDLINE] | tok, InHTMLTag (str, _) -> - tok @ [LITERAL str; ENDLINE] + tok @ [HTMLLIT str; ENDLINE] | tok, _ -> tok diff --git a/FMark/src/Common/Lexer/LexerTest.fs b/FMark/src/Common/Lexer/LexerTest.fs index 84fcf63..9e65a89 100644 --- a/FMark/src/Common/Lexer/LexerTest.fs +++ b/FMark/src/Common/Lexer/LexerTest.fs @@ -309,46 +309,46 @@ let lexTest = "One line html", "<span>This is a span element</span>", - [LITERAL "<span>"; LITERAL "This is a span element"; LITERAL "</span>"; ENDLINE] + [HTMLLIT "<span>"; HTMLLIT "This is a span element"; HTMLLIT "</span>"; ENDLINE] "Online closing html", "<img src=\"https://github.com/IMAGE.png\" />", - [LITERAL "<img src=\"https://github.com/IMAGE.png\" />"; ENDLINE] + [HTMLLIT "<img src=\"https://github.com/IMAGE.png\" />"; ENDLINE] "HTML with non-HTML start", "This is an image: <span>Hello World</span>", [LITERAL "This"; WHITESPACE 1; LITERAL "is"; WHITESPACE 1; LITERAL "an" - WHITESPACE 1; LITERAL "image"; COLON; WHITESPACE 1; LITERAL "<span>" - LITERAL "Hello World"; LITERAL "</span>"; ENDLINE] + WHITESPACE 1; LITERAL "image"; COLON; WHITESPACE 1; HTMLLIT "<span>" + HTMLLIT "Hello World"; HTMLLIT "</span>"; ENDLINE] "Singleton HTML passthrough", "Singleton <br> passthrough and more text", - [LITERAL "Singleton"; WHITESPACE 1; LITERAL "<br>"; WHITESPACE 1; LITERAL "passthrough" + [LITERAL "Singleton"; WHITESPACE 1; HTMLLIT "<br>"; WHITESPACE 1; LITERAL "passthrough" WHITESPACE 1; LITERAL "and"; WHITESPACE 1; LITERAL "more"; WHITESPACE 1 LITERAL "text"; ENDLINE] "HTML image tag", "Embedding an <img src=\"https://github.com/IMAGE\"> in text", - [LITERAL "Embedding"; WHITESPACE 1; LITERAL "an"; WHITESPACE 1; LITERAL"<img src=\"https://github.com/IMAGE\">" + [LITERAL "Embedding"; WHITESPACE 1; LITERAL "an"; WHITESPACE 1; HTMLLIT "<img src=\"https://github.com/IMAGE\">" WHITESPACE 1; LITERAL "in"; WHITESPACE 1; LITERAL "text"; ENDLINE] "A lot of nested tags", "<p><p><p><p><p><p><p> </p></p></p></p></p></p></p>", - [LITERAL "<p>"; LITERAL "<p>"; LITERAL "<p>"; LITERAL "<p>"; LITERAL "<p>"; LITERAL "<p>" - LITERAL "<p>"; LITERAL " "; LITERAL "</p>"; LITERAL "</p>"; LITERAL "</p>"; LITERAL "</p>" - LITERAL "</p>"; LITERAL "</p>"; LITERAL "</p>"; ENDLINE] + [HTMLLIT "<p>"; HTMLLIT "<p>"; HTMLLIT "<p>"; HTMLLIT "<p>"; HTMLLIT "<p>"; HTMLLIT "<p>" + HTMLLIT "<p>"; HTMLLIT " "; HTMLLIT "</p>"; HTMLLIT "</p>"; HTMLLIT "</p>"; HTMLLIT "</p>" + HTMLLIT "</p>"; HTMLLIT "</p>"; HTMLLIT "</p>"; ENDLINE] "Half opened tag should just be outputted", "<a><", - [LITERAL "<a>"; LITERAL "<"; ENDLINE] + [HTMLLIT "<a>"; HTMLLIT "<"; ENDLINE] "Half opened with text after should be as expected", "<a><This text should appear as normal", - [LITERAL "<a>"; LITERAL "<This text should appear as normal"; ENDLINE] + [HTMLLIT "<a>"; HTMLLIT "<This text should appear as normal"; ENDLINE] "Wrong html close tag should be passed through", "<p></>s", - [LITERAL "<p>"; LITERAL "</>s"; ENDLINE] + [HTMLLIT "<p>"; HTMLLIT "</>s"; ENDLINE] ] /// Tests for the complete lexers with a string list as input @@ -384,9 +384,9 @@ let lexListTest = "This should not, <span>This should not be tokenized []</span>"], [LITERAL "This"; WHITESPACE 1; LITERAL "should"; WHITESPACE 1; LITERAL "not" WHITESPACE 1; LITERAL "be"; WHITESPACE 1; LITERAL "passed"; WHITESPACE 1 - LITERAL "through"; ENDLINE; LITERAL "<div>"; LITERAL "This should just all be passed through, "; LITERAL "</div>" + LITERAL "through"; ENDLINE; HTMLLIT "<div>"; HTMLLIT "This should just all be passed through, "; HTMLLIT "</div>" ENDLINE; LITERAL "This"; WHITESPACE 1; LITERAL "should"; WHITESPACE 1; LITERAL "not"; COMMA; WHITESPACE 1 - LITERAL "<span>"; LITERAL "This should not be tokenized []"; LITERAL "</span>"; ENDLINE] + HTMLLIT "<span>"; HTMLLIT "This should not be tokenized []"; HTMLLIT "</span>"; ENDLINE] ] // -------------------------------------------------- |