aboutsummaryrefslogtreecommitdiffstats
path: root/FMark/src/Common/Parser/Parser.fs
blob: 2e10969c6b01607da4d9dc929a03fd9e181e045d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
module Parser
open Types
open Shared
open ParserHelperFuncs
open TOCite
open Logger

// helper functions


/// parse inline code
let rec parseCode toks =
    match toks with
    | BACKTICK::_ -> ("", toks.[1..]) |> Ok
    | tok::toks' ->
        parseCode toks'
        |> Result.map (fun (str, tks) ->
        mapTok tok + str, tks )
    | e ->  globLog.Warn None (sprintf "%A" e)
            ("\\`", xOnwards 1 toks) |> Ok


/// parse a paragraph which counts for contents in  `<p>`
/// parseParagraph eats 2>= ENDLINEs
let parseParagraph ftLst toks =
    let parseParagraph' lines tokLine = (parseInLineElements2 ftLst tokLine) :: lines
    toks
    |> trimENDLINEs
    |> cutIntoLines
    |> List.fold parseParagraph' []
    |> List.rev
    |> Paragraph

/// match table start sequence
/// return table rows, terminates when [] or two continuous ENDLINEs
/// start sequence:
/// something in first line, at least one '|' and three '-' in second line
let (|MatchTable|_|) toks =
    // transform table rows into Table or Pretable depending if valid table.
    let tableTransform (rows,rtks) =
        rows |> Markalc.parseEvaluateTable
        |> function
        | Ok(rows) -> 
            let toPCellList (cell:Cell) = 
                let toks,head,align = (cell.GetParams) 
                let pCellLine = toks |> parseInLineElements
                CellLine(pCellLine,head,align)
            let toPRow row = 
                let clst, rHead = row |> function | Cells(clst',rHead') -> clst',rHead'
                PCells(List.map toPCellList clst, rHead)// Create PRows
            // For each row, unpack into Cell list
            (List.map toPRow rows |> Table,rtks) |> Some
        | Error(_)-> None
    match toks with
    | MatchTableHead rtks ->
        match rtks with
        | MatchTableFormater _ -> cutTableRows toks |> tableTransform
        | _ -> None
    | _ -> None

/// strip header to a minimal string for id purposes
let headerIDGen id hd =
    let hdLine = hd.HeaderName
    let rec headerIDGen' hdLine =
        match hdLine with
        | FrmtedString (Literal a)::tl -> a + headerIDGen' tl
        | FrmtedString (Emphasis a)::tl -> (headerIDGen' a) + (headerIDGen' tl)
        | _ -> ""
    (headerIDGen' hdLine |> replaceChars "\ " "_") + string id
/// parse list
let parseList toks =
    // call itself if list item has a higher level
    // return if list item has lower level
    let ignoreError result = match result with | Ok x -> x | Error x -> x
    let takeAwayWhiteSpaces toks =
            match toks with
            | WHITESPACE n:: rtks -> (n/2, rtks)
            | _ -> (0, toks)
    let excludeSelfSkip x = match x with | None -> None | Some 1 -> None | Some n -> Some (n-1)
    /// return list type, list level, and list content
    let (|GetLIContent|_|) toks =
        // return list level and remaining toks
        let (level, retoks) = takeAwayWhiteSpaces toks
        match retoks with
        | ASTERISK:: WHITESPACE _:: _ | MINUS:: WHITESPACE _:: _ -> // unordered list
            (UL, level, xOnwards 2 retoks) |> Some
        | NUMBER no:: DOT:: WHITESPACE _:: _ ->  // ordered list
            (OL (no|>int), level, xOnwards 3 retoks) |> Some
        | _ -> None

    let getLIContent toks =
        match toks with
        | GetLIContent result -> result |> Ok
        | _ ->
            let (level, retoks) = takeAwayWhiteSpaces toks
            (UL, level, retoks) |> Error

    /// get all list items in current item level and sub lists
    let rec getCurrentList level listItems lines =
        match lines with
        | line:: reLines ->
            match line |> getLIContent |> ignoreError with
            | (_, liLevel, _) when liLevel >= level -> // list item and sub list item
                getCurrentList level (line::listItems) reLines
            | _ -> listItems |> List.rev
        | [] -> listItems |> List.rev

    let rec parseList' level lines =
        let (listType, depth, _) =
            match List.head lines |> getLIContent with
            | Ok result -> result
            | Error result ->
                globLog.Warn (Some 100) "invalid list item, line does not begin with [*;-;number]\ndefault to UL"
                result
        let listFolder (currentLv, listItems, (skipNo: int option), currentLine) line =
            match skipNo with
            | None ->
                match line |> getLIContent |> ignoreError with
                | (_, level, content) when level=currentLv ->
                    let tLine = content |> parseInLineElements
                    (currentLv, StringItem(tLine)::listItems, None, currentLine+1)
                | (_, level, _) when level>currentLv ->
                    let (listItem, skip) =
                        xOnwards currentLine lines
                        |> getCurrentList (currentLv+1) []
                        |> parseList' (currentLv+1)
                    (currentLv, NestedList(listItem)::listItems, skip |> excludeSelfSkip, currentLine+1)
                | _ -> failwith "list item level < current level, not possible"
            | Some skip ->
                match skip with
                | 1 -> (currentLv, listItems, None, currentLine+1)
                | n when n>1 -> (currentLv, listItems, Some (n-1), currentLine+1)
                | _ -> failwith "negative or zero skip number, not possible"
        List.fold listFolder (level, [], None, 0) lines
        |> (fun (_, lis, _, _) ->
            let doSkip =
                match List.length lines with
                | 0 -> None
                | n -> Some n
            {ListType=listType; ListItem=lis |> List.rev; Depth=depth}, doSkip)
    toks
    |> trimENDLINEs
    |> cutIntoLines
    |> parseList' 0
    |> fst


/// Match TOC token
/// match "%%TOC"
let (|MatchTOC|_|) hdList toks =
    let createLinks (hdList:THeader list) =
        let makeRelLink i (h:THeader) =
            let linkText = Line(h.HeaderName)
            let linkID = headerIDGen i hdList.[i]
            {h with HeaderName = [Link (linkText, sprintf "#%s" linkID)]}
            //{h with HeaderName = Link((h.HeaderName), sprintf "#HEADER%i" i)} // Link of HyperText: TFrmtedString * URL: string
        let linksLst = List.mapi makeRelLink hdList
        {HeaderLst=linksLst}
    let filterHeadersByDepth depthOption hdList =
        match depthOption with
        | Some d ->
            let headerFilter hd =
                hd.Level <= d
            List.filter headerFilter hdList
        | None -> hdList

    let filterHeadersByName hdExListOption hdList =
        match hdExListOption with
        | Some hdExList ->
            let filterOutHeaders hdList hdToExclude =
                let hdTLine = hdToExclude |> parseInLineElements
                let headerFilter hd =
                    hd.HeaderName <> hdTLine
                List.filter headerFilter hdList
            List.fold filterOutHeaders hdList hdExList
        | None -> hdList

    let splitToksAt sep toks : Token list list =
        let rec split (toksList, toks) =
            match toks with
            | [] ->
                // delete empty list
                match toksList with
                | []::retoksList -> retoksList
                | _ -> toksList
                |> List.rev
            | sym::rtks when sym=sep ->
                (
                    []::toksList, rtks )
                |> split
            | tok::rtks ->
                (
                    (tok::(List.head toksList))::(List.tail toksList), rtks )
                |> split
        split ([[]], toks)
        |> List.map List.rev


    let trimWhitespaces toks =
        let rec trimer toks =
            match toks with
            | WHITESPACE _::retoks -> trimer retoks
            | _ -> toks
        toks
        |> trimer
        |> List.rev
        |> trimer
        |> List.rev

    ///  (maxDepth option, hdExList option)
    let parseTOCparameters toks =
        let parmFields = splitToksAt COMMA toks
        let (|MatchRSBRA|_|) toks =
            let rec matcher (pToks, toks) =
                match toks with
                | [] -> None
                | RSBRA::_ -> pToks |> List.rev |> Some
                | tk::rst -> (tk::pToks, rst) |> matcher
            matcher ([], toks)
        let parmsFolder (maxDepth, hdExList) parm =
            match parm |> trimWhitespaces with
            | LITERAL"depth"::EQUAL::NUMBER noStr::_ ->
                noStr |> int |> Some, hdExList
            | LITERAL"excludes"::EQUAL::LSBRA::rst ->
                let newHdExList =
                    match rst with
                    | MatchRSBRA exList ->
                        exList
                        |> splitToksAt SEMICOLON
                        |> List.map (fun x -> x |> trimWhitespaces)
                        |> Some
                    | _ -> None
                maxDepth, newHdExList
            | _ -> maxDepth, hdExList
        // state: (maxDepth, hdExList)
        List.fold parmsFolder (None, None) parmFields



    match toks with
    //| PERCENT::PERCENT::LITERAL("TOC")::// Options
    // | PERCENT::PERCENT::LITERAL("TOC")::WHITESPACE _::LITERAL"depth"::EQUAL::NUMBER noStr::rst ->
    //     // filter out headers with level > depth
    //     // ignore the rest tokens in this line
    //     let depth = noStr|>int
    //     (
    //         hdList
    //         |>filterHeaders depth
    //         |> createLinks
    //         ,
    //         rst|>cutFirstLine|>snd
    //     ) |> Some
    | PERCENT::PERCENT::LITERAL("TOC")::rst ->
        let (tocLine, retoks) = rst|>cutFirstLine
        let (maxDepth, hdExList) = parseTOCparameters tocLine
        (
            hdList
        |> filterHeadersByDepth maxDepth
        |> filterHeadersByName hdExList
        |> createLinks
            ,retoks)
        |> Some
    | _ -> None

/// parse supported `ParsedObj`s, turn them into a list
/// assuming each item start at the beginning of the line
/// the returned token head does not have 2>= ENDLINE
let rec parseItem (hdLst: THeader list) (ftLst: ParsedObj list) (rawToks: Token list) : Result<ParsedObj * Token list, string> =
    let toks = deleteLeadingENDLINEs rawToks
    match toks with
    | MatchTOC hdLst (toc,rtks) -> (ContentTable toc,rtks) |> Ok
    | CODEBLOCK (content, lang) :: toks' -> (CodeBlock(content, lang), toks') |> Ok
    | MatchTable (rows, rtks) -> (rows, rtks) |> Ok
    | MatchQuote (content, rtks) ->
        (parseInLineElements2 ftLst content |> Quote , rtks)
        |> Ok
    | HEADER i :: rtks -> (Header (hdLst.[i],(headerIDGen i hdLst.[i])), rtks) |> Ok
    | PickoutList (list, retoks) -> (parseList list |> List, retoks) |> Ok
    | PickoutParagraph (par, retoks) ->
        (parseParagraph ftLst par, retoks) |> Ok
    | _ -> sprintf "Parse item did not match: %A" toks |> removeChars ["[";"]"] |> Error

and parseItemList hdLst ftLst toks : Result<ParsedObj list * option<Token list>, string> =
    match (List.isEmpty toks, not (List.exists (function | WHITESPACE(_) | ENDLINE -> false | _ -> true) toks)) with
    | (false,false) -> 
        parseItem hdLst ftLst toks
        |> Result.bind (fun (pobj, re) ->
            match List.isEmpty re with
            | true -> ([pobj], None) |> Ok
            | false ->
                parseItemList hdLst ftLst re
                |> Result.map(fun (pobjs, re') ->
                    pobj::pobjs, re' )
        )
    | _ -> ([], None) |> Ok // if tokens are only whitespace or endlines, return no parsedObjs


/// top-level Parser, which the user should use
/// `parse` will either return result monad with either `ParsedObj list` or a string of Error message.
/// Unparsed Tokens will be in the returned in the Error message.
let parse toks =
    // insert two endlines at the beginning to make header in the first line work
    let (hds, refs, rtoks) = preParser (ENDLINE::ENDLINE::toks)
    parseItemList hds refs rtoks
    |> Result.bind (fun (pobjs, retoks) ->
        match retoks with
        | None -> pobjs |> Ok
        | Some retoks -> sprintf "Some unparsed tokens: %A" retoks |> Error)
    |> Result.map (fun pObjs -> List.append pObjs refs)