-- | -- Module : Zettel.Parse -- Description : Parse the org-zettelkasten files and split them into Zettel -- Copyright : (c) 2023, Yann Herklotz -- License : GPL-3.0-only -- Maintainer : git [at] yannherklotz [dot] com -- Stability : experimental -- Portability : POSIX module Zettel.Parse where import Data.Default (def) import qualified Data.Map.Strict as Map import qualified Data.Text as T import System.FilePath (takeBaseName) import Text.Pandoc.Builder (toMetaValue) import Text.Pandoc.Chunks (Chunk (..), ChunkedDoc (..), splitIntoChunks) import Text.Pandoc.Class (runIOorExplode) import Text.Pandoc.Definition (Block (..), Inline (..), Meta (..), MetaValue (..), Pandoc (..)) import Text.Pandoc.Options (ReaderOptions (..), getDefaultExtensions) import Text.Pandoc.Readers (readOrg) import Text.Pandoc.Shared (stringify) import Text.Pandoc.Walk (query, walk) import Zettel.Common import Zettel.Types decode :: ByteString -> Text decode = decodeUtf8 removeHeadings :: Block -> Block removeHeadings Header {} = Plain [] removeHeadings a = a removeDiv :: [Block] -> [Block] removeDiv = concatMap removeDiv' where removeDiv' (Div _ b') = b' removeDiv' a = [a] queryHeaderMetaData :: [Block] -> [(Text, MetaValue)] queryHeaderMetaData = query headingAttr where headingAttr (Header _ (_, _, l) i) = map (bimap exportDate MetaString) l <> [ ("title", MetaInlines i), ("author", MetaInlines [Str "Yann", Space, Str "Herklotz"]) ] headingAttr _ = [] exportDate s | s == "export_date" = "date" | otherwise = s removeSpan :: [Inline] -> [Inline] removeSpan = concatMap removeSpan' where removeSpan' (Span _ _) = [] removeSpan' a = [a] removeMathNL :: Inline -> Inline removeMathNL (Math mt t) = Math mt . fold $ T.lines t removeMathNL a = a separateTitleTags :: [Inline] -> ([Inline], [ZettelTag]) separateTitleTags title = (walk removeSpan $ filter (not . isTagSpan) title, map (ZettelTag . stringify) $ filter isTagSpan title) where isTagSpan (Span (_, l, _) _) = "tag" `elem` l isTagSpan _ = False parseDate :: Text -> Maybe Text parseDate t | T.null t = Just t | T.head t == '[' = viaNonEmpty head . T.words $ T.tail t | otherwise = Just t parseDateVal :: MetaValue -> Maybe MetaValue parseDateVal v = MetaString <$> (metaToString v >>= parseDate) zettelMetaFromMeta :: [ZettelTag] -> [ZettelCat] -> Meta -> ZettelMetadata zettelMetaFromMeta t c m = ZettelMetadata (lookupString "date" m) (lookupString "modified" m) t c (Just "Yann Herklotz") pandocFromChunk :: (Int, Text, Chunk) -> Pandoc pandocFromChunk (_, t, c) = walk removeMathNL . walk removeDiv . walk removeHeadings . Pandoc ( Meta ( fromList ( ("categories", toMetaValue [t]) : ("zettelid", MetaString (chunkId c)) : headingMeta ) ) ) $ chunkContents c where headingMeta = queryHeaderMetaData $ chunkContents c zettelFromPandoc :: Pandoc -> (ZettelId, Zettel) zettelFromPandoc p@(Pandoc attr _) = (ZettelId ident, Zettel title (zettelMetaFromMeta tags cats attr) p cempty cempty) where ident = fromMaybe "" $ lookupString "zettelid" attr title = fromMaybe [] $ lookupInlines "title" attr tags = maybe [] (map ZettelTag) $ lookupTextList "tags" attr cats = maybe [] (map ZettelCat) $ lookupTextList "categories" attr bibFromPandoc :: Pandoc -> (BibId, Zettel) bibFromPandoc p = first (\(ZettelId i) -> BibId i) $ zettelFromPandoc p updatePandocAttr :: Pandoc -> Pandoc updatePandocAttr (Pandoc attr b) = Pandoc ( Meta ( Map.insert "tags" tags . Map.insert "title" (MetaInlines title') . Map.update parseDateVal "modified" . Map.update parseDateVal "date" $ unMeta attr ) ) b where tags = toMetaValue $ map unZettelTag tags' (title', tags') = maybe ([], []) separateTitleTags $ lookupInlines "title" attr propagateNames :: [((Int, Text), [Chunk])] -> [(Int, Text, Chunk)] propagateNames n = [(i, name, chunk) | ((i, name), namedChunk) <- n, chunk <- namedChunk] readFileUtf8 :: FilePath -> IO Text readFileUtf8 = fmap decode . readFileBS readOrgFile :: Text -> IO Pandoc readOrgFile = runIOorExplode . readOrg (def {readerExtensions = getDefaultExtensions "org"}) parseBibliography :: FilePath -> IO [Pandoc] parseBibliography fp = do pandoc <- readFileUtf8 fp >>= readOrgFile let splitChunks = chunkedChunks $ splitIntoChunks "%i.md" False Nothing 1 pandoc return $ map (updatePandocAttr . pandocFromChunk . (\c -> (8, "bibliography", c))) splitChunks parseZettelKasten :: [(Int, FilePath)] -> Maybe FilePath -> IO ZettelGraph parseZettelKasten fl bibPath = do let names = map (second (T.pack . takeBaseName)) fl fs <- mapM (readFileUtf8 . snd) fl orgFiles <- mapM readOrgFile fs let splitChunks = map (splitIntoChunks "%i.md" False Nothing 15) orgFiles let chunks = propagateNames . zip names $ map chunkedChunks splitChunks let pandocList = map (updatePandocAttr . pandocFromChunk) chunks bibliography <- maybe mempty parseBibliography bibPath return . refreshPandocMeta $ ZettelGraph (fromList $ map zettelFromPandoc pandocList) (fromList $ map bibFromPandoc bibliography)