File: Block.hs

package info (click to toggle)
haskell-markdown 0.1.17.5-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 528 kB
  • sloc: haskell: 1,195; makefile: 4
file content (342 lines) | stat: -rw-r--r-- 12,479 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE CPP #-}
module Text.Markdown.Block
    ( Block (..)
    , ListType (..)
    , toBlocks
    , toBlockLines
    ) where

import Prelude
import Control.Monad (msum)
#if MIN_VERSION_conduit(1, 0, 0)
import Data.Conduit
#else
import Data.Conduit hiding ((.|))
import Data.Conduit.Internal (pipeL)
#endif
import qualified Data.Conduit.Text as CT
import qualified Data.Conduit.List as CL
import Data.Text (Text)
import qualified Data.Text as T
import Data.Char (isDigit)
import Text.Markdown.Types
import qualified Data.Set as Set
import qualified Data.Map as Map

toBlockLines :: Block Text -> Block [Text]
toBlockLines = fmap $ map T.stripEnd
                    . concatMap (T.splitOn "  \r\n")
                    . T.splitOn "  \n"

toBlocks :: Monad m => MarkdownSettings -> ConduitM Text (Block Text) m ()
toBlocks ms =
    mapOutput fixWS CT.lines .| toBlocksLines ms
  where
    fixWS = T.pack . go 0 . T.unpack

    go _ [] = []
    go i ('\r':cs) = go i cs
    go i ('\t':cs) =
        (replicate j ' ') ++ go (i + j) cs
      where
        j = 4 - (i `mod` 4)
    go i (c:cs) = c : go (i + 1) cs

toBlocksLines :: Monad m => MarkdownSettings -> ConduitM Text (Block Text) m ()
toBlocksLines ms = awaitForever (start ms) .| tightenLists

tightenLists :: Monad m => ConduitM (Either Blank (Block Text)) (Block Text) m ()
tightenLists =
    go Nothing
  where
    go mTightList =
        await >>= maybe (return ()) go'
      where
        go' (Left Blank) = go mTightList
        go' (Right (BlockList ltNew contents)) =
            case mTightList of
                Just (ltOld, isTight) | ltOld == ltNew -> do
                    yield $ BlockList ltNew $ (if isTight then tighten else untighten) contents
                    go mTightList
                _ -> do
                    isTight <- checkTight ltNew False
                    yield $ BlockList ltNew $ (if isTight then tighten else untighten) contents
                    go $ Just (ltNew, isTight)
        go' (Right b) = yield b >> go Nothing

    tighten (Right [BlockPara t]) = Left t
    tighten (Right []) = Left T.empty
    tighten x = x

    untighten (Left t) = Right [BlockPara t]
    untighten x = x

    checkTight lt sawBlank = do
        await >>= maybe (return $ not sawBlank) go'
      where
        go' (Left Blank) = checkTight lt True
        go' b@(Right (BlockList ltNext _)) | ltNext == lt = do
            leftover b
            return $ not sawBlank
        go' b = leftover b >> return False

data Blank = Blank

data LineType = LineList ListType Text
              | LineCode Text
              | LineFenced Text FencedHandler -- ^ terminator, language
              | LineBlockQuote Text
              | LineHeading Int Text
              | LineBlank
              | LineText Text
              | LineRule
              | LineHtml Text
              | LineReference Text Text -- ^ name, destination

lineType :: MarkdownSettings -> Text -> LineType
lineType ms t
    | T.null $ T.strip t = LineBlank
    | Just (term, fh) <- getFenced (Map.toList $ msFencedHandlers ms) t = LineFenced term fh
    | Just t' <- T.stripPrefix "> " t = LineBlockQuote t'
    | Just (level, t') <- stripHeading t = LineHeading level t'
    | Just t' <- T.stripPrefix "    " t = LineCode t'
    | isRule t = LineRule
    | isHtmlStart t = LineHtml t
    | Just (ltype, t') <- listStart t = LineList ltype t'
    | Just (name, dest) <- getReference t = LineReference name dest
    | otherwise = LineText t
  where
    getFenced [] _ = Nothing
    getFenced ((x, fh):xs) t'
        | Just rest <- T.stripPrefix x t' = Just (x, fh $ T.strip rest)
        | otherwise = getFenced xs t'

    isRule :: Text -> Bool
    isRule =
        go . T.strip
      where
        go "* * *" = True
        go "***" = True
        go "*****" = True
        go "- - -" = True
        go "---" = True
        go "___" = True
        go "_ _ _" = True
        go t' = T.length (T.takeWhile (== '-') t') >= 5

    stripHeading :: Text -> Maybe (Int, Text)
    stripHeading t'
        | T.null x = Nothing
        | otherwise = Just (T.length x, T.strip $ T.dropWhileEnd (== '#') y)
      where
        (x, y) = T.span (== '#') t'

    getReference :: Text -> Maybe (Text, Text)
    getReference a = do
        b <- T.stripPrefix "[" $ T.dropWhile (== ' ') a
        let (name, c) = T.break (== ']') b
        d <- T.stripPrefix "]:" c
        Just (name, T.strip d)

start :: Monad m => MarkdownSettings -> Text -> ConduitM Text (Either Blank (Block Text)) m ()
start ms t =
    go $ lineType ms t
  where
    go LineBlank = yield $ Left Blank
    go (LineFenced term fh) = do
        (finished, ls) <- takeTillConsume (== term)
        case finished of
            Just _ -> do
                let block =
                        case fh of
                            FHRaw fh' -> fh' $ T.intercalate "\n" ls
                            FHParsed fh' -> fh' $ runConduitPure $ mapM_ yield ls .| toBlocksLines ms .| CL.consume
                mapM_ (yield . Right) block
            Nothing -> mapM_ leftover (reverse $ T.cons ' ' t : ls)
    go (LineBlockQuote t') = do
        ls <- takeQuotes .| CL.consume
        let blocks = runConduitPure $ mapM_ yield (t' : ls) .| toBlocksLines ms .| CL.consume
        yield $ Right $ BlockQuote blocks
    go (LineHeading level t') = yield $ Right $ BlockHeading level t'
    go (LineCode t') = do
        ls <- getIndented 4 .| CL.consume
        yield $ Right $ BlockCode Nothing $ T.intercalate "\n" $ t' : ls
    go LineRule = yield $ Right BlockRule
    go (LineHtml t') = do
        if t' `Set.member` msStandaloneHtml ms
            then yield $ Right $ BlockHtml t'
            else do
                ls <- takeTill (T.null . T.strip) .| CL.consume
                yield $ Right $ BlockHtml $ T.intercalate "\n" $ t' : ls
    go (LineList ltype t') = do
        t2 <- CL.peek
        case fmap (lineType ms) t2 of
            -- If the next line is a non-indented text line, then we have a
            -- lazy list.
            Just (LineText t2') | T.null (T.takeWhile (== ' ') t2') -> do
                CL.drop 1
                -- Get all of the non-indented lines.
                let loop front = do
                        x <- await
                        case x of
                            Nothing -> return $ front []
                            Just y ->
                                case lineType ms y of
                                    LineText z -> loop (front . (z:))
                                    _ -> leftover y >> return (front [])
                ls <- loop (\rest -> T.dropWhile (== ' ') t' : t2' : rest)
                yield $ Right $ BlockList ltype $ Right [BlockPara $ T.intercalate "\n" ls]
            -- If the next line is an indented list, then we have a sublist. I
            -- disagree with this interpretation of Markdown, but it's the way
            -- that Github implements things, so we will too.
            _ | Just t2' <- t2
              , Just t2'' <- T.stripPrefix "    " t2'
              , LineList _ltype' _t2''' <- lineType ms t2'' -> do
                ls <- getIndented 4 .| CL.consume
                let blocks = runConduitPure $ mapM_ yield ls .| toBlocksLines ms .| CL.consume
                let addPlainText
                        | T.null $ T.strip t' = id
                        | otherwise = (BlockPlainText (T.strip t'):)
                yield $ Right $ BlockList ltype $ Right $ addPlainText blocks
            _ -> do
                let t'' = T.dropWhile (== ' ') t'
                let leader = T.length t - T.length t''
                ls <- getIndented leader .| CL.consume
                let blocks = runConduitPure $ mapM_ yield (t'' : ls) .| toBlocksLines ms .| CL.consume
                yield $ Right $ BlockList ltype $ Right blocks
    go (LineReference x y) = yield $ Right $ BlockReference x y
    go (LineText t') = do
        -- Check for underline headings
        let getUnderline :: Text -> Maybe Int
            getUnderline s
                | T.length s < 2 = Nothing
                | T.all (== '=') s = Just 1
                | T.all (== '-') s = Just 2
                | otherwise = Nothing
        t2 <- CL.peek
        case t2 >>= getUnderline of
            Just level -> do
                CL.drop 1
                yield $ Right $ BlockHeading level t'
            Nothing -> do
                let listStartIndent x =
                        case listStart x of
                            Just (_, y) -> T.take 2 y == "  "
                            Nothing -> False
                    isNonPara LineBlank = True
                    isNonPara LineFenced{} = True
                    isNonPara LineBlockQuote{} = not $ msBlankBeforeBlockquote ms
                    isNonPara LineHtml{} = True -- See example 95 in Common Markdown spec
                    isNonPara _ = False
                (mfinal, ls) <- takeTillConsume (\x -> isNonPara (lineType ms x) || listStartIndent x)
                maybe (return ()) leftover mfinal
                yield $ Right $ BlockPara $ T.intercalate "\n" $ t' : ls

isHtmlStart :: T.Text -> Bool
-- Allow for up to three spaces before the opening tag.
isHtmlStart t | "    " `T.isPrefixOf` t = False
isHtmlStart t =
    case T.stripPrefix "<" $ T.dropWhile (== ' ') t of
        Nothing -> False
        Just t' ->
            let (name, rest)
                    | Just _ <- T.stripPrefix "!--" t' = ("--", t')
                    | otherwise = T.break (\c -> c == ' ' || c == '>') t'
             in (T.all isValidTagName name &&
                not (T.null name) &&
                (not ("/" `T.isPrefixOf` rest) || ("/>" `T.isPrefixOf` rest)))

                || isPI t' || isCommentCData t'
  where
    isValidTagName :: Char -> Bool
    isValidTagName c =
        ('A' <= c && c <= 'Z') ||
        ('a' <= c && c <= 'z') ||
        ('0' <= c && c <= '9') ||
        (c == '-') ||
        (c == '_') ||
        (c == '/') ||
        (c == '!')

    isPI = ("?" `T.isPrefixOf`)
    isCommentCData = ("!" `T.isPrefixOf`)

takeTill :: Monad m => (i -> Bool) -> ConduitM i i m ()
takeTill f =
    loop
  where
    loop = await >>= maybe (return ()) (\x -> if f x then return () else yield x >> loop)

takeTillConsume
  :: Monad m
  => (i -> Bool)
  -> ConduitM i o m (Maybe i, [i])
takeTillConsume f =
    loop id
  where
    loop front = await >>= maybe
        (return (Nothing, front []))
        (\x ->
            if f x
                then return (Just x, front [])
                else loop (front . (x:))
        )

listStart :: Text -> Maybe (ListType, Text)
listStart t0
    | Just t' <- stripUnorderedListSeparator t = Just (Unordered, t')
    | Just t' <- stripNumber t, Just t'' <- stripOrderedListSeparator t' = Just (Ordered, t'')
    | otherwise = Nothing
  where
    t = T.stripStart t0

stripNumber :: Text -> Maybe Text
stripNumber x
    | T.null y = Nothing
    | otherwise = Just z
  where
    (y, z) = T.span isDigit x

stripUnorderedListSeparator :: Text -> Maybe Text
stripUnorderedListSeparator =
  stripPrefixChoice ["* ", "*\t", "+ ", "+\t", "- ", "-\t"]

stripOrderedListSeparator :: Text -> Maybe Text
stripOrderedListSeparator =
  stripPrefixChoice [". ", ".\t", ") ", ")\t"]

-- | Attempt to strip each of the prefixes in @xs@ from the start of @x@. As
-- soon as one matches, return the remainder of @x@. Prefixes are tried in
-- order. If none match, return @Nothing@.
stripPrefixChoice :: [Text] -> Text -> Maybe Text
stripPrefixChoice xs x = msum $ map (flip T.stripPrefix x) xs

getIndented :: Monad m => Int -> ConduitM Text Text m ()
getIndented leader =
    go []
  where
    go blanks = await >>= maybe (mapM_ leftover blanks) (go' blanks)

    go' blanks t
        | T.null $ T.strip t = go (T.drop leader t : blanks)
        | T.length x == leader && T.null (T.strip x) = do
            mapM_ yield $ reverse blanks
            yield y
            go []
        | otherwise = mapM_ leftover (t:blanks)
      where
        (x, y) = T.splitAt leader t

takeQuotes :: Monad m => ConduitM Text Text m ()
takeQuotes =
    await >>= maybe (return ()) go
  where
    go "" = return ()
    go ">" = yield "" >> takeQuotes
    go t
        | Just t' <- T.stripPrefix "> " t = yield t' >> takeQuotes
        | otherwise = yield t >> takeQuotes