1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
|
{- |
Module : Data.GraphViz.PreProcessing
Description : Pre-process imported Dot code.
Copyright : (c) Ivan Lazar Miljenovic
License : 3-Clause BSD-style
Maintainer : Ivan.Miljenovic@gmail.com
\"Real life\" Dot code contains various items that are not directly
parseable by this library. This module defines the 'preProcess'
function to remove these components, which include:
* Comments (both @\/* ... *\/@ style and @\/\/ ... @ style);
* Pre-processor lines (lines starting with a @#@);
* Split lines (by inserting a @\\@ the rest of that \"line\" is
continued on the next line).
* Strings concatenated together using @\"...\" + \"...\"@; these
are concatenated into one big string.
-}
module Data.GraphViz.PreProcessing(preProcess) where
import Data.GraphViz.Parsing
import Data.GraphViz.Exception(GraphvizException(NotDotCode), throw)
import qualified Data.Text.Lazy as T
import Data.Text.Lazy(Text)
import qualified Data.Text.Lazy.Builder as B
import Data.Text.Lazy.Builder(Builder)
import Data.Monoid(Monoid(..), mconcat)
-- -----------------------------------------------------------------------------
-- Filtering out unwanted Dot items such as comments
-- | Remove unparseable features of Dot, such as comments and
-- multi-line strings (which are converted to single-line strings).
preProcess :: Text -> Text
preProcess t = case fst $ runParser parseOutUnwanted t of
(Right r) -> B.toLazyText r
(Left l) -> throw (NotDotCode l)
-- snd should be null
-- | Parse out comments and make quoted strings spread over multiple
-- lines only over a single line. Should parse the /entire/ input
-- 'Text'.
parseOutUnwanted :: Parse Builder
parseOutUnwanted = mconcat <$> many getNext
where
getNext = parseOK
`onFail`
parseConcatStrings
`onFail`
parseHTML
`onFail`
parseUnwanted
`onFail`
fmap B.singleton next
parseOK = B.fromLazyText
<$> many1Satisfy (`notElem` ['\n', '\r', '\\', '/', '"', '<'])
-- | Parses an unwanted part of the Dot code (comments and
-- pre-processor lines; also un-splits lines).
parseUnwanted :: (Monoid m) => Parse m
parseUnwanted = oneOf [ parseLineComment
, parseMultiLineComment
, parsePreProcessor
, parseSplitLine
]
-- | Remove pre-processor lines (that is, those that start with a
-- @#@). Will consume the newline from the beginning of the
-- previous line, but will leave the one from the pre-processor line
-- there (so in the end it just removes the line).
parsePreProcessor :: (Monoid m) => Parse m
parsePreProcessor = newline *> character '#' *> consumeLine *> pure mempty
-- | Parse @//@-style comments.
parseLineComment :: (Monoid m) => Parse m
parseLineComment = string "//"
-- Note: do /not/ consume the newlines, as they're
-- needed in case the next line is a pre-processor
-- line.
*> consumeLine
*> pure mempty
-- | Parse @/* ... */@-style comments.
parseMultiLineComment :: (Monoid m) => Parse m
parseMultiLineComment = bracket start end (many inner) *> pure mempty
where
start = string "/*"
end = string "*/"
inner = (many1Satisfy ('*' /=) *> pure ())
`onFail`
(character '*' *> satisfy ('/' /=) *> inner)
parseConcatStrings :: Parse Builder
parseConcatStrings = wrapQuotes . mconcat <$> sepBy1 parseString parseConcat
where
qParse = bracket (character '"') (commit $ character '"')
parseString = qParse (mconcat <$> many parseInner)
parseInner = (string "\\\"" *> pure (B.fromLazyText $ T.pack "\\\""))
`onFail`
-- Need to parse an explicit `\', in case it ends the
-- string (and thus the next step would get parsed by the
-- previous option).
(string "\\\\" *> pure (B.fromLazyText $ T.pack "\\\\"))
`onFail`
parseSplitLine -- in case there's a split mid-quote
`onFail`
fmap B.singleton (satisfy (quoteChar /=))
parseConcat = parseSep *> character '+' *> parseSep
parseSep = many $ whitespace1 `onFail` parseUnwanted
wrapQuotes str = qc `mappend` str `mappend` qc
qc = B.singleton '"'
-- | Lines can be split with a @\\@ at the end of the line.
parseSplitLine :: (Monoid m) => Parse m
parseSplitLine = character '\\' *> newline *> pure mempty
parseHTML :: Parse Builder
parseHTML = fmap (addAngled . mconcat)
. parseAngled $ many inner
where
inner = parseHTML
`onFail`
(B.fromLazyText <$> many1Satisfy (\c -> c /= open && c /= close))
addAngled str = B.singleton open `mappend` str `mappend` B.singleton close
open = '<'
close = '>'
|