File: PreProcessing.hs

package info (click to toggle)
haskell-graphviz 2999.17.0.1-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 1,488 kB
  • sloc: haskell: 12,152; makefile: 2
file content (131 lines) | stat: -rw-r--r-- 5,032 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
{- |
   Module      : Data.GraphViz.PreProcessing
   Description : Pre-process imported Dot code.
   Copyright   : (c) Ivan Lazar Miljenovic
   License     : 3-Clause BSD-style
   Maintainer  : Ivan.Miljenovic@gmail.com

   \"Real life\" Dot code contains various items that are not directly
   parseable by this library.  This module defines the 'preProcess'
   function to remove these components, which include:

     * Comments (both @\/* ... *\/@ style and @\/\/ ... @ style);

     * Pre-processor lines (lines starting with a @#@);

     * Split lines (by inserting a @\\@ the rest of that \"line\" is
       continued on the next line).

     * Strings concatenated together using @\"...\" + \"...\"@; these
       are concatenated into one big string.
-}
module Data.GraphViz.PreProcessing(preProcess) where

import Data.GraphViz.Parsing
import Data.GraphViz.Exception(GraphvizException(NotDotCode), throw)

import qualified Data.Text.Lazy as T
import Data.Text.Lazy(Text)
import qualified Data.Text.Lazy.Builder as B
import Data.Text.Lazy.Builder(Builder)
import Data.Monoid(Monoid(..), mconcat)

-- -----------------------------------------------------------------------------
-- Filtering out unwanted Dot items such as comments

-- | Remove unparseable features of Dot, such as comments and
--   multi-line strings (which are converted to single-line strings).
preProcess :: Text -> Text
preProcess t = case fst $ runParser parseOutUnwanted t of
                 (Right r) -> B.toLazyText r
                 (Left l)  -> throw (NotDotCode l)
               -- snd should be null

-- | Parse out comments and make quoted strings spread over multiple
--   lines only over a single line.  Should parse the /entire/ input
--   'Text'.
parseOutUnwanted :: Parse Builder
parseOutUnwanted = mconcat <$> many getNext
  where
    getNext = parseOK
              `onFail`
              parseConcatStrings
              `onFail`
              parseHTML
              `onFail`
              parseUnwanted
              `onFail`
              fmap B.singleton next

    parseOK = B.fromLazyText
              <$> many1Satisfy (`notElem` ['\n', '\r', '\\', '/', '"', '<'])

-- | Parses an unwanted part of the Dot code (comments and
--   pre-processor lines; also un-splits lines).
parseUnwanted :: (Monoid m) => Parse m
parseUnwanted = oneOf [ parseLineComment
                      , parseMultiLineComment
                      , parsePreProcessor
                      , parseSplitLine
                      ]

-- | Remove pre-processor lines (that is, those that start with a
--   @#@).  Will consume the newline from the beginning of the
--   previous line, but will leave the one from the pre-processor line
--   there (so in the end it just removes the line).
parsePreProcessor :: (Monoid m) => Parse m
parsePreProcessor = newline *> character '#' *> consumeLine *> pure mempty

-- | Parse @//@-style comments.
parseLineComment :: (Monoid m) => Parse m
parseLineComment = string "//"
                   -- Note: do /not/ consume the newlines, as they're
                   -- needed in case the next line is a pre-processor
                   -- line.
                   *> consumeLine
                   *> pure mempty

-- | Parse @/* ... */@-style comments.
parseMultiLineComment :: (Monoid m) => Parse m
parseMultiLineComment = bracket start end (many inner) *> pure mempty
  where
    start = string "/*"
    end = string "*/"
    inner = (many1Satisfy ('*' /=) *> pure ())
            `onFail`
            (character '*' *> satisfy ('/' /=) *> inner)

parseConcatStrings :: Parse Builder
parseConcatStrings = wrapQuotes . mconcat <$> sepBy1 parseString parseConcat
  where
    qParse = bracket (character '"') (commit $ character '"')
    parseString = qParse (mconcat <$> many parseInner)
    parseInner = (string "\\\"" *> pure (B.fromLazyText $ T.pack "\\\""))
                 `onFail`
                 -- Need to parse an explicit `\', in case it ends the
                 -- string (and thus the next step would get parsed by the
                 -- previous option).
                 (string "\\\\" *> pure (B.fromLazyText $ T.pack "\\\\"))
                 `onFail`
                 parseSplitLine -- in case there's a split mid-quote
                 `onFail`
                 fmap B.singleton (satisfy (quoteChar /=))
    parseConcat = parseSep *> character '+' *> parseSep
    parseSep = many $ whitespace1 `onFail` parseUnwanted
    wrapQuotes str = qc `mappend` str `mappend` qc
    qc = B.singleton '"'

-- | Lines can be split with a @\\@ at the end of the line.
parseSplitLine :: (Monoid m) => Parse m
parseSplitLine = character '\\' *> newline *> pure mempty

parseHTML :: Parse Builder
parseHTML = fmap (addAngled . mconcat)
            . parseAngled $ many inner
  where
    inner = parseHTML
            `onFail`
            (B.fromLazyText <$> many1Satisfy (\c -> c /= open && c /= close))
    addAngled str = B.singleton open `mappend` str `mappend` B.singleton close
    open = '<'
    close = '>'