1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261
|
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
module Data.SCargot.Parse
( -- * Parsing
decode
, decodeOne
-- * Parsing Control
, SExprParser
, Reader
, Comment
, mkParser
, setCarrier
, addReader
, setComment
-- * Specific SExprParser Conversions
, asRich
, asWellFormed
, withQuote
) where
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative ((<$>), (<*), pure)
#endif
import Control.Monad ((>=>))
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as M
import Data.Text (Text)
import Data.String (IsString)
import Text.Parsec ( (<|>)
, (<?>)
, char
, eof
, lookAhead
, many1
, runParser
, skipMany
)
import Text.Parsec.Char (anyChar, space)
import Text.Parsec.Text (Parser)
import Data.SCargot.Repr ( SExpr(..)
, RichSExpr
, WellFormedSExpr
, toRich
, toWellFormed
)
type ReaderMacroMap atom = Map Char (Reader atom)
-- | A 'Reader' represents a reader macro: it takes a parser for
-- the S-Expression type and performs as much or as little
-- parsing as it would like, and then returns an S-expression.
type Reader atom = (Parser (SExpr atom) -> Parser (SExpr atom))
-- | A 'Comment' represents any kind of skippable comment. This
-- parser __must__ be able to fail if a comment is not being
-- recognized, and it __must__ not consume any input in case
-- of failure.
type Comment = Parser ()
-- | A 'SExprParser' describes a parser for a particular value
-- that has been serialized as an s-expression. The @atom@ parameter
-- corresponds to a Haskell type used to represent the atoms,
-- and the @carrier@ parameter corresponds to the parsed S-Expression
-- structure.
data SExprParser atom carrier = SExprParser
{ sesPAtom :: Parser atom
, readerMap :: ReaderMacroMap atom
, comment :: Maybe Comment
, postparse :: SExpr atom -> Either String carrier
}
-- | Create a basic 'SExprParser' when given a parser
-- for an atom type.
--
-- >>> import Text.Parsec (alphaNum, many1)
-- >>> let parser = mkParser (many1 alphaNum)
-- >>> decode parser "(ele phant)"
-- Right [SCons (SAtom "ele") (SCons (SAtom "phant") SNil)]
mkParser :: Parser atom -> SExprParser atom (SExpr atom)
mkParser parser = SExprParser
{ sesPAtom = parser
, readerMap = M.empty
, comment = Nothing
, postparse = return
}
-- | Modify the carrier type for a 'SExprParser'. This is
-- used internally to convert between various 'SExpr' representations,
-- but could also be used externally to add an extra conversion layer
-- onto a 'SExprParser'.
--
-- >>> import Text.Parsec (alphaNum, many1)
-- >>> import Data.SCargot.Repr (toRich)
-- >>> let parser = setCarrier (return . toRich) (mkParser (many1 alphaNum))
-- >>> decode parser "(ele phant)"
-- Right [RSlist [RSAtom "ele",RSAtom "phant"]]
setCarrier :: (b -> Either String c) -> SExprParser a b -> SExprParser a c
setCarrier f spec = spec { postparse = postparse spec >=> f }
-- | Convert the final output representation from the 'SExpr' type
-- to the 'RichSExpr' type.
--
-- >>> import Text.Parsec (alphaNum, many1)
-- >>> let parser = asRich (mkParser (many1 alphaNum))
-- >>> decode parser "(ele phant)"
-- Right [RSlist [RSAtom "ele",RSAtom "phant"]]
asRich :: SExprParser a (SExpr b) -> SExprParser a (RichSExpr b)
asRich = setCarrier (return . toRich)
-- | Convert the final output representation from the 'SExpr' type
-- to the 'WellFormedSExpr' type.
--
-- >>> import Text.Parsec (alphaNum, many1)
-- >>> let parser = asWellFormed (mkParser (many1 alphaNum))
-- >>> decode parser "(ele phant)"
-- Right [WFSList [WFSAtom "ele",WFSAtom "phant"]]
asWellFormed :: SExprParser a (SExpr b) -> SExprParser a (WellFormedSExpr b)
asWellFormed = setCarrier toWellFormed
-- | Add the ability to execute some particular reader macro, as
-- defined by its initial character and the 'Parser' which returns
-- the parsed S-Expression. The 'Reader' is passed a 'Parser' which
-- can be recursively called to parse more S-Expressions, and begins
-- parsing after the reader character has been removed from the
-- stream.
--
-- >>> import Text.Parsec (alphaNum, char, many1)
-- >>> let vecReader p = (char ']' *> pure SNil) <|> (SCons <$> p <*> vecReader p)
-- >>> let parser = addReader '[' vecReader (mkParser (many1 alphaNum))
-- >>> decode parser "(an [ele phant])"
-- Right [SCons (SAtom "an") (SCons (SCons (SAtom "ele") (SCons (SAtom "phant") SNil)) SNil)]
addReader :: Char -> Reader a -> SExprParser a c -> SExprParser a c
addReader c reader spec = spec
{ readerMap = M.insert c reader (readerMap spec) }
-- | Add the ability to ignore some kind of comment. This gets
-- factored into whitespace parsing, and it's very important that
-- the parser supplied __be able to fail__ (as otherwise it will
-- cause an infinite loop), and also that it __not consume any input__
-- (which may require it to be wrapped in 'try'.)
--
-- >>> import Text.Parsec (alphaNum, anyChar, manyTill, many1, string)
-- >>> let comment = string "//" *> manyTill anyChar newline *> pure ()
-- >>> let parser = setComment comment (mkParser (many1 alphaNum))
-- >>> decode parser "(ele //a comment\n phant)"
-- Right [SCons (SAtom "ele") (SCons (SAtom "phant") SNil)]
setComment :: Comment -> SExprParser a c -> SExprParser a c
setComment c spec = spec { comment = Just (c <?> "comment") }
-- | Add the ability to understand a quoted S-Expression.
-- Many Lisps use @'sexpr@ as sugar for @(quote sexpr)@. This
-- assumes that the underlying atom type implements the "IsString"
-- class, and will create the @quote@ atom using @fromString "quote"@.
--
-- >>> import Text.Parsec (alphaNum, many1)
-- >>> let parser = withQuote (mkParser (many1 alphaNum))
-- >>> decode parser "'elephant"
-- Right [SCons (SAtom "quote") (SCons (SAtom "foo") SNil)]
withQuote :: IsString t => SExprParser t (SExpr t) -> SExprParser t (SExpr t)
withQuote = addReader '\'' (fmap go)
where go s = SCons "quote" (SCons s SNil)
peekChar :: Parser (Maybe Char)
peekChar = Just <$> lookAhead anyChar <|> pure Nothing
parseGenericSExpr ::
Parser atom -> ReaderMacroMap atom -> Parser () -> Parser (SExpr atom)
parseGenericSExpr atom reader skip = do
let sExpr = parseGenericSExpr atom reader skip <?> "s-expr"
skip
c <- peekChar
r <- case c of
Nothing -> fail "Unexpected end of input"
Just '(' -> char '(' >> skip >> parseList sExpr skip
Just (flip M.lookup reader -> Just r) -> anyChar >> r sExpr
_ -> SAtom `fmap` atom
skip
return r
parseList :: Parser (SExpr atom) -> Parser () -> Parser (SExpr atom)
parseList sExpr skip = do
i <- peekChar
case i of
Nothing -> fail "Unexpected end of input"
Just ')' -> char ')' >> return SNil
_ -> do
car <- sExpr
skip
c <- peekChar
case c of
Just '.' -> do
_ <- char '.'
cdr <- sExpr
skip
_ <- char ')'
skip
return (SCons car cdr)
Just ')' -> do
_ <- char ')'
skip
return (SCons car SNil)
_ -> do
cdr <- parseList sExpr skip
return (SCons car cdr)
-- | Given a CommentMap, create the corresponding parser to
-- skip those comments (if they exist).
buildSkip :: Maybe (Parser ()) -> Parser ()
buildSkip Nothing = skipMany space
buildSkip (Just c) = alternate
where alternate = skipMany space >> ((c >> alternate) <|> return ())
doParse :: Parser a -> Text -> Either String a
doParse p t = case runParser p () "" t of
Left err -> Left (show err)
Right x -> Right x
-- | Decode a single S-expression. If any trailing input is left after
-- the S-expression (ignoring comments or whitespace) then this
-- will fail: for those cases, use 'decode', which returns a list of
-- all the S-expressions found at the top level.
decodeOne :: SExprParser atom carrier -> Text -> Either String carrier
decodeOne spec = doParse (parser <* eof) >=> (postparse spec)
where parser = parseGenericSExpr
(sesPAtom spec)
(readerMap spec)
(buildSkip (comment spec))
-- | Decode several S-expressions according to a given 'SExprParser'. This
-- will return a list of every S-expression that appears at the top-level
-- of the document.
decode :: SExprParser atom carrier -> Text -> Either String [carrier]
decode spec =
doParse (many1 parser <* eof) >=> mapM (postparse spec)
where parser = parseGenericSExpr
(sesPAtom spec)
(readerMap spec)
(buildSkip (comment spec))
{-
-- | Encode (without newlines) a single S-expression.
encodeSExpr :: SExpr atom -> (atom -> Text) -> Text
encodeSExpr SNil _ = "()"
encodeSExpr (SAtom s) t = t s
encodeSExpr (SCons x xs) t = go xs (encodeSExpr x t)
where go (SAtom s) rs = "(" <> rs <> " . " <> t s <> ")"
go SNil rs = "(" <> rs <> ")"
go (SCons x xs) rs = go xs (rs <> " " <> encodeSExpr x t)
-- | Emit an S-Expression in a machine-readable way. This does no
-- pretty-printing or indentation, and produces no comments.
encodeOne :: SExprParser atom carrier -> carrier -> Text
encodeOne spec c = encodeSExpr (preserial spec c) (sesSAtom spec)
encode :: SExprParser atom carrier -> [carrier] -> Text
encode spec cs = T.concat (map (encodeOne spec) cs)
-}
|