1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
|
-- | CSV parser as specified in RFC4180
--
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
module Foundation.Format.CSV.Parser
( file
, recordC
, record
, record_
, field
) where
import Basement.Imports hiding (throw)
import Foundation.Format.CSV.Types
import Basement.String (snoc)
import Foundation.Parser
import Foundation.Monad
import Foundation.Collection (Collection (elem))
import Foundation.Conduit
import Control.Monad (void)
import Data.Typeable (typeRep)
import Data.Proxy (Proxy(..))
recordC :: (Monad m, MonadThrow m) => Conduit String Row m ()
recordC = awaitForever $ recordC' . parse (record <* optional (elements crlf))
where
recordC' (ParseFailed err) = throw err
recordC' (ParseOk rest v) = leftover rest *> yield v
recordC' (ParseMore more) = do
mm <- await
case mm of
Nothing -> throw (NotEnoughParseOnly :: ParseError String)
Just b -> recordC' (more b)
record_ :: forall row . (Typeable row, Record row) => Parser String row
record_ = do
rs <- record
case fromRow rs of
Left err -> reportError $ Expected (show $ typeRep (Proxy @row)) err
Right v -> pure v
file :: Parser String CSV
file = do
mh <- optional $ header <* elements crlf
x <- record
xs <- some $ elements crlf *> record
void $ optional $ elements crlf
pure $ fromList $ case mh of
Nothing -> x : xs
Just h -> h : x : xs
header :: Parser String Row
header = do
x <- name
xs <- some $ element comma *> name
pure $ fromList $ x : xs
record :: Parser String Row
record = do
x <- field
xs <- some $ element comma *> field
pure $ fromList $ x : xs
name :: Parser String Field
name = field
{-# INLINE name #-}
field :: Parser String Field
field = escaped <|> nonEscaped
escaped :: Parser String Field
escaped = element dquote *> escaped'
where
escaped' = do
x <- takeWhile (dquote /=)
element dquote
p <- peek
if p == (Just dquote)
then skip 1 >> descaped' (snoc x dquote)
else pure (FieldString x Escape)
descaped' acc = do
x <- takeWhile (dquote /=)
element dquote
p <- peek
if p == (Just dquote)
then skip 1 >> descaped' (acc <> snoc x dquote)
else pure (FieldString (acc <> x) DoubleEscape)
nonEscaped :: Parser String Field
nonEscaped = flip FieldString NoEscape <$> takeWhile (not . flip elem specials)
{-# INLINE nonEscaped #-}
comma :: Char
comma = ','
{-# INLINE comma #-}
cr :: Char
cr = '\r'
{-# INLINE cr #-}
dquote :: Char
dquote = '"'
{-# INLINE dquote #-}
lf :: Char
lf = '\n'
{-# INLINE lf #-}
crlf :: String
crlf = fromList [cr, lf]
{-# NOINLINE crlf #-}
{-
textdataQuoted :: String
textdataQuoted = textdata <> specials
{-# NOINLINE textdataQuoted #-}
-}
specials :: String
specials = ",\r\n"
{-# INLINE specials #-}
{-
textdata :: String
textdata = fromList $ [' '..'!'] <> ['#'..'+'] <> ['-'..'~']
{-# NOINLINE textdata #-}
-}
|