File: Parser.hs

package info (click to toggle)
haskell-foundation 0.0.30-5
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 928 kB
  • sloc: haskell: 9,124; ansic: 570; makefile: 6
file content (131 lines) | stat: -rw-r--r-- 3,128 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
-- | CSV parser as specified in RFC4180
--

{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts  #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}

module Foundation.Format.CSV.Parser
    ( file
    , recordC
    , record
    , record_
    , field
    ) where

import Basement.Imports hiding (throw)
import Foundation.Format.CSV.Types
import Basement.String (snoc)
import Foundation.Parser
import Foundation.Monad
import Foundation.Collection (Collection (elem))
import Foundation.Conduit
import Control.Monad (void)
import Data.Typeable (typeRep)
import Data.Proxy (Proxy(..))

recordC :: (Monad m, MonadThrow m) => Conduit String Row m ()
recordC = awaitForever $ recordC' . parse (record <* optional (elements crlf))
  where
    recordC' (ParseFailed err) = throw err
    recordC' (ParseOk rest v)  = leftover rest *> yield v
    recordC' (ParseMore more)  = do
        mm <- await
        case mm of
            Nothing -> throw (NotEnoughParseOnly :: ParseError String)
            Just b  -> recordC' (more b)

record_ :: forall row . (Typeable row, Record row) => Parser String row
record_ = do
    rs <- record
    case fromRow rs of
        Left err -> reportError $ Expected (show $ typeRep (Proxy @row)) err
        Right v  -> pure v

file :: Parser String CSV
file = do
    mh <- optional $ header <* elements crlf
    x <- record
    xs <- some $ elements crlf *> record
    void $ optional $ elements crlf
    pure $ fromList $ case mh of
        Nothing -> x : xs
        Just h  -> h : x : xs

header :: Parser String Row
header = do
    x <- name
    xs <- some $ element comma *> name
    pure $ fromList $ x : xs

record :: Parser String Row
record = do
    x <- field
    xs <- some $ element comma *> field
    pure $ fromList $ x : xs

name :: Parser String Field
name = field
{-# INLINE name #-}

field :: Parser String Field
field = escaped <|> nonEscaped

escaped :: Parser String Field
escaped = element dquote *> escaped'
  where
    escaped' = do
        x <- takeWhile (dquote /=)
        element dquote
        p <- peek
        if p == (Just dquote)
            then skip 1 >> descaped' (snoc x dquote)
            else pure (FieldString x Escape)
    descaped' acc = do
        x <- takeWhile (dquote /=)
        element dquote
        p <- peek
        if p == (Just dquote)
            then skip 1 >> descaped' (acc <> snoc x dquote)
            else pure (FieldString (acc <> x) DoubleEscape)

nonEscaped :: Parser String Field
nonEscaped = flip FieldString NoEscape <$> takeWhile (not . flip elem specials)
{-# INLINE nonEscaped #-}

comma :: Char
comma = ','
{-# INLINE comma #-}

cr :: Char
cr = '\r'
{-# INLINE cr #-}

dquote :: Char
dquote = '"'
{-# INLINE dquote #-}

lf :: Char
lf = '\n'
{-# INLINE lf #-}

crlf :: String
crlf = fromList [cr, lf]
{-# NOINLINE crlf #-}

{-
textdataQuoted :: String
textdataQuoted = textdata <> specials
{-# NOINLINE textdataQuoted #-}
-}

specials :: String
specials = ",\r\n"
{-# INLINE specials #-}

{-
textdata :: String
textdata = fromList $ [' '..'!'] <> ['#'..'+'] <> ['-'..'~']
{-# NOINLINE textdata #-}
-}