File: TextSpec.hs

package info (click to toggle)
haskell-conduit-extra 1.3.8-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 288 kB
  • sloc: haskell: 2,601; makefile: 3
file content (230 lines) | stat: -rw-r--r-- 11,170 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
{-# LANGUAGE FlexibleContexts, OverloadedStrings #-}
module Data.Conduit.TextSpec (spec) where

import Data.Conduit ((.|), runConduit, runConduitPure)
import Control.Exception (SomeException)
import qualified Data.Conduit.Text as CT
import qualified Data.Conduit as C
import Data.Conduit.Lift (runCatchC, catchCatchC)
import Data.Functor.Identity (runIdentity)
import qualified Data.Conduit.List as CL
import Test.Hspec
import Test.Hspec.QuickCheck
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.Text.Encoding.Error as TEE
import qualified Data.Text.Lazy.Encoding as TLE
import Control.Arrow
import qualified Data.ByteString as S
import qualified Data.Text.Lazy as TL
import qualified Data.ByteString.Lazy as L
import Control.Monad.Catch.Pure (runCatchT)

spec :: Spec
spec = describe "Data.Conduit.Text" $ do
    describe "text" $ do
        let go enc tenc tdec cenc = describe enc $ do
                prop "single chunk" $ \chars -> do
                    let tl = TL.pack chars
                        lbs = tenc tl
                        src = CL.sourceList $ L.toChunks lbs
                    ts <- runConduit $ src .| CT.decode cenc .| CL.consume
                    TL.fromChunks ts `shouldBe` tl
                prop "many chunks" $ \chars -> do
                    let tl = TL.pack chars
                        lbs = tenc tl
                        src = mconcat $ map (CL.sourceList . return . S.singleton) $ L.unpack lbs

                    ts <- runConduit $ src .| CT.decode cenc .| CL.consume
                    TL.fromChunks ts `shouldBe` tl

                -- Check whether raw bytes are decoded correctly, in
                -- particular that Text decoding produces an error if
                -- and only if Conduit does.
                prop "raw bytes" $ \bytes -> do
                    let lbs = L.pack bytes
                        src = CL.sourceList $ L.toChunks lbs
                        tl' = tdec lbs
                        etl = runConduit $ src .| CT.decode cenc .| CL.consume
                    case etl of
                          (Left _) -> (return $! TL.toStrict tl') `shouldThrow` anyException
                          (Right tl) -> TL.fromChunks tl `shouldBe` tl'
                prop "encoding" $ \chars -> do
                    let tss = map T.pack chars
                        lbs = tenc $ TL.fromChunks tss
                        src = mconcat $ map (CL.sourceList . return) tss
                    bss <- runConduit $ src .| CT.encode cenc .| CL.consume
                    L.fromChunks bss `shouldBe` lbs
                prop "valid then invalid" $ \x y chars -> do
                    let tss = map T.pack ([x, y]:chars)
                        ts = T.concat tss
                        lbs = tenc (TL.fromChunks tss) `L.append` "\0\0\0\0\0\0\0"
                        src = mapM_ C.yield $ L.toChunks lbs
                    Just x' <- runConduit $ src .| CT.decode cenc .| C.await
                    (x' `T.isPrefixOf` ts) `shouldBe` True
        go "utf8" TLE.encodeUtf8 TLE.decodeUtf8 CT.utf8
        go "utf16_le" TLE.encodeUtf16LE TLE.decodeUtf16LE CT.utf16_le
        go "utf16_be" TLE.encodeUtf16BE TLE.decodeUtf16BE CT.utf16_be
        go "utf32_le" TLE.encodeUtf32LE TLE.decodeUtf32LE CT.utf32_le
        go "utf32_be" TLE.encodeUtf32BE TLE.decodeUtf32BE CT.utf32_be
        it "mixed utf16 and utf8" $ do
            let bs = "8\NUL:\NULu\NUL\215\216\217\218"
                src = C.yield bs .| CT.decode CT.utf16_le
            text <- runConduit $ src .| C.await
            text `shouldBe` Just "8:u"
            (runConduit $ src .| CL.sinkNull) `shouldThrow` anyException
        it "invalid utf8" $ do
            let bs = S.pack [0..255]
                src = C.yield bs .| CT.decode CT.utf8
            text <- runConduit $ src .| C.await
            text `shouldBe` Just (T.pack $ map toEnum [0..127])
            (runConduit $ src .| CL.sinkNull) `shouldThrow` anyException
        it "catch UTF8 exceptions" $ do
            let badBS = "this is good\128\128\0that was bad"

                grabExceptions inner = C.catchC
                    (inner .| CL.map Right)
                    (\e -> C.yield (Left (e :: CT.TextException)))

            res <- runConduit $ C.yield badBS .| (,)
                <$> (grabExceptions (CT.decode CT.utf8) .| CL.consume)
                <*> CL.consume

            first (map (either (Left . show) Right)) res `shouldBe`
                ( [ Right "this is good"
                  , Left $ show $ CT.NewDecodeException "UTF-8" 12 "\128\128\0t"
                  ]
                , ["\128\128\0that was bad"]
                )
        it "catch UTF8 exceptions, pure" $ do
            let badBS = "this is good\128\128\0that was bad"

                grabExceptions inner = do
                    res <- runCatchC $ inner .| CL.map Right
                    case res of
                        Left e -> C.yield $ Left e
                        Right () -> return ()

            let res = runConduitPure $ C.yield badBS .| (,)
                        <$> (grabExceptions (CT.decode CT.utf8) .| CL.consume)
                        <*> CL.consume

            first (map (either (Left . show) Right)) res `shouldBe`
                ( [ Right "this is good"
                  , Left $ show $ CT.NewDecodeException "UTF-8" 12 "\128\128\0t"
                  ]
                , ["\128\128\0that was bad"]
                )
        it "catch UTF8 exceptions, catchExceptionC" $ do
            let badBS = "this is good\128\128\0that was bad"

                grabExceptions inner = catchCatchC
                    (inner .| CL.map Right)
                    (\e -> C.yield $ Left e)

            let Right res = runIdentity $ runCatchT $ runConduit $ C.yield badBS .| (,)
                        <$> (grabExceptions (CT.decode CT.utf8) .| CL.consume)
                        <*> CL.consume

            first (map (either (Left . show) Right)) res `shouldBe`
                ( [ Right "this is good"
                  , Left $ show $ CT.NewDecodeException "UTF-8" 12 "\128\128\0t"
                  ]
                , ["\128\128\0that was bad"]
                )
        it "catch UTF8 exceptions, catchExceptionC, decodeUtf8" $ do
            let badBS = ["this is good", "\128\128\0that was bad"]

                grabExceptions inner = catchCatchC
                    (inner .| CL.map Right)
                    (\e -> C.yield $ Left e)

            let Right res = runIdentity $ runCatchT $ runConduit $
                  mapM_ C.yield badBS .| (,)
                        <$> (grabExceptions CT.decodeUtf8 .| CL.consume)
                        <*> CL.consume

            first (map (either (Left . const ()) Right)) res `shouldBe`
                ( [ Right "this is good"
                  , Left ()
                  ]
                , ["\128\128\0that was bad"]
                )
        prop "lenient UTF8 decoding" $ \good1 good2 -> do
            let bss = [TE.encodeUtf8 $ T.pack good1, "\128\129\130", TE.encodeUtf8 $ T.pack good2]
                bs = S.concat bss
                expected = TE.decodeUtf8With TEE.lenientDecode bs
                actual = runConduitPure $ mapM_ C.yield bss .| CT.decodeUtf8Lenient .| CL.consume
            T.concat actual `shouldBe` expected

    describe "text lines" $ do
        it "yields nothing given nothing" $
            (runConduit $ CL.sourceList [] .| CT.lines .| CL.consume) ==
                [[]]
        it "yields nothing given only empty text" $
            (runConduit $ CL.sourceList [""] .| CT.lines .| CL.consume) ==
                [[]]
        it "works across split lines" $
            (runConduit $ CL.sourceList ["abc", "d\nef"] .| CT.lines .| CL.consume) ==
                [["abcd", "ef"]]
        it "works with multiple lines in an item" $
            (runConduit $ CL.sourceList ["ab\ncd\ne"] .| CT.lines .| CL.consume) ==
                [["ab", "cd", "e"]]
        it "works with ending on a newline" $
            (runConduit $ CL.sourceList ["ab\n"] .| CT.lines .| CL.consume) ==
                [["ab"]]
        it "works with ending a middle item on a newline" $
            (runConduit $ CL.sourceList ["ab\n", "cd\ne"] .| CT.lines .| CL.consume) ==
                [["ab", "cd", "e"]]
        it "works with empty text" $
            (runConduit $ CL.sourceList ["ab", "", "cd"] .| CT.lines .| CL.consume) ==
                [["abcd"]]
        it "works with empty lines" $
            (runConduit $ CL.sourceList ["\n\n"] .| CT.lines .| CL.consume) ==
                [["", ""]]

    describe "text lines bounded" $ do
        it "yields nothing given nothing" $
            (runConduit $ CL.sourceList [] .| CT.linesBounded 80 .| CL.consume) ==
                [[]]
        it "yields nothing given only empty text" $
            (runConduit $ CL.sourceList [""] .| CT.linesBounded 80 .| CL.consume) ==
                [[]]
        it "works across split lines" $
            (runConduit $ CL.sourceList ["abc", "d\nef"] .| CT.linesBounded 80 .| CL.consume) ==
                [["abcd", "ef"]]
        it "works with multiple lines in an item" $
            (runConduit $ CL.sourceList ["ab\ncd\ne"] .| CT.linesBounded 80 .| CL.consume) ==
                [["ab", "cd", "e"]]
        it "works with ending on a newline" $
            (runConduit $ CL.sourceList ["ab\n"] .| CT.linesBounded 80 .| CL.consume) `shouldBe`
                [["ab"]]
        it "works with ending a middle item on a newline" $
            (runConduit $ CL.sourceList ["ab\n", "cd\ne"] .| CT.linesBounded 80 .| CL.consume) `shouldBe`
                [["ab", "cd", "e"]]
        it "works with empty text" $
            (runConduit $ CL.sourceList ["ab", "", "cd"] .| CT.linesBounded 80 .| CL.consume) `shouldBe`
                [["abcd"]]
        it "works with empty lines" $
            (runConduit (CL.sourceList ["\n\n"] .| CT.linesBounded 80 .| CL.consume)) `shouldBe`
                [["", ""]]
        it "throws an exception when lines are too long" $ do
            let x :: Either SomeException [T.Text]
                x = runConduit $ CL.sourceList ["hello\nworld"] .| CT.linesBounded 4 .| CL.consume
            show x `shouldBe` show (Left $ CT.LengthExceeded 4 :: Either CT.TextException ())
        it "works with infinite input" $ do
            let x :: Either SomeException [T.Text]
                x = runConduit $ CL.sourceList (cycle ["hello"]) .| CT.linesBounded 256 .| CL.consume
            show x `shouldBe` show (Left $ CT.LengthExceeded 256 :: Either CT.TextException ())
    describe "text decode" $ do
        it' "doesn't throw runtime exceptions" $ do
            let x = runConduit $ C.yield "\x89\x243" .| CT.decode CT.utf8 .| CL.consume
            case x of
                Left _ -> return ()
                Right t -> error $ "This should have failed: " ++ show t
        it "is not too eager" $ do
            x <- runConduit $ CL.sourceList ["foobarbaz", error "ignore me"] .| CT.decode CT.utf8 .| CL.head
            x `shouldBe` Just "foobarbaz"

it' :: String -> IO () -> Spec
it' = it