1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230
|
{-# LANGUAGE FlexibleContexts, OverloadedStrings #-}
module Data.Conduit.TextSpec (spec) where
import Data.Conduit ((.|), runConduit, runConduitPure)
import Control.Exception (SomeException)
import qualified Data.Conduit.Text as CT
import qualified Data.Conduit as C
import Data.Conduit.Lift (runCatchC, catchCatchC)
import Data.Functor.Identity (runIdentity)
import qualified Data.Conduit.List as CL
import Test.Hspec
import Test.Hspec.QuickCheck
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.Text.Encoding.Error as TEE
import qualified Data.Text.Lazy.Encoding as TLE
import Control.Arrow
import qualified Data.ByteString as S
import qualified Data.Text.Lazy as TL
import qualified Data.ByteString.Lazy as L
import Control.Monad.Catch.Pure (runCatchT)
spec :: Spec
spec = describe "Data.Conduit.Text" $ do
describe "text" $ do
let go enc tenc tdec cenc = describe enc $ do
prop "single chunk" $ \chars -> do
let tl = TL.pack chars
lbs = tenc tl
src = CL.sourceList $ L.toChunks lbs
ts <- runConduit $ src .| CT.decode cenc .| CL.consume
TL.fromChunks ts `shouldBe` tl
prop "many chunks" $ \chars -> do
let tl = TL.pack chars
lbs = tenc tl
src = mconcat $ map (CL.sourceList . return . S.singleton) $ L.unpack lbs
ts <- runConduit $ src .| CT.decode cenc .| CL.consume
TL.fromChunks ts `shouldBe` tl
-- Check whether raw bytes are decoded correctly, in
-- particular that Text decoding produces an error if
-- and only if Conduit does.
prop "raw bytes" $ \bytes -> do
let lbs = L.pack bytes
src = CL.sourceList $ L.toChunks lbs
tl' = tdec lbs
etl = runConduit $ src .| CT.decode cenc .| CL.consume
case etl of
(Left _) -> (return $! TL.toStrict tl') `shouldThrow` anyException
(Right tl) -> TL.fromChunks tl `shouldBe` tl'
prop "encoding" $ \chars -> do
let tss = map T.pack chars
lbs = tenc $ TL.fromChunks tss
src = mconcat $ map (CL.sourceList . return) tss
bss <- runConduit $ src .| CT.encode cenc .| CL.consume
L.fromChunks bss `shouldBe` lbs
prop "valid then invalid" $ \x y chars -> do
let tss = map T.pack ([x, y]:chars)
ts = T.concat tss
lbs = tenc (TL.fromChunks tss) `L.append` "\0\0\0\0\0\0\0"
src = mapM_ C.yield $ L.toChunks lbs
Just x' <- runConduit $ src .| CT.decode cenc .| C.await
(x' `T.isPrefixOf` ts) `shouldBe` True
go "utf8" TLE.encodeUtf8 TLE.decodeUtf8 CT.utf8
go "utf16_le" TLE.encodeUtf16LE TLE.decodeUtf16LE CT.utf16_le
go "utf16_be" TLE.encodeUtf16BE TLE.decodeUtf16BE CT.utf16_be
go "utf32_le" TLE.encodeUtf32LE TLE.decodeUtf32LE CT.utf32_le
go "utf32_be" TLE.encodeUtf32BE TLE.decodeUtf32BE CT.utf32_be
it "mixed utf16 and utf8" $ do
let bs = "8\NUL:\NULu\NUL\215\216\217\218"
src = C.yield bs .| CT.decode CT.utf16_le
text <- runConduit $ src .| C.await
text `shouldBe` Just "8:u"
(runConduit $ src .| CL.sinkNull) `shouldThrow` anyException
it "invalid utf8" $ do
let bs = S.pack [0..255]
src = C.yield bs .| CT.decode CT.utf8
text <- runConduit $ src .| C.await
text `shouldBe` Just (T.pack $ map toEnum [0..127])
(runConduit $ src .| CL.sinkNull) `shouldThrow` anyException
it "catch UTF8 exceptions" $ do
let badBS = "this is good\128\128\0that was bad"
grabExceptions inner = C.catchC
(inner .| CL.map Right)
(\e -> C.yield (Left (e :: CT.TextException)))
res <- runConduit $ C.yield badBS .| (,)
<$> (grabExceptions (CT.decode CT.utf8) .| CL.consume)
<*> CL.consume
first (map (either (Left . show) Right)) res `shouldBe`
( [ Right "this is good"
, Left $ show $ CT.NewDecodeException "UTF-8" 12 "\128\128\0t"
]
, ["\128\128\0that was bad"]
)
it "catch UTF8 exceptions, pure" $ do
let badBS = "this is good\128\128\0that was bad"
grabExceptions inner = do
res <- runCatchC $ inner .| CL.map Right
case res of
Left e -> C.yield $ Left e
Right () -> return ()
let res = runConduitPure $ C.yield badBS .| (,)
<$> (grabExceptions (CT.decode CT.utf8) .| CL.consume)
<*> CL.consume
first (map (either (Left . show) Right)) res `shouldBe`
( [ Right "this is good"
, Left $ show $ CT.NewDecodeException "UTF-8" 12 "\128\128\0t"
]
, ["\128\128\0that was bad"]
)
it "catch UTF8 exceptions, catchExceptionC" $ do
let badBS = "this is good\128\128\0that was bad"
grabExceptions inner = catchCatchC
(inner .| CL.map Right)
(\e -> C.yield $ Left e)
let Right res = runIdentity $ runCatchT $ runConduit $ C.yield badBS .| (,)
<$> (grabExceptions (CT.decode CT.utf8) .| CL.consume)
<*> CL.consume
first (map (either (Left . show) Right)) res `shouldBe`
( [ Right "this is good"
, Left $ show $ CT.NewDecodeException "UTF-8" 12 "\128\128\0t"
]
, ["\128\128\0that was bad"]
)
it "catch UTF8 exceptions, catchExceptionC, decodeUtf8" $ do
let badBS = ["this is good", "\128\128\0that was bad"]
grabExceptions inner = catchCatchC
(inner .| CL.map Right)
(\e -> C.yield $ Left e)
let Right res = runIdentity $ runCatchT $ runConduit $
mapM_ C.yield badBS .| (,)
<$> (grabExceptions CT.decodeUtf8 .| CL.consume)
<*> CL.consume
first (map (either (Left . const ()) Right)) res `shouldBe`
( [ Right "this is good"
, Left ()
]
, ["\128\128\0that was bad"]
)
prop "lenient UTF8 decoding" $ \good1 good2 -> do
let bss = [TE.encodeUtf8 $ T.pack good1, "\128\129\130", TE.encodeUtf8 $ T.pack good2]
bs = S.concat bss
expected = TE.decodeUtf8With TEE.lenientDecode bs
actual = runConduitPure $ mapM_ C.yield bss .| CT.decodeUtf8Lenient .| CL.consume
T.concat actual `shouldBe` expected
describe "text lines" $ do
it "yields nothing given nothing" $
(runConduit $ CL.sourceList [] .| CT.lines .| CL.consume) ==
[[]]
it "yields nothing given only empty text" $
(runConduit $ CL.sourceList [""] .| CT.lines .| CL.consume) ==
[[]]
it "works across split lines" $
(runConduit $ CL.sourceList ["abc", "d\nef"] .| CT.lines .| CL.consume) ==
[["abcd", "ef"]]
it "works with multiple lines in an item" $
(runConduit $ CL.sourceList ["ab\ncd\ne"] .| CT.lines .| CL.consume) ==
[["ab", "cd", "e"]]
it "works with ending on a newline" $
(runConduit $ CL.sourceList ["ab\n"] .| CT.lines .| CL.consume) ==
[["ab"]]
it "works with ending a middle item on a newline" $
(runConduit $ CL.sourceList ["ab\n", "cd\ne"] .| CT.lines .| CL.consume) ==
[["ab", "cd", "e"]]
it "works with empty text" $
(runConduit $ CL.sourceList ["ab", "", "cd"] .| CT.lines .| CL.consume) ==
[["abcd"]]
it "works with empty lines" $
(runConduit $ CL.sourceList ["\n\n"] .| CT.lines .| CL.consume) ==
[["", ""]]
describe "text lines bounded" $ do
it "yields nothing given nothing" $
(runConduit $ CL.sourceList [] .| CT.linesBounded 80 .| CL.consume) ==
[[]]
it "yields nothing given only empty text" $
(runConduit $ CL.sourceList [""] .| CT.linesBounded 80 .| CL.consume) ==
[[]]
it "works across split lines" $
(runConduit $ CL.sourceList ["abc", "d\nef"] .| CT.linesBounded 80 .| CL.consume) ==
[["abcd", "ef"]]
it "works with multiple lines in an item" $
(runConduit $ CL.sourceList ["ab\ncd\ne"] .| CT.linesBounded 80 .| CL.consume) ==
[["ab", "cd", "e"]]
it "works with ending on a newline" $
(runConduit $ CL.sourceList ["ab\n"] .| CT.linesBounded 80 .| CL.consume) `shouldBe`
[["ab"]]
it "works with ending a middle item on a newline" $
(runConduit $ CL.sourceList ["ab\n", "cd\ne"] .| CT.linesBounded 80 .| CL.consume) `shouldBe`
[["ab", "cd", "e"]]
it "works with empty text" $
(runConduit $ CL.sourceList ["ab", "", "cd"] .| CT.linesBounded 80 .| CL.consume) `shouldBe`
[["abcd"]]
it "works with empty lines" $
(runConduit (CL.sourceList ["\n\n"] .| CT.linesBounded 80 .| CL.consume)) `shouldBe`
[["", ""]]
it "throws an exception when lines are too long" $ do
let x :: Either SomeException [T.Text]
x = runConduit $ CL.sourceList ["hello\nworld"] .| CT.linesBounded 4 .| CL.consume
show x `shouldBe` show (Left $ CT.LengthExceeded 4 :: Either CT.TextException ())
it "works with infinite input" $ do
let x :: Either SomeException [T.Text]
x = runConduit $ CL.sourceList (cycle ["hello"]) .| CT.linesBounded 256 .| CL.consume
show x `shouldBe` show (Left $ CT.LengthExceeded 256 :: Either CT.TextException ())
describe "text decode" $ do
it' "doesn't throw runtime exceptions" $ do
let x = runConduit $ C.yield "\x89\x243" .| CT.decode CT.utf8 .| CL.consume
case x of
Left _ -> return ()
Right t -> error $ "This should have failed: " ++ show t
it "is not too eager" $ do
x <- runConduit $ CL.sourceList ["foobarbaz", error "ignore me"] .| CT.decode CT.utf8 .| CL.head
x `shouldBe` Just "foobarbaz"
it' :: String -> IO () -> Spec
it' = it
|