File: TextSpec.hs

package info (click to toggle)
haskell-streaming-commons 0.2.2.6-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 284 kB
  • sloc: haskell: 2,547; ansic: 297; makefile: 7
file content (135 lines) | stat: -rw-r--r-- 5,999 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
{-# LANGUAGE OverloadedStrings #-}
module Data.Streaming.TextSpec (spec) where

import Test.Hspec
import Test.Hspec.QuickCheck
import qualified Data.Streaming.Text as SD
import qualified Data.Text.Lazy.Encoding as TLE
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import Control.Exception (evaluate, try, SomeException)
import Control.DeepSeq (deepseq, NFData)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.Text.Lazy as TL
import Control.Monad (forM_)
import Data.ByteString.Char8 ()

try' :: NFData a => a -> IO (Either SomeException a)
try' a = try $ evaluate (a `deepseq` a)

spec :: Spec
spec = describe "Data.Streaming.TextSpec" $ {-modifyMaxSuccess (const 10000) $ -}do
    let test name lazy stream encodeLazy encodeStrict = describe name $ do
            prop "bytes" $ check lazy stream
            prop "chars" $ \css -> do
                let ts = map T.pack css
                    lt = TL.fromChunks ts
                    lbs = encodeLazy lt
                    bss = L.toChunks lbs
                    wss = map S.unpack bss
                 in check lazy stream wss
            it "high code points" $ forM_ [100, 200..50000] $ \cnt -> do
                let t = T.replicate cnt "\x10000"
                    bs = encodeStrict t
                case stream bs of
                    SD.DecodeResultSuccess t' dec -> do
                        t' `shouldBe` t
                        case dec S.empty of
                            SD.DecodeResultSuccess _ _ -> return ()
                            SD.DecodeResultFailure _ _ -> error "unexpected failure 1"
                    SD.DecodeResultFailure _ _ -> error "unexpected failure 2"

        check lazy stream wss = do
            let bss = map S.pack wss
                lbs = L.fromChunks bss
            x <- try' $ feedLazy stream lbs
            y <- try' $ lazy lbs
            case (x, y) of
                (Right x', Right y') -> x' `shouldBe` y'
                (Left _, Left _) -> return ()
                _ -> error $ show (x, y)
    test "UTF8" TLE.decodeUtf8 SD.decodeUtf8 TLE.encodeUtf8 TE.encodeUtf8
    test "UTF8 pure" TLE.decodeUtf8 SD.decodeUtf8Pure TLE.encodeUtf8 TE.encodeUtf8
    test "UTF16LE" TLE.decodeUtf16LE SD.decodeUtf16LE TLE.encodeUtf16LE TE.encodeUtf16LE
    test "UTF16BE" TLE.decodeUtf16BE SD.decodeUtf16BE TLE.encodeUtf16BE TE.encodeUtf16BE
    test "UTF32LE" TLE.decodeUtf32LE SD.decodeUtf32LE TLE.encodeUtf32LE TE.encodeUtf32LE
    test "UTF32BE" TLE.decodeUtf32BE SD.decodeUtf32BE TLE.encodeUtf32BE TE.encodeUtf32BE

    describe "UTF8 leftovers" $ do
        describe "C" $ do
            it "single chunk" $ do
                let bs = "good\128\128bad"
                case SD.decodeUtf8 bs of
                    SD.DecodeResultSuccess _ _ -> error "Shouldn't have succeeded"
                    SD.DecodeResultFailure t bs' -> do
                        t `shouldBe` "good"
                        bs' `shouldBe` "\128\128bad"

            it "multi chunk, no good" $ do
                let bs1 = "\226"
                    bs2 = "\130"
                    bs3 = "ABC"
                case SD.decodeUtf8 bs1 of
                    SD.DecodeResultSuccess "" dec2 ->
                        case dec2 bs2 of
                            SD.DecodeResultSuccess "" dec3 ->
                                case dec3 bs3 of
                                    SD.DecodeResultFailure "" bs -> bs `shouldBe` "\226\130ABC"
                                    _ -> error "fail on dec3"
                            _ -> error "fail on dec2"
                    _ -> error "fail on dec1"

            it "multi chunk, good in the middle" $ do
                let bs1 = "\226"
                    bs2 = "\130\172\226"
                    bs3 = "\130ABC"
                case SD.decodeUtf8 bs1 of
                    SD.DecodeResultSuccess "" dec2 ->
                        case dec2 bs2 of
                            SD.DecodeResultSuccess "\x20AC" dec3 ->
                                case dec3 bs3 of
                                    SD.DecodeResultFailure "" bs -> bs `shouldBe` "\226\130ABC"
                                    _ -> error "fail on dec3"
                            _ -> error "fail on dec2"
                    _ -> error "fail on dec1"
        describe "pure" $ do
            it "multi chunk, no good" $ do
                let bs1 = "\226"
                    bs2 = "\130"
                    bs3 = "ABC"
                case SD.decodeUtf8Pure bs1 of
                    SD.DecodeResultSuccess "" dec2 ->
                        case dec2 bs2 of
                            SD.DecodeResultSuccess "" dec3 ->
                                case dec3 bs3 of
                                    SD.DecodeResultFailure "" bs -> bs `shouldBe` "\226\130ABC"
                                    _ -> error "fail on dec3"
                            _ -> error "fail on dec2"
                    _ -> error "fail on dec1"

    describe "UTF16LE spot checks" $ do
        it "[[0,216,0],[220,0,0,0,0,0,0]]" $ do
            let bss = map S.pack [[0,216,0],[220,0,0,0,0,0,0]]
                lbs = L.fromChunks bss
            x <- try' $ feedLazy SD.decodeUtf16LE lbs
            y <- try' $ TLE.decodeUtf16LE lbs
            case (x, y) of
                (Right x', Right y') -> x' `shouldBe` y'
                (Left _, Left _) -> return ()
                _ -> error $ show (x, y)

feedLazy :: (S.ByteString -> SD.DecodeResult)
         -> L.ByteString
         -> TL.Text
feedLazy start =
    TL.fromChunks . loop start . L.toChunks
  where
    loop dec [] =
        case dec S.empty of
            SD.DecodeResultSuccess t _ -> [t]
            SD.DecodeResultFailure _ _ -> [error "invalid sequence 1"]
    loop dec (bs:bss) =
        case dec bs of
            SD.DecodeResultSuccess t dec' -> t : loop dec' bss
            SD.DecodeResultFailure _ _ -> [error "invalid sequence 2"]