File: EncodingSpec.hs

package info (click to toggle)
haskell-os-string 2.0.6-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 332 kB
  • sloc: haskell: 3,283; makefile: 3
file content (174 lines) | stat: -rw-r--r-- 7,646 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
{-# LANGUAGE CPP #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE TypeApplications #-}

module EncodingSpec where

import Data.ByteString ( ByteString )
import qualified Data.ByteString as BS

import Arbitrary
import Test.QuickCheck

import Data.Either ( isRight )
import qualified System.OsString.Data.ByteString.Short as BS8
import qualified System.OsString.Data.ByteString.Short.Word16 as BS16
import System.OsString.Encoding.Internal
import GHC.IO (unsafePerformIO)
import GHC.IO.Encoding ( setFileSystemEncoding )
import System.IO
    ( utf16le )
import Control.Exception
import Control.DeepSeq
import Data.Bifunctor ( first )
import GHC.IO.Encoding.Failure ( CodingFailureMode(..) )
import GHC.IO.Encoding.UTF16 ( mkUTF16le )
import GHC.IO.Encoding.UTF8 ( mkUTF8 )


tests :: [(String, Property)]
tests =
  [ ("ucs2le_decode . ucs2le_encode == id",
    property $ \(padEven -> ba) ->
      let decoded = decodeWithTE ucs2le (BS8.toShort ba)
          encoded = encodeWithTE ucs2le =<< decoded
      in (BS8.fromShort <$> encoded) === Right ba)
  , ("utf16 doesn't handle invalid surrogate pairs",
     property $
      let str = [toEnum 55296, toEnum 55297]
          encoded = encodeWithTE utf16le str
          decoded = decodeWithTE utf16le =<< encoded
#if __GLASGOW_HASKELL__ >= 910
      in decoded === Left (EncodingError ("recoverEncode: invalid argument (cannot encode character " <> show (head str) <> ")\n") Nothing))
#elif __GLASGOW_HASKELL__ >= 904
      in decoded === Left (EncodingError ("recoverEncode: invalid argument (cannot encode character " <> show (head str) <> ")") Nothing))
#else
      in decoded === Left (EncodingError "recoverEncode: invalid argument (invalid character)" Nothing))
#endif
  , ("ucs2 handles invalid surrogate pairs",
     property $
      let str = [toEnum 55296, toEnum 55297]
          encoded = encodeWithTE ucs2le str
          decoded = decodeWithTE ucs2le =<< encoded
      in decoded === Right str)
  , ("can roundtrip arbitrary bytes through utf-8 (with RoundtripFailure)",
     property $
      \bs ->
        let decoded = decodeWithTE (mkUTF8 RoundtripFailure) (BS8.toShort bs)
            encoded = encodeWithTE (mkUTF8 RoundtripFailure) =<< decoded
        in (either (const 0) BS8.length encoded, encoded) === (BS8.length (BS8.toShort bs), Right (BS8.toShort bs)))

  , ("can decode arbitrary strings through utf-8 (with RoundtripFailure)",
     property $
      \(NonNullSurrogateString str) ->
        let encoded = encodeWithTE (mkUTF8 RoundtripFailure) str
            decoded = decodeWithTE (mkUTF8 RoundtripFailure) =<< encoded
        in expectFailure $ (either (const 0) length decoded, decoded) === (length str, Right str))

  , ("utf-8 roundtrip encode cannot deal with some surrogates",
     property $
      let str = [toEnum 0xDFF0, toEnum 0xDFF2]
          encoded = encodeWithTE (mkUTF8 RoundtripFailure) str
          decoded = decodeWithTE (mkUTF8 RoundtripFailure) =<< encoded
#if __GLASGOW_HASKELL__ >= 910
      in decoded === Left (EncodingError ("recoverEncode: invalid argument (cannot encode character " <> show (head str) <> ")\n") Nothing))
#elif __GLASGOW_HASKELL__ >= 904
      in decoded === Left (EncodingError ("recoverEncode: invalid argument (cannot encode character " <> show (head str) <> ")") Nothing))
#else
      in decoded === Left (EncodingError "recoverEncode: invalid argument (invalid character)" Nothing))
#endif

  , ("cannot roundtrip arbitrary bytes through utf-16 (with RoundtripFailure)",
     property $
      \(padEven -> bs) ->
        let decoded = decodeWithTE (mkUTF16le RoundtripFailure) (BS8.toShort bs)
            encoded = encodeWithTE (mkUTF16le RoundtripFailure) =<< decoded
        in expectFailure $ (either (const 0) BS8.length encoded, encoded) === (BS8.length (BS8.toShort bs), Right (BS8.toShort bs)))
  , ("encodeWithTE/decodeWithTE ErrorOnCodingFailure fails (utf16le)",
     property $
      \(padEven -> bs) ->
        let decoded = decodeWithTE (mkUTF16le ErrorOnCodingFailure) (BS8.toShort bs)
            encoded = encodeWithTE (mkUTF16le ErrorOnCodingFailure) =<< decoded
        in expectFailure $ (isRight encoded, isRight decoded) === (True, True))
  , ("encodeWithTE/decodeWithTE ErrorOnCodingFailure fails (utf8)",
     property $
      \bs ->
        let decoded = decodeWithTE (mkUTF8 ErrorOnCodingFailure) (BS8.toShort bs)
            encoded = encodeWithTE (mkUTF8 ErrorOnCodingFailure) =<< decoded
        in expectFailure $ (isRight encoded, isRight decoded) === (True, True))
  , ("encodeWithTE/decodeWithTE TransliterateCodingFailure never fails (utf16le)",
     property $
      \(padEven -> bs) ->
        let decoded = decodeWithTE (mkUTF16le TransliterateCodingFailure) (BS8.toShort bs)
            encoded = encodeWithTE (mkUTF16le TransliterateCodingFailure) =<< decoded
        in (isRight encoded, isRight decoded) === (True, True))
  , ("encodeWithTE/decodeWithTE TransliterateCodingFailure never fails (utf8)",
     property $
      \bs ->
        let decoded = decodeWithTE (mkUTF8 TransliterateCodingFailure) (BS8.toShort bs)
            encoded = encodeWithTE (mkUTF8 TransliterateCodingFailure) =<< decoded
        in (isRight encoded, isRight decoded) === (True, True))
  , ("encodeWithBaseWindows/decodeWithBaseWindows never fails (utf16le)",
     property $
      \(padEven -> bs) ->
        let decoded = decodeW' (BS8.toShort bs)
            encoded = encodeW' =<< decoded
        in (isRight encoded, isRight decoded) === (True, True))
  , ("encodeWithBasePosix/decodeWithBasePosix never fails (utf8b)",
     property $
      \bs -> ioProperty $ do
        setFileSystemEncoding (mkUTF8 TransliterateCodingFailure)
        let decoded = decodeP' (BS8.toShort bs)
            encoded = encodeP' =<< decoded
        pure $ (isRight encoded, isRight decoded) === (True, True))

  , ("decodeWithBaseWindows == utf16le_b",
     property $
      \(BS8.toShort . padEven -> bs) ->
        let decoded  = decodeW' bs
            decoded' = first displayException $ decodeWithTE (mkUTF16le_b ErrorOnCodingFailure) bs
        in decoded === decoded')

  , ("encodeWithBaseWindows == utf16le_b",
     property $
      \(NonNullSurrogateString str) ->
        let decoded  = encodeW' str
            decoded' = first displayException $ encodeWithTE (mkUTF16le_b ErrorOnCodingFailure) str
        in decoded === decoded')

  , ("encodeWithTE/decodeWithTE never fails (utf16le_b)",
     property $
      \(padEven -> bs) ->
        let decoded = decodeWithTE (mkUTF16le_b ErrorOnCodingFailure) (BS8.toShort bs)
            encoded = encodeWithTE (mkUTF16le_b ErrorOnCodingFailure) =<< decoded
        in (isRight encoded, isRight decoded) === (True, True))
  ]


padEven :: ByteString -> ByteString
padEven bs
  | even (BS.length bs) = bs
  | otherwise = bs `BS.append` BS.pack [70]


decodeP' :: BS8.ShortByteString -> Either String String
decodeP' ba = unsafePerformIO $ do
  r <- try @SomeException $ decodeWithBasePosix ba
  evaluate $ force $ first displayException r

encodeP' :: String -> Either String BS8.ShortByteString
encodeP' str = unsafePerformIO $ do
  r <- try @SomeException $ encodeWithBasePosix str
  evaluate $ force $ first displayException r

decodeW' :: BS16.ShortByteString -> Either String String
decodeW' ba = unsafePerformIO $ do
  r <- try @SomeException $ decodeWithBaseWindows ba
  evaluate $ force $ first displayException r

encodeW' :: String -> Either String BS8.ShortByteString
encodeW' str = unsafePerformIO $ do
  r <- try @SomeException $ encodeWithBaseWindows str
  evaluate $ force $ first displayException r