1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174
|
{-# LANGUAGE CPP #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE TypeApplications #-}
module EncodingSpec where
import Data.ByteString ( ByteString )
import qualified Data.ByteString as BS
import Arbitrary
import Test.QuickCheck
import Data.Either ( isRight )
import qualified System.OsString.Data.ByteString.Short as BS8
import qualified System.OsString.Data.ByteString.Short.Word16 as BS16
import System.OsString.Encoding.Internal
import GHC.IO (unsafePerformIO)
import GHC.IO.Encoding ( setFileSystemEncoding )
import System.IO
( utf16le )
import Control.Exception
import Control.DeepSeq
import Data.Bifunctor ( first )
import GHC.IO.Encoding.Failure ( CodingFailureMode(..) )
import GHC.IO.Encoding.UTF16 ( mkUTF16le )
import GHC.IO.Encoding.UTF8 ( mkUTF8 )
tests :: [(String, Property)]
tests =
[ ("ucs2le_decode . ucs2le_encode == id",
property $ \(padEven -> ba) ->
let decoded = decodeWithTE ucs2le (BS8.toShort ba)
encoded = encodeWithTE ucs2le =<< decoded
in (BS8.fromShort <$> encoded) === Right ba)
, ("utf16 doesn't handle invalid surrogate pairs",
property $
let str = [toEnum 55296, toEnum 55297]
encoded = encodeWithTE utf16le str
decoded = decodeWithTE utf16le =<< encoded
#if __GLASGOW_HASKELL__ >= 910
in decoded === Left (EncodingError ("recoverEncode: invalid argument (cannot encode character " <> show (head str) <> ")\n") Nothing))
#elif __GLASGOW_HASKELL__ >= 904
in decoded === Left (EncodingError ("recoverEncode: invalid argument (cannot encode character " <> show (head str) <> ")") Nothing))
#else
in decoded === Left (EncodingError "recoverEncode: invalid argument (invalid character)" Nothing))
#endif
, ("ucs2 handles invalid surrogate pairs",
property $
let str = [toEnum 55296, toEnum 55297]
encoded = encodeWithTE ucs2le str
decoded = decodeWithTE ucs2le =<< encoded
in decoded === Right str)
, ("can roundtrip arbitrary bytes through utf-8 (with RoundtripFailure)",
property $
\bs ->
let decoded = decodeWithTE (mkUTF8 RoundtripFailure) (BS8.toShort bs)
encoded = encodeWithTE (mkUTF8 RoundtripFailure) =<< decoded
in (either (const 0) BS8.length encoded, encoded) === (BS8.length (BS8.toShort bs), Right (BS8.toShort bs)))
, ("can decode arbitrary strings through utf-8 (with RoundtripFailure)",
property $
\(NonNullSurrogateString str) ->
let encoded = encodeWithTE (mkUTF8 RoundtripFailure) str
decoded = decodeWithTE (mkUTF8 RoundtripFailure) =<< encoded
in expectFailure $ (either (const 0) length decoded, decoded) === (length str, Right str))
, ("utf-8 roundtrip encode cannot deal with some surrogates",
property $
let str = [toEnum 0xDFF0, toEnum 0xDFF2]
encoded = encodeWithTE (mkUTF8 RoundtripFailure) str
decoded = decodeWithTE (mkUTF8 RoundtripFailure) =<< encoded
#if __GLASGOW_HASKELL__ >= 910
in decoded === Left (EncodingError ("recoverEncode: invalid argument (cannot encode character " <> show (head str) <> ")\n") Nothing))
#elif __GLASGOW_HASKELL__ >= 904
in decoded === Left (EncodingError ("recoverEncode: invalid argument (cannot encode character " <> show (head str) <> ")") Nothing))
#else
in decoded === Left (EncodingError "recoverEncode: invalid argument (invalid character)" Nothing))
#endif
, ("cannot roundtrip arbitrary bytes through utf-16 (with RoundtripFailure)",
property $
\(padEven -> bs) ->
let decoded = decodeWithTE (mkUTF16le RoundtripFailure) (BS8.toShort bs)
encoded = encodeWithTE (mkUTF16le RoundtripFailure) =<< decoded
in expectFailure $ (either (const 0) BS8.length encoded, encoded) === (BS8.length (BS8.toShort bs), Right (BS8.toShort bs)))
, ("encodeWithTE/decodeWithTE ErrorOnCodingFailure fails (utf16le)",
property $
\(padEven -> bs) ->
let decoded = decodeWithTE (mkUTF16le ErrorOnCodingFailure) (BS8.toShort bs)
encoded = encodeWithTE (mkUTF16le ErrorOnCodingFailure) =<< decoded
in expectFailure $ (isRight encoded, isRight decoded) === (True, True))
, ("encodeWithTE/decodeWithTE ErrorOnCodingFailure fails (utf8)",
property $
\bs ->
let decoded = decodeWithTE (mkUTF8 ErrorOnCodingFailure) (BS8.toShort bs)
encoded = encodeWithTE (mkUTF8 ErrorOnCodingFailure) =<< decoded
in expectFailure $ (isRight encoded, isRight decoded) === (True, True))
, ("encodeWithTE/decodeWithTE TransliterateCodingFailure never fails (utf16le)",
property $
\(padEven -> bs) ->
let decoded = decodeWithTE (mkUTF16le TransliterateCodingFailure) (BS8.toShort bs)
encoded = encodeWithTE (mkUTF16le TransliterateCodingFailure) =<< decoded
in (isRight encoded, isRight decoded) === (True, True))
, ("encodeWithTE/decodeWithTE TransliterateCodingFailure never fails (utf8)",
property $
\bs ->
let decoded = decodeWithTE (mkUTF8 TransliterateCodingFailure) (BS8.toShort bs)
encoded = encodeWithTE (mkUTF8 TransliterateCodingFailure) =<< decoded
in (isRight encoded, isRight decoded) === (True, True))
, ("encodeWithBaseWindows/decodeWithBaseWindows never fails (utf16le)",
property $
\(padEven -> bs) ->
let decoded = decodeW' (BS8.toShort bs)
encoded = encodeW' =<< decoded
in (isRight encoded, isRight decoded) === (True, True))
, ("encodeWithBasePosix/decodeWithBasePosix never fails (utf8b)",
property $
\bs -> ioProperty $ do
setFileSystemEncoding (mkUTF8 TransliterateCodingFailure)
let decoded = decodeP' (BS8.toShort bs)
encoded = encodeP' =<< decoded
pure $ (isRight encoded, isRight decoded) === (True, True))
, ("decodeWithBaseWindows == utf16le_b",
property $
\(BS8.toShort . padEven -> bs) ->
let decoded = decodeW' bs
decoded' = first displayException $ decodeWithTE (mkUTF16le_b ErrorOnCodingFailure) bs
in decoded === decoded')
, ("encodeWithBaseWindows == utf16le_b",
property $
\(NonNullSurrogateString str) ->
let decoded = encodeW' str
decoded' = first displayException $ encodeWithTE (mkUTF16le_b ErrorOnCodingFailure) str
in decoded === decoded')
, ("encodeWithTE/decodeWithTE never fails (utf16le_b)",
property $
\(padEven -> bs) ->
let decoded = decodeWithTE (mkUTF16le_b ErrorOnCodingFailure) (BS8.toShort bs)
encoded = encodeWithTE (mkUTF16le_b ErrorOnCodingFailure) =<< decoded
in (isRight encoded, isRight decoded) === (True, True))
]
padEven :: ByteString -> ByteString
padEven bs
| even (BS.length bs) = bs
| otherwise = bs `BS.append` BS.pack [70]
decodeP' :: BS8.ShortByteString -> Either String String
decodeP' ba = unsafePerformIO $ do
r <- try @SomeException $ decodeWithBasePosix ba
evaluate $ force $ first displayException r
encodeP' :: String -> Either String BS8.ShortByteString
encodeP' str = unsafePerformIO $ do
r <- try @SomeException $ encodeWithBasePosix str
evaluate $ force $ first displayException r
decodeW' :: BS16.ShortByteString -> Either String String
decodeW' ba = unsafePerformIO $ do
r <- try @SomeException $ decodeWithBaseWindows ba
evaluate $ force $ first displayException r
encodeW' :: String -> Either String BS8.ShortByteString
encodeW' str = unsafePerformIO $ do
r <- try @SomeException $ encodeWithBaseWindows str
evaluate $ force $ first displayException r
|