1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
|
{-# LANGUAGE DeriveDataTypeable #-}
{- | This module implements UTF-32 encoding and decoding.
See <http://en.wikipedia.org/wiki/UTF-32> for more information.
-}
module Data.Encoding.UTF32
(UTF32(..))
where
import Data.Encoding.Base
import Data.Encoding.ByteSink
import Data.Encoding.ByteSource
import Data.Encoding.Exception
import Data.Char
import Data.Typeable
-- | Selects which flavour of UTF-32 is used for encoding and decoding.
data UTF32
    = -- | Detects big or little endian through the use of the BOM
      -- (Byte Order Mark) character. Defaults to big endian if not present.
      UTF32
    | -- | Encodes and decodes using the big endian encoding.
      UTF32BE
    | -- | Encodes and decodes using the little endian encoding.
      UTF32LE
    deriving (Eq, Show, Typeable)
-- | UTF-32 codec: every character is written and read as a single 32-bit
-- word holding its code point.
instance Encoding UTF32 where
    -- Only 'UTF32LE' writes little endian words; both 'UTF32' and 'UTF32BE'
    -- fall through to the big endian case.
    encodeChar UTF32LE ch = pushWord32le (fromIntegral $ ord ch)
    encodeChar _ ch = pushWord32be (fromIntegral $ ord ch)

    -- Reads one 32-bit word in the selected byte order and converts it to a
    -- character. 'UTF32' is treated as big endian here.
    -- NOTE(review): 'chr' is partial and calls 'error' for words above
    -- 0x10FFFF, so malformed input is not reported as a decoding exception.
    -- Consider range-checking the word and raising a proper decoding error.
    decodeChar UTF32LE = do
        wrd <- fetchWord32le
        return $ chr $ fromIntegral wrd
    decodeChar _ = do
        wrd <- fetchWord32be
        return $ chr $ fromIntegral wrd

    -- 'UTF32' prefixes the output with the BOM (U+FEFF), written big endian;
    -- the explicit-endian variants write the characters only.
    encode UTF32 str = do
        encodeChar UTF32 '\xFEFF'
        mapM_ (encodeChar UTF32) str
    encode enc str = mapM_ (encodeChar enc) str

    -- 'UTF32' inspects the first word, read as big endian, to pick the byte
    -- order: 0x0000FEFF selects big endian, 0xFFFE0000 (the byte-swapped BOM)
    -- selects little endian. Any other word is kept as the first decoded
    -- character and the rest of the input is decoded as big endian.
    decode UTF32 = do
        -- Check for an empty source before fetching the BOM candidate, so
        -- empty input yields the empty string instead of depending on how
        -- 'fetchWord32be' behaves when no data is left.
        atEnd <- sourceEmpty
        if atEnd
          then return []
          else do
            ch <- fetchWord32be
            case ch of
              0x0000FEFF -> untilM sourceEmpty (decodeChar UTF32BE)
              0xFFFE0000 -> untilM sourceEmpty (decodeChar UTF32LE)
              _ -> do
                rest <- untilM sourceEmpty (decodeChar UTF32)
                -- NOTE(review): same 'chr' partiality as in 'decodeChar'.
                return ((chr $ fromIntegral ch):rest)
    decode enc = untilM sourceEmpty (decodeChar enc)

    -- Every character is reported as encodeable.
    encodeable _ _ = True
|