1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386
|
-- Copyright (c) 2002-2004 John Meacham (john at repetae dot net)
-- Copyright (c) 2004 Don Stewart - http://www.cse.unsw.edu.au/~dons
--
-- This library is free software; you can redistribute it and/or
-- modify it under the terms of the GNU Lesser General Public
-- License as published by the Free Software Foundation; either
-- version 2.1 of the License, or (at your option) any later version.
--
-- This library is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-- Lesser General Public License for more details.
--
-- You should have received a copy of the GNU Lesser General Public
-- License along with this library; if not, write to the Free Software
-- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
module UI.HSCurses.CWString (
-- * utf8 versions
withUTF8String,
withUTF8StringLen,
newUTF8String,
newUTF8StringLen,
peekUTF8String,
peekUTF8StringLen,
-- * WChar stuff
#ifdef HAVE_WCHAR_H
withCWString,
withCWStringLen,
newCWString,
newCWStringLen,
peekCWString,
peekCWStringLen,
wcharIsUnicode,
CWChar,
CWString,
CWStringLen,
#endif
-- * Locale versions
withLCString,
withLCStringLen,
newLCString,
newLCStringLen,
peekLCStringLen,
peekLCString,
-- charIsRepresentable
) where
import Data.Char ( ord, chr )
import Data.Bits ( Bits((.|.), (.&.), shift) )
import Foreign.C.String
#if __GLASGOW_HASKELL__ < 603
import GHC.Exts
#endif
#ifdef HAVE_WCHAR_H
import Foreign.C.Types
#include <wchar.h>
#include <limits.h>
#include <stdlib.h>
type CWChar = (#type wchar_t)
type CWString = Ptr CWChar
type CWStringLen = (CWString, Int)
fi :: (Integral a, Num b) => a -> b
fi x = fromIntegral x
-------------------
-- CWChar functions
-------------------
{-# INLINE wcharIsUnicode #-}
wcharIsUnicode :: Bool
#if defined(__STDC_ISO_10646__)
wcharIsUnicode = True
-- support functions
wNUL :: CWChar
wNUL = 0
#ifndef __GLASGOW_HASKELL__
pairLength :: String -> CString -> CStringLen
pairLength = flip (,) . length
cwCharsToChars :: [CWChar] -> [Char]
cwCharsToChars xs = map castCWCharToChar xs
charsToCWChars :: [Char] -> [CWChar]
charsToCWChars xs = map castCharToCWChar xs
#endif
-- __STDC_ISO_10646__
castCWCharToChar :: CWChar -> Char
castCWCharToChar ch = chr (fromIntegral ch )
castCharToCWChar :: Char -> CWChar
castCharToCWChar ch = fromIntegral (ord ch)
-- exported functions
peekCWString :: CWString -> IO String
#ifndef __GLASGOW_HASKELL__
peekCString cp = do cs <- peekArray0 wNUL cp; return (cwCharsToChars cs)
#else
peekCWString cp = loop 0
where
loop i = do
val <- peekElemOff cp i
if val == wNUL then return [] else do
rest <- loop (i+1)
return (castCWCharToChar val : rest)
#endif
peekCWStringLen :: CWStringLen -> IO String
#ifndef __GLASGOW_HASKELL__
peekCWStringLen (cp, len) = do cs <- peekArray len cp; return (cwCharsToChars cs)
#else
peekCWStringLen (cp, len) = loop 0
where
loop i | i == len = return []
| otherwise = do
val <- peekElemOff cp i
rest <- loop (i+1)
return (castCWCharToChar val : rest)
#endif
newCWString :: String -> IO CWString
#ifndef __GLASGOW_HASKELL__
newCWString = newArray0 wNUL . charsToCWChars
#else
newCWString str = do
ptr <- mallocArray0 (length str)
let
go [] n## = pokeElemOff ptr (I## n##) wNUL
go (c:cs) n## = do pokeElemOff ptr (I## n##) (castCharToCWChar c); go cs (n## +## 1##)
go str 0##
return ptr
#endif
newCWStringLen :: String -> IO CWStringLen
#ifndef __GLASGOW_HASKELL__
newCWStringLen str = do a <- newArray (charsToCWChars str)
return (pairLength str a)
#else
newCWStringLen str = do
ptr <- mallocArray0 len
let
go [] _ = return ()
go (c:cs) n## = do pokeElemOff ptr (I## n##) (castCharToCWChar c); go cs (n## +## 1##)
go str 0##
return (ptr, len)
where
len = length str
#endif
withCWString :: String -> (CWString -> IO a) -> IO a
#ifndef __GLASGOW_HASKELL__
withCWString = withArray0 wNUL . charsToCWChars
#else
withCWString str f =
allocaArray0 (length str) $ \ptr ->
let
go [] n## = pokeElemOff ptr (I## n##) wNUL
go (c:cs) n## = do pokeElemOff ptr (I## n##) (castCharToCWChar c); go cs (n## +## 1##)
in do
go str 0##
f ptr
#endif
withCWStringLen :: String -> (CWStringLen -> IO a) -> IO a
#ifndef __GLASGOW_HASKELL__
withCWStringLen str act = withArray (charsToCWChars str) $ act . pairLength str
#else
withCWStringLen str f =
allocaArray len $ \ptr ->
let
go [] _ = return ()
go (c:cs) n## = do pokeElemOff ptr (I## n##) (castCharToCWChar c); go cs (n## +## 1##)
in do
go str 0##
f (ptr,len)
where
len = length str
#endif
#else
-- no __STDC_ISO_10646__
wcharIsUnicode = False
#endif
newtype MBState = MBState { _mbstate :: (Ptr MBState)}
withMBState :: (MBState -> IO a) -> IO a
withMBState act = allocaBytes (#const sizeof(mbstate_t)) (\mb -> c_memset mb 0 (#const sizeof(mbstate_t)) >> act (MBState mb))
clearMBState :: MBState -> IO ()
clearMBState (MBState mb) = c_memset mb 0 (#const sizeof(mbstate_t)) >> return ()
wcsrtombs :: CWString -> (CString, CSize) -> IO CSize
wcsrtombs wcs (cs,len) =
alloca (\p ->
poke p wcs >> withMBState (\mb ->
wcsrtombs' p cs len mb))
where
wcsrtombs' p cs' len' mb = do
x <- c_wcsrtombs cs p len' mb
case x of
-1 -> do
sp <- peek p
poke sp ((fi (ord '?'))::CWChar)
poke p wcs
clearMBState mb
wcsrtombs' p cs' len' mb
e | e >= 0 && e <= len' -> do
let ep = advancePtr cs' (fi e)
poke ep (fi (0::Int))
return x
e -> error $ "HSCurses.CWString.wcsrtombs: impossible case: "++show e
foreign import ccall unsafe hs_get_mb_cur_max :: IO Int
mb_cur_max :: Int
mb_cur_max = unsafePerformIO hs_get_mb_cur_max
{-
charIsRepresentable :: Char -> IO Bool
charIsRepresentable ch = fmap (/= -1) $ allocaBytes mb_cur_max (\cs -> c_wctomb cs (fi $ ord ch))
-}
{-
foreign import ccall unsafe "stdlib.h wctomb" c_wctomb :: CString -> CWChar -> IO CInt
-}
foreign import ccall unsafe "stdlib.h wcsrtombs"
c_wcsrtombs :: CString -> (Ptr (Ptr CWChar)) -> CSize -> MBState -> IO CSize
foreign import ccall unsafe "string.h memset"
c_memset :: Ptr a -> CInt -> CSize -> IO (Ptr a)
foreign import ccall unsafe "stdlib.h mbstowcs"
c_mbstowcs :: CWString -> CString -> CSize -> IO CSize
mbstowcs :: CWString
-> Foreign.C.String.CString
-> Foreign.C.Types.CSize -> IO Foreign.C.Types.CSize
mbstowcs a b s = throwIf (== -1) (const "mbstowcs") $ c_mbstowcs a b s
peekLCString :: CString -> IO String
peekLCString cp = do
sz <- mbstowcs nullPtr cp 0
allocaArray (fi $ sz + 1) (\wcp -> mbstowcs wcp cp (sz + 1) >> peekCWString wcp)
-- TODO fix for embeded NULs
peekLCStringLen :: CStringLen -> IO String
peekLCStringLen (cp, len) = allocaBytes (len + 1) $ \ncp -> do
copyBytes ncp cp len
pokeElemOff ncp len 0
peekLCString ncp
newLCString :: String -> IO CString
newLCString s =
withCWString s $ \wcs -> do
cs <- mallocArray0 alen
wcsrtombs wcs (cs, fi alen)
return cs
where alen = mb_cur_max * length s
newLCStringLen :: String -> IO CStringLen
newLCStringLen str = newLCString str >>= \cs -> return (pairLength1 str cs)
withLCString :: String -> (CString -> IO a) -> IO a
withLCString s a =
withCWString s $ \wcs ->
allocaArray0 alen $ \cs ->
wcsrtombs wcs (cs,fi alen) >> a cs
where alen = mb_cur_max * length s
withLCStringLen :: String -> (CStringLen -> IO a) -> IO a
withLCStringLen s a =
withCWString s $ \wcs ->
allocaArray0 alen $ \cs -> do
sz <- wcsrtombs wcs (cs,fi alen)
a (cs,fi sz)
where alen = mb_cur_max * length s
pairLength1 :: String -> CString -> CStringLen
pairLength1 = flip (,) . length
#else
-- -----------------------------------------------------------
-- no CF_WCHAR_SUPPORT (OpenBSD)
{-
charIsRepresentable :: Char -> IO Bool
charIsRepresentable ch = return $ isLatin1 ch
-}
withLCString :: String -> (Foreign.C.String.CString -> IO a) -> IO a
withLCString = withCString
withLCStringLen :: String -> (Foreign.C.String.CStringLen -> IO a) -> IO a
withLCStringLen = withCStringLen
newLCString :: String -> IO Foreign.C.String.CString
newLCString = newCString
newLCStringLen :: String -> IO Foreign.C.String.CStringLen
newLCStringLen = newCStringLen
peekLCString :: Foreign.C.String.CString -> IO String
peekLCString = peekCString
peekLCStringLen :: Foreign.C.String.CStringLen -> IO String
peekLCStringLen = peekCStringLen
#endif
-- no CF_WCHAR_SUPPORT
-----------------
-- UTF8 versions
-----------------
withUTF8String :: String -> (CString -> IO a) -> IO a
withUTF8String hsStr = withCString (toUTF hsStr)
withUTF8StringLen :: String -> (CStringLen -> IO a) -> IO a
withUTF8StringLen hsStr = withCStringLen (toUTF hsStr)
newUTF8String :: String -> IO CString
newUTF8String = newCString . toUTF
newUTF8StringLen :: String -> IO CStringLen
newUTF8StringLen = newCStringLen . toUTF
peekUTF8String :: CString -> IO String
peekUTF8String strPtr = fmap fromUTF $ peekCString strPtr
peekUTF8StringLen :: CStringLen -> IO String
peekUTF8StringLen strPtr = fmap fromUTF $ peekCStringLen strPtr
-- these should read and write directly from/to memory.
-- A first pass will be needed to determine the size of the allocated region
toUTF :: String -> String
toUTF [] = []
toUTF (x:xs) | ord x<=0x007F = x:toUTF xs
| ord x<=0x07FF = chr (0xC0 .|. ((ord x `shift` (-6)) .&. 0x1F)):
chr (0x80 .|. (ord x .&. 0x3F)):
toUTF xs
| otherwise = chr (0xE0 .|. ((ord x `shift` (-12)) .&. 0x0F)):
chr (0x80 .|. ((ord x `shift` (-6)) .&. 0x3F)):
chr (0x80 .|. (ord x .&. 0x3F)):
toUTF xs
fromUTF :: String -> String
fromUTF [] = []
fromUTF (al@(x:xs)) | ord x<=0x7F = x:fromUTF xs
| ord x<=0xBF = err
| ord x<=0xDF = twoBytes al
| ord x<=0xEF = threeBytes al
| otherwise = err
where
twoBytes (x1:x2:xs') = chr (((ord x1 .&. 0x1F) `shift` 6) .|.
(ord x2 .&. 0x3F)):fromUTF xs'
twoBytes _ = error "fromUTF: illegal two byte sequence"
threeBytes (x1:x2:x3:xs') = chr (((ord x1 .&. 0x0F) `shift` 12) .|.
((ord x2 .&. 0x3F) `shift` 6) .|.
(ord x3 .&. 0x3F)):fromUTF xs'
threeBytes _ = error "fromUTF: illegal three byte sequence"
err = error "fromUTF: illegal UTF-8 character"
|