1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458
|
{-# OPTIONS -fno-implicit-prelude #-}
-----------------------------------------------------------------------------
-- |
-- Module : Foreign.C.String
-- Copyright : (c) The FFI task force 2001
-- License : BSD-style (see the file libraries/base/LICENSE)
--
-- Maintainer : ffi@haskell.org
-- Stability : provisional
-- Portability : portable
--
-- Utilities for primitive marshalling of C strings.
--
-- The marshalling converts each Haskell character, representing a Unicode
-- code point, to one or more bytes in a manner that, by default, is
-- determined by the current locale. As a consequence, no guarantees
-- can be made about the relative length of a Haskell string and its
-- corresponding C string, and therefore all the marshalling routines
-- include memory allocation. The translation between Unicode and the
-- encoding of the current locale may be lossy.
--
-----------------------------------------------------------------------------
module Foreign.C.String ( -- representation of strings in C
-- * C strings
CString, -- = Ptr CChar
CStringLen, -- = (Ptr CChar, Int)
-- ** Using a locale-dependent encoding
-- | Currently these functions are identical to their @CAString@ counterparts;
-- eventually they will use an encoding determined by the current locale.
-- conversion of C strings into Haskell strings
--
peekCString, -- :: CString -> IO String
peekCStringLen, -- :: CStringLen -> IO String
-- conversion of Haskell strings into C strings
--
newCString, -- :: String -> IO CString
newCStringLen, -- :: String -> IO CStringLen
-- conversion of Haskell strings into C strings using temporary storage
--
withCString, -- :: String -> (CString -> IO a) -> IO a
withCStringLen, -- :: String -> (CStringLen -> IO a) -> IO a
charIsRepresentable, -- :: Char -> IO Bool
-- ** Using 8-bit characters
-- | These variants of the above functions are for use with C libraries
-- that are ignorant of Unicode. These functions should be used with
-- care, as a loss of information can occur.
castCharToCChar, -- :: Char -> CChar
castCCharToChar, -- :: CChar -> Char
peekCAString, -- :: CString -> IO String
peekCAStringLen, -- :: CStringLen -> IO String
newCAString, -- :: String -> IO CString
newCAStringLen, -- :: String -> IO CStringLen
withCAString, -- :: String -> (CString -> IO a) -> IO a
withCAStringLen, -- :: String -> (CStringLen -> IO a) -> IO a
-- * C wide strings
-- | These variants of the above functions are for use with C libraries
-- that encode Unicode using the C @wchar_t@ type in a system-dependent
-- way. The only encodings supported are
--
-- * UTF-32 (the C compiler defines @__STDC_ISO_10646__@), or
--
-- * UTF-16 (as used on Windows systems).
CWString, -- = Ptr CWchar
CWStringLen, -- = (Ptr CWchar, Int)
peekCWString, -- :: CWString -> IO String
peekCWStringLen, -- :: CWStringLen -> IO String
newCWString, -- :: String -> IO CWString
newCWStringLen, -- :: String -> IO CWStringLen
withCWString, -- :: String -> (CWString -> IO a) -> IO a
withCWStringLen, -- :: String -> (CWStringLen -> IO a) -> IO a
) where
import Foreign.Marshal.Array
import Foreign.C.Types
import Foreign.Ptr
import Foreign.Storable
import Data.Word
#ifdef __GLASGOW_HASKELL__
import GHC.List
import GHC.Real
import GHC.Num
import GHC.IOBase
import GHC.Base
#else
import Data.Char ( chr, ord )
#define unsafeChr chr
#endif
-----------------------------------------------------------------------------
-- Strings
-- representation of strings in C
-- ------------------------------
-- | A C string is a reference to an array of C characters terminated by NUL.
type CString = Ptr CChar
-- | A string with explicit length information in bytes instead of a
-- terminating NUL (allowing NUL characters in the middle of the string).
type CStringLen = (Ptr CChar, Int)
-- exported functions
-- ------------------
--
-- * the following routines apply the default conversion when converting the
-- C-land character encoding into the Haskell-land character encoding
-- | Marshal a NUL terminated C string into a Haskell string.
--
peekCString :: CString -> IO String
peekCString = peekCAString
-- | Marshal a C string with explicit length into a Haskell string.
--
peekCStringLen :: CStringLen -> IO String
peekCStringLen = peekCAStringLen
-- | Marshal a Haskell string into a NUL terminated C string.
--
-- * the Haskell string may /not/ contain any NUL characters
--
-- * new storage is allocated for the C string and must be explicitly freed
--
newCString :: String -> IO CString
newCString = newCAString
-- | Marshal a Haskell string into a C string (ie, character array) with
-- explicit length information.
--
-- * new storage is allocated for the C string and must be explicitly freed
--
newCStringLen :: String -> IO CStringLen
newCStringLen = newCAStringLen
-- | Marshal a Haskell string into a NUL terminated C string using temporary
-- storage.
--
-- * the Haskell string may /not/ contain any NUL characters
--
-- * see the lifetime constraints of 'Foreign.Marshal.Alloc.alloca'
--
withCString :: String -> (CString -> IO a) -> IO a
withCString = withCAString
-- | Marshal a Haskell string into a NUL terminated C string using temporary
-- storage.
--
-- * the Haskell string may /not/ contain any NUL characters
--
-- * see the lifetime constraints of 'Foreign.Marshal.Alloc.alloca'
--
withCStringLen :: String -> (CStringLen -> IO a) -> IO a
withCStringLen = withCAStringLen
-- | Determines whether a character can be accurately encoded in a 'CString'.
-- Unrepresentable characters are converted to @\'?\'@.
--
-- Currently only Latin-1 characters are representable.
charIsRepresentable :: Char -> IO Bool
charIsRepresentable c = return (ord c < 256)
-- single byte characters
-- ----------------------
--
-- ** NOTE: These routines don't handle conversions! **
-- | Convert a C byte, representing a Latin-1 character, to the corresponding
-- Haskell character.
castCCharToChar :: CChar -> Char
castCCharToChar ch = unsafeChr (fromIntegral (fromIntegral ch :: Word8))
-- | Convert a Haskell character to a C character.
-- This function is only safe on the first 256 characters.
castCharToCChar :: Char -> CChar
castCharToCChar ch = fromIntegral (ord ch)
-- | Marshal a NUL terminated C string into a Haskell string.
--
peekCAString :: CString -> IO String
#ifndef __GLASGOW_HASKELL__
peekCAString cp = do
cs <- peekArray0 nUL cp
return (cCharsToChars cs)
#else
peekCAString cp = do
l <- lengthArray0 nUL cp
if l <= 0 then return "" else loop "" (l-1)
where
loop s i = do
xval <- peekElemOff cp i
let val = castCCharToChar xval
val `seq` if i <= 0 then return (val:s) else loop (val:s) (i-1)
#endif
-- | Marshal a C string with explicit length into a Haskell string.
--
peekCAStringLen :: CStringLen -> IO String
#ifndef __GLASGOW_HASKELL__
peekCAStringLen (cp, len) = do
cs <- peekArray len cp
return (cCharsToChars cs)
#else
peekCAStringLen (cp, len)
| len <= 0 = return "" -- being (too?) nice.
| otherwise = loop [] (len-1)
where
loop acc i = do
xval <- peekElemOff cp i
let val = castCCharToChar xval
-- blow away the coercion ASAP.
if (val `seq` (i == 0))
then return (val:acc)
else loop (val:acc) (i-1)
#endif
-- | Marshal a Haskell string into a NUL terminated C string.
--
-- * the Haskell string may /not/ contain any NUL characters
--
-- * new storage is allocated for the C string and must be explicitly freed
--
newCAString :: String -> IO CString
#ifndef __GLASGOW_HASKELL__
newCAString = newArray0 nUL . charsToCChars
#else
newCAString str = do
ptr <- mallocArray0 (length str)
let
go [] n = pokeElemOff ptr n nUL
go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
go str 0
return ptr
#endif
-- | Marshal a Haskell string into a C string (ie, character array) with
-- explicit length information.
--
-- * new storage is allocated for the C string and must be explicitly freed
--
newCAStringLen :: String -> IO CStringLen
#ifndef __GLASGOW_HASKELL__
newCAStringLen str = do
a <- newArray (charsToCChars str)
return (pairLength str a)
#else
newCAStringLen str = do
ptr <- mallocArray0 len
let
go [] n = n `seq` return () -- make it strict in n
go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
go str 0
return (ptr, len)
where
len = length str
#endif
-- | Marshal a Haskell string into a NUL terminated C string using temporary
-- storage.
--
-- * the Haskell string may /not/ contain any NUL characters
--
-- * see the lifetime constraints of 'Foreign.Marshal.Alloc.alloca'
--
withCAString :: String -> (CString -> IO a) -> IO a
#ifndef __GLASGOW_HASKELL__
withCAString = withArray0 nUL . charsToCChars
#else
withCAString str f =
allocaArray0 (length str) $ \ptr ->
let
go [] n = pokeElemOff ptr n nUL
go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
in do
go str 0
f ptr
#endif
-- | Marshal a Haskell string into a NUL terminated C string using temporary
-- storage.
--
-- * the Haskell string may /not/ contain any NUL characters
--
-- * see the lifetime constraints of 'Foreign.Marshal.Alloc.alloca'
--
withCAStringLen :: String -> (CStringLen -> IO a) -> IO a
#ifndef __GLASGOW_HASKELL__
withCAStringLen str act = withArray (charsToCChars str) $ act . pairLength str
#else
withCAStringLen str f =
allocaArray len $ \ptr ->
let
go [] n = n `seq` return () -- make it strict in n
go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
in do
go str 0
f (ptr,len)
where
len = length str
#endif
-- auxiliary definitions
-- ----------------------
-- C's end of string character
--
nUL :: CChar
nUL = 0
-- pair a C string with the length of the given Haskell string
--
pairLength :: String -> a -> (a, Int)
pairLength = flip (,) . length
#ifndef __GLASGOW_HASKELL__
-- cast [CChar] to [Char]
--
cCharsToChars :: [CChar] -> [Char]
cCharsToChars xs = map castCCharToChar xs
-- cast [Char] to [CChar]
--
charsToCChars :: [Char] -> [CChar]
charsToCChars xs = map castCharToCChar xs
#endif
-----------------------------------------------------------------------------
-- Wide strings
-- representation of wide strings in C
-- -----------------------------------
-- | A C wide string is a reference to an array of C wide characters
-- terminated by NUL.
type CWString = Ptr CWchar
-- | A wide character string with explicit length information in bytes
-- instead of a terminating NUL (allowing NUL characters in the middle
-- of the string).
type CWStringLen = (Ptr CWchar, Int)
-- | Marshal a NUL terminated C wide string into a Haskell string.
--
peekCWString :: CWString -> IO String
peekCWString cp = do
cs <- peekArray0 wNUL cp
return (cWcharsToChars cs)
-- | Marshal a C wide string with explicit length into a Haskell string.
--
peekCWStringLen :: CWStringLen -> IO String
peekCWStringLen (cp, len) = do
cs <- peekArray len cp
return (cWcharsToChars cs)
-- | Marshal a Haskell string into a NUL terminated C wide string.
--
-- * the Haskell string may /not/ contain any NUL characters
--
-- * new storage is allocated for the C string and must be explicitly freed
--
newCWString :: String -> IO CWString
newCWString = newArray0 wNUL . charsToCWchars
-- | Marshal a Haskell string into a C wide string (ie, wide character array)
-- with explicit length information.
--
-- * new storage is allocated for the C string and must be explicitly freed
--
newCWStringLen :: String -> IO CWStringLen
newCWStringLen str = do
a <- newArray (charsToCWchars str)
return (pairLength str a)
-- | Marshal a Haskell string into a NUL terminated C wide string using
-- temporary storage.
--
-- * the Haskell string may /not/ contain any NUL characters
--
-- * see the lifetime constraints of 'Foreign.Marshal.Alloc.alloca'
--
withCWString :: String -> (CWString -> IO a) -> IO a
withCWString = withArray0 wNUL . charsToCWchars
-- | Marshal a Haskell string into a NUL terminated C wide string using
-- temporary storage.
--
-- * the Haskell string may /not/ contain any NUL characters
--
-- * see the lifetime constraints of 'Foreign.Marshal.Alloc.alloca'
--
withCWStringLen :: String -> (CWStringLen -> IO a) -> IO a
withCWStringLen str act = withArray (charsToCWchars str) $ act . pairLength str
-- auxiliary definitions
-- ----------------------
wNUL :: CWchar
wNUL = 0
cWcharsToChars :: [CWchar] -> [Char]
charsToCWchars :: [Char] -> [CWchar]
#ifdef mingw32_TARGET_OS
-- On Windows, wchar_t is 16 bits wide and CWString uses the UTF-16 encoding.
-- coding errors generate Chars in the surrogate range
cWcharsToChars = map chr . fromUTF16 . map fromIntegral
where
fromUTF16 (c1:c2:wcs)
| 0xd800 <= c1 && c1 <= 0xdbff && 0xdc00 <= c2 && c2 <= 0xdfff =
((c1 - 0xd800)*0x400 + (c2 - 0xdc00) + 0x10000) : fromUTF16 wcs
fromUTF16 (c:wcs) = c : fromUTF16 wcs
fromUTF16 [] = []
charsToCWchars = foldr utf16Char [] . map ord
where
utf16Char c wcs
| c < 0x10000 = fromIntegral c : wcs
| otherwise = let c' = c - 0x10000 in
fromIntegral (c' `div` 0x400 + 0xd800) :
fromIntegral (c' `mod` 0x400 + 0xdc00) : wcs
#else /* !mingw32_TARGET_OS */
cWcharsToChars xs = map castCWcharToChar xs
charsToCWchars xs = map castCharToCWchar xs
-- These conversions only make sense if __STDC_ISO_10646__ is defined
-- (meaning that wchar_t is ISO 10646, aka Unicode)
castCWcharToChar :: CWchar -> Char
castCWcharToChar ch = chr (fromIntegral ch )
castCharToCWchar :: Char -> CWchar
castCharToCWchar ch = fromIntegral (ord ch)
#endif /* !mingw32_TARGET_OS */
|