1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699
|
{-# LANGUAGE CPP, ForeignFunctionInterface, BangPatterns #-}
{-# LANGUAGE UnliftedFFITypes, MagicHash,
UnboxedTuples, DeriveDataTypeable #-}
{-# LANGUAGE TypeFamilies #-}
#if __GLASGOW_HASKELL__ >= 703
{-# LANGUAGE Unsafe #-}
#endif
{-# OPTIONS_HADDOCK not-home #-}
-- |
-- Module : Data.ByteString.Internal
-- Copyright : (c) Don Stewart 2006-2008
-- (c) Duncan Coutts 2006-2012
-- License : BSD-style
-- Maintainer : dons00@gmail.com, duncan@community.haskell.org
-- Stability : unstable
-- Portability : non-portable
--
-- A module containing semi-public 'ByteString' internals. This exposes the
-- 'ByteString' representation and low level construction functions. As such
-- all the functions in this module are unsafe. The API is also not stable.
--
-- Where possible application should instead use the functions from the normal
-- public interface modules, such as "Data.ByteString.Unsafe". Packages that
-- extend the ByteString system at a low level will need to use this module.
--
module Data.ByteString.Internal (
-- * The @ByteString@ type and representation
ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable
-- * Conversion with lists: packing and unpacking
packBytes, packUptoLenBytes, unsafePackLenBytes,
packChars, packUptoLenChars, unsafePackLenChars,
unpackBytes, unpackAppendBytesLazy, unpackAppendBytesStrict,
unpackChars, unpackAppendCharsLazy, unpackAppendCharsStrict,
unsafePackAddress,
-- * Low level imperative construction
create, -- :: Int -> (Ptr Word8 -> IO ()) -> IO ByteString
createUptoN, -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
createUptoN', -- :: Int -> (Ptr Word8 -> IO (Int, a)) -> IO (ByteString, a)
createAndTrim, -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
createAndTrim', -- :: Int -> (Ptr Word8 -> IO (Int, Int, a)) -> IO (ByteString, a)
unsafeCreate, -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString
unsafeCreateUptoN, -- :: Int -> (Ptr Word8 -> IO Int) -> ByteString
unsafeCreateUptoN', -- :: Int -> (Ptr Word8 -> IO (Int, a)) -> (ByteString, a)
mallocByteString, -- :: Int -> IO (ForeignPtr a)
-- * Conversion to and from ForeignPtrs
fromForeignPtr, -- :: ForeignPtr Word8 -> Int -> Int -> ByteString
toForeignPtr, -- :: ByteString -> (ForeignPtr Word8, Int, Int)
-- * Utilities
nullForeignPtr, -- :: ForeignPtr Word8
checkedAdd, -- :: String -> Int -> Int -> Int
-- * Standard C Functions
c_strlen, -- :: CString -> IO CInt
c_free_finalizer, -- :: FunPtr (Ptr Word8 -> IO ())
memchr, -- :: Ptr Word8 -> Word8 -> CSize -> IO Ptr Word8
memcmp, -- :: Ptr Word8 -> Ptr Word8 -> Int -> IO CInt
memcpy, -- :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
memset, -- :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
-- * cbits functions
c_reverse, -- :: Ptr Word8 -> Ptr Word8 -> CInt -> IO ()
c_intersperse, -- :: Ptr Word8 -> Ptr Word8 -> CInt -> Word8 -> IO ()
c_maximum, -- :: Ptr Word8 -> CInt -> IO Word8
c_minimum, -- :: Ptr Word8 -> CInt -> IO Word8
c_count, -- :: Ptr Word8 -> CInt -> Word8 -> IO CInt
-- * Chars
w2c, c2w, isSpaceWord8, isSpaceChar8,
-- * Deprecated and unmentionable
accursedUnutterablePerformIO, -- :: IO a -> a
inlinePerformIO, -- :: IO a -> a
unsafeWithForeignPtr
) where
import Prelude hiding (concat, null)
import qualified Data.List as List
import Foreign.ForeignPtr (ForeignPtr, withForeignPtr)
#if MIN_VERSION_base(4,15,0)
import GHC.ForeignPtr (unsafeWithForeignPtr)
#endif
import Foreign.Ptr (Ptr, FunPtr, plusPtr)
import Foreign.Storable (Storable(..))
#if MIN_VERSION_base(4,5,0) || __GLASGOW_HASKELL__ >= 703
import Foreign.C.Types (CInt(..), CSize(..), CULong(..))
#else
import Foreign.C.Types (CInt, CSize, CULong)
#endif
import Foreign.C.String (CString)
#if MIN_VERSION_base(4,13,0)
import Data.Semigroup (Semigroup (sconcat))
import Data.List.NonEmpty (NonEmpty ((:|)))
#elif MIN_VERSION_base(4,9,0)
import Data.Semigroup (Semigroup ((<>), sconcat))
import Data.List.NonEmpty (NonEmpty ((:|)))
#endif
#if !(MIN_VERSION_base(4,8,0))
import Data.Monoid (Monoid(..))
#endif
import Control.DeepSeq (NFData(rnf))
import Data.String (IsString(..))
import Control.Exception (assert)
import Data.Char (ord)
import Data.Word (Word8)
import Data.Typeable (Typeable)
import Data.Data (Data(..), mkNoRepType)
import GHC.Base (nullAddr#,realWorld#,unsafeChr)
#if MIN_VERSION_base(4,7,0)
import GHC.Exts (IsList(..))
#endif
#if MIN_VERSION_base(4,4,0)
import GHC.CString (unpackCString#)
#else
import GHC.Base (unpackCString#)
#endif
import GHC.Prim (Addr#)
#if __GLASGOW_HASKELL__ >= 611
import GHC.IO (IO(IO),unsafeDupablePerformIO)
#else
import GHC.IOBase (IO(IO),RawBuffer,unsafeDupablePerformIO)
#endif
import GHC.ForeignPtr (ForeignPtr(ForeignPtr)
,newForeignPtr_, mallocPlainForeignPtrBytes)
import GHC.Ptr (Ptr(..), castPtr)
#if !MIN_VERSION_base(4,15,0)
unsafeWithForeignPtr :: ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr = withForeignPtr
#endif
-- CFILES stuff is Hugs only
{-# CFILES cbits/fpstring.c #-}
-- -----------------------------------------------------------------------------
-- | A space-efficient representation of a 'Word8' vector, supporting many
-- efficient operations.
--
-- A 'ByteString' contains 8-bit bytes, or by using the operations from
-- "Data.ByteString.Char8" it can be interpreted as containing 8-bit
-- characters.
--
data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8) -- payload
{-# UNPACK #-} !Int -- offset
{-# UNPACK #-} !Int -- length
deriving (Typeable)
instance Eq ByteString where
(==) = eq
instance Ord ByteString where
compare = compareBytes
#if MIN_VERSION_base(4,9,0)
instance Semigroup ByteString where
(<>) = append
sconcat (b:|bs) = concat (b:bs)
#endif
instance Monoid ByteString where
mempty = PS nullForeignPtr 0 0
#if MIN_VERSION_base(4,9,0)
mappend = (<>)
#else
mappend = append
#endif
mconcat = concat
instance NFData ByteString where
rnf PS{} = ()
instance Show ByteString where
showsPrec p ps r = showsPrec p (unpackChars ps) r
instance Read ByteString where
readsPrec p str = [ (packChars x, y) | (x, y) <- readsPrec p str ]
#if MIN_VERSION_base(4,7,0)
-- | @since 0.10.12.0
instance IsList ByteString where
type Item ByteString = Word8
fromList = packBytes
toList = unpackBytes
#endif
-- | Beware: 'fromString' truncates multi-byte characters to octets.
-- e.g. "枯朶に烏のとまりけり秋の暮" becomes �6k�nh~�Q��n�
instance IsString ByteString where
fromString = packChars
instance Data ByteString where
gfoldl f z txt = z packBytes `f` unpackBytes txt
toConstr _ = error "Data.ByteString.ByteString.toConstr"
gunfold _ _ = error "Data.ByteString.ByteString.gunfold"
dataTypeOf _ = mkNoRepType "Data.ByteString.ByteString"
------------------------------------------------------------------------
-- Packing and unpacking from lists
packBytes :: [Word8] -> ByteString
packBytes ws = unsafePackLenBytes (List.length ws) ws
packChars :: [Char] -> ByteString
packChars cs = unsafePackLenChars (List.length cs) cs
{-# INLINE [0] packChars #-}
{-# RULES
"ByteString packChars/packAddress" forall s .
packChars (unpackCString# s) = accursedUnutterablePerformIO (unsafePackAddress s)
#-}
unsafePackLenBytes :: Int -> [Word8] -> ByteString
unsafePackLenBytes len xs0 =
unsafeCreate len $ \p -> go p xs0
where
go !_ [] = return ()
go !p (x:xs) = poke p x >> go (p `plusPtr` 1) xs
unsafePackLenChars :: Int -> [Char] -> ByteString
unsafePackLenChars len cs0 =
unsafeCreate len $ \p -> go p cs0
where
go !_ [] = return ()
go !p (c:cs) = poke p (c2w c) >> go (p `plusPtr` 1) cs
-- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
-- Addr\# (an arbitrary machine address assumed to point outside the
-- garbage-collected heap) into a @ByteString@. A much faster way to
-- create an 'Addr#' is with an unboxed string literal, than to pack a
-- boxed string. A unboxed string literal is compiled to a static @char
-- []@ by GHC. Establishing the length of the string requires a call to
-- @strlen(3)@, so the 'Addr#' must point to a null-terminated buffer (as
-- is the case with @\"string\"\#@ literals in GHC). Use 'Data.ByteString.Unsafe.unsafePackAddressLen'
-- if you know the length of the string statically.
--
-- An example:
--
-- > literalFS = unsafePackAddress "literal"#
--
-- This function is /unsafe/. If you modify the buffer pointed to by the
-- original 'Addr#' this modification will be reflected in the resulting
-- @ByteString@, breaking referential transparency.
--
-- Note this also won't work if your 'Addr#' has embedded @\'\\0\'@ characters in
-- the string, as @strlen@ will return too short a length.
--
unsafePackAddress :: Addr# -> IO ByteString
unsafePackAddress addr# = do
p <- newForeignPtr_ (castPtr cstr)
l <- c_strlen cstr
return $ PS p 0 (fromIntegral l)
where
cstr :: CString
cstr = Ptr addr#
{-# INLINE unsafePackAddress #-}
packUptoLenBytes :: Int -> [Word8] -> (ByteString, [Word8])
packUptoLenBytes len xs0 =
unsafeCreateUptoN' len $ \p -> go p len xs0
where
go !_ !n [] = return (len-n, [])
go !_ !0 xs = return (len, xs)
go !p !n (x:xs) = poke p x >> go (p `plusPtr` 1) (n-1) xs
packUptoLenChars :: Int -> [Char] -> (ByteString, [Char])
packUptoLenChars len cs0 =
unsafeCreateUptoN' len $ \p -> go p len cs0
where
go !_ !n [] = return (len-n, [])
go !_ !0 cs = return (len, cs)
go !p !n (c:cs) = poke p (c2w c) >> go (p `plusPtr` 1) (n-1) cs
-- Unpacking bytestrings into lists efficiently is a tradeoff: on the one hand
-- we would like to write a tight loop that just blasts the list into memory, on
-- the other hand we want it to be unpacked lazily so we don't end up with a
-- massive list data structure in memory.
--
-- Our strategy is to combine both: we will unpack lazily in reasonable sized
-- chunks, where each chunk is unpacked strictly.
--
-- unpackBytes and unpackChars do the lazy loop, while unpackAppendBytes and
-- unpackAppendChars do the chunks strictly.
unpackBytes :: ByteString -> [Word8]
unpackBytes bs = unpackAppendBytesLazy bs []
unpackChars :: ByteString -> [Char]
unpackChars bs = unpackAppendCharsLazy bs []
unpackAppendBytesLazy :: ByteString -> [Word8] -> [Word8]
unpackAppendBytesLazy (PS fp off len) xs
| len <= 100 = unpackAppendBytesStrict (PS fp off len) xs
| otherwise = unpackAppendBytesStrict (PS fp off 100) remainder
where
remainder = unpackAppendBytesLazy (PS fp (off+100) (len-100)) xs
-- Why 100 bytes you ask? Because on a 64bit machine the list we allocate
-- takes just shy of 4k which seems like a reasonable amount.
-- (5 words per list element, 8 bytes per word, 100 elements = 4000 bytes)
unpackAppendCharsLazy :: ByteString -> [Char] -> [Char]
unpackAppendCharsLazy (PS fp off len) cs
| len <= 100 = unpackAppendCharsStrict (PS fp off len) cs
| otherwise = unpackAppendCharsStrict (PS fp off 100) remainder
where
remainder = unpackAppendCharsLazy (PS fp (off+100) (len-100)) cs
-- For these unpack functions, since we're unpacking the whole list strictly we
-- build up the result list in an accumulator. This means we have to build up
-- the list starting at the end. So our traversal starts at the end of the
-- buffer and loops down until we hit the sentinal:
unpackAppendBytesStrict :: ByteString -> [Word8] -> [Word8]
unpackAppendBytesStrict (PS fp off len) xs =
accursedUnutterablePerformIO $ withForeignPtr fp $ \base ->
loop (base `plusPtr` (off-1)) (base `plusPtr` (off-1+len)) xs
where
loop !sentinal !p acc
| p == sentinal = return acc
| otherwise = do x <- peek p
loop sentinal (p `plusPtr` (-1)) (x:acc)
unpackAppendCharsStrict :: ByteString -> [Char] -> [Char]
unpackAppendCharsStrict (PS fp off len) xs =
accursedUnutterablePerformIO $ withForeignPtr fp $ \base ->
loop (base `plusPtr` (off-1)) (base `plusPtr` (off-1+len)) xs
where
loop !sentinal !p acc
| p == sentinal = return acc
| otherwise = do x <- peek p
loop sentinal (p `plusPtr` (-1)) (w2c x:acc)
------------------------------------------------------------------------
-- | The 0 pointer. Used to indicate the empty Bytestring.
nullForeignPtr :: ForeignPtr Word8
nullForeignPtr = ForeignPtr nullAddr# (error "nullForeignPtr") --TODO: should ForeignPtrContents be strict?
-- ---------------------------------------------------------------------
-- Low level constructors
-- | /O(1)/ Build a ByteString from a ForeignPtr.
--
-- If you do not need the offset parameter then you do should be using
-- 'Data.ByteString.Unsafe.unsafePackCStringLen' or
-- 'Data.ByteString.Unsafe.unsafePackCStringFinalizer' instead.
--
fromForeignPtr :: ForeignPtr Word8
-> Int -- ^ Offset
-> Int -- ^ Length
-> ByteString
fromForeignPtr = PS
{-# INLINE fromForeignPtr #-}
-- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int) -- ^ (ptr, offset, length)
toForeignPtr (PS ps s l) = (ps, s, l)
{-# INLINE toForeignPtr #-}
-- | A way of creating ByteStrings outside the IO monad. The @Int@
-- argument gives the final size of the ByteString.
unsafeCreate :: Int -> (Ptr Word8 -> IO ()) -> ByteString
unsafeCreate l f = unsafeDupablePerformIO (create l f)
{-# INLINE unsafeCreate #-}
-- | Like 'unsafeCreate' but instead of giving the final size of the
-- ByteString, it is just an upper bound. The inner action returns
-- the actual size. Unlike 'createAndTrim' the ByteString is not
-- reallocated if the final size is less than the estimated size.
unsafeCreateUptoN :: Int -> (Ptr Word8 -> IO Int) -> ByteString
unsafeCreateUptoN l f = unsafeDupablePerformIO (createUptoN l f)
{-# INLINE unsafeCreateUptoN #-}
-- | @since 0.10.12.0
unsafeCreateUptoN' :: Int -> (Ptr Word8 -> IO (Int, a)) -> (ByteString, a)
unsafeCreateUptoN' l f = unsafeDupablePerformIO (createUptoN' l f)
{-# INLINE unsafeCreateUptoN' #-}
-- | Create ByteString of size @l@ and use action @f@ to fill its contents.
create :: Int -> (Ptr Word8 -> IO ()) -> IO ByteString
create l f = do
fp <- mallocByteString l
withForeignPtr fp $ \p -> f p
return $! PS fp 0 l
{-# INLINE create #-}
-- | Given a maximum size @l@ and an action @f@ that fills the 'ByteString'
-- starting at the given 'Ptr' and returns the actual utilized length,
-- @`createUpToN'` l f@ returns the filled 'ByteString'.
createUptoN :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
createUptoN l f = do
fp <- mallocByteString l
l' <- withForeignPtr fp $ \p -> f p
assert (l' <= l) $ return $! PS fp 0 l'
{-# INLINE createUptoN #-}
-- | Like 'createUpToN', but also returns an additional value created by the
-- action.
--
-- @since 0.10.12.0
createUptoN' :: Int -> (Ptr Word8 -> IO (Int, a)) -> IO (ByteString, a)
createUptoN' l f = do
fp <- mallocByteString l
(l', res) <- withForeignPtr fp $ \p -> f p
assert (l' <= l) $ return (PS fp 0 l', res)
{-# INLINE createUptoN' #-}
-- | Given the maximum size needed and a function to make the contents
-- of a ByteString, createAndTrim makes the 'ByteString'. The generating
-- function is required to return the actual final size (<= the maximum
-- size), and the resulting byte array is realloced to this size.
--
-- createAndTrim is the main mechanism for creating custom, efficient
-- ByteString functions, using Haskell or C functions to fill the space.
--
createAndTrim :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
createAndTrim l f = do
fp <- mallocByteString l
withForeignPtr fp $ \p -> do
l' <- f p
if assert (l' <= l) $ l' >= l
then return $! PS fp 0 l
else create l' $ \p' -> memcpy p' p l'
{-# INLINE createAndTrim #-}
createAndTrim' :: Int -> (Ptr Word8 -> IO (Int, Int, a)) -> IO (ByteString, a)
createAndTrim' l f = do
fp <- mallocByteString l
withForeignPtr fp $ \p -> do
(off, l', res) <- f p
if assert (l' <= l) $ l' >= l
then return (PS fp 0 l, res)
else do ps <- create l' $ \p' ->
memcpy p' (p `plusPtr` off) l'
return (ps, res)
-- | Wrapper of 'Foreign.ForeignPtr.mallocForeignPtrBytes' with faster implementation for GHC
--
mallocByteString :: Int -> IO (ForeignPtr a)
mallocByteString = mallocPlainForeignPtrBytes
{-# INLINE mallocByteString #-}
------------------------------------------------------------------------
-- Implementations for Eq, Ord and Monoid instances
eq :: ByteString -> ByteString -> Bool
eq a@(PS fp off len) b@(PS fp' off' len')
| len /= len' = False -- short cut on length
| fp == fp' && off == off' = True -- short cut for the same string
| otherwise = compareBytes a b == EQ
{-# INLINE eq #-}
-- ^ still needed
compareBytes :: ByteString -> ByteString -> Ordering
compareBytes (PS _ _ 0) (PS _ _ 0) = EQ -- short cut for empty strings
compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) =
accursedUnutterablePerformIO $
unsafeWithForeignPtr fp1 $ \p1 ->
unsafeWithForeignPtr fp2 $ \p2 -> do
i <- memcmp (p1 `plusPtr` off1) (p2 `plusPtr` off2) (min len1 len2)
return $! case i `compare` 0 of
EQ -> len1 `compare` len2
x -> x
append :: ByteString -> ByteString -> ByteString
append (PS _ _ 0) b = b
append a (PS _ _ 0) = a
append (PS fp1 off1 len1) (PS fp2 off2 len2) =
unsafeCreate (len1+len2) $ \destptr1 -> do
let destptr2 = destptr1 `plusPtr` len1
unsafeWithForeignPtr fp1 $ \p1 -> memcpy destptr1 (p1 `plusPtr` off1) len1
unsafeWithForeignPtr fp2 $ \p2 -> memcpy destptr2 (p2 `plusPtr` off2) len2
concat :: [ByteString] -> ByteString
concat = \bss0 -> goLen0 bss0 bss0
-- The idea here is we first do a pass over the input list to determine:
--
-- 1. is a copy necessary? e.g. @concat []@, @concat [mempty, "hello"]@,
-- and @concat ["hello", mempty, mempty]@ can all be handled without
-- copying.
-- 2. if a copy is necessary, how large is the result going to be?
--
-- If a copy is necessary then we create a buffer of the appropriate size
-- and do another pass over the input list, copying the chunks into the
-- buffer. Also, since foreign calls aren't entirely free we skip over
-- empty chunks while copying.
--
-- We pass the original [ByteString] (bss0) through as an argument through
-- goLen0, goLen1, and goLen since we will need it again in goCopy. Passing
-- it as an explicit argument avoids capturing it in these functions'
-- closures which would result in unnecessary closure allocation.
where
-- It's still possible that the result is empty
goLen0 _ [] = mempty
goLen0 bss0 (PS _ _ 0 :bss) = goLen0 bss0 bss
goLen0 bss0 (bs :bss) = goLen1 bss0 bs bss
-- It's still possible that the result is a single chunk
goLen1 _ bs [] = bs
goLen1 bss0 bs (PS _ _ 0 :bss) = goLen1 bss0 bs bss
goLen1 bss0 bs (PS _ _ len:bss) = goLen bss0 (checkedAdd "concat" len' len) bss
where PS _ _ len' = bs
-- General case, just find the total length we'll need
goLen bss0 !total (PS _ _ len:bss) = goLen bss0 total' bss
where total' = checkedAdd "concat" total len
goLen bss0 total [] =
unsafeCreate total $ \ptr -> goCopy bss0 ptr
-- Copy the data
goCopy [] !_ = return ()
goCopy (PS _ _ 0 :bss) !ptr = goCopy bss ptr
goCopy (PS fp off len:bss) !ptr = do
unsafeWithForeignPtr fp $ \p -> memcpy ptr (p `plusPtr` off) len
goCopy bss (ptr `plusPtr` len)
{-# NOINLINE concat #-}
{-# RULES
"ByteString concat [] -> mempty"
concat [] = mempty
"ByteString concat [bs] -> bs" forall x.
concat [x] = x
#-}
-- | Add two non-negative numbers. Errors out on overflow.
checkedAdd :: String -> Int -> Int -> Int
checkedAdd fun x y
| r >= 0 = r
| otherwise = overflowError fun
where r = x + y
{-# INLINE checkedAdd #-}
------------------------------------------------------------------------
-- | Conversion between 'Word8' and 'Char'. Should compile to a no-op.
w2c :: Word8 -> Char
w2c = unsafeChr . fromIntegral
{-# INLINE w2c #-}
-- | Unsafe conversion between 'Char' and 'Word8'. This is a no-op and
-- silently truncates to 8 bits Chars > '\255'. It is provided as
-- convenience for ByteString construction.
c2w :: Char -> Word8
c2w = fromIntegral . ord
{-# INLINE c2w #-}
-- | Selects words corresponding to white-space characters in the Latin-1 range
-- ordered by frequency.
isSpaceWord8 :: Word8 -> Bool
isSpaceWord8 w =
w == 0x20 ||
w == 0x0A || -- LF, \n
w == 0x09 || -- HT, \t
w == 0x0C || -- FF, \f
w == 0x0D || -- CR, \r
w == 0x0B || -- VT, \v
w == 0xA0 -- spotted by QC..
{-# INLINE isSpaceWord8 #-}
-- | Selects white-space characters in the Latin-1 range
isSpaceChar8 :: Char -> Bool
isSpaceChar8 c =
c == ' ' ||
c == '\t' ||
c == '\n' ||
c == '\r' ||
c == '\f' ||
c == '\v' ||
c == '\xa0'
{-# INLINE isSpaceChar8 #-}
overflowError :: String -> a
overflowError fun = error $ "Data.ByteString." ++ fun ++ ": size overflow"
------------------------------------------------------------------------
-- | This \"function\" has a superficial similarity to 'System.IO.Unsafe.unsafePerformIO' but
-- it is in fact a malevolent agent of chaos. It unpicks the seams of reality
-- (and the 'IO' monad) so that the normal rules no longer apply. It lulls you
-- into thinking it is reasonable, but when you are not looking it stabs you
-- in the back and aliases all of your mutable buffers. The carcass of many a
-- seasoned Haskell programmer lie strewn at its feet.
--
-- Witness the trail of destruction:
--
-- * <https://github.com/haskell/bytestring/commit/71c4b438c675aa360c79d79acc9a491e7bbc26e7>
--
-- * <https://github.com/haskell/bytestring/commit/210c656390ae617d9ee3b8bcff5c88dd17cef8da>
--
-- * <https://ghc.haskell.org/trac/ghc/ticket/3486>
--
-- * <https://ghc.haskell.org/trac/ghc/ticket/3487>
--
-- * <https://ghc.haskell.org/trac/ghc/ticket/7270>
--
-- Do not talk about \"safe\"! You do not know what is safe!
--
-- Yield not to its blasphemous call! Flee traveller! Flee or you will be
-- corrupted and devoured!
--
{-# INLINE accursedUnutterablePerformIO #-}
accursedUnutterablePerformIO :: IO a -> a
accursedUnutterablePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
inlinePerformIO :: IO a -> a
inlinePerformIO = accursedUnutterablePerformIO
{-# INLINE inlinePerformIO #-}
{-# DEPRECATED inlinePerformIO "If you think you know what you are doing, use 'System.IO.Unsafe.unsafePerformIO'. If you are sure you know what you are doing, use 'unsafeDupablePerformIO'. If you enjoy sharing an address space with a malevolent agent of chaos, try 'accursedUnutterablePerformIO'." #-}
-- ---------------------------------------------------------------------
--
-- Standard C functions
--
foreign import ccall unsafe "string.h strlen" c_strlen
:: CString -> IO CSize
foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
:: FunPtr (Ptr Word8 -> IO ())
foreign import ccall unsafe "string.h memchr" c_memchr
:: Ptr Word8 -> CInt -> CSize -> IO (Ptr Word8)
memchr :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
memchr p w s = c_memchr p (fromIntegral w) s
foreign import ccall unsafe "string.h memcmp" c_memcmp
:: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt
memcmp :: Ptr Word8 -> Ptr Word8 -> Int -> IO CInt
memcmp p q s = c_memcmp p q (fromIntegral s)
foreign import ccall unsafe "string.h memcpy" c_memcpy
:: Ptr Word8 -> Ptr Word8 -> CSize -> IO (Ptr Word8)
memcpy :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
memcpy p q s = c_memcpy p q (fromIntegral s) >> return ()
{-
foreign import ccall unsafe "string.h memmove" c_memmove
:: Ptr Word8 -> Ptr Word8 -> CSize -> IO (Ptr Word8)
memmove :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
memmove p q s = do c_memmove p q s
return ()
-}
foreign import ccall unsafe "string.h memset" c_memset
:: Ptr Word8 -> CInt -> CSize -> IO (Ptr Word8)
memset :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
memset p w s = c_memset p (fromIntegral w) s
-- ---------------------------------------------------------------------
--
-- Uses our C code
--
foreign import ccall unsafe "static fpstring.h fps_reverse" c_reverse
:: Ptr Word8 -> Ptr Word8 -> CULong -> IO ()
foreign import ccall unsafe "static fpstring.h fps_intersperse" c_intersperse
:: Ptr Word8 -> Ptr Word8 -> CULong -> Word8 -> IO ()
foreign import ccall unsafe "static fpstring.h fps_maximum" c_maximum
:: Ptr Word8 -> CULong -> IO Word8
foreign import ccall unsafe "static fpstring.h fps_minimum" c_minimum
:: Ptr Word8 -> CULong -> IO Word8
foreign import ccall unsafe "static fpstring.h fps_count" c_count
:: Ptr Word8 -> CULong -> Word8 -> IO CULong
|