1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
|
{-# LANGUAGE MagicHash #-}
-- | Evaluate an array in parallel in an interleaved fashion,
--   with each processor computing alternate elements.
module Data.Array.Repa.Eval.Interleaved
( fillInterleavedP)
where
import Data.Array.Repa.Eval.Gang
import GHC.Exts
import Prelude as P
-- | Fill a result buffer in parallel, interleaving the work between threads.
--
--   * With a gang of @n@ threads, the thread numbered @t@ writes the elements
--     at indices @t@, @t + n@, @t + 2n@, and so on. The element count is
--     divided between the threads, with the lowest-numbered threads taking
--     one extra element each when the count does not divide evenly.
--
fillInterleavedP
        :: Int                  -- ^ Number of elements.
        -> (Int -> a -> IO ())  -- ^ Update function to write into result buffer.
        -> (Int -> a)           -- ^ Fn to get the value at a given index.
        -> IO ()

{-# INLINE [0] fillInterleavedP #-}
fillInterleavedP !(I# len) write getElem
 = gangIO theGang worker
 where
        -- Size of the gang; this is also the distance between two consecutive
        -- elements handled by the same thread.
        !(I# stride)    = gangSize theGang

        -- Every thread computes at least this many elements.
        !perThread      = quotInt# len stride

        -- This many of the lowest-numbered threads compute one extra element.
        !extras         = remInt# len stride

        -- Work performed by one gang thread: begin at the index equal to the
        -- thread number and write that thread's share of the elements.
        worker (I# tid)
         = fillFrom tid (quota tid)
        {-# INLINE worker #-}

        -- Number of elements assigned to the given thread.
        -- The comparison primop yields 1# for true.
        quota tid
         = case tid <# extras of
                1#      -> perThread +# 1#
                _       -> perThread
        {-# INLINE quota #-}

        -- Write the requested number of elements, starting at the given index
        -- and advancing by the stride after each write.
        fillFrom !first !remaining0
         = loop first remaining0
         where
          loop !ix !remaining
           = case remaining <=# 0# of
                1#      -> return ()
                _       -> do
                        write (I# ix) (getElem (I# ix))
                        loop (ix +# stride) (remaining -# 1#)
        {-# INLINE fillFrom #-}
|