module Events.SparkTree (
SparkTree,
sparkTreeMaxDepth,
emptySparkTree,
eventsToSparkDurations,
mkSparkTree,
sparkProfile,
) where
import qualified Events.SparkStats as SparkStats
import qualified GHC.RTS.Events as GHCEvents
import GHC.RTS.Events (Timestamp)
import Control.Exception (assert)
import Text.Printf
-- import Debug.Trace
-- | Sparks change state, and each state transition process has a duration.
-- A SparkDuration is a condensed description of such a process:
-- the start time of the duration interval, together with spark stats
-- that record the spark transition rate and the absolute number
-- of sparks in the spark pool within the duration.
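--
-- For example (hypothetical numbers; @stats@ stands for some
-- 'SparkStats.SparkStats' value):
--
-- > SparkDuration { startT = 1000, deltaC = stats }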
data SparkDuration =
SparkDuration { startT :: {-#UNPACK#-}!Timestamp,
deltaC :: {-#UNPACK#-}!SparkStats.SparkStats }
deriving Show
-- | Calculates spark durations and the maximal spark pool size
-- (used to scale the rendered values) from the event log.
-- Warning: cannot be applied to a suffix of the log
-- (it assumes the log starts at time 0).
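--
-- A usage sketch (assuming @events@ is a list of decoded
-- 'GHCEvents.Event' values):
--
-- > let (maxSparkPool, durations) = eventsToSparkDurations events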
eventsToSparkDurations :: [GHCEvents.Event] -> (Double, [SparkDuration])
eventsToSparkDurations es =
let aux _startTime _startCounters [] = (0, [])
aux startTime startCounters (event : events) =
case GHCEvents.spec event of
GHCEvents.SparkCounters crt dud ovf cnv fiz gcd rem ->
let endTime = GHCEvents.time event
endCounters = (crt, dud, ovf, cnv, fiz, gcd, rem)
delta = SparkStats.create startCounters endCounters
newMaxSparkPool = SparkStats.maxPool delta
sd = SparkDuration { startT = startTime,
deltaC = delta }
(oldMaxSparkPool, l) =
aux endTime endCounters events
in ( max oldMaxSparkPool newMaxSparkPool,
sd : l)
_otherEvent -> aux startTime startCounters events
in aux 0 (0,0,0,0,0,0,0) es
-- | We map the spark transition durations (intervals) onto a binary
-- search tree, so that we can easily find the durations
-- that correspond to a particular view of the timeline.
-- Additionally, each node of the tree contains a summary
-- of the information below it, so that we can render views at various
-- levels of resolution. For example, if a tree node would represent
-- less than one pixel on the display, there is no point in descending
-- the tree further.
data SparkTree
= SparkTree
{-#UNPACK#-}!Timestamp -- ^ start time of span represented by the tree
{-#UNPACK#-}!Timestamp -- ^ end time of the span represented by the tree
SparkNode
deriving Show
data SparkNode
= SparkSplit
{-#UNPACK#-}!Timestamp -- ^ time used to split the span into two parts
SparkNode
-- ^ the LHS split; all data lies completely between start and split
SparkNode
-- ^ the RHS split; all data lies completely between split and end
{-#UNPACK#-}!SparkStats.SparkStats
-- ^ aggregate of the spark stats within the span
| SparkTreeLeaf
{-#UNPACK#-}!SparkStats.SparkStats
-- ^ the spark stats for the base duration
| SparkTreeEmpty
-- ^ represents a span that no data refers to, e.g., after the last GC
deriving Show
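-- As an illustration, a minimal tree spanning timestamps 0 to 100,
-- split at 50 (with @d1@, @d2@ and @dAgg@ standing for hypothetical
-- 'SparkStats.SparkStats' values), would look like:
--
-- > SparkTree 0 100 (SparkSplit 50 (SparkTreeLeaf d1) (SparkTreeLeaf d2) dAgg)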
sparkTreeMaxDepth :: SparkTree -> Int
sparkTreeMaxDepth (SparkTree _ _ t) = sparkNodeMaxDepth t
sparkNodeMaxDepth :: SparkNode -> Int
sparkNodeMaxDepth (SparkSplit _ lhs rhs _)
= 1 + sparkNodeMaxDepth lhs `max` sparkNodeMaxDepth rhs
sparkNodeMaxDepth _ = 1
emptySparkTree :: SparkTree
emptySparkTree = SparkTree 0 0 SparkTreeEmpty
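-- For instance, the empty tree consists of a single node:
--
-- > sparkTreeMaxDepth emptySparkTree == 1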
-- | Create spark tree from spark durations.
-- Note that the last event may not be a spark event, in which case
-- there is no data about sparks for the last time interval
-- (the subtree for that interval will have a SparkTreeEmpty node).
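--
-- A usage sketch, continuing from 'eventsToSparkDurations'
-- (@events@ and @lastTime@ are hypothetical names):
--
-- > let (_maxPool, durations) = eventsToSparkDurations events
-- > let tree = mkSparkTree durations lastTime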
mkSparkTree :: [SparkDuration] -- ^ spark durations calculated from events
-> Timestamp -- ^ end time of last event in the list
-> SparkTree
mkSparkTree es endTime =
SparkTree s e $
-- trace (show tree) $
tree
where
tree = splitSparks es endTime
(s, e) = if null es then (0, 0) else (startT (head es), endTime)
-- | Construct a spark tree by recursively splitting time intervals.
-- We only split at spark transition duration boundaries;
-- we never split a duration into multiple pieces.
-- Therefore, the binary tree is only roughly split by time;
-- the actual split depends on the distribution of sample points below it.
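--
-- For example (start times only, hypothetical values): durations
-- starting at 0, 10, 60 and 70 with an end time of 100 are split at
-- the midpoint 50, so the durations at 0 and 10 go left and those at
-- 60 and 70 go right, with the node recording the split at 60 (the
-- start of the first right-hand duration).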
splitSparks :: [SparkDuration] -> Timestamp -> SparkNode
splitSparks [] !_endTime =
SparkTreeEmpty
splitSparks [e] !_endTime =
SparkTreeLeaf (deltaC e)
splitSparks es !endTime
| null rhs
= splitSparks es lhs_end
| null lhs
= error $
printf "splitSparks: null lhs: len = %d, startTime = %d, endTime = %d\n"
(length es) startTime endTime
++ '\n' : show es
| otherwise
= -- trace (printf "len = %d, startTime = %d, endTime = %d\n" (length es) startTime endTime) $
assert (length lhs + length rhs == length es) $
SparkSplit (startT $ head rhs)
ltree
rtree
(SparkStats.aggregate (subDelta rtree ++ subDelta ltree))
where
-- | Integer division, rounding up.
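-- For example, @7 `divUp` 2 == 4@, whereas @7 `div` 2 == 3@.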
divUp :: Timestamp -> Timestamp -> Timestamp
divUp n k = (n + k - 1) `div` k
startTime = startT $ head es
splitTime = startTime + (endTime - startTime) `divUp` 2
(lhs, lhs_end, rhs) = splitSparkList es [] splitTime 0
ltree = splitSparks lhs lhs_end
rtree = splitSparks rhs endTime
subDelta (SparkSplit _ _ _ delta) = [delta]
subDelta (SparkTreeLeaf delta) = [delta]
subDelta SparkTreeEmpty = []
splitSparkList :: [SparkDuration]
-> [SparkDuration]
-> Timestamp
-> Timestamp
-> ([SparkDuration], Timestamp, [SparkDuration])
splitSparkList [] acc !_tsplit !tmax
= (reverse acc, tmax, [])
splitSparkList [e] acc !_tsplit !tmax
-- Just one event left: put it on the right. This ensures that we
-- have at least one event on each side of the split.
= (reverse acc, tmax, [e])
splitSparkList (e:es) acc !tsplit !tmax
| startT e <= tsplit -- pick all durations that start at or before the split
= splitSparkList es (e:acc) tsplit (max tmax (startT e))
| otherwise
= (reverse acc, tmax, e:es)
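-- As a concrete instance (showing start times only): splitting
-- durations starting at 0, 10, 60 and 70 at tsplit = 50 gives
-- ([0, 10], 10, [60, 70]), where 10 is the largest start time
-- on the left-hand side.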
-- | For each timeslice, give the spark stats calculated for that interval.
-- The spark stats are approximated from the aggregated data
-- at the level of the spark tree that covers intervals of a size
-- similar to the timeslice.
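--
-- A usage sketch (names hypothetical; times in event-log units):
--
-- > let statsPerSlice = sparkProfile sliceLen viewStart viewEnd tree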
sparkProfile :: Timestamp -> Timestamp -> Timestamp -> SparkTree
-> [SparkStats.SparkStats]
sparkProfile slice start0 end0 t
= {- trace (show flat) $ -} chopped
where
-- do an extra slice at both ends
start = if start0 < slice then start0 else start0 - slice
end = end0 + slice
flat = flatten start t []
-- TODO: redefine chop so that it's obvious this error will not happen
-- e.g., catch pathological cases, like a tree with only SparkTreeEmpty
-- inside and/or make it tail-recursive instead of
-- taking the 'previous' argument
chopped0 = chop (error "Fatal error in sparkProfile.") [] start flat
chopped | start0 < slice = SparkStats.initial : chopped0
| otherwise = chopped0
flatten :: Timestamp -> SparkTree -> [SparkTree] -> [SparkTree]
flatten _start (SparkTree _s _e SparkTreeEmpty) rest = rest
flatten start t@(SparkTree s e (SparkSplit split l r _)) rest
| e <= start = rest
| end <= s = rest
| start >= split = flatten start (SparkTree split e r) rest
| end <= split = flatten start (SparkTree s split l) rest
| e - s > slice = flatten start (SparkTree s split l) $
flatten start (SparkTree split e r) rest
-- A rule of thumb: if a node is narrower than slice, don't drill down,
-- even if the node sits astride slice boundaries and so the readings
-- for each of the two neighbouring slices will not be accurate
-- (but for the pair as a whole, they will be). This smooths the curve
-- even more than averaging over the timeslice already does.
| otherwise = t : rest
flatten _start t@(SparkTree _s _e (SparkTreeLeaf _)) rest
= t : rest
chop :: SparkStats.SparkStats -> [SparkStats.SparkStats]
-> Timestamp -> [SparkTree] -> [SparkStats.SparkStats]
chop _previous sofar start1 _ts
| start1 >= end
= case sofar of
_ : _ -> [SparkStats.aggregate sofar]
[] -> []
chop _previous sofar _start1 [] -- data too short for the redrawn area
| null sofar -- no data at all in the redrawn area
= []
| otherwise
= [SparkStats.aggregate sofar]
chop previous sofar start1 (t : ts)
| e <= start1 -- skipping data left of the slice
= case sofar of
_ : _ -> error "chop"
[] -> chop previous sofar start1 ts
| s >= start1 + slice -- postponing data right of the slice
= let (c, p) = SparkStats.agEx sofar previous
in c : chop p [] (start1 + slice) (t : ts)
| e > start1 + slice
= let (c, p) = SparkStats.agEx (created_in_this_slice t ++ sofar) previous
in c : chop p [] (start1 + slice) (t : ts)
| otherwise
= chop previous (created_in_this_slice t ++ sofar) start1 ts
where
(s, e) | SparkTree s e _ <- t = (s, e)
-- The common part of the slice and the duration.
mi = min (start1 + slice) e
ma = max start1 s
common = if mi < ma then 0 else mi - ma
-- Instead of drilling down the tree (unless it's a leaf),
-- we approximate by taking a proportion of the aggregate value,
-- depending on how much of the spark duration corresponding
-- to the tree node is covered by our timeslice.
proportion = if e > s
then fromIntegral common / fromIntegral (e - s)
else assert (e == s && common == 0) $ 0
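-- For example, a node spanning timestamps 0 to 100 whose overlap with
-- the current slice is 25 units yields a proportion of 0.25.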
-- Spark transitions in the tree are in units spark/duration.
-- Here the numbers are rescaled so that the units are spark/ms.
created_in_this_slice (SparkTree _ _ node) = case node of
SparkTreeLeaf delta -> [SparkStats.rescale proportion delta]
SparkTreeEmpty -> []
SparkSplit _ _ _ delta -> [SparkStats.rescale proportion delta]