1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432
|
/* record_replay.h -*-C++-*-
*
*************************************************************************
*
* @copyright
* Copyright (C) 2012-2013, Intel Corporation
* All rights reserved.
*
* @copyright
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* @copyright
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
**************************************************************************/
/**
* @file record-replay.h
*
* @brief record-replay.h and .cpp encapsulate most of the functionality to
* record and play back a Cilk Plus application.
*
* Recording is directed by the setting of the CILK_RECORD_LOG environment
* variable. If it's defined, the value specifies the root we'll use to
* generate files for each worker using the following format string:
* "%s%d.cilklog", where the integer is the value of w->self.
*
* Replay is directed by the setting of the CILK_REPLAY_LOG environment
* variable, interpreted the same way as CILK_RECORD_LOG. If both
* CILK_RECORD_LOG and CILK_REPLAY_LOG are defined, a warning will be given
* and the attempt to record a log will be ignored.
*
* Recording is relatively straightforward. We write all information about a
* worker to a per-worker file.
*
* Each pedigree record consists of the following fields. All fields must be
* present in every record to make parsing easy.
* - Type - A string identifying the pedigree record. See the PED_TYPE_STR_
* macros for the currently defined values.
* - Pedigree - A string of pedigree values, with underscores between
* adjacent values.
* - i1 - Record type-specific value. -1 if not used.
* - i2 - Record type-specific value. -1 if not used.
*
* WORKERS record - only written to the file for worker 0. Note that this is
* the first worker in the workers array. Worker 0 is the first system worker,
* *NOT* a user worker.
* - Type: "Workers"
* - Pedigree: Always "0" - ignored
* - i1: Number of workers (g->P) when we recorded the log. A mismatch when
* we attempt to replay the log will result in aborting the execution.
* - i2: Log version number - Specified by PED_VERSION in record-replay.cpp
*
* STEAL record - written after a successful steal.
* - Type: "Steal"
* - Pedigree: Pedigree of stolen frame
* - i1: Worker the frame was stolen from
* - i2: -1
*
* SYNC record - written after a worker continues from a sync.
* - Type: "Sync"
* - Pedigree: Pedigree of sync. Note that this is the pedigree *before*
* the pedigree in incremented in setup_for_execution_pedigree().
* - i1: -1
* - i2: -1
*
* ORPHANED record - saved on a return to a stolen parent.
* - Type: "Orphaned"
* - Pedigree: Pedigree of the parent frame *before* the pedigree is
* incremented by the return
* - i1: -1
* - i2: -1
*
* On replay, the data is loaded into a per-worker array, and the data is
* consumed in order as needed.
*/
#ifndef INCLUDED_RECORD_REPLAY_DOT_H
#define INCLUDED_RECORD_REPLAY_DOT_H
#include "cilk/common.h"
#include "global_state.h"
/**
* Define CILK_RECORD_REPLAY to enable record/replay functionality. If
* CILK_RECORD_REPLAY is not defined, all of the record/replay functions in
* record-replay.h will be stubbed out. Since they're declared as inline,
* functions, the resulting build should have no performance impact due to
* the implementation or record/replay.
*/
#define CILK_RECORD_REPLAY 1
/**
* Define RECORD_ON_REPLAY=1 to write logs when we're replaying a log. This
* should only be needed when debugging the replay functionality. This should
* always be defined as 0 when record-replay.h is checked in.
*/
#define RECORD_ON_REPLAY 0
__CILKRTS_BEGIN_EXTERN_C
#ifdef CILK_RECORD_REPLAY
// Declarations of internal record/replay functions. The inlined versions
// further down do some preliminary testing (like if we're not recording or
// replaying) and will stub out the functionality if we've compiled out the
// record/replay feature
int replay_match_sync_pedigree_internal(__cilkrts_worker *w);
void replay_wait_for_steal_if_parent_was_stolen_internal(__cilkrts_worker *w);
void replay_record_steal_internal(__cilkrts_worker *w, int32_t victim_id);
void replay_record_sync_internal(__cilkrts_worker *w);
void replay_record_orphaned_internal(__cilkrts_worker *w);
int replay_match_victim_pedigree_internal(__cilkrts_worker *w, __cilkrts_worker *victim);
void replay_advance_from_sync_internal (__cilkrts_worker *w);
int replay_get_next_recorded_victim_internal(__cilkrts_worker *w);
#endif // CILK_RECORD_REPLAY
// Publically defined record/replay API
/**
* If we're replaying a log, wait for our parent to be stolen if it was when
* the log was recorded. If record/replay is compiled out, this is a noop.
*
* @param w The __cilkrts_worker we're executing on. The worker's replay
* list will be checked for a ORPHANED record with a matching pedigree. If
* there is a match, the ORPHANED record will be consumed.
*/
#ifdef CILK_RECORD_REPLAY
__CILKRTS_INLINE
void replay_wait_for_steal_if_parent_was_stolen(__cilkrts_worker *w)
{
// Only check if we're replaying a log
if (REPLAY_LOG == w->g->record_or_replay)
replay_wait_for_steal_if_parent_was_stolen_internal(w);
}
#else
__CILKRTS_INLINE
void replay_wait_for_steal_if_parent_was_stolen(__cilkrts_worker *w)
{
// If record/replay is disabled, we never wait
}
#endif // CILK_RECORD_REPLAY
/**
* Called from random_steal() to override the ID of the randomly chosen victim
* worker which this worker will attempt to steal from. Returns the worker id
* of the next victim this worker was recorded stealing from, or -1 if the
* next record in the log is not a STEAL.
*
* @note This call does NOT attempt to match the pedigree. That will be done
* by replay_match_victim_pedigree() after random_steal() has locked the victim
* worker.
*
* @param w The __cilkrts_worker we're executing on. The worker's replay log
* is checked for a STEAL record. If we've got one, the stolen worker ID is
* returned.
* @param id The randomly chosen victim worker ID. If we're not replaying a
* log, or if record/replay has been compiled out, this is the value that
* will be returned.
*
* @return id if we're not replaying a log
* @return -1 if the next record is not a STEAL
* @return recorded stolen worker ID if we've got a matching STEAL record
*/
#ifdef CILK_RECORD_REPLAY
__CILKRTS_INLINE
int replay_get_next_recorded_victim(__cilkrts_worker *w, int id)
{
// Only check if we're replaying a log
if (REPLAY_LOG == w->g->record_or_replay)
return replay_get_next_recorded_victim_internal(w);
else
return id;
}
#else
__CILKRTS_INLINE
int replay_get_next_recorded_victim(__cilkrts_worker *w, int id)
{
// Record/replay is disabled. Always return the original worker id
return id;
}
#endif // CILK_RECORD_REPLAY
/**
* Initialize per-worker data for record/replay. A noop if record/replay
* is disabled, or if we're not recording or replaying anything.
*
* If we're recording a log, this will ready us to create the per-worker
* logs.
*
* If we're replaying a log, this will read the logs into the per-worker
* structures.
*
* @param g Cilk runtime global state
*/
void replay_init_workers(global_state_t *g);
/**
* Record a record on a successful steal. A noop if record/replay is
* diabled, or if we're not recording anything
*
* @param w The __cilkrts_worker we're executing on. The pedigree of
* the stolen frame will be walked to generate the STEAL record.
*
* @param victim_id The worker ID of the worker w stole from.
*/
#ifdef CILK_RECORD_REPLAY
__CILKRTS_INLINE
void replay_record_steal(__cilkrts_worker *w, int32_t victim_id)
{
#if RECORD_ON_REPLAY
// If we're recording on replay, write the record if we're recording or
// replaying
if (RECORD_REPLAY_NONE == w->g->record_or_replay)
return;
#else
// Only write the record if we're recording
if (RECORD_LOG != w->g->record_or_replay)
return;
#endif
replay_record_steal_internal(w, victim_id);
}
#else
__CILKRTS_INLINE
void replay_record_steal(__cilkrts_worker *w, int32_t victim_id)
{
}
#endif // CILK_RECORD_REPLAY
/**
* Record a record when continuing after a sync. A noop if record/replay is
* diabled, or if we're not recording anything, or if the sync was abandoned,
* meaning this isn't the worker that continues from the sync.
*
* @param w The __cilkrts_worker for we're executing on. The pedigree of
* the sync-ing frame will be walked to generate the SYNC record.
*
* @param continuing True if this worker will be continuing from the
* cilk_sync. A SYNC record will only be generated if continuing is true.
*/
#ifdef CILK_RECORD_REPLAY
__CILKRTS_INLINE
void replay_record_sync(__cilkrts_worker *w, int continuing)
{
// If this was not the last worker to the syn, return
if (! continuing)
return;
#if RECORD_ON_REPLAY
// If we're recording on replay, write the record if we're recording or
// replaying
if (RECORD_REPLAY_NONE == w->g->record_or_replay)
return;
#else
// Only write the record if we're recording
if (RECORD_LOG != w->g->record_or_replay)
return;
#endif
replay_record_sync_internal(w);
}
#else
__CILKRTS_INLINE
void replay_record_sync(__cilkrts_worker *w, int abandoned)
{
}
#endif // CILK_RECORD_REPLAY
/**
* Record a record on a return to a stolen parent. A noop if record/replay is
* diabled, or if we're not recording anything.
*
* @param w The __cilkrts_worker for we're executing on. The pedigree of the
* frame that has discovered that its parent has been stolken will be walked
* to generate the ORPHANED record.
*/
#ifdef CILK_RECORD_REPLAY
__CILKRTS_INLINE
void replay_record_orphaned(__cilkrts_worker *w)
{
#if RECORD_ON_REPLAY
// If we're recording on replay, write the record if we're recording or
// replaying
if (RECORD_REPLAY_NONE == w->g->record_or_replay)
return;
#else
// Only write the record if we're recording
if (RECORD_LOG != w->g->record_or_replay)
return;
#endif
replay_record_orphaned_internal(w);
}
#else
__CILKRTS_INLINE
void replay_record_orphaned(__cilkrts_worker *w)
{
}
#endif // CILK_RECORD_REPLAY
/**
* Test whether the frame at the head of the victim matches the pedigree of
* the frame that was recorded being stolen. Called in random steal to verify
* that we're about to steal the correct frame.
*
* @param w The __cilkrts_worker for we're executing on. The current worker
* is needed to find the replay entry to be checked.
*
* @param victim The __cilkrts_worker for we're proposing to steal a frame
* from. The victim's head entry is
* is needed to find the replay entry to be checked.
*
* @return 0 if we're replaying a log and the victim's pedigree does NOT match
* the next frame the worker is expected to steal.
*
* @return 1 in all other cases to indicate that the steal attempt should
* continue
*/
#ifdef CILK_RECORD_REPLAY
__CILKRTS_INLINE
int replay_match_victim_pedigree(__cilkrts_worker *w, __cilkrts_worker *victim)
{
// We're not replaying a log. The victim is always acceptable
if (REPLAY_LOG != w->g->record_or_replay)
return 1;
// Return 1 if the victim's pedigree matches the frame the worker stole
// when we recorded the log
return replay_match_victim_pedigree_internal(w, victim);
}
#else
__CILKRTS_INLINE
int replay_match_victim_pedigree(__cilkrts_worker *w, __cilkrts_worker *victim)
{
// Record/replay is disabled. The victim is always acceptable
return 1;
}
#endif // CILK_RECORD_REPLAY
/**
* Test whether the current replay entry is a sync record matching the
* worker's pedigree.
*
* @param w The __cilkrts_worker for we're executing on.
*
* @return 1 if the current replay entry matches the current pedigree.
* @return 0 if there's no match, or if we're not replaying a log.
*/
#ifdef CILK_RECORD_REPLAY
__CILKRTS_INLINE
int replay_match_sync_pedigree(__cilkrts_worker *w)
{
// If we're not replaying, assume no match
if (REPLAY_LOG != w->g->record_or_replay)
return 0;
return replay_match_sync_pedigree_internal(w);
}
#else
__CILKRTS_INLINE
int replay_match_sync_pedigree(__cilkrts_worker *w)
{
// Record/replay is disabled. Assume no match
return 0;
}
#endif
/**
* Marks a sync record seen, advancing to the next record in the replay list.
*
* This function will only advance to the next record if:
* - Record/replay hasn't been compiled out AND
* - We're replaying a log AND
* - A match was found AND
* - The sync is not being abandoned
*
* @param w The __cilkrts_worker for we're executing on.
* @param match_found The value returned by replay_match_sync_pedigree(). If
* match_found is false, nothing is done.
* @param continuing Flag indicating whether this worker will continue from
* the sync (it's the last worker to the sync) or if it will abandon the work
* and go to the scheduling loop to look for more work it can steal.
*/
#ifdef CILK_RECORD_REPLAY
__CILKRTS_INLINE
void replay_advance_from_sync(__cilkrts_worker *w, int match_found, int continuing)
{
// If we're replaying a log, and the current sync wasn't abandoned, and we
// found a match in the log, mark the sync record seen.
if ((REPLAY_LOG == w->g->record_or_replay) && match_found && continuing)
replay_advance_from_sync_internal(w);
}
#else
__CILKRTS_INLINE
void replay_advance_from_sync(__cilkrts_worker *w, int match_found, int continuing)
{
}
#endif
/**
* Release any resources used to read or write a replay log.
*
* @param g Cilk runtime global state
*/
void replay_term(global_state_t *g);
__CILKRTS_END_EXTERN_C
#endif // ! defined(INCLUDED_RECORD_REPLAY_DOT_H)
|