/****************************************************************
* *
* Copyright (c) 2012-2018 Fidelity National Information *
* Services, Inc. and/or its subsidiaries. All rights reserved. *
* *
* This source code contains the intellectual property *
* of its copyright holder(s), and is made available *
* under a license. If you do not know the terms of *
* the license, please stop and do not read further. *
* *
****************************************************************/
#include "mdef.h"
#include <errno.h>
#include "aswp.h"
#include "gtm_facility.h"
#include "fileinfo.h"
#include "gdsroot.h"
#include "gdsbt.h"
#include "gdsfhead.h"
#include "filestruct.h"
#include "jnl.h"
#include "copy.h"
#include "interlock.h"
#include "performcaslatchcheck.h"
#include "relqop.h"
#include "wcs_sleep.h"
#include "caller_id.h"
#include "rel_quant.h"
#include "sleep_cnt.h"
#include "gtmsource_srv_latch.h"
#include "repl_msg.h"
#include "gtmsource.h"
#include "repl_instance.h"
#include "have_crit.h"
#include "util.h" /* For OUT_BUFF_SIZE */
GBLREF int4 process_id;
GBLREF int num_additional_processors;
GBLREF jnlpool_addrs_ptr_t jnlpool;
GBLREF jnl_gbls_t jgbl;
#ifdef DEBUG
GBLREF node_local_ptr_t locknl;
GBLREF gd_region *gv_cur_region;
GBLREF boolean_t is_src_server;
#endif
error_def(ERR_REPLREQROLLBACK);
error_def(ERR_SRVLCKWT2LNG);	/* raised below when the latch cannot be obtained within max_timeout_in_secs */
error_def(ERR_TEXT);
/* Note we don't increment fast_lock_count as part of getting the latch and decrement it when releasing it because ROLLBACK
* can hold onto this latch for a long while and can do updates in this duration and we should NOT have a non-zero fast_lock_count
* as many places like t_begin/dsk_read have asserts to this effect. It is okay to NOT increment fast_lock_count as ROLLBACK
* anyways have logic to disable interrupts the moment it starts doing database updates.
*/
/* Attempt to grab the source server latch, spinning and sleeping for up to "max_timeout_in_secs" seconds.
 *
 * Parameters:
 *	latch			- the global latch to acquire (compare-and-swap based).
 *	max_timeout_in_secs	- upper bound on how long to keep retrying before issuing ERR_SRVLCKWT2LNG.
 *	onln_rlbk_action	- ASSERT_NO_ONLINE_ROLLBACK or HANDLE_CONCUR_ONLINE_ROLLBACK; controls how a
 *				  concurrent online rollback (detected via onln_rlbk_cycle mismatch) is treated.
 * Returns:
 *	TRUE once the latch is obtained. Note that if a concurrent online rollback was detected and
 *	onln_rlbk_action is HANDLE_CONCUR_ONLINE_ROLLBACK, the latch is RELEASED again before returning TRUE
 *	(gtmsource_onln_rlbk_clnup sets gtmsource_state as a side-effect for the caller to examine).
 *	Does not return on error: issues rts_error_csa for a corrupt instance file header or on timeout.
 */
boolean_t grab_gtmsource_srv_latch(sm_global_latch_ptr_t latch, uint4 max_timeout_in_secs, uint4 onln_rlbk_action)
{
	uint4		spins, maxspins, retries, max_retries;
	unix_db_info	*udi;
	sgmnt_addrs	*repl_csa;
	boolean_t	cycle_mismatch;
	char		scndry_msg[OUT_BUFF_SIZE];

	assert(!have_crit(CRIT_HAVE_ANY_REG));	/* crit and this latch must not be nested */
	udi = FILE_INFO(jnlpool->jnlpool_dummy_reg);
	repl_csa = &udi->s_addrs;
	/* Spin only makes sense on multi-processor machines; otherwise go straight to yield/sleep */
	maxspins = num_additional_processors ? MAX_LOCK_SPINS(LOCK_SPINS, num_additional_processors) : 1;
	/* outer-loop : X minutes, 1 loop in 4 is sleep of 1 ms. The comparison below guards the
	 * "secs * 4 * 1000" multiplication against uint4 overflow by clamping at UINT32_MAX.
	 */
	max_retries = (max_timeout_in_secs < (UINT32_MAX / 4 / 1000)) ? (max_timeout_in_secs * 4 * 1000) : UINT32_MAX;
	for (retries = max_retries - 1; 0 < retries; retries--)
	{	/* seems like it should be a mutex */
		for (spins = maxspins; 0 < spins; spins--)
		{
			assert(latch->u.parts.latch_pid != process_id); /* We better not hold it if trying to get it */
			if (GET_SWAPLOCK(latch))
			{
				DEBUG_ONLY(locknl = repl_csa->nl); /* Use the journal pool to maintain lock history */
				LOCK_HIST("OBTN", latch, process_id, retries);
				DEBUG_ONLY(locknl = NULL);
				if (jnlpool->repl_inst_filehdr->file_corrupt && !jgbl.onlnrlbk)
				{
					/* Journal pool indicates an abnormally terminated online rollback. Cannot continue until
					 * the rollback command is re-run to bring the journal pool/file and instance file to a
					 * consistent state.
					 */
					SNPRINTF(scndry_msg, OUT_BUFF_SIZE, "Instance file header has file_corrupt field set to "
							"TRUE");
					/* No need to release the latch before rts_error_csa (mupip_exit_handler will do it for
					 * us).
					 */
					rts_error_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_REPLREQROLLBACK, 2, LEN_AND_STR(udi->fn),
							ERR_TEXT, 2, LEN_AND_STR(scndry_msg));
				}
				/* A cycle mismatch means an online rollback ran since we last synced our cycle */
				cycle_mismatch = (repl_csa->onln_rlbk_cycle != jnlpool->jnlpool_ctl->onln_rlbk_cycle);
				assert((ASSERT_NO_ONLINE_ROLLBACK != onln_rlbk_action) || !cycle_mismatch);
				if ((HANDLE_CONCUR_ONLINE_ROLLBACK == onln_rlbk_action) && cycle_mismatch)
				{
					assert(is_src_server);
					SYNC_ONLN_RLBK_CYCLES;
					gtmsource_onln_rlbk_clnup();	/* side-effect : sets gtmsource_state */
					rel_gtmsource_srv_latch(latch);	/* caller is expected to check gtmsource_state */
				}
				return TRUE;
			}
		}
		if (retries & 0x3)
		{	/* On all but every 4th pass, do a simple rel_quant */
			rel_quant();
		} else
		{
			/* On every 4th pass, we bide for awhile */
			wcs_sleep(LOCK_SLEEP);
			/* Check if we're due to check for lock abandonment check or holder wakeup */
			if (0 == (retries & (LOCK_CASLATCH_CHKINTVL - 1)))
				performCASLatchCheck(latch, TRUE);
		}
	}
	/* Timed out: dump the lock history and raise ERR_SRVLCKWT2LNG (this is not expected in practice,
	 * hence the assert(FALSE) to catch it in DEBUG builds).
	 */
	DUMP_LOCKHIST();
	assert(FALSE);
	assert(jnlpool->gtmsource_local && jnlpool->gtmsource_local->gtmsource_pid);
	rts_error_csa(CSA_ARG(NULL) VARLSTCNT(5) ERR_SRVLCKWT2LNG, 2, jnlpool->gtmsource_local->gtmsource_pid,
			max_timeout_in_secs);
	return FALSE; /* to keep the compiler happy */
}
/* Release the source server latch previously obtained by grab_gtmsource_srv_latch.
 * Asserts (DEBUG builds) that this process is the current holder. Always returns TRUE.
 */
boolean_t rel_gtmsource_srv_latch(sm_global_latch_ptr_t latch)
{
	sgmnt_addrs	*csa;

	csa = &FILE_INFO(jnlpool->jnlpool_dummy_reg)->s_addrs;
	DEBUG_ONLY(locknl = csa->nl);	/* lock history lives in the journal pool's node_local */
	LOCK_HIST("RLSE", latch, process_id, 0);
	DEBUG_ONLY(locknl = NULL);
	assert(process_id == latch->u.parts.latch_pid);
	RELEASE_SWAPLOCK(latch);
	return TRUE;
}
boolean_t gtmsource_srv_latch_held_by_us()
{
return (process_id == jnlpool->gtmsource_local->gtmsource_srv_latch.u.parts.latch_pid);
}