1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373
|
/****************************************************************
* *
* Copyright (c) 2001-2020 Fidelity National Information *
* Services, Inc. and/or its subsidiaries. All rights reserved. *
* *
* This source code contains the intellectual property *
* of its copyright holder(s), and is made available *
* under a license. If you do not know the terms of *
* the license, please stop and do not read further. *
* *
****************************************************************/
#include "mdef.h"
#include "gdsroot.h"
#include "gtm_facility.h"
#include "fileinfo.h"
#include "gdsbt.h"
#include "gdsblk.h"
#include "gdsfhead.h"
#include "gdsbgtr.h"
#include "filestruct.h"
#include "iosp.h"
#include "jnl.h"
#include "lockconst.h"
#include "interlock.h"
#include "sleep_cnt.h"
#include "send_msg.h"
#include "wcs_sleep.h"
#include "is_proc_alive.h"
#include "compswap.h"
#include "is_file_identical.h"
#include "have_crit.h"
#include "wbox_test_init.h"
#include "anticipatory_freeze.h"
#include "repl_msg.h" /* needed for gtmsource.h */
#include "gtmsource.h" /* needed for jnlpool_addrs typedef */
#include "gtmmsg.h"
#include "io.h" /* needed by gtmsecshr.h */
#include "gtmsecshr.h" /* for continue_proc */
#include "gtm_c_stack_trace.h"
#include "sleep.h"
/* Period, counted in iterations of the wait loop in jnl_write_attempt(), at which a process that does not hold crit
 * tries to obtain it so that cleanup of phase2 commits can be attempted again.
 */
#define ITERATIONS_100K 100000
GBLREF jnlpool_addrs_ptr_t jnlpool; /* pointed at csa->jnlpool for the duration of jnl_write_attempt() and then restored */
GBLREF uint4 process_id; /* compared with the io_in_prog latch owner to detect that this process holds the latch */
GBLREF uint4 image_count; /* NOTE(review): not referenced by name in the routines below - confirm it is still needed */
/* Message codes used by the routines in this file.
 * NOTE(review): ERR_JNLFLUSH is not referenced in the code below (it may be used by a macro) - confirm.
 */
error_def(ERR_JNLACCESS);
error_def(ERR_JNLCNTRL);
error_def(ERR_JNLFLUSH);
error_def(ERR_JNLFLUSHNOPROG);
error_def(ERR_JNLPROCSTUCK);
error_def(ERR_JNLQIOSALVAGE);
error_def(ERR_JNLWRTDEFER);
error_def(ERR_JNLWRTNOWWRTR);
error_def(ERR_TEXT);
/* Helper of jnl_write_attempt(): make one attempt at getting jb->dskaddr up to "threshold".
 *
 * While the target has not been reached, the routine invokes jnl_qio_start() if the io_in_prog latch is free.
 * If the latch is held by another process it waits (bounded by JNL_MAX_FLUSH_TRIES), salvages the latch if the
 * holder is no longer alive, and otherwise reports JNLPROCSTUCK. If the instance is found frozen right after the
 * JNLPROCSTUCK report, it stays in the loop as a "freeze waiter" until the latch frees up or the freeze is lifted.
 *
 * Parameters:
 *	jpc       - journal private control of the region whose journal buffer is examined.
 *	lcnt      - in/out iteration counter owned by the caller. It is passed to wcs_sleep() and is reset to 1 here
 *		    when the latch holder changes, when the latch is salvaged, when the instance is frozen or when
 *		    a stuck writer has been reported.
 *	threshold - journal buffer address that jb->dskaddr is compared against.
 *
 * Returns:
 *	SS_NORMAL        - jnl_qio_start() returned success.
 *	ERR_JNLWRTDEFER  - the initial status; returned when the loop is left without a successful jnl_qio_start()
 *			   (including the case where the loop condition is already false on entry).
 *	ERR_JNLPROCSTUCK - the latch holder is alive but did not release the latch within JNL_MAX_FLUSH_TRIES.
 *	ERR_JNLCNTRL     - crit is held and jb->dskaddr > jb->freeaddr after the loop (overrides any of the above).
 *	Any other status from jnl_qio_start() is returned immediately, bypassing the post-loop checks.
 */
static uint4 jnl_sub_write_attempt(jnl_private_control *jpc, unsigned int *lcnt, uint4 threshold)
{
sgmnt_addrs *csa;
jnl_buffer_ptr_t jb;
unsigned int status;
boolean_t was_crit, exact_check, freeze_waiter = FALSE, freeze_cleared;
/**** Note static/local */
/* "writer" remembers the latch holder seen by the previous pass so that a change of writer can be detected across
 * calls. "loop_image_count" is only assigned in the code below.
 */
static uint4 loop_image_count, writer; /* assumes calls from one loop at a time */
/* NOTE(review): the locals in the next three declarations are not referenced in the visible body of this routine
 * (unless a macro uses them) - confirm before removing.
 */
uint4 new_dskaddr, new_dsk;
uint4 dskaddr, freeaddr, free, rsrv_freeaddr;
uint4 phase2_commit_index1;
static uint4 stuck_cnt = 0; /* number of JNLPROCSTUCK reports so far; passed to GET_C_STACK_FROM_SCRIPT */
jnlpool_addrs_ptr_t local_jnlpool; /* NOTE(review): also appears unused in the visible body */
intrpt_state_t prev_intrpt_state; /* set by DEFER_INTERRUPTS; only consumed while freeze_waiter is TRUE */
/* Some callers of jnl_sub_write_attempt (jnl_flush->jnl_write_attempt, jnl_write->jnl_write_attempt) are in
 * crit, and some other (jnl_wait->jnl_write_attempt) are not. Callers in crit do not need worry about journal
 * buffer fields (dskaddr, freeaddr) changing underneath them, but for those not in crit, jnl_sub_write_attempt
 * might incorrectly return an error status when journal file is switched. Such callers should check for
 * journal file switched condition and terminate any loops they are in.
 */
jb = jpc->jnl_buff;
status = ERR_JNLWRTDEFER; /* returned as is if no jnl_qio_start() call changes it */
csa = &FILE_INFO(jpc->region)->s_addrs;
was_crit = csa->now_crit;
exact_check = was_crit && (threshold == jb->rsrv_freeaddr); /* see comment in jnl_write_attempt() for why this is needed */
while (exact_check ? (jb->dskaddr != threshold) : (jb->dskaddr < threshold))
{
if (jb->io_in_prog_latch.u.parts.latch_pid == process_id)
{
/* if error condition occurred while doing jnl_qio_start(), then release the lock before waiting */
/* note that this is done only in UNIX because Unix does synchronous I/O */
jb->image_count = 0;
RELEASE_SWAPLOCK(&jb->io_in_prog_latch);
}
/* Latch is free (and, in debug builds, the JNLPROCSTUCK white-box test is not forcing the stuck path) */
if ((!jb->io_in_prog_latch.u.parts.latch_pid) DEBUG_ONLY(&& !WBTEST_ENABLED(WBTEST_JNLPROCSTUCK_FORCE)))
{
if (freeze_waiter)
{ /* We were waiting on a frozen instance: undo the freeze bookkeeping before attempting the write */
CLEAR_ANTICIPATORY_FREEZE(freeze_cleared); /* sets freeze_cleared */
REPORT_INSTANCE_UNFROZEN(freeze_cleared);
ENABLE_INTERRUPTS(INTRPT_IN_RETRY_LOOP, prev_intrpt_state);
freeze_waiter = FALSE;
}
status = jnl_qio_start(jpc);
}
if (SS_NORMAL == status)
break;
assert(ERR_JNLWRTNOWWRTR != status); /* don't have asynchronous jnl writes in Unix */
if ((ERR_JNLWRTNOWWRTR != status) && (ERR_JNLWRTDEFER != status))
{ /* Hard error: hand it straight back to the caller. Note this skips the checks that follow the loop. */
assert(!freeze_waiter);
return status;
}
if (freeze_waiter)
{
if (!IS_REPL_INST_FROZEN)
{ /* Somehow the freeze was lifted by someone else */
ENABLE_INTERRUPTS(INTRPT_IN_RETRY_LOOP, prev_intrpt_state);
freeze_waiter = FALSE;
} else
{ /* Still frozen: sleep and re-examine the latch */
wcs_sleep(*lcnt);
continue;
}
}
/* First pass of a wait sequence (*lcnt is 1) or the latch holder changed since the last pass: remember the
 * current holder, restart the caller's counter and return so the caller can retry.
 */
if ((writer != CURRENT_JNL_IO_WRITER(jb)) || (1 == *lcnt))
{
writer = CURRENT_JNL_IO_WRITER(jb);
loop_image_count = jb->image_count;
*lcnt = 1; /* !!! this should be detected and limited by the caller !!! */
# ifdef DEBUG
if (WBTEST_ENABLED(WBTEST_JNLPROCSTUCK_FORCE))
writer = process_id;
else
# endif
break;
}
/* Same holder as before and still within the retry budget: sleep, then return to the caller */
if ((*lcnt <= JNL_MAX_FLUSH_TRIES) DEBUG_ONLY(&& !(WBTEST_ENABLED(WBTEST_JNLPROCSTUCK_FORCE))))
{
wcs_sleep(*lcnt);
break;
}
/* Retry budget exhausted. Act only if the holder is still the one we have been waiting on. */
if ((writer == CURRENT_JNL_IO_WRITER(jb)) DEBUG_ONLY(|| (WBTEST_ENABLED(WBTEST_JNLPROCSTUCK_FORCE))))
{ /* It isn't strictly necessary to hold crit here since we are doing an atomic operation on
* io_in_prog_latch, which won't have any effect if the writer changed. If things are in a bad state,
* though, grabbing crit will call wcs_recover() for us.
* However, a grab_crit() here may result in a deadlock, so just do a grab_crit_immediate() and proceed.
*/
if (!was_crit)
grab_crit_immediate(jpc->region, TRUE, NOT_APPLICABLE);
/* If no one home, try to clear the latch. */
if (((FALSE == is_proc_alive(writer, jb->image_count))
DEBUG_ONLY(&& !(WBTEST_ENABLED(WBTEST_JNLPROCSTUCK_FORCE))))
&& COMPSWAP_UNLOCK(&jb->io_in_prog_latch, writer, jb->image_count, LOCK_AVAILABLE, 0))
{ /* We cleared the latch, so report it and restart the loop. */
BG_TRACE_PRO_ANY(csa, jnl_blocked_writer_lost);
jnl_send_oper(jpc, ERR_JNLQIOSALVAGE);
if (!was_crit && csa->now_crit) /* Check now_crit in case grab_crit_immediate() failed */
rel_crit(jpc->region);
*lcnt = 1;
continue;
}
if (!was_crit && csa->now_crit) /* Check now_crit in case grab_crit_immediate() failed */
rel_crit(jpc->region);
/* this is the interesting case: a process is stuck */
BG_TRACE_PRO_ANY(csa, jnl_blocked_writer_stuck);
if (IS_REPL_INST_FROZEN)
{ /* Restart if instance frozen. */
*lcnt = 1;
continue;
}
jpc->status = status;
send_msg_csa(CSA_ARG(csa) VARLSTCNT(3) ERR_JNLPROCSTUCK, 1, writer);
# ifdef DEBUG
/* The white-box test forces the stuck path only once; switch it off after the first report */
if (WBTEST_ENABLED(WBTEST_JNLPROCSTUCK_FORCE))
gtm_white_box_test_case_enabled = FALSE;
# endif
stuck_cnt++;
if (IS_REPL_INST_FROZEN)
{ /* The instance wasn't frozen above, but it is now, so most likely we froze it.
* Note the fact.
* Deferring interrupts here prevents possible hangs in GET_C_STACK_FROM_SCRIPT.
*/
DEFER_INTERRUPTS(INTRPT_IN_RETRY_LOOP, prev_intrpt_state);
freeze_waiter = TRUE;
}
GET_C_STACK_FROM_SCRIPT("JNLPROCSTUCK", process_id, writer, stuck_cnt);
*lcnt = 1; /* ??? is it necessary to limit this, and if so, how ??? */
if (freeze_waiter)
{ /* We are frozen, so restart. */
continue;
}
status = ERR_JNLPROCSTUCK;
continue_proc(writer); /* from gtmsecshr.h (see include comment); presumably nudges the stuck writer - confirm */
break;
}
/* The latch holder changed after the retry budget ran out: return and let the caller retry */
break;
}
if (csa->now_crit && (jb->dskaddr > jb->freeaddr))
{ /* jb->dskaddr > jb->freeaddr => out of design condition if we have crit.
* If we don't have crit, a journal switch could have occurred, so not an error condition.
*/
status = ERR_JNLCNTRL;
}
if (freeze_waiter)
{ /* Left the loop while still marked as a freeze waiter: clear the freeze and re-enable interrupts */
CLEAR_ANTICIPATORY_FREEZE(freeze_cleared); /* sets freeze_cleared */
REPORT_INSTANCE_UNFROZEN(freeze_cleared);
ENABLE_INTERRUPTS(INTRPT_IN_RETRY_LOOP, prev_intrpt_state);
}
return status;
}
/* Wait until the journal buffer's dskaddr reaches "threshold", driving writes through jnl_sub_write_attempt().
 *
 * The loop ends when the target is reached, when the journal file is found to have been switched, or (for a
 * caller that does not hold crit) when jpc->channel becomes NOJNL. The global "jnlpool" is pointed at
 * csa->jnlpool for the duration of the call and restored on every return path.
 *
 * Parameters:
 *	jpc       - journal private control of the region whose journal buffer is to be written.
 *	threshold - journal buffer address that jb->dskaddr must reach (exactly, if crit is held and threshold
 *		    equals jb->rsrv_freeaddr; at least, otherwise).
 *
 * Returns:
 *	SS_NORMAL when the loop terminates normally or a journal file switch is detected; otherwise the error status
 *	for which jnl_file_lost() was invoked in this routine.
 */
uint4 jnl_write_attempt(jnl_private_control *jpc, uint4 threshold)
{
jnl_buffer_ptr_t jb;
uint4 prev_freeaddr;
/* lcnt      : wait counter shared with jnl_sub_write_attempt(), which may reset it to 1.
 * prev_lcnt : value of lcnt at the start of the iteration; differs from lcnt afterwards only if the helper reset it.
 * cnt       : iterations since the last JNLFLUSHNOPROG report (or since entry).
 */
unsigned int lcnt, prev_lcnt, cnt;
sgmnt_addrs *csa;
jnlpool_addrs_ptr_t save_jnlpool;
unsigned int status;
boolean_t was_crit, jnlfile_lost, exact_check;
DCL_THREADGBL_ACCESS;
SETUP_THREADGBL_ACCESS;
jb = jpc->jnl_buff;
csa = &FILE_INFO(jpc->region)->s_addrs;
/* Use the journal pool associated with this region while in here; restored before every return */
save_jnlpool = jnlpool;
if (csa->jnlpool && (csa->jnlpool != jnlpool))
jnlpool = csa->jnlpool;
was_crit = csa->now_crit;
/* If holding crit and input threshold matches jb->rsrv_freeaddr, then we need to wait in the loop as long as dskaddr
* is not EQUAL to threshold. This is because if dskaddr is lesser than threshold we need to wait. If ever it
* becomes greater than threshold, it is an out-of-design situation (since dskaddr has effectively become > rsrv_freeaddr)
* and so we need to trigger "jnl_file_lost" which is done in "jnl_sub_write_attempt" so it is important to invoke
* that routine (in the for loop below). Hence the need to do an exact match instead of a < match. If not holding
* crit or input threshold does not match jb->rsrv_freeaddr, then dskaddr becoming GREATER than threshold is a valid
* condition so we should do a (dskaddr < threshold), not a (dskaddr != threshold) check in that case.
* NOTE(review): in the code visible here "jnl_sub_write_attempt" only returns ERR_JNLCNTRL for this situation and the
* "jnl_file_lost" call is made in the for loop below - confirm and reword if so.
*/
exact_check = was_crit && (threshold == jb->rsrv_freeaddr);
assert(!was_crit || threshold <= jb->rsrv_freeaddr);
/* Check that we either own crit on the current region or we don't own crit on ANY region. This is relied upon by
* the grab_crit calls (done in jnl_write_attempt and jnl_sub_write_attempt) to ensure no deadlocks are possible.
*/
assert(was_crit || (0 == have_crit(CRIT_HAVE_ANY_REG)));
for (prev_lcnt = lcnt = cnt = 1;
(was_crit || (NOJNL != jpc->channel)) && (exact_check ? jb->dskaddr != threshold : jb->dskaddr < threshold);
lcnt++, prev_lcnt = lcnt, cnt++)
{
prev_freeaddr = jb->freeaddr;
if (prev_freeaddr < threshold)
{
JNL_PHASE2_CLEANUP_IF_POSSIBLE(csa, jb); /* phase2 commits in progress. Clean them up if possible */
if (prev_freeaddr == jb->freeaddr)
{ /* No cleanup happened implies process in phase2 commit is still alive.
* Give it some time to finish its job. Not sleeping here could result in a spinloop
* below (due to the "continue" below under the "SS_NORMAL == status" if check).
*/
BG_TRACE_PRO_ANY(csa, jnl_phase2_cleanup_if_possible);
SLEEP_USEC(1, FALSE);
if (!was_crit && (0 == (lcnt % ITERATIONS_100K)))
{ /* We do not have crit and have slept a while (100K iterations of 1-micro-second each
* == a total of ~ 100 milli-seconds). See if crit can be obtained that way the
* JNL_PHASE2_CLEANUP_IF_POSSIBLE macro will attempt "jnl_phase2_salvage" if needed.
* An example scenario where this is needed is if a process is in "gds_rundown"->"jnl_wait"
* and does not hold crit but has written journal records after those written by another
* process which was kill -9ed in phase2 of its jnl commit. Not doing this check would
* cause the process in gds_rundown to be indefinitely stuck in "jnl_wait".
*/
if (grab_crit_immediate(jpc->region, OK_FOR_WCS_RECOVER_TRUE, NOT_APPLICABLE))
{
JNL_PHASE2_CLEANUP_IF_POSSIBLE(csa, jb); /* phase2 commits in progress.
* Clean them up if possible.
*/
rel_crit(jpc->region);
}
}
}
}
status = jnl_sub_write_attempt(jpc, &lcnt, threshold); /* may reset lcnt to 1 */
if (JNL_FILE_SWITCHED(jpc))
{ /* If we are holding crit, the journal file switch could happen in the form of journaling getting
* turned OFF (due to disk space issues etc.)
*/
jpc->status = SS_NORMAL;
if (save_jnlpool != jnlpool)
jnlpool = save_jnlpool;
return SS_NORMAL;
}
if (SS_NORMAL == status)
{
/* In Unix, writes are synchronous so SS_NORMAL status return implies we have completed a jnl
* write and "jb->dskaddr" is closer to "threshold" than it was in the previous iteration.
* A sleep at this point will only slow things down unnecessarily. Hence no sleep if Unix.
*/
continue;
}
if ((ERR_JNLCNTRL == status) || (ERR_JNLACCESS == status)
|| (csa->now_crit && (ERR_JNLWRTDEFER != status) && (ERR_JNLWRTNOWWRTR != status)))
{ /* If JNLCNTRL or if holding crit and not waiting for some other writer
* better turn off journaling and proceed with database update to avoid a database hang.
*/
if (was_crit)
jb->blocked = 0;
else
{
assertpro(0 == have_crit(CRIT_HAVE_ANY_REG));
grab_crit(jpc->region, WS_4); /*jnl_write_attempt has assert about have_crit that this relies on */
}
jnlfile_lost = FALSE;
/* Outside of the listed test settings, only JNLPROCSTUCK is expected to bring us here */
assert(TREF(gtm_test_fake_enospc) || WBTEST_ENABLED(WBTEST_JNL_FILE_LOST_DSKADDR)
|| WBTEST_ENABLED(WBTEST_RECOVER_ENOSPC) || (ERR_JNLPROCSTUCK == status));
if (JNL_ENABLED(csa->hdr) && (ERR_JNLPROCSTUCK != status))
{ /* We ignore the return value of jnl_file_lost() since we always want to report the journal
* error, whatever its error handling method is. Also, an operator log will be sent by some
* callers (t_end()) only if an error is returned here, and the operator log is wanted in
* those cases.
*/
jnl_file_lost(jpc, status);
jnlfile_lost = TRUE;
}
/* Else journaling got closed concurrently by another process by invoking "jnl_file_lost"
* just before we got crit. Do not invoke "jnl_file_lost" again on the same journal file.
* Instead continue and next iteration will detect the journal file has switched and terminate.
* Note that a JNLPROCSTUCK status also takes this "continue" path since the above check excludes it.
*/
if (!was_crit)
rel_crit(jpc->region);
if (!jnlfile_lost)
continue;
else
{
if (save_jnlpool != jnlpool)
jnlpool = save_jnlpool;
return status;
}
}
if (ERR_JNLWRTDEFER == status)
{ /* Check if the write was deferred because the instance is frozen.
* In that case, wait until the freeze is lifted instead of wasting time spinning on the latch
* in jnl_qio.
*/
assert(!csa->jnlpool || (csa->jnlpool == jnlpool));
WAIT_FOR_REPL_INST_UNFREEZE_SAFE(csa);
}
if ((ERR_JNLWRTDEFER != status) && (ERR_JNLWRTNOWWRTR != status))
{ /* If holding crit, then jnl_sub_write_attempt would have invoked jnl_file_lost which would have
* caused the JNL_FILE_SWITCHED check at the beginning of this for loop to succeed and return from
* this function so we should never have gotten here. Assert accordingly. If not holding crit,
* wait for some crit holder to invoke jnl_file_lost. Until then keep sleep looping indefinitely.
* The sleep in this case is not time-limited because the callers of jnl_write_attempt (particularly
* jnl_wait) do not check its return value so they assume success returns from this function. It is
* non-trivial to change the interface and code of all callers to handle the error situation so we
* instead choose to sleep indefinitely here until some crit process encounters the same error and
* triggers jnl_file_lost processing which will terminate the loop due to the JNL_FILE_SWITCHED check.
* NOTE(review): in the code visible here a crit holder with such a status is handled by the
* "csa->now_crit" clause of the if block above (which returns or continues), so that is what keeps
* a crit holder from reaching this point - confirm and reword the first sentence if so.
*/
assert(!csa->now_crit);
wcs_sleep(lcnt);
} else if (prev_lcnt != lcnt)
{ /* jnl_sub_write_attempt() reset lcnt in this iteration */
assert(1 == lcnt);
if ((ERR_JNLWRTDEFER == status) && (JNL_FLUSH_PROG_TRIES <= cnt))
{ /* Change of writer */
send_msg_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_JNLFLUSHNOPROG, 2, JNL_LEN_STR(csa->hdr),
ERR_TEXT, 2, LEN_AND_LIT("No progress even with multiple writers"));
cnt = 0; /* restart the no-progress count after reporting */
}
}
}
if (save_jnlpool != jnlpool)
jnlpool = save_jnlpool;
return SS_NORMAL;
}
|