File: jnl_write_attempt.c

package info (click to toggle)
fis-gtm 6.3-014-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 36,680 kB
  • sloc: ansic: 333,039; asm: 5,180; csh: 4,956; sh: 1,924; awk: 291; makefile: 66; sed: 13
file content (373 lines) | stat: -rw-r--r-- 15,191 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
/****************************************************************
 *								*
 * Copyright (c) 2001-2020 Fidelity National Information	*
 * Services, Inc. and/or its subsidiaries. All rights reserved.	*
 *								*
 *	This source code contains the intellectual property	*
 *	of its copyright holder(s), and is made available	*
 *	under a license.  If you do not know the terms of	*
 *	the license, please stop and do not read further.	*
 *								*
 ****************************************************************/

#include "mdef.h"

#include "gdsroot.h"
#include "gtm_facility.h"
#include "fileinfo.h"
#include "gdsbt.h"
#include "gdsblk.h"
#include "gdsfhead.h"
#include "gdsbgtr.h"
#include "filestruct.h"
#include "iosp.h"
#include "jnl.h"
#include "lockconst.h"
#include "interlock.h"
#include "sleep_cnt.h"
#include "send_msg.h"
#include "wcs_sleep.h"
#include "is_proc_alive.h"
#include "compswap.h"
#include "is_file_identical.h"
#include "have_crit.h"
#include "wbox_test_init.h"
#include "anticipatory_freeze.h"
#include "repl_msg.h"			/* needed for gtmsource.h */
#include "gtmsource.h"			/* needed for jnlpool_addrs typedef */
#include "gtmmsg.h"
#include "io.h"                 /* needed by gtmsecshr.h */
#include "gtmsecshr.h"          /* for continue_proc */
#include "gtm_c_stack_trace.h"
#include "sleep.h"

/* In jnl_write_attempt, a process that is waiting without crit (and saw no phase2 cleanup progress) tries
 * grab_crit_immediate() whenever its loop counter is a multiple of this value.
 */
#define	ITERATIONS_100K	100000

GBLREF	jnlpool_addrs_ptr_t	jnlpool;	/* saved, possibly switched to csa->jnlpool, and restored in jnl_write_attempt */
GBLREF	uint4			process_id;	/* compared against the owner pid of jb->io_in_prog_latch */
GBLREF	uint4			image_count;	/* NOTE(review): not referenced by name in the functions below - confirm still needed */

/* Message/status codes made available to this module */
error_def(ERR_JNLACCESS);
error_def(ERR_JNLCNTRL);
error_def(ERR_JNLFLUSH);
error_def(ERR_JNLFLUSHNOPROG);
error_def(ERR_JNLPROCSTUCK);
error_def(ERR_JNLQIOSALVAGE);
error_def(ERR_JNLWRTDEFER);
error_def(ERR_JNLWRTNOWWRTR);
error_def(ERR_TEXT);

/* Make one attempt at getting the journal buffer written out up to "threshold".
 *
 * While jb->dskaddr has not reached threshold, this routine either starts a journal write itself through
 * jnl_qio_start() (when jb->io_in_prog_latch has no owner) or keeps track of the process that owns the latch:
 * sleeping, salvaging the latch from an owner that is_proc_alive() reports as gone, or reporting a stuck owner.
 *
 * Parameters:
 *	jpc		journal private control; jpc->jnl_buff and jpc->region identify the journal buffer and region.
 *			jpc->status is set just before a JNLPROCSTUCK message is sent.
 *	lcnt		caller-owned try counter (in/out). It is the argument given to wcs_sleep() and is reset to 1
 *			when a new latch owner is noticed, a latch is salvaged, or a stuck writer is handled.
 *	threshold	journal buffer address that jb->dskaddr is expected to reach.
 *
 * Returns:
 *	SS_NORMAL		jnl_qio_start() returned SS_NORMAL (unless replaced by ERR_JNLCNTRL, see below).
 *	ERR_JNLWRTDEFER		the initial status; still in effect when the loop is not entered, when jnl_qio_start()
 *				is not called, or when jnl_qio_start() itself returns it.
 *	ERR_JNLPROCSTUCK	the remembered writer still owns the latch after more than JNL_MAX_FLUSH_TRIES tries,
 *				its latch could not be salvaged and the instance is not frozen.
 *	ERR_JNLCNTRL		on exit through the bottom of the function, crit is held and jb->dskaddr > jb->freeaddr.
 *	Any other status from jnl_qio_start() is returned to the caller right away.
 *
 * The static variables below carry state from one call to the next.
 */
static uint4 jnl_sub_write_attempt(jnl_private_control *jpc, unsigned int *lcnt, uint4 threshold)
{
	sgmnt_addrs		*csa;
	jnl_buffer_ptr_t	jb;
	unsigned int		status;
	boolean_t		was_crit, exact_check, freeze_waiter = FALSE, freeze_cleared;
	/**** Note static/local */
	/* "writer" remembers the latch owner seen by an earlier pass; loop_image_count is assigned below but is not
	 * read anywhere in this function.
	 */
	static uint4		loop_image_count, writer;	/* assumes calls from one loop at a time */
	/* NOTE(review): the next three declarations and local_jnlpool are not referenced by name in this function;
	 * they may be leftovers or may be needed by a macro expansion - confirm before removing.
	 */
	uint4			new_dskaddr, new_dsk;
	uint4			dskaddr, freeaddr, free, rsrv_freeaddr;
	uint4			phase2_commit_index1;
	static uint4		stuck_cnt = 0;	/* number of JNLPROCSTUCK reports so far; passed to GET_C_STACK_FROM_SCRIPT */
	jnlpool_addrs_ptr_t	local_jnlpool;
	intrpt_state_t		prev_intrpt_state;

	/* Some callers of jnl_sub_write_attempt (jnl_flush->jnl_write_attempt, jnl_write->jnl_write_attempt) are in
	 * crit, and some other (jnl_wait->jnl_write_attempt) are not. Callers in crit do not need worry about journal
	 * buffer fields (dskaddr, freeaddr) changing underneath them, but for those not in crit, jnl_sub_write_attempt
	 * might incorrectly return an error status when journal file is switched. Such callers should check for
	 * journal file switched condition and terminate any loops they are in.
	 */
	jb = jpc->jnl_buff;
	status = ERR_JNLWRTDEFER;
	csa = &FILE_INFO(jpc->region)->s_addrs;
	was_crit = csa->now_crit;
	exact_check = was_crit && (threshold == jb->rsrv_freeaddr); /* see comment in jnl_write_attempt() for why this is needed */
	while (exact_check ? (jb->dskaddr != threshold) : (jb->dskaddr < threshold))
	{
		if (jb->io_in_prog_latch.u.parts.latch_pid == process_id)
		{
			/* if error condition occurred while doing jnl_qio_start(), then release the lock before waiting */
			/* note that this is done only in UNIX because Unix does synchronous I/O */
			jb->image_count = 0;
			RELEASE_SWAPLOCK(&jb->io_in_prog_latch);
		}
		/* Latch has no owner: we can try the write ourselves */
		if ((!jb->io_in_prog_latch.u.parts.latch_pid) DEBUG_ONLY(&& !WBTEST_ENABLED(WBTEST_JNLPROCSTUCK_FORCE)))
		{
			if (freeze_waiter)
			{	/* Undo the freeze bookkeeping set up by the stuck-writer code further down before writing */
				CLEAR_ANTICIPATORY_FREEZE(freeze_cleared);			/* sets freeze_cleared */
				REPORT_INSTANCE_UNFROZEN(freeze_cleared);
				ENABLE_INTERRUPTS(INTRPT_IN_RETRY_LOOP, prev_intrpt_state);
				freeze_waiter = FALSE;
			}
			status = jnl_qio_start(jpc);
		}
		if (SS_NORMAL == status)
			break;
		assert(ERR_JNLWRTNOWWRTR != status);	/* don't have asynchronous jnl writes in Unix */
		if ((ERR_JNLWRTNOWWRTR != status) && (ERR_JNLWRTDEFER != status))
		{	/* Note: this early return bypasses the JNLCNTRL check and the freeze cleanup after the loop */
			assert(!freeze_waiter);
			return status;
		}
		if (freeze_waiter)
		{
			if (!IS_REPL_INST_FROZEN)
			{	/* Somehow the freeze was lifted by someone else */
				ENABLE_INTERRUPTS(INTRPT_IN_RETRY_LOOP, prev_intrpt_state);
				freeze_waiter = FALSE;
			} else
			{
				wcs_sleep(*lcnt);
				continue;
			}
		}
		/* First try, or the latch owner differs from the remembered one: remember the new owner and restart the count */
		if ((writer != CURRENT_JNL_IO_WRITER(jb)) || (1 == *lcnt))
		{
			writer = CURRENT_JNL_IO_WRITER(jb);
			loop_image_count = jb->image_count;
			*lcnt = 1;	/* !!! this should be detected and limited by the caller !!! */
#			ifdef DEBUG
			if (WBTEST_ENABLED(WBTEST_JNLPROCSTUCK_FORCE))
				writer = process_id;
			else
#			endif
			break;
		}
		/* Same writer as remembered and still within the try limit: sleep, then hand control back to the caller */
		if ((*lcnt <= JNL_MAX_FLUSH_TRIES) DEBUG_ONLY(&& !(WBTEST_ENABLED(WBTEST_JNLPROCSTUCK_FORCE))))
		{
			wcs_sleep(*lcnt);
			break;
		}
		if ((writer == CURRENT_JNL_IO_WRITER(jb)) DEBUG_ONLY(|| (WBTEST_ENABLED(WBTEST_JNLPROCSTUCK_FORCE))))
		{	/* It isn't strictly necessary to hold crit here since we are doing an atomic operation on
			 * io_in_prog_latch, which won't have any effect if the writer changed. If things are in a bad state,
			 * though, grabbing crit will call wcs_recover() for us.
			 * However, a grab_crit() here may result in a deadlock, so just do a grab_crit_immediate() and proceed.
			 */
			if (!was_crit)
				grab_crit_immediate(jpc->region, TRUE, NOT_APPLICABLE);
			/* If no one home, try to clear the latch. */
			if (((FALSE == is_proc_alive(writer, jb->image_count))
				DEBUG_ONLY(&& !(WBTEST_ENABLED(WBTEST_JNLPROCSTUCK_FORCE))))
				&& COMPSWAP_UNLOCK(&jb->io_in_prog_latch, writer, jb->image_count, LOCK_AVAILABLE, 0))
			{	/* We cleared the latch, so report it and restart the loop. */
				BG_TRACE_PRO_ANY(csa, jnl_blocked_writer_lost);
				jnl_send_oper(jpc, ERR_JNLQIOSALVAGE);
				if (!was_crit && csa->now_crit)		/* Check now_crit in case grab_crit_immediate() failed */
					rel_crit(jpc->region);
				*lcnt = 1;
				continue;
			}
			if (!was_crit && csa->now_crit)		/* Check now_crit in case grab_crit_immediate() failed */
				rel_crit(jpc->region);
			/* this is the interesting case: a process is stuck */
			BG_TRACE_PRO_ANY(csa, jnl_blocked_writer_stuck);
			if (IS_REPL_INST_FROZEN)
			{	/* Restart if instance frozen. */
				*lcnt = 1;
				continue;
			}
			jpc->status = status;
			send_msg_csa(CSA_ARG(csa) VARLSTCNT(3) ERR_JNLPROCSTUCK, 1, writer);
#			ifdef DEBUG
			if (WBTEST_ENABLED(WBTEST_JNLPROCSTUCK_FORCE))
				gtm_white_box_test_case_enabled = FALSE;
#			endif
			stuck_cnt++;
			if (IS_REPL_INST_FROZEN)
			{	/* The instance wasn't frozen above, but it is now, so most likely we froze it.
				 * Note the fact.
				 * Deferring interrupts here prevents possible hangs in GET_C_STACK_FROM_SCRIPT.
				 */
				DEFER_INTERRUPTS(INTRPT_IN_RETRY_LOOP, prev_intrpt_state);
				freeze_waiter = TRUE;
			}
			GET_C_STACK_FROM_SCRIPT("JNLPROCSTUCK", process_id, writer, stuck_cnt);
			*lcnt = 1;	/* ??? is it necessary to limit this, and if so, how ??? */
			if (freeze_waiter)
			{	/* We are frozen, so restart. */
				continue;
			}
			status = ERR_JNLPROCSTUCK;
			continue_proc(writer);
			break;
		}
		/* The latch owner is no longer the remembered writer; leave and let the caller try again */
		break;
	}
	if (csa->now_crit && (jb->dskaddr > jb->freeaddr))
	{	/* jb->dskaddr > jb->freeaddr => out of design condition if we have crit.
		 * If we don't have crit, a journal switch could have occurred, so not an error condition.
		 */
		status = ERR_JNLCNTRL;
	}
	if (freeze_waiter)
	{	/* Still marked as freeze waiter on the way out: clear the freeze and re-enable interrupts */
		CLEAR_ANTICIPATORY_FREEZE(freeze_cleared);			/* sets freeze_cleared */
		REPORT_INSTANCE_UNFROZEN(freeze_cleared);
		ENABLE_INTERRUPTS(INTRPT_IN_RETRY_LOOP, prev_intrpt_state);
	}
	return status;
}

/* Keep trying until the journal buffer of jpc has been written out up to "threshold".
 *
 * Loops over jnl_sub_write_attempt() until jb->dskaddr reaches threshold (see the exact_check comment below for
 * the precise comparison). A caller that did not hold crit on entry also stops once jpc->channel is NOJNL.
 *
 * Parameters:
 *	jpc		journal private control of the region whose journal buffer is to be flushed.
 *	threshold	journal buffer address that jb->dskaddr has to reach.
 *
 * Returns:
 *	SS_NORMAL	the loop condition became false, or JNL_FILE_SWITCHED(jpc) was detected (in which case
 *			jpc->status is also set to SS_NORMAL).
 *	error status	the status from jnl_sub_write_attempt(), returned after this routine invoked jnl_file_lost().
 *
 * Side effects visible here: the global "jnlpool" is pointed at csa->jnlpool while this routine runs and is
 * restored to its entry value on every return path; crit may be obtained and released on jpc->region.
 */
uint4 jnl_write_attempt(jnl_private_control *jpc, uint4 threshold)
{
	jnl_buffer_ptr_t	jb;
	uint4			prev_freeaddr;
	unsigned int		lcnt, prev_lcnt, cnt;
	sgmnt_addrs		*csa;
	jnlpool_addrs_ptr_t	save_jnlpool;
	unsigned int		status;
	boolean_t		was_crit, jnlfile_lost, exact_check;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	jb = jpc->jnl_buff;
	csa = &FILE_INFO(jpc->region)->s_addrs;
	/* Remember the caller's jnlpool and use this region's one (if it has one and it differs) while in here */
	save_jnlpool = jnlpool;
	if (csa->jnlpool && (csa->jnlpool != jnlpool))
		jnlpool = csa->jnlpool;
	was_crit = csa->now_crit;

	/* If holding crit and input threshold matches jb->rsrv_freeaddr, then we need to wait in the loop as long as dskaddr
	 * is not EQUAL to threshold. This is because if dskaddr is lesser than threshold we need to wait. If ever it
	 * becomes greater than threshold, it is an out-of-design situation (since dskaddr has effectively become > rsrv_freeaddr)
	 * and so we need to trigger "jnl_file_lost" which is done in "jnl_sub_write_attempt" so it is important to invoke
	 * that routine (in the for loop below). Hence the need to do an exact match instead of a < match. If not holding
	 * crit or input threshold does not match jb->rsrv_freeaddr, then dskaddr becoming GREATER than threshold is a valid
	 * condition so we should do a (dskaddr < threshold), not a (dskaddr != threshold) check in that case.
	 */
	exact_check = was_crit && (threshold == jb->rsrv_freeaddr);
	assert(!was_crit || threshold <= jb->rsrv_freeaddr);
	/* Check that we either own crit on the current region or we don't own crit on ANY region. This is relied upon by
	 * the grab_crit calls (done in jnl_write_attempt and jnl_sub_write_attempt) to ensure no deadlocks are possible.
	 */
	assert(was_crit || (0 == have_crit(CRIT_HAVE_ANY_REG)));
	/* lcnt is handed by address to jnl_sub_write_attempt(), which may reset it to 1; prev_lcnt holds the value lcnt
	 * had at the top of the iteration so that such a reset can be detected. cnt counts iterations since the start
	 * or since the last JNLFLUSHNOPROG message.
	 */
	for (prev_lcnt = lcnt = cnt = 1;
		(was_crit || (NOJNL != jpc->channel)) && (exact_check ? jb->dskaddr != threshold : jb->dskaddr < threshold);
		lcnt++, prev_lcnt = lcnt, cnt++)
	{
		prev_freeaddr = jb->freeaddr;
		if (prev_freeaddr < threshold)
		{
			JNL_PHASE2_CLEANUP_IF_POSSIBLE(csa, jb); /* phase2 commits in progress. Clean them up if possible */
			if (prev_freeaddr == jb->freeaddr)
			{	/* No cleanup happened implies process in phase2 commit is still alive.
				 * Give it some time to finish its job. Not sleeping here could result in a spinloop
				 * below (due to the "continue" below under the "SS_NORMAL == status" if check).
				 */
				BG_TRACE_PRO_ANY(csa, jnl_phase2_cleanup_if_possible);
				SLEEP_USEC(1, FALSE);
				if (!was_crit && (0 == (lcnt % ITERATIONS_100K)))
				{	/* We do not have crit and have slept a while (100K iterations of 1-micro-second each
					 * == a total of ~ 100 milli-seconds). See if crit can be obtained that way the
					 * JNL_PHASE2_CLEANUP_IF_POSSIBLE macro will attempt "jnl_phase2_salvage" if needed.
					 * An example scenario where this is needed is if a process is in "gds_rundown"->"jnl_wait"
					 * and does not hold crit but has written journal records after those written by another
					 * process which was kill -9ed in phase2 of its jnl commit. Not doing this check would
					 * cause the process in gds_rundown to be indefinitely stuck in "jnl_wait".
					 */
					if (grab_crit_immediate(jpc->region, OK_FOR_WCS_RECOVER_TRUE, NOT_APPLICABLE))
					{
						JNL_PHASE2_CLEANUP_IF_POSSIBLE(csa, jb); /* phase2 commits in progress.
											  * Clean them up if possible.
											  */
						rel_crit(jpc->region);
					}
				}
			}
		}
		status = jnl_sub_write_attempt(jpc, &lcnt, threshold);
		if (JNL_FILE_SWITCHED(jpc))
		{	/* If we are holding crit, the journal file switch could happen in the form of journaling getting
			 * turned OFF (due to disk space issues etc.)
			 */
			jpc->status = SS_NORMAL;
			if (save_jnlpool != jnlpool)
				jnlpool = save_jnlpool;
			return SS_NORMAL;
		}
		if (SS_NORMAL == status)
		{
			/* In Unix, writes are synchronous so SS_NORMAL status return implies we have completed a jnl
			 * write and "jb->dskaddr" is closer to "threshold" than it was in the previous iteration.
			 * A sleep at this point will only slow things down unnecessarily. Hence no sleep if Unix.
			 */
			continue;
		}
		if ((ERR_JNLCNTRL == status) || (ERR_JNLACCESS == status)
			|| (csa->now_crit && (ERR_JNLWRTDEFER != status) && (ERR_JNLWRTNOWWRTR != status)))
		{	/* If JNLCNTRL or if holding crit and not waiting for some other writer
			 * better turn off journaling and proceed with database update to avoid a database hang.
			 */
			if (was_crit)
				jb->blocked = 0;
			else
			{
				assertpro(0 == have_crit(CRIT_HAVE_ANY_REG));
				grab_crit(jpc->region, WS_4); /*jnl_write_attempt has assert about have_crit that this relies on */
			}
			jnlfile_lost = FALSE;
			assert(TREF(gtm_test_fake_enospc) || WBTEST_ENABLED(WBTEST_JNL_FILE_LOST_DSKADDR)
			|| WBTEST_ENABLED(WBTEST_RECOVER_ENOSPC) || (ERR_JNLPROCSTUCK == status));
			if (JNL_ENABLED(csa->hdr) && (ERR_JNLPROCSTUCK != status))
			{	/* We ignore the return value of jnl_file_lost() since we always want to report the journal
				 * error, whatever its error handling method is.  Also, an operator log will be sent by some
				 * callers (t_end()) only if an error is returned here, and the operator log is wanted in
				 * those cases.
				 */
				jnl_file_lost(jpc, status);
				jnlfile_lost = TRUE;
			}
			/* Else journaling got closed concurrently by another process by invoking "jnl_file_lost"
			 * just before we got crit. Do not invoke "jnl_file_lost" again on the same journal file.
			 * Instead continue and next iteration will detect the journal file has switched and terminate.
			 */
			if (!was_crit)
				rel_crit(jpc->region);
			if (!jnlfile_lost)
				continue;
			else
			{	/* We invoked jnl_file_lost(): restore the caller's jnlpool and report the error */
				if (save_jnlpool != jnlpool)
					jnlpool = save_jnlpool;
				return status;
			}
		}
		if (ERR_JNLWRTDEFER == status)
		{	/* Check if the write was deferred because the instance is frozen.
			 * In that case, wait until the freeze is lifted instead of wasting time spinning on the latch
			 * in jnl_qio.
			 */
			assert(!csa->jnlpool || (csa->jnlpool == jnlpool));
			WAIT_FOR_REPL_INST_UNFREEZE_SAFE(csa);
		}
		if ((ERR_JNLWRTDEFER != status) && (ERR_JNLWRTNOWWRTR != status))
		{	/* If holding crit, then jnl_sub_write_attempt would have invoked jnl_file_lost which would have
			 * caused the JNL_FILE_SWITCHED check at the beginning of this for loop to succeed and return from
			 * this function so we should never have gotten here. Assert accordingly. If not holding crit,
			 * wait for some crit holder to invoke jnl_file_lost. Until then keep sleep looping indefinitely.
			 * The sleep in this case is not time-limited because the callers of jnl_write_attempt (particularly
			 * jnl_wait) do not check its return value so they assume success returns from this function. It is
			 * non-trivial to change the interface and code of all callers to handle the error situation so we
			 * instead choose to sleep indefinitely here until some crit process encounters the same error and
			 * triggers jnl_file_lost processing which will terminate the loop due to the JNL_FILE_SWITCHED check.
			 */
			assert(!csa->now_crit);
			wcs_sleep(lcnt);
		} else if (prev_lcnt != lcnt)
		{	/* jnl_sub_write_attempt() reset lcnt during this iteration */
			assert(1 == lcnt);
			if ((ERR_JNLWRTDEFER == status) && (JNL_FLUSH_PROG_TRIES <= cnt))
			{	/* Change of writer */
				send_msg_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_JNLFLUSHNOPROG, 2, JNL_LEN_STR(csa->hdr),
					ERR_TEXT, 2, LEN_AND_LIT("No progress even with multiple writers"));
				cnt = 0;
			}
		}
	}
	if (save_jnlpool != jnlpool)
		jnlpool = save_jnlpool;
	return SS_NORMAL;
}