File: wcs_write_in_progress_wait.c

package info (click to toggle)
fis-gtm 7.1-006-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 32,908 kB
  • sloc: ansic: 344,906; asm: 5,184; csh: 4,859; sh: 2,000; awk: 294; makefile: 73; sed: 13
file content (117 lines) | stat: -rw-r--r-- 4,092 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
/****************************************************************
 *								*
 * Copyright (c) 2007-2024 Fidelity National Information	*
 * Services, Inc. and/or its subsidiaries. All rights reserved.	*
 *								*
 *	This source code contains the intellectual property	*
 *	of its copyright holder(s), and is made available	*
 *	under a license.  If you do not know the terms of	*
 *	the license, please stop and do not read further.	*
 *								*
 ****************************************************************/

#include "mdef.h"

#include "gdsroot.h"
#include "gtm_facility.h"
#include "fileinfo.h"
#include "gdsbt.h"
#include "gdsblk.h"
#include "gdsfhead.h"
#include "filestruct.h"
#include "interlock.h"
#include "gdsbgtr.h"
#include "sleep_cnt.h"
#include "wbox_test_init.h"
#include "copy.h"

/* Include prototypes */
#include "caller_id.h"
#include "send_msg.h"
#include "wcs_sleep.h"
#include "is_proc_alive.h"
#include "wcs_write_in_progress_wait.h"
#include "add_inter.h"
#include "gtm_c_stack_trace.h"
#include "wcs_wt.h"

/* Globals referenced by this module, either directly or through the interlock macros it invokes. */
GBLREF	gd_region		*gv_cur_region;	/* for the LOCK_HIST macro used in LOCK_BUFF_FOR_UPDATE macro;
						 * also supplies the database name reported with WRITEWAITPID
						 */
GBLREF	sgmnt_data_ptr_t	cs_data;	/* only used to assert that twinning is not on */
GBLREF	sgmnt_addrs		*cs_addrs;	/* used for the crit assert and the ms_wip_critsleeps statistic */
GBLREF	uint4			process_id;	/* for the LOCK_HIST macro used in LOCK_BUFF_FOR_UPDATE macro;
						 * also reported as the waiting pid in WRITEWAITPID
						 */

error_def (ERR_WRITEWAITPID);	/* sent to the syslog when the wait for the writer has gone on too long */

/* Waits for a concurrently running write (of a global buffer to disk) by another process to finish
 * and then takes over the update interlock of that buffer.
 *
 * Parameters:
 *	cnl		- node-local area whose db_latch is handed to LOCK_BUFF_FOR_UPDATE.
 *	cr		- cache record of the buffer whose write is being waited on.
 *	wbox_test_code	- white box test code handed to GTM_WHITE_BOX_TEST.
 *
 * Returns TRUE once this process holds the buffer, i.e. either the interlock attempt succeeded or, at
 * a "sick of waiting" checkpoint, the buffer turned out to be no longer dirty and its interlock was
 * re-established with LOCK_NEW_BUFF_FOR_UPDATE.
 * Returns FALSE if, at such a checkpoint, the buffer is still dirty and the process recorded in
 * cr->epid is found to be no longer alive.
 * In all other cases the routine keeps retrying. The design intent is that a checkpoint is reached
 * after approx. 1 minute of waiting (twice that in the debug version).
 */
boolean_t	wcs_write_in_progress_wait(node_local_ptr_t cnl, cache_rec_ptr_t cr, wbtest_code_t wbox_test_code)
{
	uint4		wait_iter;
	int4		latch_val;

	assert(!TWINNING_ON(cs_data));
	for (wait_iter = 1; ; wait_iter++)
	{	/* At any point the block is owned either by this process or by the writer. If it is the
		 * writer, it has to be given the chance to complete its write and release the block; the
		 * LOCK attempt of a later iteration then establishes our ownership.
		 */
		LOCK_BUFF_FOR_UPDATE(cr, latch_val, &cnl->db_latch);
		/* The LOCK above wipes out the evidence of writer ownership, but what is really being
		 * tested is that there was no prior owner, which can only be the case once the writer
		 * has cleared it.
		 */
		if (OWN_BUFF(latch_val))
			return TRUE;
		GTM_WHITE_BOX_TEST(wbox_test_code, wait_iter, (2 * BUF_OWNER_STUCK));
#		ifdef DEBUG
		/* Debug builds grab a first stack trace of the writer half way to the checkpoint below */
		if ((BUF_OWNER_STUCK == wait_iter) && cr->epid)
			GET_C_STACK_FROM_SCRIPT("WRITEWAITPID", process_id, cr->epid, ONCE);
#		endif
		/* The assert inside this checkpoint has been seen to fail occasionally on some platforms.
		 * The suspicion is that the other writer is in jnl_fsync (as part of flushing a global
		 * buffer) and needs more than a minute to finish. So that a writer which completes in a
		 * little over a minute does not cause a false failure, the debug version waits twice as long.
		 */
		if (0 == (wait_iter % (BUF_OWNER_STUCK DEBUG_ONLY( * 2))))
		{	/* sick of waiting */
			if (!cr->dirty)
			{	/* someone dropped something; assume it was the writer and go on */
				LOCK_NEW_BUFF_FOR_UPDATE(cr);
				return TRUE;
			}
			if (cr->epid)
			{	/* Obtaining the stack can take a while. Hence report to the syslog first and
				 * afterwards re-check that the state has not changed in the meantime.
				 */
				send_msg_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_WRITEWAITPID, 6, process_id,
						DEBUG_ONLY(TWICE) PRO_ONLY(ONCE),
						cr->epid, &(cr->blk), DB_LEN_STR(gv_cur_region));
				GET_C_STACK_FROM_SCRIPT("WRITEWAITPID", process_id, cr->epid,
							DEBUG_ONLY(TWICE) PRO_ONLY(ONCE));
				/* Still dirty and the recorded writer is dead : nobody will finish this write */
				if (cr->dirty && cr->epid && !is_proc_alive(cr->epid, 0))
					return FALSE;
			}
			/* Reaching this point is only expected under the I/O hang white box tests */
			assert((WBTEST_DB_WRITE_HANG == gtm_white_box_test_case_number)
				|| (WBTEST_EXPECT_IO_HANG == gtm_white_box_test_case_number));
		}
		if (WRITER_STILL_OWNS_BUFF(cr, latch_val))
		{	/* Writer has not let go yet : account for the sleep (if stats are reachable) and back off */
			if ((NULL != cs_addrs) && (NULL != cs_addrs->nl))
			{
				assert(cs_addrs->now_crit);
				INCR_GVSTATS_COUNTER(cs_addrs, cs_addrs->nl, ms_wip_critsleeps,
						((wait_iter > MAXSLPTIME) ? MAXSLPTIME : wait_iter));
			}
			wcs_sleep(wait_iter);
		}
	}	/* end of for loop to control buffer */
}