1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
|
/****************************************************************
* *
* Copyright (c) 2006-2024 Fidelity National Information *
* Services, Inc. and/or its subsidiaries. All rights reserved. *
* *
* This source code contains the intellectual property *
* of its copyright holder(s), and is made available *
* under a license. If you do not know the terms of *
* the license, please stop and do not read further. *
* *
****************************************************************/
#include "mdef.h"
#include "gtm_socket.h"
#include "gtm_fcntl.h"
#include "gtm_unistd.h"
#include "gtm_inet.h"
#include "gtm_string.h"
#include <sys/time.h>
#include <errno.h>
#include <sys/sem.h>
#include "repl_instance.h"
#include "gdsroot.h"
#include "gdsblk.h"
#include "gtm_facility.h"
#include "fileinfo.h"
#include "gdsbt.h"
#include "gdsfhead.h"
#include "filestruct.h"
#include "iosp.h"
#include "gtmrecv.h"
#include "repl_dbg.h"
#include "gtm_stdio.h"
#include "repl_shutdcode.h"
#include "repl_sem.h"
#include "is_proc_alive.h"
#include "repl_log.h"
#include "gt_timer.h"
#include "ftok_sems.h"
#include "gtmmsg.h"
#include "repl_msg.h"
#include "gtmsource.h"
#ifdef DEBUG
#include "wbox_test_init.h"
#include "gtmio.h"
#endif
/* Sleep interval passed to SHORT_SLEEP while gtmrecv_shutdown polls for the receiver server to exit */
#define GTMRECV_WAIT_FOR_SHUTDOWN (1000 - 1) /* ms, almost 1s */
/* Globals referenced by this module (presumably defined elsewhere; GBLREF only declares them here) */
GBLREF jnlpool_addrs_ptr_t jnlpool;
GBLREF uint4 process_id;
GBLREF recvpool_addrs recvpool;
GBLREF int recvpool_shmid;
GBLREF gtmrecv_options_t gtmrecv_options;
GBLREF boolean_t is_rcvr_server;
GBLREF int gtmrecv_srv_count;
GBLREF void (*call_on_signal)();
GBLREF boolean_t holds_sem[NUM_SEM_SETS][NUM_SRC_SEMS];
/* Error message codes declared for use in this module */
error_def(ERR_RECVPOOLSETUP);
error_def(ERR_TEXT);
/* Shut down the receiver server and clean up the receive pool IPC resources.
 *
 * Parameters:
 *	auto_shutdown	- TRUE when the caller is itself a receiver server (asserted through gtmrecv_srv_count). In this
 *			  case the ftok semaphore and the receive pool access control semaphore are acquired here.
 *			  FALSE when the caller already holds both semaphores (asserted below); the function then sets
 *			  the shutdown flag in the receive pool and waits for the receiver server process to go away.
 *	exit_status	- Status to report. When auto_shutdown is FALSE it is overwritten with the value read from
 *			  recvpool.gtmrecv_local->shutdown while waiting. It is also passed by address to
 *			  gtmrecv_ipc_cleanup.
 *
 * Returns:
 *	ABNORMAL_SHUTDOWN if any of the semaphore operations preceding the cleanup fails; otherwise the final value
 *	of exit_status.
 */
int gtmrecv_shutdown(boolean_t auto_shutdown, int exit_status)
{
	uint4		savepid;
	boolean_t	shut_upd_too = FALSE, was_crit;
	int		status, save_errno;
	unix_db_info	*udi;

	udi = (unix_db_info *)FILE_INFO(recvpool.recvpool_dummy_reg);
	/* process_id is a uint4; format it with a 4-byte conversion (same as the "Finished" message at the end of this
	 * function) instead of "%ld", which would read a long from the variable argument list.
	 */
	repl_log(stdout, TRUE, TRUE, "Initiating SHUTDOWN operation on receiver server pid [%d]\n", process_id);
	call_on_signal = NULL; /* So we don't reenter on error */
	/* assert that auto shutdown should be invoked only if the current process is a receiver server */
	assert(!auto_shutdown || gtmrecv_srv_count);
	if (auto_shutdown)
	{	/* grab the ftok semaphore and recvpool access control lock IN THAT ORDER (to avoid deadlocks) */
		repl_inst_ftok_sem_lock();
		status = grab_sem(RECV, RECV_POOL_ACCESS_SEM);
		if (0 > status)
		{
			save_errno = errno;
			repl_log(stderr, TRUE, TRUE, "Error grabbing receive pool control semaphore : %s. "
				"Shutdown not complete\n", STRERROR(save_errno));
			repl_inst_ftok_sem_release();
			return ABNORMAL_SHUTDOWN;
		}
	} else
	{	/* ftok semaphore and recvpool access semaphore should already be held from the previous call to "recvpool_init" */
		assert(udi->grabbed_ftok_sem);
		assert(holds_sem[RECV][RECV_POOL_ACCESS_SEM]);
		/* We do not want to hold the options semaphore to avoid deadlocks with receiver server startup (C9F12-002766) */
		assert(!holds_sem[RECV][RECV_SERV_OPTIONS_SEM]);
		recvpool.gtmrecv_local->shutdown = SHUTDOWN;
		/* Wait for receiver server to die. But before that release ftok semaphore and receive pool access control
		 * semaphore. This way, other processes (either in this environment or a different one) don't encounter startup
		 * issues. However, to ensure that a concurrent argument-less rundown doesn't remove these semaphores (in case they
		 * are orphaned), increment the counter semaphore.
		 */
		if (0 != (status = incr_sem(RECV, RECV_SERV_COUNT_SEM)))
		{
			save_errno = errno;
			repl_log(stderr, TRUE, TRUE, "Could not acquire Receive Pool counter semaphore : %s. "
				"Shutdown did not complete\n", STRERROR(save_errno));
			/* Even though we hold the FTOK and RECV_POOL_ACCESS_SEM before entering this function (as ensured by
			 * asserts above), it is safe to release them in case of a premature error (like this one). The caller
			 * doesn't rely on the semaphores being held and this function is designed to release these semaphores
			 * eventually anyways (after gtmrecv_ipc_cleanup())
			 */
			repl_inst_ftok_sem_release();
			status = rel_sem(RECV, RECV_POOL_ACCESS_SEM);
			assert(0 == status);
			return ABNORMAL_SHUTDOWN;
		}
		if (0 != (status = rel_sem(RECV, RECV_POOL_ACCESS_SEM)))
		{
			save_errno = errno;
			repl_log(stderr, TRUE, TRUE, "Could not release Receive Pool access control semaphore : %s. "
				"Shutdown did not complete\n", STRERROR(save_errno));
			repl_inst_ftok_sem_release(); /* see comment above for why this is okay */
			status = decr_sem(RECV, RECV_SERV_COUNT_SEM);
			assert(0 == status);
			return ABNORMAL_SHUTDOWN;
		}
		repl_inst_ftok_sem_release();
		/* Poll until the shutdown field changes from SHUTDOWN, the recorded server pid is no longer positive, or
		 * that pid is no longer alive. savepid is assigned only if the first condition holds, and it is read
		 * below only in that same case.
		 */
		while((SHUTDOWN == (exit_status = recvpool.gtmrecv_local->shutdown))
				&& (0 < (savepid = recvpool.gtmrecv_local->recv_serv_pid))
				&& is_proc_alive(savepid, 0))
			SHORT_SLEEP(GTMRECV_WAIT_FOR_SHUTDOWN);
		if (SHUTDOWN == exit_status)
		{	/* The shutdown field was never updated while we waited */
			if (0 == savepid) /* No Receiver Process */
				exit_status = NORMAL_SHUTDOWN;
			else /* Receiver Server Crashed */
			{
				repl_log(stderr, FALSE, TRUE, "Receiver Server exited abnormally\n");
				exit_status = ABNORMAL_SHUTDOWN;
				shut_upd_too = TRUE;
			}
		}
		/* (Re)Grab the ftok semaphore and recvpool access control semaphore IN THAT ORDER (to avoid deadlocks) */
		repl_inst_ftok_sem_lock();
#		ifdef DEBUG
		/* Sleep for a few seconds to test for concurrent argument-less RUNDOWN to ensure that the latter doesn't remove
		 * the RECV_POOL_ACCESS_SEM under the assumption that it is orphaned.
		 */
		if (gtm_white_box_test_case_enabled && (WBTEST_LONGSLEEP_IN_REPL_SHUTDOWN == gtm_white_box_test_case_number))
		{
			DBGFPF((stderr, "GTMRECV_SHUTDOWN is about to start long sleep\n"));
			LONG_SLEEP(10);
		}
#		endif
		if (0 != (status = grab_sem(RECV, RECV_POOL_ACCESS_SEM)))
		{
			save_errno = errno;
			repl_log(stderr, TRUE, TRUE, "Could not acquire Receive Pool access control semaphore : %s. "
				"Shutdown did not complete\n", STRERROR(save_errno));
			repl_inst_ftok_sem_release();
			status = decr_sem(RECV, RECV_SERV_COUNT_SEM);
			assert(0 == status);
			return ABNORMAL_SHUTDOWN;
		}
		/* Now that semaphores are acquired, decrement the counter semaphore */
		if (0 != (status = decr_sem(RECV, RECV_SERV_COUNT_SEM)))
		{
			save_errno = errno;
			repl_log(stderr, TRUE, TRUE, "Could not release Receive Pool counter semaphore : %s. "
				"Shutdown did not complete\n", STRERROR(save_errno));
			repl_inst_ftok_sem_release();
			status = rel_sem(RECV, RECV_POOL_ACCESS_SEM);
			assert(0 == status);
			return ABNORMAL_SHUTDOWN;
		}
	}
	if (shut_upd_too)
	{	/* The receiver server was found dead, so also end the helpers and the update process */
		gtmrecv_end_helpers(FALSE);
		gtmrecv_endupd();
	}
	/* gtmrecv_ipc_cleanup will not be successful unless receiver server has completely exited.
	 * It relies on RECV_SERV_COUNT_SEM value.
	 */
	if (FALSE == gtmrecv_ipc_cleanup(auto_shutdown, &exit_status))
	{	/* Release all semaphores */
		if (!auto_shutdown)
		{
			decr_sem(RECV, UPD_PROC_COUNT_SEM);
			decr_sem(RECV, RECV_SERV_COUNT_SEM);
		}
		rel_sem_immediate( RECV, RECV_POOL_ACCESS_SEM);
	} else
	{	/* Receive Pool and Access Control Semaphores removed. Invalidate corresponding fields in file header */
		assert(!udi->s_addrs.hold_onto_crit);
		was_crit = udi->s_addrs.now_crit;
		/* repl_inst_recvpool_reset inturn invokes repl_inst_flush_filehdr which expects the caller to grab journal pool
		 * lock if journal pool is available.
		 */
		if ((NULL != jnlpool->jnlpool_ctl) && !was_crit)
			grab_lock(jnlpool->jnlpool_dummy_reg, TRUE, GRAB_LOCK_ONLY);
		repl_inst_recvpool_reset();
		if ((NULL != jnlpool->jnlpool_ctl) && !was_crit)
			rel_lock(jnlpool->jnlpool_dummy_reg);
	}
	/* NOTE(review): jnlpool->jnlpool_ctl is dereferenced below with only this assert as protection, although the
	 * code above tolerates it being NULL - confirm it can never be NULL here in builds where asserts are disabled.
	 */
	assert(NULL != jnlpool->jnlpool_ctl);
	if (!ftok_sem_release(recvpool.recvpool_dummy_reg,
			!jnlpool->jnlpool_ctl->ftok_counter_halted && udi->counter_ftok_incremented, FALSE))
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_RECVPOOLSETUP);
	repl_log(stdout, TRUE, TRUE, "Finished SHUTDOWN operation on receiver server pid [%d]\n", process_id);
	return (exit_status);
}
/* Common stop path: run gtmrecv_end1, then gtmrecv_shutdown in auto-shutdown mode, and optionally hand the
 * resulting status (expressed relative to NORMAL_SHUTDOWN) to gtmrecv_exit.
 *
 *	exit - when TRUE, gtmrecv_exit is invoked with the computed status; when FALSE, the function just returns.
 */
static void gtmrecv_stop(boolean_t exit)
{
	int	end_status, shutdown_code;

	end_status = gtmrecv_end1(TRUE);
	shutdown_code = gtmrecv_shutdown(TRUE, end_status) - NORMAL_SHUTDOWN;
	if (!exit)
		return;
	gtmrecv_exit(shutdown_code);
}
/* Stop entry point that acts only when this process is flagged as the receiver server (is_rcvr_server).
 * It runs the stop sequence without calling gtmrecv_exit.
 */
void gtmrecv_sigstop(void)
{
	if (!is_rcvr_server)
		return;	/* Nothing to do for any other process */
	gtmrecv_stop(FALSE);
}
/* Auto-shutdown entry point: runs the stop sequence and asks it to finish by calling gtmrecv_exit */
void gtmrecv_autoshutdown(void)
{
	gtmrecv_stop(TRUE);
}
|