1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
|
/****************************************************************
* *
* Copyright (c) 2009-2017 Fidelity National Information *
* Services, Inc. and/or its subsidiaries. All rights reserved. *
* *
* This source code contains the intellectual property *
* of its copyright holder(s), and is made available *
* under a license. If you do not know the terms of *
* the license, please stop and do not read further. *
* *
****************************************************************/
#include "mdef.h"
#ifdef GTM_FD_TRACE
#include "gtm_stat.h"
#include "gtm_string.h"
#include "gdsroot.h"
#include "gtm_facility.h"
#include "fileinfo.h"
#include "gdsbt.h"
#include "gdsfhead.h"
#include "filestruct.h"
#include "gdscc.h"
#include "jnl.h"
#include "eintr_wrappers.h"
#include "dpgbldir.h"
#include "gtm_dbjnl_dupfd_check.h"
#include "error.h"
#include "send_msg.h"
#include "is_file_identical.h"
/* Number of entries in the fd-indexed table (open_fdarray) built by gtm_dbjnl_dupfd_check(). Only fds below this
 * value are tracked in that table; fds at or above it are validated one at a time by gtm_check_fd_is_valid().
 */
#define MAX_FD_FOR_FASTCHECK 256
/* The three globals below hold copies of the arguments most recently passed to gtm_dupfd_check_specific() so that
 * they can be examined in a core file if an assertpro fires there.
 */
GBLDEF gd_region *dupfd_check_reg; /* region whose fd is being checked (for debugging purposes) */
GBLDEF int dupfd_check_fd; /* the fd being checked (for debugging purposes) */
GBLDEF fdinfo_t *dupfd_check_openfdarray; /* the fd table in use for the check (for debugging purposes) */
error_def(ERR_GVFAILCORE); /* message sent by FIX_CORRUPT_JNLFD before it repairs a journal fd */
/* Repair the journal file descriptor of region REG after it has been found to be corrupt.
 * Before fixing anything, take a core dump (of a forked child) and send a syslog message to ensure the problem
 * gets analyzed. The repair itself marks the journal channel as not open (NOJNL), decrements jpc->cycle and
 * zeroes jpc->pini_addr.
 * The body is wrapped in do { ... } while (0) so that "FIX_CORRUPT_JNLFD(reg);" is exactly one statement and is
 * therefore safe to use as the unbraced arm of an if/else.
 */
#define FIX_CORRUPT_JNLFD(REG) \
do \
{ \
	jnl_private_control *jpc; \
 \
	assert(FALSE); /* flag the corruption right away where assert is active */ \
	gtm_fork_n_core(); \
	send_msg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_GVFAILCORE); \
	jpc = FILE_INFO(REG)->s_addrs.jnl; \
	jpc->channel = NOJNL; \
	jpc->cycle--; \
	jpc->pini_addr = 0; \
} while (0)
/* Verify (using fstat) that "fd" still refers to the file that region "reg" believes it has open.
 *
 * reg   : an open region
 * is_db : TRUE if "fd" is the region's database file descriptor, FALSE if it is the journal file descriptor
 * fd    : the file descriptor to examine
 *
 * Returns TRUE if the fd checks out. For a journal fd, a mismatch that coincides with a journal file switch is
 * also treated as fine. Returns FALSE only when a journal fd was found to be corrupt, in which case it has
 * already been repaired through FIX_CORRUPT_JNLFD. A failing fstat or a mismatching database fd is not
 * recoverable and trips an assertpro.
 */
boolean_t gtm_check_fd_is_valid(gd_region *reg, boolean_t is_db, int fd)
{
	int		fstat_res;
	sgmnt_addrs	*csa;
	struct stat	stat_buf;

	FSTAT_FILE(fd, &stat_buf, fstat_res);
	assertpro(-1 != fstat_res);
	assert(reg->open);
	if (is_db)
	{	/* A db fd that does not correspond back to the region's own database file cannot be fixed */
		assertpro(is_gdid_stat_identical(&FILE_ID(reg), &stat_buf));
		return TRUE;
	}
	/* Journal fd from here on */
	csa = &FILE_INFO(reg)->s_addrs;
	if (is_gdid_stat_identical(JNL_GDID_PTR(csa), &stat_buf))
		return TRUE;	/* fd points back to the journal file */
	if (JNL_FILE_SWITCHED(csa->jnl))
		return TRUE;	/* mismatch is explained by a concurrent journal switch */
	/* Neither of the above held, so the journal file descriptor is corrupt. Fix it. */
	FIX_CORRUPT_JNLFD(reg);
	return FALSE;
}
/* Check one file descriptor of one region against the table of descriptors seen so far.
 *
 * reg          : region that owns "fd"
 * open_fdarray : table indexed by fd (valid for fds below MAX_FD_FOR_FASTCHECK) recording which region, and
 *                whether its db or jnl, was first seen using that fd
 * fd           : the file descriptor to check; must be non-negative
 * is_db        : TRUE if "fd" is the region's database fd, FALSE if it is its journal fd
 *
 * An fd outside the table's range is simply validated with gtm_check_fd_is_valid(). An fd inside the range is
 * recorded in the table if the slot is free. If the slot is already taken, a db/db collision is fatal
 * (assertpro); otherwise the fd must lead back to the database involved (else assertpro) and the journal side
 * is repaired through FIX_CORRUPT_JNLFD.
 */
void gtm_dupfd_check_specific(gd_region *reg, fdinfo_t *open_fdarray, int fd, boolean_t is_db)
{
	gd_region	*db_reg, *jnl_reg, *prior_reg;
	int		fstat_res;
	struct stat	stat_buf;

	/* Record key local variables in globals in case we take an assertpro and need to analyze the pro core */
	dupfd_check_fd = fd;
	dupfd_check_reg = reg;
	dupfd_check_openfdarray = open_fdarray;
	assertpro(0 <= fd);
	if (MAX_FD_FOR_FASTCHECK <= fd)
	{	/* fd is outside the fast check range. No choice but to check it using the heavyweight fstat */
		gtm_check_fd_is_valid(reg, is_db, fd);
		return;
	}
	/* fd is within fastcheck range. The first fd to claim a table slot is assumed valid and the heavyweight
	 * fstat check is skipped. For dbg builds though, do the check anyway so that code gets exercised as well.
	 */
	assert((NULL != open_fdarray[fd].reg) || gtm_check_fd_is_valid(reg, is_db, fd));
	prior_reg = open_fdarray[fd].reg;
	if (NULL != prior_reg)
	{	/* Slot already claimed: two files are using the same fd */
		assertpro(!(is_db && open_fdarray[fd].is_db)); /* Cannot do much to recover from 2 DBs with the SAME fd */
		/* The fds of one region's database and another region's journal collide.
		 * Check if db fd is indeed valid and if so fix the journal's fd.
		 * If db fd is not valid, then cannot do much to recover from this situation.
		 */
		FSTAT_FILE(fd, &stat_buf, fstat_res);
		assertpro(-1 != fstat_res);
		db_reg = is_db ? reg : prior_reg;
		jnl_reg = is_db ? prior_reg : reg;
		assertpro(is_gdid_stat_identical(&FILE_ID(db_reg), &stat_buf)); /* fd doesn't lead back to DB; corrupted! */
		/* fd corresponds back to the database, so it is the jnl file structure that is corrupt. That is fixable. */
		FIX_CORRUPT_JNLFD(jnl_reg);
		if (!is_db)
			return;	/* table entry already describes the db correctly; do not overwrite it with the jnl */
	}
	open_fdarray[fd].reg = reg;
	open_fdarray[fd].is_db = is_db;
}
/* Debugging aid written to catch the symptom of D9I11-002714 before the database gets damaged.
 * Walks every region of every global directory returned by get_next_gdr() and, for each open BG/MM region
 * (skipping those flagged was_open), runs the database fd and then the journal fd (if journaling is allowed
 * and the journal channel is open) through gtm_dupfd_check_specific(), which detects duplicate descriptors.
 */
void gtm_dbjnl_dupfd_check(void)
{
	fdinfo_t	open_fdarray[MAX_FD_FOR_FASTCHECK];
	gd_addr		*gld;
	gd_region	*reg, *reg_top;
	int		db_fd, jnl_fd;
	sgmnt_addrs	*csa;
	unix_db_info	*udi;

	memset(open_fdarray, 0, SIZEOF(open_fdarray));	/* start with no fd claimed by any region */
	for (gld = get_next_gdr(NULL); gld; gld = get_next_gdr(gld))
	{
		reg_top = gld->regions + gld->n_regions;
		for (reg = gld->regions; reg < reg_top; reg++)
		{
			if (!IS_ACC_METH_BG_OR_MM(reg->dyn.addr->acc_meth) || !reg->open || reg->was_open)
				continue;
			udi = FILE_INFO(reg);
			/* Database fd goes first */
			db_fd = udi->fd;
			gtm_dupfd_check_specific(reg, open_fdarray, db_fd, TRUE);
			/* Journal fd next. Read the channel only now, i.e. after the db check has run. */
			csa = &udi->s_addrs;
			if (!JNL_ALLOWED(csa))
				continue;
			jnl_fd = csa->jnl->channel;
			if (NOJNL != jnl_fd)
				gtm_dupfd_check_specific(reg, open_fdarray, jnl_fd, FALSE);
		}
	}
}
#endif
|