1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282
|
/****************************************************************
* *
* Copyright (c) 2001-2016 Fidelity National Information *
* Services, Inc. and/or its subsidiaries. All rights reserved. *
* *
* This source code contains the intellectual property *
* of its copyright holder(s), and is made available *
* under a license. If you do not know the terms of *
* the license, please stop and do not read further. *
* *
****************************************************************/
#include "mdef.h"
#include "gtm_string.h"
#include "gdsroot.h"
#include "gdsblk.h"
#include "min_max.h" /* needed for gdsblkops.h */
#include "gdsblkops.h"
#include "gtm_facility.h"
#include "fileinfo.h"
#include "gdsbt.h"
#include "gdsfhead.h"
#include "gdscc.h"
#include "gdskill.h"
#include "filestruct.h"
#include "copy.h"
#include "jnl.h"
#include "buddy_list.h" /* needed for tp.h */
#include "hashtab_int4.h" /* needed for tp.h */
#include "tp.h"
#include "gvcst_blk_build.h"
#include "gtmimagename.h"
#include "spec_type.h"
/* External globals referenced by gvcst_blk_build(). The notes below describe only how this file uses them. */
#ifdef DEBUG
/* Debug-only flag: when set, the TP chain verification accepts a chain link whose flag is not 1
 * provided its next_off is 0.
 */
GBLREF boolean_t skip_block_chain_tail_check;
#endif
/* Compared against cse->index in the assert on the non-TP path. */
GBLREF unsigned char cw_set_depth;
/* Non-zero when inside a TP transaction (the TP-only paths below assert/test it). */
GBLREF uint4 dollar_tlevel;
/* Supplies new_buff_list (source of the private build buffer) and the TP cw_set_depth. */
GBLREF sgm_info *sgm_info_ptr;
/* Read for now_crit, t_commit_crit and ti->curr_tn / ti->early_tn. */
GBLREF sgmnt_addrs *cs_addrs;
/* Read for acc_meth, blk_size and asyncio. */
GBLREF sgmnt_data_ptr_t cs_data;
/* TRUE for DSE according to the comments in gvcst_blk_build(); relaxes several asserts there. */
GBLREF boolean_t write_after_image;
/* NOTE(review): not referenced directly in the visible body of gvcst_blk_build(); it may be needed by one of
 * the macros used there -- confirm before removing.
 */
GBLREF unsigned int t_tries;
#ifdef UNIX
/* Only jgbl.onlnrlbk (online rollback in progress) is read, inside a UNIX_ONLY assert clause. */
GBLREF jnl_gbls_t jgbl;
#endif
/* gvcst_blk_build : assemble a database block image from the update array attached to a cw-set element.
 *
 * The update array (cse->upd_addr) is an array of blk_segment entries. As used below:
 *	array[0].len  is the total size of the block being built (it is stored into the block header's bsiz),
 *	array[0].addr points at the LAST segment entry of the array,
 *	array[1..]    are the (addr, len) pieces that are copied, in order, after the block header.
 *
 * Parameters:
 *	cse       - the cw-set element describing the update. Fields written here: new_buff and first_copy (only when
 *	            base_addr is NULL), and first_off, index, ins_off, next_off (only in TP when ins_off is non-zero).
 *	base_addr - destination buffer for the block. If NULL, a buffer is taken from sgm_info_ptr->new_buff_list and
 *	            remembered in cse->new_buff (this is the first private TP build). Otherwise it is asserted to have
 *	            its low two bits clear.
 *	ctn       - transaction number to stamp into the block header. If it is 0 while in TP,
 *	            cs_addrs->ti->curr_tn - 1 is used instead.
 *
 * Returns nothing. In DEBUG builds, if the TP chain inside the built block fails verification,
 * DONOTCOMMIT_GVCST_BLK_BUILD_TPCHAIN is OR-ed into TREF(donot_commit).
 */
void gvcst_blk_build(cw_set_element *cse, sm_uc_ptr_t base_addr, trans_num ctn)
{
blk_segment *seg, *stop_ptr, *array;
off_chain chain;
sm_uc_ptr_t ptr, ptrtop, c;
sm_ulong_t n;
int4 offset;
/* NOTE(review): blktn is only assigned inside DEBUG_ONLY() and only read inside assert(); likewise ptrtop, c and
 * offset are only used inside the "# ifdef DEBUG" section below.
 */
trans_num blktn;
# ifdef DEBUG
boolean_t integ_error_found;
/* NOTE(review): rp is declared but never used in this function. */
rec_hdr_ptr_t rp;
sm_uc_ptr_t chainptr, input_base_addr;
unsigned short nRecLen;
# endif
DCL_THREADGBL_ACCESS;
SETUP_THREADGBL_ACCESS;
/* For a non-TP transaction on a BG database we should reach here WITHOUT crit: the comment originally here names
 * bg_update_phase2 (which operates outside crit) as the only invoker in that case. The exceptions to this are
 * DSE (write_after_image is TRUE) or ONLINE ROLLBACK which holds crit for the entire duration.
 * (The previous wording said "For a TP transaction ... with crit", which contradicted the assert below.)
 */
assert((dba_bg != cs_data->acc_meth) || dollar_tlevel || !cs_addrs->now_crit || write_after_image
UNIX_ONLY(|| jgbl.onlnrlbk));
/* For MM, a non-TP caller is expected to hold crit. */
assert((dba_mm != cs_data->acc_meth) || dollar_tlevel || cs_addrs->now_crit);
/* Bitmap updates are not built by this routine. */
assert(cse->mode != gds_t_writemap);
array = (blk_segment *)cse->upd_addr;
/* array[0].len is the size of the block to build : at least a block header and at most the database block size */
assert(array->len >= SIZEOF(blk_hdr));
assert(array->len <= cs_data->blk_size);
/* If there is a chain insertion offset, a block_id sized slot at that offset must fit inside the block */
assert((cse->ins_off + SIZEOF(block_id)) <= array->len);
assert((short)cse->index >= 0);
/* No undo information is expected to be pending for this cse */
assert(!cse->undo_next_off[0] && !cse->undo_offset[0]);
assert(!cse->undo_next_off[1] && !cse->undo_offset[1]);
/* Remember the caller-supplied address : the debug chain checks below tolerate integ errors only if it was NULL */
DEBUG_ONLY(input_base_addr = base_addr;)
if (base_addr == NULL)
{ /* it's the first private TP build */
assert(dollar_tlevel);
assert(cse->blk_target);
base_addr = cse->new_buff = (unsigned char *)get_new_free_element(sgm_info_ptr->new_buff_list);
cse->first_copy = TRUE;
} else
assert(0 == ((sm_ulong_t)base_addr & 3)); /* low two bits clear, i.e. word aligned at least */
/* The block-transaction-number is modified before the contents of the block are modified. This is
 * done so as to allow a cdb_sc_blkmod check (done in t_qread, gvcst_search, gvcst_put and tp_hist)
 * to be done out-of-crit by just checking for the transaction numbers. If the contents of the block
 * were modified first, there is a possibility that the block-transaction number didn't get updated
 * although the contents of the block may have changed and basing the decision of block-modified on
 * just the transaction numbers may not always be correct.
 * Note that in mm_update and bg_update there is an else block where instead of gvcst_blk_build(),
 * a memcpy is done. To effect the above change, we also need to switch the order of memcpy and
 * block-transaction-number-updation in those places.
 * Note that a similar change is not needed in gvcst_map_build() because that will never be in the
 * search history for any key.
 */
if (!ctn && dollar_tlevel)
{ /* Subtract one so will pass concurrency control for mm databases.
 * This block is guaranteed to be in an earlier history from when it was first read,
 * so this history is superfluous for concurrency control.
 * The correct tn is put in the block in mm_update or bg_update when the block is copied to the database.
 */
ctn = cs_addrs->ti->curr_tn - 1;
}
/* Assert that the block's transaction number is LESS than the transaction number corresponding to the blk build.
 * i.e. no one else should have touched the block contents in shared memory from the time we locked this in phase1
 * to the time we build it in phase2.
 * There are a few exceptions.
 * a) With DSE, it is possible to change the block transaction number and then a DSE or MUPIP command can run
 * on the above block with the above condition not true.
 * b) tp_tend calls gvcst_blk_build for cse's with mode kill_t_write/kill_t_create. For them we build a private
 * copy of the block for later use in phase2 of the M-kill. In this case, blktn could be
 * uninitialized so cannot do any checks using this value.
 * c) For MM, we don't have two phase commits so don't do any checks in that case.
 * d) For acquired blocks, it is possible that some process had read in the uninitialized block from disk
 * outside of crit (due to concurrency issues). Therefore the buffer could contain garbage. So we cannot
 * rely on the buffer contents to determine the block's transaction number.
 * e) If a twin is created, we explicitly set its buffer tn to be equal to ctn in phase1.
 * But since we are not passed the "cr" in this routine, it is not easily possible to check that.
 * Hence in case of twinning, we relax the check so buffertn == ctn is allowed.
 * In the assert below the relaxed (blktn <= ctn) form is the one applied when cs_data->asyncio is set;
 * otherwise the strict (blktn < ctn) form is applied.
 */
DEBUG_ONLY(blktn = ((blk_hdr_ptr_t)base_addr)->tn);
assert(!IS_MCODE_RUNNING || !cs_addrs->t_commit_crit || (dba_bg != cs_data->acc_meth) || (n_gds_t_op < cse->mode)
|| (cse->mode == gds_t_acquired) || ((!cs_data->asyncio && (blktn < ctn)) || (cs_data->asyncio && (blktn <= ctn))));
/* With memory instruction reordering (currently possible only on AIX with the POWER architecture) it is possible
 * the early_tn we read in the assert below gets executed BEFORE the curr_tn read that happens a few lines above.
 * That could then fail this assert (GTM-8523). Account for that with the AIX_ONLY condition below.
 */
assert((ctn < cs_addrs->ti->early_tn) || write_after_image AIX_ONLY(|| (cs_data->acc_meth == dba_mm)));
/* Fill in the block header. Per the long comment above, the tn is stored BEFORE any block contents are copied;
 * do not reorder these stores after the segment copy loops.
 */
((blk_hdr_ptr_t)base_addr)->bver = GDSVCURR;
((blk_hdr_ptr_t)base_addr)->tn = ctn;
((blk_hdr_ptr_t)base_addr)->bsiz = UINTCAST(array->len);
((blk_hdr_ptr_t)base_addr)->levl = cse->level;
if (cse->forward_process)
{ /* Copy segments in ascending order, filling the block from just after the header towards the end */
stop_ptr = (blk_segment *)array->addr;
/* On a first copy every segment (array[1] onwards) is copied. Otherwise array[1] is skipped and the
 * destination pointer is advanced past its length (presumably because that leading portion is already
 * in place in the buffer -- confirm against callers).
 */
seg = cse->first_copy ? array + 1: array + 2;
ptr = base_addr + SIZEOF(blk_hdr);
if (!cse->first_copy)
ptr += ((blk_segment *)(array + 1))->len;
for ( ; seg <= stop_ptr; )
{
assert(0L <= ((INTPTR_T)seg->len));
DBG_BG_PHASE2_CHECK_CR_IS_PINNED(cs_addrs, seg);
/* memmove (not memcpy) : source and destination are allowed to overlap */
memmove(ptr, seg->addr, seg->len);
ptr += seg->len;
seg++;
}
} else
{ /* Copy segments in descending order, filling the block from its end back towards the header.
 * The walk starts at the last segment and stops (exclusive) at array[0] on a first copy,
 * or at array[1] otherwise (so array[1] is not copied in that case).
 */
stop_ptr = cse->first_copy ? array : array + 1;
seg = (blk_segment *)array->addr;
ptr = base_addr + array->len;
while (seg != stop_ptr)
{
assert(0L <= ((INTPTR_T)seg->len));
DBG_BG_PHASE2_CHECK_CR_IS_PINNED(cs_addrs, seg);
/* Step the destination back by this segment's length, then copy the segment there */
ptr -= (n = seg->len);
memmove(ptr, seg->addr, n);
seg--;
}
}
if (dollar_tlevel)
{
if (cse->ins_off)
{ /* if the cw set has a reference to resolve, move it to the block */
assert(cse->index < sgm_info_ptr->cw_set_depth);
assert((int)cse->ins_off >= (int)(SIZEOF(blk_hdr) + SIZEOF(rec_hdr)));
assert((int)(cse->next_off + cse->ins_off + SIZEOF(block_id)) <= array->len);
/* Remember the first chain link ever inserted in this block; later links are reached through next_off */
if (cse->first_off == 0)
cse->first_off = cse->ins_off;
chain.flag = 1;
chain.cw_index = cse->index;
chain.next_off = cse->next_off;
ptr = base_addr + cse->ins_off;
/* Store the chain record into the block at ins_off (GET_LONGP is used as (destination, source) here) */
GET_LONGP(ptr, &chain);
/* The pending reference is now in the block; clear it from the cse */
cse->index = 0;
cse->ins_off = 0;
cse->next_off = 0;
}
# ifdef DEBUG
if (offset = cse->first_off)
{ /* Verify the integrity of the TP chains within a newly created block.
 * If it is the first TP private build, the update array could have referenced
 * shared memory global buffers which could have been concurrently updated.
 * So the integrity of the chain cannot be easily verified. If ever we find
 * an integ error in the chain, we check if this is the first private TP build
 * and if so allow it but set a debug flag donot_commit so we never ever commit
 * this transaction. The hope is that it will instead restart after validation.
 *
 * Variables used by the walk :
 *	ptr      - start of the record currently being examined (advanced by its rsiz)
 *	ptrtop   - end of the block (base_addr + bsiz)
 *	chainptr - address of the next expected chain link (or ptrtop once the chain has ended)
 *	c        - position just past the key of the current record
 */
ptr = base_addr;
ptrtop = ptr + ((blk_hdr_ptr_t)ptr)->bsiz;
chainptr = ptr + offset;
ptr += SIZEOF(blk_hdr);
integ_error_found = FALSE;
for ( ; ptr < ptrtop; )
{
/* Inner loop : step over records until the one whose post-key position is the expected chain link */
do
{
GET_USHORT(nRecLen, &((rec_hdr_ptr_t)ptr)->rsiz);
if (0 == nRecLen)
{ /* A zero record size would never advance ptr; treat as an integ error */
assert(NULL == input_base_addr);
integ_error_found = TRUE;
break;
}
c = ptr;
c += SIZEOF(rec_hdr);
/* The *-key does not have a key. Everything else has one. Account for that. */
if (BSTAR_REC_SIZE != nRecLen)
{
/* Scan for two consecutive KEY_DELIMITER bytes. On exit with a match, c points at the
 * second delimiter. NOTE(review): when c reaches ptrtop - 1 and that byte is a delimiter,
 * the second test reads the byte at ptrtop -- confirm that is within the buffer.
 */
for ( ; (c < ptrtop) && ((*c++ != KEY_DELIMITER) || (*c != KEY_DELIMITER)); )
;
if (c >= ptrtop)
{ /* Key terminator not found inside the block */
assert(NULL == input_base_addr);
integ_error_found = TRUE;
break;
}
c++;
}
ptr += nRecLen;
if (c == chainptr)
{ /* This record holds the expected chain link. The link must sit either at the very end of
 * the record or be followed only by COLL_SPEC_LEN bytes.
 */
if (((ptr - SIZEOF(off_chain)) != chainptr)
&& ((ptr - SIZEOF(off_chain) - COLL_SPEC_LEN) != chainptr))
{
assert(NULL == input_base_addr);
integ_error_found = TRUE;
}
break;
}
if (c > chainptr)
{ /* We walked past the expected chain link without landing exactly on it */
assert(NULL == input_base_addr);
integ_error_found = TRUE;
break;
}
/* A record before the expected link must not itself carry a chain flag */
GET_LONGP(&chain, c);
if (chain.flag)
{
assert(NULL == input_base_addr);
integ_error_found = TRUE;
break;
}
} while (ptr < ptrtop);
if (integ_error_found)
break;
if (chainptr < ptrtop)
{ /* Read the link we just located and advance chainptr to the next link (or to ptrtop at chain end) */
GET_LONGP(&chain, chainptr);
assert(1 == chain.flag || (skip_block_chain_tail_check && (0 == chain.next_off)));
assert(chain.cw_index < sgm_info_ptr->cw_set_depth);
offset = chain.next_off;
if (0 == offset)
chainptr = ptrtop;
else
{
chainptr = chainptr + offset;
assert(chainptr < ptrtop); /* ensure we have not overrun the buffer */
}
}
}
if (integ_error_found)
TREF(donot_commit) |= DONOTCOMMIT_GVCST_BLK_BUILD_TPCHAIN;
else
assert(0 == offset); /* ensure the chain is NULL terminated */
}
# endif
} else
/* Non-TP : dollar_tlevel is 0 on this path, so this effectively checks cse->index against cw_set_depth */
assert(dollar_tlevel || (cse->index < (int)cw_set_depth));
}
|