File: jnl_pool_write.c

package info (click to toggle)
fis-gtm 6.3-007-1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 36,284 kB
  • sloc: ansic: 328,861; asm: 5,182; csh: 5,102; sh: 1,918; awk: 291; makefile: 69; sed: 13
file content (165 lines) | stat: -rw-r--r-- 7,015 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
/****************************************************************
 *								*
 * Copyright (c) 2007-2017 Fidelity National Information	*
 * Services, Inc. and/or its subsidiaries. All rights reserved.	*
 *								*
 *	This source code contains the intellectual property	*
 *	of its copyright holder(s), and is made available	*
 *	under a license.  If you do not know the terms of	*
 *	the license, please stop and do not read further.	*
 *								*
 ****************************************************************/

#include "mdef.h"

#include "gtm_string.h"
#include "gtm_inet.h"

#include <stddef.h> /* for offsetof() macro */

#include "gdsroot.h"
#include "gtm_facility.h"
#include "fileinfo.h"
#include "gdsbt.h"
#include "gdsblk.h"
#include "gdsfhead.h"
#include "filestruct.h"
#include "ccp.h"
#include "iosp.h"
#include "jnl.h"
#include "repl_msg.h"
#include "gtmsource.h"
#include "min_max.h"
#include "sleep_cnt.h"
#include "jnl_write.h"
#include "copy.h"
#include "sleep.h"

/* Process-wide globals that are defined elsewhere and referenced from this file. */
/* "jnlpool" is not named directly by jnl_pool_write(), which obtains its pool handle through JNLPOOL_FROM(csa);
 * presumably that macro is what needs this reference -- TODO confirm.
 */
GBLREF	jnlpool_addrs_ptr_t	jnlpool;
/* jgbl.cu_jnl_index is incremented by jnl_pool_write() inside a DEBUG_ONLY() wrapper */
GBLREF	jnl_gbls_t		jgbl;
/* jnl_pool_write() asserts that this is set on entry */
GBLREF	boolean_t		is_replicator;

/* This function writes the journal records ONLY TO the journal pool.
 *
 * csa 	   : sgmnt_addrs corresponding to region
 * rectype : Record type
 * jnl_rec : This contains fixed part of a variable size record or the complete fixed size records.
 * jfb     : For SET/KILL/ZKILL records entire record is formatted in this.
 *
 * The record is copied into the space tracked by the journal pool reservation structure ("jrs") of the
 * jnlpool associated with "csa". On return the following "jrs" fields may have been updated :
 *	jrs->num_tcoms      : incremented for every TCOM record seen (even those that are not yet written).
 *	jrs->cur_write_addr : advanced past every record that was accounted for in the reserved space.
 *	jrs->write_total    : advanced by the length of every record, including those that did not fit.
 *	jrs->memcpy_skipped : set to TRUE if the transaction is bigger than the jnlpool and the copy was skipped.
 *
 * Returns nothing. Returns early (after only bumping jrs->num_tcoms) for all but the last TCOM of a transaction.
 */
void	jnl_pool_write(sgmnt_addrs *csa, enum jnl_record_type rectype, jnl_record *jnl_rec, jnl_format_buffer *jfb)
{
	boolean_t		pool_overflow;
	int			max_iters, num_iters, num_participants;
	uint4			dstlen, rlen;
	uint4			jnlpool_size, tot_jrec_len;
	uchar_ptr_t		jnlrecptr;
	jnlpool_addrs_ptr_t	local_jnlpool;
	jnlpool_ctl_ptr_t	jctl;
	jpl_rsrv_struct_t	*jrs;
	uint4			write, write_total;
	qw_off_t		cur_write_addr, end_write_addr;
	gtm_int64_t		wait_write_addr;	/* needed signed because of subtraction happening below */

	assert(is_replicator);
	assert(NULL != csa);
	local_jnlpool = JNLPOOL_FROM(csa);
	assert(NULL != local_jnlpool);
	assert(NULL != jnl_rec);
	assert(IS_VALID_RECTYPES_RANGE(rectype) && IS_REPLICATED(rectype));
	assert(JNL_ENABLED(csa) || REPL_WAS_ENABLED(csa));
	jctl = local_jnlpool->jnlpool_ctl;
	assert(NULL != jctl); /* ensure we haven't yet detached from the jnlpool */
	jrs = &local_jnlpool->jrs;
	jnlpool_size = jctl->jnlpool_size;
	tot_jrec_len = jrs->tot_jrec_len;
	DEBUG_ONLY(jgbl.cu_jnl_index++;)
	if (JRT_TCOM == rectype)
	{	/* If this is a TCOM record, check if this is a multi-region TP transaction. In that case, tp_tend would call
		 * "jnl_pool_write" with a sequence say TSET1, TCOM1, TSET2, TCOM2 (where 1 implies REG1, 2 implies REG2).
		 * But we cannot write this sequence into the journal pool as replication ("repl_sort_tr_buff" etc.) relies on
		 * all TCOMs coming at the end, i.e. the desired order is TSET1, TSET2, TCOM1, TCOM2. So adjust that by noting
		 * down if this TCOM is not the last one and if so skip writing it to the jnlpool and write it only when it is.
		 * Thankfully it is okay to write N copies of the last TCOM record to correspond to each of the N regions since
		 * replication does not care about region-specific information in the TCOM record (e.g. checksum, pini_addr etc.).
		 * All replication cares about is the time, seqno etc. which is all common across all the regions.
		 */
		num_participants = jnl_rec->jrec_tcom.num_participants;
		assert(jrs->num_tcoms < num_participants);
		if (++jrs->num_tcoms != num_participants)
			return;
		max_iters = num_participants;	/* write one TCOM record per region in a loop */
	} else
		max_iters = 1;
	rlen = jnl_rec->prefix.forwptr;
	assert(0 == rlen % JNL_REC_START_BNDRY);
	assert((rlen + SIZEOF(jnldata_hdr_struct)) <= tot_jrec_len);
	pool_overflow = (tot_jrec_len > jnlpool_size);
	write_total = jrs->write_total;
	cur_write_addr = jrs->cur_write_addr;
	for (num_iters = 0; num_iters < max_iters; num_iters++)
	{
		write_total += rlen;
		if (write_total > tot_jrec_len)
		{	/* "tot_jrec_len" (computed in phase1) turned out to be less than "write_total" (computed in phase2).
			 * There is not enough reserved space in the jnlpool to write the transaction's journal records.
			 * Skip writing any more records in the reserved space. A later call to JPL_PHASE2_WRITE_COMPLETE
			 * will know this happened by checking jrs->write_total and will take appropriate action.
			 * But continue "write_total" accumulation (used at end to set jrs->write_total) hence
			 * the "continue" below instead of a "break".
			 * Note that "cur_write_addr" and "end_write_addr" are deliberately left untouched here.
			 */
			assert(FALSE);
			continue;
		}
		assert(cur_write_addr >= jctl->write_addr);
		end_write_addr = cur_write_addr + rlen;
		assert(end_write_addr <= jctl->rsrv_write_addr);
		/* If we cannot fit this whole transaction in the journal pool, the source server will anyway read this
		 * transaction from the journal files. So skip the memcpy onto the jnlpool in the interest of time.
		 */
		if (!pool_overflow)
		{
			assert(!jrs->memcpy_skipped);
			/* Wait for jctl->write_addr to be high so we can go ahead with write
			 * without overflowing/underflowing the pool.
			 */
			wait_write_addr = (gtm_int64_t)end_write_addr - jnlpool_size;
			while ((gtm_int64_t)jctl->write_addr < wait_write_addr)
			{
				JPL_TRACE_PRO(jctl, jnl_pool_write_sleep);
				SLEEP_USEC(1, FALSE);
				/* TODO: Need to handle case of too-many "repl_phase2_cleanup" (and hence "is_proc_alive")
				 * calls by concurrent processes at same time. If we see a lot of the "jnl_pool_write_sleep"
				 * counter value then this will become a priority. For now we expect that counter to be ~ 0.
				 */
				repl_phase2_cleanup(local_jnlpool);
			}
			/* Pick the source of the record bytes. Fixed size records live entirely in "jnl_rec".
			 * Otherwise the formatted record is in "jfb". If the database is encrypted, then at this
			 * point jfb->buff will contain encrypted data which we don't want to push into the jnlpool.
			 * Instead, we make use of the alternate alt_buff which is guaranteed to contain the original
			 * unencrypted data.
			 */
			if (jrt_fixed_size[rectype])
				jnlrecptr = (uchar_ptr_t)jnl_rec;
			else if (IS_SET_KILL_ZKILL_ZTWORM_LGTRIG_ZTRIG(rectype) && USES_ANY_KEY(csa->hdr))
				jnlrecptr = (uchar_ptr_t)jfb->alt_buff;
			else
				jnlrecptr = (uchar_ptr_t)jfb->buff;
			write = cur_write_addr % jnlpool_size;	/* offset into the circular jnlpool data area */
			dstlen = jnlpool_size - write;		/* contiguous space left before the pool wraps around */
			assert(rlen < jnlpool_size);	/* Because of the "!pool_overflow" check above */
			/* In spite of the above assert, do a "rlen < jnlpool_size" check below in pro to be safe */
			if (rlen <= dstlen)		/* dstlen & srclen >= rlen  (most frequent case) */
				memcpy(local_jnlpool->jnldata_base + write, jnlrecptr, rlen);
			else if (rlen < jnlpool_size)	/* dstlen < rlen < jnlpool_size : record wraps around end of pool */
			{
				memcpy(local_jnlpool->jnldata_base + write, jnlrecptr, dstlen);
				memcpy(local_jnlpool->jnldata_base, jnlrecptr + dstlen, rlen - dstlen);
			}
		} else
			jrs->memcpy_skipped = TRUE;
		cur_write_addr = end_write_addr;
	}
	/* Publish "cur_write_addr" rather than "end_write_addr". The two are equal after any iteration that did not
	 * take the "continue" above, but "end_write_addr" is never assigned if every iteration was skipped (possible
	 * in pro where the assert(FALSE) is a no-op). Using "cur_write_addr" (initialized before the loop) avoids
	 * storing an indeterminate value into jrs->cur_write_addr in that case and leaves it unchanged instead.
	 */
	assert(cur_write_addr > jrs->cur_write_addr);
	jrs->cur_write_addr = cur_write_addr;
	jrs->write_total = write_total;
	return;
}