1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184
|
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 2001-2002
* Sleepycat Software. All rights reserved.
*/
#ifndef _REP_H_
#define _REP_H_
/*
 * Replication message types.
 *
 * NOTE(review): these look like the values carried in REP_CONTROL.rectype
 * ("Message type") -- confirm against the sender/receiver code.  The value 5
 * is not assigned to any type in this list; confirm it is free before
 * reusing it.
 */
#define REP_ALIVE 1 /* I am alive message. */
#define REP_ALIVE_REQ 2 /* Request for alive messages. */
#define REP_ALL_REQ 3 /* Request all log records greater than LSN. */
#define REP_ELECT 4 /* Indicates that all listeners should
* begin a master election. */
#define REP_FILE 6 /* Page of a database file. */
#define REP_FILE_REQ 7 /* Request for a database file. */
#define REP_LOG 8 /* Log record. */
#define REP_LOG_MORE 9 /* There are more log records to request. */
#define REP_LOG_REQ 10 /* Request for a log record. */
#define REP_MASTER_REQ 11 /* Request to learn who the master is. */
#define REP_NEWCLIENT 12 /* Announces the presence of a new client. */
#define REP_NEWFILE 13 /* Announce a log file change. */
#define REP_NEWMASTER 14 /* Announces who the master is. */
#define REP_NEWSITE 15 /* Announces that a site has heard from a new
* site; like NEWCLIENT, but indirect. A
* NEWCLIENT message comes directly from the new
* client, while a NEWSITE comes indirectly from
* someone who heard about the new site.
*/
#define REP_PAGE 16 /* Database page. */
#define REP_PAGE_REQ 17 /* Request for a database page. */
#define REP_PLIST 18 /* Database page list. */
#define REP_PLIST_REQ 19 /* Request for a page list. */
#define REP_VERIFY 20 /* A log record for verification. */
#define REP_VERIFY_FAIL 21 /* The client is outdated. */
#define REP_VERIFY_REQ 22 /* Request for a log record to verify. */
#define REP_VOTE1 23 /* Send out your information for an election. */
#define REP_VOTE2 24 /* Send a "you are master" vote. */
/*
 * Used to consistently designate which messages ought to be received where.
 *
 * MASTER_ONLY and CLIENT_ONLY make the *enclosing function* return EINVAL
 * when the environment handle does not have the required replication role
 * flag set, so they may only appear inside functions that return int.
 * Invoke them as statements, with a trailing semicolon:
 *
 *	MASTER_ONLY(dbenv);
 *
 * Each body is wrapped in do { ... } while (0) so that it expands to exactly
 * one statement.  A bare "if (...) return (EINVAL)" expansion would capture
 * a following "else" when the macro is used as the body of an unbraced if,
 * silently changing the caller's control flow.
 *
 * ANYSITE expands to nothing: it performs no role check and only marks the
 * call site.
 */
#define	MASTER_ONLY(dbenv) do {						\
	if (!F_ISSET((dbenv), DB_ENV_REP_MASTER))			\
		return (EINVAL);					\
} while (0)
#define	CLIENT_ONLY(dbenv) do {						\
	if (!F_ISSET((dbenv), DB_ENV_REP_CLIENT))			\
		return (EINVAL);					\
} while (0)
#define	ANYSITE(dbenv)
/*
 * Shared replication structure.
 *
 * NOTE(review): the mutex comment below implies this structure is shalloc'ed
 * (i.e. lives in shared region memory), so member order and sizes should be
 * treated as fixed -- confirm before rearranging anything.
 */
typedef struct __rep {
/*
* Due to alignment constraints on some architectures (e.g. HP-UX),
* DB_MUTEXes must be the first element of shalloced structures,
* and as a corollary there can be only one per structure. Thus,
* db_mutex_off points to a mutex in a separately-allocated chunk.
*/
DB_MUTEX mutex; /* Region lock. */
roff_t db_mutex_off; /* Client database mutex. */
u_int32_t tally_off; /* Offset of the tally region. */
int eid; /* Environment id. */
int master_id; /* ID of the master site. */
u_int32_t gen; /* Replication generation number. */
int asites; /* Space allocated for sites. */
int nsites; /* Number of sites in group. */
int priority; /* My priority in an election. */
u_int32_t gbytes; /* Limit on data sent in a single */
u_int32_t bytes; /* __rep_process_message call.
* NOTE(review): presumably the limit is
* split into gigabytes plus bytes --
* confirm. */
/*
* NOTE(review): presumably the default values for request_gap and max_gap
* below -- confirm against the initialization code.
*/
#define DB_REP_REQUEST_GAP 4
#define DB_REP_MAX_GAP 128
u_int32_t request_gap; /* # of records to receive before we
* request a missing log record. */
u_int32_t max_gap; /* Maximum number of records before
* requesting a missing log record. */
/* Vote tallying information. */
int sites; /* Sites heard from. */
int winner; /* Current winner. */
int w_priority; /* Winner priority. */
u_int32_t w_gen; /* Winner generation. */
DB_LSN w_lsn; /* Winner LSN. */
int w_tiebreaker; /* Winner tiebreaking value. */
int votes; /* Number of votes for this site. */
/* Statistics. */
DB_REP_STAT stat;
/* Flag bits for the flags member below. */
#define REP_F_EPHASE1 0x01 /* In phase 1 of election. */
#define REP_F_EPHASE2 0x02 /* In phase 2 of election. */
#define REP_F_LOGSONLY 0x04 /* Log-site only; cannot be upgraded. */
#define REP_F_MASTER 0x08 /* Master replica. */
#define REP_F_RECOVER 0x10 /* NOTE(review): undocumented in this
* header; the name suggests recovery
* is in progress -- confirm. */
#define REP_F_UPGRADE 0x20 /* Upgradeable replica. */
/* Mask covering both client flag bits (upgradeable or logs-only). */
#define REP_ISCLIENT (REP_F_UPGRADE | REP_F_LOGSONLY)
u_int32_t flags; /* REP_F_* flag bits (see above). */
} REP;
/*
 * Election-state helpers.  R is handed to F_ISSET/F_CLR together with both
 * election-phase bits (REP_F_EPHASE1 | REP_F_EPHASE2): IN_ELECTION tests
 * them, ELECTION_DONE clears them.
 *
 * NOTE(review): F_ISSET and F_CLR are defined elsewhere; this assumes
 * F_ISSET is an "any of these bits" test and that R is a pointer to a
 * structure with a flags member (e.g. REP *) -- confirm.
 */
#define IN_ELECTION(R) F_ISSET((R), REP_F_EPHASE1 | REP_F_EPHASE2)
#define ELECTION_DONE(R) F_CLR((R), REP_F_EPHASE1 | REP_F_EPHASE2)
/*
* Per-process replication structure.
*
* Holds this process's pointers to the replication mutexes, the bookkeeping
* database, the REP structure, and the send callback.
*/
struct __db_rep {
DB_MUTEX *mutexp; /* NOTE(review): presumably the REP
* region lock (REP.mutex) -- confirm. */
DB_MUTEX *db_mutexp; /* Mutex for bookkeeping database. */
DB *rep_db; /* Bookkeeping database. */
REP *region; /* In memory structure. */
/*
* Send function.  The parameters are unnamed in this header.
* NOTE(review): presumably the two DBTs are the control and record parts of
* a message, the int a destination id and the u_int32_t flags -- confirm
* against the code that installs and calls rep_send.
*/
int (*rep_send) /* Send function. */
__P((DB_ENV *,
const DBT *, const DBT *, int, u_int32_t));
};
/*
* Control structure for replication communication infrastructure.
*
* Note that the version information should be at the beginning of the
* structure, so that we can rearrange the rest of it while letting the
* version checks continue to work. DB_REPVERSION should be incremented any
* time the rest of the structure changes.
*/
typedef struct __rep_control {
#define DB_REPVERSION 1 /* Current replication version. */
u_int32_t rep_version; /* Replication version number. */
u_int32_t log_version; /* Log version number. */
DB_LSN lsn; /* Log sequence number. */
u_int32_t rectype; /* Message type.  NOTE(review): presumably
* one of the REP_* message-type constants
* defined in this header -- confirm. */
u_int32_t gen; /* Generation number. */
u_int32_t flags; /* log_put flag value. */
} REP_CONTROL;
/*
 * Election vote information.
 *
 * NOTE(review): presumably the payload sent with the REP_VOTE1 message
 * ("Send out your information for an election") -- confirm.
 */
typedef struct __rep_vote {
int priority; /* My site's priority. */
int nsites; /* Number of sites I've been in
* communication with. */
int tiebreaker; /* Tie-breaking quasi-random int. */
} REP_VOTE_INFO;
/*
* The LSN_PAGE and TXN_RECS structures take care of representing a
* transaction.  Together they hold all of its records, sorted by page
* number so that we can obtain locks and apply updates in a deadlock-free
* order.
*
* LSN_PAGE is the per-record entry; TXN_RECS holds an array of them.
*/
typedef struct __lsn_page {
DB_LSN lsn; /* NOTE(review): presumably the LSN of the log
* record this entry describes -- confirm. */
u_int32_t fid; /* NOTE(review): presumably a file id --
* confirm. */
DB_LOCK_ILOCK pgdesc; /* NOTE(review): presumably the lock object
* describing the page -- confirm. */
#define LSN_PAGE_NOLOCK 0x0001 /* No lock necessary for log rec. */
u_int32_t flags; /* LSN_PAGE_* flag bits. */
} LSN_PAGE;
/*
 * Collection of LSN_PAGE entries for one transaction.
 *
 * NOTE(review): field meanings below are inferred from the names only --
 * confirm against the code that fills this structure in.
 */
typedef struct __txn_recs {
int npages; /* Presumably: entries in use in array. */
int nalloc; /* Presumably: entries allocated in array. */
LSN_PAGE *array; /* The LSN_PAGE entries. */
u_int32_t txnid; /* Presumably: transaction id. */
u_int32_t lockid; /* Presumably: locker id used for the locks. */
} TXN_RECS;
/*
 * A counted array of LSNs.
 *
 * NOTE(review): field meanings below are inferred from the names only --
 * confirm against the code that fills this structure in.
 */
typedef struct __lsn_collection {
int nlsns; /* Presumably: entries in use in array. */
int nalloc; /* Presumably: entries allocated in array. */
DB_LSN *array; /* The LSNs. */
} LSN_COLLECTION;
/*
* This is used by the page-prep routines to do the lock_vec call to
* apply the updates for a single transaction or a collection of
* transactions.
*
* NOTE(review): field meanings below are inferred from the names and types
* only -- confirm against the page-prep routines.
*/
typedef struct _linfo {
int n; /* Presumably: number of entries in reqs/objs. */
DB_LOCKREQ *reqs; /* Lock requests (for the lock_vec call). */
DBT *objs; /* Presumably: lock objects for the requests. */
} linfo_t;
#include "dbinc_auto/rep_ext.h"
#endif /* !_REP_H_ */
|