1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384
|
/** @file dbclient_rs.h Connect to a Replica Set, from C++ */
/* Copyright 2009 10gen Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "../pch.h"
#include "dbclient.h"
namespace mongo {
class ReplicaSetMonitor;
typedef shared_ptr<ReplicaSetMonitor> ReplicaSetMonitorPtr;
typedef pair<set<string>,set<int> > NodeDiff;
/**
* manages state about a replica set for client
* keeps tabs on whose master and what slaves are up
* can hand a slave to someone for SLAVE_OK
* one instace per process per replica set
* TODO: we might be able to use a regular Node * to avoid _lock
*/
class ReplicaSetMonitor {
public:
typedef boost::function1<void,const ReplicaSetMonitor*> ConfigChangeHook;
/**
* gets a cached Monitor per name or will create if doesn't exist
*/
static ReplicaSetMonitorPtr get( const string& name , const vector<HostAndPort>& servers );
/**
* gets a cached Monitor per name or will return none if it doesn't exist
*/
static ReplicaSetMonitorPtr get( const string& name );
/**
* checks all sets for current master and new secondaries
* usually only called from a BackgroundJob
*/
static void checkAll( bool checkAllSecondaries );
/**
* this is called whenever the config of any repclia set changes
* currently only 1 globally
* asserts if one already exists
* ownership passes to ReplicaSetMonitor and the hook will actually never be deleted
*/
static void setConfigChangeHook( ConfigChangeHook hook );
~ReplicaSetMonitor();
/** @return HostAndPort or throws an exception */
HostAndPort getMaster();
/**
* notify the monitor that server has faild
*/
void notifyFailure( const HostAndPort& server );
/** @return prev if its still ok, and if not returns a random slave that is ok for reads */
HostAndPort getSlave( const HostAndPort& prev );
/** @return a random slave that is ok for reads */
HostAndPort getSlave();
/**
* notify the monitor that server has faild
*/
void notifySlaveFailure( const HostAndPort& server );
/**
* checks for current master and new secondaries
*/
void check( bool checkAllSecondaries );
string getName() const { return _name; }
string getServerAddress() const;
bool contains( const string& server ) const;
void appendInfo( BSONObjBuilder& b ) const;
private:
/**
* This populates a list of hosts from the list of seeds (discarding the
* seed list).
* @param name set name
* @param servers seeds
*/
ReplicaSetMonitor( const string& name , const vector<HostAndPort>& servers );
/**
* Checks all connections from the host list and sets the current
* master.
*
* @param checkAllSecondaries if set to false, stop immediately when
* the master is found or when _master is not -1.
*/
void _check( bool checkAllSecondaries );
/**
* Use replSetGetStatus command to make sure hosts in host list are up
* and readable. Sets Node::ok appropriately.
*/
void _checkStatus( const string& hostAddr );
/**
* Add array of hosts to host list. Doesn't do anything if hosts are
* already in host list.
* @param hostList the list of hosts to add
* @param changed if new hosts were added
*/
void _checkHosts(const BSONObj& hostList, bool& changed);
/**
* Updates host list.
* Invariant: if nodesOffset is >= 0, _nodes[nodesOffset].conn should be
* equal to conn.
*
* @param conn the connection to check
* @param maybePrimary OUT
* @param verbose
* @param nodesOffset - offset into _nodes array, -1 for not in it
*
* @return true if the connection is good or false if invariant
* is broken
*/
bool _checkConnection( DBClientConnection* conn, string& maybePrimary,
bool verbose, int nodesOffset );
string _getServerAddress_inlock() const;
NodeDiff _getHostDiff_inlock( const BSONObj& hostList );
bool _shouldChangeHosts( const BSONObj& hostList, bool inlock );
/**
* @return the index to _nodes corresponding to the server address.
*/
int _find( const string& server ) const ;
int _find_inlock( const string& server ) const ;
/**
* Checks whether the given connection matches the connection stored in _nodes.
* Mainly used for sanity checking to confirm that nodeOffset still
* refers to the right connection after releasing and reacquiring
* a mutex.
*/
bool _checkConnMatch_inlock( DBClientConnection* conn, size_t nodeOffset ) const;
// protects _nodes and indices pointing to it (_master & _nextSlave)
mutable mongo::mutex _lock;
/**
* "Synchronizes" the _checkConnection method. Should ideally be one mutex per
* connection object being used. The purpose of this lock is to make sure that
* the reply from the connection the lock holder got is the actual response
* to what it sent.
*
* Deadlock WARNING: never acquire this while holding _lock
*/
mutable mongo::mutex _checkConnectionLock;
string _name;
struct Node {
Node( const HostAndPort& a , DBClientConnection* c )
: addr( a ) , conn(c) , ok( c != NULL ),
ismaster(false), secondary( false ) , hidden( false ) , pingTimeMillis(0) {
}
bool okForSecondaryQueries() const {
return ok && secondary && ! hidden;
}
BSONObj toBSON() const {
return BSON( "addr" << addr.toString() <<
"isMaster" << ismaster <<
"secondary" << secondary <<
"hidden" << hidden <<
"ok" << ok );
}
string toString() const {
return toBSON().toString();
}
HostAndPort addr;
shared_ptr<DBClientConnection> conn;
// if this node is in a failure state
// used for slave routing
// this is too simple, should make it better
bool ok;
// as reported by ismaster
BSONObj lastIsMaster;
bool ismaster;
bool secondary;
bool hidden;
int pingTimeMillis;
};
/**
* Host list.
*/
vector<Node> _nodes;
int _master; // which node is the current master. -1 means no master is known
int _nextSlave; // which node is the current slave
static mongo::mutex _setsLock; // protects _sets
static map<string,ReplicaSetMonitorPtr> _sets; // set name to Monitor
static ConfigChangeHook _hook;
};
/** Use this class to connect to a replica set of servers. The class will manage
checking for which server in a replica set is master, and do failover automatically.
This can also be used to connect to replica pairs since pairs are a subset of sets
On a failover situation, expect at least one operation to return an error (throw
an exception) before the failover is complete. Operations are not retried.
*/
class DBClientReplicaSet : public DBClientBase {
public:
/** Call connect() after constructing. autoReconnect is always on for DBClientReplicaSet connections. */
DBClientReplicaSet( const string& name , const vector<HostAndPort>& servers, double so_timeout=0 );
virtual ~DBClientReplicaSet();
/** Returns false if nomember of the set were reachable, or neither is
* master, although,
* when false returned, you can still try to use this connection object, it will
* try reconnects.
*/
bool connect();
/** Authorize. Authorizes all nodes as needed
*/
virtual bool auth(const string &dbname, const string &username, const string &pwd, string& errmsg, bool digestPassword = true );
// ----------- simple functions --------------
/** throws userassertion "no master found" */
virtual auto_ptr<DBClientCursor> query(const string &ns, Query query, int nToReturn = 0, int nToSkip = 0,
const BSONObj *fieldsToReturn = 0, int queryOptions = 0 , int batchSize = 0 );
/** throws userassertion "no master found" */
virtual BSONObj findOne(const string &ns, const Query& query, const BSONObj *fieldsToReturn = 0, int queryOptions = 0);
virtual void insert( const string &ns , BSONObj obj , int flags=0);
/** insert multiple objects. Note that single object insert is asynchronous, so this version
is only nominally faster and not worth a special effort to try to use. */
virtual void insert( const string &ns, const vector< BSONObj >& v , int flags=0);
virtual void remove( const string &ns , Query obj , bool justOne = 0 );
virtual void update( const string &ns , Query query , BSONObj obj , bool upsert = 0 , bool multi = 0 );
virtual void killCursor( long long cursorID );
// ---- access raw connections ----
DBClientConnection& masterConn();
DBClientConnection& slaveConn();
// ---- callback pieces -------
virtual void say( Message &toSend, bool isRetry = false );
virtual bool recv( Message &toRecv );
virtual void checkResponse( const char* data, int nReturned, bool* retry = NULL, string* targetHost = NULL );
/* this is the callback from our underlying connections to notify us that we got a "not master" error.
*/
void isntMaster();
/* this is used to indicate we got a "not master or secondary" error from a secondary.
*/
void isntSecondary();
// ----- status ------
virtual bool isFailed() const { return ! _master || _master->isFailed(); }
// ----- informational ----
double getSoTimeout() const { return _so_timeout; }
string toString() { return getServerAddress(); }
string getServerAddress() const { return _monitor->getServerAddress(); }
virtual ConnectionString::ConnectionType type() const { return ConnectionString::SET; }
virtual bool lazySupported() const { return true; }
// ---- low level ------
virtual bool call( Message &toSend, Message &response, bool assertOk=true , string * actualServer = 0 );
virtual bool callRead( Message& toSend , Message& response ) { return checkMaster()->callRead( toSend , response ); }
protected:
virtual void sayPiggyBack( Message &toSend ) { checkMaster()->say( toSend ); }
private:
// Used to simplify slave-handling logic on errors
auto_ptr<DBClientCursor> checkSlaveQueryResult( auto_ptr<DBClientCursor> result );
DBClientConnection * checkMaster();
DBClientConnection * checkSlave();
void _auth( DBClientConnection * conn );
ReplicaSetMonitorPtr _monitor;
HostAndPort _masterHost;
scoped_ptr<DBClientConnection> _master;
HostAndPort _slaveHost;
scoped_ptr<DBClientConnection> _slave;
double _so_timeout;
/**
* for storing authentication info
* fields are exactly for DBClientConnection::auth
*/
struct AuthInfo {
AuthInfo( string d , string u , string p , bool di )
: dbname( d ) , username( u ) , pwd( p ) , digestPassword( di ) {}
string dbname;
string username;
string pwd;
bool digestPassword;
};
// we need to store so that when we connect to a new node on failure
// we can re-auth
// this could be a security issue, as the password is stored in memory
// not sure if/how we should handle
list<AuthInfo> _auths;
protected:
/**
* for storing (non-threadsafe) information between lazy calls
*/
class LazyState {
public:
LazyState() : _lastClient( NULL ), _lastOp( -1 ), _slaveOk( false ), _retries( 0 ) {}
DBClientConnection* _lastClient;
int _lastOp;
bool _slaveOk;
int _retries;
} _lazyState;
};
}
|