1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643
|
#
# drbd.conf example
#
# parameters you _need_ to change are the hostname, device, disk,
# meta-disk, address and port in the "on <hostname> {}" sections.
#
# you ought to know about the protocol, and the various timeouts.
#
# you probably want to set the rate in the syncer sections
#
# NOTE common pitfall:
# rate is given in units of _byte_ not bit
#
#
# increase timeout and maybe ping-int in net{}, if you see
# problems with "connection lost/connection established"
# (or change your setup to reduce network latency; make sure full
# duplex behaves as such; check average roundtrip times while
# network is saturated; and so on ...)
#
skip {
As you can see, you can also comment chunks of text
with a 'skip[optional nonsense]{ skipped text }' section.
This comes in handy, if you just want to comment out
some 'resource <some name> {...}' section:
just precede it with 'skip'.
The basic format of option assignment is
<option name><linear whitespace><value>;
It should be obvious from the examples below,
but if you really care to know the details:
<option name> :=
valid options in the respective scope
<value> := <num>|<string>|<choice>|...
depending on the set of allowed values
for the respective option.
<num> := [0-9]+, sometimes with an optional suffix of K,M,G
<string> := (<name>|\"([^\"\\\n]*|\\.)*\")+
<name> := [/_.A-Za-z0-9-]+
}
#
# At most ONE global section is allowed.
# It must precede any resource section.
#
global {
  # By default the module is loaded with a minor-count of 32. If your
  # config defines more devices than that, the module is loaded with
  # a minor-count that leaves you 10 spare minors.
  # If 10 spare minors are too few for you, you can set the
  # minor-count explicitly here. (Note: in contrast to DRBD-0.7, an
  # unused spare minor has only a very small overhead of allocated
  # memory -- a single pointer, to be exact.)
  #
  # minor-count 64;

  # The user dialog counts and displays the seconds it has waited so
  # far. You might want to disable this if the console of your server
  # is connected to a serial terminal server with limited logging
  # capacity.
  # The dialog prints the count every 'dialog-refresh' seconds;
  # set it to 0 to disable redrawing completely. [ default = 1 ]
  #
  # dialog-refresh 5; # 5 seconds

  # You may disable one of drbdadm's sanity checks.
  # disable-ip-verification;

  # Participate in DRBD's online usage counter at http://usage.drbd.org
  # Possible options: ask, yes, no. Default is ask. If you do not
  # know, set it to ask and follow the on-screen instructions later.
  usage-count yes;
}
#
# The common section can have all the sections a resource can have but
# not the host section (started with the "on" keyword).
# The common section must precede all resources.
# All resources inherit the settings from the common section;
# settings given inside a resource take precedence over the
# common settings.
#
common {
  # Resync rate inherited by every resource that does not set its own.
  # The unit is based on bytes, not bits.
  syncer {
    rate 10M;
  }
}
#
# this need not be r#, you may use phony resource names,
# like "resource web" or "resource mail", too
#
resource r0 {
# transfer protocol to use.
# C: write IO is reported as completed, if we know it has
# reached _both_ local and remote DISK.
# * for critical transactional data.
# B: write IO is reported as completed, if it has reached
# local DISK and remote buffer cache.
# * for most cases.
# A: write IO is reported as completed, if it has reached
# local DISK and local tcp send buffer. (see also sndbuf-size)
# * for high latency networks
#
#**********
# uhm, benchmarks have shown that C is actually better than B.
# this note shall disappear, when we are convinced that B is
# the right choice "for most cases".
# Until then, always use C unless you have a reason not to.
# --lge
#**********
#
protocol C;
handlers {
  # What should be done if the node is primary, degraded
  # (= no connection) and has inconsistent data.
  pri-on-incon-degr "/usr/lib/drbd/notify-pri-on-incon-degr.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";

  # The node is currently primary, but lost the after-split-brain
  # auto recovery procedure. As a consequence it should go away.
  pri-lost-after-sb "/usr/lib/drbd/notify-pri-lost-after-sb.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";

  # If you have set the on-io-error option to "call-local-io-error",
  # this script is executed in case of a local IO error. It is
  # expected that this script will cause an immediate failover in the
  # cluster.
  local-io-error "/usr/lib/drbd/notify-io-error.sh; /usr/lib/drbd/notify-emergency-shutdown.sh; echo o > /proc/sysrq-trigger ; halt -f";

  # Commands to run in case we need to downgrade the peer's disk
  # state to "Outdated". Should be implemented by the superior
  # communication possibilities of our cluster manager.
  # The provided script uses ssh, and is for demonstration/development
  # purposes.
  # fence-peer "/usr/lib/drbd/outdate-peer.sh on amd 192.168.22.11 192.168.23.11 on alf 192.168.22.12 192.168.23.12";
  #
  # Update: Now there is a solution that relies on heartbeat's
  # communication layers. You should really use this.
  fence-peer "/usr/lib/heartbeat/drbd-peer-outdater -t 5";
  # For Pacemaker you might use:
  # fence-peer "/usr/lib/drbd/crm-fence-peer.sh";

  # The node is currently primary, but should become sync target
  # after the negotiating phase. Alert someone about this incident.
  #pri-lost "/usr/lib/drbd/notify-pri-lost.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";

  # Notify someone of a split brain immediately, regardless of auto recovery policies.
  #initial-split-brain "/usr/lib/drbd/notify-split-brain.sh root";

  # Notify someone, or maybe do something else, if DRBD split-brained, was not
  # automatically recovered, and the resource is now sitting disconnected.
  #split-brain "/usr/lib/drbd/notify-split-brain.sh root";

  # Notify someone in case an online verify run found the backing devices out of sync.
  #out-of-sync "/usr/lib/drbd/notify-out-of-sync.sh root";

  #
  # These two handlers can be used to snapshot sync-target devices
  # for the duration of the resync.
  # The provided script has these options:
  # -p | --percent <reserve space in percent of the original volume. Default: 10%>
  # -a | --additional <snapshot space in KiB. Default: 10 MiB>
  # -n | --disconnect-on-error
  #      By default the script tells DRBD to do the resync no matter
  #      whether taking the snapshot works or not.
  #      If you prefer to drop the connection in case taking the snapshot
  #      fails, use the --disconnect-on-error option.
  # -v | --verbose
  # -- <additional lvcreate options>
  #before-resync-target "/usr/lib/drbd/snapshot-resync-target-lvm.sh -p 15 -- -c 16k";
  #after-resync-target /usr/lib/drbd/unsnapshot-resync-target-lvm.sh;
}
startup {
  # Wait-for-connection timeout.
  # The init script blocks the boot process until the resources
  # are connected. This is so that when the cluster manager starts
  # later, it does not see a resource with internal split-brain.
  # If you want to limit the wait time, do it here.
  # Default is 0, which means unlimited. Unit is seconds.
  #
  # wfc-timeout 0;

  # Wait-for-connection timeout if this node was a degraded cluster.
  # If a degraded cluster (= cluster with only one node left)
  # is rebooted, this timeout value is used.
  #
  degr-wfc-timeout 120; # 2 minutes.

  # Wait-for-connection timeout if the peer node is already outdated.
  # (Do not set this to 0, since that means unlimited.)
  #
  outdated-wfc-timeout 2; # 2 seconds.

  # After a split brain situation the devices will drop their
  # network configuration instead of connecting. Since this
  # means that the network is working, the cluster manager
  # should be able to communicate as well. Therefore the default
  # of DRBD's init script is to terminate in this case. To make
  # it continue waiting in this case, set this option.
  #
  # wait-after-sb;

  # If you are using DRBD for GFS/OCFS2 you want the
  # startup script to promote it to primary. Node names are also
  # possible instead of "both".
  # become-primary-on both;
}
disk {
  # If the lower level device reports an io-error you have the choice of
  # "pass_on" -> Report the io-error to the upper layers.
  #              Primary   -> report it to the mounted file system.
  #              Secondary -> ignore it.
  # "call-local-io-error"
  #           -> Call the script configured by the name "local-io-error".
  # "detach"  -> The node drops its backing storage device, and
  #              continues in diskless mode.
  #
  on-io-error detach;

  # Controls the fencing policy, default is "dont-care". Before you
  # set any policy you need to make sure that you have a working
  # fence-peer handler. Possible values are:
  # "dont-care"     -> Never call the fence-peer handler. [ DEFAULT ]
  # "resource-only" -> Call the fence-peer handler if we are primary and
  #                    lose the connection to the secondary, and also
  #                    when an unconnected secondary wants to become
  #                    primary.
  # "resource-and-stonith"
  #                 -> Calls the fence-peer handler and freezes local
  #                    IO immediately after loss of connection. This is
  #                    necessary if your heartbeat can STONITH the other
  #                    node.
  # fencing resource-only;

  # If you only want to use a fraction of the available space
  # you might use the "size" option here.
  #
  # size 10G;

  # If you are sure that your storage subsystem has battery
  # backed up RAM and you know from measurements that it really honors
  # flush instructions by flushing data out from its non volatile
  # write cache to disk, you have double security. You might then
  # reduce this to single security by disabling disk flushes with
  # this option. It might improve performance in this case.
  # ONLY USE THIS OPTION IF YOU KNOW WHAT YOU ARE DOING.
  # no-disk-flushes;
  # no-md-flushes;

  # In some special circumstances the device mapper stack manages to
  # pass BIOs to DRBD that violate the constraints that are set forth
  # by DRBD's merge_bvec() function and which have more than one bvec.
  # A known example is:
  # phys-disk -> DRBD -> LVM -> Xen -> misaligned partition (63) -> DomU FS
  # Then you might see "bio would need to, but cannot, be split:" in
  # the Dom0's kernel log.
  # The best workaround is to properly align the partition within
  # the VM (e.g. start it at sector 1024). (Costs 480 KiByte of storage)
  # Unfortunately the default of most Linux partitioning tools is
  # to start the first partition at an odd number (63). Therefore
  # most distributions' install helpers for virtual Linux machines will
  # end up with misaligned partitions.
  # The second best workaround is to limit DRBD's max bvecs per BIO
  # (= max-bio-bvecs) to 1. (Costs performance.)
  # max-bio-bvecs 1;
}
net {
  # Size of the TCP socket send buffer.
  # Increase it _carefully_ if you want to use protocol A over a
  # high-latency network with reasonable write throughput.
  # Defaults to 2*65535; you might even try 1M, but if your kernel or
  # network driver chokes on that, you have been warned.
  # sndbuf-size 512k;

  # timeout 60; # 6 seconds (unit = 0.1 seconds)
  # connect-int 10; # 10 seconds (unit = 1 second)
  # ping-int 10; # 10 seconds (unit = 1 second)
  # ping-timeout 5; # 500 ms (unit = 0.1 seconds)

  # Maximum number of requests (4K each) to be allocated by DRBD.
  # The minimum is hardcoded to 32 (= 128 kByte).
  # For high-performance installations it might help to
  # increase that number. These buffers are used to hold
  # data blocks while they are written to disk.
  #
  # max-buffers 2048;

  # When the number of outstanding requests on a standby (secondary)
  # node exceeds bdev-threshold, we start to kick the backing device
  # to start its request processing. This is an advanced tuning
  # parameter to get more performance out of capable storage controllers.
  # Some controllers like to be kicked often, other controllers
  # deliver better performance when they are kicked less frequently.
  # Set it to the value of max-buffers to get the least possible
  # number of run_task_queue_disk() / q->unplug_fn(q) calls.
  # NOTE(review): the text above says "bdev-threshold" while the option
  # shown below is "unplug-watermark" -- presumably the same setting; confirm.
  #
  # unplug-watermark 128;

  # The highest number of data blocks between two write barriers.
  # If you set this < 10 you might decrease your performance.
  # max-epoch-size 2048;

  # If some block send times out this many times, the peer is
  # considered dead, even if it still answers ping requests.
  # ko-count 4;

  # If you want to use OCFS2/openGFS on top of DRBD, enable
  # this option, and only enable it if you are going to use
  # one of these filesystems. Do not enable it for ext2,
  # ext3, reiserFS, XFS, JFS etc...
  # allow-two-primaries;

  # This enables peer authentication. Without this, everybody
  # on the network could connect to one of your DRBD nodes with
  # a program that emulates DRBD's protocol and could suck off
  # all your data.
  # Specify one of the kernel's digest algorithms, e.g.:
  # md5, sha1, sha256, sha512, wp256, wp384, wp512, michael_mic ...
  # and a shared secret.
  # Authentication is only done once, after the TCP connection
  # is established. There are no disadvantages from using authentication,
  # therefore I suggest enabling it in any case.
  # cram-hmac-alg "sha1";
  # shared-secret "FooFunFactory";

  # If the nodes of your cluster see each other again after
  # a split brain situation in which both nodes were primary
  # at the same time, you have two diverged versions of your data.
  #
  # If both nodes are secondary you can control DRBD's
  # auto recovery strategy with the "after-sb-0pri" option. The
  # default is to disconnect.
  # "disconnect" ... No automatic resynchronisation, simply disconnect.
  # "discard-younger-primary"
  #                  Auto sync from the node that was primary before
  #                  the split brain situation happened.
  # "discard-older-primary"
  #                  Auto sync from the node that became primary
  #                  as second during the split brain situation.
  # "discard-least-changes"
  #                  Auto sync from the node that touched more
  #                  blocks during the split brain situation.
  # "discard-node-NODENAME"
  #                  Auto sync _to_ the named node.
  after-sb-0pri disconnect;

  # If one of the nodes is already primary, then the auto-recovery
  # strategy is controlled by the "after-sb-1pri" option.
  # "disconnect" ... always disconnect
  # "consensus" ... discard the version of the secondary if the outcome
  #                  of the "after-sb-0pri" algorithm would also destroy
  #                  the current secondary's data. Otherwise disconnect.
  # "violently-as0p" Always take the decision of the "after-sb-0pri"
  #                  algorithm, even if that causes an erratic change
  #                  of the primary's view of the data.
  #                  This is only useful if you use a 1node FS (i.e.
  #                  not OCFS2 or GFS) with the allow-two-primaries
  #                  flag, _AND_ you really know what you are doing.
  #                  This is DANGEROUS and MAY CRASH YOUR MACHINE if you
  #                  have a FS mounted on the primary node.
  # "discard-secondary"
  #                  discard the version of the secondary.
  # "call-pri-lost-after-sb" Always honour the outcome of the "after-sb-0pri"
  #                  algorithm. In case it decides that the current
  #                  secondary has the right data, it panics the
  #                  current primary.
  # "suspend-primary" ???
  after-sb-1pri disconnect;

  # If both nodes are primary you control DRBD's strategy with
  # the "after-sb-2pri" option.
  # "disconnect" ... Go to StandAlone mode on both sides.
  # "violently-as0p" Always take the decision of the "after-sb-0pri".
  # "call-pri-lost-after-sb" ... Honor the outcome of the "after-sb-0pri"
  #                  algorithm and panic the other node.
  after-sb-2pri disconnect;

  # To solve the cases when the outcome of the resync decision is
  # incompatible with the current role assignment in the cluster.
  # "disconnect" ... No automatic resynchronisation, simply disconnect.
  # "violently" .... Sync to the primary node is allowed, violating the
  #                  assumption that data on a block device is stable
  #                  for one of the nodes. DANGEROUS, DO NOT USE.
  # "call-pri-lost"  Call the "pri-lost" helper program on one of the
  #                  machines. This program is expected to reboot the
  #                  machine. (I.e. make it secondary.)
  rr-conflict disconnect;

  # DRBD-0.7's behaviour is equivalent to
  # after-sb-0pri discard-younger-primary;
  # after-sb-1pri consensus;
  # after-sb-2pri disconnect;

  # DRBD can ensure the data integrity of the user's data on the network
  # by comparing hash values.
  # Note: Normally this is ensured by the 16 bit checksums in the headers
  # of TCP/IP packets. Unfortunately it turned out that GBit NICs with
  # various offloading engines might produce valid checksums for corrupted
  # data. Use this option during your pre-production tests; usually you
  # want to turn it off for production to reduce CPU overhead.
  # Note2: Data blocks that are modified while their transfer is still
  # going on cause false positives. Known block device users which
  # do so are the swap code and ReiserFS.
  # data-integrity-alg "md5";

  # DRBD usually uses the TCP socket option TCP_CORK to hint to the network
  # stack when it can expect more data, and when it should flush out what it
  # has in its send queue. It turned out that there is at least one network
  # stack that performs worse when one uses this hinting method. Therefore
  # we introduced this option, which disables the setting and clearing of
  # the TCP_CORK socket option by DRBD.
  # no-tcp-cork;
}
syncer {
  # Limit the bandwidth used by the resynchronisation process.
  # Default unit is kByte/sec; optional suffixes K,M,G are allowed.
  #
  # Even though this is a network setting, the units are based
  # on _byte_ (octet for our french friends) not bit.
  # We are storage guys.
  #
  # Note that on 100Mbit ethernet, you cannot expect more than
  # 12.5 MByte total transfer rate.
  # Consider using GigaBit Ethernet.
  #
  rate 10M;

  # Normally all devices are resynchronized in parallel.
  # To achieve better resynchronisation performance you should resync
  # DRBD resources which have their backing storage on one physical
  # disk sequentially. To express this, use the "after" keyword.
  after "r2";

  # Configures the size of the active set. Each extent is 4M,
  # 257 extents ~> 1GB active set size. If your syncer
  # runs @ 10MB/sec, a full resync after a primary's crash will last
  # 1GB / ( 10MB/sec ) ~ 102 seconds ~ one minute and 42 seconds.
  # BTW, the hash algorithm works best if the number of al-extents
  # is prime. (To test the worst case performance use a power of 2.)
  al-extents 257;

  # Sets the CPU affinity mask of DRBD's threads. Might be of interest
  # for advanced performance tuning.
  # cpu-mask 15;

  # As an alternative to the fixed resync rate, DRBD has a resync speed
  # controller. Its purpose is to resync as fast as possible, without
  # filling up queues along the data path.
  # The controller is enabled by setting c-plan-ahead to a value
  # greater than 0 (by default 0, i.e. it is disabled).
  #c-plan-ahead 10; # How long the controller should plan ahead (unit = 0.1 seconds)
  #c-fill-target 0; # Aimed fill level. 0 to use c-delay-target (unit = sectors)
  #c-delay-target 6; # Aimed delay by the drbd-proxy's fill level (unit = 0.1 seconds)
  #c-max-rate 100M; # Upper bound for the controller
  #c-min-rate 4M; # below this rate, application IO will not cause extra throttling of resync

  # If the local disk and the connection to the peer fail concurrently,
  # DRBD fails IO requests by default. Alternatively you can set this to
  # "suspend-io".
  on-no-data-accessible io-error;
}
on amd {
  device /dev/drbd0;
  disk /dev/hde5;
  address 192.168.22.11:7788;
  flexible-meta-disk internal;

  # meta-disk is either 'internal' or '/dev/ice/name [idx]'
  #
  # You can use a single block device to store the meta-data
  # of multiple DRBD devices.
  # E.g. use meta-disk /dev/hde6[0]; and meta-disk /dev/hde6[1];
  # for two different resources. In this case the meta-disk
  # would need to be at least 256 MB in size.
  #
  # 'internal' means that the last 128 MB of the lower device
  # are used to store the meta-data.
  # You must not give an index with 'internal'.
}
on alf {
  # Host section for node "alf" of this resource; meta-data is kept
  # internally on the backing disk.
  device /dev/drbd0;
  disk /dev/hdc5;
  address 192.168.22.12:7788;
  meta-disk internal;
}
}
#
# yes, you may also quote the resource name.
# but don't include whitespace, unless you mean it :)
#
resource "r1" {
  protocol C;

  startup {
    wfc-timeout 0; ## Infinite!
    degr-wfc-timeout 120; ## 2 minutes.
  }

  disk {
    on-io-error detach;
  }

  net {
    # timeout 60;
    # connect-int 10;
    # ping-int 10;
    # max-buffers 2048;
    # max-epoch-size 2048;
  }

  syncer {
  }

  # device, disk and meta-disk may be given at the resource level
  # instead of inside each "on <hostname>" section.
  device /dev/drbd1;
  disk /dev/hde6;
  meta-disk /dev/somewhere [7];

  on amd {
    # Example of an IPv6 address.
    # If you want to use IPv4-in-IPv6, i.e. something like
    # [::ffff:192.168.22.11], you have to set disable-ip-verification
    # in the global section.
    address ipv6 [fd0c:39f4:f135:305:230:48ff:fe63:5c9a]:7789;
  }

  on alf {
    address ipv6 [fd0c:39f4:f135:305:230:48ff:fe63:5ebe]:7789;
  }
}
resource r2 {
  protocol C;

  startup {
    wfc-timeout 0;
    degr-wfc-timeout 120;
  }

  disk {
    on-io-error detach;
  }

  net {
    timeout 60;
    connect-int 10;
    ping-int 10;
    max-buffers 2048;
    max-epoch-size 2048;
  }

  # Original note: "sync when r0 and r1 are finished syncing."
  # NOTE(review): no "after" option is set in this syncer section, so
  # that ordering is not expressed here -- confirm the intent.
  syncer {
    rate 4M;
  }

  # Values may be written bare or quoted; both forms appear below.
  on amd {
    address 192.168.22.11:7790;
    disk /dev/hde7;
    device /dev/drbd2;
    meta-disk "internal";
  }

  on alf {
    device "/dev/drbd2";
    disk "/dev/hdc7";
    meta-disk "internal";
    address 192.168.22.12:7790;
  }
}
resource r3 {
  protocol C;

  # The device is shared by both hosts, so it is given once at
  # resource level.
  device /dev/drbd3;

  on amd {
    disk /dev/hde8;
    address 192.168.22.11:7791;
    meta-disk internal;
  }

  on alf {
    disk /dev/hdc8;
    address 192.168.22.12:7791;
    # External meta-data: block device plus index.
    meta-disk /some/where[8];
  }
}
resource r4 {
  protocol C;

  # The device is specified by its minor number here rather than by a
  # /dev/drbdX path.
  device minor 4;

  on amd {
    disk /dev/hde9;
    address 192.168.22.11:7792;
    meta-disk internal;
  }

  on alf {
    disk /dev/hdc9;
    address 192.168.22.12:7792;
    # External meta-data: block device plus index.
    meta-disk /some/where[9];
  }
}
resource lower-alice-bob {
  # Two-node resource between hosts "alice" and "bob"; referenced by
  # name from the "stacked-on-top-of" section of resource "upper".
  protocol C;

  on alice {
    device /dev/drbd4;
    disk /dev/hde9;
    address 192.168.23.11:7791;
    meta-disk internal;
  }

  on bob {
    device /dev/drbd4;
    disk /dev/hdc9;
    address 192.168.23.12:7791;
    meta-disk /some/where[8];
  }
}
resource lower-charly-daisy {
  # Two-node resource between hosts "charly" and "daisy"; referenced by
  # name from the "stacked-on-top-of" section of resource "upper".
  protocol C;

  on charly {
    device /dev/drbd4;
    disk /dev/hde9;
    address 192.168.23.13:7791;
    meta-disk internal;
  }

  on daisy {
    device /dev/drbd4;
    disk /dev/hdc9;
    address 192.168.23.14:7791;
    meta-disk /some/where[8];
  }
}
resource upper {
  # Stacked resource: each side of "upper" sits on top of one of the
  # lower two-node resources defined in this file, and talks to its
  # peer through a proxy.
  protocol A;

  stacked-on-top-of lower-alice-bob {
    device /dev/drbd10;
    address 127.0.0.1:1230;
    # The proxy host list names the nodes of lower-alice-bob
    # ("alice" and "bob"). The original read "alic", which matches no
    # host defined anywhere in this file.
    proxy on alice bob {
      inside 127.0.0.1:1234;
      outside 192.168.23.21:7791;
    }
  }

  stacked-on-top-of lower-charly-daisy {
    device /dev/drbd10;
    address 127.0.0.1:1230;
    # The proxy host list names the nodes of lower-charly-daisy.
    proxy on charly daisy {
      inside 127.0.0.1:1234;
      outside 192.168.23.22:7791;
    }
  }
}
|