1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447
|
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package rafthttp
import (
"context"
"net/http"
"sync"
"time"
"go.etcd.io/etcd/client/pkg/v3/transport"
"go.etcd.io/etcd/client/pkg/v3/types"
"go.etcd.io/etcd/raft/v3"
"go.etcd.io/etcd/raft/v3/raftpb"
"go.etcd.io/etcd/server/v3/etcdserver/api/snap"
stats "go.etcd.io/etcd/server/v3/etcdserver/api/v2stats"
"github.com/xiang90/probing"
"go.uber.org/zap"
"golang.org/x/time/rate"
)
type Raft interface {
Process(ctx context.Context, m raftpb.Message) error
IsIDRemoved(id uint64) bool
ReportUnreachable(id uint64)
ReportSnapshot(id uint64, status raft.SnapshotStatus)
}
type Transporter interface {
// Start starts the given Transporter.
// Start MUST be called before calling other functions in the interface.
Start() error
// Handler returns the HTTP handler of the transporter.
// A transporter HTTP handler handles the HTTP requests
// from remote peers.
// The handler MUST be used to handle RaftPrefix(/raft)
// endpoint.
Handler() http.Handler
// Send sends out the given messages to the remote peers.
// Each message has a To field, which is an id that maps
// to an existing peer in the transport.
// If the id cannot be found in the transport, the message
// will be ignored.
Send(m []raftpb.Message)
// SendSnapshot sends out the given snapshot message to a remote peer.
// The behavior of SendSnapshot is similar to Send.
SendSnapshot(m snap.Message)
// AddRemote adds a remote with given peer urls into the transport.
// A remote helps newly joined member to catch up the progress of cluster,
// and will not be used after that.
// It is the caller's responsibility to ensure the urls are all valid,
// or it panics.
AddRemote(id types.ID, urls []string)
// AddPeer adds a peer with given peer urls into the transport.
// It is the caller's responsibility to ensure the urls are all valid,
// or it panics.
// Peer urls are used to connect to the remote peer.
AddPeer(id types.ID, urls []string)
// RemovePeer removes the peer with given id.
RemovePeer(id types.ID)
// RemoveAllPeers removes all the existing peers in the transport.
RemoveAllPeers()
// UpdatePeer updates the peer urls of the peer with the given id.
// It is the caller's responsibility to ensure the urls are all valid,
// or it panics.
UpdatePeer(id types.ID, urls []string)
// ActiveSince returns the time that the connection with the peer
// of the given id becomes active.
// If the connection is active since peer was added, it returns the adding time.
// If the connection is currently inactive, it returns zero time.
ActiveSince(id types.ID) time.Time
// ActivePeers returns the number of active peers.
ActivePeers() int
// Stop closes the connections and stops the transporter.
Stop()
}
// Transport implements Transporter interface. It provides the functionality
// to send raft messages to peers, and receive raft messages from peers.
// User should call Handler method to get a handler to serve requests
// received from peerURLs.
// User needs to call Start before calling other functions, and call
// Stop when the Transport is no longer used.
type Transport struct {
Logger *zap.Logger
DialTimeout time.Duration // maximum duration before timing out dial of the request
// DialRetryFrequency defines the frequency of streamReader dial retrial attempts;
// a distinct rate limiter is created per every peer (default value: 10 events/sec)
DialRetryFrequency rate.Limit
TLSInfo transport.TLSInfo // TLS information used when creating connection
ID types.ID // local member ID
URLs types.URLs // local peer URLs
ClusterID types.ID // raft cluster ID for request validation
Raft Raft // raft state machine, to which the Transport forwards received messages and reports status
Snapshotter *snap.Snapshotter
ServerStats *stats.ServerStats // used to record general transportation statistics
// LeaderStats is used to record transportation statistics with followers when
// performing as leader in raft protocol
LeaderStats *stats.LeaderStats
// ErrorC is used to report detected critical errors, e.g.,
// the member has been permanently removed from the cluster
// When an error is received from ErrorC, user should stop raft state
// machine and thus stop the Transport.
ErrorC chan error
streamRt http.RoundTripper // roundTripper used by streams
pipelineRt http.RoundTripper // roundTripper used by pipelines
mu sync.RWMutex // protect the remote and peer map
remotes map[types.ID]*remote // remotes map that helps newly joined member to catch up
peers map[types.ID]Peer // peers map
pipelineProber probing.Prober
streamProber probing.Prober
}
func (t *Transport) Start() error {
var err error
t.streamRt, err = newStreamRoundTripper(t.TLSInfo, t.DialTimeout)
if err != nil {
return err
}
t.pipelineRt, err = NewRoundTripper(t.TLSInfo, t.DialTimeout)
if err != nil {
return err
}
t.remotes = make(map[types.ID]*remote)
t.peers = make(map[types.ID]Peer)
t.pipelineProber = probing.NewProber(t.pipelineRt)
t.streamProber = probing.NewProber(t.streamRt)
// If client didn't provide dial retry frequency, use the default
// (100ms backoff between attempts to create a new stream),
// so it doesn't bring too much overhead when retry.
if t.DialRetryFrequency == 0 {
t.DialRetryFrequency = rate.Every(100 * time.Millisecond)
}
return nil
}
func (t *Transport) Handler() http.Handler {
pipelineHandler := newPipelineHandler(t, t.Raft, t.ClusterID)
streamHandler := newStreamHandler(t, t, t.Raft, t.ID, t.ClusterID)
snapHandler := newSnapshotHandler(t, t.Raft, t.Snapshotter, t.ClusterID)
mux := http.NewServeMux()
mux.Handle(RaftPrefix, pipelineHandler)
mux.Handle(RaftStreamPrefix+"/", streamHandler)
mux.Handle(RaftSnapshotPrefix, snapHandler)
mux.Handle(ProbingPrefix, probing.NewHandler())
return mux
}
func (t *Transport) Get(id types.ID) Peer {
t.mu.RLock()
defer t.mu.RUnlock()
return t.peers[id]
}
func (t *Transport) Send(msgs []raftpb.Message) {
for _, m := range msgs {
if m.To == 0 {
// ignore intentionally dropped message
continue
}
to := types.ID(m.To)
t.mu.RLock()
p, pok := t.peers[to]
g, rok := t.remotes[to]
t.mu.RUnlock()
if pok {
if m.Type == raftpb.MsgApp {
t.ServerStats.SendAppendReq(m.Size())
}
p.send(m)
continue
}
if rok {
g.send(m)
continue
}
if t.Logger != nil {
t.Logger.Debug(
"ignored message send request; unknown remote peer target",
zap.String("type", m.Type.String()),
zap.String("unknown-target-peer-id", to.String()),
)
}
}
}
func (t *Transport) Stop() {
t.mu.Lock()
defer t.mu.Unlock()
for _, r := range t.remotes {
r.stop()
}
for _, p := range t.peers {
p.stop()
}
t.pipelineProber.RemoveAll()
t.streamProber.RemoveAll()
if tr, ok := t.streamRt.(*http.Transport); ok {
tr.CloseIdleConnections()
}
if tr, ok := t.pipelineRt.(*http.Transport); ok {
tr.CloseIdleConnections()
}
t.peers = nil
t.remotes = nil
}
// CutPeer drops messages to the specified peer.
func (t *Transport) CutPeer(id types.ID) {
t.mu.RLock()
p, pok := t.peers[id]
g, gok := t.remotes[id]
t.mu.RUnlock()
if pok {
p.(Pausable).Pause()
}
if gok {
g.Pause()
}
}
// MendPeer recovers the message dropping behavior of the given peer.
func (t *Transport) MendPeer(id types.ID) {
t.mu.RLock()
p, pok := t.peers[id]
g, gok := t.remotes[id]
t.mu.RUnlock()
if pok {
p.(Pausable).Resume()
}
if gok {
g.Resume()
}
}
func (t *Transport) AddRemote(id types.ID, us []string) {
t.mu.Lock()
defer t.mu.Unlock()
if t.remotes == nil {
// there's no clean way to shutdown the golang http server
// (see: https://github.com/golang/go/issues/4674) before
// stopping the transport; ignore any new connections.
return
}
if _, ok := t.peers[id]; ok {
return
}
if _, ok := t.remotes[id]; ok {
return
}
urls, err := types.NewURLs(us)
if err != nil {
if t.Logger != nil {
t.Logger.Panic("failed NewURLs", zap.Strings("urls", us), zap.Error(err))
}
}
t.remotes[id] = startRemote(t, urls, id)
if t.Logger != nil {
t.Logger.Info(
"added new remote peer",
zap.String("local-member-id", t.ID.String()),
zap.String("remote-peer-id", id.String()),
zap.Strings("remote-peer-urls", us),
)
}
}
func (t *Transport) AddPeer(id types.ID, us []string) {
t.mu.Lock()
defer t.mu.Unlock()
if t.peers == nil {
panic("transport stopped")
}
if _, ok := t.peers[id]; ok {
return
}
urls, err := types.NewURLs(us)
if err != nil {
if t.Logger != nil {
t.Logger.Panic("failed NewURLs", zap.Strings("urls", us), zap.Error(err))
}
}
fs := t.LeaderStats.Follower(id.String())
t.peers[id] = startPeer(t, urls, id, fs)
addPeerToProber(t.Logger, t.pipelineProber, id.String(), us, RoundTripperNameSnapshot, rttSec)
addPeerToProber(t.Logger, t.streamProber, id.String(), us, RoundTripperNameRaftMessage, rttSec)
if t.Logger != nil {
t.Logger.Info(
"added remote peer",
zap.String("local-member-id", t.ID.String()),
zap.String("remote-peer-id", id.String()),
zap.Strings("remote-peer-urls", us),
)
}
}
func (t *Transport) RemovePeer(id types.ID) {
t.mu.Lock()
defer t.mu.Unlock()
t.removePeer(id)
}
func (t *Transport) RemoveAllPeers() {
t.mu.Lock()
defer t.mu.Unlock()
for id := range t.peers {
t.removePeer(id)
}
}
// the caller of this function must have the peers mutex.
func (t *Transport) removePeer(id types.ID) {
if peer, ok := t.peers[id]; ok {
peer.stop()
} else {
if t.Logger != nil {
t.Logger.Panic("unexpected removal of unknown remote peer", zap.String("remote-peer-id", id.String()))
}
}
delete(t.peers, id)
delete(t.LeaderStats.Followers, id.String())
t.pipelineProber.Remove(id.String())
t.streamProber.Remove(id.String())
if t.Logger != nil {
t.Logger.Info(
"removed remote peer",
zap.String("local-member-id", t.ID.String()),
zap.String("removed-remote-peer-id", id.String()),
)
}
}
func (t *Transport) UpdatePeer(id types.ID, us []string) {
t.mu.Lock()
defer t.mu.Unlock()
// TODO: return error or just panic?
if _, ok := t.peers[id]; !ok {
return
}
urls, err := types.NewURLs(us)
if err != nil {
if t.Logger != nil {
t.Logger.Panic("failed NewURLs", zap.Strings("urls", us), zap.Error(err))
}
}
t.peers[id].update(urls)
t.pipelineProber.Remove(id.String())
addPeerToProber(t.Logger, t.pipelineProber, id.String(), us, RoundTripperNameSnapshot, rttSec)
t.streamProber.Remove(id.String())
addPeerToProber(t.Logger, t.streamProber, id.String(), us, RoundTripperNameRaftMessage, rttSec)
if t.Logger != nil {
t.Logger.Info(
"updated remote peer",
zap.String("local-member-id", t.ID.String()),
zap.String("updated-remote-peer-id", id.String()),
zap.Strings("updated-remote-peer-urls", us),
)
}
}
func (t *Transport) ActiveSince(id types.ID) time.Time {
t.mu.RLock()
defer t.mu.RUnlock()
if p, ok := t.peers[id]; ok {
return p.activeSince()
}
return time.Time{}
}
func (t *Transport) SendSnapshot(m snap.Message) {
t.mu.Lock()
defer t.mu.Unlock()
p := t.peers[types.ID(m.To)]
if p == nil {
m.CloseWithError(errMemberNotFound)
return
}
p.sendSnap(m)
}
// Pausable is a testing interface for pausing transport traffic.
type Pausable interface {
Pause()
Resume()
}
func (t *Transport) Pause() {
t.mu.RLock()
defer t.mu.RUnlock()
for _, p := range t.peers {
p.(Pausable).Pause()
}
}
func (t *Transport) Resume() {
t.mu.RLock()
defer t.mu.RUnlock()
for _, p := range t.peers {
p.(Pausable).Resume()
}
}
// ActivePeers returns a channel that closes when an initial
// peer connection has been established. Use this to wait until the
// first peer connection becomes active.
func (t *Transport) ActivePeers() (cnt int) {
t.mu.RLock()
defer t.mu.RUnlock()
for _, p := range t.peers {
if !p.activeSince().IsZero() {
cnt++
}
}
return cnt
}
|