1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373
|
package memberlist
import (
"bytes"
"fmt"
"io"
"log"
"net"
"sync"
"sync/atomic"
"time"
"github.com/armon/go-metrics"
sockaddr "github.com/hashicorp/go-sockaddr"
)
const (
// udpPacketBufSize is used to buffer incoming packets during read
// operations.
udpPacketBufSize = 65536
// udpRecvBufSize is a large buffer size that we attempt to set UDP
// sockets to in order to handle a large volume of messages.
udpRecvBufSize = 2 * 1024 * 1024
)
// NetTransportConfig is used to configure a net transport.
type NetTransportConfig struct {
// BindAddrs is a list of addresses to bind to for both TCP and UDP
// communications.
BindAddrs []string
// BindPort is the port to listen on, for each address above.
BindPort int
// Logger is a logger for operator messages.
Logger *log.Logger
// MetricLabels is a map of optional labels to apply to all metrics
// emitted by this transport.
MetricLabels []metrics.Label
}
// NetTransport is a Transport implementation that uses connectionless UDP for
// packet operations, and ad-hoc TCP connections for stream operations.
type NetTransport struct {
config *NetTransportConfig
packetCh chan *Packet
streamCh chan net.Conn
logger *log.Logger
wg sync.WaitGroup
tcpListeners []*net.TCPListener
udpListeners []*net.UDPConn
shutdown int32
metricLabels []metrics.Label
}
var _ NodeAwareTransport = (*NetTransport)(nil)
// NewNetTransport returns a net transport with the given configuration. On
// success all the network listeners will be created and listening.
func NewNetTransport(config *NetTransportConfig) (*NetTransport, error) {
// If we reject the empty list outright we can assume that there's at
// least one listener of each type later during operation.
if len(config.BindAddrs) == 0 {
return nil, fmt.Errorf("At least one bind address is required")
}
// Build out the new transport.
var ok bool
t := NetTransport{
config: config,
packetCh: make(chan *Packet),
streamCh: make(chan net.Conn),
logger: config.Logger,
metricLabels: config.MetricLabels,
}
// Clean up listeners if there's an error.
defer func() {
if !ok {
t.Shutdown()
}
}()
// Build all the TCP and UDP listeners.
port := config.BindPort
for _, addr := range config.BindAddrs {
ip := net.ParseIP(addr)
tcpAddr := &net.TCPAddr{IP: ip, Port: port}
tcpLn, err := net.ListenTCP("tcp", tcpAddr)
if err != nil {
return nil, fmt.Errorf("Failed to start TCP listener on %q port %d: %v", addr, port, err)
}
t.tcpListeners = append(t.tcpListeners, tcpLn)
// If the config port given was zero, use the first TCP listener
// to pick an available port and then apply that to everything
// else.
if port == 0 {
port = tcpLn.Addr().(*net.TCPAddr).Port
}
udpAddr := &net.UDPAddr{IP: ip, Port: port}
udpLn, err := net.ListenUDP("udp", udpAddr)
if err != nil {
return nil, fmt.Errorf("Failed to start UDP listener on %q port %d: %v", addr, port, err)
}
if err := setUDPRecvBuf(udpLn); err != nil {
return nil, fmt.Errorf("Failed to resize UDP buffer: %v", err)
}
t.udpListeners = append(t.udpListeners, udpLn)
}
// Fire them up now that we've been able to create them all.
for i := 0; i < len(config.BindAddrs); i++ {
t.wg.Add(2)
go t.tcpListen(t.tcpListeners[i])
go t.udpListen(t.udpListeners[i])
}
ok = true
return &t, nil
}
// GetAutoBindPort returns the bind port that was automatically given by the
// kernel, if a bind port of 0 was given.
func (t *NetTransport) GetAutoBindPort() int {
// We made sure there's at least one TCP listener, and that one's
// port was applied to all the others for the dynamic bind case.
return t.tcpListeners[0].Addr().(*net.TCPAddr).Port
}
// See Transport.
func (t *NetTransport) FinalAdvertiseAddr(ip string, port int) (net.IP, int, error) {
var advertiseAddr net.IP
var advertisePort int
if ip != "" {
// If they've supplied an address, use that.
advertiseAddr = net.ParseIP(ip)
if advertiseAddr == nil {
return nil, 0, fmt.Errorf("Failed to parse advertise address %q", ip)
}
// Ensure IPv4 conversion if necessary.
if ip4 := advertiseAddr.To4(); ip4 != nil {
advertiseAddr = ip4
}
advertisePort = port
} else {
if t.config.BindAddrs[0] == "0.0.0.0" {
// Otherwise, if we're not bound to a specific IP, let's
// use a suitable private IP address.
var err error
ip, err = sockaddr.GetPrivateIP()
if err != nil {
return nil, 0, fmt.Errorf("Failed to get interface addresses: %v", err)
}
if ip == "" {
return nil, 0, fmt.Errorf("No private IP address found, and explicit IP not provided")
}
advertiseAddr = net.ParseIP(ip)
if advertiseAddr == nil {
return nil, 0, fmt.Errorf("Failed to parse advertise address: %q", ip)
}
} else {
// Use the IP that we're bound to, based on the first
// TCP listener, which we already ensure is there.
advertiseAddr = t.tcpListeners[0].Addr().(*net.TCPAddr).IP
}
// Use the port we are bound to.
advertisePort = t.GetAutoBindPort()
}
return advertiseAddr, advertisePort, nil
}
// See Transport.
func (t *NetTransport) WriteTo(b []byte, addr string) (time.Time, error) {
a := Address{Addr: addr, Name: ""}
return t.WriteToAddress(b, a)
}
// See NodeAwareTransport.
func (t *NetTransport) WriteToAddress(b []byte, a Address) (time.Time, error) {
addr := a.Addr
udpAddr, err := net.ResolveUDPAddr("udp", addr)
if err != nil {
return time.Time{}, err
}
// We made sure there's at least one UDP listener, so just use the
// packet sending interface on the first one. Take the time after the
// write call comes back, which will underestimate the time a little,
// but help account for any delays before the write occurs.
_, err = t.udpListeners[0].WriteTo(b, udpAddr)
return time.Now(), err
}
// See Transport.
func (t *NetTransport) PacketCh() <-chan *Packet {
return t.packetCh
}
// See IngestionAwareTransport.
func (t *NetTransport) IngestPacket(conn net.Conn, addr net.Addr, now time.Time, shouldClose bool) error {
if shouldClose {
defer conn.Close()
}
// Copy everything from the stream into packet buffer.
var buf bytes.Buffer
if _, err := io.Copy(&buf, conn); err != nil {
return fmt.Errorf("failed to read packet: %v", err)
}
// Check the length - it needs to have at least one byte to be a proper
// message. This is checked elsewhere for writes coming in directly from
// the UDP socket.
if n := buf.Len(); n < 1 {
return fmt.Errorf("packet too short (%d bytes) %s", n, LogAddress(addr))
}
// Inject the packet.
t.packetCh <- &Packet{
Buf: buf.Bytes(),
From: addr,
Timestamp: now,
}
return nil
}
// See Transport.
func (t *NetTransport) DialTimeout(addr string, timeout time.Duration) (net.Conn, error) {
a := Address{Addr: addr, Name: ""}
return t.DialAddressTimeout(a, timeout)
}
// See NodeAwareTransport.
func (t *NetTransport) DialAddressTimeout(a Address, timeout time.Duration) (net.Conn, error) {
addr := a.Addr
dialer := net.Dialer{Timeout: timeout}
return dialer.Dial("tcp", addr)
}
// See Transport.
func (t *NetTransport) StreamCh() <-chan net.Conn {
return t.streamCh
}
// See IngestionAwareTransport.
func (t *NetTransport) IngestStream(conn net.Conn) error {
t.streamCh <- conn
return nil
}
// See Transport.
func (t *NetTransport) Shutdown() error {
// This will avoid log spam about errors when we shut down.
atomic.StoreInt32(&t.shutdown, 1)
// Rip through all the connections and shut them down.
for _, conn := range t.tcpListeners {
conn.Close()
}
for _, conn := range t.udpListeners {
conn.Close()
}
// Block until all the listener threads have died.
t.wg.Wait()
return nil
}
// tcpListen is a long running goroutine that accepts incoming TCP connections
// and hands them off to the stream channel.
func (t *NetTransport) tcpListen(tcpLn *net.TCPListener) {
defer t.wg.Done()
// baseDelay is the initial delay after an AcceptTCP() error before attempting again
const baseDelay = 5 * time.Millisecond
// maxDelay is the maximum delay after an AcceptTCP() error before attempting again.
// In the case that tcpListen() is error-looping, it will delay the shutdown check.
// Therefore, changes to maxDelay may have an effect on the latency of shutdown.
const maxDelay = 1 * time.Second
var loopDelay time.Duration
for {
conn, err := tcpLn.AcceptTCP()
if err != nil {
if s := atomic.LoadInt32(&t.shutdown); s == 1 {
break
}
if loopDelay == 0 {
loopDelay = baseDelay
} else {
loopDelay *= 2
}
if loopDelay > maxDelay {
loopDelay = maxDelay
}
t.logger.Printf("[ERR] memberlist: Error accepting TCP connection: %v", err)
time.Sleep(loopDelay)
continue
}
// No error, reset loop delay
loopDelay = 0
t.streamCh <- conn
}
}
// udpListen is a long running goroutine that accepts incoming UDP packets and
// hands them off to the packet channel.
func (t *NetTransport) udpListen(udpLn *net.UDPConn) {
defer t.wg.Done()
for {
// Do a blocking read into a fresh buffer. Grab a time stamp as
// close as possible to the I/O.
buf := make([]byte, udpPacketBufSize)
n, addr, err := udpLn.ReadFrom(buf)
ts := time.Now()
if err != nil {
if s := atomic.LoadInt32(&t.shutdown); s == 1 {
break
}
t.logger.Printf("[ERR] memberlist: Error reading UDP packet: %v", err)
continue
}
// Check the length - it needs to have at least one byte to be a
// proper message.
if n < 1 {
t.logger.Printf("[ERR] memberlist: UDP packet too short (%d bytes) %s",
len(buf), LogAddress(addr))
continue
}
// Ingest the packet.
metrics.IncrCounterWithLabels([]string{"memberlist", "udp", "received"}, float32(n), t.metricLabels)
t.packetCh <- &Packet{
Buf: buf[:n],
From: addr,
Timestamp: ts,
}
}
}
// setUDPRecvBuf is used to resize the UDP receive window. The function
// attempts to set the read buffer to `udpRecvBuf` but backs off until
// the read buffer can be set.
func setUDPRecvBuf(c *net.UDPConn) error {
size := udpRecvBufSize
var err error
for size > 0 {
if err = c.SetReadBuffer(size); err == nil {
return nil
}
size = size / 2
}
return err
}
|