1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593
|
package netlink
import (
"math/rand"
"sync"
"sync/atomic"
"syscall"
"time"
"golang.org/x/net/bpf"
)
// A Conn is a connection to netlink. A Conn can be used to send and
// receives messages to and from netlink.
//
// A Conn is safe for concurrent use, but to avoid contention in
// high-throughput applications, the caller should almost certainly create a
// pool of Conns and distribute them among workers.
//
// A Conn is capable of manipulating netlink subsystems from within a specific
// Linux network namespace, but special care must be taken when doing so. See
// the documentation of Config for details.
type Conn struct {
// Atomics must come first.
//
// seq is an atomically incremented integer used to provide sequence
// numbers when Conn.Send is called.
seq uint32
// mu serializes access to the netlink socket for the request/response
// transaction within Execute.
mu sync.RWMutex
// sock is the operating system-specific implementation of
// a netlink sockets connection.
sock Socket
// pid is the PID assigned by netlink.
pid uint32
// d provides debugging capabilities for a Conn if not nil.
d *debugger
}
// A Socket is an operating-system specific implementation of netlink
// sockets used by Conn.
//
// Deprecated: the intent of Socket was to provide an abstraction layer for
// testing, but this abstraction is awkward to use properly and disables much of
// the functionality of the Conn type. Do not use.
type Socket interface {
Close() error
Send(m Message) error
SendMessages(m []Message) error
Receive() ([]Message, error)
}
// Dial dials a connection to netlink, using the specified netlink family.
// Config specifies optional configuration for Conn. If config is nil, a default
// configuration will be used.
func Dial(family int, config *Config) (*Conn, error) {
// TODO(mdlayher): plumb in netlink.OpError wrapping?
// Use OS-specific dial() to create Socket.
c, pid, err := dial(family, config)
if err != nil {
return nil, err
}
return NewConn(c, pid), nil
}
// NewConn creates a Conn using the specified Socket and PID for netlink
// communications.
//
// NewConn is primarily useful for tests. Most applications should use
// Dial instead.
func NewConn(sock Socket, pid uint32) *Conn {
// Seed the sequence number using a random number generator.
r := rand.New(rand.NewSource(time.Now().UnixNano()))
seq := r.Uint32()
// Configure a debugger if arguments are set.
var d *debugger
if len(debugArgs) > 0 {
d = newDebugger(debugArgs)
}
return &Conn{
seq: seq,
sock: sock,
pid: pid,
d: d,
}
}
// debug executes fn with the debugger if the debugger is not nil.
func (c *Conn) debug(fn func(d *debugger)) {
if c.d == nil {
return
}
fn(c.d)
}
// Close closes the connection and unblocks any pending read operations.
func (c *Conn) Close() error {
// Close does not acquire a lock because it must be able to interrupt any
// blocked system calls, such as when Receive is waiting on a multicast
// group message.
//
// We rely on the kernel to deal with concurrent operations to the netlink
// socket itself.
return newOpError("close", c.sock.Close())
}
// Execute sends a single Message to netlink using Send, receives one or more
// replies using Receive, and then checks the validity of the replies against
// the request using Validate.
//
// Execute acquires a lock for the duration of the function call which blocks
// concurrent calls to Send, SendMessages, and Receive, in order to ensure
// consistency between netlink request/reply messages.
//
// See the documentation of Send, Receive, and Validate for details about
// each function.
func (c *Conn) Execute(m Message) ([]Message, error) {
// Acquire the write lock and invoke the internal implementations of Send
// and Receive which require the lock already be held.
c.mu.Lock()
defer c.mu.Unlock()
req, err := c.lockedSend(m)
if err != nil {
return nil, err
}
res, err := c.lockedReceive()
if err != nil {
return nil, err
}
if err := Validate(req, res); err != nil {
return nil, err
}
return res, nil
}
// SendMessages sends multiple Messages to netlink. The handling of
// a Header's Length, Sequence and PID fields is the same as when
// calling Send.
func (c *Conn) SendMessages(msgs []Message) ([]Message, error) {
// Wait for any concurrent calls to Execute to finish before proceeding.
c.mu.RLock()
defer c.mu.RUnlock()
for i := range msgs {
c.fixMsg(&msgs[i], nlmsgLength(len(msgs[i].Data)))
}
c.debug(func(d *debugger) {
for _, m := range msgs {
d.debugf(1, "send msgs: %+v", m)
}
})
if err := c.sock.SendMessages(msgs); err != nil {
c.debug(func(d *debugger) {
d.debugf(1, "send msgs: err: %v", err)
})
return nil, newOpError("send-messages", err)
}
return msgs, nil
}
// Send sends a single Message to netlink. In most cases, a Header's Length,
// Sequence, and PID fields should be set to 0, so they can be populated
// automatically before the Message is sent. On success, Send returns a copy
// of the Message with all parameters populated, for later validation.
//
// If Header.Length is 0, it will be automatically populated using the
// correct length for the Message, including its payload.
//
// If Header.Sequence is 0, it will be automatically populated using the
// next sequence number for this connection.
//
// If Header.PID is 0, it will be automatically populated using a PID
// assigned by netlink.
func (c *Conn) Send(m Message) (Message, error) {
// Wait for any concurrent calls to Execute to finish before proceeding.
c.mu.RLock()
defer c.mu.RUnlock()
return c.lockedSend(m)
}
// lockedSend implements Send, but must be called with c.mu acquired for reading.
// We rely on the kernel to deal with concurrent reads and writes to the netlink
// socket itself.
func (c *Conn) lockedSend(m Message) (Message, error) {
c.fixMsg(&m, nlmsgLength(len(m.Data)))
c.debug(func(d *debugger) {
d.debugf(1, "send: %+v", m)
})
if err := c.sock.Send(m); err != nil {
c.debug(func(d *debugger) {
d.debugf(1, "send: err: %v", err)
})
return Message{}, newOpError("send", err)
}
return m, nil
}
// Receive receives one or more messages from netlink. Multi-part messages are
// handled transparently and returned as a single slice of Messages, with the
// final empty "multi-part done" message removed.
//
// If any of the messages indicate a netlink error, that error will be returned.
func (c *Conn) Receive() ([]Message, error) {
// Wait for any concurrent calls to Execute to finish before proceeding.
c.mu.RLock()
defer c.mu.RUnlock()
return c.lockedReceive()
}
// lockedReceive implements Receive, but must be called with c.mu acquired for reading.
// We rely on the kernel to deal with concurrent reads and writes to the netlink
// socket itself.
func (c *Conn) lockedReceive() ([]Message, error) {
msgs, err := c.receive()
if err != nil {
c.debug(func(d *debugger) {
d.debugf(1, "recv: err: %v", err)
})
return nil, err
}
c.debug(func(d *debugger) {
for _, m := range msgs {
d.debugf(1, "recv: %+v", m)
}
})
// When using nltest, it's possible for zero messages to be returned by receive.
if len(msgs) == 0 {
return msgs, nil
}
// Trim the final message with multi-part done indicator if
// present.
if m := msgs[len(msgs)-1]; m.Header.Flags&Multi != 0 && m.Header.Type == Done {
return msgs[:len(msgs)-1], nil
}
return msgs, nil
}
// receive is the internal implementation of Conn.Receive, which can be called
// recursively to handle multi-part messages.
func (c *Conn) receive() ([]Message, error) {
// NB: All non-nil errors returned from this function *must* be of type
// OpError in order to maintain the appropriate contract with callers of
// this package.
//
// This contract also applies to functions called within this function,
// such as checkMessage.
var res []Message
for {
msgs, err := c.sock.Receive()
if err != nil {
return nil, newOpError("receive", err)
}
// If this message is multi-part, we will need to continue looping to
// drain all the messages from the socket.
var multi bool
for _, m := range msgs {
if err := checkMessage(m); err != nil {
return nil, err
}
// Does this message indicate a multi-part message?
if m.Header.Flags&Multi == 0 {
// No, check the next messages.
continue
}
// Does this message indicate the last message in a series of
// multi-part messages from a single read?
multi = m.Header.Type != Done
}
res = append(res, msgs...)
if !multi {
// No more messages coming.
return res, nil
}
}
}
// A groupJoinLeaver is a Socket that supports joining and leaving
// netlink multicast groups.
type groupJoinLeaver interface {
Socket
JoinGroup(group uint32) error
LeaveGroup(group uint32) error
}
// JoinGroup joins a netlink multicast group by its ID.
func (c *Conn) JoinGroup(group uint32) error {
conn, ok := c.sock.(groupJoinLeaver)
if !ok {
return notSupported("join-group")
}
return newOpError("join-group", conn.JoinGroup(group))
}
// LeaveGroup leaves a netlink multicast group by its ID.
func (c *Conn) LeaveGroup(group uint32) error {
conn, ok := c.sock.(groupJoinLeaver)
if !ok {
return notSupported("leave-group")
}
return newOpError("leave-group", conn.LeaveGroup(group))
}
// A bpfSetter is a Socket that supports setting and removing BPF filters.
type bpfSetter interface {
Socket
bpf.Setter
RemoveBPF() error
}
// SetBPF attaches an assembled BPF program to a Conn.
func (c *Conn) SetBPF(filter []bpf.RawInstruction) error {
conn, ok := c.sock.(bpfSetter)
if !ok {
return notSupported("set-bpf")
}
return newOpError("set-bpf", conn.SetBPF(filter))
}
// RemoveBPF removes a BPF filter from a Conn.
func (c *Conn) RemoveBPF() error {
conn, ok := c.sock.(bpfSetter)
if !ok {
return notSupported("remove-bpf")
}
return newOpError("remove-bpf", conn.RemoveBPF())
}
// A deadlineSetter is a Socket that supports setting deadlines.
type deadlineSetter interface {
Socket
SetDeadline(time.Time) error
SetReadDeadline(time.Time) error
SetWriteDeadline(time.Time) error
}
// SetDeadline sets the read and write deadlines associated with the connection.
func (c *Conn) SetDeadline(t time.Time) error {
conn, ok := c.sock.(deadlineSetter)
if !ok {
return notSupported("set-deadline")
}
return newOpError("set-deadline", conn.SetDeadline(t))
}
// SetReadDeadline sets the read deadline associated with the connection.
func (c *Conn) SetReadDeadline(t time.Time) error {
conn, ok := c.sock.(deadlineSetter)
if !ok {
return notSupported("set-read-deadline")
}
return newOpError("set-read-deadline", conn.SetReadDeadline(t))
}
// SetWriteDeadline sets the write deadline associated with the connection.
func (c *Conn) SetWriteDeadline(t time.Time) error {
conn, ok := c.sock.(deadlineSetter)
if !ok {
return notSupported("set-write-deadline")
}
return newOpError("set-write-deadline", conn.SetWriteDeadline(t))
}
// A ConnOption is a boolean option that may be set for a Conn.
type ConnOption int
// Possible ConnOption values. These constants are equivalent to the Linux
// setsockopt boolean options for netlink sockets.
const (
PacketInfo ConnOption = iota
BroadcastError
NoENOBUFS
ListenAllNSID
CapAcknowledge
ExtendedAcknowledge
GetStrictCheck
)
// An optionSetter is a Socket that supports setting netlink options.
type optionSetter interface {
Socket
SetOption(option ConnOption, enable bool) error
}
// SetOption enables or disables a netlink socket option for the Conn.
func (c *Conn) SetOption(option ConnOption, enable bool) error {
conn, ok := c.sock.(optionSetter)
if !ok {
return notSupported("set-option")
}
return newOpError("set-option", conn.SetOption(option, enable))
}
// A bufferSetter is a Socket that supports setting connection buffer sizes.
type bufferSetter interface {
Socket
SetReadBuffer(bytes int) error
SetWriteBuffer(bytes int) error
}
// SetReadBuffer sets the size of the operating system's receive buffer
// associated with the Conn.
func (c *Conn) SetReadBuffer(bytes int) error {
conn, ok := c.sock.(bufferSetter)
if !ok {
return notSupported("set-read-buffer")
}
return newOpError("set-read-buffer", conn.SetReadBuffer(bytes))
}
// SetWriteBuffer sets the size of the operating system's transmit buffer
// associated with the Conn.
func (c *Conn) SetWriteBuffer(bytes int) error {
conn, ok := c.sock.(bufferSetter)
if !ok {
return notSupported("set-write-buffer")
}
return newOpError("set-write-buffer", conn.SetWriteBuffer(bytes))
}
// A syscallConner is a Socket that supports syscall.Conn.
type syscallConner interface {
Socket
syscall.Conn
}
var _ syscall.Conn = &Conn{}
// SyscallConn returns a raw network connection. This implements the
// syscall.Conn interface.
//
// SyscallConn is intended for advanced use cases, such as getting and setting
// arbitrary socket options using the netlink socket's file descriptor.
//
// Once invoked, it is the caller's responsibility to ensure that operations
// performed using Conn and the syscall.RawConn do not conflict with
// each other.
func (c *Conn) SyscallConn() (syscall.RawConn, error) {
sc, ok := c.sock.(syscallConner)
if !ok {
return nil, notSupported("syscall-conn")
}
// TODO(mdlayher): mutex or similar to enforce syscall.RawConn contract of
// FD remaining valid for duration of calls?
return sc.SyscallConn()
}
// fixMsg updates the fields of m using the logic specified in Send.
func (c *Conn) fixMsg(m *Message, ml int) {
if m.Header.Length == 0 {
m.Header.Length = uint32(nlmsgAlign(ml))
}
if m.Header.Sequence == 0 {
m.Header.Sequence = c.nextSequence()
}
if m.Header.PID == 0 {
m.Header.PID = c.pid
}
}
// nextSequence atomically increments Conn's sequence number and returns
// the incremented value.
func (c *Conn) nextSequence() uint32 {
return atomic.AddUint32(&c.seq, 1)
}
// Validate validates one or more reply Messages against a request Message,
// ensuring that they contain matching sequence numbers and PIDs.
func Validate(request Message, replies []Message) error {
for _, m := range replies {
// Check for mismatched sequence, unless:
// - request had no sequence, meaning we are probably validating
// a multicast reply
if m.Header.Sequence != request.Header.Sequence && request.Header.Sequence != 0 {
return newOpError("validate", errMismatchedSequence)
}
// Check for mismatched PID, unless:
// - request had no PID, meaning we are either:
// - validating a multicast reply
// - netlink has not yet assigned us a PID
// - response had no PID, meaning it's from the kernel as a multicast reply
if m.Header.PID != request.Header.PID && request.Header.PID != 0 && m.Header.PID != 0 {
return newOpError("validate", errMismatchedPID)
}
}
return nil
}
// Config contains options for a Conn.
type Config struct {
// Groups is a bitmask which specifies multicast groups. If set to 0,
// no multicast group subscriptions will be made.
Groups uint32
// NetNS specifies the network namespace the Conn will operate in.
//
// If set (non-zero), Conn will enter the specified network namespace and
// an error will occur in Dial if the operation fails.
//
// If not set (zero), a best-effort attempt will be made to enter the
// network namespace of the calling thread: this means that any changes made
// to the calling thread's network namespace will also be reflected in Conn.
// If this operation fails (due to lack of permissions or because network
// namespaces are disabled by kernel configuration), Dial will not return
// an error, and the Conn will operate in the default network namespace of
// the process. This enables non-privileged use of Conn in applications
// which do not require elevated privileges.
//
// Entering a network namespace is a privileged operation (root or
// CAP_SYS_ADMIN are required), and most applications should leave this set
// to 0.
NetNS int
// DisableNSLockThread is a no-op.
//
// Deprecated: internal changes have made this option obsolete and it has no
// effect. Do not use.
DisableNSLockThread bool
// PID specifies the port ID used to bind the netlink socket. If set to 0,
// the kernel will assign a port ID on the caller's behalf.
//
// Most callers should leave this field set to 0. This option is intended
// for advanced use cases where the kernel expects a fixed unicast address
// destination for netlink messages.
PID uint32
// Strict applies a more strict default set of options to the Conn,
// including:
// - ExtendedAcknowledge: true
// - provides more useful error messages when supported by the kernel
// - GetStrictCheck: true
// - more strictly enforces request validation for some families such
// as rtnetlink which were historically misused
//
// If any of the options specified by Strict cannot be configured due to an
// outdated kernel or similar, an error will be returned.
//
// When possible, setting Strict to true is recommended for applications
// running on modern Linux kernels.
Strict bool
}
|