File: events.go

package cluster

import (
	"context"
	"errors"
	"slices"
	"sync"
	"time"

	incus "github.com/lxc/incus/v6/client"
	"github.com/lxc/incus/v6/internal/server/db"
	"github.com/lxc/incus/v6/internal/server/endpoints"
	"github.com/lxc/incus/v6/internal/server/events"
	"github.com/lxc/incus/v6/shared/api"
	"github.com/lxc/incus/v6/shared/logger"
	"github.com/lxc/incus/v6/shared/revert"
	localtls "github.com/lxc/incus/v6/shared/tls"
)

// eventHubMinHosts is the minimum number of members that must have the event-hub role before the cluster
// switches into event-hub mode (where cluster members only connect to event-hub members, rather than to all
// members as in the normal full-mesh mode).
const eventHubMinHosts = 2

// EventMode indicates the event distribution mode.
type EventMode string

// EventModeFullMesh is when every cluster member connects to every other cluster member to pull events.
const EventModeFullMesh EventMode = "full-mesh"

// EventModeHubServer is when the cluster is operating in event-hub mode and this server is designated as a hub
// server, meaning that it will only connect to the other event-hub members and not other members.
const EventModeHubServer EventMode = "hub-server"

// EventModeHubClient is when the cluster is operating in event-hub mode and this member is designated as a hub
// client, meaning that it is expected to connect to the event-hub members.
const EventModeHubClient EventMode = "hub-client"

// eventListenerClient stores both the event listener and its associated client.
type eventListenerClient struct {
	*incus.EventListener

	client        incus.InstanceServer // Client connection used to send events to and query the remote member.
	hubPushCancel context.CancelFunc   // Stops the hub-client push goroutine started by SetEventMode, if any.
}

// Disconnect disconnects both the listener and the client.
func (lc *eventListenerClient) Disconnect() {
	if lc.hubPushCancel != nil {
		lc.hubPushCancel()
	}

	lc.EventListener.Disconnect()
	lc.client.Disconnect()
}

// SetEventMode applies the specified eventMode of the local server to the listener.
// If the eventMode is EventModeHubClient, a goroutine is started that consumes events from eventHubPushCh and
// pushes them to the remote server. For any other eventMode, the goroutine is stopped if it is running.
func (lc *eventListenerClient) SetEventMode(eventMode EventMode, eventHubPushCh chan api.Event) {
	if eventMode == EventModeHubClient {
		if lc.hubPushCancel != nil || !lc.IsActive() {
			return
		}

		ctx, cancel := context.WithCancel(context.Background())

		go func() {
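			// Record the cancel function on the listener client so Disconnect or a later SetEventMode
			// call can stop this push goroutine; it is cleared again when the goroutine exits.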
			lc.hubPushCancel = cancel
			info, _ := lc.client.GetConnectionInfo()
			logger.Info("Event hub client started", logger.Ctx{"remote": info.URL})
			defer logger.Info("Event hub client stopped", logger.Ctx{"remote": info.URL})
			defer func() {
				cancel()
				lc.hubPushCancel = nil
			}()

			for {
				select {
				case event, more := <-eventHubPushCh:
					if !more {
						return
					}

					err := lc.client.SendEvent(event)
					if err != nil {
						// Send failed, something is wrong with this hub server.
						lc.Disconnect() // Disconnect listener and client.

						// Try to put the event back onto the event hub push queue so that another
						// consumer can deliver it.
						ctx, cancel := context.WithTimeout(context.Background(), eventHubPushChTimeout)
						defer cancel()

						select {
						case eventHubPushCh <- event:
						case <-ctx.Done(): // Don't block if all consumers are slow/down.
						}

						return
					}

				case <-ctx.Done():
					return
				}
			}
		}()
	} else if lc.hubPushCancel != nil {
		lc.hubPushCancel()
		lc.hubPushCancel = nil
	}
}

var (
	eventMode             = EventModeFullMesh
	eventHubAddresses     []string
	eventHubPushCh        = make(chan api.Event, 10) // Buffer size to accommodate slow consumers before dropping events.
	eventHubPushChTimeout = time.Second
	listeners             = map[string]*eventListenerClient{} // Active event listener clients keyed by member address.
	listenersUnavailable  = map[string]bool{}                 // Members whose last connectivity check failed.
	listenersNotify       = map[chan struct{}][]string{}      // Channels to close once a listener for one of the listed addresses connects.
	listenersLock         sync.Mutex                          // Protects the event mode, hub addresses and listener maps above.
	listenersUpdateLock   sync.Mutex                          // Serializes runs of EventsUpdateListeners.
)

// ServerEventMode returns the event distribution mode that this local server is operating in.
func ServerEventMode() EventMode {
	listenersLock.Lock()
	defer listenersLock.Unlock()

	return eventMode
}

// RoleInSlice returns whether or not the given role is within the roles list.
func RoleInSlice(role db.ClusterRole, roles []db.ClusterRole) bool {
	return slices.Contains(roles, role)
}

// EventListenerWait waits for a listener to be connected to the specified address, or to one of the event
// hubs when operating in event-hub mode.
func EventListenerWait(ctx context.Context, address string) error {
	// Check if there is already a listener.
	listenersLock.Lock()
	listener, found := listeners[address]
	if found && listener.IsActive() {
		listenersLock.Unlock()
		return nil
	}

	if listenersUnavailable[address] {
		listenersLock.Unlock()
		return errors.New("Server isn't ready yet")
	}

	listenAddresses := []string{address}

	// Check if operating in event hub mode and if one of the event hub connections is available.
	// If so then we are ready to receive events from all members.
	if eventMode != EventModeFullMesh {
		for _, eventHubAddress := range eventHubAddresses {
			listener, found := listeners[eventHubAddress]
			if found && listener.IsActive() {
				listenersLock.Unlock()
				return nil
			}

			listenAddresses = append(listenAddresses, eventHubAddress)
		}
	}

	// If not, set up a notification for when the desired address or any of the event hubs connect.
	connected := make(chan struct{})
	listenersNotify[connected] = listenAddresses
	listenersLock.Unlock()

	defer func() {
		listenersLock.Lock()
		delete(listenersNotify, connected)
		listenersLock.Unlock()
	}()

	// Wait for the connected channel to be closed (indicating a new listener has been connected), and return.
	select {
	case <-connected:
		return nil
	case <-ctx.Done():
		if ctx.Err() != nil {
			return errors.New("Missing event connection with target cluster member")
		}

		return nil
	}
}

// hubAddresses returns the addresses of members with the event-hub role, and the event mode of the server.
// The event mode will only be hub-server or hub-client if at least eventHubMinHosts members have the
// event-hub role. Otherwise the mode will be full-mesh.
func hubAddresses(localAddress string, members map[int64]APIHeartbeatMember) ([]string, EventMode) {
	var hubAddresses []string
	var localHasHubRole bool

	// Do a first pass of members to collect the addresses of members with the event-hub role, and to check
	// whether the local member is one of them.
	for _, member := range members {
		if RoleInSlice(db.ClusterRoleEventHub, member.Roles) {
			hubAddresses = append(hubAddresses, member.Address)

			if member.Address == localAddress {
				localHasHubRole = true
			}
		}
	}

	eventMode := EventModeFullMesh
	if len(hubAddresses) >= eventHubMinHosts {
		if localHasHubRole {
			eventMode = EventModeHubServer
		} else {
			eventMode = EventModeHubClient
		}
	}

	return hubAddresses, eventMode
}

// EventsUpdateListeners refreshes the cluster event listener connections.
func EventsUpdateListeners(endpoints *endpoints.Endpoints, cluster *db.Cluster, serverCert func() *localtls.CertInfo, hbMembers map[int64]APIHeartbeatMember, inject events.InjectFunc) {
	listenersUpdateLock.Lock()
	defer listenersUpdateLock.Unlock()

	// If no heartbeat members provided, populate from global database.
	if hbMembers == nil {
		var err error
		var members []db.NodeInfo
		var offlineThreshold time.Duration

		err = cluster.Transaction(context.TODO(), func(ctx context.Context, tx *db.ClusterTx) error {
			members, err = tx.GetNodes(ctx)
			if err != nil {
				return err
			}

			offlineThreshold, err = tx.GetNodeOfflineThreshold(ctx)
			if err != nil {
				return err
			}

			return nil
		})
		if err != nil {
			logger.Warn("Failed to get current cluster members", logger.Ctx{"err": err})
			return
		}

		hbMembers = make(map[int64]APIHeartbeatMember, len(members))
		for _, member := range members {
			hbMembers[member.ID] = APIHeartbeatMember{
				ID:            member.ID,
				Name:          member.Name,
				Address:       member.Address,
				LastHeartbeat: member.Heartbeat,
				Online:        !member.IsOffline(offlineThreshold),
				Roles:         member.Roles,
			}
		}
	}

	localAddress := endpoints.NetworkAddress()
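	// Work out the event-hub addresses and which event mode this server should operate in, based on the
	// member roles.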
	hubAddresses, localEventMode := hubAddresses(localAddress, hbMembers)

	// Track which member addresses should keep their listeners; anything not recorded here is disconnected
	// in the cleanup pass further down.
	keepListeners := make(map[string]struct{})
	wg := sync.WaitGroup{}
	for _, hbMember := range hbMembers {
		// Don't bother trying to connect to ourselves or offline members.
		if hbMember.Address == localAddress || !hbMember.Online {
			continue
		}

		if localEventMode != EventModeFullMesh && !RoleInSlice(db.ClusterRoleEventHub, hbMember.Roles) {
			continue // Skip non-event-hub members if we are operating in event-hub mode.
		}

		listenersLock.Lock()
		listener, ok := listeners[hbMember.Address]

		// If the member already has a listener associated with it, check that the listener is still active.
		// If it is, just move on to the next member; if not, we'll try to connect again.
		if ok {
			if listener.IsActive() {
				keepListeners[hbMember.Address] = struct{}{} // Add to current listeners list.
				listener.SetEventMode(localEventMode, eventHubPushCh)
				listenersLock.Unlock()
				continue
			}

			// Disconnect and delete listener, but don't delete any listenersNotify entry as there
			// might be something waiting for a future connection.
			listener.Disconnect()
			delete(listeners, hbMember.Address)
			listenersLock.Unlock()

			// Log after releasing listenersLock to avoid deadlock on listenersLock with EventHubPush.
			logger.Debug("Removed inactive member event listener client", logger.Ctx{"address": hbMember.Address})
		} else {
			listenersLock.Unlock()
		}

		keepListeners[hbMember.Address] = struct{}{} // Add to current listeners list.

		// Connect to remote concurrently and add to active listeners if successful.
		wg.Add(1)
		go func(m APIHeartbeatMember) {
			defer wg.Done()
			l := logger.AddContext(logger.Ctx{"local": localAddress, "remote": m.Address})

			if !HasConnectivity(endpoints.NetworkCert(), serverCert(), m.Address, true) {
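				// Mark the member as unavailable so that EventListenerWait fails fast for this address.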
				listenersLock.Lock()
				listenersUnavailable[m.Address] = true
				listenersLock.Unlock()
				return
			}

			listener, err := eventsConnect(m.Address, endpoints.NetworkCert(), serverCert())
			if err != nil {
				l.Warn("Failed adding member event listener client", logger.Ctx{"err": err})
				return
			}

			_, _ = listener.AddHandler(nil, func(event api.Event) {
				// Inject the event received via pull as forwarded so that it's not forwarded again
				// to other members.
				inject(event, events.EventSourcePull)
			})

			listener.SetEventMode(localEventMode, eventHubPushCh)

			listenersLock.Lock()
			listeners[m.Address] = listener
			listenersUnavailable[m.Address] = false

			// Indicate to any notifiers waiting for this member's address that it is connected.
			for connected, notifyAddresses := range listenersNotify {
				if slices.Contains(notifyAddresses, m.Address) {
					close(connected)
					delete(listenersNotify, connected)
				}
			}

			listenersLock.Unlock()

			// Log after releasing listenersLock to avoid deadlock on listenersLock with EventHubPush.
			l.Debug("Added member event listener client")
		}(hbMember)
	}

	// Wait for all connection attempts to complete before pruning stale listeners.
	wg.Wait()

	// Disconnect and delete any out-of-date listeners.
	var removedAddresses []string

	listenersLock.Lock()
	for address, listener := range listeners {
		_, found := keepListeners[address]
		if !found {
			listener.Disconnect()
			delete(listeners, address)

			// Record the address removed, but don't log it here as this could cause a deadlock on
			// listenersLock with EventHubPush.
			removedAddresses = append(removedAddresses, address)
		}
	}

	// Store the event hub addresses in the global slice late in the function, after all event connections have
	// been opened above. This way the state reported by this server won't be updated until it's ready.
	eventHubAddresses = hubAddresses
	eventMode = localEventMode

	listenersLock.Unlock()

	// Log the listeners removed after releasing listenersLock.
	for _, removedAddress := range removedAddresses {
		logger.Debug("Removed old member event listener client", logger.Ctx{"address": removedAddress})
	}

	if len(hbMembers) > 1 && len(keepListeners) <= 0 {
		logger.Warn("No active cluster event listener clients")
	}
}

// eventsConnect establishes a client connection to get events from the given node.
func eventsConnect(address string, networkCert *localtls.CertInfo, serverCert *localtls.CertInfo) (*eventListenerClient, error) {
	client, err := Connect(address, networkCert, serverCert, nil, true)
	if err != nil {
		return nil, err
	}

	reverter := revert.New()
	defer reverter.Fail() // Ensure the client is disconnected if setting up the event listener fails below.

	reverter.Add(func() {
		client.Disconnect()
	})

	listener, err := client.GetEventsAllProjects()
	if err != nil {
		return nil, err
	}

	reverter.Success()

	lc := &eventListenerClient{
		EventListener: listener,
		client:        client,
	}

	return lc, nil
}

// EventHubPush pushes the event to the event hub members if the local server is an event-hub client.
func EventHubPush(event api.Event) {
	listenersLock.Lock()
	// If the local server isn't an event-hub client, then we don't need to push messages, as the other
	// members should be connected to us via a pull event listener and so will receive the event that way.
	// Also, if there are no listeners available, there's no point in pushing to eventHubPushCh as it will
	// have no consumers reading from it (this allows somewhat graceful handling of the situation where all
	// event-hub members are down, by dropping events rather than slowing down the local system).
	if eventMode != EventModeHubClient || len(listeners) <= 0 {
		listenersLock.Unlock()
		return
	}

	listenersLock.Unlock()

	// Run in a goroutine so as not to delay the caller of this function while we try to deliver the event.
	go func() {
		ctx, cancel := context.WithTimeout(context.Background(), eventHubPushChTimeout)
		defer cancel()

		select {
		case eventHubPushCh <- event:
		case <-ctx.Done(): // Don't block if all consumers are slow/down.
		}
	}()
}