1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269
|
package cluster_test
import (
"context"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/cowsql/go-cowsql/driver"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/lxc/incus/v6/internal/server/cluster"
clusterConfig "github.com/lxc/incus/v6/internal/server/cluster/config"
"github.com/lxc/incus/v6/internal/server/db"
"github.com/lxc/incus/v6/internal/server/node"
"github.com/lxc/incus/v6/internal/server/state"
"github.com/lxc/incus/v6/internal/version"
"github.com/lxc/incus/v6/shared/osarch"
localtls "github.com/lxc/incus/v6/shared/tls"
"github.com/lxc/incus/v6/shared/tls/tlstest"
)
// TestHeartbeat runs one heartbeat round from the leader of a three-member
// test cluster and checks that, afterwards, no member is reported as offline.
//
// Every member's heartbeat timestamp is first pushed one minute into the past,
// so the assertion only holds if the heartbeat round refreshed the timestamps.
func TestHeartbeat(t *testing.T) {
	fixture := heartbeatFixture{t: t}
	defer fixture.Cleanup()

	// One bootstrap node plus two joined nodes.
	fixture.Bootstrap()
	fixture.Grow()
	fixture.Grow()

	time.Sleep(1 * time.Second) // Wait for join notification triggered heartbeats to complete.

	leader := fixture.Leader()
	leaderState := fixture.State(leader)

	// Artificially age the heartbeat of every member.
	backdate := func(ctx context.Context, tx *db.ClusterTx) error {
		nodes, err := tx.GetNodes(ctx)
		require.NoError(t, err)

		for _, n := range nodes {
			stale := time.Now().Add(-time.Minute)
			require.NoError(t, tx.SetNodeHeartbeat(n.Address, stale))
		}

		return nil
	}

	err := leaderState.DB.Cluster.Transaction(context.TODO(), backdate)
	require.NoError(t, err)

	// Run a single heartbeat round against the leader gateway.
	leader.Cluster = leaderState.DB.Cluster
	runHeartbeat, _ := cluster.HeartbeatTask(leader)
	runHeartbeat(context.Background())

	// None of the members may be considered offline any more.
	verify := func(ctx context.Context, tx *db.ClusterTx) error {
		nodes, err := tx.GetNodes(ctx)
		require.NoError(t, err)

		threshold, err := tx.GetNodeOfflineThreshold(ctx)
		require.NoError(t, err)

		for _, n := range nodes {
			assert.False(t, n.IsOffline(threshold))
		}

		return nil
	}

	err = leaderState.DB.Cluster.Transaction(context.TODO(), verify)
	require.NoError(t, err)
}
// heartbeatFixture is a helper for testing heartbeat-related code.
//
// It keeps track of the gateway, state handle and HTTP test server of every
// node it creates, along with the functions needed to tear them down again.
// The maps are allocated lazily by node(), so the zero value (with only t set)
// is ready to use.
type heartbeatFixture struct {
	t        *testing.T                            // test the fixture reports to
	gateways map[int]*cluster.Gateway              // node index to gateway
	states   map[*cluster.Gateway]*state.State     // gateway to its state handle
	servers  map[*cluster.Gateway]*httptest.Server // gateway to its HTTP server
	cleanups []func()                              // teardown functions, run in reverse order by Cleanup
}
// Bootstrap creates a fresh node and bootstraps the test cluster on it.
//
// It returns the gateway of the newly bootstrapped node. Any bootstrap error
// fails the test immediately.
func (f *heartbeatFixture) Bootstrap() *cluster.Gateway {
	f.t.Logf("create bootstrap node for test cluster")

	nodeState, nodeGateway, _ := f.node()
	require.NoError(f.t, cluster.Bootstrap(nodeState, nodeGateway, "buzz"))

	return nodeGateway
}
// Grow adds a new node to the cluster.
//
// The current leader accepts the new member, which then joins using the
// leader's network and server certificates. The gateway of the new node is
// returned; any error along the way fails the test immediately.
func (f *heartbeatFixture) Grow() *cluster.Gateway {
	f.t.Logf("adding another node to the test cluster")

	// The current leader is the one accepting the new member.
	leader := f.Leader()
	leaderState := f.states[leader]

	// The new node's network address doubles as its member name.
	joinerState, joiner, address := f.node()
	name := address

	accepted, err := cluster.Accept(
		leaderState,
		leader,
		name,
		address,
		cluster.SchemaVersion,
		len(version.APIExtensions),
		osarch.ARCH_64BIT_INTEL_X86,
	)
	require.NoError(f.t, err)

	err = cluster.Join(joinerState, joiner, leader.NetworkCert(), leader.ServerCert(), name, accepted)
	require.NoError(f.t, err)

	return joiner
}
// Leader returns the gateway that currently reports itself as leader.
//
// All known gateways are polled every 10ms for up to one second, to give an
// election time to complete. If no gateway reports leadership within that
// window the test is aborted with Fatalf: reporting the timeout with Errorf
// would leave the polling loop spinning forever, since the expired context
// never becomes "not done" again.
//
// Because it may call Fatalf, Leader must be called from the goroutine running
// the test.
func (f *heartbeatFixture) Leader() *cluster.Gateway {
	timeout := time.Second
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	for {
		for _, gateway := range f.gateways {
			isLeader, err := gateway.IsLeader()
			if err != nil {
				// Record the failure but keep probing the other gateways.
				f.t.Errorf("failed to check leadership: %v", err)
				continue
			}

			if isLeader {
				return gateway
			}
		}

		select {
		case <-ctx.Done():
			// Fatalf stops this goroutine, which terminates the loop.
			f.t.Fatalf("no leader was elected within %s", timeout)
		default:
		}

		// Wait a bit for election to take place
		time.Sleep(10 * time.Millisecond)
	}
}
// Follower returns a gateway that currently reports itself as not being the
// leader.
//
// All known gateways are polled every 10ms for up to one second. A gateway
// whose leadership check fails is skipped rather than returned, because the
// result of a failed check says nothing about its role. If no follower is found
// within the window the test is aborted with Fatalf: reporting the timeout with
// Errorf would leave the polling loop spinning forever.
//
// Because it may call Fatalf, Follower must be called from the goroutine
// running the test.
func (f *heartbeatFixture) Follower() *cluster.Gateway {
	timeout := time.Second
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	for {
		for _, gateway := range f.gateways {
			isLeader, err := gateway.IsLeader()
			if err != nil {
				// Do not trust isLeader when the check itself failed.
				f.t.Errorf("failed to check leadership: %v", err)
				continue
			}

			if !isLeader {
				return gateway
			}
		}

		select {
		case <-ctx.Done():
			// Fatalf stops this goroutine, which terminates the loop.
			f.t.Fatalf("no node running as follower")
		default:
		}

		// Wait a bit for election to take place
		time.Sleep(10 * time.Millisecond)
	}
}
// Index returns the node index under which the given gateway was registered,
// or -1 if the fixture does not know the gateway.
func (f *heartbeatFixture) Index(gateway *cluster.Gateway) int {
	for index, candidate := range f.gateways {
		if candidate == gateway {
			return index
		}
	}

	return -1
}
// State returns the state handle registered for the given gateway, or nil if
// the fixture does not know the gateway.
func (f *heartbeatFixture) State(gateway *cluster.Gateway) *state.State {
	if handle, known := f.states[gateway]; known {
		return handle
	}

	return nil
}
// Server returns the HTTP test server registered for the given gateway, or nil
// if the fixture does not know the gateway.
func (f *heartbeatFixture) Server(gateway *cluster.Gateway) *httptest.Server {
	if srv, known := f.servers[gateway]; known {
		return srv
	}

	return nil
}
// node creates a new node, without either bootstrapping or joining it.
//
// It sets up a test state, a gateway and an HTTP test server serving the
// gateway's handlers, then swaps the state's cluster database handle for one
// opened through the gateway's node store and dial function. The global and
// local configuration, as well as the local member name, are loaded into the
// state before the node is registered with the fixture.
//
// It returns the node's state, its gateway and its network address. Any failure
// aborts the test.
func (f *heartbeatFixture) node() (*state.State, *cluster.Gateway, string) {
	// Allocate the lookup tables on first use.
	if f.gateways == nil {
		f.gateways = map[int]*cluster.Gateway{}
		f.states = map[*cluster.Gateway]*state.State{}
		f.servers = map[*cluster.Gateway]*httptest.Server{}
	}

	nodeState, releaseState := state.NewTestState(f.t)
	f.cleanups = append(f.cleanups, releaseState)

	cert := tlstest.TestingKeyPair(f.t)
	nodeState.ServerCert = func() *localtls.CertInfo { return cert }

	// Registered after the state cleanup, so Cleanup shuts the gateway down first.
	gw := newGateway(f.t, nodeState.DB.Node, cert, nodeState)
	f.cleanups = append(f.cleanups, func() { _ = gw.Shutdown() })

	// Expose the gateway's handlers through a test server.
	router := http.NewServeMux()
	srv := newServer(cert, router)
	for route, handle := range gw.HandlerFuncs(nil, trustedCerts) {
		router.HandleFunc(route, handle)
	}

	address := srv.Listener.Addr().String()
	membership := &membershipFixtures{t: f.t, state: nodeState}
	membership.ClusterAddress(address)

	// Replace the existing cluster database handle with one that goes through
	// this node's gateway.
	require.NoError(f.t, nodeState.DB.Cluster.Close())

	nodeStore := gw.NodeStore()
	dialer := gw.DialFunc()
	reopened, err := db.OpenCluster(
		context.Background(),
		"db.bin",
		nodeStore,
		address,
		"/unused/db/dir",
		5*time.Second,
		driver.WithDialFunc(dialer),
	)
	nodeState.DB.Cluster = reopened
	require.NoError(f.t, err)

	// Load the cluster-wide configuration and the local member name.
	err = nodeState.DB.Cluster.Transaction(context.TODO(), func(ctx context.Context, tx *db.ClusterTx) error {
		var loadErr error

		nodeState.GlobalConfig, loadErr = clusterConfig.Load(ctx, tx)
		if loadErr != nil {
			return loadErr
		}

		// Get the local node (will be used if clustered).
		nodeState.ServerName, loadErr = tx.GetLocalNodeName(ctx)

		return loadErr
	})
	require.NoError(f.t, err)

	// Load the node-local configuration.
	err = nodeState.DB.Node.Transaction(context.TODO(), func(ctx context.Context, tx *db.NodeTx) error {
		var loadErr error

		nodeState.LocalConfig, loadErr = node.ConfigLoad(ctx, tx)

		return loadErr
	})
	require.NoError(f.t, err)

	// Register the node under the next free index.
	f.gateways[len(f.gateways)] = gw
	f.states[gw] = nodeState
	f.servers[gw] = srv

	return nodeState, gw, address
}
// Cleanup tears the fixture down: it runs the registered cleanup functions,
// most recently registered first, and then closes every HTTP test server.
func (f *heartbeatFixture) Cleanup() {
	// Run the cleanups in reverse registration order.
	for remaining := len(f.cleanups); remaining > 0; remaining-- {
		f.cleanups[remaining-1]()
	}

	for _, srv := range f.servers {
		srv.Close()
	}
}
|