File: test_networking.py

package info (click to toggle)
cloud-init 25.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 12,412 kB
  • sloc: python: 135,894; sh: 3,883; makefile: 141; javascript: 30; xml: 22
file content (482 lines) | stat: -rw-r--r-- 16,673 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
"""Networking-related tests."""

import contextlib
import json

import pytest
import yaml

from cloudinit.subp import subp
from tests.integration_tests import random_mac_address
from tests.integration_tests.clouds import Ec2Cloud, IntegrationCloud
from tests.integration_tests.instances import IntegrationInstance
from tests.integration_tests.integration_settings import PLATFORM
from tests.integration_tests.releases import (
    CURRENT_RELEASE,
    IS_UBUNTU,
    JAMMY,
    NOBLE,
)
from tests.integration_tests.util import (
    has_netplanlib,
    verify_clean_boot,
    verify_clean_log,
)

# LXD instance config key used to hand network config to the instance.
# Older Ubuntu series (before Jammy) didn't read cloud-init.* config keys,
# so the user.* key is used for them instead.
LXD_NETWORK_CONFIG_KEY = (
    "user.network-config"
    if CURRENT_RELEASE < JAMMY
    else "cloud-init.network-config"
)


def _add_dummy_bridge_to_netplan(client: IntegrationInstance):
    """Add a do-nothing ``dummy0`` bridge to the instance's netplan file.

    The current /etc/netplan/50-cloud-init.yaml is read from ``client``,
    a ``dummy0`` bridge with ``dhcp4`` disabled is added (or overwritten)
    under ``network.bridges``, and the result is written back in place.
    """
    netplan_path = "/etc/netplan/50-cloud-init.yaml"
    config = yaml.safe_load(client.execute(f"cat {netplan_path}"))

    # Create the "bridges" mapping on demand when the config has none yet.
    bridges = config["network"].setdefault("bridges", {})
    bridges["dummy0"] = {"dhcp4": False}

    client.write_to_file(netplan_path, yaml.dump(config))


# Cloud-config user data that sets network config updates to happen on the
# "boot" event.
USER_DATA = """\
#cloud-config
updates:
  network:
    when: [boot]
"""


@pytest.mark.skipif(
    PLATFORM not in ("lxd_container", "lxd_vm"),
    reason=(
        f"{PLATFORM} could make nic changes in a reboot event"
        " invalidating these tests."
    ),
)
@pytest.mark.user_data(USER_DATA)
class TestNetplanGenerateBehaviorOnReboot:
    """Check whether ``netplan generate`` is re-run after a reboot."""

    # Log message emitted when the rendered netplan config is unchanged.
    _SKIP_GENERATE_MSG = (
        "skipping call to `netplan generate`."
        " reason: identical netplan config"
    )
    # Log message emitted when ``netplan generate`` is actually invoked.
    _GENERATE_CMD_MSG = "Running command ['netplan', 'generate']"

    def _check_boot_log_and_move_it(self, client: IntegrationInstance):
        """Assert netplan was rendered, then move the log to a .bak file.

        Returns the log contents as read before the move.
        """
        boot_log = client.read_from_file("/var/log/cloud-init.log")
        assert "Applying network configuration" in boot_log
        assert "Selected renderer 'netplan'" in boot_log
        client.execute(
            "mv /var/log/cloud-init.log /var/log/cloud-init.log.bak"
        )
        return boot_log

    def _load_netplan(self, client: IntegrationInstance):
        """Return the parsed content of the cloud-init netplan file."""
        raw_netplan = client.execute("cat /etc/netplan/50-cloud-init.yaml")
        return yaml.safe_load(raw_netplan)

    def _restart_and_read_log(self, client: IntegrationInstance):
        """Reboot, then assert network config was re-applied on boot.

        Returns the log contents read after the restart.
        """
        client.restart()
        reboot_log = client.read_from_file("/var/log/cloud-init.log")
        assert "Event Allowed: scope=network EventType=boot" in reboot_log
        assert "Applying network configuration" in reboot_log
        return reboot_log

    def test_skip(self, client: IntegrationInstance):
        """An unchanged netplan config must not trigger `netplan generate`."""
        boot_log = self._check_boot_log_and_move_it(client)
        if not has_netplanlib(client):
            assert (
                "No netplan python module. Fallback to write"
                " /etc/netplan/50-cloud-init.yaml" in boot_log
            )
        else:
            assert (
                "Rendered netplan config using netplan python API" in boot_log
            )
        netplan_before = self._load_netplan(client)

        reboot_log = self._restart_and_read_log(client)

        assert self._GENERATE_CMD_MSG not in reboot_log
        assert self._SKIP_GENERATE_MSG in reboot_log
        netplan_after = self._load_netplan(client)
        assert (
            netplan_before == netplan_after
        ), "no changes expected in netplan config"

    def test_applied(self, client: IntegrationInstance):
        """A modified netplan file must be re-rendered and regenerated."""
        self._check_boot_log_and_move_it(client)
        # fake a change in the rendered network config file
        _add_dummy_bridge_to_netplan(client)
        netplan_before = self._load_netplan(client)

        reboot_log = self._restart_and_read_log(client)

        assert self._SKIP_GENERATE_MSG not in reboot_log
        assert self._GENERATE_CMD_MSG in reboot_log
        netplan_after = self._load_netplan(client)
        assert (
            netplan_before != netplan_after
        ), "changes expected in netplan config"


# Network config version 1: one physical eth0, identified by MAC address,
# configured via DHCP. {mac_addr} is filled in by the test.
NET_V1_CONFIG = """
config:
- name: eth0
  type: physical
  mac_address: '{mac_addr}'
  subnets:
  - control: auto
    type: dhcp
version: 1
"""


# Network config version 2: eth0 with DHCPv4, matched by MAC address and
# renamed to eth0. {mac_addr} is filled in by the test.
NET_V2_MATCH_CONFIG = """
version: 2
ethernets:
  eth0:
      dhcp4: true
      match:
        macaddress: {mac_addr}
      set-name: eth0
"""

# Comment header the tests expect at the top of the rendered
# /etc/netplan/50-cloud-init.yaml when the netplan python library is not
# available on the instance (and expect to be absent when it is).
EXPECTED_NETPLAN_HEADER = """\
# This file is generated from information provided by the datasource.  Changes
# to it will not persist across an instance reboot.  To disable cloud-init's
# network configuration capabilities, write a file
# /etc/cloud/cloud.cfg.d/99-disable-network-config.cfg with the following:
# network: {config: disabled}"""

# Expected rendered netplan config. Tests parse this as YAML and then
# overwrite the "match" entry with the real MAC address instead of formatting
# the {mac_addr} placeholder.
EXPECTED_NET_CONFIG = """\
network:
  version: 2
  ethernets:
    eth0:
      dhcp4: true
      set-name: eth0
      match:
        macaddress: {mac_addr}
"""

# Invalid network config version 2: dhcp4 carries a non-boolean value.
# {match_condition} is filled in by the test.
BAD_NETWORK_V2 = """\
version: 2
ethernets:
  eth0:
    dhcp4: badval
    match:
      {match_condition}
"""


@pytest.mark.skipif(
    PLATFORM != "lxd_vm",
    reason="Test requires custom networking provided by LXD",
)
@pytest.mark.parametrize(
    "net_config",
    (
        pytest.param(NET_V1_CONFIG, id="v1"),
        pytest.param(NET_V2_MATCH_CONFIG, id="v2"),
    ),
)
def test_netplan_rendering(net_config, session_cloud: IntegrationCloud):
    """Check the netplan file rendered from v1 and v2 network config.

    The rendered YAML must equal the expected config for the generated MAC
    address. The generated-file header is only expected when the netplan
    python library is not available on the instance.
    """
    mac_addr = random_mac_address()

    # Parse first, then inject the MAC as a plain string value.
    expected = yaml.safe_load(EXPECTED_NET_CONFIG)
    eth0_expected = expected["network"]["ethernets"]["eth0"]
    eth0_expected["match"] = {"macaddress": mac_addr}

    lxd_config = {
        LXD_NETWORK_CONFIG_KEY: net_config.format(mac_addr=mac_addr),
        "volatile.eth0.hwaddr": mac_addr,
    }
    launch_kwargs = {"config_dict": lxd_config}
    with session_cloud.launch(launch_kwargs=launch_kwargs) as client:
        rendered = client.execute(
            "cat /etc/netplan/50-cloud-init.yaml"
        ).stdout
        if not has_netplanlib(client):
            assert rendered.startswith(EXPECTED_NETPLAN_HEADER)
        else:
            assert EXPECTED_NETPLAN_HEADER not in rendered
        assert expected == yaml.safe_load(rendered)


# Network config version 1 whose interface name (eth01234567890123) is
# expected to be reported as not a valid ifname. {mac_addr} is filled in by
# the test.
NET_V1_NAME_TOO_LONG = """\
config:
- name: eth01234567890123
  type: physical
  mac_address: '{mac_addr}'
  subnets:
  - control: auto
    type: dhcp
version: 1
"""


@pytest.mark.skipif(
    PLATFORM != "lxd_vm",
    reason="Test requires custom networking provided by LXD",
)
@pytest.mark.parametrize("net_config", (NET_V1_NAME_TOO_LONG,))
def test_schema_warnings(net_config, session_cloud: IntegrationCloud):
    """Check that an invalid interface name is reported as a schema problem.

    The instance is launched with a network config whose interface name is
    not a valid ifname. `cloud-init status` must mention the problem (with
    an exit code that depends on the release) and `cloud-init schema
    --system` must flag the network-config as invalid.
    """
    # TODO: This test takes a lot more time than it needs to.
    # The default launch wait will wait until cloud-init done, but the
    # init network stage will wait 2 minutes for network timeout.
    # We could set wait=False and do our own waiting, but there's also the
    # issue of `execute_via_ssh=False` on pycloudlib means we `sudo -u ubuntu`
    # the exec commands, but the ubuntu user won't exist until
    # after the init network stage runs.
    mac_addr = random_mac_address()
    launch_kwargs = {
        "execute_via_ssh": False,
        "config_dict": {
            LXD_NETWORK_CONFIG_KEY: net_config.format(mac_addr=mac_addr),
            "volatile.eth0.hwaddr": mac_addr,
        },
    }
    with session_cloud.launch(launch_kwargs=launch_kwargs) as client:
        result = client.execute("cloud-init status --format=json")
        if CURRENT_RELEASE < NOBLE:
            assert result.ok
            assert result.return_code == 0  # Stable release still exit 0
        else:
            assert result.failed
            assert result.return_code == 2  # Warnings exit 2 after 23.4
        assert (
            'eth01234567890123\\" is wrong: \\"name\\" not a valid ifname'
            in result.stdout
        )
        result = client.execute("cloud-init schema --system")
        assert "Invalid network-config " in result.stdout


@pytest.mark.skipif(
    not IS_UBUNTU, reason="Dependent on netplan API availability on Ubuntu"
)
@pytest.mark.skipif(
    PLATFORM not in ("lxd_vm", "lxd_container"),
    reason="Test requires lxc exec feature due to broken network config",
)
def test_invalid_network_v2_netplan(session_cloud: IntegrationCloud):
    """Check how an invalid v2 network config is reported.

    With the netplan python library on the instance, the schema error must
    show up in `cloud-init status` and `cloud-init schema --system`.
    Without it, schema validation is expected to be skipped with a message.
    """
    mac_addr = random_mac_address()

    # VMs match the NIC by MAC address and pin that MAC in the LXD config;
    # containers match by interface name and need no extra key.
    if PLATFORM == "lxd_vm":
        match_condition = f"macaddress: {mac_addr}"
        extra_config = {"volatile.eth0.hwaddr": mac_addr}
    else:
        match_condition = "name: eth0"
        extra_config = {}

    config_dict = {
        LXD_NETWORK_CONFIG_KEY: BAD_NETWORK_V2.format(
            match_condition=match_condition
        ),
        **extra_config,
    }
    launch_kwargs = {
        "execute_via_ssh": False,
        "config_dict": config_dict,
    }

    with session_cloud.launch(launch_kwargs=launch_kwargs) as client:
        if not has_netplanlib(client):
            cloud_init_log = client.read_from_file("/var/log/cloud-init.log")
            assert (
                "Skipping netplan schema validation. No netplan API available"
            ) in cloud_init_log
            schema_out = client.execute("cloud-init schema --system")
            assert (
                "Skipping network-config schema validation for version: 2."
                " No netplan API available."
            ) in schema_out
            return

        status_out = client.execute("cloud-init status --format=json")
        assert (
            "network-config failed schema validation! You may run "
            "'sudo cloud-init schema --system' to check the details."
        ) in status_out

        schema_out = client.execute("cloud-init schema --system")
        assert "Invalid network-config /var/lib/cloud/instances/" in schema_out

        annotated_out = client.execute("cloud-init schema --system --annotate")
        assert (
            "# E1: Invalid netplan schema. Error in network definition:"
            " invalid boolean value 'badval"
        ) in annotated_out


@pytest.mark.skipif(PLATFORM != "ec2", reason="test is ec2 specific")
def test_ec2_multi_nic_reboot(session_cloud: IntegrationCloud):
    """Check a secondary NIC and its IPs stay reachable after a reboot.

    A secondary NIC with two private and two public IPs is attached to an
    instance configured to update its network config on every boot. After
    a reboot the boot must be clean and port 22 must answer on every
    public IP.
    """
    with session_cloud.launch(launch_kwargs={}, user_data=USER_DATA) as client:
        # Add secondary NIC with two private and public ips
        client.instance.add_network_interface(
            ipv4_address_count=2, ipv4_public_ip_count=2
        )

        public_ips = client.instance.public_ips
        assert len(public_ips) == 3, (
            "Expected 3 public ips, one from the primary nic and 2 from the"
            " secondary one"
        )

        # Reboot to update network config
        client.execute("cloud-init clean --logs")
        client.restart()

        verify_clean_log(client.read_from_file("/var/log/cloud-init.log"))
        verify_clean_boot(client)

        # SSH over primary and secondary NIC works
        for public_ip in public_ips:
            subp(f"nc -w 5 -zv {public_ip} 22", shell=True)


@pytest.mark.adhoc  # costly instance not available in all regions / azs
@pytest.mark.skipif(PLATFORM != "ec2", reason="test is ec2 specific")
def test_ec2_multi_network_cards(session_cloud: Ec2Cloud):
    """
    Tests an instance type with multiple network cards (non unique device
    indexes).

    Checks that the metadata of every NIC reports its network card, that the
    boot after a reboot is clean and that port 22 answers on the public IPs
    associated to the secondary NICs.

    https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-eni.html
    https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/p5-efa.html
    """
    ec2 = session_cloud.cloud_instance.client

    vpc = session_cloud.cloud_instance.get_or_create_vpc(
        name="ec2-cloud-init-integration"
    )
    # Unpacking into a one-element list asserts there is exactly one subnet.
    [subnet_id] = [s.id for s in vpc.vpc.subnets.all()]
    security_group_ids = [sg.id for sg in vpc.vpc.security_groups.all()]

    launch_kwargs = {
        "InstanceType": "p5.48xlarge",
        "NetworkInterfaces": [
            {
                "NetworkCardIndex": 0,
                "DeviceIndex": 0,
                "InterfaceType": "efa",
                "DeleteOnTermination": True,
                "Groups": security_group_ids,
                "SubnetId": subnet_id,
            },
            {
                "NetworkCardIndex": 1,
                "DeviceIndex": 1,
                "InterfaceType": "efa",
                "DeleteOnTermination": True,
                "Groups": security_group_ids,
                "SubnetId": subnet_id,
            },
            {
                "NetworkCardIndex": 2,
                "DeviceIndex": 1,
                "InterfaceType": "efa",
                "DeleteOnTermination": True,
                "Groups": security_group_ids,
                "SubnetId": subnet_id,
            },
        ],
    }
    # Instances with this network setup do not get a public ip.
    # Launch without waiting: we can only interact with the instance once a
    # public ip has been associated to the primary interface.
    with session_cloud.launch(
        launch_kwargs=launch_kwargs,
        user_data=USER_DATA,
        enable_ipv6=False,
        wait=False,
    ) as client:
        client.instance._instance.wait_until_running(
            Filters=[
                {
                    "Name": "instance-id",
                    "Values": [client.instance.id],
                }
            ]
        )

        network_interfaces = iter(
            ec2.describe_network_interfaces(
                Filters=[
                    {
                        "Name": "attachment.instance-id",
                        "Values": [client.instance.id],
                    }
                ]
            )["NetworkInterfaces"]
        )
        nic_id_0 = next(network_interfaces)["NetworkInterfaceId"]

        # Only resources that were really created end up in these lists, so
        # the cleanup below never has to reference an unbound name.
        allocations = []
        associations = []

        def attach_public_ip(nic_id):
            """Allocate an address, associate it to nic_id and return it."""
            allocation = ec2.allocate_address(Domain="vpc")
            allocations.append(allocation)
            association = ec2.associate_address(
                AllocationId=allocation["AllocationId"],
                NetworkInterfaceId=nic_id,
            )
            associations.append(association)
            assert association["ResponseMetadata"]["HTTPStatusCode"] == 200
            return allocation

        try:
            attach_public_ip(nic_id_0)

            result = client.execute(
                "cloud-init query ds.meta-data.network.interfaces.macs"
            )
            assert result.ok, result.stderr
            # stdout is a string, so it must be parsed with json.loads; the
            # parsed value maps each MAC to the metadata of that interface.
            macs_metadata = json.loads(result.stdout)
            for net_metadata in macs_metadata.values():
                assert "network-card" in net_metadata

            nic_id_1 = next(network_interfaces)["NetworkInterfaceId"]
            allocation_1 = attach_public_ip(nic_id_1)

            nic_id_2 = next(network_interfaces)["NetworkInterfaceId"]
            allocation_2 = attach_public_ip(nic_id_2)

            # Reboot to update network config
            client.execute("cloud-init clean --logs")
            client.restart()

            log_content = client.read_from_file("/var/log/cloud-init.log")
            verify_clean_log(log_content)
            verify_clean_boot(client)

            # SSH over secondary NICs works
            subp("nc -w 5 -zv " + allocation_1["PublicIp"] + " 22", shell=True)
            subp("nc -w 5 -zv " + allocation_2["PublicIp"] + " 22", shell=True)
        finally:
            # Best-effort cleanup: a failure to release one resource must not
            # prevent the attempt to release the others.
            for association in associations:
                with contextlib.suppress(Exception):
                    ec2.disassociate_address(
                        AssociationId=association["AssociationId"]
                    )
            for allocation in allocations:
                with contextlib.suppress(Exception):
                    ec2.release_address(
                        AllocationId=allocation["AllocationId"]
                    )