File: agent_cgroups_process_check-unknown_process_check.py

package info (click to toggle)
waagent 2.12.0.2-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 8,780 kB
  • sloc: python: 55,011; xml: 3,325; sh: 1,183; makefile: 22
file content (96 lines) | stat: -rwxr-xr-x 3,969 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/usr/bin/env pypy3
# Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# This script forces the agent's cgroup process check to fail by putting an unknown process in the agent's cgroup

import subprocess
import datetime

from assertpy import fail

from azurelinuxagent.common.utils import shellutil
from tests_e2e.tests.lib.cgroup_helpers import check_agent_quota_disabled, check_log_message, get_unit_cgroup_proc_path, AGENT_SERVICE_NAME
from tests_e2e.tests.lib.logging import log
from tests_e2e.tests.lib.retry import retry_if_false


def prepare_agent():
    """
    Runs update-waagent-conf with the cgroup debug flags used by this test and then waits for the
    agent log to report that cgroups are enabled; the test fails if that message is not found.
    """
    # Captured before the configuration change so that only log entries after this point are considered
    started_at = datetime.datetime.utcnow()

    log.info("Executing script update-waagent-conf to enable agent cgroups config flag")
    conf_command = [
        "update-waagent-conf",
        "Debug.CgroupCheckPeriod=20",
        "Debug.CgroupLogMetrics=y",
        "Debug.CgroupDisableOnProcessCheckFailure=y",
        "Debug.CgroupDisableOnQuotaCheckFailure=n",
    ]
    output = shellutil.run_command(conf_command)
    log.info("Successfully enabled agent cgroups config flag: {0}".format(output))

    def cgroups_enabled_logged():
        return check_log_message(" Agent cgroups enabled: True", after_timestamp=started_at)

    if not retry_if_false(cgroups_enabled_logged):
        fail("Agent cgroups not enabled")


def creating_dummy_process():
    """
    Starts a long-running 'sleep' as a child process and returns its PID. The process is not
    waited on here; the caller is expected to terminate it.
    """
    log.info("Creating dummy process to add to agent's cgroup")
    sleep_command = ["sleep", "60m"]
    return subprocess.Popen(sleep_command).pid


def remove_dummy_process(pid):
    """
    Terminates the dummy process identified by 'pid' by running 'kill -9' on it.
    """
    log.info("Removing dummy process from agent's cgroup")
    kill_command = ["kill", "-9", str(pid)]
    shellutil.run_command(kill_command)


def disable_agent_cgroups_with_unknown_process(pid):
    """
    Appends the given PID to the cgroup.procs file of the agent's cgroup and verifies that the agent
    reports the unexpected process in its log and that the agent quota is disabled. The test fails
    if that does not happen.

    Note: The system may kick the added process out of the cgroup, so the PID is written again on
    each attempt until the agent detects the process.
    """
    # The log message indicating the check failed is similar to
    #     2021-03-29T23:33:15.603530Z INFO MonitorHandler ExtHandler Disabling resource usage monitoring. Reason: Check on cgroups failed:
    #     [CGroupsException] The agent's cgroup includes unexpected processes: ['[PID: 25826] python3\x00/home/nam/Compute-Runtime-Tux-Pipeline/dungeon_crawler/s']
    expected_message = "Disabling resource usage monitoring. Reason: Check on cgroups failed:.+The agent's cgroup includes unexpected processes:.+{0}".format(
        pid)

    def unknown_process_found():
        procs_file = get_unit_cgroup_proc_path(AGENT_SERVICE_NAME, 'cpu,cpuacct')
        log.info("Adding dummy process %s to cgroup.procs file %s", pid, procs_file)
        try:
            with open(procs_file, 'a') as procs:
                procs.write("\n")
                procs.write(str(pid))
        except Exception as error:
            # A failed write counts as a failed attempt; the outer retry will try again
            log.warning("Error while adding process to cgroup.procs file: {0}".format(error))
            return False

        detected: bool = retry_if_false(lambda: check_log_message(expected_message), attempts=3)
        if not detected:
            return detected
        # The agent logged the unexpected process; now confirm that the quota was disabled as well
        return retry_if_false(check_agent_quota_disabled, attempts=3)

    if not retry_if_false(unknown_process_found, attempts=3):
        fail("The agent did not detect unknown process: {0}".format(pid))


def main():
    """
    Runs the test: configures the agent, starts a dummy process, adds it to the agent's cgroup and
    verifies the agent detects it, then removes the dummy process.
    """
    prepare_agent()
    pid = creating_dummy_process()
    try:
        disable_agent_cgroups_with_unknown_process(pid)
    finally:
        # Always clean up: a failed verification raises, and without the 'finally' the long-running
        # dummy process would be left behind on the test machine.
        remove_dummy_process(pid)


# Run the test only when this file is executed as a script (not when it is imported)
if __name__ == "__main__":
    main()