1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
|
#!/usr/bin/env pypy3
# Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This script forces the agent's cgroup process check to fail by putting an unknown process in the agent's cgroup
import subprocess
import datetime
from assertpy import fail
from azurelinuxagent.common.utils import shellutil
from tests_e2e.tests.lib.cgroup_helpers import check_agent_quota_disabled, check_log_message, get_unit_cgroup_proc_path, AGENT_SERVICE_NAME
from tests_e2e.tests.lib.logging import log
from tests_e2e.tests.lib.retry import retry_if_false
def prepare_agent():
    """
    Update the agent configuration with the cgroups debug flags used by this test and
    wait for the agent log to report that cgroups are enabled.

    Runs the "update-waagent-conf" command with the Debug.Cgroup* settings listed below,
    then polls (via retry_if_false) for the " Agent cgroups enabled: True" log message.
    Fails the test if the message is not found.
    """
    # Captured before the configuration update; passed to the log check as 'after_timestamp'
    start_time = datetime.datetime.utcnow()

    log.info("Executing script update-waagent-conf to enable agent cgroups config flag")
    conf_settings = [
        "Debug.CgroupCheckPeriod=20",
        "Debug.CgroupLogMetrics=y",
        "Debug.CgroupDisableOnProcessCheckFailure=y",
        "Debug.CgroupDisableOnQuotaCheckFailure=n",
    ]
    output = shellutil.run_command(["update-waagent-conf"] + conf_settings)
    log.info("Successfully enabled agent cgroups config flag: {0}".format(output))

    def cgroups_enabled():
        return check_log_message(" Agent cgroups enabled: True", after_timestamp=start_time)

    enabled: bool = retry_if_false(cgroups_enabled)
    if not enabled:
        fail("Agent cgroups not enabled")
def creating_dummy_process():
    """
    Start a "sleep 60m" child process and return its PID.

    The child is not waited on here; the caller is expected to terminate it
    (see remove_dummy_process).
    """
    log.info("Creating dummy process to add to agent's cgroup")
    sleeper = subprocess.Popen(["sleep", "60m"])
    return sleeper.pid
def remove_dummy_process(pid):
    """
    Terminate the process with the given PID by running "kill -9 <pid>".
    """
    log.info("Removing dummy process from agent's cgroup")
    kill_command = ["kill", "-9", str(pid)]
    shellutil.run_command(kill_command)
def disable_agent_cgroups_with_unknown_process(pid):
    """
    Append the given PID to the agent's cgroup.procs file and verify that the agent reports
    the unexpected process and disables resource usage monitoring.

    Note: the system may move the added process back out of the cgroup, so the PID is
    re-added on every retry until the agent detects it. Fails the test if the agent
    never reports the process.
    """
    def unknown_process_found():
        procs_path = get_unit_cgroup_proc_path(AGENT_SERVICE_NAME, 'cpu,cpuacct')
        log.info("Adding dummy process %s to cgroup.procs file %s", pid, procs_path)
        try:
            with open(procs_path, 'a') as procs_file:
                procs_file.write("\n")
                procs_file.write(str(pid))
        except Exception as error:
            # Best effort: report the problem and let the outer retry try again
            log.warning("Error while adding process to cgroup.procs file: {0}".format(error))
            return False

        # The log message indicating that the check failed looks similar to
        #     2021-03-29T23:33:15.603530Z INFO MonitorHandler ExtHandler Disabling resource usage monitoring. Reason: Check on cgroups failed:
        #     [CGroupsException] The agent's cgroup includes unexpected processes: ['[PID: 25826] python3\x00/home/nam/Compute-Runtime-Tux-Pipeline/dungeon_crawler/s']
        expected_message = "Disabling resource usage monitoring. Reason: Check on cgroups failed:.+The agent's cgroup includes unexpected processes:.+{0}".format(
            pid)

        def message_logged():
            return check_log_message(expected_message)

        logged: bool = retry_if_false(message_logged, attempts=3)
        if not logged:
            return logged
        # The message was logged; the quota check decides the final outcome
        return retry_if_false(check_agent_quota_disabled, attempts=3)

    detected: bool = retry_if_false(unknown_process_found, attempts=3)
    if not detected:
        fail("The agent did not detect unknown process: {0}".format(pid))
def main():
    """
    Run the scenario: configure the agent, start a dummy process, add it to the agent's
    cgroup and verify the agent reacts, then remove the dummy process.

    The dummy process is removed even when the verification step fails, so a failed
    run does not leave the "sleep" process behind.
    """
    prepare_agent()
    pid = creating_dummy_process()
    try:
        disable_agent_cgroups_with_unknown_process(pid)
    finally:
        # The verification reports failure by raising; clean up the child process regardless
        remove_dummy_process(pid)
# Run the scenario only when this file is executed as a script, not when it is imported
if __name__ == "__main__":
    main()
|