File: host_cpu.py

package info (click to toggle)
python-pypowervm 1.1.24%2Bds1-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 7,400 kB
  • sloc: python: 29,780; xml: 174; makefile: 14
file content (248 lines) | stat: -rw-r--r-- 11,009 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
# Copyright 2014, 2017 IBM Corp.
#
# All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

from oslo_log import log as logging
from pypowervm.tasks.monitor import util as pcm_util


LOG = logging.getLogger(__name__)


class HostCPUMetricCache(pcm_util.MetricCache):
    """Collects the PowerVM CPU metrics.

    PowerVM only gathers the CPU statistics once every 30 seconds.  It does
    this to reduce overhead.  There is a function to gather statistics quicker,
    but that can be very expensive.  Therefore, to ensure that the client's
    workload is not impacted, these 'longer term' metrics will be used.

    This class builds off of a base pypowervm function where it can obtain
    the samples through a PCM 'cache'.  If a new sample is available, the cache
    pulls the sample.  If it is not, the existing sample is used.

    This can result in multiple, quickly successive calls to the host stats
    returning the same data (because a new sample may not be available yet).

    The class analyzes the data and keeps running counts of total, user, and
    firmware cycles.
    """

    def __init__(self, adapter, host_uuid):
        """Creates an instance of the HostCPUMetricCache.

        :param adapter: The pypowervm Adapter.
        :param host_uuid: The UUID of the host CEC to maintain a metrics
                          cache for.
        """
        # Running counts for total, firmware, and user cycles
        self.total_cycles = 0
        self.total_fw_cycles = 0
        self.total_user_cycles = 0
        self.cpu_freq = self._get_cpu_freq()

        # Invoke the parent to seed the metrics.  Don't include VIO - will
        # result in quicker calls.
        super(HostCPUMetricCache, self).__init__(adapter, host_uuid,
                                                 include_vio=False)

    def refresh(self):
        """Updates the host-level CPU metrics if needed."""
        self._refresh_if_needed()

    def _update_internal_metric(self):
        """Updates cycle totals using the latest stats from the cache.

        This method is invoked by the parent class after the raw metrics are
        updated.
        """

        # If there is no 'new' data (perhaps sampling is not turned on) then
        # return no data.
        if self.cur_phyp is None:
            return

        # Compute the cycles spent in FW since last collection.
        fw_cycles_delta = self._get_fw_cycles_delta()

        # Compute the cycles the system spent since last run.
        tot_cycles_delta = self._get_total_cycles_delta()

        # Get the user cycles since last run
        user_cycles_delta = self._gather_user_cycles_delta()

        # Make sure that the total cycles is higher than the user/fw cycles.
        # Should not happen, but just in case there is any precision loss from
        # CPU data back to system.
        if user_cycles_delta + fw_cycles_delta > tot_cycles_delta:
            LOG.warning(
                "Host CPU Metrics determined that the total cycles reported "
                "was less than the used cycles.  This indicates an issue with "
                "the PCM data.  Please investigate the results.\n"
                "Total Delta Cycles: %(tot_cycles)d\n"
                "User Delta Cycles: %(user_cycles)d\n"
                "Firmware Delta Cycles: %(fw_cycles)d",
                {'tot_cycles': tot_cycles_delta, 'fw_cycles': fw_cycles_delta,
                 'user_cycles': user_cycles_delta})
            tot_cycles_delta = user_cycles_delta + fw_cycles_delta

        self.total_cycles += tot_cycles_delta
        self.total_user_cycles += user_cycles_delta
        self.total_fw_cycles += fw_cycles_delta

    def _gather_user_cycles_delta(self):
        """The estimated user cycles of all VMs/VIOSes since last run.

        The sample data includes information about how much CPU has been used
        by workloads and the Virtual I/O Servers.  There is not one global
        counter that can be used to obtain the CPU spent cycles.

        This method will calculate the delta of workload (and I/O Server)
        cycles between the previous sample and the current sample.

        There are edge cases for this however.  If a VM is deleted or migrated
        its cycles will no longer be taken into account.  The algorithm takes
        this into account by building on top of the previous sample's user
        cycles.

        :return: Estimated cycles spent on workload (including VMs and Virtual
                 I/O Server).  This represents the entire server's current
                 'user' load.
        """
        # Current samples should be guaranteed to be there.
        vm_cur_samples = self.cur_phyp.sample.lpars
        vios_cur_samples = self.cur_phyp.sample.vioses

        # The previous samples may not have been there.
        vm_prev_samples, vios_prev_samples = None, None
        if self.prev_phyp is not None:
            vm_prev_samples = self.prev_phyp.sample.lpars
            vios_prev_samples = self.prev_phyp.sample.vioses

        # Gather the delta cycles between the previous and current data sets
        vm_delta_cycles = self._delta_proc_cycles(vm_cur_samples,
                                                  vm_prev_samples)
        vios_delta_cycles = self._delta_proc_cycles(vios_cur_samples,
                                                    vios_prev_samples)

        return vm_delta_cycles + vios_delta_cycles

    @staticmethod
    def _get_cpu_freq():
        # The output will be similar to '4116.000000MHz' on a POWER system.
        with open('/proc/cpuinfo') as cpuinfo:
            for line in cpuinfo:
                if line.startswith('clock'):
                    return int(float(line.split()[-1].rstrip('MHz')))

    def _delta_proc_cycles(self, samples, prev_samples):
        """Sums all the processor delta cycles for a set of VM/VIOS samples.

        This sum is the difference from the last sample to the current sample.

        :param samples: A set of PhypVMSample or PhypViosSample samples.
        :param prev_samples: The set of the previous samples.  May be None.
        :return: The cycles spent on workload across all of the samples.
        """
        # Determine the user cycles spent between the last sample and the
        # current.
        user_cycles = 0
        for lpar_sample in samples:
            prev_sample = self._find_prev_sample(lpar_sample, prev_samples)
            user_cycles += self._delta_user_cycles(lpar_sample, prev_sample)
        return user_cycles

    @staticmethod
    def _delta_user_cycles(cur_sample, prev_sample):
        """Determines the delta of user cycles from the cur and prev sample.

        :param cur_sample: The current sample.
        :param prev_sample: The previous sample.  May be None.
        :return: The difference in cycles between the two samples.  If the data
                 only exists in the current sample (indicates a new workload),
                 then all of the cycles from the current sample will be
                 considered the delta.
        """
        # If the previous sample for this VM is None it could be one of two
        # conditions.  It could be a new spawn or a live migration.  The cycles
        # from a live migrate are brought over from the previous host.  That
        # can disorient the calculation because all of a sudden you could get
        # months of cycles.  Since we can not discern between the two
        # scenarios, we return 0 (effectively throwing the sample out).
        # The next pass through will have the previous sample and will be
        # included.
        if prev_sample is None:
            return 0
        # If the previous sample values are all 0 (happens when VM is just
        # migrated, phyp creates entry for VM with 0 values), then ignore the
        # sample.
        if (prev_sample.processor.util_cap_proc_cycles ==
                prev_sample.processor.util_uncap_proc_cycles ==
                prev_sample.processor.idle_proc_cycles == 0):
            return 0
        # The VM utilization on host is its capped + uncapped - idle cycles.
        # Donated proc cycles should not be considered as these are
        # not guaranteed to be getting utilized by any other lpar on the host.
        prev_amount = (prev_sample.processor.util_cap_proc_cycles +
                       prev_sample.processor.util_uncap_proc_cycles -
                       prev_sample.processor.idle_proc_cycles)
        cur_amount = (cur_sample.processor.util_cap_proc_cycles +
                      cur_sample.processor.util_uncap_proc_cycles -
                      cur_sample.processor.idle_proc_cycles)
        return cur_amount - prev_amount

    @staticmethod
    def _find_prev_sample(sample, prev_samples):
        """Finds the previous VM Sample for a given current sample.

        :param sample: The current sample.
        :param prev_samples: The previous samples to search through.
        :return: The previous sample, if it exists.  None otherwise.
        """
        # Will occur if there are no previous samples.
        if prev_samples is None:
            return None
        for prev_sample in prev_samples:
            if prev_sample.id == sample.id and prev_sample.name == sample.name:
                return prev_sample
        return None

    def _get_total_cycles_delta(self):
        """Returns the 'total cycles' on the system since last sample.

        :return: The total delta cycles since the last run.
        """
        sample = self.cur_phyp.sample
        cur_cores = sample.processor.configurable_proc_units
        cur_cycles_per_core = sample.time_based_cycles

        if self.prev_phyp:
            prev_cycles_per_core = self.prev_phyp.sample.time_based_cycles
        else:
            prev_cycles_per_core = 0

        # Get the delta cycles between the cores.
        delta_cycles_per_core = cur_cycles_per_core - prev_cycles_per_core

        # Total cycles since last sample is the 'per cpu' cycles spent
        # times the number of active cores.
        return delta_cycles_per_core * cur_cores

    def _get_fw_cycles_delta(self):
        """Returns the number of cycles spent on firmware since last sample."""
        cur_fw = self.cur_phyp.sample.system_firmware.utilized_proc_cycles
        prev_fw = (self.prev_phyp.sample.system_firmware.utilized_proc_cycles
                   if self.prev_phyp else 0)
        return cur_fw - prev_fw