File: speed_limit_observer_win.cc

package info (click to toggle)
chromium 120.0.6099.224-1~deb11u1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 6,112,112 kB
  • sloc: cpp: 32,907,025; ansic: 8,148,123; javascript: 3,679,536; python: 2,031,248; asm: 959,718; java: 804,675; xml: 617,256; sh: 111,417; objc: 100,835; perl: 88,443; cs: 53,032; makefile: 29,579; fortran: 24,137; php: 21,162; tcl: 21,147; sql: 20,809; ruby: 17,735; pascal: 12,864; yacc: 8,045; lisp: 3,388; lex: 1,323; ada: 727; awk: 329; jsp: 267; csh: 117; exp: 43; sed: 37
file content (267 lines) | stat: -rw-r--r-- 10,119 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
// Copyright 2020 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/power_monitor/speed_limit_observer_win.h"

#include <windows.h>

#include <powerbase.h>
#include <winternl.h>

#include <algorithm>
#include <cmath>
#include <memory>
#include <utility>
#include <vector>

#include "base/logging.h"
#include "base/system/sys_info.h"
#include "base/timer/elapsed_timer.h"
#include "base/trace_event/base_tracing.h"
#include "build/build_config.h"

namespace {

// From ntdef.h. Evaluates to true when `Status`, cast to NTSTATUS, is
// non-negative.
#define NT_SUCCESS(Status) (((NTSTATUS)(Status)) >= 0)

// Polling period of the speed-limit sampler: a new value is read once every
// second.
constexpr base::TimeDelta kSampleInterval{base::Seconds(1)};

// Size of the moving-average filter which is used to smooth out variations in
// speed-limit estimates. Declared constexpr so that it is a true compile-time
// constant rather than a mutable global.
constexpr size_t kMovingAverageWindowSize = 10;

// Trace category used for the trace counters emitted below.
constexpr char kPowerTraceCategory[] = TRACE_DISABLED_BY_DEFAULT("power");

// Mirrors the PROCESSOR_POWER_INFORMATION structure documented at
// https://msdn.microsoft.com/en-us/library/windows/desktop/aa373184(v=vs.85).aspx.
// It is reproduced here because the definition was accidentally omitted from
// WinNT.h. One instance is filled in per logical CPU by
// CallNtPowerInformation(ProcessorInformation, ...).
struct _PROCESSOR_POWER_INFORMATION {
  ULONG Number;
  ULONG MaxMhz;
  ULONG CurrentMhz;
  ULONG MhzLimit;
  ULONG MaxIdleState;
  ULONG CurrentIdleState;
};
using PROCESSOR_POWER_INFORMATION = _PROCESSOR_POWER_INFORMATION;
using PPROCESSOR_POWER_INFORMATION = PROCESSOR_POWER_INFORMATION*;

// Mirrors the SYSTEM_POWER_INFORMATION structure documented at
// https://docs.microsoft.com/en-us/windows/win32/power/system-power-information-str.
// It is reproduced here because the definition was accidentally omitted from
// WinNT.h. Filled in by CallNtPowerInformation(SystemPowerInformation, ...).
struct _SYSTEM_POWER_INFORMATION {
  ULONG MaxIdlenessAllowed;
  ULONG Idleness;
  ULONG TimeRemaining;
  UCHAR CoolingMode;
};
using SYSTEM_POWER_INFORMATION = _SYSTEM_POWER_INFORMATION;
using PSYSTEM_POWER_INFORMATION = SYSTEM_POWER_INFORMATION*;

// Returns information about the idleness of the system.
bool GetCPUIdleness(int* idleness_percent) {
  auto info = std::make_unique<SYSTEM_POWER_INFORMATION>();
  if (!NT_SUCCESS(CallNtPowerInformation(SystemPowerInformation, nullptr, 0,
                                         info.get(),
                                         sizeof(SYSTEM_POWER_INFORMATION)))) {
    *idleness_percent = 0;
    return false;
  }
  // The current idle level, expressed as a percentage.
  *idleness_percent = static_cast<int>(info->Idleness);
  return true;
}

#if defined(ARCH_CPU_X86_FAMILY)
// Returns the estimated CPU frequency by executing a tight loop of predictable
// assembly instructions. The estimated frequency should be proportional and
// about the same magnitude than the real CPU frequency. The measurement should
// be long enough to avoid Turbo Boost effect (~3ms) and be low enough to stay
// within the operating system scheduler quantum (~100ms).
double EstimateCpuFrequency() {
  // The heuristic to estimate CPU frequency is based on UIforETW code.
  // see: https://github.com/google/UIforETW/blob/main/UIforETW/CPUFrequency.cpp
  //      https://github.com/google/UIforETW/blob/main/UIforETW/SpinALot64.asm
  base::ElapsedTimer timer;
  const int kAmountOfIterations = 50000;
  const int kAmountOfInstructions = 10;
  for (int i = 0; i < kAmountOfIterations; ++i) {
    __asm__ __volatile__(
        "addl  %%eax, %%eax\n"
        "addl  %%eax, %%eax\n"
        "addl  %%eax, %%eax\n"
        "addl  %%eax, %%eax\n"
        "addl  %%eax, %%eax\n"
        "addl  %%eax, %%eax\n"
        "addl  %%eax, %%eax\n"
        "addl  %%eax, %%eax\n"
        "addl  %%eax, %%eax\n"
        "addl  %%eax, %%eax\n"
        :
        :
        : "eax");
  }

  const base::TimeDelta elapsed = timer.Elapsed();
  const double estimated_frequency =
      (kAmountOfIterations * kAmountOfInstructions) / elapsed.InSecondsF();
  return estimated_frequency;
}
#endif

}  // namespace

namespace base {

// Takes ownership of `speed_limit_update_callback`, caches the processor count
// reported by SysInfo::NumberOfProcessors(), constructs the moving-average
// filter with kMovingAverageWindowSize, and starts `timer_` with
// OnTimerTick() as its task and kSampleInterval as its delay.
SpeedLimitObserverWin::SpeedLimitObserverWin(
    SpeedLimitUpdateCallback speed_limit_update_callback)
    : callback_(std::move(speed_limit_update_callback)),
      num_cpus_(static_cast<size_t>(SysInfo::NumberOfProcessors())),
      moving_average_(kMovingAverageWindowSize) {
  DVLOG(1) << __func__ << "(num_CPUs=" << num_cpus() << ")";
  // `this` is handed to the timer as the receiver of OnTimerTick().
  timer_.Start(FROM_HERE, kSampleInterval, this,
               &SpeedLimitObserverWin::OnTimerTick);
}

// Explicitly stops the sampling timer that was started in the constructor.
SpeedLimitObserverWin::~SpeedLimitObserverWin() {
  timer_.Stop();
}

// Returns the current (filtered) speed-limit estimate, where
// PowerThermalObserver::kSpeedLimitMax means "full speed".
//
// kSpeedLimitMax is returned whenever no trustworthy estimate is available:
// when the system idleness cannot be read, when the system is mostly idle or
// the throttling estimate is very small, when the raw value has just returned
// to the maximum, and while the moving-average filter is not yet full.
// Otherwise the mean of the moving-average filter is returned.
int SpeedLimitObserverWin::GetCurrentSpeedLimit() {
  const int kSpeedLimitMax = PowerThermalObserver::kSpeedLimitMax;

  int idleness_percent = 0;
  if (!GetCPUIdleness(&idleness_percent)) {
    DLOG(WARNING) << "GetCPUIdleness failed";
    return kSpeedLimitMax;
  }

  // Get the latest estimated throttling level (value between 0.0 and 1.0).
  // Enforce that range here: a NaN or out-of-range estimate would otherwise
  // flow into the float-to-integer conversions below, which are undefined for
  // values that do not fit the destination type. A non-finite estimate is
  // treated as "no throttling".
  float throttling_level = EstimateThrottlingLevel();
  throttling_level = std::isfinite(throttling_level)
                         ? std::clamp(throttling_level, 0.0f, 1.0f)
                         : 0.0f;

  // Emit trace events to investigate issues with power throttling. Run this
  // block only if tracing is running to avoid executing expensive calls to
  // EstimateCpuFrequency(...).
  // Initialized so the flag has a defined value even if the macro below does
  // not write to it (NOTE(review): e.g. in a build where tracing is compiled
  // out; confirm against the macro definition).
  bool trace_events_enabled = false;
  TRACE_EVENT_CATEGORY_GROUP_ENABLED(kPowerTraceCategory,
                                     &trace_events_enabled);
  if (trace_events_enabled) {
    TRACE_COUNTER1(kPowerTraceCategory, "idleness", idleness_percent);
    TRACE_COUNTER1(kPowerTraceCategory, "throttling_level",
                   static_cast<unsigned int>(throttling_level * 100));

#if defined(ARCH_CPU_X86_FAMILY)
    double cpu_frequency = EstimateCpuFrequency();
    TRACE_COUNTER1(kPowerTraceCategory, "frequency_mhz",
                   static_cast<unsigned int>(cpu_frequency / 1'000'000));
#endif
  }

  // Ignore the value if the global idleness is above 90% or throttling value
  // is very small. This approach avoids false alarms and removes noise from the
  // measurements.
  if (idleness_percent > 90 || throttling_level < 0.1f) {
    moving_average_.Reset();
    return kSpeedLimitMax;
  }

  // The speed limit metric is a value between 0 and 100 [%] where 100 means
  // "full speed". The corresponding UMA metric is CPU_Speed_Limit.
  float speed_limit_factor = 1.0f - throttling_level;
  int speed_limit =
      static_cast<int>(std::ceil(kSpeedLimitMax * speed_limit_factor));

  // The previous speed-limit value was below 100 but the new value is now back
  // at max again. To make this state more "stable or sticky" we reset the MA
  // filter and return kSpeedLimitMax. As a result, single drops in speedlimit
  // values will not result in a value less than 100 since the MA filter must
  // be full before we start to produce any output.
  if (speed_limit_ < kSpeedLimitMax && speed_limit == kSpeedLimitMax) {
    moving_average_.Reset();
    return kSpeedLimitMax;
  }

  // Add the latest speed-limit value [0,100] to the MA filter and return its
  // output after ensuring that the filter is full. We do this to avoid initial
  // false alarms at startup and after calling Reset() on the filter.
  moving_average_.AddSample(speed_limit);
  if (moving_average_.Count() < kMovingAverageWindowSize) {
    return kSpeedLimitMax;
  }
  return moving_average_.Mean();
}

void SpeedLimitObserverWin::OnTimerTick() {
  // Get the latest (filtered) speed-limit estimate and trigger a new callback
  // if the new value is different from the last.
  const int speed_limit = GetCurrentSpeedLimit();
  if (speed_limit != speed_limit_) {
    speed_limit_ = speed_limit;
    callback_.Run(speed_limit_);
  }

  TRACE_COUNTER1(kPowerTraceCategory, "speed_limit",
                 static_cast<unsigned int>(speed_limit));
}

float SpeedLimitObserverWin::EstimateThrottlingLevel() {
  float throttling_level = 0.f;

  // Populate the PROCESSOR_POWER_INFORMATION structures for all logical CPUs
  // using the CallNtPowerInformation API.
  std::vector<PROCESSOR_POWER_INFORMATION> info(num_cpus());
  if (!NT_SUCCESS(CallNtPowerInformation(
          ProcessorInformation, nullptr, 0, &info[0],
          static_cast<ULONG>(sizeof(PROCESSOR_POWER_INFORMATION) *
                             num_cpus())))) {
    return throttling_level;
  }

  // Estimate the level of throttling by measuring how many CPUs that are not
  // in idle state and how "far away" they are from the most idle state. Local
  // tests have shown that `MaxIdleState` is typically 2 or 3 and
  //
  // `CurrentIdleState` switches to 2 or 1 when some sort of throttling starts
  // to take place. The Intel Extreme Tuning Utility application has been used
  // to monitor when any type of throttling (thermal, power-limit, PMAX etc)
  // starts.
  //
  // `CurrentIdleState` contains the CPU C-State + 1. When `MaxIdleState` is
  // 1, the `CurrentIdleState` will always be 0 and the C-States are not
  // supported.
  int num_non_idle_cpus = 0;
  int num_active_cpus = 0;
  float load_fraction_total = 0.0;
  for (size_t i = 0; i < num_cpus(); ++i) {
    // Amount of "non-idleness" is the distance from the max idle state.
    const auto idle_diff = info[i].MaxIdleState - info[i].CurrentIdleState;
    // Derive a value between 0.0 and 1.0 where 1.0 corresponds to max load on
    // CPU#i.
    // Example: MaxIdleState=2, CurrentIdleState=1 => (2 - 1) / 2 = 0.5.
    // Example: MaxIdleState=2, CurrentIdleState=2 => (2 - 2) / 2 = 1.0.
    // Example: MaxIdleState=3, CurrentIdleState=1 => (3 - 1) / 3 = 0.6666.
    // Example: MaxIdleState=3, CurrentIdleState=2 => (3 - 2) / 3 = 0.3333.
    const float load_fraction =
        static_cast<float>(idle_diff) / info[i].MaxIdleState;
    // Accumulate the total load for all CPUs.
    load_fraction_total += load_fraction;
    // Used for a sanity check only.
    num_non_idle_cpus += (info[i].CurrentIdleState < info[i].MaxIdleState);

    // Count the amount of CPU that are in the C0 state (active). If
    // `MaxIdleState` is 1, C-states are not supported and we consider the CPU
    // is active.
    if (info[i].MaxIdleState == 1 || info[i].CurrentIdleState == 1) {
      num_active_cpus++;
    }
  }

  DCHECK_LE(load_fraction_total, static_cast<float>(num_non_idle_cpus))
      << " load_fraction_total: " << load_fraction_total
      << " num_non_idle_cpus:" << num_non_idle_cpus;
  throttling_level = (load_fraction_total / num_cpus());

  TRACE_COUNTER1(kPowerTraceCategory, "num_active_cpus", num_active_cpus);

  return throttling_level;
}

}  // namespace base