File: PerfContextSwitchDecoder.cpp

package info (click to toggle)
llvm-toolchain-17 1%3A17.0.6-22
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,799,624 kB
  • sloc: cpp: 6,428,607; ansic: 1,383,196; asm: 793,408; python: 223,504; objc: 75,364; f90: 60,502; lisp: 33,869; pascal: 15,282; sh: 9,684; perl: 7,453; ml: 4,937; awk: 3,523; makefile: 2,889; javascript: 2,149; xml: 888; fortran: 619; cs: 573
file content (332 lines) | stat: -rw-r--r-- 11,950 bytes parent folder | download | duplicates (12)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
//===-- PerfContextSwitchDecoder.cpp --======------------------------------===//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "PerfContextSwitchDecoder.h"
#include <optional>

using namespace lldb;
using namespace lldb_private;
using namespace lldb_private::trace_intel_pt;
using namespace llvm;

/// Copied from <linux/perf_event.h> to avoid depending on perf_event.h on
/// non-linux platforms.
/// \{
// Bit of perf_event_header::misc that PerfContextSwitchRecord::IsOut() tests:
// when set, the context switch record is a switch "out".
#define PERF_RECORD_MISC_SWITCH_OUT (1 << 13)

// Values of perf_event_header::type that this file looks at.
// LOST, THROTTLE, UNTHROTTLE and LOST_SAMPLES are the ones classified as error
// records by perf_event_header::IsErrorRecord().
#define PERF_RECORD_LOST 2
#define PERF_RECORD_THROTTLE 5
#define PERF_RECORD_UNTHROTTLE 6
#define PERF_RECORD_LOST_SAMPLES 13
// The record type recognized by perf_event_header::IsContextSwitchRecord().
#define PERF_RECORD_SWITCH_CPU_WIDE 15
// Used, with some slack added, as the upper bound for valid record types in
// perf_event_header::SanityCheck().
#define PERF_RECORD_MAX 19

/// Header found at the beginning of every record of a perf_event trace.
struct perf_event_header {
  /// Kind of record; compared against the PERF_RECORD_* constants.
  uint32_t type;
  /// Record flags; PERF_RECORD_MISC_SWITCH_OUT is one of its bits.
  uint16_t misc;
  /// Size in bytes of the whole record, counted from the start of this
  /// header. The decoders advance to the next record by adding this value to
  /// the offset of the current header.
  uint16_t size;

  /// Reject headers that can't possibly belong to a valid record.
  ///
  /// \return
  ///   An \a llvm::Error if the record looks obviously wrong, or \a
  ///   llvm::Error::success() otherwise.
  Error SanityCheck() const {
    // The following checks are based on visual inspection of the records and
    // enums in
    // https://elixir.bootlin.com/linux/v4.8/source/include/uapi/linux/perf_event.h
    // See PERF_RECORD_MAX, PERF_RECORD_SWITCH and the data similar records
    // hold.

    // `size` includes this header, so a record can never be smaller than the
    // header itself. Accepting a smaller (non-zero) size would make the
    // decoders step into the middle of this record and reinterpret its
    // payload as the next header.
    const uint64_t min_valid_size_bytes = sizeof(perf_event_header);
    // A record this large should mean that the data is wrong.
    const uint64_t max_valid_size_bytes = 8000;
    if (size < min_valid_size_bytes || size > max_valid_size_bytes)
      return createStringError(
          inconvertibleErrorCode(),
          formatv("A record of {0} bytes was found.", size));

    // We add some numbers to PERF_RECORD_MAX because some systems might have
    // custom records. In any case, we are looking only for abnormal data.
    if (type >= PERF_RECORD_MAX + 100)
      return createStringError(
          inconvertibleErrorCode(),
          formatv("Invalid record type {0} was found.", type));
    return Error::success();
  }

  /// \return
  ///   \b true if and only if this is a PERF_RECORD_SWITCH_CPU_WIDE record.
  bool IsContextSwitchRecord() const {
    return type == PERF_RECORD_SWITCH_CPU_WIDE;
  }

  /// \return
  ///   \b true if and only if this is a lost, throttle, unthrottle or lost
  ///   samples record.
  bool IsErrorRecord() const {
    return type == PERF_RECORD_LOST || type == PERF_RECORD_THROTTLE ||
           type == PERF_RECORD_UNTHROTTLE || type == PERF_RECORD_LOST_SAMPLES;
  }
};
/// \}

/// Raw layout of a context switch record as it appears in the perf_event
/// trace. The record might be followed in memory by additional fields;
/// header.size holds the actual size of the whole record.
struct PerfContextSwitchRecord {
  struct perf_event_header header;
  /// Not read by the decoders in this file.
  uint32_t next_prev_pid;
  /// Not read by the decoders in this file.
  uint32_t next_prev_tid;
  /// Process and thread the decoders attribute this switch to.
  uint32_t pid;
  uint32_t tid;
  /// Timestamp of the switch; the decoder converts it to a TSC.
  uint64_t time_in_nanos;

  /// \return
  ///   \b true if the PERF_RECORD_MISC_SWITCH_OUT bit of the header is set.
  bool IsOut() const {
    return (header.misc & PERF_RECORD_MISC_SWITCH_OUT) != 0;
  }
};

/// Record produced after parsing the raw context switch trace produced by
/// perf_event. A major difference between this struct and
/// PerfContextSwitchRecord is that this one uses tsc instead of nanos.
struct ContextSwitchRecord {
  /// TSC at which the switch happened.
  uint64_t tsc;
  /// \b true for a switch out, \b false for a switch in.
  bool is_out;
  /// pid = 0 and tid = 0 indicate the swapper or idle process, which normally
  /// runs after a context switch out of a normal user thread.
  lldb::pid_t pid;
  lldb::tid_t tid;

  /// \return
  ///   \b true if this record is a switch out.
  bool IsOut() const { return is_out; }

  /// \return
  ///   \b true if this record is a switch in, i.e. not a switch out.
  bool IsIn() const { return !IsOut(); }
};

/// \return
///   The start TSC for the variants that store a non-hinted start, and the
///   end TSC for the variants that don't (OnlyEnd and HintedStart).
uint64_t ThreadContinuousExecution::GetLowestKnownTSC() const {
  // NOTE(review): there is no return after the switch, so this relies on every
  // Variant enumerator being handled below — confirm against the enum.
  switch (variant) {
  // Variants that store a start TSC.
  case Variant::Complete:
    return tscs.complete.start;
  case Variant::OnlyStart:
    return tscs.only_start.start;
  case Variant::HintedEnd:
    return tscs.hinted_end.start;
  // Variants whose start is missing or only hinted: fall back to the end.
  case Variant::OnlyEnd:
    return tscs.only_end.end;
  case Variant::HintedStart:
    return tscs.hinted_start.end;
  }
}

/// \return
///   The stored start TSC, the hinted start for the HintedStart variant, or 0
///   for the OnlyEnd variant, which has no start at all.
uint64_t ThreadContinuousExecution::GetStartTSC() const {
  // NOTE(review): there is no return after the switch, so this relies on every
  // Variant enumerator being handled below — confirm against the enum.
  switch (variant) {
  // No start information is stored: use the lowest possible TSC.
  case Variant::OnlyEnd:
    return 0;
  // Only a hint is stored for the start.
  case Variant::HintedStart:
    return tscs.hinted_start.hinted_start;
  // A start TSC is stored.
  case Variant::Complete:
    return tscs.complete.start;
  case Variant::OnlyStart:
    return tscs.only_start.start;
  case Variant::HintedEnd:
    return tscs.hinted_end.start;
  }
}

/// \return
///   The stored end TSC, the hinted end for the HintedEnd variant, or the
///   maximum uint64_t value for the OnlyStart variant, which has no end at
///   all.
uint64_t ThreadContinuousExecution::GetEndTSC() const {
  // NOTE(review): there is no return after the switch, so this relies on every
  // Variant enumerator being handled below — confirm against the enum.
  switch (variant) {
  // No end information is stored: use the highest possible TSC.
  case Variant::OnlyStart:
    return std::numeric_limits<uint64_t>::max();
  // Only a hint is stored for the end.
  case Variant::HintedEnd:
    return tscs.hinted_end.hinted_end;
  // An end TSC is stored.
  case Variant::Complete:
    return tscs.complete.end;
  case Variant::OnlyEnd:
    return tscs.only_end.end;
  case Variant::HintedStart:
    return tscs.hinted_start.end;
  }
}

/// Build an execution of the Complete variant, which stores both a start and
/// an end TSC.
ThreadContinuousExecution ThreadContinuousExecution::CreateCompleteExecution(
    lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start,
    uint64_t end) {
  ThreadContinuousExecution execution(cpu_id, tid, pid);
  execution.tscs.complete.start = start;
  execution.tscs.complete.end = end;
  execution.variant = Variant::Complete;
  return execution;
}

/// Build an execution of the HintedStart variant, which stores an end TSC and
/// only a hint for the start.
ThreadContinuousExecution ThreadContinuousExecution::CreateHintedStartExecution(
    lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid,
    uint64_t hinted_start, uint64_t end) {
  ThreadContinuousExecution execution(cpu_id, tid, pid);
  execution.tscs.hinted_start.hinted_start = hinted_start;
  execution.tscs.hinted_start.end = end;
  execution.variant = Variant::HintedStart;
  return execution;
}

/// Build an execution of the HintedEnd variant, which stores a start TSC and
/// only a hint for the end.
ThreadContinuousExecution ThreadContinuousExecution::CreateHintedEndExecution(
    lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start,
    uint64_t hinted_end) {
  ThreadContinuousExecution execution(cpu_id, tid, pid);
  execution.tscs.hinted_end.start = start;
  execution.tscs.hinted_end.hinted_end = hinted_end;
  execution.variant = Variant::HintedEnd;
  return execution;
}

/// Build an execution of the OnlyEnd variant, which stores an end TSC and no
/// start information.
ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyEndExecution(
    lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t end) {
  ThreadContinuousExecution execution(cpu_id, tid, pid);
  execution.tscs.only_end.end = end;
  execution.variant = Variant::OnlyEnd;
  return execution;
}

/// Build an execution of the OnlyStart variant, which stores a start TSC and
/// no end information.
ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyStartExecution(
    lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start) {
  ThreadContinuousExecution execution(cpu_id, tid, pid);
  execution.tscs.only_start.start = start;
  execution.variant = Variant::OnlyStart;
  return execution;
}

/// Compare a context switch record with the one that preceded it on the same
/// cpu and report the thread executions that can be inferred from the pair.
///
/// \param[in] cpu_id
///   The cpu the records belong to; forwarded to the created executions.
///
/// \param[in] tsc_conversion
///   Currently unused by this function.
///
/// \param[in] current_record
///   The record being processed.
///
/// \param[in] prev_record
///   The record processed right before \a current_record, or \b std::nullopt
///   if \a current_record is the first one.
///
/// \param[in] on_new_execution
///   Callback invoked once per inferred execution, in the order they are
///   found.
///
/// \return
///   An \a llvm::Error if the TSC of \a current_record is not strictly greater
///   than the one of \a prev_record, or \a llvm::Error::success() otherwise.
static Error RecoverExecutionsFromConsecutiveRecords(
    cpu_id_t cpu_id, const LinuxPerfZeroTscConversion &tsc_conversion,
    const ContextSwitchRecord &current_record,
    const std::optional<ContextSwitchRecord> &prev_record,
    std::function<void(const ThreadContinuousExecution &execution)>
        on_new_execution) {
  // First record of the trace. Only an 'out' can be reported right away; an
  // 'in' is handled later, once we look for its end.
  if (!prev_record.has_value()) {
    if (current_record.IsOut())
      on_new_execution(ThreadContinuousExecution::CreateOnlyEndExecution(
          cpu_id, current_record.tid, current_record.pid, current_record.tsc));
    return Error::success();
  }

  const ContextSwitchRecord &prev = *prev_record;
  if (current_record.tsc <= prev.tsc)
    return createStringError(
        inconvertibleErrorCode(),
        formatv("A context switch record doesn't happen after the previous "
                "record. Previous TSC= {0}, current TSC = {1}.",
                prev.tsc, current_record.tsc));

  // The previous thread started at prev.tsc but its end wasn't captured, so we
  // hint that it ended right before the current record.
  auto report_prev_with_hinted_end = [&]() {
    on_new_execution(ThreadContinuousExecution::CreateHintedEndExecution(
        cpu_id, prev.tid, prev.pid, prev.tsc, current_record.tsc - 1));
  };
  // The current thread ended at current_record.tsc but its start wasn't
  // captured, so we hint that it started right after the previous record.
  auto report_current_with_hinted_start = [&]() {
    on_new_execution(ThreadContinuousExecution::CreateHintedStartExecution(
        cpu_id, current_record.tid, current_record.pid, prev.tsc + 1,
        current_record.tsc));
  };

  if (current_record.IsIn()) {
    // Two consecutive ins mean that the end of the previous execution is
    // missing. An in after an out doesn't produce anything at this point.
    if (prev.IsIn())
      report_prev_with_hinted_end();
    return Error::success();
  }

  // From here on the current record is an out.
  if (prev.IsOut()) {
    // Two consecutive outs mean that the beginning of the current execution is
    // missing.
    report_current_with_hinted_start();
    return Error::success();
  }

  // An out preceded by an in.
  const bool is_same_thread =
      current_record.pid == prev.pid && current_record.tid == prev.tid;
  if (is_same_thread) {
    // A complete execution.
    on_new_execution(ThreadContinuousExecution::CreateCompleteExecution(
        cpu_id, current_record.tid, current_record.pid, prev.tsc,
        current_record.tsc));
  } else {
    // An out after the in of a different thread. The first one doesn't have an
    // end, and the second one doesn't have a start.
    report_prev_with_hinted_end();
    report_current_with_hinted_start();
  }
  return Error::success();
}

/// Decode the raw perf_event context switch trace of a single cpu into the
/// list of thread executions that can be inferred from it.
///
/// Every record is validated before being read: its header must fit in the
/// remaining bytes of \a data, it must pass
/// \a perf_event_header::SanityCheck(), the whole record must fit in the
/// remaining bytes, and a context switch record must be at least as large as
/// \a PerfContextSwitchRecord. Records that are not context switches are
/// skipped.
///
/// \param[in] data
///   The bytes of the context switch trace.
///
/// \param[in] cpu_id
///   The cpu the trace belongs to. It's stored in the executions and used in
///   the error message.
///
/// \param[in] tsc_conversion
///   Used to convert the timestamp of each context switch record to a TSC.
///
/// \return
///   The executions in the order in which they were found, or an
///   \a llvm::Error that mentions the cpu and the offset of the offending
///   record if the trace is malformed.
Expected<std::vector<ThreadContinuousExecution>>
lldb_private::trace_intel_pt::DecodePerfContextSwitchTrace(
    ArrayRef<uint8_t> data, cpu_id_t cpu_id,
    const LinuxPerfZeroTscConversion &tsc_conversion) {

  std::vector<ThreadContinuousExecution> executions;

  // This offset is used to create the error message in case of failures.
  size_t offset = 0;

  auto do_decode = [&]() -> Error {
    std::optional<ContextSwitchRecord> prev_record;
    while (offset < data.size()) {
      // The loop condition guarantees that this doesn't underflow.
      const size_t remaining_bytes = data.size() - offset;

      // Don't read a header that is not entirely inside the buffer.
      if (remaining_bytes < sizeof(perf_event_header))
        return createStringError(
            inconvertibleErrorCode(),
            formatv("A truncated record header of {0} bytes was found.",
                    remaining_bytes));

      const perf_event_header &perf_record =
          *reinterpret_cast<const perf_event_header *>(data.data() + offset);
      if (Error err = perf_record.SanityCheck())
        return err;

      // The record must not extend past the end of the buffer.
      if (perf_record.size > remaining_bytes)
        return createStringError(
            inconvertibleErrorCode(),
            formatv("A record of {0} bytes doesn't fit in the remaining {1} "
                    "bytes.",
                    perf_record.size, remaining_bytes));

      if (perf_record.IsContextSwitchRecord()) {
        // Together with the check above, this guarantees that all the fields
        // of PerfContextSwitchRecord are inside this record and inside the
        // buffer.
        if (perf_record.size < sizeof(PerfContextSwitchRecord))
          return createStringError(
              inconvertibleErrorCode(),
              formatv("A context switch record of {0} bytes is smaller than "
                      "the minimum of {1} bytes.",
                      perf_record.size, sizeof(PerfContextSwitchRecord)));

        const PerfContextSwitchRecord &context_switch_record =
            *reinterpret_cast<const PerfContextSwitchRecord *>(data.data() +
                                                               offset);
        ContextSwitchRecord record{
            tsc_conversion.ToTSC(context_switch_record.time_in_nanos),
            context_switch_record.IsOut(),
            static_cast<lldb::pid_t>(context_switch_record.pid),
            static_cast<lldb::tid_t>(context_switch_record.tid)};

        if (Error err = RecoverExecutionsFromConsecutiveRecords(
                cpu_id, tsc_conversion, record, prev_record,
                [&](const ThreadContinuousExecution &execution) {
                  executions.push_back(execution);
                }))
          return err;

        prev_record = record;
      }
      offset += perf_record.size;
    }

    // We might have an incomplete last record
    if (prev_record && prev_record->IsIn())
      executions.push_back(ThreadContinuousExecution::CreateOnlyStartExecution(
          cpu_id, prev_record->tid, prev_record->pid, prev_record->tsc));
    return Error::success();
  };

  if (Error err = do_decode())
    return createStringError(inconvertibleErrorCode(),
                             formatv("Malformed perf context switch trace for "
                                     "cpu {0} at offset {1}. {2}",
                                     cpu_id, offset, toString(std::move(err))));

  return executions;
}

/// Create a copy of a perf_event context switch trace that only keeps the
/// context switch records whose pid is in \a pids, plus all the error records
/// (see \a perf_event_header::IsErrorRecord()). Any other record is dropped.
///
/// Every record is validated before being read or copied: its header must fit
/// in the remaining bytes of \a data, it must pass
/// \a perf_event_header::SanityCheck(), the whole record must fit in the
/// remaining bytes, and a context switch record must be at least as large as
/// \a PerfContextSwitchRecord.
///
/// \param[in] data
///   The bytes of the context switch trace.
///
/// \param[in] pids
///   The processes whose context switch records should be kept.
///
/// \return
///   The bytes of the kept records, in their original order, or an
///   \a llvm::Error if the trace is malformed.
Expected<std::vector<uint8_t>>
lldb_private::trace_intel_pt::FilterProcessesFromContextSwitchTrace(
    llvm::ArrayRef<uint8_t> data, const std::set<lldb::pid_t> &pids) {
  size_t offset = 0;
  std::vector<uint8_t> out_data;

  while (offset < data.size()) {
    // The loop condition guarantees that this doesn't underflow.
    const size_t remaining_bytes = data.size() - offset;

    // Don't read a header that is not entirely inside the buffer.
    if (remaining_bytes < sizeof(perf_event_header))
      return createStringError(
          inconvertibleErrorCode(),
          formatv("A truncated record header of {0} bytes was found at offset "
                  "{1}.",
                  remaining_bytes, offset));

    const perf_event_header &perf_record =
        *reinterpret_cast<const perf_event_header *>(data.data() + offset);
    if (Error err = perf_record.SanityCheck())
      return std::move(err);

    // The record must not extend past the end of the buffer; otherwise
    // copying it below would read out of bounds.
    if (perf_record.size > remaining_bytes)
      return createStringError(
          inconvertibleErrorCode(),
          formatv("A record of {0} bytes at offset {1} doesn't fit in the "
                  "remaining {2} bytes.",
                  perf_record.size, offset, remaining_bytes));

    bool should_copy = false;
    if (perf_record.IsContextSwitchRecord()) {
      // Together with the check above, this guarantees that all the fields of
      // PerfContextSwitchRecord are inside this record and inside the buffer.
      if (perf_record.size < sizeof(PerfContextSwitchRecord))
        return createStringError(
            inconvertibleErrorCode(),
            formatv("A context switch record of {0} bytes at offset {1} is "
                    "smaller than the minimum of {2} bytes.",
                    perf_record.size, offset,
                    sizeof(PerfContextSwitchRecord)));

      const PerfContextSwitchRecord &context_switch_record =
          *reinterpret_cast<const PerfContextSwitchRecord *>(data.data() +
                                                             offset);
      if (pids.count(context_switch_record.pid))
        should_copy = true;
    } else if (perf_record.IsErrorRecord()) {
      should_copy = true;
    }

    // Copy the whole record, including any trailing fields this file doesn't
    // know about.
    if (should_copy)
      out_data.insert(out_data.end(), data.begin() + offset,
                      data.begin() + offset + perf_record.size);

    offset += perf_record.size;
  }
  return out_data;
}