1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395
|
// Copyright 2014 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Class to represent the symbol map. The symbol map is a map from
// symbol names to the symbol class.
// This class is thread-safe.
#ifndef AUTOFDO_SYMBOL_MAP_H_
#define AUTOFDO_SYMBOL_MAP_H_
#include <map>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>
#include "addr2line.h"
#include "base/common.h"
#include "base/macros.h"
#include "source_info.h"
// Macros from gcc (profile.c)
#define NUM_GCOV_WORKING_SETS 128
#define WORKING_SET_INSN_PER_BB 10
namespace autofdo {
typedef std::map<string, uint64> CallTargetCountMap;
typedef std::pair<string, uint64> TargetCountPair;
typedef std::vector<TargetCountPair> TargetCountPairs;
class Addr2line;
/* Struct from gcc (basic-block.h).
Working set size statistics for a given percentage of the entire
profile (sum_all from the counter summary). */
struct gcov_working_set_info {
public:
gcov_working_set_info() : num_counters(0), min_counter(0) {}
/* Number of hot counters included in this working set. */
uint32 num_counters;
/* Smallest counter included in this working set. */
uint64 min_counter;
};
// Returns a sorted vector of target_count pairs. target_counts is a pointer
// to an empty vector in which the output will be stored.
// Sorting is based on count in descending order.
void GetSortedTargetCountPairs(const CallTargetCountMap &call_target_count_map,
TargetCountPairs *target_counts);
// Represents profile information of a given source.
class ProfileInfo {
public:
ProfileInfo() : count(0), num_inst(0) {}
ProfileInfo &operator+=(const ProfileInfo &other);
uint64 count;
uint64 num_inst;
CallTargetCountMap target_map;
};
// Map from source stack to profile,
// TODO(dehao): deprecate this when old profile format is deprecated.
typedef std::map<const SourceStack, ProfileInfo> SourceStackCountMap;
// Map from a source location (represented by offset+discriminator) to profile.
typedef std::map<uint32, ProfileInfo> PositionCountMap;
// callsite_location, callee_name
typedef std::pair<uint32, const char *> Callsite;
struct CallsiteLess {
bool operator()(const Callsite& c1, const Callsite& c2) const {
if (c1.first != c2.first)
return c1.first < c2.first;
if ((c1.second == NULL || c2.second == NULL))
return c1.second < c2.second;
return strcmp(c1.second, c2.second) < 0;
}
};
class Symbol;
// Map from a callsite to the callee symbol.
typedef std::map<Callsite, Symbol *, CallsiteLess> CallsiteMap;
// Contains information about a specific symbol.
// There are two types of symbols:
// 1. Actual symbol: the symbol exists in the binary as a standalone function.
// It has the begin_address and end_address, and its name
// is always full assembler name.
// 2. Inlined symbol: the symbol is cloned in another function. It does not
// have the begin_address and end_address, and its name
// could be a short bfd_name.
class Symbol {
public:
// This constructor is used to create inlined symbol.
Symbol(const char *name, const char *dir, const char *file, uint32 start)
: info(SourceInfo(name, dir, file, start, 0, 0)),
total_count(0), head_count(0) {}
// This constructor is used to create aliased symbol.
Symbol(const Symbol *src, const char *new_func_name)
: info(src->info), total_count(src->total_count),
head_count(src->head_count) {
info.func_name = new_func_name;
}
Symbol() : total_count(0), head_count(0) {}
~Symbol();
static string Name(const char *name) {
return (name && strlen(name) > 0) ? name : "noname";
}
string name() const {
return Name(info.func_name);
}
// Merges profile stored in src symbol with this symbol.
void Merge(const Symbol *src);
// Returns the module name of the symbol. Module name is the source file
// that the symbol belongs to. It is an attribute of the actual symbol.
string ModuleName() const;
// Returns true if the symbol is from a header file.
bool IsFromHeader() const;
// Dumps content of the symbol with a give indentation.
void Dump(int indent) const;
// Returns the max of pos_counts and callsites' pos_counts.
uint64 MaxPosCallsiteCount() const;
// Source information about the the symbol (func_name, file_name, etc.)
SourceInfo info;
// The total sampled count.
uint64 total_count;
// The total sampled count in the head bb.
uint64 head_count;
// Map from callsite location to callee symbol.
CallsiteMap callsites;
// Map from source location to count and instruction number.
PositionCountMap pos_counts;
};
// Maps function name to actual symbol. (Top level map).
typedef map<string, Symbol *> NameSymbolMap;
// Maps symbol's start address to its name and size.
typedef std::map<uint64, std::pair<string, uint64> > AddressSymbolMap;
// Maps from symbol's name to its start address.
typedef std::map<string, uint64> NameAddressMap;
// Maps function name to alias names.
typedef map<string, set<string> > NameAliasMap;
// SymbolMap stores the symbols in the binary, and maintains
// a map from symbol name to its related information.
class SymbolMap {
public:
explicit SymbolMap(const string &binary)
: binary_(binary),
base_addr_(0),
count_threshold_(0),
use_discriminator_encoding_(false),
ignore_thresholds_(false) {
if (!binary.empty()) {
BuildSymbolMap();
BuildNameAddressMap();
}
}
SymbolMap()
: base_addr_(0),
count_threshold_(0),
use_discriminator_encoding_(false) {}
~SymbolMap();
uint64 size() const {
return map_.size();
}
void set_count_threshold(int64 n) {count_threshold_ = n;}
int64 count_threshold() const {return count_threshold_;}
// Returns true if the count is large enough to be emitted.
bool ShouldEmit(int64 count) const {
if (ignore_thresholds_) return true;
CHECK_GT(count_threshold_, 0);
return count > count_threshold_;
}
// Caculates sample threshold from given total count.
void CalculateThresholdFromTotalCount(int64 total_count);
// Caculates sample threshold from symbol map.
// All symbols should have been counted.
void CalculateThreshold();
// Returns relocation start address.
uint64 base_addr() const {
return base_addr_;
}
void set_use_discriminator_encoding(bool v) {
use_discriminator_encoding_ = v;
}
void set_ignore_thresholds(bool v) {
ignore_thresholds_ = v;
}
void set_addr2line(std::unique_ptr<Addr2line> addr2line) {
addr2line_ = std::move(addr2line);
}
Addr2line *get_addr2line() const { return addr2line_.get(); }
// Adds an empty named symbol.
void AddSymbol(const string &name);
const NameSymbolMap &map() const {
return map_;
}
NameSymbolMap &map() {
return map_;
}
const gcov_working_set_info *GetWorkingSets() const {
return working_set_;
}
uint64 GetSymbolStartAddr(const string &name) const {
const auto &iter = name_addr_map_.find(name);
if (iter == name_addr_map_.end()) {
return 0;
}
return iter->second;
}
void UpdateWorkingSet(int i, uint32 num_counters, uint64 min_counter) {
if (working_set_[i].num_counters == 0) {
working_set_[i].num_counters = num_counters;
} else {
// This path only happens during profile merge.
// Different profiles will have similar num_counters, so calculating
// average for each iteration will no lose much precision.
working_set_[i].num_counters =
(working_set_[i].num_counters + num_counters) / 2;
}
working_set_[i].min_counter += min_counter;
}
const Symbol *GetSymbolByName(const string &name) const {
NameSymbolMap::const_iterator ret = map_.find(name);
if (ret != map_.end()) {
return ret->second;
} else {
return NULL;
}
}
// Merges symbols with suffixes like .isra, .part as a single symbol.
void Merge();
// Increments symbol's entry count.
void AddSymbolEntryCount(const string &symbol, uint64 count);
typedef enum {INVALID = 1, SUM, MAX} Operation;
// Increments source stack's count.
// symbol: name of the symbol in which source is located.
// source: source location (in terms of inlined source stack).
// count: total sampled count.
// num_inst: number of instructions that is mapped to the source.
// op: operation used to calculate count (SUM or MAX).
void AddSourceCount(const string &symbol, const SourceStack &source,
uint64 count, uint64 num_inst, Operation op);
// Adds the indirect call target to source stack.
// symbol: name of the symbol in which source is located.
// source: source location (in terms of inlined source stack).
// target: indirect call target.
// count: total sampled count.
// Returns false if we failed to add the call target.
bool AddIndirectCallTarget(const string &symbol, const SourceStack &source,
const string &target, uint64 count);
// Traverses the inline stack in source, update the symbol map by adding
// count to the total count in the inlined symbol. Returns the leaf symbol. If
// the inline stack is empty, returns nullptr without any other updates.
Symbol *TraverseInlineStack(const string &symbol, const SourceStack &source,
uint64 count);
// Updates function name, start_addr, end_addr of a function that has a
// given address. Returns false if no such symbol exists.
const bool GetSymbolInfoByAddr(uint64 addr, const string **name,
uint64 *start_addr, uint64 *end_addr) const;
// Returns a pointer to the symbol name for a given start address. Returns
// NULL if no such symbol exists.
const string *GetSymbolNameByStartAddr(uint64 address) const;
// Returns the overlap between two symbol maps. For two profiles, if
// count_i_j denotes the function count of the ith function in profile j;
// total_j denotes the total count of all functions in profile j. Then
// overlap = sum(min(count_i_1/total_1, count_i_2/total_2))
float Overlap(const SymbolMap &map) const;
// Iterates the address count map to calculate the working set of the profile.
// Working set is a map from bucket_num to total number of instructions that
// consumes bucket_num/NUM_GCOV_WORKING_SETS of dynamic instructions. This
// mapping indicates how large is the dynamic hot code region during run time.
//
// To compute working set, the following algorithm is used:
//
// Input: map from instruction to execution count.
// Output: working set.
// 1. compute histogram: map (execution count --> number of instructions)
// 2. traverse the histogram in decending order
// 2.1 calculate accumulated_count.
// 2.2 compute the working set bucket number.
// 2.3 update the working set bucket from last update to calculated bucket
// number.
void ComputeWorkingSets();
// Traverses all symbols that has been sampled (appears in sampled_functions).
// Uses addr2line to derive symbol's source info and update the symbol.
void UpdateSymbolMap(const Addr2line *addr2line,
const std::map<uint64, uint64> &sampled_functions);
// Returns a map from start addresses of functions that have been sampled to
// the size of the function.
std::map<uint64, uint64> GetSampledSymbolStartAddressSizeMap(
const std::set<uint64> &sampled_addrs) const;
// Returns a map from start addresses of functions that have been sampled in
// the old profile that has already been loaded, to the size of the function.
// This function is used by profile_update, which takes the old profile as
// input, and use the debug/module info in the new binary to update the old
// profile's module info. For the efficiency consideration, we only need to
// read debug info for the symbols that has been sampled in the old profile.
std::map<uint64, uint64> GetLegacySymbolStartAddressSizeMap() const;
void Dump() const;
void DumpFuncLevelProfileCompare(const SymbolMap &map) const;
void AddAlias(const string& sym, const string& alias);
// Validates if the current symbol map is sane.
bool Validate() const;
private:
// Reads from the binary's elf section to build the symbol map.
void BuildSymbolMap();
// Reads from address_symbol_map_ and update name_addr_map_.
void BuildNameAddressMap() {
for (const auto &addr_symbol : address_symbol_map_) {
name_addr_map_[addr_symbol.second.first] = addr_symbol.first;
}
}
NameSymbolMap map_;
NameAliasMap name_alias_map_;
NameAddressMap name_addr_map_;
AddressSymbolMap address_symbol_map_;
const string binary_;
uint64 base_addr_;
int64 count_threshold_;
bool use_discriminator_encoding_;
bool ignore_thresholds_;
std::unique_ptr<Addr2line> addr2line_;
/* working_set_[i] stores # of instructions that consumes
i/NUM_GCOV_WORKING_SETS of total instruction counts. */
gcov_working_set_info working_set_[NUM_GCOV_WORKING_SETS];
DISALLOW_COPY_AND_ASSIGN(SymbolMap);
};
} // namespace autofdo
#endif // AUTOFDO_SYMBOL_MAP_H_
|