File: dwarf_line_no.cc

package info (click to toggle)
chromium 120.0.6099.224-1~deb11u1
links: PTS, VCS
area: main
in suites: bullseye
size: 6,112,112 kB
sloc: cpp: 32,907,025; ansic: 8,148,123; javascript: 3,679,536; python: 2,031,248; asm: 959,718; java: 804,675; xml: 617,256; sh: 111,417; objc: 100,835; perl: 88,443; cs: 53,032; makefile: 29,579; fortran: 24,137; php: 21,162; tcl: 21,147; sql: 20,809; ruby: 17,735; pascal: 12,864; yacc: 8,045; lisp: 3,388; lex: 1,323; ada: 727; awk: 329; jsp: 267; csh: 117; exp: 43; sed: 37
file content (1363 lines) | stat: -rw-r--r-- 45,565 bytes
parent folder | download | duplicates (2)
// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/debug/dwarf_line_no.h"

#include "base/memory/raw_ref.h"

#ifdef USE_SYMBOLIZE
#include <algorithm>
#include <cstdint>
#include <limits>

#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "base/debug/buffered_dwarf_reader.h"
#include "base/debug/stack_trace.h"
#include "base/memory/raw_ptr.h"
#include "base/third_party/symbolize/symbolize.h"

namespace base {
namespace debug {

namespace {

// Largest representable offset. Used as the "unknown / unbounded end" marker.
constexpr uint64_t kMaxOffset{std::numeric_limits<uint64_t>::max()};

// Capacities of the fixed-size directory and filename tables held in
// ProgramInfo. They are large enough for most compilation units in chrome and
// content_shell. Entries beyond these limits are ignored, and stack frames
// that refer to the ignored directories/filenames get no line numbers. The
// limits must stay modest because they determine the size of ProgramInfo,
// which is allocated on the stack.
constexpr int kMaxDirectories{128};
constexpr size_t kMaxFilenames{512};

// DWARF-4 line number program header, section 6.2.4
//
// Every member has a default initializer. Instances are default-constructed
// on the stack and only partially filled in by the header parser, while the
// lookup code indexes the directory tables with values taken from the file
// being parsed. Without the initializers those reads could observe
// indeterminate values (for example `directory_sizes[0]`, which the parser
// never writes).
struct ProgramInfo {
  uint64_t header_length = 0;
  // Offset of the first opcode of the line number program.
  uint64_t start_offset = 0;
  // Offset of the first byte after the line number program.
  uint64_t end_offset = 0;
  uint8_t minimum_instruction_length = 0;
  uint8_t maximum_operations_per_instruction = 0;
  uint8_t default_is_stmt = 0;
  int8_t line_base = 0;
  uint8_t line_range = 0;
  uint8_t opcode_base = 0;
  // Entry `i` holds the operand count of standard opcode `i + 1`.
  uint8_t standard_opcode_lengths[256] = {};
  uint8_t include_directories_table_offset = 0;
  uint8_t file_names_table_offset = 0;

  // Store the directories as offsets. Slot 0 is reserved, which is why the
  // count starts at 1.
  int num_directories = 1;
  uint64_t directory_offsets[kMaxDirectories] = {};
  uint64_t directory_sizes[kMaxDirectories] = {};

  // Store the file number table offsets. Slot 0 is reserved, which is why the
  // count starts at 1. The table is `mutable` because it is updated through
  // const references while the line number program is evaluated
  // (DW_LNE_define_file and the entry-0 fallback).
  mutable unsigned int num_filenames = 1;
  mutable uint64_t filename_offsets[kMaxFilenames] = {};
  mutable uint8_t filename_dirs[kMaxFilenames] = {};

  // Returns the "operation advance" encoded in a special opcode.
  // `adjusted_opcode` is the opcode with `opcode_base` already subtracted.
  // `line_range` must be non-zero.
  unsigned int OpcodeToAdvance(uint8_t adjusted_opcode) const {
    // Special opcodes advance line numbers by an amount based on line_range
    // and opcode_base. This calculation is taken from 6.2.5.1.
    return static_cast<unsigned int>(adjusted_opcode) / line_range;
  }
};

// DWARF-4 line number program registers, section 6.2.2
struct LineNumberRegisters {
  // During the line number program evaluation, some instructions perform a
  // "commit" which is when the registers have finished calculating a new row in
  // the line-number table. This callback is executed and can be viewed as a
  // iterator over all rows in the line number table.
  class OnCommit {
   public:
    virtual void Do(LineNumberRegisters* registers) = 0;
  };

  raw_ptr<OnCommit> on_commit;
  LineNumberRegisters(ProgramInfo info, OnCommit* on_commit)
      : on_commit(on_commit), is_stmt(info.default_is_stmt) {}

  // Current program counter.
  uintptr_t address = 0;

  // For VLIW architectures, the index of the operation in the VLIW instruction.
  unsigned int op_index = 0;

  // Identifies the source file relating to the address in the DWARF File name
  // table.
  uint64_t file = 0;

  // Identifies the line number. Starts at 1. Can become 0 if instruction does
  // not match any line in the file.
  uint64_t line = 1;

  // Identifies the column within the source line. Starts at 1 though "0"
  // also means "left edge" of the line.
  uint64_t column = 0;

  // Boolean determining if this is a recommended spot for a breakpoint.
  // Should be initialized by the program header.
  bool is_stmt = false;

  // Indicates start of a basic block.
  bool basic_block = false;

  // Indicates first byte after a sequence of machine instructions.
  bool end_sequence = false;

  // Indicates this may be where execution should stop if trying to break for
  // entering a function.
  bool prologue_end = false;

  // Indicates this may be where execution should stop if trying to break for
  // exiting a function.
  bool epilogue_begin = false;

  // Identifier for the instruction set of the current address.
  uint64_t isa = 0;

  // Identifies which block the current instruction belongs to.
  uint64_t discriminator = 0;

  // Values from the previously committed line. See OnCommit interface for more
  // details. This conceptually should be a copy of the whole
  // LineNumberRegisters but since only 4 pieces of data are needed, hacking
  // it inline was easier.
  uintptr_t last_address = 0;
  uint64_t last_file = 0;
  uint64_t last_line = 0;
  uint64_t last_column = 0;

  // This is the magical calculation for decompressing the line-number
  // information. The `program_info` provides the parameters for the formula
  // and the `op_advance` is the input value. See DWARF-4 sections 6.2.5.1 for
  // the formula.
  void OpAdvance(const ProgramInfo* program_info, uint64_t op_advance) {
    address += program_info->minimum_instruction_length *
               ((op_index + op_advance) /
                program_info->maximum_operations_per_instruction);

    op_index = (op_index + op_advance) %
               program_info->maximum_operations_per_instruction;
  }

  // Committing a line means the calculation has landed on a stable set of
  // values that represent an actual entry in the line number table.
  void CommitLine() {
    on_commit->Do(this);

    // Inlined or compiler generator code may have line number 0 which isn't
    // useful to the user. Better to go up one line number.
    if (line != 0) {
      last_address = address;
      last_file = file;
      last_column = column;
      last_line = line;
    }
  }
};

// Lookup record for a single program counter. `pc` is the address being
// resolved; the other fields stay zero until a matching line-table row is
// found.
struct LineNumberInfo {
  uint64_t pc{0};
  uint64_t line{0};
  uint64_t column{0};

  // Offsets of the directory and filename strings, copied out of the
  // directory and file tables stored in ProgramInfo, plus the size recorded
  // for the directory entry.
  uint64_t module_dir_offset{0};
  uint64_t dir_size{0};
  uint64_t module_filename_offset{0};
};

// Evaluates a Line Number Program as defined by the rules in section 6.2.5.
//
// Reads opcodes from `fd` starting at offset `start` and runs the line number
// state machine until one of the following happens:
//   * the reader reaches `program_info.end_offset`,
//   * a table row covering `info->pc - base_address` is committed (signalled
//     by `info->line` becoming non-zero),
//   * a read fails, or
//   * an instruction tries to move the line register below zero.
//
// On a match, `info->line`, `info->column` and the filename/directory offsets
// of `info` are filled in; otherwise `info` is left as it was. Opcodes that
// this evaluator does not know are skipped using the lengths recorded in the
// program itself instead of aborting the process.
void EvaluateLineNumberProgram(const int fd,
                               LineNumberInfo* info,
                               uint64_t base_address,
                               uint64_t start,
                               const ProgramInfo& program_info) {
  BufferedDwarfReader reader(fd, start);
  uint64_t module_relative_pc = info->pc - base_address;

  // Helper that records the line-number table entry corresponding with the
  // `module_relative_pc`. This is the thing that actually finds the line
  // number for an address.
  struct OnCommitImpl : public LineNumberRegisters::OnCommit {
   private:
    raw_ptr<LineNumberInfo> info;
    uint64_t module_relative_pc;
    const raw_ref<const ProgramInfo> program_info;

   public:
    OnCommitImpl(LineNumberInfo* info,
                 uint64_t module_relative_pc,
                 const ProgramInfo& program_info)
        : info(info),
          module_relative_pc(module_relative_pc),
          program_info(program_info) {}

    void Do(LineNumberRegisters* registers) override {
      // When a line is committed, the program counter needs to check if it is
      // in the [last_address, cur_address) range. If yes, then the line
      // pertains to the program counter.
      if (registers->last_address == 0) {
        // This is the first table entry so by definition, nothing is in its
        // range.
        return;
      }

      // If module_relative_pc is out of range, skip.
      if (module_relative_pc < registers->last_address ||
          module_relative_pc >= registers->address)
        return;

      if (registers->last_file < program_info->num_filenames) {
        info->line = registers->last_line;
        info->column = registers->last_column;

        // Since DW_AT_name in the compile_unit is optional, it may be empty. If
        // it is, guess that the file in entry 1 is the name. This does not
        // follow spec, but seems to be common behavior. See the following LLVM
        // bug for more info: https://reviews.llvm.org/D11003
        if (registers->last_file == 0 &&
            program_info->filename_offsets[0] == 0 &&
            1 < program_info->num_filenames) {
          program_info->filename_offsets[0] = program_info->filename_offsets[1];
          program_info->filename_dirs[0] = program_info->filename_dirs[1];
        }

        if (registers->last_file < kMaxFilenames) {
          info->module_filename_offset =
              program_info->filename_offsets[registers->last_file];

          uint8_t dir = program_info->filename_dirs[registers->last_file];
          info->module_dir_offset = program_info->directory_offsets[dir];
          info->dir_size = program_info->directory_sizes[dir];
        }
      }
    }
  } on_commit(info, module_relative_pc, program_info);

  LineNumberRegisters registers(program_info, &on_commit);

  // Special opcode range is [program_info.opcode_base, 255].
  // Lines can be max incremented by [line_base + line range - 1].
  // opcode = (desired line increment - line_base) + (line_range * operation
  // advance) + opcode_base.
  uint8_t opcode;
  while (reader.position() < program_info.end_offset && info->line == 0) {
    if (!reader.ReadInt8(opcode))
      return;

    // It's SPECIAL OPCODE TIME!. They're so special that they make up the
    // vast majority of the opcodes and are the first thing described in the
    // documentation.
    //
    // See DWARF-4 spec 6.2.5.1.
    if (opcode >= program_info.opcode_base) {
      uint8_t adjusted_opcode = opcode - program_info.opcode_base;
      registers.OpAdvance(&program_info,
                          program_info.OpcodeToAdvance(adjusted_opcode));
      const int line_adjust =
          program_info.line_base + (adjusted_opcode % program_info.line_range);
      if (line_adjust < 0) {
        if (static_cast<uint64_t>(-line_adjust) > registers.line)
          return;
        registers.line -= static_cast<uint64_t>(-line_adjust);
      } else {
        registers.line += static_cast<uint64_t>(line_adjust);
      }
      registers.basic_block = false;
      registers.prologue_end = false;
      registers.epilogue_begin = false;
      registers.discriminator = 0;
      registers.CommitLine();
    } else {
      // Standard opcodes
      switch (opcode) {
        case 0: {
          // Extended opcode.
          uint64_t extended_opcode;
          uint64_t extended_opcode_length;
          if (!reader.ReadLeb128(extended_opcode_length))
            return;
          // The length covers the extended opcode itself plus its operands.
          uint64_t next_opcode = reader.position() + extended_opcode_length;
          if (!reader.ReadLeb128(extended_opcode))
            return;
          switch (extended_opcode) {
            case 1: {
              // DW_LNE_end_sequence
              registers.end_sequence = true;
              registers.CommitLine();
              registers = LineNumberRegisters(program_info, &on_commit);
              break;
            }

            case 2: {
              // DW_LNE_set_address
              //
              // The operand is a target address. Its size is whatever is left
              // of the instruction after the one-byte opcode, so an 8-byte
              // operand is read in full rather than truncated to its low 32
              // bits. Any other length falls back to the 4-byte read.
              uint64_t value = 0;
              if (extended_opcode_length == 1 + sizeof(uint64_t)) {
                if (!reader.ReadInt64(value))
                  return;
              } else {
                uint32_t value32;
                if (!reader.ReadInt32(value32))
                  return;
                value = value32;
              }
              registers.address = static_cast<uintptr_t>(value);
              registers.op_index = 0;
              break;
            }

            case 3: {
              // DW_LNE_define_file
              //
              // This should only get used if the filename table itself is null.
              // Record the module offset for the string and then drop the data.
              uint64_t filename_offset = reader.position();
              reader.ReadCString(program_info.end_offset, nullptr, 0);

              // dir index
              uint64_t value;
              if (!reader.ReadLeb128(value))
                return;
              size_t cur_filename = program_info.num_filenames;
              if (cur_filename < kMaxFilenames && value < kMaxDirectories) {
                ++program_info.num_filenames;
                // Store the offset from the start of file and skip the data to
                // save memory.
                program_info.filename_offsets[cur_filename] = filename_offset;
                program_info.filename_dirs[cur_filename] =
                    static_cast<uint8_t>(value);
              }

              // modification time
              if (!reader.ReadLeb128(value))
                return;

              // source file length
              if (!reader.ReadLeb128(value))
                return;
              break;
            }

            case 4: {
              // DW_LNE_set_discriminator
              uint64_t value;
              if (!reader.ReadLeb128(value))
                return;
              registers.discriminator = value;
              break;
            }

            default:
              // Unknown (for example vendor-specific) extended opcode. The
              // encoded length says where the next instruction starts, so the
              // operands are simply stepped over below.
              break;
          }

          // Skip any padding bytes in extended opcode.
          reader.set_position(next_opcode);
          break;
        }

        case 1: {
          // DW_LNS_copy. This commits the registers to the line number table.
          registers.CommitLine();
          registers.discriminator = 0;
          registers.basic_block = false;
          registers.prologue_end = false;
          registers.epilogue_begin = false;
          break;
        }

        case 2: {
          // DW_LNS_advance_pc
          uint64_t op_advance;
          if (!reader.ReadLeb128(op_advance))
            return;
          registers.OpAdvance(&program_info, op_advance);
          break;
        }

        case 3: {
          // DW_LNS_advance_line
          int64_t line_advance;
          if (!reader.ReadLeb128(line_advance))
            return;
          if (line_advance < 0) {
            // Negate in unsigned arithmetic so that the most negative int64_t
            // does not overflow.
            const uint64_t magnitude =
                uint64_t{0} - static_cast<uint64_t>(line_advance);
            if (magnitude > registers.line)
              return;
            registers.line -= magnitude;
          } else {
            registers.line += static_cast<uint64_t>(line_advance);
          }
          break;
        }

        case 4: {
          // DW_LNS_set_file
          uint64_t value;
          if (!reader.ReadLeb128(value))
            return;
          registers.file = value;
          break;
        }

        case 5: {
          // DW_LNS_set_column
          uint64_t value;
          if (!reader.ReadLeb128(value))
            return;
          registers.column = value;
          break;
        }

        case 6:
          // DW_LNS_negate_stmt
          registers.is_stmt = !registers.is_stmt;
          break;

        case 7:
          // DW_LNS_set_basic_block
          registers.basic_block = true;
          break;

        case 8:
          // DW_LNS_const_add_pc
          registers.OpAdvance(
              &program_info,
              program_info.OpcodeToAdvance(255 - program_info.opcode_base));
          break;

        case 9: {
          // DW_LNS_fixed_advance_pc
          uint16_t value;
          if (!reader.ReadInt16(value))
            return;
          registers.address += value;
          registers.op_index = 0;
          break;
        }

        case 10:
          // DW_LNS_set_prologue_end
          registers.prologue_end = true;
          break;

        case 11:
          // DW_LNS_set_epilogue_begin
          registers.epilogue_begin = true;
          break;

        case 12: {
          // DW_LNS_set_isa
          uint64_t value;
          if (!reader.ReadLeb128(value))
            return;
          registers.isa = value;
          break;
        }

        default: {
          // Standard opcode this evaluator does not know. It can only be
          // reached when 13 <= opcode < opcode_base, so the header recorded
          // its operand count in standard_opcode_lengths[opcode - 1]. Every
          // such operand is a LEB128 value; read and discard them.
          const uint8_t operand_count =
              program_info.standard_opcode_lengths[opcode - 1];
          for (uint8_t i = 0; i < operand_count; ++i) {
            uint64_t ignored;
            if (!reader.ReadLeb128(ignored))
              return;
          }
          break;
        }
      }
    }
  }
}

// Parses the remainder of a DWARF-4 line number program header per section
// 6.2.4, starting at the `header_length` field. `is_64bit` is forwarded to
// ReadOffset when reading that field.
//
// `cu_name_offset` is the module offset for the 0th entry of the file table.
//
// Fills in `program_info` (header fields, directory table, file table and
// `start_offset`). Returns false if any read fails or if the header contains a
// value that would make the program impossible to evaluate.
bool ParseDwarf4ProgramInfo(BufferedDwarfReader* reader,
                            bool is_64bit,
                            uint64_t cu_name_offset,
                            ProgramInfo* program_info) {
  if (!reader->ReadOffset(is_64bit, program_info->header_length))
    return false;
  // header_length counts the bytes between this point and the first opcode.
  program_info->start_offset = reader->position() + program_info->header_length;

  if (!reader->ReadInt8(program_info->minimum_instruction_length) ||
      !reader->ReadInt8(program_info->maximum_operations_per_instruction) ||
      !reader->ReadInt8(program_info->default_is_stmt) ||
      !reader->ReadInt8(program_info->line_base) ||
      !reader->ReadInt8(program_info->line_range) ||
      !reader->ReadInt8(program_info->opcode_base)) {
    return false;
  }

  // Both values are used as divisors when the program is evaluated (special
  // opcode decoding and the address/op_index advance). A zero in a corrupt
  // header would otherwise become a division by zero, so reject it here.
  if (program_info->line_range == 0 ||
      program_info->maximum_operations_per_instruction == 0) {
    return false;
  }

  // Entry `i` holds the operand count of standard opcode `i + 1`.
  for (int i = 0; i < (program_info->opcode_base - 1); i++) {
    if (!reader->ReadInt8(program_info->standard_opcode_lengths[i]))
      return false;
  }

  // Include-directory table: a sequence of null-terminated strings ending
  // with an empty string. This basically means search for 2 contiguous empty
  // bytes.
  uint8_t last = 0, cur = 0;
  for (;;) {
    // Read a byte.
    last = cur;
    if (!reader->ReadInt8(cur))
      return false;

    if (last == 0 && cur == 0) {
      // We're at the last entry where it's a double null.
      break;
    }

    // Record where the directory string starts, then consume the rest of it.
    // Directories beyond kMaxDirectories are consumed but not recorded.
    int cur_dir = program_info->num_directories;
    if (cur_dir < kMaxDirectories) {
      ++program_info->num_directories;
      // "-1" is because we have already read the first byte above.
      program_info->directory_offsets[cur_dir] = reader->position() - 1;
      program_info->directory_sizes[cur_dir] = 1;
    }
    do {
      if (!reader->ReadInt8(cur))
        return false;
      if (cur_dir < kMaxDirectories)
        ++program_info->directory_sizes[cur_dir];
    } while (cur != '\0');
  }

  // Read filename table line-by-line.
  last = 0;
  cur = 0;
  for (;;) {
    // Read a byte.
    last = cur;
    if (!reader->ReadInt8(cur))
      return false;

    if (last == 0 && cur == 0) {
      // We're at the last entry where it's a double null.
      break;
    }

    // Read in all of the filename. "-1" is because we have already read the
    // first byte of the filename above.
    uint64_t filename_offset = reader->position() - 1;
    do {
      if (!reader->ReadInt8(cur))
        return false;
    } while (cur != '\0');

    uint64_t value;

    // Dir index
    if (!reader->ReadLeb128(value))
      return false;
    // NOTE(review): an entry rejected here (table full, or directory index
    // not representable) does not get a slot, so every later entry is stored
    // one index lower than its DWARF file number. Confirm this is acceptable.
    size_t cur_filename = program_info->num_filenames;
    if (cur_filename < kMaxFilenames && value < kMaxDirectories) {
      ++program_info->num_filenames;
      program_info->filename_offsets[cur_filename] = filename_offset;
      program_info->filename_dirs[cur_filename] = static_cast<uint8_t>(value);
    }

    // Modification time
    if (!reader->ReadLeb128(value))
      return false;

    // Bytes in file.
    if (!reader->ReadLeb128(value))
      return false;
  }

  // Set up the 0th filename and the 0th directory. Directory 0 has no string
  // of its own here, so give it a zero offset and a zero size; the size is
  // copied out during lookup and must not be left unset.
  program_info->filename_offsets[0] = cu_name_offset;
  program_info->filename_dirs[0] = 0;
  program_info->directory_offsets[0] = 0;
  program_info->directory_sizes[0] = 0;

  return true;
}

// Reads the unit header of the line number program found at offset `start` in
// `fd` and, when it is a DWARF version 4 program, parses the rest of the
// header into `program_info`. Returns true only if the header was parsed.
//
// `program_info->end_offset` is guaranteed to be initialized: it is
// `kMaxOffset` if the program length could not be read, or the offset of the
// byte after the end of this program otherwise.
bool ReadProgramInfo(const int fd,
                     uint64_t start,
                     uint64_t cu_name_offset,
                     ProgramInfo* program_info) {
  BufferedDwarfReader reader(fd, start);
  program_info->end_offset = kMaxOffset;

  // 64-bit DWARF and 64-bit binaries are independent of each other; plenty of
  // 64-bit binaries carry 32-bit DWARF. The initial length field tells which
  // encoding is in use.
  bool is_64bit = false;
  uint64_t unit_length;
  if (!reader.ReadInitialLength(is_64bit, unit_length)) {
    return false;
  }

  // Knowing where the program ends lets a search recover from a program it
  // cannot parse by skipping over it.
  program_info->end_offset = reader.position() + unit_length;

  // Only DWARF version 4 line programs are supported.
  uint16_t version;
  if (!reader.ReadInt16(version) || version != 4) {
    return false;
  }

  return ParseDwarf4ProgramInfo(&reader, is_64bit, cu_name_offset,
                                program_info);
}

// Attempts to resolve `info->pc` with the single line number program whose
// header begins at offset `start`. If the header parses, the program is
// evaluated and `info` is filled in when a matching row is found.
//
// Returns the program's `end_offset` as recorded by ReadProgramInfo: the
// offset just past this program, or `kMaxOffset` if the program length could
// not be read.
uint64_t GetLineNumbersInProgram(const int fd,
                                 LineNumberInfo* info,
                                 uint64_t base_address,
                                 uint64_t start,
                                 uint64_t cu_name_offset) {
  ProgramInfo program_info;
  const bool header_parsed =
      ReadProgramInfo(fd, start, cu_name_offset, &program_info);
  if (header_parsed) {
    EvaluateLineNumberProgram(fd, info, base_address, program_info.start_offset,
                              program_info);
  }

  return program_info.end_offset;
}

// Walks .debug_abbrev declarations with `reader` until the one whose code is
// `wanted_abbreviation_code` is found. This is called when parsing a DIE in
// .debug_info.
//
// On success returns true with `reader` positioned at the first
// attribute/form pair of the matching declaration, and `tag` / `has_children`
// describing it. Returns false on a read error, on a children byte that is
// neither 0 nor 1, or when `table_end` is reached without a match; in those
// cases `tag` and `has_children` may hold values from a non-matching
// declaration.
bool AdvancedReaderToAttributeList(BufferedDwarfReader& reader,
                                   uint64_t table_end,
                                   uint64_t wanted_abbreviation_code,
                                   uint64_t& tag,
                                   bool& has_children) {
  // Layout of one abbreviation declaration:
  //   LEB128 - abbreviation code
  //   LEB128 - the entry's tag
  //   1 byte - DW_CHILDREN_yes or DW_CHILDREN_no
  //   [LEB128, LEB128] - repeated attribute + form pairs
  //   [0, 0] - terminating pair of LEB128 zeros
  while (reader.position() < table_end) {
    uint64_t code;
    uint8_t children_byte;
    if (!reader.ReadLeb128(code) || !reader.ReadLeb128(tag) ||
        !reader.ReadInt8(children_byte)) {
      return false;
    }

    switch (children_byte) {
      case 0:
        has_children = false;
        break;
      case 1:
        has_children = true;
        break;
      default:
        return false;
    }

    if (code == wanted_abbreviation_code) {
      return true;
    }

    // Not the declaration we want: consume its attribute list up to and
    // including the terminating (0, 0) pair.
    for (;;) {
      uint64_t attr;
      uint64_t form;
      if (!reader.ReadLeb128(attr) || !reader.ReadLeb128(form)) {
        return false;
      }
      if (attr == 0 && form == 0) {
        break;
      }
    }
  }

  return false;
}

// This reads through .debug_info compile unit entries to try and extract
// the `cu_name_offset` as well as the `debug_line_offset` (offset into the
// .debug_line table) corresponding to `pc`.
//
// The .debug_info sections are a packed set of bytes whose format is defined
// by a corresponding .debug_abbrev entry. Basically .debug_abbrev describes
// a struct and .debug_info has a header that tells which struct it is followed
// by a bunch of bytes.
//
// The control flow is to find the .debug_abbrev entry for each .debug_info
// entry, then walk through the .debug_abbrev entry to parse the bytes of the
// .debug_info entry. A successful parse calculates the address range that the
// .debug_info entry covers. When that is retrieved, `pc` can be compared to
// the range and a corresponding .debug_info can be found.
//
// The `debug_info_start` may be the start of the whole .debug_info section or
// an offset into the section if it was known ahead of time (perhaps by
// consulting .debug_aranges). `debug_info_end` is the offset just past the
// section.
//
// To fully interpret this data, the .debug_ranges and .debug_str sections
// also need to be interpreted.
//
// Returns true when a unit whose address range contains `pc` is found. In that
// case `*cu_name_offset` is the offset of the unit's DW_AT_name string (0 if
// the unit has none) and `*debug_line_offset` is the value of its
// DW_AT_stmt_list, if it has one. Returns false if a required section is
// missing, a read fails, a unit's abbreviation cannot be found, the first DIE
// of a unit is not a compile/partial unit, or no unit matches.
bool GetCompileUnitName(int fd,
                        uint64_t debug_info_start,
                        uint64_t debug_info_end,
                        uint64_t pc,
                        uint64_t module_base_address,
                        uint64_t* debug_line_offset,
                        uint64_t* cu_name_offset) {
  // Ensure defined `cu_name_offset` in case DW_AT_name is missing.
  *cu_name_offset = 0;

  // Open .debug_info and .debug_abbrev as both are needed to find the
  // DW_AT_name for the DW_TAG_compile_unit or DW_TAG_partial_unit
  // corresponding to the given address.

  ElfW(Shdr) debug_abbrev;
  constexpr static char kDebugAbbrevSectionName[] = ".debug_abbrev";
  if (!google::GetSectionHeaderByName(fd, kDebugAbbrevSectionName,
                                      sizeof(kDebugAbbrevSectionName),
                                      &debug_abbrev)) {
    return false;
  }
  uint64_t debug_abbrev_end = debug_abbrev.sh_offset + debug_abbrev.sh_size;

  ElfW(Shdr) debug_str;
  constexpr static char kDebugStrSectionName[] = ".debug_str";
  if (!google::GetSectionHeaderByName(
          fd, kDebugStrSectionName, sizeof(kDebugStrSectionName), &debug_str)) {
    return false;
  }
  uint64_t debug_str_end = debug_str.sh_offset + debug_str.sh_size;

  ElfW(Shdr) debug_ranges;
  constexpr static char kDebugRangesSectionName[] = ".debug_ranges";
  if (!google::GetSectionHeaderByName(fd, kDebugRangesSectionName,
                                      sizeof(kDebugRangesSectionName),
                                      &debug_ranges)) {
    return false;
  }
  uint64_t debug_ranges_end = debug_ranges.sh_offset + debug_ranges.sh_size;

  // Iterate Compile Units.
  uint64_t next_compilation_unit = kMaxOffset;
  for (BufferedDwarfReader reader(fd, debug_info_start);
       reader.position() < debug_info_end;
       reader.set_position(next_compilation_unit)) {
    // A name seen in an earlier, non-matching unit must not leak into this
    // one if this unit has no DW_AT_name.
    *cu_name_offset = 0;

    bool is_64bit;
    uint64_t length;
    uint16_t dwarf_version;
    uint64_t abbrev_offset;
    uint8_t address_size;
    if (!reader.ReadCommonHeader(is_64bit, length, dwarf_version, abbrev_offset,
                                 address_size, next_compilation_unit)) {
      return false;
    }

    // Compilation Unit Header parsed. Now read the first tag which is either a
    // DW_TAG_compile_unit or DW_TAG_partial_unit. The entry type is designated
    // by a LEB128 number that needs to be cross-referenced in the abbreviations
    // table to understand the format of the rest of the entry.
    uint64_t abbreviation_code;
    if (!reader.ReadLeb128(abbreviation_code)) {
      return false;
    }

    // Find entry in the abbreviation table. Without a matching declaration
    // the layout of the DIE is unknown, and `tag` would either be unset or
    // describe some other declaration, so stop here.
    BufferedDwarfReader abbrev_reader(fd,
                                      debug_abbrev.sh_offset + abbrev_offset);
    uint64_t tag = 0;
    bool has_children = false;
    if (!AdvancedReaderToAttributeList(abbrev_reader, debug_abbrev_end,
                                       abbreviation_code, tag, has_children)) {
      return false;
    }

    // The first DIE of a unit must be a compile unit or a partial unit.
    static constexpr int kDW_TAG_compile_unit = 0x11;
    static constexpr int kDW_TAG_partial_unit = 0x3c;
    if (tag != kDW_TAG_compile_unit && tag != kDW_TAG_partial_unit) {
      return false;
    }

    // Use table to parse the name, high, and low attributes.
    static constexpr int kDW_AT_name = 0x3;        // string
    static constexpr int kDW_AT_stmt_list = 0x10;  // lineptr
    static constexpr int kDW_AT_low_pc = 0x11;     // address
    static constexpr int kDW_AT_high_pc = 0x12;    // address, constant
    static constexpr int kDW_AT_ranges = 0x55;     // rangelistptr
    uint64_t attr;
    uint64_t form;
    uint64_t low_pc = 0;
    uint64_t high_pc = 0;
    bool high_pc_is_offset = false;
    bool is_found_in_range = false;
    do {
      if (!abbrev_reader.ReadLeb128(attr)) {
        return false;
      }
      if (!abbrev_reader.ReadLeb128(form)) {
        return false;
      }
      // Table from 7.5.4, Figure 20.
      enum Form {
        kDW_FORM_addr = 0x01,
        kDW_FORM_block2 = 0x03,
        kDW_FORM_block4 = 0x04,
        kDW_FORM_data2 = 0x05,
        kDW_FORM_data4 = 0x06,
        kDW_FORM_data8 = 0x07,
        kDW_FORM_string = 0x08,
        kDW_FORM_block = 0x09,
        kDW_FORM_block1 = 0x0a,
        kDW_FORM_data1 = 0x0b,
        kDW_FORM_flag = 0x0c,
        kDW_FORM_sdata = 0x0d,
        kDW_FORM_strp = 0x0e,
        kDW_FORM_udata = 0x0f,
        kDW_FORM_ref_addr = 0x10,
        kDW_FORM_ref1 = 0x11,
        kDW_FORM_ref2 = 0x12,
        kDW_FORM_ref4 = 0x13,
        kDW_FORM_ref8 = 0x14,
        kDW_FORM_ref_udata = 0x15,
        kDW_FORM_ref_indrect = 0x16,
        kDW_FORM_sec_offset = 0x17,
        kDW_FORM_exprloc = 0x18,
        kDW_FORM_flag_present = 0x19,
        kDW_FORM_ref_sig8 = 0x20,
      };

      // Every form has to consume exactly the bytes its value occupies in
      // .debug_info, even when the attribute is of no interest; otherwise all
      // following attributes of this DIE are read from the wrong position.
      // The terminating (0, 0) pair matches no case and consumes nothing.
      switch (form) {
        case kDW_FORM_string: {
          // The string is stored inline; remember where it starts if it is
          // the unit name, then skip over it.
          if (attr == kDW_AT_name) {
            *cu_name_offset = reader.position();
          }
          if (!reader.ReadCString(debug_info_end, nullptr, 0)) {
            return false;
          }
        } break;

        case kDW_FORM_strp: {
          uint64_t strp_offset;
          if (!reader.ReadOffset(is_64bit, strp_offset)) {
            return false;
          }

          if (attr == kDW_AT_name) {
            uint64_t pos = debug_str.sh_offset + strp_offset;
            if (pos >= debug_str_end) {
              return false;
            }
            *cu_name_offset = pos;
          }
        } break;

        case kDW_FORM_addr: {
          uint64_t address;
          if (!reader.ReadAddress(address_size, address)) {
            return false;
          }

          if (attr == kDW_AT_low_pc) {
            low_pc = address;
          } else if (attr == kDW_AT_high_pc) {
            high_pc_is_offset = false;
            high_pc = address;
          }
        } break;

        case kDW_FORM_data1: {
          uint8_t data;
          if (!reader.ReadInt8(data)) {
            return false;
          }
          if (attr == kDW_AT_high_pc) {
            high_pc_is_offset = true;
            high_pc = data;
          }
        } break;

        case kDW_FORM_data2: {
          uint16_t data;
          if (!reader.ReadInt16(data)) {
            return false;
          }
          if (attr == kDW_AT_high_pc) {
            high_pc_is_offset = true;
            high_pc = data;
          }
        } break;

        case kDW_FORM_data4: {
          uint32_t data;
          if (!reader.ReadInt32(data)) {
            return false;
          }
          if (attr == kDW_AT_high_pc) {
            high_pc_is_offset = true;
            high_pc = data;
          }
        } break;

        case kDW_FORM_data8: {
          uint64_t data;
          if (!reader.ReadInt64(data)) {
            return false;
          }
          if (attr == kDW_AT_high_pc) {
            high_pc_is_offset = true;
            high_pc = data;
          }
        } break;

        case kDW_FORM_sdata: {
          int64_t data;
          if (!reader.ReadLeb128(data)) {
            return false;
          }
          if (attr == kDW_AT_high_pc) {
            high_pc_is_offset = true;
            high_pc = static_cast<uint64_t>(data);
          }
        } break;

        case kDW_FORM_udata: {
          uint64_t data;
          if (!reader.ReadLeb128(data)) {
            return false;
          }
          if (attr == kDW_AT_high_pc) {
            high_pc_is_offset = true;
            high_pc = data;
          }
        } break;

        case kDW_FORM_ref_addr:
        case kDW_FORM_sec_offset: {
          uint64_t value;
          if (!reader.ReadOffset(is_64bit, value)) {
            return false;
          }

          if (attr == kDW_AT_ranges) {
            uint64_t current_base_address = module_base_address;
            BufferedDwarfReader ranges_reader(fd,
                                              debug_ranges.sh_offset + value);

            while (ranges_reader.position() < debug_ranges_end) {
              // Ranges are 2 addresses in size.
              uint64_t range_start;
              uint64_t range_end;
              if (!ranges_reader.ReadAddress(address_size, range_start)) {
                return false;
              }
              if (!ranges_reader.ReadAddress(address_size, range_end)) {
                return false;
              }
              uint64_t relative_pc = pc - current_base_address;

              if (range_start == 0 && range_end == 0) {
                // End-of-list entry.
                if (!is_found_in_range) {
                  // Time to go to the next iteration.
                  goto next_cu;
                }
                break;
              } else if (((address_size == 4) &&
                          (range_start == 0xffffffffUL)) ||
                         ((address_size == 8) &&
                          (range_start == 0xffffffffffffffffULL))) {
                // Check if this is a new base add value. 2.17.3
                current_base_address = range_end;
              } else {
                if (relative_pc >= range_start && relative_pc < range_end) {
                  is_found_in_range = true;
                  break;
                }
              }
            }
          } else if (attr == kDW_AT_stmt_list) {
            *debug_line_offset = value;
          }
        } break;

        case kDW_FORM_flag:
        case kDW_FORM_ref1: {
          uint8_t dummy;
          if (!reader.ReadInt8(dummy)) {
            return false;
          }
        } break;

        case kDW_FORM_ref2: {
          uint16_t dummy;
          if (!reader.ReadInt16(dummy)) {
            return false;
          }
        } break;

        case kDW_FORM_ref4: {
          uint32_t dummy;
          if (!reader.ReadInt32(dummy)) {
            return false;
          }
        } break;

        case kDW_FORM_ref8:
        case kDW_FORM_ref_sig8: {
          // Both are fixed 8-byte values.
          uint64_t dummy;
          if (!reader.ReadInt64(dummy)) {
            return false;
          }
        } break;

        case kDW_FORM_ref_udata: {
          uint64_t dummy;
          if (!reader.ReadLeb128(dummy)) {
            return false;
          }
        } break;

        // The block forms are a length (1, 2 or 4 bytes, or a LEB128)
        // followed by that many bytes of data. The data has to be skipped
        // along with the length.
        case kDW_FORM_block1: {
          uint8_t block_length;
          if (!reader.ReadInt8(block_length)) {
            return false;
          }
          reader.set_position(reader.position() + block_length);
        } break;

        case kDW_FORM_block2: {
          uint16_t block_length;
          if (!reader.ReadInt16(block_length)) {
            return false;
          }
          reader.set_position(reader.position() + block_length);
        } break;

        case kDW_FORM_block4: {
          uint32_t block_length;
          if (!reader.ReadInt32(block_length)) {
            return false;
          }
          reader.set_position(reader.position() + block_length);
        } break;

        case kDW_FORM_block:
        case kDW_FORM_exprloc: {
          uint64_t value;
          if (!reader.ReadLeb128(value)) {
            return false;
          }
          reader.set_position(reader.position() + value);
        } break;

        case kDW_FORM_flag_present:
          // The attribute's presence is the value; nothing is stored in the
          // DIE.
          break;
      }
    } while (attr != 0 || form != 0);

    // Because attributes can be in any order, most of the computations (minus
    // checking range list entries) cannot happen until everything is parsed for
    // the one .debug_info entry. Do the analysis here.
    if (is_found_in_range) {
      // Well formed compile_unit and partial_unit tags either have a
      // DW_AT_ranges entry or a DW_AT_low_pc entry. If is_found_in_range
      // matched as true, then this entry matches the given pc.
      return true;
    }

    // If high_pc is 0, it was never found in the DIE (or was given as 0).
    // This indicates a single address entry. Only look at the low_pc.
    //
    // NOTE(review): when high_pc is 0 and the pc equals low_pc, control falls
    // through to the range check below, which cannot succeed with a high_pc
    // of 0, so such a unit is never reported as a match. Confirm whether a
    // single-address match is supposed to return true.
    {
      uint64_t module_relative_pc = pc - module_base_address;
      if (high_pc == 0 && module_relative_pc != low_pc) {
        goto next_cu;
      }

      // Otherwise this is a contiguous range DIE. Normalize the meaning of the
      // high_pc field and check if it contains the pc.
      if (high_pc_is_offset) {
        high_pc = low_pc + high_pc;
        high_pc_is_offset = false;
      }

      if (module_relative_pc >= low_pc && module_relative_pc < high_pc) {
        return true;
      }
    }

    // Not found.
  next_cu:;
  }
  return false;
}

// Thin wrapper over `GetCompileUnitName` that opens the .debug_info section.
bool ReadCompileUnit(int fd,
                     uint64_t pc,
                     uint64_t cu_offset,
                     uint64_t base_address,
                     uint64_t* debug_line_offset,
                     uint64_t* cu_name_offset) {
  if (cu_offset == 0) {
    return false;
  }

  ElfW(Shdr) debug_info;
  constexpr static char kDebugInfoSectionName[] = ".debug_info";
  if (!google::GetSectionHeaderByName(fd, kDebugInfoSectionName,
                                      sizeof(kDebugInfoSectionName),
                                      &debug_info)) {
    return false;
  }
  uint64_t debug_info_end = debug_info.sh_offset + debug_info.sh_size;

  return GetCompileUnitName(fd, debug_info.sh_offset + cu_offset,
                            debug_info_end, pc, base_address, debug_line_offset,
                            cu_name_offset);
}

// Takes the information from `info` and renders the data located in the
// object file `fd` into `out` as a NUL-terminated string. The rendered text
// looks like:
//
//   ../path/to/foo.cc:10:40
//
// which would indicate line 10 column 40 in ../path/to/foo.cc. When `info`
// carries no filename offset the name part is empty (":10:40").
//
// Never writes outside of `out[0, out_size)`. If a name cannot be read or the
// buffer is too small for the next component, the output is cut short and
// terminated instead of being written past the end of the buffer. Does nothing
// when `out` is null or `out_size` is zero.
void SerializeLineNumberInfoToString(int fd,
                                     const LineNumberInfo& info,
                                     char* out,
                                     size_t out_size) {
  if (out == nullptr || out_size == 0) {
    return;
  }

  // Invariant from here on: `out_pos` is one past the terminator of the text
  // produced so far, so `out[out_pos - 1]` is the byte that the next separator
  // replaces.
  size_t out_pos = 0;
  if (info.module_filename_offset) {
    BufferedDwarfReader reader(fd, info.module_dir_offset);
    if (info.module_dir_offset != 0) {
      const size_t dir_bytes = reader.ReadCString(kMaxOffset, out, out_size);
      // The return value is used as the number of bytes stored. When it is
      // zero there is no terminator to replace, and `out[out_pos - 1]` would
      // index before the start of the buffer.
      if (dir_bytes != 0) {
        out_pos = dir_bytes;
        out[out_pos - 1] = '/';
      }
    }

    // Only read the filename while at least one byte of the buffer is left.
    if (out_pos < out_size) {
      reader.set_position(info.module_filename_offset);
      out_pos +=
          reader.ReadCString(kMaxOffset, out + out_pos, out_size - out_pos);
    }
  }

  if (out_pos == 0) {
    // No name was produced; start from an empty string so that the separator
    // below has a terminator to overwrite.
    out[out_pos++] = '\0';
  }

  // Append ":<line>" and then ":<column>".
  const intptr_t numbers[] = {static_cast<intptr_t>(info.line),
                              static_cast<intptr_t>(info.column)};
  for (const intptr_t number : numbers) {
    if (out_pos >= out_size) {
      // The buffer is full. Make sure it is terminated and stop.
      out[out_size - 1] = '\0';
      return;
    }

    out[out_pos - 1] = ':';
    char* digits =
        internal::itoa_r(number, out + out_pos, out_size - out_pos, 10, 0);
    if (digits == nullptr) {
      // The number did not fit. Drop the dangling separator so the string
      // stays terminated, rather than passing a null pointer to strlen().
      out[out_pos - 1] = '\0';
      return;
    }
    out_pos += strlen(digits) + 1;
  }
}

// Reads the Line Number info for a compile unit.
bool GetLineNumberInfoFromObject(int fd,
                                 uint64_t pc,
                                 uint64_t cu_offset,
                                 uint64_t base_address,
                                 char* out,
                                 size_t out_size) {
  uint64_t cu_name_offset;
  uint64_t debug_line_offset;
  if (!ReadCompileUnit(fd, pc, cu_offset, base_address, &debug_line_offset,
                       &cu_name_offset)) {
    return false;
  }

  ElfW(Shdr) debug_line;
  constexpr static char kDebugLineSectionName[] = ".debug_line";
  if (!google::GetSectionHeaderByName(fd, kDebugLineSectionName,
                                      sizeof(kDebugLineSectionName),
                                      &debug_line)) {
    return false;
  }

  LineNumberInfo info;
  info.pc = pc;
  uint64_t line_info_program_offset = debug_line.sh_offset + debug_line_offset;
  GetLineNumbersInProgram(fd, &info, base_address, line_info_program_offset,
                          cu_name_offset);

  if (info.line == 0) {
    // No matching line number or filename found.
    return false;
  }

  SerializeLineNumberInfoToString(fd, info, out, out_size);

  return true;
}

// Per-frame bookkeeping used while resolving compile unit offsets.
struct FrameInfo {
  // Points at the caller-owned slot that receives the compile unit offset for
  // this frame. Because the slot address is fixed, results land in the right
  // place even after the FrameInfo array has been reordered.
  raw_ptr<uint64_t> cu_offset;
  // Program counter of the frame, as taken from the stack trace.
  uintptr_t pc;
};

// Scans the address-range tuples of one .debug_aranges set and stores
// `cu_offset` into the slot of every frame whose pc falls inside a tuple.
//
// Tuples (a start address followed by a length, each read with
// ReadAddress(address_size, ...)) are consumed from `reader` until `next_set`
// is reached, a read fails, or every frame has been matched. A frame matches
// when its pc, made module-relative by subtracting `base_address`, lies in
// [start, start + length).
//
// Matched frames are swapped to the front of `frame_info`, so the order of the
// array is not preserved. Returns the number of frames that were matched; they
// occupy the first entries of `frame_info`.
//
// The main benefit of this function is that the table is walked only once,
// which is important for performance.
size_t ProcessFlatArangeSet(BufferedDwarfReader* reader,
                            uint64_t next_set,
                            uint8_t address_size,
                            uint64_t base_address,
                            uint64_t cu_offset,
                            FrameInfo* frame_info,
                            size_t num_frames) {
  // Entries before `num_matched` already have a compile unit; entries at or
  // after it are still pending.
  size_t num_matched = 0;
  for (;;) {
    if (num_matched >= num_frames || reader->position() >= next_set) {
      break;
    }

    uint64_t range_start;
    uint64_t range_length;
    if (!reader->ReadAddress(address_size, range_start) ||
        !reader->ReadAddress(address_size, range_length)) {
      break;
    }
    const uint64_t range_end = range_start + range_length;

    for (size_t idx = num_matched; idx < num_frames; ++idx) {
      const uint64_t relative_pc = frame_info[idx].pc - base_address;
      const bool in_range =
          relative_pc >= range_start && relative_pc < range_end;
      if (!in_range) {
        continue;
      }

      *frame_info[idx].cu_offset = cu_offset;
      // Move the frame into the matched prefix.
      if (idx != num_matched) {
        std::swap(frame_info[idx], frame_info[num_matched]);
      }
      ++num_matched;
    }
  }

  return num_matched;
}

// This is a pre-step that uses the .debug_aranges table to find all the compile
// units for a given set of frames. This allows code to avoid iterating over
// all compile units at a later step in the symbolization process.
//
// For every frame in `frame_info[0, num_frames)` whose pc is covered by an
// address range set, the frame's `cu_offset` slot receives the .debug_info
// offset recorded in that set's header. Frames may be reordered. Frames that
// are not covered are left untouched.
//
// Stops early, keeping whatever was resolved so far, when the section is
// missing, a header cannot be read, the header reports an address size of
// zero, or the set uses segmented addressing.
void PopulateCompileUnitOffsets(int fd,
                                FrameInfo* frame_info,
                                size_t num_frames,
                                uint64_t base_address) {
  ElfW(Shdr) debug_aranges;
  constexpr static char kDebugArangesSectionName[] = ".debug_aranges";
  if (!google::GetSectionHeaderByName(fd, kDebugArangesSectionName,
                                      sizeof(kDebugArangesSectionName),
                                      &debug_aranges)) {
    return;
  }
  uint64_t debug_aranges_end = debug_aranges.sh_offset + debug_aranges.sh_size;
  uint64_t next_arange_set = kMaxOffset;
  // Frames before `unsorted_start` have been resolved already.
  size_t unsorted_start = 0;
  for (BufferedDwarfReader reader(fd, debug_aranges.sh_offset);
       unsorted_start < num_frames && reader.position() < debug_aranges_end;
       reader.set_position(next_arange_set)) {
    bool is_64bit;
    uint64_t length;
    uint16_t arange_version;
    uint64_t debug_info_offset;
    uint8_t address_size;
    if (!reader.ReadCommonHeader(is_64bit, length, arange_version,
                                 debug_info_offset, address_size,
                                 next_arange_set)) {
      return;
    }

    if (address_size == 0) {
      // `address_size` comes straight from the file and is used as a modulus
      // below; a zero value would divide by zero.
      return;
    }

    uint8_t segment_size;
    if (!reader.ReadInt8(segment_size)) {
      return;
    }

    if (segment_size != 0) {
      // Only flat namespaces are supported.
      return;
    }

    // The tuple list is aligned to a multiple of the tuple size after the
    // section start. Because this code only supports flat address spaces, this
    // means 2*address_size.
    const uint64_t tuple_size = 2 * static_cast<uint64_t>(address_size);
    while (((reader.position() - debug_aranges.sh_offset) % tuple_size) != 0) {
      uint8_t dummy;
      if (!reader.ReadInt8(dummy)) {
        return;
      }
    }
    unsorted_start += ProcessFlatArangeSet(
        &reader, next_arange_set, address_size, base_address, debug_info_offset,
        &frame_info[unsorted_start], num_frames - unsorted_start);
  }
}

}  // namespace

// Writes the source location of `pc` into `out` (at most `out_size` bytes).
//
// `cu_offset` is the .debug_info offset of the compile unit containing `pc`,
// as produced by `GetDwarfCompileUnitOffsets`. Returns false when the object
// file containing `pc` cannot be opened or when no line information is found.
bool GetDwarfSourceLineNumber(const void* pc,
                              uintptr_t cu_offset,
                              char* out,
                              size_t out_size) {
  uint64_t pc0 = reinterpret_cast<uint64_t>(pc);
  uint64_t object_start_address = 0;
  uint64_t object_base_address = 0;

  google::FileDescriptor object_fd(google::FileDescriptor(
      google::OpenObjectFileContainingPcAndGetStartAddress(
          pc0, object_start_address, object_base_address, nullptr, 0)));

  // An invalid descriptor is negative; zero is a legitimate descriptor value,
  // so testing for "not zero" would both miss failures and reject a valid fd.
  if (object_fd.get() < 0) {
    return false;
  }

  return GetLineNumberInfoFromObject(object_fd.get(), pc0, cu_offset,
                                     object_base_address, out, out_size);
}

void GetDwarfCompileUnitOffsets(const void* const* trace,
                                uint64_t* cu_offsets,
                                size_t num_frames) {
  // Ensure `cu_offsets` always has a known state.
  memset(cu_offsets, 0, sizeof(uint64_t) * num_frames);

  FrameInfo* frame_info =
      static_cast<FrameInfo*>(alloca(sizeof(FrameInfo) * num_frames));
  for (size_t i = 0; i < num_frames; i++) {
    // The `cu_offset` also encodes the original sort order.
    frame_info[i].cu_offset = &cu_offsets[i];
    frame_info[i].pc = reinterpret_cast<uintptr_t>(trace[i]);
  }
  auto pc_comparator = [](const FrameInfo& lhs, const FrameInfo& rhs) {
    return lhs.pc < rhs.pc;
  };

  // Use heapsort to avoid recursion in a signal handler.
  std::make_heap(&frame_info[0], &frame_info[num_frames - 1], pc_comparator);
  std::sort_heap(&frame_info[0], &frame_info[num_frames - 1], pc_comparator);

  // Walk the frame_info one compilation unit at a time.
  for (size_t cur_frame = 0; cur_frame < num_frames; ++cur_frame) {
    uint64_t object_start_address = 0;
    uint64_t object_base_address = 0;
    google::FileDescriptor object_fd(google::FileDescriptor(
        google::OpenObjectFileContainingPcAndGetStartAddress(
            frame_info[cur_frame].pc, object_start_address, object_base_address,
            nullptr, 0)));

    // TODO(https://crbug.com/1335630): Consider exposing the end address so a
    // range of frames can be bulk-populated. This was originally implemented,
    // but line number symbolization is currently broken by default (and also
    // broken in sandboxed processes). The various issues will be addressed
    // incrementally in follow-up patches, and the optimization here restored if
    // needed.

    PopulateCompileUnitOffsets(object_fd.get(), &frame_info[cur_frame], 1,
                               object_base_address);
  }
}

}  // namespace debug
}  // namespace base

#else  // USE_SYMBOLIZE

#include <cstring>

namespace base {
namespace debug {

// Stub for builds without USE_SYMBOLIZE: no line information is available, so
// this always reports failure and never writes to `out`.
bool GetDwarfSourceLineNumber(void* pc,
                              uintptr_t cu_offset,
                              char* out,
                              size_t out_size) {
  constexpr bool kLineInfoAvailable = false;
  return kLineInfoAvailable;
}

// Stub for builds without USE_SYMBOLIZE: no compile units can be resolved, so
// the first `num_frames` entries of `cu_offsets` are set to 0. `trace` is not
// read.
void GetDwarfCompileUnitOffsets(void* const* trace,
                                uint64_t* cu_offsets,
                                size_t num_frames) {
  // Provide defined values even in the stub. The element size is taken from
  // the pointee: sizeof(cu_offsets) would be the size of the pointer, which is
  // smaller than an element on 32-bit targets.
  memset(cu_offsets, 0, sizeof(*cu_offsets) * num_frames);
}

}  // namespace debug
}  // namespace base

#endif