1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506
|
#!/usr/bin/env python
# Copyright 2012 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""
This script can take an Apple-style CrashReporter log and symbolicate it. This
is useful for when a user's reports aren't being uploaded, for example.
Only versions 6, 7, 8, and 9 reports are supported. For more information on the
file format, reference this document:
TN2123 <http://developer.apple.com/library/mac/#technotes/tn2004/tn2123.html>
Information on symbolication was gleaned from:
<http://developer.apple.com/tools/xcode/symbolizingcrashdumps.html>
"""
from __future__ import print_function
import optparse
import os.path
import re
import subprocess
import sys
# Maps binary image identifiers (the bundle identifiers listed in a report's
# "Binary Images" section) to binary names (minus the .dSYM portion) found in
# the archive. These are the only objects that will be looked up; frames that
# belong to any other image are left unsymbolicated.
SYMBOL_IMAGE_MAP = {
  'com.google.Chrome': 'Google Chrome.app',
  'com.google.Chrome.framework': 'Google Chrome Framework.framework',
  'com.google.Chrome.helper': 'Google Chrome Helper.app'
}
class CrashReport(object):
  """A parsed representation of an Apple CrashReport text file.

  Attributes:
    report_info: Dict mapping header keys (e.g. 'Report Version') to their
        string values, with the alignment padding removed.
    threads: List of CrashThread objects in the order found in the report.
    report_version: The 'Report Version' header value as an int.
  """

  # Report versions understood by this parser:
  #   Version 6: 10.5 and 10.6 crash report
  #   Version 7: 10.6 spindump report
  #   Version 8: 10.7 spindump report
  #   Version 9: 10.7 crash report
  _VALID_VERSIONS = (6, 7, 8, 9)
  _SPINDUMP_VERSIONS = (7, 8)

  # Lower-cased 'Code Type' names whose spelling differs from the architecture
  # name that atos expects for its -arch flag.
  _ATOS_ARCH_NAMES = {
      'x86': 'i386',
      'x86-64': 'x86_64',
      'arm-64': 'arm64',
  }

  # All patterns are raw strings so that regex escapes such as |\+| and |\(|
  # are not treated as (invalid) Python string escapes.
  _HEADER_THREAD_RE = re.compile(r'^[ \t]*Thread ([a-f0-9]+)')
  # This must be specific to not include the thread state section, which comes
  # right after all the stack traces.
  _THREAD_START_RE = re.compile(r'^Thread ([0-9]+)( Crashed)?:(.*)')
  # |<frame-number> <binary-image> 0x<address> <symbol> + <offset>|
  _STACK_FRAME_RE = re.compile(
      r'^([0-9]+) +(.+)[ \t]+(0x[0-9a-f]+) (.*) \+ ([0-9]+)$')
  # Spindump thread headers are identified by a HEX number.
  _SPINDUMP_THREAD_RE = re.compile(r'^ Thread ([0-9a-fx]+)')
  # A: |<space><steps> <symbol> + <offset> (in <image-name>) [<address>]|
  _SPINDUMP_FRAME_A_RE = re.compile(
      r'^([ ]+[0-9]+) (.*) \+ ([0-9]+) \(in (.*)\) \[(0x[0-9a-f]+)\]')
  # B: |<space><steps> ??? (in <image-name> + <offset>) [<address>]|
  _SPINDUMP_FRAME_B_RE = re.compile(
      r'^([ ]+[0-9]+) \?\?\?( \(in (.*) \+ ([0-9]+)\))? \[(0x[0-9a-f]+)\]')
  # |0x<start> - 0x<end> <binary-image> <version> (<version>) <<UUID>> <path>|
  _BINARY_IMAGE_RE = re.compile(
      r'[ ]*(0x[0-9a-f]+) -[ \t]+(0x[0-9a-f]+) [+ ]([a-zA-Z0-9._\-]+)')
  # Each line of output from atos: |<symbol> (in <image>) (<file>:<line>)|.
  _ATOS_LINE_RE = re.compile(r'(.+) \(in (.+)\) (\((.+):([0-9]+)\))?')

  def __init__(self, file_name):
    """Reads and parses the crash report stored at |file_name|.

    Args:
      file_name: Path of the CrashReporter text file.

    Raises:
      KeyError: The header contains no 'Report Version' entry.
      Exception: The report version is not one of the supported versions.
    """
    super(CrashReport, self).__init__()
    self.report_info = {}
    self.threads = []
    self._binary_images = {}

    # The context manager closes the file even when parsing raises.
    with open(file_name, 'r') as fd:
      self._ParseHeader(fd)

      # Try and get the report version. If it's not a version we handle,
      # abort.
      self.report_version = int(self.report_info['Report Version'])
      if self.report_version not in self._VALID_VERSIONS:
        raise Exception("Only crash reports of versions %s are accepted." %
                        str(self._VALID_VERSIONS))

      # If this is a spindump (version 7 or 8 report), use a special parser.
      # The format is undocumented, but is similar to version 6. However, the
      # spindump report contains user and kernel stacks for every process on
      # the system.
      if self.report_version in self._SPINDUMP_VERSIONS:
        self._ParseSpindumpStack(fd)
      else:
        self._ParseStack(fd)

      self._ParseBinaryImages(fd)

  def Symbolicate(self, symbol_path):
    """Symbolicates a crash report stack trace.

    Frames that belong to an image listed in SYMBOL_IMAGE_MAP get their
    symbol, file name and line number filled in from the output of atos.

    Args:
      symbol_path: Directory that contains the .dSYM bundles.
    """
    # In order to be efficient, collect all the offsets that will be passed to
    # atos by the image name.
    offsets_by_image = self._CollectAddressesForImages(SYMBOL_IMAGE_MAP.keys())

    # For each image, run atos with the list of addresses.
    for image_name, addresses in offsets_by_image.items():
      # If this image was not loaded or is in no stacks, skip.
      if image_name not in self._binary_images or not addresses:
        continue

      # Combine the |image_name| and |symbol_path| into the path of the dSYM.
      dsym_file = self._GetDSymPath(symbol_path, image_name)

      # From the list of 2-Tuples of (frame, address), create a list of just
      # addresses.
      address_list = [address for _, address in addresses]

      # Look up the load address of the image.
      binary_base = self._binary_images[image_name][0]

      # This returns a list of just symbols. The indices will match up with
      # the list of |addresses|.
      symbol_names = self._RunAtos(binary_base, dsym_file, address_list)
      if not symbol_names:
        print('Error loading symbols for ' + image_name)
        continue

      # Attaches a list of symbol names to stack frames. This assumes that the
      # order of |addresses| has stayed the same as |symbol_names|.
      self._AddSymbolsToFrames(symbol_names, addresses)

  def _ParseHeader(self, fd):
    """Parses the header section of a crash report, which contains the OS and
    application version information.

    Key/value pairs are accumulated into |self.report_info| until the first
    thread stack heading (or the end of the file) is reached. On return, |fd|
    is positioned at the start of that heading so the stack parser re-reads
    it.
    """
    # Remember where each line starts. Text-mode files on Python 3 reject
    # non-zero relative seeks, so the rewind below has to be an absolute seek
    # to a position previously returned by tell().
    line_start = fd.tell()
    line = fd.readline()
    # An empty string means EOF; stop there instead of spinning forever on a
    # report that has no thread section.
    while line and not self._HEADER_THREAD_RE.match(line):
      # Almost all header lines are a key and a value separated by a colon.
      # Blank lines and lines without a colon yield an empty separator and
      # are skipped.
      key, separator, value = line.strip().partition(':')
      if separator:
        # There's a varying amount of space padding after the ':' to align
        # all the values; strip that.
        self.report_info[key] = value.lstrip()
      line_start = fd.tell()
      line = fd.readline()

    # The first thread stack heading has been read past; back up so that it
    # is re-read by the stack parser.
    fd.seek(line_start)

  def _ParseStack(self, fd):
    """Parses the stack dump of a crash report and creates a list of threads
    and their stack traces.

    On entry, |fd| must be positioned at the first "Thread N" heading.
    Parsing stops at the first line that follows a blank line and is not a
    thread heading.
    """
    line = fd.readline().rstrip()
    in_stack = False
    thread = None
    while True:
      # Check for start of the thread stack.
      matches = self._THREAD_START_RE.match(line)
      if not matches and not in_stack:
        break

      if not line.strip():
        # A blank line (or EOF) indicates a break in the thread stack.
        in_stack = False
      elif matches:
        # If this is the start of a thread stack, create the CrashThread.
        in_stack = True
        thread = CrashThread(matches.group(1))
        thread.name = matches.group(3)
        thread.did_crash = matches.group(2) is not None
        self.threads.append(thread)
      else:
        # All other lines are stack frames.
        thread.stack.append(self._ParseStackFrame(line))

      # Read the next line.
      line = fd.readline()

  def _ParseStackFrame(self, line):
    """Takes in a single line of text and transforms it into a StackFrame.

    If |line| does not look like a stack frame, the returned frame only
    carries the raw text.
    """
    # Drop the trailing newline so that unparsed frames do not print an extra
    # blank line.
    line = line.rstrip()
    frame = StackFrame(line)

    matches = self._STACK_FRAME_RE.match(line)
    if matches is None:
      return frame

    # Create a stack frame with the information extracted from the regex.
    frame.frame_id = matches.group(1)
    # The greedy image group swallows all but one of the padding characters
    # that precede the address; remove them.
    frame.image = matches.group(2).rstrip()
    frame.address = int(matches.group(3), 0)  # Convert HEX to an int.
    frame.original_symbol = matches.group(4)
    frame.offset = matches.group(5)
    frame.line = None
    return frame

  def _ParseSpindumpStack(self, fd):
    """Parses a spindump stack report. In this format, each thread stack has
    both a user and kernel trace. Only the user traces are symbolicated.

    On return, |fd| is positioned at the start of the first line that did not
    belong to a stack, so that _ParseBinaryImages() can re-read it.
    """
    # When this method is called, the fd has been walked right up to the first
    # line. Line starts are tracked so the rewind can be an absolute seek.
    line_start = fd.tell()
    line = fd.readline()

    in_user_stack = False
    in_kernel_stack = False
    thread = None

    # An empty string means EOF; without that check a report truncated inside
    # a user stack would never leave the loop.
    while line and (self._SPINDUMP_THREAD_RE.match(line) or in_user_stack or
                    in_kernel_stack):
      # Check for the start of a thread.
      matches = self._SPINDUMP_THREAD_RE.match(line)

      if not line.strip():
        # A blank line indicates the start of a new thread. The blank line
        # comes after the kernel stack before a new thread header.
        in_kernel_stack = False
      elif matches:
        # This is the start of a thread header. The next line is the heading
        # for the user stack, followed by the actual trace.
        thread = CrashThread(matches.group(1))
        self.threads.append(thread)
        in_user_stack = True
        fd.readline()  # Read past the 'User stack:' header.
      elif line.startswith(' Kernel stack:'):
        # The kernel stack header comes immediately after the last frame
        # (really the top frame) in the user stack, without a blank line.
        in_user_stack = False
        in_kernel_stack = True
      elif in_user_stack:
        # If this is a line while in the user stack, parse it as a stack
        # frame.
        thread.stack.append(self._ParseSpindumpStackFrame(line))

      # Loop with the next line.
      line_start = fd.tell()
      line = fd.readline()

    # The line that ended the loop may be the binary images header. Back up
    # so that _ParseBinaryImages() does the right thing.
    fd.seek(line_start)

  def _ParseSpindumpStackFrame(self, line):
    """Parses a spindump-style stackframe.

    If |line| matches neither known frame format, the returned frame only
    carries the stripped raw text.
    """
    frame = StackFrame(line)

    # Format A carries a symbol name.
    matches = self._SPINDUMP_FRAME_A_RE.match(line)
    if matches:
      frame.frame_id = matches.group(1)[4:]  # Remove some leading spaces.
      frame.original_symbol = matches.group(2)
      frame.offset = matches.group(3)
      frame.image = matches.group(4)
      frame.address = int(matches.group(5), 0)
      frame.line = None
      return frame

    # If pattern A didn't match (which it will most of the time), try B.
    matches = self._SPINDUMP_FRAME_B_RE.match(line)
    if matches:
      frame.frame_id = matches.group(1)[4:]  # Remove some leading spaces.
      frame.image = matches.group(3)
      frame.offset = matches.group(4)
      frame.address = int(matches.group(5), 0)
      frame.line = None
      return frame

    # Otherwise, this frame could not be matched and just use the raw input.
    frame.line = frame.line.strip()
    return frame

  def _ParseBinaryImages(self, fd):
    """Parses out the binary images section in order to get the load offset.

    Fills |self._binary_images| with image name -> (start, end) address
    tuples. Nothing is recorded if the section is absent.
    """
    # The parser skips some sections, so advance until the "Binary Images"
    # header is reached. Stop at EOF (empty string) so that a report without
    # the section does not hang the parser.
    line = fd.readline()
    while line and not line.lstrip().startswith('Binary Images:'):
      line = fd.readline()

    # This section is in this format:
    # |<start address> - <end address> <image name>|.
    while True:
      line = fd.readline()
      if not line.strip():
        # End when a blank line (or EOF) is hit.
        return
      match = self._BINARY_IMAGE_RE.match(line)
      if match:
        # Store the offsets by image name so it can be referenced during
        # symbolication. These are hex numbers with leading '0x', so int()
        # can convert them to decimal if base=0.
        address_range = (int(match.group(1), 0), int(match.group(2), 0))
        self._binary_images[match.group(3)] = address_range

  def _CollectAddressesForImages(self, images):
    """Iterates all the threads and stack frames and collects the stack frames
    that are in a list of binary |images|. The result is a dictionary, keyed
    by the image name that maps to a list of tuples. Each is a 2-Tuple of
    (stack_frame, address)."""
    # Create the collection and initialize it with empty lists for each image.
    collection = dict((image, []) for image in images)

    for thread in self.threads:
      for frame in thread.stack:
        image_name = self._ImageForAddress(frame.address)
        if image_name in collection:
          # Replace the image name in the frame in case it was elided.
          frame.image = image_name
          collection[image_name].append((frame, frame.address))

    return collection

  def _ImageForAddress(self, address):
    """Given a PC address, returns the bundle identifier of the image in which
    the address resides, or None if no known image contains it."""
    for image_name, (start, end) in self._binary_images.items():
      if start <= address <= end:
        return image_name
    return None

  def _GetDSymPath(self, base_path, image_name):
    """Takes a base path for the symbols and an image name. It looks the name up
    in SYMBOL_IMAGE_MAP and creates a full path to the dSYM in the bundle."""
    image_file = SYMBOL_IMAGE_MAP[image_name]
    binary_name = os.path.splitext(image_file)[0]  # Chop off the extension.
    return os.path.join(base_path, image_file + '.dSYM', 'Contents',
                        'Resources', 'DWARF', binary_name)

  @staticmethod
  def _AtosArchForCodeType(code_type):
    """Converts a report 'Code Type' value into an atos architecture name.

    Args:
      code_type: Header value of the format |X86 (Native)|, or None.

    Returns:
      The architecture name to pass to |atos -arch|, or None when |code_type|
      is missing or not made of exactly two space-separated parts.
    """
    if not code_type:
      return None
    parts = code_type.lower().split(' ')
    if len(parts) != 2:
      return None
    # The crash report spells some architectures differently than atos does
    # (e.g. it refers to i386 as x86).
    return CrashReport._ATOS_ARCH_NAMES.get(parts[0], parts[0])

  def _BuildAtosArgs(self, load_address, dsym_file):
    """Returns the atos command line for an image loaded at |load_address|."""
    # The load address is passed with an explicit 0x prefix so that it cannot
    # be misread as a number in another base.
    args = ['atos', '-l', '0x%x' % load_address, '-o', dsym_file]
    arch = self._AtosArchForCodeType(self.report_info.get('Code Type'))
    if arch:
      args.extend(['-arch', arch])
    return args

  def _RunAtos(self, load_address, dsym_file, addresses):
    """Runs the atos with the provided arguments. |addresses| is used as stdin.

    Returns:
      A list of symbol information in the same order as |addresses|, or None
      if atos could not be started or exited with a non-zero status.
    """
    args = self._BuildAtosArgs(load_address, dsym_file)
    try:
      # universal_newlines makes the pipes carry text on both Python 2 and 3,
      # so str input can be written and the output can be split on '\n'.
      proc = subprocess.Popen(args, stdin=subprocess.PIPE,
                              stdout=subprocess.PIPE,
                              universal_newlines=True)
    except OSError as error:
      # atos is missing or not executable; the caller treats None as a
      # per-image failure and carries on.
      print('Unable to run atos: %s' % error, file=sys.stderr)
      return None

    # Format explicitly rather than with hex(), which appends an 'L' suffix
    # to long values on Python 2.
    stdin_data = ' '.join('0x%x' % address for address in addresses)
    stdout, _ = proc.communicate(stdin_data)
    if proc.returncode:
      return None
    return stdout.rstrip().split('\n')

  def _AddSymbolsToFrames(self, symbols, address_tuples):
    """Takes a single value (the list) from _CollectAddressesForImages and does
    a smart-zip with the data returned by atos in |symbols|. Note that the
    indices must match for this to succeed."""
    if len(symbols) != len(address_tuples):
      print('symbols do not match')

    # zip() stops at the shorter sequence, so surplus atos output cannot index
    # past the end of |address_tuples|.
    for symbol_line, (frame, _) in zip(symbols, address_tuples):
      symbol_parts = self._ATOS_LINE_RE.match(symbol_line)
      if not symbol_parts:
        continue  # Error.
      frame.symbol = symbol_parts.group(1)
      frame.image = symbol_parts.group(2)
      frame.file_name = symbol_parts.group(4)
      frame.line_number = symbol_parts.group(5)
class CrashThread(object):
  """The stack trace of a single thread in a crash report.

  Attributes:
    thread_id: Identifier string taken from the thread heading.
    name: Optional text that followed the thread heading, or None.
    did_crash: True if this is the thread that crashed.
    stack: List of frames, printed one per line by repr().
  """

  def __init__(self, thread_id):
    super(CrashThread, self).__init__()
    self.thread_id = thread_id
    self.name = None
    self.did_crash = False
    self.stack = []

  def __repr__(self):
    # The heading is 'Thread <id>', followed by ': <name>' when a name is set.
    heading = 'Thread ' + self.thread_id
    if self.name:
      heading += ': ' + self.name
    # Every frame goes on its own line underneath the heading.
    frames = '\n'.join(str(frame) for frame in self.stack)
    return heading + '\n' + frames
class StackFrame(object):
  """One frame of a thread's stack. A StackFrame is owned by a CrashThread."""

  def __init__(self, line):
    super(StackFrame, self).__init__()

    # The raw text of the frame. While this is non-empty, repr() prints it
    # verbatim instead of the parsed fields below.
    self.line = line

    # Fields extracted from the raw text.
    self.frame_id = 0
    self.image = None
    self.address = 0x0
    self.original_symbol = None
    self.offset = 0x0

    # The following members are set after symbolication.
    self.symbol = None
    self.file_name = None
    self.line_number = 0

  def __repr__(self):
    # Fall back to the raw text for as long as it is kept.
    if self.line:
      return ' %s' % self.line

    # Prefer source location over the symbol offset when it is known.
    if self.file_name:
      location = ' - %s:%s' % (self.file_name, self.line_number)
    else:
      location = ' + %s' % self.offset

    # Likewise, prefer the symbolicated name over the one from the report.
    symbol = self.symbol or self.original_symbol

    fields = (self.frame_id, self.address, self.image, location, symbol)
    return ' %s\t0x%x\t[%s\t%s]\t%s' % fields
def PrettyPrintReport(report):
  """Takes a crash report and prints it like the crash server would.

  Args:
    report: An object exposing |report_info| (dict of header values),
        |report_version| (int) and |threads| (iterable of objects that have
        a |did_crash| attribute and a multi-line repr()).

  Raises:
    KeyError: One of the 'Process', 'Version', 'Date/Time' or 'OS Version'
        headers is missing, or a thread is marked as crashed while the
        'Exception Type' / 'Exception Codes' headers are missing.
  """
  info = report.report_info

  print('Process : ' + info['Process'])
  print('Version : ' + info['Version'])
  print('Date : ' + info['Date/Time'])
  print('OS Version : ' + info['OS Version'])
  print()
  if 'Crashed Thread' in info:
    print('Crashed Thread : ' + info['Crashed Thread'])
    print()
  if 'Event' in info:
    print('Event : ' + info['Event'])
    print()

  # Version 7 and 8 reports both have spindump-style output (with a stepped
  # stack trace), so the first tab of each line is removed to get better
  # alignment. Previously only version 7 was handled here.
  is_spindump = report.report_version in (7, 8)

  for thread in report.threads:
    print()
    if thread.did_crash:
      # Only the first word of the exception type is shown.
      exc_type = info['Exception Type'].split(' ')[0]
      exc_code = info['Exception Codes'].replace('at', '@')
      print('*CRASHED* ( ' + exc_type + ' / ' + exc_code + ' )')
    if is_spindump:
      for line in repr(thread).split('\n'):
        print(line.replace('\t', ' ', 1))
    else:
      print(thread)
def Main(args):
  """Program main.

  Args:
    args: The full argument vector, including the program name at index 0.

  Returns:
    The process exit status: 0 on success, 1 when the positional arguments
    are wrong, 2 when the symbol path is not a directory.
  """
  std_path_help = ('With this flag, the symbol_path is a containing '
                   'directory, in which a dSYM files are stored in a '
                   'directory named by the version. Example: '
                   '[symbolicate_crash.py -s ./symbols/ report.crash] will '
                   'look for dSYMs in ./symbols/15.0.666.0/ if the report is '
                   'from that verison.')
  parser = optparse.OptionParser(
      usage='%prog [options] symbol_path crash_report',
      description='This will parse and symbolicate an Apple CrashReporter v6-9 '
                  'file.')
  parser.add_option('-s', '--std-path', action='store_true', dest='std_path',
                    help=std_path_help)
  options, positional = parser.parse_args(args[1:])

  # Exactly two positional arguments are required: the symbols and the report.
  if len(positional) != 2:
    parser.print_usage()
    return 1
  symbol_root, report_file = positional

  report = CrashReport(report_file)

  if options.std_path:
    # Use the report version to locate symbols in a directory. The header
    # value is in the format of |M.N.B.P (B.P)|; keep just the part before
    # the space.
    chrome_version = report.report_info['Version'].split(' ')[0]
    symbol_path = os.path.join(symbol_root, chrome_version)
  else:
    # Without the standard layout, this is a full path to the symbols.
    symbol_path = symbol_root

  # Check that the symbols exist.
  if not os.path.isdir(symbol_path):
    print('Symbol path %s is not a directory' % symbol_path, file=sys.stderr)
    return 2

  print('Using symbols from ' + symbol_path, file=sys.stderr)
  print('=' * 80, file=sys.stderr)

  report.Symbolicate(symbol_path)
  PrettyPrintReport(report)
  return 0
# Run Main() only when this file is executed as a script, and use its return
# value as the process exit status.
if __name__ == '__main__':
  sys.exit(Main(sys.argv))
|