1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
|
#!/usr/bin/env python
from __future__ import print_function
"""
Helper script to print out the raw content of an ELF section.
Example usages:
```
# print out as bits by default
extract-section.py .text --input-file=foo.o
```
```
# read from stdin and print out in hex
cat foo.o | extract-section.py -h .text
```
This is merely a wrapper around `llvm-readobj` that focuses on the binary
content as well as providing more formatting options.
"""
# Unfortunately reading binary from stdin is not so trivial in Python...
def read_raw_stdin():
    """Read all of standard input and return it without text decoding.

    On Python 3 the binary buffer behind ``sys.stdin`` is read. On Python 2
    ``sys.stdin`` itself is read; on Windows it is first switched to binary
    mode (``os.O_BINARY``).

    Returns:
        Everything read from stdin (``bytes`` on Python 3, ``str`` on
        Python 2).
    """
    import sys

    if sys.version_info >= (3, 0):
        return sys.stdin.buffer.read()

    # Windows will always read as string so we need some
    # special handling
    if sys.platform == "win32":
        import msvcrt
        import os

        # The msvcrt API for changing a descriptor's translation mode is
        # setmode(); there is no msvcrt.setformat().
        msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
    return sys.stdin.read()
def get_raw_section_dump(readobj_path, section_name, input_file):
    """Run the readobj tool's hex dump for one section and return its stdout.

    The tool is invoked as
    ``<readobj_path> --elf-output-style=GNU --hex-dump=<section_name> <input_file>``.

    Args:
        readobj_path: Executable to launch.
        section_name: Section name placed after ``--hex-dump=``.
        input_file: Path passed to the tool. When it is ``"-"``, this
            process's stdin is read and piped to the child.

    Returns:
        The child's standard output as ``str`` (decoded as UTF-8 when the
        pipe yields a non-``str`` object).
    """
    import subprocess

    hex_dump_flag = "--hex-dump={}".format(section_name)
    argv = [readobj_path, "--elf-output-style=GNU", hex_dump_flag, input_file]
    child = subprocess.Popen(argv, stdin=subprocess.PIPE, stdout=subprocess.PIPE)

    # "-" means the object file arrives on our stdin; forward it to the child.
    # Otherwise nothing is written to the child's stdin.
    payload = read_raw_stdin() if input_file == "-" else None
    captured, _ = child.communicate(input=payload)

    if type(captured) is str:
        return captured
    return captured.decode("utf-8")
# Element widths (in bytes) that evenly divide a 32-bit dump group.
VALID_HEX_WIDTHS = (1, 2, 4)


def build_arg_parser():
    """Create the command-line parser for this script.

    Returns:
        An ``argparse.ArgumentParser`` whose parsed namespace has the
        attributes ``tool_path``, ``input_file``, ``section``, ``format``
        ("bits" or "hex"), ``byte_indicator``, ``bits_endian`` and
        ``hex_width``.
    """
    import argparse

    # The default '-h' (--help) will conflict with our '-h' (hex) format
    arg_parser = argparse.ArgumentParser(add_help=False)
    arg_parser.add_argument(
        "--readobj-path",
        # Store under the attribute that the defaults and main() read; with
        # the implicit dest ("readobj_path") the option had no effect.
        dest="tool_path",
        metavar="<executable path>",
        type=str,
        help="Path to llvm-readobj",
    )
    arg_parser.add_argument(
        "--input-file",
        metavar="<file>",
        type=str,
        help="Input object file, or '-' to read from stdin",
    )
    arg_parser.add_argument(
        "section", metavar="<name>", type=str, help="Name of the section to extract"
    )
    # Output format
    format_group = arg_parser.add_mutually_exclusive_group()
    format_group.add_argument(
        "-b",
        dest="format",
        action="store_const",
        const="bits",
        help="Print out in bits",
    )
    arg_parser.add_argument(
        "--byte-indicator",
        action="store_true",
        help="Whether to print a '.' every 8 bits in bits printing mode",
    )
    arg_parser.add_argument(
        "--bits-endian",
        metavar="<little/big>",
        type=str,
        choices=["little", "big"],
        help="Print out bits in specified endianness (little or big); defaults to big",
    )
    format_group.add_argument(
        "-h",
        dest="format",
        action="store_const",
        const="hex",
        help="Print out in hexadecimal",
    )
    arg_parser.add_argument(
        "--hex-width",
        metavar="<# of bytes>",
        type=int,
        help="The width (in byte) of every element in hex printing mode",
    )
    arg_parser.add_argument("--help", action="help")
    arg_parser.set_defaults(
        format="bits",
        tool_path="llvm-readobj",
        input_file="-",
        byte_indicator=False,
        hex_width=4,
        bits_endian="big",
    )
    return arg_parser


def format_section_dump(
    raw_section,
    output_format="bits",
    byte_indicator=False,
    hex_width=4,
    bits_endian="big",
):
    """Convert the text of a hex dump into space-separated bits or hex words.

    Lines starting with "Hex dump" are skipped. On every other line the first
    space-separated token is dropped and up to four following tokens are
    parsed as hexadecimal 32-bit groups; parsing of a line stops at the first
    token that is not valid hexadecimal.

    NOTE(review): a group with fewer than 8 hex digits is still handled as a
    32-bit value, so it is rendered with leading zero bits/bytes.

    Args:
        raw_section: Dump text to convert.
        output_format: "bits" emits one token per bit; "hex" emits one token
            per ``hex_width``-byte element. Any other value emits nothing.
        byte_indicator: In bits mode, emit a "." token after every 8 bits.
        hex_width: Bytes per element in hex mode; must be 1, 2 or 4.
        bits_endian: In bits mode, "little" emits the bytes of each group
            from least to most significant; anything else emits them from
            most to least significant.

    Returns:
        The tokens joined with single spaces (no trailing newline).

    Raises:
        ValueError: If ``output_format`` is "hex" and ``hex_width`` is not
            1, 2 or 4.
    """
    if output_format == "hex" and hex_width not in VALID_HEX_WIDTHS:
        # Other widths either fall outside a 32-bit group or (width 3) would
        # silently drop the last byte of every group.
        raise ValueError(
            "hex width must be 1, 2 or 4 bytes, got {!r}".format(hex_width)
        )

    # Everything below is the same for every group, so compute it once.
    byte_offsets = (0, 8, 16, 24) if bits_endian == "little" else (24, 16, 8, 0)
    bit_offsets = range(7, -1, -1)
    if output_format == "hex":
        width_bits = hex_width * 8
        word_offsets = range(32 - width_bits, -1, -width_bits)
        word_mask = (1 << width_bits) - 1
        word_format = "{:0" + str(hex_width * 2) + "x}"

    results = []
    for line in raw_section.splitlines(False):
        if line.startswith("Hex dump"):
            continue
        # Drop the leading token, keep at most four data groups.
        parts = line.strip().split(" ")[1:]
        for part in parts[:4]:
            # exclude any non-hex dump string
            try:
                val = int(part, 16)
            except ValueError:
                break
            if output_format == "bits":
                # divided into bytes first
                for byte_off in byte_offsets:
                    byte = (val >> byte_off) & 0xFF
                    results.extend(str((byte >> bit) & 1) for bit in bit_offsets)
                    if byte_indicator:
                        results.append(".")
            elif output_format == "hex":
                results.extend(
                    word_format.format((val >> off) & word_mask)
                    for off in word_offsets
                )
    return " ".join(results)


def main(argv=None):
    """Parse the command line, dump the requested section and print it.

    Args:
        argv: Argument list to parse; ``None`` lets argparse use
            ``sys.argv[1:]``.
    """
    arg_parser = build_arg_parser()
    args = arg_parser.parse_args(argv)
    if args.format == "hex" and args.hex_width not in VALID_HEX_WIDTHS:
        # Report a usage error instead of silently printing nothing.
        arg_parser.error(
            "--hex-width must be 1, 2 or 4 (got {})".format(args.hex_width)
        )

    raw_section = get_raw_section_dump(args.tool_path, args.section, args.input_file)
    formatted = format_section_dump(
        raw_section,
        output_format=args.format,
        byte_indicator=args.byte_indicator,
        hex_width=args.hex_width,
        bits_endian=args.bits_endian,
    )
    print(formatted, end="")


if __name__ == "__main__":
    main()
|