File: linux_load_commands.py

package info (click to toggle)
swiftlang 6.0.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,519,992 kB
  • sloc: cpp: 9,107,863; ansic: 2,040,022; asm: 1,135,751; python: 296,500; objc: 82,456; f90: 60,502; lisp: 34,951; pascal: 19,946; sh: 18,133; perl: 7,482; ml: 4,937; javascript: 4,117; makefile: 3,840; awk: 3,535; xml: 914; fortran: 619; cs: 573; ruby: 573
file content (171 lines) | stat: -rw-r--r-- 5,986 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171

# REQUIRES: platform=Linux
# RUN: rm -rf %T && mkdir -p %t
# RUN: %{python} %s '%{package_path}' '%T' '%{readelf}'

# Test that none of the Linux libraries that we provide have load commands
# that are both writable and executable.

import argparse
import re
import sys
import subprocess

# For each library, we want to run llvm-readelf on it and verify that none of
# the flag fields say that the load commands are both writable and
# executable. Our target outputs look like this:
#
# ----
# There are 7 program headers, starting at offset 64
#
# Program Headers:
#   Type           Offset   VirtAddr           PhysAddr           FileSiz  MemSiz   Flg Align
#   PHDR           0x000040 0x0000000000000040 0x0000000000000040 0x000188 0x000188 R   0x8
#   LOAD           0x000000 0x0000000000000000 0x0000000000000000 0x9839a0 0x9839a0 R E 0x1000
#   LOAD           0x983a60 0x0000000000984a60 0x0000000000984a60 0x07ad78 0x0a3da9 RW  0x1000
#   DYNAMIC        0x9b5b88 0x00000000009b6b88 0x00000000009b6b88 0x0002f0 0x0002f0 RW  0x8
#   GNU_EH_FRAME   0x95ecd4 0x000000000095ecd4 0x000000000095ecd4 0x024ccc 0x024ccc R   0x4
#   GNU_STACK      0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW  0x0
#   GNU_RELRO      0x983a60 0x0000000000984a60 0x0000000000984a60 0x0345a0 0x0345a0 RW  0x10
# ----
#
# TODO: Evaluate if parallelism helps here. We /could/ use libdispatch to work
# in parallel over all artifacts.
class ParseState(object):
    """One step of the state machine that walks readelf program-header output.

    The states are visited in order: the "There are N program headers"
    banner, the "Program Headers:" title, the column header row that starts
    with "Type", and finally the per-header data rows. The data state is
    terminal: its ``next`` is the state itself.

    Each state owns a regular expression; a line "belongs" to the state when
    the expression matches at the start of the line.
    """

    # State identifiers; ``value`` is expected to hold one of these.
    firstLine = 0
    programHeadersLine = 1
    dataHeader = 2
    data = 3

    # One whitespace-separated hexadecimal column of a data row.
    _HEX_COLUMN = r"\s+0x[0-9a-fA-F]+"

    # Pattern text for every state. Raw strings are required here: "\d",
    # "\w" and "\s" are not valid escape sequences in ordinary string
    # literals and trigger warnings on current Python versions.
    #
    # The data pattern captures the header type (group 1), skips the five
    # hexadecimal columns (Offset, VirtAddr, PhysAddr, FileSiz, MemSiz) and
    # captures the flags text (group 2) that precedes the alignment column.
    _PATTERNS = {
        firstLine: r"There are (\d+) program headers",
        programHeadersLine: r"Program Headers:",
        dataHeader: r"\s+Type",
        data: r"^\s*(\w+)" + _HEX_COLUMN * 5 + r" (.+) 0x",
    }

    # Which state follows each non-terminal state.
    _SUCCESSORS = {
        firstLine: programHeadersLine,
        programHeadersLine: dataHeader,
        dataHeader: data,
    }

    def __init__(self, state=None):
        """Create a state; defaults to the initial ``firstLine`` state."""
        if state is None:
            state = ParseState.firstLine
        self.value = state

    @property
    def regex_string(self):
        """Pattern text recognising a line that belongs to this state.

        Raises:
            RuntimeError: if ``value`` is not one of the known states.
        """
        pattern = self._PATTERNS.get(self.value)
        if pattern is None:
            raise RuntimeError('Invalid ParseState value')
        return pattern

    @property
    def regex(self):
        """Compiled form of ``regex_string``."""
        return re.compile(self.regex_string)

    @property
    def next(self):
        """State to move to once a line has matched this state.

        The terminal ``data`` state returns itself so that every remaining
        row keeps being parsed as data.

        Raises:
            RuntimeError: if ``value`` is not one of the known states.
        """
        if self.value == ParseState.data:
            return self
        successor = self._SUCCESSORS.get(self.value)
        if successor is None:
            raise RuntimeError('Invalid ParseState value')
        return ParseState(successor)

    def matches(self, input_string):
        """Return the match object for `input_string`, or None if no match.

        Matching is anchored at the start of the string (``re.match``).
        """
        return self.regex.match(input_string)

def process_library(args, lib):
    """Check that no LOAD program header of `lib` is writable and executable.

    Runs ``args.read_elf --program-headers lib`` and feeds the output line by
    line through the ParseState state machine. Lines that do not match the
    current state are skipped. Parsing stops as soon as the number of program
    headers announced on the first line has been read.

    Failures are reported with explicit exceptions rather than ``assert`` so
    that the checks still run when Python is started with ``-O``.

    Args:
        args: parsed command-line arguments; only ``args.read_elf`` (the
            readelf executable to run) is used here.
        lib: path of the library to inspect; must be non-empty.

    Raises:
        RuntimeError: if `lib` is empty, if a LOAD header has both the "W"
            and "E" flags, or if the output ended before all announced
            program headers were seen.
        subprocess.CalledProcessError: if the readelf command exits with a
            non-zero status.
    """
    if not lib:
        raise RuntimeError("Expected a non-empty library path")

    print("Visiting lib: {}".format(lib))
    output = subprocess.check_output(
        [args.read_elf, "--program-headers", lib], universal_newlines=True)

    expected_headers = None
    seen_headers = 0
    state = ParseState()

    # The element after the final newline is dropped, as it is not a line.
    for line in output.split("\n")[:-1]:
        print("DUMP: '{}'".format(line))

        match = state.matches(line)
        if match is None:
            continue

        # The line belongs to the current state; advance the state machine
        # before acting on what was matched.
        matched_state = state
        state = matched_state.next

        if matched_state.value == ParseState.firstLine:
            expected_headers = int(match.group(1))
            continue

        # The title line and the column header row carry no data.
        if matched_state.value != ParseState.data:
            continue

        if match.group(1) == "LOAD":
            flags = match.group(2)
            print("Found LOAD command! Flags: '{}'. Full match: '{}'".format(flags, line))
            if "W" in flags and "E" in flags:
                raise RuntimeError("Found a load command that loads something executable and writeable")

        # Once every announced header has been read there is no point in
        # going through the rest of the readelf output.
        seen_headers += 1
        if seen_headers == expected_headers:
            return

    # Running out of lines before reading every announced header is a failure.
    raise RuntimeError(
        "Failed to parse the program headers of '{}': saw {} of {} expected "
        "headers".format(lib, seen_headers, expected_headers))

def get_libraries(package_path):
    """Return the paths beneath `package_path` whose name matches ``*.so``.

    The search is delegated to ``/usr/bin/find`` with a case-insensitive
    name match, and the paths are returned in the order find printed them.

    Raises:
        subprocess.CalledProcessError: if find exits with a non-zero status.
    """
    find_output = subprocess.check_output(
        ["/usr/bin/find", package_path, "-iname", "*.so"],
        universal_newlines=True)

    # Splitting on newlines always leaves one trailing element after the
    # last newline; discard it.
    paths = find_output.split("\n")
    paths.pop()
    return paths

def main():
    """Check every shared library under the given package path, then exit 0.

    Command-line arguments (all positional): ``package_path``, ``tmp_dir``
    and ``read_elf``. ``tmp_dir`` is accepted but not used by this function.
    """
    parser = argparse.ArgumentParser()
    for positional in ('package_path', 'tmp_dir', 'read_elf'):
        parser.add_argument(positional)
    args = parser.parse_args()

    # When linking the swiftCompilerModules to lldb, the text segment
    # gets RWE for some reason.
    # TODO: remove this workaround once rdar://87078244 is fixed
    #
    # Additionally, the cpython lldb library has this problem too.
    # TODO: remove that workaround once rdar://124693792 is fixed
    skipped_markers = ("liblldb.so", "_lldb.cpython")

    for library in get_libraries(args.package_path):
        if any(marker in library for marker in skipped_markers):
            continue
        process_library(args, library)

    sys.exit(0)

# Run the check only when this file is executed as a script (as the RUN line
# at the top of the file does), not when it is imported as a module.
if __name__ == "__main__":
    main()