File: stap-profile-annotate.in

package info (click to toggle)
systemtap 5.1-5
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 47,964 kB
  • sloc: cpp: 80,838; ansic: 54,757; xml: 49,725; exp: 43,665; sh: 11,527; python: 5,003; perl: 2,252; tcl: 1,312; makefile: 1,006; javascript: 149; lisp: 105; awk: 101; asm: 91; java: 70; sed: 16
file content (383 lines) | stat: -rwxr-xr-x 16,055 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
#!/usr/bin/python3

# This script uses tapset/hit-count.stp to profile a specific process
# or the kernel. It may take a context width, module path, pid, cmd, and timeout.
# It generates folders based on buildid, containing subdirectories
# leading to sourcefiles where one may read how many times the pc
# was at a certain line in that sourcefile.


import argparse
import sys
import os
import re
import subprocess
import tempfile
from collections import defaultdict

# Command-line interface.  -x/--pid and -c/--cmd are mutually exclusive.
parser = argparse.ArgumentParser()
pid_cmd_group = parser.add_mutually_exclusive_group()
pid_cmd_group.add_argument(
    "-x", "--pid",
    type=int,
    help='PID for systemtap to target.',
)
pid_cmd_group.add_argument(
    "-c", "--cmd",
    type=str,
    help='Command for systemtap to target.',
)
parser.add_argument(
    '-d',
    metavar="BINARY",
    type=str,
    action='append',
    default=[],
    help='Add symbol information for given binary and its shared libraries.',
)
parser.add_argument(
    "-e", "--events",
    type=str,
    default='timer.profile',
    help='Override the list of profiling probe points.',
)
parser.add_argument(
    "-T", "--timeout",
    type=int,
    help="Exit in 'timeout' seconds.",
)
parser.add_argument(
    "-p", "--print",
    action='store_true',
    help="Print annotated source files to stdout instead of files.",
)
parser.add_argument(
    "-w", "--context-width",
    metavar="WIDTH",
    type=int,
    default=-1,
    help='Limit number of lines of context around each hit.  Defaults to unlimited.',
)
parser.add_argument(
    "-s", "--stap",
    metavar="PATH",
    type=str,
    help='Override the path to the stap interpreter.',
)
parser.add_argument(
    "-v", "--verbose",
    action='count',
    default=0,
    help="Increase verbosity.",
)

# Module-wide settings derived from the command line and the environment.
args = parser.parse_args()
verbosity = args.verbose
DB_URLS = os.environ.get("DEBUGINFOD_URLS")

def vprint(level, *args):
    """Print *args* to stdout, but only when the global verbosity is at least *level*."""
    if verbosity < level:
        return
    print(*args)


# SystemTap script run by the data collector.  On every profiling event
# (args.events) it counts user-space hits per (buildid, module-relative
# address); once a second and at exit it dumps the counts between BEGIN and
# END marker lines and resets them.  Summary lines are printed on end/error.
stap_script = "".join((
    """
global count
global unknown
global kernel
global user
probe begin {
  system("echo Starting stap data collector.") # sent to stdout of stap-profile-annotate process
}
probe """,
    args.events,
    """ {
  if (! user_mode()) {
    kernel <<< 1
    next
  }
  try {
    if (target()==0 || target_set_pid(pid()))
      {
        buildid = umodbuildid(uaddr());
        addr= umodaddr(uaddr());
        count[buildid,addr] <<< 1;
        user <<< 1
      }
  }
  catch /*(e)*/ { unknown <<< 1 /* printf ("%s", e) */ }
}

probe timer.s(1),end
{
  println ("BEGIN");
  foreach ( [buildid, addr] in count)
    {
      c = @count(count[buildid,addr]);
      println(buildid, " " , addr, " ", c);
    }
  println ("END");
  delete count
}
probe end,error
{
  printf ("Counted %d known userspace hits.\\n", @count(user))
  if (@count(kernel))
    printf ("Ignored %d kernel hits.\\n", @count(kernel))
  if (@count(unknown))
    printf ("Ignored %d unknown userspace hits.\\n", @count(unknown))
  println("Stopped stap data collector.")
}
""",
))

# buildid class
class BuildIDProfile:
    """Hit counts for one build-id and their attribution to source lines.

    counts maps a module-relative address to its accumulated hit count.
    sources maps a source path, as printed by eu-addr2line, to the
    SourceLineProfile that holds the per-line hits for that file.
    """

    def __init__(self,buildid):
        self.counts = defaultdict(int)
        self.buildid = buildid
        # Name of the scratch file that used to be written under /tmp.  The
        # attribute is kept for compatibility; no such file is created now.
        self.filename = self.buildid + 'addrs.txt'
        self.sources = {}

    def __str__(self):
        return "BuildIDProfile(buildid %s) items: %s sources: %s" % (self.buildid, self.counts.items(), self.sources.items())

    # Build the 'counts' dict by adding the hit count to its associated address
    def accumulate(self,pc,count):
        """Add *count* hits to the relative address *pc*."""
        self.counts[pc] += count

    @staticmethod
    def _parse_location(text):
        """Split one eu-addr2line output line into (source, line_number).

        The source is everything before the first ':'.  The line number is
        the first run of digits in the field after it, which also copes with
        trailing text such as "12 (discriminator 3)" or a ":column" suffix.
        Returns None when the line has no ':' or no digits in that field.
        The returned line number is as printed, i.e. counted from 1.
        """
        fields = text.split(':')
        if len(fields) < 2:
            return None
        m = re.search('[0-9]+', fields[1])
        if m is None:
            return None
        return fields[0], int(m.group(0))

    # Find the sources of the relative addresses in self.counts
    def get_sources(self):
        """Resolve every counted address to source:line and fill self.sources."""
        vprint(1,"Computing addr2line for %s" % (self.buildid))
        # Fix the address order once: the n-th output line of eu-addr2line
        # answers the n-th address that was fed to it.
        ordered_keys = list(self.counts)
        dbginfo = self.get_debuginfo(ordered_keys)

        for pc, text in zip(ordered_keys, dbginfo.split('\n')):
            location = self._parse_location(text)
            if location is None:
                continue
            src, line_number = location
            if src not in self.sources:
                self.sources[src] = SourceLineProfile(self.buildid,src)
            # eu-addr2line counts lines from 1, whereas SourceLineProfile.report
            # counts from 0, so shift by one here.
            self.sources[src].accumulate(line_number-1, self.counts[pc])
        vprint(2,"Mapped to %d source files" % (len(self.sources),))

    # Report information for this buildid's source files
    def report(self,totalhits):
        """Have every source file of this build-id report against *totalhits*."""
        for so in self.sources.values():
            so.report(totalhits)

    # Get source:linenum information for relative addresses
    def get_debuginfo(self, addresses=None):
        """Return the raw eu-addr2line output for *addresses*.

        addresses is an iterable of integer relative addresses; when omitted
        the keys of self.counts are used in their current order.  The
        debuginfo file is located with debuginfod-find and the addresses are
        passed to eu-addr2line on its standard input, one hex value per line.

        Returns the decoded output, or '' after printing the reason when the
        debuginfo cannot be found or a tool cannot be run.
        """
        if addresses is None:
            addresses = list(self.counts)
        try:
            # Get the debuginfo of the buildid retrieved from stap
            found = subprocess.run(['debuginfod-find', 'debuginfo', self.buildid],
                                   stdout=subprocess.PIPE)
            dbg_file = found.stdout.decode('utf-8').rstrip()
            if dbg_file == '':
                raise RuntimeError("No debug file for bid %s from debuginfod servers: %s" % (self.buildid, DB_URLS))
            vprint(2, "Stored debuginfod-find debuginfo file as %s" % (dbg_file))
            # Argument vector instead of a shell string, so the path is never
            # interpreted by a shell; addresses go through a pipe, not /tmp.
            stdin_text = ''.join(hex(a) + '\n' for a in addresses)
            resolved = subprocess.run(['eu-addr2line', '-A', '-e', dbg_file],
                                      input=stdin_text.encode('utf-8'),
                                      stdout=subprocess.PIPE)
            return resolved.stdout.decode('utf-8')
        except (OSError, RuntimeError, UnicodeDecodeError) as e:
            print (e)
            return ''


# Contains information related to each source of a buildid
class SourceLineProfile:
    """Per-line hit counts for one source file of one build-id.

    counts maps a 0-based line number to the hits accumulated on that line.
    """

    class _ContextBlob:
        """A run of lines [lower, upper] shown around one or more nearby hits."""

        def __init__(self, lower, upper, hit):
            self.lower = lower
            self.upper = upper
            self.hits = [hit]

        def __str__(self):
            hits = ', '.join(str(i) for i in self.hits)
            if self.lower != self.upper:
                return "Hits: %s. Context from lines %s to %s" % (hits, self.lower, self.upper)
            return "Hits: %s. Context of line %s" % (hits, self.upper)

        def get_context(self):
            """Return the comment line that introduces this blob in the output."""
            return "//" + str(self) +"\n"

    def __init__(self,  bid, source):
        self.bid = bid
        self.source = source
        self.counts = defaultdict(int)

    def __str__(self):
        return "SourceLineProfile(bid %s, source %s) counts: %s" % (self.bid, self.source, self.counts.items())

    # Accumulate hits on a line
    def accumulate(self, line, count):
        """Add *count* hits to the 0-based line number *line*."""
        self.counts[line] += count

    # Get the source file associated with a buildid
    def get_source_file(self):
        """Fetch this source file via debuginfod-find.

        Returns the path printed by debuginfod-find, or None after printing
        the reason when the file is unavailable or the tool cannot be run.
        """
        try:
            found = subprocess.run(['debuginfod-find', 'source', self.bid, self.source],
                                   stdout=subprocess.PIPE)
            sourcefile = found.stdout.decode('utf-8').rstrip()
            if sourcefile == '':
                raise RuntimeError("No source file for bid %s, source %s from debuginfod servers: %s" % (self.bid, self.source, DB_URLS))
            vprint(2, "Stored debuginfod-find source file as %s" % (sourcefile))
            return sourcefile
        except (OSError, RuntimeError, UnicodeDecodeError) as e:
            print (e)
            return None

    def _format_line(self, linenum, text):
        """Return one output line: the hit count (or blanks) followed by the source text."""
        if linenum in self.counts:
            return "%07d %s\n" % (self.counts[linenum], text.rstrip())
        return "%7s %s\n" % ("", text.rstrip())

    def _annotate(self, src_lines, context_width):
        """Return the annotated output lines for the source text *src_lines*.

        src_lines is the list of source lines.  A negative context_width
        means unlimited: every line is emitted and no context headers are
        written.  Otherwise only lines within context_width of a hit are
        emitted, each run of such lines preceded by a "//Hits: ..." header.
        Returns an empty list when no hits were recorded.
        """
        hitlines = sorted(self.counts)
        if not hitlines:
            return []
        if context_width < 0:
            return [self._format_line(n, text) for n, text in enumerate(src_lines)]

        # Merge the windows of hits that overlap or border one another.
        blobs = []
        for hit in hitlines:
            lower = hit - context_width
            upper = hit + context_width
            # - 1 to connect blobs bordering one another
            if blobs and blobs[-1].upper >= lower - 1:
                blobs[-1].upper = upper
                blobs[-1].hits.append(hit)
            else:
                blobs.append(self._ContextBlob(lower, upper, hit))
        # Clamp to the file, so a window starting before line 0 still gets
        # its header and the header names lines that really exist.
        last_line = len(src_lines) - 1
        for b in blobs:
            b.lower = max(b.lower, 0)
            b.upper = min(b.upper, last_line)

        out = []
        current = 0
        for linenum, text in enumerate(src_lines):
            # Skip the blobs whose context we have already passed.
            while current < len(blobs) and blobs[current].upper < linenum:
                current += 1
            if current == len(blobs):
                break
            b = blobs[current]
            if linenum < b.lower:
                continue
            if linenum == b.lower:
                out.append(b.get_context())
            out.append(self._format_line(linenum, text))
        return out

    # Reporting function for the source file
    def report(self, totalhits):
        """Print a summary line for this file and emit its annotated source.

        totalhits is the hit count across all build-ids, used for the
        percentage.  The annotated source goes to stdout with --print,
        otherwise to a file below the directory profile-<buildid>.
        """
        filehits=sum(self.counts.values())
        percent = filehits/totalhits*100 if totalhits else 0.0
        if self.source == '??' or self.source == '':
            vprint(0,"%08d (%.2f%%) hits in buildid %s with unknown source" % (filehits, percent,
                                                                               self.bid))
            return
        # Retrieve the sourcefile's name
        sourcefile = self.get_source_file()
        if sourcefile is None or sourcefile == '':
            return

        relative = (sourcefile.split('/')[-1]).replace('##','/').lstrip('/')
        outfile = os.path.join('profile-'+self.bid, relative)
        # Rewrite every "/.." so that no path component can climb out of the
        # profile-<buildid> directory.
        outfile = outfile.replace('/..', '/dotdot')

        # Try creating the appropriate directory
        if not args.print:
            try:
                os.makedirs(os.path.dirname(outfile), exist_ok=True)
            except OSError as e:
                print(e)
                return

        # Output source code to 'outfile' and if a line has associated hits (read out of sourcefile)
        # then add the hit count before that line. If a context_width is present, only
        # print the surrounding lines for context in accordance with context_width
        vprint(0,"%07d (%.2f%%) hits in %s over %d lines." % (filehits, percent,
                                                             outfile, len(self.counts)))
        try:
            # errors='replace' keeps a source file in another encoding from
            # aborting the whole report.
            with open(sourcefile, 'r', errors='replace') as f:
                src_lines = f.readlines()
        except OSError as e:
            print(e)
            return

        annotated = self._annotate(src_lines, args.context_width)
        if args.print:
            sys.stdout.writelines(annotated)
        else:
            with open(outfile, 'w') as of:
                of.writelines(annotated)

def parse_stap_output(lines):
    """Classify the lines written by the stap data collector.

    lines is an iterable of text lines.  Profile records are the lines
    between a "BEGIN" line and the next "END" line.  Yields, in input order:

      ('hit', (buildid, pc, hits))  for a well-formed record, pc and hits as int
      ('bad', message)              for a record that could not be parsed
      ('diag', text)                for a non-empty line outside BEGIN..END
    """
    in_records = False
    for raw in lines:
        line = raw.rstrip()
        # Whole-line comparison, so a diagnostic that merely contains the
        # letters "END" or "BEGIN" is not mistaken for a marker.
        if line == "BEGIN":
            in_records = True
        elif line == "END":
            in_records = False
        elif not in_records:
            if line != "": # diagnostic message
                yield ('diag', line)
        else: # an actual profile record
            try:
                (buildid, pc, hits) = line.split()
                record = (buildid, int(pc), int(hits))
            except ValueError as e: # parse error?
                yield ('bad', str(e))
            else:
                yield ('hit', record)


def __main__():
    """Run stap to collect a profile, then annotate sources for every build-id.

    Raises an Exception subclass when DEBUGINFOD_URLS is unset or a
    command-line value is out of range.
    """
    # We require $DEBUGINFOD_URLS
    if (not DB_URLS):
        raise RuntimeError("Required DEBUGINFOD_URLS is unset.")

    # Check the arguments before any temporary file exists
    if args.timeout and args.timeout < 0:
        raise ValueError("Timeout must be positive")
    if args.pid and args.pid < 0:
        raise ValueError("pid must be positive")
    if args.context_width and args.context_width < -1:
        raise ValueError("context_width must be positive or -1 (for all file)")

    stap_cmd = "@prefix@/bin/stap"  # not @ bindir @ because autoconf expands that to shell $var expressions
    if args.stap:
        stap_cmd = args.stap

    buildids = {} # dict from buildid hexcode to BuildIdProfile object
    proflines = 0
    totalhits = 0

    # Run SystemTap
    (tmpfd,tmpfilename) = tempfile.mkstemp()
    os.close(tmpfd) # the file is handed to stap and reopened below by name
    try:
        stap_args = ['--ldd', '-o'+tmpfilename]
        if args.cmd:
            stap_args += ['-c', args.cmd]
        if args.timeout:
            stap_args += ['-T', str(args.timeout)]
        if args.pid:
            stap_args += ['-x', str(args.pid)]
        for d in args.d:
            stap_args += ['-d', d]
        stap_args += ['-e', stap_script]

        vprint(1,"Building stap data collector.")
        vprint(2,"%s %s" % (stap_cmd, stap_args))

        p = subprocess.Popen([stap_cmd] + stap_args)
        try:
            p.communicate() # wait until process exits
        except KeyboardInterrupt:
            pass
        p.kill()
        p.wait() # reap the child so it does not linger as a zombie

        with open(tmpfilename,"r") as stap_output: # read stap output, text mode
            for kind, payload in parse_stap_output(stap_output):
                if kind == 'diag':
                    vprint(0,payload)
                elif kind == 'bad':
                    vprint(2,payload)
                else:
                    (buildid,pc,hits) = payload
                    vprint(3,"(%s,%s,%s)" % (buildid,pc,hits))
                    proflines += 1
                    totalhits += hits
                    bidp = buildids.get(buildid)
                    if bidp is None:
                        bidp = buildids[buildid] = BuildIDProfile(buildid)
                    # Accumulate hits for offset pc
                    bidp.accumulate(pc,hits)
    finally:
        # Remove the temporary file even when stap could not be started
        os.remove(tmpfilename)

    vprint(0, "Consumed %d profile records of %d hits across %d buildids." % (proflines, totalhits, len(buildids)))

    # Output source information for each buildid
    for buildid, bidp in buildids.items():
        bidp.get_sources()
        bidp.report(totalhits)

# Run the profiler only when this file is executed as a script.
if __name__ == '__main__':
    __main__()