File: find-most-common-warn-messages.py

package info (click to toggle)
libreoffice 1:6.1.5-3
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 2,699,584 kB
  • sloc: cpp: 4,078,558; xml: 324,485; java: 281,935; python: 48,236; ansic: 36,476; perl: 32,383; sh: 13,671; yacc: 10,820; makefile: 9,085; cs: 6,600; lex: 2,184; objc: 1,904; awk: 978; pascal: 949; asm: 866; php: 79; csh: 20; sed: 5
file content (39 lines) | stat: -rwxr-xr-x 1,357 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/usr/bin/python

# A script to search our test logs and sort the messages by how common they are so we can start to
# reduce the noise a little.

import sys
import re
import io
import subprocess

# find . -name '*.log' | xargs grep -h 'warn:' | sort | uniq -c | sort -n --field-separator=: --key=5,6

# Collect every line containing 'warn:' from the *.log files below workdir; the shell pipeline
# hands the lines over already sorted, and we read them as text from its stdout.
process = subprocess.Popen(
    "find workdir -name '*.log' | xargs grep -h 'warn:' | sort",
    stdout=subprocess.PIPE,
    universal_newlines=True,
    shell=True)

# Count the warnings per source location and remember one example message for each.
messages = dict() # dict of sourceAndLine->count
sampleOfMessage = dict() # dict of sourceAndLine->message text of the first line seen for it
for line in process.stdout:
    line = line.strip()
    # a sample line is:
    #    warn:sw:18790:1:sw/source/core/doc/DocumentRedlineManager.cxx:98: redline table corrupted: overlapping redlines
    # Split on the first six colons only: the message text may itself contain colons, and this
    # keeps it intact in tokens[6] instead of having to search for it inside the line (which
    # picks the wrong position when the same text also occurs earlier in the line).
    tokens = line.split(":", 6)
    if len(tokens) < 6:
        # grep matches 'warn:' anywhere in a line, so not every hit carries the full prefix
        # shown above; skip such lines rather than dying with an IndexError.
        continue
    sourceAndLine = tokens[4] + ":" + tokens[5]
    if sourceAndLine in messages:
        messages[sourceAndLine] += 1
    else:
        messages[sourceAndLine] = 1
        # A line may end right after the line number, in which case there is no message text.
        sampleOfMessage[sourceAndLine] = tokens[6] if len(tokens) > 6 else ""

# All output has been consumed: close our end of the pipe and reap the shell process.
process.stdout.close()
process.wait()

# Rank the source locations by how often they warned and print the most frequent ones.
tmplist = list() # list of [count, sourceAndLine]
# items() exists on both Python 2 and Python 3, whereas iteritems() is Python 2 only.
for key, value in messages.items():
    tmplist.append([value,key])

print( "The top 20 warnings" )
# A bare "print" only emits a blank line on Python 2; print("") does so on both 2 and 3.
print("")
# The sort is ascending and the slice keeps the last 20 entries, so the most common
# warning is printed last.
for i in sorted(tmplist, key=lambda v: v[0])[-20:]:
    print( "%6d %s %s" % (i[0], i[1], sampleOfMessage[i[1]]) )