File: semanticDiffVersion.py

package info (click to toggle)
vtk6 6.3.0%2Bdfsg2-8.1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 118,972 kB
  • sloc: cpp: 1,442,790; ansic: 113,395; python: 72,383; tcl: 46,998; xml: 8,119; yacc: 4,525; java: 4,239; perl: 3,108; lex: 1,694; sh: 1,093; asm: 154; makefile: 68; objc: 17
file content (493 lines) | stat: -rwxr-xr-x 20,609 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
#!/usr/bin/env python
"""
This script generates API changes between two different versions of VTK.
The script uses git and ctags to present a list of classes added/removed and
public methods added/removed when going from one version to the other.
"""
# ctags generated by the command:
# ctags -R --sort=yes --c++-kinds=cf --fields=aiknsz --language-force=C++
# --exclude=*.in --exclude=*.java --exclude=*.py --exclude=*.css --exclude=*.js
# --exclude=*.bmp -f vtk ~/Projects/vtk/src

import distutils.spawn
import os.path
import re
import shutil
import subprocess
import sys
import tempfile

try:
    import argparse
except ImportError:
    from vtk.util import _argparse as argparse

# Matches one line of a ctags tag file and captures, in order: the tag name,
# the source file path, the declaration text found between /^ and $/, and the
# tab-separated extension fields that follow the ;" marker.
# Written as a raw string so that \S, \^ and \$ reach the regex engine as
# regex escapes instead of being (invalid) Python string escapes.
tagMatcherType = re.compile(r'^(.+)\t(\S+)\t/\^(.*)\$/;"\t(.*)\n')

class Tag:
    """
    One entry of a ctags tag file.

    All attributes start as None and are filled in by parse():
      _name     -- tag name (first column)
      _fyle     -- source file path; ":<line>" is appended when the entry
                   carries a "line" extension field
      _decl     -- declaration text captured from the search pattern
      _kind     -- value of the "kind" field (the script uses 'c' and 'f')
      _class    -- value of the "class" or "struct" field, with any
                   "__anon..." scope prefix removed
      _access   -- value of the "access" field
      _inherits -- value of the "inherits" field
    """

    def __init__(self):
        self._name = None
        self._access = None
        self._kind = None
        self._fyle = None
        self._decl = None
        self._class = None
        self._inherits = None

    def __repr__(self):
        # The attributes are stored with a leading underscore; the
        # un-prefixed names do not exist on the instance.
        return "<Tag %s in file %s>" % (self._name, self._fyle)

    def parse(self, line):
        """
        Parse a CTags line and set myself from it.

        line must match the module-level tagMatcherType pattern; a KeyError
        is raised otherwise.
        """
        # example of CTags lines:
        #
        # Type1: ARangeFunctor /home/sankhesh/Projects/vtk/src/Common/Core/Testing/Cxx/TestSMP.cxx /^class ARangeFunctor$/;" kind:c
        #
        # Type2: APIDiagram  /home/sankhesh/Projects/vtk/src/Charts/Core/Testing/Cxx/TestDiagram.cxx /^class APIDiagram : public vtkContextItem$/;"  kind:c  inherits:vtkContextItem
        #
        # Type3: AbortFlagOff /home/sankhesh/Projects/vtk/src/Common/Core/vtkCommand.h  /^  void AbortFlagOff()$/;" kind:f  class:vtkCommand  access:public
        #
        # Type4: BooleanSet     /home/sankhesh/Projects/vtk/src/Filters/General/vtkMultiThreshold.h     /^  class BooleanSet : public Set {$/;" kind:c  class:vtkMultiThreshold inherits:Set    access:public

        tag = tagMatcherType.search(line)
        if not tag:
            raise KeyError("line %s is not an expected ctags line" % line)

        self._name = tag.group(1).strip()
        self._fyle = tag.group(2).strip()
        self._decl = tag.group(3).strip()

        for ext in tag.group(4).strip().split('\t'):
            # Split on the FIRST colon only: values such as "ns::Class" contain
            # colons themselves, and a field without any colon must not raise.
            pre, _, suf = ext.strip().partition(':')
            if pre == 'kind':
                self._kind = suf
            elif pre == 'class' or pre == 'struct':
                self._class = self._strip_anon_scope(suf)
            elif pre == 'access':
                self._access = suf
            elif pre == 'inherits':
                self._inherits = suf
            elif pre == 'line':
                self._fyle = self._fyle + ":" + suf

    def _strip_anon_scope(self, scope):
        """
        Drop everything up to and including the first "__anon...::" component
        of scope. Returns "" when the "__anon" component is the last one, and
        scope unchanged when it has no "__anon" component at all.
        """
        first_anon = scope.find('__anon')
        if first_anon < 0:
            # Names that merely contain "anon" are ordinary class names.
            return scope
        first_dcolon = scope.find("::", first_anon)
        if first_dcolon < 0:
            return ""
        return scope[first_dcolon + 2:]

class CTags():
    """
    Collects the classes and public methods listed in a ctags tag file.

    Attributes filled in while parsing:
      _tagFile     -- path of the tag file most recently given to set_tag_file
      _classes     -- names of all tags of kind 'c'
      _pub_methods -- "<class>:<name>" for every tag of kind 'f' whose access
                      is 'public'
      _class_file  -- maps a class name to the file recorded for it
      _met_file    -- maps a "<class>:<name>" method key to its file
    """

    def __init__(self):
        self._tagFile = "" # path to tag file
        self._classes = []
        self._pub_methods = []
        self._class_file = {}
        self._met_file = {}

    def set_tag_file(self, path):
        """
        Parse tags from the given file.
        """
        self._tagFile = path
        # The context manager closes the handle even if a line fails to parse.
        with open(path, "r") as handle:
            self._parse(handle)

    def _parse(self, lines):
        """
        Record every class and public method found in lines, an iterable of
        tag file lines. Lines that are not tag entries (tag file header lines
        and ctags null-tag warnings) are skipped; any other line that Tag
        cannot parse makes Tag.parse raise.
        """
        skipped_prefixes = ('!_TAG_', 'ctags: Warning: ignoring null tag')
        for line in lines:
            if line.startswith(skipped_prefixes):
                continue

            t = Tag()
            t.parse(line)

            # Now append to the lists of classes and public methods
            if t._kind == 'c':
                self._classes.append(t._name)
                self._class_file[t._name] = t._fyle
            if t._kind == 'f' and t._access == 'public':
                # _class stays None when the entry has no class/struct field;
                # fall back to an empty scope instead of failing on None + str.
                met = (t._class or "") + ":" + t._name
                self._pub_methods.append(met)
                self._met_file[met] = t._fyle

class CompareVersions():
    """
    Compare two git revisions of a source tree.

    Constructing an instance does all the work: the working tree is cloned
    into tmp_dir, a diff between the two versions is scanned for newly added
    VTK_LEGACY / VTK_LEGACY_BODY lines, a ctags file is generated for each
    version, and the class / public method differences are computed.

    Results:
      classes_added / classes_removed         -- lists of class names
      pub_methods_added / pub_methods_removed -- lists of "<class>:<name>"
      _classes_added, _classes_removed,
      _pub_methods_added, _pub_methods_removed -- the same names mapped to
                                                  the file they were found in
      dep_methods -- maps the VTK_LEGACY argument text to [file, as-of text]
                     ("?" when only VTK_LEGACY was seen)

    Any failing external command prints a message and calls sys.exit(1).
    Messages are printed with single-argument print(...) calls so that the
    code behaves the same under Python 2 and Python 3.
    """

    def __init__(self, src_dir, version1, version2, tmp_dir):
        self.tmp_dir = os.path.abspath(tmp_dir)
        self.src_work_dir = os.path.abspath(src_dir)
        # Take the basename of the absolute path: with a trailing separator
        # on src_dir the plain basename is empty and the clone target would
        # be the temporary directory itself.
        self.src_dir = os.path.join(self.tmp_dir,
                os.path.basename(self.src_work_dir))
        self.version1 = version1
        self.version2 = version2
        self.classes_removed = []
        self._classes_removed = {}
        self.classes_added = []
        self._classes_added = {}
        self.pub_methods_removed = []
        self._pub_methods_removed = {}
        self.pub_methods_added = []
        self._pub_methods_added = {}
        self.tag_f1 = None
        self.tag_f2 = None
        self._git_HEAD = None
        self._git_diff = None
        self.dep_methods = {}
        # Get system executable paths for git and ctags
        self.svar = SystemVar()
        # Setup the temporary working tree
        self.setup_src_dir()

        # Get newly deprecated methods list
        self.get_deprecated_methods()

        # Compare the two versions provided
        self.generate_ctags()
        self.get_diff()

    def _run(self, cmd, stdout=subprocess.PIPE):
        """
        Run cmd (a list of arguments) and return (exit status, stdout text,
        stderr text). stdout text is None when stdout is redirected to a file.
        """
        proc = subprocess.Popen(cmd, stdout=stdout, stderr=subprocess.PIPE,
                universal_newlines=True)
        # communicate() drains both pipes while waiting; wait() followed by
        # read() can block forever once a pipe buffer fills up.
        out, err = proc.communicate()
        return proc.returncode, out, err

    def _git_cmd(self, src_dir, *args):
        """
        Build the argument list for a git command operating on src_dir.
        Passing a list (not a split string) keeps paths with spaces intact.
        """
        return [self.svar.git_exe,
                '--git-dir=' + os.path.join(src_dir, '.git'),
                '--work-tree=' + src_dir] + list(args)

    def _file_safe(self, ver):
        """Return ver with path separators replaced so it can be part of a file name."""
        for sep in ('/', os.sep):
            ver = ver.replace(sep, '_')
        return ver

    def get_diff(self):
        """
        Parse both tag files and fill in the added/removed class and public
        method lists together with their name -> file dictionaries.
        """
        print("Parsing tag file %s..." % self.tag_f1)
        T1 = CTags()
        T1.set_tag_file(self.tag_f1)
        print("Parsing tag file %s...Done" % self.tag_f1)
        print("Parsing tag file %s..." % self.tag_f2)
        T2 = CTags()
        T2.set_tag_file(self.tag_f2)
        print("Parsing tag file %s...Done" % self.tag_f2)

        old_classes, new_classes = set(T1._classes), set(T2._classes)
        old_methods, new_methods = set(T1._pub_methods), set(T2._pub_methods)
        self.classes_added = list(new_classes - old_classes)
        self.classes_removed = list(old_classes - new_classes)
        self.pub_methods_added = list(new_methods - old_methods)
        self.pub_methods_removed = list(old_methods - new_methods)

        # The recorded file may carry a ":<line>" suffix; keep the path only.
        for cls in self.classes_added:
            self._classes_added[cls] = T2._class_file[cls].split(":")[0]
        for cls in self.classes_removed:
            self._classes_removed[cls] = T1._class_file[cls].split(":")[0]
        for met in self.pub_methods_added:
            self._pub_methods_added[met] = T2._met_file[met].split(":")[0]
        for met in self.pub_methods_removed:
            self._pub_methods_removed[met] = T1._met_file[met].split(":")[0]

    def git_checkout_version(self, ver):
        """
        Check out ver in the temporary clone. Exits if the clone has locally
        modified files or if the checkout fails.
        """
        # Make sure working tree does not have any local modifications
        _, modified, _ = self._run(
                self._git_cmd(self.src_dir, 'ls-files', '-m'))
        if modified:
            print("Working tree has local modifications.")
            print("Please commit or reset the following files:")
            print(modified)
            sys.exit(1)

        # Reset the working tree to version specified. Success is judged by
        # the exit status: git reports progress on stderr even when the
        # checkout succeeds, so non-empty stderr is not an error by itself.
        status, _, err = self._run(
                self._git_cmd(self.src_dir, 'checkout', ver))
        if status != 0:
            print("Error while git reset to version: %s" % ver)
            print(err)
            sys.exit(1)

    def create_tagfile(self, src_dir, fname):
        """Run ctags recursively over src_dir, writing the tag file fname."""
        status, _, err = self._run([self.svar.ctags_exe, '-R', '--sort=yes',
            '--c++-kinds=cf', '--fields=aiknsz', '--language-force=C++',
            '--exclude=*.in', '--exclude=*.java', '--exclude=*.py',
            '--exclude=*.css', '--exclude=*.js', '--exclude=*.bmp',
            '-f', fname, src_dir])
        if status != 0:
            print("Error while creating tagfile with ctags: %s" % fname)
            print(err)
            sys.exit(1)

    def generate_ctags(self):
        """
        Record the clone's current HEAD, then check out each version in turn
        and create its tag file (stored in tag_f1 and tag_f2).
        """
        # Store the current HEAD SHA-1 sum
        print("Getting the SHA-1 sum of HEAD of current working tree...")
        self._git_HEAD = self.git_current_version(self.src_dir)
        print("Getting the SHA-1 sum of HEAD of current working tree... %s" %
            self._git_HEAD)

        for attr, ver in (('tag_f1', self.version1),
                          ('tag_f2', self.version2)):
            # Reset working tree to this version
            print("Checking out version %s..." % ver)
            self.git_checkout_version(ver)
            # Create tag file for this version
            tag_file = self.gen_tagfile_name(ver)
            setattr(self, attr, tag_file)
            print("Creating tagfile (%s) for version %s..." % (tag_file, ver))
            self.create_tagfile(self.src_dir, tag_file)
            print("Creating tagfile (%s) for version %s...Done" %
                (tag_file, ver))

    def gen_tagfile_name(self, ver):
        """Return the path, inside tmp_dir, of the tag file for version ver."""
        return os.path.join(self.tmp_dir,
                os.path.basename(self.src_dir) + "_" + self._file_safe(ver) +
                '.ctags')

    def git_current_version(self, src_dir):
        """Return the SHA-1 of HEAD in src_dir, without surrounding whitespace."""
        status, sha, err = self._run(
                self._git_cmd(src_dir, 'rev-parse', 'HEAD'))
        if status != 0:
            print("Error while getting current HEAD SHA-1 sum:")
            print(err)
            sys.exit(1)
        return sha.strip()

    def git_diff_versions(self):
        """
        Write "git diff version1..version2" of the clone to a file in tmp_dir
        and remember its path in _git_diff.
        """
        self._git_diff = os.path.join(self.tmp_dir,
                os.path.basename(self.src_dir) +
                "_" + self._file_safe(self.version1) +
                "_" + self._file_safe(self.version2) + ".diff")
        with open(self._git_diff, "w") as diff_file:
            status, _, err = self._run(
                    self._git_cmd(self.src_dir, 'diff',
                        self.version1 + '..' + self.version2),
                    stdout=diff_file)
        if status != 0:
            print("Error while getting git diff between versions %s and %s" %
                (self.version1, self.version2))
            print(err)
            sys.exit(1)

    def get_deprecated_methods(self):
        """
        Scan the diff between the two versions for added lines containing
        VTK_LEGACY(...) or VTK_LEGACY_BODY(..., "...") and record them in
        dep_methods. Lines belonging to vtkSetGet.h are ignored.
        """
        fnameMatcher = re.compile(r'diff --git .* b/(.*)\n')
        depMatcher = re.compile(r'^\+.*VTK_LEGACY\((.*)\).*\n')
        rtDepMatcher = re.compile(r'^\+.*VTK_LEGACY_BODY\((.*),\s*"(.*)"\).*\n')
        self.git_diff_versions()
        fname = ''
        # The diff is only read here, so plain read mode is enough.
        with open(self._git_diff, "r") as diff_file:
            for line in diff_file:
                fnamematch = fnameMatcher.search(line)
                if fnamematch:
                    # Remember which file the following hunks belong to
                    fname = fnamematch.group(1).strip()
                if "vtkSetGet.h" in fname:
                    continue
                dep_diff = depMatcher.search(line)
                if dep_diff:
                    self.dep_methods[dep_diff.group(1).strip()] = [fname, "?"]
                    continue
                dep_diff = rtDepMatcher.search(line)
                if dep_diff:
                    self.dep_methods[dep_diff.group(1).strip()] =\
                        [fname, dep_diff.group(2).strip()]

    def setup_src_dir(self):
        """Clone the working tree (src_work_dir) into src_dir inside tmp_dir."""
        print("Setting up a clone of working tree (%s) in %s..." %
            (self.src_work_dir, self.src_dir))
        git_cmd = [self.svar.git_exe, 'clone', '-l', '--no-hardlinks', '-q',
                self.src_work_dir, self.src_dir]
        status, _, err = self._run(git_cmd)
        if status != 0:
            print("Error while git clone into temporary working directory.")
            print("Command Used: %s" % ' '.join(git_cmd))
            print(err)
            sys.exit(1)
        print("Setting up a clone of working tree (%s) in %s...Done" %
            (self.src_work_dir, self.src_dir))

    def cleanup(self):
        """
        Delete the tag files, the diff file and the temporary clone. Paths
        that were never set or no longer exist are skipped.
        """
        for path in (self.tag_f1, self.tag_f2, self._git_diff):
            if path and os.path.exists(path):
                os.remove(path)
        if self.src_dir and os.path.exists(self.src_dir):
            shutil.rmtree(self.src_dir)

class SystemVar():
    """
    Locates the external programs the script runs.

    After construction git_exe and ctags_exe hold the paths of the git and
    ctags executables.
    """

    def __init__(self):
        self.git_exe = None
        self.ctags_exe = None
        self.get_exes()

    def get_exes(self):
        """Look up both executables and store their paths."""
        self.git_exe = self.get_exe("git")
        self.ctags_exe = self.get_exe("ctags")

    def get_exe(self, exe):
        """
        Return the path of the executable named exe.

        The system PATH is searched first. When nothing is found the user is
        asked up to three times for a full path; if none of the answers is an
        executable file the script exits with status 1.
        """
        # shutil.which exists on Python 3.3+; older interpreters fall back to
        # distutils, which is deprecated on newer ones.
        find = getattr(shutil, "which", None)
        if find is None:
            from distutils.spawn import find_executable as find
        exe_file = find(exe)
        if exe_file:
            return exe_file

        # Python 2 calls the line-reading prompt raw_input, Python 3 input.
        try:
            read_line = raw_input
        except NameError:
            read_line = input

        print("%s not found in system PATH" % exe)
        for _ in range(3):
            exe_file = read_line("Enter full path to %s executable:\n" % exe)
            if os.path.isfile(exe_file) and os.access(exe_file, os.X_OK):
                return exe_file
            print("The provided path is not an executable")
        print("Could not locate %s executable in 3 tries. Exiting..." % exe)
        sys.exit(1)


def printReport(comp, args):
    """
    Print the API difference report for comp to standard output.

    comp supplies the dictionaries _classes_added, _classes_removed,
    _pub_methods_added, _pub_methods_removed (name -> file) and dep_methods
    (name -> [file, as-of text]). args supplies tmp, src, versions and
    wikiOutput; when wikiOutput is true the report is printed as wiki tables,
    otherwise as tab-separated lines under banner headings.

    Entries located under Testing/Cxx, Examples, ThirdParty or Utilities are
    left out, as are methods whose file is named after an added/removed class.
    Each section is sorted by file path, then by name.
    """
    # Remove location of temp dir from printout
    str_to_replace = os.path.join(
        os.path.abspath(args.tmp),
        os.path.basename(os.path.abspath(args.src[0]))) + os.sep

    def location(table, key):
        # Values are either a path or a list whose first item is the path.
        entry = table[key]
        if isinstance(entry, list):
            entry = entry[0]
        return entry.replace(str_to_replace, '')

    def cleanListIndices(alist, parentList=None):
        # Remove stuff we don't care about and sort so module/kits are apparent
        def wanted(key):
            # Judge the path relative to the source tree, so that the name of
            # the temporary directory can never trigger an exclusion.
            path = location(alist, key)
            if parentList:
                # don't report new/removed methods that are part of new/removed classes
                clsname = path.split('/')[-1].split('.')[0]
                if clsname in parentList:
                    return False
            return ("Testing/Cxx" not in path and
                    "Examples" not in path and
                    "ThirdParty" not in path and
                    "Utilities" not in path)
        # The name is a secondary sort key so entries of one file always come
        # out in the same order.
        return sorted((key for key in alist if wanted(key)),
                      key=lambda key: (location(alist, key), key))

    def file_rows(table, parentList=None):
        return [(key, location(table, key))
                for key in cleanListIndices(table, parentList)]

    def deprecated_rows():
        return [(key, comp.dep_methods[key][0], comp.dep_methods[key][1])
                for key in cleanListIndices(comp.dep_methods)]

    def wiki_table(title, headers, rows):
        print(title)
        print('{| class="wikitable sortable" border="1" cellpadding="5" '
              'cellspacing="0"')
        for header in headers:
            print('!%s' % header)
        for row in rows:
            print('|-')
            for cell in row:
                print('|%s' % cell)
        print('|}')

    def plain_section(banner, line_format, rows):
        print(banner)
        for row in rows:
            print(line_format % row)

    old_version, new_version = args.versions[0], args.versions[1]
    overview = "==API differences when going from version %s to version %s=="\
        % (old_version, new_version)

    if args.wikiOutput:
        print(overview)
        wiki_table("===Classes/Structs added in version %s===" % new_version,
                   ('Class Name', 'File'),
                   file_rows(comp._classes_added))
        wiki_table("===Classes/Structs removed from version %s===" % old_version,
                   ('Class Name', 'File'),
                   file_rows(comp._classes_removed))
        wiki_table("===Public methods added in version %s===" % new_version,
                   ('Method', 'File'),
                   file_rows(comp._pub_methods_added, comp._classes_added))
        wiki_table("===Public methods removed from version %s===" % old_version,
                   ('Method', 'File'),
                   file_rows(comp._pub_methods_removed, comp._classes_removed))
        wiki_table("==Deprecated Methods==",
                   ('Method', 'Deprecated in', 'As of'),
                   deprecated_rows())
    else:
        print("-------------------- REPORT --------------------")
        print(overview)
        plain_section("~~~~~~~~~~~~~Classes/Structs Added~~~~~~~~~~~~~~~",
                      "%s\t%s",
                      file_rows(comp._classes_added))
        plain_section("~~~~~~~~~~~~Classes/Structs Removed~~~~~~~~~~~~~~",
                      "%s\t%s",
                      file_rows(comp._classes_removed))
        plain_section("~~~~~~~~~~~~~Public Methods Added~~~~~~~~~~~~~~~",
                      "%s\t%s",
                      file_rows(comp._pub_methods_added, comp._classes_added))
        plain_section("~~~~~~~~~~~~Public Methods Removed~~~~~~~~~~~~~~",
                      "%s\t%s",
                      file_rows(comp._pub_methods_removed,
                                comp._classes_removed))
        plain_section("~~~~~~~~~~~~~~Deprecated Methods~~~~~~~~~~~~~~~~~",
                      "%s\tdeprecated in %s as of %s",
                      deprecated_rows())


def start(args):
    """
    Run the comparison described by args and print the report.

    args provides src, versions, tmp, wikiOutput and dontClean. Unless
    dontClean is set, the temporary clone and the generated files are removed
    afterwards; otherwise the clone is checked out at the revision it had
    before the comparison.
    """
    # Create a clone of the current working tree in temp dir
    C = CompareVersions(args.src[0], args.versions[0], args.versions[1],
            args.tmp)
    try:
        printReport(C, args)
    finally:
        # Runs even when the report fails, so the temporary clone is never
        # left behind by accident.
        if not args.dontClean:
            C.cleanup()
        else:
            # Restore the working directory to its original state
            C.git_checkout_version(C._git_HEAD)



if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Compare two different'\
        ' revisions of a C++ source tree under GIT revision control')
    parser.add_argument("src", type=str, nargs=1,
        help="Path to the working directory")
    parser.add_argument("versions", type=str, nargs=2,
        help="Two versions to compare. (GIT SHA/branch/tag)")
    parser.add_argument("-t", "--tmp", type=str,
        default=tempfile.gettempdir(),
        help="Path to a temporary directory where the current working tree"\
             " can be cloned.(default: System Temp Directory")
    parser.add_argument("-w", "--wikiOutput", action='store_true',
        help="Print output with wiki markup.")
    parser.add_argument("-d", "--dontClean", action='store_true',
        help="Do not delete temporary files and directories.")

    args = parser.parse_args()
    start(args)