File: diff.py

package info (click to toggle)
chromium-browser 41.0.2272.118-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie-kfreebsd
  • size: 2,189,132 kB
  • sloc: cpp: 9,691,462; ansic: 3,341,451; python: 712,689; asm: 518,779; xml: 208,926; java: 169,820; sh: 119,353; perl: 68,907; makefile: 28,311; yacc: 13,305; objc: 11,385; tcl: 3,186; cs: 2,225; sql: 2,217; lex: 2,215; lisp: 1,349; pascal: 1,256; awk: 407; ruby: 155; sed: 53; php: 14; exp: 11
file content (181 lines) | stat: -rw-r--r-- 4,734 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
#!/usr/bin/python
# Copyright (c) 2010 The Native Client Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Diff - Generate a unified diff of two sources or directories.

 The diff module provides a mechanism for running a diff on a pair of
 sources or directories.  If there are any differences they are
 displayed as a unified diff and the module returns non zero.  If
 there are no differences the module returns zero.

 diff.py <opts> <dir1/file1> <dir2/file2>

 diff will load one source file, or multiple files if the sources are
 directories.  Only the top directory is used unless the recursive '-r'
 option is used.  Normally diff will quietly return zero if the files
 are the same.  Verbose '-v' will list all the files compared as well
 and the status of the diff.

 -a, --all : All sources of set 1 must be in set 2
 -h, --help : Display usage.
 -r, --recursive : Search subdirectories as well.
 -v, --verbose : Dump verbose information.

"""

import getopt
import os
import sys
import difflib


class SourceInfo(object):
  """Holds one side of a comparison: a base path plus relative file names.

  Attributes set by the constructor:
    path:  Directory that every entry in 'files' is relative to.
    files: List of file names, relative to 'path', in this source set.
    dir:   True if the source given was a directory, False for a single file.
  """

  def __init__(self, name, recursive):
    """Builds the file list for the source called 'name'.

    Args:
      name: Path to a file or a directory.
      recursive: If True, 'name' must be a directory and the list comes from
          GetFileList(); otherwise only regular files directly inside the
          directory are listed.

    Prints a message and calls usage() if 'name' does not exist, or if
    'recursive' is requested for something that is not a directory.
    """
    self.files = []
    name = os.path.normpath(name)

    if not os.path.exists(name):
      print("Can not find %s" % name)
      usage()

    if os.path.isdir(name):
      self.path = name
      self.dir = True

      if recursive:
        # Searching recursively: collect every file below the directory
        self.files = GetFileList(name)
      else:
        # Otherwise keep only the regular files directly inside it;
        # subdirectories and other entries are skipped.
        for entry in os.listdir(name):
          if os.path.isfile(os.path.join(name, entry)):
            self.files.append(entry)
    else:
      # A single file: its directory is the base path
      self.path = os.path.dirname(name)
      self.files.append(os.path.basename(name))
      self.dir = False

    if recursive and not self.dir:
      print("Source must be a directory to diff recusively.")
      usage()


def GetFileList(path):
  """Returns every file found below 'path', searching recursively.

  Args:
    path: Directory to walk.

  Returns:
    A list of file paths relative to 'path', for example 'a.txt' or
    'sub/b.txt' (joined with the platform's path separator).
  """
  files = []

  for dirname, _, filenames in os.walk(path):
    # Express the directory relative to the root of the walk.  relpath copes
    # with roots such as '/' or 'dir/', where counting path components to
    # strip the prefix would drop one level too many.
    reldir = os.path.relpath(dirname, path)
    if reldir == os.curdir:
      # Files at the root of the walk carry no directory prefix.
      reldir = ''

    for filename in filenames:
      files.append(os.path.join(reldir, filename))
  return files


def SourceIntersect(src1, src2):
  """Returns the files that appear in both source sets.

  Args:
    src1: Object whose 'files' attribute lists the first set's file names.
    src2: Object whose 'files' attribute lists the second set's file names.

  Returns:
    A list of the names from src1.files that are also in src2.files, kept
    in src1's order.
  """
  # A set gives constant-time membership tests against the second source.
  in_src2 = set(src2.files)
  return [name for name in src1.files if name in in_src2]


def ReadLines(path):
  """Returns a list of lines of the file found at 'path'.

  Args:
    path: Path of the file to read.

  Returns:
    The list produced by readlines() on the opened file.

  Raises:
    IOError: If the file can not be opened.  The error is reported on
        stdout before being re-raised.
  """
  try:
    source = open(path, "r")
  except IOError as e:
    print("  ***I/O error({0}): {1} {2}".format(e.errno, e.strerror, path))
    print("")
    raise

  # Close the file even if reading it fails part way through.
  with source:
    return source.readlines()


def Diff(file1, file2):
  """Prints a unified diff of two files to stdout.

  Returns the number of diff lines printed (zero when the files match), or
  1 if an IOError occurs while reading or printing.
  """
  try:
    old_lines = ReadLines(file1)
    new_lines = ReadLines(file2)
    emitted = 0
    for line in difflib.unified_diff(old_lines, new_lines,
                                     fromfile=file1, tofile=file2):
      emitted += 1
      sys.stdout.write(line)
  except IOError:
    return 1
  return emitted

def usage():
  """Prints the module docstring as usage text, then exits with status 1."""
  # A parenthesized single-argument print behaves the same on Python 2 and 3.
  print(__doc__)
  sys.exit(1)


def main(argv):
  """Compares the two sources named on the command line.

  Args:
    argv: Full argument vector; argv[0] is skipped, the rest are options
        (-a, -h, -r, -v or their long forms) and exactly two sources.

  Returns:
    The number of compared files that differed or could not be read.
  """
  verbose = False
  recursive = False
  diff_all = False

  # Parse command-line arguments
  long_opts = ['all', 'help', 'recursive', 'verbose']
  try:
    opts, args = getopt.getopt(argv[1:], 'ahrv', long_opts)
  except getopt.GetoptError as err:
    # Report an unknown option as a usage error rather than a traceback.
    print(str(err))
    usage()

  # Process options
  for opt, _ in opts:
    if opt in ('-h', '--help'):
      usage()
    elif opt in ('-v', '--verbose'):
      verbose = True
    elif opt in ('-r', '--recursive'):
      recursive = True
    elif opt in ('-a', '--all'):
      diff_all = True

  # Process sources
  if len(args) != 2:
    print("Expecting two sources (files or directories).")
    usage()

  src1 = SourceInfo(args[0], recursive)
  src2 = SourceInfo(args[1], recursive)

  if diff_all:
    # Diff all files in the first set
    files = src1.files
  else:
    # Only diff the files present in both sets
    files = SourceIntersect(src1, src2)

  diff_count = 0
  for name in files:
    path1 = os.path.join(src1.path, name)
    path2 = os.path.join(src2.path, name)
    if verbose:
      print("Compare %s and %s" % (path1, path2))
    # Diff() is non-zero when the pair differs or can not be read; count each
    # such pair once so the total matches the per-file summary below.
    if Diff(path1, path2):
      diff_count += 1

  if verbose:
    print("%d different file(s) or failure(s)." % diff_count)
  return diff_count

if __name__ == '__main__':
  # Exit statuses are truncated to 8 bits on POSIX, so a count that is a
  # multiple of 256 would be reported as success; cap it to stay non-zero.
  sys.exit(min(main(sys.argv), 255))