File: findgremlins.py

package info (click to toggle)
python2.3 2.3.5-3sarge2
  • links: PTS
  • area: main
  • in suites: sarge
  • size: 43,908 kB
  • ctags: 81,384
  • sloc: ansic: 266,250; python: 246,028; makefile: 4,194; perl: 3,702; lisp: 3,630; sh: 2,576; xml: 1,601; objc: 740; cpp: 106; sed: 2
file content (57 lines) | stat: -rw-r--r-- 1,216 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
"""findgremlins - Search through a folder and subfolders for
text files that have characters with bit 8 set, and print
the filename and a bit of context.

By Just, with a little glue by Jack"""

import EasyDialogs
import MacOS
import re
import os
import string
import sys

# Matches one character whose code lies in the range octal 200-377
# (0x80-0xFF), i.e. a character with bit 8 set -- a "gremlin".
xpat = re.compile(r"[\200-\377]")

def walk(top, recurse=1):
	"""Report gremlins (characters with bit 8 set) in text files under top.

	top is the pathname of a file or a folder.  A folder is descended
	into, visiting every entry below it, unless recurse is false, in
	which case the folder is skipped entirely.  A file is examined only
	when its Mac file type is 'TEXT' or four NUL bytes and its name does
	not end in ".hqx".

	A file with more than 16 NUL or high-bit characters among its first
	256 characters is reported as binary and is not searched.  Otherwise
	the file's name is printed once, followed by one line of surrounding
	context for each gremlin found.

	Nothing is returned; all results are printed to standard output.
	"""
	if os.path.isdir(top):
		if recurse:
			for name in os.listdir(top):
				# Nested calls rely on the default recurse=1, so the whole
				# tree below top is visited.
				walk(os.path.join(top, name))
		return

	# Only the file type is needed; the creator code is ignored.
	tp = MacOS.GetCreatorAndType(top)[1]
	if tp not in ('TEXT', '\0\0\0\0') or top[-4:] == ".hqx":
		return

	# Close the file explicitly instead of relying on garbage collection.
	f = open(top)
	try:
		data = f.read()
	finally:
		f.close()

	# Heuristic: many NUL or high-bit characters near the start of the
	# file mean it is probably not text at all.
	badcount = 0
	for ch in data[:256]:
		code = ord(ch)
		if code == 0 or code >= 0x80:
			badcount = badcount + 1
	if badcount > 16:
		print(repr(top) + ' appears to be a binary file')
		return

	pos = 0
	gotone = 0
	while 1:
		m = xpat.search(data, pos)
		if m is None:
			break
		if not gotone:
			# Print the file name once, before its first gremlin.
			print(repr(top))
			gotone = 1
		i, j = m.span()
		# Clamp the start of the context window at 0: a negative slice
		# start would count from the END of the data and yield an empty
		# or unrelated snippet for gremlins in the first 15 characters.
		start = max(0, i - 15)
		# Six spaces of indent, then the context with newlines flattened.
		print("      " + data[start:j + 15].replace('\n', ' '))
		pos = j

def main():
	"""Search every pathname given on the command line for gremlins.

	When no command-line arguments are present, ask the user to pick a
	folder with EasyDialogs.AskFolder() and search that instead; if the
	dialog yields no pathname, do nothing.
	"""
	targets = sys.argv[1:]
	if not targets:
		chosen = EasyDialogs.AskFolder()
		if not chosen:
			return
		targets = [chosen]
	for pathname in targets:
		walk(pathname)
		
# Run the search only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
	main()