#! /usr/bin/env python
#
# Script to conservatively convert initial spaces to tabs
#
# The script first guesses the number of spaces used (e.g. 2,3,4, or 8).
# It then passes this to the unexpand utility for the actual conversion.
import sys,os
import optparse
import subprocess
def space_distribution(filename):
    """Build a histogram of the number of initial spaces on each line.

    Every line of the file is scanned from its first character for as long
    as the characters are spaces. If the first non-space character is '*',
    one space is discounted (presumably so that block-comment continuation
    lines such as " * text" line up with the code around them -- confirm
    against intended inputs). The discounted value is clamped at zero, so a
    line that starts with '*' in column 0 is counted as unindented instead
    of being recorded under the meaningless key -1. Any other character,
    including a tab, simply ends the scan.

    filename: path of the text file to read

    Returns a dictionary mapping n to the number of lines with n initial
    spaces.
    """
    spaces = {}
    with open(filename, 'r') as source:
        for line in source:
            rest = line.lstrip(' ')
            count = len(line) - len(rest)
            if rest.startswith('*'):
                # Never let the '*' adjustment push the count below zero.
                count = max(count - 1, 0)
            spaces[count] = spaces.get(count, 0) + 1
    return spaces
def max_compatible(distribution, threshold=.8, possible=(8, 4, 3, 2)):
    """Guess the most likely number of spaces per tab for a file.

    distribution: a dictionary giving the number of lines with a particular
        number of initial spaces
    threshold: fraction of indented lines that must be correctly spaced
    possible: iterable of possible tabstops to consider

    Candidates are tried from the largest to the smallest. Lines with zero
    initial spaces are ignored. A candidate is accepted when either
      * every indented line is a multiple of it, or
      * at least threshold-fraction of the indented lines are a multiple of
        it, provided there are enough indented lines for the tolerated
        remainder to be at least two lines.

    Returns the highest accepted tabstop. Returns None if no possibilities
    meet the threshold, or if the file did not contain sufficient lines to
    determine the tabstop unambiguously (in particular when it has no
    indented lines at all).
    """
    indented = [(n, count) for n, count in distribution.items() if n != 0]
    total = sum(count for _, count in indented)
    # Without any indented line nothing can be inferred. Bail out before the
    # threshold test, which would otherwise pass trivially (0 >= 0) whenever
    # threshold >= 1 and report the largest candidate.
    if total == 0:
        return None

    for tabstop in sorted(possible, reverse=True):
        compat = sum(count for n, count in indented if n % tabstop == 0)
        incompat = total - compat
        # unambiguous case
        if compat > 0 and incompat == 0:
            return tabstop
        # require sufficient lines with spaces
        if threshold < 1 and total * (1 - threshold) < 2:
            continue
        if compat >= total * threshold:
            return tabstop
    return None
def unexpand(filename, spaces):
    """Call the unexpand command to convert initial spaces into tabs.

    The command line is echoed to stdout, then `unexpand --first-only -t
    <spaces> <filename>` is run and its captured output is written back over
    the original file.

    filename: path of the file to convert in place
    spaces: tabstop width handed to unexpand's -t option

    Raises subprocess.CalledProcessError if unexpand exits with a non-zero
    status. The file is only reopened for writing after the command has
    succeeded, so a failed run leaves it untouched.
    """
    args = ["unexpand", "--first-only", "-t", str(spaces), filename]
    # Single-argument call form prints identically on Python 2 and Python 3.
    print(" ".join(args))
    converted = subprocess.check_output(args)
    with open(filename, 'wb') as out:
        out.write(converted)
# Command-line entry point: report the guessed tabstop for each file given,
# or, with -x, rewrite each file with its initial spaces converted to tabs.
if __name__ == "__main__":
    parser = optparse.OptionParser(usage="usage: python %prog [options] filenames...")
    parser.add_option("-t", "--threshold", help="fraction of lines with compatible spacing",
                      dest="threshold", default=0.8, type='float', action='store')
    parser.add_option("-x", "--unexpand", help="Unexpand spaces in the input files",
                      dest="unexpand", default=False, action='store_true')
    (options, args) = parser.parse_args()

    if not args:
        parser.print_usage()
        # OptionParser.exit takes (status, msg); pass the message as msg so
        # it is not mistaken for the exit status.
        parser.exit(1, "Error. No files\n")

    for f in args:
        dist = space_distribution(f)
        # Skip files in which no line starts with a space (e.g. tabbed
        # files). list(dist) compares equal to [0] on Python 2 and 3 alike,
        # whereas dist.keys() == [0] is never true on Python 3.
        if list(dist) == [0]:
            continue
        c = max_compatible(dist, options.threshold)
        if c is None:
            print("Unclear\t%s\t?\t%s" % (f, dist))
        elif options.unexpand:
            unexpand(f, c)
        else:
            print("Match\t%s\t%d\t%s" % (f, c, dist))
|