1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94
|
#!/usr/bin/env python
# -*- mode: python; coding: utf-8-unix -*-
# ***** BEGIN LICENSE BLOCK *****
#////////////////////////////////////////////////////////////////////////
# Copyright (c) 2011-2014 RALOVICH, Kristóf //
# //
# This program is free software; you can redistribute it and/or modify //
# it under the terms of the GNU General Public License as published by //
# the Free Software Foundation; either version 3 of the License, or //
# (at your option) any later version. //
# //
# This program is distributed in the hope that it will be useful, //
# but WITHOUT ANY WARRANTY; without even the implied warranty of //
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
# GNU General Public License for more details. //
# //
#////////////////////////////////////////////////////////////////////////
# ***** END LICENSE BLOCK *****
import os
import fnmatch
import filecmp
import sys
# Root of the antpm data directory: "<config home>/antpm/".
# The config home is $XDG_CONFIG_HOME when that variable is set and non-empty,
# otherwise "~/.config" (an empty value is treated like an unset one instead
# of resolving to "/antpm/" at the filesystem root).  The fallback is only
# evaluated when it is needed, so a missing $HOME no longer raises TypeError
# while $XDG_CONFIG_HOME is available.  The trailing path separator of the
# original value is preserved by the final empty join component.
db_dir = os.path.join(
    os.getenv('XDG_CONFIG_HOME')
    or os.path.join(os.path.expanduser('~'), '.config'),
    'antpm',
    '')
def find_files(directory, pattern):
    """Yield the path of every file below *directory* whose name matches *pattern*.

    The tree is walked recursively with os.walk().  *pattern* is a
    shell-style wildcard (see fnmatch) that is tested against each file's
    base name only, never against its directory part.  Paths are produced
    lazily, each one built as os.path.join(<containing dir>, <base name>).
    """
    for parent, _subdirs, names in os.walk(directory):
        for matched in fnmatch.filter(names, pattern):
            yield os.path.join(parent, matched)
def get_immediate_empty_subdirectories(a_dir):
    """Return the names of the empty directories located directly in *a_dir*.

    Only direct children are inspected (no recursion).  The result holds the
    bare entry names, not full paths, in the order os.listdir() reports them.
    """
    empty_names = []
    for entry in os.listdir(a_dir):
        candidate = os.path.join(a_dir, entry)
        # Keep the entry only when it is a directory without any content.
        if os.path.isdir(candidate) and not os.listdir(candidate):
            empty_names.append(entry)
    return empty_names
def SubDirPath(d):
    """Return the paths of the directories located directly inside *d*.

    Every entry reported by os.listdir(d) is joined onto *d*; only the joined
    paths that are directories are kept, in listing order.
    """
    subdir_paths = []
    for entry in os.listdir(d):
        path = os.path.join(d, entry)
        if os.path.isdir(path):
            subdir_paths.append(path)
    return subdir_paths
def FindDuplicates(folder, ftype):
    """Find byte-identical files below *folder* whose names match *ftype*.

    Candidate files are collected with find_files() and sorted by path, so
    within a group of identical files the first path in sort order is kept
    as the original and every later one is reported as its duplicate.
    Contents are compared with filecmp.cmp(..., shallow=False).

    Returns a (duplicates, redundant) tuple:
      duplicates -- dict mapping the full path of each original to the list
                    of its duplicates; each duplicate is given as its path
                    with the leading *folder* prefix cut off, so it still
                    starts with a path separator when *folder* has no
                    trailing one.
      redundant  -- set with the full paths of all duplicate files.
    """
    duplicates = {}
    redundant = set()
    # Sorting makes the choice of the surviving file deterministic.  The
    # original note states that this way the copies found in folders that
    # correspond to later dates are the ones reported as duplicates
    # (presumably the sub-folders are date-named -- not verifiable here).
    candidates = sorted(find_files(folder, ftype))
    # The prefix is taken from the function's own argument rather than from
    # a global that only exists while the script's main loop is running.
    prefix_len = len(folder)
    for index, original in enumerate(candidates):
        # A file already identified as a copy of an earlier file can never
        # be an original, so it needs no comparisons of its own.
        if original in redundant:
            continue
        for other in candidates[index + 1:]:
            # Already matched to an earlier original; since content equality
            # is transitive it cannot also equal this, different, original.
            if other in redundant:
                continue
            if filecmp.cmp(original, other, shallow=False):
                redundant.add(other)
                duplicates.setdefault(original, []).append(other[prefix_len:])
    return duplicates, redundant
if __name__=='__main__':
# for filename in find_files(db_dir, '*.c'):
# print 'Found C source:', filename
devices = []
devices = SubDirPath(db_dir)
print 'Found', len(devices), 'devices:', devices
for devFold in devices:
duplicates_fit, _ = FindDuplicates(devFold, '*.fit')
duplicates_gpx, _ = FindDuplicates(devFold, '*.gpx')
duplicates = dict(duplicates_fit.items() + duplicates_gpx.items())
print '\tFor device', os.path.basename(devFold), 'has', len(duplicates), 'duplicates'
#print duplicates
for k in sorted(duplicates):
print '\t\t', k, 'has', len(duplicates[k]), 'duplicates: ', duplicates[k]
if len(sys.argv)>1 and '--really-delete' in sys.argv[1:]:
print '\n\n\t\tDELETING DUPLICATES...\n\n'
for k in sorted(duplicates):
for dupl in duplicates[k]:
print '\t\tRM', dupl
os.remove(devFold+'/'+dupl)
for devFold in devices:
emp_sub_dirs = get_immediate_empty_subdirectories(devFold)
print '\t2nd pass cleanup, found', len(emp_sub_dirs), 'empty subfolders'
for d in emp_sub_dirs:
print '\t\t', d
if len(sys.argv)>1 and '--really-delete' in sys.argv[1:]:
for d in emp_sub_dirs:
os.rmdir(devFold+'/'+d)
|