diff options
author | RALOVICH, Kristof <tade60@freemail.hu> | 2014-08-27 22:12:20 +0200 |
---|---|---|
committer | RALOVICH, Kristof <tade60@freemail.hu> | 2014-08-27 22:12:20 +0200 |
commit | af4dd87c9171435e874791b8327b8ee25919f9ba (patch) | |
tree | 91ccccc2b1c2c42f9ceaf43dbb94ffe392a06c19 | |
parent | 5e660e4e0e61c3f811f937d433c6716989e9e6e5 (diff) |
scripts: database cleaner
-rwxr-xr-x | scripts/antpm-clean-db | 94 |
1 files changed, 94 insertions, 0 deletions
diff --git a/scripts/antpm-clean-db b/scripts/antpm-clean-db new file mode 100755 index 0000000..bf25e2b --- /dev/null +++ b/scripts/antpm-clean-db @@ -0,0 +1,94 @@ +#!/usr/bin/env python +# -*- mode: python; coding: utf-8-unix -*- +# ***** BEGIN LICENSE BLOCK ***** +#//////////////////////////////////////////////////////////////////////// +# Copyright (c) 2011-2014 RALOVICH, Kristóf // +# // +# This program is free software; you can redistribute it and/or modify // +# it under the terms of the GNU General Public License as published by // +# the Free Software Foundation; either version 3 of the License, or // +# (at your option) any later version. // +# // +# This program is distributed in the hope that it will be useful, // +# but WITHOUT ANY WARRANTY; without even the implied warranty of // +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // +# GNU General Public License for more details. // +# // +#//////////////////////////////////////////////////////////////////////// +# ***** END LICENSE BLOCK ***** + +import os +import fnmatch +import filecmp +import sys + +db_dir=os.getenv('XDG_CONFIG_HOME')+'/antpm/' + +def find_files(directory, pattern): + for root, dirs, files in os.walk(directory): + for basename in files: + if fnmatch.fnmatch(basename, pattern): + filename = os.path.join(root, basename) + yield filename + +def get_immediate_empty_subdirectories(a_dir): + return [name for name in os.listdir(a_dir) + if ( os.path.isdir(os.path.join(a_dir, name)) and os.listdir(os.path.join(a_dir, name)) == [])] + +def SubDirPath (d): + return filter(os.path.isdir, [os.path.join(d,f) for f in os.listdir(d)]) + +def FindDuplicates(folder, ftype): + duplicates = {} + redundant = set() + # sorting ensures that we discover duplicates in folder that + # correspond to later dates + fit_files = [f for f in sorted(find_files(folder, ftype))] + for i in range(0, len(fit_files)): + for j in range(i+1, len(fit_files)): + assert(fit_files[i] != fit_files[j]) + eq = filecmp.cmp(fit_files[i], fit_files[j], False) + if eq: + if fit_files[i] in redundant: continue + if not fit_files[i] in duplicates: duplicates[fit_files[i]]=[] + assert(fit_files[j].startswith(devFold)) + redundant.add(fit_files[j]) + shorter_name=fit_files[j][len(devFold):] + duplicates[fit_files[i]].append(shorter_name) + return duplicates, redundant + + +if __name__=='__main__': + # for filename in find_files(db_dir, '*.c'): + # print 'Found C source:', filename + + devices = [] + devices = SubDirPath(db_dir) + print 'Found', len(devices), 'devices:', devices + + for devFold in devices: + duplicates_fit, _ = FindDuplicates(devFold, '*.fit') + duplicates_gpx, _ = FindDuplicates(devFold, '*.gpx') + duplicates = dict(duplicates_fit.items() + duplicates_gpx.items()) + print '\tFor device', os.path.basename(devFold), 'has', len(duplicates), 'duplicates' + #print duplicates + for k in sorted(duplicates): + print '\t\t', k, 'has', len(duplicates[k]), 'duplicates: ', duplicates[k] + + if len(sys.argv)>1 and '--really-delete' in sys.argv[1:]: + print '\n\n\t\tDELETING DUPLICATES...\n\n' + for k in sorted(duplicates): + for dupl in duplicates[k]: + print '\t\tRM', dupl + os.remove(devFold+'/'+dupl) + + for devFold in devices: + emp_sub_dirs = get_immediate_empty_subdirectories(devFold) + print '\t2nd pass cleanup, found', len(emp_sub_dirs), 'empty subfolders' + for d in emp_sub_dirs: + print '\t\t', d + + if len(sys.argv)>1 and '--really-delete' in sys.argv[1:]: + for d in emp_sub_dirs: + os.rmdir(devFold+'/'+d) + |