File: find_dups.py

"""
Find paths to files with identical contents.

Usage:

    python find_dups.py <PATH or FS URL>
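
Example invocations (the directory and archive names are illustrative):

    python find_dups.py ./photos
    python find_dups.py zip://backup.zip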

"""

import sys

from collections import defaultdict

from fs import open_fs

# Map each content hash to every path whose file contents produced it.
hashes = defaultdict(list)

# Open the given path or FS URL and hash every file beneath it.
with open_fs(sys.argv[1]) as fs:
    for path in fs.walk.files():
        file_hash = fs.hash(path, "md5")
        hashes[file_hash].append(path)

# Any hash shared by more than one path marks a group of duplicate files;
# print each group, separated by a blank line.
for paths in hashes.values():
    if len(paths) > 1:
        for path in paths:
            print(path)
        print()