File: ftok_experiment.py

package info (click to toggle)
python-sysv-ipc 0.6.8-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 432 kB
  • ctags: 451
  • sloc: ansic: 3,336; python: 516; makefile: 26; sh: 4
file content (50 lines) | stat: -rw-r--r-- 1,242 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
"""This is an experiment to see how often ftok() returns duplicate keys for
different filenames. 
"""


import sys
import os
import sysv_ipc

# Take the directory to scan from the command line if exactly one argument
# was given; otherwise ask for it interactively.
if len(sys.argv) == 2:
    start_path = sys.argv[1]
else:
    # raw_input() was renamed to input() in Python 3. Pick whichever one
    # exists so that the prompt works under both major versions.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    msg = "Start path? [Default = your home directory] "
    start_path = read_line(msg)
    if not start_path:
        start_path = "~"

# Expand paths that start with a tilde and then absolutize.
start_path = os.path.expanduser(start_path)
start_path = os.path.abspath(start_path)

# For every filename in the tree, generate a key and associate the filename
# with that key via a dictionary. Each dictionary value is the list of all
# filenames that produced that key.
d = { }
nfilenames = 0
nfailures = 0
for path, dirnames, filenames in os.walk(start_path):
    for filename in filenames:
        # Fully qualify the path
        filename = os.path.join(path, filename)

        nfilenames += 1

        # 42 is the id handed to ftok(); the trailing True is sysv_ipc's
        # silence_warning flag, which suppresses the module's "ftok() is not
        # recommended" warning on every call.
        key = sysv_ipc.ftok(filename, 42, True)

        # POSIX ftok() reports failure (e.g. a broken symlink or a file that
        # cannot be stat()ed) by returning -1. That is not a real key, so
        # grouping such files together would show up as bogus collisions.
        # NOTE(review): assumes sysv_ipc.ftok() passes the -1 through rather
        # than raising -- confirm against the sysv_ipc version in use.
        if key == -1:
            nfailures += 1
            continue

        d.setdefault(key, [ ]).append(filename)

# Print statistics, including files with non-unique keys. Note that ndups
# accumulates the number of filenames that share a key with at least one
# other filename, not the number of distinct colliding keys.
ndups = 0
for key in d:
    if len(d[key]) > 1:
        ndups += len(d[key])
        # A single pre-formatted argument keeps the output identical whether
        # print is a statement (Python 2) or a function (Python 3).
        print("{0} {1}".format(key, d[key]))

print("Out of {0} unique filenames, I found {1} duplicate keys.".format(nfilenames, ndups))

# Only mention ftok() failures when there were some, so that the output for
# a clean run is unchanged.
if nfailures:
    print("ftok() failed for {0} filenames; they are not counted as duplicates.".format(nfailures))