File: hassock_hash.py

package info (click to toggle)
lastz 1.04.52-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 17,604 kB
  • sloc: ansic: 39,808; python: 6,073; makefile: 843; sh: 53
file content (85 lines) | stat: -rwxr-xr-x 2,239 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/env python
"""
Python implementation of the hash used for "hashed sequence index" files.

The "hassock" hash is a variant of Austin Appleby's MurmurHash2.  The
latter is described (as of Apr/2009) at
	murmurhash.googlepages.com
This variant is based on the endian-neutral version found at
	murmurhash.googlepages.com/MurmurHashNeutral2.cpp
and differs in the following ways:
	(a)	The "seed" is hardwired.
	(b)	We parse the data block in reverse;  this allows the caller to
		prepend an additional seed pattern to his buffer, potentially
		getting better mixing for the bits in the final incorporated
		bytes.
	(c)	The last three bytes are incorporated in a different order than
		they were in MurmurHash2, because the code just works out better
		this way.

:Author: Bob Harris (rsharris@bx.psu.edu)
"""

import sys

seed = 0x5C3FC4D3
mult = 0x87C10417

def hassock_hash(s):
	ix = len(s)
	h  = seed ^ ix							# h = seed ^ len;
	while (ix >= 4):
		k =  ord(s[ix-1])					# k  = *(--data);
		k |= ord(s[ix-2]) << 8				# k |= *(--data) << 8;
		k |= ord(s[ix-3]) << 16				# k |= *(--data) << 16;
		k |= ord(s[ix-4]) << 24				# k |= *(--data) << 24;

		k = (k * mult) & 0xFFFFFFFF			# k *= m; 
		k ^= k >> 24						# k ^= k >> r;
		k = (k * mult) & 0xFFFFFFFF			# k *= m; 

		h = (h * mult) & 0xFFFFFFFF			# h *= m; 
		h ^= k								# h ^= k;
		ix -= 4

	if (ix >= 3):
		h ^= ord(s[2]) << 16				# h ^= *(--data) << 16;
	if (ix >= 2):
		h ^= ord(s[1]) << 8					# h ^= *(--data) << 8;
	if (ix >= 1):
		h ^= ord(s[0])						# h ^= *(--data);
		h = (h * mult) & 0xFFFFFFFF			# h *= m; 

	h ^= h >> 13							# h ^= h >> 13;
	h = (h * mult) & 0xFFFFFFFF				# h *= m; 
	h ^= h >> 15							# h ^= h >> 15;

	return h


# main program to test

def main():
	m = None

	strings = []

	for s in sys.argv[1:]:
		if (s.startswith("--mod=")): m = int(s.split("=",1)[1])
		else:                        strings += [s]

	if (strings != []):
		for s in strings:
			demonstrate_hash(s,m)
	else:
		for line in sys.stdin:
			line = line.rstrip()
			demonstrate_hash(line,m)

def demonstrate_hash(s,m):
	if (m == None): print ("%08X: %s" % (hassock_hash(s),s))
	else:           print ("%d: %s"   % (hassock_hash(s)%m,s))


if __name__ == "__main__": main()