File: hashtest.c

package info (click to toggle)
zmailer 2.99.51.52pre3-2
  • links: PTS
  • area: main
  • in suites: potato
  • size: 16,596 kB
  • ctags: 7,422
  • sloc: ansic: 90,470; sh: 3,608; makefile: 2,784; perl: 1,585; python: 115; awk: 22
file content (117 lines) | stat: -rw-r--r-- 2,793 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
/*
 *  Hashtest -- a small utility to test how  CRC32() and PJWHASH32()
 *  functions work with real-world user input.
 *
 *  Part of ZMailer -- test which mailbox hash function is best for you.
 *
 *  This should show to you, that 'hashtest -XX' produces best two-
 *  level subdirectory hash with max 676 sub-buckets, and likely all
 *  bucket abundances are within 20-30 % of each other.
 *
 *  My test material (189438 userids from several systems pulled together)
 *  did show that  pjwhash32() suffers from some odd thing which always
 *  looses two low bits, and thus produces only 169 different hash buckets,
 *  while crc32() produces all 676 buckets.
 *
 *  Old "Pick two first letters of the username for subdir" approach
 *  produced 565 buckets, but the distribution was absolutely terribly
 *  scewed - 20 top-abundant buckets had over 50% of all hits.
 *  The 5 top-abundant buckets all had more than 7000 hits.
 *
 *  Runtime comparisons show that:
 *     -PP:  0.598 sec user space
 *     -XX:  0.592 sec user space
 *     -DD:  0.443 sec user space
 *
 *  from which we can propably safely say that  crc32() and pjwhash32()
 *  are absolutely equal in execution time, and likely present only
 *  0.150 seconds of the test runtime.  ( Or 790 nanoseconds per user
 *  name -- yeah, Alpha rules ;) Guestimate says each hash took some
 *  680 instruction cycles -- HOT caches! )
 *
 *  Matti Aarnio <mea@nic.funet.fi> 9-Sep-1999
 *
 */


#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
#include <string.h>

extern long pjwhash32 (const void *);
extern long crc32     (const void *);

static void
usage()
{
      printf("hashtest -({P|D|X}+) < usernamefile\n");
      exit(64);
}


int main(argc, argv)
     int argc;
     char *argv[];
{
  int c;
  int pjwhashes = 0, dirhashes = 0, crchashes = 0;
  char buf[2000];

  while ((c = getopt(argc, argv, "?PDX")) != EOF) {
    switch (c) {
    case 'P':
      ++pjwhashes;
      break;
    case 'D':
      ++dirhashes;
      break;
    case 'X':
      ++crchashes;
      break;
    default:
      usage();
    }
  }
  if (!pjwhashes && !dirhashes && !crchashes)
    usage();


  while (!feof(stdin) && !ferror(stdin)) {

    char *s;
    int hash, i;

    if (fgets(buf, sizeof(buf), stdin) == NULL)
      break;
    s = strchr(buf,'\n');
    if (s) *s = 0;
    hash = 0;
    if (dirhashes) {

      s = buf;
      for (i = 0; i < dirhashes; ++i,++s)
	putc(*s, stdout);

    } else if (pjwhashes) {

      hash = pjwhash32(buf);
      for (i = 0; i < pjwhashes; ++i) {
	putc('A' + (hash % 26), stdout);
	hash /= 26;
      }

    } else { /* CRChashes */

      hash = crc32(buf);
      for (i = 0; i < crchashes; ++i) {
	putc('A' + (hash % 26), stdout);
	hash /= 26;
      }

    }
    printf("\n");
  }

  return 0;
}