1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
|
/*
mairix - message index builder and finder for maildir folders.
**********************************************************************
* Copyright (C) Richard P. Curnow 2002-2004
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*
**********************************************************************
*/
#include "mairix.h"
#include "memmac.h"
#include "reader.h"
#include <stdlib.h>
/*
 * Accumulate histogram statistics for every non-NULL token in table 'x'.
 *
 *   lc   - histogram of token text lengths (strlen of tok->text)
 *   elc  - histogram of encoding vector lengths (tok->match0.n)
 *   ec   - histogram of the increments returned by read_increment() while
 *          walking the bytes [match0.msginfo, match0.msginfo + match0.n)
 *   size - number of elements in each of lc, elc and ec
 *   ml, mel, me - running maxima of the values recorded in lc, elc and ec
 *                 respectively; only ever raised, never lowered
 *
 * Valid histogram indices are 0 .. size-1, so any value >= size is reported
 * on stderr and left out of the histogram rather than written past the end
 * of the array.
 */
static void do_toktable(struct toktable *x, int *lc, int *elc, int *ec, int size, int *ml, int *mel, int *me)
{
  int i;

  for (i = 0; i < x->size; i++) {
    struct token *tok = x->tokens[i];
    unsigned char *j, *last_char;
    int incr;
    int len;

    /* Unused slots in the table are NULL. */
    if (!tok) {
      continue;
    }

    /* Deal with token text length */
    len = strlen(tok->text);
    if (len >= size) {
      fprintf(stderr, "Token length %d exceeds size\n", len);
    } else {
      lc[len]++;
      if (len > *ml) *ml = len;
    }

    /* Deal with encoding length */
    if (tok->match0.n >= size) {
      fprintf(stderr, "Token encoding length %d exceeds size\n", tok->match0.n);
    } else {
      elc[tok->match0.n]++;
      if (tok->match0.n > *mel) *mel = tok->match0.n;
    }

    /* Deal with encoding: read_increment() advances j itself, which is what
     * terminates this loop. */
    j = tok->match0.msginfo;
    last_char = j + tok->match0.n;
    while (j < last_char) {
      incr = read_increment(&j);
      if (incr >= size) {
        fprintf(stderr, "Encoding increment %d exceeds size\n", incr);
      } else {
        ec[incr]++;
        if (incr > *me) *me = incr;
      }
    }
  }
}
/*
 * Print a histogram to stdout, one row for each index 0 .. max inclusive.
 *
 * Each row shows: the index, the count x[index], the step in the scaled
 * cumulative value since the previous row, and the scaled cumulative value
 * itself.  The cumulative value is the running sum of counts scaled so that
 * the grand total maps to 256.
 *
 * If all the counts are zero there is nothing to scale against, so the
 * scaled columns are printed as 0 instead of dividing by zero.
 */
void print_table(int *x, int max)
{
  int total = 0;
  int sum = 0;
  int prev_scaled = 0;
  int i;

  for (i = 0; i <= max; i++) {
    total += x[i];
  }

  for (i = 0; i <= max; i++) {
    int scaled;

    sum += x[i];
    if (total != 0) {
      scaled = (int)((double)sum * 256.0 / (double)total);
    } else {
      /* Empty histogram: 0/0 would yield NaN, and NaN -> int is undefined. */
      scaled = 0;
    }
    printf("%5d : %5d %3d %3d\n", i, x[i], scaled - prev_scaled, scaled);
    prev_scaled = scaled;
  }
}
/*
 * Gather and print statistics about the token tables of 'db'.
 *
 * Three histograms are accumulated across the to, cc, from, subject and body
 * tables (see do_toktable): token text length, encoding vector length, and
 * encoding increment.  Each is printed to stdout, preceded by the largest
 * value observed, and the working arrays are released before returning.
 */
void get_db_stats(struct database *db)
{
  /* Deal with paths later - problem is, they will be biased by length of folder_base at the moment. */
  int size = 4096; /* number of buckets in each histogram */
  int *len_counts, *enc_len_counts, *enc_counts;
  int max_len = 0;
  int max_enc_len = 0;
  int max_enc = 0;

  len_counts = new_array(int, size);
  memset(len_counts, 0, size * sizeof(int));
  enc_len_counts = new_array(int, size);
  memset(enc_len_counts, 0, size * sizeof(int));
  enc_counts = new_array(int, size);
  memset(enc_counts, 0, size * sizeof(int));

  do_toktable(db->to, len_counts, enc_len_counts, enc_counts, size, &max_len, &max_enc_len, &max_enc);
  do_toktable(db->cc, len_counts, enc_len_counts, enc_counts, size, &max_len, &max_enc_len, &max_enc);
  do_toktable(db->from, len_counts, enc_len_counts, enc_counts, size, &max_len, &max_enc_len, &max_enc);
  do_toktable(db->subject, len_counts, enc_len_counts, enc_counts, size, &max_len, &max_enc_len, &max_enc);
  do_toktable(db->body, len_counts, enc_len_counts, enc_counts, size, &max_len, &max_enc_len, &max_enc);
#if 0
  /* no longer works now that the msg_ids table has 2 encoding chains. fix
   * this when required. */
  do_toktable(db->msg_ids, len_counts, enc_len_counts, enc_counts, size, &max_len, &max_enc_len, &max_enc);
#endif

  printf("Max token length : %d\n", max_len);
  print_table(len_counts, max_len);

  printf("Max encoding vector length : %d\n", max_enc_len);
  print_table(enc_len_counts, max_enc_len);

  printf("Max encoding increment : %d\n", max_enc);
  print_table(enc_counts, max_enc);

  /* Release the histograms.
   * NOTE(review): assumes new_array() is malloc-backed - confirm in memmac.h. */
  free(len_counts);
  free(enc_len_counts);
  free(enc_counts);
}
|