/* File: util.c
 * Origin: Debian source package ifile 1.3.9-1 (area: main; suite: squeeze).
 * Size as published: 344 lines, 9,337 bytes.
 */
/* ifile - intelligent mail filter for EXMH/MH
   ifile is Copyright (C) 1997  Jason Rennie <jrennie@ai.mit.edu>
   
   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; either version 2
   of the License, or (at your option) any later version.
   
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   
   You should have received a copy of the GNU General Public License
   along with this program (see file 'COPYING'); if not, write to the Free
   Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA  02111-1307, USA.
   */

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <ifile.h>      /* main ifile function library */

/* Variables for keeping track of time/speed of ifile.  They are only
 * declared here (defined in another translation unit) and hold clock()
 * values: ifile_read_message() stores DMZ2_start before lexing and
 * DMZ_end after it, and reports the difference as elapsed time. */
extern clock_t DMZ_start, DMZ_end, DMZ2_start;

/* Returns a hash value in the range [0, SIZE) for the NUL-terminated
 * string S, using the recurrence h = (c + 31*h) mod SIZE over its bytes.
 * Returns 0 when S is NULL or SIZE is not positive.
 *
 * Bytes are read as unsigned char and the arithmetic is unsigned, so
 * characters above 0x7f can never make the remainder negative (a negative
 * remainder would become a huge value once converted to the unsigned
 * return type).  Results for 7-bit ASCII input are the same as with the
 * signed formulation. */
/* written by Jason Rennie <jrennie@ai.mit.edu> */
unsigned long
hash (const char * s, long int size)
{
  const unsigned char * p = (const unsigned char *) s;
  unsigned long modulus;
  unsigned long hashval = 0;

  /* A zero modulus would be a division by zero; a negative one is
   * meaningless for a table size. */
  if (s == NULL || size <= 0) return 0;
  modulus = (unsigned long) size;

  for (; *p != '\0'; p++)
    hashval = (*p + (hashval << 5) - hashval) % modulus;  /* c + 31*h */

  return hashval;
}


/* Given a printf style format string and an arbitrarily long list of
 * arguments which are in accordance with the format string, ifile_sprintf
 * allocates memory for and creates a string according to the given
 * information.
 *
 * The result is malloc()ed with exactly the required size and must be
 * released by the caller with free().  There is no upper limit on the
 * length of the formatted text.  The process is aborted if formatting
 * fails or memory cannot be allocated.
 *
 * The formatting is done outside of assert() so that it still happens when
 * the program is compiled with NDEBUG, and through vsnprintf() so that no
 * fixed-size buffer can be overrun. */
/* Written by Jason Rennie <jrennie@ai.mit.edu> for ifile. */
char *
ifile_sprintf (char * format, ...)
{
  va_list ap;
  char * rtn;
  size_t size;
  int len;

  /* First pass: measure only.  With a zero size nothing is written and
   * the return value is the length the full output would have. */
  va_start(ap, format);
  len = vsnprintf(NULL, 0, format, ap);
  va_end(ap);
  if (len < 0) abort();

  size = (size_t) len + 1;   /* room for the terminating NUL */
  rtn = malloc(size);
  if (!rtn) abort();

  /* Second pass: format into the exactly sized buffer.  The argument
   * list has to be restarted because the first pass consumed it. */
  va_start(ap, format);
  len = vsnprintf(rtn, size, format, ap);
  va_end(ap);
  if (len < 0) abort();

  return rtn;
}


/* Returns a newly malloc()ed string which is the concatenation of an
 * arbitrary number of strings passed as arguments to the function.
 * The first argument is the number of strings that follow; each of the
 * following arguments must be a valid (non-NULL) char * string.
 * With NUM_STRINGS <= 0 an empty string is returned.  The caller owns the
 * result and releases it with free().  Aborts if memory is exhausted. */
/* written by Jason Rennie <jrennie@ai.mit.edu> for ifile */
char *
ifile_cats (long int num_strings, ...)
{
  va_list ap;
  size_t total = 0;        /* combined length of all pieces */
  size_t piece_len;
  const char * piece;
  char * new_string;
  char * out;              /* next write position in new_string */
  long int i;

  /* First pass over the arguments: compute the size of the result. */
  va_start(ap, num_strings);
  for (i = 0; i < num_strings; i++)
    total += strlen(va_arg(ap, char *));
  va_end(ap);

  new_string = malloc(total + 1);
  if (!new_string) abort();

  /* Second pass: append each piece at the running write position, so the
   * already-built prefix is never rescanned (as repeated strcat would). */
  out = new_string;
  va_start(ap, num_strings);
  for (i = 0; i < num_strings; i++)
    {
      piece = va_arg(ap, char *);
      piece_len = strlen(piece);
      memcpy(out, piece, piece_len);
      out += piece_len;
    }
  va_end(ap);
  *out = '\0';

  return new_string;
}


/* Given an integer value, allocates space for and returns a string
 * representing the integer in decimal character form (with a leading '-'
 * for negative values).  The caller owns the result and releases it with
 * free().  Aborts if memory is exhausted.
 *
 * The conversion is delegated to snprintf("%ld"), which handles the full
 * range of long int, including LONG_MIN and values that do not fit in an
 * int. */
/* Written by Jason Rennie <jrennie@ai.mit.edu> for ifile */
char *
itoa (long int number)
{
  /* Three decimal digits per byte is an upper bound (256 < 1000); the two
   * extra bytes hold the sign and the terminating NUL. */
  char buf[sizeof(long int) * 3 + 2];
  char * tmp;
  int len;

  len = snprintf(buf, sizeof buf, "%ld", number);
  if (len < 0 || (size_t) len >= sizeof buf) abort();

  tmp = malloc((size_t) len + 1);
  if (!tmp) abort();
  memcpy(tmp, buf, (size_t) len + 1);   /* copies the NUL as well */
  return tmp;
}


/*
 * Splits off the next line of the text *BUFP points into.  The text is
 * scanned for the next line feed ('\n'); when one is found it is
 * overwritten with '\0', *BUFP is advanced to the character after it, and
 * the start of the line is returned.  When no line feed remains, NULL is
 * returned and *BUFP is left unchanged (a final fragment without a line
 * feed is therefore not returned).
 */
/* Written by Jason Rennie <jrennie@ai.mit.edu> for ifile */
char *
readline (char **bufp)
{
  char *line_start = *bufp;
  char *newline = strchr(line_start, '\n');

  if (newline != NULL)
    {
      /* Terminate the line in place and step past the separator. */
      *newline = '\0';
      *bufp = newline + 1;
      return line_start;
    }

  return NULL;
}


/* Thin wrapper around the standard free() function: releases the memory
 * VAR points to.  A NULL VAR is allowed and does nothing.
 *
 * VAR is passed by value, so this function cannot reset the caller's
 * pointer; after the call the caller's copy still holds the old address
 * and must not be used again.  Callers that want a NULL pointer afterwards
 * have to assign it themselves. */
/* Written by Jason Rennie <jrennie@ai.mit.edu> for ifile */
void 
ifile_free (void * var)
{
  free(var);
}


/* Walks every element of the hash table MESSAGE and writes it to standard
 * output as "(entry,index) ", where the entry is printed as a long integer
 * and the index as a string.  A single newline ends the listing. */
void
ifile_print_message (htable * message)
{
  hash_elem * cur = htable_init_traversal(message);

  while (cur != NULL)
    {
      printf("(%ld,%s) ", (long int) cur->entry, (char *) cur->index);
      cur = htable_next_traversal(message, cur);
    }

  printf("\n");
}


/* Accepts a file pointer and reads/lexes text from the associated file
 * using ifile_default_lexer.
 * Returns a newly malloc()ed hash table which maps each word appearing in
 * the message to its frequency in the message (the count is stored
 * directly in the entry pointer), or NULL if the lexer could not open the
 * stream.  Aborts if the table itself cannot be allocated.
 *
 * Side effects: stores clock() values in DMZ2_start and DMZ_end and emits
 * progress messages through ifile_verbosify(). */
/* The GOOD code!  This uses Andrew's cool lexing code :) */
/* Written by Jason Rennie <jrennie@ai.mit.edu> and others for ifile */
htable * 
ifile_read_message (FILE * FP)
{
  ifile_lex * document;
  char token[MAX_STR_LEN];
  long int token_len;  /* length of token */
  long int old_freq;   /* previous frequency of word */
  htable * message = malloc(sizeof(htable));
  if (!message) abort();

  ifile_verbosify(ifile_verbose, "Reading message...\n");
  htable_init(message, 100, (unsigned long (*)(const void *, long int)) hash);

  DMZ2_start = clock();

  document = ifile_default_lexer->open_text_fp (ifile_default_lexer, FP);
  if (!document)
    {
      ifile_verbosify(ifile_quiet, "Unable to read message.\n");
      /* NOTE(review): only the table header is released here; if
       * htable_init() allocates internal storage it is not freed on this
       * path -- verify against the htable API. */
      ifile_free(message);
      return NULL;
    }

  token_len = ifile_default_lexer->get_word (ifile_default_lexer, document,
                                             token, MAX_STR_LEN);
  while (token_len != 0)
    {
      /* token_len is a long int, so it must be printed with %ld. */
      ifile_verbosify(ifile_debug, "Read \'%s\'.  length=%ld\n", token,
                      token_len);
      /* update arrays which strictly concern message */
      /* NOTE(review): TOKEN is a reused stack buffer; this presumably
       * relies on htable_put() copying the key -- confirm. */
      old_freq = (long int) htable_lookup(message, (void *) token);
      htable_put(message, ((char *) token),
                 (void *) (old_freq+1));

      token_len = ifile_default_lexer->get_word (ifile_default_lexer,
                                                 document, token, MAX_STR_LEN);
    }

  ifile_default_lexer->close (ifile_default_lexer, document);

  ifile_verbosify(ifile_debug, "\n");
  DMZ_end = clock();
  /* NOTE(review): CLOCKS_PER_SECOND is not the standard CLOCKS_PER_SEC
   * macro; presumably it is provided by ifile.h -- confirm. */
  ifile_verbosify(ifile_verbose,
                  "Finishing reading message.  Time used: %.3f sec\n",
                  ((float)(DMZ_end-DMZ2_start))/CLOCKS_PER_SECOND);

  return message;
}


/* Given a hash table representing a message, reduces every positive
 * frequency value to 1, turning the counts into presence flags.
 * Entries whose value is zero or negative are left untouched. */
void
ifile_bitify_document(htable * message)
{
  hash_elem *cur = htable_init_traversal(message);

  while (cur != NULL)
    {
      if ((long int) cur->entry > 0)
        cur->entry = (void *) 1U;
      cur = htable_next_traversal(message, cur);
    }
}


/* Writes one "<category> <rating>" line to FP for each of the first
 * idata->num_folders elements of RATINGS.  When THRESH is non-zero, there
 * is more than one folder and the first two ratings do not sum to zero, an
 * additional "diff[...](%)" line gives the difference between the first
 * two ratings relative to their sum, as a percentage.  A line of dashes
 * closes the listing. */
/* Written by Jason Rennie <jrennie@ai.mit.edu> for ifile */
void 
ifile_print_ratings (FILE * FP, category_rating * ratings, ifile_db * idata,
                     int thresh)
{
  long int idx = 0;
  int show_diff;

  while (idx < idata->num_folders)
    {
      fprintf(FP, "%s %.8f\n", ratings[idx].category, ratings[idx].rating);
      idx++;
    }

  /* Short-circuit order matters: ratings[1] is only inspected once it is
   * known that at least two folders exist. */
  show_diff = thresh != 0
              && idata->num_folders > 1
              && (ratings[0].rating + ratings[1].rating) != 0;

  if (show_diff)
    {
      fprintf(FP, "diff[%s,%s](%%) %.2f\n", 
              ratings[0].category, ratings[1].category,
              -(ratings[0].rating - ratings[1].rating) / 
                (ratings[0].rating + ratings[1].rating) * 100);
    }

  fprintf(FP, "---------\n");
}

/* Writes a one-line summary of RATINGS to FP: PATH followed by a space
 * (only when PATH is non-NULL), then the category of the first rating.
 * The category of the second rating is appended after a comma when the
 * relative difference of the first two ratings, scaled by 1000, is below
 * THRESH.  That difference is only computed when THRESH is non-zero, there
 * is more than one folder and the two ratings do not sum to zero;
 * otherwise a value above THRESH is used, so only one category is
 * printed. */
/* Written by Karl Vogel <vogelke@dnaco.net> for ifile */
void
ifile_concise_ratings (char * path, FILE * FP, category_rating * ratings,
                     ifile_db * idata, int thresh)
{
  /* Start above the threshold: the single-category form is the default. */
  float diff = thresh + 1;

  if (thresh != 0 && 
      idata->num_folders > 1 &&
      (ratings[0].rating + ratings[1].rating) != 0)
    {
      diff = -(ratings[0].rating - ratings[1].rating) / 
                    (ratings[0].rating + ratings[1].rating) * 1000;
    }

  if (path != NULL)
    fprintf (FP, "%s ", path);

  if (diff < thresh)
    {
      fprintf (FP, "%s,%s\n", ratings[0].category, ratings[1].category);
    }
  else
    {
      fprintf (FP, "%s\n", ratings[0].category);
    }
}

/* Duplicates the NUL-terminated string S1 into freshly malloc()ed storage
 * and returns the copy.  The caller owns the result and releases it with
 * free().  The process is aborted when the allocation fails.  S1 must not
 * be NULL. */
/* written by Jason Rennie <jrennie@ai.mit.edu> for ifile */
char *
ifile_strdup (const char *s1)
{
  size_t nbytes = strlen(s1) + 1;   /* text plus terminating NUL */
  char *copy = malloc(nbytes);

  if (copy == NULL)
    abort();

  return strcpy(copy, s1);
}