File: tfidf.c

package info (click to toggle)
sufary 2.1.1-2
  • links: PTS
  • area: main
  • in suites: woody
  • size: 1,236 kB
  • ctags: 782
  • sloc: ansic: 4,122; perl: 1,378; makefile: 726; sh: 664; tcl: 441; cpp: 192
file content (73 lines) | stat: -rw-r--r-- 1,772 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
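/******************************************************************************
 * tfidf.c - print the term frequency (TF), document frequency (DF) and
 * inverse document frequency (IDF) of a key, using the SUFARY library
 * (sufary.h) and the ".did" document index that accompanies the text file.
 *
 * Usage: <program> KEY TEXTFILE
 *****************************************************************************/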
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sufary.h"

/* Looks up `key`, prints its TF/DF/IDF statistics, and returns 1 when the key
   was found or 0 when it was not (defined later in this file). */
int search_and_check(SUFARY *ary, DID *d, char *key);

/* Per-document hit counters shared between main() and search_and_check().
   main() allocates sa_did_size() bytes and zeroes them; search_and_check()
   indexes the array by the value returned from sa_doc_no(). */
unsigned char *did_check;

/*
 * Program entry point.
 *
 *   argv[1]  key to look up
 *   argv[2]  file name handed to sa_openfiles(); "<argv[2]>.did" is handed
 *            to sa_opendid()
 *
 * Returns 0 when the lookup ran (whether or not the key was found) and 1 on
 * a usage error, an open failure or an allocation failure.
 */
int main(int argc, char *argv[])
{
  SUFARY *ary = NULL;
  DID *did = NULL;
  char *didfile = NULL;
  size_t didfile_size;
  long ndocs;
  int status = 1;

  /* Both argv[1] and argv[2] are dereferenced below, so two arguments
     are mandatory. */
  if (argc < 3) {
    fprintf(stderr, "usage: %s KEY TEXTFILE\n", argc > 0 ? argv[0] : "tfidf");
    return 1;
  }

  /* Open the files for the text given on the command line. */
  if ((ary = sa_openfiles(argv[2], NULL)) == NULL) {
    fprintf(stderr, "%s: cannot open %s\n", argv[0], argv[2]);
    goto out;
  }

  /* Build "<textfile>.did" in a buffer sized for the actual path, so an
     arbitrarily long argument cannot overflow it. */
  didfile_size = strlen(argv[2]) + sizeof ".did";
  didfile = malloc(didfile_size);
  if (didfile == NULL) {
    fprintf(stderr, "%s: out of memory\n", argv[0]);
    goto out;
  }
  snprintf(didfile, didfile_size, "%s.did", argv[2]);

  if ((did = sa_opendid(didfile)) == NULL) {
    fprintf(stderr, "%s: cannot open %s\n", argv[0], didfile);
    goto out;
  }

  /* One zero-initialised counter per document.  At least one element is
     requested so that a NULL result always means allocation failure. */
  ndocs = (long)sa_did_size(did);
  did_check = calloc(ndocs > 0 ? (size_t)ndocs : 1, sizeof *did_check);
  if (did_check == NULL) {
    fprintf(stderr, "%s: out of memory\n", argv[0]);
    goto out;
  }

  /* A key that is not found is reported by search_and_check() itself and
     is not treated as an error. */
  (void)search_and_check(ary, did, argv[1]);
  status = 0;

out:
  /* Release everything that was acquired, on every path. */
  free(did_check);
  did_check = NULL;
  if (ary != NULL)
    sa_closefiles(ary);
  if (did != NULL)
    sa_closedid(did);
  /* Freed last: whether sa_opendid() keeps the name pointer is not visible
     from this file. */
  free(didfile);
  return status;
}



/************************************************************
  Search for `key` and print its term statistics.

  Prints, on stdout:
    DOC        value of sa_did_size(d)
    TF(key)    number of array entries in the range sa_left()..sa_right()
    TF(n,key)  hits counted for document number n (one line per document
               with at least one hit; the counter is an unsigned char and
               saturates at UCHAR_MAX)
    DF(key)    number of documents with at least one hit
    IDF(key)   log(DOC / DF), computed in floating point

  ary  handle on which sa_reset()/sa_sel() are run
  d    document index used to map each hit to a document number
  key  string to look up

  Uses the global did_check[] array, which the caller must have allocated
  with at least sa_did_size(d) zeroed elements.

  Returns 1 when sa_sel() reports CONT for the key, 0 otherwise (after
  printing "NOT FOUND").
 ************************************************************/
int search_and_check(SUFARY *ary, DID *d, char *key)
{
  long i, ai, sar, sal, no;
  long ndocs;
  long df = 0;

  sa_reset(ary);
  if (sa_sel(ary, key) != CONT) {
    printf("NOT FOUND [%s]\n", key);
    return 0;
  }

  /* The document count does not change during the lookup; read it once. */
  ndocs = (long)sa_did_size(d);
  sar = sa_right(ary);
  sal = sa_left(ary);

  printf("DOC = %ld\n", ndocs);
  printf("TF(%s) = %ld\n", key, sar - sal + 1);

  /* Attribute every hit in the selected range to its document. */
  for (ai = sal; ai <= sar; ai++) {
    sa_didsearch(d, sa_aryidx2txtidx(ary, ai));
    no = sa_doc_no(d);
    /* Skip -1 and any other number outside the counter array.  Saturate
       instead of wrapping, so a document with 256 hits is not reset to 0
       and dropped from DF. */
    if (no >= 0 && no < ndocs && did_check[no] < UCHAR_MAX)
      did_check[no]++;
  }

  for (i = 0; i < ndocs; i++) {
    if (did_check[i] > 0) {
      printf("TF(%ld,%s) = %ld\n", i, key, (long)did_check[i]);
      df++;
    }
  }

  printf("DF(%s) = %ld\n", key, df);

  if (df > 0) {
    /* Divide as doubles: integer division would truncate the ratio. */
    printf("IDF(%s) = %e\n", key, log((double)ndocs / (double)df));
  } else {
    /* No hit mapped to a document; the ratio would divide by zero. */
    fprintf(stderr, "IDF(%s) undefined: no matching documents\n", key);
  }

  return 1;
}