File: getart.c

package info (click to toggle)
sufary 2.1b3-4
  • links: PTS
  • area: main
  • in suites: potato
  • size: 2,032 kB
  • ctags: 968
  • sloc: ansic: 5,926; perl: 1,378; tcl: 771; makefile: 728; sh: 664; cpp: 192
file content (121 lines) | stat: -rw-r--r-- 3,332 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
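/******************************************************************************

  SUFARY getart
    Version 1.0 981022

   Searches a text for a keyword and prints every document that contains
   it, together with the number of hits in that document.

   USAGE:
     getart KEYWORD FILE1 FILE2
     getart '' FILE1 FILE2

   FILE1 is the text file to search; FILE2 is its DocID file.

   If the keyword (KEYWORD) is given as '', keywords are read from
     standard input, one per line, and a search is run for each of them.

 *****************************************************************************/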
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sufary.h"

/*
 * One record per keyword hit, describing the document the hit falls in.
 * search_and_print() fills an array of these, sorts it, and then collapses
 * it to one record per document.
 */
typedef struct _doc_info {
  long start; /* result of sa_doc_start(); later passed to sa_getstr() as the start argument */
  long size;  /* result of sa_doc_size(); later passed to sa_getstr() as the size argument */
  long no;    /* result of sa_doc_no(); the key compared by mycomp() */
  int freq;   /* number of hits counted for this document; the key compared by mycomp2() */
} doc_info;

/* Forward declaration; defined after main(). Runs one keyword search and prints the result. */
void search_and_print(SUFARY *ary, DID *d, char *key, char *fname);
static int mycomp(doc_info *i,doc_info *j){return(i->no - j->no);}
static int mycomp2(doc_info *i,doc_info *j){return(j->freq - i->freq);}

/*
 * Program entry point.
 *
 * Usage: getart KEYWORD TEXT_FILE DocID_FILE
 *
 * Opens TEXT_FILE with sa_openfiles() and DocID_FILE with sa_opendid(),
 * then calls search_and_print() once for KEYWORD or, when KEYWORD is the
 * empty string, once for every line read from standard input.
 *
 * Returns 0 on normal completion.  Exits with status 1 when fewer than
 * three operands are given or when either file cannot be opened.
 */
int main(int argc, char *argv[])
{
  SUFARY *ary;
  DID *did;

  /* Not enough arguments: argv[1]..argv[3] are all used below. */
  if(argc < 4){
    fprintf(stderr, "getart Version 1.0 981022\n"
            "  USAGE: getart KEYWORD TEXT_FILE DocID_FILE\n");
    exit(1);
  }

  /** Open the files **/
  if ((ary = sa_openfiles(argv[2],NULL)) == NULL) exit(1);
  if ((did = sa_opendid(argv[3])) == NULL) exit(1);

  if(argv[1][0] == '\0'){ /*** keywords come from standard input ***/
    char cmd[1000];
    while(fgets(cmd, (int)sizeof(cmd), stdin)){
      /* Cut the line at its newline, if it has one.  Unlike blindly
         dropping the last character, this keeps a final line that has
         no trailing newline intact and never indexes before the buffer. */
      cmd[strcspn(cmd, "\n")] = '\0';
      search_and_print(ary, did, cmd, NULL);
      /* Called between queries; presumably restores the full search
         range so the next keyword starts fresh -- confirm in sufary.h. */
      sa_reset(ary);
    }
  } else { /*** the keyword is the argument (argv[1]) ***/
    search_and_print(ary, did, argv[1], NULL);
  }

  /** Close the files **/
  sa_closefiles(ary);
  sa_closedid(did);
  return 0;
}


/************************************************************
  void search_and_print(SUFARY *ary, DID *d, char *key, char *fname)

  Searches for key and prints the documents that contain it.

    ary    SUFARY handle to search (from sa_openfiles())
    d      DID handle used to look up the document of each hit
           (from sa_opendid())
    key    keyword to search for
    fname  not used

  Output on stdout:
    "NOT FOUND" when sa_sel() does not return CONT; otherwise
    "FOUND <n>", n being the number of documents listed, then
    for each document, most hits first, a "freq=<hits>" line
    followed by the document text.

  Exits with status 1 if the working array cannot be allocated.
 ************************************************************/
void search_and_print(SUFARY *ary, DID *d, char *key, char *fname)
{
  long sar, sal, count, ai, i;
  long pre_start;
  int num;
  doc_info *hits;

  (void)fname; /* kept for interface compatibility; not used */

  if (sa_sel(ary, key) != CONT){
    printf("NOT FOUND\n");
    return;
  }

  sar = sa_right(ary);
  sal = sa_left(ary);
  /* NOTE(review): assumes CONT implies sal <= sar, i.e. count >= 1. */
  count = sar - sal + 1;

  hits = malloc(sizeof *hits * (size_t)count);
  if (hits == NULL){
    fprintf(stderr, "getart: out of memory\n");
    exit(1);
  }

  /* For every hit in [sal, sar], record the document it falls in. */
  for (ai = sal; ai <= sar; ai++){
    long pos = sa_aryidx2txtidx(ary, ai);
    doc_info *hit = &hits[ai - sal];

    sa_didsearch(d, pos);
    hit->start = sa_doc_start(d);
    hit->size = sa_doc_size(d);
    hit->no = sa_doc_no(d);
    hit->freq = 0; /* set for real below; avoids copying an indeterminate value */
  }

  /* Sort by document number so that hits from one document are adjacent. */
  qsort(hits,(size_t)count,sizeof(doc_info),
        (int(*)(const void*,const void*))mycomp);

  /*** Collapse runs of equal `start` into one record each, counting
       the run length in `freq`.  The compacted records end up in
       hits[0..num-1].  pre_start is a long like `start`, so large
       offsets compare correctly.  Entries whose start equals the
       previous one but is -1 are never counted; in particular, leading
       -1 entries match the initial pre_start and are dropped.
       (-1 presumably marks "no document" -- confirm in sufary.h.) ***/
  pre_start = -1;
  num = 0;
  for (i = 0; i < count; i++){
    long now_start = hits[i].start;

    if (now_start == pre_start){
      if (now_start != -1) hits[num-1].freq++;
    } else {
      if (hits[num].start != now_start)
        hits[num] = hits[i];
      hits[num].freq = 1;
      num++;
    }
    pre_start = now_start;
  }

  printf("FOUND %d\n", num);

  /* Most frequently hit documents first. */
  qsort(hits,(size_t)num,sizeof(doc_info),
        (int(*)(const void*,const void*))mycomp2);

  /*** Print ***/
  for (i = 0; i < num; i++){
    /* The string returned by sa_getstr() is released with free() here. */
    char *art = sa_getstr(ary, hits[i].start, hits[i].size);

    printf("freq=%d\n", hits[i].freq);
    /* sa_getstr()'s failure behaviour is not visible in this file;
       never hand a null pointer to %s. */
    printf("%s\n", art != NULL ? art : "");
    free(art);
  }

  /* Release the working array; without this every query leaked it. */
  free(hits);
}