1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121
|
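/******************************************************************************
  getart - document retrieval program built on SUFARY
  Version 1.0 981022
  USAGE:
    getart KEYWORD FILE1 FILE2
    getart '' FILE1 FILE2
  FILE1 is the text file and FILE2 is the DocID file.
  If the keyword (KEYWORD) is given as '', keywords are read from
  standard input, one per line, and a search is run for each of them.
*****************************************************************************/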
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sufary.h"
/*
 * One record per keyword hit; after de-duplication in search_and_print(),
 * one record per matching document.
 */
struct _doc_info {
    long start; /* value of sa_doc_start() for the document holding the hit */
    long size;  /* value of sa_doc_size() for that document */
    long no;    /* value of sa_doc_no() for that document; first sort key */
    int  freq;  /* number of hits merged into this record */
};
typedef struct _doc_info doc_info;
/* Search ary for key and print the matching documents (defined below). */
void search_and_print(SUFARY *ary, DID *d, char *key, char *fname);
static int mycomp(doc_info *i,doc_info *j){return(i->no - j->no);}
static int mycomp2(doc_info *i,doc_info *j){return(j->freq - i->freq);}
/*
 * Entry point.
 *
 *   argv[1]  keyword to search for; an empty string ('') means "read
 *            keywords from standard input, one per line"
 *   argv[2]  text file, opened with sa_openfiles()
 *   argv[3]  DocID file, opened with sa_opendid()
 *
 * Exits with status 1 when arguments are missing or a file cannot be
 * opened; returns 0 otherwise.
 */
int main(int argc, char *argv[])
{
    SUFARY *ary;
    DID *did;

    /* All three arguments are required: argv[3] is used below. */
    if (argc < 4) {
        fprintf(stderr, "getart Version 1.0 981022\n"
                " USAGE: getart KEYWORD TEXT_FILE DocID_FILE\n");
        exit(1);
    }

    /* Open the text file and the DocID file. */
    if ((ary = sa_openfiles(argv[2], NULL)) == NULL) {
        exit(1);
    }
    if ((did = sa_opendid(argv[3])) == NULL) {
        sa_closefiles(ary);
        exit(1);
    }

    if (argv[1][0] == '\0') {
        /* Empty keyword: take one keyword per line from standard input. */
        char cmd[1000];

        while (fgets(cmd, (int)sizeof(cmd), stdin)) {
            /*
             * Cut the line at its newline, if it has one.  A final line
             * without a newline (or an over-long line) is left intact
             * instead of losing its last character.
             */
            cmd[strcspn(cmd, "\n")] = '\0';
            search_and_print(ary, did, cmd, NULL);
            /* Called between queries; presumably clears the previous
               search range - confirm against the SUFARY documentation. */
            sa_reset(ary);
        }
    } else {
        /* Keyword given on the command line (argv[1]). */
        search_and_print(ary, did, argv[1], NULL);
    }

    /* Close both files. */
    sa_closefiles(ary);
    sa_closedid(did);
    return 0;
}
/************************************************************
 * void search_and_print(SUFARY *ary, DID *d, char *key, char *fname)
 *
 * Search ary for key, group the hits by document and print the
 * matching documents, most frequently hit first.
 *
 *   ary    array to search (sa_sel / sa_left / sa_right)
 *   d      DocID handle used to map a text position to its document
 *   key    keyword to search for
 *   fname  not used by this function
 *
 * Output on stdout:
 *   "NOT FOUND"            when sa_sel() does not return CONT
 *   "FOUND <n>"            followed, for each of the n documents, by
 *   "freq=<hits>" and the document text returned by sa_getstr()
 *
 * If the work buffer cannot be allocated, a message is written to
 * stderr and nothing is printed on stdout.
 ************************************************************/
void search_and_print(SUFARY *ary, DID *d, char *key, char *fname)
{
    long sal, sar, nhits, ai, i;
    long pre_art;   /* long, like doc_info.start, so the comparison below
                       stays exact for offsets that do not fit in an int */
    int num;
    doc_info *start_and_size;

    (void)fname;

    if (sa_sel(ary, key) != CONT) {
        printf("NOT FOUND\n");
        return;
    }

    sar = sa_right(ary);
    sal = sa_left(ary);
    nhits = sar - sal + 1;

    /* calloc checks the count * size multiplication and zeroes freq. */
    start_and_size = calloc((size_t)nhits, sizeof *start_and_size);
    if (start_and_size == NULL) {
        fprintf(stderr, "getart: cannot allocate memory for %ld hits\n", nhits);
        return;
    }

    /* Record, for every hit, the document that contains it. */
    for (ai = sal; ai <= sar; ai++) {
        doc_info *rec = &start_and_size[ai - sal];

        sa_didsearch(d, sa_aryidx2txtidx(ary, ai));
        rec->start = sa_doc_start(d);
        rec->size = sa_doc_size(d);
        rec->no = sa_doc_no(d);
    }

    /* Sort by document number so hits in the same document are adjacent. */
    qsort(start_and_size, (size_t)nhits, sizeof(doc_info),
          (int (*)(const void *, const void *))mycomp);

    /*
     * Collapse runs of records with the same start offset into one
     * record, compacting the survivors to the front of the array and
     * counting the run length in freq.  A start of -1 is never counted
     * as a repeat (presumably it marks a hit with no document - confirm
     * against sa_doc_start()).
     */
    pre_art = -1;
    num = 0;
    for (i = 0; i < nhits; i++) {
        long now_start = start_and_size[i].start;

        if (now_start == pre_art) {
            if (now_start != -1) {
                start_and_size[num - 1].freq++;
            }
        } else {
            if (start_and_size[num].start != now_start) {
                start_and_size[num] = start_and_size[i];
            }
            start_and_size[num].freq = 1;
            num++;
        }
        pre_art = now_start;
    }
    printf("FOUND %d\n", num);

    /* Most frequently hit documents first. */
    qsort(start_and_size, (size_t)num, sizeof(doc_info),
          (int (*)(const void *, const void *))mycomp2);

    /* Print each document; the string from sa_getstr() is freed here. */
    for (i = 0; i < num; i++) {
        char *art = sa_getstr(ary, start_and_size[i].start,
                              start_and_size[i].size);

        printf("freq=%d\n", start_and_size[i].freq);
        /* NOTE(review): whether sa_getstr() can return NULL is not
           visible here; print an empty line rather than pass NULL to %s. */
        printf("%s\n", art != NULL ? art : "");
        free(art);
    }

    /* Release the work buffer; this function is called once per query. */
    free(start_and_size);
}
|