File: libtest.c

package info (click to toggle)
swish-e 2.4.7-1
  • links: PTS
  • area: main
  • in suites: squeeze
  • size: 7,224 kB
  • ctags: 8,194
  • sloc: ansic: 51,637; sh: 8,895; perl: 3,018; makefile: 591; xml: 9
file content (544 lines) | stat: -rwxr-xr-x 15,355 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
/*
$Id: libtest.c 2291 2009-03-31 01:56:00Z karpet $
**
    This file is part of Swish-e.

    Swish-e is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    Swish-e is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along  with Swish-e; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    
    See the COPYING file that accompanies the Swish-e distribution for details
    of the GNU GPL and the special exception available for linking against
    the Swish-e library.
    
** Mon May  9 15:22:55 CDT 2005
** added GPL

**---------------------------------------------------------
*
*   Example program for interfacing a C program with the Swish-e C library.
*
*   ./libtest [optional index file]
*
*   use quotes for more than one file
*       ./libtest index.swish-e
*       ./libtest 'index1 index2 index3'
*
*   See the perl/API.xs file for more detail
*
*/


#include <stdio.h>
#include <string.h>   /* strlen() -- used when walking the SwishWordsByLetter() list */
#include "swish-e.h"  /* use locally for testing */



/* When MEM_TEST is defined, mem.h is included and main() calls Mem_Summary() */
/* at the end of each search loop iteration and again just before exiting.    */
#define MEM_TEST 1

#ifdef MEM_TEST
#include "mem.h"   // for mem_summary only
#endif



/* NOTE(review): DISPLAY_COUNT is not referenced anywhere else in this file, */
/* so display_results() currently prints every result.                      */
#define DISPLAY_COUNT 10  // max to display


/* Forward declarations for the file-local helpers defined below main() */

/* Result display and error reporting */
static void display_results( SW_HANDLE, SW_RESULTS );
static void print_error_or_abort( SW_HANDLE swish_handle );

/* Index header and metaname/property dumps */
static void print_index_headers( SW_HANDLE swish_handle, SW_RESULTS results );
static void print_index_metadata( SW_HANDLE swish_handle );
static void print_header_value( SW_HANDLE swish_handle, const char *name, SWISH_HEADER_VALUE head_value, SWISH_HEADER_TYPE head_type );
/* Stemmer (fuzzy word) demonstration */
static void demo_stemming( SW_RESULTS results );
static void stem_it( SW_RESULT r, char *word );


/**********************************************************************
* main -- demonstration driver for the Swish-e C library
*
*   Usage:
*       ./libtest [index file list]
*
*   argv[1], when present and non-empty, is handed to SwishInit() as the
*   list of indexes to open; otherwise "index.swish-e" is used.
*
*   The program then:
*       1) runs a one-shot SwishQuery() for "foo OR bar" and displays it,
*       2) lists the words starting with 'f' for every open index,
*       3) stems the word "running" with SwishStemWord(),
*       4) creates a reusable search object and reads queries from stdin
*          until end of input, displaying the results of each.
*
*   Returns 0.  print_error_or_abort() is used for error reporting
*   throughout.
*
***********************************************************************/

int     main(int argc, char **argv)
{
    SW_HANDLE   swish_handle = NULL;    /* Database handle */
    SW_SEARCH   search = NULL;          /* search handle -- holds search parameters */
    SW_RESULTS  results = NULL;         /* results handle -- holds list of results */

    char    input_buf[200];
    char   *index_file_list;



    SwishErrorsToStderr();      /* Send any errors or warnings to stderr (default is stdout) */

    /* Connect to the indexes specified */

    index_file_list = argc > 1 && argv[1] && *(argv[1]) ? argv[1] : "index.swish-e";

    swish_handle = SwishInit( index_file_list );


    /* set ranking scheme. default is 0 */

    SwishRankScheme( swish_handle, 1 );

    /* return raw values */
    SwishReturnRawRank( swish_handle, 1 );


    /* Check for errors after every call */

    if ( SwishError( swish_handle ) )
        print_error_or_abort( swish_handle );  /* print an error or abort -- see below */


    /* Here's a short-cut to searching that creates a search object and searches at the same time */

    results = SwishQuery( swish_handle, "foo OR bar" );

    if ( SwishError( swish_handle ) )
    {
        print_error_or_abort( swish_handle );  /* print an error or abort -- see below */

        /* Same cleanup as the interactive loop below: a results object */
        /* may have been returned even though the search failed.        */
        if ( results )
            Free_Results_Object( results );
    }
    else
    {
        display_results( swish_handle, results );

        printf( "Testing SW_ResultsToSW_HANDLE() = '%s'\n",
            SW_ResultsToSW_HANDLE( results ) == swish_handle ? "OK" : "Not OK" );

        demo_stemming( results );

        Free_Results_Object( results );
    }

    results = NULL;  /* freed (or never valid) -- don't leave a stale handle around */


    /* This may change since it only supports 8-bit chars */
    /* Ask each index that was actually opened, rather than assuming */
    /* the default "index.swish-e" name.                             */
    {
        const char **index_names = SwishIndexNames( swish_handle );

        for ( ; index_names && *index_names; index_names++ )
        {
            /* The returned buffer is walked as a run of NUL-terminated words  */
            /* that ends at an empty string.                                   */
            /* Cast: the index name argument may be declared as plain char *.  */
            const char *word = SwishWordsByLetter( swish_handle, (char *)*index_names, 'f' );

            printf("Words that begin with 'f': ");
            for ( ; word && word[0]; word += strlen( word ) + 1 )
                printf("%s \n", word );

            printf("\n");
        }
    }

    /*
     * Stem a word -- this method is somewhat deprecated.
     * It stores the stemmed word in a single location in the SW_OBJECT
     */

    {
        char *stemmed = SwishStemWord( swish_handle, "running" );
        printf("SwishStemWord 'running' => '%s'\n\n", stemmed ? stemmed : "Failed to stem" );
    }


    /* Typical use of the library is to create a search object */
    /* and use the search object to make multiple queries */

    /* Create a search object for searching - the query string is optional */
    /* Remember to free the search object when done */

    search = New_Search_Object( swish_handle, "foo" );




    /* Adjust some of the search parameters if different than the defaults */
    SwishSetSort( search, "swishrank desc" );

    // SwishSetStructure( search, IN_TITLE );  /* limit to title only */

    /* Set Limit parameters like */

    /*****

    SwishSetSearchLimit( search, "swishtitle", "a", "z" );
    SwishSetSearchLimit( search, "age", "18", "65" );

    if ( SwishError( swish_handle ) )  // e.g. can't define two limits for same prop name
        print_error_or_abort( swish_handle );

    // use SwishResetLimit() if wish to change the parameters on a active search object

    *****/


    /* Now we are ready to search  */


    while ( 1 )
    {
        printf("Enter search words: ");
        if ( !fgets( input_buf, sizeof input_buf, stdin ) )
            break;  /* end of input (or read error) ends the session */


        results = SwishExecute( search, input_buf );

        /* check for errors */

        if ( SwishError( swish_handle ) )
        {
            print_error_or_abort( swish_handle );

            if ( results ) /* probably always true */
                Free_Results_Object( results );

            continue;
        }

        display_results( swish_handle, results );
        Free_Results_Object( results );

#ifdef MEM_TEST
        /* It's expected to see some memory used here since a swish_handle exists */
        Mem_Summary("End of loop", 1);
#endif

    }

    Free_Search_Object( search );
    SwishClose( swish_handle );


    /* Look for memory leaks -- configure swish-e with --enable-memtrace to use */
#ifdef MEM_TEST
    Mem_Summary("At end of program", 1);
#endif

    return 0;
}

/**********************************************************************
* display_results
*
*   Display some standard properties for every result in a result set
*   -- see perl/SWISHE.xs for how to get at the data.
*
*   Pass in:
*       swish_handle -- used for the header/metadata dumps and for
*                       error reporting
*       results      -- result set to display; NULL is silently ignored
*
*   Output (stdout):
*       the index headers and index metadata, then either "no results!"
*       or the hit count followed by one record per result.
*
***********************************************************************/

static void display_results( SW_HANDLE swish_handle, SW_RESULTS results )
{
    SW_RESULT hit;
    int       total;
    int       handle_test_pending = 1;  /* run the handle round-trip test on the first hit only */

    /* better safe than sorry */
    if ( results == NULL )
        return;

    /* Headers for the index(es) first, then whatever metadata the index offers */
    print_index_headers( swish_handle, results );
    print_index_metadata( swish_handle );

    total = SwishHits( results );
    if ( total == 0 )
    {
        printf("no results!\n");
        return;
    }

    printf("# Total Results: %d\n", total );

    /* Position on the first result -- this is also how to seek to a page of results */
    if ( SwishSeekResult( results, 0 ) < 0 )
    {
        print_error_or_abort( swish_handle );  /* seek past end of file */
        return;
    }

    for ( hit = SwishNextResult( results ); hit; hit = SwishNextResult( results ) )
    {
        SWISH_HEADER_VALUE wc_value;
        SWISH_HEADER_TYPE  wc_type;
        const char        *wc_header = "WordCharacters";

        /*
         * SwishResultPropertyStr() works for every type of property, while
         * SwishResultPropertyULong() can be used for the numeric ones.
         * Errors should probably be checked after every call:
         * SwishResultPropertyULong() returns ULONG_MAX when the value cannot
         * be returned, which may be an error or may just mean that no such
         * property was assigned (which is not an error).
         */
        printf("Path: %s\n  Rank: %lu\n  Size: %lu\n  Title: %s\n  Index: %s\n  Modified: %s\n  Record #: %lu\n  File   #: %lu\n\n",
            SwishResultPropertyStr   ( hit, "swishdocpath" ),
            SwishResultPropertyULong ( hit, "swishrank" ),
            SwishResultPropertyULong ( hit, "swishdocsize" ),
            SwishResultPropertyStr   ( hit, "swishtitle"),
            SwishResultPropertyStr   ( hit, "swishdbfile" ),
            SwishResultPropertyStr   ( hit, "swishlastmodified" ),
            SwishResultPropertyULong ( hit, "swishreccount" ),  /* can figure this out in loop, of course */
            SwishResultPropertyULong ( hit, "swishfilenum" )
        );

        /* Generally not useful, but index header data can also be looked up via the current result */
        wc_value = SwishResultIndexValue( hit, wc_header, &wc_type );
        print_header_value( swish_handle, wc_header, wc_value, wc_type );

        if ( !handle_test_pending )
            continue;

        printf( "Testing SW_ResultToSW_HANDLE() = '%s'\n",
            SW_ResultToSW_HANDLE( hit ) == swish_handle ? "OK" : "Not OK" );

        handle_test_pending = 0;
    }
}


/**********************************************************************
* print_index_headers
*
*   Displays, for every open index, each standard header followed by
*   the search-specific "Parsed Words" and "Removed Stopwords" lists.
*
*   Pass in:
*       swish_handle -- for the index names and standard headers
*       results      -- for the parsed words / removed stopwords
*
*   Note:
*       The SWISH_HEADER value, and the data it points to, is only
*       valid during the current call.
*
***********************************************************************/

static void print_index_headers( SW_HANDLE swish_handle, SW_RESULTS results )
{
    const char **all_headers = SwishHeaderNames(swish_handle);  /* every available header name */
    const char **idx         = SwishIndexNames( swish_handle );

    /* one pass per index file */
    for ( ; *idx; idx++ )
    {
        const char        **hdr;
        SWISH_HEADER_VALUE  value;
        SWISH_HEADER_TYPE   type;

        /* standard headers stored in the index */
        for ( hdr = all_headers; *hdr; hdr++ )
        {
            value = SwishHeaderValue( swish_handle, *idx, *hdr, &type );
            print_header_value( swish_handle, *hdr, value, type );
        }

        /* results-specific data for this index */
        value = SwishParsedWords( results, *idx );
        print_header_value( swish_handle, "Parsed Words", value, SWISH_LIST );

        value = SwishRemovedStopwords( results, *idx );
        print_header_value( swish_handle, "Removed Stopwords", value, SWISH_LIST );
    }
}

/**********************************************************************
* print_header_value
*
*   Prints one "# <name>: <value>" line to stdout, formatting the value
*   according to its header type.
*
*   Pass in:
*       swish_handle -- only used to report the error for SWISH_HEADER_ERROR
*       name         -- label printed in front of the value
*       head_value   -- the value; which member is read depends on head_type
*       head_type    -- SWISH_STRING, SWISH_NUMBER, SWISH_BOOL, SWISH_LIST
*                       or SWISH_HEADER_ERROR
*
*   Note:
*       A NULL string (SWISH_STRING) prints as empty, and a NULL list
*       (SWISH_LIST) prints as an empty list instead of being dereferenced.
*
***********************************************************************/

static void print_header_value( SW_HANDLE swish_handle, const char *name, SWISH_HEADER_VALUE head_value, SWISH_HEADER_TYPE head_type )
{
    const char **string_list;

    printf("# %s:", name );

    switch ( head_type )
    {
        case SWISH_STRING:
            printf(" %s\n", head_value.string ? head_value.string : "" );
            return;

        case SWISH_NUMBER:
            printf(" %lu\n", head_value.number );
            return;

        case SWISH_BOOL:
            printf(" %s\n", head_value.boolean ? "Yes" : "No" );
            return;

        case SWISH_LIST:
            /* The list is NULL-terminated; guard against the list pointer itself being NULL */
            for ( string_list = head_value.string_list; string_list && *string_list; string_list++ )
                printf(" %s", *string_list );

            printf("\n");
            return;

        case SWISH_HEADER_ERROR:
            print_error_or_abort( swish_handle );
            return;

        default:
            printf(" Unknown header type '%d'\n", (int)head_type );
            return;
    }
}


/**********************************************************************
* print_index_metadata
*
*   Displays the metanames and the property names of each index,
*   one "# Meta: ..." or "# Property: ..." line per entry showing its
*   name, type and id.
*
*   Pass in:
*       swish_handle -- for the index names and their meta/property lists
*
***********************************************************************/

static void print_index_metadata( SW_HANDLE swish_handle )
{
    const char **idx = SwishIndexNames( swish_handle );

    for ( ; *idx; idx++ )
    {
        /* Both lists are fetched up front, then walked until their terminating entry */
        SWISH_META_LIST metas = SwishMetaList( swish_handle, *idx );
        SWISH_META_LIST props = SwishPropertyList( swish_handle, *idx );

        for ( ; *metas; metas++ )
        {
            printf("# Meta: " );
            printf( "%s ", SwishMetaName(*metas));
            printf( "type=%d ", SwishMetaType(*metas));
            printf( "id=%d ", SwishMetaID(*metas));
            printf("\n");
        }

        for ( ; *props; props++ )
        {
            printf("# Property: " );
            printf( "%s ", SwishMetaName(*props));
            printf( "type=%d ", SwishMetaType(*props));
            printf( "id=%d ", SwishMetaID(*props));
            printf("\n");
        }
    }
}


/*************************************************************
*  print_error_or_abort -- display an error message / abort
*
*   Does nothing when no error is pending on the handle.
*
*   Otherwise this displays the error message on stderr, and aborts
*   first if it's a critical error.  Aborting is overkill -- normally
*   a critical error means that you should call SwishClose() and
*   start over.
*
*   For a search, an error means the search could not be completed.
*
**************************************************************/

static void print_error_or_abort( SW_HANDLE swish_handle )
{
    if ( SwishError( swish_handle ) )
    {
        /* Critical errors simply exit here -- normally you would SwishClose() and loop */
        if ( SwishCriticalError( swish_handle ) )
            SwishAbortLastError( swish_handle );   /* prints message and exits */

        /* Anything else is just reported */
        fprintf(stderr,
            "err: Number [%d], Type [%s],  Optional Message: [%s]\n",
            SwishError( swish_handle ),
            SwishErrorString( swish_handle ),
            SwishLastErrorMsg( swish_handle )
        );
    }
}

/*
 * This shows how to use the stemmer based on a result.
 * It's done this way because a result is related to a
 * specific index (where a result list may contain results
 * from many indexes).
 * Typically, the stemmer is used at search time to highlight words
 * so it would be based on a given result.
 *
 * Pass in:
 *     results -- result set whose first result selects the stemmer;
 *                a NULL or empty set just prints a notice.
 *
 * Prints the fuzzy mode of the first result and then the stemmed
 * form(s) of a handful of sample words.
 */

static void demo_stemming( SW_RESULTS results )
{
    SW_RESULT r;

    printf("\n-- Stemmer Test --\n");


    /* A missing result set is treated like an empty one (display_results() guards NULL too) */
    if ( !results || !SwishHits( results ) )
    {
        printf("Couldn't test stemming because search returned no results\n");
        return;
    }

    /* Same failure test as display_results(): a negative return means the seek failed */
    if ( SwishSeekResult( results, 0 ) < 0 )
    {
        printf("Failed to seek to result 0\n");
        return;
    }
    r = SwishNextResult( results );

    if ( !r )
    {
        printf("Failed to get first result\n");
        return;
    }

    printf("Fuzzy Mode: %s\n", SwishFuzzyMode( r ) );

    stem_it( r, "running" );
    stem_it( r, "runs" );
    stem_it( r, "12345" );
    stem_it( r, "abc3def" );
    stem_it( r, "");
    stem_it( r, "sugar" );  /* produces two metaphones */
}

/*
 * stem_it -- run one word through the fuzzy (stemming) routine that
 * belongs to the index of result r and print the outcome.
 *
 * Pass in:
 *     r    -- result that selects which index's fuzzy mode is used
 *     word -- word to convert
 *
 * Prints the word, the status and word count reported for the
 * conversion, and every word in the returned list.  The fuzzy word
 * object is freed before returning.
 */
static void stem_it( SW_RESULT r, char *word )
{
    SW_FUZZYWORD  fuzzy;
    const char  **out;

    printf(" [%s] : ", word );

    fuzzy = SwishFuzzyWord( r, word );
    printf(" Status: %d", SwishFuzzyWordError(fuzzy) );
    printf(" Word Count: %d\n", SwishFuzzyWordCount(fuzzy) );

    printf("   words:");

    /* the list is walked until its NULL terminator */
    for ( out = SwishFuzzyWordList( fuzzy ); *out; out++ )
        printf(" %s", *out );

    printf("\n");

    SwishFuzzyWordFree( fuzzy );
}