File: lookup.c

package info (click to toggle)
mmorph 2.3.4.2-17
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 1,248 kB
  • sloc: ansic: 4,986; yacc: 1,215; lex: 417; makefile: 259; sh: 48; sed: 33; csh: 26
file content (143 lines) | stat: -rw-r--r-- 3,540 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
/*
    mmorph, MULTEXT morphology tool
    Version 2.3, October 1995
    Copyright (c) 1994,1995 ISSCO/SUISSETRA, Geneva, Switzerland
    Dominique Petitpierre, <petitp@divsun.unige.ch>
*/
/*
    lookup.c

    quick hack to lookup words in the database
    should be changed to use SGML API
*/

#include <ctype.h>
#include "user.h"


#define MAXSTRLNG 128

/*
    Read the next input line into a static buffer and hand it back as one
    segment: one word per line, every character except the newline counts.

    Outputs:
      *segment_id  running number of this call (the counter starts at 0 and
                   is advanced on every call, including the final one that
                   hits end of input)
      *segment     points at the static line buffer; its content is
                   overwritten by the next call
      *tfs         always set to NULL (not yet implemented)

    Returns TRUE when a segment was read.  This includes a last line that
    is not terminated by a newline, for which a warning is printed.
    Returns FALSE when end of input is reached with nothing read.
    A line holding more than MAXSTRLNG - 1 characters is reported through
    fatal_error().
 */
static      t_boolean
get_segment(infile, segment_id, segment, tfs)
FILE       *infile;
t_segment_id *segment_id;	/* output */
char      **segment;	/* output */
s_tfs     **tfs;	/* output */

{
    static char line_buf[MAXSTRLNG];
    static t_segment_id next_id = 0;	/* 32 bits = 4294967296 ids */
    char       *const limit = line_buf + (MAXSTRLNG - 1);
    char       *fill = line_buf;
    int         ch;

    /*
       Collect characters by hand rather than with fgets, which would keep
       the newline.  One character is always read before the room check,
       so a line of exactly MAXSTRLNG - 1 characters is still recognised
       by the newline that follows it.
    */
    for (;;) {
	ch = getc(infile);
	if (ch == EOF || fill >= limit || ch == (int) '\n') {
	    break;
	}
	*fill++ = (char) ch;
    }
    *fill = '\0';

    *segment_id = next_id++;
    *segment = line_buf;
    *tfs = NULL;	/* not yet implemented */

    if (ch == (int) '\n') {
	return (TRUE);
    }
    if (ch != EOF) {
	/* buffer full and the line still goes on */
	fatal_error("word too long (max %d):\n %s",
		    MAXSTRLNG - 1, line_buf);
	return (FALSE);	/* keeps gcc -Wall quiet; presumably not reached */
    }
    if (fill == line_buf) {
	return (FALSE);	/* normal EOF */
    }
    print_warning("input file does not terminate %s",
		  "with a newline");
    return (TRUE);
}

/* see print_projection()
put_lex(segment_id,record, surface_lex)
{
}
*/

/*
    Write one segment to rejectfile, followed by a newline.  When
    want_segment_id is set the line starts with the segment id and a tab.

    NOTE(review): the id is printed with "%d"; confirm that this matches
    the typedef of t_segment_id, which is not visible in this file.
 */
static void
reject_segment(segment_id, segment)
t_segment_id segment_id;	/* input: id of the segment */
char       *segment;	/* input: text of the segment */

{
    if (want_segment_id) {
	print(rejectfile, "%d\t", segment_id);
    }
    print(rejectfile, "%s\n", segment);
}

void
lookup(infile)
FILE       *infile;

{
    t_segment_id segment_id;
    char       *segment;
    s_tfs      *tfs;
    t_letter    surface_lex[MAXSTRLNG];
    t_boolean   found;
    t_boolean   folded;
    t_boolean   prompt_user;

    /* read a word and look-it up */
    prompt_user = isatty(fileno(infile)) && isatty(fileno(outfile));
    if (prompt_user) {
	print_out("%s", prompt);
	if (want_flush)	/* normally not necessary for ttys; just in case */
	    flush_out();
    }
    while (get_segment(infile, &segment_id, &segment, &tfs)) {
	if (fold_case_always) {
	    folded = fold_case((unsigned char *) segment,
			       (unsigned char *) surface_lex);
	    found = (map_letter((char *) surface_lex, surface_lex,
				Surface_Letter)
		     && db_forms_lookup(segment_id, surface_lex, tfs));
	}
	else {
	    folded = FALSE;
	    found = (map_letter(segment, surface_lex, Surface_Letter)
		     && db_forms_lookup(segment_id, surface_lex, tfs));
	}
	if (!fold_case_always && fold_case_fallback && !found) {
	    folded = fold_case((unsigned char *) segment,
			       (unsigned char *) surface_lex);
	    if (folded)
		found |= (map_letter((char *) surface_lex, surface_lex,
				     Surface_Letter)
			  && db_forms_lookup(segment_id, surface_lex, tfs));
	}
	if (!found) {
	    reject_segment(segment_id, segment);
	    if (want_segment_id)
		print_out("%d\t\n", segment_id);
	    else
		print_out("\n");
	}
	else if (!want_segment_id)
	    print_out("\n");	/* separator between possible analyses */
	if (prompt_user)
	    print_out("%s", prompt);
	if (want_flush)
	    flush_out();
    }
    if (prompt_user)
	print_out("\n");
}