File: Rule.java

package info (click to toggle)
javahelp2 2.0.05.ds1-10
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 2,552 kB
  • sloc: java: 28,795; xml: 1,631; makefile: 16; sh: 2
file content (245 lines) | stat: -rw-r--r-- 7,732 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
/*
 * @(#)Rule.java	1.3 06/10/30
 * 
 * Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 * 
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 * 
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 * 
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 * 
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */

/*
 * @(#) Rule.java 1.3 - last change made 10/30/06
 */

package com.sun.java.help.search;

import java.util.Vector;
import java.util.StringTokenizer;

/**
 * A Rule matches a pattern at the right end of a word, removes an indicated
 * number of characters from the end to produce a stem, and generates a list
 * of alternative forms of the word by adding each of a specified list of
 * endings to the stem.
 * <p>
 * Each rule specifies an ending pattern and a list
 * of endings to be added to the stem to produce different variant forms
 * of the input word.  The ending pattern consists of a string of letters
 * or letter groups separated by spaces to be matched against corresponding
 * letters at the end of the word.  
 * <ul>
 * <li>
 * A letter group (e.g., aeiou) will match any of the letters in the group.  
 * <li> 
 * A letter group prefixed with a period (.) may be matched anywhere 
 * preceding the match of its subsequent letter group.  A letter group 
 * of this type is referred to as "unanchored." 
 * <li>
 * A group that is to be matched at a specified position is "anchored." 
 * <li>
 * A plus sign (+) in the pattern, in place of a letter group, marks the
 * point in the pattern after which the matching letters will be removed
 * to form the stem.  There should be no unanchored letter groups after
 * the plus sign, and there should be at most one plus sign in the pattern
 * (otherwise only the leftmost will count).  
 * <li>
 * An ampersand (&) in place of
 * a letter group in an ending pattern will match a letter that is the same
 * as its preceding letter in the word.
 * <li>
 * An ampersand in the list of alternative endings indicates a repeat of
 * the letter that ends the stem. 
 * <li>
 * An under bar (_) as an alternative ending indicates that nothing is to
 * be added to the stem for that alternative.  
 * <li>
 * An ending beginning with an
 * asterisk (*) indicates that the rules are to be reapplied recursively to
 * the form obtained from using this ending.
 * </ul>
 * <p>
 * Rule(s) are grouped in blocks and labeled (usually by a common final
 * sequence) and are ordered within each group so that after a matching
 * rule is found no further rules are to be tried (except when invoked
 * explicitly on a new word by a redo (*) operator in an alternative ending).
 *
 * @author Roger D. Brinkley
 * @author Jacek Ambroziak
 * @version	1.3	10/30/06
 *
 * @see LiteMorph
 */

public class Rule {
    /** Compile-time switch for printf-style tracing of rule matching. */
    private static final boolean debugFlag = false;

    /**
     * Letter groups of the ending pattern in left-to-right order.
     * The "+" marker is consumed by the constructor and is not stored here.
     */
    private final String[] pattern;

    /**
     * Number of pattern elements that follow the leftmost "+" marker, i.e.
     * how many trailing characters of a matched word are cut off to form
     * the stem.
     */
    private final int killnum;

    /** Alternative endings, in the order given to the constructor. */
    private final String[] expansions;

    /** Morphology engine invoked for redo ("*") expansions. */
    private final LiteMorph morph;

    /**
     * Create a Rule.
     *
     * @param expression A String representing the ending pattern: letter
     *        groups (and optionally "+" and "&amp;" markers) separated by
     *        whitespace.
     * @param expansionString A String of expansions delimited by commas
     *        and/or whitespace.
     * @param morph The LiteMorph whose <code>morphWord</code> method is
     *        called when an expansion starts with an asterisk (*).
     */
    public Rule(String expression, String expansionString, LiteMorph morph) {
        this.morph = morph;

        // Set up the pattern array. Raw Vector is kept on purpose: the rest
        // of this file is written without generics.
        Vector patternBuffer = new Vector();
        StringTokenizer patternTokens = new StringTokenizer(expression, " \t\n\r");
        boolean passedPlus = false;
        int charsAfterPlus = 0;
        while (patternTokens.hasMoreTokens()) {
            String chars = patternTokens.nextToken();
            if (chars.equals("+")) {
                // Only the leftmost "+" is meaningful; any later one is
                // dropped and must not be counted as a character to remove.
                passedPlus = true;
            } else {
                if (passedPlus) {
                    // count number of pattern elements after +
                    charsAfterPlus++;
                }
                patternBuffer.addElement(chars);
            }
        }
        pattern = new String[patternBuffer.size()];
        patternBuffer.copyInto(pattern);
        killnum = charsAfterPlus;

        // Set up the expansions array.
        Vector expansionsBuffer = new Vector();
        StringTokenizer expansionTokens = new StringTokenizer(expansionString, ", \t\n\r");
        while (expansionTokens.hasMoreTokens()) {
            expansionsBuffer.addElement(expansionTokens.nextToken());
        }
        expansions = new String[expansionsBuffer.size()];
        expansionsBuffer.copyInto(expansions);
    }

    /**
     * Determines if a word matches the rule and, if so, builds the variant
     * forms produced by the rule's expansions.
     * <p>
     * Forms produced by redo ("*") expansions are handed to
     * <code>morph.morphWord</code> and are not part of the returned array.
     * <p>
     * The accumulator for the forms is local to each invocation, so a redo
     * expansion that leads back into this same Rule does not disturb the
     * invocation that triggered it.
     *
     * @param word The word to test.
     * @param depth Current recursion depth; redo expansions pass
     *        <code>depth + 1</code> on to <code>morph.morphWord</code>.
     * @param skipnum Number of trailing word characters (and trailing
     *        pattern elements) that the caller has already tested and that
     *        are therefore skipped here.
     * @return The forms generated directly by this rule; an empty array if
     *         the word does not match or no form was generated.
     */
    public String[] match(String word, int depth, int skipnum) {
        Vector words = new Vector();

        // killnum can exceed the word length only when skipnum caused the
        // pattern test to be bypassed on a short word; treat that as "no
        // match" rather than letting substring() throw.
        if (matchesPattern(word, skipnum) && killnum <= word.length()) {
            String stem = word.substring(0, word.length() - killnum);
            for (int e = 0; e < expansions.length; e++) {
                makeForm(words, stem, expansions[e], depth);
            }
        }

        String[] result = new String[words.size()];
        words.copyInto(result);
        return result;
    }

    /**
     * Tests the pattern against the end of the word, right to left,
     * ignoring the last <code>skipnum</code> characters and pattern elements.
     */
    private boolean matchesPattern(String word, int skipnum) {
        // skipnum positions have already been tested by the dispatch method
        int position = word.length() - 1 - skipnum;
        int i = pattern.length - 1 - skipnum;

        while (i > -1) {
            if (debugFlag) {
                debug("   trying " + pattern[i] + " at " + position +
                      " for i = " + i);
            }

            // Pattern elements remain but the word is used up: no match.
            if (position < 0) {
                return false;
            }

            String group = pattern[i];
            char letter = word.charAt(position);

            if (group.equals("&")) {
                // "&" matches a duplicate of the preceding letter
                if (position < 1 || letter != word.charAt(position - 1)) {
                    return false;
                }
                i--;
            } else if (group.startsWith(".")) {
                // Unanchored group: may match anywhere to the left, so a
                // miss here just moves on to the next word position.
                if (group.indexOf(letter, 1) >= 0) {
                    i--;
                }
            } else if (group.indexOf(letter) < 0) {
                // Anchored group that doesn't match here
                return false;
            } else {
                i--;
            }
            position--;
        }
        return true;
    }

    /**
     * Applies one expansion to the stem, appending the resulting form to
     * <code>words</code> or, for a redo expansion, passing it to
     * <code>morph.morphWord</code>.
     */
    private void makeForm(Vector words, String stem, String expansion, int depth) {
        switch (expansion.charAt(0)) {
        case '_':
            // just use the stem; nothing to add
            words.addElement(stem);
            break;

        case '&':
            // double last letter of stem; an empty stem has no letter to
            // double, so no form is produced in that case
            if (stem.length() > 0) {
                words.addElement(stem + stem.charAt(stem.length() - 1)
                                 + expansion.substring(1));
            }
            break;

        case '*':
            // redo MorphWord on the resulting form
            if (debugFlag) {
                debug(" starting redo: with " + stem + " + " + expansion +
                      " from depth " + depth);
            }
            String ending = expansion.substring(1);
            // "*_" means redo on the bare stem; a lone "*" is treated the
            // same way since there is nothing to append.
            if (ending.length() == 0 || ending.charAt(0) == '_') {
                morph.morphWord(stem, depth + 1);
            } else {
                morph.morphWord(stem + ending, depth + 1);
            }
            break;

        default:
            words.addElement(stem + expansion);
            break;
        }
    }

    /**
     * For printf debugging.
     */
    private static void debug(String str) {
        if (debugFlag) {
            System.out.println("Rule: " + str);
        }
    }
}