File: StringDistanceTeacher.java

package info (click to toggle)
libsecondstring-java 0.1~dfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 764 kB
  • sloc: java: 9,592; xml: 114; makefile: 6
file content (54 lines) | stat: -rw-r--r-- 1,902 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
package com.wcohen.ss.api;

/**
 * Drives the training of a StringDistanceLearner and hands back the
 * StringDistance it produces.  Where the training data comes from is
 * left to subclasses, which supply it through the abstract methods
 * declared below.
 *
 */
public abstract class StringDistanceTeacher
{
	/**
	 * Run the full training protocol against the given learner.
	 *
	 * The phases run in a fixed order: the string pool is installed,
	 * then the distance-instance pool, then every instance of the
	 * example pool is added, then the learner's queries are answered
	 * for as long as the learner has queries and this teacher has
	 * answers.
	 *
	 * @param learner the learner to be trained
	 * @return whatever the learner's getDistance() yields once all
	 * phases have completed
	 */
	public final StringDistance train(StringDistanceLearner learner)
	{
		// phase 1: let the learner observe the strings themselves
		// (TFIDF-style 'training' from corpus statistics)
		StringWrapperIterator corpus = stringWrapperIterator();
		learner.setStringWrapperPool(learner.prepare(corpus));

		// phase 2: pool of pairs for unsupervised/semi-supervised training
		DistanceInstanceIterator pairPool = distanceInstancePool();
		learner.setDistanceInstancePool(learner.prepare(pairPool));

		// phase 3: supervised training, one example at a time
		DistanceInstanceIterator examples = distanceExamplePool();
		while (examples.hasNext()) {
			learner.addExample(examples.nextDistanceInstance());
		}

		// phase 4: active or passive learning from labeled data; the
		// learner is asked first, so hasAnswers() is only consulted
		// when a query is actually pending
		while (true) {
			if (!learner.hasNextQuery() || !hasAnswers()) {
				break;
			}
			DistanceInstance answer = labelInstance(learner.nextQuery());
			if (answer == null) {
				// the query could not be answered; move on to the next one
				continue;
			}
			learner.addExample(answer);
		}

		// the trained distance is the final result
		return learner.getDistance();
	}

	/** The strings over which distances will be computed. */
	protected abstract StringWrapperIterator stringWrapperIterator();

	/** Unlabeled pairs of strings over which distances will be computed;
	 * this pool serves active or semi-supervised learning. */
	protected abstract DistanceInstanceIterator distanceInstancePool();

	/** Pairs of strings used for supervised learning; train() adds every
	 * instance of this pool to the learner as an example.
	 * NOTE(review): supervised training presumably needs these pairs to
	 * be labeled - confirm against implementations. */
	protected abstract DistanceInstanceIterator distanceExamplePool();

	/** Supply the label for an instance the learner asked about.
	 * A null return means the query can't be answered. */
	protected abstract DistanceInstance labelInstance(DistanceInstance distanceInstance);

	/** Tell whether this teacher is able to answer further queries. */
	protected abstract boolean hasAnswers();
}