1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
|
package com.wcohen.ss;
import com.wcohen.ss.api.*;
import java.util.*;
/**
* Abstract StringDistance defined over Strings that are broken
* into fields. This could actually be used in several ways:
* <ol>
* <li>To merge together scores from a single StringDistance d
* applied to several different subfields f1, ..., fk of a string.
* <li>To merge together scores from multiple StringDistances
* d1, ..., dk, where di is applied to the corresponding fi.
* <li>With a little extra coding, this could also be
* used to merge together scores from multiple StringDistances
* applied to a single string. This would require using a new
* MultiStringWrapper constructor that makes k copies of
* a single string, rather than splitting a string into k
* disjoint parts.
* </ol>
*/
public abstract class MultiStringDistance implements StringDistance
{
    /** Delimiter handed to the MultiStringWrapper constructor when a raw string is prepared. */
    private final String delim;

    /**
     * Create a distance over multi-field strings.
     *
     * @param delim delimiter passed to MultiStringWrapper whenever a String is prepared
     */
    public MultiStringDistance(String delim) {
        this.delim = delim;
    }

    /**
     * Score two wrappers by scoring each pair of corresponding fields and
     * then merging the per-field scores with {@link #scoreCombination}.
     *
     * @param s first string; prepared on the fly if it is not already a MultiStringWrapper
     * @param t second string; prepared on the fly if it is not already a MultiStringWrapper
     * @return the combined score
     * @throws IllegalArgumentException if the two inputs have different numbers of fields
     */
    public final double score(StringWrapper s, StringWrapper t)
    {
        MultiStringWrapper ms = asMultiStringWrapper(s);
        MultiStringWrapper mt = asMultiStringWrapper(t);
        return scoreCombination(multiScore(ms, mt));
    }

    /**
     * Combine the scores for each primitive distance function on each field.
     *
     * @param multiScore per-field scores, one entry per field, in field order
     * @return the single combined score
     */
    protected abstract double scoreCombination(double[] multiScore);

    /**
     * Compute the scores for each primitive distance function on each field.
     *
     * @return an array whose i-th entry is the score of the i-th distance on the i-th field pair
     * @throws IllegalArgumentException if the two inputs have different numbers of fields
     */
    private double[] multiScore(MultiStringWrapper ms, MultiStringWrapper mt)
    {
        requireSameFieldCount(ms, mt);
        int n = ms.size();
        double[] scores = new double[n];
        for (int i = 0; i < n; i++) {
            scores[i] = getDistance(i).score(ms.get(i), mt.get(i));
        }
        return scores;
    }

    /**
     * Single home for the field-count precondition shared by scoring and
     * explanation, so both paths fail in the same way with the same message.
     *
     * @throws IllegalArgumentException if the two inputs have different numbers of fields
     */
    private static void requireSameFieldCount(MultiStringWrapper ms, MultiStringWrapper mt)
    {
        if (ms.size() != mt.size()) {
            throw new IllegalArgumentException("inputs have different numbers of fields");
        }
    }

    /**
     * Explain a score: one section per field pair, produced by that field's
     * distance, followed by the explanation of how the per-field scores are combined.
     *
     * @param s first string; prepared on the fly if it is not already a MultiStringWrapper
     * @param t second string; prepared on the fly if it is not already a MultiStringWrapper
     * @return the textual explanation
     * @throws IllegalArgumentException if the two inputs have different numbers of fields
     */
    public final String explainScore(StringWrapper s, StringWrapper t)
    {
        MultiStringWrapper ms = asMultiStringWrapper(s);
        MultiStringWrapper mt = asMultiStringWrapper(t);
        requireSameFieldCount(ms, mt);
        int n = ms.size();
        // The buffer never leaves this method, so the unsynchronized builder is sufficient.
        StringBuilder buf = new StringBuilder();
        for (int i = 0; i < n; i++) {
            // Fields are reported 1-based.
            buf.append("Field ").append(i + 1)
               .append(": s='").append(ms.get(i))
               .append("' t='").append(mt.get(i))
               .append("':\n");
            buf.append(getDistance(i).explainScore(ms.get(i), mt.get(i)));
        }
        buf.append("combination:\n");
        buf.append(explainScoreCombination(multiScore(ms, mt)));
        return buf.toString();
    }

    /**
     * Explain how to combine the scores for each primitive distance
     * function on each field.
     *
     * @param multiScore per-field scores, one entry per field, in field order
     * @return the textual explanation of the combination
     */
    protected abstract String explainScoreCombination(double[] multiScore);

    /**
     * Strings are scored by converting them to StringWrappers with the
     * prepare function.
     *
     * @throws IllegalArgumentException if either string has an invalid number of fields,
     *         or the two strings have different numbers of fields
     */
    public final double score(String s, String t) {
        return score(prepare(s), prepare(t));
    }

    /**
     * Scores are explained by converting Strings to StringWrappers
     * with the prepare function.
     *
     * @throws IllegalArgumentException if either string has an invalid number of fields,
     *         or the two strings have different numbers of fields
     */
    public final String explainScore(String s, String t) {
        return explainScore(prepare(s), prepare(t));
    }

    /**
     * Prepare a string. The string is wrapped in a MultiStringWrapper built
     * with this distance's delimiter, the field count is validated, and each
     * field is then replaced by the result of the i-th distance's own
     * prepare applied to that field's unwrapped value.
     *
     * @param s the raw string
     * @return the prepared MultiStringWrapper
     * @throws IllegalArgumentException if {@link #isLegalMultiStringWrapperSize}
     *         rejects the number of fields
     */
    public final StringWrapper prepare(String s) {
        MultiStringWrapper ms = new MultiStringWrapper(s, delim);
        if (!isLegalMultiStringWrapperSize(ms.size())) {
            throw new IllegalArgumentException("string has invalid number of fields");
        }
        for (int i = 0; i < ms.size(); i++) {
            ms.set(i, getDistance(i).prepare(ms.get(i).unwrap()));
        }
        return ms;
    }

    /**
     * Lazily prepare a string. Ie, if it's already a
     * MultiStringWrapper, do nothing, otherwise use prepare() to
     * convert to a MultiStringWrapper.
     *
     * @param w wrapper to convert
     * @return w itself when it is already a MultiStringWrapper, otherwise the
     *         result of preparing its unwrapped string
     */
    protected MultiStringWrapper asMultiStringWrapper(StringWrapper w) {
        if (w instanceof MultiStringWrapper) {
            return (MultiStringWrapper) w;
        }
        return (MultiStringWrapper) prepare(w.unwrap());
    }

    /**
     * Get the distance used for the i-th pair of fields.
     *
     * @param i zero-based field index
     */
    protected abstract StringDistance getDistance(int i);

    /**
     * Check if a string has a valid number of fields. Override this
     * method if some assumption is made about the number of fields.
     * This default implementation accepts every count except zero.
     *
     * @param n number of fields found in a prepared string
     * @return true if n is an acceptable field count
     */
    protected boolean isLegalMultiStringWrapperSize(int n) {
        return n != 0;
    }

    /**
     * Default main routine for testing: with exactly two arguments, prints
     * the explanation of their score; otherwise prints a usage line.
     *
     * @param d    distance to exercise
     * @param argv command-line arguments; expected to hold the two strings to compare
     */
    protected static final void doMain(StringDistance d, String[] argv)
    {
        if (argv.length != 2) {
            System.out.println("usage: string1 string2");
        } else {
            System.out.println(d.explainScore(argv[0], argv[1]));
        }
    }
}
|