File: JelinekMercerJS.java

package info (click to toggle)
libsecondstring-java 0.1~dfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 764 kB
  • sloc: java: 9,592; xml: 114; makefile: 6
file content (38 lines) | stat: -rw-r--r-- 1,057 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
package com.wcohen.ss;

import java.util.*;
import com.wcohen.ss.tokens.*;
import com.wcohen.ss.api.*;

/**
 * Jensen-Shannon distance of two unigram language models, smoothed
 * using a Jelinek-Mercer mixture model.
 *
 * <p>The smoothed probability of a token is a linear interpolation
 * between the ratio <code>freq/totalWeight</code> handed to
 * {@link #smoothedProbability} and the value returned by the inherited
 * <code>backgroundProb</code>; <code>lambda</code> is the weight placed
 * on the background term.
 */

public class JelinekMercerJS extends JensenShannonDistance
{
	/** Mixture weight used by the no-argument constructor. */
	private static final double DEFAULT_LAMBDA = 0.2;

	/** Weight given to the background probability; kept within [0,1]. */
	private double lambda = DEFAULT_LAMBDA;

	/**
	 * Validate a candidate mixture weight.
	 *
	 * @param lambda the value to check
	 * @return the same value, if it is acceptable
	 * @throws IllegalArgumentException if lambda is NaN or lies outside [0,1]
	 */
	private static double checkLambda(double lambda) {
		// Phrased as a negated range test so that NaN is rejected as well.
		if (!(lambda >= 0.0 && lambda <= 1.0)) {
			throw new IllegalArgumentException("lambda must be in [0,1]: "+lambda);
		}
		return lambda;
	}

	/** @return the weight currently given to the background probability */
	public double getLambda() { return lambda; }

	/**
	 * Set the weight given to the background probability.
	 *
	 * @param lambda mixture weight in [0,1]
	 * @throws IllegalArgumentException if lambda is NaN or lies outside [0,1]
	 */
	public void setLambda(double lambda) { this.lambda = checkLambda(lambda); }

	/**
	 * Boxed variant of {@link #setLambda(double)}.
	 *
	 * @param lambda mixture weight in [0,1]; must not be null
	 * @throws NullPointerException if lambda is null
	 * @throws IllegalArgumentException if lambda is NaN or lies outside [0,1]
	 */
	public void setLambda(Double lambda) {
		if (lambda == null) {
			throw new NullPointerException("lambda must not be null");
		}
		this.lambda = checkLambda(lambda.doubleValue());
	}
	
	/**
	 * Create a distance with an explicit tokenizer and mixture weight.
	 *
	 * @param tokenizer tokenizer passed through to the superclass constructor
	 * @param lambda mixture weight in [0,1]
	 * @throws IllegalArgumentException if lambda is NaN or lies outside [0,1]
	 */
	public JelinekMercerJS(Tokenizer tokenizer,double lambda) { 
		super(tokenizer);
		// Assign directly instead of going through the public setter, so no
		// overridable method runs while the object is still being constructed.
		this.lambda = checkLambda(lambda);
	}

	/**
	 * Create a distance using <code>SimpleTokenizer.DEFAULT_TOKENIZER</code>
	 * and the default mixture weight of 0.2.
	 */
	public JelinekMercerJS() { 
		this(SimpleTokenizer.DEFAULT_TOKENIZER, DEFAULT_LAMBDA); 
	}

	/**
	 * Smoothed probability of the token.
	 *
	 * @param tok token whose background probability is looked up
	 * @param freq numerator of the unsmoothed estimate
	 * @param totalWeight denominator of the unsmoothed estimate; it is not
	 *        checked here, so a zero value yields a non-finite result
	 * @return <code>(1-lambda)*(freq/totalWeight) + lambda*backgroundProb(tok)</code>
	 */
	protected double smoothedProbability(Token tok, double freq, double totalWeight) 
	{
		double observed = freq/totalWeight;
		double background = backgroundProb(tok);
		return (1-lambda) * observed + lambda * background;
	}

	/** @return a short description of this distance including its lambda */
	public String toString() { return "[JelinekMercerJS lambda="+lambda+"]"; }

	/**
	 * Command-line entry point; hands a default-configured instance and the
	 * arguments to the inherited <code>doMain</code>.
	 *
	 * @param argv command-line arguments, forwarded unchanged
	 */
	static public void main(String[] argv) {
		doMain(new JelinekMercerJS(), argv);
	}
}