File: DirichletJS.java

package info (click to toggle)
libsecondstring-java 0.1~dfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 764 kB
  • sloc: java: 9,592; xml: 114; makefile: 6
file content (38 lines) | stat: -rw-r--r-- 1,069 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
package com.wcohen.ss;

import java.util.*;
import com.wcohen.ss.tokens.*;
import com.wcohen.ss.api.*;

/**
 * Jensen-Shannon distance of two unigram language models, smoothed
 * using a Dirichlet prior.
 *
 * <p>The only thing this class contributes on top of
 * {@link JensenShannonDistance} is the smoothing formula in
 * {@link #smoothedProbability(Token, double, double)}, which is
 * controlled by a single tunable pseudo-count.
 */

public class DirichletJS extends JensenShannonDistance
{
	/** Pseudo-count used when the caller does not supply one. */
	private static final double DEFAULT_PSEUDO_COUNT = 1.0;

	/** Weight given to the background probability in the smoothing formula. */
	private double pseudoCount = DEFAULT_PSEUDO_COUNT;

	/**
	 * Creates a distance that uses the given tokenizer and pseudo-count.
	 *
	 * @param tokenizer   passed unchanged to the superclass constructor
	 * @param pseudoCount initial pseudo-count; stored as given, no range check is performed
	 */
	public DirichletJS(Tokenizer tokenizer,double pseudoCount) { 
		super(tokenizer);
		// Write the field directly instead of going through setPseudoCount():
		// that setter is public and overridable, and an override would run
		// here before the subclass's own fields have been initialised.
		this.pseudoCount = pseudoCount;
	}

	/**
	 * Creates a distance that uses {@code SimpleTokenizer.DEFAULT_TOKENIZER}
	 * and a pseudo-count of 1.0.
	 */
	public DirichletJS() { 
		this(SimpleTokenizer.DEFAULT_TOKENIZER, DEFAULT_PSEUDO_COUNT); 
	}

	/** @return the pseudo-count currently used for smoothing */
	public double getPseudoCount() { return pseudoCount; }

	/**
	 * Replaces the pseudo-count used for smoothing.
	 *
	 * @param c new pseudo-count; stored as given, no range check is performed
	 */
	public void setPseudoCount(double c) { this.pseudoCount = c; }

	/**
	 * Boxed-argument variant of {@link #setPseudoCount(double)}.
	 * NOTE(review): presumably kept so the value can be set by callers that
	 * only have a {@code Double} (e.g. reflective configuration) - confirm.
	 *
	 * @param c new pseudo-count
	 * @throws NullPointerException if {@code c} is {@code null}
	 */
	public void setPseudoCount(Double c) { this.pseudoCount = c.doubleValue(); }

	/** @return a short description of the form {@code [DirichletJS pcount=<pseudoCount>]} */
	public String toString() { return "[DirichletJS pcount="+pseudoCount+"]"; }

	/**
	 * Smoothed probability of the token, computed as
	 * {@code (freq + pseudoCount * backgroundProb(tok)) / (totalWeight + pseudoCount)}.
	 *
	 * <p>{@code backgroundProb} is not defined in this class; it is expected to
	 * be inherited. If {@code totalWeight + pseudoCount} is zero the result
	 * follows ordinary {@code double} division (NaN or an infinity) rather
	 * than raising an exception.
	 *
	 * @param tok         token whose probability is being estimated
	 * @param freq        numerator count/weight for {@code tok}, as supplied by the caller
	 * @param totalWeight denominator total, as supplied by the caller
	 * @return the smoothed estimate
	 */
	protected double smoothedProbability(Token tok, double freq, double totalWeight) 
	{
		double smoothedMass = freq + pseudoCount * backgroundProb(tok);
		double normalizer = totalWeight + pseudoCount;
		return smoothedMass / normalizer;
	}

	/**
	 * Command-line entry point: hands a default-constructed instance and the
	 * raw arguments to {@code doMain}, which is not defined in this class.
	 *
	 * @param argv command-line arguments, forwarded untouched
	 */
	static public void main(String[] argv) {
		doMain(new DirichletJS(), argv);
	}
}