File: NGramBlocker.java

package info (click to toggle)
libsecondstring-java 0.1~dfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 764 kB
  • sloc: java: 9,592; xml: 114; makefile: 6
file content (28 lines) | stat: -rw-r--r-- 754 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
package com.wcohen.ss.expt;

import com.wcohen.ss.api.*;
import com.wcohen.ss.tokens.*;

/**
 * Blocker that finds all pairs sharing a not-too-common character n-gram.
 *
 * <p>The n-gram size range defaults to 4-4. Whenever either bound is changed
 * through a setter, the <code>tokenizer</code> field (not declared in this
 * class; presumably inherited from TokenBlocker) is replaced by a new
 * NGramTokenizer built for the updated range.
 *
 * <p>The setters store the given value as-is; no check is made that the
 * minimum does not exceed the maximum.
 */
public class NGramBlocker extends TokenBlocker
{
	/** Lower bound on n-gram size handed to the tokenizer. */
	private int minN = 4;

	/** Upper bound on n-gram size handed to the tokenizer. */
	private int maxN = 4;

	/** Creates a blocker whose tokenizer uses the default n-gram range. */
	public NGramBlocker()
	{
		super();
		rebuildTokenizer();
	}

	/** Returns the current upper bound on n-gram size. */
	public int getMaxNGramSize()
	{
		return maxN;
	}

	/** Returns the current lower bound on n-gram size. */
	public int getMinNGramSize()
	{
		return minN;
	}

	/** Sets the upper bound on n-gram size and rebuilds the tokenizer. */
	public void setMaxNGramSize(int n)
	{
		maxN = n;
		rebuildTokenizer();
	}

	/** Sets the lower bound on n-gram size and rebuilds the tokenizer. */
	public void setMinNGramSize(int n)
	{
		minN = n;
		rebuildTokenizer();
	}

	/**
	 * Replaces the tokenizer with an NGramTokenizer for the current
	 * [minN, maxN] range, layered over SimpleTokenizer.DEFAULT_TOKENIZER.
	 */
	private void rebuildTokenizer()
	{
		// NOTE(review): the meaning of the 'false' flag is defined by
		// NGramTokenizer's constructor, which is not visible here -- confirm.
		tokenizer = new NGramTokenizer(minN, maxN, false, SimpleTokenizer.DEFAULT_TOKENIZER);
	}

	/** Describes this blocker as <code>[NGramBlocker: N=min-max]</code>. */
	public String toString()
	{
		String range = minN + "-" + maxN;
		return "[NGramBlocker: N=" + range + "]";
	}
}