File: distance_canberraword.py

package info (click to toggle)
shogun 3.2.0-7.3
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 28,452 kB
  • ctags: 24,670
  • sloc: cpp: 210,224; python: 19,668; java: 3,119; ansic: 3,000; cs: 2,335; xml: 1,178; perl: 1,172; sh: 848; ruby: 680; makefile: 269; objc: 37
file content (26 lines) | stat: -rw-r--r-- 946 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
from tools.load import LoadMatrix
from sg import sg
# Helper object used to read the example data files.
lm=LoadMatrix()

# DNA example data for training and testing (paths are relative, so the
# script is presumably meant to be run from its own directory).
traindna,testdna=(
	lm.load_dna('../data/fm_train_dna.dat'),
	lm.load_dna('../data/fm_test_dna.dat'),
)

# Argument sets for distance_canberraword, in positional order:
# [fm_train_dna, fm_test_dna, order, gap, reverse]
parameter_list=[
	[traindna,testdna,3,0,'n'],
	[traindna,testdna,4,0,'n'],
]

def distance_canberraword (fm_train_dna=traindna,fm_test_dna=testdna,order=3,
			    gap=0,reverse='n'):
	"""Run the CANBERRA/WORD distance example through the static ``sg`` interface.

	The distance and the SORTWORDSTRING preprocessor are registered once.
	Then, for the TRAIN features followed by the TEST features, the DNA
	strings are set, converted from CHAR strings to WORD strings, the
	preprocessor is attached and the distance matrix is requested.

	Arguments:
	fm_train_dna -- DNA data passed to ``set_features`` for 'TRAIN'
	fm_test_dna  -- DNA data passed to ``set_features`` for 'TEST'
	order        -- forwarded to ``convert``; ``order-1`` is forwarded as well
	gap          -- forwarded to ``convert`` unchanged
	reverse      -- forwarded to ``convert`` unchanged

	Returns whatever ``sg('get_distance_matrix', 'TEST')`` yields; the matrix
	obtained for 'TRAIN' is computed but not returned.
	"""

	sg('set_distance', 'CANBERRA', 'WORD')
	sg('add_preproc', 'SORTWORDSTRING')

	# TRAIN must be processed before TEST, matching the original call order.
	# NOTE(review): the TRAIN matrix is discarded, but the call is kept since
	# it may initialise state inside sg -- confirm before removing.
	matrix=None
	for target,dna in (('TRAIN', fm_train_dna), ('TEST', fm_test_dna)):
		sg('set_features', target, dna, 'DNA')
		sg('convert', target, 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse)
		sg('attach_preproc', target)
		matrix=sg('get_distance_matrix', target)

	return matrix

if __name__=='__main__':
	# Script entry point: print the example's label, then run the example
	# once using the first argument set from parameter_list.
	print('CanberraWordDistance')
	first_args=parameter_list[0]
	distance_canberraword(*first_args)