File: ukkonen.go

package info (click to toggle)
golang-github-xrash-smetrics 0.0~git20170218.a3153f7-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 2,076 kB
  • sloc: makefile: 9
file content (92 lines) | stat: -rw-r--r-- 1,714 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
package smetrics

import (
	"math"
)

func Ukkonen(a, b string, icost, dcost, scost int) int {
	var lowerCost int

	if icost < dcost && icost < scost {
		lowerCost = icost
	} else if dcost < scost {
		lowerCost = dcost
	} else {
		lowerCost = scost
	}

	infinite := math.MaxInt32 / 2

	var r []int
	var k, kprime, p, t int
	var ins, del, sub int

	if len(a) > len(b) {
		t = (len(a) - len(b) + 1) * lowerCost
	} else {
		t = (len(b) - len(a) + 1) * lowerCost
	}

	for {
		if (t / lowerCost) < (len(b) - len(a)) {
			continue
		}

		// This is the right damn thing since the original Ukkonen
		// paper minimizes the expression result only, but the uncommented version
		// doesn't need to deal with floats so it's faster.
		// p = int(math.Floor(0.5*((float64(t)/float64(lowerCost)) - float64(len(b) - len(a)))))
		p = ((t / lowerCost) - (len(b) - len(a))) / 2

		k = -p
		kprime = k

		rowlength := (len(b) - len(a)) + (2 * p)

		r = make([]int, rowlength+2)

		for i := 0; i < rowlength+2; i++ {
			r[i] = infinite
		}

		for i := 0; i <= len(a); i++ {
			for j := 0; j <= rowlength; j++ {
				if i == j+k && i == 0 {
					r[j] = 0
				} else {
					if j-1 < 0 {
						ins = infinite
					} else {
						ins = r[j-1] + icost
					}

					del = r[j+1] + dcost
					sub = r[j] + scost

					if i-1 < 0 || i-1 >= len(a) || j+k-1 >= len(b) || j+k-1 < 0 {
						sub = infinite
					} else if a[i-1] == b[j+k-1] {
						sub = r[j]
					}

					if ins < del && ins < sub {
						r[j] = ins
					} else if del < sub {
						r[j] = del
					} else {
						r[j] = sub
					}
				}
			}
			k++
		}

		if r[(len(b)-len(a))+(2*p)+kprime] <= t {
			break
		} else {
			t *= 2
		}
	}

	return r[(len(b)-len(a))+(2*p)+kprime]
}