package minhash
import (
"github.com/dgryski/go-spooky"
"testing"
)
// TestBottomK builds a pair of BottomK values for every table entry, feeds
// each one the strings of its respective set, and logs the value returned by
// Similarity. The result is only logged; nothing is asserted.
func TestBottomK(t *testing.T) {
	// Second argument handed to NewBottomK.
	// NOTE(review): presumably the number of minimum hashes retained — confirm
	// against NewBottomK.
	const k = 4

	cases := []struct {
		left  []string
		right []string
	}{
		{
			left:  []string{"hello", "world", "foo", "baz", "bar", "zomg"},
			right: []string{"goodbye", "world", "foo", "qux", "bar", "zomg"},
		},
	}

	for i := range cases {
		tc := &cases[i]

		a := NewBottomK(spooky.Hash64, k)
		for _, word := range tc.left {
			a.Push([]byte(word))
		}

		b := NewBottomK(spooky.Hash64, k)
		for _, word := range tc.right {
			b.Push([]byte(word))
		}

		t.Log(a.Similarity(b))
	}
}
// TestBottomKMerge logs the similarity between two BottomK values twice:
// first when the left one has been fed only part of its input, and again
// after the remaining strings (pushed into a separate BottomK) have been
// folded into it with Merge. Both results are only logged; nothing is
// asserted.
func TestBottomKMerge(t *testing.T) {
	// Second argument handed to NewBottomK.
	// NOTE(review): presumably the number of minimum hashes retained — confirm
	// against NewBottomK.
	const k = 4

	var (
		leftHead = []string{"hello", "world", "foo", "baz"}
		leftTail = []string{"bar", "zomg"}
		right    = []string{"goodbye", "world", "foo", "qux", "bar", "zomg"}
	)

	a := NewBottomK(spooky.Hash64, k)
	for _, word := range leftHead {
		a.Push([]byte(word))
	}

	b := NewBottomK(spooky.Hash64, k)
	for _, word := range right {
		b.Push([]byte(word))
	}

	// Similarity before the left side has received its remaining strings.
	t.Log(a.Similarity(b))

	rest := NewBottomK(spooky.Hash64, k)
	for _, word := range leftTail {
		rest.Push([]byte(word))
	}
	a.Merge(rest)

	// Similarity after merging the remaining strings into the left side.
	t.Log(a.Similarity(b))
}
|