File: bottomk_test.go

package info (click to toggle)
golang-github-dgryski-go-minhash 0.0~git20190315.ad340ca-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, trixie
  • size: 84 kB
  • sloc: makefile: 2
file content (64 lines) | stat: -rw-r--r-- 1,056 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
package minhash

import (
	"github.com/dgryski/go-spooky"
	"testing"
)

// TestBottomK builds a pair of sketches with NewBottomK for every table entry,
// pushes each string of the entry's two sets into its own sketch, and logs the
// value returned by Similarity. Nothing is asserted: the test exercises the
// NewBottomK / Push / Similarity path and reports the result for inspection.
func TestBottomK(t *testing.T) {
	// Size argument handed to NewBottomK for both sketches.
	const sketchSize = 4

	cases := []struct {
		s1 []string
		s2 []string
	}{
		{
			s1: []string{"hello", "world", "foo", "baz", "bar", "zomg"},
			s2: []string{"goodbye", "world", "foo", "qux", "bar", "zomg"},
		},
	}

	for i := range cases {
		left := NewBottomK(spooky.Hash64, sketchSize)
		for _, word := range cases[i].s1 {
			left.Push([]byte(word))
		}

		right := NewBottomK(spooky.Hash64, sketchSize)
		for _, word := range cases[i].s2 {
			right.Push([]byte(word))
		}

		t.Log(left.Similarity(right))
	}
}

// TestBottomKMerge checks Merge against a sketch built directly.
//
// The elements of one set are split across two sketches (s1 and s1a), which are
// then combined with Merge. A reference sketch receives the same elements
// through Push only. The test asserts that both report the same Similarity to
// the sketch of s2, so a Merge that drops or mishandles values makes the test
// fail instead of merely changing a logged number.
func TestBottomKMerge(t *testing.T) {

	s1 := []string{"hello", "world", "foo", "baz"}
	s2 := []string{"goodbye", "world", "foo", "qux", "bar", "zomg"}

	// Remaining elements of the first set; s1 and s1a share no members.
	s1a := []string{"bar", "zomg"}

	m1 := NewBottomK(spooky.Hash64, 4)
	m2 := NewBottomK(spooky.Hash64, 4)

	for _, s := range s1 {
		m1.Push([]byte(s))
	}

	for _, s := range s2 {
		m2.Push([]byte(s))
	}

	// Similarity before the merge, logged for reference only.
	t.Log(m1.Similarity(m2))

	m1a := NewBottomK(spooky.Hash64, 4)
	for _, s := range s1a {
		m1a.Push([]byte(s))
	}

	m1.Merge(m1a)

	// Reference sketch: every element of s1 and s1a pushed without Merge.
	direct := NewBottomK(spooky.Hash64, 4)
	for _, s := range s1 {
		direct.Push([]byte(s))
	}
	for _, s := range s1a {
		direct.Push([]byte(s))
	}

	got := m1.Similarity(m2)
	want := direct.Similarity(m2)
	t.Log(got)

	if got != want {
		t.Errorf("similarity after Merge = %v, want %v (sketch built by pushing all elements directly)", got, want)
	}
}