File: pragmatic_test.go

package info (click to toggle)
golang-github-jdkato-prose 1.1.0+git20171031.e27abfd-2
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 12,848 kB
  • sloc: python: 115; makefile: 55; sh: 21
file content (72 lines) | stat: -rw-r--r-- 1,761 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
package tokenize

import (
	"encoding/json"
	"fmt"
	"path/filepath"
	"testing"

	"github.com/jdkato/prose/internal/util"
)

// goldenRule is one case decoded from a golden_rules_<lang>.json fixture.
//
// Name labels the case and is logged when it fails, Input is the text
// handed to the segmenter, and Output lists the sentences the segmenter is
// expected to return for that text, in order.
type goldenRule struct {
	Name, Input string
	Output      []string
}

// TestPragmaticRulesEn runs the golden-rule checks with the "en" language code.
func TestPragmaticRulesEn(t *testing.T) {
	testLang("en", t)
}
// TestPragmaticRulesFr runs the golden-rule checks with the "fr" language code.
func TestPragmaticRulesFr(t *testing.T) {
	testLang("fr", t)
}
// TestPragmaticRulesEs runs the golden-rule checks with the "es" language code.
func TestPragmaticRulesEs(t *testing.T) {
	testLang("es", t)
}

// BenchmarkPragmaticRulesEn benchmarks tokenizing the "en" golden-rule inputs.
func BenchmarkPragmaticRulesEn(b *testing.B) {
	benchmarkLang("en", b)
}

// benchmarkLang measures tokenizing every Input from the
// golden_rules_<lang>.json fixture with the pragmatic segmenter built for
// lang.
//
// The fixture is read and decoded, and the segmenter constructed, before the
// benchmark timer is reset, so only the Tokenize calls contribute to the
// reported time. Errors from building the segmenter or decoding the fixture
// are passed to util.CheckError.
func benchmarkLang(lang string, b *testing.B) {
	// A nil slice is enough as a json.Unmarshal target.
	var tests []goldenRule
	f := fmt.Sprintf("golden_rules_%s.json", lang)
	cases := util.ReadDataFile(filepath.Join(testdata, f))

	tok, err := NewPragmaticSegmenter(lang)
	util.CheckError(err)

	util.CheckError(json.Unmarshal(cases, &tests))

	// Drop the setup above (file read, JSON decode, segmenter construction)
	// from the measurement.
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		for _, test := range tests {
			tok.Tokenize(test.Input)
		}
	}
}

func testLang(lang string, t *testing.T) {
	tests := make([]goldenRule, 0)
	f := fmt.Sprintf("golden_rules_%s.json", lang)
	cases := util.ReadDataFile(filepath.Join(testdata, f))

	tok, err := NewPragmaticSegmenter(lang)
	util.CheckError(err)

	util.CheckError(json.Unmarshal(cases, &tests))
	for _, test := range tests {
		compare(t, test.Name, test.Input, test.Output, tok)
	}
}

// compare tokenizes actualText with tok and checks the result against
// expected.
//
// A difference in sentence count, or the first sentence that differs, is
// reported through t.Errorf together with the rule name given in test, and
// compare returns false. It returns true when every sentence matches.
func compare(t *testing.T, test, actualText string, expected []string, tok *PragmaticSegmenter) bool {
	got := tok.Tokenize(actualText)

	// Check the counts first so the indexing below stays within expected.
	if have, want := len(got), len(expected); have != want {
		t.Log(test)
		t.Logf("Actual: %v\n", got)
		t.Errorf("Actual: %d, Expected: %d\n", have, want)
		t.Log("===")
		return false
	}

	for i, sentence := range got {
		want := expected[i]
		if sentence == want {
			continue
		}
		// Stop at the first mismatch.
		t.Log(test)
		t.Errorf("Actual: [%s] Expected: [%s]\n", sentence, want)
		t.Log("===")
		return false
	}

	return true
}