File: regexp_test.go

package info (click to toggle)
golang-github-jdkato-prose 1.1.0%2Bgit20171031.e27abfd-2
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 12,848 kB
  • sloc: python: 115; makefile: 55; sh: 21
file content (33 lines) | stat: -rw-r--r-- 766 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
package tokenize

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

// TestWordPunctTokenizer runs the tokenizer returned by
// NewWordPunctTokenizer over every input returned by
// getWordData("word_punct.json") and compares each result with the
// expected value at the same index.
func TestWordPunctTokenizer(t *testing.T) {
	input, output := getWordData("word_punct.json")
	// Fail with a clear message instead of an index-out-of-range panic (or
	// silently unchecked expectations) when the two fixtures differ in size.
	if len(input) != len(output) {
		t.Fatalf("fixture size mismatch: %d inputs, %d expected outputs", len(input), len(output))
	}
	wordTokenizer := NewWordPunctTokenizer()
	for i, s := range input {
		// The index in the message pinpoints which input failed.
		assert.Equal(t, output[i], wordTokenizer.Tokenize(s), "input #%d", i)
	}
}

// TestNewRegexpTokenizer verifies that a tokenizer built by
// NewRegexpTokenizer from the pattern `\w+|[^\w\s]+` produces the same
// result as the tokenizer from NewWordPunctTokenizer for every input
// returned by getWordData("word_punct.json").
func TestNewRegexpTokenizer(t *testing.T) {
	sentences, _ := getWordData("word_punct.json")
	reference := NewWordPunctTokenizer()
	candidate := NewRegexpTokenizer(`\w+|[^\w\s]+`, false, false)
	for _, sentence := range sentences {
		// Tokenize with the reference first, then the candidate.
		want := reference.Tokenize(sentence)
		got := candidate.Tokenize(sentence)
		assert.Equal(t, want, got)
	}
}

// BenchmarkWordPunctTokenizer measures Tokenize on the tokenizer returned
// by NewWordPunctTokenizer over every input returned by getWordBenchData.
func BenchmarkWordPunctTokenizer(b *testing.B) {
	word := NewWordPunctTokenizer()
	// Fetch the inputs once, outside the timed loop, so the cost of
	// producing them is not attributed to Tokenize.
	data := getWordBenchData()
	// Exclude the setup above from the reported time.
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		for _, s := range data {
			word.Tokenize(s)
		}
	}
}