File: sentence_test.go

package info (click to toggle)
golang-github-rivo-uniseg 0.4.7-1
links: PTS, VCS
area: main
in suites: experimental, forky, sid, trixie, trixie-backports, trixie-proposed-updates
size: 4,516 kB
sloc: makefile: 2
file content (173 lines) | stat: -rw-r--r-- 4,658 bytes
package uniseg

import (
	"testing"
)

// Test all official Unicode test cases for sentence boundaries using the byte
// slice function.
func TestSentenceCasesBytes(t *testing.T) {
	for testNum, testCase := range sentenceBreakTestCases {
		/*t.Logf(`Test case %d %q: Expecting %x, getting %x, code points %x"`,
		testNum,
		strings.TrimSpace(testCase.original),
		testCase.expected,
		decomposed(testCase.original),
		[]rune(testCase.original))*/
		var (
			sentence []byte
			index    int
		)
		state := -1
		b := []byte(testCase.original)
	WordLoop:
		for index = 0; len(b) > 0; index++ {
			if index >= len(testCase.expected) {
				t.Errorf(`Test case %d %q failed: More sentences %d returned than expected %d`,
					testNum,
					testCase.original,
					index,
					len(testCase.expected))
				break
			}
			sentence, b, state = FirstSentence(b, state)
			cluster := []rune(string(sentence))
			if len(cluster) != len(testCase.expected[index]) {
				t.Errorf(`Test case %d %q failed: Sentence at index %d has %d codepoints %x, %d expected %x`,
					testNum,
					testCase.original,
					index,
					len(cluster),
					cluster,
					len(testCase.expected[index]),
					testCase.expected[index])
				break
			}
			for i, r := range cluster {
				if r != testCase.expected[index][i] {
					t.Errorf(`Test case %d %q failed: Sentence at index %d is %x, expected %x`,
						testNum,
						testCase.original,
						index,
						cluster,
						testCase.expected[index])
					break WordLoop
				}
			}
		}
		if index < len(testCase.expected) {
			t.Errorf(`Test case %d %q failed: Fewer sentences returned (%d) than expected (%d)`,
				testNum,
				testCase.original,
				index,
				len(testCase.expected))
		}
	}
	sentence, rest, newState := FirstSentence([]byte{}, -1)
	if len(sentence) > 0 {
		t.Errorf(`Expected sentence to be empty byte slice, got %q`, sentence)
	}
	if len(rest) > 0 {
		t.Errorf(`Expected rest to be empty byte slice, got %q`, rest)
	}
	if newState != 0 {
		t.Errorf(`Expected newState to be 0, got %d`, newState)
	}
}

// Test all official Unicode test cases for sentence boundaries using the string
// function.
func TestSentenceCasesString(t *testing.T) {
	for testNum, testCase := range sentenceBreakTestCases {
		/*t.Logf(`Test case %d %q: Expecting %x, getting %x, code points %x"`,
		testNum,
		strings.TrimSpace(testCase.original),
		testCase.expected,
		decomposed(testCase.original),
		[]rune(testCase.original))*/
		var (
			sentence string
			index    int
		)
		state := -1
		str := testCase.original
	WordLoop:
		for index = 0; len(str) > 0; index++ {
			if index >= len(testCase.expected) {
				t.Errorf(`Test case %d %q failed: More sentences %d returned than expected %d`,
					testNum,
					testCase.original,
					index,
					len(testCase.expected))
				break
			}
			sentence, str, state = FirstSentenceInString(str, state)
			cluster := []rune(string(sentence))
			if len(cluster) != len(testCase.expected[index]) {
				t.Errorf(`Test case %d %q failed: Sentence at index %d has %d codepoints %x, %d expected %x`,
					testNum,
					testCase.original,
					index,
					len(cluster),
					cluster,
					len(testCase.expected[index]),
					testCase.expected[index])
				break
			}
			for i, r := range cluster {
				if r != testCase.expected[index][i] {
					t.Errorf(`Test case %d %q failed: Sentence at index %d is %x, expected %x`,
						testNum,
						testCase.original,
						index,
						cluster,
						testCase.expected[index])
					break WordLoop
				}
			}
		}
		if index < len(testCase.expected) {
			t.Errorf(`Test case %d %q failed: Fewer sentences returned (%d) than expected (%d)`,
				testNum,
				testCase.original,
				index,
				len(testCase.expected))
		}
	}
	sentence, rest, newState := FirstSentenceInString("", -1)
	if len(sentence) > 0 {
		t.Errorf(`Expected sentence to be empty string, got %q`, sentence)
	}
	if len(rest) > 0 {
		t.Errorf(`Expected rest to be empty string, got %q`, rest)
	}
	if newState != 0 {
		t.Errorf(`Expected newState to be 0, got %d`, newState)
	}
}

// Benchmark the use of the sentence break function for byte slices.
func BenchmarkSentenceFunctionBytes(b *testing.B) {
	for i := 0; i < b.N; i++ {
		var c []byte
		state := -1
		str := benchmarkBytes
		for len(str) > 0 {
			c, str, state = FirstSentence(str, state)
			resultRunes = []rune(string(c))
		}
	}
}

// Benchmark the use of the sentence break function for strings.
func BenchmarkSentenceFunctionString(b *testing.B) {
	for i := 0; i < b.N; i++ {
		var c string
		state := -1
		str := benchmarkStr
		for len(str) > 0 {
			c, str, state = FirstSentenceInString(str, state)
			resultRunes = []rune(c)
		}
	}
}