File: words_conformance_tests.rs

package info (click to toggle)
rust-unic-segment 0.9.0-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 364 kB
  • sloc: makefile: 2
file content (78 lines) | stat: -rw-r--r-- 2,539 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
// Copyright 2012-2015 The Rust Project Developers.
// Copyright 2017 The UNIC Project Developers.
//
// See the COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use unic_segment::{WordBoundIndices, WordBounds};

/// Shape of a conformance table: a static slice of `(input, expected)` pairs.
///
/// In `test_words_conformance`, `input` is the text handed to the segmenter
/// and `expected` lists, in order, the segments it must be split into.
type TestData = &'static [(&'static str, &'static [&'static str])];

/// Main table of test cases, spliced in at compile time from
/// `tables/word_break_test_data.rsv` (the path is resolved relative to this
/// source file).
const TEST_DATA: TestData = include!("tables/word_break_test_data.rsv");

/// Extra cases that the official test suite doesn't cover.
///
/// Spliced in at compile time from `extra_word_break_test_data.rsv`, which
/// sits next to this source file.
const EXTRA_TEST_DATA: TestData = include!("extra_word_break_test_data.rsv");

/// Runs every case from `TEST_DATA` followed by `EXTRA_TEST_DATA` through the
/// word segmenter and checks four things per case:
///
/// 1. `WordBounds` yields the expected segments front to back;
/// 2. `WordBounds` yields the same segments back to front when reversed;
/// 3. `WordBoundIndices` reports the expected byte offset for each segment;
/// 4. `WordBoundIndices` reports the same offsets in reverse when reversed.
#[test]
fn test_words_conformance() {
    /// Drains both iterators and asserts that they produced equal sequences.
    ///
    /// Both sides are collected into vectors first so that a failure prints
    /// the complete actual and expected sequences, not just the first
    /// mismatch. `label`, `input` and `words` only feed the failure message.
    fn check_case<T, A, E>(actual: A, expected: E, label: &str, input: &str, words: &[&str])
    where
        T: PartialEq + std::fmt::Debug,
        A: Iterator<Item = T>,
        E: Iterator<Item = T>,
    {
        let actual: Vec<T> = actual.collect();
        let expected: Vec<T> = expected.collect();
        assert_eq!(
            actual, expected,
            "{} test for testcase ({:?}, {:?}) failed.",
            label, input, words
        );
    }

    for &(input, words) in TEST_DATA.iter().chain(EXTRA_TEST_DATA.iter()) {
        // Segments, front to back.
        check_case(
            WordBounds::new(input),
            words.iter().cloned(),
            "Forward word boundaries",
            input,
            words,
        );

        // Segments, back to front.
        check_case(
            WordBounds::new(input).rev(),
            words.iter().rev().cloned(),
            "Reverse word boundaries",
            input,
            words,
        );

        // Byte offset at which each expected segment starts: a running sum of
        // the byte lengths of the segments that precede it. One entry per
        // segment, so an empty expectation yields an empty offset list.
        let starts: Vec<usize> = words
            .iter()
            .scan(0, |consumed, word| {
                let start = *consumed;
                *consumed += word.len();
                Some(start)
            })
            .collect();

        // Offsets, front to back (the segment text itself is ignored here).
        check_case(
            WordBoundIndices::new(input).map(|(offset, _)| offset),
            starts.iter().cloned(),
            "Forward word indices",
            input,
            words,
        );

        // Offsets, back to front.
        check_case(
            WordBoundIndices::new(input).rev().map(|(offset, _)| offset),
            starts.iter().rev().cloned(),
            "Reverse word indices",
            input,
            words,
        );
    }
}