File: test.rs

package info (click to toggle)
rustc 1.85.0%2Bdfsg2-3
links: PTS, VCS
area: main
in suites: forky, sid, trixie
size: 893,176 kB
sloc: xml: 158,127; python: 35,830; javascript: 19,497; cpp: 19,002; sh: 17,245; ansic: 13,127; asm: 4,376; makefile: 1,051; lisp: 29; perl: 29; ruby: 19; sql: 11
file content (247 lines) | stat: -rw-r--r-- 8,623 bytes
parent folder | download | duplicates (5)
// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use quickcheck::quickcheck;
use unicode_segmentation::UnicodeSegmentation;

#[rustfmt::skip]
mod testdata;

/// Checks grapheme cluster iteration against the `testdata` tables and a few
/// hand-written cases, in extended and legacy mode, forwards and backwards,
/// and then exercises the `grapheme_indices` iterator and its `size_hint`.
#[test]
fn test_graphemes() {
    use crate::testdata::{TEST_DIFF, TEST_SAME};

    // Cases where the two modes disagree:
    // (input, expected extended clusters, expected legacy clusters).
    const EXTRA_DIFF: &[(&str, &[&str], &[&str])] = &[
        // Official test suite doesn't include two Prepend chars between two other chars.
        (
            "\u{20}\u{600}\u{600}\u{20}",
            &["\u{20}", "\u{600}\u{600}\u{20}"],
            &["\u{20}", "\u{600}", "\u{600}", "\u{20}"],
        ),
        // Test for Prepend followed by two Any chars
        (
            "\u{600}\u{20}\u{20}",
            &["\u{600}\u{20}", "\u{20}"],
            &["\u{600}", "\u{20}", "\u{20}"],
        ),
    ];

    // Cases where both modes must produce the same clusters:
    // (input, expected clusters).
    const EXTRA_SAME: &[(&str, &[&str])] = &[
        // family emoji (more than two emoji joined by ZWJ)
        (
            "\u{1f468}\u{200d}\u{1f467}\u{200d}\u{1f466}",
            &["\u{1f468}\u{200d}\u{1f467}\u{200d}\u{1f466}"],
        ),
        // cartwheel emoji followed by two fitzpatrick skin tone modifiers
        // (test case from issue #19)
        (
            "\u{1F938}\u{1F3FE}\u{1F3FE}",
            &["\u{1F938}\u{1F3FE}\u{1F3FE}"],
        ),
    ];

    /// Asserts that `s` splits into exactly `expected` in the given mode,
    /// both when iterated from the front and when iterated from the back.
    ///
    /// Every check collects into a `Vec` and uses `assert_eq!`, so a failure
    /// prints the input, the mode, and the actual and expected clusters.
    /// `#[track_caller]` makes the panic point at the calling line.
    #[track_caller]
    fn assert_graphemes(s: &str, is_extended: bool, expected: &[&str]) {
        let mode = if is_extended { "extended" } else { "legacy" };

        // test forward iterator
        let forward: Vec<&str> = UnicodeSegmentation::graphemes(s, is_extended).collect();
        assert_eq!(forward, expected, "{s:?} {mode}");

        // test reverse iterator: undo the reversal so the comparison (and the
        // failure output) is in reading order
        let mut backward: Vec<&str> = UnicodeSegmentation::graphemes(s, is_extended)
            .rev()
            .collect();
        backward.reverse();
        assert_eq!(backward, expected, "{s:?} {mode} (reverse)");
    }

    for &(s, g) in TEST_SAME.iter().chain(EXTRA_SAME) {
        assert_graphemes(s, true, g);
        assert_graphemes(s, false, g);
    }

    for &(s, gt, gf) in TEST_DIFF.iter().chain(EXTRA_DIFF) {
        assert_graphemes(s, true, gt);
        assert_graphemes(s, false, gf);
    }

    // test the indices iterators
    let s = "a̐éö̲\r\n";
    let gr_inds = UnicodeSegmentation::grapheme_indices(s, true).collect::<Vec<(usize, &str)>>();
    let b: &[_] = &[(0, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
    assert_eq!(gr_inds, b);
    let gr_inds = UnicodeSegmentation::grapheme_indices(s, true)
        .rev()
        .collect::<Vec<(usize, &str)>>();
    let b: &[_] = &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0, "a̐")];
    assert_eq!(gr_inds, b);

    // size_hint before and after the iterator has been drained
    let mut gr_inds_iter = UnicodeSegmentation::grapheme_indices(s, true);
    {
        let gr_inds = gr_inds_iter.by_ref();
        let e1 = gr_inds.size_hint();
        // the last cluster starts at byte 11 and is 2 bytes long, so the
        // expected upper bound of 13 is the byte length of `s`
        assert_eq!(e1, (1, Some(13)));
        let c = gr_inds.count();
        assert_eq!(c, 4);
    }
    let e2 = gr_inds_iter.size_hint();
    assert_eq!(e2, (0, Some(0)));

    // make sure the reverse iterator does the right thing with "\n" at beginning of string
    let s = "\n\r\n\r";
    let gr = UnicodeSegmentation::graphemes(s, true)
        .rev()
        .collect::<Vec<&str>>();
    let b: &[_] = &["\r", "\r\n", "\n"];
    assert_eq!(gr, b);
}

/// Checks word-boundary splitting (pieces and their byte offsets, forwards and
/// backwards) against the `testdata` table and a few hand-written cases.
#[test]
fn test_words() {
    use crate::testdata::TEST_WORD;

    // Unicode's official tests don't really test longer chains of flag emoji
    // TODO This could be improved with more tests like flag emoji with interspersed Extend chars and ZWJ
    const EXTRA_WORD_CASES: &[(&str, &[&str])] = &[
        (
            "🇦🇫🇦🇽🇦🇱🇩🇿🇦🇸🇦🇩🇦🇴",
            &["🇦🇫", "🇦🇽", "🇦🇱", "🇩🇿", "🇦🇸", "🇦🇩", "🇦🇴"],
        ),
        ("🇦🇫🇦🇽🇦🇱🇩🇿🇦🇸🇦🇩🇦", &["🇦🇫", "🇦🇽", "🇦🇱", "🇩🇿", "🇦🇸", "🇦🇩", "🇦"]),
        (
            "🇦a🇫🇦🇽a🇦🇱🇩🇿🇦🇸🇦🇩🇦",
            &["🇦", "a", "🇫🇦", "🇽", "a", "🇦🇱", "🇩🇿", "🇦🇸", "🇦🇩", "🇦"],
        ),
        (
            "\u{1f468}\u{200d}\u{1f468}\u{200d}\u{1f466}",
            &["\u{1f468}\u{200d}\u{1f468}\u{200d}\u{1f466}"],
        ),
        ("😌👎🏼", &["😌", "👎🏼"]),
        // perhaps wrong, spaces should not be included?
        ("hello world", &["hello", " ", "world"]),
        ("🇨🇦🇨🇭🇿🇲🇿 hi", &["🇨🇦", "🇨🇭", "🇿🇲", "🇿", " ", "hi"]),
    ];

    for &(text, pieces) in TEST_WORD.iter().chain(EXTRA_WORD_CASES) {
        // Compares two iterators element-wise. Both sides are gathered into
        // vectors first so that a failure prints the complete sequences.
        // Defined inside the loop because it names `text` and `pieces`.
        macro_rules! check_iter {
            ($label:expr, $actual:expr, $wanted:expr) => {
                let actual: Vec<_> = $actual.collect();
                let wanted: Vec<_> = $wanted.collect();
                assert_eq!(
                    actual, wanted,
                    "{} test for testcase ({:?}, {:?}) failed.",
                    $label, text, pieces
                )
            };
        }

        // pieces, front to back
        check_iter!(
            "Forward word boundaries",
            text.split_word_bounds(),
            pieces.iter().copied()
        );

        // pieces, back to front
        check_iter!(
            "Reverse word boundaries",
            text.split_word_bounds().rev(),
            pieces.iter().rev().copied()
        );

        // Byte offset at which each expected piece starts: the running total
        // of the lengths of all pieces before it.
        let starts: Vec<usize> = pieces
            .iter()
            .scan(0, |next_start, piece| {
                let start = *next_start;
                *next_start += piece.len();
                Some(start)
            })
            .collect();

        // offsets, front to back
        check_iter!(
            "Forward word indices",
            text.split_word_bound_indices().map(|(offset, _)| offset),
            starts.iter().copied()
        );

        // offsets, back to front
        check_iter!(
            "Reverse word indices",
            text.split_word_bound_indices().rev().map(|(offset, _)| offset),
            starts.iter().rev().copied()
        );
    }
}

/// Checks forward sentence-boundary splitting against the `testdata` table.
#[test]
fn test_sentences() {
    use crate::testdata::TEST_SENTENCE;

    for &(text, sentences) in TEST_SENTENCE.iter() {
        // Gather both sides into vectors so that a failure prints the
        // complete sequences rather than just the first mismatch.
        let actual: Vec<_> = text.split_sentence_bounds().collect();
        let wanted: Vec<_> = sentences.iter().cloned().collect();

        assert_eq!(
            actual, wanted,
            "{} test for testcase ({:?}, {:?}) failed.",
            "Forward sentence boundaries", text, sentences
        );
    }
}

quickcheck! {
    // Extended grapheme clusters: walking the string from the back must
    // visit the same clusters as walking it from the front, in opposite order.
    fn quickcheck_forward_reverse_graphemes_extended(s: String) -> bool {
        let forward: Vec<&str> = s.graphemes(true).collect();
        let backward: Vec<&str> = s.graphemes(true).rev().collect();
        forward.iter().eq(backward.iter().rev())
    }

    // Same front/back agreement property, for legacy grapheme clusters.
    fn quickcheck_forward_reverse_graphemes_legacy(s: String) -> bool {
        let forward: Vec<&str> = s.graphemes(false).collect();
        let backward: Vec<&str> = s.graphemes(false).rev().collect();
        forward.iter().eq(backward.iter().rev())
    }

    // Concatenating the clusters, in either mode, must give back the input.
    fn quickcheck_join_graphemes(s: String) -> bool {
        let extended: String = s.graphemes(true).collect();
        let legacy: String = s.graphemes(false).collect();
        extended == s && legacy == s
    }

    // Word-bound pieces must agree between front and back iteration.
    fn quickcheck_forward_reverse_words(s: String) -> bool {
        let forward: Vec<&str> = s.split_word_bounds().collect();
        let backward: Vec<&str> = s.split_word_bounds().rev().collect();
        forward.iter().eq(backward.iter().rev())
    }

    // Concatenating the word-bound pieces must give back the input.
    fn quickcheck_join_words(s: String) -> bool {
        let rejoined: String = s.split_word_bounds().collect();
        rejoined == s
    }
}