File: word_bounds.rs

use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};

use std::fs;
use unicode_segmentation::UnicodeSegmentation;

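// Sample inputs; each name corresponds to a file at benches/texts/<name>.txt.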
const FILES: &[&str] = &[
    "arabic",
    "english",
    "hindi",
    "japanese",
    "korean",
    "mandarin",
    "russian",
    "source_code",
];

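// Splits `text` at Unicode word boundaries, passing each segment through
// `black_box` so the compiler cannot optimize the traversal away.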
#[inline(always)]
fn grapheme(text: &str) {
    for w in text.split_word_bounds() {
        black_box(w);
    }
}

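// Registers one benchmark per sample text in the `word_bounds` group.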
fn bench_all(c: &mut Criterion) {
    let mut group = c.benchmark_group("word_bounds");

    for file in FILES {
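        // The file contents are loaded up front as the benchmark input;
        // only the word-boundary segmentation itself is timed.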
        group.bench_with_input(
            BenchmarkId::new("grapheme", file),
            &fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
            |b, content| b.iter(|| grapheme(content)),
        );
    }
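
    // Close the group explicitly; Criterion would also finish it on drop.
    group.finish();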
}

criterion_group!(benches, bench_all);
criterion_main!(benches);