File: simple.rs

package info (click to toggle)
rustc-web 1.85.0%2Bdfsg3-1~deb12u3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bookworm-proposed-updates
  • size: 1,759,988 kB
  • sloc: xml: 158,127; python: 35,830; javascript: 19,497; cpp: 19,002; sh: 17,245; ansic: 13,127; asm: 4,376; makefile: 1,056; lisp: 29; perl: 29; ruby: 19; sql: 11
file content (26 lines) | stat: -rw-r--r-- 1,058 bytes parent folder | download | duplicates (22)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26


use string_cache::DefaultAtom;

/// Splits a fixed sentence into whitespace-separated words, interns each one as a
/// `DefaultAtom`, and prints how many times that word had already been interned.
fn main() {
    let text = "here is a sentence of text that will be tokenised and interned and some repeated \
                tokens is of text and";
    // Every atom interned so far, in order of appearance (repeated words appear repeatedly).
    let mut atoms = Vec::new();
    for word in text.split_whitespace() {
        // Atoms implement `PartialEq` against string-like types, so each stored atom can be
        // compared directly with the plain `&str` token without interning the token first.
        let mut earlier_hits: usize = 0;
        for atom in &atoms {
            if atom == word {
                earlier_hits += 1;
            }
        }
        match earlier_hits {
            0 => println!(r#"Not seen the word "{}" before"#, word),
            hits => println!(r#"Seen the word "{}" {} times"#, word, hits),
        }
        // `From<&str>` interns the token; the resulting atom is recorded only after the lookup
        // above so that the current occurrence is not counted against itself.
        atoms.push(DefaultAtom::from(word));
    }
}