File: tldextract.rs

package info (click to toggle)
rust-psl 2.1.78-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,540 kB
  • sloc: makefile: 2
file content (67 lines) | stat: -rw-r--r-- 2,015 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
use psl_types::List;

/// Splits a host name into suffix, domain and subdomain parts.
///
/// This file provides a blanket implementation for every type that
/// implements `List`.
pub trait TldExtract {
    /// Breaks `host` into [`Parts`] that borrow from `host`.
    ///
    /// The blanket implementation in this file returns `None` when the list
    /// reports no suffix for `host`.
    fn extract<'a>(&self, host: &'a str) -> Option<Parts<'a>>;
}

impl<T: List> TldExtract for T {
    /// Splits `host` around the suffix that this list reports for it.
    ///
    /// Returns `None` when the list yields no suffix for `host`. Otherwise the
    /// suffix is the tail of `host` with the reported length, `domain` is the
    /// label immediately in front of it, and `subdomain` is whatever precedes
    /// that label.
    fn extract<'a>(&self, host: &'a str) -> Option<Parts<'a>> {
        let total = host.len();
        let suffix_len = self.suffix(host.as_bytes())?.as_bytes().len();
        // Only the length of the reported suffix is used; the text itself is
        // re-sliced from `host` so that it carries the `'a` lifetime.
        let suffix = &host[total - suffix_len..];

        // Bytes occupied by the suffix together with the dot in front of it.
        let tail_len = suffix_len + 1;
        let prefix = if total > tail_len {
            host.get(..total - tail_len)
        } else {
            None
        };

        // The last dot in the prefix separates the subdomain from the domain.
        let (subdomain, domain) = match prefix {
            None => (None, None),
            Some(labels) => match labels.rfind('.') {
                None => (None, Some(labels)),
                Some(dot) => (labels.get(..dot), labels.get(dot + 1..)),
            },
        };

        Some(Parts {
            suffix,
            domain,
            subdomain,
        })
    }
}

/// The pieces of a host name as produced by [`TldExtract::extract`].
///
/// Every field is a slice borrowed from the host string that was split.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub struct Parts<'a> {
    /// The trailing part of the host that the list reported as its suffix.
    pub suffix: &'a str,
    /// The label directly in front of the suffix; `None` when the host is no
    /// longer than the suffix plus one separator byte.
    pub domain: Option<&'a str>,
    /// Everything in front of the domain label, without the separating dot;
    /// `None` when there is no dot in front of the domain label.
    pub subdomain: Option<&'a str>,
}

// This example is inspired by https://github.com/john-kurkowski/tldextract
// Unlike that project, we don't try to parse URLs though. That can easily
// be done by using the `url` crate and feeding the output of `Url::domain`
// to `TldExtract::extract`.
/// Command-line entry point: splits the host name given as the first argument
/// and prints its subdomain, domain and suffix separated by spaces.
///
/// Prints a usage message and exits with status 1 when no argument is given.
fn main() {
    use psl::List;
    use std::env;

    let mut args = env::args();
    // The first argument is conventionally the program name, but the OS does
    // not guarantee it exists; fall back to a fixed name instead of panicking.
    let program = args.next().unwrap_or_else(|| String::from("tldextract"));

    let domain = match args.next() {
        Some(name) => name,
        None => {
            eprintln!("Usage: {} <domain name>", program);
            std::process::exit(1);
        }
    };

    match List.extract(&domain) {
        Some(info) => println!(
            "{} {} {}",
            info.subdomain.unwrap_or("(None)"),
            info.domain.unwrap_or("(None)"),
            info.suffix
        ),
        None => eprintln!("`{}` is not domain name", domain),
    }
}