File: lib.rs

package info (click to toggle)
python-setuptools-rust 1.12.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 660 kB
  • sloc: python: 1,815; javascript: 95; sh: 14; makefile: 13
file content (53 lines) | stat: -rw-r--r-- 1,464 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
use pyo3::prelude::*;

#[pymodule]
mod html_py_ever {
    use pyo3::prelude::*;
    use std::io::Read;
    use std::path::Path;
    use tendril::stream::TendrilSink;

    /// A parsed HTML document
    // `unsendable` asks PyO3 to keep instances on the thread that created them;
    // presumably required because `kuchiki::NodeRef` is not `Send` — TODO confirm.
    #[pyclass(unsendable)]
    struct Document {
        // Node handle produced by the kuchiki parser (see `from_reader` / `from_file`);
        // `select` runs its CSS queries against this node.
        node: kuchiki::NodeRef,
    }

    #[pymethods]
    impl Document {
        /// Returns the selected elements as strings
        fn select(&self, selector: &str) -> Vec<String> {
            self.node
                .select(selector)
                .unwrap()
                .map(|css_match| css_match.text_contents())
                .collect()
        }
    }

    impl Document {
        fn from_reader(reader: &mut impl Read) -> PyResult<Document> {
            let node = kuchiki::parse_html().from_utf8().read_from(reader)?;
            Ok(Document { node })
        }

        fn from_file(path: &Path) -> PyResult<Document> {
            let node = kuchiki::parse_html().from_utf8().from_file(path)?;
            Ok(Document { node })
        }
    }

    /// Parses the file at the specified path into a document
    ///
    /// `path` may be a `str` or any `os.PathLike` object such as `pathlib.Path`.
    /// Raises `OSError` (or a subclass) if the file cannot be read.
    #[pyfunction]
    fn parse_file(path: std::path::PathBuf) -> PyResult<Document> {
        // Taking `PathBuf` instead of `&str` lets PyO3 convert the argument via
        // `os.fspath`, so path-like objects are accepted in addition to strings.
        Document::from_file(&path)
    }

    /// Parses the given HTML text into a document
    #[pyfunction]
    fn parse_text(text: &str) -> PyResult<Document> {
        // A byte slice implements `Read`, so the string's bytes feed the reader-based parser.
        let mut bytes = text.as_bytes();
        Document::from_reader(&mut bytes)
    }
}