File: lib.rs

package info (click to toggle)
python-setuptools-rust 1.9.0%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 648 kB
  • sloc: python: 1,703; javascript: 95; sh: 14; makefile: 13
file content (58 lines) | stat: -rw-r--r-- 1,491 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
use pyo3::prelude::*;
use pyo3::wrap_pyfunction;
use std::io::Read;
use std::path::Path;
use tendril::stream::TendrilSink;

// Python-visible wrapper around a kuchiki node.
// `unsendable` is PyO3's opt-out from the `Send` requirement on `#[pyclass]` types.
// NOTE(review): presumably needed because `kuchiki::NodeRef` is not `Send` — confirm.
/// A parsed html document
#[pyclass(unsendable)]
struct Document {
    // Node produced by the kuchiki HTML parser; `select` runs its queries against it.
    node: kuchiki::NodeRef,
}

#[pymethods]
impl Document {
    /// Returns the selected elements as strings
    fn select(&self, selector: &str) -> Vec<String> {
        self.node
            .select(selector)
            .unwrap()
            .map(|css_match| css_match.text_contents())
            .collect()
    }
}

impl Document {
    fn from_reader(reader: &mut impl Read) -> PyResult<Document> {
        let node = kuchiki::parse_html().from_utf8().read_from(reader)?;
        Ok(Document { node })
    }

    fn from_file(path: &Path) -> PyResult<Document> {
        let node = kuchiki::parse_html().from_utf8().from_file(path)?;
        Ok(Document { node })
    }
}

/// Parses the File from the specified Path into a document
#[pyfunction]
fn parse_file(path: &str) -> PyResult<Document> {
    // The string is only borrowed as a `Path`; any failure reported by
    // `Document::from_file` is handed back to the caller unchanged.
    let file_path = Path::new(path);
    Document::from_file(file_path)
}

/// Parses the given html text into a document
#[pyfunction]
fn parse_text(text: &str) -> PyResult<Document> {
    // A byte slice implements `Read`, so the in-memory text can go through the
    // same reader-based constructor; any failure it reports is returned as-is.
    let mut bytes = text.as_bytes();
    Document::from_reader(&mut bytes)
}

// Module initialiser: registers the two parsing functions and the `Document`
// class on the module object `m`. The first failing registration aborts
// initialisation and its error is returned.
#[pymodule]
fn html_py_ever(_py: Python<'_>, m: &PyModule) -> PyResult<()> {
    // Module-level entry points.
    m.add_wrapped(wrap_pyfunction!(parse_file))?;
    m.add_wrapped(wrap_pyfunction!(parse_text))?;

    // The class both entry points return; its result is the function's result.
    m.add_class::<Document>()
}