File: main.rs

package info (click to toggle)
rustc 1.85.0%2Bdfsg3-1
  • links: PTS, VCS
  • area: main
  • in suites: experimental, sid, trixie
  • size: 893,396 kB
  • sloc: xml: 158,127; python: 35,830; javascript: 19,497; cpp: 19,002; sh: 17,245; ansic: 13,127; asm: 4,376; makefile: 1,051; perl: 29; lisp: 29; ruby: 19; sql: 11
file content (61 lines) | stat: -rw-r--r-- 1,547 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
use clap::Parser;
use html_parser::{Dom, Result};
use std::{
    fs::File,
    io::{self, Read},
    path::PathBuf,
};

// Command-line options for this binary, populated in `main` via `Opt::parse()`.
//
// NOTE(review): the `///` doc comments in this struct are consumed by the `Parser` derive
// (presumably surfaced as `--help` text — confirm against the clap docs), so treat them as
// user-facing strings rather than ordinary comments.
#[derive(Debug, Parser)]
/// A simple and general purpose html/xhtml parser.
struct Opt {
    // When set, `main` serializes with `Dom::to_json_pretty` instead of `Dom::to_json`.
    #[arg(short, long)]
    /// Pretty-print the output.
    pretty_print: bool,

    // When set, `main` prints every entry of `dom.errors` to stdout, each prefixed with "# ",
    // before the JSON output.
    #[arg(short, long)]
    /// Debug the parser, this will print errors to the console.
    debug: bool,

    // No `#[arg]` attribute on this field. `Some(path)` makes `main` read that file; `None`
    // makes `main` read all of stdin instead.
    /// Path to the file, or stdin (piped content).
    ///
    /// This argument can either be a path to the html-file that you would like to parse or the
    /// result of stdin. Note: Content over stdin needs to be finite, for now, as it is collected
    /// into a string and then processed by the parser.
    input: Option<PathBuf>,
}

fn main() -> Result<()> {
    let opt = Opt::parse();

    let mut content = String::with_capacity(100_000);

    // If input is provided then use that as a path
    if let Some(path) = opt.input {
        let mut file = File::open(path)?;
        file.read_to_string(&mut content)?;

    // Else read from stdin, this enables piping
    // ex: `cat index.html | html_parser`
    } else {
        let stdin = io::stdin();
        let mut handle = stdin.lock();
        handle.read_to_string(&mut content)?;
    };

    let dom = Dom::parse(&content)?;

    if opt.debug {
        for error in &dom.errors {
            println!("# {}", error);
        }
    }

    if opt.pretty_print {
        println!("{}", dom.to_json_pretty()?);
    } else {
        println!("{}", dom.to_json()?);
    }

    Ok(())
}