File: simple_xml_tokenizer.rs

package info (click to toggle)
rust-xml5ever 0.18.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 276 kB
  • sloc: xml: 3; makefile: 2
file content (81 lines) | stat: -rw-r--r-- 2,467 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env run-cargo-script
//! This is a regular crate doc comment, but it also contains a partial
//! Cargo manifest.  Note the use of a *fenced* code block, and the
//! `cargo` "language".
//!
//! ```cargo
//! [dependencies]
//! xml5ever = "0.1.1"
//! tendril = "0.1.3"
//! markup5ever = "0.7.4"
//! ```
extern crate markup5ever;
extern crate xml5ever;

use std::io;

use markup5ever::buffer_queue::BufferQueue;
use xml5ever::tendril::{ByteTendril, ReadExt};
use xml5ever::tokenizer::{CharacterTokens, NullCharacterToken, TagToken};
use xml5ever::tokenizer::{CommentToken, PIToken, Pi};
use xml5ever::tokenizer::{Doctype, DoctypeToken, EOFToken};
use xml5ever::tokenizer::{ParseError, Token, TokenSink, XmlTokenizer};

/// Stateless token sink: its `TokenSink` implementation writes a short
/// description of each token it receives to standard output.
struct SimpleTokenPrinter;

impl TokenSink for SimpleTokenPrinter {
    fn process_token(&mut self, token: Token) {
        match token {
            CharacterTokens(b) => {
                println!("TEXT: {}", &*b);
            },
            NullCharacterToken => print!("NULL"),
            TagToken(tag) => {
                println!("{:?} {} ", tag.kind, &*tag.name.local);
            },
            ParseError(err) => {
                println!("ERROR: {}", err);
            },
            PIToken(Pi {
                ref target,
                ref data,
            }) => {
                println!("PI : <?{} {}?>", target, data);
            },
            CommentToken(ref comment) => {
                println!("<!--{:?}-->", comment);
            },
            EOFToken => {
                println!("EOF");
            },
            DoctypeToken(Doctype {
                ref name,
                ref public_id,
                ..
            }) => {
                println!("<!DOCTYPE {:?} {:?}>", name, public_id);
            },
        }
    }
}

/// Reads all of standard input, tokenizes it with `XmlTokenizer`, and lets
/// `SimpleTokenPrinter` print each token.
///
/// If standard input cannot be read, or its bytes cannot be reinterpreted as
/// a string tendril, a message is written to standard error and the process
/// exits with status 1 instead of panicking.
fn main() {
    // Our implementation of TokenSink
    let sink = SimpleTokenPrinter;

    // We need a ByteTendril to hold the raw bytes of the input
    let mut input = ByteTendril::new();

    // Using ReadExt::read_to_tendril we can read all of stdin
    if let Err(err) = io::stdin().read_to_tendril(&mut input) {
        eprintln!("error: failed to read standard input: {}", err);
        std::process::exit(1);
    }

    // Load input into BufferQueue. The queue holds string tendrils, so the
    // bytes are reinterpreted first; that conversion is fallible.
    let mut input_buffer = BufferQueue::default();
    match input.try_reinterpret() {
        Ok(text) => input_buffer.push_back(text),
        Err(_) => {
            eprintln!("error: standard input could not be reinterpreted as UTF-8 text");
            std::process::exit(1);
        },
    }

    // Here we create and run tokenizer
    let mut tok = XmlTokenizer::new(sink, Default::default());
    tok.feed(&mut input_buffer);
    tok.end();
}