File: parse-xml.pl

package info (click to toggle)
libparser-mgc-perl 0.22-1
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 536 kB
  • sloc: perl: 1,881; makefile: 2; sh: 1
file content (141 lines) | stat: -rw-r--r-- 2,833 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#!/usr/bin/perl

use v5.14;
use warnings;

# DO NOT RELY ON THIS AS A REAL XML PARSER!

# It is not intended to be used actually as an XML parser, simply to stand as
# an example of how you might use Parser::MGC to parse an XML-like syntax

# There are a great many things it doesn't do correctly; it lacks at least the
# following features:
#   Entities
#   Processing instructions
#   Comments
#   CDATA

package XmlParser;
use base qw( Parser::MGC );

sub parse
{
   my $self = shift;

   my $rootnode = $self->parse_node;
   $rootnode->kind eq "element" or die "Expected XML root node";
   $rootnode->name eq "xml"     or die "Expected XML root node";

   return [ $rootnode->children ];
}

sub parse_node
{
   my $self = shift;

   # A "node" is either an XML element subtree or plaintext
   $self->any_of( 'parse_plaintext', 'parse_element' );
}

sub parse_plaintext
{
   my $self = shift;

   my $str = $self->substring_before( '<' );
   $self->fail( "No plaintext" ) unless length $str;

   return XmlParser::Node::Plain->new( $str );
}

sub parse_element
{
   my $self = shift;

   my $tag = $self->parse_tag;

   $self->commit;

   return XmlParser::Node::Element->new( $tag->{name}, $tag->{attrs} ) if $tag->{selfclose};

   my $childlist = $self->sequence_of( 'parse_node' );

   $self->parse_close_tag->{name} eq $tag->{name}
      or $self->fail( "Expected $tag->{name} to be closed" );

   return XmlParser::Node::Element->new( $tag->{name}, $tag->{attrs}, @$childlist );
}

sub parse_tag
{
   my $self = shift;

   $self->expect( '<' );
   my $tagname = $self->token_ident;

   my $attrs = $self->sequence_of( 'parse_tag_attr' );

   my $selfclose = $self->maybe_expect( '/' );
   $self->expect( '>' );

   return {
      name  => $tagname,
      attrs => { map { ( $_->[0], $_->[1] ) } @$attrs },
      selfclose => $selfclose,
   };
}

sub parse_close_tag
{
   my $self = shift;

   $self->expect( '</' );
   my $tagname = $self->token_ident;
   $self->expect( '>' );

   return { name => $tagname };
}

sub parse_tag_attr
{
   my $self = shift;

   my $attrname = $self->token_ident;
   $self->expect( '=' );
   return [ $attrname => $self->parse_tag_attr_value ];
}

sub parse_tag_attr_value
{
   my $self = shift;

   # TODO: This sucks
   return $self->token_string;
}


use Data::Dumper;

if( !caller ) {
   my $parser = __PACKAGE__->new;

   my $ret = $parser->from_file( \*STDIN );
   print Dumper( $ret );
}


package XmlParser::Node;
sub new { my $class = shift; bless [ @_ ], $class }

package XmlParser::Node::Plain;
use base qw( XmlParser::Node );
sub kind { "plain" }
sub text { shift->[0] }

package XmlParser::Node::Element;
use base qw( XmlParser::Node );
sub kind     { "element" }
sub name     { shift->[0] }
sub attrs    { shift->[1] }
sub children { my $self = shift; @{$self}[2..$#$self] }

1;