1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
|
#!/usr/bin/perl
use v5.14;
use warnings;
# DO NOT RELY ON THIS AS A REAL XML PARSER!
# It is not intended to be used actually as an XML parser, simply to stand as
# an example of how you might use Parser::MGC to parse an XML-like syntax
# There are a great many things it doesn't do correctly; it lacks at least the
# following features:
# Entities
# Processing instructions
# Comments
# CDATA
package XmlParser;
use base qw( Parser::MGC );
sub parse
{
my $self = shift;
my $rootnode = $self->parse_node;
$rootnode->kind eq "element" or die "Expected XML root node";
$rootnode->name eq "xml" or die "Expected XML root node";
return [ $rootnode->children ];
}
sub parse_node
{
my $self = shift;
# A "node" is either an XML element subtree or plaintext
$self->any_of( 'parse_plaintext', 'parse_element' );
}
sub parse_plaintext
{
my $self = shift;
my $str = $self->substring_before( '<' );
$self->fail( "No plaintext" ) unless length $str;
return XmlParser::Node::Plain->new( $str );
}
sub parse_element
{
my $self = shift;
my $tag = $self->parse_tag;
$self->commit;
return XmlParser::Node::Element->new( $tag->{name}, $tag->{attrs} ) if $tag->{selfclose};
my $childlist = $self->sequence_of( 'parse_node' );
$self->parse_close_tag->{name} eq $tag->{name}
or $self->fail( "Expected $tag->{name} to be closed" );
return XmlParser::Node::Element->new( $tag->{name}, $tag->{attrs}, @$childlist );
}
sub parse_tag
{
my $self = shift;
$self->expect( '<' );
my $tagname = $self->token_ident;
my $attrs = $self->sequence_of( 'parse_tag_attr' );
my $selfclose = $self->maybe_expect( '/' );
$self->expect( '>' );
return {
name => $tagname,
attrs => { map { ( $_->[0], $_->[1] ) } @$attrs },
selfclose => $selfclose,
};
}
sub parse_close_tag
{
my $self = shift;
$self->expect( '</' );
my $tagname = $self->token_ident;
$self->expect( '>' );
return { name => $tagname };
}
sub parse_tag_attr
{
my $self = shift;
my $attrname = $self->token_ident;
$self->expect( '=' );
return [ $attrname => $self->parse_tag_attr_value ];
}
sub parse_tag_attr_value
{
my $self = shift;
# TODO: This sucks
return $self->token_string;
}
use Data::Dumper;
if( !caller ) {
my $parser = __PACKAGE__->new;
my $ret = $parser->from_file( \*STDIN );
print Dumper( $ret );
}
package XmlParser::Node;
sub new { my $class = shift; bless [ @_ ], $class }
package XmlParser::Node::Plain;
use base qw( XmlParser::Node );
sub kind { "plain" }
sub text { shift->[0] }
package XmlParser::Node::Element;
use base qw( XmlParser::Node );
sub kind { "element" }
sub name { shift->[0] }
sub attrs { shift->[1] }
sub children { my $self = shift; @{$self}[2..$#$self] }
1;
|