XML-TreePuller
INSTALLATION
To install this module, run the following commands:
perl Makefile.PL
make
make test
make install
ABOUT
This module implements a tree-oriented XML pull processor using a combination of
XML::LibXML::Reader and an object-oriented interface around the output of XML::CompactTree.
It provides a fast and convenient way to serially access the content of extremely large
XML documents.
EXAMPLE
#!/usr/bin/env perl
use strict;
use warnings;
use XML::TreePuller;
# Build the sample wiki XML document used by the examples: one <siteinfo>
# element (with three namespaces) followed by two <page> elements.
# Takes no arguments and returns the whole document as a single string.
sub gen_xml {
    # The heredoc body must stay flush left: every character up to the
    # terminator is part of the returned document.
    my $document = <<"EOF";
<wiki version="0.3">
<!-- schema says that there is always 1 siteinfo and zero or more page
elements follow -->
<siteinfo>
<sitename>ExamplePedia</sitename>
<url>http://example.pedia/</url>
<namespaces>
<namespace key="-1">Special</namespace>
<namespace key="0" />
<namespace key="1">Talk</namespace>
</namespaces>
</siteinfo>
<page>
<title>A good article</title>
<text>Some good content</text>
</page>
<page>
<title>A bad article</title>
<text>Some bad content</text>
</page>
</wiki>
EOF

    return $document;
}
# Demonstrate the 'short' configuration: register the namespace path, then
# pull each matching element and print its "key" attribute and its text.
sub element_example {
    my $puller = XML::TreePuller->new(string => gen_xml());

    print "Printing namespace names using configuration style:\n";

    $puller->config('/wiki/siteinfo/namespaces/namespace' => 'short');

    while (1) {
        # Scalar-context call; an undefined result ends the loop.
        my $namespace = $puller->next;
        last unless defined $namespace;

        print $namespace->attribute('key'), ": ", $namespace->text, "\n";
    }

    print "End of namespace names\n";
}
# Demonstrate the 'subtree' configuration: pull each /wiki/page element and
# print the text of its <title> child.
sub subtree_example {
    my $puller = XML::TreePuller->new(string => gen_xml());

    print "Printing titles using a subtree:\n";

    $puller->config('/wiki/page' => 'subtree');

    while (defined(my $page = $puller->next)) {
        # get_elements is called in scalar context here, exactly as it is
        # when used directly as the invocant of a chained method call.
        my $title = $page->get_elements('title');

        print "Title: ", $title->text, "\n";
    }

    print "End of titles\n";
}
# Demonstrate list-context iteration with two configured paths: each call to
# next is destructured into (matched path, element) and the path is printed.
sub path_example {
    my $puller = XML::TreePuller->new(string => gen_xml());

    print "Printing path example:\n";

    $puller->config('/wiki/siteinfo'   => 'subtree');
    $puller->config('/wiki/page/title' => 'short');

    # An array assignment in boolean context is true while next keeps
    # returning at least one value, so the loop stops on an empty list.
    while (my @match = $puller->next) {
        # Only the path is printed; the element is unpacked for illustration.
        my ($matched_path, $element) = @match;

        print "Path: $matched_path\n";
    }

    print "End path example\n";
}
# Run each demonstration in turn, printing a blank line after each one.
element_example();
print "\n";

subtree_example();
print "\n";

path_example();
print "\n";
__END__
Printing namespace names using configuration style:
-1: Special
0:
1: Talk
End of namespace names
Printing titles using a subtree:
Title: A good article
Title: A bad article
End of titles
Printing path example:
Path: /wiki/siteinfo
Path: /wiki/page/title
Path: /wiki/page/title
End path example
SUPPORT AND DOCUMENTATION
After installing, you can find documentation for this module with the
perldoc command.
perldoc XML::TreePuller
You can also look for information at:
RT, CPAN's request tracker
http://rt.cpan.org/NoAuth/Bugs.html?Dist=XML-TreePuller
AnnoCPAN, Annotated CPAN documentation
http://annocpan.org/dist/XML-TreePuller
CPAN Ratings
http://cpanratings.perl.org/d/XML-TreePuller
Search CPAN
http://search.cpan.org/dist/XML-TreePuller/
COPYRIGHT AND LICENCE
Copyright (C) 2010 "Tyler Riddle"
This program is free software; you can redistribute it and/or modify it
under the terms of either: the GNU General Public License as published
by the Free Software Foundation; or the Artistic License.
See http://dev.perl.org/licenses/ for more information.