1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172
|
#!/usr/bin/perl -w
#
# Copyright (C) 2007-2008 Alex Linke <alinke@lingua-systems.com>
# Copyright (C) 2009-2016 Lingua-Systems Software GmbH
# Copyright (C) 2016 Netzum Sorglos, Lingua-Systems Software GmbH
#
use strict;
use warnings;
require 5.008;
use XML::LibXML;
use Data::Dumper;
use Getopt::Long;
my $VERSION = '0.6';
my %tables;
# set default options
my %opt = (
output => "tables.dump",
verbose => 0,
);
# parse commandline options
show_help(1)
unless GetOptions(
"output|o=s" => \$opt{output},
"verbose|v" => \$opt{verbose},
"help|h" => \$opt{help}
);
show_help(1) if scalar(@ARGV) == 0; # No XML file(s) given
show_help(0) if $opt{help};
my $xmlparser = new XML::LibXML();
# Set parser options
$xmlparser->pedantic_parser(1);
$xmlparser->validation(1);
$xmlparser->expand_entities(1);
$xmlparser->keep_blanks(1);
$xmlparser->line_numbers(1);
# Treat everything else in @ARGV as a filename
foreach my $file (@ARGV) {
print "Parsing $file..." if $opt{verbose};
my %counts = ( rules => 0, contexts => 0 );
my $ds;
my $doc = $xmlparser->parse_file($file)
or die "Error parsing $file: $!\n";
# Retrieve meta-documentation from XML document first
foreach my $meta (qw/name desc reverse/) {
my @nodes = $doc->findnodes("/translit/$meta");
die "#/translit/$meta != 1" if ( scalar(@nodes) != 1 );
$ds->{$meta} = $nodes[0]->to_literal();
}
# Perform some basic meta data checks
die "Name undefined.\n" unless $ds->{name};
die "Description undefined.\n" unless $ds->{desc};
die "Reversibility undefined.\n" unless $ds->{reverse};
# Check <reverse> tag contains valid data.
# TODO: move this to the DTD
die "Reversibility: '$ds->{reverse}' -- Should be 'true' or 'false'.\n"
unless $ds->{reverse} =~ /^(true|false)$/;
# Set the table's identifier
$ds->{id} = lc( $ds->{name} );
$ds->{id} =~ s/\s/_/g;
# Retrieve all rules, extract their data and store it to an appropriate
# data structure
foreach my $rule ( $doc->findnodes("/translit/rules/rule") ) {
my @nodes;
my $rule_ds;
# Retrieve "from" and "to" literals
foreach my $n (qw/from to/) {
@nodes = $rule->findnodes("./$n");
die "#/translit/rules/rules/$n != 1 "
. "(at line "
. $rule->line_number() . ")\n"
if ( scalar(@nodes) != 1 );
$rule_ds->{$n} = $nodes[0]->to_literal();
}
# Retrieve rule's "context"
@nodes = $rule->findnodes("./context");
die "#/translit/rules/rule/context > 1 "
. "(at line "
. $rule->line_number() . ")\n"
if ( scalar(@nodes) > 1 );
# Process rule's "context" if necessary
if ( scalar(@nodes) ) {
foreach my $context (qw/before after/) {
@nodes = $rule->findnodes("./context/$context");
die "#/translit/rules/rule/context/$context > 1 "
. "(at line "
. $rule->line_number() . ")\n"
if ( scalar(@nodes) > 1 );
# Copy the context to the rule's data structure
if ( scalar(@nodes) ) {
$rule_ds->{context}->{$context} = $nodes[0]->to_literal();
}
}
$counts{contexts}++;
}
$counts{rules}++;
die $rule_ds->{name} . ": from==to -> " . $rule_ds->{from} . "\n"
if ( $rule_ds->{from} eq $rule_ds->{to} );
push @{ $ds->{rules} }, $rule_ds;
}
# Copy transliteration structure over to the final hash
$tables{ $ds->{id} } = $ds;
print " ($ds->{id}: rules=$counts{rules}, contexts=$counts{contexts})\n"
if $opt{verbose};
undef($ds); # free memory
}
# Configure Data::Dumper
my $dumper = new Data::Dumper( [ \%tables ], [qw/*tables/] );
$dumper->Purity(0);
$dumper->Useqq(1);
$dumper->Indent(1);
# Dump the table(s) to disk
open FH, ">$opt{output}" or die "$opt{output}: $!\n";
print FH $dumper->Dump();
close(FH);
print scalar( keys(%tables) ),
" transliteration table(s) dumped to $opt{output}.\n"
if $opt{verbose};
sub show_help {
my $retval = shift();
print STDERR
"xml2dump v$VERSION -- Copyright 2007-2008 by Alex Linke ",
"<alinke\@lingua-systems.com>\n\n",
"usage: $0 [-v -h] -o FILE XML-FILE(s)\n\n",
"\t--output -o FILE set output file (default: transtbl.dump)\n",
"\t--verbose -v be verbose\n",
"\t--help -h show this help\n";
exit($retval);
}
# vim: set ft=perl sw=4 sts=4 ts=4 ai et:
|