File: leaf2xml.pl

package info (click to toggle)
pdfedit 0.4.1-2
  • links: PTS, VCS
  • area: main
  • in suites: lenny
  • size: 15,032 kB
  • ctags: 21,708
  • sloc: cpp: 185,471; xml: 8,824; yacc: 1,178; ansic: 666; perl: 664; makefile: 636; sh: 371; lisp: 51
file content (125 lines) | stat: -rwxr-xr-x 2,556 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/usr/bin/perl

use strict;

# Converter state shared by the subs below.
#  $omit - set by \omit / cleared by \endomit; while true the main loop
#          does not print the processed lines.
#  $code - set by \code / cleared by \endcode.
#  @ttc  - stack of the element names that are currently open and still
#          need a closing tag (innermost element last).
my ($omit, $code) = (0, 0);
my @ttc;

# Record an element as open by pushing its name on the @ttc stack.
#
# Any elements of the same name sitting on top of the stack are popped
# first via close_tags(); the closing markup close_tags() returns for them
# is not used here.
#
# Parameter: name of the element that has just been opened.
# Returns:   the result of the push (new number of open elements).
sub add_tag {
 my ($name) = @_;
 close_tags($name);
 push @ttc, $name;
}

# Close the innermost open elements whose names match a pattern.
#
# Names are popped off the top of the @ttc stack for as long as the top
# name matches the pattern as a whole; popping stops at the first name that
# does not match, or when the stack is empty.
#
# Parameter: regular expression source that an element name has to match
#            completely (e.g. '.*', 'sect[2-9]', 'listitem|para').
# Returns:   the closing tags ('</name>') of the popped elements,
#            innermost first; the empty string when nothing was closed.
sub close_tags {
 my $regex=shift;
 my $tags='';
 # The pattern is wrapped in (?:...) so that both anchors apply to every
 # alternative. A bare /^listitem|para$/ would mean "starts with listitem
 # OR ends with para" and could close unrelated elements.
 while (@ttc && $ttc[-1]=~/^(?:$regex)$/) {
  $tags.='</'.pop(@ttc).'>';
 }
 return $tags;
}

# Inline leaf commands and the DocBook element each one is rendered as.
my %tagx = (
 'c' => 'code',
 'e' => 'emphasis',
);

# Turn arbitrary text into an identifier-safe string: every character that
# is not an ASCII letter or digit is replaced by an underscore.
#
# Parameter: the text to convert.
# Returns:   the converted copy (same length as the input).
sub munge {
 my ($text) = @_;
 # Complemented transliteration: everything outside a-zA-Z0-9 becomes '_'.
 (my $safe = $text) =~ tr/a-zA-Z0-9/_/c;
 return $safe;
}

# Identifiers that uniq() has already handed out.
my %taken_sections;

# Reserve an identifier that has not been returned before.
#
# The requested name is used as is when it is still free; otherwise the
# first free name of the form <name>_0, <name>_1, ... is taken instead.
#
# Parameter: the requested identifier.
# Returns:   the identifier that was actually reserved.
sub uniq {
 my ($wanted) = @_;
 my $id = $wanted;
 if ($taken_sections{$wanted}) {
  # Count upwards until an unused suffixed name turns up.
  for (my $n = 0; ; $n++) {
   $id = $wanted.'_'.$n;
   last unless $taken_sections{$id};
  }
 }
 $taken_sections{$id} = 1;
 return $id;
}

# Translate one leaf command into DocBook XML.
#
# Parameters:
#  $tg    - command name without the leading backslash. The brace form of
#           a command is passed with the opening brace still attached
#           (e.g. 'c{'); only the c and e commands accept that form.
#  $param - the text the command applies to.
#
# Returns the text that replaces the command in the output.
#
# Side effects: \omit, \endomit, \code and \endcode switch the $omit and
# $code flags; chapter, section and list item commands push/pop the stack
# of open elements and reserve a section id.
#
# Dies with "Unknown tag \<name>" when the command is not recognised.
sub tag {
 my $tg=shift;
 my $param=shift;
 if ($tg eq 'omit')    { $omit=1; return ''; }
 if ($tg eq 'endomit') { $omit=0; return ''; }
 if ($tg eq 'code')    { $code=1; return '<programlisting>'; }
 if ($tg eq 'endcode') { $code=0; return '</programlisting>'; }
 if ($tg=~/^(chapter|section(\d+))$/) {
  # Save the captures before any further regex operation resets them.
  my ($kind,$level)=($1,$2);
  my $t;
  if ($kind eq 'chapter') {
   # A new chapter closes everything that is still open.
   $t=close_tags('.*');
  } else {
   # A new section closes open sections of the same or a deeper level.
   $t=close_tags('sect['.$level.'-9]');
   $tg=~s/^section/sect/;
  }
  add_tag($tg);
  # The id is derived from the raw title text, made unique per document.
  my $idsect=uniq('p_'.munge($param));
  # Expand inline commands inside the title here (as the list item branch
  # does for its text). Otherwise a line-form command whose word ends the
  # title would later swallow the '</title>' into its own element.
  my $title=ptags($param);
  return $t.'<'.$tg.' id="'.$idsect.'"><title>'.$title.'</title>';
 }
 if ($tg=~/^(c|e)(\{?)$/) {
  my $cn=$tagx{$1};
  if ($2 eq '{') {
   # Brace form: the whole argument is wrapped.
   $param="<$cn>$param</$cn>";
  } else {
   # Line form: only the first word is wrapped.
   $param=~s/^(\S+)/<$cn>$1<\/$cn>/;
  }
  return $param;
 }
 if ($tg eq 'list')    { return '<itemizedlist spacing="compact">'; }
 if ($tg eq 'endlist') { return close_tags('listitem|para').'</itemizedlist>'; }
 if ($tg eq 'l') {
  #link to somewhere ... probably to somewhere out of the docs, so not supported
  return $param;
 }
 if ($tg eq 'img') {
  #Image
  $param=~s/^\s+//;
  $param=~s/\s+$//;
  # The name ends up inside a double-quoted attribute value, so a literal
  # double quote has to be escaped to keep the XML well-formed.
  $param=~s/"/&quot;/g;
  return '<mediaobject><imageobject><imagedata fileref="images/'.$param.'" /></imageobject></mediaobject>';
 }
 if ($tg eq 'i') {
  # A new item closes the previous item (and its paragraph) first.
  my $t=close_tags('listitem|para');
  add_tag('listitem');
  add_tag('para');
  return $t.'<listitem><para>'.ptags($param);
 }
 die ("Unknown tag \\$tg\n");
}

# Write the XML declaration and the DocBook doctype, unless the first
# command-line argument is exactly 'no'.
if ($ARGV[0] ne 'no') {
 print <<'XML_PROLOGUE';
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
XML_PROLOGUE
}

# Expand all leaf commands in a piece of text.
#
# Each command is replaced by whatever tag() returns for it, and the
# substitutions are repeated until no command is left.
#
# Parameter: the text to process.
# Returns:   the text with every command replaced.
sub ptags {
 my ($text) = @_;
 # Brace form first: \name{argument}; tag() receives the name with the '{'.
 1 while $text =~ s/\\([a-z0-9_]+\{)([^}]*)\}/tag($1,$2)/em;
 # Then the line form: \name followed by the rest of the line as argument.
 1 while $text =~ s/\\([a-z0-9_]+)(\s+|$)(.*)/tag($1,$3)/em;
 return $text;
}

# Main filter: read leaf markup from standard input line by line, escape
# the XML special characters, expand the commands and print the result.
# Lines are still processed while $omit is set, but they are not printed.
my %xml_entity = ('&' => '&amp;', '<' => '&lt;', '>' => '&gt;');
while (defined(my $line = <STDIN>)) {
 # Drop carriage returns.
 $line =~ tr/\r//d;
 $line =~ s/([&<>])/$xml_entity{$1}/g;
 $line = ptags($line);
 print $line unless $omit;
}
# Close whatever elements are still open at the end of the input.
print close_tags('.*');