File: publicanize-langref.sh

package info (click to toggle)
systemtap 5.1-5
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 47,964 kB
  • sloc: cpp: 80,838; ansic: 54,757; xml: 49,725; exp: 43,665; sh: 11,527; python: 5,003; perl: 2,252; tcl: 1,312; makefile: 1,006; javascript: 149; lisp: 105; awk: 101; asm: 91; java: 70; sed: 16
file content (157 lines) | stat: -rwxr-xr-x 8,493 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#!/bin/bash
# Converts the langref.tex source of the Language Reference Guide into
# DocBook XML. The conversion itself is done by latexml (per the original
# author, a utility that comes with dblatex-0.2.7). latexml's raw XML output
# is messy, so the rest of this script cleans it up.
#
# All paths are relative to the current directory: the source is read from
# ../langref.tex and intermediate files are created in the current directory.

# Copy the LaTeX source here. Stop if that fails: continuing without input
# would only produce an empty document further down.
cp ../langref.tex . || {
  echo "${0##*/}: cannot copy ../langref.tex into the current directory" >&2
  exit 1
}

# Convert it to raw XML. Stop on failure so the clean-up steps never run on
# a missing or stale Language_Reference_Guide.xml.
latexml langref.tex --dest=Language_Reference_Guide.xml || {
  echo "${0##*/}: latexml failed to convert langref.tex" >&2
  exit 1
}

# Line-local clean-up of the raw latexml output, done in a single sed pass.
# The rules are applied to each line in the order listed.
raw_xml_rules=(
  # strip leading whitespace
  -e 's/^\s*//g'
  # drop the "<!-- %**** langref.tex Line N **** -->" source-position markers
  -e 's/<!--\ %\*\*\*\* langref.tex Line [0-9]* \*\*\*\* -->//g'
)
sed -i "${raw_xml_rules[@]}" Language_Reference_Guide.xml

cat Language_Reference_Guide.xml | 
perl -p -e 'undef $/;s|<!-- %SystemTap Language Reference -->\n<\?latexml options="twoside,english" class="article"\?>\n<\?latexml package="geometry"\?>\n<\?latexml RelaxNGSchema="LaTeXML"\?>\n<\?latexml RelaxNGSchema="LaTeXML"\?>\n<document xmlns="http://dlmf.nist.gov/LaTeXML">\n<title>SystemTap Language Reference</title>|<\!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
]>\n<book>\n<xi:include href="Book_Info.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />|msg' | 
perl -p -e 'undef $/;s|<para xml:id="p1a">\n<p>This document was derived from other documents contributed to the SystemTap project by employees of Red Hat, IBM and Intel.</p>\n</para>\n<para xml:id="p2">\n<p>Copyright © 2007 Red Hat Inc.\nCopyright © 2007 IBM Corp.\nCopyright © 2007 Intel Corporation.</p>\n</para>\n<para xml:id="p3">\n<p>Permission is granted to copy, distribute and/or modify this document\nunder the terms of the GNU Free Documentation License, Version 1.2\nor any later version published by the Free Software Foundation;\nwith no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts.</p>\n</para>\n<para xml:id="p4">\n<p>The GNU Free Documentation License is available from\n<ref class="url" href="http://www.gnu.org/licenses/fdl.html"><text font="typewriter">http://www.gnu.org/licenses/fdl.html</text></ref> or by writing to\nthe Free Software Foundation, Inc., 51 Franklin Street,\nFifth Floor, Boston, MA 02110-1301, USA.</p>\n</para>||msg' |
#fix up screens 
perl -p -e 'undef $/;s|<itemize>\n<item>\n<verbatim font="typewriter">|<screen>|msg' |
perl -p -e 'undef $/;s|<itemize>\n<item>\n\n<verbatim font="typewriter">|<screen>|msg' |
perl -p -e 'undef $/;s|</verbatim>\n</item>\n</itemize>|</screen>|msg' |
perl -p -e 'undef $/;s|</verbatim>\n\n</item>\n</itemize>|</screen>|msg' |
#fix up index tags
perl -p -e 'undef $/;s|<index xml:id="idx">\n<title>Index</title>\n</index>|<index/>|msg' |
#needed later, for TABLES!
perl -p -e 'undef $/;s|</text>\n</td>|</entry>|msg' > clean.xml

#further fix up headers!
perl -p -i -e 's|<\?latexml searchpaths="[^>]*>\n||g' clean.xml


#change main tags
sed -i -e 's/<\/document>/<\/book>/g' clean.xml

#more fixup for screen tags
perl -p -i -e 's|<verbatim font="[^"]*">|<screen>|g' clean.xml
perl -p -i -e 's|</verbatim>|</screen>|g' clean.xml

# Sectioning elements, handled in one sed pass. Rules apply to each line from
# top to bottom. Order matters: <section> has to become <chapter> before
# <subsection>/<subsubsection> are renamed to <section>.
section_rules=(
  # <section>: drop refnum and xml:id, turn the "sec:" label into an id
  -e 's/<section refnum="[0-9]*"/<section/g'
  -e 's/<section xml:id="[0-9S]*"/<section/g'
  -e 's/<section labels="LABEL:sec:/<section id="/g'

  # <subsection>: same treatment, the label prefix is "sub:"
  # NOTE(review): the "." between the two digit runs is unescaped, so it
  # matches any single character, not only a literal dot.
  -e 's/<subsection refnum="[0-9]*.[0-9]*"/<subsection/g'
  -e 's/<subsection xml:id="[S.0-9]*"/<subsection/g'
  -e 's/<subsection labels="LABEL:sub:/<subsection id="/g'

  # <subsubsection>: same treatment again
  -e 's/<subsubsection refnum="[S.0-9]*"/<subsubsection/g'
  -e 's/<subsubsection xml:id="[S.0-9]*"/<subsubsection/g'
  -e 's/<subsubsection labels="LABEL:sub:/<subsubsection id="/g'

  # top-level sections become chapters
  -e 's/<section/<chapter/g'
  -e 's/<\/section>/<\/chapter>/g'

  # subsections and subsubsections both become plain sections
  -e 's/<subsection/<section/g'
  -e 's/<\/subsection>/<\/section>/g'
  -e 's/<subsubsection/<section/g'
  -e 's/<\/subsubsection>/<\/section>/g'
)
sed -i "${section_rules[@]}" clean.xml

# Paragraphs: latexml wraps each <p> in a <para>. Strip the id from <para>,
# delete the <para> wrapper tags, then promote <p> to <para>. The wrapper
# must be removed before <p> is renamed, or the new tags would be deleted.
para_rules=(
  -e 's/<para xml:id="[pS.0-9]*"/<para/g'
  -e 's/<para>//g'
  -e 's/<\/para>//g'
  -e 's/<p>/<para>/g'
  -e 's/<\/p>/<\/para>/g'
)
sed -i "${para_rules[@]}" clean.xml

#properly convert xrefs
sed -i -e 's/<ref labelref="LABEL:sub:/<xref linkend="/g' clean.xml
sed -i -e 's/<ref labelref="LABEL:sec:/<xref linkend="/g' clean.xml

#convert indexterms
sed -i -e 's/indexmark>/indexterm>/g' clean.xml
perl -p -i -e 's/<indexphrase key="[^"]*">/<primary>/g' clean.xml
sed -i -e 's/<indexphrase>/<primary>/g' clean.xml
sed -i -e 's/<\/indexphrase>/<\/primary>/g' clean.xml

#convert <emph>s
sed -i -e 's/emph>/emphasis>/g' clean.xml

#convert itemizedlists and listitems, dependent on successful exec of "fix up screens" perl routines
sed -i -e 's/<itemize xml:id="[Ii,0-9]*">/<itemizedlist>/g' clean.xml
sed -i -e 's/<item xml:id="[Ii.0-9]*">/<listitem>/g' clean.xml
sed -i -e 's/<\/itemize>/<\/itemizedlist>/g' clean.xml
sed -i -e 's/<\/item>/<\/listitem>/g' clean.xml

#convert orderedlists and their respective listitems
perl -p -i -e 's|<enumerate xml:id="[^"]*">|<orderedlist>|g' clean.xml
perl -p -i -e 's|</enumerate>|</orderedlist>|g' clean.xml
perl -p -i -e 's|<item refnum="[^"]*" xml:id="[^"]*">|<listitem>|g' clean.xml

#TRICKY: this perl expression takes all occurences of 
# <ref class="url" href="http://sourceware.org/systemtap/wiki/HomePage"><text
# font="typewriter">http://sourceware.org/systemtap/wiki/HomePage</text></ref>
# and replaces the <text font=...</ref> string with "/>". from jfearn
# note: [^"]* means "any number of occurences of characters that are NOT quotes 
# note: () groups strings/an expression together, which can be called later as $1 when replacing
perl -p -i -e 's|(<ref class="url" href="[^"]*")><text font="typewriter">[^<]*</text></ref>|$1/>|g' clean.xml

#now, convert <ref class="url" to <ulink>s
sed -i -e 's/<ref class="url" href=/<ulink url=/g' clean.xml

#TRICKY again: convert <text font=[var]> accordingly; bold is <computeroutput>, typewriter is <command>
perl -p -i -e 's|(<text font="bold">[^<]*)</text>|$1</computeroutput>|g' clean.xml
sed -i -e 's/<text font="bold">/<computeroutput>/g' clean.xml
perl -p -i -e 's|(<text font="typewriter">[^<]*)</text>|$1</command>|g' clean.xml
sed -i -e 's/<text font="typewriter">/<command>/g' clean.xml

#weird remainders, defaulting them to command
perl -p -i -e 's|(<text font="typewriter bold">[^<]*)</text>|$1</command>|g' clean.xml
sed -i -e 's/<text font="typewriter bold">/<command>/g' clean.xml
perl -p -i -e 's|(<text font="smallcaps">[^<]*)</text>|$1</emphasis>|g' clean.xml
sed -i -e 's/<text font="smallcaps">/<emphasis>/g' clean.xml

#TABLES!
#the first expression is quite dirty, since it assumes that all tables have 3 columns. dunno yet how to 
#automagicize this, since the orig XML doesn't have any attribute that specifies columns per table 
sed -i -e 's/<tabular>/<tgroup cols="3">/g' clean.xml
sed -i -e 's/tabular>/tgroup>/g' clean.xml
perl -p -i -e 's|<table placement="[^"]*" refnum="[^"]*" xml:id="([^"]*">)|<table id="$1|g' clean.xml
sed -i -e 's/caption>/title>/g' clean.xml
sed -i -e 's/tr>/row>/g' clean.xml
perl -p -i -e 's|<td[^>]*>||g' clean.xml
sed -i -e 's/<text>/<entry>/g' clean.xml

# Some indexterms ended up nested inside a <command>; when such a command is
# still closed by </text>, close it with </command> instead.
perl -pi -e 's|(<command>[^<]*<indexterm><primary>[^<]*</primary></indexterm>)</text>|$1</command>|g' clean.xml

# Dirty hack: any </text> still left (e.g. closers sitting on a line of their
# own) is simply assumed to close a <command>.
sed -i -e 's/<\/text>/<\/command>/g' clean.xml

#clean up error tags
perl -p -i -e 's|<ERROR [^/]*/ERROR>|<!-- ERROR TAG REMOVED -->|g' clean.xml
#clean up "Math" tags (like, wtf)
perl -p -i -e 's|<Math [^>]*><XMath><XMApp>|<command>|g' clean.xml
perl -p -i -e 's|<XMTok [^>]*>||g' clean.xml
perl -p -i -e 's|</XMTok>||g' clean.xml
perl -p -i -e 's|</XMApp>||g' clean.xml
perl -p -i -e 's|</XMath>||g' clean.xml
perl -p -i -e 's|</Math>|</command>|g' clean.xml

#remove "About this guide" section
#perl -p -i -e 'undef $/;s|<section>\n<title>About this guide</title>||msg' clean.xml

# Finalize: publish clean.xml as en-US/Language_Reference_Guide.xml.
# If the copy fails, stop with an error and keep the intermediate files, so
# the failure is visible to the caller and the result can still be inspected.
cp clean.xml en-US/Language_Reference_Guide.xml || {
  echo "${0##*/}: cannot write en-US/Language_Reference_Guide.xml; intermediate files kept" >&2
  exit 1
}

# Delete the intermediate files created in the current directory.
rm langref.tex
rm clean.xml
rm Language_Reference_Guide.xml