#! /usr/bin/env ruby
#
# Copyright (c) 2001 by Jim Menard <jimm@io.com>
#
# Released under the same license as Ruby. See
# http://www.ruby-lang.org/en/LICENSE.txt.
#
# I was going to use NQXML to parse README.sgml, but I can't because SGML
# is not XML. For example, the DOCTYPE tag's PUBLIC identifier takes one
# argument in SGML but two arguments in XML, and the Docbook <xref> tag
# has no end tag. Therefore, README.sgml is not a legal XML document.
#
# Layout settings.
# FILL_COLUMN is the column budget fillParagraph works with; the prefix
# length is subtracted from it to get the width available for text.
FILL_COLUMN = 75
# Strings inserted after every newline inside <programlisting> and <screen>
# blocks respectively.
PROGRAM_PREFIX = "\t"
SCREEN_PREFIX = "\t "

# Running state shared by the substitution passes.
exampleCounter = 0        # number of <example> titles seen so far
exampleXrefs = {}         # example id => example number
titleCounters = [0] * 6   # section counters; the heading pass uses slots 1-4
# Collapse every run of whitespace in +txt+ to a single space, break the
# result into lines that fit in FILL_COLUMN columns once +prefix+ is taken
# into account, and return the wrapped paragraph with a newline before and
# after it.
#
# prefix:: text placed in front of the first line. Continuation lines are
#          indented with a copy of the prefix in which every non-whitespace
#          character has been replaced by a space (tabs are kept as tabs).
# txt::    the paragraph text; it is not modified.
#
# Lines are only broken at spaces. A word that is too long for the column is
# put on a line of its own and wrapping carries on after it, instead of the
# whole rest of the paragraph being emitted as one unwrapped line.
def fillParagraph(prefix, txt)
  # A prefix wider than FILL_COLUMN would give a negative width, for which
  # the loop condition below could never become false. Keep at least one
  # column so every pass through the loop consumes some text.
  width = [FILL_COLUMN - prefix.length, 1].max
  remaining = txt.gsub(/\s+/, ' ')
  fits = /\A(.{0,#{width}}) /   # longest head that fits and ends at a space
  lines = []

  while remaining.length > width
    if (m = fits.match(remaining))
      lines << m[1]
      remaining = m.post_match
    elsif (cut = remaining.index(' '))
      # The first word alone is wider than the column: give it its own line.
      lines << remaining[0...cut]
      remaining = remaining[(cut + 1)..-1]
    else
      # One single overlong word is left; nothing more can be broken.
      lines << remaining
      remaining = ''
    end
  end
  lines << remaining unless remaining.empty?

  continuation = "\n" + prefix.gsub(/\S/, ' ')
  "\n" + prefix + lines.join(continuation) + "\n"
end
# Slurp the whole file named by the first command-line argument into `text'.
# File.read opens, reads and closes the file in one call, so no file handle
# is left open for the rest of the run.
abort "usage: #{$0} file.sgml" if ARGV.empty?
text = File.read(ARGV[0])
# Read ENTITY declarations and replace every &name; reference in the text
# with the declared value, stripped of any tags. Must do this before we
# delete the entire DOCTYPE tag below.
#
# The declarations are collected first and applied afterwards, because
# String#scan with a block raises "string modified" when the block changes
# the string that is being scanned. Values are used as declared: a reference
# to another entity inside a value is only expanded if that other entity is
# declared later.
entities = text.scan(/<!ENTITY\s+(\w+)\s+"([^"]*)">/)
entities.each do |name, value|
  plain = value.gsub(/<.*?>/, '')
  # Block form: the value is inserted literally, so a backslash in it is not
  # read as a back-reference.
  text.gsub!(/&#{name};/) { plain }
end
# Strip whole regions of the document, in this order: everything from the
# first <artheader> to the last </artheader>, every <!-- --> comment, and
# the DOCTYPE declaration up to the first "]>".
[
  /\<artheader>.*<\/artheader>/m,
  /<!--(.*?)-->/m,
  /<!DOCTYPE.*?]>/m
].each { |region| text.gsub!(region, '') }
# Special entity substitutions: turn the predefined character entities into
# plain characters. A pattern that replaces a character with itself does
# nothing, so the entity names are spelled out here. A literal copyright
# sign is still rewritten as well. &amp; goes last so that "&amp;lt;" ends
# up as "&lt;" instead of being decoded twice.
text.gsub!('&lt;', '<')
text.gsub!('&gt;', '>')
text.gsub!('&quot;', '"')
text.gsub!('&apos;', "'")
text.gsub!(/&copy;|©/, '(c)')
text.gsub!('&amp;', '&')
# Number the items of each ordered list. The <orderedlist> tags themselves
# are dropped; every "<listitem><para>" opener inside becomes
# " <para>N. ", with N counting from 1 within that list.
text.gsub!(/<orderedlist>(.*?)<\/orderedlist>/m) do
  position = 0
  Regexp.last_match(1).gsub(/<listitem>\s*<para>/m) do
    position += 1
    " <para>#{position}. "
  end
end
# Miscellaneous tag replacement, applied in order:
#   <listitem>       any that is still present becomes a bullet
#   <replaceable>    content wrapped in angle brackets
#   <email>          content wrapped in angle brackets
#   <firstterm>      content wrapped in ``...''
[
  [/<listitem>/, ' * '],
  [/<replaceable>(.*?)<\/replaceable>/, '<\1>'],
  [/<email>(.*?)<\/email>/, '<\1>'],
  [/<firstterm>(.*?)<\/firstterm>/, '``\1\'\'']
].each { |tag, plain| text.gsub!(tag, plain) }
# Example headings. Do this before generating cross-references, which look
# the example numbers up in exampleXrefs.
#
# Each "<example ...><title>...</title>" becomes "Example N. title", with N
# counting up through the document. When the <example> tag carries an id
# attribute, the id is recorded with that number.
text.gsub!(/<example(.*?)<title>(.*?)<\/title>/m) do
  attrs, title = $1, $2
  exampleCounter += 1
  # [^"]* stops at the closing quote, so further attributes on the same line
  # do not end up inside the recorded id; \b keeps attributes whose name
  # merely ends in "id" from matching.
  if attrs =~ /\bid\s*=\s*"([^"]*)"/
    exampleXrefs[$1] = exampleCounter
  end
  "Example #{exampleCounter}. #{title}"
end
# Cross-references. An <xref> whose linkend is a recorded example id turns
# into "Example N"; any other linkend is shown quoted as ``linkend''.
text.gsub!(/<xref\s+linkend\s*=\s*"([^"]+)">/m) do
  target = Regexp.last_match(1)
  number = exampleXrefs[target]
  number ? "Example #{number}" : "``#{target}''"
end
# URLs: show the link text followed by the address in parentheses.
ulink = /<ulink\s+url="([^"]*)">(.*?)<\/ulink>/m
text.gsub!(ulink, '\2 (\1)')

# Program and screen listings: insert the matching prefix after every
# newline inside the listing. The surrounding tags are left in place here.
text.gsub!(/<(screen|programlisting)>(.*?)<\/\1>/m) do |listing|
  indent = if Regexp.last_match(1) == 'screen'
             SCREEN_PREFIX
           else
             PROGRAM_PREFIX
           end
  listing.gsub(/\n/m, "\n" + indent)
end

# Citetitle: keep the content, wrapped in double quotes.
citetitle = /<citetitle.*?>(.*?)<\/citetitle>/m
text.gsub!(citetitle, '"\1"')
# Tags whose start and end markers are both removed, leaving the content.
# Each entry is a regular-expression fragment; the filename entry also
# accepts an optional class="directory" attribute.
tags = [
  'article', 'classname', 'filename(\s+class="directory")?', 'function',
  'itemizedlist', 'application', 'acronym', 'literal', 'screen',
  'programlisting', 'token', 'markup', 'option', 'command'
]
text.gsub!(/<\/?(#{tags.join('|')})>/, '')

# For these tags only the end marker is removed.
text.gsub!(/<\/(listitem|sect\d+|example)>/, '')
# Warnings: the same banner replaces both the opening and the closing tag,
# together with the whitespace around the tag. The opening banner is
# followed by one extra space.
banner = "\n\n======== WARNING ========\n\n"
text.gsub!(/\s*<warning>\s*/, banner + ' ')
text.gsub!(/\s*<\/warning>\s*/, banner)
# Section headings. Each "<sectN ...><title>...</title>" with N from 1 to 4
# becomes a numbered heading such as "2.1.3. Title":
#   level 1  title upper-cased and underlined with '='
#   level 2  underlined with '-'
#   level 3  number and title only
#   level 4  number and title only
# Any other level just yields the bare title. Every heading is followed by
# a newline.
text.gsub!(/<sect(\d).*?<title>(.*?)<\/title>/m) do
  depth = Regexp.last_match(1).to_i
  heading = Regexp.last_match(2)

  if (1..4).cover?(depth)
    titleCounters[depth] += 1
    # Counters that start over when a heading of this level is seen.
    restart = { 1 => [2, 3, 4], 2 => [3], 3 => [4], 4 => [] }[depth]
    restart.each { |level| titleCounters[level] = 0 }

    heading = heading.upcase if depth == 1
    heading = "#{titleCounters[1..depth].join('.')}. #{heading}"

    # Only the two outermost levels get an underline, as long as the heading.
    rule = { 1 => '=', 2 => '-' }[depth]
    heading += "\n" + (rule * heading.length) if rule
  end

  heading + "\n"
end
# Wrap paragraphs. Whatever precedes <para> on its line is handed to
# fillParagraph as the prefix; the paragraph content, without surrounding
# whitespace, is the text to fill.
text.gsub!(/\n([^\n]*?)<para>\s*(.*?)\s*<\/para>/m) do
  lead, body = Regexp.last_match.captures
  fillParagraph(lead, body)
end

# Final cleanup: empty out whitespace-only lines, then squeeze every run of
# blank lines down to a single blank line.
text.gsub!(/^\s+$/, '')
text.gsub!(/\n\n+/m, "\n\n")

# Write the finished document to standard output.
print(text)
|