1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237
|
require 'fileutils'
require 'rexml/parsers/pullparser'
require 'tmpdir'
module DocBook
class Epub
CHECKER = "epubcheck"
STYLESHEET_PATH = File.join('usr/share/xml/docbook/stylesheet', 'docbook-xsl')
STYLESHEET_NS_PATH = File.join('usr/share/xml/docbook/stylesheet', 'docbook-xsl-ns')
if File.directory?(File.expand_path(File.join(File::Separator, STYLESHEET_PATH)))
STYLESHEET = File.expand_path(File.join(File::Separator, STYLESHEET_PATH, 'epub', "docbook.xsl"))
elsif File.directory?(File.expand_path(File.join(File::Separator, STYLESHEET_NS_PATH)))
STYLESHEET = File.expand_path(File.join(File::Separator, STYLESHEET_NS_PATH, 'epub', "docbook.xsl"))
else
fail "Stylesheet path not found. Use the -s switch."
end
fail "Stylesheet #{STYLESHEET} not found. Use the -s switch." unless File.exist?(STYLESHEET)
CALLOUT_PATH = File.join('images', 'callouts')
CALLOUT_FULL_PATH = File.expand_path(File.join(File::Separator, STYLESHEET_PATH, CALLOUT_PATH))
CALLOUT_LIMIT = 15
CALLOUT_EXT = ".png"
XSLT_PROCESSOR = "xsltproc"
OUTPUT_DIR = File.expand_path(File.join(Dir::tmpdir, ".epubtmp#{Time.now.to_f.to_s}"))
MIMETYPE = "application/epub+zip"
META_DIR = "META-INF"
OEBPS_DIR = "OEBPS"
ZIPPER = "zip"
attr_reader :output_dir
def initialize(docbook_file, output_dir=OUTPUT_DIR, css_file=nil, customization_layer=nil, embedded_fonts=[])
@docbook_file = docbook_file
@output_dir = output_dir
@meta_dir = File.join(@output_dir, META_DIR)
@oebps_dir = File.join(@output_dir, OEBPS_DIR)
@css_file = css_file ? File.expand_path(css_file) : css_file
@embedded_fonts = embedded_fonts
@to_delete = [OUTPUT_DIR]
if customization_layer
@stylesheet = File.expand_path(customization_layer)
else
@stylesheet = STYLESHEET
end
unless File.exist?(@docbook_file)
raise ArgumentError.new("File #{@docbook_file} does not exist")
end
end
def render_to_file(output_file, verbose=false)
render_to_epub(output_file, verbose)
bundle_epub(output_file, verbose)
cleanup_files(@to_delete)
end
def self.invalid?(file)
# Obnoxiously, we can't just check for a non-zero output...
cmd = %Q(#{CHECKER} "#{file}")
output = `#{cmd} 2>&1`
if $?.to_i == 0
return false
else
STDERR.puts output if $DEBUG
return output
end
end
private
def render_to_epub(output_file, verbose)
@collapsed_docbook_file = collapse_docbook()
chunk_quietly = "--stringparam chunk.quietly " + (verbose ? '0' : '1')
callout_path = "--stringparam callout.graphics.path #{CALLOUT_PATH}/"
callout_limit = "--stringparam callout.graphics.number.limit #{CALLOUT_LIMIT}"
callout_ext = "--stringparam callout.graphics.extension #{CALLOUT_EXT}"
html_stylesheet = "--stringparam html.stylesheet #{File.basename(@css_file)}" if @css_file
base = "--stringparam base.dir #{OEBPS_DIR}/"
unless @embedded_fonts.empty?
embedded_fonts = @embedded_fonts.map {|f| File.basename(f)}.join(',')
font = "--stringparam epub.embedded.fonts \"#{embedded_fonts}\""
end
meta = "--stringparam epub.metainf.dir #{META_DIR}/"
oebps = "--stringparam epub.oebps.dir #{OEBPS_DIR}/"
options = [chunk_quietly,
callout_path,
callout_limit,
callout_ext,
base,
font,
meta,
oebps,
html_stylesheet,
].join(" ")
# Double-quote stylesheet & file to help Windows cmd.exe
db2epub_cmd = %Q(cd "#{@output_dir}" && #{XSLT_PROCESSOR} #{options} "#{@stylesheet}" "#{@collapsed_docbook_file}")
STDERR.puts db2epub_cmd if $DEBUG
success = system(db2epub_cmd)
raise "Could not render as .epub to #{output_file} (#{db2epub_cmd})" unless success
@to_delete << Dir["#{@meta_dir}/*"]
@to_delete << Dir["#{@oebps_dir}/*"]
end
def bundle_epub(output_file, verbose)
quiet = verbose ? "" : "-q"
mimetype_filename = write_mimetype()
meta = File.basename(@meta_dir)
oebps = File.basename(@oebps_dir)
images = copy_images()
csses = copy_csses()
fonts = copy_fonts()
callouts = copy_callouts()
# zip -X -r ../book.epub mimetype META-INF OEBPS
# Double-quote stylesheet & file to help Windows cmd.exe
zip_cmd = %Q(cd "#{@output_dir}" && #{ZIPPER} #{quiet} -X -r "#{File.expand_path(output_file)}" "#{mimetype_filename}" "#{meta}" "#{oebps}")
puts zip_cmd if $DEBUG
success = system(zip_cmd)
raise "Could not bundle into .epub file to #{output_file}" unless success
end
# Input must be collapsed because REXML couldn't find figures in files that
# were XIncluded or added by ENTITY
# http://sourceforge.net/tracker/?func=detail&aid=2750442&group_id=21935&atid=373747
def collapse_docbook
# Double-quote stylesheet & file to help Windows cmd.exe
collapsed_file = File.join(File.expand_path(File.dirname(@docbook_file)),
'.collapsed.' + File.basename(@docbook_file))
entity_collapse_command = %Q(xmllint --loaddtd --noent -o "#{collapsed_file}" "#{@docbook_file}")
entity_success = system(entity_collapse_command)
raise "Could not collapse named entites in #{@docbook_file}" unless entity_success
xinclude_collapse_command = %Q(xmllint --xinclude -o "#{collapsed_file}" "#{collapsed_file}")
xinclude_success = system(xinclude_collapse_command)
raise "Could not collapse XIncludes in #{@docbook_file}" unless xinclude_success
@to_delete << collapsed_file
return collapsed_file
end
def copy_callouts
new_callout_images = []
if has_callouts?
calloutglob = "#{CALLOUT_FULL_PATH}/*#{CALLOUT_EXT}"
Dir.glob(calloutglob).each {|img|
img_new_filename = File.join(@oebps_dir, CALLOUT_PATH, File.basename(img))
# TODO: What to rescue for these two?
FileUtils.mkdir_p(File.dirname(img_new_filename))
FileUtils.cp(img, img_new_filename)
@to_delete << img_new_filename
new_callout_images << img
}
end
return new_callout_images
end
def copy_fonts
new_fonts = []
@embedded_fonts.each {|font_file|
font_new_filename = File.join(@oebps_dir, File.basename(font_file))
FileUtils.cp(font_file, font_new_filename)
new_fonts << font_file
}
return new_fonts
end
def copy_csses
if @css_file
css_new_filename = File.join(@oebps_dir, File.basename(@css_file))
FileUtils.cp(@css_file, css_new_filename)
end
end
def copy_images
image_references = get_image_refs()
new_images = []
image_references.each {|img|
# TODO: It'd be cooler if we had a filetype lookup rather than just
# extension
if img =~ /\.(svg|png|gif|jpe?g|xml)/i
img_new_filename = File.join(@oebps_dir, img)
img_full = File.join(File.expand_path(File.dirname(@docbook_file)), img)
# TODO: What to rescue for these two?
FileUtils.mkdir_p(File.dirname(img_new_filename))
puts(img_full + ": " + img_new_filename) if $DEBUG
FileUtils.cp(img_full, img_new_filename)
@to_delete << img_new_filename
new_images << img_full
end
}
return new_images
end
def write_mimetype
mimetype_filename = File.join(@output_dir, "mimetype")
File.open(mimetype_filename, "w") {|f| f.print MIMETYPE}
@to_delete << mimetype_filename
return File.basename(mimetype_filename)
end
def cleanup_files(file_list)
file_list.flatten.each {|f|
# Yikes
FileUtils.rm_r(f, :force => true )
}
end
# Returns an Array of all of the (image) @filerefs in a document
def get_image_refs
parser = REXML::Parsers::PullParser.new(File.new(@collapsed_docbook_file))
image_refs = []
while parser.has_next?
el = parser.pull
if el.start_element? and (el[0] == "imagedata" or el[0] == "graphic")
image_refs << el[1]['fileref']
end
end
return image_refs.uniq
end
# Returns true if the document has code callouts
def has_callouts?
parser = REXML::Parsers::PullParser.new(File.new(@collapsed_docbook_file))
while parser.has_next?
el = parser.pull
if el.start_element? and (el[0] == "calloutlist" or el[0] == "co")
return true
end
end
return false
end
end
end
|