1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256
|
#' The EPUB e-book format
#'
#' Convert a book to the EPUB format, which is an e-book format supported by
#' many readers, such as Amazon Kindle Fire and iBooks on Apple devices.
#' @inheritParams html_document2
#' @param fig_width,fig_height,dev,fig_caption Figure options (width, height,
#' the graphical device, and whether to render figure captions).
#' @param number_sections Whether to number sections.
#' @param toc,toc_depth Whether to generate a table of contents, and its depth.
#' @param stylesheet A character vector of paths to CSS stylesheets to be
#' applied to the eBook.
#' @param cover_image The path to a cover image.
#' @param metadata The path to the EPUB metadata file.
#' @param chapter_level The level by which the e-book is split into separate
#' \dQuote{chapter} files.
#' @param epub_version Whether to use version 3 or 2 of EPUB. This corresponds to
#' [Pandoc's supported output
#' format](https://pandoc.org/MANUAL.html#option--to). `"epub"` is an alias
#' for `"epub3"` since Pandoc 2.0 and for `"epub2"` in earlier versions.
#' @param md_extensions A character string of Pandoc Markdown extensions.
#' @param pandoc_args A vector of additional Pandoc arguments.
#' @param template Pandoc template to use for rendering. Pass `"default"`
#' to use Pandoc's built-in template; pass a path to use a custom template.
#' The default pandoc template should be sufficient for most use cases. In
#' case you want to develop a custom template, we highly recommend to start
#' from the default EPUB templates at
#' <https://github.com/jgm/pandoc-templates/>.
#' @note Figure/table numbers cannot be generated if sections are not numbered
#' (`number_sections = FALSE`).
#' @md
#' @export
epub_book = function(
  fig_width = 5, fig_height = 4, dev = 'png', fig_caption = TRUE,
  number_sections = TRUE, toc = FALSE, toc_depth = 3, stylesheet = NULL,
  cover_image = NULL, metadata = NULL, chapter_level = 1,
  epub_version = c('epub3', 'epub', 'epub2'), md_extensions = NULL,
  global_numbering = !number_sections, pandoc_args = NULL,
  template = 'default'
) {
  epub_version = match.arg(epub_version)

  # Every optional flag below evaluates to NULL when it is not requested, so
  # the final c() call drops it without any extra bookkeeping.
  flag_numbering = if (number_sections) '--number-sections'
  flag_toc = if (toc) '--toc'
  # the depth is only forwarded when the caller supplied it explicitly
  flag_toc_depth = if (!missing(toc_depth)) c('--toc-depth', toc_depth)
  flag_cover = if (!is.null(cover_image)) c('--epub-cover-image', cover_image)
  flag_metadata = if (!is.null(metadata)) c('--epub-metadata', metadata)
  flag_template = if (!identical(template, 'default')) c('--template', template)
  flag_mathml = if (rmarkdown::pandoc_available('2.19') && epub_version == 'epub3') {
    c('--mathml')
  }
  # likewise only forwarded when set by the caller; the option name depends on
  # whether rmarkdown::pandoc_available('3.0') is TRUE
  flag_split = if (!missing(chapter_level)) {
    c(
      if (rmarkdown::pandoc_available('3.0')) '--split-level' else '--epub-chapter-level',
      chapter_level
    )
  }
  args = c(
    pandoc_args, flag_numbering, flag_toc, flag_toc_depth, flag_cover,
    flag_metadata, flag_template, flag_mathml, flag_split
  )

  # merge the user stylesheets into one temporary CSS file (see epub_css())
  css = NULL
  if (!is.null(stylesheet)) {
    css = rmarkdown::pandoc_path_arg(epub_css(stylesheet))
    css_flag = if (pandoc2.0()) '--css' else '--epub-stylesheet'
    args = c(args, css_flag, css)
  }

  from = rmarkdown::from_rmarkdown(fig_caption, md_extensions)

  # rewrite the Markdown input in place before Pandoc runs; returns NULL
  resolve_input = function(metadata, input_file, runtime, knit_meta, files_dir, output_dir) {
    process_markdown(input_file, from, args, global_numbering)
    NULL
  }
  # delete the temporary CSS (if any), then hand the output to move_output()
  finish_output = function(metadata, input, output, clean, verbose) {
    if (length(css)) file.remove(css)
    move_output(output)
  }

  config = rmarkdown::output_format(
    knitr = rmarkdown::knitr_options_html(fig_width, fig_height, NULL, FALSE, dev),
    pandoc = rmarkdown::pandoc_options(epub_version, from, args, ext = '.epub'),
    pre_processor = resolve_input,
    post_processor = finish_output
  )
  common_format_config(config, 'epub')
}
# Relocate a rendered file when the 'output_dir' option is set: the file is
# renamed to output_path(output) and the new path is returned; when the option
# is unset, the input path is returned untouched.
move_output = function(output) {
  if (!is.null(opts$get('output_dir'))) {
    target = output_path(output)
    file_rename(output, target)
    output = target
  }
  output
}
# Resolve cross-references, equation numbers, and text references in a Markdown
# document, using a throwaway HTML rendering of it as the source of labels.
#
# input_file:  path to the Markdown file; also converted to HTML via Pandoc
# from:        Pandoc input format, passed to rmarkdown::pandoc_convert()
# pandoc_args: extra Pandoc arguments (filtered through pandoc_args2())
# global:      passed on to parse_fig_labels()
# to_md:       TRUE when the final output is Markdown; selects which
#              post-processing steps apply
# content:     lines to process (read from input_file by default)
# output:      file to write the result to; if NULL, the lines are returned
process_markdown = function(
  input_file, from, pandoc_args, global, to_md = output_md(),
  content = read_utf8(input_file), output = input_file
) {
  # the intermediate HTML is removed when this function exits
  tmp_html = with_ext(input_file, 'tmp.html')
  on.exit(file.remove(tmp_html), add = TRUE)
  rmarkdown::pandoc_convert(
    input_file, 'html', from, tmp_html, TRUE,
    c(pandoc_args2(pandoc_args), '--section-divs', '--mathjax', '--number-sections')
  )
  html = clean_html_tags(read_utf8(tmp_html))
  fig_info = parse_fig_labels(html, global)

  # resolve cross-references, touching only the prose lines of the input
  prose = xfun::prose_index(content)
  ref_table = c(fig_info$ref_table, parse_section_labels(html))
  content[prose] = resolve_refs_md(content[prose], ref_table, to_md)

  # for Markdown output, strip the knitr block markers at line start/end
  if (to_md) {
    content = gsub(
      '^\\\\BeginKnitrBlock\\{[^}]+\\}|\\\\EndKnitrBlock\\{[^}]+\\}$', '', content
    )
  }

  html_refs = parse_ref_links(html, '^<p>%s (.+)</p>$')
  content = resolve_ref_links_epub(content, html_refs, to_md)

  # for non-Markdown output, drop parts, unmark the appendix heading, and wrap
  # math environments; the prose index is recomputed on the updated content
  if (!to_md) {
    prose = xfun::prose_index(content)
    content[prose] = protect_math_env(
      restore_appendix_epub(restore_part_epub(content[prose]))
    )
  }

  if (is.null(output)) return(content)
  write_utf8(content, output)
}
# Resolve labels and references in Markdown lines.
#
# content:   character vector of lines
# ref_table: named vector; names are labels (e.g. fig:foo), values the numbers
# to_md:     passed on to add_eq_numbers()
#
# Returns the lines with (\#label) in captions replaced by a numbered prefix,
# equation labels numbered, and \@ref(label) passed through ref_to_number().
resolve_refs_md = function(content, ref_table, to_md = output_md()) {
  ids = names(ref_table)

  # candidate lines: caption paragraphs/elements, `Table:` lines, knitr block
  # openers, and Markdown images
  caption_lines = grep(
    '^(<p class="caption|<caption>|Table:|\\\\BeginKnitrBlock)|(!\\[.*?\\]\\(.+?\\))',
    content
  )
  # replace (\#fig:label) with Figure x.x: -- only the first known label that
  # occurs on a line is substituted
  for (k in caption_lines) for (key in ids) {
    pat = sprintf('\\(\\\\#%s\\)', key)
    if (!grepl(pat, content[k])) next
    # the label type is everything before the first colon
    type = gsub('^([^:]+).*$', '\\1', key)
    delim = if (type %in% theorem_abbr) '' else ':'
    # theorem-like environments, figures, and tables get an anchor element
    anchor = if (type %in% c(theorem_abbr, 'fig', 'tab')) {
      sprintf('<span id="%s"></span>', key)
    } else ''
    label = label_prefix(type, sep = delim)(ref_table[key])
    content[k] = sub(pat, paste0(anchor, label, ' '), content[k])
    break
  }

  # remove labels in figure alt text (it will contain \ like (\#fig:label))
  content = gsub('"\\(\\\\#(fig:[-[:alnum:]]+)\\)', '"', content)
  # replace (\#eq:label) with equation numbers
  content = add_eq_numbers(content, ids, ref_table, to_md)

  # look for \@ref(label) not preceded by a backtick and resolve it to the
  # actual figure/table/section number
  ref_pos = gregexpr('(?<!`)\\\\@ref\\(([-:[:alnum:]]+)\\)', content, perl = TRUE)
  regmatches(content, ref_pos) = lapply(
    regmatches(content, ref_pos), ref_to_number, ref_table, TRUE
  )
  content
}
# change labels (\#eq:label) in math environments into actual numbers in \tag{}
#
# x:         character vector of lines
# ids:       all known labels; only those starting with eq: are used
# ref_table: named vector mapping labels to numbers
# to_md:     TRUE to emit \tag{n}, FALSE to emit \qquad(n)
add_eq_numbers = function(x, ids, ref_table, to_md = output_md()) {
  ids = grep('^eq:', ids, value = TRUE)
  if (length(ids) == 0) return(x)
  ref_table = ref_table[ids]

  env = paste(math_envs, collapse = '|')
  # no white spaces allowed after \begin|end{env}; such spaces were added on
  # purpose for the environments in verbatim chunks so that they are not
  # recognized here
  begins = grep(sprintf('^\\\\begin\\{(%s)\\}$', env), x)
  ends = grep(sprintf('^\\\\end\\{(%s)\\}$', env), x)
  if (length(begins) == 0 || length(ends) == 0) return(x)

  # all line indices from each \begin to the end line paired with it by
  # next_nearest(), narrowed down to the lines that carry an equation label
  inside = unlist(mapply(seq, begins, next_nearest(begins, ends), SIMPLIFY = FALSE))
  inside = inside[grep('\\(\\\\(#eq:[-/[:alnum:]]+)\\)', x[inside])]

  # only the first known label found on a line is replaced
  for (k in inside) for (key in ids) {
    pat = sprintf('\\(\\\\#%s\\)', key)
    if (!grepl(pat, x[k])) next
    # it is weird that \tag{} does not work in iBooks, so the EPUB output
    # cheats with \qquad followed by the (equation number); Markdown output
    # still uses \tag{}
    number = sprintf(if (to_md) '\\\\tag{%s}' else '\\\\qquad(%s)', ref_table[key])
    x[k] = sub(pat, number, x[k])
    break
  }
  x
}
# replace text references (ref:label); note refs is the parsed text references
# from the HTML output of Markdown, i.e. Markdown has been translated to HTML
#
# x:     character vector of Markdown lines
# refs:  list with $tags and $txts, as parsed from the HTML rendering
# to_md: when TRUE, the texts of tags also found in refs are taken from refs
resolve_ref_links_epub = function(x, refs, to_md = output_md()) {
  parsed = parse_ref_links(x, '^%s (.+[^ ])$')
  # nothing to do when no text references were parsed
  if (is.null(parsed)) return(x)

  texts = parsed$txts
  if (to_md && length(refs$tags)) {
    pos = match(parsed$tags, refs$tags)
    texts[!is.na(pos)] = na.omit(refs$txts[pos])
  }
  restore_ref_links(parsed$content, '(?<!`)%s', parsed$tags, texts, TRUE)
}
# matches a whole-line level-one heading of the form `# (PART) Title {-}` or
# `# (PART\*) Title {-}`
reg_part = '^# \\(PART(\\\\\\*)?\\) .+ \\{-\\}$'
# simply remove parts in epub: every line matching reg_part is blanked out
# (replaced by an empty string, so the number of lines stays the same)
restore_part_epub = function(x) {
  replace(x, grep(reg_part, x), '')
}
# matches a whole-line heading `# (APPENDIX) Title {-}`; group 1 is the `# `
# prefix and group 2 the title together with its trailing {-}
reg_app = '^(# )\\(APPENDIX\\) (.+ \\{-\\})$'
# Drop the (APPENDIX) marker from the appendix heading line(s) located by
# find_appendix_line(), keeping the heading itself.
# This is not good enough since appendix chapters will continue to be numbered
# after the last chapter instead of being numbered differently like A.1, A.2,
# ..., but probably not too many people care about it in e-books.
restore_appendix_epub = function(x) {
  idx = find_appendix_line(reg_app, x)
  if (length(idx) > 0) x[idx] = gsub(reg_app, '\\1\\2', x[idx])
  x
}
# may add more LaTeX environments later
math_envs = c('equation', 'align', 'eqnarray', 'gather')
# wrap math environments in $$, otherwise they are discarded by Pandoc
# https://github.com/jgm/pandoc/issues/2758
#
# x: character vector of lines. A line counts as a delimiter only when it is
#    exactly \begin{env} or \end{env} for one of math_envs (plain or starred).
# Returns x with `$$` prepended to the \begin lines and appended to the \end
# lines; all other lines (including NA) are returned unchanged.
protect_math_env = function(x) {
  env = c(math_envs, paste0(math_envs, '*'))
  # %in% never yields NA, so NA lines cannot break the subassignment below
  # (comparing with == would produce NA subscripts)
  is_begin = x %in% sprintf('\\begin{%s}', env)
  is_end = x %in% sprintf('\\end{%s}', env)
  x[is_begin] = paste0('$$', x[is_begin])
  x[is_end] = paste0(x[is_end], '$$')
  x
}
# manually base64 encode images in css: https://github.com/jgm/pandoc/issues/2733
#
# files:  paths to CSS files
# output: path of the combined CSS file to write (a temporary file by default)
# Returns the path of the written file.
epub_css = function(files, output = tempfile('epub', fileext = '.css')) {
  # process a single stylesheet by its base name via in_dir() on its directory
  encode_one = function(path) {
    in_dir(dirname(path), base64_css(basename(path)))
  }
  merged = unlist(lapply(files, encode_one))
  write_utf8(merged, output)
  output
}
#' A wrapper function to convert e-books using Calibre
#'
#' This function calls the command \command{ebook-convert} in Calibre
#' (\url{https://calibre-ebook.com}) to convert e-books.
#' @param input The input filename.
#' @param output The output filename or extension (if only an extension is
#' provided, the output filename will be the input filename with its extension
#' replaced by \code{output}; for example, \code{calibre('foo.epub', 'mobi')}
#' generates \file{foo.mobi}).
#' @param options A character vector of additional options to be passed to
#' \command{ebook-convert}.
#' @export
#' @return The output filename.
calibre = function(input, output, options = '') {
  # an output without any dot is taken as a bare extension to apply to input
  has_dot = grepl('[.]', output)
  if (!has_dot) output = with_ext(input, output)
  if (output == input) stop('input and output filenames are the same')

  # remove any stale output first, so that its existence after the call
  # indicates that the conversion produced it
  unlink(output)
  cmd_args = c(shQuote(input), shQuote(output), options)
  system2('ebook-convert', cmd_args)

  if (file.exists(output)) return(invisible(output))
  stop('Failed to convert ', input, ' to ', output)
}
|