File: epub2markdown.py

package info (click to toggle)
python-ebooklib 0.20-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 708 kB
  • sloc: python: 2,541; makefile: 132; sh: 53
file content (38 lines) | stat: -rwxr-xr-x 1,252 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/usr/bin/env python
import os.path
import subprocess
import sys

from ebooklib import epub

# This is just a basic example which can easily break in the real world.

def as_bytes(content):
    """Return *content* as bytes, encoding text as UTF-8.

    Bytes are passed through untouched so binary items (images, fonts) are
    never re-encoded.
    """
    if isinstance(content, bytes):
        return content
    return content.encode("utf-8")


def safe_output_path(base_dir, file_name):
    """Return the path of *file_name* inside *base_dir*, or None if unsafe.

    The file name comes from inside the archive, so it is resolved and
    checked against the output directory: absolute names and names that
    climb out with ".." (or that resolve to the directory itself) are
    rejected.
    """
    root = os.path.abspath(base_dir)
    target = os.path.abspath(os.path.join(root, file_name))
    # join(root, "") appends exactly one trailing separator, so "book" is
    # never treated as a prefix of a sibling such as "book-evil".
    if not target.startswith(os.path.join(root, "")):
        return None
    return os.path.join(base_dir, file_name)


def html_to_markdown(html):
    """Convert an HTML document to Markdown by piping it through pandoc.

    Returns the Markdown as bytes. Raises subprocess.CalledProcessError when
    pandoc exits with a non-zero status, and OSError when pandoc cannot be
    started at all.
    """
    result = subprocess.run(
        ["pandoc", "-f", "html", "-t", "markdown", "-"],
        input=as_bytes(html),
        stdout=subprocess.PIPE,
        check=True,
    )
    return result.stdout


def main(argv=None):
    """Unpack an EPUB into a directory, converting HTML items to Markdown.

    *argv* is the argument list without the program name; it defaults to
    sys.argv[1:]. The output directory is named after the EPUB file (without
    its extension) and is created relative to the current directory.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.exit("usage: epub2markdown.py <book.epub>")
    epub_path = args[0]

    # read epub
    book = epub.read_epub(epub_path)

    # get base filename from the epub
    base_name = os.path.basename(os.path.splitext(epub_path)[0])

    for item in book.items:
        # convert into markdown if this is html
        if isinstance(item, epub.EpubHtml):
            try:
                content = html_to_markdown(item.content)
            except subprocess.CalledProcessError as exc:
                # Keep going: one broken chapter should not abort the book.
                sys.stderr.write(
                    "!! pandoc failed on {} (exit status {})\n".format(  # noqa: UP032
                        item.file_name, exc.returncode
                    )
                )
                continue
            file_name = os.path.splitext(item.file_name)[0] + ".md"
        else:
            file_name = item.file_name
            content = item.content

        target = safe_output_path(base_name, file_name)
        if target is None:
            sys.stderr.write(
                "!! skipping {}: path escapes the output directory\n".format(file_name)  # noqa: UP032
            )
            continue

        # create needed directories
        os.makedirs(os.path.dirname(target), exist_ok=True)

        print(">> {}".format(file_name))  # noqa: UP032

        # write content to file; binary mode because both the pandoc output
        # and the raw item content are bytes
        with open(target, "wb") as f:
            f.write(as_bytes(content))


if __name__ == "__main__":
    main()