File: epub2markdown.py

package info (click to toggle)
python-ebooklib 0.19-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 648 kB
  • sloc: python: 1,800; makefile: 132; sh: 52
file content (38 lines) | stat: -rwxr-xr-x 1,255 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/usr/bin/env python
import os.path
import subprocess
import sys

from ebooklib import epub

# This is just a basic example which can easily break in the real world.

def to_bytes(data):
    """Return *data* as bytes, encoding text as UTF-8.

    Everything this script writes goes to files opened in binary mode, and
    pandoc is fed through a binary pipe, so text has to be encoded first.
    Values that are already bytes are returned unchanged.
    """
    if isinstance(data, bytes):
        return data
    return data.encode('utf-8')


def resolve_output_path(base_name, file_name):
    """Return the output path for *file_name* inside *base_name*.

    *file_name* comes from the EPUB archive, so it is treated as untrusted.
    Returns None when the name is empty, absolute, carries a drive prefix,
    or would climb out of *base_name* through ``..`` components.
    """
    relative = os.path.normpath(file_name)
    escapes = (
        relative == os.curdir
        or relative == os.pardir
        or relative.startswith(os.pardir + os.sep)
        or relative.startswith(os.sep)
        or os.path.isabs(relative)
        or bool(os.path.splitdrive(relative)[0])
    )
    if escapes:
        return None
    return os.path.join(base_name, relative)


def html_to_markdown(html):
    """Convert *html* to Markdown by piping it through ``pandoc``.

    Returns pandoc's standard output as bytes (the pipes are binary).
    Raises RuntimeError when pandoc exits with a non-zero status, so that a
    failed conversion is not silently written out as an empty file.
    """
    proc = subprocess.Popen(['pandoc', '-f', 'html', '-t', 'markdown', '-'],
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE)
    # stderr is not piped, so pandoc's own diagnostics reach the terminal.
    markdown, _ = proc.communicate(to_bytes(html))
    if proc.returncode != 0:
        raise RuntimeError(
            'pandoc exited with status {0}'.format(proc.returncode))
    return markdown


def main(argv):
    """Unpack the EPUB named in ``argv[1]`` into a directory of files.

    The output directory is named after the EPUB file (without extension).
    HTML items are converted to Markdown and saved with a ``.md`` extension;
    every other item is copied as-is.

    Returns a process exit status: 0 on success, 1 if any item was skipped
    or failed to convert, 2 on a usage error.
    """
    if len(argv) < 2:
        prog = argv[0] if argv else 'epub2markdown.py'
        sys.stderr.write('usage: {0} <book.epub>\n'.format(prog))
        return 2

    # read epub
    book = epub.read_epub(argv[1])

    # get base filename from the epub
    base_name = os.path.basename(os.path.splitext(argv[1])[0])

    status = 0
    for item in book.items:
        # convert into markdown if this is html
        if isinstance(item, epub.EpubHtml):
            file_name = os.path.splitext(item.file_name)[0] + '.md'
            try:
                content = html_to_markdown(item.content)
            except RuntimeError as exc:
                sys.stderr.write('!! {0}: {1}\n'.format(item.file_name, exc))
                status = 1
                continue
        else:
            file_name = item.file_name
            content = item.content

        out_path = resolve_output_path(base_name, file_name)
        if out_path is None:
            # refuse to write outside of the output directory
            sys.stderr.write('!! skipping unsafe path {0}\n'.format(file_name))
            status = 1
            continue

        # create needed directories
        dir_name = os.path.dirname(out_path)
        if dir_name and not os.path.isdir(dir_name):
            os.makedirs(dir_name)

        print('>> {0}'.format(file_name))

        # write content to file; binary mode because pandoc output (and
        # non-HTML items such as images) are bytes, not text
        with open(out_path, 'wb') as f:
            f.write(to_bytes(content))

    return status


if __name__ == '__main__':
    sys.exit(main(sys.argv))