File: epub2markdown.py

package info (click to toggle)
python-ebooklib 0.15~ds0-1
  • links: PTS, VCS
  • area: main
  • in suites: buster, jessie, jessie-kfreebsd, stretch
  • size: 468 kB
  • ctags: 258
  • sloc: python: 1,611; sh: 166; makefile: 137
file content (43 lines) | stat: -rwxr-xr-x 1,286 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#!/usr/bin/env python

import sys
import subprocess
import os
import os.path

from ebooklib import epub

# This is just a basic example which can easily break in the real world.

def _as_bytes(data):
    """Return *data* as bytes, encoding text as UTF-8 when necessary."""
    # Output files are opened in binary mode; content that is already bytes
    # (images, fonts, pandoc output) must pass through untouched.
    if isinstance(data, bytes):
        return data
    return data.encode('utf-8')


def _convert_item(item):
    """Return the ``(file_name, content)`` pair to write for one book item.

    Items that are instances of ``epub.EpubHtml`` are piped through
    ``pandoc`` (HTML on stdin, markdown on stdout) and get a ``.md``
    extension; every other item is returned unchanged.
    """
    if not isinstance(item, epub.EpubHtml):
        return item.file_name, item.content

    proc = subprocess.Popen(['pandoc', '-f', 'html', '-t', 'markdown', '-'],
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE)
    # stderr is deliberately not piped, so pandoc's own diagnostics still
    # reach the terminal; only stdout is captured here.
    markdown = proc.communicate(_as_bytes(item.content))[0]
    if proc.returncode != 0:
        # Keep going (best effort), but do not fail silently.
        sys.stderr.write('pandoc exited with status %d for %s\n'
                         % (proc.returncode, item.file_name))
    return os.path.splitext(item.file_name)[0] + '.md', markdown


def _target_path(base_name, file_name):
    """Return the absolute output path for *file_name* inside *base_name*.

    Returns ``None`` when the resolved path is not strictly inside the
    output directory (for example ``../x`` or an absolute path), because
    the name comes from the archive and must not be trusted.
    """
    root = os.path.abspath(base_name)
    target = os.path.abspath(os.path.join(root, file_name))
    # os.path.join(root, '') appends exactly one trailing separator, so
    # 'book2/x' is not mistaken for a child of 'book'.
    if target.startswith(os.path.join(root, '')):
        return target
    return None


def main(argv=None):
    """Unpack an EPUB into a directory, converting HTML items to markdown.

    The output directory is named after the EPUB file (without extension)
    and is created relative to the current working directory.

    :param argv: argument list including the program name; defaults to
        ``sys.argv``.  ``argv[1]`` is the path of the EPUB file.
    :returns: process exit status, ``0`` on success and ``2`` when the
        EPUB path argument is missing.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        prog = argv[0] if argv else 'epub2markdown.py'
        sys.stderr.write('usage: %s book.epub\n' % prog)
        return 2

    # read epub
    book = epub.read_epub(argv[1])

    # get base filename from the epub
    base_name = os.path.basename(os.path.splitext(argv[1])[0])

    for item in book.items:
        # convert into markdown if this is html
        file_name, content = _convert_item(item)

        target = _target_path(base_name, file_name)
        if target is None:
            sys.stderr.write('skipping %s: path escapes the output directory\n'
                             % file_name)
            continue

        # create needed directories
        dir_name = os.path.dirname(target)
        if not os.path.isdir(dir_name):
            os.makedirs(dir_name)

        # Single-argument print() behaves the same on Python 2 and 3.
        print('>>  %s' % file_name)

        # write content to file; binary mode keeps images/fonts intact and
        # the context manager closes the handle even if the write fails
        with open(target, 'wb') as out:
            out.write(_as_bytes(content))

    return 0


if __name__ == '__main__':
    sys.exit(main())