File: fix_html.py

package info (click to toggle)
puddletag 2.5.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 29,888 kB
  • sloc: python: 24,938; javascript: 21,828; xml: 964; makefile: 129; sh: 85
file content (31 lines) | stat: -rw-r--r-- 786 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import os
import re
import sys


def remove_h1(text):
    """Return *text* with simple ``<h1>`` headings stripped out.

    A heading is removed only when its content consists solely of ASCII
    letters, spaces and newlines. Headings containing anything else
    (digits, punctuation, nested tags) are left in place.

    Args:
        text: HTML source as a string.

    Returns:
        The input with every matching ``<h1 ...>...</h1>`` element removed.
    """
    # ``[^>]*`` keeps the opening-tag match inside a single tag. A lazy
    # ``.*?`` in that position can run past the tag's own ``>`` and swallow
    # everything up to a later heading on the same line.
    regexp = r"<h1\b[^>]*>[a-zA-Z \n]*</h1>"
    return re.sub(regexp, '', text)


def clean_files(dirpath):
    """Strip simple ``<h1>`` headings from a fixed set of HTML pages.

    Rewrites ``index.html``, ``screenshots.html`` and ``about.html`` inside
    *dirpath* in place, passing each file's contents through ``remove_h1``.
    A file that cannot be opened, decoded, read or written is reported on
    stdout and skipped; the remaining files are still processed.

    Args:
        dirpath: Directory containing the HTML files.
    """
    # A tuple gives a stable processing (and log) order from run to run,
    # which iterating a set of strings does not guarantee.
    filenames = ('index.html', 'screenshots.html', 'about.html')
    for filename in filenames:
        path = os.path.join(dirpath, filename)
        try:
            # Explicit encoding so the result does not depend on the locale
            # of the machine running the script.
            # NOTE(review): assumes the pages are UTF-8 -- confirm.
            with open(path, 'r+', encoding='utf-8') as fo:
                print("Fixing: " + path)
                text = fo.read()
                text = remove_h1(text)
                # Rewind and overwrite in place; truncate() drops the
                # leftover tail because the cleaned text may be shorter.
                fo.seek(0)
                fo.write(text)
                fo.truncate()
        except (OSError, UnicodeError):
            # UnicodeError covers a page that is not valid UTF-8; decoding
            # happens before any write, so such a file is left untouched.
            print("Could not edit: " + path)


if __name__ == "__main__":
    # The first positional argument is the directory holding the HTML files
    # to clean. Exit with a usage message (status 1) instead of an
    # IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit("Usage: " + sys.argv[0] + " <html_dir>")
    input_dir = sys.argv[1]
    clean_files(input_dir)