File: rfc.py

package info (click to toggle)
python-irc 8.5.3%2Bdfsg-5
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, sid, trixie
  • size: 436 kB
  • sloc: python: 2,402; makefile: 6
file content (27 lines) | stat: -rw-r--r-- 651 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import re

def get_pages(filename):
	"""Return the text of *filename* split into pages.

	Pages are delimited by the form-feed character; the delimiter itself
	is not included in the returned strings.
	"""
	with open(filename) as source:
		return source.read().split('\x0c')

# Running page header: a line starting with "RFC <number>" and ending with
# "<word> <4-digit year>".  MULTILINE lets ^/$ anchor at each line.
header_pattern = re.compile(
	r'^RFC \d+\s+.*\s+(\w+ \d{4})$',
	flags=re.MULTILINE,
)
# Running page footer: two words followed by "[Page <number>]" at line end.
footer_pattern = re.compile(
	r'^\w+\s+\w+\s+\[Page \d+\]$',
	flags=re.MULTILINE,
)

def remove_header(page):
	"""Return *page* without its running header line.

	Every match of ``header_pattern`` is deleted, then any newlines left
	at the very start of the page are stripped.
	"""
	return header_pattern.sub('', page).lstrip('\n')

def remove_footer(page):
	"""Return *page* without its running footer line.

	Every match of ``footer_pattern`` is deleted, trailing whitespace is
	stripped, and the result is terminated with exactly two newlines.
	"""
	without_footer = footer_pattern.sub('', page)
	trimmed = without_footer.rstrip()
	return trimmed + '\n\n'

def clean_pages(filename='rfc2812.txt'):
	"""Return the pages of *filename* with footers and headers removed.

	filename -- path of the paginated RFC text to read; defaults to
	            'rfc2812.txt', the file this script was written for.

	The file is read immediately.  The result is whatever ``map`` returns:
	a list on Python 2, a lazy one-shot iterator on Python 3, so iterate
	it only once or wrap it in ``list()``.
	"""
	pages = get_pages(filename)
	# Footers are stripped first, then headers, one page at a time.
	without_footers = map(remove_footer, pages)
	return map(remove_header, without_footers)

def save_clean():
	"""Write the cleaned RFC pages to 'rfc2812-clean.txt'.

	The pages produced by ``clean_pages()`` are written back to back; the
	output file is created or truncated.
	"""
	with open('rfc2812-clean.txt', 'w') as f:
		# writelines() consumes the iterable itself.  A bare
		# map(f.write, ...) is lazy on Python 3, so f.write would never
		# be called and the output file would be left empty.
		f.writelines(clean_pages())

# When run as a script (rather than imported), generate the cleaned file.
if __name__ == '__main__':
	save_clean()