File: parsefiles.py

package info (click to toggle)
kvirc 4%3A5.2.10-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 34,136 kB
  • sloc: cpp: 232,431; perl: 2,106; pascal: 1,005; sh: 836; ansic: 244; makefile: 58; python: 54; xml: 19
file content (88 lines) | stat: -rw-r--r-- 2,245 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/usr/bin/python

# An example of how to parse the KVIrc documentation with Python.
# Do whatever you want with it :D

import io
import os
import re
import sys

# Defaults: the KVIrc checkout that is scanned and the HTML report that is
# written.  Both can be overridden through the parameters of main().
DEFAULT_SOURCE_ROOT = "/src/pragma/kvirc-git/KVIrc/"
DEFAULT_OUTPUT_FILE = "index.html"

# compile regular expressions (once, not once per file)

# File names that are scanned: C/C++ headers and sources.
regex_source = re.compile(r"(.*)\.(h|cpp|c)$")
# A complete /* ... */ comment that contains an "@doc: <name>" line.
# Group 1 is the whole comment, group 2 the documented name.  re.S lets "."
# run across line breaks; the pattern has no "^"/"$", so re.M is not needed.
regex_comment = re.compile(r"(/\*.*?@doc:[\t ]+(.*?)\n.*?\*/)", re.S)
# An indented "@tag: value" line that opens a new category.
regex_category = re.compile(r"^[\t ]+@(type|title|short|syntax|description|doc):(.*)$")
# Any other indented line; group 1 is the text without the leading indent.
regex_line = re.compile(r"^[\t ]+(.*)$")


def _progress(text):
	"""Write *text* to stdout without a trailing newline and flush it."""
	sys.stdout.write(text)
	sys.stdout.flush()


def _find_source_files(source_root):
	"""Return the paths of all .h/.cpp/.c files found below *source_root*.

	One progress dot is printed per file found.  A root that does not exist
	yields an empty list, because os.walk() ignores listing errors by default.
	"""
	found = []
	for root, _dirs, files in os.walk(source_root):
		for name in files:
			if regex_source.match(name):
				found.append(os.path.join(root, name))
				_progress(" .")
	return found


def _escape_html(text):
	"""Return *text* with the HTML special characters &, < and > escaped."""
	# "&" must be replaced first, otherwise the "&" of the entities produced
	# for "<" and ">" would be escaped a second time.
	return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")


def _parse_doc_comments(data):
	"""Extract all "@doc:" comments from the text of one source file.

	Returns a dict that maps each documented name to a dict of
	category -> text.  The value on the "@tag:" line itself starts the text
	of a category; the indented lines that follow are appended to it, one
	per line.  Comments that share a name are merged into the same dict.
	"""
	comments = {}
	for block, doc_name in regex_comment.findall(data):
		# if it already exists append our data
		comment = comments.setdefault(doc_name, {})
		# Start every comment without a current category, so lines that
		# precede its first "@tag:" are not filed under a category left
		# over from the previous comment.
		category = ""
		# Cut off the closing "*/" (and the whitespace before it) so the
		# comment terminator does not end up in the last category's text.
		for line in block[:-2].rstrip().splitlines():
			tagged = regex_category.match(line)
			if tagged:
				category = tagged.group(1)
				comment[category] = tagged.group(2).strip()
				continue
			plain = regex_line.match(line)
			if not (plain and category):
				continue
			previous = comment.get(category, "")
			# Keep the line break between continuation lines; gluing them
			# together would merge the last word of one line with the
			# first word of the next.
			if previous:
				comment[category] = previous + "\n" + plain.group(1)
			else:
				comment[category] = plain.group(1)
	return comments


def _render_html(comments):
	"""Return the HTML fragment for the parsed comments of one file."""
	parts = []
	for key, categories in comments.items():
		parts.append("<h1>" + _escape_html(key) + "</h1>\n")
		for subkey, text in categories.items():
			parts.append("<div style=\"border: 1px dotted green\">\n")
			parts.append("<b><pre>" + subkey + "</pre></b>\n")
			parts.append("<pre>" + _escape_html(text) + "</pre>\n")
			parts.append("</div><br>\n")
	# The unicode literal makes the result a text string on Python 2 as
	# well, which is what a file opened through io.open() requires.
	return u"".join(parts)


def main(source_root=DEFAULT_SOURCE_ROOT, output_file=DEFAULT_OUTPUT_FILE):
	"""Scan a source tree for "@doc:" comments and write them as HTML.

	source_root -- directory that is searched recursively for .h/.cpp/.c files
	output_file -- path of the HTML file to create (overwritten if present)

	Source files are decoded as UTF-8; undecodable bytes are replaced instead
	of aborting the run.  Errors from opening or reading a file propagate to
	the caller.
	"""
	_progress("Step 1:\nSearching files ")
	filelist = _find_source_files(source_root)

	_progress("\nStep 2:\nExtracting comments")
	with io.open(output_file, "w", encoding="utf-8") as outfile:
		# loop through all files
		for filename in filelist:
			with io.open(filename, encoding="utf-8", errors="replace") as source:
				data = source.read()
			comments = _parse_doc_comments(data)
			_progress(" ." * len(comments))
			# The parsed comments are written per file, as the original
			# script did, rather than collected for the whole tree.
			outfile.write(_render_html(comments))
	_progress("\n")


if __name__ == "__main__":
	main()