1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181
|
#!/usr/bin/env python
from __future__ import with_statement
import os
import sys
import string
from string import Template
from config import *
from datetime import date
from textile import textile
from stat import *
import datetime
import PyRSS2Gen
# Site-wide RSS feed.  It starts empty; entries are appended by add_rss_item()
# while blog listings are rendered, and the feed is written out by main().
# PyRSS2Gen labels every date it serialises as GMT, so the build time is
# taken in UTC rather than local time.
rss = PyRSS2Gen.RSS2(
    title=options["sitename"],
    link=options["siteurl"],
    description=options["slogan"],
    lastBuildDate=datetime.datetime.utcnow(),
    items=[])
def add_rss_item(rss, title, link, description, pubDate):
    """Append one entry to an RSS feed.

    rss         -- feed object whose ``items`` list receives the new entry
    title       -- entry title
    link        -- entry URL; it is also used as the entry's GUID
    description -- entry body
    pubDate     -- publication time as a POSIX timestamp (seconds since the
                   epoch)
    """
    # PyRSS2Gen writes datetimes out with a literal "GMT" suffix, so the
    # timestamp has to be converted to UTC; a local-time conversion would be
    # off by the machine's UTC offset.
    published = datetime.datetime.utcfromtimestamp(pubDate)
    item = PyRSS2Gen.RSSItem(
        title=title,
        link=link,
        description=description,
        guid=PyRSS2Gen.Guid(link),
        pubDate=published)
    rss.items.append(item)
def ext(fname):
    """Return the extension of *fname*, leading dot included ('' if none)."""
    _stem, suffix = os.path.splitext(fname)
    return suffix
def process(fname):
    """Check that *fname* contains a header section and a body.

    A valid file has a blank line separating the header section from the
    body.  When no such separator exists a diagnostic naming the file is
    printed.  Nothing is returned.
    """
    with open(fname, 'r') as f:
        text = f.read()
    # Only the first blank line separates header from body; later blank lines
    # belong to the body (parse() splits the same way), so a multi-paragraph
    # body must not be reported as invalid.
    if '\n\n' not in text:
        # Two spaces after the colon reproduce the output of the original
        # comma-separated print statement.
        print('Invalid file format :  %s' % fname)
def parse(fname):
    """Split a page file into its headers and its body.

    The file holds ``Name: value`` header lines, a blank line, then the
    body.  Only the first blank line is treated as the separator, so the
    body may contain blank lines of its own.

    Returns a ``(headers, body)`` tuple: *headers* maps lower-cased header
    names to stripped text values, *body* is everything after the separator.

    Raises TypeError when the separator is missing, a header line has no
    ``": "`` delimiter, or a header value is not valid UTF-8.
    """
    with open(fname, 'r') as f:
        raw = f.read()

    text_type = type(u"")  # ``unicode`` on Python 2, ``str`` on Python 3
    headers = {}
    try:
        header_lines, body = raw.split("\n\n", 1)
        for header in header_lines.split("\n"):
            name, value = header.split(": ", 1)
            value = value.strip()
            if not isinstance(value, text_type):
                # Byte strings are decoded as UTF-8, the encoding this file
                # passes to textile, instead of the implicit ASCII codec.
                value = value.decode("utf-8")
            headers[name.lower()] = value
    except ValueError:
        # Covers failed tuple unpacking and UnicodeDecodeError alike; other
        # exceptions (KeyboardInterrupt, MemoryError, ...) are not masked.
        raise TypeError("Invalid page file format for %s" % fname)
    return headers, body
def get_template(template):
    """Load a template by file name.

    template -- file name, resolved against the module-level ``template_dir``

    Returns a ``string.Template`` built from the file's contents.
    """
    path = os.path.join(template_dir, template)
    # Read inside a context manager so the handle is closed right away
    # instead of whenever the file object happens to be collected.
    with open(path, 'r') as f:
        return Template(f.read())
def source_newer(source, target):
    """Tell whether *target* has to be regenerated from *source*.

    True when the first command-line argument is "force", when *target*
    does not exist, or when *source* was modified strictly later than
    *target*; False otherwise.
    """
    forced = sys.argv[1:2] == ["force"]
    if forced or not os.path.exists(target):
        return True
    source_mtime = os.stat(source)[ST_MTIME]
    target_mtime = os.stat(target)[ST_MTIME]
    return source_mtime > target_mtime
def is_blog(current_dir, myself, headers, files):
    """Return the callable that produces a page's body.

    When *headers* carries ``content-type: text/blog`` the page is a listing:
    every name in *files* except the page itself is visited in reverse-sorted
    order, parsed, and rendered through an item template (the entry's own
    ``item-template`` header, falling back to the listing's).  Only the first
    non-empty paragraph of each entry is used.  Unless the listing has a
    ``feed`` header each rendered entry is also added to the module-level
    RSS feed.  The returned callable ignores its argument and yields the
    concatenated entries.

    For any other page the returned callable gives its argument back
    unchanged.
    """
    if headers.get('content-type') != "text/blog":
        return lambda s: s

    # Work on a copy so the caller's list is left alone.
    siblings = files[:]
    siblings.remove(os.path.split(myself)[-1])
    siblings.sort(reverse=True)

    entries = []
    for name in siblings:
        print(" ".join(("Doing blog", name)))
        source = os.path.join(current_dir, name)
        head, body = parse(source)
        paragraphs = [p for p in body.split("\n\n") if p]
        if not paragraphs:
            # Nothing to excerpt; the entry is left out of the listing.
            continue

        # The file's base name doubles as the entry's date and page name.
        stem, _suffix = os.path.splitext(name)
        section = "/" + os.path.split(current_dir)[-1]
        head["link"] = os.path.join(section, stem + ".html")
        head["date"] = stem
        entry_format = determine_format(head)
        published = os.stat(source)[ST_CTIME]
        head["content"] = content_format(current_dir, name, head, files,
                                         entry_format, paragraphs[0])
        if 'item-template' in head:
            template_name = head['item-template']
        else:
            template_name = headers['item-template']
        rendered = get_template(template_name).safe_substitute(head)
        if "feed" not in headers:
            add_rss_item(rss, head["title"],
                         options["siteurl"] + head["link"],
                         rendered, published)
        entries.append(rendered)

    return lambda s: "".join(entries)
def content_format(current_dir, inp, headers, files, format, body):
    """Render *body* according to the content type named by *format*.

    Supported types are ``text/plain`` (wrapped in ``<pre>``),
    ``text/x-textile`` (run through textile), ``text/html`` (passed through)
    and ``text/blog`` (handled by the callable is_blog() returns).  Any other
    value raises KeyError.
    """
    def as_preformatted(text):
        return u'<pre>%s</pre>' % text

    def as_textile(text):
        return u'%s' % textile(text, head_offset=0, validate=0,
                               sanitize=0, encoding='utf-8', output='utf-8')

    def as_is(text):
        return text

    renderers = {
        u'text/plain': as_preformatted,
        u'text/x-textile': as_textile,
        u'text/html': as_is,
        # is_blog() runs on every call, whatever *format* is; it decides
        # from *headers* whether there is a listing to build.
        u'text/blog': is_blog(current_dir, inp, headers, files),
    }
    render = renderers[format]
    return render(body)
def determine_format(headers):
    """Return the page's ``content-type`` header, or the site default.

    The default is ``options['format']`` and is only looked up when the
    header is absent.
    """
    return headers['content-type'] if 'content-type' in headers else options['format']
def parse_directory(current_dir, files, output_dir):
    """Render the page files of one directory into *output_dir*.

    current_dir -- directory holding the source files
    files       -- file names inside *current_dir*; only those whose
                   extension is listed in ``options['extensions']`` are used
    output_dir  -- directory receiving one ``.html`` file per source file

    A file is skipped when source_newer() reports its output as up to date,
    except ``index.txt``, which is always rebuilt.
    """
    files = [f for f in files if ext(f) in options['extensions']]
    for f in files:
        inp = os.path.join(current_dir, f)
        target = os.path.join(output_dir, f)
        # TODO: Allow specifying the target extension from headers
        outp = os.path.splitext(target)[0] + '.html'

        # always redo the indexes since they'll typically list information to
        # update from the directory they are in
        if not source_newer(inp, outp) and f != "index.txt":
            continue

        headers, body = parse(inp)
        if 'template' in headers:
            blob = get_template(headers['template'])
        else:
            # ``template`` is a module-level default that is not defined in
            # this function.
            blob = get_template(template)

        page_format = determine_format(headers)
        print("Processing %s" % inp)
        headers['content'] = content_format(current_dir, inp, headers, files,
                                            page_format, body)
        # Site options are merged last, so they win over same-named headers.
        headers.update(options)
        # Hand the mapping over directly: expanding it with ** would turn
        # every header name into a keyword argument, which breaks for names
        # that clash with the method's own parameters.
        output = blob.safe_substitute(headers)
        # The context manager closes the file even when the write fails.
        with open(outp, 'w') as outf:
            outf.write(output)
def a_fucking_cmp_for_time(x, y):
    """Old-style comparison function ordering items newest ``pubDate`` first.

    Returns the whole number of seconds by which ``y.pubDate`` is later than
    ``x.pubDate`` (negative when it is earlier); microseconds are ignored.
    """
    seconds_per_day = 24 * 60 * 60
    delta = y.pubDate - x.pubDate
    return delta.seconds + seconds_per_day * delta.days
def main():
    """Render the whole input tree, then write the RSS feed.

    Every directory below ``input_dir`` is mirrored under ``output_dir``
    (created when missing) and handed to parse_directory().  Afterwards the
    collected feed items are sorted newest first and the feed is written to
    ``output/feed.xml``.
    """
    ### Walks through the input dir, finding all subdirectories.
    for root, dirs, files in os.walk(input_dir):
        # os.walk() roots always start with input_dir, so swap just that
        # prefix; str.replace() would also rewrite later occurrences of the
        # same text inside sub-directory names.
        output = output_dir + root[len(input_dir):]
        ### Checks if the directory exists in output and creates it if false.
        if not os.path.isdir(output):
            os.makedirs(output)
        parse_directory(root, files, output)

    # Without any blog entries the feed is empty and there is no span to
    # report; indexing it would raise IndexError.
    if rss.items:
        x, y = rss.items[0], rss.items[-1]
        diff = x.pubDate - y.pubDate
        print("diff! %s %s" % (diff.seconds, diff.days))

    # Newest entries first.
    rss.items.sort(key=lambda item: item.pubDate, reverse=True)
    # NOTE(review): the feed path is hard-coded and does not follow
    # output_dir - confirm whether that is intended.
    with open("output/feed.xml", "w") as feed:
        rss.write_xml(feed)


if __name__ == '__main__':
    main()
|