1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
|
#!/usr/bin/env python
import log_reader
def daily_date_hasher(date):
    """Return the ``(year, month, day)`` tuple identifying the day of *date*.

    Only the ``year``, ``month`` and ``day`` attributes of *date* are read,
    so two values that fall on the same calendar day produce equal keys.
    """
    year, month, day = date.year, date.month, date.day
    return (year, month, day)
def read_log(filename, path_guesser, date_hasher=daily_date_hasher,
             max_reads=None):
    """Count the log records accepted by *path_guesser*, grouped by date key.

    Records are taken from ``log_reader.ApacheReader(filename)``. A record is
    counted when it has a ``'path'`` entry and ``path_guesser(record['path'])``
    is true; it is then filed under ``date_hasher(record['time'])``.

    Parameters:
        filename: passed unchanged to ``log_reader.ApacheReader``.
        path_guesser: callable taking a record's path and returning a truthy
            value for records that should be counted.
        date_hasher: callable mapping a record's ``'time'`` value to a
            hashable grouping key (one key per day by default).
        max_reads: upper bound on the number of records examined (matching
            or not). ``None`` means no limit; ``0`` examines nothing.

    Returns:
        A ``(keys, counts)`` pair: ``keys`` lists every grouping key in the
        order it was first seen, and ``counts`` maps each key to the number
        of accepted records filed under it.
    """
    assert callable(path_guesser)
    lr = log_reader.ApacheReader(filename)
    ret = {}
    keys = []
    for num, entry in enumerate(lr):
        # ``num`` is zero-based: reaching ``max_reads`` means that many
        # records have already been examined, so stop before this one.
        if max_reads is not None and num >= max_reads:
            break
        # NOTE(review): records are assumed to be dict-like (support ``in``
        # and key lookup) -- confirm against log_reader.ApacheReader.
        if 'path' not in entry or not path_guesser(entry['path']):
            continue
        bucket = date_hasher(entry['time'])
        if bucket not in ret:
            # Remember first-seen order separately from the counts.
            keys.append(bucket)
            ret[bucket] = 0
        ret[bucket] += 1
    return keys, ret
def test_daily_date_hasher():
    """Check that daily_date_hasher keys depend on the date, not the time.

    Two timestamps on the same day must hash equal; a timestamp one month
    later must hash differently.
    """
    from datetime import datetime
    early_morning = datetime(2007, 2, 25, 3, 10, 50)
    same_day_later = datetime(2007, 2, 25, 7, 1, 5)
    month_later = datetime(2007, 3, 25, 3, 10, 50)
    reference_key = daily_date_hasher(early_morning)
    assert reference_key == daily_date_hasher(same_day_later)
    assert reference_key != daily_date_hasher(month_later)
def test_read_log():
    """Run read_log over the ``code.log`` fixture, counting ``/viewvc`` paths.

    Expects 51 matching records in total, all filed under 2007-02-25, and
    the returned key list to already be in sorted order.
    NOTE(review): presumably ``code.log`` must be reachable from the working
    directory -- confirm how log_reader resolves the filename.
    """
    def is_viewvc_path(path):
        return path.startswith("/viewvc")

    keys, ret = read_log("code.log", is_viewvc_path)
    total_hits = sum(ret.values())
    assert total_hits == 51
    assert keys == sorted(keys)
    assert ret[(2007, 2, 25)] == 51
# Command-line entry point: ./parse_logs.py <logfile> [max reads]
# Prints one "year-month-day, count" line per day, in first-seen order,
# counting only records whose path contains "pypy".
if __name__ == "__main__":
    import sys
    import re  # not used in this block; kept so the module-level name stays bound

    if len(sys.argv) == 3:
        max_reads = int(sys.argv[2])
    elif len(sys.argv) != 2:
        # Single-argument print(...) produces the same output under the
        # Python 2 print statement and the Python 3 print function.
        print("Usage ./parse_logs.py <logfile> [max reads]")
        sys.exit(1)
    else:
        max_reads = None

    def path_guesser(path):
        return "pypy" in path

    keys, ret = read_log(sys.argv[1], path_guesser, max_reads=max_reads)
    for key in keys:
        year, month, day = key
        print("%d-%d-%d, %d" % (year, month, day, ret[key]))
|