1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165
|
#!/usr/bin/python -tt
"""
Description will eventually go here.
"""
##
# Copyright (C) 2003 by Duke University
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA.
#
# $Id$
#
# @Author Konstantin Ryabitsev <icon@linux.duke.edu>
# @version $Date$
#
import sys
import re
##
# This is for testing purposes, so you can invoke this from the
# modules directory. See also the testing notes at the end of the
# file.
#
sys.path.insert(0, '../py/')
from epylog import InternalModule, Result
class spamd_mod(InternalModule):
def __init__(self, opts, logger):
InternalModule.__init__(self)
self.logger = logger
rc = re.compile
self.regex_map = {
rc('spamd\[\d+\]: clean message'): self.spamd,
rc('spamd\[\d+\]: identified spam'): self.spamd
}
self.top = int(opts.get('report_top', '10'))
self.thold = int(opts.get('spam_threshold', '5'))
sort_by = opts.get('sort_by', 'most spammed')
if sort_by == 'most spammed': self.sort = 'spammed'
else: self.sort = 'messages'
self.spamd_re = rc('\s\((.*?)/.*for (\S*?):.*in (\S*).*, (\d+) bytes')
self.report_wrap = '<table border="0" width="100%%" rules="cols" cellpadding="2">%s</table>\n'
self.subreport_wrap = '<tr><th colspan="4" align="left"><h3>%s</h3></th></tr>\n'
self.total_title = '<font color="blue">Total stats</font>'
self.users_title = '<font color="blue">Top %d ranking users</font>' % self.top
self.score_rep = '%.1f (%d/%d)'
self.report_line = '<tr%s><td valign="top">%s</td><td valign="top">%s</td><td valign="top">%s</td><td valign="top">%s</td></tr>\n'
self.flip = ' bgcolor="#dddddd"'
##
# Line-matching routines
#
def spamd(self, linemap):
sys, msg, mult = self.get_smm(linemap)
try:
score, user, sec, size = self.spamd_re.search(msg).groups()
except:
logger.put(0, 'Odd spamd line: %s' % msg)
return None
score = float(score)
sec = float(sec)
size = int(size)
return {(user, score, sec, size): mult}
def _mk_score(self, msgs, score, score1, score2):
avg_score = float(score/msgs)
ret = self.score_rep % (avg_score, score1, score2)
return ret
def _mk_time_unit(self, secs):
mins = int(secs/60)
if mins:
hrs = int(mins/60)
if hrs:
days = int(hrs/24)
if days: return (days, 'd')
return (hrs, 'hr')
return (mins, 'min')
return (secs, 'sec')
def finalize(self, rs):
user_rep = ''
users = rs.get_distinct(())
t_msgs = 0
t_score_t = 0
t_thold_lt = 0
t_thold_gt = 0
t_secs = 0
t_size = 0
urs = Result()
for user in users:
submap = rs.get_submap((user,))
msgs = 0
score_t = 0
thold_lt = 0
thold_gt = 0
secs = 0
size = 0
while 1:
try: entry, mult = submap.popitem()
except KeyError: break
msgs += mult
score, sec, bytes = entry
score_t += score * mult
if score < self.thold: thold_lt += mult
else: thold_gt += mult
secs += sec * mult
size += bytes * mult
if msgs == 0: continue
t_msgs += msgs
t_score_t += score_t
t_thold_lt += thold_lt
t_thold_gt += thold_gt
t_secs += secs
t_size += size
if self.sort == 'spammed': ctr = float(score/msgs)
else: ctr = msgs
urs.add_result({(user, msgs, score_t, thold_lt, thold_gt,
secs, size): ctr})
report = ''
report += self.subreport_wrap % self.total_title
score = self._mk_score(t_msgs, t_score_t, t_thold_lt, t_thold_gt)
time = '%d %s' % self._mk_time_unit(t_secs)
size = '%d %s' % self.mk_size_unit(t_size)
user = '%d users/%d msgs' % (len(users), t_msgs)
report += self.report_line % (self.flip, user, size, time, score)
report += self.subreport_wrap % self.users_title
flipper = ''
for avg, entry in urs.get_top(self.top):
if flipper: flipper = ''
else: flipper = self.flip
user, msgs, score_t, thold_lt, thold_gt, secs, size = entry
score = self._mk_score(msgs, score_t, thold_lt, thold_gt)
time = '%d %s' % self._mk_time_unit(secs)
size = '%d %s' % self.mk_size_unit(size)
report += self.report_line % (flipper, user, size, time, score)
report = self.report_wrap % report
return report
##
# This is useful when testing your module out.
# Invoke without command-line parameters to learn about the proper
# invocation.
#
if __name__ == '__main__':
from epylog.helpers import ModuleTest
ModuleTest(spamd_mod, sys.argv)
|