File: mob_stat

package info (click to toggle)
mobyle 1.5.3%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 8,272 kB
  • ctags: 2,745
  • sloc: python: 22,649; sh: 57; makefile: 31; xml: 6; ansic: 5
file content (234 lines) | stat: -rw-r--r-- 7,413 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
#! /usr/bin/env python
# -*- coding: utf-8 -*-

from collections import namedtuple, defaultdict
import gzip
import os
import sys
# Locate the Mobyle installation: its Python packages are expected under
# $MOBYLEHOME/Src, which is appended to sys.path so they can be imported
# later on. The script aborts at start-up when the variable is unset or empty.
# os.environ.get() is used instead of has_key(), which no longer exists on
# Python 3 mappings.
MOBYLEHOME = os.environ.get('MOBYLEHOME')
if not MOBYLEHOME:
    sys.exit('MOBYLEHOME must be defined in your environment if you want to send statistics by email')
_mobyle_src = os.path.join(MOBYLEHOME, 'Src')
if _mobyle_src not in sys.path:
    sys.path.append(_mobyle_src)
    
###############
#  Utilities  #
###############

def memoize(func):
    """Return a caching wrapper around *func*.

    Results are stored in a dict keyed on the tuple of positional
    arguments, so every argument must be hashable (an unhashable argument
    raises ``TypeError``). Keyword arguments are not supported and the
    cache is never emptied.

    :param func: the callable whose results should be cached.
    :return: a function with the same name and docstring as *func* that
             computes each distinct argument tuple only once.
    """
    # Local import: functools is not imported at the top of this file.
    from functools import wraps

    cache = {}

    @wraps(func)  # keep func's __name__ / __doc__ on the wrapper
    def wrapper(*args):
        if args in cache:
            return cache[args]
        res = func(*args)
        cache[args] = res
        return res
    return wrapper

def group_by(logs, func):
    """Bucket the items of *logs* by the key computed with *func*.

    :param logs: iterable of items to distribute.
    :param func: callable applied to each item; its (hashable) result is
                 the bucket key.
    :return: a ``defaultdict(list)`` mapping each key to the list of items
             that produced it, in their original order.
    """
    buckets = defaultdict(list)
    for entry in logs:
        buckets[func(entry)].append(entry)
    return buckets

def count(logs, key):
    """Count how many items of *logs* share each value of attribute *key*.

    :param logs: iterable of objects exposing the attribute named *key*.
    :param key: name of the attribute to tally.
    :return: a list of ``(value, occurrences)`` pairs sorted by ascending
             number of occurrences (so the most frequent values are last).
    """
    tally = defaultdict(int)
    for entry in logs:
        tally[getattr(entry, key)] += 1
    return sorted(tally.items(), key=lambda pair: pair[1])
###############
#    core     #
###############

# One access-log entry. The fields are filled by parse() from the
# whitespace-separated columns of a log line.
log = namedtuple('log', "day_of_week day_of_month month year hour prog email ip portal")


def parse(log_paths):
    """Read access-log files and return their entries as ``log`` tuples.

    Each non-blank line is split on whitespace. Columns 0-5 give
    day_of_week, day_of_month, month, year, hour and prog; column 6 is
    ignored; columns 7 and 8 give email and ip. The portal is column 9 when
    the line has exactly 10 columns, otherwise ``'UNKNOW_PORTAL'``.

    :param log_paths: iterable of file paths; a path ending in ``.gz`` is
                      read through gzip, any other as a plain file.
    :return: list of ``log`` tuples, in file then line order.
    :raises IndexError: if a non-blank line has fewer than 9 columns.
    :raises ValueError: if day_of_month or year is not an integer.
    """
    logs = []
    for log_path in log_paths:
        opener = gzip.open if log_path.endswith('.gz') else open
        with opener(log_path, 'r') as log_file:
            for line in log_file:
                # On Python 3 gzip.open(..., 'r') yields bytes; decode so
                # that fields are text whatever the source file type is.
                if not isinstance(line, str):
                    line = line.decode('utf-8', 'replace')
                fields = line.split()
                if not fields:
                    # blank line (e.g. a trailing newline): nothing to parse
                    continue
                portal = fields[9] if len(fields) == 10 else 'UNKNOW_PORTAL'
                logs.append(log(fields[0],
                                int(fields[1]),
                                fields[2],
                                int(fields[3]),
                                fields[4],
                                fields[5],
                                fields[7],
                                fields[8],
                                portal))
    return logs

 
def jobs_per_user(logs):
    """Return the number of jobs submitted by each user.

    This is the ``email`` tally of count(); it delegates to that helper
    instead of duplicating its loop.

    :param logs: iterable of objects exposing an ``email`` attribute.
    :return: list of ``(email, nb_of_jobs)`` pairs sorted by ascending
             number of jobs.
    """
    return count(logs, 'email')

def user_dict_2_list(d):
    """Turn a sequence of ``(label, count)`` pairs into two parallel lists.

    Each item is indexed with ``[1]``, so a sequence of pairs is expected
    rather than a mapping (presumably the output of jobs_per_user -- to be
    confirmed against the callers).

    :param d: iterable of pairs whose second element is a positive number.
    :return: ``(x_data, y_data)`` where x_data holds the 1-based position
             of each pair and y_data the natural logarithm of its count.
    :raises ValueError: if a count is zero or negative (from math.log).
    """
    # Local import: math is not imported at the top of this file, so the
    # original body failed with NameError on its first math.log() call.
    import math

    x_data = []
    y_data = []
    for x, user in enumerate(d, start=1):
        x_data.append(x)
        y_data.append(math.log(user[1]))
    return x_data, y_data

def is_pasteurien(log):
    """Tell whether the email of *log* ends with ``'pasteur.mg'``.

    The test is a plain suffix match on the whole address, so any address
    finishing with that string is accepted.

    :param log: object exposing an ``email`` string attribute.
    :return: True if the address has the suffix, False otherwise.
    """
    address = log.email
    return address.endswith('pasteur.mg')

def progs_sorter(log):
    """Key function: return the program name (``prog``) of *log*."""
    program = log.prog
    return program

def day_of_week_sorter(log):
    """Key function: return the ``day_of_week`` field of *log*."""
    weekday = log.day_of_week
    return weekday

def month_sorter(log):
    """Key function: return the ``month`` field of *log*."""
    month = log.month
    return month

def year_sorter(log):
    """Key function: return the ``year`` field of *log*."""
    year = log.year
    return year

def by_user(log):
    """Key function: return the ``email`` field of *log*."""
    user = log.email
    return user

def by_day(log):
    """Key function: return the calendar day of *log*.

    :return: the tuple ``(day_of_month, month, year)``.
    """
    day = log.day_of_month
    month = log.month
    year = log.year
    return (day, month, year)


def _write_section(report, title, header, rows):
    """Write one table of the report: title, underline, header and rows.

    :param report: writable text file object.
    :param title: section title, without surrounding newlines.
    :param header: column header line (e.g. ``'program : nb jobs'``).
    :param rows: iterable of ``(label, value)`` pairs in display order.
    """
    report.write('\n{0}\n'.format(title))
    report.write('{0}\n'.format('=' * len(title)))
    report.write('\t{0}\n'.format(header))
    report.write('\t{0}\n'.format('-' * len(header)))
    for label, value in rows:
        report.write('\t{0} : {1}\n'.format(label, value))


if __name__ == "__main__":

    import argparse
    parser = argparse.ArgumentParser(description="""parse mobyle access log 
and generate a report
""")
    parser.add_argument("logs",
                        nargs='+',
                        help="the access files log in gz format")
    parser.add_argument("-o", "--output",
                        dest="output",
                        action="store",
                        default="mob_stat.out",
                        help="the output file")
    parser.add_argument("-m", "--email",
                        dest="email",
                        action="store_true",
                        help="send the results by email (by default to the Mobyle maintainers)")
    parser.add_argument("--to",
                        dest="to",
                        nargs='+',
                        action="store",
                        default=None,
                        help="replace the dest of the email (the -m option must be set)")
    args = parser.parse_args()
    logs = parse(args.logs)

    TOTAL_JOBS = len(logs)
    UNIC_USERS = count(logs, 'email')

    ##################
    #  programs used #
    ##################
    # count() returns one (program, nb jobs) pair per distinct program,
    # sorted by ascending number of jobs: the top 10 are the last 10.
    prog_used = count(logs, 'prog')
    PROG_USED = len(prog_used)
    PROG_USED_TOP_10 = prog_used[-10:]

    #####################################
    # how many user use a given program #
    #####################################
    by_progs = group_by(logs, progs_sorter)
    progs = [(prog, len(count(prog_logs, 'email')))
             for prog, prog_logs in by_progs.items()]
    progs.sort(key=lambda x: x[1])
    NB_OF_USER_BY_PROG_TOP10 = progs[-10:]

    users = jobs_per_user(logs)
    NB_OF_JOBS_BY_USER_TOP10 = users[-10:]

    ##########
    # Report #
    ##########
    with open(args.output, 'w') as report:
        if TOTAL_JOBS:
            report.write('Total number of jobs = {0}\n'.format(TOTAL_JOBS))
            nb_of_days = len(group_by(logs, by_day))
            # float(): on Python 2 "/" between two ints truncates before
            # the value is rounded by the format specification.
            report.write('nb of jobs / days = {0:.0f}\n'.format(float(TOTAL_JOBS) / nb_of_days))
            report.write("number of users = {0}\n".format(len(UNIC_USERS)))

            # The top-10 lists are in ascending order; reverse them so the
            # biggest value is displayed first.
            _write_section(report,
                           '{0} programs used (top 10)'.format(PROG_USED),
                           'program : nb jobs',
                           reversed(PROG_USED_TOP_10))
            _write_section(report,
                           "Number of users by program (top 10)",
                           'program : users',
                           reversed(NB_OF_USER_BY_PROG_TOP10))
            _write_section(report,
                           "Number of jobs by user (top 10)",
                           'user : jobs',
                           reversed(NB_OF_JOBS_BY_USER_TOP10))
        else:
            report.write('No jobs')

    ##########################################
    # email the report to mobyle maintainers #
    ##########################################
    # NOTE: --to is only read here, so it has no effect without -m.
    if args.email:
        # Mobyle modules are imported only when an email is requested.
        from Mobyle.ConfigManager import Config
        config = Config()
        from Mobyle.Net import EmailAddress, Email

        if args.to:
            email_addr = EmailAddress(args.to)
        else:
            email_addr = EmailAddress(config.maintainer())

        email_checked = email_addr.check()
        if not email_checked:
            msg = email_addr.getMessage()
            # sys.stderr.write works on Python 2 and 3, unlike "print >>".
            sys.stderr.write('{0}\n'.format(msg))
            sys.exit(2)

        mail = Email(email_addr)
        mail.send('STAT', {'SENDER': config.sender(),
                           'HELP': config.mailHelp(),
                           'SERVER_NAME': config.portal_url()},
                  files=[args.output])