File: reportlib.py

package info (click to toggle)
python-scipy 0.6.0-12
  • links: PTS, VCS
  • area: main
  • in suites: lenny
  • size: 32,016 kB
  • ctags: 46,675
  • sloc: cpp: 124,854; ansic: 110,614; python: 108,664; fortran: 76,260; objc: 424; makefile: 384; sh: 10
file content (538 lines) | stat: -rw-r--r-- 21,439 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
"""
Reporting functions

:author: Pierre GF Gerard-Marchant & Matt Knox
:contact: pierregm_at_uga_dot_edu - mattknox_ca_at_hotmail_dot_com
:version: $Id: reportlib.py 2829 2007-03-06 21:09:33Z mattknox_ca $

Ideas borrowed from:

- George Sakkis
    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/267662

- Mike Brown
    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/148061
    
:Examples:

    import numpy as np
    import timeseries as ts
    import maskedarray as ma
    from timeseries import Report, wrap_onspace

    series1 = ts.time_series(np.random.uniform(-100,100,15), start_date=ts.thisday('b')-15)
    series2 = ts.time_series(np.random.uniform(-100,100,13), start_date=ts.thisday('b')-10)
    series3 = ts.time_series(['string1', 'another string', 'yet another string']*3, start_date=ts.thisday('b')-10)
    
    darray = ts.date_array(start_date=ts.thisday('b')-8, end_date=ts.thisday('b')-3)
    
    txt_o = open('myfile.txt', 'w')
    html_o = open('myfile.html', 'w')
    
    # report containing only numerical series, showing 2 decimal places
    num_report = Report(series1, series2, fmtfunc=lambda x:'%.2f' % x)
    
    # report containing some string and numerical data
    mixed_report = Report(series1, series2, series3)

    # output a csv report suitable for excel to sys.stdout, show masked values as "N/A"
    num_report(delim=', ', mask_rep='N/A')

    # format one column one with 2 decimal places, and column two with 4.
    # Add a sum footer. Write the output to txt_o
    num_report(fmtfunc=[(lambda x:'%.2f' % x), (lambda x:'%.4f' % x)],
                 footer_func=ma.sum, footer_label='sum', output=txt_o)

    # create an html table of the data over a specified range.
    # Wrap text in cells to width 10. Output to html_o
    html_o.write("<table>")
    mixed_report(series1, series2, series3, dates=darray,
               delim="</td><td>", prefix="<tr><td>", postfix="</td></tr>",
               wrapfunc=wrap_onspace(10, nls='<BR>'), output=html_o)
    html_o.write("</table>")
    
"""
__author__ = "Pierre GF Gerard-Marchant & Matt Knox ($Author: mattknox_ca $)"
__version__ = '1.0'
__revision__ = "$Revision: 2829 $"
__date__     = '$Date: 2007-03-06 13:09:33 -0800 (Tue, 06 Mar 2007) $'

import sys
import operator, types, copy
import timeseries as ts
import maskedarray as ma

__all__ = [
    'Report', 'wrap_onspace', 'wrap_onspace_strict',
    'wrap_always']

class fmtfunc_wrapper:
    """wraps a formatting function such that it handles masked values

:IVariables:
    - `fmtfunc` : formatting function.
    - `mask_rep` : string to use for masked values
    """
    def __init__ (self, fmtfunc, mask_rep):
        if fmtfunc is None:
            self.f = str
        else:
            self.f = fmtfunc
        self.mr = mask_rep

    def __call__ (self, item):
        "Execute the call behavior."

        if hasattr(item, "_mask") and isinstance(item._mask, bool) and item._mask:
            return self.mr
        else:
            return self.f(item)


_default_options = {
    'dates':None,
    'header_row':None,
    'header_char':'-',
    'header_justify':None,
    'row_char':None,
    'footer_label':None,
    'footer_char':'-',
    'footer_func':None,
    'delim':' | ',
    'justify':None,
    'prefix':'',
    'postfix':'',
    'mask_rep':'--',
    'datefmt':None,
    'fmtfunc':str,
    'wrapfunc':lambda x:x,
    'col_width':None,
    'nls':'\n',
    'output':sys.stdout,
    'fixed_width':True
}

class Report(object):
    """Create a tabular TimeSeries report with dates in the left column.
All instance variables are optional and simply serve as the defaults when calling
the report. Parameters for calling the report are the exact same as for
initialization. When calling the report, new options specified will not be saved
to the instance.

:IVariables:
        - `*tseries` : time series objects. Must all be at the same frequency, but
          do not need to be aligned.
          
        - `dates` (DateArray, *[None]*) : dates at which values of all the series
          will be output. If not specified, data will be output from the minimum
          start_date to the maximum end_date of all the time series objects
          
        - `header_row` (list, *[None]*) : List of column headers. Specifying
          the header for the date column is optional.
          
        - `header_char` (string, *['-']*): Character to be used for the row separator
          line between the header and first row of data. None for no separator. This
          is ignored if `header_row` is None.
          
        - `header_justify` (List of strings or single string, *[None]*) : Determines
          how headers are justified. If not specified, all headers are left justified.
          If a string is specified, it must be one of 'left', 'right', or 'center'
          and all headers will be justified the same way. If a list is specified, each
          header will be justified according to the specification for that header in
          the list. Specifying the justification for the date column is header is
          optional.
          
        - `row_char` (string, *[None]*): Character to be used for the row separator
          line between each row of data. None for no separator
          
        - `footer_func` (List of functions or single function, *[None]*) : A function or
          list of functions for summarizing each data column in the report. For example,
          ma.sum to get the sum of the column. If a list of functions is provided
          there must be exactly one function for each column. Do not specify a function
          for the Date column.
          
        - `footer_char` (string, *['-']*): Character to be used for the row separator
          line between the last row of data and the footer. None for no separator. This
          is ignored if `footer_func` is None.
          
        - `footer_label` (string, *[None]*) : label for the footer row. This goes at the
          end of the date column. This is ignored if footer_func is None.
          
        - `justify` (List of strings or single string, *[None]*) : Determines how data
          are justified in their column. If not specified, the date column and string
          columns are left justified, and everything else is right justified. If a
          string is specified, it must be one of 'left', 'right', or 'center' and all
          columns will be justified the same way. If a list is specified, each column
          will be justified according to the specification for that column in the list
          Specifying the justification for the date column is optional.
          
        - `prefix` (string, *['']*) : A string prepended to each printed row.
        
        - `postfix` (string, *['']*) : A string appended to each printed row.
        
        - `mask_rep` (string, *['--']*): String used to represent masked values in
          output
          
        - `datefmt` (string, *[None]*) : Formatting string used for displaying the
          dates in the date column. If None, str() is simply called on the dates
          
        - `fmtfunc` (List of functions or single function, *[None]*) : A function or
          list of functions for formatting each data column in the report. If not
          specified, str() is simply called on each item. If a list of functions is
          provided, there must be exactly one function for each column. Do not specify
          a function for the Date column, that is handled by the datefmt argument
          
        - `wrapfunc` (List of functions or single function, *[lambda x:x]*): A function
          f(text) for wrapping text; each element in the column is first wrapped by this
          function. Instances of  wrap_onspace, wrap_onspace_strict, and wrap_always
          (which are part of this module) work well for this. Eg. wrapfunc=wrap_onspace(10)
          If a list is specified, each column will be wrapped according to the
          specification for that column in the list. Specifying a function for the Date
          column is optional
          
        - `col_width` (list of integers or single integer, *[None]*): use this to specify
          a width for all columns (single integer), or each column individually (list
          of integers). The column will be at least as wide as col_width, but may be
          larger if cell contents exceed col_width. If specifying a list, you may
          optionally specify the width for the Date column as the first entry
          
        - `output` (buffer, *[sys.stdout]*): `output` must have a write method.
          
        - `fixed_width` (boolean, *[True]*): If True, columns are fixed width (ie.
          cells will be padded with spaces to ensure all cells in a given column are
          the same width). If False, `col_width` will be ignored and cells will not
          be padded."""

    def __init__(self, *tseries, **kwargs):
        
        self.options = {}
        self.tseries = None
        if len(tseries) > 0:
            self.tseries = tseries
        self.options = self.__make_dict(**kwargs)
        
    def __make_dict(self, **kwargs):
    
        option_dict = copy.copy(self.options)
        
        option_list = list(_default_options)

        for x in [kw for kw in kwargs if kw in option_list]:
            option_dict[x] = kwargs.pop(x)
            
        if len(kwargs) > 0:
            raise KeyError("Unrecognized keyword(s): %s" % (", ".join(kwargs.keys())))
            
        return option_dict
        
    def set_series(self, *tseries):
        """set new time series for the report

:Paramaters:
    - `*tseries` : the TimeSeries objects to be used in the report"""
        self.tseries = tseries

    def set_options(self, **kwargs):
        """set new options or modify options in the report

:Paramaters:
    - `**kwargs` : the options to be used in the report. See the __doc__
      string for the Report class for valid options"""
        self.options = self.__make_dict(**kwargs)

    
    def __call__(self, *tseries, **kwargs):
        """generate a report

:Paramaters:
    - `*tseries` : the TimeSeries objects to be used in the report. If
      omitted, the previously set TimeSeries objects will be used
    - `**kwargs` : the options to be used in the report. See the __doc__
      string for the Report class for valid options. If omitted, the
      previously set options will be used"""

        option_dict = self.__make_dict(**kwargs)
        if len(tseries) == 0:
            tseries = self.tseries
        
        def option(kw):
            return option_dict.get(kw, _default_options[kw])
            
        dates = option('dates')
        header_row = option('header_row')
        header_char = option('header_char')
        header_justify = option('header_justify')
        row_char = option('row_char')
        footer_label = option('footer_label')
        footer_char = option('footer_char')
        footer_func = option('footer_func')
        delim = option('delim')
        justify = option('justify')
        prefix = option('prefix')
        postfix = option('postfix')
        mask_rep = option('mask_rep')
        datefmt = option('datefmt')
        fmtfunc = option('fmtfunc')
        wrapfunc = option('wrapfunc')
        col_width = option('col_width')
        nls=option('nls')
        output=option('output')
        fixed_width=option('fixed_width')
        
        if header_row is not None:
            has_header=True
            if len(header_row) == len(tseries)+1:
                # label for date column included
                rows = [header_row]
            elif len(header_row) == len(tseries):
                # label for date column not included
                rows = [['']+header_row]
            else:
                raise ValueError("mismatch with number of headers and series")
        else:
            has_header=False
            rows=[]

        if fixed_width:
        
            def _standardize_justify(userspec):
                if isinstance(userspec, str):
                    # justify all columns the the same way
                    return [userspec for x in range(len(tseries)+1)]
                elif isinstance(userspec, list):
                    if len(userspec) == len(tseries):
                        # justification for date column not included, so set that
                        # to left by default
                        return ['left'] + userspec
                else:
                    raise ValueError("invalid `justify` specification")
                    
            if justify is not None:
                justify = _standardize_justify(justify)
            else:
                # default column justification
                justify = ['left']
                for ser in tseries:
                    if ser.dtype.char in 'SUO': justify.append('left')
                    else: justify.append('right')
                    
                    
            if header_justify is not None:
                header_justify = _standardize_justify(header_justify)
            else:
                # default column justification
                header_justify = ['left' for x in range(len(tseries)+1)]
        else:
            justify = [None for x in range(len(tseries)+1)]

        if datefmt is None:
            def datefmt_func(date): return str(date)
        else:
            def datefmt_func(date): return date.strfmt(datefmt)

        if dates is None:
            tseries = ts.align_series(*tseries)
            dates = ts.date_array(start_date=tseries[0].start_date,
                                  end_date=tseries[0].end_date)
        else:
            tseries = ts.align_series(start_date=dates[0], end_date=dates[-1], *tseries)

        if isinstance(fmtfunc, list):
            fmtfunc = [fmtfunc_wrapper(f, mask_rep) for f in fmtfunc]
        else:
            fmtfunc = [fmtfunc_wrapper(fmtfunc, mask_rep)]*len(tseries)

        def wrapfunc_default(func):
            if func is None: return lambda x:x
            else: return func
            
        if isinstance(wrapfunc, list):
            if len(wrapfunc) == len(tseries):
                wrapfunc = [lambda x: x] + wrapfunc
            wrapfunc = [wrapfunc_default(func) for func in wrapfunc]
        else:
            wrapfunc = [wrapfunc_default(wrapfunc) for x in range(len(tseries)+1)]
    
            
        if isinstance(col_width, list):
            if len(col_width) == len(tseries):
                col_width = [None] + col_width
        else:
            col_width = [col_width for x in range(len(tseries)+1)]

        def getval(series, date):
            try:
                val = series[date]
            except IndexError:
                val = ma.masked
            return val

        for d in dates:
            rows.append([datefmt_func(d)]+[fmtfunc[i](getval(ser, d)) for i, ser in enumerate(tseries)])

        if footer_func is not None:
            has_footer=True
            if not isinstance(footer_func, list):
                footer_func = [footer_func]*len(tseries)

            if footer_label is None: footer_label = ['']
            else: footer_label = [footer_label]

            footer_data = []
            for i, ser in enumerate(tseries):
                if footer_func[i] is None:
                    footer_data.append('')
                else:
                    footer_data.append(fmtfunc[i](footer_func[i](ser[dates])))

            rows.append(footer_label + footer_data)
        else:
            has_footer=False
            
            
        def rowWrapper(row):
            newRows = [wrapfunc[i](item).split('\n') for i, item in enumerate(row)]
            return [[(substr or '') for substr in item] for item in map(None,*newRows)]
        # break each logical row into one or more physical ones
        logicalRows = [rowWrapper(row) for row in rows]
        numLogicalRows = len(logicalRows)
        # columns of physical rows
        columns = map(None,*reduce(operator.add,logicalRows))
        numCols = len(columns)
        colNums = list(range(numCols))

        # get the maximum of each column by the string length of its items
        maxWidths = [max(col_width[i], *[len(str(item)) for item in column])
                        for i, column in enumerate(columns)]

        def getSeparator(char, separate):
            if char is not None and separate:
                return char * (len(prefix) + len(postfix) + sum(maxWidths) + \
                                             len(delim)*(len(maxWidths)-1))
            else:
                return None

        header_separator = getSeparator(header_char, has_header)
        footer_separator = getSeparator(footer_char, has_footer)
        row_separator = getSeparator(row_char, True)

        # select the appropriate justify method
        justify_funcs = {'center':str.center, 'right':str.rjust, 'left':str.ljust,
                          'none':(lambda text, width: text)}

        if has_header and has_footer:
            data_start = 1
            data_end = numLogicalRows-3
        elif has_header:
            data_start = 1
            data_end = numLogicalRows-2
        elif has_footer:
            data_start = 0
            data_end = numLogicalRows-3
        else:
            data_start = 0
            data_end = numLogicalRows-2

        for rowNum, physicalRows in enumerate(logicalRows):
            for row in physicalRows:
                if rowNum == 0 and header_separator:
                    _justify = header_justify
                else:
                    _justify = justify
                    
                output.write(prefix \
                           + delim.join([justify_funcs[str(_justify[colNum]).lower()](str(item),width) for (colNum,item,width) in zip(colNums,row,maxWidths)]) \
                           + postfix + nls)

            if row_separator and (data_start <= rowNum <= data_end):
                output.write(row_separator + nls)
            elif header_separator and rowNum < data_start:
                output.write(header_separator + nls)
            elif footer_separator and rowNum == data_end + 1:
                output.write(footer_separator + nls)


class wrap_onspace(object):
    """A callable word-wrap class that preserves existing line breaks
and most spaces in the text.
    
:IVariables:
    - `width` (int): width to wrap at. Won't split up words wider than `width`
    - `nls` (str, *['\n']*): New line separator. Assumes existing line
      breaks use this new line separator as well.
      
:Parameters (for __call__ method):
    - `text` (str): text to wrap"""

    def __init__(self, width, nls='\n'):
        self.width = width
        self.nls = nls
        
    def __call__(self, text):
    
        width = self.width
        nls = self.nls

        def break_or_space(line, word, width):
            temp_idx = (len(line[line.rfind(nls)+1:]) + len(word.split(nls,1)[0]) >= width)
            if temp_idx:
                return nls
            else:
                return ' '

        return reduce(lambda line, word, width=width: '%s%s%s' %
                      (line,
                       break_or_space(line, word, width),
                       word),
                      text.split(' ')
                     )    


import re
class wrap_onspace_strict(object):
    """A callable word-wrap class similar to wrap_onspace, but
enforces the width constraint: words longer than width are split.
    
:IVariables:
    - `width` (int): width to wrap at. Will split up words wider than `width`
    - `nls` (str, *['\n']*): New line separator. Assumes existing line
      breaks use this new line separator as well.
      
:Parameters (for __call__ method):
    - `text` (str): text to wrap"""

    def __init__(self, width, nls='\n'):
        self.width = width
        self.nls = nls
        
    def __call__(self, text):
    
        width = self.width
        nls = self.nls

        wordRegex = re.compile(r'\S{'+str(width)+r',}')
        return wrap_onspace(wordRegex.sub(lambda m: wrap_always(m.group(),width, nls=nls),text),width, nls=nls)


import math
class wrap_always(object):
    """A callable word-wrap class that wraps text on exactly width
characters. It doesn't split the text into words.
    
:IVariables:
    - `width` (int): width to wrap at.
    - `nls` (str, *['\n']*): New line separator.
      
:Parameters (for __call__ method):
    - `text` (str): text to wrap"""

    def __init__(self, width, nls='\n'):
        self.width = width
        self.nls = nls
        
    def __call__(self, text):
    
        width = self.width
        nls = self.nls
        return nls.join([ text[width*i:width*(i+1)] \
                           for i in xrange(int(math.ceil(1.*len(text)/width))) ])