File: pytables.py

package info (click to toggle)
python-ulmo 0.8.8%2Bdfsg1-1.1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 1,064 kB
  • sloc: python: 6,135; makefile: 144; sh: 5
file content (128 lines) | stat: -rw-r--r-- 4,234 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
from builtins import range
from past.builtins import basestring
import datetime

import tables

from ulmo.ncdc.gsod import core
from ulmo import util

# Default HDF5 file path, obtained from ulmo's util helper. It is used by
# _update_station_data() whenever no explicit path is supplied.
HDF5_FILE_PATH = util.get_default_h5file_path()

# This module is deliberately disabled: the raise below executes at import
# time, so importing (or running) the module stops here and none of the
# definitions that follow are ever reached.
raise NotImplementedError("ncdc.gsod.pytables is still a work in progress")


class NCDCValue(tables.IsDescription):
    # Row description handed to util.get_or_create_table() when a value table
    # is created in _get_value_table(). Every field is a fixed-width string
    # column; the number passed to StringCol is the item size.

    # 8-character date string; presumably YYYYMMDD -- TODO confirm.
    # _get_value_table() builds a sorted index on this column.
    date = tables.StringCol(8)
    # single-character flag; meaning is not visible in this file
    flag = tables.StringCol(1)
    # the value itself, kept as text of up to 20 characters
    value = tables.StringCol(20)
    # 26-character timestamp string; presumably an ISO-style datetime with
    # microseconds -- TODO confirm
    last_modified = tables.StringCol(26)


def get_data(station_codes, start=None, end=None, parameters=None,
        path=None):
    """Return stored GSOD data for one station or for several stations.

    Parameters
    ----------
    station_codes : str or iterable of str
        Either a single station code or an iterable of station codes.
    start, end : optional
        Forwarded unchanged to ``_get_station_data``.
    parameters : optional
        Forwarded unchanged to ``_get_station_data``.
    path : optional
        Accepted for signature compatibility; currently not used.

    Returns
    -------
    data
        For a single string code, whatever ``_get_station_data`` returns for
        that station. Otherwise a dict mapping each station code to the
        result of ``_get_station_data`` for that code.
    """
    if isinstance(station_codes, basestring):
        return _get_station_data(station_codes, start, end, parameters)

    # Look up each station individually (not the whole collection) and hand
    # the assembled mapping back to the caller.
    return {
        station_code: _get_station_data(station_code, start, end, parameters)
        for station_code in station_codes
    }


def get_stations(update=True, path=None):
    """Return the station listing provided by ``core.get_stations``.

    ``update`` is forwarded as-is; ``path`` is accepted but currently ignored.
    """
    #XXX: we should have a fast pytables version of stations list
    station_listing = core.get_stations(update=update)
    return station_listing


def update_data(station_codes=None, start_year=None, end_year=None, path=None):
    """Fetch GSOD data one year at a time and write it to the HDF5 file.

    Parameters
    ----------
    station_codes : str, iterable of str, or None
        Station code(s) to update. A single string is treated as one code.
        Codes missing from the station listing are skipped. If empty or
        ``None``, every station in the listing is updated.
    start_year : int, optional
        First year to fetch. If falsy, the year of ``_last_updated()`` is
        used, falling back to ``core.NCDC_GSOD_START_DATE.year`` when no
        last-update date is available.
    end_year : int, optional
        Last year to fetch (inclusive). Defaults to the current year.
    path : optional
        Passed on to ``_update_station_data``.
    """
    if not start_year:
        last_updated = _last_updated()
        if last_updated:
            start_year = last_updated.year
        else:
            start_year = core.NCDC_GSOD_START_DATE.year
    if not end_year:
        end_year = datetime.datetime.now().year

    # Accept a single station code the same way get_data() does; iterating a
    # bare string would otherwise walk over its individual characters.
    if isinstance(station_codes, basestring):
        station_codes = [station_codes]

    all_stations = get_stations()
    if station_codes:
        stations = {
            station_code: all_stations[station_code]
            for station_code in station_codes
            if station_code in all_stations
        }
    else:
        stations = all_stations

    # the set of requested codes does not change from year to year
    requested_codes = list(stations)

    for year in range(start_year, end_year + 1):
        start = datetime.datetime(year, 1, 1)
        end = datetime.datetime(year, 12, 31)
        data = core.get_data(requested_codes, start=start, end=end)
        for station_code, station_data in data.items():
            station = stations.get(station_code)
            # stations with no data for this year are skipped
            if station_data is not None:
                _update_station_data(station, station_data, path)


def _get_station_data(station_code, start=None, end=None, parameters=None):
    """Placeholder for reading a single station's data; not implemented yet.

    All arguments are currently ignored and ``None`` is always returned.
    """
    return None


def _get_value_table(h5file, station, variable):
    """Fetch the value table for a station/variable pair, creating it if needed.

    ``h5file`` must be an open, writable HDF5 file. The station's group is
    created first when missing. An existing table is returned untouched; a
    newly created one gets a sorted index on its ``date`` column and the
    station's ``USAF``, ``WBAN`` and ``name`` stored as table attributes.
    """
    # NOTE(review): getNode / createCSIndex are the PyTables 2.x camelCase
    # spellings; confirm they exist in the PyTables version being targeted.
    code = core._station_code(station)
    group_path = '/'.join(('/ncdc/gsod/values', code))
    util.get_or_create_group(h5file, group_path, "station %s" % code,
            createparents=True)

    table_path = '/'.join([group_path, variable])

    try:
        return h5file.getNode(table_path)
    except tables.exceptions.NoSuchNodeError:
        # first use of this station/variable: build and label the table
        table = util.get_or_create_table(
            h5file, table_path, NCDCValue,
            "Values for station: %s, variable: %s" % (code, variable))
        table.cols.date.createCSIndex()
        for key in ('USAF', 'WBAN', 'name'):
            setattr(table.attrs, key, station[key])
        return table


def _last_updated():
    """Return the date of the last update.

    The real lookup does not exist yet, so the current local time is
    reported instead.
    """
    #TODO: implement
    current_time = datetime.datetime.now()
    return current_time


def _update_station_data(station, station_data, path=None):
    """Write one station's records into its per-variable value tables.

    Parameters
    ----------
    station : mapping
        Station record; passed to ``_get_value_table`` for each variable.
    station_data : sequence of dict
        Records for the station. The keys of the first record determine
        which variables (and therefore which tables) are written.
    path : optional
        HDF5 file to open in append mode. Falls back to ``HDF5_FILE_PATH``
        when falsy.

    Nothing is opened or written when ``station_data`` is empty.
    """
    # Bail out before touching the file: there is nothing to store, and the
    # first-record lookup below would fail on an empty sequence.
    if not station_data:
        return

    if not path:
        path = HDF5_FILE_PATH
    with util.open_h5file(path, mode='a') as h5file:
        #XXX: assumes first dict is representative of all dicts
        variables = list(station_data[0].keys())

        for variable in variables:
            value_table = _get_value_table(h5file, station, variable)
            # NOTE(review): the full records are passed for every variable's
            # table -- confirm that is what update_or_append_sortable expects.
            util.update_or_append_sortable(value_table, station_data, 'date')


# Ad-hoc developer check: update the 2012 data for Texas stations in a test
# HDF5 file, then drop into the debugger to inspect the result.
if __name__ == '__main__':
    # NOTE(review): hard-coded path that looks developer-local; it will not
    # exist on other machines.
    test_path = '/Users/wilsaj/test/pyhis_test.h5'
    stations = get_stations(update=False, path=test_path)
    # keep the codes of the stations whose 'state' entry is 'TX'
    texas_stations = [
        code
        for code, station in stations.items()
        if station['state'] == 'TX']
    update_data(texas_stations, 2012, 2012, path=test_path)
    # NOTE(review): interactive breakpoint left in on purpose or by accident;
    # it blocks non-interactive runs -- confirm before shipping.
    import pdb; pdb.set_trace()