File: umfile.py

package info (click to toggle)
cf-python 1.3.2%2Bdfsg1-4
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 7,996 kB
  • sloc: python: 51,733; ansic: 2,736; makefile: 78; sh: 2
file content (315 lines) | stat: -rw-r--r-- 11,261 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
import os
import numpy

import cInterface
from extraData import ExtraDataUnpacker

class UMFileException(Exception):
    pass

class File(object):

    """
    a class for a UM data file that gives a view of the file including sets of
    PP records combined into variables
    """
    
    def __init__(self, path,
                 byte_ordering = None,
                 word_size = None,
                 format = None,
                 parse = True):
        
        """
        Open and parse a UM file.  The following optional arguments specify
        the file type.  If all three are set, then this forces the file type;
        otherwise, the file type is autodetected and any of them that are set 
        are ignored.

           byte_ordering: 'little_endian' or 'big_endian'
           word_size: 4 or 8
           format:  'FF' or 'PP'

        The default action is to open the file, store the file type from the
        arguments or autodetection as described above, and then parse the
        contents, giving a tree of variables and records under the File
        object.  However, if "parse = False" is set, then an object is
        returned in which the last step is omitted, so only the file type is
        stored, and there are no variables under it.  Such an object can be
        passed when instantiating Rec objects, and contains sufficient info
        about the file type to ensure that the get_data method of those Rec
        objects will work.
        """
        c = cInterface.CInterface()
        self._c_interface = c
        self.path = path
        self.fd = None
        self.open_fd()
        if byte_ordering and word_size and format:
            self.format = format
            self.byte_ordering = byte_ordering
            self.word_size = word_size
        else:
            self._detect_file_type()
        self.path = path
        file_type_obj = c.create_file_type(self.format, self.byte_ordering, self.word_size)
        c.set_word_size(file_type_obj)
        if parse:
            info = c.parse_file(self.fd, file_type_obj)
            self.vars = info["vars"]
            self._add_back_refs()
    
    def open_fd(self):
        """
        (Re)open the low-level file descriptor.
        """
        if self.fd is None:
            self.fd = os.open(self.path, os.O_RDONLY)
        return self.fd

    def close_fd(self):
        """
        Close the low-level file descriptor.
        """
        if self.fd:
            os.close(self.fd)
        self.fd = None

    def _detect_file_type(self):
        c = self._c_interface
	try:
	    file_type_obj = c.detect_file_type(self.fd)
	except:
            self.close_fd()
            raise IOError("File {0} has unsupported format".format(path))
        d = c.file_type_obj_to_dict(file_type_obj)
        self.format = d["format"]
        self.byte_ordering = d["byte_ordering"]
        self.word_size = d["word_size"]

    def _add_back_refs(self):
        """
        Add file attribute to Var objects, and both file and var attributes
        to Rec objects.  The important one is the file attribute in the Rec
        object, as this is used when reading data.  The others are provided
        for extra convenience.
        """
        for var in self.vars:
            var.file = self
            for rec in var.recs:
                rec.var = var
                rec.file = self

class Var(object):
    """
    container for some information about variables
    """
    def __init__(self, recs, nz, nt, supervar_index=None):
        self.recs = recs
        self.nz = nz
        self.nt = nt
        self.supervar_index = supervar_index
    
    def _compare_recs_by_extra_data(self, a, b):
        return cmp(a.get_extra_data(),
                   b.get_extra_data())

    def _compare_recs_by_orig_order(self, a, b):
        return cmp(self.recs.index(a), self.recs.index(b))

    def group_records_by_extra_data(self):
        """
        Returns a list of (sub)lists of records where each records within each
        sublist has matching extra data (if any), so if the whole variable has 
        consistent extra data then the return value will be of length 1.
        Within each group, the ordering of returned records is the same as in 
        self.recs
        """
        compare = self._compare_recs_by_extra_data
        recs = self.recs[:]
        n = len(recs)
        if n == 0:
            # shouldn't have a var without records, but...
            return []
        recs.sort(compare)

        # optimise simple case - if two ends of a sorted list match, 
        # the whole list matches
        if not compare(recs[0], recs[-1]):
            return [self.recs[:]]

        groups = []
        this_grp = []
        for i, rec in enumerate(recs):
            this_grp.append(rec)
            if i == n - 1 or compare(rec, recs[i + 1]):
                this_grp.sort(self._compare_recs_by_orig_order)
                groups.append(this_grp)
                this_grp = []
        return groups

class Rec(object):
    """
    container for some information about records
    """
    def __init__(self, int_hdr, real_hdr, hdr_offset, data_offset, disk_length, 
                 file = None):
        """
        Default instantiation, which stores the supplied headers and offsets.
        The 'file' argument, used to set the 'file' attribute, does not need
        to be supplied, but if it is not then it will have to be set on the
        returned object (to the containing File object) before calling
        get_data() will work.  Normally this would be done by the calling code
        instantiating via File rather than directly.
        """
        self.int_hdr = int_hdr
        self.real_hdr = real_hdr
        self.hdr_offset = hdr_offset
        self.data_offset = data_offset
        self.disk_length = disk_length
        self._extra_data = None
        if file:
            self.file = file

    @classmethod
    def from_file_and_offsets(cls, file, hdr_offset, data_offset, disk_length):
        """
        Instantiate a Rec object from the file object and the 
        header and data offsets.  The headers are read in, and 
        also the record object is ready for calling get_data().
        """
        c = file._c_interface
        int_hdr, real_hdr = c.read_header(file.fd,
                                          hdr_offset,
                                          file.byte_ordering,
                                          file.word_size)
        return cls(int_hdr, real_hdr, hdr_offset, data_offset, disk_length, file=file)

    def read_extra_data(self):
        """
        read the extra data associated with the record
        """
        c = self.file._c_interface
        file = self.file

        extra_data_offset, extra_data_length = \
            c.get_extra_data_offset_and_length(self.int_hdr,
                                               self.data_offset, 
                                               self.disk_length)
        
        raw_extra_data = c.read_extra_data(file.fd,
                                           extra_data_offset,
                                           extra_data_length,
                                           file.byte_ordering, 
                                           file.word_size)

        edu = ExtraDataUnpacker(raw_extra_data, 
                                file.word_size,
                                file.byte_ordering)

        return edu.get_data()

    def get_extra_data(self):
        """
        get extra data associated with the record, either by 
        reading or using cached read
        """
        if self._extra_data == None:
            self._extra_data = self.read_extra_data()
        return self._extra_data

    def get_type_and_num_words(self):
        """
        get the data type (as numpy type) and number of words as 2-tuple
        """
        c = self.file._c_interface
        ntype, num_words = c.get_type_and_num_words(self.int_hdr)
        if ntype == 'integer':
            dtype = numpy.dtype(c.file_data_int_type)
        elif ntype == 'real':
            dtype = numpy.dtype(c.file_data_real_type)
        return dtype, num_words

    def get_data(self):
        """
        get the data array associated with the record
        """
        c = self.file._c_interface
        file = self.file
        data_type, nwords = c.get_type_and_num_words(self.int_hdr)
#        print "data_type = %s nwords = %s" % (data_type, nwords)
        return c.read_record_data(file.fd,
                                  self.data_offset,
                                  self.disk_length,
                                  file.byte_ordering,
                                  file.word_size,
                                  self.int_hdr,
                                  self.real_hdr,
                                  data_type, 
                                  nwords)

            

if __name__ == '__main__':

    import sys
    
    path = sys.argv[1]
    f = File(path)
    print f.format, f.byte_ordering, f.word_size
    print "num variables: %s" % len(f.vars)
    for varno, var in enumerate(f.vars):
        print 
        print "var %s: nz = %s, nt = %s" % (varno, var.nz, var.nt)
        for recno, rec in enumerate(var.recs):
            print "var %s record %s" % (varno, recno)
            print "hdr offset: %s" % rec.hdr_offset
            print "data offset: %s" % rec.data_offset
            print "disk length: %s" % rec.disk_length
            print "int hdr: %s" % rec.int_hdr
            print "real hdr: %s" % rec.real_hdr
            print "data: %s" % rec.get_data()
            print "extra_data: %s" % rec.get_extra_data()
            print "type %s, num words: %s" % rec.get_type_and_num_words()
            # if recno == 1:
            #     rec._extra_data['y'] += .01
            #     print "massaged_extra_data: %s" % rec.get_extra_data()
            print "-----------------------"
        print "all records", var.recs
        print "records grouped by extra data ", var.group_records_by_extra_data()
        print "==============================="
    f.close_fd()
    
    # also read a record using saved metadata
    if f.vars:

        format = f.format
        byte_ordering = f.byte_ordering
        word_size = f.word_size
        myrec = f.vars[0].recs[0]
        hdr_offset = myrec.hdr_offset
        data_offset = myrec.data_offset
        disk_length = myrec.disk_length

        del(f)

        fnew = File(path,
                    format = format,
                    byte_ordering = byte_ordering,
                    word_size = word_size,
                    parse = False)

        rnew = Rec.from_file_and_offsets(fnew, hdr_offset, data_offset, disk_length)
        print "record read using saved file type and offsets:"
        print "int hdr: %s" % rnew.int_hdr
        print "real hdr: %s" % rnew.real_hdr
        print "data: %s" % rnew.get_data()
        print "extra data: %s" % rnew.get_extra_data()

        print "nx = %s" % rnew.int_hdr[18]
        print "ny = %s" % rnew.int_hdr[17]
        rdata = open("recdata0.txt", "w")
        for value in rnew.get_data():
            rdata.write("%s\n" % value)
        rdata.close()