File: _fortran_format_parser.py

package info (click to toggle)
python-scipy 0.10.1%2Bdfsg2-1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 42,232 kB
  • sloc: cpp: 224,773; ansic: 103,496; python: 85,210; fortran: 79,130; makefile: 272; sh: 43
file content (312 lines) | stat: -rw-r--r-- 8,991 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
"""
Preliminary module to handle fortran formats for IO. Does not use this outside
scipy.sparse io for now, until the API is deemed reasonable.

The *Format classes handle conversion between fortran and python format, and
FortranFormatParser can create *Format instances from raw fortran format
strings (e.g. '(3I4)', '(10I3)', etc...)
"""

import re
import warnings

import numpy as np


__all__ = ["BadFortranFormat", "FortranFormatParser", "IntFormat", "ExpFormat"]


TOKENS = {
    "LPAR": r"\(",
    "RPAR": r"\)",
    "INT_ID": r"I",
    "EXP_ID": r"E",
    "INT": r"\d+",
    "DOT": r"\.",
}


class BadFortranFormat(SyntaxError):
    pass


def number_digits(n):
    return int(np.floor(np.log10(np.abs(n))) + 1)


class IntFormat(object):
    @classmethod
    def from_number(cls, n, min=None):
        """Given an integer, returns a "reasonable" IntFormat instance to represent
        any number between 0 and n if n > 0, -n and n if n < 0

        Parameters
        ----------
        n: int
            max number one wants to be able to represent
        min: int
            minimum number of characters to use for the format

        Returns
        -------
        res: IntFormat
            IntFormat instance with reasonable (see Note) computed width

        Note
        ----
        Reasonable should be understood as the minimal string length necessary
        without losing precision. For example, IntFormat.from_number(1) will
        return an IntFormat instance of width 2, so that any 0 and 1 may be
        represented as 1-character strings without loss of information."""
        width = number_digits(n) + 1
        if n < 0:
            width += 1
        repeat = 80 // width
        return cls(width, min, repeat=repeat)

    def __init__(self, width, min=None, repeat=None):
        self.width = width
        self.repeat = repeat
        self.min = min

    def __repr__(self):
        r = "IntFormat("
        if self.repeat:
            r += "%d" % self.repeat
        r += "I%d" % self.width
        if self.min:
            r += ".%d" % self.min
        return r + ")"

    @property
    def fortran_format(self):
        r = "("
        if self.repeat:
            r += "%d" % self.repeat
        r += "I%d" % self.width
        if self.min:
            r += ".%d" % self.min
        return r + ")"

    @property
    def python_format(self):
        return "%" + str(self.width) + "d"


class ExpFormat(object):
    @classmethod
    def from_number(cls, n, min=None):
        """Given a float number, returns a "reasonable" ExpFormat instance to
        represent any number between -n and n.

        Parameters
        ----------
        n: float
            max number one wants to be able to represent
        min: int
            minimum number of characters to use for the format

        Returns
        -------
        res: ExpFormat
            ExpFormat instance with reasonable (see Note) computed width

        Note
        ----
        Reasonable should be understood as the minimal string length necessary
        to avoid losing precision."""
        # len of one number in exp format: sign + 1|0 + "." +
        # number of digit for fractional part + 'E' + sign of exponent +
        # len of exponent
        finfo = np.finfo(n.dtype)
        # Number of digits for fractional part
        n_prec = finfo.precision + 1
        # Number of digits for exponential part
        n_exp = number_digits(np.max(np.abs([finfo.maxexp, finfo.minexp])))
        width = 1 + 1 + n_prec + 1 + n_exp + 1
        if n < 0:
            width += 1
        repeat = int(np.floor(80 / width))
        return cls(width, n_prec, min, repeat=repeat)

    def __init__(self, width, significand, min=None, repeat=None):
        """\
        Parameters
        ----------
        width: int
            number of characters taken by the string (includes space).
        """
        self.width = width
        self.significand = significand
        self.repeat = repeat
        self.min = min

    def __repr__(self):
        r = "ExpFormat("
        if self.repeat:
            r += "%d" % self.repeat
        r += "E%d.%d" % (self.width, self.significand)
        if self.min:
            r += "E%d" % self.min
        return r + ")"

    @property
    def fortran_format(self):
        r = "("
        if self.repeat:
            r += "%d" % self.repeat
        r += "E%d.%d" % (self.width, self.significand)
        if self.min:
            r += "E%d" % self.min
        return r + ")"

    @property
    def python_format(self):
        return "%" + str(self.width-1) + "." + str(self.significand) + "E"


class Token(object):
    def __init__(self, type, value, pos):
        self.type = type
        self.value = value
        self.pos = pos

    def __str__(self):
        return """Token('%s', "%s")""" % (self.type, self.value)

    def __repr__(self):
        return self.__str__()


class Tokenizer(object):
    def __init__(self):
        self.tokens = TOKENS.keys()
        self.res = [re.compile(TOKENS[i]) for i in self.tokens]

    def input(self, s):
        self.data = s
        self.curpos = 0
        self.len = len(s)

    def next_token(self):
        curpos = self.curpos
        tokens = self.tokens

        while curpos < self.len:
            for i, r in enumerate(self.res):
                m = r.match(self.data, curpos)
                if m is None:
                    continue
                else:
                    self.curpos = m.end()
                    return Token(self.tokens[i], m.group(), self.curpos)
            else:
                raise SyntaxError("Unknown character at position %d (%s)" \
                                  % (self.curpos, self.data[curpos]))


# Grammar for fortran format:
# format            : LPAR format_string RPAR
# format_string     : repeated | simple
# repeated          : repeat simple
# simple            : int_fmt | exp_fmt
# int_fmt           : INT_ID width
# exp_fmt           : simple_exp_fmt
# simple_exp_fmt    : EXP_ID width DOT significand
# extended_exp_fmt  : EXP_ID width DOT significand EXP_ID ndigits
# repeat            : INT
# width             : INT
# significand       : INT
# ndigits           : INT

# Naive fortran formatter - parser is hand-made
class FortranFormatParser(object):
    """Parser for fortran format strings. The parse method returns a *Format
    instance.

    Note
    ----
    Only ExpFormat (exponential format for floating values) and IntFormat
    (integer format) for now.
    """
    def __init__(self):
        self.tokenizer = Tokenizer()

    def parse(self, s):
        self.tokenizer.input(s)

        tokens = []

        try:
            while True:
                t = self.tokenizer.next_token()
                if t is None:
                    break
                else:
                    tokens.append(t)
            return self._parse_format(tokens)
        except SyntaxError, e:
            raise BadFortranFormat(str(e))

    def _get_min(self, tokens):
        next = tokens.pop(0)
        if not next.type == "DOT":
            raise SyntaxError()
        next = tokens.pop(0)
        return next.value

    def _expect(self, token, tp):
        if not token.type == tp:
            raise SyntaxError()

    def _parse_format(self, tokens):
        if not tokens[0].type == "LPAR":
            raise SyntaxError("Expected left parenthesis at position "\
                              "%d (got '%s')" % (0, tokens[0].value))
        elif not tokens[-1].type == "RPAR":
            raise SyntaxError("Expected right parenthesis at position "\
                              "%d (got '%s')" % (len(tokens), tokens[-1].value))

        tokens = tokens[1:-1]
        types = [t.type for t in tokens]
        if types[0] == "INT":
            repeat = int(tokens.pop(0).value)
        else:
            repeat = None

        next = tokens.pop(0)
        if next.type == "INT_ID":
            next = self._next(tokens, "INT")
            width = int(next.value)
            if tokens:
                min = int(self._get_min(tokens))
            else:
                min = None
            return IntFormat(width, min, repeat)
        elif next.type == "EXP_ID":
            next = self._next(tokens, "INT")
            width = int(next.value)

            next = self._next(tokens, "DOT")

            next = self._next(tokens, "INT")
            significand = int(next.value)

            if tokens:
                next = self._next(tokens, "EXP_ID")

                next = self._next(tokens, "INT")
                min = int(next.value)
            else:
                min = None
            return ExpFormat(width, significand, min, repeat)
        else:
            raise SyntaxError("Invalid formater type %s" % next.value)

    def _next(self, tokens, tp):
        if not len(tokens) > 0:
            raise SyntaxError()
        next = tokens.pop(0)
        self._expect(next, tp)
        return next