File: seb.py

package info (click to toggle)
ofxstatement-plugins 20181208
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 4,064 kB
  • sloc: python: 7,004; xml: 1,027; makefile: 135; sh: 84
file content (201 lines) | stat: -rw-r--r-- 6,502 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
# -*- coding: utf-8 -*-

import re
import itertools
import logging
import locale

from datetime import datetime
from openpyxl import load_workbook

from contextlib import contextmanager
from ofxstatement.parser import StatementParser
from ofxstatement.plugin import Plugin
from ofxstatement.statement import Statement, StatementLine, generate_transaction_id


def take(n, iterable):
    """Return first n items of the iterable as a list."""
    return list(itertools.islice(iterable, n))


@contextmanager
def scoped_setlocale(category, loc=None):
    """Scoped version of locale.setlocale()"""
    orig = locale.getlocale(category)
    try:
        yield locale.setlocale(category, loc)
    finally:
        locale.setlocale(category, orig)


def atof(string, loc=None):
    """Locale aware atof function for our parser."""
    with scoped_setlocale(locale.LC_NUMERIC, loc):
        return locale.atof(string)


class SebStatementParser(StatementParser):
    date_format = '%Y-%m-%d'
    bank_id = 'SEB'
    currency_id = 'SEK'
    footer_regexps = [
        '^Datum:  -',
        '^Datum: ([0-9]{4}-[0-9]{2}-[0-9]{2}) - ([0-9]{4}-[0-9]{2}-[0-9]{2})$'
    ]

    def __init__(self, fin, locale=None, brief=False):
        """
        Create a new SebStatementParser instance.

        :param fin: filename to create parser for
        :param brief: whenever to attempt replace description with a brief version i.e. all extra info removed
        """

        self.locale = locale
        self.brief = brief

        self.workbook = load_workbook(filename=fin, read_only=True)
        self.validate()

        self.statement = self.parse_statement()

    def validate(self):
        """
        Naive validation to make sure that xlsx document is structured the way it was
        when this parser was written.

        :raises ValueError if workbook has invalid format
        """

        try:
            self._validate()
        except AssertionError as e:
            raise ValueError(e)

    def _validate(self):
        sheet = self.workbook.active

        logging.info('Checking that sheet has at least 5 rows.')
        rows = take(5, sheet.iter_rows())
        assert len(rows) == 5

        logging.info('Verifying that every row has 6 cells.')
        assert type(rows) == list
        for row in rows:
            assert len(row) == 6

        logging.info('Extracting values from every cell.')
        rows = [[c.value for c in row] for row in rows]

        logging.info('Verifying summary header.')
        summary_header_row = rows[0]
        assert ['Saldo', 'Disponibelt belopp', 'Beviljad kredit', None, None] == summary_header_row[1:]

        logging.info('Verifying account id.')
        summary_account_row = rows[1]
        account_id = summary_account_row[0]
        assert re.match('^[0-9]+$', account_id)
        assert [None, None] == summary_account_row[-2:]

        def is_footer(row):
            for r in self.footer_regexps:
                if re.match(r, row[0]):
                    return True
            return False

        logging.info('Verifying summary footer.')
        summary_footer_row = rows[2]
        assert is_footer(summary_footer_row)
        assert [None, None, None, None, None] == summary_footer_row[1:]

        logging.info('Verifying empty row.')
        empty_row = rows[3]
        assert [None, None, None, None, None, None] == empty_row

        logging.info('Verifying statements header.')
        statement_header_row = rows[4]
        assert ['Bokföringsdatum', 'Valutadatum', 'Verifikationsnummer', 'Text/mottagare', 'Belopp', 'Saldo'] == statement_header_row

        logging.info('Everything is OK!')

    def parse_statement(self):
        statement = Statement()
        sheet = self.workbook.active

        # We need only first 3 rows here.
        rows = take(3, sheet.iter_rows())
        rows = [[c.value for c in row] for row in rows]

        assert len(rows) == 3
        header_row, account_row, footer_row = rows

        account_id, saldo, disponibelt_belopp, beviljad_kredit, _1, _2 = account_row
        statement.account_id = account_id
        statement.end_balance = atof(saldo, self.locale)
        statement.bank_id = self.bank_id
        statement.currency = self.currency_id

        for r in self.footer_regexps:
            m = re.match(r, footer_row[0])
            if m and m.groups():
                part_from, part_to = m.groups()
                statement.start_date = self.parse_datetime(part_from)
                statement.end_date = self.parse_datetime(part_to)

        return statement

    def split_records(self):
        sheet = self.workbook.active

        # Skip first 5 rows. Headers they are.
        for row in itertools.islice(sheet.iter_rows(), 5, None):
            yield [c.value for c in row]

    def parse_record(self, row):
        row = take(5, row)

        stmt_line = StatementLine()
        stmt_line.date = self.parse_datetime(row[0])
        _ = self.parse_datetime(row[1])  # TODO: ???
        stmt_line.refnum = row[2]
        stmt_line.memo = row[3]
        stmt_line.amount = row[4]

        #
        # Looks like SEB formats description for card transactions so it includes the actual purchase date
        # within e.g. 'WIRSTRÖMS PU/14-12-31' and it means that description is 'WIRSTRÖMS PU' while the actual
        # card operation is 2014-12-31.
        #
        # P.S. Wirströms Irish Pub is our favorite pub in Stockholm: http://www.wirstromspub.se
        #
        m = re.match('(.*)/([0-9]{2}-[0-9]{2}-[0-9]{2})$', stmt_line.memo)
        if m:
            card_memo, card_date = m.groups()
            if self.brief:
                stmt_line.memo = card_memo
            stmt_line.date_user = datetime.strptime(card_date, '%y-%m-%d')

        stmt_line.id = generate_transaction_id(stmt_line)
        return stmt_line


def parse_bool(value):
    if value in ('True', 'true', '1'):
        return True
    if value in ('False', 'false', '0'):
        return False
    raise ValueError("Can't parse boolean value: %s" % value)


class SebPlugin(Plugin):
    def get_parser(self, fin):
        kwargs = {
            'locale': 'sv_SE'
        }
        if self.settings:
            if 'locale' in self.settings:
                kwargs['locale'] = parse_bool(self.settings.get('locale'))
            if 'brief' in self.settings:
                kwargs['brief'] = parse_bool(self.settings.get('brief'))
        return SebStatementParser(fin, **kwargs)