File: otp.py

package info (click to toggle)
ofxstatement-plugins 20181208
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 4,064 kB
  • sloc: python: 7,004; xml: 1,027; makefile: 135; sh: 84
file content (149 lines) | stat: -rw-r--r-- 4,121 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import xml.etree.ElementTree as ET
import datetime
import html
import re

from ofxstatement.plugin import Plugin
from ofxstatement.statement import Statement, StatementLine


# Credit/debit indicator codes, compared against the text of an entry's
# ``CdtDbtInd`` element.  ``OtpParser._parse_line`` negates the amount of
# entries marked CD_DEBIT; CD_CREDIT is not referenced by the code visible
# in this file.
CD_CREDIT = 'CRDT'
CD_DEBIT = 'DBIT'

class OtpPlugin(Plugin):
    """OTP (XML)
    """
    # NOTE(review): the class docstring above is kept verbatim -- presumably
    # the host application displays it when listing plugins; confirm before
    # rewording it.

    def get_parser(self, filename):
        """Build and return the ``OtpParser`` for *filename*."""
        parser = OtpParser(filename)
        return parser


class OtpParser(object):
    """Parser that turns an OTP XML account report into a ``Statement``.

    The input is read with ``xml.etree.ElementTree``.  Statement-level data
    comes from the ``Rpt`` element and one statement line is produced for
    every ``Rpt/Ntry`` element.
    """

    def __init__(self, filename):
        """Remember *filename*; it is only handed to ``ET.parse`` in
        :meth:`parse`."""
        self.filename = filename

    def parse(self):
        """Main entry point for parsers

        Parses ``self.filename`` and returns the populated ``Statement``
        (also kept as ``self.statement``).
        """
        self.statement = Statement()
        tree = ET.parse(self.filename)

        self._parse_statement_properties(tree)
        self._parse_lines(tree)

        return self.statement

    def _parse_statement_properties(self, tree):
        """Fill bank id, account id, balances and dates of the statement.

        Balances are keyed by the text of their ``Tp/CdOrPrtry/Cd`` element:
        ``OPBD`` provides the start balance/date and ``CLBD`` (optional)
        the end balance/date.

        Raises:
            ValueError: if *tree* has no ``Rpt`` element.
            KeyError: if no ``OPBD`` balance is present.
        """
        stmt = tree.find('./Rpt')
        if stmt is None:
            # Fail with a readable message instead of an AttributeError on
            # None when the file does not have the expected layout.
            raise ValueError(
                "no 'Rpt' element found in %r" % (self.filename,))

        bnk = stmt.find('./Acct/Svcr/FinInstnId/Nm')
        iban = stmt.find('./Acct/Id/Othr/Id')

        bal_amts = {}
        bal_dates = {}
        for bal in stmt.findall('./Bal'):
            cd = bal.find('./Tp/CdOrPrtry/Cd')
            amt = bal.find('./Amt')
            dt = bal.find('./Dt')

            # NOTE: the amount's currency is not checked here; it should
            # match the statement currency.
            bal_amts[cd.text] = self._parse_amount(amt)
            bal_dates[cd.text] = self._parse_date(dt)

        self.statement.bank_id = bnk.text
        self.statement.account_id = iban.text
        self.statement.start_balance = bal_amts['OPBD']
        self.statement.start_date = bal_dates['OPBD']
        self.statement.end_balance = bal_amts.get('CLBD', None)
        self.statement.end_date = bal_dates.get('CLBD', None)

    def _parse_lines(self, tree):
        """Append one statement line per ``Rpt/Ntry`` element of *tree*."""
        for ntry in _findall(tree, 'Rpt/Ntry'):
            sline = self._parse_line(ntry)
            self.statement.lines.append(sline)

    def _parse_line(self, ntry):
        """Convert a single ``Ntry`` element into a ``StatementLine``.

        Debit entries get a negative amount and take the creditor's name as
        payee; all other entries take the debtor's name.  The memo is the
        unstructured remittance text followed by the additional transaction
        narrative, when present.
        """
        sline = StatementLine()

        crdeb = _find(ntry, 'CdtDbtInd').text

        amtnode = _find(ntry, 'Amt')
        amt = self._parse_amount(amtnode)
        if crdeb == CD_DEBIT:
            amt = -amt
            payee = _find(ntry, 'NtryDtls/TxDtls/RltdPties/Cdtr/Nm')
        else:
            payee = _find(ntry, 'NtryDtls/TxDtls/RltdPties/Dbtr/Nm')
        if payee is not None:
            payee = payee.text

        sline.payee = payee
        sline.amount = amt

        dt = _find(ntry, 'ValDt')
        sline.date = self._parse_date(dt)

        bookdt = _find(ntry, 'BookgDt')
        sline.date_user = self._parse_date(bookdt)

        svcref = _find(ntry, 'NtryDtls/TxDtls/Refs/AcctSvcrRef')
        sline.refnum = getattr(svcref, 'text', None)

        # The remittance element may be missing altogether, not just empty.
        rmtinf = _find(ntry, 'NtryDtls/TxDtls/RmtInf/Ustrd')
        sline.memo = getattr(rmtinf, 'text', None) or ''

        addtlinf_node = _find(ntry, 'NtryDtls/TxDtls/AddtlTxInf')
        addtlinf = self._parse_addtlinf(addtlinf_node)

        # Entries with this narrative (Hungarian for "purchase with card")
        # and no related party: derive the payee from the memo text.
        if 'VÁSÁRLÁS KÁRTYÁVAL' == addtlinf and not sline.payee:
            sline.payee = _trim_payee(sline.memo)

        if addtlinf:
            sline.memo += ' ' + addtlinf

        return sline

    def _parse_date(self, dtnode):
        """Return the ``datetime`` held by *dtnode*, or ``None`` for ``None``.

        A ``Dt`` child (``YYYY-MM-DD``) takes precedence over a ``DtTm``
        child (``YYYY-MM-DDTHH:MM:SS``).

        Raises:
            ValueError: if *dtnode* has neither child, or the text does not
                match the expected format.
        """
        if dtnode is None:
            return None

        dt = _find(dtnode, 'Dt')
        if dt is not None:
            return datetime.datetime.strptime(dt.text, "%Y-%m-%d")

        dttm = _find(dtnode, 'DtTm')
        if dttm is None:
            # Explicit error rather than ``assert``, which is stripped
            # when Python runs with -O.
            raise ValueError(
                "date element <%s> has neither 'Dt' nor 'DtTm' child"
                % dtnode.tag)
        return datetime.datetime.strptime(dttm.text, "%Y-%m-%dT%H:%M:%S")

    def _parse_amount(self, amtnode):
        """Return the text of *amtnode* converted to ``float``."""
        return float(amtnode.text)

    def _parse_addtlinf(self, addtlinf):
        """Extract the ``narr`` text embedded in an ``AddtlTxInf`` element.

        The element's text is HTML-unescaped, wrapped in a synthetic root and
        parsed as XML; the text of its ``narr`` child is returned.  An empty
        string is returned when the element, its text, or the ``narr`` child
        (or its text) is missing.

        Raises:
            xml.etree.ElementTree.ParseError: if the unescaped text is not
                well-formed XML.
        """
        text = getattr(addtlinf, 'text', None)
        if not text:
            return ''

        wrapped = '<root>' + html.unescape(text) + '</root>'
        narr = ET.fromstring(wrapped).find('narr')
        if narr is None or narr.text is None:
            return ''
        return narr.text


def _toxpath(spath):
    """Turn a slash-separated tag path into a relative ElementTree path.

    The path is split on ``/``, re-joined with ``/`` and prefixed with
    ``./`` so that lookups start at the node being searched.
    """
    segments = spath.split('/')
    return './' + '/'.join(segments)


def _find(tree, spath):
    """Return ``tree.find`` for the tag path *spath*.

    *spath* is first converted with ``_toxpath``; the result is whatever
    ``tree.find`` yields for that path.
    """
    xpath = _toxpath(spath)
    return tree.find(xpath)


def _findall(tree, spath):
    """Return ``tree.findall`` for the tag path *spath*.

    *spath* is first converted with ``_toxpath``; the result is whatever
    ``tree.findall`` yields for that path.
    """
    xpath = _toxpath(spath)
    return tree.findall(xpath)

# Patterns removed, in this order, from a memo to leave the payee name.
# Raw strings are used so that ``\d`` and ``\.`` reach the regex engine
# instead of being treated as (invalid) string escape sequences.
_trim_payee_regexes = [
    re.compile(r'   .*'),                # three spaces and everything after
    re.compile(r'\d+\.\d+\.\d+ \d+'),    # dotted number triple, space, digits
    re.compile(r'[\d,]+EUR.*'),          # digits/commas + "EUR" and the rest
]


def _trim_payee(payee):
    """Return *payee* with every ``_trim_payee_regexes`` match removed.

    The patterns are applied one after another, each on the result of the
    previous substitution.  Surrounding whitespace is not stripped.
    """
    for pattern in _trim_payee_regexes:
        payee = pattern.sub('', payee)
    return payee