1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149
|
import xml.etree.ElementTree as ET
import datetime
import html
import re
from ofxstatement.plugin import Plugin
from ofxstatement.statement import Statement, StatementLine
# Credit/debit indicator codes. The text of an entry's ``CdtDbtInd`` element
# is compared against CD_DEBIT to decide the sign of the parsed amount.
CD_CREDIT = 'CRDT'  # not referenced in the visible part of this file
CD_DEBIT = 'DBIT'
class OtpPlugin(Plugin):
    """OTP (XML)

    Plugin entry point that hands statement files to ``OtpParser``.
    """

    def get_parser(self, filename):
        """Build the parser responsible for reading *filename*."""
        parser = OtpParser(filename)
        return parser
class OtpParser(object):
    """Parse an OTP XML account report into a ``Statement``.

    The element names used below (``Rpt``, ``Bal``, ``Ntry`` ...) look like
    ISO 20022 camt tags used without a namespace.
    NOTE(review): confirm against a real export.
    """

    # Narrative found in AddtlTxInf for card purchases (Hungarian for
    # "purchase by card"). For such entries the payee is derived from the
    # memo when the entry itself names no counterparty.
    _CARD_PURCHASE = 'VÁSÁRLÁS KÁRTYÁVAL'

    def __init__(self, filename):
        """Remember the path of the XML file; nothing is read yet."""
        self.filename = filename

    def parse(self):
        """Main entry point for parsers.

        Reads ``self.filename``, fills a fresh ``Statement`` (also stored
        on ``self.statement``) with the account properties and all entries,
        and returns it.
        """
        self.statement = Statement()
        tree = ET.parse(self.filename)
        self._parse_statement_properties(tree)
        self._parse_lines(tree)
        return self.statement

    def _parse_statement_properties(self, tree):
        """Copy bank name, account id and balances from ``Rpt`` to the statement.

        Raises ``KeyError`` when the report carries no ``OPBD`` balance;
        the ``CLBD`` balance is optional and defaults to ``None``.
        """
        stmt = tree.find('./Rpt')
        bnk = stmt.find('./Acct/Svcr/FinInstnId/Nm')
        iban = stmt.find('./Acct/Id/Othr/Id')

        bal_amts = {}
        bal_dates = {}
        for bal in stmt.findall('./Bal'):
            cd = bal.find('./Tp/CdOrPrtry/Cd')
            if cd is None:
                # A balance without a type code cannot be OPBD/CLBD,
                # so there is nothing to record for it.
                continue
            # Amount currency should match with statement currency
            # (the currency of the Amt element is not checked here).
            bal_amts[cd.text] = self._parse_amount(bal.find('./Amt'))
            bal_dates[cd.text] = self._parse_date(bal.find('./Dt'))

        self.statement.bank_id = bnk.text
        self.statement.account_id = iban.text
        self.statement.start_balance = bal_amts['OPBD']
        self.statement.start_date = bal_dates['OPBD']
        self.statement.end_balance = bal_amts.get('CLBD', None)
        self.statement.end_date = bal_dates.get('CLBD', None)

    def _parse_lines(self, tree):
        """Append one statement line per ``Rpt/Ntry`` element."""
        for ntry in _findall(tree, 'Rpt/Ntry'):
            self.statement.lines.append(self._parse_line(ntry))

    def _parse_line(self, ntry):
        """Convert a single ``Ntry`` element into a ``StatementLine``.

        Debit entries get a negative amount and use the creditor as payee;
        all other entries use the debtor. The remittance text and the
        additional-info narrative are optional and are skipped when absent.
        """
        sline = StatementLine()

        crdeb = _find(ntry, 'CdtDbtInd').text
        amt = self._parse_amount(_find(ntry, 'Amt'))
        if crdeb == CD_DEBIT:
            amt = -amt
            payee = _find(ntry, 'NtryDtls/TxDtls/RltdPties/Cdtr/Nm')
        else:
            payee = _find(ntry, 'NtryDtls/TxDtls/RltdPties/Dbtr/Nm')
        if payee is not None:
            payee = payee.text
        sline.payee = payee
        sline.amount = amt

        sline.date = self._parse_date(_find(ntry, 'ValDt'))
        sline.date_user = self._parse_date(_find(ntry, 'BookgDt'))

        svcref = _find(ntry, 'NtryDtls/TxDtls/Refs/AcctSvcrRef')
        sline.refnum = svcref.text if svcref is not None else None

        # The unstructured remittance info is optional: a missing element
        # is treated the same as an empty one.
        rmtinf = _find(ntry, 'NtryDtls/TxDtls/RmtInf/Ustrd')
        if rmtinf is not None and rmtinf.text:
            sline.memo = rmtinf.text
        else:
            sline.memo = ''

        addtlinf = self._parse_addtlinf(
            _find(ntry, 'NtryDtls/TxDtls/AddtlTxInf'))
        if addtlinf == self._CARD_PURCHASE and not sline.payee:
            # Card purchases name no counterparty; derive it from the memo
            # before the narrative is appended to it.
            sline.payee = _trim_payee(sline.memo)
        if addtlinf:
            sline.memo += ' ' + addtlinf
        return sline

    def _parse_date(self, dtnode):
        """Return the datetime held in *dtnode*'s ``Dt`` or ``DtTm`` child.

        Returns ``None`` when *dtnode* itself is ``None``. Raises
        ``ValueError`` when the node has neither child or when the text
        does not match the expected format.
        """
        if dtnode is None:
            return None
        dt = dtnode.find('Dt')
        if dt is not None:
            return datetime.datetime.strptime(dt.text, "%Y-%m-%d")
        dttm = dtnode.find('DtTm')
        if dttm is None:
            # Explicit error instead of ``assert``, which is stripped
            # when Python runs with -O.
            raise ValueError(
                "date element <%s> has neither Dt nor DtTm" % dtnode.tag)
        return datetime.datetime.strptime(dttm.text, "%Y-%m-%dT%H:%M:%S")

    def _parse_amount(self, amtnode):
        """Return the text of *amtnode* converted to ``float``."""
        return float(amtnode.text)

    def _parse_addtlinf(self, addtlinf):
        """Extract the ``<narr>`` text embedded in an ``AddtlTxInf`` node.

        The node text is itself an escaped XML fragment, so it is
        unescaped, wrapped in a synthetic root and parsed again. Returns
        ``None`` when the node is missing, empty, or has no ``narr``
        child. A fragment that is not well-formed XML still raises
        ``ET.ParseError``.
        """
        if addtlinf is None or not addtlinf.text:
            return None
        wrapped = '<root>' + html.unescape(addtlinf.text) + '</root>'
        narr = ET.fromstring(wrapped).find('narr')
        if narr is None:
            return None
        return narr.text
def _toxpath(spath):
tags = spath.split('/')
path = ['%s' % t for t in tags]
xpath = './%s' % '/'.join(path)
return xpath
def _find(tree, spath):
    """Return whatever ``tree.find`` yields for the slash-separated *spath*."""
    xpath = _toxpath(spath)
    return tree.find(xpath)
def _findall(tree, spath):
    """Return whatever ``tree.findall`` yields for the slash-separated *spath*."""
    xpath = _toxpath(spath)
    return tree.findall(xpath)
_trim_payee_regexes = [
re.compile(' .*'),
re.compile('\d+\.\d+\.\d+ \d+'),
re.compile('[\d,]+EUR.*')
]
def _trim_payee(payee):
for r in _trim_payee_regexes:
payee = r.sub('', payee)
return payee
|