File: message.py

package info (click to toggle)
getmail6 6.19.10-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,124 kB
  • sloc: python: 6,634; sh: 897; makefile: 73
file content (224 lines) | stat: -rwxr-xr-x 8,482 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
# -*- coding: utf-8 -*-
# docs/COPYING 2a + DRY: https://github.com/getmail6/getmail6
# Please refer to the git history regarding who changed what and when in this file.

'''The getmail Message class.

'''

import sys
import os
import time
import re
import email
import email.errors as Errors
import email.utils as Utils
import email.parser as Parser
from email.generator import Generator
from email.header import Header


from getmailcore.exceptions import *
from getmailcore.utilities import mbox_from_escape, format_header, \
    address_no_brackets
import getmailcore.logging

# Public API of this module: only the Message class is exported by
# ``from ... import *``.
__all__ = [
    'Message',
]


# Names of the per-message metadata attributes.  corrupt_message() sets each
# of them to '' on the container it builds, and Message.copyattrs() copies
# each of them from one message object to another.
message_attributes = (
    'received_by', 'received_from', 'received_with',
    'recipient', 'sender',
)

# Platform line separator as bytes; used to join raw message lines.
_NL = os.linesep.encode()

#######################################
def corrupt_message(why, fromlines=None, fromstring=None):
    '''Build a container message around content that could not be parsed.

    An error is logged, then a fresh message is created with synthetic
    From, Subject and Date header fields.  Its body explains ``why``
    parsing failed and is followed by the original content, with trailing
    whitespace stripped from every line.

    Arguments:
      why -- the reason for the failure (typically the exception); it is
        rendered with str().
      fromlines -- the original message as a sequence of bytes lines.
      fromstring -- the original message as a single bytes object; only
        used when ``fromlines`` is empty or None.

    Returns the container message, with every attribute named in
    ``message_attributes`` set to ''.

    Raises SystemExit when neither ``fromlines`` nor ``fromstring`` is
    supplied.
    '''
    log = getmailcore.logging.Logger()
    log.error('failed to parse retrieved message; constructing container for '
              'contents\n')
    if fromlines is None and fromstring is None:
        raise SystemExit('corrupt_message() called with wrong arguments')
    msg = email.message_from_string('')
    msg['From'] = '"unknown sender" <>'
    msg['Subject'] = 'Corrupt message received'
    msg['Date'] = Utils.formatdate(localtime=True)
    # The body is assembled as bytes because the original content is bytes.
    body = [
        b'A badly-corrupt message was retrieved and could not be parsed',
        b'for the following reason:',
        b'',
        b'    %s' % str(why).encode(),
        b'',
        b'Below the following line is the original message contents.',
        b'',
        b'--------------------------------------------------------------',
    ]
    if fromlines:
        body.extend(line.rstrip() for line in fromlines)
    elif fromstring:
        body.extend(line.rstrip() for line in fromstring.splitlines())
    msg.set_payload(_NL.join(body))
    # Give the container the same metadata attributes a Message carries.
    for attr in message_attributes:
        setattr(msg, attr, '')
    return msg

#######################################
class Message(object):
    '''Message class for getmail.  Does sanity-checking on attribute accesses
    and provides some convenient interfaces to an underlying email.Message()
    object.

    Only the attributes named in __slots__ can be set on an instance.
    '''
    __slots__ = (
        '__msg',
        '__raw',
        'received_by',
        'received_from',
        'received_with',
        'recipient',
        'sender',
    )
    def __init__(self, fromlines=None, fromstring=None, fromfile=None):
        '''Parse a message from exactly one of the three sources.

        Arguments:
          fromlines -- the message as a sequence of bytes lines.
          fromstring -- the message as a single bytes object.
          fromfile -- a binary file object to read the message from.

        The first non-empty argument, in that order, is used.  If parsing
        raises email.errors.MessageError or UnicodeDecodeError, a container
        built by corrupt_message() is stored instead.

        Raises SystemExit when all three arguments are empty or None.
        '''
        self.recipient = None
        self.received_by = None
        self.received_from = None
        self.received_with = None
        self.__raw = None
        parser = Parser.BytesParser()
        parsestr = parser.parsebytes

        # Message is instantiated with fromlines for POP3, fromstring for
        # IMAP (both of which can be badly-corrupted or invalid, i.e. spam,
        # MS worms, etc).  It's instantiated with fromfile for the output
        # of filters, etc, which should be saner.
        if fromlines:
            try:
                self.__msg = parsestr(_NL.join(fromlines))
            except (Errors.MessageError, UnicodeDecodeError) as o:
                self.__msg = corrupt_message(o, fromlines=fromlines)
            # Keep the raw bytes so flatten() can fall back to a container.
            self.__raw = _NL.join(fromlines)
        elif fromstring:
            try:
                self.__msg = parsestr(fromstring)
            except (Errors.MessageError, UnicodeDecodeError) as o:
                self.__msg = corrupt_message(o, fromstring=fromstring)
            self.__raw = fromstring
        elif fromfile:
            try:
                self.__msg = parser.parse(fromfile)
            except (Errors.MessageError, UnicodeDecodeError) as o:
                # Shouldn't happen
                self.__msg = corrupt_message(o, fromstring=fromfile.read())
            # fromfile is only used by getmail_maildir, getmail_mbox, and
            # from reading the output of a filter.  Ignore __raw here.
        else:
            # Can't happen?
            raise SystemExit('Message() called with wrong arguments')

        # Prefer Return-Path, then Sender; fall back to a placeholder.
        self.sender = address_no_brackets(self.__msg['Return-Path']
                                          or self.__msg['Sender']
                                          or 'unknown')

    def content(self):
        '''Return the underlying parsed message object.'''
        return self.__msg

    def copyattrs(self, othermsg):
        '''Copy every attribute named in message_attributes from othermsg.'''
        for attr in message_attributes:
            setattr(self, attr, getattr(othermsg, attr))

    def flatten(self, delivered_to, received, mangle_from=False,
                include_from=False):
        '''Return the message as bytes with native EOL convention.

        The result is the optional mbox "From " line, a fresh Return-Path
        header field, the optional Delivered-To and Received header fields,
        and then the serialized message.  Existing Return-Path fields are
        removed from the underlying message as a side effect.

        Arguments:
          delivered_to -- if true, add a Delivered-To header field.
          received -- if true, add a Received header field; any value other
            than True is also recorded in that field as ``msgid:<value>``.
          mangle_from -- if true, quote body lines that start with "From ".
          include_from -- if true, prepend an mbox-style "From " line.

        Note on mangle_from: the Python Generator class apparently only
        quotes "From ", not ">From " (i.e. it uses mboxo format instead of
        mboxrd).  So we don't use its mangling, and do it by hand instead.

        If serializing raises TypeError or UnicodeEncodeError, the message
        is replaced once by a corrupt_message() container built from the
        raw bytes and flattened again.  Raises getmailDeliveryError when no
        raw bytes are available for that fallback.
        '''
        if include_from:
            # Mbox-style From line, not rfc822 From: header field.
            fromline = 'From %s %s' % (mbox_from_escape(self.sender),
                                       time.asctime()) + os.linesep
        else:
            fromline = ''
        # Write the Return-Path: header
        rpline = format_header('Return-Path', '<%s>' % self.sender)
        # Remove previous Return-Path: header fields.
        del self.__msg['Return-Path']
        if delivered_to:
            dtline = format_header('Delivered-To', self.recipient or 'unknown')
        else:
            dtline = ''
        if received:
            rcvd = 'from %s by %s with %s' % (
                self.received_from, self.received_by, self.received_with
            )
            rcvd += ' getmail6'
            # Anything other than plain True is recorded as a message id.
            if received != True:
                rcvd += ' msgid:%s'%received
            if self.recipient is not None:
                rcvd += ' for <%s>' % self.recipient
            rcvd += '; ' + time.strftime('%d %b %Y %H:%M:%S -0000',
                                            time.gmtime())
            rcvline = format_header('Received', rcvd)
        else:
            rcvline = ''
        # The From_ line is handled above; as_bytes() is called without
        # unixfrom, so the serialized message does not include one.
        try:
            bmsg = self.__msg.as_bytes(policy=self.__msg.policy.clone(linesep=os.linesep))
            if mangle_from:
                # do mboxrd-style "From " line quoting (add one '>')
                RE_FROMLINE = re.compile(b'^(>*From )', re.MULTILINE)
                bmsg = RE_FROMLINE.sub(b'>\\1', bmsg)

            return ((fromline+rpline+dtline+rcvline).encode('ASCII',errors="replace")+bmsg)

        except (TypeError,UnicodeEncodeError) as o:
            if self.__raw is None:
                # Argh -- a filter took a correctly-formatted message
                # and returned a badly-misformatted one?
                raise getmailDeliveryError('failed to parse retrieved message '
                                           'and could not recover (%s)' % o)
            # Consume the raw copy so the fallback is attempted only once:
            # if the container cannot be flattened either, the retry raises
            # getmailDeliveryError above instead of recursing without bound.
            raw, self.__raw = self.__raw, None
            self.__msg = corrupt_message(o, fromstring=raw)
            return self.flatten(delivered_to, received, mangle_from, include_from)


    def add_header(self, name, content):
        '''Add a header field ``name`` holding ``content``.

        Trailing whitespace is stripped from ``content``.  The value is
        first wrapped in a Header with the default charset.  If that fails
        to decode, each charset declared by the message is tried in turn,
        skipping any that is unknown or cannot represent the value.  The
        last resort is utf-8 with undecodable data replaced.
        '''
        content_rstriped = content.rstrip()
        try:
            self.__msg[name] = Header(content_rstriped)
            return
        except (UnicodeDecodeError, LookupError):
            pass
        for chs in self.__msg.get_charsets():
            if chs is None:
                continue
            try:
                self.__msg[name] = Header(content_rstriped, chs)
                return
            except (UnicodeError, LookupError):
                # LookupError: the message declares a charset Python has no
                # codec for; treat it like any other unusable charset.
                continue
        self.__msg[name] = Header(content_rstriped, 'utf-8', errors="replace")

    def remove_header(self, name):
        '''Delete every header field called ``name`` from the message.'''
        del self.__msg[name]

    def headers(self):
        '''Return the underlying message's internal header list.'''
        return self.__msg._headers

    def get_all(self, name, failobj=None):
        '''Return all values of header field ``name``, or ``failobj``.'''
        return self.__msg.get_all(name, failobj)