File: attachments.py

package info (click to toggle)
python-jtoolkit 0.7.8-2
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 1,436 kB
  • ctags: 2,536
  • sloc: python: 15,143; makefile: 20
file content (486 lines) | stat: -rwxr-xr-x 18,945 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""attachments module handles all storing and retrieving of attachments, however that's done..."""

# Copyright 2004 St James Software
# 
# This file is part of jToolkit.
#
# jToolkit is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# jToolkit is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with jToolkit; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

import types
import os
import os.path
from jToolkit.widgets import widgets
from jToolkit import pdffile

class AttachmentError(ValueError):
  """error indicating a problem with an attachment

  the description is available as the message attribute and also through the
  normal exception machinery (str(error) and error.args)"""
  def __init__(self, message):
    # initialise the base class as well, otherwise str(error) is empty and the
    # description is lost in tracebacks and logs
    ValueError.__init__(self, message)
    self.message = message

class Attachment:
  """a single attachment: the name, content type and contents of one file

  Attributes:
    instance        the object handed to the constructor; its optional
                    'attachmentsdir' and 'maxattachmentsize' attributes are read
    attachmentsdir  directory the attachment files are stored in (None if unset)
    filename        base name of the attached file (None for a blank attachment)
    content_type    content type of the attachment
    contents        the attachment data, or None when it has not been loaded
    storedfilename  name of the file inside attachmentsdir holding the contents
  """
  # the text string type: unicode on Python 2, str on Python 3
  _texttype = type(u"")

  def __init__(self, instance, field=None, encodedstring=None, filename=None, content_type = None):
    """initialize an Attachment, either from a field, an encodedstring or a filename

    the first of field, encodedstring and filename that is not None is used;
    if all are None the attachment stays blank (see reset)"""
    self.instance = instance
    self.attachmentsdir = getattr(instance, 'attachmentsdir', None)
    self.reset()
    if field is not None:
      self.createfromfield(field, getattr(instance, 'maxattachmentsize', None))
    elif encodedstring is not None:
      self.decode(encodedstring)
    elif filename is not None:
      self.createfromfile(filename,content_type)

  def getbasefilename(self, filename):
    """gets the base of a filename in a platform independent way"""
    # IE submits the whole path. lets just get the filename in this case
    # (rfind gives -1 when the separator is absent, which keeps the whole name)
    for separator in ("\\", "/"):
      filename = filename[filename.rfind(separator)+1:]
    return filename

  def createfromfield(self, field, maxattachmentsize=None):
    """initialize the attachment from the given field (http)

    maxattachmentsize is a limit in units of 1024 bytes; if it is set and the
    contents reach that size an AttachmentError is raised"""
    self.filename = self.getbasefilename(field.filename)
    self.content_type = field.type
    self.contents = field.value
    # TODO: put an error in the audit log...
    if maxattachmentsize and len(self.contents) >= maxattachmentsize * 1024:
      raise AttachmentError(self.localize("attachment is too large"))

  def createfromfile(self, filename, content_type):
    """initialize the attachment from the file with the given name and content_type"""
    self.filename = self.getbasefilename(filename)
    self.content_type = content_type
    f = open(filename, 'rb')
    try:
      self.contents = f.read()
    finally:
      # always release the handle, even if reading fails
      f.close()

  def decode(self, attachmentstring):
    """decodes the database value, simply breaking it up into parts...

    the value is filename, content type and stored filename separated by
    newlines; anything with fewer than two newlines blanks the attachment"""
    if attachmentstring.count("\n") >= 2:
      self.filename, content_type, self.storedfilename = attachmentstring.split("\n",2)
      self.content_type = str(content_type) # in case of unicode...
    else:
      self.reset()

  def encode(self, includequotes=True):
    """encode the attachment for storing in the database (a reference to the file & type)

    returns None for an attachment without a filename (blank or empty);
    with includequotes the result is wrapped in single quotes and embedded
    single quotes are doubled"""
    # a blank attachment has filename None, an empty upload has filename ""
    if not self.filename:
      return None
    filename = self.filename
    if not isinstance(filename, self._texttype):
      # byte strings are taken to be utf8
      filename = filename.decode("utf8")
    encodedstring = filename + "\n" + self.content_type + "\n" + self.storedfilename
    if includequotes:
      return "'" + encodedstring.replace("'","''") + "'"
    return encodedstring

  def localize(self, message):
    """dummy localize method..."""
    return message

  def isvalid(self):
    """returns whether this represents a valid attachment or not..."""
    return self.filename is not None

  def getcontents(self):
    """return the contents of the attachment, reading it from disk if neccessary

    returns None when there are neither contents nor a stored file"""
    if self.contents is None and self.storedfilename is not None:
      self.readcontents()
    return self.contents

  def fullstoredpath(self):
    """get the full path to the attachment file

    creates the attachments directory if it does not exist yet;
    raises ValueError if no attachments directory has been set"""
    if self.attachmentsdir is None:
      raise ValueError("attachments dir not set...")
    if not os.path.isdir(self.attachmentsdir):
      os.mkdir(self.attachmentsdir)
    return os.path.join(self.attachmentsdir, self.storedfilename)

  def setstoragepath(self, attachmentsdir):
    """sets the directory in which this attachment's file is stored"""
    self.attachmentsdir = attachmentsdir

  def readcontents(self):
    """reads contents out of the file storing this attachment"""
    f = open(self.fullstoredpath(), 'rb')
    try:
      self.contents = f.read()
    finally:
      f.close()

  def savecontents(self, storedfilename):
    """saves contents to the given file, remembering it for the future..."""
    self.storedfilename = storedfilename
    f = open(self.fullstoredpath(), 'wb')
    try:
      f.write(self.contents)
    finally:
      f.close()

  def append(self, extracontents, storedfilename = None):
    """append extracontents to the storedfilename (can specify file if necessary)

    text strings are written as utf8, anything else is written unchanged"""
    if storedfilename is not None:
      self.storedfilename = storedfilename
    f = open(self.fullstoredpath(), 'ab')
    try:
      if isinstance(extracontents, self._texttype):
        extracontents = extracontents.encode('utf8')
      f.write(extracontents)
    finally:
      f.close()

  def deletefile(self, storedfilename = None):
    """deletes the attachment's file from the disk (can specify name if necessary)

    a file that does not exist is silently ignored"""
    if storedfilename is not None:
      self.storedfilename = storedfilename
    path = self.fullstoredpath()
    if os.path.exists(path):
      os.remove(path)

  def reset(self):
    """blanks out all the fields of this object"""
    self.filename = None
    self.content_type = None
    self.contents = None
    self.storedfilename = None

  def geturl(self, rowid, category):
    """returns a url that refers to this attachment

    only the trailing run of filename characters that are alphanumeric or one
    of space, underscore, dot and dash is used as the name in the url"""
    filename = self.filename
    if filename is None:
      return 'attachments/missing'
    n = len(filename)
    while n > 0 and (filename[n-1].isalnum() or filename[n-1] in ' _.-'):
      n -= 1
    return "attachments/%s?rowid=%s&name=%s" % (filename[n:],rowid,category)

  def dbrepr(self):
    """how to represent this object for storing in the database

    returns "null" when there is nothing to encode, otherwise the quoted
    encoded string as text"""
    encodedstring = self.encode()
    if encodedstring is None:
      return "null"
    if isinstance(encodedstring, self._texttype):
      return encodedstring
    if isinstance(encodedstring, str):
      try:
        return encodedstring.decode("utf8")
      except LookupError:
        # this is to try and avoid an initialization problem in the codecs module
        # TODO: review this and remove it once the problem goes away
        return encodedstring.decode("utf8")
    return str(encodedstring)

class MultiAttachment:
  """an ordered list of attachments stored together in one field

  Attributes:
    instance        the object handed to the constructor (passed on to every
                    Attachment that is created here)
    attachmentsdir  the instance's 'attachmentsdir' attribute, or None
    attachments     the list of attachments; an entry that has been removed is
                    replaced by False so the numbering of the others is kept
  """
  # the text string type: unicode on Python 2, str on Python 3
  _texttype = type(u"")

  def __init__(self, instance, fields=None, encodedstring=None, filenames=None):
    """initialize a MultiAttachment, either from fields, an encodedstring or a list of filenames

    all the sources that are given are used, in that order"""
    self.attachmentsdir = getattr(instance, 'attachmentsdir', None)
    self.attachments = []
    self.instance = instance
    if fields is not None:
      for field in fields:
        self.addattachment(Attachment(self.instance, field=field))
    if encodedstring is not None:
      self.decode(encodedstring)
    if filenames is not None:
      for filename in filenames:
        self.addattachment(Attachment(self.instance, filename=filename))

  def addattachment(self, attachment):
    """adds the attachment to the multiattachment list"""
    self.attachments.append(attachment)

  def removeattachment(self, attachmentnum):
    """marks the attachment for removal"""
    # the slot is kept so that the remaining attachments keep their numbers
    self.attachments[attachmentnum] = False

  def decode(self, attachmentstring):
    """decodes the database value, simply breaking it up into parts...

    every group of three lines describes one attachment; leftover lines are ignored"""
    lines = attachmentstring.split("\n")
    while len(lines) >= 3:
      attachment = Attachment(self.instance, encodedstring="\n".join(lines[:3]))
      self.addattachment(attachment)
      lines = lines[3:]

  def encode(self, includequotes=True):
    """encode the attachment for storing in the database (a reference to the file & type)

    returns None when the list is empty; when the list only holds removed or
    empty attachments a single space is encoded instead"""
    if not self.attachments:
      return None
    parts = []
    for attachment in self.attachments:
      if not attachment:
        # removed attachments are marked with False
        continue
      encoded = attachment.encode(includequotes=False)
      # an attachment without a filename encodes to None, which cannot be joined
      if encoded is not None:
        parts.append(encoded)
    encodedstring = "\n".join(parts)
    if not encodedstring:
      encodedstring = " "
    if includequotes:
      return "'" + encodedstring.replace("'","''") + "'"
    return encodedstring

  def savecontents(self, storedfilename):
    """saves contents to the given file, remembering it for the future...

    each attachment is stored as storedfilename-N where N is its position in
    the list; returns the message of the first AttachmentError found in the
    list, otherwise None"""
    for attachmentnum, attachment in enumerate(self.attachments):
      if not attachment:
        continue
      if isinstance(attachment, AttachmentError):
        return attachment.message
      # attachment.contents will be None if this field is unchanged
      if attachment.contents is not None:
        attachment.savecontents("%s-%d" % (storedfilename, attachmentnum))

  def deletefile(self, storedfilename = None):
    """deletes the attachment's file from the disk (can specify name if necessary)

    with a storedfilename each attachment deletes storedfilename-N, otherwise
    each attachment deletes the file it remembers"""
    for attachmentnum, attachment in enumerate(self.attachments):
      if not attachment:
        continue
      if storedfilename is None:
        attachment.deletefile()
      else:
        attachment.deletefile("%s-%d" % (storedfilename, attachmentnum))

  def geturl(self, rowid, category, attachmentnum):
    """returns a url that refers to this attachment

    returns 'attachments/missing' when attachmentnum is out of range, the
    attachment has been removed or it has no filename"""
    if not 0 <= attachmentnum < len(self.attachments):
      return 'attachments/missing'
    attachment = self.attachments[attachmentnum]
    if attachment:
      filename = attachment.filename
    else:
      filename = None
    if filename is None:
      return 'attachments/missing'
    # only keep the trailing run of characters that are safe to put in the url
    n = len(filename)
    while n > 0 and (filename[n-1].isalnum() or filename[n-1] in ' _.-'):
      n -= 1
    return "attachments/%s?rowid=%s&attachmentnum=%d&name=%s" % (filename[n:],rowid,attachmentnum,category)

  def dbrepr(self):
    """how to represent this object for storing in the database

    returns "null" when there is nothing to encode, otherwise the quoted
    encoded string as text"""
    encodedstring = self.encode()
    if encodedstring is None:
      return "null"
    if isinstance(encodedstring, self._texttype):
      return encodedstring
    if isinstance(encodedstring, str):
      try:
        return encodedstring.decode("utf8")
      except LookupError:
        # this is to try and avoid an initialization problem in the codecs module
        # TODO: review this and remove it once the problem goes away
        return encodedstring.decode("utf8")
    return str(encodedstring)

class MultiAttachmentsWidget(widgets.PlainContents):
  """
  an attachment field that is repeatable
  """

  def __init__(self, session, name, rowid, multiattachment, mode):
    """construct the widget displaying the attachments in multiattachment

    session supplies localize() for the user-visible text, name is the field
    name used in the urls, form inputs and javascript calls, rowid is put in
    the attachment urls, and mode selects what is shown (see buildwidget)"""
    self.session = session
    self.name = name
    self.rowid = rowid
    self.multiattachment = multiattachment
    self.mode = mode
    contents = self.buildwidget()
    widgets.PlainContents.__init__(self, contents)

  def buildwidget(self):
    """gets the contents of the widget...

    in "add" and "modify" mode: the script include, the links and an attach link;
    in "view" mode: the links, or a localized placeholder if there are none;
    any other mode gives None"""
    links = self.getlinks()
    if self.mode in ("add", "modify"):
      javascript = widgets.Script("text/javascript", '', newattribs={'src':'js/attachments.js'})
      addlink = self.getselectbutton()
      return [javascript, links, addlink]
    elif self.mode == "view":
      if links:
        return links
      return self.session.localize('(no attachment)')

  def getlinks(self):
    """returns all the attachment links"""
    links = []
    for attachmentnum, attachment in enumerate(self.multiattachment.attachments):
      # removed attachments are stored as False and have no isvalid() to call;
      # they are skipped but still counted so the other links keep their numbers
      if attachment and attachment.isvalid():
        links.append(self.getlink(attachmentnum, attachment))
    return links

  def getlink(self, attachmentnum, attachment):
    """gets the link to the attachment

    in "add" and "modify" mode the link is wrapped in a paragraph together with
    a hidden remove field and a remove link"""
    attachmentlink = self.multiattachment.geturl(self.rowid, self.name, attachmentnum)
    link = widgets.Link(attachmentlink, attachment.filename, {'target':'attachmentpage'})
    if self.mode in ("add", "modify"):
      removefield = widgets.Input({'type':'hidden', 'name': "%s.remove%d" % (self.name, attachmentnum), 'value':''})
      removelink = self.getremovelink()
      link = widgets.Paragraph([link, removefield, removelink])
    return link

  def getremovelink(self):
    """creates a link to remove an attachment - this must be positioned correctly in the form to find the attachment"""
    # TODO: rather pass the attachmentnum in so we find it without relying on form position
    javascriptcall = "MarkRemoved(this,'%s'); return false" % (self.name)
    removetext = self.session.localize("remove")
    removetooltip = self.session.localize("remove this attachment")
    restoretext = self.session.localize("restore")
    restoretooltip = self.session.localize("this attachment has been removed. click here to restore it")
    # both sets of texts are put on the link as attributes, alongside the onClick handler
    linkattribs = {"onClick": "javascript:%s" % javascriptcall, "title": removetooltip,
                   "removetext": removetext, "removetooltip": removetooltip,
                   "restoretext": restoretext, "restoretooltip": restoretooltip}
    return widgets.Link("#", removetext, linkattribs)

  def getselectbutton(self):
    """returns a button that lets the user select an attachment to upload"""
    # new inputs are numbered from the current number of attachments onwards
    newattachmentnum = len(self.multiattachment.attachments)
    javascriptcall = "if (this.fieldnumber == void(0)) { this.fieldnumber = %d }; " % (newattachmentnum)
    javascriptcall += "AddInput(this,'%s'); return false" % (self.name)
    attachtext = self.session.localize("attach a file")
    return widgets.Link("#", attachtext, {"onClick":"javascript:%s" % javascriptcall})

def isattachment(value):
  """returns 1 if value is an Attachment or a MultiAttachment, otherwise 0"""
  # isinstance is enough on its own; also requiring types.InstanceType would
  # only ever accept instances of classic (old-style) classes
  if isinstance(value, (Attachment, MultiAttachment)):
    return 1
  return 0

def isattachmenterror(value):
  """returns 1 if value is an AttachmentError, otherwise 0"""
  # no types.InstanceType check: exceptions are new-style classes from
  # Python 2.5 on, so that check would reject every AttachmentError
  if isinstance(value, AttachmentError):
    return 1
  return 0

def getPDFContents(page):
  """recursively gathers the contents found under the 'Kids' of the given object

  for every kid, its 'Contents' value (unless that is the pdffile null object)
  is converted to a string and followed by a space, then whatever is gathered
  from the kid's own kids is added; returns '' when there are no kids"""
  kids = page['Kids'].value()
  if kids is None:
    return ''
  pieces = []
  for kid in kids:
    kidcontents = kid['Contents']
    if kidcontents is not pdffile.NullObj:
      pieces.append(str(kidcontents.value()) + ' ')
    pieces.append(getPDFContents(kid))
  return ''.join(pieces)

def extractPDFText(pdfcontents):
  """takes decrypted PDF code and extracts the text

  every stretch of text between a 'Td(' or 'Tc(' tag and the next ')Tj' is
  collected, each followed by a space; scanning stops at a tag that has no
  closing ')Tj'"""
  # TODO: This could probably be done a lot more robustly with sparse
  pieces = []
  position = 0
  while True:
    # the next tag is whichever of the two kinds is found first; a kind that
    # does not occur any more (find gives -1) must not hide the other one
    tagstarts = [start for start in (pdfcontents.find('Td(', position), pdfcontents.find('Tc(', position)) if start != -1]
    if not tagstarts:
      break
    textstart = min(tagstarts) + 3
    textend = pdfcontents.find(')Tj', textstart)
    if textend == -1:
      # unterminated text: keep what has been found so far
      break
    pieces.append(pdfcontents[textstart:textend] + " ")
    position = textend + 3
  return "".join(pieces)
    
def indexattachment(indexer, rowid, filename, stored_name, contenttype, contents, modify = 0):
  """passes the text of an attachment to the indexer (does nothing if indexer is None)

  the indexer is first asked to decode the contents itself; if that gives
  nothing, text attachments are indexed as they are, PDF files up to version
  1.3 are run through the pdffile parser and everything else is indexed with
  empty contents. with modify set nothing is indexed (not supported yet)"""
  # TODO: There are only certain types of attachment we want to index
  # We should check contenttype here, and, if necessary, pass the contents to a parser to get the text out
  if indexer is None:
    return
  # First, parse the contents
  parsedcontents = indexer.decodeContents(contenttype, contents)
  if not parsedcontents:
    if contenttype and contenttype[:4] == "text":
      parsedcontents = contents
    elif contents and contents[1:4] == "PDF":
      # the header looks like %PDF-1.3, so characters 5 to 7 hold the version
      try:
        pdfversion = float(contents[5:8])
      except ValueError:
        pdfversion = None
      if pdfversion is None:
        print("Cannot parse PDF %s: Unrecognised version in header" % filename)
        parsedcontents = ""
      elif pdfversion > 1.3:
        print("Cannot parse PDF %s: Format is too recent.  Please contact St. James Software about configuration options" % filename)
        parsedcontents = ""
      else:
        from cStringIO import StringIO
        pdf = pdffile.PDFFile()
        pdfcontents = StringIO(contents)
        pdf._load(pdffile.Tokeniser(pdffile.FileByteStream(pdfcontents)))
        parsedcontents = extractPDFText(getPDFContents(pdf.Root['Pages']))
    else:
      # unknown type, or no content type / contents at all
      parsedcontents = ""
  if modify:
    print("Error: cannot modify attachments yet")
    #indexer.modifyDoc({'rowid':rowid},{'filename':filename, 'stored_name': stored_name, 'content-type': contenttype, 'contents': contents})
  else:
    indexer.indexFields({'filename':filename, 'stored_name': stored_name, 'content-type': contenttype, 'contents': parsedcontents, 'rowid': rowid})

def saveattachments(recorddict, rowid, originalvalues, indexer = None):
  """saves all valid attachments in the recorddict, using the given rowid

  files are stored under the name rowid-key (rowid-key-N for the members of a
  MultiAttachment) and passed to the indexer if there is one. attachments
  without a filename are removed from recorddict. if originalvalues already
  has a value for the key of a single attachment, the attachment is added to
  the attachments encoded in that value and saved along with them.
  returns the error messages joined by newlines ("" if there are none)"""
  emptykeys = []
  results = []
  modify = len(originalvalues) != 0
  for key, value in recorddict.items():
    result = ""
    if isattachment(value):
      storedname = rowid+'-'+key
      if isinstance(value, Attachment):
        # only save non-empty attachments (a blank one has filename None)...
        if value.filename:
          indexattachment(indexer, rowid, value.filename, storedname, value.content_type, value.contents, modify)
          if key in originalvalues:
            multiattachment = MultiAttachment(value.instance, encodedstring=originalvalues[key])
            multiattachment.addattachment(value)
            result = multiattachment.savecontents(storedname)
          else:
            result = value.savecontents(storedname)
        else:
          # and remove empty ones from the dictionary
          emptykeys.append(key)
      elif isinstance(value, MultiAttachment):
        for i, attach in enumerate(value.attachments):
          # contents is None for attachments that are unchanged: there is nothing to index
          if isinstance(attach, Attachment) and attach.contents is not None:
            indexattachment(indexer, rowid, attach.filename, storedname+'-'+str(i), attach.content_type, attach.contents, modify)
        result = value.savecontents(storedname)
    elif isattachmenterror(value):
      # a return value signifies an error
      result = value.message
    results.append(result)
  # only do actual deletion here otherwise you confuse the loop...
  for key in emptykeys:
    del recorddict[key]
  # return any error messages joined together
  return "\n".join([result for result in results if result])

def deleteattachments(recorddict, rowid, searcher = None):
  """deletes all attachments in the recorddict from the disk, using the given rowid
  doesn't update the database (assumes the caller is responsible)

  each attachment is asked to delete the file named rowid-key; if a searcher
  is given, the document with this rowid is deleted from it afterwards"""
  for key, value in recorddict.items():
    if isattachment(value):
      value.deletefile(rowid+'-'+key)
  if searcher:
    searcher.deleteDoc({'rowid':rowid})