File: nucleardb.py

package info (click to toggle)
utopia-documents 2.4.4-2
  • links: PTS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 30,560 kB
  • ctags: 24,084
  • sloc: cpp: 179,735; ansic: 16,208; python: 13,446; xml: 1,937; sh: 1,918; ruby: 1,594; makefile: 527; sql: 6
file content (143 lines) | stat: -rw-r--r-- 5,611 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
###############################################################################
#   
#    This file is part of the Utopia Documents application.
#        Copyright (c) 2008-2014 Lost Island Labs
#            <info@utopiadocs.com>
#    
#    Utopia Documents is free software: you can redistribute it and/or modify
#    it under the terms of the GNU GENERAL PUBLIC LICENSE VERSION 3 as
#    published by the Free Software Foundation.
#    
#    Utopia Documents is distributed in the hope that it will be useful, but
#    WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
#    Public License for more details.
#    
#    In addition, as a special exception, the copyright holders give
#    permission to link the code of portions of this program with the OpenSSL
#    library under certain conditions as described in each individual source
#    file, and distribute linked combinations including the two.
#    
#    You must obey the GNU General Public License in all respects for all of
#    the code used other than OpenSSL. If you modify file(s) with this
#    exception, you may extend this exception to your version of the file(s),
#    but you are not obligated to do so. If you do not wish to do so, delete
#    this exception statement from your version.
#    
#    You should have received a copy of the GNU General Public License
#    along with Utopia Documents. If not, see <http://www.gnu.org/licenses/>
#   
###############################################################################

#? name: NucleaRDB
#? www: http://www.receptors.org/
#? urls: http://www.receptors.org/


# encoding: UTF-8

import base64
import common.eutils
import common.utils
import re
import spineapi
import suds.client
import urllib
import utopia.document
import urllib2
from lxml import etree

class NucleaRDBAnnotator(utopia.document.Annotator):
    '''Annotate with NucleaRDB'''

    def getMentions(self, text, pubmedId):
        print 'initialising now ...'

        serviceUrl = 'http://www.receptors.org/nucleardb/webservice'
        wsdlUrl = '%s?wsdl' % serviceUrl
        gc = suds.client.Client(wsdlUrl)
        gs = gc.service

        textBytes = base64.b64encode(text.encode('utf-8'))
        if pubmedId != None:
            pubmedId = pubmedId.encode('utf-8')
        else:
            pubmedId = ''

        #print text
        body = """
        <SOAP-ENV:Envelope
         xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/">
         <SOAP-ENV:Body>
          <ns:getMentions xmlns:ns="http://webservice.web.mcsis.cmbi.ru.nl/">
           <text>{0}</text>
           <pubmedId>{1}</pubmedId>
          </ns:getMentions>
         </SOAP-ENV:Body>
        </SOAP-ENV:Envelope>
        """.format(textBytes, pubmedId).encode('utf8')
        res = urllib2.urlopen(serviceUrl, body, timeout=8)
        res = res.read()
        return gs.getMentions(__inject={'reply': res})

    @utopia.document.buffer
    def on_activate_event(self, document):
        if len(document.annotations('NucleaRDB cache')) == 0:
            print 'annotating stuff . . .'

            pubmedId = common.utils.metadata(document, 'pmid')
            if pubmedId is not None:
                print 'found pubmed id: ' + pubmedId
            else:
                print 'did not find pubmed id'

            ns = {'r': 'GPCR'}

            textMentions = self.getMentions(document.text(), pubmedId)

            objectlist = []
            mention_cache = {}
            for mention in textMentions:
                if mention.mentionType != 'SPECIES':
                    mention_cache.setdefault(mention.html, [])
                    mention_cache[mention.html].append(mention)

            for html, mentions in mention_cache.iteritems():
                annotation = self.createAnnotation(document, html, mentions)
                annotation['displayRelevance']='2000'
                annotation['displayRank']= '2000'
                document.addAnnotation(annotation)

            document.addAnnotation(spineapi.Annotation(), 'NucleaRDB cache')

    def createAnnotation(self, document, html, mentions):
        annotation = spineapi.Annotation()
        annotation['concept'] = 'NucleaRDBInformation'
        annotation['property:name'] = '%s: "%s"' % (mentions[0].mentionType.title(), mentions[0].formalRepresentation)
        annotation['property:description'] = 'NucleaRDB %s record' % mentions[0].mentionType.title()
        annotation['property:sourceDatabase'] = 'nucleardb'
        annotation['property:sourceDescription'] = '<p>The <a href="http://www.receptors.org/nucleardb/">NucleaRDB</a> is a molecular-class information system that collects, combines, validates and stores large amounts of heterogenous data on nuclear hormone receptors.</p>'
        annotation['property:html'] = html

        for mention in mentions:
            #print mention
            start = int(mention.textStart)
            end = int(mention.textEnd)
            match = document.substr(start, end-start)
            annotation.addExtent(match)

        return annotation

class NucleaRDBVisualiser(utopia.document.Visualiser):
    """Viualiser for NucleaRDB entries"""

    def visualisable(self, annotation):
        return annotation.get('concept') == 'NucleaRDBInformation' and 'property:html' in annotation

    def visualise(self, annotation):
        html = annotation['property:html']
        if html.endswith('</tr>'):
            html += '</table>'
        return html

__all__ = ['NucleaRDBAnnotator', 'NucleaRDBVisualiser']