File: dryad.py

package info (click to toggle)
utopia-documents 2.4.4-2
  • links: PTS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 30,560 kB
  • ctags: 24,084
  • sloc: cpp: 179,735; ansic: 16,208; python: 13,446; xml: 1,937; sh: 1,918; ruby: 1,594; makefile: 527; sql: 6
file content (124 lines) | stat: -rw-r--r-- 6,355 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
###############################################################################
#   
#    This file is part of the Utopia Documents application.
#        Copyright (c) 2008-2014 Lost Island Labs
#            <info@utopiadocs.com>
#    
#    Utopia Documents is free software: you can redistribute it and/or modify
#    it under the terms of the GNU GENERAL PUBLIC LICENSE VERSION 3 as
#    published by the Free Software Foundation.
#    
#    Utopia Documents is distributed in the hope that it will be useful, but
#    WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
#    Public License for more details.
#    
#    In addition, as a special exception, the copyright holders give
#    permission to link the code of portions of this program with the OpenSSL
#    library under certain conditions as described in each individual source
#    file, and distribute linked combinations including the two.
#    
#    You must obey the GNU General Public License in all respects for all of
#    the code used other than OpenSSL. If you modify file(s) with this
#    exception, you may extend this exception to your version of the file(s),
#    but you are not obligated to do so. If you do not wish to do so, delete
#    this exception statement from your version.
#    
#    You should have received a copy of the GNU General Public License
#    along with Utopia Documents. If not, see <http://www.gnu.org/licenses/>
#   
###############################################################################

#? name: Dryad
#? www: http://datadryad.org/
#? urls: http://datadryad.org/ https://utopia.cs.man.ac.uk/


import common.utils
import json
import re
import socket
import spineapi
import utopia.document
import urllib2
from lxml import etree
import string

# XML namespace prefixes passed as ``namespaces=ns`` to the ElementTree
# find/findall/findtext path expressions used in this module.
ns = dict(
    mets='http://www.loc.gov/METS/',
    dim='http://www.dspace.org/xmlns/dspace/dim',
    kend='http://utopia.cs.manchester.ac.uk/kend',
    property='http://utopia.cs.manchester.ac.uk/kend/property#',
)

class Dryad(utopia.document.Annotator, utopia.document.Visualiser):
    """Generate Dryad information

    When a document with a DOI becomes ready, look the DOI up in the kend
    service and in Dryad's Solr index; if a Dryad data package references the
    article, attach an annotation whose XHTML tells the reader how to cite
    both the article and the data package.
    """

    # Seconds to wait on each remote request before urllib2 gives up.
    _TIMEOUT = 8

    # Common prefix of every DIM metadata field inside a Dryad METS record.
    _DIM_PATH = 'mets:dmdSec/mets:mdWrap/mets:xmlData/dim:dim/'

    def on_ready_event(self, document):
        """Add a 'Dryad' annotation to *document* if Dryad holds data for it.

        Does nothing when the document has no DOI, when kend does not map the
        DOI to a Dryad handle, or when Dryad's index lists no data set that
        references the DOI.

        Network and XML parsing errors (e.g. ``urllib2.URLError``,
        ``socket.timeout``, ``etree.XMLSyntaxError``) are not caught here and
        propagate to the caller, exactly as before.
        """
        from xml.sax.saxutils import escape

        doi = common.utils.metadata(document, 'doi')
        if not doi:
            return

        # Percent-encode the DOI so characters such as '&', '#', '<' or
        # spaces cannot corrupt the query strings it is embedded in.
        quoted_doi = self._quote(doi)

        # see if kend knows about this DOI as a Dryad record
        root = self._fetch_xml('https://utopia.cs.man.ac.uk/kend/0.7/define/lookup?database=dryad&term=%s&limit=1000' % quoted_doi)
        dryadShortHandle = root.findtext('kend:group/kend:annotation/kend:properties/property:databaseTerm', namespaces=ns)
        if not dryadShortHandle:
            return

        # then we have a dryad short-form doi, so can now safely go to dryad to get the rest
        root = self._fetch_xml('http://datadryad.org/solr/search/select/?q=dc.relation.isreferencedby:%s&fl=dc.identifier,dc.title_ac,dc.identifier.uri,dc.contributor.author,dc.date.issued.year,dc.identifier.citation,dc.description' % quoted_doi)
        result = root.find('result')
        # A response without a <result> element (or without numFound) is
        # treated the same as "no data sets found" instead of crashing.
        if result is None or result.get('numFound', '0') == '0':
            return

        # then we have found some datasets for this article DOI
        root = self._fetch_xml('http://datadryad.org/metadata/handle/%s/mets.xml' % dryadShortHandle)

        # The package DOI is the first identifier field of the form "doi:...".
        packageDOI = None
        for identifier in root.findall(self._DIM_PATH + 'dim:field[@element="identifier"]', namespaces=ns):
            text = identifier.text or ''  # empty elements have text None
            if text.startswith('doi:'):
                packageDOI = text[4:]
                break

        contributors = root.findall(self._DIM_PATH + 'dim:field[@qualifier="author"]', namespaces=ns)
        dataCitation = {
            'year': result.findtext("doc/arr[@name='dc.date.issued.year']/int", namespaces=ns),
            'title': root.findtext(self._DIM_PATH + 'dim:field[@element="title"]', namespaces=ns),
            # Skip empty author elements; capwords() cannot handle None.
            'authors': [string.capwords(c.text) for c in contributors if c.text],
            'source': 'Dryad Digital Repository',
            'doi': packageDOI,
        }
        articleCitation = root.findtext(self._DIM_PATH + 'dim:field[@qualifier="citation"][@element="identifier"]', namespaces=ns)

        # The article citation is plain text taken from XML, so it must be
        # escaped before being embedded in XHTML.
        article_html = escape(self._text(articleCitation)) if articleCitation else None
        # NOTE(review): format_citation() is assumed to return ready-to-embed
        # markup, so its result is not escaped here -- confirm.
        data_html = self._text(common.utils.format_citation(dataCitation))

        # A unicode template is required: formatting a byte-string template
        # with non-ASCII unicode values raises UnicodeEncodeError on Python 2.
        xhtml = u'''
                        <p>
                          The data associated with this article are available via Dryad. When using
                          these data, please cite both the article:
                        </p>
                        {0}
                        <p>
                          and also the data package:
                        </p>
                        {1}
                    '''.format(self._citation_box(article_html, doi),
                               self._citation_box(data_html, packageDOI))

        a = spineapi.Annotation()
        a['concept'] = 'Dryad'
        a['property:name'] = 'Dryad'
        a['property:sourceDatabase'] = 'dryad'
        a['property:sourceDescription'] = '<p><a href="http://datadryad.org/">Dryad</a> is an international repository of data underlying peer-reviewed articles in the basic and applied biosciences.</p>'
        a['property:description'] = 'Data associated with this article'
        # Stored as a byte string, as it always was for ASCII-only content.
        # NOTE(review): assumes spineapi interprets byte strings as UTF-8 --
        # confirm.
        a['property:xhtml'] = xhtml.encode('utf-8')
        document.addAnnotation(a)

    def visualisable(self, a):
        """Return True if annotation *a* is one created by this plugin."""
        return a.get('concept') == 'Dryad'

    def visualise(self, a):
        """Return the XHTML stored on annotation *a* for display."""
        return a.get('property:xhtml')

    def _fetch_xml(self, url):
        """Fetch *url* and return the parsed XML root, closing the response."""
        response = urllib2.urlopen(url, timeout=self._TIMEOUT)
        try:
            return etree.fromstring(response.read())
        finally:
            response.close()

    def _citation_box(self, citation_html, doi):
        """Return a ``<div class="box">`` holding a citation and its DOI link.

        Either part is omitted when missing, so an absent citation or DOI
        never renders as the literal text "None".
        """
        from xml.sax.saxutils import escape

        parts = []
        if citation_html:
            parts.append(citation_html)
        if doi:
            parts.append(u'<a href="http://dx.doi.org/{0}">doi:{1}</a>'.format(
                self._quote(doi), escape(self._text(doi))))
        return u'<div class="box">{0}</div>'.format(u'<br />'.join(parts))

    @staticmethod
    def _quote(value):
        """Percent-encode *value* for use in a URL, leaving '/' intact."""
        import urllib

        # urllib.quote() only handles byte strings reliably on Python 2.
        if not isinstance(value, bytes):
            value = value.encode('utf-8')
        return urllib.quote(value)

    @staticmethod
    def _text(value):
        """Return *value* as unicode, decoding byte strings as UTF-8."""
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value