File: kendarticlesearch.py

package info
utopia-documents 2.4.4-2
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 30,560 kB
  • ctags: 24,084
  • sloc: cpp: 179,735; ansic: 16,208; python: 13,446; xml: 1,937; sh: 1,918; ruby: 1,594; makefile: 527; sql: 6
file content (109 lines) | stat: -rw-r--r-- 4,840 bytes
###############################################################################
#   
#    This file is part of the Utopia Documents application.
#        Copyright (c) 2008-2014 Lost Island Labs
#            <info@utopiadocs.com>
#    
#    Utopia Documents is free software: you can redistribute it and/or modify
#    it under the terms of the GNU GENERAL PUBLIC LICENSE VERSION 3 as
#    published by the Free Software Foundation.
#    
#    Utopia Documents is distributed in the hope that it will be useful, but
#    WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
#    Public License for more details.
#    
#    In addition, as a special exception, the copyright holders give
#    permission to link the code of portions of this program with the OpenSSL
#    library under certain conditions as described in each individual source
#    file, and distribute linked combinations including the two.
#    
#    You must obey the GNU General Public License in all respects for all of
#    the code used other than OpenSSL. If you modify file(s) with this
#    exception, you may extend this exception to your version of the file(s),
#    but you are not obligated to do so. If you do not wish to do so, delete
#    this exception statement from your version.
#    
#    You should have received a copy of the GNU General Public License
#    along with Utopia Documents. If not, see <http://www.gnu.org/licenses/>
#   
###############################################################################

import datetime
import kend.client
import kend.model
import utopia.library
from lxml import etree


class KendRemoteQuery(utopia.library.RemoteQuery):
    """Search the kend server for articles matching the search criteria"""

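    # fetch() runs the remote search when no cached continuation exists,
    # stores its pagination state via set_property, and returns a tuple of
    # (offset, limit, count, results).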
    def fetch(self, query, offset, limit):
        # Get the date range to search in
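        # (last_searched falls back to the Unix epoch if this plugin has
        # never searched before)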
        last_searched = self.get_property('last_searched', '1970/01/01')
        today = datetime.date.today().strftime('%Y/%m/%d')

        # Get previous search metadata
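        # (pagination state cached by an earlier call to fetch, if any)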
        next_search_uri = self.get_property('next_search_uri')
        count = int(self.get_property('count', 0))
        offset = int(self.get_property('offset', 0))
        limit = int(self.get_property('limit', 100))
        last_id = self.get_property('last_id')

        # Unpack the search term(s)
        term = query.get('query')
        if term is not None:
            # Remove non-hashtags
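            # (only '#tag'-style tokens are passed through to the remote search)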
            term = ' '.join((t for t in term.split() if t.startswith('#')))

            # List to hold the results
            results = []

            # If no previous search exists, search now
            if next_search_uri is None:
                documents = kend.client.Client().searchDocuments(**{
                    'q': term,
                    'offset': unicode(offset),
                    'limit': unicode(limit),
                })
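                # Each result group carries pagination metadata in group.output
                # (next-page URI, result count, offset and page size) as well
                # as the matching documents themselves.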
                for group in documents:
                    for k, v in group.output:
                        if k == 'next':
                            next_search_uri = v
                        elif k == 'count':
                            count = int(v)
                        elif k == 'offset':
                            offset = int(v)
                        elif k == 'limit':
                            limit = int(v)
                    for document in group.documents:
                        info = {}
                        metadata = {}
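                        # Weight each metadata source by how trustworthy it is:
                        # crossref and arxiv are preferred over pubmed/pmc, the
                        # document's own metadata ranks lowest, and unknown
                        # sources get a middling weight of 5.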
                        weights = {'crossref': 20, 'arxiv': 19, 'pubmed': 10, 'pmc': 9, 'document': 0}
                        for e in document.metadata:
                            metadata.setdefault(e.type, [])
                            metadata[e.type].append((e.data, weights.get(e.src, 5)))
                        for k, vs in metadata.iteritems():
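                            # For each field, keep only the value reported by the
                            # most trusted source (the highest weight sorts last).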
                            vs.sort(key=lambda e: e[1])
                            v = vs[-1][0]
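                            # Known identifier fields are grouped under a single
                            # 'identifiers' dictionary; anything else becomes a
                            # top-level field of the result entry.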
                            if k in ('doi', 'pmid', 'pmcid', 'arxivid', 'pii'):
                                info.setdefault('identifiers', {})
                                info['identifiers'][k] = v
                            else:
                                info[k] = v
                        print info
                        if 'authors' in info:
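                            # Authors arrive as a single '; '-separated string;
                            # split them into a list for the result entry.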
                            info['authors'] = info['authors'].split('; ')
                        print info
                        results.append(info)

                # Store history information
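                # (consulted on the next call, once next_search_uri is no
                # longer None)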
                self.set_property('next_search_uri', next_search_uri)
                self.set_property('count', count)
                self.set_property('offset', offset)
                self.set_property('limit', limit)

                # Return results
                return offset, limit, count, results