File: remote.py

package info (click to toggle)
openstructure 2.11.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 206,240 kB
  • sloc: cpp: 188,571; python: 36,686; ansic: 34,298; fortran: 3,275; sh: 312; xml: 146; makefile: 29
file content (148 lines) | stat: -rw-r--r-- 5,766 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#------------------------------------------------------------------------------
# This file is part of the OpenStructure project <www.openstructure.org>
#
# Copyright (C) 2008-2020 by the OpenStructure authors
#
# This library is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3.0 of the License, or (at your option)
# any later version.
# This library is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this library; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
#------------------------------------------------------------------------------

import urllib.request, urllib.error, urllib.parse
import tempfile

from ost.io import LoadPDB, LoadMMCIF

class RemoteRepository:
  """
  A remote repository represents a structural database accessible through the 
  internet, e.g. the PDB or SWISS-MODEL template library.

  :param name:        Name of the repository
  :param url_pattern: URL pattern for repository. Required format is described
                      in :func:`URLForID`
  :param type:        Data format to expect at resolved URL must be in 
                      ('pdb', 'cif')
  :param id_transform: Transformation to apply to ID before resolving URL
                       in :func:`URLForID`. Must be in ('lower', 'upper');
                       any other value leaves the ID unchanged.

  :type name:         :class:`str`
  :type url_pattern:  :class:`str`
  :type type:         :class:`str`
  :type id_transform: :class:`str`

  :raises: :class:`ValueError` if *type* is neither 'cif' nor 'pdb'
  """
  def __init__(self, name, url_pattern, type, id_transform='upper'):
    if type not in ('cif', 'pdb'):
      raise ValueError('only cif and pdb types are supported')
    self.name = name
    self.url_pattern = url_pattern
    self.type = type
    self.id_transform = id_transform

  def URLForID(self, id):
    """
    Resolves URL given *url_pattern* and *id_transform* provided at object 
    initialization.
    The *url_pattern* must contain substring '$ID'. Given *id*, the URL to 
    the structure gets constructed by applying *id_transform* and inserting it
    at the location of '$ID'. e.g. 'https://files.rcsb.org/view/$ID.pdb' given 
    1ake as *id* and 'upper' as *id_transform* resolves to:
    'https://files.rcsb.org/view/1AKE.pdb'

    Every occurrence of '$ID' in the pattern is replaced.

    :param id:          ID to resolve
    :type id:           :class:`str`

    :returns: The resolved URL as :class:`str`
    """
    if self.id_transform == 'upper':
      id = id.upper()
    elif self.id_transform == 'lower':
      id = id.lower()
    return self.url_pattern.replace('$ID', id)

  def Get(self, id):
    """
    Resolves URL with :func:`URLForID`, dumps the content in a temporary file 
    and returns the open temporary file object. The path of the file is 
    available as its *name* attribute. The file is created with
    :class:`tempfile.NamedTemporaryFile`, so the returned object must be kept
    alive for as long as the file on disk is needed.

    The suffix of the temporary file is derived from the repository type, with
    '.gz' appended if the resolved URL ends with '.gz'.
    
    :param id:          ID to resolve
    :type id:           :class:`str`

    :returns: Temporary file object holding the downloaded content

    :raises: :class:`IOError` if the response status is not 200
    """
    remote_url = self.URLForID(id)
    tmp_file_suffix = '.%s' % self.type
    if remote_url.endswith('.gz'):
      tmp_file_suffix += '.gz'

    connection = None
    try:
      connection = urllib.request.urlopen(remote_url)
      if hasattr(connection, 'code'):
        status = connection.code
      else:
        status = connection.getcode()
    except urllib.error.HTTPError as e:
      status = e.code
      # the error object wraps the response body, release it right away
      e.close()

    try:
      if status != 200:
        # %s instead of %d: responses from non-HTTP handlers (e.g. file://)
        # carry a status of None, which %d cannot format and would mask the
        # intended IOError with a TypeError
        raise IOError('Could not load %s from %s (status code %s, url %s)' \
                      % (id, self.name, status, remote_url))
      tmp_file = tempfile.NamedTemporaryFile(suffix=tmp_file_suffix)
      try:
        tmp_file.write(connection.read())
        tmp_file.flush()
      except Exception:
        # do not leave a half written temporary file behind
        tmp_file.close()
        raise
      return tmp_file
    finally:
      # always release the network connection, also on failure
      if connection is not None:
        connection.close()

  def Load(self, id):
    """
    Resolves URL with :func:`URLForID` and directly loads/returns the according 
    :class:`ost.mol.EntityHandle`. Loading invokes the 
    :func:`ost.io.LoadPDB`/:func:`ost.io.LoadMMCIF` with default parameterization. If you need
    custom settings, you might want to consider to call :func:`Get` and do the
    loading manually.
    
    :param id:          ID to resolve
    :type id:           :class:`str`

    :raises: :class:`IOError` if the download in :func:`Get` fails
    """
    tmp_file = self.Get(id)
    try:
      if self.type == 'pdb':
        return LoadPDB(tmp_file.name)
      if self.type == 'cif':
        return LoadMMCIF(tmp_file.name)
    finally:
      # closing removes the temporary file once loading is done
      tmp_file.close()

# Predefined repositories, keyed by the short name that RemoteGet and
# RemoteLoad accept as *from_repo*.
REMOTE_REPOSITORIES = {
    'pdb': RemoteRepository(
        'rcsb.org (PDB)',
        'https://files.rcsb.org/download/$ID.pdb.gz',
        type='pdb',
        id_transform='upper',
    ),
    'smtl': RemoteRepository(
        'SMTL',
        'https://swissmodel.expasy.org/templates/$ID.pdb',
        type='pdb',
        id_transform='lower',
    ),
    'cif': RemoteRepository(
        'rcsb.org (mmCIF)',
        'https://files.rcsb.org/download/$ID.cif.gz',
        type='cif',
        id_transform='lower',
    ),
    'pdb_redo': RemoteRepository(
        'pdbredo',
        'https://pdb-redo.eu/db/$ID/$ID_besttls.pdb.gz',
        type='pdb',
        id_transform='lower',
    ),
}

def RemoteGet(id, from_repo='pdb'):
  """
  Fetches *id* from one of the predefined repositories 
  ('pdb', 'smtl', 'cif', 'pdb_redo') by delegating to 
  :func:`RemoteRepository.Get` and returns its result.

  :param id:           ID to resolve
  :type id:            :class:`str`
  :param from_repo:    One of the predefined repositories
  :type from_repo:     :class:`str`

  :raises: :class:`ValueError` if *from_repo* is not a predefined repository
  """
  repo = REMOTE_REPOSITORIES.get(from_repo)
  if repo:
    return repo.Get(id)
  raise ValueError('%s is not a valid repository' % from_repo)

def RemoteLoad(id, from_repo='pdb'):
  """
  Loads *id* from one of the predefined repositories 
  ('pdb', 'smtl', 'cif', 'pdb_redo') by delegating to 
  :func:`RemoteRepository.Load` and returns its result.

  :param id:           ID to resolve
  :type id:            :class:`str`
  :param from_repo:    One of the predefined repositories
  :type from_repo:     :class:`str`

  :raises: :class:`ValueError` if *from_repo* is not a predefined repository
  """
  repo = REMOTE_REPOSITORIES.get(from_repo)
  if repo:
    return repo.Load(id)
  raise ValueError('%s is not a valid repository' % from_repo)