1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237
|
"""
A caching proxy for CDS' Simbad object resolver.
"""
#c Copyright 2008-2020, the GAVO project
#c
#c This program is free software, covered by the GNU GPL. See the
#c COPYING file in the source distribution.
import json
import os
import socket
from urllib import request, parse
if __name__=="__main__":
# see below on why this doesn't have normal unit tests.
os.environ["GAVO_OOTTEST"] = "dontcare"
from gavo.helpers import testhelpers
from gavo import base
from gavo.utils import ElementTree
class ObjectCache(object):
    """a store for simbad query results living in dc.metastore.

    Earlier incarnations were file-based and kept separate keys for
    separate purposes.  Those separate keys never proved useful, so
    they are disregarded these days.

    The intention is to hold positive responses only; caching negatives
    invites too many problems, and negatives are expected to vary so
    much that little would be gained from keeping them.

    Values handed in are serialised as json (for simbad, they are
    dictionaries).
    """

    def addItem(self, key, value):
        """stores value under key.

        value is serialised to json before being written.
        """
        with base.getWritableAdminConn() as conn:
            # all our rows share the "simbad:" prefix within the metastore
            metaKey = 'simbad:'+key
            payload = json.dumps(value)
            base.setDBMeta(conn, metaKey, payload)

    def getItem(self, key):
        """returns the object stored for key earlier, decoded from json.

        A KeyError is raised when nothing has been stored for key.
        """
        stored = base.getDBMeta('simbad:'+key)
        return json.loads(stored)
class Sesame(object):
    """is a simple interface to the simbad name resolver.

    Identifiers are first looked up in an ObjectCache; only on a cache
    miss are the services in svc_urls contacted, one after the other.
    """
    # we're using several simbad mirrors if we have to, and only give
    # up if all of them fail.
    svc_urls = [
        "http://cdsweb.u-strasbg.fr/cgi-bin/nph-sesame/-ox/SN?",
        "http://vizier.cfa.harvard.edu/viz-bin/nph-sesame/-ox/SN?"]

    def __init__(self):
        self.cache = ObjectCache()

    def _parseXML(self, simbadXML):
        """returns a dictionary for a sesame XML response, or None.

        The dictionary has the keys oname, otype (possibly None), RA, and
        dec (both floats).  None is returned when the XML cannot be parsed
        (a warning is emitted in that case), when the response contains
        no target name or no resolver element, or when the position is
        missing or not a number.
        """
        try:
            et = ElementTree.fromstring(simbadXML)
        except Exception as msg: # simbad returned weird XML
            base.ui.notifyWarning("Bad XML from simbad (%s)"%str(msg))
            return None

        nameMatch = et.find("Target/name")
        if nameMatch is None:
            # no such object, return a negative
            return None

        firstResponse = et.find("Target/Resolver")
        # Compare against None explicitly: the truth value of an Element
        # reflects whether it has children and testing it is deprecated.
        if firstResponse is None:
            return None

        res = {
            "oname": nameMatch.text,
            "otype": getattr(firstResponse.find("otype"), "text", None)}
        try:
            res["RA"] = float(firstResponse.find("jradeg").text)
            res["dec"] = float(firstResponse.find("jdedeg").text)
        except (ValueError, AttributeError):
            # presumably null position
            return None
        return res

    def query(self, ident):
        """returns the record for ident, or None if simbad has no position.

        Cached records are returned without any network access.  Otherwise,
        the mirrors in svc_urls are tried in sequence; the first one that
        answers determines the result.  Only positive results are written
        to the cache, so unknown identifiers and broken responses are
        re-queried next time rather than being remembered as failures.

        If no mirror can be reached, a ValidationError is raised.
        """
        try:
            return self.cache.getItem(ident)
        except KeyError:
            # cache miss, fall through to actually querying sesame
            pass

        for svc_url in self.svc_urls:
            try:
                with request.urlopen(svc_url+parse.quote(ident)) as f:
                    newOb = self._parseXML(f.read())
            except socket.error:
                # Try next mirror
                continue

            if newOb is not None:
                # never cache negatives; see above
                self.cache.addItem(ident, newOb)
            return newOb

        # all mirrors fail
        raise base.ui.logOldExc(base.ValidationError(
            "Simbad is offline, cannot query.",
            "hscs_pos", # really, this should be added by the widget
            hint="If this problem persists, complain to us rather than simbad."))

    def getPositionFor(self, identifier):
        """returns a pair of floats (RA, dec) for identifier.

        A KeyError is raised if no record is available for identifier.
        """
        rec = self.query(identifier)
        if not rec:
            raise KeyError(identifier)
        return float(rec["RA"]), float(rec["dec"])
def getSimbadPositions(identifier):
    """returns a pair (ra, dec) that Simbad gives for identifier.

    If Simbad does not know identifier, a KeyError is raised.
    """
    resolver = base.caches.getSesame("")
    return resolver.getPositionFor(identifier)
# In the past, a "key" could be passed to keep different uses of Sesame
# apart.  That has not proved useful, which is why the key is accepted
# but ignored here.
def _makeSesame(key="ignored"):
    return Sesame()

base.caches.makeCache("getSesame", _makeSesame)
############## ADQL ufunc
from gavo import adql
@adql.userFunction("gavo_simbadpoint",
    "(identifier TEXT) -> POINT",
    """
gavo_simbadpoint queries simbad for an identifier and returns the
corresponding point. Note that identifier can only be a literal,
i.e., as simple string rather than a column name. This is because
our database cannot query simbad, and we probably wouldn't want
to fire off millions of simbad queries anyway; use simbad's own
TAP service for this kind of applications.
""",
    "point", ucd="pos.eq;src")
def _simbadpoint(args):
    # Resolves the single string literal in args through Sesame and
    # replaces the function call by a POINT built from the position.
    from gavo.adql import nodes

    isSingleLiteral = (len(args)==1
        and args[0].type=="characterStringLiteral")
    if not isSingleLiteral:
        raise adql.UfuncError(
            "gavo_simbadpoint takes exactly one string literal as argument")

    ident = args[0].value
    sesame = base.caches.getSesame()
    try:
        ra, dec = sesame.getPositionFor(ident)
    except KeyError:
        raise adql.UfuncError("No simbad position for '%s'"%ident)

    # the result is handed back by raising ReplaceNode with the new node
    xNode = nodes.Factor([repr(ra)])
    yNode = nodes.Factor([repr(dec)])
    raise nodes.ReplaceNode(nodes.Point(cooSys=None, x=xNode, y=yNode))
def _getTestSuite():
    """returns a unittest suite exercising the resolver against live services.

    As a side effect, this removes all simbad entries from dc.metastore
    so the tests start from an empty cache.
    """
    import unittest

    with base.getWritableAdminConn() as conn:
        conn.execute("DELETE FROM dc.metastore WHERE key LIKE 'simbad:%%'")

    # NOTE: all these tests assume the cache has been cleared before
    # them, and that the configured mirrors are up.
    # Cache clearing happens a few lines up.
    class QueryTest(testhelpers.VerboseTest):
        def testBasic(self):
            res = getSimbadPositions("Antares")
            self.assertAlmostEqual(res[0], 247.351915, 5)
            self.assertAlmostEqual(res[1], -26.432002, 5)

        def testCaching(self):
            res = getSimbadPositions("M31")
            self.assertAlmostEqual(res[0], 10.684708, 5)
            self.assertAlmostEqual(res[1], 41.26875, 5)
            # With no mirrors configured, the second lookup can only
            # succeed if it is answered from the cache.
            tmp = Sesame.svc_urls
            Sesame.svc_urls = []
            try:
                res = getSimbadPositions("M31")
                self.assertAlmostEqual(res[0], 10.684708, 5)
                self.assertAlmostEqual(res[1], 41.26875, 5)
            finally:
                Sesame.svc_urls = tmp

        def testMirrorFailover(self):
            # point the first mirror at an unusable local address so
            # the second one has to answer
            tmp = Sesame.svc_urls[0]
            Sesame.svc_urls[0] = "http://localhost:39293?"
            try:
                res = getSimbadPositions("epsilon Eri")
                self.assertAlmostEqual(res[0], 53.232687, 5)
                self.assertAlmostEqual(res[1], -9.458258, 5)
            finally:
                Sesame.svc_urls[0] = tmp

        def testCacheInstallation(self):
            res = base.caches.getSesame("anything").getPositionFor("ε Eri")
            self.assertAlmostEqual(res[0], 53.232687, 5)
            self.assertAlmostEqual(res[1], -9.458258, 5)

    # collect every TestCase subclass defined in this function
    localObjects = list(locals().values())
    tests = [ob for ob in localObjects
        if isinstance(ob, type) and issubclass(ob, unittest.TestCase)]

    loader = unittest.TestLoader()
    suite = unittest.TestSuite([loader.loadTestsFromTestCase(t)
        for t in tests])
    return suite
if __name__=="__main__":
    # These tests are kept out of the regular unit tests: without a live
    # network connection there is hardly anything meaningful to check,
    # and the regular unit tests should not depend on the network.
    import unittest

    liveTests = _getTestSuite()
    unittest.TextTestRunner().run(liveTests)
|