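"""Fetchers for the SDSS DR8 spectroscopic galaxy sample and for the 2D
projected "Great Wall" subsample derived from it."""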
import os
import numpy as np
from astropy.cosmology import FlatLambdaCDM
from astropy.table import Table, vstack
from . import get_data_home

# We store the data in two parts to fit within GitHub's 100 MB file size limit
DATA_URL1 = ("https://github.com/astroML/astroML-data/raw/main/datasets/"
"SDSSspecgalsDR8_1.fit.gz")
DATA_URL2 = ("https://github.com/astroML/astroML-data/raw/main/datasets/"
"SDSSspecgalsDR8_2.fit.gz")
def fetch_sdss_specgals(data_home=None, download_if_missing=True):
"""Loader for SDSS Galaxies with spectral information
Parameters
----------
data_home : optional, default=None
Specify another download and cache folder for the datasets. By default
all astroML data is stored in '~/astroML_data'.
download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
-------
data : recarray, shape = (661598,)
        record array containing pipeline parameters

    Notes
-----
    These were compiled from the SDSS database using the following SQL query::

      SELECT
G.ra, G.dec, S.mjd, S.plate, S.fiberID, --- basic identifiers
--- basic spectral data
S.z, S.zErr, S.rChi2, S.velDisp, S.velDispErr,
--- some useful imaging parameters
G.extinction_r, G.petroMag_r, G.psfMag_r, G.psfMagErr_r,
G.modelMag_u, modelMagErr_u, G.modelMag_g, modelMagErr_g,
G.modelMag_r, modelMagErr_r, G.modelMag_i, modelMagErr_i,
G.modelMag_z, modelMagErr_z, G.petroR50_r, G.petroR90_r,
--- line fluxes for BPT diagram and other derived spec. parameters
GSL.nii_6584_flux, GSL.nii_6584_flux_err, GSL.h_alpha_flux,
GSL.h_alpha_flux_err, GSL.oiii_5007_flux, GSL.oiii_5007_flux_err,
GSL.h_beta_flux, GSL.h_beta_flux_err, GSL.h_delta_flux,
GSL.h_delta_flux_err, GSX.d4000, GSX.d4000_err, GSE.bptclass,
GSE.lgm_tot_p50, GSE.sfr_tot_p50, G.objID, GSI.specObjID
INTO mydb.SDSSspecgalsDR8 FROM SpecObj S CROSS APPLY
dbo.fGetNearestObjEQ(S.ra, S.dec, 0.06) N, Galaxy G,
GalSpecInfo GSI, GalSpecLine GSL, GalSpecIndx GSX, GalSpecExtra GSE
WHERE N.objID = G.objID
AND GSI.specObjID = S.specObjID
AND GSL.specObjID = S.specObjID
AND GSX.specObjID = S.specObjID
AND GSE.specObjID = S.specObjID
--- add some quality cuts to get rid of obviously bad measurements
AND (G.petroMag_r > 10 AND G.petroMag_r < 18)
AND (G.modelMag_u-G.modelMag_r) > 0
AND (G.modelMag_u-G.modelMag_r) < 6
AND (modelMag_u > 10 AND modelMag_u < 25)
AND (modelMag_g > 10 AND modelMag_g < 25)
AND (modelMag_r > 10 AND modelMag_r < 25)
AND (modelMag_i > 10 AND modelMag_i < 25)
AND (modelMag_z > 10 AND modelMag_z < 25)
AND S.rChi2 < 2
AND (S.zErr > 0 AND S.zErr < 0.01)
AND S.z > 0.02
      --- end of query ---

    Examples
--------
>>> from astroML.datasets import fetch_sdss_specgals
>>> data = fetch_sdss_specgals() # doctest: +IGNORE_OUTPUT +REMOTE_DATA
>>> # number of objects in dataset
>>> data.shape # doctest: +REMOTE_DATA
(661598,)
>>> # first five column names
>>> data.dtype.names[:5] # doctest: +REMOTE_DATA
('ra', 'dec', 'mjd', 'plate', 'fiberID')
>>> # first three RA values
>>> print(data['ra'][:3]) # doctest: +REMOTE_DATA
[ 146.71419105 146.74414186 146.62857334]
>>> # first three declination values
>>> print(data['dec'][:3]) # doctest: +REMOTE_DATA
[-1.04127639 -0.6522198 -0.7651468 ]
"""
data_home = get_data_home(data_home)
archive_file1 = os.path.join(data_home, os.path.basename(DATA_URL1))
archive_file2 = os.path.join(data_home, os.path.basename(DATA_URL2))
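    # download and cache both FITS parts if either is missing locally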
if not (os.path.exists(archive_file1) and os.path.exists(archive_file2)):
if not download_if_missing:
raise IOError('data not present on disk. '
'set download_if_missing=True to download')
for url, name in zip([DATA_URL1, DATA_URL2],
[archive_file1, archive_file2]):
data = Table.read(url)
data.write(name)
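    # read the two cached parts and stack them into a single table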
data1 = Table.read(archive_file1)
data2 = Table.read(archive_file2)
data = vstack([data1, data2])
    return np.asarray(data)


def fetch_great_wall(data_home=None, download_if_missing=True,
xlim=(-375, -175), ylim=(-300, 200), cosmo=None):
"""Get the 2D SDSS "Great Wall" distribution, following Cowan et al 2008
Parameters
----------
data_home : optional, default=None
Specify another download and cache folder for the datasets. By default
all astroML data is stored in '~/astroML_data'.
download_if_missing : optional, default=True
        If False, raise an IOError if the data is not locally available
instead of trying to download the data from the source site.
xlim, ylim : tuples or None
        the limits in Mpc of the data: default values are the same as those
        used for the plots in Cowan et al. 2008. If set to None, no cuts will
be performed.
cosmo : `astropy.cosmology` instance specifying cosmology
to use when generating the sample. If not provided,
        a Flat Lambda CDM model with H0=73.2, Om0=0.27, Tcmb0=0 is used.

    Returns
-------
data : ndarray, shape = (Ngals, 2)
        array of projected (x, y) locations of galaxies, in Mpc
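
    Examples
    --------
    A minimal usage sketch (the first call requires network access in order
    to download and cache the underlying galaxy catalog):

    >>> from astroML.datasets import fetch_great_wall
    >>> X = fetch_great_wall()  # doctest: +REMOTE_DATA
    >>> # each row gives the projected (x, y) position of one galaxy, in Mpc
    >>> X.shape[1]  # doctest: +REMOTE_DATA
    2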
"""
    # local import so that scipy is only needed when this function is called
from scipy.interpolate import interp1d
# We need some cosmological information to compute the r-band
# absolute magnitudes.
if cosmo is None:
cosmo = FlatLambdaCDM(H0=73.2, Om0=0.27, Tcmb0=0)
data = fetch_sdss_specgals(data_home, download_if_missing)
# cut to the part of the sky with the "great wall"
data = data[(data['dec'] > -7) & (data['dec'] < 7)]
data = data[(data['ra'] > 80) & (data['ra'] < 280)]
# do a redshift cut, following Cowan et al 2008
z = data['z']
data = data[(z > 0.01) & (z < 0.12)]
# first sample the distance modulus on a grid
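    # (interpolating from this coarse grid is much faster than evaluating
    #  the cosmology for every galaxy individually)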
zgrid = np.linspace(min(data['z']), max(data['z']), 100)
mugrid = cosmo.distmod(zgrid).value
f = interp1d(zgrid, mugrid)
mu = f(data['z'])
    # do an absolute magnitude cut at Mr < -21
Mr = data['petroMag_r'] + data['extinction_r'] - mu
data = data[Mr < -21]
# compute distances in the equatorial plane
# first sample comoving distance
Dcgrid = cosmo.comoving_distance(zgrid).value
f = interp1d(zgrid, Dcgrid)
dist = f(data['z'])
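    # convert (RA, comoving distance) to Cartesian (x, y) coordinates
    # in the equatorial plane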
locs = np.vstack([dist * np.cos(data['ra'] * np.pi / 180.),
dist * np.sin(data['ra'] * np.pi / 180.)]).T
# cut on x and y limits if specified
if xlim is not None:
locs = locs[(locs[:, 0] > xlim[0]) & (locs[:, 0] < xlim[1])]
if ylim is not None:
locs = locs[(locs[:, 1] > ylim[0]) & (locs[:, 1] < ylim[1])]
return locs