File: sdss_specgals.py

package info (click to toggle)
astroml 1.0.2-6
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 932 kB
  • sloc: python: 5,731; makefile: 3
file content (186 lines) | stat: -rw-r--r-- 7,221 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
import os

import numpy as np
from astropy.cosmology import FlatLambdaCDM
from astropy.table import Table, vstack

from . import get_data_home

# We store the data in two parts to comply with GitHub 100Mb file size limit
DATA_URL1 = ("https://github.com/astroML/astroML-data/raw/main/datasets/"
             "SDSSspecgalsDR8_1.fit.gz")
DATA_URL2 = ("https://github.com/astroML/astroML-data/raw/main/datasets/"
             "SDSSspecgalsDR8_2.fit.gz")


def fetch_sdss_specgals(data_home=None, download_if_missing=True):
    """Loader for SDSS Galaxies with spectral information

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data'.

    download_if_missing : optional, default=True
        If False, raise a IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : recarray, shape = (661598,)
        record array containing pipeline parameters

    Notes
    -----
    These were compiled from the SDSS database using the following SQL query::

        SELECT
          G.ra, G.dec, S.mjd, S.plate, S.fiberID, --- basic identifiers
          --- basic spectral data
          S.z, S.zErr, S.rChi2, S.velDisp, S.velDispErr,
          --- some useful imaging parameters
          G.extinction_r, G.petroMag_r, G.psfMag_r, G.psfMagErr_r,
          G.modelMag_u, modelMagErr_u, G.modelMag_g, modelMagErr_g,
          G.modelMag_r, modelMagErr_r, G.modelMag_i, modelMagErr_i,
          G.modelMag_z, modelMagErr_z, G.petroR50_r, G.petroR90_r,
          --- line fluxes for BPT diagram and other derived spec. parameters
          GSL.nii_6584_flux, GSL.nii_6584_flux_err, GSL.h_alpha_flux,
          GSL.h_alpha_flux_err, GSL.oiii_5007_flux, GSL.oiii_5007_flux_err,
          GSL.h_beta_flux, GSL.h_beta_flux_err, GSL.h_delta_flux,
          GSL.h_delta_flux_err, GSX.d4000, GSX.d4000_err, GSE.bptclass,
          GSE.lgm_tot_p50, GSE.sfr_tot_p50, G.objID, GSI.specObjID
        INTO mydb.SDSSspecgalsDR8 FROM SpecObj S CROSS APPLY
          dbo.fGetNearestObjEQ(S.ra, S.dec, 0.06) N, Galaxy G,
          GalSpecInfo GSI, GalSpecLine GSL, GalSpecIndx GSX, GalSpecExtra GSE
        WHERE N.objID = G.objID
          AND GSI.specObjID = S.specObjID
          AND GSL.specObjID = S.specObjID
          AND GSX.specObjID = S.specObjID
          AND GSE.specObjID = S.specObjID
          --- add some quality cuts to get rid of obviously bad measurements
          AND (G.petroMag_r > 10 AND G.petroMag_r < 18)
          AND (G.modelMag_u-G.modelMag_r) > 0
          AND (G.modelMag_u-G.modelMag_r) < 6
          AND (modelMag_u > 10 AND modelMag_u < 25)
          AND (modelMag_g > 10 AND modelMag_g < 25)
          AND (modelMag_r > 10 AND modelMag_r < 25)
          AND (modelMag_i > 10 AND modelMag_i < 25)
          AND (modelMag_z > 10 AND modelMag_z < 25)
          AND S.rChi2 < 2
          AND (S.zErr > 0 AND S.zErr < 0.01)
          AND S.z > 0.02
          --- end of query ---

    Examples
    --------
    >>> from astroML.datasets import fetch_sdss_specgals
    >>> data = fetch_sdss_specgals()  # doctest: +IGNORE_OUTPUT +REMOTE_DATA
    >>> # number of objects in dataset
    >>> data.shape  # doctest: +REMOTE_DATA
    (661598,)
    >>> # first five column names
    >>> data.dtype.names[:5]  # doctest: +REMOTE_DATA
    ('ra', 'dec', 'mjd', 'plate', 'fiberID')
    >>> # first three RA values
    >>> print(data['ra'][:3])  # doctest: +REMOTE_DATA
    [ 146.71419105  146.74414186  146.62857334]
    >>> # first three declination values
    >>> print(data['dec'][:3])  # doctest: +REMOTE_DATA
    [-1.04127639 -0.6522198  -0.7651468 ]
    """

    data_home = get_data_home(data_home)

    archive_file1 = os.path.join(data_home, os.path.basename(DATA_URL1))
    archive_file2 = os.path.join(data_home, os.path.basename(DATA_URL2))

    if not (os.path.exists(archive_file1) and os.path.exists(archive_file2)):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        for url, name in zip([DATA_URL1, DATA_URL2],
                             [archive_file1, archive_file2]):
            data = Table.read(url)
            data.write(name)

    data1 = Table.read(archive_file1)
    data2 = Table.read(archive_file2)

    data = vstack([data1, data2])
    return np.asarray(data)


def fetch_great_wall(data_home=None, download_if_missing=True,
                     xlim=(-375, -175), ylim=(-300, 200), cosmo=None):
    """Get the 2D SDSS "Great Wall" distribution, following Cowan et al 2008

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data'.

    download_if_missing : optional, default=True
        If False, raise a IOError if the data is not locally available
        instead of trying to download the data from the source site.

    xlim, ylim : tuples or None
        the limits in Mpc of the data: default values are the same as that
        used for the plots in Cowan 2008.  If set to None, no cuts will
        be performed.

    cosmo : `astropy.cosmology` instance specifying cosmology
        to use when generating the sample.  If not provided,
        a Flat Lambda CDM model with H0=73.2, Om0=0.27, Tcmb0=0 is used.

    Returns
    -------
    data : ndarray, shape = (Ngals, 2)
        grid of projected (x, y) locations of galaxies in Mpc
    """
    # local imports so we don't need dependencies for loading module
    from scipy.interpolate import interp1d

    # We need some cosmological information to compute the r-band
    #  absolute magnitudes.
    if cosmo is None:
        cosmo = FlatLambdaCDM(H0=73.2, Om0=0.27, Tcmb0=0)

    data = fetch_sdss_specgals(data_home, download_if_missing)

    # cut to the part of the sky with the "great wall"
    data = data[(data['dec'] > -7) & (data['dec'] < 7)]
    data = data[(data['ra'] > 80) & (data['ra'] < 280)]

    # do a redshift cut, following Cowan et al 2008
    z = data['z']
    data = data[(z > 0.01) & (z < 0.12)]

    # first sample the distance modulus on a grid
    zgrid = np.linspace(min(data['z']), max(data['z']), 100)
    mugrid = cosmo.distmod(zgrid).value
    f = interp1d(zgrid, mugrid)
    mu = f(data['z'])

    # do an absolute magnitude cut at -20
    Mr = data['petroMag_r'] + data['extinction_r'] - mu
    data = data[Mr < -21]

    # compute distances in the equatorial plane
    # first sample comoving distance
    Dcgrid = cosmo.comoving_distance(zgrid).value
    f = interp1d(zgrid, Dcgrid)
    dist = f(data['z'])

    locs = np.vstack([dist * np.cos(data['ra'] * np.pi / 180.),
                      dist * np.sin(data['ra'] * np.pi / 180.)]).T

    # cut on x and y limits if specified
    if xlim is not None:
        locs = locs[(locs[:, 0] > xlim[0]) & (locs[:, 0] < xlim[1])]
    if ylim is not None:
        locs = locs[(locs[:, 1] > ylim[0]) & (locs[:, 1] < ylim[1])]

    return locs