1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
|
"""
Example of downloading and processing SDSS spectra
--------------------------------------------------
This is the code used to create the files fetched by the routine
:func:`fetch_sdss_corrected_spectra`. Be aware that this routine
downloads a large amount of data (~700MB for 4000 spectra) and takes
a long time to run (~30 minutes for 4000 spectra).
"""
# Author: Jake VanderPlas <vanderplas@astro.washington.edu>
# License: BSD
# The figure is an example from astroML: see http://astroML.github.com
import sys
from urllib.error import HTTPError
import numpy as np
from astroML.datasets import fetch_sdss_spectrum
from astroML.datasets.tools import query_plate_mjd_fiber, TARGET_GALAXY
from astroML.dimensionality import iterative_pca
def fetch_and_shift_spectra(n_spectra,
outfile,
primtarget=TARGET_GALAXY,
zlim=(0, 0.7),
loglam_start=3.5,
loglam_end=3.9,
Nlam=1000):
"""
This function queries CAS for matching spectra, and then downloads
them and shifts them to a common redshift binning
"""
# First query for the list of spectra to download
plate, mjd, fiber = query_plate_mjd_fiber(n_spectra, primtarget,
zlim[0], zlim[1])
# Set up arrays to hold information gathered from the spectra
spec_cln = np.zeros(n_spectra, dtype=np.int32)
lineindex_cln = np.zeros(n_spectra, dtype=np.int32)
log_NII_Ha = np.zeros(n_spectra, dtype=np.float32)
log_OIII_Hb = np.zeros(n_spectra, dtype=np.float32)
z = np.zeros(n_spectra, dtype=np.float32)
zerr = np.zeros(n_spectra, dtype=np.float32)
spectra = np.zeros((n_spectra, Nlam), dtype=np.float32)
mask = np.zeros((n_spectra, Nlam), dtype=bool)
# Calculate new wavelength coefficients
new_coeff0 = loglam_start
new_coeff1 = (loglam_end - loglam_start) / Nlam
# Now download all the needed spectra, and resample to a common
# wavelength bin.
n_spectra = len(plate)
num_skipped = 0
i = 0
while i < n_spectra:
sys.stdout.write(' %i / %i spectra\r' % (i + 1, n_spectra))
sys.stdout.flush()
try:
spec = fetch_sdss_spectrum(plate[i], mjd[i], fiber[i])
except HTTPError:
num_skipped += 1
print("%i, %i, %i not found" % (plate[i], mjd[i], fiber[i]))
i += 1
continue
spec_rebin = spec.restframe().rebin(new_coeff0, new_coeff1, Nlam)
if np.all(spec_rebin.spectrum == 0):
num_skipped += 1
print("%i, %i, %i is all zero" % (plate[i], mjd[i], fiber[i]))
i += 1
continue
spec_cln[i] = spec.spec_cln
lineindex_cln[i], (log_NII_Ha[i], log_OIII_Hb[i])\
= spec.lineratio_index()
z[i] = spec.z
zerr[i] = spec.zerr
spectra[i] = spec_rebin.spectrum
mask[i] = spec_rebin.compute_mask(0.5, 5)
i += 1
sys.stdout.write('\n')
N = i
print(" %i spectra skipped" % num_skipped)
print(" %i spectra processed" % N)
print("saving to %s" % outfile)
np.savez(outfile,
spectra=spectra[:N],
mask=mask[:N],
coeff0=new_coeff0,
coeff1=new_coeff1,
spec_cln=spec_cln[:N],
lineindex_cln=lineindex_cln[:N],
log_NII_Ha=log_NII_Ha[:N],
log_OIII_Hb=log_OIII_Hb[:N],
z=z[:N],
zerr=zerr[:N])
def spec_iterative_pca(outfile, n_ev=10, n_iter=20, norm='L2'):
"""
This function takes the file outputted above, performs an iterative
PCA to fill in the gaps, and appends the results to the same file.
"""
data_in = np.load(outfile)
spectra = data_in['spectra']
mask = data_in['mask']
res = iterative_pca(spectra, mask,
n_ev=n_ev, n_iter=n_iter, norm=norm,
full_output=True)
input_dict = {key: data_in[key] for key in data_in.files}
# don't save the reconstructed spectrum: this can easily
# be recomputed from the other parameters.
input_dict['mu'] = res[1]
input_dict['evecs'] = res[2]
input_dict['evals'] = res[3]
input_dict['norms'] = res[4]
input_dict['coeffs'] = res[5]
np.savez(outfile, **input_dict)
if __name__ == '__main__':
fetch_and_shift_spectra(4000, 'spec4000.npz')
spec_iterative_pca('spec4000.npz')
|