File: sdss_corrected_spectra.py

package info (click to toggle)
astroml 1.0.2-6
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 932 kB
  • sloc: python: 5,731; makefile: 3
file content (105 lines) | stat: -rw-r--r-- 2,718 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import os
import numpy as np
from . import get_data_home
from .tools import download_with_progress_bar

DATA_URL = ("https://github.com/astroML/astroML-data/raw/main/datasets/"
            "spec4000.npz")


def reconstruct_spectra(data):
    """Compute the reconstructed spectra.

    Parameters
    ----------
    data: NpzFile
        numpy data object returned by fetch_sdss_corrected_spectra.

    Returns
    -------
    spec_recons: ndarray
        Reconstructed spectra, using principal components to interpolate
        across the masked region.
    """
    spectra = data['spectra']
    coeffs = data['coeffs']
    evecs = data['evecs']
    mask = data['mask']
    mu = data['mu']
    norms = data['norms']

    spec_recons = spectra.copy()

    nev = coeffs.shape[1]

    spec_fill = mu + np.dot(coeffs, evecs[:nev])
    spec_fill *= norms[:, np.newaxis]

    spec_recons[mask] = spec_fill[mask]

    return spec_recons


def compute_wavelengths(data):
    """Compute the wavelength associated with spectra.

    Paramters
    ---------

    Parameters
    ----------
    data: NpzFile
        numpy data object returned by fetch_sdss_corrected_spectra.

    Returns
    -------
    wavelength: ndarray
        One-dimensional wavelength array for spectra.
    """

    return 10 ** (data['coeff0']
                  + data['coeff1'] * np.arange(data['spectra'].shape[1]))


def fetch_sdss_corrected_spectra(data_home=None,
                                 download_if_missing=True):
    """Loader for Iterative PCA pre-processed galaxy spectra

    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data'.

    download_if_missing : optional, default=True
        If False, raise a IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    data : NpzFile
        The data dictionary

    Notes
    -----
    This is the file created by the example script
    examples/datasets/compute_sdss_pca.py
    """
    data_home = get_data_home(data_home)

    data_file = os.path.join(data_home, os.path.basename(DATA_URL))

    if not os.path.exists(data_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')

        print("downloading PCA-processed SDSS spectra from %s to %s"
              % (DATA_URL, data_home))

        databuffer = download_with_progress_bar(DATA_URL)
        open(data_file, 'wb').write(databuffer)

    data = np.load(data_file)

    return data