# Author: Alexandre Gramfort <alexandre.gramfort@telecom-paristech.fr>
#         Daniel Strohmeier <daniel.strohmeier@gmail.com>
#
# License: Simplified BSD

import numpy as np
from scipy import linalg, signal

from ..source_estimate import (SourceEstimate, VolSourceEstimate,
                               _BaseSourceEstimate)
from ..minimum_norm.inverse import (combine_xyz, _prepare_forward,
                                    _check_reference, _check_loose_forward)
from ..forward import (compute_orient_prior, is_fixed_orient,
                       convert_forward_solution)
from ..io.pick import pick_channels_evoked
from ..io.proj import deactivate_proj
from ..utils import logger, verbose
from ..dipole import Dipole
from ..externals.six.moves import xrange as range

from .mxne_optim import (mixed_norm_solver, iterative_mixed_norm_solver, _Phi,
                         norm_l2inf, tf_mixed_norm_solver, norm_epsilon_inf)


@verbose
def _prepare_weights(forward, gain, source_weighting, weights, weights_min):
    """Apply user-supplied source weights to the gain and source weighting.

    Parameters
    ----------
    forward : dict
        Forward operator; only used to find out whether the orientation is
        fixed (1 column per position) or free (3 columns per position).
    gain : array
        Gain matrix. It is scaled in place, column by column.
    source_weighting : array
        Current source weighting (1d or 2d). It is scaled in place.
    weights : array | instance of _BaseSourceEstimate
        One weight per source position. For a source estimate, the maximum
        absolute value over axis 1 of its data is used.
    weights_min : float | None
        Threshold on the un-normalized weights. Columns whose weight is not
        strictly greater than it are dropped from the returned gain.
        If None, no column is dropped.

    Returns
    -------
    gain : array
        The weighted gain, restricted to the kept columns when
        ``weights_min`` is not None.
    source_weighting : array
        The weighted source weighting (not restricted by the mask).
    mask : array of bool | None
        Boolean mask of the kept columns, or None if ``weights_min`` is None.

    Raises
    ------
    ValueError
        If ``weights_min`` exceeds the largest weight, or if the number of
        weights does not match the number of gain columns.
    """
    mask = None
    if isinstance(weights, _BaseSourceEstimate):
        weights = np.max(np.abs(weights.data), axis=1)
    weights_max = np.max(weights)
    # weights_min is optional: only validate and normalize it when given.
    # Comparing or dividing None would raise a TypeError on Python 3.
    if weights_min is not None:
        if weights_min > weights_max:
            raise ValueError('weights_min > weights_max (%s > %s)' %
                             (weights_min, weights_max))
        weights_min = weights_min / weights_max
    weights = weights / weights_max
    n_dip_per_pos = 1 if is_fixed_orient(forward) else 3
    # Repeat each position weight once per orientation component
    weights = np.ravel(np.tile(weights, [n_dip_per_pos, 1]).T)
    if len(weights) != gain.shape[1]:
        raise ValueError('weights do not have the correct dimension '
                         ' (%d != %d)' % (len(weights), gain.shape[1]))
    if len(source_weighting.shape) == 1:
        source_weighting *= weights
    else:
        source_weighting *= weights[:, None]
    gain *= weights[None, :]

    if weights_min is not None:
        mask = (weights > weights_min)
        gain = gain[:, mask]
        n_sources = np.sum(mask) // n_dip_per_pos
        logger.info("Reducing source space to %d sources" % n_sources)

    return gain, source_weighting, mask


@verbose
def _prepare_gain_column(forward, info, noise_cov, pca, depth, loose, weights,
                         weights_min, verbose=None):
    """Whiten the lead field and apply depth, orientation and user weights.

    Returns the weighted gain, the info returned by ``_prepare_forward``,
    the whitener, the per-column source weighting and the column mask
    (None when no ``weights`` are given).
    """
    prepared = _prepare_forward(forward, info, noise_cov, pca)
    gain_info, gain, whitener = prepared[0], prepared[1], prepared[3]

    logger.info('Whitening lead field matrix.')
    gain = np.dot(whitener, gain)
    is_fixed_ori = is_fixed_orient(forward)

    if depth is None:
        source_weighting = np.ones(gain.shape[1], dtype=gain.dtype)
    else:
        col_power = np.sum(gain ** 2, axis=0)
        if not is_fixed_ori:
            # Pool the three orientation components of each position
            col_power = col_power.reshape(-1, 3).sum(axis=1)
        # Spherical leadfield can be zero at the center
        smallest_nonzero = np.min(col_power[col_power != 0.])
        col_power[col_power == 0.] = smallest_nonzero
        col_power **= depth
        if not is_fixed_ori:
            col_power = np.repeat(col_power, 3)
        source_weighting = np.sqrt(1. / col_power)

    assert is_fixed_ori or 0 <= loose <= 1
    if loose is not None and loose < 1.:
        orient_prior = compute_orient_prior(forward, loose)
        source_weighting *= np.sqrt(orient_prior)

    gain *= source_weighting[None, :]

    mask = None
    if weights is not None:
        gain, source_weighting, mask = _prepare_weights(
            forward, gain, source_weighting, weights, weights_min)

    return gain, gain_info, whitener, source_weighting, mask


def _prepare_gain(forward, info, noise_cov, pca, depth, loose, weights,
                  weights_min, verbose=None):
    """Validate ``depth`` and compute the weighted gain matrix.

    Parameters
    ----------
    forward, info, noise_cov, pca, loose, weights, weights_min
        Passed unchanged to ``_prepare_gain_column``.
    depth : None | float
        Depth weighting exponent. None disables depth weighting (handled by
        ``_prepare_gain_column``); otherwise a non-negative float is required.
    verbose : bool, str, int, or None
        Accepted for signature compatibility; not forwarded.

    Returns
    -------
    gain, gain_info, whitener, source_weighting, mask
        The outputs of ``_prepare_gain_column``.

    Raises
    ------
    ValueError
        If ``depth`` is neither None nor a float, or if it is negative.
    """
    # None is a documented value ("no depth weighting") and is supported
    # downstream, so only validate depth when one is actually given.
    if depth is not None:
        if not isinstance(depth, float):
            raise ValueError('Invalid depth parameter. '
                             'A float is required (got %s).'
                             % type(depth))
        elif depth < 0.0:
            raise ValueError('Depth parameter must be positive (got %s).'
                             % depth)

    gain, gain_info, whitener, source_weighting, mask = \
        _prepare_gain_column(forward, info, noise_cov, pca, depth,
                             loose, weights, weights_min)

    return gain, gain_info, whitener, source_weighting, mask


def _reapply_source_weighting(X, source_weighting, active_set):
    """Scale each row of X in place by the weighting of its active source."""
    active_weights = source_weighting[active_set]
    X *= active_weights[:, np.newaxis]
    return X


def _compute_residual(forward, evoked, X, active_set, info):
    """Return a copy of evoked with the data explained by X subtracted.

    The explained data is the forward gain (rows matched to
    ``info['ch_names']``, columns restricted to ``active_set``) times X,
    with the active projectors of ``evoked`` applied to it.
    """
    ch_names = info['ch_names']
    # OK, picking based on row_names is safe
    sel = [forward['sol']['row_names'].index(name) for name in ch_names]
    residual = pick_channels_evoked(evoked.copy(), include=ch_names)
    explained = residual.copy()

    active_gain = forward['sol']['data'][sel, :][:, active_set]
    explained.data = np.dot(active_gain, X)

    # Take care of proj
    projs = evoked.info['projs']
    active_projs = [p for p in projs if p['active']]
    non_active_projs = [p for p in projs if not p['active']]

    if len(active_projs) > 0:
        explained.info['projs'] = deactivate_proj(active_projs, copy=True)
        explained.apply_proj()
        explained.add_proj(non_active_projs, remove_existing=False)

    residual.data -= explained.data

    return residual


@verbose
def _make_sparse_stc(X, active_set, forward, tmin, tstep,
                     active_is_idx=False, verbose=None):
    """Build a source estimate that only contains the active sources.

    ``active_set`` is a boolean mask, or an index array when
    ``active_is_idx`` is True. For free orientation the three components
    are combined and the indices are mapped to source positions.
    """
    free_ori = not is_fixed_orient(forward)
    if free_ori:
        logger.info('combining the current components...')
        X = combine_xyz(X)

    active_idx = active_set if active_is_idx else np.where(active_set)[0]

    if free_ori:
        # Three consecutive columns belong to one source position
        active_idx = np.unique(active_idx // 3)

    src = forward['src']

    if src.kind != 'surface':
        vertices = src[0]['vertno'][active_idx]
        return VolSourceEstimate(X, vertices=vertices, tmin=tmin,
                                 tstep=tstep)

    vertices = []
    offset = 0
    for this_src in src:
        upper = offset + len(this_src['vertno'])
        in_this_src = (offset <= active_idx) & (active_idx < upper)
        local_idx = active_idx[in_this_src]
        # Convert global source indices to indices within this source space
        local_idx -= offset
        vertices.append(this_src['vertno'][local_idx])
        offset = upper

    return SourceEstimate(X, vertices=vertices, tmin=tmin, tstep=tstep)


@verbose
def _make_dipoles_sparse(X, active_set, forward, tmin, tstep, M, M_est,
                         active_is_idx=False, verbose=None):
    """Convert the active sources into a list of Dipole instances.

    The goodness of fit (in percent) is computed per time sample from the
    data M and its estimate M_est, and is shared by all dipoles.
    """
    times = tmin + tstep * np.arange(X.shape[1])
    n_times = len(times)

    active_idx = active_set if active_is_idx else np.where(active_set)[0]

    n_dip_per_pos = 1 if is_fixed_orient(forward) else 3
    if n_dip_per_pos > 1:
        active_idx = np.unique(active_idx // n_dip_per_pos)

    # Goodness of fit: 100 * (1 - ||M - M_est||^2 / ||M||^2) where ||M|| > 0
    gof = np.zeros(M_est.shape[1])
    M_norm2 = np.sum(M ** 2, axis=0)
    R_norm2 = np.sum((M - M_est) ** 2, axis=0)
    has_signal = M_norm2 > 0.0
    gof[has_signal] = 1. - R_norm2[has_signal] / M_norm2[has_signal]
    gof *= 100.

    dipoles = []
    for k, i_dip in enumerate(active_idx):
        i_pos = forward['source_rr'][i_dip][np.newaxis, :]
        i_pos = i_pos.repeat(n_times, axis=0)
        start, stop = k * n_dip_per_pos, (k + 1) * n_dip_per_pos
        X_ = X[start:stop]
        if n_dip_per_pos == 1:
            amplitude = X_[0]
            i_ori = forward['source_nn'][i_dip][np.newaxis, :]
            i_ori = i_ori.repeat(n_times, axis=0)
        else:
            if forward['surf_ori']:
                local_nn = forward['source_nn'][
                    i_dip * n_dip_per_pos:(i_dip + 1) * n_dip_per_pos]
                X_ = np.dot(local_nn.T, X_)

            amplitude = np.sqrt(np.sum(X_ ** 2, axis=0))
            i_ori = np.zeros((n_times, 3))
            nonzero = amplitude > 0.
            # Unit orientation wherever the amplitude is non-zero
            i_ori[nonzero] = (X_[:, nonzero] / amplitude[nonzero]).T

        dipoles.append(Dipole(times, i_pos, amplitude, i_ori, gof))

    return dipoles


@verbose
def make_stc_from_dipoles(dipoles, src, verbose=None):
    """Convert a list of spatio-temporal dipoles into a SourceEstimate.

    Parameters
    ----------
    dipoles : Dipole | list of instances of Dipole
        The dipoles to convert.
    src : instance of SourceSpaces
        The source space used to generate the forward operator.
    verbose : bool, str, int, or None
        If not None, override default verbose level (see :func:`mne.verbose`
        and :ref:`Logging documentation <tut_logging>` for more).

    Returns
    -------
    stc : SourceEstimate
        The source estimate.
    """
    logger.info('Converting dipoles into a SourceEstimate.')
    if isinstance(dipoles, Dipole):
        dipoles = [dipoles]
    if not isinstance(dipoles, list):
        raise ValueError('Dipoles must be an instance of Dipole or '
                         'a list of instances of Dipole. '
                         'Got %s!' % type(dipoles))
    first_times = dipoles[0].times
    tmin = first_times[0]
    tstep = first_times[1] - tmin
    X = np.zeros((len(dipoles), len(first_times)))
    # Positions of the used vertices of all source spaces, stacked
    source_rr = np.concatenate(
        [this_src['rr'][this_src['vertno'], :] for this_src in src], axis=0)
    n_lh_points = len(src[0]['vertno'])
    lh_vertno, rh_vertno = [], []
    for row, dip in enumerate(dipoles):
        if not np.all(dip.pos == dip.pos[0]):
            raise ValueError('Only dipoles with fixed position over time '
                             'are supported!')
        X[row] = dip.amplitude
        # First source whose position equals the dipole position exactly
        same_pos = np.all(source_rr == dip.pos[0], axis=1)
        idx = np.where(same_pos)[0][0]
        if idx < n_lh_points:
            lh_vertno.append(src[0]['vertno'][idx])
        else:
            rh_vertno.append(src[1]['vertno'][idx - n_lh_points])
    vertices = [np.array(lh_vertno).astype(int),
                np.array(rh_vertno).astype(int)]
    stc = SourceEstimate(X, vertices=vertices, tmin=tmin, tstep=tstep,
                         subject=src[0]['subject_his_id'])
    logger.info('[done]')
    return stc


@verbose
def mixed_norm(evoked, forward, noise_cov, alpha, loose='auto', depth=0.8,
               maxit=3000, tol=1e-4, active_set_size=10, pca=True,
               debias=True, time_pca=True, weights=None, weights_min=None,
               solver='auto', n_mxne_iter=1, return_residual=False,
               return_as_dipoles=False, dgap_freq=10, verbose=None):
    """Mixed-norm estimate (MxNE) and iterative reweighted MxNE (irMxNE).

    Compute L1/L2 mixed-norm solution [1]_ or L0.5/L2 [2]_ mixed-norm
    solution on evoked data.

    Parameters
    ----------
    evoked : instance of Evoked or list of instances of Evoked
        Evoked data to invert.
    forward : dict
        Forward operator.
    noise_cov : instance of Covariance
        Noise covariance to compute whitener.
    alpha : float in range [0, 100)
        Regularization parameter. 0 means no regularization, 100 would give 0
        active dipole.
    loose : float in [0, 1] | 'auto'
        Value that weights the source variances of the dipole components
        that are parallel (tangential) to the cortical surface. If loose
        is 0 then the solution is computed with fixed orientation.
        If loose is 1, it corresponds to free orientations.
        The default value ('auto') is set to 0.2 for surface-oriented source
        space and set to 1.0 for volumic or discrete source space.
    depth : None | float in [0, 1]
        Depth weighting coefficients. If None, no depth weighting is performed.
    maxit : int
        Maximum number of iterations.
    tol : float
        Tolerance parameter.
    active_set_size : int | None
        Size of active set increment. If None, no active set strategy is used.
    pca : bool
        If True the rank of the data is reduced to true dimension.
    debias : bool
        Remove coefficient amplitude bias due to L1 penalty.
    time_pca : bool or int
        If True the rank of the concatenated epochs is reduced to
        its true dimension. If is 'int' the rank is limited to this value.
    weights : None | array | SourceEstimate
        Weight for penalty in mixed_norm. Can be None, a
        1d array with shape (n_sources,), or a SourceEstimate (e.g. obtained
        with wMNE, dSPM, or fMRI).
    weights_min : float
        Do not consider in the estimation sources for which weights
        is less than weights_min.
    solver : 'prox' | 'cd' | 'bcd' | 'auto'
        The algorithm to use for the optimization. 'prox' stands for
        proximal iterations using the FISTA algorithm, 'cd' uses
        coordinate descent, and 'bcd' applies block coordinate descent.
        'cd' is only available for fixed orientation.
    n_mxne_iter : int
        The number of MxNE iterations. If > 1, iterative reweighting
        is applied.
    return_residual : bool
        If True, the residual is returned as an Evoked instance.
    return_as_dipoles : bool
        If True, the sources are returned as a list of Dipole instances.
    dgap_freq : int or np.inf
        The duality gap is evaluated every dgap_freq iterations. Ignored if
        solver is 'cd'.
    verbose : bool, str, int, or None
        If not None, override default verbose level (see :func:`mne.verbose`
        and :ref:`Logging documentation <tut_logging>` for more).

    Returns
    -------
    stc : SourceEstimate | list of SourceEstimate
        Source time courses for each evoked data passed as input.
    residual : instance of Evoked
        The residual a.k.a. data not explained by the sources.
        Only returned if return_residual is True.

    See Also
    --------
    tf_mixed_norm

    References
    ----------
    .. [1] A. Gramfort, M. Kowalski, M. Hamalainen,
       "Mixed-norm estimates for the M/EEG inverse problem using accelerated
       gradient methods", Physics in Medicine and Biology, 2012.
       https://doi.org/10.1088/0031-9155/57/7/1937

    .. [2] D. Strohmeier, Y. Bekhti, J. Haueisen, A. Gramfort,
       "The Iterative Reweighted Mixed-Norm Estimate for Spatio-Temporal
       MEG/EEG Source Reconstruction", IEEE Transactions of Medical Imaging,
       Volume 35 (10), pp. 2218-2228, 2016.
    """
    if not (0. <= alpha < 100.):
        raise ValueError('alpha must be in [0, 100). '
                         'Got alpha = %s' % alpha)
    if n_mxne_iter < 1:
        raise ValueError('MxNE has to be computed at least 1 time. '
                         'Requires n_mxne_iter >= 1, got %d' % n_mxne_iter)
    if dgap_freq <= 0.:
        raise ValueError('dgap_freq must be a positive integer.'
                         ' Got dgap_freq = %s' % dgap_freq)

    if not isinstance(evoked, list):
        evoked = [evoked]

    _check_reference(evoked[0])

    all_ch_names = evoked[0].ch_names
    if not all(all_ch_names == evoked[i].ch_names
               for i in range(1, len(evoked))):
        raise Exception('All the datasets must have the same good channels.')

    loose, forward = _check_loose_forward(loose, forward)

    # put the forward solution in fixed orientation if it's not already
    if loose == 0. and not is_fixed_orient(forward):
        forward = convert_forward_solution(
            forward, surf_ori=True, force_fixed=True, copy=True, use_cps=True)

    gain, gain_info, whitener, source_weighting, mask = _prepare_gain(
        forward, evoked[0].info, noise_cov, pca, depth, loose, weights,
        weights_min)

    # Concatenate all datasets along time, keeping only the gain channels
    sel = [all_ch_names.index(name) for name in gain_info['ch_names']]
    M = np.concatenate([e.data[sel] for e in evoked], axis=1)

    # Whiten data
    logger.info('Whitening data matrix.')
    M = np.dot(whitener, M)

    if time_pca:
        # Solve in the reduced temporal basis; projected back with Vh below
        U, s, Vh = linalg.svd(M, full_matrices=False)
        if not isinstance(time_pca, bool) and isinstance(time_pca, int):
            U = U[:, :time_pca]
            s = s[:time_pca]
            Vh = Vh[:time_pca]
        M = U * s

    # Scaling to make setting of alpha easy
    n_dip_per_pos = 1 if is_fixed_orient(forward) else 3
    alpha_max = norm_l2inf(np.dot(gain.T, M), n_dip_per_pos, copy=False)
    alpha_max *= 0.01
    gain /= alpha_max
    source_weighting /= alpha_max

    if n_mxne_iter == 1:
        X, active_set, E = mixed_norm_solver(
            M, gain, alpha, maxit=maxit, tol=tol,
            active_set_size=active_set_size, n_orient=n_dip_per_pos,
            debias=debias, solver=solver, dgap_freq=dgap_freq, verbose=verbose)
    else:
        X, active_set, E = iterative_mixed_norm_solver(
            M, gain, alpha, n_mxne_iter, maxit=maxit, tol=tol,
            n_orient=n_dip_per_pos, active_set_size=active_set_size,
            debias=debias, solver=solver, dgap_freq=dgap_freq, verbose=verbose)

    if time_pca:
        X = np.dot(X, Vh)
        M = np.dot(M, Vh)

    # Compute estimated whitened sensor data
    M_estimated = np.dot(gain[:, active_set], X)

    if mask is not None:
        # Map the active set back onto the unmasked source space.
        # The builtin bool is used because the np.bool alias was removed
        # from NumPy.
        active_set_tmp = np.zeros(len(mask), dtype=bool)
        active_set_tmp[mask] = active_set
        active_set = active_set_tmp
        del active_set_tmp

    if active_set.sum() == 0:
        raise Exception("No active dipoles found. alpha is too big.")

    # Reapply weights to have correct unit
    X = _reapply_source_weighting(X, source_weighting, active_set)

    outs = list()
    residual = list()
    cnt = 0
    for e in evoked:
        tmin = e.times[0]
        tstep = 1.0 / e.info['sfreq']
        # Time samples of the concatenated solution that belong to e
        Xe = X[:, cnt:(cnt + len(e.times))]
        if return_as_dipoles:
            out = _make_dipoles_sparse(
                Xe, active_set, forward, tmin, tstep,
                M[:, cnt:(cnt + len(e.times))],
                M_estimated[:, cnt:(cnt + len(e.times))], verbose=None)
        else:
            out = _make_sparse_stc(Xe, active_set, forward, tmin, tstep)
        outs.append(out)
        cnt += len(e.times)

        if return_residual:
            residual.append(_compute_residual(forward, e, Xe, active_set,
                            gain_info))

    logger.info('[done]')

    # A single input dataset yields a single output rather than a list
    if len(outs) == 1:
        out = outs[0]
        if return_residual:
            residual = residual[0]
    else:
        out = outs

    if return_residual:
        out = out, residual

    return out


def _window_evoked(evoked, size):
    """Window evoked (size in seconds).

    A copy of ``evoked`` is tapered at both ends with half Hann windows and
    returned; the input is left untouched.

    Parameters
    ----------
    evoked : instance of Evoked
        The data to window.
    size : float | int | (float, float)
        Taper length in seconds. A single number is used for both ends;
        a pair gives the left and right lengths.

    Returns
    -------
    evoked : instance of Evoked
        The windowed copy.
    """
    if isinstance(size, (float, int)):
        lsize = rsize = float(size)
    else:
        lsize, rsize = size
    evoked = evoked.copy()
    sfreq = float(evoked.info['sfreq'])
    # Taper lengths in samples (truncated towards zero)
    lsize = int(lsize * sfreq)
    rsize = int(rsize * sfreq)
    # scipy.signal.hann was removed from SciPy; the window functions live
    # in scipy.signal.windows.
    lhann = signal.windows.hann(lsize * 2)
    rhann = signal.windows.hann(rsize * 2)
    # Rising half on the left, flat middle, falling half on the right.
    # rhann has 2 * rsize samples, so rhann[rsize:] is its second half.
    window = np.r_[lhann[:lsize],
                   np.ones(len(evoked.times) - lsize - rsize),
                   rhann[rsize:]]
    evoked.data *= window[None, :]
    return evoked


@verbose
def tf_mixed_norm(evoked, forward, noise_cov,
                  loose='auto', depth=0.8, maxit=3000,
                  tol=1e-4, weights=None, weights_min=None, pca=True,
                  debias=True, wsize=64, tstep=4, window=0.02,
                  return_residual=False, return_as_dipoles=False,
                  alpha=None, l1_ratio=None, dgap_freq=10, verbose=None):
    """Time-Frequency Mixed-norm estimate (TF-MxNE).

    Compute L1/L2 + L1 mixed-norm solution on time-frequency
    dictionary. Works with evoked data [1]_ [2]_.

    Parameters
    ----------
    evoked : instance of Evoked
        Evoked data to invert.
    forward : dict
        Forward operator.
    noise_cov : instance of Covariance
        Noise covariance to compute whitener.
    loose : float in [0, 1] | 'auto'
        Value that weights the source variances of the dipole components
        that are parallel (tangential) to the cortical surface. If loose
        is 0 then the solution is computed with fixed orientation.
        If loose is 1, it corresponds to free orientations.
        The default value ('auto') is set to 0.2 for surface-oriented source
        space and set to 1.0 for volumic or discrete source space.
    depth : None | float in [0, 1]
        Depth weighting coefficients. If None, no depth weighting is performed.
    maxit : int
        Maximum number of iterations.
    tol : float
        Tolerance parameter.
    weights : None | array | SourceEstimate
        Weight for penalty in mixed_norm. Can be None or
        1d array of length n_sources or a SourceEstimate e.g. obtained
        with wMNE or dSPM or fMRI.
    weights_min : float
        Do not consider in the estimation sources for which weights
        is less than weights_min.
    pca : bool
        If True the rank of the data is reduced to true dimension.
    debias : bool
        Remove coefficient amplitude bias due to L1 penalty.
    wsize : int or array-like
        Length of the STFT window in samples (must be a multiple of 4).
        If an array is passed, multiple TF dictionaries are used (each having
        its own wsize and tstep) and each entry of wsize must be a multiple
        of 4. See [3]_.
    tstep : int or array-like
        Step between successive windows in samples (must be a multiple of 2,
        a divider of wsize and smaller than wsize/2) (default: wsize/2).
        If an array is passed, multiple TF dictionaries are used (each having
        its own wsize and tstep), and each entry of tstep must be a multiple
        of 2 and divide the corresponding entry of wsize. See [3]_.
    window : float or (float, float)
        Length of time window used to take care of edge artifacts in seconds.
        It can be one float, or a pair of floats if the left and right
        window lengths are different. If None, no windowing is applied.
    return_residual : bool
        If True, the residual is returned as an Evoked instance.
    return_as_dipoles : bool
        If True, the sources are returned as a list of Dipole instances.
    alpha : float in [0, 100)
        Overall regularization parameter. Required: a ValueError is raised
        if it is left to None. The spatial penalty passed to the solver is
        alpha * (1. - l1_ratio) and the temporal penalty is
        alpha * l1_ratio, the gain being rescaled so that 0 means no
        regularization and 100 would give 0 active dipole.
    l1_ratio : float in [0, 1]
        Proportion of temporal regularization. Required: a ValueError is
        raised if it is left to None. 0 means no time regularization
        aka MxNE.
    dgap_freq : int or np.inf
        The duality gap is evaluated every dgap_freq iterations.
    verbose : bool, str, int, or None
        If not None, override default verbose level (see :func:`mne.verbose`
        and :ref:`Logging documentation <tut_logging>` for more).


    Returns
    -------
    stc : instance of SourceEstimate
        Source time courses (a list of Dipole instances if
        return_as_dipoles is True).
    residual : instance of Evoked
        The residual a.k.a. data not explained by the sources.
        Only returned if return_residual is True.

    See Also
    --------
    mixed_norm

    References
    ----------
    .. [1] A. Gramfort, D. Strohmeier, J. Haueisen, M. Hamalainen, M. Kowalski
       "Time-Frequency Mixed-Norm Estimates: Sparse M/EEG imaging with
       non-stationary source activations",
       Neuroimage, Volume 70, pp. 410-422, 15 April 2013.
       DOI: 10.1016/j.neuroimage.2012.12.051

    .. [2] A. Gramfort, D. Strohmeier, J. Haueisen, M. Hamalainen, M. Kowalski
       "Functional Brain Imaging with M/EEG Using Structured Sparsity in
       Time-Frequency Dictionaries",
       Proceedings Information Processing in Medical Imaging
       Lecture Notes in Computer Science, Volume 6801/2011, pp. 600-611, 2011.
       DOI: 10.1007/978-3-642-22092-0_49

    .. [3] Y. Bekhti, D. Strohmeier, M. Jas, R. Badeau, A. Gramfort.
       "M/EEG source localization with multiscale time-frequency dictionaries",
       6th International Workshop on Pattern Recognition in Neuroimaging
       (PRNI), 2016.
       DOI: 10.1109/PRNI.2016.7552337
    """
    _check_reference(evoked)

    all_ch_names = evoked.ch_names
    info = evoked.info

    # alpha and l1_ratio default to None but are required. Check for None
    # explicitly so the caller gets the ValueError below rather than a
    # TypeError from comparing None with a float on Python 3.
    if alpha is None or not (0. <= alpha < 100.):
        raise ValueError('alpha must be in [0, 100). '
                         'Got alpha = %s' % alpha)

    if l1_ratio is None or not (0. <= l1_ratio <= 1.):
        raise ValueError('l1_ratio must be in range [0, 1].'
                         ' Got l1_ratio = %s' % l1_ratio)
    alpha_space = alpha * (1. - l1_ratio)
    alpha_time = alpha * l1_ratio

    if dgap_freq <= 0.:
        raise ValueError('dgap_freq must be a positive integer.'
                         ' Got dgap_freq = %s' % dgap_freq)

    tstep = np.atleast_1d(tstep)
    wsize = np.atleast_1d(wsize)
    if len(tstep) != len(wsize):
        raise ValueError('The same number of window sizes and steps must be '
                         'passed. Got tstep = %s and wsize = %s' %
                         (tstep, wsize))

    loose, forward = _check_loose_forward(loose, forward)

    # put the forward solution in fixed orientation if it's not already
    if loose == 0. and not is_fixed_orient(forward):
        forward = convert_forward_solution(
            forward, surf_ori=True, force_fixed=True, copy=True, use_cps=True)

    n_dip_per_pos = 1 if is_fixed_orient(forward) else 3

    gain, gain_info, whitener, source_weighting, mask = _prepare_gain(
        forward, evoked.info, noise_cov, pca, depth, loose, weights,
        weights_min)

    if window is not None:
        evoked = _window_evoked(evoked, window)

    sel = [all_ch_names.index(name) for name in gain_info["ch_names"]]
    M = evoked.data[sel]

    # Whiten data
    logger.info('Whitening data matrix.')
    M = np.dot(whitener, M)

    # Scaling to make setting of alpha easy
    n_steps = np.ceil(M.shape[1] / tstep.astype(float)).astype(int)
    n_freqs = wsize // 2 + 1
    n_coefs = n_steps * n_freqs
    phi = _Phi(wsize, tstep, n_coefs)

    alpha_max = norm_epsilon_inf(gain, M, phi, l1_ratio, n_dip_per_pos)
    alpha_max *= 0.01
    gain /= alpha_max
    source_weighting /= alpha_max

    X, active_set, E = tf_mixed_norm_solver(
        M, gain, alpha_space, alpha_time, wsize=wsize, tstep=tstep,
        maxit=maxit, tol=tol, verbose=verbose, n_orient=n_dip_per_pos,
        dgap_freq=dgap_freq, debias=debias)

    if active_set.sum() == 0:
        raise Exception("No active dipoles found. "
                        "alpha_space/alpha_time are too big.")

    # Compute estimated whitened sensor data
    M_estimated = np.dot(gain[:, active_set], X)

    if mask is not None:
        # Map the active set back onto the unmasked source space.
        # The builtin bool is used because the np.bool alias was removed
        # from NumPy.
        active_set_tmp = np.zeros(len(mask), dtype=bool)
        active_set_tmp[mask] = active_set
        active_set = active_set_tmp
        del active_set_tmp

    # Reapply weights to have correct unit
    X = _reapply_source_weighting(X, source_weighting, active_set)

    if return_residual:
        residual = _compute_residual(
            forward, evoked, X, active_set, gain_info)

    if return_as_dipoles:
        out = _make_dipoles_sparse(
            X, active_set, forward, evoked.times[0], 1.0 / info['sfreq'],
            M, M_estimated, verbose=None)
    else:
        out = _make_sparse_stc(
            X, active_set, forward, evoked.times[0], 1.0 / info['sfreq'])

    logger.info('[done]')

    if return_residual:
        out = out, residual

    return out