File: xblas.hpp

/***************************************************************************
 * Copyright (c) Wolf Vollprecht, Johan Mabille and Sylvain Corlay          *
 * Copyright (c) QuantStack                                                 *
 *                                                                          *
 * Distributed under the terms of the BSD 3-Clause License.                 *
 *                                                                          *
 * The full license is in the file LICENSE, distributed with this software. *
 ****************************************************************************/

#ifndef XBLAS_HPP
#define XBLAS_HPP

#include <algorithm>

#include "xtensor/containers/xarray.hpp"
#include "xtensor/containers/xtensor.hpp"
#include "xtensor/io/xio.hpp"
#include "xtensor/misc/xcomplex.hpp"
#include "xtensor/utils/xutils.hpp"

#include "xflens/cxxblas/cxxblas.cxx"
#include "xtensor-blas/xblas_config.hpp"
#include "xtensor-blas/xblas_utils.hpp"

namespace xt
{

    namespace blas
    {
        /**
         * Calculate the 1-norm (sum of absolute values) of a vector.
         *
         * @param a vector of n elements
         * @param result scalar output receiving the computed norm
         */
        template <class E, class R>
        void asum(const xexpression<E>& a, R& result)
        {
            auto&& ad = view_eval<E::static_layout>(a.derived_cast());
            XTENSOR_ASSERT(ad.dimension() == 1);

            cxxblas::asum<blas_index_t>(
                static_cast<blas_index_t>(ad.shape()[0]),
                ad.data() + ad.data_offset(),
                stride_front(ad),
                result
            );
        }
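
        // Example: a minimal usage sketch for asum, assuming a 1-d tensor of
        // double and a matching scalar output.
        //
        //     xt::xtensor<double, 1> v = {1.0, -2.0, 3.0};
        //     double r;
        //     xt::blas::asum(v, r);  // r == 6.0 (sum of absolute values)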

        /**
         * Calculate the 2-norm (Euclidean norm) of a vector.
         *
         * @param a vector of n elements
         * @param result scalar output receiving the computed norm
         */
        template <class E, class R>
        void nrm2(const xexpression<E>& a, R& result)
        {
            auto&& ad = view_eval<E::static_layout>(a.derived_cast());
            XTENSOR_ASSERT(ad.dimension() == 1);

            cxxblas::nrm2<blas_index_t>(
                static_cast<blas_index_t>(ad.shape()[0]),
                ad.data() + ad.data_offset(),
                stride_front(ad),
                result
            );
        }
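
        // Example: a minimal usage sketch for nrm2, assuming a 1-d tensor of
        // double and a matching scalar output.
        //
        //     xt::xtensor<double, 1> v = {3.0, 4.0};
        //     double r;
        //     xt::blas::nrm2(v, r);  // r == 5.0 (Euclidean norm)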

        /**
         * Calculate the dot product of two vectors, conjugating
         * the first argument \em a in the case of complex vectors.
         *
         * @param a vector of n elements
         * @param b vector of n elements
         * @param result scalar output receiving the dot product
         */
        template <class E1, class E2, class R>
        void dot(const xexpression<E1>& a, const xexpression<E2>& b, R& result)
        {
            auto&& ad = view_eval<E1::static_layout>(a.derived_cast());
            auto&& bd = view_eval<E2::static_layout>(b.derived_cast());
            XTENSOR_ASSERT(ad.dimension() == 1);
            XTENSOR_ASSERT(bd.dimension() == 1);

            blas_index_t stride_a = stride_front(ad);
            blas_index_t stride_b = stride_front(bd);

            auto* adt = ad.data() + ad.data_offset();
            auto* bdt = bd.data() + bd.data_offset();

            // BLAS expects a pointer to the lowest address of the vector's memory,
            // not to its first logical element; with a negative stride BLAS starts
            // from the end by itself, so rewind the pointer to the buffer start.
            if (stride_a < 0)
            {
                adt += (static_cast<blas_index_t>(ad.shape()[0]) - 1) * stride_a;  // go back to the start
            }
            if (stride_b < 0)
            {
                bdt += (static_cast<blas_index_t>(bd.shape()[0]) - 1) * stride_b;  // go back to the start
            }

            cxxblas::dot<blas_index_t>(static_cast<blas_index_t>(ad.shape()[0]), adt, stride_a, bdt, stride_b, result);
        }
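
        // Example: a usage sketch for dot with real input (for complex input
        // the first argument would be conjugated, as in cdotc/zdotc).
        //
        //     xt::xtensor<double, 1> a = {1.0, 2.0, 3.0};
        //     xt::xtensor<double, 1> b = {4.0, 5.0, 6.0};
        //     double r;
        //     xt::blas::dot(a, b, r);  // r == 32.0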

        /**
         * Calculate the dot product of two complex vectors without conjugating
         * the first argument \em a.
         *
         * @param a vector of n elements
         * @param b vector of n elements
         * @param result scalar output receiving the dot product
         */
        template <class E1, class E2, class R>
        void dotu(const xexpression<E1>& a, const xexpression<E2>& b, R& result)
        {
            auto&& ad = view_eval<E1::static_layout>(a.derived_cast());
            auto&& bd = view_eval<E2::static_layout>(b.derived_cast());
            XTENSOR_ASSERT(ad.dimension() == 1);
            XTENSOR_ASSERT(bd.dimension() == 1);

            blas_index_t stride_a = stride_front(ad);
            blas_index_t stride_b = stride_front(bd);

            auto* adt = ad.data() + ad.data_offset();
            auto* bdt = bd.data() + bd.data_offset();

            // BLAS expects a pointer to the lowest address of the vector's memory,
            // not to its first logical element; with a negative stride BLAS starts
            // from the end by itself, so rewind the pointer to the buffer start.
            if (stride_a < 0)
            {
                adt += (static_cast<blas_index_t>(ad.shape()[0]) - 1) * stride_a;  // go back to the start
            }
            if (stride_b < 0)
            {
                bdt += (static_cast<blas_index_t>(bd.shape()[0]) - 1) * stride_b;  // go back to the start
            }

            cxxblas::dotu<blas_index_t>(static_cast<blas_index_t>(ad.shape()[0]), adt, stride_a, bdt, stride_b, result);
        }
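
        // Example: a usage sketch for dotu with complex input (unconjugated,
        // as in cdotu/zdotu).
        //
        //     using cd = std::complex<double>;
        //     xt::xtensor<cd, 1> a = {cd(0.0, 1.0), cd(1.0, 0.0)};
        //     xt::xtensor<cd, 1> b = {cd(0.0, 1.0), cd(2.0, 0.0)};
        //     cd r;
        //     xt::blas::dotu(a, b, r);  // r == cd(1.0, 0.0): i*i + 1*2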

        /**
         * Calculate the general matrix-vector product according to
         * ``y := alpha * A * x + beta * y``.
         *
         * @param A matrix of n-by-m elements
         * @param x vector of m elements (n elements if \em A is transposed)
         * @param result vector y receiving the product, updated in place
         * @param transpose_A select if A should be transposed
         * @param alpha scale factor for ``A * x`` (defaults to 1)
         * @param beta scale factor for y (defaults to 0)
         */
        template <class E1, class E2, class R, class value_type = typename E1::value_type>
        void gemv(
            const xexpression<E1>& A,
            const xexpression<E2>& x,
            R& result,
            bool transpose_A = false,
            const value_type& alpha = value_type(1.0),
            const value_type& beta = value_type(0.0)
        )
        {
            auto&& dA = view_eval<E1::static_layout>(A.derived_cast());
            auto&& dx = view_eval<E2::static_layout>(x.derived_cast());

            cxxblas::gemv<blas_index_t>(
                get_blas_storage_order(result),
                transpose_A ? cxxblas::Transpose::Trans : cxxblas::Transpose::NoTrans,
                static_cast<blas_index_t>(dA.shape()[0]),
                static_cast<blas_index_t>(dA.shape()[1]),
                alpha,
                dA.data() + dA.data_offset(),
                get_leading_stride(dA),
                dx.data() + dx.data_offset(),
                get_leading_stride(dx),
                beta,
                result.data() + result.data_offset(),
                get_leading_stride(result)
            );
        }
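
        // Example: a usage sketch for gemv with the default alpha = 1 and
        // beta = 0, assuming a preallocated result vector of matching size.
        //
        //     xt::xtensor<double, 2> A = {{1.0, 2.0}, {3.0, 4.0}};
        //     xt::xtensor<double, 1> x = {1.0, 1.0};
        //     xt::xtensor<double, 1> y = xt::zeros<double>({2});
        //     xt::blas::gemv(A, x, y);  // y == {3.0, 7.0}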

        /**
         * Calculate the matrix-matrix product of matrix \em A and matrix \em B
         * according to ``C := alpha * A * B + beta * C``.
         *
         * @param A matrix of m-by-n elements
         * @param B matrix of n-by-k elements
         * @param result matrix C of m-by-k elements, updated in place
         * @param transpose_A transpose A on the fly
         * @param transpose_B transpose B on the fly
         * @param alpha scale factor for ``A * B`` (defaults to 1)
         * @param beta scale factor for C (defaults to 0)
         */
        template <class E, class F, class R, class value_type = typename E::value_type>
        void gemm(
            const xexpression<E>& A,
            const xexpression<F>& B,
            R& result,
            bool transpose_A = false,
            bool transpose_B = false,
            const value_type& alpha = value_type(1.0),
            const value_type& beta = value_type(0.0)
        )
        {
            static_assert(R::static_layout != layout_type::dynamic, "GEMM result layout cannot be dynamic.");
            auto&& dA = view_eval<R::static_layout>(A.derived_cast());
            auto&& dB = view_eval<R::static_layout>(B.derived_cast());

            XTENSOR_ASSERT(dA.layout() == dB.layout());
            XTENSOR_ASSERT(result.layout() == dA.layout());
            XTENSOR_ASSERT(dA.dimension() == 2);
            XTENSOR_ASSERT(dB.dimension() == 2);

            cxxblas::gemm<blas_index_t>(
                get_blas_storage_order(result),
                transpose_A ? cxxblas::Transpose::Trans : cxxblas::Transpose::NoTrans,
                transpose_B ? cxxblas::Transpose::Trans : cxxblas::Transpose::NoTrans,
                static_cast<blas_index_t>(transpose_A ? dA.shape()[1] : dA.shape()[0]),
                static_cast<blas_index_t>(transpose_B ? dB.shape()[0] : dB.shape()[1]),
                static_cast<blas_index_t>(transpose_B ? dB.shape()[1] : dB.shape()[0]),
                alpha,
                dA.data() + dA.data_offset(),
                get_leading_stride(dA),
                dB.data() + dB.data_offset(),
                get_leading_stride(dB),
                beta,
                result.data() + result.data_offset(),
                get_leading_stride(result)
            );
        }
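
        // Example: a usage sketch for gemm with the default alpha = 1 and
        // beta = 0, assuming a preallocated result of matching shape and layout.
        //
        //     xt::xtensor<double, 2> A = {{1.0, 2.0}, {3.0, 4.0}};
        //     xt::xtensor<double, 2> B = {{5.0, 6.0}, {7.0, 8.0}};
        //     xt::xtensor<double, 2> C = xt::zeros<double>({2, 2});
        //     xt::blas::gemm(A, B, C);  // C == {{19.0, 22.0}, {43.0, 50.0}}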

        /**
         * Calculate the rank-1 update (outer product) of vectors x and y
         * according to ``A := alpha * x * y' + A``.
         *
         * @param x vector of n elements
         * @param y vector of m elements
         * @param result matrix A of n-by-m elements, updated in place
         * @param alpha scalar scale factor (defaults to 1)
         */
        template <class E1, class E2, class R, class value_type = typename E1::value_type>
        void
        ger(const xexpression<E1>& x,
            const xexpression<E2>& y,
            R& result,
            const value_type& alpha = value_type(1.0))
        {
            auto&& dx = view_eval(x.derived_cast());
            auto&& dy = view_eval(y.derived_cast());

            XTENSOR_ASSERT(dx.dimension() == 1);
            XTENSOR_ASSERT(dy.dimension() == 1);

            cxxblas::ger<blas_index_t>(
                get_blas_storage_order(result),
                static_cast<blas_index_t>(dx.shape()[0]),
                static_cast<blas_index_t>(dy.shape()[0]),
                alpha,
                dx.data() + dx.data_offset(),
                get_leading_stride(dx),
                dy.data() + dy.data_offset(),
                get_leading_stride(dy),
                result.data() + result.data_offset(),
                get_leading_stride(result)
            );
        }
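
        // Example: a usage sketch for ger with the default alpha = 1, assuming
        // a zero-initialized result so A receives the plain outer product.
        //
        //     xt::xtensor<double, 1> x = {1.0, 2.0};
        //     xt::xtensor<double, 1> y = {3.0, 4.0, 5.0};
        //     xt::xtensor<double, 2> A = xt::zeros<double>({2, 3});
        //     xt::blas::ger(x, y, A);  // A == {{3., 4., 5.}, {6., 8., 10.}}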
    }  // namespace blas
}  // namespace xt
#endif