1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
|
// Copyright (c) 2017-2023, University of Tennessee. All rights reserved.
// SPDX-License-Identifier: BSD-3-Clause
// This program is free software: you can redistribute it and/or modify it under
// the terms of the BSD 3-Clause license. See the accompanying LICENSE file.
#ifndef BLAS_NRM2_HH
#define BLAS_NRM2_HH
#include "blas/util.hh"
#include <limits>
namespace blas {
// =============================================================================
/// @return 2-norm of vector,
/// $|| x ||_2 = (\sum_{i=0}^{n-1} |x_i|^2)^{1/2}$.
///
/// Generic implementation for arbitrary data types.
/// TODO: generic implementation does not currently scale to avoid over- or underflow.
///
/// @param[in] n
/// Number of elements in x. n >= 0.
///
/// @param[in] x
/// The n-element vector x, in an array of length (n-1)*incx + 1.
///
/// @param[in] incx
/// Stride between elements of x. incx > 0.
///
/// @ingroup nrm2
template <typename T>
real_type<T>
nrm2(
int64_t n,
T const * x, int64_t incx )
{
using std::sqrt;
using real_t = real_type<T>;
// check arguments
blas_error_if( n < 0 ); // standard BLAS returns, doesn't fail
blas_error_if( incx <= 0 ); // standard BLAS returns, doesn't fail
// todo: scale to avoid overflow & underflow
real_t result = 0;
if (incx == 1) {
// unit stride
for (int64_t i = 0; i < n; ++i) {
result += real(x[i]) * real(x[i]) + imag(x[i]) * imag(x[i]);
}
}
else {
// non-unit stride
int64_t ix = 0;
for (int64_t i = 0; i < n; ++i) {
result += real(x[ix]) * real(x[ix]) + imag(x[ix]) * imag(x[ix]);
ix += incx;
}
}
return sqrt( result );
}
} // namespace blas
#endif // #ifndef BLAS_NRM2_HH
|