/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
*/
#ifndef INCLUDED_SC_SOURCE_CORE_INC_ARRAYSUMFUNCTOR_HXX
#define INCLUDED_SC_SOURCE_CORE_INC_ARRAYSUMFUNCTOR_HXX
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <rtl/math.hxx>
#include <tools/simd.hxx>
#include <tools/cpuid.hxx>
namespace sc
{
/**
 * Functor that sums the elements of a contiguous array of doubles.
 *
 * Calling the functor returns the sum of the mnSize values starting at
 * mpArray. When cpuid::hasSSE2() reports true, the bulk of the work is
 * delegated to executeSSE2(); otherwise a 4-way unrolled scalar loop is used.
 * If the fast sum comes out as a NaN carrying a particular payload, the sum
 * is recomputed term by term while skipping the NaN terms with that payload.
 */
struct ArraySumFunctor
{
private:
    const double* mpArray; // first element to sum; only read, never written
    size_t mnSize;         // number of elements reachable through mpArray

public:
    /**
     * @param pArray pointer to the first value to sum
     * @param nSize  number of values to sum
     */
    ArraySumFunctor(const double* pArray, size_t nSize)
        : mpArray(pArray)
        , mnSize(nSize)
    {
    }

    /**
     * Compute the sum of the array.
     *
     * @return the sum of all elements; 0.0 when mnSize is 0
     */
    double operator() ()
    {
        // Evaluated once, on the first call.
        const static bool hasSSE2 = cpuid::hasSSE2();

        double fSum = 0.0;
        size_t i = 0;
        const double* pCurrent = mpArray;

        if (hasSSE2)
        {
            // Sum leading elements one by one until pCurrent is 16-byte aligned.
            while (i < mnSize && !simd::isAligned<double, 16>(pCurrent))
            {
                fSum += *pCurrent++;
                i++;
            }
            if (i < mnSize)
            {
                // executeSSE2() takes i by reference; the tail loop below
                // continues from whatever index it leaves behind.
                fSum += executeSSE2(i, pCurrent);
            }
        }
        else
        {
            fSum += executeUnrolled(i, pCurrent);
        }

        // sum rest of the array
        for (; i < mnSize; ++i)
            fSum += mpArray[i];

        // If the sum is a NaN, some of the terms were empty cells, probably.
        // Re-calculate, carefully
        if (!std::isfinite(fSum))
        {
            sal_uInt32 nErr = reinterpret_cast< sal_math_Double * >(&fSum)->nan_parts.fraction_lo;
            if (nErr & 0xffff0000)
            {
                fSum = 0;
                for (i = 0; i < mnSize; i++)
                {
                    if (!std::isfinite(mpArray[i]))
                    {
                        // Skip non-finite terms that have any of the upper 16
                        // bits of fraction_lo set; add all other non-finite terms.
                        nErr = reinterpret_cast< const sal_math_Double * >(&mpArray[i])->nan_parts.fraction_lo;
                        if (!(nErr & 0xffff0000))
                            fSum += mpArray[i]; // Let errors encoded as NaNs propagate ???
                    }
                    else
                        fSum += mpArray[i];
                }
            }
        }
        return fSum;
    }

private:
    /**
     * SSE2 summation kernel; only declared here, defined elsewhere.
     *
     * NOTE(review): presumably sums elements starting at pCurrent and advances
     * i past the ones it consumed -- confirm against the definition.
     */
    double executeSSE2(size_t& i, const double* pCurrent) const;

    /**
     * Sum as many whole groups of four elements as fit between index i and
     * the end of the array, using four independent accumulators.
     *
     * @param i        in: index of the element pCurrent points at;
     *                 out: index of the first element not summed here
     * @param pCurrent pointer to element i of the array
     * @return the partial sum of the consumed elements, 0.0 if fewer than
     *         four elements remain
     */
    double executeUnrolled(size_t& i, const double* pCurrent) const
    {
        // Nothing left to sum; also avoids unsigned underflow in mnSize - i.
        if (i >= mnSize)
            return 0.0;

        const size_t nRemaining = mnSize - i;
        // The end index must be relative to the starting index i, not to 0,
        // so the loop stays correct for a non-zero start.
        const size_t nEnd = i + (nRemaining - (nRemaining % 4));
        if (nEnd == i)
            return 0.0;

        double sum0 = 0.0;
        double sum1 = 0.0;
        double sum2 = 0.0;
        double sum3 = 0.0;
        for (; i < nEnd; i += 4)
        {
            sum0 += *pCurrent++;
            sum1 += *pCurrent++;
            sum2 += *pCurrent++;
            sum3 += *pCurrent++;
        }
        return sum0 + sum1 + sum2 + sum3;
    }
};
} // end namespace sc
#endif
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */