1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92
|
/*
This file is a part of KMC software distributed under GNU GPL 3 licence.
The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
Version: 3.1.1
Date : 2019-05-19
*/
#ifndef _INTR_COPY_H
#define _INTR_COPY_H
#include <stdint.h>
#include <iostream>
#define SIMDE_ENABLE_NATIVE_ALIASES
#include <simde/x86/sse2.h>
// Toolchains targeting Windows supply __int64 themselves, so the name is only
// mapped onto long long everywhere else.
// _WIN32 is tested next to WIN32 because _WIN32 is the macro predefined by the
// compiler, whereas WIN32 has to come from project settings or SDK headers.
#if !defined(WIN32) && !defined(_WIN32)
typedef long long __int64;
#endif
// 64b copy function
// size - in 8B words (determined during execution)
// dest and src must be aligned to 8B
inline void IntrCopy64fun(void *_dest, void *_src, uint32_t size)
{
__int64* dest = (__int64 *)_dest;
__int64* src = (__int64 *)_src;
for (unsigned i = 0; i < size; ++i)
simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, dest + i), src[i]);
}
// 64bit copy function
// SIZE - in 8B words
template <unsigned SIZE> struct IntrCopy64
{
static inline void Copy(void *_dest, void *_src)
{
__int64* dest = (__int64*)_dest;
__int64* src = (__int64*)_src;
for (unsigned i = 0; i < SIZE; ++i)
simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, dest + i), src[i]);
}
};
// Generic fallback of the 128bit copy.
// It is selected for every MODE that has no dedicated specialization, copies
// nothing and only writes "Error\n" to the standard error stream.
// SIZE - in 16B words (not used by the fallback)
// MODE - selects the copy variant
template <unsigned SIZE, unsigned MODE> struct IntrCopy128
{
	static inline void Copy(void *_dest, void *_src)
	{
		// Both arguments are intentionally ignored.
		(void)_dest;
		(void)_src;
		// Fully qualified so the header does not rely on a using-directive
		// placed in the including file.
		std::cerr << "Error\n";
	}
};
// 128bit copy function
// SIZE - in 16B words
// dest - aligned to 16B
// src - aligned to 16B
template <unsigned SIZE> struct IntrCopy128<SIZE, 1>
{
static inline void Copy(void *_dest, void *_src)
{
__m128i *dest = (__m128i *) _dest;
__m128i *src = (__m128i *) _src;
for (unsigned i = 0; i < SIZE; ++i)
_mm_stream_si128(dest + i, _mm_load_si128(src + i));
}
};
// 128bit copy function for a destination whose alignment is checked at run time.
// SIZE - number of 16B words to copy
// dest - destination buffer, aligned to at least 8B
// src  - source buffer, aligned to 16B
template <unsigned SIZE> struct IntrCopy128<SIZE, 0>
{
	static inline void Copy(void *dest, void *src)
	{
		// uintptr_t is the integer type intended to hold a pointer value, so
		// the alignment test does not depend on the target's pointer width.
		const bool dest_aligned_16B = reinterpret_cast<uintptr_t>(dest) % 16 == 0;

		if (dest_aligned_16B)
			IntrCopy128<SIZE, 1>::Copy(dest, src);	// 16B aligned: use 128b copy
		else
			IntrCopy64<SIZE * 2>::Copy(dest, src);	// only 8B aligned: use 64b copy, two 8B words per 16B word
	}
};
#endif
// ***** EOF
|