File: intr_copy.h

package info (click to toggle)
kmc 3.1.1%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 2,376 kB
  • sloc: cpp: 33,006; python: 372; perl: 178; makefile: 135; sh: 34
file content (92 lines) | stat: -rw-r--r-- 2,095 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
/*
  This file is a part of KMC software distributed under GNU GPL 3 licence.
  The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
  
  Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
  
  Version: 3.1.1
  Date   : 2019-05-19
*/

#ifndef _INTR_COPY_H
#define _INTR_COPY_H

#include <iostream>

#define SIMDE_ENABLE_NATIVE_ALIASES
#include <simde/x86/sse2.h> 

// When WIN32 is not defined, declare __int64 as an alias for long long so
// the copy routines in this header can use that type name.
// NOTE(review): presumably the toolchain supplies __int64 itself when WIN32
// is defined - confirm for every supported Windows compiler.
#ifndef WIN32
typedef long long __int64;
#endif

// 64b copy function
// size - in 8B words (determined during execution)
// dest and src must be aligned to 8B
inline void IntrCopy64fun(void *_dest, void *_src, uint32_t size)
{
	__int64* dest = (__int64 *)_dest;
	__int64* src = (__int64 *)_src;

	for (unsigned i = 0; i < size; ++i)
		simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, dest + i), src[i]);
}


// 64bit copy function
// SIZE - in 8B words
template <unsigned SIZE> struct IntrCopy64
{
	static inline void Copy(void *_dest, void *_src)
	{
		__int64* dest = (__int64*)_dest;
		__int64* src = (__int64*)_src;

		for (unsigned i = 0; i < SIZE; ++i)
			simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, dest + i), src[i]);
	}
};


// Generic 128-bit copy: used for any MODE that has no specialization.
// It copies nothing and only writes "Error\n" to std::cerr.
// SIZE - in 16B words (unused here)
// MODE - selects the copy variant; real copies are provided by specializations
template <unsigned SIZE, unsigned MODE> struct IntrCopy128
{
	// Parameter names are commented out because neither pointer is touched.
	static inline void Copy(void * /*_dest*/, void * /*_src*/)
	{
		// Qualified with std:: so this header does not depend on the including
		// file having a using-directive in effect before the #include.
		std::cerr << "Error\n";
	}
};

// 128bit copy function
// SIZE - in 16B words
// dest - aligned to 16B
// src  - aligned to 16B
template <unsigned SIZE> struct IntrCopy128<SIZE, 1>
{
	static inline void Copy(void *_dest, void *_src)
	{
		__m128i *dest = (__m128i *) _dest;
		__m128i *src = (__m128i *) _src;

		for (unsigned i = 0; i < SIZE; ++i)
			_mm_stream_si128(dest + i, _mm_load_si128(src + i));
	}
};


// 128-bit copy, MODE == 0: the destination alignment is checked at run time.
// SIZE - number of 16B words to copy, fixed at compile time
// dest - aligned to 8B (may or may not also be 16B aligned)
// src  - aligned to 16B
template <unsigned SIZE> struct IntrCopy128<SIZE, 0>
{
	static inline void Copy(void *dest, void *src)
	{
		const bool dest_is_16B_aligned = ((uint64_t)dest % 16) == 0;

		if (dest_is_16B_aligned)
		{
			// Both buffers are 16B aligned: use the 128b copy.
			IntrCopy128<SIZE, 1>::Copy(dest, src);
		}
		else
		{
			// Only 8B aligned: use the 64b copy, two 8B words per 16B word.
			IntrCopy64<SIZE * 2>::Copy(dest, src);
		}
	}
};


#endif

// ***** EOF