1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
|
/*****************************************************************************
* *
* PrimeSense PSCommon Library *
* Copyright (C) 2012 PrimeSense Ltd. *
* *
* This file is part of PSCommon. *
* *
* Licensed under the Apache License, Version 2.0 (the "License"); *
* you may not use this file except in compliance with the License. *
* You may obtain a copy of the License at *
* *
* http://www.apache.org/licenses/LICENSE-2.0 *
* *
* Unless required by applicable law or agreed to in writing, software *
* distributed under the License is distributed on an "AS IS" BASIS, *
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
* See the License for the specific language governing permissions and *
* limitations under the License. *
* *
*****************************************************************************/
#ifndef _XN_SIMD_SSE_H_
#define _XN_SIMD_SSE_H_
#include <mmintrin.h>
#include <xmmintrin.h>
#include <emmintrin.h>
#include <tmmintrin.h>
#include <XnOS.h>
typedef __m128i XN_INT128;
typedef XnInt32 XN_INT32;
typedef XnInt16 XN_INT16;
#define XnShiftRight128(a, imm) _mm_srli_si128(a, imm)
#define XnShiftLeft128(a, imm) _mm_slli_si128(a, imm)
#define XnShiftRight16(a, imm) _mm_srli_epi16(a, imm)
#define XnShiftLeft16(a, imm) _mm_slli_epi16(a, imm)
#define XnShiftRight16Sign(a, imm) _mm_srai_epi16(a, imm)
#ifdef WIN32
#define XN_FORCE_INLINE inline
#else
#define XN_FORCE_INLINE __inline __attribute__ ((__always_inline__))
#endif
static XN_FORCE_INLINE
XN_INT128 XnAnd128(XN_INT128 a, XN_INT128 b) // _mm_and_si128
{
return _mm_and_si128(a, b);
}
static XN_FORCE_INLINE
XN_INT128 XnAndNot128(XN_INT128 a, XN_INT128 b) // _mm_andnot_si128
{
return _mm_andnot_si128(a, b);
}
static XN_FORCE_INLINE
XN_INT128 XnAdd16(XN_INT128 a, XN_INT128 b) // _mm_add_epi16
{
return _mm_add_epi16(a, b);
}
static XN_FORCE_INLINE
XN_INT128 XnHAdd16(XN_INT128 a, XN_INT128 b) // _mm_hadd_epi16 - SSSE3!
{
return _mm_hadd_epi16(a, b);
}
static XN_FORCE_INLINE
XN_INT128 XnAdd16AndSaturates(XN_INT128 a, XN_INT128 b) // _mm_adds_epi16
{
return _mm_adds_epi16(a, b);
}
static XN_FORCE_INLINE
XN_INT128 XnAddUnsigned16AndSaturates(XN_INT128 a, XN_INT128 b) // _mm_adds_epu16
{
return _mm_adds_epu16 (a, b);
}
static XN_FORCE_INLINE
XN_INT128 XnSub16(XN_INT128 a, XN_INT128 b) //_mm_sub_epi16
{
return _mm_sub_epi16(a, b);
}
static XN_FORCE_INLINE
XN_INT128 XnSubSigned16(XN_INT128 a, XN_INT128 b) // _mm_subs_epu16
{
return _mm_subs_epu16(a, b);
}
static XN_FORCE_INLINE
XN_INT128 XnMult16(XN_INT128 a, XN_INT128 b) //_mm_mullo_epi16
{
return _mm_mullo_epi16(a, b);
}
static XN_FORCE_INLINE
XN_INT128 XnMin16(XN_INT128 a, XN_INT128 b) // _mm_min_epi16
{
return _mm_min_epi16(a, b);
}
static XN_FORCE_INLINE
XN_INT128 XnMax16(XN_INT128 a, XN_INT128 b) // _mm_max_epi16
{
return _mm_max_epi16(a, b);
}
static XN_FORCE_INLINE
XN_INT128 XnSetZero128() // _mm_setzero_si128
{
return _mm_setzero_si128();
}
static XN_FORCE_INLINE
XN_INT128 XnSetOne16(XN_INT16 a) //_mm_set1_epi16
{
return _mm_set1_epi16(a);
}
static XN_FORCE_INLINE
XN_INT128 XnSet16(XN_INT16 a7, XN_INT16 a6,
XN_INT16 a5, XN_INT16 a4,
XN_INT16 a3, XN_INT16 a2,
XN_INT16 a1, XN_INT16 a0) // _mm_set_epi16
{
return _mm_set_epi16(a7, a6, a5, a4, a3, a2, a1, a0);
}
static XN_FORCE_INLINE
XN_INT128 XnCompareGreaterThan(XN_INT128 a, XN_INT128 b) //_mm_cmpgt_epi16
{
return _mm_cmpgt_epi16(a, b);
}
static XN_FORCE_INLINE
XN_INT128 XnCompareLessThan(XN_INT128 a, XN_INT128 b) //_mm_cmplt_epi16
{
return _mm_cmplt_epi16(a, b);
}
static XN_FORCE_INLINE
XN_INT128 XnCompareEqual(XN_INT128 a, XN_INT128 b) // _mm_cmpeq_epi16
{
return _mm_cmpeq_epi16(a, b);
}
static XN_FORCE_INLINE
XN_INT128 XnAverageUnsigned16(XN_INT128 a, XN_INT128 b) // _mm_avg_epu16
{
return _mm_avg_epu16(a, b);
}
static XN_FORCE_INLINE
XN_INT128 XnOr128(XN_INT128 a, XN_INT128 b) // _mm_or_si128
{
return _mm_or_si128(a,b);
}
static XN_FORCE_INLINE
XN_INT128 XnPacksSigned16(XN_INT128 a, XN_INT128 b) // _mm_packs_epi16
{
return _mm_packs_epi16(a,b);
}
#endif
|