File: vec_int_avx.hpp

package info (click to toggle)
supercollider 1%3A3.6.6~repack-2-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 23,792 kB
  • ctags: 25,269
  • sloc: cpp: 177,129; lisp: 63,421; ansic: 11,297; python: 1,787; perl: 766; yacc: 311; sh: 286; lex: 181; ruby: 173; makefile: 168; xml: 13
file content (136 lines) | stat: -rw-r--r-- 4,258 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
//  avx int vector class
//
//  Copyright (C) 2011 Tim Blechmann
//
//  This program is free software; you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//  the Free Software Foundation; either version 2 of the License, or
//  (at your option) any later version.
//
//  This program is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//  You should have received a copy of the GNU General Public License
//  along with this program; see the file COPYING.  If not, write to
//  the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
//  Boston, MA 02111-1307, USA.

#ifndef VEC_AVX_INT_HPP
#define VEC_AVX_INT_HPP

#include <functional>

#include <immintrin.h>
#include <pmmintrin.h>

#include "vec_int_sse2.hpp"

namespace nova   {
namespace detail {

/** Thin wrapper around a 256-bit AVX register holding packed 32-bit integers.
 *
 *  Element-wise integer operations (add, subtract, compare, shift) are carried
 *  out on the two 128-bit halves with SSE2-level code and then recombined.
 *  NOTE(review): presumably because plain AVX (without AVX2) offers no 256-bit
 *  integer arithmetic -- confirm before replacing with AVX2 intrinsics.
 *
 *  Bitwise operations are performed on the whole register through the
 *  floating-point domain (_mm256_and_ps / _mm256_andnot_ps).
 */
struct int_vec_avx
{
    __m256i data_;

    /* cast */

    /** Broadcast a scalar into every 32-bit element. */
    explicit int_vec_avx(int arg):
        data_(_mm256_set1_epi32(arg))
    {}

    /** Wrap a raw integer register. */
    int_vec_avx(__m256i arg):
        data_(arg)
    {}

    /** Reinterpret the bits of a float register as integers (no value conversion). */
    int_vec_avx(__m256 arg):
        data_(_mm256_castps_si256(arg))
    {}

    int_vec_avx(int_vec_avx const & arg):
        data_(arg.data_)
    {}

    /** Default constructor: data_ is left uninitialized. */
    int_vec_avx(void)
    {}

    /* helpers for processing the register as two 128-bit halves */

    /** Lower 128 bits of v. */
    static __m128i low_lane(__m256i v)
    {
        return _mm256_castsi256_si128(v);
    }

    /** Upper 128 bits of v. */
    static __m128i high_lane(__m256i v)
    {
        return _mm256_extractf128_si256(v, 1);
    }

    /** Build a 256-bit register from a lower and an upper 128-bit half. */
    static __m256i join_lanes(__m128i low, __m128i high)
    {
        /* the cast leaves the upper half undefined; the insert fills it in */
        return _mm256_insertf128_si256(_mm256_castsi128_si256(low), high, 1);
    }

/* Defines a binary friend function `op` that splits both operands into their
 * 128-bit halves, applies `function` to each pair of halves wrapped as
 * int_vec_sse2, and joins the two results again.
 * The halves are handed over as __m128 and the results are read back as
 * __m128i, so this relies on the corresponding conversions of int_vec_sse2. */
#define APPLY_SSE_FUNCTION(op, function) \
    friend int_vec_avx op(int_vec_avx const & lhs, int_vec_avx const & rhs) \
    { \
        __m256 lhs_data = _mm256_castsi256_ps(lhs.data_);       \
        __m256 rhs_data = _mm256_castsi256_ps(rhs.data_);       \
        __m128 lhs_low =  _mm256_castps256_ps128(lhs_data);    \
        __m128 lhs_hi =   _mm256_extractf128_ps(lhs_data, 1);  \
        __m128 rhs_low =  _mm256_castps256_ps128(rhs_data);    \
        __m128 rhs_hi =   _mm256_extractf128_ps(rhs_data, 1);  \
\
        __m128i newlow = function(int_vec_sse2(lhs_low), int_vec_sse2(rhs_low)); \
        __m128i newhi  = function(int_vec_sse2(lhs_hi),  int_vec_sse2(rhs_hi)); \
\
        __m256i result = _mm256_castsi128_si256(newlow);  \
        result = _mm256_insertf128_si256(result,  newhi, 1);   \
        return result;   \
    }

    /* element-wise arithmetic, delegated to int_vec_sse2's operators */
    APPLY_SSE_FUNCTION(operator +, std::plus<int_vec_sse2>())
    APPLY_SSE_FUNCTION(operator -, std::minus<int_vec_sse2>())

    /* comparisons, delegated to the int_vec_sse2 functions of the same name
     * (found by argument-dependent lookup) */
    APPLY_SSE_FUNCTION(mask_lt, mask_lt)
    APPLY_SSE_FUNCTION(mask_gt, mask_gt)
    APPLY_SSE_FUNCTION(mask_eq, mask_eq)

#undef APPLY_SSE_FUNCTION

    /** Bitwise AND of all 256 bits. */
    friend int_vec_avx operator&(int_vec_avx const & lhs, int_vec_avx const & rhs)
    {
        return int_vec_avx(_mm256_and_ps(_mm256_castsi256_ps(lhs.data_),
                                        _mm256_castsi256_ps(rhs.data_)));
    }

    /** Bitwise (NOT lhs) AND rhs, following the operand order of _mm256_andnot_ps. */
    friend inline int_vec_avx andnot(int_vec_avx const & lhs, int_vec_avx const & rhs)
    {
        return int_vec_avx(_mm256_andnot_ps(_mm256_castsi256_ps(lhs.data_),
                                        _mm256_castsi256_ps(rhs.data_)));
    }


    /** Shift every 32-bit element left by count bits, shifting in zeros.
     *
     *  Works on the integer halves directly instead of reinterpreting through
     *  __m128 with C-style vector casts, which are a compiler extension.
     */
    friend inline int_vec_avx slli(int_vec_avx const & arg, int count)
    {
        __m128i newlow = _mm_slli_epi32(low_lane(arg.data_),  count);
        __m128i newhi  = _mm_slli_epi32(high_lane(arg.data_), count);

        return int_vec_avx(join_lanes(newlow, newhi));
    }

    /** Shift every 32-bit element right by count bits, shifting in zeros
     *  (logical shift, the sign bit is not replicated).
     */
    friend inline int_vec_avx srli(int_vec_avx const & arg, int count)
    {
        __m128i newlow = _mm_srli_epi32(low_lane(arg.data_),  count);
        __m128i newhi  = _mm_srli_epi32(high_lane(arg.data_), count);

        return int_vec_avx(join_lanes(newlow, newhi));
    }

    /** Convert each signed 32-bit element to single-precision float (value conversion). */
    __m256 convert_to_float(void) const
    {
        return _mm256_cvtepi32_ps(data_);
    }
};


}
}

#endif /* VEC_AVX_INT_HPP */