<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<!-- Document metadata for the Doxygen source listing of Random123/features/sse.h. -->
<!-- "text/html" is the registered media type for this page; the charset parameter declares UTF-8. -->
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
<title>Random123-1.09: Random123/features/sse.h Source File</title>
<!-- Stylesheets for the tab bars and the search box. -->
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<link href="search/search.css" rel="stylesheet" type="text/css"/>
<!-- Loaded before the body; presumably provides the SearchBox constructor that the inline script in the body calls. -->
<script type="text/javascript" src="search/search.js"></script>
<!-- General Doxygen theme (kept after the script, as in the generated order). -->
<link href="doxygen.css" rel="stylesheet" type="text/css"/>
</head>
<body onload='searchBox.OnSelectItem(0);'>
<!-- Project banner, rendered with the same styling as the tab bars. -->
<div class="tabs">
  <ul class="tablist">
    <li style="padding-left: 1.5em; font-weight: bold">Random123-1.09 Documentation</li>
  </ul>
</div>
<!-- Generated by Doxygen 1.7.1 -->
<script type="text/javascript"><!--
// Global search controller; the body onload attribute and the search-box handlers refer to it by this name.
var searchBox = new SearchBox('searchBox', 'search', false, 'Search');
--></script>
<div class="navigation" id="top">
<div class="tabs">
<!-- Top-level navigation tabs; class "current" marks the section this page belongs to. -->
<ul class="tablist">
<li><a href="index.html"><span>Main Page</span></a></li>
<li><a href="pages.html"><span>Related Pages</span></a></li>
<li><a href="modules.html"><span>Modules</span></a></li>
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
<li><a href="annotated.html"><span>Classes</span></a></li>
<li class="current"><a href="files.html"><span>Files</span></a></li>
<!-- Search box. The inline handlers call methods on the global searchBox object created by the inline script in the body. -->
<!-- The field carries a title and the close image carries alt text so both controls have an accessible name. -->
<li id="searchli">
<div id="MSearchBox" class="MSearchBoxInactive">
<span class="left">
<img id="MSearchSelect" src="search/mag_sel.png"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
alt=""/>
<input type="text" id="MSearchField" value="Search" accesskey="S" title="Search"
onfocus="searchBox.OnSearchFieldFocus(true)"
onblur="searchBox.OnSearchFieldFocus(false)"
onkeyup="searchBox.OnSearchFieldChange(event)"/>
</span><span class="right">
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt="Close search results"/></a>
</span>
</div>
</li>
</ul>
</div>
<!-- Secondary tab bar: the sub-pages of the Files section. -->
<div class="tabs2">
  <ul class="tablist">
    <li><a href="files.html"><span>File List</span></a></li>
    <li><a href="globals.html"><span>File Members</span></a></li>
  </ul>
</div>
<!-- Page heading: path of the file whose source is listed below. -->
<div class="header">
  <div class="headertitle">
    <h1>Random123/features/sse.h</h1>
  </div>
</div>
<div class="contents">
<a href="sse_8h.html">Go to the documentation of this file.</a><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <span class="comment">/*</span>
<a name="l00002"></a>00002 <span class="comment">Copyright 2010-2011, D. E. Shaw Research.</span>
<a name="l00003"></a>00003 <span class="comment">All rights reserved.</span>
<a name="l00004"></a>00004 <span class="comment"></span>
<a name="l00005"></a>00005 <span class="comment">Redistribution and use in source and binary forms, with or without</span>
<a name="l00006"></a>00006 <span class="comment">modification, are permitted provided that the following conditions are</span>
<a name="l00007"></a>00007 <span class="comment">met:</span>
<a name="l00008"></a>00008 <span class="comment"></span>
<a name="l00009"></a>00009 <span class="comment">* Redistributions of source code must retain the above copyright</span>
<a name="l00010"></a>00010 <span class="comment"> notice, this list of conditions, and the following disclaimer.</span>
<a name="l00011"></a>00011 <span class="comment"></span>
<a name="l00012"></a>00012 <span class="comment">* Redistributions in binary form must reproduce the above copyright</span>
<a name="l00013"></a>00013 <span class="comment"> notice, this list of conditions, and the following disclaimer in the</span>
<a name="l00014"></a>00014 <span class="comment"> documentation and/or other materials provided with the distribution.</span>
<a name="l00015"></a>00015 <span class="comment"></span>
<a name="l00016"></a>00016 <span class="comment">* Neither the name of D. E. Shaw Research nor the names of its</span>
<a name="l00017"></a>00017 <span class="comment"> contributors may be used to endorse or promote products derived from</span>
<a name="l00018"></a>00018 <span class="comment"> this software without specific prior written permission.</span>
<a name="l00019"></a>00019 <span class="comment"></span>
<a name="l00020"></a>00020 <span class="comment">THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS</span>
<a name="l00021"></a>00021 <span class="comment">"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT</span>
<a name="l00022"></a>00022 <span class="comment">LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR</span>
<a name="l00023"></a>00023 <span class="comment">A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT</span>
<a name="l00024"></a>00024 <span class="comment">OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,</span>
<a name="l00025"></a>00025 <span class="comment">SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT</span>
<a name="l00026"></a>00026 <span class="comment">LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,</span>
<a name="l00027"></a>00027 <span class="comment">DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY</span>
<a name="l00028"></a>00028 <span class="comment">THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT</span>
<a name="l00029"></a>00029 <span class="comment">(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE</span>
<a name="l00030"></a>00030 <span class="comment">OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.</span>
<a name="l00031"></a>00031 <span class="comment">*/</span>
<a name="l00032"></a>00032 <span class="preprocessor">#ifndef _Random123_sse_dot_h__</span>
<a name="l00033"></a>00033 <span class="preprocessor"></span><span class="preprocessor">#define _Random123_sse_dot_h__</span>
<a name="l00034"></a>00034 <span class="preprocessor"></span>
<a name="l00035"></a>00035 <span class="preprocessor">#if R123_USE_SSE</span>
<a name="l00036"></a>00036 <span class="preprocessor"></span>
<a name="l00037"></a>00037 <span class="preprocessor">#if R123_USE_X86INTRIN_H</span>
<a name="l00038"></a>00038 <span class="preprocessor"></span><span class="preprocessor">#include &lt;x86intrin.h&gt;</span>
<a name="l00039"></a>00039 <span class="preprocessor">#endif</span>
<a name="l00040"></a>00040 <span class="preprocessor"></span><span class="preprocessor">#if R123_USE_IA32INTRIN_H</span>
<a name="l00041"></a>00041 <span class="preprocessor"></span><span class="preprocessor">#include &lt;ia32intrin.h&gt;</span>
<a name="l00042"></a>00042 <span class="preprocessor">#endif</span>
<a name="l00043"></a>00043 <span class="preprocessor"></span><span class="preprocessor">#if R123_USE_XMMINTRIN_H</span>
<a name="l00044"></a>00044 <span class="preprocessor"></span><span class="preprocessor">#include &lt;xmmintrin.h&gt;</span>
<a name="l00045"></a>00045 <span class="preprocessor">#endif</span>
<a name="l00046"></a>00046 <span class="preprocessor"></span><span class="preprocessor">#if R123_USE_EMMINTRIN_H</span>
<a name="l00047"></a>00047 <span class="preprocessor"></span><span class="preprocessor">#include &lt;emmintrin.h&gt;</span>
<a name="l00048"></a>00048 <span class="preprocessor">#endif</span>
<a name="l00049"></a>00049 <span class="preprocessor"></span><span class="preprocessor">#if R123_USE_SMMINTRIN_H</span>
<a name="l00050"></a>00050 <span class="preprocessor"></span><span class="preprocessor">#include &lt;smmintrin.h&gt;</span>
<a name="l00051"></a>00051 <span class="preprocessor">#endif</span>
<a name="l00052"></a>00052 <span class="preprocessor"></span><span class="preprocessor">#if R123_USE_WMMINTRIN_H</span>
<a name="l00053"></a>00053 <span class="preprocessor"></span><span class="preprocessor">#include &lt;wmmintrin.h&gt;</span>
<a name="l00054"></a>00054 <span class="preprocessor">#endif</span>
<a name="l00055"></a>00055 <span class="preprocessor"></span><span class="preprocessor">#if R123_USE_INTRIN_H</span>
<a name="l00056"></a>00056 <span class="preprocessor"></span><span class="preprocessor">#include &lt;intrin.h&gt;</span>
<a name="l00057"></a>00057 <span class="preprocessor">#endif</span>
<a name="l00058"></a>00058 <span class="preprocessor"></span><span class="preprocessor">#ifdef __cplusplus</span>
<a name="l00059"></a>00059 <span class="preprocessor"></span><span class="preprocessor">#include &lt;iostream&gt;</span>
<a name="l00060"></a>00060 <span class="preprocessor">#include &lt;limits&gt;</span>
<a name="l00061"></a>00061 <span class="preprocessor">#include &lt;stdexcept&gt;</span>
<a name="l00062"></a>00062 <span class="preprocessor">#endif</span>
<a name="l00063"></a>00063 <span class="preprocessor"></span>
<a name="l00064"></a>00064 <span class="preprocessor">#if R123_USE_ASM_GNU</span>
<a name="l00065"></a>00065 <span class="preprocessor"></span>
<a name="l00066"></a>00066 <span class="comment">/* bit25 of CX tells us whether AES is enabled. */</span>
<a name="l00067"></a>00067 R123_STATIC_INLINE <span class="keywordtype">int</span> <a class="code" href="sse_8h.html#a0b35a046e85316295476d7d552411044">haveAESNI</a>(){
<a name="l00068"></a>00068 <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> eax, ebx, ecx, edx;
<a name="l00069"></a>00069 __asm__ __volatile__ (<span class="stringliteral">"cpuid"</span>: <span class="stringliteral">"=a"</span> (eax), <span class="stringliteral">"=b"</span> (ebx), <span class="stringliteral">"=c"</span> (ecx), <span class="stringliteral">"=d"</span> (edx) :
<a name="l00070"></a>00070 <span class="stringliteral">"a"</span> (1));
<a name="l00071"></a>00071 <span class="keywordflow">return</span> (ecx&gt;&gt;25) &amp; 1;
<a name="l00072"></a>00072 }
<a name="l00073"></a>00073 <span class="preprocessor">#elif R123_USE_CPUID_MSVC</span>
<a name="l00074"></a>00074 <span class="preprocessor"></span>R123_STATIC_INLINE <span class="keywordtype">int</span> <a class="code" href="sse_8h.html#a0b35a046e85316295476d7d552411044">haveAESNI</a>(){
<a name="l00075"></a>00075 <span class="keywordtype">int</span> CPUInfo[4];
<a name="l00076"></a>00076 __cpuid(CPUInfo, 1);
<a name="l00077"></a>00077 <span class="keywordflow">return</span> (CPUInfo[2]&gt;&gt;25)&amp;1;
<a name="l00078"></a>00078 }
<a name="l00079"></a>00079 <span class="preprocessor">#else </span><span class="comment">/* R123_USE_CPUID_??? */</span>
<a name="l00080"></a>00080 <span class="preprocessor">#warning "No R123_USE_CPUID_XXX method chosen. haveAESNI will always return false"</span>
<a name="l00081"></a><a class="code" href="sse_8h.html#a0b35a046e85316295476d7d552411044">00081</a> <span class="preprocessor"></span>R123_STATIC_INLINE <span class="keywordtype">int</span> <a class="code" href="sse_8h.html#a0b35a046e85316295476d7d552411044">haveAESNI</a>(){
<a name="l00082"></a>00082 <span class="keywordflow">return</span> 0;
<a name="l00083"></a>00083 }
<a name="l00084"></a>00084 <span class="preprocessor">#endif </span><span class="comment">/* R123_USE_ASM_GNU || R123_USE_CPUID_MSVC */</span>
<a name="l00085"></a>00085
<a name="l00086"></a>00086 <span class="comment">// There is a lot of annoying and inexplicable variation in the</span>
<a name="l00087"></a>00087 <span class="comment">// SSE intrinsics available in different compilation environments.</span>
<a name="l00088"></a>00088 <span class="comment">// The details seem to depend on the compiler, the version and</span>
<a name="l00089"></a>00089 <span class="comment">// the target architecture. Rather than insisting on</span>
<a name="l00090"></a>00090 <span class="comment">// R123_USE_feature tests for each of these in each of the</span>
<a name="l00091"></a>00091 <span class="comment">// compilerfeatures.h files we just keep the complexity localized</span>
<a name="l00092"></a>00092 <span class="comment">// to here...</span>
<a name="l00093"></a>00093 <span class="preprocessor">#if (defined(__ICC) &amp;&amp; __ICC&lt;1210) || (defined(_MSC_VER) &amp;&amp; !defined(_WIN64))</span>
<a name="l00094"></a>00094 <span class="preprocessor"></span><span class="comment">/* Is there an intrinsic to assemble an __m128i from two 64-bit words? </span>
<a name="l00095"></a>00095 <span class="comment"> If not, use the 4x32-bit intrisic instead. N.B. It looks like Intel</span>
<a name="l00096"></a>00096 <span class="comment"> added _mm_set_epi64x to icc version 12.1 in Jan 2012.</span>
<a name="l00097"></a>00097 <span class="comment">*/</span>
<a name="l00098"></a>00098 R123_STATIC_INLINE __m128i _mm_set_epi64x(uint64_t v1, uint64_t v0){
<a name="l00099"></a>00099 <span class="keyword">union</span>{
<a name="l00100"></a>00100 uint64_t u64;
<a name="l00101"></a>00101 uint32_t u32[2];
<a name="l00102"></a>00102 } u1, u0;
<a name="l00103"></a>00103 u1.u64 = v1;
<a name="l00104"></a>00104 u0.u64 = v0;
<a name="l00105"></a>00105 <span class="keywordflow">return</span> _mm_set_epi32(u1.u32[1], u1.u32[0], u0.u32[1], u0.u32[0]);
<a name="l00106"></a>00106 }
<a name="l00107"></a>00107 <span class="preprocessor">#endif</span>
<a name="l00108"></a>00108 <span class="preprocessor"></span><span class="comment">/* _mm_extract_lo64 abstracts the task of extracting the low 64-bit</span>
<a name="l00109"></a>00109 <span class="comment"> word from an __m128i. The _mm_cvtsi128_si64 intrinsic does the job</span>
<a name="l00110"></a>00110 <span class="comment"> on 64-bit platforms. Unfortunately, both MSVC and Open64 fail</span>
<a name="l00111"></a>00111 <span class="comment"> assertions in ut_M128.cpp and ut_carray.cpp when we use the</span>
<a name="l00112"></a>00112 <span class="comment"> _mm_cvtsi128_si64 intrinsic. (See</span>
<a name="l00113"></a>00113 <span class="comment"> https://bugs.open64.net/show_bug.cgi?id=873 for the Open64 bug).</span>
<a name="l00114"></a>00114 <span class="comment"> On 32-bit platforms, there's no MOVQ, so there's no intrinsic.</span>
<a name="l00115"></a>00115 <span class="comment"> Finally, even if the intrinsic exists, it may be spelled with or</span>
<a name="l00116"></a>00116 <span class="comment"> without the 'x'.</span>
<a name="l00117"></a>00117 <span class="comment">*/</span>
<a name="l00118"></a>00118 <span class="preprocessor">#if !defined(__x86_64__) || defined(_MSC_VER) || defined(__OPEN64__)</span>
<a name="l00119"></a><a class="code" href="sse_8h.html#adac6aaf79c4428abcd30bf583eeb5450">00119</a> <span class="preprocessor"></span>R123_STATIC_INLINE uint64_t <a class="code" href="sse_8h.html#adac6aaf79c4428abcd30bf583eeb5450">_mm_extract_lo64</a>(__m128i si){
<a name="l00120"></a>00120 <span class="keyword">union</span>{
<a name="l00121"></a>00121 uint64_t u64[2];
<a name="l00122"></a>00122 __m128i m;
<a name="l00123"></a>00123 }u;
<a name="l00124"></a>00124 _mm_store_si128(&amp;u.m, si);
<a name="l00125"></a>00125 <span class="keywordflow">return</span> u.u64[0];
<a name="l00126"></a>00126 }
<a name="l00127"></a>00127 <span class="preprocessor">#elif defined(__llvm__) || defined(__ICC)</span>
<a name="l00128"></a>00128 <span class="preprocessor"></span>R123_STATIC_INLINE uint64_t <a class="code" href="sse_8h.html#adac6aaf79c4428abcd30bf583eeb5450">_mm_extract_lo64</a>(__m128i si){
<a name="l00129"></a>00129 <span class="keywordflow">return</span> (uint64_t)_mm_cvtsi128_si64(si);
<a name="l00130"></a>00130 }
<a name="l00131"></a>00131 <span class="preprocessor">#else </span><span class="comment">/* GNUC, others */</span>
<a name="l00132"></a>00132 <span class="comment">/* FWIW, gcc's emmintrin.h has had the 'x' spelling</span>
<a name="l00133"></a>00133 <span class="comment"> since at least gcc-3.4.4. The no-'x' spelling showed up</span>
<a name="l00134"></a>00134 <span class="comment"> around 4.2. */</span>
<a name="l00135"></a>00135 R123_STATIC_INLINE uint64_t <a class="code" href="sse_8h.html#adac6aaf79c4428abcd30bf583eeb5450">_mm_extract_lo64</a>(__m128i si){
<a name="l00136"></a>00136 <span class="keywordflow">return</span> (uint64_t)_mm_cvtsi128_si64x(si);
<a name="l00137"></a>00137 }
<a name="l00138"></a>00138 <span class="preprocessor">#endif</span>
<a name="l00139"></a>00139 <span class="preprocessor"></span><span class="preprocessor">#if defined(__GNUC__) &amp;&amp; __GNUC__ &lt; 4</span>
<a name="l00140"></a>00140 <span class="preprocessor"></span><span class="comment">/* the cast builtins showed up in gcc4. */</span>
<a name="l00141"></a>00141 R123_STATIC_INLINE __m128 _mm_castsi128_ps(__m128i si){
<a name="l00142"></a>00142 <span class="keywordflow">return</span> (__m128)si;
<a name="l00143"></a>00143 }
<a name="l00144"></a>00144 <span class="preprocessor">#endif</span>
<a name="l00145"></a>00145 <span class="preprocessor"></span>
<a name="l00146"></a>00146 <span class="preprocessor">#ifdef __cplusplus</span>
<a name="l00147"></a>00147 <span class="preprocessor"></span>
<a name="l00148"></a><a class="code" href="structr123m128i.html">00148</a> <span class="keyword">struct </span><a class="code" href="structr123m128i.html">r123m128i</a>{
<a name="l00149"></a><a class="code" href="structr123m128i.html#a9b9908268281aace8028a3f34980634d">00149</a> __m128i <a class="code" href="structr123m128i.html#a9b9908268281aace8028a3f34980634d">m</a>;
<a name="l00150"></a>00150 <span class="preprocessor">#if R123_USE_CXX11_UNRESTRICTED_UNIONS</span>
<a name="l00151"></a>00151 <span class="preprocessor"></span> <span class="comment">// C++98 forbids a union member from having *any* constructors.</span>
<a name="l00152"></a>00152 <span class="comment">// C++11 relaxes this, and allows union members to have constructors</span>
<a name="l00153"></a>00153 <span class="comment">// as long as there is a "trivial" default construtor. So in C++11</span>
<a name="l00154"></a>00154 <span class="comment">// we can provide a r123m128i constructor with an __m128i argument, and still</span>
<a name="l00155"></a>00155 <span class="comment">// have the default (and hence trivial) default constructor.</span>
<a name="l00156"></a>00156 <a class="code" href="structr123m128i.html">r123m128i</a>() = <span class="keywordflow">default</span>;
<a name="l00157"></a>00157 <a class="code" href="structr123m128i.html">r123m128i</a>(__m128i _m): <a class="code" href="structr123m128i.html#a9b9908268281aace8028a3f34980634d">m</a>(_m){}
<a name="l00158"></a>00158 <span class="preprocessor">#endif</span>
<a name="l00159"></a><a class="code" href="structr123m128i.html#a9842fbc85102e0019cc58d97c28e2d02">00159</a> <span class="preprocessor"></span> <a class="code" href="structr123m128i.html">r123m128i</a>&amp; <a class="code" href="structr123m128i.html#a9842fbc85102e0019cc58d97c28e2d02">operator=</a>(<span class="keyword">const</span> __m128i&amp; rhs){ <a class="code" href="structr123m128i.html#a9b9908268281aace8028a3f34980634d">m</a>=rhs; <span class="keywordflow">return</span> *<span class="keyword">this</span>;}
<a name="l00160"></a><a class="code" href="structr123m128i.html#a0da7ae920bce1b75efb91017ab6bc37c">00160</a> <a class="code" href="structr123m128i.html">r123m128i</a>&amp; <a class="code" href="structr123m128i.html#a0da7ae920bce1b75efb91017ab6bc37c">operator=</a>(R123_ULONG_LONG n){ <a class="code" href="structr123m128i.html#a9b9908268281aace8028a3f34980634d">m</a> = _mm_set_epi64x(0, n); <span class="keywordflow">return</span> *<span class="keyword">this</span>;}
<a name="l00161"></a>00161 <span class="preprocessor">#if R123_USE_CXX11_EXPLICIT_CONVERSIONS</span>
<a name="l00162"></a>00162 <span class="preprocessor"></span> <span class="comment">// With C++0x we can attach explicit to the bool conversion operator</span>
<a name="l00163"></a>00163 <span class="comment">// to disambiguate undesired promotions. For g++, this works</span>
<a name="l00164"></a>00164 <span class="comment">// only in 4.5 and above.</span>
<a name="l00165"></a>00165 <span class="keyword">explicit</span> operator bool()<span class="keyword"> const </span>{<span class="keywordflow">return</span> _bool();}
<a name="l00166"></a>00166 <span class="preprocessor">#else</span>
<a name="l00167"></a>00167 <span class="preprocessor"></span> <span class="comment">// Pre-C++0x, we have to do something else. Google for the "safe bool"</span>
<a name="l00168"></a>00168 <span class="comment">// idiom for other ideas...</span>
<a name="l00169"></a><a class="code" href="structr123m128i.html#ae5788785c2ce7f7ca6ff8dd4377771f3">00169</a> <a class="code" href="structr123m128i.html#ae5788785c2ce7f7ca6ff8dd4377771f3">operator const void*</a>()<span class="keyword"> const</span>{<span class="keywordflow">return</span> _bool()?<span class="keyword">this</span>:0;}
<a name="l00170"></a>00170 <span class="preprocessor">#endif</span>
<a name="l00171"></a><a class="code" href="structr123m128i.html#a80a5c24f76fec61b3ab0ed0e0353cfb7">00171</a> <span class="preprocessor"></span> <a class="code" href="structr123m128i.html#a80a5c24f76fec61b3ab0ed0e0353cfb7">operator __m128i</a>()<span class="keyword"> const </span>{<span class="keywordflow">return</span> <a class="code" href="structr123m128i.html#a9b9908268281aace8028a3f34980634d">m</a>;}
<a name="l00172"></a>00172
<a name="l00173"></a>00173 <span class="keyword">private</span>:
<a name="l00174"></a>00174 <span class="preprocessor">#if R123_USE_SSE4_1</span>
<a name="l00175"></a>00175 <span class="preprocessor"></span> <span class="keywordtype">bool</span> _bool()<span class="keyword"> const</span>{ <span class="keywordflow">return</span> !_mm_testz_si128(<a class="code" href="structr123m128i.html#a9b9908268281aace8028a3f34980634d">m</a>,<a class="code" href="structr123m128i.html#a9b9908268281aace8028a3f34980634d">m</a>); }
<a name="l00176"></a>00176 <span class="preprocessor">#else</span>
<a name="l00177"></a>00177 <span class="preprocessor"></span> <span class="keywordtype">bool</span> _bool()<span class="keyword"> const</span>{ <span class="keywordflow">return</span> 0xf != _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(<a class="code" href="structr123m128i.html#a9b9908268281aace8028a3f34980634d">m</a>, _mm_setzero_si128()))); }
<a name="l00178"></a>00178 <span class="preprocessor">#endif</span>
<a name="l00179"></a>00179 <span class="preprocessor"></span>};
<a name="l00180"></a>00180
<a name="l00181"></a><a class="code" href="sse_8h.html#a2ffeb79e1a2cbb9cb35b0daf514a06a5">00181</a> R123_STATIC_INLINE <a class="code" href="structr123m128i.html">r123m128i</a>&amp; <a class="code" href="sse_8h.html#a2ffeb79e1a2cbb9cb35b0daf514a06a5">operator++</a>(<a class="code" href="structr123m128i.html">r123m128i</a>&amp; v){
<a name="l00182"></a>00182 __m128i&amp; c = v.<a class="code" href="structr123m128i.html#a9b9908268281aace8028a3f34980634d">m</a>;
<a name="l00183"></a>00183 __m128i zeroone = _mm_set_epi64x(R123_64BIT(0), R123_64BIT(1));
<a name="l00184"></a>00184 c = _mm_add_epi64(c, zeroone);
<a name="l00185"></a>00185 <span class="comment">//return c;</span>
<a name="l00186"></a>00186 <span class="preprocessor">#if R123_USE_SSE4_1</span>
<a name="l00187"></a>00187 <span class="preprocessor"></span> __m128i zerofff = _mm_set_epi64x(0, ~(R123_64BIT(0)));
<a name="l00188"></a>00188 <span class="keywordflow">if</span>( R123_BUILTIN_EXPECT(_mm_testz_si128(c,zerofff), 0) ){
<a name="l00189"></a>00189 __m128i onezero = _mm_set_epi64x(R123_64BIT(1), R123_64BIT(0));
<a name="l00190"></a>00190 c = _mm_add_epi64(c, onezero);
<a name="l00191"></a>00191 }
<a name="l00192"></a>00192 <span class="preprocessor">#else</span>
<a name="l00193"></a>00193 <span class="preprocessor"></span> <span class="keywordtype">unsigned</span> mask = _mm_movemask_ps( _mm_castsi128_ps(_mm_cmpeq_epi32(c, _mm_setzero_si128())));
<a name="l00194"></a>00194 <span class="comment">// The low two bits of mask are 11 iff the low 64 bits of</span>
<a name="l00195"></a>00195 <span class="comment">// c are zero.</span>
<a name="l00196"></a>00196 <span class="keywordflow">if</span>( R123_BUILTIN_EXPECT((mask&amp;0x3) == 0x3, 0) ){
<a name="l00197"></a>00197 __m128i onezero = _mm_set_epi64x(1,0);
<a name="l00198"></a>00198 c = _mm_add_epi64(c, onezero);
<a name="l00199"></a>00199 }
<a name="l00200"></a>00200 <span class="preprocessor">#endif</span>
<a name="l00201"></a>00201 <span class="preprocessor"></span> <span class="keywordflow">return</span> v;
<a name="l00202"></a>00202 }
<a name="l00203"></a>00203
<a name="l00204"></a><a class="code" href="sse_8h.html#a436d4467bb1389d42bfa67686218fd98">00204</a> R123_STATIC_INLINE <a class="code" href="structr123m128i.html">r123m128i</a>&amp; <a class="code" href="sse_8h.html#a436d4467bb1389d42bfa67686218fd98">operator+=</a>(<a class="code" href="structr123m128i.html">r123m128i</a>&amp; lhs, R123_ULONG_LONG n){
<a name="l00205"></a>00205 __m128i c = lhs.<a class="code" href="structr123m128i.html#a9b9908268281aace8028a3f34980634d">m</a>;
<a name="l00206"></a>00206 __m128i incr128 = _mm_set_epi64x(0, n);
<a name="l00207"></a>00207 c = _mm_add_epi64(c, incr128);
<a name="l00208"></a>00208 <span class="comment">// return c; // NO CARRY! </span>
<a name="l00209"></a>00209
<a name="l00210"></a>00210 int64_t lo64 = <a class="code" href="sse_8h.html#adac6aaf79c4428abcd30bf583eeb5450">_mm_extract_lo64</a>(c);
<a name="l00211"></a>00211 <span class="keywordflow">if</span>((uint64_t)lo64 &lt; n)
<a name="l00212"></a>00212 c = _mm_add_epi64(c, _mm_set_epi64x(1,0));
<a name="l00213"></a>00213 lhs.<a class="code" href="structr123m128i.html#a9b9908268281aace8028a3f34980634d">m</a> = c;
<a name="l00214"></a>00214 <span class="keywordflow">return</span> lhs;
<a name="l00215"></a>00215 }
<a name="l00216"></a>00216
<a name="l00217"></a>00217 <span class="comment">// We need this one because it's present, but never used in r123array1xm128i::incr</span>
<a name="l00218"></a><a class="code" href="sse_8h.html#a43e11de33821f27448feb163162ec895">00218</a> R123_STATIC_INLINE <span class="keywordtype">bool</span> <a class="code" href="sse_8h.html#a43e11de33821f27448feb163162ec895">operator&lt;=</a>(R123_ULONG_LONG, <span class="keyword">const</span> <a class="code" href="structr123m128i.html">r123m128i</a> &amp;){
<a name="l00219"></a>00219 <span class="keywordflow">throw</span> std::runtime_error(<span class="stringliteral">"operator&lt;=(unsigned long long, r123m128i) is unimplemented."</span>);}
<a name="l00220"></a>00220
<a name="l00221"></a>00221 <span class="comment">// The comparisons aren't implemented, but if we leave them out, and </span>
<a name="l00222"></a>00222 <span class="comment">// somebody writes, e.g., M1 &lt; M2, the compiler will do an implicit</span>
<a name="l00223"></a>00223 <span class="comment">// conversion through void*. Sigh...</span>
<a name="l00224"></a><a class="code" href="sse_8h.html#ac2c263fa443dd074fe4ff96710219eb8">00224</a> R123_STATIC_INLINE <span class="keywordtype">bool</span> <a class="code" href="sse_8h.html#ac2c263fa443dd074fe4ff96710219eb8">operator&lt;</a>(<span class="keyword">const</span> <a class="code" href="structr123m128i.html">r123m128i</a>&amp;, <span class="keyword">const</span> <a class="code" href="structr123m128i.html">r123m128i</a>&amp;){
<a name="l00225"></a>00225 <span class="keywordflow">throw</span> std::runtime_error(<span class="stringliteral">"operator&lt;(r123m128i, r123m128i) is unimplemented."</span>);}
<a name="l00226"></a><a class="code" href="sse_8h.html#a9ce245b77f0638255e7138190d0ff4fd">00226</a> R123_STATIC_INLINE <span class="keywordtype">bool</span> <a class="code" href="sse_8h.html#a43e11de33821f27448feb163162ec895">operator&lt;=</a>(<span class="keyword">const</span> <a class="code" href="structr123m128i.html">r123m128i</a>&amp;, <span class="keyword">const</span> <a class="code" href="structr123m128i.html">r123m128i</a>&amp;){
<a name="l00227"></a>00227 <span class="keywordflow">throw</span> std::runtime_error(<span class="stringliteral">"operator&lt;=(r123m128i, r123m128i) is unimplemented."</span>);}
<a name="l00228"></a><a class="code" href="sse_8h.html#ad07b59f8110eac57511ee8c2fa697f93">00228</a> R123_STATIC_INLINE <span class="keywordtype">bool</span> <a class="code" href="sse_8h.html#ad07b59f8110eac57511ee8c2fa697f93">operator&gt;</a>(<span class="keyword">const</span> <a class="code" href="structr123m128i.html">r123m128i</a>&amp;, <span class="keyword">const</span> <a class="code" href="structr123m128i.html">r123m128i</a>&amp;){
<a name="l00229"></a>00229 <span class="keywordflow">throw</span> std::runtime_error(<span class="stringliteral">"operator&gt;(r123m128i, r123m128i) is unimplemented."</span>);}
<a name="l00230"></a><a class="code" href="sse_8h.html#aa7bb0ad8bfceb39a1b71262b9476e151">00230</a> R123_STATIC_INLINE <span class="keywordtype">bool</span> <a class="code" href="sse_8h.html#aa7bb0ad8bfceb39a1b71262b9476e151">operator&gt;=</a>(<span class="keyword">const</span> <a class="code" href="structr123m128i.html">r123m128i</a>&amp;, <span class="keyword">const</span> <a class="code" href="structr123m128i.html">r123m128i</a>&amp;){
<a name="l00231"></a>00231 <span class="keywordflow">throw</span> std::runtime_error(<span class="stringliteral">"operator&gt;=(r123m128i, r123m128i) is unimplemented."</span>);}
<a name="l00232"></a>00232
<a name="l00233"></a><a class="code" href="sse_8h.html#ab1389a6d8b14bc380c6d6a6d4de8853e">00233</a> R123_STATIC_INLINE <span class="keywordtype">bool</span> <a class="code" href="sse_8h.html#ab1389a6d8b14bc380c6d6a6d4de8853e">operator==</a>(<span class="keyword">const</span> <a class="code" href="structr123m128i.html">r123m128i</a> &lhs, <span class="keyword">const</span> <a class="code" href="structr123m128i.html">r123m128i</a> &rhs){
<a name="l00234"></a>00234 <span class="keywordflow">return</span> 0xf==_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(lhs, rhs))); }
<a name="l00235"></a><a class="code" href="sse_8h.html#a9fc5dd462afc043229ab800abb8f7d64">00235</a> R123_STATIC_INLINE <span class="keywordtype">bool</span> <a class="code" href="sse_8h.html#a9fc5dd462afc043229ab800abb8f7d64">operator!=</a>(<span class="keyword">const</span> <a class="code" href="structr123m128i.html">r123m128i</a> &lhs, <span class="keyword">const</span> <a class="code" href="structr123m128i.html">r123m128i</a> &rhs){
<a name="l00236"></a>00236 <span class="keywordflow">return</span> !(lhs==rhs);}
<a name="l00237"></a><a class="code" href="sse_8h.html#a01fbbba16839c5938b3fa29668870db2">00237</a> R123_STATIC_INLINE <span class="keywordtype">bool</span> <a class="code" href="sse_8h.html#ab1389a6d8b14bc380c6d6a6d4de8853e">operator==</a>(R123_ULONG_LONG lhs, <span class="keyword">const</span> <a class="code" href="structr123m128i.html">r123m128i</a> &rhs){
<a name="l00238"></a>00238 <a class="code" href="structr123m128i.html">r123m128i</a> LHS; LHS.<a class="code" href="structr123m128i.html#a9b9908268281aace8028a3f34980634d">m</a>=_mm_set_epi64x(0, lhs); <span class="keywordflow">return</span> LHS == rhs; }
<a name="l00239"></a><a class="code" href="sse_8h.html#a76ef724b020add862a911a699a21451b">00239</a> R123_STATIC_INLINE <span class="keywordtype">bool</span> <a class="code" href="sse_8h.html#a9fc5dd462afc043229ab800abb8f7d64">operator!=</a>(R123_ULONG_LONG lhs, <span class="keyword">const</span> <a class="code" href="structr123m128i.html">r123m128i</a> &rhs){
<a name="l00240"></a>00240 <span class="keywordflow">return</span> !(lhs==rhs);}
<a name="l00241"></a><a class="code" href="sse_8h.html#af965b43e09e7305795c7b5a35d20c75f">00241</a> R123_STATIC_INLINE std::ostream&amp; <a class="code" href="array_8h.html#af29a19da89789058ead246fdef153ce0">operator&lt;&lt;</a>(std::ostream&amp; os, <span class="keyword">const</span> <a class="code" href="structr123m128i.html">r123m128i</a>&amp; m){
<a name="l00242"></a>00242 <span class="keyword">union</span>{
<a name="l00243"></a>00243 uint64_t u64[2];
<a name="l00244"></a>00244 __m128i m;
<a name="l00245"></a>00245 }u;
<a name="l00246"></a>00246 _mm_storeu_si128(&amp;u.m, m.<a class="code" href="structr123m128i.html#a9b9908268281aace8028a3f34980634d">m</a>);
<a name="l00247"></a>00247 <span class="keywordflow">return</span> os &lt;&lt; u.u64[0] &lt;&lt; <span class="stringliteral">" "</span> &lt;&lt; u.u64[1];
<a name="l00248"></a>00248 }
<a name="l00249"></a>00249
<a name="l00250"></a><a class="code" href="sse_8h.html#a202aef4bea97c2203a82676cb6abba40">00250</a> R123_STATIC_INLINE std::istream&amp; <a class="code" href="array_8h.html#ace3112deae6709884feac904b1c260a1">operator&gt;&gt;</a>(std::istream&amp; is, <a class="code" href="structr123m128i.html">r123m128i</a>&amp; m){
<a name="l00251"></a>00251 uint64_t u64[2];
<a name="l00252"></a>00252 is &gt;&gt; u64[0] &gt;&gt; u64[1];
<a name="l00253"></a>00253 m.<a class="code" href="structr123m128i.html#a9b9908268281aace8028a3f34980634d">m</a> = _mm_set_epi64x(u64[1], u64[0]);
<a name="l00254"></a>00254 <span class="keywordflow">return</span> is;
<a name="l00255"></a>00255 }
<a name="l00256"></a>00256
<a name="l00257"></a>00257 <span class="keyword">template</span>&lt;<span class="keyword">typename</span> T&gt; <span class="keyword">inline</span> T <a class="code" href="sse_8h.html#a99c42d1da8da3a4a273556a7817b5c1f">assemble_from_u32</a>(uint32_t *p32); <span class="comment">// forward declaration</span>
<a name="l00258"></a>00258
<a name="l00259"></a>00259 <span class="keyword">template</span> &lt;&gt;
<a name="l00260"></a>00260 <span class="keyword">inline</span> <a class="code" href="structr123m128i.html">r123m128i</a> <a class="code" href="sse_8h.html#a0cd6ca87c67023d7afda38319ab40e43">assemble_from_u32&lt;r123m128i&gt;</a>(uint32_t *p32){
<a name="l00261"></a>00261 <a class="code" href="structr123m128i.html">r123m128i</a> ret;
<a name="l00262"></a>00262 ret.<a class="code" href="structr123m128i.html#a9b9908268281aace8028a3f34980634d">m</a> = _mm_set_epi32(p32[3], p32[2], p32[1], p32[0]);
<a name="l00263"></a>00263 <span class="keywordflow">return</span> ret;
<a name="l00264"></a>00264 }
<a name="l00265"></a>00265
<a name="l00266"></a>00266 <span class="preprocessor">#else</span>
<a name="l00267"></a>00267 <span class="preprocessor"></span>
<a name="l00268"></a>00268 <span class="keyword">typedef</span> <span class="keyword">struct </span>{
<a name="l00269"></a>00269 __m128i m;
<a name="l00270"></a>00270 } <a class="code" href="structr123m128i.html">r123m128i</a>;
<a name="l00271"></a>00271
<a name="l00272"></a>00272 <span class="preprocessor">#endif </span><span class="comment">/* __cplusplus */</span>
<a name="l00273"></a>00273
<a name="l00274"></a>00274 <span class="preprocessor">#else </span><span class="comment">/* !R123_USE_SSE */</span>
<a name="l00275"></a>00275 R123_STATIC_INLINE <span class="keywordtype">int</span> <a class="code" href="sse_8h.html#a0b35a046e85316295476d7d552411044">haveAESNI</a>(){
<a name="l00276"></a>00276 <span class="keywordflow">return</span> 0;
<a name="l00277"></a>00277 }
<a name="l00278"></a>00278 <span class="preprocessor">#endif </span><span class="comment">/* R123_USE_SSE */</span>
<a name="l00279"></a>00279
<a name="l00280"></a>00280 <span class="preprocessor">#endif </span><span class="comment">/* _Random123_sse_dot_h__ */</span>
</pre></div></div>
</div>
<!-- Window showing the search filter options. Each entry calls
     searchBox.OnSelectItem(n) with its zero-based position in the list.
     The SelectionMark placeholder is written as a numeric non-breaking
     space reference so the marker column keeps its width instead of
     collapsing like an ordinary space.
     NOTE(review): searchBox is defined in search/search.js, which is not
     visible here; keep each span as the first child of its link and keep
     the SelectItem class in case the script relies on them. -->
<div id="MSearchSelectWindow"
     onmouseover="return searchBox.OnSearchSelectShow()"
     onmouseout="return searchBox.OnSearchSelectHide()"
     onkeydown="return searchBox.OnSearchSelectKey(event)">
<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a>
<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Classes</a>
<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Namespaces</a>
<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark">&#160;</span>Files</a>
<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark">&#160;</span>Functions</a>
<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark">&#160;</span>Variables</a>
<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark">&#160;</span>Typedefs</a>
<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark">&#160;</span>Enumerations</a>
<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(8)"><span class="SelectionMark">&#160;</span>Enumerator</a>
<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(9)"><span class="SelectionMark">&#160;</span>Friends</a>
<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(10)"><span class="SelectionMark">&#160;</span>Defines</a>
</div>
<!-- iframe showing the search results (closed by default).
     The title gives the frame an accessible name for assistive technology;
     name and id are left unchanged because scripts may address the frame
     through either one. -->
<div id="MSearchResultsWindow">
<iframe src="" frameborder="0"
        name="MSearchResults" id="MSearchResults"
        title="Search results">
</iframe>
</div>
<!-- Page footer: generation timestamp and Doxygen attribution. -->
<hr class="footer"/>
<address class="footer"><small>
Generated on Mon Mar 7 2016 18:34:00 for Random123-1.09 by
<a href="http://www.doxygen.org/index.html"> <img class="footer" src="doxygen.png" alt="doxygen"/></a>
1.7.1
</small></address>
</body>
</html>
|