File: xor.h

package info (click to toggle)
libm4ri 20200125-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 2,560 kB
  • sloc: ansic: 12,633; sh: 4,304; makefile: 137
file content (126 lines) | stat: -rw-r--r-- 2,586 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
/*
 * Functions for adding vectors.
 *
 * \author Martin Albrecht <martinralbrecht@googlemail.com>
 *
 */

#ifndef M4RI_XOR_H
#define M4RI_XOR_H

 /*******************************************************************
 *
 *                 M4RI:  Linear Algebra over GF(2)
 *
 *    Copyright (C) 2008-2013  Martin Albrecht <martinralbrecht@googlemail.com>
 *
 *  Distributed under the terms of the GNU General Public License (GPL)
 *  version 2 or higher.
 *
 *    This code is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *    General Public License for more details.
 *
 *  The full text of the GPL is available at:
 *
 *                  http://www.gnu.org/licenses/
 *
 ********************************************************************/

#include <m4ri/m4ri_config.h>

#if __M4RI_HAVE_SSE2
#include <emmintrin.h>
#endif

#include <m4ri/misc.h>


/**
 * \brief Add (XOR) the word vector t1 onto the word vector c, in place.
 *
 * Computes c[i] ^= t1[i] for 0 <= i < wide_in. A non-positive wide_in
 * leaves c untouched.
 *
 * \param c       Destination vector, updated in place.
 * \param t1      Source vector, only read.
 * \param wide_in Number of words to combine.
 *
 * \note When built with SSE2 the bulk of the work dereferences both vectors
 *       as aligned __m128i values, so c and t1 are assumed to have the same
 *       alignment modulo 16 bytes.
 */

static inline void _mzd_combine(word *c, word const *t1, wi_t wide_in) {
  wi_t wide = wide_in;

  /*
   * Nothing to add. Without this guard the unrolled loop at the bottom of
   * the function would be entered at "case 0" and XOR eight words before
   * its loop condition is evaluated for the first time.
   */
  if (wide <= 0) {
    __M4RI_DD_RAWROW(c, wide_in);
    return;
  }

#if __M4RI_HAVE_SSE2
  /* assuming c, t1 are aligned the same way */

  /* Peel off one word so that c starts on a 16-byte boundary. */
  if (__M4RI_ALIGNMENT(c, 16) == 8) {
    *c++ ^= *t1++;
    wide--;
  }

  __m128i *vc        = (__m128i *)c;
  __m128i const *vt1 = (__m128i const *)t1;

  /*
   * Count the full 128-bit blocks instead of deriving an end address by
   * casting a pointer to an integer type, which may be narrower than a
   * pointer on some ABIs.
   */
  wi_t const words_per_vec = (wi_t)(sizeof(__m128i) / sizeof(word));
  wi_t vecs                = wide / words_per_vec;
  wide %= words_per_vec; /* words left over for the scalar tail */

  /* Two 128-bit blocks per iteration ... */
  for (; vecs >= 2; vecs -= 2) {
    vc[0] = _mm_xor_si128(vc[0], vt1[0]);
    vc[1] = _mm_xor_si128(vc[1], vt1[1]);
    vc += 2;
    vt1 += 2;
  }

  /* ... and at most one remaining block. */
  if (vecs) {
    *vc = _mm_xor_si128(*vc, *vt1);
    ++vc;
    ++vt1;
  }

  c  = (word *)vc;
  t1 = (word const *)vt1;

  if (!wide) {
    /* NOTE(review): c has been advanced past the processed words here;
     * confirm that this is what the debug hook expects. */
    __M4RI_DD_RAWROW(c, wide_in);
    return;
  }
#endif // __M4RI_HAVE_SSE2

  /*
   * Scalar part, unrolled eight times as a Duff's device: the switch jumps
   * into the middle of the loop body to handle wide % 8 first, and every
   * case deliberately falls through to the next one. wide > 0 holds here.
   */
  wi_t n = (wide + 7) / 8;
  switch (wide % 8) {
  case 0: do { *c++ ^= *t1++;
    case 7:    *c++ ^= *t1++;
    case 6:    *c++ ^= *t1++;
    case 5:    *c++ ^= *t1++;
    case 4:    *c++ ^= *t1++;
    case 3:    *c++ ^= *t1++;
    case 2:    *c++ ^= *t1++;
    case 1:    *c++ ^= *t1++;
    } while (--n > 0);
  }
  /* NOTE(review): as above, c points past the last processed word here. */
  __M4RI_DD_RAWROW(c, wide_in);
}

/*
 * Include xor_template.h once for every N from 2 to 8. N is defined right
 * before each inclusion and undefined right after it, so the template sees a
 * different value each time and N does not leak into code that includes this
 * header.
 * NOTE(review): presumably each inclusion emits the N-operand variant of the
 * combine routine above -- confirm against xor_template.h.
 */
#define N 2
#include "xor_template.h"
#undef N

#define N 3
#include "xor_template.h"
#undef N

#define N 4
#include "xor_template.h"
#undef N

#define N 5
#include "xor_template.h"
#undef N

#define N 6
#include "xor_template.h"
#undef N

#define N 7
#include "xor_template.h"
#undef N

#define N 8
#include "xor_template.h"
#undef N

#endif // M4RI_XOR_H