/*
* Copyright (C) 2013 Andrea Mazzoleni
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#ifndef __RAID_INTERNAL_H
#define __RAID_INTERNAL_H
/*
 * Instruction set selection.
 *
 * An architecture may support an instruction set that the assembler in
 * use is too old to emit. The CONFIG_* macros defined here let the build
 * leave out the implementations that cannot be assembled.
 *
 * A project that always builds with a known assembler may define the
 * CONFIG_* macros to fixed values instead of deriving them from HAVE_*.
 */
#if HAVE_CONFIG_H

/* Pull in the HAVE_* results of the project configuration */
#include "config.h"

/* Detect the x86 flavour only when the compiler can handle assembly */
#if HAVE_ASSEMBLY
#ifdef __x86_64__
#define CONFIG_X86 1
#define CONFIG_X86_64 1
#endif
#ifdef __i386__
#define CONFIG_X86 1
#define CONFIG_X86_32 1
#endif
#endif /* HAVE_ASSEMBLY */

/* Turn on SSE2, SSSE3 and AVX2 one by one, following the HAVE_* flags */
#if HAVE_AVX2
#define CONFIG_AVX2 1
#endif
#if HAVE_SSSE3
#define CONFIG_SSSE3 1
#endif
#if HAVE_SSE2
#define CONFIG_SSE2 1
#endif

#else /* no config.h available */

/* Without a config.h, assembly support is taken for granted */
#ifdef __x86_64__
#define CONFIG_X86 1
#define CONFIG_X86_64 1
#endif
#ifdef __i386__
#define CONFIG_X86 1
#define CONFIG_X86_32 1
#endif

/* ... and the assembler is assumed to know every instruction set */
#if defined(CONFIG_X86)
#define CONFIG_AVX2 1
#define CONFIG_SSSE3 1
#define CONFIG_SSE2 1
#endif

#endif /* HAVE_CONFIG_H */
/*
* Includes anything required for compatibility.
*/
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
/*
 * Inverse assert.
 *
 * BUG_ON(a) expands to assert(!(a)), so the assertion fails when the
 * condition `a` is true. Being built on assert(), the argument is not
 * evaluated at all when NDEBUG is defined: it must not carry side effects
 * that the program depends on.
 */
#define BUG_ON(a) assert(!(a))
/*
 * Forced inline.
 *
 * Supplies __always_inline only when the environment has not already
 * defined a macro with that name. Relies on the GNU __attribute__ syntax.
 */
#if !defined(__always_inline)
#define __always_inline inline __attribute__((__always_inline__))
#endif
/*
 * Forced alignment.
 *
 * __aligned(n) requests an alignment of n bytes for the declaration it is
 * attached to. Defined only when the environment has not already provided
 * a macro with that name. Relies on the GNU __attribute__ syntax.
 */
#if !defined(__aligned)
#define __aligned(n) __attribute__((__aligned__(n)))
#endif
/*
 * Round a pointer up to the next multiple of `size`.
 *
 * The address is advanced by size - 1 and then its low bits are cleared
 * with the mask ~(size - 1). This only yields a multiple of `size` when
 * `size` is a power of two. A pointer that is already aligned is returned
 * unchanged.
 */
static __always_inline void *__align_ptr(void *ptr, uintptr_t size)
{
	const uintptr_t mask = size - 1U;
	const uintptr_t bumped = (uintptr_t)ptr + mask;

	return (void *)(bumped & ~mask);
}
/*
* Includes the main interface headers.
*/
#include "raid.h"
#include "helper.h"
/*
 * Internal functions.
 *
 * These are intended to provide access for testing.
 *
 * Only the declarations live here; the definitions are in other files.
 *
 * NOTE(review): the parameter meanings are not visible from this header.
 * Judging by the names only, nd/np presumably count the data and parity
 * buffers, size is presumably the length in bytes of each buffer, v/vv the
 * vector of buffer pointers, and nr/id/ip the number and the indexes of
 * the buffers to recover - confirm against the implementation.
 */
int raid_selftest(void);
void raid_gen_ref(int nd, int np, size_t size, void **vv);
void raid_invert(uint8_t *M, uint8_t *V, int n);
void raid_delta_gen(int nr, int *id, int *ip, int nd, size_t size, void **v);
void raid_rec1of1(int *id, int nd, size_t size, void **v);
void raid_rec2of2_int8(int *id, int *ip, int nd, size_t size, void **vv);
/*
 * raid_gen* variants. All share the (nd, size, vv) signature, which is
 * also the signature of the raid_gen_ptr[] forwarders.
 * The name suffix presumably identifies the code path (plain integer
 * width, or the SIMD instruction set used) - TODO confirm.
 */
void raid_gen1_int32(int nd, size_t size, void **vv);
void raid_gen1_int64(int nd, size_t size, void **vv);
void raid_gen1_sse2(int nd, size_t size, void **vv);
void raid_gen1_avx2(int nd, size_t size, void **vv);
void raid_gen2_int32(int nd, size_t size, void **vv);
void raid_gen2_int64(int nd, size_t size, void **vv);
void raid_gen2_sse2(int nd, size_t size, void **vv);
void raid_gen2_avx2(int nd, size_t size, void **vv);
void raid_gen2_sse2ext(int nd, size_t size, void **vv);
void raid_genz_int32(int nd, size_t size, void **vv);
void raid_genz_int64(int nd, size_t size, void **vv);
void raid_genz_sse2(int nd, size_t size, void **vv);
void raid_genz_sse2ext(int nd, size_t size, void **vv);
void raid_genz_avx2ext(int nd, size_t size, void **vv);
void raid_gen3_int8(int nd, size_t size, void **vv);
void raid_gen3_ssse3(int nd, size_t size, void **vv);
void raid_gen3_ssse3ext(int nd, size_t size, void **vv);
void raid_gen3_avx2ext(int nd, size_t size, void **vv);
void raid_gen4_int8(int nd, size_t size, void **vv);
void raid_gen4_ssse3(int nd, size_t size, void **vv);
void raid_gen4_ssse3ext(int nd, size_t size, void **vv);
void raid_gen4_avx2ext(int nd, size_t size, void **vv);
void raid_gen5_int8(int nd, size_t size, void **vv);
void raid_gen5_ssse3(int nd, size_t size, void **vv);
void raid_gen5_ssse3ext(int nd, size_t size, void **vv);
void raid_gen5_avx2ext(int nd, size_t size, void **vv);
void raid_gen6_int8(int nd, size_t size, void **vv);
void raid_gen6_ssse3(int nd, size_t size, void **vv);
void raid_gen6_ssse3ext(int nd, size_t size, void **vv);
void raid_gen6_avx2ext(int nd, size_t size, void **vv);
/*
 * raid_rec* variants. All share the (nr, id, ip, nd, size, vv) signature,
 * which is also the signature of the raid_rec_ptr[] forwarders.
 */
void raid_rec1_int8(int nr, int *id, int *ip, int nd, size_t size, void **vv);
void raid_rec2_int8(int nr, int *id, int *ip, int nd, size_t size, void **vv);
void raid_recX_int8(int nr, int *id, int *ip, int nd, size_t size, void **vv);
void raid_rec1_ssse3(int nr, int *id, int *ip, int nd, size_t size, void **vv);
void raid_rec2_ssse3(int nr, int *id, int *ip, int nd, size_t size, void **vv);
void raid_recX_ssse3(int nr, int *id, int *ip, int nd, size_t size, void **vv);
void raid_rec1_avx2(int nr, int *id, int *ip, int nd, size_t size, void **vv);
void raid_rec2_avx2(int nr, int *id, int *ip, int nd, size_t size, void **vv);
void raid_recX_avx2(int nr, int *id, int *ip, int nd, size_t size, void **vv);
/*
 * Internal naming.
 *
 * These are intended to provide access for testing.
 *
 * Each function takes no argument and returns a constant string. There is
 * one function per gen/rec family (gen1, gen2, genz, gen3..gen6, rec1,
 * rec2, recX).
 *
 * NOTE(review): presumably the string names the implementation currently
 * selected for that family - confirm against the definitions.
 */
const char *raid_gen1_tag(void);
const char *raid_gen2_tag(void);
const char *raid_genz_tag(void);
const char *raid_gen3_tag(void);
const char *raid_gen4_tag(void);
const char *raid_gen5_tag(void);
const char *raid_gen6_tag(void);
const char *raid_rec1_tag(void);
const char *raid_rec2_tag(void);
const char *raid_recX_tag(void);
/*
 * Internal forwarders.
 *
 * Function pointers defined elsewhere. raid_gen3_ptr, raid_genz_ptr and
 * the entries of raid_gen_ptr[] have the same signature as the raid_gen*
 * functions; the entries of raid_rec_ptr[] have the same signature as the
 * raid_rec* functions. Both arrays hold RAID_PARITY_MAX entries.
 *
 * NOTE(review): presumably these are set at initialization to the
 * implementation chosen for the running CPU - confirm against the
 * definitions.
 */
extern void (*raid_gen3_ptr)(int nd, size_t size, void **vv);
extern void (*raid_genz_ptr)(int nd, size_t size, void **vv);
extern void (*raid_gen_ptr[RAID_PARITY_MAX])(
int nd, size_t size, void **vv);
extern void (*raid_rec_ptr[RAID_PARITY_MAX])(
int nr, int *id, int *ip, int nd, size_t size, void **vv);
/*
 * Tables.
 *
 * Constant byte tables defined elsewhere, all declared with a 256 byte
 * alignment.
 *
 * NOTE(review): judging by the "gf" prefix and the 256 entry dimensions
 * these are presumably lookup tables for arithmetic in GF(2^8), and the
 * "pshufb" ones are presumably laid out as 16 byte operands for the PSHUFB
 * instruction - confirm against the definitions.
 */
extern const uint8_t raid_gfmul[256][256] __aligned(256);
extern const uint8_t raid_gfexp[256] __aligned(256);
extern const uint8_t raid_gfinv[256] __aligned(256);
extern const uint8_t raid_gfvandermonde[3][256] __aligned(256);
extern const uint8_t raid_gfcauchy[6][256] __aligned(256);
extern const uint8_t raid_gfcauchypshufb[251][4][2][16] __aligned(256);
extern const uint8_t raid_gfmulpshufb[256][2][16] __aligned(256);
/*
 * Pointer to rows of 256 constant bytes. Unlike the tables above the
 * pointer itself is not const, so it can be retargeted at runtime.
 */
extern const uint8_t (*raid_gfgen)[256];
/*
 * Short aliases without the raid_ prefix.
 * Note that gfgenpshufb does not follow the pattern of the others: it maps
 * to raid_gfcauchypshufb.
 */
#define gfmul raid_gfmul
#define gfexp raid_gfexp
#define gfinv raid_gfinv
#define gfvandermonde raid_gfvandermonde
#define gfcauchy raid_gfcauchy
#define gfgenpshufb raid_gfcauchypshufb
#define gfmulpshufb raid_gfmulpshufb
#define gfgen raid_gfgen
/*
* Assembler blocks.
*/
#ifdef CONFIG_X86
#ifdef CONFIG_SSE2
/*
 * Marks the start of a block of SSE asm code.
 *
 * Intentionally empty: nothing has to be prepared before the asm code.
 * It exists as the counterpart of raid_sse_end().
 */
static __always_inline void raid_sse_begin(void)
{
}
/*
 * Marks the end of a block of SSE asm code.
 *
 * Issues a store fence, and then declares the xmm registers as clobbered
 * using empty asm statements.
 */
static __always_inline void raid_sse_end(void)
{
/* SSE and AVX code uses non-temporal writes, like MOVNTDQ, */
/* that use a weak memory model. To ensure that other processors */
/* see correctly the data written, we use a store-store memory */
/* barrier at the end of the asm code */
asm volatile ("sfence" : : : "memory");
/* clobbers registers used in the asm code */
/* this is required because in the Windows ABI, */
/* registers xmm6-xmm15 should be kept by the callee. */
/* this clobber list forces the compiler to save any */
/* register that needs to be saved */
/* we check for __SSE2__ because we require that the */
/* compiler supports SSE2 registers in the clobber list */
#ifdef __SSE2__
asm volatile ("" : : : "%xmm0", "%xmm1", "%xmm2", "%xmm3");
asm volatile ("" : : : "%xmm4", "%xmm5", "%xmm6", "%xmm7");
/* xmm8-xmm15 are listed only in 64 bit builds */
#ifdef CONFIG_X86_64
asm volatile ("" : : : "%xmm8", "%xmm9", "%xmm10", "%xmm11");
asm volatile ("" : : : "%xmm12", "%xmm13", "%xmm14", "%xmm15");
#endif
#endif
}
#endif
#ifdef CONFIG_AVX2
/*
 * Marks the start of a block of AVX asm code.
 *
 * It only forwards to raid_sse_begin().
 */
static __always_inline void raid_avx_begin(void)
{
raid_sse_begin();
}
/*
 * Marks the end of a block of AVX asm code.
 *
 * Runs the SSE epilogue first (store fence and xmm clobbers), and then
 * executes VZEROUPPER.
 */
static __always_inline void raid_avx_end(void)
{
raid_sse_end();
/* reset the upper part of the ymm registers */
/* to avoid the 70 clocks penalty on the next */
/* xmm register use */
asm volatile ("vzeroupper" : : : "memory");
}
#endif
#endif /* CONFIG_X86 */
#endif /* __RAID_INTERNAL_H */