1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358
|
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, Inc., 53 Temple Place Ste 330,
* Boston MA 02111-1307, USA; either version 2 of the License, or
* (at your option) any later version; incorporated herein by reference.
*
* ----------------------------------------------------------------------- */
/*
* Added helpers for unaligned native int access
*/
/*
* raid6int1.c
*
* 1-way unrolled portable integer math RAID-6 instruction set
*
* This file was postprocessed using unroll.pl and then ported to userspace
*/
#include "kerncompat.h"
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include "kernel-shared/volumes.h"
#include "kernel-shared/uapi/btrfs_tree.h"
#include "kernel-lib/raid56.h"
#include "common/messages.h"
/*
* This is the C data type to use
*/
/* Change this from BITS_PER_LONG if there is something better... */
/*
 * Pick the native word type for the integer RAID-6 code from BITS_PER_LONG.
 *
 * NBYTES(x): for a byte value x, the native word with x repeated in every
 *            byte (multiplication by 0x0101...01)
 * NSIZE:     size of unative_t in bytes
 * NSHIFT:    log2(NSIZE)
 * put_unaligned_native()/get_unaligned_native(): forward to the 64-bit or
 *            32-bit unaligned accessors, which are not defined in this file
 */
#if BITS_PER_LONG == 64
# define NBYTES(x) ((x) * 0x0101010101010101UL)
# define NSIZE 8
# define NSHIFT 3
typedef uint64_t unative_t;
#define put_unaligned_native(val,p) put_unaligned_64((val),(p))
#define get_unaligned_native(p) get_unaligned_64((p))
#else
/* Any other BITS_PER_LONG value falls back to 32-bit words */
# define NBYTES(x) ((x) * 0x01010101U)
# define NSIZE 4
# define NSHIFT 2
typedef uint32_t unative_t;
#define put_unaligned_native(val,p) put_unaligned_32((val),(p))
#define get_unaligned_native(p) get_unaligned_32((p))
#endif
/*
* These sub-operations are separate inlines since they can sometimes be
* specially optimized using architecture-specific hacks.
*/
/*
* The SHLBYTE() operation shifts each byte left by 1, *not*
* rolling over into the next byte
*/
static inline __attribute_const__ unative_t SHLBYTE(unative_t v)
{
	/*
	 * A plain word shift moves the top bit of each byte into bit 0 of the
	 * next byte; masking every byte with 0xfe discards those stray bits.
	 */
	return (v << 1) & NBYTES(0xfe);
}
/*
* The MASK() operation returns 0xFF in any byte for which the high
* bit is 1, 0x00 for any byte for which the high bit is 0.
*/
static inline __attribute_const__ unative_t MASK(unative_t v)
{
	/* Keep only the high bit of every byte */
	const unative_t high = v & NBYTES(0x80);

	/*
	 * For each byte with the high bit set this is 0x100 - 0x01 = 0xff in
	 * that byte position; bytes without it contribute 0.
	 * Overflow on the top bit is OK.
	 */
	return (high << 1) - (high >> 7);
}
/*
 * Compute the P (XOR parity) and Q (RS syndrome) blocks of one RAID-6 stripe.
 *
 * @disks: Number of entries in @ptrs. ptrs[0 .. disks-3] are the data blocks,
 *         ptrs[disks-2] receives P and ptrs[disks-1] receives Q.
 *         Must be at least 3, this is not checked here.
 * @bytes: Length of every block in bytes. The blocks are processed NSIZE
 *         bytes at a time, so this is expected to be a multiple of NSIZE.
 * @ptrs:  Block pointers, laid out as described for @disks.
 */
void raid6_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	uint8_t **dptr = (uint8_t **)ptrs;
	uint8_t *p, *q;
	size_t d;	/* Byte offset, same type as @bytes to avoid a signed/unsigned compare */
	int z, z0;
	unative_t wd0, wq0, wp0, w10, w20;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	for ( d = 0 ; d < bytes ; d += NSIZE*1 ) {
		/* Start both accumulators from the highest data disk */
		wq0 = wp0 = get_unaligned_native(&dptr[z0][d+0*NSIZE]);
		for ( z = z0-1 ; z >= 0 ; z-- ) {
			wd0 = get_unaligned_native(&dptr[z][d+0*NSIZE]);
			wp0 ^= wd0;
			/*
			 * Per byte: shift Q left by one and XOR 0x1d into
			 * every byte whose top bit was set (multiply by 2 in
			 * the RAID-6 Galois field), then add the next disk.
			 */
			w20 = MASK(wq0);
			w10 = SHLBYTE(wq0);
			w20 &= NBYTES(0x1d);
			w10 ^= w20;
			wq0 = w10 ^ wd0;
		}
		put_unaligned_native(wp0, &p[d+NSIZE*0]);
		put_unaligned_native(wq0, &q[d+NSIZE*0]);
	}
}
/*
 * XOR @size bytes of @src into @dst (dst[i] ^= src[i]).
 *
 * Neither pointer needs any particular alignment. The buffers must not
 * partially overlap.
 */
static void xor_range(char *dst, const char *src, size_t size)
{
	const size_t word = sizeof(unsigned long);

	/* Byte-wise until dst is word aligned (mask is word - 1, not word) */
	while (size && ((uintptr_t)dst & (word - 1))) {
		*dst++ ^= *src++;
		size--;
	}

	/*
	 * Word-sized part. src may still be misaligned relative to dst, so go
	 * through memcpy() instead of casting the pointers; compilers turn
	 * these fixed-size copies into plain loads and stores.
	 */
	while (size >= word) {
		unsigned long d, s;

		memcpy(&d, dst, word);
		memcpy(&s, src, word);
		d ^= s;
		memcpy(dst, &d, word);

		src += word;
		dst += word;
		size -= word;
	}

	/* Remaining tail bytes */
	while (size) {
		*dst++ ^= *src++;
		size--;
	}
}
/*
 * Generate desired data/parity stripe for RAID5
 *
 * @nr_devs:    Total number of devices, including parity
 * @stripe_len: Stripe length, must be BTRFS_STRIPE_LEN
 * @dest:       Index into @data of the stripe to generate, follows the
 *              layout described for @data
 * @data:       Stripe buffers, with special layout:
 *              data[0]:           Data stripe 0
 *              data[nr_devs - 2]: Last data stripe
 *              data[nr_devs - 1]: RAID5 parity
 *
 * data[dest] is overwritten with the XOR of all the other stripes.
 *
 * Return 0 on success, -EINVAL if a parameter is invalid.
 */
int raid5_gen_result(int nr_devs, size_t stripe_len, int dest, void **data)
{
	char *buf;
	int i;

	/* Validation check */
	if (stripe_len == 0 || stripe_len != BTRFS_STRIPE_LEN) {
		error("invalid parameter for %s", __func__);
		return -EINVAL;
	}
	if (dest < 0 || dest >= nr_devs || nr_devs < 2) {
		error("invalid parameter for %s", __func__);
		return -EINVAL;
	}

	/* Only touch data[dest] once dest is known to be a valid index */
	buf = data[dest];

	/* Shortcut for 2 devs RAID5, which is just RAID1 */
	if (nr_devs == 2) {
		memcpy(buf, data[1 - dest], stripe_len);
		return 0;
	}

	/* XOR of every stripe except the destination */
	memset(buf, 0, stripe_len);
	for (i = 0; i < nr_devs; i++) {
		if (i == dest)
			continue;
		xor_range(buf, data[i], stripe_len);
	}
	return 0;
}
/*
* Raid 6 recovery code copied from kernel lib/raid6/recov.c.
* With modifications:
* - rename from raid6_2data_recov_intx1
* - kfree/free modification for btrfs-progs
*/
int raid6_recov_data2(int nr_devs, size_t stripe_len, int dest1, int dest2,
void **data)
{
u8 *p, *q, *dp, *dq;
u8 px, qx, db;
const u8 *pbmul; /* P multiplier table for B data */
const u8 *qmul; /* Q multiplier table (for both) */
char *zero_mem1, *zero_mem2;
int ret = 0;
/* Early check */
if (dest1 < 0 || dest1 >= nr_devs - 2 ||
dest2 < 0 || dest2 >= nr_devs - 2 || dest1 >= dest2)
return -EINVAL;
zero_mem1 = calloc(1, stripe_len);
zero_mem2 = calloc(1, stripe_len);
if (!zero_mem1 || !zero_mem2) {
free(zero_mem1);
free(zero_mem2);
return -ENOMEM;
}
p = (u8 *)data[nr_devs - 2];
q = (u8 *)data[nr_devs - 1];
/* Compute syndrome with zero for the missing data pages
Use the dead data pages as temporary storage for
delta p and delta q */
dp = (u8 *)data[dest1];
data[dest1] = (void *)zero_mem1;
data[nr_devs - 2] = dp;
dq = (u8 *)data[dest2];
data[dest2] = (void *)zero_mem2;
data[nr_devs - 1] = dq;
raid6_gen_syndrome(nr_devs, stripe_len, data);
/* Restore pointer table */
data[dest1] = dp;
data[dest2] = dq;
data[nr_devs - 2] = p;
data[nr_devs - 1] = q;
/* Now, pick the proper data tables */
pbmul = raid6_gfmul[raid6_gfexi[dest2 - dest1]];
qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[dest1]^raid6_gfexp[dest2]]];
/* Now do it... */
while ( stripe_len-- ) {
px = *p ^ *dp;
qx = qmul[*q ^ *dq];
*dq++ = db = pbmul[px] ^ qx; /* Reconstructed B */
*dp++ = db ^ px; /* Reconstructed A */
p++; q++;
}
free(zero_mem1);
free(zero_mem2);
return ret;
}
/*
* Raid 6 recover code copied from kernel lib/raid6/recov.c
* - rename from raid6_datap_recov_intx1()
* - parameter changed from faila to dest1
*/
int raid6_recov_datap(int nr_devs, size_t stripe_len, int dest1, void **data)
{
u8 *p, *q, *dq;
const u8 *qmul; /* Q multiplier table */
char *zero_mem;
p = (u8 *)data[nr_devs - 2];
q = (u8 *)data[nr_devs - 1];
zero_mem = calloc(1, stripe_len);
if (!zero_mem)
return -ENOMEM;
/* Compute syndrome with zero for the missing data page
Use the dead data page as temporary storage for delta q */
dq = (u8 *)data[dest1];
data[dest1] = (void *)zero_mem;
data[nr_devs - 1] = dq;
raid6_gen_syndrome(nr_devs, stripe_len, data);
/* Restore pointer table */
data[dest1] = dq;
data[nr_devs - 1] = q;
/* Now, pick the proper data tables */
qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[dest1]]];
/* Now do it... */
while ( stripe_len-- ) {
*p++ ^= *dq = qmul[*q ^ *dq];
q++; dq++;
}
return 0;
}
/*
 * Original raid56 recovery wrapper
 *
 * @nr_devs:    Number of entries in @data, including parity stripe(s)
 * @stripe_len: Length of every stripe in bytes
 * @profile:    Block group flags, must have a RAID5 or RAID6 bit set
 * @dest1:      Index of the first stripe to rebuild, or -1 for none
 * @dest2:      Index of the second stripe to rebuild, or -1 for none
 * @data:       Stripe buffers: data stripes first, then P, then (RAID6) Q
 *
 * Return 0 on success (including when there is nothing to rebuild),
 * 1 if the profile is RAID5 and two stripes were requested,
 * <0 (-EINVAL, -ENOMEM) on error.
 */
int raid56_recov(int nr_devs, size_t stripe_len, u64 profile, int dest1,
		 int dest2, void **data)
{
	int min_devs;
	int ret;

	if (!(profile & BTRFS_BLOCK_GROUP_RAID56_MASK))
		return -EINVAL;
	min_devs = btrfs_bg_type_to_devs_min(profile);
	if (nr_devs < min_devs)
		return -EINVAL;

	/* Anything but -1 or a valid index would address outside data[] */
	if (dest1 < -1 || dest1 >= nr_devs ||
	    dest2 < -1 || dest2 >= nr_devs)
		return -EINVAL;

	/* Nothing to recover */
	if (dest1 == -1 && dest2 == -1)
		return 0;

	/* Reorder dest1/2, so only dest2 can be -1 */
	if (dest1 == -1) {
		dest1 = dest2;
		dest2 = -1;
	} else if (dest2 != -1 && dest1 > dest2) {
		/* Reorder dest1/2, ensure dest2 > dest1 */
		int tmp;

		tmp = dest2;
		dest2 = dest1;
		dest1 = tmp;
	}

	if (profile & BTRFS_BLOCK_GROUP_RAID5) {
		/* RAID5 cannot rebuild two stripes */
		if (dest2 != -1)
			return 1;
		return raid5_gen_result(nr_devs, stripe_len, dest1, data);
	}

	/* RAID6 one dev corrupted case */
	if (dest2 == -1) {
		/* Regenerate P/Q */
		if (dest1 == nr_devs - 1 || dest1 == nr_devs - 2) {
			raid6_gen_syndrome(nr_devs, stripe_len, data);
			return 0;
		}

		/* Regenerate data from P, leaving Q out of the XOR */
		return raid5_gen_result(nr_devs - 1, stripe_len, dest1, data);
	}

	/* P/Q both corrupted */
	if (dest1 == nr_devs - 2 && dest2 == nr_devs - 1) {
		raid6_gen_syndrome(nr_devs, stripe_len, data);
		return 0;
	}

	/* 2 Data corrupted */
	if (dest2 < nr_devs - 2)
		return raid6_recov_data2(nr_devs, stripe_len, dest1, dest2,
					 data);

	/* Data and P */
	if (dest2 == nr_devs - 2)
		return raid6_recov_datap(nr_devs, stripe_len, dest1, data);

	/*
	 * Final case, Data and Q, recover data first then regenerate Q
	 */
	ret = raid5_gen_result(nr_devs - 1, stripe_len, dest1, data);
	if (ret < 0)
		return ret;
	raid6_gen_syndrome(nr_devs, stripe_len, data);
	return 0;
}
|