1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
|
/*
BLAKE2 reference source code package - reference C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#define PARALLELISM_DEGREE 8
void blake2sp_init( blake2sp_state *S )
{
memset( S->buf, 0, sizeof( S->buf ) );
S->buflen = 0;
blake2s_init_param( &S->R, 0, 1 ); // Init root.
for( uint32 i = 0; i < PARALLELISM_DEGREE; ++i )
blake2s_init_param( &S->S[i], i, 0 ); // Init leaf.
S->R.last_node = 1;
S->S[PARALLELISM_DEGREE - 1].last_node = 1;
}
struct Blake2ThreadData
{
void Update();
blake2s_state *S;
const byte *in;
size_t inlen;
};
void Blake2ThreadData::Update()
{
size_t inlen__ = inlen;
const byte *in__ = ( const byte * )in;
while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES )
{
#ifdef USE_SSE
// We gain 5% in i7 SSE mode by prefetching next data block.
if (_SSE_Version>=SSE_SSE && inlen__ >= 2 * PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES)
_mm_prefetch((char*)(in__ + PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES), _MM_HINT_T0);
#endif
// We tried to _forceinline blake2s_update and blake2s_compress_sse,
// but it didn't improve performance.
blake2s_update( S, in__, BLAKE2S_BLOCKBYTES );
in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
}
}
#ifdef RAR_SMP
THREAD_PROC(Blake2Thread)
{
Blake2ThreadData *td=(Blake2ThreadData *)Data;
td->Update();
}
#endif
void blake2sp_update( blake2sp_state *S, const byte *in, size_t inlen )
{
size_t left = S->buflen;
size_t fill = sizeof( S->buf ) - left;
if( left && inlen >= fill )
{
memcpy( S->buf + left, in, fill );
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
blake2s_update( &S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES );
in += fill;
inlen -= fill;
left = 0;
}
Blake2ThreadData btd_array[PARALLELISM_DEGREE];
#ifdef RAR_SMP
uint ThreadNumber = inlen < 0x1000 ? 1 : S->MaxThreads;
if (ThreadNumber==6 || ThreadNumber==7) // 6 and 7 threads work slower than 4 here.
ThreadNumber=4;
#else
uint ThreadNumber=1;
#endif
for (size_t id__=0;id__<PARALLELISM_DEGREE;)
{
for (uint Thread=0;Thread<ThreadNumber && id__<PARALLELISM_DEGREE;Thread++)
{
Blake2ThreadData *btd=btd_array+Thread;
btd->inlen = inlen;
btd->in = in + id__ * BLAKE2S_BLOCKBYTES;
btd->S = &S->S[id__];
#ifdef RAR_SMP
if (ThreadNumber>1)
S->ThPool->AddTask(Blake2Thread,(void*)btd);
else
btd->Update();
#else
btd->Update();
#endif
id__++;
}
#ifdef RAR_SMP
if (S->ThPool!=NULL) // Can be NULL in -mt1 mode.
S->ThPool->WaitDone();
#endif // RAR_SMP
}
in += inlen - inlen % ( PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES );
inlen %= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
if( inlen > 0 )
memcpy( S->buf + left, in, (size_t)inlen );
S->buflen = left + (size_t)inlen;
}
void blake2sp_final( blake2sp_state *S, byte *digest )
{
byte hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES];
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
{
if( S->buflen > i * BLAKE2S_BLOCKBYTES )
{
size_t left = S->buflen - i * BLAKE2S_BLOCKBYTES;
if( left > BLAKE2S_BLOCKBYTES ) left = BLAKE2S_BLOCKBYTES;
blake2s_update( &S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, left );
}
blake2s_final( &S->S[i], hash[i] );
}
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
blake2s_update( &S->R, hash[i], BLAKE2S_OUTBYTES );
blake2s_final( &S->R, digest );
}
|