// Based on public domain code written in 2012 by Samuel Neves #include "rar.hpp" #ifdef USE_SSE #include "blake2s_sse.cpp" #endif static void blake2s_init_param( blake2s_state *S, uint32 node_offset, uint32 node_depth); static void blake2s_update( blake2s_state *S, const byte *in, size_t inlen ); static void blake2s_final( blake2s_state *S, byte *digest ); #include "blake2sp.cpp" static const uint32 blake2s_IV[8] = { 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL, 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL }; static const byte blake2s_sigma[10][16] = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , }; static inline void blake2s_set_lastnode( blake2s_state *S ) { S->f[1] = ~0U; } /* Some helper functions, not necessarily useful */ static inline void blake2s_set_lastblock( blake2s_state *S ) { if( S->last_node ) blake2s_set_lastnode( S ); S->f[0] = ~0U; } static inline void blake2s_increment_counter( blake2s_state *S, const uint32 inc ) { S->t[0] += inc; S->t[1] += ( S->t[0] < inc ); } /* init2 xors IV with input parameter block */ void blake2s_init_param( blake2s_state *S, uint32 node_offset, uint32 node_depth) { #ifdef USE_SSE if (_SSE_Version>=SSE_SSE2) blake2s_init_sse(); #endif S->init(); // Clean data. for( int i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i]; S->h[0] ^= 0x02080020; // We use BLAKE2sp parameters block. S->h[2] ^= node_offset; S->h[3] ^= (node_depth<<16)|0x20000000; } #define G(r,i,m,a,b,c,d) \ a = a + b + m[blake2s_sigma[r][2*i+0]]; \ d = rotr32(d ^ a, 16); \ c = c + d; \ b = rotr32(b ^ c, 12); \ a = a + b + m[blake2s_sigma[r][2*i+1]]; \ d = rotr32(d ^ a, 8); \ c = c + d; \ b = rotr32(b ^ c, 7); static void blake2s_compress( blake2s_state *S, const byte block[BLAKE2S_BLOCKBYTES] ) { uint32 m[16]; uint32 v[16]; for( size_t i = 0; i < 16; ++i ) m[i] = RawGet4( block + i * 4 ); for( size_t i = 0; i < 8; ++i ) v[i] = S->h[i]; v[ 8] = blake2s_IV[0]; v[ 9] = blake2s_IV[1]; v[10] = blake2s_IV[2]; v[11] = blake2s_IV[3]; v[12] = S->t[0] ^ blake2s_IV[4]; v[13] = S->t[1] ^ blake2s_IV[5]; v[14] = S->f[0] ^ blake2s_IV[6]; v[15] = S->f[1] ^ blake2s_IV[7]; for ( uint r = 0; r <= 9; ++r ) // No gain on i7 if unrolled, but exe size grows. { G(r,0,m,v[ 0],v[ 4],v[ 8],v[12]); G(r,1,m,v[ 1],v[ 5],v[ 9],v[13]); G(r,2,m,v[ 2],v[ 6],v[10],v[14]); G(r,3,m,v[ 3],v[ 7],v[11],v[15]); G(r,4,m,v[ 0],v[ 5],v[10],v[15]); G(r,5,m,v[ 1],v[ 6],v[11],v[12]); G(r,6,m,v[ 2],v[ 7],v[ 8],v[13]); G(r,7,m,v[ 3],v[ 4],v[ 9],v[14]); } for( size_t i = 0; i < 8; ++i ) S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; } void blake2s_update( blake2s_state *S, const byte *in, size_t inlen ) { while( inlen > 0 ) { size_t left = S->buflen; size_t fill = 2 * BLAKE2S_BLOCKBYTES - left; if( inlen > fill ) { memcpy( S->buf + left, in, fill ); // Fill buffer S->buflen += fill; blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); #ifdef USE_SSE #ifdef _WIN_32 // We use SSSE3 _mm_shuffle_epi8 only in x64 mode. if (_SSE_Version>=SSE_SSE2) #else if (_SSE_Version>=SSE_SSSE3) #endif blake2s_compress_sse( S, S->buf ); else blake2s_compress( S, S->buf ); // Compress #else blake2s_compress( S, S->buf ); // Compress #endif memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); // Shift buffer left S->buflen -= BLAKE2S_BLOCKBYTES; in += fill; inlen -= fill; } else // inlen <= fill { memcpy( S->buf + left, in, (size_t)inlen ); S->buflen += (size_t)inlen; // Be lazy, do not compress in += inlen; inlen = 0; } } } void blake2s_final( blake2s_state *S, byte *digest ) { if( S->buflen > BLAKE2S_BLOCKBYTES ) { blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); blake2s_compress( S, S->buf ); S->buflen -= BLAKE2S_BLOCKBYTES; memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen ); } blake2s_increment_counter( S, ( uint32 )S->buflen ); blake2s_set_lastblock( S ); memset( S->buf + S->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */ blake2s_compress( S, S->buf ); for( int i = 0; i < 8; ++i ) /* Output full hash */ RawPut4( S->h[i], digest + 4 * i ); }