libclamunrar/blake2s.cpp
01eebc13
 // Based on public domain code written in 2012 by Samuel Neves
 
 #include "rar.hpp"
 
 #ifdef USE_SSE
 #include "blake2s_sse.cpp"
 #endif
 
 // Forward declarations. blake2sp.cpp is textually included right below,
 // ahead of the definitions found further down in this file, so the
 // prototypes have to be visible first (presumably blake2sp.cpp calls
 // these three functions - confirm against that file).
 static void blake2s_init_param( blake2s_state *S, uint32 node_offset, uint32 node_depth);
 static void blake2s_update( blake2s_state *S, const byte *in, size_t inlen );
 static void blake2s_final( blake2s_state *S, byte *digest );
 
 #include "blake2sp.cpp"
 
 // Initialization vector. blake2s_init_param() seeds the chaining value
 // h[0..7] from these eight words, and blake2s_compress() loads them into
 // the upper half of its working vector (v[8..15]) for every block.
 static const uint32 blake2s_IV[8] =
 {
   0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
   0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
 };
 
 // Message word selection table, one row per round. The G macro reads
 // blake2s_sigma[r][2*i] and blake2s_sigma[r][2*i+1] to choose the two
 // message words mixed in by the i-th G call of round r.
 static const byte blake2s_sigma[10][16] =
 {
   {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 } ,
   { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 } ,
   { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 } ,
   {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 } ,
   {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 } ,
   {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 } ,
   { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 } ,
   { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 } ,
   {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 } ,
   { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13 , 0 } ,
 };
 
 // Raise the second finalization flag word, f[1], by setting all of its bits.
 // blake2s_set_lastblock() calls this when the state has last_node set.
 static inline void blake2s_set_lastnode( blake2s_state *S )
 {
   const unsigned all_ones = ~0U;

   S->f[1] = all_ones;
 }
 
 
 /* Some helper functions, not necessarily useful */
 // Flag the block about to be compressed as the final one: f[0] gets all
 // bits set, and f[1] as well (through blake2s_set_lastnode) when the state
 // is marked with last_node.
 static inline void blake2s_set_lastblock( blake2s_state *S )
 {
   const unsigned all_ones = ~0U;

   if( S->last_node )
   {
     blake2s_set_lastnode( S );
   }

   S->f[0] = all_ones;
 }
 
 
 // Advance the two-word counter t[] by inc. t[0] is the low word and t[1]
 // receives the carry whenever the addition to t[0] wraps around.
 static inline void blake2s_increment_counter( blake2s_state *S, const uint32 inc )
 {
   S->t[0] += inc;

   // After a wrap-around the sum is smaller than the value just added.
   if( S->t[0] < inc )
     S->t[1] += 1;
 }
 
 
 // Reset the state and load the initial chaining value: the IV xored with
 // the words of the parameter block.
 //   S           - state to (re)initialize; it is wiped with S->init() first.
 //   node_offset - xored into h[2].
 //   node_depth  - shifted left by 16 bits and xored into h[3].
 void blake2s_init_param( blake2s_state *S, uint32 node_offset, uint32 node_depth)
 {
 #ifdef USE_SSE
   if (_SSE_Version>=SSE_SSE2)
     blake2s_init_sse();
 #endif
 
   S->init(); // Clean data.
 
   // We use BLAKE2sp parameters block.
   // NOTE(review): 0x02080020 and 0x20000000 presumably encode the fixed
   // parameter block fields (digest length, fanout, depth, inner length) -
   // confirm against the BLAKE2 specification before changing them.
   S->h[0] = blake2s_IV[0] ^ 0x02080020;
   S->h[1] = blake2s_IV[1];
   S->h[2] = blake2s_IV[2] ^ node_offset;
   S->h[3] = blake2s_IV[3] ^ ((node_depth<<16)|0x20000000);
 
   // Nothing is xored into the remaining words, so they take the plain IV.
   for( size_t w = 4; w < 8; ++w )
     S->h[w] = blake2s_IV[w];
 }
 
 
 // One application of the mixing step G used by blake2s_compress().
 //   r       - round number; selects the row of blake2s_sigma.
 //   i       - index of this G call inside the round (0..7); selects which
 //             pair of entries in that row is used.
 //   m       - array holding the 16 message words of the current block.
 //   a,b,c,d - lvalues naming the four working words; updated in place.
 // Every argument is expanded more than once, so pass only expressions
 // without side effects. The body is wrapped in do { } while( 0 ) so that
 // "G(...);" behaves as a single statement, and each argument is
 // parenthesized so that compound expressions expand correctly.
 #define G(r,i,m,a,b,c,d) \
   do { \
     (a) = (a) + (b) + (m)[blake2s_sigma[(r)][2*(i)+0]]; \
     (d) = rotr32((d) ^ (a), 16); \
     (c) = (c) + (d); \
     (b) = rotr32((b) ^ (c), 12); \
     (a) = (a) + (b) + (m)[blake2s_sigma[(r)][2*(i)+1]]; \
     (d) = rotr32((d) ^ (a), 8); \
     (c) = (c) + (d); \
     (b) = rotr32((b) ^ (c), 7); \
   } while( 0 )
 
 
 // Mix one input block into the chaining value S->h.
 //   S     - hash state; h[] is updated, t[] and f[] are only read.
 //   block - input block; sixteen 4-byte words are read from it with RawGet4.
 static void blake2s_compress( blake2s_state *S, const byte block[BLAKE2S_BLOCKBYTES] )
 {
   uint32 msg[16]; // Message words of this block.
   uint32 v[16];   // Working vector.
 
   for( size_t w = 0; w < 16; ++w )
     msg[w] = RawGet4( block + 4 * w );
 
   // Lower half of v is the current chaining value, upper half is the IV.
   for( size_t w = 0; w < 8; ++w )
   {
     v[w]     = S->h[w];
     v[w + 8] = blake2s_IV[w];
   }
 
   // The counter and the finalization flags are xored into the last four words.
   v[12] ^= S->t[0];
   v[13] ^= S->t[1];
   v[14] ^= S->f[0];
   v[15] ^= S->f[1];
 
   for ( uint rnd = 0; rnd < 10; ++rnd ) // No gain on i7 if unrolled, but exe size grows.
   {
     // Columns of v seen as a 4x4 matrix.
     G(rnd,0,msg,v[ 0],v[ 4],v[ 8],v[12]);
     G(rnd,1,msg,v[ 1],v[ 5],v[ 9],v[13]);
     G(rnd,2,msg,v[ 2],v[ 6],v[10],v[14]);
     G(rnd,3,msg,v[ 3],v[ 7],v[11],v[15]);
 
     // Diagonals of the same matrix.
     G(rnd,4,msg,v[ 0],v[ 5],v[10],v[15]);
     G(rnd,5,msg,v[ 1],v[ 6],v[11],v[12]);
     G(rnd,6,msg,v[ 2],v[ 7],v[ 8],v[13]);
     G(rnd,7,msg,v[ 3],v[ 4],v[ 9],v[14]);
   }
 
   // Fold both halves of the working vector back into the chaining value.
   for( size_t w = 0; w < 8; ++w )
     S->h[w] ^= v[w] ^ v[w + 8];
 }
 
 
 // Feed inlen bytes from in into the hash state.
 // S->buf holds up to two blocks. A block is compressed only when more input
 // arrives than the buffer can still take, so the most recent data always
 // stays buffered for blake2s_final().
 //   S     - hash state to update.
 //   in    - input bytes.
 //   inlen - number of input bytes; zero leaves the state untouched.
 void blake2s_update( blake2s_state *S, const byte *in, size_t inlen )
 {
   while( inlen > 0 )
   {
     const size_t used = S->buflen;
     const size_t room = 2 * BLAKE2S_BLOCKBYTES - used;
 
     if( inlen <= room )
     {
       // The rest of the input fits. Be lazy, do not compress.
       memcpy( S->buf + used, in, inlen );
       S->buflen += inlen;
       break;
     }
 
     // Top the buffer up to two full blocks, then consume the first of them.
     memcpy( S->buf + used, in, room );
     S->buflen += room;
     blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
 
 #ifdef USE_SSE
 #ifdef _WIN_32 // We use SSSE3 _mm_shuffle_epi8 only in x64 mode.
     if (_SSE_Version>=SSE_SSE2)
 #else
     if (_SSE_Version>=SSE_SSSE3)
 #endif
       blake2s_compress_sse( S, S->buf );
     else
       blake2s_compress( S, S->buf );
 #else
     blake2s_compress( S, S->buf );
 #endif
 
     // Shift buffer left: the second block becomes the first.
     memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES );
     S->buflen -= BLAKE2S_BLOCKBYTES;
 
     in += room;
     inlen -= room;
   }
 }
 
 
 // Finish hashing and write the result to digest.
 //   S      - hash state; its buffer, counter and flags are modified.
 //   digest - output buffer; eight 4-byte words (32 bytes) are stored into it
 //            with RawPut4.
 void blake2s_final( blake2s_state *S, byte *digest )
 {
   if( S->buflen > BLAKE2S_BLOCKBYTES )
   {
     // More than one block is buffered: the first one is an ordinary full
     // block, so compress it and move the remainder to the buffer start.
     blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
     blake2s_compress( S, S->buf );
     S->buflen -= BLAKE2S_BLOCKBYTES;
     memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen );
   }
 
   // Only the bytes really present are counted; the rest of the buffer is
   // zero padding for the last compression.
   const size_t tail = S->buflen;
 
   blake2s_increment_counter( S, ( uint32 )tail );
   blake2s_set_lastblock( S );
   memset( S->buf + tail, 0, 2 * BLAKE2S_BLOCKBYTES - tail ); /* Padding */
   blake2s_compress( S, S->buf );
 
   /* Output full hash  */
   byte *out = digest;
   for( size_t w = 0; w < 8; ++w, out += 4 )
     RawPut4( S->h[w], out );
 }