Crypto++ 8.2
Free C++ class library of cryptographic schemes
sha.cpp
1// sha.cpp - modified by Wei Dai from Steve Reid's public domain sha1.c
2
3// Steve Reid implemented SHA-1. Wei Dai implemented SHA-2. Jeffrey Walton
4// implemented Intel SHA extensions based on Intel articles and code by
5// Sean Gulley. Jeffrey Walton implemented ARM SHA based on ARM code and
6// code from Johannes Schneiders, Skip Hovsmith and Barry O'Rourke.
7// All code is in the public domain.
8
9// In August 2017 JW reworked the internals to align all the implementations.
10// Formerly all hashes were software based, IterHashBase handled endian conversions,
11// and IterHashBase dispatched a single block to SHA{N}::Transform. SHA{N}::Transform
12// then performed the single block hashing. It was repeated for multiple blocks.
13//
14// The rework added SHA{N}::HashMultipleBlocks (class) and SHA{N}_HashMultipleBlocks
15// (free standing). There are also hardware accelerated variations. Callers enter
16// SHA{N}::HashMultipleBlocks (class), and the function calls SHA{N}_HashMultipleBlocks
17// (free standing) or SHA{N}_HashBlock (free standing) as a fallback.
18//
19// An added wrinkle is hardware is little endian, C++ is big endian, and callers use
20// big endian, so SHA{N}_HashMultipleBlocks accepts a ByteOrder for the incoming data
21// arrangement. Hardware based SHA{N}_HashMultipleBlocks can often perform the endian
22// swap much easier by setting an EPI mask. Endian swap incurs no penalty on Intel SHA,
23// and 4-instruction penalty on ARM SHA. Under C++ the full software based swap penalty
24// is incurred due to use of ReverseBytes().
25//
26// The rework also removed the hacked-in pointers to implementations.
27
28// use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM sha.cpp" to generate MASM code
29
30#include "pch.h"
31#include "config.h"
32
33#if CRYPTOPP_MSC_VERSION
34# pragma warning(disable: 4100 4731)
35#endif
36
37#ifndef CRYPTOPP_IMPORTS
38#ifndef CRYPTOPP_GENERATE_X64_MASM
39
40#include "secblock.h"
41#include "sha.h"
42#include "misc.h"
43#include "cpu.h"
44
45#if defined(CRYPTOPP_DISABLE_SHA_ASM)
46# undef CRYPTOPP_X86_ASM_AVAILABLE
47# undef CRYPTOPP_X32_ASM_AVAILABLE
48# undef CRYPTOPP_X64_ASM_AVAILABLE
49# undef CRYPTOPP_SSE2_ASM_AVAILABLE
50#endif
51
52NAMESPACE_BEGIN(CryptoPP)
53
// Prototypes for the hardware accelerated block functions. Nothing in this
// file defines them; they are presumably supplied by sha_simd.cpp (confirm
// against the build). Each takes the hash state, the message words, the
// number of bytes to process and the byte order of the message data.

#if CRYPTOPP_SHANI_AVAILABLE
extern void SHA1_HashMultipleBlocks_SHANI(word32 *state, const word32 *data, size_t length, ByteOrder order);
extern void SHA256_HashMultipleBlocks_SHANI(word32 *state, const word32 *data, size_t length, ByteOrder order);
#endif

#if CRYPTOPP_ARM_SHA1_AVAILABLE
extern void SHA1_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order);
#endif

#if CRYPTOPP_ARM_SHA2_AVAILABLE
extern void SHA256_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order);
#endif

#if CRYPTOPP_ARM_SHA512_AVAILABLE
// SHA-512 state and message words are 64-bit, so this prototype uses word64
// just like the POWER8 SHA-512 prototype below.
extern void SHA512_HashMultipleBlocks_ARMV8(word64 *state, const word64 *data, size_t length, ByteOrder order);
#endif

#if CRYPTOPP_POWER8_SHA_AVAILABLE
extern void SHA256_HashMultipleBlocks_POWER8(word32 *state, const word32 *data, size_t length, ByteOrder order);
extern void SHA512_HashMultipleBlocks_POWER8(word64 *state, const word64 *data, size_t length, ByteOrder order);
#endif
75
76// We add extern to export table to sha_simd.cpp, but it
77// cleared http://github.com/weidai11/cryptopp/issues/502
78extern const word32 SHA256_K[64];
79extern const word64 SHA512_K[80];
80
81CRYPTOPP_ALIGN_DATA(16)
82const word64 SHA512_K[80] = {
83 W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd),
84 W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc),
85 W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019),
86 W64LIT(0x923f82a4af194f9b), W64LIT(0xab1c5ed5da6d8118),
87 W64LIT(0xd807aa98a3030242), W64LIT(0x12835b0145706fbe),
88 W64LIT(0x243185be4ee4b28c), W64LIT(0x550c7dc3d5ffb4e2),
89 W64LIT(0x72be5d74f27b896f), W64LIT(0x80deb1fe3b1696b1),
90 W64LIT(0x9bdc06a725c71235), W64LIT(0xc19bf174cf692694),
91 W64LIT(0xe49b69c19ef14ad2), W64LIT(0xefbe4786384f25e3),
92 W64LIT(0x0fc19dc68b8cd5b5), W64LIT(0x240ca1cc77ac9c65),
93 W64LIT(0x2de92c6f592b0275), W64LIT(0x4a7484aa6ea6e483),
94 W64LIT(0x5cb0a9dcbd41fbd4), W64LIT(0x76f988da831153b5),
95 W64LIT(0x983e5152ee66dfab), W64LIT(0xa831c66d2db43210),
96 W64LIT(0xb00327c898fb213f), W64LIT(0xbf597fc7beef0ee4),
97 W64LIT(0xc6e00bf33da88fc2), W64LIT(0xd5a79147930aa725),
98 W64LIT(0x06ca6351e003826f), W64LIT(0x142929670a0e6e70),
99 W64LIT(0x27b70a8546d22ffc), W64LIT(0x2e1b21385c26c926),
100 W64LIT(0x4d2c6dfc5ac42aed), W64LIT(0x53380d139d95b3df),
101 W64LIT(0x650a73548baf63de), W64LIT(0x766a0abb3c77b2a8),
102 W64LIT(0x81c2c92e47edaee6), W64LIT(0x92722c851482353b),
103 W64LIT(0xa2bfe8a14cf10364), W64LIT(0xa81a664bbc423001),
104 W64LIT(0xc24b8b70d0f89791), W64LIT(0xc76c51a30654be30),
105 W64LIT(0xd192e819d6ef5218), W64LIT(0xd69906245565a910),
106 W64LIT(0xf40e35855771202a), W64LIT(0x106aa07032bbd1b8),
107 W64LIT(0x19a4c116b8d2d0c8), W64LIT(0x1e376c085141ab53),
108 W64LIT(0x2748774cdf8eeb99), W64LIT(0x34b0bcb5e19b48a8),
109 W64LIT(0x391c0cb3c5c95a63), W64LIT(0x4ed8aa4ae3418acb),
110 W64LIT(0x5b9cca4f7763e373), W64LIT(0x682e6ff3d6b2b8a3),
111 W64LIT(0x748f82ee5defb2fc), W64LIT(0x78a5636f43172f60),
112 W64LIT(0x84c87814a1f0ab72), W64LIT(0x8cc702081a6439ec),
113 W64LIT(0x90befffa23631e28), W64LIT(0xa4506cebde82bde9),
114 W64LIT(0xbef9a3f7b2c67915), W64LIT(0xc67178f2e372532b),
115 W64LIT(0xca273eceea26619c), W64LIT(0xd186b8c721c0c207),
116 W64LIT(0xeada7dd6cde0eb1e), W64LIT(0xf57d4f7fee6ed178),
117 W64LIT(0x06f067aa72176fba), W64LIT(0x0a637dc5a2c898a6),
118 W64LIT(0x113f9804bef90dae), W64LIT(0x1b710b35131c471b),
119 W64LIT(0x28db77f523047d84), W64LIT(0x32caab7b40c72493),
120 W64LIT(0x3c9ebe0a15c9bebc), W64LIT(0x431d67c49c100d4c),
121 W64LIT(0x4cc5d4becb3e42b6), W64LIT(0x597f299cfc657e2a),
122 W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817)
123};
124
125CRYPTOPP_ALIGN_DATA(16)
126const word32 SHA256_K[64] = {
127
128 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
129 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
130 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
131 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
132 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
133 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
134 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
135 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
136 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
137 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
138 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
139 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
140 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
141 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
142 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
143 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
144};
145
146////////////////////////////////
147// start of Steve Reid's code //
148////////////////////////////////
149
150ANONYMOUS_NAMESPACE_BEGIN
151
152#define blk0(i) (W[i] = data[i])
153#define blk1(i) (W[i&15] = rotlConstant<1>(W[(i+13)&15]^W[(i+8)&15]^W[(i+2)&15]^W[i&15]))
154
155#define f1(x,y,z) (z^(x&(y^z)))
156#define f2(x,y,z) (x^y^z)
157#define f3(x,y,z) ((x&y)|(z&(x|y)))
158#define f4(x,y,z) (x^y^z)
159
160/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
161#define R0(v,w,x,y,z,i) z+=f1(w,x,y)+blk0(i)+0x5A827999+rotlConstant<5>(v);w=rotlConstant<30>(w);
162#define R1(v,w,x,y,z,i) z+=f1(w,x,y)+blk1(i)+0x5A827999+rotlConstant<5>(v);w=rotlConstant<30>(w);
163#define R2(v,w,x,y,z,i) z+=f2(w,x,y)+blk1(i)+0x6ED9EBA1+rotlConstant<5>(v);w=rotlConstant<30>(w);
164#define R3(v,w,x,y,z,i) z+=f3(w,x,y)+blk1(i)+0x8F1BBCDC+rotlConstant<5>(v);w=rotlConstant<30>(w);
165#define R4(v,w,x,y,z,i) z+=f4(w,x,y)+blk1(i)+0xCA62C1D6+rotlConstant<5>(v);w=rotlConstant<30>(w);
166
167void SHA1_HashBlock_CXX(word32 *state, const word32 *data)
168{
169 CRYPTOPP_ASSERT(state);
170 CRYPTOPP_ASSERT(data);
171
172 word32 W[16];
173 /* Copy context->state[] to working vars */
174 word32 a = state[0];
175 word32 b = state[1];
176 word32 c = state[2];
177 word32 d = state[3];
178 word32 e = state[4];
179 /* 4 rounds of 20 operations each. Loop unrolled. */
180 R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
181 R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
182 R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
183 R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
184 R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
185 R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
186 R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
187 R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
188 R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
189 R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
190 R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
191 R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
192 R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
193 R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
194 R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
195 R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
196 R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
197 R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
198 R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
199 R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
200 /* Add the working vars back into context.state[] */
201 state[0] += a;
202 state[1] += b;
203 state[2] += c;
204 state[3] += d;
205 state[4] += e;
206}
207
208#undef blk0
209#undef blk1
210#undef f1
211#undef f2
212#undef f3
213#undef f4
214#undef R1
215#undef R2
216#undef R3
217#undef R4
218
219ANONYMOUS_NAMESPACE_END
220
221//////////////////////////////
222// end of Steve Reid's code //
223//////////////////////////////
224
225std::string SHA1::AlgorithmProvider() const
226{
227#if CRYPTOPP_SHANI_AVAILABLE
228 if (HasSHA())
229 return "SHANI";
230#endif
231#if CRYPTOPP_SSE2_ASM_AVAILABLE
232 if (HasSSE2())
233 return "SSE2";
234#endif
235#if CRYPTOPP_ARM_SHA1_AVAILABLE
236 if (HasSHA1())
237 return "ARMv8";
238#endif
239 return "C++";
240}
241
242void SHA1::InitState(HashWordType *state)
243{
244 state[0] = 0x67452301;
245 state[1] = 0xEFCDAB89;
246 state[2] = 0x98BADCFE;
247 state[3] = 0x10325476;
248 state[4] = 0xC3D2E1F0;
249}
250
251void SHA1::Transform(word32 *state, const word32 *data)
252{
253 CRYPTOPP_ASSERT(state);
254 CRYPTOPP_ASSERT(data);
255
256#if CRYPTOPP_SHANI_AVAILABLE
257 if (HasSHA())
258 {
259 SHA1_HashMultipleBlocks_SHANI(state, data, SHA1::BLOCKSIZE, LITTLE_ENDIAN_ORDER);
260 return;
261 }
262#endif
263#if CRYPTOPP_ARM_SHA1_AVAILABLE
264 if (HasSHA1())
265 {
266 SHA1_HashMultipleBlocks_ARMV8(state, data, SHA1::BLOCKSIZE, LITTLE_ENDIAN_ORDER);
267 return;
268 }
269#endif
270
271 SHA1_HashBlock_CXX(state, data);
272}
273
274size_t SHA1::HashMultipleBlocks(const word32 *input, size_t length)
275{
276 CRYPTOPP_ASSERT(input);
277 CRYPTOPP_ASSERT(length >= SHA1::BLOCKSIZE);
278
279#if CRYPTOPP_SHANI_AVAILABLE
280 if (HasSHA())
281 {
282 SHA1_HashMultipleBlocks_SHANI(m_state, input, length, BIG_ENDIAN_ORDER);
283 return length & (SHA1::BLOCKSIZE - 1);
284 }
285#endif
286#if CRYPTOPP_ARM_SHA1_AVAILABLE
287 if (HasSHA1())
288 {
289 SHA1_HashMultipleBlocks_ARMV8(m_state, input, length, BIG_ENDIAN_ORDER);
290 return length & (SHA1::BLOCKSIZE - 1);
291 }
292#endif
293
294 const bool noReverse = NativeByteOrderIs(this->GetByteOrder());
295 word32 *dataBuf = this->DataBuf();
296 do
297 {
298 if (noReverse)
299 {
300 SHA1_HashBlock_CXX(m_state, input);
301 }
302 else
303 {
304 ByteReverse(dataBuf, input, SHA1::BLOCKSIZE);
305 SHA1_HashBlock_CXX(m_state, dataBuf);
306 }
307
308 input += SHA1::BLOCKSIZE/sizeof(word32);
309 length -= SHA1::BLOCKSIZE;
310 }
311 while (length >= SHA1::BLOCKSIZE);
312 return length;
313}
314
315// *************************************************************
316
317ANONYMOUS_NAMESPACE_BEGIN
318
319#define a(i) T[(0-i)&7]
320#define b(i) T[(1-i)&7]
321#define c(i) T[(2-i)&7]
322#define d(i) T[(3-i)&7]
323#define e(i) T[(4-i)&7]
324#define f(i) T[(5-i)&7]
325#define g(i) T[(6-i)&7]
326#define h(i) T[(7-i)&7]
327
328#define blk0(i) (W[i] = data[i])
329#define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15]))
330
331#define Ch(x,y,z) (z^(x&(y^z)))
332#define Maj(x,y,z) (y^((x^y)&(y^z)))
333
334#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA256_K[i+j]+(j?blk2(i):blk0(i));\
335 d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))
336
337// for SHA256
338#define s0(x) (rotrConstant<7>(x)^rotrConstant<18>(x)^(x>>3))
339#define s1(x) (rotrConstant<17>(x)^rotrConstant<19>(x)^(x>>10))
340#define S0(x) (rotrConstant<2>(x)^rotrConstant<13>(x)^rotrConstant<22>(x))
341#define S1(x) (rotrConstant<6>(x)^rotrConstant<11>(x)^rotrConstant<25>(x))
342
343void SHA256_HashBlock_CXX(word32 *state, const word32 *data)
344{
345 word32 W[16]={0}, T[8];
346 /* Copy context->state[] to working vars */
347 memcpy(T, state, sizeof(T));
348 /* 64 operations, partially loop unrolled */
349 for (unsigned int j=0; j<64; j+=16)
350 {
351 R( 0); R( 1); R( 2); R( 3);
352 R( 4); R( 5); R( 6); R( 7);
353 R( 8); R( 9); R(10); R(11);
354 R(12); R(13); R(14); R(15);
355 }
356 /* Add the working vars back into context.state[] */
357 state[0] += a(0);
358 state[1] += b(0);
359 state[2] += c(0);
360 state[3] += d(0);
361 state[4] += e(0);
362 state[5] += f(0);
363 state[6] += g(0);
364 state[7] += h(0);
365}
366
367#undef Ch
368#undef Maj
369#undef s0
370#undef s1
371#undef S0
372#undef S1
373#undef blk0
374#undef blk1
375#undef blk2
376#undef R
377
378#undef a
379#undef b
380#undef c
381#undef d
382#undef e
383#undef f
384#undef g
385#undef h
386
387ANONYMOUS_NAMESPACE_END
388
// Names the implementation that SHA-224/SHA-256 hashing will use on this
// machine. The checks follow the same order and the same preprocessor
// conditions as SHA256::HashMultipleBlocks, which takes the SSE2 path when
// either the inline assembly or the separately assembled MASM routine is
// available.
std::string SHA256_AlgorithmProvider()
{
#if CRYPTOPP_SHANI_AVAILABLE
	if (HasSHA())
		return "SHANI";
#endif
#if CRYPTOPP_SSE2_ASM_AVAILABLE || CRYPTOPP_X64_MASM_AVAILABLE
	if (HasSSE2())
		return "SSE2";
#endif
#if CRYPTOPP_ARM_SHA2_AVAILABLE
	if (HasSHA2())
		return "ARMv8";
#endif
#if (CRYPTOPP_POWER8_SHA_AVAILABLE)
	if (HasSHA256())
		return "Power8";
#endif
	return "C++";
}
409
410std::string SHA224::AlgorithmProvider() const
411{
412 return SHA256_AlgorithmProvider();
413}
414
415void SHA224::InitState(HashWordType *state)
416{
417 static const word32 s[8] = {
418 0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939,
419 0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4};
420 memcpy(state, s, sizeof(s));
421}
422
423void SHA256::InitState(HashWordType *state)
424{
425 static const word32 s[8] = {
426 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
427 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
428 memcpy(state, s, sizeof(s));
429}
430#endif // Not CRYPTOPP_GENERATE_X64_MASM
431
432#if defined(CRYPTOPP_X86_ASM_AVAILABLE)
433
434ANONYMOUS_NAMESPACE_BEGIN
435
436void CRYPTOPP_FASTCALL SHA256_HashMultipleBlocks_SSE2(word32 *state, const word32 *data, size_t len)
437{
438 #define LOCALS_SIZE 8*4 + 16*4 + 4*WORD_SZ
439 #define H(i) [BASE+ASM_MOD(1024+7-(i),8)*4]
440 #define G(i) H(i+1)
441 #define F(i) H(i+2)
442 #define E(i) H(i+3)
443 #define D(i) H(i+4)
444 #define C(i) H(i+5)
445 #define B(i) H(i+6)
446 #define A(i) H(i+7)
447 #define Wt(i) BASE+8*4+ASM_MOD(1024+15-(i),16)*4
448 #define Wt_2(i) Wt((i)-2)
449 #define Wt_15(i) Wt((i)-15)
450 #define Wt_7(i) Wt((i)-7)
451 #define K_END [BASE+8*4+16*4+0*WORD_SZ]
452 #define STATE_SAVE [BASE+8*4+16*4+1*WORD_SZ]
453 #define DATA_SAVE [BASE+8*4+16*4+2*WORD_SZ]
454 #define DATA_END [BASE+8*4+16*4+3*WORD_SZ]
455 #define Kt(i) WORD_REG(si)+(i)*4
456#if CRYPTOPP_BOOL_X86
457 #define BASE esp+4
458#elif defined(__GNUC__)
459 #define BASE r8
460#else
461 #define BASE rsp
462#endif
463
464#define RA0(i, edx, edi) \
465 AS2( add edx, [Kt(i)] )\
466 AS2( add edx, [Wt(i)] )\
467 AS2( add edx, H(i) )\
468
469#define RA1(i, edx, edi)
470
471#define RB0(i, edx, edi)
472
473#define RB1(i, edx, edi) \
474 AS2( mov AS_REG_7d, [Wt_2(i)] )\
475 AS2( mov edi, [Wt_15(i)])\
476 AS2( mov ebx, AS_REG_7d )\
477 AS2( shr AS_REG_7d, 10 )\
478 AS2( ror ebx, 17 )\
479 AS2( xor AS_REG_7d, ebx )\
480 AS2( ror ebx, 2 )\
481 AS2( xor ebx, AS_REG_7d )/* s1(W_t-2) */\
482 AS2( add ebx, [Wt_7(i)])\
483 AS2( mov AS_REG_7d, edi )\
484 AS2( shr AS_REG_7d, 3 )\
485 AS2( ror edi, 7 )\
486 AS2( add ebx, [Wt(i)])/* s1(W_t-2) + W_t-7 + W_t-16 */\
487 AS2( xor AS_REG_7d, edi )\
488 AS2( add edx, [Kt(i)])\
489 AS2( ror edi, 11 )\
490 AS2( add edx, H(i) )\
491 AS2( xor AS_REG_7d, edi )/* s0(W_t-15) */\
492 AS2( add AS_REG_7d, ebx )/* W_t = s1(W_t-2) + W_t-7 + s0(W_t-15) W_t-16*/\
493 AS2( mov [Wt(i)], AS_REG_7d)\
494 AS2( add edx, AS_REG_7d )\
495
496#define ROUND(i, r, eax, ecx, edi, edx)\
497 /* in: edi = E */\
498 /* unused: eax, ecx, temp: ebx, AS_REG_7d, out: edx = T1 */\
499 AS2( mov edx, F(i) )\
500 AS2( xor edx, G(i) )\
501 AS2( and edx, edi )\
502 AS2( xor edx, G(i) )/* Ch(E,F,G) = (G^(E&(F^G))) */\
503 AS2( mov AS_REG_7d, edi )\
504 AS2( ror edi, 6 )\
505 AS2( ror AS_REG_7d, 25 )\
506 RA##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\
507 AS2( xor AS_REG_7d, edi )\
508 AS2( ror edi, 5 )\
509 AS2( xor AS_REG_7d, edi )/* S1(E) */\
510 AS2( add edx, AS_REG_7d )/* T1 = S1(E) + Ch(E,F,G) + H + Wt + Kt */\
511 RB##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\
512 /* in: ecx = A, eax = B^C, edx = T1 */\
513 /* unused: edx, temp: ebx, AS_REG_7d, out: eax = A, ecx = B^C, edx = E */\
514 AS2( mov ebx, ecx )\
515 AS2( xor ecx, B(i) )/* A^B */\
516 AS2( and eax, ecx )\
517 AS2( xor eax, B(i) )/* Maj(A,B,C) = B^((A^B)&(B^C) */\
518 AS2( mov AS_REG_7d, ebx )\
519 AS2( ror ebx, 2 )\
520 AS2( add eax, edx )/* T1 + Maj(A,B,C) */\
521 AS2( add edx, D(i) )\
522 AS2( mov D(i), edx )\
523 AS2( ror AS_REG_7d, 22 )\
524 AS2( xor AS_REG_7d, ebx )\
525 AS2( ror ebx, 11 )\
526 AS2( xor AS_REG_7d, ebx )\
527 AS2( add eax, AS_REG_7d )/* T1 + S0(A) + Maj(A,B,C) */\
528 AS2( mov H(i), eax )\
529
530// Unroll the use of CRYPTOPP_BOOL_X64 in assembler math. The GAS assembler on X32 (version 2.25)
531// complains "Error: invalid operands (*ABS* and *UND* sections) for `*` and `-`"
532#if CRYPTOPP_BOOL_X64
533#define SWAP_COPY(i) \
534 AS2( mov WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\
535 AS1( bswap WORD_REG(bx))\
536 AS2( mov [Wt(i*2+1)], WORD_REG(bx))
537#else // X86 and X32
538#define SWAP_COPY(i) \
539 AS2( mov WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\
540 AS1( bswap WORD_REG(bx))\
541 AS2( mov [Wt(i)], WORD_REG(bx))
542#endif
543
544#if defined(__GNUC__)
545 #if CRYPTOPP_BOOL_X64
547 #endif
548 __asm__ __volatile__
549 (
550 #if CRYPTOPP_BOOL_X64
551 "lea %4, %%r8;"
552 #endif
553 INTEL_NOPREFIX
554#elif defined(CRYPTOPP_GENERATE_X64_MASM)
555 ALIGN 8
556 SHA256_HashMultipleBlocks_SSE2 PROC FRAME
557 rex_push_reg rsi
558 push_reg rdi
559 push_reg rbx
560 push_reg rbp
561 alloc_stack(LOCALS_SIZE+8)
562 .endprolog
563 mov rdi, r8
564 lea rsi, [?SHA256_K@CryptoPP@@3QBIB + 48*4]
565#endif
566
567#if CRYPTOPP_BOOL_X86
568 #ifndef __GNUC__
569 AS2( mov edi, [len])
570 AS2( lea WORD_REG(si), [SHA256_K+48*4])
571 #endif
572 #if !defined(_MSC_VER) || (_MSC_VER < 1400)
573 AS_PUSH_IF86(bx)
574 #endif
575
576 AS_PUSH_IF86(bp)
577 AS2( mov ebx, esp)
578 AS2( and esp, -16)
579 AS2( sub WORD_REG(sp), LOCALS_SIZE)
580 AS_PUSH_IF86(bx)
581#endif
582 AS2( mov STATE_SAVE, WORD_REG(cx))
583 AS2( mov DATA_SAVE, WORD_REG(dx))
584 AS2( lea WORD_REG(ax), [WORD_REG(di) + WORD_REG(dx)])
585 AS2( mov DATA_END, WORD_REG(ax))
586 AS2( mov K_END, WORD_REG(si))
587
588#if CRYPTOPP_SSE2_ASM_AVAILABLE
589#if CRYPTOPP_BOOL_X86
590 AS2( test edi, 1)
591 ASJ( jnz, 2, f)
592 AS1( dec DWORD PTR K_END)
593#endif
594 AS2( movdqu xmm0, XMMWORD_PTR [WORD_REG(cx)+0*16])
595 AS2( movdqu xmm1, XMMWORD_PTR [WORD_REG(cx)+1*16])
596#endif
597
598#if CRYPTOPP_BOOL_X86
599#if CRYPTOPP_SSE2_ASM_AVAILABLE
600 ASJ( jmp, 0, f)
601#endif
602 ASL(2) // non-SSE2
603 AS2( mov esi, ecx)
604 AS2( lea edi, A(0))
605 AS2( mov ecx, 8)
606ATT_NOPREFIX
607 AS1( rep movsd)
608INTEL_NOPREFIX
609 AS2( mov esi, K_END)
610 ASJ( jmp, 3, f)
611#endif
612
613#if CRYPTOPP_SSE2_ASM_AVAILABLE
614 ASL(0)
615 AS2( movdqu E(0), xmm1)
616 AS2( movdqu A(0), xmm0)
617#endif
618#if CRYPTOPP_BOOL_X86
619 ASL(3)
620#endif
621 AS2( sub WORD_REG(si), 48*4)
622 SWAP_COPY(0) SWAP_COPY(1) SWAP_COPY(2) SWAP_COPY(3)
623 SWAP_COPY(4) SWAP_COPY(5) SWAP_COPY(6) SWAP_COPY(7)
624#if CRYPTOPP_BOOL_X86
625 SWAP_COPY(8) SWAP_COPY(9) SWAP_COPY(10) SWAP_COPY(11)
626 SWAP_COPY(12) SWAP_COPY(13) SWAP_COPY(14) SWAP_COPY(15)
627#endif
628 AS2( mov edi, E(0)) // E
629 AS2( mov eax, B(0)) // B
630 AS2( xor eax, C(0)) // B^C
631 AS2( mov ecx, A(0)) // A
632
633 ROUND(0, 0, eax, ecx, edi, edx)
634 ROUND(1, 0, ecx, eax, edx, edi)
635 ROUND(2, 0, eax, ecx, edi, edx)
636 ROUND(3, 0, ecx, eax, edx, edi)
637 ROUND(4, 0, eax, ecx, edi, edx)
638 ROUND(5, 0, ecx, eax, edx, edi)
639 ROUND(6, 0, eax, ecx, edi, edx)
640 ROUND(7, 0, ecx, eax, edx, edi)
641 ROUND(8, 0, eax, ecx, edi, edx)
642 ROUND(9, 0, ecx, eax, edx, edi)
643 ROUND(10, 0, eax, ecx, edi, edx)
644 ROUND(11, 0, ecx, eax, edx, edi)
645 ROUND(12, 0, eax, ecx, edi, edx)
646 ROUND(13, 0, ecx, eax, edx, edi)
647 ROUND(14, 0, eax, ecx, edi, edx)
648 ROUND(15, 0, ecx, eax, edx, edi)
649
650 ASL(1)
651 AS2(add WORD_REG(si), 4*16)
652 ROUND(0, 1, eax, ecx, edi, edx)
653 ROUND(1, 1, ecx, eax, edx, edi)
654 ROUND(2, 1, eax, ecx, edi, edx)
655 ROUND(3, 1, ecx, eax, edx, edi)
656 ROUND(4, 1, eax, ecx, edi, edx)
657 ROUND(5, 1, ecx, eax, edx, edi)
658 ROUND(6, 1, eax, ecx, edi, edx)
659 ROUND(7, 1, ecx, eax, edx, edi)
660 ROUND(8, 1, eax, ecx, edi, edx)
661 ROUND(9, 1, ecx, eax, edx, edi)
662 ROUND(10, 1, eax, ecx, edi, edx)
663 ROUND(11, 1, ecx, eax, edx, edi)
664 ROUND(12, 1, eax, ecx, edi, edx)
665 ROUND(13, 1, ecx, eax, edx, edi)
666 ROUND(14, 1, eax, ecx, edi, edx)
667 ROUND(15, 1, ecx, eax, edx, edi)
668 AS2( cmp WORD_REG(si), K_END)
669 ATT_NOPREFIX
670 ASJ( jb, 1, b)
671 INTEL_NOPREFIX
672
673 AS2( mov WORD_REG(dx), DATA_SAVE)
674 AS2( add WORD_REG(dx), 64)
675 AS2( mov AS_REG_7, STATE_SAVE)
676 AS2( mov DATA_SAVE, WORD_REG(dx))
677
678#if CRYPTOPP_SSE2_ASM_AVAILABLE
679#if CRYPTOPP_BOOL_X86
680 AS2( test DWORD PTR K_END, 1)
681 ASJ( jz, 4, f)
682#endif
683 AS2( movdqu xmm1, XMMWORD_PTR [AS_REG_7+1*16])
684 AS2( movdqu xmm0, XMMWORD_PTR [AS_REG_7+0*16])
685 AS2( paddd xmm1, E(0))
686 AS2( paddd xmm0, A(0))
687 AS2( movdqu [AS_REG_7+1*16], xmm1)
688 AS2( movdqu [AS_REG_7+0*16], xmm0)
689 AS2( cmp WORD_REG(dx), DATA_END)
690 ATT_NOPREFIX
691 ASJ( jb, 0, b)
692 INTEL_NOPREFIX
693#endif
694
695#if CRYPTOPP_BOOL_X86
696#if CRYPTOPP_SSE2_ASM_AVAILABLE
697 ASJ( jmp, 5, f)
698 ASL(4) // non-SSE2
699#endif
700 AS2( add [AS_REG_7+0*4], ecx) // A
701 AS2( add [AS_REG_7+4*4], edi) // E
702 AS2( mov eax, B(0))
703 AS2( mov ebx, C(0))
704 AS2( mov ecx, D(0))
705 AS2( add [AS_REG_7+1*4], eax)
706 AS2( add [AS_REG_7+2*4], ebx)
707 AS2( add [AS_REG_7+3*4], ecx)
708 AS2( mov eax, F(0))
709 AS2( mov ebx, G(0))
710 AS2( mov ecx, H(0))
711 AS2( add [AS_REG_7+5*4], eax)
712 AS2( add [AS_REG_7+6*4], ebx)
713 AS2( add [AS_REG_7+7*4], ecx)
714 AS2( mov ecx, AS_REG_7d)
715 AS2( cmp WORD_REG(dx), DATA_END)
716 ASJ( jb, 2, b)
717#if CRYPTOPP_SSE2_ASM_AVAILABLE
718 ASL(5)
719#endif
720#endif
721
722 AS_POP_IF86(sp)
723 AS_POP_IF86(bp)
724 #if !defined(_MSC_VER) || (_MSC_VER < 1400)
725 AS_POP_IF86(bx)
726 #endif
727
728#ifdef CRYPTOPP_GENERATE_X64_MASM
729 add rsp, LOCALS_SIZE+8
730 pop rbp
731 pop rbx
732 pop rdi
733 pop rsi
734 ret
735 SHA256_HashMultipleBlocks_SSE2 ENDP
736#endif
737
738#ifdef __GNUC__
739 ATT_PREFIX
740 :
741 : "c" (state), "d" (data), "S" (SHA256_K+48), "D" (len)
742 #if CRYPTOPP_BOOL_X64
743 , "m" (workspace[0])
744 #endif
745 : "memory", "cc", "%eax"
746 #if CRYPTOPP_BOOL_X64
747 , "%rbx", "%r8", "%r10"
748 #endif
749 );
750#endif
751}
752
753ANONYMOUS_NAMESPACE_END
754
755#endif // CRYPTOPP_X86_ASM_AVAILABLE
756
757#ifndef CRYPTOPP_GENERATE_X64_MASM
758
#ifdef CRYPTOPP_X64_MASM_AVAILABLE
// C-linkage prototype for the SSE2 SHA-256 routine when it comes from the
// separately assembled MASM output instead of the inline assembly above.
extern "C" void CRYPTOPP_FASTCALL SHA256_HashMultipleBlocks_SSE2(word32 *state, const word32 *data, size_t len);
#endif
764
765std::string SHA256::AlgorithmProvider() const
766{
767 return SHA256_AlgorithmProvider();
768}
769
770void SHA256::Transform(word32 *state, const word32 *data)
771{
772 CRYPTOPP_ASSERT(state);
773 CRYPTOPP_ASSERT(data);
774
775#if CRYPTOPP_SHANI_AVAILABLE
776 if (HasSHA())
777 {
778 SHA256_HashMultipleBlocks_SHANI(state, data, SHA256::BLOCKSIZE, LITTLE_ENDIAN_ORDER);
779 return;
780 }
781#endif
782#if CRYPTOPP_ARM_SHA2_AVAILABLE
783 if (HasSHA2())
784 {
785 SHA256_HashMultipleBlocks_ARMV8(state, data, SHA256::BLOCKSIZE, LITTLE_ENDIAN_ORDER);
786 return;
787 }
788#endif
789#if CRYPTOPP_POWER8_SHA_AVAILABLE
790 if (HasSHA256())
791 {
792 SHA256_HashMultipleBlocks_POWER8(state, data, SHA256::BLOCKSIZE, LITTLE_ENDIAN_ORDER);
793 return;
794 }
795#endif
796
797 SHA256_HashBlock_CXX(state, data);
798}
799
800size_t SHA256::HashMultipleBlocks(const word32 *input, size_t length)
801{
802 CRYPTOPP_ASSERT(input);
803 CRYPTOPP_ASSERT(length >= SHA256::BLOCKSIZE);
804
805#if CRYPTOPP_SHANI_AVAILABLE
806 if (HasSHA())
807 {
808 SHA256_HashMultipleBlocks_SHANI(m_state, input, length, BIG_ENDIAN_ORDER);
809 return length & (SHA256::BLOCKSIZE - 1);
810 }
811#endif
812#if CRYPTOPP_SSE2_ASM_AVAILABLE || CRYPTOPP_X64_MASM_AVAILABLE
813 if (HasSSE2())
814 {
815 const size_t res = length & (SHA256::BLOCKSIZE - 1);
816 SHA256_HashMultipleBlocks_SSE2(m_state, input, length-res);
817 return res;
818 }
819#endif
820#if CRYPTOPP_ARM_SHA2_AVAILABLE
821 if (HasSHA2())
822 {
823 SHA256_HashMultipleBlocks_ARMV8(m_state, input, length, BIG_ENDIAN_ORDER);
824 return length & (SHA256::BLOCKSIZE - 1);
825 }
826#endif
827#if CRYPTOPP_POWER8_SHA_AVAILABLE
828 if (HasSHA256())
829 {
830 SHA256_HashMultipleBlocks_POWER8(m_state, input, length, BIG_ENDIAN_ORDER);
831 return length & (SHA256::BLOCKSIZE - 1);
832 }
833#endif
834
835 const bool noReverse = NativeByteOrderIs(this->GetByteOrder());
836 word32 *dataBuf = this->DataBuf();
837 do
838 {
839 if (noReverse)
840 {
841 SHA256_HashBlock_CXX(m_state, input);
842 }
843 else
844 {
845 ByteReverse(dataBuf, input, SHA256::BLOCKSIZE);
846 SHA256_HashBlock_CXX(m_state, dataBuf);
847 }
848
849 input += SHA256::BLOCKSIZE/sizeof(word32);
850 length -= SHA256::BLOCKSIZE;
851 }
852 while (length >= SHA256::BLOCKSIZE);
853 return length;
854}
855
856size_t SHA224::HashMultipleBlocks(const word32 *input, size_t length)
857{
858 CRYPTOPP_ASSERT(input);
859 CRYPTOPP_ASSERT(length >= SHA256::BLOCKSIZE);
860
861#if CRYPTOPP_SHANI_AVAILABLE
862 if (HasSHA())
863 {
864 SHA256_HashMultipleBlocks_SHANI(m_state, input, length, BIG_ENDIAN_ORDER);
865 return length & (SHA256::BLOCKSIZE - 1);
866 }
867#endif
868#if CRYPTOPP_SSE2_ASM_AVAILABLE || CRYPTOPP_X64_MASM_AVAILABLE
869 if (HasSSE2())
870 {
871 const size_t res = length & (SHA256::BLOCKSIZE - 1);
872 SHA256_HashMultipleBlocks_SSE2(m_state, input, length-res);
873 return res;
874 }
875#endif
876#if CRYPTOPP_ARM_SHA2_AVAILABLE
877 if (HasSHA2())
878 {
879 SHA256_HashMultipleBlocks_ARMV8(m_state, input, length, BIG_ENDIAN_ORDER);
880 return length & (SHA256::BLOCKSIZE - 1);
881 }
882#endif
883#if CRYPTOPP_POWER8_SHA_AVAILABLE
884 if (HasSHA256())
885 {
886 SHA256_HashMultipleBlocks_POWER8(m_state, input, length, BIG_ENDIAN_ORDER);
887 return length & (SHA256::BLOCKSIZE - 1);
888 }
889#endif
890
891 const bool noReverse = NativeByteOrderIs(this->GetByteOrder());
892 word32 *dataBuf = this->DataBuf();
893 do
894 {
895 if (noReverse)
896 {
897 SHA256_HashBlock_CXX(m_state, input);
898 }
899 else
900 {
901 ByteReverse(dataBuf, input, SHA256::BLOCKSIZE);
902 SHA256_HashBlock_CXX(m_state, dataBuf);
903 }
904
905 input += SHA256::BLOCKSIZE/sizeof(word32);
906 length -= SHA256::BLOCKSIZE;
907 }
908 while (length >= SHA256::BLOCKSIZE);
909 return length;
910}
911
912// *************************************************************
913
// Names the implementation used for SHA-384/SHA-512: "SSE2" or "Power8" when
// the matching build flag is set and the CPU check passes, otherwise "C++".
// NOTE(review): the SHA-512 dispatch code is not in view here; confirm these
// checks line up with it.
std::string SHA512_AlgorithmProvider()
{
#if CRYPTOPP_SSE2_ASM_AVAILABLE
	if (HasSSE2())
	{
		return std::string("SSE2");
	}
#endif
#if (CRYPTOPP_POWER8_SHA_AVAILABLE)
	if (HasSHA512())
	{
		return std::string("Power8");
	}
#endif
	return std::string("C++");
}
926
927std::string SHA384::AlgorithmProvider() const
928{
929 return SHA512_AlgorithmProvider();
930}
931
932std::string SHA512::AlgorithmProvider() const
933{
934 return SHA512_AlgorithmProvider();
935}
936
937void SHA384::InitState(HashWordType *state)
938{
939 const word64 s[8] = {
940 W64LIT(0xcbbb9d5dc1059ed8), W64LIT(0x629a292a367cd507),
941 W64LIT(0x9159015a3070dd17), W64LIT(0x152fecd8f70e5939),
942 W64LIT(0x67332667ffc00b31), W64LIT(0x8eb44a8768581511),
943 W64LIT(0xdb0c2e0d64f98fa7), W64LIT(0x47b5481dbefa4fa4)};
944 memcpy(state, s, sizeof(s));
945}
946
947void SHA512::InitState(HashWordType *state)
948{
949 const word64 s[8] = {
950 W64LIT(0x6a09e667f3bcc908), W64LIT(0xbb67ae8584caa73b),
951 W64LIT(0x3c6ef372fe94f82b), W64LIT(0xa54ff53a5f1d36f1),
952 W64LIT(0x510e527fade682d1), W64LIT(0x9b05688c2b3e6c1f),
953 W64LIT(0x1f83d9abfb41bd6b), W64LIT(0x5be0cd19137e2179)};
954 memcpy(state, s, sizeof(s));
955}
956
957#if CRYPTOPP_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86)
958
959ANONYMOUS_NAMESPACE_BEGIN
960
961// No inlining due to https://github.com/weidai11/cryptopp/issues/684
962// g++ -DNDEBUG -g2 -O3 -pthread -pipe -c sha.cpp
963// sha.cpp: Assembler messages:
964// sha.cpp:1155: Error: symbol `SHA512_Round' is already defined
965// sha.cpp:1155: Error: symbol `SHA512_Round' is already defined
966
967CRYPTOPP_NOINLINE CRYPTOPP_NAKED
968void CRYPTOPP_FASTCALL SHA512_HashBlock_SSE2(word64 *state, const word64 *data)
969{
970#ifdef __GNUC__
971 __asm__ __volatile__
972 (
973 INTEL_NOPREFIX
974 AS_PUSH_IF86( bx)
975 AS2( mov ebx, eax)
976#else
977 AS1( push ebx)
978 AS1( push esi)
979 AS1( push edi)
980 AS2( lea ebx, SHA512_K)
981#endif
982
983 AS2( mov eax, esp)
984 AS2( and esp, 0xfffffff0)
985 AS2( sub esp, 27*16) // 17*16 for expanded data, 20*8 for state
986 AS_PUSH_IF86( ax)
987 AS2( xor eax, eax)
988
989 AS2( lea edi, [esp+4+8*8]) // start at middle of state buffer. will decrement pointer each round to avoid copying
990 AS2( lea esi, [esp+4+20*8+8]) // 16-byte alignment, then add 8
991
992 AS2( movdqu xmm0, [ecx+0*16])
993 AS2( movdq2q mm4, xmm0)
994 AS2( movdqu [edi+0*16], xmm0)
995 AS2( movdqu xmm0, [ecx+1*16])
996 AS2( movdqu [edi+1*16], xmm0)
997 AS2( movdqu xmm0, [ecx+2*16])
998 AS2( movdq2q mm5, xmm0)
999 AS2( movdqu [edi+2*16], xmm0)
1000 AS2( movdqu xmm0, [ecx+3*16])
1001 AS2( movdqu [edi+3*16], xmm0)
1002 ASJ( jmp, 0, f)
1003
1004#define SSE2_S0_S1(r, a, b, c) \
1005 AS2( movq mm6, r)\
1006 AS2( psrlq r, a)\
1007 AS2( movq mm7, r)\
1008 AS2( psllq mm6, 64-c)\
1009 AS2( pxor mm7, mm6)\
1010 AS2( psrlq r, b-a)\
1011 AS2( pxor mm7, r)\
1012 AS2( psllq mm6, c-b)\
1013 AS2( pxor mm7, mm6)\
1014 AS2( psrlq r, c-b)\
1015 AS2( pxor r, mm7)\
1016 AS2( psllq mm6, b-a)\
1017 AS2( pxor r, mm6)
1018
1019#define SSE2_s0(r, a, b, c) \
1020 AS2( movdqu xmm6, r)\
1021 AS2( psrlq r, a)\
1022 AS2( movdqu xmm7, r)\
1023 AS2( psllq xmm6, 64-c)\
1024 AS2( pxor xmm7, xmm6)\
1025 AS2( psrlq r, b-a)\
1026 AS2( pxor xmm7, r)\
1027 AS2( psrlq r, c-b)\
1028 AS2( pxor r, xmm7)\
1029 AS2( psllq xmm6, c-a)\
1030 AS2( pxor r, xmm6)
1031
1032#define SSE2_s1(r, a, b, c) \
1033 AS2( movdqu xmm6, r)\
1034 AS2( psrlq r, a)\
1035 AS2( movdqu xmm7, r)\
1036 AS2( psllq xmm6, 64-c)\
1037 AS2( pxor xmm7, xmm6)\
1038 AS2( psrlq r, b-a)\
1039 AS2( pxor xmm7, r)\
1040 AS2( psllq xmm6, c-b)\
1041 AS2( pxor xmm7, xmm6)\
1042 AS2( psrlq r, c-b)\
1043 AS2( pxor r, xmm7)
1044 ASL(SHA512_Round)
1045
1046 // k + w is in mm0, a is in mm4, e is in mm5
1047 AS2( paddq mm0, [edi+7*8]) // h
1048 AS2( movq mm2, [edi+5*8]) // f
1049 AS2( movq mm3, [edi+6*8]) // g
1050 AS2( pxor mm2, mm3)
1051 AS2( pand mm2, mm5)
1052 SSE2_S0_S1(mm5,14,18,41)
1053 AS2( pxor mm2, mm3)
1054 AS2( paddq mm0, mm2) // h += Ch(e,f,g)
1055 AS2( paddq mm5, mm0) // h += S1(e)
1056 AS2( movq mm2, [edi+1*8]) // b
1057 AS2( movq mm1, mm2)
1058 AS2( por mm2, mm4)
1059 AS2( pand mm2, [edi+2*8]) // c
1060 AS2( pand mm1, mm4)
1061 AS2( por mm1, mm2)
1062 AS2( paddq mm1, mm5) // temp = h + Maj(a,b,c)
1063 AS2( paddq mm5, [edi+3*8]) // e = d + h
1064 AS2( movq [edi+3*8], mm5)
1065 AS2( movq [edi+11*8], mm5)
1066 SSE2_S0_S1(mm4,28,34,39) // S0(a)
1067 AS2( paddq mm4, mm1) // a = temp + S0(a)
1068 AS2( movq [edi-8], mm4)
1069 AS2( movq [edi+7*8], mm4)
1070 AS1( ret)
1071
1072 // first 16 rounds
1073 ASL(0)
1074 AS2( movq mm0, [edx+eax*8])
1075 AS2( movq [esi+eax*8], mm0)
1076 AS2( movq [esi+eax*8+16*8], mm0)
1077 AS2( paddq mm0, [ebx+eax*8])
1078 ASC( call, SHA512_Round)
1079
1080 AS1( inc eax)
1081 AS2( sub edi, 8)
1082 AS2( test eax, 7)
1083 ASJ( jnz, 0, b)
1084 AS2( add edi, 8*8)
1085 AS2( cmp eax, 16)
1086 ASJ( jne, 0, b)
1087
1088 // rest of the rounds
1089 AS2( movdqu xmm0, [esi+(16-2)*8])
1090 ASL(1)
1091 // data expansion, W[i-2] already in xmm0
1092 AS2( movdqu xmm3, [esi])
1093 AS2( paddq xmm3, [esi+(16-7)*8])
1094 AS2( movdqu xmm2, [esi+(16-15)*8])
1095 SSE2_s1(xmm0, 6, 19, 61)
1096 AS2( paddq xmm0, xmm3)
1097 SSE2_s0(xmm2, 1, 7, 8)
1098 AS2( paddq xmm0, xmm2)
1099 AS2( movdq2q mm0, xmm0)
1100 AS2( movhlps xmm1, xmm0)
1101 AS2( paddq mm0, [ebx+eax*8])
1102 AS2( movlps [esi], xmm0)
1103 AS2( movlps [esi+8], xmm1)
1104 AS2( movlps [esi+8*16], xmm0)
1105 AS2( movlps [esi+8*17], xmm1)
1106 // 2 rounds
1107 ASC( call, SHA512_Round)
1108 AS2( sub edi, 8)
1109 AS2( movdq2q mm0, xmm1)
1110 AS2( paddq mm0, [ebx+eax*8+8])
1111 ASC( call, SHA512_Round)
1112 // update indices and loop
1113 AS2( add esi, 16)
1114 AS2( add eax, 2)
1115 AS2( sub edi, 8)
1116 AS2( test eax, 7)
1117 ASJ( jnz, 1, b)
1118 // do housekeeping every 8 rounds
1119 AS2( mov esi, 0xf)
1120 AS2( and esi, eax)
1121 AS2( lea esi, [esp+4+20*8+8+esi*8])
1122 AS2( add edi, 8*8)
1123 AS2( cmp eax, 80)
1124 ASJ( jne, 1, b)
1125
1126#define SSE2_CombineState(i) \
1127 AS2( movdqu xmm0, [edi+i*16])\
1128 AS2( paddq xmm0, [ecx+i*16])\
1129 AS2( movdqu [ecx+i*16], xmm0)
1130
1131 SSE2_CombineState(0)
1132 SSE2_CombineState(1)
1133 SSE2_CombineState(2)
1134 SSE2_CombineState(3)
1135
1136 AS_POP_IF86( sp)
1137 AS1( emms)
1138
1139#if defined(__GNUC__)
1140 AS_POP_IF86( bx)
1141 ATT_PREFIX
1142 :
1143 : "a" (SHA512_K), "c" (state), "d" (data)
1144 : "%esi", "%edi", "memory", "cc"
1145 );
1146#else
1147 AS1( pop edi)
1148 AS1( pop esi)
1149 AS1( pop ebx)
1150 AS1( ret)
1151#endif
1152}
1153
1154ANONYMOUS_NAMESPACE_END
1155
1156#endif // CRYPTOPP_SSE2_ASM_AVAILABLE
1157
1158ANONYMOUS_NAMESPACE_BEGIN
1159
// Helper macros for the portable SHA-512 block function. They expand to
// expressions over names that must exist at the expansion site: T (eight
// working words), W (sixteen schedule words), data (input block words),
// j (round offset of the current pass) and SHA512_K (round constants).

// Working-variable accessors. The slot index is rotated by the round number
// i, so the roles a..h shift every round without moving any values in T.
// For i == 0 they name T[0]..T[7] in order.
1160#define a(i) T[(0-i)&7]
1161#define b(i) T[(1-i)&7]
1162#define c(i) T[(2-i)&7]
1163#define d(i) T[(3-i)&7]
1164#define e(i) T[(4-i)&7]
1165#define f(i) T[(5-i)&7]
1166#define g(i) T[(6-i)&7]
1167#define h(i) T[(7-i)&7]
1168
// blk0: copy input word i into the schedule and yield it.
// blk2: update schedule slot i in place from slots i-2, i-7 and i-15, with
// all indices reduced mod 16 so W acts as a 16-entry circular buffer.
1169#define blk0(i) (W[i]=data[i])
1170#define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15]))
1171
// Ch: per bit, takes y where x is set and z where x is clear.
// Maj: per bit, the majority value of x, y and z.
// Arguments are not parenthesized, so pass only simple expressions.
1172#define Ch(x,y,z) (z^(x&(y^z)))
1173#define Maj(x,y,z) (y^((x^y)&(y^z)))
1174
// Sigma functions. The rotation and shift amounts match the SHA-512
// definitions in FIPS 180-4: s0/s1 are the message-schedule functions,
// S0/S1 are the round functions. rotrConstant is declared elsewhere
// (presumably a right-rotate by a compile-time amount - confirm in misc.h).
1175#define s0(x) (rotrConstant<1>(x)^rotrConstant<8>(x)^(x>>7))
1176#define s1(x) (rotrConstant<19>(x)^rotrConstant<61>(x)^(x>>6))
1177#define S0(x) (rotrConstant<28>(x)^rotrConstant<34>(x)^rotrConstant<39>(x))
1178#define S1(x) (rotrConstant<14>(x)^rotrConstant<18>(x)^rotrConstant<41>(x))
1179
// R: one round, numbered i within the current pass of sixteen. When j is 0
// the schedule word is loaded from data (blk0); otherwise it is derived
// from earlier schedule words (blk2). The macro expands to three separate
// statements, so it must be used inside a braced block.
1180#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA512_K[i+j]+\
1181 (j?blk2(i):blk0(i));d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i));
1182
1183void SHA512_HashBlock_CXX(word64 *state, const word64 *data)
1184{
1185 CRYPTOPP_ASSERT(state);
1186 CRYPTOPP_ASSERT(data);
1187
1188 word64 W[16]={0}, T[8];
1189
1190 /* Copy context->state[] to working vars */
1191 std::memcpy(T, state, sizeof(T));
1192
1193 /* 80 operations, partially loop unrolled */
1194 for (unsigned int j=0; j<80; j+=16)
1195 {
1196 R( 0); R( 1); R( 2); R( 3);
1197 R( 4); R( 5); R( 6); R( 7);
1198 R( 8); R( 9); R(10); R(11);
1199 R(12); R(13); R(14); R(15);
1200 }
1201
1202 state[0] += a(0);
1203 state[1] += b(0);
1204 state[2] += c(0);
1205 state[3] += d(0);
1206 state[4] += e(0);
1207 state[5] += f(0);
1208 state[6] += g(0);
1209 state[7] += h(0);
1210}
1211
1212ANONYMOUS_NAMESPACE_END
1213
1214void SHA512::Transform(word64 *state, const word64 *data)
1215{
1216 CRYPTOPP_ASSERT(state);
1217 CRYPTOPP_ASSERT(data);
1218
1219#if CRYPTOPP_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86)
1220 if (HasSSE2())
1221 {
1222 SHA512_HashBlock_SSE2(state, data);
1223 return;
1224 }
1225#endif
1226#if CRYPTOPP_POWER8_SHA_AVAILABLE
1227 if (HasSHA512())
1228 {
1229 SHA512_HashMultipleBlocks_POWER8(state, data, SHA512::BLOCKSIZE, BIG_ENDIAN_ORDER);
1230 return;
1231 }
1232#endif
1233
1234 SHA512_HashBlock_CXX(state, data);
1235}
1236
// Remove the short-named helper macros so they do not leak into code that
// follows. blk1 is not defined in the SHA-512 section above (presumably
// it comes from an earlier part of the file); #undef on a name that is
// not defined is harmless.
1237#undef Ch
1238#undef Maj
1239
1240#undef s0
1241#undef s1
1242#undef S0
1243#undef S1
1244
1245#undef blk0
1246#undef blk1
1247#undef blk2
1248
1249#undef R
1250
1251#undef a
1252#undef b
1253#undef c
1254#undef d
1255#undef e
1256#undef f
1257#undef g
1258#undef h
1259
1260NAMESPACE_END
1261
1262#endif // Not CRYPTOPP_GENERATE_X64_MASM
1263#endif // Not CRYPTOPP_IMPORTS
Fixed size stack-based SecBlock with 16-byte alignment.
Definition: secblock.h:1090
static void Transform(HashWordType *digest, const HashWordType *data)
Operate the hash.
Definition: sha.cpp:251
static void InitState(HashWordType *state)
Initialize state array.
Definition: sha.cpp:242
static void InitState(HashWordType *state)
Initialize state array.
Definition: sha.cpp:415
static void InitState(HashWordType *state)
Initialize state array.
Definition: sha.cpp:423
static void Transform(HashWordType *digest, const HashWordType *data)
Operate the hash.
Definition: sha.cpp:770
static void InitState(HashWordType *state)
Initialize state array.
Definition: sha.cpp:937
static void Transform(HashWordType *digest, const HashWordType *data)
Operate the hash.
Definition: sha.cpp:1214
static void InitState(HashWordType *state)
Initialize state array.
Definition: sha.cpp:947
Library configuration file.
Functions for CPU features and intrinsics.
bool HasSHA512()
Determine if an ARM processor has SHA512 available.
Definition: cpu.h:509
bool HasSHA2()
Determine if an ARM processor has SHA256 available.
Definition: cpu.h:489
bool HasSHA()
Determines SHA availability.
Definition: cpu.h:189
bool HasSHA1()
Determine if an ARM processor has SHA1 available.
Definition: cpu.h:469
bool HasSSE2()
Determines SSE2 availability.
Definition: cpu.h:116
bool HasSHA256()
Determine if a PowerPC processor has SHA256 available.
Definition: cpu.h:695
ByteOrder
Provides the byte ordering.
Definition: cryptlib.h:143
@ LITTLE_ENDIAN_ORDER
byte order is little-endian
Definition: cryptlib.h:145
@ BIG_ENDIAN_ORDER
byte order is big-endian
Definition: cryptlib.h:147
Utility functions for the Crypto++ library.
byte ByteReverse(byte value)
Reverses bytes in a 8-bit value.
Definition: misc.h:1972
bool NativeByteOrderIs(ByteOrder order)
Determines whether order follows native byte ordering.
Definition: misc.h:1188
Crypto++ library namespace.
Precompiled header file.
Classes and functions for secure memory allocations.
Classes for SHA-1 and SHA-2 family of message digests.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition: trap.h:69