Crypto++ 8.2
Free C++ class library of cryptographic schemes
vmac.cpp
1// vmac.cpp - originally written and placed in the public domain by Wei Dai
2// based on Ted Krovetz's public domain vmac.c and draft-krovetz-vmac-01.txt
3
4#include "pch.h"
5#include "config.h"
6
7#include "vmac.h"
8#include "cpu.h"
9#include "argnames.h"
10#include "secblock.h"
11
12#if defined(_MSC_VER) && !CRYPTOPP_BOOL_SLOW_WORD64
13#include <intrin.h>
14#endif
15
16#if defined(CRYPTOPP_DISABLE_VMAC_ASM)
17# undef CRYPTOPP_X86_ASM_AVAILABLE
18# undef CRYPTOPP_X32_ASM_AVAILABLE
19# undef CRYPTOPP_X64_ASM_AVAILABLE
20# undef CRYPTOPP_SSE2_ASM_AVAILABLE
21#endif
22
23#if CRYPTOPP_MSC_VERSION
24# pragma warning(disable: 4731)
25#endif
26
27ANONYMOUS_NAMESPACE_BEGIN
28
29#if defined(CRYPTOPP_WORD128_AVAILABLE) && !defined(CRYPTOPP_X64_ASM_AVAILABLE)
30using CryptoPP::word128;
31using CryptoPP::word64;
32# define VMAC_BOOL_WORD128 1
33#else
34using CryptoPP::word64;
35# define VMAC_BOOL_WORD128 0
36#endif
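// VMAC_BOOL_WORD128 selects the compiler's native 128-bit integer type for
// the NH and polynomial arithmetic; when x64 inline assembly is available,
// the hand-written mulq path below is preferred over word128.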
37
38#ifdef __BORLANDC__
39#define const // Turbo C++ 2006 workaround
40#endif
41const word64 p64 = W64LIT(0xfffffffffffffeff); /* 2^64 - 257 prime */
42const word64 m62 = W64LIT(0x3fffffffffffffff); /* 62-bit mask */
43const word64 m63 = W64LIT(0x7fffffffffffffff); /* 63-bit mask */
44const word64 m64 = W64LIT(0xffffffffffffffff); /* 64-bit mask */
45const word64 mpoly = W64LIT(0x1fffffff1fffffff); /* Poly key mask */
46#ifdef __BORLANDC__
47#undef const
48#endif
49#if VMAC_BOOL_WORD128
50#ifdef __powerpc__
51// workaround GCC Bug 31690: ICE with const __uint128_t and C++ front-end
52#define m126 ((word128(m62)<<64)|m64)
53#else
54const word128 m126 = (word128(m62)<<64)|m64; /* 126-bit mask */
55#endif
56#endif
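// These constants realize VMAC's three hash layers: NH outputs are kept to
// 126 bits (m126), the polynomial hash works modulo the prime 2^127-1 with
// key words masked by mpoly to bound intermediate carries, and the L3 hash
// reduces modulo the prime p64 = 2^64-257.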
57
58ANONYMOUS_NAMESPACE_END
59
60NAMESPACE_BEGIN(CryptoPP)
61
62void VMAC_Base::UncheckedSetKey(const byte *userKey, unsigned int keylength, const NameValuePairs &params)
63{
64 int digestLength = params.GetIntValueWithDefault(Name::DigestSize(), DefaultDigestSize());
65 if (digestLength != 8 && digestLength != 16)
66 throw InvalidArgument("VMAC: DigestSize must be 8 or 16");
67 m_is128 = digestLength == 16;
68
69 m_L1KeyLength = params.GetIntValueWithDefault(Name::L1KeyLength(), 128);
70 if (m_L1KeyLength <= 0 || m_L1KeyLength % 128 != 0)
71 throw InvalidArgument("VMAC: L1KeyLength must be a positive multiple of 128");
72
73 AllocateBlocks();
74
75 BlockCipher &cipher = AccessCipher();
76 cipher.SetKey(userKey, keylength, params);
77 const unsigned int blockSize = cipher.BlockSize();
78 const unsigned int blockSizeInWords = blockSize / sizeof(word64);
79 SecBlock<word64, AllocatorWithCleanup<word64, true> > out(blockSizeInWords);
80 AlignedSecByteBlock in;
81 in.CleanNew(blockSize);
82 size_t i;
83
84 /* Fill nh key */
85 in[0] = 0x80;
86 cipher.AdvancedProcessBlocks(in, NULLPTR, (byte *)m_nhKey(), m_nhKeySize()*sizeof(word64), cipher.BT_InBlockIsCounter);
87 ConditionalByteReverse<word64>(BIG_ENDIAN_ORDER, m_nhKey(), m_nhKey(), m_nhKeySize()*sizeof(word64));
88
89 /* Fill poly key */
90 in[0] = 0xC0;
91 in[15] = 0;
92 for (i = 0; i <= (size_t)m_is128; i++)
93 {
94 cipher.ProcessBlock(in, out.BytePtr());
95 m_polyState()[i*4+2] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()) & mpoly;
96 m_polyState()[i*4+3] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()+8) & mpoly;
97 in[15]++;
98 }
99
100 /* Fill ip key */
101 in[0] = 0xE0;
102 in[15] = 0;
103 word64 *l3Key = m_l3Key();
104 CRYPTOPP_ASSERT(IsAlignedOn(l3Key,GetAlignmentOf<word64>()));
105
106 for (i = 0; i <= (size_t)m_is128; i++)
107 do
108 {
109 cipher.ProcessBlock(in, out.BytePtr());
110 l3Key[i*2+0] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr());
111 l3Key[i*2+1] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()+8);
112 in[15]++;
113 } while ((l3Key[i*2+0] >= p64) || (l3Key[i*2+1] >= p64));
114
115 m_padCached = false;
116 size_t nonceLength;
117 const byte *nonce = GetIVAndThrowIfInvalid(params, nonceLength);
118 Resynchronize(nonce, (int)nonceLength);
119}
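// Subkey derivation above follows the VMAC draft: the NH, polynomial and L3
// subkeys come from encrypting counter blocks tagged with the distinct first
// bytes 0x80, 0xC0 and 0xE0, and candidate L3 key words are redrawn until
// they fall below p64, so they are valid residues modulo 2^64-257.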
120
121void VMAC_Base::GetNextIV(RandomNumberGenerator &rng, byte *IV)
122{
123 SimpleKeyingInterface::GetNextIV(rng, IV);
124 IV[0] &= 0x7f;
125}
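// Clearing the top bit keeps a randomly generated nonce within the range the
// VMAC draft accepts (the first bit of the nonce block must be zero); the low
// bit of the last nonce byte is used separately to select the pad half for
// 64-bit tags.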
126
127void VMAC_Base::Resynchronize(const byte *nonce, int len)
128{
129 size_t length = ThrowIfInvalidIVLength(len);
130 size_t s = IVSize();
131 byte *storedNonce = m_nonce();
132
133 if (m_is128)
134 {
135 memset(storedNonce, 0, s-length);
136 memcpy(storedNonce+s-length, nonce, length);
137 AccessCipher().ProcessBlock(storedNonce, m_pad());
138 }
139 else
140 {
141 if (m_padCached && (storedNonce[s-1] | 1) == (nonce[length-1] | 1))
142 {
143 m_padCached = VerifyBufsEqual(storedNonce+s-length, nonce, length-1);
144 for (size_t i=0; m_padCached && i<s-length; i++)
145 m_padCached = (storedNonce[i] == 0);
146 }
147 if (!m_padCached)
148 {
149 memset(storedNonce, 0, s-length);
150 memcpy(storedNonce+s-length, nonce, length-1);
151 storedNonce[s-1] = nonce[length-1] & 0xfe;
152 AccessCipher().ProcessBlock(storedNonce, m_pad());
153 m_padCached = true;
154 }
155 storedNonce[s-1] = nonce[length-1];
156 }
157 m_isFirstBlock = true;
158 Restart();
159}
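// Caching note: for 64-bit tags the pad depends only on the nonce with its
// low bit forced to zero, so two nonces that differ in nothing but that bit
// reuse the cached cipher output and save one block encryption.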
160
161void VMAC_Base::HashEndianCorrectedBlock(const word64 *data)
162{
163 CRYPTOPP_UNUSED(data);
164 CRYPTOPP_ASSERT(false);
165 throw NotImplemented("VMAC: HashEndianCorrectedBlock is not implemented");
166}
167
168unsigned int VMAC_Base::OptimalDataAlignment() const
169{
170 return
171#if CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
172 HasSSE2() ? 16 :
173#endif
174 GetCipher().OptimalDataAlignment();
175}
176
177#if CRYPTOPP_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
178#if CRYPTOPP_MSC_VERSION
179# pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
180#endif
181void
182#ifdef __GNUC__
183__attribute__ ((noinline)) // Intel Compiler 9.1 workaround
184#endif
185VMAC_Base::VHASH_Update_SSE2(const word64 *data, size_t blocksRemainingInWord64, int tagPart)
186{
187 CRYPTOPP_ASSERT(IsAlignedOn(m_polyState(),GetAlignmentOf<word64>()));
188 CRYPTOPP_ASSERT(IsAlignedOn(m_nhKey(),GetAlignmentOf<word64>()));
189
190 const word64 *nhK = m_nhKey();
191 word64 *polyS = (word64*)(void*)m_polyState();
192 word32 L1KeyLength = m_L1KeyLength;
193
194 // These are used in the ASM, but some analysis engines cannot determine it.
195 CRYPTOPP_UNUSED(data); CRYPTOPP_UNUSED(tagPart); CRYPTOPP_UNUSED(L1KeyLength);
196 CRYPTOPP_UNUSED(blocksRemainingInWord64);
197
198#ifdef __GNUC__
199 word32 temp;
200 __asm__ __volatile__
201 (
202 AS2( mov %%ebx, %0)
203 AS2( mov %1, %%ebx)
204 INTEL_NOPREFIX
205#else
206 #if defined(__INTEL_COMPILER)
207 char isFirstBlock = m_isFirstBlock;
208 AS2( mov ebx, [L1KeyLength])
209 AS2( mov dl, [isFirstBlock])
210 #else
211 AS2( mov ecx, this)
212 AS2( mov ebx, [ecx+m_L1KeyLength])
213 AS2( mov dl, [ecx+m_isFirstBlock])
214 #endif
215 AS2( mov eax, tagPart)
216 AS2( shl eax, 4)
217 AS2( mov edi, nhK)
218 AS2( add edi, eax)
219 AS2( add eax, eax)
220 AS2( add eax, polyS)
221
222 AS2( mov esi, data)
223 AS2( mov ecx, blocksRemainingInWord64)
224#endif
225
226 AS2( shr ebx, 3)
227 AS_PUSH_IF86( bp)
228 AS2( sub esp, 12)
229 ASL(4)
230 AS2( mov ebp, ebx)
231 AS2( cmp ecx, ebx)
232 AS2( cmovl ebp, ecx)
233 AS2( sub ecx, ebp)
234 AS2( lea ebp, [edi+8*ebp]) // end of nhK
235 AS2( movq mm6, [esi])
236 AS2( paddq mm6, [edi])
237 AS2( movq mm5, [esi+8])
238 AS2( paddq mm5, [edi+8])
239 AS2( add esi, 16)
240 AS2( add edi, 16)
241 AS2( movq mm4, mm6)
242 ASS( pshufw mm2, mm6, 1, 0, 3, 2)
243 AS2( pmuludq mm6, mm5)
244 ASS( pshufw mm3, mm5, 1, 0, 3, 2)
245 AS2( pmuludq mm5, mm2)
246 AS2( pmuludq mm2, mm3)
247 AS2( pmuludq mm3, mm4)
248 AS2( pxor mm7, mm7)
249 AS2( movd [esp], mm6)
250 AS2( psrlq mm6, 32)
251 AS2( movd [esp+4], mm5)
252 AS2( psrlq mm5, 32)
253 AS2( cmp edi, ebp)
254 ASJ( je, 1, f)
255 ASL(0)
256 AS2( movq mm0, [esi])
257 AS2( paddq mm0, [edi])
258 AS2( movq mm1, [esi+8])
259 AS2( paddq mm1, [edi+8])
260 AS2( add esi, 16)
261 AS2( add edi, 16)
262 AS2( movq mm4, mm0)
263 AS2( paddq mm5, mm2)
264 ASS( pshufw mm2, mm0, 1, 0, 3, 2)
265 AS2( pmuludq mm0, mm1)
266 AS2( movd [esp+8], mm3)
267 AS2( psrlq mm3, 32)
268 AS2( paddq mm5, mm3)
269 ASS( pshufw mm3, mm1, 1, 0, 3, 2)
270 AS2( pmuludq mm1, mm2)
271 AS2( pmuludq mm2, mm3)
272 AS2( pmuludq mm3, mm4)
273 AS2( movd mm4, [esp])
274 AS2( paddq mm7, mm4)
275 AS2( movd mm4, [esp+4])
276 AS2( paddq mm6, mm4)
277 AS2( movd mm4, [esp+8])
278 AS2( paddq mm6, mm4)
279 AS2( movd [esp], mm0)
280 AS2( psrlq mm0, 32)
281 AS2( paddq mm6, mm0)
282 AS2( movd [esp+4], mm1)
283 AS2( psrlq mm1, 32)
284 AS2( paddq mm5, mm1)
285 AS2( cmp edi, ebp)
286 ASJ( jne, 0, b)
287 ASL(1)
288 AS2( paddq mm5, mm2)
289 AS2( movd [esp+8], mm3)
290 AS2( psrlq mm3, 32)
291 AS2( paddq mm5, mm3)
292 AS2( movd mm4, [esp])
293 AS2( paddq mm7, mm4)
294 AS2( movd mm4, [esp+4])
295 AS2( paddq mm6, mm4)
296 AS2( movd mm4, [esp+8])
297 AS2( paddq mm6, mm4)
298 AS2( lea ebp, [8*ebx])
299 AS2( sub edi, ebp) // reset edi to start of nhK
300
301 AS2( movd [esp], mm7)
302 AS2( psrlq mm7, 32)
303 AS2( paddq mm6, mm7)
304 AS2( movd [esp+4], mm6)
305 AS2( psrlq mm6, 32)
306 AS2( paddq mm5, mm6)
307 AS2( psllq mm5, 2)
308 AS2( psrlq mm5, 2)
309
310#define a0 [eax+2*4]
311#define a1 [eax+3*4]
312#define a2 [eax+0*4]
313#define a3 [eax+1*4]
314#define k0 [eax+2*8+2*4]
315#define k1 [eax+2*8+3*4]
316#define k2 [eax+2*8+0*4]
317#define k3 [eax+2*8+1*4]
318 AS2( test dl, dl)
319 ASJ( jz, 2, f)
320 AS2( movd mm1, k0)
321 AS2( movd mm0, [esp])
322 AS2( paddq mm0, mm1)
323 AS2( movd a0, mm0)
324 AS2( psrlq mm0, 32)
325 AS2( movd mm1, k1)
326 AS2( movd mm2, [esp+4])
327 AS2( paddq mm1, mm2)
328 AS2( paddq mm0, mm1)
329 AS2( movd a1, mm0)
330 AS2( psrlq mm0, 32)
331 AS2( paddq mm5, k2)
332 AS2( paddq mm0, mm5)
333 AS2( movq a2, mm0)
334 AS2( xor edx, edx)
335 ASJ( jmp, 3, f)
336 ASL(2)
337 AS2( movd mm0, a3)
338 AS2( movq mm4, mm0)
339 AS2( pmuludq mm0, k3) // a3*k3
340 AS2( movd mm1, a0)
341 AS2( pmuludq mm1, k2) // a0*k2
342 AS2( movd mm2, a1)
343 AS2( movd mm6, k1)
344 AS2( pmuludq mm2, mm6) // a1*k1
345 AS2( movd mm3, a2)
346 AS2( psllq mm0, 1)
347 AS2( paddq mm0, mm5)
348 AS2( movq mm5, mm3)
349 AS2( movd mm7, k0)
350 AS2( pmuludq mm3, mm7) // a2*k0
351 AS2( pmuludq mm4, mm7) // a3*k0
352 AS2( pmuludq mm5, mm6) // a2*k1
353 AS2( paddq mm0, mm1)
354 AS2( movd mm1, a1)
355 AS2( paddq mm4, mm5)
356 AS2( movq mm5, mm1)
357 AS2( pmuludq mm1, k2) // a1*k2
358 AS2( paddq mm0, mm2)
359 AS2( movd mm2, a0)
360 AS2( paddq mm0, mm3)
361 AS2( movq mm3, mm2)
362 AS2( pmuludq mm2, k3) // a0*k3
363 AS2( pmuludq mm3, mm7) // a0*k0
364 AS2( movd [esp+8], mm0)
365 AS2( psrlq mm0, 32)
366 AS2( pmuludq mm7, mm5) // a1*k0
367 AS2( pmuludq mm5, k3) // a1*k3
368 AS2( paddq mm0, mm1)
369 AS2( movd mm1, a2)
370 AS2( pmuludq mm1, k2) // a2*k2
371 AS2( paddq mm0, mm2)
372 AS2( paddq mm0, mm4)
373 AS2( movq mm4, mm0)
374 AS2( movd mm2, a3)
375 AS2( pmuludq mm2, mm6) // a3*k1
376 AS2( pmuludq mm6, a0) // a0*k1
377 AS2( psrlq mm0, 31)
378 AS2( paddq mm0, mm3)
379 AS2( movd mm3, [esp])
380 AS2( paddq mm0, mm3)
381 AS2( movd mm3, a2)
382 AS2( pmuludq mm3, k3) // a2*k3
383 AS2( paddq mm5, mm1)
384 AS2( movd mm1, a3)
385 AS2( pmuludq mm1, k2) // a3*k2
386 AS2( paddq mm5, mm2)
387 AS2( movd mm2, [esp+4])
388 AS2( psllq mm5, 1)
389 AS2( paddq mm0, mm5)
390 AS2( psllq mm4, 33)
391 AS2( movd a0, mm0)
392 AS2( psrlq mm0, 32)
393 AS2( paddq mm6, mm7)
394 AS2( movd mm7, [esp+8])
395 AS2( paddq mm0, mm6)
396 AS2( paddq mm0, mm2)
397 AS2( paddq mm3, mm1)
398 AS2( psllq mm3, 1)
399 AS2( paddq mm0, mm3)
400 AS2( psrlq mm4, 1)
401 AS2( movd a1, mm0)
402 AS2( psrlq mm0, 32)
403 AS2( por mm4, mm7)
404 AS2( paddq mm0, mm4)
405 AS2( movq a2, mm0)
406#undef a0
407#undef a1
408#undef a2
409#undef a3
410#undef k0
411#undef k1
412#undef k2
413#undef k3
414
415 ASL(3)
416 AS2( test ecx, ecx)
417 ASJ( jnz, 4, b)
418 AS2( add esp, 12)
419 AS_POP_IF86( bp)
420 AS1( emms)
421#ifdef __GNUC__
422 ATT_PREFIX
423 AS2( mov %0, %%ebx)
424 : "=m" (temp)
425 : "m" (L1KeyLength), "c" (blocksRemainingInWord64), "S" (data), "D" (nhK+tagPart*2), "d" (m_isFirstBlock), "a" (polyS+tagPart*4)
426 : "memory", "cc"
427 );
428#endif
429}
430#endif
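// The MMX/SSE2 kernel above implements the same NH accumulation and 127-bit
// polynomial step as the portable template below, built from pmuludq
// 32x32->64-bit multiplies; each call processes one 64-bit half of the tag,
// selected by tagPart.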
431
432#if VMAC_BOOL_WORD128
433 #define DeclareNH(a) word128 a=0
434 #define MUL64(rh,rl,i1,i2) {word128 p = word128(i1)*(i2); rh = word64(p>>64); rl = word64(p);}
435 #define AccumulateNH(a, b, c) a += word128(b)*(c)
436 #define Multiply128(r, i1, i2) r = word128(word64(i1)) * word64(i2)
437#else
438 #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER) && (defined(_M_IX86) || defined(_M_X64) || defined(_M_IA64))
439 #define MUL32(a, b) __emulu(word32(a), word32(b))
440 #else
441 #define MUL32(a, b) ((word64)((word32)(a)) * (word32)(b))
442 #endif
443 #if defined(CRYPTOPP_X64_ASM_AVAILABLE)
444 #define DeclareNH(a) word64 a##0=0, a##1=0
445 #define MUL64(rh,rl,i1,i2) asm ("mulq %3" : "=a"(rl), "=d"(rh) : "a"(i1), "g"(i2) : "cc");
446 #define AccumulateNH(a, b, c) asm ("mulq %3; addq %%rax, %0; adcq %%rdx, %1" : "+r"(a##0), "+r"(a##1) : "a"(b), "g"(c) : "%rdx", "cc");
447 #define ADD128(rh,rl,ih,il) asm ("addq %3, %1; adcq %2, %0" : "+r"(rh),"+r"(rl) : "r"(ih),"r"(il) : "cc");
448 #elif defined(_MSC_VER) && !CRYPTOPP_BOOL_SLOW_WORD64
449 #define DeclareNH(a) word64 a##0=0, a##1=0
450 #define MUL64(rh,rl,i1,i2) (rl) = _umul128(i1,i2,&(rh));
451 #define AccumulateNH(a, b, c) {\
452 word64 ph, pl;\
453 pl = _umul128(b,c,&ph);\
454 a##0 += pl;\
455 a##1 += ph + (a##0 < pl);}
456 #else
457 #define VMAC_BOOL_32BIT 1
458 #define DeclareNH(a) word64 a##0=0, a##1=0, a##2=0
459 #define MUL64(rh,rl,i1,i2) \
460 { word64 _i1 = (i1), _i2 = (i2); \
461 word64 m1= MUL32(_i1,_i2>>32); \
462 word64 m2= MUL32(_i1>>32,_i2); \
463 rh = MUL32(_i1>>32,_i2>>32); \
464 rl = MUL32(_i1,_i2); \
465 ADD128(rh,rl,(m1 >> 32),(m1 << 32)); \
466 ADD128(rh,rl,(m2 >> 32),(m2 << 32)); \
467 }
468 #define AccumulateNH(a, b, c) {\
469 word64 p = MUL32(b, c);\
470 a##1 += word32((p)>>32);\
471 a##0 += word32(p);\
472 p = MUL32((b)>>32, c);\
473 a##2 += word32((p)>>32);\
474 a##1 += word32(p);\
475 p = MUL32((b)>>32, (c)>>32);\
476 a##2 += p;\
477 p = MUL32(b, (c)>>32);\
478 a##1 += word32(p);\
479 a##2 += word32(p>>32);}
480 #endif
481#endif
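// Three multiply strategies are selected above: a native word128 type, inline
// mulq or the _umul128 intrinsic on 64-bit x86, or a decomposition into four
// 32x32->64-bit partial products (VMAC_BOOL_32BIT) on platforms where 64-bit
// multiplication is slow.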
482#ifndef VMAC_BOOL_32BIT
483 #define VMAC_BOOL_32BIT 0
484#endif
485#ifndef ADD128
486 #define ADD128(rh,rl,ih,il) \
487 { word64 _il = (il); \
488 (rl) += (_il); \
489 (rh) += (ih) + ((rl) < (_il)); \
490 }
491#endif
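// The fallback ADD128 adds the 128-bit quantity (ih,il) into (rh,rl); the
// carry out of the low word is recovered portably from the unsigned
// comparison (rl) < (_il) after the addition.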
492
493template <bool T_128BitTag>
494void VMAC_Base::VHASH_Update_Template(const word64 *data, size_t blocksRemainingInWord64)
495{
496 CRYPTOPP_ASSERT(IsAlignedOn(m_polyState(),GetAlignmentOf<word64>()));
497 CRYPTOPP_ASSERT(IsAlignedOn(m_nhKey(),GetAlignmentOf<word64>()));
498
499 #define INNER_LOOP_ITERATION(j) {\
500 word64 d0 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+0]);\
501 word64 d1 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+1]);\
502 AccumulateNH(nhA, d0+nhK[i+2*j+0], d1+nhK[i+2*j+1]);\
503 if (T_128BitTag)\
504 AccumulateNH(nhB, d0+nhK[i+2*j+2], d1+nhK[i+2*j+3]);\
505 }
506
507 size_t L1KeyLengthInWord64 = m_L1KeyLength / 8;
508 size_t innerLoopEnd = L1KeyLengthInWord64;
509 const word64 *nhK = m_nhKey();
510 word64 *polyS = (word64*)(void*)m_polyState();
511 bool isFirstBlock = true;
512 size_t i;
513
514 #if !VMAC_BOOL_32BIT
515 #if VMAC_BOOL_WORD128
516 word128 a1=0, a2=0;
517 #else
518 word64 ah1=0, al1=0, ah2=0, al2=0;
519 #endif
520 word64 kh1, kl1, kh2, kl2;
521 kh1=(polyS+0*4+2)[0]; kl1=(polyS+0*4+2)[1];
522 if (T_128BitTag)
523 {
524 kh2=(polyS+1*4+2)[0]; kl2=(polyS+1*4+2)[1];
525 }
526 #endif
527
528 do
529 {
530 DeclareNH(nhA);
531 DeclareNH(nhB);
532
533 i = 0;
534 if (blocksRemainingInWord64 < L1KeyLengthInWord64)
535 {
536 if (blocksRemainingInWord64 % 8)
537 {
538 innerLoopEnd = blocksRemainingInWord64 % 8;
539 for (; i<innerLoopEnd; i+=2)
540 INNER_LOOP_ITERATION(0);
541 }
542 innerLoopEnd = blocksRemainingInWord64;
543 }
544 for (; i<innerLoopEnd; i+=8)
545 {
546 INNER_LOOP_ITERATION(0);
547 INNER_LOOP_ITERATION(1);
548 INNER_LOOP_ITERATION(2);
549 INNER_LOOP_ITERATION(3);
550 }
551 blocksRemainingInWord64 -= innerLoopEnd;
552 data += innerLoopEnd;
553
554 #if VMAC_BOOL_32BIT
555 word32 nh0[2], nh1[2];
556 word64 nh2[2];
557
558 nh0[0] = word32(nhA0);
559 nhA1 += (nhA0 >> 32);
560 nh1[0] = word32(nhA1);
561 nh2[0] = (nhA2 + (nhA1 >> 32)) & m62;
562
563 if (T_128BitTag)
564 {
565 nh0[1] = word32(nhB0);
566 nhB1 += (nhB0 >> 32);
567 nh1[1] = word32(nhB1);
568 nh2[1] = (nhB2 + (nhB1 >> 32)) & m62;
569 }
570
571 #define a0 (((word32 *)(polyS+i*4))[2+NativeByteOrder::ToEnum()])
572 #define a1 (*(((word32 *)(polyS+i*4))+3-NativeByteOrder::ToEnum())) // workaround for GCC 3.2
573 #define a2 (((word32 *)(polyS+i*4))[0+NativeByteOrder::ToEnum()])
574 #define a3 (*(((word32 *)(polyS+i*4))+1-NativeByteOrder::ToEnum()))
575 #define aHi ((polyS+i*4)[0])
576 #define k0 (((word32 *)(polyS+i*4+2))[2+NativeByteOrder::ToEnum()])
577 #define k1 (*(((word32 *)(polyS+i*4+2))+3-NativeByteOrder::ToEnum()))
578 #define k2 (((word32 *)(polyS+i*4+2))[0+NativeByteOrder::ToEnum()])
579 #define k3 (*(((word32 *)(polyS+i*4+2))+1-NativeByteOrder::ToEnum()))
580 #define kHi ((polyS+i*4+2)[0])
581
582 if (isFirstBlock)
583 {
584 isFirstBlock = false;
585 if (m_isFirstBlock)
586 {
587 m_isFirstBlock = false;
588 for (i=0; i<=(size_t)T_128BitTag; i++)
589 {
590 word64 t = (word64)nh0[i] + k0;
591 a0 = (word32)t;
592 t = (t >> 32) + nh1[i] + k1;
593 a1 = (word32)t;
594 aHi = (t >> 32) + nh2[i] + kHi;
595 }
596 continue;
597 }
598 }
599 for (i=0; i<=(size_t)T_128BitTag; i++)
600 {
601 word64 p, t;
602 word32 t2;
603
604 p = MUL32(a3, 2*k3);
605 p += nh2[i];
606 p += MUL32(a0, k2);
607 p += MUL32(a1, k1);
608 p += MUL32(a2, k0);
609 t2 = (word32)p;
610 p >>= 32;
611 p += MUL32(a0, k3);
612 p += MUL32(a1, k2);
613 p += MUL32(a2, k1);
614 p += MUL32(a3, k0);
615 t = (word64(word32(p) & 0x7fffffff) << 32) | t2;
616 p >>= 31;
617 p += nh0[i];
618 p += MUL32(a0, k0);
619 p += MUL32(a1, 2*k3);
620 p += MUL32(a2, 2*k2);
621 p += MUL32(a3, 2*k1);
622 t2 = (word32)p;
623 p >>= 32;
624 p += nh1[i];
625 p += MUL32(a0, k1);
626 p += MUL32(a1, k0);
627 p += MUL32(a2, 2*k3);
628 p += MUL32(a3, 2*k2);
629 a0 = t2;
630 a1 = (word32)p;
631 aHi = (p >> 32) + t;
632 }
633
634 #undef a0
635 #undef a1
636 #undef a2
637 #undef a3
638 #undef aHi
639 #undef k0
640 #undef k1
641 #undef k2
642 #undef k3
643 #undef kHi
644 #else // #if VMAC_BOOL_32BIT
645 if (isFirstBlock)
646 {
647 isFirstBlock = false;
648 if (m_isFirstBlock)
649 {
650 m_isFirstBlock = false;
651 #if VMAC_BOOL_WORD128
652 #define first_poly_step(a, kh, kl, m) a = (m & m126) + ((word128(kh) << 64) | kl)
653
654 first_poly_step(a1, kh1, kl1, nhA);
655 if (T_128BitTag)
656 first_poly_step(a2, kh2, kl2, nhB);
657 #else
658 #define first_poly_step(ah, al, kh, kl, mh, ml) {\
659 mh &= m62;\
660 ADD128(mh, ml, kh, kl); \
661 ah = mh; al = ml;}
662
663 first_poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
664 if (T_128BitTag)
665 first_poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
666 #endif
667 continue;
668 }
669 else
670 {
671 #if VMAC_BOOL_WORD128
672 a1 = (word128((polyS+0*4)[0]) << 64) | (polyS+0*4)[1];
673 #else
674 ah1=(polyS+0*4)[0]; al1=(polyS+0*4)[1];
675 #endif
676 if (T_128BitTag)
677 {
678 #if VMAC_BOOL_WORD128
679 a2 = (word128((polyS+1*4)[0]) << 64) | (polyS+1*4)[1];
680 #else
681 ah2=(polyS+1*4)[0]; al2=(polyS+1*4)[1];
682 #endif
683 }
684 }
685 }
686
687 #if VMAC_BOOL_WORD128
688 #define poly_step(a, kh, kl, m) \
689 { word128 t1, t2, t3, t4;\
690 Multiply128(t2, a>>64, kl);\
691 Multiply128(t3, a, kh);\
692 Multiply128(t1, a, kl);\
693 Multiply128(t4, a>>64, 2*kh);\
694 t2 += t3;\
695 t4 += t1;\
696 t2 += t4>>64;\
697 a = (word128(word64(t2)&m63) << 64) | word64(t4);\
698 t2 *= 2;\
699 a += m & m126;\
700 a += t2>>64;}
701
702 poly_step(a1, kh1, kl1, nhA);
703 if (T_128BitTag)
704 poly_step(a2, kh2, kl2, nhB);
705 #else
706 #define poly_step(ah, al, kh, kl, mh, ml) \
707 { word64 t1h, t1l, t2h, t2l, t3h, t3l, z=0; \
708 /* compute ab*cd, put bd into result registers */ \
709 MUL64(t2h,t2l,ah,kl); \
710 MUL64(t3h,t3l,al,kh); \
711 MUL64(t1h,t1l,ah,2*kh); \
712 MUL64(ah,al,al,kl); \
713 /* add together ad + bc */ \
714 ADD128(t2h,t2l,t3h,t3l); \
715 /* add 2 * ac to result */ \
716 ADD128(ah,al,t1h,t1l); \
717 /* now (ah,al), (t2l,2*t2h) need summing */ \
718 /* first add the high registers, carrying into t2h */ \
719 ADD128(t2h,ah,z,t2l); \
720 /* double t2h and add top bit of ah */ \
721 t2h += t2h + (ah >> 63); \
722 ah &= m63; \
723 /* now add the low registers */ \
724 mh &= m62; \
725 ADD128(ah,al,mh,ml); \
726 ADD128(ah,al,z,t2h); \
727 }
728
729 poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
730 if (T_128BitTag)
731 poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
732 #endif
733 #endif // #if VMAC_BOOL_32BIT
734 } while (blocksRemainingInWord64);
735
736 #if VMAC_BOOL_WORD128
737 (polyS+0*4)[0]=word64(a1>>64); (polyS+0*4)[1]=word64(a1);
738 if (T_128BitTag)
739 {
740 (polyS+1*4)[0]=word64(a2>>64); (polyS+1*4)[1]=word64(a2);
741 }
742 #elif !VMAC_BOOL_32BIT
743 (polyS+0*4)[0]=ah1; (polyS+0*4)[1]=al1;
744 if (T_128BitTag)
745 {
746 (polyS+1*4)[0]=ah2; (polyS+1*4)[1]=al2;
747 }
748 #endif
749}
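// The template walks the message in L1KeyLength-byte chunks: each chunk is
// compressed by NH into at most 126 bits, and that value is folded into a
// running polynomial evaluation modulo 2^127-1 via poly_step (or
// first_poly_step for the first chunk). A 128-bit tag runs two independent
// instances (nhA/nhB, a1/a2 or ah1,al1/ah2,al2) over the same data.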
750
751inline void VMAC_Base::VHASH_Update(const word64 *data, size_t blocksRemainingInWord64)
752{
753#if CRYPTOPP_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
754 if (HasSSE2())
755 {
756 VHASH_Update_SSE2(data, blocksRemainingInWord64, 0);
757 if (m_is128)
758 VHASH_Update_SSE2(data, blocksRemainingInWord64, 1);
759 m_isFirstBlock = false;
760 }
761 else
762#endif
763 {
764 if (m_is128)
765 VHASH_Update_Template<true>(data, blocksRemainingInWord64);
766 else
767 VHASH_Update_Template<false>(data, blocksRemainingInWord64);
768 }
769}
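// Dispatch: the SSE2 kernel computes one 64-bit tag half per call, so 128-bit
// tags make two passes over the same data with tagPart 0 and 1.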
770
771size_t VMAC_Base::HashMultipleBlocks(const word64 *data, size_t length)
772{
773 size_t remaining = ModPowerOf2(length, m_L1KeyLength);
774 VHASH_Update(data, (length-remaining)/8);
775 return remaining;
776}
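// Only whole L1 chunks are hashed here; the remainder in bytes is returned so
// the base class can buffer the partial chunk for later calls or for
// TruncatedFinal.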
777
778word64 L3Hash(const word64 *input, const word64 *l3Key, size_t len)
779{
780 word64 rh, rl, t, z=0;
781 word64 p1 = input[0], p2 = input[1];
782 word64 k1 = l3Key[0], k2 = l3Key[1];
783
784 /* fully reduce (p1,p2)+(len,0) mod p127 */
785 t = p1 >> 63;
786 p1 &= m63;
787 ADD128(p1, p2, len, t);
788 /* At this point, (p1,p2) is at most 2^127+(len<<64) */
789 t = (p1 > m63) + ((p1 == m63) & (p2 == m64));
790 ADD128(p1, p2, z, t);
791 p1 &= m63;
792
793 /* compute (p1,p2)/(2^64-2^32) and (p1,p2)%(2^64-2^32) */
794 t = p1 + (p2 >> 32);
795 t += (t >> 32);
796 t += (word32)t > 0xfffffffeU;
797 p1 += (t >> 32);
798 p2 += (p1 << 32);
799
800 /* compute (p1+k1)%p64 and (p2+k2)%p64 */
801 p1 += k1;
802 p1 += (0 - (p1 < k1)) & 257;
803 p2 += k2;
804 p2 += (0 - (p2 < k2)) & 257;
805
806 /* compute (p1+k1)*(p2+k2)%p64 */
807 MUL64(rh, rl, p1, p2);
808 t = rh >> 56;
809 ADD128(t, rl, z, rh);
810 rh <<= 8;
811 ADD128(t, rl, z, rh);
812 t += t << 8;
813 rl += t;
814 rl += (0 - (rl < t)) & 257;
815 rl += (0 - (rl > p64-1)) & 257;
816 return rl;
817}
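// L3Hash: the polynomial accumulator plus the residual message length is
// fully reduced modulo 2^127-1, mapped into two 64-bit words via division by
// 2^64-2^32, offset by the key pair (k1,k2), and compressed with a single
// multiplication reduced modulo p64 = 2^64-257.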
818
819void VMAC_Base::TruncatedFinal(byte *mac, size_t size)
820{
821 CRYPTOPP_ASSERT(IsAlignedOn(DataBuf(),GetAlignmentOf<word64>()));
822 CRYPTOPP_ASSERT(IsAlignedOn(m_polyState(),GetAlignmentOf<word64>()));
823 size_t len = ModPowerOf2(GetBitCountLo()/8, m_L1KeyLength);
824
825 if (len)
826 {
827 memset(m_data()+len, 0, (0-len)%16);
828 VHASH_Update(DataBuf(), ((len+15)/16)*2);
829 len *= 8; // convert to bits
830 }
831 else if (m_isFirstBlock)
832 {
833 // special case for empty string
834 m_polyState()[0] = m_polyState()[2];
835 m_polyState()[1] = m_polyState()[3];
836 if (m_is128)
837 {
838 m_polyState()[4] = m_polyState()[6];
839 m_polyState()[5] = m_polyState()[7];
840 }
841 }
842
843 if (m_is128)
844 {
845 word64 t[2];
846 t[0] = L3Hash(m_polyState(), m_l3Key(), len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad());
847 t[1] = L3Hash(m_polyState()+4, m_l3Key()+2, len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad()+8);
848 if (size == 16)
849 {
850 PutWord(false, BIG_ENDIAN_ORDER, mac, t[0]);
851 PutWord(false, BIG_ENDIAN_ORDER, mac+8, t[1]);
852 }
853 else
854 {
855 t[0] = ConditionalByteReverse(BIG_ENDIAN_ORDER, t[0]);
856 t[1] = ConditionalByteReverse(BIG_ENDIAN_ORDER, t[1]);
857 memcpy(mac, t, size);
858 }
859 }
860 else
861 {
862 word64 t = L3Hash(m_polyState(), m_l3Key(), len);
863 t += GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad() + (m_nonce()[IVSize()-1]&1) * 8);
864 if (size == 8)
865 PutWord(false, BIG_ENDIAN_ORDER, mac, t);
866 else
867 {
868 t = ConditionalByteReverse(BIG_ENDIAN_ORDER, t);
869 memcpy(mac, &t, size);
870 }
871 }
872}
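// Tag formation: each 64-bit half is L3Hash(VHASH state) plus the
// corresponding half of the pad (the block cipher applied to the stored
// nonce), with addition modulo 2^64; for 64-bit tags the low bit of the last
// nonce byte selects which pad half is used.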
873
874NAMESPACE_END
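For orientation, here is a minimal usage sketch of this code through the public VMAC interface declared in vmac.h. It is illustrative only: the key, nonce and message values are made up, and the explicit 64 template argument requests an 8-byte tag.

#include "vmac.h"
#include "aes.h"
#include "hex.h"
#include "filters.h"
#include <iostream>
#include <string>

int main()
{
    using namespace CryptoPP;

    // All-zero key and nonce, for illustration only. The nonce's first bit
    // must be clear, which an all-zero block trivially satisfies.
    byte key[AES::DEFAULT_KEYLENGTH] = {0};
    byte iv[AES::BLOCKSIZE] = {0};
    std::string message = "attack at dawn";

    VMAC<AES, 64> vmac;   // VMAC(AES)-64: DigestSize() == 8
    vmac.SetKeyWithIV(key, sizeof(key), iv, sizeof(iv));

    byte tag[8];
    vmac.CalculateDigest(tag, reinterpret_cast<const byte*>(message.data()),
                         message.size());

    // Hex-encode the tag for display.
    std::string encoded;
    HexEncoder encoder(new StringSink(encoded));
    encoder.Put(tag, sizeof(tag));
    encoder.MessageEnd();
    std::cout << "tag: " << encoded << std::endl;
    return 0;
}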