#if (CRYPTOPP_AESNI_AVAILABLE)
# include "adv_simd.h"
# include <emmintrin.h>
# include <smmintrin.h>
# include <wmmintrin.h>
#endif

#if (CRYPTOPP_ARM_NEON_AVAILABLE)
# include "adv_simd.h"
# include <arm_neon.h>
#endif

#if (CRYPTOPP_ARM_ACLE_AVAILABLE)
# include <stdint.h>
# include <arm_acle.h>
#endif

#if defined(CRYPTOPP_POWER8_AES_AVAILABLE)
# include "adv_simd.h"
# include "ppc_simd.h"
#endif

#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
# include <signal.h>
# include <setjmp.h>
#endif

#ifndef EXCEPTION_EXECUTE_HANDLER
# define EXCEPTION_EXECUTE_HANDLER 1
#endif

// Clang intrinsic casts
#define M128_CAST(x) ((__m128i *)(void *)(x))
#define CONST_M128_CAST(x) ((const __m128i *)(const void *)(x))

// Squash MS LNK4221 and libtool warnings
extern const char RIJNDAEL_SIMD_FNAME[] = __FILE__;

NAMESPACE_BEGIN(CryptoPP)
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
extern "C" {
    typedef void (*SigHandler)(int);

    static jmp_buf s_jmpSIGILL;
    static void SigIllHandler(int)
    {
        longjmp(s_jmpSIGILL, 1);
    }
}
#endif  // CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
#if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARMV8)

bool CPU_ProbeAES()
{
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
    return false;
#elif (CRYPTOPP_ARM_AES_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        // AES encrypt and decrypt one all-zero block
        uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
        uint8x16_t r1 = vaeseq_u8(data, key);
        uint8x16_t r2 = vaesdq_u8(data, key);
        r1 = vaesmcq_u8(r1);
        r2 = vaesimcq_u8(r2);

        result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7));
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else
    // longjmp and clobber warnings. Volatile is required.
    volatile bool result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
    if (oldHandler == SIG_ERR)
        return false;

    volatile sigset_t oldMask;
    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
    {
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpSIGILL))
        result = false;
    else
    {
        uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
        uint8x16_t r1 = vaeseq_u8(data, key);
        uint8x16_t r2 = vaesdq_u8(data, key);
        r1 = vaesmcq_u8(r1);
        r2 = vaesimcq_u8(r2);

        result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7));
    }

    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif  // CRYPTOPP_ARM_AES_AVAILABLE
}

#endif  // CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARMV8
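// Note: both probes follow the same pattern. Execute vaeseq_u8/vaesdq_u8 and
// treat a fault as "AES not available". Under MS-style exception handling the
// fault surfaces as a structured exception; under GNU-style toolchains it
// arrives as SIGILL, which SigIllHandler turns into a longjmp back into the
// probe. The vgetq_lane_u8 reads keep the optimizer from discarding the
// otherwise dead AES instructions and returning true unconditionally.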
#if (CRYPTOPP_ARM_AES_AVAILABLE)

ANONYMOUS_NAMESPACE_BEGIN

static inline void ARMV8_Enc_Block(uint64x2_t &data,
    const word32 *subkeys, unsigned int rounds)
{
    CRYPTOPP_ASSERT(subkeys);
    const byte *keys = reinterpret_cast<const byte*>(subkeys);
    uint8x16_t block = vreinterpretq_u8_u64(data);

    // AES single round encryption, then mix columns
    block = vaeseq_u8(block, vld1q_u8(keys+0*16));
    block = vaesmcq_u8(block);

    for (unsigned int i=1; i<rounds-1; i+=2)
    {
        block = vaeseq_u8(block, vld1q_u8(keys+i*16));
        block = vaesmcq_u8(block);
        block = vaeseq_u8(block, vld1q_u8(keys+(i+1)*16));
        block = vaesmcq_u8(block);
    }

    // Final round skips mix columns; finish with key addition (XOR)
    block = vaeseq_u8(block, vld1q_u8(keys+(rounds-1)*16));
    block = veorq_u8(block, vld1q_u8(keys+rounds*16));

    data = vreinterpretq_u64_u8(block);
}
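// Worked example of the schedule above for AES-128 (rounds == 10): the first
// vaeseq/vaesmcq pair consumes round key 0, the loop pairs consume keys 1..8,
// the trailing vaeseq consumes key 9, and the veorq applies key 10. vaeseq
// performs AddRoundKey+SubBytes+ShiftRows, so this is exactly the FIPS-197
// round ordering with ten vaeseq steps and nine vaesmcq (MixColumns) steps.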
static inline void ARMV8_Enc_6_Blocks(uint64x2_t &data0, uint64x2_t &data1,
    uint64x2_t &data2, uint64x2_t &data3, uint64x2_t &data4, uint64x2_t &data5,
    const word32 *subkeys, unsigned int rounds)
{
    CRYPTOPP_ASSERT(subkeys);
    const byte *keys = reinterpret_cast<const byte*>(subkeys);

    uint8x16_t block0 = vreinterpretq_u8_u64(data0);
    uint8x16_t block1 = vreinterpretq_u8_u64(data1);
    uint8x16_t block2 = vreinterpretq_u8_u64(data2);
    uint8x16_t block3 = vreinterpretq_u8_u64(data3);
    uint8x16_t block4 = vreinterpretq_u8_u64(data4);
    uint8x16_t block5 = vreinterpretq_u8_u64(data5);

    uint8x16_t key;
    for (unsigned int i=0; i<rounds-1; ++i)
    {
        key = vld1q_u8(keys+i*16);
        // AES single round encryption and mix columns, six blocks in flight
        block0 = vaeseq_u8(block0, key);
        block0 = vaesmcq_u8(block0);
        block1 = vaeseq_u8(block1, key);
        block1 = vaesmcq_u8(block1);
        block2 = vaeseq_u8(block2, key);
        block2 = vaesmcq_u8(block2);
        block3 = vaeseq_u8(block3, key);
        block3 = vaesmcq_u8(block3);
        block4 = vaeseq_u8(block4, key);
        block4 = vaesmcq_u8(block4);
        block5 = vaeseq_u8(block5, key);
        block5 = vaesmcq_u8(block5);
    }

    // Final round skips mix columns
    key = vld1q_u8(keys+(rounds-1)*16);
    block0 = vaeseq_u8(block0, key);
    block1 = vaeseq_u8(block1, key);
    block2 = vaeseq_u8(block2, key);
    block3 = vaeseq_u8(block3, key);
    block4 = vaeseq_u8(block4, key);
    block5 = vaeseq_u8(block5, key);

    // Final key addition (XOR)
    key = vld1q_u8(keys+rounds*16);
    data0 = vreinterpretq_u64_u8(veorq_u8(block0, key));
    data1 = vreinterpretq_u64_u8(veorq_u8(block1, key));
    data2 = vreinterpretq_u64_u8(veorq_u8(block2, key));
    data3 = vreinterpretq_u64_u8(veorq_u8(block3, key));
    data4 = vreinterpretq_u64_u8(veorq_u8(block4, key));
    data5 = vreinterpretq_u64_u8(veorq_u8(block5, key));
}
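// Keeping six blocks in flight amortizes each vld1q_u8 key load and, more
// importantly, gives the core six independent vaeseq/vaesmcq dependency
// chains to overlap, hiding the multi-cycle latency of the AES instructions.
// The 6x1 dispatcher below selects this path whenever enough blocks remain.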
static inline void ARMV8_Dec_Block(uint64x2_t &data,
    const word32 *subkeys, unsigned int rounds)
{
    CRYPTOPP_ASSERT(subkeys);
    const byte *keys = reinterpret_cast<const byte*>(subkeys);
    uint8x16_t block = vreinterpretq_u8_u64(data);

    // AES single round decryption, then inverse mix columns
    block = vaesdq_u8(block, vld1q_u8(keys+0*16));
    block = vaesimcq_u8(block);

    for (unsigned int i=1; i<rounds-1; i+=2)
    {
        block = vaesdq_u8(block, vld1q_u8(keys+i*16));
        block = vaesimcq_u8(block);
        block = vaesdq_u8(block, vld1q_u8(keys+(i+1)*16));
        block = vaesimcq_u8(block);
    }

    // Final round skips inverse mix columns; finish with key addition (XOR)
    block = vaesdq_u8(block, vld1q_u8(keys+(rounds-1)*16));
    block = veorq_u8(block, vld1q_u8(keys+rounds*16));

    data = vreinterpretq_u64_u8(block);
}
static inline void ARMV8_Dec_6_Blocks(uint64x2_t &data0, uint64x2_t &data1,
    uint64x2_t &data2, uint64x2_t &data3, uint64x2_t &data4, uint64x2_t &data5,
    const word32 *subkeys, unsigned int rounds)
{
    CRYPTOPP_ASSERT(subkeys);
    const byte *keys = reinterpret_cast<const byte*>(subkeys);

    uint8x16_t block0 = vreinterpretq_u8_u64(data0);
    uint8x16_t block1 = vreinterpretq_u8_u64(data1);
    uint8x16_t block2 = vreinterpretq_u8_u64(data2);
    uint8x16_t block3 = vreinterpretq_u8_u64(data3);
    uint8x16_t block4 = vreinterpretq_u8_u64(data4);
    uint8x16_t block5 = vreinterpretq_u8_u64(data5);

    uint8x16_t key;
    for (unsigned int i=0; i<rounds-1; ++i)
    {
        key = vld1q_u8(keys+i*16);
        // AES single round decryption and inverse mix columns, six blocks in flight
        block0 = vaesdq_u8(block0, key);
        block0 = vaesimcq_u8(block0);
        block1 = vaesdq_u8(block1, key);
        block1 = vaesimcq_u8(block1);
        block2 = vaesdq_u8(block2, key);
        block2 = vaesimcq_u8(block2);
        block3 = vaesdq_u8(block3, key);
        block3 = vaesimcq_u8(block3);
        block4 = vaesdq_u8(block4, key);
        block4 = vaesimcq_u8(block4);
        block5 = vaesdq_u8(block5, key);
        block5 = vaesimcq_u8(block5);
    }

    // Final round skips inverse mix columns
    key = vld1q_u8(keys+(rounds-1)*16);
    block0 = vaesdq_u8(block0, key);
    block1 = vaesdq_u8(block1, key);
    block2 = vaesdq_u8(block2, key);
    block3 = vaesdq_u8(block3, key);
    block4 = vaesdq_u8(block4, key);
    block5 = vaesdq_u8(block5, key);

    // Final key addition (XOR)
    key = vld1q_u8(keys+rounds*16);
    data0 = vreinterpretq_u64_u8(veorq_u8(block0, key));
    data1 = vreinterpretq_u64_u8(veorq_u8(block1, key));
    data2 = vreinterpretq_u64_u8(veorq_u8(block2, key));
    data3 = vreinterpretq_u64_u8(veorq_u8(block3, key));
    data4 = vreinterpretq_u64_u8(veorq_u8(block4, key));
    data5 = vreinterpretq_u64_u8(veorq_u8(block5, key));
}
ANONYMOUS_NAMESPACE_END

size_t Rijndael_Enc_AdvancedProcessBlocks_ARMV8(const word32 *subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks128_6x1_NEON(ARMV8_Enc_Block, ARMV8_Enc_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}

size_t Rijndael_Dec_AdvancedProcessBlocks_ARMV8(const word32 *subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks128_6x1_NEON(ARMV8_Dec_Block, ARMV8_Dec_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}

#endif  // CRYPTOPP_ARM_AES_AVAILABLE
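// Minimal usage sketch (hypothetical driver; real callers reach these entry
// points through Rijndael's AdvancedProcessBlocks): encrypt n 16-byte blocks
// with an expanded AES-128 schedule `sched` (10 rounds) and no xor input:
//
//   size_t left = Rijndael_Enc_AdvancedProcessBlocks_ARMV8(
//       sched, 10, in, NULLPTR, out, n*16, 0);
//
// The dispatcher returns the number of input bytes it did not consume.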
#if (CRYPTOPP_AESNI_AVAILABLE)

ANONYMOUS_NAMESPACE_BEGIN

// For 128-bit blocks, Rijndael never uses more than 10 rcon values
CRYPTOPP_ALIGN_DATA(16)
const word32 s_rconLE[] = {
    0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36
};

static inline void AESNI_Enc_Block(__m128i &block, MAYBE_CONST word32 *subkeys,
    unsigned int rounds)
{
    const __m128i* skeys = reinterpret_cast<const __m128i*>(subkeys);

    block = _mm_xor_si128(block, skeys[0]);
    for (unsigned int i=1; i<rounds-1; i+=2)
    {
        block = _mm_aesenc_si128(block, skeys[i]);
        block = _mm_aesenc_si128(block, skeys[i+1]);
    }
    block = _mm_aesenc_si128(block, skeys[rounds-1]);
    block = _mm_aesenclast_si128(block, skeys[rounds]);
}
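// Round key indexing above, using AES-256 as an example (rounds == 14):
// skeys[0] whitens, the loop applies skeys[1..12] two at a time, skeys[13]
// is the last full round, and skeys[14] feeds the MixColumns-free
// _mm_aesenclast_si128. The schedule therefore holds rounds+1 round keys.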
static inline void AESNI_Enc_4_Blocks(__m128i &block0, __m128i &block1,
    __m128i &block2, __m128i &block3, MAYBE_CONST word32 *subkeys,
    unsigned int rounds)
{
    const __m128i* skeys = reinterpret_cast<const __m128i*>(subkeys);

    __m128i rk = skeys[0];
    block0 = _mm_xor_si128(block0, rk);
    block1 = _mm_xor_si128(block1, rk);
    block2 = _mm_xor_si128(block2, rk);
    block3 = _mm_xor_si128(block3, rk);
    for (unsigned int i=1; i<rounds; i++)
    {
        rk = skeys[i];
        block0 = _mm_aesenc_si128(block0, rk);
        block1 = _mm_aesenc_si128(block1, rk);
        block2 = _mm_aesenc_si128(block2, rk);
        block3 = _mm_aesenc_si128(block3, rk);
    }
    rk = skeys[rounds];
    block0 = _mm_aesenclast_si128(block0, rk);
    block1 = _mm_aesenclast_si128(block1, rk);
    block2 = _mm_aesenclast_si128(block2, rk);
    block3 = _mm_aesenclast_si128(block3, rk);
}
static inline void AESNI_Dec_Block(__m128i &block, MAYBE_CONST word32 *subkeys,
    unsigned int rounds)
{
    const __m128i* skeys = reinterpret_cast<const __m128i*>(subkeys);

    block = _mm_xor_si128(block, skeys[0]);
    for (unsigned int i=1; i<rounds-1; i+=2)
    {
        block = _mm_aesdec_si128(block, skeys[i]);
        block = _mm_aesdec_si128(block, skeys[i+1]);
    }
    block = _mm_aesdec_si128(block, skeys[rounds-1]);
    block = _mm_aesdeclast_si128(block, skeys[rounds]);
}
static inline void AESNI_Dec_4_Blocks(__m128i &block0, __m128i &block1,
    __m128i &block2, __m128i &block3, MAYBE_CONST word32 *subkeys,
    unsigned int rounds)
{
    const __m128i* skeys = reinterpret_cast<const __m128i*>(subkeys);

    __m128i rk = skeys[0];
    block0 = _mm_xor_si128(block0, rk);
    block1 = _mm_xor_si128(block1, rk);
    block2 = _mm_xor_si128(block2, rk);
    block3 = _mm_xor_si128(block3, rk);
    for (unsigned int i=1; i<rounds; i++)
    {
        rk = skeys[i];
        block0 = _mm_aesdec_si128(block0, rk);
        block1 = _mm_aesdec_si128(block1, rk);
        block2 = _mm_aesdec_si128(block2, rk);
        block3 = _mm_aesdec_si128(block3, rk);
    }
    rk = skeys[rounds];
    block0 = _mm_aesdeclast_si128(block0, rk);
    block1 = _mm_aesdeclast_si128(block1, rk);
    block2 = _mm_aesdeclast_si128(block2, rk);
    block3 = _mm_aesdeclast_si128(block3, rk);
}

ANONYMOUS_NAMESPACE_END
void Rijndael_UncheckedSetKey_SSE4_AESNI(const byte *userKey, size_t keyLen, word32 *rk)
{
    const size_t rounds = keyLen / 4 + 6;
    const word32 *rc = s_rconLE;

    __m128i temp = _mm_loadu_si128(M128_CAST(userKey+keyLen-16));
    std::memcpy(rk, userKey, keyLen);

    // The key schedule holds 4*(rounds+1) word32's
    const size_t keySize = 4*(rounds+1);
    const word32* end = rk + keySize;

    while (true)
    {
        rk[keyLen/4] = rk[0] ^ _mm_extract_epi32(_mm_aeskeygenassist_si128(temp, 0), 3) ^ *(rc++);
        rk[keyLen/4+1] = rk[1] ^ rk[keyLen/4];
        rk[keyLen/4+2] = rk[2] ^ rk[keyLen/4+1];
        rk[keyLen/4+3] = rk[3] ^ rk[keyLen/4+2];

        if (rk + keyLen/4 + 4 == end)
            break;

        if (keyLen == 24)
        {
            rk[10] = rk[ 4] ^ rk[ 9];
            rk[11] = rk[ 5] ^ rk[10];
            temp = _mm_insert_epi32(temp, rk[11], 3);
        }
        else if (keyLen == 32)
        {
            temp = _mm_insert_epi32(temp, rk[11], 3);
            rk[12] = rk[ 4] ^ _mm_extract_epi32(_mm_aeskeygenassist_si128(temp, 0), 2);
            rk[13] = rk[ 5] ^ rk[12];
            rk[14] = rk[ 6] ^ rk[13];
            rk[15] = rk[ 7] ^ rk[14];
            temp = _mm_insert_epi32(temp, rk[15], 3);
        }
        else
        {
            temp = _mm_insert_epi32(temp, rk[7], 3);
        }

        rk += keyLen/4;
    }
}
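// What the loop above computes, written out for AES-128 (keyLen == 16):
// each pass derives four new schedule words from the previous four,
//
//   rk[4] = rk[0] ^ SubWord(RotWord(rk[3])) ^ rcon
//   rk[5] = rk[1] ^ rk[4]
//   rk[6] = rk[2] ^ rk[5]
//   rk[7] = rk[3] ^ rk[6]
//
// _mm_aeskeygenassist_si128(temp, 0) leaves RotWord(SubWord(w)) of temp's
// lane 3 in lane 3 of its result (and plain SubWord in lane 2, which the
// AES-256 branch uses), so the extract/insert pair replaces a scalar S-box
// lookup with the AES-NI hardware S-box.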
void Rijndael_UncheckedSetKeyRev_AESNI(word32 *key, unsigned int rounds)
{
    unsigned int i, j;
    __m128i temp;

    vec_swap(*M128_CAST(key), *M128_CAST(key+4*rounds));

    for (i = 4, j = 4*rounds-4; i < j; i += 4, j -= 4)
    {
        temp = _mm_aesimc_si128(*M128_CAST(key+i));
        *M128_CAST(key+i) = _mm_aesimc_si128(*M128_CAST(key+j));
        *M128_CAST(key+j) = temp;
    }

    *M128_CAST(key+i) = _mm_aesimc_si128(*M128_CAST(key+i));
}
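// This produces the "equivalent inverse cipher" key schedule of FIPS-197:
// the encryption round keys are used in reverse order (the vec_swap plus the
// i/j exchange loop), and every key except the first and last is run through
// InvMixColumns (_mm_aesimc_si128) so that _mm_aesdec_si128, which applies
// InvMixColumns internally, receives keys in the form it expects.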
size_t Rijndael_Enc_AdvancedProcessBlocks_AESNI(const word32 *subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    // MAYBE_CONST works around a SunCC const-correctness issue
    MAYBE_CONST word32* sk = MAYBE_UNCONST_CAST(word32*, subKeys);
    MAYBE_CONST byte* ib = MAYBE_UNCONST_CAST(byte*, inBlocks);
    MAYBE_CONST byte* xb = MAYBE_UNCONST_CAST(byte*, xorBlocks);

    return AdvancedProcessBlocks128_4x1_SSE(AESNI_Enc_Block, AESNI_Enc_4_Blocks,
        sk, rounds, ib, xb, outBlocks, length, flags);
}

size_t Rijndael_Dec_AdvancedProcessBlocks_AESNI(const word32 *subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    MAYBE_CONST word32* sk = MAYBE_UNCONST_CAST(word32*, subKeys);
    MAYBE_CONST byte* ib = MAYBE_UNCONST_CAST(byte*, inBlocks);
    MAYBE_CONST byte* xb = MAYBE_UNCONST_CAST(byte*, xorBlocks);

    return AdvancedProcessBlocks128_4x1_SSE(AESNI_Dec_Block, AESNI_Dec_4_Blocks,
        sk, rounds, ib, xb, outBlocks, length, flags);
}

#endif  // CRYPTOPP_AESNI_AVAILABLE
#if (CRYPTOPP_POWER8_AES_AVAILABLE)

ANONYMOUS_NAMESPACE_BEGIN

// For 128-bit blocks, Rijndael never uses more than 10 rcon values
CRYPTOPP_ALIGN_DATA(16)
static const uint32_t s_rconBE[] = {
    0x01000000, 0x02000000, 0x04000000, 0x08000000,
    0x10000000, 0x20000000, 0x40000000, 0x80000000,
    0x1B000000, 0x36000000
};

static inline void POWER8_Enc_Block(uint32x4_p &block,
    const word32 *subkeys, unsigned int rounds)
{
    CRYPTOPP_ASSERT(IsAlignedOn(subkeys, 16));
    const byte *keys = reinterpret_cast<const byte*>(subkeys);

    block = VecXor(block, VecLoad(keys));
    for (size_t i=1; i<rounds-1; i+=2)
    {
        block = VecEncrypt(block, VecLoad(keys+i*16));
        block = VecEncrypt(block, VecLoad(keys+(i+1)*16));
    }
    block = VecEncrypt(block, VecLoad(keys+(rounds-1)*16));
    block = VecEncryptLast(block, VecLoad(keys+rounds*16));
}
static inline void POWER8_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
    uint32x4_p &block2, uint32x4_p &block3, uint32x4_p &block4,
    uint32x4_p &block5, const word32 *subkeys, unsigned int rounds)
{
    CRYPTOPP_ASSERT(IsAlignedOn(subkeys, 16));
    const byte *keys = reinterpret_cast<const byte*>(subkeys);

    uint32x4_p k = VecLoad(keys);
    block0 = VecXor(block0, k);
    block1 = VecXor(block1, k);
    block2 = VecXor(block2, k);
    block3 = VecXor(block3, k);
    block4 = VecXor(block4, k);
    block5 = VecXor(block5, k);

    for (size_t i=1; i<rounds; ++i)
    {
        k = VecLoad(keys+i*16);
        block0 = VecEncrypt(block0, k);
        block1 = VecEncrypt(block1, k);
        block2 = VecEncrypt(block2, k);
        block3 = VecEncrypt(block3, k);
        block4 = VecEncrypt(block4, k);
        block5 = VecEncrypt(block5, k);
    }

    k = VecLoad(keys+rounds*16);
    block0 = VecEncryptLast(block0, k);
    block1 = VecEncryptLast(block1, k);
    block2 = VecEncryptLast(block2, k);
    block3 = VecEncryptLast(block3, k);
    block4 = VecEncryptLast(block4, k);
    block5 = VecEncryptLast(block5, k);
}
static inline void POWER8_Dec_Block(uint32x4_p &block,
    const word32 *subkeys, unsigned int rounds)
{
    CRYPTOPP_ASSERT(IsAlignedOn(subkeys, 16));
    const byte *keys = reinterpret_cast<const byte*>(subkeys);

    // The decryption path walks the key schedule in reverse
    block = VecXor(block, VecLoad(keys+rounds*16));
    for (size_t i=rounds-1; i>1; i-=2)
    {
        block = VecDecrypt(block, VecLoad(keys+i*16));
        block = VecDecrypt(block, VecLoad(keys+(i-1)*16));
    }
    block = VecDecrypt(block, VecLoad(keys+1*16));
    block = VecDecryptLast(block, VecLoad(keys+0*16));
}
static inline void POWER8_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
    uint32x4_p &block2, uint32x4_p &block3, uint32x4_p &block4,
    uint32x4_p &block5, const word32 *subkeys, unsigned int rounds)
{
    CRYPTOPP_ASSERT(IsAlignedOn(subkeys, 16));
    const byte *keys = reinterpret_cast<const byte*>(subkeys);

    uint32x4_p k = VecLoad(keys+rounds*16);
    block0 = VecXor(block0, k);
    block1 = VecXor(block1, k);
    block2 = VecXor(block2, k);
    block3 = VecXor(block3, k);
    block4 = VecXor(block4, k);
    block5 = VecXor(block5, k);

    for (size_t i=rounds-1; i>0; --i)
    {
        k = VecLoad(keys+i*16);
        block0 = VecDecrypt(block0, k);
        block1 = VecDecrypt(block1, k);
        block2 = VecDecrypt(block2, k);
        block3 = VecDecrypt(block3, k);
        block4 = VecDecrypt(block4, k);
        block5 = VecDecrypt(block5, k);
    }

    k = VecLoad(keys+0*16);
    block0 = VecDecryptLast(block0, k);
    block1 = VecDecryptLast(block1, k);
    block2 = VecDecryptLast(block2, k);
    block3 = VecDecryptLast(block3, k);
    block4 = VecDecryptLast(block4, k);
    block5 = VecDecryptLast(block5, k);
}
ANONYMOUS_NAMESPACE_END

void Rijndael_UncheckedSetKey_POWER8(const byte* userKey, size_t keyLen,
    word32* rk, const byte* Se)
{
    const size_t rounds = keyLen / 4 + 6;
    const word32 *rc = s_rconBE;
    word32 *rkey = rk, temp;

    GetUserKey(BIG_ENDIAN_ORDER, rkey, keyLen/4, userKey, keyLen);

    // The key schedule holds 4*(rounds+1) word32's
    const size_t keySize = 4*(rounds+1);
    const word32* end = rkey + keySize;

    while (true)
    {
        temp = rkey[keyLen/4-1];
        word32 x = (word32(Se[GETBYTE(temp, 2)]) << 24) ^ (word32(Se[GETBYTE(temp, 1)]) << 16) ^
                   (word32(Se[GETBYTE(temp, 0)]) << 8) ^ Se[GETBYTE(temp, 3)];
        rkey[keyLen/4] = rkey[0] ^ x ^ *(rc++);
        rkey[keyLen/4+1] = rkey[1] ^ rkey[keyLen/4];
        rkey[keyLen/4+2] = rkey[2] ^ rkey[keyLen/4+1];
        rkey[keyLen/4+3] = rkey[3] ^ rkey[keyLen/4+2];

        if (rkey + keyLen/4 + 4 == end)
            break;

        if (keyLen == 24)
        {
            rkey[10] = rkey[ 4] ^ rkey[ 9];
            rkey[11] = rkey[ 5] ^ rkey[10];
        }
        else if (keyLen == 32)
        {
            temp = rkey[11];
            rkey[12] = rkey[ 4] ^ (word32(Se[GETBYTE(temp, 3)]) << 24) ^ (word32(Se[GETBYTE(temp, 2)]) << 16) ^
                       (word32(Se[GETBYTE(temp, 1)]) << 8) ^ Se[GETBYTE(temp, 0)];
            rkey[13] = rkey[ 5] ^ rkey[12];
            rkey[14] = rkey[ 6] ^ rkey[13];
            rkey[15] = rkey[ 7] ^ rkey[14];
        }
        rkey += keyLen/4;
    }
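    // The recurrence above is the same FIPS-197 expansion the AES-NI path
    // implements, but done with scalar S-box lookups through Se and the
    // big-endian rcon table s_rconBE, so the schedule lands in memory in
    // big-endian word order. On little-endian targets the fix-up below
    // rewrites each 16-byte round key into the layout the VSX loads in the
    // round functions expect.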
#if (CRYPTOPP_LITTLE_ENDIAN)
    rkey = rk;
    const uint8x16_p mask = ((uint8x16_p){12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3});
    const uint8x16_p zero = {0};

    unsigned int i = 0;
    for (i=0; i<rounds; i+=2, rkey+=8)
    {
        const uint8x16_p d1 = vec_vsx_ld( 0, (uint8_t*)rkey);
        const uint8x16_p d2 = vec_vsx_ld(16, (uint8_t*)rkey);
        vec_vsx_st(VecPermute(d1, zero, mask),  0, (uint8_t*)rkey);
        vec_vsx_st(VecPermute(d2, zero, mask), 16, (uint8_t*)rkey);
    }

    for ( ; i<rounds+1; i++, rkey+=4)
    {
        const uint8x16_p d = vec_vsx_ld( 0, (uint8_t*)rkey);
        vec_vsx_st(VecPermute(d, zero, mask), 0, (uint8_t*)rkey);
    }
#endif  // CRYPTOPP_LITTLE_ENDIAN
}
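// Example of the fix-up permute: with mask = {12,13,14,15, 8,9,10,11,
// 4,5,6,7, 0,1,2,3}, stored bytes b0..b15 are rewritten as b12..b15,
// b8..b11, b4..b7, b0..b3 - the four 32-bit words of each round key are
// reversed as whole words while the bytes within each word keep their order.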
size_t Rijndael_Enc_AdvancedProcessBlocks128_6x1_ALTIVEC(const word32 *subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks128_6x1_ALTIVEC(POWER8_Enc_Block, POWER8_Enc_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}

size_t Rijndael_Dec_AdvancedProcessBlocks128_6x1_ALTIVEC(const word32 *subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks128_6x1_ALTIVEC(POWER8_Dec_Block, POWER8_Dec_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}

#endif  // CRYPTOPP_POWER8_AES_AVAILABLE

NAMESPACE_END