Crypto++ 8.2
Free C++ class library of cryptographic schemes
neon_simd.cpp
1
2// neon_simd.cpp - written and placed in the public domain by
3// Jeffrey Walton, Uri Blumenthal and Marcel Raad.
4//
5// This source file uses intrinsics to gain access to ARMv7a and
6// ARMv8a NEON instructions. A separate source file is needed
7// because additional CXXFLAGS are required to enable the
8// appropriate instruction sets in some build configurations.
9
10#include "pch.h"
11#include "config.h"
12#include "stdcpp.h"
13
14// C1189: error: This header is specific to ARM targets
15#if (CRYPTOPP_ARM_NEON_AVAILABLE) && !defined(_M_ARM64)
16# include <arm_neon.h>
17#endif
18
19#if (CRYPTOPP_ARM_ACLE_AVAILABLE)
20# include <stdint.h>
21# include <arm_acle.h>
22#endif
23
24#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
25# include <signal.h>
26# include <setjmp.h>
27#endif
28
29#ifndef EXCEPTION_EXECUTE_HANDLER
30# define EXCEPTION_EXECUTE_HANDLER 1
31#endif
32
33// Squash MS LNK4221 and libtool warnings
34extern const char NEON_SIMD_FNAME[] = __FILE__;
35
36NAMESPACE_BEGIN(CryptoPP)
37
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
extern "C" {
    // Function-pointer type matching the handlers accepted by signal().
    typedef void (*SigHandler)(int);

    // Jump buffer armed with setjmp() by a probe before it executes an
    // instruction that may raise SIGILL.
    static jmp_buf s_jmpSIGILL;

    // SIGILL handler used by the feature probes. It does not return to the
    // faulting instruction; it jumps back to the setjmp() site, which then
    // sees a return value of 1.
    static void SigIllHandler(int signum)
    {
        (void)signum;  // the signal number is not needed
        longjmp(s_jmpSIGILL, 1);
    }
}
#endif  // CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
49
// Probes whether the processor can execute ARMv7 instructions.
//
// Returns:
//   - true  unconditionally when built for AArch32/AArch64
//           (__aarch32__ or __aarch64__ defined);
//   - false when CRYPTOPP_NO_CPU_FEATURE_PROBES is defined, or when
//           CRYPTOPP_ARM_NEON_AVAILABLE is not enabled;
//   - otherwise the outcome of the platform-specific probe below.
//
// The GNU-style probe temporarily replaces the process SIGILL handler and
// restores both the handler and the signal mask before returning.
bool CPU_ProbeARMv7()
{
#if defined(__aarch32__) || defined(__aarch64__)
    return true;
#elif defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
    return false;
#elif (CRYPTOPP_ARM_NEON_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        // Modern MS hardware is ARMv7
        result = true;
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# elif defined(__arm__) && (__ARM_ARCH >= 7)
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
    if (oldHandler == SIG_ERR)
        return false;

    // Capture the current signal mask so it can be reinstated after a
    // longjmp out of the SIGILL handler.
    volatile sigset_t oldMask;
    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
    {
        // Do not leave SigIllHandler installed when bailing out early.
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpSIGILL))
        result = false;  // reached via longjmp from SigIllHandler
    else
    {
        // ARMv7 added movt and movw
        int a;
        asm volatile("movw %0,%1 \n"
                     "movt %0,%1 \n"
                     : "=r"(a) : "i"(0x1234));
        // movw writes the low half-word and movt the high half-word.
        result = (a == 0x12341234);
    }

    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
    signal(SIGILL, oldHandler);
    return result;
# else
    return false;
# endif
#else
    return false;
#endif  // CRYPTOPP_ARM_NEON_AVAILABLE
}
104
// Probes whether the processor can execute NEON (Advanced SIMD) instructions.
//
// Returns:
//   - true  unconditionally when built for AArch32/AArch64
//           (__aarch32__ or __aarch64__ defined);
//   - false when CRYPTOPP_NO_CPU_FEATURE_PROBES is defined, or when
//           CRYPTOPP_ARM_NEON_AVAILABLE is not enabled;
//   - otherwise the outcome of executing a short sequence of NEON
//     intrinsics, guarded by SEH (MS-style) or a SIGILL handler (GNU-style).
//
// The GNU-style probe temporarily replaces the process SIGILL handler and
// restores both the handler and the signal mask before returning.
bool CPU_ProbeNEON()
{
#if defined(__aarch32__) || defined(__aarch64__)
    return true;
#elif defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
    return false;
#elif (CRYPTOPP_ARM_NEON_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        uint32_t v1[4] = {1,1,1,1};
        uint32x4_t x1 = vld1q_u32(v1);
        uint64_t v2[2] = {1,1};
        uint64x2_t x2 = vld1q_u64(v2);

        uint32x4_t x3 = vdupq_n_u32(2);
        x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
        x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
        uint64x2_t x4 = vdupq_n_u64(2);
        x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
        x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);

        result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1));
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else

    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
    if (oldHandler == SIG_ERR)
        return false;

    // Capture the current signal mask so it can be reinstated after a
    // longjmp out of the SIGILL handler.
    volatile sigset_t oldMask;
    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
    {
        // Do not leave SigIllHandler installed when bailing out early.
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpSIGILL))
        result = false;  // reached via longjmp from SigIllHandler
    else
    {
        uint32_t v1[4] = {1,1,1,1};
        uint32x4_t x1 = vld1q_u32(v1);
        uint64_t v2[2] = {1,1};
        uint64x2_t x2 = vld1q_u64(v2);

        uint32x4_t x3 = {0,0,0,0};
        x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
        x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
        uint64x2_t x4 = {0,0};
        x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
        x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);

        // Hack... GCC optimizes away the code and returns true
        result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1));
    }

    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif  // CRYPTOPP_ARM_NEON_AVAILABLE
}
177
178NAMESPACE_END
Library configuration file.
Crypto++ library namespace.
Precompiled header file.
Common C++ header files.