1;; rdrand.asm - written and placed in public domain by Jeffrey Walton and Uri Blumenthal.
2;; Copyright assigned to the Crypto++ project.
4;; This ASM file provides RDRAND and RDSEED to downlevel Unix and Linux tool
5;; chains. You will need a modern Nasm, however. You can also use it in place
6;; of intrinsics. The routines below run a little faster than the intrinsic-based routines.
9;; nasm -f elf32 rdrand.s -DX86 -g -o rdrand-x86.o
10;; nasm -f elfx32 rdrand.s -DX32 -g -o rdrand-x32.o
11;; nasm -f elf64 rdrand.s -DX64 -g -o rdrand-x64.o
13;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16;; C/C++ Function prototypes
18;; extern "C" void NASM_RDRAND_GenerateBlock(byte* ptr, size_t size);
19;; extern "C" void NASM_RDSEED_GenerateBlock(byte* ptr, size_t size);
21;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
22;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Per-architecture register aliases. One of X86, X32 or X64 is expected on
;; the nasm command line (-DX86 / -DX32 / -DX64); otherwise assembly stops
;; with the %error at the end of this chain.
;;   buffer - register carrying the output pointer (the `ptr` argument)
;;   bsize  - register carrying the byte count (the `size` argument)
;;   lsize  - narrow register used while handling the tail bytes
;;   MWSIZE - machine word size in bytes
24%ifdef X86 ;; Set via the command line
29%define lsize dl ;; Used for tail bytes, 1-byte constants (dl = low byte of edx)
30%define MWSIZE 04h ;; Machine word size: 4 bytes
32%elifdef X32 ;; Set via the command line
33%define buffer edi ;; System V (Linux) ABI: 32-bit view of the 1st argument register
34%define bsize esi ;; System V (Linux) ABI: 32-bit view of the 2nd argument register
36%define MWSIZE 04h ;; Machine word size: 4 bytes
38%elifdef X64 ;; Set via the command line
39%ifdef CYGWIN ;; Cygwin follows Windows ABI here, not Linux ABI
40%define buffer rcx ;; Windows x64 ABI: 1st argument register
41%define bsize rdx ;; Windows x64 ABI: 2nd argument register
42%define lsize dx ;; Used for tail bytes, 2-byte constants (dx = low 16 bits of bsize)
44%define buffer rdi ;; System V (Linux) ABI: 1st argument register
45%define bsize rsi ;; System V (Linux) ABI: 2nd argument register
46%define lsize si ;; Used for tail bytes, 2-byte constants (si = low 16 bits of bsize)
48%define MWSIZE 08h ;; Machine word size: 8 bytes
51%error Missing or unknown architecture
54;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
55;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Redirect the two exported names to their underscore-prefixed spellings, so
;; every later `global` and label uses the prefixed symbol.
;; NOTE(review): the same pair of defines appears twice; presumably each copy
;; belongs to a different platform guard - confirm both guards are present.
60%define NASM_RDRAND_GenerateBlock _NASM_RDRAND_GenerateBlock
61%define NASM_RDSEED_GenerateBlock _NASM_RDSEED_GenerateBlock
66%define NASM_RDRAND_GenerateBlock _NASM_RDRAND_GenerateBlock
67%define NASM_RDSEED_GenerateBlock _NASM_RDSEED_GenerateBlock
71;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
72;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; NASM_RDRAND_GenerateBlock, X86 build (assembled only when X86 is defined).
;; C prototype: extern "C" void NASM_RDRAND_GenerateBlock(byte* ptr, size_t size);
;; Stages, going by the labels and comments below: 16-byte blocks while at
;; least 16 bytes remain, then the remainder, then a 1..3 byte tail at
;; .Partial_Machine_Word, finishing at .GenerateBlock_Return.
;; NOTE(review): each `jnc .Call_RDRAND_EAX_n` branches when the carry flag is
;; clear; presumably this re-issues an rdrand that returned no data (CF=0) -
;; confirm against the rdrand sites.
74%ifdef X86 ;; Set via the command line
76global NASM_RDRAND_GenerateBlock
80NASM_RDRAND_GenerateBlock:
87 ;; A block of 16 bytes appears to be optimal. Adding
88 ;; more rdrand calls degrades performance.
96 jnc .Call_RDRAND_EAX_4
101 jnc .Call_RDRAND_EAX_3
106 jnc .Call_RDRAND_EAX_2
111 jnc .Call_RDRAND_EAX_1
118 jae .GenerateBlock_16 ;; unsigned >=: taken while a full 16-byte block still fits
120 ;; Fewer than 16 bytes remain
124 je .GenerateBlock_Return ;; presumably taken when nothing is left - TODO confirm the compare
129 jnc .Call_RDRAND_EAX_0
132 jb .Partial_Machine_Word ;; unsigned <: less than a full machine word is left
143 ;; 1,2,3 bytes remain
144.Partial_Machine_Word:
146 ;; Test bit 1 to see if size is at least 2
156 ;; Test bit 0 to see if size is at least 1
164 ;; We've hit all the bits
166.GenerateBlock_Return:
173;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
174;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; NASM_RDRAND_GenerateBlock, X64 and X32 builds.
;; C prototype: extern "C" void NASM_RDRAND_GenerateBlock(byte* ptr, size_t size);
;; Stages, going by the labels and comments below: 32-byte blocks while at
;; least 32 bytes remain, then the remainder, then a 1..7 byte tail at
;; .Partial_Machine_Word, finishing at .GenerateBlock_Return.
;; NOTE(review): `%ifdef` expects macro names rather than an expression, so
;; `or` is not a logical operator on the next line - confirm the assembler in
;; use treats it as "X64 or X32 is defined".
;; NOTE(review): each `jnc .Call_RDRAND_RAX_n` branches when the carry flag is
;; clear; presumably this re-issues an rdrand that returned no data (CF=0) -
;; confirm against the rdrand sites.
176%ifdef X64 or X32 ;; Set via the command line
178global NASM_RDRAND_GenerateBlock
182NASM_RDRAND_GenerateBlock:
184 ;; No Load_Arguments step: buffer and bsize are %define aliases for registers
186 ;; A block of 32 bytes appears to be optimal. Adding
187 ;; more rdrand calls degrades performance.
195 jnc .Call_RDRAND_RAX_4
200 jnc .Call_RDRAND_RAX_3
205 jnc .Call_RDRAND_RAX_2
210 jnc .Call_RDRAND_RAX_1
217 jae .GenerateBlock_32 ;; unsigned >=: taken while a full 32-byte block still fits
219 ;; Fewer than 32 bytes remain
223 je .GenerateBlock_Return ;; presumably taken when nothing is left - TODO confirm the compare
227 jnc .Call_RDRAND_RAX_0
230 jb .Partial_Machine_Word ;; unsigned <: less than a full machine word is left
241 ;; 1,2,3,4,5,6,7 bytes remain
242.Partial_Machine_Word:
244 ;; Test bit 2 to see if size is at least 4
254 ;; Test bit 1 to see if size is at least 2
264 ;; Test bit 0 to see if size is at least 1
272 ;; We've hit all the bits
274.GenerateBlock_Return:
281;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
282;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; NASM_RDSEED_GenerateBlock, X86 build (assembled only when X86 is defined).
;; C prototype: extern "C" void NASM_RDSEED_GenerateBlock(byte* ptr, size_t size);
;; Stages, going by the labels and comments below: 16-byte blocks while at
;; least 16 bytes remain, then the remainder, then a 1..3 byte tail at
;; .Partial_Machine_Word, finishing at .GenerateBlock_Return.
;; NOTE(review): each `jnc .Call_RDSEED_EAX_n` branches when the carry flag is
;; clear; presumably this re-issues an rdseed that returned no data (CF=0) -
;; confirm against the rdseed sites.
284%ifdef X86 ;; Set via the command line
286global NASM_RDSEED_GenerateBlock
290NASM_RDSEED_GenerateBlock:
297 ;; A block of 16 bytes appears to be optimal. Adding
298 ;; more rdseed calls degrades performance.
306 jnc .Call_RDSEED_EAX_4
311 jnc .Call_RDSEED_EAX_3
316 jnc .Call_RDSEED_EAX_2
321 jnc .Call_RDSEED_EAX_1
328 jae .GenerateBlock_16 ;; unsigned >=: taken while a full 16-byte block still fits
330 ;; Fewer than 16 bytes remain
334 je .GenerateBlock_Return ;; presumably taken when nothing is left - TODO confirm the compare
339 jnc .Call_RDSEED_EAX_0
342 jb .Partial_Machine_Word ;; unsigned <: less than a full machine word is left
353 ;; 1,2,3 bytes remain
354.Partial_Machine_Word:
356 ;; Test bit 1 to see if size is at least 2
366 ;; Test bit 0 to see if size is at least 1
374 ;; We've hit all the bits
376.GenerateBlock_Return:
383;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
384;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; NASM_RDSEED_GenerateBlock, X64 and X32 builds.
;; C prototype: extern "C" void NASM_RDSEED_GenerateBlock(byte* ptr, size_t size);
;; Stages, going by the labels and comments below: 32-byte blocks while at
;; least 32 bytes remain, then the remainder, then a 1..7 byte tail at
;; .Partial_Machine_Word, finishing at .GenerateBlock_Return.
;; NOTE(review): `%ifdef` expects macro names rather than an expression, so
;; `or` is not a logical operator on the next line - confirm the assembler in
;; use treats it as "X64 or X32 is defined".
;; NOTE(review): each `jnc .Call_RDSEED_RAX_n` branches when the carry flag is
;; clear; presumably this re-issues an rdseed that returned no data (CF=0) -
;; confirm against the rdseed sites.
386%ifdef X64 or X32 ;; Set via the command line
388global NASM_RDSEED_GenerateBlock
392NASM_RDSEED_GenerateBlock:
394 ;; No Load_Arguments step: buffer and bsize are %define aliases for registers
396 ;; A block of 32 bytes appears to be optimal. Adding
397 ;; more rdseed calls degrades performance.
405 jnc .Call_RDSEED_RAX_4
410 jnc .Call_RDSEED_RAX_3
415 jnc .Call_RDSEED_RAX_2
420 jnc .Call_RDSEED_RAX_1
427 jae .GenerateBlock_32 ;; unsigned >=: taken while a full 32-byte block still fits
429 ;; Fewer than 32 bytes remain
433 je .GenerateBlock_Return ;; presumably taken when nothing is left - TODO confirm the compare
437 jnc .Call_RDSEED_RAX_0
440 jb .Partial_Machine_Word ;; unsigned <: less than a full machine word is left
451 ;; 1,2,3,4,5,6,7 bytes remain
452.Partial_Machine_Word:
454 ;; Test bit 2 to see if size is at least 4
464 ;; Test bit 1 to see if size is at least 2
474 ;; Test bit 0 to see if size is at least 1
482 ;; We've hit all the bits
484.GenerateBlock_Return:
491;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
492;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;