/* runtime.c -- run-time detection of CPU features (NEON, SSE*, AVX*, PCLMUL, AES-NI, RDRAND). */
  1. #include <stddef.h>
  2. #include <stdint.h>
  3. #ifdef HAVE_ANDROID_GETCPUFEATURES
  4. # include <cpu-features.h>
  5. #endif
  6. #include "private/common.h"
  7. #include "runtime.h"
  8. typedef struct CPUFeatures_ {
  9. int initialized;
  10. int has_neon;
  11. int has_sse2;
  12. int has_sse3;
  13. int has_ssse3;
  14. int has_sse41;
  15. int has_avx;
  16. int has_avx2;
  17. int has_avx512f;
  18. int has_pclmul;
  19. int has_aesni;
  20. int has_rdrand;
  21. } CPUFeatures;
  22. static CPUFeatures _cpu_features;
  23. #define CPUID_EBX_AVX2 0x00000020
  24. #define CPUID_EBX_AVX512F 0x00010000
  25. #define CPUID_ECX_SSE3 0x00000001
  26. #define CPUID_ECX_PCLMUL 0x00000002
  27. #define CPUID_ECX_SSSE3 0x00000200
  28. #define CPUID_ECX_SSE41 0x00080000
  29. #define CPUID_ECX_AESNI 0x02000000
  30. #define CPUID_ECX_XSAVE 0x04000000
  31. #define CPUID_ECX_OSXSAVE 0x08000000
  32. #define CPUID_ECX_AVX 0x10000000
  33. #define CPUID_ECX_RDRAND 0x40000000
  34. #define CPUID_EDX_SSE2 0x04000000
  35. #define XCR0_SSE 0x00000002
  36. #define XCR0_AVX 0x00000004
  37. #define XCR0_OPMASK 0x00000020
  38. #define XCR0_ZMM_HI256 0x00000040
  39. #define XCR0_HI16_ZMM 0x00000080
  40. static int
  41. _sodium_runtime_arm_cpu_features(CPUFeatures * const cpu_features)
  42. {
  43. #ifndef __arm__
  44. cpu_features->has_neon = 0;
  45. return -1;
  46. #else
  47. # ifdef __APPLE__
  48. # ifdef __ARM_NEON__
  49. cpu_features->has_neon = 1;
  50. # else
  51. cpu_features->has_neon = 0;
  52. # endif
  53. # elif defined(HAVE_ANDROID_GETCPUFEATURES) && \
  54. defined(ANDROID_CPU_ARM_FEATURE_NEON)
  55. cpu_features->has_neon =
  56. (android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON) != 0x0;
  57. # else
  58. cpu_features->has_neon = 0;
  59. # endif
  60. return 0;
  61. #endif
  62. }
  63. static void
  64. _cpuid(unsigned int cpu_info[4U], const unsigned int cpu_info_type)
  65. {
  66. #if defined(_MSC_VER) && \
  67. (defined(_M_X64) || defined(_M_AMD64) || defined(_M_IX86))
  68. __cpuid((int *) cpu_info, cpu_info_type);
  69. #elif defined(HAVE_CPUID)
  70. cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0;
  71. # ifdef __i386__
  72. __asm__ __volatile__(
  73. "pushfl; pushfl; "
  74. "popl %0; "
  75. "movl %0, %1; xorl %2, %0; "
  76. "pushl %0; "
  77. "popfl; pushfl; popl %0; popfl"
  78. : "=&r"(cpu_info[0]), "=&r"(cpu_info[1])
  79. : "i"(0x200000));
  80. if (((cpu_info[0] ^ cpu_info[1]) & 0x200000) == 0x0) {
  81. return; /* LCOV_EXCL_LINE */
  82. }
  83. # endif
  84. # ifdef __i386__
  85. __asm__ __volatile__("xchgl %%ebx, %k1; cpuid; xchgl %%ebx, %k1"
  86. : "=a"(cpu_info[0]), "=&r"(cpu_info[1]),
  87. "=c"(cpu_info[2]), "=d"(cpu_info[3])
  88. : "0"(cpu_info_type), "2"(0U));
  89. # elif defined(__x86_64__)
  90. __asm__ __volatile__("xchgq %%rbx, %q1; cpuid; xchgq %%rbx, %q1"
  91. : "=a"(cpu_info[0]), "=&r"(cpu_info[1]),
  92. "=c"(cpu_info[2]), "=d"(cpu_info[3])
  93. : "0"(cpu_info_type), "2"(0U));
  94. # else
  95. __asm__ __volatile__("cpuid"
  96. : "=a"(cpu_info[0]), "=b"(cpu_info[1]),
  97. "=c"(cpu_info[2]), "=d"(cpu_info[3])
  98. : "0"(cpu_info_type), "2"(0U));
  99. # endif
  100. #else
  101. (void) cpu_info_type;
  102. cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0;
  103. #endif
  104. }
  105. static int
  106. _sodium_runtime_intel_cpu_features(CPUFeatures * const cpu_features)
  107. {
  108. unsigned int cpu_info[4];
  109. unsigned int id;
  110. uint32_t xcr0 = 0U;
  111. _cpuid(cpu_info, 0x0);
  112. if ((id = cpu_info[0]) == 0U) {
  113. return -1; /* LCOV_EXCL_LINE */
  114. }
  115. _cpuid(cpu_info, 0x00000001);
  116. #ifdef HAVE_EMMINTRIN_H
  117. cpu_features->has_sse2 = ((cpu_info[3] & CPUID_EDX_SSE2) != 0x0);
  118. #else
  119. cpu_features->has_sse2 = 0;
  120. #endif
  121. #ifdef HAVE_PMMINTRIN_H
  122. cpu_features->has_sse3 = ((cpu_info[2] & CPUID_ECX_SSE3) != 0x0);
  123. #else
  124. cpu_features->has_sse3 = 0;
  125. #endif
  126. #ifdef HAVE_TMMINTRIN_H
  127. cpu_features->has_ssse3 = ((cpu_info[2] & CPUID_ECX_SSSE3) != 0x0);
  128. #else
  129. cpu_features->has_ssse3 = 0;
  130. #endif
  131. #ifdef HAVE_SMMINTRIN_H
  132. cpu_features->has_sse41 = ((cpu_info[2] & CPUID_ECX_SSE41) != 0x0);
  133. #else
  134. cpu_features->has_sse41 = 0;
  135. #endif
  136. cpu_features->has_avx = 0;
  137. (void) xcr0;
  138. #ifdef HAVE_AVXINTRIN_H
  139. if ((cpu_info[2] & (CPUID_ECX_AVX | CPUID_ECX_XSAVE | CPUID_ECX_OSXSAVE)) ==
  140. (CPUID_ECX_AVX | CPUID_ECX_XSAVE | CPUID_ECX_OSXSAVE)) {
  141. xcr0 = 0U;
  142. # if defined(HAVE__XGETBV) || \
  143. (defined(_MSC_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) && _MSC_FULL_VER >= 160040219)
  144. xcr0 = (uint32_t) _xgetbv(0);
  145. # elif defined(_MSC_VER) && defined(_M_IX86)
  146. /*
  147. * Visual Studio documentation states that eax/ecx/edx don't need to
  148. * be preserved in inline assembly code. But that doesn't seem to
  149. * always hold true on Visual Studio 2010.
  150. */
  151. __asm {
  152. push eax
  153. push ecx
  154. push edx
  155. xor ecx, ecx
  156. _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0
  157. mov xcr0, eax
  158. pop edx
  159. pop ecx
  160. pop eax
  161. }
  162. # elif defined(HAVE_AVX_ASM)
  163. __asm__ __volatile__(".byte 0x0f, 0x01, 0xd0" /* XGETBV */
  164. : "=a"(xcr0)
  165. : "c"((uint32_t) 0U)
  166. : "%edx");
  167. # endif
  168. if ((xcr0 & (XCR0_SSE | XCR0_AVX)) == (XCR0_SSE | XCR0_AVX)) {
  169. cpu_features->has_avx = 1;
  170. }
  171. }
  172. #endif
  173. cpu_features->has_avx2 = 0;
  174. #ifdef HAVE_AVX2INTRIN_H
  175. if (cpu_features->has_avx) {
  176. unsigned int cpu_info7[4];
  177. _cpuid(cpu_info7, 0x00000007);
  178. cpu_features->has_avx2 = ((cpu_info7[1] & CPUID_EBX_AVX2) != 0x0);
  179. }
  180. #endif
  181. cpu_features->has_avx512f = 0;
  182. #ifdef HAVE_AVX512FINTRIN_H
  183. if (cpu_features->has_avx2) {
  184. unsigned int cpu_info7[4];
  185. _cpuid(cpu_info7, 0x00000007);
  186. /* LCOV_EXCL_START */
  187. if ((cpu_info7[1] & CPUID_EBX_AVX512F) == CPUID_EBX_AVX512F &&
  188. (xcr0 & (XCR0_OPMASK | XCR0_ZMM_HI256 | XCR0_HI16_ZMM))
  189. == (XCR0_OPMASK | XCR0_ZMM_HI256 | XCR0_HI16_ZMM)) {
  190. cpu_features->has_avx512f = 1;
  191. }
  192. /* LCOV_EXCL_STOP */
  193. }
  194. #endif
  195. #ifdef HAVE_WMMINTRIN_H
  196. cpu_features->has_pclmul = ((cpu_info[2] & CPUID_ECX_PCLMUL) != 0x0);
  197. cpu_features->has_aesni = ((cpu_info[2] & CPUID_ECX_AESNI) != 0x0);
  198. #else
  199. cpu_features->has_pclmul = 0;
  200. cpu_features->has_aesni = 0;
  201. #endif
  202. #ifdef HAVE_RDRAND
  203. cpu_features->has_rdrand = ((cpu_info[2] & CPUID_ECX_RDRAND) != 0x0);
  204. #else
  205. cpu_features->has_rdrand = 0;
  206. #endif
  207. return 0;
  208. }
  209. int
  210. _sodium_runtime_get_cpu_features(void)
  211. {
  212. int ret = -1;
  213. ret &= _sodium_runtime_arm_cpu_features(&_cpu_features);
  214. ret &= _sodium_runtime_intel_cpu_features(&_cpu_features);
  215. _cpu_features.initialized = 1;
  216. return ret;
  217. }
  218. int
  219. sodium_runtime_has_neon(void)
  220. {
  221. return _cpu_features.has_neon;
  222. }
  223. int
  224. sodium_runtime_has_sse2(void)
  225. {
  226. return _cpu_features.has_sse2;
  227. }
  228. int
  229. sodium_runtime_has_sse3(void)
  230. {
  231. return _cpu_features.has_sse3;
  232. }
  233. int
  234. sodium_runtime_has_ssse3(void)
  235. {
  236. return _cpu_features.has_ssse3;
  237. }
  238. int
  239. sodium_runtime_has_sse41(void)
  240. {
  241. return _cpu_features.has_sse41;
  242. }
  243. int
  244. sodium_runtime_has_avx(void)
  245. {
  246. return _cpu_features.has_avx;
  247. }
  248. int
  249. sodium_runtime_has_avx2(void)
  250. {
  251. return _cpu_features.has_avx2;
  252. }
  253. int
  254. sodium_runtime_has_avx512f(void)
  255. {
  256. return _cpu_features.has_avx512f;
  257. }
  258. int
  259. sodium_runtime_has_pclmul(void)
  260. {
  261. return _cpu_features.has_pclmul;
  262. }
  263. int
  264. sodium_runtime_has_aesni(void)
  265. {
  266. return _cpu_features.has_aesni;
  267. }
  268. int
  269. sodium_runtime_has_rdrand(void)
  270. {
  271. return _cpu_features.has_rdrand;
  272. }