Skip to content

Commit b865871

Browse files
authored
gh-99108: add support for SIMD-accelerated HMAC-BLAKE2 (#132120)
1 parent c55c020 commit b865871

File tree

1 file changed

+75
-4
lines changed

1 file changed

+75
-4
lines changed

Modules/hmacmodule.c

+75-4
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,21 @@
2020
#include "pycore_hashtable.h"
2121
#include "pycore_strhex.h" // _Py_strhex()
2222

23+
/*
24+
* Taken from blake2module.c. In the future, detection of SIMD support
25+
* should be delegated to https://github.com/python/cpython/pull/125011.
26+
*/
27+
#if defined(__x86_64__) && defined(__GNUC__)
28+
# include <cpuid.h>
29+
#elif defined(_M_X64)
30+
# include <intrin.h>
31+
#endif
32+
33+
#if defined(__APPLE__) && defined(__arm64__)
34+
# undef HACL_CAN_COMPILE_SIMD128
35+
# undef HACL_CAN_COMPILE_SIMD256
36+
#endif
37+
2338
// Small mismatch between the variable names Python defines as part of configure
2439
// and the ones HACL* expects to be set in order to enable those headers.
2540
#define HACL_CAN_COMPILE_VEC128 HACL_CAN_COMPILE_SIMD128
@@ -1667,17 +1682,73 @@ hmacmodule_init_strings(hmacmodule_state *state)
16671682
static void
16681683
hmacmodule_init_cpu_features(hmacmodule_state *state)
16691684
{
1685+
int eax1 = 0, ebx1 = 0, ecx1 = 0, edx1 = 0;
1686+
int eax7 = 0, ebx7 = 0, ecx7 = 0, edx7 = 0;
1687+
#if defined(__x86_64__) && defined(__GNUC__)
1688+
__cpuid_count(1, 0, eax1, ebx1, ecx1, edx1);
1689+
__cpuid_count(7, 0, eax7, ebx7, ecx7, edx7);
1690+
#elif defined(_M_X64)
1691+
int info1[4] = { 0 };
1692+
__cpuidex(info1, 1, 0);
1693+
eax1 = info1[0], ebx1 = info1[1], ecx1 = info1[2], edx1 = info1[3];
1694+
1695+
int info7[4] = { 0 };
1696+
__cpuidex(info7, 7, 0);
1697+
eax7 = info7[0], ebx7 = info7[1], ecx7 = info7[2], edx7 = info7[3];
1698+
#endif
1699+
// fmt: off
1700+
(void)eax1; (void)ebx1; (void)ecx1; (void)edx1;
1701+
(void)eax7; (void)ebx7; (void)ecx7; (void)edx7;
1702+
// fmt: on
1703+
1704+
#define EBX_AVX2 (1 << 5)
1705+
#define ECX_SSE3 (1 << 0)
1706+
#define ECX_SSSE3 (1 << 9)
1707+
#define ECX_SSE4_1 (1 << 19)
1708+
#define ECX_SSE4_2 (1 << 20)
1709+
#define ECX_AVX (1 << 28)
1710+
#define EDX_SSE (1 << 25)
1711+
#define EDX_SSE2 (1 << 26)
1712+
#define EDX_CMOV (1 << 15)
1713+
1714+
bool avx = (ecx1 & ECX_AVX) != 0;
1715+
bool avx2 = (ebx7 & EBX_AVX2) != 0;
1716+
1717+
bool sse = (edx1 & EDX_SSE) != 0;
1718+
bool sse2 = (edx1 & EDX_SSE2) != 0;
1719+
bool cmov = (edx1 & EDX_CMOV) != 0;
1720+
1721+
bool sse3 = (ecx1 & ECX_SSE3) != 0;
1722+
bool sse41 = (ecx1 & ECX_SSE4_1) != 0;
1723+
bool sse42 = (ecx1 & ECX_SSE4_2) != 0;
1724+
1725+
#undef EDX_CMOV
1726+
#undef EDX_SSE2
1727+
#undef EDX_SSE
1728+
#undef ECX_AVX
1729+
#undef ECX_SSE4_2
1730+
#undef ECX_SSE4_1
1731+
#undef ECX_SSSE3
1732+
#undef ECX_SSE3
1733+
#undef EBX_AVX2
1734+
16701735
#if HACL_CAN_COMPILE_SIMD128
1671-
// TODO: use py_cpuid_features (gh-125022) to deduce what we want
1672-
state->can_run_simd128 = false;
1736+
// TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection
1737+
state->can_run_simd128 = sse && sse2 && sse3 && sse41 && sse42 && cmov;
16731738
#else
1739+
// fmt: off
1740+
(void)sse; (void)sse2; (void)sse3; (void)sse41; (void)sse42; (void)cmov;
1741+
// fmt: on
16741742
state->can_run_simd128 = false;
16751743
#endif
16761744

16771745
#if HACL_CAN_COMPILE_SIMD256
1678-
// TODO: use py_cpuid_features (gh-125022) to deduce what we want
1679-
state->can_run_simd256 = false;
1746+
// TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection
1747+
state->can_run_simd256 = state->can_run_simd128 && avx && avx2;
16801748
#else
1749+
// fmt: off
1750+
(void)avx; (void)avx2;
1751+
// fmt: on
16811752
state->can_run_simd256 = false;
16821753
#endif
16831754
}

0 commit comments

Comments
 (0)