From: "Rémi Bernon" <rbernon@codeweavers.com> Subject: [PATCH 3/4] msvcrt: Check for ERMS support and use __stosb for large memset calls. Message-Id: <20210913122341.2109469-3-rbernon@codeweavers.com> Date: Mon, 13 Sep 2021 14:23:40 +0200 In-Reply-To: <20210913122341.2109469-1-rbernon@codeweavers.com> References: <20210913122341.2109469-1-rbernon@codeweavers.com> Signed-off-by: Rémi Bernon <rbernon@codeweavers.com> --- dlls/msvcrt/math.c | 16 ++++++++++++++++ dlls/msvcrt/msvcrt.h | 1 + dlls/msvcrt/string.c | 5 +++++ 3 files changed, 22 insertions(+) diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index 7f59a4d20d4..6639bb5ee23 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -43,6 +43,7 @@ #include #include #include +#include <intrin.h> #include "msvcrt.h" #include "winternl.h" @@ -64,11 +65,26 @@ typedef int (CDECL *MSVCRT_matherr_func)(struct _exception *); static MSVCRT_matherr_func MSVCRT_default_matherr_func = NULL; +BOOL erms_supported; BOOL sse2_supported; static BOOL sse2_enabled; void msvcrt_init_math( void *module ) { +#if defined(__i386__) || defined(__x86_64__) + int regs[4]; + + __cpuid(regs, 0); + if (regs[0] < 7) erms_supported = FALSE; + else + { + __cpuidex(regs, 7, 0); + erms_supported = ((regs[1] >> 9) & 1); + } +#else + erms_supported = FALSE; +#endif + sse2_supported = IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE ); #if _MSVCR_VER <=71 sse2_enabled = FALSE; diff --git a/dlls/msvcrt/msvcrt.h b/dlls/msvcrt/msvcrt.h index 60f8c2f5ef2..022eced35d9 100644 --- a/dlls/msvcrt/msvcrt.h +++ b/dlls/msvcrt/msvcrt.h @@ -33,6 +33,7 @@ #undef strncpy #undef wcsncpy +extern BOOL erms_supported DECLSPEC_HIDDEN; extern BOOL sse2_supported DECLSPEC_HIDDEN; #define DBL80_MAX_10_EXP 4932 diff --git a/dlls/msvcrt/string.c b/dlls/msvcrt/string.c index 3a7312572ab..d09b44fbcd6 100644 --- a/dlls/msvcrt/string.c +++ b/dlls/msvcrt/string.c @@ -27,6 +27,7 @@ #include #include #include +#include <intrin.h> #include #include "msvcrt.h" #include "bnum.h" @@ -2857,6 +2858,10 @@ void * __cdecl memcpy(void *dst, const 
void *src, size_t n) static void memset_aligned_32(unsigned char *d, uint64_t v, size_t n) { +#if defined(__i386__) || defined(__x86_64__) + if (n >= 2048 && erms_supported) __stosb(d, v, n); + else +#endif while (n >= 32) { *(uint64_t*)(d + n - 32) = v; -- 2.33.0