From: Piotr Caban Subject: Re: [PATCH] msvcrt: Avoid disallowed unaligned writes in memset on ARM Message-Id: <00876200-ab90-e23e-86e5-df022b6b0199@gmail.com> Date: Thu, 16 Sep 2021 11:17:17 +0200 In-Reply-To: <20210915202745.3661089-1-martin@martin.st> References: <20210915202745.3661089-1-martin@martin.st> Hi Martin, On 9/15/21 10:27 PM, Martin Storsjo wrote: > ARM can do 64 bit writes with the STRD instruction, but that > instruction requires a 32 bit aligned address - while these stores > are unaligned. > > Two consecutive stores to uint32_t* pointers can also be fused > into one single STRD, as a uint32_t* is supposed to be properly > aligned - therefore, do these stores as stores to volatile uint32_t* > to avoid fusing them. How about letting the compiler know that the pointers are unaligned instead? Does the attached patch work for you? Thanks, Piotr From b0c89696de25fa6dead492dea0cf1d9d4fd20639 Mon Sep 17 00:00:00 2001 From: Piotr Caban Date: Thu, 16 Sep 2021 11:08:02 +0200 Subject: [PATCH] fix_memset To: wine-devel --- dlls/msvcrt/string.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/dlls/msvcrt/string.c b/dlls/msvcrt/string.c index f2b1b4a5b11..5655fbfe68a 100644 --- a/dlls/msvcrt/string.c +++ b/dlls/msvcrt/string.c @@ -2872,21 +2872,25 @@ static inline void memset_aligned_32(unsigned char *d, uint64_t v, size_t n) */ void *__cdecl memset(void *dst, int c, size_t n) { + typedef uint64_t DECLSPEC_ALIGN(1) unaligned_ui64; + typedef uint32_t DECLSPEC_ALIGN(1) unaligned_ui32; + typedef uint16_t DECLSPEC_ALIGN(1) unaligned_ui16; + uint64_t v = 0x101010101010101ull * (unsigned char)c; unsigned char *d = (unsigned char *)dst; size_t a = 0x20 - ((uintptr_t)d & 0x1f); if (n >= 16) { - *(uint64_t *)(d + 0) = v; - *(uint64_t *)(d + 8) = v; - *(uint64_t *)(d + n - 16) = v; - *(uint64_t *)(d + n - 8) = v; + *(unaligned_ui64 *)(d + 0) = v; + *(unaligned_ui64 *)(d + 8) = v; + *(unaligned_ui64 *)(d + n - 16) = v; + 
*(unaligned_ui64 *)(d + n - 8) = v; if (n <= 32) return dst; - *(uint64_t *)(d + 16) = v; - *(uint64_t *)(d + 24) = v; - *(uint64_t *)(d + n - 32) = v; - *(uint64_t *)(d + n - 24) = v; + *(unaligned_ui64 *)(d + 16) = v; + *(unaligned_ui64 *)(d + 24) = v; + *(unaligned_ui64 *)(d + n - 32) = v; + *(unaligned_ui64 *)(d + n - 24) = v; if (n <= 64) return dst; n = (n - a) & ~0x1f; @@ -2895,20 +2899,20 @@ void *__cdecl memset(void *dst, int c, size_t n) } if (n >= 8) { - *(uint64_t *)d = v; - *(uint64_t *)(d + n - 8) = v; + *(unaligned_ui64 *)d = v; + *(unaligned_ui64 *)(d + n - 8) = v; return dst; } if (n >= 4) { - *(uint32_t *)d = v; - *(uint32_t *)(d + n - 4) = v; + *(unaligned_ui32 *)d = v; + *(unaligned_ui32 *)(d + n - 4) = v; return dst; } if (n >= 2) { - *(uint16_t *)d = v; - *(uint16_t *)(d + n - 2) = v; + *(unaligned_ui16 *)d = v; + *(unaligned_ui16 *)(d + n - 2) = v; return dst; } if (n >= 1) -- 2.32.0