From: Sebastian Lackner Subject: [2/3] vcomp: Implement _vcomp_reduction_r4 and add tests. Message-Id: Date: Thu, 8 Sep 2016 08:21:49 +0200 Signed-off-by: Sebastian Lackner --- Yes, there are really bool_and and bool_or reduction operations for float/double. I decided to explicitly write down the comparison with 0.0 in this case because it otherwise looks like a mistake. According to my tests all values != 0.0 are seen as true (without any small epsilon to avoid rounding errors... well). dlls/vcomp/main.c | 40 +++++++++++++++++++++++++++++++++ dlls/vcomp/tests/vcomp.c | 52 ++++++++++++++++++++++++++++++++++++++++++++ dlls/vcomp/vcomp.spec | 2 - dlls/vcomp100/vcomp100.spec | 2 - dlls/vcomp110/vcomp110.spec | 2 - dlls/vcomp120/vcomp120.spec | 2 - dlls/vcomp140/vcomp140.spec | 2 - dlls/vcomp90/vcomp90.spec | 2 - 8 files changed, 98 insertions(+), 6 deletions(-) diff --git a/dlls/vcomp/main.c b/dlls/vcomp/main.c index 8523155..d4c9366 100644 --- a/dlls/vcomp/main.c +++ b/dlls/vcomp/main.c @@ -810,6 +810,46 @@ void CDECL _vcomp_atomic_sub_r4(float *dest, float val) while (interlocked_cmpxchg((int *)dest, new, old) != old); } +static void CDECL _vcomp_atomic_bool_and_r4(float *dest, float val) /* logical AND: stores 1.0 in *dest when both *dest and val are != 0.0, otherwise 0.0 */ +{ + int old, new; + do + { + old = *(int *)dest; /* snapshot the float's bits as an int so they can be handed to interlocked_cmpxchg */ + *(float *)&new = (*(float *)&old != 0.0) ? (val != 0.0) : 0.0; + } + while (interlocked_cmpxchg((int *)dest, new, old) != old); /* NOTE(review): presumably retries when *dest changed since the snapshot; interlocked_cmpxchg is defined outside this patch - confirm */ +} + +static void CDECL _vcomp_atomic_bool_or_r4(float *dest, float val) /* logical OR: a non-zero *dest is kept as is; a zero *dest becomes 1.0 when val != 0.0, otherwise 0.0 */ +{ + int old, new; + do + { + old = *(int *)dest; + *(float *)&new = (*(float *)&old != 0.0) ? 
*(float *)&old : (val != 0.0); + } + while (interlocked_cmpxchg((int *)dest, new, old) != old); +} + +void CDECL _vcomp_reduction_r4(unsigned int flags, float *dest, float val) /* applies val to *dest using the helper selected by bits 8-11 of flags */ +{ + static void (CDECL * const funcs[])(float *, float) = + { + _vcomp_atomic_add_r4, + _vcomp_atomic_add_r4, + _vcomp_atomic_mul_r4, + _vcomp_atomic_bool_or_r4, + _vcomp_atomic_bool_or_r4, + _vcomp_atomic_bool_or_r4, + _vcomp_atomic_bool_and_r4, + _vcomp_atomic_bool_or_r4, + }; + unsigned int op = (flags >> 8) & 0xf; /* 4-bit operation code, 0..15 */ + op = min(op, sizeof(funcs)/sizeof(funcs[0]) - 1); /* codes past the end of the table fall back to its last entry */ + funcs[op](dest, val); +} + void CDECL _vcomp_atomic_add_r8(double *dest, double val) { LONG64 old, new; diff --git a/dlls/vcomp/tests/vcomp.c b/dlls/vcomp/tests/vcomp.c index 088f256..52fecf5 100644 --- a/dlls/vcomp/tests/vcomp.c +++ b/dlls/vcomp/tests/vcomp.c @@ -107,6 +107,7 @@ static void (CDECL *p_vcomp_reduction_i1)(unsigned int flags, char *dest, cha static void (CDECL *p_vcomp_reduction_i2)(unsigned int flags, short *dest, short val); static void (CDECL *p_vcomp_reduction_i4)(unsigned int flags, int *dest, int val); static void (CDECL *p_vcomp_reduction_i8)(unsigned int flags, LONG64 *dest, LONG64 val); +static void (CDECL *p_vcomp_reduction_r4)(unsigned int flags, float *dest, float val); static void (CDECL *p_vcomp_reduction_u1)(unsigned int flags, unsigned char *dest, unsigned char val); static void (CDECL *p_vcomp_reduction_u2)(unsigned int flags, unsigned short *dest, unsigned short val); static void (CDECL *p_vcomp_reduction_u4)(unsigned int flags, unsigned int *dest, unsigned int val); @@ -356,6 +357,7 @@ static BOOL init_vcomp(void) VCOMP_GET_PROC(_vcomp_reduction_i2); VCOMP_GET_PROC(_vcomp_reduction_i4); VCOMP_GET_PROC(_vcomp_reduction_i8); + VCOMP_GET_PROC(_vcomp_reduction_r4); VCOMP_GET_PROC(_vcomp_reduction_u1); VCOMP_GET_PROC(_vcomp_reduction_u2); VCOMP_GET_PROC(_vcomp_reduction_u4); @@ -2108,6 +2110,55 @@ static void test_reduction_integer64(void) } } +static void test_reduction_float(void) +{ + static 
const struct + { + unsigned int flags; + float v1, v2, expected; /* v1: initial *dest, v2: val argument, expected: *dest after the call */ + } + tests[] = + { + { 0x000, 42.0, 17.0, 42.0 + 17.0 }, + { VCOMP_REDUCTION_FLAGS_ADD, 42.0, 17.0, 42.0 + 17.0 }, + { VCOMP_REDUCTION_FLAGS_MUL, 42.0, 17.0, 42.0 * 17.0 }, + { 0x300, 0.0, 2.0, 1.0 }, + { 0x400, 0.0, 2.0, 1.0 }, + { 0x500, 0.0, 2.0, 1.0 }, + { VCOMP_REDUCTION_FLAGS_BOOL_AND, 0.0, 0.0, 0.0 }, + { VCOMP_REDUCTION_FLAGS_BOOL_AND, 0.0, 2.0, 0.0 }, + { VCOMP_REDUCTION_FLAGS_BOOL_AND, 1.0, 0.0, 0.0 }, + { VCOMP_REDUCTION_FLAGS_BOOL_AND, 1.0, 1.0e-5, 1.0 }, + { VCOMP_REDUCTION_FLAGS_BOOL_AND, 1.0, 2.0, 1.0 }, + { VCOMP_REDUCTION_FLAGS_BOOL_AND, 2.0, 0.0, 0.0 }, + { VCOMP_REDUCTION_FLAGS_BOOL_AND, 2.0, 2.0, 1.0 }, + { VCOMP_REDUCTION_FLAGS_BOOL_OR, 0.0, 0.0, 0.0 }, + { VCOMP_REDUCTION_FLAGS_BOOL_OR, 0.0, 1.0e-5, 1.0 }, + { VCOMP_REDUCTION_FLAGS_BOOL_OR, 0.0, 2.0, 1.0 }, + { VCOMP_REDUCTION_FLAGS_BOOL_OR, 1.0, 0.0, 1.0 }, + { VCOMP_REDUCTION_FLAGS_BOOL_OR, 1.0, 2.0, 1.0 }, + { VCOMP_REDUCTION_FLAGS_BOOL_OR, 2.0, 0.0, 2.0 }, + { VCOMP_REDUCTION_FLAGS_BOOL_OR, 2.0, 2.0, 2.0 }, + { 0x800, 0.0, 2.0, 1.0 }, + { 0x900, 0.0, 2.0, 1.0 }, + { 0xa00, 0.0, 2.0, 1.0 }, + { 0xb00, 0.0, 2.0, 1.0 }, + { 0xc00, 0.0, 2.0, 1.0 }, + { 0xd00, 0.0, 2.0, 1.0 }, + { 0xe00, 0.0, 2.0, 1.0 }, + { 0xf00, 0.0, 2.0, 1.0 }, + }; + int i; + + for (i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) + { + float val = tests[i].v1; + p_vcomp_reduction_r4(tests[i].flags, &val, tests[i].v2); + ok(tests[i].expected - 0.001 < val && val < tests[i].expected + 0.001, /* passes when val lies strictly within 0.001 of expected */ + "test %d: expected val == %f, got %f\n", i, tests[i].expected, val); + } +} + START_TEST(vcomp) { if (!init_vcomp()) @@ -2136,6 +2187,7 @@ START_TEST(vcomp) test_reduction_integer16(); test_reduction_integer32(); test_reduction_integer64(); + test_reduction_float(); release_vcomp(); } diff --git a/dlls/vcomp/vcomp.spec b/dlls/vcomp/vcomp.spec index 6f57746..22f06cb 100644 --- a/dlls/vcomp/vcomp.spec +++ b/dlls/vcomp/vcomp.spec @@ -77,7 +77,7 @@ @ cdecl 
_vcomp_reduction_i2(long ptr long) @ cdecl _vcomp_reduction_i4(long ptr long) @ cdecl _vcomp_reduction_i8(long ptr int64) -@ stub _vcomp_reduction_r4 +@ cdecl _vcomp_reduction_r4(long ptr float) @ stub _vcomp_reduction_r8 @ cdecl _vcomp_reduction_u1(long ptr long) _vcomp_reduction_i1 @ cdecl _vcomp_reduction_u2(long ptr long) _vcomp_reduction_i2 diff --git a/dlls/vcomp100/vcomp100.spec b/dlls/vcomp100/vcomp100.spec index 564b3d7..b87bd8b 100644 --- a/dlls/vcomp100/vcomp100.spec +++ b/dlls/vcomp100/vcomp100.spec @@ -77,7 +77,7 @@ @ cdecl _vcomp_reduction_i2(long ptr long) vcomp._vcomp_reduction_i2 @ cdecl _vcomp_reduction_i4(long ptr long) vcomp._vcomp_reduction_i4 @ cdecl _vcomp_reduction_i8(long ptr int64) vcomp._vcomp_reduction_i8 -@ stub _vcomp_reduction_r4 +@ cdecl _vcomp_reduction_r4(long ptr float) vcomp._vcomp_reduction_r4 @ stub _vcomp_reduction_r8 @ cdecl _vcomp_reduction_u1(long ptr long) vcomp._vcomp_reduction_u1 @ cdecl _vcomp_reduction_u2(long ptr long) vcomp._vcomp_reduction_u2 diff --git a/dlls/vcomp110/vcomp110.spec b/dlls/vcomp110/vcomp110.spec index fa6c047..e86d50b 100644 --- a/dlls/vcomp110/vcomp110.spec +++ b/dlls/vcomp110/vcomp110.spec @@ -78,7 +78,7 @@ @ cdecl _vcomp_reduction_i2(long ptr long) vcomp._vcomp_reduction_i2 @ cdecl _vcomp_reduction_i4(long ptr long) vcomp._vcomp_reduction_i4 @ cdecl _vcomp_reduction_i8(long ptr int64) vcomp._vcomp_reduction_i8 -@ stub _vcomp_reduction_r4 +@ cdecl _vcomp_reduction_r4(long ptr float) vcomp._vcomp_reduction_r4 @ stub _vcomp_reduction_r8 @ cdecl _vcomp_reduction_u1(long ptr long) vcomp._vcomp_reduction_u1 @ cdecl _vcomp_reduction_u2(long ptr long) vcomp._vcomp_reduction_u2 diff --git a/dlls/vcomp120/vcomp120.spec b/dlls/vcomp120/vcomp120.spec index fa6c047..e86d50b 100644 --- a/dlls/vcomp120/vcomp120.spec +++ b/dlls/vcomp120/vcomp120.spec @@ -78,7 +78,7 @@ @ cdecl _vcomp_reduction_i2(long ptr long) vcomp._vcomp_reduction_i2 @ cdecl _vcomp_reduction_i4(long ptr long) vcomp._vcomp_reduction_i4 @ cdecl 
_vcomp_reduction_i8(long ptr int64) vcomp._vcomp_reduction_i8 -@ stub _vcomp_reduction_r4 +@ cdecl _vcomp_reduction_r4(long ptr float) vcomp._vcomp_reduction_r4 @ stub _vcomp_reduction_r8 @ cdecl _vcomp_reduction_u1(long ptr long) vcomp._vcomp_reduction_u1 @ cdecl _vcomp_reduction_u2(long ptr long) vcomp._vcomp_reduction_u2 diff --git a/dlls/vcomp140/vcomp140.spec b/dlls/vcomp140/vcomp140.spec index fa6c047..e86d50b 100644 --- a/dlls/vcomp140/vcomp140.spec +++ b/dlls/vcomp140/vcomp140.spec @@ -78,7 +78,7 @@ @ cdecl _vcomp_reduction_i2(long ptr long) vcomp._vcomp_reduction_i2 @ cdecl _vcomp_reduction_i4(long ptr long) vcomp._vcomp_reduction_i4 @ cdecl _vcomp_reduction_i8(long ptr int64) vcomp._vcomp_reduction_i8 -@ stub _vcomp_reduction_r4 +@ cdecl _vcomp_reduction_r4(long ptr float) vcomp._vcomp_reduction_r4 @ stub _vcomp_reduction_r8 @ cdecl _vcomp_reduction_u1(long ptr long) vcomp._vcomp_reduction_u1 @ cdecl _vcomp_reduction_u2(long ptr long) vcomp._vcomp_reduction_u2 diff --git a/dlls/vcomp90/vcomp90.spec b/dlls/vcomp90/vcomp90.spec index 564b3d7..b87bd8b 100644 --- a/dlls/vcomp90/vcomp90.spec +++ b/dlls/vcomp90/vcomp90.spec @@ -77,7 +77,7 @@ @ cdecl _vcomp_reduction_i2(long ptr long) vcomp._vcomp_reduction_i2 @ cdecl _vcomp_reduction_i4(long ptr long) vcomp._vcomp_reduction_i4 @ cdecl _vcomp_reduction_i8(long ptr int64) vcomp._vcomp_reduction_i8 -@ stub _vcomp_reduction_r4 +@ cdecl _vcomp_reduction_r4(long ptr float) vcomp._vcomp_reduction_r4 @ stub _vcomp_reduction_r8 @ cdecl _vcomp_reduction_u1(long ptr long) vcomp._vcomp_reduction_u1 @ cdecl _vcomp_reduction_u2(long ptr long) vcomp._vcomp_reduction_u2 -- 2.9.0