From: Nozomi Kodama
Subject: d3dx9: Implement D3DXSHMultiply5
Message-Id: <1365662206.52643.YahooMailNeo@web28803.mail.ir2.yahoo.com>
Date: Thu, 11 Apr 2013 07:36:46 +0100 (BST)
From 8e52e7509f601c492c95ac2d8ffa75bd5a97fd5d Mon Sep 17 00:00:00 2001 From: Nozomi Kodama Date: Wed, 10 Apr 2013 20:06:23 -1000 Subject: d3dx9: Implement D3DXSHMultiply5 --- dlls/d3dx9_36/d3dx9_36.spec | 2 +- dlls/d3dx9_36/math.c | 722 +++++++++++++++++++++++++++++++++++++++++++ dlls/d3dx9_36/tests/math.c | 55 ++++ include/d3dx9math.h | 1 + 4 files changed, 779 insertions(+), 1 deletion(-) diff --git a/dlls/d3dx9_36/d3dx9_36.spec b/dlls/d3dx9_36/d3dx9_36.spec index 9b83135..07d7f46 100644 --- a/dlls/d3dx9_36/d3dx9_36.spec +++ b/dlls/d3dx9_36/d3dx9_36.spec @@ -282,7 +282,7 @@ @ stdcall D3DXSHMultiply2(ptr ptr ptr) @ stdcall D3DXSHMultiply3(ptr ptr ptr) @ stdcall D3DXSHMultiply4(ptr ptr ptr) -@ stub D3DXSHMultiply5(ptr ptr ptr) +@ stdcall D3DXSHMultiply5(ptr ptr ptr) @ stub D3DXSHMultiply6(ptr ptr ptr) @ stub D3DXSHProjectCubeMap(long ptr ptr ptr ptr) @ stub D3DXSHPRTCompSplitMeshSC(ptr long long ptr long ptr long long ptr ptr long ptr ptr ptr ptr ptr) diff --git a/dlls/d3dx9_36/math.c b/dlls/d3dx9_36/math.c index 207d152..26af3f5 100644 --- a/dlls/d3dx9_36/math.c +++ b/dlls/d3dx9_36/math.c @@ -2967,6 +2967,728 @@ FLOAT * WINAPI D3DXSHMultiply4(FLOAT *out, const FLOAT *a, const FLOAT *b) return out; } +FLOAT * WINAPI D3DXSHMultiply5(FLOAT *out, const FLOAT *a, const FLOAT *b) +{ + FLOAT ta, tb, t; + + TRACE("out %p, a %p, b %p\n", out, a, b); + + out[0] = 0.2820948064f * a[0] * b[0]; + + ta = 0.2820948064f * a[0] - 0.1261566281f * a[6] - 0.2185096890f * a[8]; + tb = 0.2820948064f * b[0] - 0.1261566281f * b[6] - 0.2185096890f * b[8]; + out[1] = ta * b[1] + tb * a[1]; + t = a[1] * b[1]; + out[0] += 0.2820948064f * t; + out[6] = -0.1261566281f * t; + out[8] = -0.2185096890f * t; + + ta = 0.2185096890f * a[3] - 0.0583991706f * a[13] - 0.2261790186f * a[15]; + tb = 0.2185096890f * b[3] - 0.0583991706f * b[13] - 0.2261790186f * b[15]; + out[1] += ta * b[4] + tb * a[4]; + out[4] = ta * b[1] + tb * a[1]; + t = a[1] * b[4] + a[4] * b[1]; + out[3] = 0.2185096890f * t; + 
out[13] = -0.0583991706f * t; + out[15] = -0.2261790186f * t; + + ta = 0.2185096890f * a[2] - 0.1430481672f * a[12] - 0.1846743971f * a[14]; + tb = 0.2185096890f * b[2] - 0.1430481672f * b[12] - 0.1846743971f * b[14]; + out[1] += ta * b[5] + tb * a[5]; + out[5] = ta * b[1] + tb * a[1]; + t = a[1] * b[5] + a[5] * b[1]; + out[2] = 0.2185096890f * t; + out[12] = -0.1430481672f * t; + out[14] = -0.1846743971f * t; + + ta = 0.2261790186f * a[8] - 0.0435281731f * a[22] - 0.2303294390f * a[24]; + tb = 0.2261790186f * b[8] - 0.0435281731f * b[22] - 0.2303294390f * b[24]; + out[1] += ta * b[9] + tb * a[9]; + out[9] = ta * b[1] + tb * a[1]; + t = a[1] * b[9] + a[9] * b[1]; + out[8] += 0.2261790186f * t; + out[22] = -0.0435281731f * t; + out[24] = -0.2303294390f * t; + + ta = 0.1846743971f * a[7] - 0.0753930062f * a[21] - 0.1994711459f * a[23]; + tb = 0.1846743971f * b[7] - 0.0753930062f * b[21] - 0.1994711459f * b[23]; + out[1] += ta * b[10] + tb * a[10]; + out[10] = ta * b[1] + tb * a[1]; + t = a[1] * b[10] + a[10] * b[1]; + out[7] = 0.1846743971f * t; + out[21] = -0.0753930062f * t; + out[23] = -0.1994711459f * t; + + ta = 0.2023006529f * a[6] + 0.0583991706f * a[8] - 0.1507860124f * a[20] - 0.1685838848f * a[22]; + tb = 0.2023006529f * b[6] + 0.0583991706f * b[8] - 0.1507860124f * b[20] - 0.1685838848f * b[22]; + out[1] += ta * b[11] + tb * a[11]; + out[11] = ta * b[1] + tb * a[1]; + t = a[1] * b[11] + a[11] * b[1]; + out[6] += 0.2023006529f * t; + out[8] += 0.0583991706f * t; + out[20] = -0.1507860124f * t; + out[22] -= 0.1685838848f * t; + + ta = 0.1946638972f * a[19]; + tb = 0.1946638972f * b[19]; + out[1] += ta * b[12] + tb * a[12]; + out[12] += ta * b[1] + tb * a[1]; + t = a[1] * b[12] + a[12] * b[1]; + out[19] = 0.1946638972f * t; + + ta = 0.1685838848f * a[18]; + tb = 0.1685838848f * b[18]; + out[1] += ta * b[13] + tb * a[13]; + out[13] += ta * b[1] + tb * a[1]; + t = a[1] * b[13] + a[13] * b[1]; + out[18] = 0.1685838848f * t; + + ta = 0.1994711459f * a[17] + 
0.0753930062f * a[19]; + tb = 0.1994711459f * b[17] + 0.0753930062f * b[19]; + out[1] += ta * b[14] + tb * a[14]; + out[14] += ta * b[1] + tb * a[1]; + t = a[1] * b[14] + a[14] * b[1]; + out[17] = 0.1994711459f * t; + out[19] += 0.0753930062f * t; + + ta = 0.2303294390f * a[16] + 0.0435281731f * a[18]; + tb = 0.2303294390f * b[16] + 0.0435281731f * b[18]; + out[1] += ta * b[15] + tb * a[15]; + out[15] += ta * b[1] + tb * a[1]; + t = a[1] * b[15] + a[15] * b[1]; + out[16] = 0.2303294390f * t; + out[18] += 0.0435281731f * t; + + ta = 0.2820948064f * a[0] + 0.2523132563f * a[6]; + tb = 0.2820948064f * b[0] + 0.2523132563f * b[6]; + out[2] += ta * b[2] + tb * a[2]; + t = a[2] * b[2]; + out[0] += 0.2820948064f * t; + out[6] += 0.2523132563f * t; + + ta = 0.1846743971f * a[4] + 0.2132436186f * a[18]; + tb = 0.1846743971f * b[4] + 0.2132436186f * b[18]; + out[2] += ta * b[10] + tb * a[10]; + out[10] += ta * b[2] + tb * a[2]; + t = a[2] * b[10] + a[10] * b[2]; + out[4] += 0.1846743971f * t; + out[18] += 0.2132436186f * t; + + ta = 0.2477667034f * a[6] + 0.2462325394f * a[20]; + tb = 0.2477667034f * b[6] + 0.2462325394f * b[20]; + out[2] += ta * b[12] + tb * a[12]; + out[12] += ta * b[2] + tb * a[2]; + t = a[2] * b[12] + a[12] * b[2]; + out[6] += 0.2477667034f * t; + out[20] += 0.2462325394f * t; + + ta = 0.1846743971f * a[8] + 0.2132436186f * a[22]; + tb = 0.1846743971f * b[8] + 0.2132436186f * b[22]; + out[2] += ta * b[14] + tb * a[14]; + out[14] += ta * b[2] + tb * a[2]; + t = a[2] * b[14] + a[14] * b[2]; + out[8] += 0.1846743971f * t; + out[22] += 0.2132436186f * t; + + ta = 0.2820948064f * a[0] - 0.1261566281f * a[6] + 0.2185096890f * a[8]; + tb = 0.2820948064f * b[0] - 0.1261566281f * b[6] + 0.2185096890f * b[8]; + out[3] += ta * b[3] + tb * a[3]; + t = a[3] * b[3]; + out[0] += 0.2820948064f * t; + out[6] -= 0.1261566281f * t; + out[8] += 0.2185096890f * t; + + ta = 0.2185096890f * a[2] - 0.1430481672f * a[12] + 0.1846743971f * a[14]; + tb = 0.2185096890f * b[2] - 
0.1430481672f * b[12] + 0.1846743971f * b[14]; + out[3] += ta * b[7] + tb * a[7]; + out[7] += ta * b[3] + tb * a[3]; + t = a[3] * b[7] + a[7] * b[3]; + out[2] += 0.2185096890f * t; + out[12] -= 0.1430481672f * t; + out[14] += 0.1846743971f * t; + + ta = 0.2261790186f * a[4] + 0.2303294390f * a[16] - 0.0435281731f * a[18]; + tb = 0.2261790186f * b[4] + 0.2303294390f * b[16] - 0.0435281731f * b[18]; + out[3] += ta * b[9] + tb * a[9]; + out[9] += ta * b[3] + tb * a[3]; + t = a[3] * b[9] + a[9] * b[3]; + out[4] += 0.2261790186f * t; + out[16] += 0.2303294390f * t; + out[18] -= 0.0435281731f * t; + + ta = 0.1846743971f * a[5] + 0.1994711459f * a[17] - 0.0753930062f * a[19]; + tb = 0.1846743971f * b[5] + 0.1994711459f * b[17] - 0.0753930062f * b[19]; + out[3] += ta * b[10] + tb * a[10]; + out[10] += ta * b[3] + tb * a[3]; + t = a[3] * b[10] + a[10] * b[3]; + out[5] += 0.1846743971f * t; + out[17] += 0.1994711459f * t; + out[19] -= 0.0753930062f * t; + + ta = 0.1946638972f * a[21]; + tb = 0.1946638972f * b[21]; + out[3] += ta * b[12] + tb * a[12]; + out[12] += ta * b[3] + tb * a[3]; + t = a[3] * b[12] + a[12] * b[3]; + out[21] += 0.1946638972f * t; + + ta = -0.0583991706f * a[8] + 0.2023006529f * a[6] - 0.1507860124f * a[20] + 0.1685838848f * a[22]; + tb = -0.0583991706f * b[8] + 0.2023006529f * b[6] - 0.1507860124f * b[20] + 0.1685838848f * b[22]; + out[3] += ta * b[13] + tb * a[13]; + out[13] += ta * b[3] + tb * a[3]; + t = a[3] * b[13] + a[13] * b[3]; + out[8] -= 0.0583991706f * t; + out[6] += 0.2023006529f * t; + out[20] -= 0.1507860124f * t; + out[22] += 0.1685838848f * t; + + ta = -0.0753930062f * a[21] + 0.1994711459f * a[23]; + tb = -0.0753930062f * b[21] + 0.1994711459f * b[23]; + out[3] += ta * b[14] + tb * a[14]; + out[14] += ta * b[3] + tb * a[3]; + t = a[3] * b[14] + a[14] * b[3]; + out[21] -= 0.0753930062f * t; + out[23] += 0.1994711459f * t; + + ta = 0.2261790186f * a[8] - 0.0435281731f * a[22] + 0.2303294390f * a[24]; + tb = 0.2261790186f * b[8] - 
0.0435281731f * b[22] + 0.2303294390f * b[24]; + out[3] += ta * b[15] + tb * a[15]; + out[15] += ta * b[3] + tb * a[3]; + t = a[3] * b[15] + a[15] * b[3]; + out[8] += 0.2261790186f * t; + out[22] -= 0.0435281731f * t; + out[24] += 0.2303294390f * t; + + ta = 0.2820948064f * a[0] - 0.1802237481f * a[6] + 0.0402992554f * a[20] - 0.2384136170f * a[24]; + tb = 0.2820948064f * b[0] - 0.1802237481f * b[6] + 0.0402992554f * b[20] - 0.2384136170f * b[24]; + out[4] += ta * b[4] + tb * a[4]; + t = a[4] * b[4]; + out[0] += 0.2820948064f * t; + out[6] -= 0.1802237481f * t; + out[20] += 0.0402992554f * t; + out[24] -= 0.2384136170f * t; + + ta = 0.1560783535f * a[7] - 0.0637187213f * a[21] - 0.1685838848f * a[23]; + tb = 0.1560783535f * b[7] - 0.0637187213f * b[21] - 0.1685838848f * b[23]; + out[4] += ta * b[5] + tb * a[5]; + out[5] += ta * b[4] + tb * a[4]; + t = a[4] * b[5] + a[5] * b[4]; + out[7] += 0.1560783535f * t; + out[21] -= 0.0637187213f * t; + out[23] -= 0.1685838848f * t; + + ta = -0.0583991706f * a[3] + 0.1456731260f * a[13] + 0.0940315947f * a[15]; + tb = -0.0583991706f * b[3] + 0.1456731260f * b[13] + 0.0940315947f * b[15]; + out[4] += ta * b[11] + tb * a[11]; + out[11] += ta * b[4] + tb * a[4]; + t = a[4] * b[11] + a[11] * b[4]; + out[3] -= 0.0583991706f * t; + out[13] += 0.1456731260f * t; + out[15] += 0.0940315947f * t; + + ta = 0.2384136170f * a[8] - 0.0750808194f * a[22]; + tb = 0.2384136170f * b[8] - 0.0750808194f * b[22]; + out[4] += ta * b[16] + tb * a[16]; + out[16] += ta * b[4] + tb * a[4]; + t = a[4] * b[16] + a[16] * b[4]; + out[8] += 0.2384136170f * t; + out[22] -= 0.0750808194f * t; + + ta = 0.1560783535f * a[6] - 0.1903646141f * a[20] + 0.0750808194f * a[24]; + tb = 0.1560783535f * b[6] - 0.1903646141f * b[20] + 0.0750808194f * b[24]; + out[4] += ta * b[18] + tb * a[18]; + out[18] += ta * b[4] + tb * a[4]; + t = a[4] * b[18] + a[18] * b[4]; + out[6] += 0.1560783535f * t; + out[20] -= 0.1903646141f * t; + out[24] += 0.0750808194f * t; + + ta = 
-0.0637187213f * a[7] + 0.1418894082f * a[21] + 0.1126212254f * a[23]; + tb = -0.0637187213f * b[7] + 0.1418894082f * b[21] + 0.1126212254f * b[23]; + out[4] += ta * b[19] + tb * a[19]; + out[19] += ta * b[4] + tb * a[4]; + t = a[4] * b[19] + a[19] * b[4]; + out[7] -= 0.0637187213f * t; + out[21] += 0.1418894082f * t; + out[23] += 0.1126212254f * t; + + ta = 0.2820948064f * a[0] + 0.0901118740f * a[6] - 0.1560783535f * a[8] - 0.1611970216f * a[20] - 0.1802237481f * a[22]; + tb = 0.2820948064f * b[0] + 0.0901118740f * b[6] - 0.1560783535f * b[8] - 0.1611970216f * b[20] - 0.1802237481f * b[22]; + out[5] += ta * b[5] + tb * a[5]; + t = a[5] * b[5]; + out[0] += 0.2820948064f * t; + out[6] += 0.0901118740f * t; + out[8] -= 0.1560783535f * t; + out[20] -= 0.1611970216f * t; + out[22] -= 0.1802237481f * t; + + ta = 0.2335966825f * a[2] + 0.0594708025f * a[12] - 0.1151647195f * a[14]; + tb = 0.2335966825f * b[2] + 0.0594708025f * b[12] - 0.1151647195f * b[14]; + out[5] += ta * b[11] + tb * a[11]; + out[11] += ta * b[5] + tb * a[5]; + t = a[5] * b[11] + a[11] * b[5]; + out[2] += 0.2335966825f * t; + out[12] += 0.0594708025f * t; + out[14] -= 0.1151647195f * t; + + ta = 0.1685838848f * a[8] + 0.1327253878f * a[22] - 0.1404633522f * a[24]; + tb = 0.1685838848f * b[8] + 0.1327253878f * b[22] - 0.1404633522f * b[24]; + out[5] += ta * b[17] + tb * a[17]; + out[17] += ta * b[5] + tb * a[5]; + t = a[5] * b[17] + a[17] * b[5]; + out[8] += 0.1685838848f * t; + out[22] += 0.1327253878f * t; + out[24] -= 0.1404633522f * t; + + ta = 0.1802237481f * a[7] + 0.0902978629f * a[21] - 0.1327253878f * a[23]; + tb = 0.1802237481f * b[7] + 0.0902978629f * b[21] - 0.1327253878f * b[23]; + out[5] += ta * b[18] + tb * a[18]; + out[18] += ta * b[5] + tb * a[5]; + t = a[5] * b[18] + a[18] * b[5]; + out[7] += 0.1802237481f * t; + out[21] += 0.0902978629f * t; + out[23] -= 0.1327253878f * t; + + ta = 0.2207281142f * a[6] + 0.0637187213f * a[8] + 0.0448693708f * a[20] - 0.0902978629f * a[22]; + tb = 
0.2207281142f * b[6] + 0.0637187213f * b[8] + 0.0448693708f * b[20] - 0.0902978629f * b[22]; + out[5] += ta * b[19] + tb * a[19]; + out[19] += ta * b[5] + tb * a[5]; + t = a[5] * b[19] + a[19] * b[5]; + out[6] += 0.2207281142f * t; + out[8] += 0.0637187213f * t; + out[20] += 0.0448693708f * t; + out[22] -= 0.0902978629f * t; + + ta = 0.2820948064f * a[0] + 0.2417955548f * a[20]; + tb = 0.2820948064f * b[0] + 0.2417955548f * b[20]; + out[6] += ta * b[6] + tb * a[6]; + t = a[6] * b[6]; + out[0] += 0.2820948064f * t; + out[6] += 0.1802237630f * t; + out[20] += 0.2417955548f * t; + + ta = 0.0901118740f * a[6] + 0.2820948064f * a[0] + 0.1560783535f * a[8] - 0.1611970216f * a[20] + 0.1802237481f * a[22]; + tb = 0.0901118740f * b[6] + 0.2820948064f * b[0] + 0.1560783535f * b[8] - 0.1611970216f * b[20] + 0.1802237481f * b[22]; + out[7] += ta * b[7] + tb * a[7]; + t = a[7] * b[7]; + out[6] += 0.0901118740f * t; + out[0] += 0.2820948064f * t; + out[8] += 0.1560783535f * t; + out[20] -= 0.1611970216f * t; + out[22] += 0.1802237481f * t; + + ta = 0.0594708025f * a[12] + 0.2335966825f * a[2] + 0.1151647195f * a[14]; + tb = 0.0594708025f * b[12] + 0.2335966825f * b[2] + 0.1151647195f * b[14]; + out[7] += ta * b[13] + tb * a[13]; + out[13] += ta * b[7] + tb * a[7]; + t = a[7] * b[13] + a[13] * b[7]; + out[12] += 0.0594708025f * t; + out[2] += 0.2335966825f * t; + out[14] += 0.1151647195f * t; + + ta = 0.1404633522f * a[16] + 0.1685838848f * a[4] + 0.1327253878f * a[18]; + tb = 0.1404633522f * b[16] + 0.1685838848f * b[4] + 0.1327253878f * b[18]; + out[7] += ta * b[17] + tb * a[17]; + out[17] += ta * b[7] + tb * a[7]; + t = a[7] * b[17] + a[17] * b[7]; + out[16] += 0.1404633522f * t; + out[4] += 0.1685838848f * t; + out[18] += 0.1327253878f * t; + + ta = -0.0637187213f * a[8] + 0.0448693708f * a[20] + 0.2207281142f * a[6] + 0.0902978629f * a[22]; + tb = -0.0637187213f * b[8] + 0.0448693708f * b[20] + 0.2207281142f * b[6] + 0.0902978629f * b[22]; + out[7] += ta * b[21] + tb * 
a[21]; + out[21] += ta * b[7] + tb * a[7]; + t = a[7] * b[21] + a[21] * b[7]; + out[8] -= 0.0637187213f * t; + out[20] += 0.0448693708f * t; + out[6] += 0.2207281142f * t; + out[22] += 0.0902978629f * t; + + ta = 0.1685838848f * a[8] + 0.1327253878f * a[22] + 0.1404633522f * a[24]; + tb = 0.1685838848f * b[8] + 0.1327253878f * b[22] + 0.1404633522f * b[24]; + out[7] += ta * b[23] + tb * a[23]; + out[23] += ta * b[7] + tb * a[7]; + t = a[7] * b[23] + a[23] * b[7]; + out[8] += 0.1685838848f * t; + out[22] += 0.1327253878f * t; + out[24] += 0.1404633522f * t; + + ta = 0.2820948064f * a[0] - 0.1802237481f * a[6] + 0.0402992554f * a[20] + 0.2384136170f * a[24]; + tb = 0.2820948064f * b[0] - 0.1802237481f * b[6] + 0.0402992554f * b[20] + 0.2384136170f * b[24]; + out[8] += ta * b[8] + tb * a[8]; + t = a[8] * b[8]; + out[0] += 0.2820948064f * t; + out[6] -= 0.1802237481f * t; + out[20] += 0.0402992554f * t; + out[24] += 0.2384136170f * t; + + ta = 0.1560783535f * a[6] - 0.1903646141f * a[20] - 0.0750808194f * a[24]; + tb = 0.1560783535f * b[6] - 0.1903646141f * b[20] - 0.0750808194f * b[24]; + out[8] += ta * b[22] + tb * a[22]; + out[22] += ta * b[8] + tb * a[8]; + t = a[8] * b[22] + a[22] * b[8]; + out[6] += 0.1560783535f * t; + out[20] -= 0.1903646141f * t; + out[24] -= 0.0750808194f * t; + + ta = -0.2102610469f * a[6] + 0.2820948064f * a[0] + 0.0769349411f * a[20]; + tb = -0.2102610469f * b[6] + 0.2820948064f * b[0] + 0.0769349411f * b[20]; + out[9] += ta * b[9] + tb * a[9]; + t = a[9] * b[9]; + out[6] -= 0.2102610469f * t; + out[0] += 0.2820948064f * t; + out[20] += 0.0769349411f * t; + + ta = 0.1486770064f * a[7] - 0.0993225873f * a[21]; + tb = 0.1486770064f * b[7] - 0.0993225873f * b[21]; + out[9] += ta * b[10] + tb * a[10]; + out[10] += ta * b[9] + tb * a[9]; + t = a[9] * b[10] + a[10] * b[9]; + out[7] += 0.1486770064f * t; + out[21] -= 0.0993225873f * t; + + ta = -0.0940315947f * a[8] + 0.1332552284f * a[22] + 0.1175200641f * a[24]; + tb = -0.0940315947f * b[8] + 
0.1332552284f * b[22] + 0.1175200641f * b[24]; + out[9] += ta * b[11] + tb * a[11]; + out[11] += ta * b[9] + tb * a[9]; + t = a[9] * b[11] + a[11] * b[9]; + out[8] -= 0.0940315947f * t; + out[22] += 0.1332552284f * t; + out[24] += 0.1175200641f * t; + + ta = -0.0940315947f * a[4] - 0.1175200641f * a[16] + 0.1332552284f * a[18]; + tb = -0.0940315947f * b[4] - 0.1175200641f * b[16] + 0.1332552284f * b[18]; + out[9] += ta * b[13] + tb * a[13]; + out[13] += ta * b[9] + tb * a[9]; + t = a[9] * b[13] + a[13] * b[9]; + out[4] -= 0.0940315947f * t; + out[16] -= 0.1175200641f * t; + out[18] += 0.1332552284f * t; + + ta = 0.1486770064f * a[5] - 0.0993225873f * a[19]; + tb = 0.1486770064f * b[5] - 0.0993225873f * b[19]; + out[9] += ta * b[14] + tb * a[14]; + out[14] += ta * b[9] + tb * a[9]; + t = a[9] * b[14] + a[14] * b[9]; + out[5] += 0.1486770064f * t; + out[19] -= 0.0993225873f * t; + + ta = 0.1628675014f * a[2] - 0.2035507262f * a[12]; + tb = 0.1628675014f * b[2] - 0.2035507262f * b[12]; + out[9] += ta * b[17] + tb * a[17]; + out[17] += ta * b[9] + tb * a[9]; + t = a[9] * b[17] + a[17] * b[9]; + out[2] += 0.1628675014f * t; + out[12] -= 0.2035507262f * t; + + ta = 0.2820948064f * a[0] - 0.1795148700f * a[20] - 0.1517177522f * a[24]; + tb = 0.2820948064f * b[0] - 0.1795148700f * b[20] - 0.1517177522f * b[24]; + out[10] += ta * b[10] + tb * a[10]; + t = a[10] * b[10]; + out[0] += 0.2820948064f * t; + out[20] -= 0.1795148700f * t; + out[24] -= 0.1517177522f * t; + + ta = 0.1151647195f * a[7] + 0.1025799215f * a[21] - 0.0678502396f * a[23]; + tb = 0.1151647195f * b[7] + 0.1025799215f * b[21] - 0.0678502396f * b[23]; + out[10] += ta * b[11] + tb * a[11]; + out[11] += ta * b[10] + tb * a[10]; + t = a[10] * b[11] + a[11] * b[10]; + out[7] += 0.1151647195f * t; + out[21] += 0.1025799215f * t; + out[23] -= 0.0678502396f * t; + + ta = -0.1880631894f * a[4] - 0.0444184095f * a[18]; + tb = -0.1880631894f * b[4] - 0.0444184095f * b[18]; + out[10] += ta * b[12] + tb * a[12]; + 
out[12] += ta * b[10] + tb * a[10]; + t = a[10] * b[12] + a[12] * b[10]; + out[4] -= 0.1880631894f * t; + out[18] -= 0.0444184095f * t; + + ta = 0.1151647195f * a[5] + 0.0678502396f * a[17] + 0.1025799215f * a[19]; + tb = 0.1151647195f * b[5] + 0.0678502396f * b[17] + 0.1025799215f * b[19]; + out[10] += ta * b[13] + tb * a[13]; + out[13] += ta * b[10] + tb * a[10]; + t = a[10] * b[13] + a[13] * b[10]; + out[5] += 0.1151647195f * t; + out[17] += 0.0678502396f * t; + out[19] += 0.1025799215f * t; + + ta = 0.1517177522f * a[16]; + tb = 0.1517177522f * b[16]; + out[10] += ta * b[14] + tb * a[14]; + out[14] += ta * b[10] + tb * a[10]; + t = a[10] * b[14] + a[14] * b[10]; + out[16] += 0.1517177522f * t; + + ta = -0.1486770064f * a[5] + 0.0993225873f * a[19]; + tb = -0.1486770064f * b[5] + 0.0993225873f * b[19]; + out[10] += ta * b[15] + tb * a[15]; + out[15] += ta * b[10] + tb * a[10]; + t = a[10] * b[15] + a[15] * b[10]; + out[5] -= 0.1486770064f * t; + out[19] += 0.0993225873f * t; + + ta = 0.2820948064f * a[0] + 0.1261566281f * a[6] - 0.1456731260f * a[8] + 0.0256449804f * a[20] - 0.1146878451f * a[22]; + tb = 0.2820948064f * b[0] + 0.1261566281f * b[6] - 0.1456731260f * b[8] + 0.0256449804f * b[20] - 0.1146878451f * b[22]; + out[11] += ta * b[11] + tb * a[11]; + t = a[11] * b[11]; + out[0] += 0.2820948064f * t; + out[6] += 0.1261566281f * t; + out[8] -= 0.1456731260f * t; + out[20] += 0.0256449804f * t; + out[22] -= 0.1146878451f * t; + + ta = 0.0678502396f * a[17]; + tb = 0.0678502396f * b[17]; + out[11] += ta * b[14] + tb * a[14]; + out[14] += ta * b[11] + tb * a[11]; + t = a[11] * b[14] + a[14] * b[11]; + out[17] += 0.0678502396f * t; + + ta = -0.1175200641f * a[16]; + tb = -0.1175200641f * b[16]; + out[11] += ta * b[15] + tb * a[15]; + out[15] += ta * b[11] + tb * a[11]; + t = a[11] * b[15] + a[15] * b[11]; + out[16] -= 0.1175200641f * t; + + ta = 0.1685838848f * a[3] + 0.1146878451f * a[13] - 0.1332552284f * a[15]; + tb = 0.1685838848f * b[3] + 0.1146878451f * 
b[13] - 0.1332552284f * b[15]; + out[11] += ta * b[18] + tb * a[18]; + out[18] += ta * b[11] + tb * a[11]; + t = a[11] * b[18] + a[18] * b[11]; + out[3] += 0.1685838848f * t; + out[13] += 0.1146878451f * t; + out[15] -= 0.1332552284f * t; + + ta = 0.2384136170f * a[2] - 0.1025799215f * a[14] + 0.0993225873f * a[12]; + tb = 0.2384136170f * b[2] - 0.1025799215f * b[14] + 0.0993225873f * b[12]; + out[11] += ta * b[19] + tb * a[19]; + out[19] += ta * b[11] + tb * a[11]; + t = a[11] * b[19] + a[19] * b[11]; + out[2] += 0.2384136170f * t; + out[14] -= 0.1025799215f * t; + out[12] += 0.0993225873f * t; + + ta = 0.2820948064f * a[0] + 0.1682088524f * a[6] + 0.1538699120f * a[20]; + tb = 0.2820948064f * b[0] + 0.1682088524f * b[6] + 0.1538699120f * b[20]; + out[12] += ta * b[12] + tb * a[12]; + t = a[12] * b[12]; + out[0] += 0.2820948064f * t; + out[6] += 0.1682088524f * t; + out[20] += 0.1538699120f * t; + + ta = -0.1880631894f * a[8] - 0.0444184095f * a[22]; + tb = -0.1880631894f * b[8] - 0.0444184095f * b[22]; + out[12] += ta * b[14] + tb * a[14]; + out[14] += ta * b[12] + tb * a[12]; + t = a[12] * b[14] + a[14] * b[12]; + out[8] -= 0.1880631894f * t; + out[22] -= 0.0444184095f * t; + + ta = 0.2820948064f * a[0] + 0.1456731260f * a[8] + 0.1261566281f * a[6] + 0.0256449804f * a[20] + 0.1146878451f * a[22]; + tb = 0.2820948064f * b[0] + 0.1456731260f * b[8] + 0.1261566281f * b[6] + 0.0256449804f * b[20] + 0.1146878451f * b[22]; + out[13] += ta * b[13] + tb * a[13]; + t = a[13] * b[13]; + out[0] += 0.2820948064f * t; + out[8] += 0.1456731260f * t; + out[6] += 0.1261566281f * t; + out[20] += 0.0256449804f * t; + out[22] += 0.1146878451f * t; + + ta = 0.0678502396f * a[23]; + tb = 0.0678502396f * b[23]; + out[13] += ta * b[14] + tb * a[14]; + out[14] += ta * b[13] + tb * a[13]; + t = a[13] * b[14] + a[14] * b[13]; + out[23] += 0.0678502396f * t; + + ta = -0.0940315947f * a[8] + 0.1332552284f * a[22] - 0.1175200641f * a[24]; + tb = -0.0940315947f * b[8] + 0.1332552284f * b[22] 
- 0.1175200641f * b[24]; + out[13] += ta * b[15] + tb * a[15]; + out[15] += ta * b[13] + tb * a[13]; + t = a[13] * b[15] + a[15] * b[13]; + out[8] -= 0.0940315947f * t; + out[22] += 0.1332552284f * t; + out[24] -= 0.1175200641f * t; + + ta = 0.2384136170f * a[2] + 0.0993225873f * a[12] + 0.1025799215f * a[14]; + tb = 0.2384136170f * b[2] + 0.0993225873f * b[12] + 0.1025799215f * b[14]; + out[13] += ta * b[21] + tb * a[21]; + out[21] += ta * b[13] + tb * a[13]; + t = a[13] * b[21] + a[21] * b[13]; + out[2] += 0.2384136170f * t; + out[12] += 0.0993225873f * t; + out[14] += 0.1025799215f * t; + + ta = 0.2820948064f * a[0] - 0.1795148700f * a[20] + 0.1517177522f * a[24]; + tb = 0.2820948064f * b[0] - 0.1795148700f * b[20] + 0.1517177522f * b[24]; + out[14] += ta * b[14] + tb * a[14]; + t = a[14] * b[14]; + out[0] += 0.2820948064f * t; + out[20] -= 0.1795148700f * t; + out[24] += 0.1517177522f * t; + + ta = 0.1486770064f * a[7] - 0.0993225873f * a[21]; + tb = 0.1486770064f * b[7] - 0.0993225873f * b[21]; + out[14] += ta * b[15] + tb * a[15]; + out[15] += ta * b[14] + tb * a[14]; + t = a[14] * b[15] + a[15] * b[14]; + out[7] += 0.1486770064f * t; + out[21] -= 0.0993225873f * t; + + ta = 0.2820948064f * a[0] - 0.2102610469f * a[6] + 0.0769349411f * a[20]; + tb = 0.2820948064f * b[0] - 0.2102610469f * b[6] + 0.0769349411f * b[20]; + out[15] += ta * b[15] + tb * a[15]; + t = a[15] * b[15]; + out[0] += 0.2820948064f * t; + out[6] -= 0.2102610469f * t; + out[20] += 0.0769349411f * t; + + ta = -0.2035507262f * a[12] + 0.1628675014f * a[2]; + tb = -0.2035507262f * b[12] + 0.1628675014f * b[2]; + out[15] += ta * b[23] + tb * a[23]; + out[23] += ta * b[15] + tb * a[15]; + t = a[15] * b[23] + a[23] * b[15]; + out[12] -= 0.2035507262f * t; + out[2] += 0.1628675014f * t; + + ta = 0.2820948064f * a[0] - 0.2293756902f * a[6] + 0.1065253094f * a[20]; + tb = 0.2820948064f * b[0] - 0.2293756902f * b[6] + 0.1065253094f * b[20]; + out[16] += ta * b[16] + tb * a[16]; + t = a[16] * b[16]; + 
out[0] += 0.2820948064f * t; + out[6] -= 0.2293756902f * t; + out[20] += 0.1065253094f * t; + + ta = -0.0750808194f * a[8] + 0.1350454688f * a[22]; + tb = -0.0750808194f * b[8] + 0.1350454688f * b[22]; + out[16] += ta * b[18] + tb * a[18]; + out[18] += ta * b[16] + tb * a[16]; + t = a[16] * b[18] + a[18] * b[16]; + out[8] -= 0.0750808194f * t; + out[22] += 0.1350454688f * t; + + ta = -0.1190989092f * a[19] + 0.1404633522f * a[5]; + tb = -0.1190989092f * b[19] + 0.1404633522f * b[5]; + out[16] += ta * b[23] + tb * a[23]; + out[23] += ta * b[16] + tb * a[16]; + t = a[16] * b[23] + a[23] * b[16]; + out[19] -= 0.1190989092f * t; + out[5] += 0.1404633522f * t; + + ta = 0.2820948064f * a[0] - 0.0573439226f * a[6] - 0.1597879529f * a[20]; + tb = 0.2820948064f * b[0] - 0.0573439226f * b[6] - 0.1597879529f * b[20]; + out[17] += ta * b[17] + tb * a[17]; + t = a[17] * b[17]; + out[0] += 0.2820948064f * t; + out[6] -= 0.0573439226f * t; + out[20] -= 0.1597879529f * t; + + ta = -0.1126212254f * a[8] + 0.0450151563f * a[22] + 0.1190989092f * a[24]; + tb = -0.1126212254f * b[8] + 0.0450151563f * b[22] + 0.1190989092f * b[24]; + out[17] += ta * b[19] + tb * a[19]; + out[19] += ta * b[17] + tb * a[17]; + t = a[17] * b[19] + a[19] * b[17]; + out[8] -= 0.1126212254f * t; + out[22] += 0.0450151563f * t; + out[24] += 0.1190989092f * t; + + ta = -0.1190989092f * a[16] - 0.1126212254f * a[4] + 0.0450151563f * a[18]; + tb = -0.1190989092f * b[16] - 0.1126212254f * b[4] + 0.0450151563f * b[18]; + out[17] += ta * b[21] + tb * a[21]; + out[21] += ta * b[17] + tb * a[17]; + t = a[17] * b[21] + a[21] * b[17]; + out[16] -= 0.1190989092f * t; + out[4] -= 0.1126212254f * t; + out[18] += 0.0450151563f * t; + + ta = 0.0655359104f * a[6] + 0.2820948064f * a[0] - 0.0836984515f * a[20] - 0.1350454688f * a[24]; + tb = 0.0655359104f * b[6] + 0.2820948064f * b[0] - 0.0836984515f * b[20] - 0.1350454688f * b[24]; + out[18] += ta * b[18] + tb * a[18]; + t = a[18] * b[18]; + out[6] += 0.0655359104f * t; + 
out[0] += 0.2820948064f * t; + out[20] -= 0.0836984515f * t; + out[24] -= 0.1350454688f * t; + + ta = 0.0902978629f * a[7] + 0.1020847857f * a[21] - 0.0450151563f * a[23]; + tb = 0.0902978629f * b[7] + 0.1020847857f * b[21] - 0.0450151563f * b[23]; + out[18] += ta * b[19] + tb * a[19]; + out[19] += ta * b[18] + tb * a[18]; + t = a[18] * b[19] + a[19] * b[18]; + out[7] += 0.0902978629f * t; + out[21] += 0.1020847857f * t; + out[23] -= 0.0450151563f * t; + + ta = 0.1392638087f * a[6] - 0.1418894082f * a[8] + 0.2820948064f * a[0] + 0.0684805512f * a[20] - 0.1020847857f * a[22]; + tb = 0.1392638087f * b[6] - 0.1418894082f * b[8] + 0.2820948064f * b[0] + 0.0684805512f * b[20] - 0.1020847857f * b[22]; + out[19] += ta * b[19] + tb * a[19]; + t = a[19] * b[19]; + out[6] += 0.1392638087f * t; + out[8] -= 0.1418894082f * t; + out[0] += 0.2820948064f * t; + out[20] += 0.0684805512f * t; + out[22] -= 0.1020847857f * t; + + ta = 0.1638398021f * a[6] + 0.2820948064f * a[0]; + tb = 0.1638398021f * b[6] + 0.2820948064f * b[0]; + out[20] += ta * b[20] + tb * a[20]; + t = a[20] * b[20]; + out[6] += 0.1638398021f * t; + out[0] += 0.2820948064f * t; + out[20] += 0.1369611323f * t; + + ta = 0.1392638087f * a[6] + 0.0684805512f * a[20] + 0.2820948064f * a[0] + 0.1418894082f * a[8] + 0.1020847857f * a[22]; + tb = 0.1392638087f * b[6] + 0.0684805512f * b[20] + 0.2820948064f * b[0] + 0.1418894082f * b[8] + 0.1020847857f * b[22]; + out[21] += ta * b[21] + tb * a[21]; + t = a[21] * b[21]; + out[6] += 0.1392638087f * t; + out[20] += 0.0684805512f * t; + out[0] += 0.2820948064f * t; + out[8] += 0.1418894082f * t; + out[22] += 0.1020847857f * t; + + ta = -0.1126212254f * a[8] + 0.0450151563f * a[22] - 0.1190989092f * a[24]; + tb = -0.1126212254f * b[8] + 0.0450151563f * b[22] - 0.1190989092f * b[24]; + out[21] += ta * b[23] + tb * a[23]; + out[23] += ta * b[21] + tb * a[21]; + t = a[21] * b[23] + a[23] * b[21]; + out[8] -= 0.1126212254f * t; + out[22] += 0.0450151563f * t; + out[24] -= 
0.1190989092f * t; + + ta = 0.0655359104f * a[6] - 0.0836984515f * a[20] + 0.2820948064f * a[0] + 0.1350454688f * a[24]; + tb = 0.0655359104f * b[6] - 0.0836984515f * b[20] + 0.2820948064f * b[0] + 0.1350454688f * b[24]; + out[22] += ta * b[22] + tb * a[22]; + t = a[22] * b[22]; + out[6] += 0.0655359104f * t; + out[20] -= 0.0836984515f * t; + out[0] += 0.2820948064f * t; + out[24] += 0.1350454688f * t; + + ta = -0.0573439226f * a[6] - 0.1597879529f * a[20] + 0.2820948064f * a[0]; + tb = -0.0573439226f * b[6] - 0.1597879529f * b[20] + 0.2820948064f * b[0]; + out[23] += ta * b[23] + tb * a[23]; + t = a[23] * b[23]; + out[6] -= 0.0573439226f * t; + out[20] -= 0.1597879529f * t; + out[0] += 0.2820948064f * t; + + ta = -0.2293756902f * a[6] + 0.2820948064f * a[0] + 0.1065253094f * a[20]; + tb = -0.2293756902f * b[6] + 0.2820948064f * b[0] + 0.1065253094f * b[20]; + out[24] += ta * b[24] + tb * a[24]; + t = a[24] * b[24]; + out[6] -= 0.2293756902f * t; + out[0] += 0.2820948064f * t; + out[20] += 0.1065253094f * t; + + return out; +} + static void rotate_X(FLOAT *out, UINT order, FLOAT a, FLOAT *in) { out[0] = in[0]; diff --git a/dlls/d3dx9_36/tests/math.c b/dlls/d3dx9_36/tests/math.c index 9d90662..de29450 100644 --- a/dlls/d3dx9_36/tests/math.c +++ b/dlls/d3dx9_36/tests/math.c @@ -3142,6 +3142,60 @@ static void test_D3DXSHMultiply4(void) ok(relative_error(c[i], expected[40 + i]) < admitted_error, "Expected[%d] = %f, received = %f\n", i, expected[40 + i], c[i]); } +static void test_D3DXSHMultiply5(void) +{ + unsigned int i; + FLOAT a[30], b[30], c[30]; + /* D3DXSHMultiply5 only modifies the first 25 elements of the array */ + const FLOAT expected[] = + { /* c, a, b */ + 22.711451f, 2.882128f, 22.552637f, 16.430542f, 3.138507f, 5.190427f, + 12.287176f, 21.527941f, 4.956343f, 3.981998f, 5.665860f, 6.968652f, + 4.103147f, 14.564260f, 7.673847f, 2.091416f, 5.015005f, 6.404927f, + 8.255578f, 8.174298f, 0.632518f, 8.451941f, 6.848452f, 0.352981f, + 1.093912f, 25.0f, 26.0f, 
27.0f, 28.0f, 29.0f, + /* c, c, b */ + -1597321984.0f, -17.438896f, -12706934.0f, -703589.8125f, 3120106.75f, -5817931.0f, + 325112704.0f, -14045543.0f, -21412776.0f, -125394.679688f, -194438.375f, -827160.625f, + -1033827.6875f, -4951222.500000f, -6767728.50f, -5143970.0f, -8582170.0f, -15876184.0f, + -33210720.0f, -74493456.0f, -185191664.0f, -219981824.0f, -419315744.0f, -323124160.0f, + -807225984.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, + /* c, c, c */ + 0.270428f, -0.883795f, 0.062812f, 0.044687f, 0.037827f, 0.040280f, + -0.131082f, 0.043207f, -0.209910f, 0.355438f, 0.289053f, 0.315026f, + -0.360699f, -0.266331f, -0.342106f, -0.389738f, 0.192350f, 0.162355f, + 0.128752f, 0.172589f, 0.092927f, 0.038116f, 0.161411f, 0.021998f, + 0.088022f, 1.25f, 1.3f, 1.35f, 1.4f, 1.45f, }; + + for (i = 0; i < 30; i++) + { + a[i] = 1.0f + i / 100.0f; + b[i] = 3.0f - i / 100.0f; + c[i] = i; + } + + D3DXSHMultiply5(c, a, b); + for (i = 0; i < 30; i++) + ok(relative_error(c[i], expected[i]) < admitted_error, "Expected[%d] = %f, received = %f\n", i, expected[i], c[i]); + + for (i = 0; i < 30; i++) + { + b[i] = 3.0f - i / 100.0f; + c[i] = i; + } + + D3DXSHMultiply5(c, c, b); + for (i = 0; i < 30; i++) + ok(relative_error(c[i], expected[30 + i]) < admitted_error, "Expected[%d] = %f, received = %f\n", i, expected[30 + i], c[i]); + + for (i = 0; i < 30; i++) + c[i] = 0.05f * i; + + D3DXSHMultiply5(c, c, c); + for (i = 0; i < 30; i++) + ok(relative_error(c[i], expected[60 + i]) < admitted_error, "Expected[%d] = %f, received = %f\n", i, expected[60 + i], c[i]); +} + static void test_D3DXSHRotate(void) { D3DXMATRIX m[4]; @@ -3399,6 +3453,7 @@ START_TEST(math) test_D3DXSHMultiply2(); test_D3DXSHMultiply3(); test_D3DXSHMultiply4(); + test_D3DXSHMultiply5(); test_D3DXSHRotate(); test_D3DXSHRotateZ(); test_D3DXSHScale(); diff --git a/include/d3dx9math.h b/include/d3dx9math.h index 36c9df0..3498f84 100644 --- a/include/d3dx9math.h +++ b/include/d3dx9math.h @@ -396,6 +396,7 @@ HRESULT WINAPI 
D3DXSHEvalSphericalLight(UINT order, const D3DXVECTOR3 *dir, FLOA FLOAT* WINAPI D3DXSHMultiply2(FLOAT *out, const FLOAT *a, const FLOAT *b); FLOAT* WINAPI D3DXSHMultiply3(FLOAT *out, const FLOAT *a, const FLOAT *b); FLOAT* WINAPI D3DXSHMultiply4(FLOAT *out, const FLOAT *a, const FLOAT *b); +FLOAT* WINAPI D3DXSHMultiply5(FLOAT *out, const FLOAT *a, const FLOAT *b); FLOAT* WINAPI D3DXSHRotate(FLOAT *out, UINT order, const D3DXMATRIX *matrix, const FLOAT *in); FLOAT* WINAPI D3DXSHRotateZ(FLOAT *out, UINT order, FLOAT angle, const FLOAT *in); FLOAT* WINAPI D3DXSHScale(FLOAT *out, UINT order, const FLOAT *a, const FLOAT scale); -- 1.7.10.4