From: Alex Henrie
Subject: [PATCH 3/3] kernel32/tests: Add tests for UTF-7 conversion. (try 2)
Message-Id: <20141005215215.595164a2c9d20f49086100df@gmail.com>
Date: Sun, 5 Oct 2014 21:52:15 -0600

---
 dlls/kernel32/tests/codepage.c | 231 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 231 insertions(+)

diff --git a/dlls/kernel32/tests/codepage.c b/dlls/kernel32/tests/codepage.c
index 8423c75..67927a1 100644
--- a/dlls/kernel32/tests/codepage.c
+++ b/dlls/kernel32/tests/codepage.c
@@ -412,6 +412,235 @@ static void test_string_conversion(LPBOOL bUsedDefaultChar)
     ok(GetLastError() == 0xdeadbeef, "GetLastError() is %u\n", GetLastError());
 }
 
+/* Encodes the null-terminated string utf16_actual as UTF-7 and checks the
+ * result against utf7_expected.  utf7_expected_len is the expected return
+ * value; callers pass sizeof() of the expected string, so it includes the
+ * terminating null.  The last byte of c_buffer is kept out of the converter's
+ * reach and stays 0, so strcmp and "%s" stay in bounds even on failure. */
+static void test_utf16_to_utf7(WCHAR* utf16_actual, char* utf7_expected, int utf7_expected_len)
+{
+    char c_buffer[1024] = {0};
+    int len;
+
+    memset(c_buffer, '#', sizeof(c_buffer) - 1);
+    len = WideCharToMultiByte(CP_UTF7, 0, utf16_actual, -1, c_buffer, sizeof(c_buffer) - 1, NULL, NULL);
+    ok(len == utf7_expected_len && strcmp(c_buffer, utf7_expected) == 0,
+       "src=%s dst=\"%s\" len=%i\n", wine_dbgstr_w(utf16_actual), c_buffer, len);
+}
+
+/* Decodes the null-terminated string utf7_actual to UTF-16 and checks the
+ * result against utf16_expected.  utf16_expected_len is the expected return
+ * value; callers pass the element count of the expected array, so it includes
+ * the terminating null.  The last WCHAR of w_buffer is kept out of the
+ * converter's reach and stays 0, so the compare and wine_dbgstr_w stay in bounds. */
+static void test_utf7_to_utf16(char* utf7_actual, WCHAR* utf16_expected, int utf16_expected_len)
+{
+    WCHAR w_buffer[1024] = {0};
+    int len;
+
+    memset(w_buffer, '#', sizeof(w_buffer) - sizeof(WCHAR));
+    len = MultiByteToWideChar(CP_UTF7, 0, utf7_actual, -1, w_buffer, sizeof(w_buffer) / sizeof(WCHAR) - 1);
+    ok(len == utf16_expected_len && winetest_strcmpW(w_buffer, utf16_expected) == 0,
+       "src=\"%s\" dst=%s len=%i\n", utf7_actual, wine_dbgstr_w(w_buffer), len);
+}
+
+static void test_utf7_string_conversion(void)
+{
+    /* tests which one-byte characters are base64-encoded and which are not */
+    WCHAR example_0_utf16[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,
+                               20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,
+                               36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,
+                               52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,
+                               68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,
+                               84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,
+                               100,101,102,103,104,105,106,107,108,109,110,111,
+                               112,113,114,115,116,117,118,119,120,121,122,123,
+                               124,125,126,127,128,129,130,131,132,133,134,135,
+                               136,137,138,139,140,141,142,143,144,145,146,147,
+                               148,149,150,151,152,153,154,155,156,157,158,159,
+                               160,161,162,163,164,165,166,167,168,169,170,171,
+                               172,173,174,175,176,177,178,179,180,181,182,183,
+                               184,185,186,187,188,189,190,191,192,193,194,195,
+                               196,197,198,199,200,201,202,203,204,205,206,207,
+                               208,209,210,211,212,213,214,215,216,217,218,219,
+                               220,221,222,223,224,225,226,227,228,229,230,231,
+                               232,233,234,235,236,237,238,239,240,241,242,243,
+                               244,245,246,247,248,249,250,251,252,253,254,255,
+                               256,0}; /* throw in 256 for good measure */
+    char example_0_utf7[] = "+AAEAAgADAAQABQAGAAcACA-\t\n+AAsADA-\r+AA4ADwAQABE"
+                            "AEgATABQAFQAWABcAGAAZABoAGwAcAB0AHgAf- +ACEAIgAjAC"
+                            "QAJQAm-'()+ACo-+-,-./0123456789:+ADsAPAA9AD4-?+AEA"
+                            "-ABCDEFGHIJKLMNOPQRSTUVWXYZ+AFsAXABdAF4AXwBg-abcde"
+                            "fghijklmnopqrstuvwxyz+AHsAfAB9AH4AfwCAAIEAggCDAIQA"
+                            "hQCGAIcAiACJAIoAiwCMAI0AjgCPAJAAkQCSAJMAlACVAJYAlw"
+                            "CYAJkAmgCbAJwAnQCeAJ8AoAChAKIAowCkAKUApgCnAKgAqQCq"
+                            "AKsArACtAK4ArwCwALEAsgCzALQAtQC2ALcAuAC5ALoAuwC8AL"
+                            "0AvgC/AMAAwQDCAMMAxADFAMYAxwDIAMkAygDLAMwAzQDOAM8A"
+                            "0ADRANIA0wDUANUA1gDXANgA2QDaANsA3ADdAN4A3wDgAOEA4g"
+                            "DjAOQA5QDmAOcA6ADpAOoA6wDsAO0A7gDvAPAA8QDyAPMA9AD1"
+                            "APYA9wD4APkA+gD7APwA/QD+AP8BAA-";
+
+    /* this string is the Unicode for "5 + (<U+0391>+<U+0392>+x) <U+2260> <U+0391><U+0392>"
+       it tests:
+       - a + before a non-base64 character
+       - a + between two non-directly-encodable characters
+       - a + before a base64 character
+       - a base64 character before a non-base64 character */
+    WCHAR example_1_utf16[] = {'5',' ','+',' ','(',0x0391,'+',0x0392,'+','x',')',' ',0x2260,' ',0x0391,0x0392,0};
+    char example_1_utf7[] = "5 +- (+A5E-+-+A5I-+-x) +ImA- +A5EDkg-";
+
+    /* tests some invalid UTF-16 */
+    /* (stray lead surrogate) */
+    WCHAR example_2_utf16[] = {0xD801,0};
+    char example_2_utf7[] = "+2AE-";
+
+    /* tests some more invalid UTF-16 */
+    /* (codepoint does not exist) */
+    WCHAR example_3_utf16[] = {0xFF00,0};
+    char example_3_utf7[] = "+/wA-";
+
+    /* tests a UTF-7 sequence implicitly terminated by a non-base64 ASCII character */
+    char example_4_utf7[] = "+T2A hello";
+    WCHAR example_4_utf16[] = {0x4F60,' ','h','e','l','l','o',0};
+
+    /* tests a UTF-7 sequence implicitly terminated by a non-ASCII character */
+    char example_5_utf7[] = "+T2A\xFFhello";
+    WCHAR example_5_utf16[] = {0x4F60,0x00FF,'h','e','l','l','o',0};
+
+    /* tests a + sign immediately followed by a non-base64 character */
+    /* (decoding should simply remove the + sign) */
+    char example_6_utf7[] = "+ hello";
+    WCHAR example_6_utf16[] = {' ','h','e','l','l','o',0};
+
+    /* tests some invalid UTF-7 */
+    /* (number of bits in base64 sequence is not a multiple of 16 and the last bit is a 1) */
+    char example_7_utf7[] = "+T2B-hello";
+    WCHAR example_7_utf16[] = {0x4F60,'h','e','l','l','o',0};
+
+    /* tests some more invalid UTF-7 */
+    /* (number of bits in base64 sequence is a multiple of 8 but not a multiple of 16) */
+    char example_8_utf7[] = "+T2BZ-hello";
+    WCHAR example_8_utf16[] = {0x4F60,'h','e','l','l','o',0};
+
+    /* tests UTF-7 followed by characters that should be encoded but aren't */
+    char example_9_utf7[] = "+T2BZ-\x82\xFE";
+    WCHAR example_9_utf16[] = {0x4F60,0x0082,0x00FE,0};
+
+    /* tests a null char before the end of the buffer */
+    WCHAR example_10_utf16[] = {'a',0,'b',0};
+    char example_10_utf7[] = "a\0b";
+
+    /* tests a buffer that runs out while not encoding a UTF-7 sequence */
+    /* additionally tests srclen < strlen(src) */
+    WCHAR example_11_utf16[] = {'h','e','l','l','o',0};
+
+    /* tests a buffer that runs out while not decoding a UTF-7 sequence */
+    /* additionally tests srclen < strlen(src) */
+    char example_12_utf7[] = "hello";
+
+    /* tests a buffer that runs out in the middle of encoding a UTF-7 sequence */
+    WCHAR example_13_utf16[] = {0x4F60,0x597D,0};
+
+    /* tests a buffer that runs out in the middle of decoding a UTF-7 sequence */
+    char example_14_utf7[] = "+T2BZfQ-";
+
+    /* the last element of each buffer is never passed to the converters and
+       stays 0 so that printing a truncated result cannot overrun the buffer */
+    char c_buffer[1024] = {0};
+    WCHAR w_buffer[1024] = {0};
+    int len;
+
+
+    /* WideCharToMultiByte tests */
+    test_utf16_to_utf7(example_0_utf16, example_0_utf7, sizeof(example_0_utf7));
+    test_utf16_to_utf7(example_1_utf16, example_1_utf7, sizeof(example_1_utf7));
+    test_utf16_to_utf7(example_2_utf16, example_2_utf7, sizeof(example_2_utf7));
+    test_utf16_to_utf7(example_3_utf16, example_3_utf7, sizeof(example_3_utf7));
+
+    /* MultiByteToWideChar tests */
+    test_utf7_to_utf16(example_0_utf7, example_0_utf16, sizeof(example_0_utf16) / sizeof(WCHAR));
+    test_utf7_to_utf16(example_1_utf7, example_1_utf16, sizeof(example_1_utf16) / sizeof(WCHAR));
+    test_utf7_to_utf16(example_2_utf7, example_2_utf16, sizeof(example_2_utf16) / sizeof(WCHAR));
+    test_utf7_to_utf16(example_3_utf7, example_3_utf16, sizeof(example_3_utf16) / sizeof(WCHAR));
+    test_utf7_to_utf16(example_4_utf7, example_4_utf16, sizeof(example_4_utf16) / sizeof(WCHAR));
+    test_utf7_to_utf16(example_5_utf7, example_5_utf16, sizeof(example_5_utf16) / sizeof(WCHAR));
+    test_utf7_to_utf16(example_6_utf7, example_6_utf16, sizeof(example_6_utf16) / sizeof(WCHAR));
+    test_utf7_to_utf16(example_7_utf7, example_7_utf16, sizeof(example_7_utf16) / sizeof(WCHAR));
+    test_utf7_to_utf16(example_8_utf7, example_8_utf16, sizeof(example_8_utf16) / sizeof(WCHAR));
+    test_utf7_to_utf16(example_9_utf7, example_9_utf16, sizeof(example_9_utf16) / sizeof(WCHAR));
+
+
+    /* 4 tests to just compute the required length if dstlen is 0 */
+    len = WideCharToMultiByte(CP_UTF7, 0, example_0_utf16, -1, NULL, 0, NULL, NULL);
+    ok(len == sizeof(example_0_utf7), "len=%i\n", len);
+
+    len = WideCharToMultiByte(CP_UTF7, 0, example_0_utf16, -1, c_buffer, 0, NULL, NULL);
+    ok(len == sizeof(example_0_utf7), "len=%i\n", len);
+
+    len = MultiByteToWideChar(CP_UTF7, 0, example_0_utf7, -1, NULL, 0);
+    ok(len == sizeof(example_0_utf16) / sizeof(WCHAR), "len=%i\n", len);
+
+    len = MultiByteToWideChar(CP_UTF7, 0, example_0_utf7, -1, w_buffer, 0);
+    ok(len == sizeof(example_0_utf16) / sizeof(WCHAR), "len=%i\n", len);
+
+    /* 2 more tests to check what happens if srclen < -1 */
+    memset(c_buffer, '#', sizeof(c_buffer) - 1);
+    len = WideCharToMultiByte(CP_UTF7, 0, example_0_utf16, -2, c_buffer, sizeof(c_buffer) - 1, NULL, NULL);
+    ok(len == sizeof(example_0_utf7) && strcmp(c_buffer, example_0_utf7) == 0,
+       "len=%i dst=\"%s\"\n", len, c_buffer);
+
+    memset(w_buffer, '#', sizeof(w_buffer) - sizeof(WCHAR));
+    len = MultiByteToWideChar(CP_UTF7, 0, example_0_utf7, -2, w_buffer, sizeof(w_buffer) / sizeof(WCHAR) - 1);
+    ok(len == sizeof(example_0_utf16) / sizeof(WCHAR) && winetest_strcmpW(w_buffer, example_0_utf16) == 0,
+       "len=%i dst=%s\n", len, wine_dbgstr_w(w_buffer));
+
+
+    /* example_10_utf16 and example_10_utf7 should convert to each other */
+    memset(c_buffer, '#', sizeof(c_buffer) - 1);
+    len = WideCharToMultiByte(CP_UTF7, 0, example_10_utf16, sizeof(example_10_utf16) / sizeof(WCHAR), c_buffer, sizeof(c_buffer) - 1, NULL, NULL);
+    ok(len == sizeof(example_10_utf7) && c_buffer[0] == 'a' && c_buffer[1] == 0 && c_buffer[2] == 'b' && c_buffer[3] == 0,
+       "len=%i dst=\"%s\"\n", len, c_buffer);
+
+    memset(w_buffer, '#', sizeof(w_buffer) - sizeof(WCHAR));
+    len = MultiByteToWideChar(CP_UTF7, 0, example_10_utf7, sizeof(example_10_utf7), w_buffer, sizeof(w_buffer) / sizeof(WCHAR) - 1);
+    ok(len == sizeof(example_10_utf16) / sizeof(WCHAR) && w_buffer[0] == 'a' && w_buffer[1] == 0 && w_buffer[2] == 'b' && w_buffer[3] == 0,
+       "len=%i dst=%s\n", len, wine_dbgstr_w(w_buffer));
+
+
+    /* example_11_utf16 with dstlen=2 should write two UTF-7 characters and stop without null-terminating */
+    memset(c_buffer, '#', sizeof(c_buffer) - 1);
+    len = WideCharToMultiByte(CP_UTF7, 0, example_11_utf16, -1, c_buffer, 2, NULL, NULL);
+    ok(len == 0 && c_buffer[0] == 'h' && c_buffer[1] == 'e' && c_buffer[2] == '#', "len=%i dst=\"%s\"\n", len, c_buffer);
+
+    /* example_11_utf16 with srclen=2 should write two UTF-7 characters and stop without null-terminating */
+    memset(c_buffer, '#', sizeof(c_buffer) - 1);
+    len = WideCharToMultiByte(CP_UTF7, 0, example_11_utf16, 2, c_buffer, sizeof(c_buffer) - 1, NULL, NULL);
+    ok(len == 2 && c_buffer[0] == 'h' && c_buffer[1] == 'e' && c_buffer[2] == '#', "len=%i dst=\"%s\"\n", len, c_buffer);
+
+
+    /* example_12_utf7 with dstlen=2 should write two UTF-16 characters and stop without null-terminating */
+    memset(w_buffer, '#', sizeof(w_buffer) - sizeof(WCHAR));
+    len = MultiByteToWideChar(CP_UTF7, 0, example_12_utf7, -1, w_buffer, 2);
+    ok(len == 0 && w_buffer[0] == 'h' && w_buffer[1] == 'e' && w_buffer[2] == 0x2323, "len=%i dst=%s\n", len, wine_dbgstr_w(w_buffer));
+
+    /* example_12_utf7 with srclen=2 should write two UTF-16 characters and stop without null-terminating */
+    memset(w_buffer, '#', sizeof(w_buffer) - sizeof(WCHAR));
+    len = MultiByteToWideChar(CP_UTF7, 0, example_12_utf7, 2, w_buffer, sizeof(w_buffer) / sizeof(WCHAR) - 1);
+    ok(len == 2 && w_buffer[0] == 'h' && w_buffer[1] == 'e' && w_buffer[2] == 0x2323, "len=%i dst=%s\n", len, wine_dbgstr_w(w_buffer));
+
+
+    /* example_13_utf16 with dstlen=2 should write two UTF-7 characters and stop without null-terminating */
+    memset(c_buffer, '#', sizeof(c_buffer) - 1);
+    len = WideCharToMultiByte(CP_UTF7, 0, example_13_utf16, -1, c_buffer, 2, NULL, NULL);
+    ok(len == 0 && c_buffer[0] == '+' && c_buffer[1] == 'T' && c_buffer[2] == '#', "len=%i dst=\"%s\"\n", len, c_buffer);
+
+
+    /* example_14_utf7 with dstlen=1 should write one UTF-16 character and stop without null-terminating */
+    memset(w_buffer, '#', sizeof(w_buffer) - sizeof(WCHAR));
+    len = MultiByteToWideChar(CP_UTF7, 0, example_14_utf7, -1, w_buffer, 1);
+    ok(len == 0 && w_buffer[0] == 0x4F60 && w_buffer[1] == 0x2323, "len=%i dst=%s\n", len, wine_dbgstr_w(w_buffer));
+}
+
 static void test_undefined_byte_char(void)
 {
     static const struct tag_testset {
@@ -618,6 +847,8 @@ START_TEST(codepage)
     test_string_conversion(NULL);
     test_string_conversion(&bUsedDefaultChar);
 
+    test_utf7_string_conversion();
+
     test_undefined_byte_char();
     test_threadcp();
 }
-- 
2.1.2