From: Daniel Lehman Subject: [PATCH] make_unicode: Change handling of Turkish i to match Windows Message-Id: <6a002e96d93944db98f8fa16f194e430@RED-INF-MXMB-P1.esri.com> Date: Thu, 29 Sep 2016 20:44:46 +0000 From 825c50aa4f9f55ab8162b6c11ab85a07b0bf9cdd Mon Sep 17 00:00:00 2001 From: Daniel Lehman Date: Wed, 28 Sep 2016 14:45:43 -0700 Subject: [PATCH] make_unicode: Change handling of Turkish i to match Windows Windows does not lowercase the Turkish capital dotted I (0x130) to 'i' (0x69). It also does not uppercase the small dotless i (0x131) to 'I' (0x49). Wine does the conversion for both, but Windows leaves them as-is. The change here is to make Wine mimic Windows for the Turkish i. Signed-off-by: Daniel Lehman --- dlls/msvcrt/tests/string.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++ tools/make_unicode | 4 +++ 2 files changed, 74 insertions(+) diff --git a/dlls/msvcrt/tests/string.c b/dlls/msvcrt/tests/string.c index 8e78695..dbe0103 100644 --- a/dlls/msvcrt/tests/string.c +++ b/dlls/msvcrt/tests/string.c @@ -88,6 +88,12 @@ static errno_t (__cdecl *p_mbslwr_s)(unsigned char *str, size_t numberOfElements static int (__cdecl *p_wctob)(wint_t); static size_t (__cdecl *p_wcrtomb)(char*, wchar_t, mbstate_t*); static int (__cdecl *p_tolower)(int); +static wint_t (__cdecl *p_towlower)(wint_t); +static wint_t (__cdecl *p__towlower_l)(wint_t,_locale_t); +static wint_t (__cdecl *p_towupper)(wint_t); +static wint_t (__cdecl *p__towupper_l)(wint_t,_locale_t); +static _locale_t (__cdecl *p__create_locale)(int,const char*); +static void (__cdecl *p__free_locale)(_locale_t); static size_t (__cdecl *p_mbrlen)(const char*, size_t, mbstate_t*); static size_t (__cdecl *p_mbrtowc)(wchar_t*, const char*, size_t, mbstate_t*); static int (__cdecl *p__atodbl_l)(_CRT_DOUBLE*,char*,_locale_t); @@ -3113,6 +3119,63 @@ static void test__mbscmp(void) ok(ret == 1, "got %d\n", ret); } +static void test_casemap(void) +{ + int i; + wint_t ret; + _locale_t locale; + const char *locales[] = 
{"Turkish_Turkey.1254", "C"}; + + /* test case mappings for certain characters + where Windows deviates from Unicode */ + + for (i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) + { + if(!setlocale(LC_ALL, locales[i])) + { + win_skip("locale %s not available. skipping\n", locales[i]); + continue; + } + + ret = p_towlower('I'); + ok(ret == 'i', "ret = %x\n", ret); + + ret = p_towupper('i'); + ok(ret == 'I', "ret = %x\n", ret); + + /* Turkish capital dotted I - Unicode lowercases it to 'i' 0x69, but Windows leaves it unchanged */ + ret = p_towlower(0x130); + ok(ret == 0x130, "ret = %x\n", ret); + + /* Turkish small dotless i - Unicode uppercases it to 'I' 0x49, but Windows leaves it unchanged */ + ret = p_towupper(0x131); + ok(ret == 0x131, "ret = %x\n", ret); + } + + if (!p__towlower_l || !p__towupper_l || !p__create_locale) + { + win_skip("_towlower_l/_towupper_l/_create_locale not available\n"); + return; + } + + for (i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) + { + if (!(locale = p__create_locale(LC_ALL, locales[i]))) + { + win_skip("locale %s not available. 
skipping\n", locales[i]); + continue; + } + + ret = p__towlower_l(0x130, locale); + ok(ret == 0x130, "ret = %x\n", ret); + + ret = p__towupper_l(0x131, locale); + ok(ret == 0x131, "ret = %x\n", ret); + + p__free_locale(locale); + } +} + START_TEST(string) { char mem[100]; @@ -3157,6 +3220,12 @@ START_TEST(string) p_wctob = (void*)GetProcAddress(hMsvcrt, "wctob"); p_wcrtomb = (void*)GetProcAddress(hMsvcrt, "wcrtomb"); p_tolower = (void*)GetProcAddress(hMsvcrt, "tolower"); + p_towlower = (void*)GetProcAddress(hMsvcrt, "towlower"); + p__towlower_l = (void*)GetProcAddress(hMsvcrt, "_towlower_l"); + p_towupper = (void*)GetProcAddress(hMsvcrt, "towupper"); + p__towupper_l = (void*)GetProcAddress(hMsvcrt, "_towupper_l"); + p__create_locale = (void*)GetProcAddress(hMsvcrt, "_create_locale"); + p__free_locale = (void*)GetProcAddress(hMsvcrt, "_free_locale"); p_mbrlen = (void*)GetProcAddress(hMsvcrt, "mbrlen"); p_mbrtowc = (void*)GetProcAddress(hMsvcrt, "mbrtowc"); p_mbsrtowcs = (void*)GetProcAddress(hMsvcrt, "mbsrtowcs"); @@ -3227,4 +3296,5 @@ START_TEST(string) test__strnset_s(); test__wcsset_s(); test__mbscmp(); + test_casemap(); } diff --git a/tools/make_unicode b/tools/make_unicode index a0bcf8f..10821b8 100755 --- a/tools/make_unicode +++ b/tools/make_unicode @@ -542,6 +542,10 @@ sub READ_DEFAULTS($) my $flag = $ctype{$cat}; foreach my $i (@{$special_categories{$cat}}) { $category_table[$i] |= $flag; } } + + # Windows does not convert these characters + $tolower_table[0x0130] = 0x0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE (Turkish) + $toupper_table[0x0131] = 0x0131; # LATIN SMALL LETTER DOTLESS I (Turkish) } -- 1.9.5