From: Alex Henrie Subject: kernel32: Add UTF-7 support Message-Id: <20120430232041.d1f5f789c03796cb88381ba1@gmail.com> Date: Mon, 30 Apr 2012 23:20:41 -0600 Fixes bug 27388. The MSDN documentation of these behaviors can be found at http://msdn.microsoft.com/en-us/library/windows/desktop/dd319072(v=vs.85).aspx dlls/kernel32/tests/locale.c already has some UTF-7 tests. If you think more tests are needed, please be very specific when you tell me what kind of tests you'd like to see. --- dlls/kernel32/locale.c | 40 ++++-- include/wine/unicode.h | 2 + libs/wine/Makefile.in | 1 + libs/wine/utf7.c | 340 ++++++++++++++++++++++++++++++++++++++++++++++++ libs/wine/wine.map | 2 + 5 files changed, 374 insertions(+), 11 deletions(-) create mode 100644 libs/wine/utf7.c diff --git a/dlls/kernel32/locale.c b/dlls/kernel32/locale.c index b506f15..f9c70fb 100644 --- a/dlls/kernel32/locale.c +++ b/dlls/kernel32/locale.c @@ -1878,7 +1878,7 @@ INT WINAPI MultiByteToWideChar( UINT page, DWORD flags, LPCSTR src, INT srclen, const union cptable *table; int ret; - if (!src || (!dst && dstlen)) + if (!src || !srclen || (!dst && dstlen)) { SetLastError( ERROR_INVALID_PARAMETER ); return 0; @@ -1889,17 +1889,21 @@ INT WINAPI MultiByteToWideChar( UINT page, DWORD flags, LPCSTR src, INT srclen, switch(page) { case CP_SYMBOL: - if( flags) + if (flags) { - SetLastError( ERROR_INVALID_PARAMETER ); + SetLastError( ERROR_INVALID_FLAGS ); return 0; } ret = wine_cpsymbol_mbstowcs( src, srclen, dst, dstlen ); break; case CP_UTF7: - FIXME("UTF-7 not supported\n"); - SetLastError( ERROR_CALL_NOT_IMPLEMENTED ); - return 0; + if (flags) + { + SetLastError( ERROR_INVALID_FLAGS ); + return 0; + } + ret = wine_utf7_mbstowcs( src, srclen, dst, dstlen ); + break; case CP_UNIXCP: if (unix_cptable) { @@ -1969,7 +1973,7 @@ INT WINAPI WideCharToMultiByte( UINT page, DWORD flags, LPCWSTR src, INT srclen, const union cptable *table; int ret, used_tmp; - if (!src || (!dst && dstlen)) + if (!src || !srclen || (!dst && 
dstlen)) { SetLastError( ERROR_INVALID_PARAMETER ); return 0; @@ -1980,17 +1984,31 @@ INT WINAPI WideCharToMultiByte( UINT page, DWORD flags, LPCWSTR src, INT srclen, switch(page) { case CP_SYMBOL: - if( flags || defchar || used) + if (defchar || used) { SetLastError( ERROR_INVALID_PARAMETER ); return 0; } + if (flags) + { + SetLastError( ERROR_INVALID_FLAGS ); + return 0; + } ret = wine_cpsymbol_wcstombs( src, srclen, dst, dstlen ); break; case CP_UTF7: - FIXME("UTF-7 not supported\n"); - SetLastError( ERROR_CALL_NOT_IMPLEMENTED ); - return 0; + if (defchar || used) + { + SetLastError( ERROR_INVALID_PARAMETER ); + return 0; + } + if (flags) + { + SetLastError( ERROR_INVALID_FLAGS ); + return 0; + } + ret = wine_utf7_wcstombs( src, srclen, dst, dstlen ); + break; case CP_UNIXCP: if (unix_cptable) { diff --git a/include/wine/unicode.h b/include/wine/unicode.h index 35c6166..1827a8e 100644 --- a/include/wine/unicode.h +++ b/include/wine/unicode.h @@ -94,6 +94,8 @@ extern int wine_cp_wcstombs( const union cptable *table, int flags, char *dst, int dstlen, const char *defchar, int *used ); extern int wine_cpsymbol_mbstowcs( const char *src, int srclen, WCHAR *dst, int dstlen ); extern int wine_cpsymbol_wcstombs( const WCHAR *src, int srclen, char *dst, int dstlen ); +extern INT wine_utf7_mbstowcs( LPCSTR pszUtf7, INT cchUtf7, LPWSTR pszWide, INT cchWide ); +extern INT wine_utf7_wcstombs( LPCWSTR pszWide, INT cchWide, LPSTR pszUtf7, INT cchUtf7 ); extern int wine_utf8_mbstowcs( int flags, const char *src, int srclen, WCHAR *dst, int dstlen ); extern int wine_utf8_wcstombs( int flags, const WCHAR *src, int srclen, char *dst, int dstlen ); diff --git a/libs/wine/Makefile.in b/libs/wine/Makefile.in index ca93d26..431fafc 100644 --- a/libs/wine/Makefile.in +++ b/libs/wine/Makefile.in @@ -88,6 +88,7 @@ C_SRCS = \ port.c \ sortkey.c \ string.c \ + utf7.c \ utf8.c \ wctomb.c \ wctype.c diff --git a/libs/wine/utf7.c b/libs/wine/utf7.c new file mode 100644 index 0000000..7b67b2a 
--- /dev/null
+++ b/libs/wine/utf7.c
@@ -0,0 +1,254 @@
+/*
+ * UTF-7 support routines
+ *
+ * Copyright 2010 Katayama Hirofumi
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#include "wine/unicode.h"
+
+/* Value of each 7-bit character as a base64 digit, or -1 when the character
+ * is not a base64 digit. */
+static const signed char base64inv[128] =
+{
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
+    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
+    -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
+    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
+    -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1
+};
+
+/* Nonzero for each 7-bit character that cannot be written as itself in the
+ * encoded text.  Characters from 0x80 up are not in the table. */
+static const char mustshift[128] =
+{
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
+    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
+    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1
+};
+
+/* The base64 digits, indexed by their 6-bit value. */
+static const char base64[] =
+"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+/* Tell whether wch has to be written as base64 rather than as itself. */
+static int utf7_must_shift(WCHAR wch)
+{
+    return wch >= 0x80 || mustshift[wch];
+}
+
+/* Append one WCHAR at index *pos.  With a NULL dst nothing is stored, so the
+ * same code path serves for measuring the output. */
+static void utf7_put_wide(LPWSTR dst, INT *pos, WCHAR wch)
+{
+    if (dst) dst[*pos] = wch;
+    (*pos)++;
+}
+
+/* Byte counterpart of utf7_put_wide. */
+static void utf7_put_byte(LPSTR dst, INT *pos, CHAR ch)
+{
+    if (dst) dst[*pos] = ch;
+    (*pos)++;
+}
+
+/*
+ * Decode cchUtf7 bytes of UTF-7 text.
+ *
+ * pszWide receives the decoded characters; when it is NULL they are only
+ * counted.  The caller guarantees that a non-NULL pszWide is large enough.
+ * Returns the number of WCHARs the input decodes to.
+ */
+static INT utf7_decode(LPCSTR pszUtf7, INT cchUtf7, LPWSTR pszWide)
+{
+    INT count = 0;
+
+    while (cchUtf7 > 0)
+    {
+        BYTE ch = (BYTE) *pszUtf7++;
+        unsigned int bits = 0;  /* base64 bits not yet turned into a WCHAR */
+        int nbits = 0;          /* number of valid low bits in 'bits' */
+
+        cchUtf7--;
+        if (ch != '+')
+        {
+            /* Directly encoded character.  Going through BYTE keeps bytes
+             * above 0x7F from being sign-extended where CHAR is signed. */
+            utf7_put_wide(pszWide, &count, ch);
+            continue;
+        }
+        if (cchUtf7 > 0 && *pszUtf7 == '-')
+        {
+            /* "+-" stands for a literal plus sign. */
+            utf7_put_wide(pszWide, &count, '+');
+            pszUtf7++;
+            cchUtf7--;
+            continue;
+        }
+
+        /* A run of base64 digits carries 16-bit units, most significant bit
+         * first.  Shift the digits in six bits at a time and emit a WCHAR
+         * whenever sixteen bits are available; leftover bits are dropped. */
+        while (cchUtf7 > 0 && (BYTE) *pszUtf7 < 0x80 &&
+               base64inv[(BYTE) *pszUtf7] >= 0)
+        {
+            bits = (bits << 6) | (unsigned int) base64inv[(BYTE) *pszUtf7];
+            nbits += 6;
+            if (nbits >= 16)
+            {
+                nbits -= 16;
+                utf7_put_wide(pszWide, &count, (WCHAR) ((bits >> nbits) & 0xFFFF));
+            }
+            pszUtf7++;
+            cchUtf7--;
+        }
+
+        /* A '-' right after the run only terminates it. */
+        if (cchUtf7 > 0 && *pszUtf7 == '-')
+        {
+            pszUtf7++;
+            cchUtf7--;
+        }
+    }
+
+    return count;
+}
+
+/*
+ * Convert UTF-7 text to wide characters.
+ *
+ * Returns the number of WCHARs the input decodes to.  With cchWide == 0
+ * nothing is written; when cchWide is nonzero but smaller than that number
+ * nothing is written either and -1 is returned.
+ */
+INT wine_utf7_mbstowcs(LPCSTR pszUtf7, INT cchUtf7, LPWSTR pszWide, INT cchWide)
+{
+    INT count = utf7_decode(pszUtf7, cchUtf7, NULL);
+
+    if (cchWide == 0)
+        return count;
+    if (cchWide < count)
+        return -1;
+
+    utf7_decode(pszUtf7, cchUtf7, pszWide);
+    return count;
+}
+
+/*
+ * Encode cchWide wide characters as UTF-7.
+ *
+ * pszUtf7 receives the encoded bytes; when it is NULL they are only counted.
+ * The caller guarantees that a non-NULL pszUtf7 is large enough.
+ * Returns the number of bytes in the encoded text.
+ */
+static INT utf7_encode(LPCWSTR pszWide, INT cchWide, LPSTR pszUtf7)
+{
+    INT count = 0;
+    int shifted = 0;    /* nonzero while inside a "+...-" base64 run */
+
+    while (cchWide > 0)
+    {
+        unsigned int w[3] = { 0, 0, 0 };    /* cleared for every group */
+        char digits[8];
+        int n, ndigits, i;
+
+        if (!utf7_must_shift(*pszWide))
+        {
+            utf7_put_byte(pszUtf7, &count, (CHAR) *pszWide++);
+            cchWide--;
+            continue;
+        }
+        if (*pszWide == '+' && !shifted)
+        {
+            /* Outside a base64 run a plus sign is escaped as "+-".  Inside
+             * a run it is encoded like any other shifted character, as an
+             * escape there would be read back as base64 digits. */
+            utf7_put_byte(pszUtf7, &count, '+');
+            utf7_put_byte(pszUtf7, &count, '-');
+            pszWide++;
+            cchWide--;
+            continue;
+        }
+        if (!shifted)
+        {
+            utf7_put_byte(pszUtf7, &count, '+');
+            shifted = 1;
+        }
+
+        /* Gather up to three shifted characters: 48 bits are exactly eight
+         * base64 digits, so a full group leaves no partial digit behind. */
+        for (n = 0; n < 3 && cchWide > 0 && utf7_must_shift(*pszWide); n++)
+        {
+            w[n] = *pszWide++;
+            cchWide--;
+        }
+
+        digits[0] = base64[w[0] >> 10];
+        digits[1] = base64[(w[0] >> 4) & 0x3F];
+        digits[2] = base64[(w[0] << 2 | w[1] >> 14) & 0x3F];
+        digits[3] = base64[(w[1] >> 8) & 0x3F];
+        digits[4] = base64[(w[1] >> 2) & 0x3F];
+        digits[5] = base64[(w[1] << 4 | w[2] >> 12) & 0x3F];
+        digits[6] = base64[(w[2] >> 6) & 0x3F];
+        digits[7] = base64[w[2] & 0x3F];
+
+        /* One, two or three characters need three, six or eight digits. */
+        ndigits = (n == 1) ? 3 : (n == 2) ? 6 : 8;
+        for (i = 0; i < ndigits; i++)
+            utf7_put_byte(pszUtf7, &count, digits[i]);
+
+        /* Leave the run as soon as a directly encodable character follows. */
+        if (cchWide > 0 && !utf7_must_shift(*pszWide))
+        {
+            utf7_put_byte(pszUtf7, &count, '-');
+            shifted = 0;
+        }
+    }
+
+    /* A run that is still open at the end of the input gets closed too. */
+    if (shifted)
+        utf7_put_byte(pszUtf7, &count, '-');
+
+    return count;
+}
+
+/*
+ * Convert wide characters to UTF-7 text.
+ *
+ * Returns the number of bytes in the encoded text.  With cchUtf7 == 0
+ * nothing is written; when cchUtf7 is nonzero but smaller than that number
+ * nothing is written either and -1 is returned.
+ */
+INT wine_utf7_wcstombs(LPCWSTR pszWide, INT cchWide, LPSTR pszUtf7, INT cchUtf7)
+{
+    INT count = utf7_encode(pszWide, cchWide, NULL);
+
+    if (cchUtf7 == 0)
+        return count;
+    if (cchUtf7 < count)
+        return -1;
+
+    utf7_encode(pszWide, cchWide, pszUtf7);
+    return count;
+}
diff --git a/libs/wine/wine.map b/libs/wine/wine.map
index 2159fac..3e0e6db 100644
--- a/libs/wine/wine.map
+++ b/libs/wine/wine.map
@@ -114,6 +114,8 @@ WINE_1.0
     wine_set_fs;
     wine_set_gs;
     wine_switch_to_stack;
+    wine_utf7_mbstowcs;
+    wine_utf7_wcstombs;
     wine_utf8_mbstowcs;
     wine_utf8_wcstombs;
     wine_wctype_table;
-- 
1.7.5.4