From: Thomas Faber Subject: [2/2] ntdll: Implement RtlUTF8ToUnicodeN. Message-Id: <555E6C78.4080708@reactos.org> Date: Thu, 21 May 2015 19:38:32 -0400 From 50e5976eac7132f3db223d4c055f431e77f21fcf Mon Sep 17 00:00:00 2001 From: Thomas Faber Date: Thu, 21 May 2015 19:29:07 -0400 Subject: ntdll: Implement RtlUTF8ToUnicodeN. --- dlls/ntdll/ntdll.spec | 1 + dlls/ntdll/rtlstr.c | 137 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 138 insertions(+), 0 deletions(-) diff --git a/dlls/ntdll/ntdll.spec b/dlls/ntdll/ntdll.spec index 316f875..9959733 100644 --- a/dlls/ntdll/ntdll.spec +++ b/dlls/ntdll/ntdll.spec @@ -935,6 +935,7 @@ @ stdcall RtlUpperString(ptr ptr) @ stub RtlUsageHeap @ cdecl -i386 -norelay RtlUshortByteSwap() NTDLL_RtlUshortByteSwap +@ stdcall RtlUTF8ToUnicodeN(ptr long ptr ptr long) @ stdcall RtlValidAcl(ptr) # @ stub RtlValidRelativeSecurityDescriptor @ stdcall RtlValidSecurityDescriptor(ptr) diff --git a/dlls/ntdll/rtlstr.c b/dlls/ntdll/rtlstr.c index 5286b81..4b65c2d 100644 --- a/dlls/ntdll/rtlstr.c +++ b/dlls/ntdll/rtlstr.c @@ -2274,3 +2274,140 @@ NTSTATUS WINAPI RtlUnicodeToUTF8N(CHAR *utf8_dest, ULONG utf8_bytes_max, *utf8_bytes_written = written; return status; } + +/****************************************************************************** + * RtlUTF8ToUnicodeN [NTDLL.@] + */ +NTSTATUS WINAPI RtlUTF8ToUnicodeN(WCHAR *uni_dest, ULONG uni_bytes_max, + ULONG *uni_bytes_written, + const CHAR *utf8_src, ULONG utf8_bytes) +{ + NTSTATUS status; + ULONG i, j; + ULONG written; + ULONG ch; + ULONG utf8_trail_bytes; + WCHAR utf16_ch[3]; + ULONG utf16_ch_len; + + if (!utf8_src) + return STATUS_INVALID_PARAMETER_4; + if (!uni_bytes_written) + return STATUS_INVALID_PARAMETER; + + written = 0; + status = STATUS_SUCCESS; + + for (i = 0; i < utf8_bytes; i++) + { + /* read UTF-8 lead byte */ + ch = (BYTE)utf8_src[i]; + utf8_trail_bytes = 0; + if (ch >= 0xf5) + { + ch = 0xfffd; + status = STATUS_SOME_NOT_MAPPED; + } + else if (ch >= 0xf0) + { + ch &= 0x07; + utf8_trail_bytes = 3; + } + else if (ch >= 0xe0) + { + ch &= 0x0f; + utf8_trail_bytes = 2; + } + else if (ch >= 0xc2) + { + ch &= 0x1f; + utf8_trail_bytes = 1; + } + else if (ch >= 0x80) + { + /* overlong or trail byte */ + ch = 0xfffd; + status = STATUS_SOME_NOT_MAPPED; + } + + /* read UTF-8 trail bytes */ + if (i + utf8_trail_bytes < utf8_bytes) + { + for (j = 0; j < utf8_trail_bytes; j++) + { + if ((utf8_src[i + 1] & 0xc0) == 0x80) + { + ch <<= 6; + ch |= utf8_src[i + 1] & 0x3f; + i++; + } + else + { + ch = 0xfffd; + utf8_trail_bytes = 0; + status = STATUS_SOME_NOT_MAPPED; + break; + } + } + } + else + { + ch = 0xfffd; + utf8_trail_bytes = 0; + status = STATUS_SOME_NOT_MAPPED; + i = utf8_bytes; + } + + /* encode ch as UTF-16 */ + if ((ch > 0x10ffff) || + (ch >= 0xd800 && ch <= 0xdfff) || + (utf8_trail_bytes == 2 && ch < 0x00800) || + (utf8_trail_bytes == 3 && ch < 0x10000)) + { + /* invalid codepoint or overlong encoding */ + utf16_ch[0] = 0xfffd; + utf16_ch[1] = 0xfffd; + utf16_ch[2] = 0xfffd; + utf16_ch_len = utf8_trail_bytes; + status = STATUS_SOME_NOT_MAPPED; + } + else if (ch >= 0x10000) + { + /* surrogate pair */ + ch -= 0x010000; + utf16_ch[0] = 0xd800 + (ch >> 10 & 0x3ff); + utf16_ch[1] = 0xdc00 + (ch >> 0 & 0x3ff); + utf16_ch_len = 2; + } + else + { + /* single unit */ + utf16_ch[0] = ch; + utf16_ch_len = 1; + } + + if (!uni_dest) + { + written += utf16_ch_len; + continue; + } + + for (j = 0; j < utf16_ch_len; j++) + { + if (uni_bytes_max >= sizeof(WCHAR)) + { + *uni_dest++ = utf16_ch[j]; + uni_bytes_max -= sizeof(WCHAR); + written++; + } + else + { + uni_bytes_max = 0; + status = STATUS_BUFFER_TOO_SMALL; + } + } + } + + *uni_bytes_written = written * sizeof(WCHAR); + return status; +} -- 1.7.1