From: Damjan Jovanovic
Subject: winex11.drv: import X11's "text/html" as "HTML Format" (try 3)
Message-Id:
Date: Fri, 8 Aug 2014 20:05:54 +0200

Implements proper importing of "text/html" into Windows's "HTML Format"
which fixes pasting rich text into a large number of apps and closes #7372.

Try 3 doesn't use libxml as requested by Alexandre (I still prefer it -
much simpler and shorter to use and probably parses more correctly),
correctly deals with both null terminated and non-terminated text/html
data, and correctly calculates the lengths of Firefox's UTF-16LE strings.

Damjan Jovanovic

commit 381abb6b6d195c5bbe27bb1308fd507b82743a1d
Author: Damjan Jovanovic
Date:   Sat Jul 19 14:37:08 2014 +0200

    winex11.drv: import X11's "text/html" as "HTML Format"

diff --git a/dlls/winex11.drv/clipboard.c b/dlls/winex11.drv/clipboard.c
index b2705b4..f08eee3 100644
--- a/dlls/winex11.drv/clipboard.c
+++ b/dlls/winex11.drv/clipboard.c
@@ -148,6 +148,7 @@ static HANDLE X11DRV_CLIPBOARD_ImportImageBmp(Display *d, Window w, Atom prop);
 static HANDLE X11DRV_CLIPBOARD_ImportXAString(Display *d, Window w, Atom prop);
 static HANDLE X11DRV_CLIPBOARD_ImportUTF8(Display *d, Window w, Atom prop);
 static HANDLE X11DRV_CLIPBOARD_ImportCompoundText(Display *d, Window w, Atom prop);
+static HANDLE X11DRV_CLIPBOARD_ImportTextHtml(Display *display, Window w, Atom prop);
 static HANDLE X11DRV_CLIPBOARD_ImportTextUriList(Display *display, Window w, Atom prop);
 static HANDLE X11DRV_CLIPBOARD_ExportClipboardData(Display *display, Window requestor, Atom aTarget,
     Atom rprop, LPWINE_CLIPDATA lpData, LPDWORD lpBytes);
@@ -343,10 +344,11 @@ void X11DRV_InitClipboard(void)
         X11DRV_CLIPBOARD_InsertClipboardFormat( RegisterClipboardFormatW(PropertyFormatMap[i].lpszFormat),
                                                 GET_ATOM(PropertyFormatMap[i].prop));
 
-    /* Set up a conversion function from "HTML Format" to "text/html" */
+    /* Set up a conversion function between "HTML Format" and "text/html" */
     format = X11DRV_CLIPBOARD_InsertClipboardFormat(
                  RegisterClipboardFormatW(wszHTMLFormat), GET_ATOM(XATOM_text_html));
     format->lpDrvExportFunc = X11DRV_CLIPBOARD_ExportTextHtml;
+    format->lpDrvImportFunc = X11DRV_CLIPBOARD_ImportTextHtml;
 }
 
 
@@ -1568,6 +1570,252 @@ static HANDLE X11DRV_CLIPBOARD_ImportEnhMetaFile(Display *display, Window w, Ato
 }
 
 
+/**************************************************************************
+ *		find_body_content_start
+ *
+ * Return the offset of the first byte following the opening <body> tag of
+ * the null-terminated document html, or -1 if there is no such tag.
+ * The tag is matched case-insensitively and may carry attributes.
+ */
+static int find_body_content_start(const char *html)
+{
+    int i;
+
+    for (i = 0; html[i]; i++)
+    {
+        const char *close;
+        char after;
+
+        if (strncasecmp(&html[i], "<body", 5)) continue;
+        /* Reject longer tag names that merely start with "body". */
+        after = html[i + 5];
+        if (after != '>' && after != ' ' && after != '\t' && after != '\r' && after != '\n')
+            continue;
+        /* Simplification: a '>' inside a quoted attribute value ends the tag early. */
+        close = strchr(&html[i + 5], '>');
+        return close ? (int)(close - html) + 1 : -1;
+    }
+    return -1;
+}
+
+
+/**************************************************************************
+ *		read_and_standardize_text_html
+ *
+ * Read the text/html selection data stored in property prop of window w and
+ * return it as a null-terminated UTF-8 HTML document.
+ *
+ * UTF-16LE data carrying a byte order mark is converted to UTF-8, trailing
+ * null bytes are dropped, and data not ending in </html> is wrapped in
+ * <html> (and, when no </body> is present, <body>) tags.
+ *
+ * Returns a buffer allocated from the process heap that the caller must
+ * release with HeapFree, or NULL on failure.
+ */
+static char* read_and_standardize_text_html(Display *display, Window w, Atom prop)
+{
+    char *textHtml;
+    unsigned long textHtmlLen;
+    unsigned long startOfMarkup;
+    BOOL needHtmlTag = FALSE;
+    BOOL needBodyTag = FALSE;
+    char *fullHtml = NULL;
+
+    if (!X11DRV_CLIPBOARD_ReadProperty(display, w, prop, (LPBYTE*)&textHtml, &textHtmlLen))
+        return NULL;
+
+    /* Firefox uses UTF-16LE with byte order mark. Convert to UTF-8 without the BOM. */
+    if (textHtmlLen >= 2 && ((BYTE*)textHtml)[0] == 0xff && ((BYTE*)textHtml)[1] == 0xfe)
+    {
+        char *textHtmlUtf8;
+        INT size = WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR)&textHtml[2], (textHtmlLen-2)/2, NULL, 0, NULL, NULL);
+        textHtmlUtf8 = HeapAlloc(GetProcessHeap(), 0, size);
+        if (textHtmlUtf8)
+        {
+            WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR)&textHtml[2], (textHtmlLen-2)/2, textHtmlUtf8, size, NULL, NULL);
+            HeapFree(GetProcessHeap(), 0, textHtml);
+            textHtml = textHtmlUtf8;
+            textHtmlLen = size;
+        }
+        else
+        {
+            ERR("out of memory\n");
+            goto end;
+        }
+    }
+
+    /* Sometimes textHtml is null terminated. */
+    while (textHtmlLen > 0 && textHtml[textHtmlLen-1] == '\0')
+        --textHtmlLen;
+
+    /* While HTML fragments are supposed to be valid in Windows, some apps only want
+     * to paste a complete HTML document. So if we got an HTML fragment, complete it.
+     * Data shorter than "</html>" cannot end with it and is a fragment as well. */
+    if (textHtmlLen < 7 || strncasecmp(&textHtml[textHtmlLen - 7], "</html>", 7))
+    {
+        unsigned long i;
+        needHtmlTag = TRUE;
+        needBodyTag = TRUE;
+        for (i = textHtmlLen < 7 ? 0 : textHtmlLen - 7; i > 0; i--)
+        {
+            if (!strncasecmp(&textHtml[i], "</body>", 7))
+            {
+                needBodyTag = FALSE;
+                break;
+            }
+        }
+    }
+    /* Locate the first real tag: wrapper tags must go after any leading
+     * text, <!DOCTYPE>, or comment, and dangling end tags are skipped. */
+    for (startOfMarkup = 0; startOfMarkup < textHtmlLen; startOfMarkup++)
+    {
+        if (textHtml[startOfMarkup] == '<' &&
+            startOfMarkup + 1 < textHtmlLen &&
+            textHtml[startOfMarkup+1] != '!' && /* <!DOCTYPE> or <!-- --> */
+            textHtml[startOfMarkup+1] != '/')   /* </tag> */
+        {
+            if (memchr(&textHtml[startOfMarkup+1], '>', textHtmlLen - startOfMarkup - 1))
+                break;
+        }
+    }
+    if (startOfMarkup == textHtmlLen)
+    {
+        ERR("text/html contents invalid\n");
+        goto end;
+    }
+    fullHtml = HeapAlloc(GetProcessHeap(), 0, textHtmlLen +
+            (needBodyTag ? 6 + 7 : 0) + (needHtmlTag ? 6 + 7 : 0) + 1);
+    if (fullHtml)
+    {
+        unsigned long next;
+        memcpy(fullHtml, textHtml, startOfMarkup);
+        next = startOfMarkup;
+        if (needHtmlTag)
+        {
+            memcpy(&fullHtml[next], "<html>", 6);
+            next += 6;
+        }
+        if (needBodyTag)
+        {
+            memcpy(&fullHtml[next], "<body>", 6);
+            next += 6;
+        }
+        memcpy(&fullHtml[next], &textHtml[startOfMarkup], textHtmlLen - startOfMarkup);
+        next += textHtmlLen - startOfMarkup;
+        if (needBodyTag)
+        {
+            memcpy(&fullHtml[next], "</body>", 7);
+            next += 7;
+        }
+        if (needHtmlTag)
+        {
+            memcpy(&fullHtml[next], "</html>", 7);
+            next += 7;
+        }
+        fullHtml[next] = '\0';
+    }
+    else
+        ERR("out of memory\n");
+
+end:
+    HeapFree(GetProcessHeap(), 0, textHtml);
+    return fullHtml;
+}
+
+
+/**************************************************************************
+ *		X11DRV_CLIPBOARD_ImportTextHtml
+ *
+ *  Import text/html into "HTML Format".
+ *
+ * The result is the 100 byte description header followed by the document,
+ * with the contents of <body> enclosed in StartFragment/EndFragment
+ * comments. Returns NULL if the data cannot be read or has no <body>.
+ */
+static HANDLE X11DRV_CLIPBOARD_ImportTextHtml(Display *display, Window w, Atom prop)
+{
+    static const char startFragment[] = "<!--StartFragment-->";
+    static const char endFragment[] = "<!--EndFragment-->";
+    const size_t startFragmentLen = sizeof(startFragment) - 1;
+    const size_t endFragmentLen = sizeof(endFragment) - 1;
+    char *textHtml = NULL;
+    size_t textHtmlLen;
+    size_t descriptionLen;
+    int bodyStart;
+    int bodyEnd = -1;
+    char description[256];
+    HGLOBAL hClipData = NULL;
+    int i;
+
+    textHtml = read_and_standardize_text_html(display, w, prop);
+    if (textHtml == NULL)
+        goto end;
+    textHtmlLen = strlen(textHtml);
+
+    bodyStart = find_body_content_start(textHtml);
+    if (bodyStart < 0)
+    {
+        ERR("HTML doesn't have <body>\n");
+        goto end;
+    }
+
+    for (i = (int)textHtmlLen - 1; i >= bodyStart; i--)
+    {
+        if (strncasecmp(&textHtml[i], "</body>", 7) == 0)
+        {
+            bodyEnd = i;
+            break;
+        }
+    }
+    if (bodyEnd < 0)
+    {
+        ERR("HTML doesn't have </body>\n");
+        goto end;
+    }
+
+    snprintf(description, sizeof(description),
+            "Version:0.9\n" /* 12 */
+            "StartHTML:%010u\n" /* 21 */
+            "EndHTML:%010u\n" /* 19 */
+            "StartFragment:%010u\n" /* 25 */
+            "EndFragment:%010u\n", /* 23 */
+            100u,
+            100 + (UINT)(textHtmlLen + startFragmentLen + endFragmentLen),
+            100 + (UINT)(bodyStart + startFragmentLen),
+            100 + (UINT)(startFragmentLen + bodyEnd));
+    descriptionLen = strlen(description);
+    hClipData = GlobalAlloc(GMEM_MOVEABLE | GMEM_DDESHARE,
+            descriptionLen + textHtmlLen + startFragmentLen + endFragmentLen + 1);
+    if (hClipData)
+    {
+        char *htmlFormat;
+        char *next;
+        htmlFormat = GlobalLock(hClipData);
+        next = htmlFormat;
+        memcpy(next, description, descriptionLen);
+        next += descriptionLen;
+        memcpy(next, textHtml, bodyStart);
+        next += bodyStart;
+        memcpy(next, startFragment, startFragmentLen);
+        next += startFragmentLen;
+        memcpy(next, &textHtml[bodyStart], bodyEnd - bodyStart);
+        next += (bodyEnd - bodyStart);
+        memcpy(next, endFragment, endFragmentLen);
+        next += endFragmentLen;
+        memcpy(next, &textHtml[bodyEnd], textHtmlLen - bodyEnd);
+        next += (textHtmlLen - bodyEnd);
+        *next = 0;
+        GlobalUnlock(hClipData);
+    }
+    else
+        ERR("out of memory\n");
+
+end:
+    HeapFree(GetProcessHeap(), 0, textHtml);
+    return hClipData;
+}
+
+
 /**************************************************************************
  *		X11DRV_CLIPBOARD_ImportTextUriList
  *