From: Piotr Caban Subject: [PATCH 4/6 v2] msxml3: Reimplement node_get_text helper so it uses information about ignored white space nodes Message-Id: <564ECE01.2030203@codeweavers.com> Date: Fri, 20 Nov 2015 08:38:41 +0100 Signed-off-by: Piotr Caban --- dlls/msxml3/node.c | 47 +++++++++++++++++++++++++++++++++++++++------- dlls/msxml3/tests/domdoc.c | 4 ++-- 2 files changed, 42 insertions(+), 9 deletions(-) diff --git a/dlls/msxml3/node.c b/dlls/msxml3/node.c index 8914e05..17e3838 100644 --- a/dlls/msxml3/node.c +++ b/dlls/msxml3/node.c @@ -742,7 +742,7 @@ static inline xmlChar* trim_whitespace(xmlChar* str) return ret; } -static xmlChar* do_get_text(xmlNodePtr node) +static xmlChar* do_get_text(xmlNodePtr node, BOOL trim, BOOL *trail_ig_ws) { xmlNodePtr child; xmlChar* str; @@ -751,20 +751,42 @@ static xmlChar* do_get_text(xmlNodePtr node) if (!node->children) { str = xmlNodeGetContent(node); + *trail_ig_ws = *(DWORD*)&node->_private & NODE_PRIV_CHILD_IGNORABLE_WS; } else { - xmlElementType prev_type = XML_TEXT_NODE; + BOOL ig_ws = FALSE; xmlChar* tmp; str = xmlStrdup(BAD_CAST ""); + + if (node->type != XML_DOCUMENT_NODE) + ig_ws = *(DWORD*)&node->_private & NODE_PRIV_CHILD_IGNORABLE_WS; + *trail_ig_ws = FALSE; + for (child = node->children; child != NULL; child = child->next) { switch (child->type) { case XML_ELEMENT_NODE: - tmp = do_get_text(child); + tmp = do_get_text(child, FALSE, trail_ig_ws); break; case XML_TEXT_NODE: + tmp = xmlNodeGetContent(child); + if (!preserving && tmp[0]) + { + xmlChar *beg; + + for (beg = tmp; *beg; beg++) + if (!isspace(*beg)) break; + + if (!*beg) + { + ig_ws = TRUE; + xmlFree(tmp); + tmp = NULL; + } + break; + } case XML_CDATA_SECTION_NODE: case XML_ENTITY_REF_NODE: case XML_ENTITY_NODE: @@ -779,14 +801,24 @@ static xmlChar* do_get_text(xmlNodePtr node) { if (*tmp) { - if (prev_type == XML_ELEMENT_NODE && child->type == XML_ELEMENT_NODE) + if (ig_ws && str[0]) str = xmlStrcat(str, BAD_CAST " "); str = xmlStrcat(str, tmp); - 
prev_type = child->type; + ig_ws = FALSE; } xmlFree(tmp); } + + if (!ig_ws) + { + ig_ws = *(DWORD*)&child->_private & NODE_PRIV_TRAILING_IGNORABLE_WS; + } + if (!ig_ws) + ig_ws = *trail_ig_ws; + *trail_ig_ws = FALSE; } + + *trail_ig_ws = ig_ws; } switch (node->type) @@ -797,7 +829,7 @@ static xmlChar* do_get_text(xmlNodePtr node) case XML_ENTITY_NODE: case XML_DOCUMENT_NODE: case XML_DOCUMENT_FRAG_NODE: - if (!preserving) + if (trim && !preserving) str = trim_whitespace(str); break; default: @@ -811,10 +843,11 @@ HRESULT node_get_text(const xmlnode *This, BSTR *text) { BSTR str = NULL; xmlChar *content; + BOOL tmp; if (!text) return E_INVALIDARG; - content = do_get_text(This->node); + content = do_get_text(This->node, TRUE, &tmp); if (content) { str = bstr_from_xmlChar(content); diff --git a/dlls/msxml3/tests/domdoc.c b/dlls/msxml3/tests/domdoc.c index 00a24b6..4201bac 100644 --- a/dlls/msxml3/tests/domdoc.c +++ b/dlls/msxml3/tests/domdoc.c @@ -4174,7 +4174,7 @@ static inline void _check_ws_ignored(int line, const char *ver, IXMLDOMDocument2 helper_ole_check_ver(IXMLDOMNode_get_text(node1, &bstr)); if (str) { - todo_wine helper_expect_bstr_and_release(bstr, str); + helper_expect_bstr_and_release(bstr, str); } else { @@ -4217,7 +4217,7 @@ static inline void _check_ws_preserved(int line, const char *ver, IXMLDOMDocumen } else { - todo_wine helper_expect_bstr_and_release(bstr, "\n This is a description. \n "); + helper_expect_bstr_and_release(bstr, "\n This is a description. \n "); } IXMLDOMNode_Release(node1);