1 /*
2 * Url functions
3 *
4 * Copyright 2000 Huw D M Davies for CodeWeavers.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 */
20
21 #include "config.h"
22 #include "wine/port.h"
23 #include <stdarg.h>
24 #include <string.h>
25 #include <stdlib.h>
26 #include "windef.h"
27 #include "winbase.h"
28 #include "winnls.h"
29 #include "winerror.h"
30 #include "wine/unicode.h"
31 #include "wininet.h"
32 #include "winreg.h"
33 #include "winternl.h"
34 #define NO_SHLWAPI_STREAM
35 #include "shlwapi.h"
36 #include "wine/debug.h"
37
38 HMODULE WINAPI MLLoadLibraryW(LPCWSTR,HMODULE,DWORD);
39 BOOL WINAPI MLFreeLibrary(HMODULE);
40 HRESULT WINAPI MLBuildResURLW(LPCWSTR,HMODULE,DWORD,LPCWSTR,LPWSTR,DWORD);
41
42 WINE_DEFAULT_DEBUG_CHANNEL(shell);
43
/* The following schemes were identified in the native version of
 * SHLWAPI.DLL version 5.50
 *
 * Each entry pairs a URL_SCHEME_* value with its NUL-terminated name.
 * get_scheme_code() walks this table linearly and compares names with
 * memcmp(), so the lookup is case-sensitive.
 */
static const struct {
    URL_SCHEME  scheme_number;
    WCHAR       scheme_name[12]; /* longest entry, "javascript", is 10 chars + NUL */
} shlwapi_schemes[] = {
    {URL_SCHEME_FTP,        {'f','t','p',0}},
    {URL_SCHEME_HTTP,       {'h','t','t','p',0}},
    {URL_SCHEME_GOPHER,     {'g','o','p','h','e','r',0}},
    {URL_SCHEME_MAILTO,     {'m','a','i','l','t','o',0}},
    {URL_SCHEME_NEWS,       {'n','e','w','s',0}},
    {URL_SCHEME_NNTP,       {'n','n','t','p',0}},
    {URL_SCHEME_TELNET,     {'t','e','l','n','e','t',0}},
    {URL_SCHEME_WAIS,       {'w','a','i','s',0}},
    {URL_SCHEME_FILE,       {'f','i','l','e',0}},
    {URL_SCHEME_MK,         {'m','k',0}},
    {URL_SCHEME_HTTPS,      {'h','t','t','p','s',0}},
    {URL_SCHEME_SHELL,      {'s','h','e','l','l',0}},
    {URL_SCHEME_SNEWS,      {'s','n','e','w','s',0}},
    {URL_SCHEME_LOCAL,      {'l','o','c','a','l',0}},
    {URL_SCHEME_JAVASCRIPT, {'j','a','v','a','s','c','r','i','p','t',0}},
    {URL_SCHEME_VBSCRIPT,   {'v','b','s','c','r','i','p','t',0}},
    {URL_SCHEME_ABOUT,      {'a','b','o','u','t',0}},
    {URL_SCHEME_RES,        {'r','e','s',0}},
};
70
/* Describes the components of a URL as (start pointer, size) pairs.
 * NOTE(review): the code that fills and consumes this structure is not
 * in this part of the file; the units of the size fields (characters vs.
 * bytes) should be confirmed there.
 */
typedef struct {
    LPCWSTR pScheme;      /* [out] start of scheme                     */
    DWORD   szScheme;     /* [out] size of scheme (until colon)        */
    LPCWSTR pUserName;    /* [out] start of Username                   */
    DWORD   szUserName;   /* [out] size of Username (until ":" or "@") */
    LPCWSTR pPassword;    /* [out] start of Password                   */
    DWORD   szPassword;   /* [out] size of Password (until "@")        */
    LPCWSTR pHostName;    /* [out] start of Hostname                   */
    DWORD   szHostName;   /* [out] size of Hostname (until ":" or "/") */
    LPCWSTR pPort;        /* [out] start of Port                       */
    DWORD   szPort;       /* [out] size of Port (until "/" or eos)     */
    LPCWSTR pQuery;       /* [out] start of Query                      */
    DWORD   szQuery;      /* [out] size of Query (until eos)           */
} WINE_PARSE_URL;
85
/* Selects which kind of URL component is being scanned.
 * NOTE(review): the scanner that switches on these values is not in this
 * part of the file; presumably each value selects the character set that
 * is legal for that component - confirm against the consumer.
 */
typedef enum {
    SCHEME,
    HOST,
    PORT,
    USERPASS,
} WINE_URL_SCAN_TYPE;
92
/* Upper-case hex digits, indexed by nibble value when building "%XX" escapes. */
static const CHAR hexDigits[] = "0123456789ABCDEF";

/* NUL-terminated wide string "file". */
static const WCHAR fileW[] = {'f','i','l','e','\0'};
96
/* 256-entry byte lookup table.
 * NOTE(review): the consumer is not in this part of the file; judging by
 * the name it is the substitution table of the data-hashing routine -
 * confirm there before changing any value.
 */
static const unsigned char HashDataLookup[256] = {
 0x01, 0x0E, 0x6E, 0x19, 0x61, 0xAE, 0x84, 0x77, 0x8A, 0xAA, 0x7D, 0x76, 0x1B,
 0xE9, 0x8C, 0x33, 0x57, 0xC5, 0xB1, 0x6B, 0xEA, 0xA9, 0x38, 0x44, 0x1E, 0x07,
 0xAD, 0x49, 0xBC, 0x28, 0x24, 0x41, 0x31, 0xD5, 0x68, 0xBE, 0x39, 0xD3, 0x94,
 0xDF, 0x30, 0x73, 0x0F, 0x02, 0x43, 0xBA, 0xD2, 0x1C, 0x0C, 0xB5, 0x67, 0x46,
 0x16, 0x3A, 0x4B, 0x4E, 0xB7, 0xA7, 0xEE, 0x9D, 0x7C, 0x93, 0xAC, 0x90, 0xB0,
 0xA1, 0x8D, 0x56, 0x3C, 0x42, 0x80, 0x53, 0x9C, 0xF1, 0x4F, 0x2E, 0xA8, 0xC6,
 0x29, 0xFE, 0xB2, 0x55, 0xFD, 0xED, 0xFA, 0x9A, 0x85, 0x58, 0x23, 0xCE, 0x5F,
 0x74, 0xFC, 0xC0, 0x36, 0xDD, 0x66, 0xDA, 0xFF, 0xF0, 0x52, 0x6A, 0x9E, 0xC9,
 0x3D, 0x03, 0x59, 0x09, 0x2A, 0x9B, 0x9F, 0x5D, 0xA6, 0x50, 0x32, 0x22, 0xAF,
 0xC3, 0x64, 0x63, 0x1A, 0x96, 0x10, 0x91, 0x04, 0x21, 0x08, 0xBD, 0x79, 0x40,
 0x4D, 0x48, 0xD0, 0xF5, 0x82, 0x7A, 0x8F, 0x37, 0x69, 0x86, 0x1D, 0xA4, 0xB9,
 0xC2, 0xC1, 0xEF, 0x65, 0xF2, 0x05, 0xAB, 0x7E, 0x0B, 0x4A, 0x3B, 0x89, 0xE4,
 0x6C, 0xBF, 0xE8, 0x8B, 0x06, 0x18, 0x51, 0x14, 0x7F, 0x11, 0x5B, 0x5C, 0xFB,
 0x97, 0xE1, 0xCF, 0x15, 0x62, 0x71, 0x70, 0x54, 0xE2, 0x12, 0xD6, 0xC7, 0xBB,
 0x0D, 0x20, 0x5E, 0xDC, 0xE0, 0xD4, 0xF7, 0xCC, 0xC4, 0x2B, 0xF9, 0xEC, 0x2D,
 0xF4, 0x6F, 0xB6, 0x99, 0x88, 0x81, 0x5A, 0xD9, 0xCA, 0x13, 0xA5, 0xE7, 0x47,
 0xE6, 0x8E, 0x60, 0xE3, 0x3E, 0xB3, 0xF6, 0x72, 0xA2, 0x35, 0xA0, 0xD7, 0xCD,
 0xB4, 0x2F, 0x6D, 0x2C, 0x26, 0x1F, 0x95, 0x87, 0x00, 0xD8, 0x34, 0x3F, 0x17,
 0x25, 0x45, 0x27, 0x75, 0x92, 0xB8, 0xA3, 0xC8, 0xDE, 0xEB, 0xF8, 0xF3, 0xDB,
 0x0A, 0x98, 0x83, 0x7B, 0xE5, 0xCB, 0x4C, 0x78, 0xD1 };
118
119 static DWORD get_scheme_code(LPCWSTR scheme, DWORD scheme_len)
120 {
121 unsigned int i;
122
123 for(i=0; i < sizeof(shlwapi_schemes)/sizeof(shlwapi_schemes[0]); i++) {
124 if(scheme_len == strlenW(shlwapi_schemes[i].scheme_name)
125 && !memcmp(scheme, shlwapi_schemes[i].scheme_name, scheme_len*sizeof(WCHAR)))
126 return shlwapi_schemes[i].scheme_number;
127 }
128
129 return URL_SCHEME_UNKNOWN;
130 }
131
132 /*************************************************************************
133 * @ [SHLWAPI.1]
134 *
135 * Parse a Url into its constituent parts.
136 *
137 * PARAMS
138 * x [I] Url to parse
139 * y [O] Undocumented structure holding the parsed information
140 *
141 * RETURNS
142 * Success: S_OK. y contains the parsed Url details.
143 * Failure: An HRESULT error code.
144 */
145 HRESULT WINAPI ParseURLA(LPCSTR x, PARSEDURLA *y)
146 {
147 WCHAR scheme[INTERNET_MAX_SCHEME_LENGTH];
148 DWORD cnt, len;
149
150 y->nScheme = URL_SCHEME_INVALID;
151 if (y->cbSize != sizeof(*y)) return E_INVALIDARG;
152 /* FIXME: leading white space generates error of 0x80041001 which
153 * is undefined
154 */
155 if (*x <= ' ') return 0x80041001;
156 cnt = 0;
157 y->cchProtocol = 0;
158 y->pszProtocol = x;
159 while (*x) {
160 if (*x == ':') {
161 y->cchProtocol = cnt;
162 cnt = -1;
163 y->pszSuffix = x+1;
164 break;
165 }
166 x++;
167 cnt++;
168 }
169
170 /* check for no scheme in string start */
171 /* (apparently schemes *must* be larger than a single character) */
172 if ((*x == '\0') || (y->cchProtocol <= 1)) {
173 y->pszProtocol = NULL;
174 return 0x80041001;
175 }
176
177 /* found scheme, set length of remainder */
178 y->cchSuffix = lstrlenA(y->pszSuffix);
179
180 len = MultiByteToWideChar(CP_ACP, 0, y->pszProtocol, y->cchProtocol,
181 scheme, sizeof(scheme)/sizeof(WCHAR));
182 y->nScheme = get_scheme_code(scheme, len);
183
184 return S_OK;
185 }
186
187 /*************************************************************************
188 * @ [SHLWAPI.2]
189 *
190 * Unicode version of ParseURLA.
191 */
192 HRESULT WINAPI ParseURLW(LPCWSTR x, PARSEDURLW *y)
193 {
194 DWORD cnt;
195
196 y->nScheme = URL_SCHEME_INVALID;
197 if (y->cbSize != sizeof(*y)) return E_INVALIDARG;
198 /* FIXME: leading white space generates error of 0x80041001 which
199 * is undefined
200 */
201 if (*x <= ' ') return 0x80041001;
202 cnt = 0;
203 y->cchProtocol = 0;
204 y->pszProtocol = x;
205 while (*x) {
206 if (*x == ':') {
207 y->cchProtocol = cnt;
208 cnt = -1;
209 y->pszSuffix = x+1;
210 break;
211 }
212 x++;
213 cnt++;
214 }
215
216 /* check for no scheme in string start */
217 /* (apparently schemes *must* be larger than a single character) */
218 if ((*x == '\0') || (y->cchProtocol <= 1)) {
219 y->pszProtocol = NULL;
220 return 0x80041001;
221 }
222
223 /* found scheme, set length of remainder */
224 y->cchSuffix = lstrlenW(y->pszSuffix);
225 y->nScheme = get_scheme_code(y->pszProtocol, y->cchProtocol);
226
227 return S_OK;
228 }
229
230 /*************************************************************************
231 * UrlCanonicalizeA [SHLWAPI.@]
232 *
233 * Canonicalize a Url.
234 *
235 * PARAMS
236 * pszUrl [I] Url to cCanonicalize
237 * pszCanonicalized [O] Destination for converted Url.
238 * pcchCanonicalized [I/O] Length of pszUrl, destination for length of pszCanonicalized
239 * dwFlags [I] Flags controlling the conversion.
240 *
241 * RETURNS
242 * Success: S_OK. The pszCanonicalized contains the converted Url.
243 * Failure: E_POINTER, if *pcchCanonicalized is too small.
244 *
245 * MSDN incorrectly describes the flags for this function. They should be:
246 *| URL_DONT_ESCAPE_EXTRA_INFO 0x02000000
247 *| URL_ESCAPE_SPACES_ONLY 0x04000000
248 *| URL_ESCAPE_PERCENT 0x00001000
249 *| URL_ESCAPE_UNSAFE 0x10000000
250 *| URL_UNESCAPE 0x10000000
251 *| URL_DONT_SIMPLIFY 0x08000000
252 *| URL_ESCAPE_SEGMENT_ONLY 0x00002000
253 */
254 HRESULT WINAPI UrlCanonicalizeA(LPCSTR pszUrl, LPSTR pszCanonicalized,
255 LPDWORD pcchCanonicalized, DWORD dwFlags)
256 {
257 LPWSTR base, canonical;
258 HRESULT ret;
259 DWORD len, len2;
260
261 TRACE("(%s, %p, %p, 0x%08x) *pcchCanonicalized: %d\n", debugstr_a(pszUrl), pszCanonicalized,
262 pcchCanonicalized, dwFlags, pcchCanonicalized ? *pcchCanonicalized : -1);
263
264 if(!pszUrl || !pszCanonicalized || !pcchCanonicalized)
265 return E_INVALIDARG;
266
267 base = HeapAlloc(GetProcessHeap(), 0,
268 (2*INTERNET_MAX_URL_LENGTH) * sizeof(WCHAR));
269 canonical = base + INTERNET_MAX_URL_LENGTH;
270
271 MultiByteToWideChar(0, 0, pszUrl, -1, base, INTERNET_MAX_URL_LENGTH);
272 len = INTERNET_MAX_URL_LENGTH;
273
274 ret = UrlCanonicalizeW(base, canonical, &len, dwFlags);
275 if (ret != S_OK) {
276 *pcchCanonicalized = len * 2;
277 HeapFree(GetProcessHeap(), 0, base);
278 return ret;
279 }
280
281 len2 = WideCharToMultiByte(0, 0, canonical, -1, 0, 0, 0, 0);
282 if (len2 > *pcchCanonicalized) {
283 *pcchCanonicalized = len2;
284 HeapFree(GetProcessHeap(), 0, base);
285 return E_POINTER;
286 }
287 WideCharToMultiByte(0, 0, canonical, -1, pszCanonicalized, *pcchCanonicalized, 0, 0);
288 *pcchCanonicalized = len;
289 HeapFree(GetProcessHeap(), 0, base);
290 return S_OK;
291 }
292
293 /*************************************************************************
294 * UrlCanonicalizeW [SHLWAPI.@]
295 *
296 * See UrlCanonicalizeA.
297 */
298 HRESULT WINAPI UrlCanonicalizeW(LPCWSTR pszUrl, LPWSTR pszCanonicalized,
299 LPDWORD pcchCanonicalized, DWORD dwFlags)
300 {
301 HRESULT hr = S_OK;
302 DWORD EscapeFlags;
303 LPWSTR lpszUrlCpy, wk1, wk2, mp, mp2, root;
304 INT state;
305 DWORD nByteLen, nLen, nWkLen;
306 WCHAR slash = '/';
307
308 static const WCHAR wszFile[] = {'f','i','l','e',':'};
309 static const WCHAR wszLocalhost[] = {'l','o','c','a','l','h','o','s','t'};
310
311 TRACE("(%s, %p, %p, 0x%08x) *pcchCanonicalized: %d\n", debugstr_w(pszUrl), pszCanonicalized,
312 pcchCanonicalized, dwFlags, pcchCanonicalized ? *pcchCanonicalized : -1);
313
314 if(!pszUrl || !pszCanonicalized || !pcchCanonicalized)
315 return E_INVALIDARG;
316
317 if(!*pszUrl) {
318 *pszCanonicalized = 0;
319 return S_OK;
320 }
321
322 nByteLen = (strlenW(pszUrl) + 1) * sizeof(WCHAR); /* length in bytes */
323 lpszUrlCpy = HeapAlloc(GetProcessHeap(), 0,
324 INTERNET_MAX_URL_LENGTH * sizeof(WCHAR));
325
326 if((dwFlags & URL_FILE_USE_PATHURL) && nByteLen >= sizeof(wszFile)
327 && !memcmp(wszFile, pszUrl, sizeof(wszFile)))
328 slash = '\\';
329
330 /*
331 * state =
332 * 0 initial 1,3
333 * 1 have 2[+] alnum 2,3
334 * 2 have scheme (found :) 4,6,3
335 * 3 failed (no location)
336 * 4 have // 5,3
337 * 5 have 1[+] alnum 6,3
338 * 6 have location (found /) save root location
339 */
340
341 wk1 = (LPWSTR)pszUrl;
342 wk2 = lpszUrlCpy;
343 state = 0;
344
345 if(pszUrl[1] == ':') { /* Assume path */
346 static const WCHAR wszFilePrefix[] = {'f','i','l','e',':','/','/','/'};
347
348 memcpy(wk2, wszFilePrefix, sizeof(wszFilePrefix));
349 wk2 += sizeof(wszFilePrefix)/sizeof(WCHAR);
350 if (dwFlags & URL_FILE_USE_PATHURL)
351 {
352 slash = '\\';
353 --wk2;
354 }
355 else
356 dwFlags |= URL_ESCAPE_UNSAFE;
357 state = 5;
358 }
359
360 while (*wk1) {
361 switch (state) {
362 case 0:
363 if (!isalnumW(*wk1)) {state = 3; break;}
364 *wk2++ = *wk1++;
365 if (!isalnumW(*wk1)) {state = 3; break;}
366 *wk2++ = *wk1++;
367 state = 1;
368 break;
369 case 1:
370 *wk2++ = *wk1;
371 if (*wk1++ == ':') state = 2;
372 break;
373 case 2:
374 *wk2++ = *wk1++;
375 if (*wk1 != '/') {state = 6; break;}
376 *wk2++ = *wk1++;
377 if((dwFlags & URL_FILE_USE_PATHURL) && nByteLen >= sizeof(wszLocalhost)
378 && !memcmp(wszLocalhost, wk1, sizeof(wszLocalhost))){
379 wk1 += sizeof(wszLocalhost)/sizeof(WCHAR);
380 while(*wk1 == '\\' && (dwFlags & URL_FILE_USE_PATHURL))
381 wk1++;
382 }
383 if(*wk1 == '/' && (dwFlags & URL_FILE_USE_PATHURL))
384 wk1++;
385 state = 4;
386 break;
387 case 3:
388 nWkLen = strlenW(wk1);
389 memcpy(wk2, wk1, (nWkLen + 1) * sizeof(WCHAR));
390 mp = wk2;
391 wk1 += nWkLen;
392 wk2 += nWkLen;
393
394 while(mp < wk2) {
395 if(*mp == '/' || *mp == '\\')
396 *mp = slash;
397 mp++;
398 }
399 break;
400 case 4:
401 if (!isalnumW(*wk1) && (*wk1 != '-') && (*wk1 != '.') && (*wk1 != ':'))
402 {state = 3; break;}
403 while(isalnumW(*wk1) || (*wk1 == '-') || (*wk1 == '.') || (*wk1 == ':'))
404 *wk2++ = *wk1++;
405 state = 5;
406 if (!*wk1)
407 *wk2++ = slash;
408 break;
409 case 5:
410 if (*wk1 != '/' && *wk1 != '\\') {state = 3; break;}
411 while(*wk1 == '/' || *wk1 == '\\') {
412 *wk2++ = slash;
413 wk1++;
414 }
415 state = 6;
416 break;
417 case 6:
418 if(dwFlags & URL_DONT_SIMPLIFY) {
419 state = 3;
420 break;
421 }
422
423 /* Now at root location, cannot back up any more. */
424 /* "root" will point at the '/' */
425
426 root = wk2-1;
427 while (*wk1) {
428 mp = strchrW(wk1, '/');
429 mp2 = strchrW(wk1, '\\');
430 if(mp2 && (!mp || mp2 < mp))
431 mp = mp2;
432 if (!mp) {
433 nWkLen = strlenW(wk1);
434 memcpy(wk2, wk1, (nWkLen + 1) * sizeof(WCHAR));
435 wk1 += nWkLen;
436 wk2 += nWkLen;
437 continue;
438 }
439 nLen = mp - wk1;
440 if(nLen) {
441 memcpy(wk2, wk1, nLen * sizeof(WCHAR));
442 wk2 += nLen;
443 wk1 += nLen;
444 }
445 *wk2++ = slash;
446 wk1++;
447
448 if (*wk1 == '.') {
449 TRACE("found '/.'\n");
450 if (wk1[1] == '/' || wk1[1] == '\\') {
451 /* case of /./ -> skip the ./ */
452 wk1 += 2;
453 }
454 else if (wk1[1] == '.') {
455 /* found /.. look for next / */
456 TRACE("found '/..'\n");
457 if (wk1[2] == '/' || wk1[2] == '\\' ||wk1[2] == '?'
458 || wk1[2] == '#' || !wk1[2]) {
459 /* case /../ -> need to backup wk2 */
460 TRACE("found '/../'\n");
461 *(wk2-1) = '\0'; /* set end of string */
462 mp = strrchrW(root, slash);
463 if (mp && (mp >= root)) {
464 /* found valid backup point */
465 wk2 = mp + 1;
466 if(wk1[2] != '/' && wk1[2] != '\\')
467 wk1 += 2;
468 else
469 wk1 += 3;
470 }
471 else {
472 /* did not find point, restore '/' */
473 *(wk2-1) = slash;
474 }
475 }
476 }
477 }
478 }
479 *wk2 = '\0';
480 break;
481 default:
482 FIXME("how did we get here - state=%d\n", state);
483 HeapFree(GetProcessHeap(), 0, lpszUrlCpy);
484 return E_INVALIDARG;
485 }
486 *wk2 = '\0';
487 TRACE("Simplified, orig <%s>, simple <%s>\n",
488 debugstr_w(pszUrl), debugstr_w(lpszUrlCpy));
489 }
490 nLen = lstrlenW(lpszUrlCpy);
491 while ((nLen > 0) && ((lpszUrlCpy[nLen-1] <= ' ')))
492 lpszUrlCpy[--nLen]=0;
493
494 if(dwFlags & (URL_UNESCAPE | URL_FILE_USE_PATHURL))
495 UrlUnescapeW(lpszUrlCpy, NULL, &nLen, URL_UNESCAPE_INPLACE);
496
497 if((EscapeFlags = dwFlags & (URL_ESCAPE_UNSAFE |
498 URL_ESCAPE_SPACES_ONLY |
499 URL_ESCAPE_PERCENT |
500 URL_DONT_ESCAPE_EXTRA_INFO |
501 URL_ESCAPE_SEGMENT_ONLY ))) {
502 EscapeFlags &= ~URL_ESCAPE_UNSAFE;
503 hr = UrlEscapeW(lpszUrlCpy, pszCanonicalized, pcchCanonicalized,
504 EscapeFlags);
505 } else { /* No escaping needed, just copy the string */
506 nLen = lstrlenW(lpszUrlCpy);
507 if(nLen < *pcchCanonicalized)
508 memcpy(pszCanonicalized, lpszUrlCpy, (nLen + 1)*sizeof(WCHAR));
509 else {
510 hr = E_POINTER;
511 nLen++;
512 }
513 *pcchCanonicalized = nLen;
514 }
515
516 HeapFree(GetProcessHeap(), 0, lpszUrlCpy);
517
518 if (hr == S_OK)
519 TRACE("result %s\n", debugstr_w(pszCanonicalized));
520
521 return hr;
522 }
523
524 /*************************************************************************
525 * UrlCombineA [SHLWAPI.@]
526 *
527 * Combine two Urls.
528 *
529 * PARAMS
530 * pszBase [I] Base Url
531 * pszRelative [I] Url to combine with pszBase
532 * pszCombined [O] Destination for combined Url
533 * pcchCombined [O] Destination for length of pszCombined
534 * dwFlags [I] URL_ flags from "shlwapi.h"
535 *
536 * RETURNS
537 * Success: S_OK. pszCombined contains the combined Url, pcchCombined
538 * contains its length.
539 * Failure: An HRESULT error code indicating the error.
540 */
541 HRESULT WINAPI UrlCombineA(LPCSTR pszBase, LPCSTR pszRelative,
542 LPSTR pszCombined, LPDWORD pcchCombined,
543 DWORD dwFlags)
544 {
545 LPWSTR base, relative, combined;
546 DWORD ret, len, len2;
547
548 TRACE("(base %s, Relative %s, Combine size %d, flags %08x) using W version\n",
549 debugstr_a(pszBase),debugstr_a(pszRelative),
550 pcchCombined?*pcchCombined:0,dwFlags);
551
552 if(!pszBase || !pszRelative || !pcchCombined)
553 return E_INVALIDARG;
554
555 base = HeapAlloc(GetProcessHeap(), 0,
556 (3*INTERNET_MAX_URL_LENGTH) * sizeof(WCHAR));
557 relative = base + INTERNET_MAX_URL_LENGTH;
558 combined = relative + INTERNET_MAX_URL_LENGTH;
559
560 MultiByteToWideChar(0, 0, pszBase, -1, base, INTERNET_MAX_URL_LENGTH);
561 MultiByteToWideChar(0, 0, pszRelative, -1, relative, INTERNET_MAX_URL_LENGTH);
562 len = *pcchCombined;
563
564 ret = UrlCombineW(base, relative, pszCombined?combined:NULL, &len, dwFlags);
565 if (ret != S_OK) {
566 *pcchCombined = len;
567 HeapFree(GetProcessHeap(), 0, base);
568 return ret;
569 }
570
571 len2 = WideCharToMultiByte(0, 0, combined, len, 0, 0, 0, 0);
572 if (len2 > *pcchCombined) {
573 *pcchCombined = len2;
574 HeapFree(GetProcessHeap(), 0, base);
575 return E_POINTER;
576 }
577 WideCharToMultiByte(0, 0, combined, len+1, pszCombined, (*pcchCombined)+1,
578 0, 0);
579 *pcchCombined = len2;
580 HeapFree(GetProcessHeap(), 0, base);
581 return S_OK;
582 }
583
584 /*************************************************************************
585 * UrlCombineW [SHLWAPI.@]
586 *
587 * See UrlCombineA.
588 */
589 HRESULT WINAPI UrlCombineW(LPCWSTR pszBase, LPCWSTR pszRelative,
590 LPWSTR pszCombined, LPDWORD pcchCombined,
591 DWORD dwFlags)
592 {
593 PARSEDURLW base, relative;
594 DWORD myflags, sizeloc = 0;
595 DWORD len, res1, res2, process_case = 0;
596 LPWSTR work, preliminary, mbase, mrelative;
597 static const WCHAR myfilestr[] = {'f','i','l','e',':','/','/','/','\0'};
598 HRESULT ret;
599
600 TRACE("(base %s, Relative %s, Combine size %d, flags %08x)\n",
601 debugstr_w(pszBase),debugstr_w(pszRelative),
602 pcchCombined?*pcchCombined:0,dwFlags);
603
604 if(!pszBase || !pszRelative || !pcchCombined)
605 return E_INVALIDARG;
606
607 base.cbSize = sizeof(base);
608 relative.cbSize = sizeof(relative);
609
610 /* Get space for duplicates of the input and the output */
611 preliminary = HeapAlloc(GetProcessHeap(), 0, (3*INTERNET_MAX_URL_LENGTH) *
612 sizeof(WCHAR));
613 mbase = preliminary + INTERNET_MAX_URL_LENGTH;
614 mrelative = mbase + INTERNET_MAX_URL_LENGTH;
615 *preliminary = '\0';
616
617 /* Canonicalize the base input prior to looking for the scheme */
618 myflags = dwFlags & (URL_DONT_SIMPLIFY | URL_UNESCAPE);
619 len = INTERNET_MAX_URL_LENGTH;
620 ret = UrlCanonicalizeW(pszBase, mbase, &len, myflags);
621
622 /* Canonicalize the relative input prior to looking for the scheme */
623 len = INTERNET_MAX_URL_LENGTH;
624 ret = UrlCanonicalizeW(pszRelative, mrelative, &len, myflags);
625
626 /* See if the base has a scheme */
627 res1 = ParseURLW(mbase, &base);
628 if (res1) {
629 /* if pszBase has no scheme, then return pszRelative */
630 TRACE("no scheme detected in Base\n");
631 process_case = 1;
632 }
633 else do {
634 /* mk is a special case */
635 if(base.nScheme == URL_SCHEME_MK) {
636 static const WCHAR wsz[] = {':',':',0};
637
638 WCHAR *ptr = strstrW(base.pszSuffix, wsz);
639 if(ptr) {
640 int delta;
641
642 ptr += 2;
643 delta = ptr-base.pszSuffix;
644 base.cchProtocol += delta;
645 base.pszSuffix += delta;
646 base.cchSuffix -= delta;
647 }
648 }else {
649 /* get size of location field (if it exists) */
650 work = (LPWSTR)base.pszSuffix;
651 sizeloc = 0;
652 if (*work++ == '/') {
653 if (*work++ == '/') {
654 /* At this point have start of location and
655 * it ends at next '/' or end of string.
656 */
657 while(*work && (*work != '/')) work++;
658 sizeloc = (DWORD)(work - base.pszSuffix);
659 }
660 }
661 }
662
663 /* Change .sizep2 to not have the last leaf in it,
664 * Note: we need to start after the location (if it exists)
665 */
666 work = strrchrW((base.pszSuffix+sizeloc), '/');
667 if (work) {
668 len = (DWORD)(work - base.pszSuffix + 1);
669 base.cchSuffix = len;
670 }
671
672 /*
673 * At this point:
674 * .pszSuffix points to location (starting with '//')
675 * .cchSuffix length of location (above) and rest less the last
676 * leaf (if any)
677 * sizeloc length of location (above) up to but not including
678 * the last '/'
679 */
680
681 res2 = ParseURLW(mrelative, &relative);
682 if (res2) {
683 /* no scheme in pszRelative */
684 TRACE("no scheme detected in Relative\n");
685 relative.pszSuffix = mrelative; /* case 3,4,5 depends on this */
686 relative.cchSuffix = strlenW(mrelative);
687 if (*pszRelative == ':') {
688 /* case that is either left alone or uses pszBase */
689 if (dwFlags & URL_PLUGGABLE_PROTOCOL) {
690 process_case = 5;
691 break;
692 }
693 process_case = 1;
694 break;
695 }
696 if (isalnum(*mrelative) && (*(mrelative + 1) == ':')) {
697 /* case that becomes "file:///" */
698 strcpyW(preliminary, myfilestr);
699 process_case = 1;
700 break;
701 }
702 if ((*mrelative == '/') && (*(mrelative+1) == '/')) {
703 /* pszRelative has location and rest */
704 process_case = 3;
705 break;
706 }
707 if (*mrelative == '/') {
708 /* case where pszRelative is root to location */
709 process_case = 4;
710 break;
711 }
712 process_case = (*base.pszSuffix == '/' || base.nScheme == URL_SCHEME_MK) ? 5 : 3;
713 break;
714 }
715
716 /* handle cases where pszRelative has scheme */
717 if ((base.cchProtocol == relative.cchProtocol) &&
718 (strncmpW(base.pszProtocol, relative.pszProtocol, base.cchProtocol) == 0)) {
719
720 /* since the schemes are the same */
721 if ((*relative.pszSuffix == '/') && (*(relative.pszSuffix+1) == '/')) {
722 /* case where pszRelative replaces location and following */
723 process_case = 3;
724 break;
725 }
726 if (*relative.pszSuffix == '/') {
727 /* case where pszRelative is root to location */
728 process_case = 4;
729 break;
730 }
731 /* replace either just location if base's location starts with a
732 * slash or otherwise everything */
733 process_case = (*base.pszSuffix == '/') ? 5 : 1;
734 break;
735 }
736 if ((*relative.pszSuffix == '/') && (*(relative.pszSuffix+1) == '/')) {
737 /* case where pszRelative replaces scheme, location,
738 * and following and handles PLUGGABLE
739 */
740 process_case = 2;
741 break;
742 }
743 process_case = 1;
744 break;
745 } while(FALSE); /* a little trick to allow easy exit from nested if's */
746
747 ret = S_OK;
748 switch (process_case) {
749
750 case 1: /*
751 * Return pszRelative appended to what ever is in pszCombined,
752 * (which may the string "file:///"
753 */
754 strcatW(preliminary, mrelative);
755 break;
756
757 case 2: /* case where pszRelative replaces scheme, and location */
758 strcpyW(preliminary, mrelative);
759 break;
760
761 case 3: /*
762 * Return the pszBase scheme with pszRelative. Basically
763 * keeps the scheme and replaces the domain and following.
764 */
765 memcpy(preliminary, base.pszProtocol, (base.cchProtocol + 1)*sizeof(WCHAR));
766 work = preliminary + base.cchProtocol + 1;
767 strcpyW(work, relative.pszSuffix);
768 break;
769
770 case 4: /*
771 * Return the pszBase scheme and location but everything
772 * after the location is pszRelative. (Replace document
773 * from root on.)
774 */
775 memcpy(preliminary, base.pszProtocol, (base.cchProtocol+1+sizeloc)*sizeof(WCHAR));
776 work = preliminary + base.cchProtocol + 1 + sizeloc;
777 if (dwFlags & URL_PLUGGABLE_PROTOCOL)
778 *(work++) = '/';
779 strcpyW(work, relative.pszSuffix);
780 break;
781
782 case 5: /*
783 * Return the pszBase without its document (if any) and
784 * append pszRelative after its scheme.
785 */
786 memcpy(preliminary, base.pszProtocol,
787 (base.cchProtocol+1+base.cchSuffix)*sizeof(WCHAR));
788 work = preliminary + base.cchProtocol+1+base.cchSuffix - 1;
789 if (*work++ != '/')
790 *(work++) = '/';
791 strcpyW(work, relative.pszSuffix);
792 break;
793
794 default:
795 FIXME("How did we get here????? process_case=%d\n", process_case);
796 ret = E_INVALIDARG;
797 }
798
799 if (ret == S_OK) {
800 /* Reuse mrelative as temp storage as its already allocated and not needed anymore */
801 ret = UrlCanonicalizeW(preliminary, mrelative, pcchCombined, (dwFlags & ~URL_FILE_USE_PATHURL));
802 if(SUCCEEDED(ret) && pszCombined) {
803 lstrcpyW(pszCombined, mrelative);
804 }
805 TRACE("return-%d len=%d, %s\n",
806 process_case, *pcchCombined, debugstr_w(pszCombined));
807 }
808 HeapFree(GetProcessHeap(), 0, preliminary);
809 return ret;
810 }
811
812 /*************************************************************************
813 * UrlEscapeA [SHLWAPI.@]
814 */
815
816 HRESULT WINAPI UrlEscapeA(
817 LPCSTR pszUrl,
818 LPSTR pszEscaped,
819 LPDWORD pcchEscaped,
820 DWORD dwFlags)
821 {
822 WCHAR bufW[INTERNET_MAX_URL_LENGTH];
823 WCHAR *escapedW = bufW;
824 UNICODE_STRING urlW;
825 HRESULT ret;
826 DWORD lenW = sizeof(bufW)/sizeof(WCHAR), lenA;
827
828 if (!pszEscaped || !pcchEscaped || !*pcchEscaped)
829 return E_INVALIDARG;
830
831 if(!RtlCreateUnicodeStringFromAsciiz(&urlW, pszUrl))
832 return E_INVALIDARG;
833 if((ret = UrlEscapeW(urlW.Buffer, escapedW, &lenW, dwFlags)) == E_POINTER) {
834 escapedW = HeapAlloc(GetProcessHeap(), 0, lenW * sizeof(WCHAR));
835 ret = UrlEscapeW(urlW.Buffer, escapedW, &lenW, dwFlags);
836 }
837 if(ret == S_OK) {
838 RtlUnicodeToMultiByteSize(&lenA, escapedW, lenW * sizeof(WCHAR));
839 if(*pcchEscaped > lenA) {
840 RtlUnicodeToMultiByteN(pszEscaped, *pcchEscaped - 1, &lenA, escapedW, lenW * sizeof(WCHAR));
841 pszEscaped[lenA] = 0;
842 *pcchEscaped = lenA;
843 } else {
844 *pcchEscaped = lenA + 1;
845 ret = E_POINTER;
846 }
847 }
848 if(escapedW != bufW) HeapFree(GetProcessHeap(), 0, escapedW);
849 RtlFreeUnicodeString(&urlW);
850 return ret;
851 }
852
/* Internal flags steering UrlEscapeW; they are derived from the Url's
 * scheme and the public URL_ flags and are never seen by callers. */
#define WINE_URL_BASH_AS_SLASH    0x01  /* turn '\\' into '/' while escaping is active */
#define WINE_URL_COLLAPSE_SLASHES 0x02  /* normalize the run of slashes following "scheme:" */
#define WINE_URL_ESCAPE_SLASH     0x04  /* '/' needs escaping */
#define WINE_URL_ESCAPE_HASH      0x08  /* '#' needs escaping */
#define WINE_URL_ESCAPE_QUESTION  0x10  /* '?' needs escaping */
#define WINE_URL_STOP_ON_HASH     0x20  /* stop escaping at the first '#' */
#define WINE_URL_STOP_ON_QUESTION 0x40  /* stop escaping at the first '?' */
860
861 static inline BOOL URL_NeedEscapeW(WCHAR ch, DWORD dwFlags, DWORD int_flags)
862 {
863
864 if (isalnumW(ch))
865 return FALSE;
866
867 if(dwFlags & URL_ESCAPE_SPACES_ONLY) {
868 if(ch == ' ')
869 return TRUE;
870 else
871 return FALSE;
872 }
873
874 if ((dwFlags & URL_ESCAPE_PERCENT) && (ch == '%'))
875 return TRUE;
876
877 if (ch <= 31 || ch >= 127)
878 return TRUE;
879
880 else {
881 switch (ch) {
882 case ' ':
883 case '<':
884 case '>':
885 case '\"':
886 case '{':
887 case '}':
888 case '|':
889 case '\\':
890 case '^':
891 case ']':
892 case '[':
893 case '`':
894 case '&':
895 return TRUE;
896
897 case '/':
898 if (int_flags & WINE_URL_ESCAPE_SLASH) return TRUE;
899 return FALSE;
900
901 case '?':
902 if (int_flags & WINE_URL_ESCAPE_QUESTION) return TRUE;
903 return FALSE;
904
905 case '#':
906 if (int_flags & WINE_URL_ESCAPE_HASH) return TRUE;
907 return FALSE;
908
909 default:
910 return FALSE;
911 }
912 }
913 }
914
915
916 /*************************************************************************
917 * UrlEscapeW [SHLWAPI.@]
918 *
919 * Converts unsafe characters in a Url into escape sequences.
920 *
921 * PARAMS
922 * pszUrl [I] Url to modify
923 * pszEscaped [O] Destination for modified Url
924 * pcchEscaped [I/O] Length of pszUrl, destination for length of pszEscaped
925 * dwFlags [I] URL_ flags from "shlwapi.h"
926 *
927 * RETURNS
928 * Success: S_OK. pszEscaped contains the escaped Url, pcchEscaped
929 * contains its length.
930 * Failure: E_POINTER, if pszEscaped is not large enough. In this case
931 * pcchEscaped is set to the required length.
932 *
933 * Converts unsafe characters into their escape sequences.
934 *
935 * NOTES
936 * - By default this function stops converting at the first '?' or
937 * '#' character.
938 * - If dwFlags contains URL_ESCAPE_SPACES_ONLY then only spaces are
939 * converted, but the conversion continues past a '?' or '#'.
940 * - Note that this function did not work well (or at all) in shlwapi version 4.
941 *
942 * BUGS
943 * Only the following flags are implemented:
944 *| URL_ESCAPE_SPACES_ONLY
945 *| URL_DONT_ESCAPE_EXTRA_INFO
946 *| URL_ESCAPE_SEGMENT_ONLY
947 *| URL_ESCAPE_PERCENT
948 */
949 HRESULT WINAPI UrlEscapeW(
950 LPCWSTR pszUrl,
951 LPWSTR pszEscaped,
952 LPDWORD pcchEscaped,
953 DWORD dwFlags)
954 {
955 LPCWSTR src;
956 DWORD needed = 0, ret;
957 BOOL stop_escaping = FALSE;
958 WCHAR next[5], *dst = pszEscaped;
959 INT len;
960 PARSEDURLW parsed_url;
961 DWORD int_flags;
962 DWORD slashes = 0;
963 static const WCHAR localhost[] = {'l','o','c','a','l','h','o','s','t',0};
964
965 TRACE("(%s %p %p 0x%08x)\n", debugstr_w(pszUrl), pszEscaped,
966 pcchEscaped, dwFlags);
967
968 if(!pszUrl || !pcchEscaped)
969 return E_INVALIDARG;
970
971 if(dwFlags & ~(URL_ESCAPE_SPACES_ONLY |
972 URL_ESCAPE_SEGMENT_ONLY |
973 URL_DONT_ESCAPE_EXTRA_INFO |
974 URL_ESCAPE_PERCENT))
975 FIXME("Unimplemented flags: %08x\n", dwFlags);
976
977 /* fix up flags */
978 if (dwFlags & URL_ESCAPE_SPACES_ONLY)
979 /* if SPACES_ONLY specified, reset the other controls */
980 dwFlags &= ~(URL_DONT_ESCAPE_EXTRA_INFO |
981 URL_ESCAPE_PERCENT |
982 URL_ESCAPE_SEGMENT_ONLY);
983
984 else
985 /* if SPACES_ONLY *not* specified the assume DONT_ESCAPE_EXTRA_INFO */
986 dwFlags |= URL_DONT_ESCAPE_EXTRA_INFO;
987
988
989 int_flags = 0;
990 if(dwFlags & URL_ESCAPE_SEGMENT_ONLY) {
991 int_flags = WINE_URL_ESCAPE_QUESTION | WINE_URL_ESCAPE_HASH | WINE_URL_ESCAPE_SLASH;
992 } else {
993 parsed_url.cbSize = sizeof(parsed_url);
994 if(ParseURLW(pszUrl, &parsed_url) != S_OK)
995 parsed_url.nScheme = URL_SCHEME_INVALID;
996
997 TRACE("scheme = %d (%s)\n", parsed_url.nScheme, debugstr_wn(parsed_url.pszProtocol, parsed_url.cchProtocol));
998
999 if(dwFlags & URL_DONT_ESCAPE_EXTRA_INFO)
1000 int_flags = WINE_URL_STOP_ON_HASH | WINE_URL_STOP_ON_QUESTION;
1001
1002 switch(parsed_url.nScheme) {
1003 case URL_SCHEME_FILE:
1004 int_flags |= WINE_URL_BASH_AS_SLASH | WINE_URL_COLLAPSE_SLASHES | WINE_URL_ESCAPE_HASH;
1005 int_flags &= ~WINE_URL_STOP_ON_HASH;
1006 break;
1007
1008 case URL_SCHEME_HTTP:
1009 case URL_SCHEME_HTTPS:
1010 int_flags |= WINE_URL_BASH_AS_SLASH;
1011 if(parsed_url.pszSuffix[0] != '/' && parsed_url.pszSuffix[0] != '\\')
1012 int_flags |= WINE_URL_ESCAPE_SLASH;
1013 break;
1014
1015 case URL_SCHEME_MAILTO:
1016 int_flags |= WINE_URL_ESCAPE_SLASH | WINE_URL_ESCAPE_QUESTION | WINE_URL_ESCAPE_HASH;
1017 int_flags &= ~(WINE_URL_STOP_ON_QUESTION | WINE_URL_STOP_ON_HASH);
1018 break;
1019
1020 case URL_SCHEME_INVALID:
1021 break;
1022
1023 case URL_SCHEME_FTP:
1024 default:
1025 if(parsed_url.pszSuffix[0] != '/')
1026 int_flags |= WINE_URL_ESCAPE_SLASH;
1027 break;
1028 }
1029 }
1030
1031 for(src = pszUrl; *src; ) {
1032 WCHAR cur = *src;
1033 len = 0;
1034
1035 if((int_flags & WINE_URL_COLLAPSE_SLASHES) && src == pszUrl + parsed_url.cchProtocol + 1) {
1036 int localhost_len = sizeof(localhost)/sizeof(WCHAR) - 1;
1037 while(cur == '/' || cur == '\\') {
1038 slashes++;
1039 cur = *++src;
1040 }
1041 if(slashes == 2 && !strncmpiW(src, localhost, localhost_len)) { /* file://localhost/ -> file:/// */
1042 if(*(src + localhost_len) == '/' || *(src + localhost_len) == '\\')
1043 src += localhost_len + 1;
1044 slashes = 3;
1045 }
1046
1047 switch(slashes) {
1048 case 1:
1049 case 3:
1050 next[0] = next[1] = next[2] = '/';
1051 len = 3;
1052 break;
1053 case 0:
1054 len = 0;
1055 break;
1056 default:
1057 next[0] = next[1] = '/';
1058 len = 2;
1059 break;
1060 }
1061 }
1062 if(len == 0) {
1063
1064 if(cur == '#' && (int_flags & WINE_URL_STOP_ON_HASH))
1065 stop_escaping = TRUE;
1066
1067 if(cur == '?' && (int_flags & WINE_URL_STOP_ON_QUESTION))
1068 stop_escaping = TRUE;
1069
1070 if(cur == '\\' && (int_flags & WINE_URL_BASH_AS_SLASH) && !stop_escaping) cur = '/';
1071
1072 if(URL_NeedEscapeW(cur, dwFlags, int_flags) && stop_escaping == FALSE) {
1073 next[0] = '%';
1074 next[1] = hexDigits[(cur >> 4) & 0xf];
1075 next[2] =