1 /*
2 * Url functions
3 *
4 * Copyright 2000 Huw D M Davies for CodeWeavers.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 */
20
21 #include "config.h"
22 #include "wine/port.h"
23 #include <stdarg.h>
24 #include <string.h>
25 #include <stdlib.h>
26 #include "windef.h"
27 #include "winbase.h"
28 #include "winnls.h"
29 #include "winerror.h"
30 #include "wine/unicode.h"
31 #include "wininet.h"
32 #include "winreg.h"
33 #include "winternl.h"
34 #define NO_SHLWAPI_STREAM
35 #include "shlwapi.h"
36 #include "wine/debug.h"
37
38 HMODULE WINAPI MLLoadLibraryW(LPCWSTR,HMODULE,DWORD);
39 BOOL WINAPI MLFreeLibrary(HMODULE);
40 HRESULT WINAPI MLBuildResURLW(LPCWSTR,HMODULE,DWORD,LPCWSTR,LPWSTR,DWORD);
41
42 WINE_DEFAULT_DEBUG_CHANNEL(shell);
43
/* The following schemes were identified in the native version of
 * SHLWAPI.DLL version 5.50
 *
 * Each entry pairs a URL_SCHEME_* code with the scheme name as it appears
 * in front of the ':' of a Url.  get_scheme_code() walks this table
 * linearly.  scheme_name must be able to hold the longest name plus its
 * terminating 0 ("javascript" is 10 characters, so 12 is sufficient).
 */
static const struct {
    URL_SCHEME  scheme_number;    /* URL_SCHEME_* value reported to callers */
    WCHAR       scheme_name[12];  /* 0-terminated scheme name, without ':' */
} shlwapi_schemes[] = {
    {URL_SCHEME_FTP,        {'f','t','p',0}},
    {URL_SCHEME_HTTP,       {'h','t','t','p',0}},
    {URL_SCHEME_GOPHER,     {'g','o','p','h','e','r',0}},
    {URL_SCHEME_MAILTO,     {'m','a','i','l','t','o',0}},
    {URL_SCHEME_NEWS,       {'n','e','w','s',0}},
    {URL_SCHEME_NNTP,       {'n','n','t','p',0}},
    {URL_SCHEME_TELNET,     {'t','e','l','n','e','t',0}},
    {URL_SCHEME_WAIS,       {'w','a','i','s',0}},
    {URL_SCHEME_FILE,       {'f','i','l','e',0}},
    {URL_SCHEME_MK,         {'m','k',0}},
    {URL_SCHEME_HTTPS,      {'h','t','t','p','s',0}},
    {URL_SCHEME_SHELL,      {'s','h','e','l','l',0}},
    {URL_SCHEME_SNEWS,      {'s','n','e','w','s',0}},
    {URL_SCHEME_LOCAL,      {'l','o','c','a','l',0}},
    {URL_SCHEME_JAVASCRIPT, {'j','a','v','a','s','c','r','i','p','t',0}},
    {URL_SCHEME_VBSCRIPT,   {'v','b','s','c','r','i','p','t',0}},
    {URL_SCHEME_ABOUT,      {'a','b','o','u','t',0}},
    {URL_SCHEME_RES,        {'r','e','s',0}},
};
70
/* Components of a Url, each described by a pointer to its first character
 * and a length.  The [out] markers indicate that a parsing routine fills
 * the structure in; that routine is not part of the code visible next to
 * this declaration.
 * NOTE(review): the pointers presumably point into the caller's Url string
 * rather than into copies - confirm against the code that fills them.
 */
typedef struct {
    LPCWSTR pScheme;      /* [out] start of scheme                     */
    DWORD   szScheme;     /* [out] size of scheme (until colon)        */
    LPCWSTR pUserName;    /* [out] start of Username                   */
    DWORD   szUserName;   /* [out] size of Username (until ":" or "@") */
    LPCWSTR pPassword;    /* [out] start of Password                   */
    DWORD   szPassword;   /* [out] size of Password (until "@")        */
    LPCWSTR pHostName;    /* [out] start of Hostname                   */
    DWORD   szHostName;   /* [out] size of Hostname (until ":" or "/") */
    LPCWSTR pPort;        /* [out] start of Port                       */
    DWORD   szPort;       /* [out] size of Port (until "/" or eos)     */
    LPCWSTR pQuery;       /* [out] start of Query                      */
    DWORD   szQuery;      /* [out] size of Query (until eos)           */
} WINE_PARSE_URL;
85
/* Kinds of Url component.
 * NOTE(review): presumably selects which characters a component scanner
 * accepts; the consumer of this enum is not visible here - confirm.
 */
typedef enum {
    SCHEME,
    HOST,
    PORT,
    USERPASS,
} WINE_URL_SCAN_TYPE;
92
/* The sixteen upper-case hexadecimal digits, ordered by value so that
 * hexDigits[n] is the digit for nibble n. */
static const CHAR hexDigits[] = "0123456789ABCDEF";

/* The scheme name "file" as a 0-terminated wide string. */
static const WCHAR fileW[] = {'f','i','l','e','\0'};
96
/* 256-entry byte substitution table, indexed by an unsigned char value.
 * NOTE(review): judging by the name this is consumed by the HashData
 * implementation, which is not visible next to this table - confirm.
 * The values must not be reordered.
 */
static const unsigned char HashDataLookup[256] = {
 0x01, 0x0E, 0x6E, 0x19, 0x61, 0xAE, 0x84, 0x77, 0x8A, 0xAA, 0x7D, 0x76, 0x1B,
 0xE9, 0x8C, 0x33, 0x57, 0xC5, 0xB1, 0x6B, 0xEA, 0xA9, 0x38, 0x44, 0x1E, 0x07,
 0xAD, 0x49, 0xBC, 0x28, 0x24, 0x41, 0x31, 0xD5, 0x68, 0xBE, 0x39, 0xD3, 0x94,
 0xDF, 0x30, 0x73, 0x0F, 0x02, 0x43, 0xBA, 0xD2, 0x1C, 0x0C, 0xB5, 0x67, 0x46,
 0x16, 0x3A, 0x4B, 0x4E, 0xB7, 0xA7, 0xEE, 0x9D, 0x7C, 0x93, 0xAC, 0x90, 0xB0,
 0xA1, 0x8D, 0x56, 0x3C, 0x42, 0x80, 0x53, 0x9C, 0xF1, 0x4F, 0x2E, 0xA8, 0xC6,
 0x29, 0xFE, 0xB2, 0x55, 0xFD, 0xED, 0xFA, 0x9A, 0x85, 0x58, 0x23, 0xCE, 0x5F,
 0x74, 0xFC, 0xC0, 0x36, 0xDD, 0x66, 0xDA, 0xFF, 0xF0, 0x52, 0x6A, 0x9E, 0xC9,
 0x3D, 0x03, 0x59, 0x09, 0x2A, 0x9B, 0x9F, 0x5D, 0xA6, 0x50, 0x32, 0x22, 0xAF,
 0xC3, 0x64, 0x63, 0x1A, 0x96, 0x10, 0x91, 0x04, 0x21, 0x08, 0xBD, 0x79, 0x40,
 0x4D, 0x48, 0xD0, 0xF5, 0x82, 0x7A, 0x8F, 0x37, 0x69, 0x86, 0x1D, 0xA4, 0xB9,
 0xC2, 0xC1, 0xEF, 0x65, 0xF2, 0x05, 0xAB, 0x7E, 0x0B, 0x4A, 0x3B, 0x89, 0xE4,
 0x6C, 0xBF, 0xE8, 0x8B, 0x06, 0x18, 0x51, 0x14, 0x7F, 0x11, 0x5B, 0x5C, 0xFB,
 0x97, 0xE1, 0xCF, 0x15, 0x62, 0x71, 0x70, 0x54, 0xE2, 0x12, 0xD6, 0xC7, 0xBB,
 0x0D, 0x20, 0x5E, 0xDC, 0xE0, 0xD4, 0xF7, 0xCC, 0xC4, 0x2B, 0xF9, 0xEC, 0x2D,
 0xF4, 0x6F, 0xB6, 0x99, 0x88, 0x81, 0x5A, 0xD9, 0xCA, 0x13, 0xA5, 0xE7, 0x47,
 0xE6, 0x8E, 0x60, 0xE3, 0x3E, 0xB3, 0xF6, 0x72, 0xA2, 0x35, 0xA0, 0xD7, 0xCD,
 0xB4, 0x2F, 0x6D, 0x2C, 0x26, 0x1F, 0x95, 0x87, 0x00, 0xD8, 0x34, 0x3F, 0x17,
 0x25, 0x45, 0x27, 0x75, 0x92, 0xB8, 0xA3, 0xC8, 0xDE, 0xEB, 0xF8, 0xF3, 0xDB,
 0x0A, 0x98, 0x83, 0x7B, 0xE5, 0xCB, 0x4C, 0x78, 0xD1 };
118
119 static DWORD get_scheme_code(LPCWSTR scheme, DWORD scheme_len)
120 {
121 int i;
122
123 for(i=0; i < sizeof(shlwapi_schemes)/sizeof(shlwapi_schemes[0]); i++) {
124 if(scheme_len == strlenW(shlwapi_schemes[i].scheme_name)
125 && !memcmp(scheme, shlwapi_schemes[i].scheme_name, scheme_len*sizeof(WCHAR)))
126 return shlwapi_schemes[i].scheme_number;
127 }
128
129 return URL_SCHEME_UNKNOWN;
130 }
131
132 static BOOL URL_JustLocation(LPCWSTR str)
133 {
134 while(*str && (*str == '/')) str++;
135 if (*str) {
136 while (*str && ((*str == '-') ||
137 (*str == '.') ||
138 isalnumW(*str))) str++;
139 if (*str == '/') return FALSE;
140 }
141 return TRUE;
142 }
143
144
145 /*************************************************************************
146 * @ [SHLWAPI.1]
147 *
148 * Parse a Url into its constituent parts.
149 *
150 * PARAMS
151 * x [I] Url to parse
152 * y [O] Undocumented structure holding the parsed information
153 *
154 * RETURNS
155 * Success: S_OK. y contains the parsed Url details.
156 * Failure: An HRESULT error code.
157 */
158 HRESULT WINAPI ParseURLA(LPCSTR x, PARSEDURLA *y)
159 {
160 WCHAR scheme[INTERNET_MAX_SCHEME_LENGTH];
161 DWORD cnt, len;
162
163 y->nScheme = URL_SCHEME_INVALID;
164 if (y->cbSize != sizeof(*y)) return E_INVALIDARG;
165 /* FIXME: leading white space generates error of 0x80041001 which
166 * is undefined
167 */
168 if (*x <= ' ') return 0x80041001;
169 cnt = 0;
170 y->cchProtocol = 0;
171 y->pszProtocol = x;
172 while (*x) {
173 if (*x == ':') {
174 y->cchProtocol = cnt;
175 cnt = -1;
176 y->pszSuffix = x+1;
177 break;
178 }
179 x++;
180 cnt++;
181 }
182
183 /* check for no scheme in string start */
184 /* (apparently schemes *must* be larger than a single character) */
185 if ((*x == '\0') || (y->cchProtocol <= 1)) {
186 y->pszProtocol = NULL;
187 return 0x80041001;
188 }
189
190 /* found scheme, set length of remainder */
191 y->cchSuffix = lstrlenA(y->pszSuffix);
192
193 len = MultiByteToWideChar(CP_ACP, 0, y->pszProtocol, y->cchProtocol,
194 scheme, sizeof(scheme)/sizeof(WCHAR));
195 y->nScheme = get_scheme_code(scheme, len);
196
197 return S_OK;
198 }
199
200 /*************************************************************************
201 * @ [SHLWAPI.2]
202 *
203 * Unicode version of ParseURLA.
204 */
205 HRESULT WINAPI ParseURLW(LPCWSTR x, PARSEDURLW *y)
206 {
207 DWORD cnt;
208
209 y->nScheme = URL_SCHEME_INVALID;
210 if (y->cbSize != sizeof(*y)) return E_INVALIDARG;
211 /* FIXME: leading white space generates error of 0x80041001 which
212 * is undefined
213 */
214 if (*x <= ' ') return 0x80041001;
215 cnt = 0;
216 y->cchProtocol = 0;
217 y->pszProtocol = x;
218 while (*x) {
219 if (*x == ':') {
220 y->cchProtocol = cnt;
221 cnt = -1;
222 y->pszSuffix = x+1;
223 break;
224 }
225 x++;
226 cnt++;
227 }
228
229 /* check for no scheme in string start */
230 /* (apparently schemes *must* be larger than a single character) */
231 if ((*x == '\0') || (y->cchProtocol <= 1)) {
232 y->pszProtocol = NULL;
233 return 0x80041001;
234 }
235
236 /* found scheme, set length of remainder */
237 y->cchSuffix = lstrlenW(y->pszSuffix);
238 y->nScheme = get_scheme_code(y->pszProtocol, y->cchProtocol);
239
240 return S_OK;
241 }
242
243 /*************************************************************************
244 * UrlCanonicalizeA [SHLWAPI.@]
245 *
246 * Canonicalize a Url.
247 *
248 * PARAMS
249 * pszUrl [I] Url to cCanonicalize
250 * pszCanonicalized [O] Destination for converted Url.
251 * pcchCanonicalized [I/O] Length of pszUrl, destination for length of pszCanonicalized
252 * dwFlags [I] Flags controlling the conversion.
253 *
254 * RETURNS
255 * Success: S_OK. The pszCanonicalized contains the converted Url.
256 * Failure: E_POINTER, if *pcchCanonicalized is too small.
257 *
258 * MSDN incorrectly describes the flags for this function. They should be:
259 *| URL_DONT_ESCAPE_EXTRA_INFO 0x02000000
260 *| URL_ESCAPE_SPACES_ONLY 0x04000000
261 *| URL_ESCAPE_PERCENT 0x00001000
262 *| URL_ESCAPE_UNSAFE 0x10000000
263 *| URL_UNESCAPE 0x10000000
264 *| URL_DONT_SIMPLIFY 0x08000000
265 *| URL_ESCAPE_SEGMENT_ONLY 0x00002000
266 */
267 HRESULT WINAPI UrlCanonicalizeA(LPCSTR pszUrl, LPSTR pszCanonicalized,
268 LPDWORD pcchCanonicalized, DWORD dwFlags)
269 {
270 LPWSTR base, canonical;
271 HRESULT ret;
272 DWORD len, len2;
273
274 TRACE("(%s, %p, %p, 0x%08x) *pcchCanonicalized: %d\n", debugstr_a(pszUrl), pszCanonicalized,
275 pcchCanonicalized, dwFlags, pcchCanonicalized ? *pcchCanonicalized : -1);
276
277 if(!pszUrl || !pszCanonicalized || !pcchCanonicalized)
278 return E_INVALIDARG;
279
280 base = HeapAlloc(GetProcessHeap(), 0,
281 (2*INTERNET_MAX_URL_LENGTH) * sizeof(WCHAR));
282 canonical = base + INTERNET_MAX_URL_LENGTH;
283
284 MultiByteToWideChar(0, 0, pszUrl, -1, base, INTERNET_MAX_URL_LENGTH);
285 len = INTERNET_MAX_URL_LENGTH;
286
287 ret = UrlCanonicalizeW(base, canonical, &len, dwFlags);
288 if (ret != S_OK) {
289 *pcchCanonicalized = len * 2;
290 HeapFree(GetProcessHeap(), 0, base);
291 return ret;
292 }
293
294 len2 = WideCharToMultiByte(0, 0, canonical, -1, 0, 0, 0, 0);
295 if (len2 > *pcchCanonicalized) {
296 *pcchCanonicalized = len2;
297 HeapFree(GetProcessHeap(), 0, base);
298 return E_POINTER;
299 }
300 WideCharToMultiByte(0, 0, canonical, -1, pszCanonicalized, *pcchCanonicalized, 0, 0);
301 *pcchCanonicalized = len;
302 HeapFree(GetProcessHeap(), 0, base);
303 return S_OK;
304 }
305
306 /*************************************************************************
307 * UrlCanonicalizeW [SHLWAPI.@]
308 *
309 * See UrlCanonicalizeA.
310 */
311 HRESULT WINAPI UrlCanonicalizeW(LPCWSTR pszUrl, LPWSTR pszCanonicalized,
312 LPDWORD pcchCanonicalized, DWORD dwFlags)
313 {
314 HRESULT hr = S_OK;
315 DWORD EscapeFlags;
316 LPWSTR lpszUrlCpy, wk1, wk2, mp, mp2, root;
317 INT nByteLen, state;
318 DWORD nLen, nWkLen;
319 WCHAR slash = '/';
320
321 static const WCHAR wszFile[] = {'f','i','l','e',':'};
322 static const WCHAR wszLocalhost[] = {'l','o','c','a','l','h','o','s','t'};
323
324 TRACE("(%s, %p, %p, 0x%08x) *pcchCanonicalized: %d\n", debugstr_w(pszUrl), pszCanonicalized,
325 pcchCanonicalized, dwFlags, pcchCanonicalized ? *pcchCanonicalized : -1);
326
327 if(!pszUrl || !pszCanonicalized || !pcchCanonicalized)
328 return E_INVALIDARG;
329
330 if(!*pszUrl) {
331 *pszCanonicalized = 0;
332 return S_OK;
333 }
334
335 nByteLen = (lstrlenW(pszUrl) + 1) * sizeof(WCHAR); /* length in bytes */
336 lpszUrlCpy = HeapAlloc(GetProcessHeap(), 0,
337 INTERNET_MAX_URL_LENGTH * sizeof(WCHAR));
338
339 if((dwFlags & URL_FILE_USE_PATHURL) && nByteLen >= sizeof(wszFile)
340 && !memcmp(wszFile, pszUrl, sizeof(wszFile)))
341 slash = '\\';
342
343 /*
344 * state =
345 * 0 initial 1,3
346 * 1 have 2[+] alnum 2,3
347 * 2 have scheme (found :) 4,6,3
348 * 3 failed (no location)
349 * 4 have // 5,3
350 * 5 have 1[+] alnum 6,3
351 * 6 have location (found /) save root location
352 */
353
354 wk1 = (LPWSTR)pszUrl;
355 wk2 = lpszUrlCpy;
356 state = 0;
357
358 if(pszUrl[1] == ':') { /* Assume path */
359 static const WCHAR wszFilePrefix[] = {'f','i','l','e',':','/','/','/'};
360
361 memcpy(wk2, wszFilePrefix, sizeof(wszFilePrefix));
362 wk2 += sizeof(wszFilePrefix)/sizeof(WCHAR);
363 if (dwFlags & URL_FILE_USE_PATHURL)
364 {
365 slash = '\\';
366 --wk2;
367 }
368 else
369 dwFlags |= URL_ESCAPE_UNSAFE;
370 state = 5;
371 }
372
373 while (*wk1) {
374 switch (state) {
375 case 0:
376 if (!isalnumW(*wk1)) {state = 3; break;}
377 *wk2++ = *wk1++;
378 if (!isalnumW(*wk1)) {state = 3; break;}
379 *wk2++ = *wk1++;
380 state = 1;
381 break;
382 case 1:
383 *wk2++ = *wk1;
384 if (*wk1++ == ':') state = 2;
385 break;
386 case 2:
387 *wk2++ = *wk1++;
388 if (*wk1 != '/') {state = 6; break;}
389 *wk2++ = *wk1++;
390 if((dwFlags & URL_FILE_USE_PATHURL) && nByteLen >= sizeof(wszLocalhost)
391 && !memcmp(wszLocalhost, wk1, sizeof(wszLocalhost))){
392 wk1 += sizeof(wszLocalhost)/sizeof(WCHAR);
393 while(*wk1 == '\\' && (dwFlags & URL_FILE_USE_PATHURL))
394 wk1++;
395 }
396 if(*wk1 == '/' && (dwFlags & URL_FILE_USE_PATHURL))
397 wk1++;
398 state = 4;
399 break;
400 case 3:
401 nWkLen = strlenW(wk1);
402 memcpy(wk2, wk1, (nWkLen + 1) * sizeof(WCHAR));
403 mp = wk2;
404 wk1 += nWkLen;
405 wk2 += nWkLen;
406
407 while(mp < wk2) {
408 if(*mp == '/' || *mp == '\\')
409 *mp = slash;
410 mp++;
411 }
412 break;
413 case 4:
414 if (!isalnumW(*wk1) && (*wk1 != '-') && (*wk1 != '.') && (*wk1 != ':'))
415 {state = 3; break;}
416 while(isalnumW(*wk1) || (*wk1 == '-') || (*wk1 == '.') || (*wk1 == ':'))
417 *wk2++ = *wk1++;
418 state = 5;
419 if (!*wk1)
420 *wk2++ = slash;
421 break;
422 case 5:
423 if (*wk1 != '/' && *wk1 != '\\') {state = 3; break;}
424 while(*wk1 == '/' || *wk1 == '\\') {
425 *wk2++ = slash;
426 wk1++;
427 }
428 state = 6;
429 break;
430 case 6:
431 if(dwFlags & URL_DONT_SIMPLIFY) {
432 state = 3;
433 break;
434 }
435
436 /* Now at root location, cannot back up any more. */
437 /* "root" will point at the '/' */
438
439 root = wk2-1;
440 while (*wk1) {
441 mp = strchrW(wk1, '/');
442 mp2 = strchrW(wk1, '\\');
443 if(mp2 && (!mp || mp2 < mp))
444 mp = mp2;
445 if (!mp) {
446 nWkLen = strlenW(wk1);
447 memcpy(wk2, wk1, (nWkLen + 1) * sizeof(WCHAR));
448 wk1 += nWkLen;
449 wk2 += nWkLen;
450 continue;
451 }
452 nLen = mp - wk1;
453 if(nLen) {
454 memcpy(wk2, wk1, nLen * sizeof(WCHAR));
455 wk2 += nLen;
456 wk1 += nLen;
457 }
458 *wk2++ = slash;
459 wk1++;
460
461 if (*wk1 == '.') {
462 TRACE("found '/.'\n");
463 if (wk1[1] == '/' || wk1[1] == '\\') {
464 /* case of /./ -> skip the ./ */
465 wk1 += 2;
466 }
467 else if (wk1[1] == '.') {
468 /* found /.. look for next / */
469 TRACE("found '/..'\n");
470 if (wk1[2] == '/' || wk1[2] == '\\' ||wk1[2] == '?'
471 || wk1[2] == '#' || !wk1[2]) {
472 /* case /../ -> need to backup wk2 */
473 TRACE("found '/../'\n");
474 *(wk2-1) = '\0'; /* set end of string */
475 mp = strrchrW(root, slash);
476 if (mp && (mp >= root)) {
477 /* found valid backup point */
478 wk2 = mp + 1;
479 if(wk1[2] != '/' && wk1[2] != '\\')
480 wk1 += 2;
481 else
482 wk1 += 3;
483 }
484 else {
485 /* did not find point, restore '/' */
486 *(wk2-1) = slash;
487 }
488 }
489 }
490 }
491 }
492 *wk2 = '\0';
493 break;
494 default:
495 FIXME("how did we get here - state=%d\n", state);
496 HeapFree(GetProcessHeap(), 0, lpszUrlCpy);
497 return E_INVALIDARG;
498 }
499 *wk2 = '\0';
500 TRACE("Simplified, orig <%s>, simple <%s>\n",
501 debugstr_w(pszUrl), debugstr_w(lpszUrlCpy));
502 }
503 nLen = lstrlenW(lpszUrlCpy);
504 while ((nLen > 0) && ((lpszUrlCpy[nLen-1] <= ' ')))
505 lpszUrlCpy[--nLen]=0;
506
507 if(dwFlags & (URL_UNESCAPE | URL_FILE_USE_PATHURL))
508 UrlUnescapeW(lpszUrlCpy, NULL, &nLen, URL_UNESCAPE_INPLACE);
509
510 if((EscapeFlags = dwFlags & (URL_ESCAPE_UNSAFE |
511 URL_ESCAPE_SPACES_ONLY |
512 URL_ESCAPE_PERCENT |
513 URL_DONT_ESCAPE_EXTRA_INFO |
514 URL_ESCAPE_SEGMENT_ONLY ))) {
515 EscapeFlags &= ~URL_ESCAPE_UNSAFE;
516 hr = UrlEscapeW(lpszUrlCpy, pszCanonicalized, pcchCanonicalized,
517 EscapeFlags);
518 } else { /* No escaping needed, just copy the string */
519 nLen = lstrlenW(lpszUrlCpy);
520 if(nLen < *pcchCanonicalized)
521 memcpy(pszCanonicalized, lpszUrlCpy, (nLen + 1)*sizeof(WCHAR));
522 else {
523 hr = E_POINTER;
524 nLen++;
525 }
526 *pcchCanonicalized = nLen;
527 }
528
529 HeapFree(GetProcessHeap(), 0, lpszUrlCpy);
530
531 if (hr == S_OK)
532 TRACE("result %s\n", debugstr_w(pszCanonicalized));
533
534 return hr;
535 }
536
537 /*************************************************************************
538 * UrlCombineA [SHLWAPI.@]
539 *
540 * Combine two Urls.
541 *
542 * PARAMS
543 * pszBase [I] Base Url
544 * pszRelative [I] Url to combine with pszBase
545 * pszCombined [O] Destination for combined Url
546 * pcchCombined [O] Destination for length of pszCombined
547 * dwFlags [I] URL_ flags from "shlwapi.h"
548 *
549 * RETURNS
550 * Success: S_OK. pszCombined contains the combined Url, pcchCombined
551 * contains its length.
552 * Failure: An HRESULT error code indicating the error.
553 */
554 HRESULT WINAPI UrlCombineA(LPCSTR pszBase, LPCSTR pszRelative,
555 LPSTR pszCombined, LPDWORD pcchCombined,
556 DWORD dwFlags)
557 {
558 LPWSTR base, relative, combined;
559 DWORD ret, len, len2;
560
561 TRACE("(base %s, Relative %s, Combine size %d, flags %08x) using W version\n",
562 debugstr_a(pszBase),debugstr_a(pszRelative),
563 pcchCombined?*pcchCombined:0,dwFlags);
564
565 if(!pszBase || !pszRelative || !pcchCombined)
566 return E_INVALIDARG;
567
568 base = HeapAlloc(GetProcessHeap(), 0,
569 (3*INTERNET_MAX_URL_LENGTH) * sizeof(WCHAR));
570 relative = base + INTERNET_MAX_URL_LENGTH;
571 combined = relative + INTERNET_MAX_URL_LENGTH;
572
573 MultiByteToWideChar(0, 0, pszBase, -1, base, INTERNET_MAX_URL_LENGTH);
574 MultiByteToWideChar(0, 0, pszRelative, -1, relative, INTERNET_MAX_URL_LENGTH);
575 len = *pcchCombined;
576
577 ret = UrlCombineW(base, relative, pszCombined?combined:NULL, &len, dwFlags);
578 if (ret != S_OK) {
579 *pcchCombined = len;
580 HeapFree(GetProcessHeap(), 0, base);
581 return ret;
582 }
583
584 len2 = WideCharToMultiByte(0, 0, combined, len, 0, 0, 0, 0);
585 if (len2 > *pcchCombined) {
586 *pcchCombined = len2;
587 HeapFree(GetProcessHeap(), 0, base);
588 return E_POINTER;
589 }
590 WideCharToMultiByte(0, 0, combined, len+1, pszCombined, (*pcchCombined)+1,
591 0, 0);
592 *pcchCombined = len2;
593 HeapFree(GetProcessHeap(), 0, base);
594 return S_OK;
595 }
596
597 /*************************************************************************
598 * UrlCombineW [SHLWAPI.@]
599 *
600 * See UrlCombineA.
601 */
602 HRESULT WINAPI UrlCombineW(LPCWSTR pszBase, LPCWSTR pszRelative,
603 LPWSTR pszCombined, LPDWORD pcchCombined,
604 DWORD dwFlags)
605 {
606 PARSEDURLW base, relative;
607 DWORD myflags, sizeloc = 0;
608 DWORD len, res1, res2, process_case = 0;
609 LPWSTR work, preliminary, mbase, mrelative;
610 static const WCHAR myfilestr[] = {'f','i','l','e',':','/','/','/','\0'};
611 static const WCHAR single_slash[] = {'/','\0'};
612 HRESULT ret;
613
614 TRACE("(base %s, Relative %s, Combine size %d, flags %08x)\n",
615 debugstr_w(pszBase),debugstr_w(pszRelative),
616 pcchCombined?*pcchCombined:0,dwFlags);
617
618 if(!pszBase || !pszRelative || !pcchCombined)
619 return E_INVALIDARG;
620
621 base.cbSize = sizeof(base);
622 relative.cbSize = sizeof(relative);
623
624 /* Get space for duplicates of the input and the output */
625 preliminary = HeapAlloc(GetProcessHeap(), 0, (3*INTERNET_MAX_URL_LENGTH) *
626 sizeof(WCHAR));
627 mbase = preliminary + INTERNET_MAX_URL_LENGTH;
628 mrelative = mbase + INTERNET_MAX_URL_LENGTH;
629 *preliminary = '\0';
630
631 /* Canonicalize the base input prior to looking for the scheme */
632 myflags = dwFlags & (URL_DONT_SIMPLIFY | URL_UNESCAPE);
633 len = INTERNET_MAX_URL_LENGTH;
634 ret = UrlCanonicalizeW(pszBase, mbase, &len, myflags);
635
636 /* Canonicalize the relative input prior to looking for the scheme */
637 len = INTERNET_MAX_URL_LENGTH;
638 ret = UrlCanonicalizeW(pszRelative, mrelative, &len, myflags);
639
640 /* See if the base has a scheme */
641 res1 = ParseURLW(mbase, &base);
642 if (res1) {
643 /* if pszBase has no scheme, then return pszRelative */
644 TRACE("no scheme detected in Base\n");
645 process_case = 1;
646 }
647 else do {
648 /* mk is a special case */
649 if(base.nScheme == URL_SCHEME_MK) {
650 static const WCHAR wsz[] = {':',':',0};
651
652 WCHAR *ptr = strstrW(base.pszSuffix, wsz);
653 if(ptr) {
654 int delta;
655
656 ptr += 2;
657 delta = ptr-base.pszSuffix;
658 base.cchProtocol += delta;
659 base.pszSuffix += delta;
660 base.cchSuffix -= delta;
661 }
662 }else {
663 /* get size of location field (if it exists) */
664 work = (LPWSTR)base.pszSuffix;
665 sizeloc = 0;
666 if (*work++ == '/') {
667 if (*work++ == '/') {
668 /* At this point have start of location and
669 * it ends at next '/' or end of string.
670 */
671 while(*work && (*work != '/')) work++;
672 sizeloc = (DWORD)(work - base.pszSuffix);
673 }
674 }
675 }
676
677 /* Change .sizep2 to not have the last leaf in it,
678 * Note: we need to start after the location (if it exists)
679 */
680 work = strrchrW((base.pszSuffix+sizeloc), '/');
681 if (work) {
682 len = (DWORD)(work - base.pszSuffix + 1);
683 base.cchSuffix = len;
684 }
685
686 /*
687 * At this point:
688 * .pszSuffix points to location (starting with '//')
689 * .cchSuffix length of location (above) and rest less the last
690 * leaf (if any)
691 * sizeloc length of location (above) up to but not including
692 * the last '/'
693 */
694
695 res2 = ParseURLW(mrelative, &relative);
696 if (res2) {
697 /* no scheme in pszRelative */
698 TRACE("no scheme detected in Relative\n");
699 relative.pszSuffix = mrelative; /* case 3,4,5 depends on this */
700 relative.cchSuffix = strlenW(mrelative);
701 if (*pszRelative == ':') {
702 /* case that is either left alone or uses pszBase */
703 if (dwFlags & URL_PLUGGABLE_PROTOCOL) {
704 process_case = 5;
705 break;
706 }
707 process_case = 1;
708 break;
709 }
710 if (isalnum(*mrelative) && (*(mrelative + 1) == ':')) {
711 /* case that becomes "file:///" */
712 strcpyW(preliminary, myfilestr);
713 process_case = 1;
714 break;
715 }
716 if ((*mrelative == '/') && (*(mrelative+1) == '/')) {
717 /* pszRelative has location and rest */
718 process_case = 3;
719 break;
720 }
721 if (*mrelative == '/') {
722 /* case where pszRelative is root to location */
723 process_case = 4;
724 break;
725 }
726 process_case = (*base.pszSuffix == '/' || base.nScheme == URL_SCHEME_MK) ? 5 : 3;
727 break;
728 }
729
730 /* handle cases where pszRelative has scheme */
731 if ((base.cchProtocol == relative.cchProtocol) &&
732 (strncmpW(base.pszProtocol, relative.pszProtocol, base.cchProtocol) == 0)) {
733
734 /* since the schemes are the same */
735 if ((*relative.pszSuffix == '/') && (*(relative.pszSuffix+1) == '/')) {
736 /* case where pszRelative replaces location and following */
737 process_case = 3;
738 break;
739 }
740 if (*relative.pszSuffix == '/') {
741 /* case where pszRelative is root to location */
742 process_case = 4;
743 break;
744 }
745 /* replace either just location if base's location starts with a
746 * slash or otherwise everything */
747 process_case = (*base.pszSuffix == '/') ? 5 : 1;
748 break;
749 }
750 if ((*relative.pszSuffix == '/') && (*(relative.pszSuffix+1) == '/')) {
751 /* case where pszRelative replaces scheme, location,
752 * and following and handles PLUGGABLE
753 */
754 process_case = 2;
755 break;
756 }
757 process_case = 1;
758 break;
759 } while(FALSE); /* a little trick to allow easy exit from nested if's */
760
761 ret = S_OK;
762 switch (process_case) {
763
764 case 1: /*
765 * Return pszRelative appended to what ever is in pszCombined,
766 * (which may the string "file:///"
767 */
768 strcatW(preliminary, mrelative);
769 break;
770
771 case 2: /*
772 * Same as case 1, but if URL_PLUGGABLE_PROTOCOL was specified
773 * and pszRelative starts with "//", then append a "/"
774 */
775 strcpyW(preliminary, mrelative);
776 if (!(dwFlags & URL_PLUGGABLE_PROTOCOL) &&
777 URL_JustLocation(relative.pszSuffix))
778 strcatW(preliminary, single_slash);
779 break;
780
781 case 3: /*
782 * Return the pszBase scheme with pszRelative. Basically
783 * keeps the scheme and replaces the domain and following.
784 */
785 memcpy(preliminary, base.pszProtocol, (base.cchProtocol + 1)*sizeof(WCHAR));
786 work = preliminary + base.cchProtocol + 1;
787 strcpyW(work, relative.pszSuffix);
788 break;
789
790 case 4: /*
791 * Return the pszBase scheme and location but everything
792 * after the location is pszRelative. (Replace document
793 * from root on.)
794 */
795 memcpy(preliminary, base.pszProtocol, (base.cchProtocol+1+sizeloc)*sizeof(WCHAR));
796 work = preliminary + base.cchProtocol + 1 + sizeloc;
797 if (dwFlags & URL_PLUGGABLE_PROTOCOL)
798 *(work++) = '/';
799 strcpyW(work, relative.pszSuffix);
800 break;
801
802 case 5: /*
803 * Return the pszBase without its document (if any) and
804 * append pszRelative after its scheme.
805 */
806 memcpy(preliminary, base.pszProtocol,
807 (base.cchProtocol+1+base.cchSuffix)*sizeof(WCHAR));
808 work = preliminary + base.cchProtocol+1+base.cchSuffix - 1;
809 if (*work++ != '/')
810 *(work++) = '/';
811 strcpyW(work, relative.pszSuffix);
812 break;
813
814 default:
815 FIXME("How did we get here????? process_case=%d\n", process_case);
816 ret = E_INVALIDARG;
817 }
818
819 if (ret == S_OK) {
820 /* Reuse mrelative as temp storage as its already allocated and not needed anymore */
821 ret = UrlCanonicalizeW(preliminary, mrelative, pcchCombined, (dwFlags & ~URL_FILE_USE_PATHURL));
822 if(SUCCEEDED(ret) && pszCombined) {
823 lstrcpyW(pszCombined, mrelative);
824 }
825 TRACE("return-%d len=%d, %s\n",
826 process_case, *pcchCombined, debugstr_w(pszCombined));
827 }
828 HeapFree(GetProcessHeap(), 0, preliminary);
829 return ret;
830 }
831
832 /*************************************************************************
833 * UrlEscapeA [SHLWAPI.@]
834 */
835
836 HRESULT WINAPI UrlEscapeA(
837 LPCSTR pszUrl,
838 LPSTR pszEscaped,
839 LPDWORD pcchEscaped,
840 DWORD dwFlags)
841 {
842 WCHAR bufW[INTERNET_MAX_URL_LENGTH];
843 WCHAR *escapedW = bufW;
844 UNICODE_STRING urlW;
845 HRESULT ret;
846 DWORD lenW = sizeof(bufW)/sizeof(WCHAR), lenA;
847
848 if (!pszEscaped || !pcchEscaped || !*pcchEscaped)
849 return E_INVALIDARG;
850
851 if(!RtlCreateUnicodeStringFromAsciiz(&urlW, pszUrl))
852 return E_INVALIDARG;
853 if((ret = UrlEscapeW(urlW.Buffer, escapedW, &lenW, dwFlags)) == E_POINTER) {
854 escapedW = HeapAlloc(GetProcessHeap(), 0, lenW * sizeof(WCHAR));
855 ret = UrlEscapeW(urlW.Buffer, escapedW, &lenW, dwFlags);
856 }
857 if(ret == S_OK) {
858 RtlUnicodeToMultiByteSize(&lenA, escapedW, lenW * sizeof(WCHAR));
859 if(*pcchEscaped > lenA) {
860 RtlUnicodeToMultiByteN(pszEscaped, *pcchEscaped - 1, &lenA, escapedW, lenW * sizeof(WCHAR));
861 pszEscaped[lenA] = 0;
862 *pcchEscaped = lenA;
863 } else {
864 *pcchEscaped = lenA + 1;
865 ret = E_POINTER;
866 }
867 }
868 if(escapedW != bufW) HeapFree(GetProcessHeap(), 0, escapedW);
869 RtlFreeUnicodeString(&urlW);
870 return ret;
871 }
872
/* Internal flag bits (the int_flags argument of URL_NeedEscapeW) that tune
 * how individual characters are treated while escaping a Url.
 * URL_NeedEscapeW only looks at the three WINE_URL_ESCAPE_* bits.
 * NOTE(review): the remaining bits are presumably interpreted by
 * UrlEscapeW itself - confirm there.
 */
#define WINE_URL_BASH_AS_SLASH    0x01
#define WINE_URL_COLLAPSE_SLASHES 0x02
#define WINE_URL_ESCAPE_SLASH     0x04  /* '/' has to be escaped */
#define WINE_URL_ESCAPE_HASH      0x08  /* '#' has to be escaped */
#define WINE_URL_ESCAPE_QUESTION  0x10  /* '?' has to be escaped */
#define WINE_URL_STOP_ON_HASH     0x20
#define WINE_URL_STOP_ON_QUESTION 0x40
880
881 static inline BOOL URL_NeedEscapeW(WCHAR ch, DWORD dwFlags, DWORD int_flags)
882 {
883
884 if (isalnumW(ch))
885 return FALSE;
886
887 if(dwFlags & URL_ESCAPE_SPACES_ONLY) {
888 if(ch == ' ')
889 return TRUE;
890 else
891 return FALSE;
892 }
893
894 if ((dwFlags & URL_ESCAPE_PERCENT) && (ch == '%'))
895 return TRUE;
896
897 if (ch <= 31 || ch >= 127)
898 return TRUE;
899
900 else {
901 switch (ch) {
902 case ' ':
903 case '<':
904 case '>':
905 case '\"':
906 case '{':
907 case '}':
908 case '|':
909 case '\\':
910 case '^':
911 case ']':
912 case '[':
913 case '`':
914 case '&':
915 return TRUE;
916
917 case '/':
918 if (int_flags & WINE_URL_ESCAPE_SLASH) return TRUE;
919 return FALSE;
920
921 case '?':
922 if (int_flags & WINE_URL_ESCAPE_QUESTION) return TRUE;
923 return FALSE;
924
925 case '#':
926 if (int_flags & WINE_URL_ESCAPE_HASH) return TRUE;
927 return FALSE;
928
929 default:
930 return FALSE;
931 }
932 }
933 }
934
935
936 /*************************************************************************
937 * UrlEscapeW [SHLWAPI.@]
938 *
939 * Converts unsafe characters in a Url into escape sequences.
940 *
941 * PARAMS
942 * pszUrl [I] Url to modify
943 * pszEscaped [O] Destination for modified Url
 *  pcchEscaped [I/O] Size of pszEscaped in characters, destination for length of pszEscaped
945 * dwFlags [I] URL_ flags from "shlwapi.h"
946 *
947 * RETURNS
948 * Success: S_OK. pszEscaped contains the escaped Url, pcchEscaped
949 * contains its length.
950 * Failure: E_POINTER, if pszEscaped is not large enough. In this case
951 * pcchEscaped is set to the required length.
952 *
953 * Converts unsafe characters into their escape sequences.
954 *
955 * NOTES
956 * - By default this function stops converting at the first '?' or
957 * '#' character.
958 * - If dwFlags contains URL_ESCAPE_SPACES_ONLY then only spaces are
959 * converted, but the conversion continues past a '?' or '#'.
960 * - Note that this function did not work well (or at all) in shlwapi version 4.
961 *
962 * BUGS
963 * Only the following flags are implemented:
964 *| URL_ESCAPE_SPACES_ONLY
965 *| URL_DONT_ESCAPE_EXTRA_INFO
966 *| URL_ESCAPE_SEGMENT_ONLY
967 *| URL_ESCAPE_PERCENT
968 */
969 HRESULT WINAPI UrlEscapeW(
970 LPCWSTR pszUrl,
971 LPWSTR pszEscaped,
972 LPDWORD pcchEscaped,
973 DWORD dwFlags)
974 {
975 LPCWSTR src;
976 DWORD needed = 0, ret;
977 BOOL stop_escaping = FALSE;
978 WCHAR next[5], *dst = pszEscaped;
979 INT len;
980 PARSEDURLW parsed_url;
981 DWORD int_flags;
982 DWORD slashes = 0;
983 static const WCHAR localhost[] = {'l','o','c','a','l','h','o','s','t',0};
984
985 TRACE("(%s %p %p 0x%08x)\n", debugstr_w(pszUrl), pszEscaped,
986 pcchEscaped, dwFlags);
987
988 if(!pszUrl || !pcchEscaped)
989 return E_INVALIDARG;
990
991 if(dwFlags & ~(URL_ESCAPE_SPACES_ONLY |
992 URL_ESCAPE_SEGMENT_ONLY |
993 URL_DONT_ESCAPE