<?php

namespace Sabre\Uri;

/**
 * This file contains all the uri handling functions.
 *
 * @copyright Copyright (C) fruux GmbH (https://fruux.com/)
 * @author Evert Pot (http://evertpot.com/)
 * @license http://sabre.io/license/
 */

/**
 * Resolves relative urls, like a browser would.
 *
 * This function takes a basePath, which itself _may_ also be relative, and
 * then applies the relative path on top of it.
 *
 * Only scheme, host, port, path, query and fragment are carried over into the
 * result; user information from the base is not.
 *
 * @param string $basePath
 * @param string $newPath
 * @return string
 */
function resolve($basePath, $newPath) {

    $base = parse($basePath);
    $delta = parse($newPath);

    // If the new path defines a scheme, it's absolute and we can just return
    // that.
    if ($delta['scheme']) {
        return build($delta);
    }

    // "Has a value" means neither null nor the empty string. A plain
    // truthiness test would wrongly discard the literal string "0".
    $hasValue = function($value) {
        return $value !== null && $value !== '';
    };

    // Prefers the component from the new uri, falls back to the base.
    $pick = function($part) use ($base, $delta) {
        if ($delta[$part]) {
            return $delta[$part];
        }
        if ($base[$part]) {
            return $base[$part];
        }
        return null;
    };

    $newParts = [
        'scheme' => $pick('scheme'),
        'host'   => $pick('host'),
        'port'   => $pick('port'),
    ];

    $hasDeltaPath = $hasValue($delta['path']);

    if (!$hasDeltaPath) {
        // No new path given: keep the base path.
        $path = $hasValue($base['path']) ? $base['path'] : '/';
    } elseif ($delta['path'][0] === '/') {
        // The new path is absolute and replaces the base path entirely.
        $path = $delta['path'];
    } else {
        // Removing last component from base path, then appending the
        // relative path.
        $path = (string)$base['path'];
        $lastSlash = strrpos($path, '/');
        if ($lastSlash !== false) {
            $path = substr($path, 0, $lastSlash);
        }
        $path .= '/' . $delta['path'];
    }

    // Remember whether we started from the root, so that '..' segments
    // climbing above the root cannot turn the path into a relative one
    // (which would get glued directly onto the host by build()).
    $isAbsolute = $path !== '' && $path[0] === '/';

    // Removing .. and .
    $newPathParts = [];
    foreach (explode('/', $path) as $pathPart) {
        if ($pathPart === '.') {
            continue;
        }
        if ($pathPart === '..') {
            array_pop($newPathParts);
            continue;
        }
        $newPathParts[] = $pathPart;
    }

    $path = implode('/', $newPathParts);
    if ($isAbsolute && ($path === '' || $path[0] !== '/')) {
        $path = '/' . $path;
    }
    $newParts['path'] = $path;

    if ($hasValue($delta['query'])) {
        $newParts['query'] = $delta['query'];
    } elseif ($hasValue($base['query']) && empty($delta['host']) && !$hasDeltaPath) {
        // Keep the old query if host and path didn't change
        $newParts['query'] = $base['query'];
    }

    if ($hasValue($delta['fragment'])) {
        $newParts['fragment'] = $delta['fragment'];
    }

    return build($newParts);

}

/**
 * Takes a URI or partial URI as its argument, and normalizes it.
 *
 * After normalizing a URI, you can safely compare it to other URIs.
 * This function will for instance convert a %7E into a tilde, according to
 * rfc3986.
 *
 * It will also change a %3a into a %3A.
 *
 * Besides that, '.' and '..' path segments are collapsed, the scheme and host
 * are lower-cased, default ports for http (80) and https (443) are removed
 * and an empty http(s) path becomes '/'.
 *
 * @param string $uri
 * @return string
 */
function normalize($uri) {

    $parts = parse($uri);

    $hasPath = isset($parts['path']) && $parts['path'] !== '';

    if ($hasPath) {
        $newPathParts = [];
        foreach (explode('/', ltrim($parts['path'], '/')) as $pathPart) {
            if ($pathPart === '.') {
                // skip
                continue;
            }
            if ($pathPart === '..') {
                // One level up in the hierarchy
                array_pop($newPathParts);
                continue;
            }
            // Ensuring that everything is correctly percent-encoded.
            $newPathParts[] = rawurlencode(rawurldecode($pathPart));
        }
        $parts['path'] = '/' . implode('/', $newPathParts);
    }

    if ($parts['scheme']) {
        $parts['scheme'] = strtolower($parts['scheme']);
        $scheme = $parts['scheme'];

        $defaultPorts = [
            'http'  => 80,
            'https' => 443,
        ];

        // Removing default ports.
        if (!empty($parts['port']) && isset($defaultPorts[$scheme]) && $defaultPorts[$scheme] === (int)$parts['port']) {
            unset($parts['port']);
        }

        // A few HTTP specific rules: an empty path is equivalent to / in
        // http.
        if (!$hasPath && in_array($scheme, ['http', 'https'], true)) {
            $parts['path'] = '/';
        }
    }

    if ($parts['host']) {
        $parts['host'] = strtolower($parts['host']);
    }

    return build($parts);

}

/**
 * Parses a URI and returns its individual components.
 *
 * This method largely behaves the same as PHP's parse_url, except that it will
 * return an array with all the array keys, including the ones that are not
 * set by parse_url, which makes it a bit easier to work with.
 *
 * Unlike PHP's parse_url, it will also convert any non-ascii characters to
 * percent-encoded strings. PHP's parse_url corrupts these characters on OS X.
 *
 * If parse_url cannot handle the uri, _parse_fallback() is used instead.
 *
 * @param string $uri
 * @return array
 */
function parse($uri) {

    // Normally a URI must be ASCII. However, often it's not and parse_url
    // might corrupt these strings.
    //
    // For that reason we take any non-ascii bytes from the uri and
    // percent-encode them first. This is deliberately done byte-wise (no /u
    // modifier): for valid UTF-8 the output is identical to encoding whole
    // characters, and for invalid UTF-8 the regex does not fail and return
    // null, which would silently produce an empty result.
    $uri = preg_replace_callback(
        '/[\x80-\xFF]/',
        function($matches) {
            return rawurlencode($matches[0]);
        },
        $uri
    );

    $result = parse_url($uri);
    if (!$result) {
        $result = _parse_fallback($uri);
    }

    // Array union: keys already present in $result win, missing ones become
    // null.
    return
        $result + [
            'scheme'   => null,
            'host'     => null,
            'path'     => null,
            'port'     => null,
            'user'     => null,
            'query'    => null,
            'fragment' => null,
        ];

}

/**
 * This function takes the components returned from PHP's parse_url, and uses
 * it to generate a new uri.
 *
 * The components 'scheme', 'host', 'user', 'port', 'path', 'query' and
 * 'fragment' are used; a 'pass' component is not written to the result.
 *
 * @param array $parts
 * @return string
 */
function build(array $parts) {

    // A component counts as present when it is neither null/unset nor the
    // empty string, so that a literal "0" is kept.
    $present = function($key) use ($parts) {
        return isset($parts[$key]) && $parts[$key] !== '';
    };

    $uri = '';

    // user and port only make sense in combination with a host.
    $authority = '';
    if (!empty($parts['host'])) {
        $authority = $parts['host'];
        if (!empty($parts['user'])) {
            $authority = $parts['user'] . '@' . $authority;
        }
        if (!empty($parts['port'])) {
            $authority = $authority . ':' . $parts['port'];
        }
    }

    $scheme = empty($parts['scheme']) ? '' : $parts['scheme'];

    if ($scheme !== '') {
        $uri = $scheme . ':';
    }

    // The '//' marker is written when there is an authority, and also for
    // file: uris, which have an empty authority (file:///path).
    if ($authority !== '' || $scheme === 'file') {
        $uri .= '//' . $authority;
    }

    if ($present('path')) {
        $uri .= $parts['path'];
    }
    if ($present('query')) {
        $uri .= '?' . $parts['query'];
    }
    if ($present('fragment')) {
        $uri .= '#' . $parts['fragment'];
    }

    return $uri;

}

/**
 * Returns the 'dirname' and 'basename' for a path.
 *
 * The reason there is a custom function for this purpose, is because
 * basename() is locale aware (behaviour changes if C locale or a UTF-8 locale
 * is used) and we need a method that just operates on UTF-8 characters.
 *
 * In addition basename and dirname are platform aware, and will treat
 * backslash (\) as a directory separator on windows.
 *
 * This method returns the 2 components as an array.
 *
 * If there is no dirname, it will return an empty string. Any / appearing at
 * the end of the string is stripped off.
 *
 * If the path has no basename at all (for example '' or '/'), both elements
 * of the returned array are null.
 *
 * @param string $path
 * @return array
 */
function split($path) {

    $matches = [];
    if (!preg_match('/^(?:(?:(.*)(?:\/+))?([^\/]+))(?:\/?)$/u', $path, $matches)) {
        return [null, null];
    }

    return [$matches[1], $matches[2]];

}

/**
 * This function is another implementation of parse_url, except this one is
 * fully written in PHP.
 *
 * The reason is that the PHP bug team is not willing to admit that there are
 * bugs in the parse_url implementation.
 *
 * This function is only called if the main parse method fails. It's pretty
 * crude and probably slow, so the original parse_url is usually preferred.
 *
 * @param string $uri
 * @return array
 * @throws InvalidUriException if the uri starts with // but the authority
 *                             part cannot be parsed.
 */
function _parse_fallback($uri) {

    // Normally a URI must be ASCII. However, often it's not, so we
    // percent-encode any non-ascii bytes first. See parse() for why this is
    // done byte-wise.
    $uri = preg_replace_callback(
        '/[\x80-\xFF]/',
        function($matches) {
            return rawurlencode($matches[0]);
        },
        $uri
    );

    $result = [
        'scheme'   => null,
        'host'     => null,
        'port'     => null,
        'user'     => null,
        'path'     => null,
        'fragment' => null,
        'query'    => null,
    ];

    // The '-' is placed last in the character class so it is a literal dash
    // and not a range. At least two characters are required before the colon.
    if (preg_match('%^([A-Za-z][A-Za-z0-9+.-]+):%', $uri, $matches)) {

        $result['scheme'] = $matches[1];
        // Take what's left.
        $uri = substr($uri, strlen($result['scheme']) + 1);

    }

    // Taking off a fragment part
    if (strpos($uri, '#') !== false) {
        list($uri, $result['fragment']) = explode('#', $uri, 2);
    }
    // Taking off the query part
    if (strpos($uri, '?') !== false) {
        list($uri, $result['query']) = explode('?', $uri, 2);
    }

    if (substr($uri, 0, 3) === '///') {
        // The triple slash uris are a bit unusual, but we have special handling
        // for them: empty host, and the path starts at the third slash.
        $result['path'] = substr($uri, 2);
        $result['host'] = '';
    } elseif (substr($uri, 0, 2) === '//') {
        // Uris that have an authority part.
        $regex = '%^
            //
            (?: (?<user> [^:@]+) (: (?<pass> [^@]+)) @)?
            (?<host> ( [^:/]* | \[ [^\]]+ \] ))
            (?: : (?<port> [0-9]+))?
            (?<path> / .*)?
            $%x';
        if (!preg_match($regex, $uri, $matches)) {
            throw new InvalidUriException('Invalid, or could not parse URI');
        }

        // Optional groups that did not take part in the match are either an
        // empty string or, when they trail the last matched group, missing
        // from $matches entirely. !empty() covers both without a warning.
        if (!empty($matches['host'])) {
            $result['host'] = $matches['host'];
        }
        if (!empty($matches['port'])) {
            $result['port'] = (int)$matches['port'];
        }
        if (isset($matches['path'])) {
            $result['path'] = $matches['path'];
        }
        if (!empty($matches['user'])) {
            $result['user'] = $matches['user'];
        }
        if (!empty($matches['pass'])) {
            $result['pass'] = $matches['pass'];
        }
    } else {
        $result['path'] = $uri;
    }

    return $result;
}