<?php

declare(strict_types=1);

namespace GuzzleHttp\Psr7;

use GuzzleHttp\Psr7\Exception\MalformedUriException;
use Psr\Http\Message\UriInterface;

/**
 * PSR-7 URI implementation.
 *
 * Instances are treated as immutable value objects: every `with*()` method
 * returns either the same instance (when nothing changes) or a modified clone.
 *
 * @author Michael Dowling
 * @author Tobias Schultze
 * @author Matthew Weier O'Phinney
 */
class Uri implements UriInterface, \JsonSerializable
{
    /**
     * Absolute http and https URIs require a host per RFC 7230 Section 2.7
     * but in generic URIs the host can be empty. So for http(s) URIs
     * we apply this default host when no host is given yet to form a
     * valid URI.
     */
    private const HTTP_DEFAULT_HOST = 'localhost';

    /** Scheme => default port map used to drop redundant ports. */
    private const DEFAULT_PORTS = [
        'http' => 80,
        'https' => 443,
        'ftp' => 21,
        'gopher' => 70,
        'nntp' => 119,
        'news' => 119,
        'telnet' => 23,
        'tn3270' => 23,
        'imap' => 143,
        'pop' => 110,
        'ldap' => 389,
    ];

    /**
     * Unreserved characters for use in a regex.
     *
     * @see https://datatracker.ietf.org/doc/html/rfc3986#section-2.3
     */
    private const CHAR_UNRESERVED = 'a-zA-Z0-9_\-\.~';

    /**
     * Sub-delims for use in a regex.
     *
     * @see https://datatracker.ietf.org/doc/html/rfc3986#section-2.2
     */
    private const CHAR_SUB_DELIMS = '!\$&\'\(\)\*\+,;=';

    /** Replacements applied to query keys/values so separators inside them stay literal. */
    private const QUERY_SEPARATORS_REPLACEMENT = ['=' => '%3D', '&' => '%26'];

    /** @var string Uri scheme. */
    private $scheme = '';

    /** @var string Uri user info. */
    private $userInfo = '';

    /** @var string Uri host. */
    private $host = '';

    /** @var int|null Uri port. */
    private $port;

    /** @var string Uri path. */
    private $path = '';

    /** @var string Uri query string. */
    private $query = '';

    /** @var string Uri fragment. */
    private $fragment = '';

    /** @var string|null String representation */
    private $composedComponents;

    /**
     * Builds a URI from its string form. An empty string yields an empty URI
     * and skips parsing entirely.
     *
     * @throws MalformedUriException If the given string cannot be parsed.
     */
    public function __construct(string $uri = '')
    {
        if ($uri !== '') {
            $parts = self::parse($uri);
            if ($parts === false) {
                throw new MalformedUriException("Unable to parse URI: $uri");
            }
            $this->applyParts($parts);
        }
    }

    /**
     * UTF-8 aware \parse_url() replacement.
     *
     * The internal function produces broken output for non ASCII domain names
     * (IDN) when used with locales other than "C".
     *
     * On the other hand, cURL understands IDN correctly only when UTF-8 locale
     * is configured ("C.UTF-8", "en_US.UTF-8", etc.).
     *
     * @see https://bugs.php.net/bug.php?id=52923
     * @see https://www.php.net/manual/en/function.parse-url.php#114817
     * @see https://curl.haxx.se/libcurl/c/CURLOPT_URL.html#ENCODING
     *
     * @return array|false
     */
    private static function parse(string $url)
    {
        // If IPv6: keep everything up to and including the bracketed literal out of
        // the encoding step below, otherwise "[" and "]" would be percent-encoded.
        // Hex digits are accepted in both cases (e.g. "[2A00::1]" and "[2a00::1]").
        $prefix = '';
        if (preg_match('%^(.*://\[[0-9:a-fA-F]+\])(.*?)$%', $url, $matches)) {
            /** @var array{0:string, 1:string, 2:string} $matches */
            $prefix = $matches[1];
            $url = $matches[2];
        }

        // Encode every run of characters that is not a URI delimiter so that
        // parse_url() only ever sees ASCII; the parts are decoded again below.
        /** @var string */
        $encodedUrl = preg_replace_callback(
            '%[^:/@?&=#]+%usD',
            static function ($matches) {
                return urlencode($matches[0]);
            },
            $url
        );

        $result = parse_url($prefix.$encodedUrl);

        if ($result === false) {
            return false;
        }

        return array_map('urldecode', $result);
    }

    /**
     * Returns the string form of the URI. The composed string is cached on the
     * instance; the `with*()` methods reset the cache on the clone they return.
     */
    public function __toString(): string
    {
        if ($this->composedComponents === null) {
            $this->composedComponents = self::composeComponents(
                $this->scheme,
                $this->getAuthority(),
                $this->path,
                $this->query,
                $this->fragment
            );
        }

        return $this->composedComponents;
    }

    /**
     * Composes a URI reference string from its various components.
     *
     * Usually this method does not need to be called manually but instead is used indirectly via
     * `Psr\Http\Message\UriInterface::__toString`.
     *
     * PSR-7 UriInterface treats an empty component the same as a missing component as
     * getQuery(), getFragment() etc. always return a string. This explains the slight
     * difference to RFC 3986 Section 5.3.
     *
     * Another adjustment is that the authority separator is added even when the authority is missing/empty
     * for the "file" scheme. This is because PHP stream functions like `file_get_contents` only work with
     * `file:///myfile` but not with `file:/myfile` although they are equivalent according to RFC 3986. But
     * `file:///` is the more common syntax for the file scheme anyway (Chrome for example redirects to
     * that format).
     *
     * @see https://datatracker.ietf.org/doc/html/rfc3986#section-5.3
     */
    public static function composeComponents(?string $scheme, ?string $authority, string $path, ?string $query, ?string $fragment): string
    {
        $uri = '';

        // weak type checks so that null is treated the same as an empty string
        if ($scheme != '') {
            $uri .= $scheme.':';
        }

        if ($authority != '' || $scheme === 'file') {
            $uri .= '//'.$authority;
        }

        // With an authority present, a non-empty path must start with a slash.
        if ($authority != '' && $path != '' && $path[0] != '/') {
            $path = '/'.$path;
        }

        $uri .= $path;

        if ($query != '') {
            $uri .= '?'.$query;
        }

        if ($fragment != '') {
            $uri .= '#'.$fragment;
        }

        return $uri;
    }

    /**
     * Whether the URI has the default port of the current scheme.
     *
     * `Psr\Http\Message\UriInterface::getPort` may return null or the standard port. This method can be used
     * independently of the implementation.
     */
    public static function isDefaultPort(UriInterface $uri): bool
    {
        return $uri->getPort() === null
            || (isset(self::DEFAULT_PORTS[$uri->getScheme()]) && $uri->getPort() === self::DEFAULT_PORTS[$uri->getScheme()]);
    }

    /**
     * Whether the URI is absolute, i.e. it has a scheme.
     *
     * An instance of UriInterface can either be an absolute URI or a relative reference. This method returns true
     * if it is the former. An absolute URI has a scheme. A relative reference is used to express a URI relative
     * to another URI, the base URI. Relative references can be divided into several forms:
     * - network-path references, e.g. '//example.com/path'
     * - absolute-path references, e.g. '/path'
     * - relative-path references, e.g. 'subpath'
     *
     * @see Uri::isNetworkPathReference
     * @see Uri::isAbsolutePathReference
     * @see Uri::isRelativePathReference
     * @see https://datatracker.ietf.org/doc/html/rfc3986#section-4
     */
    public static function isAbsolute(UriInterface $uri): bool
    {
        return $uri->getScheme() !== '';
    }

    /**
     * Whether the URI is a network-path reference.
     *
     * A relative reference that begins with two slash characters is termed a network-path reference.
     *
     * @see https://datatracker.ietf.org/doc/html/rfc3986#section-4.2
     */
    public static function isNetworkPathReference(UriInterface $uri): bool
    {
        return $uri->getScheme() === '' && $uri->getAuthority() !== '';
    }

    /**
     * Whether the URI is an absolute-path reference.
     *
     * A relative reference that begins with a single slash character is termed an absolute-path reference.
     *
     * @see https://datatracker.ietf.org/doc/html/rfc3986#section-4.2
     */
    public static function isAbsolutePathReference(UriInterface $uri): bool
    {
        return $uri->getScheme() === ''
            && $uri->getAuthority() === ''
            && isset($uri->getPath()[0])
            && $uri->getPath()[0] === '/';
    }

    /**
     * Whether the URI is a relative-path reference.
     *
     * A relative reference that does not begin with a slash character is termed a relative-path reference.
     *
     * @see https://datatracker.ietf.org/doc/html/rfc3986#section-4.2
     */
    public static function isRelativePathReference(UriInterface $uri): bool
    {
        return $uri->getScheme() === ''
            && $uri->getAuthority() === ''
            && (!isset($uri->getPath()[0]) || $uri->getPath()[0] !== '/');
    }

    /**
     * Whether the URI is a same-document reference.
     *
     * A same-document reference refers to a URI that is, aside from its fragment
     * component, identical to the base URI. When no base URI is given, only an empty
     * URI reference (apart from its fragment) is considered a same-document reference.
     *
     * @param UriInterface      $uri  The URI to check
     * @param UriInterface|null $base An optional base URI to compare against
     *
     * @see https://datatracker.ietf.org/doc/html/rfc3986#section-4.4
     */
    public static function isSameDocumentReference(UriInterface $uri, ?UriInterface $base = null): bool
    {
        if ($base !== null) {
            // Resolve against the base first so relative references are compared in absolute form.
            $uri = UriResolver::resolve($base, $uri);

            return ($uri->getScheme() === $base->getScheme())
                && ($uri->getAuthority() === $base->getAuthority())
                && ($uri->getPath() === $base->getPath())
                && ($uri->getQuery() === $base->getQuery());
        }

        return $uri->getScheme() === '' && $uri->getAuthority() === '' && $uri->getPath() === '' && $uri->getQuery() === '';
    }

    /**
     * Creates a new URI with a specific query string value removed.
     *
     * Any existing query string values that exactly match the provided key are
     * removed.
     *
     * @param UriInterface $uri URI to use as a base.
     * @param string       $key Query string key to remove.
     */
    public static function withoutQueryValue(UriInterface $uri, string $key): UriInterface
    {
        $result = self::getFilteredQueryString($uri, [$key]);

        return $uri->withQuery(implode('&', $result));
    }

    /**
     * Creates a new URI with a specific query string value.
     *
     * Any existing query string values that exactly match the provided key are
     * removed and replaced with the given key value pair.
     *
     * A value of null will set the query string key without a value, e.g. "key"
     * instead of "key=value".
     *
     * @param UriInterface $uri   URI to use as a base.
     * @param string       $key   Key to set.
     * @param string|null  $value Value to set
     */
    public static function withQueryValue(UriInterface $uri, string $key, ?string $value): UriInterface
    {
        $result = self::getFilteredQueryString($uri, [$key]);

        $result[] = self::generateQueryString($key, $value);

        return $uri->withQuery(implode('&', $result));
    }

    /**
     * Creates a new URI with multiple specific query string values.
     *
     * It has the same behavior as withQueryValue() but for an associative array of key => value.
     *
     * @param UriInterface    $uri           URI to use as a base.
     * @param (string|null)[] $keyValueArray Associative array of key and values
     */
    public static function withQueryValues(UriInterface $uri, array $keyValueArray): UriInterface
    {
        $result = self::getFilteredQueryString($uri, array_keys($keyValueArray));

        foreach ($keyValueArray as $key => $value) {
            // PHP turns numeric-string array keys into ints, hence the explicit string casts.
            $result[] = self::generateQueryString((string) $key, $value !== null ? (string) $value : null);
        }

        return $uri->withQuery(implode('&', $result));
    }

    /**
     * Creates a URI from a hash of `parse_url` components.
     *
     * @see https://www.php.net/manual/en/function.parse-url.php
     *
     * @throws MalformedUriException If the components do not form a valid URI.
     */
    public static function fromParts(array $parts): UriInterface
    {
        $uri = new self();
        $uri->applyParts($parts);
        $uri->validateState();

        return $uri;
    }

    /**
     * Returns the scheme as stored after filtering, or an empty string if none is set.
     */
    public function getScheme(): string
    {
        return $this->scheme;
    }

    /**
     * Returns the authority in "[user-info@]host[:port]" form, built from the
     * stored user info, host and port. The port is only appended when it is
     * not null.
     */
    public function getAuthority(): string
    {
        $authority = $this->host;
        if ($this->userInfo !== '') {
            $authority = $this->userInfo.'@'.$authority;
        }

        if ($this->port !== null) {
            $authority .= ':'.$this->port;
        }

        return $authority;
    }

    /**
     * Returns the user info ("user" or "user:password"), or an empty string if none is set.
     */
    public function getUserInfo(): string
    {
        return $this->userInfo;
    }

    /**
     * Returns the host as stored after filtering, or an empty string if none is set.
     */
    public function getHost(): string
    {
        return $this->host;
    }

    /**
     * Returns the port, or null when no port is set or the port was dropped
     * because it is the default for the scheme.
     */
    public function getPort(): ?int
    {
        return $this->port;
    }

    /**
     * Returns the path as stored after filtering, or an empty string if none is set.
     */
    public function getPath(): string
    {
        return $this->path;
    }

    /**
     * Returns the query string without the leading "?", or an empty string if none is set.
     */
    public function getQuery(): string
    {
        return $this->query;
    }

    /**
     * Returns the fragment without the leading "#", or an empty string if none is set.
     */
    public function getFragment(): string
    {
        return $this->fragment;
    }

    /**
     * Returns an instance with the given scheme. Returns the same instance
     * when the filtered scheme equals the current one.
     *
     * @param mixed $scheme
     *
     * @throws \InvalidArgumentException If the scheme is not a string.
     * @throws MalformedUriException     If the resulting URI is invalid.
     */
    public function withScheme($scheme): UriInterface
    {
        $scheme = $this->filterScheme($scheme);

        if ($this->scheme === $scheme) {
            return $this;
        }

        $new = clone $this;
        $new->scheme = $scheme;
        $new->composedComponents = null;
        // The current port may have become the default port of the new scheme.
        $new->removeDefaultPort();
        $new->validateState();

        return $new;
    }

    /**
     * Returns an instance with the given user info. Returns the same instance
     * when the resulting user info equals the current one.
     *
     * @param mixed $user
     * @param mixed $password Appended as ":password" when not null.
     *
     * @throws \InvalidArgumentException If the user or password is not a string.
     * @throws MalformedUriException     If the resulting URI is invalid.
     */
    public function withUserInfo($user, $password = null): UriInterface
    {
        $info = $this->filterUserInfoComponent($user);
        if ($password !== null) {
            $info .= ':'.$this->filterUserInfoComponent($password);
        }

        if ($this->userInfo === $info) {
            return $this;
        }

        $new = clone $this;
        $new->userInfo = $info;
        $new->composedComponents = null;
        $new->validateState();

        return $new;
    }

    /**
     * Returns an instance with the given host. Returns the same instance
     * when the filtered host equals the current one.
     *
     * @param mixed $host
     *
     * @throws \InvalidArgumentException If the host is not a string.
     * @throws MalformedUriException     If the resulting URI is invalid.
     */
    public function withHost($host): UriInterface
    {
        $host = $this->filterHost($host);

        if ($this->host === $host) {
            return $this;
        }

        $new = clone $this;
        $new->host = $host;
        $new->composedComponents = null;
        $new->validateState();

        return $new;
    }

    /**
     * Returns an instance with the given port. Returns the same instance
     * when the filtered port equals the current one. A port equal to the
     * scheme's default is stored as null.
     *
     * @param mixed $port
     *
     * @throws \InvalidArgumentException If the port is outside 0-65535.
     * @throws MalformedUriException     If the resulting URI is invalid.
     */
    public function withPort($port): UriInterface
    {
        $port = $this->filterPort($port);

        if ($this->port === $port) {
            return $this;
        }

        $new = clone $this;
        $new->port = $port;
        $new->composedComponents = null;
        $new->removeDefaultPort();
        $new->validateState();

        return $new;
    }

    /**
     * Returns an instance with the given path. Returns the same instance
     * when the filtered path equals the current one.
     *
     * @param mixed $path
     *
     * @throws \InvalidArgumentException If the path is not a string.
     * @throws MalformedUriException     If the resulting URI is invalid.
     */
    public function withPath($path): UriInterface
    {
        $path = $this->filterPath($path);

        if ($this->path === $path) {
            return $this;
        }

        $new = clone $this;
        $new->path = $path;
        $new->composedComponents = null;
        $new->validateState();

        return $new;
    }

    /**
     * Returns an instance with the given query string. Returns the same
     * instance when the filtered query equals the current one.
     *
     * @param mixed $query
     *
     * @throws \InvalidArgumentException If the query is not a string.
     */
    public function withQuery($query): UriInterface
    {
        $query = $this->filterQueryAndFragment($query);

        if ($this->query === $query) {
            return $this;
        }

        $new = clone $this;
        $new->query = $query;
        $new->composedComponents = null;

        return $new;
    }

    /**
     * Returns an instance with the given fragment. Returns the same instance
     * when the filtered fragment equals the current one.
     *
     * @param mixed $fragment
     *
     * @throws \InvalidArgumentException If the fragment is not a string.
     */
    public function withFragment($fragment): UriInterface
    {
        $fragment = $this->filterQueryAndFragment($fragment);

        if ($this->fragment === $fragment) {
            return $this;
        }

        $new = clone $this;
        $new->fragment = $fragment;
        $new->composedComponents = null;

        return $new;
    }

    /**
     * Serializes the URI to JSON as its string representation.
     */
    public function jsonSerialize(): string
    {
        return $this->__toString();
    }

    /**
     * Apply parse_url parts to a URI.
     *
     * Every component is passed through its filter; missing components are
     * reset to their empty value. A port equal to the scheme default is removed.
     *
     * @param array $parts Array of parse_url parts to apply.
     */
    private function applyParts(array $parts): void
    {
        $this->scheme = isset($parts['scheme'])
            ? $this->filterScheme($parts['scheme'])
            : '';
        $this->userInfo = isset($parts['user'])
            ? $this->filterUserInfoComponent($parts['user'])
            : '';
        $this->host = isset($parts['host'])
            ? $this->filterHost($parts['host'])
            : '';
        $this->port = isset($parts['port'])
            ? $this->filterPort($parts['port'])
            : null;
        $this->path = isset($parts['path'])
            ? $this->filterPath($parts['path'])
            : '';
        $this->query = isset($parts['query'])
            ? $this->filterQueryAndFragment($parts['query'])
            : '';
        $this->fragment = isset($parts['fragment'])
            ? $this->filterQueryAndFragment($parts['fragment'])
            : '';
        if (isset($parts['pass'])) {
            $this->userInfo .= ':'.$this->filterUserInfoComponent($parts['pass']);
        }

        $this->removeDefaultPort();
    }

    /**
     * Lower-cases the ASCII letters of the scheme.
     *
     * @param mixed $scheme
     *
     * @throws \InvalidArgumentException If the scheme is invalid.
     */
    private function filterScheme($scheme): string
    {
        if (!is_string($scheme)) {
            throw new \InvalidArgumentException('Scheme must be a string');
        }

        return \strtr($scheme, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz');
    }

    /**
     * Percent-encodes every character of a user info component that is neither
     * unreserved nor a sub-delim, as well as any "%" that does not start a
     * valid percent-encoded triplet.
     *
     * @param mixed $component
     *
     * @throws \InvalidArgumentException If the user info is invalid.
     */
    private function filterUserInfoComponent($component): string
    {
        if (!is_string($component)) {
            throw new \InvalidArgumentException('User info must be a string');
        }

        return preg_replace_callback(
            '/(?:[^%'.self::CHAR_UNRESERVED.self::CHAR_SUB_DELIMS.']+|%(?![A-Fa-f0-9]{2}))/',
            [$this, 'rawurlencodeMatchZero'],
            $component
        );
    }

    /**
     * Lower-cases the ASCII letters of the host.
     *
     * @param mixed $host
     *
     * @throws \InvalidArgumentException If the host is invalid.
     */
    private function filterHost($host): string
    {
        if (!is_string($host)) {
            throw new \InvalidArgumentException('Host must be a string');
        }

        return \strtr($host, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz');
    }

    /**
     * Casts the port to int and checks that it lies within 0-65535; null is
     * passed through unchanged.
     *
     * @param mixed $port
     *
     * @throws \InvalidArgumentException If the port is invalid.
     */
    private function filterPort($port): ?int
    {
        if ($port === null) {
            return null;
        }

        $port = (int) $port;
        if (0 > $port || 0xFFFF < $port) {
            throw new \InvalidArgumentException(
                sprintf('Invalid port: %d. Must be between 0 and 65535', $port)
            );
        }

        return $port;
    }

    /**
     * Returns the "&"-separated parts of the URI's query string whose decoded
     * key is not one of the given (decoded) keys. The original array indexes
     * of the kept parts are preserved.
     *
     * @param (string|int)[] $keys
     *
     * @return string[]
     */
    private static function getFilteredQueryString(UriInterface $uri, array $keys): array
    {
        $current = $uri->getQuery();

        if ($current === '') {
            return [];
        }

        $decodedKeys = array_map(function ($k): string {
            return rawurldecode((string) $k);
        }, $keys);

        return array_filter(explode('&', $current), function ($part) use ($decodedKeys) {
            return !in_array(rawurldecode(explode('=', $part)[0]), $decodedKeys, true);
        });
    }

    /**
     * Builds a single "key" or "key=value" query part; a null value yields
     * the key alone.
     */
    private static function generateQueryString(string $key, ?string $value): string
    {
        // Query string separators ("=", "&") within the key or value need to be encoded
        // (while preventing double-encoding) before setting the query string. All other
        // chars that need percent-encoding will be encoded by withQuery().
        $queryString = strtr($key, self::QUERY_SEPARATORS_REPLACEMENT);

        if ($value !== null) {
            $queryString .= '='.strtr($value, self::QUERY_SEPARATORS_REPLACEMENT);
        }

        return $queryString;
    }

    /**
     * Resets the port to null when it equals the default port of the current scheme.
     */
    private function removeDefaultPort(): void
    {
        if ($this->port !== null && self::isDefaultPort($this)) {
            $this->port = null;
        }
    }

    /**
     * Filters the path of a URI
     *
     * Percent-encodes characters outside the allowed path set and any "%"
     * that does not start a valid percent-encoded triplet.
     *
     * @param mixed $path
     *
     * @throws \InvalidArgumentException If the path is invalid.
     */
    private function filterPath($path): string
    {
        if (!is_string($path)) {
            throw new \InvalidArgumentException('Path must be a string');
        }

        return preg_replace_callback(
            '/(?:[^'.self::CHAR_UNRESERVED.self::CHAR_SUB_DELIMS.'%:@\/]++|%(?![A-Fa-f0-9]{2}))/',
            [$this, 'rawurlencodeMatchZero'],
            $path
        );
    }

    /**
     * Filters the query string or fragment of a URI.
     *
     * Percent-encodes characters outside the allowed query/fragment set and
     * any "%" that does not start a valid percent-encoded triplet.
     *
     * @param mixed $str
     *
     * @throws \InvalidArgumentException If the query or fragment is invalid.
     */
    private function filterQueryAndFragment($str): string
    {
        if (!is_string($str)) {
            throw new \InvalidArgumentException('Query and fragment must be a string');
        }

        return preg_replace_callback(
            '/(?:[^'.self::CHAR_UNRESERVED.self::CHAR_SUB_DELIMS.'%:@\/\?]++|%(?![A-Fa-f0-9]{2}))/',
            [$this, 'rawurlencodeMatchZero'],
            $str
        );
    }

    /**
     * preg_replace_callback() callback: raw-URL-encodes the full match.
     */
    private function rawurlencodeMatchZero(array $match): string
    {
        return rawurlencode($match[0]);
    }

    /**
     * Applies the default host to http(s) URIs without a host and rejects
     * component combinations that cannot be represented unambiguously.
     *
     * @throws MalformedUriException If the path is invalid for a URI without an authority.
     */
    private function validateState(): void
    {
        if ($this->host === '' && ($this->scheme === 'http' || $this->scheme === 'https')) {
            $this->host = self::HTTP_DEFAULT_HOST;
        }

        if ($this->getAuthority() === '') {
            if (0 === strpos($this->path, '//')) {
                throw new MalformedUriException('The path of a URI without an authority must not start with two slashes "//"');
            }
            if ($this->scheme === '' && false !== strpos(explode('/', $this->path, 2)[0], ':')) {
                throw new MalformedUriException('A relative URI must not have a path beginning with a segment containing a colon');
            }
        }
    }
}