1<?php
2
3declare(strict_types=1);
4
5namespace GuzzleHttp\Psr7;
6
7use GuzzleHttp\Psr7\Exception\MalformedUriException;
8use Psr\Http\Message\UriInterface;
9
10/**
11 * PSR-7 URI implementation.
12 *
13 * @author Michael Dowling
14 * @author Tobias Schultze
15 * @author Matthew Weier O'Phinney
16 */
17class Uri implements UriInterface, \JsonSerializable
18{
19    /**
20     * Absolute http and https URIs require a host per RFC 7230 Section 2.7
21     * but in generic URIs the host can be empty. So for http(s) URIs
22     * we apply this default host when no host is given yet to form a
23     * valid URI.
24     */
25    private const HTTP_DEFAULT_HOST = 'localhost';
26
27    private const DEFAULT_PORTS = [
28        'http' => 80,
29        'https' => 443,
30        'ftp' => 21,
31        'gopher' => 70,
32        'nntp' => 119,
33        'news' => 119,
34        'telnet' => 23,
35        'tn3270' => 23,
36        'imap' => 143,
37        'pop' => 110,
38        'ldap' => 389,
39    ];
40
41    /**
42     * Unreserved characters for use in a regex.
43     *
44     * @see https://datatracker.ietf.org/doc/html/rfc3986#section-2.3
45     */
46    private const CHAR_UNRESERVED = 'a-zA-Z0-9_\-\.~';
47
48    /**
49     * Sub-delims for use in a regex.
50     *
51     * @see https://datatracker.ietf.org/doc/html/rfc3986#section-2.2
52     */
53    private const CHAR_SUB_DELIMS = '!\$&\'\(\)\*\+,;=';
54    private const QUERY_SEPARATORS_REPLACEMENT = ['=' => '%3D', '&' => '%26'];
55
56    /** @var string Uri scheme. */
57    private $scheme = '';
58
59    /** @var string Uri user info. */
60    private $userInfo = '';
61
62    /** @var string Uri host. */
63    private $host = '';
64
65    /** @var int|null Uri port. */
66    private $port;
67
68    /** @var string Uri path. */
69    private $path = '';
70
71    /** @var string Uri query string. */
72    private $query = '';
73
74    /** @var string Uri fragment. */
75    private $fragment = '';
76
77    /** @var string|null String representation */
78    private $composedComponents;
79
80    public function __construct(string $uri = '')
81    {
82        if ($uri !== '') {
83            $parts = self::parse($uri);
84            if ($parts === false) {
85                throw new MalformedUriException("Unable to parse URI: $uri");
86            }
87            $this->applyParts($parts);
88        }
89    }
90
91    /**
92     * UTF-8 aware \parse_url() replacement.
93     *
94     * The internal function produces broken output for non ASCII domain names
95     * (IDN) when used with locales other than "C".
96     *
97     * On the other hand, cURL understands IDN correctly only when UTF-8 locale
98     * is configured ("C.UTF-8", "en_US.UTF-8", etc.).
99     *
100     * @see https://bugs.php.net/bug.php?id=52923
101     * @see https://www.php.net/manual/en/function.parse-url.php#114817
102     * @see https://curl.haxx.se/libcurl/c/CURLOPT_URL.html#ENCODING
103     *
104     * @return array|false
105     */
106    private static function parse(string $url)
107    {
108        // If IPv6
109        $prefix = '';
110        if (preg_match('%^(.*://\[[0-9:a-f]+\])(.*?)$%', $url, $matches)) {
111            /** @var array{0:string, 1:string, 2:string} $matches */
112            $prefix = $matches[1];
113            $url = $matches[2];
114        }
115
116        /** @var string */
117        $encodedUrl = preg_replace_callback(
118            '%[^:/@?&=#]+%usD',
119            static function ($matches) {
120                return urlencode($matches[0]);
121            },
122            $url
123        );
124
125        $result = parse_url($prefix.$encodedUrl);
126
127        if ($result === false) {
128            return false;
129        }
130
131        return array_map('urldecode', $result);
132    }
133
134    public function __toString(): string
135    {
136        if ($this->composedComponents === null) {
137            $this->composedComponents = self::composeComponents(
138                $this->scheme,
139                $this->getAuthority(),
140                $this->path,
141                $this->query,
142                $this->fragment
143            );
144        }
145
146        return $this->composedComponents;
147    }
148
149    /**
150     * Composes a URI reference string from its various components.
151     *
152     * Usually this method does not need to be called manually but instead is used indirectly via
153     * `Psr\Http\Message\UriInterface::__toString`.
154     *
155     * PSR-7 UriInterface treats an empty component the same as a missing component as
156     * getQuery(), getFragment() etc. always return a string. This explains the slight
157     * difference to RFC 3986 Section 5.3.
158     *
159     * Another adjustment is that the authority separator is added even when the authority is missing/empty
160     * for the "file" scheme. This is because PHP stream functions like `file_get_contents` only work with
161     * `file:///myfile` but not with `file:/myfile` although they are equivalent according to RFC 3986. But
162     * `file:///` is the more common syntax for the file scheme anyway (Chrome for example redirects to
163     * that format).
164     *
165     * @see https://datatracker.ietf.org/doc/html/rfc3986#section-5.3
166     */
167    public static function composeComponents(?string $scheme, ?string $authority, string $path, ?string $query, ?string $fragment): string
168    {
169        $uri = '';
170
171        // weak type checks to also accept null until we can add scalar type hints
172        if ($scheme != '') {
173            $uri .= $scheme.':';
174        }
175
176        if ($authority != '' || $scheme === 'file') {
177            $uri .= '//'.$authority;
178        }
179
180        if ($authority != '' && $path != '' && $path[0] != '/') {
181            $path = '/'.$path;
182        }
183
184        $uri .= $path;
185
186        if ($query != '') {
187            $uri .= '?'.$query;
188        }
189
190        if ($fragment != '') {
191            $uri .= '#'.$fragment;
192        }
193
194        return $uri;
195    }
196
197    /**
198     * Whether the URI has the default port of the current scheme.
199     *
200     * `Psr\Http\Message\UriInterface::getPort` may return null or the standard port. This method can be used
201     * independently of the implementation.
202     */
203    public static function isDefaultPort(UriInterface $uri): bool
204    {
205        return $uri->getPort() === null
206            || (isset(self::DEFAULT_PORTS[$uri->getScheme()]) && $uri->getPort() === self::DEFAULT_PORTS[$uri->getScheme()]);
207    }
208
209    /**
210     * Whether the URI is absolute, i.e. it has a scheme.
211     *
212     * An instance of UriInterface can either be an absolute URI or a relative reference. This method returns true
213     * if it is the former. An absolute URI has a scheme. A relative reference is used to express a URI relative
214     * to another URI, the base URI. Relative references can be divided into several forms:
215     * - network-path references, e.g. '//example.com/path'
216     * - absolute-path references, e.g. '/path'
217     * - relative-path references, e.g. 'subpath'
218     *
219     * @see Uri::isNetworkPathReference
220     * @see Uri::isAbsolutePathReference
221     * @see Uri::isRelativePathReference
222     * @see https://datatracker.ietf.org/doc/html/rfc3986#section-4
223     */
224    public static function isAbsolute(UriInterface $uri): bool
225    {
226        return $uri->getScheme() !== '';
227    }
228
229    /**
230     * Whether the URI is a network-path reference.
231     *
     * A relative reference that begins with two slash characters is termed a network-path reference.
233     *
234     * @see https://datatracker.ietf.org/doc/html/rfc3986#section-4.2
235     */
236    public static function isNetworkPathReference(UriInterface $uri): bool
237    {
238        return $uri->getScheme() === '' && $uri->getAuthority() !== '';
239    }
240
241    /**
     * Whether the URI is an absolute-path reference.
243     *
244     * A relative reference that begins with a single slash character is termed an absolute-path reference.
245     *
246     * @see https://datatracker.ietf.org/doc/html/rfc3986#section-4.2
247     */
248    public static function isAbsolutePathReference(UriInterface $uri): bool
249    {
250        return $uri->getScheme() === ''
251            && $uri->getAuthority() === ''
252            && isset($uri->getPath()[0])
253            && $uri->getPath()[0] === '/';
254    }
255
256    /**
257     * Whether the URI is a relative-path reference.
258     *
259     * A relative reference that does not begin with a slash character is termed a relative-path reference.
260     *
261     * @see https://datatracker.ietf.org/doc/html/rfc3986#section-4.2
262     */
263    public static function isRelativePathReference(UriInterface $uri): bool
264    {
265        return $uri->getScheme() === ''
266            && $uri->getAuthority() === ''
267            && (!isset($uri->getPath()[0]) || $uri->getPath()[0] !== '/');
268    }
269
270    /**
271     * Whether the URI is a same-document reference.
272     *
273     * A same-document reference refers to a URI that is, aside from its fragment
274     * component, identical to the base URI. When no base URI is given, only an empty
275     * URI reference (apart from its fragment) is considered a same-document reference.
276     *
277     * @param UriInterface      $uri  The URI to check
278     * @param UriInterface|null $base An optional base URI to compare against
279     *
280     * @see https://datatracker.ietf.org/doc/html/rfc3986#section-4.4
281     */
282    public static function isSameDocumentReference(UriInterface $uri, UriInterface $base = null): bool
283    {
284        if ($base !== null) {
285            $uri = UriResolver::resolve($base, $uri);
286
287            return ($uri->getScheme() === $base->getScheme())
288                && ($uri->getAuthority() === $base->getAuthority())
289                && ($uri->getPath() === $base->getPath())
290                && ($uri->getQuery() === $base->getQuery());
291        }
292
293        return $uri->getScheme() === '' && $uri->getAuthority() === '' && $uri->getPath() === '' && $uri->getQuery() === '';
294    }
295
296    /**
297     * Creates a new URI with a specific query string value removed.
298     *
299     * Any existing query string values that exactly match the provided key are
300     * removed.
301     *
302     * @param UriInterface $uri URI to use as a base.
303     * @param string       $key Query string key to remove.
304     */
305    public static function withoutQueryValue(UriInterface $uri, string $key): UriInterface
306    {
307        $result = self::getFilteredQueryString($uri, [$key]);
308
309        return $uri->withQuery(implode('&', $result));
310    }
311
312    /**
313     * Creates a new URI with a specific query string value.
314     *
315     * Any existing query string values that exactly match the provided key are
316     * removed and replaced with the given key value pair.
317     *
318     * A value of null will set the query string key without a value, e.g. "key"
319     * instead of "key=value".
320     *
321     * @param UriInterface $uri   URI to use as a base.
322     * @param string       $key   Key to set.
323     * @param string|null  $value Value to set
324     */
325    public static function withQueryValue(UriInterface $uri, string $key, ?string $value): UriInterface
326    {
327        $result = self::getFilteredQueryString($uri, [$key]);
328
329        $result[] = self::generateQueryString($key, $value);
330
331        return $uri->withQuery(implode('&', $result));
332    }
333
334    /**
335     * Creates a new URI with multiple specific query string values.
336     *
337     * It has the same behavior as withQueryValue() but for an associative array of key => value.
338     *
339     * @param UriInterface    $uri           URI to use as a base.
340     * @param (string|null)[] $keyValueArray Associative array of key and values
341     */
342    public static function withQueryValues(UriInterface $uri, array $keyValueArray): UriInterface
343    {
344        $result = self::getFilteredQueryString($uri, array_keys($keyValueArray));
345
346        foreach ($keyValueArray as $key => $value) {
347            $result[] = self::generateQueryString((string) $key, $value !== null ? (string) $value : null);
348        }
349
350        return $uri->withQuery(implode('&', $result));
351    }
352
353    /**
354     * Creates a URI from a hash of `parse_url` components.
355     *
356     * @see https://www.php.net/manual/en/function.parse-url.php
357     *
358     * @throws MalformedUriException If the components do not form a valid URI.
359     */
360    public static function fromParts(array $parts): UriInterface
361    {
362        $uri = new self();
363        $uri->applyParts($parts);
364        $uri->validateState();
365
366        return $uri;
367    }
368
369    public function getScheme(): string
370    {
371        return $this->scheme;
372    }
373
374    public function getAuthority(): string
375    {
376        $authority = $this->host;
377        if ($this->userInfo !== '') {
378            $authority = $this->userInfo.'@'.$authority;
379        }
380
381        if ($this->port !== null) {
382            $authority .= ':'.$this->port;
383        }
384
385        return $authority;
386    }
387
388    public function getUserInfo(): string
389    {
390        return $this->userInfo;
391    }
392
393    public function getHost(): string
394    {
395        return $this->host;
396    }
397
398    public function getPort(): ?int
399    {
400        return $this->port;
401    }
402
403    public function getPath(): string
404    {
405        return $this->path;
406    }
407
408    public function getQuery(): string
409    {
410        return $this->query;
411    }
412
413    public function getFragment(): string
414    {
415        return $this->fragment;
416    }
417
418    public function withScheme($scheme): UriInterface
419    {
420        $scheme = $this->filterScheme($scheme);
421
422        if ($this->scheme === $scheme) {
423            return $this;
424        }
425
426        $new = clone $this;
427        $new->scheme = $scheme;
428        $new->composedComponents = null;
429        $new->removeDefaultPort();
430        $new->validateState();
431
432        return $new;
433    }
434
435    public function withUserInfo($user, $password = null): UriInterface
436    {
437        $info = $this->filterUserInfoComponent($user);
438        if ($password !== null) {
439            $info .= ':'.$this->filterUserInfoComponent($password);
440        }
441
442        if ($this->userInfo === $info) {
443            return $this;
444        }
445
446        $new = clone $this;
447        $new->userInfo = $info;
448        $new->composedComponents = null;
449        $new->validateState();
450
451        return $new;
452    }
453
454    public function withHost($host): UriInterface
455    {
456        $host = $this->filterHost($host);
457
458        if ($this->host === $host) {
459            return $this;
460        }
461
462        $new = clone $this;
463        $new->host = $host;
464        $new->composedComponents = null;
465        $new->validateState();
466
467        return $new;
468    }
469
470    public function withPort($port): UriInterface
471    {
472        $port = $this->filterPort($port);
473
474        if ($this->port === $port) {
475            return $this;
476        }
477
478        $new = clone $this;
479        $new->port = $port;
480        $new->composedComponents = null;
481        $new->removeDefaultPort();
482        $new->validateState();
483
484        return $new;
485    }
486
487    public function withPath($path): UriInterface
488    {
489        $path = $this->filterPath($path);
490
491        if ($this->path === $path) {
492            return $this;
493        }
494
495        $new = clone $this;
496        $new->path = $path;
497        $new->composedComponents = null;
498        $new->validateState();
499
500        return $new;
501    }
502
503    public function withQuery($query): UriInterface
504    {
505        $query = $this->filterQueryAndFragment($query);
506
507        if ($this->query === $query) {
508            return $this;
509        }
510
511        $new = clone $this;
512        $new->query = $query;
513        $new->composedComponents = null;
514
515        return $new;
516    }
517
518    public function withFragment($fragment): UriInterface
519    {
520        $fragment = $this->filterQueryAndFragment($fragment);
521
522        if ($this->fragment === $fragment) {
523            return $this;
524        }
525
526        $new = clone $this;
527        $new->fragment = $fragment;
528        $new->composedComponents = null;
529
530        return $new;
531    }
532
533    public function jsonSerialize(): string
534    {
535        return $this->__toString();
536    }
537
538    /**
539     * Apply parse_url parts to a URI.
540     *
541     * @param array $parts Array of parse_url parts to apply.
542     */
543    private function applyParts(array $parts): void
544    {
545        $this->scheme = isset($parts['scheme'])
546            ? $this->filterScheme($parts['scheme'])
547            : '';
548        $this->userInfo = isset($parts['user'])
549            ? $this->filterUserInfoComponent($parts['user'])
550            : '';
551        $this->host = isset($parts['host'])
552            ? $this->filterHost($parts['host'])
553            : '';
554        $this->port = isset($parts['port'])
555            ? $this->filterPort($parts['port'])
556            : null;
557        $this->path = isset($parts['path'])
558            ? $this->filterPath($parts['path'])
559            : '';
560        $this->query = isset($parts['query'])
561            ? $this->filterQueryAndFragment($parts['query'])
562            : '';
563        $this->fragment = isset($parts['fragment'])
564            ? $this->filterQueryAndFragment($parts['fragment'])
565            : '';
566        if (isset($parts['pass'])) {
567            $this->userInfo .= ':'.$this->filterUserInfoComponent($parts['pass']);
568        }
569
570        $this->removeDefaultPort();
571    }
572
573    /**
574     * @param mixed $scheme
575     *
576     * @throws \InvalidArgumentException If the scheme is invalid.
577     */
578    private function filterScheme($scheme): string
579    {
580        if (!is_string($scheme)) {
581            throw new \InvalidArgumentException('Scheme must be a string');
582        }
583
584        return \strtr($scheme, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz');
585    }
586
587    /**
588     * @param mixed $component
589     *
590     * @throws \InvalidArgumentException If the user info is invalid.
591     */
592    private function filterUserInfoComponent($component): string
593    {
594        if (!is_string($component)) {
595            throw new \InvalidArgumentException('User info must be a string');
596        }
597
598        return preg_replace_callback(
599            '/(?:[^%'.self::CHAR_UNRESERVED.self::CHAR_SUB_DELIMS.']+|%(?![A-Fa-f0-9]{2}))/',
600            [$this, 'rawurlencodeMatchZero'],
601            $component
602        );
603    }
604
605    /**
606     * @param mixed $host
607     *
608     * @throws \InvalidArgumentException If the host is invalid.
609     */
610    private function filterHost($host): string
611    {
612        if (!is_string($host)) {
613            throw new \InvalidArgumentException('Host must be a string');
614        }
615
616        return \strtr($host, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz');
617    }
618
619    /**
620     * @param mixed $port
621     *
622     * @throws \InvalidArgumentException If the port is invalid.
623     */
624    private function filterPort($port): ?int
625    {
626        if ($port === null) {
627            return null;
628        }
629
630        $port = (int) $port;
631        if (0 > $port || 0xFFFF < $port) {
632            throw new \InvalidArgumentException(
633                sprintf('Invalid port: %d. Must be between 0 and 65535', $port)
634            );
635        }
636
637        return $port;
638    }
639
640    /**
641     * @param (string|int)[] $keys
642     *
643     * @return string[]
644     */
645    private static function getFilteredQueryString(UriInterface $uri, array $keys): array
646    {
647        $current = $uri->getQuery();
648
649        if ($current === '') {
650            return [];
651        }
652
653        $decodedKeys = array_map(function ($k): string {
654            return rawurldecode((string) $k);
655        }, $keys);
656
657        return array_filter(explode('&', $current), function ($part) use ($decodedKeys) {
658            return !in_array(rawurldecode(explode('=', $part)[0]), $decodedKeys, true);
659        });
660    }
661
662    private static function generateQueryString(string $key, ?string $value): string
663    {
664        // Query string separators ("=", "&") within the key or value need to be encoded
665        // (while preventing double-encoding) before setting the query string. All other
666        // chars that need percent-encoding will be encoded by withQuery().
667        $queryString = strtr($key, self::QUERY_SEPARATORS_REPLACEMENT);
668
669        if ($value !== null) {
670            $queryString .= '='.strtr($value, self::QUERY_SEPARATORS_REPLACEMENT);
671        }
672
673        return $queryString;
674    }
675
676    private function removeDefaultPort(): void
677    {
678        if ($this->port !== null && self::isDefaultPort($this)) {
679            $this->port = null;
680        }
681    }
682
683    /**
684     * Filters the path of a URI
685     *
686     * @param mixed $path
687     *
688     * @throws \InvalidArgumentException If the path is invalid.
689     */
690    private function filterPath($path): string
691    {
692        if (!is_string($path)) {
693            throw new \InvalidArgumentException('Path must be a string');
694        }
695
696        return preg_replace_callback(
697            '/(?:[^'.self::CHAR_UNRESERVED.self::CHAR_SUB_DELIMS.'%:@\/]++|%(?![A-Fa-f0-9]{2}))/',
698            [$this, 'rawurlencodeMatchZero'],
699            $path
700        );
701    }
702
703    /**
704     * Filters the query string or fragment of a URI.
705     *
706     * @param mixed $str
707     *
708     * @throws \InvalidArgumentException If the query or fragment is invalid.
709     */
710    private function filterQueryAndFragment($str): string
711    {
712        if (!is_string($str)) {
713            throw new \InvalidArgumentException('Query and fragment must be a string');
714        }
715
716        return preg_replace_callback(
717            '/(?:[^'.self::CHAR_UNRESERVED.self::CHAR_SUB_DELIMS.'%:@\/\?]++|%(?![A-Fa-f0-9]{2}))/',
718            [$this, 'rawurlencodeMatchZero'],
719            $str
720        );
721    }
722
723    private function rawurlencodeMatchZero(array $match): string
724    {
725        return rawurlencode($match[0]);
726    }
727
728    private function validateState(): void
729    {
730        if ($this->host === '' && ($this->scheme === 'http' || $this->scheme === 'https')) {
731            $this->host = self::HTTP_DEFAULT_HOST;
732        }
733
734        if ($this->getAuthority() === '') {
735            if (0 === strpos($this->path, '//')) {
736                throw new MalformedUriException('The path of a URI without an authority must not start with two slashes "//"');
737            }
738            if ($this->scheme === '' && false !== strpos(explode('/', $this->path, 2)[0], ':')) {
739                throw new MalformedUriException('A relative URI must not have a path beginning with a segment containing a colon');
740            }
741        }
742    }
743}
744