xref: /dokuwiki/vendor/simplepie/simplepie/src/IRI.php (revision 8e88a29b81301f78509349ab1152bb09c229123e)
1<?php
2
3// SPDX-FileCopyrightText: 2004-2023 Ryan Parman, Sam Sneddon, Ryan McCue
4// SPDX-FileCopyrightText: 2008 Steve Minutillo
5// SPDX-License-Identifier: BSD-3-Clause
6
7declare(strict_types=1);
8
9namespace SimplePie;
10
11/**
12 * IRI parser/serialiser/normaliser
13 *
14 * @property ?string $scheme
15 * @property ?string $userinfo
16 * @property ?string $host
17 * @property ?int $port
18 * @property-write int|string|null $port
19 * @property ?string $authority
20 * @property string $path
21 * @property ?string $query
22 * @property ?string $fragment
23 */
24class IRI
25{
    /**
     * Scheme
     *
     * Stored lowercased by set_scheme(); null when the IRI has no scheme.
     *
     * @var ?string
     */
    protected $scheme = null;

    /**
     * User Information
     *
     * The part of the authority before the last "@", percent-encoded by
     * set_userinfo(); null when absent.
     *
     * @var ?string
     */
    protected $iuserinfo = null;

    /**
     * ihost
     *
     * Host as normalised by set_host(); null when absent or when it equals
     * the default host registered for the current scheme.
     *
     * @var ?string
     */
    protected $ihost = null;

    /**
     * Port
     *
     * Null when absent or when it equals the default port registered for the
     * current scheme (see scheme_normalization()).
     *
     * @var ?int
     */
    protected $port = null;

    /**
     * ipath
     *
     * Never null; the empty string means "no path" and is also what
     * scheme_normalization() stores when the path equals the scheme default.
     *
     * @var string
     */
    protected $ipath = '';

    /**
     * iquery
     *
     * @var ?string
     */
    protected $iquery = null;

    /**
     * ifragment
     *
     * @var ?string
     */
    protected $ifragment = null;

    /**
     * Normalization database
     *
     * Each key is the scheme, each value is an array with each key as the IRI
     * part and value as the default value for that part.
     *
     * The part keys are the internal property names (`ihost`, `port`,
     * `ipath`, ...): scheme_normalization() blanks a property that equals its
     * default, and __get() hands the default back when the property is null.
     *
     * @var array<string, array<string, mixed>>
     */
    protected $normalization = [
        'acap' => [
            'port' => 674
        ],
        'dict' => [
            'port' => 2628
        ],
        'file' => [
            'ihost' => 'localhost'
        ],
        'http' => [
            'port' => 80,
            'ipath' => '/'
        ],
        'https' => [
            'port' => 443,
            'ipath' => '/'
        ],
    ];
102
103    /**
104     * Return the entire IRI when you try and read the object as a string
105     *
106     * @return string
107     */
108    public function __toString()
109    {
110        return (string) $this->get_iri();
111    }
112
113    /**
114     * Overload __set() to provide access via properties
115     *
116     * @param string $name Property name
117     * @param mixed $value Property value
118     * @return void
119     */
120    public function __set(string $name, $value)
121    {
122        $callable = [$this, 'set_' . $name];
123        if (is_callable($callable)) {
124            call_user_func($callable, $value);
125        } elseif (
126            $name === 'iauthority'
127            || $name === 'iuserinfo'
128            || $name === 'ihost'
129            || $name === 'ipath'
130            || $name === 'iquery'
131            || $name === 'ifragment'
132        ) {
133            call_user_func([$this, 'set_' . substr($name, 1)], $value);
134        }
135    }
136
137    /**
138     * Overload __get() to provide access via properties
139     *
140     * @param string $name Property name
141     * @return mixed
142     */
143    public function __get(string $name)
144    {
145        // isset() returns false for null, we don't want to do that
146        // Also why we use array_key_exists below instead of isset()
147        $props = get_object_vars($this);
148
149        if (
150            $name === 'iri' ||
151            $name === 'uri' ||
152            $name === 'iauthority' ||
153            $name === 'authority'
154        ) {
155            $return = $this->{"get_$name"}();
156        } elseif (array_key_exists($name, $props)) {
157            $return = $this->$name;
158        }
159        // host -> ihost
160        elseif (array_key_exists($prop = 'i' . $name, $props)) {
161            $name = $prop;
162            $return = $this->$prop;
163        }
164        // ischeme -> scheme
165        elseif (($prop = substr($name, 1)) && array_key_exists($prop, $props)) {
166            $name = $prop;
167            $return = $this->$prop;
168        } else {
169            trigger_error('Undefined property: ' . get_class($this) . '::' . $name, E_USER_NOTICE);
170            $return = null;
171        }
172
173        if ($return === null && isset($this->normalization[$this->scheme][$name])) {
174            return $this->normalization[$this->scheme][$name];
175        }
176
177        return $return;
178    }
179
180    /**
181     * Overload __isset() to provide access via properties
182     *
183     * @param string $name Property name
184     * @return bool
185     */
186    public function __isset(string $name)
187    {
188        return method_exists($this, 'get_' . $name) || isset($this->$name);
189    }
190
191    /**
192     * Overload __unset() to provide access via properties
193     *
194     * @param string $name Property name
195     * @return void
196     */
197    public function __unset(string $name)
198    {
199        $callable = [$this, 'set_' . $name];
200        if (is_callable($callable)) {
201            call_user_func($callable, '');
202        }
203    }
204
205    /**
206     * Create a new IRI object, from a specified string
207     *
208     * @param string|null $iri
209     */
210    public function __construct(?string $iri = null)
211    {
212        $this->set_iri($iri);
213    }
214
215    /**
216     * Clean up
217     * @return void
218     */
219    public function __destruct()
220    {
221        $this->set_iri(null, true);
222        $this->set_path(null, true);
223        $this->set_authority(null, true);
224    }
225
226    /**
227     * Create a new IRI object by resolving a relative IRI
228     *
229     * Returns false if $base is not absolute, otherwise an IRI.
230     *
231     * @param IRI|string $base (Absolute) Base IRI
232     * @param IRI|string $relative Relative IRI
233     * @return IRI|false
234     */
235    public static function absolutize($base, $relative)
236    {
237        if (!($relative instanceof IRI)) {
238            $relative = new IRI($relative);
239        }
240        if (!$relative->is_valid()) {
241            return false;
242        } elseif ($relative->scheme !== null) {
243            return clone $relative;
244        } else {
245            if (!($base instanceof IRI)) {
246                $base = new IRI($base);
247            }
248            if ($base->scheme !== null && $base->is_valid()) {
249                if ($relative->get_iri() !== '') {
250                    if ($relative->iuserinfo !== null || $relative->ihost !== null || $relative->port !== null) {
251                        $target = clone $relative;
252                        $target->scheme = $base->scheme;
253                    } else {
254                        $target = new IRI();
255                        $target->scheme = $base->scheme;
256                        $target->iuserinfo = $base->iuserinfo;
257                        $target->ihost = $base->ihost;
258                        $target->port = $base->port;
259                        if ($relative->ipath !== '') {
260                            if ($relative->ipath[0] === '/') {
261                                $target->ipath = $relative->ipath;
262                            } elseif (($base->iuserinfo !== null || $base->ihost !== null || $base->port !== null) && $base->ipath === '') {
263                                $target->ipath = '/' . $relative->ipath;
264                            } elseif (($last_segment = strrpos($base->ipath, '/')) !== false) {
265                                $target->ipath = substr($base->ipath, 0, $last_segment + 1) . $relative->ipath;
266                            } else {
267                                $target->ipath = $relative->ipath;
268                            }
269                            $target->ipath = $target->remove_dot_segments($target->ipath);
270                            $target->iquery = $relative->iquery;
271                        } else {
272                            $target->ipath = $base->ipath;
273                            if ($relative->iquery !== null) {
274                                $target->iquery = $relative->iquery;
275                            } elseif ($base->iquery !== null) {
276                                $target->iquery = $base->iquery;
277                            }
278                        }
279                        $target->ifragment = $relative->ifragment;
280                    }
281                } else {
282                    $target = clone $base;
283                    $target->ifragment = null;
284                }
285                $target->scheme_normalization();
286                return $target;
287            }
288
289            return false;
290        }
291    }
292
293    /**
294     * Parse an IRI into scheme/authority/path/query/fragment segments
295     *
296     * @param string $iri
297     * @return array{
298     *   scheme: string|null,
299     *   authority: string|null,
300     *   path: string,
301     *   query: string|null,
302     *   fragment: string|null,
303     * }|false
304     */
305    protected function parse_iri(string $iri)
306    {
307        $iri = trim($iri, "\x20\x09\x0A\x0C\x0D");
308        if (preg_match('/^(?:(?P<scheme>[^:\/?#]+):)?(:?\/\/(?P<authority>[^\/?#]*))?(?P<path>[^?#]*)(?:\?(?P<query>[^#]*))?(?:#(?P<fragment>.*))?$/', $iri, $match, \PREG_UNMATCHED_AS_NULL)) {
309            // TODO: Remove once we require PHP ≥ 7.4.
310            $match['query'] = $match['query'] ?? null;
311            $match['fragment'] = $match['fragment'] ?? null;
312            return $match;
313        }
314
315        // This can occur when a paragraph is accidentally parsed as a URI
316        return false;
317    }
318
319    /**
320     * Remove dot segments from a path
321     *
322     * @param string $input
323     * @return string
324     */
325    protected function remove_dot_segments(string $input)
326    {
327        $output = '';
328        while (strpos($input, './') !== false || strpos($input, '/.') !== false || $input === '.' || $input === '..') {
329            // A: If the input buffer begins with a prefix of "../" or "./", then remove that prefix from the input buffer; otherwise,
330            if (strpos($input, '../') === 0) {
331                $input = substr($input, 3);
332            } elseif (strpos($input, './') === 0) {
333                $input = substr($input, 2);
334            }
335            // B: if the input buffer begins with a prefix of "/./" or "/.", where "." is a complete path segment, then replace that prefix with "/" in the input buffer; otherwise,
336            elseif (strpos($input, '/./') === 0) {
337                $input = substr($input, 2);
338            } elseif ($input === '/.') {
339                $input = '/';
340            }
341            // C: if the input buffer begins with a prefix of "/../" or "/..", where ".." is a complete path segment, then replace that prefix with "/" in the input buffer and remove the last segment and its preceding "/" (if any) from the output buffer; otherwise,
342            elseif (strpos($input, '/../') === 0) {
343                $input = substr($input, 3);
344                $output = substr_replace($output, '', intval(strrpos($output, '/')));
345            } elseif ($input === '/..') {
346                $input = '/';
347                $output = substr_replace($output, '', intval(strrpos($output, '/')));
348            }
349            // D: if the input buffer consists only of "." or "..", then remove that from the input buffer; otherwise,
350            elseif ($input === '.' || $input === '..') {
351                $input = '';
352            }
353            // E: move the first path segment in the input buffer to the end of the output buffer, including the initial "/" character (if any) and any subsequent characters up to, but not including, the next "/" character or the end of the input buffer
354            elseif (($pos = strpos($input, '/', 1)) !== false) {
355                $output .= substr($input, 0, $pos);
356                $input = substr_replace($input, '', 0, $pos);
357            } else {
358                $output .= $input;
359                $input = '';
360            }
361        }
362        return $output . $input;
363    }
364
    /**
     * Replace invalid character with percent encoding
     *
     * Works in three passes:
     *  1. existing percent-encoded runs are normalised through
     *     remove_iunreserved_percent_encoded();
     *  2. any "%" that is not followed by two hex digits becomes "%25";
     *  3. the string is scanned byte by byte, and every byte or UTF-8
     *     sequence that is neither in the allowed ASCII set nor an acceptable
     *     ucschar (or iprivate, when enabled) code point is rewritten as
     *     upper-case %XX triplets.
     *
     * @param string $string Input string
     * @param string $extra_chars Valid characters not in iunreserved or
     *                            iprivate (this is ASCII-only)
     * @param bool $iprivate Allow iprivate
     * @return string
     */
    protected function replace_invalid_with_pct_encoding(string $string, string $extra_chars, bool $iprivate = false)
    {
        // Normalize as many pct-encoded sections as possible
        $string = preg_replace_callback('/(?:%[A-Fa-f0-9]{2})+/', [$this, 'remove_iunreserved_percent_encoded'], $string);
        \assert(\is_string($string), "For PHPStan: Should not occur, the regex is valid");

        // Replace invalid percent characters
        $string = preg_replace('/%(?![A-Fa-f0-9]{2})/', '%25', $string);
        \assert(\is_string($string), "For PHPStan: Should not occur, the regex is valid");

        // Add unreserved and % to $extra_chars (the latter is safe because all
        // pct-encoded sections are now valid).
        $extra_chars .= 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~%';

        // Now replace any bytes that aren't allowed with their pct-encoded versions
        $position = 0;
        $strlen = strlen($string);
        // strspn() skips the run of allowed bytes, so each iteration starts on
        // the first byte that is NOT in $extra_chars.
        while (($position += strspn($string, $extra_chars, $position)) < $strlen) {
            $value = ord($string[$position]);
            // Code point being assembled from the UTF-8 sequence
            $character = 0;

            // Start position
            $start = $position;

            // By default we are valid
            $valid = true;

            // No one byte sequences are valid due to the while.
            // Two byte sequence:
            if (($value & 0xE0) === 0xC0) {
                $character = ($value & 0x1F) << 6;
                $length = 2;
                $remaining = 1;
            }
            // Three byte sequence:
            elseif (($value & 0xF0) === 0xE0) {
                $character = ($value & 0x0F) << 12;
                $length = 3;
                $remaining = 2;
            }
            // Four byte sequence:
            elseif (($value & 0xF8) === 0xF0) {
                $character = ($value & 0x07) << 18;
                $length = 4;
                $remaining = 3;
            }
            // Invalid byte:
            else {
                $valid = false;
                $length = 1;
                $remaining = 0;
            }

            if ($remaining) {
                if ($position + $length <= $strlen) {
                    // Consume the continuation bytes. On normal completion
                    // $position ends one past the last byte of the sequence.
                    for ($position++; $remaining; $position++) {
                        $value = ord($string[$position]);

                        // Check that the byte is valid, then add it to the character:
                        if (($value & 0xC0) === 0x80) {
                            $character |= ($value & 0x3F) << (--$remaining * 6);
                        }
                        // If it is invalid, count the sequence as invalid and reprocess the current byte:
                        else {
                            $valid = false;
                            $position--;
                            break;
                        }
                    }
                } else {
                    // The sequence is cut off by the end of the string:
                    // treat everything from $start to the last byte as invalid.
                    $position = $strlen - 1;
                    $valid = false;
                }
            }

            // Percent encode anything invalid or not in ucschar
            // (&& binds tighter than ||, so the final two parenthesised
            // groups together form a single alternative.)
            if (
                // Invalid sequences
                !$valid
                // Non-shortest form sequences are invalid
                || $length > 1 && $character <= 0x7F
                || $length > 2 && $character <= 0x7FF
                || $length > 3 && $character <= 0xFFFF
                // Outside of range of ucschar codepoints
                // Noncharacters
                || ($character & 0xFFFE) === 0xFFFE
                || $character >= 0xFDD0 && $character <= 0xFDEF
                || (
                    // Everything else not in ucschar
                    $character > 0xD7FF && $character < 0xF900
                    || $character < 0xA0
                    || $character > 0xEFFFD
                )
                && (
                    // Everything not in iprivate, if it applies
                    !$iprivate
                    || $character < 0xE000
                    || $character > 0x10FFFD
                )
            ) {
                // If we were a character, pretend we weren't, but rather an error.
                // (Steps back onto the last byte of the sequence so that the
                // loop below covers exactly $start..$position.)
                if ($valid) {
                    $position--;
                }

                // Each byte becomes a three-character %XX triplet, so every
                // index and the cached length move forward by two per byte.
                for ($j = $start; $j <= $position; $j++) {
                    $string = substr_replace($string, sprintf('%%%02X', ord($string[$j])), $j, 1);
                    $j += 2;
                    $position += 2;
                    $strlen += 2;
                }
            }
        }

        return $string;
    }
490
    /**
     * Callback function for preg_replace_callback.
     *
     * Removes sequences of percent encoded bytes that represent UTF-8
     * encoded characters in iunreserved
     *
     * Bytes that decode to a well-formed, shortest-form UTF-8 character in
     * iunreserved are emitted as raw bytes. Everything else stays
     * percent-encoded, with its hex digits upper-cased.
     *
     * @param array{string} $match PCRE match, a capture group #0 consisting of a sequence of valid percent-encoded bytes
     * @return string Replacement
     */
    protected function remove_iunreserved_percent_encoded(array $match)
    {
        // As we just have valid percent encoded sequences we can just explode
        // and ignore the first member of the returned array (an empty string).
        $bytes = explode('%', $match[0]);

        // Initialize the new string (this is what will be returned) and that
        // there are no bytes remaining in the current sequence (unsurprising
        // at the first byte!).
        $string = '';
        $remaining = 0;

        // these variables will be initialized in the loop but PHPStan is not able to detect it currently
        $start = 0;
        $character = 0;
        $length = 0;
        $valid = true;

        // Loop over each and every byte, and set $value to its value
        for ($i = 1, $len = count($bytes); $i < $len; $i++) {
            $value = hexdec($bytes[$i]);

            // If we're the first byte of sequence:
            if (!$remaining) {
                // Start position
                $start = $i;

                // By default we are valid
                $valid = true;

                // One byte sequence:
                if ($value <= 0x7F) {
                    $character = $value;
                    $length = 1;
                }
                // Two byte sequence:
                elseif (($value & 0xE0) === 0xC0) {
                    $character = ($value & 0x1F) << 6;
                    $length = 2;
                    $remaining = 1;
                }
                // Three byte sequence:
                elseif (($value & 0xF0) === 0xE0) {
                    $character = ($value & 0x0F) << 12;
                    $length = 3;
                    $remaining = 2;
                }
                // Four byte sequence:
                elseif (($value & 0xF8) === 0xF0) {
                    $character = ($value & 0x07) << 18;
                    $length = 4;
                    $remaining = 3;
                }
                // Invalid byte:
                // ($length and $character keep their previous values here;
                // the !$valid test below short-circuits before they are read.)
                else {
                    $valid = false;
                    $remaining = 0;
                }
            }
            // Continuation byte:
            else {
                // Check that the byte is valid, then add it to the character:
                if (($value & 0xC0) === 0x80) {
                    $remaining--;
                    $character |= ($value & 0x3F) << ($remaining * 6);
                }
                // If it is invalid, count the sequence as invalid and reprocess the current byte as the start of a sequence:
                else {
                    $valid = false;
                    $remaining = 0;
                    $i--;
                }
            }

            // If we've reached the end of the current byte sequence, append it to $string
            if (!$remaining) {
                // Percent encode anything invalid or not in iunreserved
                if (
                    // Invalid sequences
                    !$valid
                    // Non-shortest form sequences are invalid
                    || $length > 1 && $character <= 0x7F
                    || $length > 2 && $character <= 0x7FF
                    || $length > 3 && $character <= 0xFFFF
                    // Outside of range of iunreserved codepoints
                    || $character < 0x2D
                    || $character > 0xEFFFD
                    // Noncharacters
                    || ($character & 0xFFFE) === 0xFFFE
                    || $character >= 0xFDD0 && $character <= 0xFDEF
                    // Everything else not in iunreserved (this is all BMP)
                    || $character === 0x2F
                    || $character > 0x39 && $character < 0x41
                    || $character > 0x5A && $character < 0x61
                    || $character > 0x7A && $character < 0x7E
                    || $character > 0x7E && $character < 0xA0
                    || $character > 0xD7FF && $character < 0xF900
                ) {
                    // Keep the bytes encoded, normalising the hex digits to upper case
                    for ($j = $start; $j <= $i; $j++) {
                        $string .= '%' . strtoupper($bytes[$j]);
                    }
                } else {
                    // Emit the decoded raw bytes
                    for ($j = $start; $j <= $i; $j++) {
                        // Cast for PHPStan, this will always be a number between 0 and 0xFF so hexdec will return int.
                        $string .= chr((int) hexdec($bytes[$j]));
                    }
                }
            }
        }

        // If we have any bytes left over they are invalid (i.e., we are
        // mid-way through a multi-byte sequence)
        if ($remaining) {
            for ($j = $start; $j < $len; $j++) {
                $string .= '%' . strtoupper($bytes[$j]);
            }
        }

        return $string;
    }
620
621    /**
622     * @return void
623     */
624    protected function scheme_normalization()
625    {
626        if (isset($this->normalization[$this->scheme]['iuserinfo']) && $this->iuserinfo === $this->normalization[$this->scheme]['iuserinfo']) {
627            $this->iuserinfo = null;
628        }
629        if (isset($this->normalization[$this->scheme]['ihost']) && $this->ihost === $this->normalization[$this->scheme]['ihost']) {
630            $this->ihost = null;
631        }
632        if (isset($this->normalization[$this->scheme]['port']) && $this->port === $this->normalization[$this->scheme]['port']) {
633            $this->port = null;
634        }
635        if (isset($this->normalization[$this->scheme]['ipath']) && $this->ipath === $this->normalization[$this->scheme]['ipath']) {
636            $this->ipath = '';
637        }
638        if (isset($this->normalization[$this->scheme]['iquery']) && $this->iquery === $this->normalization[$this->scheme]['iquery']) {
639            $this->iquery = null;
640        }
641        if (isset($this->normalization[$this->scheme]['ifragment']) && $this->ifragment === $this->normalization[$this->scheme]['ifragment']) {
642            $this->ifragment = null;
643        }
644    }
645
646    /**
647     * Check if the object represents a valid IRI. This needs to be done on each
648     * call as some things change depending on another part of the IRI.
649     *
650     * @return bool
651     */
652    public function is_valid()
653    {
654        if ($this->ipath === '') {
655            return true;
656        }
657
658        $isauthority = $this->iuserinfo !== null || $this->ihost !== null ||
659            $this->port !== null;
660        if ($isauthority && $this->ipath[0] === '/') {
661            return true;
662        }
663
664        if (!$isauthority && (substr($this->ipath, 0, 2) === '//')) {
665            return false;
666        }
667
668        // Relative urls cannot have a colon in the first path segment (and the
669        // slashes themselves are not included so skip the first character).
670        if (!$this->scheme && !$isauthority &&
671            strpos($this->ipath, ':') !== false &&
672            strpos($this->ipath, '/', 1) !== false &&
673            strpos($this->ipath, ':') < strpos($this->ipath, '/', 1)) {
674            return false;
675        }
676
677        return true;
678    }
679
680    /**
681     * Set the entire IRI. Returns true on success, false on failure (if there
682     * are any invalid characters).
683     *
684     * @param string|null $iri
685     * @return bool
686     */
687    public function set_iri(?string $iri, bool $clear_cache = false)
688    {
689        static $cache;
690        if ($clear_cache) {
691            $cache = null;
692            return false;
693        }
694        if (!$cache) {
695            $cache = [];
696        }
697
698        if ($iri === null) {
699            return true;
700        } elseif (isset($cache[$iri])) {
701            [
702                $this->scheme,
703                $this->iuserinfo,
704                $this->ihost,
705                $this->port,
706                $this->ipath,
707                $this->iquery,
708                $this->ifragment,
709                $return
710            ] = $cache[$iri];
711
712            return $return;
713        }
714
715        $parsed = $this->parse_iri((string) $iri);
716        if (!$parsed) {
717            return false;
718        }
719
720        $return = $this->set_scheme($parsed['scheme'])
721            && $this->set_authority($parsed['authority'])
722            && $this->set_path($parsed['path'])
723            && $this->set_query($parsed['query'])
724            && $this->set_fragment($parsed['fragment']);
725
726        $cache[$iri] = [
727            $this->scheme,
728            $this->iuserinfo,
729            $this->ihost,
730            $this->port,
731            $this->ipath,
732            $this->iquery,
733            $this->ifragment,
734            $return
735        ];
736
737        return $return;
738    }
739
740    /**
741     * Set the scheme. Returns true on success, false on failure (if there are
742     * any invalid characters).
743     *
744     * @param string|null $scheme
745     * @return bool
746     */
747    public function set_scheme(?string $scheme)
748    {
749        if ($scheme === null) {
750            $this->scheme = null;
751        } elseif (!preg_match('/^[A-Za-z][0-9A-Za-z+\-.]*$/', $scheme)) {
752            $this->scheme = null;
753            return false;
754        } else {
755            $this->scheme = strtolower($scheme);
756        }
757        return true;
758    }
759
760    /**
761     * Set the authority. Returns true on success, false on failure (if there are
762     * any invalid characters).
763     *
764     * @param string|null $authority
765     * @return bool
766     */
767    public function set_authority(?string $authority, bool $clear_cache = false)
768    {
769        static $cache;
770        if ($clear_cache) {
771            $cache = null;
772            return false;
773        }
774        if (!$cache) {
775            $cache = [];
776        }
777
778        if ($authority === null) {
779            $this->iuserinfo = null;
780            $this->ihost = null;
781            $this->port = null;
782            return true;
783        } elseif (isset($cache[$authority])) {
784            [
785                $this->iuserinfo,
786                $this->ihost,
787                $this->port,
788                $return
789            ] = $cache[$authority];
790
791            return $return;
792        }
793
794        $remaining = $authority;
795        if (($iuserinfo_end = strrpos($remaining, '@')) !== false) {
796            // Cast for PHPStan on PHP < 8.0. It does not detect that
797            // the range is not flipped so substr cannot return false.
798            $iuserinfo = (string) substr($remaining, 0, $iuserinfo_end);
799            $remaining = substr($remaining, $iuserinfo_end + 1);
800        } else {
801            $iuserinfo = null;
802        }
803        if (($port_start = strpos($remaining, ':', intval(strpos($remaining, ']')))) !== false) {
804            $port = substr($remaining, $port_start + 1);
805            if ($port === false) {
806                $port = null;
807            }
808            $remaining = substr($remaining, 0, $port_start);
809        } else {
810            $port = null;
811        }
812
813        $return = $this->set_userinfo($iuserinfo) &&
814                  $this->set_host($remaining) &&
815                  $this->set_port($port);
816
817        $cache[$authority] = [
818            $this->iuserinfo,
819            $this->ihost,
820            $this->port,
821            $return
822        ];
823
824        return $return;
825    }
826
827    /**
828     * Set the iuserinfo.
829     *
830     * @param string|null $iuserinfo
831     * @return bool
832     */
833    public function set_userinfo(?string $iuserinfo)
834    {
835        if ($iuserinfo === null) {
836            $this->iuserinfo = null;
837        } else {
838            $this->iuserinfo = $this->replace_invalid_with_pct_encoding($iuserinfo, '!$&\'()*+,;=:');
839            $this->scheme_normalization();
840        }
841
842        return true;
843    }
844
845    /**
846     * Set the ihost. Returns true on success, false on failure (if there are
847     * any invalid characters).
848     *
849     * @param string|null $ihost
850     * @return bool
851     */
852    public function set_host(?string $ihost)
853    {
854        if ($ihost === null) {
855            $this->ihost = null;
856            return true;
857        } elseif (substr($ihost, 0, 1) === '[' && substr($ihost, -1) === ']') {
858            if (\SimplePie\Net\IPv6::check_ipv6(substr($ihost, 1, -1))) {
859                $this->ihost = '[' . \SimplePie\Net\IPv6::compress(substr($ihost, 1, -1)) . ']';
860            } else {
861                $this->ihost = null;
862                return false;
863            }
864        } else {
865            $ihost = $this->replace_invalid_with_pct_encoding($ihost, '!$&\'()*+,;=');
866
867            // Lowercase, but ignore pct-encoded sections (as they should
868            // remain uppercase). This must be done after the previous step
869            // as that can add unescaped characters.
870            $position = 0;
871            $strlen = strlen($ihost);
872            while (($position += strcspn($ihost, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ%', $position)) < $strlen) {
873                if ($ihost[$position] === '%') {
874                    $position += 3;
875                } else {
876                    $ihost[$position] = strtolower($ihost[$position]);
877                    $position++;
878                }
879            }
880
881            $this->ihost = $ihost;
882        }
883
884        $this->scheme_normalization();
885
886        return true;
887    }
888
889    /**
890     * Set the port. Returns true on success, false on failure (if there are
891     * any invalid characters).
892     *
893     * @param string|int|null $port
894     * @return bool
895     */
896    public function set_port($port)
897    {
898        if ($port === null) {
899            $this->port = null;
900            return true;
901        } elseif (strspn((string) $port, '0123456789') === strlen((string) $port)) {
902            $this->port = (int) $port;
903            $this->scheme_normalization();
904            return true;
905        }
906
907        $this->port = null;
908        return false;
909    }
910
911    /**
912     * Set the ipath.
913     *
914     * @param string|null $ipath
915     * @return bool
916     */
917    public function set_path(?string $ipath, bool $clear_cache = false)
918    {
919        static $cache;
920        if ($clear_cache) {
921            $cache = null;
922            return false;
923        }
924        if (!$cache) {
925            $cache = [];
926        }
927
928        $ipath = (string) $ipath;
929
930        if (isset($cache[$ipath])) {
931            $this->ipath = $cache[$ipath][(int) ($this->scheme !== null)];
932        } else {
933            $valid = $this->replace_invalid_with_pct_encoding($ipath, '!$&\'()*+,;=@:/');
934            $removed = $this->remove_dot_segments($valid);
935
936            $cache[$ipath] = [$valid, $removed];
937            $this->ipath =  ($this->scheme !== null) ? $removed : $valid;
938        }
939
940        $this->scheme_normalization();
941        return true;
942    }
943
944    /**
945     * Set the iquery.
946     *
947     * @param string|null $iquery
948     * @return bool
949     */
950    public function set_query(?string $iquery)
951    {
952        if ($iquery === null) {
953            $this->iquery = null;
954        } else {
955            $this->iquery = $this->replace_invalid_with_pct_encoding($iquery, '!$&\'()*+,;=:@/?', true);
956            $this->scheme_normalization();
957        }
958        return true;
959    }
960
961    /**
962     * Set the ifragment.
963     *
964     * @param string|null $ifragment
965     * @return bool
966     */
967    public function set_fragment(?string $ifragment)
968    {
969        if ($ifragment === null) {
970            $this->ifragment = null;
971        } else {
972            $this->ifragment = $this->replace_invalid_with_pct_encoding($ifragment, '!$&\'()*+,;=:@/?');
973            $this->scheme_normalization();
974        }
975        return true;
976    }
977
978    /**
979     * Convert an IRI to a URI (or parts thereof)
980     *
981     * @param string $string
982     * @return string
983     */
984    public function to_uri(string $string)
985    {
986        static $non_ascii;
987        if (!$non_ascii) {
988            $non_ascii = implode('', range("\x80", "\xFF"));
989        }
990
991        $position = 0;
992        $strlen = strlen($string);
993        while (($position += strcspn($string, $non_ascii, $position)) < $strlen) {
994            $string = substr_replace($string, sprintf('%%%02X', ord($string[$position])), $position, 1);
995            $position += 3;
996            $strlen += 2;
997        }
998
999        return $string;
1000    }
1001
1002    /**
1003     * Get the complete IRI
1004     *
1005     * @return string|false
1006     */
1007    public function get_iri()
1008    {
1009        if (!$this->is_valid()) {
1010            return false;
1011        }
1012
1013        $iri = '';
1014        if ($this->scheme !== null) {
1015            $iri .= $this->scheme . ':';
1016        }
1017        if (($iauthority = $this->get_iauthority()) !== null) {
1018            $iri .= '//' . $iauthority;
1019        }
1020        if ($this->ipath !== '') {
1021            $iri .= $this->ipath;
1022        } elseif (!empty($this->normalization[$this->scheme]['ipath']) && $iauthority !== null && $iauthority !== '') {
1023            $iri .= $this->normalization[$this->scheme]['ipath'];
1024        }
1025        if ($this->iquery !== null) {
1026            $iri .= '?' . $this->iquery;
1027        }
1028        if ($this->ifragment !== null) {
1029            $iri .= '#' . $this->ifragment;
1030        }
1031
1032        return $iri;
1033    }
1034
1035    /**
1036     * Get the complete URI
1037     *
1038     * @return string
1039     */
1040    public function get_uri()
1041    {
1042        return $this->to_uri((string) $this->get_iri());
1043    }
1044
1045    /**
1046     * Get the complete iauthority
1047     *
1048     * @return ?string
1049     */
1050    protected function get_iauthority()
1051    {
1052        if ($this->iuserinfo !== null || $this->ihost !== null || $this->port !== null) {
1053            $iauthority = '';
1054            if ($this->iuserinfo !== null) {
1055                $iauthority .= $this->iuserinfo . '@';
1056            }
1057            if ($this->ihost !== null) {
1058                $iauthority .= $this->ihost;
1059            }
1060            if ($this->port !== null && $this->port !== 0) {
1061                $iauthority .= ':' . $this->port;
1062            }
1063            return $iauthority;
1064        }
1065
1066        return null;
1067    }
1068
1069    /**
1070     * Get the complete authority
1071     *
1072     * @return ?string
1073     */
1074    protected function get_authority()
1075    {
1076        $iauthority = $this->get_iauthority();
1077        if (is_string($iauthority)) {
1078            return $this->to_uri($iauthority);
1079        }
1080
1081        return $iauthority;
1082    }
1083}
1084
// Make the class also available under the non-namespaced name SimplePie_IRI.
\class_alias('SimplePie\IRI', 'SimplePie_IRI');
1086