1<?php
2
3declare(strict_types=1);
4/**
5 * SimplePie
6 *
7 * A PHP-Based RSS and Atom Feed Framework.
8 * Takes the hard work out of managing a complete RSS/Atom solution.
9 *
10 * Copyright (c) 2004-2022, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors
11 * All rights reserved.
12 *
13 * Redistribution and use in source and binary forms, with or without modification, are
14 * permitted provided that the following conditions are met:
15 *
16 * 	* Redistributions of source code must retain the above copyright notice, this list of
17 * 	  conditions and the following disclaimer.
18 *
19 * 	* Redistributions in binary form must reproduce the above copyright notice, this list
20 * 	  of conditions and the following disclaimer in the documentation and/or other materials
21 * 	  provided with the distribution.
22 *
23 * 	* Neither the name of the SimplePie Team nor the names of its contributors may be used
24 * 	  to endorse or promote products derived from this software without specific prior
25 * 	  written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
28 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
29 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
30 * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
32 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
34 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 *
37 * @package SimplePie
38 * @copyright 2004-2016 Ryan Parman, Sam Sneddon, Ryan McCue
39 * @author Ryan Parman
40 * @author Sam Sneddon
41 * @author Ryan McCue
42 * @link http://simplepie.org/ SimplePie
43 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
44 */
45
46namespace SimplePie;
47
48/**
49 * IRI parser/serialiser/normaliser
50 *
51 * @package SimplePie
52 * @subpackage HTTP
53 * @author Sam Sneddon
54 * @author Steve Minutillo
55 * @author Ryan McCue
56 * @copyright 2007-2012 Sam Sneddon, Steve Minutillo, Ryan McCue
57 * @license http://www.opensource.org/licenses/bsd-license.php
58 */
59class IRI
60{
    /**
     * Scheme component.
     *
     * Stored lowercased by set_scheme(); null when the IRI has no scheme.
     *
     * @var string|null
     */
    protected $scheme = null;

    /**
     * User information part of the authority.
     *
     * Null when absent; otherwise the percent-encoding-normalised value
     * produced by set_userinfo().
     *
     * @var string|null
     */
    protected $iuserinfo = null;

    /**
     * Host part of the authority.
     *
     * Null when absent. set_host() stores either a bracketed IPv6 literal or
     * a lowercased, percent-encoding-normalised name.
     *
     * @var string|null
     */
    protected $ihost = null;

    /**
     * Port part of the authority.
     *
     * Null when absent; set_port() stores accepted values as integers.
     *
     * @var int|null
     */
    protected $port = null;

    /**
     * Path component.
     *
     * Unlike the other components this defaults to the empty string rather
     * than null.
     *
     * @var string
     */
    protected $ipath = '';

    /**
     * Query component (without the leading "?"); null when absent.
     *
     * @var string|null
     */
    protected $iquery = null;

    /**
     * Fragment component (without the leading "#"); null when absent.
     *
     * @var string|null
     */
    protected $ifragment = null;

    /**
     * Normalization database
     *
     * Each key is the scheme, each value is an array with each key as the IRI
     * part and value as the default value for that part.
     *
     * scheme_normalization() resets a part to null ('' for ipath) when it
     * equals the default listed here, and __get() falls back to these
     * defaults when the stored value is null.
     *
     * @var array<string, array<string, int|string>>
     */
    protected $normalization = [
        'acap' => [
            'port' => 674
        ],
        'dict' => [
            'port' => 2628
        ],
        'file' => [
            'ihost' => 'localhost'
        ],
        'http' => [
            'port' => 80,
            'ipath' => '/'
        ],
        'https' => [
            'port' => 443,
            'ipath' => '/'
        ],
    ];
135
136    /**
137     * Return the entire IRI when you try and read the object as a string
138     *
139     * @return string
140     */
141    public function __toString()
142    {
143        return $this->get_iri();
144    }
145
146    /**
147     * Overload __set() to provide access via properties
148     *
149     * @param string $name Property name
150     * @param mixed $value Property value
151     */
152    public function __set($name, $value)
153    {
154        if (method_exists($this, 'set_' . $name)) {
155            call_user_func([$this, 'set_' . $name], $value);
156        } elseif (
157            $name === 'iauthority'
158            || $name === 'iuserinfo'
159            || $name === 'ihost'
160            || $name === 'ipath'
161            || $name === 'iquery'
162            || $name === 'ifragment'
163        ) {
164            call_user_func([$this, 'set_' . substr($name, 1)], $value);
165        }
166    }
167
168    /**
169     * Overload __get() to provide access via properties
170     *
171     * @param string $name Property name
172     * @return mixed
173     */
174    public function __get($name)
175    {
176        // isset() returns false for null, we don't want to do that
177        // Also why we use array_key_exists below instead of isset()
178        $props = get_object_vars($this);
179
180        if (
181            $name === 'iri' ||
182            $name === 'uri' ||
183            $name === 'iauthority' ||
184            $name === 'authority'
185        ) {
186            $return = $this->{"get_$name"}();
187        } elseif (array_key_exists($name, $props)) {
188            $return = $this->$name;
189        }
190        // host -> ihost
191        elseif (($prop = 'i' . $name) && array_key_exists($prop, $props)) {
192            $name = $prop;
193            $return = $this->$prop;
194        }
195        // ischeme -> scheme
196        elseif (($prop = substr($name, 1)) && array_key_exists($prop, $props)) {
197            $name = $prop;
198            $return = $this->$prop;
199        } else {
200            trigger_error('Undefined property: ' . get_class($this) . '::' . $name, E_USER_NOTICE);
201            $return = null;
202        }
203
204        if ($return === null && isset($this->normalization[$this->scheme][$name])) {
205            return $this->normalization[$this->scheme][$name];
206        }
207
208        return $return;
209    }
210
211    /**
212     * Overload __isset() to provide access via properties
213     *
214     * @param string $name Property name
215     * @return bool
216     */
217    public function __isset($name)
218    {
219        return method_exists($this, 'get_' . $name) || isset($this->$name);
220    }
221
222    /**
223     * Overload __unset() to provide access via properties
224     *
225     * @param string $name Property name
226     */
227    public function __unset($name)
228    {
229        if (method_exists($this, 'set_' . $name)) {
230            call_user_func([$this, 'set_' . $name], '');
231        }
232    }
233
    /**
     * Create a new IRI object, from a specified string
     *
     * The string is handed to set_iri(). Its boolean result is discarded, so
     * a string that fails to parse is not reported here. Passing null (the
     * default) makes set_iri() return immediately, leaving every component
     * at its initial value.
     *
     * @param string|null $iri IRI to parse, or null for an empty IRI
     */
    public function __construct($iri = null)
    {
        $this->set_iri($iri);
    }
243
    /**
     * Clean up
     *
     * Calls the setters with $clear_cache = true. For set_iri() and
     * set_authority() this resets their `static $cache` lookup tables and
     * returns without touching the object. Those tables are function-level
     * statics rather than instance state, so destroying any one instance
     * empties them.
     */
    public function __destruct()
    {
        $this->set_iri(null, true);
        // NOTE(review): set_path() is defined outside this excerpt; presumably
        // it clears its own static cache the same way — confirm.
        $this->set_path(null, true);
        $this->set_authority(null, true);
    }
253
254    /**
255     * Create a new IRI object by resolving a relative IRI
256     *
257     * Returns false if $base is not absolute, otherwise an IRI.
258     *
259     * @param IRI|string $base (Absolute) Base IRI
260     * @param IRI|string $relative Relative IRI
261     * @return IRI|false
262     */
263    public static function absolutize($base, $relative)
264    {
265        if (!($relative instanceof IRI)) {
266            $relative = new IRI($relative);
267        }
268        if (!$relative->is_valid()) {
269            return false;
270        } elseif ($relative->scheme !== null) {
271            return clone $relative;
272        } else {
273            if (!($base instanceof IRI)) {
274                $base = new IRI($base);
275            }
276            if ($base->scheme !== null && $base->is_valid()) {
277                if ($relative->get_iri() !== '') {
278                    if ($relative->iuserinfo !== null || $relative->ihost !== null || $relative->port !== null) {
279                        $target = clone $relative;
280                        $target->scheme = $base->scheme;
281                    } else {
282                        $target = new IRI();
283                        $target->scheme = $base->scheme;
284                        $target->iuserinfo = $base->iuserinfo;
285                        $target->ihost = $base->ihost;
286                        $target->port = $base->port;
287                        if ($relative->ipath !== '') {
288                            if ($relative->ipath[0] === '/') {
289                                $target->ipath = $relative->ipath;
290                            } elseif (($base->iuserinfo !== null || $base->ihost !== null || $base->port !== null) && $base->ipath === '') {
291                                $target->ipath = '/' . $relative->ipath;
292                            } elseif (($last_segment = strrpos($base->ipath, '/')) !== false) {
293                                $target->ipath = substr($base->ipath, 0, $last_segment + 1) . $relative->ipath;
294                            } else {
295                                $target->ipath = $relative->ipath;
296                            }
297                            $target->ipath = $target->remove_dot_segments($target->ipath);
298                            $target->iquery = $relative->iquery;
299                        } else {
300                            $target->ipath = $base->ipath;
301                            if ($relative->iquery !== null) {
302                                $target->iquery = $relative->iquery;
303                            } elseif ($base->iquery !== null) {
304                                $target->iquery = $base->iquery;
305                            }
306                        }
307                        $target->ifragment = $relative->ifragment;
308                    }
309                } else {
310                    $target = clone $base;
311                    $target->ifragment = null;
312                }
313                $target->scheme_normalization();
314                return $target;
315            }
316
317            return false;
318        }
319    }
320
321    /**
322     * Parse an IRI into scheme/authority/path/query/fragment segments
323     *
324     * @param string $iri
325     * @return array
326     */
327    protected function parse_iri($iri)
328    {
329        $iri = trim($iri, "\x20\x09\x0A\x0C\x0D");
330        if (preg_match('/^((?P<scheme>[^:\/?#]+):)?(\/\/(?P<authority>[^\/?#]*))?(?P<path>[^?#]*)(\?(?P<query>[^#]*))?(#(?P<fragment>.*))?$/', $iri, $match)) {
331            if ($match[1] === '') {
332                $match['scheme'] = null;
333            }
334            if (!isset($match[3]) || $match[3] === '') {
335                $match['authority'] = null;
336            }
337            if (!isset($match[5])) {
338                $match['path'] = '';
339            }
340            if (!isset($match[6]) || $match[6] === '') {
341                $match['query'] = null;
342            }
343            if (!isset($match[8]) || $match[8] === '') {
344                $match['fragment'] = null;
345            }
346            return $match;
347        }
348
349        // This can occur when a paragraph is accidentally parsed as a URI
350        return false;
351    }
352
353    /**
354     * Remove dot segments from a path
355     *
356     * @param string $input
357     * @return string
358     */
359    protected function remove_dot_segments($input)
360    {
361        $output = '';
362        while (strpos($input, './') !== false || strpos($input, '/.') !== false || $input === '.' || $input === '..') {
363            // A: If the input buffer begins with a prefix of "../" or "./", then remove that prefix from the input buffer; otherwise,
364            if (strpos($input, '../') === 0) {
365                $input = substr($input, 3);
366            } elseif (strpos($input, './') === 0) {
367                $input = substr($input, 2);
368            }
369            // B: if the input buffer begins with a prefix of "/./" or "/.", where "." is a complete path segment, then replace that prefix with "/" in the input buffer; otherwise,
370            elseif (strpos($input, '/./') === 0) {
371                $input = substr($input, 2);
372            } elseif ($input === '/.') {
373                $input = '/';
374            }
375            // C: if the input buffer begins with a prefix of "/../" or "/..", where ".." is a complete path segment, then replace that prefix with "/" in the input buffer and remove the last segment and its preceding "/" (if any) from the output buffer; otherwise,
376            elseif (strpos($input, '/../') === 0) {
377                $input = substr($input, 3);
378                $output = substr_replace($output, '', intval(strrpos($output, '/')));
379            } elseif ($input === '/..') {
380                $input = '/';
381                $output = substr_replace($output, '', intval(strrpos($output, '/')));
382            }
383            // D: if the input buffer consists only of "." or "..", then remove that from the input buffer; otherwise,
384            elseif ($input === '.' || $input === '..') {
385                $input = '';
386            }
387            // E: move the first path segment in the input buffer to the end of the output buffer, including the initial "/" character (if any) and any subsequent characters up to, but not including, the next "/" character or the end of the input buffer
388            elseif (($pos = strpos($input, '/', 1)) !== false) {
389                $output .= substr($input, 0, $pos);
390                $input = substr_replace($input, '', 0, $pos);
391            } else {
392                $output .= $input;
393                $input = '';
394            }
395        }
396        return $output . $input;
397    }
398
    /**
     * Replace invalid character with percent encoding
     *
     * Three passes are made over the string:
     *  1. runs of valid "%XX" escapes are normalised through
     *     remove_iunreserved_percent_encoded();
     *  2. any "%" not followed by two hex digits is rewritten as "%25";
     *  3. the remaining bytes are scanned; every byte that is neither in
     *     $extra_chars nor an ASCII unreserved character nor part of an
     *     acceptable UTF-8 encoded character is percent-encoded in place.
     *
     * @param string $string Input string
     * @param string $extra_chars Valid characters not in iunreserved or
     *                            iprivate (this is ASCII-only)
     * @param bool $iprivate Allow iprivate: when true, codepoints from U+E000
     *                       to U+10FFFD are not encoded merely for lying
     *                       outside ucschar
     * @return string
     */
    protected function replace_invalid_with_pct_encoding($string, $extra_chars, $iprivate = false)
    {
        // Normalize as many pct-encoded sections as possible
        $string = preg_replace_callback('/(?:%[A-Fa-f0-9]{2})+/', [$this, 'remove_iunreserved_percent_encoded'], $string);

        // Replace invalid percent characters
        $string = preg_replace('/%(?![A-Fa-f0-9]{2})/', '%25', $string);

        // Add unreserved and % to $extra_chars (the latter is safe because all
        // pct-encoded sections are now valid).
        $extra_chars .= 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~%';

        // Now replace any bytes that aren't allowed with their pct-encoded versions
        $position = 0;
        $strlen = strlen($string);
        // strspn() skips over the run of allowed bytes, so inside the loop
        // $position always points at the first byte that is not allowed as-is.
        while (($position += strspn($string, $extra_chars, $position)) < $strlen) {
            $value = ord($string[$position]);
            $character = 0;

            // Start position
            $start = $position;

            // By default we are valid
            $valid = true;

            // No one byte sequences are valid due to the while.
            // Two byte sequence:
            if (($value & 0xE0) === 0xC0) {
                $character = ($value & 0x1F) << 6;
                $length = 2;
                $remaining = 1;
            }
            // Three byte sequence:
            elseif (($value & 0xF0) === 0xE0) {
                $character = ($value & 0x0F) << 12;
                $length = 3;
                $remaining = 2;
            }
            // Four byte sequence:
            elseif (($value & 0xF8) === 0xF0) {
                $character = ($value & 0x07) << 18;
                $length = 4;
                $remaining = 3;
            }
            // Invalid byte:
            else {
                $valid = false;
                $length = 1;
                $remaining = 0;
            }

            if ($remaining) {
                if ($position + $length <= $strlen) {
                    // Consume the continuation bytes. On normal completion the
                    // loop's final $position++ leaves $position one past the
                    // last byte of the sequence.
                    for ($position++; $remaining; $position++) {
                        $value = ord($string[$position]);

                        // Check that the byte is valid, then add it to the character:
                        if (($value & 0xC0) === 0x80) {
                            $character |= ($value & 0x3F) << (--$remaining * 6);
                        }
                        // If it is invalid, count the sequence as invalid and reprocess the current byte:
                        else {
                            $valid = false;
                            $position--;
                            break;
                        }
                    }
                } else {
                    // The lead byte announces more bytes than the string has
                    // left: treat everything up to the final byte as invalid.
                    $position = $strlen - 1;
                    $valid = false;
                }
            }

            // Percent encode anything invalid or not in ucschar
            if (
                // Invalid sequences
                !$valid
                // Non-shortest form sequences are invalid
                || $length > 1 && $character <= 0x7F
                || $length > 2 && $character <= 0x7FF
                || $length > 3 && $character <= 0xFFFF
                // Outside of range of ucschar codepoints
                // Noncharacters
                || ($character & 0xFFFE) === 0xFFFE
                || $character >= 0xFDD0 && $character <= 0xFDEF
                || (
                    // Everything else not in ucschar
                    $character > 0xD7FF && $character < 0xF900
                    || $character < 0xA0
                    || $character > 0xEFFFD
                )
                && (
                    // Everything not in iprivate, if it applies
                    !$iprivate
                    || $character < 0xE000
                    || $character > 0x10FFFD
                )
            ) {
                // If we were a character, pretend we weren't, but rather an error.
                // (A fully decoded sequence left $position one past its last
                // byte; step back so $start..$position covers exactly it.)
                if ($valid) {
                    $position--;
                }

                // Each byte becomes "%XX", growing the string by two bytes, so
                // the index, the end marker and the cached length all move by 2.
                for ($j = $start; $j <= $position; $j++) {
                    $string = substr_replace($string, sprintf('%%%02X', ord($string[$j])), $j, 1);
                    $j += 2;
                    $position += 2;
                    $strlen += 2;
                }
            }
        }

        return $string;
    }
522
    /**
     * Callback function for preg_replace_callback.
     *
     * Removes sequences of percent encoded bytes that represent UTF-8
     * encoded characters in iunreserved
     *
     * The matched text is a run of "%XX" escapes. The bytes are decoded as
     * UTF-8 one character at a time; a character that is well-formed and in
     * iunreserved is emitted as raw bytes, anything else is re-emitted as
     * escapes with the hex digits uppercased.
     *
     * @param array $match PCRE match
     * @return string Replacement
     */
    protected function remove_iunreserved_percent_encoded($match)
    {
        // As we just have valid percent encoded sequences we can just explode
        // and ignore the first member of the returned array (an empty string).
        $bytes = explode('%', $match[0]);

        // Initialize the new string (this is what will be returned) and that
        // there are no bytes remaining in the current sequence (unsurprising
        // at the first byte!).
        $string = '';
        $remaining = 0;

        // these variables will be initialized in the loop but PHPStan is not able to detect it currently
        $start = 0;
        $character = 0;
        $length = 0;
        $valid = true;

        // Loop over each and every byte, and set $value to its value
        for ($i = 1, $len = count($bytes); $i < $len; $i++) {
            $value = hexdec($bytes[$i]);

            // If we're the first byte of sequence:
            if (!$remaining) {
                // Start position
                $start = $i;

                // By default we are valid
                $valid = true;

                // One byte sequence:
                if ($value <= 0x7F) {
                    $character = $value;
                    $length = 1;
                }
                // Two byte sequence:
                elseif (($value & 0xE0) === 0xC0) {
                    $character = ($value & 0x1F) << 6;
                    $length = 2;
                    $remaining = 1;
                }
                // Three byte sequence:
                elseif (($value & 0xF0) === 0xE0) {
                    $character = ($value & 0x0F) << 12;
                    $length = 3;
                    $remaining = 2;
                }
                // Four byte sequence:
                elseif (($value & 0xF8) === 0xF0) {
                    $character = ($value & 0x07) << 18;
                    $length = 4;
                    $remaining = 3;
                }
                // Invalid byte:
                else {
                    $valid = false;
                    $remaining = 0;
                }
            }
            // Continuation byte:
            else {
                // Check that the byte is valid, then add it to the character:
                if (($value & 0xC0) === 0x80) {
                    $remaining--;
                    $character |= ($value & 0x3F) << ($remaining * 6);
                }
                // If it is invalid, count the sequence as invalid and reprocess the current byte as the start of a sequence:
                else {
                    $valid = false;
                    $remaining = 0;
                    // Step back so the flush below covers only the bytes
                    // before this one; the loop's $i++ then revisits it.
                    $i--;
                }
            }

            // If we've reached the end of the current byte sequence, append it to $string
            if (!$remaining) {
                // Percent encode anything invalid or not in iunreserved
                if (
                    // Invalid sequences
                    !$valid
                    // Non-shortest form sequences are invalid
                    || $length > 1 && $character <= 0x7F
                    || $length > 2 && $character <= 0x7FF
                    || $length > 3 && $character <= 0xFFFF
                    // Outside of range of iunreserved codepoints
                    || $character < 0x2D
                    || $character > 0xEFFFD
                    // Noncharacters
                    || ($character & 0xFFFE) === 0xFFFE
                    || $character >= 0xFDD0 && $character <= 0xFDEF
                    // Everything else not in iunreserved (this is all BMP)
                    || $character === 0x2F
                    || $character > 0x39 && $character < 0x41
                    || $character > 0x5A && $character < 0x61
                    || $character > 0x7A && $character < 0x7E
                    || $character > 0x7E && $character < 0xA0
                    || $character > 0xD7FF && $character < 0xF900
                ) {
                    // Keep the escapes, normalising the hex digits to uppercase.
                    for ($j = $start; $j <= $i; $j++) {
                        $string .= '%' . strtoupper($bytes[$j]);
                    }
                } else {
                    // Safe to decode: emit the raw bytes of the character.
                    for ($j = $start; $j <= $i; $j++) {
                        $string .= chr(hexdec($bytes[$j]));
                    }
                }
            }
        }

        // If we have any bytes left over they are invalid (i.e., we are
        // mid-way through a multi-byte sequence)
        if ($remaining) {
            for ($j = $start; $j < $len; $j++) {
                $string .= '%' . strtoupper($bytes[$j]);
            }
        }

        return $string;
    }
651
652    protected function scheme_normalization()
653    {
654        if (isset($this->normalization[$this->scheme]['iuserinfo']) && $this->iuserinfo === $this->normalization[$this->scheme]['iuserinfo']) {
655            $this->iuserinfo = null;
656        }
657        if (isset($this->normalization[$this->scheme]['ihost']) && $this->ihost === $this->normalization[$this->scheme]['ihost']) {
658            $this->ihost = null;
659        }
660        if (isset($this->normalization[$this->scheme]['port']) && $this->port === $this->normalization[$this->scheme]['port']) {
661            $this->port = null;
662        }
663        if (isset($this->normalization[$this->scheme]['ipath']) && $this->ipath === $this->normalization[$this->scheme]['ipath']) {
664            $this->ipath = '';
665        }
666        if (isset($this->normalization[$this->scheme]['iquery']) && $this->iquery === $this->normalization[$this->scheme]['iquery']) {
667            $this->iquery = null;
668        }
669        if (isset($this->normalization[$this->scheme]['ifragment']) && $this->ifragment === $this->normalization[$this->scheme]['ifragment']) {
670            $this->ifragment = null;
671        }
672    }
673
674    /**
675     * Check if the object represents a valid IRI. This needs to be done on each
676     * call as some things change depending on another part of the IRI.
677     *
678     * @return bool
679     */
680    public function is_valid()
681    {
682        if ($this->ipath === '') {
683            return true;
684        }
685
686        $isauthority = $this->iuserinfo !== null || $this->ihost !== null ||
687            $this->port !== null;
688        if ($isauthority && $this->ipath[0] === '/') {
689            return true;
690        }
691
692        if (!$isauthority && (substr($this->ipath, 0, 2) === '//')) {
693            return false;
694        }
695
696        // Relative urls cannot have a colon in the first path segment (and the
697        // slashes themselves are not included so skip the first character).
698        if (!$this->scheme && !$isauthority &&
699            strpos($this->ipath, ':') !== false &&
700            strpos($this->ipath, '/', 1) !== false &&
701            strpos($this->ipath, ':') < strpos($this->ipath, '/', 1)) {
702            return false;
703        }
704
705        return true;
706    }
707
708    /**
709     * Set the entire IRI. Returns true on success, false on failure (if there
710     * are any invalid characters).
711     *
712     * @param string $iri
713     * @return bool
714     */
715    public function set_iri($iri, $clear_cache = false)
716    {
717        static $cache;
718        if ($clear_cache) {
719            $cache = null;
720            return;
721        }
722        if (!$cache) {
723            $cache = [];
724        }
725
726        if ($iri === null) {
727            return true;
728        } elseif (isset($cache[$iri])) {
729            [
730                $this->scheme,
731                $this->iuserinfo,
732                $this->ihost,
733                $this->port,
734                $this->ipath,
735                $this->iquery,
736                $this->ifragment,
737                $return
738            ] = $cache[$iri];
739
740            return $return;
741        }
742
743        $parsed = $this->parse_iri((string) $iri);
744        if (!$parsed) {
745            return false;
746        }
747
748        $return = $this->set_scheme($parsed['scheme'])
749            && $this->set_authority($parsed['authority'])
750            && $this->set_path($parsed['path'])
751            && $this->set_query($parsed['query'])
752            && $this->set_fragment($parsed['fragment']);
753
754        $cache[$iri] = [
755            $this->scheme,
756            $this->iuserinfo,
757            $this->ihost,
758            $this->port,
759            $this->ipath,
760            $this->iquery,
761            $this->ifragment,
762            $return
763        ];
764
765        return $return;
766    }
767
768    /**
769     * Set the scheme. Returns true on success, false on failure (if there are
770     * any invalid characters).
771     *
772     * @param string $scheme
773     * @return bool
774     */
775    public function set_scheme($scheme)
776    {
777        if ($scheme === null) {
778            $this->scheme = null;
779        } elseif (!preg_match('/^[A-Za-z][0-9A-Za-z+\-.]*$/', $scheme)) {
780            $this->scheme = null;
781            return false;
782        } else {
783            $this->scheme = strtolower($scheme);
784        }
785        return true;
786    }
787
788    /**
789     * Set the authority. Returns true on success, false on failure (if there are
790     * any invalid characters).
791     *
792     * @param string $authority
793     * @return bool
794     */
795    public function set_authority($authority, $clear_cache = false)
796    {
797        static $cache;
798        if ($clear_cache) {
799            $cache = null;
800            return;
801        }
802        if (!$cache) {
803            $cache = [];
804        }
805
806        if ($authority === null) {
807            $this->iuserinfo = null;
808            $this->ihost = null;
809            $this->port = null;
810            return true;
811        } elseif (isset($cache[$authority])) {
812            [
813                $this->iuserinfo,
814                $this->ihost,
815                $this->port,
816                $return
817            ] = $cache[$authority];
818
819            return $return;
820        }
821
822        $remaining = $authority;
823        if (($iuserinfo_end = strrpos($remaining, '@')) !== false) {
824            $iuserinfo = substr($remaining, 0, $iuserinfo_end);
825            $remaining = substr($remaining, $iuserinfo_end + 1);
826        } else {
827            $iuserinfo = null;
828        }
829        if (($port_start = strpos($remaining, ':', intval(strpos($remaining, ']')))) !== false) {
830            if (($port = substr($remaining, $port_start + 1)) === false) {
831                $port = null;
832            }
833            $remaining = substr($remaining, 0, $port_start);
834        } else {
835            $port = null;
836        }
837
838        $return = $this->set_userinfo($iuserinfo) &&
839                  $this->set_host($remaining) &&
840                  $this->set_port($port);
841
842        $cache[$authority] = [
843            $this->iuserinfo,
844            $this->ihost,
845            $this->port,
846            $return
847        ];
848
849        return $return;
850    }
851
852    /**
853     * Set the iuserinfo.
854     *
855     * @param string $iuserinfo
856     * @return bool
857     */
858    public function set_userinfo($iuserinfo)
859    {
860        if ($iuserinfo === null) {
861            $this->iuserinfo = null;
862        } else {
863            $this->iuserinfo = $this->replace_invalid_with_pct_encoding($iuserinfo, '!$&\'()*+,;=:');
864            $this->scheme_normalization();
865        }
866
867        return true;
868    }
869
870    /**
871     * Set the ihost. Returns true on success, false on failure (if there are
872     * any invalid characters).
873     *
874     * @param string $ihost
875     * @return bool
876     */
877    public function set_host($ihost)
878    {
879        if ($ihost === null) {
880            $this->ihost = null;
881            return true;
882        } elseif (substr($ihost, 0, 1) === '[' && substr($ihost, -1) === ']') {
883            if (\SimplePie\Net\IPv6::check_ipv6(substr($ihost, 1, -1))) {
884                $this->ihost = '[' . \SimplePie\Net\IPv6::compress(substr($ihost, 1, -1)) . ']';
885            } else {
886                $this->ihost = null;
887                return false;
888            }
889        } else {
890            $ihost = $this->replace_invalid_with_pct_encoding($ihost, '!$&\'()*+,;=');
891
892            // Lowercase, but ignore pct-encoded sections (as they should
893            // remain uppercase). This must be done after the previous step
894            // as that can add unescaped characters.
895            $position = 0;
896            $strlen = strlen($ihost);
897            while (($position += strcspn($ihost, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ%', $position)) < $strlen) {
898                if ($ihost[$position] === '%') {
899                    $position += 3;
900                } else {
901                    $ihost[$position] = strtolower($ihost[$position]);
902                    $position++;
903                }
904            }
905
906            $this->ihost = $ihost;
907        }
908
909        $this->scheme_normalization();
910
911        return true;
912    }
913
914    /**
915     * Set the port. Returns true on success, false on failure (if there are
916     * any invalid characters).
917     *
918     * @param string $port
919     * @return bool
920     */
921    public function set_port($port)
922    {
923        if ($port === null) {
924            $this->port = null;
925            return true;
926        } elseif (strspn($port, '0123456789') === strlen($port)) {
927            $this->port = (int) $port;
928            $this->scheme_normalization();
929            return true;
930        }
931
932        $this->port = null;
933        return false;
934    }
935
936    /**
937     * Set the ipath.
938     *
939     * @param string $ipath
940     * @return bool
941     */
942    public function set_path($ipath, $clear_cache = false)
943    {
944        static $cache;
945        if ($clear_cache) {
946            $cache = null;
947            return;
948        }
949        if (!$cache) {
950            $cache = [];
951        }
952
953        $ipath = (string) $ipath;
954
955        if (isset($cache[$ipath])) {
956            $this->ipath = $cache[$ipath][(int) ($this->scheme !== null)];
957        } else {
958            $valid = $this->replace_invalid_with_pct_encoding($ipath, '!$&\'()*+,;=@:/');
959            $removed = $this->remove_dot_segments($valid);
960
961            $cache[$ipath] = [$valid, $removed];
962            $this->ipath =  ($this->scheme !== null) ? $removed : $valid;
963        }
964
965        $this->scheme_normalization();
966        return true;
967    }
968
969    /**
970     * Set the iquery.
971     *
972     * @param string $iquery
973     * @return bool
974     */
975    public function set_query($iquery)
976    {
977        if ($iquery === null) {
978            $this->iquery = null;
979        } else {
980            $this->iquery = $this->replace_invalid_with_pct_encoding($iquery, '!$&\'()*+,;=:@/?', true);
981            $this->scheme_normalization();
982        }
983        return true;
984    }
985
986    /**
987     * Set the ifragment.
988     *
989     * @param string $ifragment
990     * @return bool
991     */
992    public function set_fragment($ifragment)
993    {
994        if ($ifragment === null) {
995            $this->ifragment = null;
996        } else {
997            $this->ifragment = $this->replace_invalid_with_pct_encoding($ifragment, '!$&\'()*+,;=:@/?');
998            $this->scheme_normalization();
999        }
1000        return true;
1001    }
1002
1003    /**
1004     * Convert an IRI to a URI (or parts thereof)
1005     *
1006     * @return string
1007     */
1008    public function to_uri($string)
1009    {
1010        static $non_ascii;
1011        if (!$non_ascii) {
1012            $non_ascii = implode('', range("\x80", "\xFF"));
1013        }
1014
1015        $position = 0;
1016        $strlen = strlen($string);
1017        while (($position += strcspn($string, $non_ascii, $position)) < $strlen) {
1018            $string = substr_replace($string, sprintf('%%%02X', ord($string[$position])), $position, 1);
1019            $position += 3;
1020            $strlen += 2;
1021        }
1022
1023        return $string;
1024    }
1025
1026    /**
1027     * Get the complete IRI
1028     *
1029     * @return string
1030     */
1031    public function get_iri()
1032    {
1033        if (!$this->is_valid()) {
1034            return false;
1035        }
1036
1037        $iri = '';
1038        if ($this->scheme !== null) {
1039            $iri .= $this->scheme . ':';
1040        }
1041        if (($iauthority = $this->get_iauthority()) !== null) {
1042            $iri .= '//' . $iauthority;
1043        }
1044        if ($this->ipath !== '') {
1045            $iri .= $this->ipath;
1046        } elseif (!empty($this->normalization[$this->scheme]['ipath']) && $iauthority !== null && $iauthority !== '') {
1047            $iri .= $this->normalization[$this->scheme]['ipath'];
1048        }
1049        if ($this->iquery !== null) {
1050            $iri .= '?' . $this->iquery;
1051        }
1052        if ($this->ifragment !== null) {
1053            $iri .= '#' . $this->ifragment;
1054        }
1055
1056        return $iri;
1057    }
1058
1059    /**
1060     * Get the complete URI
1061     *
1062     * @return string
1063     */
1064    public function get_uri()
1065    {
1066        return $this->to_uri($this->get_iri());
1067    }
1068
1069    /**
1070     * Get the complete iauthority
1071     *
1072     * @return string
1073     */
1074    protected function get_iauthority()
1075    {
1076        if ($this->iuserinfo !== null || $this->ihost !== null || $this->port !== null) {
1077            $iauthority = '';
1078            if ($this->iuserinfo !== null) {
1079                $iauthority .= $this->iuserinfo . '@';
1080            }
1081            if ($this->ihost !== null) {
1082                $iauthority .= $this->ihost;
1083            }
1084            if ($this->port !== null && $this->port !== 0) {
1085                $iauthority .= ':' . $this->port;
1086            }
1087            return $iauthority;
1088        }
1089
1090        return null;
1091    }
1092
1093    /**
1094     * Get the complete authority
1095     *
1096     * @return string
1097     */
1098    protected function get_authority()
1099    {
1100        $iauthority = $this->get_iauthority();
1101        if (is_string($iauthority)) {
1102            return $this->to_uri($iauthority);
1103        }
1104
1105        return $iauthority;
1106    }
1107}
1108
// Make the class also available under the non-namespaced name SimplePie_IRI.
class_alias('SimplePie\IRI', 'SimplePie_IRI');
1110