<?php

declare(strict_types=1);
/**
 * SimplePie
 *
 * A PHP-Based RSS and Atom Feed Framework.
 * Takes the hard work out of managing a complete RSS/Atom solution.
 *
 * Copyright (c) 2004-2022, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 * 	* Redistributions of source code must retain the above copyright notice, this list of
 * 	  conditions and the following disclaimer.
 *
 * 	* Redistributions in binary form must reproduce the above copyright notice, this list
 * 	  of conditions and the following disclaimer in the documentation and/or other materials
 * 	  provided with the distribution.
 *
 * 	* Neither the name of the SimplePie Team nor the names of its contributors may be used
 * 	  to endorse or promote products derived from this software without specific prior
 * 	  written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
 * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * @package SimplePie
 * @copyright 2004-2016 Ryan Parman, Sam Sneddon, Ryan McCue
 * @author Ryan Parman
 * @author Sam Sneddon
 * @author Ryan McCue
 * @link http://simplepie.org/ SimplePie
 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
 */

namespace SimplePie;

/**
 * IRI parser/serialiser/normaliser
 *
 * Components are stored in their IRI form (prefixed with "i") and are exposed
 * through the magic accessors under both their IRI and URI names (for
 * example "ihost" and "host").
 *
 * @package SimplePie
 * @subpackage HTTP
 * @author Sam Sneddon
 * @author Steve Minutillo
 * @author Ryan McCue
 * @copyright 2007-2012 Sam Sneddon, Steve Minutillo, Ryan McCue
 * @license http://www.opensource.org/licenses/bsd-license.php
 */
class IRI
{
    /**
     * Scheme (lowercased), or null when the IRI has no scheme
     *
     * @var string|null
     */
    protected $scheme = null;

    /**
     * User Information, or null when absent
     *
     * @var string|null
     */
    protected $iuserinfo = null;

    /**
     * ihost, or null when absent
     *
     * @var string|null
     */
    protected $ihost = null;

    /**
     * Port, or null when absent (or equal to the scheme's default port)
     *
     * @var int|null
     */
    protected $port = null;

    /**
     * ipath (never null; the empty string means "no path")
     *
     * @var string
     */
    protected $ipath = '';

    /**
     * iquery, or null when absent
     *
     * @var string|null
     */
    protected $iquery = null;

    /**
     * ifragment, or null when absent
     *
     * @var string|null
     */
    protected $ifragment = null;

    /**
     * Normalization database
     *
     * Each key is the scheme, each value is an array with each key as the IRI
     * part and value as the default value for that part.
     *
     * @var array<string, array<string, int|string>>
     */
    protected $normalization = [
        'acap' => [
            'port' => 674
        ],
        'dict' => [
            'port' => 2628
        ],
        'file' => [
            'ihost' => 'localhost'
        ],
        'http' => [
            'port' => 80,
            'ipath' => '/'
        ],
        'https' => [
            'port' => 443,
            'ipath' => '/'
        ],
    ];

    /**
     * Return the entire IRI when you try and read the object as a string
     *
     * get_iri() returns false for an invalid IRI; the cast turns that into an
     * empty string so that string conversion never raises a TypeError.
     *
     * @return string
     */
    public function __toString()
    {
        return (string) $this->get_iri();
    }

    /**
     * Overload __set() to provide access via properties
     *
     * Dispatches to the matching set_*() method, accepting both the URI name
     * ("host") and the IRI name ("ihost") of a component. Unknown names are
     * silently ignored.
     *
     * @param string $name Property name
     * @param mixed $value Property value
     */
    public function __set($name, $value)
    {
        if (method_exists($this, 'set_' . $name)) {
            call_user_func([$this, 'set_' . $name], $value);
        } elseif (
            $name === 'iauthority'
            || $name === 'iuserinfo'
            || $name === 'ihost'
            || $name === 'ipath'
            || $name === 'iquery'
            || $name === 'ifragment'
        ) {
            // Strip the leading "i" to find the setter (ihost -> set_host)
            call_user_func([$this, 'set_' . substr($name, 1)], $value);
        }
    }

    /**
     * Overload __get() to provide access via properties
     *
     * When the stored value is null, the scheme's default for that part (from
     * the normalization database) is returned instead, if there is one.
     *
     * @param string $name Property name
     * @return mixed
     */
    public function __get($name)
    {
        // isset() returns false for null, we don't want to do that
        // Also why we use array_key_exists below instead of isset()
        $props = get_object_vars($this);

        if (
            $name === 'iri' ||
            $name === 'uri' ||
            $name === 'iauthority' ||
            $name === 'authority'
        ) {
            $return = $this->{"get_$name"}();
        } elseif (array_key_exists($name, $props)) {
            $return = $this->$name;
        }
        // host -> ihost
        elseif (($prop = 'i' . $name) && array_key_exists($prop, $props)) {
            $name = $prop;
            $return = $this->$prop;
        }
        // ischeme -> scheme
        elseif (($prop = substr($name, 1)) && array_key_exists($prop, $props)) {
            $name = $prop;
            $return = $this->$prop;
        } else {
            trigger_error('Undefined property: ' . get_class($this) . '::' . $name, E_USER_NOTICE);
            $return = null;
        }

        if ($return === null && isset($this->normalization[$this->scheme][$name])) {
            return $this->normalization[$this->scheme][$name];
        }

        return $return;
    }

    /**
     * Overload __isset() to provide access via properties
     *
     * @param string $name Property name
     * @return bool
     */
    public function __isset($name)
    {
        return method_exists($this, 'get_' . $name) || isset($this->$name);
    }

    /**
     * Overload __unset() to provide access via properties
     *
     * Unsetting a part passes an empty string to its setter.
     *
     * @param string $name Property name
     */
    public function __unset($name)
    {
        if (method_exists($this, 'set_' . $name)) {
            call_user_func([$this, 'set_' . $name], '');
        }
    }

    /**
     * Create a new IRI object, from a specified string
     *
     * @param string|null $iri
     */
    public function __construct($iri = null)
    {
        $this->set_iri($iri);
    }

    /**
     * Clean up
     *
     * Resets the static parse caches kept by set_iri(), set_path() and
     * set_authority().
     */
    public function __destruct()
    {
        $this->set_iri(null, true);
        $this->set_path(null, true);
        $this->set_authority(null, true);
    }

    /**
     * Create a new IRI object by resolving a relative IRI
     *
     * Returns false if $base is not absolute (or either IRI is invalid),
     * otherwise an IRI.
     *
     * @param IRI|string $base (Absolute) Base IRI
     * @param IRI|string $relative Relative IRI
     * @return IRI|false
     */
    public static function absolutize($base, $relative)
    {
        if (!($relative instanceof IRI)) {
            $relative = new IRI($relative);
        }
        if (!$relative->is_valid()) {
            return false;
        } elseif ($relative->scheme !== null) {
            // Already absolute: nothing to resolve
            return clone $relative;
        } else {
            if (!($base instanceof IRI)) {
                $base = new IRI($base);
            }
            if ($base->scheme !== null && $base->is_valid()) {
                if ($relative->get_iri() !== '') {
                    if ($relative->iuserinfo !== null || $relative->ihost !== null || $relative->port !== null) {
                        // Network-path reference: only the scheme comes from the base
                        $target = clone $relative;
                        $target->scheme = $base->scheme;
                    } else {
                        $target = new IRI();
                        $target->scheme = $base->scheme;
                        $target->iuserinfo = $base->iuserinfo;
                        $target->ihost = $base->ihost;
                        $target->port = $base->port;
                        if ($relative->ipath !== '') {
                            if ($relative->ipath[0] === '/') {
                                $target->ipath = $relative->ipath;
                            } elseif (($base->iuserinfo !== null || $base->ihost !== null || $base->port !== null) && $base->ipath === '') {
                                $target->ipath = '/' . $relative->ipath;
                            } elseif (($last_segment = strrpos($base->ipath, '/')) !== false) {
                                // Merge: base path up to and including its last "/", then the relative path
                                $target->ipath = substr($base->ipath, 0, $last_segment + 1) . $relative->ipath;
                            } else {
                                $target->ipath = $relative->ipath;
                            }
                            $target->ipath = $target->remove_dot_segments($target->ipath);
                            $target->iquery = $relative->iquery;
                        } else {
                            $target->ipath = $base->ipath;
                            if ($relative->iquery !== null) {
                                $target->iquery = $relative->iquery;
                            } elseif ($base->iquery !== null) {
                                $target->iquery = $base->iquery;
                            }
                        }
                        $target->ifragment = $relative->ifragment;
                    }
                } else {
                    // Empty reference: the base without its fragment
                    $target = clone $base;
                    $target->ifragment = null;
                }
                $target->scheme_normalization();
                return $target;
            }

            return false;
        }
    }

    /**
     * Parse an IRI into scheme/authority/path/query/fragment segments
     *
     * Parts that are absent from the input are set to null (the path is set
     * to an empty string instead).
     *
     * @param string $iri
     * @return array|false Matched parts keyed by name, or false when the
     *                     input does not match the IRI pattern
     */
    protected function parse_iri($iri)
    {
        $iri = trim($iri, "\x20\x09\x0A\x0C\x0D");
        if (preg_match('/^((?P<scheme>[^:\/?#]+):)?(\/\/(?P<authority>[^\/?#]*))?(?P<path>[^?#]*)(\?(?P<query>[^#]*))?(#(?P<fragment>.*))?$/', $iri, $match)) {
            if ($match[1] === '') {
                $match['scheme'] = null;
            }
            if (!isset($match[3]) || $match[3] === '') {
                $match['authority'] = null;
            }
            if (!isset($match[5])) {
                $match['path'] = '';
            }
            if (!isset($match[6]) || $match[6] === '') {
                $match['query'] = null;
            }
            if (!isset($match[8]) || $match[8] === '') {
                $match['fragment'] = null;
            }
            return $match;
        }

        // This can occur when a paragraph is accidentally parsed as a URI
        return false;
    }

    /**
     * Remove dot segments from a path
     *
     * The lettered steps below follow the "remove dot segments" procedure of
     * RFC 3986, section 5.2.4.
     *
     * @param string $input
     * @return string
     */
    protected function remove_dot_segments($input)
    {
        $output = '';
        while (strpos($input, './') !== false || strpos($input, '/.') !== false || $input === '.' || $input === '..') {
            // A: If the input buffer begins with a prefix of "../" or "./", then remove that prefix from the input buffer; otherwise,
            if (strpos($input, '../') === 0) {
                $input = substr($input, 3);
            } elseif (strpos($input, './') === 0) {
                $input = substr($input, 2);
            }
            // B: if the input buffer begins with a prefix of "/./" or "/.", where "." is a complete path segment, then replace that prefix with "/" in the input buffer; otherwise,
            elseif (strpos($input, '/./') === 0) {
                $input = substr($input, 2);
            } elseif ($input === '/.') {
                $input = '/';
            }
            // C: if the input buffer begins with a prefix of "/../" or "/..", where ".." is a complete path segment, then replace that prefix with "/" in the input buffer and remove the last segment and its preceding "/" (if any) from the output buffer; otherwise,
            elseif (strpos($input, '/../') === 0) {
                $input = substr($input, 3);
                $output = substr_replace($output, '', intval(strrpos($output, '/')));
            } elseif ($input === '/..') {
                $input = '/';
                $output = substr_replace($output, '', intval(strrpos($output, '/')));
            }
            // D: if the input buffer consists only of "." or "..", then remove that from the input buffer; otherwise,
            elseif ($input === '.' || $input === '..') {
                $input = '';
            }
            // E: move the first path segment in the input buffer to the end of the output buffer, including the initial "/" character (if any) and any subsequent characters up to, but not including, the next "/" character or the end of the input buffer
            elseif (($pos = strpos($input, '/', 1)) !== false) {
                $output .= substr($input, 0, $pos);
                $input = substr_replace($input, '', 0, $pos);
            } else {
                $output .= $input;
                $input = '';
            }
        }
        return $output . $input;
    }

    /**
     * Replace invalid character with percent encoding
     *
     * @param string $string Input string
     * @param string $extra_chars Valid characters not in iunreserved or
     *                            iprivate (this is ASCII-only)
     * @param bool $iprivate Allow iprivate
     * @return string
     */
    protected function replace_invalid_with_pct_encoding($string, $extra_chars, $iprivate = false)
    {
        // Normalize as many pct-encoded sections as possible
        $string = preg_replace_callback('/(?:%[A-Fa-f0-9]{2})+/', [$this, 'remove_iunreserved_percent_encoded'], $string);

        // Replace invalid percent characters
        $string = preg_replace('/%(?![A-Fa-f0-9]{2})/', '%25', $string);

        // Add unreserved and % to $extra_chars (the latter is safe because all
        // pct-encoded sections are now valid).
        $extra_chars .= 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~%';

        // Now replace any bytes that aren't allowed with their pct-encoded versions
        $position = 0;
        $strlen = strlen($string);
        while (($position += strspn($string, $extra_chars, $position)) < $strlen) {
            $value = ord($string[$position]);
            $character = 0;

            // Start position
            $start = $position;

            // By default we are valid
            $valid = true;

            // No one byte sequences are valid due to the while.
            // Two byte sequence:
            if (($value & 0xE0) === 0xC0) {
                $character = ($value & 0x1F) << 6;
                $length = 2;
                $remaining = 1;
            }
            // Three byte sequence:
            elseif (($value & 0xF0) === 0xE0) {
                $character = ($value & 0x0F) << 12;
                $length = 3;
                $remaining = 2;
            }
            // Four byte sequence:
            elseif (($value & 0xF8) === 0xF0) {
                $character = ($value & 0x07) << 18;
                $length = 4;
                $remaining = 3;
            }
            // Invalid byte:
            else {
                $valid = false;
                $length = 1;
                $remaining = 0;
            }

            if ($remaining) {
                if ($position + $length <= $strlen) {
                    for ($position++; $remaining; $position++) {
                        $value = ord($string[$position]);

                        // Check that the byte is valid, then add it to the character:
                        if (($value & 0xC0) === 0x80) {
                            $character |= ($value & 0x3F) << (--$remaining * 6);
                        }
                        // If it is invalid, count the sequence as invalid and reprocess the current byte:
                        else {
                            $valid = false;
                            $position--;
                            break;
                        }
                    }
                } else {
                    // Truncated sequence at the end of the string
                    $position = $strlen - 1;
                    $valid = false;
                }
            }

            // Percent encode anything invalid or not in ucschar
            if (
                // Invalid sequences
                !$valid
                // Non-shortest form sequences are invalid
                || $length > 1 && $character <= 0x7F
                || $length > 2 && $character <= 0x7FF
                || $length > 3 && $character <= 0xFFFF
                // Outside of range of ucschar codepoints
                // Noncharacters
                || ($character & 0xFFFE) === 0xFFFE
                || $character >= 0xFDD0 && $character <= 0xFDEF
                || (
                    // Everything else not in ucschar
                    $character > 0xD7FF && $character < 0xF900
                    || $character < 0xA0
                    || $character > 0xEFFFD
                )
                && (
                    // Everything not in iprivate, if it applies
                    !$iprivate
                    || $character < 0xE000
                    || $character > 0x10FFFD
                )
            ) {
                // If we were a character, pretend we weren't, but rather an error.
                if ($valid) {
                    $position--;
                }

                // Each encoded byte grows the string by two characters, so the
                // cursor, the end position and the length all shift with it.
                for ($j = $start; $j <= $position; $j++) {
                    $string = substr_replace($string, sprintf('%%%02X', ord($string[$j])), $j, 1);
                    $j += 2;
                    $position += 2;
                    $strlen += 2;
                }
            }
        }

        return $string;
    }

    /**
     * Callback function for preg_replace_callback.
     *
     * Removes sequences of percent encoded bytes that represent UTF-8
     * encoded characters in iunreserved
     *
     * @param array $match PCRE match
     * @return string Replacement
     */
    protected function remove_iunreserved_percent_encoded($match)
    {
        // As we just have valid percent encoded sequences we can just explode
        // and ignore the first member of the returned array (an empty string).
        $bytes = explode('%', $match[0]);

        // Initialize the new string (this is what will be returned) and that
        // there are no bytes remaining in the current sequence (unsurprising
        // at the first byte!).
        $string = '';
        $remaining = 0;

        // these variables will be initialized in the loop but PHPStan is not able to detect it currently
        $start = 0;
        $character = 0;
        $length = 0;
        $valid = true;

        // Loop over each and every byte, and set $value to its value
        for ($i = 1, $len = count($bytes); $i < $len; $i++) {
            $value = hexdec($bytes[$i]);

            // If we're the first byte of sequence:
            if (!$remaining) {
                // Start position
                $start = $i;

                // By default we are valid
                $valid = true;

                // One byte sequence:
                if ($value <= 0x7F) {
                    $character = $value;
                    $length = 1;
                }
                // Two byte sequence:
                elseif (($value & 0xE0) === 0xC0) {
                    $character = ($value & 0x1F) << 6;
                    $length = 2;
                    $remaining = 1;
                }
                // Three byte sequence:
                elseif (($value & 0xF0) === 0xE0) {
                    $character = ($value & 0x0F) << 12;
                    $length = 3;
                    $remaining = 2;
                }
                // Four byte sequence:
                elseif (($value & 0xF8) === 0xF0) {
                    $character = ($value & 0x07) << 18;
                    $length = 4;
                    $remaining = 3;
                }
                // Invalid byte:
                else {
                    $valid = false;
                    $remaining = 0;
                }
            }
            // Continuation byte:
            else {
                // Check that the byte is valid, then add it to the character:
                if (($value & 0xC0) === 0x80) {
                    $remaining--;
                    $character |= ($value & 0x3F) << ($remaining * 6);
                }
                // If it is invalid, count the sequence as invalid and reprocess the current byte as the start of a sequence:
                else {
                    $valid = false;
                    $remaining = 0;
                    $i--;
                }
            }

            // If we've reached the end of the current byte sequence, append it to the output string
            if (!$remaining) {
                // Percent encode anything invalid or not in iunreserved
                if (
                    // Invalid sequences
                    !$valid
                    // Non-shortest form sequences are invalid
                    || $length > 1 && $character <= 0x7F
                    || $length > 2 && $character <= 0x7FF
                    || $length > 3 && $character <= 0xFFFF
                    // Outside of range of iunreserved codepoints
                    || $character < 0x2D
                    || $character > 0xEFFFD
                    // Noncharacters
                    || ($character & 0xFFFE) === 0xFFFE
                    || $character >= 0xFDD0 && $character <= 0xFDEF
                    // Everything else not in iunreserved (this is all BMP)
                    || $character === 0x2F
                    || $character > 0x39 && $character < 0x41
                    || $character > 0x5A && $character < 0x61
                    || $character > 0x7A && $character < 0x7E
                    || $character > 0x7E && $character < 0xA0
                    || $character > 0xD7FF && $character < 0xF900
                ) {
                    // Keep the sequence percent-encoded, with uppercase hex digits
                    for ($j = $start; $j <= $i; $j++) {
                        $string .= '%' . strtoupper($bytes[$j]);
                    }
                } else {
                    // Decode the sequence to its raw bytes
                    for ($j = $start; $j <= $i; $j++) {
                        $string .= chr(hexdec($bytes[$j]));
                    }
                }
            }
        }

        // If we have any bytes left over they are invalid (i.e., we are
        // mid-way through a multi-byte sequence)
        if ($remaining) {
            for ($j = $start; $j < $len; $j++) {
                $string .= '%' . strtoupper($bytes[$j]);
            }
        }

        return $string;
    }

    /**
     * Reset every part that equals the current scheme's default value
     *
     * Parts are reset to null, except the path which is reset to the empty
     * string. Defaults come from the normalization database.
     */
    protected function scheme_normalization()
    {
        if (isset($this->normalization[$this->scheme]['iuserinfo']) && $this->iuserinfo === $this->normalization[$this->scheme]['iuserinfo']) {
            $this->iuserinfo = null;
        }
        if (isset($this->normalization[$this->scheme]['ihost']) && $this->ihost === $this->normalization[$this->scheme]['ihost']) {
            $this->ihost = null;
        }
        if (isset($this->normalization[$this->scheme]['port']) && $this->port === $this->normalization[$this->scheme]['port']) {
            $this->port = null;
        }
        if (isset($this->normalization[$this->scheme]['ipath']) && $this->ipath === $this->normalization[$this->scheme]['ipath']) {
            $this->ipath = '';
        }
        if (isset($this->normalization[$this->scheme]['iquery']) && $this->iquery === $this->normalization[$this->scheme]['iquery']) {
            $this->iquery = null;
        }
        if (isset($this->normalization[$this->scheme]['ifragment']) && $this->ifragment === $this->normalization[$this->scheme]['ifragment']) {
            $this->ifragment = null;
        }
    }

    /**
     * Check if the object represents a valid IRI. This needs to be done on each
     * call as some things change depending on another part of the IRI.
     *
     * @return bool
     */
    public function is_valid()
    {
        if ($this->ipath === '') {
            return true;
        }

        $isauthority = $this->iuserinfo !== null || $this->ihost !== null ||
            $this->port !== null;
        if ($isauthority && $this->ipath[0] === '/') {
            return true;
        }

        // Without an authority, a leading "//" would be read back as one
        if (!$isauthority && (substr($this->ipath, 0, 2) === '//')) {
            return false;
        }

        // Relative urls cannot have a colon in the first path segment (and the
        // slashes themselves are not included so skip the first character).
        if (!$this->scheme && !$isauthority &&
            strpos($this->ipath, ':') !== false &&
            strpos($this->ipath, '/', 1) !== false &&
            strpos($this->ipath, ':') < strpos($this->ipath, '/', 1)) {
            return false;
        }

        return true;
    }

    /**
     * Set the entire IRI. Returns true on success, false on failure (if there
     * are any invalid characters).
     *
     * Results are memoised per input string in a static cache shared by all
     * instances.
     *
     * @param string|null $iri
     * @param bool $clear_cache When true, only reset the cache; nothing is
     *                          set and nothing is returned
     * @return bool|null
     */
    public function set_iri($iri, $clear_cache = false)
    {
        static $cache;
        if ($clear_cache) {
            $cache = null;
            return;
        }
        if (!$cache) {
            $cache = [];
        }

        if ($iri === null) {
            return true;
        } elseif (isset($cache[$iri])) {
            [
                $this->scheme,
                $this->iuserinfo,
                $this->ihost,
                $this->port,
                $this->ipath,
                $this->iquery,
                $this->ifragment,
                $return
            ] = $cache[$iri];

            return $return;
        }

        $parsed = $this->parse_iri((string) $iri);
        if (!$parsed) {
            return false;
        }

        $return = $this->set_scheme($parsed['scheme'])
            && $this->set_authority($parsed['authority'])
            && $this->set_path($parsed['path'])
            && $this->set_query($parsed['query'])
            && $this->set_fragment($parsed['fragment']);

        $cache[$iri] = [
            $this->scheme,
            $this->iuserinfo,
            $this->ihost,
            $this->port,
            $this->ipath,
            $this->iquery,
            $this->ifragment,
            $return
        ];

        return $return;
    }

    /**
     * Set the scheme. Returns true on success, false on failure (if there are
     * any invalid characters).
     *
     * @param string|null $scheme
     * @return bool
     */
    public function set_scheme($scheme)
    {
        if ($scheme === null) {
            $this->scheme = null;
        } elseif (!preg_match('/^[A-Za-z][0-9A-Za-z+\-.]*$/', $scheme)) {
            $this->scheme = null;
            return false;
        } else {
            $this->scheme = strtolower($scheme);
        }
        return true;
    }

    /**
     * Set the authority. Returns true on success, false on failure (if there are
     * any invalid characters).
     *
     * Results are memoised per input string in a static cache shared by all
     * instances.
     *
     * @param string|null $authority
     * @param bool $clear_cache When true, only reset the cache; nothing is
     *                          set and nothing is returned
     * @return bool|null
     */
    public function set_authority($authority, $clear_cache = false)
    {
        static $cache;
        if ($clear_cache) {
            $cache = null;
            return;
        }
        if (!$cache) {
            $cache = [];
        }

        if ($authority === null) {
            $this->iuserinfo = null;
            $this->ihost = null;
            $this->port = null;
            return true;
        } elseif (isset($cache[$authority])) {
            [
                $this->iuserinfo,
                $this->ihost,
                $this->port,
                $return
            ] = $cache[$authority];

            return $return;
        }

        $remaining = $authority;
        if (($iuserinfo_end = strrpos($remaining, '@')) !== false) {
            $iuserinfo = substr($remaining, 0, $iuserinfo_end);
            $remaining = substr($remaining, $iuserinfo_end + 1);
        } else {
            $iuserinfo = null;
        }
        // Search for the port separator only after a closing "]" so that the
        // colons inside a bracketed IPv6 literal are skipped
        if (($port_start = strpos($remaining, ':', intval(strpos($remaining, ']')))) !== false) {
            if (($port = substr($remaining, $port_start + 1)) === false) {
                $port = null;
            }
            $remaining = substr($remaining, 0, $port_start);
        } else {
            $port = null;
        }

        $return = $this->set_userinfo($iuserinfo) &&
                  $this->set_host($remaining) &&
                  $this->set_port($port);

        $cache[$authority] = [
            $this->iuserinfo,
            $this->ihost,
            $this->port,
            $return
        ];

        return $return;
    }

    /**
     * Set the iuserinfo.
     *
     * @param string|null $iuserinfo
     * @return bool Always true
     */
    public function set_userinfo($iuserinfo)
    {
        if ($iuserinfo === null) {
            $this->iuserinfo = null;
        } else {
            $this->iuserinfo = $this->replace_invalid_with_pct_encoding($iuserinfo, '!$&\'()*+,;=:');
            $this->scheme_normalization();
        }

        return true;
    }

    /**
     * Set the ihost. Returns true on success, false on failure (if there are
     * any invalid characters).
     *
     * @param string|null $ihost
     * @return bool
     */
    public function set_host($ihost)
    {
        if ($ihost === null) {
            $this->ihost = null;
            return true;
        } elseif (substr($ihost, 0, 1) === '[' && substr($ihost, -1) === ']') {
            // Bracketed literal: validate and compress it as an IPv6 address
            if (\SimplePie\Net\IPv6::check_ipv6(substr($ihost, 1, -1))) {
                $this->ihost = '[' . \SimplePie\Net\IPv6::compress(substr($ihost, 1, -1)) . ']';
            } else {
                $this->ihost = null;
                return false;
            }
        } else {
            $ihost = $this->replace_invalid_with_pct_encoding($ihost, '!$&\'()*+,;=');

            // Lowercase, but ignore pct-encoded sections (as they should
            // remain uppercase). This must be done after the previous step
            // as that can add unescaped characters.
            $position = 0;
            $strlen = strlen($ihost);
            while (($position += strcspn($ihost, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ%', $position)) < $strlen) {
                if ($ihost[$position] === '%') {
                    $position += 3;
                } else {
                    $ihost[$position] = strtolower($ihost[$position]);
                    $position++;
                }
            }

            $this->ihost = $ihost;
        }

        $this->scheme_normalization();

        return true;
    }

    /**
     * Set the port. Returns true on success, false on failure (if there are
     * any invalid characters).
     *
     * @param string|int|null $port
     * @return bool
     */
    public function set_port($port)
    {
        if ($port === null) {
            $this->port = null;
            return true;
        }

        // Accept integer ports (e.g. `$iri->port = 8080`): under strict types
        // the string functions below would otherwise throw a TypeError.
        if (is_int($port)) {
            $port = (string) $port;
        }

        if (strspn($port, '0123456789') === strlen($port)) {
            $this->port = (int) $port;
            $this->scheme_normalization();
            return true;
        }

        $this->port = null;
        return false;
    }

    /**
     * Set the ipath.
     *
     * Dot segments are removed only when a scheme is set. Both forms are
     * memoised per input string in a static cache shared by all instances.
     *
     * @param string|null $ipath
     * @param bool $clear_cache When true, only reset the cache; nothing is
     *                          set and nothing is returned
     * @return bool|null
     */
    public function set_path($ipath, $clear_cache = false)
    {
        static $cache;
        if ($clear_cache) {
            $cache = null;
            return;
        }
        if (!$cache) {
            $cache = [];
        }

        $ipath = (string) $ipath;

        if (isset($cache[$ipath])) {
            // Index 0 is the encoded path, index 1 the dot-segment-free one
            $this->ipath = $cache[$ipath][(int) ($this->scheme !== null)];
        } else {
            $valid = $this->replace_invalid_with_pct_encoding($ipath, '!$&\'()*+,;=@:/');
            $removed = $this->remove_dot_segments($valid);

            $cache[$ipath] = [$valid, $removed];
            $this->ipath = ($this->scheme !== null) ? $removed : $valid;
        }

        $this->scheme_normalization();
        return true;
    }

    /**
     * Set the iquery.
     *
     * @param string|null $iquery
     * @return bool Always true
     */
    public function set_query($iquery)
    {
        if ($iquery === null) {
            $this->iquery = null;
        } else {
            $this->iquery = $this->replace_invalid_with_pct_encoding($iquery, '!$&\'()*+,;=:@/?', true);
            $this->scheme_normalization();
        }
        return true;
    }

    /**
     * Set the ifragment.
     *
     * @param string|null $ifragment
     * @return bool Always true
     */
    public function set_fragment($ifragment)
    {
        if ($ifragment === null) {
            $this->ifragment = null;
        } else {
            $this->ifragment = $this->replace_invalid_with_pct_encoding($ifragment, '!$&\'()*+,;=:@/?');
            $this->scheme_normalization();
        }
        return true;
    }

    /**
     * Convert an IRI to a URI (or parts thereof)
     *
     * Every byte in the range 0x80-0xFF is replaced by its percent-encoded
     * form; all other bytes are left untouched.
     *
     * @param string $string
     * @return string
     */
    public function to_uri($string)
    {
        static $non_ascii;
        if (!$non_ascii) {
            $non_ascii = implode('', range("\x80", "\xFF"));
        }

        $position = 0;
        $strlen = strlen($string);
        while (($position += strcspn($string, $non_ascii, $position)) < $strlen) {
            $string = substr_replace($string, sprintf('%%%02X', ord($string[$position])), $position, 1);
            $position += 3;
            $strlen += 2;
        }

        return $string;
    }

    /**
     * Get the complete IRI
     *
     * @return string|false The IRI, or false when is_valid() fails
     */
    public function get_iri()
    {
        if (!$this->is_valid()) {
            return false;
        }

        $iri = '';
        if ($this->scheme !== null) {
            $iri .= $this->scheme . ':';
        }
        if (($iauthority = $this->get_iauthority()) !== null) {
            $iri .= '//' . $iauthority;
        }
        if ($this->ipath !== '') {
            $iri .= $this->ipath;
        } elseif (!empty($this->normalization[$this->scheme]['ipath']) && $iauthority !== null && $iauthority !== '') {
            // Empty path: fall back to the scheme's default path
            $iri .= $this->normalization[$this->scheme]['ipath'];
        }
        if ($this->iquery !== null) {
            $iri .= '?' . $this->iquery;
        }
        if ($this->ifragment !== null) {
            $iri .= '#' . $this->ifragment;
        }

        return $iri;
    }

    /**
     * Get the complete URI
     *
     * @return string|false The URI, or false when the IRI is invalid
     */
    public function get_uri()
    {
        $iri = $this->get_iri();

        // get_iri() signals an invalid IRI with false; to_uri() only accepts
        // strings, so pass the failure through instead of converting it.
        if ($iri === false) {
            return false;
        }

        return $this->to_uri($iri);
    }

    /**
     * Get the complete iauthority
     *
     * A port of 0 is left out of the result.
     *
     * @return string|null Null when userinfo, host and port are all absent
     */
    protected function get_iauthority()
    {
        if ($this->iuserinfo !== null || $this->ihost !== null || $this->port !== null) {
            $iauthority = '';
            if ($this->iuserinfo !== null) {
                $iauthority .= $this->iuserinfo . '@';
            }
            if ($this->ihost !== null) {
                $iauthority .= $this->ihost;
            }
            if ($this->port !== null && $this->port !== 0) {
                $iauthority .= ':' . $this->port;
            }
            return $iauthority;
        }

        return null;
    }

    /**
     * Get the complete authority
     *
     * @return string|null Null when there is no authority
     */
    protected function get_authority()
    {
        $iauthority = $this->get_iauthority();
        if (is_string($iauthority)) {
            return $this->to_uri($iauthority);
        }

        return $iauthority;
    }
}

class_alias('SimplePie\IRI', 'SimplePie_IRI');