1<?php
2/**
3 * SimplePie
4 *
5 * A PHP-Based RSS and Atom Feed Framework.
6 * Takes the hard work out of managing a complete RSS/Atom solution.
7 *
8 * Copyright (c) 2004-2016, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without modification, are
12 * permitted provided that the following conditions are met:
13 *
14 * 	* Redistributions of source code must retain the above copyright notice, this list of
15 * 	  conditions and the following disclaimer.
16 *
17 * 	* Redistributions in binary form must reproduce the above copyright notice, this list
18 * 	  of conditions and the following disclaimer in the documentation and/or other materials
19 * 	  provided with the distribution.
20 *
21 * 	* Neither the name of the SimplePie Team nor the names of its contributors may be used
22 * 	  to endorse or promote products derived from this software without specific prior
23 * 	  written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
26 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
27 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
28 * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
32 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
34 *
35 * @package SimplePie
36 * @copyright 2004-2016 Ryan Parman, Sam Sneddon, Ryan McCue
37 * @author Ryan Parman
38 * @author Sam Sneddon
39 * @author Ryan McCue
40 * @link http://simplepie.org/ SimplePie
41 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
42 */
43
44/**
45 * IRI parser/serialiser/normaliser
46 *
47 * @package SimplePie
48 * @subpackage HTTP
49 * @author Sam Sneddon
50 * @author Steve Minutillo
51 * @author Ryan McCue
52 * @copyright 2007-2012 Sam Sneddon, Steve Minutillo, Ryan McCue
53 * @license http://www.opensource.org/licenses/bsd-license.php
54 */
55class SimplePie_IRI
56{
	/**
	 * Scheme
	 *
	 * Lowercased by set_scheme(); null when no scheme is set.
	 *
	 * @var string|null
	 */
	protected $scheme = null;

	/**
	 * User Information
	 *
	 * Null when the authority carries no userinfo part.
	 *
	 * @var string|null
	 */
	protected $iuserinfo = null;

	/**
	 * ihost
	 *
	 * Null when no host is set.
	 *
	 * @var string|null
	 */
	protected $ihost = null;

	/**
	 * Port
	 *
	 * set_port() stores the port as an integer; null when no port is set.
	 *
	 * @var int|null
	 */
	protected $port = null;

	/**
	 * ipath
	 *
	 * Never null: an absent path is represented by the empty string.
	 *
	 * @var string
	 */
	protected $ipath = '';

	/**
	 * iquery
	 *
	 * Null when no query is set.
	 *
	 * @var string|null
	 */
	protected $iquery = null;

	/**
	 * ifragment
	 *
	 * Null when no fragment is set.
	 *
	 * @var string|null
	 */
	protected $ifragment = null;

	/**
	 * Normalization database
	 *
	 * Each key is the scheme, each value is an array with each key as the IRI
	 * part and value as the default value for that part.
	 *
	 * scheme_normalization() clears a part that equals its default, and
	 * __get() returns the default for a part that is currently null.
	 *
	 * @var array
	 */
	protected $normalization = array(
		'acap' => array(
			'port' => 674
		),
		'dict' => array(
			'port' => 2628
		),
		'file' => array(
			'ihost' => 'localhost'
		),
		'http' => array(
			'port' => 80,
			'ipath' => '/'
		),
		'https' => array(
			'port' => 443,
			'ipath' => '/'
		),
	);
131
132	/**
133	 * Return the entire IRI when you try and read the object as a string
134	 *
135	 * @return string
136	 */
137	public function __toString()
138	{
139		return $this->get_iri();
140	}
141
142	/**
143	 * Overload __set() to provide access via properties
144	 *
145	 * @param string $name Property name
146	 * @param mixed $value Property value
147	 */
148	public function __set($name, $value)
149	{
150		if (method_exists($this, 'set_' . $name))
151		{
152			call_user_func(array($this, 'set_' . $name), $value);
153		}
154		elseif (
155			   $name === 'iauthority'
156			|| $name === 'iuserinfo'
157			|| $name === 'ihost'
158			|| $name === 'ipath'
159			|| $name === 'iquery'
160			|| $name === 'ifragment'
161		)
162		{
163			call_user_func(array($this, 'set_' . substr($name, 1)), $value);
164		}
165	}
166
167	/**
168	 * Overload __get() to provide access via properties
169	 *
170	 * @param string $name Property name
171	 * @return mixed
172	 */
173	public function __get($name)
174	{
175		// isset() returns false for null, we don't want to do that
176		// Also why we use array_key_exists below instead of isset()
177		$props = get_object_vars($this);
178
179		if (
180			$name === 'iri' ||
181			$name === 'uri' ||
182			$name === 'iauthority' ||
183			$name === 'authority'
184		)
185		{
186			$return = $this->{"get_$name"}();
187		}
188		elseif (array_key_exists($name, $props))
189		{
190			$return = $this->$name;
191		}
192		// host -> ihost
193		elseif (($prop = 'i' . $name) && array_key_exists($prop, $props))
194		{
195			$name = $prop;
196			$return = $this->$prop;
197		}
198		// ischeme -> scheme
199		elseif (($prop = substr($name, 1)) && array_key_exists($prop, $props))
200		{
201			$name = $prop;
202			$return = $this->$prop;
203		}
204		else
205		{
206			trigger_error('Undefined property: ' . get_class($this) . '::' . $name, E_USER_NOTICE);
207			$return = null;
208		}
209
210		if ($return === null && isset($this->normalization[$this->scheme][$name]))
211		{
212			return $this->normalization[$this->scheme][$name];
213		}
214
215		return $return;
216	}
217
218	/**
219	 * Overload __isset() to provide access via properties
220	 *
221	 * @param string $name Property name
222	 * @return bool
223	 */
224	public function __isset($name)
225	{
226		return method_exists($this, 'get_' . $name) || isset($this->$name);
227	}
228
229	/**
230	 * Overload __unset() to provide access via properties
231	 *
232	 * @param string $name Property name
233	 */
234	public function __unset($name)
235	{
236		if (method_exists($this, 'set_' . $name))
237		{
238			call_user_func(array($this, 'set_' . $name), '');
239		}
240	}
241
	/**
	 * Create a new IRI object, from a specified string
	 *
	 * The string is handed to set_iri(); its return value is not checked
	 * here. With the default null, set_iri() leaves every part at its
	 * initial value.
	 *
	 * @param string|null $iri
	 */
	public function __construct($iri = null)
	{
		$this->set_iri($iri);
	}
251
252	/**
253	 * Clean up
254	 */
255	public function __destruct() {
256	    $this->set_iri(null, true);
257	    $this->set_path(null, true);
258	    $this->set_authority(null, true);
259	}
260
261	/**
262	 * Create a new IRI object by resolving a relative IRI
263	 *
264	 * Returns false if $base is not absolute, otherwise an IRI.
265	 *
266	 * @param IRI|string $base (Absolute) Base IRI
267	 * @param IRI|string $relative Relative IRI
268	 * @return IRI|false
269	 */
270	public static function absolutize($base, $relative)
271	{
272		if (!($relative instanceof SimplePie_IRI))
273		{
274			$relative = new SimplePie_IRI($relative);
275		}
276		if (!$relative->is_valid())
277		{
278			return false;
279		}
280		elseif ($relative->scheme !== null)
281		{
282			return clone $relative;
283		}
284		else
285		{
286			if (!($base instanceof SimplePie_IRI))
287			{
288				$base = new SimplePie_IRI($base);
289			}
290			if ($base->scheme !== null && $base->is_valid())
291			{
292				if ($relative->get_iri() !== '')
293				{
294					if ($relative->iuserinfo !== null || $relative->ihost !== null || $relative->port !== null)
295					{
296						$target = clone $relative;
297						$target->scheme = $base->scheme;
298					}
299					else
300					{
301						$target = new SimplePie_IRI;
302						$target->scheme = $base->scheme;
303						$target->iuserinfo = $base->iuserinfo;
304						$target->ihost = $base->ihost;
305						$target->port = $base->port;
306						if ($relative->ipath !== '')
307						{
308							if ($relative->ipath[0] === '/')
309							{
310								$target->ipath = $relative->ipath;
311							}
312							elseif (($base->iuserinfo !== null || $base->ihost !== null || $base->port !== null) && $base->ipath === '')
313							{
314								$target->ipath = '/' . $relative->ipath;
315							}
316							elseif (($last_segment = strrpos($base->ipath, '/')) !== false)
317							{
318								$target->ipath = substr($base->ipath, 0, $last_segment + 1) . $relative->ipath;
319							}
320							else
321							{
322								$target->ipath = $relative->ipath;
323							}
324							$target->ipath = $target->remove_dot_segments($target->ipath);
325							$target->iquery = $relative->iquery;
326						}
327						else
328						{
329							$target->ipath = $base->ipath;
330							if ($relative->iquery !== null)
331							{
332								$target->iquery = $relative->iquery;
333							}
334							elseif ($base->iquery !== null)
335							{
336								$target->iquery = $base->iquery;
337							}
338						}
339						$target->ifragment = $relative->ifragment;
340					}
341				}
342				else
343				{
344					$target = clone $base;
345					$target->ifragment = null;
346				}
347				$target->scheme_normalization();
348				return $target;
349			}
350
351			return false;
352		}
353	}
354
355	/**
356	 * Parse an IRI into scheme/authority/path/query/fragment segments
357	 *
358	 * @param string $iri
359	 * @return array
360	 */
361	protected function parse_iri($iri)
362	{
363		$iri = trim($iri, "\x20\x09\x0A\x0C\x0D");
364		if (preg_match('/^((?P<scheme>[^:\/?#]+):)?(\/\/(?P<authority>[^\/?#]*))?(?P<path>[^?#]*)(\?(?P<query>[^#]*))?(#(?P<fragment>.*))?$/', $iri, $match))
365		{
366			if ($match[1] === '')
367			{
368				$match['scheme'] = null;
369			}
370			if (!isset($match[3]) || $match[3] === '')
371			{
372				$match['authority'] = null;
373			}
374			if (!isset($match[5]))
375			{
376				$match['path'] = '';
377			}
378			if (!isset($match[6]) || $match[6] === '')
379			{
380				$match['query'] = null;
381			}
382			if (!isset($match[8]) || $match[8] === '')
383			{
384				$match['fragment'] = null;
385			}
386			return $match;
387		}
388
389		// This can occur when a paragraph is accidentally parsed as a URI
390		return false;
391	}
392
393	/**
394	 * Remove dot segments from a path
395	 *
396	 * @param string $input
397	 * @return string
398	 */
399	protected function remove_dot_segments($input)
400	{
401		$output = '';
402		while (strpos($input, './') !== false || strpos($input, '/.') !== false || $input === '.' || $input === '..')
403		{
404			// A: If the input buffer begins with a prefix of "../" or "./", then remove that prefix from the input buffer; otherwise,
405			if (strpos($input, '../') === 0)
406			{
407				$input = substr($input, 3);
408			}
409			elseif (strpos($input, './') === 0)
410			{
411				$input = substr($input, 2);
412			}
413			// B: if the input buffer begins with a prefix of "/./" or "/.", where "." is a complete path segment, then replace that prefix with "/" in the input buffer; otherwise,
414			elseif (strpos($input, '/./') === 0)
415			{
416				$input = substr($input, 2);
417			}
418			elseif ($input === '/.')
419			{
420				$input = '/';
421			}
422			// C: if the input buffer begins with a prefix of "/../" or "/..", where ".." is a complete path segment, then replace that prefix with "/" in the input buffer and remove the last segment and its preceding "/" (if any) from the output buffer; otherwise,
423			elseif (strpos($input, '/../') === 0)
424			{
425				$input = substr($input, 3);
426				$output = substr_replace($output, '', strrpos($output, '/'));
427			}
428			elseif ($input === '/..')
429			{
430				$input = '/';
431				$output = substr_replace($output, '', strrpos($output, '/'));
432			}
433			// D: if the input buffer consists only of "." or "..", then remove that from the input buffer; otherwise,
434			elseif ($input === '.' || $input === '..')
435			{
436				$input = '';
437			}
438			// E: move the first path segment in the input buffer to the end of the output buffer, including the initial "/" character (if any) and any subsequent characters up to, but not including, the next "/" character or the end of the input buffer
439			elseif (($pos = strpos($input, '/', 1)) !== false)
440			{
441				$output .= substr($input, 0, $pos);
442				$input = substr_replace($input, '', 0, $pos);
443			}
444			else
445			{
446				$output .= $input;
447				$input = '';
448			}
449		}
450		return $output . $input;
451	}
452
	/**
	 * Replace invalid character with percent encoding
	 *
	 * First normalizes existing percent-encoded runs, escapes any stray "%",
	 * and then walks the string byte by byte: every byte that is not in the
	 * allowed ASCII set is decoded as the start of a UTF-8 sequence, and the
	 * whole sequence is percent-encoded if it is malformed or its code point
	 * is not permitted.
	 *
	 * @param string $string Input string
	 * @param string $extra_chars Valid characters not in iunreserved or
	 *                            iprivate (this is ASCII-only)
	 * @param bool $iprivate Allow iprivate
	 * @return string
	 */
	protected function replace_invalid_with_pct_encoding($string, $extra_chars, $iprivate = false)
	{
		// Normalize as many pct-encoded sections as possible
		$string = preg_replace_callback('/(?:%[A-Fa-f0-9]{2})+/', array($this, 'remove_iunreserved_percent_encoded'), $string);

		// Replace invalid percent characters
		$string = preg_replace('/%(?![A-Fa-f0-9]{2})/', '%25', $string);

		// Add unreserved and % to $extra_chars (the latter is safe because all
		// pct-encoded sections are now valid).
		$extra_chars .= 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~%';

		// Now replace any bytes that aren't allowed with their pct-encoded versions
		$position = 0;
		$strlen = strlen($string);
		// strspn() skips the run of allowed bytes, so each iteration starts on
		// the next byte that is not in $extra_chars.
		while (($position += strspn($string, $extra_chars, $position)) < $strlen)
		{
			$value = ord($string[$position]);

			// Start position
			$start = $position;

			// By default we are valid
			$valid = true;

			// No one byte sequences are valid due to the while.
			// Two byte sequence:
			if (($value & 0xE0) === 0xC0)
			{
				$character = ($value & 0x1F) << 6;
				$length = 2;
				$remaining = 1;
			}
			// Three byte sequence:
			elseif (($value & 0xF0) === 0xE0)
			{
				$character = ($value & 0x0F) << 12;
				$length = 3;
				$remaining = 2;
			}
			// Four byte sequence:
			elseif (($value & 0xF8) === 0xF0)
			{
				$character = ($value & 0x07) << 18;
				$length = 4;
				$remaining = 3;
			}
			// Invalid byte:
			else
			{
				$valid = false;
				$length = 1;
				$remaining = 0;
			}

			if ($remaining)
			{
				// Only read continuation bytes if the whole sequence fits
				if ($position + $length <= $strlen)
				{
					// On normal completion $position ends one past the last
					// byte of the sequence.
					for ($position++; $remaining; $position++)
					{
						$value = ord($string[$position]);

						// Check that the byte is valid, then add it to the character:
						if (($value & 0xC0) === 0x80)
						{
							$character |= ($value & 0x3F) << (--$remaining * 6);
						}
						// If it is invalid, count the sequence as invalid and reprocess the current byte:
						else
						{
							$valid = false;
							$position--;
							break;
						}
					}
				}
				else
				{
					// Truncated sequence: everything up to the end is invalid
					$position = $strlen - 1;
					$valid = false;
				}
			}

			// Percent encode anything invalid or not in ucschar.
			// Note: && binds tighter than ||, so the "not in ucschar" group
			// and the "not in iprivate" group below are combined with AND.
			if (
				// Invalid sequences
				!$valid
				// Non-shortest form sequences are invalid
				|| $length > 1 && $character <= 0x7F
				|| $length > 2 && $character <= 0x7FF
				|| $length > 3 && $character <= 0xFFFF
				// Outside of range of ucschar codepoints
				// Noncharacters
				|| ($character & 0xFFFE) === 0xFFFE
				|| $character >= 0xFDD0 && $character <= 0xFDEF
				|| (
					// Everything else not in ucschar
					   $character > 0xD7FF && $character < 0xF900
					|| $character < 0xA0
					|| $character > 0xEFFFD
				)
				&& (
					// Everything not in iprivate, if it applies
					   !$iprivate
					|| $character < 0xE000
					|| $character > 0x10FFFD
				)
			)
			{
				// If we were a character, pretend we weren't, but rather an error.
				// (Steps back onto the sequence's last byte so the loop below
				// covers exactly $start..$position.)
				if ($valid)
					$position--;

				// Each byte grows from one character to three ("%XX"), so the
				// loop index, the scan position and the length all shift by 2.
				for ($j = $start; $j <= $position; $j++)
				{
					$string = substr_replace($string, sprintf('%%%02X', ord($string[$j])), $j, 1);
					$j += 2;
					$position += 2;
					$strlen += 2;
				}
			}
		}

		return $string;
	}
588
	/**
	 * Callback function for preg_replace_callback.
	 *
	 * Removes sequences of percent encoded bytes that represent UTF-8
	 * encoded characters in iunreserved
	 *
	 * Sequences that are kept percent-encoded (invalid UTF-8, or characters
	 * outside iunreserved) have their hex digits uppercased.
	 *
	 * @param array $match PCRE match
	 * @return string Replacement
	 */
	protected function remove_iunreserved_percent_encoded($match)
	{
		// As we just have valid percent encoded sequences we can just explode
		// and ignore the first member of the returned array (an empty string).
		$bytes = explode('%', $match[0]);

		// Initialize the new string (this is what will be returned) and that
		// there are no bytes remaining in the current sequence (unsurprising
		// at the first byte!).
		$string = '';
		$remaining = 0;

		// Loop over each and every byte, and set $value to its value
		for ($i = 1, $len = count($bytes); $i < $len; $i++)
		{
			$value = hexdec($bytes[$i]);

			// If we're the first byte of sequence:
			if (!$remaining)
			{
				// Start position
				$start = $i;

				// By default we are valid
				$valid = true;

				// One byte sequence:
				if ($value <= 0x7F)
				{
					$character = $value;
					$length = 1;
				}
				// Two byte sequence:
				elseif (($value & 0xE0) === 0xC0)
				{
					$character = ($value & 0x1F) << 6;
					$length = 2;
					$remaining = 1;
				}
				// Three byte sequence:
				elseif (($value & 0xF0) === 0xE0)
				{
					$character = ($value & 0x0F) << 12;
					$length = 3;
					$remaining = 2;
				}
				// Four byte sequence:
				elseif (($value & 0xF8) === 0xF0)
				{
					$character = ($value & 0x07) << 18;
					$length = 4;
					$remaining = 3;
				}
				// Invalid byte:
				else
				{
					$valid = false;
					$remaining = 0;
				}
			}
			// Continuation byte:
			else
			{
				// Check that the byte is valid, then add it to the character:
				if (($value & 0xC0) === 0x80)
				{
					$remaining--;
					$character |= ($value & 0x3F) << ($remaining * 6);
				}
				// If it is invalid, count the sequence as invalid and reprocess the current byte as the start of a sequence:
				else
				{
					$valid = false;
					$remaining = 0;
					$i--;
				}
			}

			// If we've reached the end of the current byte sequence, append it to the result string
			if (!$remaining)
			{
				// Percent encode anything invalid or not in iunreserved
				if (
					// Invalid sequences
					!$valid
					// Non-shortest form sequences are invalid
					|| $length > 1 && $character <= 0x7F
					|| $length > 2 && $character <= 0x7FF
					|| $length > 3 && $character <= 0xFFFF
					// Outside of range of iunreserved codepoints
					|| $character < 0x2D
					|| $character > 0xEFFFD
					// Noncharacters
					|| ($character & 0xFFFE) === 0xFFFE
					|| $character >= 0xFDD0 && $character <= 0xFDEF
					// Everything else not in iunreserved (this is all BMP)
					|| $character === 0x2F
					|| $character > 0x39 && $character < 0x41
					|| $character > 0x5A && $character < 0x61
					|| $character > 0x7A && $character < 0x7E
					|| $character > 0x7E && $character < 0xA0
					|| $character > 0xD7FF && $character < 0xF900
				)
				{
					// Keep the sequence encoded, with uppercase hex digits
					for ($j = $start; $j <= $i; $j++)
					{
						$string .= '%' . strtoupper($bytes[$j]);
					}
				}
				else
				{
					// Emit the raw bytes of the decoded character
					for ($j = $start; $j <= $i; $j++)
					{
						$string .= chr(hexdec($bytes[$j]));
					}
				}
			}
		}

		// If we have any bytes left over they are invalid (i.e., we are
		// mid-way through a multi-byte sequence)
		if ($remaining)
		{
			for ($j = $start; $j < $len; $j++)
			{
				$string .= '%' . strtoupper($bytes[$j]);
			}
		}

		return $string;
	}
729
730	protected function scheme_normalization()
731	{
732		if (isset($this->normalization[$this->scheme]['iuserinfo']) && $this->iuserinfo === $this->normalization[$this->scheme]['iuserinfo'])
733		{
734			$this->iuserinfo = null;
735		}
736		if (isset($this->normalization[$this->scheme]['ihost']) && $this->ihost === $this->normalization[$this->scheme]['ihost'])
737		{
738			$this->ihost = null;
739		}
740		if (isset($this->normalization[$this->scheme]['port']) && $this->port === $this->normalization[$this->scheme]['port'])
741		{
742			$this->port = null;
743		}
744		if (isset($this->normalization[$this->scheme]['ipath']) && $this->ipath === $this->normalization[$this->scheme]['ipath'])
745		{
746			$this->ipath = '';
747		}
748		if (isset($this->normalization[$this->scheme]['iquery']) && $this->iquery === $this->normalization[$this->scheme]['iquery'])
749		{
750			$this->iquery = null;
751		}
752		if (isset($this->normalization[$this->scheme]['ifragment']) && $this->ifragment === $this->normalization[$this->scheme]['ifragment'])
753		{
754			$this->ifragment = null;
755		}
756	}
757
758	/**
759	 * Check if the object represents a valid IRI. This needs to be done on each
760	 * call as some things change depending on another part of the IRI.
761	 *
762	 * @return bool
763	 */
764	public function is_valid()
765	{
766		if ($this->ipath === '') return true;
767
768		$isauthority = $this->iuserinfo !== null || $this->ihost !== null ||
769			$this->port !== null;
770		if ($isauthority && $this->ipath[0] === '/') return true;
771
772		if (!$isauthority && (substr($this->ipath, 0, 2) === '//')) return false;
773
774		// Relative urls cannot have a colon in the first path segment (and the
775		// slashes themselves are not included so skip the first character).
776		if (!$this->scheme && !$isauthority &&
777		    strpos($this->ipath, ':') !== false &&
778		    strpos($this->ipath, '/', 1) !== false &&
779		    strpos($this->ipath, ':') < strpos($this->ipath, '/', 1)) return false;
780
781		return true;
782	}
783
784	/**
785	 * Set the entire IRI. Returns true on success, false on failure (if there
786	 * are any invalid characters).
787	 *
788	 * @param string $iri
789	 * @return bool
790	 */
791	public function set_iri($iri, $clear_cache = false)
792	{
793		static $cache;
794		if ($clear_cache)
795		{
796			$cache = null;
797			return;
798		}
799		if (!$cache)
800		{
801			$cache = array();
802		}
803
804		if ($iri === null)
805		{
806			return true;
807		}
808		elseif (isset($cache[$iri]))
809		{
810			list($this->scheme,
811				 $this->iuserinfo,
812				 $this->ihost,
813				 $this->port,
814				 $this->ipath,
815				 $this->iquery,
816				 $this->ifragment,
817				 $return) = $cache[$iri];
818			return $return;
819		}
820
821		$parsed = $this->parse_iri((string) $iri);
822		if (!$parsed)
823		{
824			return false;
825		}
826
827		$return = $this->set_scheme($parsed['scheme'])
828			&& $this->set_authority($parsed['authority'])
829			&& $this->set_path($parsed['path'])
830			&& $this->set_query($parsed['query'])
831			&& $this->set_fragment($parsed['fragment']);
832
833		$cache[$iri] = array($this->scheme,
834							 $this->iuserinfo,
835							 $this->ihost,
836							 $this->port,
837							 $this->ipath,
838							 $this->iquery,
839							 $this->ifragment,
840							 $return);
841		return $return;
842	}
843
844	/**
845	 * Set the scheme. Returns true on success, false on failure (if there are
846	 * any invalid characters).
847	 *
848	 * @param string $scheme
849	 * @return bool
850	 */
851	public function set_scheme($scheme)
852	{
853		if ($scheme === null)
854		{
855			$this->scheme = null;
856		}
857		elseif (!preg_match('/^[A-Za-z][0-9A-Za-z+\-.]*$/', $scheme))
858		{
859			$this->scheme = null;
860			return false;
861		}
862		else
863		{
864			$this->scheme = strtolower($scheme);
865		}
866		return true;
867	}
868
869	/**
870	 * Set the authority. Returns true on success, false on failure (if there are
871	 * any invalid characters).
872	 *
873	 * @param string $authority
874	 * @return bool
875	 */
876	public function set_authority($authority, $clear_cache = false)
877	{
878		static $cache;
879		if ($clear_cache)
880		{
881			$cache = null;
882			return;
883		}
884		if (!$cache)
885			$cache = array();
886
887		if ($authority === null)
888		{
889			$this->iuserinfo = null;
890			$this->ihost = null;
891			$this->port = null;
892			return true;
893		}
894		elseif (isset($cache[$authority]))
895		{
896			list($this->iuserinfo,
897				 $this->ihost,
898				 $this->port,
899				 $return) = $cache[$authority];
900
901			return $return;
902		}
903
904		$remaining = $authority;
905		if (($iuserinfo_end = strrpos($remaining, '@')) !== false)
906		{
907			$iuserinfo = substr($remaining, 0, $iuserinfo_end);
908			$remaining = substr($remaining, $iuserinfo_end + 1);
909		}
910		else
911		{
912			$iuserinfo = null;
913		}
914		if (($port_start = strpos($remaining, ':', strpos($remaining, ']'))) !== false)
915		{
916			if (($port = substr($remaining, $port_start + 1)) === false)
917			{
918				$port = null;
919			}
920			$remaining = substr($remaining, 0, $port_start);
921		}
922		else
923		{
924			$port = null;
925		}
926
927		$return = $this->set_userinfo($iuserinfo) &&
928				  $this->set_host($remaining) &&
929				  $this->set_port($port);
930
931		$cache[$authority] = array($this->iuserinfo,
932								   $this->ihost,
933								   $this->port,
934								   $return);
935
936		return $return;
937	}
938
939	/**
940	 * Set the iuserinfo.
941	 *
942	 * @param string $iuserinfo
943	 * @return bool
944	 */
945	public function set_userinfo($iuserinfo)
946	{
947		if ($iuserinfo === null)
948		{
949			$this->iuserinfo = null;
950		}
951		else
952		{
953			$this->iuserinfo = $this->replace_invalid_with_pct_encoding($iuserinfo, '!$&\'()*+,;=:');
954			$this->scheme_normalization();
955		}
956
957		return true;
958	}
959
960	/**
961	 * Set the ihost. Returns true on success, false on failure (if there are
962	 * any invalid characters).
963	 *
964	 * @param string $ihost
965	 * @return bool
966	 */
967	public function set_host($ihost)
968	{
969		if ($ihost === null)
970		{
971			$this->ihost = null;
972			return true;
973		}
974		elseif (substr($ihost, 0, 1) === '[' && substr($ihost, -1) === ']')
975		{
976			if (SimplePie_Net_IPv6::check_ipv6(substr($ihost, 1, -1)))
977			{
978				$this->ihost = '[' . SimplePie_Net_IPv6::compress(substr($ihost, 1, -1)) . ']';
979			}
980			else
981			{
982				$this->ihost = null;
983				return false;
984			}
985		}
986		else
987		{
988			$ihost = $this->replace_invalid_with_pct_encoding($ihost, '!$&\'()*+,;=');
989
990			// Lowercase, but ignore pct-encoded sections (as they should
991			// remain uppercase). This must be done after the previous step
992			// as that can add unescaped characters.
993			$position = 0;
994			$strlen = strlen($ihost);
995			while (($position += strcspn($ihost, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ%', $position)) < $strlen)
996			{
997				if ($ihost[$position] === '%')
998				{
999					$position += 3;
1000				}
1001				else
1002				{
1003					$ihost[$position] = strtolower($ihost[$position]);
1004					$position++;
1005				}
1006			}
1007
1008			$this->ihost = $ihost;
1009		}
1010
1011		$this->scheme_normalization();
1012
1013		return true;
1014	}
1015
1016	/**
1017	 * Set the port. Returns true on success, false on failure (if there are
1018	 * any invalid characters).
1019	 *
1020	 * @param string $port
1021	 * @return bool
1022	 */
1023	public function set_port($port)
1024	{
1025		if ($port === null)
1026		{
1027			$this->port = null;
1028			return true;
1029		}
1030		elseif (strspn($port, '0123456789') === strlen($port))
1031		{
1032			$this->port = (int) $port;
1033			$this->scheme_normalization();
1034			return true;
1035		}
1036
1037		$this->port = null;
1038		return false;
1039	}
1040
1041	/**
1042	 * Set the ipath.
1043	 *
1044	 * @param string $ipath
1045	 * @return bool
1046	 */
1047	public function set_path($ipath, $clear_cache = false)
1048	{
1049		static $cache;
1050		if ($clear_cache)
1051		{
1052			$cache = null;
1053			return;
1054		}
1055		if (!$cache)
1056		{
1057			$cache = array();
1058		}
1059
1060		$ipath = (string) $ipath;
1061
1062		if (isset($cache[$ipath]))
1063		{
1064			$this->ipath = $cache[$ipath][(int) ($this->scheme !== null)];
1065		}
1066		else
1067		{
1068			$valid = $this->replace_invalid_with_pct_encoding($ipath, '!$&\'()*+,;=@:/');
1069			$removed = $this->remove_dot_segments($valid);
1070
1071			$cache[$ipath] = array($valid, $removed);
1072			$this->ipath =  ($this->scheme !== null) ? $removed : $valid;
1073		}
1074
1075		$this->scheme_normalization();
1076		return true;
1077	}
1078
1079	/**
1080	 * Set the iquery.
1081	 *
1082	 * @param string $iquery
1083	 * @return bool
1084	 */
1085	public function set_query($iquery)
1086	{
1087		if ($iquery === null)
1088		{
1089			$this->iquery = null;
1090		}
1091		else
1092		{
1093			$this->iquery = $this->replace_invalid_with_pct_encoding($iquery, '!$&\'()*+,;=:@/?', true);
1094			$this->scheme_normalization();
1095		}
1096		return true;
1097	}
1098
1099	/**
1100	 * Set the ifragment.
1101	 *
1102	 * @param string $ifragment
1103	 * @return bool
1104	 */
1105	public function set_fragment($ifragment)
1106	{
1107		if ($ifragment === null)
1108		{
1109			$this->ifragment = null;
1110		}
1111		else
1112		{
1113			$this->ifragment = $this->replace_invalid_with_pct_encoding($ifragment, '!$&\'()*+,;=:@/?');
1114			$this->scheme_normalization();
1115		}
1116		return true;
1117	}
1118
1119	/**
1120	 * Convert an IRI to a URI (or parts thereof)
1121	 *
1122	 * @return string
1123	 */
1124	public function to_uri($string)
1125	{
1126		static $non_ascii;
1127		if (!$non_ascii)
1128		{
1129			$non_ascii = implode('', range("\x80", "\xFF"));
1130		}
1131
1132		$position = 0;
1133		$strlen = strlen($string);
1134		while (($position += strcspn($string, $non_ascii, $position)) < $strlen)
1135		{
1136			$string = substr_replace($string, sprintf('%%%02X', ord($string[$position])), $position, 1);
1137			$position += 3;
1138			$strlen += 2;
1139		}
1140
1141		return $string;
1142	}
1143
1144	/**
1145	 * Get the complete IRI
1146	 *
1147	 * @return string
1148	 */
1149	public function get_iri()
1150	{
1151		if (!$this->is_valid())
1152		{
1153			return false;
1154		}
1155
1156		$iri = '';
1157		if ($this->scheme !== null)
1158		{
1159			$iri .= $this->scheme . ':';
1160		}
1161		if (($iauthority = $this->get_iauthority()) !== null)
1162		{
1163			$iri .= '//' . $iauthority;
1164		}
1165		if ($this->ipath !== '')
1166		{
1167			$iri .= $this->ipath;
1168		}
1169        elseif (!empty($this->normalization[$this->scheme]['ipath']) && $iauthority !== null && $iauthority !== '')
1170		{
1171			$iri .= $this->normalization[$this->scheme]['ipath'];
1172		}
1173		if ($this->iquery !== null)
1174		{
1175			$iri .= '?' . $this->iquery;
1176		}
1177		if ($this->ifragment !== null)
1178		{
1179			$iri .= '#' . $this->ifragment;
1180		}
1181
1182		return $iri;
1183	}
1184
1185	/**
1186	 * Get the complete URI
1187	 *
1188	 * @return string
1189	 */
1190	public function get_uri()
1191	{
1192		return $this->to_uri($this->get_iri());
1193	}
1194
1195	/**
1196	 * Get the complete iauthority
1197	 *
1198	 * @return string
1199	 */
1200	protected function get_iauthority()
1201	{
1202		if ($this->iuserinfo !== null || $this->ihost !== null || $this->port !== null)
1203		{
1204			$iauthority = '';
1205			if ($this->iuserinfo !== null)
1206			{
1207				$iauthority .= $this->iuserinfo . '@';
1208			}
1209			if ($this->ihost !== null)
1210			{
1211				$iauthority .= $this->ihost;
1212			}
1213            if ($this->port !== null && $this->port !== 0)
1214			{
1215				$iauthority .= ':' . $this->port;
1216			}
1217			return $iauthority;
1218		}
1219
1220		return null;
1221	}
1222
1223	/**
1224	 * Get the complete authority
1225	 *
1226	 * @return string
1227	 */
1228	protected function get_authority()
1229	{
1230		$iauthority = $this->get_iauthority();
1231		if (is_string($iauthority))
1232			return $this->to_uri($iauthority);
1233
1234		return $iauthority;
1235	}
1236}
1237