<?php

declare(strict_types=1);
/**
 * SimplePie
 *
 * A PHP-Based RSS and Atom Feed Framework.
 * Takes the hard work out of managing a complete RSS/Atom solution.
 *
 * Copyright (c) 2004-2022, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *     * Redistributions of source code must retain the above copyright notice, this list of
 *       conditions and the following disclaimer.
 *
 *     * Redistributions in binary form must reproduce the above copyright notice, this list
 *       of conditions and the following disclaimer in the documentation and/or other materials
 *       provided with the distribution.
 *
 *     * Neither the name of the SimplePie Team nor the names of its contributors may be used
 *       to endorse or promote products derived from this software without specific prior
 *       written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
 * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * @package SimplePie
 * @copyright 2004-2016 Ryan Parman, Sam Sneddon, Ryan McCue
 * @author Ryan Parman
 * @author Sam Sneddon
 * @author Ryan McCue
 * @link http://simplepie.org/ SimplePie
 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
 */

namespace SimplePie;

/**
 * Used for feed auto-discovery
 *
 *
 * This class can be overloaded with {@see \SimplePie\SimplePie::set_locator_class()}
 *
 * @package SimplePie
 */
class Locator implements RegistryAware
{
    /** @var string|null User agent forwarded to every File created while probing candidates */
    public $useragent;
    /** @var int Timeout forwarded to every File created while probing candidates */
    public $timeout;
    /** @var \SimplePie\File The document that is searched for feed references */
    public $file;
    /** @var array Absolute links whose authority is empty or equals the document's authority */
    public $local = [];
    /** @var array Absolute links pointing at a different authority */
    public $elsewhere = [];
    /** @var array Not used by any method of this class */
    public $cached_entities = [];
    /** @var string|null URL of the searched document, set by get_base() */
    public $http_base;
    /** @var string|null Base URL for relative links: first usable <base href>, else $http_base */
    public $base;
    /** @var int Line number of the <base> element that supplied $base (0 when none/unknown) */
    public $base_location = 0;
    /** @var int Number of candidate URLs fetched so far */
    public $checked_feeds = 0;
    /** @var int Probing stops once $checked_feeds is identical to this value */
    public $max_checked_feeds = 10;
    /** @var bool Forwarded to every File created while probing candidates */
    public $force_fsockopen = false;
    /** @var array Forwarded to every File created while probing candidates */
    public $curl_options = [];
    /** @var \DOMDocument|null Parsed document body, or null when it could not be parsed */
    public $dom;
    /** @var \SimplePie\Registry Set through set_registry(); required by most methods */
    protected $registry;

    /**
     * Store the probing options and parse the document body into a DOM tree.
     *
     * $this->dom is left null when DOMDocument is unavailable, the body is
     * empty, or parsing throws.
     *
     * @param \SimplePie\File $file
     * @param int $timeout
     * @param string|null $useragent
     * @param int $max_checked_feeds
     * @param bool $force_fsockopen
     * @param array $curl_options
     */
    public function __construct(\SimplePie\File $file, $timeout = 10, $useragent = null, $max_checked_feeds = 10, $force_fsockopen = false, $curl_options = [])
    {
        $this->file = $file;
        $this->useragent = $useragent;
        $this->timeout = $timeout;
        $this->max_checked_feeds = $max_checked_feeds;
        $this->force_fsockopen = $force_fsockopen;
        $this->curl_options = $curl_options;

        if (class_exists('DOMDocument') && $this->file->body != '') {
            $this->dom = new \DOMDocument();

            // Markup found in the wild is rarely valid; silence libxml's warnings while parsing.
            set_error_handler(['SimplePie\Misc', 'silence_errors']);
            try {
                $this->dom->loadHTML($this->file->body);
            } catch (\Throwable $ex) {
                $this->dom = null;
            } finally {
                // Always put the previous handler back, whatever happened above.
                restore_error_handler();
            }
        } else {
            $this->dom = null;
        }
    }

    /**
     * Inject the registry used to create helper objects and call Misc helpers.
     *
     * @param \SimplePie\Registry $registry
     * @return void
     */
    public function set_registry(\SimplePie\Registry $registry)/* : void */
    {
        $this->registry = $registry;
    }

    /**
     * Locate a feed for the document, trying the strategies enabled in $type in order:
     * autodiscovery, local links by extension, local links by URL keyword,
     * remote links by extension, remote links by URL keyword.
     *
     * @param int $type Bitmask of \SimplePie\SimplePie::LOCATOR_* flags
     * @param array|null $working Receives the result list of the strategy that was tried last
     * @return \SimplePie\File|null The document itself when it already is a feed, the first
     *                              feed found, or null
     * @throws \SimplePie\Exception When a DOM-based strategy is needed but no DOM is available
     */
    public function find($type = \SimplePie\SimplePie::LOCATOR_ALL, &$working = null)
    {
        if ($this->is_feed($this->file)) {
            return $this->file;
        }

        // Only HTML documents fetched remotely are worth searching for feed links.
        if ($this->file->method & \SimplePie\SimplePie::FILE_SOURCE_REMOTE) {
            $sniffer = $this->registry->create(Content\Type\Sniffer::class, [$this->file]);
            if ($sniffer->get_type() !== 'text/html') {
                return null;
            }
        }

        if ($type & ~\SimplePie\SimplePie::LOCATOR_NONE) {
            $this->get_base();
        }

        if (($type & \SimplePie\SimplePie::LOCATOR_AUTODISCOVERY) && ($working = $this->autodiscovery())) {
            return $working[0];
        }

        $link_strategies = \SimplePie\SimplePie::LOCATOR_LOCAL_EXTENSION
            | \SimplePie\SimplePie::LOCATOR_LOCAL_BODY
            | \SimplePie\SimplePie::LOCATOR_REMOTE_EXTENSION
            | \SimplePie\SimplePie::LOCATOR_REMOTE_BODY;

        if (($type & $link_strategies) && $this->get_links()) {
            if (($type & \SimplePie\SimplePie::LOCATOR_LOCAL_EXTENSION) && ($working = $this->extension($this->local))) {
                return $working[0];
            }

            if (($type & \SimplePie\SimplePie::LOCATOR_LOCAL_BODY) && ($working = $this->body($this->local))) {
                return $working[0];
            }

            if (($type & \SimplePie\SimplePie::LOCATOR_REMOTE_EXTENSION) && ($working = $this->extension($this->elsewhere))) {
                return $working[0];
            }

            if (($type & \SimplePie\SimplePie::LOCATOR_REMOTE_BODY) && ($working = $this->body($this->elsewhere))) {
                return $working[0];
            }
        }

        return null;
    }

    /**
     * Decide whether a fetched file should be treated as a feed.
     *
     * Remote files are accepted when their sniffed content type is one of the
     * known feed/XML types (plus text/html when $check_html is true). Files
     * flagged as local are always accepted; anything else is rejected.
     *
     * @param \SimplePie\File $file
     * @param bool $check_html Also accept text/html
     * @return bool
     */
    public function is_feed($file, $check_html = false)
    {
        if ($file->method & \SimplePie\SimplePie::FILE_SOURCE_REMOTE) {
            $sniffer = $this->registry->create(Content\Type\Sniffer::class, [$file]);
            $sniffed = $sniffer->get_type();
            $mime_types = [
                'application/rss+xml',
                'application/rdf+xml',
                'text/rdf',
                'application/atom+xml',
                'text/xml',
                'application/xml',
                'application/x-rss+xml',
            ];
            if ($check_html) {
                $mime_types[] = 'text/html';
            }

            return in_array($sniffed, $mime_types, true);
        }

        if ($file->method & \SimplePie\SimplePie::FILE_SOURCE_LOCAL) {
            return true;
        }

        return false;
    }

    /**
     * Initialise $http_base and $base from the document URL, then let the
     * first <base href> that can be absolutized override $base (recording the
     * element's line number in $base_location).
     *
     * @return void
     * @throws \SimplePie\Exception When no DOM is available
     */
    public function get_base()
    {
        if ($this->dom === null) {
            throw new \SimplePie\Exception('DOMDocument not found, unable to use locator');
        }

        $this->http_base = $this->file->url;
        $this->base = $this->http_base;

        foreach ($this->dom->getElementsByTagName('base') as $element) {
            if (!$element->hasAttribute('href')) {
                continue;
            }

            $base = $this->registry->call(Misc::class, 'absolutize_url', [trim($element->getAttribute('href')), $this->http_base]);
            if ($base === false) {
                continue;
            }

            $this->base = $base;
            $this->base_location = method_exists($element, 'getLineNo') ? $element->getLineNo() : 0;
            break;
        }
    }

    /**
     * Collect feeds referenced through rel attributes on <link>, <a> and <area> elements.
     *
     * @return array|null List of feed files (re-indexed from 0), or null when none was found
     * @throws \SimplePie\Exception When no DOM is available
     */
    public function autodiscovery()
    {
        $done = [];
        $feeds = [];

        foreach (['link', 'a', 'area'] as $tag) {
            $feeds = array_merge($feeds, $this->search_elements_by_tag($tag, $done, $feeds));
        }

        if (!empty($feeds)) {
            return array_values($feeds);
        }

        return null;
    }

    /**
     * Probe every element named $name that carries both href and rel attributes.
     *
     * @param string $name Tag name to scan
     * @param array $done Absolute URLs already looked at; appended to by this method
     * @param array $feeds Feeds found so far, keyed by absolute URL
     * @return array $feeds extended with the feeds found in this pass
     * @throws \SimplePie\Exception When no DOM is available
     */
    protected function search_elements_by_tag($name, &$done, $feeds)
    {
        if ($this->dom === null) {
            throw new \SimplePie\Exception('DOMDocument not found, unable to use locator');
        }

        foreach ($this->dom->getElementsByTagName($name) as $link) {
            if ($this->checked_feeds === $this->max_checked_feeds) {
                break;
            }

            if (!$link->hasAttribute('href') || !$link->hasAttribute('rel')) {
                continue;
            }

            $rel = array_unique($this->registry->call(Misc::class, 'space_separated_tokens', [strtolower($link->getAttribute('rel'))]));
            $line = method_exists($link, 'getLineNo') ? $link->getLineNo() : 1;

            // <base> only applies to elements that come after it in the document.
            $resolve_against = $this->base_location < $line ? $this->base : $this->http_base;
            $href = $this->registry->call(Misc::class, 'absolutize_url', [trim($link->getAttribute('href')), $resolve_against]);
            if ($href === false) {
                continue;
            }

            // The grouping below is exactly how PHP has always parsed this condition
            // (&& binds tighter than ||); the parentheses only make it explicit.
            // A rel="feed" link is probed unless its URL was already seen; a
            // rel="alternate" link with a feed-like type is probed unless its URL
            // already produced a feed.
            if (
                (!in_array($href, $done, true) && in_array('feed', $rel, true))
                || (
                    (
                        in_array('alternate', $rel, true)
                        && !in_array('stylesheet', $rel, true)
                        && $link->hasAttribute('type')
                        && in_array(
                            strtolower($this->registry->call(Misc::class, 'parse_mime', [$link->getAttribute('type')])),
                            ['text/html', 'application/rss+xml', 'application/atom+xml'],
                            true
                        )
                    )
                    && !isset($feeds[$href])
                )
            ) {
                $this->checked_feeds++;
                $feed = $this->fetch_candidate($href);
                if ($this->is_acceptable_response($feed) && $this->is_feed($feed, true)) {
                    $feeds[$href] = $feed;
                }
            }

            $done[] = $href;
        }

        return $feeds;
    }

    /**
     * Sort every <a href> of the document into $this->local or $this->elsewhere.
     *
     * Only links without a scheme or with an http, https or feed scheme are considered.
     *
     * @return true|null true when at least one link was collected, null otherwise
     * @throws \SimplePie\Exception When no DOM is available
     */
    public function get_links()
    {
        if ($this->dom === null) {
            throw new \SimplePie\Exception('DOMDocument not found, unable to use locator');
        }

        // Parsed lazily and only once: the document URL does not change while looping.
        $current = null;

        foreach ($this->dom->getElementsByTagName('a') as $link) {
            if (!$link->hasAttribute('href')) {
                continue;
            }

            $raw_href = trim($link->getAttribute('href'));
            $parsed = $this->registry->call(Misc::class, 'parse_url', [$raw_href]);
            if ($parsed['scheme'] !== '' && !preg_match('/^(https?|feed)?$/i', $parsed['scheme'])) {
                continue;
            }

            // <base> only applies to elements that come after it in the document.
            if (method_exists($link, 'getLineNo') && $this->base_location < $link->getLineNo()) {
                $href = $this->registry->call(Misc::class, 'absolutize_url', [$raw_href, $this->base]);
            } else {
                $href = $this->registry->call(Misc::class, 'absolutize_url', [$raw_href, $this->http_base]);
            }
            if ($href === false) {
                continue;
            }

            if ($current === null) {
                $current = $this->registry->call(Misc::class, 'parse_url', [$this->file->url]);
            }

            if ($parsed['authority'] === '' || $parsed['authority'] === $current['authority']) {
                $this->local[] = $href;
            } else {
                $this->elsewhere[] = $href;
            }
        }

        $this->local = array_unique($this->local);
        $this->elsewhere = array_unique($this->elsewhere);

        if (!empty($this->local) || !empty($this->elsewhere)) {
            return true;
        }

        return null;
    }

    /**
     * Return the absolute URL of the first <a> or <link> element whose rel
     * attribute contains $rel.
     *
     * Only links without a scheme or with an http/https scheme are considered.
     * A link whose href cannot be made absolute is skipped, so that it does not
     * hide a valid link further down the document.
     *
     * @param string $rel Lower-case rel token to look for
     * @return string|null
     * @throws \SimplePie\Exception When no DOM is available or DOMXpath is missing
     */
    public function get_rel_link($rel)
    {
        if ($this->dom === null) {
            throw new \SimplePie\Exception('DOMDocument not found, unable to use locator');
        }
        if (!class_exists('DOMXpath')) {
            throw new \SimplePie\Exception('DOMXpath not found, unable to use get_rel_link');
        }

        $xpath = new \DOMXpath($this->dom);
        $query = '//a[@rel and @href] | //link[@rel and @href]';

        foreach ($xpath->query($query) as $link) {
            $raw_href = trim($link->getAttribute('href'));
            $parsed = $this->registry->call(Misc::class, 'parse_url', [$raw_href]);
            if ($parsed['scheme'] !== '' && !preg_match('/^https?$/i', $parsed['scheme'])) {
                continue;
            }

            // <base> only applies to elements that come after it in the document.
            if (method_exists($link, 'getLineNo') && $this->base_location < $link->getLineNo()) {
                $href = $this->registry->call(Misc::class, 'absolutize_url', [$raw_href, $this->base]);
            } else {
                $href = $this->registry->call(Misc::class, 'absolutize_url', [$raw_href, $this->http_base]);
            }
            if ($href === false) {
                continue;
            }

            $rel_values = explode(' ', strtolower($link->getAttribute('rel')));
            if (in_array($rel, $rel_values)) {
                return $href;
            }
        }

        return null;
    }

    /**
     * Probe the URLs in $array whose last dot-suffix is .rss, .rdf, .atom or .xml.
     *
     * URLs that were probed without yielding a feed are removed from $array.
     *
     * @param array $array Candidate URLs (modified in place)
     * @return array|null Single-element list holding the first feed found, or null
     */
    public function extension(&$array)
    {
        foreach ($array as $key => $value) {
            if ($this->checked_feeds === $this->max_checked_feeds) {
                break;
            }

            // strrchr() returns false when the URL contains no dot at all; passing
            // that to strtolower() would raise a TypeError under strict_types.
            $suffix = strrchr($value, '.');
            if ($suffix === false || !in_array(strtolower($suffix), ['.rss', '.rdf', '.atom', '.xml'], true)) {
                continue;
            }

            $this->checked_feeds++;
            $feed = $this->fetch_candidate($value);
            if ($this->is_acceptable_response($feed) && $this->is_feed($feed)) {
                return [$feed];
            }

            unset($array[$key]);
        }

        return null;
    }

    /**
     * Probe the URLs in $array that contain "feed", "rss", "rdf", "atom" or "xml".
     *
     * URLs that were probed without yielding a feed are removed from $array.
     *
     * @param array $array Candidate URLs (modified in place)
     * @return array|null Single-element list holding the first feed found, or null
     */
    public function body(&$array)
    {
        foreach ($array as $key => $value) {
            if ($this->checked_feeds === $this->max_checked_feeds) {
                break;
            }

            if (!preg_match('/(feed|rss|rdf|atom|xml)/i', $value)) {
                continue;
            }

            $this->checked_feeds++;
            $feed = $this->fetch_candidate($value);
            if ($this->is_acceptable_response($feed) && $this->is_feed($feed)) {
                return [$feed];
            }

            unset($array[$key]);
        }

        return null;
    }

    /**
     * Fetch a candidate URL, sending an Accept header that prefers feed content types.
     *
     * @param string $url
     * @return \SimplePie\File
     */
    private function fetch_candidate($url)
    {
        $headers = [
            'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1',
        ];

        return $this->registry->create(File::class, [$url, $this->timeout, 5, $headers, $this->useragent, $this->force_fsockopen, $this->curl_options]);
    }

    /**
     * Tell whether a fetched candidate is worth sniffing: the fetch succeeded and
     * the file either is not flagged as remote, or has status 200, or a status
     * strictly between 206 and 300.
     *
     * @param \SimplePie\File $feed
     * @return bool
     */
    private function is_acceptable_response($feed)
    {
        if (!$feed->success) {
            return false;
        }

        // The parentheses matter: "===" binds tighter than "&", so without them
        // the flag constant would be compared with 0 before the bit test.
        if (($feed->method & \SimplePie\SimplePie::FILE_SOURCE_REMOTE) === 0) {
            return true;
        }

        return $feed->status_code === 200 || ($feed->status_code > 206 && $feed->status_code < 300);
    }
}

class_alias('SimplePie\Locator', 'SimplePie_Locator', false);