1<?php
2
3declare(strict_types=1);
4/**
5 * SimplePie
6 *
7 * A PHP-Based RSS and Atom Feed Framework.
8 * Takes the hard work out of managing a complete RSS/Atom solution.
9 *
10 * Copyright (c) 2004-2022, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors
11 * All rights reserved.
12 *
13 * Redistribution and use in source and binary forms, with or without modification, are
14 * permitted provided that the following conditions are met:
15 *
16 * 	* Redistributions of source code must retain the above copyright notice, this list of
17 * 	  conditions and the following disclaimer.
18 *
19 * 	* Redistributions in binary form must reproduce the above copyright notice, this list
20 * 	  of conditions and the following disclaimer in the documentation and/or other materials
21 * 	  provided with the distribution.
22 *
23 * 	* Neither the name of the SimplePie Team nor the names of its contributors may be used
24 * 	  to endorse or promote products derived from this software without specific prior
25 * 	  written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
28 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
29 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
30 * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
32 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
34 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 *
37 * @package SimplePie
38 * @copyright 2004-2016 Ryan Parman, Sam Sneddon, Ryan McCue
39 * @author Ryan Parman
40 * @author Sam Sneddon
41 * @author Ryan McCue
42 * @link http://simplepie.org/ SimplePie
43 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
44 */
45
46namespace SimplePie;
47
48/**
49 * Used for feed auto-discovery
50 *
51 *
52 * This class can be overloaded with {@see \SimplePie\SimplePie::set_locator_class()}
53 *
54 * @package SimplePie
55 */
class Locator implements RegistryAware
{
    /** @var string|null User agent forwarded to every File created for a candidate feed */
    public $useragent;
    /** @var int Timeout forwarded to every File created for a candidate feed */
    public $timeout;
    /** @var \SimplePie\File The document that is inspected for feeds */
    public $file;
    /** @var string[] Absolute URLs of links without authority or on the document's own authority (filled by get_links()) */
    public $local = [];
    /** @var string[] Absolute URLs of links pointing at a different authority (filled by get_links()) */
    public $elsewhere = [];
    /** @var array Not referenced anywhere in this class */
    public $cached_entities = [];
    /** @var string|null URL of the inspected document (set by get_base()) */
    public $http_base;
    /** @var string|null Base URL for relative links: the first usable <base href>, else the document URL (set by get_base()) */
    public $base;
    /** @var int Line number of the <base> element that supplied $base, 0 when unknown */
    public $base_location = 0;
    /** @var int Number of candidate URLs fetched so far */
    public $checked_feeds = 0;
    /** @var int Fetching stops once $checked_feeds equals this value */
    public $max_checked_feeds = 10;
    /** @var bool Forwarded to every File created for a candidate feed */
    public $force_fsockopen = false;
    /** @var array Forwarded to every File created for a candidate feed */
    public $curl_options = [];
    /** @var \DOMDocument|null Parsed document body, null when it could not be parsed */
    public $dom;
    /** @var \SimplePie\Registry */
    protected $registry;

    /**
     * Store the settings and try to parse the body of $file as HTML.
     *
     * $dom stays null when DOMDocument is unavailable, the body is empty or
     * parsing throws; the DOM based methods then raise an exception.
     *
     * @param \SimplePie\File $file Document to inspect
     * @param int $timeout
     * @param string|null $useragent
     * @param int $max_checked_feeds
     * @param bool $force_fsockopen
     * @param array $curl_options
     */
    public function __construct(\SimplePie\File $file, $timeout = 10, $useragent = null, $max_checked_feeds = 10, $force_fsockopen = false, $curl_options = [])
    {
        $this->file = $file;
        $this->useragent = $useragent;
        $this->timeout = $timeout;
        $this->max_checked_feeds = $max_checked_feeds;
        $this->force_fsockopen = $force_fsockopen;
        $this->curl_options = $curl_options;
        $this->dom = null;

        // Loose comparison on purpose: it also rejects a null/false body.
        if (class_exists('DOMDocument') && $this->file->body != '') {
            $this->dom = new \DOMDocument();

            // Malformed HTML makes libxml emit warnings; silence them while parsing.
            set_error_handler(['SimplePie\Misc', 'silence_errors']);
            try {
                $this->dom->loadHTML($this->file->body);
            } catch (\Throwable $ex) {
                $this->dom = null;
            } finally {
                restore_error_handler();
            }
        }
    }

    /**
     * @param \SimplePie\Registry $registry Registry used to create and call collaborating classes
     */
    public function set_registry(\SimplePie\Registry $registry)/* : void */
    {
        $this->registry = $registry;
    }

    /**
     * Locate a feed, starting from the inspected document.
     *
     * The document itself is returned when it already is a feed. Otherwise the
     * strategies enabled in $type are tried in this order: autodiscovery, local
     * links by extension, local links by URL content, remote links by
     * extension, remote links by URL content.
     *
     * @param int $type Bit mask of \SimplePie\SimplePie::LOCATOR_* constants
     * @param array|null $working Receives the result of the last strategy that was tried
     * @return \SimplePie\File|null The first feed found, null when there is none
     * @throws \SimplePie\Exception When a DOM based strategy is requested but no DOM is available
     */
    public function find($type = \SimplePie\SimplePie::LOCATOR_ALL, &$working = null)
    {
        if ($this->is_feed($this->file)) {
            return $this->file;
        }

        // A remote document that is neither a feed nor HTML has nothing to discover.
        if ($this->file->method & \SimplePie\SimplePie::FILE_SOURCE_REMOTE) {
            $sniffer = $this->registry->create(Content\Type\Sniffer::class, [$this->file]);
            if ($sniffer->get_type() !== 'text/html') {
                return null;
            }
        }

        if ($type & ~\SimplePie\SimplePie::LOCATOR_NONE) {
            $this->get_base();
        }

        if ($type & \SimplePie\SimplePie::LOCATOR_AUTODISCOVERY && $working = $this->autodiscovery()) {
            return $working[0];
        }

        if ($type & (\SimplePie\SimplePie::LOCATOR_LOCAL_EXTENSION | \SimplePie\SimplePie::LOCATOR_LOCAL_BODY | \SimplePie\SimplePie::LOCATOR_REMOTE_EXTENSION | \SimplePie\SimplePie::LOCATOR_REMOTE_BODY) && $this->get_links()) {
            if ($type & \SimplePie\SimplePie::LOCATOR_LOCAL_EXTENSION && $working = $this->extension($this->local)) {
                return $working[0];
            }

            if ($type & \SimplePie\SimplePie::LOCATOR_LOCAL_BODY && $working = $this->body($this->local)) {
                return $working[0];
            }

            if ($type & \SimplePie\SimplePie::LOCATOR_REMOTE_EXTENSION && $working = $this->extension($this->elsewhere)) {
                return $working[0];
            }

            if ($type & \SimplePie\SimplePie::LOCATOR_REMOTE_BODY && $working = $this->body($this->elsewhere)) {
                return $working[0];
            }
        }

        return null;
    }

    /**
     * Decide whether a fetched file should be treated as a feed.
     *
     * Remote files are judged by their sniffed content type; local files are
     * always accepted; files that are neither are rejected.
     *
     * @param \SimplePie\File $file
     * @param bool $check_html Also accept text/html for remote files
     * @return bool
     */
    public function is_feed($file, $check_html = false)
    {
        if ($file->method & \SimplePie\SimplePie::FILE_SOURCE_REMOTE) {
            $sniffer = $this->registry->create(Content\Type\Sniffer::class, [$file]);
            $sniffed = $sniffer->get_type();
            $mime_types = ['application/rss+xml', 'application/rdf+xml',
                                'text/rdf', 'application/atom+xml', 'text/xml',
                                'application/xml', 'application/x-rss+xml'];
            if ($check_html) {
                $mime_types[] = 'text/html';
            }

            return in_array($sniffed, $mime_types, true);
        }

        return (bool) ($file->method & \SimplePie\SimplePie::FILE_SOURCE_LOCAL);
    }

    /**
     * Determine the URLs against which relative links are resolved.
     *
     * $http_base becomes the document URL. $base becomes the first <base href>
     * that can be absolutized, falling back to the document URL.
     *
     * @return void
     * @throws \SimplePie\Exception When no DOM is available
     */
    public function get_base()
    {
        if ($this->dom === null) {
            throw new \SimplePie\Exception('DOMDocument not found, unable to use locator');
        }
        $this->http_base = $this->file->url;
        $this->base = $this->http_base;
        $elements = $this->dom->getElementsByTagName('base');
        foreach ($elements as $element) {
            if (!$element->hasAttribute('href')) {
                continue;
            }
            $base = $this->registry->call(Misc::class, 'absolutize_url', [trim($element->getAttribute('href')), $this->http_base]);
            if ($base === false) {
                continue;
            }
            $this->base = $base;
            // Remember where the <base> sits: links at or before that line use $http_base.
            $this->base_location = method_exists($element, 'getLineNo') ? $element->getLineNo() : 0;
            break;
        }
    }

    /**
     * Collect feeds advertised through rel attributes on <link>, <a> and <area>.
     *
     * @return \SimplePie\File[]|null List of feeds, null when none was found
     * @throws \SimplePie\Exception When no DOM is available
     */
    public function autodiscovery()
    {
        $done = [];
        $feeds = [];
        $feeds = array_merge($feeds, $this->search_elements_by_tag('link', $done, $feeds));
        $feeds = array_merge($feeds, $this->search_elements_by_tag('a', $done, $feeds));
        $feeds = array_merge($feeds, $this->search_elements_by_tag('area', $done, $feeds));

        if (!empty($feeds)) {
            return array_values($feeds);
        }

        return null;
    }

    /**
     * Fetch the feed candidates advertised by all elements with the given tag name.
     *
     * @param string $name Tag name to scan
     * @param string[] $done URLs that were already looked at; every examined URL is appended
     * @param array<string, \SimplePie\File> $feeds Feeds found so far, keyed by URL
     * @return array<string, \SimplePie\File> $feeds extended by the newly found feeds
     * @throws \SimplePie\Exception When no DOM is available
     */
    protected function search_elements_by_tag($name, &$done, $feeds)
    {
        if ($this->dom === null) {
            throw new \SimplePie\Exception('DOMDocument not found, unable to use locator');
        }

        $links = $this->dom->getElementsByTagName($name);
        foreach ($links as $link) {
            if ($this->checked_feeds === $this->max_checked_feeds) {
                break;
            }
            if (!$link->hasAttribute('href') || !$link->hasAttribute('rel')) {
                continue;
            }

            $rel = array_unique($this->registry->call(Misc::class, 'space_separated_tokens', [strtolower($link->getAttribute('rel'))]));
            $line = method_exists($link, 'getLineNo') ? $link->getLineNo() : 1;

            // A <base> element only affects links that come after it.
            $base = $this->base_location < $line ? $this->base : $this->http_base;
            $href = $this->registry->call(Misc::class, 'absolutize_url', [trim($link->getAttribute('href')), $base]);
            if ($href === false) {
                continue;
            }

            // The grouping below spells out how the previous unparenthesised
            // `A && B || C && D` expression was evaluated; behaviour is unchanged.
            // NOTE(review): $done therefore only guards rel="feed" links, while
            // alternates are only skipped once they are in $feeds. Confirm that
            // this is intended before tightening it.
            $is_new_feed_link = !in_array($href, $done, true) && in_array('feed', $rel, true);
            $is_unlisted_alternate = !$is_new_feed_link
                && in_array('alternate', $rel, true)
                && !in_array('stylesheet', $rel, true)
                && $link->hasAttribute('type')
                && in_array(strtolower($this->registry->call(Misc::class, 'parse_mime', [$link->getAttribute('type')])), ['text/html', 'application/rss+xml', 'application/atom+xml'], true)
                && !isset($feeds[$href]);

            if ($is_new_feed_link || $is_unlisted_alternate) {
                $this->checked_feeds++;
                $feed = $this->fetch_candidate($href);
                if ($this->is_usable_response($feed) && $this->is_feed($feed, true)) {
                    $feeds[$href] = $feed;
                }
            }
            $done[] = $href;
        }

        return $feeds;
    }

    /**
     * Sort the targets of all <a href> elements into $local and $elsewhere.
     *
     * Only links without a scheme or with an http, https or feed scheme are
     * considered.
     *
     * @return true|null True when at least one link was collected, null otherwise
     * @throws \SimplePie\Exception When no DOM is available
     */
    public function get_links()
    {
        if ($this->dom === null) {
            throw new \SimplePie\Exception('DOMDocument not found, unable to use locator');
        }

        // Parsed lazily so that documents without usable links cost no extra call.
        $current = null;

        $links = $this->dom->getElementsByTagName('a');
        foreach ($links as $link) {
            if (!$link->hasAttribute('href')) {
                continue;
            }

            $raw_href = trim($link->getAttribute('href'));
            $parsed = $this->registry->call(Misc::class, 'parse_url', [$raw_href]);
            if ($parsed['scheme'] !== '' && !preg_match('/^(https?|feed)?$/i', $parsed['scheme'])) {
                continue;
            }

            // A <base> element only affects links that come after it.
            $use_base = method_exists($link, 'getLineNo') && $this->base_location < $link->getLineNo();
            $href = $this->registry->call(Misc::class, 'absolutize_url', [$raw_href, $use_base ? $this->base : $this->http_base]);
            if ($href === false) {
                continue;
            }

            if ($current === null) {
                $current = $this->registry->call(Misc::class, 'parse_url', [$this->file->url]);
            }

            if ($parsed['authority'] === '' || $parsed['authority'] === $current['authority']) {
                $this->local[] = $href;
            } else {
                $this->elsewhere[] = $href;
            }
        }

        $this->local = array_unique($this->local);
        $this->elsewhere = array_unique($this->elsewhere);
        if (!empty($this->local) || !empty($this->elsewhere)) {
            return true;
        }

        return null;
    }

    /**
     * Return the absolute URL of the first <a> or <link> carrying the given rel token.
     *
     * Only links without a scheme or with an http(s) scheme are considered.
     * Links whose href cannot be absolutized are skipped.
     *
     * @param string $rel Lower-case rel token to look for
     * @return string|null
     * @throws \SimplePie\Exception When no DOM or no DOMXpath is available
     */
    public function get_rel_link($rel)
    {
        if ($this->dom === null) {
            throw new \SimplePie\Exception('DOMDocument not found, unable to use '.
                                          'locator');
        }
        if (!class_exists('DOMXpath')) {
            throw new \SimplePie\Exception('DOMXpath not found, unable to use '.
                                          'get_rel_link');
        }

        $xpath = new \DOMXpath($this->dom);
        $query = '//a[@rel and @href] | //link[@rel and @href]';
        foreach ($xpath->query($query) as $link) {
            $raw_href = trim($link->getAttribute('href'));
            $parsed = $this->registry->call(Misc::class, 'parse_url', [$raw_href]);
            if ($parsed['scheme'] !== '' && !preg_match('/^https?$/i', $parsed['scheme'])) {
                continue;
            }

            // A <base> element only affects links that come after it.
            $use_base = method_exists($link, 'getLineNo') && $this->base_location < $link->getLineNo();
            $href = $this->registry->call(
                Misc::class,
                'absolutize_url',
                [$raw_href, $use_base ? $this->base : $this->http_base]
            );
            if ($href === false) {
                // One unresolvable link must not hide later matches; every other
                // loop in this class skips such links as well.
                continue;
            }

            // rel is a whitespace separated token list, so tabs, newlines and
            // repeated spaces have to be handled like in search_elements_by_tag().
            $rel_values = $this->registry->call(Misc::class, 'space_separated_tokens', [strtolower($link->getAttribute('rel'))]);
            if (in_array($rel, $rel_values, true)) {
                return $href;
            }
        }

        return null;
    }

    /**
     * Fetch the URLs that end in a typical feed extension until one is a feed.
     *
     * @param string[] $array Candidate URLs; fetched URLs that are no feed are removed
     * @return \SimplePie\File[]|null Single-element list with the feed, null when none was found
     */
    public function extension(&$array)
    {
        foreach ($array as $key => $value) {
            if ($this->checked_feeds === $this->max_checked_feeds) {
                break;
            }

            // strrchr() yields false for a URL without any dot; handing that to
            // strtolower() would raise a TypeError under strict_types.
            $extension = strrchr($value, '.');
            if ($extension === false || !in_array(strtolower($extension), ['.rss', '.rdf', '.atom', '.xml'], true)) {
                continue;
            }

            $this->checked_feeds++;
            $feed = $this->fetch_candidate($value);
            if ($this->is_usable_response($feed) && $this->is_feed($feed)) {
                return [$feed];
            }
            unset($array[$key]);
        }

        return null;
    }

    /**
     * Fetch the URLs that contain a typical feed keyword until one is a feed.
     *
     * @param string[] $array Candidate URLs; fetched URLs that are no feed are removed
     * @return \SimplePie\File[]|null Single-element list with the feed, null when none was found
     */
    public function body(&$array)
    {
        foreach ($array as $key => $value) {
            if ($this->checked_feeds === $this->max_checked_feeds) {
                break;
            }
            if (!preg_match('/(feed|rss|rdf|atom|xml)/i', $value)) {
                continue;
            }

            $this->checked_feeds++;
            // The Accept header used to be built here but never passed on;
            // it is now sent like for every other candidate request.
            $feed = $this->fetch_candidate($value);
            if ($this->is_usable_response($feed) && $this->is_feed($feed)) {
                return [$feed];
            }
            unset($array[$key]);
        }

        return null;
    }

    /**
     * Create the File for a candidate URL, asking for feed content types.
     *
     * @param string $url
     * @return \SimplePie\File
     */
    private function fetch_candidate($url)
    {
        $headers = [
            'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1',
        ];

        // NOTE(review): the literal 5 is presumably the redirect limit; confirm against File.
        return $this->registry->create(File::class, [$url, $this->timeout, 5, $headers, $this->useragent, $this->force_fsockopen, $this->curl_options]);
    }

    /**
     * Tell whether a fetched candidate is worth passing on to is_feed().
     *
     * A successful non-remote file is accepted as is; a remote one additionally
     * needs status 200 or a status between 207 and 299.
     *
     * NOTE(review): candidate URLs come from the inspected document, so a
     * non-remote candidate is only possible if File resolves such a URL locally.
     *
     * @param \SimplePie\File $feed
     * @return bool
     */
    private function is_usable_response($feed)
    {
        if (!$feed->success) {
            return false;
        }

        // The parentheses are essential: `===` binds tighter than `&`, so the
        // unparenthesised form compared the constant with 0 and never detected
        // a non-remote file.
        if (($feed->method & \SimplePie\SimplePie::FILE_SOURCE_REMOTE) === 0) {
            return true;
        }

        return $feed->status_code === 200 || ($feed->status_code > 206 && $feed->status_code < 300);
    }
}
369
// Keep the pre-namespace class name available for legacy code.
class_alias(Locator::class, 'SimplePie_Locator', false);
371