1<?php
2
3/**
4 * SimplePie
5 *
6 * A PHP-Based RSS and Atom Feed Framework.
7 * Takes the hard work out of managing a complete RSS/Atom solution.
8 *
9 * Copyright (c) 2004-2022, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors
10 * All rights reserved.
11 *
12 * Redistribution and use in source and binary forms, with or without modification, are
13 * permitted provided that the following conditions are met:
14 *
15 * 	* Redistributions of source code must retain the above copyright notice, this list of
16 * 	  conditions and the following disclaimer.
17 *
18 * 	* Redistributions in binary form must reproduce the above copyright notice, this list
19 * 	  of conditions and the following disclaimer in the documentation and/or other materials
20 * 	  provided with the distribution.
21 *
22 * 	* Neither the name of the SimplePie Team nor the names of its contributors may be used
23 * 	  to endorse or promote products derived from this software without specific prior
24 * 	  written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
27 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
28 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
29 * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
31 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
32 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
33 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
35 *
36 * @package SimplePie
37 * @copyright 2004-2016 Ryan Parman, Sam Sneddon, Ryan McCue
38 * @author Ryan Parman
39 * @author Sam Sneddon
40 * @author Ryan McCue
41 * @link http://simplepie.org/ SimplePie
42 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
43 */
44
45namespace SimplePie;
46
/**
 * Used for feed auto-discovery
 *
 * Starting from an already fetched document, the locator first checks
 * whether the document itself is a feed and otherwise inspects its HTML
 * (`<link>`, `<a>` and `<area>` elements) for URLs that point to one.
 *
 * This class can be overloaded with {@see \SimplePie\SimplePie::set_locator_class()}
 *
 * @package SimplePie
 */
class Locator implements RegistryAware
{
    /** @var string|null User agent handed to every File created while probing candidates */
    public $useragent;

    /** @var int Timeout handed to every File created while probing candidates */
    public $timeout;

    /** @var \SimplePie\File The document in which feeds are searched */
    public $file;

    /** @var string[] Absolute link URLs without authority or with the authority of the inspected document */
    public $local = [];

    /** @var string[] Absolute link URLs that point to a different authority */
    public $elsewhere = [];

    /** @var array Not referenced anywhere else in this class */
    public $cached_entities = [];

    /** @var string|null URL of the inspected document, set by {@see get_base()} */
    public $http_base;

    /** @var string|null Base URL after applying the first usable `<base href>`, set by {@see get_base()} */
    public $base;

    /** @var int Line number of the `<base>` element in use (0 when none was found) */
    public $base_location = 0;

    /** @var int Number of candidate URLs fetched so far */
    public $checked_feeds = 0;

    /** @var int Probing stops once $checked_feeds reaches this value */
    public $max_checked_feeds = 10;

    /** @var bool Handed to every File created while probing candidates */
    public $force_fsockopen = false;

    /** @var array Handed to every File created while probing candidates */
    public $curl_options = [];

    /** @var \DOMDocument|null Parsed document body, null when it could not be parsed */
    public $dom;

    /** @var \SimplePie\Registry */
    protected $registry;

    /**
     * Store the configuration and parse the body of $file as HTML.
     *
     * $this->dom stays null when the DOM extension is missing, the body is
     * empty or parsing throws.
     *
     * @param \SimplePie\File $file The document to inspect
     * @param int $timeout
     * @param string|null $useragent
     * @param int $max_checked_feeds
     * @param bool $force_fsockopen
     * @param array $curl_options
     */
    public function __construct(\SimplePie\File $file, $timeout = 10, $useragent = null, $max_checked_feeds = 10, $force_fsockopen = false, $curl_options = [])
    {
        $this->file = $file;
        $this->useragent = $useragent;
        $this->timeout = $timeout;
        $this->max_checked_feeds = $max_checked_feeds;
        $this->force_fsockopen = $force_fsockopen;
        $this->curl_options = $curl_options;
        $this->dom = null;

        if (class_exists('DOMDocument') && $this->file->body != '') {
            $this->dom = new \DOMDocument();

            // Real-world HTML triggers libxml warnings; silence them while parsing
            // and always put the previous handler back afterwards.
            set_error_handler(['SimplePie\Misc', 'silence_errors']);
            try {
                $this->dom->loadHTML($this->file->body);
            } catch (\Throwable $ex) {
                $this->dom = null;
            } finally {
                restore_error_handler();
            }
        }
    }

    /**
     * @param \SimplePie\Registry $registry Used to create File/Sniffer objects and to call Misc helpers
     * @return void
     */
    public function set_registry(\SimplePie\Registry $registry)/* : void */
    {
        $this->registry = $registry;
    }

    /**
     * Locate a feed for the inspected document.
     *
     * The document itself is returned when it already is a feed. Otherwise
     * the strategies enabled in $type are tried in this order:
     * autodiscovery, local links by extension, local links by URL content,
     * remote links by extension, remote links by URL content.
     *
     * @param int $type Bitmask of \SimplePie\SimplePie::LOCATOR_* constants
     * @param array|null $working Receives the list of files found by the successful strategy
     * @return \SimplePie\File|null The first working feed, null when nothing was found
     * @throws \SimplePie\Exception When a strategy is requested but the document could not be parsed
     */
    public function find($type = \SimplePie\SimplePie::LOCATOR_ALL, &$working = null)
    {
        if ($this->is_feed($this->file)) {
            return $this->file;
        }

        // Only HTML documents are searched for links to feeds.
        if ($this->file->method & \SimplePie\SimplePie::FILE_SOURCE_REMOTE) {
            $sniffer = $this->registry->create(Content\Type\Sniffer::class, [$this->file]);
            if ($sniffer->get_type() !== 'text/html') {
                return null;
            }
        }

        if ($type & ~\SimplePie\SimplePie::LOCATOR_NONE) {
            $this->get_base();
        }

        if (($type & \SimplePie\SimplePie::LOCATOR_AUTODISCOVERY) && ($working = $this->autodiscovery())) {
            return $working[0];
        }

        $link_strategies = \SimplePie\SimplePie::LOCATOR_LOCAL_EXTENSION
            | \SimplePie\SimplePie::LOCATOR_LOCAL_BODY
            | \SimplePie\SimplePie::LOCATOR_REMOTE_EXTENSION
            | \SimplePie\SimplePie::LOCATOR_REMOTE_BODY;

        if (($type & $link_strategies) && $this->get_links()) {
            if (($type & \SimplePie\SimplePie::LOCATOR_LOCAL_EXTENSION) && ($working = $this->extension($this->local))) {
                return $working[0];
            }

            if (($type & \SimplePie\SimplePie::LOCATOR_LOCAL_BODY) && ($working = $this->body($this->local))) {
                return $working[0];
            }

            if (($type & \SimplePie\SimplePie::LOCATOR_REMOTE_EXTENSION) && ($working = $this->extension($this->elsewhere))) {
                return $working[0];
            }

            if (($type & \SimplePie\SimplePie::LOCATOR_REMOTE_BODY) && ($working = $this->body($this->elsewhere))) {
                return $working[0];
            }
        }

        return null;
    }

    /**
     * Decide whether $file looks like a feed.
     *
     * Remote files are judged by their sniffed MIME type, local files are
     * always accepted, anything else is rejected.
     *
     * @param \SimplePie\File $file
     * @param bool $check_html Also accept `text/html` for remote files
     * @return bool
     */
    public function is_feed($file, $check_html = false)
    {
        if ($file->method & \SimplePie\SimplePie::FILE_SOURCE_REMOTE) {
            $sniffer = $this->registry->create(Content\Type\Sniffer::class, [$file]);
            $sniffed = $sniffer->get_type();
            $mime_types = [
                'application/rss+xml',
                'application/rdf+xml',
                'text/rdf',
                'application/atom+xml',
                'text/xml',
                'application/xml',
                'application/x-rss+xml',
            ];
            if ($check_html) {
                $mime_types[] = 'text/html';
            }

            return in_array($sniffed, $mime_types, true);
        }

        if ($file->method & \SimplePie\SimplePie::FILE_SOURCE_LOCAL) {
            return true;
        }

        return false;
    }

    /**
     * Determine the base URLs used to resolve relative links.
     *
     * $http_base is the URL of the document. $base is the first
     * `<base href>` that can be made absolute, falling back to $http_base.
     * $base_location remembers the line of that element so links that
     * appear before it are resolved against $http_base instead.
     *
     * @return void
     * @throws \SimplePie\Exception When the document could not be parsed
     */
    public function get_base()
    {
        if ($this->dom === null) {
            throw new \SimplePie\Exception('DOMDocument not found, unable to use locator');
        }
        $this->http_base = $this->file->url;
        $this->base = $this->http_base;
        $elements = $this->dom->getElementsByTagName('base');
        foreach ($elements as $element) {
            if ($element->hasAttribute('href')) {
                $base = $this->registry->call(Misc::class, 'absolutize_url', [trim($element->getAttribute('href')), $this->http_base]);
                if ($base === false) {
                    continue;
                }
                $this->base = $base;
                $this->base_location = method_exists($element, 'getLineNo') ? $element->getLineNo() : 0;
                break;
            }
        }
    }

    /**
     * Collect the feeds advertised through `<link>`, `<a>` and `<area>` elements.
     *
     * @return \SimplePie\File[]|null List of working feeds, null when there is none
     * @throws \SimplePie\Exception When the document could not be parsed
     */
    public function autodiscovery()
    {
        $done = [];
        $feeds = [];
        $feeds = array_merge($feeds, $this->search_elements_by_tag('link', $done, $feeds));
        $feeds = array_merge($feeds, $this->search_elements_by_tag('a', $done, $feeds));
        $feeds = array_merge($feeds, $this->search_elements_by_tag('area', $done, $feeds));

        if (!empty($feeds)) {
            return array_values($feeds);
        }

        return null;
    }

    /**
     * Probe every element named $name that carries both `href` and `rel`
     * and advertises a feed.
     *
     * @param string $name Tag name to search for
     * @param string[] $done URLs that were already processed; extended in place
     * @param array<string, \SimplePie\File> $feeds Feeds found so far, keyed by URL
     * @return array<string, \SimplePie\File> $feeds plus the feeds found by this call
     * @throws \SimplePie\Exception When the document could not be parsed
     */
    protected function search_elements_by_tag($name, &$done, $feeds)
    {
        if ($this->dom === null) {
            throw new \SimplePie\Exception('DOMDocument not found, unable to use locator');
        }

        $links = $this->dom->getElementsByTagName($name);
        foreach ($links as $link) {
            if ($this->checked_feeds === $this->max_checked_feeds) {
                break;
            }
            if (!$link->hasAttribute('href') || !$link->hasAttribute('rel')) {
                continue;
            }

            $rel = array_unique($this->registry->call(Misc::class, 'space_separated_tokens', [strtolower($link->getAttribute('rel'))]));
            $line = method_exists($link, 'getLineNo') ? $link->getLineNo() : 1;

            // A <base> element only affects the links that follow it.
            $base = $this->base_location < $line ? $this->base : $this->http_base;
            $href = $this->registry->call(Misc::class, 'absolutize_url', [trim($link->getAttribute('href')), $base]);
            if ($href === false) {
                continue;
            }

            // Both guards apply to "feed" and "alternate" links alike, so a
            // URL is never fetched twice.
            if (!in_array($href, $done, true) && !isset($feeds[$href]) && $this->is_feed_link($link, $rel)) {
                $this->checked_feeds++;
                $feed = $this->fetch_feed($href, true);
                if ($feed !== null) {
                    $feeds[$href] = $feed;
                }
            }
            $done[] = $href;
        }

        return $feeds;
    }

    /**
     * Sort the targets of all `<a href>` elements into $local and $elsewhere.
     *
     * Only links without a scheme or with an http, https or feed scheme
     * are considered. A link is local when it has no authority or shares
     * the authority of the inspected document.
     *
     * @return true|null True when at least one link was collected, null otherwise
     * @throws \SimplePie\Exception When the document could not be parsed
     */
    public function get_links()
    {
        if ($this->dom === null) {
            throw new \SimplePie\Exception('DOMDocument not found, unable to use locator');
        }

        // The document URL does not change while iterating, parse it once.
        $current = $this->registry->call(Misc::class, 'parse_url', [$this->file->url]);

        $links = $this->dom->getElementsByTagName('a');
        foreach ($links as $link) {
            if (!$link->hasAttribute('href')) {
                continue;
            }

            $href = trim($link->getAttribute('href'));
            $parsed = $this->registry->call(Misc::class, 'parse_url', [$href]);
            if ($parsed['scheme'] !== '' && !preg_match('/^(https?|feed)?$/i', $parsed['scheme'])) {
                continue;
            }

            // A <base> element only affects the links that follow it.
            if (method_exists($link, 'getLineNo') && $this->base_location < $link->getLineNo()) {
                $base = $this->base;
            } else {
                $base = $this->http_base;
            }
            $href = $this->registry->call(Misc::class, 'absolutize_url', [$href, $base]);
            if ($href === false) {
                continue;
            }

            if ($parsed['authority'] === '' || $parsed['authority'] === $current['authority']) {
                $this->local[] = $href;
            } else {
                $this->elsewhere[] = $href;
            }
        }

        $this->local = array_unique($this->local);
        $this->elsewhere = array_unique($this->elsewhere);
        if (!empty($this->local) || !empty($this->elsewhere)) {
            return true;
        }

        return null;
    }

    /**
     * Find the target of the first `<a>` or `<link>` element whose `rel`
     * attribute contains $rel.
     *
     * Only links without a scheme or with an http/https scheme are
     * considered; links that cannot be made absolute are skipped.
     *
     * @param string $rel Relation to look for; compared against the lowercased tokens of the attribute
     * @return string|null Absolute URL of the link, null when no link matches
     * @throws \SimplePie\Exception When the document could not be parsed or DOMXpath is unavailable
     */
    public function get_rel_link($rel)
    {
        if ($this->dom === null) {
            throw new \SimplePie\Exception('DOMDocument not found, unable to use locator');
        }
        if (!class_exists('DOMXpath')) {
            throw new \SimplePie\Exception('DOMXpath not found, unable to use get_rel_link');
        }

        $xpath = new \DOMXpath($this->dom);
        $query = '//a[@rel and @href] | //link[@rel and @href]';
        foreach ($xpath->query($query) as $link) {
            $href = trim($link->getAttribute('href'));
            $parsed = $this->registry->call(Misc::class, 'parse_url', [$href]);
            if ($parsed['scheme'] !== '' && !preg_match('/^https?$/i', $parsed['scheme'])) {
                continue;
            }

            // A <base> element only affects the links that follow it.
            if (method_exists($link, 'getLineNo') && $this->base_location < $link->getLineNo()) {
                $base = $this->base;
            } else {
                $base = $this->http_base;
            }
            $href = $this->registry->call(Misc::class, 'absolutize_url', [$href, $base]);
            if ($href === false) {
                // One unusable link must not hide the matching links after it.
                continue;
            }

            // Same tokenizer as the autodiscovery, so tabs and line breaks
            // separate relations as well and no empty tokens are produced.
            $rel_values = $this->registry->call(Misc::class, 'space_separated_tokens', [strtolower($link->getAttribute('rel'))]);
            if (in_array($rel, $rel_values, true)) {
                return $href;
            }
        }

        return null;
    }

    /**
     * Probe the URLs in $array that end in a typical feed extension.
     *
     * URLs that were fetched but turned out not to be a feed are removed
     * from $array.
     *
     * @param string[] $array Candidate URLs, modified in place
     * @return \SimplePie\File[]|null Single-element list with the first working feed, null when none works
     */
    public function extension(&$array)
    {
        foreach ($array as $key => $value) {
            if ($this->checked_feeds === $this->max_checked_feeds) {
                break;
            }

            // strrchr() returns false for URLs without any dot.
            $extension = strrchr($value, '.');
            if ($extension === false || !in_array(strtolower($extension), ['.rss', '.rdf', '.atom', '.xml'], true)) {
                continue;
            }

            $this->checked_feeds++;
            $feed = $this->fetch_feed($value);
            if ($feed !== null) {
                return [$feed];
            }
            unset($array[$key]);
        }

        return null;
    }

    /**
     * Probe the URLs in $array that contain a typical feed keyword.
     *
     * URLs that were fetched but turned out not to be a feed are removed
     * from $array.
     *
     * @param string[] $array Candidate URLs, modified in place
     * @return \SimplePie\File[]|null Single-element list with the first working feed, null when none works
     */
    public function body(&$array)
    {
        foreach ($array as $key => $value) {
            if ($this->checked_feeds === $this->max_checked_feeds) {
                break;
            }
            if (!preg_match('/(feed|rss|rdf|atom|xml)/i', $value)) {
                continue;
            }

            $this->checked_feeds++;
            $feed = $this->fetch_feed($value);
            if ($feed !== null) {
                return [$feed];
            }
            unset($array[$key]);
        }

        return null;
    }

    /**
     * Tell whether a link element advertises a feed through its `rel`
     * (and, for alternates, `type`) attribute.
     *
     * @param \DOMElement $link
     * @param string[] $rel Lowercased relation tokens of $link
     * @return bool
     */
    private function is_feed_link($link, array $rel)
    {
        if (in_array('feed', $rel, true)) {
            return true;
        }

        if (!in_array('alternate', $rel, true) || in_array('stylesheet', $rel, true) || !$link->hasAttribute('type')) {
            return false;
        }

        $type = strtolower($this->registry->call(Misc::class, 'parse_mime', [$link->getAttribute('type')]));

        return in_array($type, ['text/html', 'application/rss+xml', 'application/atom+xml'], true);
    }

    /**
     * Fetch a candidate URL and return it when it is a usable feed.
     *
     * A response is usable when the request succeeded, the file is either
     * not remote or answered with status 200 or 207-299, and
     * {@see is_feed()} accepts it.
     *
     * @param string $url Absolute URL of the candidate
     * @param bool $check_html Forwarded to {@see is_feed()}
     * @return \SimplePie\File|null The fetched file, null when it is not a usable feed
     */
    private function fetch_feed($url, $check_html = false)
    {
        $headers = [
            'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1',
        ];
        // NOTE(review): the literal 5 is presumably the redirect limit of File - confirm.
        $feed = $this->registry->create(File::class, [$url, $this->timeout, 5, $headers, $this->useragent, $this->force_fsockopen, $this->curl_options]);

        if (!$feed->success) {
            return null;
        }

        // `===` binds tighter than `&`, so the bit test needs its own parentheses.
        $is_remote = ($feed->method & \SimplePie\SimplePie::FILE_SOURCE_REMOTE) !== 0;
        $status_ok = $feed->status_code === 200 || ($feed->status_code > 206 && $feed->status_code < 300);

        if ((!$is_remote || $status_ok) && $this->is_feed($feed, $check_html)) {
            return $feed;
        }

        return null;
    }
}
368
// Keep the pre-namespace class name working for existing code.
class_alias(
    'SimplePie\Locator',
    'SimplePie_Locator',
    false
);
370