xref: /dokuwiki/vendor/simplepie/simplepie/src/Locator.php (revision 8e88a29b81301f78509349ab1152bb09c229123e)
1<?php
2
3// SPDX-FileCopyrightText: 2004-2023 Ryan Parman, Sam Sneddon, Ryan McCue
4// SPDX-License-Identifier: BSD-3-Clause
5
6declare(strict_types=1);
7
8namespace SimplePie;
9
10use DomDocument;
11use Psr\Http\Client\ClientInterface;
12use Psr\Http\Message\RequestFactoryInterface;
13use Psr\Http\Message\UriFactoryInterface;
14use SimplePie\HTTP\Client;
15use SimplePie\HTTP\ClientException;
16use SimplePie\HTTP\FileClient;
17use SimplePie\HTTP\Psr18Client;
18use SimplePie\HTTP\Response;
19
20/**
21 * Used for feed auto-discovery
22 *
23 *
24 * This class can be overloaded with {@see \SimplePie\SimplePie::set_locator_class()}
25 */
26class Locator implements RegistryAware
27{
    /**
     * User agent passed as the 'useragent' option to the default HTTP client;
     * the option is omitted when this is null.
     *
     * @var ?string
     */
    public $useragent = null;
    /**
     * Passed as the 'timeout' option to the default HTTP client.
     *
     * @var int
     */
    public $timeout = 10;
    /**
     * The document the locator was constructed with; its body content is
     * parsed into {@see Locator::$dom}.
     *
     * @var File
     */
    public $file;
    /**
     * Absolutized `a` hrefs collected by get_links() whose authority is empty
     * or equal to the authority of the document URI.
     *
     * @var string[]
     */
    public $local = [];
    /**
     * Absolutized `a` hrefs collected by get_links() that point to a
     * different authority than the document URI.
     *
     * @var string[]
     */
    public $elsewhere = [];
    /**
     * Not referenced by any method visible in this class.
     * NOTE(review): possibly kept for backwards compatibility - confirm.
     *
     * @var array<mixed>
     */
    public $cached_entities = [];
    /**
     * Final requested URI of the document; set by get_base().
     *
     * @var string
     */
    public $http_base;
    /**
     * Base URL for resolving links; set by get_base() to the document URI, or
     * to the first `base` element href that can be absolutized.
     *
     * @var string
     */
    public $base;
    /**
     * Line number of the `base` element that supplied {@see Locator::$base},
     * or 0. Elements on later lines are resolved against $base, others
     * against $http_base.
     *
     * @var int
     */
    public $base_location = 0;
    /**
     * Number of candidate URLs requested so far; incremented before each
     * request made by search_elements_by_tag(), extension() and body().
     *
     * @var int
     */
    public $checked_feeds = 0;
    /**
     * The candidate loops stop once $checked_feeds equals this value.
     *
     * @var int
     */
    public $max_checked_feeds = 10;
    /**
     * Passed as the 'force_fsockopen' option to the default HTTP client.
     *
     * @var bool
     */
    public $force_fsockopen = false;
    /**
     * Passed as the 'curl_options' option to the default HTTP client.
     *
     * @var array<int, mixed>
     */
    public $curl_options = [];
    /**
     * HTML document parsed from the body of $file by the constructor; null
     * when DOMDocument is unavailable, the body is empty or parsing threw.
     *
     * @var ?\DomDocument
     */
    public $dom;
    /**
     * Registry injected through set_registry().
     *
     * @var ?Registry
     */
    protected $registry;

    /**
     * Client injected through set_http_client(); while null,
     * get_http_client() builds a FileClient from the option properties.
     *
     * @var Client|null
     */
    private $http_client = null;
63
64    /**
65     * @param array<int, mixed> $curl_options
66     */
67    public function __construct(File $file, int $timeout = 10, ?string $useragent = null, int $max_checked_feeds = 10, bool $force_fsockopen = false, array $curl_options = [])
68    {
69        $this->file = $file;
70        $this->useragent = $useragent;
71        $this->timeout = $timeout;
72        $this->max_checked_feeds = $max_checked_feeds;
73        $this->force_fsockopen = $force_fsockopen;
74        $this->curl_options = $curl_options;
75
76        $body = $this->file->get_body_content();
77
78        if (class_exists('DOMDocument') && $body != '') {
79            $this->dom = new \DOMDocument();
80
81            set_error_handler([Misc::class, 'silence_errors']);
82            try {
83                $this->dom->loadHTML($body);
84            } catch (\Throwable $ex) {
85                $this->dom = null;
86            }
87            restore_error_handler();
88        } else {
89            $this->dom = null;
90        }
91    }
92
93    /**
94     * Set a PSR-18 client and PSR-17 factories
95     *
96     * Allows you to use your own HTTP client implementations.
97     */
98    final public function set_http_client(
99        ClientInterface $http_client,
100        RequestFactoryInterface $request_factory,
101        UriFactoryInterface $uri_factory
102    ): void {
103        $this->http_client = new Psr18Client($http_client, $request_factory, $uri_factory);
104    }
105
    /**
     * Inject the registry this locator uses to create helper objects and to
     * call helper methods.
     *
     * The other methods of this class assert that a registry has been set
     * before they use it.
     *
     * @return void
     */
    public function set_registry(\SimplePie\Registry $registry)
    {
        $this->registry = $registry;
    }
113
114    /**
115     * @param SimplePie::LOCATOR_* $type
116     * @param array<Response>|null $working
117     * @return Response|null
118     */
119    public function find(int $type = \SimplePie\SimplePie::LOCATOR_ALL, ?array &$working = null)
120    {
121        assert($this->registry !== null);
122
123        if ($this->is_feed($this->file)) {
124            return $this->file;
125        }
126
127        if (Misc::is_remote_uri($this->file->get_final_requested_uri())) {
128            $sniffer = $this->registry->create(Content\Type\Sniffer::class, [$this->file]);
129            if ($sniffer->get_type() !== 'text/html') {
130                return null;
131            }
132        }
133
134        if ($type & ~\SimplePie\SimplePie::LOCATOR_NONE) {
135            $this->get_base();
136        }
137
138        if ($type & \SimplePie\SimplePie::LOCATOR_AUTODISCOVERY && $working = $this->autodiscovery()) {
139            return $working[0];
140        }
141
142        if ($type & (\SimplePie\SimplePie::LOCATOR_LOCAL_EXTENSION | \SimplePie\SimplePie::LOCATOR_LOCAL_BODY | \SimplePie\SimplePie::LOCATOR_REMOTE_EXTENSION | \SimplePie\SimplePie::LOCATOR_REMOTE_BODY) && $this->get_links()) {
143            if ($type & \SimplePie\SimplePie::LOCATOR_LOCAL_EXTENSION && $working = $this->extension($this->local)) {
144                return $working[0];
145            }
146
147            if ($type & \SimplePie\SimplePie::LOCATOR_LOCAL_BODY && $working = $this->body($this->local)) {
148                return $working[0];
149            }
150
151            if ($type & \SimplePie\SimplePie::LOCATOR_REMOTE_EXTENSION && $working = $this->extension($this->elsewhere)) {
152                return $working[0];
153            }
154
155            if ($type & \SimplePie\SimplePie::LOCATOR_REMOTE_BODY && $working = $this->body($this->elsewhere)) {
156                return $working[0];
157            }
158        }
159        return null;
160    }
161
162    /**
163     * @return bool
164     */
165    public function is_feed(Response $file, bool $check_html = false)
166    {
167        assert($this->registry !== null);
168
169        if (Misc::is_remote_uri($file->get_final_requested_uri())) {
170            $sniffer = $this->registry->create(Content\Type\Sniffer::class, [$file]);
171            $sniffed = $sniffer->get_type();
172            $mime_types = ['application/rss+xml', 'application/rdf+xml',
173                                'text/rdf', 'application/atom+xml', 'text/xml',
174                                'application/xml', 'application/x-rss+xml'];
175            if ($check_html) {
176                $mime_types[] = 'text/html';
177            }
178
179            return in_array($sniffed, $mime_types);
180        } elseif (is_file($file->get_final_requested_uri())) {
181            return true;
182        } else {
183            return false;
184        }
185    }
186
187    /**
188     * @return void
189     */
190    public function get_base()
191    {
192        assert($this->registry !== null);
193
194        if ($this->dom === null) {
195            throw new \SimplePie\Exception('DOMDocument not found, unable to use locator');
196        }
197        $this->http_base = $this->file->get_final_requested_uri();
198        $this->base = $this->http_base;
199        $elements = $this->dom->getElementsByTagName('base');
200        foreach ($elements as $element) {
201            if ($element->hasAttribute('href')) {
202                $base = $this->registry->call(Misc::class, 'absolutize_url', [trim($element->getAttribute('href')), $this->http_base]);
203                if ($base === false) {
204                    continue;
205                }
206                $this->base = $base;
207                $this->base_location = method_exists($element, 'getLineNo') ? $element->getLineNo() : 0;
208                break;
209            }
210        }
211    }
212
213    /**
214     * @return array<Response>|null
215     */
216    public function autodiscovery()
217    {
218        $done = [];
219        $feeds = [];
220        $feeds = array_merge($feeds, $this->search_elements_by_tag('link', $done, $feeds));
221        $feeds = array_merge($feeds, $this->search_elements_by_tag('a', $done, $feeds));
222        $feeds = array_merge($feeds, $this->search_elements_by_tag('area', $done, $feeds));
223
224        if (!empty($feeds)) {
225            return array_values($feeds);
226        }
227
228        return null;
229    }
230
231    /**
232     * @param string[] $done
233     * @param array<string, Response> $feeds
234     * @return array<string, Response>
235     */
236    protected function search_elements_by_tag(string $name, array &$done, array $feeds)
237    {
238        assert($this->registry !== null);
239
240        if ($this->dom === null) {
241            throw new \SimplePie\Exception('DOMDocument not found, unable to use locator');
242        }
243
244        $links = $this->dom->getElementsByTagName($name);
245        foreach ($links as $link) {
246            if ($this->checked_feeds === $this->max_checked_feeds) {
247                break;
248            }
249            if ($link->hasAttribute('href') && $link->hasAttribute('rel')) {
250                $rel = array_unique($this->registry->call(Misc::class, 'space_separated_tokens', [strtolower($link->getAttribute('rel'))]));
251                $line = method_exists($link, 'getLineNo') ? $link->getLineNo() : 1;
252
253                if ($this->base_location < $line) {
254                    $href = $this->registry->call(Misc::class, 'absolutize_url', [trim($link->getAttribute('href')), $this->base]);
255                } else {
256                    $href = $this->registry->call(Misc::class, 'absolutize_url', [trim($link->getAttribute('href')), $this->http_base]);
257                }
258                if ($href === false) {
259                    continue;
260                }
261
262                if (!in_array($href, $done) && in_array('feed', $rel) || (in_array('alternate', $rel) && !in_array('stylesheet', $rel) && $link->hasAttribute('type') && in_array(strtolower($this->registry->call(Misc::class, 'parse_mime', [$link->getAttribute('type')])), ['text/html', 'application/rss+xml', 'application/atom+xml'])) && !isset($feeds[$href])) {
263                    $this->checked_feeds++;
264                    $headers = [
265                        'Accept' => SimplePie::DEFAULT_HTTP_ACCEPT_HEADER,
266                    ];
267
268                    try {
269                        $feed = $this->get_http_client()->request(Client::METHOD_GET, $href, $headers);
270
271                        if ((!Misc::is_remote_uri($feed->get_final_requested_uri()) || ($feed->get_status_code() === 200 || $feed->get_status_code() > 206 && $feed->get_status_code() < 300)) && $this->is_feed($feed, true)) {
272                            $feeds[$href] = $feed;
273                        }
274                    } catch (ClientException $th) {
275                        // Just mark it as done and continue.
276                    }
277                }
278                $done[] = $href;
279            }
280        }
281
282        return $feeds;
283    }
284
285    /**
286     * @return true|null
287     */
288    public function get_links()
289    {
290        assert($this->registry !== null);
291
292        if ($this->dom === null) {
293            throw new \SimplePie\Exception('DOMDocument not found, unable to use locator');
294        }
295
296        $links = $this->dom->getElementsByTagName('a');
297        foreach ($links as $link) {
298            if ($link->hasAttribute('href')) {
299                $href = trim($link->getAttribute('href'));
300                $parsed = $this->registry->call(Misc::class, 'parse_url', [$href]);
301                if ($parsed['scheme'] === '' || preg_match('/^(https?|feed)?$/i', $parsed['scheme'])) {
302                    if (method_exists($link, 'getLineNo') && $this->base_location < $link->getLineNo()) {
303                        $href = $this->registry->call(Misc::class, 'absolutize_url', [trim($link->getAttribute('href')), $this->base]);
304                    } else {
305                        $href = $this->registry->call(Misc::class, 'absolutize_url', [trim($link->getAttribute('href')), $this->http_base]);
306                    }
307                    if ($href === false) {
308                        continue;
309                    }
310
311                    $current = $this->registry->call(Misc::class, 'parse_url', [$this->file->get_final_requested_uri()]);
312
313                    if ($parsed['authority'] === '' || $parsed['authority'] === $current['authority']) {
314                        $this->local[] = $href;
315                    } else {
316                        $this->elsewhere[] = $href;
317                    }
318                }
319            }
320        }
321        $this->local = array_unique($this->local);
322        $this->elsewhere = array_unique($this->elsewhere);
323        if (!empty($this->local) || !empty($this->elsewhere)) {
324            return true;
325        }
326        return null;
327    }
328
329    /**
330     * Extracts first `link` element with given `rel` attribute inside the `head` element.
331     *
332     * @return string|null
333     */
334    public function get_rel_link(string $rel)
335    {
336        assert($this->registry !== null);
337
338        if ($this->dom === null) {
339            throw new \SimplePie\Exception('DOMDocument not found, unable to use '.
340                                          'locator');
341        }
342        if (!class_exists('DOMXpath')) {
343            throw new \SimplePie\Exception('DOMXpath not found, unable to use '.
344                                          'get_rel_link');
345        }
346
347        $xpath = new \DOMXpath($this->dom);
348        $query = '(//head)[1]/link[@rel and @href]';
349        /** @var \DOMNodeList<\DOMElement> */
350        $queryResult = $xpath->query($query);
351        foreach ($queryResult as $link) {
352            $href = trim($link->getAttribute('href'));
353            $parsed = $this->registry->call(Misc::class, 'parse_url', [$href]);
354            if ($parsed['scheme'] === '' ||
355                preg_match('/^https?$/i', $parsed['scheme'])) {
356                if (method_exists($link, 'getLineNo') &&
357                    $this->base_location < $link->getLineNo()) {
358                    $href = $this->registry->call(
359                        Misc::class,
360                        'absolutize_url',
361                        [trim($link->getAttribute('href')), $this->base]
362                    );
363                } else {
364                    $href = $this->registry->call(
365                        Misc::class,
366                        'absolutize_url',
367                        [trim($link->getAttribute('href')), $this->http_base]
368                    );
369                }
370                if ($href === false) {
371                    return null;
372                }
373                $rel_values = explode(' ', strtolower($link->getAttribute('rel')));
374                if (in_array($rel, $rel_values)) {
375                    return $href;
376                }
377            }
378        }
379
380        return null;
381    }
382
383    /**
384     * @param string[] $array
385     * @return array<Response>|null
386     */
387    public function extension(array &$array)
388    {
389        foreach ($array as $key => $value) {
390            if ($this->checked_feeds === $this->max_checked_feeds) {
391                break;
392            }
393            $extension = strrchr($value, '.');
394            if ($extension !== false && in_array(strtolower($extension), ['.rss', '.rdf', '.atom', '.xml'])) {
395                $this->checked_feeds++;
396
397                $headers = [
398                    'Accept' => SimplePie::DEFAULT_HTTP_ACCEPT_HEADER,
399                ];
400
401                try {
402                    $feed = $this->get_http_client()->request(Client::METHOD_GET, $value, $headers);
403
404                    if ((!Misc::is_remote_uri($feed->get_final_requested_uri()) || ($feed->get_status_code() === 200 || $feed->get_status_code() > 206 && $feed->get_status_code() < 300)) && $this->is_feed($feed)) {
405                        return [$feed];
406                    }
407                } catch (ClientException $th) {
408                    // Just unset and continue.
409                }
410
411                unset($array[$key]);
412            }
413        }
414        return null;
415    }
416
417    /**
418     * @param string[] $array
419     * @return array<Response>|null
420     */
421    public function body(array &$array)
422    {
423        foreach ($array as $key => $value) {
424            if ($this->checked_feeds === $this->max_checked_feeds) {
425                break;
426            }
427            if (preg_match('/(feed|rss|rdf|atom|xml)/i', $value)) {
428                $this->checked_feeds++;
429                $headers = [
430                    'Accept' => SimplePie::DEFAULT_HTTP_ACCEPT_HEADER,
431                ];
432
433                try {
434                    $feed = $this->get_http_client()->request(Client::METHOD_GET, $value, $headers);
435
436                    if ((!Misc::is_remote_uri($feed->get_final_requested_uri()) || ($feed->get_status_code() === 200 || $feed->get_status_code() > 206 && $feed->get_status_code() < 300)) && $this->is_feed($feed)) {
437                        return [$feed];
438                    }
439                } catch (ClientException $th) {
440                    // Just unset and continue.
441                }
442
443                unset($array[$key]);
444            }
445        }
446        return null;
447    }
448
449    /**
450     * Get a HTTP client
451     */
452    private function get_http_client(): Client
453    {
454        assert($this->registry !== null);
455
456        if ($this->http_client === null) {
457            $options = [
458                'timeout' => $this->timeout,
459                'redirects' => 5,
460                'force_fsockopen' => $this->force_fsockopen,
461                'curl_options' => $this->curl_options,
462            ];
463
464            if ($this->useragent !== null) {
465                $options['useragent'] = $this->useragent;
466            }
467
468            return new FileClient(
469                $this->registry,
470                $options
471            );
472        }
473
474        return $this->http_client;
475    }
476}
477
// Make the class available under its un-namespaced name SimplePie_Locator as
// well; passing false as the third argument disables autoloading of the
// original class.
class_alias('SimplePie\Locator', 'SimplePie_Locator', false);
479