xref: /dokuwiki/vendor/simplepie/simplepie/src/Sanitize.php (revision 8e88a29b81301f78509349ab1152bb09c229123e)
1<?php
2
3// SPDX-FileCopyrightText: 2004-2023 Ryan Parman, Sam Sneddon, Ryan McCue
4// SPDX-License-Identifier: BSD-3-Clause
5
6declare(strict_types=1);
7
8namespace SimplePie;
9
10use DOMDocument;
11use DOMXPath;
12use InvalidArgumentException;
13use Psr\Http\Client\ClientInterface;
14use Psr\Http\Message\RequestFactoryInterface;
15use Psr\Http\Message\UriFactoryInterface;
16use SimplePie\Cache\Base;
17use SimplePie\Cache\BaseDataCache;
18use SimplePie\Cache\CallableNameFilter;
19use SimplePie\Cache\DataCache;
20use SimplePie\Cache\NameFilter;
21use SimplePie\HTTP\Client;
22use SimplePie\HTTP\ClientException;
23use SimplePie\HTTP\FileClient;
24use SimplePie\HTTP\Psr18Client;
25
26/**
27 * Used for data cleanup and post-processing
28 *
29 *
30 * This class can be overloaded with {@see \SimplePie\SimplePie::set_sanitize_class()}
31 *
32 * @todo Move to using an actual HTML parser (this will allow tags to be properly stripped, and to switch between HTML and XHTML), this will also make it easier to shorten a string while preserving HTML tags
33 */
34class Sanitize implements RegistryAware
35{
    // Base URL that relative URLs are resolved against; assigned by sanitize()
37    /** @var string */
38    public $base = '';
39
40    // Options
41    /** @var bool */
42    public $remove_div = true;
43    /** @var string */
44    public $image_handler = '';
45    /** @var string[] */
46    public $strip_htmltags = ['base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style'];
47    /** @var bool */
48    public $encode_instead_of_strip = false;
49    /** @var string[] */
50    public $strip_attributes = ['bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'];
51    /** @var string[] */
52    public $rename_attributes = [];
53    /** @var array<string, array<string, string>> */
54    public $add_attributes = ['audio' => ['preload' => 'none'], 'iframe' => ['sandbox' => 'allow-scripts allow-same-origin'], 'video' => ['preload' => 'none']];
55    /** @var bool */
56    public $strip_comments = false;
57    /** @var string */
58    public $output_encoding = 'UTF-8';
59    /** @var bool */
60    public $enable_cache = true;
61    /** @var string */
62    public $cache_location = './cache';
63    /** @var string&(callable(string): string) */
64    public $cache_name_function = 'md5';
65
66    /**
67     * @var NameFilter
68     */
69    private $cache_namefilter;
70    /** @var int */
71    public $timeout = 10;
72    /** @var string */
73    public $useragent = '';
74    /** @var bool */
75    public $force_fsockopen = false;
76    /** @var array<string, string|string[]> */
77    public $replace_url_attributes = [];
78    /**
79     * @var array<int, mixed> Custom curl options
80     * @see SimplePie::set_curl_options()
81     */
82    private $curl_options = [];
83
84    /** @var Registry */
85    public $registry;
86
87    /**
88     * @var DataCache|null
89     */
90    private $cache = null;
91
92    /**
93     * @var int Cache duration (in seconds)
94     */
95    private $cache_duration = 3600;
96
97    /**
98     * List of domains for which to force HTTPS.
99     * @see \SimplePie\Sanitize::set_https_domains()
100     * Array is a tree split at DNS levels. Example:
101     * array('biz' => true, 'com' => array('example' => true), 'net' => array('example' => array('www' => true)))
102     * @var true|array<string, true|array<string, true|array<string, array<string, true|array<string, true|array<string, true>>>>>>
103     */
104    public $https_domains = [];
105
106    /**
107     * @var Client|null
108     */
109    private $http_client = null;
110
111    public function __construct()
112    {
113        // Set defaults
114        $this->set_url_replacements(null);
115    }
116
117    /**
118     * @return void
119     */
120    public function remove_div(bool $enable = true)
121    {
122        $this->remove_div = (bool) $enable;
123    }
124
125    /**
126     * @param string|false $page
127     * @return void
128     */
129    public function set_image_handler($page = false)
130    {
131        if ($page) {
132            $this->image_handler = (string) $page;
133        } else {
134            $this->image_handler = '';
135        }
136    }
137
138    /**
139     * @return void
140     */
141    public function set_registry(\SimplePie\Registry $registry)
142    {
143        $this->registry = $registry;
144    }
145
146    /**
147     * @param (string&(callable(string): string))|NameFilter $cache_name_function
148     * @param class-string<Cache> $cache_class
149     * @return void
150     */
151    public function pass_cache_data(bool $enable_cache = true, string $cache_location = './cache', $cache_name_function = 'md5', string $cache_class = Cache::class, ?DataCache $cache = null)
152    {
153        $this->enable_cache = $enable_cache;
154
155        if ($cache_location) {
156            $this->cache_location = $cache_location;
157        }
158
159        // @phpstan-ignore-next-line Enforce PHPDoc type.
160        if (!is_string($cache_name_function) && !$cache_name_function instanceof NameFilter) {
161            throw new InvalidArgumentException(sprintf(
162                '%s(): Argument #3 ($cache_name_function) must be of type %s',
163                __METHOD__,
164                NameFilter::class
165            ), 1);
166        }
167
168        // BC: $cache_name_function could be a callable as string
169        if (is_string($cache_name_function)) {
170            // trigger_error(sprintf('Providing $cache_name_function as string in "%s()" is deprecated since SimplePie 1.8.0, provide as "%s" instead.', __METHOD__, NameFilter::class), \E_USER_DEPRECATED);
171            $this->cache_name_function = $cache_name_function;
172
173            $cache_name_function = new CallableNameFilter($cache_name_function);
174        }
175
176        $this->cache_namefilter = $cache_name_function;
177
178        if ($cache !== null) {
179            $this->cache = $cache;
180        }
181    }
182
183    /**
184     * Set a PSR-18 client and PSR-17 factories
185     *
186     * Allows you to use your own HTTP client implementations.
187     */
188    final public function set_http_client(
189        ClientInterface $http_client,
190        RequestFactoryInterface $request_factory,
191        UriFactoryInterface $uri_factory
192    ): void {
193        $this->http_client = new Psr18Client($http_client, $request_factory, $uri_factory);
194    }
195
196    /**
197     * @deprecated since SimplePie 1.9.0, use \SimplePie\Sanitize::set_http_client() instead.
198     * @param class-string<File> $file_class
199     * @param array<int, mixed> $curl_options
200     * @return void
201     */
202    public function pass_file_data(string $file_class = File::class, int $timeout = 10, string $useragent = '', bool $force_fsockopen = false, array $curl_options = [])
203    {
204        // trigger_error(sprintf('SimplePie\Sanitize::pass_file_data() is deprecated since SimplePie 1.9.0, please use "SimplePie\Sanitize::set_http_client()" instead.'), \E_USER_DEPRECATED);
205        if ($timeout) {
206            $this->timeout = $timeout;
207        }
208
209        if ($useragent) {
210            $this->useragent = $useragent;
211        }
212
213        if ($force_fsockopen) {
214            $this->force_fsockopen = $force_fsockopen;
215        }
216
217        $this->curl_options = $curl_options;
218        // Invalidate the registered client.
219        $this->http_client = null;
220    }
221
222    /**
223     * @param string[]|string|false $tags Set a list of tags to strip, or set empty string to use default tags, or false to strip nothing.
224     * @return void
225     */
226    public function strip_htmltags($tags = ['base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style'])
227    {
228        if ($tags) {
229            if (is_array($tags)) {
230                $this->strip_htmltags = $tags;
231            } else {
232                $this->strip_htmltags = explode(',', $tags);
233            }
234        } else {
235            $this->strip_htmltags = [];
236        }
237    }
238
239    /**
240     * @return void
241     */
242    public function encode_instead_of_strip(bool $encode = false)
243    {
244        $this->encode_instead_of_strip = $encode;
245    }
246
247    /**
248     * @param string[]|string $attribs
249     * @return void
250     */
251    public function rename_attributes($attribs = [])
252    {
253        if ($attribs) {
254            if (is_array($attribs)) {
255                $this->rename_attributes = $attribs;
256            } else {
257                $this->rename_attributes = explode(',', $attribs);
258            }
259        } else {
260            $this->rename_attributes = [];
261        }
262    }
263
264    /**
265     * @param string[]|string $attribs
266     * @return void
267     */
268    public function strip_attributes($attribs = ['bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'])
269    {
270        if ($attribs) {
271            if (is_array($attribs)) {
272                $this->strip_attributes = $attribs;
273            } else {
274                $this->strip_attributes = explode(',', $attribs);
275            }
276        } else {
277            $this->strip_attributes = [];
278        }
279    }
280
281    /**
282     * @param array<string, array<string, string>> $attribs
283     * @return void
284     */
285    public function add_attributes(array $attribs = ['audio' => ['preload' => 'none'], 'iframe' => ['sandbox' => 'allow-scripts allow-same-origin'], 'video' => ['preload' => 'none']])
286    {
287        $this->add_attributes = $attribs;
288    }
289
290    /**
291     * @return void
292     */
293    public function strip_comments(bool $strip = false)
294    {
295        $this->strip_comments = $strip;
296    }
297
298    /**
299     * @return void
300     */
301    public function set_output_encoding(string $encoding = 'UTF-8')
302    {
303        $this->output_encoding = $encoding;
304    }
305
306    /**
307     * Set element/attribute key/value pairs of HTML attributes
308     * containing URLs that need to be resolved relative to the feed
309     *
310     * Defaults to |a|@href, |area|@href, |audio|@src, |blockquote|@cite,
311     * |del|@cite, |form|@action, |img|@longdesc, |img|@src, |input|@src,
312     * |ins|@cite, |q|@cite, |source|@src, |video|@src
313     *
314     * @since 1.0
315     * @param array<string, string|string[]>|null $element_attribute Element/attribute key/value pairs, null for default
316     * @return void
317     */
318    public function set_url_replacements(?array $element_attribute = null)
319    {
320        if ($element_attribute === null) {
321            $element_attribute = [
322                'a' => 'href',
323                'area' => 'href',
324                'audio' => 'src',
325                'blockquote' => 'cite',
326                'del' => 'cite',
327                'form' => 'action',
328                'img' => [
329                    'longdesc',
330                    'src'
331                ],
332                'input' => 'src',
333                'ins' => 'cite',
334                'q' => 'cite',
335                'source' => 'src',
336                'video' => [
337                    'poster',
338                    'src'
339                ]
340            ];
341        }
342        $this->replace_url_attributes = $element_attribute;
343    }
344
345    /**
346     * Set the list of domains for which to force HTTPS.
347     * @see \SimplePie\Misc::https_url()
348     * Example array('biz', 'example.com', 'example.org', 'www.example.net');
349     *
350     * @param string[] $domains list of domain names ['biz', 'example.com', 'example.org', 'www.example.net']
351     *
352     * @return void
353     */
354    public function set_https_domains(array $domains)
355    {
356        $this->https_domains = [];
357        foreach ($domains as $domain) {
358            $domain = trim($domain, ". \t\n\r\0\x0B");
359            $segments = array_reverse(explode('.', $domain));
360            /** @var true|array<string, true|array<string, true|array<string, array<string, true|array<string, true|array<string, true>>>>>> */ // Needed for PHPStan.
361            $node = &$this->https_domains;
362            foreach ($segments as $segment) {//Build a tree
363                if ($node === true) {
364                    break;
365                }
366                if (!isset($node[$segment])) {
367                    $node[$segment] = [];
368                }
369                $node = &$node[$segment];
370            }
371            $node = true;
372        }
373    }
374
375    /**
376     * Check if the domain is in the list of forced HTTPS.
377     *
378     * @return bool
379     */
380    protected function is_https_domain(string $domain)
381    {
382        $domain = trim($domain, '. ');
383        $segments = array_reverse(explode('.', $domain));
384        $node = &$this->https_domains;
385        foreach ($segments as $segment) {//Explore the tree
386            if (isset($node[$segment])) {
387                $node = &$node[$segment];
388            } else {
389                break;
390            }
391        }
392        return $node === true;
393    }
394
395    /**
396     * Force HTTPS for selected Web sites.
397     *
398     * @return string
399     */
400    public function https_url(string $url)
401    {
402        return (
403            strtolower(substr($url, 0, 7)) === 'http://'
404            && ($parsed = parse_url($url, PHP_URL_HOST)) !== false // Malformed URL
405            && $parsed !== null // Missing host
406            && $this->is_https_domain($parsed) // Should be forced?
407        ) ? substr_replace($url, 's', 4, 0) // Add the 's' to HTTPS
408        : $url;
409    }
410
411    /**
412     * @param int-mask-of<SimplePie::CONSTRUCT_*> $type
413     * @param string $base
414     * @return string Sanitized data; false if output encoding is changed to something other than UTF-8 and conversion fails
415     */
416    public function sanitize(string $data, int $type, string $base = '')
417    {
418        $data = trim($data);
419        if ($data !== '' || $type & \SimplePie\SimplePie::CONSTRUCT_IRI) {
420            if ($type & \SimplePie\SimplePie::CONSTRUCT_MAYBE_HTML) {
421                if (preg_match('/(&(#(x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z0-9]+)|<\/[A-Za-z][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E]*' . \SimplePie\SimplePie::PCRE_HTML_ATTRIBUTE . '>)/', $data)) {
422                    $type |= \SimplePie\SimplePie::CONSTRUCT_HTML;
423                } else {
424                    $type |= \SimplePie\SimplePie::CONSTRUCT_TEXT;
425                }
426            }
427
428            if ($type & \SimplePie\SimplePie::CONSTRUCT_BASE64) {
429                $data = base64_decode($data);
430            }
431
432            if ($type & (\SimplePie\SimplePie::CONSTRUCT_HTML | \SimplePie\SimplePie::CONSTRUCT_XHTML)) {
433                if (!class_exists('DOMDocument')) {
434                    throw new \SimplePie\Exception('DOMDocument not found, unable to use sanitizer');
435                }
436                $document = new \DOMDocument();
437                $document->encoding = 'UTF-8';
438
439                // PHPStan seems to have trouble resolving int-mask because bitwise
440                // operators are used when operators are used when passing this parameter.
441                // https://github.com/phpstan/phpstan/issues/9384
442                /** @var int-mask-of<SimplePie::CONSTRUCT_*> $type */
443                $data = $this->preprocess($data, $type);
444
445                set_error_handler([Misc::class, 'silence_errors']);
446                $document->loadHTML($data);
447                restore_error_handler();
448
449                $xpath = new \DOMXPath($document);
450
451                // Strip comments
452                if ($this->strip_comments) {
453                    /** @var \DOMNodeList<\DOMComment> */
454                    $comments = $xpath->query('//comment()');
455
456                    foreach ($comments as $comment) {
457                        $parentNode = $comment->parentNode;
458                        assert($parentNode !== null, 'For PHPStan, comment must have a parent');
459                        $parentNode->removeChild($comment);
460                    }
461                }
462
463                // Strip out HTML tags and attributes that might cause various security problems.
464                // Based on recommendations by Mark Pilgrim at:
465                // https://web.archive.org/web/20110902041826/http://diveintomark.org:80/archives/2003/06/12/how_to_consume_rss_safely
466                if ($this->strip_htmltags) {
467                    foreach ($this->strip_htmltags as $tag) {
468                        $this->strip_tag($tag, $document, $xpath, $type);
469                    }
470                }
471
472                if ($this->rename_attributes) {
473                    foreach ($this->rename_attributes as $attrib) {
474                        $this->rename_attr($attrib, $xpath);
475                    }
476                }
477
478                if ($this->strip_attributes) {
479                    foreach ($this->strip_attributes as $attrib) {
480                        $this->strip_attr($attrib, $xpath);
481                    }
482                }
483
484                if ($this->add_attributes) {
485                    foreach ($this->add_attributes as $tag => $valuePairs) {
486                        $this->add_attr($tag, $valuePairs, $document);
487                    }
488                }
489
490                // Replace relative URLs
491                $this->base = $base;
492                foreach ($this->replace_url_attributes as $element => $attributes) {
493                    $this->replace_urls($document, $element, $attributes);
494                }
495
496                // If image handling (caching, etc.) is enabled, cache and rewrite all the image tags.
497                if ($this->image_handler !== '' && $this->enable_cache) {
498                    $images = $document->getElementsByTagName('img');
499
500                    foreach ($images as $img) {
501                        if ($img->hasAttribute('src')) {
502                            $image_url = $this->cache_namefilter->filter($img->getAttribute('src'));
503                            $cache = $this->get_cache($image_url);
504
505                            if ($cache->get_data($image_url, false)) {
506                                $img->setAttribute('src', $this->image_handler . $image_url);
507                            } else {
508                                try {
509                                    $file = $this->get_http_client()->request(
510                                        Client::METHOD_GET,
511                                        $img->getAttribute('src'),
512                                        ['X-FORWARDED-FOR' => $_SERVER['REMOTE_ADDR']]
513                                    );
514                                } catch (ClientException $th) {
515                                    continue;
516                                }
517
518                                if ((!Misc::is_remote_uri($file->get_final_requested_uri()) || ($file->get_status_code() === 200 || $file->get_status_code() > 206 && $file->get_status_code() < 300))) {
519                                    if ($cache->set_data($image_url, ['headers' => $file->get_headers(), 'body' => $file->get_body_content()], $this->cache_duration)) {
520                                        $img->setAttribute('src', $this->image_handler . $image_url);
521                                    } else {
522                                        trigger_error("$this->cache_location is not writable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
523                                    }
524                                }
525                            }
526                        }
527                    }
528                }
529
530                // Get content node
531                $div = null;
532                if (($item = $document->getElementsByTagName('body')->item(0)) !== null) {
533                    $div = $item->firstChild;
534                }
535                // Finally, convert to a HTML string
536                $data = trim((string) $document->saveHTML($div));
537
538                if ($this->remove_div) {
539                    $data = preg_replace('/^<div' . \SimplePie\SimplePie::PCRE_XML_ATTRIBUTE . '>/', '', $data);
540                    // Cast for PHPStan, it is unable to validate a non-literal regex above.
541                    $data = preg_replace('/<\/div>$/', '', (string) $data);
542                } else {
543                    $data = preg_replace('/^<div' . \SimplePie\SimplePie::PCRE_XML_ATTRIBUTE . '>/', '<div>', $data);
544                }
545
546                // Cast for PHPStan, it is unable to validate a non-literal regex above.
547                $data = str_replace('</source>', '', (string) $data);
548            }
549
550            if ($type & \SimplePie\SimplePie::CONSTRUCT_IRI) {
551                $absolute = $this->registry->call(Misc::class, 'absolutize_url', [$data, $base]);
552                if ($absolute !== false) {
553                    $data = $absolute;
554                }
555            }
556
557            if ($type & (\SimplePie\SimplePie::CONSTRUCT_TEXT | \SimplePie\SimplePie::CONSTRUCT_IRI)) {
558                $data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8');
559            }
560
561            if ($this->output_encoding !== 'UTF-8') {
562                // This really returns string|false but changing encoding is uncommon and we are going to deprecate it, so let’s just lie to PHPStan in the interest of cleaner annotations.
563                /** @var string */
564                $data = $this->registry->call(Misc::class, 'change_encoding', [$data, 'UTF-8', $this->output_encoding]);
565            }
566        }
567        return $data;
568    }
569
570    /**
571     * @param int-mask-of<SimplePie::CONSTRUCT_*> $type
572     * @return string
573     */
574    protected function preprocess(string $html, int $type)
575    {
576        $ret = '';
577        $html = preg_replace('%</?(?:html|body)[^>]*?'.'>%is', '', $html);
578        if ($type & ~\SimplePie\SimplePie::CONSTRUCT_XHTML) {
579            // Atom XHTML constructs are wrapped with a div by default
580            // Note: No protection if $html contains a stray </div>!
581            $html = '<div>' . $html . '</div>';
582            $ret .= '<!DOCTYPE html>';
583            $content_type = 'text/html';
584        } else {
585            $ret .= '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">';
586            $content_type = 'application/xhtml+xml';
587        }
588
589        $ret .= '<html><head>';
590        $ret .= '<meta http-equiv="Content-Type" content="' . $content_type . '; charset=utf-8" />';
591        $ret .= '</head><body>' . $html . '</body></html>';
592        return $ret;
593    }
594
595    /**
596     * @param array<string>|string $attributes
597     * @return void
598     */
599    public function replace_urls(DOMDocument $document, string $tag, $attributes)
600    {
601        if (!is_array($attributes)) {
602            $attributes = [$attributes];
603        }
604
605        if (!is_array($this->strip_htmltags) || !in_array($tag, $this->strip_htmltags)) {
606            $elements = $document->getElementsByTagName($tag);
607            foreach ($elements as $element) {
608                foreach ($attributes as $attribute) {
609                    if ($element->hasAttribute($attribute)) {
610                        $value = $this->registry->call(Misc::class, 'absolutize_url', [$element->getAttribute($attribute), $this->base]);
611                        if ($value !== false) {
612                            $value = $this->https_url($value);
613                            $element->setAttribute($attribute, $value);
614                        }
615                    }
616                }
617            }
618        }
619    }
620
621    /**
622     * @param array<int, string> $match
623     * @return string
624     */
625    public function do_strip_htmltags(array $match)
626    {
627        if ($this->encode_instead_of_strip) {
628            if (isset($match[4]) && !in_array(strtolower($match[1]), ['script', 'style'])) {
629                $match[1] = htmlspecialchars($match[1], ENT_COMPAT, 'UTF-8');
630                $match[2] = htmlspecialchars($match[2], ENT_COMPAT, 'UTF-8');
631                return "&lt;$match[1]$match[2]&gt;$match[3]&lt;/$match[1]&gt;";
632            } else {
633                return htmlspecialchars($match[0], ENT_COMPAT, 'UTF-8');
634            }
635        } elseif (isset($match[4]) && !in_array(strtolower($match[1]), ['script', 'style'])) {
636            return $match[4];
637        } else {
638            return '';
639        }
640    }
641
642    /**
643     * @param int-mask-of<SimplePie::CONSTRUCT_*> $type
644     * @return void
645     */
646    protected function strip_tag(string $tag, DOMDocument $document, DOMXPath $xpath, int $type)
647    {
648        $elements = $xpath->query('body//' . $tag);
649
650        if ($elements === false) {
651            throw new \SimplePie\Exception(sprintf(
652                '%s(): Possibly malformed expression, check argument #1 ($tag)',
653                __METHOD__
654            ), 1);
655        }
656
657        if ($this->encode_instead_of_strip) {
658            foreach ($elements as $element) {
659                $fragment = $document->createDocumentFragment();
660
661                // For elements which aren't script or style, include the tag itself
662                if (!in_array($tag, ['script', 'style'])) {
663                    $text = '<' . $tag;
664                    if ($element->attributes !== null) {
665                        $attrs = [];
666                        foreach ($element->attributes as $name => $attr) {
667                            $value = $attr->value;
668
669                            // In XHTML, empty values should never exist, so we repeat the value
670                            if (empty($value) && ($type & \SimplePie\SimplePie::CONSTRUCT_XHTML)) {
671                                $value = $name;
672                            }
673                            // For HTML, empty is fine
674                            elseif (empty($value) && ($type & \SimplePie\SimplePie::CONSTRUCT_HTML)) {
675                                $attrs[] = $name;
676                                continue;
677                            }
678
679                            // Standard attribute text
680                            $attrs[] = $name . '="' . $attr->value . '"';
681                        }
682                        $text .= ' ' . implode(' ', $attrs);
683                    }
684                    $text .= '>';
685                    $fragment->appendChild(new \DOMText($text));
686                }
687
688                $number = $element->childNodes->length;
689                for ($i = $number; $i > 0; $i--) {
690                    if (($child = $element->childNodes->item(0)) !== null) {
691                        $fragment->appendChild($child);
692                    }
693                }
694
695                if (!in_array($tag, ['script', 'style'])) {
696                    $fragment->appendChild(new \DOMText('</' . $tag . '>'));
697                }
698
699                if (($parentNode = $element->parentNode) !== null) {
700                    $parentNode->replaceChild($fragment, $element);
701                }
702            }
703
704            return;
705        } elseif (in_array($tag, ['script', 'style'])) {
706            foreach ($elements as $element) {
707                if (($parentNode = $element->parentNode) !== null) {
708                    $parentNode->removeChild($element);
709                }
710            }
711
712            return;
713        } else {
714            foreach ($elements as $element) {
715                $fragment = $document->createDocumentFragment();
716                $number = $element->childNodes->length;
717                for ($i = $number; $i > 0; $i--) {
718                    if (($child = $element->childNodes->item(0)) !== null) {
719                        $fragment->appendChild($child);
720                    }
721                }
722
723                if (($parentNode = $element->parentNode) !== null) {
724                    $parentNode->replaceChild($fragment, $element);
725                }
726            }
727        }
728    }
729
730    /**
731     * @return void
732     */
733    protected function strip_attr(string $attrib, DOMXPath $xpath)
734    {
735        $elements = $xpath->query('//*[@' . $attrib . ']');
736
737        if ($elements === false) {
738            throw new \SimplePie\Exception(sprintf(
739                '%s(): Possibly malformed expression, check argument #1 ($attrib)',
740                __METHOD__
741            ), 1);
742        }
743
744        /** @var \DOMElement $element */
745        foreach ($elements as $element) {
746            $element->removeAttribute($attrib);
747        }
748    }
749
750    /**
751     * @return void
752     */
753    protected function rename_attr(string $attrib, DOMXPath $xpath)
754    {
755        $elements = $xpath->query('//*[@' . $attrib . ']');
756
757        if ($elements === false) {
758            throw new \SimplePie\Exception(sprintf(
759                '%s(): Possibly malformed expression, check argument #1 ($attrib)',
760                __METHOD__
761            ), 1);
762        }
763
764        /** @var \DOMElement $element */
765        foreach ($elements as $element) {
766            $element->setAttribute('data-sanitized-' . $attrib, $element->getAttribute($attrib));
767            $element->removeAttribute($attrib);
768        }
769    }
770
771    /**
772     * @param array<string, string> $valuePairs
773     * @return void
774     */
775    protected function add_attr(string $tag, array $valuePairs, DOMDocument $document)
776    {
777        $elements = $document->getElementsByTagName($tag);
778        /** @var \DOMElement $element */
779        foreach ($elements as $element) {
780            foreach ($valuePairs as $attrib => $value) {
781                $element->setAttribute($attrib, $value);
782            }
783        }
784    }
785
786    /**
787     * Get a DataCache
788     *
789     * @param string $image_url Only needed for BC, can be removed in SimplePie 2.0.0
790     *
791     * @return DataCache
792     */
793    private function get_cache(string $image_url = ''): DataCache
794    {
795        if ($this->cache === null) {
796            // @trigger_error(sprintf('Not providing as PSR-16 cache implementation is deprecated since SimplePie 1.8.0, please use "SimplePie\SimplePie::set_cache()".'), \E_USER_DEPRECATED);
797            $cache = $this->registry->call(Cache::class, 'get_handler', [
798                $this->cache_location,
799                $image_url,
800                Base::TYPE_IMAGE
801            ]);
802
803            return new BaseDataCache($cache);
804        }
805
806        return $this->cache;
807    }
808
809    /**
810     * Get a HTTP client
811     */
812    private function get_http_client(): Client
813    {
814        if ($this->http_client === null) {
815            $this->http_client = new FileClient(
816                $this->registry,
817                [
818                    'timeout' => $this->timeout,
819                    'redirects' => 5,
820                    'useragent' => $this->useragent,
821                    'force_fsockopen' => $this->force_fsockopen,
822                    'curl_options' => $this->curl_options,
823                ]
824            );
825        }
826
827        return $this->http_client;
828    }
829}
830
// Make this class also available under the name "SimplePie_Sanitize".
class_alias('SimplePie\Sanitize', 'SimplePie_Sanitize');
832