xref: /dokuwiki/vendor/simplepie/simplepie/src/File.php (revision 8e88a29b81301f78509349ab1152bb09c229123e)
1<?php
2
3// SPDX-FileCopyrightText: 2004-2023 Ryan Parman, Sam Sneddon, Ryan McCue
4// SPDX-License-Identifier: BSD-3-Clause
5
6declare(strict_types=1);
7
8namespace SimplePie;
9
10use SimplePie\HTTP\Response;
11
12/**
13 * Used for fetching remote files and reading local files
14 *
15 * Supports HTTP 1.0 via cURL or fsockopen, with spotty HTTP 1.1 support
16 *
17 * This class can be overloaded with {@see \SimplePie\SimplePie::set_file_class()}
18 *
19 * @todo Move to properly supporting RFC2616 (HTTP/1.1)
20 */
21class File implements Response
22{
23    /**
24     * @var string The final URL after following all redirects
25     * @deprecated Use `get_final_requested_uri()` method.
26     */
27    public $url;
28
29    /**
30     * @var ?string User agent to use in requests
31     * @deprecated Set the user agent in constructor.
32     */
33    public $useragent;
34
35    /** @var bool */
36    public $success = true;
37
38    /** @var array<string, non-empty-array<string>> Canonical representation of headers */
39    private $parsed_headers = [];
40    /** @var array<string, string> Last known value of $headers property (used to detect external modification) */
41    private $last_headers = [];
42    /**
43     * @var array<string, string> Headers as string for BC
44     * @deprecated Use `get_headers()` method.
45     */
46    public $headers = [];
47
48    /**
49     * @var ?string Body of the HTTP response
50     * @deprecated Use `get_body_content()` method.
51     */
52    public $body;
53
54    /**
55     * @var int Status code of the HTTP response
56     * @deprecated Use `get_status_code()` method.
57     */
58    public $status_code = 0;
59
    /** @var non-negative-int Number of redirects that were already performed during this request sequence. */
61    public $redirects = 0;
62
63    /** @var ?string */
64    public $error;
65
66    /**
67     * @var int-mask-of<SimplePie::FILE_SOURCE_*> Bit mask representing the method used to fetch the file and whether it is a local file or remote file obtained over HTTP.
     * @deprecated The backend is an implementation detail which you should not care about; to see if the file was retrieved over HTTP, check the result of `get_final_requested_uri()` with `Misc::is_remote_uri()`.
69     */
70    public $method = \SimplePie\SimplePie::FILE_SOURCE_NONE;
71
72    /**
     * @var string The permanent URL of the resource (first URL after the prefix of (only) permanent redirects)
74     * @deprecated Use `get_permanent_uri()` method.
75     */
76    public $permanent_url;
77    /** @var bool Whether the permanent URL is still writeable (prefix of permanent redirects has not ended) */
78    private $permanentUrlMutable = true;
79
80    /**
81     * @param string $url
82     * @param int $timeout
83     * @param int $redirects
84     * @param ?array<string, string> $headers
85     * @param ?string $useragent
86     * @param bool $force_fsockopen
87     * @param array<int, mixed> $curl_options
88     */
89    public function __construct(string $url, int $timeout = 10, int $redirects = 5, ?array $headers = null, ?string $useragent = null, bool $force_fsockopen = false, array $curl_options = [])
90    {
91        if (function_exists('idn_to_ascii')) {
92            $parsed = \SimplePie\Misc::parse_url($url);
93            if ($parsed['authority'] !== '' && !ctype_print($parsed['authority'])) {
94                $authority = (string) \idn_to_ascii($parsed['authority'], \IDNA_NONTRANSITIONAL_TO_ASCII, \INTL_IDNA_VARIANT_UTS46);
95                $url = \SimplePie\Misc::compress_parse_url($parsed['scheme'], $authority, $parsed['path'], $parsed['query'], null);
96            }
97        }
98        $this->url = $url;
99        if ($this->permanentUrlMutable) {
100            $this->permanent_url = $url;
101        }
102        $this->useragent = $useragent;
103        if (preg_match('/^http(s)?:\/\//i', $url)) {
104            if ($useragent === null) {
105                $useragent = (string) ini_get('user_agent');
106                $this->useragent = $useragent;
107            }
108            if (!is_array($headers)) {
109                $headers = [];
110            }
111            if (!$force_fsockopen && function_exists('curl_exec')) {
112                $this->method = \SimplePie\SimplePie::FILE_SOURCE_REMOTE | \SimplePie\SimplePie::FILE_SOURCE_CURL;
113                $fp = curl_init();
114                $headers2 = [];
115                foreach ($headers as $key => $value) {
116                    $headers2[] = "$key: $value";
117                }
118                if (isset($curl_options[CURLOPT_HTTPHEADER])) {
119                    if (is_array($curl_options[CURLOPT_HTTPHEADER])) {
120                        $headers2 = array_merge($headers2, $curl_options[CURLOPT_HTTPHEADER]);
121                    }
122                    unset($curl_options[CURLOPT_HTTPHEADER]);
123                }
124                if (version_compare(\SimplePie\Misc::get_curl_version(), '7.10.5', '>=')) {
125                    curl_setopt($fp, CURLOPT_ENCODING, '');
126                }
127                curl_setopt($fp, CURLOPT_URL, $url);
128                curl_setopt($fp, CURLOPT_HEADER, 1);
129                curl_setopt($fp, CURLOPT_RETURNTRANSFER, 1);
130                curl_setopt($fp, CURLOPT_FAILONERROR, 1);
131                curl_setopt($fp, CURLOPT_TIMEOUT, $timeout);
132                curl_setopt($fp, CURLOPT_CONNECTTIMEOUT, $timeout);
133                curl_setopt($fp, CURLOPT_REFERER, \SimplePie\Misc::url_remove_credentials($url));
134                curl_setopt($fp, CURLOPT_USERAGENT, $useragent);
135                curl_setopt($fp, CURLOPT_HTTPHEADER, $headers2);
136                foreach ($curl_options as $curl_param => $curl_value) {
137                    curl_setopt($fp, $curl_param, $curl_value);
138                }
139
140                $responseHeaders = curl_exec($fp);
141                if (curl_errno($fp) === CURLE_WRITE_ERROR || curl_errno($fp) === CURLE_BAD_CONTENT_ENCODING) {
142                    curl_setopt($fp, CURLOPT_ENCODING, 'none');
143                    $responseHeaders = curl_exec($fp);
144                }
145                $this->status_code = curl_getinfo($fp, CURLINFO_HTTP_CODE);
146                if (curl_errno($fp)) {
147                    $this->error = 'cURL error ' . curl_errno($fp) . ': ' . curl_error($fp);
148                    $this->success = false;
149                } else {
150                    // Use the updated url provided by curl_getinfo after any redirects.
151                    if ($info = curl_getinfo($fp)) {
152                        $this->url = $info['url'];
153                    }
154                    // For PHPStan: We already checked that error did not occur.
155                    assert(is_array($info) && $info['redirect_count'] >= 0);
156                    if (\PHP_VERSION_ID < 80000) {
157                        curl_close($fp);
158                    }
159                    $responseHeaders = \SimplePie\HTTP\Parser::prepareHeaders((string) $responseHeaders, $info['redirect_count'] + 1);
160                    $parser = new \SimplePie\HTTP\Parser($responseHeaders, true);
161                    if ($parser->parse()) {
162                        $this->set_headers($parser->headers);
163                        $this->body = $parser->body;
164                        $this->status_code = $parser->status_code;
165                        if ((in_array($this->status_code, [300, 301, 302, 303, 307]) || $this->status_code > 307 && $this->status_code < 400) && ($locationHeader = $this->get_header_line('location')) !== '' && $this->redirects < $redirects) {
166                            $this->redirects++;
167                            $location = \SimplePie\Misc::absolutize_url($locationHeader, $url);
168                            if ($location === false) {
169                                $this->error = "Invalid redirect location, trying to base “{$locationHeader}” onto “{$url}”";
170                                $this->success = false;
171                                return;
172                            }
173                            $this->permanentUrlMutable = $this->permanentUrlMutable && ($this->status_code == 301 || $this->status_code == 308);
174                            $this->__construct($location, $timeout, $redirects, $headers, $useragent, $force_fsockopen, $curl_options);
175                            return;
176                        }
177                    }
178                }
179            } else {
180                $this->method = \SimplePie\SimplePie::FILE_SOURCE_REMOTE | \SimplePie\SimplePie::FILE_SOURCE_FSOCKOPEN;
181                if (($url_parts = parse_url($url)) === false) {
182                    throw new \InvalidArgumentException('Malformed URL: ' . $url);
183                }
184                if (!isset($url_parts['host'])) {
185                    throw new \InvalidArgumentException('Missing hostname: ' . $url);
186                }
187                $socket_host = $url_parts['host'];
188                if (isset($url_parts['scheme']) && strtolower($url_parts['scheme']) === 'https') {
189                    $socket_host = 'ssl://' . $socket_host;
190                    $url_parts['port'] = 443;
191                }
192                if (!isset($url_parts['port'])) {
193                    $url_parts['port'] = 80;
194                }
195                $fp = @fsockopen($socket_host, $url_parts['port'], $errno, $errstr, $timeout);
196                if (!$fp) {
197                    $this->error = 'fsockopen error: ' . $errstr;
198                    $this->success = false;
199                } else {
200                    stream_set_timeout($fp, $timeout);
201                    if (isset($url_parts['path'])) {
202                        if (isset($url_parts['query'])) {
203                            $get = "$url_parts[path]?$url_parts[query]";
204                        } else {
205                            $get = $url_parts['path'];
206                        }
207                    } else {
208                        $get = '/';
209                    }
210                    $out = "GET $get HTTP/1.1\r\n";
211                    $out .= "Host: $url_parts[host]\r\n";
212                    $out .= "User-Agent: $useragent\r\n";
213                    if (extension_loaded('zlib')) {
214                        $out .= "Accept-Encoding: x-gzip,gzip,deflate\r\n";
215                    }
216
217                    if (isset($url_parts['user']) && isset($url_parts['pass'])) {
218                        $out .= "Authorization: Basic " . base64_encode("$url_parts[user]:$url_parts[pass]") . "\r\n";
219                    }
220                    foreach ($headers as $key => $value) {
221                        $out .= "$key: $value\r\n";
222                    }
223                    $out .= "Connection: Close\r\n\r\n";
224                    fwrite($fp, $out);
225
226                    $info = stream_get_meta_data($fp);
227
228                    $responseHeaders = '';
229                    while (!$info['eof'] && !$info['timed_out']) {
230                        $responseHeaders .= fread($fp, 1160);
231                        $info = stream_get_meta_data($fp);
232                    }
233                    if (!$info['timed_out']) {
234                        $parser = new \SimplePie\HTTP\Parser($responseHeaders, true);
235                        if ($parser->parse()) {
236                            $this->set_headers($parser->headers);
237                            $this->body = $parser->body;
238                            $this->status_code = $parser->status_code;
239                            if ((in_array($this->status_code, [300, 301, 302, 303, 307]) || $this->status_code > 307 && $this->status_code < 400) && ($locationHeader = $this->get_header_line('location')) !== '' && $this->redirects < $redirects) {
240                                $this->redirects++;
241                                $location = \SimplePie\Misc::absolutize_url($locationHeader, $url);
242                                $this->permanentUrlMutable = $this->permanentUrlMutable && ($this->status_code == 301 || $this->status_code == 308);
243                                if ($location === false) {
244                                    $this->error = "Invalid redirect location, trying to base “{$locationHeader}” onto “{$url}”";
245                                    $this->success = false;
246                                    return;
247                                }
248                                $this->__construct($location, $timeout, $redirects, $headers, $useragent, $force_fsockopen, $curl_options);
249                                return;
250                            }
251                            if (($contentEncodingHeader = $this->get_header_line('content-encoding')) !== '') {
252                                // Hey, we act dumb elsewhere, so let's do that here too
253                                switch (strtolower(trim($contentEncodingHeader, "\x09\x0A\x0D\x20"))) {
254                                    case 'gzip':
255                                    case 'x-gzip':
256                                        if (($decompressed = gzdecode($this->body)) === false) {
257                                            $this->error = 'Unable to decode HTTP "gzip" stream';
258                                            $this->success = false;
259                                        } else {
260                                            $this->body = $decompressed;
261                                        }
262                                        break;
263
264                                    case 'deflate':
265                                        if (($decompressed = gzinflate($this->body)) !== false) {
266                                            $this->body = $decompressed;
267                                        } elseif (($decompressed = gzuncompress($this->body)) !== false) {
268                                            $this->body = $decompressed;
269                                        } elseif (($decompressed = gzdecode($this->body)) !== false) {
270                                            $this->body = $decompressed;
271                                        } else {
272                                            $this->error = 'Unable to decode HTTP "deflate" stream';
273                                            $this->success = false;
274                                        }
275                                        break;
276
277                                    default:
278                                        $this->error = 'Unknown content coding';
279                                        $this->success = false;
280                                }
281                            }
282                        }
283                    } else {
284                        $this->error = 'fsocket timed out';
285                        $this->success = false;
286                    }
287                    fclose($fp);
288                }
289            }
290        } else {
291            $this->method = \SimplePie\SimplePie::FILE_SOURCE_LOCAL | \SimplePie\SimplePie::FILE_SOURCE_FILE_GET_CONTENTS;
292            if (empty($url) || !is_readable($url) ||  false === $filebody = file_get_contents($url)) {
293                $this->body = '';
294                $this->error = sprintf('file "%s" is not readable', $url);
295                $this->success = false;
296            } else {
297                $this->body = $filebody;
298                $this->status_code = 200;
299            }
300        }
301        if ($this->success) {
302            assert($this->body !== null); // For PHPStan
303            // Leading whitespace may cause XML parsing errors (XML declaration cannot be preceded by anything other than BOM) so we trim it.
304            // Note that unlike built-in `trim` function’s default settings, we do not trim `\x00` to avoid breaking characters in UTF-16 or UTF-32 encoded strings.
305            // We also only do that when the whitespace is followed by `<`, so that we do not break e.g. UTF-16LE encoded whitespace like `\n\x00` in half.
306            $this->body = preg_replace('/^[ \n\r\t\v]+</', '<', $this->body);
307        }
308    }
309
310    public function get_permanent_uri(): string
311    {
312        return (string) $this->permanent_url;
313    }
314
315    public function get_final_requested_uri(): string
316    {
317        return (string) $this->url;
318    }
319
320    public function get_status_code(): int
321    {
322        return (int) $this->status_code;
323    }
324
325    public function get_headers(): array
326    {
327        $this->maybe_update_headers();
328        return $this->parsed_headers;
329    }
330
331    public function has_header(string $name): bool
332    {
333        $this->maybe_update_headers();
334        return $this->get_header($name) !== [];
335    }
336
337    public function get_header(string $name): array
338    {
339        $this->maybe_update_headers();
340        return $this->parsed_headers[strtolower($name)] ?? [];
341    }
342
343    public function with_header(string $name, $value)
344    {
345        $this->maybe_update_headers();
346        $new = clone $this;
347
348        $newHeader = [
349            strtolower($name) => (array) $value,
350        ];
351        $new->set_headers($newHeader + $this->get_headers());
352
353        return $new;
354    }
355
356    public function get_header_line(string $name): string
357    {
358        $this->maybe_update_headers();
359        return implode(', ', $this->get_header($name));
360    }
361
362    public function get_body_content(): string
363    {
364        return (string) $this->body;
365    }
366
367    /**
368     * Check if the $headers property was changed and update the internal state accordingly.
369     */
370    private function maybe_update_headers(): void
371    {
372        if ($this->headers !== $this->last_headers) {
373            $this->parsed_headers = array_map(
374                function (string $header_line): array {
375                    if (strpos($header_line, ',') === false) {
376                        return [$header_line];
377                    } else {
378                        return array_map('trim', explode(',', $header_line));
379                    }
380                },
381                $this->headers
382            );
383        }
384        $this->last_headers = $this->headers;
385    }
386
387    /**
388     * Sets headers internally.
389     *
390     * @param array<string, non-empty-array<string>> $headers
391     */
392    private function set_headers(array $headers): void
393    {
394        $this->parsed_headers = $headers;
395        $this->headers = self::flatten_headers($headers);
396        $this->last_headers = $this->headers;
397    }
398
399    /**
400     * Converts PSR-7 compatible headers into a legacy format.
401     *
402     * @param array<string, non-empty-array<string>> $headers
403     *
404     * @return array<string, string>
405     */
406    private function flatten_headers(array $headers): array
407    {
408        return array_map(function (array $values): string {
409            return implode(',', $values);
410        }, $headers);
411    }
412
413    /**
414     * Create a File instance from another Response
415     *
416     * For BC reasons in some places there MUST be a `File` instance
417     * instead of a `Response` implementation
418     *
419     * @see Locator::__construct()
420     * @internal
421     */
422    final public static function fromResponse(Response $response): self
423    {
424        $headers = [];
425
426        foreach ($response->get_headers() as $name => $header) {
427            $headers[$name] = implode(', ', $header);
428        }
429
430        /** @var File */
431        $file = (new \ReflectionClass(File::class))->newInstanceWithoutConstructor();
432
433        $file->url = $response->get_final_requested_uri();
434        $file->useragent = null;
435        $file->headers = $headers;
436        $file->body = $response->get_body_content();
437        $file->status_code = $response->get_status_code();
438        $file->permanent_url = $response->get_permanent_uri();
439
440        return $file;
441    }
442}
443
// Make the class also available under the un-namespaced name `SimplePie_File`
// (presumably for backwards compatibility with code written against that name).
class_alias('SimplePie\File', 'SimplePie_File');
445