xref: /dokuwiki/vendor/simplepie/simplepie/src/HTTP/Parser.php (revision 8e88a29b81301f78509349ab1152bb09c229123e)
1<?php
2
3// SPDX-FileCopyrightText: 2004-2023 Ryan Parman, Sam Sneddon, Ryan McCue
4// SPDX-License-Identifier: BSD-3-Clause
5
6declare(strict_types=1);
7
8namespace SimplePie\HTTP;
9
/**
 * HTTP Response Parser
 *
 * A state machine that splits a raw HTTP response into status line, headers
 * and body. Every STATE_* constant (except STATE_EMIT and STATE_ERROR) holds
 * the name of the method handling that state; parse() keeps invoking the
 * method named by the current state until the response is complete, the input
 * is exhausted, or the input turns out to be malformed.
 *
 * @template Psr7Compatible of bool
 */
class Parser
{
    /**
     * HTTP Version
     *
     * @var float
     */
    public $http_version = 0.0;

    /**
     * Status code
     *
     * @var int
     */
    public $status_code = 0;

    /**
     * Reason phrase
     *
     * @var string
     */
    public $reason = '';

    /**
     * @var Psr7Compatible whether headers are compatible with PSR-7 format.
     */
    private $psr7Compatible;

    /**
     * Key/value pairs of the headers, keyed by lower-cased header name
     *
     * In PSR-7 mode every value is a list of strings; otherwise repeated
     * headers are joined into one string with ", ".
     *
     * @var (Psr7Compatible is true ? array<string, non-empty-array<string>> : array<string, string>)
     */
    public $headers = [];

    /**
     * Body of the response
     *
     * @var string
     */
    public $body = '';

    private const STATE_HTTP_VERSION = 'http_version';

    private const STATE_STATUS = 'status';

    private const STATE_REASON = 'reason';

    private const STATE_NEW_LINE = 'new_line';

    private const STATE_BODY = 'body';

    private const STATE_NAME = 'name';

    private const STATE_VALUE = 'value';

    private const STATE_VALUE_CHAR = 'value_char';

    private const STATE_QUOTE = 'quote';

    private const STATE_QUOTE_ESCAPED = 'quote_escaped';

    private const STATE_QUOTE_CHAR = 'quote_char';

    private const STATE_CHUNKED = 'chunked';

    private const STATE_EMIT = 'emit';

    private const STATE_ERROR = false;

    /**
     * Matches the size line that introduces a chunk: hexadecimal length,
     * optional extension text, then CRLF.
     */
    private const CHUNK_HEADER_PATTERN = '/^([0-9a-f]+)[^\r\n]*\r\n/i';

    /**
     * Current state of the state machine
     *
     * @var self::STATE_*
     */
    protected $state = self::STATE_HTTP_VERSION;

    /**
     * Input data
     *
     * @var string
     */
    protected $data = '';

    /**
     * Input data length (to avoid calling strlen() everytime this is needed)
     *
     * @var int
     */
    protected $data_length = 0;

    /**
     * Current position of the pointer
     *
     * @var int
     */
    protected $position = 0;

    /**
     * Name of the header currently being parsed
     *
     * @var string
     */
    protected $name = '';

    /**
     * Value of the header currently being parsed
     *
     * @var string
     */
    protected $value = '';

    /**
     * Create an instance of the class with the input data
     *
     * @param string $data Input data
     * @param Psr7Compatible $psr7Compatible Whether the data types are in format compatible with PSR-7.
     */
    public function __construct(string $data, bool $psr7Compatible = false)
    {
        $this->data = $data;
        $this->data_length = strlen($this->data);
        $this->psr7Compatible = $psr7Compatible;
    }

    /**
     * Parse the input data
     *
     * Runs the state machine until it reaches STATE_EMIT, hits STATE_ERROR or
     * runs out of input. On failure all public result properties are reset to
     * their initial values.
     *
     * @return bool true on success, false on failure
     */
    public function parse()
    {
        while ($this->state && $this->state !== self::STATE_EMIT && $this->has_data()) {
            // The state value doubles as the name of the handling method.
            $state = $this->state;
            $this->$state();
        }
        $this->data = '';

        // STATE_BODY also counts as success: the header section ended but no
        // bytes were left for body() to consume (empty body).
        if ($this->state === self::STATE_EMIT || $this->state === self::STATE_BODY) {
            return true;
        }

        // Reset the parser state.
        $this->http_version = 0.0;
        $this->status_code = 0;
        $this->reason = '';
        $this->headers = [];
        $this->body = '';
        return false;
    }

    /**
     * Check whether there is data beyond the pointer
     *
     * @return bool true if there is further data, false if not
     */
    protected function has_data()
    {
        return $this->position < $this->data_length;
    }

    /**
     * See if the next character is LWS
     *
     * LWS is a tab, a space, or a line break (LF or CRLF) that is directly
     * followed by a tab or space, i.e. a folded header line. Must only be
     * called while has_data() is true.
     *
     * @return bool true if the next character is LWS, false if not
     */
    protected function is_linear_whitespace()
    {
        $current = $this->data[$this->position];
        if ($current === "\x09" || $current === "\x20") {
            return true;
        }

        if ($current === "\x0A") {
            $after_break = $this->position + 1;
        } elseif ($current === "\x0D" && ($this->data[$this->position + 1] ?? '') === "\x0A") {
            $after_break = $this->position + 2;
        } else {
            return false;
        }

        // A line break only counts as LWS when the next line is a continuation.
        $continuation = $this->data[$after_break] ?? '';

        return $continuation === "\x09" || $continuation === "\x20";
    }

    /**
     * Parse the HTTP version
     *
     * Requires the data to contain at least one LF and to start with
     * "HTTP/" (case-insensitive); anything else is an error.
     *
     * @return void
     */
    protected function http_version()
    {
        if (strpos($this->data, "\x0A") !== false && strtoupper(substr($this->data, 0, 5)) === 'HTTP/') {
            $len = strspn($this->data, '0123456789.', 5);
            $http_version = substr($this->data, 5, $len);
            $this->position += 5 + $len;
            // More than one dot cannot be a version number.
            if (substr_count($http_version, '.') <= 1) {
                $this->http_version = (float) $http_version;
                $this->position += strspn($this->data, "\x09\x20", $this->position);
                $this->state = self::STATE_STATUS;
            } else {
                $this->state = self::STATE_ERROR;
            }
        } else {
            $this->state = self::STATE_ERROR;
        }
    }

    /**
     * Parse the status code
     *
     * @return void
     */
    protected function status()
    {
        if ($len = strspn($this->data, '0123456789', $this->position)) {
            $this->status_code = (int) substr($this->data, $this->position, $len);
            $this->position += $len;
            $this->state = self::STATE_REASON;
        } else {
            $this->state = self::STATE_ERROR;
        }
    }

    /**
     * Parse the reason phrase
     *
     * Takes everything up to the next LF, trimmed of tabs, CRs and spaces.
     *
     * @return void
     */
    protected function reason()
    {
        $len = strcspn($this->data, "\x0A", $this->position);
        $this->reason = trim(substr($this->data, $this->position, $len), "\x09\x0D\x20");
        // Step over the terminating LF as well.
        $this->position += $len + 1;
        $this->state = self::STATE_NEW_LINE;
    }

    /**
     * Append a value to a header that is already present
     *
     * @param string $name Lower-cased header name, must already exist in $headers
     * @param string $value Header value to append
     */
    private function add_header(string $name, string $value): void
    {
        if ($this->psr7Compatible) {
            // For PHPStan: should be enforced by template parameter but PHPStan is not smart enough.
            /** @var array<string, non-empty-array<string>> */
            $headers = &$this->headers;
            $headers[$name][] = $value;
        } else {
            // For PHPStan: should be enforced by template parameter but PHPStan is not smart enough.
            /** @var array<string, string> */
            $headers = &$this->headers;
            $headers[$name] .= ', ' . $value;
        }
    }

    /**
     * Set a header, discarding any value stored under the same name
     *
     * @param string $name Lower-cased header name
     * @param string $value Header value
     */
    private function replace_header(string $name, string $value): void
    {
        if ($this->psr7Compatible) {
            // For PHPStan: should be enforced by template parameter but PHPStan is not smart enough.
            /** @var array<string, non-empty-array<string>> */
            $headers = &$this->headers;
            $headers[$name] = [$value];
        } else {
            // For PHPStan: should be enforced by template parameter but PHPStan is not smart enough.
            /** @var array<string, string> */
            $headers = &$this->headers;
            $headers[$name] = $value;
        }
    }

    /**
     * Deal with a new line, shifting data around as needed
     *
     * Stores the header collected so far (if any), then decides whether the
     * header section has ended (empty line) or another header follows.
     *
     * @return void
     */
    protected function new_line()
    {
        $this->value = trim($this->value, "\x0D\x20");
        if ($this->name !== '' && $this->value !== '') {
            $this->name = strtolower($this->name);
            // We should only use the last Content-Type header. c.f. issue #1
            if (isset($this->headers[$this->name]) && $this->name !== 'content-type') {
                $this->add_header($this->name, $this->value);
            } else {
                $this->replace_header($this->name, $this->value);
            }
        }
        $this->name = '';
        $this->value = '';

        // An empty line (CRLF or bare LF) terminates the header section.
        if (substr($this->data, $this->position, 2) === "\x0D\x0A") {
            $this->position += 2;
            $this->state = self::STATE_BODY;
        } elseif ($this->data[$this->position] === "\x0A") {
            $this->position++;
            $this->state = self::STATE_BODY;
        } else {
            $this->state = self::STATE_NAME;
        }
    }

    /**
     * Parse a header name
     *
     * A line without a colon is skipped; input ending before a colon or LF
     * is an error.
     *
     * @return void
     */
    protected function name()
    {
        $len = strcspn($this->data, "\x0A:", $this->position);
        if (isset($this->data[$this->position + $len])) {
            if ($this->data[$this->position + $len] === "\x0A") {
                // No colon on this line: leave the pointer on the LF so that
                // new_line() can decide what comes next.
                $this->position += $len;
                $this->state = self::STATE_NEW_LINE;
            } else {
                $this->name = substr($this->data, $this->position, $len);
                $this->position += $len + 1;
                $this->state = self::STATE_VALUE;
            }
        } else {
            $this->state = self::STATE_ERROR;
        }
    }

    /**
     * Parse LWS, replacing consecutive LWS characters with a single space
     *
     * @return void
     */
    protected function linear_whitespace()
    {
        // value_char()/quote_char() do not stop at CR, so the CR of a folding
        // CRLF has already been copied into the value; drop it again.
        if ($this->data[$this->position] === "\x0A") {
            $this->value = rtrim($this->value, "\x0D");
        }

        do {
            if (substr($this->data, $this->position, 2) === "\x0D\x0A") {
                $this->position += 2;
            } elseif ($this->data[$this->position] === "\x0A") {
                $this->position++;
            }
            $this->position += strspn($this->data, "\x09\x20", $this->position);
        } while ($this->has_data() && $this->is_linear_whitespace());
        $this->value .= "\x20";
    }

    /**
     * See what state to move to while within non-quoted header values
     *
     * @return void
     */
    protected function value()
    {
        if ($this->is_linear_whitespace()) {
            $this->linear_whitespace();
        } else {
            switch ($this->data[$this->position]) {
                case '"':
                    // Workaround for ETags: we have to include the quotes as
                    // part of the tag.
                    if (strtolower($this->name) === 'etag') {
                        $this->value .= '"';
                        $this->position++;
                        $this->state = self::STATE_VALUE_CHAR;
                        break;
                    }
                    $this->position++;
                    $this->state = self::STATE_QUOTE;
                    break;

                case "\x0A":
                    $this->position++;
                    $this->state = self::STATE_NEW_LINE;
                    break;

                default:
                    $this->state = self::STATE_VALUE_CHAR;
                    break;
            }
        }
    }

    /**
     * Parse a header value while outside quotes
     *
     * Copies everything up to the next tab, space, LF or double quote.
     *
     * @return void
     */
    protected function value_char()
    {
        $len = strcspn($this->data, "\x09\x20\x0A\"", $this->position);
        $this->value .= substr($this->data, $this->position, $len);
        $this->position += $len;
        $this->state = self::STATE_VALUE;
    }

    /**
     * See what state to move to while within quoted header values
     *
     * @return void
     */
    protected function quote()
    {
        if ($this->is_linear_whitespace()) {
            $this->linear_whitespace();
        } else {
            switch ($this->data[$this->position]) {
                case '"':
                    // Closing quote: back to the unquoted part of the value.
                    $this->position++;
                    $this->state = self::STATE_VALUE;
                    break;

                case "\x0A":
                    // Unterminated quote: the line break still ends the header.
                    $this->position++;
                    $this->state = self::STATE_NEW_LINE;
                    break;

                case '\\':
                    $this->position++;
                    $this->state = self::STATE_QUOTE_ESCAPED;
                    break;

                default:
                    $this->state = self::STATE_QUOTE_CHAR;
                    break;
            }
        }
    }

    /**
     * Parse a header value while within quotes
     *
     * Copies everything up to the next tab, space, LF, double quote or
     * backslash and stays inside the quoted string.
     *
     * @return void
     */
    protected function quote_char()
    {
        $len = strcspn($this->data, "\x09\x20\x0A\"\\", $this->position);
        $this->value .= substr($this->data, $this->position, $len);
        $this->position += $len;
        // Remain in quoted mode so that the closing quote and any further
        // backslash escapes are interpreted by quote().
        $this->state = self::STATE_QUOTE;
    }

    /**
     * Parse an escaped character within quotes
     *
     * The character following the backslash is taken literally.
     *
     * @return void
     */
    protected function quote_escaped()
    {
        $this->value .= $this->data[$this->position];
        $this->position++;
        $this->state = self::STATE_QUOTE;
    }

    /**
     * Parse the body
     *
     * Everything after the header section becomes the body. If a non-empty
     * Transfer-Encoding header was seen, the header is removed and the body
     * is handed to chunked() for decoding.
     *
     * @return void
     */
    protected function body()
    {
        $this->body = substr($this->data, $this->position);
        if (!empty($this->headers['transfer-encoding'])) {
            unset($this->headers['transfer-encoding']);
            $this->state = self::STATE_CHUNKED;
        } else {
            $this->state = self::STATE_EMIT;
        }
    }

    /**
     * Parsed a "Transfer-Encoding: chunked" body
     *
     * If the body does not look chunked (or stops looking chunked part-way),
     * it is left untouched.
     *
     * @return void
     */
    protected function chunked()
    {
        if (!preg_match(self::CHUNK_HEADER_PATTERN, trim($this->body))) {
            $this->state = self::STATE_EMIT;
            return;
        }

        $decoded = '';
        $encoded = $this->body;

        while (true) {
            if (!preg_match(self::CHUNK_HEADER_PATTERN, $encoded, $matches)) {
                // Looks like it's not chunked after all
                $this->state = self::STATE_EMIT;
                return;
            }

            $length = hexdec($matches[1]);
            if (!\is_int($length)) {
                // hexdec() returns a float once the size exceeds PHP_INT_MAX.
                // No real chunk is that large, so treat the body as not
                // chunked instead of passing a float on to substr().
                $this->state = self::STATE_EMIT;
                return;
            }
            if ($length === 0) {
                // Ignore trailer headers
                $this->state = self::STATE_EMIT;
                $this->body = $decoded;
                return;
            }

            $chunk_length = strlen($matches[0]);
            $decoded .= substr($encoded, $chunk_length, $length);
            // Skip size line, chunk data and the CRLF that ends the chunk.
            $encoded = substr($encoded, $chunk_length + $length + 2);

            // BC for PHP < 8.0: substr() can return bool instead of string
            $encoded = ($encoded === false) ? '' : $encoded;

            if (trim($encoded) === '0' || empty($encoded)) {
                $this->state = self::STATE_EMIT;
                $this->body = $decoded;
                return;
            }
        }
    }

    /**
     * Prepare headers (take care of proxies headers)
     *
     * Splits the raw headers on blank lines into at most $count pieces and
     * keeps the last piece. If that piece still contains a proxy
     * "200 Connection established" status line, the part after the blank
     * line that follows it is used instead.
     *
     * @param string  $headers Raw headers
     * @param non-negative-int $count Redirection count. Default to 1.
     *
     * @return string
     */
    public static function prepareHeaders(string $headers, int $count = 1)
    {
        $segments = explode("\r\n\r\n", $headers, $count);
        $data = array_pop($segments);

        $proxy_status_lines = [
            "HTTP/1.0 200 Connection established\r\n",
            "HTTP/1.1 200 Connection established\r\n",
        ];
        foreach ($proxy_status_lines as $proxy_status_line) {
            if (false !== stripos($data, $proxy_status_line)) {
                $exploded = explode("\r\n\r\n", $data, 2);
                $data = end($exploded);
            }
        }

        return $data;
    }
}
522
// Register the pre-namespace class name as an alias of the namespaced parser class.
\class_alias('SimplePie\HTTP\Parser', 'SimplePie_HTTP_Parser');
524