1<?php
2/**
3 * SimplePie
4 *
5 * A PHP-Based RSS and Atom Feed Framework.
6 * Takes the hard work out of managing a complete RSS/Atom solution.
7 *
8 * Copyright (c) 2004-2016, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without modification, are
12 * permitted provided that the following conditions are met:
13 *
14 * 	* Redistributions of source code must retain the above copyright notice, this list of
15 * 	  conditions and the following disclaimer.
16 *
17 * 	* Redistributions in binary form must reproduce the above copyright notice, this list
18 * 	  of conditions and the following disclaimer in the documentation and/or other materials
19 * 	  provided with the distribution.
20 *
21 * 	* Neither the name of the SimplePie Team nor the names of its contributors may be used
22 * 	  to endorse or promote products derived from this software without specific prior
23 * 	  written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
26 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
27 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
28 * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
32 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
34 *
35 * @package SimplePie
36 * @copyright 2004-2016 Ryan Parman, Sam Sneddon, Ryan McCue
37 * @author Ryan Parman
38 * @author Sam Sneddon
39 * @author Ryan McCue
40 * @link http://simplepie.org/ SimplePie
41 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
42 */
43
44
45/**
46 * HTTP Response Parser
47 *
48 * @package SimplePie
49 * @subpackage HTTP
50 */
class SimplePie_HTTP_Parser
{
	/**
	 * HTTP Version
	 *
	 * A float after a successful parse; reset to an empty string when
	 * parse() fails.
	 *
	 * @var float|string
	 */
	public $http_version = 0.0;

	/**
	 * Status code
	 *
	 * An integer after a successful parse; reset to an empty string when
	 * parse() fails.
	 *
	 * @var int|string
	 */
	public $status_code = 0;

	/**
	 * Reason phrase
	 *
	 * @var string
	 */
	public $reason = '';

	/**
	 * Key/value pairs of the headers
	 *
	 * Keys are lower-cased header names. Repeated headers are joined with
	 * ", ", except Content-Type, for which only the last value is kept.
	 *
	 * @var array
	 */
	public $headers = array();

	/**
	 * Body of the response
	 *
	 * @var string
	 */
	public $body = '';

	/**
	 * Current state of the state machine
	 *
	 * Holds the name of the method that handles the next piece of input,
	 * 'emit' once parsing is complete, or false once parsing has failed.
	 *
	 * @var string|false
	 */
	protected $state = 'http_version';

	/**
	 * Input data
	 *
	 * @var string
	 */
	protected $data = '';

	/**
	 * Input data length (to avoid calling strlen() every time this is needed)
	 *
	 * @var int
	 */
	protected $data_length = 0;

	/**
	 * Current position of the pointer
	 *
	 * @var int
	 */
	protected $position = 0;

	/**
	 * Name of the header currently being parsed
	 *
	 * @var string
	 */
	protected $name = '';

	/**
	 * Value of the header currently being parsed
	 *
	 * @var string
	 */
	protected $value = '';

	/**
	 * Create an instance of the class with the input data
	 *
	 * @param string $data Input data
	 */
	public function __construct($data)
	{
		$this->data = $data;
		$this->data_length = strlen($this->data);
	}

	/**
	 * Parse the input data
	 *
	 * Runs the state machine until it finishes ('emit'), fails (false) or
	 * runs out of input. Reaching the 'body' state with no input left counts
	 * as success (a response with an empty body). On failure every public
	 * property is reset; note that the version and status code are reset to
	 * empty strings.
	 *
	 * @return bool true on success, false on failure
	 */
	public function parse()
	{
		while ($this->state && $this->state !== 'emit' && $this->has_data())
		{
			// Each state is named after the method that handles it.
			$state = $this->state;
			$this->$state();
		}

		// The raw input is no longer needed once the machine has stopped.
		$this->data = '';

		if ($this->state === 'emit' || $this->state === 'body')
		{
			return true;
		}

		$this->http_version = '';
		$this->status_code = '';
		$this->reason = '';
		$this->headers = array();
		$this->body = '';
		return false;
	}

	/**
	 * Check whether there is data beyond the pointer
	 *
	 * @return bool true if there is further data, false if not
	 */
	protected function has_data()
	{
		return $this->position < $this->data_length;
	}

	/**
	 * See if the next character is LWS
	 *
	 * A tab or a space is LWS; a line feed is LWS only when it is followed
	 * by a tab or a space (a folded header line).
	 *
	 * @return bool true if the next character is LWS, false if not
	 */
	protected function is_linear_whitespace()
	{
		$current = $this->data[$this->position];
		if ($current === "\x09" || $current === "\x20")
		{
			return true;
		}

		if ($current !== "\x0A" || !isset($this->data[$this->position + 1]))
		{
			return false;
		}

		$next = $this->data[$this->position + 1];
		return $next === "\x09" || $next === "\x20";
	}

	/**
	 * Parse the HTTP version
	 *
	 * Requires the input to start with "HTTP/" and to contain at least one
	 * line feed. A version containing more than one dot is rejected.
	 */
	protected function http_version()
	{
		if (strpos($this->data, "\x0A") === false || strtoupper(substr($this->data, 0, 5)) !== 'HTTP/')
		{
			$this->state = false;
			return;
		}

		$len = strspn($this->data, '0123456789.', 5);
		$this->http_version = substr($this->data, 5, $len);
		$this->position += 5 + $len;

		if (substr_count($this->http_version, '.') > 1)
		{
			$this->state = false;
			return;
		}

		$this->http_version = (float) $this->http_version;
		// Skip the whitespace separating the version from the status code.
		$this->position += strspn($this->data, "\x09\x20", $this->position);
		$this->state = 'status';
	}

	/**
	 * Parse the status code
	 *
	 * Fails when there is no digit at the current position.
	 */
	protected function status()
	{
		$len = strspn($this->data, '0123456789', $this->position);
		if ($len)
		{
			$this->status_code = (int) substr($this->data, $this->position, $len);
			$this->position += $len;
			$this->state = 'reason';
		}
		else
		{
			$this->state = false;
		}
	}

	/**
	 * Parse the reason phrase
	 *
	 * Takes everything up to the next line feed, stripped of surrounding
	 * tabs, carriage returns and spaces, and moves past that line feed.
	 */
	protected function reason()
	{
		$len = strcspn($this->data, "\x0A", $this->position);
		$this->reason = trim(substr($this->data, $this->position, $len), "\x09\x0D\x20");
		$this->position += $len + 1;
		$this->state = 'new_line';
	}

	/**
	 * Deal with a new line, shifting data around as needed
	 *
	 * Stores the header collected so far (if both its name and value are
	 * non-empty), then decides whether the next line is the blank line that
	 * ends the header section or another header.
	 */
	protected function new_line()
	{
		$this->value = trim($this->value, "\x0D\x20");
		if ($this->name !== '' && $this->value !== '')
		{
			$this->name = strtolower($this->name);
			// We should only use the last Content-Type header. c.f. issue #1
			if (isset($this->headers[$this->name]) && $this->name !== 'content-type')
			{
				$this->headers[$this->name] .= ', ' . $this->value;
			}
			else
			{
				$this->headers[$this->name] = $this->value;
			}
		}
		$this->name = '';
		$this->value = '';

		// Look at the next two bytes of the input (not of a single
		// character) so that a CRLF blank line is recognised directly.
		if (substr($this->data, $this->position, 2) === "\x0D\x0A")
		{
			$this->position += 2;
			$this->state = 'body';
		}
		elseif ($this->data[$this->position] === "\x0A")
		{
			$this->position++;
			$this->state = 'body';
		}
		else
		{
			$this->state = 'name';
		}
	}

	/**
	 * Parse a header name
	 *
	 * Reads up to the next colon or line feed. A line without a colon is
	 * skipped; running out of input before either character is a failure.
	 */
	protected function name()
	{
		$len = strcspn($this->data, "\x0A:", $this->position);
		if (!isset($this->data[$this->position + $len]))
		{
			$this->state = false;
			return;
		}

		if ($this->data[$this->position + $len] === "\x0A")
		{
			// No colon on this line: leave the pointer on the line feed
			// and let new_line() decide what comes next.
			$this->position += $len;
			$this->state = 'new_line';
		}
		else
		{
			$this->name = substr($this->data, $this->position, $len);
			$this->position += $len + 1;
			$this->state = 'value';
		}
	}

	/**
	 * Parse LWS, replacing consecutive LWS characters with a single space
	 */
	protected function linear_whitespace()
	{
		do
		{
			if (substr($this->data, $this->position, 2) === "\x0D\x0A")
			{
				$this->position += 2;
			}
			elseif ($this->data[$this->position] === "\x0A")
			{
				$this->position++;
			}
			$this->position += strspn($this->data, "\x09\x20", $this->position);
		} while ($this->has_data() && $this->is_linear_whitespace());
		$this->value .= "\x20";
	}

	/**
	 * See what state to move to while within non-quoted header values
	 */
	protected function value()
	{
		if ($this->is_linear_whitespace())
		{
			$this->linear_whitespace();
			return;
		}

		switch ($this->data[$this->position])
		{
			case '"':
				// Workaround for ETags: we have to include the quotes as
				// part of the tag.
				if (strtolower($this->name) === 'etag')
				{
					$this->value .= '"';
					$this->position++;
					$this->state = 'value_char';
					break;
				}
				$this->position++;
				$this->state = 'quote';
				break;

			case "\x0A":
				$this->position++;
				$this->state = 'new_line';
				break;

			default:
				$this->state = 'value_char';
				break;
		}
	}

	/**
	 * Parse a header value while outside quotes
	 *
	 * Appends everything up to the next tab, space, line feed or double
	 * quote to the current value.
	 */
	protected function value_char()
	{
		$len = strcspn($this->data, "\x09\x20\x0A\"", $this->position);
		$this->value .= substr($this->data, $this->position, $len);
		$this->position += $len;
		$this->state = 'value';
	}

	/**
	 * See what state to move to while within quoted header values
	 */
	protected function quote()
	{
		if ($this->is_linear_whitespace())
		{
			$this->linear_whitespace();
			return;
		}

		switch ($this->data[$this->position])
		{
			case '"':
				// Closing quote: back to the unquoted part of the value.
				$this->position++;
				$this->state = 'value';
				break;

			case "\x0A":
				$this->position++;
				$this->state = 'new_line';
				break;

			case '\\':
				$this->position++;
				$this->state = 'quote_escaped';
				break;

			default:
				$this->state = 'quote_char';
				break;
		}
	}

	/**
	 * Parse a header value while within quotes
	 *
	 * Appends everything up to the next tab, space, line feed, double quote
	 * or backslash to the current value.
	 */
	protected function quote_char()
	{
		$len = strcspn($this->data, "\x09\x20\x0A\"\\", $this->position);
		$this->value .= substr($this->data, $this->position, $len);
		$this->position += $len;
		// Stay inside the quoted string so that backslash escapes and the
		// closing quote are interpreted by quote(), not by value().
		$this->state = 'quote';
	}

	/**
	 * Parse an escaped character within quotes
	 *
	 * The character following the backslash is taken literally.
	 */
	protected function quote_escaped()
	{
		$this->value .= $this->data[$this->position];
		$this->position++;
		$this->state = 'quote';
	}

	/**
	 * Parse the body
	 *
	 * Everything from the pointer to the end of the input is the body. When
	 * a non-empty Transfer-Encoding header is present, the header is dropped
	 * and the body is handed to chunked() for decoding.
	 */
	protected function body()
	{
		$this->body = substr($this->data, $this->position);
		if (!empty($this->headers['transfer-encoding']))
		{
			unset($this->headers['transfer-encoding']);
			$this->state = 'chunked';
		}
		else
		{
			$this->state = 'emit';
		}
	}

	/**
	 * Parse a "Transfer-Encoding: chunked" body
	 *
	 * If the body does not look chunked (initially, or part-way through)
	 * it is left untouched. Otherwise the chunks are concatenated into the
	 * body; chunk extensions and trailer headers are ignored.
	 */
	protected function chunked()
	{
		// Hexadecimal chunk size, optional chunk extension, then CRLF.
		$chunk_header = '/^([0-9a-f]+)[^\r\n]*\r\n/i';

		if (!preg_match($chunk_header, trim($this->body)))
		{
			$this->state = 'emit';
			return;
		}

		$decoded = '';
		$encoded = $this->body;

		while (true)
		{
			if (!preg_match($chunk_header, $encoded, $matches))
			{
				// Looks like it's not chunked after all
				$this->state = 'emit';
				return;
			}

			$length = hexdec($matches[1]);
			if ($length === 0)
			{
				// Ignore trailer headers
				$this->state = 'emit';
				$this->body = $decoded;
				return;
			}

			$chunk_length = strlen($matches[0]);

			// The size comes from the remote server. hexdec() returns a
			// float for values beyond the integer range, which substr()
			// cannot accept, so never ask for more than is actually there.
			$available = strlen($encoded) - $chunk_length;
			if ($length > $available)
			{
				$length = $available;
			}

			$decoded .= (string) substr($encoded, $chunk_length, $length);
			// Skip the chunk data and the CRLF that terminates it.
			$encoded = (string) substr($encoded, $chunk_length + $length + 2);

			if (trim($encoded) === '0' || empty($encoded))
			{
				$this->state = 'emit';
				$this->body = $decoded;
				return;
			}
		}
	}

	/**
	 * Prepare headers (take care of proxies headers)
	 *
	 * Splits the raw headers on blank lines into at most $count parts,
	 * keeps the last part, and removes any "200 Connection established"
	 * header block from it.
	 *
	 * @param string  $headers Raw headers
	 * @param integer $count   Redirection count. Default to 1.
	 *
	 * @return string
	 */
	public static function prepareHeaders($headers, $count = 1)
	{
		$parts = explode("\r\n\r\n", $headers, $count);
		$data = array_pop($parts);

		$proxy_responses = array(
			"HTTP/1.0 200 Connection established\r\n\r\n",
			"HTTP/1.1 200 Connection established\r\n\r\n",
		);

		return str_ireplace($proxy_responses, '', $data);
	}
}
519