1<?php
2
3/*************************************************
4
5Snoopy - the PHP net client
6Author: Monte Ohrt <monte@ispi.net>
7Copyright (c): 1999-2008 New Digital Group, all rights reserved
8Version: 1.2.4
9
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
14 *
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 * Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
23
24You may contact the author of Snoopy by e-mail at:
25monte@ohrt.com
26
27The latest version of Snoopy can be obtained from:
28http://snoopy.sourceforge.net/
29
30*************************************************/
31
32class Snoopy
33{
34	/**** Public variables ****/
35
36	/* user definable vars */
37
38	var $host			=	"www.php.net";		// host name we are connecting to
39	var $port			=	80;					// port we are connecting to
40	var $proxy_host		=	"";					// proxy host to use
41	var $proxy_port		=	"";					// proxy port to use
42	var $proxy_user		=	"";					// proxy user to use
43	var $proxy_pass		=	"";					// proxy password to use
44
45	var $agent			=	"Snoopy v1.2.4";	// agent we masquerade as
46	var	$referer		=	"";					// referer info to pass
47	var $cookies		=	array();			// array of cookies to pass
48												// $cookies["username"]="joe";
49	var	$rawheaders		=	array();			// array of raw headers to send
50												// $rawheaders["Content-type"]="text/html";
51
52	var $maxredirs		=	5;					// http redirection depth maximum. 0 = disallow
53	var $lastredirectaddr	=	"";				// contains address of last redirected address
54	var	$offsiteok		=	true;				// allows redirection off-site
55	var $maxframes		=	0;					// frame content depth maximum. 0 = disallow
56	var $expandlinks	=	true;				// expand links to fully qualified URLs.
57												// this only applies to fetchlinks()
58												// submitlinks(), and submittext()
59	var $passcookies	=	true;				// pass set cookies back through redirects
60												// NOTE: this currently does not respect
61												// dates, domains or paths.
62
63	var	$user			=	"";					// user for http authentication
64	var	$pass			=	"";					// password for http authentication
65
66	// http accept types
67	var $accept			=	"image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
68
69	var $results		=	"";					// where the content is put
70
71	var $error			=	"";					// error messages sent here
72	var	$response_code	=	"";					// response code returned from server
73	var	$headers		=	array();			// headers returned from server sent here
74	var	$maxlength		=	500000;				// max return data length (body)
75	var $read_timeout	=	0;					// timeout on read operations, in seconds
76												// supported only since PHP 4 Beta 4
77												// set to 0 to disallow timeouts
78	var $timed_out		=	false;				// if a read operation timed out
79	var	$status			=	0;					// http request status
80
81	var $temp_dir		=	"/tmp";				// temporary directory that the webserver
82												// has permission to write to.
83												// under Windows, this should be C:\temp
84
85	var	$curl_path		=	"/usr/local/bin/curl";
86												// Snoopy will use cURL for fetching
87												// SSL content if a full system path to
88												// the cURL binary is supplied here.
89												// set to false if you do not have
90												// cURL installed. See http://curl.haxx.se
91												// for details on installing cURL.
92												// Snoopy does *not* use the cURL
93												// library functions built into php,
94												// as these functions are not stable
95												// as of this Snoopy release.
96
97	/**** Private variables ****/
98
99	var	$_maxlinelen	=	4096;				// max line length (headers)
100
101	var $_httpmethod	=	"GET";				// default http request method
102	var $_httpversion	=	"HTTP/1.0";			// default http request version
103	var $_submit_method	=	"POST";				// default submit method
104	var $_submit_type	=	"application/x-www-form-urlencoded";	// default submit type
105	var $_mime_boundary	=   "";					// MIME boundary for multipart/form-data submit type
106	var $_redirectaddr	=	false;				// will be set if page fetched is a redirect
107	var $_redirectdepth	=	0;					// increments on an http redirect
108	var $_frameurls		= 	array();			// frame src urls
109	var $_framedepth	=	0;					// increments on frame depth
110
111	var $_isproxy		=	false;				// set if using a proxy server
112	var $_fp_timeout	=	30;					// timeout for socket connection
113
114/*======================================================================*\
115	Function:	fetch
116	Purpose:	fetch the contents of a web page
117				(and possibly other protocols in the
118				future like ftp, nntp, gopher, etc.)
119	Input:		$URI	the location of the page to fetch
120	Output:		$this->results	the output text from the fetch
121\*======================================================================*/
122
123	function fetch($URI)
124	{
125
126		//preg_match("|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|",$URI,$URI_PARTS);
127		$URI_PARTS = parse_url($URI);
128		if (!empty($URI_PARTS["user"]))
129			$this->user = $URI_PARTS["user"];
130		if (!empty($URI_PARTS["pass"]))
131			$this->pass = $URI_PARTS["pass"];
132		if (empty($URI_PARTS["query"]))
133			$URI_PARTS["query"] = '';
134		if (empty($URI_PARTS["path"]))
135			$URI_PARTS["path"] = '';
136
137		switch(strtolower($URI_PARTS["scheme"]))
138		{
139			case "http":
140				$this->host = $URI_PARTS["host"];
141				if(!empty($URI_PARTS["port"]))
142					$this->port = $URI_PARTS["port"];
143				if($this->_connect($fp))
144				{
145					if($this->_isproxy)
146					{
147						// using proxy, send entire URI
148						$this->_httprequest($URI,$fp,$URI,$this->_httpmethod);
149					}
150					else
151					{
152						$path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
153						// no proxy, send only the path
154						$this->_httprequest($path, $fp, $URI, $this->_httpmethod);
155					}
156
157					$this->_disconnect($fp);
158
159					if($this->_redirectaddr)
160					{
161						/* url was redirected, check if we've hit the max depth */
162						if($this->maxredirs > $this->_redirectdepth)
163						{
164							// only follow redirect if it's on this site, or offsiteok is true
165							if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
166							{
167								/* follow the redirect */
168								$this->_redirectdepth++;
169								$this->lastredirectaddr=$this->_redirectaddr;
170								$this->fetch($this->_redirectaddr);
171							}
172						}
173					}
174
175					if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
176					{
177						$frameurls = $this->_frameurls;
178						$this->_frameurls = array();
179
180						while(list(,$frameurl) = each($frameurls))
181						{
182							if($this->_framedepth < $this->maxframes)
183							{
184								$this->fetch($frameurl);
185								$this->_framedepth++;
186							}
187							else
188								break;
189						}
190					}
191				}
192				else
193				{
194					return false;
195				}
196				return true;
197				break;
198			case "https":
199				if(!$this->curl_path)
200					return false;
201				if(function_exists("is_executable"))
202				    if (!is_executable($this->curl_path))
203				        return false;
204				$this->host = $URI_PARTS["host"];
205				if(!empty($URI_PARTS["port"]))
206					$this->port = $URI_PARTS["port"];
207				if($this->_isproxy)
208				{
209					// using proxy, send entire URI
210					$this->_httpsrequest($URI,$URI,$this->_httpmethod);
211				}
212				else
213				{
214					$path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
215					// no proxy, send only the path
216					$this->_httpsrequest($path, $URI, $this->_httpmethod);
217				}
218
219				if($this->_redirectaddr)
220				{
221					/* url was redirected, check if we've hit the max depth */
222					if($this->maxredirs > $this->_redirectdepth)
223					{
224						// only follow redirect if it's on this site, or offsiteok is true
225						if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
226						{
227							/* follow the redirect */
228							$this->_redirectdepth++;
229							$this->lastredirectaddr=$this->_redirectaddr;
230							$this->fetch($this->_redirectaddr);
231						}
232					}
233				}
234
235				if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
236				{
237					$frameurls = $this->_frameurls;
238					$this->_frameurls = array();
239
240					while(list(,$frameurl) = each($frameurls))
241					{
242						if($this->_framedepth < $this->maxframes)
243						{
244							$this->fetch($frameurl);
245							$this->_framedepth++;
246						}
247						else
248							break;
249					}
250				}
251				return true;
252				break;
253			default:
254				// not a valid protocol
255				$this->error	=	'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
256				return false;
257				break;
258		}
259		return true;
260	}
261
262/*======================================================================*\
263	Function:	submit
264	Purpose:	submit an http form
265	Input:		$URI	the location to post the data
266				$formvars	the formvars to use.
267					format: $formvars["var"] = "val";
268				$formfiles  an array of files to submit
269					format: $formfiles["var"] = "/dir/filename.ext";
270	Output:		$this->results	the text output from the post
271\*======================================================================*/
272
273	function submit($URI, $formvars="", $formfiles="")
274	{
275		unset($postdata);
276
277		$postdata = $this->_prepare_post_body($formvars, $formfiles);
278
279		$URI_PARTS = parse_url($URI);
280		if (!empty($URI_PARTS["user"]))
281			$this->user = $URI_PARTS["user"];
282		if (!empty($URI_PARTS["pass"]))
283			$this->pass = $URI_PARTS["pass"];
284		if (empty($URI_PARTS["query"]))
285			$URI_PARTS["query"] = '';
286		if (empty($URI_PARTS["path"]))
287			$URI_PARTS["path"] = '';
288
289		switch(strtolower($URI_PARTS["scheme"]))
290		{
291			case "http":
292				$this->host = $URI_PARTS["host"];
293				if(!empty($URI_PARTS["port"]))
294					$this->port = $URI_PARTS["port"];
295				if($this->_connect($fp))
296				{
297					if($this->_isproxy)
298					{
299						// using proxy, send entire URI
300						$this->_httprequest($URI,$fp,$URI,$this->_submit_method,$this->_submit_type,$postdata);
301					}
302					else
303					{
304						$path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
305						// no proxy, send only the path
306						$this->_httprequest($path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
307					}
308
309					$this->_disconnect($fp);
310
311					if($this->_redirectaddr)
312					{
313						/* url was redirected, check if we've hit the max depth */
314						if($this->maxredirs > $this->_redirectdepth)
315						{
316							if(!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
317								$this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$URI_PARTS["scheme"]."://".$URI_PARTS["host"]);
318
319							// only follow redirect if it's on this site, or offsiteok is true
320							if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
321							{
322								/* follow the redirect */
323								$this->_redirectdepth++;
324								$this->lastredirectaddr=$this->_redirectaddr;
325								if( strpos( $this->_redirectaddr, "?" ) > 0 )
326									$this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
327								else
328									$this->submit($this->_redirectaddr,$formvars, $formfiles);
329							}
330						}
331					}
332
333					if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
334					{
335						$frameurls = $this->_frameurls;
336						$this->_frameurls = array();
337
338						while(list(,$frameurl) = each($frameurls))
339						{
340							if($this->_framedepth < $this->maxframes)
341							{
342								$this->fetch($frameurl);
343								$this->_framedepth++;
344							}
345							else
346								break;
347						}
348					}
349
350				}
351				else
352				{
353					return false;
354				}
355				return true;
356				break;
357			case "https":
358				if(!$this->curl_path)
359					return false;
360				if(function_exists("is_executable"))
361				    if (!is_executable($this->curl_path))
362				        return false;
363				$this->host = $URI_PARTS["host"];
364				if(!empty($URI_PARTS["port"]))
365					$this->port = $URI_PARTS["port"];
366				if($this->_isproxy)
367				{
368					// using proxy, send entire URI
369					$this->_httpsrequest($URI, $URI, $this->_submit_method, $this->_submit_type, $postdata);
370				}
371				else
372				{
373					$path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
374					// no proxy, send only the path
375					$this->_httpsrequest($path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
376				}
377
378				if($this->_redirectaddr)
379				{
380					/* url was redirected, check if we've hit the max depth */
381					if($this->maxredirs > $this->_redirectdepth)
382					{
383						if(!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
384							$this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$URI_PARTS["scheme"]."://".$URI_PARTS["host"]);
385
386						// only follow redirect if it's on this site, or offsiteok is true
387						if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
388						{
389							/* follow the redirect */
390							$this->_redirectdepth++;
391							$this->lastredirectaddr=$this->_redirectaddr;
392							if( strpos( $this->_redirectaddr, "?" ) > 0 )
393								$this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
394							else
395								$this->submit($this->_redirectaddr,$formvars, $formfiles);
396						}
397					}
398				}
399
400				if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
401				{
402					$frameurls = $this->_frameurls;
403					$this->_frameurls = array();
404
405					while(list(,$frameurl) = each($frameurls))
406					{
407						if($this->_framedepth < $this->maxframes)
408						{
409							$this->fetch($frameurl);
410							$this->_framedepth++;
411						}
412						else
413							break;
414					}
415				}
416				return true;
417				break;
418
419			default:
420				// not a valid protocol
421				$this->error	=	'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
422				return false;
423				break;
424		}
425		return true;
426	}
427
428/*======================================================================*\
429	Function:	fetchlinks
430	Purpose:	fetch the links from a web page
431	Input:		$URI	where you are fetching from
432	Output:		$this->results	an array of the URLs
433\*======================================================================*/
434
435	function fetchlinks($URI)
436	{
437		if ($this->fetch($URI))
438		{
439			if($this->lastredirectaddr)
440				$URI = $this->lastredirectaddr;
441			if(is_array($this->results))
442			{
443				for($x=0;$x<count($this->results);$x++)
444					$this->results[$x] = $this->_striplinks($this->results[$x]);
445			}
446			else
447				$this->results = $this->_striplinks($this->results);
448
449			if($this->expandlinks)
450				$this->results = $this->_expandlinks($this->results, $URI);
451			return true;
452		}
453		else
454			return false;
455	}
456
457/*======================================================================*\
458	Function:	fetchform
459	Purpose:	fetch the form elements from a web page
460	Input:		$URI	where you are fetching from
461	Output:		$this->results	the resulting html form
462\*======================================================================*/
463
464	function fetchform($URI)
465	{
466
467		if ($this->fetch($URI))
468		{
469
470			if(is_array($this->results))
471			{
472				for($x=0;$x<count($this->results);$x++)
473					$this->results[$x] = $this->_stripform($this->results[$x]);
474			}
475			else
476				$this->results = $this->_stripform($this->results);
477
478			return true;
479		}
480		else
481			return false;
482	}
483
484
485/*======================================================================*\
486	Function:	fetchtext
487	Purpose:	fetch the text from a web page, stripping the links
488	Input:		$URI	where you are fetching from
489	Output:		$this->results	the text from the web page
490\*======================================================================*/
491
492	function fetchtext($URI)
493	{
494		if($this->fetch($URI))
495		{
496			if(is_array($this->results))
497			{
498				for($x=0;$x<count($this->results);$x++)
499					$this->results[$x] = $this->_striptext($this->results[$x]);
500			}
501			else
502				$this->results = $this->_striptext($this->results);
503			return true;
504		}
505		else
506			return false;
507	}
508
509/*======================================================================*\
510	Function:	submitlinks
511	Purpose:	grab links from a form submission
512	Input:		$URI	where you are submitting from
513	Output:		$this->results	an array of the links from the post
514\*======================================================================*/
515
516	function submitlinks($URI, $formvars="", $formfiles="")
517	{
518		if($this->submit($URI,$formvars, $formfiles))
519		{
520			if($this->lastredirectaddr)
521				$URI = $this->lastredirectaddr;
522			if(is_array($this->results))
523			{
524				for($x=0;$x<count($this->results);$x++)
525				{
526					$this->results[$x] = $this->_striplinks($this->results[$x]);
527					if($this->expandlinks)
528						$this->results[$x] = $this->_expandlinks($this->results[$x],$URI);
529				}
530			}
531			else
532			{
533				$this->results = $this->_striplinks($this->results);
534				if($this->expandlinks)
535					$this->results = $this->_expandlinks($this->results,$URI);
536			}
537			return true;
538		}
539		else
540			return false;
541	}
542
543/*======================================================================*\
544	Function:	submittext
545	Purpose:	grab text from a form submission
546	Input:		$URI	where you are submitting from
547	Output:		$this->results	the text from the web page
548\*======================================================================*/
549
550	function submittext($URI, $formvars = "", $formfiles = "")
551	{
552		if($this->submit($URI,$formvars, $formfiles))
553		{
554			if($this->lastredirectaddr)
555				$URI = $this->lastredirectaddr;
556			if(is_array($this->results))
557			{
558				for($x=0;$x<count($this->results);$x++)
559				{
560					$this->results[$x] = $this->_striptext($this->results[$x]);
561					if($this->expandlinks)
562						$this->results[$x] = $this->_expandlinks($this->results[$x],$URI);
563				}
564			}
565			else
566			{
567				$this->results = $this->_striptext($this->results);
568				if($this->expandlinks)
569					$this->results = $this->_expandlinks($this->results,$URI);
570			}
571			return true;
572		}
573		else
574			return false;
575	}
576
577
578
579/*======================================================================*\
580	Function:	set_submit_multipart
581	Purpose:	Set the form submission content type to
582				multipart/form-data
583\*======================================================================*/
584	function set_submit_multipart()
585	{
586		$this->_submit_type = "multipart/form-data";
587	}
588
589
590/*======================================================================*\
591	Function:	set_submit_normal
592	Purpose:	Set the form submission content type to
593				application/x-www-form-urlencoded
594\*======================================================================*/
595	function set_submit_normal()
596	{
597		$this->_submit_type = "application/x-www-form-urlencoded";
598	}
599
600
601
602
603/*======================================================================*\
604	Private functions
605\*======================================================================*/
606
607
608/*======================================================================*\
609	Function:	_striplinks
610	Purpose:	strip the hyperlinks from an html document
611	Input:		$document	document to strip.
612	Output:		$match		an array of the links
613\*======================================================================*/
614
615	function _striplinks($document)
616	{
617		preg_match_all("'<\s*a\s.*?href\s*=\s*			# find <a href=
618						([\"\'])?					# find single or double quote
619						(?(1) (.*?)\\1 | ([^\s\>]+))		# if quote found, match up to next matching
620													# quote, otherwise match up to next space
621						'isx",$document,$links);
622
623
624		// catenate the non-empty matches from the conditional subpattern
625
626		while(list($key,$val) = each($links[2]))
627		{
628			if(!empty($val))
629				$match[] = $val;
630		}
631
632		while(list($key,$val) = each($links[3]))
633		{
634			if(!empty($val))
635				$match[] = $val;
636		}
637
638		// return the links
639		return $match;
640	}
641
642/*======================================================================*\
643	Function:	_stripform
644	Purpose:	strip the form elements from an html document
645	Input:		$document	document to strip.
	Output:		$match		the matched form elements, joined with CRLF
647\*======================================================================*/
648
649	function _stripform($document)
650	{
651		preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi",$document,$elements);
652
653		// catenate the matches
654		$match = implode("\r\n",$elements[0]);
655
656		// return the links
657		return $match;
658	}
659
660
661
662/*======================================================================*\
663	Function:	_striptext
664	Purpose:	strip the text from an html document
665	Input:		$document	document to strip.
666	Output:		$text		the resulting text
667\*======================================================================*/
668
669	function _striptext($document)
670	{
671
672		// I didn't use preg eval (//e) since that is only available in PHP 4.0.
673		// so, list your entities one by one here. I included some of the
674		// more common ones.
675
676		$search = array("'<script[^>]*?>.*?</script>'si",	// strip out javascript
677						"'<[\/\!]*?[^<>]*?>'si",			// strip out html tags
678						"'([\r\n])[\s]+'",					// strip out white space
679						"'&(quot|#34|#034|#x22);'i",		// replace html entities
680						"'&(amp|#38|#038|#x26);'i",			// added hexadecimal values
681						"'&(lt|#60|#060|#x3c);'i",
682						"'&(gt|#62|#062|#x3e);'i",
683						"'&(nbsp|#160|#xa0);'i",
684						"'&(iexcl|#161);'i",
685						"'&(cent|#162);'i",
686						"'&(pound|#163);'i",
687						"'&(copy|#169);'i",
688						"'&(reg|#174);'i",
689						"'&(deg|#176);'i",
690						"'&(#39|#039|#x27);'",
691						"'&(euro|#8364);'i",				// europe
692						"'&a(uml|UML);'",					// german
693						"'&o(uml|UML);'",
694						"'&u(uml|UML);'",
695						"'&A(uml|UML);'",
696						"'&O(uml|UML);'",
697						"'&U(uml|UML);'",
698						"'&szlig;'i",
699						);
700		$replace = array(	"",
701							"",
702							"\\1",
703							"\"",
704							"&",
705							"<",
706							">",
707							" ",
708							chr(161),
709							chr(162),
710							chr(163),
711							chr(169),
712							chr(174),
713							chr(176),
714							chr(39),
715							chr(128),
716							"�",
717							"�",
718							"�",
719							"�",
720							"�",
721							"�",
722							"�",
723						);
724
725		$text = preg_replace($search,$replace,$document);
726
727		return $text;
728	}
729
730/*======================================================================*\
731	Function:	_expandlinks
732	Purpose:	expand each link into a fully qualified URL
733	Input:		$links			the links to qualify
734				$URI			the full URI to get the base from
735	Output:		$expandedLinks	the expanded links
736\*======================================================================*/
737
738	function _expandlinks($links,$URI)
739	{
740
741		preg_match("/^[^\?]+/",$URI,$match);
742
743		$match = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$match[0]);
744		$match = preg_replace("|/$|","",$match);
745		$match_part = parse_url($match);
746		$match_root =
747		$match_part["scheme"]."://".$match_part["host"];
748
749		$search = array( 	"|^http://".preg_quote($this->host)."|i",
750							"|^(\/)|i",
751							"|^(?!http://)(?!mailto:)|i",
752							"|/\./|",
753							"|/[^\/]+/\.\./|"
754						);
755
756		$replace = array(	"",
757							$match_root."/",
758							$match."/",
759							"/",
760							"/"
761						);
762
763		$expandedLinks = preg_replace($search,$replace,$links);
764
765		return $expandedLinks;
766	}
767
768/*======================================================================*\
769	Function:	_httprequest
770	Purpose:	go get the http data from the server
771	Input:		$url		the url to fetch
772				$fp			the current open file pointer
773				$URI		the full URI
774				$body		body contents to send if any (POST)
775	Output:
776\*======================================================================*/
777
778	function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
779	{
780		$cookie_headers = '';
781		if($this->passcookies && $this->_redirectaddr)
782			$this->setcookies();
783
784		$URI_PARTS = parse_url($URI);
785		if(empty($url))
786			$url = "/";
787		$headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
788		if(!empty($this->agent))
789			$headers .= "User-Agent: ".$this->agent."\r\n";
790		if(!empty($this->host) && !isset($this->rawheaders['Host'])) {
791			$headers .= "Host: ".$this->host;
792			if(!empty($this->port))
793				$headers .= ":".$this->port;
794			$headers .= "\r\n";
795		}
796		if(!empty($this->accept))
797			$headers .= "Accept: ".$this->accept."\r\n";
798		if(!empty($this->referer))
799			$headers .= "Referer: ".$this->referer."\r\n";
800		if(!empty($this->cookies))
801		{
802			if(!is_array($this->cookies))
803				$this->cookies = (array)$this->cookies;
804
805			reset($this->cookies);
806			if ( count($this->cookies) > 0 ) {
807				$cookie_headers .= 'Cookie: ';
808				foreach ( $this->cookies as $cookieKey => $cookieVal ) {
809				$cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
810				}
811				$headers .= substr($cookie_headers,0,-2) . "\r\n";
812			}
813		}
814		if(!empty($this->rawheaders))
815		{
816			if(!is_array($this->rawheaders))
817				$this->rawheaders = (array)$this->rawheaders;
818			while(list($headerKey,$headerVal) = each($this->rawheaders))
819				$headers .= $headerKey.": ".$headerVal."\r\n";
820		}
821		if(!empty($content_type)) {
822			$headers .= "Content-type: $content_type";
823			if ($content_type == "multipart/form-data")
824				$headers .= "; boundary=".$this->_mime_boundary;
825			$headers .= "\r\n";
826		}
827		if(!empty($body))
828			$headers .= "Content-length: ".strlen($body)."\r\n";
829		if(!empty($this->user) || !empty($this->pass))
830			$headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";
831
832		//add proxy auth headers
833		if(!empty($this->proxy_user))
834			$headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";
835
836
837		$headers .= "\r\n";
838
839		// set the read timeout if needed
840		if ($this->read_timeout > 0)
841			socket_set_timeout($fp, $this->read_timeout);
842		$this->timed_out = false;
843
844		fwrite($fp,$headers.$body,strlen($headers.$body));
845
846		$this->_redirectaddr = false;
847		unset($this->headers);
848
849		while($currentHeader = fgets($fp,$this->_maxlinelen))
850		{
851			if ($this->read_timeout > 0 && $this->_check_timeout($fp))
852			{
853				$this->status=-100;
854				return false;
855			}
856
857			if($currentHeader == "\r\n")
858				break;
859
860			// if a header begins with Location: or URI:, set the redirect
861			if(preg_match("/^(Location:|URI:)/i",$currentHeader))
862			{
863				// get URL portion of the redirect
864				preg_match("/^(Location:|URI:)[ ]+(.*)/i",chop($currentHeader),$matches);
865				// look for :// in the Location header to see if hostname is included
866				if(!preg_match("|\:\/\/|",$matches[2]))
867				{
868					// no host in the path, so prepend
869					$this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
870					// eliminate double slash
871					if(!preg_match("|^/|",$matches[2]))
872							$this->_redirectaddr .= "/".$matches[2];
873					else
874							$this->_redirectaddr .= $matches[2];
875				}
876				else
877					$this->_redirectaddr = $matches[2];
878			}
879
880			if(preg_match("|^HTTP/|",$currentHeader))
881			{
882                if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
883				{
884					$this->status= $status[1];
885                }
886				$this->response_code = $currentHeader;
887			}
888
889			$this->headers[] = $currentHeader;
890		}
891
892		$results = '';
893		do {
894    		$_data = fread($fp, $this->maxlength);
895    		if (strlen($_data) == 0) {
896        		break;
897    		}
898    		$results .= $_data;
899		} while(true);
900
901		if ($this->read_timeout > 0 && $this->_check_timeout($fp))
902		{
903			$this->status=-100;
904			return false;
905		}
906
907		// check if there is a a redirect meta tag
908
909		if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
910
911		{
912			$this->_redirectaddr = $this->_expandlinks($match[1],$URI);
913		}
914
915		// have we hit our frame depth and is there frame src to fetch?
916		if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
917		{
918			$this->results[] = $results;
919			for($x=0; $x<count($match[1]); $x++)
920				$this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
921		}
922		// have we already fetched framed content?
923		elseif(is_array($this->results))
924			$this->results[] = $results;
925		// no framed content
926		else
927			$this->results = $results;
928
929		return true;
930	}
931
932/*======================================================================*\
933	Function:	_httpsrequest
934	Purpose:	go get the https data from the server using curl
935	Input:		$url		the url to fetch
936				$URI		the full URI
937				$body		body contents to send if any (POST)
938	Output:
939\*======================================================================*/
940
function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
{
	// Fetches $URI by shelling out to the curl binary at $this->curl_path.
	// $url and $http_method are not used here: curl builds the request line
	// from $URI (and from the presence of a body) on its own.
	// Returns true on success; on failure sets $this->error and returns false.

	// on a redirect, carry over any cookies the previous response set
	if($this->passcookies && $this->_redirectaddr)
		$this->setcookies();

	$headers = array();

	$URI_PARTS = parse_url($URI);

	if(!empty($this->agent))
		$headers[] = "User-Agent: ".$this->agent;
	if(!empty($this->host))
	{
		if(!empty($this->port))
			$headers[] = "Host: ".$this->host.":".$this->port;
		else
			$headers[] = "Host: ".$this->host;
	}
	if(!empty($this->accept))
		$headers[] = "Accept: ".$this->accept;
	if(!empty($this->referer))
		$headers[] = "Referer: ".$this->referer;
	if(!empty($this->cookies))
	{
		if(!is_array($this->cookies))
			$this->cookies = (array)$this->cookies;

		if(count($this->cookies) > 0)
		{
			$cookie_str = 'Cookie: ';
			foreach($this->cookies as $cookieKey => $cookieVal)
				$cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
			// drop the trailing "; "
			$headers[] = substr($cookie_str,0,-2);
		}
	}
	if(!empty($this->rawheaders))
	{
		if(!is_array($this->rawheaders))
			$this->rawheaders = (array)$this->rawheaders;
		// foreach instead of each(), which no longer exists in PHP 8
		foreach($this->rawheaders as $headerKey => $headerVal)
			$headers[] = $headerKey.": ".$headerVal;
	}
	if(!empty($content_type))
	{
		if($content_type == "multipart/form-data")
			$headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
		else
			$headers[] = "Content-type: $content_type";
	}
	if(!empty($body))
		$headers[] = "Content-length: ".strlen($body);
	if(!empty($this->user) || !empty($this->pass))
		$headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

	// Build the curl command line. Every value that can carry caller- or
	// server-supplied data is passed through escapeshellarg(), so it reaches
	// curl as exactly one argument and cannot break out into the shell.
	$cmdline_params = "";
	foreach($headers as $header)
		$cmdline_params .= " -H ".escapeshellarg($header);

	if(!empty($body))
		$cmdline_params .= " -d ".escapeshellarg($body);

	if($this->read_timeout > 0)
		$cmdline_params .= " -m ".escapeshellarg((string)$this->read_timeout);

	// curl dumps the response headers into a temp file (-D) that is read back below
	$temp_dir = !empty($this->temp_dir) ? $this->temp_dir : sys_get_temp_dir();
	$headerfile = tempnam($temp_dir, "sno");
	if($headerfile === false)
	{
		$this->error = "Error: could not create a temporary file for the response headers.";
		return false;
	}

	// NOTE(review): -k disables TLS certificate verification; kept because
	// existing callers may rely on it, but it is worth revisiting.
	exec($this->curl_path." -k -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI),$results,$return);

	if($return)
	{
		// do not leave the header dump behind when curl fails
		unlink($headerfile);
		$this->error = "Error: cURL could not retrieve the document, error $return.";
		return false;
	}

	$results = implode("\r\n",$results);

	$result_headers = file($headerfile);
	unlink($headerfile);
	if($result_headers === false)
		$result_headers = array();

	$this->_redirectaddr = false;
	$this->headers = array();

	foreach($result_headers as $header_line)
	{
		// a Location: or URI: header (any case) names the redirect target
		if(preg_match("/^(?:Location|URI):\s*(.*)/i",chop($header_line),$matches))
		{
			$target = $matches[1];
			// look for :// in the target to see if a hostname is included
			if(!preg_match("|\:\/\/|",$target))
			{
				// no host in the path, so prepend
				$this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
				// eliminate double slash
				if(!preg_match("|^/|",$target))
					$this->_redirectaddr .= "/".$target;
				else
					$this->_redirectaddr .= $target;
			}
			else
				$this->_redirectaddr = $target;
		}

		if(preg_match("|^HTTP/|",$header_line))
			$this->response_code = $header_line;

		$this->headers[] = $header_line;
	}

	// check if there is a redirect meta tag
	if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
	{
		$this->_redirectaddr = $this->_expandlinks($match[1],$URI);
	}

	// have we hit our frame depth and is there frame src to fetch?
	if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
	{
		$this->results[] = $results;
		foreach($match[1] as $frame_src)
			$this->_frameurls[] = $this->_expandlinks($frame_src,$URI_PARTS["scheme"]."://".$this->host);
	}
	// have we already fetched framed content?
	elseif(is_array($this->results))
		$this->results[] = $results;
	// no framed content
	else
		$this->results = $results;

	return true;
}
1079
1080/*======================================================================*\
1081	Function:	setcookies()
1082	Purpose:	set cookies for a redirection
1083\*======================================================================*/
1084
function setcookies()
{
	// Copies every Set-Cookie name/value pair found in $this->headers into
	// $this->cookies so the cookies can be replayed on a redirect.

	// nothing to harvest if no response headers have been stored yet
	if(empty($this->headers) || !is_array($this->headers))
		return;

	foreach($this->headers as $header)
	{
		// The value stops at the first ';' or at the end of the line; stored
		// header lines may still carry their CR/LF, which must not become
		// part of the cookie value.
		if(preg_match('/^set-cookie:\s*([^=]+)=([^;\r\n]+)/i', $header, $match))
			$this->cookies[$match[1]] = urldecode(rtrim($match[2]));
	}
}
1093
1094
1095/*======================================================================*\
1096	Function:	_check_timeout
1097	Purpose:	checks whether timeout has occurred
1098	Input:		$fp	file pointer
1099\*======================================================================*/
1100
function _check_timeout($fp)
{
	// Reports whether the stream behind $fp has timed out; when it has,
	// $this->timed_out is flagged as well.

	// read timeouts switched off: never report a timeout
	if (!($this->read_timeout > 0))
		return false;

	$stream_state = socket_get_status($fp);
	if (!$stream_state["timed_out"])
		return false;

	$this->timed_out = true;
	return true;
}
1112
/*======================================================================*\
	Function:	_connect
	Purpose:	make a socket connection
	Input:		$fp	file pointer (by reference; receives the result of fsockopen)
	Output:		true if the connection succeeded, false otherwise
\*======================================================================*/
1118
function _connect(&$fp)
{
	// Opens a socket to the proxy (when both proxy host and port are set) or
	// to $this->host:$this->port otherwise. $fp receives the fsockopen()
	// result. Returns true on success; on failure stores the error number in
	// $this->status, a message in $this->error, and returns false.
	if(!empty($this->proxy_host) && !empty($this->proxy_port))
	{
		$this->_isproxy = true;

		$host = $this->proxy_host;
		$port = $this->proxy_port;
	}
	else
	{
		$host = $this->host;
		$port = $this->port;
	}

	$this->status = 0;

	$fp = fsockopen($host, $port, $errno, $errstr, $this->_fp_timeout);
	if($fp)
	{
		// socket connection succeeded
		return true;
	}

	// socket connection failed
	$this->status = $errno;
	switch($errno)
	{
		// each case needs its own break, otherwise execution falls through
		// and the default message replaces the specific one
		case -3:
			$this->error="socket creation failed (-3)";
			break;
		case -4:
			$this->error="dns lookup failure (-4)";
			break;
		case -5:
			$this->error="connection refused or timed out (-5)";
			break;
		default:
			$this->error="connection failed (".$errno.")";
			break;
	}
	return false;
}
1166/*======================================================================*\
1167	Function:	_disconnect
1168	Purpose:	disconnect a socket connection
1169	Input:		$fp	file pointer
1170\*======================================================================*/
1171
function _disconnect($fp)
{
	// close the connection and hand fclose()'s result back to the caller
	$closed = fclose($fp);
	return $closed;
}
1176
1177
1178/*======================================================================*\
1179	Function:	_prepare_post_body
1180	Purpose:	Prepare post body according to encoding type
1181	Input:		$formvars  - form variables
1182				$formfiles - form upload files
1183	Output:		post body
1184\*======================================================================*/
1185
function _prepare_post_body($formvars, $formfiles)
{
	// Builds the POST body for $this->_submit_type from the given form
	// variables and upload files. Returns the body as a string; it is empty
	// when there is nothing to send or the submit type is not one of the
	// two handled below.
	settype($formvars, "array");
	settype($formfiles, "array");
	$postdata = '';

	// nothing to send: return the empty string rather than null
	if (count($formvars) == 0 && count($formfiles) == 0)
		return $postdata;

	switch ($this->_submit_type) {
		case "application/x-www-form-urlencoded":
			// foreach instead of each(), which no longer exists in PHP 8
			foreach ($formvars as $key => $val) {
				if (is_array($val) || is_object($val)) {
					// multi-valued field: repeat it as key[]=value
					foreach ($val as $cur_val)
						$postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
				} else
					$postdata .= urlencode($key)."=".urlencode($val)."&";
			}
			break;

		case "multipart/form-data":
			// the boundary is stored on the object for the Content-type header
			$this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

			foreach ($formvars as $key => $val) {
				if (is_array($val) || is_object($val)) {
					foreach ($val as $cur_val) {
						$postdata .= "--".$this->_mime_boundary."\r\n";
						// concatenate so the field name is key[] with no stray backslashes
						$postdata .= "Content-Disposition: form-data; name=\"".$key."[]\"\r\n\r\n";
						$postdata .= "$cur_val\r\n";
					}
				} else {
					$postdata .= "--".$this->_mime_boundary."\r\n";
					$postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
					$postdata .= "$val\r\n";
				}
			}

			foreach ($formfiles as $field_name => $file_names) {
				settype($file_names, "array");
				foreach ($file_names as $file_name) {
					if (!is_readable($file_name)) continue;

					// binary-safe read that also copes with empty files
					$file_content = file_get_contents($file_name);
					if ($file_content === false) continue;
					$base_name = basename($file_name);

					$postdata .= "--".$this->_mime_boundary."\r\n";
					$postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
					$postdata .= "$file_content\r\n";
				}
			}
			$postdata .= "--".$this->_mime_boundary."--\r\n";
			break;
	}

	return $postdata;
}
1248}
1249
1250?>
1251