(file) Return to snoopy.class.php CVS log (file) (dir) Up to [RizwankCVS] / geekymedia_web / photo / inc

   1 rizwank 1.1 <?php
   2             
   3             /*************************************************
   4             
   5             Snoopy - the PHP net client
   6             Author: Monte Ohrt <monte@ispi.net>
   7             Copyright (c): 1999-2000 ispi, all rights reserved
   8             Version: 1.2
   9             
  10              * This library is free software; you can redistribute it and/or
  11              * modify it under the terms of the GNU Lesser General Public
  12              * License as published by the Free Software Foundation; either
  13              * version 2.1 of the License, or (at your option) any later version.
  14              *
  15              * This library is distributed in the hope that it will be useful,
  16              * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17              * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18              * Lesser General Public License for more details.
  19              *
  20              * You should have received a copy of the GNU Lesser General Public
  21              * License along with this library; if not, write to the Free Software
  22 rizwank 1.1  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  23             
  24             You may contact the author of Snoopy by e-mail at:
  25             monte@ispi.net
  26             
  27             Or, write to:
  28             Monte Ohrt
  29             CTO, ispi
  30             237 S. 70th suite 220
  31             Lincoln, NE 68510
  32             
  33             The latest version of Snoopy can be obtained from:
  34             http://snoopy.sourceforge.net/
  35             
  36             *************************************************/
  37             
  38             class Snoopy
  39             {
  40             	/**** Public variables ****/
  41             	
  42             	/* user definable vars */
  43 rizwank 1.1 
  44             	var $host			=	"www.php.net";		// host name we are connecting to
  45             	var $port			=	80;					// port we are connecting to
  46             	var $proxy_host		=	"";					// proxy host to use
  47             	var $proxy_port		=	"";					// proxy port to use
  48             	var $proxy_user		=	"";					// proxy user to use
  49             	var $proxy_pass		=	"";					// proxy password to use
  50             	
  51             	var $agent			=	"Snoopy v1.2";		// agent we masquerade as
  52             	var	$referer		=	"";					// referer info to pass
  53             	var $cookies		=	array();			// array of cookies to pass
  54             												// $cookies["username"]="joe";
  55             	var	$rawheaders		=	array();			// array of raw headers to send
  56             												// $rawheaders["Content-type"]="text/html";
  57             
  58             	var $maxredirs		=	5;					// http redirection depth maximum. 0 = disallow
  59             	var $lastredirectaddr	=	"";				// contains address of last redirected address
  60             	var	$offsiteok		=	true;				// allows redirection off-site
  61             	var $maxframes		=	0;					// frame content depth maximum. 0 = disallow
  62             	var $expandlinks	=	true;				// expand links to fully qualified URLs.
  63             												// this only applies to fetchlinks()
  64 rizwank 1.1 												// or submitlinks()
  65             	var $passcookies	=	true;				// pass set cookies back through redirects
  66             												// NOTE: this currently does not respect
  67             												// dates, domains or paths.
  68             	
  69             	var	$user			=	"";					// user for http authentication
  70             	var	$pass			=	"";					// password for http authentication
  71             	
  72             	// http accept types
  73             	var $accept			=	"image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
  74             	
  75             	var $results		=	"";					// where the content is put
  76             		
  77             	var $error			=	"";					// error messages sent here
  78             	var	$response_code	=	"";					// response code returned from server
  79             	var	$headers		=	array();			// headers returned from server sent here
  80             	var	$maxlength		=	500000;				// max return data length (body)
  81             	var $read_timeout	=	0;					// timeout on read operations, in seconds
  82             												// supported only since PHP 4 Beta 4
  83             												// set to 0 to disallow timeouts
  84             	var $timed_out		=	false;				// if a read operation timed out
  85 rizwank 1.1 	var	$status			=	0;					// http request status
  86             
  87             	var $temp_dir		=	"/tmp";				// temporary directory that the webserver
  88             												// has permission to write to.
  89             												// under Windows, this should be C:\temp
  90             
  91             	var	$curl_path		=	"/usr/local/bin/curl";
  92             												// Snoopy will use cURL for fetching
  93             												// SSL content if a full system path to
  94             												// the cURL binary is supplied here.
  95             												// set to false if you do not have
  96             												// cURL installed. See http://curl.haxx.se
  97             												// for details on installing cURL.
  98             												// Snoopy does *not* use the cURL
  99             												// library functions built into php,
 100             												// as these functions are not stable
 101             												// as of this Snoopy release.
 102             	
 103             	/**** Private variables ****/	
 104             	
 105             	var	$_maxlinelen	=	4096;				// max line length (headers)
 106 rizwank 1.1 	
 107             	var $_httpmethod	=	"GET";				// default http request method
 108             	var $_httpversion	=	"HTTP/1.0";			// default http request version
 109             	var $_submit_method	=	"POST";				// default submit method
 110             	var $_submit_type	=	"application/x-www-form-urlencoded";	// default submit type
 111             	var $_mime_boundary	=   "";					// MIME boundary for multipart/form-data submit type
 112             	var $_redirectaddr	=	false;				// will be set if page fetched is a redirect
 113             	var $_redirectdepth	=	0;					// increments on an http redirect
 114             	var $_frameurls		= 	array();			// frame src urls
 115             	var $_framedepth	=	0;					// increments on frame depth
 116             	
 117             	var $_isproxy		=	false;				// set if using a proxy server
 118             	var $_fp_timeout	=	30;					// timeout for socket connection
 119             
 120             /*======================================================================*\
 121             	Function:	fetch
 122             	Purpose:	fetch the contents of a web page
 123             				(and possibly other protocols in the
 124             				future like ftp, nntp, gopher, etc.)
 125             	Input:		$URI	the location of the page to fetch
 126             	Output:		$this->results	the output text from the fetch
 127 rizwank 1.1 \*======================================================================*/
 128             
 129             	function fetch($URI)
 130             	{
 131             	
 132             		//preg_match("|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|",$URI,$URI_PARTS);
 133             		$URI_PARTS = parse_url($URI);
 134             		if (!empty($URI_PARTS["user"]))
 135             			$this->user = $URI_PARTS["user"];
 136             		if (!empty($URI_PARTS["pass"]))
 137             			$this->pass = $URI_PARTS["pass"];
 138             		if (empty($URI_PARTS["query"]))
 139             			$URI_PARTS["query"] = '';
 140             				
 141             		switch($URI_PARTS["scheme"])
 142             		{
 143             			case "http":
 144             				$this->host = $URI_PARTS["host"];
 145             				if(!empty($URI_PARTS["port"]))
 146             					$this->port = $URI_PARTS["port"];
 147             				if($this->_connect($fp))
 148 rizwank 1.1 				{
 149             					if($this->_isproxy)
 150             					{
 151             						// using proxy, send entire URI
 152             						$this->_httprequest($URI,$fp,$URI,$this->_httpmethod);
 153             					}
 154             					else
 155             					{
 156             						$path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
 157             						// no proxy, send only the path
 158             						$this->_httprequest($path, $fp, $URI, $this->_httpmethod);
 159             					}
 160             					
 161             					$this->_disconnect($fp);
 162             
 163             					if($this->_redirectaddr)
 164             					{
 165             						/* url was redirected, check if we've hit the max depth */
 166             						if($this->maxredirs > $this->_redirectdepth)
 167             						{
 168             							// only follow redirect if it's on this site, or offsiteok is true
 169 rizwank 1.1 							if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
 170             							{
 171             								/* follow the redirect */
 172             								$this->_redirectdepth++;
 173             								$this->lastredirectaddr=$this->_redirectaddr;
 174             								$this->fetch($this->_redirectaddr);
 175             							}
 176             						}
 177             					}
 178             
 179             					if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
 180             					{
 181             						$frameurls = $this->_frameurls;
 182             						$this->_frameurls = array();
 183             						
 184             						while(list(,$frameurl) = each($frameurls))
 185             						{
 186             							if($this->_framedepth < $this->maxframes)
 187             							{
 188             								$this->fetch($frameurl);
 189             								$this->_framedepth++;
 190 rizwank 1.1 							}
 191             							else
 192             								break;
 193             						}
 194             					}					
 195             				}
 196             				else
 197             				{
 198             					return false;
 199             				}
 200             				return true;					
 201             				break;
 202             			case "https":
 203             				if(!$this->curl_path)
 204             					return false;
 205             				if(function_exists("is_executable"))
 206             				    if (!is_executable($this->curl_path))
 207             				        return false;
 208             				$this->host = $URI_PARTS["host"];
 209             				if(!empty($URI_PARTS["port"]))
 210             					$this->port = $URI_PARTS["port"];
 211 rizwank 1.1 				if($this->_isproxy)
 212             				{
 213             					// using proxy, send entire URI
 214             					$this->_httpsrequest($URI,$URI,$this->_httpmethod);
 215             				}
 216             				else
 217             				{
 218             					$path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
 219             					// no proxy, send only the path
 220             					$this->_httpsrequest($path, $URI, $this->_httpmethod);
 221             				}
 222             
 223             				if($this->_redirectaddr)
 224             				{
 225             					/* url was redirected, check if we've hit the max depth */
 226             					if($this->maxredirs > $this->_redirectdepth)
 227             					{
 228             						// only follow redirect if it's on this site, or offsiteok is true
 229             						if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
 230             						{
 231             							/* follow the redirect */
 232 rizwank 1.1 							$this->_redirectdepth++;
 233             							$this->lastredirectaddr=$this->_redirectaddr;
 234             							$this->fetch($this->_redirectaddr);
 235             						}
 236             					}
 237             				}
 238             
 239             				if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
 240             				{
 241             					$frameurls = $this->_frameurls;
 242             					$this->_frameurls = array();
 243             
 244             					while(list(,$frameurl) = each($frameurls))
 245             					{
 246             						if($this->_framedepth < $this->maxframes)
 247             						{
 248             							$this->fetch($frameurl);
 249             							$this->_framedepth++;
 250             						}
 251             						else
 252             							break;
 253 rizwank 1.1 					}
 254             				}					
 255             				return true;					
 256             				break;
 257             			default:
 258             				// not a valid protocol
 259             				$this->error	=	'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
 260             				return false;
 261             				break;
 262             		}		
 263             		return true;
 264             	}
 265             
 266             /*======================================================================*\
 267             	Function:	submit
 268             	Purpose:	submit an http form
 269             	Input:		$URI	the location to post the data
 270             				$formvars	the formvars to use.
 271             					format: $formvars["var"] = "val";
 272             				$formfiles  an array of files to submit
 273             					format: $formfiles["var"] = "/dir/filename.ext";
 274 rizwank 1.1 	Output:		$this->results	the text output from the post
 275             \*======================================================================*/
 276             
 277             	function submit($URI, $formvars="", $formfiles="")
 278             	{
 279             		unset($postdata);
 280             		
 281             		$postdata = $this->_prepare_post_body($formvars, $formfiles);
 282             			
 283             		$URI_PARTS = parse_url($URI);
 284             		if (!empty($URI_PARTS["user"]))
 285             			$this->user = $URI_PARTS["user"];
 286             		if (!empty($URI_PARTS["pass"]))
 287             			$this->pass = $URI_PARTS["pass"];
 288             		if (empty($URI_PARTS["query"]))
 289             			$URI_PARTS["query"] = '';
 290             
 291             		switch($URI_PARTS["scheme"])
 292             		{
 293             			case "http":
 294             				$this->host = $URI_PARTS["host"];
 295 rizwank 1.1 				if(!empty($URI_PARTS["port"]))
 296             					$this->port = $URI_PARTS["port"];
 297             				if($this->_connect($fp))
 298             				{
 299             					if($this->_isproxy)
 300             					{
 301             						// using proxy, send entire URI
 302             						$this->_httprequest($URI,$fp,$URI,$this->_submit_method,$this->_submit_type,$postdata);
 303             					}
 304             					else
 305             					{
 306             						$path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
 307             						// no proxy, send only the path
 308             						$this->_httprequest($path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
 309             					}
 310             					
 311             					$this->_disconnect($fp);
 312             
 313             					if($this->_redirectaddr)
 314             					{
 315             						/* url was redirected, check if we've hit the max depth */
 316 rizwank 1.1 						if($this->maxredirs > $this->_redirectdepth)
 317             						{						
 318             							if(!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
 319             								$this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$URI_PARTS["scheme"]."://".$URI_PARTS["host"]);						
 320             							
 321             							// only follow redirect if it's on this site, or offsiteok is true
 322             							if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
 323             							{
 324             								/* follow the redirect */
 325             								$this->_redirectdepth++;
 326             								$this->lastredirectaddr=$this->_redirectaddr;
 327             								if( strpos( $this->_redirectaddr, "?" ) > 0 )
 328             									$this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
 329             								else
 330             									$this->submit($this->_redirectaddr,$formvars, $formfiles);
 331             							}
 332             						}
 333             					}
 334             
 335             					if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
 336             					{
 337 rizwank 1.1 						$frameurls = $this->_frameurls;
 338             						$this->_frameurls = array();
 339             						
 340             						while(list(,$frameurl) = each($frameurls))
 341             						{														
 342             							if($this->_framedepth < $this->maxframes)
 343             							{
 344             								$this->fetch($frameurl);
 345             								$this->_framedepth++;
 346             							}
 347             							else
 348             								break;
 349             						}
 350             					}					
 351             					
 352             				}
 353             				else
 354             				{
 355             					return false;
 356             				}
 357             				return true;					
 358 rizwank 1.1 				break;
 359             			case "https":
 360             				if(!$this->curl_path)
 361             					return false;
 362             				if(function_exists("is_executable"))
 363             				    if (!is_executable($this->curl_path))
 364             				        return false;
 365             				$this->host = $URI_PARTS["host"];
 366             				if(!empty($URI_PARTS["port"]))
 367             					$this->port = $URI_PARTS["port"];
 368             				if($this->_isproxy)
 369             				{
 370             					// using proxy, send entire URI
 371             					$this->_httpsrequest($URI, $URI, $this->_submit_method, $this->_submit_type, $postdata);
 372             				}
 373             				else
 374             				{
 375             					$path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
 376             					// no proxy, send only the path
 377             					$this->_httpsrequest($path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
 378             				}
 379 rizwank 1.1 
 380             				if($this->_redirectaddr)
 381             				{
 382             					/* url was redirected, check if we've hit the max depth */
 383             					if($this->maxredirs > $this->_redirectdepth)
 384             					{						
 385             						if(!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
 386             							$this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$URI_PARTS["scheme"]."://".$URI_PARTS["host"]);						
 387             
 388             						// only follow redirect if it's on this site, or offsiteok is true
 389             						if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
 390             						{
 391             							/* follow the redirect */
 392             							$this->_redirectdepth++;
 393             							$this->lastredirectaddr=$this->_redirectaddr;
 394             							if( strpos( $this->_redirectaddr, "?" ) > 0 )
 395             								$this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
 396             							else
 397             								$this->submit($this->_redirectaddr,$formvars, $formfiles);
 398             						}
 399             					}
 400 rizwank 1.1 				}
 401             
 402             				if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
 403             				{
 404             					$frameurls = $this->_frameurls;
 405             					$this->_frameurls = array();
 406             
 407             					while(list(,$frameurl) = each($frameurls))
 408             					{														
 409             						if($this->_framedepth < $this->maxframes)
 410             						{
 411             							$this->fetch($frameurl);
 412             							$this->_framedepth++;
 413             						}
 414             						else
 415             							break;
 416             					}
 417             				}					
 418             				return true;					
 419             				break;
 420             				
 421 rizwank 1.1 			default:
 422             				// not a valid protocol
 423             				$this->error	=	'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
 424             				return false;
 425             				break;
 426             		}		
 427             		return true;
 428             	}
 429             
 430             /*======================================================================*\
 431             	Function:	fetchlinks
 432             	Purpose:	fetch the links from a web page
 433             	Input:		$URI	where you are fetching from
 434             	Output:		$this->results	an array of the URLs
 435             \*======================================================================*/
 436             
 437             	function fetchlinks($URI)
 438             	{
 439             		if ($this->fetch($URI))
 440             		{			
 441             
 442 rizwank 1.1 			if(is_array($this->results))
 443             			{
 444             				for($x=0;$x<count($this->results);$x++)
 445             					$this->results[$x] = $this->_striplinks($this->results[$x]);
 446             			}
 447             			else
 448             				$this->results = $this->_striplinks($this->results);
 449             
 450             			if($this->expandlinks)
 451             				$this->results = $this->_expandlinks($this->results, $URI);
 452             			return true;
 453             		}
 454             		else
 455             			return false;
 456             	}
 457             
 458             /*======================================================================*\
 459             	Function:	fetchform
 460             	Purpose:	fetch the form elements from a web page
 461             	Input:		$URI	where you are fetching from
 462             	Output:		$this->results	the resulting html form
 463 rizwank 1.1 \*======================================================================*/
 464             
 465             	function fetchform($URI)
 466             	{
 467             		
 468             		if ($this->fetch($URI))
 469             		{			
 470             
 471             			if(is_array($this->results))
 472             			{
 473             				for($x=0;$x<count($this->results);$x++)
 474             					$this->results[$x] = $this->_stripform($this->results[$x]);
 475             			}
 476             			else
 477             				$this->results = $this->_stripform($this->results);
 478             			
 479             			return true;
 480             		}
 481             		else
 482             			return false;
 483             	}
 484 rizwank 1.1 	
 485             	
 486             /*======================================================================*\
 487             	Function:	fetchtext
 488             	Purpose:	fetch the text from a web page, stripping the links
 489             	Input:		$URI	where you are fetching from
 490             	Output:		$this->results	the text from the web page
 491             \*======================================================================*/
 492             
 493             	function fetchtext($URI)
 494             	{
 495             		if($this->fetch($URI))
 496             		{			
 497             			if(is_array($this->results))
 498             			{
 499             				for($x=0;$x<count($this->results);$x++)
 500             					$this->results[$x] = $this->_striptext($this->results[$x]);
 501             			}
 502             			else
 503             				$this->results = $this->_striptext($this->results);
 504             			return true;
 505 rizwank 1.1 		}
 506             		else
 507             			return false;
 508             	}
 509             
 510             /*======================================================================*\
 511             	Function:	submitlinks
 512             	Purpose:	grab links from a form submission
 513             	Input:		$URI	where you are submitting from
 514             	Output:		$this->results	an array of the links from the post
 515             \*======================================================================*/
 516             
 517             	function submitlinks($URI, $formvars="", $formfiles="")
 518             	{
 519             		if($this->submit($URI,$formvars, $formfiles))
 520             		{			
 521             			if(is_array($this->results))
 522             			{
 523             				for($x=0;$x<count($this->results);$x++)
 524             				{
 525             					$this->results[$x] = $this->_striplinks($this->results[$x]);
 526 rizwank 1.1 					if($this->expandlinks)
 527             						$this->results[$x] = $this->_expandlinks($this->results[$x],$URI);
 528             				}
 529             			}
 530             			else
 531             			{
 532             				$this->results = $this->_striplinks($this->results);
 533             				if($this->expandlinks)
 534             					$this->results = $this->_expandlinks($this->results,$URI);
 535             			}
 536             			return true;
 537             		}
 538             		else
 539             			return false;
 540             	}
 541             
 542             /*======================================================================*\
 543             	Function:	submittext
 544             	Purpose:	grab text from a form submission
 545             	Input:		$URI	where you are submitting from
 546             	Output:		$this->results	the text from the web page
 547 rizwank 1.1 \*======================================================================*/
 548             
 549             	function submittext($URI, $formvars = "", $formfiles = "")
 550             	{
 551             		if($this->submit($URI,$formvars, $formfiles))
 552             		{			
 553             			if(is_array($this->results))
 554             			{
 555             				for($x=0;$x<count($this->results);$x++)
 556             				{
 557             					$this->results[$x] = $this->_striptext($this->results[$x]);
 558             					if($this->expandlinks)
 559             						$this->results[$x] = $this->_expandlinks($this->results[$x],$URI);
 560             				}
 561             			}
 562             			else
 563             			{
 564             				$this->results = $this->_striptext($this->results);
 565             				if($this->expandlinks)
 566             					$this->results = $this->_expandlinks($this->results,$URI);
 567             			}
 568 rizwank 1.1 			return true;
 569             		}
 570             		else
 571             			return false;
 572             	}
 573             
 574             	
 575             
 576             /*======================================================================*\
 577             	Function:	set_submit_multipart
 578             	Purpose:	Set the form submission content type to
 579             				multipart/form-data
 580             \*======================================================================*/
 581             	function set_submit_multipart()
 582             	{
 583             		$this->_submit_type = "multipart/form-data";
 584             	}
 585             
 586             	
 587             /*======================================================================*\
 588             	Function:	set_submit_normal
 589 rizwank 1.1 	Purpose:	Set the form submission content type to
 590             				application/x-www-form-urlencoded
 591             \*======================================================================*/
 592             	function set_submit_normal()
 593             	{
 594             		$this->_submit_type = "application/x-www-form-urlencoded";
 595             	}
 596             
 597             	
 598             	
 599             
 600             /*======================================================================*\
 601             	Private functions
 602             \*======================================================================*/
 603             	
 604             	
 605             /*======================================================================*\
 606             	Function:	_striplinks
 607             	Purpose:	strip the hyperlinks from an html document
 608             	Input:		$document	document to strip.
 609             	Output:		$match		an array of the links
 610 rizwank 1.1 \*======================================================================*/
 611             
 612             	function _striplinks($document)
 613             	{	
 614             		preg_match_all("'<\s*a\s.*?href\s*=\s*			# find <a href=
 615             						([\"\'])?					# find single or double quote
 616             						(?(1) (.*?)\\1 | ([^\s\>]+))		# if quote found, match up to next matching
 617             													# quote, otherwise match up to next space
 618             						'isx",$document,$links);
 619             						
 620             
 621             		// catenate the non-empty matches from the conditional subpattern
 622             
 623             		while(list($key,$val) = each($links[2]))
 624             		{
 625             			if(!empty($val))
 626             				$match[] = $val;
 627             		}				
 628             		
 629             		while(list($key,$val) = each($links[3]))
 630             		{
 631 rizwank 1.1 			if(!empty($val))
 632             				$match[] = $val;
 633             		}		
 634             		
 635             		// return the links
 636             		return $match;
 637             	}
 638             
 639             /*======================================================================*\
 640             	Function:	_stripform
 641             	Purpose:	strip the form elements from an html document
 642             	Input:		$document	document to strip.
 643             	Output:		$match		an array of the links
 644             \*======================================================================*/
 645             
 646             	function _stripform($document)
 647             	{	
 648             		preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi",$document,$elements);
 649             		
 650             		// catenate the matches
 651             		$match = implode("\r\n",$elements[0]);
 652 rizwank 1.1 				
 653             		// return the links
 654             		return $match;
 655             	}
 656             
 657             	
 658             	
 659             /*======================================================================*\
 660             	Function:	_striptext
 661             	Purpose:	strip the text from an html document
 662             	Input:		$document	document to strip.
 663             	Output:		$text		the resulting text
 664             \*======================================================================*/
 665             
	function _striptext($document)
	{
		// Turns an HTML document into plain text: drops <script> blocks and
		// all tags, collapses the whitespace that follows a line break, and
		// decodes a fixed list of common entities.
		//
		// I didn't use preg eval (//e) since that is only available in PHP 4.0.
		// so, list your entities one by one here. I included some of the
		// more common ones.
		//
		// preg_replace() applies the patterns in order, each one on the output
		// of the previous one. The &amp; rule therefore has to be LAST:
		// otherwise "&amp;lt;" would first become "&lt;" and then be decoded a
		// second time into "<".

		$search = array("'<script[^>]*?>.*?</script>'si",	// strip out javascript
						"'<[\/\!]*?[^<>]*?>'si",			// strip out html tags
						"'([\r\n])[\s]+'",					// strip out white space
						"'&(quot|#34|#034|#x22);'i",		// replace html entities
						"'&(lt|#60|#060|#x3c);'i",			// added hexadecimal values
						"'&(gt|#62|#062|#x3e);'i",
						"'&(nbsp|#160|#xa0);'i",
						"'&(iexcl|#161);'i",
						"'&(cent|#162);'i",
						"'&(pound|#163);'i",
						"'&(copy|#169);'i",
						"'&(reg|#174);'i",
						"'&(deg|#176);'i",
						"'&(#39|#039|#x27);'",
						"'&(euro|#8364);'i",				// europe
						"'&a(uml|UML);'",					// german
						"'&o(uml|UML);'",
						"'&u(uml|UML);'",
						"'&A(uml|UML);'",
						"'&O(uml|UML);'",
						"'&U(uml|UML);'",
						"'&szlig;'i",
						"'&(amp|#38|#038|#x26);'i",			// keep last, see note above
						);
		$replace = array(	"",
							"",
							"\\1",
							"\"",
							"<",
							">",
							" ",
							chr(161),
							chr(162),
							chr(163),
							chr(169),
							chr(174),
							chr(176),
							chr(39),
							chr(128),
							"ä",
							"ö",
							"ü",
							"Ä",
							"Ö",
							"Ü",
							"ß",
							"&",
						);

		$text = preg_replace($search,$replace,$document);

		return $text;
	}
 726             
 727             /*======================================================================*\
 728             	Function:	_expandlinks
 729             	Purpose:	expand each link into a fully qualified URL
 730             	Input:		$links			the links to qualify
 731             				$URI			the full URI to get the base from
 732             	Output:		$expandedLinks	the expanded links
 733             \*======================================================================*/
 734             
	function _expandlinks($links,$URI)
	{
		// Qualifies each link (a string or an array of strings) against the
		// directory part of $URI and returns the rewritten link(s).

		// drop the query string; fall back to an empty base when $URI has no
		// usable leading part instead of reading an undefined $match[0]
		$base = "";
		if(preg_match("/^[^\?]+/",$URI,$match))
			$base = $match[0];

		// strip a trailing "/file.ext". The lookbehind keeps the host of a
		// bare "scheme://host.tld" URI from being mistaken for a file name
		// (which used to reduce the base to "scheme:").
		$base = preg_replace("|(?<!/)/[^\/\.]+\.[^\/\.]+$|","",$base);
		$base = preg_replace("|/$|","",$base);

		// "|" is the pattern delimiter, so preg_quote() must escape it too
		$search = array( 	"|^http://".preg_quote($this->host,"|")."|i",
							// leave absolute http:// and https:// links and
							// mailto: links alone; everything else gets the base
							"|^(?!https?://)(\/)?(?!mailto:)|i",
							"|/\./|",
							"|/[^\/]+/\.\./|"
						);

		// the base is used as a replacement string: escape "\" and "$" so a
		// URI containing e.g. "$1" is not read as a backreference
		$replace = array(	"",
							addcslashes($base,'\\$')."/",
							"/",
							"/"
						);

		$expandedLinks = preg_replace($search,$replace,$links);

		return $expandedLinks;
	}
 759             
 760             /*======================================================================*\
 761             	Function:	_httprequest
 762             	Purpose:	go get the http data from the server
 763             	Input:		$url		the url to fetch
 764             				$fp			the current open file pointer
 765             				$URI		the full URI
 766             				$body		body contents to send if any (POST)
 767             	Output:		
 768             \*======================================================================*/
 769             	
	function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
	{
		// Sends one HTTP request over the already opened socket $fp, then
		// reads the response headers and body.
		//
		// Fills $this->headers, $this->status, $this->response_code,
		// $this->_redirectaddr, $this->_frameurls and $this->results.
		// Returns true on success, or false (with status -100) when the read
		// timed out.

		$cookie_headers = '';
		if($this->passcookies && $this->_redirectaddr)
			$this->setcookies();

		$URI_PARTS = parse_url($URI);
		if(empty($url))
			$url = "/";

		// request line followed by the standard headers
		$headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
		if(!empty($this->agent))
			$headers .= "User-Agent: ".$this->agent."\r\n";
		if(!empty($this->host) && !isset($this->rawheaders['Host']))
			$headers .= "Host: ".$this->host."\r\n";
		if(!empty($this->accept))
			$headers .= "Accept: ".$this->accept."\r\n";
		if(!empty($this->referer))
			$headers .= "Referer: ".$this->referer."\r\n";
		if(!empty($this->cookies))
		{
			if(!is_array($this->cookies))
				$this->cookies = (array)$this->cookies;

			reset($this->cookies);
			if ( count($this->cookies) > 0 ) {
				$cookie_headers .= 'Cookie: ';
				foreach ( $this->cookies as $cookieKey => $cookieVal ) {
					$cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
				}
				// drop the trailing "; "
				$headers .= substr($cookie_headers,0,-2) . "\r\n";
			}
		}
		if(!empty($this->rawheaders))
		{
			if(!is_array($this->rawheaders))
				$this->rawheaders = (array)$this->rawheaders;
			// foreach instead of each(): each() no longer exists in PHP 8 and,
			// without a reset(), it skipped every raw header on the second
			// and later requests made with the same object
			foreach($this->rawheaders as $headerKey => $headerVal)
				$headers .= $headerKey.": ".$headerVal."\r\n";
		}
		if(!empty($content_type)) {
			$headers .= "Content-type: $content_type";
			if ($content_type == "multipart/form-data")
				$headers .= "; boundary=".$this->_mime_boundary;
			$headers .= "\r\n";
		}
		if(!empty($body))
			$headers .= "Content-length: ".strlen($body)."\r\n";
		if(!empty($this->user) || !empty($this->pass))
			$headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";

		//add proxy auth headers
		if(!empty($this->proxy_user))
			$headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";


		// blank line terminates the header section
		$headers .= "\r\n";

		// set the read timeout if needed
		if ($this->read_timeout > 0)
			socket_set_timeout($fp, $this->read_timeout);
		$this->timed_out = false;

		fwrite($fp,$headers.$body,strlen($headers.$body));

		$this->_redirectaddr = false;
		// start from an empty array (not an unset property) so that code
		// counting the headers later always sees an array
		$this->headers = array();

		while($currentHeader = fgets($fp,$this->_maxlinelen))
		{
			if ($this->read_timeout > 0 && $this->_check_timeout($fp))
			{
				$this->status=-100;
				return false;
			}

			// an empty line ends the response headers
			if($currentHeader == "\r\n")
				break;

			// if a header begins with Location: or URI:, set the redirect.
			// Detection and extraction share one case-insensitive pattern;
			// previously "location: /x" or "Location:/x" was detected but
			// could not be extracted, giving a redirect built from nothing.
			if(preg_match("/^(Location|URI):\s*(.*)/i",chop($currentHeader),$matches) && $matches[2] != "")
			{
				// look for :// in the Location header to see if hostname is included
				if(!preg_match("|\:\/\/|",$matches[2]))
				{
					// no host in the path, so prepend
					$this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
					// eliminate double slash
					if(!preg_match("|^/|",$matches[2]))
						$this->_redirectaddr .= "/".$matches[2];
					else
						$this->_redirectaddr .= $matches[2];
				}
				else
					$this->_redirectaddr = $matches[2];
			}

			if(preg_match("|^HTTP/|",$currentHeader))
			{
				// second token of the status line is the numeric status
				if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
				{
					$this->status= $status[1];
				}
				$this->response_code = $currentHeader;
			}

			$this->headers[] = $currentHeader;
		}

		// read the body in chunks of maxlength until nothing more arrives
		$results = '';
		do {
			$_data = fread($fp, $this->maxlength);
			if (strlen($_data) == 0) {
				break;
			}
			$results .= $_data;
		} while(true);

		if ($this->read_timeout > 0 && $this->_check_timeout($fp))
		{
			$this->status=-100;
			return false;
		}

		// check if there is a redirect meta tag

		if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))

		{
			$this->_redirectaddr = $this->_expandlinks($match[1],$URI);
		}

		// have we hit our frame depth and is there frame src to fetch?
		if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
		{
			$this->results[] = $results;
			for($x=0; $x<count($match[1]); $x++)
				$this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
		}
		// have we already fetched framed content?
		elseif(is_array($this->results))
			$this->results[] = $results;
		// no framed content
		else
			$this->results = $results;

		return true;
	}
 919             
 920             /*======================================================================*\
 921             	Function:	_httpsrequest
 922             	Purpose:	go get the https data from the server using curl
 923             	Input:		$url		the url to fetch
 924             				$URI		the full URI
 925 rizwank 1.1 				$body		body contents to send if any (POST)
 926             	Output:		
 927             \*======================================================================*/
 928             	
	function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
	{
		// Fetches $URI by running the external curl binary ($this->curl_path)
		// and parses the headers curl dumps into a temporary file.
		//
		// Fills $this->headers, $this->status, $this->response_code,
		// $this->_redirectaddr, $this->_frameurls and $this->results.
		// Returns true on success, false (with $this->error set) when the
		// temp file cannot be created or curl exits non-zero.
		//
		// NOTE(review): $url and $http_method are not handed to curl; curl
		// sends a POST only because of -d when a body is present.

		if($this->passcookies && $this->_redirectaddr)
			$this->setcookies();

		$headers = array();

		$URI_PARTS = parse_url($URI);
		if(empty($url))
			$url = "/";
		// GET ... header not needed for curl
		//$headers[] = $http_method." ".$url." ".$this->_httpversion;
		if(!empty($this->agent))
			$headers[] = "User-Agent: ".$this->agent;
		if(!empty($this->host))
			$headers[] = "Host: ".$this->host;
		if(!empty($this->accept))
			$headers[] = "Accept: ".$this->accept;
		if(!empty($this->referer))
			$headers[] = "Referer: ".$this->referer;
		if(!empty($this->cookies))
		{
			if(!is_array($this->cookies))
				$this->cookies = (array)$this->cookies;

			reset($this->cookies);
			if ( count($this->cookies) > 0 ) {
				$cookie_str = 'Cookie: ';
				foreach ( $this->cookies as $cookieKey => $cookieVal ) {
					$cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
				}
				// drop the trailing "; "
				$headers[] = substr($cookie_str,0,-2);
			}
		}
		if(!empty($this->rawheaders))
		{
			if(!is_array($this->rawheaders))
				$this->rawheaders = (array)$this->rawheaders;
			// foreach instead of each(), which no longer exists in PHP 8
			foreach($this->rawheaders as $headerKey => $headerVal)
				$headers[] = $headerKey.": ".$headerVal;
		}
		if(!empty($content_type)) {
			if ($content_type == "multipart/form-data")
				$headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
			else
				$headers[] = "Content-type: $content_type";
		}
		if(!empty($body))
			$headers[] = "Content-length: ".strlen($body);
		if(!empty($this->user) || !empty($this->pass))
			$headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

		// Build the curl command line. Headers, body and URI can carry
		// caller- or server-supplied text (referer, cookies, redirect
		// targets), so every argument goes through escapeshellarg() rather
		// than being pasted between double quotes.
		// NOTE(review): on Windows escapeshellarg() replaces %, ! and " with
		// spaces, which alters url-encoded bodies - confirm if that platform
		// matters.
		$cmdline_params = "";
		foreach($headers as $header_line)
			$cmdline_params .= " -H ".escapeshellarg($header_line);

		if(!empty($body))
			$cmdline_params .= " -d ".escapeshellarg($body);

		if($this->read_timeout > 0)
			$cmdline_params .= " -m ".$this->read_timeout;

		// The original passed an undefined local $temp_dir to tempnam().
		// Presumably a temp_dir property was intended (TODO confirm); use it
		// when set, otherwise fall back to the system temp directory.
		if(!empty($this->temp_dir))
			$temp_dir = $this->temp_dir;
		elseif(function_exists("sys_get_temp_dir"))
			$temp_dir = sys_get_temp_dir();
		else
			$temp_dir = "";

		$headerfile = tempnam($temp_dir, "sno");
		if($headerfile === false)
		{
			$this->error = "Error: could not create a temporary file for the cURL headers.";
			return false;
		}

		exec($this->curl_path." -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI),$results,$return);

		if($return)
		{
			// don't leave the temp file behind on failure
			if(file_exists($headerfile))
				unlink($headerfile);
			$this->error = "Error: cURL could not retrieve the document, error $return.";
			return false;
		}


		// exec() returns the body as an array of lines
		$results = implode("\r\n",$results);

		$result_headers = file($headerfile);
		if(!is_array($result_headers))
			$result_headers = array();
		// the header dump is no longer needed once it has been read
		unlink($headerfile);

		$this->_redirectaddr = false;
		// start from an empty array (not an unset property) so that code
		// counting the headers later always sees an array
		$this->headers = array();

		foreach($result_headers as $header_line)
		{

			// if a header begins with Location: or URI:, set the redirect.
			// One case-insensitive pattern does both detection and
			// extraction; the old extraction pattern needed two whitespace
			// characters after "Location:" and so never matched a normal
			// redirect header.
			if(preg_match("/^(Location|URI):\s*(.*)/i",chop($header_line),$matches) && $matches[2] != "")
			{
				// look for :// in the Location header to see if hostname is included
				if(!preg_match("|\:\/\/|",$matches[2]))
				{
					// no host in the path, so prepend
					$this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
					// eliminate double slash
					if(!preg_match("|^/|",$matches[2]))
						$this->_redirectaddr .= "/".$matches[2];
					else
						$this->_redirectaddr .= $matches[2];
				}
				else
					$this->_redirectaddr = $matches[2];
			}

			if(preg_match("|^HTTP/|",$header_line))
			{
				// record the numeric status the same way _httprequest does
				if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$header_line,$status))
					$this->status = $status[1];
				$this->response_code = $header_line;
			}

			$this->headers[] = $header_line;
		}

		// check if there is a redirect meta tag (same pattern as in
		// _httprequest: whitespace between ";" and "URL" is optional)

		if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
		{
			$this->_redirectaddr = $this->_expandlinks($match[1],$URI);
		}

		// have we hit our frame depth and is there frame src to fetch?
		if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
		{
			$this->results[] = $results;
			for($x=0; $x<count($match[1]); $x++)
				$this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
		}
		// have we already fetched framed content?
		elseif(is_array($this->results))
			$this->results[] = $results;
		// no framed content
		else
			$this->results = $results;

		return true;
	}
1063             
1064             /*======================================================================*\
1065             	Function:	setcookies()
1066             	Purpose:	set cookies for a redirection
1067             \*======================================================================*/
1068             	
1069             	function setcookies()
1070             	{
1071             		for($x=0; $x<count($this->headers); $x++)
1072 rizwank 1.1 		{
1073             		if(preg_match('/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $this->headers[$x],$match))
1074             			$this->cookies[$match[1]] = urldecode($match[2]);
1075             		}
1076             	}
1077             
1078             	
1079             /*======================================================================*\
1080             	Function:	_check_timeout
1081             	Purpose:	checks whether timeout has occurred
1082             	Input:		$fp	file pointer
1083             \*======================================================================*/
1084             
1085             	function _check_timeout($fp)
1086             	{
1087             		if ($this->read_timeout > 0) {
1088             			$fp_status = socket_get_status($fp);
1089             			if ($fp_status["timed_out"]) {
1090             				$this->timed_out = true;
1091             				return true;
1092             			}
1093 rizwank 1.1 		}
1094             		return false;
1095             	}
1096             
1097             /*======================================================================*\
1098             	Function:	_connect
1099             	Purpose:	make a socket connection
1100             	Input:		$fp	file pointer
1101             \*======================================================================*/
1102             	
1103             	function _connect(&$fp)
1104             	{
1105             		if(!empty($this->proxy_host) && !empty($this->proxy_port))
1106             			{
1107             				$this->_isproxy = true;
1108             				
1109             				$host = $this->proxy_host;
1110             				$port = $this->proxy_port;
1111             			}
1112             		else
1113             		{
1114 rizwank 1.1 			$host = $this->host;
1115             			$port = $this->port;
1116             		}
1117             	
1118             		$this->status = 0;
1119             		
1120             		if($fp = fsockopen(
1121             					$host,
1122             					$port,
1123             					$errno,
1124             					$errstr,
1125             					$this->_fp_timeout
1126             					))
1127             		{
1128             			// socket connection succeeded
1129             
1130             			return true;
1131             		}
1132             		else
1133             		{
1134             			// socket connection failed
1135 rizwank 1.1 			$this->status = $errno;
1136             			switch($errno)
1137             			{
1138             				case -3:
1139             					$this->error="socket creation failed (-3)";
1140             				case -4:
1141             					$this->error="dns lookup failure (-4)";
1142             				case -5:
1143             					$this->error="connection refused or timed out (-5)";
1144             				default:
1145             					$this->error="connection failed (".$errno.")";
1146             			}
1147             			return false;
1148             		}
1149             	}
1150             /*======================================================================*\
1151             	Function:	_disconnect
1152             	Purpose:	disconnect a socket connection
1153             	Input:		$fp	file pointer
1154             \*======================================================================*/
1155             	
1156 rizwank 1.1 	function _disconnect($fp)
1157             	{
1158             		return(fclose($fp));
1159             	}
1160             
1161             	
1162             /*======================================================================*\
1163             	Function:	_prepare_post_body
1164             	Purpose:	Prepare post body according to encoding type
1165             	Input:		$formvars  - form variables
1166             				$formfiles - form upload files
1167             	Output:		post body
1168             \*======================================================================*/
1169             	
1170             	function _prepare_post_body($formvars, $formfiles)
1171             	{
1172             		settype($formvars, "array");
1173             		settype($formfiles, "array");
1174             		$postdata = '';
1175             
1176             		if (count($formvars) == 0 && count($formfiles) == 0)
1177 rizwank 1.1 			return;
1178             		
1179             		switch ($this->_submit_type) {
1180             			case "application/x-www-form-urlencoded":
1181             				reset($formvars);
1182             				while(list($key,$val) = each($formvars)) {
1183             					if (is_array($val) || is_object($val)) {
1184             						while (list($cur_key, $cur_val) = each($val)) {
1185             							$postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
1186             						}
1187             					} else
1188             						$postdata .= urlencode($key)."=".urlencode($val)."&";
1189             				}
1190             				break;
1191             
1192             			case "multipart/form-data":
1193             				$this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));
1194             				
1195             				reset($formvars);
1196             				while(list($key,$val) = each($formvars)) {
1197             					if (is_array($val) || is_object($val)) {
1198 rizwank 1.1 						while (list($cur_key, $cur_val) = each($val)) {
1199             							$postdata .= "--".$this->_mime_boundary."\r\n";
1200             							$postdata .= "Content-Disposition: form-data; name=\"$key\[\]\"\r\n\r\n";
1201             							$postdata .= "$cur_val\r\n";
1202             						}
1203             					} else {
1204             						$postdata .= "--".$this->_mime_boundary."\r\n";
1205             						$postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
1206             						$postdata .= "$val\r\n";
1207             					}
1208             				}
1209             				
1210             				reset($formfiles);
1211             				while (list($field_name, $file_names) = each($formfiles)) {
1212             					settype($file_names, "array");
1213             					while (list(, $file_name) = each($file_names)) {
1214             						if (!is_readable($file_name)) continue;
1215             
1216             						$fp = fopen($file_name, "r");
1217             						$file_content = fread($fp, filesize($file_name));
1218             						fclose($fp);
1219 rizwank 1.1 						$base_name = basename($file_name);
1220             
1221             						$postdata .= "--".$this->_mime_boundary."\r\n";
1222             						$postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
1223             						$postdata .= "$file_content\r\n";
1224             					}
1225             				}
1226             				$postdata .= "--".$this->_mime_boundary."--\r\n";
1227             				break;
1228             		}
1229             
1230             		return $postdata;
1231             	}
1232             }
1233             
1234             ?>

Rizwan Kassim
Powered by
ViewCVS 0.9.2