1 rizwank 1.1 <?php
2
3 /*************************************************
4
5 Snoopy - the PHP net client
6 Author: Monte Ohrt <monte@ispi.net>
7 Copyright (c): 1999-2000 ispi, all rights reserved
8 Version: 1.2
9
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
14 *
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with this library; if not, write to the Free Software
22 rizwank 1.1 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23
24 You may contact the author of Snoopy by e-mail at:
25 monte@ispi.net
26
27 Or, write to:
28 Monte Ohrt
29 CTO, ispi
30 237 S. 70th suite 220
31 Lincoln, NE 68510
32
33 The latest version of Snoopy can be obtained from:
34 http://snoopy.sourceforge.net/
35
36 *************************************************/
37
38 class Snoopy
39 {
40 /**** Public variables ****/
41
42 /* user definable vars */
43 rizwank 1.1
44 var $host = "www.php.net"; // host name we are connecting to
45 var $port = 80; // port we are connecting to
46 var $proxy_host = ""; // proxy host to use
47 var $proxy_port = ""; // proxy port to use
48 var $proxy_user = ""; // proxy user to use
49 var $proxy_pass = ""; // proxy password to use
50
51 var $agent = "Snoopy v1.2"; // agent we masquerade as
52 var $referer = ""; // referer info to pass
53 var $cookies = array(); // array of cookies to pass
54 // $cookies["username"]="joe";
55 var $rawheaders = array(); // array of raw headers to send
56 // $rawheaders["Content-type"]="text/html";
57
58 var $maxredirs = 5; // http redirection depth maximum. 0 = disallow
59 var $lastredirectaddr = ""; // contains address of last redirected address
60 var $offsiteok = true; // allows redirection off-site
61 var $maxframes = 0; // frame content depth maximum. 0 = disallow
62 var $expandlinks = true; // expand links to fully qualified URLs.
63 // this only applies to fetchlinks()
64 rizwank 1.1 // or submitlinks()
65 var $passcookies = true; // pass set cookies back through redirects
66 // NOTE: this currently does not respect
67 // dates, domains or paths.
68
69 var $user = ""; // user for http authentication
70 var $pass = ""; // password for http authentication
71
72 // http accept types
73 var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
74
75 var $results = ""; // where the content is put
76
77 var $error = ""; // error messages sent here
78 var $response_code = ""; // response code returned from server
79 var $headers = array(); // headers returned from server sent here
80 var $maxlength = 500000; // max return data length (body)
81 var $read_timeout = 0; // timeout on read operations, in seconds
82 // supported only since PHP 4 Beta 4
83 // set to 0 to disallow timeouts
84 var $timed_out = false; // if a read operation timed out
85 rizwank 1.1 var $status = 0; // http request status
86
87 var $temp_dir = "/tmp"; // temporary directory that the webserver
88 // has permission to write to.
89 // under Windows, this should be C:\temp
90
91 var $curl_path = "/usr/local/bin/curl";
92 // Snoopy will use cURL for fetching
93 // SSL content if a full system path to
94 // the cURL binary is supplied here.
95 // set to false if you do not have
96 // cURL installed. See http://curl.haxx.se
97 // for details on installing cURL.
98 // Snoopy does *not* use the cURL
99 // library functions built into php,
100 // as these functions are not stable
101 // as of this Snoopy release.
102
103 /**** Private variables ****/
104
105 var $_maxlinelen = 4096; // max line length (headers)
106 rizwank 1.1
107 var $_httpmethod = "GET"; // default http request method
108 var $_httpversion = "HTTP/1.0"; // default http request version
109 var $_submit_method = "POST"; // default submit method
110 var $_submit_type = "application/x-www-form-urlencoded"; // default submit type
111 var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type
112 var $_redirectaddr = false; // will be set if page fetched is a redirect
113 var $_redirectdepth = 0; // increments on an http redirect
114 var $_frameurls = array(); // frame src urls
115 var $_framedepth = 0; // increments on frame depth
116
117 var $_isproxy = false; // set if using a proxy server
118 var $_fp_timeout = 30; // timeout for socket connection
119
120 /*======================================================================*\
121 Function: fetch
122 Purpose: fetch the contents of a web page
123 (and possibly other protocols in the
124 future like ftp, nntp, gopher, etc.)
125 Input: $URI the location of the page to fetch
126 Output: $this->results the output text from the fetch
127 rizwank 1.1 \*======================================================================*/
128
/**
 * Fetch the contents of a web page over http or https.
 *
 * The response body ends up in $this->results (filled in by _httprequest()
 * or _httpsrequest()). A redirect reported by the request is followed while
 * $this->_redirectdepth is below $this->maxredirs, and frame URLs collected
 * by the request are fetched while $this->_framedepth is below
 * $this->maxframes.
 *
 * @param string $URI the location of the page to fetch
 * @return bool false if the protocol is not http/https, the http connection
 *              could not be opened, or (https) $this->curl_path is unset or
 *              not executable; true otherwise
 */
function fetch($URI)
{
	$URI_PARTS = parse_url($URI);
	// parse_url() returns false on seriously malformed URLs
	if (!is_array($URI_PARTS))
		$URI_PARTS = array();

	if (!empty($URI_PARTS["user"]))
		$this->user = $URI_PARTS["user"];
	if (!empty($URI_PARTS["pass"]))
		$this->pass = $URI_PARTS["pass"];

	$scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : "";
	$host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : "";

	// request target for a direct (non-proxy) connection: path plus query
	$path = isset($URI_PARTS["path"]) ? $URI_PARTS["path"] : "";
	if (!empty($URI_PARTS["query"]))
		$path .= "?".$URI_PARTS["query"];

	switch ($scheme)
	{
		case "http":
			$this->host = $host;
			if (!empty($URI_PARTS["port"]))
				$this->port = $URI_PARTS["port"];
			if (!$this->_connect($fp))
				return false;

			// a proxy is sent the entire URI, a direct connection only the path
			$this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_httpmethod);
			$this->_disconnect($fp);
			break;

		case "https":
			if (!$this->curl_path)
				return false;
			if (function_exists("is_executable") && !is_executable($this->curl_path))
				return false;
			$this->host = $host;
			if (!empty($URI_PARTS["port"]))
				$this->port = $URI_PARTS["port"];

			// a proxy is sent the entire URI, a direct connection only the path
			$this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_httpmethod);
			break;

		default:
			// not a valid protocol
			$this->error = 'Invalid protocol "'.$scheme."\"\n";
			return false;
	}

	/* url was redirected, check if we've hit the max depth */
	if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
	{
		// only follow the redirect if it's on this site, or offsiteok is true.
		// The host must be followed by a delimiter so that
		// "http://host.evil.example" does not count as on-site.
		$onsite = "~^https?://".preg_quote($this->host, "~")."(?:[:/?#]|\$)~i";
		if ($this->offsiteok || preg_match($onsite, $this->_redirectaddr))
		{
			/* follow the redirect */
			$this->_redirectdepth++;
			$this->lastredirectaddr = $this->_redirectaddr;
			$this->fetch($this->_redirectaddr);
		}
	}

	if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
	{
		// work on a copy: the nested fetches refill $this->_frameurls
		$frameurls = $this->_frameurls;
		$this->_frameurls = array();

		foreach ($frameurls as $frameurl)
		{
			if ($this->_framedepth >= $this->maxframes)
				break;
			$this->fetch($frameurl);
			$this->_framedepth++;
		}
	}

	return true;
}
265
266 /*======================================================================*\
267 Function: submit
268 Purpose: submit an http form
269 Input: $URI the location to post the data
270 $formvars the formvars to use.
271 format: $formvars["var"] = "val";
272 $formfiles an array of files to submit
273 format: $formfiles["var"] = "/dir/filename.ext";
274 rizwank 1.1 Output: $this->results the text output from the post
275 \*======================================================================*/
276
/**
 * Submit an http form.
 *
 * The post body is built by _prepare_post_body() and sent with
 * $this->_submit_method / $this->_submit_type. The response body ends up in
 * $this->results (filled in by _httprequest() or _httpsrequest()).
 *
 * @param string $URI       the location to post the data
 * @param mixed  $formvars  the form variables, format: $formvars["var"] = "val";
 * @param mixed  $formfiles files to submit, format: $formfiles["var"] = "/dir/filename.ext";
 * @return bool false if the protocol is not http/https, the http connection
 *              could not be opened, or (https) $this->curl_path is unset or
 *              not executable; true otherwise
 */
function submit($URI, $formvars="", $formfiles="")
{
	$postdata = $this->_prepare_post_body($formvars, $formfiles);

	$URI_PARTS = parse_url($URI);
	// parse_url() returns false on seriously malformed URLs
	if (!is_array($URI_PARTS))
		$URI_PARTS = array();

	if (!empty($URI_PARTS["user"]))
		$this->user = $URI_PARTS["user"];
	if (!empty($URI_PARTS["pass"]))
		$this->pass = $URI_PARTS["pass"];

	$scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : "";
	$host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : "";

	// request target for a direct (non-proxy) connection: path plus query
	$path = isset($URI_PARTS["path"]) ? $URI_PARTS["path"] : "";
	if (!empty($URI_PARTS["query"]))
		$path .= "?".$URI_PARTS["query"];

	switch ($scheme)
	{
		case "http":
			$this->host = $host;
			if (!empty($URI_PARTS["port"]))
				$this->port = $URI_PARTS["port"];
			if (!$this->_connect($fp))
				return false;

			// a proxy is sent the entire URI, a direct connection only the path
			$this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
			$this->_disconnect($fp);
			break;

		case "https":
			if (!$this->curl_path)
				return false;
			if (function_exists("is_executable") && !is_executable($this->curl_path))
				return false;
			$this->host = $host;
			if (!empty($URI_PARTS["port"]))
				$this->port = $URI_PARTS["port"];

			// a proxy is sent the entire URI, a direct connection only the path
			$this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
			break;

		default:
			// not a valid protocol
			$this->error = 'Invalid protocol "'.$scheme."\"\n";
			return false;
	}

	/* url was redirected, check if we've hit the max depth */
	if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
	{
		// a redirect that does not start with our scheme is expanded against this host
		if (!preg_match("|^".$scheme."://|", $this->_redirectaddr))
			$this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $scheme."://".$host);

		// only follow the redirect if it's on this site, or offsiteok is true.
		// The host must be followed by a delimiter so that
		// "http://host.evil.example" does not count as on-site.
		$onsite = "~^https?://".preg_quote($this->host, "~")."(?:[:/?#]|\$)~i";
		if ($this->offsiteok || preg_match($onsite, $this->_redirectaddr))
		{
			/* follow the redirect */
			$this->_redirectdepth++;
			$this->lastredirectaddr = $this->_redirectaddr;
			if (strpos($this->_redirectaddr, "?") > 0)
				$this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
			else
				$this->submit($this->_redirectaddr, $formvars, $formfiles);
		}
	}

	if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
	{
		// work on a copy: the nested fetches refill $this->_frameurls
		$frameurls = $this->_frameurls;
		$this->_frameurls = array();

		foreach ($frameurls as $frameurl)
		{
			if ($this->_framedepth >= $this->maxframes)
				break;
			$this->fetch($frameurl);
			$this->_framedepth++;
		}
	}

	return true;
}
429
430 /*======================================================================*\
431 Function: fetchlinks
432 Purpose: fetch the links from a web page
433 Input: $URI where you are fetching from
434 Output: $this->results an array of the URLs
435 \*======================================================================*/
436
/**
 * Fetch a web page and replace $this->results with the links found in it.
 *
 * When $this->results is an array (one entry per fetched page) each entry
 * is replaced by its own list of links. Links are passed through
 * _expandlinks() when $this->expandlinks is set; this is done per page, the
 * same way submitlinks() does it, so _expandlinks() is never handed a
 * nested array.
 *
 * @param string $URI where you are fetching from
 * @return bool false if the fetch failed, true otherwise
 */
function fetchlinks($URI)
{
	if (!$this->fetch($URI))
		return false;

	if (is_array($this->results))
	{
		foreach (array_keys($this->results) as $x)
		{
			$links = $this->_striplinks($this->results[$x]);
			// a page without links must still yield a list
			if (!is_array($links))
				$links = array();
			if ($this->expandlinks)
				$links = $this->_expandlinks($links, $URI);
			$this->results[$x] = $links;
		}
	}
	else
	{
		$links = $this->_striplinks($this->results);
		// a page without links must still yield a list
		if (!is_array($links))
			$links = array();
		if ($this->expandlinks)
			$links = $this->_expandlinks($links, $URI);
		$this->results = $links;
	}

	return true;
}
457
458 /*======================================================================*\
459 Function: fetchform
460 Purpose: fetch the form elements from a web page
461 Input: $URI where you are fetching from
462 Output: $this->results the resulting html form
463 rizwank 1.1 \*======================================================================*/
464
/**
 * Fetch a web page and reduce $this->results to its form elements.
 *
 * Each result (or the single result string) is passed through _stripform().
 *
 * @param string $URI where you are fetching from
 * @return bool false if the fetch failed, true otherwise
 */
function fetchform($URI)
{
	if (!$this->fetch($URI))
		return false;

	if (!is_array($this->results))
	{
		// single page
		$this->results = $this->_stripform($this->results);
		return true;
	}

	// one entry per fetched page
	$page = 0;
	while ($page < count($this->results))
	{
		$this->results[$page] = $this->_stripform($this->results[$page]);
		$page++;
	}

	return true;
}
484 rizwank 1.1
485
486 /*======================================================================*\
487 Function: fetchtext
488 Purpose: fetch the text from a web page, stripping the links
489 Input: $URI where you are fetching from
490 Output: $this->results the text from the web page
491 \*======================================================================*/
492
/**
 * Fetch a web page and reduce $this->results to its text.
 *
 * Each result (or the single result string) is passed through _striptext().
 *
 * @param string $URI where you are fetching from
 * @return bool false if the fetch failed, true otherwise
 */
function fetchtext($URI)
{
	if (!$this->fetch($URI))
		return false;

	if (!is_array($this->results))
	{
		// single page
		$this->results = $this->_striptext($this->results);
		return true;
	}

	// one entry per fetched page
	$page = 0;
	while ($page < count($this->results))
	{
		$this->results[$page] = $this->_striptext($this->results[$page]);
		$page++;
	}

	return true;
}
509
510 /*======================================================================*\
511 Function: submitlinks
512 Purpose: grab links from a form submission
513 Input: $URI where you are submitting from
514 Output: $this->results an array of the links from the post
515 \*======================================================================*/
516
/**
 * Submit a form and replace $this->results with the links of the response.
 *
 * Each result (or the single result string) is passed through _striplinks()
 * and, when $this->expandlinks is set, through _expandlinks().
 *
 * @param string $URI       where you are submitting from
 * @param mixed  $formvars  form variables, handed to submit()
 * @param mixed  $formfiles form files, handed to submit()
 * @return bool false if the submit failed, true otherwise
 */
function submitlinks($URI, $formvars="", $formfiles="")
{
	if (!$this->submit($URI, $formvars, $formfiles))
		return false;

	if (!is_array($this->results))
	{
		// single page
		$this->results = $this->_striplinks($this->results);
		if ($this->expandlinks)
			$this->results = $this->_expandlinks($this->results, $URI);
		return true;
	}

	// one entry per fetched page
	$page = 0;
	while ($page < count($this->results))
	{
		$this->results[$page] = $this->_striplinks($this->results[$page]);
		if ($this->expandlinks)
			$this->results[$page] = $this->_expandlinks($this->results[$page], $URI);
		$page++;
	}

	return true;
}
541
542 /*======================================================================*\
543 Function: submittext
544 Purpose: grab text from a form submission
545 Input: $URI where you are submitting from
546 Output: $this->results the text from the web page
547 rizwank 1.1 \*======================================================================*/
548
/**
 * Submit a form and reduce $this->results to the text of the response.
 *
 * Each result (or the single result string) is passed through _striptext()
 * and, when $this->expandlinks is set, through _expandlinks().
 *
 * @param string $URI       where you are submitting from
 * @param mixed  $formvars  form variables, handed to submit()
 * @param mixed  $formfiles form files, handed to submit()
 * @return bool false if the submit failed, true otherwise
 */
function submittext($URI, $formvars = "", $formfiles = "")
{
	if (!$this->submit($URI, $formvars, $formfiles))
		return false;

	if (!is_array($this->results))
	{
		// single page
		$this->results = $this->_striptext($this->results);
		if ($this->expandlinks)
			$this->results = $this->_expandlinks($this->results, $URI);
		return true;
	}

	// one entry per fetched page
	$page = 0;
	while ($page < count($this->results))
	{
		$this->results[$page] = $this->_striptext($this->results[$page]);
		if ($this->expandlinks)
			$this->results[$page] = $this->_expandlinks($this->results[$page], $URI);
		$page++;
	}

	return true;
}
573
574
575
576 /*======================================================================*\
577 Function: set_submit_multipart
578 Purpose: Set the form submission content type to
579 multipart/form-data
580 \*======================================================================*/
/**
 * Set the form submission content type to multipart/form-data.
 */
function set_submit_multipart()
{
	$multipart_type = "multipart/form-data";
	$this->_submit_type = $multipart_type;
}
585
586
587 /*======================================================================*\
588 Function: set_submit_normal
589 rizwank 1.1 Purpose: Set the form submission content type to
590 application/x-www-form-urlencoded
591 \*======================================================================*/
/**
 * Set the form submission content type to application/x-www-form-urlencoded.
 */
function set_submit_normal()
{
	$urlencoded_type = "application/x-www-form-urlencoded";
	$this->_submit_type = $urlencoded_type;
}
596
597
598
599
600 /*======================================================================*\
601 Private functions
602 \*======================================================================*/
603
604
605 /*======================================================================*\
606 Function: _striplinks
607 Purpose: strip the hyperlinks from an html document
608 Input: $document document to strip.
609 Output: $match an array of the links
610 rizwank 1.1 \*======================================================================*/
611
/**
 * Extract the href targets of the <a> tags in an html document.
 *
 * Quoted targets are listed first (in document order), followed by the
 * unquoted ones; empty targets are skipped.
 *
 * @param string $document document to strip
 * @return array the links; an empty array when the document has none
 */
function _striplinks($document)
{
	preg_match_all("'<\s*a\s.*?href\s*=\s*			# find <a href=
					([\"\'])?					# find single or double quote
					(?(1) (.*?)\\1 | ([^\s\>]+))		# if quote found, match up to next matching
												# quote, otherwise match up to next space
					'isx",$document,$links);

	// always return an array, even when nothing matched
	$match = array();

	// catenate the non-empty matches from the conditional subpattern:
	// group 2 holds the quoted targets, group 3 the unquoted ones
	foreach (array(2, 3) as $group)
	{
		if (empty($links[$group]))
			continue;
		foreach ($links[$group] as $val)
		{
			if (!empty($val))
				$match[] = $val;
		}
	}

	// return the links
	return $match;
}
638
639 /*======================================================================*\
640 Function: _stripform
641 Purpose: strip the form elements from an html document
642 Input: $document document to strip.
643 Output: $match an array of the links
644 \*======================================================================*/
645
/**
 * Extract the form elements from an html document.
 *
 * Collects every FORM, INPUT, SELECT, TEXTAREA and OPTION tag matched by
 * the pattern below and joins the matches with CRLF.
 *
 * @param string $document document to strip
 * @return string the matched form elements, one per line
 */
function _stripform($document)
{
	$pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";
	preg_match_all($pattern, $document, $found);

	// index 0 holds the full-pattern matches
	return implode("\r\n", $found[0]);
}
656
657
658
659 /*======================================================================*\
660 Function: _striptext
661 Purpose: strip the text from an html document
662 Input: $document document to strip.
663 Output: $text the resulting text
664 \*======================================================================*/
665
/**
 * Strip the markup from an html document and decode a fixed list of
 * entities.
 *
 * The conversions are applied in the order listed: scripts, then tags,
 * then whitespace following a line break, then the entities.
 *
 * @param string $document document to strip
 * @return string the resulting text
 */
function _striptext($document)
{
	// pattern => replacement; entities are listed one by one
	$conversions = array(
		"'<script[^>]*?>.*?</script>'si"	=> "",		// strip out javascript
		"'<[\/\!]*?[^<>]*?>'si"				=> "",		// strip out html tags
		"'([\r\n])[\s]+'"					=> "\\1",	// strip out white space
		"'&(quot|#34|#034|#x22);'i"			=> "\"",	// replace html entities
		"'&(amp|#38|#038|#x26);'i"			=> "&",		// added hexadecimal values
		"'&(lt|#60|#060|#x3c);'i"			=> "<",
		"'&(gt|#62|#062|#x3e);'i"			=> ">",
		"'&(nbsp|#160|#xa0);'i"				=> " ",
		"'&(iexcl|#161);'i"					=> chr(161),
		"'&(cent|#162);'i"					=> chr(162),
		"'&(pound|#163);'i"					=> chr(163),
		"'&(copy|#169);'i"					=> chr(169),
		"'&(reg|#174);'i"					=> chr(174),
		"'&(deg|#176);'i"					=> chr(176),
		"'&(#39|#039|#x27);'"				=> chr(39),
		"'&(euro|#8364);'i"					=> chr(128),	// europe
		"'&a(uml|UML);'"					=> "ä",		// german
		"'&o(uml|UML);'"					=> "ö",
		"'&u(uml|UML);'"					=> "ü",
		"'&A(uml|UML);'"					=> "Ä",
		"'&O(uml|UML);'"					=> "Ö",
		"'&U(uml|UML);'"					=> "Ü",
		"'ß'i"								=> "ß",
	);

	return preg_replace(array_keys($conversions), array_values($conversions), $document);
}
726
727 /*======================================================================*\
728 Function: _expandlinks
729 Purpose: expand each link into a fully qualified URL
730 Input: $links the links to qualify
731 $URI the full URI to get the base from
732 Output: $expandedLinks the expanded links
733 \*======================================================================*/
734
/**
 * Expand each link into a fully qualified URL.
 *
 * The base is $URI up to its first "?", with a trailing "/name.ext"
 * segment and a trailing slash removed. The links are then run through four
 * replacements in order: a leading "http://<current host>" is removed, the
 * base plus "/" is inserted at the start of links that do not begin with
 * "http://" (see the second pattern for the exact condition), and "/./"
 * and "/dir/../" sequences are collapsed to "/".
 *
 * @param mixed  $links the links to qualify (string or array of strings)
 * @param string $URI   the full URI to get the base from
 * @return mixed the expanded links
 */
function _expandlinks($links,$URI)
{
	// everything before the query string
	preg_match("/^[^\?]+/", $URI, $base_match);

	$base = $base_match[0];
	$base = preg_replace("|/[^\/\.]+\.[^\/\.]+$|", "", $base);
	$base = preg_replace("|/$|", "", $base);

	$patterns = array();
	$substitutes = array();

	$patterns[] = "|^http://".preg_quote($this->host)."|i";
	$substitutes[] = "";

	$patterns[] = "|^(?!http://)(\/)?(?!mailto:)|i";
	$substitutes[] = $base."/";

	$patterns[] = "|/\./|";
	$substitutes[] = "/";

	$patterns[] = "|/[^\/]+/\.\./|";
	$substitutes[] = "/";

	return preg_replace($patterns, $substitutes, $links);
}
759
760 /*======================================================================*\
761 Function: _httprequest
762 Purpose: go get the http data from the server
763 Input: $url the url to fetch
764 $fp the current open file pointer
765 $URI the full URI
766 $body body contents to send if any (POST)
767 Output:
768 \*======================================================================*/
769
/**
 * Send an http request over an open socket and read the response.
 *
 * Sets $this->headers, $this->status, $this->response_code,
 * $this->_redirectaddr (from a Location:/URI: header or a meta refresh
 * tag), $this->_frameurls (frame sources, while below the frame depth
 * limit) and $this->results.
 *
 * @param string   $url          the request target sent in the request line
 * @param resource $fp           the current open file pointer
 * @param string   $URI          the full URI
 * @param string   $http_method  the request method
 * @param string   $content_type Content-type header value, if any
 * @param string   $body         body contents to send if any (POST)
 * @return bool false (with $this->status set to -100) when a read timed
 *              out, true otherwise
 */
function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
{
	if ($this->passcookies && $this->_redirectaddr)
		$this->setcookies();

	$URI_PARTS = parse_url($URI);
	$scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : "";

	if (empty($url))
		$url = "/";

	$headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
	if (!empty($this->agent))
		$headers .= "User-Agent: ".$this->agent."\r\n";
	if (!empty($this->host) && !isset($this->rawheaders['Host']))
		$headers .= "Host: ".$this->host."\r\n";
	if (!empty($this->accept))
		$headers .= "Accept: ".$this->accept."\r\n";
	if (!empty($this->referer))
		$headers .= "Referer: ".$this->referer."\r\n";

	if (!empty($this->cookies))
	{
		if (!is_array($this->cookies))
			$this->cookies = (array)$this->cookies;

		$cookie_pairs = array();
		foreach ($this->cookies as $cookieKey => $cookieVal)
			$cookie_pairs[] = $cookieKey."=".urlencode($cookieVal);
		$headers .= "Cookie: ".implode("; ", $cookie_pairs)."\r\n";
	}

	if (!empty($this->rawheaders))
	{
		if (!is_array($this->rawheaders))
			$this->rawheaders = (array)$this->rawheaders;
		// foreach always starts at the first element, so the raw headers
		// are sent on every request, not only on the first one
		foreach ($this->rawheaders as $headerKey => $headerVal)
			$headers .= $headerKey.": ".$headerVal."\r\n";
	}

	if (!empty($content_type))
	{
		$headers .= "Content-type: $content_type";
		if ($content_type == "multipart/form-data")
			$headers .= "; boundary=".$this->_mime_boundary;
		$headers .= "\r\n";
	}
	if (!empty($body))
		$headers .= "Content-length: ".strlen($body)."\r\n";
	if (!empty($this->user) || !empty($this->pass))
		$headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";

	//add proxy auth headers
	if (!empty($this->proxy_user))
		$headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";

	// blank line ends the header section
	$headers .= "\r\n";

	// set the read timeout if needed
	if ($this->read_timeout > 0)
		socket_set_timeout($fp, $this->read_timeout);
	$this->timed_out = false;

	$request = $headers.$body;
	fwrite($fp, $request, strlen($request));

	$this->_redirectaddr = false;
	// reset to an empty array (not unset) so the property stays countable
	$this->headers = array();

	while ($currentHeader = fgets($fp, $this->_maxlinelen))
	{
		if ($this->read_timeout > 0 && $this->_check_timeout($fp))
		{
			$this->status = -100;
			return false;
		}

		// an empty line separates the headers from the body
		if ($currentHeader == "\r\n")
			break;

		// if a header begins with Location: or URI:, set the redirect.
		// Detection and extraction share one case-insensitive pattern.
		if (preg_match("/^(Location:|URI:)\s*(.*)/i", chop($currentHeader), $matches))
		{
			$target = $matches[2];
			// look for :// in the Location header to see if hostname is included
			if (!preg_match("|\:\/\/|", $target))
			{
				// no host in the path, so prepend
				$this->_redirectaddr = $scheme."://".$this->host.":".$this->port;
				// eliminate double slash
				if (!preg_match("|^/|", $target))
					$this->_redirectaddr .= "/".$target;
				else
					$this->_redirectaddr .= $target;
			}
			else
				$this->_redirectaddr = $target;
		}

		if (preg_match("|^HTTP/|", $currentHeader))
		{
			if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|", $currentHeader, $status))
				$this->status = $status[1];
			$this->response_code = $currentHeader;
		}

		$this->headers[] = $currentHeader;
	}

	// read the body until fread() delivers nothing more
	$results = '';
	do {
		$_data = fread($fp, $this->maxlength);
		if (strlen($_data) == 0)
			break;
		$results .= $_data;
	} while (true);

	if ($this->read_timeout > 0 && $this->_check_timeout($fp))
	{
		$this->status = -100;
		return false;
	}

	// check if there is a redirect meta tag
	if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match))
	{
		$this->_redirectaddr = $this->_expandlinks($match[1], $URI);
	}

	// have we hit our frame depth and is there frame src to fetch?
	if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match))
	{
		// [] cannot be applied to a string, so start a fresh list if needed
		if (!is_array($this->results))
			$this->results = array();
		$this->results[] = $results;
		for ($x = 0; $x < count($match[1]); $x++)
			$this->_frameurls[] = $this->_expandlinks($match[1][$x], $scheme."://".$this->host);
	}
	// have we already fetched framed content?
	elseif (is_array($this->results))
		$this->results[] = $results;
	// no framed content
	else
		$this->results = $results;

	return true;
}
919
920 /*======================================================================*\
921 Function: _httpsrequest
922 Purpose: go get the https data from the server using curl
923 Input: $url the url to fetch
924 $URI the full URI
925 rizwank 1.1 $body body contents to send if any (POST)
926 Output:
927 \*======================================================================*/
928
929 function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
930 {
931 if($this->passcookies && $this->_redirectaddr)
932 $this->setcookies();
933
934 $headers = array();
935
936 $URI_PARTS = parse_url($URI);
937 if(empty($url))
938 $url = "/";
939 // GET ... header not needed for curl
940 //$headers[] = $http_method." ".$url." ".$this->_httpversion;
941 if(!empty($this->agent))
942 $headers[] = "User-Agent: ".$this->agent;
943 if(!empty($this->host))
944 $headers[] = "Host: ".$this->host;
945 if(!empty($this->accept))
946 rizwank 1.1 $headers[] = "Accept: ".$this->accept;
947 if(!empty($this->referer))
948 $headers[] = "Referer: ".$this->referer;
949 if(!empty($this->cookies))
950 {
951 if(!is_array($this->cookies))
952 $this->cookies = (array)$this->cookies;
953
954 reset($this->cookies);
955 if ( count($this->cookies) > 0 ) {
956 $cookie_str = 'Cookie: ';
957 foreach ( $this->cookies as $cookieKey => $cookieVal ) {
958 $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
959 }
960 $headers[] = substr($cookie_str,0,-2);
961 }
962 }
963 if(!empty($this->rawheaders))
964 {
965 if(!is_array($this->rawheaders))
966 $this->rawheaders = (array)$this->rawheaders;
967 rizwank 1.1 while(list($headerKey,$headerVal) = each($this->rawheaders))
968 $headers[] = $headerKey.": ".$headerVal;
969 }
970 if(!empty($content_type)) {
971 if ($content_type == "multipart/form-data")
972 $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
973 else
974 $headers[] = "Content-type: $content_type";
975 }
976 if(!empty($body))
977 $headers[] = "Content-length: ".strlen($body);
978 if(!empty($this->user) || !empty($this->pass))
979 $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);
980
981 for($curr_header = 0; $curr_header < count($headers); $curr_header++)
982 $cmdline_params .= " -H \"".$headers[$curr_header]."\"";
983
984 if(!empty($body))
985 $cmdline_params .= " -d \"$body\"";
986
987 if($this->read_timeout > 0)
988 rizwank 1.1 $cmdline_params .= " -m ".$this->read_timeout;
989
990 $headerfile = tempnam($temp_dir, "sno");
991
992 $safer_URI = strtr( $URI, "\"", " " ); // strip quotes from the URI to avoid shell access
993 exec($this->curl_path." -D \"$headerfile\"".$cmdline_params." \"".$safer_URI."\"",$results,$return);
994
995 if($return)
996 {
997 $this->error = "Error: cURL could not retrieve the document, error $return.";
998 return false;
999 }
1000
1001
1002 $results = implode("\r\n",$results);
1003
1004 $result_headers = file("$headerfile");
1005
1006 $this->_redirectaddr = false;
1007 unset($this->headers);
1008
1009 rizwank 1.1 for($currentHeader = 0; $currentHeader < count($result_headers); $currentHeader++)
1010 {
1011
1012 // if a header begins with Location: or URI:, set the redirect
1013 if(preg_match("/^(Location: |URI: )/i",$result_headers[$currentHeader]))
1014 {
1015 // get URL portion of the redirect
1016 preg_match("/^(Location: |URI:)\s+(.*)/",chop($result_headers[$currentHeader]),$matches);
1017 // look for :// in the Location header to see if hostname is included
1018 if(!preg_match("|\:\/\/|",$matches[2]))
1019 {
1020 // no host in the path, so prepend
1021 $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
1022 // eliminate double slash
1023 if(!preg_match("|^/|",$matches[2]))
1024 $this->_redirectaddr .= "/".$matches[2];
1025 else
1026 $this->_redirectaddr .= $matches[2];
1027 }
1028 else
1029 $this->_redirectaddr = $matches[2];
1030 rizwank 1.1 }
1031
1032 if(preg_match("|^HTTP/|",$result_headers[$currentHeader]))
1033 $this->response_code = $result_headers[$currentHeader];
1034
1035 $this->headers[] = $result_headers[$currentHeader];
1036 }
1037
1038 // check if there is a redirect meta tag
1039
1040 if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
1041 {
1042 $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
1043 }
1044
1045 // have we hit our frame depth and is there frame src to fetch?
1046 if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
1047 {
1048 $this->results[] = $results;
1049 for($x=0; $x<count($match[1]); $x++)
1050 $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
1051 rizwank 1.1 }
1052 // have we already fetched framed content?
1053 elseif(is_array($this->results))
1054 $this->results[] = $results;
1055 // no framed content
1056 else
1057 $this->results = $results;
1058
1059 unlink("$headerfile");
1060
1061 return true;
1062 }
1063
1064 /*======================================================================*\
1065 Function: setcookies()
1066 Purpose: set cookies for a redirection
1067 \*======================================================================*/
1068
function setcookies()
{
	// Scans the response header lines stored in $this->headers for
	// "Set-Cookie:" entries and records each name/value pair in
	// $this->cookies (value is url-decoded), so they can be sent along
	// with a follow-up request such as a redirection.
	// Takes no arguments and returns nothing.

	// The headers property may be missing or not an array (it is unset
	// before a response is parsed); there is nothing to scan in that case.
	if (!is_array($this->headers))
		return;

	foreach ($this->headers as $header_line)
	{
		// Header lines may still carry their trailing CR/LF, so the value
		// stops at the first ';' or line break rather than at ';' only.
		if (preg_match('/^set-cookie:[\s]+([^=]+)=([^;\r\n]+)/i', $header_line, $match))
			$this->cookies[$match[1]] = urldecode($match[2]);
	}
}
1077
1078
1079 /*======================================================================*\
1080 Function: _check_timeout
1081 Purpose: checks whether timeout has occurred
1082 Input: $fp file pointer
1083 \*======================================================================*/
1084
function _check_timeout($fp)
{
	// Reports whether a read on $fp has timed out.
	// Input:  $fp - file pointer whose status is inspected
	// Output: true (and $this->timed_out set to true) when the status
	//         reports a timeout; false otherwise, including whenever
	//         $this->read_timeout is not greater than zero.

	// Timeout checking is disabled unless a positive read timeout is set.
	if (!($this->read_timeout > 0))
		return false;

	$status_info = socket_get_status($fp);
	if (!$status_info["timed_out"])
		return false;

	$this->timed_out = true;
	return true;
}
1096
1097 /*======================================================================*\
1098 Function: _connect
1099 Purpose: make a socket connection
1100 Input: $fp file pointer
1101 \*======================================================================*/
1102
function _connect(&$fp)
{
	// Opens a socket connection and hands it back through $fp.
	// Input:  $fp - passed by reference; receives the result of fsockopen()
	// Output: true on success; false on failure, with $this->status set to
	//         the fsockopen error number and $this->error to a message.
	// When both proxy_host and proxy_port are non-empty the connection goes
	// to the proxy and $this->_isproxy is set; otherwise it goes to
	// $this->host / $this->port.

	if(!empty($this->proxy_host) && !empty($this->proxy_port))
	{
		$this->_isproxy = true;

		$host = $this->proxy_host;
		$port = $this->proxy_port;
	}
	else
	{
		$host = $this->host;
		$port = $this->port;
	}

	$this->status = 0;

	$fp = fsockopen(
				$host,
				$port,
				$errno,
				$errstr,
				$this->_fp_timeout
				);

	if($fp)
	{
		// socket connection succeeded
		return true;
	}

	// socket connection failed
	$this->status = $errno;
	switch($errno)
	{
		// Each case must end in break: without it every branch falls
		// through to default and the specific message is overwritten.
		case -3:
			$this->error="socket creation failed (-3)";
			break;
		case -4:
			$this->error="dns lookup failure (-4)";
			break;
		case -5:
			$this->error="connection refused or timed out (-5)";
			break;
		default:
			$this->error="connection failed (".$errno.")";
			break;
	}
	return false;
}
1150 /*======================================================================*\
1151 Function: _disconnect
1152 Purpose: disconnect a socket connection
1153 Input: $fp file pointer
1154 \*======================================================================*/
1155
function _disconnect($fp)
{
	// Closes the connection held in $fp.
	// Input:  $fp - file pointer to close
	// Output: whatever fclose() reports for that pointer
	$closed = fclose($fp);

	return $closed;
}
1160
1161
1162 /*======================================================================*\
1163 Function: _prepare_post_body
1164 Purpose: Prepare post body according to encoding type
1165 Input: $formvars - form variables
1166 $formfiles - form upload files
1167 Output: post body
1168 \*======================================================================*/
1169
function _prepare_post_body($formvars, $formfiles)
{
	// Builds the POST body for the encoding named in $this->_submit_type.
	// Input:  $formvars  - form variables (name => value; a value may be an
	//                      array or object, sent as repeated "name[]" fields)
	//         $formfiles - upload files (field name => path or list of paths)
	// Output: the post body string; nothing (null) when both inputs are
	//         empty; an empty string for an unrecognised submit type.
	// Side effect: for multipart/form-data a fresh boundary is stored in
	// $this->_mime_boundary.

	settype($formvars, "array");
	settype($formfiles, "array");
	$postdata = '';

	if (count($formvars) == 0 && count($formfiles) == 0)
		return;

	switch ($this->_submit_type) {
		case "application/x-www-form-urlencoded":
			foreach ($formvars as $key => $val) {
				if (is_array($val) || is_object($val)) {
					foreach ($val as $cur_val) {
						$postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
					}
				} else
					$postdata .= urlencode($key)."=".urlencode($val)."&";
			}
			break;

		case "multipart/form-data":
			$this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

			foreach ($formvars as $key => $val) {
				if (is_array($val) || is_object($val)) {
					foreach ($val as $cur_val) {
						$postdata .= "--".$this->_mime_boundary."\r\n";
						// Concatenate the "[]" suffix: writing \[\] inside a
						// double-quoted string would emit the backslashes.
						$postdata .= "Content-Disposition: form-data; name=\"".$key."[]\"\r\n\r\n";
						$postdata .= "$cur_val\r\n";
					}
				} else {
					$postdata .= "--".$this->_mime_boundary."\r\n";
					$postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
					$postdata .= "$val\r\n";
				}
			}

			foreach ($formfiles as $field_name => $file_names) {
				settype($file_names, "array");
				foreach ($file_names as $file_name) {
					// Unreadable files are skipped silently.
					if (!is_readable($file_name)) continue;

					$fp = fopen($file_name, "r");
					if (!$fp) continue;

					// fread() rejects a zero length, so an empty file is
					// sent as an empty part without reading.
					$file_size = filesize($file_name);
					$file_content = ($file_size > 0) ? fread($fp, $file_size) : '';
					fclose($fp);
					$base_name = basename($file_name);

					$postdata .= "--".$this->_mime_boundary."\r\n";
					$postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
					$postdata .= "$file_content\r\n";
				}
			}
			$postdata .= "--".$this->_mime_boundary."--\r\n";
			break;
	}

	return $postdata;
}
1232 }
1233
1234 ?>
|