(file) Return to URLHelper.php CVS log (file) (dir) Up to [RizwankCVS] / geekymedia_web

  1 rizwank 1.1 <?php
  2             ////////////////////////////////////////////////////////////////
  3             /*
  4             
  5             This class gives you access to the HTTP header information and 
  6             provides some help for retrieving and parsing urls.
  7             
  8             
  9             For the latest version go to:
 10             http://www.phpclasses.org/browse.html/package/803.html
 11             
 12             
 13             FUNCTIONS:
 14                 function isURLAvailable($url)
 15                 function isValidURLFormat($url, $strict=false)
 16                 function addHTTPtoURL($url)
 17                 function getHTTPStatusCode($url)
 18                 function getRealURL ($url, $simple = true, $method = "HEAD")
 19                 function getHTTPHeader($url)
 20                 function getMD5FromURL($url, $estFilesize=500000)
 21                 function getTitle($url)
 22 rizwank 1.1 
 23                 function _openHTTPConnection($url, $method = "HEAD")
 24             
 25             ////////////////////////////////////////////////////////////////
 26             
 27             For HTTP Status Codes see:
 28                 http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html
 29             
 30             ////////////////////////////////////////////////////////////////
 31             
 32                 This library is free software; you can redistribute it and/or
 33                 modify it under the terms of the GNU Lesser General Public
 34                 License as published by the Free Software Foundation; either
 35                 version 2.1 of the License, or (at your option) any later version.
 36                 
 37                 This library is distributed in the hope that it will be useful,
 38                 but WITHOUT ANY WARRANTY; without even the implied warranty of
 39                 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 40                 Lesser General Public License for more details.
 41                 
 42                 You should have received a copy of the GNU Lesser General Public
 43 rizwank 1.1     License along with this library; if not, write to the Free Software
 44                 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 45             
 46             ////////////////////////////////////////////////////////////////
 47             */
 48             /**
 49             * Class for accessing URLs and the HTTP data
 50             *
 51             *
 52             * @author	    Lennart Groetzbach <lennartg_at_web_dot_de>
 53             * @copyright	Lennart Groetzbach <lennartg_at_web_dot_de> - distributed under the LGPL
 54             * @version 	    1.2 - 2002/12/30
 55             * <p>
 56             * History / Changes<br>
 57             * <table border="1" width="100%" cellpadding="3"><tr>
 58             *       <th>Version</th>    <th>Reported By</th>        <th>File / Function Changed</th>    <th>Date of Change</th> <th>Comment</th>
 59             * </tr><tr>
 60             *       <td>1.2</td>        <td>-</td>        <td>-</td>            <td>2002/12/30</td>     <td>all functions are now static, so methods can be called as 'URLHelper::name()'</td>
 61             * </tr><tr>
 62             *       <td>1.2</td>        <td>-</td>        <td>getTitle()</td>            <td>2002/12/30</td>     <td>new method to extract the page title</td>
 63             * </tr><tr>
 64 rizwank 1.1 *       <td>1.1</td>        <td>N.Narayanan</td>        <td>getHTTPHeader()</td>            <td>2002/10/14</td>     <td>removed warning for variable</td>
 65             * </tr><tr>
 66             *       <td>1.1</td>        <td>M.Oelke</td>            <td>_openHTTPConnection()</td>      <td>2002/10/14</td>     <td>added possibility to access all ports</td>
 67             * </tr></table>
 68             *
 69             * @access       public
 70             */
 class URLHelper {

     ////////////////////////////////////////////////////////////////
     /**
     * Adds "http://" to a url that does not already start with an
     * http:// or https:// scheme.
     *
     * Only a scheme at the very beginning of the url counts (leading
     * whitespace is tolerated), so hosts or paths that merely contain
     * the letters "http" (e.g. "httpbin.org") are still prefixed.
     * Empty strings and non-string values are returned unchanged.
     *
     * @access   public
     * @param    String      $url    the url
     *
     * @return   String      the updated url
     */
     public static function addHTTPtoURL($url) {
         if (is_string($url) && $url !== "" && !preg_match('#^\s*https?://#i', $url)) {
             $url = "http://" . $url;
         }
         return $url;
     }

     ////////////////////////////////////////////////////////////////
     /**
     * Checks if url is in valid format
     *
     * A url is accepted when it starts with http:// or https://
     * followed by a non-empty host made of letters, digits, dots and
     * hyphens. In strict mode the host (plus an optional ":port") must
     * additionally be followed by a "/".
     *
     * @access   public
     * @param    String      $url       the url
     * @param    boolean     $strict    stricter checking?
     *
     * @return   boolean     is it valid? (always false for non-strings)
     */
     public static function isValidURLFormat($url, $strict = false) {
         if (!is_string($url)) {
             return false;
         }
         if ($strict == true) {
             $pattern = '#^https?://[A-Za-z0-9.-]+(?::[0-9]+)?/#i';
         } else {
             $pattern = '#^https?://[A-Za-z0-9.-]+#i';
         }
         return preg_match($pattern, $url) === 1;
     }

     ////////////////////////////////////////////////////////////////
     /**
     * Checks if the url can be opened for reading
     *
     * The url is opened with fopen() and closed again immediately;
     * nothing is read from it.
     *
     * @access   public
     * @param    String      $url    the url
     *
     * @return   boolean     does it exist?
     */
     public static function isURLAvailable($url) {
         // fopen() throws on an empty path in PHP 8, so reject it up front
         if (!is_string($url) || $url === "") {
             return false;
         }
         // a failed open is the expected "not available" answer, hence the @
         $fd = @fopen($url, "rb");
         if ($fd === false) {
             return false;
         }
         fclose($fd);
         return true;
     }

     ////////////////////////////////////////////////////////////////
     /**
     * Opens a socket to the host of the url and sends the request line
     *
     * The port is taken from the url; without one, 80 is used for http
     * and 443 for https. https urls are connected through the ssl://
     * transport (needs the OpenSSL extension). The request is sent as
     * HTTP/1.0 with a Host header, the response is left unread on the
     * returned stream.
     *
     * @access   private
     * @param    String      $url    the url
     * @param    String      $method what type of HTTP method
     *
     * @return   mixed       stream resource, or false if the url has no
     *                       host or the connection could not be opened
     */
     public static function _openHTTPConnection($url, $method = "HEAD") {
         $info = is_string($url) ? parse_url($url) : false;
         // parse_url() yields false for seriously malformed urls
         if (!is_array($info) || empty($info["host"])) {
             return false;
         }
         $host = $info["host"];
         $secure = isset($info["scheme"]) && strtolower($info["scheme"]) === "https";
         if (isset($info["port"])) {
             $port = $info["port"];
         } else {
             $port = $secure ? 443 : 80;
         }
         // "http://host" has no path component at all
         $path = (isset($info["path"]) && $info["path"] !== "") ? $info["path"] : "/";
         if (isset($info["query"]) && $info["query"] !== "") {
             $path = $path . "?" . $info["query"];
         }
         // open connection
         $conn = fsockopen(($secure ? "ssl://" : "") . $host, $port, $errno, $errstr);
         if ($conn) {
             // send request
             fwrite($conn, "$method $path HTTP/1.0\r\nHost: $host\r\n\r\n");
         }
         return $conn;
     }

     ////////////////////////////////////////////////////////////////
     /**
     * Extracts the numeric status code from an HTTP status line
     *
     * @access   private
     * @param    mixed       $statusLine   e.g. "HTTP/1.0 200 OK", or false
     *
     * @return   integer     the status code, 0 if the line is missing or malformed
     */
     private static function _parseStatusCode($statusLine) {
         if (!is_string($statusLine)) {
             return 0;
         }
         $parts = explode(" ", trim($statusLine), 3);
         if (isset($parts[1]) && ctype_digit($parts[1])) {
             return (int) $parts[1];
         }
         return 0;
     }

     ////////////////////////////////////////////////////////////////
     /**
     * Reads header lines from the connection until the named header
     * is found or the header section ends
     *
     * The name must match at the start of the line, so searching for
     * "Location" does not pick up "Content-Location".
     *
     * @access   private
     * @param    resource    $conn   stream positioned after the status line
     * @param    String      $name   header name without the colon
     *
     * @return   mixed       the trimmed header value, or null if absent
     */
     private static function _readHeaderValue($conn, $name) {
         $prefix = $name . ":";
         $length = strlen($prefix);
         while (($line = fgets($conn)) !== false) {
             if (trim($line) === "") {
                 // blank line: end of the header section
                 break;
             }
             if (strncasecmp($line, $prefix, $length) === 0) {
                 return trim(substr($line, $length));
             }
         }
         return null;
     }

     ////////////////////////////////////////////////////////////////
     /**
     * Turns the value of a Location header into an absolute url
     *
     * Absolute urls are returned as they are. "//host/..." inherits
     * the scheme of the base url, "/path" is resolved against scheme,
     * host and port of the base url, and anything else against the
     * directory of the base path. "." and ".." segments are not
     * collapsed.
     *
     * @access   private
     * @param    String      $baseUrl    the url that sent the redirect
     * @param    String      $location   the Location header value
     *
     * @return   String      the absolute target url
     */
     private static function _resolveLocation($baseUrl, $location) {
         if (preg_match('#^[a-z][a-z0-9+.-]*://#i', $location)) {
             return $location;
         }
         $base = parse_url($baseUrl);
         if (!is_array($base) || empty($base["host"])) {
             return $location;
         }
         $scheme = isset($base["scheme"]) ? $base["scheme"] : "http";
         if (strncmp($location, "//", 2) === 0) {
             return $scheme . ":" . $location;
         }
         $origin = $scheme . "://" . $base["host"];
         if (isset($base["port"])) {
             $origin .= ":" . $base["port"];
         }
         if ($location !== "" && $location[0] === "/") {
             return $origin . $location;
         }
         $path = isset($base["path"]) ? $base["path"] : "/";
         $slash = strrpos($path, "/");
         $dir = ($slash === false) ? "/" : substr($path, 0, $slash + 1);
         return $origin . $dir . $location;
     }

     ////////////////////////////////////////////////////////////////
     /**
     * Returns the HTTP status code
     *
     * Sends a HEAD request and parses the first response line. If the
     * connection cannot be opened a message is echoed and -1 is
     * returned; an unreadable status line yields 0.
     *
     * @access   public
     * @param    String      $url    the url
     *
     * @return   integer         the status code
     * @link     http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html  Status Code Definition
     */
     public static function getHTTPStatusCode($url) {
         $conn = self::_openHTTPConnection($url);
         if (!$conn) {
             echo "getHTTPStatusCode(): Cannot open connection!<br>\n";
             return -1;
         }
         $statusLine = fgets($conn, 1028);
         fclose($conn);
         return self::_parseStatusCode($statusLine);
     }

     ////////////////////////////////////////////////////////////////
     /**
     * Returns the "real" URL by following redirects
     *
     * Redirect responses (301, 302, 303, 307, 308) are followed via
     * their Location header, each hop being requested with HEAD. On a
     * 405 response, and only if $simple is false, the request is
     * repeated once with the first method of the Allow header when
     * that differs from the method just used. Every hop or retry uses
     * up one unit of $maxRedirects; when none is left the url reached
     * so far is returned, which also ends redirect loops.
     *
     * If a connection cannot be opened a message is echoed and -1 is
     * returned (also when this happens for a redirect target).
     *
     * @access   public
     * @param    String      $url            the url
     * @param    boolean     $simple         false = also retry with another method on 405
     * @param    String      $method         what type of HTTP method?
     * @param    integer     $maxRedirects   how many hops / retries may be followed
     *
     * @return   mixed       the url, or -1 if no connection could be opened
     */
     public static function getRealURL($url, $simple = true, $method = "HEAD", $maxRedirects = 10) {
         $conn = self::_openHTTPConnection($url, $method);
         if (!$conn) {
             echo "getRealURL(): Cannot open connection!<br>\n";
             return -1;
         }
         $code = self::_parseStatusCode(fgets($conn, 1028));

         // decide which header, if any, tells us where to go next
         $wanted = null;
         if (in_array($code, array(301, 302, 303, 307, 308), true)) {
             $wanted = "Location";
         } elseif ($code === 405 && !$simple) {
             $wanted = "Allow";
         }
         $value = null;
         if ($wanted !== null && $maxRedirects > 0) {
             $value = self::_readHeaderValue($conn, $wanted);
         }
         // close before recursing so a redirect chain does not pile up sockets
         fclose($conn);

         if ($value === null || $value === "") {
             return $url;
         }
         if ($wanted === "Location") {
             $target = self::_resolveLocation($url, $value);
             return self::getRealURL($target, $simple, "HEAD", $maxRedirects - 1);
         }

         // 405: retry with the first allowed method
         $allowed = explode(",", $value);
         $retryMethod = trim($allowed[0]);
         // the value comes from the server and ends up in our request line,
         // so accept plain method tokens only and never retry the same method
         if (!preg_match('/^[A-Za-z]+$/', $retryMethod) || strcasecmp($retryMethod, $method) === 0) {
             return $url;
         }
         return self::getRealURL($url, $simple, $retryMethod, $maxRedirects - 1);
     }

     ////////////////////////////////////////////////////////////////
     /**
     * Returns the complete header
     *
     * Sends a HEAD request and collects the response from the status
     * line up to and including the blank line that ends the header
     * section.
     *
     * @access   public
     * @param    String      $url    the url
     *
     * @return   String      the header, "" if no connection could be opened
     */
     public static function getHTTPHeader($url) {
         $header = '';
         $conn = self::_openHTTPConnection($url);
         if ($conn) {
             while (($line = fgets($conn)) !== false) {
                 $header .= $line;
                 if (trim($line) === '') {
                     // blank line: end of the header section
                     break;
                 }
             }
             fclose($conn);
         }
         return $header;
     }

     ////////////////////////////////////////////////////////////////
     /**
     * Returns the MD5 hash code of an url
     *
     * At most $estFilesize bytes are read and hashed. The content is
     * read until that limit or the end of the stream is reached, so
     * remote urls that arrive in several chunks are hashed completely.
     *
     * @access   public
     * @param    String      $url            the url
     * @param    integer     $estFilesize    the maximum number of bytes to hash
     *
     * @return   mixed       the hash code, or false if the url cannot be read
     */
     public static function getMD5FromURL($url, $estFilesize = 500000) {
         $limit = (int) $estFilesize;
         if (!is_string($url) || $url === "" || $limit < 1) {
             return false;
         }
         $fd = @fopen($url, "rb");
         if ($fd === false) {
             return false;
         }
         $fileContents = stream_get_contents($fd, $limit);
         fclose($fd);
         if ($fileContents === false) {
             return false;
         }
         return md5($fileContents);
     }

     ////////////////////////////////////////////////////////////////
     /**
     * Returns the page title
     *
     * The url is read line by line until the closing title tag has
     * been seen (or the end is reached); the text between the first
     * title tag pair is returned exactly as found, without trimming or
     * entity decoding. Tag names are matched case-insensitively.
     *
     * @access   public
     * @param    String      $url    the url
     *
     * @return   mixed      title string, or false if the url cannot be
     *                      opened or has no title
     */
     public static function getTitle($url) {
         if (!is_string($url) || $url === "") {
             return false;
         }
         $fp = @fopen($url, 'r');
         if ($fp === false) {
             return false;
         }
         $page = '';
         while (!feof($fp)) {
             $chunk = fgets($fp, 1024);
             if ($chunk === false) {
                 break;
             }
             $page .= $chunk;
             // only the new chunk plus a 7 byte overlap can contain a
             // closing tag that was not there before
             $tail = substr($page, -(strlen($chunk) + 7));
             if (stripos($tail, '</title>') !== false) {
                 break;
             }
         }
         fclose($fp);
         if (preg_match('#<title(?:\s[^>]*)?>(.*?)</title>#is', $page, $out)) {
             return $out[1];
         }
         return false;
     }

     ////////////////////////////////////////////////////////////////
 }
320             ////////////////////////////////////////////////////////////////
321             ?>
322             

Rizwan Kassim
Powered by
ViewCVS 0.9.2