<?php
////////////////////////////////////////////////////////////////
/*

This class gives you access to the HTTP header information and
provides some help for retrieving and parsing URLs.


For the latest version go to:
http://www.phpclasses.org/browse.html/package/803.html


FUNCTIONS:
function isURLAvailable($url)
function isValidURLFormat($url, $strict=false)
function addHTTPtoURL($url)
function getHTTPStatusCode($url)
function getRealURL ($url, $simple = true, $method = "HEAD")
function getHTTPHeader($url)
function getMD5FromURL($url, $estFilesize=500000)
function getTitle($url)

function _openHTTPConnection($url, $method = "HEAD")

////////////////////////////////////////////////////////////////

For HTTP Status Codes see:
http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html

////////////////////////////////////////////////////////////////

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

////////////////////////////////////////////////////////////////
*/
/**
 * Class for accessing URLs and the HTTP data
 *
 * All methods are static and are meant to be called as 'URLHelper::name()'.
 *
 * @author Lennart Groetzbach <lennartg_at_web_dot_de>
 * @copyright Lennart Groetzbach <lennartg_at_web_dot_de> - distributed under the LGPL
 * @version 1.2 - 2002/12/30
 * <p>
 * History / Changes<br>
 * <table border="1" width="100%" cellpadding="3"><tr>
 * <th>Version</th> <th>Reported By</th> <th>File / Function Changed</th> <th>Date of Change</th> <th>Comment</th>
 * </tr><tr>
 * <td>1.2</td> <td>-</td> <td>-</td> <td>2002/12/30</td> <td>now all functions are static, so the methods can be called 'URLHelper::name()'</td>
 * </tr><tr>
 * <td>1.2</td> <td>-</td> <td>getTitle()</td> <td>2002/12/30</td> <td>new method to extract the page title</td>
 * </tr><tr>
 * <td>1.1</td> <td>N.Narayanan</td> <td>getHTTPHeader()</td> <td>2002/10/14</td> <td>removed warning for variable</td>
 * </tr><tr>
 * <td>1.1</td> <td>M.Oelke</td> <td>_openHTTPConnection()</td> <td>2002/10/14</td> <td>added possibility to access all ports</td>
 * </tr></table>
 *
 * @access public
 */
class URLHelper {

    /**
     * Upper bound on the number of redirects / method retries that
     * getRealURL() follows, so that redirect loops terminate.
     */
    const MAX_REDIRECTS = 10;

    ////////////////////////////////////////////////////////////////
    /**
     * Adds "http://" to url if needed
     *
     * The url is left untouched when it is empty or already starts with
     * "http://" or "https://" (compared case-insensitively).
     *
     * @access public
     * @param String $url the url
     *
     * @return String the updated url
     */
    public static function addHTTPtoURL($url) {
        // Only a leading scheme counts; "HTTP" somewhere inside the host
        // or path must not prevent the prefix from being added.
        if ($url != "" && !preg_match('#^https?://#i', $url)) {
            $url = "http://" . $url;
        }
        return $url;
    }

    ////////////////////////////////////////////////////////////////
    /**
     * Checks if url is in valid format
     *
     * The url has to start with "http://" followed by a non-empty host
     * made of letters, digits, dots and hyphens and an optional ":port".
     * In strict mode the host part must additionally be followed by "/".
     *
     * @access public
     * @param String $url the url
     * @param boolean $strict stricter checking?
     *
     * @return integer 1 if the url is valid, 0 otherwise (usable as boolean)
     */
    public static function isValidURLFormat($url, $strict=false) {
        if (!is_string($url)) {
            return 0;
        }
        $pattern = '#^http://[A-Za-z0-9.-]+(?::[0-9]+)?';
        if ($strict == true) {
            $pattern .= '/';
        }
        $pattern .= '#';
        return preg_match($pattern, $url);
    }

    ////////////////////////////////////////////////////////////////
    /**
     * Checks if the url can be opened for reading
     *
     * @access public
     * @param String $url the url
     *
     * @return boolean does it exist?
     */
    public static function isURLAvailable($url) {
        if (!is_string($url) || $url === '') {
            return false;
        }
        $fd = @fopen($url, "rb");
        if ($fd === false) {
            return false;
        }
        // close only a real handle; fclose(false) is an error on PHP 8
        fclose($fd);
        return true;
    }

    ////////////////////////////////////////////////////////////////
    /**
     * Opens a socket to the host of the url and sends the request line
     *
     * The port is taken from the url; it defaults to 80 for http and to
     * 443 for https (https connections use the "ssl://" transport).
     * Intended for internal use by the other methods of this class.
     *
     * @access private
     * @param String $url the url
     * @param String $method what type of HTTP method
     *
     * @return mixed the connection resource, or false on failure
     */
    public static function _openHTTPConnection($url, $method = "HEAD") {
        $info = parse_url($url);
        if (!is_array($info) || empty($info["host"])) {
            return false;
        }
        // the method ends up in the request line, so it must be a single token
        $method = trim($method);
        if ($method === '' || preg_match('/\s/', $method)) {
            return false;
        }

        $host = $info["host"];
        $scheme = isset($info["scheme"]) ? strtolower($info["scheme"]) : "http";
        $secure = ($scheme === "https");
        $defaultPort = $secure ? 443 : 80;
        $port = isset($info["port"]) ? $info["port"] : $defaultPort;

        $path = !empty($info["path"]) ? $info["path"] : "/";
        if (isset($info["query"]) && $info["query"] !== '') {
            $path .= "?" . $info["query"];
        }

        // open connection
        $target = $secure ? "ssl://" . $host : $host;
        $conn = fsockopen($target, $port, $errno, $errstr);
        if ($conn) {
            // a non-default port has to be part of the Host header
            $hostHeader = ($port == $defaultPort) ? $host : $host . ":" . $port;
            // send request
            fwrite($conn, "$method $path HTTP/1.0\r\nHost: $hostHeader\r\n\r\n");
        }
        return $conn;
    }

    ////////////////////////////////////////////////////////////////
    /**
     * Extracts the numeric status code from an HTTP status line
     *
     * @access private
     * @param mixed $statusLine first line of the response (or false)
     *
     * @return integer the status code, 0 if the line could not be parsed
     */
    private static function _parseStatusCode($statusLine) {
        if (is_string($statusLine)
            && preg_match('#^HTTP/\d+(?:\.\d+)?\s+(\d{3})#i', $statusLine, $match)) {
            return (int) $match[1];
        }
        return 0;
    }

    ////////////////////////////////////////////////////////////////
    /**
     * Reads response header lines up to the blank line that ends them
     *
     * @access private
     * @param resource $conn connection positioned after the status line
     *
     * @return array lower-cased header name => value (first occurrence wins)
     */
    private static function _readHeaders($conn) {
        $headers = array();
        while (($line = fgets($conn)) !== false) {
            $line = rtrim($line, "\r\n");
            if ($line === '') {
                // end of the header section; never read into the body
                break;
            }
            $colon = strpos($line, ':');
            if ($colon === false) {
                continue;
            }
            $name = strtolower(trim(substr($line, 0, $colon)));
            if (!isset($headers[$name])) {
                $headers[$name] = trim(substr($line, $colon + 1));
            }
        }
        return $headers;
    }

    ////////////////////////////////////////////////////////////////
    /**
     * Turns the value of a Location header into an absolute url
     *
     * Handles absolute urls, scheme-relative ("//host/..."), host-relative
     * ("/path"), query-only ("?a=b") and path-relative references.
     * Dot segments ("../") are not normalised.
     *
     * @access private
     * @param String $base the url that was requested
     * @param String $location the Location header value
     *
     * @return String the absolute url
     */
    private static function _resolveLocation($base, $location) {
        $location = trim($location);
        if (preg_match('#^[a-z][a-z0-9+.-]*://#i', $location)) {
            return $location;
        }
        $info = parse_url($base);
        if (!is_array($info) || empty($info["host"])) {
            return $location;
        }
        $scheme = isset($info["scheme"]) ? $info["scheme"] : "http";
        $origin = $scheme . "://" . $info["host"];
        if (isset($info["port"])) {
            $origin .= ":" . $info["port"];
        }
        if (substr($location, 0, 2) === "//") {
            return $scheme . ":" . $location;
        }
        if (substr($location, 0, 1) === "/") {
            return $origin . $location;
        }
        $path = !empty($info["path"]) ? $info["path"] : "/";
        if (substr($location, 0, 1) === "?") {
            return $origin . $path . $location;
        }
        // relative to the "directory" of the requested path
        $slash = strrpos($path, "/");
        $dir = ($slash === false) ? "/" : substr($path, 0, $slash + 1);
        return $origin . $dir . $location;
    }

    ////////////////////////////////////////////////////////////////
    /**
     * Returns the HTTP status code
     *
     * Prints a message and returns -1 when no connection can be opened.
     *
     * @access public
     * @param String $url the url
     *
     * @return integer the status code (0 if the response could not be parsed)
     * @link http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html Status Code Definition
     */
    public static function getHTTPStatusCode($url) {
        $conn = self::_openHTTPConnection($url);
        if (!$conn) {
            echo "getHTTPStatusCode(): Cannot open connection!<br>\n";
            return -1;
        }
        $statusLine = fgets($conn, 1028);
        fclose($conn);
        return self::_parseStatusCode($statusLine);
    }

    ////////////////////////////////////////////////////////////////
    /**
     * Returns the "real" URL, following redirects (301, 302, 303, 307, 308)
     * and, if requested, retrying with an allowed method after a 405
     *
     * At most MAX_REDIRECTS hops are followed. Prints a message and
     * returns -1 when a connection cannot be opened.
     *
     * @access public
     * @param String $url the url
     * @param boolean $simple if false, retry with the first method of the
     *                        "Allow" header when the server answers 405
     * @param String $method what type of HTTP method?
     *
     * @return mixed the url, or -1 if no connection could be opened
     */
    public static function getRealURL ($url, $simple = true, $method = "HEAD") {
        return self::_followURL($url, $simple, $method, self::MAX_REDIRECTS);
    }

    ////////////////////////////////////////////////////////////////
    /**
     * Worker for getRealURL() carrying the remaining hop budget
     *
     * @access private
     * @param String $url the url
     * @param boolean $simple see getRealURL()
     * @param String $method what type of HTTP method?
     * @param integer $hopsLeft how many more redirects / retries may be followed
     *
     * @return mixed the url, or -1 if no connection could be opened
     */
    private static function _followURL($url, $simple, $method, $hopsLeft) {
        $conn = self::_openHTTPConnection($url, $method);
        if (!$conn) {
            echo "getRealURL(): Cannot open connection!<br>\n";
            return -1;
        }
        $code = self::_parseStatusCode(fgets($conn, 1028));
        $headers = self::_readHeaders($conn);
        fclose($conn);

        if ($hopsLeft <= 0) {
            // hop budget used up (probably a redirect loop): stop here
            return $url;
        }

        switch ($code) {
            case 301:
            case 302:
            case 303:
            case 307:
            case 308:
                // is there a new location?
                if (isset($headers["location"]) && $headers["location"] !== '') {
                    $target = self::_resolveLocation($url, $headers["location"]);
                    return self::_followURL($target, $simple, $method, $hopsLeft - 1);
                }
                break;
            case 405:
                if (!$simple && isset($headers["allow"])) {
                    $allowed = explode(",", $headers["allow"]);
                    $retry = trim($allowed[0]);
                    // retrying with the method that was just rejected is pointless
                    if ($retry !== '' && strcasecmp($retry, $method) !== 0) {
                        return self::_followURL($url, $simple, $retry, $hopsLeft - 1);
                    }
                }
                break;
        }
        return $url;
    }

    ////////////////////////////////////////////////////////////////
    /**
     * Returns the complete header
     *
     * Sends a HEAD request and returns the raw response (status line and
     * header lines, including the terminating blank line). An empty
     * string is returned when no connection can be opened.
     *
     * @access public
     * @param String $url the url
     *
     * @return String the header
     */
    public static function getHTTPHeader($url) {
        $header = '';
        $conn = self::_openHTTPConnection($url);
        if ($conn) {
            while (($line = fgets($conn, 1028)) !== false) {
                $header .= $line;
                if (rtrim($line, "\r\n") === '') {
                    // blank line: the header section is complete
                    break;
                }
            }
            fclose($conn);
        }
        return $header;
    }

    ////////////////////////////////////////////////////////////////
    /**
     * Returns the MD5 hash code of an url
     *
     * Only the first $estFilesize bytes of the content are hashed.
     *
     * @access public
     * @param String $url the url
     * @param integer $estFilesize the approximate file size (maximum number of bytes read)
     *
     * @return mixed the hash code, or false if the url cannot be read
     */
    public static function getMD5FromURL($url, $estFilesize=500000){
        if (!is_string($url) || $url === '') {
            return false;
        }
        $fd = @fopen($url, "rb");
        if (!$fd) {
            return false;
        }
        // a single fread() on a network stream returns after one packet;
        // stream_get_contents() keeps reading up to the requested length
        $fileContents = stream_get_contents($fd, (int) $estFilesize);
        fclose($fd);
        if ($fileContents === false) {
            return false;
        }
        return md5($fileContents);
    }

    ////////////////////////////////////////////////////////////////
    /**
     * Returns the page title
     *
     * Reads the document until the closing title tag has been seen and
     * returns the text between the title tags exactly as found.
     *
     * @access public
     * @param String $url the url
     *
     * @return mixed title string or false;
     */
    public static function getTitle($url) {
        if (!is_string($url) || $url === '') {
            return false;
        }
        $fp = @fopen($url, 'r');
        if (!$fp) {
            return false;
        }
        $closingTag = '</title>';
        $page = '';
        while (!feof($fp)) {
            $chunk = fgets($fp, 1024);
            if ($chunk === false) {
                break;
            }
            // rescan only the new data plus enough overlap to catch a
            // closing tag that was split across two reads
            $scanFrom = max(0, strlen($page) - (strlen($closingTag) - 1));
            $page .= $chunk;
            if (stripos($page, $closingTag, $scanFrom) !== false) {
                break;
            }
        }
        fclose($fp);
        if (preg_match('#<title(?:\s[^>]*)?>(.*?)</title>#is', $page, $out)) {
            return $out[1];
        }
        return false;
    }

    ////////////////////////////////////////////////////////////////
}
////////////////////////////////////////////////////////////////
?>
|