* @copyright Lennart Groetzbach - distributed under the LGPL * @version 1.2 - 2002/12/30 *

* History / Changes
* * * * * * * * * * *
Version Reported By File / Function Changed Date of Change Comment
1.2 - - 2002/12/30 all functions are now static, so methods can be called as 'URLHelper::name()'
1.2 - getTitle() 2002/12/30 new method to extract the page title
1.1 N.Narayanan getHTTPHeader() 2002/10/14 removed warning for variable
1.1 M.Oelke _openHTTPConnection() 2002/10/14 added possibility to access all ports
// end of file header */

/**
 * Static helpers for normalising, validating and probing HTTP URLs.
 *
 * The socket based helpers (getHTTPStatusCode(), getRealURL(),
 * getHTTPHeader()) send a plain-text HTTP/1.0 request; they do not
 * negotiate TLS. The stream based helpers (isURLAvailable(),
 * getMD5FromURL(), getTitle()) go through fopen() and therefore accept
 * whatever fopen() can open, including local paths.
 *
 * @access public
 */
class URLHelper
{
    /**
     * Adds "http://" to a url that does not already start with a scheme.
     *
     * Only a leading "scheme://" counts; the text "http" appearing later in
     * the url (e.g. "example.com/http-guide") no longer suppresses the prefix.
     *
     * @access public
     * @param String $url the url
     *
     * @return String the updated url (an empty url is returned unchanged)
     */
    public static function addHTTPtoURL($url)
    {
        if ($url != "" && !preg_match('#^[a-z][a-z0-9+.-]*://#i', $url)) {
            $url = "http://" . $url;
        }
        return $url;
    }

    /**
     * Checks if url is in valid "http://host" format.
     *
     * Non-strict mode only requires the "http://" prefix. Strict mode
     * additionally requires the host part (letters, digits, dots, hyphens,
     * optional ":port") to be followed by a "/".
     *
     * @access public
     * @param String  $url    the url
     * @param boolean $strict stricter checking?
     *
     * @return boolean is it valid?
     */
    public static function isValidURLFormat($url, $strict = false)
    {
        if ($strict == true) {
            $pattern = '#^http://([A-Za-z0-9.-]*)(:[0-9]+)?/#i';
        } else {
            $pattern = '#^http://([A-Za-z0-9.-]*)#i';
        }
        return preg_match($pattern, (string) $url) === 1;
    }

    /**
     * Checks if the url can be opened for reading.
     *
     * @access public
     * @param String $url the url
     *
     * @return boolean does it exist?
     */
    public static function isURLAvailable($url)
    {
        $fd = @fopen($url, "rb");
        if ($fd === false) {
            return false;
        }
        // Only close a handle that was actually opened.
        fclose($fd);
        return true;
    }

    /**
     * Opens a socket to the url's host and sends an HTTP/1.0 request line.
     *
     * The caller owns the returned handle and must fclose() it.
     *
     * @access private
     * @param String $url    the url
     * @param String $method what type of HTTP method
     *
     * @return mixed the open connection, or false if the url has no host,
     *               the method is not a plain token, or the connect failed
     */
    public static function _openHTTPConnection($url, $method = "HEAD")
    {
        // The method is written into the request line and may originate from
        // a remote "Allow" header, so refuse anything that is not a bare token.
        if (!preg_match('/^[A-Za-z-]+$/', (string) $method)) {
            return false;
        }

        $info = parse_url($url);
        if (!is_array($info) || empty($info["host"])) {
            return false;
        }

        $host = $info["host"];
        $port = isset($info["port"]) ? $info["port"] : 80;
        $path = (isset($info["path"]) && $info["path"] !== "") ? $info["path"] : "/";
        if (isset($info["query"]) && $info["query"] !== "") {
            $path .= "?" . $info["query"];
        }

        // open connection
        $conn = fsockopen($host, $port, $errno, $errstr);
        if ($conn) {
            // A non-default port has to be repeated in the Host header.
            $hostHeader = ($port == 80) ? $host : $host . ":" . $port;
            // send request
            fwrite($conn, "$method $path HTTP/1.0\r\nHost: $hostHeader\r\n\r\n");
        }
        return $conn;
    }

    /**
     * Returns the HTTP status code
     *
     * @access public
     * @param String $url the url
     *
     * @return integer the status code, 0 if the status line could not be
     *                 parsed, or -1 if no connection could be opened
     * @link http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html Status Code Definition
     */
    public static function getHTTPStatusCode($url)
    {
        $conn = self::_openHTTPConnection($url);
        if (!$conn) {
            echo "getHTTPStatusCode(): Cannot open connection!\n\n";
            return -1;
        }
        $statusLine = fgets($conn);
        fclose($conn);
        return self::_parseStatusCode($statusLine);
    }

    /**
     * Returns the "real" URL by following redirects (301, 302, 303, 307,
     * 308) and, when $simple is false, by retrying with another method
     * after a 405 response that carries an "Allow" header.
     *
     * @access public
     * @param String  $url          the url
     * @param boolean $simple       if true, a 405 response is not retried
     * @param String  $method       what type of HTTP method?
     * @param integer $maxRedirects how many further requests may be made
     *                              before the current url is returned as is
     *
     * @return mixed the url, or -1 if no connection could be opened
     */
    public static function getRealURL($url, $simple = true, $method = "HEAD", $maxRedirects = 10)
    {
        $conn = self::_openHTTPConnection($url, $method);
        if (!$conn) {
            echo "getRealURL(): Cannot open connection!\n\n";
            return -1;
        }

        // Read everything needed, then release the socket before recursing.
        $code   = self::_parseStatusCode(fgets($conn));
        $fields = self::_readHeaderFields($conn);
        fclose($conn);

        if ($maxRedirects <= 0) {
            // Budget used up (e.g. a redirect loop): stop here.
            return $url;
        }

        $redirectCodes = array(301, 302, 303, 307, 308);
        if (in_array($code, $redirectCodes, true)
            && isset($fields["location"]) && $fields["location"] !== ""
        ) {
            $target = self::_resolveLocation($url, $fields["location"]);
            return self::getRealURL($target, $simple, "HEAD", $maxRedirects - 1);
        }

        if ($code === 405 && !$simple && isset($fields["allow"])) {
            foreach (explode(",", $fields["allow"]) as $candidate) {
                $candidate = trim($candidate);
                // Retrying with the method that was just refused cannot help.
                if ($candidate !== "" && strcasecmp($candidate, $method) !== 0) {
                    return self::getRealURL($url, $simple, $candidate, $maxRedirects - 1);
                }
            }
        }

        return $url;
    }

    /**
     * Returns the complete header
     *
     * @access public
     * @param String $url the url
     *
     * @return String the header including the terminating blank line, or
     *                an empty string if no connection could be opened
     */
    public static function getHTTPHeader($url)
    {
        $header = '';
        $conn = self::_openHTTPConnection($url);
        if ($conn) {
            while (($line = fgets($conn)) !== false) {
                $header .= $line;
                if (rtrim($line, "\r\n") === '') {
                    // blank line: end of the header section
                    break;
                }
            }
            fclose($conn);
        }
        return $header;
    }

    /**
     * Returns the MD5 hash code of an url
     *
     * At most $estFilesize bytes are read and hashed.
     *
     * @access public
     * @param String  $url         the url
     * @param integer $estFilesize the approximate file size
     *
     * @return mixed the hash code, or false if the url could not be read
     */
    public static function getMD5FromURL($url, $estFilesize = 500000)
    {
        $fd = @fopen($url, "rb");
        if (!$fd) {
            return false;
        }
        // stream_get_contents() keeps reading until the limit or EOF, whereas
        // a single fread() on a network stream may return just one chunk.
        $fileContents = stream_get_contents($fd, (int) $estFilesize);
        fclose($fd);

        return ($fileContents === false) ? false : md5($fileContents);
    }

    /**
     * Returns the page title
     *
     * Reads the page line by line and stops as soon as the closing title
     * tag has been seen.
     *
     * @access public
     * @param String $url the url
     *
     * @return mixed title string or false;
     */
    public static function getTitle($url)
    {
        $fp = @fopen($url, 'r');
        if (!$fp) {
            return false;
        }

        $closingTag = '</title>';
        $page = '';
        while (!feof($fp)) {
            $chunk = fgets($fp, 1024);
            if ($chunk === false) {
                break;
            }
            $page .= $chunk;
            // Only the newly added bytes (plus enough overlap for a tag split
            // across two reads) need to be searched.
            $tail = substr($page, -(strlen($chunk) + strlen($closingTag)));
            if (stripos($tail, $closingTag) !== false) {
                break;
            }
        }
        fclose($fp);

        if (preg_match('#<title\b[^>]*>(.*?)</title>#is', $page, $out)) {
            return $out[1];
        }
        return false;
    }

    /**
     * Extracts the numeric status code from an HTTP status line.
     *
     * @access private
     * @param mixed $statusLine the first response line, or false
     *
     * @return integer the status code, or 0 if the line has no second field
     */
    private static function _parseStatusCode($statusLine)
    {
        if (!is_string($statusLine)) {
            return 0;
        }
        $parts = explode(" ", trim($statusLine), 3);
        return isset($parts[1]) ? (int) $parts[1] : 0;
    }

    /**
     * Reads header lines up to the blank line (or end of stream).
     *
     * @access private
     * @param resource $conn connection positioned after the status line
     *
     * @return array lower-cased field name => trimmed value; the first
     *               occurrence of a field wins
     */
    private static function _readHeaderFields($conn)
    {
        $fields = array();
        while (($line = fgets($conn)) !== false) {
            $line = rtrim($line, "\r\n");
            if ($line === '') {
                break;
            }
            $colon = strpos($line, ':');
            if ($colon === false) {
                continue;
            }
            $name = strtolower(trim(substr($line, 0, $colon)));
            if (!isset($fields[$name])) {
                $fields[$name] = trim((string) substr($line, $colon + 1));
            }
        }
        return $fields;
    }

    /**
     * Turns a Location value into an absolute url.
     *
     * Handles absolute urls, "//host/path", "/path" and paths relative to
     * the directory of the base url. Dot segments ("../") are not collapsed.
     *
     * @access private
     * @param String $baseUrl  the url that was requested
     * @param String $location the Location header value
     *
     * @return String the absolute url
     */
    private static function _resolveLocation($baseUrl, $location)
    {
        if (preg_match('#^[a-z][a-z0-9+.-]*://#i', $location)) {
            return $location;
        }

        $base = parse_url($baseUrl);
        if (!is_array($base) || empty($base["host"])) {
            return $location;
        }

        $scheme = isset($base["scheme"]) ? $base["scheme"] : "http";
        $origin = $scheme . "://" . $base["host"];
        if (isset($base["port"])) {
            $origin .= ":" . $base["port"];
        }

        if (strncmp($location, "//", 2) === 0) {
            return $scheme . ":" . $location;
        }
        if (strncmp($location, "/", 1) === 0) {
            return $origin . $location;
        }

        $basePath  = isset($base["path"]) ? $base["path"] : "/";
        $lastSlash = strrpos($basePath, "/");
        $directory = ($lastSlash === false) ? "/" : substr($basePath, 0, $lastSlash + 1);

        return $origin . $directory . $location;
    }
}