⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 linkcheck.php

📁 PHP4_0入门与提高源程序代码
💻 PHP
字号:
<?
// Human-readable description for every status code that check() can report.
// The keys are written as quoted strings: a bareword key such as $text[OK]
// is an undefined constant, which PHP 8 rejects with an Error.
$text = array(
    // Non-HTTP results.
    'N/A'   => "Ikke HTTP",
    'OK'    => "Valid hostname",
    'FEJL'  => "Invalid hostname",
    'ERROR' => "Invalid hostname",   // the code check() actually returns for a failed MX lookup
    'D鴇'   => "No responce",

    // 1xx
    100 => "Continue",
    101 => "Switching Protocols",

    // 2xx
    200 => "OK",
    201 => "Created",
    202 => "Accepted",
    203 => "Non-Authoritative Information",
    204 => "No Content",
    205 => "Reset Content",
    206 => "Partial Content",

    // 3xx
    300 => "Multiple Choices",
    301 => "Moved Permanently",
    302 => "Found",
    303 => "See Other",
    304 => "Not Modified",
    305 => "Use Proxy",
    307 => "Temporary Redirect",

    // 4xx
    400 => "Bad Request",
    401 => "Unauthorized",
    402 => "Payment Required",
    403 => "Forbidden",
    404 => "Not Found",
    405 => "Method Not Allowed",
    406 => "Not Acceptable",
    407 => "Proxy Authentication Required",
    408 => "Request Timeout",
    409 => "Conflict",
    410 => "Gone",
    411 => "Length Required",
    412 => "Precondition Failed",
    413 => "Request Entity Too Large",
    414 => "Request-URI Too Long",
    415 => "Unsupported Media Type",
    416 => "Requested Range Not Satisfiable",
    417 => "Expectation Failed",

    // 5xx
    500 => "Internal Server Error",
    501 => "Not Implemented",
    502 => "Bad Gateway",
    503 => "Service Unavailable",
    504 => "Gateway Timeout",
    505 => "HTTP Version Not Supported"
);

// Resolve a link found in a page against the URL of that page.
//   $base - URL of the page the link was found in
//   $path - link target: relative ("a.html", "./a.html") or
//           root-relative ("/a.html")
// Returns the directory part of $base followed by $path; for a root-relative
// $path only the "http://host" part of $base is kept. "../" segments are
// passed through unchanged.
function specialconcat($base,$path) {
	// Query string and fragment are not part of the directory; removing them
	// first keeps a "/" inside them from being taken for a path separator.
	$base = preg_replace('/[?#].*$/s', '', $base);
	// A base with no path at all ("http://host") refers to the root directory.
	// Without this the "//" after the scheme would be taken for the last
	// separator and the host name would be cut off.
	if (preg_match('#^[a-z][a-z0-9+.-]*://[^/]*$#i', $base)) $base .= "/";
	// Keep everything up to and including the last "/".
	$cut = strrpos($base, "/");
	if ($cut !== false) $base = substr($base, 0, $cut + 1);
	// A single leading "./" means "this directory".
	$path = preg_replace('#^\./#', '', $path);
	// A root-relative link replaces the whole path of the base.
	if (substr($path, 0, 1) == "/") {
	   $base = preg_replace('#^(http://[^/]+)/.*#is', '\\1', $base);
	}
	return $base.$path;
}

// Remove duplicate values from $arr.
//   $arr - list of scalar values (strings in this script)
// Returns the distinct values in order of first appearance, re-indexed from 0.
// Despite the name nothing is sorted. The values pass through array keys, so
// a purely numeric string comes back as an integer.
// Anything that is not a non-empty array yields an empty array.
function sortarray($arr) {
   if (!is_array($arr) || count($arr) == 0) return array();
   $seen = array();
   // Using the value as a key collapses duplicates while keeping the position
   // of the first occurrence.
   foreach ($arr as $value) $seen[$value] = true;
   return array_keys($seen);
}

// Send one GET request for $url and report where the document lives.
//   $url - absolute http:// URL
// Returns $url itself when the status line says 200, the value of the
// Location header when the response carries one (a later header overrides an
// earlier result), and "" when the URL has no host, the connection fails or
// the response has neither.
function firstArd($url) {
   $location = "";
   $urlArray = parse_url($url);
   if (!is_array($urlArray) || empty($urlArray['host'])) return $location;

   $host = $urlArray['host'];
   $port = empty($urlArray['port']) ? 80 : $urlArray['port'];
   $path = empty($urlArray['path']) ? "/" : $urlArray['path'];
   if (!empty($urlArray['query'])) $path .= "?".$urlArray['query'];
   // The Host header carries the port whenever it is not the default one.
   $hostHeader = ($port != 80) ? $host.":".$port : $host;

   $sock = fsockopen($host, $port);
   if ($sock) {
      $dump  = "GET $path HTTP/1.1\r\n";
      $dump .= "Host: $hostHeader\r\n";
      $dump .= "Connection: close\r\n\r\n";
      fputs($sock, $dump);
      while (($str = fgets($sock, 1024)) !== false) {
         // The first blank line ends the headers; body text must not be
         // mistaken for a header.
         if (trim($str) === "") break;
         if (preg_match('#^HTTP/[0-9]+\.[0-9]+ 200#i', $str)) $location = $url;
         if (preg_match('/^Location: /i', $str)) $location = preg_replace('/^Location:( )?/i', '', $str);
      }
      fclose($sock);
      flush();
   }

   $location = trim($location);
   // A Location header may be relative to the host that sent it.
   if (substr($location, 0, 2) == "//") $location = "http:".$location;
   else if (substr($location, 0, 1) == "/") $location = "http://".$hostHeader.$location;
   return $location;
}

// Probe one link and report its status.
//   $url - link as found in the page
// Returns an array with:
//   'code'        - the three-digit HTTP status for an http:// link;
//                   "D鴇" when the host cannot be reached or sends no status
//                   line; "OK" or "ERROR" for a mailto: link, depending on
//                   whether the mail host has an MX record; "N/A" for any
//                   other scheme
//   'contentType' - value of the Content-Type header; present only when the
//                   server sent one
function check($url) {
   $return = array();

   if (!preg_match('#^http://#i', $url)) {
      if (preg_match('/^mailto:/i', $url)) {
         $address = trim(preg_replace('/^mailto:/i', '', $url));
         // "?subject=..." and similar parameters are not part of the address.
         $address = preg_replace('/\?.*$/s', '', $address);
         $parts = explode("@", $address);
         $host = isset($parts[1]) ? $parts[1] : "";
         if ($host !== "" && checkdnsrr($host, "MX")) $return['code'] = "OK";
         else $return['code'] = "ERROR";
      }
      else $return['code'] = "N/A";
      return $return;
   }

   $urlArray = parse_url($url);
   if (!is_array($urlArray) || empty($urlArray['host'])) {
      $return['code'] = "D鴇";
      return $return;
   }
   $host = $urlArray['host'];
   $port = empty($urlArray['port']) ? 80 : $urlArray['port'];
   $path = empty($urlArray['path']) ? "/" : $urlArray['path'];
   // The query string is part of the resource being checked.
   if (!empty($urlArray['query'])) $path .= "?".$urlArray['query'];
   $hostHeader = ($port != 80) ? $host.":".$port : $host;

   // $errnum and $errstr are filled in by fsockopen(); the function declares
   // them by reference, so no "&" is written at the call site.
   $sock = fsockopen($host, $port, $errnum, $errstr);
   if (!$sock) {
      $return['code'] = "D鴇";
      return $return;
   }

   $dump  = "GET $path HTTP/1.1\r\n";
   $dump .= "Host: $hostHeader\r\n";
   $dump .= "Connection: close\r\n\r\n";
   fputs($sock, $dump);
   while (($str = fgets($sock, 1024)) !== false) {
      // Only the header section is of interest; stop at the blank line.
      if (trim($str) === "") break;
      // Capture just the three digits, whatever the reason phrase looks like.
      if (preg_match('#^http/[0-9]+\.[0-9]+ ([0-9]{3})#i', $str, $match)) $return['code'] = $match[1];
      if (preg_match('/^Content-Type: /i', $str)) $return['contentType'] = trim(preg_replace('/^Content-Type: /i', '', $str));
   }
   fclose($sock);
   flush();

   // Connected, but nothing that looks like an HTTP status line came back.
   if (!isset($return['code'])) $return['code'] = "D鴇";
   return $return;
}

// Fetch a page and collect the links (href= and src= attributes) it contains.
//   $url - absolute URL of the page to scan
// Reads two globals: $removeQ (when set, query strings are cut off the links)
// and $otherLinks (when set, links with a scheme other than http: are
// reported too instead of being dropped).
// Returns the distinct links as absolute URLs, without $url itself, or false
// when the page cannot be read or no link is left.
function liste($url) {
   global $otherLinks;
   global $removeQ;

   $lines = file($url);
   if (!is_array($lines)) return false;
   $text = implode("", $lines);

   // Drop HTML comments so that commented-out links are not reported.
   $stripped = preg_replace('/<!--.*?-->/s', '', $text);
   if (is_string($stripped)) $text = $stripped;

   // The attribute value is either double-quoted, single-quoted or bare;
   // exactly one of the three groups takes part in each match.
   $mylist = array();
   $pattern = '/(?:href|src)\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^\s"\'>]+))/i';
   if (preg_match_all($pattern, $text, $regs, PREG_SET_ORDER)) {
      foreach ($regs as $match) {
         $link = "";
         for ($g = 1; $g <= 3; $g++) {
            if (isset($match[$g]) && $match[$g] !== "") {
               $link = $match[$g];
               break;
            }
         }
         // A fragment names a position inside a document, not another one.
         $link = preg_replace('/#.*$/s', '', trim($link));
         if ($removeQ) $link = preg_replace('/\?.*$/s', '', $link);
         if ($link !== "") $mylist[] = $link;
      }
   }

   $return = array();
   foreach (sortarray($mylist) as $link) {
      $link = (string)$link;
      $temp = "";
      if (preg_match('#^http://#i', $link)) {
         $temp = $link;
      }
      else if (preg_match('#^[a-z][a-z0-9+.-]*:#i', $link)) {
         // mailto:, news:, javascript:, ftp:, https: ... have a scheme of
         // their own and must not be glued onto the page URL.
         if ($otherLinks) $temp = $link;
      }
      else if (substr($link, 0, 2) == "//") {
         // Scheme-relative link: same scheme, different host.
         $temp = "http:".$link;
      }
      else {
         $temp = specialconcat($url, $link);
      }
      if ($temp && $temp !== $url) $return[] = $temp;
   }

   if (count($return) != 0) return $return;
   else return false;
}
// Request parameters. Installations with register_globals deliver them as
// global variables already; everywhere else they are read from the query
// string (the form is submitted with the default GET method).
foreach (array('url', 'removeQ', 'otherLinks') as $param) {
   if (!isset($$param)) $$param = isset($_GET[$param]) ? $_GET[$param] : "";
}
if (!isset($PHP_SELF)) $PHP_SELF = isset($_SERVER['PHP_SELF']) ? $_SERVER['PHP_SELF'] : "";
// NOTE(review): $extra is not assigned anywhere in the visible file; it is
// placed between "http://" and the host below. Presumably a "user:pass@"
// prefix - confirm, or remove it.
if (!isset($extra)) $extra = "";

// Both are results of this block and must never come from the request.
$uri = "";
$steps = array();

// Prefix http:// when the address was entered without it.
$url = is_string($url) ? trim($url) : "";
if ($url && !preg_match('#^http://#i', $url)) $url = "http://$url";

// The address must start with host-like characters AND must not carry | < >
// in its path. The hyphen stands last in the class so that it is a literal
// and not a range.
if ($url && preg_match('#^http://[0-9a-z.@:-]+#i', $url) && !preg_match('#^http://.*/.*[|><]#is', $url)) {
   if ($removeQ) $url = preg_replace('/\?.*$/s', '', $url);
   $urlArray = parse_url($url);
   if (is_array($urlArray) && !empty($urlArray['host'])) {
      $path = empty($urlArray['path']) ? "/" : $urlArray['path'];
      if (!empty($urlArray['query'])) $path .= "?".$urlArray['query'];
      // Keep a non-default port; without it the wrong server would be asked.
      $port = (!empty($urlArray['port']) && $urlArray['port'] != 80) ? ":".$urlArray['port'] : "";
      $uri = "http://".$extra.$urlArray['host'].$port.$path;

      // Follow at most five redirects, asking the server once per hop.
      for ($trin = 0; $trin < 5; $trin++) {
         $next = firstArd($uri);
         if ($next === $uri) break;   // the server answers for this very address
         $uri = $next;
         if ($uri === "") break;      // unreachable or unusable answer
         $steps[] = $uri;
      }
   }
}

?>

<html>

<head>
<!-- The title and the heading below read "Link validity check". -->
<title>链接有效性检查</title>
<style type="text/css">
/* Page background: white. */
body {
  background-color: rgb(255,255,255);
}

/* Body text: table cells, paragraphs, list items and elements of class "p". */
td, p, li, .p {
  font-family:verdana, sans-serif;
  font-size:10pt;
}

/* Table header cells: white text on a black background. */
th {
  font-family:verdana, sans-serif;
  font-size:10pt;
  background-color:rgb(0,0,0);
  color:rgb(255,255,255);
}

/* Page heading. */
h1 {
  font-family:verdana, sans-serif;
}
</style>
</head>

<body>

<h1>链接有效性检查</h1>

<? /* With an address given, the form sits in the left cell of a two-column layout table. */ ?>
<? if ($url) { ?>

   <table width="100%">

   <tr><td valign="top">

<? } ?>
<? /* The input form. Values coming from the request are HTML-escaped before they are echoed into attributes. */ ?>
<table>
 <form action="<? print htmlspecialchars(basename($PHP_SELF)) ?>" name="submitForm">
  <tr><td colspan="2"><input name="url" size="40" class="oneline" value="<? print htmlspecialchars($uri ? $uri : $url) ?>"></td></tr>
  <tr><td><input type="checkbox" name="removeQ" value="1" <? if ($removeQ) print "checked"; ?> ></td><td>Remove querystring</td></tr>
  <tr><td><input type="checkbox" name="otherLinks" value="1" <? if ($otherLinks) print "checked"; ?>></td><td>Other links</td></tr>
  <tr><td>&nbsp;</td><td><input type="submit" value="  Check  " class="button"> <input type="reset" value="  Reset  " class="button">  </td></tr>
 </form>
</table>

<? /* Close the form cell and open the right-hand cell that receives the link table. */ ?>
<? if ($url) { ?>

   </td><td valign="top" rowspan="2">

<? } ?>

<?

// Render the report for the page at $uri: one table row per link found on it
// (status code, description, URL), then the redirect trail recorded in $steps
// and two summary tables (share of each status code and of each content type).
// Everything that originates from the request or from a fetched page is
// HTML-escaped before it is printed.
if ($uri) {
   $statCode = array();          // status code  => number of links
   $statContentType = array();   // content type => number of links
   $self = htmlspecialchars(basename(isset($PHP_SELF) ? $PHP_SELF : ""));

   $liste = liste($uri);
   if (is_array($liste)) {
      $total = count($liste);
      print "<table border=\"1\" bordercolor=\"black\" cellspacing=\"0\">\n";
      print "<tr><th>Status</th><th>Description</th><th>URL</th></tr>";
      for($i=0; $i<$total; $i++) {
         $check = check($liste[$i]);
         $code = isset($check['code']) ? (string)$check['code'] : "";
         // Only the media type counts; parameters such as "; charset=" are cut off.
         $contentType = !empty($check['contentType']) ? preg_replace('/;.*$/s', '', $check['contentType']) : "Ukendt";

         $statCode[$code] = isset($statCode[$code]) ? $statCode[$code] + 1 : 1;
         $statContentType[$contentType] = isset($statContentType[$contentType]) ? $statContentType[$contentType] + 1 : 1;

         $description = isset($text[$code]) ? $text[$code] : "";
         $shown = htmlspecialchars(rawurldecode($liste[$i]));
         print "<tr>
		 <td style=\"font-size:80%;\" nowrap>".htmlspecialchars($code)."</td>
		 <td style=\"font-size:80%;\" nowrap>".htmlspecialchars($description)."</td>
		 <td style=\"font-size:80%;\" nowrap>";
         // An HTML page that answered with 2xx/3xx can be checked in turn.
         if (preg_match('#^text/html#i', $contentType) && preg_match('/^(2|3)+[0-9]{2}/', $code)) {
            print "<a href=\"./".$self."?url=".rawurlencode($liste[$i])."\">".$shown."</a>";
         }
         else print $shown;
         print "</td>
		 </tr>\n";
      }

      print "</table>\n";
   }
   else print "<p><strong>I didn't find any links.</strong></p>";

   // Leave the right-hand cell and open the cell underneath the form. That
   // cell is closed by the template that follows this block.
   print '</td></tr><tr><td valign="top">';

   // Addresses passed through while following redirects.
   if (!empty($steps)) {
      print "<p><strong>Passerede</strong></p>";
      print "<ol style=\"font-size:80%;\">";
      foreach ($steps as $step) print "<li><p>".htmlspecialchars($step)."</p></li>\n";
      print "</ol>";
   }

   // Share of each status code.
   if (count($statCode) >= 1) {
      $print_statsCode = "";
      foreach ($statCode as $key => $value) {
         // Two decimals with trailing zeros removed: 50.00 -> 50, 12.50 -> 12.5
         $procent = preg_replace('/(\.)?0+$/', '', number_format(($value*100/count($liste)),2,".",""));
         // Bar of one blank per three percent, rounded up.
         $space = str_repeat("&nbsp;", (int)ceil($procent/3));
         $label = isset($text[$key]) ? $text[$key] : (string)$key;
         $print_statsCode .= "<tr>
		 <td>".htmlspecialchars($label)."</td>
		 <td style=\"text-align:right;\">$value</td>
		 <td><span style=\"background-color:navy;\">$space</span>&nbsp;$procent%</td>
		 </tr>\n";
      }
      print "<p><strong>Responce codes</strong></p>";
      print "<table xborder=\"1\" bordercolor=\"black\" cellspacing=\"0\">";
      print "<tr><th nowrap>Status&nbsp;&nbsp;</th>
	             <th nowrap>Number&nbsp;&nbsp;</th>
				 <th nowrap>Percent</th></tr>";
      print $print_statsCode;
      print "</table>";
   }

   // Share of each content type.
   if (count($statContentType) >= 1) {
      $print_statsContent = "";
      foreach ($statContentType as $key => $value) {
         $procent = preg_replace('/(\.)?0+$/', '', number_format(($value*100/count($liste)),2,".",""));
         $space = str_repeat("&nbsp;", (int)ceil($procent/3));
         $print_statsContent .= "<tr>
		 <td>".htmlspecialchars((string)$key)."</td>
		 <td style=\"text-align:right;\">$value</td>
		 <td><span style=\"background-color:navy;\">$space</span>&nbsp;$procent%</td>
		 </tr>\n";
      }
      print "<p><strong>Content-Type</strong></p>";
      print "<table xborder=\"1\" bordercolor=\"black\" cellspacing=\"0\">";
      print "<tr><th nowrap>Content-Type&nbsp;&nbsp;</th>
	             <th nowrap>Number&nbsp;&nbsp;</th>
				 <th nowrap>Percent</th></tr>";
      print $print_statsContent;
      print "</table>";
   }
}
if ($url && !$uri) print "<div align=\"center\"><p><strong>Invalid adresse.</strong></p></div>";
?>
<? if ($url) { /* an address was submitted: close the open cell and row of the layout table */ ?>
   </td></tr>
<? } ?>
</table>
</body>
</html>

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -