📄 linkcheck.php
字号:
<?php
// Description text for every status the checker can report, keyed by the
// value found in the 'code' entry returned by check(): either an HTTP status
// code or one of the non-HTTP markers below.
//
// String keys are quoted on purpose: written as bare words they are parsed as
// constants, which only worked through a deprecated fallback and is a fatal
// error since PHP 8.
$text = array();
$text['N/A'] = "Ikke HTTP";
$text['OK'] = "Valid hostname";
$text['FEJL'] = "Invalid hostname";
// check() reports a failed mail-host lookup as "ERROR"; without this entry
// such rows were shown with an empty description. 'FEJL' is kept as well.
$text['ERROR'] = "Invalid hostname";
$text['D鴇'] = "No responce";
// 1xx informational
$text[100] = "Continue";
$text[101] = "Switching Protocols";
// 2xx success
$text[200] = "OK";
$text[201] = "Created";
$text[202] = "Accepted";
$text[203] = "Non-Authoritative Information";
$text[204] = "No Content";
$text[205] = "Reset Content";
$text[206] = "Partial Content";
// 3xx redirection
$text[300] = "Multiple Choices";
$text[301] = "Moved Permanently";
$text[302] = "Found";
$text[303] = "See Other";
$text[304] = "Not Modified";
$text[305] = "Use Proxy";
$text[307] = "Temporary Redirect";
// 4xx client errors
$text[400] = "Bad Request";
$text[401] = "Unauthorized";
$text[402] = "Payment Required";
$text[403] = "Forbidden";
$text[404] = "Not Found";
$text[405] = "Method Not Allowed";
$text[406] = "Not Acceptable";
$text[407] = "Proxy Authentication Required";
$text[408] = "Request Timeout";
$text[409] = "Conflict";
$text[410] = "Gone";
$text[411] = "Length Required";
$text[412] = "Precondition Failed";
$text[413] = "Request Entity Too Large";
$text[414] = "Request-URI Too Long";
$text[415] = "Unsupported Media Type";
$text[416] = "Requested Range Not Satisfiable";
$text[417] = "Expectation Failed";
// 5xx server errors
$text[500] = "Internal Server Error";
$text[501] = "Not Implemented";
$text[502] = "Bad Gateway";
$text[503] = "Service Unavailable";
$text[504] = "Gateway Timeout";
$text[505] = "HTTP Version Not Supported";
// Resolve a link found on a page against the URL of that page.
//
// $base  absolute http:// URL of the page the link was found on
// $path  link target: relative ("img/a.png", "./a.html") or
//        root-relative ("/a.html")
//
// Returns the combined URL as a string. "../" segments are not collapsed;
// they are appended unchanged.
function specialconcat($base, $path) {
    // The query string and fragment are not part of the directory; a slash
    // inside them must not be mistaken for a path separator.
    $base = preg_replace('~[?#].*$~s', '', $base);
    // A base with no path at all ("http://host") has no slash after the host.
    // Add one, otherwise the directory step below would cut the host away and
    // leave just "http://".
    if (preg_match('~^http://[^/]+$~', $base)) {
        $base .= "/";
    }
    // Keep everything up to and including the last slash (the directory).
    $base = preg_replace('~^(.*/)[^/]*$~s', '\\1', $base);
    // Drop one leading "./" from the link.
    $path = preg_replace('~^\./~', '', $path);
    // Root-relative link: only scheme and host of the base survive.
    if (substr($path, 0, 1) === "/") {
        $base = preg_replace('~^(http://[^/]+)/.*$~s', '\\1', $base);
    }
    return $base . $path;
}
// Return the distinct values of $arr as a freshly 0-indexed list, in order of
// first appearance. Despite the name, nothing is sorted: the function only
// removes duplicates.
//
// $arr  array of scalar values; anything that is not an array (for example
//       null when the caller collected no items) is treated as empty.
//
// Returns an array, empty when there was nothing to de-duplicate.
function sortarray($arr) {
    if (!is_array($arr) || count($arr) == 0) {
        return array();
    }
    // array_unique() keeps the first occurrence of each value;
    // array_values() renumbers the survivors from 0.
    return array_values(array_unique($arr));
}
// Send one GET request for $url and report where the document lives.
//
// $url  absolute http:// URL
//
// Returns
//   - $url itself when the server answers 200,
//   - the value of the Location header when one is sent,
//   - "" when the URL has no host, the connection fails, or the response is
//     neither a 200 nor a redirect.
// When both a 200 status line and a Location header are present, whichever
// comes later in the headers wins.
function firstArd($url) {
    $location = "";
    $urlArray = parse_url($url);
    if (!is_array($urlArray) || empty($urlArray['host'])) {
        return $location; // nothing to connect to
    }
    $host = $urlArray['host'];
    $port = empty($urlArray['port']) ? 80 : (int) $urlArray['port'];
    $path = empty($urlArray['path']) ? "/" : $urlArray['path'];
    if (isset($urlArray['query']) && $urlArray['query'] !== "") {
        $path .= "?" . $urlArray['query'];
    }
    // 10 second limit for connecting and for each read, so that one dead
    // host cannot stall the whole page for the default socket timeout.
    $sock = fsockopen($host, $port, $errnum, $errstr, 10);
    if (!$sock) {
        return $location;
    }
    stream_set_timeout($sock, 10);
    // The Host header carries the port whenever it is not the default.
    $hostHeader = ($port == 80) ? $host : "$host:$port";
    $dump  = "GET $path HTTP/1.1\r\n";
    $dump .= "Host: $hostHeader\r\n";
    $dump .= "Connection: close\r\n\r\n";
    fputs($sock, $dump);
    while (($str = fgets($sock, 1024)) !== false) {
        // The first blank line ends the headers. The body is not needed, and
        // a body line that happens to start with "Location:" must not count.
        if (rtrim($str, "\r\n") === "") {
            break;
        }
        if (preg_match('~^HTTP/[0-9]+\.[0-9]+ 200~i', $str)) {
            $location = $url;
        }
        if (preg_match('~^Location:~i', $str)) {
            $location = preg_replace('~^Location:~i', '', $str);
        }
    }
    fclose($sock);
    flush(); // push what has been printed so far to the browser
    return trim($location);
}
// Determine the status of one link.
//
// $url  the link to check
//
// Returns an array with
//   'code'         always present:
//                    "N/A"    the link is neither http:// nor mailto:
//                    "OK"     mailto: whose host has an MX record
//                    "ERROR"  mailto: whose host has none (or has no host)
//                    "D鴇"    http:// link that could not be reached or gave
//                             no status line
//                    "nnn"    the three-digit HTTP status code
//   'contentType'  only for http:// links that sent a Content-Type header;
//                  the raw header value, parameters included.
function check($url) {
    $return = array();

    // --- Not an http:// link ---------------------------------------------
    if (!preg_match('~^http://~i', $url)) {
        if (preg_match('~^mailto:~i', $url)) {
            $address = trim(preg_replace('~^mailto:~i', '', $url));
            // "mailto:a@b.c?subject=x": the part after "?" is not the host.
            $address = preg_replace('~\?.*$~s', '', $address);
            $at = strrpos($address, "@");
            $host = ($at === false) ? "" : (string) substr($address, $at + 1);
            // An empty host is rejected without asking DNS.
            if ($host !== "" && checkdnsrr($host, "MX")) $return['code'] = "OK";
            else $return['code'] = "ERROR";
        }
        else $return['code'] = "N/A";
        return $return;
    }

    // --- http:// link ----------------------------------------------------
    $urlArray = parse_url($url);
    if (!is_array($urlArray) || empty($urlArray['host'])) {
        $return['code'] = "D鴇";
        return $return;
    }
    $host = $urlArray['host'];
    $port = empty($urlArray['port']) ? 80 : (int) $urlArray['port'];
    $path = empty($urlArray['path']) ? "/" : $urlArray['path'];
    // The query string is part of the resource being checked.
    if (isset($urlArray['query']) && $urlArray['query'] !== "") {
        $path .= "?" . $urlArray['query'];
    }

    // 10 second limit for connecting and for each read.
    $sock = fsockopen($host, $port, $errnum, $errstr, 10);
    if (!$sock) {
        $return['code'] = "D鴇";
        return $return;
    }
    stream_set_timeout($sock, 10);

    $hostHeader = ($port == 80) ? $host : "$host:$port";
    $dump  = "GET $path HTTP/1.1\r\n";
    $dump .= "Host: $hostHeader\r\n";
    $dump .= "Connection: close\r\n\r\n";
    fputs($sock, $dump);

    while (($str = fgets($sock, 1024)) !== false) {
        if (trim($str) === "") {
            // A blank line ends a header block. After an interim 1xx block
            // the real response follows, so keep reading in that case only;
            // the body itself is never scanned.
            if (isset($return['code']) && substr($return['code'], 0, 1) !== "1") break;
            continue;
        }
        // Capture just the three digits. The reason phrase may contain
        // characters such as "-" ("Non-Authoritative Information") and must
        // not leak into the code.
        if (preg_match('~^HTTP/[0-9]+\.[0-9]+ ([0-9]{3})~i', $str, $m)) {
            $return['code'] = $m[1];
        }
        if (preg_match('~^Content-Type:~i', $str)) {
            $return['contentType'] = trim(preg_replace('~^Content-Type:~i', '', $str));
        }
    }
    fclose($sock);
    flush(); // push what has been printed so far to the browser

    // Connected, but nothing that looks like an HTTP status line came back.
    if (!isset($return['code'])) $return['code'] = "D鴇";
    return $return;
}
// Collect the distinct link targets (href= and src= attribute values) of the
// document at $url.
//
// $url  address of the document to scan; also the base that relative links
//       are resolved against
//
// Reads two globals:
//   $removeQ     when true, the query string is cut from every link
//   $otherLinks  when true, links with a scheme other than http: (mailto:,
//                ftp:, https:, ...) are reported too instead of dropped
//
// Returns a list of URLs, or false when the document cannot be read or
// contains no usable link. The document itself ($url) is never listed.
function liste($url) {
    global $otherLinks;
    global $removeQ;

    $text = file_get_contents($url);
    if ($text === false) return false;

    // Links inside HTML comments are not live; remove the comments first.
    $text = preg_replace('~<!--.*?-->~s', '', $text);

    // One pass over the document. The value is either double-quoted,
    // single-quoted or bare; exactly one of the three groups takes part.
    $pattern = '~(?<![\w-])(?:href|src)\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^\s"\'>]+))~i';
    $mylist = array();
    if (preg_match_all($pattern, $text, $matches, PREG_SET_ORDER)) {
        foreach ($matches as $m) {
            $link = "";
            for ($g = 1; $g <= 3; $g++) {
                if (isset($m[$g]) && $m[$g] !== "") {
                    $link = $m[$g];
                    break;
                }
            }
            $link = trim($link);
            // The fragment never reaches the server, so it is always cut.
            $link = preg_replace('~#.*$~s', '', $link);
            if ($removeQ) $link = preg_replace('~\?.*$~s', '', $link);
            // href="#top" and href="" point back at the page itself.
            if ($link !== "") $mylist[] = $link;
        }
    }

    // Each target is checked once, however often it is linked.
    $mylist = sortarray($mylist);

    $return = array();
    foreach ($mylist as $link) {
        // Protocol-relative link: same scheme as the page, which is http.
        if (substr($link, 0, 2) === "//") $link = "http:" . $link;

        $temp = "";
        if (preg_match('~^http://~i', $link)) {
            $temp = $link;
        }
        elseif (preg_match('~^[a-z][a-z0-9+.-]*:~i', $link)) {
            // Any other scheme: not something to resolve against $url.
            if ($otherLinks) $temp = $link;
        }
        else {
            $temp = specialconcat($url, $link);
        }
        if ($temp !== "" && $temp !== $url) $return[] = $temp;
    }

    if (count($return) != 0) return $return;
    else return false;
}
// ---------------------------------------------------------------------
// Request handling: read the form input, normalise the address and follow
// redirects to the document that will actually be scanned.
//
// Sets for the template below:
//   $url         the address as entered ("" when the form was not submitted)
//   $uri         the address to scan, "" when $url is empty or invalid
//   $steps       the addresses passed through while following redirects
//   $removeQ     checkbox: cut query strings
//   $otherLinks  checkbox: report non-http links too
//   $PHP_SELF    path of this script
// ---------------------------------------------------------------------

// The form submits with GET. The values are read explicitly rather than
// through register_globals, which no longer exists.
$url = (isset($_GET['url']) && is_string($_GET['url'])) ? trim($_GET['url']) : "";
$removeQ = !empty($_GET['removeQ']);
$otherLinks = !empty($_GET['otherLinks']);
$PHP_SELF = isset($_SERVER['SCRIPT_NAME']) ? $_SERVER['SCRIPT_NAME'] : "";
$uri = "";
$steps = array();

// An address typed without a scheme is taken to be http.
if ($url !== "" && !preg_match('~^http://~i', $url)) $url = "http://$url";

// Accept the address only when the host part starts with plausible
// characters AND the part after the host contains none of | > <.
// (The two tests used to be joined with "or", which let nearly everything
// through.)
if ($url !== ""
    && preg_match('~^http://[0-9a-z.@:-]+~i', $url)
    && !preg_match('~^http://.*/.*[|><]~i', $url)) {
    if ($removeQ) $url = preg_replace('~\?.*$~s', '', $url);
    $urlArray = parse_url($url);
    if (is_array($urlArray) && !empty($urlArray['host'])) {
        $path = empty($urlArray['path']) ? "/" : $urlArray['path'];
        if (isset($urlArray['query']) && $urlArray['query'] !== "") {
            $path .= "?" . $urlArray['query'];
        }
        // A non-default port is part of the address and must be kept.
        $port = (empty($urlArray['port']) || $urlArray['port'] == 80) ? "" : ":" . $urlArray['port'];
        $uri = "http://" . $urlArray['host'] . $port . $path;

        // Follow at most five redirects, one request per hop.
        $trin = 0;
        while ($trin++ < 5) {
            $next = firstArd($uri);
            if ($next == $uri) break; // answered 200: this is the document
            // A Location without a scheme is relative to the current address.
            if ($next != "" && !preg_match('~^[a-z][a-z0-9+.-]*:~i', $next)) {
                $next = specialconcat($uri, $next);
            }
            $uri = $next;
            // Unreachable, or neither 200 nor a redirect: $uri is now empty
            // and the page reports the address as invalid.
            if ($uri == "") break;
            $steps[] = $uri;
        }
    }
}
?>
<html>
<head>
<title>链接有效性检查</title>
<style type="text/css">
body {
background-color: rgb(255,255,255);
}
td, p, li, .p {
font-family:verdana, sans-serif;
font-size:10pt;
}
th {
font-family:verdana, sans-serif;
font-size:10pt;
background-color:rgb(0,0,0);
color:rgb(255,255,255);
}
h1 {
font-family:verdana, sans-serif;
}
</style>
</head>
<body>
<h1>链接有效性检查</h1>
<?php
// Escape a value for HTML text or a quoted attribute. Everything printed
// below that comes from the request or from a remote server goes through
// this. ISO-8859-1 is named so that the escaping is purely byte-wise: it
// never rejects input and only rewrites the ASCII characters & < > " '.
if (!function_exists('linkcheck_html')) {
    function linkcheck_html($value) {
        return htmlspecialchars((string) $value, ENT_QUOTES, 'ISO-8859-1');
    }
}
// File name of this script, for the form action and the "check this page" links.
$selfPath = isset($PHP_SELF) ? $PHP_SELF : (isset($_SERVER['SCRIPT_NAME']) ? $_SERVER['SCRIPT_NAME'] : "");
$self = linkcheck_html(basename($selfPath));
$haveUrl = !empty($url);              // the form was submitted
$haveUri = $haveUrl && !empty($uri);  // ...and the address was usable
?>
<?php if ($haveUrl) { ?>
<table width="100%">
<tr><td valign="top">
<?php } ?>
<!-- Search form -->
<form action="<?php print $self ?>" name="submitForm">
<table>
<tr><td colspan="2"><input name="url" size="40" class="oneline" value="<?php print linkcheck_html($haveUri ? $uri : ($haveUrl ? $url : "")) ?>"></td></tr>
<tr><td><input type="checkbox" name="removeQ" value="1" <?php if (!empty($removeQ)) print "checked"; ?> ></td><td>Remove querystring</td></tr>
<tr><td><input type="checkbox" name="otherLinks" value="1" <?php if (!empty($otherLinks)) print "checked"; ?>></td><td>Other links</td></tr>
<tr><td> </td><td><input type="submit" value=" Check " class="button"> <input type="reset" value=" Reset " class="button"> </td></tr>
</table>
</form>
<?php if ($haveUrl) { ?>
</td><td valign="top" rowspan="2">
<?php } ?>
<?php
if ($haveUri) {
    // --- Table with the status of every link -----------------------------
    $statCode = array();        // status code  => number of links
    $statContentType = array(); // content type => number of links
    $liste = liste($uri);
    $total = is_array($liste) ? count($liste) : 0;
    if ($total > 0) {
        print "<table border=\"1\" bordercolor=\"black\" cellspacing=\"0\">\n";
        print "<tr><th>Status</th><th>Description</th><th>URL</th></tr>";
        foreach ($liste as $link) {
            $check = check($link);
            $code = isset($check['code']) ? (string) $check['code'] : "";
            // Only the media type is of interest, not "; charset=..." etc.
            $contentType = !empty($check['contentType'])
                ? preg_replace('~;.*$~s', '', $check['contentType'])
                : "Ukendt";
            $statCode[$code] = isset($statCode[$code]) ? $statCode[$code] + 1 : 1;
            $statContentType[$contentType] = isset($statContentType[$contentType]) ? $statContentType[$contentType] + 1 : 1;
            $description = isset($text[$code]) ? $text[$code] : "";
            print "<tr>\n"
                . "<td style=\"font-size:80%;\" nowrap>" . linkcheck_html($code) . "</td>\n"
                . "<td style=\"font-size:80%;\" nowrap>" . linkcheck_html($description) . "</td>\n"
                . "<td style=\"font-size:80%;\" nowrap>";
            // A reachable HTML page can itself be checked: link back to this script.
            if (preg_match('~^text/html~i', $contentType) && preg_match('~^[23][0-9]{2}~', $code)) {
                print "<a href=\"./" . $self . "?url=" . rawurlencode($link) . "\">" . linkcheck_html(rawurldecode($link)) . "</a>";
            }
            else print linkcheck_html(rawurldecode($link));
            print "</td>\n</tr>\n";
        }
        print "</table>\n";
    }
    else print "<p><strong>I didn't find any links.</strong></p>";

    // Close the results cell and open the second row, under the form.
    print '</td></tr><tr><td valign="top">';

    // --- Redirects passed on the way to the scanned document -------------
    if (!empty($steps)) {
        print "<p><strong>Passerede</strong></p>";
        print "<ol style=\"font-size:80%;\">";
        foreach ($steps as $step) print "<li><p>" . linkcheck_html($step) . "</li>\n";
        print "</ol>";
    }

    // --- Share of each status code ---------------------------------------
    if (count($statCode) >= 1) {
        $print_statsCode = "";
        foreach ($statCode as $key => $value) {
            // Two decimals, trailing zeros (and a bare ".") trimmed.
            $procent = preg_replace('~\.?0+$~', '', number_format($value * 100 / $total, 2, ".", ""));
            // Bar: one non-breaking space per 3 percent, rounded up. Plain
            // spaces would collapse to a single one in the browser.
            $space = str_repeat("&nbsp;", (int) ceil($procent / 3));
            $description = isset($text[$key]) ? $text[$key] : "";
            $print_statsCode .= "<tr>\n"
                . "<td>" . linkcheck_html($description) . "</td>\n"
                . "<td style=\"text-align:right;\">$value</td>\n"
                . "<td><span style=\"background-color:navy;\">$space</span> $procent%</td>\n"
                . "</tr>\n";
        }
        print "<p><strong>Responce codes</strong></p>";
        print "<table xborder=\"1\" bordercolor=\"black\" cellspacing=\"0\">";
        print "<tr><th nowrap>Status </th>
<th nowrap>Number </th>
<th nowrap>Percent</th></tr>";
        print $print_statsCode;
        print "</table>";
    }

    // --- Share of each content type --------------------------------------
    if (count($statContentType) >= 1) {
        $print_statsContent = "";
        foreach ($statContentType as $key => $value) {
            $procent = preg_replace('~\.?0+$~', '', number_format($value * 100 / $total, 2, ".", ""));
            $space = str_repeat("&nbsp;", (int) ceil($procent / 3));
            // The content type comes from the remote server: escape it.
            $print_statsContent .= "<tr>\n"
                . "<td>" . linkcheck_html($key) . "</td>\n"
                . "<td style=\"text-align:right;\">$value</td>\n"
                . "<td><span style=\"background-color:navy;\">$space</span> $procent%</td>\n"
                . "</tr>\n";
        }
        print "<p><strong>Content-Type</strong></p>";
        print "<table xborder=\"1\" bordercolor=\"black\" cellspacing=\"0\">";
        print "<tr><th nowrap>Content-Type </th>
<th nowrap>Number </th>
<th nowrap>Percent</th></tr>";
        print $print_statsContent;
        print "</table>";
    }
    print "</td></tr>\n";
}
elseif ($haveUrl) {
    print "<div align=\"center\"><p><strong>Invalid adresse.</strong></p></div>";
    print "</td></tr>\n";
}
// The outer layout table exists only when the form was submitted.
if ($haveUrl) print "</table>\n";
?>
</body>
</html>
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -