📄 wm_spy.js
字号:
MessageBox.write("正在返回状态报告……");
break;
case 4:
var ret = http.responseText;
if (http.status == 200)
{
MessageBox.close();
if (ret.indexOf("Msgbox.show") != -1)
{
eval(ret);
}
else
{
jspp.Spy.responseContent = ret;
frm.btnChkContentRule.disabled = false;
frm.btnChkPageRule.disabled = false;
frm.btnContent.disabled = false;
frm.btnContent.value = "页面预览";
document.getElementById("_CONTENT_BODY").style.display = "";
document.getElementById("_CONTENT_BODY").innerText = ret;
}
}
else
{
MessageBox.write("<font color=\"#FF0000\">获取连接失败:" + ret + "</font>");
MessageBox.setType(MSG_WARNING);
}
rpc.close();
rpc = null;
break;
}
};
rpc.doGet("admin_spy.asp", true);
},
showContent : function(obj)
{
if (obj.value == "页面预览")
{
document.getElementById("_CONTENT_HTML").style.display = "";
document.getElementById("_CONTENT_HTML").src = obj.form.ListPage.value;
document.getElementById("_CONTENT_BODY").style.display = "none";
obj.value = "查看源码";
}
else
{
document.getElementById("_CONTENT_HTML").style.display = "none";
document.getElementById("_CONTENT_BODY").style.display = "";
document.getElementById("_CONTENT_BODY").innerText = jspp.Spy.responseContent;
obj.value = "页面预览";
}
},
chkContentRule : function(frm)
{
var rle = frm.ContentRule.value;
if (rle == "")
{
MessageBox.show(MSG_WARNING, "请先输入内容规则");
return;
}
document.getElementById("_CONTENT").style.display = "none";
var reg = new RegExp(rle.myreg(), "g");
var arr = reg.exec(jspp.Spy.responseContent);
if (arr == null)
{
MessageBox.show(MSG_WARNING, "检测内容规则失败");
}
else
{
var tmp = "";
while (arr)
{
tmp += arr[1];
arr = reg.exec(jspp.Spy.responseContent);
}
tmp = tmp.replace(/\t/g, " ").replace(/ /g, " ").replace(/ /g, " ");
var mak = Form.check("frmMain.Mark");
if (mak != "")
{
mak = mak.replace(/,/g, "|") + "|script|select";
}
else
{
mak = "script|select";
}
var del = new RegExp("<(" + mak + ")[^>]*>[\\s\\S]+?</\\1>", "gi");
tmp = tmp.replace(del, "");
if (mak.indexOf("img") != -1) tmp = tmp.replace(/<img[^>]+>/gi, "");
tmp = FormatMark(frm.ListPage.value, FilterMark(tmp));
document.getElementById("_CONTENT").style.display = "";
document.getElementById("_CONTENT").innerText = tmp;
frm.Check.value = "1";
}
},
chkPageRule : function(frm)
{
var rle = frm.PageRule.value;
if (rle == "")
{
MessageBox.show(MSG_WARNING, "请先输入分页规则");
return;
}
var reg = new RegExp(rle.myreg(), "g");
var arr = reg.exec(jspp.Spy.responseContent);
if (arr == null)
{
MessageBox.show(MSG_WARNING, "找不到分页点,或者分页规则错误");
}
else
{
var tmp = "";
while (arr)
{
var u = getFullURL(frm.ListPage.value, arr[1])
tmp += arr[2] + " - <a href=\"" + u + "\" target=\"_blank\">" + u + "</a>";
arr = reg.exec(jspp.Spy.responseContent);
}
MessageBox.show(MSG_HINT, "找到下列分页" + tmp);
}
},
run : function(frm)
{
var seq = Form.check("frmMain.SeqId");
if (seq == "")
{
MessageBox.show(MSG_WARNING, "请选择您要运行的采集");
}
else
{
window.open("admin_spy.asp?Handle=Run&SeqId=" + escape(seq), "spy", "width=700px,height=500px,scrollbars=yes");
}
}
};
/**
 * Returns this string with regular-expression metacharacters escaped so
 * it can be embedded literally in a RegExp pattern.
 *
 * Escaped: \ . ^ $ ( ) [ ] * ? ! + |  (and CR/LF become the two-character
 * sequences \r and \n). Braces are deliberately left alone because
 * myreg() uses them as placeholder delimiters.
 *
 * "+" and "|" were previously missed, so literal text containing them was
 * interpreted as a quantifier / alternation.
 *
 * @returns {string} the escaped pattern text
 */
String.prototype.reg = function()
{
    var ret = String(this);
    // One pass: every listed metacharacter gets exactly one backslash.
    ret = ret.replace(/[\\.^$()\[\]*?!+|]/g, "\\$&");
    ret = ret.replace(/\r/g, "\\r");
    ret = ret.replace(/\n/g, "\\n");
    return ret;
};
/**
 * Turns a rule template into a regular-expression source string.
 *
 * The text is first escaped with reg(); the substitutions below are then
 * applied strictly in order. A string pattern replaces only the first
 * occurrence, so the first {Href}/{Title} becomes a capture group and any
 * later one (matched case-insensitively) becomes a back-reference.
 *
 * @returns {string} the pattern source
 */
String.prototype.myreg = function()
{
    var steps = [
        ["{Href}", "([^\"\\s>]*?)"],
        [/\{Href\}/gi, "\\1"],
        ["{Title}", "(.+?)"],
        [/\{Title\}/gi, "\\2"],
        ["{Time}", "(.*?)"],
        ["{Page}", "(.*?)"],
        ["{Content}", "([\\s\\S]*?)"],
        [/\{\}/g, ".*?"],       // "{}" is a wildcard
        [/{/g, "("],            // remaining braces delimit a group
        [/@@@/g, "|"],          // "@@@" separates alternatives
        [/}/g, ")"]
    ];
    var pattern = this.reg();
    for (var i = 0; i < steps.length; i++)
    {
        pattern = pattern.replace(steps[i][0], steps[i][1]);
    }
    return pattern;
};
/**
 * Resolves a link found in a page against that page's URL.
 *
 * @param strURL1 URL of the page the link was found in (the base)
 * @param strURL2 the link as written in the page
 * @returns {string} strURL2 unchanged when it already carries a scheme;
 *          otherwise the base scheme, host or directory joined with it
 */
function getFullURL(strURL1, strURL2)
{
    var schemeRe = /^[a-z][a-z0-9+.\-]*:/i;
    // Any "scheme:" prefix (http, https, mailto, ...) means the link is
    // already absolute. Previously only lowercase "http://" was recognised.
    if (schemeRe.test(strURL2)) return strURL2;
    // Protocol-relative link: reuse the base URL's scheme.
    if (strURL2.substr(0, 2) == "//")
    {
        var scheme = strURL1.match(schemeRe);
        return (scheme ? scheme[0] : "http:") + strURL2;
    }
    // Root-relative link.
    if (strURL2.substr(0, 1) == "/") return getHost(strURL1) + strURL2;
    // Document-relative link.
    return getPath(strURL1) + strURL2;
}
/**
 * Extracts "scheme://host[:port]" from a URL.
 *
 * @param strURL an absolute URL
 * @returns {string} the scheme-and-authority prefix, or "" when strURL
 *          does not start with "scheme://" (previously this case threw,
 *          as did any URL not starting with lowercase "http://")
 */
function getHost(strURL)
{
    // Stop at "/", "?" or "#" so a query or fragment on a path-less URL is
    // not mistaken for part of the host.
    var ret = strURL.match(/^[a-z][a-z0-9+.\-]*:\/\/[^\/?#]+/i);
    return ret ? ret[0] : "";
}
/**
 * Returns the "directory" part of a URL, always ending in "/", so that a
 * document-relative link can simply be appended to it.
 *
 * The query string and fragment are dropped first. A URL that consists of
 * scheme and host only (e.g. "http://a.com") gets a trailing "/" added;
 * previously it was returned as-is, producing "http://a.compage.html"
 * when joined. The scheme length is measured instead of assuming the
 * seven characters of "http://".
 *
 * @param strURL the page URL
 * @returns {string} everything up to and including the last path slash;
 *          a string with neither scheme nor slash is returned unchanged
 */
function getPath(strURL)
{
    var ret = strURL;
    var cut = ret.search(/[?#]/);
    if (cut != -1) ret = ret.substr(0, cut);
    var scheme = ret.match(/^[a-z][a-z0-9+.\-]*:\/\//i);
    // Slashes inside "scheme://" do not count as path separators.
    var start = scheme ? scheme[0].length : 0;
    var pos = ret.lastIndexOf("/");
    if (pos >= start) return ret.substr(0, pos + 1);
    if (scheme) return ret + "/";
    return ret;
}
/**
 * Reduces an HTML fragment to a small whitelist of markup.
 *
 * b/u/i/strong/p/hN elements are kept as bare tags (attributes dropped,
 * tag name lower-cased) with their content filtered recursively; matched
 * <img ...> and <br ...> tags are copied through unchanged; everything in
 * between is passed to ClearMark().
 *
 * @param strData HTML text
 * @returns {string} the filtered text
 */
function FilterMark(strData)
{
    var reg = /<(b|u|i|strong|p|h\d)[^>]*>([\s\S]+?)<\/\1>|<img[^>]+?>|<br[^>]+>/gi;
    var arr = reg.exec(strData);
    if (arr == null)
    {
        return ClearMark(strData);
    }
    var pos = 0;
    var ret = "";
    var mak = null;
    while (arr != null)
    {
        ret += ClearMark(strData.substring(pos, arr.index));
        // The end of the match is kept on the RegExp object; the match array
        // has no standard lastIndex property (reading it gave undefined and
        // made every slice restart at offset 0).
        pos = reg.lastIndex;
        // arr[1] is undefined (or "" in some old engines) when the img/br
        // alternative matched, so test for truthiness, not inequality to "".
        if (arr[1])
        {
            mak = arr[1].toLowerCase();
            ret += "<" + mak + ">" + FilterMark(String(arr[2]).trim()) + "</" + mak + ">";
        }
        else
        {
            ret += arr[0];
        }
        arr = reg.exec(strData);
    }
    ret += ClearMark(strData.substring(pos));
    return ret;
}
/**
 * Strips all tags from an HTML fragment, keeping the text content.
 *
 * Each pass unwraps one level of markup: a matched open/close pair is
 * replaced by its inner text, a lone tag is removed. A line break is
 * added after a div/p/table element or a stray closing div/p/table tag.
 * The function recurses until no tag is left.
 *
 * @param strData HTML text
 * @returns {string} the text without tags
 */
function ClearMark(strData)
{
    var reg = /<([^\s>]+)[^>]*>([\s\S]+?)<\/\1>|<([^\s>]+)[^>]*>/g;
    var arr = reg.exec(strData);
    if (!arr)
    {
        return strData;
    }
    var pos = 0;
    var ret = "";
    while (arr)
    {
        ret += strData.substring(pos, arr.index);
        // The end offset lives on the RegExp, not on the match array. Reading
        // it from the array re-appended the whole input and made the
        // recursion below grow without end.
        pos = reg.lastIndex;
        // arr[2] is only set when the paired-tag alternative matched.
        if (arr[2]) ret += arr[2];
        if (/^(div|p|table)$/i.test(arr[1]) || /^(\/div|\/p|\/table)$/i.test(arr[3]))
        {
            ret += "\r\n";
        }
        arr = reg.exec(strData);
    }
    ret += strData.substr(pos);
    // Inner text may still contain nested tags; process it again.
    return ClearMark(ret);
}
/**
 * Rewrites every <img> tag to a minimal form whose src is made absolute
 * with getFullURL(), then hands the text to FormatLine().
 *
 * @param strURL  URL of the page the fragment came from
 * @param strData HTML fragment
 * @returns {string} the result of FormatLine() on the rewritten fragment
 */
function FormatMark(strURL, strData)
{
    // [^>]+? keeps the scan inside a single tag (and allows line breaks
    // before src); excluding ">" from the capture stops an unquoted src
    // value from swallowing the end of the tag.
    var reg = /<img[^>]+?src=["']*([^"'\s>]+)[^>]*>/gi;
    var arr = reg.exec(strData);
    var pos = 0;
    var ret = "";
    while (arr)
    {
        ret += strData.substring(pos, arr.index);
        // The end offset lives on the RegExp, not on the match array.
        pos = reg.lastIndex;
        ret += "<img src=\"" + getFullURL(strURL, arr[1]) + "\" alt=\"装载中……\" />";
        arr = reg.exec(strData);
    }
    // With no match at all this appends the whole input unchanged.
    ret += strData.substr(pos);
    return FormatLine(ret);
}
/**
 * Normalises text into indented lines: <p>...</p> wrappers are turned
 * into line breaks, the text is split on CRLF, each piece is trimmed,
 * blank pieces are dropped and the rest are emitted with a leading
 * indent and a trailing CRLF.
 *
 * @param strData text, possibly containing <p> elements
 * @returns {string} the re-assembled lines
 */
function FormatLine(strData)
{
    var flattened = strData.replace(/<p>([\s\S]*?)<\/p>/gi, "$1\r\n");
    var pieces = flattened.split(/\r\n/g);
    var kept = [];
    for (var k = 0; k < pieces.length; k++)
    {
        var line = pieces[k].trim();
        if (line == "")
        {
            continue;
        }
        kept.push(" " + line + "\r\n");
    }
    return kept.join("");
}
/**
 * Tab callback for creating a new task: passes the "frmMain" form to
 * Kernel.setFollow together with the "Staple" target and zero offsets.
 */
var newSpy = function()
{
    var mainForm = Form.child("frmMain");
    Kernel.setFollow(mainForm, "Staple", 0, 0, 0);
    // Disabled sample values showing the rule syntax:
    // Form.setValue("frmMain.List", "http://news.sina.com.cn/china/pl/index.html");
    // Form.setValue("frmMain.ListRule", "<a href={Href} TARGET=_blank>{Title}</a><FONT style=\"FONT-SIZE:12px\"> ({Time})</FONT>");
    // Form.setValue("frmMain.ContentRule", "<!--正文内容开始-->{Content}{<!--正文内容结束-->@@@<span id=\"_function_code_page\">@@@<span id=_function_code_page>}");
    // Form.setValue("frmMain.PageRule", "<a href=\"{Href}\">\[{Page}]</a>");
};
// Tab strip shared by the page; created once the document has loaded.
var opt;

/**
 * Page start-up: initialises the Staple layer, builds the two-tab option
 * card, wires each tab to its callback and locks the first tab.
 */
window.onload = function()
{
    Kernel.initStaple();
    opt = new OptionCard();

    var captions = ["内容采集", "新建采集"];
    for (var i = 0; i < captions.length; i++)
    {
        opt.add(captions[i]);
    }
    opt.make(document.body, 5, 5);

    // First tab reloads the current list page; the page number is read at
    // click time, not now.
    var reloadList = function()
    {
        jspp.Spy.doPage(jspp.Spy.page);
    };
    opt.callBack[0] = reloadList;
    opt.callBack[1] = newSpy;
    opt.lock(0);
};
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -