// File: getweblink.aspx.cs
using System;
using System.Collections;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Web;
using System.Web.SessionState;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.HtmlControls;
using System.Text;
using System.Text.RegularExpressions;
using System.IO;
using System.Net;
namespace Example_12_25
{
/// <summary>
/// Code-behind for the GetWebLink page. Downloads the page at the URL typed into
/// <c>urlTextBox</c>, finds anchor tags whose href does not start with
/// <c>http://</c> or <c>mailto:</c>, strips a single leading "/" from those hrefs,
/// and displays a report of the matches together with the rewritten markup.
/// </summary>
public class GetWebLink : System.Web.UI.Page
{
    protected System.Web.UI.WebControls.Label Label1;
    protected System.Web.UI.WebControls.TextBox urlTextBox;
    protected System.Web.UI.WebControls.Button scrapeButton;
    protected System.Web.UI.WebControls.Label TipResult;
    protected System.Web.UI.WebControls.TextBox resultLabel;

    // Matches an <a ...> tag whose href is neither http:// nor mailto:.
    // The named group "foundAnchor" captures the href value greedily up to the
    // closing quote, so the whole link (not just its first character) is captured.
    private static readonly Regex linksExpression = new Regex(
        @"\<a.+?href=['""](?!http\:\/\/)(?!mailto\:)(?<foundAnchor>[^'"">]+)[^>]*?\>",
        RegexOptions.Multiline | RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace);

    // Finds line starts scanning backwards from a given index; used to turn a
    // match offset into a row/column pair. Built once instead of once per match.
    private static readonly Regex lineStartExpression = new Regex(
        "^", RegexOptions.Multiline | RegexOptions.RightToLeft);

    // Per-request state: a new page instance is created for every request.
    private StringBuilder report = new StringBuilder();
    private String webPage;
    private int countOfMatches;

    /// <summary>
    /// Sets the static captions of the prompt label and the button.
    /// </summary>
    private void Page_Load(object sender, System.EventArgs e)
    {
        Label1.Text = "请输入一个URL地址:";
        scrapeButton.Text = "分离Href链接";
    }

    #region Web Form Designer generated code
    override protected void OnInit(EventArgs e)
    {
        //
        // CODEGEN: This call is required by the ASP.NET Web Form Designer.
        //
        InitializeComponent();
        base.OnInit(e);
    }

    /// <summary>
    /// Required method for Designer support - do not modify
    /// the contents of this method with the code editor.
    /// </summary>
    private void InitializeComponent()
    {
        this.scrapeButton.Click += new System.EventHandler(this.scrapeButton_Click);
        this.Load += new System.EventHandler(this.Page_Load);
    }
    #endregion

    /// <summary>
    /// Handles the button click: validates the URL, downloads the page, rewrites
    /// the matching links and renders the report and the rewritten markup.
    /// </summary>
    private void scrapeButton_Click(object sender, System.EventArgs e)
    {
        // The URL is echoed into a Label, which renders its Text as raw HTML,
        // so it must be encoded before being concatenated into markup.
        String encodedUrl = Server.HtmlEncode(urlTextBox.Text);

        Uri target = ParseHttpUrl(urlTextBox.Text);
        if (target == null)
        {
            TipResult.Text = "<b>无效的地址:" + encodedUrl + "(仅支持 http:// 或 https://)</b>";
            resultLabel.Text = String.Empty;
            return;
        }

        try
        {
            webPage = GrabUrl(target);
        }
        catch (WebException ex)
        {
            ShowFetchError(encodedUrl, ex.Message);
            return;
        }
        catch (IOException ex)
        {
            ShowFetchError(encodedUrl, ex.Message);
            return;
        }

        MatchEvaluator myDelegate = new MatchEvaluator(MatchHandler);
        String newWebPage = linksExpression.Replace(webPage, myDelegate);

        TipResult.Text = "<h2>从 " + encodedUrl + "分离出的Href链接</h2>" + "<b>找到并整理"
            + countOfMatches.ToString() + " 个链接</b><br><br>" + report.ToString().Replace(Environment.NewLine, "<br>");
        TipResult.Text += "<h2>整理过的页面</h2><script>window.document.title='抓取网页中的链接'</script>";
        resultLabel.Text = newWebPage;
    }

    /// <summary>
    /// Regex replacement callback: records the matched tag and its row/column in
    /// the report, and returns the tag with a single leading "/" removed from
    /// the href value (the tag is returned unchanged when there is none).
    /// </summary>
    /// <param name="m">A match produced by <c>linksExpression</c>.</param>
    /// <returns>The replacement text for the matched anchor tag.</returns>
    private String MatchHandler(Match m)
    {
        Group anchor = m.Groups["foundAnchor"];
        String link = anchor.Value;

        // Scanning right-to-left from the match start: the first line start
        // found is the beginning of the current line, and the number of line
        // starts found is the 1-based row.
        int lineBegin = lineStartExpression.Match(webPage, m.Index).Index;
        int row = lineStartExpression.Matches(webPage, m.Index).Count;
        int col = m.Index - lineBegin;

        report.AppendFormat("Link <b>{0}</b>, fixed at row:{1},col:{2}{3}",
            Server.HtmlEncode(m.Groups[0].Value), row, col, Environment.NewLine);

        countOfMatches++;

        String tag = m.Groups[0].Value;
        if (link.Length > 0 && link[0] == '/')
        {
            // Remove only the leading slash of the captured href, addressed by
            // position, so identical text elsewhere in the tag is left alone.
            tag = tag.Remove(anchor.Index - m.Index, 1);
        }
        return tag;
    }

    /// <summary>
    /// Downloads the resource at <paramref name="address"/> and returns its
    /// content decoded with <c>Encoding.Default</c>.
    /// </summary>
    /// <param name="address">Absolute http/https address to download.</param>
    /// <returns>The downloaded content as text.</returns>
    /// <exception cref="System.Net.WebException">The download failed.</exception>
    private String GrabUrl(Uri address)
    {
        // using blocks guarantee the client, stream and reader are released
        // even when the download or the read throws.
        using (WebClient wclient = new WebClient())
        using (Stream stream = wclient.OpenRead(address.AbsoluteUri))
        using (StreamReader readStream = new StreamReader(stream, Encoding.Default))
        {
            return readStream.ReadToEnd();
        }
    }

    /// <summary>
    /// Parses user input into an absolute http or https URI.
    /// </summary>
    /// <param name="text">Raw text entered by the user.</param>
    /// <returns>The parsed URI, or <c>null</c> when the text is empty, is not an
    /// absolute URI, or uses a scheme other than http/https.</returns>
    private static Uri ParseHttpUrl(String text)
    {
        if (text == null)
        {
            return null;
        }

        String trimmed = text.Trim();
        if (trimmed.Length == 0)
        {
            return null;
        }

        Uri candidate;
        try
        {
            candidate = new Uri(trimmed);
        }
        catch (UriFormatException)
        {
            return null;
        }

        // WebClient.OpenRead also accepts local paths and file:// URIs; only
        // web schemes are allowed so the page cannot be used to read server files.
        // NOTE(review): http/https requests to internal hosts are still possible;
        // add a host allow-list if this page is exposed to untrusted users.
        if (candidate.Scheme == Uri.UriSchemeHttp || candidate.Scheme == Uri.UriSchemeHttps)
        {
            return candidate;
        }
        return null;
    }

    /// <summary>
    /// Shows a download failure message and clears the result box.
    /// </summary>
    /// <param name="encodedUrl">The requested URL, already HTML-encoded.</param>
    /// <param name="message">The raw exception message (encoded here).</param>
    private void ShowFetchError(String encodedUrl, String message)
    {
        TipResult.Text = "<b>无法获取 " + encodedUrl + " 的内容:" + Server.HtmlEncode(message) + "</b>";
        resultLabel.Text = String.Empty;
    }
}
}
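// Code-viewer keyboard shortcuts (web-page residue, not part of the program):
//   Copy code: Ctrl + C
//   Search code: Ctrl + F
//   Full-screen mode: F11
//   Switch theme: Ctrl + Shift + D
//   Show shortcuts: ?
//   Increase font size: Ctrl + =
//   Decrease font size: Ctrl + -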