📄 htmlanalysis.cs
字号:
using System;
using System.Collections;
namespace spider
{
/// <summary>
/// Low-level HTML scanner. Walks a source string one character at a time and
/// extracts attribute name/value pairs, which are stored through the members
/// inherited from AttributeList.
/// </summary>
/// <remarks>
/// The read position starts at 0 and is only ever moved forward by this class;
/// assigning <see cref="Source"/> does not reset it.
/// </remarks>
public class Analysis:AttributeList
{
    private string HtmlSourse; // HTML text being scanned (null until Source is assigned)
    private int HtmlId;        // index of the current character in HtmlSourse
    public char delim;         // quote character that wrapped the current value, (char)0 if none
    // Example markup: <a href="http://www.qq.com"></a>
    public string AnaName;     // attribute name, e.g. href
    public string AnaValue;    // attribute value, e.g. http://www.qq.com
    public string HtmlTag;     // text of the current tag, e.g. a

    #region Methods
    /// <summary>
    /// Reports whether the read position is at or past the end of the source.
    /// A source that was never assigned is treated as empty.
    /// </summary>
    public bool IsEnd()
    {
        return HtmlSourse == null || HtmlId >= HtmlSourse.Length;
    }

    /// <summary>
    /// Reports whether <paramref name="ch"/> is a tab, line feed, carriage return or space.
    /// </summary>
    public bool IsBlank(char ch)
    {
        return ("\t\n\r ".IndexOf(ch) != -1);
    }

    /// <summary>
    /// Moves the read position forward past any run of blank characters.
    /// </summary>
    public void EatBlank()
    {
        while (!IsEnd() && IsBlank(GetCurrentChar()))
        {
            HtmlId++;
        }
    }

    /// <summary>
    /// Returns the character at the read position without consuming it,
    /// or (char)0 when the position is outside the source.
    /// </summary>
    public char GetCurrentChar()
    {
        return GetCurrentChar(0);
    }

    /// <summary>
    /// Returns the character <paramref name="peek"/> places after the read
    /// position without consuming anything.
    /// </summary>
    /// <param name="peek">Offset from the current read position.</param>
    /// <returns>The character at that offset, or (char)0 when it lies outside the source.</returns>
    public char GetCurrentChar(int peek)
    {
        int index = HtmlId + peek;
        if (HtmlSourse != null && index >= 0 && index < HtmlSourse.Length)
        {
            return HtmlSourse[index];
        }
        return (char)0;
    }

    /// <summary>
    /// Returns the character at the read position and then moves one place forward.
    /// </summary>
    /// <exception cref="IndexOutOfRangeException">
    /// The read position is already at or past the end of the source; check
    /// <see cref="IsEnd"/> first. The position is still incremented in that case.
    /// </exception>
    public char AdvanceCurrentChar()
    {
        return HtmlSourse[HtmlId++];
    }

    /// <summary>
    /// Moves the read position one place forward without any bounds check.
    /// </summary>
    public void Advance()
    {
        HtmlId++;
    }

    /// <summary>
    /// Skips leading blanks, appends to <see cref="AnaName"/> every character up to
    /// the next blank, '=' or '>' (or the end of the source), then skips trailing blanks.
    /// </summary>
    public void GetAnaName()
    {
        EatBlank();
        int start = HtmlId;
        while (!IsEnd())
        {
            char c = GetCurrentChar();
            if (IsBlank(c) || c == '=' || c == '>')
            {
                break;
            }
            HtmlId++;
        }
        AnaName = AppendScanned(AnaName, start);
        EatBlank();
    }

    /// <summary>
    /// Reads an attribute value when the current character is '='. A value that
    /// starts with a single or double quote runs to the matching quote, and that
    /// quote is recorded in <see cref="delim"/>; an unquoted value runs to the next
    /// blank or '>'. The text is appended to <see cref="AnaValue"/> and trailing
    /// blanks are skipped.
    /// </summary>
    /// <remarks>
    /// Does nothing when <see cref="delim"/> is already non-zero or when the current
    /// character is not '='. A quoted value whose closing quote is missing ends at
    /// the end of the source.
    /// </remarks>
    public void GetAnaValue()
    {
        if (delim != 0)
        {
            return;
        }
        if (GetCurrentChar() != '=')
        {
            return;
        }

        HtmlId++; // step over '='
        EatBlank();

        char first = GetCurrentChar();
        int start;
        if (first == '\'' || first == '\"')
        {
            delim = first;
            HtmlId++; // step over the opening quote
            start = HtmlId;
            // The IsEnd() check is essential: past the end GetCurrentChar() yields
            // (char)0, which never equals the quote, so an unterminated value
            // would otherwise loop forever.
            while (!IsEnd() && GetCurrentChar() != delim)
            {
                HtmlId++;
            }
            AnaValue = AppendScanned(AnaValue, start);
            if (!IsEnd())
            {
                HtmlId++; // step over the closing quote
            }
        }
        else
        {
            start = HtmlId;
            while (!IsEnd() && !IsBlank(GetCurrentChar()) && (GetCurrentChar() != '>'))
            {
                HtmlId++;
            }
            AnaValue = AppendScanned(AnaValue, start);
        }
        EatBlank();
    }

    /// <summary>
    /// Builds an Attribute from the current name, value and delimiter and passes
    /// it to the inherited Add method.
    /// </summary>
    public void AddAttribute()
    {
        Attribute a = new Attribute(AnaName, AnaValue, delim);
        Add(a);
    }

    // Returns text with the source characters from start up to (not including) the
    // read position appended; returns text unchanged when nothing was scanned.
    private string AppendScanned(string text, int start)
    {
        if (HtmlId <= start)
        {
            return text;
        }
        return text + HtmlSourse.Substring(start, HtmlId - start);
    }
    #endregion

    #region Constructors
    /// <summary>
    /// Creates a scanner with no source assigned.
    /// </summary>
    public Analysis()
    {
    }
    #endregion

    #region Properties
    /// <summary>Gets or sets the attribute name being built (same storage as <see cref="AnaName"/>).</summary>
    public string ParseName
    {
        get { return AnaName; }
        set { AnaName = value; }
    }

    /// <summary>Gets or sets the attribute value being built (same storage as <see cref="AnaValue"/>).</summary>
    public string ParseValue
    {
        get { return AnaValue; }
        set { AnaValue = value; }
    }

    /// <summary>Gets or sets the quote delimiter of the current value (same storage as <see cref="delim"/>).</summary>
    public char ParseDelim
    {
        get { return delim; }
        set { delim = value; }
    }

    /// <summary>Gets or sets the HTML text to scan. Setting it does not move the read position.</summary>
    public string Source
    {
        get { return HtmlSourse; }
        set { HtmlSourse = value; }
    }
    #endregion
}
/// <summary>
/// Tag-level HTML parser built on <see cref="Analysis"/>. Each call to
/// <see cref="Parse"/> consumes either one literal character or one whole tag.
/// </summary>
public class HtmlAnalysis:Analysis
{
    #region Methods
    /// <summary>
    /// Returns a new AttributeList named after the current <c>HtmlTag</c> and filled
    /// with clones of the attributes currently held in the inherited <c>List</c>.
    /// </summary>
    public AttributeList GetTag()
    {
        AttributeList tag = new AttributeList();
        tag.Name = HtmlTag;
        foreach (Attribute x in List)
        {
            tag.Add((Attribute)x.Clone());
        }
        return tag;
    }

    /// <summary>
    /// Parses the tag whose first character (the one right after the opening
    /// angle bracket) is at the read position. The tag text is stored in
    /// <c>HtmlTag</c>, and each attribute found is added via <c>AddAttribute</c>.
    /// </summary>
    /// <remarks>
    /// A tag starting with "!--" is handled as a comment and yields no attributes.
    /// A tag that is cut off by the end of the source simply ends there.
    /// </remarks>
    protected void ParseTag()
    {
        HtmlTag = "";
        Clear();

        if ((GetCurrentChar() == '!') && (GetCurrentChar(1) == '-') && (GetCurrentChar(2) == '-'))
        {
            ParseComment();
            return;
        }

        // Tag name: everything up to the first blank or '>'.
        while (!IsEnd())
        {
            if (IsBlank(GetCurrentChar()) || (GetCurrentChar() == '>'))
            {
                break;
            }
            HtmlTag += GetCurrentChar();
            Advance();
        }
        EatBlank();

        // Attributes. The IsEnd() check is essential: past the end GetCurrentChar()
        // yields (char)0, which never equals '>', so an unterminated tag would
        // otherwise add empty attributes forever.
        while (!IsEnd() && GetCurrentChar() != '>')
        {
            ParseName = "";
            ParseValue = "";
            ParseDelim = (char)0;
            GetAnaName();
            if (GetCurrentChar() == '>')
            {
                // Name with no value directly before the end of the tag.
                AddAttribute();
                break;
            }
            GetAnaValue();
            AddAttribute();
        }

        if (!IsEnd())
        {
            Advance(); // step over the closing '>'
        }
    }

    // Consumes a comment whose leading "!--" is at the read position. HtmlTag
    // receives the text up to the closing "-->" with carriage returns dropped,
    // followed by "--"; the three closing characters are then skipped.
    private void ParseComment()
    {
        while (!IsEnd())
        {
            if ((GetCurrentChar() == '-') && (GetCurrentChar(1) == '-') && (GetCurrentChar(2) == '>'))
            {
                break;
            }
            if (GetCurrentChar() != '\r')
            {
                HtmlTag += GetCurrentChar();
            }
            Advance();
        }
        HtmlTag += "--";
        Advance();
        Advance();
        Advance();
        delim = (char)0;
    }

    /// <summary>
    /// Consumes the next item from the source.
    /// </summary>
    /// <returns>
    /// (char)0 when a tag was parsed (fetch it with <see cref="GetTag"/>);
    /// otherwise the literal character that was consumed. An opening angle bracket
    /// that is not followed by an ASCII letter, '!' or '/' is returned as a
    /// literal character, and the character after it is left for the next call.
    /// </returns>
    /// <exception cref="IndexOutOfRangeException">
    /// Called when the read position is already at or past the end of the source;
    /// check <c>IsEnd()</c> before calling.
    /// </exception>
    public char Parse()
    {
        if (GetCurrentChar() != '<')
        {
            return AdvanceCurrentChar();
        }

        Advance(); // step over '<'
        char ch = GetCurrentChar();
        // Explicit ASCII test instead of char.ToUpper: the latter is culture
        // sensitive (e.g. Turkish 'i' upper-cases outside A-Z), which would make
        // tags such as img unrecognisable under those cultures.
        bool isLetter = ((ch >= 'A') && (ch <= 'Z')) || ((ch >= 'a') && (ch <= 'z'));
        if (isLetter || (ch == '!') || (ch == '/'))
        {
            ParseTag();
            return (char)0;
        }

        // Not a tag: hand back the bracket itself rather than dropping it (and
        // rather than indexing past the end when it was the last character).
        return '<';
    }
    #endregion
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -