// File: parser.cs
namespace Imps.Utils.TagParser
{
using System;
using System.Collections.Specialized;
using System.Text;
internal class Parser
{
// Shared tag configuration, loaded once when the class is initialized.
// CreateElementByTagName looks up lower-cased tag names in config.Configurations.
private static TagParserConfiguration config = TagParserConfiguration.LoadConfiguration();
// NOTE(review): presumably controls whether empty text elements are dropped while parsing;
// no use of this flag is visible in this part of the file - confirm against the rest of the class.
private bool mRemoveEmptyElementText;
// Characters the tokenizer skips as whitespace: space, tab, carriage return, line feed.
private static char[] WHITESPACE_CHARS = " \t\r\n".ToCharArray();
/// <summary>
/// Creates the element object for a tag name.
/// </summary>
/// <param name="tag_name">Tag name as it appeared in the markup (any casing).</param>
/// <returns>
/// The instance produced by the configuration entry registered under the lower-cased
/// tag name, or a plain <c>TagElement</c> carrying the original name when no entry
/// exists or the entry yields <c>null</c>.
/// </returns>
public TagElement CreateElementByTagName(string tag_name)
{
    // Lower-case once and reuse the key for both the existence check and the lookup.
    // NOTE(review): ToLower() is culture-sensitive; kept as-is because the casing used
    // when the configuration keys were stored is not visible here - confirm before
    // switching to ToLowerInvariant().
    string key = tag_name.ToLower();

    TagElement element = null;
    if (config.Configurations.ContainsKey(key))
    {
        // Use the indexer: an explicit get_Item(...) accessor call is not valid C#.
        element = (TagElement) config.Configurations[key].CreateInstance();
    }

    if (element == null)
    {
        element = new TagElement(tag_name);
    }
    return element;
}
/// <summary>
/// Restores the characters that <c>EncodeScript</c> replaced with placeholder markers.
/// </summary>
/// <param name="script">Text containing <c>[AMIGO-SCRIPT-*]</c> markers.</param>
/// <returns>The text with each marker replaced by its original character.</returns>
private static string DecodeScript(string script)
{
    // Marker -> character pairs, applied in this exact order.
    string[,] substitutions =
    {
        { "[AMIGO-SCRIPT-LT]", "<" },
        { "[AMIGO-SCRIPT-GT]", ">" },
        { "[AMIGO-SCRIPT-CR]", "\r" },
        { "[AMIGO-SCRIPT-LF]", "\n" }
    };

    string decoded = script;
    for (int row = 0; row < substitutions.GetLength(0); row++)
    {
        decoded = decoded.Replace(substitutions[row, 0], substitutions[row, 1]);
    }
    return decoded;
}
/// <summary>
/// Replaces angle brackets and line-break characters with placeholder markers.
/// </summary>
/// <param name="script">Raw text to protect.</param>
/// <returns>
/// The text with <c>&lt;</c>, <c>&gt;</c>, CR and LF replaced by their
/// <c>[AMIGO-SCRIPT-*]</c> markers.
/// </returns>
private static string EncodeScript(string script)
{
    // Character -> marker pairs, applied in this exact order.
    string[,] substitutions =
    {
        { "<", "[AMIGO-SCRIPT-LT]" },
        { ">", "[AMIGO-SCRIPT-GT]" },
        { "\r", "[AMIGO-SCRIPT-CR]" },
        { "\n", "[AMIGO-SCRIPT-LF]" }
    };

    string encoded = script;
    for (int row = 0; row < substitutions.GetLength(0); row++)
    {
        encoded = encoded.Replace(substitutions[row, 0], substitutions[row, 1]);
    }
    return encoded;
}
/// <summary>
/// Searches <paramref name="nodes"/> from the end for the most recent element that
/// has the given name, has no child nodes, and is not marked as terminated.
/// </summary>
/// <param name="nodes">Collection to scan, last item first.</param>
/// <param name="name">Tag name to match, compared case-insensitively.</param>
/// <returns>The index of the matching element, or -1 when there is none.</returns>
private int FindTagOpenNodeIndex(TagNodeCollection nodes, string name)
{
    for (int i = nodes.Count - 1; i >= 0; i--)
    {
        TagElement candidate = nodes[i] as TagElement;
        if (candidate == null)
        {
            continue;
        }

        // Ordinal case-insensitive comparison: lower-casing both sides with the current
        // culture can make identical ASCII tag names compare unequal (e.g. "TITLE" vs
        // "title" under Turkish casing rules).
        if (string.Equals(candidate.Name, name, StringComparison.OrdinalIgnoreCase)
            && candidate.Nodes.Count == 0
            && !candidate.IsTerminated)
        {
            return i;
        }
    }
    return -1;
}
// Delimiter sets that end each kind of token (matched ordinally, one character at a time).
private static readonly char[] TAG_NAME_DELIMITERS = " \r\n\t/>".ToCharArray();
private static readonly char[] END_TAG_NAME_DELIMITERS = " \r\n\t>".ToCharArray();
private static readonly char[] ATTRIBUTE_NAME_DELIMITERS = " \r\n\t/>=".ToCharArray();
private static readonly char[] UNQUOTED_VALUE_DELIMITERS = " \r\n\t/>".ToCharArray();

/// <summary>
/// Splits markup into a flat list of tokens.
/// </summary>
/// <remarks>
/// Emitted tokens are: text runs, the markers <c>&lt;</c>, <c>&lt;/</c>, <c>&gt;</c>,
/// <c>/&gt;</c> and <c>=</c>, tag names, attribute names, and attribute values
/// (with surrounding quotes removed). Name and value tokens may be empty strings.
/// </remarks>
/// <param name="input">Markup text to tokenize.</param>
/// <returns>The tokens in the order they occur in <paramref name="input"/>.</returns>
private StringCollection GetTokens(string input)
{
    StringCollection tokens = new StringCollection();
    int pos = 0;
    ParseStatus state = ParseStatus.ReadText;

    while (pos < input.Length)
    {
        switch (state)
        {
            case ParseStatus.ReadText:
                // "</" is only recognized when at least one more character follows, so the
                // end-tag state below always gets to emit a name token after the marker.
                if ((pos + 2) < input.Length && input[pos] == '<' && input[pos + 1] == '/')
                {
                    tokens.Add("</");
                    pos += 2;
                    state = ParseStatus.ReadEndTag;
                }
                else if (input[pos] == '<')
                {
                    tokens.Add("<");
                    pos++;
                    state = ParseStatus.ReadStartTag;
                }
                else
                {
                    int nextTag = input.IndexOf('<', pos);
                    if (nextTag == -1)
                    {
                        // No more tags: the rest of the input is one text token.
                        tokens.Add(input.Substring(pos));
                        return tokens;
                    }
                    tokens.Add(input.Substring(pos, nextTag - pos));
                    pos = nextTag;
                }
                break;

            case ParseStatus.ReadStartTag:
                pos = SkipWhitespaceFrom(input, pos);
                tokens.Add(ReadUntilDelimiter(input, ref pos, TAG_NAME_DELIMITERS));
                pos = SkipWhitespaceFrom(input, pos);
                // Recognize "/>" directly after the tag name (previously a one-character
                // substring was compared with "/>", which could never match and produced
                // an empty attribute-name token for input such as "<br/>").
                state = TryConsumeTagClose(input, ref pos, tokens)
                    ? ParseStatus.ReadText
                    : ParseStatus.ReadAttributeName;
                break;

            case ParseStatus.ReadEndTag:
                pos = SkipWhitespaceFrom(input, pos);
                tokens.Add(ReadUntilDelimiter(input, ref pos, END_TAG_NAME_DELIMITERS));
                pos = SkipWhitespaceFrom(input, pos);
                if (pos < input.Length && input[pos] == '>')
                {
                    tokens.Add(">");
                    pos++;
                    state = ParseStatus.ReadText;
                }
                // Otherwise stay in this state and keep reading name tokens until ">".
                break;

            case ParseStatus.ReadAttributeName:
                pos = SkipWhitespaceFrom(input, pos);
                tokens.Add(ReadUntilDelimiter(input, ref pos, ATTRIBUTE_NAME_DELIMITERS));
                pos = SkipWhitespaceFrom(input, pos);
                if (TryConsumeTagClose(input, ref pos, tokens))
                {
                    state = ParseStatus.ReadText;
                }
                else if (pos < input.Length && input[pos] == '=')
                {
                    tokens.Add("=");
                    pos++;
                    state = ParseStatus.ReadAttributeValue;
                }
                else if (pos < input.Length && input[pos] == '/')
                {
                    // A stray "/" that is not part of "/>" is skipped without a token.
                    pos++;
                }
                break;

            case ParseStatus.ReadAttributeValue:
                pos = SkipWhitespaceFrom(input, pos);
                if (pos < input.Length && (input[pos] == '"' || input[pos] == '\''))
                {
                    char quote = input[pos];
                    int valueStart = pos + 1;
                    int valueEnd = input.IndexOf(quote, valueStart);
                    if (valueEnd == -1)
                    {
                        // Unterminated quote: the value runs to the end of the input.
                        // (Previously the length assumed a closing quote had been consumed,
                        // which dropped the last character or threw on an empty value.)
                        valueEnd = input.Length;
                        pos = input.Length;
                    }
                    else
                    {
                        pos = valueEnd + 1;
                    }
                    tokens.Add(input.Substring(valueStart, valueEnd - valueStart));
                }
                else
                {
                    tokens.Add(ReadUntilDelimiter(input, ref pos, UNQUOTED_VALUE_DELIMITERS));
                    pos = SkipWhitespaceFrom(input, pos);
                }
                state = TryConsumeTagClose(input, ref pos, tokens)
                    ? ParseStatus.ReadText
                    : ParseStatus.ReadAttributeName;
                break;
        }
    }
    return tokens;
}

// Returns the first index at or after pos that is not one of WHITESPACE_CHARS.
private static int SkipWhitespaceFrom(string input, int pos)
{
    while (pos < input.Length && Array.IndexOf(WHITESPACE_CHARS, input[pos]) != -1)
    {
        pos++;
    }
    return pos;
}

// Returns the text from pos up to (not including) the next delimiter or the end of
// input, and advances pos to that position. The result may be an empty string.
private static string ReadUntilDelimiter(string input, ref int pos, char[] delimiters)
{
    int start = pos;
    int end = input.IndexOfAny(delimiters, start);
    if (end == -1)
    {
        end = input.Length;
    }
    pos = end;
    return input.Substring(start, end - start);
}

// Consumes "/>" or ">" at pos, adding the matching token; returns false (and leaves
// pos untouched) when neither is present.
private static bool TryConsumeTagClose(string input, ref int pos, StringCollection tokens)
{
    if ((pos + 1) < input.Length && input[pos] == '/' && input[pos + 1] == '>')
    {
        tokens.Add("/>");
        pos += 2;
        return true;
    }
    if (pos < input.Length && input[pos] == '>')
    {
        tokens.Add(">");
        pos++;
        return true;
    }
    return false;
}
/// <summary>
/// Moves every node from <paramref name="node_index"/> to the end of
/// <paramref name="nodes"/> under <paramref name="new_parent"/>.
/// </summary>
/// <param name="nodes">Collection the nodes are taken out of.</param>
/// <param name="node_index">Index of the first node to move.</param>
/// <param name="new_parent">
/// Element that receives the nodes; it is flagged as explicitly terminated afterwards.
/// </param>
private void MoveNodesDown(ref TagNodeCollection nodes, int node_index, TagElement new_parent)
{
    // Pass 1: attach each trailing node to the new parent, in order.
    int cursor = node_index;
    while (cursor < nodes.Count)
    {
        new_parent.Nodes.Add(nodes[cursor]);
        nodes[cursor].SetParent(new_parent);
        cursor++;
    }

    // Pass 2: drop the moved nodes from the source collection. Removing at the same
    // index repeatedly works because the later items shift down after each removal.
    int pending = nodes.Count - node_index;
    while (pending > 0)
    {
        nodes.RemoveAt(node_index);
        pending--;
    }

    new_parent.IsExplicitlyTerminated = true;
}
public TagNodeCollection Parse(string html)
{
TagNodeCollection nodes = new TagNodeCollection(null);
html = this.PreprocessScript(html, "script");
html = this.PreprocessScript(html, "style");
html = this.RemoveComments(html);
html = this.RemoveSGMLComments(html);
StringCollection tokens = this.GetTokens(html);
int num = 0;
TagElement node = null;
while (num < tokens.Count)
{
if ("<".Equals(tokens[num]))
{
num++;
if (num >= tokens.Count)
{
return nodes;
}
string text = tokens[num];
num++;
node = this.CreateElementByTagName(text);
while (((num < tokens.Count) && !">".Equals(tokens[num])) && !"/>".Equals(tokens[num]))
{
string name = tokens[num];
num++;
if ((num < tokens.Count) && "=".Equals(tokens[num]))
{
string text3;
num++;
if (num < tokens.Count)
{
text3 = tokens[num];
}
else
{
text3 = null;
}
num++;
TagAttribute attribute = new TagAttribute(name, TagEncoder.DecodeValue(text3));
node.Attributes.Add(attribute);
}
else if (num < tokens.Count)
{
TagAttribute attribute2 = new TagAttribute(name, null);
node.Attributes.Add(attribute2);
}
}
nodes.Add(node);
if ((num < tokens.Count) && "/>".Equals(tokens[num]))
{
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -