// Captured from an online code viewer (file: htmlutils.java; "font size" control).
// Decompiled by Jad v1.5.8e2. Copyright 2001 Pavel Kouznetsov.
// Jad home page: http://kpdus.tripod.com/jad.html
// Decompiler options: packimports(3) fieldsfirst ansi space
// Source File Name: HTMLUtils.java
package org.gudy.azureus2.core3.html;
import java.io.PrintStream;
import java.util.*;
import org.gudy.azureus2.core3.xml.util.XUXmlWriter;
public class HTMLUtils
{
/**
 * Creates an instance. Every operation declared in this class is a static
 * method and the class has no fields, so an instance carries no state.
 */
public HTMLUtils()
{
}
/**
 * Converts a small HTML fragment into a list of indented text lines.
 *
 * <p>The list tags {@code <ol>}, {@code </ol>}, {@code <ul>}, {@code </ul>} and
 * {@code </li>} are removed, each {@code <li>} becomes a newline, a tab and a
 * {@code *}, and the remaining text is split at every {@code <br>} or
 * {@code <p>}. All tags are recognised regardless of letter case.
 *
 * @param indent prefix prepended to every returned line
 * @param text   HTML fragment to convert; must not be {@code null}
 * @return a list of {@code String}s, one per line; it always holds at least one
 *         element (the whole remaining text when no break tag is present)
 */
public static List convertHTMLToText(String indent, String text)
{
    // The replacements stay sequential (rather than one combined pattern) so
    // the result for any given input is built exactly as before, only
    // case-insensitively.
    text = text.replaceAll("(?i)<ol>", "");
    text = text.replaceAll("(?i)</ol>", "");
    text = text.replaceAll("(?i)<ul>", "");
    text = text.replaceAll("(?i)</ul>", "");
    text = text.replaceAll("(?i)</li>", "");
    text = text.replaceAll("(?i)<li>", "\n\t*");

    final String[] tokens = { "<br>", "<p>" };
    List<String> lines = new ArrayList<String>();
    int pos = 0;
    while (true)
    {
        // Locate the earliest break token at or after pos.
        int next = -1;
        int token_length = 0;
        for (String token : tokens)
        {
            int x = indexOfBreakToken(text, token, pos);
            if (x != -1 && (next == -1 || x < next))
            {
                next = x;
                token_length = token.length();
            }
        }
        if (next == -1)
        {
            lines.add(indent + text.substring(pos));
            break;
        }
        lines.add(indent + text.substring(pos, next));
        pos = next + token_length;
    }
    return lines;
}

/**
 * Case-insensitive {@code indexOf} that searches the original string directly.
 * Searching a lower-cased copy is unsafe because lower-casing can change the
 * string length (e.g. U+0130), which shifts every later offset.
 */
private static int indexOfBreakToken(String text, String token, int from)
{
    int last = text.length() - token.length();
    for (int i = from; i <= last; i++)
    {
        if (text.regionMatches(true, i, token, 0, token.length()))
        {
            return i;
        }
    }
    return -1;
}
/**
 * Joins the string form of every list element into one string, placing a
 * single newline between consecutive elements (none before the first or after
 * the last).
 *
 * @param list elements to join; each element's {@code toString()} is used
 * @return the joined text, or an empty string for an empty list
 */
public static String convertListToString(List list)
{
    StringBuilder joined = new StringBuilder();
    boolean first = true;
    for (Object element : list)
    {
        if (!first)
        {
            joined.append("\n");
        }
        joined.append(element.toString());
        first = false;
    }
    return joined.toString();
}
/**
 * Reduces an HTML document to plain text.
 *
 * <p>Steps, in order: {@code script} elements are removed via
 * {@link #removeTagPairs(String, String)}; {@code &nbsp;} entities and
 * no-break space characters become ordinary spaces; runs of whitespace collapse
 * to one space; every remaining {@code <...>} tag is dropped, with {@code p}
 * and {@code br} tags (including forms such as {@code <br/>} or
 * {@code <p class="x">}) producing a newline unless the text already ends with
 * one; finally blanks around newlines are trimmed and one leading whitespace
 * character, if present, is removed.
 *
 * @param content HTML text; must not be {@code null}
 * @return the plain-text rendering
 */
public static String convertHTMLToText2(String content)
{
    content = removeTagPairs(content, "script");
    content = content.replace("&nbsp;", " ").replace('\u00A0', ' ');
    content = content.replaceAll("[\\s]+", " ");

    StringBuilder text = new StringBuilder(content.length());
    int pos = 0;
    while (true)
    {
        int p1 = content.indexOf('<', pos);
        int p2 = p1 == -1 ? -1 : content.indexOf('>', p1);
        if (p2 == -1)
        {
            // No further complete tag: keep the rest verbatim.
            text.append(content, pos, content.length());
            break;
        }
        text.append(content, pos, p1);
        if (isLineBreakTag(content.substring(p1 + 1, p2))
                && text.length() > 0
                && text.charAt(text.length() - 1) != '\n')
        {
            text.append('\n');
        }
        pos = p2 + 1;
    }

    String res = text.toString();
    res = res.replaceAll("[ \\t\\x0B\\f\\r]+", " ");
    res = res.replaceAll("[ \\t\\x0B\\f\\r]+\\n", "\n");
    res = res.replaceAll("\\n[ \\t\\x0B\\f\\r]+", "\n");
    if (res.length() > 0 && Character.isWhitespace(res.charAt(0)))
    {
        res = res.substring(1);
    }
    return res;
}

/**
 * Tells whether the text between {@code <} and {@code >} is an opening
 * {@code p} or {@code br} tag, optionally followed by attributes or a
 * self-closing slash.
 */
private static boolean isLineBreakTag(String tag)
{
    int end = 0;
    while (end < tag.length() && Character.isLetterOrDigit(tag.charAt(end)))
    {
        end++;
    }
    if (end < tag.length())
    {
        char following = tag.charAt(end);
        if (following != '/' && !Character.isWhitespace(following))
        {
            return false;
        }
    }
    String name = tag.substring(0, end);
    return name.equalsIgnoreCase("p") || name.equalsIgnoreCase("br");
}
/**
 * Re-wraps text so that no output line is longer than {@code length}
 * characters.
 *
 * <p>The input is split on newlines (empty input lines are skipped by the
 * tokenizer). A line that is too long is broken at the last whitespace
 * character found within its first {@code length} characters, and that
 * whitespace character is dropped; if there is none, the line is cut hard
 * after {@code length} characters. Output lines are joined with a newline.
 *
 * @param str    text to wrap; must not be {@code null}
 * @param length maximum line length; must be positive whenever a line actually
 *               needs wrapping
 * @return the wrapped text
 * @throws IllegalArgumentException if a line has to be wrapped and
 *         {@code length} is not positive
 */
public static String splitWithLineLength(String str, int length)
{
    StringBuilder res = new StringBuilder(str.length() + 16);
    StringTokenizer tok = new StringTokenizer(str, "\n");
    while (tok.hasMoreTokens())
    {
        String line = tok.nextToken();
        while (line.length() > length)
        {
            if (length <= 0)
            {
                // A zero width can never consume any characters.
                throw new IllegalArgumentException("length must be positive: " + length);
            }
            if (res.length() > 0)
            {
                res.append('\n');
            }
            // Prefer breaking at the right-most whitespace inside the window.
            int cut = -1;
            for (int i = length - 1; i >= 0; i--)
            {
                if (Character.isWhitespace(line.charAt(i)))
                {
                    cut = i;
                    break;
                }
            }
            if (cut != -1)
            {
                res.append(line, 0, cut);
                line = line.substring(cut + 1);
            }
            else
            {
                res.append(line, 0, length);
                line = line.substring(length);
            }
        }
        // Emit the remainder even when nothing has been written yet, so a
        // short first line is not lost.
        if (line.length() > 0)
        {
            if (res.length() > 0)
            {
                res.append('\n');
            }
            res.append(line);
        }
    }
    return res.toString();
}
/**
 * Removes every element with the given tag name, including its content and any
 * same-named elements nested inside it.
 *
 * <p>Tag names are matched case-insensitively and must be followed by
 * whitespace, {@code /}, {@code >} or the end of the input, so removing
 * {@code b} does not touch {@code <br>}. If an opening tag has no closing tag
 * at all, the text from that opening tag onwards is kept unchanged. If a
 * closing tag is found but has no terminating {@code >}, the remainder of the
 * input is discarded.
 *
 * @param content  HTML text; must not be {@code null}
 * @param tag_name element name without angle brackets, e.g. {@code "script"};
 *                 must not be {@code null}
 * @return {@code content} with the matching elements removed
 */
public static String removeTagPairs(String content, String tag_name)
{
    if (tag_name == null)
    {
        throw new NullPointerException("tag_name");
    }
    String open_prefix = "<" + tag_name;
    String close_prefix = "</" + tag_name;

    StringBuilder res = new StringBuilder(content.length());
    int pos = 0;       // where the next tag search starts
    int resume = 0;    // first character not yet copied or discarded
    int level = 0;     // current nesting depth of tag_name elements
    while (true)
    {
        int start_tag_start = indexOfTagPrefix(content, open_prefix, pos);
        int end_tag_start = indexOfTagPrefix(content, close_prefix, pos);
        if (level == 0)
        {
            if (start_tag_start == -1)
            {
                res.append(content, pos, content.length());
                break;
            }
            res.append(content, pos, start_tag_start);
            resume = start_tag_start;
            level = 1;
            // Step past '<' so the next search does not find this tag again.
            pos = start_tag_start + 1;
            continue;
        }
        if (end_tag_start == -1)
        {
            // Unbalanced input: keep what has not been consumed, including the
            // '<' of an opening tag that was never closed.
            res.append(content, resume, content.length());
            break;
        }
        if (start_tag_start == -1 || end_tag_start < start_tag_start)
        {
            level--;
            int end_end = content.indexOf('>', end_tag_start);
            if (end_end == -1)
            {
                break;
            }
            pos = end_end + 1;
            resume = pos;
            continue;
        }
        // A nested opening tag comes before the next closing tag.
        level++;
        pos = start_tag_start + 1;
    }
    return res.toString();
}

/**
 * Finds {@code prefix} in {@code content} at or after {@code from}, ignoring
 * case, and only where the match ends the tag name. The original string is
 * searched directly because offsets taken from a lower-cased copy can be
 * shifted when lower-casing changes the string length.
 */
private static int indexOfTagPrefix(String content, String prefix, int from)
{
    int last = content.length() - prefix.length();
    for (int i = Math.max(from, 0); i <= last; i++)
    {
        if (!content.regionMatches(true, i, prefix, 0, prefix.length()))
        {
            continue;
        }
        int after = i + prefix.length();
        if (after == content.length())
        {
            return i;
        }
        char following = content.charAt(after);
        if (following == '>' || following == '/' || Character.isWhitespace(following))
        {
            return i;
        }
    }
    return -1;
}
/**
 * Strips all tags from the input and records where each hyperlink's text ends
 * up in the stripped output.
 *
 * <p>An opening {@code <a ...>} tag with an {@code href} attribute starts a
 * link; the next {@code </a>} ends it. The href value may be double-quoted,
 * single-quoted or unquoted; only the value itself is returned, without the
 * quotes and without any attributes that follow it. Text after an unterminated
 * {@code <} is copied to the output unchanged.
 *
 * @param content_in HTML text; must not be {@code null}
 * @return a two-element array: element 0 is the tag-free text as a
 *         {@code String}; element 1 is a {@code List} with one
 *         {@code Object[]} per link, holding the URL {@code String} at index 0
 *         and an {@code int[] {start, length}} at index 1 that locates the
 *         link text inside element 0
 */
public static Object[] getLinks(String content_in)
{
    List<Object[]> urls = new ArrayList<Object[]>();
    StringBuilder content_out = new StringBuilder(content_in.length());
    String current_url = null;
    int current_url_start = -1;
    int pos = 0;
    while (true)
    {
        int lt = content_in.indexOf('<', pos);
        if (lt == -1)
        {
            break;
        }
        int gt = content_in.indexOf('>', lt + 1);
        if (gt == -1)
        {
            break;
        }
        content_out.append(content_in, pos, lt);
        pos = gt + 1;

        String tag = content_in.substring(lt + 1, gt).trim();
        boolean anchor_open = tag.length() > 1
                && (tag.charAt(0) == 'a' || tag.charAt(0) == 'A')
                && Character.isWhitespace(tag.charAt(1));
        if (anchor_open)
        {
            String href = extractHrefValue(tag);
            if (href != null)
            {
                current_url = href;
                current_url_start = content_out.length();
            }
        }
        else if (tag.startsWith("/") && tag.substring(1).trim().equalsIgnoreCase("a"))
        {
            if (current_url != null)
            {
                int len = content_out.length() - current_url_start;
                urls.add(new Object[] { current_url, new int[] { current_url_start, len } });
            }
            current_url = null;
        }
    }
    if (pos < content_in.length())
    {
        content_out.append(content_in, pos, content_in.length());
    }
    return new Object[] { content_out.toString(), urls };
}

/**
 * Returns the value of the {@code href} attribute of an anchor tag, or
 * {@code null} when the tag has no {@code href=...}. Characters inside quotes
 * are returned as written; an unquoted value ends at the first whitespace.
 */
private static String extractHrefValue(String tag)
{
    final String attr = "href";
    int len = tag.length();
    for (int i = 1; i + attr.length() <= len; i++)
    {
        // The attribute name must start a new token, not sit inside a longer name.
        char before = tag.charAt(i - 1);
        if (!Character.isWhitespace(before) && before != '"' && before != '\'')
        {
            continue;
        }
        if (!tag.regionMatches(true, i, attr, 0, attr.length()))
        {
            continue;
        }
        int p = i + attr.length();
        while (p < len && Character.isWhitespace(tag.charAt(p)))
        {
            p++;
        }
        if (p >= len || tag.charAt(p) != '=')
        {
            continue;
        }
        p++;
        while (p < len && Character.isWhitespace(tag.charAt(p)))
        {
            p++;
        }
        if (p >= len)
        {
            return "";
        }
        char quote = tag.charAt(p);
        if (quote == '"' || quote == '\'')
        {
            int close = tag.indexOf(quote, p + 1);
            return close == -1 ? tag.substring(p + 1) : tag.substring(p + 1, close);
        }
        int end = p;
        while (end < len && !Character.isWhitespace(tag.charAt(end)))
        {
            end++;
        }
        return tag.substring(p, end);
    }
    return null;
}
/**
 * Expands escaped markup into plain characters: the string is first passed
 * through {@code XUXmlWriter.unescapeXML}, then every {@code &nbsp;} entity
 * and every no-break space character (U+00A0) in the result is replaced by an
 * ordinary space.
 *
 * @param str text to expand
 * @return the expanded text
 */
public static String expand(String str)
{
    str = XUXmlWriter.unescapeXML(str);
    // Literal replacement: no regular expression is needed for fixed text.
    return str.replace("&nbsp;", " ").replace('\u00A0', ' ');
}
/**
 * Manual smoke test: runs {@link #getLinks(String)} on a fixed sample and
 * prints the stripped text followed by one line per link (URL, start offset,
 * a comma, and the link-text length).
 *
 * @param args ignored
 */
public static void main(String args[])
{
    String sample = "aaaaaaa <a href=\"http://here/parp \">link< / a > prute <a href=\"http://here/pa\">klink</a>";
    Object[] result = getLinks(sample);
    System.out.println(result[0]);
    for (Iterator it = ((List) result[1]).iterator(); it.hasNext();)
    {
        Object[] entry = (Object[]) it.next();
        int[] span = (int[]) entry[1];
        System.out.println(" " + entry[0] + span[0] + "," + span[1]);
    }
}
}
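// Code-viewer footer (keyboard shortcut help), not part of HTMLUtils.java:
//   Copy code: Ctrl + C
//   Search code: Ctrl + F
//   Full-screen mode: F11
//   Toggle theme: Ctrl + Shift + D
//   Show shortcuts: ?
//   Increase font size: Ctrl + =
//   Decrease font size: Ctrl + -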