📄 htmlutils.java

📁 java 文件下载器。可自定义
💻 JAVA
字号:
// Decompiled by Jad v1.5.8e2. Copyright 2001 Pavel Kouznetsov.
// Jad home page: http://kpdus.tripod.com/jad.html
// Decompiler options: packimports(3) fieldsfirst ansi space 
// Source File Name:   HTMLUtils.java

package org.gudy.azureus2.core3.html;

import java.io.PrintStream;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.gudy.azureus2.core3.xml.util.XUXmlWriter;

/**
 * Static helpers for turning small HTML fragments into plain text and for
 * extracting anchor links from them.
 *
 * <p>These are lightweight string scanners, not an HTML parser: tags are
 * located by searching for {@code <} and {@code >} characters.
 */
public class HTMLUtils
{

	/** Line-break markers recognised by {@link #convertHTMLToText(String, String)}; ASCII case is ignored. */
	private static final Pattern LINE_BREAK_TOKEN = Pattern.compile("<br>|<p>", Pattern.CASE_INSENSITIVE);

	/** An {@code href} attribute name preceded by whitespace, plus its {@code =} sign and surrounding whitespace. */
	private static final Pattern HREF_ATTRIBUTE = Pattern.compile("\\shref\\s*=\\s*", Pattern.CASE_INSENSITIVE);

	public HTMLUtils()
	{
	}

	/**
	 * Splits simple HTML into indented text lines.
	 *
	 * <p>{@code <ol>}, {@code </ol>}, {@code <ul>}, {@code </ul>} and {@code </li>}
	 * are removed, each {@code <li>} becomes a newline, a tab and an asterisk, and
	 * the text is then split at every {@code <br>} or {@code <p>}. All tags are
	 * matched without regard to ASCII case.
	 *
	 * @param indent prefix prepended to every returned line
	 * @param text   HTML fragment to convert
	 * @return list of {@code String} lines; always contains at least one entry
	 */
	public static List convertHTMLToText(String indent, String text)
	{
		text = text.replaceAll("(?i)<ol>", "");
		text = text.replaceAll("(?i)</ol>", "");
		text = text.replaceAll("(?i)<ul>", "");
		text = text.replaceAll("(?i)</ul>", "");
		text = text.replaceAll("(?i)</li>", "");
		text = text.replaceAll("(?i)<li>", "\n\t*");
		List<String> lines = new ArrayList<String>();
		// Match on the text itself rather than on a lower-cased copy, so the
		// offsets found are always valid offsets into the string being cut.
		Matcher breaks = LINE_BREAK_TOKEN.matcher(text);
		int pos = 0;
		while (breaks.find())
		{
			lines.add(indent + text.substring(pos, breaks.start()));
			pos = breaks.end();
		}
		lines.add(indent + text.substring(pos));
		return lines;
	}

	/**
	 * Joins the string form of every list element with {@code "\n"}.
	 *
	 * @param list elements to join; each is converted with {@code toString()}
	 * @return the joined text, or the empty string for an empty list
	 */
	public static String convertListToString(List list)
	{
		StringBuilder result = new StringBuilder();
		String separator = "";
		for (Iterator iter = list.iterator(); iter.hasNext();)
		{
			result.append(separator).append(iter.next().toString());
			separator = "\n";
		}
		return result.toString();
	}

	/**
	 * Strips all tags from an HTML document and returns the remaining text.
	 *
	 * <p>{@code script} elements are removed first (see
	 * {@link #removeTagPairs(String, String)}), {@code &nbsp;} becomes a space and
	 * every whitespace run collapses to one space. Each {@code p} or {@code br}
	 * tag (with or without attributes or a trailing slash) then starts a new
	 * line unless the output already ends with one. A {@code <} that has no
	 * following {@code >} is copied through unchanged together with the rest of
	 * the input.
	 *
	 * @param content HTML to convert
	 * @return plain text with {@code "\n"} line separators
	 */
	public static String convertHTMLToText2(String content)
	{
		content = removeTagPairs(content, "script");
		content = content.replaceAll("&nbsp;", " ");
		content = content.replaceAll("[\\s]+", " ");
		StringBuilder res = new StringBuilder(content.length());
		int pos = 0;
		while (true)
		{
			int p1 = content.indexOf('<', pos);
			int p2 = (p1 == -1) ? -1 : content.indexOf('>', p1);
			if (p2 == -1)
			{
				// No further complete tag: the remainder is plain text.
				res.append(content, pos, content.length());
				break;
			}
			res.append(content, pos, p1);
			if (isLineBreakTag(content.substring(p1 + 1, p2)) && res.length() > 0 && res.charAt(res.length() - 1) != '\n')
				res.append('\n');
			pos = p2 + 1;
		}
		String text = res.toString();
		// Removing tags can leave adjacent blanks and blanks around the inserted newlines.
		text = text.replaceAll("[ \\t\\x0B\\f\\r]+", " ");
		text = text.replaceAll("[ \\t\\x0B\\f\\r]+\\n", "\n");
		text = text.replaceAll("\\n[ \\t\\x0B\\f\\r]+", "\n");
		if (text.length() > 0 && Character.isWhitespace(text.charAt(0)))
			text = text.substring(1);
		return text;
	}

	/** Tells whether the text between {@code <} and {@code >} is a {@code p} or {@code br} start tag. */
	private static boolean isLineBreakTag(String tag)
	{
		String name = tag.trim();
		if (name.endsWith("/"))
			name = name.substring(0, name.length() - 1).trim();
		int end = 0;
		while (end < name.length() && !Character.isWhitespace(name.charAt(end)))
			end++;
		name = name.substring(0, end);
		return name.equalsIgnoreCase("p") || name.equalsIgnoreCase("br");
	}

	/**
	 * Re-wraps text so that no output line is longer than {@code length} characters.
	 *
	 * <p>The input is split on {@code "\n"} (empty input lines are skipped by the
	 * tokenizer). An over-long line is broken at the last whitespace character
	 * within its first {@code length} characters, and that whitespace character
	 * is dropped; if there is none, the line is cut after exactly {@code length}
	 * characters.
	 *
	 * @param str    text to wrap
	 * @param length maximum line length, must be at least 1
	 * @return the wrapped text with {@code "\n"} between lines
	 * @throws IllegalArgumentException if {@code length} is less than 1
	 */
	public static String splitWithLineLength(String str, int length)
	{
		if (length < 1)
			throw new IllegalArgumentException("length must be at least 1: " + length);
		StringBuilder res = new StringBuilder(str.length() + 16);
		StringTokenizer tok = new StringTokenizer(str, "\n");
		while (tok.hasMoreTokens())
		{
			String line = tok.nextToken();
			while (line.length() > length)
			{
				int break_at = -1;
				for (int i = length - 1; i >= 0; i--)
				{
					if (Character.isWhitespace(line.charAt(i)))
					{
						break_at = i;
						break;
					}
				}
				if (break_at >= 0)
				{
					appendLine(res, line.substring(0, break_at));
					line = line.substring(break_at + 1);
				} else
				{
					appendLine(res, line.substring(0, length));
					line = line.substring(length);
				}
			}
			// The remainder is emitted even when it is the very first output line.
			if (line.length() > 0)
				appendLine(res, line);
		}
		return res.toString();
	}

	/** Appends {@code line} to {@code out}, preceded by a newline when {@code out} already has content. */
	private static void appendLine(StringBuilder out, String line)
	{
		if (out.length() > 0)
			out.append('\n');
		out.append(line);
	}

	/**
	 * Removes every {@code <tag_name ...> ... </tag_name>} element, including
	 * nested occurrences, from {@code content}.
	 *
	 * <p>Tag names are matched without regard to ASCII case and must be followed
	 * by a character that cannot continue a name, so {@code "b"} does not match
	 * {@code <br>}. If an opening tag is never closed, the input from that tag
	 * onwards is kept unchanged. If the last closing tag lacks its {@code >},
	 * everything from the element's start is dropped.
	 *
	 * @param content  text to filter
	 * @param tag_name element name without angle brackets, for example {@code "script"}
	 * @return {@code content} without the matched elements
	 */
	public static String removeTagPairs(String content, String tag_name)
	{
		String name = Pattern.quote(tag_name);
		// (?![\w-]) stops a short name from matching the prefix of a longer tag name.
		Matcher open_tag = Pattern.compile("<" + name + "(?![\\w-])", Pattern.CASE_INSENSITIVE).matcher(content);
		Matcher close_tag = Pattern.compile("</" + name + "(?![\\w-])", Pattern.CASE_INSENSITIVE).matcher(content);
		StringBuilder res = new StringBuilder(content.length());
		int pos = 0;       // where the next search starts
		int keep_from = 0; // where copying resumes if no further closing tag exists
		int level = 0;     // current nesting depth of open elements
		while (true)
		{
			int open_start = open_tag.find(pos) ? open_tag.start() : -1;
			if (level == 0)
			{
				if (open_start == -1)
				{
					res.append(content, pos, content.length());
					break;
				}
				res.append(content, pos, open_start);
				keep_from = open_start;
				level = 1;
				pos = open_start + 1;
				continue;
			}
			int close_start = close_tag.find(pos) ? close_tag.start() : -1;
			if (close_start == -1)
			{
				// Unbalanced input: keep what follows intact instead of cutting into a tag.
				res.append(content, keep_from, content.length());
				break;
			}
			if (open_start == -1 || close_start < open_start)
			{
				level--;
				int close_end = content.indexOf('>', close_start);
				if (close_end == -1)
					break;
				pos = close_end + 1;
				keep_from = pos;
			} else
			{
				level++;
				pos = open_start + 1;
			}
		}
		return res.toString();
	}

	/**
	 * Strips all tags from {@code content_in} and records where the text of each
	 * {@code <a href=...>...</a>} element ends up in the stripped output.
	 *
	 * <p>The href value may be double-quoted, single-quoted or unquoted; an
	 * unquoted value ends at the first whitespace character. Other attributes of
	 * the anchor are ignored. An anchor tag without an {@code href} attribute
	 * leaves the currently open link unchanged. Input after a {@code <} that has
	 * no following {@code >} is copied through unchanged.
	 *
	 * @param content_in HTML fragment to scan
	 * @return a two-element array: index 0 is the tag-free text as a {@code String},
	 *         index 1 is a {@code List} of {@code Object[]} entries, each holding the
	 *         href as a {@code String} and an {@code int[]} of
	 *         {offset, length} into the tag-free text
	 */
	public static Object[] getLinks(String content_in)
	{
		List<Object[]> urls = new ArrayList<Object[]>();
		StringBuilder content_out = new StringBuilder(content_in.length());
		String current_url = null;
		int current_url_start = -1;
		int pos = 0;
		while (true)
		{
			int tag_open = content_in.indexOf('<', pos);
			if (tag_open == -1)
				break;
			int tag_close = content_in.indexOf('>', tag_open + 1);
			if (tag_close == -1)
				break;
			content_out.append(content_in, pos, tag_open);
			pos = tag_close + 1;
			String tag = content_in.substring(tag_open + 1, tag_close).trim();
			if (isAnchorStart(tag))
			{
				String href = extractHref(tag);
				if (href != null)
				{
					current_url = href;
					current_url_start = content_out.length();
				}
			} else
			if (tag.startsWith("/") && tag.substring(1).trim().equalsIgnoreCase("a"))
			{
				if (current_url != null)
				{
					int len = content_out.length() - current_url_start;
					urls.add(new Object[] {
						current_url, new int[] {
							current_url_start, len
						}
					});
				}
				current_url = null;
			}
		}
		content_out.append(content_in, pos, content_in.length());
		return (new Object[] {
			content_out.toString(), urls
		});
	}

	/** Tells whether the trimmed tag text starts with the name {@code a} followed by whitespace. */
	private static boolean isAnchorStart(String tag)
	{
		if (tag.length() < 2)
			return false;
		char first = tag.charAt(0);
		return (first == 'a' || first == 'A') && Character.isWhitespace(tag.charAt(1));
	}

	/**
	 * Returns the value of the first {@code href} attribute in the tag text, or
	 * {@code null} when the tag has none. Surrounding quotes are not part of the
	 * result; an unterminated quoted value runs to the end of the tag.
	 */
	private static String extractHref(String tag)
	{
		Matcher attr = HREF_ATTRIBUTE.matcher(tag);
		if (!attr.find())
			return null;
		int start = attr.end();
		if (start >= tag.length())
			return "";
		char quote = tag.charAt(start);
		if (quote == '"' || quote == '\'')
		{
			int close = tag.indexOf(quote, start + 1);
			return (close == -1) ? tag.substring(start + 1) : tag.substring(start + 1, close);
		}
		int end = start;
		while (end < tag.length() && !Character.isWhitespace(tag.charAt(end)))
			end++;
		return tag.substring(start, end);
	}

	/**
	 * Passes {@code str} through {@code XUXmlWriter.unescapeXML} and then replaces
	 * every remaining {@code &nbsp;} with a space.
	 *
	 * @param str text to expand
	 * @return the expanded text
	 */
	public static String expand(String str)
	{
		str = XUXmlWriter.unescapeXML(str);
		str = str.replaceAll("&nbsp;", " ");
		return str;
	}

	/** Manual smoke test: prints the stripped text and the links found in a sample fragment. */
	public static void main(String args[])
	{
		Object result[] = getLinks("aaaaaaa <a href=\"http://here/parp  \">link< / a > prute <a href=\"http://here/pa\">klink</a>");
		System.out.println(result[0]);
		List urls = (List) result[1];
		for (Iterator iter = urls.iterator(); iter.hasNext();)
		{
			Object entry[] = (Object[]) iter.next();
			int range[] = (int[]) entry[1];
			System.out.println("    " + entry[0] + range[0] + "," + range[1]);
		}
	}
}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -