📄 parseurl.java
字号:
package cn.yicha.subject.spider.store;
import java.net.URL;
/**
 * Static helpers for pulling simple pieces of information out of fetched
 * pages: the HTML title, the text between tags, and the domain part of a URL.
 */
public class ParseUrl {

    /**
     * Reads the HTML title from the given page content.
     *
     * <p>The {@code <title>} and {@code </title>} tags are matched
     * case-insensitively, and the closing tag is searched only after the
     * opening tag. The text between them is returned exactly as it appears in
     * {@code content} (original casing, no trimming).
     *
     * @param content the page content; may be {@code null}
     * @return the text between the first {@code <title>} and the following
     *         {@code </title>}, or an empty string if {@code content} is
     *         {@code null} or either tag is missing
     */
    public static String getTitle(String content) {
        if (content == null) {
            return "";
        }
        final String prefix = "<title>";
        final String appendix = "</title>";

        int open = indexOfIgnoreCase(content, prefix, 0);
        if (open < 0) {
            return "";
        }
        int start = open + prefix.length();

        // Search from the end of the opening tag so that a stray closing tag
        // earlier in the document cannot produce an inverted range.
        int end = indexOfIgnoreCase(content, appendix, start);
        if (end < 0) {
            return "";
        }
        // Slice the original content, not a lower-cased copy, so the title
        // keeps its casing.
        return content.substring(start, end);
    }

    /**
     * Strips the markup from an HTML page and returns the remaining text.
     *
     * <p>Every non-empty run of characters found between a {@code '>'} and
     * the next {@code '<'} is appended to the result, followed by a single
     * space. Text located before the first {@code '>'} or after the last
     * {@code '<'} is not included.
     *
     * @param content the page content; may be {@code null}
     * @return the concatenated text segments, each followed by one space;
     *         an empty string if {@code content} is {@code null} or contains
     *         no such segment
     */
    public static String exportContext(String content) {
        if (content == null) {
            return "";
        }
        final String beginTag = "<";
        final String endTag = ">";

        StringBuilder result = new StringBuilder();
        int tagClose = content.indexOf(endTag, 0);
        while (tagClose >= 0) {
            int tagOpen = content.indexOf(beginTag, tagClose);
            if (tagOpen < 0) {
                break;
            }
            // Only emit when there is at least one character between the tags.
            if (tagOpen > tagClose + 1) {
                result.append(content, tagClose + 1, tagOpen);
                result.append(" ");
            }
            tagClose = content.indexOf(endTag, tagOpen);
        }
        return result.toString();
    }

    /**
     * Returns the domain part of the URL's host name.
     *
     * <p>The result is the host with everything before its first {@code '.'}
     * removed; the dot itself is kept (for example {@code "www.example.com"}
     * yields {@code ".example.com"}). If the host contains no dot, or starts
     * with one, the host is returned unchanged.
     *
     * <p>NOTE(review): for a host with a single dot such as
     * {@code "example.com"} this returns {@code ".com"} - confirm that callers
     * expect this.
     *
     * @param url the URL whose host is inspected; must not be {@code null}
     * @return the host from its first dot onwards, or the whole host as
     *         described above
     */
    public static String getDomainName(URL url) {
        String host = url.getHost();
        int pos = host.indexOf(".");
        if (pos > 0) {
            return host.substring(pos);
        }
        return host;
    }

    /**
     * Finds the first occurrence of {@code needle} in {@code text} at or after
     * {@code from}, ignoring case, without building a case-converted copy.
     *
     * @return the index of the match, or {@code -1} if there is none
     */
    private static int indexOfIgnoreCase(String text, String needle, int from) {
        final int needleLength = needle.length();
        final int last = text.length() - needleLength;
        for (int i = Math.max(from, 0); i <= last; i++) {
            if (text.regionMatches(true, i, needle, 0, needleLength)) {
                return i;
            }
        }
        return -1;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -