// File: extract163mp3.java
package com.luceneheritrixbook.extractor.com163;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.util.Date;
import org.htmlparser.NodeFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.TableColumn;
import org.htmlparser.util.NodeList;
import com.luceneheritrixbook.extractor.Extractor;
import com.luceneheritrixbook.util.StringUtils;
/**
 * Extractor for a saved 163.com MP3 page.
 *
 * <p>The page is scanned for {@code <td>} cells. A "title" cell (class
 * {@code "f14px fB cWhite"}, width {@code "141"}) opens a new output file whose
 * name is built from the cell content plus a timestamp. Each later "detail"
 * cell (width {@code "31%"}) is written to that file as a {@code label:value}
 * line, where the value is taken from the node two siblings further on.
 */
public class Extract163Mp3 extends Extractor {

    /**
     * Walks every {@code <td>} of the parsed page and writes the extracted
     * title and detail rows to a file under {@code getOutputPath()}.
     *
     * <p>Any exception is reported with {@code printStackTrace()} and ends the
     * extraction of this page; the output file, if one was opened, is always
     * closed.
     */
    public void extract() {
        BufferedWriter bw = null;
        NodeFilter filter = new TagNameFilter("td");
        try {
            NodeList nodes = this.getParser().extractAllNodesThatMatch(filter);
            for (int i = 0; i < nodes.size(); i++) {
                TableColumn node = (TableColumn) nodes.elementAt(i);
                String width = node.getAttribute("width");

                if ("31%".equals(width)) {
                    // Detail row. Without a preceding title cell there is no
                    // output file yet, so there is nowhere to write: skip the
                    // row instead of failing with a NullPointerException.
                    if (bw == null) {
                        continue;
                    }
                    String result = getProp(
                            "<td width=\"31%\" align=\"left\" bgcolor=\"#F1F1F1\" class=\"fB\">(.*)</td>",
                            node.toHtml(), 1);
                    // The value cell is two siblings after the label cell
                    // (presumably a text node sits in between - confirm
                    // against the page markup).
                    TableColumn nodeExt = (TableColumn) node.getNextSibling()
                            .getNextSibling();
                    bw.write(StringUtils.trim(result) + ":"
                            + StringUtils.trim(nodeExt.getChildrenHTML())
                            + "\n");
                } else if ("f14px fB cWhite".equals(node.getAttribute("class"))
                        && "141".equals(width)) {
                    // Title cell: build "<word>-<word>-...-<millis>" as the
                    // file name; the timestamp keeps names unique.
                    String[] names = node.getChildrenHTML().split(" ");
                    StringBuffer title = new StringBuffer();
                    for (int k = 0; k < names.length; k++) {
                        title.append(names[k]).append("-");
                    }
                    title.append((new Date()).getTime());
                    // '/' would be read as a directory separator in the name.
                    String titleStr = title.toString().replaceAll("/", "_");

                    // Release a file opened by an earlier title cell before
                    // switching to the new one.
                    closeOutput(bw);
                    bw = null;
                    bw = new BufferedWriter(new FileWriter(new File(this
                            .getOutputPath()
                            + titleStr)));

                    // Rebuild the page URL from the part of the input path
                    // that follows "mirror" (6 = "mirror".length()).
                    int startPos = getInuputFilePath().indexOf("mirror") + 6;
                    String urlSeg = getInuputFilePath().substring(startPos);
                    urlSeg = urlSeg.replaceAll("\\\\", "/");
                    // urlSeg is expected to start with a separator, which
                    // supplies the second '/' of "http://".
                    String url = "http:/" + urlSeg;
                    bw.write(url + "\n");
                    bw.write(names[0] + "\n");
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Close on every path so buffered data is flushed and the file
            // handle is not leaked when an exception interrupts the loop.
            closeOutput(bw);
        }
    }

    /**
     * Closes the given writer if it is non-null, reporting (not propagating)
     * a failure to close.
     */
    private static void closeOutput(BufferedWriter writer) {
        if (writer == null) {
            return;
        }
        try {
            writer.close();
        } catch (java.io.IOException e) {
            e.printStackTrace();
        }
    }
}
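/*
 * Keyboard shortcuts (code-viewer page residue, not part of the program):
 *   Copy code:           Ctrl + C
 *   Search code:         Ctrl + F
 *   Full-screen mode:    F11
 *   Toggle theme:        Ctrl + Shift + D
 *   Show shortcuts:      ?
 *   Increase font size:  Ctrl + =
 *   Decrease font size:  Ctrl + -
 */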