📄 recordtransformer.java
字号:
/*
 * WebSPHINX web crawling toolkit
 * Copyright (C) 1998,1999 Carnegie Mellon University
 *
 * This library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Library
 * General Public License as published by the Free Software
 * Foundation, version 2.
 *
 * WebSPHINX homepage: http://www.cs.cmu.edu/~rcm/websphinx/
 */
package websphinx;

import java.io.*;
import java.net.URL;
import websphinx.util.Str;

/**
 * Writes a sequence of records, each made of several fields, to an output
 * built from configurable text templates.  With the default templates the
 * output is an HTML table with one row per record: the first cell links to
 * the URL of the record's first Region field and shows the record number,
 * and every field gets its own cell.
 *
 * <p>The output is assembled as: prolog, then for each record
 * (recordDivider before every record but the first) recordStart, the
 * fields (each wrapped in fieldStart/fieldEnd and separated by
 * fieldDivider), recordEnd; the epilog is emitted by {@link #flush}.
 *
 * <p>In recordStart and recordEnd, the sequence <code>%n</code> is replaced
 * by the 1-based record number and <code>%u</code> by the URL of the first
 * Region field (or the empty string if the record has no Region field).
 *
 * <p>All public methods are synchronized on this transformer.
 */
public class RecordTransformer extends RewritableLinkTransformer {

    // Text emitted once, before the first record.
    String prolog = "<HTML><HEAD><TITLE>Extracted Records</TITLE></HEAD><BODY><TABLE>\n";
    // Text emitted after the records whenever the output is flushed.
    String epilog = "</TABLE></BODY></HTML>\n";

    // Templates emitted around each record; %u and %n are substituted.
    String recordStart = "<TR>\n<TD><A HREF=\"%u\">%n.</A>\n";
    String recordEnd = "\n";
    // Text emitted between consecutive records.
    String recordDivider = "";

    // Text emitted around each field, and between consecutive fields.
    String fieldStart = " <TD>";
    String fieldEnd = "\n";
    String fieldDivider = "";

    // Number of records passed to writeRecord so far.
    int nRecords = 0;

    /**
     * Makes a RecordTransformer, handing the filename to the superclass
     * constructor (presumably the file the records are written to --
     * see RewritableLinkTransformer).
     * @param filename name passed to the superclass constructor
     * @exception IOException if the superclass constructor fails
     */
    public RecordTransformer (String filename) throws IOException {
        super (filename);
    }

    /**
     * Set the text emitted before the first record.
     * @param prolog new prolog text
     */
    public synchronized void setProlog (String prolog) {
        this.prolog = prolog;
    }

    /**
     * Get the text emitted before the first record.
     * @return current prolog text
     */
    public synchronized String getProlog () {
        return prolog;
    }

    /**
     * Set the text emitted after the records on every flush.
     * @param epilog new epilog text
     */
    public synchronized void setEpilog (String epilog) {
        this.epilog = epilog;
    }

    /**
     * Get the text emitted after the records on every flush.
     * @return current epilog text
     */
    public synchronized String getEpilog () {
        return epilog;
    }

    /**
     * Set the template emitted at the start of each record.
     * <code>%n</code> and <code>%u</code> are substituted as described
     * in the class comment.
     * @param recordStart new record-start template
     */
    public synchronized void setRecordStart (String recordStart) {
        this.recordStart = recordStart;
    }

    /**
     * Get the template emitted at the start of each record.
     * @return current record-start template
     */
    public synchronized String getRecordStart () {
        return recordStart;
    }

    /**
     * Set the template emitted at the end of each record.
     * <code>%n</code> and <code>%u</code> are substituted as described
     * in the class comment.
     * @param recordEnd new record-end template
     */
    public synchronized void setRecordEnd (String recordEnd) {
        this.recordEnd = recordEnd;
    }

    /**
     * Get the template emitted at the end of each record.
     * @return current record-end template
     */
    public synchronized String getRecordEnd () {
        return recordEnd;
    }

    /**
     * Set the text emitted between consecutive records.
     * @param recordDivider new record divider
     */
    public synchronized void setRecordDivider (String recordDivider) {
        this.recordDivider = recordDivider;
    }

    /**
     * Get the text emitted between consecutive records.
     * @return current record divider
     */
    public synchronized String getRecordDivider () {
        return recordDivider;
    }

    /**
     * Set the text emitted before each field.
     * @param fieldStart new field-start text
     */
    public synchronized void setFieldStart (String fieldStart) {
        this.fieldStart = fieldStart;
    }

    /**
     * Get the text emitted before each field.
     * @return current field-start text
     */
    public synchronized String getFieldStart () {
        return fieldStart;
    }

    /**
     * Set the text emitted after each field.
     * @param fieldEnd new field-end text
     */
    public synchronized void setFieldEnd (String fieldEnd) {
        this.fieldEnd = fieldEnd;
    }

    /**
     * Get the text emitted after each field.
     * @return current field-end text
     */
    public synchronized String getFieldEnd () {
        return fieldEnd;
    }

    /**
     * Set the text emitted between consecutive fields of a record.
     * @param fieldDivider new field divider
     */
    public synchronized void setFieldDivider (String fieldDivider) {
        this.fieldDivider = fieldDivider;
    }

    /**
     * Get the text emitted between consecutive fields of a record.
     * @return current field divider
     */
    public synchronized String getFieldDivider () {
        return fieldDivider;
    }

    /**
     * Flush the record page to disk.  Temporarily writes the epilog:
     * the current file pointer is remembered, the epilog (preceded by the
     * prolog if no record has been written yet) is emitted, and the file
     * pointer is then moved back to the remembered position before the
     * superclass flush runs.  Presumably later output therefore overwrites
     * the temporary epilog -- this depends on the inherited seek/emit
     * behavior, which is not visible here.
     * @exception IOException if emitting, seeking or flushing fails
     */
    public synchronized void flush () throws IOException {
        long p = getFilePointer ();
        if (nRecords == 0) {
            // Nothing written yet: include the prolog so the flushed
            // output is prolog + epilog.  writeRecord emits the prolog
            // again for record 1, after the seek below has rewound.
            emit (prolog);
        }
        emit (epilog);
        seek (p);
        super.flush ();
    }

    /**
     * Get the number of records written so far.
     * @return number of calls made to writeRecord that got past
     * argument checking
     */
    public synchronized int getRecordCount () {
        return nRecords;
    }

    /**
     * Write one record.  Emits the prolog (first record) or the record
     * divider (later records), the record-start template, every field
     * wrapped in fieldStart/fieldEnd and separated by fieldDivider, and
     * finally the record-end template.
     *
     * @param fields the fields of the record.  A Region element is written
     * with write(Region), or as its toText() string when asText is true;
     * any other non-null element is written as its toString() value.
     * A null element is written as an empty field (fieldStart and fieldEnd
     * only) instead of aborting the record half-way with a
     * NullPointerException.
     * @param asText if true, Region fields are written as plain text
     * (Region.toText()) rather than as regions
     * @exception IOException if writing to the output fails
     * @exception NullPointerException if fields is null; thrown before the
     * record count or the output is touched
     */
    public synchronized void writeRecord (Object[] fields, boolean asText) throws IOException {
        // Reject a null array up front so a bad call cannot bump the
        // record count or leave a dangling prolog/divider in the output.
        if (fields == null) {
            throw new NullPointerException ("fields must not be null");
        }

        ++nRecords;
        emit ((nRecords == 1) ? prolog : recordDivider);

        URL url = urlOfFirstRegion (fields);
        emitTemplate (recordStart, url, nRecords);

        for (int i = 0; i < fields.length; ++i) {
            if (i > 0) {
                emit (fieldDivider);
            }
            emit (fieldStart);

            Object f = fields[i];
            if (f instanceof Region) {
                Region r = (Region) f;
                if (asText) {
                    write (r.toText ());
                } else {
                    write (r);
                }
            } else if (f != null) {
                write (f.toString ());
            }
            // f == null: nothing to write, the field stays empty.

            emit (fieldEnd);
        }

        emitTemplate (recordEnd, url, nRecords);
    }

    /**
     * Find the URL of the page that the first Region in fields came from.
     * @param fields fields of a record (non-null; elements may be null)
     * @return getSource().getURL() of the first element that is a Region,
     * or null if no element is a Region
     */
    private URL urlOfFirstRegion (Object[] fields) {
        for (int i = 0; i < fields.length; ++i) {
            // instanceof is false for null, so null elements are skipped.
            if (fields[i] instanceof Region) {
                Region r = (Region) fields[i];
                return r.getSource ().getURL ();
            }
        }
        return null;
    }

    /**
     * Emit a record template after substituting its placeholders.
     * A null or empty template emits nothing.
     * @param template template text; <code>%n</code> is replaced by the
     * record number and <code>%u</code> by the URL
     * @param url URL substituted for <code>%u</code>; null is substituted
     * as the empty string
     * @param record record number substituted for <code>%n</code>
     * @exception IOException if emitting fails
     */
    private void emitTemplate (String template, URL url, int record) throws IOException {
        if (template == null || template.length () == 0) {
            return;
        }

        // %n is replaced before %u so that a "%n" occurring inside the
        // URL text is not itself rewritten.
        template = Str.replace (template, "%n", String.valueOf (record));
        template = Str.replace (template, "%u", url != null ? url.toString () : "");
        emit (template);
    }

    /*
     * Testing
     *
    public static void main (String[] args) throws Exception {
        Pattern p = new Tagexp (args[0].replace ('_', ' ') );
        RecordTransformer records = new RecordTransformer (args[1]);
        for (int i=2; i<args.length; ++i) {
            Page page = new Page (new Link (args[i]));
            PatternMatcher m = p.match (page);
            for (Region r = m.nextMatch(); r != null; r = m.nextMatch())
                records.writeRecord (r.getFields (Pattern.groups), false);
        }
        records.close ();
    }
    */
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -