📄 invertedindex.java
字号:
import com.sun.javadoc.*;
import java.util.*;
import java.io.*;
/**
 * Collects words from the description text of documented items (see
 * {@code process} / {@code processClass}) and writes them out as a set of
 * linked HTML pages (see {@code writeOutput}): a table-of-contents page, a
 * word index page, a help page and per-letter pages.
 */
public class InvertedIndex {
/** File name of the table-of-contents page created by writeOutput. */
public static final String TOC_PAGE_NAME = "InvIndex.html";
/** File name of the "Word Index" page created by writeOutput. */
public static final String INDEX_PAGE_NAME = "InvIndexConcord.html";
/** File name of the help page created by writeOutput. */
public static final String HELP_PAGE_NAME = "InvIndexHelp.html";
// NOTE(review): PAGE_PREFIX / PAGE_SUFFIX are not referenced in the visible
// part of this file; presumably getPageFileName builds per-letter page names
// from them -- confirm.
public static final String PAGE_PREFIX = "InvIndex-";
public static final String PAGE_SUFFIX = ".html";
// NOTE(review): not referenced in the visible part of this file; presumably a
// maximum display length for names -- confirm against TEntry.formatName.
public static final int NAME_LIMIT = 42;
/** Words that are never indexed; filled from nonwordList in the constructor. */
private HashSet nonwords;
/** Sorted set of TEntry objects, one per (item, distinct indexed word). */
private TreeSet entries;
/** Writer for the table-of-contents page (opened and closed in writeOutput). */
PrintWriter tocPageOut;
// Writer that index rows are printed to in writeOutput. It is never assigned
// in the visible code; setPageOut (defined elsewhere) appears to manage it.
PrintWriter pageOut;
/** Writer for the word index page (opened and closed in writeOutput). */
PrintWriter indexPageOut;
/** Writer for the help page (opened and closed in writeOutput). */
PrintWriter helpPageOut;
// Row counter: writeOutput increments it once per row and starts a new HTML
// table whenever it is a multiple of 100. It is not reset in the visible code.
int linesPer;
/** Title passed to the constructor. */
String title;
/** Distinct first characters (as Character) of the indexed words, rebuilt by writeOutput. */
ArrayList charlist;
// NOTE(review): not referenced in the visible part of this file.
String mostRecentLinkLine = "<br>";
public InvertedIndex(String t) {
nonwords = new HashSet();
title = t;
for(int nx = 0; nx < nonwordList.length; nx++) {
nonwords.add(nonwordList[nx]);
}
entries = new TreeSet();
}
/**
 * Feeds every element of {@code docs} to {@link #processClass}.
 *
 * @param docs list whose elements are all {@code RiEntry} objects
 * @param rep  reporter handed through to {@code processClass}
 */
public void process(List docs, DocErrorReporter rep) {
    Iterator each = docs.iterator();
    while (each.hasNext()) {
        RiEntry current = (RiEntry) each.next();
        processClass(current, rep);
    }
}
/**
 * Adds index entries for one documented item.
 *
 * The item's description (as produced by {@code getDesc}) is split on
 * whitespace, parentheses and square brackets. Each token is stripped with
 * {@link #stripword} and lower-cased; tokens that end up longer than two
 * characters and are not in the non-word set are indexed. Only the first
 * occurrence of a given word within this item is added to {@code entries}.
 *
 * @param r   the item whose comment text is indexed
 * @param rep error reporter; not used by the current implementation
 */
public void processClass(RiEntry r, DocErrorReporter rep) {
    String description = getDesc(r.doc.commentText(), r.doc);
    StringTokenizer tokens = new StringTokenizer(description, " \t\r\n\f()[]");

    // Keep the raw tokens: every entry carries the full token list as context.
    ArrayList rawWords = new ArrayList();
    while (tokens.hasMoreTokens()) {
        rawWords.add(tokens.nextToken());
    }

    HashSet seen = new HashSet();
    int total = rawWords.size();
    for (int pos = 0; pos < total; pos++) {
        String key = stripword((String) rawWords.get(pos)).toLowerCase();
        if (key.length() <= 2 || nonwords.contains(key)) {
            continue;
        }
        TEntry candidate = new TEntry(r, pos, key, rawWords);
        // add() reports whether the word is new for this item
        if (seen.add(key)) {
            entries.add(candidate);
        }
    }
}
/**
 * Trims leading and trailing characters that are neither letters nor digits.
 *
 * Characters in the middle of the word are left alone.
 *
 * @param s the raw token
 * @return the part of {@code s} from its first to its last letter-or-digit
 *         character, or the empty string if it contains none
 */
public String stripword(String s) {
    final int length = s.length();

    int first = 0;
    while (first < length && !Character.isLetterOrDigit(s.charAt(first))) {
        first++;
    }
    if (first == length) {
        return "";
    }

    int last = length - 1;
    while (last > first && !Character.isLetterOrDigit(s.charAt(last))) {
        last--;
    }
    return s.substring(first, last + 1);
}
// Not referenced in the visible part of this file.
// NOTE(review): differs from PAGE_PREFIX ("InvIndex-") only in the case of the
// first letter -- confirm whether both constants are really needed.
public static final String filebase = "invIndex-";
/**
 * Writes all HTML output for the index into {@code outputDir}: the
 * table-of-contents page, the help page, the word index page and (through
 * {@code setPageOut}) the pages holding the index rows themselves.
 *
 * All three fixed page writers are closed before this method returns,
 * including when opening one of them fails or when an unchecked exception
 * escapes while the pages are being written.
 *
 * @param root      supplies the packages listed on the TOC and word index pages
 * @param outputDir directory the fixed pages are created in
 * @param base      passed through to {@code linkFor} and {@code setPageOut}
 * @param rep       receives an error message if an output page cannot be opened
 * @return {@code false} if one of the fixed pages could not be opened,
 *         {@code true} otherwise
 */
public boolean writeOutput(RootDoc root, File outputDir, String base, DocErrorReporter rep) {
    Iterator it;
    TEntry entry;
    char c1;

    // first, see what letters we have to work with
    charlist = new ArrayList();
    char prevchar = '@';
    char tchar;
    for (it = entries.iterator(); it.hasNext(); ) {
        entry = (TEntry)(it.next());
        tchar = entry.word.charAt(0);
        if (tchar != prevchar) {
            charlist.add(new Character(tchar));
            prevchar = tchar;
        }
    }

    // Forget writers from any earlier call so the failure path below only
    // closes what this call actually opened.
    tocPageOut = null;
    indexPageOut = null;
    helpPageOut = null;
    try {
        tocPageOut = new PrintWriter(new FileWriter(new File(outputDir, TOC_PAGE_NAME)));
        indexPageOut = new PrintWriter(new FileWriter(new File(outputDir, INDEX_PAGE_NAME)));
        helpPageOut = new PrintWriter(new FileWriter(new File(outputDir, HELP_PAGE_NAME)));
    }
    catch (IOException ie) {
        // Do not leak the pages that were opened before the failing one.
        closeWriter(tocPageOut);
        closeWriter(indexPageOut);
        closeWriter(helpPageOut);
        rep.printError("Error in opening output page: " + ie);
        return false;
    }

    try {
        // write the table-of-contents page
        writeLines(tocPageOut, headerLines);
        tocPageOut.println("<ul>");
        tocPageOut.println("<p>This is the table of contents page for the inverted index.");
        tocPageOut.println("Click on one of the letters below to jump to that part of the");
        tocPageOut.println("index.");
        tocPageOut.println("<center>");
        tocPageOut.println("<p><font size='+1'><b>");
        for (int cx = 0; cx < charlist.size(); cx++) {
            c1 = ((Character)(charlist.get(cx))).charValue();
            String nam = getPageFileName(c1);
            tocPageOut.println("<a href=\"" + nam + "\">" +
                               Character.toUpperCase(c1) +
                               "</a> ");
        }
        tocPageOut.println("</b></font><p>");
        tocPageOut.print("<a href=\"" + INDEX_PAGE_NAME + "\">Word Index</a>");
        tocPageOut.print(" ");
        tocPageOut.print("<a href=\"" + HELP_PAGE_NAME + "\">Help</a>");
        tocPageOut.println("</p>");
        tocPageOut.println("</center>");
        tocPageOut.println("<p> </p>");
        tocPageOut.println("This inverted index was created from the");
        PackageDoc [] pds = root.specifiedPackages();
        writePackageList(tocPageOut, pds, base);
        tocPageOut.println("</ul>");
        writeLines(tocPageOut, footerLines);
        tocPageOut.close();

        // write out the help file
        writeLines(helpPageOut, headerLines);
        writeLines(helpPageOut, helpLines);
        writeLines(helpPageOut, footerLines);
        helpPageOut.close();

        // write the beginning of the index page
        writeLines(indexPageOut, headerLines);
        indexPageOut.println("<p> </p>");
        indexPageOut.println("<h2>Word Index</h2>");
        indexPageOut.println("<ul><p>");
        indexPageOut.println("This page shows each word found in the inverted index, along");
        indexPageOut.println("with the classes whose one-sentence description includes that");
        indexPageOut.println("word. Click on a letter below to jump to that part of the word");
        indexPageOut.println("index. Click on a word in the index to jump to that part of");
        indexPageOut.println("the inverted index.");
        indexPageOut.println("</p><center><p>");
        indexPageOut.println("<font size='+1'><b>");
        for (int cx = 0; cx < charlist.size(); cx++) {
            c1 = ((Character)(charlist.get(cx))).charValue();
            indexPageOut.println("<a href='#" +
                                 Character.toUpperCase(c1) + "'>" +
                                 Character.toUpperCase(c1) + "</a> ");
        }
        indexPageOut.println("</b></font><p>");
        indexPageOut.print("<a href=\"" + TOC_PAGE_NAME + "\">Main Page</a>");
        indexPageOut.print(" ");
        indexPageOut.print("<a href=\"" + HELP_PAGE_NAME + "\">Help</a>");
        indexPageOut.println("</p></center>");
        indexPageOut.println("<p> </p>");
        indexPageOut.println("This word index was created from the");
        writePackageList(indexPageOut, pds, base);
        indexPageOut.println("<p> </p>");

        // write the individual pages
        int wx;
        String linkdest;
        String prevword = "";
        for (it = entries.iterator(); it.hasNext(); ) {
            entry = (TEntry)(it.next());
            // setPageOut is defined elsewhere; the rows below are written to
            // whatever pageOut refers to after this call.
            setPageOut(entry, outputDir, base, rep);
            // start a fresh table every 100 rows
            if ((linesPer % 100) == 0) {
                if (linesPer != 0)
                    pageOut.println("</table><br>\n");
                pageOut.println("<table border=1 cellspacing=1 cellpadding=2 width='98%'>");
            }
            pageOut.println("<tr><td width=120 valign=top nowrap><small><code>");
            if (!(entry.getWord().equalsIgnoreCase(prevword))) {
                // first row for a new word: drop an anchor on the row page and
                // open a new section for the word on the index page
                char wc, pwc;
                pageOut.println("<a name='" + entry.getWord() + "'></a>");
                if (prevword.length() > 0) {
                    indexPageOut.println("</ul>");
                    pwc = Character.toUpperCase(prevword.charAt(0));
                }
                else pwc = (char)0;
                wc = Character.toUpperCase(entry.getWord().charAt(0));
                if (pwc != wc) {
                    // new initial letter: close the previous letter's list
                    // (if any) and start a new headed list
                    if (pwc != (char)0)
                        indexPageOut.println("</ul>");
                    indexPageOut.println("<h1><a name='" + wc + "'></a>"
                                         + wc + "</h1>");
                    indexPageOut.println("<ul>");
                }
                indexPageOut.println("<p><font size='+1'><b>" +
                                     "<a href='" + getPageFileName(wc) +
                                     "#" + entry.getWord() + "'>" +
                                     entry.getWord() +
                                     "</a></b></font><ul>");
                prevword = entry.getWord();
            }
            // the same link target is used on the index page and on the row page
            linkdest = linkFor(entry, base);
            indexPageOut.println("<br><a href='" +
                                 linkdest +
                                 "'>" + entry.formatNameFull() +
                                 "</a>");
            pageOut.println("<a href='" + linkdest + "'>" +
                            entry.formatName() + "</a>");
            pageOut.println("</code></small></td>");
            // middle cell: the words that precede the indexed word
            pageOut.println("<td width=220 align=right valign=top><small>");
            for (wx = 0; wx < entry.whichword; wx++) {
                if (wx > 0) pageOut.print(" ");
                pageOut.print(reverseIndexDoclet.encode((String)(entry.words.get(wx))));
            }
            if (wx == 0) pageOut.print(" ");
            pageOut.println("</small></td>");
            // last cell: the indexed word in bold, then the words that follow it
            pageOut.println("<td valign=bottom align=left><small>");
            for ( ; wx < entry.words.size(); wx++) {
                pageOut.print(" ");
                if (wx == entry.whichword) {
                    pageOut.print("<b>");
                    pageOut.print(reverseIndexDoclet.encode((String)(entry.words.get(wx))));
                    pageOut.print("</b>");
                }
                else {
                    // Was decode(); the leading context and the highlighted
                    // word are passed through encode(), and the text was
                    // already decoded once in getDesc, so the trailing
                    // context is now treated the same way.
                    pageOut.print(reverseIndexDoclet.encode((String)(entry.words.get(wx))));
                }
            }
            pageOut.println("</small></td>");
            pageOut.println("</tr>");
            linesPer++;
        }
        // NOTE(review): presumably finishes the last row page -- confirm in setPageOut.
        setPageOut(null, outputDir, base, rep);

        // write the end of the index page
        indexPageOut.println("</ul>");
        indexPageOut.println("</ul>");
        writeLines(indexPageOut, footerLines);

        // all done
        indexPageOut.close();
        return true;
    }
    finally {
        // Safety net for unchecked exceptions thrown above; closing a
        // PrintWriter that is already closed has no effect.
        closeWriter(tocPageOut);
        closeWriter(helpPageOut);
        closeWriter(indexPageOut);
    }
}

/** Writes the "following Java packages" bullet list, one linked item per package. */
private void writePackageList(PrintWriter out, PackageDoc[] pds, String base) {
    out.println("following Java packages:<ul>");
    for (int px = 0; px < pds.length; px++) {
        out.print("<li><a href='" +
                  linkFor(pds[px], base) +
                  "'><tt>");
        out.println(pds[px].name() + "</tt></a> <small>");
        int classcnt = pds[px].allClasses().length;
        int intcnt = pds[px].interfaces().length;
        // allClasses() is treated as including interfaces, so subtract them
        int plaincnt = classcnt - intcnt;
        out.print("(" + plaincnt +
                  ((plaincnt == 1) ? (" class") : (" classes")) +
                  ", " + intcnt +
                  ((intcnt == 1) ? (" interface") : (" interfaces")) +
                  ")</small>");
        out.println("</li>");
    }
    out.println("</ul>");
}

/** Closes {@code w} if it is non-null. */
private static void closeWriter(PrintWriter w) {
    if (w != null) {
        w.close();
    }
}
// utility functions for various stuff
/**
 * Delimiter characters getDesc passes to its StringTokenizer: space, tab,
 * newline, '&lt;', '&gt;' and carriage return. getDesc asks the tokenizer to
 * return the delimiters as tokens too.
 */
public static final String delimSet = " \t\n<>\r";
public String getDesc(String ct, Doc doc) {
StringTokenizer st;
String clname;
String ctd = reverseIndexDoclet.decode(ct);
int px = doc.name().lastIndexOf('.');
if (px > 0) clname = doc.name().substring(px + 1);
else clname = doc.name();
if (ctd.indexOf(clname) < 0) {
ctd = clname + ": " + ctd;
}
st = new StringTokenizer(ctd, delimSet, true);
StringBuffer bf = new StringBuffer();
boolean intag = false;
boolean inwht = false;
while(st.hasMoreTokens()) {
String token = st.nextToken();
char c1 = token.charAt(0);
if (c1 == '<') {
intag = true;
}
else if (c1 == '>') {
intag = false;
}
else if (!intag) {
if (c1 == '\n' || c1 == '\t' || c1 == '\r') {
token = " "; c1 = ' ';
}
if (token.length() == 1 && Character.isWhitespace(c1)) {
if (!inwht) bf.append(" ");
else inwht = true;
}
else {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -