⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 invertedindex.java

📁 This code sample shows how to write a simple Javadoc 1.2 Doclet. Used with Javadoc, it can generate
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
import com.sun.javadoc.*;
import java.util.*;
import java.io.*;

public class InvertedIndex {
    /** File name of the table-of-contents page created by writeOutput. */
    public static final String TOC_PAGE_NAME = "InvIndex.html";
    /** File name of the "Word Index" page created by writeOutput. */
    public static final String INDEX_PAGE_NAME = "InvIndexConcord.html";
    /** File name of the help page created by writeOutput. */
    public static final String HELP_PAGE_NAME = "InvIndexHelp.html";
    // NOTE(review): PAGE_PREFIX / PAGE_SUFFIX are presumably combined by
    // getPageFileName() to name the per-letter pages -- confirm against that method.
    public static final String PAGE_PREFIX = "InvIndex-";
    public static final String PAGE_SUFFIX = ".html";

    // NOTE(review): presumably a maximum display length for names; confirm where it is used.
    public static final int NAME_LIMIT = 42;

    /** Lower-case words that processClass never indexes; filled by the constructor. */
    private HashSet nonwords;
    /** Sorted set of TEntry objects, one per indexed word occurrence. */
    private TreeSet entries;
    /** Writer for the table-of-contents page; opened and closed inside writeOutput. */
    PrintWriter tocPageOut;
    /** Writer for the per-letter page currently being written; managed through setPageOut. */
    PrintWriter pageOut;
    /** Writer for the word index page; opened and closed inside writeOutput. */
    PrintWriter indexPageOut;
    /** Writer for the help page; opened and closed inside writeOutput. */
    PrintWriter helpPageOut;
    /**
     * Number of table rows written so far; writeOutput starts a new HTML table
     * whenever this is a multiple of 100.
     * NOTE(review): presumably reset by setPageOut when a new page starts -- confirm.
     */
    int linesPer;
    /** Title passed to the constructor. */
    String title;
    /** Distinct first characters (as Character objects) of the indexed words, rebuilt by writeOutput. */
    ArrayList charlist;
    // NOTE(review): initial value is a bare line break; usage is outside the visible code.
    String mostRecentLinkLine = "<br>";

    public InvertedIndex(String t) {
	nonwords = new HashSet();
	title = t;
	for(int nx = 0; nx < nonwordList.length; nx++) {
	    nonwords.add(nonwordList[nx]);
	}
	entries = new TreeSet();
    }

    /**
     * Indexes every element of {@code docs} by handing it to
     * {@link #processClass}.
     *
     * @param docs list whose elements are all RiEntry objects
     * @param rep  reporter forwarded to processClass
     */
    public void process(List docs, DocErrorReporter rep) {
        Iterator each = docs.iterator();
        while (each.hasNext()) {
            RiEntry current = (RiEntry) each.next();
            processClass(current, rep);
        }
    }


    /**
     * Adds index entries for one documented item.  The text returned by
     * getDesc is split on whitespace, parentheses and square brackets; each
     * token is stripped with stripword and lower-cased.  A word is indexed
     * when it is longer than two characters, is not in {@code nonwords}, and
     * has not already been indexed for this item.
     *
     * @param r   the item whose comment text is indexed
     * @param rep reporter (not used by the current implementation)
     */
    public void processClass(RiEntry r, DocErrorReporter rep) {
        String description = getDesc(r.doc.commentText(), r.doc);
        StringTokenizer tokens = new StringTokenizer(description, " \t\r\n\f()[]");

        // Collect every token first: each TEntry receives the complete word list.
        ArrayList words = new ArrayList();
        while (tokens.hasMoreTokens()) {
            words.add(tokens.nextToken());
        }

        HashSet seen = new HashSet();
        final int total = words.size();
        for (int pos = 0; pos < total; pos++) {
            String key = stripword((String) words.get(pos)).toLowerCase();
            if (key.length() <= 2 || nonwords.contains(key)) {
                continue;
            }
            TEntry candidate = new TEntry(r, pos, key, words);
            // Only the first occurrence of a word within this item is indexed.
            if (seen.add(key)) {
                entries.add(candidate);
            }
        }
    }

    /**
     * Trims leading and trailing characters that are neither letters nor
     * digits.  Characters in the interior of the word are left untouched.
     *
     * @param s the raw token
     * @return the trimmed token, or the empty string when {@code s} contains
     *         no letter or digit at all
     */
    public String stripword(String s) {
        final int length = s.length();

        // Advance to the first letter or digit.
        int first = 0;
        while (first < length && !Character.isLetterOrDigit(s.charAt(first))) {
            first++;
        }
        if (first >= length) {
            return "";
        }

        // Walk back from the end to the last letter or digit (never past 'first').
        int last = length - 1;
        while (last > first && !Character.isLetterOrDigit(s.charAt(last))) {
            last--;
        }
        return s.substring(first, last + 1);
    }

    // NOTE(review): not referenced in the visible part of this class, and it
    // differs from PAGE_PREFIX ("InvIndex-") only in the case of the first
    // letter -- confirm whether it is still needed.
    public static final String filebase = "invIndex-";

    /**
     * Writes all HTML pages of the inverted index into {@code outputDir}:
     * the table-of-contents page, the help page, the word index page and,
     * through setPageOut, the per-letter pages.
     *
     * The table-of-contents, help and word-index writers are always closed
     * before this method returns, including when a file cannot be opened or
     * an exception is thrown while the pages are generated.
     *
     * @param root      source of the package list shown on the pages
     * @param outputDir directory in which the pages are created
     * @param base      passed through to linkFor and setPageOut
     * @param rep       receives an error message if an output file cannot be opened
     * @return {@code false} if one of the output files could not be opened,
     *         {@code true} once all pages have been written
     */
    public boolean writeOutput(RootDoc root, File outputDir, String base, DocErrorReporter rep) {
        Iterator it;
        TEntry entry;
        char c1;

        // first, see what letters we have to work with
        charlist = new ArrayList();
        char prevchar = '@';
        char tchar;
        for(it = entries.iterator(); it.hasNext(); ) {
            entry = (TEntry)(it.next());
            tchar = entry.word.charAt(0);
            if (tchar != prevchar) {
                charlist.add(new Character(tchar));
                prevchar = tchar;
            }
        }

        try {
            tocPageOut = new PrintWriter(new FileWriter(new File(outputDir, TOC_PAGE_NAME)));
            indexPageOut = new PrintWriter(new FileWriter(new File(outputDir, INDEX_PAGE_NAME)));
            helpPageOut =  new PrintWriter(new FileWriter(new File(outputDir, HELP_PAGE_NAME)));
        }
        catch (IOException ie) {
            // Release whatever was opened before the failure.  helpPageOut is
            // opened last, so it cannot be newly open here.  Closing a
            // PrintWriter that is already closed is a no-op.
            if (tocPageOut != null) tocPageOut.close();
            if (indexPageOut != null) indexPageOut.close();
            rep.printError("Error in opening output page: " + ie);
            return false;
        }

        try {
            // write the table-of-contents page
            writeLines(tocPageOut, headerLines);
            tocPageOut.println("<ul>");
            tocPageOut.println("<p>This is the table of contents page for the inverted index.");
            tocPageOut.println("Click on one of the letters below to jump to that part of the");
            tocPageOut.println("index.");
            tocPageOut.println("<center>");
            tocPageOut.println("<p><font size='+1'><b>");
            for(int cx = 0; cx < charlist.size(); cx++) {
                c1 = ((Character)(charlist.get(cx))).charValue();
                String nam = getPageFileName(c1);
                tocPageOut.println("<a href=\"" + nam + "\">" +
                                   Character.toUpperCase(c1) +
                                   "</a> &nbsp;");
            }
            tocPageOut.println("</b></font><p>");
            tocPageOut.print("<a href=\"" + INDEX_PAGE_NAME + "\">Word Index</a>");
            tocPageOut.print("  &nbsp;&nbsp;&nbsp;");
            tocPageOut.print("<a href=\"" + HELP_PAGE_NAME + "\">Help</a>");
            tocPageOut.println("</p>");
            tocPageOut.println("</center>");
            tocPageOut.println("<p> &nbsp; </p>");
            tocPageOut.println("This inverted index was created from the");
            PackageDoc [] pds = root.specifiedPackages();
            tocPageOut.println("following Java packages:<ul>");
            writePackageListItems(tocPageOut, pds, base);
            tocPageOut.println("</ul>");
            tocPageOut.println("</ul>");
            writeLines(tocPageOut, footerLines);
            tocPageOut.close();

            // write out the help file
            writeLines(helpPageOut, headerLines);
            writeLines(helpPageOut, helpLines);
            writeLines(helpPageOut, footerLines);
            helpPageOut.close();

            // write the beginning of the index page
            writeLines(indexPageOut, headerLines);
            indexPageOut.println("<p> &nbsp; </p>");
            indexPageOut.println("<h2>Word Index</h2>");
            indexPageOut.println("<ul><p>");
            indexPageOut.println("This page shows each word found in the inverted index, along");
            indexPageOut.println("with the classes whose one-sentence description includes that");
            indexPageOut.println("word.  Click on a letter below to jump to that part of the word");
            indexPageOut.println("index.  Click on a word in the index to jump to that part of");
            indexPageOut.println("the inverted index.");
            indexPageOut.println("</p><center><p>");
            indexPageOut.println("<font size='+1'><b>");
            for(int cx = 0; cx < charlist.size(); cx++) {
                c1 = ((Character)(charlist.get(cx))).charValue();
                indexPageOut.println("<a href='#" +
                                   Character.toUpperCase(c1) + "'>" +
                                   Character.toUpperCase(c1) + "</a> &nbsp;");
            }
            indexPageOut.println("</b></font><p>");
            indexPageOut.print("<a href=\"" + TOC_PAGE_NAME + "\">Main Page</a>");
            indexPageOut.print("  &nbsp;&nbsp;&nbsp;");
            indexPageOut.print("<a href=\"" + HELP_PAGE_NAME + "\">Help</a>");
            indexPageOut.println("</p></center>");
            indexPageOut.println("<p> &nbsp; </p>");
            indexPageOut.println("This word index was created from the");
            indexPageOut.println("following Java packages:<ul>");
            writePackageListItems(indexPageOut, pds, base);
            indexPageOut.println("</ul>");
            indexPageOut.println("<p> &nbsp; </p>");

            // write the individual pages
            int wx;
            String linkdest;
            String prevword = "";
            for(it = entries.iterator(); it.hasNext(); ) {
                entry = (TEntry)(it.next());
                // NOTE(review): setPageOut presumably switches pageOut to the page
                // for this entry's letter and resets linesPer -- confirm there.
                setPageOut(entry, outputDir, base, rep);
                // Start a fresh table every 100 rows, closing the previous one first.
                if ((linesPer % 100) == 0) {
                    if (linesPer != 0)
                        pageOut.println("</table><br>\n");
                    pageOut.println("<table border=1 cellspacing=1 cellpadding=2 width='98%'>");
                }
                pageOut.println("<tr><td width=120 valign=top nowrap><small><code>");
                if (!(entry.getWord().equalsIgnoreCase(prevword))) {
                    // First row for a new word: drop an anchor on the letter page and
                    // open a new word section (and letter section, if the initial
                    // letter changed) on the word index page.
                    char wc, pwc;
                    pageOut.println("<a name='" + entry.getWord() + "'></a>");
                    if (prevword.length() > 0) {
                        // closes the previous word's list
                        indexPageOut.println("</ul>");
                        pwc = Character.toUpperCase(prevword.charAt(0));
                    }
                    else pwc = (char)0;
                    wc = Character.toUpperCase(entry.getWord().charAt(0));
                    if (pwc != wc) {
                        // closes the previous letter's list
                        if (pwc != (char)0)
                            indexPageOut.println("</ul>");
                        indexPageOut.println("<h1><a name='" + wc + "'></a>"
                                             + wc + "</h1>");
                        indexPageOut.println("<ul>");
                    }
                    indexPageOut.println("<p><font size='+1'><b>" +
                                         "<a href='" + getPageFileName(wc) +
                                         "#" + entry.getWord() + "'>" +
                                         entry.getWord() +
                                         "</a></b></font><ul>");
                    prevword = entry.getWord();
                }
                indexPageOut.println("<br><a href='" +
                                     linkFor(entry,base) +
                                     "'>" + entry.formatNameFull() +
                                     "</a>");

                linkdest = linkFor(entry, base);
                pageOut.println("<a href='" + linkdest + "'>" +
                                entry.formatName() + "</a>");
                pageOut.println("</code></small></td>");

                // Middle cell: the words that precede the indexed word, right-aligned.
                pageOut.println("<td width=220 align=right valign=top><small>");
                for(wx = 0; wx < entry.whichword; wx++) {
                    if (wx > 0) pageOut.print(" ");
                    pageOut.print(reverseIndexDoclet.encode((String)(entry.words.get(wx))));
                }
                if (wx == 0) pageOut.print(" &nbsp; ");
                pageOut.println("</small></td>");

                // Right cell: the indexed word in bold, followed by the remaining words.
                pageOut.println("<td valign=bottom align=left><small>");
                for( ; wx < entry.words.size(); wx++) {
                    pageOut.print(" ");
                    if (wx == entry.whichword) {
                        pageOut.print("<b>");
                        pageOut.print(reverseIndexDoclet.encode((String)(entry.words.get(wx))));
                        pageOut.print("</b>");
                    }
                    else {
                        // NOTE(review): the preceding words and the indexed word go
                        // through encode(), the trailing words through decode();
                        // confirm this asymmetry is intended.
                        pageOut.print(reverseIndexDoclet.decode((String)(entry.words.get(wx))));
                    }
                }
                pageOut.println("</small></td>");
                pageOut.println("</tr>");
                linesPer++;
            }
            // NOTE(review): a null entry presumably tells setPageOut to finish the
            // last letter page -- confirm there.
            setPageOut(null, outputDir, base, rep);

            // write the end of the index page
            indexPageOut.println("</ul>");
            indexPageOut.println("</ul>");
            writeLines(indexPageOut, footerLines);

            // all done
            indexPageOut.close();
            return true;
        }
        finally {
            // Guarantee the three writers opened above are released even if page
            // generation throws; on the normal path they are already closed and
            // these calls do nothing.
            tocPageOut.close();
            helpPageOut.close();
            indexPageOut.close();
        }
    }

    /**
     * Writes one {@code <li>} element per package to {@code out}: a link to
     * the package followed by its class and interface counts.  The class
     * count shown is allClasses().length minus interfaces().length.
     */
    private void writePackageListItems(PrintWriter out, PackageDoc[] pds, String base) {
        int classcnt, intcnt;
        for(int px = 0; px < pds.length; px++) {
            out.print("<li><a href='" +
                      linkFor(pds[px], base) +
                      "'><tt>");
            out.println(pds[px].name() + "</tt></a> &nbsp;<small>");
            classcnt = pds[px].allClasses().length;
            intcnt = pds[px].interfaces().length;
            out.print("(" + (classcnt - intcnt) +
                      (((classcnt - intcnt)==1)?(" class"):(" classes")) +
                      ", " + intcnt +
                      ((intcnt==1)?(" interface"):(" interfaces")) +
                      ")</small>");
            out.println("</li>");
        }
    }

    // utility functions for various stuff

    /**
     * Delimiter characters handed to the StringTokenizer in getDesc:
     * space, tab, newline, '&lt;', '&gt;' and carriage return.  getDesc asks
     * the tokenizer to return the delimiters as tokens too.
     */
    public static final String delimSet = " \t\n<>\r";

    public String getDesc(String ct, Doc doc) {
	StringTokenizer st;
	String clname;
	String ctd = reverseIndexDoclet.decode(ct);
	int px = doc.name().lastIndexOf('.');
	if (px > 0) clname = doc.name().substring(px + 1);
	else clname = doc.name();
	if (ctd.indexOf(clname) < 0) {
	    ctd = clname + ": " + ctd;
	}
	st = new StringTokenizer(ctd, delimSet, true);
	StringBuffer bf = new StringBuffer();
	boolean intag = false;
	boolean inwht = false;
	while(st.hasMoreTokens()) {
	    String token = st.nextToken();
	    char c1 = token.charAt(0);
	    if (c1 == '<') {
		intag = true;
	    }
	    else if (c1 == '>') {
		intag = false;
	    }
	    else if (!intag) {
		if (c1 == '\n' || c1 == '\t' || c1 == '\r') {
		    token = " "; c1 = ' ';
		}
		if (token.length() == 1 && Character.isWhitespace(c1)) {
		    if (!inwht) bf.append(" ");
		    else inwht = true;
		}
		else {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -