latticeviewer.java

来自「mallet是自然语言处理、机器学习领域的一个开源项目。」· Java 代码 · 共 483 行 · 第 1/2 页

JAVA
483
字号
  {    out.println ("  <tr class=\"input\">");    out.println ("    <td class=\"label\"></td>");    for (int ip = start; ip < end; ip++) {      out.print ("<td>"+input.getToken(ip).getText()+"</td>");    }    out.println ("  </tr>");  }  private static void outputIndices (PrintWriter out, int start, int end)  {    out.println ("  <tr class=\"indices\">");    out.println ("    <td class=\"label\"></td>");    for (int ip = start; ip < end; ip++) {      out.print ("<td>"+ip+"</td>");    }    out.println ("  </tr>");  }  private static void outputTableRow (PrintWriter out, String cssClass, Sequence seq1, Sequence seq2, int start, int end)  {    out.println ("  <tr class=\""+cssClass+"\">");    out.println ("    <td class=\"label\">"+cssClass+"</td>");    for (int i = start; i < end; i++) {      if (seqMatches (seq1, seq2, i)) {        out.print ("<td>");      } else {        out.print ("<td class=\"error\">");      }      out.print (seq1.get(i));      out.print ("</td>");    }    out.println ("  </tr>");  }  private static void outputFeatures (PrintWriter out, FeatureVectorSequence fvs, Sequence in, Sequence output, int start, int end)  {    out.println ("  <tr class=\"features\">\n<td class=\"label\">Features</td>");    for (int i = start; i < end; i++) {      if (!seqMatches (in, output, i)) {        out.print ("<td>");        FeatureVector fv = fvs.getFeatureVector (i);        for (int k = 0; k < fv.numLocations (); k++) {          out.print (fv.getAlphabet ().lookupObject (fv.indexAtLocation (k)));          if (fv.valueAtLocation (k) != 1.0) {            out.print (" "+fv.valueAtLocation (k));          }          out.println ("<br />");        }        out.println ("</td>");      } else {        out.println ("<td></td>");      }    }    out.println ("  </tr>");  }  private static boolean seqMatches (Sequence seq1, Sequence seq2, int i)  {    return seq1.get(i).toString().equals (seq2.get(i).toString());  }  private static boolean allSeqMatches (Sequence seq1, Sequence seq2, int start, int end)  {    for (int i = start; i < end; i++) {      if (!seqMatches (seq1, seq2, i)) return false;    }    return true;  }  public static void extraction2html (Extraction extraction, CRFExtractor extor, PrintStream out)  {    PrintWriter writer = new PrintWriter (new OutputStreamWriter (out), true);    extraction2html (extraction, extor, out, false);  }  public static void extraction2html (Extraction extraction, CRFExtractor extor, PrintWriter out)  {    extraction2html (extraction, extor, out, false);  }  public static void extraction2html (Extraction extraction, CRFExtractor extor, PrintStream out, boolean showLattice)  {    PrintWriter writer = new PrintWriter (new OutputStreamWriter (out), true);    extraction2html (extraction, extor, writer, showLattice);  }  public static void extraction2html (Extraction extraction, CRFExtractor extor, PrintWriter out, boolean showLattice)  {    writeHeader (out);    for (int i = 0; i < extraction.getNumDocuments (); i++) {      DocumentExtraction docextr = extraction.getDocumentExtraction (i);      String desc = docextr.getName();      String doc = ((CharSequence) docextr.getDocument ()).toString();      ExtorInfo info = infoForDoc (doc, desc, "N"+i, docextr, extor, showLattice);      if (!showLattice) info.link = "lattice.html";      lattice2html (out, info);    }    writeFooter (out);  }  private static class ExtorInfo {    TokenSequence input;    Sequence predicted;    LabelSequence target;    FeatureVectorSequence fvs;    Transducer.ViterbiPath lattice;    Sequence bestStates;    String link; // If non-null, name of HTML file to use for cross-links    String desc;    String idx;    public ExtorInfo (TokenSequence input, Sequence predicted, LabelSequence target, String desc, String idx)    {      this.input = input;      this.predicted = predicted;      this.target = target;      this.desc = desc;      this.idx = idx;    }  }  private static ExtorInfo infoForDoc (String doc, String desc, String idx, DocumentExtraction docextr,                                         CRFExtractor extor, boolean showLattice)  {//    Instance c2 = new Instance (doc, null, null, null, extor.getTokenizationPipe ());//    TokenSequence input = (TokenSequence) c2.getData ();    TokenSequence input = (TokenSequence) docextr.getInput ();     LabelSequence target = docextr.getTarget ();    Sequence predicted = docextr.getPredictedLabels ();    ExtorInfo info = new ExtorInfo (input, predicted, target, desc, idx);    if (showLattice == true) {      CRF4 crf = extor.getCrf();      // xxx perhaps the next two lines could be a transducer method???      Instance carrier = new Instance (input, null, null, null, extor.getFeaturePipe ());      info.fvs = (FeatureVectorSequence) carrier.getData ();      info.lattice = crf.viterbiPath ((Sequence) carrier.getData(), true);      info.bestStates = info.lattice.getBestStates ();    }    return info;  }  // Lattice files get too large if too many instances are written to one file  private static final int EXTRACTIONS_PER_FILE = 25;  public static void viewDualResults (File dir, Extraction e1, CRFExtractor extor1, Extraction e2, CRFExtractor extor2) throws IOException  {    if (e1.getNumDocuments () != e2.getNumDocuments ())      throw new IllegalArgumentException ("Extractions don't match: different number of docs.");    PrintWriter errorStr = new PrintWriter (new FileWriter (new File (dir, "errors.html")));    writeDualExtractions (errorStr, e1, extor1, e2, extor2, 0, e1.getNumDocuments (), false);    errorStr.close ();    int max = e1.getNumDocuments ();    for (int start = 0; start < max; start += EXTRACTIONS_PER_FILE) {      int end = Math.min (start + EXTRACTIONS_PER_FILE, max);      PrintWriter latticeStr = new PrintWriter (new FileWriter (new File (dir, "lattice-"+start+".html")));      writeDualExtractions (latticeStr, e1, extor1, e2, extor2, start, end, true);      latticeStr.close ();    }  }  private static String computeLatticeFname (int docIdx)  {    int htmlDocNo = docIdx / EXTRACTIONS_PER_FILE; // this will get integer truncated    int start = EXTRACTIONS_PER_FILE * htmlDocNo;    return "lattice-"+start+".html";  }  private static void writeDualExtractions (PrintWriter out, Extraction e1, CRFExtractor extor1, Extraction e2, CRFExtractor extor2,                                           int start, int end, boolean showLattice)  {    writeHeader (out);    for (int i = start; i < end; i++) {      DocumentExtraction doc1 = e1.getDocumentExtraction (i);      DocumentExtraction doc2 = e2.getDocumentExtraction (i);      String desc = doc1.getName();      String doc1Str = ((CharSequence) doc1.getDocument ()).toString();      String doc2Str = ((CharSequence) doc2.getDocument ()).toString();      if (!doc1Str.equals (doc2Str)) {        System.err.println ("Skipping document "+i+": Extractions don't match");        continue;      }      Sequence targ1 = doc1.getPredictedLabels ();      Sequence targ2 = doc2.getPredictedLabels ();      if (!predictionsMatch (targ1, targ2)) {       ExtorInfo info1 = infoForDoc (doc1Str, "CRF1::"+desc, "C1I"+i, doc1, extor1, showLattice);       ExtorInfo info2 = infoForDoc (doc1Str, "CRF2::"+desc, "C2I"+i, doc2, extor2, showLattice);        if (!showLattice) { // add links from errors.html --> lattice.html          info1.link = info2.link = computeLatticeFname (i);        }       dualLattice2html (out, desc, info1, info2);      }    }    writeFooter (out);  }  // if lattice == null, no alpha, beta values printed  public static void dualLattice2html (PrintWriter out, String desc, ExtorInfo info1, ExtorInfo info2)  {    assert (info1.predicted.size() == info1.target.size());    assert (info1.input.size() == info1.predicted.size());    assert (info2.input.size() == info2.predicted.size());    assert (info2.predicted.size() == info2.target.size());    int N = info1.target.size();    for (int start = 0; start < N; start += LENGTH - 1) {      int end = Math.min (info1.predicted.size(), start + LENGTH);      if (!allSeqMatches (info1.predicted, info2.predicted, start, end)) {        error2html (out, info1, start, end);        error2html (out, info2, start, end);      }    }  }  private static boolean predictionsMatch (Sequence targ1, Sequence targ2)  {    if (targ1.size() != targ2.size()) return false;    for (int i = 0; i < targ1.size(); i++)      if (!targ1.get(i).toString().equals (targ2.get(i).toString()))        return false;    return true;  }}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?