⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 abstractdataindexer.java~40~

📁 垃圾邮件过滤器源代码
💻 JAVA~40~
字号:
///////////////////////////////////////////////////////////////////////////////
//Copyright (C) 2003 Thomas Morton
//
//This library is free software; you can redistribute it and/or
//modify it under the terms of the GNU Lesser General Public
//License as published by the Free Software Foundation; either
//version 2.1 of the License, or (at your option) any later version.
//
//This library is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//GNU General Public License for more details.
//
//You should have received a copy of the GNU Lesser General Public
//License along with this program; if not, write to the Free Software
//Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
//////////////////////////////////////////////////////////////////////////////
package opennlp.maxent;

import gnu.trove.TObjectIntHashMap;
import gnu.trove.TObjectIntProcedure;
import gnu.trove.TDoubleIntHashMap;
import gnu.trove.TDoubleIntProcedure;

import java.util.Collections;
import java.util.List;

/**
 * Abstract class for collecting event and context counts used in training.
 *
 * <p>Subclasses are expected to fill the label arrays and to call
 * {@link #sortAndMerge(List)} to populate the per-event arrays exposed by
 * the getters below.</p>
 */
public abstract class AbstractDataIndexer implements DataIndexer {

  /** The integer contexts (predicate indexes) associated with each unique event. */
  protected int[][] contexts;

  /** The integer weight indexes associated with each unique event. */
  protected int[][] weights;

  /** The integer outcome associated with each unique event. */
  protected int[] outcomeList;

  /** The number of times an event occurred in the training data. */
  protected int[] numTimesEventsSeen;

  /** The predicate/context names. */
  protected String[] predLabels;

  /** The names of the outcomes. */
  protected String[] outcomeLabels;

  /** The weight labels (presumably addressed by the indexes stored in {@link #weights} -- confirm). */
  protected Double[] weightLabels;

  /** @return the predicate indexes of each unique event */
  public int[][] getContexts() {
    return contexts;
  }

  /** @return the weight indexes of each unique event */
  public int[][] getWeights() {
    return weights;
  }

  /** @return how many times each unique event was seen */
  public int[] getNumTimesEventsSeen() {
    return numTimesEventsSeen;
  }

  /** @return the outcome index of each unique event */
  public int[] getOutcomeList() {
    return outcomeList;
  }

  /** @return the predicate/context names */
  public String[] getPredLabels() {
    return predLabels;
  }

  /** @return the outcome names */
  public String[] getOutcomeLabels() {
    return outcomeLabels;
  }

  /** @return the weight labels */
  public Double[] getWeightLabels() {
    return weightLabels;
  }

/*
  // This method would fill in the attributes above; unlike sortAndMerge below,
  // it does not compare events or reduce them to unique ones.
  protected void ReArrange( List eventsToCompare){
     Collections.sort(eventsToCompare);
     int numEvents = eventsToCompare.size();
  }
*/

  /**
   * Sorts and uniques the list of comparable events, then records the
   * attributes of every unique event in {@link #contexts}, {@link #weights},
   * {@link #outcomeList} and {@link #numTimesEventsSeen}.
   *
   * <p>This method alters <code>eventsToCompare</code>: it is sorted in place,
   * and every duplicate of an earlier event is replaced by <code>null</code>
   * while the surviving event's <code>seen</code> count is incremented.</p>
   *
   * <p>Lists with zero or one event are indexed like any other list, so the
   * arrays are never left unset after this call (an empty list yields empty
   * arrays).</p>
   *
   * @param eventsToCompare a <code>List</code> of <code>ComparableEvent</code>
   *        objects; must not contain <code>null</code> elements
   * @since maxent 1.2.6
   */
  protected void sortAndMerge(List eventsToCompare) {
    // Events are ordered as whole objects via ComparableEvent.compareTo, so
    // equal events (per that comparison) become adjacent.
    Collections.sort(eventsToCompare);

    int numEvents = eventsToCompare.size();
    int numUniqueEvents = 0;

    // Count the distinct events and blank out the repeated ones.
    ComparableEvent current = null;
    for (int i = 0; i < numEvents; i++) {
      ComparableEvent candidate = (ComparableEvent) eventsToCompare.get(i);
      if (current != null && current.compareTo(candidate) == 0) {
        current.seen++;               // same event again: bump its count
        eventsToCompare.set(i, null); // kill the duplicate
      }
      else {
        current = candidate;          // first event of a new run
        numUniqueEvents++;
      }
    }

    System.out.println("done. Reduced " + numEvents + " events to " + numUniqueEvents + ".");

    // Collect the attributes of each unique event.
    contexts = new int[numUniqueEvents][];
    weights = new int[numUniqueEvents][];
    outcomeList = new int[numUniqueEvents];
    numTimesEventsSeen = new int[numUniqueEvents];

    for (int i = 0, j = 0; i < numEvents; i++) {
      ComparableEvent evt = (ComparableEvent) eventsToCompare.get(i);
      if (null == evt) {
        continue; // this was a dupe removed above, skip over it
      }
      numTimesEventsSeen[j] = evt.seen;
      outcomeList[j] = evt.outcome;
      contexts[j] = evt.predIndexes;
      weights[j] = evt.weightIndexes;
      ++j;
    }
  }

  /**
   * Utility method for creating a String[] array from a map whose
   * keys are labels (Strings) to be stored in the array and whose
   * values are the indices (Integers) at which the corresponding
   * labels should be inserted.
   *
   * @param labelToIndexMap a <code>TObjectIntHashMap</code> value
   * @return a <code>String[]</code> value
   * @since maxent 1.2.6
   */
  protected static String[] toIndexedStringArray(TObjectIntHashMap labelToIndexMap) {
    final String[] array = new String[labelToIndexMap.size()];
    labelToIndexMap.forEachEntry(new TObjectIntProcedure() {
      public boolean execute(Object str, int index) {
        array[index] = (String) str;
        return true; // keep iterating over the remaining entries
      }
    });
    return array;
  }

  /**
   * Utility method for creating a Double[] array from a map whose
   * keys are double labels to be stored in the array and whose
   * values are the indices at which the corresponding labels should
   * be inserted.
   *
   * @param labelToIndexMap a <code>TDoubleIntHashMap</code> value
   * @return a <code>Double[]</code> value
   */
  protected static Double[] toIndexedDoubleArray(TDoubleIntHashMap labelToIndexMap) {
    final Double[] array = new Double[labelToIndexMap.size()];
    labelToIndexMap.forEachEntry(new TDoubleIntProcedure() {
      // TDoubleIntProcedure hands over the key as a primitive double, so the
      // callback must take (double, int) to implement the interface.
      public boolean execute(double label, int index) {
        // Explicit boxing keeps this file free of Java 5+ features.
        array[index] = new Double(label);
        return true; // keep iterating over the remaining entries
      }
    });
    return array;
  }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -