📄 abstractdataindexer.java~41~
字号:
/////////////////////////////////////////////////////////////////////////////////Copyright (C) 2003 Thomas Morton////This library is free software; you can redistribute it and/or//modify it under the terms of the GNU Lesser General Public//License as published by the Free Software Foundation; either//version 2.1 of the License, or (at your option) any later version.////This library is distributed in the hope that it will be useful,//but WITHOUT ANY WARRANTY; without even the implied warranty of//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the//GNU General Public License for more details.////You should have received a copy of the GNU Lesser General Public//License along with this program; if not, write to the Free Software//Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.//////////////////////////////////////////////////////////////////////////////package opennlp.maxent;import gnu.trove.TObjectIntHashMap;import gnu.trove.TObjectIntProcedure;import gnu.trove.TDoubleIntHashMap;import gnu.trove.TDoubleIntProcedure;import java.util.Collections;import java.util.List;/** * Abstract class for collecting event and context counts used in training. * */public abstract class AbstractDataIndexer implements DataIndexer { /** The integer contexts associated with each unique event. */ protected int[][] contexts; /** The integer weights associated with each unique event. */ protected int[][] weights; /** The integer outcome associated with each unique event. */ protected int[] outcomeList; /** The number of times an event occured in the training data. */ protected int[] numTimesEventsSeen; /** The predicate/context names. */ protected String[] predLabels; /** The names of the outcomes. */ protected String[] outcomeLabels; protected Double[] weightLabels; public int[][] getContexts() { return contexts; } public int[][] getWeights(){ return weights; } public int[] getNumTimesEventsSeen() { return numTimesEventsSeen; } public int[] getOutcomeList() { return outcomeList; } public String[] getPredLabels() { return predLabels; } public String[] getOutcomeLabels() { return outcomeLabels; } public Double[] getWeightLabels(){ return weightLabels; }/* //此方法完成填写上述属性的功能,与下面的sortAndMerge方法作用不同的是: //不作事件的比较和唯一化过程 protected void ReArrange( List eventsToCompare){ Collections.sort(eventsToCompare); int numEvents = eventsToCompare.size(); }*/ /** * Sorts and uniques the array of comparable events. This method * will alter the eventsToCompare array -- it does an in place * sort, followed by an in place edit to remove duplicates. * * @param eventsToCompare a <code>ComparableEvent[]</code> value * @since maxent 1.2.6 */ protected void sortAndMerge(List eventsToCompare) { Collections.sort(eventsToCompare); // 排序,事件整体作为一个对象,可以排序,涉及到权重,但调用的是ComparableEvent的CompareTo事件,已正确修改 int numEvents = eventsToCompare.size(); int numUniqueEvents = 1; // assertion: eventsToCompare.length >= 1 if (numEvents <= 1) { return; // nothing to do; edge case (see assertion) } // 分析出事件集中不同事件的个数,删除重复事件 ComparableEvent ce = (ComparableEvent) eventsToCompare.get(0); for (int i = 1; i < numEvents; i++) { ComparableEvent ce2 = (ComparableEvent) eventsToCompare.get(i); if (ce.compareTo(ce2) == 0) { // 两个事件相同,出现次数++,删除重复事件 ce.seen++; // increment the seen count eventsToCompare.set(i, null); // kill the duplicate } else { ce = ce2; // a new champion emerges... numUniqueEvents++; // increment the # of unique events } } System.out.println("done. Reduced " + numEvents + " events to " + numUniqueEvents + "."); //得到每个唯一事件的相关属性 contexts = new int[numUniqueEvents][]; weights = new int[numUniqueEvents][]; outcomeList = new int[numUniqueEvents]; numTimesEventsSeen = new int[numUniqueEvents]; for (int i = 0, j = 0; i < numEvents; i++) { ComparableEvent evt = (ComparableEvent) eventsToCompare.get(i); if (null == evt) { continue; // this was a dupe, skip over it. 被删除的重复事件 } numTimesEventsSeen[j] = evt.seen; outcomeList[j] = evt.outcome; contexts[j] = evt.predIndexes; weights[j] = evt.weightIndexes; ++j; } } /** * Utility method for creating a String[] array from a map whose * keys are labels (Strings) to be stored in the array and whose * values are the indices (Integers) at which the corresponding * labels should be inserted. * * @param labelToIndexMap a <code>TObjectIntHashMap</code> value * @return a <code>String[]</code> value * @since maxent 1.2.6 */ protected static String[] toIndexedStringArray(TObjectIntHashMap labelToIndexMap) { final String[] array = new String[labelToIndexMap.size()]; labelToIndexMap.forEachEntry(new TObjectIntProcedure() { public boolean execute(Object str, int index) { array[index] = (String)str; return true; } }); return array; } protected static Double[] toIndexedDoubleArray(TDoubleIntHashMap labelToIndexMap) { final Double[] array = new Double[labelToIndexMap.size()]; labelToIndexMap.forEachEntry(new TDoubleIntProcedure() { public boolean execute(Object str, int index) { array[index] = (Double)str; return true; } }); return array; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -