📄 instancelist.java
字号:
public class Iterator implements java.util.Iterator, Serializable { int index; public Iterator () { this.index = 0; } public boolean hasNext () { return index < instances.size(); } public Instance nextInstance () { return (Instance)instances.get(index++); } public double getInstanceWeight () { return instanceWeights == null ? 1.0 : instanceWeights.get(index); } public Object next () { return nextInstance(); } public void remove () { throw new UnsupportedOperationException(); } // Serialization of InstanceListIterator private static final long serialVersionUID = 1; private static final int CURRENT_SERIAL_VERSION = 0; private void writeObject (ObjectOutputStream out) throws IOException { out.writeInt (CURRENT_SERIAL_VERSION); out.writeInt(index); } private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException { int version = in.readInt (); index = in.readInt(); } } public int size () { return instances.size(); } /** Returns the class of the object contained in the data field of the * first <code>Instance</code> in this list. */ public Class getDataClass () { if (instances.size() == 0) return null; else return getInstance(0).getData().getClass(); } /** Returns the pipe through which each added <code>Instance</code> is passed, * which may be <code>null</code>. */ public Pipe getPipe () { return pipe; } /** Returns the <code>Alphabet</code> mapping features of the data to * integers. */ public Alphabet getDataAlphabet () { if (dataVocab == null && pipe != null) { dataVocab = pipe.getDataAlphabet (); } assert (pipe == null || pipe.getDataAlphabet () == null || pipe.getDataAlphabet () == dataVocab); return dataVocab; } /** Returns the <code>Alphabet</code> mapping target output labels to * integers. */ public Alphabet getTargetAlphabet () { if (targetVocab == null && pipe != null) { targetVocab = pipe.getTargetAlphabet (); } assert (pipe == null || pipe.getTargetAlphabet () == null || pipe.getTargetAlphabet () == targetVocab); return targetVocab; } public LabelVector targetLabelDistribution () { if (instances.size() == 0) return null; if (!(getInstance(0).getTarget() instanceof Labeling)) throw new IllegalStateException ("Target is not a labeling."); double[] counts = new double[getTargetAlphabet().size()]; for (int i = 0; i < instances.size(); i++) { Instance instance = (Instance) instances.get(i); Labeling l = (Labeling) instance.getTarget(); l.addTo (counts, getInstanceWeight(i)); } return new LabelVector ((LabelAlphabet)getTargetAlphabet(), counts); } // For PipeOutputAccumulator interface public void pipeOutputAccumulate (Instance carrier, Pipe iteratedPipe) { // xxx ??? assert (iteratedPipe == pipe); // The assertion above won't be true when using IteratedPipe... //logger.fine ("pipeOutputAccumulate target="+target); // These various add() methods below will make sure that the Pipes match appropriately if (carrier.getData() instanceof InstanceList) add ((InstanceList)carrier.getData()); else if (carrier.getData() instanceof PipeInputIterator) add ((PipeInputIterator)carrier.getData()); else if (carrier.getData() instanceof Instance) add ((Instance)carrier.getData()); else { if (pipe == notYetSetPipe) pipe = iteratedPipe; //System.out.println ("Instance.pipeOuputAccumulate carrier.getSource()="+carrier.getSource()); add (new Instance (carrier.getData(), carrier.getTarget(), carrier.name, carrier.getSource(), iteratedPipe)); } } public PipeOutputAccumulator clonePipeOutputAccumulator () { return (PipeOutputAccumulator)shallowClone(); } public Iterator iterator () { return new Iterator(); } public CrossValidationIterator crossValidationIterator (int nfolds, int seed) { return new CrossValidationIterator(nfolds, seed); } public CrossValidationIterator crossValidationIterator (int nfolds) { return crossValidationIterator(nfolds); } /** Adds to this list every instance generated by the iterator, * passing each one through this list's pipe. */ public void add (PipeInputIterator pi) { while (pi.hasNext()) { Instance carrier = pi.nextInstance(); // xxx Perhaps try to arrange this so that a new Instance does not have to allocated. add (new Instance (carrier.getData(), carrier.getTarget(), carrier.name, carrier.getSource(), this.pipe)); } } /** * <p>Adds to this list each instance in the input list.</p> * * <p>The lists' pipes must match, except that this list's * pipe is allowed to be "not yet set", and the input list's * pipe is allowed to be null.</p> */ public void add (InstanceList ilist) { if (ilist.pipe == pipe) { Iterator iter = ilist.iterator(); while (iter.hasNext()) add(iter.nextInstance ()); } else if (pipe == notYetSetPipe) { // This InstanceList doesn't have a pipe defined, but "ilist" does. // Take ilist's pipe as our own, and add its Instances directly. if (this.instances.size() > 0) // We don't want to have some instances in this list passed through // no pipe, and others passing through the new pipe. throw new IllegalArgumentException ( "Trying to set this InstanceList's pipe, but it already has instances."); this.pipe = ilist.pipe; Iterator iter = ilist.iterator(); while (iter.hasNext()) add (iter.nextInstance()); } else if (ilist.pipe == null) { // Treat the data from the instances in ilist as inputData for our pipe. Iterator iter = ilist.iterator(); while (iter.hasNext()) add (iter.nextInstance ()); } else // xxx Another thing to consider is that we could take the // ilist instances that were passed through its pipe, and pass // them through this InstanceList's pipe. This seems // dangerous, though, and this InstanceList's pipe doesn't // reflect all processing. throw new IllegalArgumentException ( "Instances to be added to a InstanceList cannot already have been piped, " +"unless the pipes are equal, or one of the pipes is null."); } /** Constructs and appends an instance to this list, passing it through this * list's pipe and assigning it the specified weight. * @return <code>true</code> */ public boolean add (Object data, Object target, Object name, Object source, double instanceWeight) { return add (new Instance (data, target, name, source, pipe), instanceWeight); } /** Constructs and appends an instance to this list, passing it through this * list's pipe. Default weight is 1.0. * @return <code>true</code> */ public boolean add (Object data, Object target, Object name, Object source) { return add (data, target, name, source, 1.0); } /** Appends the instance to this list. * @return <code>true</code> */ public boolean add (Instance instance) { if (pipe == notYetSetPipe) pipe = instance.getPipe(); else if (instance.getPipe() != pipe) // Making sure that the Instance has the same pipe as us. // xxx This also is a good time check that the constituent data is // of a consistent type? throw new IllegalArgumentException ("pipes don't match: instance: "+ instance.getPipe()+" Instance.list: "+ this.pipe); if (dataClass == null) { dataClass = instance.data.getClass(); if (pipe != null && pipe.isTargetProcessing()) targetClass = instance.target.getClass(); } return instances.add (instance); } /** Appends the instance to this list, assigning it the specified weight. * @return <code>true</code> */ public boolean add (Instance instance, double instanceWeight) { // Call the add method above and make sure we // correctly handle adding the first instance to this list boolean ret = this.add(instance); if (instanceWeight != 1.0) { if (instanceWeights == null) { if (instances.size() == 1) instanceWeights = new DoubleList(1, instanceWeight); else instanceWeights = new DoubleList (instances.size()-1, 1.0); } instanceWeights.add (instanceWeight); } return ret; } /* // xxx Does this really belong here? // How would we match this result if we read more test instances with a pipe? public void trimFeaturesByCount (int minCount) { Alphabet oldv = pipe.getDataAlphabet (); Alphabet newv = new Alphabet (); int[] counts = new int[this.size()]; // Get counts for (int i = 0; i < this.size(); i++) { Object data = this.getInstance(i).getData (); if (data instanceof FeatureVectorSequence) { FeatureVectorSequence fvs = (FeatureVectorSequence) data; for (int j = 0; j < fvs.size(); j++) { FeatureVector fv = fvs.getFeatureVector (j); for (int k = fv.numLocations()-1; k >= 0; k--) counts[fv.indexAtLocation(k)]++; } } else { throw new IllegalArgumentException ("Doesn't handle data of type "+data.getClass().getName()); } } // Substitute in the new Alphabet dataDict = newv; // xxx Do this for the pipe too! // xxx Do it with a new method Pipe.setDataAlphabet (); // Build replacement FeatureVectorSequences with pruned features and new Alphabet for (int i = 0; i < instances.size(); i++) { Instance instance = this.getInstance(i); Object data = instance.getData (); if (data instanceof FeatureVectorSequence) { FeatureVectorSequence fvs = (FeatureVectorSequence) data; FeatureVector[] fva = new FeatureVector[fvs.size()]; for (int j = 0; j < fvs.size(); j++) { FeatureVector fv = fvs.getFeatureVector (j); AugmentableFeatureVector afv = new AugmentableFeatureVector (newv, fv.isBinary()); for (int k = fv.numLocations()-1; k >= 0; k--) if (counts[fv.indexAtLocation(k)] >= minCount) afv.add (fv.indexAtLocation(k), fv.valueAtLocation(k)); fva[j] = fv instanceof AugmentableFeatureVector ? afv : afv.toFeatureVector(); } instance.data = new FeatureVectorSequence (fva); } else { throw new IllegalArgumentException ("Doesn't handle data of type "+data.getClass().getName()); } } } */ // xxx Perhaps make public? // A collection of instances without random access (perhaps because backed on disk) protected interface Stream extends PipeOutputAccumulator { public Iterator iterator (); // Returns -1 for an "infinite" stream public int size(); public Pipe getInstancePipe (); public Alphabet getTargetAlphabet (); public Alphabet getDataAlphabet (); } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -