pagedinstancelist.java
来自「mallet是自然语言处理、机器学习领域的一个开源项目。」· Java 代码 · 共 679 行 · 第 1/2 页
JAVA
679 行
sumProbs += r.nextDouble(); probabilities[i] = sumProbs; } MatrixOps.timesEquals(probabilities, sumOfWeights / sumProbs); // make sure rounding didn't mess things up probabilities[size() - 1] = sumOfWeights; // do sampling int a = 0; int b = 0; sumProbs = 0; while (a < size() && b < size()) { sumProbs += weights[b]; while (a < size() && probabilities[a] <= sumProbs) { newList.add(getInstance(b)); newList.setInstanceWeight(a, 1); a++; } b++; } return newList; } // PAGING METHODS /** Swap in the page for Instance at <code>index</code>. Swap out * all other pages. * @param index index in the <code>instances</code> list of the * Instance we want. */ private void swapIn (int index) { long start = System.currentTimeMillis (); if (instancesPerPage == -1) { throw new IllegalStateException ("instancesPerPage not set => swapOut not yet called => swapIn cannot be called yet."); } int bin = index / instancesPerPage; if (pageNotInMemory.get(bin)) { logger.info ("Swapping in instance " + index + " from page " + bin); swapOutExcept (index); try { ObjectInputStream in = new ObjectInputStream (new FileInputStream (new File (swapDir, id + "." + String.valueOf(bin)))); for (int ii=0; ii < instancesPerPage; ii++) { // xxx What if now we don't have enough memory to swap in // entire page?!?! Instance inst = (Instance) in.readObject(); int newIndex = (instancesPerPage*bin) + ii; inst.unLock(); inst.setPipe (pipe); inst.setLock(); if (inMemory.get(newIndex)) throw new IllegalStateException (newIndex + " already in memory! "); instances.set (newIndex, inst); inMemory.set (newIndex); if (newIndex == size()-1) // for last bin break; } pageNotInMemory.set (bin, false); } catch (Exception e) { System.err.println (e); System.exit(-1); } } long end = System.currentTimeMillis (); logger.info ("PagedInstaceList swap-in time (ms) = "+(end-start)); } /** Save all instances to disk and set to null to free memory.*/ public void swapOutAll () { swapOutExcept (size()); } /** Swap out all pages except the page for index. * @param index index in the <code>instances</code> list of the * Instance we want. */ private void swapOutExcept (int index) { long start = System.currentTimeMillis (); if (index < 0 || inMemory.cardinality() < 1) { logger.warning ("nothing to swap out to read instance " + index); return; } if (instancesPerPage == -1) { // set to half the # of instances we can store in mem instancesPerPage = Math.max(size()/2,1); } int binToKeep = index / instancesPerPage; int maxBin = (size()-1) / instancesPerPage; for (int i=0; i <= maxBin; i++) { if (i==binToKeep || pageNotInMemory.get(i)) continue; logger.info ("\tSwapping out page " + i); try { int beginIndex = i*instancesPerPage; int endIndex = Math.min((i+1)*(instancesPerPage)-1, size()-1); File f = new File (swapDir, id + "." + String.valueOf(i)); if (!f.exists()) { // save time by not re-writing files. try { ObjectOutputStream out = new ObjectOutputStream (new FileOutputStream (f)); for (int bi=beginIndex; bi <= endIndex; bi++) { Instance inst = (Instance)instances.get(bi); if (inst.getPipe() != null) { inst.getPipe().getDataAlphabet().setInstanceId (new VMID()); inst.getPipe().getTargetAlphabet().setInstanceId (new VMID()); } assert (inst != null) : "null instance while swapping out page from bin " + i; inst.unLock(); inst.setPipe (null); inst.setLock(); out.writeObject (inst); } out.close(); } catch (Exception e) { System.err.println (e); System.exit(-1); } } for (int bi=beginIndex; bi <= endIndex; bi++) { instances.set(bi, null); inMemory.set (bi, false); } logger.fine ("Swapping out page " + i); pageNotInMemory.set(i, true); } catch (OutOfMemoryError ee) { // xxx FIX THIS SOMEHOW! System.out.println ("Ran out of memory while swapping out."); System.exit(-1); } } if (collectGarbage) System.gc(); long end = System.currentTimeMillis (); logger.info ("PagedInstanceList swapout time (ms) = "+(end - start)); } // ACCESSORS /** Returns the <code>Instance</code> at the specified index. If * this Instance is not in memory, swap a block of instances back * into memory. */ public Instance getInstance (int index) { if (!inMemory.get(index)) swapIn (index); return (Instance) instances.get (index); } /** Replaces the <code>Instance</code> at position * <code>index</code> with a new one. Note that this is the only * sanctioned way of changing an Instance. */ public void setInstance(int index, Instance instance) { if (!inMemory.get(index)) swapIn (index); instances.set(index, instance); } /** Appends the instance to this list. Note that since memory for * the Instance has already been allocated, no check is made to * catch OutOfMemoryError. * @return <code>true</code> if successful */ public boolean add (Instance instance) { if (pipe == notYetSetPipe) pipe = instance.getPipe(); else if (instance.getPipe() != pipe) // Making sure that the Instance has the same pipe as us. // xxx This also is a good time check that the constituent data is // of a consistent type? throw new IllegalArgumentException ("pipes don't match: instance: "+ instance.getPipe()+" Instance.list: "+ this.pipe); if (dataClass == null) { dataClass = instance.data.getClass(); if (pipe != null && pipe.isTargetProcessing()) targetClass = instance.target.getClass(); } instance.setLock(); boolean ret = instances.add (instance); inMemory.set(size()-1); logger.finer ("Added instance " + (size()-1) + ". Free memory remaining (bytes): " + Runtime.getRuntime().freeMemory()); return ret; } /** Adds to this list every instance generated by the iterator, * passing each one through this list's pipe. Checks are made to * ensure an OutOfMemoryError is not thrown when instantiating a new * Instance. */ public void add (PipeInputIterator pi) { while (pi.hasNext()) { Instance carrier = pi.nextInstance(); add (carrier.getData(), carrier.getTarget(), carrier.name, carrier.getSource()); } } /** Constructs and appends an instance to this list, passing it through this * list's pipe and assigning it the specified weight. Checks are made to * ensure an OutOfMemoryError is not thrown when instantiating a new * Instance. * @return <code>true</code> */ public boolean add (Object data, Object target, Object name, Object source, double instanceWeight) { Instance inst = null; logger.fine ("Trying to add instance..."); try { inst = new Instance (data, target, name, source, pipe); } catch (OutOfMemoryError e) { logger.info ("Caught " + e + "\n Instances in memory: " + inMemory.cardinality() + ". Swapping out to free memory."); inst = null; if (collectGarbage) System.gc(); swapOutExcept (size()); logger.info ("After paging, InstanceList.size:" + size() + " Instances in memory: " + inMemory.cardinality() + " Free Memory (bytes): " + Runtime.getRuntime().freeMemory()); try { inst = new Instance (data, target, name, source, pipe); } catch (OutOfMemoryError ee) { inst = null; logger.warning ("Still insufficient memory after swapping to disk. Instance too large to fit in memory?"); System.exit(-1); } } boolean retVal = add (inst, instanceWeight); if ((instancesPerPage > 0) && (inMemory.cardinality () > instancesPerPage)) { logger.info ("Page size "+instancesPerPage+" exceeded. Forcing swap. Instances in memory: " + inMemory.cardinality() + " Free Memory (bytes): " + Runtime.getRuntime().freeMemory()); if (collectGarbage) System.gc (); swapOutExcept (size()); logger.info ("After paging, InstanceList.size:" + size() + " Instances in memory: " + inMemory.cardinality() + " Free Memory (bytes): " + Runtime.getRuntime().freeMemory()); } return retVal; } public void setCollectGarbage (boolean b) { this.collectGarbage = b; } public boolean collectGarbage () { return this.collectGarbage; } public InstanceList shallowClone () { PagedInstanceList ret = new PagedInstanceList (pipe, instances.size(), -1, swapDir); for (int i = 0; i < instances.size(); i++) ret.add (getInstance(i)); if (instanceWeights == null) ret.instanceWeights = null; else ret.instanceWeights = instanceWeights.cloneDoubleList(); return ret; } public InstanceList cloneEmpty () { PagedInstanceList ret = new PagedInstanceList (pipe, size(), instancesPerPage, swapDir ); ret.instanceWeights = instanceWeights == null ? null : (DoubleList) instanceWeights.clone(); // xxx Should the featureSelection and perLabel... be cloned? // Note that RoostingTrainer currently depends on not cloning its splitting. ret.featureSelection = this.featureSelection; ret.perLabelFeatureSelection = this.perLabelFeatureSelection; ret.dataClass = this.dataClass; ret.targetClass = this.targetClass; ret.dataVocab = this.dataVocab; ret.targetVocab = this.targetVocab; ret.collectGarbage = this.collectGarbage; return ret; } /** Constructs a new <code>InstanceList</code>, deserialized from <code>file</code>. If the string value of <code>file</code> is "-", then deserialize from {@link System.in}. */ public static InstanceList load (File file) { try { ObjectInputStream ois; if (file.toString().equals("-")) ois = new ObjectInputStream (System.in); else ois = new ObjectInputStream (new FileInputStream (file)); PagedInstanceList ilist = (PagedInstanceList) ois.readObject(); ois.close(); return ilist; } catch (Exception e) { e.printStackTrace(); throw new IllegalArgumentException ("Couldn't read PagedInstanceList from file "+file); } } // Serialization of PagedInstanceList private static final long serialVersionUID = 1; private static final int CURRENT_SERIAL_VERSION = 1; private void writeObject (ObjectOutputStream out) throws IOException { int i, size; out.writeInt (CURRENT_SERIAL_VERSION); out.writeObject (id); out.writeObject (instances); out.writeObject(instanceWeights); out.writeObject(pipe); // memory attributes out.writeInt (instancesPerPage); out.writeObject (swapDir); out.writeObject (inMemory); out.writeObject (pageNotInMemory); out.writeBoolean (collectGarbage); } private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException { int i, size; int version = in.readInt (); id = (VMID) in.readObject (); instances = (ArrayList) in.readObject(); instanceWeights = (DoubleList) in.readObject(); pipe = (Pipe) in.readObject(); // memory attributes instancesPerPage = in.readInt (); swapDir = (File) in.readObject (); inMemory = (BitSet) in.readObject (); pageNotInMemory = (BitSet) in.readObject (); collectGarbage = in.readBoolean (); }}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?