pagedinstancelist.java

来自「mallet是自然语言处理、机器学习领域的一个开源项目。」· Java 代码 · 共 679 行 · 第 1/2 页

JAVA
679
字号
		  sumProbs += r.nextDouble();		  probabilities[i] = sumProbs;		}		MatrixOps.timesEquals(probabilities, sumOfWeights / sumProbs);				// make sure rounding didn't mess things up		probabilities[size() - 1] = sumOfWeights;		// do sampling		int a = 0; int b = 0; sumProbs = 0;		while (a < size() && b < size()) {		  sumProbs += weights[b];		  		  while (a < size() && probabilities[a] <= sumProbs) {				newList.add(getInstance(b));				newList.setInstanceWeight(a, 1);				a++;		  }		  b++;		}				return newList;	}		// PAGING METHODS		/** Swap in the page for Instance at <code>index</code>. Swap out	 * all other pages.	 * @param index index in the <code>instances</code> list of the	 * Instance we want. */	private void swapIn (int index) {    long start = System.currentTimeMillis ();		if (instancesPerPage == -1) {			throw new IllegalStateException ("instancesPerPage not set => swapOut not yet called => swapIn cannot be called yet.");		}		int bin = index / instancesPerPage;		if (pageNotInMemory.get(bin)) {			logger.info ("Swapping in instance " + index + " from page " + bin);			swapOutExcept (index);			try {				ObjectInputStream in = new ObjectInputStream															 (new FileInputStream (new File (swapDir, id + "." + String.valueOf(bin))));				for (int ii=0; ii < instancesPerPage; ii++) {					// xxx What if now we don't have enough memory to swap in					// entire page?!?!					Instance inst = (Instance) in.readObject();					int newIndex = (instancesPerPage*bin) + ii;					inst.unLock();					inst.setPipe (pipe);					inst.setLock();					if (inMemory.get(newIndex))						throw new IllegalStateException (newIndex + " already in memory! ");					instances.set (newIndex, inst);					inMemory.set (newIndex);					if (newIndex == size()-1) // for last bin						break;				}				pageNotInMemory.set (bin, false);			}			catch (Exception e) {				System.err.println (e);				System.exit(-1);			}		}    long end = System.currentTimeMillis ();    logger.info ("PagedInstaceList swap-in time (ms) = "+(end-start));	}	/** Save all instances to disk and set to null to free memory.*/	public void swapOutAll () {		swapOutExcept (size());	}		/** Swap out all pages except the page for index.  	 * @param index index in the <code>instances</code> list of the 	 * Instance we want. */	private void swapOutExcept (int index) {    long start = System.currentTimeMillis ();		if (index < 0 || inMemory.cardinality() < 1) {			logger.warning ("nothing to swap out to read instance " + index);			return;		}		if (instancesPerPage == -1) { // set to half the # of instances we can store in mem			instancesPerPage = Math.max(size()/2,1);		}				int binToKeep = index / instancesPerPage;		int maxBin =  (size()-1) / instancesPerPage;		for (int i=0; i <= maxBin; i++) {			if (i==binToKeep || pageNotInMemory.get(i))				continue;			logger.info ("\tSwapping out page " + i);			try {				int beginIndex = i*instancesPerPage;				int endIndex = Math.min((i+1)*(instancesPerPage)-1, size()-1);				File f = new File (swapDir, id + "." + String.valueOf(i));				if (!f.exists()) { // save time by not re-writing files.					try {						ObjectOutputStream out = new ObjectOutputStream (new FileOutputStream (f));						for (int bi=beginIndex; bi <= endIndex; bi++) {							Instance inst = (Instance)instances.get(bi);														if (inst.getPipe() != null) {								inst.getPipe().getDataAlphabet().setInstanceId (new VMID());								inst.getPipe().getTargetAlphabet().setInstanceId (new VMID());							}														assert (inst != null) : "null instance while swapping out page from bin " + i;							inst.unLock();							inst.setPipe (null);							inst.setLock();							out.writeObject (inst);						}						out.close();					}					catch (Exception e) {						System.err.println (e);						System.exit(-1);					}				}						for (int bi=beginIndex; bi <= endIndex; bi++) {				instances.set(bi, null);				inMemory.set (bi, false);			}			logger.fine ("Swapping out page " + i);			pageNotInMemory.set(i, true);			}			catch (OutOfMemoryError ee) { // xxx FIX THIS SOMEHOW!				System.out.println ("Ran out of memory while swapping out.");				System.exit(-1);							}		}				if (collectGarbage)			System.gc();    long end = System.currentTimeMillis ();    logger.info ("PagedInstanceList swapout time (ms) = "+(end - start));	}			// ACCESSORS		/** Returns the <code>Instance</code> at the specified index. If	 * this Instance is not in memory, swap a block of instances back	 * into memory. */	public Instance getInstance (int index)	{		if (!inMemory.get(index))			swapIn (index);		return (Instance) instances.get (index);	}      /** Replaces the <code>Instance</code> at position   * <code>index</code> with a new one. Note that this is the only   * sanctioned way of changing an Instance. */  public void setInstance(int index, Instance instance)  {		if (!inMemory.get(index))			swapIn (index);    instances.set(index, instance);  }  /** Appends the instance to this list. Note that since memory for   * the Instance has already been allocated, no check is made to   * catch OutOfMemoryError.   * @return <code>true</code> if successful   */	public boolean add (Instance instance)	{		if (pipe == notYetSetPipe)			pipe = instance.getPipe();		else if (instance.getPipe() != pipe)			// Making sure that the Instance has the same pipe as us.			// xxx This also is a good time check that the constituent data is			// of a consistent type?			throw new IllegalArgumentException ("pipes don't match: instance: "+																					instance.getPipe()+" Instance.list: "+																					this.pipe);		if (dataClass == null) {			dataClass = instance.data.getClass();      if (pipe != null && pipe.isTargetProcessing())        targetClass = instance.target.getClass();		}		instance.setLock();		boolean ret = instances.add (instance);		inMemory.set(size()-1);		logger.finer ("Added instance " + (size()-1) + ". Free memory remaining (bytes): " +								 Runtime.getRuntime().freeMemory()); 		return ret;	}  /** Adds to this list every instance generated by the iterator,   * passing each one through this list's pipe. Checks are made to   * ensure an OutOfMemoryError is not thrown when instantiating a new   * Instance. */	public void add (PipeInputIterator pi)	{		while (pi.hasNext()) {			Instance carrier = pi.nextInstance();			add (carrier.getData(), carrier.getTarget(), carrier.name, carrier.getSource());		}	}	/** Constructs and appends an instance to this list, passing it through this   * list's pipe and assigning it the specified weight. Checks are made to   * ensure an OutOfMemoryError is not thrown when instantiating a new   * Instance.   * @return <code>true</code>   */	public boolean add (Object data, Object target, Object name, Object source, double instanceWeight)	{		Instance inst = null;		logger.fine ("Trying to add instance...");		try {			inst = new Instance (data, target, name, source, pipe);		}		catch (OutOfMemoryError e) {			logger.info ("Caught " + e + "\n Instances in memory: " + inMemory.cardinality()									 + ". Swapping out to free memory."); 			inst = null;			if (collectGarbage) System.gc();			swapOutExcept (size());			logger.info ("After paging, InstanceList.size:" + size() + " Instances in memory: " +									 inMemory.cardinality() + " Free Memory (bytes): " + Runtime.getRuntime().freeMemory());			try { 				inst = new Instance (data, target, name, source, pipe);			}			catch (OutOfMemoryError ee) {				inst = null;															logger.warning ("Still insufficient memory after swapping to disk. Instance too large to fit in memory?");				System.exit(-1);			}		}		 boolean retVal = add (inst, instanceWeight);    if ((instancesPerPage > 0) && (inMemory.cardinality () > instancesPerPage)) {      logger.info ("Page size "+instancesPerPage+" exceeded.  Forcing swap.  Instances in memory: " +									 inMemory.cardinality() + " Free Memory (bytes): " + Runtime.getRuntime().freeMemory());      if (collectGarbage) System.gc ();      swapOutExcept (size());      logger.info ("After paging, InstanceList.size:" + size() + " Instances in memory: " +                   inMemory.cardinality() + " Free Memory (bytes): " + Runtime.getRuntime().freeMemory());    }    return retVal;	}	public void setCollectGarbage (boolean b) { this.collectGarbage = b; }	public boolean collectGarbage () { return this.collectGarbage; }	public InstanceList shallowClone ()	{		PagedInstanceList ret = new PagedInstanceList (pipe, instances.size(), -1, swapDir);		for (int i = 0; i < instances.size(); i++)			ret.add (getInstance(i));		if (instanceWeights == null)			ret.instanceWeights = null;		else			ret.instanceWeights = instanceWeights.cloneDoubleList();		return ret;	}	public InstanceList cloneEmpty ()	{		PagedInstanceList ret = new PagedInstanceList (pipe, size(), instancesPerPage, swapDir );		ret.instanceWeights = instanceWeights == null ? null : (DoubleList) instanceWeights.clone();		// xxx Should the featureSelection and perLabel... be cloned?		// Note that RoostingTrainer currently depends on not cloning its splitting.		ret.featureSelection = this.featureSelection;		ret.perLabelFeatureSelection = this.perLabelFeatureSelection;		ret.dataClass = this.dataClass;		ret.targetClass = this.targetClass;		ret.dataVocab = this.dataVocab;		ret.targetVocab = this.targetVocab;		ret.collectGarbage = this.collectGarbage;		return ret;	}	/** Constructs a new <code>InstanceList</code>, deserialized from			<code>file</code>.  If the string value of <code>file</code> is			"-", then deserialize from {@link System.in}. */	public static InstanceList load (File file)	{		try {			ObjectInputStream ois;			if (file.toString().equals("-"))				ois = new ObjectInputStream (System.in);			else				ois = new ObjectInputStream (new FileInputStream (file));			PagedInstanceList ilist = (PagedInstanceList) ois.readObject();			ois.close();			return ilist;		} catch (Exception e) {			e.printStackTrace();			throw new IllegalArgumentException ("Couldn't read PagedInstanceList from file "+file);		}	}	// Serialization of PagedInstanceList	private static final long serialVersionUID = 1;	private static final int CURRENT_SERIAL_VERSION = 1;			private void writeObject (ObjectOutputStream out) throws IOException {		int i, size;		out.writeInt (CURRENT_SERIAL_VERSION);		out.writeObject (id);		out.writeObject (instances);		out.writeObject(instanceWeights);		out.writeObject(pipe);		// memory attributes		out.writeInt (instancesPerPage);		out.writeObject (swapDir);		out.writeObject (inMemory);		out.writeObject (pageNotInMemory);		out.writeBoolean (collectGarbage);	}			private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException {		int i, size;		int version = in.readInt ();		id = (VMID) in.readObject ();		instances = (ArrayList) in.readObject();		instanceWeights = (DoubleList) in.readObject();		pipe = (Pipe) in.readObject();		// memory attributes		instancesPerPage = in.readInt ();		swapDir = (File) in.readObject ();		inMemory = (BitSet) in.readObject ();		pageNotInMemory = (BitSet) in.readObject ();		collectGarbage = in.readBoolean ();	}}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?