MaxEntTrainer.java
      // Create the list of error tokens
      InstanceList errorInstances = new InstanceList (trainingData.getDataAlphabet(),
                                                      trainingData.getTargetAlphabet());
      // This errorInstances.featureSelection will get examined by FeatureInducer,
      // so it can know how to add "new" singleton features
      errorInstances.setFeatureSelection (globalFS);
      List errorLabelVectors = new ArrayList();  // these are length-1 vectors
      for (int i = 0; i < trainingData.size(); i++) {
        Instance instance = trainingData.getInstance(i);
        FeatureVector inputVector = (FeatureVector) instance.getData();
        Label trueLabel = (Label) instance.getTarget();
        // Having trained using just the current features, see how we classify
        // the training data now.
        Classification classification = maxent.classify(instance);
        if (!classification.bestLabelIsCorrect()) {
          errorInstances.add(inputVector, trueLabel, null, null);
          errorLabelVectors.add(classification.getLabelVector());
        }
      }
      logger.info ("Error instance list size = "+errorInstances.size());
      int s = errorLabelVectors.size();
      LabelVector[] lvs = new LabelVector[s];
      for (int i = 0; i < s; i++) {
        lvs[i] = (LabelVector) errorLabelVectors.get(i);
      }
      RankedFeatureVector.Factory gainFactory = null;
      if (gainName.equals (EXP_GAIN))
        gainFactory = new ExpGain.Factory (lvs, gaussianPriorVariance);
      else if (gainName.equals (GRADIENT_GAIN))
        gainFactory = new GradientGain.Factory (lvs);
      else if (gainName.equals (INFORMATION_GAIN))
        gainFactory = new InfoGain.Factory ();
      else
        throw new IllegalArgumentException ("Unsupported gain name: "+gainName);
      FeatureInducer klfi = new FeatureInducer (gainFactory, errorInstances,
                                                numFeaturesPerFeatureInduction,
                                                2*numFeaturesPerFeatureInduction,
                                                2*numFeaturesPerFeatureInduction);
      // Note that this adds features globally, but not on a per-transition basis
      klfi.induceFeaturesFor (trainingData, false, false);
      if (testingData != null)
        klfi.induceFeaturesFor (testingData, false, false);
      logger.info ("MaxEnt FeatureSelection now includes "+globalFS.cardinality()+" features");
      klfi = null;

      double[] newParameters = new double[(1+inputAlphabet.size()) * outputAlphabet.size()];
      // XXX (Executing this block often causes an error during training; I don't know why.)
      if (saveParametersDuringFI) {
        // Keep current parameter values
        // XXX This relies on the implementation detail that the most recent features
        // added to an Alphabet get the highest indices.

        // Count parameters per output label
        int oldParamCount = maxent.parameters.length / outputAlphabet.size();
        int newParamCount = 1+inputAlphabet.size();
        // Copy params into the proper locations
        for (int i = 0; i < outputAlphabet.size(); i++) {
          System.arraycopy (maxent.parameters, i*oldParamCount,
                            newParameters, i*newParamCount, oldParamCount);
        }
        for (int i = 0; i < oldParamCount; i++)
          if (maxent.parameters[i] != newParameters[i]) {
            System.out.println (maxent.parameters[i]+" "+newParameters[i]);
            System.exit(0);
          }
      }
      maxent.parameters = newParameters;
      maxent.defaultFeatureIndex = inputAlphabet.size();
    }  // Finished feature induction
    logger.info ("Ended with "+globalFS.cardinality()+" features.");
    setNumIterations (totalIterations - trainingIteration);
    return this.train (trainingData, validationData, testingData, evaluator, maxent);
  }
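  /*
   * Expository note (not part of the original source): the block above is
   * error-driven feature induction.  Only the instances the current model
   * misclassifies are collected; the selected gain criterion then ranks
   * candidate features on that error set, and the FeatureInducer adds the
   * top-ranked ones to the global FeatureSelection before training resumes.
   * As a concrete example of such a criterion, information gain scores a
   * binary feature f by the reduction in label entropy it buys:
   *   IG(f) = H(Y) - P(f) H(Y|f) - P(!f) H(Y|!f)
   */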
  // XXX Should these really be public?  Why?

  /** Counts how many times this trainer has computed the gradient of the
   *  log probability of training labels. */
  public int getValueGradientCalls () { return numGetValueGradientCalls; }

  /** Counts how many times this trainer has computed the
   *  log probability of training labels. */
  public int getValueCalls () { return numGetValueCalls; }

  //public int getIterations () { return maximizerByGradient.getIterations(); }

  public String toString ()
  {
    return "MaxEntTrainer"
      //+ "("+maximizerClass.getName()+") "
      + ",numIterations=" + numIterations
      + (usingHyperbolicPrior
         ? (",hyperbolicPriorSlope="+hyperbolicPriorSlope
            + ",hyperbolicPriorSharpness="+hyperbolicPriorSharpness)
         : (",gaussianPriorVariance="+gaussianPriorVariance));
  }
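  /*
   * Expository note (not part of the original source): the inner class below
   * is the objective handed to the gradient optimizer.  With a Gaussian prior
   * of variance sigma^2 on every parameter, the value it reports is
   *   L(theta) = sum_i w_i * log p(y_i | x_i; theta) - sum_j theta_j^2 / (2 sigma^2)
   * and the partial derivative for the parameter tied to (label y, feature f) is
   *   dL/dtheta_{y,f} = constraint_{y,f} - expectation_{y,f} - theta_{y,f} / sigma^2,
   * i.e. empirical feature count minus expected count minus the prior term,
   * which is exactly the comment at the top of getValueGradient().
   */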
  // A private inner class that wraps up a MaxEnt classifier and its training data.
  // The result is a maximize.Maximizable function.
  private class MaximizableTrainer implements Maximizable.ByGradient
  {
    double[] parameters, constraints, cachedGradient;
    MaxEnt theClassifier;
    InstanceList trainingList;
    // The expectations are (temporarily) stored in the cachedGradient
    double cachedValue;
    boolean cachedValueStale;
    boolean cachedGradientStale;
    int numLabels;
    int numFeatures;
    int defaultFeatureIndex;  // just for clarity
    FeatureSelection featureSelection;
    FeatureSelection[] perLabelFeatureSelection;

    public MaximizableTrainer () {}

    public MaximizableTrainer (InstanceList ilist, MaxEnt initialClassifier)
    {
      this.trainingList = ilist;
      Alphabet fd = ilist.getDataAlphabet();
      LabelAlphabet ld = (LabelAlphabet) ilist.getTargetAlphabet();
      // Don't fd.stopGrowth, because someone might want to do feature induction
      ld.stopGrowth();
      // Add one feature for the "default feature".
      this.numLabels = ld.size();
      this.numFeatures = fd.size() + 1;
      this.defaultFeatureIndex = numFeatures - 1;
      this.parameters = new double [numLabels * numFeatures];
      this.constraints = new double [numLabels * numFeatures];
      this.cachedGradient = new double [numLabels * numFeatures];
      Arrays.fill (parameters, 0.0);
      Arrays.fill (constraints, 0.0);
      Arrays.fill (cachedGradient, 0.0);
      this.featureSelection = ilist.getFeatureSelection();
      this.perLabelFeatureSelection = ilist.getPerLabelFeatureSelection();
      // Add the default feature index to the selection
      if (featureSelection != null)
        featureSelection.add (defaultFeatureIndex);
      if (perLabelFeatureSelection != null)
        for (int i = 0; i < perLabelFeatureSelection.length; i++)
          perLabelFeatureSelection[i].add (defaultFeatureIndex);
      // xxx Later change this to allow both to be set, but select which one to use by a boolean flag?
      assert (featureSelection == null || perLabelFeatureSelection == null);
      if (initialClassifier != null) {
        this.theClassifier = initialClassifier;
        this.parameters = theClassifier.parameters;
        this.featureSelection = theClassifier.featureSelection;
        this.perLabelFeatureSelection = theClassifier.perClassFeatureSelection;
        this.defaultFeatureIndex = theClassifier.defaultFeatureIndex;
        assert (initialClassifier.getInstancePipe() == ilist.getPipe());
      }
      else if (this.theClassifier == null) {
        this.theClassifier = new MaxEnt (ilist.getPipe(), parameters,
                                         featureSelection, perLabelFeatureSelection);
      }
      cachedValueStale = true;
      cachedGradientStale = true;

      // Initialize the constraints
      InstanceList.Iterator iter = trainingList.iterator ();
      logger.fine ("Number of instances in training list = " + trainingList.size());
      while (iter.hasNext()) {
        double instanceWeight = iter.getInstanceWeight();
        Instance inst = iter.nextInstance();
        Labeling labeling = inst.getLabeling ();
        //logger.fine ("Instance "+ii+" labeling="+labeling);
        FeatureVector fv = (FeatureVector) inst.getData ();
        Alphabet fdict = fv.getAlphabet();
        assert (fv.getAlphabet() == fd);
        int li = labeling.getBestIndex();
        MatrixOps.rowPlusEquals (constraints, numFeatures, li, fv, instanceWeight);
        assert (!Double.isNaN(instanceWeight)) : "instanceWeight is NaN";
        assert (li >= 0) : "bestIndex is invalid";
        boolean hasNaN = false;
        for (int i = 0; i < fv.numLocations(); i++) {
          if (Double.isNaN (fv.valueAtLocation(i))) {
            logger.info ("NaN for feature " + fdict.lookupObject (fv.indexAtLocation(i)).toString());
            hasNaN = true;
          }
        }
        if (hasNaN)
          logger.info ("NaN in instance: " + inst.getName());
        // For the default feature, whose weight is 1.0
        constraints[li*numFeatures + defaultFeatureIndex] += 1.0 * instanceWeight;
      }
      //TestMaximizable.testValueAndGradientCurrentParameters (this);
    }

    public MaxEnt getClassifier () { return theClassifier; }

    public double getParameter (int index) { return parameters[index]; }

    public void setParameter (int index, double v)
    {
      cachedValueStale = true;
      cachedGradientStale = true;
      parameters[index] = v;
    }

    public int getNumParameters () { return parameters.length; }

    public void getParameters (double[] buff)
    {
      if (buff == null || buff.length != parameters.length)
        buff = new double [parameters.length];
      System.arraycopy (parameters, 0, buff, 0, parameters.length);
    }

    public void setParameters (double[] buff)
    {
      assert (buff != null);
      cachedValueStale = true;
      cachedGradientStale = true;
      if (buff.length != parameters.length)
        parameters = new double[buff.length];
      System.arraycopy (buff, 0, parameters, 0, buff.length);
    }
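    // Expository note (not part of the original source): getValue() and
    // getValueGradient() below memoize their results in cachedValue and
    // cachedGradient; setParameter()/setParameters() mark both caches stale.
    // getValue() also leaves the negated model expectations in cachedGradient,
    // which getValueGradient() then completes into a full gradient.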
    // log probability of the training labels
    public double getValue ()
    {
      if (cachedValueStale) {
        numGetValueCalls++;
        cachedValue = 0;
        // We'll store the expectation values in "cachedGradient" for now
        cachedGradientStale = true;
        MatrixOps.setAll (cachedGradient, 0.0);
        // Incorporate likelihood of data
        double[] scores = new double[trainingList.getTargetAlphabet().size()];
        double value = 0.0;
        //System.out.println ("I Now "+inputAlphabet.size()+" regular features.");
        InstanceList.Iterator iter = trainingList.iterator();
        int ii = 0;
        while (iter.hasNext()) {
          ii++;
          double instanceWeight = iter.getInstanceWeight();
          Instance instance = iter.nextInstance();
          Labeling labeling = instance.getLabeling ();
          //System.out.println ("L Now "+inputAlphabet.size()+" regular features.");
          this.theClassifier.getClassificationScores (instance, scores);
          FeatureVector fv = (FeatureVector) instance.getData ();
          int li = labeling.getBestIndex();
          value = - (instanceWeight * Math.log (scores[li]));
          if (Double.isNaN (value)) {
            logger.fine ("MaxEntTrainer: Instance " + instance.getName()
                         + " has NaN value. log(scores)= " + Math.log (scores[li])
                         + " scores = " + scores[li]
                         + " has instance weight = " + instanceWeight);
          }
          if (Double.isInfinite (value)) {
            logger.warning ("Instance " + instance.getSource()
                            + " has infinite value; skipping value and gradient");
            cachedValue -= value;
            cachedValueStale = false;
            return -value;
            // continue;
          }
          cachedValue += value;
          for (int si = 0; si < scores.length; si++) {
            if (scores[si] == 0) continue;
            assert (!Double.isInfinite (scores[si]));
            MatrixOps.rowPlusEquals (cachedGradient, numFeatures, si, fv,
                                     -instanceWeight * scores[si]);
            cachedGradient[numFeatures*si + defaultFeatureIndex] += (-instanceWeight * scores[si]);
          }
        }
        //logger.info ("-Expectations:"); cachedGradient.print();

        // Incorporate prior on parameters
        if (usingHyperbolicPrior) {
          for (int li = 0; li < numLabels; li++)
            for (int fi = 0; fi < numFeatures; fi++)
              cachedValue += (hyperbolicPriorSlope / hyperbolicPriorSharpness
                              * Math.log (Maths.cosh (hyperbolicPriorSharpness * parameters[li*numFeatures + fi])));
        }
        else {
          for (int li = 0; li < numLabels; li++)
            for (int fi = 0; fi < numFeatures; fi++) {
              double param = parameters[li*numFeatures + fi];
              cachedValue += param * param / (2 * gaussianPriorVariance);
            }
        }
        cachedValue *= -1.0;  // MAXIMIZE, NOT MINIMIZE
        cachedValueStale = false;
        progressLogger.info ("Value (loglikelihood) = " + cachedValue);
      }
      return cachedValue;
    }

    public void getValueGradient (double[] buffer)
    {
      // Gradient is (constraint - expectation - parameters/gaussianPriorVariance)
      if (cachedGradientStale) {
        numGetValueGradientCalls++;
        if (cachedValueStale)
          // This will fill in the cachedGradient with the "-expectation"
          getValue ();
        MatrixOps.plusEquals (cachedGradient, constraints);
        // Incorporate prior on parameters
        if (usingHyperbolicPrior) {
          throw new UnsupportedOperationException ("Hyperbolic prior not yet implemented.");
        }
        else {
          MatrixOps.plusEquals (cachedGradient, parameters, -1.0 / gaussianPriorVariance);
        }
        // A parameter may be set to -infinity by an external user.
        // We set gradient to 0 because the parameter's value can
        // never change anyway and it will mess up future calculations
        // on the matrix, such as norm().
        MatrixOps.substitute (cachedGradient, Double.NEGATIVE_INFINITY, 0.0);
        // Set to zero all the gradient dimensions that are not among the selected features
        if (perLabelFeatureSelection == null) {
          for (int labelIndex = 0; labelIndex < numLabels; labelIndex++)
            MatrixOps.rowSetAll (cachedGradient, numFeatures,
                                 labelIndex, 0.0, featureSelection, false);
        }
        else {
          for (int labelIndex = 0; labelIndex < numLabels; labelIndex++)
            MatrixOps.rowSetAll (cachedGradient, numFeatures,
                                 labelIndex, 0.0, perLabelFeatureSelection[labelIndex], false);
        }
        cachedGradientStale = false;
      }
      assert (buffer != null && buffer.length == parameters.length);
      System.arraycopy (cachedGradient, 0, buffer, 0, cachedGradient.length);
    }
  }
}
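A minimal usage sketch, not from the original file: it assumes the pre-2.0 MALLET package layout (edu.umass.cs.mallet.base.*), an InstanceList whose pipe produces FeatureVector data with Label targets, and a setGaussianPriorVariance setter matching the gaussianPriorVariance field used above; the five-argument train(...) call mirrors the one the feature-induction code makes. MaxEntDemo and its demo method are hypothetical names.

    import edu.umass.cs.mallet.base.classify.*;
    import edu.umass.cs.mallet.base.types.*;

    public class MaxEntDemo {
      // "training" must be an InstanceList whose pipe produces
      // FeatureVector data with Label targets.
      public static void demo (InstanceList training) {
        MaxEntTrainer trainer = new MaxEntTrainer ();
        trainer.setGaussianPriorVariance (1.0);  // assumed setter for the prior used in getValue()
        // Same five-argument form the feature-induction code calls above;
        // validation/testing sets, evaluator, and initial classifier are omitted.
        MaxEnt maxent = (MaxEnt) trainer.train (training, null, null, null, null);
        Classification c = maxent.classify (training.getInstance (0));
        System.out.println ("best label index = " + c.getLabelVector ().getBestIndex ()
                            + ", correct: " + c.bestLabelIsCorrect ());
      }
    }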