📄 crf3.java

📁 常用机器学习算法,java编写源代码,内含常用分类算法,包括说明文档
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
		}	}

	/**
	 * Adds one state per output label, creating only the label-to-label
	 * transitions flagged by <code>labelConnectionsIn(trainingSet)</code>.
	 * Each transition is given two weight-set names: the destination
	 * label itself, so that all transitions entering a state share the
	 * same observational-feature-test weights, and a
	 * "source->destination" name whose feature selection is replaced by
	 * an empty one, leaving only the default feature (which acts as an
	 * HMM-style transition probability).
	 * The initial and final cost of state <em>i</em> are read from
	 * <code>priorInitialCost[i]</code> and <code>priorFinalCost[i]</code>
	 * after <code>priorCost(trainingSet)</code> has been invoked.
	 *
	 * @param trainingSet instances used to find label connections and
	 * to supply the data alphabet for the empty feature selections
	 */
	public void addStatesForThreeQuarterLabelsConnectedAsIn (InstanceList trainingSet)
	{
		priorCost(trainingSet);
		int numLabels = outputAlphabet.size();
		boolean[][] connections = labelConnectionsIn (trainingSet);
		for (int src = 0; src < numLabels; src++) {
			// First pass: how many destinations does this source label have?
			int fanOut = 0;
			for (int dst = 0; dst < numLabels; dst++) {
				if (connections[src][dst]) {
					fanOut++;
				}
			}
			String[] destinationNames = new String[fanOut];
			String[][] weightNames = new String[fanOut][];
			// Second pass: fill in the names for every connected destination.
			int slot = 0;
			for (int dst = 0; dst < numLabels; dst++) {
				if (!connections[src][dst]) {
					continue;
				}
				String destLabel = (String)outputAlphabet.lookupObject(dst);
				String transitionName =
					(String)outputAlphabet.lookupObject(src) + "->" + (String)outputAlphabet.lookupObject(dst);
				destinationNames[slot] = destLabel;
				// [0]: the "half-label" weights, which include all observational tests.
				// [1]: the "transition" weights, which include only the default feature.
				weightNames[slot] = new String[] { destLabel, transitionName };
				// NOTE(review): the index lookup is deliberately its own statement; if
				// getWeightsIndex can replace the featureSelections array, folding the
				// call into the subscript would read the old array first -- confirm.
				int wi = getWeightsIndex (transitionName);
				// A new empty FeatureSelection won't allow any features here, so we
				// only get the default feature for transitions.
				featureSelections[wi] = new FeatureSelection(trainingSet.getDataAlphabet());
				slot++;
			}
			addState ((String)outputAlphabet.lookupObject(src),
								priorInitialCost[src], priorFinalCost[src],
								destinationNames, destinationNames, weightNames);
		}
	}
public void addFullyConnectedStatesForThreeQuarterLabels (InstanceList trainingSet)	{		int numLabels = outputAlphabet.size();		for (int i = 0; i < numLabels; i++) {			String[] destinationNames = new String[numLabels];			String[][] weightNames = new String[numLabels][];			for (int j = 0; j < numLabels; j++) {				String labelName = (String)outputAlphabet.lookupObject(j);				destinationNames[j] = labelName;				weightNames[j] = new String[2];				// The "half-labels" will include all observational tests				weightNames[j][0] = labelName;				// The "transition" weights will include only the default feature				String wn = (String)outputAlphabet.lookupObject(i) + "->" + (String)outputAlphabet.lookupObject(j);				weightNames[j][1] = wn;				int wi = getWeightsIndex (wn);				// A new empty FeatureSelection won't allow any features here, so we only				// get the default feature for transitions				featureSelections[wi] = new FeatureSelection(trainingSet.getDataAlphabet());			}			addState ((String)outputAlphabet.lookupObject(i), 0.0, 0.0,								destinationNames, destinationNames, weightNames);		}	}		public void addFullyConnectedStatesForBiLabels ()	{		String[] labels = new String[outputAlphabet.size()];		// This is assuming the the entries in the outputAlphabet are Strings!		for (int i = 0; i < outputAlphabet.size(); i++) {			logger.info ("CRF: outputAlphabet.lookup class = "+									 outputAlphabet.lookupObject(i).getClass().getName());			labels[i] = (String) outputAlphabet.lookupObject(i);		}		for (int i = 0; i < labels.length; i++) {			for (int j = 0; j < labels.length; j++) {				String[] destinationNames = new String[labels.length];				for (int k = 0; k < labels.length; k++)					destinationNames[k] = labels[j]+LABEL_SEPARATOR+labels[k];				addState (labels[i]+LABEL_SEPARATOR+labels[j], 0.0, 0.0,									destinationNames, labels);			}		}	}	/** Add states to create a second-order Markov model on labels,			adding only those transitions the occur in the given			trainingSet. 
*/	public void addStatesForBiLabelsConnectedAsIn (InstanceList trainingSet)	{		priorCost(trainingSet);		int numLabels = outputAlphabet.size();		boolean[][] connections = labelConnectionsIn (trainingSet);		for (int i = 0; i < numLabels; i++) {			for (int j = 0; j < numLabels; j++) {				if (!connections[i][j])					continue;				int numDestinations = 0;				for (int k = 0; k < numLabels; k++)					if (connections[j][k]) numDestinations++;				String[] destinationNames = new String[numDestinations];				String[] labels = new String[numDestinations];				int destinationIndex = 0;				for (int k = 0; k < numLabels; k++)					if (connections[j][k]) {						destinationNames[destinationIndex] =							(String)outputAlphabet.lookupObject(j)+LABEL_SEPARATOR+(String)outputAlphabet.lookupObject(k);						labels[destinationIndex] = (String)outputAlphabet.lookupObject(k);						destinationIndex++;					}				addState ((String)outputAlphabet.lookupObject(i)+LABEL_SEPARATOR+									(String)outputAlphabet.lookupObject(j), 0.0, 0.0,									destinationNames, labels);			}		}	}		public void addFullyConnectedStatesForTriLabels ()	{		String[] labels = new String[outputAlphabet.size()];		// This is assuming the the entries in the outputAlphabet are Strings!		
for (int i = 0; i < outputAlphabet.size(); i++) {			logger.info ("CRF: outputAlphabet.lookup class = "+									 outputAlphabet.lookupObject(i).getClass().getName());			labels[i] = (String) outputAlphabet.lookupObject(i);		}		for (int i = 0; i < labels.length; i++) {			for (int j = 0; j < labels.length; j++) {				for (int k = 0; k < labels.length; k++) {					String[] destinationNames = new String[labels.length];					for (int l = 0; l < labels.length; l++)						destinationNames[l] = labels[j]+LABEL_SEPARATOR+labels[k]+LABEL_SEPARATOR+labels[l];					addState (labels[i]+LABEL_SEPARATOR+labels[j]+LABEL_SEPARATOR+labels[k], 0.0, 0.0,										destinationNames, labels);				}			}		}	}		public void addSelfTransitioningStateForAllLabels (String name)	{		String[] labels = new String[outputAlphabet.size()];		String[] destinationNames  = new String[outputAlphabet.size()];		// This is assuming the the entries in the outputAlphabet are Strings!		for (int i = 0; i < outputAlphabet.size(); i++) {			logger.info ("CRF: outputAlphabet.lookup class = "+													outputAlphabet.lookupObject(i).getClass().getName());			labels[i] = (String) outputAlphabet.lookupObject(i);			destinationNames[i] = name;		}		addState (name, 0.0, 0.0, destinationNames, labels);	}  private String concatLabels(String[] labels)  {    String sep = "";    StringBuffer buf = new StringBuffer();    for (int i = 0; i < labels.length; i++)    {      buf.append(sep).append(labels[i]);      sep = LABEL_SEPARATOR;    }    return buf.toString();  }    private String nextKGram(String[] history, int k, String next)  {    String sep = "";    StringBuffer buf = new StringBuffer();    int start = history.length + 1 - k;    for (int i = start; i < history.length; i++)    {      buf.append(sep).append(history[i]);      sep = LABEL_SEPARATOR;    }    buf.append(sep).append(next);    return buf.toString();  }    private boolean allowedTransition(String prev, String curr,                                    Pattern no, Pattern 
yes)  {    String pair = concatLabels(new String[]{prev, curr});    if (no != null && no.matcher(pair).matches())      return false;    if (yes != null && !yes.matcher(pair).matches())      return false;    return true;  }      private boolean allowedHistory(String[] history, Pattern no, Pattern yes) {    for (int i = 1; i < history.length; i++)      if (!allowedTransition(history[i-1], history[i], no, yes))        return false;    return true;  }  /**   * Assumes that the CRF's output alphabet contains   * <code>String</code>s. Creates an order-<em>n</em> CRF with input   * predicates and output labels given by <code>trainingSet</code>   * and order, connectivity, and weights given by the remaining   * arguments.   *   * @param trainingSet the training instances   * @param orders an array of increasing non-negative numbers giving   * the orders of the features for this CRF. The largest number   * <em>n</em> is the Markov order of the CRF. States are   * <em>n</em>-tuples of output labels. Each of the other numbers   * <em>k</em> in <code>orders</code> represents a weight set shared   * by all destination states whose last (most recent) <em>k</em>   * labels agree. If <code>orders</code> is <code>null</code>, an   * order-0 CRF is built.   * @param defaults If non-null, it must be the same length as   * <code>orders</code>, with <code>true</code> positions indicating   * that the weight set for the corresponding order contains only the   * weight for a default feature; otherwise, the weight set has   * weights for all features built from input predicates.   * @param start The label that represents the context of the start of   * a sequence. It may be also used for sequence labels.   * @param forbidden If non-null, specifies what pairs of successive   * labels are not allowed, both for constructing <em>n</em>order   * states or for transitions. 
A label pair (<em>u</em>,<em>v</em>)   * is not allowed if <em>u</em> + "," + <em>v</em> matches   * <code>forbidden</code>.   * @param allowed If non-null, specifies what pairs of successive   * labels are allowed, both for constructing <em>n</em>order   * states or for transitions. A label pair (<em>u</em>,<em>v</em>)   * is allowed only if <em>u</em> + "," + <em>v</em> matches   * <code>allowed</code>.   * @param fullyConnected Whether to include all allowed transitions,   * even those not occurring in <code>trainingSet</code>,   * @return The name of the start state.   *    */  public String addOrderNStates(InstanceList trainingSet, int[] orders,                                boolean[] defaults, String start,                                Pattern forbidden, Pattern allowed,                                boolean fullyConnected)  {    boolean[][] connections = null;    if (!fullyConnected)      connections = labelConnectionsIn (trainingSet);    int order = -1;    if (defaults != null && defaults.length != orders.length)      throw new IllegalArgumentException("Defaults must be null or match orders");    if (orders == null)      order = 0;    else    {      for (int i = 0; i < orders.length; i++)        if (orders[i] <= order)          throw new IllegalArgumentException("Orders must be non-negative and in ascending order");        else           order = orders[i];      if (order < 0) order = 0;    }    if (order > 0)    {      int[] historyIndexes = new int[order];      String[] history = new String[order];      String label0 = (String)outputAlphabet.lookupObject(0);      for (int i = 0; i < order; i++)        history[i] = label0;      int numLabels = outputAlphabet.size();      while (historyIndexes[0] < numLabels)      {        logger.info("Preparing " + concatLabels(history));        if (allowedHistory(history, forbidden, allowed))        {          String stateName = concatLabels(history);          int nt = 0;          String[] destNames = new 
String[numLabels];          String[] labelNames = new String[numLabels];          String[][] weightNames = new String[numLabels][orders.length];          for (int nextIndex = 0; nextIndex < numLabels; nextIndex++)          {            String next = (String)outputAlphabet.lookupObject(nextIndex);            if (allowedTransition(history[order-1], next, forbidden, allowed)                && (fullyConnected ||                    connections[historyIndexes[order-1]][nextIndex]))            {              destNames[nt] = nextKGram(history, order, next);              labelNames[nt] = next;              for (int i = 0; i < orders.length; i++)              {                weightNames[nt][i] = nextKGram(history, orders[i]+1, next);                if (defaults != null && defaults[i])                  featureSelections[getWeightsIndex(weightNames[nt][i])] =                    new FeatureSelection(trainingSet.getDataAlphabet());              }              nt++;            }          }          if (nt < numLabels)          {            String[] newDestNames = new String[nt];            String[] newLabelNames = new String[nt];            String[][] newWeightNames = new String[nt][];            for (int t = 0; t < nt; t++)            {              newDestNames[t] = destNames[t];              newLabelNames[t] = labelNames[t];              newWeightNames[t] = weightNames[t];            }            destNames = newDestNames;            labelNames = newLabelNames;            weightNames = newWeightNames;          }          for (int i = 0; i < destNames.length; i++)          {            StringBuffer b = new StringBuffer();            for (int j = 0; j < orders.length; j++)              b.append(" ").append(weightNames[i][j]);            logger.info(stateName + "->" + destNames[i] +                        "(" + labelNames[i] + ")" + b.toString());          }          addState (stateName, 0.0, 0.0, destNames, labelNames, weightNames);        }        for (int o = order-1; o >= 0; 
o--)           if (++historyIndexes[o] < numLabels)          {            history[o] = (String)outputAlphabet.lookupObject(historyIndexes[o]);            break;          } else if (o > 0)          {            historyIndexes[o] = 0;            history[o] = label0;          }      }      for (int i = 0; i < order; i++)        history[i] = start;      return concatLabels(history);    }    else    {      String[] stateNames = new String[outputAlphabet.size()];
💿 文件大小 5351 K
👤 上传用户 lihuitao1987
📂 所属分类数学计算
🏷️ 相关标签

#java #机器学习 #分类算法 #文档
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -