📄 crf3.java
字号:
} } /** Add as many states as there are labels, but don't create separate observational-test-weights for each source-destination pair of states---instead have all the incoming transitions to a state share the same observational-feature-test weights. However, do create separate default feature for each transition, (which acts as an HMM-style transition probability). */ public void addStatesForThreeQuarterLabelsConnectedAsIn (InstanceList trainingSet) { priorCost(trainingSet); int numLabels = outputAlphabet.size(); boolean[][] connections = labelConnectionsIn (trainingSet); for (int i = 0; i < numLabels; i++) { int numDestinations = 0; for (int j = 0; j < numLabels; j++) if (connections[i][j]) numDestinations++; String[] destinationNames = new String[numDestinations]; String[][] weightNames = new String[numDestinations][]; int destinationIndex = 0; for (int j = 0; j < numLabels; j++) if (connections[i][j]) { String labelName = (String)outputAlphabet.lookupObject(j); destinationNames[destinationIndex] = labelName; weightNames[destinationIndex] = new String[2]; // The "half-labels" will include all observational tests weightNames[destinationIndex][0] = labelName; // The "transition" weights will include only the default feature String wn = (String)outputAlphabet.lookupObject(i) + "->" + (String)outputAlphabet.lookupObject(j); weightNames[destinationIndex][1] = wn; int wi = getWeightsIndex (wn); // A new empty FeatureSelection won't allow any features here, so we only // get the default feature for transitions featureSelections[wi] = new FeatureSelection(trainingSet.getDataAlphabet()); destinationIndex++; }// addState ((String)outputAlphabet.lookupObject(i), 0.0, 0.0,// destinationNames, destinationNames, weightNames); addState ((String)outputAlphabet.lookupObject(i), priorInitialCost[i], priorFinalCost[i], destinationNames, destinationNames, weightNames); } } public void addFullyConnectedStatesForThreeQuarterLabels (InstanceList trainingSet) { int numLabels = 
outputAlphabet.size(); for (int i = 0; i < numLabels; i++) { String[] destinationNames = new String[numLabels]; String[][] weightNames = new String[numLabels][]; for (int j = 0; j < numLabels; j++) { String labelName = (String)outputAlphabet.lookupObject(j); destinationNames[j] = labelName; weightNames[j] = new String[2]; // The "half-labels" will include all observational tests weightNames[j][0] = labelName; // The "transition" weights will include only the default feature String wn = (String)outputAlphabet.lookupObject(i) + "->" + (String)outputAlphabet.lookupObject(j); weightNames[j][1] = wn; int wi = getWeightsIndex (wn); // A new empty FeatureSelection won't allow any features here, so we only // get the default feature for transitions featureSelections[wi] = new FeatureSelection(trainingSet.getDataAlphabet()); } addState ((String)outputAlphabet.lookupObject(i), 0.0, 0.0, destinationNames, destinationNames, weightNames); } } public void addFullyConnectedStatesForBiLabels () { String[] labels = new String[outputAlphabet.size()]; // This is assuming the the entries in the outputAlphabet are Strings! for (int i = 0; i < outputAlphabet.size(); i++) { logger.info ("CRF: outputAlphabet.lookup class = "+ outputAlphabet.lookupObject(i).getClass().getName()); labels[i] = (String) outputAlphabet.lookupObject(i); } for (int i = 0; i < labels.length; i++) { for (int j = 0; j < labels.length; j++) { String[] destinationNames = new String[labels.length]; for (int k = 0; k < labels.length; k++) destinationNames[k] = labels[j]+LABEL_SEPARATOR+labels[k]; addState (labels[i]+LABEL_SEPARATOR+labels[j], 0.0, 0.0, destinationNames, labels); } } } /** Add states to create a second-order Markov model on labels, adding only those transitions the occur in the given trainingSet. 
*/
  public void addStatesForBiLabelsConnectedAsIn (InstanceList trainingSet)
  {
    // NOTE(review): priorCost is invoked, yet every state below is added
    // with 0.0 initial/final cost -- confirm whether that is intended.
    priorCost(trainingSet);
    int numLabels = outputAlphabet.size();
    boolean[][] connections = labelConnectionsIn (trainingSet);
    for (int i = 0; i < numLabels; i++) {
      for (int j = 0; j < numLabels; j++) {
        // A bi-label state (i,j) exists only if label i is connected to label j.
        if (!connections[i][j]) {
          continue;
        }

        // First pass: count the successors of j so the arrays are sized exactly.
        int numDestinations = 0;
        for (int k = 0; k < numLabels; k++) {
          if (connections[j][k]) {
            numDestinations++;
          }
        }

        String[] destinationNames = new String[numDestinations];
        String[] labels = new String[numDestinations];
        int destinationIndex = 0;
        for (int k = 0; k < numLabels; k++) {
          if (connections[j][k]) {
            // State (i,j) moves to state (j,k) and outputs label k.
            destinationNames[destinationIndex] =
              (String)outputAlphabet.lookupObject(j)+LABEL_SEPARATOR+(String)outputAlphabet.lookupObject(k);
            labels[destinationIndex] = (String)outputAlphabet.lookupObject(k);
            destinationIndex++;
          }
        }
        addState ((String)outputAlphabet.lookupObject(i)+LABEL_SEPARATOR+
                  (String)outputAlphabet.lookupObject(j), 0.0, 0.0,
                  destinationNames, labels);
      }
    }
  }

  /**
   * Add one state per ordered triple of labels.  The state named
   * <code>a + LABEL_SEPARATOR + b + LABEL_SEPARATOR + c</code> gets, for
   * every label <code>d</code>, a transition to the state
   * <code>b + LABEL_SEPARATOR + c + LABEL_SEPARATOR + d</code> that
   * outputs <code>d</code>.  All states are added with initial and
   * final costs of 0.0.
   */
  public void addFullyConnectedStatesForTriLabels ()
  {
    String[] labels = new String[outputAlphabet.size()];
    // This is assuming that the entries in the outputAlphabet are Strings!
    for (int i = 0; i < outputAlphabet.size(); i++) {
      logger.info ("CRF: outputAlphabet.lookup class = "+
                   outputAlphabet.lookupObject(i).getClass().getName());
      labels[i] = (String) outputAlphabet.lookupObject(i);
    }
    for (int i = 0; i < labels.length; i++) {
      for (int j = 0; j < labels.length; j++) {
        for (int k = 0; k < labels.length; k++) {
          String[] destinationNames = new String[labels.length];
          for (int l = 0; l < labels.length; l++) {
            destinationNames[l] = labels[j]+LABEL_SEPARATOR+labels[k]+LABEL_SEPARATOR+labels[l];
          }
          addState (labels[i]+LABEL_SEPARATOR+labels[j]+LABEL_SEPARATOR+labels[k], 0.0, 0.0,
                    destinationNames, labels);
        }
      }
    }
  }

  /**
   * Add a single state that, for every label in the output alphabet,
   * has a transition back to itself emitting that label.  The state is
   * added with initial and final costs of 0.0.
   *
   * @param name the name of the state to add
   */
  public void addSelfTransitioningStateForAllLabels (String name)
  {
    String[] labels = new String[outputAlphabet.size()];
    String[] destinationNames = new String[outputAlphabet.size()];
    // This is assuming that the entries in the outputAlphabet are Strings!
    for (int i = 0; i < outputAlphabet.size(); i++) {
      logger.info ("CRF: outputAlphabet.lookup class = "+
                   outputAlphabet.lookupObject(i).getClass().getName());
      labels[i] = (String) outputAlphabet.lookupObject(i);
      destinationNames[i] = name;
    }
    addState (name, 0.0, 0.0, destinationNames, labels);
  }

  /**
   * Joins the given labels, in order, with <code>LABEL_SEPARATOR</code>
   * between consecutive entries.
   *
   * @param labels the labels to join; an empty array yields ""
   * @return the joined string
   */
  private String concatLabels(String[] labels)
  {
    String sep = "";
    StringBuffer buf = new StringBuffer();
    for (int i = 0; i < labels.length; i++) {
      buf.append(sep).append(labels[i]);
      // Only the first element goes without a leading separator.
      sep = LABEL_SEPARATOR;
    }
    return buf.toString();
  }

  /**
   * Builds the name made of the last <code>k-1</code> entries of
   * <code>history</code> followed by <code>next</code>, joined with
   * <code>LABEL_SEPARATOR</code>.
   *
   * @param history the current label history
   * @param k the number of labels in the result, counting
   * <code>next</code>; values above <code>history.length + 1</code>
   * make the start index negative and the array access fail
   * @param next the label appended after the retained history
   * @return the joined k-gram
   */
  private String nextKGram(String[] history, int k, String next)
  {
    String sep = "";
    StringBuffer buf = new StringBuffer();
    // Keep the k-1 most recent history entries.
    int start = history.length + 1 - k;
    for (int i = start; i < history.length; i++) {
      buf.append(sep).append(history[i]);
      sep = LABEL_SEPARATOR;
    }
    buf.append(sep).append(next);
    return buf.toString();
  }

  /**
   * Tells whether <code>curr</code> may follow <code>prev</code>.  The
   * two labels are joined with <code>LABEL_SEPARATOR</code> and the
   * whole resulting string is matched against the patterns.
   *
   * @param prev the earlier label
   * @param curr the later label
   * @param no if non-null, a pair matching this pattern is rejected
   * @param yes if non-null, a pair not matching this pattern is rejected
   * @return <code>true</code> if neither pattern rejects the pair
   */
  private boolean allowedTransition(String prev, String curr,
                                    Pattern no, Pattern yes)
  {
    String pair = concatLabels(new String[]{prev, curr});
    if (no != null && no.matcher(pair).matches()) {
      return false;
    }
    if (yes != null && !yes.matcher(pair).matches()) {
      return false;
    }
    return true;
  }

  /**
   * Tells whether every pair of consecutive labels in
   * <code>history</code> passes <code>allowedTransition</code>.  A
   * history with fewer than two labels is always allowed.
   *
   * @param history the label sequence to check
   * @param no passed through to <code>allowedTransition</code>
   * @param yes passed through to <code>allowedTransition</code>
   * @return <code>true</code> if no consecutive pair is rejected
   */
  private boolean allowedHistory(String[] history, Pattern no, Pattern yes)
  {
    for (int i = 1; i < history.length; i++) {
      if (!allowedTransition(history[i-1], history[i], no, yes)) {
        return false;
      }
    }
    return true;
  }

  /**
   * Assumes that the CRF's output alphabet contains
   * <code>String</code>s. Creates an order-<em>n</em> CRF with input
   * predicates and output labels given by <code>trainingSet</code>
   * and order, connectivity, and weights given by the remaining
   * arguments.
   *
   * @param trainingSet the training instances
   * @param orders an array of increasing non-negative numbers giving
   * the orders of the features for this CRF. The largest number
   * <em>n</em> is the Markov order of the CRF. States are
   * <em>n</em>-tuples of output labels. Each of the other numbers
   * <em>k</em> in <code>orders</code> represents a weight set shared
   * by all destination states whose last (most recent) <em>k</em>
   * labels agree. If <code>orders</code> is <code>null</code>, an
   * order-0 CRF is built.
   * @param defaults If non-null, it must be the same length as
   * <code>orders</code>, with <code>true</code> positions indicating
   * that the weight set for the corresponding order contains only the
   * weight for a default feature; otherwise, the weight set has
   * weights for all features built from input predicates.
   * @param start The label that represents the context of the start of
   * a sequence. It may be also used for sequence labels.
   * @param forbidden If non-null, specifies what pairs of successive
   * labels are not allowed, both for constructing <em>n</em>order
   * states or for transitions. A label pair (<em>u</em>,<em>v</em>)
   * is not allowed if <em>u</em> + "," + <em>v</em> matches
   * <code>forbidden</code>.
   * @param allowed If non-null, specifies what pairs of successive
   * labels are allowed, both for constructing <em>n</em>order
   * states or for transitions. A label pair (<em>u</em>,<em>v</em>)
   * is allowed only if <em>u</em> + "," + <em>v</em> matches
   * <code>allowed</code>.
* @param fullyConnected Whether to include all allowed transitions, * even those not occurring in <code>trainingSet</code>, * @return The name of the start state. * */ public String addOrderNStates(InstanceList trainingSet, int[] orders, boolean[] defaults, String start, Pattern forbidden, Pattern allowed, boolean fullyConnected) { boolean[][] connections = null; if (!fullyConnected) connections = labelConnectionsIn (trainingSet); int order = -1; if (defaults != null && defaults.length != orders.length) throw new IllegalArgumentException("Defaults must be null or match orders"); if (orders == null) order = 0; else { for (int i = 0; i < orders.length; i++) if (orders[i] <= order) throw new IllegalArgumentException("Orders must be non-negative and in ascending order"); else order = orders[i]; if (order < 0) order = 0; } if (order > 0) { int[] historyIndexes = new int[order]; String[] history = new String[order]; String label0 = (String)outputAlphabet.lookupObject(0); for (int i = 0; i < order; i++) history[i] = label0; int numLabels = outputAlphabet.size(); while (historyIndexes[0] < numLabels) { logger.info("Preparing " + concatLabels(history)); if (allowedHistory(history, forbidden, allowed)) { String stateName = concatLabels(history); int nt = 0; String[] destNames = new String[numLabels]; String[] labelNames = new String[numLabels]; String[][] weightNames = new String[numLabels][orders.length]; for (int nextIndex = 0; nextIndex < numLabels; nextIndex++) { String next = (String)outputAlphabet.lookupObject(nextIndex); if (allowedTransition(history[order-1], next, forbidden, allowed) && (fullyConnected || connections[historyIndexes[order-1]][nextIndex])) { destNames[nt] = nextKGram(history, order, next); labelNames[nt] = next; for (int i = 0; i < orders.length; i++) { weightNames[nt][i] = nextKGram(history, orders[i]+1, next); if (defaults != null && defaults[i]) featureSelections[getWeightsIndex(weightNames[nt][i])] = new 
FeatureSelection(trainingSet.getDataAlphabet()); } nt++; } } if (nt < numLabels) { String[] newDestNames = new String[nt]; String[] newLabelNames = new String[nt]; String[][] newWeightNames = new String[nt][]; for (int t = 0; t < nt; t++) { newDestNames[t] = destNames[t]; newLabelNames[t] = labelNames[t]; newWeightNames[t] = weightNames[t]; } destNames = newDestNames; labelNames = newLabelNames; weightNames = newWeightNames; } for (int i = 0; i < destNames.length; i++) { StringBuffer b = new StringBuffer(); for (int j = 0; j < orders.length; j++) b.append(" ").append(weightNames[i][j]); logger.info(stateName + "->" + destNames[i] + "(" + labelNames[i] + ")" + b.toString()); } addState (stateName, 0.0, 0.0, destNames, labelNames, weightNames); } for (int o = order-1; o >= 0; o--) if (++historyIndexes[o] < numLabels) { history[o] = (String)outputAlphabet.lookupObject(historyIndexes[o]); break; } else if (o > 0) { historyIndexes[o] = 0; history[o] = label0; } } for (int i = 0; i < order; i++) history[i] = start; return concatLabels(history); } else { String[] stateNames = new String[outputAlphabet.size()];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -