/* svm_struct_api.cpp */
{
fprintf(stderr, "read_struct_examples(): fishy input: no features found; exiting\n");
exit(-1);
}
sparm->featureSpaceSize = maxFeatNumFound; //feature numbers start at 1
}
sample.n = tokens.size();
sample.examples = new EXAMPLE[sample.n]; //initialize the PATTERNs and LABELs from our temporary storage
for(unsigned int i = 0; i < tokens.size(); i++)
{
sample.examples[i].x.setEmissionsVector(tokens[i]);
sample.examples[i].y.setTagsVector(tagIDs[i]);
}
return(sample);
}
/*
this is called BEFORE init_struct_constraints() but AFTER read_struct_examples()
*/
void init_struct_model(SAMPLE sample, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm, LEARN_PARM* lparm, KERNEL_PARM* kparm)
{
/* Initialize structmodel sm. The weight vector w does not need to be
initialized, but you need to provide the maximum size of the
feature space in sizePsi. This is the maximum number of different
weights that can be learned. Later, the weight vector w will
contain the learned weights for the model. */
/*
for an HMM, depends on the sizes of phi(X) and Y, the feature space and the label set
*/
sm->sizePsi = getNumTags() * (getNumTags() + sparm->featureSpaceSize);
}
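/*
worked sizing sketch (the numbers are illustrative, not taken from any data set):
with getNumTags() == 3 and sparm->featureSpaceSize == 10, sizePsi = 3 * (3 + 10) = 39;
ids 1..9 form the 3x3 transition block and ids 10..39 hold one 10-weight emission
block per tag (see get_transition_feature_id() and get_output_feature_start_id() below)
*/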
CONSTSET init_struct_constraints(SAMPLE sample, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm)
{
/* Initializes the optimization problem. Typically, you do not need
to change this function, since you want to start with an empty
set of constraints. However, if for example you have constraints
that certain weights need to be positive, you might put that in
here. The constraints are represented as lhs[i]*w >= rhs[i]. lhs
is an array of feature vectors, rhs is an array of doubles. m is
the number of constraints. The function returns the initial
set of constraints. */
CONSTSET c;
long sizePsi=sm->sizePsi;
long i;
WORD words[2];
if(1) { /* normal case: start with empty set of constraints */
c.lhs=NULL;
c.rhs=NULL;
c.m=0;
}
else { /* add constraints so that all learned weights are
positive. WARNING: Currently, they are positive only up to
precision epsilon set by -e. */
c.lhs=(DOC**)my_malloc(sizeof(DOC *)*sizePsi);
c.rhs=(double*)my_malloc(sizeof(double)*sizePsi);
for(i=0; i<sizePsi; i++) {
words[0].wnum=i+1;
words[0].weight=1.0;
words[1].wnum=0;
/* the following slackid is a hack. we will run into problems
if we have more than MAX_NUM_EXAMPLES slack sets (ie examples).
MAX_NUM_EXAMPLES is defined in svm_struct_api_types.h */
c.lhs[i]=create_example(i,0,MAX_NUM_EXAMPLES+i,1,create_svector(words,"",1.0));
c.rhs[i]=0.0;
}
}
return(c);
}
/*
return the index into a feature vector that denotes the y1 -> y2 transition,
with an offset of 1 to work with svmlight
*/
inline unsigned int get_transition_feature_id(tagID y1, tagID y2)
{
return y1 * getNumTags() + y2 + 1;
}
/*
return the index into a feature vector that denotes the start of the output features for tag y,
with an offset of 1 to work with svmlight
*/
unsigned int get_output_feature_start_id(tagID y, STRUCT_LEARN_PARM* sparm)
{
static const unsigned int psqr = sqr(getNumTags()) + 1; //first id past the transition block, whose getNumTags()^2 weights occupy ids 1..getNumTags()^2
return psqr + sparm->featureSpaceSize * y;
}
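/*
a self-check sketch (checkPsiLayout is hypothetical and never called; it assumes
<cassert> is included) showing that the two id functions above tile the weight
vector exactly: transitions fill ids 1..T*T and the emission blocks fill
T*T+1..T*(T+F), which equals sizePsi as set in init_struct_model()
*/
static void checkPsiLayout(STRUCT_LEARN_PARM* sparm)
{
const unsigned int T = getNumTags(), F = sparm->featureSpaceSize;
assert(get_transition_feature_id((tagID)(T - 1), (tagID)(T - 1)) == T * T); //last transition id
assert(get_output_feature_start_id((tagID)0, sparm) == T * T + 1); //first emission id
assert(get_output_feature_start_id((tagID)(T - 1), sparm) + F - 1 == T * (T + F)); //last emission id == sizePsi
}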
/*
auxiliary to classify_struct_example(): return the score (an unnormalized log-probability)
that weight vector w assigns to the state transition y1 -> y2
*/
inline double get_transition_probability(const double* w, tagID y1, tagID y2)
{
return w[get_transition_feature_id(y1, y2)];
}
/*
auxiliary to classify_struct_example(): return the score (an unnormalized log-probability)
that weight vector w assigns to state y emitting a token with x's feature vector
*/
inline double get_output_probability(const double* w, tagID y, const token& x, STRUCT_LEARN_PARM* sparm)
{
//we want the dot product of x's features with the appropriate subvector of w
const unsigned int startIndex = get_output_feature_start_id(y, sparm);
return x.dotProduct(&w[startIndex - 1]); //the feature numbers in x start at 1
}
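/*
hypothetical sketch of the dot product assumed above (the real token::dotProduct
is defined elsewhere; the member names here are invented): if a token stores
sparse (featureNumber, value) pairs with feature numbers starting at 1, then
passing &w[startIndex - 1] makes subw[featureNumber] land on that feature's weight

double token::dotProduct(const double* subw) const
{
double sum = 0;
for(unsigned int i = 0; i < numFeatures; i++) //each sparse entry of this token
sum += subw[features[i].number] * features[i].value; //number >= 1, hence the caller's -1 shift
return sum;
}
*/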
LABEL classify_struct_example(PATTERN x, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm)
{
/* Finds the label yhat for pattern x that scores the highest
according to the linear evaluation function in sm, especially the
weights sm->w. The returned label is taken as the prediction of sm
for the pattern x. The weights correspond to the features defined
by psi() and range from index 1 to index sm->sizePsi. If the
function cannot find a label, it shall return an empty label as
recognized by the function empty_label(y). */
LABEL y;
/* use Viterbi to calculate, in order, each token's most likely state */
static double* stateProbabilities[2] = {NULL, NULL}; //one for the current tag position and one for the previous
static bool init = true;
if(init)
{
stateProbabilities[0] = new double[getNumTags()];
stateProbabilities[1] = new double[getNumTags()];
init = false;
}
bool vecnum; //which of the two buffers holds the current position's values
double maxProb;
unsigned int maxIndex;
/* initial score P(x_0 | y_0 = y) for all y (the running maximum is recomputed after the recursion) */
vecnum = 0;
for(unsigned int i = 0; i < getNumTags(); i++)
stateProbabilities[vecnum][i] = get_output_probability(sm->w, (tagID)i, x.getToken(0), sparm);
vector<vector<tagID> > mostLikelyPaths; //from index (j - 1, i) we can trace back the most likely path ending at state i at position j
/* recursion: for each position, find argmax over tags y of P(x_i | y_i = y) * P(y_i = y | y_{i-1}) */
double tempProb;
for(unsigned int j = 1; j < x.getLength(); j++) /* loop over words in the sentence */
{
vecnum = !vecnum;
mostLikelyPaths.push_back(vector<tagID>());
//loop over the tag in the current spot
for(unsigned int i = 0; i < getNumTags(); i++)
{
double outputProb = get_output_probability(sm->w, (tagID)i, x.getToken(j), sparm);
//loop over the tag in the previous spot
for(unsigned int k = 0; k < getNumTags(); k++)
{
//add log-"probabilities" to get a comparison equivalent to multiplying probabilities
tempProb = stateProbabilities[!vecnum][k] //value of previous subsequence
+ get_transition_probability(sm->w, k, i) //transition cost
+ outputProb; //output cost
if(k == 0 || tempProb > stateProbabilities[vecnum][i]) //seed at k == 0: summed log-scores can be arbitrarily negative, so no constant sentinel is safe
{
stateProbabilities[vecnum][i] = tempProb;
maxIndex = k;
}
}
mostLikelyPaths.back().push_back((tagID)maxIndex); //record the best previous tag on the most likely path ending at tag i here
}
}
//find the final state whose max-prob sequence has highest probability
maxIndex = 0;
maxProb = stateProbabilities[vecnum][0];
for(unsigned int i = 1; i < getNumTags(); i++)
if(stateProbabilities[vecnum][i] > maxProb)
{
maxProb = stateProbabilities[vecnum][i];
maxIndex = i;
}
//build y in reverse by looking backward through the table to find the max-prob path
y.setLength(x.getLength());
y.setTag(y.getLength() - 1, (tagID)maxIndex);
for(int j = x.getLength() - 2; j > -1; j--)
{
y.setTag(j, mostLikelyPaths[j][maxIndex]);
maxIndex = mostLikelyPaths[j][maxIndex];
}
return(y);
}
LABEL find_most_violated_constraint_slackrescaling(PATTERN x, LABEL y, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm)
{
/* Finds the label ybar for pattern x that is responsible for
the most violated constraint for the slack rescaling
formulation. It has to take into account the scoring function in
sm, especially the weights sm.w, as well as the loss
function. The weights in sm.w correspond to the features defined
by psi() and range from index 1 to index sm->sizePsi. Most simple
is the case of the zero/one loss function. For the zero/one loss,
this function should return the highest scoring label ybar, if
ybar is unequal to y; if it is equal to the correct label y, then
the function shall return the second highest scoring label. If
the function cannot find a label, it shall return an empty label
as recognized by the function empty_label(y). */
LABEL ybar;
/* insert your code for computing the label ybar here */
fprintf(stderr, "Error: find_most_violated_constraint_slackrescaling() shouldn't be called (not used); exiting\n");
exit(-1);
return(ybar);
}
LABEL find_most_violated_constraint_marginrescaling(PATTERN x, LABEL y, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm)
{
/* Finds the label ybar for pattern x that is responsible for
the most violated constraint for the margin rescaling
formulation. It has to take into account the scoring function in
sm, especially the weights sm->w, as well as the loss
function. The weights in sm->w correspond to the features defined
by psi() and range from index 1 to index sm->sizePsi. Most simple
is the case of the zero/one loss function. For the zero/one loss,
this function should return the highest scoring label ybar, if
ybar is unequal to y; if it is equal to the correct label y, then
the function shall return the second highest scoring label. If
the function cannot find a label, it shall return an empty label
as recognized by the function empty_label(y). */
LABEL ybar;
/* use Viterbi to calculate the cost for each possible state at each position in the input in turn */
static double* stateCosts[2] = {NULL, NULL}; //one for the current tag position and one for the previous
static bool init = true;
if(init)
{
stateCosts[0] = new double[getNumTags()];
stateCosts[1] = new double[getNumTags()];
init = false;
}
bool vecnum; //which cost vector is acting as the 'current' one
//calculate costs for the first position
vecnum = 0;
for(unsigned int j = 0; j < getNumTags(); j++)
/*
calculate the total additional cost of adding this tag to the sequence
(note we don't subtract w * psi(x, y), since that's the same for every ybar)
*/
stateCosts[vecnum][j] = ((j != y.getTag(0)) ? 1 : 0) //mislabeling cost (loss)
+ get_output_probability(sm->w, (tagID)j, x.getToken(0), sparm); //output cost
vector<vector<tagID> > mostCostlyPaths; //from index (i - 1, j) we can trace back the most costly (most violating) path ending at tag j at position i
double tempCost, outputProb;
unsigned int maxIndex;
for(unsigned int i = 1; i < x.getLength(); i++) /* loop over words in the sentence */
{
vecnum = !vecnum;
mostCostlyPaths.push_back(vector<tagID>());
/* calculate the cost of labeling x_i with postag j */
//run through tags at present position
for(unsigned int j = 0; j < getNumTags(); j++)
{
outputProb = get_output_probability(sm->w, (tagID)j, x.getToken(i), sparm); //score for x[i] being emitted from state j
//run through tags at previous position
for(unsigned int k = 0; k < getNumTags(); k++)
{
/*
calculate the total additional cost of adding this tag to the sequence
(note we don't subtract w * psi(x, y), since that's the same for every ybar)
*/
tempCost = stateCosts[!vecnum][k] //cost of previous subsequence
+ ((j != y.getTag(i)) ? 1 : 0) //mislabeling cost (loss)
+ get_transition_probability(sm->w, k, j) //transition cost
+ outputProb; //output cost
if(k == 0 || tempCost > stateCosts[vecnum][j])
{
stateCosts[vecnum][j] = tempCost;
maxIndex = k;
}
}
mostCostlyPaths.back().push_back((tagID)maxIndex); //push a reference to the previous tag in this tag's most costly path
}
}
//find the last-position tag with the highest-cost path
double maxCost = stateCosts[vecnum][0];
maxIndex = 0;
for(unsigned int j = 1; j < getNumTags(); j++)
{
if(stateCosts[vecnum][j] > maxCost)
{
maxCost = stateCosts[vecnum][j];
maxIndex = j;
}
}
//build the costliest overall path backward from its end via the table
ybar.setLength(x.getLength());
ybar.setTag(ybar.getLength() - 1, (tagID)maxIndex);
for(int i = x.getLength() - 2; i > -1; i--)
{
ybar.setTag(i, mostCostlyPaths[i][maxIndex]);
maxIndex = mostCostlyPaths[i][maxIndex];
}
// if(y == ybar) return label(); //special case: return empty label
return(ybar);
}
int empty_label(LABEL y)
{
/* Returns true, if y is an empty label. An empty label might be
returned by find_most_violated_constraint_???(x, y, sm) if there
is no incorrect label that can be found for x, or if it is unable
to label x at all */
return y.isEmpty();
}
/*
return a new sparse vector equal to the entrywise sum v1 + v2
(add_ss() merges the two sorted sparse lists; neither argument is modified)
the return value should be freed using free_svector() after use
*/
inline SVECTOR* addFeatureVectors(const SVECTOR& v1, const SVECTOR& v2)
{
return add_ss(&const_cast<SVECTOR&>(v1), &const_cast<SVECTOR&>(v2));
}
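/*
usage sketch (va and vb stand for any two live SVECTORs):
SVECTOR* sum = addFeatureVectors(*va, *vb);
... use sum ...
free_svector(sum); //add_ss() allocates a fresh vector that the caller owns
*/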
/*
auxiliary to appendFeatureVectorWithFeatNumOffset():
return the number of elements in the word list, not counting the terminating entry with wnum == 0
*/
unsigned int sparseVecLength(const SVECTOR& v)
{
WORD* w = v.words;
unsigned int count = 0;
while(w->wnum != 0)
{
w++;
count++;
}
return count;
}
/*
append src to the end of dest, adding offset to each feature number in src
(assumes every offset feature number exceeds the last feature number already in dest,
so the sparse list stays sorted by wnum)
*/
void appendFeatureVectorWithFeatNumOffset(SVECTOR& dest, const SVECTOR& src, unsigned int offset)
{
unsigned int sizeDest = sparseVecLength(dest), sizeSrc = sparseVecLength(src);
WORD* temp = dest.words;
dest.words = (WORD*)my_malloc((sizeDest + sizeSrc + 1) * sizeof(WORD));
memcpy(dest.words, temp, sizeDest * sizeof(WORD));
free(temp); temp = NULL;
for(WORD* w = dest.words + sizeDest, *w2 = src.words; w2->wnum != 0; w++, w2++)
{
w->wnum = w2->wnum + offset;
w->weight = w2->weight;
}
dest.words[sizeDest + sizeSrc].wnum = 0;
}
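/*
usage sketch (hypothetical names; psi()'s body is not shown here): to place a
token's 1-based emission features into tag y's block of the joint vector, the
offset is the block start minus one, mirroring get_output_probability() above:
appendFeatureVectorWithFeatNumOffset(*joint, *emissions, get_output_feature_start_id(y, sparm) - 1);
*/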
SVECTOR *psi(PATTERN x, LABEL y, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm)
{
/* Returns a feature vector describing the match between pattern x
and label y */