/* svm_struct_api.cpp */
SVECTOR *psi(PATTERN x, LABEL y, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm)
{
/* Returns a feature vector describing the match between pattern x
and label y. The feature vector is returned as a list of
SVECTOR's. Each SVECTOR is in a sparse representation of pairs
<featurenumber:featurevalue>, where the last pair has
featurenumber 0 as a terminator. Featurenumbers start with 1 and
end with sizePsi. Featurenumbers that are not specified default
to value 0. As mentioned before, psi() actually returns a list of
SVECTOR's. Each SVECTOR has a field 'factor' and 'next'. 'next'
specifies the next element in the list, terminated by a NULL
pointer. The list can be thought of as a linear combination of
vectors, where each vector is weighted by its 'factor'. This
linear combination of feature vectors is multiplied with the
learned (kernelized) weight vector to score label y for pattern
x. Without kernels, there will be one weight in sm->w for each
feature. Note that psi has to match
find_most_violated_constraint_???(x, y, sm) and vice versa. In
particular, find_most_violated_constraint_???(x, y, sm) finds
the ybar!=y that maximizes psi(x,ybar,sm)*sm->w (where * is the
inner vector product) and the appropriate function of the
loss + margin/slack rescaling method. See that paper for details. */
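/*
Illustration of the sparse representation described above: in code, each
pair is a WORD with fields 'wnum' (feature number) and 'weight', so the
array {<2:0.5>, <7:1.0>, <0:...>} encodes a vector that is 0.5 at
feature 2, 1.0 at feature 7, and 0 everywhere else; the entry with
wnum 0 only terminates the array.
*/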
//for HMM purposes, there will be just one SVECTOR in the linked list, and the score for (x, y) is w * psi(x, y)
SVECTOR *fvec = (SVECTOR*)my_malloc(sizeof(SVECTOR));
fvec->factor = 1;
fvec->userdefined = (char*)my_malloc(sizeof(char)); //leaving this uninitialized causes seg faults
fvec->userdefined[0] = 0; //(this value gets checked in create_svector() )
fvec->next = NULL;
/*
psi(x, y) contains a copy of each word x_i, offset in the feature space
according to y_i, plus, earlier in the vector, a count of how many times
each state->state transition occurs in y
*/
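/*
Sketch of the resulting layout, assuming (as the loops below suggest)
that get_transition_feature_id() packs the numTags^2 transition features
first and get_output_feature_start_id() places one block of per-word
features after them for each tag: with 2 tags and 3 word features,
features 1..4 would count the transitions 0->0, 0->1, 1->0, 1->1, and
features 5..7 / 8..10 would hold the summed word features of the tokens
tagged 0 / 1 respectively.
*/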
//count state transitions and build a total feature vector for each tag that's used in sentence x
hash_map<unsigned int, unsigned int> transitions; //one entry per tag->tag transition found in the input; the value is the count
static SVECTOR** featuresByTag = new SVECTOR*[getNumTags()]; //tag ID -> map of feature IDs to sum of values for all words with said tag
for(unsigned int i = 0; i < getNumTags(); i++)
{
featuresByTag[i] = (SVECTOR*)my_malloc(sizeof(SVECTOR));
featuresByTag[i]->words = (WORD*)my_malloc(sizeof(WORD));
featuresByTag[i]->words[0].wnum = 0;
featuresByTag[i]->userdefined = NULL;
featuresByTag[i]->next = NULL;
featuresByTag[i]->factor = 1;
}
for(unsigned int i = 0; i < y.getLength() - 1; i++)
{
SVECTOR* tempVec = addFeatureVectors(*featuresByTag[y.getTag(i)], x.getToken(i).getFeatureMap());
free_svector(featuresByTag[y.getTag(i)]);
featuresByTag[y.getTag(i)] = tempVec;
tempVec = NULL;
transitions[get_transition_feature_id(y.getTag(i), y.getTag(i + 1))]++;
}
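//the last token contributes word features but no outgoing transition, so it is handled outside the loop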
SVECTOR* tempVec = addFeatureVectors(*featuresByTag[y.getLastTag()], x.getLastToken().getFeatureMap());
free_svector(featuresByTag[y.getLastTag()]);
featuresByTag[y.getLastTag()] = tempVec;
tempVec = NULL;
fvec->words = (WORD*)my_malloc((transitions.size() + 1) * sizeof(WORD)); //allow space for the end-vector flag (feat. # 0)
//add features to the vector in numerical order (transitions, then tag feature sums)
unsigned int fvecIndex = 0; //index into output vector that we're currently writing
//add the count of uses of each transition that's used
for(unsigned int i = 0; i < getNumTags(); i++)
for(unsigned int j = 0; j < getNumTags(); j++)
{
unsigned int id = get_transition_feature_id(i, j);
if(transitions.find(id) != transitions.end())
{
fvec->words[fvecIndex].wnum = id; //feature numbers start at 1; this is handled in get_*_id()
fvec->words[fvecIndex].weight = transitions[id];
fvecIndex++;
}
}
//add the end-of-list flag (that this is 0 is *why* feature numbers start at 1)
fvec->words[fvecIndex].wnum = 0;
//for each tag in order, add the sum of the feature vectors of the words so labeled
for(unsigned int i = 0; i < getNumTags(); i++)
if(featuresByTag[i]->words[0].wnum != 0) //there are tokens with this label
appendFeatureVectorWithFeatNumOffset(*fvec, *featuresByTag[i], get_output_feature_start_id((tagID)i, sparm) - 1);
//cleanup
for(unsigned int i = 0; i < getNumTags(); i++)
free_svector(featuresByTag[i]);
return(fvec);
}
/*
if the labels aren't the same length, the loss is computed using the appropriate subsequence of whichever label is longer
*/
double loss(LABEL y, LABEL ybar, STRUCT_LEARN_PARM *sparm)
{
/* loss for correct label y and predicted label ybar. The loss for
y==ybar has to be zero. sparm->loss_function is set with the -l option. */
if(sparm->loss_function == 0) /* type 0 loss: 0/1 loss (0 if y==ybar, 1 otherwise) */
{
/* 0/1 loss is not supported by this implementation */
fprintf(stderr, "loss(): loss function is set to zero/one loss; this code only works with Hamming loss (-l 1). exiting\n");
exit(-1);
}
/* Put your code for different loss functions here. But then
find_most_violated_constraint_???(x, y, sm) has to return the
highest scoring label with the largest loss. */
else if(sparm->loss_function == 1) /* type 1 loss: constant penalty per wrong POS tag */
{
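/* e.g. y = (DT, NN, VB) and ybar = (DT, JJ, VB) give penalty = 1; if
the labels differ in length, only the overlapping prefix is compared
(see the note above loss()) */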
unsigned int penalty = 0;
const unsigned int minSize = min(y.getLength(), ybar.getLength());
for(unsigned int i = 0; i < minSize; i++)
if(ybar.getTag(i) != y.getTag(i)) penalty++;
return (double)penalty;
}
else
{
fprintf(stderr, "loss(): unknown loss function id %d\n", sparm->loss_function);
exit(-1);
}
}
int finalize_iteration(double ceps, int cached_constraint,
SAMPLE sample, STRUCTMODEL *sm,
CONSTSET cset, double *alpha,
STRUCT_LEARN_PARM *sparm)
{
/* This function is called just before the end of each cutting plane
iteration. ceps is the amount by which the most violated constraint
found in the current iteration was violated. cached_constraint is
true if the added constraint was constructed from the cache. If the
return value is FALSE, then the algorithm is allowed to terminate.
If it is TRUE, the algorithm will keep iterating even if the desired
precision sparm->epsilon is already reached. */
return(0);
}
void print_struct_learning_stats(SAMPLE sample, STRUCTMODEL *sm,
CONSTSET cset, double *alpha,
STRUCT_LEARN_PARM *sparm)
{
/* This function is called after training and allows final touches to
the model sm. But primarily it allows computing and printing any
kind of statistic (e.g. training error) you might want. */
}
void print_struct_testing_stats(SAMPLE sample, STRUCTMODEL *sm,
STRUCT_LEARN_PARM *sparm,
STRUCT_TEST_STATS *teststats)
{
/* This function is called after making all test predictions in
svm_struct_classify and allows computing and printing any kind of
evaluation (e.g. precision/recall) you might want. You can use
the function eval_prediction to accumulate the necessary
statistics for each prediction. */
double avgLoss = (double)(teststats->numTokens - teststats->numCorrectTags) / teststats->numTokens;
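//e.g. 9500 correct tags out of 10000 tokens -> average loss (10000 - 9500) / 10000 = 0.0500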
printf("average loss per word: %.4lf\n", avgLoss);
}
void eval_prediction(long exnum, EXAMPLE ex, LABEL ypred,
STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm,
STRUCT_TEST_STATS *teststats)
{
/* This function allows you to accumulate statistics for how well the
prediction matches the labeled example. It is called from
svm_struct_classify. See also the function
print_struct_testing_stats. */
if(exnum == 0) /* this is the first time the function is called. So initialize the teststats (note it has been allocated) */
{
teststats->numTokens = teststats->numCorrectTags = 0;
}
teststats->numTokens += ex.x.getLength();
for(unsigned int i = 0; i < ex.x.getLength(); i++)
if(ex.y.getTag(i) == ypred.getTag(i))
teststats->numCorrectTags++;
}
/*
auxiliary to read/write_struct_model()
*/
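//e.g. "pos.model" -> "pos_svmModel.dat"; a filename with no '.' keeps its full name before the "_svmModel.dat" suffix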
string structModelFilename2svmModelFilename(const string& smFilename)
{
return smFilename.substr(0, smFilename.rfind('.')) + "_svmModel.dat";
}
/*
writes the struct model to 'file'; the underlying svm model is written to an autogenerated companion filename
*/
void write_struct_model(char *file, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm)
{
/* Writes structural model sm to file file. */
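/* The text format written below (and parsed back by read_struct_model()):
feature space size: <n>
labels: <id>=<tag> <id>=<tag> ...
weight vector size: <sizePsi>
weight vector: <featNum>:<weight> ... (nonzero entries only)
loss type (1 = slack rescaling, 2 = margin rescaling): <type>
loss function (should be 1 for svm-hmm): <id>
*/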
ofstream outfile(file);
//write number of features per word
outfile << "feature space size: " << sparm->featureSpaceSize << endl;
//write the tags we picked up from the input
outfile << "labels:";
for(hash_map<tagID, tag>::iterator i = idToTagMap.begin(); i != idToTagMap.end(); i++)
outfile << " " << (*i).first << "=" << (*i).second;
outfile << endl;
//write the (sparse) weight vector
outfile << "weight vector size: " << sm->sizePsi << endl;
outfile << "weight vector:";
for(unsigned int i = 0; i < (unsigned int)sm->sizePsi; i++)
if(sm->w[i] != 0)
outfile << " " << i << ":" << setprecision(8) << sm->w[i];
outfile << endl;
outfile << "loss type (1 = slack rescaling, 2 = margin rescaling): " << sparm->loss_type << endl;
outfile << "loss function (should be 1 for svm-hmm): " << sparm->loss_function << endl;
outfile.close();
printf("writing svm model to '%s'\n", structModelFilename2svmModelFilename(file).c_str());
write_model(const_cast<char*>(structModelFilename2svmModelFilename(file).c_str()), sm->svm_model); //write svm model
}
/*
reads the struct model from 'file' and the svm model from the autogenerated companion filename
*/
STRUCTMODEL read_struct_model(char *file, STRUCT_LEARN_PARM *sparm)
{
/* Reads structural model sm from file file. This function is used
only in the prediction module, not in the learning module. */
STRUCTMODEL model;
#define ERROR_READING(what) do { fprintf(stderr, "read_struct_model(): error reading " #what "\n"); exit(-1); } while(0)
ifstream infile(file);
//read number of features per word
if(!(infile >> match("feature space size: ") >> sparm->featureSpaceSize))
{
ERROR_READING("feature space size");
}
//read tags taken from input to learner
if(!(infile >> match("\nlabels: ")))
{
ERROR_READING("labels");
}
string labelLine;
if(!getline(infile, labelLine, '\n'))
{
ERROR_READING("labels");
}
//the model is read before the examples, so we can fill up the tag database without using the protective interface above
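//e.g. the label line "0=DT 1=NN 2=VB" yields idToTagMap[0] = "DT", tagToIDMap["DT"] = 0, and so on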
unsigned int id;
string label, idStr;
istringstream inlbl(labelLine);
while(getline(inlbl, idStr, '=') && inlbl >> label)
{
istringstream inid(idStr);
if(!(inid >> id))
{
ERROR_READING("labels");
}
idToTagMap[id] = label;
tagToIDMap[label] = id;
}
//read the (sparse) weight vector
if(!(infile >> match("weight vector size: ") >> model.sizePsi))
{
ERROR_READING("weight vector size");
}
model.w = (double*)my_malloc(model.sizePsi * sizeof(double));
memset(model.w, 0, model.sizePsi * sizeof(double)); //all entries default to 0
if(!(infile >> match("\nweight vector: ")))
{
ERROR_READING("weight vector");
}
unsigned int featNum;
double featVal;
string featLine;
if(!getline(infile, featLine, '\n'))
{
ERROR_READING("weight vector");
}
istringstream instr(featLine);
while(instr >> featNum >> match(":") >> featVal)
model.w[featNum] = featVal;
//read the learning parameters
if(!(infile >> match("loss type (1 = slack rescaling, 2 = margin rescaling): ") >> sparm->loss_type))
{
ERROR_READING("loss type");
}
if(!(infile >> match("\nloss function (should be 1 for svm-hmm): ") >> sparm->loss_function))
{
ERROR_READING("loss function");
}
#undef ERROR_READING
infile.close();
setTagRegistryWritable(false); //make sure tags read in through the test set won't be used during classification
model.svm_model = read_model(const_cast<char*>(structModelFilename2svmModelFilename(file).c_str())); //read svm model
return model;
}
void write_label(FILE *fp, LABEL y)
{
/* Writes label y to file handle fp. Used only to output classification results. */
fprintf(fp, "{ ");
for(unsigned int i = 0; i < y.getLength(); i++)
fprintf(fp, "%s ", getTagByID(y.getTag(i)).c_str());
fprintf(fp, "}");
}
void free_pattern(PATTERN x)
{
/* Frees the memory of x. */
//no-op
}
void free_label(LABEL y) {
/* Frees the memory of y. */
//no-op
}
void free_struct_model(STRUCTMODEL sm)
{
/* Frees the memory of model. */
/* if(sm.w) GC_FREE(sm.w); */ /* this is free'd in free_model */
if(sm.svm_model) free_model(sm.svm_model, 1);
/* add free calls for user defined data here */
}
void free_struct_sample(SAMPLE s)
{
/* Frees the memory of sample s. */
//no-op; we don't know whether the examples were allocated via malloc() or new[]
}
void print_struct_help()
{
/* Prints a help text that is appended to the common help text of
svm_struct_learn. */
printf(" --* string -> custom parameters that can be adapted for struct\n");
printf(" learning. The * can be replaced by any character\n");
printf(" and there can be multiple options starting with --.\n");
}
void parse_struct_parameters(STRUCT_LEARN_PARM *sparm)
{
sparm->featureSpaceSize = 0; //this is checked when reading the examples
/* Parses the command line parameters that start with -- */
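/* e.g. an invocation containing "--e 0.1" dispatches on the character at
index 2 ('e'); each case advances i past the option's value */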
for(unsigned int i=0;(i<sparm->custom_argc) && ((sparm->custom_argv[i])[0] == '-');i++) {
switch ((sparm->custom_argv[i])[2])
{
case 'a': i++; /* strcpy(learn_parm->alphafile,argv[i]); */ break;
case 'e': i++; /* sparm->epsilon=atof(sparm->custom_argv[i]); */ break;
case 'k': i++; /* sparm->newconstretrain=atol(sparm->custom_argv[i]); */ break;
default: printf("\nUnrecognized option %s!\n\n",sparm->custom_argv[i]); exit(0);
}
}
}
void print_struct_help_classify()
{
/* Prints a help text that is appended to the common help text of
svm_struct_classify. */
printf(" --* string -> custom parameters that can be adapted for struct\n");
printf(" learning. The * can be replaced by any character\n");
printf(" and there can be multiple options starting with --.\n");
}
void parse_struct_parameters_classify(char *attribute, char *value)
{
/* Parses one command line parameters that start with -- . The name
of the parameter is given in attribute, the value is given in
value. */
switch (attribute[2])
{
/* case 'x': strcpy(xvalue,value); break; */
default: printf("\nUnrecognized option %s!\n\n",attribute);
exit(0);
}
}