/* svm_struct_api.cpp */
SVECTOR *psi(PATTERN x, LABEL y, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm)
{
/* Returns a feature vector describing the match between pattern x
and label y. The feature vector is returned as a list of
SVECTOR's. Each SVECTOR is in a sparse representation of pairs
<featurenumber:featurevalue>, where the last pair has
featurenumber 0 as a terminator. Featurenumbers start with 1 and
end with sizePsi. Featurenumbers that are not specified default
to value 0. As mentioned before, psi() actually returns a list of
SVECTOR's. Each SVECTOR has a field 'factor' and 'next'. 'next'
specifies the next element in the list, terminated by a NULL
pointer. The list can be thought of as a linear combination of
vectors, where each vector is weighted by its 'factor'. This
linear combination of feature vectors is multiplied with the
learned (kernelized) weight vector to score label y for pattern
x. Without kernels, there will be one weight in sm->w for each
feature. Note that psi has to match
find_most_violated_constraint_???(x, y, sm) and vice versa. In
particular, find_most_violated_constraint_???(x, y, sm) finds
the ybar!=y that maximizes psi(x,ybar,sm)*sm->w (where * is the
inner vector product) and the appropriate function of the
loss + margin/slack rescaling method. See that paper for details. */
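/*
Illustration of the sparse representation described above: in code, each
pair is a WORD with fields 'wnum' (feature number) and 'weight', so the
array {<2:0.5>, <7:1.0>, <0:...>} encodes a vector that is 0.5 at
feature 2, 1.0 at feature 7, and 0 everywhere else; the entry with
wnum 0 only terminates the array.
*/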
//for HMM purposes, there will be just one SVECTOR in the linked list, and the score for (x, y) is w * psi(x, y)
SVECTOR *fvec = (SVECTOR*)my_malloc(sizeof(SVECTOR));
fvec->factor = 1;
fvec->userdefined = (char*)my_malloc(sizeof(char)); //leaving this uninitialized causes seg faults
fvec->userdefined[0] = 0; //(this value gets checked in create_svector() )
fvec->next = NULL;
/*
psi(x, y) contains a copy of each word x_i, offset in the feature space
according to y_i, plus, earlier in the vector, a count of how many times
each state->state transition occurs in y
*/
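/*
Sketch of the resulting layout, assuming (as the loops below suggest)
that get_transition_feature_id() packs the numTags^2 transition features
first and get_output_feature_start_id() places one block of per-word
features after them for each tag: with 2 tags and 3 word features,
features 1..4 would count the transitions 0->0, 0->1, 1->0, 1->1, and
features 5..7 / 8..10 would hold the summed word features of the tokens
tagged 0 / 1 respectively.
*/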
//count state transitions and build a total feature vector for each tag that's used in sentence x
hash_map<unsigned int, unsigned int> transitions; //one entry per tag->tag transition found in the input; the value is the count
static SVECTOR** featuresByTag = new SVECTOR*[getNumTags()]; //tag ID -> map of feature IDs to sum of values for all words with said tag
for(unsigned int i = 0; i < getNumTags(); i++)
{
featuresByTag[i] = (SVECTOR*)my_malloc(sizeof(SVECTOR));
featuresByTag[i]->words = (WORD*)my_malloc(sizeof(WORD));
featuresByTag[i]->words[0].wnum = 0;
featuresByTag[i]->userdefined = NULL;
featuresByTag[i]->next = NULL;
featuresByTag[i]->factor = 1;
}
for(unsigned int i = 0; i < y.getLength() - 1; i++)
{
SVECTOR* tempVec = addFeatureVectors(*featuresByTag[y.getTag(i)], x.getToken(i).getFeatureMap());
free_svector(featuresByTag[y.getTag(i)]);
featuresByTag[y.getTag(i)] = tempVec;
tempVec = NULL;
transitions[get_transition_feature_id(y.getTag(i), y.getTag(i + 1))]++;
}
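//the last token contributes word features but no outgoing transition, so it is handled outside the loop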
SVECTOR* tempVec = addFeatureVectors(*featuresByTag[y.getLastTag()], x.getLastToken().getFeatureMap());
free_svector(featuresByTag[y.getLastTag()]);
featuresByTag[y.getLastTag()] = tempVec;
tempVec = NULL;
fvec->words = (WORD*)my_malloc((transitions.size() + 1) * sizeof(WORD)); //allow space for the end-vector flag (feat. # 0)
//add features to the vector in numerical order (transitions, then tag feature sums)
unsigned int fvecIndex = 0; //index into output vector that we're currently writing
//add the count of uses of each transition that's used
for(unsigned int i = 0; i < getNumTags(); i++)
for(unsigned int j = 0; j < getNumTags(); j++)
{
unsigned int id = get_transition_feature_id(i, j);
if(transitions.find(id) != transitions.end())
{
fvec->words[fvecIndex].wnum = id; //feature numbers start at 1; this is handled in get_*_id()
fvec->words[fvecIndex].weight = transitions[id];
fvecIndex++;
}
}
//add the end-of-list flag (that this is 0 is *why* feature numbers start at 1)
fvec->words[fvecIndex].wnum = 0;
//for each tag in order, add the sum of the feature vectors of the words so labeled
for(unsigned int i = 0; i < getNumTags(); i++)
if(featuresByTag[i]->words[0].wnum != 0) //there are tokens with this label
appendFeatureVectorWithFeatNumOffset(*fvec, *featuresByTag[i], get_output_feature_start_id((tagID)i, sparm) - 1);
//cleanup
for(unsigned int i = 0; i < getNumTags(); i++)
free_svector(featuresByTag[i]);
return(fvec);
}
/*
if the labels aren't the same length, the loss is computed using the appropriate subsequence of whichever label is longer
*/
double loss(LABEL y, LABEL ybar, STRUCT_LEARN_PARM *sparm)
{
/* loss for correct label y and predicted label ybar. The loss for
y==ybar has to be zero. sparm->loss_function is set with the -l option. */
if(sparm->loss_function == 0) /* type 0 loss: 0/1 loss (0 if y==ybar, 1 otherwise) */
{
/* 0/1 loss is not supported by this implementation */
fprintf(stderr, "loss(): loss function is set to zero/one loss; this code only works with Hamming loss (-l 1). exiting\n");
exit(-1);
}
/* Put your code for different loss functions here. But then
find_most_violated_constraint_???(x, y, sm) has to return the
highest scoring label with the largest loss. */
else if(sparm->loss_function == 1) /* type 1 loss: constant penalty per wrong POS tag */
{
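/* e.g. y = (DT, NN, VB) and ybar = (DT, JJ, VB) give penalty = 1; if
the labels differ in length, only the overlapping prefix is compared
(see the note above loss()) */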
unsigned int penalty = 0;
const unsigned int minSize = min(y.getLength(), ybar.getLength());
for(unsigned int i = 0; i < minSize; i++)
if(ybar.getTag(i) != y.getTag(i)) penalty++;
return (double)penalty;
}
else
{
fprintf(stderr, "loss(): unknown loss function id %d\n", sparm->loss_function);
exit(-1);
}
}
int finalize_iteration(double ceps, int cached_constraint,
SAMPLE sample, STRUCTMODEL *sm,
CONSTSET cset, double *alpha,
STRUCT_LEARN_PARM *sparm)
{
/* This function is called just before the end of each cutting plane
iteration. ceps is the amount by which the most violated constraint
found in the current iteration was violated. cached_constraint is
true if the added constraint was constructed from the cache. If the
return value is FALSE, then the algorithm is allowed to terminate.
If it is TRUE, the algorithm will keep iterating even if the desired
precision sparm->epsilon is already reached. */
return(0);
}
void print_struct_learning_stats(SAMPLE sample, STRUCTMODEL *sm,
CONSTSET cset, double *alpha,
STRUCT_LEARN_PARM *sparm)
{
/* This function is called after training and allows final touches to
the model sm. But primarily it allows computing and printing any
kind of statistic (e.g. training error) you might want. */
}
void print_struct_testing_stats(SAMPLE sample, STRUCTMODEL *sm,
STRUCT_LEARN_PARM *sparm,
STRUCT_TEST_STATS *teststats)
{
/* This function is called after making all test predictions in
svm_struct_classify and allows computing and printing any kind of
evaluation (e.g. precision/recall) you might want. You can use
the function eval_prediction to accumulate the necessary
statistics for each prediction. */
double avgLoss = (double)(teststats->numTokens - teststats->numCorrectTags) / teststats->numTokens;
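//e.g. 9500 correct tags out of 10000 tokens -> average loss (10000 - 9500) / 10000 = 0.0500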
printf("average loss per word: %.4lf\n", avgLoss);
}
void eval_prediction(long exnum, EXAMPLE ex, LABEL ypred,
STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm,
STRUCT_TEST_STATS *teststats)
{
/* This function allows you to accumulate statistics for how well the
prediction matches the labeled example. It is called from
svm_struct_classify. See also the function
print_struct_testing_stats. */
if(exnum == 0) /* this is the first time the function is called. So initialize the teststats (note it has been allocated) */
{
teststats->numTokens = teststats->numCorrectTags = 0;
}
teststats->numTokens += ex.x.getLength();
for(unsigned int i = 0; i < ex.x.getLength(); i++)
if(ex.y.getTag(i) == ypred.getTag(i))
teststats->numCorrectTags++;
}
/*
auxiliary to read/write_struct_model()
*/
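//e.g. "pos.model" -> "pos_svmModel.dat"; a filename with no '.' keeps its full name before the "_svmModel.dat" suffix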
string structModelFilename2svmModelFilename(const string& smFilename)
{
return smFilename.substr(0, smFilename.rfind('.')) + "_svmModel.dat";
}
/*
writes the struct model to 'file'; the underlying svm model is written to an autogenerated companion filename
*/
void write_struct_model(char *file, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm)
{
/* Writes structural model sm to file file. */
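/* The text format written below (and parsed back by read_struct_model()):
feature space size: <n>
labels: <id>=<tag> <id>=<tag> ...
weight vector size: <sizePsi>
weight vector: <featNum>:<weight> ... (nonzero entries only)
loss type (1 = slack rescaling, 2 = margin rescaling): <type>
loss function (should be 1 for svm-hmm): <id>
*/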
ofstream outfile(file);
//write number of features per word
outfile << "feature space size: " << sparm->featureSpaceSize << endl;
//write the tags we picked up from the input
outfile << "labels:";
for(hash_map<tagID, tag>::iterator i = idToTagMap.begin(); i != idToTagMap.end(); i++)
outfile << " " << (*i).first << "=" << (*i).second;
outfile << endl;
//write the (sparse) weight vector
outfile << "weight vector size: " << sm->sizePsi << endl;
outfile << "weight vector:";
for(unsigned int i = 0; i < (unsigned int)sm->sizePsi; i++)
if(sm->w[i] != 0)
outfile << " " << i << ":" << setprecision(8) << sm->w[i];
outfile << endl;
outfile << "loss type (1 = slack rescaling, 2 = margin rescaling): " << sparm->loss_type << endl;
outfile << "loss function (should be 1 for svm-hmm): " << sparm->loss_function << endl;
outfile.close();
printf("writing svm model to '%s'\n", structModelFilename2svmModelFilename(file).c_str());
write_model(const_cast<char*>(structModelFilename2svmModelFilename(file).c_str()), sm->svm_model); //write svm model
}
/*
reads the struct model from 'file' and the svm model from the autogenerated companion filename
*/
STRUCTMODEL read_struct_model(char *file, STRUCT_LEARN_PARM *sparm)
{
/* Reads structural model sm from file file. This function is used
only in the prediction module, not in the learning module. */
STRUCTMODEL model;
#define ERROR_READING(what) do { fprintf(stderr, "read_struct_model(): error reading " #what "\n"); exit(-1); } while(0)
ifstream infile(file);
//read number of features per word
if(!(infile >> match("feature space size: ") >> sparm->featureSpaceSize))
{
ERROR_READING("feature space size");
}
//read tags taken from input to learner
if(!(infile >> match("\nlabels: ")))
{
ERROR_READING("labels");
}
string labelLine;
if(!getline(infile, labelLine, '\n'))
{
ERROR_READING("labels");
}
//the model is read before the examples, so we can fill up the tag database without using the protective interface above
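//e.g. the label line "0=DT 1=NN 2=VB" yields idToTagMap[0] = "DT", tagToIDMap["DT"] = 0, and so on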
unsigned int id;
string label, idStr;
istringstream inlbl(labelLine);
while(getline(inlbl, idStr, '=') && inlbl >> label)
{
istringstream inid(idStr);
if(!(inid >> id))
{
ERROR_READING("labels");
}
idToTagMap[id] = label;
tagToIDMap[label] = id;
}
//read the (sparse) weight vector
if(!(infile >> match("weight vector size: ") >> model.sizePsi))
{
ERROR_READING("weight vector size");
}
model.w = (double*)my_malloc(model.sizePsi * sizeof(double));
memset(model.w, 0, model.sizePsi * sizeof(double)); //all entries default to 0
if(!(infile >> match("\nweight vector: ")))
{
ERROR_READING("weight vector");
}
unsigned int featNum;
double featVal;
string featLine;
if(!getline(infile, featLine, '\n'))
{
ERROR_READING("weight vector");
}
istringstream instr(featLine);
while(instr >> featNum >> match(":") >> featVal)
model.w[featNum] = featVal;
//read the learning parameters
if(!(infile >> match("loss type (1 = slack rescaling, 2 = margin rescaling): ") >> sparm->loss_type))
{
ERROR_READING("loss type");
}
if(!(infile >> match("\nloss function (should be 1 for svm-hmm): ") >> sparm->loss_function))
{
ERROR_READING("loss function");
}
#undef ERROR_READING
infile.close();
setTagRegistryWritable(false); //make sure tags read in through the test set won't be used during classification
model.svm_model = read_model(const_cast<char*>(structModelFilename2svmModelFilename(file).c_str())); //read svm model
return model;
}
void write_label(FILE *fp, LABEL y)
{
/* Writes label y to file handle fp. Used only to output classification results. */
fprintf(fp, "{ ");
for(unsigned int i = 0; i < y.getLength(); i++)
fprintf(fp, "%s ", getTagByID(y.getTag(i)).c_str());
fprintf(fp, "}");
}
void free_pattern(PATTERN x)
{
/* Frees the memory of x. */
//no-op
}
void free_label(LABEL y) {
/* Frees the memory of y. */
//no-op
}
void free_struct_model(STRUCTMODEL sm)
{
/* Frees the memory of model. */
/* if(sm.w) GC_FREE(sm.w); */ /* this is free'd in free_model */
if(sm.svm_model) free_model(sm.svm_model, 1);
/* add free calls for user defined data here */
}
void free_struct_sample(SAMPLE s)
{
/* Frees the memory of sample s. */
//no-op; we don't know whether the examples were allocated via malloc() or new[]
}
void print_struct_help()
{
/* Prints a help text that is appended to the common help text of
svm_struct_learn. */
printf(" --* string -> custom parameters that can be adapted for struct\n");
printf(" learning. The * can be replaced by any character\n");
printf(" and there can be multiple options starting with --.\n");
}
void parse_struct_parameters(STRUCT_LEARN_PARM *sparm)
{
sparm->featureSpaceSize = 0; //this is checked when reading the examples
/* Parses the command line parameters that start with -- */
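/* e.g. an invocation containing "--e 0.1" dispatches on the character at
index 2 ('e'); each case advances i past the option's value */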
for(unsigned int i=0;(i<sparm->custom_argc) && ((sparm->custom_argv[i])[0] == '-');i++) {
switch ((sparm->custom_argv[i])[2])
{
case 'a': i++; /* strcpy(learn_parm->alphafile,argv[i]); */ break;
case 'e': i++; /* sparm->epsilon=atof(sparm->custom_argv[i]); */ break;
case 'k': i++; /* sparm->newconstretrain=atol(sparm->custom_argv[i]); */ break;
default: printf("\nUnrecognized option %s!\n\n",sparm->custom_argv[i]); exit(0);
}
}
}
void print_struct_help_classify()
{
/* Prints a help text that is appended to the common help text of
svm_struct_classify. */
printf(" --* string -> custom parameters that can be adapted for struct\n");
printf(" learning. The * can be replaced by any character\n");
printf(" and there can be multiple options starting with --.\n");
}
void parse_struct_parameters_classify(char *attribute, char *value)
{
/* Parses one command line parameters that start with -- . The name
of the parameter is given in attribute, the value is given in
value. */
switch (attribute[2])
{
/* case 'x': strcpy(xvalue,value); break; */
default: printf("\nUnrecognized option %s!\n\n",attribute);
exit(0);
}
}