📄 trainer.cpp
            if (j > 0) {
                *temp = *alpha;
                mathlib::mult(num_labels, next_alpha, Mi, temp, 1);
                next_alpha->comp_mult(Vi);
            } else {
                *next_alpha = *Vi;
            }

            // start to scan features at position "j" of the current sequence
            pfgen->start_scan_features_at(*datait, j);

            while (pfgen->has_next_feature()) {
                feature f;
                pfgen->next_feature(f);

                if ((f.ftype == EDGE_FEATURE1 && f.y == (*datait)[j].label
                        && (j > 0 && f.yp == (*datait)[j - 1].label))
                    || (f.ftype == STAT_FEATURE1 && f.y == (*datait)[j].label)) {
                    gradlogli[f.idx] += f.val;
                    seq_logli += lambda[f.idx] * f.val;
                }

                if (f.ftype == STAT_FEATURE1) {
                    // state feature
                    ExpF[f.idx] += (*next_alpha)[f.y] * f.val * (*(betas[j]))[f.y];
                } else if (f.ftype == EDGE_FEATURE1) {
                    // edge feature
                    ExpF[f.idx] += (*alpha)[f.yp] * (*Vi)[f.y] * Mi->mtrx[f.yp][f.y]
                                   * f.val * (*(betas[j]))[f.y];
                }
            }

            *alpha = *next_alpha;
            alpha->comp_mult(1.0 / scale[j]);
        }

        // Zx = sum(alpha_i_n) where i = 1..num_labels, n = seq_len
        double Zx = alpha->sum();

        // Log-likelihood of the current sequence:
        //   seq_logli = lambda * F(y_k, x_k) - log(Zx_k)
        // where x_k is the current sequence
        seq_logli -= log(Zx);

        // re-correct the value of seq_logli because Zx was computed from
        // scaled alpha values
        for (k = 0; k < seq_len; k++) {
            seq_logli -= log(scale[k]);
        }

        // Log-likelihood = sum_k[lambda * F(y_k, x_k) - log(Zx_k)]
        logli += seq_logli;

        // update the gradient vector
        for (k = 0; k < num_features; k++) {
            gradlogli[k] -= ExpF[k] / Zx;
        }
    } // end of the main loop

    // output some status information
    if (popt->debug_level > 0) {
        // cout << endl;
        printf("\n");
        printf("Iteration: %d\n", num_iters);
        printf("\tLog-likelihood = %17.6f\n", logli);
        double gradlogli_norm = trainer::norm(num_features, gradlogli);
        printf("\tNorm(log-likelihood gradient vector) = %17.6f\n", gradlogli_norm);
        double lambda_norm = trainer::norm(num_features, lambda);
        printf("\tNorm(lambda vector) = %17.6f\n", lambda_norm);

        if (is_logging) {
            fprintf(fout, "\n");
            fprintf(fout, "Iteration: %d\n", num_iters);
            fprintf(fout, "\tLog-likelihood = %17.6f\n", logli);
            fprintf(fout, "\tNorm(log-likelihood gradient vector) = %17.6f\n",
                    gradlogli_norm);
            fprintf(fout, "\tNorm(lambda vector) = %17.6f\n", lambda_norm);
        }
    }

    return logli;
}

// compute log Mi (first-order Markov)
void trainer::compute_log_Mi_1order(sequence & seq, int pos, doublematrix * Mi,
                                    doublevector * Vi, int is_exp) {
    *Mi = 0.0;
    *Vi = 0.0;

    // start scanning features of sequence "seq" at position "pos"
    pfgen->start_scan_features_at(seq, pos);

    // examine all features at position "pos"
    while (pfgen->has_next_feature()) {
        feature f;
        pfgen->next_feature(f);

        if (f.ftype == STAT_FEATURE1) {
            // state feature
            (*Vi)[f.y] += lambda[f.idx] * f.val;
        } else if (f.ftype == EDGE_FEATURE1) /* if (pos > 0) */ {
            // edge feature
            Mi->get(f.yp, f.y) += lambda[f.idx] * f.val;
        }
    }

    // take the exponential operator
    if (is_exp) {
        for (int i = 0; i < Mi->rows; i++) {
            // update for Vi
            (*Vi)[i] = exp((*Vi)[i]);
            // update for Mi
            for (int j = 0; j < Mi->cols; j++) {
                Mi->get(i, j) = exp(Mi->get(i, j));
            }
        }
    }
}

// compute log-likelihood gradient vector (second-order Markov)
double trainer::compute_logli_gradient_2order(double * lambda, double * gradlogli,
                                              int num_iters, FILE * fout) {
    double logli = 0.0;

    int lfo = popt->num_labels - 1;
    if (popt->lfo >= 0) {
        lfo = popt->lfo;
    }

    // counter variables
    int i, j, k;

    for (i = 0; i < num_features; i++) {
        gradlogli[i] = -1 * lambda[i] / popt->sigma_square;
        logli -= (lambda[i] * lambda[i]) / (2 * popt->sigma_square);
    }

    dataset::iterator datait;
    sequence::iterator seqit;
    int seq_count = 0;

    // go through all training data sequences
    for (datait = pdata->ptrndata->begin(); datait != pdata->ptrndata->end();
         datait++) {
        seq_count++;
        int seq_len = datait->size();

        *alpha = 1;
        for (i = 0; i < num_features; i++) {
            ExpF[i] = 0;
        }

        int betassize = betas.size();
        if (betassize < seq_len) {
            // allocate more beta vectors
            for (i = 0; i < seq_len - betassize; i++) {
                betas.push_back(new doublevector(num_labels));
            }
        }

        int scalesize = scale.size();
        if (scalesize < seq_len) {
            // allocate more scale elements
            for (i = 0; i < seq_len - scalesize; i++) {
                scale.push_back(1.0);
            }
        }

        // compute beta values in a backward fashion;
        // also scale the beta values (to sum to 1) to avoid numerical problems
        scale[seq_len - 1] = (popt->is_scaling) ? num_labels : 1;
        betas[seq_len - 1]->assign(1.0 / scale[seq_len - 1]);

        // start to compute beta values in backward fashion
        for (int i = seq_len - 1; i > 0; i--) {
            // compute the Mi matrix and Vi vector
            compute_log_Mi_2order(*datait, i, Mi, Vi, 1);
            *temp = *(betas[i]);
            temp->comp_mult(Vi);
            mathlib::mult(num_labels, betas[i - 1], Mi, temp, 0);

            // scale for the next (backward) beta values
            scale[i - 1] = (popt->is_scaling) ? betas[i - 1]->sum() : 1;
            betas[i - 1]->comp_mult(1.0 / scale[i - 1]);
        } // end of beta values computation

        // start to compute the log-likelihood of the current sequence
        double seq_logli = 0;
        for (j = 0; j < seq_len; j++) {
            compute_log_Mi_2order(*datait, j, Mi, Vi, 1);

            if (j > 0) {
                *temp = *alpha;
                mathlib::mult(num_labels, next_alpha, Mi, temp, 1);
                next_alpha->comp_mult(Vi);
            } else {
                *next_alpha = *Vi;
            }

            // start to scan features at position "j" of the current sequence
            pfgen->start_scan_features_at(*datait, j);

            while (pfgen->has_next_feature()) {
                feature f;
                pfgen->next_feature(f);

                if (f.ftype == EDGE_FEATURE1 && f.y == (*datait)[j].label) {
                    // edge feature (type 1)
                    if ((j == 0 && f.yp == lfo)
                        || (j > 0 && f.yp == (*datait)[j - 1].label)) {
                        gradlogli[f.idx] += f.val;
                        seq_logli += lambda[f.idx] * f.val;
                    }
                } else if (f.ftype == EDGE_FEATURE2
                           && f.y == (*datait)[j].label2order
                           && (j > 0 && f.yp == (*datait)[j - 1].label2order)) {
                    // edge feature (type 2)
                    gradlogli[f.idx] += f.val;
                    seq_logli += lambda[f.idx] * f.val;
                } else if (f.ftype == STAT_FEATURE1
                           && f.y == (*datait)[j].label) {
                    // state feature (type 1)
                    gradlogli[f.idx] += f.val;
                    seq_logli += lambda[f.idx] * f.val;
                } else if (f.ftype == STAT_FEATURE2
                           && f.y == (*datait)[j].label2order) {
                    // state feature (type 2)
                    gradlogli[f.idx] += f.val;
                    seq_logli += lambda[f.idx] * f.val;
                }

                if (f.ftype == EDGE_FEATURE1) {
                    // edge feature (type 1)
                    int index = f.yp * popt->num_labels + f.y;
                    ExpF[f.idx] += (*next_alpha)[index] * f.val
                                   * (*(betas[j]))[index];
                } else if (f.ftype == EDGE_FEATURE2) {
                    // edge feature (type 2)
                    ExpF[f.idx] += (*alpha)[f.yp] * (*Vi)[f.y]
                                   * Mi->mtrx[f.yp][f.y] * f.val
                                   * (*(betas[j]))[f.y];
                } else if (f.ftype == STAT_FEATURE1) {
                    // state feature (type 1)
                    for (int i = 0; i < popt->num_labels; i++) {
                        int index = i * popt->num_labels + f.y;
                        ExpF[f.idx] += (*next_alpha)[index] * f.val
                                       * (*(betas[j]))[index];
                    }
                } else if (f.ftype == STAT_FEATURE2) {
                    // state feature (type 2)
                    ExpF[f.idx] += (*next_alpha)[f.y] * f.val
                                   * (*(betas[j]))[f.y];
                }
            }

            *alpha = *next_alpha;
            alpha->comp_mult(1.0 / scale[j]);
        }

        // Zx = sum(alpha_i_n) where i = 1..num_labels, n = seq_len
        double Zx = alpha->sum();

        // Log-likelihood of the current sequence:
        //   seq_logli = lambda * F(y_k, x_k) - log(Zx_k)
        // where x_k is the current sequence
        seq_logli -= log(Zx);

        // re-correct the value of seq_logli because Zx was computed from
        // scaled alpha values
        for (k = 0; k < seq_len; k++) {
            seq_logli -= log(scale[k]);
        }

        // Log-likelihood = sum_k[lambda * F(y_k, x_k) - log(Zx_k)]
        logli += seq_logli;

        // update the gradient vector
        for (k = 0; k < num_features; k++) {
            gradlogli[k] -= ExpF[k] / Zx;
        }
    } // end of the main loop

    // output some status information
    if (popt->debug_level > 0) {
        printf("\n");
        printf("Iteration: %d\n", num_iters);
        printf("\tLog-likelihood = %17.6f\n", logli);
        double gradlogli_norm = trainer::norm(num_features, gradlogli);
        printf("\tNorm(log-likelihood gradient vector) = %17.6f\n", gradlogli_norm);
        double lambda_norm = trainer::norm(num_features, lambda);
        printf("\tNorm(lambda vector) = %17.6f\n", lambda_norm);

        if (is_logging) {
            fprintf(fout, "\n");
            fprintf(fout, "Iteration: %d\n", num_iters);
            fprintf(fout, "\tLog-likelihood = %17.6f\n", logli);
            fprintf(fout, "\tNorm(log-likelihood gradient vector) = %17.6f\n",
                    gradlogli_norm);
            fprintf(fout, "\tNorm(lambda vector) = %17.6f\n", lambda_norm);
        }
    }

    return logli;
}

// compute log Mi (second-order Markov)
void trainer::compute_log_Mi_2order(sequence & seq, int pos, doublematrix * Mi,
                                    doublevector * Vi, int is_exp) {
    *Mi = 0.0;
    *Vi = 0.0;

    // start scanning features of sequence "seq" at position "pos"
    pfgen->start_scan_features_at(seq, pos);

    // examine all features at position "pos"
    while (pfgen->has_next_feature()) {
        feature f;
        pfgen->next_feature(f);

        if (f.ftype == EDGE_FEATURE1) {
            // edge feature (type 1)
            (*Vi)[f.yp * popt->num_labels + f.y] += lambda[f.idx] * f.val;
        } else if (f.ftype == EDGE_FEATURE2) {
            // edge feature (type 2)
            Mi->get(f.yp, f.y) += lambda[f.idx] * f.val;
        } else if (f.ftype == STAT_FEATURE1) {
            // state feature (type 1)
            for (int i = 0; i < popt->num_labels; i++) {
                (*Vi)[i * popt->num_labels + f.y] += lambda[f.idx] * f.val;
            }
        } else if (f.ftype == STAT_FEATURE2) {
            // state feature (type 2)
            (*Vi)[f.y] += lambda[f.idx] * f.val;
        }
    }

    // take the exponential operator
    if (is_exp) {
        for (int i = 0; i < Mi->rows; i++) {
            // update for Vi
            (*Vi)[i] = exp((*Vi)[i]);
            // update for Mi
            for (int j = 0; j < Mi->cols; j++) {
                Mi->get(i, j) = exp(Mi->get(i, j));
            }
        }
    }
}
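Note on the scaling correction: the forward pass above divides alpha by scale[j] at every position, so the Zx it sums is a scaled partition function, and the loop that subtracts log(scale[k]) puts the missing factors back. The standalone sketch below is not part of trainer.cpp; the 2-label toy potentials and the names in it are made up for illustration. It runs an unscaled and a scaled forward pass side by side and prints the same log Z both ways. Here the scales come from the forward sums, whereas trainer.cpp reuses the scales from the backward (beta) pass; the identity holds for any positive scale values.

// scaling_sketch.cpp -- hypothetical standalone example, not part of trainer.cpp
#include <cstdio>
#include <cmath>
#include <vector>

int main() {
    const int L = 2;                         // toy number of labels
    // arbitrary toy potentials exp(M_k) for a sequence of length 3
    double M[3][2][2] = {
        { {1.0, 2.0}, {0.5, 1.5} },
        { {2.0, 0.3}, {1.0, 1.0} },
        { {0.7, 1.2}, {2.0, 0.4} }
    };

    std::vector<double> a(L, 1.0);           // unscaled forward vector
    std::vector<double> as(L, 1.0);          // scaled forward vector
    double sum_log_scale = 0.0;

    for (int k = 0; k < 3; k++) {
        std::vector<double> na(L, 0.0), nas(L, 0.0);
        for (int y = 0; y < L; y++) {
            for (int yp = 0; yp < L; yp++) {
                na[y]  += a[yp]  * M[k][yp][y];
                nas[y] += as[yp] * M[k][yp][y];
            }
        }
        a = na;
        // scale the forward vector by its sum and remember log(scale),
        // mirroring alpha->comp_mult(1.0 / scale[j]) in the trainer
        double s = nas[0] + nas[1];
        sum_log_scale += std::log(s);
        for (int y = 0; y < L; y++) {
            as[y] = nas[y] / s;
        }
    }

    double logZ_unscaled = std::log(a[0] + a[1]);
    double logZ_scaled   = std::log(as[0] + as[1]) + sum_log_scale;
    // both values agree, which is why the trainer subtracts log(scale[k])
    // for every position after subtracting log(Zx)
    printf("log Z (unscaled) = %.6f\n", logZ_unscaled);
    printf("log Z (scaled)   = %.6f\n", logZ_scaled);
    return 0;
}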
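Note on the second-order indexing: in the second-order functions a "state" of the forward/backward vectors is a label pair (yp, y) flattened as yp * popt->num_labels + y. That is why EDGE_FEATURE1 updates a single pair index while STAT_FEATURE1 loops over every possible previous label, and it presumably also means the trainer's num_labels member holds the number of pair states (popt->num_labels squared) in second-order mode. The sketch below is only an illustration of the mapping; the file name and label count are made up.

// pair_index_sketch.cpp -- hypothetical illustration, not part of trainer.cpp
#include <cstdio>

int main() {
    const int num_labels = 3;  // toy label count
    // flatten a label pair (yp, y) exactly as compute_log_Mi_2order does:
    //   index = yp * num_labels + y
    for (int yp = 0; yp < num_labels; yp++) {
        for (int y = 0; y < num_labels; y++) {
            int index = yp * num_labels + y;
            // unflattening recovers the pair, so alpha/beta vectors of
            // size num_labels * num_labels cover all pair states
            printf("(yp=%d, y=%d) -> index %d -> (yp=%d, y=%d)\n",
                   yp, y, index, index / num_labels, index % num_labels);
        }
    }
    return 0;
}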