📄 trainer.cpp
            if (j > 0) {
                *temp = *alpha;
                mathlib::mult(num_labels, next_alpha, Mi, temp, 1);
                next_alpha->comp_mult(Vi);
            } else {
                *next_alpha = *Vi;
            }

            // start to scan features at position "j" of the current sequence
            pfgen->start_scan_features_at(*datait, j);

            while (pfgen->has_next_feature()) {
                feature f;
                pfgen->next_feature(f);

                if ((f.ftype == EDGE_FEATURE1 && f.y == (*datait)[j].label
                        && (j > 0 && f.yp == (*datait)[j - 1].label))
                    || (f.ftype == STAT_FEATURE1 && f.y == (*datait)[j].label)) {
                    gradlogli[f.idx] += f.val;
                    seq_logli += lambda[f.idx] * f.val;
                }

                if (f.ftype == STAT_FEATURE1) {
                    // state feature
                    ExpF[f.idx] += (*next_alpha)[f.y] * f.val * (*(betas[j]))[f.y];
                } else if (f.ftype == EDGE_FEATURE1) {
                    // edge feature
                    ExpF[f.idx] += (*alpha)[f.yp] * (*Vi)[f.y] * Mi->mtrx[f.yp][f.y]
                                   * f.val * (*(betas[j]))[f.y];
                }
            }

            *alpha = *next_alpha;
            alpha->comp_mult(1.0 / scale[j]);
        }

        // Zx = sum(alpha_i_n) where i = 1..num_labels, n = seq_len
        double Zx = alpha->sum();

        // Log-likelihood of the current sequence:
        //   seq_logli = lambda * F(y_k, x_k) - log(Zx_k)
        // where x_k is the current sequence
        seq_logli -= log(Zx);

        // re-correct the value of seq_logli because Zx was computed from
        // scaled alpha values
        for (k = 0; k < seq_len; k++) {
            seq_logli -= log(scale[k]);
        }

        // Log-likelihood = sum_k[lambda * F(y_k, x_k) - log(Zx_k)]
        logli += seq_logli;

        // update the gradient vector
        for (k = 0; k < num_features; k++) {
            gradlogli[k] -= ExpF[k] / Zx;
        }
    } // end of the main loop

    // output some status information
    if (popt->debug_level > 0) {
        // cout << endl;
        printf("\n");
        printf("Iteration: %d\n", num_iters);
        printf("\tLog-likelihood = %17.6f\n", logli);
        double gradlogli_norm = trainer::norm(num_features, gradlogli);
        printf("\tNorm(log-likelihood gradient vector) = %17.6f\n", gradlogli_norm);
        double lambda_norm = trainer::norm(num_features, lambda);
        printf("\tNorm(lambda vector) = %17.6f\n", lambda_norm);

        if (is_logging) {
            fprintf(fout, "\n");
            fprintf(fout, "Iteration: %d\n", num_iters);
            fprintf(fout, "\tLog-likelihood = %17.6f\n", logli);
            fprintf(fout, "\tNorm(log-likelihood gradient vector) = %17.6f\n",
                    gradlogli_norm);
            fprintf(fout, "\tNorm(lambda vector) = %17.6f\n", lambda_norm);
        }
    }

    return logli;
}

// compute log Mi (first-order Markov)
void trainer::compute_log_Mi_1order(sequence & seq, int pos, doublematrix * Mi,
                                    doublevector * Vi, int is_exp) {
    *Mi = 0.0;
    *Vi = 0.0;

    // start scanning features of sequence "seq" at position "pos"
    pfgen->start_scan_features_at(seq, pos);

    // examine all features at position "pos"
    while (pfgen->has_next_feature()) {
        feature f;
        pfgen->next_feature(f);

        if (f.ftype == STAT_FEATURE1) {
            // state feature
            (*Vi)[f.y] += lambda[f.idx] * f.val;
        } else if (f.ftype == EDGE_FEATURE1) /* if (pos > 0) */ {
            // edge feature
            Mi->get(f.yp, f.y) += lambda[f.idx] * f.val;
        }
    }

    // take the exponential operator
    if (is_exp) {
        for (int i = 0; i < Mi->rows; i++) {
            // update for Vi
            (*Vi)[i] = exp((*Vi)[i]);
            // update for Mi
            for (int j = 0; j < Mi->cols; j++) {
                Mi->get(i, j) = exp(Mi->get(i, j));
            }
        }
    }
}

// compute log-likelihood gradient vector (second-order Markov)
double trainer::compute_logli_gradient_2order(double * lambda, double * gradlogli,
                                              int num_iters, FILE * fout) {
    double logli = 0.0;

    int lfo = popt->num_labels - 1;
    if (popt->lfo >= 0) {
        lfo = popt->lfo;
    }

    // counter variables
    int i, j, k;

    for (i = 0; i < num_features; i++) {
        gradlogli[i] = -1 * lambda[i] / popt->sigma_square;
        logli -= (lambda[i] * lambda[i]) / (2 * popt->sigma_square);
    }

    dataset::iterator datait;
    sequence::iterator seqit;
    int seq_count = 0;

    // go through all training data sequences
    for (datait = pdata->ptrndata->begin(); datait != pdata->ptrndata->end();
         datait++) {
        seq_count++;
        int seq_len = datait->size();

        *alpha = 1;
        for (i = 0; i < num_features; i++) {
            ExpF[i] = 0;
        }

        int betassize = betas.size();
        if (betassize < seq_len) {
            // allocate more beta vectors
            for (i = 0; i < seq_len - betassize; i++) {
                betas.push_back(new doublevector(num_labels));
            }
        }

        int scalesize = scale.size();
        if (scalesize < seq_len) {
            // allocate more scale elements
            for (i = 0; i < seq_len - scalesize; i++) {
                scale.push_back(1.0);
            }
        }

        // compute beta values in a backward fashion;
        // also scale the beta values (to sum to 1) to avoid numerical problems
        scale[seq_len - 1] = (popt->is_scaling) ? num_labels : 1;
        betas[seq_len - 1]->assign(1.0 / scale[seq_len - 1]);

        // start to compute beta values in backward fashion
        for (int i = seq_len - 1; i > 0; i--) {
            // compute the Mi matrix and Vi vector
            compute_log_Mi_2order(*datait, i, Mi, Vi, 1);
            *temp = *(betas[i]);
            temp->comp_mult(Vi);
            mathlib::mult(num_labels, betas[i - 1], Mi, temp, 0);

            // scale for the next (backward) beta values
            scale[i - 1] = (popt->is_scaling) ? betas[i - 1]->sum() : 1;
            betas[i - 1]->comp_mult(1.0 / scale[i - 1]);
        } // end of beta values computation

        // start to compute the log-likelihood of the current sequence
        double seq_logli = 0;
        for (j = 0; j < seq_len; j++) {
            compute_log_Mi_2order(*datait, j, Mi, Vi, 1);

            if (j > 0) {
                *temp = *alpha;
                mathlib::mult(num_labels, next_alpha, Mi, temp, 1);
                next_alpha->comp_mult(Vi);
            } else {
                *next_alpha = *Vi;
            }

            // start to scan features at position "j" of the current sequence
            pfgen->start_scan_features_at(*datait, j);

            while (pfgen->has_next_feature()) {
                feature f;
                pfgen->next_feature(f);

                if (f.ftype == EDGE_FEATURE1 && f.y == (*datait)[j].label) {
                    // edge feature (type 1)
                    if ((j == 0 && f.yp == lfo)
                        || (j > 0 && f.yp == (*datait)[j - 1].label)) {
                        gradlogli[f.idx] += f.val;
                        seq_logli += lambda[f.idx] * f.val;
                    }
                } else if (f.ftype == EDGE_FEATURE2
                           && f.y == (*datait)[j].label2order
                           && (j > 0 && f.yp == (*datait)[j - 1].label2order)) {
                    // edge feature (type 2)
                    gradlogli[f.idx] += f.val;
                    seq_logli += lambda[f.idx] * f.val;
                } else if (f.ftype == STAT_FEATURE1
                           && f.y == (*datait)[j].label) {
                    // state feature (type 1)
                    gradlogli[f.idx] += f.val;
                    seq_logli += lambda[f.idx] * f.val;
                } else if (f.ftype == STAT_FEATURE2
                           && f.y == (*datait)[j].label2order) {
                    // state feature (type 2)
                    gradlogli[f.idx] += f.val;
                    seq_logli += lambda[f.idx] * f.val;
                }

                if (f.ftype == EDGE_FEATURE1) {
                    // edge feature (type 1)
                    int index = f.yp * popt->num_labels + f.y;
                    ExpF[f.idx] += (*next_alpha)[index] * f.val
                                   * (*(betas[j]))[index];
                } else if (f.ftype == EDGE_FEATURE2) {
                    // edge feature (type 2)
                    ExpF[f.idx] += (*alpha)[f.yp] * (*Vi)[f.y]
                                   * Mi->mtrx[f.yp][f.y] * f.val
                                   * (*(betas[j]))[f.y];
                } else if (f.ftype == STAT_FEATURE1) {
                    // state feature (type 1)
                    for (int i = 0; i < popt->num_labels; i++) {
                        int index = i * popt->num_labels + f.y;
                        ExpF[f.idx] += (*next_alpha)[index] * f.val
                                       * (*(betas[j]))[index];
                    }
                } else if (f.ftype == STAT_FEATURE2) {
                    // state feature (type 2)
                    ExpF[f.idx] += (*next_alpha)[f.y] * f.val
                                   * (*(betas[j]))[f.y];
                }
            }

            *alpha = *next_alpha;
            alpha->comp_mult(1.0 / scale[j]);
        }

        // Zx = sum(alpha_i_n) where i = 1..num_labels, n = seq_len
        double Zx = alpha->sum();

        // Log-likelihood of the current sequence:
        //   seq_logli = lambda * F(y_k, x_k) - log(Zx_k)
        // where x_k is the current sequence
        seq_logli -= log(Zx);

        // re-correct the value of seq_logli because Zx was computed from
        // scaled alpha values
        for (k = 0; k < seq_len; k++) {
            seq_logli -= log(scale[k]);
        }

        // Log-likelihood = sum_k[lambda * F(y_k, x_k) - log(Zx_k)]
        logli += seq_logli;

        // update the gradient vector
        for (k = 0; k < num_features; k++) {
            gradlogli[k] -= ExpF[k] / Zx;
        }
    } // end of the main loop

    // output some status information
    if (popt->debug_level > 0) {
        printf("\n");
        printf("Iteration: %d\n", num_iters);
        printf("\tLog-likelihood = %17.6f\n", logli);
        double gradlogli_norm = trainer::norm(num_features, gradlogli);
        printf("\tNorm(log-likelihood gradient vector) = %17.6f\n", gradlogli_norm);
        double lambda_norm = trainer::norm(num_features, lambda);
        printf("\tNorm(lambda vector) = %17.6f\n", lambda_norm);

        if (is_logging) {
            fprintf(fout, "\n");
            fprintf(fout, "Iteration: %d\n", num_iters);
            fprintf(fout, "\tLog-likelihood = %17.6f\n", logli);
            fprintf(fout, "\tNorm(log-likelihood gradient vector) = %17.6f\n",
                    gradlogli_norm);
            fprintf(fout, "\tNorm(lambda vector) = %17.6f\n", lambda_norm);
        }
    }

    return logli;
}

// compute log Mi (second-order Markov)
void trainer::compute_log_Mi_2order(sequence & seq, int pos, doublematrix * Mi,
                                    doublevector * Vi, int is_exp) {
    *Mi = 0.0;
    *Vi = 0.0;

    // start scanning features of sequence "seq" at position "pos"
    pfgen->start_scan_features_at(seq, pos);

    // examine all features at position "pos"
    while (pfgen->has_next_feature()) {
        feature f;
        pfgen->next_feature(f);

        if (f.ftype == EDGE_FEATURE1) {
            // edge feature (type 1)
            (*Vi)[f.yp * popt->num_labels + f.y] += lambda[f.idx] * f.val;
        } else if (f.ftype == EDGE_FEATURE2) {
            // edge feature (type 2)
            Mi->get(f.yp, f.y) += lambda[f.idx] * f.val;
        } else if (f.ftype == STAT_FEATURE1) {
            // state feature (type 1)
            for (int i = 0; i < popt->num_labels; i++) {
                (*Vi)[i * popt->num_labels + f.y] += lambda[f.idx] * f.val;
            }
        } else if (f.ftype == STAT_FEATURE2) {
            // state feature (type 2)
            (*Vi)[f.y] += lambda[f.idx] * f.val;
        }
    }

    // take the exponential operator
    if (is_exp) {
        for (int i = 0; i < Mi->rows; i++) {
            // update for Vi
            (*Vi)[i] = exp((*Vi)[i]);
            // update for Mi
            for (int j = 0; j < Mi->cols; j++) {
                Mi->get(i, j) = exp(Mi->get(i, j));
            }
        }
    }
}
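Note on the scaling correction: the forward pass above divides alpha by scale[j] at every position, so the Zx it sums is a scaled partition function, and the loop that subtracts log(scale[k]) puts the missing factors back. The standalone sketch below is not part of trainer.cpp; the 2-label toy potentials and the names in it are made up for illustration. It runs an unscaled and a scaled forward pass side by side and prints the same log Z both ways. Here the scales come from the forward sums, whereas trainer.cpp reuses the scales from the backward (beta) pass; the identity holds for any positive scale values.

// scaling_sketch.cpp -- hypothetical standalone example, not part of trainer.cpp
#include <cstdio>
#include <cmath>
#include <vector>

int main() {
    const int L = 2;                         // toy number of labels
    // arbitrary toy potentials exp(M_k) for a sequence of length 3
    double M[3][2][2] = {
        { {1.0, 2.0}, {0.5, 1.5} },
        { {2.0, 0.3}, {1.0, 1.0} },
        { {0.7, 1.2}, {2.0, 0.4} }
    };

    std::vector<double> a(L, 1.0);           // unscaled forward vector
    std::vector<double> as(L, 1.0);          // scaled forward vector
    double sum_log_scale = 0.0;

    for (int k = 0; k < 3; k++) {
        std::vector<double> na(L, 0.0), nas(L, 0.0);
        for (int y = 0; y < L; y++) {
            for (int yp = 0; yp < L; yp++) {
                na[y]  += a[yp]  * M[k][yp][y];
                nas[y] += as[yp] * M[k][yp][y];
            }
        }
        a = na;
        // scale the forward vector by its sum and remember log(scale),
        // mirroring alpha->comp_mult(1.0 / scale[j]) in the trainer
        double s = nas[0] + nas[1];
        sum_log_scale += std::log(s);
        for (int y = 0; y < L; y++) {
            as[y] = nas[y] / s;
        }
    }

    double logZ_unscaled = std::log(a[0] + a[1]);
    double logZ_scaled   = std::log(as[0] + as[1]) + sum_log_scale;
    // both values agree, which is why the trainer subtracts log(scale[k])
    // for every position after subtracting log(Zx)
    printf("log Z (unscaled) = %.6f\n", logZ_unscaled);
    printf("log Z (scaled)   = %.6f\n", logZ_scaled);
    return 0;
}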
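Note on the second-order indexing: in the second-order functions a "state" of the forward/backward vectors is a label pair (yp, y) flattened as yp * popt->num_labels + y. That is why EDGE_FEATURE1 updates a single pair index while STAT_FEATURE1 loops over every possible previous label, and it presumably also means the trainer's num_labels member holds the number of pair states (popt->num_labels squared) in second-order mode. The sketch below is only an illustration of the mapping; the file name and label count are made up.

// pair_index_sketch.cpp -- hypothetical illustration, not part of trainer.cpp
#include <cstdio>

int main() {
    const int num_labels = 3;  // toy label count
    // flatten a label pair (yp, y) exactly as compute_log_Mi_2order does:
    //   index = yp * num_labels + y
    for (int yp = 0; yp < num_labels; yp++) {
        for (int y = 0; y < num_labels; y++) {
            int index = yp * num_labels + y;
            // unflattening recovers the pair, so alpha/beta vectors of
            // size num_labels * num_labels cover all pair states
            printf("(yp=%d, y=%d) -> index %d -> (yp=%d, y=%d)\n",
                   yp, y, index, index / num_labels, index % num_labels);
        }
    }
    return 0;
}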