// gistrainer.java — GIS (Generalized Iterative Scaling) maximum-entropy model trainer (partial file).
outcomeLabels = di.getOutcomeLabels(); // 得到输出结果的字符串形式
//outcomeList = di.getOutcomeList();
numOutcomes = outcomeLabels.length;
iprob = Math.log(1.0 / numOutcomes); // 对每个输出结果的概率,平均概率
predLabels = di.getPredLabels(); // 得到断言的字符串形式
numPreds = predLabels.length; // 得到断言的个数
display("\tNumber of Event Tokens: " + numTokens + "\n");
display("\t Number of Outcomes: " + numOutcomes + "\n");
display("\t Number of Predicates: " + numPreds + "\n");
// 建立特征数组,第一维表示断言(整数形式),第二维表示输出结果
// 数组元素表示断言在各种输出结果中出现的次数(在整个训练集上的统计信息)
// 计算:num(ai,bj),形成特征-输出矩阵
double[][] predCount = new double[numPreds][numOutcomes];
for (TID = 0; TID < numTokens; TID++) {
for (int j = 0; j < contexts[TID].length; j++) {
predCount[contexts[TID][j]][outcomes[TID]]
+= numTimesEventsSeen[TID] * weightLabels[weights[TID][j]];
}
}
di = null; // don't need it anymore
// A fake "observation" to cover features which are not detected in the data.
// The default is to assume that we observed "1/10th" of a feature during training.
// 对没有出现的特征,假设出现次数为logSmoothingObservation(0.1)以平滑
final double smoothingObservation = _smoothingObservation;
// Get the observed expectations of the features. Strictly speaking,
// we should divide the counts by the number of Tokens, but because of
// the way the model's expectations are approximated in the
// implementation, this is cancelled out when we compute the next
// iteration of a parameter, making the extra divisions wasteful.
// 得到特征的经验期望值.严格来讲,应该把出现次数除以事件数,但因为实现过程中
// 模型的期望值是近似的,进行参数的迭代时会被抵偿掉,会使除法浪费,因此没有除法过程??
// 统计针对每个特征进行,因此所有的数据结构大小都是特征的个数
// HASH数组,每个数组元素都是一个HASH表
params = new TIntParamHashMap[numPreds];
modelExpects = new TIntParamHashMap[numPreds];
observedExpects = new TIntParamHashMap[numPreds];
// 为生成HASH映射准备????(不清楚此段代码的作用!!!)
int initialCapacity;
float loadFactor = (float) 0.9;
if (numOutcomes < 3) {
initialCapacity = 2;
loadFactor = (float) 1.0;
}
else if (numOutcomes < 5) {
initialCapacity = 2;
}
else {
initialCapacity = (int) numOutcomes / 2;
}
// 处理每个特征,由上面得到的特征数组,得到其经验期望值.
for (PID = 0; PID < numPreds; PID++) {
// 生成保存此特征的信息需要的数据结构HASH表
params[PID] = new TIntParamHashMap(initialCapacity, loadFactor); // 参数表
modelExpects[PID] = new TIntParamHashMap(initialCapacity, loadFactor); // 模型期望值
observedExpects[PID] = new TIntParamHashMap(initialCapacity, loadFactor); // 经验期望值
// 根据此特征在各种输出中的出现情况,为统计其期望值做初始化
for (OID = 0; OID < numOutcomes; OID++) {
if (predCount[PID][OID] > 0) { // (ai,bj)出现过,则将其相关参数插入经验期望HASH表,并初始化模型和参数HASH表
params[PID].put(OID, 0.0);
modelExpects[PID].put(OID, 0.0);
observedExpects[PID].put(OID, predCount[PID][OID]); //????
// observedExpects[PID].put(OID,Math.log(predCount[PID][OID])); 这是旧版本的处理方法!
}
else if (_simpleSmoothing) { // 没有出现,则根据平滑设置处理
params[PID].put(OID, 0.0);
modelExpects[PID].put(OID, 0.0);
observedExpects[PID].put(OID, smoothingObservation);
}
}
// 压缩相关HASH表
params[PID].compact();
modelExpects[PID].compact();
observedExpects[PID].compact();
}
// compute the expected value of correction 计算修正特征的期望值
if (_useSlackParameter) { // _useSlackParameter是新引入的参数,作用??
int cfvalSum = 0;
double curPredWeight = 0.0;
double curTokenSumWeight = 0.0;
for (TID = 0; TID < numTokens; TID++) { // 每个事件包含的特征都包含在contexts数组中
for (int j = 0; j < contexts[TID].length; j++) { //当前事件的每个特征;
PID = contexts[TID][j];
curPredWeight = weightLabels[weights[TID][j]];
curTokenSumWeight +=curPredWeight;
if (!modelExpects[PID].containsKey(outcomes[TID])) // HASH表中不包含,为修正特征??
cfvalSum += numTimesEventsSeen[TID] * curPredWeight;
}
// C-所有事件的特征函数之和 = 修正特征函数f(l)
cfvalSum += (constant - curTokenSumWeight) * numTimesEventsSeen[TID];
curTokenSumWeight = 0.0;
}
if (cfvalSum > 0 && cfvalSum < NEAR_ZERO) {
cfObservedExpect = Math.log(NEAR_ZERO); //nearly zero so log is defined
}
else {
// 上面的期望值初始化过程中旧版本用LOG,新版本中没有用LOG,此处要不要修改?????
cfObservedExpect = Math.log(cfvalSum); // 修正特征的期望值
}
correctionParam = 0.0; // 修正特征对应的参数
}
predCount = null; // don't need it anymore
display("...done.\n");
// 所有的初始化完毕,要进行迭代以得到模型参数
// 模型分布,估计一个事件对每个输出的概率
modelDistribution = new double[numOutcomes];
// 事件特征集对应每个输出的特征个数
numfeats = new int[numOutcomes];
/***************** Find the parameters ************************/
display("Computing model parameters...\n");
findParameters(iterations); // 进行迭代以得到模型参数
/*************** Create and return the model ******************/
return new GISModel(params, predLabels, outcomeLabels, (int)constant,
correctionParam);
}
/*
 * Estimate the model parameters by iterative scaling. Results are written
 * into the shared parameter structures rather than returned.
 */
private void findParameters(int iterations) {
    double previousLL = 0.0; // log-likelihood from the preceding pass
    double currentLL = 0.0;  // log-likelihood from the current pass
    display("Performing " + iterations + " iterations.\n");
    for (int iter = 1; iter <= iterations; iter++) {
        // Pad the iteration counter so the progress column lines up.
        if (iter < 10) {
            display(" " + iter + ": ");
        }
        else if (iter < 100) {
            display(" " + iter + ": ");
        }
        else {
            display(iter + ": ");
        }
        currentLL = nextIteration(); // one full scaling step; yields the new log-likelihood
        if (iter > 1) {
            // A drop in log-likelihood means the model is diverging — abort.
            if (previousLL > currentLL) {
                System.err.println("Model Diverging: loglikelihood decreased");
                break;
            }
            // Improvement below the threshold: converged, stop early.
            if (currentLL - previousLL < LLThreshold) {
                break;
            }
        }
        previousLL = currentLL; // keep for comparison on the next pass
    }
    // Release training-only structures; the learned parameters remain.
    observedExpects = null;
    modelExpects = null;
    numTimesEventsSeen = null;
    contexts = null;
}
/**
 * Evaluate a context with the current model: fill {@code outsums} with the
 * normalized probability of each outcome given that context.
 *
 * @param context the integer ids of the predicates (features) observed at
 *                the present decision point
 * @param weight  per-predicate weight-label indices, parallel to
 *                {@code context}
 * @param outsums output array indexed by outcome id, receiving the
 *                normalized outcome probabilities; the string form of an
 *                outcome can be recovered via getOutcome(int)
 */
public void eval(int[] context, int [] weight, double[] outsums) {
    // Start every outcome from the uniform log-prior iprob and reset the
    // per-outcome active-feature counters.
    for (int outcomeId = 0; outcomeId < numOutcomes; outcomeId++) {
        outsums[outcomeId] = iprob;
        numfeats[outcomeId] = 0;
    }
    // Add each active predicate's weighted parameter contribution.
    for (int i = 0; i < context.length; i++) {
        TIntParamHashMap predParams = params[context[i]]; // parameters of this predicate
        int[] activeOutcomes = predParams.keys(); // outcomes this predicate was observed with
        for (int j = 0; j < activeOutcomes.length; j++) {
            int outcomeId = activeOutcomes[j];
            numfeats[outcomeId]++; // used later for the correction term
            // constantInverse is 1/C in the GIS update; weightLabels scales the feature.
            outsums[outcomeId] += constantInverse * predParams.get(outcomeId) * weightLabels[weight[i]];
        }
    }
    // Exponentiate, optionally add the slack (correction-feature)
    // contribution, and accumulate the total mass for normalization.
    double total = 0.0;
    for (int outcomeId = 0; outcomeId < numOutcomes; outcomeId++) {
        outsums[outcomeId] = Math.exp(outsums[outcomeId]);
        if (_useSlackParameter) {
            outsums[outcomeId] +=
                ( (1.0 - ( (double) numfeats[outcomeId] / constant)) * correctionParam);
        }
        total += outsums[outcomeId];
    }
    // Normalize so the outcome probabilities sum to one.
    for (int outcomeId = 0; outcomeId < numOutcomes; outcomeId++) {
        outsums[outcomeId] /= total;
    }
}
/* Compute one iteration of GIS and return its log-likelihood. */
private double nextIteration() {
    // Compute the contribution of p(a|b_i) for each feature and accumulate
    // the new correction-feature value. NOTE: TID, PID, WID, OID, CFMOD and
    // modelDistribution are shared fields that the forEachEntry procedures
    // (updateModelExpect, updateParams) read, so they must be set here —
    // do not convert them to locals.
    double loglikelihood = 0.0;
    CFMOD = 0.0;
    int numEvents = 0;
    int numCorrect = 0; // events whose most probable outcome matches the observed outcome
    for (TID = 0; TID < numTokens; TID++) {
        // Refresh modelDistribution with the current model's outcome
        // probabilities for this event; each iteration updates it.
        eval(contexts[TID], weights[TID],modelDistribution);
        for (int j = 0; j < contexts[TID].length; j++) { // each feature of the current event
            PID = contexts[TID][j];
            WID = weights[TID][j];
            modelExpects[PID].forEachEntry(updateModelExpect); // update the model expectation
            if (_useSlackParameter) { // accumulate the correction-feature expectation
                for (OID = 0; OID < numOutcomes; OID++) {
                    // Outcomes this predicate was never observed with
                    // contribute to the correction feature instead.
                    if (!modelExpects[PID].containsKey(OID)) {
                        CFMOD += modelDistribution[OID] * numTimesEventsSeen[TID];
                    }
                }
            }
        }
        if (_useSlackParameter) {
            // Slack mass: C minus the number of active features in this event.
            CFMOD += (constant - contexts[TID].length) * numTimesEventsSeen[TID];
        }
        loglikelihood += Math.log(modelDistribution[outcomes[TID]]) * numTimesEventsSeen[TID];
        numEvents += numTimesEventsSeen[TID];
        // When verbose, count correctly predicted events so training accuracy
        // can be reported alongside the log-likelihood.
        if (printMessages) {
            int max = 0;
            for (OID = 1; OID < numOutcomes; OID++) {
                if (modelDistribution[OID] > modelDistribution[max]) {
                    max = OID;
                }
            }
            if (max == outcomes[TID]) {
                numCorrect += numTimesEventsSeen[TID];
            }
        }
    }
    display(".");
    // Compute the new parameter values from the observed/model expectations.
    for (PID = 0; PID < numPreds; PID++) {
        params[PID].forEachEntry(updateParams);
        modelExpects[PID].transformValues(backToZeros); // re-initialize to 0.0's
    }
    if (CFMOD > 0.0 && _useSlackParameter) {
        correctionParam += (cfObservedExpect - Math.log(CFMOD));
    }
    display(". loglikelihood=" + loglikelihood + "\t" +
        ( (double) numCorrect / numEvents) + "\n");
    return (loglikelihood);
}
/** Write {@code s} to standard output, but only when verbose mode is enabled. */
private void display(String s) {
    if (!printMessages) {
        return;
    }
    System.out.print(s);
}
}