📄 lpcalc.c
字号:
lm->vocSize = N = bi->wmap->used; lm->binMap = (NameId *) New(lm->heap,N*sizeof(NameId)); lm->binMap--; lm->htab = CreateHashTable(11731,"Back-off LM hash table"); for (i=0; i<N; i++) { nId = GetNameId(lm->htab,bi->wmap->id[i]->name,TRUE); ndx = LMNDX(bi->wmap,i); LM_INDEX(nId) = ndx; lm->binMap[ndx] = nId; } lm->gInfo[1].boInfo = NULL; for (i=2; i<=lm->nSize; i++) { /* initialise discount info, etc */ boi = (BackOffInfo *) New(lm->heap,sizeof(BackOffInfo)); boi->cutOff = bi->cutOff[i]; boi->wdThresh = bi->wdThresh[i]; boi->dcType = bi->dctype; switch (boi->dcType) { case DC_KATZ: boi->dcInfo.tgInfo.kRange = bi->kRange; boi->dcInfo.tgInfo.coef = NULL; break; case DC_ABSOLUTE: boi->dcInfo.bCoef = 0.0; break; default: break; } lm->gInfo[i].boInfo = boi; } CheckCutoffs(lm);#ifdef HTK_CRYPT lm->encrypt = FALSE;#endif lm->fe_buff = (FLEntry *) New(lm->heap,N*sizeof(FLEntry)); lm->se_buff = (SMEntry *) New(lm->heap,N*sizeof(SMEntry)); lm->classLM = FALSE; return lm;}/* CloneInputSet: return a copy of ngs */static NGInputSet *CloneInputSet(NGInputSet *src,NGInputSet *tgt){ GFLink p; CreateInputSet(&gstack,src->wm,tgt); for(p=src->head.chain; p!=NULL; p=p->chain) AddInputGFile(tgt,p->fn,p->weight); return tgt;}/* ComputeFoFTab: scan files and produce FoF table */void ComputeFoFTab(FoFTab *ftab, int nSize, NGInputSet *inSet){ int j,k,oci; NGram p, q; long **tocMat; int pos,fofSize; float count,occ[LM_NSIZE]; UInt gram[LM_NSIZE], fkey[LM_NSIZE]; if (ftab->N < nSize) HError(15595,"ComputeFoFTab: n-gram size mismatch"); for (k=0; k<LM_NSIZE; k++) occ[k] = 0.0f; /* initialise total count matrix */ fofSize = ftab->size; tocMat = (long **) New(&gstack,fofSize*sizeof(long *)); for (j=0; j<fofSize; j++) { tocMat[j] = (long *) New(&gstack,LM_NSIZE*sizeof(long)); tocMat[j]--; /* indexed from 1 */ for (k=0; k<LM_NSIZE; k++) tocMat[j][k+1] = 0; } OpenInputSet(inSet); if (!FilterNGram(inSet,gram,&count,nSize)) HError(15513,"ComputeFoFTab: Unable to read first n-gram"); memcpy(fkey,gram,nSize*sizeof(UInt)); do { for (p=gram,q=fkey,pos=0; pos<nSize; pos++,p++,q++) { if (*p == *q) { occ[pos] += count; } else { for (k=pos; k<nSize; k++) { oci = (int) occ[k]; if ((oci > 0) && (oci <= fofSize)) { ftab->fof[oci][k+1]++; } for (j=0; j<((fofSize<oci)?fofSize:oci); j++) tocMat[j][k+1]++; } for (k=pos; k<nSize; k++) { fkey[k] = gram[k]; occ[k] = count; } break; } } } while(FilterNGram(inSet,gram,&count,nSize)); for (k=0; k<nSize; k++) { oci = (int) occ[k]; if ((oci > 0) && (oci <= fofSize)) { ftab->fof[oci][k+1]++; } for (j=0; j<((fofSize<oci)?fofSize:oci); j++) tocMat[j][k+1]++; } CloseInputSet(inSet); if (trace&T_FOF) { /* print total counts if requested */ printf("\ncutoff "); for (j=0; j<nSize; j++) printf("\t%d-g",j+1); printf("\n"); for (k=0; k<fofSize; k++){ printf("%d", k); for (j=0; j<nSize; j++) printf("\t%ld ", tocMat[k][j+1]); printf("\n"); } } Dispose(&gstack,tocMat);}/* EXPORT->UpdateModel: update an existing model */BackOffLM *UpdateModel(BackOffLM *lm, BuildInfo *bi){ int i,curSize; NGInputSet iset; BackOffInfo *boi; curSize = lm->nSize; if (bi->nSize <= curSize) HError(15590,"UpdateModel: Current model is already %d-gram",curSize); if (bi->ptype != lm->probType) HError(15590,"UpdateModel: Incompatible probability kind specified (%d)",bi->ptype); for (i = curSize+1; i<=bi->nSize; i++) { boi = (BackOffInfo *) New(lm->heap,sizeof(BackOffInfo)); boi->cutOff = bi->cutOff[i]; boi->wdThresh = bi->wdThresh[i]; boi->dcType = bi->dctype; switch (boi->dcType) { case DC_KATZ: boi->dcInfo.tgInfo.kRange = bi->kRange; boi->dcInfo.tgInfo.coef = NULL; break; case DC_ABSOLUTE: boi->dcInfo.bCoef = 0.0; break; default: break; } lm->gInfo[i].boInfo = boi; } lm->nSize = bi->nSize; if (bi->ftab==NULL) { if (trace&T_TOP) { printf("Calculating FoF table\n"); fflush(stdout); } CloneInputSet(bi->inSet,&iset); bi->ftab = CreateFoFTab(&gstack,128,iset.N); ComputeFoFTab(bi->ftab,iset.N,&iset); } if (trace&T_TOP) { printf("Calculating discount coefficients\n"); fflush(stdout); } CheckCutoffs(lm); CalcDiscountCoefs(lm,bi->ftab); if (lm->se_buff==NULL) lm->se_buff = (SMEntry *) New(lm->heap,lm->vocSize*sizeof(SMEntry)); if (lm->fe_buff==NULL) lm->fe_buff = (FLEntry *) New(lm->heap,lm->vocSize*sizeof(FLEntry)); for (i=curSize+1; i<=bi->nSize; i++) { if (trace&T_TOP) { printf("Calculating %d-grams\n",i); fflush(stdout); } CloneInputSet(bi->inSet,&iset); lm->gInfo[i].nEntry = CalculateNGram(lm,&iset,i); } for (i=1; i<=lm->nSize; i++) { lm->gInfo[i].fmt = (i==1) ? LMF_TEXT : bi->saveFmt; } return lm;}/* EXPORT->GenerateModel: generate model fron n-gram data files */BackOffLM *GenerateModel(MemHeap *heap, BuildInfo *bi){ int i; BackOffLM *lm; NGInputSet iset; lm = InitTargetModel(heap,bi); if (bi->nSize > 1) { if (bi->ftab==NULL) { if (trace&T_TOP) { printf("Calculating FoF table\n"); fflush(stdout); } CloneInputSet(bi->inSet,&iset); bi->ftab = CreateFoFTab(&gstack,128,iset.N); ComputeFoFTab(bi->ftab,iset.N,&iset); /* WriteFoFTab("foo.fof",bi->ftab,"NULL"); */ } if (trace&T_TOP) { printf("Calculating discount coefficients\n"); fflush(stdout); } CheckCutoffs(lm); CalcDiscountCoefs(lm,bi->ftab); } uniFloor = bi->uniFloor; /* !! global */ for (i=1; i<=bi->nSize; i++) { if (trace&T_TOP) { printf("Calculating %d-grams\n",i); fflush(stdout); } CloneInputSet(bi->inSet,&iset); lm->gInfo[i].nEntry = CalculateNGram(lm,&iset,i); } for (i=1;i<=bi->nSize;i++) lm->gInfo[i].fmt = (i==1) ? LMF_TEXT : bi->saveFmt; return lm;}/* BuildFE: calculate nSize-gram probabilities and backoff weights */static int RebuildNGrams(BackOffLM *lm,int cxSize,int nSize,FLEntry **context){ int i, nfe, nse, nItem, tFE; UInt feId[LM_NSIZE]; FLEntry *fe, *parent, *tfe; nItem = tFE = 0; parent = context[cxSize-1]; if (cxSize < nSize-1) { for (fe=parent->fea,i=0; i<parent->nfe; i++,fe++) { context[cxSize] = fe; nItem += RebuildNGrams(lm,cxSize+1,nSize,context); } } else { /* cxSize == nSize-1 */ for (i=0; i<cxSize; i++) feId[i] = context[i]->ndx; nfe = 0; tfe = lm->fe_buff; for (fe=parent->fea,i=0; i<parent->nfe; i++,fe++) { feId[cxSize] = fe->ndx; if ((nse=CalcNGramProbs(lm,feId+1,nSize,fe,TRUE)) > 0) { nItem += nse; *tfe++ = *fe; nfe++; } } memcpy(parent->fea,lm->fe_buff,nfe*sizeof(FLEntry)); parent->nfe = nfe; tFE+=nfe; } lm->gInfo[0].nEntry+=tFE; return nItem;}/* ConvertToLog: LMP_FLOAT _> LMP_LOG conversion */static void ConvertToLog(FLEntry *parent){ int i; FLEntry *fe; SMEntry *se; for (se=parent->sea,i=0; i<parent->nse; i++,se++) se->prob = LN10*FLT_TO_LOG10(se->prob); for (fe=parent->fea,i=0; i<parent->nfe; i++,fe++) { fe->bowt = LN10*FLT_TO_LOG10(fe->bowt); ConvertToLog(fe); }}/* EXPORT->RebuildLM: normalise probs and calculate back-off weights */void RebuildLM(BackOffLM *lm, int *cutOff, float *wdThresh, LMProbType tgtPType) { int i,k,r,nItem,nNode; BackOffInfo *boi; TuringGoodInfo *tgi; FLEntry *cx[LM_NSIZE]; if (tgtPType!=LMP_FLOAT && tgtPType!=LMP_LOG && tgtPType !=LMP_COUNT) HError(15590,"RebuildLM: Invalid target probability kind (%d)",tgtPType); if (lm->probType==LMP_COUNT) { if (cutOff!=NULL || wdThresh!=NULL) { /* new cut-offs and coefs */ for (i=2; i<=lm->nSize; i++) { if ((boi = lm->gInfo[i].boInfo)==NULL) HError(15590,"RebuildLM: Back-off info not present for %d-grams",i); if (boi->dcType!=DC_KATZ) HError(15590,"RebuildLM: Unsupported LM type (%d)",boi->dcType); if (wdThresh!=NULL) boi->wdThresh = wdThresh[i]; if (cutOff==NULL || cutOff[i] < boi->cutOff) continue; boi->cutOff = cutOff[i]; tgi = &boi->dcInfo.tgInfo; k = (boi->cutOff > tgi->kRange) ? tgi->kRange : boi->cutOff; for (r=1; r<=k; r++) tgi->coef[r] = 0.0; } CheckCutoffs(lm); } if (lm->se_buff==NULL) lm->se_buff = (SMEntry *) New(lm->heap,lm->vocSize*sizeof(SMEntry)); if (lm->fe_buff==NULL) lm->fe_buff = (FLEntry *) New(lm->heap,lm->vocSize*sizeof(FLEntry)); if (tgtPType!=LMP_COUNT) lm->probType = LMP_FLOAT; /* convert counts to probs */ nItem = CalcUniProbs(lm,&lm->root,TRUE); if (trace&T_TOP) { printf(" rebuilt %d-grams, %d -> %d\n",1,lm->gInfo[1].nEntry,nItem); } lm->gInfo[1].nEntry = nItem; nNode=lm->gInfo[0].nEntry; lm->gInfo[0].nEntry=1; for (cx[0] = &lm->root,i=2; i<=lm->nSize; i++) { nItem = RebuildNGrams(lm,1,i,cx); if (trace&T_TOP) { printf(" rebuilt %d-grams, %d -> %d\n",i,lm->gInfo[i].nEntry,nItem); } lm->gInfo[i].nEntry = nItem; } if (trace&T_TOP) { printf(" rebuilt x-nodes, %d -> %d\n", nNode,lm->gInfo[0].nEntry); } } if (tgtPType==LMP_LOG && lm->probType==LMP_FLOAT) { ConvertToLog(&lm->root); lm->probType=LMP_LOG; }}/* ---------------------- End of LPCalc.c ---------------------- */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -