⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lgprep.c

📁 隐马尔科夫模型工具箱
💻 C
📖 第 1 页 / 共 2 页
字号:
/* ReadRuleDef: read rule definition from s and it add to rule set */void ReadRuleDef(char *s, RuleSet *rset){   RuleDef *x;   char buf[256];   float f;   Boolean inPat = TRUE;      x = (RuleDef *)New(&(rset->mem),sizeof(RuleDef));   x->next = NULL;   if (rset->nRules++ == 0)      rset->head = x;   else      rset->tail->next = x;   rset->tail = x;   s = NextWord(s,buf);   f = atof(buf);   if (f<0.0 || f>1.0)      HError(16020,"ReadRuleDef: appl. factor %f out of range 0..1",f);   x->pact = f; x->psum = 0.0;   x->src.n = 0;   s = NextWord(s,buf);   while (s != NULL && inPat) {      if (x->src.n >= MAX_FIELDS)         HError(16020,"ReadRuleDef: too many fields in pattern");      switch(buf[0]) {         case ':':            inPat = FALSE;            break;         case '*':            x->src.fop[x->src.n++] = f_WILD;            break;         case '%':            x->src.fop[x->src.n] = f_WSET;            x->src.fdt[x->src.n++].setid = atoi(buf+1);            break;         case '!':            x->src.fop[x->src.n] = f_NWSET;            x->src.fdt[x->src.n++].setid = atoi(buf+1);            break;                     default:            x->src.fop[x->src.n] = f_WORD;            x->src.fdt[x->src.n++].wdid = GetLabId(buf,TRUE);            break;                  }      s = NextWord(s,buf);   }   x->tgt.n = 0;   while (s != NULL) {      if (x->tgt.n >= MAX_FIELDS)         HError(16020,"ReadRuleDef: too many fields in replace");      switch(buf[0]) {         case '$':            x->tgt.fop[x->tgt.n] = f_FIELD;            x->tgt.fdt[x->tgt.n++].setid = atoi(buf+1);            break;         default:            x->tgt.fop[x->tgt.n] = f_WORD;            x->tgt.fdt[x->tgt.n++].wdid = GetLabId(buf,TRUE);            break;                  }      s = NextWord(s,buf);   }   if (x->src.n > editWinSize) editWinSize = x->src.n;   if (trace&T_RIN)      printf(" read rule: %5.2f [%d : %d]\n",x->pact,x->src.n,x->tgt.n);}/* ReadRuleSet: read rule set from file */void ReadRuleSet(char *fn, RuleSet *rset){   Source src;   char buf[1024], *s;   Boolean infile;      if (InitSource(fn,&src,NoFilter)==FAIL) {      HError(16010, "ReadRuleSet: Can't read rule set from '%s'", fn);   }   do {      infile = ReadLine(&src,buf);      s = SkipToWord(buf);      if (*s != '\0') {         if (*s == '#')             ReadSetDef(s+1,rset);         else            ReadRuleDef(s,rset);      }   } while (infile);  if (trace&T_RIN) {      printf("Loaded %d sets and %d rules from file %s\n",              rset->nSets,rset->nRules,fn);      fflush(stdout);   }   CloseSource(&src);}/* PrintFields: print a list of rule fields */void PrintFields(FieldVec *fl){   int i;      for (i=0; i<fl->n; i++)      switch(fl->fop[i]){         case f_FIELD:  printf(" $%d",fl->fdt[i].flid); break;         case f_WILD:   printf(" *"); break;         case f_WORD:   printf(" %s",fl->fdt[i].wdid->name); break;         case f_WSET:   printf(" %%%d",fl->fdt[i].setid); break;         case f_NWSET:  printf(" !%d",fl->fdt[i].setid); break;         default:       printf(" <error; unknown type>"); break;      }}/* PrintRuleSet: print rule set */void PrintRuleSet(RuleSet *rset){   int i,j;   SetDef *x;   RuleDef *r;      printf("Rule Set [%d sets, %d rules]:\n",rset->nSets,rset->nRules);   for (i=0; i<MAX_SETS; i++) {      if ((x = rset->setlist[i]) != NULL) {         printf(" #%3d ",i);         for (j=0; j<x->nItem; j++) printf(" %s",x->item[j]->name);         printf("\n");      }   }   for (r=rset->head; r != NULL; r = r->next) {      printf("%5.2f ",r->pact);      PrintFields(&r->src); printf(" : "); PrintFields(&r->tgt);      printf("\n");   }}/* ------------------------ Initialisation ----------------------- *//* Exists:  return true if given file exists */Boolean Exists(char *fn){   FILE *f;      if ((f=fopen(fn,"r")) == NULL) return FALSE;   fclose(f);   return TRUE;}/* InitWordMap: load and initialise wordmap */void InitWordMap(void){   CreateWordMap(imapFN, &wmap, newWords);   if (forceCnts) wmap.hasCnts = TRUE;   if (!htkEscape) wmap.htkEsc = FALSE;   /* default is TRUE */   ++wmap.seqno;   mapUpdated = FALSE;}/* InitShiftReg: initialise a shift register */void InitShiftReg(ShiftReg *sr, int size, char *fn){   char path[256];      MakeFN(fn,dbsDir,NULL,path);   sr->used = 0;   sr->ng[nSize] = 1;   /* count = 1 */   sr->ngb = CreateNGBuffer(&ngbHeap,nSize,size,path,&wmap);   sr->ngb->fndx += dumpOfs;}/* Initialise: initialise global data structures */void Initialise(void){   char buf[256];      if (ruleFN != NULL) {      if (trace&T_TOP) printf(" creating rule set %s\n",ruleFN);      CreateRuleSet(&rset);      ReadRuleSet(ruleFN,&rset);      if (trace&T_RUL) PrintRuleSet(&rset);   }   InitWordMap();   CreateHeap(&ngbHeap,"NGB mem",MSTAK,1,0.0,1000,1000);   if (gbGen) InitShiftReg(&stdBuf,ngbSize,rootFN);   if (ruleFN != NULL) {      sprintf(buf,"%s_pos",rootFN);      InitShiftReg(&posBuf,egbSize,buf);      sprintf(buf,"%s_neg",rootFN);      InitShiftReg(&negBuf,egbSize,buf);   }}/* ----------------- NGram Counting Routines -------------------- *//* CompressBuffer: and save if necessary or mustSave is TRUE */void CompressBuffer(NGBuffer *ngb, Boolean mustSave){   float compx;   if (ngb->used == 0) return;   if (trace&T_MEM) {      printf("** before buffer sort\n");      PrintAllHeapStats();   }   SortNGBuffer(ngb);   if (trace&T_MEM) {      printf("** after buffer sort\n");      PrintAllHeapStats();   }   compx = 100.0 * (float)ngb->used / (float)ngb->poolsize;   if (trace&T_SAV) {      printf(" buffer %s.%d compressed%s to %.1f%% at word %d\n",              ngb->fn, ngb->fndx, mustSave?"[must save]":"",compx,wordnum);   }   if (compx > 75.0 || mustSave) {      if (mustSave && mapUpdated) {         SaveWordMap(omapFN,&wmap,FALSE);         mapUpdated = FALSE;         if (trace&T_TOP)             printf(" word map saved to %s\n",omapFN);      }      if (trace&T_TOP) {         printf(" saving %d ngrams to file %s.%d\n",                 ngb->used, ngb->fn, ngb->fndx);      }      WriteNGBuffer(ngb,txtsrc);   }}/* PutShiftRegister: push word into shift register and extract ngram */void PutShiftRegister(LabId id, ShiftReg *sr){   int i;   MapEntry *me;     if (trace&T_SHR){      printf("   %12s --> %s\n",id->name,sr->ngb->fn);      fflush(stdout);   }   AddWordToMap(&wmap,id);    mapUpdated = TRUE;   me = (MapEntry *)id->aux;   sr->ng[sr->used++] = me->ndx;   if (sr->used == nSize) {      /* record ngram */      StoreNGram(sr->ngb,sr->ng);      /* shift words */      sr->used--;      for (i=0; i<sr->used; i++)         sr->ng[i] = sr->ng[i+1];      /* compress buffer if full */      if (sr->ngb->used == sr->ngb->poolsize)  {         CompressBuffer(sr->ngb,FALSE);      }   }}/* -------------------- Editing Routines ------------------------- *//* MatchRule: return true if given rule matches editBuf */Boolean MatchRule(RuleDef *r){   int i,j;      for (i=0; i<r->src.n; i++) {      switch(r->src.fop[i]) {         case f_WORD:               if (editBuf[i] != r->src.fdt[i].wdid) return FALSE;             break;         case f_WSET:            j = r->src.fdt[i].setid;            if (!InSet(rset.setlist[j],editBuf[i])) return FALSE;            break;         case f_NWSET:            j = r->src.fdt[i].setid;            if (InSet(rset.setlist[j],editBuf[i])) return FALSE;            break;         case f_WILD:               break;         default:            HError(16090,": bad op [%d] in field %d of replace",                   r->tgt.fop[i],i);      }   }   return TRUE;}/* ApplyRule: put replace part of rule r into buf */void ApplyRule(RuleDef *r, LabId *buf){   int i;      for (i=0; i<r->tgt.n; i++) {      switch(r->tgt.fop[i]) {         case f_WORD:               buf[i] = r->tgt.fdt[i].wdid;             break;         case f_FIELD:              buf[i] = editBuf[r->tgt.fdt[i].flid];             break;         default:            HError(16090,": bad op [%d] in field %d of replace",                   r->tgt.fop[i],i);      }   }}/* SendToEditBuffer: insert word into edit buffer and apply rules */void SendToEditBuffer(LabId id){   RuleDef *r;   LabId replBuf[MAX_FIELDS];   int i;   editBuf[editUsed++] = id;   if (editUsed == editWinSize) {  /* buffer is filled */      /* try each rule in turn */      for (r=rset.head; r != NULL; r = r->next)         if (MatchRule(r)) {            r->psum += r->pact;            if (r->psum>1.0) {               ApplyRule(r,replBuf);               r->psum -= 1.0;               for (i=0; i< r->src.n; i++)                   PutShiftRegister(editBuf[i],&negBuf);               negBuf.used = 0;               for (i=0; i< r->tgt.n; i++)                   PutShiftRegister(replBuf[i],&posBuf);               posBuf.used = 0;            }         }      /* Shift words */      editUsed--;      for (i=0; i<editUsed; i++)         editBuf[i] = editBuf[i+1];   }}/* -------------------------- Input Text ------------------------- *//* ProcessText: read text files line by line and count ngrams */void ProcessText(char *fn, Boolean lastFile){   FILE *f;   LabId id;   char sbuf[1024],*word;   Boolean isPipe,wasSentStart;   if (trace&T_TOP)       printf("Reading source text file %s\n",(fn==NULL) ? "<stdin>" : fn);   if ((fn!=NULL) && (strcmp(fn,"-")!=0)) {      if ((f = FOpen(fn,LMTextFilter,&isPipe))==NULL)	 HError(16010,"ProcessText: unable to open text file %s", fn);   } else {      f = stdin;   }   wasSentStart = FALSE;   word = sbuf+1; sbuf[0]='_';   while (fscanf(f,"%255s",word)==1) {      wordnum++;      if (tagSentStart) {	id = GetLabId(wasSentStart ? sbuf : word,TRUE);	wasSentStart = (id==sstId);      } else {	id = GetLabId(word,TRUE);      }      if (trace&T_INP) printf("[%s]\n",id->name);      if (ruleFN == NULL && !gbGen) {	AddWordToMap(&wmap,id);       }	else {	id = GetLabId(word,TRUE);	if (ruleFN != NULL) 	  SendToEditBuffer(id);	if (gbGen) 	  PutShiftRegister(id,&stdBuf);      }   }   if (fn!=NULL)      FClose(f,isPipe);   if (lastFile) {      if (ruleFN == NULL && !gbGen) {         SortWordMap(&wmap);          SaveWordMap(omapFN,&wmap,FALSE);      } else {	if (gbGen)	  CompressBuffer(stdBuf.ngb,TRUE);	if (ruleFN != NULL){	  CompressBuffer(negBuf.ngb,TRUE);	  CompressBuffer(posBuf.ngb,TRUE);	}      }   }}/* ---------------------- End of LGPrep.c ----------------------- */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -