⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ngramcount.c

📁 about sound recognition.i want to downlod
💻 C
字号:
#include <stdlib.h>#include <math.h>#include <sys/types.h>#include <netinet/in.h>#include "libs.h"#include "io.h"#include "misc.h"#include "hash.h"#include "vocab.h"#include "ngram.h"#define ST_NORMAL  0#define ST_MIN     1#define ST_EOF     2SLMWordTuple SLMDupTuple(SLMWordTuple t, int len){    SLMWordTuple d = SLMNewWordTuple(len);    int i;    for (i = 0; i < len; i++) {	d[i] = t[i];    }    return d;}SLMNgramCount*SLMNewNgramCount(int len){    SLMNgramCount *nc;    nc = New(SLMNgramCount);    nc->word_id = New_N(SLMWordID,len);    return nc;}voidSLMFreeNgramCount(SLMNgramCount *nc){    Free(nc->word_id);    Free(nc);}SLMNgramCount*SLMNewNgramCount_N(int n, int len){    SLMNgramCount *nc;    SLMWordID *wid;    int i;    nc = New_N(SLMNgramCount,n);    wid = New_N(SLMWordID,len*n);    for (i = 0; i < n; i++) {        nc[i].word_id = &wid[i*len];    }    return nc;}voidSLMFreeNgramCount_N(SLMNgramCount *nc, int n){    Free(nc[0].word_id);    Free(nc);}SLMNgramCount *SLMReadNgramCount(int ngram_len, FILEHANDLE inf,		  SLMNgramCount *base,int ascii_in){    int i;    SLMWordID w;    int4 c;    if (base == NULL) {	base = SLMNewNgramCount(ngram_len);    }    if (ascii_in) {	for (i = 0; i < ngram_len; i++) {	    if (z_getWordID(inf,&base->word_id[i]) != 0)		return NULL;	}	if (z_getulong(inf,&base->count) != 0)	    return NULL;    }    else {	for (i = 0; i < ngram_len; i++) {	    if (z_read(&w,sizeof(SLMWordID),1,inf) != 1)		return NULL;	    base->word_id[i] = SLMntohID(w);	}	if (z_read(&c,sizeof(int4),1,inf) != 1)	    return NULL;	base->count = ntohl(c);    }    return base;}intSLMCompareNgramCount(int ngram_len, SLMNgramCount *p1, SLMNgramCount *p2){    int d,i;    for (i = 0; i < ngram_len; i++) {	d = p1->word_id[i]-p2->word_id[i];	if (d != 0)	    return d;    }    return 0;} static intcheck_merge(int ngram_len, int n_ng, SLMNgramCount **ngs, char *status){  int min_id = -1;  int d,i,j;  for (i = 0; i < n_ng; i++) {    if (status[i] & ST_EOF)      continue;    if (min_id < 0) {      min_id = i;      status[i] |= ST_MIN;      continue;    }    d = SLMCompareNgramCount(ngram_len,ngs[min_id],ngs[i]);    if (d > 0) {      /* ngs[i] is smaller than ngs[min_id] */      for (j = 0; j < i; j++)	status[j] &= ~ST_MIN;      min_id = i;      status[i] |= ST_MIN;    }    else if (d == 0)      status[i] |= ST_MIN;  }  return min_id;}voidSLMPrintNgramCount(int ngram_len,SLMNgramCount *ngc, FILEHANDLE outf,int ascii_out){    int j;    if (ascii_out) {	for (j = 0; j < ngram_len; j++)	    z_printf(outf,SLMWordID_FMT " ",ngc->word_id[j]);	z_printf(outf,"%lu\n",ngc->count);    }    else {	SLMWordID id;	int4 c;	for (j = 0; j < ngram_len; j++) {	    id = SLMhtonID(ngc->word_id[j]);	    z_write(&id,sizeof(SLMWordID),1,outf);	}	c = htonl(ngc->count);	z_write(&c,sizeof(int4),1,outf);    }}voidSLMMergeIDNgram(int ngram_len, char **list, int nlist, FILEHANDLE outf, int ascii_in, int ascii_out){    double *weights = NewAtom_N(double,nlist);    int i;    for (i = 0; i < nlist; i++)	weights[i] = 1.0;    SLMMixIDNgram(ngram_len,list,weights,nlist,outf,ascii_in,ascii_out);    Free(weights);}voidSLMMixIDNgram(int ngram_len, char **list, double *weight,int nlist,	      FILEHANDLE outf, int ascii_in, int ascii_out){    FILEHANDLE *fhs = New_N(FILEHANDLE, nlist);    int i,j;    char *status = New_N(char,nlist);    SLMNgramCount **ngcount,nc;    double newcount;        /* initialize */    ngcount = New_N(SLMNgramCount*,nlist);    for (i = 0; i < nlist; i++) {	status[i] = ST_MIN;	ngcount[i] = SLMNewNgramCount(ngram_len);	fhs[i] = z_open(list[i],"r");    }    /* main loop */    for (;;) {	for (i = 0; i < nlist; i++) {	    if (status[i] & ST_EOF)		continue;	    if (status[i] & ST_MIN) {		if (SLMReadNgramCount(ngram_len,fhs[i],ngcount[i],ascii_in) == NULL) {		    status[i] |= ST_EOF;		}		status[i] &= ~ST_MIN;	    }	}	if ((j = check_merge(ngram_len,nlist,ngcount,status)) < 0) {	    /* all streams end */	    for (i = 0; i < nlist; i++)		z_close(fhs[i]);	    break;	}	newcount = 0;	for (i =0; i < nlist; i++) {	    if (status[i] & ST_MIN) {		nc.word_id = ngcount[i]->word_id;		newcount += ngcount[i]->count*weight[i];	    }	}	nc.count = ceil(newcount);	if (nc.count < newcount) {	    fprintf(stderr,"Warning: n-gram count overflow (%f -> %lu)\n",newcount,nc.count);        }	SLMPrintNgramCount(ngram_len,&nc,outf,ascii_out);    }    Free(fhs);    Free(status);    for (i = 0; i < nlist; i++) {	Free(ngcount[i]);    }    Free(ngcount);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -