⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ngram.c

📁 About sound recognition. I want to download.
💻 C
📖 第 1 页 / 共 2 页
字号:
	return NULL;    }    if (len > ng->ngram_len) {	fprintf(stderr,"SLMReadLM Error: You can't specify longer n-gram length (%d) than the original model length (%d)\n",len,ng->ngram_len);	return NULL;    }    ng2 = SLMNewLM();    memcpy(ng2,ng,sizeof(SLMNgram));    ng2->context_len = len-1;    ng2->delegate = ng;    ng2->next_lm = ng;    return ng2;}#ifdef ENABLE_REMOTE_MODEL/* * openRemoteModel() connects to the specified host * and returns a pointer to SLMNgram structure that points * the host.  */static SLMNgram *openRemoteModel(char *hostname, int portnum, int verbosity){    SLMNgram *ng;    char buf[1024];    int sock;    char cmd;    unsigned char par1;    unsigned short par2,id;    unsigned char size;    int i;    sock = openSocket(hostname,portnum);    if (sock < 0)	return NULL;    ng = SLMNewLM();    sprintf(buf,"%s:%d",hostname,portnum);    ng->filename = strdup(buf);    ng->type = SLM_REMOTE_MODEL;    SLM_SOCK(ng) = sock;    /* retrieve basic info */    cmd = SLM_NGD_BASIC_INFO;    write(SLM_SOCK(ng),&cmd,1);    read(SLM_SOCK(ng),&par2,2);    ng->type |= ntohs(par2);    read(SLM_SOCK(ng),&par1,1);    ng->first_id = par1;    read(SLM_SOCK(ng),&par1,1);    ng->first_class_id = par1;    read(SLM_SOCK(ng),&par1,1);    ng->ngram_len = par1;    read(SLM_SOCK(ng),&par1,1);    ng->context_len = par1;    read(SLM_SOCK(ng),&par2,2);    ng->n_unigram = ng->n_word = ntohs(par2);    /* retrieve word info */    ng->vocab_ht = SLMHashCreateSI(ng->n_word*3/2);    ng->vocab = New_N(char*,ng->n_word);    ng->vocab[0] = strdup("<UNK>");    cmd = SLM_NGD_ID2WORD;    for (i = ng->first_id; i <= ng->n_word; i++) {	write(SLM_SOCK(ng),&cmd,1);	id = htons((unsigned short)i);	write(SLM_SOCK(ng),&id,2);	read(SLM_SOCK(ng),&size,1);	read(SLM_SOCK(ng),buf,size);	buf[size] = '\0';	ng->vocab[i] = strdup(buf);	SLMIntHashInsert(ng->vocab_ht,ng->vocab[i],i);	if (verbosity > 1) {	  fprintf(stderr,"%d:%s\n",i,buf);	  if (i % 100 == 0) {	    fprintf(stderr,".");	    fflush(stderr);	  }	}    }   
 if (verbosity > 1) {      fprintf(stderr,"Remote model read.\n");    }    return ng;}#endif/* * SLMReadLM() invokes SLMReadLM0() to read an LM. If filename is * "lmfile1.arpa[;length]*weight,lmfile2.arpa[;length]*weight,..."  * then all LMs are read and combined with the specified weight. */SLMNgram *SLMReadLM(char *filename,int format,int verbosity){    SLMNgram *ng = NULL;    char buf1[256],buf2[256];    char *p,*q;    double w;    int len;#ifdef ENABLE_REMOTE_MODEL    if ((p = strchr(filename,':')) != NULL) {      /* the filename is hostname:portnum */      strncpy(buf1,filename,p-filename);      buf1[p-filename] = '\0';      return openRemoteModel(buf1,atoi(p+1),verbosity);    }#endif    q = buf1;    for (p = filename; *p; p++) {	if (*p == '*' || *p == ';'	    ) {	    *q = '\0';	    if (*buf1 == '\0') {		/* no filename is specified */		fprintf(stderr,"SLMReadLM: %s: no filename part\n",filename);		exit(1);	    }	    len = 0; /* no length specified */	    w = 1.0; /* no weight specified */	    if (*p == ';') {		/* length follows */		q = buf2;		p++;		while (*p && *p != '*' && *p != ',')		    *(q++) = *(p++);		*q = '\0';		len = atoi(buf2);	    }	    if (*p == '*') {		q = buf2;		p++;		while (*p && *p != ',')		    *(q++) = *(p++);		*q = '\0';		w = atof(buf2);	    }	    if (verbosity > 1) {		fprintf(stderr,"Reading LM file %s \n",buf1);		if (len > 0)		    fprintf(stderr,"length=%d ",len);		if (w != 1.0)		    fprintf(stderr,"weight=%f",w);		fprintf(stderr,"\n");	    }	    if (ng == NULL) {		ng = SLMReadLM0(buf1,format,verbosity);		if (len > 0 && ng->ngram_len != len) {		    ng->weight = 0.0;		    ng = create_delegate(ng,len);		    if (ng == NULL) {			/* error */			return ng;		    }		}		ng->weight = w;	    }	    else {		SLMAddLM(ng,len,w,buf1,format,verbosity);	    }	    q = buf1;	}	else {	    *(q++) = *p;	}    }    if (ng == NULL) {	*q = '\0';	if (verbosity > 1) {	    fprintf(stderr,"Reading LM file %s\n",buf1);	}	ng = SLMReadLM0(buf1,format,verbosity);    }    
return ng;}voidSLMAddLM(SLMNgram *ng, int len, double weight, char *filename,int format,int verbosity){    SLMNgram *next_ng;    if (ng == NULL) {	fprintf(stderr,"SLMAddLM Warning: base LM == NULL\n");	return;    }    next_ng = check_ngram_filename(ng,filename);    if (next_ng != NULL) {	SLMNgram *ng2 = create_delegate(next_ng,len);	if (ng2 == NULL) {	    /* error */	    return;	}	ng2->weight = weight;	ng2->next_lm = ng->next_lm;	ng->next_lm = ng2;	return;    }    next_ng = SLMReadLM0(filename,format,verbosity);    if (len > 0 && next_ng->ngram_len != len) {	SLMNgram *ng2 = create_delegate(next_ng,len);	if (ng2 == NULL) {	    /* error */	    return;	}	next_ng->weight = 0.0;	next_ng = ng2;    }    next_ng->weight = weight;    next_ng->next_lm = ng->next_lm;    ng->next_lm = next_ng;}voidSLMFreeLM(SLMNgram *ng){    int i;#ifdef ENABLE_REMOTE_MODEL    if (ng->type & SLM_REMOTE_MODEL) {	Free(ng->filename);	close(SLM_SOCK(ng));	return;    }#endif    if (ng->next_lm != NULL)	SLMFreeLM(ng->next_lm);    if (ng->delegate == NULL) {	for (i = 0; i < ng->ngram_len-1; i++)	    Free(ng->node[i]);	Free(ng->node);	Free(ng->leaf);	Free(ng->vocab);	Free(ng->filename);#ifdef NG_CACHE	Free(ng->hist);#endif	SLMHashDestroy(ng->vocab_ht);	if (SLM_NgramType(ng->type) == SLM_ClassNgram) {	    Free(ng->class_sym);	    SLMHashDestroy(ng->class_ht);	}    }    Free(ng);}static SLMNgramNode*search_node(SLMNgram *ng, SLMNgramNode *base, int nelem, int level,	    int len, SLMWordID *idarray, int cache_ok){    SLMNgramNode ref,*nd;    ref.id = idarray[level];    if (level == ng->ngram_len-1) {	return bsearch(&ref,base,nelem,sizeof(SLMNgramLeaf),		       SLMNgramLeafCompare);    }    else {#ifdef NG_CACHE	/* check cache */	if (cache_ok && ng->hist[level].id == idarray[level])	    nd = ng->hist[level].node;	else {	    nd = bsearch(&ref,base,nelem,sizeof(SLMNgramNode),SLMNgramNodeCompare);	    ng->hist[level].id = idarray[level];	    ng->hist[level].node = nd;	    cache_ok = 0;	}#else	nd = 
bsearch(&ref,base,nelem,sizeof(SLMNgramNode),SLMNgramNodeCompare);#endif	    	if (level == len-1 || nd == NULL)	    return nd;	else {	    if (level == ng->ngram_len-2) {		return search_node(ng,				   (SLMNgramNode*)&ng->leaf[nd->nextpos],				   nd->nelem,				   level+1,len,idarray, cache_ok);	    }	    else {		return search_node(ng,				   &ng->node[level+1][nd->nextpos],				   nd->nelem,				   level+1,len,idarray, cache_ok);	    }	}    }}static double SLMGetBOProb0(SLMNgram *ng, int len, SLMWordID *idarray, SLMBOStatus *status){    SLMNgramNode *nn1,*nn2;    double prob;    int i;    nn1 = search_node(ng,ng->node[0],ng->n_unigram,0,len,idarray,1);    if (nn1 != NULL) {	if (status) {	    for (i = 0; i < len; i++)		status->hit[i] = SLM_STAT_HIT;	}	prob = nn1->prob;    }    else {	if (len == 1) {	    /* unigram search failed */	    prob = 0;	}	else {	    nn2 = search_node(ng,ng->node[0],ng->n_unigram, 0,len-1,idarray,1);	    if (nn2 != NULL) {		if (status)		    status->hit[len-1] = SLM_STAT_BO_WITH_ALPHA;		prob = nn2->alpha*SLMGetBOProb0(ng,len-1,idarray+1,status);	    }	    else 		prob = SLMGetBOProb0(ng,len-1,idarray+1,status);	}    }    if (status) {	status->ng_prob = prob;	status->ug_prob = 1;    }    return prob;}SLMWordIDSLMWord2ID(SLMNgram *ng, char *word){    int id;    char *q,buf[256];    if (ng->delegate != NULL)	return SLMWord2ID(ng->delegate,word);    id = SLMIntHashSearch(ng->vocab_ht,word);    if (id == 0 && SLM_NgramType(ng->type) == SLM_ClassNgram) {	/* if ng is class ngram, UNK is class by class */	for (q = word+strlen(word)-1; q >= word; q--) {	    if (*q == ng->delimiter)		break;	}	q++;	sprintf(buf,"<UNK>%c%s",ng->delimiter,q);	id = SLMIntHashSearch(ng->vocab_ht,buf);    }    return id;}intSLMVocabSize(SLMNgram *ng){    if (SLM_NgramType(ng->type) == SLM_WordNgram)	return ng->n_unigram;    else	return ng->n_word;}const char*SLMID2Word(SLMNgram *ng, SLMWordID id){    if (id == 0)	return "<UNK>";    if (id >= SLMVocabSize(ng))	return "<ERROR>";    
return ng->vocab[id];}intSLMContextLength(SLMNgram *ng){    int context_len = 0;    while (ng) {	if (ng->weight == 0) {	    /* dummy model */	    ng = ng->next_lm;	    continue;	}	if (ng->context_len > context_len)	    context_len = ng->context_len;	ng = ng->next_lm;    }    return context_len;}    intSLMNgramLength(SLMNgram *ng){    int len = 0,x;    while (ng) {	if (IS_DISTANT_BIGRAM(ng))	    x = 2;	else	    x = ng->context_len+1;	if (ng->weight > 0 && x > len)	    len = x;	ng = ng->next_lm;    }    return len;}    double SLMGetBOProb(SLMNgram *ng, int len, SLMWordID *idarray, SLMBOStatus *status){    SLMWordID cidarray[MAX_GRAM];    double prob_array[MAX_GRAM];    double weight_array[MAX_GRAM];    int i,j;    double prob;    double sum_weight = 0.0;    int reallen;    SLMBOStatus my_stat;    if (len > MAX_GRAM) {	fprintf(stderr,"SLMGetBOProb: n-gram length %d too big (limit is %d\n",		len, MAX_GRAM);	return 0.0;    }#ifdef ENABLE_REMOTE_MODEL    if (ng->type & SLM_REMOTE_MODEL) {	char cmd = SLM_NGD_PROB;	unsigned char len1 = len;	SLMWordID id;	int4 iprob;	write(SLM_SOCK(ng),&cmd,1);	write(SLM_SOCK(ng),&len1,1);	for (i = 0; i < len; i++) {	    id = SLMhtonID(idarray[i]);	    write(SLM_SOCK(ng),&id,sizeof(SLMWordID));	}	read(SLM_SOCK(ng),&len1,1);	my_stat.len = len1;	read(SLM_SOCK(ng),&iprob,4);	my_stat.ng_prob = exp(SLMl2d(ntohl(iprob)));	read(SLM_SOCK(ng),&iprob,4);	my_stat.ug_prob = exp(SLMl2d(ntohl(iprob)));	read(SLM_SOCK(ng),my_stat.hit,len1);	if (status) {	    status->len = my_stat.len;	    status->ng_prob = my_stat.ng_prob;	    status->ug_prob = my_stat.ug_prob;	    for (i = 0; i < my_stat.len; i++)		status->hit[i] = my_stat.hit[i];	}	return my_stat.ng_prob*my_stat.ug_prob;    }#endif	    j = 0;    for (; ng != NULL; ng = ng->next_lm) {	if (ng->weight == 0.0)	    continue;	/* set reallen; reallen is set to n-gram length */	reallen = len;	if (len > ng->context_len+1)	    reallen = ng->context_len+1;	my_stat.len = len;	for (i = 0; i < reallen; i++)	    
my_stat.hit[i] = 0;	if (SLM_NgramType(ng->type) == SLM_WordNgram) {	    if (IS_DISTANT_BIGRAM(ng)) {		/* distant bigram */		if (len < ng->context_len+1) {		    reallen = 1; /* back to unigram */		    cidarray[len-1] = idarray[len-1];		}		else {		    reallen = 2;		    cidarray[len-2] = idarray[len-ng->context_len-1];		    cidarray[len-1] = idarray[len-1];		}	    }	    else {		for (i = 0; i < len; i++) {		    cidarray[i] = idarray[i];		}	    }	}	else {	    if (IS_DISTANT_BIGRAM(ng)) {		if (len < ng->context_len+1) {		    reallen = 1; /* back to unigram */		    cidarray[0] = ng->class_id[idarray[len-1]];		}		else {		    reallen = 2;		    cidarray[0] = ng->class_id[idarray[len-ng->context_len-1]];		    cidarray[1] = ng->class_id[idarray[len-1]];		}	    }	    else {		for (i = 0; i < len; i++) {		    cidarray[i] = ng->class_id[idarray[i]];		}	    }	}	if (ng->delegate != NULL)	    prob_array[j] = SLMGetBOProb0(ng->delegate,reallen,cidarray+len-reallen,&my_stat);	else	    prob_array[j] = SLMGetBOProb0(ng,reallen,cidarray+len-reallen,&my_stat);	if (SLM_NgramType(ng->type) == SLM_ClassNgram) {	    my_stat.ug_prob = ng->c_uniprob[idarray[len-1]];	    prob_array[j] *= ng->c_uniprob[idarray[len-1]];	}	weight_array[j] = ng->weight;	sum_weight += ng->weight;	j++;    }    /* combine probs using weight*/    if (sum_weight < 0.99999) {	/* Illegal weight; in this case weight is re-normalized */	for (i = 0; i < j; i++) {	    weight_array[i] /= sum_weight;	}    }    prob = 0;    for (i = 0; i < j; i++) {	prob += weight_array[i]*prob_array[i];    }    if (status) {	status->len = my_stat.len;	status->ng_prob = my_stat.ng_prob;	status->ug_prob = my_stat.ug_prob;	for (i = 0; i < my_stat.len; i++)	    status->hit[i] = my_stat.hit[i];    }    return prob;}voidSLMBOStatusString(SLMBOStatus *status, char *buf){    int i;    for (i = 0; i < status->len; i++) {	if (status->hit[i] == SLM_STAT_HIT) {	    buf[i] = 'H';	}	else if (status->hit[i] == SLM_STAT_BO_WITH_ALPHA)	    buf[i] = 'b';	else	    
buf[i] = '-';    }    buf[i] = '\0';}    

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -