// encoder.cpp
// (code-viewer page residue: "Font size:" control)
data_write(&prosodyEnable,1,1,YY,fout); data_write(&videoEnable,1,1,YY,fout); data_write(&lipShapeEnable,1,1,YY,fout); data_write(&trickModeEnable,1,1,YY,fout); if(YY[1]!=8) { c=0; i=YY[1]; data_write(&c,i,1,YY,fout); }}void ttsSequence::clear_ttsSentence(){ ttsSentence *s=sentence; ttsProsody *p=s->prosody; if(!s->silence) { free(s->ttsText);
s->ttsText=NULL; if(prosodyEnable) { free(p->phonemeSymbols); p->phonemeSymbols=NULL; if(get_durEn()) { free(p->durEachPhoneme); p->durEachPhoneme=NULL; } if(get_f0En()) { free(p->f0ContourEachPhoneme); p->f0ContourEachPhoneme=NULL; } if(get_enEn()) { free(p->energyContourEachPhoneme); p->energyContourEachPhoneme=NULL; } } if(lipShapeEnable) { free(s->LipShapeInSentence); s->LipShapeInSentence=NULL; free(s->lipShape); s->lipShape=NULL; }
}}void ttsSequence::save_ttsSentence(FILE *fout){ ttsSentence *s=sentence; ttsProsody *p=s->prosody; int i, j, Ts, Tn, N, nphone; char c; if(videoEnable) { Ts=(int)(s->startTime*1000); Tn=iframe*itime; N=(Ts-Tn)/itime; for(i=0; i<N; i++) { data_write(&s->tts_sentence_start_code,32,1,YY,fout); data_write(&s->ttsSentenceID,10,1,YY,fout); c=1; data_write(&c,1,1,YY,fout); data_write(&itime,12,1,YY,fout); if(YY[1]!=8) { c=0; j=YY[1]; data_write(&c,j,1,YY,fout); } } iframe+=N; Tn=s->offset+s->sentenceDuration; N=Tn/itime; if((Tn%itime)!=0) N++; } else N=1; for(i=0; i<N; i++) { data_write(&s->tts_sentence_start_code,32,1,YY,fout); data_write(&s->ttsSentenceID,10,1,YY,fout); data_write(&s->silence,1,1,YY,fout); if(s->silence) data_write(&s->silenceDuration,12,1,YY,fout); else { if(genderEnable) data_write(&s->gender,1,1,YY,fout); if(ageEnable) data_write(&s->age,3,1,YY,fout); if(!videoEnable && speechRateEnable) data_write(&s->speechRate,4,1,YY,fout); data_write(&s->lengthText,12,1,YY,fout); data_write(s->ttsText,8,s->lengthText,YY,fout); if(prosodyEnable) { data_write(&p->durEnable,1,1,YY,fout); data_write(&p->f0ContourEnable,1,1,YY,fout); data_write(&p->energyContourEnable,1,1,YY,fout); nphone=p->numberPhonemes; data_write(&nphone,10,1,YY,fout); data_write(&p->phonemeSymbolsLength,13,1,YY,fout); data_write(p->phonemeSymbols,8,p->phonemeSymbolsLength,YY,fout); if(get_durEn()) data_write(p->durEachPhoneme,12,nphone,YY,fout); if(get_f0En()) data_write(p->f0ContourEachPhoneme,8,nphone*3,YY,fout); if(get_enEn()) data_write(p->energyContourEachPhoneme,8,nphone*3,YY,fout); } if(videoEnable) { data_write(&s->sentenceDuration,16,1,YY,fout); data_write(&s->positionInSentence,16,1,YY,fout); data_write(&s->offset,10,1,YY,fout); if(s->offset) s->positionInSentence+=(itime-s->offset); else s->positionInSentence+=itime; s->offset=0; } if(lipShapeEnable) { j=s->numberLipShape; data_write(&j,10,1,YY,fout); data_write(s->LipShapeInSentence,16,j,YY,fout); 
data_write(s->lipShape,8,j,YY,fout); }
} if(YY[1]!=8) { c=0; j=YY[1]; data_write(&c,j,1,YY,fout); } } if(videoEnable) iframe+=N;}void ttsSequence::data_write(char *A, int nbits, int n, char *B, FILE *fout){ int i, nx, ny; unsigned char X,Y; short W[9]={ 0,1,3,7,0x0F,0x01F,0x03F,0x07F,0x0FF }; Y=B[0]; ny=B[1]; for(i=0; i<n; i++) { nx=nbits; while(nx!=0) { if(ny>nx) { X=*(A+i) & W[nx]; Y=Y | (X << (ny-nx)); ny-=nx; nx=0; } else { X=(*(A+i)) >> (nx-ny); Y=Y | (X & W[ny]); nx-=ny; ny=8; fwrite(&Y,1,1,fout); Y=0; } } } B[0]=Y; B[1]=ny;}void ttsSequence::data_write(short *A, int nbits, int n, char *B, FILE *fout){ int i, nx, ny; unsigned short X; unsigned char Y; short W[9]={ 0,1,3,7,0x0F,0x01F,0x03F,0x07F,0x0FF }; Y=B[0]; ny=B[1]; for(i=0; i<n; i++) { nx=nbits; while(nx!=0) { if(ny>nx) { X=*(A+i) & W[nx]; Y=Y | (X << (ny-nx)); ny-=nx; nx=0; } else { X=(*(A+i)) >> (nx-ny); Y=Y | (X & W[ny]); nx-=ny; ny=8; fwrite(&Y,1,1,fout); Y=0; } } } B[0]=Y; B[1]=ny;}void ttsSequence::data_write(int *A, int nbits, int n, char *B, FILE *fout){ int i, nx, ny; unsigned int X; unsigned char Y; short W[9]={ 0,1,3,7,0x0F,0x01F,0x03F,0x07F,0x0FF }; Y=B[0]; ny=B[1]; for(i=0; i<n; i++) { nx=nbits; while(nx!=0) { if(ny>nx) { X=*(A+i) & W[nx]; Y=Y | (X << (ny-nx)); ny-=nx; nx=0; } else { X=(*(A+i)) >> (nx-ny); Y=Y | (X & W[ny]); nx-=ny; ny=8; fwrite(&Y,1,1,fout); Y=0; }
} } B[0]=Y; B[1]=ny;}int rd_txt(FILE *fp, char *Text);int read_prosody(FILE *fp,short Sphon[][6],short Dur[],short F0[][3],short En[][3]);void read_video(FILE *fp,float *stime,int *dur);
int Niframe, v_st, v_ed, sp_st, sp_ed;float Itime;void Encoder(int E_Lang_v, int E_Gender, int E_Gender_v, int E_Age, int E_Age_v, int E_Spch, int E_Spch_v, int E_Proso, int E_Proso_d, int E_Proso_F0, int E_Proso_e, int E_Video, int E_Lip, int E_Trick )
{ int i, itime, Nphone, dur; short Dur[800], F0[800][3], En[800][3]; float stime; char fname[30],Text[800];
short Sphone[800][6]; FILE *ftext, *fvideo, *fprosody, *fout;
G_Lang_v = E_Lang_v; G_Gender = E_Gender; G_Gender_v = E_Gender_v;
G_Age = E_Age; G_Age_v = E_Age_v; G_Spch = E_Spch; G_Spch_v=E_Spch_v;
G_Proso = E_Proso; G_Proso_d = E_Proso_d; G_Proso_F0 = E_Proso_F0; G_Proso_e = E_Proso_e;
G_Video = E_Video; G_Lip = E_Lip; G_Trick = E_Trick;
/* We assume time duration between adjacent I-frames is 0.5sec */ itime=500; /* msec */ i=0; ttsSequence *ttsSeq = new ttsSequence(1);
ttsSeq->init();
ttsSentence *ttsSnt = new ttsSentence(++i); ttsSeq->AddSentence(ttsSnt); ttsSeq->Sntinit();
YY[0]=0; YY[1]=8; /* Check Data files */ if((ftext=fopen("ko_text.dat","r"))==NULL) { ::MessageBox(NULL, "Error: ko_text.dat open", "ERROR", MB_OK); exit(1); } if(ttsSeq->get_prosodyEn()) { if(ttsSeq->get_gender()) strcpy(fname,"ko_m-prosody.dat"); else strcpy(fname,"ko_f-prosody.dat"); if((fprosody=fopen(fname,"r"))==NULL) { ::MessageBox(NULL, "Error: prosody data open", "ERROR", MB_OK); exit(1); } } if(ttsSeq->get_videoEn()) { if(ttsSeq->get_gender()) strcpy(fname,"ko_m-video.dat"); else strcpy(fname,"ko_f-video.dat"); if((fvideo=fopen(fname,"r"))==NULL) { ::MessageBox(NULL, "Err: video data open", "ERROR", MB_OK); exit(1); } } fout=fopen("mpeg_tts.dat","w+b"); ttsSeq->save_ttsSequence(fout); while( (i=rd_txt(ftext,Text)) > 1 ) { ttsSeq->put_text(Text); i=ttsSeq->get_text(Text); //printf("%d %s\n",i,Text); if(ttsSeq->get_prosodyEn()) { Nphone=read_prosody(fprosody,Sphone,Dur,F0,En);
if( Dur[0] == -1000 ) ttsSeq->set_durEn(0); if( F0[0][0] == -1000 ) ttsSeq->set_f0En(0);
if( En[0][0] == -1000 ) ttsSeq->set_enEn(0);
ttsSeq->put_prosody(Nphone,Sphone,Dur,F0,En); } if(ttsSeq->get_videoEn()) { read_video(fvideo,&stime,&dur); ttsSeq->put_position(itime,stime,dur); } ttsSeq->save_ttsSentence(fout); ttsSeq->clear_ttsSentence(); }
fclose(ftext); fclose(fout);}int rd_txt(FILE *fp, char *Text){ int sentnce_end=0, i=0, ch; char pchr; pchr=32; while( (ch=fgetc(fp)) != EOF && i< 800 ) { if(ch=='\n' || ch=='\r' || ch == '\0' || ch =='\t') ch=32; if(ch=='"' || ch=='\'' || ch=='{' || ch=='}' || ch=='[' || ch==']' || ch=='<' || ch=='>' || ch=='(' || ch==')' || ch==',' ) continue; if(ch != 32 || pchr != 32) Text[i++]=ch; if(ch=='.') { ch=fgetc(fp); if(!isdigit(ch)) { sentnce_end=1; break; } Text[i++]=ch; } pchr=ch; if( ch=='?' || ch=='!') { sentnce_end=1; break; } } if(sentnce_end == 0) { if(Text[i-1]==32) i--; Text[i++]='.'; } Text[i]=0; return(i);}int read_prosody(FILE *fp,short Sphon[][6],short Dur[],short F0[][3],short En[][3]){
int i, j, k, m[10], n=0, np, nIPA;
char line[100], str[15][20], *tokn;
char *field[]={ "IPA_phones","dur","F0_1","F0_2","F0_3","En_1","En_2","En_3" };
for(i=0; i<8; i++) m[i]=-1;
fgets(line,90,fp);
for(i=0, tokn=strtok(line," \n"); tokn!=NULL; tokn=strtok(NULL," \n"))
strcpy(str[i++],tokn);
np=i-1;
if(strcmp(str[0],"begin")!=0) { ::MessageBox(NULL, "prosody file err", "Error", MB_OK); exit(1); }
for(j=1; j<i; j++) {
for(k=0; k<8; k++)
if(strcmp(str[j],field[k])==0) { m[k]=j-1; break; }
}
while(fgets(line,90,fp)!=NULL) {
for(i=0, tokn=strtok(line," \n"); tokn!=NULL; tokn=strtok(NULL," \n"))
strcpy(str[i++],tokn);
if(strcmp(str[0],"end")==0) break;
nIPA=i-(np-1);
for(j=0; j<nIPA; j++) {
sscanf(str[m[0]+j],"%x",&k); Sphon[n][j]=k;
}
Sphon[n][j]=0;
if(m[1]!=-1) Dur[n]=(int)(atof(str[m[1]+nIPA-1])*1000);
else Dur[0] = -1000;
for(i=2; i<5; i++) {
if(m[i]!=-1) F0[n][i-2]=atoi(str[m[i]+nIPA-1])/2;
else { F0[0][0] = -1000; break; }
}
for(i=5; i<8; i++) {
if(m[i]!=-1) En[n][i-5]=atoi(str[m[i]+nIPA-1]);
else { En[0][0] = -1000; break; }
}
n++;
}
return(n);
}void read_video(FILE *fp,float *stime,int *dur){ int i; float t; char line[100], str[10][10], *tokn; fgets(line,90,fp); for(i=0, tokn=strtok(line," \n"); tokn!=NULL; tokn=strtok(NULL," \n")) strcpy(str[i++],tokn); if(strcmp(str[0],"begin")!=0) { ::MessageBox(NULL, "video file err", "Error", MB_OK); exit(1); } fgets(line,90,fp); tokn=strtok(line," \n"); *stime=(float)atof(tokn); fgets(line,90,fp); tokn=strtok(line," \n"); t=(float)atof(tokn); *dur=(int)((t-*stime)*1000); fgets(line,90,fp); tokn=strtok(line," \n"); if(strcmp(tokn,"end")!=0) { ::MessageBox(NULL, "video file err", "Error", MB_OK); exit(1); }}
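// ---------------------------------------------------------------------
// Code-viewer page residue (not part of the program): keyboard shortcuts
//   Copy code            Ctrl + C
//   Search code          Ctrl + F
//   Fullscreen mode      F11
//   Toggle theme         Ctrl + Shift + D
//   Show shortcuts       ?
//   Increase font size   Ctrl + =
//   Decrease font size   Ctrl + -
// ---------------------------------------------------------------------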