📄 encoder.cpp
字号:
/*
This software module was originally developed by
Hang-Seop Lee (hslee@etri.re.kr), ETRI
Jung-Chul Lee (jclee@etri.re.kr), ETRI
and edited by Hang-Seop Lee, Jung-Chul Lee of ETRI,
in the course of development of the MPEG-4.
This software module is an implementation of a part of one or
more MPEG-4 tools as specified by the MPEG-4.
ISO/IEC gives users of the MPEG-4 free license to this
software module or modifications thereof for use in hardware
or software products claiming conformance to the MPEG-4.
Those intending to use this software module in hardware or software
products are advised that its use may infringe existing patents.
The original developer of this software module and his/her company,
the subsequent editors and their companies, and ISO/IEC have no
liability for use of this software module or modifications thereof
in an implementation.
Copyright is not released for non MPEG-4 conforming
products. ETRI retains full right to use the code for his/her own
purpose, assign or donate the code to a third party and to
inhibit third parties from using the code for non
MPEG-4 conforming products.
This copyright notice must be included in all copies or
derivative works. Copyright (c) 1997.
*/
#include "stdafx.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <malloc.h>
#include <sys/stat.h>
#include <ctype.h>
#include <fcntl.h>
#include <windows.h> /* For Bit stream */
/*
 * Two-byte buffer passed as the `B` argument of the visible
 * ttsSequence::data_write() calls.
 * NOTE(review): presumably bit-packing state for the output stream --
 * confirm against data_write().
 */
char YY[2];

/* Sequence-level settings, copied into a ttsSequence by ttsSequence::init(). */
int G_Lang_v;   /* -> languageCode */
/* Per-sentence values/flags below (suffix _v, _d, _F0, _e) are applied by
 * ttsSequence::Sntinit(); the unsuffixed ones are enable flags read by init(). */
int G_Gender;   /* -> genderEnable */
int G_Gender_v; /* -> sentence gender (used when genderEnable is set) */
int G_Age;      /* -> ageEnable */
int G_Age_v;    /* -> sentence age (used when ageEnable is set) */
int G_Spch;     /* -> speechRateEnable */
int G_Spch_v;   /* -> sentence speechRate (used when speechRateEnable is set) */

int G_Proso;    /* -> prosodyEnable */
int G_Proso_d;  /* -> prosody durEnable */
int G_Proso_F0; /* -> prosody f0ContourEnable */
int G_Proso_e;  /* -> prosody energyContourEnable */
int G_Video;    /* -> videoEnable */
int G_Lip;      /* -> lipShapeEnable */
int G_Trick;    /* -> trickModeEnable */
/*
 * Prosody record attached to a sentence: three enable flags, the phoneme
 * count, a byte buffer of phoneme symbols and three per-phoneme int arrays.
 * All members are public and are filled in by ttsSequence.
 */
class ttsProsody {
public:
    ttsProsody();
    ~ttsProsody();

    char durEnable;
    char f0ContourEnable;
    char energyContourEnable;
    int numberPhonemes;
    int phonemeSymbolsLength;
    char *phonemeSymbols;
    int *durEachPhoneme;
    int *f0ContourEachPhoneme;
    int *energyContourEachPhoneme;
};

/* Starts with every flag and count at zero and every buffer pointer NULL. */
ttsProsody::ttsProsody()
    : durEnable(0),
      f0ContourEnable(0),
      energyContourEnable(0),
      numberPhonemes(0),
      phonemeSymbolsLength(0),
      phonemeSymbols(NULL),
      durEachPhoneme(NULL),
      f0ContourEachPhoneme(NULL),
      energyContourEachPhoneme(NULL)
{
}

/*
 * One sentence of a TTS sequence: start code, ID, speaker attributes,
 * the text, an optional prosody record, timing fields and lip-shape data.
 * All members are public and are manipulated through ttsSequence.
 */
class ttsSentence {
public:
    ttsSentence(int id);
    ~ttsSentence();

    int tts_sentence_start_code;
    int ttsSentenceID;
    char silence;
    int silenceDuration;
    char gender;
    char age;
    char speechRate;
    int lengthText;
    char *ttsText;
    ttsProsody *prosody;
    float startTime;
    int sentenceDuration;
    int positionInSentence;
    int offset;
    int numberLipShape;
    int *LipShapeInSentence;
    int *lipShape;
};

/*
 * Builds an empty sentence carrying the given ID.
 *
 * speechRate defaults to 3. Original note: r=[0.7 - 1.6], R=r*10 -3 :
 * default r=1.
 * NOTE(review): r=1 only maps to 3 if the formula is R=r*10-7 -- confirm
 * against the MPEG-4 specification.
 */
ttsSentence::ttsSentence(int id)
    : tts_sentence_start_code(0x12121212),
      ttsSentenceID(id),
      silence(0),
      silenceDuration(0),
      gender(0),
      age(0),
      speechRate(3),
      lengthText(0),
      ttsText(NULL),
      prosody(NULL),
      startTime(0.0f),
      sentenceDuration(0),
      positionInSentence(0),
      offset(0),
      numberLipShape(0),
      LipShapeInSentence(NULL),
      lipShape(NULL)
{
}

/*
 * A TTS sequence: sequence-level header fields (start code, ID, language
 * code and enable flags) plus a pointer to the sentence currently being
 * worked on. The sentence-level accessors dereference that pointer, so a
 * sentence must have been attached with AddSentence() before they are used;
 * the prosody accessors additionally require sentence->prosody to be set.
 */
class ttsSequence {
private:
    int tts_sequence_start_code;
    int ttsSequenceID;
    char languageCode;
    char genderEnable;
    char ageEnable;
    char speechRateEnable;
    char prosodyEnable;
    char videoEnable;
    char lipShapeEnable;
    char trickModeEnable;
    ttsSentence *sentence;
    int itime;
    int iframe;

public:
    ttsSequence(int id);
    ~ttsSequence();

    void init();

    /* --- sequence-level header fields --- */
    void set_LangEn(int value)    { languageCode = value; }
    void set_genderEn(int value)  { genderEnable = value; }
    void set_ageEn(int value)     { ageEnable = value; }
    void set_spRateEn(int value)  { speechRateEnable = value; }
    void set_prosodyEn(int value) { prosodyEnable = value; }
    void set_videoEn(int value)   { videoEnable = value; }
    void set_lipEn(int value)     { lipShapeEnable = value; }
    void set_trickEn(int value)   { trickModeEnable = value; }

    int get_LangEn()    { return static_cast<int>(languageCode); }
    int get_genderEn()  { return static_cast<int>(genderEnable); }
    int get_ageEn()     { return static_cast<int>(ageEnable); }
    int get_spRateEn()  { return static_cast<int>(speechRateEnable); }
    int get_prosodyEn() { return static_cast<int>(prosodyEnable); }
    int get_videoEn()   { return static_cast<int>(videoEnable); }
    int get_lipEn()     { return static_cast<int>(lipShapeEnable); }
    int get_trickEn()   { return static_cast<int>(trickModeEnable); }

    /* --- current sentence --- */
    void Sntinit();
    /* Stores the pointer only; no copy of the sentence is made. */
    void AddSentence(ttsSentence *ttsSnt) { sentence = ttsSnt; }

    void set_gender(int value) { sentence->gender = value; }
    void set_age(int value)    { sentence->age = value; }
    void set_spRate(int value) { sentence->speechRate = value; }
    void set_durEn(int value)  { sentence->prosody->durEnable = value; }
    void set_f0En(int value)   { sentence->prosody->f0ContourEnable = value; }
    void set_enEn(int value)   { sentence->prosody->energyContourEnable = value; }

    int get_gender() { return static_cast<int>(sentence->gender); }
    int get_age()    { return static_cast<int>(sentence->age); }
    int get_spRate() { return static_cast<int>(sentence->speechRate); }
    int get_durEn()  { return static_cast<int>(sentence->prosody->durEnable); }
    int get_f0En()   { return static_cast<int>(sentence->prosody->f0ContourEnable); }
    int get_enEn()   { return static_cast<int>(sentence->prosody->energyContourEnable); }

    void put_text(char *Text);
    /*
     * Copies the sentence text into the caller's buffer with strcpy (the
     * buffer must be large enough) and returns the stored lengthText.
     */
    int get_text(char *Text)
    {
        strcpy(Text, sentence->ttsText);
        return static_cast<int>(sentence->lengthText);
    }

    void put_prosody(int Nphone, short Sphone[][6], short Dur[], short F0[][3], short En[][3]);
    void put_position(int itime, float stime, int dur);
    void put_lip(FILE *fp);

    /* --- output --- */
    void save_ttsSequence(FILE *fout);
    void save_ttsSentence(FILE *fout);
    void clear_ttsSentence();
    void data_write(char *A, int nbits, int n, char *B, FILE *fout);
    void data_write(short *A, int nbits, int n, char *B, FILE *fout);
    void data_write(int *A, int nbits, int n, char *B, FILE *fout);
};

/*
 * Builds a sequence with the given ID, start code 0x11, every enable flag
 * cleared and no sentence attached.
 */
ttsSequence::ttsSequence(int id)
    : tts_sequence_start_code(0x00000011),
      ttsSequenceID(id),
      languageCode(0),
      genderEnable(0),
      ageEnable(0),
      speechRateEnable(0),
      prosodyEnable(0),
      videoEnable(0),
      lipShapeEnable(0),
      trickModeEnable(0),
      sentence(NULL),
      itime(0),
      iframe(0)
{
}
void ttsSequence::init(){ set_LangEn(G_Lang_v); set_genderEn(G_Gender); set_ageEn(G_Age); set_spRateEn(G_Spch); set_prosodyEn(G_Proso); set_videoEn(G_Video); set_lipEn(G_Lip); set_trickEn(G_Trick);}
void ttsSequence::Sntinit(){ if(genderEnable) { set_gender(G_Gender_v); } if(ageEnable) { set_age(G_Age_v); } if(speechRateEnable) { set_spRate(G_Spch_v); } if(prosodyEnable) { ttsProsody *prosody = new ttsProsody(); sentence->prosody=prosody; set_durEn(G_Proso_d); set_f0En(G_Proso_F0); set_enEn(G_Proso_e); }}void ttsSequence::put_text(char *Text) { sentence->lengthText=strlen(Text); sentence->ttsText=(char *)malloc(strlen(Text)+2); strcpy(sentence->ttsText,Text);
}void ttsSequence::put_prosody(int Nphone, short Sphone[][6],short Dur[],short F0[][3],short En[][3]){ int i, j, k;
ttsProsody *p=sentence->prosody;
p->numberPhonemes=Nphone;
for(i=k=0; i<Nphone; i++)
for(j=0; j<6; j++, k++) if(Sphone[i][j]==0) break;
i=p->phonemeSymbolsLength=k*2;
p->phonemeSymbols=(char *)malloc(i+2);
for(i=k=0; i<Nphone; i++) {
for(j=0; j<6; j++) {
if(Sphone[i][j]==0) break;
p->phonemeSymbols[k++]=(Sphone[i][j] >> 8) & 0xFF;
p->phonemeSymbols[k++]=(Sphone[i][j] & 0xFF);
} }
if(get_durEn()) {
p->durEachPhoneme=(int *)malloc(Nphone*sizeof(int));
for(i=0; i<Nphone; i++) p->durEachPhoneme[i]=Dur[i];
}
if(get_f0En()) {
p->f0ContourEachPhoneme=(int *)malloc(Nphone*3*sizeof(int));
for(i=0; i<Nphone; i++)
for(j=0; j<3; j++) p->f0ContourEachPhoneme[i*3+j]=F0[i][j];
}
if(get_enEn()) {
p->f0ContourEachPhoneme=(int *)malloc(Nphone*3*sizeof(int));
for(i=0; i<Nphone; i++)
for(j=0; j<3; j++) p->f0ContourEachPhoneme[i*3+j]=F0[i][j];
}
}void ttsSequence::put_position(int t0, float stime, int dur){ int i; sentence->startTime=stime; sentence->sentenceDuration=dur; sentence->positionInSentence=0; itime=t0; i=(int)(stime*1000); sentence->offset=i%itime;}void ttsSequence::save_ttsSequence(FILE *fout){ int i; char c; data_write(&tts_sequence_start_code,32,1,YY,fout); data_write(&ttsSequenceID,5,1,YY,fout); data_write(&languageCode,10,1,YY,fout); data_write(&genderEnable,1,1,YY,fout); data_write(&ageEnable,1,1,YY,fout); data_write(&speechRateEnable,1,1,YY,fout);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -