/* myhmm.c */
/* if not, exit the program */
if(M != m)
{
printf("the number of observations is inconsistent!\n");
exit(1);
}
observations = (char*)malloc((M+1)*sizeof(char));
observations[M] = '\0';
token = strtok( line, seps );
token = strtok(NULL, seps);
end=0;
// read the observations and sort them
// insertion sort
while(token != NULL)
{
int pos = end;
while(pos > 0 && observations[pos-1] > token[0])
{
observations[pos] = observations[pos-1];
pos--;
}
observations[pos] = token[0];
end++;
token = strtok(NULL, seps);
}
}
fgets(line, nBuffer, in_fp); // skip one line
// Part 3
if(AllocateDataSpace( &transitions, N, N ) != correctAction)
{
printf("allocate memory for transitions error!\n");
exit(1);
}
for(i=0; i < N; i++)
{
for(j=0; j < N; j++)
{
transitions[i][j] = 0;
}
}
// read transition matrix
for(i=0; i < N; i++)
{
fgets(line, nBuffer, in_fp);
token = strtok( line, seps );
j=0;
while(token != NULL)
{
transitions[i][j] = atof(token);
j++;
token = strtok(NULL, seps);
}
}
fgets(line, nBuffer, in_fp); // skip one line
// Part 4
// read emission matrix
if(observationsDefined == TRUE)
{
if(AllocateDataSpace( &emissions, N, M ) != correctAction)
{
printf("allocate memory for emissions error!\n");
exit(1);
}
// initializing the emission matrix
for(i=0; i < N; i++)
{
for(j=0; j < M; j++)
{
emissions[i][j] = 0;
}
}
// read emission matrix
for(i=0; i < N; i++)
{
fgets(line, nBuffer, in_fp);
token = strtok( line, seps );
j=0;
while(token != NULL)
{
emissions[i][j] = atof(token);
j++;
token = strtok(NULL, seps);
}
}
}
fgets(line, nBuffer, in_fp); // skip one line
// Part 5
pi = (double*)malloc(N*sizeof(double));
for(i=0; i < N; i++)
{
pi[i] = 0; // initial value
}
// read initial distributions
fgets(line, nBuffer, in_fp);
token = strtok( line, seps );
i=0;
while(token != NULL)
{
pi[i] = atof(token);
i++;
token = strtok(NULL, seps);
}
fclose(in_fp);
free(line);
free(buffer);
}
/************************************************************************
NAME
loadSeq - load sequences from the specified file
DESCRIPTION
Reads the file line by line into a newly allocated array of strings,
one entry per line. Each line is reduced to its first token (as
delimited by seps) and that token is moved to the start of its buffer,
so leading and trailing delimiter characters are removed.
The program exits with an error message when the memory cannot be
allocated, the file cannot be opened, a line cannot be read, or a line
contains no token at all.
Input:
pSeq    - receives the array of sequences
seqFile - the specified sequence file name
Output:
a string array for the input file, handed back through pSeq; the
function does not free it
Global variables list:
seps (read only). The original note lists trainData or testData,
which presumably are where callers store the result (not visible here).
*************************************************************************/
void loadSeq(char***pSeq, char* seqFile)
{
    FILE *in_fp;
    int i;
    char *token;
    // number of rows and row width, as reported by helpers defined elsewhere
    int nLines = cal_lines(seqFile);
    int nBuffer = getLengthOfLongestLine(seqFile)+extraSpace;
    char **seq;

    if(AllocateDataSpaceChar(&seq, nLines, nBuffer) != correctAction)
    {
        printf("allocate memory error\n");
        exit(1);
    }
    /* Open for read (will fail if inputfile does not exist) */
    if( (in_fp = fopen( seqFile, "r" )) == NULL )
    {
        printf( "The file '%s' was not opened\n", seqFile);
        exit(1);
    }
    for(i=0; i < nLines; i++)
    {
        // only tokenize a line that was actually read; after a failed
        // fgets the buffer contents must not be inspected
        token = NULL;
        if(fgets(seq[i], nBuffer, in_fp) != NULL)
        {
            // delete the invalid characters: strtok skips leading
            // delimiters and cuts the line after the first token
            token = strtok(seq[i], seps);
        }
        /* if there are errors in the input sequence */
        // strtok returns NULL when the line holds nothing but delimiters
        if(token == NULL)
        {
            printf("the length of the sequence is less than 1 in line %d of file %s!\n", i, seqFile);
            exit(1);
        }
        // the token starts after any leading delimiters; shift it so
        // that seq[i] itself begins with the first valid character
        if(token != seq[i])
        {
            memmove(seq[i], token, strlen(token) + 1);
        }
    }
    fclose(in_fp);
    (*pSeq) = seq;
}
/************************************************************************
NAME
AllocateDataSpaceChar - allocate a table of row x col characters
DESCRIPTION
Allocates an array of `row` char pointers and then, for every row, a
separate buffer of `col` chars. The buffers are not initialized.
Every allocation result is handed to the SUCCESS macro, which is
defined outside this section; presumably it leaves the function with an
error code when the pointer is NULL (TODO confirm against its
definition).
Input:
pData - receives the allocated table; written only on the final path
row   - number of rows (char pointers) to allocate
col   - number of chars to allocate per row
Output:
correctAction when the end of the function is reached
Global variables list:
None.
*************************************************************************/
int AllocateDataSpaceChar(char ***pData, int row, int col)
{
int i;
char** Data;
// the array of row pointers
Data = (char**)malloc(sizeof(char*) * row);
SUCCESS( Data );
for(i=0; i < row; i++)
{
// one independent buffer per row
Data[ i ] = (char *)malloc(sizeof(char) * col);
// NOTE(review): if SUCCESS returns early here, the rows allocated so
// far and Data itself are not freed - confirm against the macro
SUCCESS( Data[ i ] );
}
// publish the table only after all allocations were made
(*pData) = Data;
return correctAction;
}
/************************************************************************
NAME
readObservations - read observations from the specified data
DESCRIPTION
Collects every distinct character that occurs in the first iTrain
strings of trainData, keeps them sorted in ascending char order, and
stores the result in the global observation list. The list is
NUL-terminated, matching the list built when observations are read
from the model file.
Only characters that actually occur are recorded; an empty sequence
contributes nothing, and no string is touched when iTrain is 0.
The program exits with an error message if the list cannot be
allocated.
Input:
trainData - the specified data, an array of at least iTrain
            NUL-terminated strings
Output:
observation list (global observations) and its length (global M)
Global variables list:
M, iTrain, observations.
*************************************************************************/
void readObservations(char** trainData)
{
    // number of distinct values an unsigned char can hold (UCHAR_MAX + 1)
    enum { nSymbols = (unsigned char)-1 + 1 };
    unsigned char seen[nSymbols];   // seen[c] != 0 once character c was recorded
    char sorted[nSymbols];          // distinct characters found so far, ascending
    int nObserv = 0;
    int i, k;
    char *p;

    memset(seen, 0, sizeof(seen));
    for(i=0; i < iTrain; i++)
    {
        for(p = trainData[i]; *p != '\0'; p++)
        {
            unsigned char code = (unsigned char)(*p);
            // each character is recorded once, so nObserv can never
            // exceed nSymbols and sorted[] cannot overflow
            if(seen[code])
                continue;
            seen[code] = 1;
            // insert the new observation with insertion sort; the
            // comparison uses plain char, as the rest of the file does
            k = nObserv;
            while(k > 0 && sorted[k-1] > *p)
            {
                sorted[k] = sorted[k-1];
                k--;
            }
            sorted[k] = *p;
            nObserv++;
        }
    }
    // assign the known observations to the observation array
    M = nObserv;
    observations = (char*)malloc((M+1)*sizeof(char));
    if(observations == NULL)
    {
        printf("allocate memory for observations error!\n");
        exit(1);
    }
    for(i=0; i < M; i++)
        observations[i] = sorted[i];
    observations[M] = '\0';
}
/************************************************************************
NAME
initEmissions - initialize the emission matrix
DESCRIPTION
If the emission matrix was read from file (observationsDefined is not
FALSE), every row is normalized so that it sums to 1.
If the emission matrix is initialized for the first time, it is
allocated and filled as follows.
The START state (row 0) emits '$' with probability 1 and every other
observation with probability 0. The END state (row N-1) emits '#' with
probability 1 and every other observation with probability 0.
All other states emit '$' and '#' with probability 0 and each remaining
observation with probability 1 / (M - extraObservations).
The program exits with an error message if the matrix cannot be
allocated, if '$' or '#' does not map to a valid observation index, if
there are interior states but no observation left for them, or if a
row read from file sums to less than 0.1.
Input:
None
Output:
emission matrix
Global variables list:
N, M, emissions, extraObservations, observationsDefined.
*************************************************************************/
void initEmissions()
{
    double temp;
    int i, j;
    int iObservationStart, iObservationEnd;
    if(observationsDefined == FALSE)
    {
        if(AllocateDataSpace( &emissions, N, M ) != correctAction)
        {
            printf("allocate memory for emissions error!\n");
            exit(1);
        }
        /* START state and END state */
        // emit start observation with probability 1 in START state
        // emit end observation with probability 1 in END state
        for(j=0; j < M; j++)
        {
            emissions[0][j] = 0;
            emissions[N-1][j] = 0;
        }
        iObservationStart = getObservation('$');
        iObservationEnd = getObservation('#');
        // both values are used as column indices below, so they must
        // lie inside the matrix whatever getObservation reports for a
        // symbol it does not know
        if(iObservationStart < 0 || iObservationStart >= M ||
           iObservationEnd < 0 || iObservationEnd >= M)
        {
            printf("the start ('$') or end ('#') observation is missing!\n");
            exit(1);
        }
        emissions[0][iObservationStart] = 1;
        emissions[N-1][iObservationEnd] = 1;
        /* other states */
        if(N > 2)
        {
            // the uniform probability is only needed (and only
            // meaningful) when there are interior states
            if(M - extraObservations <= 0)
            {
                printf("error in the emission matrix!\n");
                exit(1);
            }
            temp = 1.0 / (double)(M - extraObservations);
            for(i=1; i < N-1; i++)
            {
                /* emission matrix */
                for(j=0; j < M; j++)
                {
                    emissions[i][j] = temp;
                }
                // other states will not emit the start and end observations
                emissions[i][iObservationStart] = 0;
                emissions[i][iObservationEnd] = 0;
            }
        }
        // NOTE(review): observationsDefined is not updated here; the
        // original had the assignment to TRUE commented out
    }
    else
    {
        for(i=0; i < N; i++)
        {
            double sum = 0;
            /* emission probabilities in one state */
            for(j=0; j < M; j++)
            {
                sum = sum + emissions[i][j];
            }
            // if all of the emission probabilities in one state are very small
            if(sum < 0.1)
            {
                printf("error in the emission matrix!\n");
                exit(1);
            }
            /* normalize the emission probabilities */
            for(j=0; j < M; j++)
            {
                emissions[i][j] = emissions[i][j] / sum;
            }
        }
    }
}
/************************************************************************
NAME
initTransitions - initialize the transition matrix
DESCRIPTION
Normalizes the transition matrix row by row: every entry of a row is
divided by the sum of that row. If a row sums to less than 0.1 the
program prints an error message and exits.
Input:
None
Output:
transition matrix
Global variables list:
N, transitions.
*************************************************************************/
void initTransitions()
{
    int from = 0;
    int to;

    while(from < N)
    {
        /* total outgoing weight of state `from`, summed in column order */
        double rowTotal = 0;
        for(to = 0; to < N; to++)
        {
            rowTotal += transitions[from][to];
        }

        /* a row with (almost) no outgoing weight cannot be normalized */
        if(rowTotal < 0.1)
        {
            printf("error in the transition matrix!\n");
            exit(1);
        }

        /* scale the row so that it sums to 1 */
        for(to = 0; to < N; to++)
        {
            transitions[from][to] /= rowTotal;
        }
        from++;
    }
}
/************************************************************************
NAME
initPi - initialize the initial state distribution
DESCRIPTION
It is assumed that the model is in the first state with probability 1
Input:
None
Output:
pi
Global variables list:
N, pi.
*************************************************************************/
void initPi()
{
int i;
pi[0] = 1;
for(i=1; i < N; i++)
{
pi[i] = 0;
}
}
/************************************************************************
NAME
init - initialize the hidden Markov model and the space
DESCRIPTION
Input:
None
Output:
None
Global variables list:
N, T, alpha, beta.
*************************************************************************/
void init()
{
initTransitions();
initEmissions();
initPi();
AllocateDataSpace(&alpha, T, N);
AllocateDataSpace(&beta, T, N);
}
/************************************************************************
NAME
check_mode - check whether the files are provided for the specified
mode.
DESCRIPTION
Different files must be provided for different modes.
TRAINING: training file must be provided
TESTING: testing file must be provided
TRAINING_TESTING: both training file and testing file must be provided
A file counts as provided when its pointer is not NULL. If a required
file is missing, a message is printed and the program exits with
status 1. For any other mode value, a message and the usage text are
printed and the program exits with status 1.
Input:
mode      - the selected mode
trainFile - training file name, or NULL
testFile  - testing file name, or NULL
Output:
None
Global variables list:
usage (read only).
*************************************************************************/
void check_mode(int mode, char* trainFile, char* testFile)
{
    int haveTrain = (trainFile != NULL);
    int haveTest = (testFile != NULL);

    if(mode == TRAINING)
    {
        if(haveTrain)
            return;
        printf("In TRAINING mode, the training file was not provided!\n");
    }
    else if(mode == TESTING)
    {
        if(haveTest)
            return;
        printf("In TESTING mode, the testing file was not provided!\n");
    }
    else if(mode == TRAINING_TESTING)
    {
        if(haveTrain && haveTest)
            return;
        printf("Training file and/or testing file was not provided\n");
    }
    else
    {
        /* unknown mode: explain and show how the program is used */
        printf("The mode is invalid! Please read the usage message!\n\n");
        printf("%s", usage);
    }
    /* every path that reaches this point has reported an error */
    exit(1);
}
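/* Code-viewer page residue (not part of myhmm.c) - keyboard shortcuts:
   copy code: Ctrl + C; search code: Ctrl + F; full screen: F11;
   toggle theme: Ctrl + Shift + D; show shortcuts: ?;
   increase font size: Ctrl + =; decrease font size: Ctrl + - */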