⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 myhmm.c

📁 一个马尔可夫模型的源码
💻 C
📖 第 1 页 / 共 3 页
字号:
        /* if not, exit the program */
        if(M != m)
        {
            printf("the number of observations is inconsistent!\n");
            exit(1);
        }

        observations = (char*)malloc((M+1)*sizeof(char));
        observations[M] = '\0';
        token = strtok( line, seps );
        token = strtok(NULL, seps);
        end=0;
        // read the observations and sort them
        // insertion sort
        while(token != NULL)
        {
            int pos = end;
            while(pos > 0 && observations[pos-1] > token[0])
            {
                observations[pos] = observations[pos-1];
                pos--;
            }
            observations[pos] = token[0];
            end++;
            token = strtok(NULL, seps);
        }
    }
    fgets(line, nBuffer, in_fp); // skip one line

    // Part 3
    if(AllocateDataSpace( &transitions, N, N ) != correctAction)
    {
        printf("allocate memory for transitions error!\n");
        exit(1);
    }
    for(i=0; i < N; i++)
    {
        for(j=0; j < N; j++)
        {
            transitions[i][j] = 0;
        }
    }
    // read transition matrix
    for(i=0; i < N; i++)
    {
        fgets(line, nBuffer, in_fp);
        token = strtok( line, seps );
        j=0;
        while(token != NULL)
        {
            transitions[i][j] = atof(token);
            j++;
            token = strtok(NULL, seps);
        }
    }
    fgets(line, nBuffer, in_fp); // skip one line

    // Part 4
    // read emission matrix
    if(observationsDefined == TRUE)
    {
        if(AllocateDataSpace( &emissions, N, M ) != correctAction)
        {
            printf("allocate memory for emissions error!\n");
            exit(1);
        }
        // initializing the emission matrix
        for(i=0; i < N; i++)
        {
            for(j=0; j < M; j++)
            {
                emissions[i][j] = 0;
            }
        }
        // read emission matrix
        for(i=0; i < N; i++)
        {
            fgets(line, nBuffer, in_fp);
            token = strtok( line, seps );
            j=0;
            while(token != NULL)
            {
                emissions[i][j] = atof(token);
                j++;
                token = strtok(NULL, seps);
            }
        }
    }
    fgets(line, nBuffer, in_fp); // skip one line

    // Part 5
    pi = (double*)malloc(N*sizeof(double));
    for(i=0; i < N; i++)
    {
        pi[i] = 0; // initial value
    }
    // read initial distributions
    fgets(line, nBuffer, in_fp);
    token = strtok( line, seps );
    i=0;
    while(token != NULL)
    {
        pi[i] = atof(token);
        i++;
        token = strtok(NULL, seps);
    }

    fclose(in_fp);
    free(line);
    free(buffer);
}

/************************************************************************
NAME
     loadSeq - load sequences from the specified file

DESCRIPTION
     Reads cal_lines(seqFile) lines from the file into a freshly
     allocated array of strings, one string per line.  Every line is
     reduced to its first token (tokens are delimited by the characters
     in seps) and that token is moved to the start of the string, so no
     separator characters remain in the stored sequence.

     The program prints a message and exits if the memory cannot be
     allocated, the file cannot be opened, a line cannot be read, or a
     line contains no token at all.

     Input:
	   pSeq    - receives the array of sequences
	   seqFile - the specified sequence file name
	 Output:
	   a string array for the input file, returned through pSeq
	 Global variables list:
	   None written here.  The original note says the result ends up in
	   trainData or testData; that assignment is done by the caller.
*************************************************************************/
void loadSeq(char***pSeq, char* seqFile)
{
    FILE *in_fp;
    int i;
    char * token;
    int nLines = cal_lines(seqFile);
    int nBuffer = getLengthOfLongestLine(seqFile)+extraSpace;
    char **seq;

    if(AllocateDataSpaceChar(&seq, nLines, nBuffer) != correctAction)
    {
        printf("allocate memory error\n");
        exit(1);
    }
    /* Open for read (will fail if inputfile does not exist) */
    if( (in_fp  = fopen( seqFile, "r" )) == NULL )
    {
        printf( "The file '%s' was not opened\n", seqFile);
        exit(1);
    }

    for(i=0; i < nLines; i++)
    {
        /* after a failed read seq[i] is not a valid string, so stop here
           instead of tokenizing uninitialized memory */
        if(fgets(seq[i], nBuffer, in_fp) == NULL)
        {
            printf("cannot read line %d of file %s!\n", i, seqFile);
            exit(1);
        }
        // delete the invalid characters: keep only the first token
        token = strtok(seq[i], seps);
        /* strtok returns NULL when the line is empty or holds nothing but
           separators; it never makes seq[i] itself empty, so the token is
           what has to be tested */
        if(token == NULL)
        {
            printf("the length of the sequence is less than 1 in line %d of file %s!\n", i, seqFile);
            exit(1);
        }
        /* leading separators are skipped by strtok but still sit at the
           front of seq[i]; shift the token down so seq[i] starts with it */
        if(token != seq[i])
        {
            memmove(seq[i], token, strlen(token)+1);
        }
    }
    fclose(in_fp);

    (*pSeq) = seq;
}

/************************************************************************
NAME
     AllocateDataSpaceChar - allocate a row x col table of characters

DESCRIPTION
     Allocates an array of `row` character pointers and then one buffer
     of `col` characters for each of them.  The buffers are left
     uninitialized.  Every allocation result is handed to the SUCCESS
     macro, which is defined elsewhere (presumably it returns an error
     code from this function when the pointer is NULL - confirm against
     its definition).

     Input:
	   pData - receives the allocated table
	   row   - number of buffers in the table
	   col   - number of characters in each buffer
	 Output:
	   correctAction once every allocation has passed SUCCESS
	 Global variables list:
	   None.
*************************************************************************/
int AllocateDataSpaceChar(char ***pData, int row, int col)
{
    char** table = (char**)malloc(row * sizeof(char*));
    int r = 0;

    SUCCESS( table );
    /* one independent buffer per row */
    while(r < row)
    {
        table[ r ] = (char *)malloc(col * sizeof(char));
        SUCCESS( table[ r ] );
        r++;
    }

    *pData = table;
    return correctAction;
}

/************************************************************************
NAME
     readObservations - read observations from the specified data

DESCRIPTION
     Scans the first iTrain strings of the given data and collects every
     distinct character that occurs in them.  The characters are kept in
     ascending order (compared as plain char) by inserting each new one
     at its sorted position.  The result is stored in the global
     observation array, terminated with '\0', and M is set to the number
     of distinct characters.

     The program prints a message and exits if memory cannot be
     allocated.

     Input:
	   specified data (array of at least iTrain strings)
	 Output:
	   observation list
	 Global variables list:
	   M, iTrain, observations.
*************************************************************************/
void readObservations(char** trainData)
{
    int nBuffer = 32;   // initial capacity of the work buffer, doubled on demand
    char* buffer;
    char* grown;
    int nObserv = 0;    // number of distinct observations collected so far
    int length;
    int i, j, k;

    // allocate memory for the buffer
    buffer = (char*) malloc(nBuffer*sizeof(char));
    if(buffer == NULL)
    {
        printf("allocate memory for observations error!\n");
        exit(1);
    }

    for(i=0; i < iTrain; i++)
    {
        length = strlen(trainData[i]);
        for(j=0; j < length; j++)
        {
            char current = trainData[i][j];

            // find the position of the current observation in the observation array:
            // the first entry that is not smaller than it
            for(k=0; k < nObserv; k++)
            {
                if(current <= buffer[k])
                    break;
            }
            // the loop stopped on an equal entry: already known
            if(k < nObserv && buffer[k] == current)
            {
                continue;
            }

            // make room before inserting; ">=" so the buffer grows even if
            // the count ever starts at or beyond the capacity
            if(nObserv >= nBuffer)
            {
                nBuffer = nBuffer + nBuffer;
                grown = (char*) realloc(buffer, nBuffer*sizeof(char));
                if(grown == NULL)
                {
                    printf("allocate memory for observations error!\n");
                    exit(1);
                }
                buffer = grown;
            }

            // insert the new observation into the array, moving the larger
            // entries one place up (nothing moves when appending at the end)
            memmove(buffer+k+1, buffer+k, (nObserv-k)*sizeof(char));
            buffer[k] = current;
            nObserv++;
        }
    }

    // assign the known observations to the observation array; like the
    // array built when observations are read from the model file, it
    // gets one extra byte for a terminating '\0'
    M = nObserv;
    observations = (char*)malloc((M+1)*sizeof(char));
    if(observations == NULL)
    {
        printf("allocate memory for observations error!\n");
        exit(1);
    }
    memcpy(observations, buffer, M*sizeof(char));
    observations[M] = '\0';

    free(buffer);
}

/************************************************************************
NAME
     initEmissions - initialize the emission matrix

DESCRIPTION
     If the emission matrix is read from file, it will be normalized.
	 If the emission matrix is initlized at the first time, the emission
	 matrix will be initialized as follows.
	 START state will emit '$' with probability 1 and other observations
	 with probabilities with 0. END state will emit '#' with probabilities
	 with probability 1 and other observations with probabilities with 0.
	 Other states will emit '$' and '#' with probabilities 0, and other
	 observations with uniformly distribution.

     Input:
	   None
	 Output:
	   emission matrix
	 Global variables list:
	   N, M, emissions, extraObservations.
*************************************************************************/
void initEmissions()
{
    double temp;
    int i, j;
    int iObservationStart, iObservationEnd;
    if(observationsDefined == FALSE)
    {
        if(AllocateDataSpace( &emissions, N, M ) != correctAction)
        {
            printf("allocate memory for emissions error!\n");
            exit(1);
        }
        /* START state and END state */
        // emit start observation with probability 1 in START state
        // emit ens   observation with probability 1 in END   state
        for(j=0; j < M; j++)
        {
            emissions[0][j]  = 0;
            emissions[N-1][j] = 0;
        }
        iObservationStart = getObservation('$');
        emissions[0][iObservationStart] = 1;
        iObservationEnd = getObservation('#');
        emissions[N-1][iObservationEnd] = 1;

        /* other states */
        temp = 1.0 / (double)(M - extraObservations);
        for(i=1; i < N-1; i++)
        {
            /* emission matrix */
            for(j=0; j < M; j++)
            {
                emissions[i][j] = temp;
            }
            // other states will not emit the start and end observations
            emissions[i][iObservationStart] = 0;
            emissions[i][iObservationEnd] = 0;
        }
        //  observationsDefined = TRUE;
    }
    else
    {
        for(i=0; i < N; i++)
        {
            double sum = 0;
            /* emission probabilities in one state */
            for(j=0; j < M; j++)
            {
                sum = sum + emissions[i][j];
            }
            // if all of the emission probabilities in one state is very small
            if(sum < 0.1)
            {
                printf("error in the emission matrix!\n");
                exit(1);
            }
			/* normalize the emission probabilities */
            for(j=0; j < M; j++)
            {
                emissions[i][j] = emissions[i][j] / sum;
            }
        }
    }
}

/************************************************************************
NAME
     initTransitions - initialize the transition matrix

DESCRIPTION
     Normalizes the N x N transition matrix row by row: every entry is
     divided by the sum of its row, so each row adds up to 1 afterwards.
     If the sum of a row is below 0.1 the matrix is considered invalid
     and the program exits with a message.

     Input:
	   None
	 Output:
	   transition matrix
	 Global variables list:
	   N, transitions.
*************************************************************************/
void initTransitions()
{
    int from, to;
    double total;

    for(from = 0; from < N; from++)
    {
        /* add up the probabilities of leaving state `from` */
        total = 0;
        for(to = 0; to < N; to++)
        {
            total += transitions[from][to];
        }
        // reject a state with (almost) no outgoing probability
        if(total < 0.1)
        {
            printf("error in the transition matrix!\n");
            exit(1);
        }
        /* scale the row so that it sums to 1 */
        for(to = 0; to < N; to++)
        {
            transitions[from][to] /= total;
        }
    }
}

/************************************************************************
NAME
     initPi - initialize the initial state distribution

DESCRIPTION
     It is assumed that the model is in the first state with probability 1

     Input:
	   None
	 Output:
	   pi
	 Global variables list:
	   N, pi.
*************************************************************************/
void initPi()
{
    int i;
    pi[0] = 1;
    for(i=1; i < N; i++)
    {
        pi[i] = 0;
    }
}

/************************************************************************
NAME
     init - initialize the hidden Markov model and the space

DESCRIPTION

     Input:
	   None
	 Output:
	   None
	 Global variables list:
	   N, T, alpha, beta.
*************************************************************************/
void init()
{
    initTransitions();
    initEmissions();
    initPi();
    AllocateDataSpace(&alpha, T, N);
    AllocateDataSpace(&beta, T, N);
}

/************************************************************************
NAME
     check_mode - check whether the files are provided for the specified
	 mode.

DESCRIPTION
     Verifies that the file names required by the selected mode are not
     NULL.  If one is missing, or the mode is not one of the three known
     values, a message is printed and the program exits.

     TRAINING: training file must be provided
	 TESTING:  testing file must be provided
	 TRAINING_TESTING: both training file and testing file must be provided

     Input:
	   mode     - the selected mode
	   trainFile and testFile pointers
	 Output:
	   None
	 Global variables list:
	   usage (printed when the mode is invalid).
*************************************************************************/
void check_mode(int mode, char* trainFile, char* testFile)
{
    if(mode == TRAINING)
    {
        if(trainFile == NULL)
        {
            printf("In TRAINING mode, the training file was not provided!\n");
            exit(1);
        }
    }
    else if(mode == TESTING)
    {
        if(testFile == NULL)
        {
            printf("In TESTING mode, the testing file was not provided!\n");
            exit(1);
        }
    }
    else if(mode == TRAINING_TESTING)
    {
        /* both files are needed here */
        if(!(trainFile != NULL && testFile != NULL))
        {
            printf("Training file and/or testing file was not provided\n");
            exit(1);
        }
    }
    else
    {
        /* unknown mode: show the usage text and give up */
        printf("The mode is invalid! Please read the usage message!\n\n");
        printf("%s", usage);
        exit(1);
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -