⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 myhmm.c

📁 一个马尔可夫模型的源码
💻 C
📖 第 1 页 / 共 3 页
字号:
/************************************************************************
 *                                                                      *
 *  Program packages 'myHMM' :                                          *
 *                                                                      *
 *  myHMM.c                                                             *
 *   -the main program for myHMM hidden Markov model                    *
 *                                                                      *
 *  Version 0.1                                                         *
 *  Date: 3 May 2003                                                    *
 *                                                                      *
 *  NOTE: This program package is copyrighted in the sense that it      *
 *  may be used for scientific purposes. The package as a whole, or     *
 *  parts thereof, cannot be included or used in any commercial         *
 *  application without written permission granted by its producents.   *
 *  No programs contained in this package may be copied for commercial  *
 *  distribution.                                                       *
 *                                                                      *
 *  All comments concerning this program package may be sent to the     *
 *  e-mail address 'dcslgl@nus.edu.sg'.                                 *
 *                                                                      *
 ************************************************************************/

/*************************************************************************
NAME
     myHMM - train the hidden Markov model

SYNOPSIS
     myHMM -hmm [HMM] [-train trainFile] [-trainedHmm trainedHmmFile]
     [-test testFile] [-tested testedFile] [-mode mode]

DESCRIPTION

     Input:
       required: pre-defined hidden Markov model
       optional: training data, testing data

     Output:
       optional: trained hidden Markov model, tested result

     Global variables:
       N: integer, number of states
       M: integer, number of observations
       T: integer, the length of the longest input sequence
       S: integer, finite set of possible states
       O: integer, finite set of possible observations
       t: double, transition matrix: N x N
       e: integer, emission matrix: N x M
       pi: integer, initial state distribution: 1 x N

       %% A hidden Markov model (HMM) is a five-tuple (S,O,t,e,pi).
       %% Let lambda = {t,e,pi} denote the parameters for a given HMM
       %% with fixed S and O.

       %% There are three basic problems in HMM:
       %% 1) probability of the observation sequence given the HMM model
       %%   -> forward algorithm & backward algorithm
       %% 2) the most probable state path of the observation sequence
       %%   given the HMM model
       %%   -> Viterbi algorithm
       %% 3) Building the model given a training set of sequence
       %%   -> Baum-Welch algorithm

OPTIONS
     -hmm [HMM]
         pre-defined hidden Markov Model

     -train trainFile
         training file

     -trainedHmmFile trainedHmmFile
         trained hidden Markov model

     -test testFile
         testing file

     -tested testedFile
         tested result file

     -mode mode
         mode must be one of 1, 2 or 3. '1' represents TRAINING, '2'
         represents TESTING, and '3' represents TRAINING_TESTING.
*************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <malloc.h>
#include <math.h>
#include "tools.h"
#include "myhmm.h"
#include "arg.h"

static char usage[] =
	"myHMM - hidden Markov model\n"
	"command: myHMM -hmm <hmm> [option]\n"
	"Required parameters:\n"
	"  -hmm filename        the file for hmm definition\n"
	"Optional parameters:\n"
	"  -train filename      the file for training\n"
	"  -test filename       the file for testing\n"
	"  -trainedHmm filename the file for trained hmm\n"
	"  -tested filename     the file for tested result\n"
	"  -mode int            the mode for training, testing or both\n"
   "                       1: training, 2: testing, 3: both\n";

/************************************************************************
NAME
     main - main function in the program

DESCRIPTION
     This function extracts parameters from the argument list and perform
     the specified actions by the parameters.

     Input:
       arguments in the command line
	 Output:
	   specified output(s): trained HMM file and/or tesed result file
	 Global variable list:
	   iTrain, iTest, T, trainData, testData, observationDefined,
	   extraSpace.
*************************************************************************/
int main(int argc, char** argv)
{
    char* hmmFile;
    char* trainFile;
    char* testFile;
    char* trainedHmmFile;
    char* testedFile;
    int mode;

    /*---------- get the parameters ----------*/
    if (argc <= 1 || extract_parameter(argc, argv, "-help", OPTION2) || extract_parameter(argc, argv, "-h", OPTION2) )
    {
        printf("%s", usage);
        exit(0);
    }
    hmmFile   = extract_parameter(argc, argv, HMM_FILE, ALWAYS);
    trainFile = extract_parameter(argc, argv, TRAINING_FILE, OPTION);
    testFile  = extract_parameter(argc, argv, TESTING_FILE, OPTION);
    trainedHmmFile = extract_parameter(argc, argv, TRAINED_HMM_FILE, OPTION);
    testedFile    = extract_parameter(argc, argv, TESTED_FILE, OPTION);
    mode = oatoi(extract_parameter(argc, argv, MODE, OPTION), TRAINING);

    /* check whether the mode is consistant with the provided files */
    check_mode(mode, trainFile, testFile);

    /* default values for trainedHmmFile and testedFile */
    if(trainedHmmFile == NULL)
        trainedHmmFile = defTrainedHmmFile;
    if(testedFile == NULL)
        testedFile = defTestedFile;

    /*---------- load HMM ----------*/
    loadHMM(hmmFile);

    /*---------- train the HMM model ----------*/
    if(mode == TRAINING || mode == TRAINING_TESTING)
    {
        /*---------- load training data ----------*/
        iTrain = cal_lines(trainFile);
        T = getLengthOfLongestLine(trainFile) + extraSpace;
        loadSeq(&trainData, trainFile);
        if(observationsDefined == FALSE)
        {
            readObservations(trainData);
        }
        /*---------- initialization ----------*/
        init();
        /*---------- train HMM ----------*/
        baumWelch();
        /*---------- save trained HMM ----------*/
        saveHmm(trainedHmmFile);
    }

    /*---------- test with the HMM model ----------*/
    if(mode == TESTING || mode == TRAINING_TESTING)
    {
        /*---------- 5. load testing data ----------*/
        if(mode == TESTING)
        {
            T = getLengthOfLongestLine(testFile) + extraSpace;
            init();
        }
        iTest = cal_lines(testFile);
        loadSeq(&testData, testFile);
        /*---------- test ----------*/
        test(testedFile);
    }
    /* post-processing */
    post_processing();

    return 0;
}

/************************************************************************
NAME
     post_processing - post processing after the main process

DESCRIPTION
     This function releases the allocated memery for the global variables.

     Input:
	   pointers to global variables.
	 Output:
	   None.
	 Global variable list:
	   transitions, emissions, pi, observations, alpha, beta, trainData,
	   testData.
*************************************************************************/
void post_processing()
{
    int i;

    /* free space for transition matrix */
    if(transitions != NULL)
    {
        for(i=0; i < N; i++)
        {
            if(transitions[i] != NULL) free(transitions[i]);
        }
        free(transitions);
    }
    /* free space for emission matrix */
    if(emissions != NULL)
    {
        for(i=0; i < N; i++)
        {
            if(emissions[i] != NULL) free(emissions[i]);
        }
        free(emissions);
    }
    /* free space for initial distribution vector */
    if(pi != NULL) free(pi);
    /* free space for observation list */
    if(observations != NULL) free(observations);
    /* free space for matrix alpha, used in forward algorithm */
    if(alpha != NULL)
    {
        for(i=0; i < T; i++)
        {
            if(alpha[i] != NULL) free(alpha[i]);
        }
        free(alpha);
    }
    /* free space for matrix beta, used in backward algorithm */
    if(beta != NULL)
    {
        for(i=0; i < T; i++)
        {
            if(beta[i] != NULL) free(beta[i]);
        }
        free(beta);
    }
    /* free space for training data */
    if(trainData != NULL)
    {
        for(i=0; i < iTrain; i++)
        {
            if(trainData[i] != NULL) free(trainData[i]);
        }
        free(trainData);
    }
    /* free space for testing data */
    if(testData != NULL)
    {
        for(i=0; i < iTest; i++)
        {
            if(testData[i] != NULL) free(testData[i]);
        }
        free(testData);
    }
}

/************************************************************************
NAME
     saveHmm - save the trained hidden Markov model in pre-defined format

DESCRIPTION
     This function saves the trained hidden Markov model in pre-defined
	 format.

     Input:
	   trained HMM & output file name
	 Output:
	   output file
	 Global variables:
	   N, M, T, transitions, emissions, pi.
*************************************************************************/
void saveHmm(char *out)
{
    FILE *fp;
    int i, j;

    /* Open for write (will fail if inputfile does not exist) */
    if( (fp  = fopen( out, "w" )) == NULL )
    {
        printf( "The file '%s' was not opened\n", out);
        exit(1);
    }
    fprintf(fp, "%d %d %d\n", N, M, T);
    fprintf(fp, "// Observation list\n");
    fprintf(fp, "O: ");
    for(i=0; i < M; i++)
    {
        fprintf(fp, "%c ", observations[i]);
    }
    fprintf(fp, "\n");
    /* transition matrix */
    fprintf(fp, "// Transition matrix\n");
    for(i=0; i < N; i++)
    {
        for(j=0; j < N; j++)
        {
            fprintf(fp, "%6.4f ", transitions[i][j]);
        }
        fputc('\n', fp);
    }
    /* emission matrix */
    fprintf(fp, "// Emission matrix\n");
    // emission matrix
    for(i=0; i < N; i++)
    {
        for(j=0; j < M; j++)
        {
            fprintf(fp, "%6.4f ", emissions[i][j]);
        }
        fputc('\n', fp);
    }
    /* initial state distribution */
    fprintf(fp, "// Initial state distribution\n");
    for(i=0; i < N; i++)
    {
        fprintf(fp, "%6.4f ", pi[i]);
    }
    fputc('\n', fp);

    fclose( fp );
}

/************************************************************************
NAME
     test - test the test file with given hidden Markov model

DESCRIPTION
     This function tests the test file with given hidden Markov model.

     Input:
	   trained HMM & tested file name
	 Output:
	   tested file
	 Global variables list:
	   T, iTest, testData, extraObservations.
*************************************************************************/
void test(char* testedFile)
{
    FILE *out_fp;
    int i;
    double prob;
    /* the most probable state path of the current observation sequence */
    char *buffer;
    /* the most probable state paths of all the observation sequences */
    double score;

    buffer = (char *) malloc(T * sizeof(char));

    /* Open for write (will fail if inputfile does not exist) */
    if( (out_fp  = fopen( testedFile, "w" )) == NULL )
    {
        printf( "The file '%s' was not opened\n", testedFile);
        exit(1);
    }

    for(i=0; i < iTest; i++)
    {
        prob = forward( testData[i] );
        /* score is equal to the natural logarithm of the probability
           divided by the length */
        score = log(prob) / (double)(strlen(testData[i]) - extraObservations);
        viterbi(testData[i], buffer);
        /* output probability and the most probable path */
        fprintf(out_fp, "%f\n%s\n%s\n", score, testData[i], buffer);
    }
    
    free( buffer );

    fclose( out_fp );
}

/************************************************************************
NAME
     getSymbol - get the symbol corresponding to each state

DESCRIPTION
     This function .. There are only 52 distinct symbols in this function.

     Input:
	   state number
	 Output:
	   corresponding symbol.
	 Global variables list:
	   None.
*************************************************************************/
char getSymbol(int k)
{
    char result;
    
    if(k >= 0 && k < 26)
        result = 'a' + k;
    else if(k >= 26 && k < 52)
        result = 'A' + k - 26;
    else
        result = '-';
    
    return result;
}

/************************************************************************
NAME
     getObservation - get the order of the specified observation

DESCRIPTION
     This function gets the order of the specified observation in the
	 observation list with binary search 

     Input:
	   specified observation
	 Output:
	   the order of the specified observation
	 Global variables list:
	   M.
*************************************************************************/
int getObservation(char ch)
{
    int start, end, middle;
    /* initial value */
    start = 0;
    end = M-1;
    middle= -1;
    //exception: the input observation is not in the observation list
    if(ch < observations[start] || ch > observations[end])
    {
        printf("the input observation is not in the observation list!\n");
        exit(1);
    }

    if(observations[start] == ch)
        return start;
    if(observations[end] == ch)
        return end;
    middle = (start + end)/2;

    /* why middle != start:
        if middle == start, it means end = start + 1, and 'ch' is a
        character which lies between observation 'start' and
        observation 'end' */
    while(observations[middle]!=ch && start < end && middle != start)
    {
        if(observations[middle] < ch)
            start = middle;
        else
            end = middle;

        middle = (start + end) / 2;
    }

    if(observations[middle] != ch)
    {
        printf("The char is not in observation list!\n");
        exit(1);
    }

    return middle;
}


/************************************************************************
NAME
     forward - forward algorithm

DESCRIPTION
     This function calculates the probability of the observation sequence
	 with forward algorithm given the HMM model.

     Input:
	   HMM model & the observation sequence
	 Output:
	   Probability
	 Global variables list:
	   N, alpha, pi, transitions, emissions.
*************************************************************************/
double forward(char *line)
{
    int i, j, t;
    int length = strlen( line );
    /* the order of the real observation in the observation set */
    int iObservation;
    /* transition probability to state i given the observation sequence */
    double sumTransProb;
    double prob;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -