📄 program.cs
字号:
//假定字符集为'A'-'Z',但是适用于很多场合,比如将要做的多序列比对
//初始化的时候需要将序列的符号集映射一下,输出的时候映射回来,会比较方便,未实现
using System;
using System.Collections.Generic;
using System.Text;
struct HMMType
{
public int N;
public int M;
public double[] Pi;
public double[,] A;
public double[,] B;
public HMMType(int n, int m)
{
this.N = n;
this.M = m;
Pi = new double[n];
A = new double[n, n];
B = new double[n, m];
}
public void SetAverage()
{
for (int i = 0; i < N; i++)
{
Pi[i] = 1.0 / N;
}
for (int i = 0; i < N; i++)
{
for (int j = 0; j < N; j++)
{
A[i, j] = 1.0 / N / N;
}
}
for (int i = 0; i < N; i++)
{
for (int j = 0; j < M; j++)
{
B[i, j] = 1.0 / M;
}
}
}
}
struct ScoringForwardReturnType
{
public double Probability;
public double[,] alpha;
public ScoringForwardReturnType(int T, int n)
{
this.alpha = new double[T, n];
this.Probability = 0;
}
}
struct ScoringBackwardReturnType
{
public double Probability;
public double[,] beta;
public ScoringBackwardReturnType(int T, int n)
{
this.beta = new double[T, n];
this.Probability = 0;
}
}
struct SequenceType
{
public char[] s;
public SequenceType(int n)
{
s = new char[n];
}
}
struct StateSequenceType
{
public int[] s;
public StateSequenceType(int n)
{
s = new int[n];
}
}
class Program
{
const double eps = 0.1;
static void Main(string[] args)
{
}
static ScoringForwardReturnType ScoringForward(ref HMMType H, ref SequenceType S)
{
double[,] alpha = new double[S.s.Length, H.N];
//init
for (int i = 0; i < H.N; i++)
{
alpha[0, i] = H.Pi[i] * H.B[i, S.s[0] - 'A'];
}
//recurive
for (int t = 1; t < S.s.Length; t++)
{
for (int j = 0; j < H.N; j++)
{
double temp = 0;
for (int i = 0; i < H.N; i++)
{
temp += alpha[t, i] * H.A[i, j];
}
alpha[t, j] = temp * H.B[j, S.s[t] - 'A'];
}
}
//calculate result
ScoringForwardReturnType Res = new ScoringForwardReturnType(S.s.Length, H.N);
for (int i = 0; i < H.N; i++)
{
Res.Probability += alpha[S.s.Length - 1, i];
}
for (int i = 0; i < S.s.Length; i++)
{
for (int j = 0; j < H.N; j++)
{
Res.alpha[i, j] = alpha[i, j];
}
}
return Res;
}
static ScoringBackwardReturnType ScoringBackward(ref HMMType H, ref SequenceType S)
{
double[,] beta = new double[S.s.Length, H.N];
for (int i = 0; i < H.N; i++)
{
beta[S.s.Length - 1, i] = 1;
}
for (int t = S.s.Length - 2; t >= 0; t--)
{
for (int i = 0; i < H.N; i++)
{
for (int j = 0; j < H.N; j++)
{
beta[t, i] += H.A[i, j] * H.B[j, S.s[t + 1] - 'A'] * beta[t + 1, j];
}
}
}
ScoringBackwardReturnType Res = new ScoringBackwardReturnType(S.s.Length, H.N);
for (int i = 0; i < H.N; i++)
{
Res.Probability += H.Pi[i] * H.B[i, S.s[0] - 'A'] * beta[0, i];
}
for (int i = 0; i < S.s.Length; i++)
{
for (int j = 0; j < H.N; j++)
{
Res.beta[i, j] = beta[i, j];
}
}
return Res;
}
static StateSequenceType Alignment(ref HMMType H, ref SequenceType S)
{
StateSequenceType Res = new StateSequenceType(S.s.Length);
double[,] delta = new double[S.s.Length, H.N];
int[,] phi = new int[S.s.Length, H.N];
for (int i = 0; i < H.N; i++)
{
delta[0, i] = H.Pi[i] * H.B[i, S.s[0] - 'A'];
phi[0, i] = 0;
}
for (int t = 1; t < S.s.Length; t++)
{
for (int i = 0; i < H.N; i++)
{
double MaxValue = double.MinValue;
int MaxValueState = 0;
for (int j = 0; j < H.N; j++)
{
if (delta[t - 1, j] * H.A[j, i] > MaxValue)
{
MaxValue = delta[t - 1, j] * H.A[j, i];
MaxValueState = j;
}
}
phi[t, i] = MaxValueState;
}
}
double ResProbability = double.MinValue;
for (int i = 0; i < H.N; i++)
{
if (ResProbability < delta[S.s.Length - 1, i])
{
ResProbability = delta[S.s.Length - 1, i];
Res.s[S.s.Length - 1] = i;
}
}
for (int i = S.s.Length - 2; i >= 0; i--)
{
Res.s[i] = phi[i + 1, Res.s[i + 1]];
}
return Res;
}
static void UpdataPi(ref HMMType H, ref double[,] gamma)
{
for (int i = 0; i < H.N; i++)
{
H.Pi[i] = gamma[0, i];
}
}
static void UpdataA(int T, ref HMMType H, ref double[,] gamma, ref double[,,] xi)
{
for (int i = 0; i < H.N; i++)
{
for (int j = 0; j < H.N; j++)
{
double SumTop = 0;
double SumBotton = 0;
for (int k = 0; k < T - 1; k++)
{
SumTop += xi[k, i, j];
SumBotton += gamma[k, i];
}
H.A[i, j] = SumTop / SumBotton;
}
}
}
static void UpdataB(ref HMMType H, ref double[,] gamma, ref double[,,] xi, ref SequenceType S)
{
for (int i = 0; i < H.N; i++)
{
for (int k = 0; k < H.M; k++)
{
double SumTop = 0;
double SumBotton = 0;
for (int t = 0; t < S.s.Length; t++)
{
if (S.s[t] - 'A' == k)
{
SumTop += gamma[t, i];
}
SumBotton += gamma[t, i];
}
}
}
}
static void CalcGamma(int T, int N, ref double[,] gamma, ref double[,] alpha, ref double[,] beta)
{
for (int t = 0; t < T; t++)
{
for (int i = 0; i < N; i++)
{
double SumBotton = 0;
for (int j = 0; j < N; j++)
{
SumBotton += alpha[t, j] * beta[t, j];
}
gamma[t, i] = alpha[t, i] * beta[t, i] / SumBotton;
}
}
}
static void CalcXi(ref HMMType H, int T, ref double[, ,] xi, ref double[,] alpha, ref double[,] beta, ref SequenceType S)
{
for (int t = 0; t < T - 1; t++)
{
for (int i = 0; i < H.N; i++)
{
for (int j = 0; j < H.N; j++)
{
double TopValue = alpha[t, i] * H.A[i, j] * H.B[j, S.s[t + 1] - 'A'] * beta[t + 1, j];
double SumBotton = 0;
for (int k = 0; k < H.N; k++)
{
SumBotton += alpha[t, j] * beta[t, j];
}
xi[t, i, j] = TopValue / SumBotton;
}
}
}
}
static bool CheckOK(ref HMMType H1, ref HMMType H2)
{
double sum = 0;
for (int i = 0; i < H1.N; i++)
{
sum += Math.Abs(H1.Pi[i] - H2.Pi[i]);
}
for (int i = 0; i < H1.N; i++)
{
for (int j = 0; j < H2.N; j++)
{
sum += Math.Abs(H1.A[i, j] - H2.A[i, j]);
}
}
for (int i = 0; i < H1.N; i++)
{
for (int j = 0; j < H1.M; j++)
{
sum += Math.Abs(H1.B[i, j] - H2.B[i, j]);
}
}
return sum < eps;
}
static HMMType Training(ref SequenceType S)
{
int N = S.s.Length;
int M = 0;
int[] Cnt = new int[26];
for (int i = 0; i < S.s.Length; i++)
{
Cnt[S.s[i] - 'A']++;
}
for (int i = 0; i < 26; i++)
{
if (Cnt[i] > 0)
{
M++;
}
}
HMMType Res = new HMMType(N, M);
Res.SetAverage();
double[,] gamma = new double[S.s.Length, N];
double[,,] xi = new double[S.s.Length, N, N];
do
{
ScoringForwardReturnType ScoringForwardRes = ScoringForward(ref Res, ref S);
ScoringBackwardReturnType ScoringBackwardRes = ScoringBackward(ref Res, ref S);
CalcGamma(S.s.Length, N, ref gamma, ref ScoringForwardRes.alpha, ref ScoringBackwardRes.beta);
CalcXi(ref Res, S.s.Length, ref xi, ref ScoringForwardRes.alpha, ref ScoringBackwardRes.beta, ref S);
HMMType NextHMM = new HMMType(N, M);
UpdataPi(ref NextHMM, ref gamma);
UpdataA(S.s.Length, ref NextHMM, ref gamma, ref xi);
UpdataB(ref NextHMM, ref gamma, ref xi, ref S);
if (CheckOK(ref NextHMM, ref Res))
{
Res = NextHMM;
break;
}
else
{
Res = NextHMM;
}
} while (true);
return Res;
}
static void ProfileHMM(SequenceType[] Ss)
{
int TotalLength = 0;
for (int i = 0; i < Ss.Length; i++)
{
TotalLength += Ss[i].s.Length;
}
int[] Cnt = new int[26];
int M = 0;
for (int i = 0; i < Ss.Length; i++)
{
for (int j = 0; j < Ss[i].s.Length; j++)
{
Cnt[Ss[i].s[j] - 'A']++;
}
}
for (int i = 0; i < 26; i++)
{
if (Cnt[i] > 0)
{
M++;
}
}
HMMType PHMM = new HMMType(TotalLength / Ss.Length, M);
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -