📄 phmm.pl
字号:
#!C:/Perl/bin/perl.exe
# 从AIP。txt中读取经clustalx 2.0对齐的序列,将其存储于content数组中
#***********************************#
#while (defined($line=<AIP>)){ #
# $content[$i]=$line; #
# $i+=1; #
# # print "$line\n"; #
# # str2state($line); #
#} 等同于@content=<AIP>; #
#***********************************#
# 读取文件的对齐序列存放于数组content中
open(AIP,"./AIP.txt")||die "$!";
@content=<AIP>;
close(AIP);
# 判断在对齐结果中那些位置是Match state,哪些是insert state
# 算法:某个位置的空位("-")数大于总序列数的一半时则认为是insert state,否则为match state
#@state 由于存放各位置的状态,$mstate存放状态数即match state的长度
$mstate=0;
for ($i=0;$i<length($content[1])-1;$i++){
$num_gaps=0;
for ($j=0;$j<scalar(@content);$j++){
$tempchar=substr($content[$j],$i,1);
if ($tempchar eq "-"){
$num_gaps+=1;
}
}
if ($num_gaps>=scalar(@content)/2){
$state[$i]="I".$mstate;
# print "$i\tI\t$state[$i]\t$num_gaps\n";
}
else{
$mstate+=1;
$state[$i]="M".$mstate;
# print "$i\tM\t$state[$i]\t$num_gaps\n";
}
}
#seqstate 存放序列的状态序列
for ($i=0;$i<scalar(@content);$i++){
# print "$content[$i]";
$seqstate[$i]=str2state($content[$i]);
# print "$seqstate[$i]\n";
# print seqsta "$seqstate[$i]\n"
}
#初始化转移矩阵
$mstate=7;
$Mstate[0]{'name'}="M0";
$Mstate[0]{'next1'}[0]="M1";
$Mstate[0]{'next1'}[1]=0;
$Mstate[0]{'next2'}[0]="D1";
$Mstate[0]{'next2'}[1]=0;
$Mstate[0]{'next3'}[0]="I0";
$Mstate[0]{'next3'}[1]=0;
$Istate[0]={("A"=>0,"C"=>0,"D"=>0,"E"=>0,"F"=>0,"G"=>0,"H"=>0,"I"=>0,"K"=>0,"L"=>0,"M"=>0,"N"=>0,"P"=>0,"Q"=>0,"R"=>0,"S"=>0,"T"=>0,"V"=>0,"W"=>0,"Y"=>0,"X"=>0)};
$Istate[0]{'name'}="I0";
$Istate[0]{'next1'}[0]="M1";
$Istate[0]{'next1'}[1]=0;
$Istate[0]{'next2'}[0]="D1";
$Istate[0]{'next2'}[1]=0;
$Istate[0]{'next3'}[0]="I0";
$Istate[0]{'next3'}[1]=0;
$Mstate[$mstate]={("A"=>0,"C"=>0,"D"=>0,"E"=>0,"F"=>0,"G"=>0,"H"=>0,"I"=>0,"K"=>0,"L"=>0,"M"=>0,"N"=>0,"P"=>0,"Q"=>0,"R"=>0,"S"=>0,"T"=>0,"V"=>0,"W"=>0,"Y"=>0,"X"=>0)};
$Mstate[$mstate]{'name'}="M".$mstate;
$Mstate[$mstate]{'next1'}[0]="E";
$Mstate[$mstate]{'next1'}[1]=0;
$Mstate[$mstate]{'next3'}[0]="I".$mstate;
$Mstate[$mstate]{'next3'}[1]=0;
$Istate[$mstate]={("A"=>0,"C"=>0,"D"=>0,"E"=>0,"F"=>0,"G"=>0,"H"=>0,"I"=>0,"K"=>0,"L"=>0,"M"=>0,"N"=>0,"P"=>0,"Q"=>0,"R"=>0,"S"=>0,"T"=>0,"V"=>0,"W"=>0,"Y"=>0,"X"=>0)};
$Istate[$mstate]{'name'}="I".$mstate;
$Istate[$mstate]{'next1'}[0]="E";
$Istate[$mstate]{'next1'}[1]=0;
$Istate[$mstate]{'next3'}[0]="I".$mstate;
$Istate[$mstate]{'next3'}[1]=0;
$Dstate[$mstate]{'name'}="D".$mstate;
$Dstate[$mstate]{'next1'}[0]="E";
$Dstate[$mstate]{'next1'}[1]=0;
$Dstate[$mstate]{'next3'}[0]="I".$mstate;
$Dstate[$mstate]{'next3'}[1]=0;
$Mstate[$mstate+1]{'name'}="M8";
for ($i=1;$i<$mstate;$i++){
$Mstate[$i]={("A"=>0,"C"=>0,"D"=>0,"E"=>0,"F"=>0,"G"=>0,"H"=>0,"I"=>0,"K"=>0,"L"=>0,"M"=>0,"N"=>0,"P"=>0,"Q"=>0,"R"=>0,"S"=>0,"T"=>0,"V"=>0,"W"=>0,"Y"=>0,"X"=>0)};
$Mstate[$i]{'name'}="M".$i;
$Mstate[$i]{'next1'}[0]="M".($i+1);
$Mstate[$i]{'next1'}[1]=0;
$Mstate[$i]{'next2'}[0]="D".($i+1);
$Mstate[$i]{'next2'}[1]=0;
$Mstate[$i]{'next3'}[0]="I".$i;
$Mstate[$i]{'next3'}[1]=0;
$Istate[$i]={("A"=>0,"C"=>0,"D"=>0,"E"=>0,"F"=>0,"G"=>0,"H"=>0,"I"=>0,"K"=>0,"L"=>0,"M"=>0,"N"=>0,"P"=>0,"Q"=>0,"R"=>0,"S"=>0,"T"=>0,"V"=>0,"W"=>0,"Y"=>0,"X"=>0)};
$Istate[$i]{'name'}="I".$i;
$Istate[$i]{'next1'}[0]="M".($i+1);
$Istate[$i]{'next1'}[1]=0;
$Istate[$i]{'next2'}[0]="D".($i+1);
$Istate[$i]{'next2'}[1]=0;
$Istate[$i]{'next3'}[0]="I".$i;
$Istate[$i]{'next3'}[1]=0;
$Dstate[$i]{'name'}="D".$i;
$Dstate[$i]{'next1'}[0]="M".($i+1);
$Dstate[$i]{'next1'}[1]=0;
$Dstate[$i]{'next2'}[0]="D".($i+1);
$Dstate[$i]{'next2'}[1]=0;
$Dstate[$i]{'next3'}[0]="I".$i;
$Dstate[$i]{'next3'}[1]=0;
}
# 统计各个状态之间的转移概率
for ($i=0;$i<scalar(@content);$i++){
campstate($seqstate[$i]);
}
for ($i=0;$i<=$mstate+1;$i++){
$M=$Mstate[$i]{'next1'}[1]+$Mstate[$i]{'next2'}[1]+$Mstate[$i]{'next3'}[1];
if ($M!=0) {
$Mstate[$i]{'next1'}[1]/=$M;
$Mstate[$i]{'next2'}[1]/=$M;
$Mstate[$i]{'next3'}[1]/=$M;
}
$I=$Istate[$i]{'next1'}[1]+$Istate[$i]{'next2'}[1]+$Istate[$i]{'next3'}[1];
if ($I!=0) {
$Istate[$i]{'next1'}[1]/=$I;
$Istate[$i]{'next2'}[1]/=$I;
$Istate[$i]{'next3'}[1]/=$I;
}
$D=$Dstate[$i]{'next1'}[1]+$Dstate[$i]{'next2'}[1]+$Dstate[$i]{'next3'}[1];
if ($D!=0) {
$Dstate[$i]{'next1'}[1]/=$D;
$Dstate[$i]{'next2'}[1]/=$D;
$Dstate[$i]{'next3'}[1]/=$D;
}
}
=pod 输出中间参数
for ($i=0;$i<=$mstate+1;$i++){
print $Mstate[$i]{'name'}."\t";
print $Mstate[$i]{'next1'}[0]."\t";
print $Mstate[$i]{'next1'}[1]."\t";
print $Mstate[$i]{'next2'}[0]."\t";
print $Mstate[$i]{'next2'}[1]."\t";
print $Mstate[$i]{'next3'}[0]."\t";
print $Mstate[$i]{'next3'}[1]."\t\n";
print $Istate[$i]{'name'}."\t";
print $Istate[$i]{'next1'}[0]."\t";
print $Istate[$i]{'next1'}[1]."\t";
print $Istate[$i]{'next2'}[0]."\t";
print $Istate[$i]{'next2'}[1]."\t";
print $Istate[$i]{'next3'}[0]."\t";
print $Istate[$i]{'next3'}[1]."\t\n";
print $Dstate[$i]{'name'}."\t";
print $Dstate[$i]{'next1'}[0]."\t";
print $Dstate[$i]{'next1'}[1]."\t";
print $Dstate[$i]{'next2'}[0]."\t";
print $Dstate[$i]{'next2'}[1]."\t";
print $Dstate[$i]{'next3'}[0]."\t";
print $Dstate[$i]{'next3'}[1]."\t\n";
}
=cut
# 计算各状态的发出概率
# print $content[0];
for ($i=0;$i<length($content[1])-1;$i++){
for ($j=0;$j<scalar(@content);$j++){
$tempchar=substr($content[$j],$i,1);
if ($tempchar ne "-"){
if (substr($state[$i],0,1) eq "I"){
$Istate[substr($state[$i],1,length($state[$i])-1)]{$tempchar}+=1;
}
elsif (substr($state[$i],0,1) eq "M"){
$Mstate[substr($state[$i],1,length($state[$i])-1)]{$tempchar}+=1;
}
}
}
}
for ($i=0;$i<=$mstate;$i++){
$emitm=$Mstate[$i]{"A"}+$Mstate[$i]{"C"}+$Mstate[$i]{"D"}+$Mstate[$i]{"E"}+$Mstate[$i]{"F"}+$Mstate[$i]{"G"}+$Mstate[$i]{"H"}+$Mstate[$i]{"I"}+$Mstate[$i]{"K"}+$Mstate[$i]{"L"}+$Mstate[$i]{"M"}+$Mstate[$i]{"N"}+$Mstate[$i]{"P"}+$Mstate[$i]{"Q"}+$Mstate[$i]{"R"}+$Mstate[$i]{"S"}+$Mstate[$i]{"T"}+$Mstate[$i]{"V"}+$Mstate[$i]{"W"}+$Mstate[$i]{"Y"}+$Mstate[$i]{"X"};
if ($emitm!=0){
$Mstate[$i]{"A"}/=$emitm;
$Mstate[$i]{"C"}/=$emitm;
$Mstate[$i]{"D"}/=$emitm;
$Mstate[$i]{"E"}/=$emitm;
$Mstate[$i]{"F"}/=$emitm;
$Mstate[$i]{"G"}/=$emitm;
$Mstate[$i]{"H"}/=$emitm;
$Mstate[$i]{"I"}/=$emitm;
$Mstate[$i]{"K"}/=$emitm;
$Mstate[$i]{"L"}/=$emitm;
$Mstate[$i]{"M"}/=$emitm;
$Mstate[$i]{"N"}/=$emitm;
$Mstate[$i]{"P"}/=$emitm;
$Mstate[$i]{"Q"}/=$emitm;
$Mstate[$i]{"R"}/=$emitm;
$Mstate[$i]{"S"}/=$emitm;
$Mstate[$i]{"T"}/=$emitm;
$Mstate[$i]{"V"}/=$emitm;
$Mstate[$i]{"W"}/=$emitm;
$Mstate[$i]{"Y"}/=$emitm;
$Mstate[$i]{"X"}/=$emitm;
}
$emiti=$Istate[$i]{"A"}+$Istate[$i]{"C"}+$Istate[$i]{"D"}+$Istate[$i]{"E"}+$Istate[$i]{"F"}+$Istate[$i]{"G"}+$Istate[$i]{"H"}+$Istate[$i]{"I"}+$Istate[$i]{"K"}+$Istate[$i]{"L"}+$Istate[$i]{"M"}+$Istate[$i]{"N"}+$Istate[$i]{"P"}+$Istate[$i]{"Q"}+$Istate[$i]{"R"}+$Istate[$i]{"S"}+$Istate[$i]{"T"}+$Istate[$i]{"V"}+$Istate[$i]{"W"}+$Istate[$i]{"Y"}+$Istate[$i]{"X"};
if ($emiti!=0){
$Istate[$i]{"A"}/=$emiti;
$Istate[$i]{"C"}/=$emiti;
$Istate[$i]{"D"}/=$emiti;
$Istate[$i]{"E"}/=$emiti;
$Istate[$i]{"F"}/=$emiti;
$Istate[$i]{"G"}/=$emiti;
$Istate[$i]{"H"}/=$emiti;
$Istate[$i]{"I"}/=$emiti;
$Istate[$i]{"K"}/=$emiti;
$Istate[$i]{"L"}/=$emiti;
$Istate[$i]{"M"}/=$emiti;
$Istate[$i]{"N"}/=$emiti;
$Istate[$i]{"P"}/=$emiti;
$Istate[$i]{"Q"}/=$emiti;
$Istate[$i]{"R"}/=$emiti;
$Istate[$i]{"S"}/=$emiti;
$Istate[$i]{"T"}/=$emiti;
$Istate[$i]{"V"}/=$emiti;
$Istate[$i]{"W"}/=$emiti;
$Istate[$i]{"Y"}/=$emiti;
$Istate[$i]{"X"}/=$emiti;
}
}
=p 输出中间参数
for ($i=0;$i<=$mstate;$i++){
print "\$Mstate[$i]{\"A\"}=".$Mstate[$i]{"A"}."\n";
print "\$Mstate[$i]{\"C\"}=".$Mstate[$i]{"C"}."\n";
print "\$Mstate[$i]{\"D\"}=".$Mstate[$i]{"D"}."\n";
print "\$Mstate[$i]{\"E\"}=".$Mstate[$i]{"E"}."\n";
print "\$Mstate[$i]{\"F\"}=".$Mstate[$i]{"F"}."\n";
print "\$Mstate[$i]{\"G\"}=".$Mstate[$i]{"G"}."\n";
print "\$Mstate[$i]{\"H\"}=".$Mstate[$i]{"H"}."\n";
print "\$Mstate[$i]{\"I\"}=".$Mstate[$i]{"I"}."\n";
print "\$Mstate[$i]{\"K\"}=".$Mstate[$i]{"K"}."\n";
print "\$Mstate[$i]{\"L\"}=".$Mstate[$i]{"L"}."\n";
print "\$Mstate[$i]{\"M\"}=".$Mstate[$i]{"M"}."\n";
print "\$Mstate[$i]{\"N\"}=".$Mstate[$i]{"N"}."\n";
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -