⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 phmm.pl

📁 一个用于蛋白质序列分类的profile hmm的Perl代码
💻 PL
📖 第 1 页 / 共 2 页
字号:
#!C:/Perl/bin/perl.exe
# 从AIP。txt中读取经clustalx 2.0对齐的序列,将其存储于content数组中
#***********************************#
#while (defined($line=<AIP>)){      #
#    $content[$i]=$line;            #
#    $i+=1;                         #
#    # print "$line\n";             #
#    # str2state($line);            #
#} 等同于@content=<AIP>;         	  #
#***********************************#

# 读取文件的对齐序列存放于数组content中
open(AIP,"./AIP.txt")||die "$!";
@content=<AIP>;
close(AIP);

# 判断在对齐结果中那些位置是Match state,哪些是insert state
# 算法:某个位置的空位("-")数大于总序列数的一半时则认为是insert state,否则为match state
#@state 由于存放各位置的状态,$mstate存放状态数即match state的长度
$mstate=0;
for ($i=0;$i<length($content[1])-1;$i++){
    $num_gaps=0;
    for ($j=0;$j<scalar(@content);$j++){
       $tempchar=substr($content[$j],$i,1);
       if ($tempchar eq "-"){
          $num_gaps+=1;
       }
    }
    if ($num_gaps>=scalar(@content)/2){
       $state[$i]="I".$mstate;
       # print "$i\tI\t$state[$i]\t$num_gaps\n";
       
    }
    else{
       $mstate+=1;
       $state[$i]="M".$mstate;
       # print "$i\tM\t$state[$i]\t$num_gaps\n";
       
    }
}

#seqstate 存放序列的状态序列
for ($i=0;$i<scalar(@content);$i++){
    # print "$content[$i]";
    $seqstate[$i]=str2state($content[$i]);
    # print "$seqstate[$i]\n";
    # print seqsta "$seqstate[$i]\n"
}

#初始化转移矩阵
$mstate=7;
$Mstate[0]{'name'}="M0";
$Mstate[0]{'next1'}[0]="M1";
$Mstate[0]{'next1'}[1]=0;
$Mstate[0]{'next2'}[0]="D1";
$Mstate[0]{'next2'}[1]=0;
$Mstate[0]{'next3'}[0]="I0";
$Mstate[0]{'next3'}[1]=0;

$Istate[0]={("A"=>0,"C"=>0,"D"=>0,"E"=>0,"F"=>0,"G"=>0,"H"=>0,"I"=>0,"K"=>0,"L"=>0,"M"=>0,"N"=>0,"P"=>0,"Q"=>0,"R"=>0,"S"=>0,"T"=>0,"V"=>0,"W"=>0,"Y"=>0,"X"=>0)};
$Istate[0]{'name'}="I0";
$Istate[0]{'next1'}[0]="M1";
$Istate[0]{'next1'}[1]=0;
$Istate[0]{'next2'}[0]="D1";
$Istate[0]{'next2'}[1]=0;
$Istate[0]{'next3'}[0]="I0";
$Istate[0]{'next3'}[1]=0;

$Mstate[$mstate]={("A"=>0,"C"=>0,"D"=>0,"E"=>0,"F"=>0,"G"=>0,"H"=>0,"I"=>0,"K"=>0,"L"=>0,"M"=>0,"N"=>0,"P"=>0,"Q"=>0,"R"=>0,"S"=>0,"T"=>0,"V"=>0,"W"=>0,"Y"=>0,"X"=>0)};
$Mstate[$mstate]{'name'}="M".$mstate;
$Mstate[$mstate]{'next1'}[0]="E";
$Mstate[$mstate]{'next1'}[1]=0;
$Mstate[$mstate]{'next3'}[0]="I".$mstate;
$Mstate[$mstate]{'next3'}[1]=0;

$Istate[$mstate]={("A"=>0,"C"=>0,"D"=>0,"E"=>0,"F"=>0,"G"=>0,"H"=>0,"I"=>0,"K"=>0,"L"=>0,"M"=>0,"N"=>0,"P"=>0,"Q"=>0,"R"=>0,"S"=>0,"T"=>0,"V"=>0,"W"=>0,"Y"=>0,"X"=>0)};
$Istate[$mstate]{'name'}="I".$mstate;
$Istate[$mstate]{'next1'}[0]="E";
$Istate[$mstate]{'next1'}[1]=0;
$Istate[$mstate]{'next3'}[0]="I".$mstate;
$Istate[$mstate]{'next3'}[1]=0;

$Dstate[$mstate]{'name'}="D".$mstate;
$Dstate[$mstate]{'next1'}[0]="E";
$Dstate[$mstate]{'next1'}[1]=0;
$Dstate[$mstate]{'next3'}[0]="I".$mstate;
$Dstate[$mstate]{'next3'}[1]=0;

$Mstate[$mstate+1]{'name'}="M8";

for ($i=1;$i<$mstate;$i++){

        $Mstate[$i]={("A"=>0,"C"=>0,"D"=>0,"E"=>0,"F"=>0,"G"=>0,"H"=>0,"I"=>0,"K"=>0,"L"=>0,"M"=>0,"N"=>0,"P"=>0,"Q"=>0,"R"=>0,"S"=>0,"T"=>0,"V"=>0,"W"=>0,"Y"=>0,"X"=>0)};
        $Mstate[$i]{'name'}="M".$i;
        $Mstate[$i]{'next1'}[0]="M".($i+1);
        $Mstate[$i]{'next1'}[1]=0;
        $Mstate[$i]{'next2'}[0]="D".($i+1);
        $Mstate[$i]{'next2'}[1]=0;
        $Mstate[$i]{'next3'}[0]="I".$i;
        $Mstate[$i]{'next3'}[1]=0;

        $Istate[$i]={("A"=>0,"C"=>0,"D"=>0,"E"=>0,"F"=>0,"G"=>0,"H"=>0,"I"=>0,"K"=>0,"L"=>0,"M"=>0,"N"=>0,"P"=>0,"Q"=>0,"R"=>0,"S"=>0,"T"=>0,"V"=>0,"W"=>0,"Y"=>0,"X"=>0)};
        $Istate[$i]{'name'}="I".$i;
        $Istate[$i]{'next1'}[0]="M".($i+1);
        $Istate[$i]{'next1'}[1]=0;
        $Istate[$i]{'next2'}[0]="D".($i+1);
        $Istate[$i]{'next2'}[1]=0;
        $Istate[$i]{'next3'}[0]="I".$i;
        $Istate[$i]{'next3'}[1]=0;

        $Dstate[$i]{'name'}="D".$i;
        $Dstate[$i]{'next1'}[0]="M".($i+1);
        $Dstate[$i]{'next1'}[1]=0;
        $Dstate[$i]{'next2'}[0]="D".($i+1);
        $Dstate[$i]{'next2'}[1]=0;
        $Dstate[$i]{'next3'}[0]="I".$i;
        $Dstate[$i]{'next3'}[1]=0;

}


# 统计各个状态之间的转移概率
for ($i=0;$i<scalar(@content);$i++){
     campstate($seqstate[$i]);
}
for ($i=0;$i<=$mstate+1;$i++){

        $M=$Mstate[$i]{'next1'}[1]+$Mstate[$i]{'next2'}[1]+$Mstate[$i]{'next3'}[1];
        if ($M!=0) {
            $Mstate[$i]{'next1'}[1]/=$M;
            $Mstate[$i]{'next2'}[1]/=$M;
            $Mstate[$i]{'next3'}[1]/=$M;
        }

        $I=$Istate[$i]{'next1'}[1]+$Istate[$i]{'next2'}[1]+$Istate[$i]{'next3'}[1];
        if ($I!=0) {
            $Istate[$i]{'next1'}[1]/=$I;
            $Istate[$i]{'next2'}[1]/=$I;
            $Istate[$i]{'next3'}[1]/=$I;
        }

        $D=$Dstate[$i]{'next1'}[1]+$Dstate[$i]{'next2'}[1]+$Dstate[$i]{'next3'}[1];
        if ($D!=0) {
            $Dstate[$i]{'next1'}[1]/=$D;
            $Dstate[$i]{'next2'}[1]/=$D;
            $Dstate[$i]{'next3'}[1]/=$D;
        }
}


=pod 输出中间参数
for ($i=0;$i<=$mstate+1;$i++){

        print $Mstate[$i]{'name'}."\t";
        print $Mstate[$i]{'next1'}[0]."\t";
        print $Mstate[$i]{'next1'}[1]."\t";
        print $Mstate[$i]{'next2'}[0]."\t";
        print $Mstate[$i]{'next2'}[1]."\t";
        print $Mstate[$i]{'next3'}[0]."\t";
        print $Mstate[$i]{'next3'}[1]."\t\n";
        
        print $Istate[$i]{'name'}."\t";
        print $Istate[$i]{'next1'}[0]."\t";
        print $Istate[$i]{'next1'}[1]."\t";
        print $Istate[$i]{'next2'}[0]."\t";
        print $Istate[$i]{'next2'}[1]."\t";
        print $Istate[$i]{'next3'}[0]."\t";
        print $Istate[$i]{'next3'}[1]."\t\n";

        print $Dstate[$i]{'name'}."\t";
        print $Dstate[$i]{'next1'}[0]."\t";
        print $Dstate[$i]{'next1'}[1]."\t";
        print $Dstate[$i]{'next2'}[0]."\t";
        print $Dstate[$i]{'next2'}[1]."\t";
        print $Dstate[$i]{'next3'}[0]."\t";
        print $Dstate[$i]{'next3'}[1]."\t\n";
}
=cut

# 计算各状态的发出概率
# print $content[0];
for ($i=0;$i<length($content[1])-1;$i++){
    for ($j=0;$j<scalar(@content);$j++){
       $tempchar=substr($content[$j],$i,1);
       if ($tempchar ne "-"){
           if (substr($state[$i],0,1) eq "I"){
              $Istate[substr($state[$i],1,length($state[$i])-1)]{$tempchar}+=1;
           }
           elsif (substr($state[$i],0,1) eq "M"){
              $Mstate[substr($state[$i],1,length($state[$i])-1)]{$tempchar}+=1;
           }
       }
    }
}

for ($i=0;$i<=$mstate;$i++){
    $emitm=$Mstate[$i]{"A"}+$Mstate[$i]{"C"}+$Mstate[$i]{"D"}+$Mstate[$i]{"E"}+$Mstate[$i]{"F"}+$Mstate[$i]{"G"}+$Mstate[$i]{"H"}+$Mstate[$i]{"I"}+$Mstate[$i]{"K"}+$Mstate[$i]{"L"}+$Mstate[$i]{"M"}+$Mstate[$i]{"N"}+$Mstate[$i]{"P"}+$Mstate[$i]{"Q"}+$Mstate[$i]{"R"}+$Mstate[$i]{"S"}+$Mstate[$i]{"T"}+$Mstate[$i]{"V"}+$Mstate[$i]{"W"}+$Mstate[$i]{"Y"}+$Mstate[$i]{"X"};
    if ($emitm!=0){
        $Mstate[$i]{"A"}/=$emitm;
        $Mstate[$i]{"C"}/=$emitm;
        $Mstate[$i]{"D"}/=$emitm;
        $Mstate[$i]{"E"}/=$emitm;
        $Mstate[$i]{"F"}/=$emitm;
        $Mstate[$i]{"G"}/=$emitm;
        $Mstate[$i]{"H"}/=$emitm;
        $Mstate[$i]{"I"}/=$emitm;
        $Mstate[$i]{"K"}/=$emitm;
        $Mstate[$i]{"L"}/=$emitm;
        $Mstate[$i]{"M"}/=$emitm;
        $Mstate[$i]{"N"}/=$emitm;
        $Mstate[$i]{"P"}/=$emitm;
        $Mstate[$i]{"Q"}/=$emitm;
        $Mstate[$i]{"R"}/=$emitm;
        $Mstate[$i]{"S"}/=$emitm;
        $Mstate[$i]{"T"}/=$emitm;
        $Mstate[$i]{"V"}/=$emitm;
        $Mstate[$i]{"W"}/=$emitm;
        $Mstate[$i]{"Y"}/=$emitm;
        $Mstate[$i]{"X"}/=$emitm;
    }

    $emiti=$Istate[$i]{"A"}+$Istate[$i]{"C"}+$Istate[$i]{"D"}+$Istate[$i]{"E"}+$Istate[$i]{"F"}+$Istate[$i]{"G"}+$Istate[$i]{"H"}+$Istate[$i]{"I"}+$Istate[$i]{"K"}+$Istate[$i]{"L"}+$Istate[$i]{"M"}+$Istate[$i]{"N"}+$Istate[$i]{"P"}+$Istate[$i]{"Q"}+$Istate[$i]{"R"}+$Istate[$i]{"S"}+$Istate[$i]{"T"}+$Istate[$i]{"V"}+$Istate[$i]{"W"}+$Istate[$i]{"Y"}+$Istate[$i]{"X"};
    if ($emiti!=0){
        $Istate[$i]{"A"}/=$emiti;
        $Istate[$i]{"C"}/=$emiti;
        $Istate[$i]{"D"}/=$emiti;
        $Istate[$i]{"E"}/=$emiti;
        $Istate[$i]{"F"}/=$emiti;
        $Istate[$i]{"G"}/=$emiti;
        $Istate[$i]{"H"}/=$emiti;
        $Istate[$i]{"I"}/=$emiti;
        $Istate[$i]{"K"}/=$emiti;
        $Istate[$i]{"L"}/=$emiti;
        $Istate[$i]{"M"}/=$emiti;
        $Istate[$i]{"N"}/=$emiti;
        $Istate[$i]{"P"}/=$emiti;
        $Istate[$i]{"Q"}/=$emiti;
        $Istate[$i]{"R"}/=$emiti;
        $Istate[$i]{"S"}/=$emiti;
        $Istate[$i]{"T"}/=$emiti;
        $Istate[$i]{"V"}/=$emiti;
        $Istate[$i]{"W"}/=$emiti;
        $Istate[$i]{"Y"}/=$emiti;
        $Istate[$i]{"X"}/=$emiti;
    }
}

=p 输出中间参数
for ($i=0;$i<=$mstate;$i++){
print "\$Mstate[$i]{\"A\"}=".$Mstate[$i]{"A"}."\n";
print "\$Mstate[$i]{\"C\"}=".$Mstate[$i]{"C"}."\n";
print "\$Mstate[$i]{\"D\"}=".$Mstate[$i]{"D"}."\n";
print "\$Mstate[$i]{\"E\"}=".$Mstate[$i]{"E"}."\n";
print "\$Mstate[$i]{\"F\"}=".$Mstate[$i]{"F"}."\n";
print "\$Mstate[$i]{\"G\"}=".$Mstate[$i]{"G"}."\n";
print "\$Mstate[$i]{\"H\"}=".$Mstate[$i]{"H"}."\n";
print "\$Mstate[$i]{\"I\"}=".$Mstate[$i]{"I"}."\n";
print "\$Mstate[$i]{\"K\"}=".$Mstate[$i]{"K"}."\n";
print "\$Mstate[$i]{\"L\"}=".$Mstate[$i]{"L"}."\n";
print "\$Mstate[$i]{\"M\"}=".$Mstate[$i]{"M"}."\n";
print "\$Mstate[$i]{\"N\"}=".$Mstate[$i]{"N"}."\n";

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -