📄 patscan.pl
字号:
use strict;
use warnings;
# Main script: scan every record of the 3'UTR source file for two 7-mer
# motifs and report each hit (record ID and 0-based offset of the hit within
# the cleaned-up sequence) both to the result file and to STDOUT, followed by
# the total number of hits per motif.

# Input and output locations.
my $inPath  = "D:\\patscan\\input\\";
my $outPath = "D:\\patscan\\output\\";
my $sourceFile = $inPath . "3UTR.Mus_nr.dat";
my $resultFile = $outPath . "result.dat";

# Lexical handles with three-argument open: a lexical handle can be passed to
# get_next_record() under "use strict" (a bareword handle cannot), and the
# explicit mode keeps the file name from being interpreted as a mode.
open(my $source_fh, '<', $sourceFile)
    or die "cannot open source file '$sourceFile': $!\n";
open(my $out_fh, '>', $resultFile)
    or die "cannot create output file '$resultFile': $!\n";

my $count1 = 0;    # number of CACTGCC hits
my $count2 = 0;    # number of ACTGCCT hits

# Fetch the records one by one and look for the motifs in each sequence.
# The explicit defined() keeps a record that happens to be the string "0"
# from ending the loop early.
while (defined(my $record = get_next_record($source_fh))) {
    my $microSeq = get_micro_seq($record);
    my $ID       = get_ID($record);

    # A chunk without a sequence block (e.g. trailing text after the last
    # record) has nothing to scan.
    next unless defined $microSeq;
    $ID = '' unless defined $ID;

    # Each motif is paired with the counter it feeds. All hits of the first
    # motif in a record are reported before the hits of the second one.
    for my $scan ([ 'CACTGCC', \$count1 ], [ 'ACTGCCT', \$count2 ]) {
        my ($motif, $count_ref) = @{$scan};

        # NOTE(review): a /g scan resumes after the end of the previous hit,
        # so occurrences that overlap an earlier hit of the same motif are
        # not counted -- confirm this is the intended counting rule.
        while ($microSeq =~ /\Q$motif\E/ig) {
            my $pos = $-[0];    # offset where the hit starts (same as length $`)
            ${$count_ref}++;
            my $hit = join '', $motif, ' ', 'ID = ', $ID, ' ',
                               'Position = ', $pos, ' ', "\n";
            print {$out_fh} $hit;
            print $hit;
        }
    }
}

my $summary = "Num of CACTGCC = $count1\n" . "Num of ACTGCCT = $count2" . "\n";
print {$out_fh} $summary;
print $summary;
print "success!\n";
print {$out_fh} "success!\n";

close($source_fh);
# Buffered write errors only surface when the handle is closed.
close($out_fh) or die "cannot finish writing '$resultFile': $!\n";
exit;
# Read the next record from an open filehandle.
#
# The only argument (the first element of @_) is the filehandle to read from.
# A record is everything up to and including the next "//\n" terminator.
# Returns the record text, or undef once the handle is exhausted.
sub get_next_record {
    my ($fh) = @_;

    # Switch the input record separator for this call only; "local" puts the
    # previous value back automatically when the sub returns.
    local $/ = "//\n";

    my $entry = <$fh>;
    return $entry;
}
# Extract the bare sequence from one record.
#
# Takes the record text as its only argument. The sequence block is taken to
# be every line that follows the line starting with "SQ". Whitespace, digits
# and the closing "//" terminator line are stripped, so only the sequence
# letters remain.
#
# Returns the cleaned sequence, or an empty string when the record is
# undefined or has no "SQ" line followed by at least one complete line.
sub get_micro_seq {
    my $record = shift;    # shift with no argument takes the first element of @_
    return '' unless defined $record;

    # Capture in list context so a failed match yields undef instead of a
    # stale $1 left over from an earlier successful match.
    my ($pre_seq) = $record =~ /^SQ.*\n((?:.*\n)+)/m;
    return '' unless defined $pre_seq;

    $pre_seq =~ s{^//\s*\z}{}m;    # drop the record terminator line
    $pre_seq =~ s/\s//g;           # remove whitespace from the data
    $pre_seq =~ s/\d//g;           # remove digits from the data
    return $pre_seq;               # only sequence characters are left
}
# Extract the identifier from one record.
#
# Takes the record text as its only argument and looks for a line that starts
# with "ID" followed by spaces or tabs; the identifier is the first
# whitespace-free token after that padding.
#
# Returns the identifier, or an empty string when the record is undefined or
# contains no such line (so callers can print the result without an
# "uninitialized value" warning).
sub get_ID {
    my $record = shift;
    return '' unless defined $record;

    # List-context capture: a failed match gives undef rather than whatever
    # $1 held from a previous successful match. [ \t]+ accepts any amount of
    # padding after "ID" but never runs on into the next line.
    my ($ID) = $record =~ /^ID[ \t]+(\S+)/m;

    return defined $ID ? $ID : '';
}
#By Jun Chen and Yi Dongliang
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -