📄 ssrfinder.java
字号:
/*
* Author: ruanjue
* Created: 11/29/2003 15:12:16
* Modified: 11/29/2003 15:12:16
*/
package org.genomics.ssr;
import java.io.*;
import java.util.*;
/**
*@author 阮珏
*/
/**
 * Scans FASTA-formatted nucleotide text for simple sequence repeats (SSRs):
 * short units that occur several times in a row. Each repeat found is
 * reported as an {@code SSR} object carrying the unit, the source sequence,
 * the position of the repeat region and the flanking sequences on both sides.
 *
 * All tuning parameters are public static fields, so they are shared by every
 * caller in the JVM.
 *
 * @author ruanjue
 */
public class SSRFinder
{
    /**
     * The minimum total length of a repeat region (unit length multiplied by
     * the number of copies) for the region to be reported.
     */
    public static int MIN_LENGTH=12;
    /**
     * Shortest repeat unit length that is searched. Together with
     * {@link #MAX_REPEAT} this gives the 2-8 nt criterion; to use the
     * alternative 1-5 criterion set the two fields to (1,5). Note that units
     * made of a single repeated letter are never reported, so a unit length
     * of 1 cannot produce a result.
     */
    public static int MIN_REPEAT=2;
    /**
     * Longest repeat unit length that is searched (inclusive).
     */
    public static int MAX_REPEAT=8;
    /**
     * Maximum number of characters copied from the sequence immediately to
     * the left of a repeat region.
     */
    public static int LEFT_SEQ_LENGTH=15;
    /**
     * Maximum number of characters copied from the sequence immediately to
     * the right of a repeat region.
     */
    public static int RIGHT_SEQ_LENGTH=15;

    /**
     * Reads every record from a FASTA stream and collects the repeats found
     * in each of them.
     *
     * A line starting with {@code >} opens a new record; the record name is
     * the text after {@code >} up to the first space or tab. All other lines
     * are concatenated verbatim to form the record's sequence, so the
     * sequence lines must not contain whitespace. The reader is always closed
     * before this method returns, even when reading fails.
     *
     * @param reader source of GenBank/FASTA style text
     * @return the repeats of all records, in the order they were found
     * @throws IOException if reading or closing the stream fails
     */
    public static SSR[] find(Reader reader)throws IOException{
        BufferedReader in=new BufferedReader(reader);
        Vector hits=new Vector();
        try{
            StringBuffer sequence=new StringBuffer(512);
            String header=null;
            String line;
            while((line=in.readLine())!=null){
                if(line.startsWith(">")){
                    // A new record begins: scan the one collected so far.
                    if(sequence.length()>0){
                        find(header,sequence.toString(),hits);
                        sequence.setLength(0);
                    }
                    header=parseName(line);
                }
                else {
                    sequence.append(line);
                }
            }
            // The last record has no following header line to trigger its scan.
            find(header,sequence.toString(),hits);
        }
        finally{
            in.close();
        }
        return (SSR[])hits.toArray(new SSR[hits.size()]);
    }

    /**
     * Extracts the record name from a FASTA header line: everything after the
     * leading {@code >} up to, but not including, the first space or tab.
     */
    private static String parseName(String headerLine){
        int end=1;
        while(end<headerLine.length()){
            char c=headerLine.charAt(end);
            if(c==' '||c=='\t'){
                break;
            }
            end++;
        }
        return headerLine.substring(1,end);
    }

    /**
     * Scans one sequence and appends an {@code SSR} to {@code v} for every
     * repeat region found.
     *
     * For each position {@code i} the candidate unit of length {@code t} is
     * the {@code t} characters ending at {@code i}; unit lengths are tried in
     * ascending order from {@link #MIN_REPEAT} to {@link #MAX_REPEAT} and the
     * first one whose run of consecutive copies spans at least
     * {@link #MIN_LENGTH} characters wins. Runs whose unit consists of one
     * repeated letter are skipped without being reported. After a run the
     * scan resumes at the run's last character.
     *
     * @param header record name used as the prefix of each result's name;
     *               {@code "undefined"} is used when it is null or empty
     * @param seq    the sequence to scan
     * @param v      receives the {@code SSR} objects that are found
     * @return the number of repeats appended to {@code v}
     * @throws IOException never thrown by this implementation; kept so that
     *                     the signature stays compatible with existing callers
     */
    protected static int find(String header,String seq,Vector v)throws IOException{
        final int length=seq.length();
        if(length<MIN_LENGTH){
            return 0;
        }
        int count=0;
        // Lowest index at which a candidate unit is allowed to start; it is
        // moved forward after every run so that a new unit cannot reach back
        // into the region that was just consumed.
        int floor=0;
        for(int i=0;i<length-MIN_REPEAT;i++){
            // The unit must fit between floor and i and may not exceed
            // MAX_REPEAT (inclusive, so that the longest unit is tested too).
            int maxUnit=Math.min(MAX_REPEAT,i-floor+1);
            for(int t=MIN_REPEAT;t<=maxUnit;t++){
                int num=countFollowingCopies(seq,i,t);
                // num counts only the copies after the first one, hence num+1.
                // A unit without any following copy is not a repeat; rejecting
                // it also guarantees that the scan below always moves forward.
                if(num==0||(num+1)*t<MIN_LENGTH){
                    continue;
                }
                int start=i-t+1;
                int end=i+num*t;
                if(!isSingleLetter(seq,start,i)){
                    v.add(createSsr(header,seq,start,t,end,num+1,count));
                    count++;
                }
                // Skip the repeat region: the loop increment then places i on
                // the last character of the run.
                i+=num*t-1;
                floor=i;
                break;
            }
        }
        return count;
    }

    /**
     * Counts how many times the unit of length {@code unitLen} ending at index
     * {@code unitEnd} is repeated immediately after itself.
     */
    private static int countFollowingCopies(String seq,int unitEnd,int unitLen){
        int unitStart=unitEnd-unitLen+1;
        int copies=0;
        for(int m=unitEnd+unitLen;m<seq.length();m+=unitLen){
            if(!seq.regionMatches(unitStart,seq,m-unitLen+1,unitLen)){
                break;
            }
            copies++;
        }
        return copies;
    }

    /**
     * Tells whether all characters of {@code seq} between {@code from} and
     * {@code to} (both inclusive) are the same.
     */
    private static boolean isSingleLetter(String seq,int from,int to){
        char first=seq.charAt(from);
        for(int k=from+1;k<=to;k++){
            if(seq.charAt(k)!=first){
                return false;
            }
        }
        return true;
    }

    /**
     * Builds the result object for the repeat region that occupies the indices
     * {@code start} to {@code end} (both inclusive) of {@code seq}.
     */
    private static SSR createSsr(String header,String seq,int start,int unitLen,int end,int copies,int index){
        SSR ssr=new SSR();
        ssr.seq=seq.substring(start,start+unitLen);
        ssr.src=seq;
        if(header==null||header.length()==0){
            ssr.name="undefined_"+index;
        }
        else {
            ssr.name=header+"_"+index;
        }
        int left=Math.max(0,start-LEFT_SEQ_LENGTH);
        ssr.left_seq=seq.substring(left,start);
        ssr.left_pos=start;
        // substring's end index is exclusive, so the flank may extend up to
        // seq.length() itself; clamping to length-1 would lose the last base.
        int rightStart=end+1;
        int rightEnd=Math.min(seq.length(),rightStart+RIGHT_SEQ_LENGTH);
        ssr.right_seq=seq.substring(rightStart,rightEnd);
        ssr.right_pos=end;
        ssr.repeat=copies;
        return ssr;
    }

    /**
     * Command line entry point: scans the FASTA file named by the first
     * argument and prints the number of repeats, the elapsed time and then
     * every repeat preceded by a numbered {@code >} line.
     *
     * @param args {@code args[0]} is the path of the FASTA file
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(String[] args)throws Exception{
        if(args.length<1){
            System.err.println("usage: SSRFinder <fasta file>");
            return;
        }
        FileReader reader=new FileReader(args[0]);
        long time=System.currentTimeMillis();
        SSR[] ssrs=find(reader);
        System.out.println("find "+ssrs.length+"\t"+( System.currentTimeMillis()-time)+" ms");
        for(int i=0;i<ssrs.length;i++){
            System.out.println(">"+(i+1));
            System.out.println(ssrs[i]);
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -