⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ssrfinder.java

📁 快速寻找SSR,使用滑动块寻找短重复序列,目前已有分子生物育种网站在使用,比较宝贵的源码
💻 JAVA
字号:
/*
 * Author: ruanjue
 * Created: 11/29/2003 15:12:16
 * Modified: 11/29/2003 15:12:16
 */
package org.genomics.ssr;

import java.io.*;
import java.util.*;
/**
 * Finds simple sequence repeats (SSRs) in FASTA-style nucleotide text.
 *
 * <p>Each sequence is scanned left to right. At every position the candidate
 * repeat units ending there (from {@link #MIN_REPEAT} to {@link #MAX_REPEAT}
 * characters long) are compared against the text that immediately follows; a
 * run of identical copies whose total length reaches {@link #MIN_LENGTH} is
 * recorded as an {@code SSR}.
 *
 * <p>The tuning parameters are public mutable statics that are read on every
 * call; this class performs no synchronization of its own.
 *
 * @author ruanjue (阮珏)
 */
public class SSRFinder
{
	/**
	 * Minimum total length, in characters, of a repeat region (all copies of
	 * the unit together) for it to be considered.
	 */
	public static int MIN_LENGTH=12;
	/**
	 * Shortest repeat-unit length that is tried. The default criterion is
	 * 2-8 nt units; another common criterion is 1-5, in which case set
	 * {@code MIN_REPEAT} and {@link #MAX_REPEAT} to (1,5).
	 */
	public static int MIN_REPEAT=2;
	/**
	 * Longest repeat-unit length that is tried (inclusive).
	 */
	public static int MAX_REPEAT=8;
	/**
	 * Maximum number of characters captured to the left of a repeat region.
	 */
	public static int LEFT_SEQ_LENGTH=15;
	/**
	 * Maximum number of characters captured to the right of a repeat region.
	 */
	public static int RIGHT_SEQ_LENGTH=15;

	/**
	 * Reads FASTA-style text and returns every SSR found in it.
	 *
	 * <p>A line starting with {@code '>'} begins a new record; the record name
	 * is the text after {@code '>'} up to the first space or tab. All other
	 * lines are concatenated verbatim into the current sequence, so sequence
	 * lines must not contain whitespace.
	 *
	 * @param reader source of the FASTA text; it is wrapped in a
	 *        {@link BufferedReader} and always closed before this method
	 *        returns, whether normally or by exception
	 * @return the SSRs of all records, in the order they were found
	 * @throws IOException if reading from or closing {@code reader} fails
	 */
	public static SSR[] find(Reader reader)throws IOException{
		BufferedReader in=new BufferedReader(reader);
		Vector found=new Vector();
		try{
			StringBuffer seq=new StringBuffer(512);
			String header=null;
			String line;
			while((line=in.readLine())!=null){
				if(line.startsWith(">")){
					// a new record starts: flush the sequence collected so far
					if(seq.length()>0){
						find(header,seq.toString(),found);
						seq.setLength(0);
					}
					int end=1;
					while(end<line.length()&&line.charAt(end)!=' '&&line.charAt(end)!='\t'){
						end++;
					}
					header=line.substring(1,end);
				}
				else {
					seq.append(line);
				}
			}
			// the last record has no following header to trigger its scan
			find(header,seq.toString(),found);
		}
		finally{
			// close even when reading or scanning fails
			in.close();
		}
		return (SSR[])found.toArray(new SSR[found.size()]);
	}

	/**
	 * Scans one sequence and appends every SSR found to {@code v}.
	 *
	 * <p>For each index {@code i} the candidate unit of length {@code t} is
	 * {@code seq[i-t+1..i]}; shorter units are tried first and the first one
	 * whose repeat region is long enough wins. A region is skipped after it
	 * has been examined, whether or not it was recorded. Regions whose unit
	 * consists of a single repeated character are skipped without being
	 * recorded.
	 *
	 * @param header record name used as the prefix of each SSR name; when
	 *        {@code null} or empty, {@code "undefined"} is used instead
	 * @param seq the sequence to scan
	 * @param v receives the {@code SSR} objects, in order of discovery
	 * @return the number of SSRs added to {@code v}
	 * @throws IOException never thrown by this implementation; declared for
	 *         compatibility with existing callers
	 */
	protected static int find(String header,String seq,Vector v)throws IOException{
		if(seq.length()<MIN_LENGTH){
			return 0;
		}
		int count=0;
		// ptr is the leftmost index a candidate unit may start at
		int ptr=0;
		for(int i=0;i<seq.length()-MIN_REPEAT;i++){
			// keep the window [ptr..i] at most MAX_REPEAT wide, so that a unit
			// of exactly MAX_REPEAT characters is still tried
			if(i-ptr>=MAX_REPEAT){
				ptr++;
			}
			for(int t=MIN_REPEAT;t<=(i-ptr+1);t++){
				int num=countFollowingCopies(seq,i,t);
				// num counts the copies AFTER the first unit, hence num+1 in
				// the length test; num>=1 guarantees the skip below advances
				if(num<1||(num+1)*t<MIN_LENGTH){
					continue;
				}
				if(!isSingleCharUnit(seq,i-t+1,i)){
					v.add(buildSSR(header,seq,i,t,num,count));
					count++;
				}
				// skip the repeat region and restart the window behind it
				i+=num*t-1;
				ptr=i;
				break;
			}
		}
		return count;
	}

	/**
	 * Counts how many consecutive copies of the unit {@code seq[unitEnd-unitLen+1..unitEnd]}
	 * directly follow it, not counting the unit itself.
	 */
	private static int countFollowingCopies(String seq,int unitEnd,int unitLen){
		int copies=0;
		for(int m=unitEnd+unitLen;m<seq.length();m+=unitLen){
			// compare the copy ending at m with the unit, back to front
			int n=0;
			while(n<unitLen&&seq.charAt(unitEnd-n)==seq.charAt(m-n)){
				n++;
			}
			if(n<unitLen){
				break;
			}
			copies++;
		}
		return copies;
	}

	/**
	 * Tells whether every character in {@code seq[from..to]} (inclusive) equals the first one.
	 */
	private static boolean isSingleCharUnit(String seq,int from,int to){
		char first=seq.charAt(from);
		for(int k=from+1;k<=to;k++){
			if(seq.charAt(k)!=first){
				return false;
			}
		}
		return true;
	}

	/**
	 * Builds the SSR record for a unit of length {@code unitLen} ending at {@code unitEnd}
	 * that is followed by {@code extraCopies} further copies.
	 */
	private static SSR buildSSR(String header,String seq,int unitEnd,int unitLen,int extraCopies,int index){
		int regionStart=unitEnd-unitLen+1;          // first index of the repeat region
		int regionLast=unitEnd+extraCopies*unitLen; // last index of the repeat region
		SSR ssr=new SSR();
		ssr.seq=seq.substring(regionStart,unitEnd+1);
		ssr.src=seq;
		if(header==null||header.length()==0){
			ssr.name="undefined_"+index;
		}
		else {
			ssr.name=header+"_"+index;
		}
		int leftStart=Math.max(0,regionStart-LEFT_SEQ_LENGTH);
		ssr.left_seq=seq.substring(leftStart,regionStart);
		ssr.left_pos=regionStart;
		// substring's end index is exclusive, so the flank may extend to
		// seq.length() itself; regionLast+1 never exceeds seq.length()
		int rightStart=regionLast+1;
		int rightEnd=Math.min(seq.length(),rightStart+RIGHT_SEQ_LENGTH);
		ssr.right_seq=seq.substring(rightStart,rightEnd);
		ssr.right_pos=regionLast;
		ssr.repeat=extraCopies+1;
		return ssr;
	}

	/**
	 * Command-line entry point: scans the FASTA file named by the first
	 * argument and prints the number of SSRs, the elapsed time and each SSR.
	 *
	 * @param args {@code args[0]} is the path of the file to scan
	 * @throws Exception if the file cannot be opened or read
	 */
	public static void main(String[] args)throws Exception{
		if(args.length<1){
			System.err.println("usage: java org.genomics.ssr.SSRFinder <fasta-file>");
			System.exit(1);
		}
		FileReader reader=new FileReader(args[0]);
		long time=System.currentTimeMillis();
		SSR[] ssrs=find(reader);
		System.out.println("find "+ssrs.length+"\t"+( System.currentTimeMillis()-time)+" ms");
		for(int i=0;i<ssrs.length;i++){
			System.out.println(">"+(i+1));
			System.out.println(ssrs[i]);
		}
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -