📄 msa.java
字号:
import java.io.*;
import java.util.*;

/**
 * An alignment read from a "modfile", together with per-sequence weights and
 * a per-column flag telling whether the column is a match column.
 *
 * <p>File layout, as far as this reader interprets it:
 * <ul>
 *   <li>Reading stops at the first empty line or at end of file.</li>
 *   <li>A row starting with exactly one {@code '<'} is a sequence row of the
 *       first alphabet; rows starting with {@code "<<"}, {@code "<<<"} and
 *       {@code "<<<<"} carry the letters of alphabets 2, 3 and 4 for the same
 *       sequences, in the same order.</li>
 *   <li>Letters inside a row are separated by any of the characters
 *       {@code '<'}, {@code '>'} and {@code ';'}. The number of {@code ';'}
 *       in the first alphabet-1 row defines the number of columns.</li>
 *   <li>A row starting with {@code 'L'} marks hand-picked match columns
 *       ({@code '*'} = match); it is only used by construction method "H".</li>
 * </ul>
 *
 * <p>Unrecoverable input problems are reported and terminate the JVM through
 * {@code System.exit(0)}.
 */
class MSA {
    /** Minimum share of non-gap letters a column needs to become a match column ("F" method). */
    final double MATCH_GAP_SHARE_CUTOFF = 0.6;

    /** Highest alphabet level (1-4) found in the modfile; 0 if the file had no rows. */
    public int nrAlphabets;
    /** Number of sequences (alphabet-1 rows). */
    public int nrRows;
    /** Number of alignment columns. */
    public int nrColumns;
    /** The alignment, indexed {@code [row][column]}. */
    public MultiLetter[][] theMsa;
    /** One weight per sequence, initialised to 1.0. */
    public double[] sequenceWeights;
    /**
     * {@code true} for every match column. Stays {@code null} when the chosen
     * construction method did not produce a result (method "M", or method "H"
     * on a file without an 'L' row).
     */
    public boolean[] matchColumns;

    /**
     * Reads the alignment from {@code modfile} and determines its match columns.
     *
     * @param modfile            path of the file to read
     * @param constructionMethod selects the match column strategy by its first
     *                           letter: "F" (fast), "H" (hand), "M" (MAP) or "Q" (query)
     */
    public MSA(String modfile, String constructionMethod) {
        matchColumns = null;
        readModfile(modfile);
        setInitialSequenceWeights();
        getMatchColumns(constructionMethod, modfile);
    }

    /** Prints every cell of the alignment to standard output, one row per line. */
    public void dumpMSA() {
        System.out.println("msa dump:");
        for (int row = 0; row < nrRows; row++) {
            System.out.println("");
            for (int col = 0; col < nrColumns; col++) {
                theMsa[row][col].dump();
                System.out.print(" ");
            }
        }
        System.out.println("");
    }

    /**
     * Returns the alphabet level encoded by the number of leading '<'
     * characters of a row (capped at 4), or 0 if the row is not a sequence row.
     */
    private static int alphabetLevel(String row) {
        if (row.startsWith("<<<<")) {
            return 4;
        }
        if (row.startsWith("<<<")) {
            return 3;
        }
        if (row.startsWith("<<")) {
            return 2;
        }
        if (row.startsWith("<")) {
            return 1;
        }
        return 0;
    }

    /** Counts the occurrences of {@code c} in {@code s}. */
    private static int countChar(String s, char c) {
        int count = 0;
        for (int i = 0; i < s.length(); i++) {
            if (s.charAt(i) == c) {
                count++;
            }
        }
        return count;
    }

    /** Tells whether {@code letter} is one of the gap symbols "-", " ", "." or "_". */
    private static boolean isGap(String letter) {
        return letter.equals("-") || letter.equals(" ")
            || letter.equals(".") || letter.equals("_");
    }

    /**
     * Stores {@code letter} as the letter of the given alphabet in cell
     * {@code [row][col]}. Alphabet 1 creates the cell; alphabets 2-4 fill in
     * a cell that alphabet 1 created earlier.
     */
    private void storeLetter(int row, int col, int alphabet, String letter) {
        switch (alphabet) {
            case 1:
                theMsa[row][col] = new MultiLetter(letter, null, null, null);
                break;
            case 2:
                theMsa[row][col].letter_2 = letter;
                break;
            case 3:
                theMsa[row][col].letter_3 = letter;
                break;
            case 4:
                theMsa[row][col].letter_4 = letter;
                break;
        }
    }

    /**
     * Reads the modfile in two passes: the first determines the dimensions
     * ({@code nrRows}, {@code nrColumns}, {@code nrAlphabets}), the second
     * fills {@code theMsa}. Rows shorter than {@code nrColumns} are padded
     * with " ".
     *
     * <p>On an {@code IOException} a message is printed and the JVM exits.
     */
    private void readModfile(String modfile) {
        try {
            /* pass 1: dimensions */
            int msaLength = 0;
            int nrSeqs = 0;
            int maxAlphabet = 0;
            boolean first = true;
            BufferedReader br = new BufferedReader(new FileReader(modfile));
            try {
                for (String modrow = br.readLine();
                     modrow != null && !modrow.equals("");
                     modrow = br.readLine()) {
                    int level = alphabetLevel(modrow);
                    if (level == 1) {
                        nrSeqs++;
                        if (first) {
                            msaLength = countChar(modrow, ';');
                            first = false;
                        }
                    }
                    /* remember the highest level seen instead of the level of
                     * the last row, so a trailing annotation row cannot reset
                     * it; any row counts as at least alphabet 1 */
                    maxAlphabet = Math.max(maxAlphabet, Math.max(level, 1));
                }
            } finally {
                br.close();
            }
            nrAlphabets = maxAlphabet;
            nrColumns = msaLength;
            nrRows = nrSeqs;
            theMsa = new MultiLetter[nrRows][nrColumns];

            /* pass 2: read alignment */
            br = new BufferedReader(new FileReader(modfile));
            try {
                int curAlphabet = 1;
                int curRow = 0;
                for (String modrow = br.readLine();
                     modrow != null && !modrow.equals("");
                     modrow = br.readLine()) {
                    int level = alphabetLevel(modrow);
                    if (level == 0) {
                        /* not a sequence row (e.g. the 'L' match column row):
                         * it was not counted in pass 1, so storing it would
                         * overrun theMsa */
                        continue;
                    }
                    if (level > curAlphabet) {
                        /* first row of a higher alphabet: restart at sequence 0 */
                        curAlphabet = level;
                        curRow = 0;
                    }

                    StringTokenizer st = new StringTokenizer(modrow, "<>;");
                    int pos = 0;
                    while (st.hasMoreTokens()) {
                        storeLetter(curRow, pos, curAlphabet, st.nextToken());
                        pos++;
                    }
                    while (pos < nrColumns) {
                        storeLetter(curRow, pos, curAlphabet, " ");
                        pos++;
                    }
                    curRow++;
                }
            } finally {
                br.close();
            }
        } catch (IOException e) {
            System.out.println("Could not read from modfile");
            System.exit(0);
        }
    }

    /** Gives every sequence the weight 1.0. */
    private void setInitialSequenceWeights() {
        sequenceWeights = new double[nrRows];
        Arrays.fill(sequenceWeights, 1.0);
    }

    /**
     * Dispatches on the first letter of {@code constructionMethod}; an unknown
     * method is reported and terminates the JVM.
     */
    private void getMatchColumns(String constructionMethod, String modfile) {
        if (constructionMethod.startsWith("F")) {
            getMatchColumnsFast();
        } else if (constructionMethod.startsWith("H")) {
            getMatchColumnsHand(modfile);
        } else if (constructionMethod.startsWith("M")) {
            getMatchColumnsMAP();
        } else if (constructionMethod.startsWith("Q")) {
            getMatchColumnsQuery();
        } else {
            P.MESSAGE("No such match column identifyer exists");
            System.exit(0);
        }
    }

    /**
     * Marks a column as match column when the share of sequences holding a
     * non-gap letter (alphabet 1) in it reaches {@code MATCH_GAP_SHARE_CUTOFF}.
     */
    private void getMatchColumnsFast() {
        matchColumns = new boolean[nrColumns];
        for (int col = 0; col < nrColumns; col++) {
            int nrResidues = 0;
            for (int row = 0; row < nrRows; row++) {
                if (!isGap(theMsa[row][col].letter_1)) {
                    nrResidues++;
                }
            }
            matchColumns[col] =
                ((double) nrResidues / ((double) nrRows)) >= MATCH_GAP_SHARE_CUTOFF;
        }
    }

    /**
     * Takes the match columns from the 'L' row of the modfile. The first and
     * the last character of that row are skipped; of the characters between
     * them, the one at index i describes column i-1 and marks a match column
     * when it is '*'. If the file has several 'L' rows the last one wins; if
     * it has none, {@code matchColumns} stays {@code null}.
     *
     * <p>An 'L' row longer than the alignment, or an {@code IOException}, is
     * reported and terminates the JVM.
     */
    private void getMatchColumnsHand(String modfile) {
        try {
            BufferedReader br = new BufferedReader(new FileReader(modfile));
            try {
                for (String modrow = br.readLine();
                     modrow != null && !modrow.equals("");
                     modrow = br.readLine()) {
                    if (modrow.charAt(0) != 'L') {
                        continue;
                    }
                    /* lead columns */
                    matchColumns = new boolean[nrColumns];
                    for (int i = 1; i < modrow.length() - 1; i++) {
                        if (i - 1 >= nrColumns) {
                            P.MESSAGE("match column row is longer than the alignment");
                            System.exit(0);
                        } else {
                            matchColumns[i - 1] = (modrow.charAt(i) == '*');
                        }
                    }
                }
            } finally {
                br.close();
            }
        } catch (IOException e) {
            P.MESSAGE("Could not read msa");
            System.exit(0);
        }
    }

    /** Not implemented: leaves {@code matchColumns} untouched ({@code null}). */
    private void getMatchColumnsMAP() {
    }

    /**
     * Marks the columns for which the query sequence has non gap residues.
     * The query seq is always taken to be the first seq in the alignment.
     */
    private void getMatchColumnsQuery() {
        matchColumns = new boolean[nrColumns];
        for (int col = 0; col < nrColumns; col++) {
            matchColumns[col] = !isGap(theMsa[0][col].letter_1);
        }
    }

    /**
     * Counts the match columns.
     *
     * @return the number of {@code true} entries in {@code matchColumns},
     *         or 0 if no match columns have been determined
     */
    public int getNrOfMatchColumns() {
        if (matchColumns == null) {
            return 0;
        }
        int nr = 0;
        for (int i = 0; i < matchColumns.length; i++) {
            if (matchColumns[i]) {
                nr++;
            }
        }
        return nr;
    }

    /** One alignment cell: the letter of up to four alphabets at the same position. */
    class MultiLetter {
        public String letter_1;
        public String letter_2;
        public String letter_3;
        public String letter_4;

        /** Creates a cell from the letters of alphabets 1 to 4 (any may be {@code null}). */
        public MultiLetter(String l_1, String l_2, String l_3, String l_4) {
            letter_1 = l_1;
            letter_2 = l_2;
            letter_3 = l_3;
            letter_4 = l_4;
        }

        /** Prints the four letters, separated by ':', to standard output without a newline. */
        public void dump() {
            System.out.print(letter_1 + ":" + letter_2 + ":" + letter_3 + ":" + letter_4);
        }

        /**
         * Returns the letter of alphabet {@code i}.
         *
         * @param i alphabet number, 1 to 4
         * @return the letter, or {@code null} for any other {@code i}
         */
        public String getLetter(int i) {
            switch (i) {
                case 1:
                    return letter_1;
                case 2:
                    return letter_2;
                case 3:
                    return letter_3;
                case 4:
                    return letter_4;
                default:
                    return null;
            }
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -