📄 profilecreator.java
字号:
import java.io.*;import java.util.*;class ProfileCreator{ BufferedReader reader; ModelMaker modelmaker; String hmmrow; int nrOfAlphabets; LinkedList alphabets; String emiss_prior_1 = null; String emiss_prior_2 = null; String emiss_prior_3 = null; String emiss_prior_4 = null; String emiss_prior_scaler_match_1 = null; String emiss_prior_scaler_match_2 = null; String emiss_prior_scaler_match_3 = null; String emiss_prior_scaler_match_4 = null; String emiss_prior_scaler_insert_1 = null; String emiss_prior_scaler_insert_2 = null; String emiss_prior_scaler_insert_3 = null; String emiss_prior_scaler_insert_4 = null; /* first input parameter is a file with information on how to create the hmms, * second input parameter is a name of outputdir */ public ProfileCreator(String hmmfile, String outdir) { //System.out.println("outdir = " + outdir); //System.exit(0); if(outdir.charAt(outdir.length() - 1) != '/') { outdir = outdir + "/"; } try { BufferedReader br = new BufferedReader(new FileReader(hmmfile)); hmmrow = br.readLine(); while(hmmrow != null) { StringTokenizer st = new StringTokenizer(hmmrow, "\t "); if(st.countTokens() < 1 || hmmrow.startsWith("\n") || hmmrow.startsWith("#")) { hmmrow = br.readLine(); continue; } else { String s = st.nextToken(); String alphabet = ""; if(s.equals("NR_OF_ALPHABETS:")) { nrOfAlphabets = Integer.parseInt(st.nextToken()); alphabets = new LinkedList(); } else if(s.equals("ALPHABET_1:")) { if(alphabets != null) { alphabet = st.nextToken(); alphabets.add(getAlphabet(alphabet)); } } else if(s.equals("ALPHABET_2:")) { alphabet = st.nextToken(); alphabets.add(getAlphabet(alphabet)); } else if(s.equals("ALPHABET_3:")) { alphabet = st.nextToken(); alphabets.add(getAlphabet(alphabet)); } else if(s.equals("ALPHABET_4:")) { alphabet = st.nextToken(); alphabets.add(getAlphabet(alphabet)); } else if(s.equals("EMISSION_PRIORFILE_1:")) { emiss_prior_1 = st.nextToken(); } else if(s.equals("EMISSION_PRIORFILE_2:")) { emiss_prior_2 = st.nextToken(); } else if(s.equals("EMISSION_PRIORFILE_3:")) { emiss_prior_3 = st.nextToken(); } else if(s.equals("EMISSION_PRIORFILE_4:")) { emiss_prior_4 = st.nextToken(); } else if(s.equals("EMISSION_PRIORSCALER_MATCH_1:")) { emiss_prior_scaler_match_1 = st.nextToken(); } else if(s.equals("EMISSION_PRIORSCALER_MATCH_2:")) { emiss_prior_scaler_match_2 = st.nextToken(); } else if(s.equals("EMISSION_PRIORSCALER_MATCH_3:")) { emiss_prior_scaler_match_3 = st.nextToken(); } else if(s.equals("EMISSION_PRIORSCALER_MATCH_4:")) { emiss_prior_scaler_match_4 = st.nextToken(); } else if(s.equals("EMISSION_PRIORSCALER_INSERT_1:")) { emiss_prior_scaler_insert_1 = st.nextToken(); } else if(s.equals("EMISSION_PRIORSCALER_INSERT_2:")) { emiss_prior_scaler_insert_2 = st.nextToken(); } else if(s.equals("EMISSION_PRIORSCALER_INSERT_3:")) { emiss_prior_scaler_insert_3 = st.nextToken(); } else if(s.equals("EMISSION_PRIORSCALER_INSERT_4:")) { emiss_prior_scaler_insert_4 = st.nextToken(); } else if(s.equals("END_HEADER")) { break; } } hmmrow = br.readLine(); } /* read hmm rows */ hmmrow = br.readLine(); while(hmmrow != null && hmmrow != "") { StringTokenizer st = new StringTokenizer(hmmrow, "\t "); if(st.countTokens() < 1 || hmmrow.startsWith("\n") || hmmrow.startsWith("#")) { hmmrow = br.readLine(); continue; } if(st.countTokens() != 3) { printerr("Could not create hmm"); hmmrow = br.readLine(); continue; } else { modelmaker = new ModelMaker(); try { createHMM(st.nextToken(), st.nextToken(), st.nextToken(), emiss_prior_1, emiss_prior_2, emiss_prior_3, emiss_prior_4, emiss_prior_scaler_match_1, emiss_prior_scaler_match_2, emiss_prior_scaler_match_3, emiss_prior_scaler_match_4, emiss_prior_scaler_insert_1, emiss_prior_scaler_insert_2, emiss_prior_scaler_insert_3, emiss_prior_scaler_insert_4); saveHMM(outdir); } catch(Exception e) { printerr("Could not create hmm"); } hmmrow = br.readLine(); } } } catch(IOException e) { printerr("Could not read from hmmfile"); System.exit(0); } } /****************input/output handling**********************************/ private void println(String s) { System.out.println(s); } private void newln(){ System.out.println(""); } private void printerr(String s) { System.out.println("Error: " + s); } private void print(String s) { System.out.print(s); System.out.flush(); } private String readln() { try { String s = reader.readLine(); if(s == "" || s == null) { return null; } s = s.trim(); return s; } catch(IOException e) { P.INTERNAL_ERROR("DataReader.readLine: IOException"); return null; } } /*******************************HMM methods****************************/ private void createHMM(String name, String size_s, String global_s, String prifile, String prifile_2, String prifile_3, String prifile_4, String scaler_s, String scaler_s_2, String scaler_s_3, String scaler_s_4, String scaler_insert_s, String scaler_insert_2_s, String scaler_insert_3_s, String scaler_insert_4_s) { modelmaker.createHMM(name); modelmaker.setNrOfAlphabets(alphabets.size()); for(int i = 1; i <= alphabets.size(); i++) { modelmaker.setAlphabet(i, ((String[])alphabets.get(i-1))); } int size = 0; try { size = Integer.parseInt(size_s); } catch(NumberFormatException e) { printerr("Could not read size from infile"); size = 0; } double emissPriorScaler = 1.0; double emissPriorScaler_2 = 1.0; double emissPriorScaler_3 = 1.0; double emissPriorScaler_4 = 1.0; double emissPriorScaler_insert = 1.0; double emissPriorScaler_insert_2 = 1.0; double emissPriorScaler_insert_3 = 1.0; double emissPriorScaler_insert_4 = 1.0; if(prifile != null && scaler_s != null) { try { emissPriorScaler = Double.parseDouble(scaler_s); emissPriorScaler_insert = emissPriorScaler; if(scaler_insert_s != null) { emissPriorScaler_insert = Double.parseDouble(scaler_insert_s); } } catch(NumberFormatException e) { printerr("Could not read prior scaler from infile"); emissPriorScaler = 1.0; emissPriorScaler_insert = 1.0; } } if(nrOfAlphabets > 1 && prifile_2 != null && scaler_s_2 != null) { try { emissPriorScaler_2 = Double.parseDouble(scaler_s_2); emissPriorScaler_insert_2 = emissPriorScaler_2; if(scaler_insert_2_s != null) { emissPriorScaler_insert_2 = Double.parseDouble(scaler_insert_2_s); } } catch(NumberFormatException e) { printerr("Could not read prior scaler from infile"); emissPriorScaler_2 = 1.0; emissPriorScaler_insert_2 = 1.0; } } if(nrOfAlphabets > 2 && prifile_3 != null && scaler_s_3 != null) { try { emissPriorScaler_3 = Double.parseDouble(scaler_s_3); emissPriorScaler_insert_3 = emissPriorScaler_3; if(scaler_insert_3_s != null) { emissPriorScaler_insert_3 = Double.parseDouble(scaler_insert_3_s); } } catch(NumberFormatException e) { printerr("Could not read prior scaler from infile"); emissPriorScaler_3 = 1.0; emissPriorScaler_insert_3 = 1.0; } } if(nrOfAlphabets > 3 && prifile_4 != null && scaler_s_4 != null) { try { emissPriorScaler_4 = Double.parseDouble(scaler_s_4); emissPriorScaler_insert_4 = emissPriorScaler_4; if(scaler_insert_4_s != null) { emissPriorScaler_insert_4 = Double.parseDouble(scaler_insert_4_s); } } catch(NumberFormatException e) { printerr("Could not read prior scaler from infile"); emissPriorScaler_4 = 1.0; emissPriorScaler_insert_4 = 1.0; } } boolean global = true; if(global_s.startsWith("l") || global_s.startsWith("L")) { global = false; } int res = modelmaker.createModule("s", HMM.STARTNODE, HMM.ZERO, 1, "0"); res = modelmaker.createModule("p1", HMM.PROFILE7, HMM.EVEN, size, "def", global); res = modelmaker.createModule("e", HMM.ENDNODE, HMM.ZERO, 1, "def"); modelmaker.setTransition("s", "p1"); modelmaker.initializeTransitionProbabilities("s"); modelmaker.setTransition("p1", "e"); modelmaker.initializeTransitionProbabilities("p1"); Module m = modelmaker.getModule("p1"); if(prifile != null && !(prifile.equals("null")) && !(prifile.equals("NULL"))) { m.setPriorfile(prifile); modelmaker.addPriorfile(prifile); m.setEmissPriorScaler(emissPriorScaler); if(m instanceof Profile7) { Profile7 p7 = ((Profile7)m); p7.setEmissPriorScalerInsert(1, emissPriorScaler_insert); } } if(nrOfAlphabets > 1) { if(prifile_2 != null && !(prifile_2.equals("null")) && !(prifile_2.equals("NULL"))) { m.setPriorfile(2, prifile_2); modelmaker.addPriorfile(2, prifile_2); m.setEmissPriorScaler(2, emissPriorScaler_2); if(m instanceof Profile7) { Profile7 p7 = ((Profile7)m); p7.setEmissPriorScalerInsert(2, emissPriorScaler_insert_2); } } } if(nrOfAlphabets > 2) { if(prifile_3 != null && !(prifile_3.equals("null")) && !(prifile_3.equals("NULL"))) { m.setPriorfile(3, prifile_3); modelmaker.addPriorfile(3, prifile_3); m.setEmissPriorScaler(3, emissPriorScaler_3); if(m instanceof Profile7) { Profile7 p7 = ((Profile7)m); p7.setEmissPriorScalerInsert(3, emissPriorScaler_insert_3); } } } if(nrOfAlphabets > 3) { if(prifile_4 != null && !(prifile_4.equals("null")) && !(prifile_4.equals("NULL"))) { m.setPriorfile(4, prifile_4); modelmaker.addPriorfile(4, prifile_4); m.setEmissPriorScaler(4, emissPriorScaler_4); if(m instanceof Profile7) { Profile7 p7 = ((Profile7)m); p7.setEmissPriorScalerInsert(4, emissPriorScaler_insert_4); } } } m.setDistribType(HMM.EVEN); m.setDistribType(2, HMM.EVEN); } private void saveHMM(String outdir) { int res = modelmaker.saveHMM(outdir); } private String[] getAlphabet(String s) { StringTokenizer st = new StringTokenizer(s,";"); String[] alphabet = new String[st.countTokens()]; int i=0; /* counter for the alphabet array */ while (st.hasMoreTokens()){ String l = st.nextToken(); l = l.trim(); if(l.length() <= 4) /* letter OK*/ { alphabet[i] = l; i++; } else /* letter not OK */ { printerr("Only symbols of 4 characters or less are allowed in alphabet"); System.exit(0); } } /* Check for doubles */ for(int k = 0 ; k < alphabet.length - 1; k++) { for (int j = k+1; j < alphabet.length; j++) { if(alphabet[k].equals( alphabet[j])) { printerr("Alphabet contains doubles"); System.exit(0); } } } return alphabet; } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -