📄 dependencypipe.java
字号:
package mstparser;import java.io.*;import gnu.trove.*;import java.util.*;public class DependencyPipe { public Alphabet dataAlphabet; public Alphabet typeAlphabet; public String[] types; public int[] typesInt; public boolean labeled = false; public boolean createForest; public DependencyPipe() throws IOException { this(true); } public DependencyPipe(boolean createForest) throws IOException { dataAlphabet = new Alphabet(); typeAlphabet = new Alphabet(); this.createForest = createForest; } public void setLabeled(String file) throws IOException { BufferedReader in = new BufferedReader(new FileReader(file)); in.readLine(); in.readLine(); in.readLine(); String line = in.readLine(); if(line.trim().length() > 0) labeled = true; in.close(); } public String[][] getLines(BufferedReader in) throws IOException { String line = in.readLine(); String pos_line = in.readLine(); String lab_line = labeled ? in.readLine() : pos_line; String deps_line = in.readLine(); in.readLine(); // blank line if(line == null) return null; String[] toks = line.split("\t"); String[] pos = pos_line.split("\t"); String[] labs = lab_line.split("\t"); String[] deps = deps_line.split("\t"); String[] toks_new = new String[toks.length+1]; String[] pos_new = new String[pos.length+1]; String[] labs_new = new String[labs.length+1]; String[] deps_new = new String[deps.length+1]; toks_new[0] = "<root>"; pos_new[0] = "<root-POS>"; labs_new[0] = "<no-type>"; deps_new[0] = "-1"; for(int i = 0; i < toks.length; i++) { toks_new[i+1] = normalize(toks[i]); pos_new[i+1] = pos[i]; labs_new[i+1] = labeled ? labs[i] : "<no-type>"; deps_new[i+1] = deps[i]; } toks = toks_new; pos = pos_new; labs = labs_new; deps = deps_new; String[][] result = new String[4][]; result[0] = toks; result[1] = pos; result[2] = labs; result[3] = deps; return result; } public DependencyInstance createInstance(BufferedReader in) throws IOException { String[][] lines = getLines(in); if(lines == null) return null; String[] toks = lines[0]; String[] pos = lines[1]; String[] labs = lines[2]; String[] deps = lines[3]; int[] deps1 = new int[deps.length]; for(int i = 0; i < deps.length; i++) deps1[i] = Integer.parseInt(deps[i]); FeatureVector fv = createFeatureVector(toks,pos,labs,deps1); DependencyInstance pti = new DependencyInstance(toks,pos,labs,fv); String spans = ""; for(int i = 1; i < deps.length; i++) { spans += deps[i]+"|"+i+":"+typeAlphabet.lookupIndex(labs[i])+" "; } pti.actParseTree = spans.trim(); return pti; } public DependencyInstance[] createInstances(String file, String featFileName) throws IOException { createAlphabet(file); System.out.println("Num Features: " + dataAlphabet.size()); BufferedReader in = //new BufferedReader(new FileReader(file)); new BufferedReader(new InputStreamReader(new FileInputStream(file),"UTF8")); String[][] lines = getLines(in); LinkedList lt = new LinkedList(); ObjectOutputStream out = createForest ? new ObjectOutputStream(new FileOutputStream(featFileName)) : null; int num1 = 0; while(lines != null) { System.out.println("Creating Feature Vector Instance: " + num1); String[] toks = lines[0]; String[] pos = lines[1]; String[] labs = lines[2]; String[] deps = lines[3]; int[] deps1 = new int[deps.length]; for(int i = 0; i < deps.length; i++) deps1[i] = Integer.parseInt(deps[i]); FeatureVector fv = createFeatureVector(toks,pos,labs,deps1); DependencyInstance pti = new DependencyInstance(toks,pos,labs,fv); String spans = ""; for(int i = 1; i < deps.length; i++) { spans += deps[i]+"|"+i+":"+typeAlphabet.lookupIndex(labs[i])+" "; } pti.actParseTree = spans.trim(); if(createForest) possibleFeatures(pti,out); pti = null; lt.add(new DependencyInstance(toks.length)); lines = getLines(in); num1++; } closeAlphabets(); DependencyInstance[] pti = new DependencyInstance[lt.size()]; for(int i = 0; i < pti.length; i++) { pti[i] = (DependencyInstance)lt.get(i); } if(createForest) out.close(); in.close(); return pti; } private void createAlphabet(String file) throws IOException { System.out.print("Creating Alphabet ... "); BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(file),"UTF8")); String[][] lines = getLines(in); int cnt = 0; while(lines != null) { String[] toks = lines[0]; String[] pos = lines[1]; String[] labs = lines[2]; String[] deps = lines[3]; for(int i = 0; i < labs.length; i++) typeAlphabet.lookupIndex(labs[i]); int[] deps1 = new int[deps.length]; for(int i = 0; i < deps.length; i++) { deps1[i] = Integer.parseInt(deps[i]); } createFeatureVector(toks,pos,labs,deps1); lines = getLines(in); cnt++; } closeAlphabets(); in.close(); System.out.println("Done."); } public void closeAlphabets() { dataAlphabet.stopGrowth(); typeAlphabet.stopGrowth(); types = new String[typeAlphabet.size()]; Object[] keys = typeAlphabet.toArray(); for(int i = 0; i < keys.length; i++) { int indx = typeAlphabet.lookupIndex(keys[i]); types[indx] = (String)keys[i]; } KBestParseForest.rootType = typeAlphabet.lookupIndex("<root-type>"); } public String normalize(String s) { if(s.matches("[0-9]+|[0-9]+\\.[0-9]+|[0-9]+[0-9,]+")) return "<num>"; return s; } public FeatureVector createFeatureVector(String[] toks, String[] pos, String[] posA, int small, int large, boolean attR, FeatureVector fv) { String att = ""; if(attR) att = "RA"; else att = "LA"; int dist = Math.abs(large-small); String distBool = "0"; if(dist > 1) distBool = "1"; if(dist > 2) distBool = "2"; if(dist > 3) distBool = "3"; if(dist > 4) distBool = "4"; if(dist > 5) distBool = "5"; if(dist > 10) distBool = "10"; String attDist = "&"+att+"&"+distBool; String pLeft = small > 0 ? pos[small-1] : "STR"; String pRight = large < pos.length-1 ? pos[large+1] : "END"; String pLeftRight = small < large-1 ? pos[small+1] : "MID"; String pRightLeft = large > small+1 ? pos[large-1] : "MID"; String pLeftA = small > 0 ? posA[small-1] : "STR"; String pRightA = large < pos.length-1 ? posA[large+1] : "END"; String pLeftRightA = small < large-1 ? posA[small+1] : "MID"; String pRightLeftA = large > small+1 ? posA[large-1] : "MID"; // feature posR posMid posL for(int i = small+1; i < large; i++) { String allPos = pos[small]+" "+pos[i]+" "+pos[large]; String allPosA = posA[small]+" "+posA[i]+" "+posA[large]; fv = add("PC="+allPos+attDist,1.0,fv); fv = add("1PC="+allPos,1.0,fv); fv = add("XPC="+allPosA+attDist,1.0,fv); fv = add("X1PC="+allPosA,1.0,fv); } // feature posL-1 posL posR posR+1 fv = add("PT="+pLeft+" "+pos[small]+" "+pos[large]+" "+pRight+attDist,1.0,fv); fv = add("PT1="+pos[small]+" "+pos[large]+" " +pRight+attDist,1.0,fv); fv = add("PT2="+pLeft+" "+pos[small]+" "+pos[large]+attDist,1.0,fv); fv = add("PT3="+pLeft+" "+pos[large]+" "+pRight+attDist,1.0,fv); fv = add("PT4="+pLeft+" "+pos[small]+" "+pRight+attDist,1.0,fv); fv = add("1PT="+pLeft+" "+pos[small]+" "+pos[large]+" "+pRight,1.0,fv); fv = add("1PT1="+pos[small]+" "+pos[large]+" " +pRight,1.0,fv); fv = add("1PT2="+pLeft+" "+pos[small]+" "+pos[large],1.0,fv); fv = add("1PT3="+pLeft+" "+pos[large]+" "+pRight,1.0,fv); fv = add("1PT4="+pLeft+" "+pos[small]+" "+pRight,1.0,fv); fv = add("XPT="+pLeftA+" "+posA[small]+" "+posA[large]+" "+pRightA+attDist,1.0,fv); fv = add("XPT1="+posA[small]+" "+posA[large]+" " +pRightA+attDist,1.0,fv); fv = add("XPT2="+pLeftA+" "+posA[small]+" "+posA[large]+attDist,1.0,fv); fv = add("XPT3="+pLeftA+" "+posA[large]+" "+pRightA+attDist,1.0,fv); fv = add("XPT4="+pLeftA+" "+posA[small]+" "+pRightA+attDist,1.0,fv); fv = add("X1PT="+pLeftA+" "+posA[small]+" "+posA[large]+" "+pRightA,1.0,fv); fv = add("X1PT1="+posA[small]+" "+posA[large]+" " +pRightA,1.0,fv); fv = add("X1PT2="+pLeftA+" "+posA[small]+" "+posA[large],1.0,fv); fv = add("X1PT3="+pLeftA+" "+posA[large]+" "+pRightA,1.0,fv); fv = add("X1PT4="+pLeftA+" "+posA[small]+" "+pRightA,1.0,fv); // feature posL posL+1 posR-1 posR fv = add("APT="+pos[small]+" "+pLeftRight+" " +pRightLeft+" "+pos[large]+attDist,1.0,fv); fv = add("APT1="+pos[small]+" "+pRightLeft+" "+pos[large]+attDist,1.0,fv); fv = add("APT2="+pos[small]+" "+pLeftRight+" "+pos[large]+attDist,1.0,fv); fv = add("APT3="+pLeftRight+" "+pRightLeft+" "+pos[large]+attDist,1.0,fv); fv = add("APT4="+pos[small]+" "+pLeftRight+" "+pRightLeft+attDist,1.0,fv); fv = add("1APT="+pos[small]+" "+pLeftRight+" " +pRightLeft+" "+pos[large],1.0,fv); fv = add("1APT1="+pos[small]+" "+pRightLeft+" "+pos[large],1.0,fv); fv = add("1APT2="+pos[small]+" "+pLeftRight+" "+pos[large],1.0,fv); fv = add("1APT3="+pLeftRight+" "+pRightLeft+" "+pos[large],1.0,fv); fv = add("1APT4="+pos[small]+" "+pLeftRight+" "+pRightLeft,1.0,fv); fv = add("XAPT="+posA[small]+" "+pLeftRightA+" " +pRightLeftA+" "+posA[large]+attDist,1.0,fv); fv = add("XAPT1="+posA[small]+" "+pRightLeftA+" "+posA[large]+attDist,1.0,fv); fv = add("XAPT2="+posA[small]+" "+pLeftRightA+" "+posA[large]+attDist,1.0,fv); fv = add("XAPT3="+pLeftRightA+" "+pRightLeftA+" "+posA[large]+attDist,1.0,fv); fv = add("XAPT4="+posA[small]+" "+pLeftRightA+" "+pRightLeftA+attDist,1.0,fv); fv = add("X1APT="+posA[small]+" "+pLeftRightA+" " +pRightLeftA+" "+posA[large],1.0,fv); fv = add("X1APT1="+posA[small]+" "+pRightLeftA+" "+posA[large],1.0,fv); fv = add("X1APT2="+posA[small]+" "+pLeftRightA+" "+posA[large],1.0,fv); fv = add("X1APT3="+pLeftRightA+" "+pRightLeftA+" "+posA[large],1.0,fv); fv = add("X1APT4="+posA[small]+" "+pLeftRightA+" "+pRightLeftA,1.0,fv); // feature posL-1 posL posR-1 posR // feature posL posL+1 posR posR+1 fv = add("BPT="+pLeft+" "+pos[small]+" "+pRightLeft+" "+pos[large]+attDist,1.0,fv); fv = add("1BPT="+pLeft+" "+pos[small]+" "+pRightLeft+" "+pos[large],1.0,fv); fv = add("CPT="+pos[small]+" "+pLeftRight+" "+pos[large]+" "+pRight+attDist,1.0,fv); fv = add("1CPT="+pos[small]+" "+pLeftRight+" "+pos[large]+" "+pRight,1.0,fv); fv = add("XBPT="+pLeftA+" "+posA[small]+" "+pRightLeftA+" "+posA[large]+attDist,1.0,fv); fv = add("X1BPT="+pLeftA+" "+posA[small]+" "+pRightLeftA+" "+posA[large],1.0,fv); fv = add("XCPT="+posA[small]+" "+pLeftRightA+" "+posA[large]+" "+pRightA+attDist,1.0,fv); fv = add("X1CPT="+posA[small]+" "+pLeftRightA+" "+posA[large]+" "+pRightA,1.0,fv); String head = attR ? toks[small] : toks[large]; String headP = attR ? pos[small] : pos[large]; String child = attR ? toks[large] : toks[small]; String childP = attR ? pos[large] : pos[small]; String all = head + " " + headP + " " + child + " " + childP; String hPos = headP + " " + child + " " + childP; String cPos = head + " " + headP + " " + childP; String hP = headP + " " + child; String cP = head + " " + childP; String oPos = headP + " " + childP; String oLex = head + " " + child; fv = add("A="+all+attDist,1.0,fv); //this fv = add("B="+hPos+attDist,1.0,fv); fv = add("C="+cPos+attDist,1.0,fv); fv = add("D="+hP+attDist,1.0,fv); fv = add("E="+cP+attDist,1.0,fv); fv = add("F="+oLex+attDist,1.0,fv); //this fv = add("G="+oPos+attDist,1.0,fv); fv = add("H="+head+" "+headP+attDist,1.0,fv); fv = add("I="+headP+attDist,1.0,fv); fv = add("J="+head+attDist,1.0,fv); //this fv = add("K="+child+" "+childP+attDist,1.0,fv); fv = add("L="+childP+attDist,1.0,fv); fv = add("M="+child+attDist,1.0,fv); //this fv = add("AA="+all,1.0,fv); //this fv = add("BB="+hPos,1.0,fv); fv = add("CC="+cPos,1.0,fv); fv = add("DD="+hP,1.0,fv); fv = add("EE="+cP,1.0,fv); fv = add("FF="+oLex,1.0,fv); //this fv = add("GG="+oPos,1.0,fv); fv = add("HH="+head+" "+headP,1.0,fv); fv = add("II="+headP,1.0,fv);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -