📄 featuregen.java
字号:
/*
    Copyright (C) 2006, Xuan-Hieu Phan

    Email:  hieuxuan@ecei.tohoku.ac.jp
            pxhieu@gmail.com
    URL:    http://www.hori.ecei.tohoku.ac.jp/~hieuxuan

    Graduate School of Information Sciences,
    Tohoku University
*/

package crf.tagger;

import java.io.*;
import java.util.*;

/**
 * Holds the features read from a model stream and enumerates the features
 * that apply at one position of a data sequence.
 *
 * <p>Enumeration yields the state features collected for the position first
 * (looked up through the dictionary), followed by every edge feature that was
 * read by {@link #readFeatures(BufferedReader)}.
 *
 * <p>Raw collection types and package-private fields are kept deliberately:
 * other classes of this package (not visible here) read and write them.
 */
public class FeatureGen {
    /** All features accepted by {@link #readFeatures} or {@link #addFeature}. */
    List features = new ArrayList();

    /** Feature map: feature string id -> feature index. */
    Map fmap = new HashMap();

    /** Context predicate and label maps. */
    Maps maps = null;

    /** Dictionary used to look up state features per context predicate. */
    Dictionary dict = null;

    /** State features collected by the latest {@link #startScanSFeaturesAt}. */
    List sFeatures = new ArrayList();

    /** Cursor into {@link #sFeatures}. */
    int sFeatureIdx = 0;

    /** Edge features (type {@code Feature.EDGE_FEATURE1}) read from the model. */
    List eFeatures = new ArrayList();

    /** Cursor into {@link #eFeatures}. */
    int eFeatureIdx = 0;

    /**
     * Creates a feature generator.
     *
     * @param maps context predicate and label maps used when parsing features
     * @param dict dictionary used when scanning state features
     */
    public FeatureGen(Maps maps, Dictionary dict) {
        this.maps = maps;
        this.dict = dict;
    }

    /**
     * Adds a feature: registers it in the feature map through
     * {@code Feature.strId2IdxAdd} and appends it to the feature list.
     *
     * @param f the feature to add
     */
    public void addFeature(Feature f) {
        f.strId2IdxAdd(fmap);
        features.add(f);
    }

    /**
     * Returns the number of features currently held.
     *
     * @return the size of the feature list, or 0 if the list is {@code null}
     */
    public int numFeatures() {
        return (features == null) ? 0 : features.size();
    }

    /**
     * Discards the current features and reads a new feature section.
     *
     * <p>The first line must hold the number of feature lines that follow.
     * A feature line is accepted only if it consists of exactly three
     * space-separated tokens; other lines are skipped. A feature whose string
     * id is already in the feature map is ignored. After the feature lines,
     * one more line is consumed (the section separator).
     *
     * <p>A missing, non-numeric, or non-positive count is reported on standard
     * output and the method returns without reading further.
     *
     * @param fin the reader positioned at the feature-count line
     * @throws IOException if reading from {@code fin} fails
     */
    public void readFeatures(BufferedReader fin) throws IOException {
        features = emptied(features);
        eFeatures = emptied(eFeatures);
        sFeatures = emptied(sFeatures);
        if (fmap == null) {
            fmap = new HashMap();
        } else {
            fmap.clear();
        }
        // The lists were just rebuilt, so any previous scan position is stale.
        sFeatureIdx = 0;
        eFeatureIdx = 0;

        // get the number of features
        String line = fin.readLine();
        if (line == null) {
            System.out.println("Unknown number of features");
            return;
        }

        int numFeatures;
        try {
            numFeatures = Integer.parseInt(line.trim());
        } catch (NumberFormatException ex) {
            // A malformed count is reported like any other invalid count
            // instead of escaping as an unchecked exception.
            System.out.println("Invalid number of features");
            return;
        }
        if (numFeatures <= 0) {
            System.out.println("Invalid number of features");
            return;
        }

        System.out.println("Reading features ...");

        // main loop for reading features
        for (int i = 0; i < numFeatures; i++) {
            line = fin.readLine();
            if (line == null) {
                // End of stream: every further read would return null as well.
                break;
            }

            if (new StringTokenizer(line, " ").countTokens() != 3) {
                // invalid feature line, ignore it
                continue;
            }

            // create a new feature by parsing the line
            Feature f = new Feature(line, maps.cpStr2Int, maps.lbStr2Int);
            if (fmap.get(f.strId) != null) {
                // already known, ignore the duplicate
                continue;
            }

            // insert the feature into the feature map
            fmap.put(f.strId, Integer.valueOf(f.idx));
            features.add(f);

            if (f.ftype == Feature.EDGE_FEATURE1) {
                eFeatures.add(f);
            }
        }

        System.out.println("Reading " + features.size() + " features completed!");

        // consume the separator line (###...) that follows the feature section
        fin.readLine();
    }

    /**
     * Returns an empty list to store into a field: the given list after
     * clearing it, or a new list when the given one is {@code null}.
     */
    private static List emptied(List list) {
        if (list == null) {
            return new ArrayList();
        }
        list.clear();
        return list;
    }

    /**
     * Starts scanning features at a particular position in a data sequence.
     *
     * @param seq the data sequence; the element at {@code pos} is cast to
     *            {@code Observation}
     * @param pos the position within {@code seq}
     */
    public void startScanFeaturesAt(List seq, int pos) {
        startScanSFeaturesAt(seq, pos);
        startScanEFeatures();
    }

    /**
     * Tells whether the current scan has more features.
     *
     * @return {@code true} if a state or an edge feature is still pending
     */
    public boolean hasNextFeature() {
        return hasNextSFeature() || hasNextEFeature();
    }

    /**
     * Returns the next feature of the current scan: state features first,
     * then edge features.
     *
     * @return the next feature, or {@code null} if none is left
     */
    public Feature nextFeature() {
        if (hasNextSFeature()) {
            return nextSFeature();
        }
        if (hasNextEFeature()) {
            return nextEFeature();
        }
        return null;
    }

    /**
     * Starts scanning state features: collects, for every context predicate
     * of the observation at {@code pos}, the state features of the matching
     * dictionary element.
     *
     * <p>The features of an element are built on its first use and stored in
     * {@code elem.cpFeatures}; {@code elem.isScanned} marks that this is done.
     * Context predicates without a dictionary element are skipped.
     */
    void startScanSFeaturesAt(List seq, int pos) {
        sFeatures.clear();
        sFeatureIdx = 0;

        Observation obsr = (Observation) seq.get(pos);

        // scan over all context predicates
        for (int i = 0; i < obsr.cps.length; i++) {
            Element elem = (Element) dict.dict.get(Integer.valueOf(obsr.cps[i]));
            if (elem == null) {
                continue;
            }

            if (!elem.isScanned) {
                // scan all labels for state feature
                Iterator it = elem.lbCntFidxes.keySet().iterator();
                while (it.hasNext()) {
                    Integer label = (Integer) it.next();
                    CountFeatureIdx cntFidx = (CountFeatureIdx) elem.lbCntFidxes.get(label);
                    // only entries with a non-negative feature index yield a feature
                    if (cntFidx.fidx >= 0) {
                        Feature sF = new Feature();
                        sF.sFeature1Init(label.intValue(), obsr.cps[i]);
                        sF.idx = cntFidx.fidx;
                        elem.cpFeatures.add(sF);
                    }
                }
                elem.isScanned = true;
            }

            for (int j = 0; j < elem.cpFeatures.size(); j++) {
                sFeatures.add(elem.cpFeatures.get(j));
            }
        }
    }

    /** Tells whether a state feature is still pending in the current scan. */
    boolean hasNextSFeature() {
        return sFeatureIdx < sFeatures.size();
    }

    /** Returns the pending state feature and advances the cursor. */
    Feature nextSFeature() {
        Feature sF = (Feature) sFeatures.get(sFeatureIdx);
        sFeatureIdx++;
        return sF;
    }

    /** Starts scanning edge features by rewinding the edge-feature cursor. */
    void startScanEFeatures() {
        eFeatureIdx = 0;
    }

    /** Tells whether an edge feature is still pending in the current scan. */
    boolean hasNextEFeature() {
        return eFeatureIdx < eFeatures.size();
    }

    /** Returns the pending edge feature and advances the cursor. */
    Feature nextEFeature() {
        Feature eF = (Feature) eFeatures.get(eFeatureIdx);
        eFeatureIdx++;
        return eF;
    }
} // end of class FeatureGen
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -