📄 crftagger.java
字号:
/* Copyright (C) 2006, Xuan-Hieu Phan Email: hieuxuan@ecei.tohoku.ac.jp pxhieu@gmail.com URL: http://www.hori.ecei.tohoku.ac.jp/~hieuxuan Graduate School of Information Sciences, Tohoku University*/package crf.tagger;import java.io.*;import java.util.*;public class CRFTagger { public static void main(String[] args) { displayCopyright(); if (!checkArgs(args)) { displayHelp(); return; } String modelDir = args[1]; boolean isInputFile = true; if (args[2].compareToIgnoreCase("-inputfile") != 0) { isInputFile = false; } String inputFile = ""; String inputDir = ""; if (isInputFile) { inputFile = args[3]; } else { inputDir = args[3]; } Option taggerOpt = new Option(modelDir); if (!taggerOpt.readOptions()) { return; } Maps taggerMaps = new Maps(); Dictionary taggerDict = new Dictionary(); FeatureGen taggerFGen = new FeatureGen(taggerMaps, taggerDict); Viterbi taggerVtb = new Viterbi(); Model taggerModel = new Model(taggerOpt, taggerMaps, taggerDict, taggerFGen, taggerVtb); if (!taggerModel.init()) { System.out.println("Couldn't load the model"); System.out.println("Check the <model directory> and the <model file> again"); return; } TaggingData taggerData = new TaggingData(); if (isInputFile) { taggerData.readData(inputFile); taggerData.cpGen(taggerMaps.cpStr2Int); // inference taggerModel.inferenceAll(taggerData.data); taggerData.writeData(taggerMaps.lbInt2Str, inputFile + ".pos"); } if (!isInputFile) { if (inputDir.endsWith(File.separator)) { inputDir = inputDir.substring(0, inputDir.length() - 1); } File dir = new File(inputDir); String[] children = dir.list(); for (int i = 0; i < children.length; i++) { String filename = inputDir + File.separator + children[i]; if ((new File(filename)).isDirectory()) { continue; } taggerData.readData(filename); taggerData.cpGen(taggerMaps.cpStr2Int); // inference taggerModel.inferenceAll(taggerData.data); taggerData.writeData(taggerMaps.lbInt2Str, filename + ".pos"); } } } // end of the main method public static boolean checkArgs(String[] args) { // case 1: CRFTagger -modeldir <model directory> -inputfile <input data file> // case 2: CRFTagger -modeldir <model directory> -inputdir <input data directory> if (args.length < 4) { return false; } if (args[0].compareToIgnoreCase("-modeldir") != 0) { return false; } if (!(args[2].compareToIgnoreCase("-inputfile") == 0 || args[2].compareToIgnoreCase("-inputdir") == 0)) { return false; } return true; } public static void displayCopyright() { System.out.println("English CRFTagger:"); System.out.println("\tTrain on sections 01-24 of Wall Street Journal corpus"); System.out.println("\tusing first-order Markov Conditional Random Fields"); System.out.println("\ttesting on section 00 with the highest accuracy of 97.00%"); System.out.println("Copyright (C) by Xuan-Hieu Phan"); System.out.println("Graduate School of Information Sciences, Tohoku University"); System.out.println("Email: hieuxuan@ecei.tohoku.ac.jp"); System.out.println(); } public static void displayHelp() { System.out.println("Usage:"); System.out.println("\tCase 1: CRFTagger -modeldir <model directory> -inputfile <input data file>"); System.out.println("\tCase 2: CRFTagger -modeldir <model directory> -inputdir <input data directory>"); System.out.println("Where:"); System.out.println("\t<model directory> is the directory contain the model and option files"); System.out.println("\t<input data file> is the file containing input sentences that need to"); System.out.println("\tbe tagged (each sentence on a line)"); System.out.println("\t<input data directory> is the directory containing multiple input data files"); System.out.println(); } } // end of class CRFTagger
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -