⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 parameter.cpp.svn-base

📁 moses开源的机器翻译系统
💻 SVN-BASE
📖 第 1 页 / 共 2 页
字号:
// $Id$/***********************************************************************Moses - factored phrase-based language decoderCopyright (C) 2006 University of EdinburghThis library is free software; you can redistribute it and/ormodify it under the terms of the GNU Lesser General PublicLicense as published by the Free Software Foundation; eitherversion 2.1 of the License, or (at your option) any later version.This library is distributed in the hope that it will be useful,but WITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNULesser General Public License for more details.You should have received a copy of the GNU Lesser General PublicLicense along with this library; if not, write to the Free SoftwareFoundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA***********************************************************************/#include <iostream>#include <iterator>#include <fstream>#include <sstream>#include <algorithm>#include "Parameter.h"#include "Util.h"#include "InputFileStream.h"#include "UserMessage.h"using namespace std;/** define allowed parameters */Parameter::Parameter() {	AddParam("beam-threshold", "b", "threshold for threshold pruning");	AddParam("config", "f", "location of the configuration file");	AddParam("drop-unknown", "du", "drop unknown words instead of copying them");	AddParam("factor-delimiter", "fd", "specify a different factor delimiter than the default");	AddParam("generation-file", "location and properties of the generation table");	AddParam("input-factors", "list of factors in the input");	AddParam("input-file", "i", "location of the input file to be translated");	AddParam("inputtype", "text (0) or confusion network (1)");	AddParam("labeled-n-best-list", "print out labels for each weight type in n-best list. default is true");	AddParam("include-alignment-in-n-best", "include word alignment in the n-best list. default is false");	AddParam("lmodel-file", "location and properties of the language models");	AddParam("lmstats", "L", "(1/0) compute LM backoff statistics for each translation hypothesis");	AddParam("mapping", "description of decoding steps");	AddParam("max-partial-trans-opt", "maximum number of partial translation options per input span (during mapping steps)");	AddParam("max-trans-opt-per-coverage", "maximum number of translation options per input span (after applying mapping steps)");	AddParam("max-phrase-length", "maximum phrase length (default 20)");	AddParam("n-best-list", "file and size of n-best-list to be generated; specify - as the file in order to write to STDOUT");	AddParam("n-best-factor", "factor to compute the maximum number of contenders (=factor*nbest-size). value 0 means infinity, i.e. no threshold. default is 0");	AddParam("output-factors", "list of factors in the output");	AddParam("phrase-drop-allowed", "da", "if present, allow dropping of source words"); //da = drop any (word); see -du for comparison	AddParam("report-all-factors", "report all factors in output, not just first");	AddParam("report-segmentation", "t", "report phrase segmentation in the output");	AddParam("stack", "s", "maximum stack size for histogram pruning");	AddParam("translation-details", "T", "for each best translation hypothesis, print out details about what sourcce spans were used, dropped");	AddParam("ttable-file", "location and properties of the translation tables");	AddParam("ttable-limit", "maximum number of translation table entries per input phrase");	AddParam("use-distortion-future-costs", "consider expected distortion cost in future cost estimation");	AddParam("verbose", "v", "verbosity level of the logging");	AddParam("weight-d", "d", "weight(s) for distortion (reordering components)");	AddParam("weight-generation", "g", "weight(s) for generation components");	AddParam("weight-i", "I", "weight for word insertion");	AddParam("weight-l", "lm", "weight(s) for language models");	AddParam("weight-t", "tm", "weights for translation model components");	AddParam("weight-w", "w", "weight for word penalty");	AddParam("weight-e", "e", "weight for word deletion"); 	AddParam("output-factors", "list if factors in the output");	AddParam("cache-path", "?");	AddParam("distortion-limit", "dl", "distortion (reordering) limit in maximum number of words");		AddParam("distortion-file", "source factors (0 if table independent of source), target factors, location of the factorized/lexicalized reordering tables"); 	AddParam("distortion", "configurations for each factorized/lexicalized reordering model.");	AddParam("xml-input", "xi", "allows markup of input with desired translations and probabilities. values can be 'pass-through' (default), 'inclusive', 'exclusive', 'ignore'"); 	AddParam("mbr-scale", "scaling factor to convert log linear score into a probability."); 	AddParam("decoder-type", "MAP/MBR decoder (default=MAP=0)");	AddParam("use-persistent-cache", "cache translation options across sentences (default=true)");	AddParam("recover-input-path", "r", "(conf net/word lattice only) - recover input path corresponding to the best translation");}Parameter::~Parameter(){}/** initialize a parameter, sub of constructor */void Parameter::AddParam(const string &paramName, const string &description){	m_valid[paramName] = true;	m_description[paramName] = description;}/** initialize a parameter (including abbreviation), sub of constructor */void Parameter::AddParam(const string &paramName, const string &abbrevName, const string &description){	m_valid[paramName] = true;	m_valid[abbrevName] = true;	m_abbreviation[paramName] = abbrevName;	m_description[paramName] = description;}/** print descriptions of all parameters */void Parameter::Explain() {	cerr << "Usage:" << endl;	for(PARAM_STRING::const_iterator iterParam = m_description.begin(); iterParam != m_description.end(); iterParam++) 	{		const string paramName = iterParam->first;		const string paramDescription = iterParam->second;		cerr <<  "\t-" << paramName;		PARAM_STRING::const_iterator iterAbbr = m_abbreviation.find( paramName );		if ( iterAbbr != m_abbreviation.end() )			cerr <<  " (" << iterAbbr->second << ")";		cerr <<  ": " << paramDescription << endl;	}}/** check whether an item on the command line is a switch or a value  * \param token token on the command line to checked **/bool Parameter::isOption(const char* token) {  if (! token) return false;  std::string tokenString(token);  size_t length = tokenString.size();  if (length > 0 && tokenString.substr(0,1) != "-") return false;  if (length > 1 && tokenString.substr(1,1).find_first_not_of("0123456789") == 0) return true;  return false;}/** load all parameters from the configuration file and the command line switches */bool Parameter::LoadParam(const string &filePath){	const char *argv[] = {"executable", "-f", filePath.c_str() };	return LoadParam(3, (char**) argv);}/** load all parameters from the configuration file and the command line switches */bool Parameter::LoadParam(int argc, char* argv[]) {	// config file (-f) arg mandatory	string configPath;	if ( (configPath = FindParam("-f", argc, argv)) == "" 		&& (configPath = FindParam("-config", argc, argv)) == "")	{		PrintCredit();		UserMessage::Add("No configuration file was specified.  Use -config or -f");		return false;	}	else	{		if (!ReadConfigFile(configPath))		{			UserMessage::Add("Could not read "+configPath);			return false;		}	}		// overwrite parameters with values from switches	for(PARAM_STRING::const_iterator iterParam = m_description.begin(); iterParam != m_description.end(); iterParam++) 	{		const string paramName = iterParam->first;		OverwriteParam("-" + paramName, paramName, argc, argv);	}	// ... also shortcuts	for(PARAM_STRING::const_iterator iterParam = m_abbreviation.begin(); iterParam != m_abbreviation.end(); iterParam++) 	{		const string paramName = iterParam->first;		const string paramShortName = iterParam->second;		OverwriteParam("-" + paramShortName, paramName, argc, argv);	}	// logging of parameters that were set in either config or switch	int verbose = 1;	if (m_setting.find("verbose") != m_setting.end() &&	    m_setting["verbose"].size() > 0)	  verbose = Scan<int>(m_setting["verbose"][0]);	if (verbose >= 1) { // only if verbose	  TRACE_ERR( "Defined parameters (per moses.ini or switch):" << endl);	  for(PARAM_MAP::const_iterator iterParam = m_setting.begin() ; iterParam != m_setting.end(); iterParam++) {	    TRACE_ERR( "\t" << iterParam->first << ": ");	    for ( size_t i = 0; i < iterParam->second.size(); i++ )	      TRACE_ERR( iterParam->second[i] << " ");	    TRACE_ERR( endl);	  }	}	// check for illegal parameters	bool noErrorFlag = true;	for (int i = 0 ; i < argc ; i++)	{		if (isOption(argv[i]))			{				string paramSwitch = (string) argv[i];								string paramName = paramSwitch.substr(1);				if (m_valid.find(paramName) == m_valid.end()) 					{						UserMessage::Add("illegal switch: " + paramSwitch);						noErrorFlag = false;					}			}	}  // check if parameters make sense	return Validate() && noErrorFlag;}/** check that parameter settings make sense */bool Parameter::Validate() {	bool noErrorFlag = true;  // required parameters	if (m_setting["ttable-file"].size() == 0)	{		UserMessage::Add("No phrase translation table (ttable-file)");		noErrorFlag = false;	}	if (m_setting["lmodel-file"].size() == 0)	{		UserMessage::Add("No language model (lmodel-file)");		noErrorFlag = false;	}	if (m_setting["lmodel-file"].size() != m_setting["weight-l"].size()) 	{		stringstream errorMsg("");		errorMsg << "Config and parameters specify "            << static_cast<int>(m_setting["lmodel-file"].size()) 						<< " language model files (lmodel-file), but " 						<< static_cast<int>(m_setting["weight-l"].size())						<< " weights (weight-l)";    errorMsg << endl << "You might be giving '-lmodel-file TYPE FACTOR ORDER FILENAME' but you should be giving these four as a single argument, i.e. '-lmodel-file \"TYPE FACTOR ORDER FILENAME\"'";		UserMessage::Add(errorMsg.str());		noErrorFlag = false;	}  // do files exist?	// phrase tables	if (noErrorFlag) 	{		std::vector<std::string> ext;		// standard phrase table extension (i.e. full name has to be specified)		// raw tables in either un compressed or compressed form		ext.push_back("");	  ext.push_back(".gz");		// alternative file extension for binary phrase table format:		ext.push_back(".binphr.idx");		noErrorFlag = FilesExist("ttable-file", 3,ext);	}	// language model	if (noErrorFlag)		noErrorFlag = FilesExist("lmodel-file", 3);	// input file	if (noErrorFlag && m_setting["input-file"].size() == 1)	{		noErrorFlag = FileExists(m_setting["input-file"][0]);	}	// generation tables	if (noErrorFlag)	{	  std::vector<std::string> ext;	  //raw tables in either un compressed or compressed form	  ext.push_back("");	  ext.push_back(".gz");		noErrorFlag = FilesExist("generation-file", 3, ext);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -