📄 parameter.cpp.svn-base
字号:
// $Id$/***********************************************************************Moses - factored phrase-based language decoderCopyright (C) 2006 University of EdinburghThis library is free software; you can redistribute it and/ormodify it under the terms of the GNU Lesser General PublicLicense as published by the Free Software Foundation; eitherversion 2.1 of the License, or (at your option) any later version.This library is distributed in the hope that it will be useful,but WITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNULesser General Public License for more details.You should have received a copy of the GNU Lesser General PublicLicense along with this library; if not, write to the Free SoftwareFoundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA***********************************************************************/#include <iostream>#include <iterator>#include <fstream>#include <sstream>#include <algorithm>#include "Parameter.h"#include "Util.h"#include "InputFileStream.h"#include "UserMessage.h"using namespace std;/** define allowed parameters */Parameter::Parameter() { AddParam("beam-threshold", "b", "threshold for threshold pruning"); AddParam("config", "f", "location of the configuration file"); AddParam("drop-unknown", "du", "drop unknown words instead of copying them"); AddParam("factor-delimiter", "fd", "specify a different factor delimiter than the default"); AddParam("generation-file", "location and properties of the generation table"); AddParam("input-factors", "list of factors in the input"); AddParam("input-file", "i", "location of the input file to be translated"); AddParam("inputtype", "text (0) or confusion network (1)"); AddParam("labeled-n-best-list", "print out labels for each weight type in n-best list. default is true"); AddParam("include-alignment-in-n-best", "include word alignment in the n-best list. default is false"); AddParam("lmodel-file", "location and properties of the language models"); AddParam("lmstats", "L", "(1/0) compute LM backoff statistics for each translation hypothesis"); AddParam("mapping", "description of decoding steps"); AddParam("max-partial-trans-opt", "maximum number of partial translation options per input span (during mapping steps)"); AddParam("max-trans-opt-per-coverage", "maximum number of translation options per input span (after applying mapping steps)"); AddParam("max-phrase-length", "maximum phrase length (default 20)"); AddParam("n-best-list", "file and size of n-best-list to be generated; specify - as the file in order to write to STDOUT"); AddParam("n-best-factor", "factor to compute the maximum number of contenders (=factor*nbest-size). value 0 means infinity, i.e. no threshold. default is 0"); AddParam("output-factors", "list of factors in the output"); AddParam("phrase-drop-allowed", "da", "if present, allow dropping of source words"); //da = drop any (word); see -du for comparison AddParam("report-all-factors", "report all factors in output, not just first"); AddParam("report-segmentation", "t", "report phrase segmentation in the output"); AddParam("stack", "s", "maximum stack size for histogram pruning"); AddParam("translation-details", "T", "for each best translation hypothesis, print out details about what sourcce spans were used, dropped"); AddParam("ttable-file", "location and properties of the translation tables"); AddParam("ttable-limit", "maximum number of translation table entries per input phrase"); AddParam("use-distortion-future-costs", "consider expected distortion cost in future cost estimation"); AddParam("verbose", "v", "verbosity level of the logging"); AddParam("weight-d", "d", "weight(s) for distortion (reordering components)"); AddParam("weight-generation", "g", "weight(s) for generation components"); AddParam("weight-i", "I", "weight for word insertion"); AddParam("weight-l", "lm", "weight(s) for language models"); AddParam("weight-t", "tm", "weights for translation model components"); AddParam("weight-w", "w", "weight for word penalty"); AddParam("weight-e", "e", "weight for word deletion"); AddParam("output-factors", "list if factors in the output"); AddParam("cache-path", "?"); AddParam("distortion-limit", "dl", "distortion (reordering) limit in maximum number of words"); AddParam("distortion-file", "source factors (0 if table independent of source), target factors, location of the factorized/lexicalized reordering tables"); AddParam("distortion", "configurations for each factorized/lexicalized reordering model."); AddParam("xml-input", "xi", "allows markup of input with desired translations and probabilities. values can be 'pass-through' (default), 'inclusive', 'exclusive', 'ignore'"); AddParam("mbr-scale", "scaling factor to convert log linear score into a probability."); AddParam("decoder-type", "MAP/MBR decoder (default=MAP=0)"); AddParam("use-persistent-cache", "cache translation options across sentences (default=true)"); AddParam("recover-input-path", "r", "(conf net/word lattice only) - recover input path corresponding to the best translation");}Parameter::~Parameter(){}/** initialize a parameter, sub of constructor */void Parameter::AddParam(const string ¶mName, const string &description){ m_valid[paramName] = true; m_description[paramName] = description;}/** initialize a parameter (including abbreviation), sub of constructor */void Parameter::AddParam(const string ¶mName, const string &abbrevName, const string &description){ m_valid[paramName] = true; m_valid[abbrevName] = true; m_abbreviation[paramName] = abbrevName; m_description[paramName] = description;}/** print descriptions of all parameters */void Parameter::Explain() { cerr << "Usage:" << endl; for(PARAM_STRING::const_iterator iterParam = m_description.begin(); iterParam != m_description.end(); iterParam++) { const string paramName = iterParam->first; const string paramDescription = iterParam->second; cerr << "\t-" << paramName; PARAM_STRING::const_iterator iterAbbr = m_abbreviation.find( paramName ); if ( iterAbbr != m_abbreviation.end() ) cerr << " (" << iterAbbr->second << ")"; cerr << ": " << paramDescription << endl; }}/** check whether an item on the command line is a switch or a value * \param token token on the command line to checked **/bool Parameter::isOption(const char* token) { if (! token) return false; std::string tokenString(token); size_t length = tokenString.size(); if (length > 0 && tokenString.substr(0,1) != "-") return false; if (length > 1 && tokenString.substr(1,1).find_first_not_of("0123456789") == 0) return true; return false;}/** load all parameters from the configuration file and the command line switches */bool Parameter::LoadParam(const string &filePath){ const char *argv[] = {"executable", "-f", filePath.c_str() }; return LoadParam(3, (char**) argv);}/** load all parameters from the configuration file and the command line switches */bool Parameter::LoadParam(int argc, char* argv[]) { // config file (-f) arg mandatory string configPath; if ( (configPath = FindParam("-f", argc, argv)) == "" && (configPath = FindParam("-config", argc, argv)) == "") { PrintCredit(); UserMessage::Add("No configuration file was specified. Use -config or -f"); return false; } else { if (!ReadConfigFile(configPath)) { UserMessage::Add("Could not read "+configPath); return false; } } // overwrite parameters with values from switches for(PARAM_STRING::const_iterator iterParam = m_description.begin(); iterParam != m_description.end(); iterParam++) { const string paramName = iterParam->first; OverwriteParam("-" + paramName, paramName, argc, argv); } // ... also shortcuts for(PARAM_STRING::const_iterator iterParam = m_abbreviation.begin(); iterParam != m_abbreviation.end(); iterParam++) { const string paramName = iterParam->first; const string paramShortName = iterParam->second; OverwriteParam("-" + paramShortName, paramName, argc, argv); } // logging of parameters that were set in either config or switch int verbose = 1; if (m_setting.find("verbose") != m_setting.end() && m_setting["verbose"].size() > 0) verbose = Scan<int>(m_setting["verbose"][0]); if (verbose >= 1) { // only if verbose TRACE_ERR( "Defined parameters (per moses.ini or switch):" << endl); for(PARAM_MAP::const_iterator iterParam = m_setting.begin() ; iterParam != m_setting.end(); iterParam++) { TRACE_ERR( "\t" << iterParam->first << ": "); for ( size_t i = 0; i < iterParam->second.size(); i++ ) TRACE_ERR( iterParam->second[i] << " "); TRACE_ERR( endl); } } // check for illegal parameters bool noErrorFlag = true; for (int i = 0 ; i < argc ; i++) { if (isOption(argv[i])) { string paramSwitch = (string) argv[i]; string paramName = paramSwitch.substr(1); if (m_valid.find(paramName) == m_valid.end()) { UserMessage::Add("illegal switch: " + paramSwitch); noErrorFlag = false; } } } // check if parameters make sense return Validate() && noErrorFlag;}/** check that parameter settings make sense */bool Parameter::Validate() { bool noErrorFlag = true; // required parameters if (m_setting["ttable-file"].size() == 0) { UserMessage::Add("No phrase translation table (ttable-file)"); noErrorFlag = false; } if (m_setting["lmodel-file"].size() == 0) { UserMessage::Add("No language model (lmodel-file)"); noErrorFlag = false; } if (m_setting["lmodel-file"].size() != m_setting["weight-l"].size()) { stringstream errorMsg(""); errorMsg << "Config and parameters specify " << static_cast<int>(m_setting["lmodel-file"].size()) << " language model files (lmodel-file), but " << static_cast<int>(m_setting["weight-l"].size()) << " weights (weight-l)"; errorMsg << endl << "You might be giving '-lmodel-file TYPE FACTOR ORDER FILENAME' but you should be giving these four as a single argument, i.e. '-lmodel-file \"TYPE FACTOR ORDER FILENAME\"'"; UserMessage::Add(errorMsg.str()); noErrorFlag = false; } // do files exist? // phrase tables if (noErrorFlag) { std::vector<std::string> ext; // standard phrase table extension (i.e. full name has to be specified) // raw tables in either un compressed or compressed form ext.push_back(""); ext.push_back(".gz"); // alternative file extension for binary phrase table format: ext.push_back(".binphr.idx"); noErrorFlag = FilesExist("ttable-file", 3,ext); } // language model if (noErrorFlag) noErrorFlag = FilesExist("lmodel-file", 3); // input file if (noErrorFlag && m_setting["input-file"].size() == 1) { noErrorFlag = FileExists(m_setting["input-file"][0]); } // generation tables if (noErrorFlag) { std::vector<std::string> ext; //raw tables in either un compressed or compressed form ext.push_back(""); ext.push_back(".gz"); noErrorFlag = FilesExist("generation-file", 3, ext);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -