⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 util.h.svn-base

📁 moses开源的机器翻译系统
💻 SVN-BASE
字号:
// $Id$/***********************************************************************Moses - factored phrase-based language decoderCopyright (C) 2006 University of EdinburghThis library is free software; you can redistribute it and/ormodify it under the terms of the GNU Lesser General PublicLicense as published by the Free Software Foundation; eitherversion 2.1 of the License, or (at your option) any later version.This library is distributed in the hope that it will be useful,but WITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNULesser General Public License for more details.You should have received a copy of the GNU Lesser General PublicLicense along with this library; if not, write to the Free SoftwareFoundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA***********************************************************************/#pragma once#include <cassert>#include <fstream>#include <sstream>#include <string>#include <vector>#include <cmath>#include <limits>#include "TypeDef.h"/** Outputting debugging/verbose information to stderr. * Use TRACE_ENABLE flag to redirect tracing output into oblivion * so that you can output your own ad-hoc debugging info. * However, if you use stderr diretly, please delete calls to it once * you finished debugging so that it won't clutter up. * Also use TRACE_ENABLE to turn off output of any debugging info * when compiling for a gui front-end so that running gui won't generate * output on command line * */#ifdef TRACE_ENABLE#define TRACE_ERR(str) std::cerr << str#else#define TRACE_ERR(str) {}#endif/** verbose macros * */#define VERBOSE(level,str) { if (StaticData::Instance().GetVerboseLevel() >= level) { TRACE_ERR(str); } }#define IFVERBOSE(level) if (StaticData::Instance().GetVerboseLevel() >= level)//! 
//! get string representation of any object/variable, as long as it can pipe to a stream
//! \param input value to format; must support operator<< on a std::ostream
//! \return the text produced by streaming \p input
template<typename T>
inline std::string SPrint(const T &input)
{
	std::stringstream stream("");
	stream << input;
	return stream.str();
}

//! convert string to variable of type T. Used for reading floats, int etc from files
//! \param input text to parse with operator>>
//! \return the parsed value; a value-initialised T (e.g. 0) if nothing could be extracted
template<typename T>
inline T Scan(const std::string &input)
{
	std::stringstream stream(input);
	// Value-initialise so a failed extraction (e.g. empty input) does not
	// return an indeterminate value.
	T ret = T();
	stream >> ret;
	return ret;
}

//! Specialisation for performance: parses with atoi instead of a stringstream
template<>
inline int Scan<int>(const std::string &input)
{
	return atoi(input.c_str());
}

//! Specialisation for performance: parses with atof instead of a stringstream
template<>
inline float Scan<float>(const std::string &input)
{
	return static_cast<float>(atof(input.c_str()));
}

//! Specialisation to understand yes/no y/n true/false 0/1 (defined out of line)
template<>
bool Scan<bool>(const std::string &input);

//! convert vectors of string to vectors of type T variables
//! \param input strings to parse, one per output element
//! \return vector of the same length, element i being Scan<T>(input[i])
template<typename T>
inline std::vector<T> Scan(const std::vector< std::string > &input)
{
	std::vector<T> output(input.size());
	for (size_t i = 0 ; i < input.size() ; i++)
	{
		output[i] = Scan<T>( input[i] );
	}
	return output;
}

/** tokenise input string to vector of string. each element has been separated by a character in the delimiters argument.
		The separator can only be 1 character long. The default delimiters are space or tab.
		Runs of consecutive delimiters are collapsed, so no empty tokens are produced.
		\param str text to split
		\param delimiters set of single-character delimiters
		\return the tokens in order of appearance; empty if str holds no non-delimiter character
*/
inline std::vector<std::string> Tokenize(const std::string& str,
																				 const std::string& delimiters = " \t")
{
	std::vector<std::string> tokens;
	// Start of the first token: skip delimiters at beginning.
	std::string::size_type lastPos = str.find_first_not_of(delimiters, 0);
	// End of the first token: the next delimiter after it (npos if none).
	std::string::size_type pos     = str.find_first_of(delimiters, lastPos);

	while (std::string::npos != pos || std::string::npos != lastPos)
	{
		// Found a token, add it to the vector.
		tokens.push_back(str.substr(lastPos, pos - lastPos));
		// Skip delimiters to reach the start of the next token.  Note the "not_of"
		lastPos = str.find_first_not_of(delimiters, pos);
		// Find the delimiter that ends the next token.
		pos = str.find_first_of(delimiters, lastPos);
	}

	return tokens;
}
tokenise input string to vector of type Ttemplate<typename T>inline std::vector<T> Tokenize( const std::string &input															 , const std::string& delimiters = " \t") {	std::vector<std::string> stringVector = Tokenize(input, delimiters);	return Scan<T>( stringVector );}inline std::vector<std::string> TokenizeMultiCharSeparator(																				const std::string& str,																				const std::string& separator){	std::vector<std::string> tokens;	size_t pos = 0;	// Find first "non-delimiter".	std::string::size_type nextPos     = str.find(separator, pos);	while (nextPos != std::string::npos)	{		// Found a token, add it to the vector.		tokens.push_back(str.substr(pos, nextPos - pos));		// Skip delimiters.  Note the "not_of"		pos = nextPos + separator.size();		// Find next "non-delimiter"		nextPos	= str.find(separator, pos);	}	tokens.push_back(str.substr(pos, nextPos - pos));	return tokens;}/** * Convert vector of type T to string */template <typename T>std::string Join(const std::string& delimiter, const std::vector<T>& items){	std::ostringstream outstr;	if(items.size() == 0) return "";	outstr << items[0];	for(unsigned int i = 1; i < items.size(); i++) 		outstr << delimiter << items[i];	return outstr.str();}//! transform prob to natural log scoreinline float TransformScore(float prob){	return log(prob);}//! transform natural log score to prob. Not currently used inline float UntransformScore(float score){	return exp(score);}//! irst number are in log 10, transform to natural loginline float TransformIRSTScore(float irstScore){ 	return irstScore * 2.30258509299405f;}inline float UntransformIRSTScore(float logNScore){ // opposite of above	return logNScore / 2.30258509299405f;}//! make sure score doesn't fall below LOWEST_SCOREinline float FloorScore(float logScore){	return (std::max)(logScore , LOWEST_SCORE);}//! 
Should SRI & IRST transform functions be merged ???inline float TransformSRIScore(float sriScore){	return sriScore * 2.30258509299405f;}inline float UntransformSRIScore(float logNScore){ // opposite of above	return logNScore / 2.30258509299405f;}/** convert prob vector to log prob and calc inner product with weight vector. * At least, that's what I think it does, fn is only 9 lines but can't figure out what it does. * Not sure whether give zens a medal for being a genius, or shoot him for writing unreadable code. Mabe both... */inline float CalcTranslationScore(const std::vector<float> &probVector, 																	const std::vector<float> &weightT) {	assert(weightT.size()==probVector.size());	float rv=0.0;	for(float const *sb=&probVector[0],*se=sb+probVector.size(),*wb=&weightT[0];			sb!=se; ++sb, ++wb)		rv += TransformScore(*sb) * (*wb);	return rv;}/** declaration of ToString() function to go in header for each class.  *	This function, as well as the operator<< fn for each class, is  *	for debugging purposes only. The output format is likely to change from  *	time-to-time as classes are updated so shouldn't be relied upon  *	for any decoding algorithm*/#define TO_STRING()	 std::string ToString() const;//! definition of ToString() function to go in .cpp file. Can be used for any class that can be piped to a stream#define TO_STRING_BODY(CLASS) 	\	std::string CLASS::ToString() const	\	{															\		std::stringstream out;			\		out << *this;								\		return out.str();						\	}															\//! delete and remove every element of a collection object such as map, set, list etctemplate<class COLL>void RemoveAllInColl(COLL &coll){	for (typename COLL::const_iterator iter = coll.begin() ; iter != coll.end() ; ++iter)	{		delete (*iter);	}	coll.clear();}//! x-platform reference to temp folderstd::string GetTempFolder();//! Create temp file and return output stream and full file path as argumentsvoid CreateTempFile(std::ofstream  &fileStream, std::string &filePath);//! 
//! MD5 hash of a file
std::string GetMD5Hash(const std::string &filePath);

//! save memory by getting rid of spare, unused elements in a collection
//! \param v container providing capacity(), size(), a copy constructor and swap()
//! The contents are copied into a fresh container which is then swapped with v.
//! The resulting capacity is whatever the copy allocates; it is not guaranteed
//! to equal size() (e.g. for short std::string values), so it is not asserted.
template<typename T>
inline void ShrinkToFit(T& v)
{
	if(v.capacity()>v.size())
		T(v).swap(v);
}

bool FileExists(const std::string& filePath);

//! delete white spaces at beginning and end of string
//! \param dropChars characters to strip (default: space, tab, newline, carriage return)
const std::string Trim(const std::string& str, const std::string dropChars = " \t\n\r");
const std::string ToLower(const std::string& str);

// A couple of utilities to measure decoding time
void ResetUserTime();
void PrintUserTime(const std::string &message);

// dump SGML parser for <seg> tags
#include <map>
std::map<std::string, std::string> ProcessAndStripSGML(std::string &line);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -