📄 utf8.hpp
字号:
/* * =========================================================================== * PRODUCTION $Log: utf8.hpp,v $ * PRODUCTION Revision 1000.1 2004/04/21 13:24:22 gouriano * PRODUCTION PRODUCTION: UPGRADED [CATCHUP_003] Dev-tree R1.5 * PRODUCTION * =========================================================================== */#ifndef UTIL_UTF8__H#define UTIL_UTF8__H/* $Id: utf8.hpp,v 1000.1 2004/04/21 13:24:22 gouriano Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Author: Aleksey Vinokurov, Vladimir Ivanov * * File Description: * UTF8 conversion functions * */#include <corelib/ncbistd.hpp>#include <vector>/** @addtogroup utf8 * * @{ */BEGIN_NCBI_SCOPEBEGIN_SCOPE(utf8)// For characters that could not be translated into similar ASCII-7 or// Unicode character because there is no graphically similar character in// ASCII-7 table for this one.//const char kOutrangeChar = '?';// 0xFF This means that the character should be skipped in translation to// ASCII-7. // For example, there are a lot of characters which meaning is to modify the// character next to them.const char kSkipChar = '\xFF';// Result (status) conversion Unicode symbols to characterenum EConversionStatus { eSuccess, // Success, result is good eSkip, // Result conversion == kSkipChar eOutrange // Result conversion == kOutrangeChar};// Convert first UTF-8 symbol of "src" into ASCII-7 character.// "ascii_table" specifies whether to use ASCII-7 translation tables.// Length of the retrieved UTF-8 symbol is returned in "*seq_len"// (if "seq_len" is not NULL).// Return resulting ASCII-7 character.// NOTE: If the UTF-8 symbol has no ASCII-7 equivalent, then return// kOutrangeChar or hSkipChar.//NCBI_XUTIL_EXPORTextern char StringToChar(const string& src, size_t* seq_len = 0, bool ascii_table = true, EConversionStatus* status = 0);// Convert UTF-8 string "src" into the ASCII-7 string with// graphically similar characters -- using StringToChar().// Return resulting ASCII-7 string.//NCBI_XUTIL_EXPORTextern string StringToAscii(const string& src, bool ascii_table = true);// Convert first UTF-8 symbol of "src" into a Unicode symbol code.// Length of the retrieved UTF-8 symbol is returned in "*seq_len"// (if "seq_len" is not NULL).// Return resulting Unicode symbol code.// NOTE: If the UTF-8 symbol has no Unicode equivalent, then return// kOutrangeChar or hSkipChar.//NCBI_XUTIL_EXPORTextern long StringToCode(const string& src, size_t* seq_len = 0, EConversionStatus* status = 0);// Convert UTF-8 string "src" into the vector of Unicode symbol codes// using StringToCode().// Return resulting vector.//NCBI_XUTIL_EXPORTextern vector<long> StringToVector(const string& src);// Translate Unicode symbol code "src" into graphically similar ASCII-7// character.// Return resulting ASCII-7 character.// NOTE: If the Unicode symbol has no ASCII-7 equivalent, then return// kOutrangeChar or hSkipChar.//NCBI_XUTIL_EXPORTextern char CodeToChar(const long src, EConversionStatus* status = 0); END_SCOPE(utf8)END_NCBI_SCOPE/* @} *//* * =========================================================================== * $Log: utf8.hpp,v $ * Revision 1000.1 2004/04/21 13:24:22 gouriano * PRODUCTION: UPGRADED [CATCHUP_003] Dev-tree R1.5 * * Revision 1.5 2004/03/11 22:55:43 gorelenk * Added export prefixes NCBI_XUTIL_EXPORT to functions. * * Revision 1.4 2003/04/17 17:50:39 siyan * Added doxygen support * * Revision 1.3 2002/01/18 19:21:52 ivanov * Polish source code * * Revision 1.2 2001/04/18 16:26:04 ivanov * Change types TUnicodeChar, TUnicodeString to simple types. * TUnicode char to long, TUnicodeString to vector<long>. * * Revision 1.1 2001/04/06 19:14:36 ivanov * Initial revision * =========================================================================== */#endif /* UTIL_UTF8__H */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -