macosunicodeconverter.cpp
来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,049 行 · 第 1/3 页
CPP
1,049 行
/* * Copyright 1999-2004 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *//* * $Id: MacOSUnicodeConverter.cpp,v 1.23 2004/09/08 13:56:46 peiyongz Exp $ */ // ---------------------------------------------------------------------------// Includes// ---------------------------------------------------------------------------#include <xercesc/util/XercesDefs.hpp>#include <algorithm>#include <cstddef>#include <cstring>#if defined(XML_METROWERKS) || (__GNUC__ >= 3 && _GLIBCPP_USE_WCHAR_T) // Only used under metrowerks. #include <cwctype>#endif#if defined(__APPLE__) // Framework includes from ProjectBuilder #include <CoreServices/CoreServices.h>#else // Classic includes otherwise #include <MacErrors.h> #include <Script.h> #include <TextUtils.h> #include <TextEncodingConverter.h> #include <TextCommon.h> #include <CodeFragments.h> #include <UnicodeConverter.h> #include <UnicodeUtilities.h> #include <CFCharacterSet.h> #include <CFString.h>#endif#include <xercesc/util/Transcoders/MacOSUnicodeConverter/MacOSUnicodeConverter.hpp>#include <xercesc/util/XMLUniDefs.hpp>#include <xercesc/util/XMLUni.hpp>#include <xercesc/util/XMLString.hpp>#include <xercesc/util/TranscodingException.hpp>#include <xercesc/util/PlatformUtils.hpp>#include <xercesc/util/Janitor.hpp>#include <xercesc/util/Platforms/MacOS/MacOSPlatformUtils.hpp>XERCES_CPP_NAMESPACE_BEGIN// ---------------------------------------------------------------------------// Typedefs// ---------------------------------------------------------------------------// TempBufs are used for cases where we need a temporary buffer while processing.const std::size_t kTempBufCount = 512;typedef char TempCharBuf[kTempBufCount];typedef UniChar TempUniBuf[kTempBufCount];typedef XMLCh TempXMLBuf[kTempBufCount];// ---------------------------------------------------------------------------// Local, const data// ---------------------------------------------------------------------------static const XMLCh MacOSUnicodeConverter::fgMyServiceId[] ={ chLatin_M, chLatin_a, chLatin_c, chLatin_O, chLatin_S, chNull};static const XMLCh MacOSUnicodeConverter::fgMacLCPEncodingName[] ={ chLatin_M, chLatin_a, chLatin_c, chLatin_O, chLatin_S, chLatin_L , chLatin_C, chLatin_P, chLatin_E, chLatin_c, chLatin_o, chLatin_d , chLatin_i, chLatin_n, chLatin_g, chNull};// ---------------------------------------------------------------------------// MacOSUnicodeConverter: Constructors and Destructor// ---------------------------------------------------------------------------MacOSUnicodeConverter::MacOSUnicodeConverter(){ // Test for presense of unicode collation functions mHasUnicodeCollation = (UCCompareTextDefault != (void*)kUnresolvedCFragSymbolAddress);}MacOSUnicodeConverter::~MacOSUnicodeConverter(){}// ---------------------------------------------------------------------------// MacOSUnicodeConverter: The virtual transcoding service API// ---------------------------------------------------------------------------int MacOSUnicodeConverter::compareIString( const XMLCh* const comp1 , const XMLCh* const comp2){ // If unicode collation routines are available, use them. // This should be the case on Mac OS 8.6 and later, // with Carbon 1.0.2 or later, and under Mac OS X. // // Otherwise, but only for Metrowerks, since only Metrowerks // has a c library with a valid set of wchar routines, // fall back to the standard library. if (mHasUnicodeCollation) { // Use the Unicode Utilities to do the compare UCCollateOptions collateOptions = kUCCollateComposeInsensitiveMask | kUCCollateWidthInsensitiveMask | kUCCollateCaseInsensitiveMask | kUCCollatePunctuationSignificantMask ; std::size_t cnt1 = XMLString::stringLen(comp1); std::size_t cnt2 = XMLString::stringLen(comp2); Boolean equivalent = false; SInt32 order = 0; OSStatus status = UCCompareTextDefault( collateOptions, reinterpret_cast<const UniChar*>(comp1), cnt1, reinterpret_cast<const UniChar*>(comp2), cnt2, &equivalent, &order ); return ((status != noErr) || equivalent) ? 0 : order; }#if defined(XML_METROWERKS) else { const XMLCh* cptr1 = comp1; const XMLCh* cptr2 = comp2; while ( (*cptr1 != 0) && (*cptr2 != 0) ) { std::wint_t wch1 = std::towupper(*cptr1); std::wint_t wch2 = std::towupper(*cptr2); if (wch1 != wch2) break; cptr1++; cptr2++; } return (int) (std::towupper(*cptr1) - std::towupper(*cptr2)); }#else else { // For some reason there is no platform utils available // where we expect it. Bail. XMLPlatformUtils::panic(PanicHandler::Panic_NoTransService); return 0; }#endif}int MacOSUnicodeConverter::compareNIString( const XMLCh* const comp1 , const XMLCh* const comp2 , const unsigned int maxChars){ // If unicode collation routines are available, use them. // This should be the case on Mac OS 8.6 and later, // with Carbon 1.0.2 or later, and under Mac OS X. // // Otherwise, but only for Metrowerks, since only Metrowerks // has a c library with a valid set of wchar routines, // fall back to the standard library. if (mHasUnicodeCollation) { // Use the Unicode Utilities to do the compare UCCollateOptions collateOptions = kUCCollateComposeInsensitiveMask | kUCCollateWidthInsensitiveMask | kUCCollateCaseInsensitiveMask | kUCCollatePunctuationSignificantMask ; std::size_t cnt1 = XMLString::stringLen(comp1); std::size_t cnt2 = XMLString::stringLen(comp2); // Restrict view of source characters to first {maxChars} if (cnt1 > maxChars) cnt1 = maxChars; if (cnt2 > maxChars) cnt2 = maxChars; // Do multiple passes over source, comparing each pass. // The first pass that's not equal wins. Boolean equivalent = false; SInt32 order = 0; OSStatus status = UCCompareTextDefault( collateOptions, reinterpret_cast<const UniChar*>(comp1), cnt1, reinterpret_cast<const UniChar*>(comp2), cnt2, &equivalent, &order ); return ((status != noErr) || equivalent) ? 0 : order; }#if defined(XML_METROWERKS) else { unsigned int n = 0; const XMLCh* cptr1 = comp1; const XMLCh* cptr2 = comp2; while ( (*cptr1 != 0) && (*cptr2 != 0) && (n < maxChars) ) { std::wint_t wch1 = std::towupper(*cptr1); std::wint_t wch2 = std::towupper(*cptr2); if (wch1 != wch2) break; cptr1++; cptr2++; n++; } return (int)(std::towupper(*cptr1) - std::towupper(*cptr2)); }#else else { // For some reason there is no platform utils available // where we expect it. Bail. XMLPlatformUtils::panic(PanicHandler::Panic_NoTransService); return 0; }#endif}const XMLCh* MacOSUnicodeConverter::getId() const{ return fgMyServiceId;}bool MacOSUnicodeConverter::isSpace(const XMLCh toCheck) const{#if TARGET_API_MAC_CARBON // Return true if the specified character is in the set. CFCharacterSetRef wsSet = CFCharacterSetGetPredefined(kCFCharacterSetWhitespaceAndNewline); return CFCharacterSetIsCharacterMember(wsSet, toCheck);#elif defined(XML_METROWERKS) || (__GNUC__ >= 3 && _GLIBCPP_USE_WCHAR_T) // Use this if there's a reasonable c library available. // ProjectBuilder currently has no support for iswspace ;( return (std::iswspace(toCheck) != 0);#endif}TextEncodingMacOSUnicodeConverter::discoverLCPEncoding(){ TextEncoding encoding = 0; // Ask the OS for the best text encoding for this application // We would call GetApplicationTextEncoding(), but it's available only in // Carbon (not CarbonCore), and we try to link with frameworks only in CoreServices. // encoding = GetApplicationTextEncoding(); // Get TextEncoding for the current Mac System Script, falling back to Mac Roman if (noErr != UpgradeScriptInfoToTextEncoding( smSystemScript, kTextLanguageDontCare, kTextRegionDontCare, NULL, &encoding)) encoding = CreateTextEncoding(kTextEncodingMacRoman, kTextEncodingDefaultVariant, kTextEncodingDefaultFormat); // Traditionally, the Mac transcoder has used the current system script // as the LCP text encoding. // // As of Xerces 2.6, this continues to be the case if XML_MACOS_LCP_TRADITIONAL // is defined. // // Otherwise, but only for Mac OS X, utf-8 will be used instead. // Since posix paths are utf-8 encoding on OS X, and the OS X // terminal uses utf-8 by default, this seems to make the most sense. #if !defined(XML_MACOS_LCP_TRADITIONAL) if (gMacOSXOrBetter) { // Manufacture a text encoding for UTF8 encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, kTextEncodingDefaultVariant, kUnicodeUTF8Format); } #endif return encoding;}XMLLCPTranscoder* MacOSUnicodeConverter::makeNewLCPTranscoder(){ XMLLCPTranscoder* result = NULL; OSStatus status = noErr; MemoryManager* manager = XMLPlatformUtils::fgMemoryManager; // Discover the text encoding to use for the LCP TextEncoding lcpTextEncoding = discoverLCPEncoding(); // We implement the LCP transcoder in terms of the XMLTranscoder. // Create an XMLTranscoder for this encoding XMLTransService::Codes resValue; XMLTranscoder* xmlTrans = makeNewXMLTranscoder(fgMacLCPEncodingName, resValue, kTempBufCount, lcpTextEncoding, manager); if (xmlTrans) { // Pass the XMLTranscoder over to the LPC transcoder if (resValue == XMLTransService::Ok) result = new MacOSLCPTranscoder(xmlTrans, manager); else delete xmlTrans; } return result;}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?