xmlformatter.cpp

来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 706 行 · 第 1/2 页

CPP
706
字号
    fIsXML11 = false;  // docVersion 1.0 is not 1.1!}XMLFormatter::~XMLFormatter(){    fMemoryManager->deallocate(fAposRef); //delete [] fAposRef;    fMemoryManager->deallocate(fAmpRef); //delete [] fAmpRef;    fMemoryManager->deallocate(fGTRef); //delete [] fGTRef;    fMemoryManager->deallocate(fLTRef); //delete [] fLTRef;    fMemoryManager->deallocate(fQuoteRef); //delete [] fQuoteRef;    fMemoryManager->deallocate(fOutEncoding); //delete [] fOutEncoding;    delete fXCoder;    // We DO NOT own the target object!}// ---------------------------------------------------------------------------//  XMLFormatter: Formatting methods// ---------------------------------------------------------------------------voidXMLFormatter::formatBuf(const   XMLCh* const    toFormat                        , const unsigned int    count                        , const EscapeFlags     escapeFlags                        , const UnRepFlags      unrepFlags){    //    //  Figure out the actual escape flag value. If the parameter is not    //  the default, then take it. Else take the current default.    //    const EscapeFlags actualEsc = (escapeFlags == DefaultEscape)                                ? fEscapeFlags : escapeFlags;    // And do the same for the unrep flags    const UnRepFlags  actualUnRep = (unrepFlags == DefaultUnRep)                                    ? fUnRepFlags : unrepFlags;    //    //  If the actual unrep action is that they want to provide char refs    //  for unrepresentable chars, then this one is a much more difficult    //  one to do cleanly, and we handle it separately.    //    if (actualUnRep == UnRep_CharRef)    {        specialFormat(toFormat, count, actualEsc);        return;    }    //    //  If we don't have any escape flags set, then we can do the most    //  efficient loop, else we have to do it the hard way.    //    const XMLCh*    srcPtr = toFormat;    const XMLCh*    endPtr = toFormat + count;    if (actualEsc == NoEscapes)    {        //        //  Just do a whole buffer at a time into the temp buffer, cap         //  it off, and send it to the target.         //        if (srcPtr < endPtr)           srcPtr += handleUnEscapedChars(srcPtr, endPtr - srcPtr, actualUnRep);     }     else    {        //        //  Escape chars that require it according tot he scale flags         //  we were given. For the others, try to accumulate them and         //  format them in as big as bulk as we can.         //        while (srcPtr < endPtr)        {            //            //  Run a temp pointer up until we hit a character that we have            //  to escape. Then we can convert all the chars between our            //  current source pointer and here all at once.            //            const XMLCh* tmpPtr = srcPtr;            while ((tmpPtr < endPtr) && !inEscapeList(actualEsc, *tmpPtr))                tmpPtr++;            //            //  If we got any chars, then lets convert them and write them            //  out.            //            if (tmpPtr > srcPtr)               srcPtr += handleUnEscapedChars(srcPtr, tmpPtr - srcPtr,                                                actualUnRep);              else if (tmpPtr < endPtr)            {                //                //  Ok, so we've hit a char that must be escaped. So do                //  this one specially.                //                const XMLByte * theChars;                                switch (*srcPtr) {                     case chAmpersand :                        theChars = getCharRef(fAmpLen, fAmpRef, gAmpRef);                         fTarget->writeChars(theChars, fAmpLen, this);                        break;                    case chSingleQuote :                        theChars = getCharRef(fAposLen, fAposRef, gAposRef);                         fTarget->writeChars(theChars, fAposLen, this);                        break;                    case chDoubleQuote :                        theChars = getCharRef(fQuoteLen, fQuoteRef, gQuoteRef);                         fTarget->writeChars(theChars, fQuoteLen, this);                        break;                    case chCloseAngle :                        theChars = getCharRef(fGTLen, fGTRef, gGTRef);                         fTarget->writeChars(theChars, fGTLen, this);                        break;                    case chOpenAngle :                        theChars = getCharRef(fLTLen, fLTRef, gLTRef);                         fTarget->writeChars(theChars, fLTLen, this);                        break;                    default:                        // control characters                        writeCharRef(*srcPtr);                        break;                }                srcPtr++;            }        }    }}unsigned int XMLFormatter::handleUnEscapedChars(const XMLCh *                  srcPtr,                                    const unsigned int             oCount,                                    const UnRepFlags               actualUnRep) {    //   //  Use that to figure out what I should pass to the transcoder. If we   //  are doing character references or failing for unrepresentable chars,   //  then we just throw, since we should never get a call for something   //  we cannot represent. Else, we tell it to just use the replacement   //  char.   //   const XMLTranscoder::UnRepOpts unRepOpts = (actualUnRep == UnRep_Replace)                                             ? XMLTranscoder::UnRep_RepChar                                             : XMLTranscoder::UnRep_Throw;                                             	   unsigned int charsEaten;    unsigned int count = oCount;     while (count) {       const unsigned srcChars           = count > kTmpBufSize ? kTmpBufSize : count;        const unsigned int outBytes           = fXCoder->transcodeTo(srcPtr, srcChars,                                  fTmpBuf, kTmpBufSize,                                 charsEaten, unRepOpts);        if (outBytes) {          fTmpBuf[outBytes]     = 0; fTmpBuf[outBytes + 1] = 0;          fTmpBuf[outBytes + 2] = 0; fTmpBuf[outBytes + 3] = 0;          fTarget->writeChars(fTmpBuf, outBytes, this);       }        srcPtr += charsEaten;       count  -= charsEaten;    }        return oCount; // This should be an assertion that count == 0. }   XMLFormatter& XMLFormatter::operator<<(const XMLCh* const toFormat){    const unsigned int len = XMLString::stringLen(toFormat);    formatBuf(toFormat, len);    return *this;}XMLFormatter& XMLFormatter::operator<<(const XMLCh toFormat){    // Make a temp string format that    XMLCh szTmp[2];    szTmp[0] = toFormat;    szTmp[1] = 0;    formatBuf(szTmp, 1);    return *this;}/** * the parameter, count, is needed since stringLen() * does not work on a BOM like "0xFE0xFF0x000x00" or * "0x000x000xFF0xFE" **/void XMLFormatter::writeBOM(const XMLByte* const toFormat                          , const unsigned int   count){    fTarget->writeChars(toFormat, count, this);    }// ---------------------------------------------------------------------------//  XMLFormatter: Private helper methods// ---------------------------------------------------------------------------void XMLFormatter::writeCharRef(const XMLCh &toWrite){    XMLCh tmpBuf[32];    tmpBuf[0] = chAmpersand;    tmpBuf[1] = chPound;    tmpBuf[2] = chLatin_x;    // Build a char ref for the current char    XMLString::binToText(toWrite, &tmpBuf[3], 8, 16, fMemoryManager);    const unsigned int bufLen = XMLString::stringLen(tmpBuf);    tmpBuf[bufLen] = chSemiColon;    tmpBuf[bufLen+1] = chNull;    // write it out    formatBuf(tmpBuf            , bufLen + 1            , XMLFormatter::NoEscapes            , XMLFormatter::UnRep_Fail);}void XMLFormatter::writeCharRef(unsigned long toWrite){    XMLCh tmpBuf[32];    tmpBuf[0] = chAmpersand;    tmpBuf[1] = chPound;    tmpBuf[2] = chLatin_x;    // Build a char ref for the current char    XMLString::binToText(toWrite, &tmpBuf[3], 8, 16, fMemoryManager);    const unsigned int bufLen = XMLString::stringLen(tmpBuf);    tmpBuf[bufLen] = chSemiColon;    tmpBuf[bufLen+1] = chNull;    // write it out    formatBuf(tmpBuf            , bufLen + 1            , XMLFormatter::NoEscapes            , XMLFormatter::UnRep_Fail);}const XMLByte* XMLFormatter::getCharRef(unsigned int & count,                                         XMLByte*       &ref,                                         const XMLCh *  stdRef) {   if (!ref) {        unsigned int charsEaten;       const unsigned int outBytes =            fXCoder->transcodeTo(stdRef, XMLString::stringLen(stdRef),                                 fTmpBuf, kTmpBufSize, charsEaten,                                 XMLTranscoder::UnRep_Throw);        fTmpBuf[outBytes] = 0;        fTmpBuf[outBytes + 1] = 0;       fTmpBuf[outBytes + 2] = 0;        fTmpBuf[outBytes + 3] = 0;       ref = (XMLByte*) fMemoryManager->allocate       (           (outBytes + 4) * sizeof(XMLByte)       );//new XMLByte[outBytes + 4];        memcpy(ref, fTmpBuf, outBytes + 4);        count = outBytes;    }   return ref; }void XMLFormatter::specialFormat(const  XMLCh* const    toFormat                                , const unsigned int    count                                , const EscapeFlags     escapeFlags){    //    //  We have to check each character and see if it could be represented.    //  As long as it can, we just keep up with where we started and how    //  many chars we've checked. When we hit an unrepresentable one, we    //  stop, transcode everything we've collected, then start handling    //  the unrepresentables via char refs. We repeat this until we get all    //  the chars done.    //    const XMLCh*    srcPtr = toFormat;    const XMLCh*    endPtr = toFormat + count;    while (srcPtr < endPtr)    {        const XMLCh* tmpPtr = srcPtr;        while (tmpPtr < endPtr)        {            if (fXCoder->canTranscodeTo(*tmpPtr))                tmpPtr++;            else                break;        }        if (tmpPtr > srcPtr)        {            // We got at least some chars that can be done normally            formatBuf            (                srcPtr                , tmpPtr - srcPtr                , escapeFlags                , XMLFormatter::UnRep_Fail            );            // Update the source pointer to our new spot            srcPtr = tmpPtr;        }         else        {             //  We hit something unrepresentable. So continue forward doing            //  char refs until we hit something representable again or the            //  end of input.            //            while (srcPtr < endPtr)            {                if ((*srcPtr & 0xFC00) == 0xD800) {                    // we have encountered a surrogate, need to recombine before printing out					                      // use writeCharRef that takes unsigned long to get values larger than                    // hex 0xFFFF printed.                    tmpPtr = srcPtr;                    tmpPtr++; // point at low surrogate                    writeCharRef((unsigned long) (0x10000+((*srcPtr-0xD800)<<10)+*tmpPtr-0xDC00));                    srcPtr++; // advance to low surrogate (will advance again below)                }                else {                    writeCharRef(*srcPtr);                }                // Move up the source pointer and break out if needed                srcPtr++;                if (fXCoder->canTranscodeTo(*srcPtr))                    break;            }        }    }}XERCES_CPP_NAMESPACE_END

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?