xmlreader.cpp
来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,501 行 · 第 1/4 页
CPP
1,501 行
fCharSizeBuf[fCharsAvail] = 0; fCharOfsBuf[fCharsAvail] = 0; fCharBuf[fCharsAvail++] = chSpace; }}XMLReader::~XMLReader(){ fMemoryManager->deallocate(fEncodingStr); fMemoryManager->deallocate(fPublicId); fMemoryManager->deallocate(fSystemId); delete fStream; delete fTranscoder;}// ---------------------------------------------------------------------------// XMLReader: Character buffer management methods// ---------------------------------------------------------------------------unsigned int XMLReader::getSrcOffset() const{ if (!fSrcOfsSupported || !fCalculateSrcOfs) ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Reader_SrcOfsNotSupported, fMemoryManager); // // Take the current source offset and add in the sizes that we've // eaten from the source so far. // if( fCharIndex == 0 ) { return fSrcOfsBase; } if( fCharIndex < fCharsAvail ) { return (fSrcOfsBase + fCharOfsBuf[fCharIndex]); } return (fSrcOfsBase + fCharOfsBuf[fCharIndex-1] + fCharSizeBuf[fCharIndex-1]);}bool XMLReader::refreshCharBuffer(){ // If the no more flag is set, then don't both doing anything if (fNoMore) return false; unsigned int startInd; // See if we have any existing chars. const unsigned int spareChars = fCharsAvail - fCharIndex; // If we are full, then don't do anything. if (spareChars == kCharBufSize) return true; // // If no transcoder has been created yet, then we never saw the // any encoding="" string and the encoding was not forced, so lets // create one now. We know that it won't change now. // // However, note that if we autosensed EBCDIC, then we have to // consider it an error if we never got an encoding since we don't // know what variant of EBCDIC it is. // if (!fTranscoder) { if (fEncoding == XMLRecognizer::EBCDIC) ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Reader_EncodingStrRequired, fMemoryManager); // Ask the transcoding service to make use a transcoder XMLTransService::Codes failReason; fTranscoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor ( fEncodingStr , failReason , kCharBufSize , fMemoryManager ); if (!fTranscoder) { ThrowXMLwithMemMgr1 ( TranscodingException , XMLExcepts::Trans_CantCreateCvtrFor , fEncodingStr , fMemoryManager ); } } // // Add the number of source bytes eaten so far to the base src // offset member. // if (fCalculateSrcOfs) { for (startInd = 0; startInd < fCharIndex; startInd++) fSrcOfsBase += fCharSizeBuf[startInd]; } // // If there are spare chars, then move then down to the bottom. We // have to move the char sizes down also. // startInd = 0; if (spareChars) { for (unsigned int index = fCharIndex; index < fCharsAvail; index++) { fCharBuf[startInd] = fCharBuf[index]; fCharSizeBuf[startInd] = fCharSizeBuf[index]; startInd++; } } // // And then get more chars, starting after any spare chars that were // left over from the last time. // fCharsAvail = xcodeMoreChars ( &fCharBuf[startInd] , &fCharSizeBuf[startInd] , kCharBufSize - spareChars ); // Add back in the spare chars fCharsAvail += spareChars; // Reset the buffer index to zero, so we start from the 0th char again fCharIndex = 0; // // If no chars available, then we have to check for one last thing. If // this is reader for a PE and its not being expanded inside a literal, // then unget a trailing space. We use a boolean to avoid triggering // this more than once. // if (!fCharsAvail && (fType == Type_PE) && (fRefFrom == RefFrom_NonLiteral) && !fSentTrailingSpace) { fCharBuf[0] = chSpace; fCharsAvail = 1; fSentTrailingSpace = true; } // // If we are on our first block of chars and the encoding is one of the // UTF-16 formats, then check the first char for the BOM and skip over // it manually. // if (fCharsAvail) { if ((fCurLine == 1) && (fCurCol == 1)) { if (((fEncoding == XMLRecognizer::UTF_16L) || (fEncoding == XMLRecognizer::UTF_16B)) && !startInd) { if ((fCharBuf[startInd] == chUnicodeMarker) || (fCharBuf[startInd] == chSwappedUnicodeMarker)) { fCharIndex++; } } // If there's a utf-8 BOM (0xEF 0xBB 0xBF), skip past it. else { const char* asChars = (const char*)fRawByteBuf; if ((fRawBytesAvail > XMLRecognizer::fgUTF8BOMLen )&& (XMLString::compareNString( asChars , XMLRecognizer::fgUTF8BOM , XMLRecognizer::fgUTF8BOMLen) == 0) && !startInd) { fCharIndex += XMLRecognizer::fgUTF8BOMLen; } } } } // // If we get here with no more chars, then set the fNoMore flag which // lets us optimize and know without checking that no more chars are // available. // if (!fCharsAvail) fNoMore = true; // Calculate fCharOfsBuf using the elements from fCharBufSize if (fCalculateSrcOfs) { fCharOfsBuf[0] = 0; for (unsigned int index = 1; index < fCharsAvail; ++index) { fCharOfsBuf[index] = fCharOfsBuf[index-1]+fCharSizeBuf[index-1]; } } return (fCharsAvail != 0);}// ---------------------------------------------------------------------------// XMLReader: Scanning methods// ---------------------------------------------------------------------------bool XMLReader::getName(XMLBuffer& toFill, const bool token){ // Ok, first lets see if we have chars in the buffer. If not, then lets // reload. if (fCharIndex == fCharsAvail) { if (!refreshCharBuffer()) return false; } // Lets check the first char for being a first name char. If not, then // what's the point in living mannnn? Just give up now. We only do this // if its a name and not a name token that they want. if (!token) { if (fXMLVersion == XMLV1_1 && ((fCharBuf[fCharIndex] >= 0xD800) && (fCharBuf[fCharIndex] <= 0xDB7F))) { if ((fCharBuf[fCharIndex+1] < 0xDC00) || (fCharBuf[fCharIndex+1] > 0xDFFF)) return false; // Looks ok, so lets eat it and put it in our buffer toFill.append(fCharBuf[fCharIndex++]); fCurCol++; toFill.append(fCharBuf[fCharIndex++]); fCurCol++; } else { if (!isFirstNameChar(fCharBuf[fCharIndex])) return false; // Looks ok, so lets eat it and put it in our buffer. Update column also! toFill.append(fCharBuf[fCharIndex++]); fCurCol++; } } // And now we loop until we run out of data in this reader or we hit // a non-name char. do { while (fCharIndex < fCharsAvail) { // Check the current char and take it if its a name char. Else // break out. if (fXMLVersion == XMLV1_1 && ((fCharBuf[fCharIndex] >= 0xD800) && (fCharBuf[fCharIndex] <= 0xDB7F))) { if ((fCharBuf[fCharIndex+1] < 0xDC00) || (fCharBuf[fCharIndex+1] > 0xDFFF)) return !toFill.isEmpty(); toFill.append(fCharBuf[fCharIndex++]); fCurCol++; toFill.append(fCharBuf[fCharIndex++]); fCurCol++; } else { if (!isNameChar(fCharBuf[fCharIndex])) { return !toFill.isEmpty(); } toFill.append(fCharBuf[fCharIndex++]); fCurCol++; } } // If we don't get no more, then break out. } while (refreshCharBuffer()); return !toFill.isEmpty();}bool XMLReader::getSpaces(XMLBuffer& toFill){ // // We just loop until we either hit a non-space or the end of this // entity. We return true if we returned because of a non-space and // false if because of end of entity. // // NOTE: We have to maintain line/col info here and we have to do // whitespace normalization if we are not already internalized. // while (true) { // Loop through the current chars in the buffer while (fCharIndex < fCharsAvail) { // Get the current char out of the buffer XMLCh curCh = fCharBuf[fCharIndex]; // // See if its a white space char. If so, then process it. Else // we've hit a non-space and need to return. // if (isWhitespace(curCh)) { // Eat this char fCharIndex++; handleEOL(curCh, false); // Ok we can add this guy to our buffer toFill.append(curCh); } else { // Return true to indicate we broke out due to a whitespace return true; } } // // We've eaten up the current buffer, so lets try to reload it. If // we don't get anything new, then break out. If we do, then we go // back to the top to keep getting spaces. // if (!refreshCharBuffer()) break; } return false;}bool XMLReader::getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck){ while (true) { // Loop through the current chars in the buffer while (fCharIndex < fCharsAvail) { // Get the current char out of the buffer XMLCh curCh = fCharBuf[fCharIndex]; // // See if its not a white space or our target char, then process // it. Else, we need to return. // if (!isWhitespace(curCh) && (curCh != toCheck)) { // Eat this char fCharIndex++; handleEOL(curCh, false); // Add it to our buffer toFill.append(curCh); } else { return true; } } // // We've eaten up the current buffer, so lets try to reload it. If // we don't get anything new, then break out. If we do, then we go // back to the top to keep getting spaces. // if (!refreshCharBuffer()) break; } // We never hit any non-space and ate up the whole reader return false;}bool XMLReader::skipIfQuote(XMLCh& chGotten){ if (fCharIndex == fCharsAvail)
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?