📄 xmpscanner.cpp
字号:
default : // Look for the closing quote. assert ( ths->fPosition == 2 ); if ( currChar != ths->fQuoteChar ) return eTriNo; ths->fBufferPtr += bytesPerChar; return eTriYes; break; } }} // CaptureAccess// =================================================================================================// RecordTailAttr// ==============XMPScanner::PacketMachine::TriStateXMPScanner::PacketMachine::RecordTailAttr ( PacketMachine * ths, const char * /* unused */ ){ // There are no known "general" attributes for the packet trailer. ths->fAttrName.erase ( ths->fAttrName.begin(), ths->fAttrName.end() ); ths->fAttrValue.erase ( ths->fAttrValue.begin(), ths->fAttrValue.end() ); return eTriYes;} // RecordTailAttr// =================================================================================================// CheckPacketEnd// ==============//// Check for trailing padding and record the packet length. We have trailing padding if the bytes// attribute is present and has a value greater than the current length.XMPScanner::PacketMachine::TriStateXMPScanner::PacketMachine::CheckPacketEnd ( PacketMachine * ths, const char * /* unused */ ){ const int bytesPerChar = ths->fBytesPerChar; if ( ths->fPosition == 0 ) { // First call, decide if there is trailing padding. const XMP_Int64 currLen64 = (ths->fBufferOffset + (ths->fBufferPtr - ths->fBufferOrigin)) - ths->fPacketStart; if ( currLen64 > 0x7FFFFFFF ) throw std::runtime_error ( "Packet length exceeds 2GB-1" ); const XMP_Int32 currLength = (XMP_Int32)currLen64; if ( (ths->fBytesAttr != -1) && (ths->fBytesAttr != currLength) ) { if ( ths->fBytesAttr < currLength ) { ths->fBogusPacket = true; // The bytes attribute value is too small. } else { ths->fPosition = ths->fBytesAttr - currLength; if ( (ths->fPosition % ths->fBytesPerChar) != 0 ) { ths->fBogusPacket = true; // The padding is not a multiple of the character size. ths->fPosition = (ths->fPosition / ths->fBytesPerChar) * ths->fBytesPerChar; } } } } while ( ths->fPosition > 0 ) { if ( ths->fBufferPtr >= ths->fBufferLimit ) return eTriMaybe; const char currChar = *ths->fBufferPtr; if ( (currChar != ' ') && (currChar != '\t') && (currChar != '\n') && (currChar != '\r') ) { ths->fBogusPacket = true; // The padding is not whitespace. break; // Stop the packet here. } ths->fPosition -= bytesPerChar; ths->fBufferPtr += bytesPerChar; } const XMP_Int64 currLen64 = (ths->fBufferOffset + (ths->fBufferPtr - ths->fBufferOrigin)) - ths->fPacketStart; if ( currLen64 > 0x7FFFFFFF ) throw std::runtime_error ( "Packet length exceeds 2GB-1" ); ths->fPacketLength = (XMP_Int32)currLen64; return eTriYes;} // CheckPacketEnd// =================================================================================================// CheckFinalNulls// ===============//// Do some special case processing for little endian characters. We have to make sure the presumed// nulls after the last character actually exist, i.e. that the stream does not end too soon. Note// that the prior character scanning has moved the buffer pointer to the address following the last// byte of the last character. I.e. we're already past the presumed nulls, so we can't check their// content. All we can do is verify that the stream does not end too soon.//// Doing this check is simple yet subtle. If we're still in the current buffer then the trailing// bytes obviously exist. If we're exactly at the end of the buffer then the bytes also exist.// The only question is when we're actually past this buffer, partly into the next buffer. This is// when "ths->fBufferPtr > ths->fBufferLimit" on entry. For that case we have to wait until we've// actually seen enough extra bytes of input.//// Since the normal buffer processing is already adjusting for this partial character overrun, all// that needs to be done here is wait until "ths->fBufferPtr <= ths->fBufferLimit" on entry. In// other words, if we're presently too far, ths->fBufferPtr will be adjusted by the amount of the// overflow the next time XMPScanner::Scan is called. This might still be too far, so just keep// waiting for enough data to pass by.//// Note that there is a corresponding special case for big endian characters, we must decrement the// starting offset by the number of leading nulls. But we don't do that here, we leave it to the// outer code. This is because the leading nulls might have been at the exact end of a previous// buffer, in which case we have to also decrement the length of that raw data snip.XMPScanner::PacketMachine::TriStateXMPScanner::PacketMachine::CheckFinalNulls ( PacketMachine * ths, const char * /* unused */ ){ if ( (ths->fCharForm != eChar8Bit) && CharFormIsLittleEndian ( ths->fCharForm ) ) { if ( ths->fBufferPtr > ths->fBufferLimit ) return eTriMaybe; } return eTriYes;} // CheckFinalNulls// =================================================================================================// SetNextRecognizer// =================voidXMPScanner::PacketMachine::SetNextRecognizer ( RecognizerKind nextRecognizer ){ fRecognizer = nextRecognizer; fPosition = 0; } // SetNextRecognizer// =================================================================================================// FindNextPacket// ==============// *** When we start validating intervening nulls for 2 and 4 bytes characters, throw an exception// *** for errors. Don't return eTriNo, that might skip at an optional point.XMPScanner::PacketMachine::TriStateXMPScanner::PacketMachine::FindNextPacket (){ TriState status; #define kPacketHead "?xpacket begin=" #define kPacketID "W5M0MpCehiHzreSzNTczkc9d" #define kPacketTail "?xpacket end=" static const RecognizerInfo recognizerTable [eRecognizerCount] = { // ! Would be safer to assign these explicitly. // proc successNext failureNext literal { NULL, eFailureRecognizer, eFailureRecognizer, NULL}, // eFailureRecognizer { NULL, eSuccessRecognizer, eSuccessRecognizer, NULL}, // eSuccessRecognizer { FindLessThan, eHeadStartRecorder, eFailureRecognizer, "H" }, // eLeadInRecognizer { RecordStart, eHeadStartRecognizer, eLeadInRecognizer, NULL }, // eHeadStartRecorder { MatchString, eBOMRecognizer, eLeadInRecognizer, kPacketHead }, // eHeadStartRecognizer { RecognizeBOM, eIDTagRecognizer, eLeadInRecognizer, NULL }, // eBOMRecognizer { MatchString, eIDOpenRecognizer, eLeadInRecognizer, " id=" }, // eIDTagRecognizer { MatchOpenQuote, eIDValueRecognizer, eLeadInRecognizer, NULL }, // eIDOpenRecognizer { MatchString, eIDCloseRecognizer, eLeadInRecognizer, kPacketID }, // eIDValueRecognizer { MatchCloseQuote, eAttrSpaceRecognizer_1, eLeadInRecognizer, NULL }, // eIDCloseRecognizer { MatchChar, eAttrNameRecognizer_1, eHeadEndRecognizer, " " }, // eAttrSpaceRecognizer_1 { CaptureAttrName, eAttrValueRecognizer_1, eLeadInRecognizer, NULL }, // eAttrNameRecognizer_1 { CaptureAttrValue, eAttrValueRecorder_1, eLeadInRecognizer, NULL }, // eAttrValueRecognizer_1 { RecordHeadAttr, eAttrSpaceRecognizer_1, eLeadInRecognizer, NULL }, // eAttrValueRecorder_1 { MatchString, eBodyRecognizer, eLeadInRecognizer, "?>" }, // eHeadEndRecognizer { FindLessThan, eTailStartRecognizer, eBodyRecognizer, "T"}, // eBodyRecognizer { MatchString, eAccessValueRecognizer, eBodyRecognizer, kPacketTail }, // eTailStartRecognizer { CaptureAccess, eAttrSpaceRecognizer_2, eBodyRecognizer, NULL }, // eAccessValueRecognizer { MatchChar, eAttrNameRecognizer_2, eTailEndRecognizer, " " }, // eAttrSpaceRecognizer_2 { CaptureAttrName, eAttrValueRecognizer_2, eBodyRecognizer, NULL }, // eAttrNameRecognizer_2 { CaptureAttrValue, eAttrValueRecorder_2, eBodyRecognizer, NULL }, // eAttrValueRecognizer_2 { RecordTailAttr, eAttrSpaceRecognizer_2, eBodyRecognizer, NULL }, // eAttrValueRecorder_2 { MatchString, ePacketEndRecognizer, eBodyRecognizer, "?>" }, // eTailEndRecognizer { CheckPacketEnd, eCloseOutRecognizer, eBodyRecognizer, "" }, // ePacketEndRecognizer { CheckFinalNulls, eSuccessRecognizer, eBodyRecognizer, "" } // eCloseOutRecognizer }; while ( true ) { switch ( fRecognizer ) { case eFailureRecognizer : return eTriNo; case eSuccessRecognizer : return eTriYes; default : // ------------------------------------------------------------------- // For everything else, the normal cases, use the state machine table. const RecognizerInfo * thisState = &recognizerTable [fRecognizer]; status = thisState->proc ( this, thisState->literal ); switch ( status ) { case eTriNo : SetNextRecognizer ( thisState->failureNext ); continue; case eTriYes : SetNextRecognizer ( thisState->successNext ); continue; case eTriMaybe : fBufferOverrun = fBufferPtr - fBufferLimit; return eTriMaybe; // Keep this recognizer intact, to be resumed later. } } // switch ( fRecognizer ) { ... } // while ( true ) { ...} // FindNextPacket// =================================================================================================// =================================================================================================// class InternalSnip// ==================// =================================================================================================// InternalSnip// ============XMPScanner::InternalSnip::InternalSnip ( XMP_Int64 offset, XMP_Int64 length ){ fInfo.fOffset = offset; fInfo.fLength = length; } // InternalSnip// =================================================================================================// InternalSnip// ============XMPScanner::InternalSnip::InternalSnip ( const InternalSnip & rhs ) : fInfo ( rhs.fInfo ), fMachine ( NULL ){ assert ( rhs.fMachine.get() == NULL ); // Don't copy a snip with a machine. assert ( (rhs.fInfo.fEncodingAttr == 0) || (*rhs.fInfo.fEncodingAttr == 0) ); // Don't copy a snip with an encoding.} // InternalSnip// =================================================================================================// ~InternalSnip// =============XMPScanner::InternalSnip::~InternalSnip (){} // ~InternalSnip// =================================================================================================// =================================================================================================// class XMPScanner// ================// =================================================================================================// DumpSnipList// ============#if DEBUGstatic const char * snipStateName [6] = { "not-seen", "pending", "raw-data", "good-packet", "partial", "bad-packet" };voidXMPScanner::DumpSnipList ( const char * title ){ InternalSnipIterator currPos = fInternalSnips.begin(); InternalSnipIterator endPos = fInternalSnips.end(); cout << endl << title << " snip list: " << fInternalSnips.size() << endl; for ( ; currPos != endPos; ++currPos ) { SnipInfo * currSnip = &currPos->fInfo; cout << '\t' << currSnip << ' ' << snipStateName[currSnip->fState] << ' ' << currSnip->fOffset << ".." << (currSnip->fOffset + currSnip->fLength - 1) << ' ' << currSnip->fLength << ' ' << endl; }} // DumpSnipList#endif// =================================================================================================// PrevSnip and NextSnip// =====================XMPScanner::InternalSnipIteratorXMPScanner::PrevSnip ( InternalSnipIterator snipPos ){ InternalSnipIterator prev = snipPos; return --prev;} // PrevSnipXMPScanner::InternalSnipIteratorXMPScanner::NextSnip ( InternalSnipIterator snipPos ){ InternalSnipIterator next = snipPos; return ++next;} // NextSnip// =================================================================================================// XMPScanner// ==========//// Initialize the scanner object with one "not seen" snip covering the whole stream.XMPScanner::XMPScanner ( XMP_Int64 streamLength ) : fStreamLength ( streamLength ) { InternalSnip rootSnip ( 0, streamLength ); if ( streamLength > 0 ) fInternalSnips.push_front ( rootSnip ); // Be nice for empty files. // DumpSnipList ( "New XMPScanner" ); } // XMPScanner// =================================================================================================// ~XMPScanner// ===========XMPScanner::~XMPScanner(){ } // ~XMPScanner
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -