📄 xmpscanner.cpp
字号:
// =================================================================================================// Copyright 2002-2007 Adobe Systems Incorporated// All Rights Reserved.//// NOTICE: Adobe permits you to use, modify, and distribute this file in accordance with the terms// of the Adobe license agreement accompanying it.//// Adobe patent application tracking #P435, entitled 'Unique markers to simplify embedding data of// one format in a file with a different format', inventors: Sean Parent, Greg Gilley.// =================================================================================================#if WIN32 // The VC++ debugger can't handle long symbol names. #pragma warning ( disable : 4786 )#endif#include "XMPScanner.hpp"#include <cassert>#include <string>#include <cstdlib>#if DEBUG #include <iostream> #include <iomanip> #include <fstream>#endif#ifndef UseStringPushBack // VC++ 6.x does not provide push_back for strings! #define UseStringPushBack 0#endifusing namespace std;// *** Consider Boyer-Moore style search for "<?xpacket begin=". It isn't an obvious win, the// *** additional code might be slower than scanning every character. Especially if we will// *** read every cache line anyway.// =================================================================================================// =================================================================================================// class PacketMachine// ===================//// This is the packet recognizer state machine. The top of the machine is FindNextPacket, this// calls the specific state components and handles transitions. The states are described by an// array of RecognizerInfo records, indexed by the RecognizerKind enumeration. Each RecognizerInfo// record has a function that does that state's work, the success and failure transition states,// and a string literal that is passed to the state function. The literal lets a common MatchChar// or MatchString function be used in several places.//// The state functions are responsible for consuming input to recognize their particular state.// This includes intervening nulls for 16 and 32 bit character forms. For the simplicity, things// are treated as essentially little endian and the nulls are not actually checked. The opening// '<' is found with a byte-by-byte search, then the number of bytes per character is determined// by counting the following nulls. From then on, consuming a character means incrementing the// buffer pointer by the number of bytes per character. Thus the buffer pointer only points to// the "real" bytes. This also means that the pointer can go off the end of the buffer by a// variable amount. The amount of overrun is saved so that the pointer can be positioned at the// right byte to start the next buffer.//// The state functions return a TriState value, eTriYes means the pattern was found, eTriNo means// the pattern was definitely not found, eTriMaybe means that the end of the buffer was reached// while working through the pattern.//// When eTriYes is returned, the fBufferPtr data member is left pointing to the "real" byte// following the last actual byte. Which might not be addressable memory! This also means that// a state function can be entered with nothing available in the buffer. When eTriNo is returned,// the fBufferPtr data member is left pointing to the byte that caused the failure. The state // machine starts over from the failure byte.//// The state functions must preserve their internal micro-state before returning eTriMaybe, and// resume processing when called with the next buffer. The fPosition data member is used to denote// how many actual characters have been consumed. The fNullCount data member is used to denote how// many nulls are left before the next actual character.// =================================================================================================// PacketMachine// =============XMPScanner::PacketMachine::PacketMachine ( XMP_Int64 bufferOffset, const void * bufferOrigin, XMP_Int64 bufferLength ) : // Public members fPacketStart ( 0 ), fPacketLength ( 0 ), fBytesAttr ( -1 ), fCharForm ( eChar8Bit ), fAccess ( ' ' ), fBogusPacket ( false ), // Private members fBufferOffset ( bufferOffset ), fBufferOrigin ( (const char *) bufferOrigin ), fBufferPtr ( fBufferOrigin ), fBufferLimit ( fBufferOrigin + bufferLength ), fRecognizer ( eLeadInRecognizer ), fPosition ( 0 ), fBytesPerChar ( 1 ), fBufferOverrun ( 0 ), fQuoteChar ( ' ' ){ /* REVIEW NOTES : Should the buffer stuff be in a class? */ assert ( bufferOrigin != NULL ); assert ( bufferLength != 0 ); } // PacketMachine// =================================================================================================// ~PacketMachine// ==============XMPScanner::PacketMachine::~PacketMachine (){ // An empty placeholder.} // ~PacketMachine// =================================================================================================// AssociateBuffer// ===============voidXMPScanner::PacketMachine::AssociateBuffer ( XMP_Int64 bufferOffset, const void * bufferOrigin, XMP_Int64 bufferLength ){ fBufferOffset = bufferOffset; fBufferOrigin = (const char *) bufferOrigin; fBufferPtr = fBufferOrigin + fBufferOverrun; fBufferLimit = fBufferOrigin + bufferLength; } // AssociateBuffer// =================================================================================================// ResetMachine// ============voidXMPScanner::PacketMachine::ResetMachine (){ fRecognizer = eLeadInRecognizer; fPosition = 0; fBufferOverrun = 0; fCharForm = eChar8Bit; fBytesPerChar = 1; fAccess = ' '; fBytesAttr = -1; fBogusPacket = false; fAttrName.erase ( fAttrName.begin(), fAttrName.end() ); fAttrValue.erase ( fAttrValue.begin(), fAttrValue.end() ); fEncodingAttr.erase ( fEncodingAttr.begin(), fEncodingAttr.end() ); } // ResetMachine// =================================================================================================// FindLessThan// ============XMPScanner::PacketMachine::TriStateXMPScanner::PacketMachine::FindLessThan ( PacketMachine * ths, const char * which ){ if ( *which == 'H' ) { // -------------------------------------------------------------------------------- // We're looking for the '<' of the header. If we fail there is no packet in this // part of the input, so return eTriNo. ths->fCharForm = eChar8Bit; // We might have just failed from a bogus 16 or 32 bit case. ths->fBytesPerChar = 1; while ( ths->fBufferPtr < ths->fBufferLimit ) { // Don't skip nulls for the header's '<'! if ( *ths->fBufferPtr == '<' ) break; ths->fBufferPtr++; } if ( ths->fBufferPtr >= ths->fBufferLimit ) return eTriNo; ths->fBufferPtr++; return eTriYes; } else { // -------------------------------------------------------------------------------- // We're looking for the '<' of the trailer. We're already inside the packet body, // looking for the trailer. So here if we fail we must return eTriMaybe so that we // keep looking for the trailer in the next buffer. const int bytesPerChar = ths->fBytesPerChar; while ( ths->fBufferPtr < ths->fBufferLimit ) { if ( *ths->fBufferPtr == '<' ) break; ths->fBufferPtr += bytesPerChar; } if ( ths->fBufferPtr >= ths->fBufferLimit ) return eTriMaybe; ths->fBufferPtr += bytesPerChar; return eTriYes; }} // FindLessThan// =================================================================================================// MatchString// ===========XMPScanner::PacketMachine::TriStateXMPScanner::PacketMachine::MatchString ( PacketMachine * ths, const char * literal ){ const int bytesPerChar = ths->fBytesPerChar; const char * litPtr = literal + ths->fPosition; const int charsToGo = strlen ( literal ) - ths->fPosition; int charsDone = 0; while ( (charsDone < charsToGo) && (ths->fBufferPtr < ths->fBufferLimit) ) { if ( *litPtr != *ths->fBufferPtr ) return eTriNo; charsDone++; litPtr++; ths->fBufferPtr += bytesPerChar; } if ( charsDone == charsToGo ) return eTriYes; ths->fPosition += charsDone; return eTriMaybe;} // MatchString// =================================================================================================// MatchChar// =========XMPScanner::PacketMachine::TriStateXMPScanner::PacketMachine::MatchChar ( PacketMachine * ths, const char * literal ){ const int bytesPerChar = ths->fBytesPerChar; if ( ths->fBufferPtr >= ths->fBufferLimit ) return eTriMaybe; const char currChar = *ths->fBufferPtr; if ( currChar != *literal ) return eTriNo; ths->fBufferPtr += bytesPerChar; return eTriYes;} // MatchChar// =================================================================================================// MatchOpenQuote// ==============XMPScanner::PacketMachine::TriStateXMPScanner::PacketMachine::MatchOpenQuote ( PacketMachine * ths, const char * /* unused */ ){ const int bytesPerChar = ths->fBytesPerChar; if ( ths->fBufferPtr >= ths->fBufferLimit ) return eTriMaybe; const char currChar = *ths->fBufferPtr; if ( (currChar != '\'') && (currChar != '"') ) return eTriNo; ths->fQuoteChar = currChar; ths->fBufferPtr += bytesPerChar; return eTriYes;} // MatchOpenQuote// =================================================================================================// MatchCloseQuote// ===============XMPScanner::PacketMachine::TriStateXMPScanner::PacketMachine::MatchCloseQuote ( PacketMachine * ths, const char * /* unused */ ){ return MatchChar ( ths, &ths->fQuoteChar );} // MatchCloseQuote// =================================================================================================// CaptureAttrName// ===============XMPScanner::PacketMachine::TriStateXMPScanner::PacketMachine::CaptureAttrName ( PacketMachine * ths, const char * /* unused */ ){ const int bytesPerChar = ths->fBytesPerChar; char currChar; if ( ths->fPosition == 0 ) { // Get the first character in the name. if ( ths->fBufferPtr >= ths->fBufferLimit ) return eTriMaybe; currChar = *ths->fBufferPtr; if ( ths->fAttrName.size() == 0 ) { if ( ! ( ( ('a' <= currChar) && (currChar <= 'z') ) || ( ('A' <= currChar) && (currChar <= 'Z') ) || (currChar == '_') || (currChar == ':') ) ) { return eTriNo; } } ths->fAttrName.erase ( ths->fAttrName.begin(), ths->fAttrName.end() ); #if UseStringPushBack ths->fAttrName.push_back ( currChar ); #else ths->fAttrName.insert ( ths->fAttrName.end(), currChar ); #endif ths->fBufferPtr += bytesPerChar; } while ( ths->fBufferPtr < ths->fBufferLimit ) { // Get the remainder of the name. currChar = *ths->fBufferPtr; if ( ! ( ( ('a' <= currChar) && (currChar <= 'z') ) || ( ('A' <= currChar) && (currChar <= 'Z') ) || ( ('0' <= currChar) && (currChar <= '9') ) || (currChar == '-') || (currChar == '.') || (currChar == '_') || (currChar == ':') ) ) { break; } #if UseStringPushBack ths->fAttrName.push_back ( currChar ); #else ths->fAttrName.insert ( ths->fAttrName.end(), currChar ); #endif ths->fBufferPtr += bytesPerChar; } if ( ths->fBufferPtr < ths->fBufferLimit ) return eTriYes; ths->fPosition = ths->fAttrName.size(); // The name might span into the next buffer. return eTriMaybe;} // CaptureAttrName// =================================================================================================// CaptureAttrValue// ================//// Recognize the equal sign and the quoted string value, capture the value along the way.XMPScanner::PacketMachine::TriStateXMPScanner::PacketMachine::CaptureAttrValue ( PacketMachine * ths, const char * /* unused */ ){ const int bytesPerChar = ths->fBytesPerChar; char currChar = 0; TriState result = eTriMaybe; if ( ths->fBufferPtr >= ths->fBufferLimit ) return eTriMaybe;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -