📄 xmpscanner.cpp
字号:
switch ( ths->fPosition ) { case 0 : // The name should haved ended at the '=', nulls already skipped. if ( ths->fBufferPtr >= ths->fBufferLimit ) return eTriMaybe; if ( *ths->fBufferPtr != '=' ) return eTriNo; ths->fBufferPtr += bytesPerChar; ths->fPosition = 1; // fall through OK because MatchOpenQuote will check the buffer limit and nulls ... case 1 : // Look for the open quote. result = MatchOpenQuote ( ths, NULL ); if ( result != eTriYes ) return result; ths->fPosition = 2; // fall through OK because the buffer limit and nulls are checked below ... default : // Look for the close quote, capturing the value along the way. assert ( ths->fPosition == 2 ); const char quoteChar = ths->fQuoteChar; while ( ths->fBufferPtr < ths->fBufferLimit ) { currChar = *ths->fBufferPtr; if ( currChar == quoteChar ) break; #if UseStringPushBack ths->fAttrValue.push_back ( currChar ); #else ths->fAttrValue.insert ( ths->fAttrValue.end(), currChar ); #endif ths->fBufferPtr += bytesPerChar; } if ( ths->fBufferPtr >= ths->fBufferLimit ) return eTriMaybe; assert ( currChar == quoteChar ); ths->fBufferPtr += bytesPerChar; // Advance past the closing quote. return eTriYes; }} // CaptureAttrValue// =================================================================================================// RecordStart// ===========//// Note that this routine looks at bytes, not logical characters. It has to figure out how many// bytes per character there are so that the other recognizers can skip intervening nulls.XMPScanner::PacketMachine::TriStateXMPScanner::PacketMachine::RecordStart ( PacketMachine * ths, const char * /* unused */ ){ while ( true ) { if ( ths->fBufferPtr >= ths->fBufferLimit ) return eTriMaybe; const char currByte = *ths->fBufferPtr; switch ( ths->fPosition ) { case 0 : // Record the length. assert ( ths->fCharForm == eChar8Bit ); assert ( ths->fBytesPerChar == 1 ); ths->fPacketStart = ths->fBufferOffset + ((ths->fBufferPtr - 1) - ths->fBufferOrigin); ths->fPacketLength = 0; ths->fPosition = 1; // ! OK to fall through here, we didn't consume a byte in this step. case 1 : // Look for the first null byte. if ( currByte != 0 ) return eTriYes; // No nulls found. ths->fCharForm = eChar16BitBig; // Assume 16 bit big endian for now. ths->fBytesPerChar = 2; ths->fBufferPtr++; ths->fPosition = 2; break; // ! Don't fall through, have to check for the end of the buffer between each byte. case 2 : // One null was found, look for a second. if ( currByte != 0 ) return eTriYes; // Just one null found. ths->fBufferPtr++; ths->fPosition = 3; break; case 3 : // Two nulls were found, look for a third. if ( currByte != 0 ) return eTriNo; // Just two nulls is not valid. ths->fCharForm = eChar32BitBig; // Assume 32 bit big endian for now. ths->fBytesPerChar = 4; ths->fBufferPtr++; return eTriYes; break; } } } // RecordStart// =================================================================================================// RecognizeBOM// ============//// Recognizing the byte order marker is a surprisingly messy thing to do. It can't be done by the// normal string matcher, there are no intervening nulls. There are 4 transitions after the opening// quote, the closing quote or one of the three encodings. For the actual BOM there are then 1 or 2// following bytes that depend on which of the encodings we're in. Not to mention that the buffer// might end at any point.//// The intervening null count done earlier determined 8, 16, or 32 bits per character, but not the// big or little endian nature for the 16/32 bit cases. The BOM must be present for the 16 and 32// bit cases in order to determine the endian mode. There are six possible byte sequences for the// quoted BOM string, ignoring the differences for quoting with ''' versus '"'.//// Keep in mind that for the 16 and 32 bit cases there will be nulls for the quote. In the table// below the symbol <quote> means just the one byte containing the ''' or '"'. The nulls for the// quote character are explicitly shown.//// <quote> <quote> - 1: No BOM, this must be an 8 bit case.// <quote> \xEF \xBB \xBF <quote> - 1.12-13: The 8 bit form.//// <quote> \xFE \xFF \x00 <quote> - 1.22-23: The 16 bit, big endian form// <quote> \x00 \xFF \xFE <quote> - 1.32-33: The 16 bit, little endian form.//// <quote> \x00 \x00 \xFE \xFF \x00 \x00 \x00 <quote> - 1.32.43-45.56-57: The 32 bit, big endian form.// <quote> \x00 \x00 \x00 \xFF \xFE \x00 \x00 <quote> - 1.32.43.54-57: The 32 bit, little endian form.enum { eBOM_8_1 = 0xEF, eBOM_8_2 = 0xBB, eBOM_8_3 = 0xBF, eBOM_Big_1 = 0xFE, eBOM_Big_2 = 0xFF, eBOM_Little_1 = eBOM_Big_2, eBOM_Little_2 = eBOM_Big_1};XMPScanner::PacketMachine::TriStateXMPScanner::PacketMachine::RecognizeBOM ( PacketMachine * ths, const char * /* unused */ ){ const int bytesPerChar = ths->fBytesPerChar; while ( true ) { // Handle one character at a time, the micro-state (fPosition) changes for each. if ( ths->fBufferPtr >= ths->fBufferLimit ) return eTriMaybe; const unsigned char currChar = *ths->fBufferPtr; // ! The BOM bytes look like integers bigger than 127. switch ( ths->fPosition ) { case 0 : // Look for the opening quote. if ( (currChar != '\'') && (currChar != '"') ) return eTriNo; ths->fQuoteChar = currChar; ths->fBufferPtr++; ths->fPosition = 1; break; // ! Don't fall through, have to check for the end of the buffer between each byte. case 1 : // Look at the byte immediately following the opening quote. if ( currChar == ths->fQuoteChar ) { // Closing quote, no BOM character, must be 8 bit. if ( ths->fCharForm != eChar8Bit ) return eTriNo; ths->fBufferPtr += bytesPerChar; // Skip the nulls after the closing quote. return eTriYes; } else if ( currChar == eBOM_8_1 ) { // Start of the 8 bit form. if ( ths->fCharForm != eChar8Bit ) return eTriNo; ths->fBufferPtr++; ths->fPosition = 12; } else if ( currChar == eBOM_Big_1 ) { // Start of the 16 bit big endian form. if ( ths->fCharForm != eChar16BitBig ) return eTriNo; ths->fBufferPtr++; ths->fPosition = 22; } else if ( currChar == 0 ) { // Start of the 16 bit little endian or either 32 bit form. if ( ths->fCharForm == eChar8Bit ) return eTriNo; ths->fBufferPtr++; ths->fPosition = 32; } else { return eTriNo; } break; case 12 : // Look for the second byte of the 8 bit form. if ( currChar != eBOM_8_2 ) return eTriNo; ths->fPosition = 13; ths->fBufferPtr++; break; case 13 : // Look for the third byte of the 8 bit form. if ( currChar != eBOM_8_3 ) return eTriNo; ths->fPosition = 99; ths->fBufferPtr++; break; case 22 : // Look for the second byte of the 16 bit big endian form. if ( currChar != eBOM_Big_2 ) return eTriNo; ths->fPosition = 23; ths->fBufferPtr++; break; case 23 : // Look for the null before the closing quote of the 16 bit big endian form. if ( currChar != 0 ) return eTriNo; ths->fBufferPtr++; ths->fPosition = 99; break; case 32 : // Look at the second byte of the 16 bit little endian or either 32 bit form. if ( currChar == eBOM_Little_1 ) { ths->fPosition = 33; } else if ( currChar == 0 ) { ths->fPosition = 43; } else { return eTriNo; } ths->fBufferPtr++; break; case 33 : // Look for the third byte of the 16 bit little endian form. if ( ths->fCharForm != eChar16BitBig ) return eTriNo; // Null count before assumed big endian. if ( currChar != eBOM_Little_2 ) return eTriNo; ths->fCharForm = eChar16BitLittle; ths->fPosition = 99; ths->fBufferPtr++; break; case 43 : // Look at the third byte of either 32 bit form. if ( ths->fCharForm != eChar32BitBig ) return eTriNo; // Null count before assumed big endian. if ( currChar == eBOM_Big_1 ) { ths->fPosition = 44; } else if ( currChar == 0 ) { ths->fPosition = 54; } else { return eTriNo; } ths->fBufferPtr++; break; case 44 : // Look for the fourth byte of the 32 bit big endian form. if ( currChar != eBOM_Big_2 ) return eTriNo; ths->fPosition = 45; ths->fBufferPtr++; break; case 45 : // Look for the first null before the closing quote of the 32 bit big endian form. if ( currChar != 0 ) return eTriNo; ths->fPosition = 56; ths->fBufferPtr++; break; case 54 : // Look for the fourth byte of the 32 bit little endian form. ths->fCharForm = eChar32BitLittle; if ( currChar != eBOM_Little_1 ) return eTriNo; ths->fPosition = 55; ths->fBufferPtr++; break; case 55 : // Look for the fifth byte of the 32 bit little endian form. if ( currChar != eBOM_Little_2 ) return eTriNo; ths->fPosition = 56; ths->fBufferPtr++; break; case 56 : // Look for the next to last null before the closing quote of the 32 bit forms. if ( currChar != 0 ) return eTriNo; ths->fPosition = 57; ths->fBufferPtr++; break; case 57 : // Look for the last null before the closing quote of the 32 bit forms. if ( currChar != 0 ) return eTriNo; ths->fPosition = 99; ths->fBufferPtr++; break; default : // Look for the closing quote. assert ( ths->fPosition == 99 ); if ( currChar != ths->fQuoteChar ) return eTriNo; ths->fBufferPtr += bytesPerChar; // Skip the nulls after the closing quote. return eTriYes; break; } }} // RecognizeBOM// =================================================================================================// RecordHeadAttr// ==============XMPScanner::PacketMachine::TriStateXMPScanner::PacketMachine::RecordHeadAttr ( PacketMachine * ths, const char * /* unused */ ){ if ( ths->fAttrName == "encoding" ) { assert ( ths->fEncodingAttr.empty() ); ths->fEncodingAttr = ths->fAttrValue; } else if ( ths->fAttrName == "bytes" ) { long value = 0; int count = ths->fAttrValue.size(); int i; assert ( ths->fBytesAttr == -1 ); if ( count > 0 ) { // Allow bytes='' to be the same as no bytes attribute. for ( i = 0; i < count; i++ ) { const char currChar = ths->fAttrValue[i]; if ( ('0' <= currChar) && (currChar <= '9') ) { value = (value * 10) + (currChar - '0'); } else { ths->fBogusPacket = true; value = -1; break; } } ths->fBytesAttr = value; if ( CharFormIs16Bit ( ths->fCharForm ) ) { if ( (ths->fBytesAttr & 1) != 0 ) ths->fBogusPacket = true; } else if ( CharFormIs32Bit ( ths->fCharForm ) ) { if ( (ths->fBytesAttr & 3) != 0 ) ths->fBogusPacket = true; } } } ths->fAttrName.erase ( ths->fAttrName.begin(), ths->fAttrName.end() ); ths->fAttrValue.erase ( ths->fAttrValue.begin(), ths->fAttrValue.end() ); return eTriYes;} // RecordHeadAttr// =================================================================================================// CaptureAccess// =============XMPScanner::PacketMachine::TriStateXMPScanner::PacketMachine::CaptureAccess ( PacketMachine * ths, const char * /* unused */ ){ const int bytesPerChar = ths->fBytesPerChar; while ( true ) { if ( ths->fBufferPtr >= ths->fBufferLimit ) return eTriMaybe; const char currChar = *ths->fBufferPtr; switch ( ths->fPosition ) { case 0 : // Look for the opening quote. if ( (currChar != '\'') && (currChar != '"') ) return eTriNo; ths->fQuoteChar = currChar; ths->fBufferPtr += bytesPerChar; ths->fPosition = 1; break; // ! Don't fall through, have to check for the end of the buffer between each byte. case 1 : // Look for the 'r' or 'w'. if ( (currChar != 'r') && (currChar != 'w') ) return eTriNo; ths->fAccess = currChar; ths->fBufferPtr += bytesPerChar; ths->fPosition = 2; break;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -