📄 textprsr.cpp
字号:
/* ***** BEGIN LICENSE BLOCK ***** * Source last modified: $Id: textprsr.cpp,v 1.1.2.1 2004/07/09 01:50:20 hubbe Exp $ * * Portions Copyright (c) 1995-2004 RealNetworks, Inc. All Rights Reserved. * * The contents of this file, and the files included with this file, * are subject to the current version of the RealNetworks Public * Source License (the "RPSL") available at * http://www.helixcommunity.org/content/rpsl unless you have licensed * the file under the current version of the RealNetworks Community * Source License (the "RCSL") available at * http://www.helixcommunity.org/content/rcsl, in which case the RCSL * will apply. You may also obtain the license terms directly from * RealNetworks. You may not use this file except in compliance with * the RPSL or, if you have a valid RCSL with RealNetworks applicable * to this file, the RCSL. Please see the applicable RPSL or RCSL for * the rights, obligations and limitations governing use of the * contents of the file. * * Alternatively, the contents of this file may be used under the * terms of the GNU General Public License Version 2 or later (the * "GPL") in which case the provisions of the GPL are applicable * instead of those above. If you wish to allow use of your version of * this file only under the terms of the GPL, and not to allow others * to use your version of this file under the terms of either the RPSL * or RCSL, indicate your decision by deleting the provisions above * and replace them with the notice and other provisions required by * the GPL. If you do not delete the provisions above, a recipient may * use your version of this file under the terms of any one of the * RPSL, the RCSL or the GPL. * * This file is part of the Helix DNA Technology. RealNetworks is the * developer of the Original Code and owns the copyrights in the * portions it created. * * This file, and the files included with this file, is distributed * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET * ENJOYMENT OR NON-INFRINGEMENT. * * Technology Compatibility Kit Test Suite(s) Location: * http://www.helixcommunity.org/content/tck * * Contributor(s): * * ***** END LICENSE BLOCK ***** */#include "hxtypes.h"#include "hxassert.h"#include <stdlib.h>#include <string.h>#include "rt_types.h" //for _CHAR, RED_GREEN_OR_BLUE, COLORTYPE#include "hxslist.h" //for base class CHXSimpleList.#include "hxstack.h" //for base class CHXStack.#include "fontdefs.h"#include "txtattrb.h" //for class TextAttributes#include "txtcntnr.h" //for class TextContainer & class TextContainerList#include "textline.h" //for class TextLine & TextLineList#ifdef _WINDOWS#ifdef _WIN16#include <windows.h>#endif /* _WIN16 */#endif /* _WINDOWS */#include "txtwindw.h" //for class TextWindow.#include "parsing.h" //for parsing helper functions.#include "atocolor.h" //for string-to-COLORTYPE conversion functions.#include "rt_string.h" //for stringCompare().#include "atotime.h"#include "fontinfo.h" //for GetCharacterWidth().#include "hxstrutl.h" //for isspace()#include "textprsr.h"#include "hxheap.h"#ifdef _DEBUG#undef HX_THIS_FILE static char HX_THIS_FILE[] = __FILE__;#endif///////////////////////////////////////////////////////////////////////////// // Returns 1 + (index of pData where the <WINDOW ..> tag's '>' char is).// returns dataLength if failure to find valid <WINDOW..> tag.ULONG32 TextParser::ParseHeader(void* pData, ULONG32 dataLength, ULONG32 ulRTFileFormatMarkupParsingMajorVersion, ULONG32 ulRTFileFormatMarkupParsingMinorVersion){ //Now parse pData:#if !defined(_CHARsizeInBytesIs1)#error this code needs to be updated...#endif _CHAR* pData_CHAR = (_CHAR*)pData; if(!m_pTextWindow || !m_pTextWindow->m_pTLList || !m_pTextWindow->m_pFontUndoTagList) { //can't do anything if a lack of memory meant that memory for // one or both of these objects couldn't be allocated (which is // the only reason they should be NULL here): return dataLength; } // Find the first '<' and then find the first '>' or end-of-data, and // send the contents found to m_pTextWindow->parseHeaderTag(): _CHAR* pHeaderTagBuf; LONG32 headerTagBufLen; LONG32 indexOfLeftBracket = -1; LONG32 indexOfRightBracket = -1; LONG32 indx; LONG32 len = LONG32(dataLength); for(indx=0; indx<len; indx++) {/*XXXEH- for now, we have to assume that the first text encountered is not DBCS text; it and all text inside tags must be us-ascii charset: //added the following to handle DBCS chars: if((UCHAR)pData_CHAR[indx] >= DBCS_MIN_LEAD_BYTE_VAL) { indx++; //skip this and the following trail byte. continue; }*/ if(pData_CHAR[indx] == '<') { indexOfLeftBracket = indx; break; } } if(indexOfLeftBracket != -1) { for(indx++; indx<len; indx++) {/*XXXEH- for now, we have to assume that the first text encountered is not DBCS text; it and all text inside tags must be us-ascii charset: //added the following to handle DBCS chars: if((UCHAR)pData_CHAR[indx] >= DBCS_MIN_LEAD_BYTE_VAL) { indx++; //skip this and the following trail byte. continue; }*/ if(pData_CHAR[indx] == '>') { indexOfRightBracket = indx; break; } } } if(-1 == indexOfLeftBracket || -1 == indexOfRightBracket || ((indexOfRightBracket-indexOfLeftBracket)-1) < LONG32(int(strlen("WINDOW"))) ) { //Added this to allow <HTML> to be the first tag: if( ((indexOfRightBracket-indexOfLeftBracket)-1) >= LONG32(int(strlen("HTML"))) ) { if(('H' == pData_CHAR[indexOfLeftBracket] || 'h' == pData_CHAR[indexOfLeftBracket]) && ('T' == pData_CHAR[indexOfLeftBracket+1] || 't' == pData_CHAR[indexOfLeftBracket+1]) && ('M' == pData_CHAR[indexOfLeftBracket+2] || 'm' == pData_CHAR[indexOfLeftBracket+2]) && ('L' == pData_CHAR[indexOfLeftBracket+3] || 'l' == pData_CHAR[indexOfLeftBracket+3]) ) { //XXXEH- need to handle type=html in parseHeaderTag(): m_pTextWindow->parseHeaderTag("WINDOW type=HTML", 16, ulRTFileFormatMarkupParsingMajorVersion, ulRTFileFormatMarkupParsingMinorVersion); if(-1 == indexOfRightBracket) { //XXXEH- should dataLength be returned here?!: return 0L; //signals that no header tag was found (and //that the text starts at byte zero of file). } else { return (indexOfRightBracket+1); } } } //Added this to allow files with no header tag: // No '<' was found (or no valid header tag was found), //Need to set default vals to type // generic's default vals here: //XXXEH- need to handle type=plaintext in parseHeaderTag(): char szTmpBuff[255] = {"WINDOW type=plaintext"}; /* Flawfinder: ignore */ m_pTextWindow->parseHeaderTag(szTmpBuff, 21, ulRTFileFormatMarkupParsingMajorVersion, ulRTFileFormatMarkupParsingMinorVersion); if(-1 == indexOfRightBracket) { //XXXEH- should dataLength be returned here?!: return 0L; //this signals that no header tag was found (and //that the text starts at byte zero of the file). } else { return (indexOfRightBracket+1); } } // Next, parse the header's tag after copying it into pHeaderTagBuf: headerTagBufLen = indexOfRightBracket-indexOfLeftBracket-1; pHeaderTagBuf = new _CHAR[headerTagBufLen+1];//Add 1 for terminating '\0' HX_ASSERT_VALID_PTR(pHeaderTagBuf); if(NULL == pHeaderTagBuf) { return dataLength; //return end-of-pData index to signal error. } for(indx=0; indx<headerTagBufLen; indx++) { pHeaderTagBuf[indx] = pData_CHAR[indx+indexOfLeftBracket+1]; } pHeaderTagBuf[headerTagBufLen] = '\0'; // Now parse the header to get the WINDOW tag, and, if it is // found, get the "NAME=value" pairs and assign the TextWindow's // appropriate objects' data to these requested values: if(!m_pTextWindow->parseHeaderTag(pHeaderTagBuf, headerTagBufLen, ulRTFileFormatMarkupParsingMajorVersion, ulRTFileFormatMarkupParsingMinorVersion)) { // Returned FALSE because of invalid header tag: delete [] pHeaderTagBuf; pHeaderTagBuf = NULL; return dataLength; //return end-of-pData index to signal error. } delete [] pHeaderTagBuf; pHeaderTagBuf = NULL; //This kills a bug in a tickertape where // the packet starts with <TL> but the <POS .. Y0= > // before it causes the renderer to use the Y0=0 value // that the ff or encoder incorrectly calculated for TL // text because it thought the "visible" window height // was 0: m_pTextWindow->setVisibleWindowWidth(m_pTextWindow->getWidth()); m_pTextWindow->setVisibleWindowHeight(m_pTextWindow->getHeight()); return (indexOfRightBracket+1);}/////////////////////////////////////////////////////////////////////////////// Method:// ULONG32 TextParser::ParseText(...)// Purpose:// This function receives the latest raw data (in pData) and// inserts it into the m_pTextWindow::TextContainerList with the latest// render attribute in m_pTextWindow::TextAttributeStacks.//// bDataHasREQUIREDContents gets set by this function to TRUE if any of// the TextContainers created from pData have the REQUIRED attribute.//// ulEarliestTimeOfNewData gets set with the time which is the earliest// time of the TextLines into which this data is parsed.//// Return value is HXR_OK if in-bound data was valid and no error occurred,// otherwise it's HXR_NO_DATA if pData is NULL or empty,// HXR_ELEMENT_NOT_FOUND if all that is in pData is spaces, tabs, and/or// newline characters, and HXR_NOT_INITIALIZED if ulEarliestTimeOfNewData// was not set.//HX_RESULT TextParser::ParseText(void* pData, ULONG32 dataLength, LONG32 lTimeOffset, ULONG32& ulEarliestTimeOfNewData, ULONG32& ulEndTimeOfPacket, BOOL& bRef_DataHasREQUIREDContents, BOOL bFileFormatIsCallingThis, ULONG32 ulByteOffsetIntoFile, TextLine** ppFirstTextLineInPkt ){ *ppFirstTextLineInPkt = NULL; ULONG32 ulStartByteOfFirstTL = 0L; BOOL bFirstTLwasFound = FALSE; //Look through the data for the first valid markup tag. Any text found // before that gets put in a new TextContainer object and is inserted // into the TextContainerList part of the TextWindow object. If that // list is empty, a new TextContainer is added to it with the default // characteristics for the text. If a valid markup tag is found before // the end of the pData is reached, the text that follows it, up to the // next tag, goes into a new TextContainer object that is added to the // list and has the text-rendering characteristic specified in that tag: #if !defined(_CHARsizeInBytesIs1)#error this code needs to be updated...#endif _CHAR* pData_CHAR = (_CHAR*)pData; //Added this for when this is called by the file format ulEarliestTimeOfNewData = TIME_INVALID; BOOL bEarliestTimeOfNewDataIsValid = FALSE; //Added this for when this is called by the file format ulEndTimeOfPacket = TIME_INVALID; bRef_DataHasREQUIREDContents = FALSE; if(!pData_CHAR || dataLength<1) { return HXR_NO_DATA; //there's nothing to do. } ULONG32 ulOriginalDataLength = dataLength; if('\0' == pData_CHAR[dataLength-1]) { dataLength -= 1; } BOOL bIsLiveSource = m_pTextWindow->isLiveSource(); TextContainer* pTC = NULL; //Added this for when this is called by the file format TextLine* pTL = NULL; //Find the first '<' or end-of-data, whichever comes first, and put // any raw text found before it into the TextContainer list: LONG32 startIndex=0L; //Added the following which keep track of where // each TextLine starts/ends in the file (this is ignored if // bFileFormatIsCallingThis is FALSE): ULONG32 ulCurLineStartByteNumOfFile = ulByteOffsetIntoFile; ULONG32 ulCurLineEndByteNumOfFile = ulByteOffsetIntoFile; //Changed this to skip newlines only: //First, skip all newline characters at start of this string only // if this is the very first packet: IncrementCurrentPacketNum(); if(GetCurrentPacketNum() <= 1L) { for(startIndex=0L; startIndex<(LONG32)dataLength; startIndex++) { if(pData_CHAR[startIndex] != '\n' && pData_CHAR[startIndex] != '\r') { break; } } } if((LONG32)dataLength == startIndex) { //All that were found were newline chars, so quit: return HXR_ELEMENT_NOT_FOUND; } //Reset the ptr and dataLength to where the first // non-newline is: dataLength = dataLength-startIndex; pData_CHAR = &(pData_CHAR[startIndex]); startIndex = 0L; LONG32 len = LONG32(dataLength); //Got rid of this and replaced with // m_pTextWindow's functions to keep track of this because there // was a bug if a <BR> was the last thing in a packet, using just // the following local variable meant that this info got lost // when this function was done, so the next packet's data would // not end up with the proper number of line breaks before it: ///LONG32 numBreakTagsEncountered = 0L; BOOL bSomeCharsFoundSinceLastBreakTag = FALSE; //For bug #6906: BOOL bSomeCharsFoundSinceLastPosTag = FALSE; BOOL bUserPosTagFoundSinceLastTextContainer = FALSE; ULONG32 ulCurCharset; ULONG32 ulNumPREtagNewlineCharsFound = 0L; do //Now find the next '<', starting at pData_CHAR[startIndex]: { ulCurCharset = m_pTextWindow->peekAtCharsetStack(); LONG32 indexOfLeftBracket = -1; LONG32 indexOfRightBracket = -1; BOOL bSlashFoundAtEndOfTag = FALSE; BOOL bIgnoringNewlineChars = FALSE; BOOL bDealingWithTabCharWithPre = FALSE; LONG32 indx; for(indx=startIndex; indx<len; indx++, bSomeCharsFoundSinceLastPosTag=TRUE, bSomeCharsFoundSinceLastBreakTag=TRUE) { _CHAR ch = pData_CHAR[indx]; //added the following to handle DBCS chars: if((ulCurCharset & HX_DBCS_CHARSET) && (UCHAR)ch >= DBCS_MIN_LEAD_BYTE_VAL) { indx++; //skip this and the following trail byte. continue; } if('<' == ch) { indexOfLeftBracket = indx; bSomeCharsFoundSinceLastBreakTag=FALSE; //Fixes bug #6903. bSomeCharsFoundSinceLastPosTag=FALSE; //Helps fix bug #6906. break; } //Convert any tab chars outside a tag to spaces if we're not // currently between a <PRE> and a </PRE>, else leave '\t' alone // and let TextWindow::insertAtEndOfList() calculate where the // next tab stop is: BOOL bTabCharHandled = FALSE; if('\t' == ch || '\v' == ch || '\0' == ch) //<--added this check for safety.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -