📄 htmlparse.cpp
字号:
/*----------------------------------------------------------------------
Copyright (c) 1998 Gipsysoft. All Rights Reserved.
Please see the file "licence.txt" for licencing details.
File: htmlparse.cpp
Owner: russf@gipsysoft.com
Purpose: Main HTML parser
The CHTMLParse::Parse() function generates a document, the document
contains all of the elements of the HTML page but broken into
their parts.
The document will then be used to create the display page.
----------------------------------------------------------------------*/
#include "stdafx.h"
#include <WinHelper.h>
#include <ImgLib.h>
#include "QHTM_Includes.h"
#include "AquireImage.h"
#include "DrawContext.h"
#include "defaults.h"
#include "smallstringhash.h"
#include "HTMLParse.h"
// Helpers declared here; their definitions are outside this part of the file.
extern LPTSTR stristr( LPTSTR pszSource, LPCTSTR pcszSearch );
extern BYTE DecodeCharset( const CStaticString &strCharSet );
//
// Default alignments used by the tag handlers when a tag does not specify one.
static CHTMLParse::Align knDefaultImageAlignment = CHTMLParse::algBottom;
static CHTMLParse::Align knDefaultHRAlignment = CHTMLParse::algLeft;
static CHTMLParse::Align knDefaultParagraphAlignment = CHTMLParse::algLeft;
// richg - 19990224 - Default table alignment changed from algLeft to algTop
// NOTE(review): the note above says algTop but the value below is algMiddle - confirm which is intended.
static CHTMLParse::Align knDefaultTableAlignment = CHTMLParse::algMiddle;
// richg - 19990224 - Table Cells have their own default
static CHTMLParse::Align knDefaultTableCellAlignment = CHTMLParse::algLeft;
// Character appended to the pending token for a line break (see the tokBreak cases).
TCHAR g_cCarriageReturn = _T('\r');
// Text that OnGotText() substitutes for a tab character.
// NOTE(review): the literal is a single space here; a tab expansion is presumably meant to be wider - confirm against the original file.
static const CStaticString g_strTabSpaces( _T(" ") );
// Not referenced in this part of the file; presumably a record of font names already seen - TODO confirm.
static MapClass< StringClass, bool> g_mapFontName;
//
// Sets up a parser over an HTML stream. Nothing is parsed here; call Parse().
//
//	pcszStream, uLength	- handed unchanged to CHTMLParseBase.
//	hInstLoadedFrom		- stored in m_hInstLoadedFrom.
//	pcszFilePath		- stored in m_pcszFilePath (the pointer is kept, not copied).
//	pDefaults			- stored in m_pDefaults; must not be NULL (asserted).
CHTMLParse::CHTMLParse( LPCTSTR pcszStream, UINT uLength, HINSTANCE hInstLoadedFrom, LPCTSTR pcszFilePath, CDefaults *pDefaults )
	: CHTMLParseBase( pcszStream, uLength )
	, m_pProp( NULL )
	, m_pDocument( NULL )
	, m_hInstLoadedFrom( hInstLoadedFrom )
	, m_pcszFilePath( pcszFilePath )
	, m_pLastAnchor( NULL )
	, m_pDefaults( pDefaults )
{
	// Parse() reads colours, fonts and paragraph spacing from the defaults,
	// so a missing defaults object is a programming error.
	ASSERT( m_pDefaults );
}
//
// Releases whatever parse state is still held by delegating to CleanupParse().
CHTMLParse::~CHTMLParse()
{
	CleanupParse();
}
CHTMLDocument * CHTMLParse::Parse()
//
// Parses the stream given to the constructor.
//
// Returns the newly created master document when ParseBase() succeeds,
// or NULL when it fails (the partially built document is deleted first).
{
	// Discard anything left over from an earlier call.
	CleanupParse();

	//
	// Seed the property stack with the default colour, face name and size.
	m_pProp = new Properties;
	m_pProp->nSize = m_pDefaults->m_nFontSize;
	m_pProp->m_crFore = m_pDefaults->m_crDefaultForeColour;
	_tcscpy( m_pProp->szFaceName, m_pDefaults->m_strFontName );
	m_stkProperties.Push( m_pProp );

	//
	// The master document is also the current document to begin with.
	m_pDocument = new CHTMLDocument( m_pDefaults );
	m_pMasterDocument = m_pDocument;
	m_pDocument->m_crBack = m_pDefaults->m_crBackground;

	//
	// Open the first paragraph so that there is somewhere to put content.
	CreateNewParagraph( m_pDefaults->m_nParagraphLinesAbove, m_pDefaults->m_nParagraphLinesBelow, knDefaultParagraphAlignment );

	if( ParseBase() )
	{
		return m_pMasterDocument;
	}

	//
	// Parsing failed: throw away what was built and report NULL.
	if( m_pMasterDocument )
	{
		delete m_pMasterDocument;
		m_pDocument = NULL;
		m_pMasterDocument = NULL;
	}
	return NULL;
}
void CHTMLParse::CleanupParse()
//
// Cleanup anything left over from our previous parsing
{
while( m_stkProperties.GetSize() )
delete m_stkProperties.Pop();
while( m_stkDocument.GetSize() )
m_stkDocument.Pop();
// richg - 19990227 - Clean up the table stack as well
while( m_stkTable.GetSize() )
m_stkTable.Pop();
while( m_stkInTableCell.GetSize() )
m_stkInTableCell.Pop();
// richg - 19990621 - Clean up list stack
while( m_stkList.GetSize() )
m_stkList.Pop();
m_pLastAnchor = NULL;
}
void CHTMLParse::OnGotText( TCHAR ch )
//
// Appends one character of document text to the pending token.
// A tab is not stored as such; g_strTabSpaces is appended in its place.
{
	if( ch != _T('\t') )
	{
		m_strToken.Add( ch );
		return;
	}

	m_strToken.Add( g_strTabSpaces, g_strTabSpaces.GetLength() );
}
void CHTMLParse::OnEndDoc()
//
// End-of-document callback: hands off to CreateNewTextObject() so that
// whatever is still pending in the token is dealt with.
{
	CreateNewTextObject();
}
void CHTMLParse::OnGotTag( const Token token, const CParameters &pList )
//
// Callback for an opening tag.
//
//	token	- which tag was found.
//	pList	- the tag's parameters; passed on to the handlers that use them.
//
// Simple character-formatting tags are handled inline: the pending text is
// turned into a text object, a new property set is created, and the relevant
// property is changed. Everything else is forwarded to its OnGot...() handler.
// Tokens with no case here are ignored.
{
	switch( token )
	{
	case tokIgnore:
		break;

	case tokSub:
		CreateNewTextObject();
		CreateNewProperties();
		// Shrink the text one step, but never below size 1.
		if( m_pProp->nSize > 1 )
			m_pProp->nSize--;
		m_pProp->m_nSub++;
		break;

	case tokSup:
		CreateNewTextObject();
		CreateNewProperties();
		// Shrink the text one step, but never below size 1.
		if( m_pProp->nSize > 1 )
			m_pProp->nSize--;
		m_pProp->m_nSup++;
		break;

	case tokPre:
		//
		// Flush the pending text before the face name changes, exactly as
		// tokCode and the other font-changing tags do. Without this, text
		// read before the <pre> is still pending when the font is switched.
		CreateNewTextObject();
		CreateNewProperties();
		_tcscpy( m_pProp->szFaceName, m_pDefaults->m_strDefaultPreFontName );
		break;

	case tokCode:
		CreateNewTextObject();
		CreateNewProperties();
		_tcscpy( m_pProp->szFaceName, m_pDefaults->m_strDefaultPreFontName );
		break;

	case tokBody:
		OnGotBody( pList );
		break;

	case tokFont:
		OnGotFont( pList );
		break;

	case tokBold:
		CreateNewTextObject();
		CreateNewProperties();
		m_pProp->bBold = true;
		break;

	case tokUnderline:
		CreateNewTextObject();
		CreateNewProperties();
		m_pProp->bUnderline = true;
		break;

	case tokItalic:
		CreateNewTextObject();
		CreateNewProperties();
		m_pProp->bItalic = true;
		break;

	case tokStrikeout:
		CreateNewTextObject();
		CreateNewProperties();
		m_pProp->bStrikeThrough = true;
		break;

	case tokImage:
		OnGotImage( pList );
		break;

	case tokTableHeading:
	case tokTableDef:
		OnGotTableCell( pList );
		break;

	case tokTableRow:
		OnGotTableRow( pList );
		break;

	case tokTable:
		// The matching end tag pops this property set again.
		CreateNewProperties();
		OnGotTable( pList );
		break;

	case tokHorizontalRule:
		OnGotHR( pList );
		break;

	case tokCenter:
		// Handled as a paragraph whose properties carry centre alignment.
		CreateNewProperties();
		m_pProp->nAlignment = algCentre;
		OnGotParagraph( pList );
		break;

	case tokDiv:
	case tokParagraph:
		OnGotParagraph( pList );
		break;

	case tokBreak:
		m_strToken.Add( g_cCarriageReturn );
		break;

	case tokAnchor:
		OnGotAnchor( pList );
		break;

	case tokH1:
	case tokH2:
	case tokH3:
	case tokH4:
	case tokH5:
	case tokH6:
		OnGotHeading( token, pList );
		break;

	case tokOrderedList:
		OnGotOrderedList( pList );
		break;

	case tokUnorderedList:
		OnGotUnorderedList( pList );
		break;

	case tokListItem:
		OnGotListItem( pList );
		break;

	case tokAddress:
		OnGotAddress( pList );
		break;

	case tokBlockQuote:
		OnGotBlockQuote( pList );
		break;

	case tokMeta:
		OnGotMeta( pList );
		break;

	default:
		// No start-tag handling for this token (tokDocumentTitle, for
		// example, is only acted on when its end tag arrives).
		break;
	}
}
void CHTMLParse::OnGotEndTag( const Token token )
//
// Callback for a closing tag.
//
//	token	- which tag is being closed.
//
// Most tags fall into one of three patterns built from the same three calls:
// CreateNewTextObject(), PopPreviousProperties() and CreateNewParagraph().
// The rest are forwarded to their own OnGotEnd...() handler. Tokens with no
// case here are ignored.
{
	switch( token )
	{
	//
	// Character formatting: flush the text, then drop the property set.
	case tokCode:
	case tokFont:
	case tokBold:
	case tokUnderline:
	case tokItalic:
	case tokStrikeout:
	case tokSub:
	case tokSup:
		CreateNewTextObject();
		PopPreviousProperties();
		break;

	//
	// Block formatting that has its own property set: flush the text, drop
	// the property set, then start a new paragraph with the restored alignment.
	case tokPre:
	case tokCenter:
	case tokH1:
	case tokH2:
	case tokH3:
	case tokH4:
	case tokH5:
	case tokH6:
		CreateNewTextObject();
		PopPreviousProperties();
		CreateNewParagraph( 0, 0, m_pProp->nAlignment );
		break;

	//
	// Blocks without a property set of their own: flush the text and start
	// a new paragraph.
	case tokParagraph:
	case tokDiv:
	case tokIgnore:
		CreateNewTextObject();
		CreateNewParagraph( 0, 0, m_pProp->nAlignment );
		break;

	case tokImage:
		// An image has no end tag, so getting here is unexpected.
		ASSERT( FALSE );
		break;

	case tokAnchor:
		OnGotEndAnchor();
		break;

	case tokTableHeading:
	case tokTableDef:
		OnGotEndTableCell();
		break;

	case tokTableRow:
		OnGotEndTableRow();
		break;

	//
	// IE seems to be okay about an end BR (</br>), so treat it like <br>.
	case tokBreak:
		m_strToken.Add( g_cCarriageReturn );
		break;

	case tokTable:
		if( m_stkTable.GetSize() == 0 )
		{
			TRACE( _T("Got an table but no tables left in the stack\n") );
		}
		else
		{
			m_stkTable.Pop();
		}

		// Pop the in-a-cell flag that belongs to this table.
		if( m_stkInTableCell.GetSize() == 0 )
		{
			TRACE( _T("Got an end table but in-cell stack empty.") );
		}
		else
		{
			// A cell may still be open; ending the row closes it too.
			OnGotEndTableRow();
			m_stkInTableCell.Pop();
		}

		PopPreviousProperties();
		CreateNewParagraph( 0, 0, m_pProp->nAlignment );
		break;

	case tokHorizontalRule:
		break;

	case tokOrderedList:
	case tokUnorderedList:
		OnGotEndList();
		break;

	case tokListItem:
		OnGotEndListItem();
		break;

	case tokAddress:
		OnGotEndAddress();
		break;

	case tokBlockQuote:
		OnGotEndBlockQuote();
		break;

	case tokDocumentTitle:
		// The pending token holds the title text: terminate it, copy it to
		// the master document, then empty the token.
		m_strToken.Add( 0 );
		m_pMasterDocument->m_strTitle = m_strToken.GetData();
		m_strToken.SetSize( 0 );
		break;
	}
}
void CHTMLParse::OnGotBody( const CParameters &pList )
{
const UINT uParamSize = pList.GetSize();
for( UINT n = 0; n < uParamSize; n++ )
{
const CStaticString &strParam = pList[n].m_strValue;
switch( pList[n].m_param )
{
case pBColor:
m_pDocument->m_crBack = GetColourFromString( strParam, m_pDocument->m_crBack );
break;
case pLink:
m_pDocument->m_crLink = GetColourFromString( strParam, RGB( 141, 7, 102 ) );
break;
case pALink:
m_pDocument->m_crLinkHover = GetColourFromString( strParam, RGB( 29, 49, 149 ) );
break;
case pMarginTop:
m_pDocument->m_nTopMargin = GetNumberParameter( strParam, m_pDocument->m_nTopMargin );
break;
case pMarginBottom:
m_pDocument->m_nBottomMargin = GetNumberParameter( strParam, m_pDocument->m_nBottomMargin );
break;
case pMarginLeft:
m_pDocument->m_nLeftMargin = GetNumberParameter( strParam, m_pDocument->m_nLeftMargin );
break;
case pMarginRight:
m_pDocument->m_nRightMargin = GetNumberParameter( strParam, m_pDocument->m_nRightMargin );
break;
}
}
}
void CHTMLParse::OnGotImage( const CParameters &pList )
{
CreateNewTextObject();
int nHeight = 0;
int nWidth = 0;
int nBorder = 0;
CStaticString strFilename;
Align alg = knDefaultImageAlignment;
const UINT uParamSize = pList.GetSize();
for( UINT n = 0; n < uParamSize; n++ )
{
const CStaticString &strParam = pList[n].m_strValue;
switch( pList[n].m_param )
{
case pWidth:
nWidth = GetNumberParameter( strParam, nWidth );
break;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -