tagparse.cpp

来自「symbian 下的helix player源代码」· C++ 代码 · 共 796 行 · 第 1/2 页

CPP
796
字号
/* ***** BEGIN LICENSE BLOCK *****
 * Source last modified: $Id: tagparse.cpp,v 1.4.36.3 2004/07/09 01:44:10 hubbe Exp $
 * 
 * Portions Copyright (c) 1995-2004 RealNetworks, Inc. All Rights Reserved.
 * 
 * The contents of this file, and the files included with this file,
 * are subject to the current version of the RealNetworks Public
 * Source License (the "RPSL") available at
 * http://www.helixcommunity.org/content/rpsl unless you have licensed
 * the file under the current version of the RealNetworks Community
 * Source License (the "RCSL") available at
 * http://www.helixcommunity.org/content/rcsl, in which case the RCSL
 * will apply. You may also obtain the license terms directly from
 * RealNetworks.  You may not use this file except in compliance with
 * the RPSL or, if you have a valid RCSL with RealNetworks applicable
 * to this file, the RCSL.  Please see the applicable RPSL or RCSL for
 * the rights, obligations and limitations governing use of the
 * contents of the file.
 * 
 * Alternatively, the contents of this file may be used under the
 * terms of the GNU General Public License Version 2 or later (the
 * "GPL") in which case the provisions of the GPL are applicable
 * instead of those above. If you wish to allow use of your version of
 * this file only under the terms of the GPL, and not to allow others
 * to use your version of this file under the terms of either the RPSL
 * or RCSL, indicate your decision by deleting the provisions above
 * and replace them with the notice and other provisions required by
 * the GPL. If you do not delete the provisions above, a recipient may
 * use your version of this file under the terms of any one of the
 * RPSL, the RCSL or the GPL.
 * 
 * This file is part of the Helix DNA Technology. RealNetworks is the
 * developer of the Original Code and owns the copyrights in the
 * portions it created.
 * 
 * This file, and the files included with this file, is distributed
 * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY
 * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS
 * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET
 * ENJOYMENT OR NON-INFRINGEMENT.
 * 
 * Technology Compatibility Kit Test Suite(s) Location:
 *    http://www.helixcommunity.org/content/tck
 * 
 * Contributor(s):
 * 
 * ***** END LICENSE BLOCK ***** */

/*
 *      XML tag parser
 *
 *	Not to be confused with the full XML parser, this parser will only
 *      tell you what tags appear in a document, and will parse their
 *      attributes for you. It will not perform any validation.
 */

#include <ctype.h>

#include "hxcom.h"
#include "hxtypes.h"
#include "hxstrutl.h"
#include "hxmap.h"
#include "xmlencod.h"
#include "looseprs.h"
#include "tagparse.h"

#include "hxheap.h"
#ifdef _DEBUG
#undef HX_THIS_FILE		
static const char HX_THIS_FILE[] = __FILE__;
#endif


/*
 * Construct a tag parser with no comment parsing in progress.
 *
 * pEncoding - name of the document encoding; when NULL the parser
 *             records "US-ASCII" instead. The name is copied with
 *             new_string(), so the caller keeps ownership of pEncoding.
 */
XMLTagParser::XMLTagParser(const char* pEncoding)
    : m_comment_state(0)
    , m_comment_get_arg(0)
    , m_comment_pos(0)
{
    // Pick the caller's encoding name, or the default when none was given.
    const char* pEncodingName = pEncoding ? pEncoding : "US-ASCII";
    m_pEncoding = new_string(pEncodingName);
}

/*
 * Release the encoding name copied by the constructor.
 */
XMLTagParser::~XMLTagParser()
{
    // m_pEncoding is produced by new_string(), which (per hxstrutl.h)
    // allocates a char array with new[]; it therefore has to be released
    // with the array form of delete rather than the scalar delete that
    // HX_DELETE expands to.
    delete [] m_pEncoding;
    m_pEncoding = NULL;
}

XMLParseResult
XMLTagParser::Parse(const char*& buf, 
		    UINT32 len, 
		    XMLTag*& tag)
{
    const char* open;
    const char* close;
    const char* cur;
    const char* afterclose;

    tag = NULL;

    if(m_comment_state > 0)
    {
	FindCommentClose(buf, buf, buf+len);
	if(m_comment_state != 0)
	{
	    return XMLPNoClose;
	}
	else if(m_comment_get_arg != 3)
	{
	    tag = new XMLTag(FALSE);
	    tag->new_attribute()->value = new_string("");   // dummy tag
	    return XMLPComment;
	}
	// Got a comment command
	tag = new XMLTag(FALSE);
	tag->new_attribute()->value = new_string(m_comment_arg);
	tag->m_cur_attribute->name = new_string(m_comment_command);
	return XMLPComment;
    }

    if(*buf != '<')
    {
	// If there isn't a tag right away, tell the user there's just plain
	// text here.
	cur = buf;
	while(((UINT32)(cur - buf) < len) && (*cur != '<'))
	{
	    cur++;
	}
	tag = new XMLTag(FALSE);
	char* pText = new char[cur - buf + 1];
	strncpy(pText, buf, cur - buf); /* Flawfinder: ignore */
	pText[cur - buf] = '\0';
	tag->new_attribute()->value = new_string(pText);
	delete [] pText;
	buf = cur;
	return XMLPPlainText;
    }
    open = buf;

    BOOL   bInDoubleQuote = FALSE;
    BOOL   bInSingleQuote = FALSE;
    BOOL   bInComment	  = FALSE;
    BOOL   bInDeclaration = FALSE;
    UINT16 nCommentDepth  = 0;
    if(*(open+1) && *(open+1) == '!' &&
       *(open+2) && *(open+2) == '-' && 
       *(open+3) && *(open+3) == '-')
    {
	// '<!--' starts a comment
	bInComment = TRUE;
    }
    for(close = open; close < buf+len; close++)
    {
	if(*close == '"' && !bInComment)
	{
	    if(!bInSingleQuote)
	    {
		if(bInDoubleQuote)
		{
		    bInDoubleQuote = FALSE;
		}
		else
		{
		    bInDoubleQuote = TRUE;
		}
	    }
	}
	else if(*close == '\'' && !bInComment)
	{
	    if(!bInDoubleQuote)
	    {
		if(bInSingleQuote)
		{
		    bInSingleQuote = FALSE;
		}
		else
		{
		    bInSingleQuote = TRUE;
		}
	    }
	}
	else if(*close == '[' && !bInDeclaration)
	{
	    bInDeclaration = TRUE;
	}
	else if(*close == ']' && bInDeclaration)
	{
	    bInDeclaration = FALSE;
	}
	// Increase the depth if we find a comment within a comment
	else if(*(close) == '<' && bInComment)
	{
	    if(*(close+1) && *(close+1) == '!' &&
	       *(close+2) && *(close+2) == '-' && 
	       *(close+3) && *(close+3) == '-')
	    {
		// '<!--' starts a comment
		nCommentDepth++;
	    }
	}
	else if(*close == '>')
	{
	    // If we are in a comment, we should only stop at a comment end
	    // (Comments must end with "-->")
	    if (bInComment)
	    {
		if ((close - open) > 5 && 
		    *(close-1) == '-'  && 
		    *(close-2) == '-')
		{
		    nCommentDepth--;
		    if (!nCommentDepth)
		    {
			break;
		    }
		}
	    }
	    else
	    {
		if (!bInDoubleQuote && !bInSingleQuote && !bInDeclaration)
		{
		    break;
		}
	    }
	}
    }

    if(*close != '>')
    {
	buf = open;
	return XMLPNoClose;
    }

    afterclose = close+1;

    if(*(open+1) == '!')
    {
	if(*(open+2) == '-' && *(open+3) == '-')
	{
	    // '<!--' starts a comment
	    m_comment_state = 1;
	    m_comment_start = TRUE;
	    FindCommentClose(buf, open+4, buf + len);
	    if(m_comment_state != 0)
	    {
		return XMLPNoClose;
	    }
	    else if(m_comment_get_arg != 3)
	    {
		tag = new XMLTag(FALSE);
		const char* pBeginComment = open + 4;
		int commentLen = buf - pBeginComment - 3;
		tag->new_attribute()->value = new_string(pBeginComment, commentLen);
		return XMLPComment;
	    }
	    // Got a comment command
	    tag = new XMLTag(FALSE);
	    tag->new_attribute()->value = new_string(m_comment_arg);
	    tag->m_cur_attribute->name = new_string(m_comment_command);
	    return XMLPComment;
	}		
	XMLParseResult rc = ParseTag(open+1, close, XMLDirectiveTag, tag);
	if(XMLPTag == rc)
	{
	    buf = afterclose;
	    return XMLPDirective;
	}
	buf = afterclose;
	return XMLPBadDirective;
    }
    
    if(*(open + 1) == '?')
    {
	// A Processing Instruction
	XMLParseResult rc = ParseTag(open+1, close, XMLProcInstTag, tag);
	if(XMLPTag == rc)
	{
	    buf = afterclose;
	    return XMLPProcInst;
	}
	return XMLPBadProcInst;
    }

    // Just a plain old tag
    XMLParseResult rc = ParseTag(open, close, XMLPlainTag, tag);
    if(XMLPTag == rc)
    {
	buf = afterclose;
	return XMLPTag;
    }

    return rc;
}

XMLParseResult
XMLTagParser::ParseTag(const char* open, 
		       const char* close, 
		       XMLTagType tType, 
		       XMLTag*& tag)
{
    const char* cur = open+1;
    BOOL bHasAttributeNames = TRUE;
    BOOL bUseNonQuotedValues = TRUE;
    BOOL bHasDirectives = FALSE;

    tag = new XMLTag(FALSE);

    switch(tType)
    {
	case XMLPlainTag:
	{
	    if(*(close - 1) == '/')
	    {
		tag->m_need_close = FALSE;
		close--;
	    }
	}
	break;
	case XMLProcInstTag:
	{
	    tag->m_need_close = FALSE;
	    if(*(close - 1) == '?')
	    {
		close--;
	    }
	}
	break;
	case XMLDirectiveTag:
	{
	    bHasAttributeNames = FALSE;
	    bUseNonQuotedValues = TRUE;
	    bHasDirectives = TRUE;
	    tag->m_need_close = FALSE;
	}
	break;
	default:
	{
	    tag->m_need_close = FALSE;
	}
	break;
    }
    tag->m_type = tType;

    GetStringResult res = GetString(cur, close, tag->m_name, TagType);
    if(res == GSEndTag)
    {
	tag->m_type = XMLEndTag;
	tag->m_need_close = FALSE;

	return XMLPTag;
    }
    else if(res == GSMissingQuote)
    {
	delete tag;
	tag = NULL;
	return XMLPAttributeValueNotQuoted;
    }

    if(GSFoundExpected != res)
    {
	delete tag;
	tag = NULL;
	return XMLPNoTagType;
    }
    else
    {
	while(cur < close)
	{
	    if(bHasAttributeNames)
	    {
		GetStringResult res = GetString(cur, close, 
						tag->new_attribute()->name,
						AttributeName);
		if(res == GSNoValue)
		{
		    delete tag->m_cur_attribute;
		    tag->m_numAttributes--;
		    break;
		}
		switch(res)
		{
		    case GSValueOnly:
			// The user of this parser will fill in the name of this
			// attribute
			tag->m_cur_attribute->value = tag->m_cur_attribute->name;
			tag->m_cur_attribute->name = NULL;
			continue;
		    case GSFoundExpected:
			break;
		    default:
			delete tag;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?