⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 htmlformat.cpp

📁 一个C++写的RSS解析器,需要libxml2.rpm支持
💻 CPP
字号:
#include "stdio.h"
#include "stdlib.h"
#include "string.h"

#include "string.h"
#include "stdio.h"

#include <iconv.h>

#include "htmlformat.h"

// ASCII code of the space character (decimal 32).
#define		ASC_SPACE	32

/**
 * Strips leading and trailing blanks from a NUL-terminated string, in place.
 *
 * Leading bytes are dropped while they are whitespace according to isspace()
 * or equal to 0xA1 (-95 when read as a signed char), which the original
 * author's note describes as the space byte of the Chinese character encoding.
 * Trailing bytes are dropped only while they are isspace() whitespace.
 *
 * @param a  String to trim; modified in place. A NULL pointer is ignored.
 */
void Format::TRIM(char* a)
{
	if (!a)
		return;

	// Compared as unsigned so the test does not depend on whether plain char
	// is signed on the target platform.
	const unsigned char kWideSpaceByte = 0xA1;

	// Measure the leading run. isspace() requires a value representable as
	// unsigned char (or EOF); passing a negative char is undefined behaviour.
	size_t lead = 0;
	while (a[lead] != '\0')
	{
		const unsigned char ch = static_cast<unsigned char>(a[lead]);
		if (!isspace(ch) && ch != kWideSpaceByte)
			break;
		++lead;
	}

	// Shift the remainder (terminator included) to the front of the buffer.
	size_t len = strlen(a + lead);
	if (lead > 0)
		memmove(a, a + lead, len + 1);

	// Cut trailing whitespace, tracking the length instead of re-measuring it.
	while (len > 0 && isspace(static_cast<unsigned char>(a[len - 1])))
		a[--len] = '\0';
}

	
void HtmlFormat::format(char* content)
{
	Blank* pBlank = new Blank(NULL);//这个类必须在最里层
	HTML* pLtgtTag = new HTML(pBlank,"&lt;", "&gt;");
	HTML* pLtgt = new HTML(pLtgtTag,"<", ">");
		
	pLtgt->format(content);
	
	TRIM(content);
	
	delete pBlank;
	delete pLtgtTag;
	delete pLtgt;
};

void HtmlFormat::format_code_convert(char *content, char* begintag, char* endtag, int method)
{
	CodeConvert* pConvert = new CodeConvert();
	Blank* pBlank = new Blank(pConvert);//这个类必须在最里层
	HTML* pLtgtTag = new HTML(pBlank,"&lt;", "&gt;");
	HTML* pLtgt = new HTML(pLtgtTag,"<", ">");
	HTML* pTail = new HTML(pLtgt,"<", NULL);
	Replace* pReplace = new Replace(pTail, "&quot;", "\"");

	if(begintag || endtag)
	{
		HTML* pSingle = new HTML(pReplace,begintag, endtag);
		
		pSingle->format(content);
		
		delete pSingle;
	}else
	{
		pReplace->format(content);
	}

	TRIM(content);

	delete pConvert;
	delete pBlank;
	delete pLtgtTag;
	delete pLtgt;
	delete pTail;
	delete pReplace;
}

void HtmlFormat::codeConvert(char *content, int method)
{
	CodeConvert* pConvert = new CodeConvert(method);
		
	pConvert->format(content);
	
	delete pConvert;

}
	
/**
 * Converts `content` between GB18030 and UTF-8 with iconv.
 *
 * The direction is chosen by m_method: METHOD_GB2312_TO_UTF8 converts
 * GB18030 -> UTF-8, METHOD_UTF8_TO_GB2312 converts UTF-8 -> GB18030, and any
 * other value leaves the text alone. Input longer than OUT_SIZE bytes is
 * blanked. If the conversion fails for any reason the text is left unchanged.
 *
 * @param content  NUL-terminated text. NULL is ignored. When the converted
 *                 text is longer than the input the buffer is grown with
 *                 realloc() (see the note in the body).
 */
void CodeConvert::format(char* content)
{
#define OUT_SIZE 8096	// Same value as the length of the description column of table t_superlink in the database.

	if (!content)
		return;

	const size_t contentlen = strlen(content);
	if (contentlen > OUT_SIZE)
	{
		// Too long for the output buffer: blank the text.
		memset(content, 0, contentlen);
		return;
	}

	const char* tocode = NULL;
	const char* fromcode = NULL;
	switch (m_method)
	{
	case METHOD_GB2312_TO_UTF8:
		tocode = "UTF-8";
		fromcode = "GB18030";
		break;
	case METHOD_UTF8_TO_GB2312:
		tocode = "GB18030";
		fromcode = "UTF-8";
		break;
	default:
		return;
	}

	iconv_t conv_handle = iconv_open(tocode, fromcode);
	if (conv_handle == (iconv_t) -1)
		return;

	// One spare byte so the terminator fits even when iconv fills all of
	// OUT_SIZE.
	char* out = (char*) malloc(OUT_SIZE + 1);
	if (!out)
	{
		iconv_close(conv_handle);
		return;
	}

	// iconv takes size_t counters. Handing it int variables through a cast
	// makes it read and write past them wherever size_t is wider than int.
	char* input = content;
	char* outbuf_ptr = out;
	size_t inbytesleft = contentlen;
	size_t outbytesleft = OUT_SIZE;

	const size_t nconv = iconv(conv_handle, &input, &inbytesleft, &outbuf_ptr, &outbytesleft);
	iconv_close(conv_handle);
	if (nconv == (size_t) -1)
	{
		free(out);
		return;
	}

	*outbuf_ptr = '\0';
	const size_t outlen = strlen(out);

	if (outlen > contentlen)
	{
		// NOTE(review): `content` is a by-value parameter, so the caller never
		// learns the address returned by realloc(). This only works when the
		// buffer came from malloc() and happens to be extended in place. Kept
		// for compatibility with existing callers; the interface should
		// eventually return the new pointer or take the buffer capacity.
		char* grown = (char*) realloc(content, outlen + 1);
		if (!grown)
		{
			free(out);
			return;
		}
		content = grown;
	}
	else
	{
		// Clear the old text so nothing of it remains behind the new terminator.
		memset(content, 0, contentlen);
	}

	memcpy(content, out, outlen + 1);
	free(out);
}


/**
 * Removes tagged spans from `content` in place.
 *
 * Decorator::format is invoked first (defined elsewhere; presumably it runs
 * the wrapped formatter). What happens next depends on which tags are set:
 *  - begintag and endtag: every span from the last begintag that precedes an
 *    endtag up to and including that endtag is removed. The text is then
 *    trimmed and, if it still starts with begintag (compared ignoring case),
 *    blanked completely.
 *  - begintag only: everything from the first begintag to the end is blanked.
 *  - endtag only: everything before the first endtag is dropped.
 *
 * @param content  NUL-terminated text, modified in place. NULL is ignored.
 */
void HTML::format(char* content)
{
	if( !content )
		return;

	Decorator::format(content);

	if (begintag && endtag)
	{
		char* pCur = content;
		while (pCur)
		{
			char* pBegin = strstr(pCur, begintag);
			char* pEnd = strstr(pCur, endtag);
			if (!pBegin || !pEnd)
				break;	// no complete tag left

			// Advance to the last begintag that is not behind the endtag, so
			// the innermost span is the one removed.
			for (char* pNext = pBegin; pNext && pNext <= pEnd; pNext = strstr(pNext + 1, begintag))
				pBegin = pNext;

			if (pBegin > pEnd)
			{
				// The begintag is behind the endtag: skip this endtag.
				pCur = pEnd + 1;
				continue;
			}

			char* pTail = pEnd + end_len;
			if (pTail <= pBegin)
				break;	// nothing would be removed (empty endtag); rescanning would never finish

			// Close the gap and clear every byte the shorter string gives up.
			// Clearing the whole gap guarantees a terminator even when both
			// tags match at the same position.
			const size_t tailLen = strlen(pTail);
			memmove(pBegin, pTail, tailLen);
			memset(pBegin + tailLen, 0, pTail - pBegin);

			pCur = pBegin;
		}

		TRIM(content);
		if (strncasecmp(content, begintag, strlen(begintag)) == 0)
		{
			// Text still starting with the begin tag is discarded entirely.
			memset(content, 0, strlen(content));
		}
	}
	else if (begintag)
	{
		char* pBegin = strstr(content, begintag);
		if (pBegin)
			memset(pBegin, 0, strlen(pBegin));
	}
	else if (endtag)
	{
		char* pEnd = strstr(content, endtag);
		if (pEnd)
		{
			// NOTE(review): the kept text starts AT the endtag, so the tag
			// itself survives. Behaviour preserved - confirm it is intended.
			const size_t keepLen = strlen(pEnd);
			const size_t dropLen = pEnd - content;
			memmove(content, pEnd, keepLen);
			memset(content + keepLen, 0, dropLen);
		}
	}
}


void Replace::format(char* content)
{
#define EXT_LEN	120
	if( !content )
		return;

	Decorator::format(content);

	int contenlen = strlen(content);
	char* Result = (char*)malloc(contenlen + EXT_LEN );
	memset(Result, 0, contenlen + EXT_LEN);
	
	int pos = 0;
	
	char* pBegin = NULL;
	char* pCur = content;
	while(pCur)
	{
		pBegin = strstr(pCur, beforetag);
		if(!pBegin)
		{//no tag
			memcpy(Result + pos, pCur, strlen(pCur));
			
			break;
		}
		
		memcpy(Result+pos, pCur, pBegin-pCur);
		pos += pBegin-pCur;

		strcat(Result, aftertag);
		pos += aftertag_len;

		pCur = pBegin + beforetag_len;
	}
		
	if(beforetag_len < aftertag_len)
	{
		int result_len = strlen(Result);
		content = (char*)realloc(content, result_len+1);
		memset(content, 0, result_len+1);
	}
	
	strcpy(content, Result);
	
	if(Result)
		free(Result);
};


/**
 * Deletes every occurrence of `tag` from `content` in place.
 *
 * Decorator::format is invoked first (defined elsewhere; presumably it runs
 * the wrapped formatter). After each deletion the search resumes at the
 * position of the deletion, so an occurrence formed by the text on either
 * side of a removed tag is deleted as well.
 *
 * @param content  NUL-terminated text, modified in place. NULL is ignored.
 */
void Blank::format(char* content)
{
	if( !content )
		return;

	Decorator::format(content);

	// An empty tag matches at every position without removing anything, so
	// the loop below would never finish.
	if (!tag || tag_len <= 0)
		return;

	char* pCur = content;
	while ((pCur = strstr(pCur, tag)) != NULL)
	{
		// Pull the remainder over the tag, then clear the tag_len bytes the
		// shortened string no longer uses (this also writes the terminator).
		char* pTail = pCur + tag_len;
		const size_t tailLen = strlen(pTail);
		memmove(pCur, pTail, tailLen);
		memset(pCur + tailLen, 0, tag_len);
	}
}



⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -