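/*
 * rss.cpp
 *
 * An RSS parser written in C++; requires libxml2 (libxml2.rpm).
 *
 * Listing taken from a code download site ("Chongchong download site").
 * The site's navigation text (resource downloads, resource albums,
 * about us, font size control) is not part of the program.
 */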
#include "string.h"
#include "stdio.h"

#include "rss.h"
#include "utils.h"
#include "htmlformat.h"


#ifdef LINUX_REDHAT
#include <time.h>
#endif



/* "version" attribute of the node located by "//rss"; set by createRss(),
 * released and reset to NULL by rssVersion() and destroyRss(). */
char* g_version = NULL;
/* Head of the ITEM list built by insertSortList(); createRss() resets it to
 * NULL before inserting the items of a feed. */
struct ITEM *g_head = NULL;

/*
 * Allocate an IMAGE with every field cleared.
 *
 * Returns the new object, or NULL when the allocation fails (the original
 * passed the unchecked pointer straight to memset). Release the string
 * members with free_image().
 */
struct IMAGE* malloc_image()
{
	struct IMAGE* pImage = (struct IMAGE*)malloc(sizeof(struct IMAGE));

	if(pImage == NULL)
		return NULL;

	memset(pImage, 0, sizeof(struct IMAGE));

	/* Explicit pointer initialisation, independent of the byte fill above. */
	pImage->link = NULL;
	pImage->title = NULL;
	pImage->url = NULL;

	return pImage;
}

/*
 * Release the strings held by an IMAGE (link, title, url).
 *
 * A NULL image is ignored. The IMAGE structure itself is not freed here and
 * the member pointers are left as they were.
 */
void free_image(struct IMAGE* image)
{
	if(image != NULL)
	{
		/* free(NULL) is a no-op, so unset members need no separate test. */
		free(image->link);
		free(image->title);
		free(image->url);
	}
}

/*
 * Allocate an ITEM with every field cleared and both flags set to 0.
 *
 * Returns the new object, or NULL when the allocation fails (the original
 * passed the unchecked pointer straight to memset). Release the string
 * members with free_item().
 */
struct ITEM* malloc_item()
{
	struct ITEM* pItem = (struct ITEM*)malloc(sizeof(struct ITEM));

	if(pItem == NULL)
		return NULL;

	memset(pItem, 0, sizeof(struct ITEM));

	/* Explicit pointer initialisation, independent of the byte fill above. */
	pItem->author = NULL;
	pItem->description = NULL;
	pItem->link = NULL;
	pItem->pubdate = NULL;
	pItem->title = NULL;
	pItem->source = NULL;

	pItem->pNext = NULL;

	pItem->is_inserted = 0;
	pItem->is_overdue = 0;

	return pItem;
}

/*
 * Release the strings held by an ITEM and detach it from its list.
 *
 * A NULL item is ignored. The ITEM structure itself is not freed here; only
 * its pNext link is reset to NULL, so callers walking a list must read pNext
 * before calling this.
 */
void free_item(struct ITEM* item)
{
	if(item != NULL)
	{
		/* free(NULL) is a no-op, so unset members need no separate test. */
		free(item->author);
		free(item->description);
		free(item->link);
		free(item->pubdate);
		free(item->title);
		free(item->source);

		item->pNext = NULL;
	}
}

/*
 * Allocate a Channel with every field cleared and item_size set to 0.
 *
 * Returns the new object, or NULL when the allocation fails (the original
 * passed the unchecked pointer straight to memset).
 */
struct Channel* malloc_channel()
{
	struct Channel* pChannel = (struct Channel*)malloc(sizeof(struct Channel));

	if(pChannel == NULL)
		return NULL;

	memset(pChannel, 0, sizeof(struct Channel));

	/* Explicit pointer initialisation, independent of the byte fill above.
	 * (The original assigned pubdate twice; once is enough.) */
	pChannel->description = NULL;
	pChannel->generator = NULL;
	pChannel->language = NULL;
	pChannel->link = NULL;
	pChannel->pubdate = NULL;
	pChannel->title = NULL;

	pChannel->pItem = NULL;
	pChannel->pNext = NULL;
	pChannel->pImage = NULL;

	pChannel->item_size = 0;

	return pChannel;
}

/*
 * Release a Channel: its string members and the structure itself.
 *
 * A NULL channel is ignored. The item list (pItem), the image (pImage) and
 * the next channel (pNext) are NOT released here; the links are only cleared,
 * so the caller must release those objects before calling this function.
 *
 * Fix: the original guard was written "if(channel == NULL);" - the stray
 * semicolon made the following return unconditional, so nothing was ever
 * freed.
 */
void free_channel(struct Channel* channel)
{
	if(channel == NULL)
		return;

	/* free(NULL) is a no-op, so unset members need no separate test. */
	free(channel->description);
	free(channel->generator);
	free(channel->language);
	free(channel->link);
	free(channel->title);
	free(channel->pubdate);

	channel->item_size = 0;

	channel->pItem = NULL;
	channel->pNext = NULL;
	channel->pImage = NULL;

	free(channel);
}


/*
 * Insert pItem into the global list headed by g_head, keeping the list
 * ordered by pubdate with the most recent entry first.
 *
 * pubdate strings are compared with strcmp(). pItem is placed in front of
 * the first node whose pubdate is less than or equal to its own; if there is
 * no such node it becomes the new tail. pItem->pNext is written only when a
 * successor exists; when pItem becomes the tail (or the only node) its pNext
 * is left as the caller set it.
 */
void insertSortList(struct ITEM* pItem)
{
	/* Address of the link that will end up pointing at pItem. */
	struct ITEM **ppLink = &g_head;

	/* Step over every node that is strictly newer than pItem. */
	while(*ppLink != NULL && strcmp((*ppLink)->pubdate, pItem->pubdate) > 0)
		ppLink = &(*ppLink)->pNext;

	/* A successor exists: hook it behind the new node. */
	if(*ppLink != NULL)
		pItem->pNext = *ppLink;

	*ppLink = pItem;
}


/*
 * Normalise a date of the form "Y-M-D[ rest]" to "YYYY-MM-DD[ rest]".
 *
 *  - A year below 100 is expanded: 0..49 becomes 20xx, 50..99 becomes 19xx.
 *  - One-character month and day fields are padded with a leading '0'.
 *  - Everything from the first space after the day (normally " HH:MM:SS") is
 *    appended unchanged; seconds are not reformatted.
 *  - Without a space after the day, at most two characters are taken as the
 *    day and the remainder is dropped.
 *
 * The result is a newly malloc'd buffer of 20 bytes ("YYYY-MM-DD HH:MM:SS"
 * plus the terminator) that the caller must free(); output that would not
 * fit is truncated.
 *
 * When info does not have the expected shape (missing '-', empty field,
 * year longer than 4 or month/day longer than 2 characters) the result is a
 * bounded copy of info itself, so existing callers that do not check the
 * return value still receive a valid string. NULL is returned only when info
 * is NULL or the allocation fails.
 *
 * Fixes: the original wrote "NULL;" (a no-op statement) where it meant to
 * bail out, so malformed input went on to NULL pointer arithmetic, writes
 * past the 5-byte scratch buffer and strcat() past the 20-byte result.
 */
char* formate_issuetime(char* info)
{
	enum { ISSUETIME_SIZE = 20 };	/* strlen("YYYY-MM-DD HH:MM:SS") + 1 */
	char* issuetime;
	char year_text[5];
	const char* year_end;
	const char* month_begin;
	const char* month_end;
	const char* day_begin;
	const char* day_end;
	size_t year_len;
	size_t month_len;
	size_t day_len;
	int year;

	if(info == NULL)
		return NULL;

	issuetime = (char*)malloc(ISSUETIME_SIZE);
	if(issuetime == NULL)
		return NULL;

	/* Fallback result: a bounded copy of the input. It is overwritten below
	 * once all three fields have been recognised. */
	snprintf(issuetime, ISSUETIME_SIZE, "%s", info);

	/* Year: everything before the first '-'. */
	year_end = strchr(info, '-');
	if(year_end == NULL)
		return issuetime;

	year_len = (size_t)(year_end - info);
	if(year_len == 0 || year_len >= sizeof(year_text))
		return issuetime;

	memcpy(year_text, info, year_len);
	year_text[year_len] = '\0';

	year = atoi(year_text);
	if(year < 100)
		year += (year < 50) ? 2000 : 1900;

	/* Month: between the first and the second '-'. */
	month_begin = year_end + 1;
	month_end = strchr(month_begin, '-');
	if(month_end == NULL)
		return issuetime;

	month_len = (size_t)(month_end - month_begin);
	if(month_len == 0 || month_len > 2)
		return issuetime;

	/* Day: up to the next space, or at most two characters without one. */
	day_begin = month_end + 1;
	day_end = strchr(day_begin, ' ');
	if(day_end != NULL)
	{
		day_len = (size_t)(day_end - day_begin);
	}else
	{
		day_len = 0;
		while(day_len < 2 && day_begin[day_len] != '\0')
			day_len++;
	}

	if(day_len == 0 || day_len > 2)
		return issuetime;

	/* snprintf bounds the whole result, including the appended tail. */
	snprintf(issuetime, ISSUETIME_SIZE, "%d-%s%.*s-%s%.*s%s",
		year,
		(month_len == 1) ? "0" : "", (int)month_len, month_begin,
		(day_len == 1) ? "0" : "", (int)day_len, day_begin,
		(day_end != NULL) ? day_end : "");

	return issuetime;
}

struct Channel* createRss(char* rss, struct Outline* outline)
{
	xmlNodePtr cur;
	xmlDocPtr doc;
	
	//////////////
	time_t now;
	int interval = 10*60;//10分钟
    	struct tm *timenow;
    	char curtime[32];

	now = time(NULL);

    	timenow = localtime(&now);
    	strftime(curtime, 20, "%Y-%m-%d %H:%M:%S", timenow);
	//////////////

	HtmlFormat* pHtmlFormat = new HtmlFormat();

	char xpath[ XPATH_LEN ];
	char *info = NULL;	char *pub_date = NULL;	int j = 1;
		doc = xmlParseFile( rss);
    	if ( doc == NULL )
    	{
    		delete pHtmlFormat;
    		return NULL;
    	}
	
	cur = xmlDocGetRootElement( doc );    
	if ( cur == NULL )
    	{
       		xmlFreeDoc( doc );
       		delete pHtmlFormat;
       		return NULL;
    	}

	struct Channel* channel = malloc_channel();
	
	cur = get_location( doc, "//rss" );
	g_version = (char*)xmlGetProp( cur, ( const xmlChar* ) "version" );

	sprintf( xpath, "//rss/channel/title");
	info = get_locationContent( doc, xpath );
   	if (info)
   	{
   		TRIM(info);
		channel->title = CodeConvert(info, METHOD_UTF8_TO_GB2312);
   	}
		
	sprintf( xpath, "//rss/channel/description");
	info = get_locationContent( doc, xpath );
   	if (info)
	{
   		TRIM(info);
		channel->description = CodeConvert(info, METHOD_UTF8_TO_GB2312);
   	}

	sprintf( xpath, "//rss/channel/link");
	info = get_locationContent( doc, xpath );
   	if (info)
   	{
   		TRIM(info);
		channel->link = (char*)strdup(info);
	}

	sprintf( xpath, "//rss/channel/language");
	info = get_locationContent( doc, xpath );
   	if (info)
   	{
   		TRIM(info);
		channel->language = (char*)strdup(info);
	}else
	{
		channel->language = (char*)strdup("zh-cn");//default is "zh-cn"
	}

	sprintf( xpath, "//rss/channel/pubDate");
	info = get_locationContent( doc, xpath );
	if (info)
   	{
		struct tm datetime;

   		TRIM(info);
		
		strptime(info, "%a, %d %b %Y %H:%M:%S %z", &datetime);
		strftime(info, 20, "%Y-%m-%d %H:%M:%S", &datetime);

		channel->pubdate = (char*)strdup(info);
   	}

	struct IMAGE* pImage = malloc_image();

	sprintf( xpath, "//rss/channel/image/title");
	info = get_locationContent( doc, xpath );
	if (info)
   	{
   		pHtmlFormat->format_code_convert(info);
		pImage->title = (char*)strdup(info);
   	}

	sprintf( xpath, "//rss/channel/image/link");
	info = get_locationContent( doc, xpath );
   	if (info)
   	{
   		TRIM(info);
		pImage->link = (char*)strdup(info);
	}

	sprintf( xpath, "//rss/channel/image/url");
	info = get_locationContent( doc, xpath );
   	if (info)
   	{
   		TRIM(info);
		pImage->url = (char*)strdup(info);
	}

	channel->pImage = pImage;

	g_head = NULL;
    	for ( j = 1;;j++ )
   	{
		memset(xpath, 0, XPATH_LEN);
		sprintf( xpath, "//rss/channel/item[%d]/title", j);
		if(get_locationContent (doc, xpath) == NULL)
			break;

		struct ITEM* pItem = malloc_item();

		sprintf( xpath, "//rss/channel/item[%d]/title", j);
		info = get_locationContent( doc, xpath );
		if (info)
		{
	   		pHtmlFormat->format_code_convert(info);
        		pItem->title = (char*)strdup(info);
		}

		sprintf( xpath, "//rss/channel/item[%d]/source", j);
		info = get_locationContent( doc, xpath );
		if (info)
		{
	   		TRIM(info);
        	pItem->source = CodeConvert(info, METHOD_UTF8_TO_GB2312);
		}
        
		sprintf( xpath, "//rss/channel/item[%d]/link", j);
		info = get_locationContent( doc, xpath );
		if (info)
		{
			TRIM(info);
			pItem->link = (char*)strdup(info);
		}

		sprintf( xpath, "//rss/channel/item[%d]/description", j);
		info = get_locationContent( doc, xpath );
		if (info)
    		{
			pHtmlFormat->format_code_convert(info, outline->begintag, outline->endtag);
			pItem->description = (char*)strdup(info);

			if(info)
				free(info);
    		}
	
		sprintf( xpath, "//rss/channel/item[%d]/pubDate", j);
		info = get_locationContent( doc, xpath );
		if (info)
		{
	        	struct tm datetime;
			
   			TRIM(info);

			if(strchr(info, '-') == NULL)
			{//Not format.
				char* pTemp = (char*)strdup(info);
			
				strptime(info, "%a, %d %b %Y %H:%M:%S %z", &datetime);
				strftime(info, 20, "%Y-%m-%d %H:%M:%S", &datetime);
	
				if(strncmp(info, "1900", 4) != 0)
					pItem->pubdate = (char*)strdup(info);
				else
					pItem->pubdate = (char*)strdup(pTemp);
					
				free(pTemp);
			}else
			{//Formatted.
				char issuretime[20];
				memset(issuretime, 0, 20);
				
				strncpy(issuretime, info, strlen("2006-00-00 00:00:00"));
				
				pItem->pubdate = formate_issuetime(issuretime);
			}
			
		}else
		{
			now -= interval;
		    	timenow = localtime(&now);
		    	strftime(curtime, 20, "%Y-%m-%d %H:%M:%S", timenow);
		    	
    			pItem->pubdate = (char*)strdup(curtime);
		}
		
		/*
		if(strcmp(pItem->pubdate, curtime) > 0)
		{//发布时间晚于当前时间,丢弃
			printf("当前时间:%s; 链接时间:%s\n", curtime, pItem->pubdate);
			
			free_item(pItem);
			continue;
		}
		*/
		
		insertSortList(pItem);
		
		channel->item_size++;
	}
	
	channel->pItem = g_head;
    	
	xmlFreeDoc( doc );
		
	delete pHtmlFormat;
	
	return channel;
}

/*
 * Copy the stored RSS version string into `version` and release the stored
 * copy (g_version is reset to NULL).
 *
 * version must point to a buffer large enough for the version text; no
 * length is passed, so the copy is unbounded as in the original.
 * When no version is stored, `version` receives an empty string. When
 * `version` is NULL, nothing is done and the stored value is kept.
 *
 * Fix: the original called strcpy() from g_version before testing it for
 * NULL.
 */
void rssVersion(char* version)
{
	if(version == NULL)
		return;

	if(g_version == NULL)
	{
		version[0] = '\0';
		return;
	}

	strcpy(version, g_version);

	free(g_version);
	g_version = NULL;
}

void destroyRss(struct Channel* channel)
{
	struct Channel* pHead = channel;
	struct Channel* pNext = NULL;

	struct ITEM* cur_item = NULL;
	struct ITEM* next_item = NULL;

	if(g_version)
		free(g_version);

	g_version = NULL;

	if(pHead == NULL)
		return;

	while(pHead)
	{
		pNext = pHead->pNext;

		free_image(pHead->pImage);

		cur_item = pHead->pItem;
		while(cur_item)
		{
			next_item = cur_item->pNext;
			free_item(cur_item);

			cur_item = next_item;
		}

		free_channel(pHead);

		pHead = pNext;
	}
}


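/*
 * Keyboard shortcuts of the download site's code viewer (not part of the
 * program):
 *   Copy code        Ctrl + C
 *   Search code      Ctrl + F
 *   Full screen      F11
 *   Switch theme     Ctrl + Shift + D
 *   Show shortcuts   ?
 *   Larger font      Ctrl + =
 *   Smaller font     Ctrl + -
 */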