⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 parsenewslink.cpp

📁 本程序是VC为平台开发的股票资讯系统
💻 CPP
📖 第 1 页 / 共 2 页
字号:
// ParseNewsLink.cpp : implementation file
//

#include "stdafx.h"
#include "ParseNewsLink.h"

#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif

#define CHAR_NUM 62

// Lower-case HTML character-entity names, stored without the leading '&'
// and the trailing ';'.  ClearTitle() searches a title for these names and
// replaces a match with a blank.  The order is significant: ClearTitle()
// walks the table from the last entry towards the first and stops at the
// first name it finds.
static CString ClearTitleString[CHAR_NUM] =
{
    _T("laquo"),  _T("raquo"),  _T("iexcl"),  _T("iquest"),
    _T("agrave"), _T("aacute"), _T("acirc"),  _T("atilde"),
    _T("auml"),   _T("aring"),  _T("aelig"),  _T("ccedil"),
    _T("eth"),    _T("egrave"), _T("eacute"), _T("ecirc"),
    _T("euml"),   _T("igrave"), _T("iacute"), _T("icirc"),
    _T("iuml"),   _T("ntilde"), _T("ograve"), _T("oacute"),
    _T("ocirc"),  _T("otilde"), _T("ouml"),   _T("oslash"),
    _T("ugrave"), _T("uacute"), _T("ucirc"),  _T("uuml"),
    _T("yacute"), _T("yuml"),   _T("thorn"),  _T("szlig"),
    _T("sect"),   _T("para"),   _T("micro"),  _T("brvbar"),
    _T("plusmn"), _T("middot"), _T("uml"),    _T("cedil"),
    _T("ordf"),   _T("ordm"),   _T("not"),    _T("shy"),
    _T("macr"),   _T("deg"),    _T("sup1"),   _T("sup2"),
    _T("sup3"),   _T("frac14"), _T("frac12"), _T("frac34"),
    _T("times"),  _T("divide"), _T("cent"),   _T("pound"),
    _T("curren"), _T("yen"),
};

// Capitalised HTML character-entity names, stored without '&' and ';'.
// The array is declared with CHAR_NUM + 30 slots but only the first 30 are
// given initialisers; the remaining slots are default-constructed (empty)
// CString objects.
// NOTE(review): the table is not referenced in the visible part of this
// file -- confirm how the empty tail is consumed before changing the size.
static CString ResChar[CHAR_NUM + 30] =
{
    _T("Agrave"), _T("Aacute"), _T("Acirc"),  _T("Atilde"), _T("Auml"),
    _T("Aring"),  _T("AElig"),  _T("Ccedil"), _T("ETH"),    _T("Egrave"),
    _T("Eacute"), _T("Ecirc"),  _T("Euml"),   _T("Igrave"), _T("Iacute"),
    _T("Icirc"),  _T("Iuml"),   _T("Ntilde"), _T("Ograve"), _T("Oacute"),
    _T("Ocirc"),  _T("Otilde"), _T("Ouml"),   _T("Oslash"), _T("Ugrave"),
    _T("Uacute"), _T("Ucirc"),  _T("Uuml"),   _T("Yacute"), _T("THORN"),
};

/////////////////////////////////////////////////////////////////////////////
// CParseNewsLink

// Default constructor.  The body is empty: no member is assigned here.
CParseNewsLink::CParseNewsLink()
{
}

// Destructor.  The body is empty: no explicit clean-up is performed here.
CParseNewsLink::~CParseNewsLink()
{
}

/////////////////////////////////////////////////////////////////////////////
// CParseNewsLink message handlers

// Scans an HTML page for anchors and collects the accepted links and titles.
//
// temp   - HTML text of the page.  It is taken by value: a lower-cased copy
//          is used for all searching while an original-case copy (strSource)
//          is cut in lock-step and supplies the text handed to
//          LinkAndTitle().
// return - a concatenation of "<link>\n<title>\n" records, one per accepted
//          anchor; empty when nothing qualifies or when the formatted
//          VerifyDate text does not occur in the page.
//
// Reads FUrlCharacter (BaseUrl, VerifyDate, SearchTime, TitleFilter,
// StartPos, FinishPos, ClearString) and writes TempBaseLink, which
// LinkAndTitle() uses to resolve relative links.
//
// Note: StartPos, FinishPos, the VerifyDate text and the title filter are
// all looked up in the lower-cased page with a case-sensitive Find(), so
// they only match when they contain no upper-case characters.
CString  CParseNewsLink::ExtractLinkAndTitle(CString temp)
{
	CString content,strSource;
	CString TempString,LinkString,Slink,Stitle,filter=FUrlCharacter.TitleFilter;
	strSource = temp;
	temp.MakeLower();
	int pos,pos1=0;

	// Base for relative links: BaseUrl up to and including its last '/'.
	pos = FUrlCharacter.BaseUrl.ReverseFind('/');
	if(pos == -1 && !FUrlCharacter.BaseUrl.IsEmpty())
		// Host name only (no '/'): previously this produced an empty base,
		// so relative links were resolved against nothing.
		TempBaseLink = FUrlCharacter.BaseUrl + '/';
	else if(pos < (FUrlCharacter.BaseUrl.GetLength() - 1))
		TempBaseLink = FUrlCharacter.BaseUrl.Left(pos + 1);
	else
		TempBaseLink = FUrlCharacter.BaseUrl;

	// Optional sanity check: the page must mention the formatted search date.
	if (!FUrlCharacter.VerifyDate.IsEmpty()) {
		TempString=FUrlCharacter.SearchTime.Format(FUrlCharacter.VerifyDate);
		if (temp.Find(TempString)==-1)
			return _T("");
	}

	// A TitleFilter that starts with '&' is a date format (without the '&');
	// any other TitleFilter is not used as a positional filter here.
	if (!filter.IsEmpty()&&filter[0]=='&') {
		filter.Delete(0,1);
		filter=FUrlCharacter.SearchTime.Format(filter);
	}
	else  filter=_T("");

	// Drop everything before the start marker (page kept whole if absent).
	if (!FUrlCharacter.StartPos.IsEmpty()) {
		pos=temp.Find(FUrlCharacter.StartPos);
		if (pos > 0) {
			temp.Delete(0,pos);
			strSource.Delete(0,pos);
		}
	}
	// Drop everything from the finish marker on.  When the marker is absent
	// the page is kept whole; previously Mid(0,-1) discarded all of it.
	if (!FUrlCharacter.FinishPos.IsEmpty()) {
		pos=temp.Find(FUrlCharacter.FinishPos);
		if (pos != -1) {
			temp = temp.Mid(0,pos);
			strSource = strSource.Mid(0,pos);
		}
	}

	pos=temp.Find(_T("href="));
	while (pos>-1)
	{
		// Cut both copies so that they start at "href=".
		temp.Delete(0,pos);
		strSource.Delete(0,pos);
		pos=temp.Find(_T("</a>"));
		if (pos==-1)  break;

		// Anchor text in original case, from "href=" through "</a".
		TempString=strSource.Mid(0,pos+3);
		temp.Delete(0,pos);
		strSource.Delete(0,pos);

		// Position of the next anchor; -1 ends the loop after this pass.
		pos=temp.Find(_T("href="));

		if (!filter.IsEmpty())  {
			pos1=temp.Find(filter);
			// No further filter text anywhere: nothing more can qualify.
			if (pos1==-1) break;
			// Skip the anchor when the filter text only appears after the
			// next anchor.  When there is no next anchor (pos == -1) the
			// current one is kept; previously it was always dropped.
			if (pos!=-1 && pos1>pos) continue;
		}

		content = LinkAndTitle(TempString,LinkString);
		int ki = content.Find('\n');
		if(ki == -1) continue;          // rejected anchor

		Slink = content.Left(ki);
		if(LinkString.Find(Slink) != -1) continue;   // link already collected

		// Title is the rest of the record minus its trailing '\n'.
		Stitle = content.Mid(ki + 1);
		Stitle = Stitle.Mid(0,Stitle.GetLength() - 1);

		// A ClearString starting with '$' is a date format describing text
		// to cut out of the title: either "begin&end" (cut from begin up to
		// and including the first character of end) or a single fragment.
		if(FUrlCharacter.ClearString.Find('$') == 0){
			CString TempFilter,tmp;
			int post;
			TempFilter = FUrlCharacter.SearchTime.Format(
					FUrlCharacter.ClearString.Mid(1));
			post = TempFilter.Find('&');
			if(post != -1){
				tmp = TempFilter.Left(post);
				pos1 = Stitle.Find(tmp);
				if(pos1 != -1){
					Stitle.Delete(pos1,tmp.GetLength());
					post = Stitle.Find(TempFilter.Mid(post + 1),pos1);
					if(post != -1){
						Stitle.Delete(pos1,post - pos1 + 1);
					}
				}
			}
			else{
				pos1 = Stitle.Find(TempFilter);
				if(pos1 != -1)
					Stitle = Stitle.Left(pos1) +
						Stitle.Mid(pos1 + TempFilter.GetLength());
			}
		}
		LinkString=LinkString+Slink+'\n'+Stitle+'\n';
	}

    return LinkString;
}

// Turns one anchor fragment into a "<link>\n<title>\n" record.
//
// temp     - anchor text beginning with "href=" and running through "</a"
//            (this is what ExtractLinkAndTitle() passes in).
// TempLink - not used by this function; kept for interface compatibility.
// return   - "<absolute link>\n<title>\n", or an empty string when the link
//            or title is rejected or the link cannot be resolved.
//
// The link is read from the original-case copy (strSource); the title is
// read from the lower-cased copy.
// NOTE(review): strSource is kept in step up to the title but never used for
// it, so titles come out lower-cased -- confirm whether that is intended.
CString  CParseNewsLink::LinkAndTitle(CString temp,CString TempLink)
{
	int pos=0,pos1=0;
	CString link,title,strSource,tmplink;
	strSource = temp;
	temp.MakeLower();

	// href=javascript:fn(...): remove everything between '=' and '(' (the
	// '(' included) so that the first call argument follows "href=".
    pos=temp.Find(_T("javascript"));
    if (pos!=-1)
	{
        pos=temp.Find('(');
        pos1=temp.Find('=');
		if(pos!=-1)
		{
			temp.Delete(pos1+1,pos-pos1);
			strSource.Delete(pos1+1,pos-pos1);
		}
    }

	// Nothing follows "href=": no link and no title can be produced, and
	// indexing temp[5] below would be out of range for shorter input.
	if (temp.GetLength() <= 5)
		return _T("");

    if (temp[5]=='"')  {                     // href="..."
        temp.Delete(0,6);
		strSource.Delete(0,6);
        pos=temp.Find('"');
        if (pos<4) {
            temp.Delete(0,pos);
			strSource.Delete(0,pos);
            pos=temp.Find('"');
        }
		link=strSource.Mid(0,pos);
    }
    else if (temp.Mid(5,1)==_T("'")) {       // href='...'
            temp.Delete(0,6);
			strSource.Delete(0,6);
            pos=temp.Find(_T("'"));
            if (pos<3) {
               temp.Delete(0,pos);
			   strSource.Delete(0,pos);
               pos=temp.Find(_T("'"));
            }
 			link=strSource.Mid(0,pos);
   }
    else if (temp.Mid(5,1)==_T("\\"))  {     // href=\"...\"
            temp.Delete(0,6);
			strSource.Delete(0,6);
            pos=temp.Find(_T("\\"));
            if (pos<3) {
               temp.Delete(0,pos);
			   strSource.Delete(0,pos);
               pos=temp.Find('"');
            }
			link=strSource.Mid(0,pos);
    }
    else {                                   // href=unquoted
         temp.Delete(0,5);
		 strSource.Delete(0,5);
		 // An unquoted value ends at the first blank or '>', whichever
		 // comes first.  Previously a blank inside the title text was
		 // taken as the end when '>' preceded it, so the link swallowed
		 // part of the title.
         pos=temp.Find(' ');
		 pos1=temp.Find('>');
         if (pos<3 || (pos1!=-1 && pos1<pos)) pos=pos1;
		 link=strSource.Mid(0,pos);
    }

    link=ClearLink(link);
    if (!IsVidLink(link,FUrlCharacter.LinkFilter))
        return _T("");

	// Title: text between the end of the opening tag and "</a".
    pos=temp.Find('>');
    temp.Delete(0,pos+1);
	strSource.Delete(0,pos+1);
    pos=temp.Find(_T("</a"));
    title=temp.Mid(0,pos);
    title=ClearTitle(title,FUrlCharacter.ClearString);

	// Absolute links are kept as they are.  The scheme is compared on a
	// lower-cased copy so that "HTTP://..." is not treated as relative.
	tmplink = link;
	tmplink.MakeLower();
	if(tmplink.Find(_T("http")) == 0)
		link = link + '\n';
    else
	{
		int ki;
		TCHAR ch;
		if(link.Find('/') == 0)
		{
			// Root-relative: host part of the base + link.
			ki = TempBaseLink.Find('/');
			if(ki == -1) return _T("");
			link = TempBaseLink.Left(ki) + link + '\n';
		}
		else
		{
			ki = link.Find('.');
			if(ki == 0)
			{
				ch = link[ki + 1];
				if(ch == '.')
				{
					if(link[ki + 2] == '/')
					{
						// "../x": go up one directory from the base.
						CString templink = TempBaseLink.Mid(0,TempBaseLink.GetLength() - 1);
						ki = templink.ReverseFind('/');
						if(ki == -1) return _T("");
						templink = templink.Left(ki + 1);
						link = templink + link.Mid(3) + '\n';
					}
					else return _T("");
				}
				else if(ch == '/')
					// "./x": relative to the base directory.
					link = TempBaseLink + link.Mid(2) + '\n';
				else return _T("");
			}
			else
				// Plain relative path.
				link=TempBaseLink+link+'\n';
		}
		// The base is stored without a scheme, so one is added here.
		link = _T("http://") + link;
	}

    if (!IsVidTitle(title,FUrlCharacter.TitleFilter))
        title=_T("");
    if (title.IsEmpty())
        return _T("");

    return link+title +'\n';
}

// Decides whether a link should be kept.
//
// temp   - the link; it is lower-cased before any comparison.
// filter - optional text the link must contain.  When it contains a '%'
//          its first character is removed and the remainder is passed to
//          FUrlCharacter.SearchTime.Format() before the comparison.
// return - FALSE when the link contains "default", "index" or "mailto:",
//          or when a non-empty filter is not found in it; TRUE otherwise.
BOOL CParseNewsLink::IsVidLink(CString temp,CString filter)
{
	temp.MakeLower();

	// Links to default/index pages are never accepted.
	const BOOL bLandingPage =
		temp.Find(_T("default")) != -1 || temp.Find(_T("index")) != -1;
	if (bLandingPage)
		return FALSE;

	// Neither are mail links.
	if (temp.Find(_T("mailto:")) != -1)
		return FALSE;

	// Without a filter every remaining link passes.
	if (filter.IsEmpty())
		return TRUE;

	// Expand a format-style filter using the search time.
	if (filter.Find('%') != -1)
	{
		filter.Delete(0,1);
		filter = FUrlCharacter.SearchTime.Format(filter);
	}

	return (temp.Find(filter) != -1) ? TRUE : FALSE;
}

// Cleans the raw inner text of an anchor so it can be used as a title.
//
// temp   - raw title text (may contain tags, entity names, line feeds).
// filter - optional clean-up specification.  A filter starting with '$' is
//          ignored here.  Any other non-empty filter is a list of fragments
//          separated by '&'; every occurrence of each fragment is removed
//          from the title.
// return - the cleaned title after a final ClearChar(temp,'&') pass.
//          ClearChar() is defined outside this function.
//
// Steps, in order: one entity name from ClearTitleString is blanked, tags
// are stripped, "nbsp" is blanked, filter fragments are removed, and line
// feeds are deleted.
CString  CParseNewsLink::ClearTitle(CString temp,CString filter)
{
   int pos1=0,pos2=0,count;
   CString TempFilter;

   // Blank out the first entity name found when scanning the table from its
   // end.  Both "name;" and "name" are replaced so that no stray ';' is left
   // behind after any occurrence (previously only the first occurrence lost
   // its ';').
   // NOTE(review): only one entity name is handled per title because of the
   // break, and names are matched without the leading '&', so ordinary words
   // such as "not" also match -- confirm whether both are intended.
   for (count = CHAR_NUM - 1; count >= 0; count--) {
	   const CString& entity = ClearTitleString[count];
	   if (temp.Find(entity) != -1) {
		   temp.Replace(entity + _T(';'), _T(" "));
		   temp.Replace(entity, _T(" "));
		   break;
	   }
   }

   // Strip tags.  The closing '>' is searched after the '<', so a '>' that
   // appears earlier in the text no longer stops tag removal.
   for (;;) {
       pos1=temp.Find('<');
       if (pos1==-1) break;
       pos2=temp.Find('>',pos1+1);
       if (pos2==-1) break;
       temp.Delete(pos1,pos2-pos1+1);
   }

   // Blank out non-breaking-space entities, with or without the ';'.
   temp.Replace(_T("nbsp;"),_T(" "));
   temp.Replace(_T("nbsp"),_T(" "));

   // Remove every '&'-separated fragment of the filter from the title.
   // Previously the fragment was cut one character short and the '&' was
   // left at the front of the list, which produced an empty fragment and an
   // endless loop on the next pass.  Empty fragments are skipped.
   if (!filter.IsEmpty() && filter.Find('$') != 0) {
		while (!filter.IsEmpty()) {
			pos1=filter.Find('&');
			if (pos1==-1) {
				TempFilter=filter;
				filter.Empty();
			}
			else {
				TempFilter=filter.Left(pos1);
				filter.Delete(0,pos1+1);
			}
			if (TempFilter.IsEmpty()) continue;
			// Re-scan from the start after each deletion, as before.
			while ((pos2=temp.Find(TempFilter))>-1)
				temp.Delete(pos2,TempFilter.GetLength());
		}
   }

   // Delete all line feeds.
   temp.Remove('\n');

   return ClearChar(temp,'&');
}

BOOL CParseNewsLink::IsVidTitle(CString temp, CString filter)
{
	temp.TrimLeft(' ');
	if (temp.IsEmpty()) return FALSE;
	if (filter.IsEmpty()) return TRUE;
	if (filter.Find('$')!=-1) {
		filter.Delete(0,1);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -