📄 parsenewslink.cpp
字号:
// ParseNewsLink.cpp : implementation file
//
#include "stdafx.h"
#include "ParseNewsLink.h"
#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif
// Number of entries in ClearTitleString.
#define CHAR_NUM 62

// HTML entity names (written without the leading '&' and trailing ';').
// ClearTitle scans this table from the last index down to the first, so the
// order of the entries is significant and must not be changed.
static CString ClearTitleString[CHAR_NUM] =
{
    // quotation marks and inverted punctuation
    _T("laquo"),  _T("raquo"),  _T("iexcl"),  _T("iquest"),
    // lower-case a / c
    _T("agrave"), _T("aacute"), _T("acirc"),  _T("atilde"),
    _T("auml"),   _T("aring"),  _T("aelig"),  _T("ccedil"),
    // lower-case eth / e
    _T("eth"),    _T("egrave"), _T("eacute"), _T("ecirc"),  _T("euml"),
    // lower-case i
    _T("igrave"), _T("iacute"), _T("icirc"),  _T("iuml"),
    // lower-case n / o
    _T("ntilde"), _T("ograve"), _T("oacute"), _T("ocirc"),
    _T("otilde"), _T("ouml"),   _T("oslash"),
    // lower-case u
    _T("ugrave"), _T("uacute"), _T("ucirc"),  _T("uuml"),
    // lower-case y, thorn, sharp s
    _T("yacute"), _T("yuml"),   _T("thorn"),  _T("szlig"),
    // typographic symbols
    _T("sect"),   _T("para"),   _T("micro"),
    _T("brvbar"), _T("plusmn"), _T("middot"),
    _T("uml"),    _T("cedil"),  _T("ordf"),
    _T("ordm"),   _T("not"),    _T("shy"),
    _T("macr"),   _T("deg"),
    // superscripts, fractions, arithmetic signs
    _T("sup1"),   _T("sup2"),   _T("sup3"),
    _T("frac14"), _T("frac12"), _T("frac34"),
    _T("times"),  _T("divide"),
    // currency signs
    _T("cent"),   _T("pound"),  _T("curren"), _T("yen"),
};
// Upper-case HTML entity names (without the leading '&' and trailing ';').
// The array is declared with CHAR_NUM + 30 slots but only the first 30 are
// given a value here; the remaining slots are default-constructed CStrings.
// NOTE(review): no use of this table is visible in this part of the file --
// confirm whether the extra CHAR_NUM slots are filled in or needed elsewhere.
static CString ResChar[CHAR_NUM + 30] =
{
    // upper-case A / C
    _T("Agrave"), _T("Aacute"), _T("Acirc"),  _T("Atilde"),
    _T("Auml"),   _T("Aring"),  _T("AElig"),  _T("Ccedil"),
    // upper-case ETH / E
    _T("ETH"),    _T("Egrave"), _T("Eacute"), _T("Ecirc"),  _T("Euml"),
    // upper-case I
    _T("Igrave"), _T("Iacute"), _T("Icirc"),  _T("Iuml"),
    // upper-case N / O
    _T("Ntilde"), _T("Ograve"), _T("Oacute"), _T("Ocirc"),
    _T("Otilde"), _T("Ouml"),   _T("Oslash"),
    // upper-case U
    _T("Ugrave"), _T("Uacute"), _T("Ucirc"),  _T("Uuml"),
    // upper-case Y and THORN
    _T("Yacute"), _T("THORN"),
};
/////////////////////////////////////////////////////////////////////////////
// CParseNewsLink
// Default constructor. The body is empty: no member is explicitly
// initialised here.
CParseNewsLink::CParseNewsLink()
{
}
// Destructor. The body is empty: no explicit clean-up is performed here.
CParseNewsLink::~CParseNewsLink()
{
}
/////////////////////////////////////////////////////////////////////////////
// CParseNewsLink message handlers
// Collects the accepted link/title pairs found in a piece of HTML text.
//
// temp    - text containing the anchors to scan (searched for "href=" and
//           "</a>").
// Returns - the accepted pairs concatenated as "<link>\n<title>\n" per anchor;
//           an empty string if the VerifyDate check fails or nothing is
//           accepted.
//
// Settings are read from FUrlCharacter (BaseUrl, VerifyDate, SearchTime,
// TitleFilter, StartPos, FinishPos, ClearString). As a side effect the member
// TempBaseLink is set; LinkAndTitle reads it when resolving relative links.
CString CParseNewsLink::ExtractLinkAndTitle(CString temp)
{
CString content,strSource;
CString TempString,LinkString,Slink,Stitle,filter=FUrlCharacter.TitleFilter;
// strSource keeps the original character case; temp is lower-cased and used
// for all searching. Every cut below is applied to both strings with the same
// offsets so that they stay aligned.
strSource = temp;
temp.MakeLower();
int pos,pos1=0;
// Base for relative links: BaseUrl up to and including its last '/', unless
// that '/' is already the final character (then BaseUrl is used unchanged).
pos = FUrlCharacter.BaseUrl.ReverseFind('/');
if(pos < (FUrlCharacter.BaseUrl.GetLength() - 1))
TempBaseLink = FUrlCharacter.BaseUrl.Left(pos + 1);
else
TempBaseLink = FUrlCharacter.BaseUrl;
pos=0;
// Optional date check: VerifyDate is expanded through SearchTime.Format() and
// the result must occur in the (lower-cased) text, otherwise nothing is
// returned.
if (!FUrlCharacter.VerifyDate.IsEmpty()) {
TempString=FUrlCharacter.SearchTime.Format(FUrlCharacter.VerifyDate);
if (temp.Find(TempString)==-1)
return _T("");
}
// A TitleFilter that starts with '&' is used as a positional filter in the
// loop below: the '&' is dropped and the rest is expanded through
// SearchTime.Format(). Any other TitleFilter is not used positionally here
// (LinkAndTitle still passes FUrlCharacter.TitleFilter to IsVidTitle).
if (!filter.IsEmpty()&&filter[0]=='&') {
filter.Delete(0,1);
filter=FUrlCharacter.SearchTime.Format(filter);
}
else filter=_T("");
// Drop everything before the first occurrence of StartPos. The search runs
// on the lower-cased text and StartPos itself is not lower-cased.
// NOTE(review): if StartPos is not found pos is -1 and both Delete calls get
// a negative count -- confirm that this leaves the text unchanged.
if (!FUrlCharacter.StartPos.IsEmpty()) {
pos=temp.Find(FUrlCharacter.StartPos);
temp.Delete(0,pos);
strSource.Delete(0,pos);
}
// Keep only the text before the first occurrence of FinishPos.
// NOTE(review): if FinishPos is not found, Mid(0,-1) is evaluated -- confirm
// whether discarding the text in that case is intended (it differs from the
// StartPos handling above).
if (!FUrlCharacter.FinishPos.IsEmpty()) {
pos=temp.Find(FUrlCharacter.FinishPos);
temp = temp.Mid(0,pos);
strSource = strSource.Mid(0,pos);
}
// Walk the anchors: each iteration starts with pos at the next "href=".
pos=temp.Find(_T("href="));
while (pos>-1)
{
// Cut so that both strings begin at "href=".
temp.Delete(0,pos);
strSource.Delete(0,pos);
pos=temp.Find(_T("</a>"));
if (pos==-1) break;
// The fragment handed to LinkAndTitle runs from "href=" through "</a"
// (pos+3 characters) and is taken from the original-case text.
TempString=strSource.Mid(0,pos+3);
temp.Delete(0,pos);
strSource.Delete(0,pos);
// Locate the following anchor now; pos is -1 when there is none.
pos=temp.Find(_T("href="));
// Positional filter: stop altogether once the filter text no longer occurs in
// the remaining text; skip the fragment just cut out when the filter's next
// occurrence lies after the next "href=".
// NOTE(review): when there is no further anchor pos is -1, so pos1>pos holds
// and the last fragment is skipped -- confirm this is intended.
if (!filter.IsEmpty()) {
pos1=temp.Find(filter);
if (pos1==-1) break;
if (pos1>pos) continue;
}
// LinkAndTitle returns "<link>\n<title>\n", or an empty string on rejection.
content = LinkAndTitle(TempString,LinkString);
int ki = content.Find('\n');
if(ki != -1){
Slink = content.Left(ki);
// Skip links whose text already occurs in the result (substring test).
if(LinkString.Find(Slink) == -1){
Stitle = content.Mid(ki + 1);
// Drop the last character of the title part (the '\n' appended by
// LinkAndTitle).
Stitle = Stitle.Mid(0,Stitle.GetLength() - 1);
CString TempFilter,tmp;
int post;
// A ClearString that starts with '$' is handled here: the text after the '$'
// is expanded through SearchTime.Format().
pos1 = FUrlCharacter.ClearString.Find('$');
if(pos1 == 0){
TempFilter = FUrlCharacter.SearchTime.Format(
FUrlCharacter.ClearString.Mid(1));
post = TempFilter.Find('&');
if(post != -1){
// "<prefix>&<suffix>" form: remove the first occurrence of the prefix, then
// look for the suffix from that position and delete from there up to and
// including the character at which the suffix was found.
// NOTE(review): only the first character of the suffix is removed by the
// second Delete -- confirm whether the whole suffix should go.
tmp = TempFilter.Left(post);
pos1 = Stitle.Find(tmp);
if(pos1 != -1){
Stitle.Delete(pos1,tmp.GetLength());
post = Stitle.Find(TempFilter.Mid(post + 1),pos1);
if(post != -1){
Stitle.Delete(pos1,post - pos1 + 1);
}
}
}
else{
// No '&': remove the first occurrence of the expanded text from the title.
pos1 = Stitle.Find(TempFilter);
if(pos1 != -1)
Stitle = Stitle.Left(pos1) +
Stitle.Mid(pos1 + TempFilter.GetLength());
}
}
// Append this pair to the result.
LinkString=LinkString+Slink+'\n'+Stitle+'\n';
}
}
}
return LinkString;
}
// Parses one anchor fragment into "<absolute link>\n<title>\n".
//
// temp     - fragment that starts with "href=" and runs up to the anchor's
//            "</a" (this is what ExtractLinkAndTitle passes in).
// TempLink - not used by this function.
// Returns  - "<link>\n<title>\n", or an empty string when the link is
//            rejected by IsVidLink, the title is rejected by IsVidTitle or is
//            empty, or a relative link cannot be resolved against
//            TempBaseLink.
//
// Reads FUrlCharacter.LinkFilter / ClearString / TitleFilter and the member
// TempBaseLink. The character after "href=" is inspected with Mid(5,1) in all
// branches and relative-path prefixes are inspected with Mid(), so fragments
// or links shorter than expected are never indexed out of range.
CString CParseNewsLink::LinkAndTitle(CString temp,CString TempLink)
{
    int pos=0,pos1=0;
    CString link,title,strSource;

    // strSource keeps the original case; temp is lower-cased for searching.
    // Both strings receive identical cuts so that offsets stay aligned.
    strSource = temp;
    temp.MakeLower();

    // For javascript links, remove everything after the first '=' up to and
    // including the first '(' so that the call's argument follows "href=".
    pos=temp.Find(_T("javascript"));
    if (pos!=-1)
    {
        pos=temp.Find('(');
        pos1=temp.Find('=');
        if(pos!=-1)
        {
            temp.Delete(pos1+1,pos-pos1);
            strSource.Delete(pos1+1,pos-pos1);
        }
    }

    // The character that follows "href=" selects how the URL is delimited.
    // Mid(5,1) yields an empty string when the fragment is too short.
    const CString opener = temp.Mid(5,1);
    if (opener==_T("\"")) {
        // href="url"
        temp.Delete(0,6);
        strSource.Delete(0,6);
        pos=temp.Find('"');
        if (pos<4) {
            temp.Delete(0,pos);
            strSource.Delete(0,pos);
            pos=temp.Find('"');
        }
        link=strSource.Mid(0,pos);
    }
    else if (opener==_T("'")) {
        // href='url'
        temp.Delete(0,6);
        strSource.Delete(0,6);
        pos=temp.Find(_T("'"));
        if (pos<3) {
            temp.Delete(0,pos);
            strSource.Delete(0,pos);
            pos=temp.Find(_T("'"));
        }
        link=strSource.Mid(0,pos);
    }
    else if (opener==_T("\\")) {
        // href=\... : cut at the next backslash; when that is closer than
        // three characters, cut there and search for a double quote instead.
        temp.Delete(0,6);
        strSource.Delete(0,6);
        pos=temp.Find(_T("\\"));
        if (pos<3) {
            temp.Delete(0,pos);
            strSource.Delete(0,pos);
            pos=temp.Find('"');
        }
        link=strSource.Mid(0,pos);
    }
    else {
        // href=url (unquoted): the URL ends at the first space, or at '>'
        // when no space is found at index 3 or later.
        temp.Delete(0,5);
        strSource.Delete(0,5);
        pos=temp.Find(' ');
        if (pos<3) pos=temp.Find('>');
        link=strSource.Mid(0,pos);
    }

    link=ClearLink(link);
    if (!IsVidLink(link,FUrlCharacter.LinkFilter))
        return _T("");

    // The title is the text between the end of the opening tag and "</a".
    // NOTE(review): it is taken from the lower-cased copy (temp), not from
    // strSource, so the original letter case of the title is lost -- confirm
    // that this is intended.
    pos=temp.Find('>');
    temp.Delete(0,pos+1);
    strSource.Delete(0,pos+1);
    pos=temp.Find(_T("</a"));
    title=temp.Mid(0,pos);
    title=ClearTitle(title,FUrlCharacter.ClearString);

    if(link.Find(_T("http")) == 0)
    {
        // Link already starts with "http": used as it is.
        link = link + '\n';
    }
    else
    {
        if(link.Find('/') == 0)
        {
            // "/path": append to the part of TempBaseLink before its first '/'.
            const int firstSlash = TempBaseLink.Find('/');
            if(firstSlash == -1) return _T("");
            link = TempBaseLink.Left(firstSlash) + link + '\n';
        }
        else if(link.Find('.') == 0)
        {
            // Mid(1,1) / Mid(2,1) are empty for links of one or two characters,
            // which then fall through to the rejecting branches.
            const CString second = link.Mid(1,1);
            if(second == _T("."))
            {
                // "../path": resolve against the parent of TempBaseLink.
                if(link.Mid(2,1) != _T("/")) return _T("");
                const CString trimmed = TempBaseLink.Mid(0,TempBaseLink.GetLength() - 1);
                const int lastSlash = trimmed.ReverseFind('/');
                if(lastSlash == -1) return _T("");
                link = trimmed.Left(lastSlash + 1) + link.Mid(3) + '\n';
            }
            else if(second == _T("/"))
            {
                // "./path": resolve against TempBaseLink itself.
                link = TempBaseLink + link.Mid(2) + '\n';
            }
            else return _T("");
        }
        else
        {
            // Plain relative path.
            link=TempBaseLink+link+'\n';
        }
        // Every link built from TempBaseLink gets the scheme prepended.
        link = _T("http://") + link;
    }

    if (!IsVidTitle(title,FUrlCharacter.TitleFilter))
        title=_T("");
    if (title.IsEmpty())
        return _T("");
    return link+title +'\n';
}
// Tells whether a link should be kept.
//
// temp   - the link; it is lower-cased before any test.
// filter - optional text the link must contain. When it contains a '%', its
//          first character is dropped and the remainder is expanded through
//          FUrlCharacter.SearchTime.Format() before the test.
// Returns FALSE for links containing "default", "index" or "mailto:";
// otherwise TRUE when the filter is empty or occurs in the link, FALSE if not.
BOOL CParseNewsLink::IsVidLink(CString temp,CString filter)
{
    temp.MakeLower();

    const BOOL isLandingPage =
        temp.Find(_T("default")) != -1 || temp.Find(_T("index")) != -1;
    if (isLandingPage || temp.Find(_T("mailto:")) != -1)
        return FALSE;

    // No filter configured: every remaining link is accepted.
    if (filter.IsEmpty())
        return TRUE;

    if (filter.Find('%') != -1)
        filter = FUrlCharacter.SearchTime.Format(filter.Mid(1));

    return (temp.Find(filter) != -1) ? TRUE : FALSE;
}
CString CParseNewsLink::ClearTitle(CString temp,CString filter)
{
int pos1=0,pos2=0,count;
CString TempFilter,tmp;
count = CHAR_NUM - 1;
while (count>=0) {
pos1=temp.Find(ClearTitleString[count]);
if (pos1!=-1) {
if(temp[pos1 + ClearTitleString[count].GetLength()] == ';')
temp.Delete(pos1 + ClearTitleString[count].GetLength(),1);
temp.Replace(ClearTitleString[count],_T(" "));
break;
}
count--;
}
while (1) {
pos1=temp.Find('<');
pos2=temp.Find('>');
if (pos1>-1&&pos2>-1&&pos2>pos1)
temp.Delete(pos1,pos2-pos1+1);
else
break;
}
while (1) {
pos1=temp.Find(_T("nbsp"));
if (pos1==-1) break;
if(temp[pos1 + 4] == ';')
temp.Delete(pos1 + 4,1);
temp.Replace("nbsp",_T(" "));
}
if (!filter.IsEmpty() && filter.Find('$') != 0) {
pos1=1;
while (pos1>-1) {
pos1=filter.Find('&');
if (pos1==-1) {
while (1) {
pos2=temp.Find(filter);
if (pos2>-1)
temp.Delete(pos2,filter.GetLength());
else break;
}
break;
}
else {
TempFilter=filter.Mid(0,pos1-1);
while (1) {
pos2=temp.Find(TempFilter);
if (pos2>-1)
temp.Delete(pos2,TempFilter.GetLength());
else break;
}
filter.Delete(0,pos1);
}
}
}
// while (1) {
// pos2=temp.Find(_T(" "));
// if (pos2==-1) break;
// temp.Delete(pos2,1);
// }
while (1) {
pos2=temp.Find('\n');
if (pos2==-1) break;
temp.Delete(pos2,1);
// temp.Replace('\n',NULL);
}
// temp = ClearChar(temp,
return ClearChar(temp,'&');
}
BOOL CParseNewsLink::IsVidTitle(CString temp, CString filter)
{
temp.TrimLeft(' ');
if (temp.IsEmpty()) return FALSE;
if (filter.IsEmpty()) return TRUE;
if (filter.Find('$')!=-1) {
filter.Delete(0,1);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -