📄 parsenewslink.cpp
字号:
filter=FUrlCharacter.SearchTime.Format(filter);
if (temp.Find(filter)==-1)
return FALSE;
else return TRUE;
}
else return TRUE;
}
// Cleans a raw link fragment: removes the first '>' character, cuts the text
// off shortly before the first '<', and then strips every double quote,
// single quote, backslash and newline via ClearChar().
//
// Returns the cleaned copy; the argument is taken by value and not shared.
CString CParseNewsLink::ClearLink(CString temp)
{
    // Both positions are taken from the unmodified string.
    const int openPos  = temp.Find('<');
    const int closePos = temp.Find('>');

    if (closePos != -1)
        temp.Delete(closePos, 1);

    // NOTE(review): openPos is not re-computed after the Delete above, and the
    // cut keeps only openPos-1 characters. So the character just before '<' is
    // also dropped unless a '>' preceded it. Confirm this is intended.
    if (openPos != -1)
        temp = temp.Left(openPos - 1);

    temp = ClearChar(temp, '"');
    temp = ClearChar(temp, _T("'"));
    temp = ClearChar(temp, _T("\\"));
    temp = ClearChar(temp, '\n');
    return temp;
}
// Repeatedly looks for `cc` in `temp` and deletes ONE character at each match
// position until `cc` no longer occurs. For the single-character needles used
// by ClearLink() this removes every occurrence of that character.
//
// temp : text to clean (taken by value, the caller's string is untouched)
// cc   : text to search for; only its first character is deleted per match
// Returns the cleaned string. An empty `cc` returns `temp` unchanged.
CString CParseNewsLink::ClearChar(CString temp,CString cc)
{
    // Searching for an empty string always "matches" at index 0, which used to
    // erase the whole text and then loop forever on the empty remainder.
    if (cc.IsEmpty())
        return temp;

    int pos = temp.Find(cc);
    while (pos != -1)
    {
        temp.Delete(pos, 1);
        // Rescan from the start: deleting a character can create a new match
        // that begins before `pos` when `cc` is longer than one character.
        pos = temp.Find(cc);
    }
    return temp;
}
// Copies the link-parsing settings from *temp into the member FUrlCharacter,
// one field at a time. `temp` is dereferenced unconditionally, so it must
// point to a valid LinkCharacter.
void CParseNewsLink::Copy(LinkCharacter* temp)
{
    LinkCharacter& src = *temp;

    // Search window and base address.
    FUrlCharacter.SearchTime  = src.SearchTime;
    FUrlCharacter.BaseUrl     = src.BaseUrl;

    // Filters and clean-up rules.
    FUrlCharacter.LinkFilter  = src.LinkFilter;
    FUrlCharacter.TitleFilter = src.TitleFilter;
    FUrlCharacter.ClearString = src.ClearString;
    FUrlCharacter.VerifyDate  = src.VerifyDate;

    // Region markers.
    FUrlCharacter.StartPos    = src.StartPos;
    FUrlCharacter.FinishPos   = src.FinishPos;
}
// Extracts the article text for a news page and optionally filters it by a
// keyword.
//
// content : raw HTML of the page
// title   : title of the article; passed lower-cased to GetAllContent()
// key     : keyword that must occur in the body or the title; an empty key
//           disables the keyword check
// cut     : rule string handed to CutNoUse(); empty means "no clean-up"
//
// Returns the (optionally cleaned) body, or an empty string when no body was
// extracted or the keyword is not present.
CString CParseNewsLink::GetNewsBody(CString content,CString title,CString key,CString cut)
{
    const BOOL hasKey = !key.IsEmpty();
    if (hasKey)
        key.MakeLower();

    CString lowerTitle = title;
    lowerTitle.MakeLower();

    CString body = GetAllContent(content, lowerTitle);
    if (body.IsEmpty())
        return _T("");

    if (hasKey)
    {
        // The key was lower-cased above, so the text it is compared against
        // must be lower-cased too. The body used to be searched in its
        // original case, which made the match accidentally case-sensitive.
        CString lowerBody = body;
        lowerBody.MakeLower();
        if (lowerBody.Find(key) == -1 && lowerTitle.Find(key) == -1)
            return _T("");
    }

    if (cut.IsEmpty())
        return body;
    return CutNoUse(body, cut);
}
CString CParseNewsLink::GetAllContent(CString html,CString title)
{
int ki1,ki2,ki3,ki4,i;
BOOL IsRtn;
CString str,Ht,tmptit;
Ht = html;
str = Ht;
Ht.MakeLower();
/* ki1 = Ht.Find(_T("<a href="));
while(ki1 >= 0) //舍去 <a href=...> 链接内容
{
ki2 = Ht.Find(_T("</a>"),ki1);
if(ki2<ki1)
break;
Ht = Ht.Left(ki1) + Ht.Mid(ki2 + 4);
str = str.Left(ki1) + str.Mid(ki2 + 4);
ki1 = Ht.Find(_T("<a href="));
}
*/
ki1 = Ht.Find(_T("<a"));
while(ki1 != -1)
{
ki2 = Ht.Find(_T("href"),ki1);
ki3 = Ht.Find(_T("<"),ki1 + 1);
ki4 = Ht.Find(_T(">"),ki1);
if(ki2 > ki3 || ki2 > ki4)
ki1 = Ht.Find(_T("<a"),ki1 + 1);
else
{
ki3 = Ht.Find(_T("="),ki2);
if(ki3 < ki1) break;
ki4 = Ht.Find(_T("</a>"),ki3);
if(ki4 < ki1) break;
Ht = Ht.Left(ki1) + Ht.Mid(ki4 + 4);
str = str.Left(ki1) + str.Mid(ki4 + 4);
ki1 = Ht.Find(_T("<a"));
}
}
ki1 = Ht.Find(_T("<script"));
while(ki1 >= 0) //舍去 <script...> 内容
{
ki2 = Ht.Find(_T("</script>"),ki1);
if(ki2<ki1)
break;
Ht = Ht.Left(ki1) + Ht.Mid(ki2 + 9);
str = str.Left(ki1) + str.Mid(ki2 + 9);
ki1 = Ht.Find(_T("<script"));
}
ki1 = Ht.Find(_T("<option"));
while(ki1 >= 0) //舍去 <option...> 内容
{
ki2 = Ht.Find(_T("</option>"),ki1);
if(ki2<ki1)
break;
Ht = Ht.Left(ki1) + Ht.Mid(ki2 + 9);
str = str.Left(ki1) + str.Mid(ki2 + 9);
ki1 = Ht.Find(_T("<option"));
}
ki1 = Ht.Find(_T("<title"));
while(ki1 >= 0) //舍去 <title...> 内容
{
ki2 = Ht.Find(_T("</title>"),ki1);
if(ki2<ki1)
break;
Ht = Ht.Left(ki1) + Ht.Mid(ki2 + 8);
str = str.Left(ki1) + str.Mid(ki2 + 8);
ki1 = Ht.Find(_T("<title"));
}
ki1 = Ht.Find(_T("<body"));
if(ki1 != -1)
Ht = Ht.Mid(ki1);
str = str.Mid(ki1);
title.Replace(_T("\x09"),NULL);
title.TrimLeft(_T(" "));
title.TrimRight(_T(" "));
i = Ht.Find(title);
if(i == -1){
i = Ht.Find(title.Left(1));
if(i == -1) return _T("");
Ht = Ht.Mid(i);
str = str.Mid(i);
ki2 = Ht.Find(_T("\n"));
ki1 = Ht.Find(_T("<"));
if(ki1 == -1 && ki2 == -1) return _T("");
if(ki2 < ki1){IsRtn = TRUE;ki1 = ki2;}
else IsRtn = FALSE;
while(ki1 != -1){
if(IsRtn){
Ht.Delete(ki1,1);
str.Delete(ki1,1);
i = Ht.Find(title);
if(i != -1) break;
}
else{
ki2 = Ht.Find('>',ki1);
if(ki2 == -1) break;
Ht = Ht.Left(ki1) + Ht.Mid(ki2 + 1);
str = str.Left(ki1) + str.Mid(ki2 +1);
i = Ht.Find(title);
if(i != -1) break;
}
ki1 = Ht.Find('<',ki1);
ki2 = Ht.Find(_T("\n"),ki1);
if(ki2 < ki1){IsRtn = TRUE;ki1 = ki2;}
else IsRtn = FALSE;
}
if(i == -1) return _T("");
}
ki1 = title.GetLength();
title = str.Mid(i,ki1);
Ht = Ht.Mid(i+ki1);
str = str.Mid(i+ki1);
ki1 = title.Find(_T(" "));
while(ki1 != -1){
title = title.Left(ki1 + 1) + title.Mid(ki1 + 2);
ki1 = title.Find(_T(" "));
}
str = title + str;
title.MakeLower();
Ht = title + Ht;
Ht.Replace(_T(">"),_T(">\n"));
str.Replace(_T(">"),_T(">\n"));
ki1 = Ht.Find(_T("<"));
while(ki1 >= 0) //舍去<>内容
{
ki2 = Ht.Find('>',ki1);
if(ki2 == -1) break;
Ht = Ht.Left(ki1) + Ht.Mid(ki2 + 1);
str = str.Left(ki1) + str.Mid(ki2 +1);
ki1 = Ht.Find('<',ki1);
}
str.Replace(_T(" "),_T(" "));
str.Replace(_T(" "),_T(" "));
str.Replace(_T(">"),_T(">"));
str.Replace(_T(">"),_T(">"));
str.Replace(_T("<"),_T("<"));
str.Replace(_T("<"),_T("<"));
str.Replace(_T("""),_T("\""));
str.Replace(_T("""),_T("\""));
str.Replace(_T("&"),_T("&"));
str.Replace(_T("&"),_T("&"));
str.Replace(_T("©"),_T("(C)"));
str.Replace(_T("©"),_T("(C)"));
str.Replace(_T("®"),_T("(R)"));
str.Replace(_T("®"),_T("(R)"));
str.Replace(_T("´"),_T("'"));
str.Replace(_T("´"),_T("'"));
str.Replace(_T("\xA1\xA1"),_T(" "));
for(i=0;i<92;i++)
str.Replace(ResChar[i],NULL);
return str;
}
// Returns a copy of `html` with every "<...>" tag removed.
//
// A '<' that has no closing '>' after it ends the scan and is left in place.
// Previously that case spliced the whole string back onto itself on every
// iteration and never terminated.
CString CParseNewsLink::ClearStr(CString html)
{
    CString Ht = html;
    int ki1 = Ht.Find('<');
    while(ki1 >= 0)
    {
        const int ki2 = Ht.Find('>',ki1);
        if(ki2 == -1)
            break;                          // unterminated tag: stop
        Ht = Ht.Left(ki1) + Ht.Mid(ki2 + 1);
        // Continue from the cut position; text before it holds no '<'.
        ki1 = Ht.Find('<',ki1);
    }
    return Ht;
}
// Filters `body` line by line according to the rule list in `snouse` and
// returns the surviving lines, each followed by '\n'.
//
// body   : text to clean; only lines terminated by '\n' are processed, so any
//          text after the last '\n' is not copied to the result
// snouse : rule list; an empty list returns `body` unchanged
//
// Rule syntax as implemented below:
//   - rules are separated by '&'; only text followed by an '&' is treated as
//     a rule;
//   - the sequence \7c in the rule list stands for '|';
//   - ^N^ inside a rule is replaced by the character with numeric code N
//     (rule text in front of the first '^' is discarded by that replacement);
//   - "begin%end"       : removes the span from the first non-space character
//                         up to and including `end`;
//   - "begin%@Date@end" : if the text between `begin` and `end` parses as a
//                         date/time, the line becomes begin+end;
//   - a rule containing '#' : drops the whole line when the lower-cased line
//                         starts with the rule text minus its first character;
//   - any other rule    : if the line starts with the rule text, removes all
//                         occurrences of that text from the line.
// "Starts with" always means: the first occurrence is at the first non-space
// character. The first rule that fires ends rule processing for that line.
// Lines that are empty, or consist only of spaces, after processing are dropped.
CString CParseNewsLink::CutNoUse(CString body, CString snouse)
{
int ki,ki1,ki2,ki3,i,len;
CString line,rcut,cut,news,nouse,tmpline,begin,end;
COleDateTime m_Date;
BOOL bSuccess;
if(snouse.IsEmpty())
return body;
rcut = snouse;
// The four characters \7c are an escape for '|'.
rcut.Replace(_T("\\7c"),_T("|"));
ki = body.Find('\n');
// Outer loop: one iteration per '\n'-terminated line of body.
while(ki != -1)
{
line = body.Left(ki);
// Restart from the full rule list for every line.
cut = rcut;
ki1 = cut.Find('&');
// Inner loop: one iteration per '&'-terminated rule, until a rule fires
// (break) or the line has become empty.
while(ki1 != -1 && !line.IsEmpty())
{
i = 0;
nouse = cut.Left(ki1);
// Expand ^N^ escapes into the character with code N.
ki2 = nouse.Find('^');
while(ki2 != -1)
{
ki3 = nouse.Find('^',ki2 + 1);
if(ki3 != -1){
tmpline = nouse.Mid(ki2 + 1,ki3 - ki2 - 1);
// NOTE(review): atoi() on a CString only compiles in non-Unicode builds;
// _ttoi would match the _T() usage elsewhere - confirm the build settings.
begin.Format(_T("%c"),atoi(tmpline));
end = nouse.Mid(ki3 + 1);
// Anything that preceded the first '^' is not carried over.
nouse = begin + end;
}else
break;
ki2 = nouse.Find('^');
}
len = line.GetLength();
// i = index of the first non-space character of the line.
while(i<len && line.GetAt(i) == ' ') i++;
ki2 = nouse.Find('%');
if(ki2 != -1)
{
// "begin%end" rule: split around the first '%'.
begin = nouse.Left(ki2);
end = nouse.Mid(ki2 + 1);
if(begin.IsEmpty() || line.Find(begin) == i)
{
if(end.Find(_T("@Date@")) == 0)
{
// Date rule: tmpline receives the candidate date text.
if(end.GetLength() > 6)
{
// Text follows the @Date@ marker: the candidate is everything before
// that text, with all occurrences of begin removed.
end = end.Mid(6);
ki3 = line.Find(end);
if(ki3 != -1)
{
tmpline = line.Left(ki3);
tmpline.Replace(begin,NULL);
}
else tmpline.Empty();
}
else
{
// Nothing follows the marker: the candidate is the line with the first
// begin.GetLength() characters skipped.
end.Empty();
tmpline = line.Mid(begin.GetLength());
}
// Any exception raised while parsing counts as "not a date".
try
{
bSuccess = m_Date.ParseDateTime(tmpline);
}
catch(...)
{
bSuccess = FALSE;
}
if(bSuccess)
{
// The date (and the rest of the line) is replaced by begin + end.
line = begin + end;
break;
}
}
else
{
ki3 = line.Find(end);
if(ki3 != -1)
{
// Remove the span from the first non-space character through `end`
// (every occurrence of that span text is removed by Replace).
nouse = line.Mid(i,ki3 - i + end.GetLength());
line.Replace(nouse,NULL);
break;
}
}
}
}
else
{
ki2 = nouse.Find('#');
if(ki2 != -1)
{
// '#' rule: the first character of the rule is skipped regardless of where
// the '#' was found.
// NOTE(review): presumably '#' is expected at index 0 - confirm.
nouse = nouse.Mid(1);
tmpline = line;
tmpline.MakeLower();
if(tmpline.Find(nouse) == i)
{
// Matching line is dropped entirely.
line = _T("");break;
}
}
else
{
// Plain rule: fires only when the line starts with the rule text.
if(line.Find(nouse) == i)
{
line.Replace(nouse,NULL);break;
}
}
}
// Advance to the next rule.
cut = cut.Mid(ki1 + 1);
ki1 = cut.Find('&');
}
// Keep the line only if something other than spaces is left.
if(!line.IsEmpty()){
tmpline = line;
tmpline.Replace(_T(" "),NULL);
if(!tmpline.IsEmpty()){news += line;news += '\n';}
}
// Advance to the next line of body.
body = body.Mid(ki + 1);
ki = body.Find('\n');
}
return news;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -