📄 collect.cs
字号:
//======================================================
//== (c)2008 aspxcms inc by NeTCMS v1.0 ==
//== Forum:bbs.aspxcms.com ==
//== Website:www.aspxcms.com ==
//======================================================
using System;
using System.IO;
using System.Data;
using System.Net;
using System.Text;
using NetCMS.Model;
using NetCMS.Control;
namespace NetCMS.Content.Collect
{
/// <summary>
/// 采集类
/// </summary>
public class Collect
{
private NetCMS.DALFactory.ICollect dal;
private string ErrorMsg = "";
private bool _ShowProGressBar;
/// <summary>
/// Creates a collector: obtains the data-access object from
/// NetCMS.DALFactory.DataAccess.CreateCollect() and turns the progress bar on.
/// </summary>
public Collect()
{
    this.dal = NetCMS.DALFactory.DataAccess.CreateCollect();
    this._ShowProGressBar = true;
}
#region 采集入库
/// <summary>
/// 是否保存远程图片
/// </summary>
private bool bSaveRemotePic = false;
private string PicSavePath = "";
private string PicSaveUrl = "";
/// <summary>
/// Gets or sets whether progress is reported through the progress bar while collecting.
/// The constructor initializes it to true.
/// </summary>
public bool ShowProGressBar
{
    get
    {
        return this._ShowProGressBar;
    }
    set
    {
        this._ShowProGressBar = value;
    }
}
/// <summary>
/// Collects news for one configured site: loads the site's settings row via GetSite, builds the
/// list of news links (following additional list pages when the first page yields fewer than
/// <paramref name="num"/> links) and hands each link to CollectPage. Progress messages are sent
/// to HProgressBar only when <see cref="ShowProGressBar"/> is true.
/// </summary>
/// <param name="folderid">Identifier passed to GetSite to look up the site's settings row.</param>
/// <param name="num">Maximum number of news links to process in this run.</param>
/// <param name="bnorepeat">Forwarded unchanged to CollectPage as its "norepeat" argument.</param>
public void Collecting(int folderid, int num, bool bnorepeat)
{
    // The remote-picture settings live in instance fields that CollectPage reads. Clear them
    // first so a previous run with SaveRemotePic enabled cannot leak its flag and paths into
    // a run for a site that has the option switched off.
    bSaveRemotePic = false;
    PicSavePath = "";
    PicSaveUrl = "";

    if (ShowProGressBar)
    {
        HProgressBar.Start("正在读取列表数据");
    }

    // ---- Validate the site settings ----
    DataTable tb = GetSite(folderid);
    if (tb == null || tb.Rows.Count < 1)
    {
        if (ShowProGressBar)
        {
            HProgressBar.Roll("没有找到该站点的相关记录!", 0);
        }
        return;
    }

    DataRow r = tb.Rows[0];
    if (r.IsNull("LinkSetting") || r.IsNull("PageTitleSetting") || r.IsNull("PagebodySetting"))
    {
        if (ShowProGressBar)
        {
            HProgressBar.Roll("相关的参数没有设置,无法取得新闻列表!", 0);
        }
        return;
    }

    // ---- Prepare the storage location for remote pictures, if enabled ----
    if (bool.Parse(r["SaveRemotePic"].ToString()))
    {
        string rtpath = NetCMS.Config.UIConfig.dirFile;
        if (rtpath == null || rtpath.Trim().Equals(""))
        {
            if (ShowProGressBar)
            {
                HProgressBar.Roll("没有找到管理员附件目录!", 0);
            }
            return;
        }

        // Files are grouped into one sub-folder per day (yyyyMMdd).
        string dtpath = DateTime.Now.ToString("yyyyMMdd");
        PicSavePath = NetCMS.Common.ServerInfo.GetRootPath().TrimEnd('\\') + @"\" + rtpath + @"\RemoteFiles\" + dtpath;
        if (!Directory.Exists(PicSavePath))
        {
            Directory.CreateDirectory(PicSavePath);
        }
        PicSaveUrl = NetCMS.Publish.CommonData.getUrl() + "/" + rtpath + "/RemoteFiles/" + dtpath;
        bSaveRemotePic = true;
    }

    // ---- Fetch the list page and extract the news links ----
    if (ShowProGressBar)
    {
        HProgressBar.Roll("正在获取新闻列表页", 0);
    }

    string sListUrl = r["objURL"].ToString();
    string sEncode = r["Encode"].ToString();
    bool bReverse = bool.Parse(r["IsReverse"].ToString());

    // Without an explicit list rule the whole <body> is treated as the list area.
    string listset = @"<body[^>]*>(?<list>[\s\S]+?)</body>";
    if (!r.IsNull("ListSetting"))
    {
        listset = r["ListSetting"].ToString();
    }

    PageList PL = new PageList(sListUrl, sEncode);
    PL.RuleOfList = listset;
    PL.RuleOfLink = r["LinkSetting"].ToString();

    string[] NewsUrl = GetNewsList(PL);
    if (NewsUrl == null)
    {
        if (ShowProGressBar)
        {
            HProgressBar.Roll("没有找到相关新闻链接地址!", 0);
        }
        return;
    }

    // ---- Not enough links on the first page: ask the configured paging mode for more ----
    int len = NewsUrl.Length;
    if (len < num)
    {
        int pagetype = int.Parse(r["OtherType"].ToString());
        string pageRule = r["OtherPageSetting"].ToString();
        string[] otherurl = null;
        switch (pagetype)
        {
            case 1: // recursive paging
                otherurl = PL.Pagination(pageRule, num - len);
                break;
            case 2: // other pages
                otherurl = PL.SinglePagination(pageRule, num - len);
                break;
            case 3: // index pages between StartPageNum and EndPageNum
                otherurl = PL.IndexPagination(pageRule, int.Parse(r["StartPageNum"].ToString()), int.Parse(r["EndPageNum"].ToString()), num - len);
                break;
            default: // 0 or unknown: no additional pages
                break;
        }

        if (otherurl != null && otherurl.Length > 0)
        {
            Array.Resize(ref NewsUrl, len + otherurl.Length);
            otherurl.CopyTo(NewsUrl, len);
        }
    }

    if (NewsUrl.Length < 1)
    {
        if (ShowProGressBar)
        {
            HProgressBar.Roll("从列表内容中没有找到任何新闻的相关链接!", 0);
        }
        return;
    }

    if (bReverse)
    {
        Array.Reverse(NewsUrl);
    }

    // ---- Collect each link ----
    if (ShowProGressBar)
    {
        HProgressBar.Roll("开始采集新闻", 0);
    }

    // Number of links that will actually be processed. Percentages are computed against this
    // value rather than the requested num, so the bar reaches 100% even when fewer links
    // were found than requested. The loop body only runs when total > 0, so the divisions
    // below are safe.
    int total = Math.Min(num, NewsUrl.Length);
    int nSucceed = 0, nFailed = 0, nRepeat = 0;
    for (int i = 0; i < total; i++)
    {
        try
        {
            // CollectPage: 0 = stored, -1 = repeat, 1 = failure.
            // Repeats are counted as a subset of the successes.
            int flag = CollectPage(NewsUrl[i], r, bnorepeat);
            if (flag == 1)
            {
                nFailed++;
            }
            else
            {
                nSucceed++;
                if (flag == -1)
                {
                    nRepeat++;
                }
            }
        }
        catch
        {
            // Best effort: one broken page must not abort the whole run.
            nFailed++;
        }

        if (ShowProGressBar)
        {
            string prompt = "正在采集新闻,终止<a href=\"Collect_List.aspx\">返回</a>.成功:" + nSucceed * 100 / total + "% ";
            if (nRepeat > 0)
            {
                prompt += "(其中重复:" + nRepeat * 100 / total + "%) ";
            }
            prompt += "失败:" + nFailed * 100 / total + "%";
            HProgressBar.Roll(prompt, (i + 1) * 100 / total);
        }
    }
}
/// <summary>
/// 处理采集单条新闻
/// </summary>
/// <param name="Url"></param>
/// <param name="r"></param>
/// <param name="norepeat"></param>
/// <returns>0为成功,-1为重复,1,为失败</returns>
private int CollectPage(string Url, DataRow r, bool norepeat)
{
try
{
if (Url == null || Url.Trim().Equals(""))
return 1;
PageNews pn = new PageNews(Url, r["Encode"].ToString());
if (!pn.Fetch())
return 1;
pn.RuleOfTitle = r["PageTitleSetting"].ToString();
pn.RuleOfContent = r["PagebodySetting"].ToString();
pn.FigureTitle();
if (norepeat)
{
if (pn.Title == null)
return 1;
if (dal.TitleExist(pn.Title))
return -1;
}
pn.FigureContent();
if (r.IsNull("HandSetAuthor"))
{
pn.FigureAuthor(r["AuthorSetting"].ToString(), false);
}
else
{
pn.FigureAuthor(r["HandSetAuthor"].ToString(), true);
}
if (r.IsNull("HandSetSource"))
{
pn.FigureSource(r["SourceSetting"].ToString(), false);
}
else
{
pn.FigureSource(r["HandSetSource"].ToString(), true);
}
if (r.IsNull("HandSetAddDate"))
{
pn.FigureAddTime(r["AddDateSetting"].ToString(), false);
}
else
{
pn.FigureAddTime(r["HandSetAddDate"].ToString(), true);
}
int pgtp = int.Parse(r["OtherNewsType"].ToString());
if (pgtp == 1)
{
pn.Content += pn.GetOtherPagination(r["OtherNewsPageSetting"].ToString());
}
else if (pgtp == 2)
{
pn.Content += pn.GetIndexPagination(r["OtherNewsPageSetting"].ToString());
}
pn.Filter(bool.Parse(r["TextTF"].ToString()),
bool.Parse(r["IsStyle"].ToString()), bool.Parse(r["IsDIV"].ToString()), bool.Parse(r["IsA"].ToString()),
bool.Parse(r["IsClass"].ToString()), bool.Parse(r["IsFont"].ToString()), bool.Parse(r["IsSpan"].ToString()),
bool.Parse(r["IsObject"].ToString()), bool.Parse(r["IsIFrame"].ToString()), bool.Parse(r["IsScript"].ToString()));
if (!r.IsNull("OldContent") && !r.IsNull("ReContent") && !r.IsNull("IgnoreCase"))
pn.Replace(r["OldContent"].ToString(), r["ReContent"].ToString(), bool.Parse(r["IgnoreCase"].ToString()));
if (pn.Content != null && !pn.Content.Trim().Equals("") && !pn.Title.Trim().Equals(""))
{
NetCMS.Model.CollectNewsInfo ninf = new NetCMS.Model.CollectNewsInfo();
ninf.Author = pn.Author;
ninf.Source = pn.Source;
ninf.AddDate = pn.AddTime;
ninf.Title = pn.Title;
ninf.SiteID = int.Parse(r["ID"].ToString());
ninf.Links = Url;
ninf.ClassID = r["ClassID"].ToString();
string Content = pn.Content;
if (bSaveRemotePic)
{
RemoteResource rs = new RemoteResource(Content, PicSaveUrl, PicSavePath, Url, true);
rs.FetchResource();
Content = rs.Content;
}
ninf.Content = Content;
NewsAdd(ninf);
return 0;
}
else
{
return 1;
}
}
catch (Exception e)
{
return 1;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -