⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 collect.cs

📁 最好用的站点内容管理系统 全部源代码都有
💻 CS
📖 第 1 页 / 共 2 页
字号:
//======================================================
//==     (c)2008 aspxcms inc by NeTCMS v1.0              ==
//==          Forum:bbs.aspxcms.com                   ==
//==         Website:www.aspxcms.com                  ==
//======================================================
using System;
using System.IO;
using System.Data;
using System.Net;
using System.Text;
using NetCMS.Model;
using NetCMS.Control;

namespace NetCMS.Content.Collect
{
    /// <summary>
    /// 采集类
    /// </summary>
    public class Collect
    {
        // Data-access object for collection data; the constructor assigns it from DataAccess.CreateCollect().
        private NetCMS.DALFactory.ICollect dal;
        // Error message text, initially empty.
        // NOTE(review): not referenced in the visible part of this class - confirm where it is used.
        private string ErrorMsg = "";
        // Backing field of the ShowProGressBar property.
        private bool _ShowProGressBar;
        /// <summary>
        /// Creates a collector: obtains the data-access object from the DAL factory
        /// and turns the progress bar on.
        /// </summary>
        public Collect()
        {
            this.dal = NetCMS.DALFactory.DataAccess.CreateCollect();
            this._ShowProGressBar = true;
        }
        #region 采集入库
        /// <summary>
        /// Whether remote pictures are saved locally. Collecting sets this to true
        /// when the site row's SaveRemotePic value is true; CollectPage reads it.
        /// </summary>
        private bool bSaveRemotePic = false;
        // Local directory for downloaded remote files; built by Collecting as
        // <root>\<attachment dir>\RemoteFiles\<yyyyMMdd>.
        private string PicSavePath = "";
        // URL counterpart of PicSavePath; built by Collecting as
        // <site url>/<attachment dir>/RemoteFiles/<yyyyMMdd>.
        private string PicSaveUrl = "";
        /// <summary>
        /// Gets or sets whether a progress bar is shown while collecting.
        /// The constructor sets it to true.
        /// </summary>
        public bool ShowProGressBar
        {
            get
            {
                return this._ShowProGressBar;
            }
            set
            {
                this._ShowProGressBar = value;
            }
        }
        /// <summary>
        /// Runs a collection pass for one site: loads the site's settings row,
        /// builds the list of news links (following additional list pages when the
        /// first page yields fewer than <paramref name="num"/> links) and collects
        /// each link through CollectPage, reporting progress along the way.
        /// </summary>
        /// <param name="folderid">Identifier passed to GetSite to load the site's settings row.</param>
        /// <param name="num">Maximum number of news links to collect.</param>
        /// <param name="bnorepeat">Forwarded to CollectPage as its norepeat flag (titles that already exist are not stored again).</param>
        public void Collecting(int folderid, int num, bool bnorepeat)
        {
            if (ShowProGressBar) HProgressBar.Start("正在读取列表数据");
            DataTable tb = GetSite(folderid);
            #region 检查数据是否完整
            if (tb == null || tb.Rows.Count < 1)
            {
                if (ShowProGressBar) HProgressBar.Roll("没有找到该站点的相关记录!", 0);
                return;
            }
            DataRow r = tb.Rows[0];
            if (r.IsNull("LinkSetting") || r.IsNull("PageTitleSetting") || r.IsNull("PagebodySetting"))
            {
                if (ShowProGressBar) HProgressBar.Roll("相关的参数没有设置,无法取得新闻列表!", 0);
                return;
            }
            // The flag is instance state read by CollectPage. Reset it for every run so
            // that a previous run with SaveRemotePic enabled does not leak into a site
            // that has it disabled.
            bSaveRemotePic = false;
            if (bool.Parse(r["SaveRemotePic"].ToString()))
            {
                #region 远程图片
                string rtpath = NetCMS.Config.UIConfig.dirFile;
                if (rtpath == null || rtpath.Trim().Equals(""))
                {
                    if (ShowProGressBar) HProgressBar.Roll("没有找到管理员附件目录!", 0);
                    return;
                }
                // Remote files are grouped in one folder per day.
                string dtpath = DateTime.Now.ToString("yyyyMMdd");
                PicSavePath = NetCMS.Common.ServerInfo.GetRootPath().TrimEnd('\\') + @"\" + rtpath + @"\RemoteFiles\" + dtpath;
                if (!Directory.Exists(PicSavePath))
                    Directory.CreateDirectory(PicSavePath);
                PicSaveUrl = NetCMS.Publish.CommonData.getUrl() + "/" + rtpath + "/RemoteFiles/" + dtpath;
                bSaveRemotePic = true;
                #endregion
            }
            #endregion 检查数据是否完整
            if (ShowProGressBar) HProgressBar.Roll("正在获取新闻列表页", 0);

            string sListUrl = r["objURL"].ToString();
            string sEncode = r["Encode"].ToString();
            bool bReverse = bool.Parse(r["IsReverse"].ToString());
            // Default list rule: the whole <body> of the list page.
            string listset = @"<body[^>]*>(?<list>[\s\S]+?)</body>";
            if (!r.IsNull("ListSetting"))
                listset = r["ListSetting"].ToString();
            PageList PL = new PageList(sListUrl, sEncode);
            PL.RuleOfList = listset;
            PL.RuleOfLink = r["LinkSetting"].ToString();
            string[] NewsUrl = GetNewsList(PL);
            if (NewsUrl == null)
            {
                if (ShowProGressBar) HProgressBar.Roll("没有找到相关新闻链接地址!", 0);
                return;
            }
            int len = NewsUrl.Length;
            if (len < num)
            {
                // Not enough links on the first list page: ask for the missing
                // (num - len) links from further pages, according to OtherType.
                int pagetype = int.Parse(r["OtherType"].ToString());
                string otherPageSetting = r["OtherPageSetting"].ToString();
                string[] otherurl = null;
                switch (pagetype)
                {
                    case 1: // recursive pagination
                        otherurl = PL.Pagination(otherPageSetting, num - len);
                        break;
                    case 2: // other pages
                        otherurl = PL.SinglePagination(otherPageSetting, num - len);
                        break;
                    case 3: // index pages
                        otherurl = PL.IndexPagination(otherPageSetting, int.Parse(r["StartPageNum"].ToString()), int.Parse(r["EndPageNum"].ToString()), num - len);
                        break;
                    default: // 0 or unknown: no additional pages
                        break;
                }
                if (otherurl != null && otherurl.Length > 0)
                {
                    Array.Resize(ref NewsUrl, len + otherurl.Length);
                    otherurl.CopyTo(NewsUrl, len);
                }
            }
            if (NewsUrl.Length < 1)
            {
                if (ShowProGressBar) HProgressBar.Roll("从列表内容中没有找到任何新闻的相关链接!", 0);
                return;
            }
            if (bReverse)
                Array.Reverse(NewsUrl);
            if (ShowProGressBar) HProgressBar.Roll("开始采集新闻", 0);
            int nSucceed = 0, nFailed = 0, nRepeat = 0;
            // At most num links are processed. All percentages are relative to num,
            // so they stay below 100 when fewer links than requested were found.
            for (int i = 0; i < NewsUrl.Length && i < num; i++)
            {
                try
                {
                    // CollectPage: 0 = stored, -1 = duplicate title, 1 = failed.
                    int flag = CollectPage(NewsUrl[i], r, bnorepeat);
                    if (flag != 1)
                    {
                        // Duplicates count as succeeded and are also reported separately.
                        nSucceed++;
                        if (flag == -1)
                            nRepeat++;
                    }
                    else
                        nFailed++;
                }
                catch
                {
                    // Best effort: a failing page must not abort the whole run.
                    nFailed++;
                }
                string prompt = "正在采集新闻,终止<a href=\"Collect_List.aspx\">返回</a>.成功:" + nSucceed * 100 / num + "% ";
                if (nRepeat > 0)
                    prompt += "(其中重复:" + nRepeat * 100 / num + "%) ";
                prompt += "失败:" + nFailed * 100 / num + "%";
                if (ShowProGressBar) HProgressBar.Roll(prompt, (i + 1) * 100 / num);
            }
        }
        /// <summary>
        /// 处理采集单条新闻
        /// </summary>
        /// <param name="Url"></param>
        /// <param name="r"></param>
        /// <param name="norepeat"></param>
        /// <returns>0为成功,-1为重复,1,为失败</returns>
        private int CollectPage(string Url, DataRow r, bool norepeat)
        {
            try
            {
                if (Url == null || Url.Trim().Equals(""))
                    return 1;
                PageNews pn = new PageNews(Url, r["Encode"].ToString());
                if (!pn.Fetch())
                    return 1;
                pn.RuleOfTitle = r["PageTitleSetting"].ToString();
                pn.RuleOfContent = r["PagebodySetting"].ToString();
                pn.FigureTitle();
                if (norepeat)
                {
                    if (pn.Title == null)
                        return 1;
                    if (dal.TitleExist(pn.Title))
                        return -1;
                }
                pn.FigureContent();
                if (r.IsNull("HandSetAuthor"))
                {
                    pn.FigureAuthor(r["AuthorSetting"].ToString(), false);
                }
                else
                {
                    pn.FigureAuthor(r["HandSetAuthor"].ToString(), true);
                }
                if (r.IsNull("HandSetSource"))
                {
                    pn.FigureSource(r["SourceSetting"].ToString(), false);
                }
                else
                {
                    pn.FigureSource(r["HandSetSource"].ToString(), true);
                }
                if (r.IsNull("HandSetAddDate"))
                {
                    pn.FigureAddTime(r["AddDateSetting"].ToString(), false);
                }
                else
                {
                    pn.FigureAddTime(r["HandSetAddDate"].ToString(), true);
                }
                int pgtp = int.Parse(r["OtherNewsType"].ToString());
                if (pgtp == 1)
                {
                    pn.Content += pn.GetOtherPagination(r["OtherNewsPageSetting"].ToString());
                }
                else if (pgtp == 2)
                {
                    pn.Content += pn.GetIndexPagination(r["OtherNewsPageSetting"].ToString());
                }
                pn.Filter(bool.Parse(r["TextTF"].ToString()),
                    bool.Parse(r["IsStyle"].ToString()), bool.Parse(r["IsDIV"].ToString()), bool.Parse(r["IsA"].ToString()),
                    bool.Parse(r["IsClass"].ToString()), bool.Parse(r["IsFont"].ToString()), bool.Parse(r["IsSpan"].ToString()),
                    bool.Parse(r["IsObject"].ToString()), bool.Parse(r["IsIFrame"].ToString()), bool.Parse(r["IsScript"].ToString()));
                if (!r.IsNull("OldContent") && !r.IsNull("ReContent") && !r.IsNull("IgnoreCase"))
                    pn.Replace(r["OldContent"].ToString(), r["ReContent"].ToString(), bool.Parse(r["IgnoreCase"].ToString()));
                if (pn.Content != null && !pn.Content.Trim().Equals("") && !pn.Title.Trim().Equals(""))
                {
                    NetCMS.Model.CollectNewsInfo ninf = new NetCMS.Model.CollectNewsInfo();
                    ninf.Author = pn.Author;
                    ninf.Source = pn.Source;
                    ninf.AddDate = pn.AddTime;
                    ninf.Title = pn.Title;
                    ninf.SiteID = int.Parse(r["ID"].ToString());
                    ninf.Links = Url;
                    ninf.ClassID = r["ClassID"].ToString();
                    string Content = pn.Content;
                    if (bSaveRemotePic)
                    {
                        RemoteResource rs = new RemoteResource(Content, PicSaveUrl, PicSavePath, Url, true);
                        rs.FetchResource();
                        Content = rs.Content;
                    }
                    ninf.Content = Content;
                    NewsAdd(ninf);
                    return 0;
                }
                else
                {
                    return 1;
                }
            }
            catch (Exception e)
            {
                return 1;
            }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -