⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 news.cs

📁 智能新闻自动采集系统.能够自动下载各类新闻网站的内容,比较使用
💻 CS
字号:
using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;
using System.Data;
using System.Data.SqlClient;
using System.Xml;
using System.Net;
using System.Text.RegularExpressions;

namespace 个性系列_智能新闻自动采集系统
{
  public   class News
    {

        // Composite-format SQL templates: each {n} placeholder is meant to be filled in by string.Format.
        // NOTE(review): string.Format performs no SQL escaping, so any value substituted into the quoted
        // placeholders is spliced into the statement verbatim; prefer parameterized commands for untrusted data.

        // INSERT template for one News row; placeholders {0}-{7} follow the column order in the column list.
        const string SQL1 = "insert into News (NewsID,NewsTitle,NewsAuthor,NewsPubDate,NewsDescription,NewsCategory,NewsBody,NewsUrl) values('{0}','{1}','{2}','{3}','{4}','{5}','{6}','{7}')";
        // Counts the News rows whose NewsUrl equals {0}; the count is exposed as column "Count".
        const string SQL2 = "select count(*) as Count from News where NewsUrl='{0}'";
        // Selects every column of the News rows whose NewsUrl equals {0}.
        const string SQL3 = "select *  from News where NewsUrl='{0}'";
        /// <summary>
        /// Loads "RssNews.xml" (resolved against the current directory) and builds one
        /// RssModel for every Channel element whose IsGetNews child equals 'True'.
        /// </summary>
        /// <returns>
        /// The enabled channels in document order. NewsConfig is populated only for channels
        /// that have a Config child element; otherwise it is left as RssModel initialised it.
        /// </returns>
        /// <remarks>
        /// The simple child elements (NewsName, NewsUrl, StartTag, ...) are required: a missing
        /// one results in a NullReferenceException, exactly as before.
        /// </remarks>
        public static List<RssModel> GetRssList()
        {
            List<RssModel> rssList = new List<RssModel>();
            XmlDocument XML = new XmlDocument();
            XML.Load("RssNews.xml");
            XmlNodeList NodeList = XML.SelectNodes("RssNews/NewsSite/Channel[IsGetNews='True']");
            foreach (XmlNode Node in NodeList)
            {
                RssModel rss = new RssModel();
                rss.NewsName = Node["NewsName"].InnerText;
                rss.NewsUrl = Node["NewsUrl"].InnerText;
                rss.StartTag = Node["StartTag"].InnerText;
                rss.EndTag = Node["EndTag"].InnerText;
                rss.ImgStartTag = Node["ImgStartTag"].InnerText;
                rss.ImgEndTag = Node["ImgEndTag"].InnerText;
                rss.IsGetNews = Node["IsGetNews"].InnerText;
                rss.NewsDescription = Node["NewsDescription"].InnerText;
                rss.MaxNewscount = Node["MaxNewscount"].InnerText;
                rss.NewsID = Node["NewsID"].InnerText;

                // Read the Config element straight from this channel. The previous code re-queried
                // the whole document with an XPath built by concatenating NewsUrl, which threw on
                // URLs containing an apostrophe and could return another channel's Config when two
                // channels shared the same URL.
                XmlNode ConfigNode = Node["Config"];
                if (ConfigNode != null)
                {
                    rss.NewsConfig = new NewsConfig();
                    rss.NewsConfig.IsGetA = ConfigNode["IsGetA"].InnerText;
                    rss.NewsConfig.IsGetDIV = ConfigNode["IsGetDIV"].InnerText;
                    rss.NewsConfig.IsGetIMG = ConfigNode["IsGetIMG"].InnerText;
                    rss.NewsConfig.IsGetSPAN = ConfigNode["IsGetSPAN"].InnerText;
                    rss.NewsConfig.IsGetTABLE = ConfigNode["IsGetTABLE"].InnerText;
                }
                rssList.Add(rss);
            }
            return rssList;
        }
        /// <summary>
        /// Loads the RSS document found at rss.NewsUrl and turns every "item" element
        /// into a NewsModel linked back to <paramref name="rss"/>.
        /// </summary>
        /// <param name="rss">Channel description; its NewsUrl is the document to load.</param>
        /// <returns>
        /// The items converted so far. Any exception (load failure, bad XML, ...) is swallowed,
        /// so the list may be empty or partial but is never null.
        /// </returns>
        public static List<NewsModel> GetNewsList(RssModel rss)
        {
            List<NewsModel> collected = new List<NewsModel>();
            XmlDocument feed = new XmlDocument();
            try
            {
                feed.Load(rss.NewsUrl);
                foreach (XmlNode item in feed.SelectNodes("//item"))
                {
                    NewsModel entry = new NewsModel();
                    XmlElement field;

                    // Each property is assigned only when the matching child element exists.
                    field = item["author"];
                    if (field != null)
                    {
                        entry.NewsAuthor = field.InnerText;
                    }

                    field = item["category"];
                    if (field != null)
                    {
                        entry.NewsCategory = field.InnerText;
                    }

                    field = item["title"];
                    if (field != null)
                    {
                        entry.NewsTitle = field.InnerText;
                    }

                    // The description falls back to the title, so the title must be read first.
                    field = item["description"];
                    entry.NewsDescription = (field != null) ? field.InnerText : entry.NewsTitle;

                    field = item["link"];
                    if (field != null)
                    {
                        entry.NewsUrl = field.InnerText;
                    }

                    field = item["pubDate"];
                    if (field != null)
                    {
                        entry.NewsPubDate = field.InnerText;
                    }

                    entry.Rss = rss;
                    collected.Add(entry);
                }
            }
            catch (Exception)
            {
                // Best effort: fall through and hand back whatever was gathered before the failure.
            }
            return collected;
        }
        /// <summary>
        /// Removes elements of the given tag name from an HTML fragment.
        /// </summary>
        /// <param name="inputString">HTML fragment to clean; null or empty is returned unchanged.</param>
        /// <param name="Tag">Tag name without angle brackets (case-insensitive), e.g. "DIV".</param>
        /// <returns>
        /// For "IMG": the input with every IMG tag (self-closing or not) removed.
        /// For any other tag: the input with every properly paired open/close tag removed
        /// together with everything between them. Unpaired open or close tags are left in place.
        /// </returns>
        /// <remarks>
        /// Fixes over the previous implementation: the tag name must now be followed by whitespace,
        /// '/' or '>' (so "A" no longer matches ABBR, ADDRESS, ...); the open-tag stack can no longer
        /// go stale and cause an endless loop or ArgumentOutOfRangeException; closing tags are removed
        /// up to their real '>' instead of assuming a fixed length; comparisons are culture-independent.
        /// </remarks>
        static string RemoveHtmlTag(String inputString, string Tag)
        {
            if (string.IsNullOrEmpty(inputString) || string.IsNullOrEmpty(Tag))
            {
                return inputString;
            }

            // Tag name followed by a character that can legally end a tag name.
            string namePattern = Regex.Escape(Tag) + @"(?=[\s/>])";

            if (string.Equals(Tag, "IMG", StringComparison.OrdinalIgnoreCase))
            {
                // IMG is a void element: strip the single tag, stopping at its own '>'.
                return Regex.Replace(inputString, "<" + namePattern + "[^>]*>", "", RegexOptions.IgnoreCase);
            }

            Regex tagRegex = new Regex("</?" + namePattern, RegexOptions.IgnoreCase);
            Stack<int> openTags = new Stack<int>();      // start offsets of open tags not yet closed
            List<int> spanStarts = new List<int>();      // disjoint, ascending spans to delete
            List<int> spanEnds = new List<int>();

            Match m = tagRegex.Match(inputString);
            while (m.Success)
            {
                // m.Value is either "<tag" or "</tag".
                bool isClosing = m.Value[1] == '/';
                if (!isClosing)
                {
                    openTags.Push(m.Index);
                    m = m.NextMatch();
                    continue;
                }
                if (openTags.Count == 0)
                {
                    // Stray closing tag with no opener: leave it alone.
                    m = m.NextMatch();
                    continue;
                }

                int start = openTags.Pop();
                int closeBracket = inputString.IndexOf('>', m.Index + m.Length);
                // A truncated closing tag (no '>') is treated as running to the end of the input.
                int end = (closeBracket < 0) ? inputString.Length : closeBracket + 1;

                // Spans recorded for elements nested inside this one are now redundant.
                while (spanStarts.Count > 0 && spanStarts[spanStarts.Count - 1] >= start)
                {
                    spanStarts.RemoveAt(spanStarts.Count - 1);
                    spanEnds.RemoveAt(spanEnds.Count - 1);
                }
                spanStarts.Add(start);
                spanEnds.Add(end);

                // Resume after the closing tag so later matches can never overlap this span.
                m = tagRegex.Match(inputString, end);
            }

            if (spanStarts.Count == 0)
            {
                return inputString;
            }

            // Copy everything that lies outside the recorded spans.
            StringBuilder kept = new StringBuilder(inputString.Length);
            int cursor = 0;
            for (int i = 0; i < spanStarts.Count; i++)
            {
                kept.Append(inputString, cursor, spanStarts[i] - cursor);
                cursor = spanEnds[i];
            }
            kept.Append(inputString, cursor, inputString.Length - cursor);
            return kept.ToString();
        }
        /// <summary>
        /// Downloads the page at news.NewsUrl, extracts the article text and lead images
        /// according to the channel settings in news.Rss, and stores the result in news.NewsBody.
        /// </summary>
        /// <param name="news">Item to fill in; news.Rss must be set.</param>
        /// <returns>
        /// The same <paramref name="news"/> instance. If the download or the extraction fails,
        /// the exception is swallowed and NewsBody is left as it was.
        /// </returns>
        /// <remarks>
        /// StartTag/EndTag and ImgStartTag/ImgEndTag are used as regular-expression fragments,
        /// not as literal text. The former lock on a freshly created object was removed: it could
        /// never block another thread, and the method only touches its locals and its argument.
        /// </remarks>
        public static NewsModel GetNewsByNewsLink(NewsModel news)
        {
            string startTag = news.Rss.StartTag;
            string endTag = news.Rss.EndTag;
            try
            {
                string page;
                // WebClient is IDisposable; release it as soon as the bytes are in memory.
                using (WebClient client = new WebClient())
                {
                    byte[] raw = client.DownloadData(news.NewsUrl);
                    // NOTE(review): decodes with the machine's default ANSI code page regardless of
                    // the charset the server declares - confirm this suits every configured site.
                    page = Encoding.Default.GetString(raw);
                }

                // Narrow the page to the article region when both delimiters are configured.
                // A failed match yields an empty string, i.e. an empty article.
                if (startTag != "" && endTag != "")
                {
                    page = Regex.Match(page, startTag + @"[\s\S]*?" + endTag, RegexOptions.IgnoreCase).Value;
                }

                // Collect every IMG tag of the configured image region, each wrapped in CENTER.
                StringBuilder images = new StringBuilder();
                if (news.Rss.ImgStartTag != "" && news.Rss.ImgEndTag != "")
                {
                    Match region = Regex.Match(page, news.Rss.ImgStartTag + "[\\s\\S]*?" + news.Rss.ImgEndTag, RegexOptions.IgnoreCase);
                    if (region.Success)
                    {
                        Regex imgTag = new Regex("\\<IMG" + @"[\s\S]*?>", RegexOptions.IgnoreCase);
                        for (Match img = imgTag.Match(region.Value); img.Success; img = img.NextMatch())
                        {
                            images.Append(@"<CENTER>").Append(img.Value).Append(@"</CENTER>");
                        }
                    }
                }

                // Images first, then the filtered paragraphs.
                news.NewsBody = images.ToString() + GetNews(page, news);
            }
            catch
            {
                // Deliberate best effort: a page that cannot be fetched or parsed must not
                // abort the caller; the item is returned without a new body.
            }
            return news;
        }
        // One paragraph: "<P" followed by at least one non-letter (so PRE, PARAM, ... are skipped),
        // then the shortest run of characters up to "</P>". Built once instead of on every call.
        static readonly Regex ParagraphRegex = new Regex("\\<P[^a-z]+(.|\n)*?\\</P\\>", RegexOptions.IgnoreCase | RegexOptions.Compiled);

        /// <summary>Returns true when a channel switch holds the text "FALSE" in any letter case.</summary>
        static bool IsSwitchedOff(string flag)
        {
            return string.Equals(flag, "FALSE", StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// Extracts all paragraph elements from <paramref name="NewsString"/>, strips the element
        /// kinds the channel configuration disables, always strips SCRIPT elements, and
        /// concatenates the results.
        /// </summary>
        /// <param name="NewsString">HTML of the article region.</param>
        /// <param name="news">Item whose Rss.NewsConfig (optional) selects the filters.</param>
        /// <returns>
        /// The concatenated paragraphs. If an exception occurs, the paragraphs gathered up to
        /// that point are returned (possibly an empty string).
        /// </returns>
        /// <remarks>
        /// Previously the paragraph was appended only inside the NewsConfig null check, so a
        /// channel without a Config element always produced an empty body; paragraphs are now
        /// kept unfiltered (apart from SCRIPT removal) in that case. Switch values are compared
        /// case-insensitively, so "False" and "FALSE" behave the same.
        /// </remarks>
        static string GetNews(string NewsString, NewsModel news)
        {
            StringBuilder all = new StringBuilder();
            try
            {
                for (Match m = ParagraphRegex.Match(NewsString); m.Success; m = m.NextMatch())
                {
                    string paragraph = m.Value;
                    if (news.Rss.NewsConfig != null)
                    {
                        if (IsSwitchedOff(news.Rss.NewsConfig.IsGetTABLE))
                        {
                            paragraph = RemoveHtmlTag(paragraph, "TABLE");
                            paragraph = RemoveHtmlTag(paragraph, "iframe");
                            paragraph = RemoveHtmlTag(paragraph, "TR");
                            paragraph = RemoveHtmlTag(paragraph, "TD");
                        }
                        if (IsSwitchedOff(news.Rss.NewsConfig.IsGetDIV))
                        {
                            paragraph = RemoveHtmlTag(paragraph, "DIV");
                        }
                        if (IsSwitchedOff(news.Rss.NewsConfig.IsGetIMG))
                        {
                            paragraph = RemoveHtmlTag(paragraph, "IMG");
                        }
                        if (IsSwitchedOff(news.Rss.NewsConfig.IsGetA))
                        {
                            paragraph = RemoveHtmlTag(paragraph, "A");
                        }
                        if (IsSwitchedOff(news.Rss.NewsConfig.IsGetSPAN))
                        {
                            paragraph = RemoveHtmlTag(paragraph, "SPAN");
                        }
                    }
                    // Scripts are never wanted, with or without a channel configuration.
                    paragraph = RemoveHtmlTag(paragraph, "SCRIPT");
                    all.Append(paragraph);
                }
            }
            catch
            {
                // Best effort: keep the paragraphs processed before the failure.
            }
            return all.ToString();
        }
        /// <summary>
        /// Stores one news item by calling the "InsertNews" stored procedure on the database
        /// named by the "sqlconnstr" application setting.
        /// </summary>
        /// <param name="news">Item to store; news.Rss and news.NewsBody must be set.</param>
        /// <exception cref="ArgumentNullException">
        /// news.NewsBody is null (the body is filled in by GetNewsByNewsLink).
        /// </exception>
        /// <remarks>
        /// The @NewsImage parameter receives the SRC of the first CENTER-wrapped IMG tag in the
        /// body, or the literal "NO" when there is none. Connection and command are now disposed
        /// even when the call fails.
        /// </remarks>
        public static void AddToDatabase(NewsModel news)
        {
            // A null body used to fail inside Regex.Match with the same exception type;
            // fail up front instead, before any database object is created.
            if (news.NewsBody == null)
            {
                throw new ArgumentNullException("news", "news.NewsBody must be set before the item can be stored.");
            }

            string NewsImage = FindLeadImageSource(news.NewsBody);

            string connstr = System.Configuration.ConfigurationSettings.AppSettings["sqlconnstr"];
            using (SqlConnection conn = new SqlConnection(connstr))
            using (SqlCommand cmd = new SqlCommand("InsertNews", conn))
            {
                cmd.CommandType = CommandType.StoredProcedure;
                cmd.Parameters.AddWithValue("@NewsID", news.Rss.NewsID);
                cmd.Parameters.AddWithValue("@NewsTitle", news.NewsTitle);
                cmd.Parameters.AddWithValue("@NewsAuthor", news.NewsAuthor);
                cmd.Parameters.AddWithValue("@NewsPubDate", news.NewsPubDate);
                cmd.Parameters.AddWithValue("@NewsDescription", news.NewsDescription);
                cmd.Parameters.AddWithValue("@NewsBody", news.NewsBody);
                cmd.Parameters.AddWithValue("@NewsUrl", news.NewsUrl);
                cmd.Parameters.AddWithValue("@NewsCategory", news.NewsCategory);
                cmd.Parameters.AddWithValue("@NewsSiteName", news.Rss.NewsName);
                cmd.Parameters.AddWithValue("@NewsImage", NewsImage);
                conn.Open();
                cmd.ExecuteNonQuery();
            }
        }

        /// <summary>
        /// Returns the SRC attribute value of the first CENTER-wrapped IMG tag in
        /// <paramref name="body"/>, or "NO" when no such tag (or no SRC attribute) exists.
        /// </summary>
        static string FindLeadImageSource(string body)
        {
            Match tag = Regex.Match(body, "\\<CENTER\\>" + "\\<IMG" + @"[\s\S]*?>", RegexOptions.IgnoreCase);
            if (!tag.Success)
            {
                return "NO";
            }

            // Look only inside the matched tag (the old code searched the whole body), accept
            // double-quoted, single-quoted and bare values, and never run past the tag's '>'.
            Match src = Regex.Match(
                tag.Value,
                "(?<![\\w-])SRC\\s*=\\s*(?:\"(?<url>[^\"]*)\"|'(?<url>[^']*)'|(?<url>[^\\s>]+))",
                RegexOptions.IgnoreCase);
            if (!src.Success)
            {
                return "NO";
            }
            return src.Groups["url"].Value;
        }
      /// <summary>
      /// Tells whether the News table already contains a row whose NewsUrl equals news.NewsUrl.
      /// </summary>
      /// <param name="news">Item to look up; a null NewsUrl is treated as an empty string.</param>
      /// <returns>true when at least one matching row exists; otherwise false.</returns>
      /// <remarks>
      /// The URL originates from a downloaded feed, so it is passed as a SQL parameter instead
      /// of being formatted into the statement text. Connection and command are disposed on
      /// every path.
      /// </remarks>
      public static bool ExistNews(NewsModel news)
      {
          string connstr = System.Configuration.ConfigurationSettings.AppSettings["sqlconnstr"];
          using (SqlConnection conn = new SqlConnection(connstr))
          using (SqlCommand cmd = new SqlCommand("select count(*) as Count from News where NewsUrl=@NewsUrl", conn))
          {
              // string.Format used to render a null URL as ''; keep that behaviour.
              cmd.Parameters.AddWithValue("@NewsUrl", news.NewsUrl ?? "");
              conn.Open();
              // count(*) always yields exactly one row with one integer column.
              int NewsCount = Convert.ToInt32(cmd.ExecuteScalar());
              return NewsCount > 0;
          }
      }
      /// <summary>
      /// Reads the stored news item whose NewsUrl equals news.NewsUrl.
      /// </summary>
      /// <param name="news">Item carrying the URL to look up; a null NewsUrl is treated as an empty string.</param>
      /// <returns>
      /// A new NewsModel filled from the first matching row, or null when no row matches.
      /// Any exception is swallowed: the result is then null, or a partially filled model
      /// if the failure happened while the columns were being copied.
      /// </returns>
      /// <remarks>
      /// Fixes: the NewsCategory column is now stored in NewsCategory (it used to overwrite
      /// NewsAuthor); the URL is passed as a SQL parameter instead of being formatted into the
      /// statement; connection, command and reader are disposed on every path.
      /// </remarks>
      public static NewsModel NewsBrowser(NewsModel news)
      {
          NewsModel news1 = null;
          try
          {
              string connstr = System.Configuration.ConfigurationSettings.AppSettings["sqlconnstr"];
              using (SqlConnection conn = new SqlConnection(connstr))
              using (SqlCommand cmd = new SqlCommand("select *  from News where NewsUrl=@NewsUrl", conn))
              {
                  // string.Format used to render a null URL as ''; keep that behaviour.
                  cmd.Parameters.AddWithValue("@NewsUrl", news.NewsUrl ?? "");
                  conn.Open();
                  using (SqlDataReader dr = cmd.ExecuteReader())
                  {
                      if (dr.Read())
                      {
                          news1 = new NewsModel();
                          news1.NewsTitle = dr["NewsTitle"].ToString();
                          news1.NewsPubDate = dr["NewsPubDate"].ToString();
                          news1.NewsBody = dr["NewsBody"].ToString();
                          news1.NewsAuthor = dr["NewsAuthor"].ToString();
                          news1.NewsCategory = dr["NewsCategory"].ToString();
                          news1.NewsUrl = dr["NewsUrl"].ToString();
                      }
                  }
              }
          }
          catch
          {
              // Deliberate best effort kept from the original: callers receive null (or a
              // partial model) instead of an exception.
              // NOTE(review): this makes a database failure indistinguishable from "not found".
          }
          return news1;
      }
      
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -