// File: news.cs
// (Web code-viewer residue: "font size" control label; not part of the program.)
using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;
using System.Data;
using System.Data.SqlClient;
using System.Xml;
using System.Net;
using System.Text.RegularExpressions;
namespace 个性系列_智能新闻自动采集系统
{
public class News
{
// SQL text templates. The {n} slots are positional string.Format placeholders;
// text substituted through them is inserted verbatim (nothing is escaped), so
// untrusted values must not be passed this way.
// SQL1: inserts one row into News, supplying all eight listed columns.
const string SQL1 = "insert into News (NewsID,NewsTitle,NewsAuthor,NewsPubDate,NewsDescription,NewsCategory,NewsBody,NewsUrl) values('{0}','{1}','{2}','{3}','{4}','{5}','{6}','{7}')";
// SQL2: counts the News rows whose NewsUrl equals {0}; the result column is named Count.
const string SQL2 = "select count(*) as Count from News where NewsUrl='{0}'";
// SQL3: selects every column of the News rows whose NewsUrl equals {0}.
const string SQL3 = "select * from News where NewsUrl='{0}'";
/// <summary>
/// Loads "RssNews.xml" and builds one <see cref="RssModel"/> for every
/// RssNews/NewsSite/Channel element whose IsGetNews child equals 'True'.
/// </summary>
/// <returns>The enabled channels, in document order (empty when none match).</returns>
/// <remarks>
/// Child elements that are absent are read as an empty string instead of
/// aborting the whole load. The optional Config element is taken from the
/// channel node itself, so a NewsUrl containing an apostrophe can no longer
/// break the lookup, and a channel never receives another channel's Config.
/// </remarks>
public static List<RssModel> GetRssList()
{
    List<RssModel> rssList = new List<RssModel>();
    XmlDocument XML = new XmlDocument();
    XML.Load("RssNews.xml");
    XmlNodeList channels = XML.SelectNodes("RssNews/NewsSite/Channel[IsGetNews='True']");
    foreach (XmlNode channel in channels)
    {
        RssModel rss = new RssModel();
        rss.NewsName = ChildText(channel, "NewsName");
        rss.NewsUrl = ChildText(channel, "NewsUrl");
        rss.StartTag = ChildText(channel, "StartTag");
        rss.EndTag = ChildText(channel, "EndTag");
        rss.ImgStartTag = ChildText(channel, "ImgStartTag");
        rss.ImgEndTag = ChildText(channel, "ImgEndTag");
        rss.IsGetNews = ChildText(channel, "IsGetNews");
        rss.NewsDescription = ChildText(channel, "NewsDescription");
        rss.MaxNewscount = ChildText(channel, "MaxNewscount");
        rss.NewsID = ChildText(channel, "NewsID");

        // Read the Config child of this very channel rather than re-querying
        // the document with an XPath string built from NewsUrl.
        XmlNode configNode = channel["Config"];
        if (configNode != null)
        {
            rss.NewsConfig = new NewsConfig();
            rss.NewsConfig.IsGetA = ChildText(configNode, "IsGetA");
            rss.NewsConfig.IsGetDIV = ChildText(configNode, "IsGetDIV");
            rss.NewsConfig.IsGetIMG = ChildText(configNode, "IsGetIMG");
            rss.NewsConfig.IsGetSPAN = ChildText(configNode, "IsGetSPAN");
            rss.NewsConfig.IsGetTABLE = ChildText(configNode, "IsGetTABLE");
        }
        rssList.Add(rss);
    }
    return rssList;
}

/// <summary>Returns the inner text of the first child element called <paramref name="name"/>, or "" when there is none.</summary>
private static string ChildText(XmlNode parent, string name)
{
    XmlElement child = parent[name];
    return child == null ? "" : child.InnerText;
}
/// <summary>
/// Loads the feed at <c>rss.NewsUrl</c> and converts every <c>item</c> element
/// found anywhere in the document into a <see cref="NewsModel"/>.
/// </summary>
/// <param name="rss">Channel whose NewsUrl is loaded; it is also stored on each returned item.</param>
/// <returns>
/// The items read so far. Any exception raised while loading or reading the
/// feed is swallowed and whatever has been collected up to that point
/// (possibly nothing) is returned.
/// </returns>
public static List<NewsModel> GetNewsList(RssModel rss)
{
    List<NewsModel> items = new List<NewsModel>();
    XmlDocument feed = new XmlDocument();
    try
    {
        feed.Load(rss.NewsUrl);
        foreach (XmlNode itemNode in feed.SelectNodes("//item"))
        {
            NewsModel entry = new NewsModel();
            XmlElement field;

            // Read the news attributes; a property is only assigned when
            // the corresponding element is present.
            field = itemNode["author"];
            if (field != null)
            {
                entry.NewsAuthor = field.InnerText;
            }

            field = itemNode["category"];
            if (field != null)
            {
                entry.NewsCategory = field.InnerText;
            }

            field = itemNode["title"];
            if (field != null)
            {
                entry.NewsTitle = field.InnerText;
            }

            // The description falls back to the title when the element is missing.
            field = itemNode["description"];
            if (field == null)
            {
                entry.NewsDescription = entry.NewsTitle;
            }
            else
            {
                entry.NewsDescription = field.InnerText;
            }

            field = itemNode["link"];
            if (field != null)
            {
                entry.NewsUrl = field.InnerText;
            }

            field = itemNode["pubDate"];
            if (field != null)
            {
                entry.NewsPubDate = field.InnerText;
            }

            entry.Rss = rss;
            items.Add(entry);
        }
    }
    catch (Exception)
    {
        // Best effort: fall through and hand back what was collected.
    }
    return items;
}
/// <summary>
/// Strips occurrences of one HTML element from <paramref name="inputString"/>.
/// For "IMG" (any casing) every img tag is deleted, whether or not it is
/// self-closed. For any other tag, each opening tag that has a matching
/// closing tag is deleted together with everything between them; nested
/// pairs are handled innermost first.
/// </summary>
/// <param name="inputString">HTML fragment to clean; null or empty is returned unchanged.</param>
/// <param name="Tag">Element name without angle brackets, e.g. "DIV"; matched case-insensitively as a whole name.</param>
/// <returns>The fragment with the matched elements removed. Unpaired opening or closing tags are left in place.</returns>
static string RemoveHtmlTag(String inputString, string Tag)
{
    if (string.IsNullOrEmpty(inputString) || string.IsNullOrEmpty(Tag))
    {
        return inputString;
    }

    string tagName = Regex.Escape(Tag);

    if (string.Equals(Tag, "IMG", StringComparison.OrdinalIgnoreCase))
    {
        // Stop at the tag's own '>' so that a non-self-closed image can
        // never swallow text up to some later, unrelated "/>".
        return Regex.Replace(inputString, "<" + tagName + @"\b[^>]*>", "", RegexOptions.IgnoreCase);
    }

    // \b keeps "A" from matching "<abbr" / "<area", "TR" from matching "<track", etc.
    Regex tagRegex = new Regex("</?" + tagName + @"\b", RegexOptions.IgnoreCase);
    Stack<int> openTagStarts = new Stack<int>();
    Match m = tagRegex.Match(inputString);
    while (m.Success)
    {
        // The match is "<tag" or "</tag", so the second character alone tells
        // the two apart (no culture-sensitive upper-casing needed).
        bool isClosingTag = m.Value[1] == '/';
        if (!isClosingTag)
        {
            openTagStarts.Push(m.Index);
            m = m.NextMatch();
        }
        else if (openTagStarts.Count == 0)
        {
            // Closing tag with no opener before it: leave it alone.
            m = m.NextMatch();
        }
        else
        {
            int start = openTagStarts.Pop();
            // Remove through the closing tag's real '>' (tolerates "</div >");
            // when the '>' is missing, remove to the end of the text.
            int closeBracket = inputString.IndexOf('>', m.Index + m.Length);
            int end = closeBracket < 0 ? inputString.Length : closeBracket + 1;
            inputString = inputString.Remove(start, end - start);
            // Openers still on the stack all lie before 'start', so their
            // positions remain valid; resume scanning at the cut.
            m = tagRegex.Match(inputString, start);
        }
    }
    return inputString;
}
/// <summary>
/// Downloads the page at <c>news.NewsUrl</c>, narrows it to the article region
/// configured on <c>news.Rss</c>, and stores the cleaned result in <c>news.NewsBody</c>.
/// </summary>
/// <param name="news">Item to fill in; its Rss settings supply the region delimiters, which are used as regular-expression text.</param>
/// <returns>
/// The same <paramref name="news"/> instance. If the download or the processing
/// throws, the exception is swallowed and the item is returned with NewsBody
/// left as it was.
/// </returns>
/// <remarks>
/// The former <c>lock</c> on a freshly allocated local object was removed: every
/// call locked its own private object, so it never synchronized anything.
/// NOTE(review): the page bytes are decoded with Encoding.Default regardless of
/// the charset the server declares — confirm that this is intended.
/// </remarks>
public static NewsModel GetNewsByNewsLink(NewsModel news)
{
    // Read outside the try block, as before, so that a missing item or
    // missing Rss still surfaces to the caller instead of being swallowed.
    RssModel rss = news.Rss;
    string startTag = rss.StartTag;
    string endTag = rss.EndTag;
    try
    {
        string page;
        using (WebClient client = new WebClient())
        {
            page = Encoding.Default.GetString(client.DownloadData(news.NewsUrl));
        }

        // Narrow the page to the configured article region; a page without
        // that region yields an empty string.
        if (!string.IsNullOrEmpty(startTag) && !string.IsNullOrEmpty(endTag))
        {
            page = Regex.Match(page, startTag + @"[\s\S]*?" + endTag, RegexOptions.IgnoreCase).Value;
        }

        // Collect the img tags found in the configured image region, each
        // wrapped in CENTER, to be placed in front of the text.
        StringBuilder images = new StringBuilder();
        if (!string.IsNullOrEmpty(rss.ImgStartTag) && !string.IsNullOrEmpty(rss.ImgEndTag))
        {
            Match region = Regex.Match(page, rss.ImgStartTag + "[\\s\\S]*?" + rss.ImgEndTag, RegexOptions.IgnoreCase);
            if (region.Success)
            {
                Regex imgRegex = new Regex("\\<IMG" + @"[\s\S]*?>", RegexOptions.IgnoreCase);
                for (Match img = imgRegex.Match(region.Value); img.Success; img = img.NextMatch())
                {
                    images.Append(@"<CENTER>").Append(img.Value).Append(@"</CENTER>");
                }
            }
        }

        news.NewsBody = images.ToString() + GetNews(page, news);
        return news;
    }
    catch (Exception)
    {
        // Deliberate best effort: one unreachable or malformed page must not
        // stop the caller; the item simply keeps its previous body.
        return news;
    }
}
// Matches one paragraph: "<P", at least one non-letter character, then the
// shortest run of text up to "</P>". Built once instead of on every call.
private static readonly Regex ParagraphRegex = new Regex("\\<P[^a-z]+[\\s\\S]*?\\</P\\>", RegexOptions.IgnoreCase | RegexOptions.Compiled);

/// <summary>
/// Extracts the paragraph elements from <paramref name="NewsString"/>, removes
/// the element kinds that the channel configuration switches off, always
/// removes SCRIPT elements, and concatenates the results.
/// </summary>
/// <param name="NewsString">HTML of the article region.</param>
/// <param name="news">Item whose <c>Rss.NewsConfig</c> (may be null) selects which elements to strip.</param>
/// <returns>
/// The cleaned paragraphs joined together. If processing throws, the
/// paragraphs completed before the failure are returned.
/// </returns>
/// <remarks>
/// Paragraphs are now kept when the channel has no NewsConfig; previously the
/// append sat inside the null check, so such channels always produced an empty
/// body. Flags are compared with "FALSE" ignoring case.
/// </remarks>
static string GetNews(string NewsString, NewsModel news)
{
    StringBuilder body = new StringBuilder();
    try
    {
        NewsConfig config = news.Rss.NewsConfig;
        for (Match m = ParagraphRegex.Match(NewsString); m.Success; m = m.NextMatch())
        {
            string paragraph = m.Value;
            if (config != null)
            {
                if (IsSwitchedOff(config.IsGetTABLE))
                {
                    paragraph = RemoveHtmlTag(paragraph, "TABLE");
                    paragraph = RemoveHtmlTag(paragraph, "iframe");
                    paragraph = RemoveHtmlTag(paragraph, "TR");
                    paragraph = RemoveHtmlTag(paragraph, "TD");
                }
                if (IsSwitchedOff(config.IsGetDIV))
                {
                    paragraph = RemoveHtmlTag(paragraph, "DIV");
                }
                if (IsSwitchedOff(config.IsGetIMG))
                {
                    paragraph = RemoveHtmlTag(paragraph, "IMG");
                }
                if (IsSwitchedOff(config.IsGetA))
                {
                    paragraph = RemoveHtmlTag(paragraph, "A");
                }
                if (IsSwitchedOff(config.IsGetSPAN))
                {
                    paragraph = RemoveHtmlTag(paragraph, "SPAN");
                }
            }
            // Scripts are dropped whatever the configuration says.
            paragraph = RemoveHtmlTag(paragraph, "SCRIPT");
            body.Append(paragraph);
        }
    }
    catch (Exception)
    {
        // Best effort: keep what was assembled before the failure.
    }
    return body.ToString();
}

/// <summary>True when a configuration flag holds "FALSE" in any casing.</summary>
private static bool IsSwitchedOff(string flag)
{
    return string.Equals(flag, "FALSE", StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// Stores one news item by executing the "InsertNews" stored procedure on the
/// database named by the "sqlconnstr" application setting.
/// </summary>
/// <param name="news">Item to store; its Rss supplies @NewsID and @NewsSiteName.</param>
/// <remarks>
/// @NewsImage receives the SRC of the first CENTER-wrapped img tag in the body,
/// or "NO" when the body has none. The connection and command are disposed
/// even when the call fails; database exceptions still propagate to the caller.
/// </remarks>
public static void AddToDatabase(NewsModel news)
{
    string connstr = System.Configuration.ConfigurationSettings.AppSettings["sqlconnstr"];
    string newsImage = ExtractLeadImageSource(news.NewsBody);

    using (SqlConnection conn = new SqlConnection(connstr))
    using (SqlCommand cmd = new SqlCommand("InsertNews", conn))
    {
        cmd.CommandType = CommandType.StoredProcedure;
        cmd.Parameters.AddWithValue("@NewsID", news.Rss.NewsID);
        cmd.Parameters.AddWithValue("@NewsTitle", news.NewsTitle);
        cmd.Parameters.AddWithValue("@NewsAuthor", news.NewsAuthor);
        cmd.Parameters.AddWithValue("@NewsPubDate", news.NewsPubDate);
        cmd.Parameters.AddWithValue("@NewsDescription", news.NewsDescription);
        cmd.Parameters.AddWithValue("@NewsBody", news.NewsBody);
        cmd.Parameters.AddWithValue("@NewsUrl", news.NewsUrl);
        cmd.Parameters.AddWithValue("@NewsCategory", news.NewsCategory);
        cmd.Parameters.AddWithValue("@NewsSiteName", news.Rss.NewsName);
        cmd.Parameters.AddWithValue("@NewsImage", newsImage);
        conn.Open();
        cmd.ExecuteNonQuery();
    }
}

/// <summary>
/// Returns the SRC value of the first img tag that directly follows a CENTER
/// tag in <paramref name="newsBody"/>, or "NO" when there is no such tag or
/// it carries no SRC.
/// </summary>
private static string ExtractLeadImageSource(string newsBody)
{
    if (string.IsNullOrEmpty(newsBody))
    {
        return "NO";
    }

    Match imgTag = Regex.Match(newsBody, "\\<CENTER\\>" + "\\<IMG" + @"[\s\S]*?>", RegexOptions.IgnoreCase);
    if (!imgTag.Success)
    {
        return "NO";
    }

    // Look for SRC inside the matched tag only, so that an earlier SRC
    // attribute elsewhere in the body cannot be picked up by mistake.
    // Group 1 holds the bare value: double-quoted, single-quoted, or unquoted
    // up to whitespace or the tag's closing '>'.
    Match src = Regex.Match(
        imgTag.Value,
        "(?<=SRC\\s*=)\\s*(?:\"(?<1>[^\"]*)\"|'(?<1>[^']*)'|(?<1>[^\\s>]+))",
        RegexOptions.IgnoreCase);
    if (!src.Success)
    {
        return "NO";
    }
    return src.Groups[1].Value.Replace("\"", "");
}
/// <summary>
/// Reports whether the News table already holds at least one row whose
/// NewsUrl equals <c>news.NewsUrl</c>.
/// </summary>
/// <param name="news">Item whose NewsUrl is looked up; a null URL is looked up as the empty string.</param>
/// <returns>true when one or more matching rows exist; otherwise false.</returns>
/// <remarks>
/// The URL comes from an external feed, so it is passed as a SQL parameter
/// rather than formatted into the statement text: an apostrophe in the URL can
/// neither break the query nor inject SQL. Database exceptions propagate to
/// the caller; the connection is disposed either way.
/// </remarks>
public static bool ExistNews(NewsModel news)
{
    const string sql = "select count(*) as Count from News where NewsUrl=@NewsUrl";
    string connstr = System.Configuration.ConfigurationSettings.AppSettings["sqlconnstr"];
    string url = news.NewsUrl ?? "";

    using (SqlConnection conn = new SqlConnection(connstr))
    using (SqlCommand cmd = new SqlCommand(sql, conn))
    {
        cmd.Parameters.AddWithValue("@NewsUrl", url);
        conn.Open();
        object count = cmd.ExecuteScalar();
        return count != null && count != DBNull.Value && Convert.ToInt32(count) > 0;
    }
}
/// <summary>
/// Loads the stored copy of a news item, looked up by <c>news.NewsUrl</c>.
/// </summary>
/// <param name="news">Item whose NewsUrl identifies the row to read.</param>
/// <returns>
/// A new <see cref="NewsModel"/> filled from the first matching row, or null
/// when no row matches. Any exception is swallowed: the method then returns
/// whatever had been read so far (null if the failure came before a row was read).
/// </returns>
/// <remarks>
/// The URL is passed as a SQL parameter instead of being formatted into the
/// statement. The NewsCategory column is now stored in NewsCategory; it used
/// to overwrite NewsAuthor.
/// </remarks>
public static NewsModel NewsBrowser(NewsModel news)
{
    const string sql = "select * from News where NewsUrl=@NewsUrl";
    NewsModel stored = null;
    try
    {
        string connstr = System.Configuration.ConfigurationSettings.AppSettings["sqlconnstr"];
        using (SqlConnection conn = new SqlConnection(connstr))
        using (SqlCommand cmd = new SqlCommand(sql, conn))
        {
            cmd.Parameters.AddWithValue("@NewsUrl", news.NewsUrl ?? "");
            conn.Open();
            using (SqlDataReader dr = cmd.ExecuteReader())
            {
                if (dr.Read())
                {
                    stored = new NewsModel();
                    stored.NewsTitle = dr["NewsTitle"].ToString();
                    stored.NewsPubDate = dr["NewsPubDate"].ToString();
                    stored.NewsBody = dr["NewsBody"].ToString();
                    stored.NewsAuthor = dr["NewsAuthor"].ToString();
                    stored.NewsCategory = dr["NewsCategory"].ToString();
                    stored.NewsUrl = dr["NewsUrl"].ToString();
                }
            }
        }
        return stored;
    }
    catch (Exception)
    {
        // Deliberate best effort, kept from the original contract: callers
        // receive null (or a partly filled item) instead of an exception.
        return stored;
    }
}
}
}
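// Web code-viewer residue (keyboard shortcut help); not part of the program:
//   Copy code:           Ctrl + C
//   Search code:         Ctrl + F
//   Full-screen mode:    F11
//   Toggle theme:        Ctrl + Shift + D
//   Show shortcuts:      ?
//   Increase font size:  Ctrl + =
//   Decrease font size:  Ctrl + -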