📄 index.cs
字号:
using System;
using System.Collections;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using System.Threading;
using DataFactory;
using DataEntity;
//该代码下载自 :http://i.aspx1.com (asp.net学习社区)
namespace WebDataGather
{
public partial class Index : Form
{
#region 变量定义
/// <summary>
/// Helper object through which this form performs its data lookups and page fetching
/// (GetMessage, GetDataSourceConfig, getUrlSource, AddArticle, ...).
/// </summary>
private PageCommon Common = new PageCommon();
/// <summary>
/// Maximum allowed number of gather threads, parsed from the "ThreadCount" appSettings entry.
/// The initializer throws when the entry is missing or is not an integer.
/// </summary>
private int ThreadCount = int.Parse(System.Configuration.ConfigurationManager.AppSettings["ThreadCount"].ToString());
/// <summary>
/// Gather-task records; elements are read back as Entity_GatherThread by GetCircleStart.
/// </summary>
private ArrayList alMsgList = new ArrayList();
/// <summary>
/// The manager thread created by CreateManageThreads; it executes ManageThreads.
/// </summary>
private Thread thread;
/// <summary>
/// Counter of due tasks seen during the current scheduling pass. GetCircleStart increments it and
/// compares it with ThreadCount; ManageThreads resets it to 0 after every pass.
/// </summary>
private int Count = 0;
#endregion
#region 系统初始化
/// <summary>
/// Builds the form: initializes the designer components, fills and expands the
/// category tree starting from the root categories, then starts the manager thread.
/// </summary>
public Index()
{
    InitializeComponent();

    // "0" is the parent id used for the top-level categories.
    TreeNodeCollection rootNodes = treeView1.Nodes;
    BindNode(rootNodes, "0");
    treeView1.ExpandAll();

    CreateManageThreads();
}
/// <summary>
/// Recursively fills the left-hand tree with the message categories whose parent id is
/// <paramref name="pNodeId"/>. For every category it first adds the category's data
/// sources and then its child categories.
/// </summary>
/// <param name="Nds">Node collection that receives the category nodes.</param>
/// <param name="pNodeId">Parent category id; "0" is used for the top level.</param>
private void BindNode(TreeNodeCollection Nds, string pNodeId)
{
    // Use the query result directly instead of allocating a DataSet that is
    // immediately thrown away.
    DataSet categories = Common.GetMessage("MgPId=" + pNodeId);
    if (categories == null || categories.Tables.Count == 0)
    {
        return;
    }

    bool isTopLevel = pNodeId == "0";

    // An empty table simply yields no iterations, so no separate row-count check is needed.
    foreach (DataRow row in categories.Tables[0].Rows)
    {
        string categoryId = row["MgId"].ToString();

        TreeNode categoryNode = new TreeNode();
        categoryNode.Tag = categoryId;
        categoryNode.Text = row["MgName"].ToString();

        // Top-level categories and nested categories get different right-click menus.
        if (isTopLevel)
        {
            categoryNode.ContextMenuStrip = this.MessageParentMenu;
        }
        else
        {
            categoryNode.ContextMenuStrip = this.AddDataSourceConfig;
        }

        Nds.Add(categoryNode);

        // Data sources are listed before the child categories under each node.
        BindNodeDataSource(categoryNode.Nodes, categoryId);
        BindNode(categoryNode.Nodes, categoryId);
    }
}
/// <summary>
/// Adds one tree node per data source that belongs to the given message category.
/// </summary>
/// <param name="Nds">Node collection that receives the data-source nodes.</param>
/// <param name="MgID">Category id as text; it must parse as an integer, otherwise int.Parse throws.</param>
private void BindNodeDataSource(TreeNodeCollection Nds,string MgID)
{
    // Use the query result directly instead of allocating a DataSet that is
    // immediately thrown away.
    DataSet sources = Common.GetDataSourceIds(int.Parse(MgID));
    if (sources == null || sources.Tables.Count == 0)
    {
        return;
    }

    // An empty table simply yields no iterations, so no separate row-count check is needed.
    foreach (DataRow row in sources.Tables[0].Rows)
    {
        TreeNode sourceNode = new TreeNode();
        sourceNode.Tag = row["DataSourceId"].ToString();
        sourceNode.Text = row["DataSourceName"].ToString();
        // Right-click menu for data-source nodes.
        sourceNode.ContextMenuStrip = this.DataSourceMenu;
        Nds.Add(sourceNode);
    }
}
/// <summary>
/// Clears the tree and rebuilds it from the top-level categories, then expands every node.
/// </summary>
private void ReSetTreeView()
{
    // Suspend painting while the tree is torn down and rebuilt so the control is
    // redrawn once instead of after every single node change.
    this.treeView1.BeginUpdate();
    try
    {
        this.treeView1.Nodes.Clear();
        BindNode(this.treeView1.Nodes, "0");
        this.treeView1.ExpandAll();
    }
    finally
    {
        // Always resume painting, even if loading the nodes throws.
        this.treeView1.EndUpdate();
    }
}
#endregion
#region 多线程采集
/// <summary>
/// Creates and starts the manager thread that runs <see cref="ManageThreads"/>.
/// </summary>
private void CreateManageThreads()
{
    thread = new Thread(new ThreadStart(ManageThreads));
    // The manager loop has no exit of its own. As a foreground thread it would keep
    // the process running after the form is closed, so mark it as a background thread.
    thread.IsBackground = true;
    thread.Start();
}
/// <summary>
/// Manager loop: runs one scheduling pass (<see cref="GetCircleStart"/>), resets the
/// per-pass counter and sleeps five seconds, for as long as no stop, abort or suspend
/// has been requested on the manager thread.
/// </summary>
private void ManageThreads()
{
    // ThreadState is a [Flags] enum whose Running member is the zero value. The
    // Background bit (set in CreateManageThreads) and the WaitSleepJoin bit do not mean
    // "stop", so they are masked out before comparing; a plain equality test against
    // Running would end the loop immediately for a background thread.
    const ThreadState ignoredStateBits = ThreadState.Background | ThreadState.WaitSleepJoin;

    while ((thread.ThreadState & ~ignoredStateBits) == ThreadState.Running)
    {
        GetCircleStart();
        // The counter limits how many gather threads one pass may start.
        Count = 0;
        Thread.Sleep(5000);
    }
}
/// <summary>
/// Runs one scheduling pass over the task list: every task in state "2" that is due
/// gets its next run time updated and is handed to a new thread running
/// <see cref="StartCreateGetData"/>, as long as fewer than ThreadCount due tasks have
/// been counted in this pass.
/// </summary>
private void GetCircleStart()
{
    if (alMsgList.Count == 0)
    {
        return;
    }

    // No array of started threads is kept: the previous one was never read, and because
    // it was sized from an earlier snapshot of the list it could be indexed out of range
    // if the list grew while this loop was running.
    for (int i = 0; i < alMsgList.Count; i++)
    {
        Entity_GatherThread task = (Entity_GatherThread)alMsgList[i];

        // Only tasks in state "2" are candidates; StartCreateGetData sets that state
        // together with the "waiting" state name.
        if (task.StateId != "2")
        {
            continue;
        }

        // A period of "0" selects the one-shot rule (gated by OneYesOrNo); any other
        // value re-runs the task every PeriodGather minutes until EndTime.
        bool isOneShot = task.PeriodGather.ToString() == "0";

        bool isDue;
        if (isOneShot)
        {
            isDue = DateTime.Now >= task.NextTime && task.OneYesOrNo;
        }
        else
        {
            isDue = DateTime.Now <= task.EndTime && DateTime.Now >= task.NextTime;
        }

        if (!isDue)
        {
            continue;
        }

        // Due tasks are counted even when they are not started; the counter is reset
        // by ManageThreads after the pass.
        Count++;
        if (Count > this.ThreadCount)
        {
            continue;
        }

        if (isOneShot)
        {
            // Clearing OneFlag makes StartCreateGetData mark the task as finished
            // once the gather run returns.
            task.OneFlag = false;
            task.NextTime = DateTime.Now;
        }
        else
        {
            task.NextTime = DateTime.Now.AddMinutes(task.PeriodGather);
        }

        Thread worker = new Thread(new ParameterizedThreadStart(StartCreateGetData));
        worker.Start(task);
    }
}
/// <summary>
/// Thread entry point for a single gather run: marks the task as in progress, performs
/// the gather operation, then marks the task as finished or as waiting again.
/// </summary>
/// <param name="obj">The task to run; must be an Entity_GatherThread instance.</param>
private void StartCreateGetData(object obj)
{
    Entity_GatherThread task = (Entity_GatherThread)obj;

    // State "1": gathering in progress.
    task.StateId = "1";
    task.StateName = "正在采集";

    // NOTE(review): an exception thrown by GatherOper is not handled here, so the
    // state updates below would be skipped - confirm this is intended.
    GatherOper(task.DataSourceId, task.MgId);

    // OneFlag == false is set by the scheduler for one-shot tasks; such a task is done.
    bool isFinished = task.OneFlag == false;
    if (!isFinished)
    {
        // State "2": waiting for the next run.
        task.StateName = "等待采集";
        task.StateId = "2";
        return;
    }

    // State "3": gathering completed.
    task.StateName = "完成采集";
    task.StateId = "3";
}
#endregion
#region 根据数据源Id(DataSourceId)执行采集操作
/// <summary>
/// Gathers the articles of one data source and stores the new ones.
/// Steps:
/// 1. Load the data-source configuration and download the list page it points to.
/// 2. Reduce the list page with detail configuration 1 (list configuration).
/// 3. Extract the article URLs from the result with detail configuration 2.
/// 4. For every URL: download the page, reduce it with configuration 3, then derive
///    the title (configuration 4), the description (configuration 5) and the body
///    (configuration 6) from the reduced page.
/// 5. Store the article under <paramref name="MgID"/> unless an article with the
///    same title already exists.
/// </summary>
/// <param name="DataSourceId">Id of the data source to gather from.</param>
/// <param name="MgID">Id of the message category the stored articles are assigned to.</param>
private void GatherOper(int DataSourceId,int MgID)
{
    // Results are used directly; no placeholder objects are allocated and discarded.
    Gather_DataSourceConfig sourceConfig = Common.GetDataSourceConfig(DataSourceId);

    // Source of the list page, reduced by the list configuration.
    string listHtml = Common.getUrlSource(sourceConfig.DataSourceUrl, sourceConfig.DataSourceEncoding);
    listHtml = Common.GetOperatedString(listHtml, Common.GetParticularConfig(DataSourceId, 1));

    // All article URLs found in the list page; the URL is read from the first column.
    DataTable articleUrls = Common.GetAllSingleUrl(listHtml, Common.GetParticularConfig(DataSourceId, 2));
    if (articleUrls == null)
    {
        // Nothing to gather. Other results of the data helper are null-checked in this
        // file as well, so a missing table is treated as "no articles".
        return;
    }

    // NOTE(review): configurations 3-6 are fetched again for every article; they look
    // loop-invariant and could probably be fetched once - confirm that the helper does
    // not modify the DataSet it is given before hoisting them.
    for (int i = 0; i < articleUrls.Rows.Count; i++)
    {
        // NOTE(review): article pages are always decoded as "gb2312" while the list page
        // uses the configured DataSourceEncoding - confirm whether this is intended.
        string pageHtml = Common.getUrlSource(articleUrls.Rows[i][0].ToString(), "gb2312");

        // Reduced article page from which title, description and body are all derived.
        string articleHtml = Common.GetOperatedString(pageHtml, Common.GetParticularConfig(DataSourceId, 3));

        // A fresh entity per article, so no value can leak from one article to the next.
        Gather_Article article = new Gather_Article();
        article.Title = Common.GetOperatedString(articleHtml, Common.GetParticularConfig(DataSourceId, 4));
        article.Resource = Common.GetOperatedString(articleHtml, Common.GetParticularConfig(DataSourceId, 5));
        article.Content = Common.GetOperatedString(articleHtml, Common.GetParticularConfig(DataSourceId, 6));

        // Articles are de-duplicated by title.
        if (!Common.CheckArticleExists(article.Title))
        {
            article.MgId = MgID;
            Common.AddArticle(article);
        }
    }
}
#endregion
#region "启动采集"按钮事件
/// <summary>
/// 选中某个数据源后,点击“启动采集”按钮触发
/// </summary>
/// <param name="sender"></param>
/// <param name="e"></param>
private void StartGatherButton_Click(object sender, EventArgs e)
{
if (this.treeView1.SelectedNode.ContextMenuStrip.Name == "DataSourceMenu")
{
if (!this.CheckDataSourceIDInMsgList(int.Parse(this.treeView1.SelectedNode.Tag.ToString())))
{
DataEntity.Gather_DataSourceConfig DataSourceConfig = new Gather_DataSourceConfig();
DataSourceConfig = Common.GetDataSourceConfig(int.Parse(this.treeView1.SelectedNode.Tag.ToString()));
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -