⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mainform.cs

📁 c#的多线程采集源代码
💻 CS
字号:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;

using DataFactory;
using DataEntity;
// This code was downloaded from: http://i.aspx1.com (ASP.NET learning community)
namespace WebDataGather
{
    public partial class MainForm : Form
    {
        #region Field definitions
        private PageCommon Common = new PageCommon();
        #endregion

        /// <summary>
        /// Creates the form. All setup is delegated to InitializeComponent, which is
        /// declared in another part of this partial class.
        /// </summary>
        public MainForm()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Click handler for button1. It currently executes nothing; the notes inside
        /// only outline the gathering workflow that was planned for it.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            // Planned workflow (not implemented):
            //
            //   int DataSourceID = 1;
            //   Load the Gather_DataSourceConfig entity for DataSourceID.
            //   Download the page source (Str) from the gather Url stored on that entity.
            //
            //   Step 1: with DataSourceID and basic config ID = 1 (info-list config), fetch the
            //           detailed list configuration as a DataSet; write this as a general function.
            //   Step 2: parse Str according to that DataSet and return the resulting string.
            //   Step 3: with DataSourceID and basic config ID = 2 (single-info Url config), obtain
            //           the Url of every news item and store them in Dataset2.
            //   Step 4: loop over Dataset2; for each Url download that item's page source
            //           (ArticleStr), then:
            //             - extract the article title from ArticleStr
            //             - extract the article description from ArticleStr
            //             - extract the article body from ArticleStr
            //             - save the title, description and body to the database
        }

        /// <summary>
        /// Applies one text operation to <paramref name="str"/>.
        /// <para>
        /// "Marker" operations act on every occurrence of <paramref name="startstr"/>:
        /// 3 = delete it, 4 = put <paramref name="Newstr"/> in front of it,
        /// 6 = put <paramref name="Newstr"/> behind it, 8 = replace it with <paramref name="Newstr"/>.
        /// </para>
        /// <para>
        /// "Segment" operations act on the first segment that begins at the first occurrence of
        /// <paramref name="startstr"/> and ends at the first occurrence of <paramref name="endstr"/>
        /// found after that start marker:
        /// 1 = return only the segment, 2 = delete the segment,
        /// 5 = insert <paramref name="Newstr"/> at the head of the segment,
        /// 7 = insert <paramref name="Newstr"/> at the tail of the segment,
        /// 9 = replace the segment with <paramref name="Newstr"/>.
        /// </para>
        /// </summary>
        /// <param name="str">Text to process.</param>
        /// <param name="startstr">Start marker (or the marker itself for operations 3, 4, 6, 8).</param>
        /// <param name="endstr">End marker; only used by the segment operations.</param>
        /// <param name="Newstr">Replacement / prefix / suffix text; null is treated as empty.</param>
        /// <param name="operate">Operation code, 1 to 9 as listed above.</param>
        /// <param name="inHead">1 = the segment includes the start marker, 0 = it does not.</param>
        /// <param name="inTail">1 = the segment includes the end marker, 0 = it does not.</param>
        /// <returns>
        /// The processed text. <paramref name="str"/> is returned unchanged when it or
        /// <paramref name="startstr"/> is null/empty, when the operation code is unknown, when a
        /// relevant flag is neither 0 nor 1, or when a segment operation cannot find both markers.
        /// </returns>
        private string OperateStr(string str,string startstr,string endstr,string Newstr,int operate,int inHead,int inTail)
        {
            // Nothing to search in / nothing to search for. (string.Replace rejects an empty
            // search value, and IndexOf rejects null.)
            if (string.IsNullOrEmpty(str) || string.IsNullOrEmpty(startstr))
            {
                return str;
            }

            string newText = Newstr ?? string.Empty;

            // Marker operations never look at endstr, so they are handled before any
            // end-marker lookup; a null endstr must not make them fail.
            switch (operate)
            {
                case 3: // delete marker
                    return str.Replace(startstr, string.Empty);
                case 4: // prefix marker
                    return str.Replace(startstr, newText + startstr);
                case 6: // suffix marker
                    return str.Replace(startstr, startstr + newText);
                case 8: // replace marker
                    return str.Replace(startstr, newText);
                case 1:
                case 2:
                case 5:
                case 7:
                case 9:
                    break; // segment operations, handled below
                default:
                    return str;
            }

            // Operation 5 only depends on the head flag and operation 7 only on the tail flag;
            // every other segment operation needs both flags to be 0 or 1.
            bool headFlagMatters = operate != 7;
            bool tailFlagMatters = operate != 5;
            if (headFlagMatters && inHead != 0 && inHead != 1)
            {
                return str;
            }
            if (tailFlagMatters && inTail != 0 && inTail != 1)
            {
                return str;
            }

            int segmentStart;
            int segmentEnd;
            if (!TryLocateSegment(str, startstr, endstr, inHead == 1, inTail == 1, out segmentStart, out segmentEnd))
            {
                return str;
            }

            // Substring/Remove take a LENGTH as their second argument, not an end index.
            int segmentLength = segmentEnd - segmentStart;

            // Edits are applied by position so that only the located segment is touched
            // (and an empty segment is handled without error).
            switch (operate)
            {
                case 1: // extract segment
                    return str.Substring(segmentStart, segmentLength);
                case 2: // delete segment
                    return str.Remove(segmentStart, segmentLength);
                case 5: // segment prefix
                    return str.Insert(segmentStart, newText);
                case 7: // segment suffix
                    return str.Insert(segmentEnd, newText);
                default: // 9: replace segment
                    return str.Remove(segmentStart, segmentLength).Insert(segmentStart, newText);
            }
        }

        /// <summary>
        /// Finds the first start marker and the first end marker that follows it, and reports the
        /// segment bounds as [segmentStart, segmentEnd) with the markers included or excluded as requested.
        /// </summary>
        /// <returns>True when both markers were found; otherwise false (bounds are then 0).</returns>
        private static bool TryLocateSegment(string text, string startMarker, string endMarker, bool includeHead, bool includeTail, out int segmentStart, out int segmentEnd)
        {
            segmentStart = 0;
            segmentEnd = 0;

            if (string.IsNullOrEmpty(endMarker))
            {
                return false;
            }

            // Ordinal search keeps match positions consistent with marker lengths
            // (and with string.Replace, which is ordinal as well).
            int headIndex = text.IndexOf(startMarker, StringComparison.Ordinal);
            if (headIndex < 0)
            {
                return false;
            }

            // The end marker must come after the start marker, so search from there.
            int afterHead = headIndex + startMarker.Length;
            int tailIndex = text.IndexOf(endMarker, afterHead, StringComparison.Ordinal);
            if (tailIndex < 0)
            {
                return false;
            }

            segmentStart = includeHead ? headIndex : afterHead;
            segmentEnd = includeTail ? tailIndex + endMarker.Length : tailIndex;
            return true;
        }
    
        /// <summary>
        /// Click handler for ultraButton1: downloads the source of a fixed test page through
        /// PageCommon.getUrlSource and shows the result in ultraTextEditor1.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void ultraButton1_Click(object sender, EventArgs e)
        {
            const string pageUrl = "http://192.168.1.14/GkptWeb/test.aspx";
            const string pageEncoding = "gb2312";

            PageCommon fetcher = new PageCommon();
            string pageSource = fetcher.getUrlSource(pageUrl, pageEncoding);

            this.ultraTextEditor1.Text = pageSource;
        }

        /// <summary>
        /// Click handler for ultraButton2: a trial run of the gathering pipeline.
        /// It downloads a list page, reduces it with detailed config (1, 1), extracts the item
        /// Urls with config (1, 2) and shows them in dataGridView1. It then gathers ONE item,
        /// the row at zero-based index 10, fills the preview controls from it and stores it
        /// through PageCommon unless an article with the same title already exists.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void ultraButton2_Click(object sender, EventArgs e)
        {
            // Zero-based index of the single row that is gathered, i.e. the 11th entry.
            // NOTE(review): the original comment said "15th item" while the code tested i == 10;
            // the code's behaviour is kept - confirm which one was intended.
            const int sampleRowIndex = 10;
            const string pageEncoding = "gb2312";

            PageCommon PC = new PageCommon();

            // Source of the list page, then reduced by the list configuration.
            string listSource = PC.getUrlSource("http://www.sina.com", pageEncoding);
            DataSet listConfig = PC.GetParticularConfig(1, 1);
            listSource = PC.GetOperatedString(listSource, listConfig);

            // Configuration for extracting the individual item Urls.
            DataSet urlConfig = PC.GetParticularConfig(1, 2);
            DataTable urlTable = PC.GetAllSingleUrl(listSource, urlConfig);

            this.dataGridView1.DataSource = urlTable;

            // Nothing to gather when the sample row does not exist.
            if (urlTable == null || urlTable.Rows.Count <= sampleRowIndex)
            {
                return;
            }

            // Download the item page and cut it down with config (1, 3)
            // (news content configuration, per the original author's note).
            string itemSource = PC.getUrlSource(urlTable.Rows[sampleRowIndex][0].ToString(), pageEncoding);
            string itemContent = PC.GetOperatedString(itemSource, PC.GetParticularConfig(1, 3));
            this.ultraTextEditor1.Text = itemContent;

            Gather_Article G_A = new Gather_Article();

            // Config (1, 4): news title, per the original author's note.
            string title = PC.GetOperatedString(itemContent, PC.GetParticularConfig(1, 4));
            this.textBox1.Text = title;
            G_A.Title = title;

            // Config (1, 5): news source, per the original author's note.
            string resource = PC.GetOperatedString(itemContent, PC.GetParticularConfig(1, 5));
            this.textBox2.Text = resource;
            G_A.Resource = resource;

            // Config (1, 6): its result is stored as the article content.
            string body = PC.GetOperatedString(itemContent, PC.GetParticularConfig(1, 6));
            this.textBox3.Text = body;
            G_A.Content = body;

            if (PC.CheckArticleExists(G_A.Title))
            {
                MessageBox.Show("已经存在该文章!");
            }
            else
            {
                PC.AddArticle(G_A);
                MessageBox.Show("成功插入!");
            }
        }

        /// <summary>
        /// Click handler for button2: builds a Gather_ParticularConfig from text boxes 4-9
        /// (with DataSourceId fixed to 2) and adds it through PageCommon.AddParticularConfig.
        /// If textBox4 or textBox5 does not contain a valid integer, a message is shown and
        /// nothing is added.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button2_Click(object sender, EventArgs e)
        {
            // Validate the numeric inputs first: int.Parse would throw on mistyped input.
            int basicConfigId;
            int operationId;
            if (!int.TryParse(textBox4.Text, out basicConfigId) ||
                !int.TryParse(textBox5.Text, out operationId))
            {
                // Message text: "Please enter a valid integer!"
                MessageBox.Show("请输入有效的整数!");
                return;
            }

            Gather_ParticularConfig G_PC = new Gather_ParticularConfig();
            PageCommon PC = new PageCommon();

            G_PC.DataSourceId = 2;
            G_PC.BasicConfigId = basicConfigId;
            G_PC.OparationId = operationId;
            G_PC.StartString = textBox6.Text;
            G_PC.EndString = textBox7.Text;
            G_PC.NewString = textBox8.Text;
            G_PC.Remark = textBox9.Text;

            // The returned id was never used by the original handler, so it is not kept.
            PC.AddParticularConfig(G_PC);
        }

        /// <summary>
        /// Click handler for button3: updates the detailed configuration with the fixed id 27
        /// from text boxes 4-9 (DataSourceId fixed to 2, both "contain head/tail" flags set to 1)
        /// through PageCommon.UpdateParticularConfig, then reports success.
        /// If textBox4 or textBox5 does not contain a valid integer, a message is shown and
        /// nothing is updated.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button3_Click(object sender, EventArgs e)
        {
            // Validate the numeric inputs first: int.Parse would throw on mistyped input.
            int basicConfigId;
            int operationId;
            if (!int.TryParse(textBox4.Text, out basicConfigId) ||
                !int.TryParse(textBox5.Text, out operationId))
            {
                // Message text: "Please enter a valid integer!"
                MessageBox.Show("请输入有效的整数!");
                return;
            }

            Gather_ParticularConfig G_PC = new Gather_ParticularConfig();
            PageCommon PC = new PageCommon();

            G_PC.ParticularConfigId = 27;
            G_PC.DataSourceId = 2;
            G_PC.BasicConfigId = basicConfigId;
            G_PC.OparationId = operationId;
            G_PC.StartString = textBox6.Text;
            G_PC.EndString = textBox7.Text;
            G_PC.NewString = textBox8.Text;
            G_PC.Remark = textBox9.Text;
            G_PC.IsContainTail = 1;
            G_PC.IsContainHeader = 1;

            PC.UpdateParticularConfig(G_PC);
            MessageBox.Show("修改成功!");
        }

        /// <summary>
        /// Click handler for button4: deletes the detailed configuration with the fixed id 31
        /// through PageCommon.DeleteParticularConfig and then reports success.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button4_Click(object sender, EventArgs e)
        {
            const int configIdToDelete = 31;

            PageCommon configStore = new PageCommon();
            configStore.DeleteParticularConfig(configIdToDelete);

            MessageBox.Show("删除成功!");
        }

        /// <summary>
        /// Click handler for button5: loads all detailed configurations through
        /// PageCommon.GetAllParticularConfig, makes sure dataGridView1 has a check-box column
        /// named "选择" ("select"), binds the first table to the grid and disables adding rows
        /// through the table's default view.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button5_Click(object sender, EventArgs e)
        {
            const string selectColumnName = "选择";

            PageCommon PC = new PageCommon();
            DataSet allConfigs = PC.GetAllParticularConfig();

            // Nothing to show when no table came back.
            if (allConfigs == null || allConfigs.Tables.Count == 0)
            {
                return;
            }
            DataTable dt = allConfigs.Tables[0];

            // Add the check-box column only once; repeated clicks must not pile up columns.
            // The column is named directly instead of through a hard-coded index, which threw
            // when the grid had fewer than 11 columns and could otherwise rename another column.
            if (!dataGridView1.Columns.Contains(selectColumnName))
            {
                DataGridViewCheckBoxColumn check = new DataGridViewCheckBoxColumn();
                check.Name = selectColumnName;
                dataGridView1.Columns.Add(check);
            }

            dataGridView1.DataSource = dt;
            dt.DefaultView.AllowNew = false;
        }

        /// <summary>
        /// Click handler for button6: creates a Gather_Period whose Description is the text of
        /// textBox10 and adds it through PageCommon.AddPeriod. The returned id is not used.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button6_Click(object sender, EventArgs e)
        {
            Gather_Period period = new Gather_Period();
            PageCommon periodStore = new PageCommon();

            period.Description = this.textBox10.Text;

            periodStore.AddPeriod(period);
        }

        /// <summary>
        /// Handler named for the form's Load event; it currently performs no work.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void MainForm_Load(object sender, EventArgs e)
        {
        }

    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -