⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pagecommon.cs

📁 c#的多线程采集源代码
💻 CS
📖 第 1 页 / 共 3 页
字号:
        /// 是否存在该记录
        /// </summary>
        /// <param name="Title">Article title to look up; single quotes are doubled before it is embedded in the SQL text.</param>
        /// <returns><c>true</c> when the count query reports at least one matching row; otherwise <c>false</c>.</returns>
        public bool CheckArticleExists(string Title)
        {
            // NOTE(review): the statement is still assembled by concatenation because the only
            // helper visible here accepts raw SQL text; switch to a parameterized command if
            // the Run helper offers one.
            string sql = "select count(*) from Gather_Article where Title='" + Title.Replace("'", "''") + "'";

            // The using block releases the reader even when Read() or int.Parse throws,
            // so a failed lookup cannot leave the reader open.
            using (OleDbDataReader dr = Run.RunSql_DataReader(sql))
            {
                return dr.Read() && int.Parse(dr[0].ToString()) > 0;
            }
        }

        /// <summary>
        /// Inserts one row into Gather_Article.
        /// </summary>
        /// <param name="model">Article whose MgId, Title, Resource and Content are written; single quotes in the text fields are doubled.</param>
        /// <returns>Whatever <c>Run.RunSql_Int</c> returns for the insert statement.</returns>
        public int AddArticle(Gather_Article model)
        {
            // Column list and value list are assembled in one expression, in column order.
            string insertSql =
                "insert into Gather_Article(" +
                "MgId,Title,Resource,Content" +
                ")" +
                " values (" +
                model.MgId + "," +
                "'" + model.Title.Replace("'", "''") + "'," +
                "'" + model.Resource.Replace("'", "''") + "'," +
                "'" + model.Content.Replace("'", "''") + "'" +
                ")";

            return Run.RunSql_Int(insertSql);
        }

        /// <summary>
        /// Updates the Gather_Article row whose MgId equals <c>model.MgId</c>.
        /// </summary>
        /// <param name="model">Article carrying the new column values; single quotes in the text fields are doubled.</param>
        /// <returns>Whatever <c>Run.RunSql_Int</c> returns for the update statement.</returns>
        public int Update(Gather_Article model)
        {
            // One "column=value" entry per assignment, joined with commas below.
            string[] assignments = new string[]
            {
                "MgId=" + model.MgId,
                "Title='" + model.Title.Replace("'", "''") + "'",
                "Resource='" + model.Resource.Replace("'", "''") + "'",
                "Content='" + model.Content.Replace("'", "''") + "'",
                "IsShow=" + model.IsShow,
                "Picture='" + model.Picture.Replace("'", "''") + "'"
            };

            string updateSql = "update Gather_Article set "
                + string.Join(",", assignments)
                + " where MgId=" + model.MgId;

            return Run.RunSql_Int(updateSql);
        }

        /// <summary>
        /// Deletes the Gather_Article row with the given id.
        /// </summary>
        /// <param name="MgId">Value matched against the MgId column.</param>
        /// <returns>Whatever <c>Run.RunSql_Int</c> returns for the delete statement.</returns>
        public int DeleteArticle(int MgId)
        {
            // "delete from <table>" is the portable form: Access/Jet SQL requires the FROM
            // keyword, and other dialects accept it as well.
            StringBuilder strSql = new StringBuilder();
            strSql.Append("delete from Gather_Article ");
            strSql.Append(" where MgId=" + MgId);
            return Run.RunSql_Int(strSql.ToString());
        }

        /// <summary>
        /// Loads a single article by id.
        /// </summary>
        /// <param name="MgId">Value matched against the MgId column.</param>
        /// <returns>
        /// A populated <c>Gather_Article</c> built from the first returned row,
        /// or <c>null</c> when the query returns no rows.
        /// </returns>
        public Gather_Article GetModel(int MgId)
        {
            string query = "select  "
                + "[MgId],[Title],[Resource],[Content],[IsShow],[Picture] "
                + " from Gather_Article "
                + " where MgId=" + MgId;

            Gather_Article article = new Gather_Article();
            DataSet result = Run.RunSql_DataSet(query);
            DataRowCollection rows = result.Tables[0].Rows;

            if (rows.Count <= 0)
            {
                return null;
            }

            DataRow first = rows[0];

            // Numeric columns are only assigned when their text form is non-empty,
            // leaving the model's initial value otherwise.
            string idText = first["MgId"].ToString();
            if (idText != "")
            {
                article.MgId = int.Parse(idText);
            }

            article.Title = first["Title"].ToString();
            article.Resource = first["Resource"].ToString();
            article.Content = first["Content"].ToString();

            string isShowText = first["IsShow"].ToString();
            if (isShowText != "")
            {
                article.IsShow = int.Parse(isShowText);
            }

            article.Picture = first["Picture"].ToString();
            return article;
        }
        /// <summary>
        /// Returns the article list, optionally filtered.
        /// </summary>
        /// <param name="strWhere">
        /// SQL condition appended verbatim after "where"; <c>null</c>, empty or whitespace means no filter.
        /// Because the text is inserted unescaped, it must come from trusted code, never from user input.
        /// </param>
        /// <returns>The <see cref="DataSet"/> produced by <c>Run.RunSql_DataSet</c> for the query.</returns>
        public DataSet GetArticleList(string strWhere)
        {
            StringBuilder strSql = new StringBuilder();
            strSql.Append("select [MgId],[Title],[Resource],[Content],[IsShow],[Picture] ");
            strSql.Append(" FROM Gather_Article ");

            // A null filter is treated like an empty one instead of throwing on Trim().
            if (strWhere != null && strWhere.Trim() != "")
            {
                strSql.Append(" where " + strWhere);
            }
            return Run.RunSql_DataSet(strSql.ToString());
        }

        #endregion

        #region 其它
        
        /// <summary>
        /// Downloads a page and decodes its body with the given encoding.
        /// </summary>
        /// <param name="strUrl">Address of the page to fetch, as accepted by <c>WebRequest.Create</c> for an HTTP request.</param>
        /// <param name="strEncoding">Encoding name passed to <c>Encoding.GetEncoding</c>, e.g. gb2312 or utf-8.</param>
        /// <returns>The decoded page source, or an empty string when any step fails.</returns>
        public string getUrlSource(string strUrl,string strEncoding)
        {
            try
            {
                HttpWebRequest req = (HttpWebRequest)WebRequest.Create(strUrl);

                // Response, stream and reader are all disposed here; leaving the response
                // open keeps its connection reserved, which matters when many pages are
                // fetched in a row.
                using (HttpWebResponse rep = (HttpWebResponse)req.GetResponse())
                using (Stream body = rep.GetResponseStream())
                using (StreamReader sr = new StreamReader(body, Encoding.GetEncoding(strEncoding)))
                {
                    return sr.ReadToEnd();
                }
            }
            catch (Exception ex)
            {
                // Best-effort fetch: the failure is reported on the console and the caller
                // receives an empty string.
                Console.WriteLine(ex.Message);
                return "";
            }
        }

        

        /// <summary>
        /// Applies one configured text operation to <paramref name="str"/>.
        /// </summary>
        /// <remarks>
        /// Operations 1, 2, 5, 7 and 9 work on a "segment": the text delimited by the first occurrence of
        /// <paramref name="startstr"/> and the first occurrence of <paramref name="endstr"/> that follows it.
        /// Operations 3, 4, 6 and 8 act on every occurrence of <paramref name="startstr"/>.
        /// Marker searches are ordinal so the located offsets agree with <c>string.Replace</c> and with the
        /// length arithmetic used to slice the segment.
        /// </remarks>
        /// <param name="str">Text to process.</param>
        /// <param name="startstr">Start marker (or the string to act on for operations 3, 4, 6 and 8).</param>
        /// <param name="endstr">End marker; an empty marker means no segment can be located.</param>
        /// <param name="Newstr">Replacement, prefix or suffix text, depending on the operation.</param>
        /// <param name="operate">
        /// 1 extract segment, 2 delete segment, 3 delete string, 4 prefix string, 5 prefix segment,
        /// 6 suffix string, 7 suffix segment, 8 replace string, 9 replace segment.
        /// </param>
        /// <param name="inHead">1 to include the start marker in the segment, 0 to exclude it.</param>
        /// <param name="inTail">1 to include the end marker in the segment, 0 to exclude it.</param>
        /// <returns>
        /// The processed text. Operation 1 returns an empty string when no segment is found; every other
        /// operation returns <paramref name="str"/> unchanged when it cannot be applied.
        /// </returns>
        private string OperateStr(string str, string startstr, string endstr, string Newstr, int operate, int inHead, int inTail)
        {
            int startIndex = str.IndexOf(startstr, StringComparison.Ordinal);

            // The end marker is searched only after the start marker, so an earlier stray
            // occurrence of it (e.g. a quote character) cannot hide the real segment.
            int endIndex = -1;
            if (startIndex != -1 && !string.IsNullOrEmpty(endstr))
            {
                endIndex = str.IndexOf(endstr, startIndex + startstr.Length, StringComparison.Ordinal);
            }

            bool noSegment = endIndex <= startIndex || startIndex == -1;
            bool headFlagValid = inHead == 0 || inHead == 1;
            bool tailFlagValid = inTail == 0 || inTail == 1;

            int segStart;
            int segEnd;
            string segment;

            switch (operate)
            {
                case 1: // extract the segment
                    if (noSegment)
                    {
                        return "";
                    }
                    if (!headFlagValid || !tailFlagValid)
                    {
                        return str;
                    }
                    segStart = inHead == 1 ? startIndex : startIndex + startstr.Length;
                    segEnd = inTail == 1 ? endIndex + endstr.Length : endIndex;
                    return str.Substring(segStart, segEnd - segStart);

                case 2: // delete the segment
                case 9: // replace the segment
                    if (noSegment || !headFlagValid || !tailFlagValid)
                    {
                        return str;
                    }
                    segStart = inHead == 1 ? startIndex : startIndex + startstr.Length;
                    segEnd = inTail == 1 ? endIndex + endstr.Length : endIndex;
                    segment = str.Substring(segStart, segEnd - segStart);
                    return SpliceSegment(str, segStart, segment, operate == 2 ? "" : Newstr);

                case 5: // put a prefix in front of the segment (end marker is never part of it)
                    if (noSegment || !headFlagValid)
                    {
                        return str;
                    }
                    segStart = inHead == 1 ? startIndex : startIndex + startstr.Length;
                    segment = str.Substring(segStart, endIndex - segStart);
                    return SpliceSegment(str, segStart, segment, Newstr + segment);

                case 7: // put a suffix after the segment (start marker is always part of it)
                    if (noSegment || !tailFlagValid)
                    {
                        return str;
                    }
                    segEnd = inTail == 1 ? endIndex + endstr.Length : endIndex;
                    segment = str.Substring(startIndex, segEnd - startIndex);
                    // Suffix only: the new text goes after the segment in both tail modes.
                    return SpliceSegment(str, startIndex, segment, segment + Newstr);

                case 3: // delete every occurrence of the string
                case 4: // prefix every occurrence of the string
                case 6: // suffix every occurrence of the string
                case 8: // replace every occurrence of the string
                    if (startstr.Length == 0)
                    {
                        // string.Replace rejects an empty search value; nothing to act on.
                        return str;
                    }
                    if (operate == 3)
                    {
                        return str.Replace(startstr, "");
                    }
                    if (operate == 4)
                    {
                        return str.Replace(startstr, Newstr + startstr);
                    }
                    if (operate == 6)
                    {
                        return str.Replace(startstr, startstr + Newstr);
                    }
                    return str.Replace(startstr, Newstr);
            }

            // Unknown operation code: leave the text untouched.
            return str;
        }

        // Swaps a located segment for its replacement. A non-empty segment is replaced wherever its
        // text occurs; an empty segment (markers directly adjacent) becomes an insertion at segStart,
        // because string.Replace cannot search for an empty value.
        private static string SpliceSegment(string str, int segStart, string segment, string replacement)
        {
            if (segment.Length == 0)
            {
                return str.Insert(segStart, replacement ?? "");
            }
            return str.Replace(segment, replacement);
        }

        /// <summary>
        /// Runs every configured operation, in row order, over a piece of text
        /// (info list, content, title, source, body, and so on).
        /// </summary>
        /// <param name="str">Text to process.</param>
        /// <param name="dsParticularConfig">
        /// Configuration whose first table supplies one operation per row through the columns
        /// StartString, EndString, NewString, OparationId, IsContainHeader and IsContainTail.
        /// </param>
        /// <returns>The text after all operations have been applied.</returns>
        public string GetOperatedString(string str,DataSet dsParticularConfig)
        {
            string current = str;
            DataRowCollection rules = dsParticularConfig.Tables[0].Rows;

            for (int i = 0; i < rules.Count; i++)
            {
                DataRow rule = rules[i];

                string startMarker = rule["StartString"].ToString();
                string endMarker = rule["EndString"].ToString();
                string newText = rule["NewString"].ToString();
                int operation = int.Parse(rule["OparationId"].ToString());
                int includeHead = int.Parse(rule["IsContainHeader"].ToString());
                int includeTail = int.Parse(rule["IsContainTail"].ToString());

                // Each rule works on the output of the previous one.
                current = OperateStr(current, startMarker, endMarker, newText, operation, includeHead, includeTail);
            }

            return current;
        }

        /// <summary>
        /// Extracts every single-URL entry from an info list by applying the single-URL
        /// configuration repeatedly and removing each consumed fragment from the list.
        /// </summary>
        /// <param name="Str">Info list text (already processed).</param>
        /// <param name="dsSingleUrlConfig">
        /// Configuration whose first table supplies one operation per row through the columns
        /// StartString, EndString, NewString, OparationId, IsContainHeader and IsContainTail.
        /// </param>
        /// <returns>A table with one "SingleUrl" column and one row per extracted entry.</returns>
        public DataTable GetAllSingleUrl(string Str,DataSet dsSingleUrlConfig)
        {
            DataTable dtSingleUrls = new DataTable();
            DataColumn dc = new DataColumn("SingleUrl");
            dtSingleUrls.Columns.Add(dc);

            if (string.IsNullOrEmpty(Str))
            {
                return dtSingleUrls;
            }

            while (Str != "")
            {
                string singleUrl = Str;          // result of running all rules on the remaining list
                string consumed = string.Empty;  // raw fragment to strip from the list afterwards
                int rowNum = 0;

                foreach (DataRow dr in dsSingleUrlConfig.Tables[0].Rows)
                {
                    string startMarker = dr["StartString"].ToString();
                    string endMarker = dr["EndString"].ToString();

                    // Apply the rule to narrow the text down to a single URL.
                    singleUrl = OperateStr(singleUrl, startMarker, endMarker, dr["NewString"].ToString(),
                        int.Parse(dr["OparationId"].ToString()), int.Parse(dr["IsContainHeader"].ToString()), int.Parse(dr["IsContainTail"].ToString()));

                    // Only the first rule, when it is an extract whose output still carries both
                    // markers, identifies the fragment that was taken out of the list.
                    if (rowNum == 0 && dr["OparationId"].ToString() == "1"
                        && singleUrl.IndexOf(startMarker, StringComparison.Ordinal) != -1
                        && singleUrl.IndexOf(endMarker, StringComparison.Ordinal) != -1)
                    {
                        consumed = singleUrl;
                    }
                    rowNum++;
                }

                // Nothing extracted, or the rules left the text untouched: the list is exhausted.
                if (singleUrl == "" || singleUrl == Str)
                {
                    break;
                }

                DataRow row = dtSingleUrls.NewRow();
                row[0] = singleUrl;
                dtSingleUrls.Rows.Add(row);

                // Without a known fragment there is nothing to strip (and string.Replace rejects
                // an empty search value), so no further entry can be reached.
                if (consumed.Length == 0)
                {
                    break;
                }

                // Remove the entry just collected from the list.
                string remaining = Str.Replace(consumed, "");
                if (remaining.Length == Str.Length)
                {
                    // No progress was made; stop rather than loop forever.
                    break;
                }
                Str = remaining;
            }

            return dtSingleUrls;
        }

        #endregion

    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -