⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 start_catch.aspx.cs

📁 资源大小: 116.59KB 资源类型: application 自己写个采集器 采集起来更灵活
💻 CS
📖 第 1 页 / 共 5 页
字号:
                savedata.linkstor.Clear();
                savedata.arctitle.Clear();
                savedata.value.Clear();
                endlink = new ArrayList();
                endtitle = new ArrayList();
                string[] url = selecturl.Split(' ');
                if (url.Length > 1)
                {
                    for (int i = int.Parse(url[1]); i <= int.Parse(url[2]); i++)
                    {
                        string okurl = url[0].Replace("[var]", i.ToString());

                        HttpWebRequest adsdf = (HttpWebRequest)WebRequest.Create(okurl);

                        HttpWebResponse aaaaaaaaa = (HttpWebResponse)adsdf.GetResponse();

                        Stream strm = aaaaaaaaa.GetResponseStream();

                        StreamReader sr = new StreamReader(strm, Encoding.GetEncoding(code.Trim()));

                        string dataHTML = sr.ReadToEnd();

                        StripHTML(dataHTML);

                        strm.Close();
                    }

                    for (int i = 0; i <endlink.Count; i++)
                    {
                        savedata.linkstor.Add(endlink[i].ToString());
                    }
                    pub();
                }
                else
                {
                    string okurl = url[0];

                    HttpWebRequest adsdf = (HttpWebRequest)WebRequest.Create(okurl);

                    HttpWebResponse aaaaaaaaa = (HttpWebResponse)adsdf.GetResponse();

                    Stream strm = aaaaaaaaa.GetResponseStream();

                    StreamReader sr = new StreamReader(strm, Encoding.GetEncoding(code.Trim()));

                    string dataHTML = sr.ReadToEnd();

                    StripHTML(dataHTML);

                    strm.Close();

                    for (int i = 0; i < endlink.Count; i++)
                    {
                        savedata.linkstor.Add(endlink[i].ToString());
                    }

                    pub();
                }

                this.Panel2.Visible = true;
                this.Label6.Text = savedata.value.Count.ToString();
            }
            catch
            {
            }

        }
    }

    /// <summary>
    /// Extracts link/title pairs from one downloaded page and appends them to the
    /// <c>endlink</c> and <c>endtitle</c> lists.
    /// </summary>
    /// <remarks>
    /// The page is cut down to the text that starts at the <c>rule0</c> marker and stops just
    /// before the following <c>rule1</c> marker. <c>rule2</c> is then split into a pattern part
    /// and an option part; the pattern part holds the link pattern and, optionally, a title
    /// pattern. The <c>link</c> and <c>title</c> lists are rebuilt on every call.
    /// This method never throws: a page that cannot be parsed contributes nothing.
    /// </remarks>
    /// <param name="strHtml">Full HTML text of the page to scan.</param>
    public void StripHTML(string strHtml)
    {
        // Reset the per-page scratch lists first so a failed parse leaves them empty.
        link = new ArrayList();
        title = new ArrayList();
        try
        {
            if (strHtml == null)
            {
                return;
            }

            // Keep only the region between the start marker and the end marker.
            // A page that lacks either marker is skipped.
            int regionStart = strHtml.IndexOf(rule0.Trim());
            if (regionStart == -1)
            {
                return;
            }
            string strOutput = strHtml.Substring(regionStart);

            int regionEnd = strOutput.IndexOf(rule1.Trim());
            if (regionEnd == -1)
            {
                return;
            }
            strOutput = strOutput.Substring(0, regionEnd);

            // rule2 layout: <patterns> "&&&&" <option names>; patterns are separated by "||||".
            string[] moshi = split(rule2.Trim(), "&&&&");
            if (moshi == null || moshi.Length == 0)
            {
                return;
            }
            string[] exper = split(moshi[0].Trim(), "||||");
            if (exper == null || exper.Length == 0)
            {
                return;
            }

            // All option names found in the rule are combined; a rule without an option
            // part is matched with RegexOptions.None.
            RegexOptions options = ParseRuleOptions(moshi.Length > 1 ? moshi[1] : "");

            regexzj = new Regex(exper[0], options);
            ArrayList enddata = DistinctInOrder(CollectMatchValues(regexzj, strOutput));

            // Links are treated as relative only when none of them contains "http://".
            // NOTE(review): "https://" links are therefore also treated as relative - confirm intent.
            bool allRelative = true;
            for (int i = 0; i < enddata.Count; i++)
            {
                if (enddata[i].ToString().IndexOf("http://") != -1)
                {
                    allRelative = false;
                    break;
                }
            }

            if (enddata.Count > 0)
            {
                if (allRelative)
                {
                    // Host = text between the leading "http://" (7 characters) and the next '/'.
                    // NOTE(review): assumes selecturl starts with "http://" and has a path - confirm.
                    string mylink = selecturl.Trim().Substring(7);
                    mylink = mylink.Substring(0, mylink.IndexOf('/'));
                    for (int i = 0; i < enddata.Count; i++)
                    {
                        link.Add("http://" + mylink + "/" + enddata[i].ToString());
                    }
                }
                else
                {
                    for (int i = 0; i < enddata.Count; i++)
                    {
                        link.Add(enddata[i].ToString());
                    }
                }
            }

            if (exper.Length > 1)
            {
                // A second pattern extracts the titles from the same region.
                regexwl = new Regex(exper[1], options);
                title = DistinctInOrder(CollectMatchValues(regexwl, strOutput));
            }
            else
            {
                // No title pattern: pair every link with an empty title.
                for (int i = 0; i < enddata.Count; i++)
                {
                    title.Add("");
                }
            }

            // NOTE(review): the last link is left out (Count - 1), as in the original code;
            // it is unclear whether that is intentional - confirm before changing.
            // The count is also capped by the number of titles so that indexing cannot fail.
            int pairCount = link.Count - 1;
            if (title.Count < pairCount)
            {
                pairCount = title.Count;
            }
            for (int i = 0; i < pairCount; i++)
            {
                endtitle.Add(title[i].ToString());
                endlink.Add(link[i].ToString());
            }
        }
        catch (System.Exception ex)
        {
            // Scraping is best-effort: an invalid pattern or unexpected page must not abort
            // the whole run, but the failure is reported instead of being dropped silently.
            System.Diagnostics.Trace.TraceWarning("StripHTML failed: " + ex.Message);
        }
    }

    // Translates the option names found in a rule's option text into a combined RegexOptions value.
    private static RegexOptions ParseRuleOptions(string optionText)
    {
        RegexOptions options = RegexOptions.None;
        if (optionText == null)
        {
            return options;
        }
        if (optionText.IndexOf("IgnoreCase") != -1)
        {
            options |= RegexOptions.IgnoreCase;
        }
        if (optionText.IndexOf("Singleline") != -1)
        {
            options |= RegexOptions.Singleline;
        }
        if (optionText.IndexOf("ExplicitCapture") != -1)
        {
            options |= RegexOptions.ExplicitCapture;
        }
        if (optionText.IndexOf("Multiline") != -1)
        {
            options |= RegexOptions.Multiline;
        }
        return options;
    }

    // Returns the matched text of every match of the pattern in the input, in match order.
    private static ArrayList CollectMatchValues(Regex pattern, string input)
    {
        ArrayList values = new ArrayList();
        foreach (Match match in pattern.Matches(input))
        {
            values.Add(match.Value);
        }
        return values;
    }

    // Returns a new list holding the first occurrence of each distinct string, in original order.
    private static ArrayList DistinctInOrder(ArrayList items)
    {
        ArrayList distinct = new ArrayList();
        Hashtable seen = new Hashtable();
        for (int i = 0; i < items.Count; i++)
        {
            string text = items[i].ToString();
            if (!seen.ContainsKey(text))
            {
                seen.Add(text, null);
                distinct.Add(text);
            }
        }
        return distinct;
    }

    public void pub()
    {
        try
        {
            for (int i = int.Parse(this.TextBox3.Text.Trim())-1; i < int.Parse(this.TextBox4.Text.Trim()); i++)
            {
                ArrayList enddata = new ArrayList();

                HttpWebRequest adsdfa = (HttpWebRequest)WebRequest.Create(savedata.linkstor[i].ToString().Trim());

                HttpWebResponse aaaa = (HttpWebResponse)adsdfa.GetResponse();

                Stream strmeam = aaaa.GetResponseStream();

                StreamReader srea = new StreamReader(strmeam, Encoding.GetEncoding(code.Trim()));

                string usedataHTML = srea.ReadToEnd();
                
                if (usedataHTML.IndexOf(rule3.Trim()) == -1)
                {
                    continue;   
                }
                string strOutput = usedataHTML.Substring(usedataHTML.IndexOf(rule3.Trim()));
                if (strOutput.IndexOf(rule4.Trim()) == -1)
                {
                    continue;
                }
                strOutput = strOutput.Substring(0, strOutput.IndexOf(rule4.Trim()));
                string a = "";


                string[] mo = split(rule5.Trim(),"&&&&");
                string[] guize = split(mo[0].Trim(), "||||");
                
                int m = 1;
                if (guize != null)
                {
                    for (int j = 0; j < guize.Length; j++)
                    {
                        string b = "";
                        if (mo[1].IndexOf("IgnoreCase") != -1)
                        {
                            regexlo = new Regex(guize[j].Trim(), RegexOptions.IgnoreCase);
                        }
                        if (mo[1].IndexOf("Singleline") != -1)
                        {
                            regexlo = new Regex(guize[j].Trim(), RegexOptions.Singleline);
                        }
                        if (mo[1].IndexOf("ExplicitCapture") != -1)
                        {
                            regexlo = new Regex(guize[j].Trim(), RegexOptions.ExplicitCapture);
                        }
                        if (mo[1].IndexOf("Multiline") != -1)
                        {
                            regexlo = new Regex(guize[j].Trim(), RegexOptions.Multiline);
                        }
                        MatchCollection matches = regexlo.Matches(strOutput);
                        System.Collections.IEnumerator enu = matches.GetEnumerator();
                        while (enu.MoveNext() && enu.Current != null)
                        {
                            Match match = (Match)(enu.Current);
                            b += match.Value;
                        }
                        a += "@" + (j + 1) + b;
                        m++;
                    }
                    a += "@" + m;
                }
                if (a == "")
                {
                    a = strOutput;
                }

                //if (rule6.Trim() != "")
                //{

                //    string[] tihuan1 = split(rule6.Trim(), "||||");
                //    string[] tihuan2 = split(rule7.Trim(), "||||");
                //    for (int k = 0; k < tihuan1.Length; k++)
                //    {
                //        if (b.IndexOf(tihuan1[k]) != -1)
                //        {
                //            if (rule7.Trim() != "")
                //            {
                //                if (tihuan1.Length == tihuan2.Length)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -