⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 form1.cs

📁 C#编写的网络爬虫程序 效率很高 很好用!
💻 CS
📖 第 1 页 / 共 2 页
字号:
			this.tabPage2.TabIndex = 1;
			this.tabPage2.Text = "Graphics Ripper";
			// 
			// axWebBrowser
			// 
			this.axWebBrowser.ContainingControl = this;
			this.axWebBrowser.Enabled = true;
			this.axWebBrowser.Location = new System.Drawing.Point(10, 43);
			this.axWebBrowser.OcxState = ((System.Windows.Forms.AxHost.State)(resources.GetObject("axWebBrowser.OcxState")));
			this.axWebBrowser.Size = new System.Drawing.Size(1113, 474);
			this.axWebBrowser.TabIndex = 21;
			// 
			// button1
			// 
			this.button1.Location = new System.Drawing.Point(595, 526);
			this.button1.Name = "button1";
			this.button1.Size = new System.Drawing.Size(87, 24);
			this.button1.TabIndex = 20;
			this.button1.Text = "Start";
			this.button1.Click += new System.EventHandler(this.button1_Click);
			// 
			// txtUrlGraphicsRipper
			// 
			this.txtUrlGraphicsRipper.Location = new System.Drawing.Point(182, 9);
			this.txtUrlGraphicsRipper.Name = "txtUrlGraphicsRipper";
			this.txtUrlGraphicsRipper.Size = new System.Drawing.Size(500, 21);
			this.txtUrlGraphicsRipper.TabIndex = 19;
			this.txtUrlGraphicsRipper.Text = "http://images.google.com.au/images?q=funny+sex&ie=UTF-8&oe=UTF-8&hl=en";
			// 
			// label2
			// 
			this.label2.Location = new System.Drawing.Point(10, 9);
			this.label2.Name = "label2";
			this.label2.Size = new System.Drawing.Size(163, 24);
			this.label2.TabIndex = 18;
			this.label2.Text = "Url";
			// 
			// statusBar
			// 
			this.statusBar.Location = new System.Drawing.Point(0, 602);
			this.statusBar.Name = "statusBar";
			this.statusBar.Panels.AddRange(new System.Windows.Forms.StatusBarPanel[] {
																						 this.statusBarPanel});
			this.statusBar.Size = new System.Drawing.Size(712, 24);
			this.statusBar.TabIndex = 1;
			// 
			// Form1
			// 
			this.AutoScaleBaseSize = new System.Drawing.Size(6, 14);
			this.ClientSize = new System.Drawing.Size(712, 626);
			this.Controls.Add(this.statusBar);
			this.Controls.Add(this.tabControl1);
			this.FormBorderStyle = System.Windows.Forms.FormBorderStyle.FixedDialog;
			this.Name = "Form1";
			this.Text = "Form1";
			this.tabControl1.ResumeLayout(false);
			this.tabPage1.ResumeLayout(false);
			((System.ComponentModel.ISupportInitialize)(this.spnProcessUriMax)).EndInit();
			this.tabPage2.ResumeLayout(false);
			((System.ComponentModel.ISupportInitialize)(this.axWebBrowser)).EndInit();
			((System.ComponentModel.ISupportInitialize)(this.statusBarPanel)).EndInit();
			this.ResumeLayout(false);

		}
		#endregion

      #region Broken Link Checker
      /// <summary>
      /// Click handler for the broken link checker: crawls from the Uri in
      /// <c>txtUri</c> and lists every page whose processing started but did
      /// not succeed in <c>txtOutput</c>.
      /// </summary>
      /// <param name="sender">Control that raised the event (unused).</param>
      /// <param name="e">Event data (unused).</param>
      private void btnStartStop_Click(object sender, System.EventArgs e)
      {
         statusBar.Text = "Processed Uri: " + txtUri.Text;

         btnStartStop.Enabled = false;
         txtOutput.Lines      = null;

         try
         {
            WebSpider spider = new WebSpider( txtUri.Text, txtBaseUri.Text, (int)spnProcessUriMax.Value );

            spider.WebPageProcessor.ContentHandler += new WebPageContentDelegate( HandleContent );
            spider.Execute( );

            statusBar.Text = "Finished Processing";

            // Collect only the pages that were attempted and failed; pages that
            // were never started are not reported.
            ArrayList pagesFail = new ArrayList( );

            foreach( WebPageState page in spider.WebPages.Values )
            {
               if( page.ProcessStarted && ! page.ProcessSuccessfull )
               {
                  pagesFail.Add( page );
               }
            }

            // Three output lines per failed page: uri, description and a blank
            // separator. The array must be filled completely, otherwise the
            // unused slots end up as stray blank lines at the end of the output.
            const int linesPerPage = 3;

            string[] lines = new string[ pagesFail.Count * linesPerPage ];
            int      index = 0;

            foreach( WebPageState page in pagesFail )
            {
               lines[index++] = "Uri        : " + page.Uri.AbsoluteUri;
               lines[index++] = "Description: " + page.StatusDescription;
               lines[index++] = "";
            }

            txtOutput.Lines = lines;
         }
         finally
         {
            // Re-enable the button even when the crawl throws, so the user can retry.
            btnStartStop.Enabled = true;
         }
      }

      /// <summary>
      /// Content callback registered with the spider's page processor: shows
      /// the Uri of the given page state in the status bar.
      /// </summary>
      /// <param name="state">State of the page handed to the callback.</param>
      private void HandleContent( WebPageState state )
      {
         string progress = "Processed Uri: " + state.Uri;
         statusBar.Text  = progress;

         // Pump pending window messages so the status text repaints while the
         // caller is still running on the UI thread.
         Application.DoEvents( );
      }
      #endregion

      #region Image Ripper
//      private ArrayList   m_images;
      // Base Uri built from txtUrlGraphicsRipper.Text in button1_Click; DownloadImage
      // resolves each image reference against it.
      private Uri          m_baseUri;
      // Writer for the generated viewer page; created in SetupOutputFile and
      // closed in CloseOutputFile.
      private StreamWriter m_graphicViewerWriter;
      // Output folder: Application.StartupPath + "\temp" (set in SetupOutputFile).
      private string       m_outFolder;
      // Full path of the viewer page: m_outFolder + "\viewer.html".
      private string       m_outFile;
      // Running counter used to name downloaded files (img0, img1, ...); reset to 0
      // in SetupOutputFile and incremented in DownloadImage.
      private int          m_fileId;

      /// <summary>
      /// Click handler for the graphics ripper: processes the page at the Uri
      /// in <c>txtUrlGraphicsRipper</c>, passing its content to
      /// <c>GraphicsLinkHandler</c>, then closes the viewer page and shows it
      /// in the embedded browser.
      /// </summary>
      /// <param name="sender">Control that raised the event (unused).</param>
      /// <param name="e">Event data (unused).</param>
      private void button1_Click(object sender, System.EventArgs e)
      {
         statusBar.Text = "Process Uri: " + txtUrlGraphicsRipper.Text;

         // The url is free-form user input; report a malformed one instead of
         // letting the exception escape the event handler.
         try
         {
            m_baseUri = new Uri( txtUrlGraphicsRipper.Text );
         }
         catch (UriFormatException ex)
         {
            MessageBox.Show( "Invalid url\n" + ex.Message );
            return;
         }

         try
         { 
            SetupOutputFile( );
         }
         catch (Exception ex)
         {
            MessageBox.Show( "Failed to setup output file\n" + ex.ToString( ) );
            return;
         }

         try
         {
            IWebPageProcessor processor = new WebPageProcessor( );
            processor.ContentHandler += new WebPageContentDelegate( GraphicsLinkHandler );
            processor.Process( new WebPageState( txtUrlGraphicsRipper.Text ) );
         }
         finally
         {
            // Always close the viewer file, even when processing fails, so the
            // file handle is released and whatever was written can be shown.
            CloseOutputFile( );
         }

         statusBar.Text = "Finished Processing";
      }

      /// <summary>
      /// Content callback for the graphics ripper: runs the
      /// <c>RegularExpression.SrcExtractor</c> pattern over the page content
      /// and downloads the value of capture group 1 of every match.
      /// </summary>
      /// <param name="state">Page state whose <c>Content</c> is scanned.</param>
      private void GraphicsLinkHandler(WebPageState state)
      {
         Match m = RegExUtil.GetMatchRegEx( RegularExpression.SrcExtractor, state.Content );

         while( m.Success )
         {
            // Read the current match before advancing; advancing first would
            // skip the first match and process a failed match at the end.
            string image = m.Groups[1].ToString( );

            statusBar.Text = "Image: " + image;
            Application.DoEvents( );

            DownloadImage( image );

            m = m.NextMatch( );
         }
      }

      /// <summary>
      /// Prepares a fresh output folder under the application's startup path,
      /// resets the image counter and opens the viewer page, writing its
      /// opening html tags.
      /// </summary>
      private void SetupOutputFile( )
      {
         string folder = Application.StartupPath + "\\temp";

         m_outFolder = folder;
         m_outFile   = folder + "\\viewer.html";
         m_fileId    = 0;

         // Best effort: remove whatever a previous run left behind. Any failure
         // (including the folder not existing yet) is ignored on purpose.
         try
         {
            Directory.Delete( folder, true );
         }
         catch
         {
         }

         Directory.CreateDirectory( folder );

         StreamWriter viewer   = File.CreateText( m_outFile );
         m_graphicViewerWriter = viewer;
         viewer.WriteLine( "<html><body>" );
      }

      /// <summary>
      /// Writes the closing html tags, closes the viewer file and navigates
      /// the embedded browser control to it.
      /// </summary>
      private void CloseOutputFile( )
      {
         StreamWriter viewer = m_graphicViewerWriter;

         viewer.WriteLine( "</body></html>" );
         viewer.Close( );

         // Navigate takes its optional arguments by reference; a single null
         // placeholder is passed for all of them.
         object missing = null;
         axWebBrowser.Navigate( m_outFile, ref missing, ref missing, ref missing, ref missing );
      }

      /// <summary>
      /// Resolves <paramref name="imgUri"/> against <c>m_baseUri</c> and, when
      /// its extension is png, gif, jpg, jpeg or swf, downloads it into the
      /// output folder and appends markup referencing the local copy to the
      /// viewer page. Any failure is recorded in the viewer page instead of
      /// being thrown.
      /// </summary>
      /// <param name="imgUri">Absolute or relative reference taken from the page.</param>
      private void DownloadImage( string imgUri )
      {
         Uri      imageUri = null;
         string   ext      = null;
         string   outFile  = null;
         try
         {
            imageUri = new Uri( m_baseUri, imgUri );

            // Invariant culture: the extension is compared against fixed ASCII
            // names, so the lower-casing must not depend on the UI culture.
            ext      = StrUtil.RightLastIndexOf( imageUri.AbsoluteUri, "." ).ToLower( System.Globalization.CultureInfo.InvariantCulture );
            outFile  = m_outFolder + "\\img" + ( m_fileId++ ) + "." + ext;

            // Exact match only; a substring test against "png|gif|..." would
            // also accept "", "g", "pn", "|" and similar fragments.
            bool supported = ext == "png" || ext == "gif" || ext == "jpg" || ext == "jpeg" || ext == "swf";

            if ( supported )
            {
               using ( WebClient web = new WebClient( ) )
               {
                  web.DownloadFile( imageUri.AbsoluteUri, outFile );
               }

               if ( ext == "swf" )
               {
                  m_graphicViewerWriter.WriteLine( "<object classid='clsid:D27CDB6E-AE6D-11cf-96B8-444553540000' codebase='http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=6,0,29,0' width='150' height='100'>" );
                  m_graphicViewerWriter.WriteLine( "<param name='movie' value='" + outFile + "'>" );
                  m_graphicViewerWriter.WriteLine( "<param name=quality value=high>" );
                  m_graphicViewerWriter.WriteLine( "<embed src='" + outFile + "' quality=high pluginspage='http://www.macromedia.com/shockwave/download/index.cgi?P1_Prod_Version=ShockwaveFlash' type='application/x-shockwave-flash' width='150' height='100'></embed>" );
                  m_graphicViewerWriter.WriteLine( "</object>" );
               }
               else
               {
                  m_graphicViewerWriter.WriteLine( "<img src='file://" + outFile + "' /><br />" );
               }
            }
         }
         catch (Exception)
         {
            // imageUri is still null when the Uri constructor itself failed;
            // fall back to the raw reference so this handler cannot throw a
            // NullReferenceException of its own.
            string failed = ( imageUri != null ) ? imageUri.AbsoluteUri : imgUri;

            m_graphicViewerWriter.WriteLine( "could not download img: " + failed );
         }
      }
      #endregion
   }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -