⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 webpageprocessor.cs

📁 一系列实验参考资料,希望对大家有点参考价值.是用JAVA编写的
💻 CS
字号:
using System;
using System.IO;
using System.Net;
using System.Text.RegularExpressions;
using System.Threading;

using Mf.Util;

namespace Mf.Service.WebSpider
{
	/// <summary>
	/// Fetches a single page on behalf of a <see cref="WebSpider"/>: issues the
	/// request, records the outcome on the supplied <see cref="WebPageState"/>,
	/// hands every extracted link back to the spider, and finally decrements the
	/// spider's thread counter.
	/// </summary>
	public class WebPageProcessor
	{
      private readonly WebSpider m_spider;

      /// <summary>
      /// Creates a processor bound to the spider that collects discovered links
      /// and tracks the number of outstanding worker threads.
      /// </summary>
      /// <param name="spider">The owning spider.</param>
      public WebPageProcessor( WebSpider spider )
      {
         m_spider = spider;
      }

      /// <summary>
      /// Downloads the page described by <paramref name="s"/> and processes its links.
      /// Takes an <see cref="Object"/> parameter, so it can be used where an
      /// object-typed callback is required.
      /// </summary>
      /// <param name="s">The <see cref="WebPageState"/> to process.</param>
      /// <remarks>
      /// Failures are logged rather than propagated. The spider's thread counter is
      /// decremented in a <c>finally</c> block so that it is updated even when the
      /// failure logging itself throws.
      /// </remarks>
      public void Process( Object s )
      {
         try
         {
            WebPageState   state = (WebPageState)s;

            Log( "Process Uri: {0}, HashCode: {1}", state.Uri.AbsoluteUri, Thread.CurrentThread.GetHashCode( ) );

            WebRequest     req   = WebRequest.Create( state.Uri );
            WebResponse    res   = null;
            Thread.Sleep( 1 );

            try
            {
               res = req.GetResponse( );
               Thread.Sleep( 1 );

               HttpWebResponse httpRes = res as HttpWebResponse;
               if ( httpRes != null )
               {
                  state.StatusCode        = httpRes.StatusCode.ToString( );
                  state.StatusDescription = httpRes.StatusDescription;
               }
               if ( res is FileWebResponse /* && FILE EXIST - TODO */ )
               {
                  state.StatusCode        = "OK";
                  state.StatusDescription = "OK";
               }

               // Literal-first comparison: StatusCode is not assigned above when the
               // response is neither an HTTP nor a file response.
               if ( "OK".Equals( state.StatusCode ) )
               {
                  using ( StreamReader sr = new StreamReader( res.GetResponseStream( ) ) )
                  {
                     state.Content = sr.ReadToEnd( );
                  }
                  Thread.Sleep( 1 );

                  Log( "Handle Links for Uri: {0}", state.Uri.AbsoluteUri );
                  HandleLinks( state );
               }
            }
            catch( WebException wex )
            {
               RecordWebFailure( state, wex );
            }
            catch( Exception ex )
            {
               state.StatusDescription = ex.ToString( );
            }
            finally
            {
               if ( res != null )
               {
                  res.Close( );
               }
            }

            // The content is only needed while links are being extracted.
            state.Content = null;

            Log( "Process Uri: {0} - Success", state.Uri.AbsoluteUri );
         }
         catch (Exception ex)
         {
            LogFailure( s, ex );
         }
         finally
         {
            SignalThreadFinished( );
         }
      }

      /// <summary>
      /// Records a failed request on <paramref name="state"/>. A response whose status
      /// is <see cref="HttpStatusCode.NotFound"/> is stored as status code "404";
      /// any other failure only stores the exception text as the description.
      /// </summary>
      private void RecordWebFailure( WebPageState state, WebException wex )
      {
         WebResponse errorRes = wex.Response;

         try
         {
            HttpWebResponse httpError = errorRes as HttpWebResponse;

            // Inspect the real status code instead of searching the exception text for "404".
            if ( httpError != null && httpError.StatusCode == HttpStatusCode.NotFound )
            {
               state.StatusCode        = "404";
               state.StatusDescription = "(404) Not Found";
            }
            else
            {
               state.StatusDescription = wex.ToString( );
            }
         }
         finally
         {
            // The response attached to the exception is separate from the one the
            // caller closes, so release it here.
            if ( errorRes != null )
            {
               errorRes.Close( );
            }
         }
      }

      /// <summary>
      /// Logs a processing failure without assuming that <paramref name="s"/> is a
      /// usable <see cref="WebPageState"/>; a bad argument may be the very reason
      /// processing failed.
      /// </summary>
      private void LogFailure( Object s, Exception ex )
      {
         WebPageState   state = s as WebPageState;
         string         uri   = "(unknown)";

         if ( state != null && state.Uri != null )
         {
            uri = state.Uri.AbsoluteUri;
         }

         Log( "Process Uri: {0} - Failure", uri );
         Log( ex.ToString( ) );
      }

      /// <summary>
      /// Decrements the spider's thread counter and sets the spider's reset event
      /// once the counter reaches zero.
      /// </summary>
      private void SignalThreadFinished( )
      {
         // NOTE(review): locks on the spider instance itself; presumably WebSpider
         // synchronizes on the same object - confirm before switching to a private gate.
         lock ( m_spider )
         {
            m_spider.ThreadCount--;
            if ( m_spider.ThreadCount == 0 )
            {
               _.P( "ResetEvent.Set" );

               m_spider.ResetEvent.Set( );
            }
         }
      }

      /// <summary>
      /// Runs the URL extraction expression over the page content and passes the
      /// "url" group of every match to the spider, together with the page's own Uri.
      /// </summary>
      private void HandleLinks( WebPageState state )
      {
         string   html     = state.Content;
         Match    m        = RegExUtil.GetMatchRegEx( RegularExpression.UrlExtractor, html );
         
         while( m.Success )
         {
            m_spider.AddWebPage( state.Uri, m.Groups["url"].ToString( ) );

            m = m.NextMatch( );
         }
      }

      #region Log
      /// <summary>Forwards a message to <c>_.P</c>.</summary>
      private void Log( string v ) 
      { 
         _.P( v );
      }

      /// <summary>Forwards a format string and one argument to <c>_.P</c>.</summary>
      private void Log( string format, object o1 ) 
      { 
         _.P( format, o1 );
      }

      /// <summary>Forwards a format string and two arguments to <c>_.P</c>.</summary>
      private void Log( string format, object o1, object o2 ) 
      { 
         _.P( format, o1, o2 );
      }

      /// <summary>Forwards a format string and three arguments to <c>_.P</c>.</summary>
      private void Log( string format, object o1, object o2, object o3 ) 
      { 
         _.P( format, o1, o2, o3 );
      }
      #endregion

	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -