urlfetcher.java

来自「jetspeed源代码」· Java 代码 · 共 464 行 · 第 1/2 页

JAVA
464
字号
/*
 * Copyright 2000-2004 The Apache Software Foundation.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jetspeed.services.urlmanager;

//standard Java stuff
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.Hashtable;
import java.util.Vector;

//turbine stuff
import org.apache.jetspeed.services.resources.JetspeedResources;

//jetspeed stuff
import org.apache.jetspeed.cache.disk.DiskCacheEntry;
import org.apache.jetspeed.cache.disk.DiskCacheUtils;
import org.apache.jetspeed.cache.disk.JetspeedDiskCache;
import org.apache.jetspeed.services.logging.JetspeedLogFactoryService;
import org.apache.jetspeed.services.logging.JetspeedLogger;

/**
<p>
Handles fetching URLs and, if anything goes wrong, adds the failing URL to the
BadURLManager.  There are also some utility methods for downloading URLs that
do not use the Disk Cache.
</p>



@author <a href="mailto:burton@apache.org">Kevin A. Burton</a>
@author <a href="mailto:sgala@hisitech.com">Santiago Gala</a>
@version $Id: URLFetcher.java,v 1.14 2004/02/23 03:30:47 jford Exp $
*/
public class URLFetcher 
{
    /**
     * Logger for this class, obtained from JetspeedLogFactoryService under
     * the fully qualified class name of URLFetcher.
     */    
    private static final JetspeedLogger logger = JetspeedLogFactoryService.getLogger(URLFetcher.class.getName());
    
    /**
    URLs that Jetspeed is currently trying to fetch in real time.
    NOTE(review): raw Hashtable; the key/value types are not visible in this
    part of the file - presumably keyed by the URL string and maintained by
    addRealtimeURL/removeRealtimeURL. TODO confirm.
    */
    private static Hashtable realtime_urls = new Hashtable();

    /**
     * Value of the boolean resource named by
     * JetspeedResources.CACHE_REQUIRE_CACHED_KEY, read once when this class
     * is initialized.
     *
     * When true, fetch( String, boolean ) does not download an uncached URL
     * inline (unless forced or already being fetched): it asks the disk cache
     * to refresh the URL and throws ContentNotAvailableException instead.
     */
    static final boolean shouldFetchNow = 
        JetspeedResources.getBoolean( JetspeedResources.CACHE_REQUIRE_CACHED_KEY );
    
    static {
        // Follow HTTP redirects so that redirected channels are still found.
        // setFollowRedirects is a static setting on HttpURLConnection, so it
        // is not limited to connections opened by this class.
        java.net.HttpURLConnection.setFollowRedirects(true);
    }

    /**
     * Convenience overload of fetch( String, boolean ) that never forces a
     * real fetch, i.e. it is equivalent to <code>fetch( url, false )</code>.
     *
     * @param url The URL to fetch
     * @return the Reader produced by the two-argument fetch for this URL
     * @throws IOException propagated unchanged from the two-argument fetch
     */
    public static final Reader fetch( String url ) throws IOException {
        final boolean force = false;
        return URLFetcher.fetch( url, force );
    }

    
    /**
     * Fetches a URL and returns its content as a Reader, registering the URL
     * as bad with the URLManager when the fetch fails and no cached copy
     * exists.
     *
     * <p>Steps, in order:</p>
     * <ol>
     *   <li>A URL that URLManager does not report as OK is rejected with
     *       URLNotAvailableException.</li>
     *   <li>Unless <code>force</code> is set, a URL present in the disk cache
     *       is served from the cache.</li>
     *   <li>If CACHE_REQUIRE_CACHED is configured and the URL is not cached,
     *       not currently being fetched and not forced, the disk cache is
     *       asked to refresh it and ContentNotAvailableException is thrown.</li>
     *   <li>If the URL is already being fetched in real time, this thread
     *       waits on the interned URL string and then retries the whole
     *       fetch.</li>
     *   <li>Otherwise the URL is opened directly (through the proxy that
     *       URLManager reports for its protocol, if any) and the connection
     *       is handed to getReader.</li>
     * </ol>
     *
     * @param url   The URL to fetch
     * @param force if true, do not serve the URL from the disk cache and do
     *              not apply the CACHE_REQUIRE_CACHED check; always attempt
     *              a real fetch
     * @return a Reader over the cached entry or over the opened connection
     * @throws URLNotAvailableException if URLManager does not report the URL
     *         as OK, or if anything goes wrong while opening or reading it
     * @throws ContentNotAvailableException if CACHE_REQUIRE_CACHED is
     *         configured and the URL is not in the cache yet
     * @throws IOException declared for callers; see the specific cases above
     */
    public static final Reader fetch( String url,
                                      boolean force ) throws IOException {

        if ( ! URLManager.isOK( url ) ) {
            throw new URLNotAvailableException( url );
        }

        //SGP
        // Serve from the disk cache unless the caller forces a real fetch.
        if ( !force && DiskCacheUtils.isCached( url ) )
        {
            logger.info( "The url " +
                      url +
                      " is fetched from the Cache" );
            return JetspeedDiskCache.getInstance().getEntry( url ).getReader();
        }

        //do cache required checking
        if ( shouldFetchNow &&
             !DiskCacheUtils.isCached( url ) &&
             !isRealtimeURL( url ) &&
             !force ) {

            logger.info( "The url " +
                      url +
                      " is not in the cache and will be fetched now because you have configured -> " +
                      JetspeedResources.CACHE_REQUIRE_CACHED_KEY );

            //it is possible that two threads request the same URL.
            //The refresh call in JetspeedDiskCache takes care of this.
            JetspeedDiskCache.getInstance().refresh( url );

            //throw an Exception that this isn't in the cache.
            throw new ContentNotAvailableException( url );
        }

        if ( isRealtimeURL( url ) ) {
            // Another fetch of this URL is in progress: register, wait on the
            // interned URL string, then retry from the top.
            // NOTE(review): the notifying side is not visible in this part of
            // the file - presumably removeRealtimeURL notifies on
            // url.intern(); confirm. The isRealtimeURL check happens outside
            // the monitor, so a notification sent between the check and
            // wait() would be missed - verify against the notifier.
            addRealtimeURL( url );
            synchronized( url.intern() )
            {
                try
                {
                    //We wait for other thread to load
                    url.intern().wait();
                }
                catch ( InterruptedException e )
                {
                    // NOTE(review): the interrupt status is deliberately not
                    // restored here; with the retry below, a set interrupt
                    // flag would make the next wait() throw immediately.
                    logger.info( "Wait Interrupted" );
                }
                finally
                {
                    removeRealtimeURL( url );
                }
            }
            // We try again
            return URLFetcher.fetch( url, force );
        } else {
            addRealtimeURL( url );
        }

        try {

            URL content;

            // Determine the URL's protocol. A URL without ":/" has no usable
            // protocol part; report it as malformed explicitly instead of
            // letting substring() fail with an index error.
            int protocolEnd = url.indexOf( ":/" );
            if ( protocolEnd < 0 ) {
                throw new MalformedURLException( url );
            }
            String protocol = url.substring( 0, protocolEnd );

            // Check if a proxy is set. If no port is set, use the default port (-1)
            String proxyHost = URLManager.getProxyHost( protocol );
            if ( proxyHost != null )
            {
                // Open the URL using a proxy: the proxy is the connection
                // target and the complete original URL is passed as the
                // "file" part of the request.
                content = new URL( protocol,
                                   proxyHost,
                                   URLManager.getProxyPort( protocol ),
                                   url );
            }
            else
            {
                content = new URL( url );
            }

            URLConnection conn = content.openConnection();
            return getReader( conn );

        } catch ( Throwable t ) {

            // Any failure is converted into URLNotAvailableException; the
            // reason text is also what gets registered with the URLManager.
            final String reason;
            if ( t instanceof MalformedURLException ) {
                reason = "The URL is Malformed.";
            } else {
                reason = t.toString();
            }

            //if the URL couldn't be fetched because it is remote AND
            //it is not in the cache, add it to the bad URL list.
            if ( !DiskCacheUtils.isCached( url ) ) {
                //Reported up there...
                //logger.error( t );
                URLManager.register( url, URLManagerService.STATUS_BAD, reason );
            } else {
                //it is in the cache, remove it (could be broken in cache).
                //next time we could be luckier.
                JetspeedDiskCache.getInstance().remove( url );
            }

            throw new URLNotAvailableException( reason, url );

        } finally {
            // Always unregister this thread's real-time fetch, on success
            // and on failure alike.
            removeRealtimeURL( url );
        }

    }


    /**
    Try and fetch a URL if the copy in the cache has expired and add
    the URL to the BadURLManager if anything goes wrong.
    
    @param url The URL to fetch
    */
    public static final boolean refresh( String url) throws IOException {
        
        if ( ! URLManager.isOK( url ) ) {
            if( DiskCacheUtils.isCached(url) ) 
                JetspeedDiskCache.getInstance().remove(url);
            throw new URLNotAvailableException( url );
        }
        
        if(isRealtimeURL(url)) {
            return false;
        }

            
         DiskCacheEntry dce = null;
         if( DiskCacheUtils.isCached(url) ) {
             try {
                 dce = JetspeedDiskCache.getInstance().getEntry( url );
                 if(!dce.hasExpired())
                 {

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?