urlfetcher.java
来自「jetspeed源代码」· Java 代码 · 共 464 行 · 第 1/2 页
JAVA
464 行
/*
* Copyright 2000-2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jetspeed.services.urlmanager;
//standard Java stuff
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.Hashtable;
import java.util.Vector;
//turbine stuff
import org.apache.jetspeed.services.resources.JetspeedResources;
//jetspeed stuff
import org.apache.jetspeed.cache.disk.DiskCacheEntry;
import org.apache.jetspeed.cache.disk.DiskCacheUtils;
import org.apache.jetspeed.cache.disk.JetspeedDiskCache;
import org.apache.jetspeed.services.logging.JetspeedLogFactoryService;
import org.apache.jetspeed.services.logging.JetspeedLogger;
/**
<p>
Fetches URLs and, if a fetch fails for any reason, registers the URL as bad
with the URLManager. There are also some utility methods for downloading URLs
that do not use the disk cache.
</p>
@author <a href="mailto:burton@apache.org">Kevin A. Burton</a>
@author <a href="mailto:sgala@hisitech.com">Santiago Gala</a>
@version $Id: URLFetcher.java,v 1.14 2004/02/23 03:30:47 jford Exp $
*/
public class URLFetcher
{
/**
* Logger for this class, obtained once at class initialization and named
* after the fully qualified class name.
*/
private static final JetspeedLogger logger = JetspeedLogFactoryService.getLogger(URLFetcher.class.getName());
/**
URLs that Jetspeed is currently trying to fetch in real time.
NOTE(review): presumably maintained through isRealtimeURL / addRealtimeURL /
removeRealtimeURL, which are not defined in this part of the file - confirm
the key/value layout there before touching this table directly.
*/
private static Hashtable realtime_urls = new Hashtable();
/**
* Value of the boolean resource JetspeedResources.CACHE_REQUIRE_CACHED_KEY,
* read once at class initialization.
* Despite its name, when this flag is true fetch( url, false ) does NOT
* download an uncached URL in-line: for a URL that is neither cached nor
* currently being fetched it calls JetspeedDiskCache.refresh( url ) and
* throws ContentNotAvailableException.
*/
static final boolean shouldFetchNow =
JetspeedResources.getBoolean( JetspeedResources.CACHE_REQUIRE_CACHED_KEY );
// Redirected channels should still resolve, so HTTP redirects are followed
// automatically. This is the static, class-wide HttpURLConnection setting.
static {
    HttpURLConnection.setFollowRedirects( true );
}
/**
 * Fetches a URL without forcing a bypass of the cache checks.
 * Equivalent to calling fetch( url, false ).
 *
 * @param url The URL to fetch
 * @return a Reader over the content of the URL
 * @throws IOException if the two-argument fetch fails
 */
public static final Reader fetch( String url ) throws IOException {
    final boolean force = false;
    return fetch( url, force );
}
/**
 * Fetches a URL and returns its content as a Reader, registering the URL as
 * bad with the URLManager if the fetch fails and no cached copy exists.
 *
 * <p>Order of processing:</p>
 * <ol>
 *   <li>URLs that URLManager does not report as OK are rejected.</li>
 *   <li>Unless <code>force</code> is set, a cached copy is returned when one exists.</li>
 *   <li>Unless <code>force</code> is set, when CACHE_REQUIRE_CACHED is configured and
 *       the URL is neither cached nor currently being fetched, a disk cache
 *       refresh is requested and ContentNotAvailableException is thrown.</li>
 *   <li>If the URL is already being fetched, this thread waits and then
 *       retries the whole call.</li>
 *   <li>Otherwise the URL is opened (through the configured proxy, if any)
 *       and a Reader over the connection is returned.</li>
 * </ol>
 *
 * @param url The URL to fetch
 * @param force if true, skip the disk cache lookup and the
 *              CACHE_REQUIRE_CACHED check and go to the network
 * @return a Reader over the content of the URL
 * @throws URLNotAvailableException if the URL is not OK according to
 *         URLManager, or if fetching it fails for any reason
 * @throws ContentNotAvailableException if CACHE_REQUIRE_CACHED is configured
 *         and the content is not in the cache yet
 * @throws IOException declared for callers; see the specific cases above
 */
public static final Reader fetch( String url,
                                  boolean force ) throws IOException {

    if ( ! URLManager.isOK( url ) ) {
        throw new URLNotAvailableException( url );
    }

    //SGP
    if ( force == false && DiskCacheUtils.isCached( url ) == true ) {
        logger.info( "The url " +
                     url +
                     " is fetched from the Cache" );
        return JetspeedDiskCache.getInstance().getEntry( url ).getReader();
    }

    //do cache required checking
    if ( shouldFetchNow &&
         DiskCacheUtils.isCached( url ) == false &&
         isRealtimeURL( url ) == false &&
         force == false ) {

        logger.info( "The url " +
                     url +
                     " is not in the cache and will be fetched now because you have configured -> " +
                     JetspeedResources.CACHE_REQUIRE_CACHED_KEY );

        //it is possible that two threads request the same URL.
        //The refresh call in JetspeedDiskCache takes care of this.
        JetspeedDiskCache.getInstance().refresh( url );

        //throw an Exception that this isn't in the cache.
        throw new ContentNotAvailableException( url );
    }

    if ( isRealtimeURL( url ) == true ) {
        addRealtimeURL( url );

        // Remember an interrupt instead of losing it; the flag is restored
        // only after the retry so that a nested wait() is not aborted at once.
        boolean interrupted = false;

        // NOTE(review): wait() has no timeout and no condition re-check; whether
        // a wake-up can be missed depends on removeRealtimeURL, which is not
        // visible in this part of the file - confirm.
        synchronized( url.intern() ) {
            try {
                //We wait for other thread to load
                url.intern().wait();
            } catch ( InterruptedException e ) {
                logger.info( "Wait Interrupted" );
                interrupted = true;
            } finally {
                removeRealtimeURL( url );
            }
        }

        // We try again
        try {
            return URLFetcher.fetch( url, force );
        } finally {
            if ( interrupted ) {
                Thread.currentThread().interrupt();
            }
        }
    } else {
        addRealtimeURL( url );
    }

    try {
        URL content;

        // Determine the URL's protocol. A URL without ":/" used to fail with a
        // StringIndexOutOfBoundsException whose text ended up as the registered
        // reason; report it as malformed instead.
        int protocolEnd = url.indexOf( ":/" );
        if ( protocolEnd < 0 ) {
            throw new MalformedURLException( "No protocol found in URL: " + url );
        }
        String protocol = url.substring( 0, protocolEnd );

        // Check if a proxy is set. If no port is set, use the default port (-1)
        String proxyHost = URLManager.getProxyHost( protocol );
        if ( proxyHost != null ) {
            // Open the URL using a proxy: host and port are the proxy's, and
            // the complete target URL is passed as the "file" part.
            content = new URL( protocol,
                               proxyHost,
                               URLManager.getProxyPort( protocol ),
                               url );
        } else {
            content = new URL( url );
        }

        URLConnection conn = content.openConnection();
        return getReader( conn );

    } catch ( Throwable t ) {
        // NOTE(review): Throwable also covers Errors; kept as in the original
        // because callers may rely on always receiving URLNotAvailableException.
        String reason = "";

        if ( t instanceof MalformedURLException ) {
            reason = "The URL is Malformed.";
        } else {
            reason = t.toString();
        }

        //if the URL couldn't be fetched because it is remote AND
        //it is not in the cache, add it to the bad URL list.
        if ( DiskCacheUtils.isCached( url ) == false ) {
            //Reported up there...
            //logger.error( t );
            URLManager.register( url, URLManagerService.STATUS_BAD, reason );
        } else {
            //it is in the cache, remove it (could be broken in cache).
            //next time we could be luckier.
            JetspeedDiskCache.getInstance().remove( url );
        }

        throw new URLNotAvailableException( reason, url );

    } finally {
        // Runs on success and on failure.
        removeRealtimeURL( url );
    }
}
/**
Try and fetch a URL if the copy in the cache has expired and add
the URL to the BadURLManager if anything goes wrong.
@param url The URL to fetch
@param force if set to true then do not use force this entry to be in the cache...
IE do not use CACHE_REQUIRE_CACHED
*/
public static final boolean refresh( String url) throws IOException {
if ( ! URLManager.isOK( url ) ) {
if( DiskCacheUtils.isCached(url) )
JetspeedDiskCache.getInstance().remove(url);
throw new URLNotAvailableException( url );
}
if(isRealtimeURL(url)) {
return false;
}
DiskCacheEntry dce = null;
if( DiskCacheUtils.isCached(url) ) {
try {
dce = JetspeedDiskCache.getInstance().getEntry( url );
if(!dce.hasExpired())
{
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?