⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 arale.java

📁 一个网络爬虫
💻 JAVA
字号:
package org.flaviotordini.arale;

import java.util.*;
import java.text.*;
import java.net.*;
import java.io.*;

/**
 *  Arale main class. Holds the queue of URLs to process and the thread group
 *  in which {@link AraleThread} runnables are started, and logs run
 *  statistics when processing ends.
 *
 *@author     Flavio Tordini
 *@created    25 novembre 2001
 */
public class Arale {

    /**
     *  Application name.
     */
    public final static String ARALE_APPNAME = "Arale";
    /**
     *  Major component of the application version number.
     */
    public final static int ARALE_MAJOR_VERSION = 1;
    /**
     *  Minor component of the application version number.
     */
    public final static int ARALE_MINOR_VERSION = 0;
    /**
     *  Release type qualifier of the application version; empty here.
     */
    public final static String ARALE_RELEASE_TYPE = "";

    /**
     *  Stream buffer size (presumably in bytes; not used inside this class).
     */
    public final static int STREAM_BUFFER_SIZE = 4096;

    /**
     *  Logger shared by all instances. NOTE(review): the field is static but
     *  is replaced with a fresh Logger every time the constructor runs.
     */
    static Logger logger;
    /**
     *  Timestamp that {@link #endProcess()} subtracts from
     *  System.currentTimeMillis(); it is not assigned inside this class.
     */
    long startTime;
    /**
     *  Set created by the constructor; not otherwise used inside this class.
     */
    Set threads;
    /**
     *  List created by the constructor; not otherwise used inside this class
     *  (presumably the URLs already processed - confirm against the callers).
     */
    List visitedUrls;
    /**
     *  ContextualURL objects appended by {@link #followLink(ContextualURL)}.
     */
    List queuedUrls;
    /**
     *  Thread group in which {@link #startThread()} starts its threads.
     */
    ThreadGroup threadGroup;
    /**
     *  Byte count reported by {@link #endProcess()}; it is not updated inside
     *  this class.
     */
    long writtenBytesCount;
    /**
     *  Settings object whose maxThreads value limits {@link #startThread()}.
     *  It is not assigned inside this class, so it must be set before
     *  followLink or startThread is called.
     */
    AraleSettings settings;


    /**
     *  Constructor for the Arale object. Creates the URL lists, the thread set
     *  and the thread group, then installs a new Logger with "arale.log" as
     *  its log file.
     *
     *@exception  Exception  declared by the signature; which call raises it
     *      is not visible in this class
     *@since                 26 novembre 2001
     */
    public Arale() throws Exception {

        visitedUrls = new Vector(100);
        queuedUrls = new Vector(100);
        threads = new HashSet(32);
        threadGroup = new ThreadGroup("Arale threads");

        logger = new Logger();
        logger.setLogFile("arale.log");

    }


    /**
     *  Adds a ContextualURL to the queue and then asks for a new thread to be
     *  started (subject to the limit enforced by {@link #startThread()}).
     *
     *@param  contextualURL  the URL to append to the queue
     */
    public void followLink(ContextualURL contextualURL) {
        queuedUrls.add(contextualURL);
        startThread();
    }


    /**
     *  Starts a new thread running an AraleThread if the thread group's
     *  active count is below settings.maxThreads.
     *
     *  The method is synchronized so that the limit check and the thread start
     *  happen atomically: without it, two concurrent callers could both pass
     *  the check and push the number of threads past the limit. Note that
     *  ThreadGroup.activeCount() is documented as an estimate.
     */
    public synchronized void startThread() {

        if (threadGroup.activeCount() < settings.maxThreads) {
            AraleThread aralethread = new AraleThread(this);
            new Thread(threadGroup, aralethread).start();
        }
    }


    /**
     *  Logs the end-of-run statistics: the time elapsed since startTime and
     *  the value of writtenBytesCount, the latter both in bytes and divided
     *  by 1048576.
     */
    public void endProcess() {

        logger.log("*** statistics: ***");

        long processingTime = System.currentTimeMillis() - startTime;
        String str_processingtime;
        if (processingTime < 1000) {
            str_processingtime = processingTime + "ms";
        } else {
            // Integer division: elapsed time is reported in whole seconds.
            str_processingtime = processingTime / 1000 + "sec";
        }
        logger.log("processing time: " + str_processingtime);

        NumberFormat nf = NumberFormat.getInstance();

        double megstodisk = (double) writtenBytesCount / 1048576.0;
        logger.log("written to disk: " + nf.format(writtenBytesCount) + "bytes (" + nf.format(megstodisk) + "Mb)");

    }

}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -