⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 thumbelina.java

📁 html 解析处理代码
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
    }    /**     * Get the links of an element of a document.     * Only gets the links on IMG elements that reference another image.     * The latter is based on suffix (.jpg, .gif and .png).     * @param lexer The fully conditioned lexer, ready to rock.     * @param docbase The url to read.     * @return The URLs, targets of the IMG links;     * @exception IOException If the underlying infrastructure throws it.     * @exception ParserException If there is a problem parsing the url.     */    protected URL[][] extractImageLinks (final Lexer lexer, final URL docbase)        throws            IOException,            ParserException    {        HashMap images;        HashMap links;        boolean ina; // true when within a <A></A> pair        Node node;        Tag tag;        String name;        Tag startatag;        Tag imgtag;        String href;        String src;        URL url;        URL[][] ret;        images = new HashMap ();        links = new HashMap ();        ina = false;        startatag = null;        imgtag = null;        while (null != (node = lexer.nextNode ()))        {            if (node instanceof Tag)            {                tag = (Tag)node;                name = tag.getTagName ();                if ("A".equals (name))                {                    if (tag.isEndTag ())                    {                        ina = false;                        if (null != imgtag)                        {                            // evidence of a thumb                            href = startatag.getAttribute ("HREF");                            if (null != href)                            {                                if (isImage (href))                                {                                    src = imgtag.getAttribute ("SRC");                                    if (null != src)                                        try                                        {                                            url = new URL (docbase, href); 
                                           // eliminate duplicates                                            href = url.toExternalForm ();                                            if (!images.containsKey (href))                                                images.put (href, url);                                        }                                        catch (MalformedURLException murle)                                        {                                            // oops, forget it                                        }                                }                            }                        }                    }                    else                    {                        startatag = tag;                        imgtag = null;                        ina = true;                        href = startatag.getAttribute ("HREF");                        if (null != href)                        {                            if (!isImage (href))                                try                                {                                    url = new URL (docbase, href);                                    // eliminate duplicates                                    href = url.toExternalForm ();                                    if (!links.containsKey (href))                                        links.put (href, url);                                }                                catch (MalformedURLException murle)                                {                                    // well, obviously we don't want this one                                }                        }                    }                }                else if (ina && "IMG".equals (name))                    imgtag = tag;            }        }        ret = new URL[2][];        ret[0] = new URL[images.size ()];        images.values ().toArray (ret[0]);        ret[1] = new URL[links.size ()];        links.values ().toArray (ret[1]);        return (ret); 
   }    /**     * Get the image links from the current URL.     * @param url The URL to get the links from     * @return An array of two URL arrays, index 0 is a list of images,     * index 1 is a list of links to possibly follow.     */    protected URL[][] getImageLinks (final URL url)    {        Lexer lexer;        URL[][] ret;        if (null != url)        {            try            {                lexer = new Lexer (url.openConnection ());                ret = extractImageLinks (lexer, url);            }            catch (Throwable t)            {                System.out.println (t.getMessage ());                ret = NONE;            }        }        else            ret =  NONE;        return (ret);    }    /**     * Get the picture panel object encapsulated by this Thumbelina.     * @return The picture panel.     */    public PicturePanel getPicturePanel ()    {        return (mPicturePanel);    }    /**     * Add a PropertyChangeListener to the listener list.     * The listener is registered for all properties.     * @param listener The PropertyChangeListener to be added.     */    public void addPropertyChangeListener (        final PropertyChangeListener listener)    {        mPropertySupport.addPropertyChangeListener (listener);    }    /**     * Remove a PropertyChangeListener from the listener list.     * This removes a PropertyChangeListener that was registered for all     * properties.     * @param listener The PropertyChangeListener to be removed.     */    public void removePropertyChangeListener (        final PropertyChangeListener listener)    {        mPropertySupport.removePropertyChangeListener (listener);    }    /**     * Return the URL currently being examined.     * This is a bound property. Notifications are available via the     * PROP_CURRENT_URL_PROPERTY property.     * @return The size of the 'to be examined' list.     
*/    public String getCurrentURL ()    {        return (mCurrentURL);    }    /**     * Set the current URL being examined.     * @param url The url that is being examined.     */    protected void setCurrentURL (final String url)    {        String oldValue;        if (((null != url) && !url.equals (mCurrentURL))            || ((null == url) && (null != mCurrentURL)))        {            oldValue = mCurrentURL;            mCurrentURL = url;            mPropertySupport.firePropertyChange (                PROP_CURRENT_URL_PROPERTY, oldValue, url);        }    }    /**     * Apply a change in 'to be examined' URL list size.     * Sends notification via the <code>PROP_URL_QUEUE_PROPERTY</code> property     * and updates the status bar.     * @param original The original size of the list.     * @param current The new size of the list.     */    protected void updateQueueSize (final int original, final int current)    {        StringBuffer buffer;        buffer = new StringBuffer ();        buffer.append (current);        while (buffer.length () < 5)            buffer.insert (0, '0');        mQueueSize.setText (buffer.toString ());        mPropertySupport.firePropertyChange (            PROP_URL_QUEUE_PROPERTY, original, current);    }    /**     * Apply a change in 'visited' URL list size.     * Sends notification via the <code>PROP_URL_VISITED_PROPERTY</code>     * property and updates the status bar.     * @param original The original size of the list.     * @param current The new size of the list.     */    protected void updateVisitedSize (final int original, final int current)    {        StringBuffer buffer;        buffer = new StringBuffer ();        buffer.append (current);        while (buffer.length () < 5)            buffer.insert (0, '0');        mVisitedSize.setText (buffer.toString ());        mPropertySupport.firePropertyChange (            PROP_URL_VISITED_PROPERTY, original, current);    }    /**     * Fetch images.     
* Ask the toolkit to make the image from a URL, and add a tracker     * to handle it when it's received.     * Add details to the rquested and tracked lists and update     * the status bar.     * @param images The list of images to fetch.     */    protected void fetch (final URL[] images)    {        Image image;        Tracker tracker;        int size;        for (int j = 0; j < images.length; j++)        {            if (!mRequested.containsKey (                images[j].toExternalForm ()))            {                image = getToolkit ().createImage (images[j]);                tracker = new Tracker (images[j]);                synchronized (mTracked)                {                    size = mTracked.size () + 1;                    if (mQueueProgress.getMaximum () < size)                    {                        try                        {                            mTracked.wait ();                        }                        catch (InterruptedException ie)                        {                            // this won't happen, just continue on                        }                        }                    mRequested.put (images[j].toExternalForm (), images[j]);                    mTracked.put (images[j].toExternalForm (), images[j]);                    mQueueProgress.setValue (size);                    image.getWidth (tracker); // trigger the observer                }            }        }    }    //    // Runnable interface    //    /**     * The main processing loop.     * Pull suspect URLs off the queue one at a time, fetch and parse it,     * request images and enqueue further links.     
*/
    public void run ()
    {
        URL link;
        int original;
        String href;
        URL[][] urls;

        while (true)
        {
            try
            {
                link = null;
                original = -1;
                synchronized (mUrls)
                {
                    if (0 != mUrls.size ())
                    {
                        original = mUrls.size ();
                        link = (URL)mUrls.remove (0);
                    }
                }
                if (null == link)
                    // don't spin crazily on an empty list; the pause is
                    // taken after the mUrls monitor has been released so
                    // threads that synchronize on mUrls are not held up
                    Thread.sleep (100);
                else
                {
                    updateQueueSize (original, mUrls.size ());
                    href = link.toExternalForm ();
                    setCurrentURL (href);
                    mVisited.put (href, link);
                    updateVisitedSize (
                        mVisited.size () - 1, mVisited.size ());
                    urls = getImageLinks (link);
                    fetch (urls[0]);
                    // filtering and appending the other links is handed
                    // to a separate minimum priority thread
                    synchronized (mEnqueuers)
                    {
                        Enqueuer enqueuer = new Enqueuer (urls[1]);
                        enqueuer.setPriority (Thread.MIN_PRIORITY);
                        mEnqueuers.add (enqueuer);
                        enqueuer.start ();
                    }
                    setCurrentURL (null);
                }
                if (!mActive)
                    synchronized (mUrls)
                    {
                        mUrls.wait ();
                    }
            }
            catch (Throwable t)
            {
                // report and keep the loop alive
                t.printStackTrace ();
            }
        }
    }

    /**
     * The enqueuer threads that have been started and have not yet finished.
     * Access is guarded by synchronizing on the list itself.
     */
    static ArrayList mEnqueuers = new ArrayList ();

    /**
     * Thread that filters a list of URLs and appends the result.
     * It removes itself from <code>mEnqueuers</code> when it finishes.
     */
    class Enqueuer extends Thread
    {
        /**
         * The URLs to filter and append.
         */
        URL[] mList;

        /**
         * Create an enqueuer for the given URLs.
         * @param list The URLs to filter and append.
         */
        public Enqueuer (URL[] list)
        {
            mList = list;
        }

        /**
         * Filter and append the list, then deregister this thread.
         */
        public void run ()
        {
            try
            {
                append (filter (mList));
            }
            finally
            {
                // deregister even when append or filter throws, otherwise
                // the finished thread would stay in the list forever
                synchronized (mEnqueuers)
                {
                    mEnqueuers.remove (this);
                }
            }
        }
    }

    //
    // ItemListener interface
    //

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -