standardparserdata.java

来自「spam source codejasen-0.9jASEN - java An」· Java 代码 · 共 267 行

JAVA
267
字号
/*
 * @(#)StandardParserData.java	31/10/2004
 *
 * Copyright (c) 2004, 2005  jASEN.org
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice,
 *      this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in
 *      the documentation and/or other materials provided with the distribution.
 *
 *   3. The names of the authors may not be used to endorse or promote products
 *      derived from this software without specific prior written permission.
 *
 *   4. Any modification or additions to the software must be contributed back
 *      to the project.
 *
 *   5. Any investigation or reverse engineering of source code or binary to
 *      enable emails to bypass the filters, and hence inflict spam and or viruses
 *      onto users who use or do not use jASEN could subject the perpetrator to
 *      criminal and or civil liability.
 *
 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JASEN.ORG,
 * OR ANY CONTRIBUTORS TO THIS SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
package org.jasen.core;

import java.util.List;

import org.jasen.core.token.CountTokenErrorReport;
import org.jasen.core.token.ObfuscatedCharacterTokenErrorRecorder;
import org.jasen.interfaces.ParserData;
import org.jasen.interfaces.TokenErrorRecorder;
import org.jasen.interfaces.TokenErrorReport;

/**
 * <P>
 * 	Holds the information obtained from parsing and tokenizing the message.
 * </P>
 * @author Jason Polites
 */
public class StandardParserData implements ParserData
{
    /** Plain text extracted from the HTML parts of the message. */
    private String htmlAsText;

    /** Parsed (cleaned) text resulting from the message parse. */
    private String textParsed;

    /** Tokens produced by tokenizing the message. */
    private String[] messageTokens;

    private int concealedHtmlCount = 0;
    private int srcCgiCount = 0;
    private int imageCount = 0;
    private int srcPortCount = 0;
    private int falseAnchorCount = 0;

    /**
     * Lazily created by {@link #getTokenErrorRecorder()}.
     * Declared volatile so that the unsynchronized first check in that
     * method can never observe a partially constructed recorder.
     */
    private volatile TokenErrorRecorder recorder;

    /** TCP ports found appended to URLs in the HTML body. */
    private List ports;


    /**
     * Creates an empty data holder; all counts start at zero and all
     * object references start as null.
     */
    public StandardParserData() {
        super();
    }

    /*
     * (non-Javadoc)
     * @see org.jasen.interfaces.ParserData#getMessageTokens()
     */
    public String[] getMessageTokens() {
        return messageTokens;
    }

    /**
     * Sets the message tokens obtained from tokenization.
     * The array is stored by reference; it is not copied.
     * @param htmlTokens The tokens to store.
     * @see org.jasen.core.token.EmailTokenizer
     */
    public void setMessageTokens(String[] htmlTokens) {
        this.messageTokens = htmlTokens;
    }

    /*
     * (non-Javadoc)
     * @see org.jasen.interfaces.ParserData#getHtmlAsText()
     */
    public String getHtmlAsText() {
        return htmlAsText;
    }

    /**
     * Sets the parsed html.  That is, the plain text components of the html in the message.
     * @param parsedHtml The plain text extracted from the html.
     */
    public void setHtmlAsText(String parsedHtml) {
        this.htmlAsText = parsedHtml;
    }

    /**
     * Gets the number of occurrences of concealed HTML.
     * @return Returns the concealedHtmlCount.
     */
    public int getConcealedHtmlCount() {
        return concealedHtmlCount;
    }

    /**
     * Sets the number of occurrences of concealed HTML.
     * @param concealedHtmlCount The concealedHtmlCount to set.
     */
    public void setConcealedHtmlCount(int concealedHtmlCount) {
        this.concealedHtmlCount = concealedHtmlCount;
    }

    /**
     * Gets the number of images in the email body.
     * @return Returns the imageCount.
     */
    public int getImageCount() {
        return imageCount;
    }

    /**
     * Sets the number of images in the email body.
     * @param imageCount The imageCount to set.
     */
    public void setImageCount(int imageCount) {
        this.imageCount = imageCount;
    }

    /**
     * Gets the number of occurrences of SRC cgi references.
     * <P>
     * That is, occurrences of HTML tags where a SRC (or other remote reference) which
     * would normally be expected to be a flat file (eg the IMG tag) was found to reference
     * a cgi script or similar.  This often indicates the presence of mail bugs.
     * </P>
     * @return Returns the srcCgiCount.
     */
    public int getSrcCgiCount() {
        return srcCgiCount;
    }

    /**
     * Sets the number of occurrences of SRC cgi references.
     * @param srcCgiCount The srcCgiCount to set.
     * @see StandardParserData#getSrcCgiCount()
     */
    public void setSrcCgiCount(int srcCgiCount) {
        this.srcCgiCount = srcCgiCount;
    }

    /*
     * (non-Javadoc)
     * @see org.jasen.interfaces.ParserData#getTextParsed()
     */
    public String getTextParsed() {
        return textParsed;
    }

    /**
     * Sets the parsed (cleaned) text resulting from the message parse.
     * @param textParsed The cleaned text to store.
     */
    public void setTextParsed(String textParsed) {
        this.textParsed = textParsed;
    }

    /**
     * Gets the list of TCP ports found appended to URLs in the HTML body of the message.
     * The stored list is returned directly (no copy); it is null until
     * {@link #setPorts(List)} has been called.
     * @return A list of String objects
     */
    public List getPorts() {
        return ports;
    }

    /**
     * Sets the list of TCP ports found appended to URLs in the HTML body of the message.
     * The list is stored by reference; it is not copied.
     * @param ports A list of String objects
     */
    public void setPorts(List ports) {
        this.ports = ports;
    }

    /**
     * Gets the number of anchor or image src (or href) attributes which had alternate TCP ports appended.
     * @return The number of occurrences.
     */
    public int getSrcPortCount() {
        return srcPortCount;
    }

    /**
     * Sets the number of anchor or image src (or href) attributes which had alternate TCP ports appended.
     * @param srcPortCount The number of occurrences.
     */
    public void setSrcPortCount(int srcPortCount) {
        this.srcPortCount = srcPortCount;
    }

    /**
     * Gets the count of anchor tags whose text was URL text (eg http://...)
     * but did not match the href attribute.
     * @return The number of occurrences.
     */
    public int getFalseAnchorCount() {
        return falseAnchorCount;
    }

    /**
     * Sets the count of anchor tags whose text was URL text (eg http://...) but did not match the href attribute.
     * @param falseAnchorCount The number of occurrences.
     */
    public void setFalseAnchorCount(int falseAnchorCount) {
        this.falseAnchorCount = falseAnchorCount;
    }

    /**
     * Gets the number of character obfuscation observations.
     * <br/>
     * These are instances where non ascii characters are used to obscure normal words.
     * <br/>
     * This method never creates the recorder: if no recorder exists yet, or its
     * report is not a {@link CountTokenErrorReport}, zero is returned.
     * @return The number of occurrences.
     */
    public int getObfuscatedCharacterCount() {

        // Read the volatile field once so the null check and the use agree.
        TokenErrorRecorder current = recorder;

        if(current != null) {
            TokenErrorReport report = current.getReport();

            if(report instanceof CountTokenErrorReport) {
                return ((CountTokenErrorReport)report).getCount();
            }
        }

        return 0;
    }

    /*
     * (non-Javadoc)
     * Creates the recorder on first use.  The first null check is made
     * without the lock and the second one while holding the monitor on
     * this instance, so at most one recorder is ever created; the field
     * is volatile so the unlocked read is safe.
     * @see org.jasen.interfaces.ParserData#getTokenErrorRecorder()
     */
    public TokenErrorRecorder getTokenErrorRecorder() {
        TokenErrorRecorder current = recorder;

        if(current == null) {
            synchronized(this) {
                // Re-check under the lock: another thread may have won the race.
                current = recorder;

                if(current == null) {
                    current = new ObfuscatedCharacterTokenErrorRecorder();
                    recorder = current;
                }

                // NOTE(review): nothing in this class waits on this monitor;
                // kept in case external code does - confirm and remove if not.
                notifyAll();
            }
        }

        return current;
    }
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?