⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 logfilerecognizer.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
        // it means: find all lines with any symbols between '#' and ':' in the begining
        regularExpr = "^#\\w*:";
        // set field separator to devide directive line on two substring - directive name and directive value
        processor.setFieldSeparator( "[#:]" ); // ""[#:]""
        processor.addAction(regularExpr, new MatchAction()
        {
            @SuppressWarnings("unchecked")
			public void processMatch(MatchActionInfo info)
            {
                logFileDirectives.put( (String)info.fields.get(1) , (String)info.fields.get(2) );
            }
        });
        FileInputStream fis = new FileInputStream( file );
        processor.processMatches(fis, System.out);
        fis.close();
        //LogFileUtils.print( logFileDirectives );
        return logFileDirectives;
    }


    /**
     * Creates a numeric attribute (integer, lower bound 0)
     * (the attribute is generic in the given scenario)
     * <p>
     * @param attrMDS mining data specification the attribute is added to
     * @param attrName name of the attribute to be added
     */
    private void addGenericNumericAttribute(MiningDataSpecification attrMDS, String attrName)
    {
        // Describe the column as an integer-typed numeric attribute with lower bound 0.
        final NumericAttribute numeric = new NumericAttribute(attrName);
        numeric.setDataType(NumericAttribute.INTEGER);
        numeric.setLowerBound(0);

        // Register the finished attribute with the supplied specification.
        attrMDS.addMiningAttribute(numeric);
    }

    /**
     * Creates a categoric attribute
     * <p>
     * @param attrMDS Mining Data Specification
     * @param attrName name of the attribute to be added
     */
    private void addCategoricAttribute(MiningDataSpecification attrMDS, String attrName)
    {
        final CategoricalAttribute categorical = new CategoricalAttribute(attrName);

        // Unbounded categories are used on purpose instead of unstored categories:
        // per the maintainer's note (Kelvin Jor), unstored categories caused a
        // problem when LogFileStream reads a line.
        categorical.setUnboundedCategories(true);

        attrMDS.addMiningAttribute(categorical);
    }

    /**
     * Implementations of the W3C ELFF standard typically use the scheme "prefix(Header)"
     * for HTTP header field identifiers instead of the form "prefix-identifier".
     * There are also some ambiguities (due to optional prefixes).
     * The attribute names are checked and the issues mentioned are resolved,
     * which makes the naming scheme much more stringent.
     * <p>
     * @param attrName Name of attribute
     * @return New name of attribute
     */
    private String remapElffName(String attrName)
    {
        // Rewrite the alternative scheme "prefix(Header)" as "prefix-header":
        // '(' becomes '-', ')' becomes a blank which trim() removes when it is the
        // last character. Lower-casing is done with Locale.ROOT so the result does
        // not depend on the JVM's default locale (with a Turkish default locale the
        // plain toLowerCase() would map 'I' to the dotless 'ı').
        String normalized = attrName.replace('(', '-')
                                    .replace(')', ' ')
                                    .trim()
                                    .toLowerCase(java.util.Locale.ROOT);

        // Remap to avoid ambiguity: "sc-bytes" is reported simply as "bytes".
        if (normalized.equalsIgnoreCase("sc-bytes"))
        {
            return "bytes";
        }
        return normalized;
    }


    /**
     * Returns meta data corresponding to recognized logfile.
     *
     * @return meta data of logfile
     */
    public MiningDataSpecification buildMiningDataSpecification(  )
    {
        final MiningDataSpecification spec = new MiningDataSpecification();
        spec.setRelationName( getLogFileNameNoExtension() + " (" + this.getDescription() + ")" );

        // Trim the stored field list (the trimmed value is written back to the
        // field) and let the Perl utility split it into individual names.
        fieldsNames = fieldsNames.trim();
        final Vector tokens = new Vector();
        perl.split( tokens, fieldsNames );

        for (Object token : tokens)
        {
            // Make sure the name conforms to the naming scheme.
            final String attrName = remapElffName( (String) token );

            // A few interesting identifiers are mapped manually to numeric
            // attributes; every other field becomes a categorical attribute.
            final boolean numeric =
                   attrName.equalsIgnoreCase( "sc-status" )
                || attrName.equalsIgnoreCase( "bytes" )
                || attrName.equalsIgnoreCase( "cs-bytes" )
                || attrName.equalsIgnoreCase( "time-taken" )
                || attrName.equalsIgnoreCase( "sc-win32-status" );

            if (numeric)
            {
                addGenericNumericAttribute(spec, attrName);
            }
            else
            {
                addCategoricAttribute(spec, attrName);
            }
        }
        return spec;
    }

    /**
     * Create log file fields string for the unrecognized log file
     * <p>
     * @return String instance containing log file fields.
     */
    private String buildLogFileFieldsUnrecognized() {
        // Produces "column0 column1 ... " with one name per unrecognized field
        // (logFileUnrecognizedFields of them), each followed by a single blank;
        // the result is "" when that count is zero or negative.
        // A StringBuilder avoids re-copying the growing string on every iteration,
        // and the local name no longer shadows the class's fieldsNames field.
        StringBuilder generatedNames = new StringBuilder();
        for( int i = 0; i < logFileUnrecognizedFields; i++ )
        {
            generatedNames.append( "column" ).append( i ).append( ' ' );
        }
        return generatedNames.toString();
    }

    /**
     * Check log file header to recognize if there are some special fields.
     * <p>
     * @param logFileDirectives Log file directives and their values.
     * @return int containing log file type.
     */
    private int checkLogFileDirectives( Hashtable logFileDirectives )
    {
        // Without directives, or without the fields directive among them,
        // there is nothing to classify.
        if (logFileDirectives == null
            || !logFileDirectives.containsKey(DIRECTIVE_FIELDS_EXTENDED_LOG_FILE_FORMAT))
        {
            return NO_DIRECTIVES_LOG_FILE_FORMAT;
        }

        // Split the value of the fields directive on single blanks.
        final String fieldList = (String)logFileDirectives.get( DIRECTIVE_FIELDS_EXTENDED_LOG_FILE_FORMAT );
        final Vector tokens = new Vector();
        perl.split( tokens, "/ /", fieldList );

        // An empty result is treated as a corrupted log file.
        if (tokens.isEmpty())
        {
            return NO_DIRECTIVES_LOG_FILE_FORMAT;
        }

        // Pick separator, description and type from the marker field names:
        // the ZSoft meta marker is tested first, then the shop marker; anything
        // else is taken to be the W3C extended format.
        final String separator;
        final String formatName;
        final int formatType;
        if (search( tokens, LogFileRecognizer.ZSOFT_META_LOG_FILE_FORMAT_FIELDNAME ))
        {
            separator = "/[;]+/";
            formatName = "ZSoft Meta Log File Format";
            formatType = ZSoft_Meta_Log_File_Format;
        }
        else if (search( tokens, SHOP_LOG_FILE_FORMAT_FIELDNAME ))
        {
            separator = "/[ ]+/";
            formatName = "Shop Log File Format";
            formatType = Shop_Log_File_Format;
        }
        else
        {
            separator = "/[\t]+/";
            formatName = "W3C Extended Log File Format";
            formatType = Extended_Log_File_Format;
        }

        // Record the outcome in the recognizer's state; every recognized format
        // keeps the raw field list as its field names.
        logFileSeparator = separator;
        fieldsNames = fieldList;
        description = formatName;
        return formatType;
    }

    /**
     * Checks whether the beginning of the log file contains at least one line
     * that matches the given pattern.
     * <p>
     * @param pattern Pattern to find in the log file.
     * @param file Log file.
     * @return true if at least one line matches the pattern, false otherwise.
     * @throws MalformedPatternException If the pattern is rejected as malformed.
     * @throws IOException If there is a problem while reading from the log file.
     * @throws FileNotFoundException If the log file cannot be found.
     */
    private boolean checkSpecificPatternsInLogFile( String pattern, File file ) throws MalformedPatternException, FileNotFoundException, IOException
    {
        // Count every match of the pattern in the shared logFileCounter field.
        MatchActionProcessor processor = new MatchActionProcessor();
        logFileCounter = 0;
        processor.addAction(pattern, new MatchAction()
        {
            public void processMatch(MatchActionInfo info)
            {
                logFileCounter++;
            }
        });

        // Read at most bytesToRecognize bytes from the start of the file.
        // A single read() may deliver fewer bytes than requested, so keep reading
        // until the buffer is full or the end of the file is reached, and remember
        // how many bytes are actually valid.
        byte[] buf = new byte[ bytesToRecognize ];
        int filled = 0;
        FileInputStream fis = new FileInputStream( file );
        try
        {
            while( filled < buf.length )
            {
                int count = fis.read( buf, filled, buf.length - filled );
                if( count < 0 )
                {
                    break; // end of file
                }
                filled += count;
            }
        }
        finally
        {
            // Release the file handle even when reading fails.
            fis.close();
        }

        // Hand only the bytes that were really read to the matcher; the unused
        // zero-filled rest of the buffer is not part of the log file.
        ByteArrayInputStream bais = new ByteArrayInputStream( buf, 0, filled );
        processor.processMatches( bais, System.out );
        bais.close();
        return logFileCounter > 0;
    }

    /**
     * Searches for string in the vector.
     * <p>
     * @param v The vector where search.
     * @param s The string to search for.
     * @return boolean containing true if vector includes this string, false otherwise.
     */
    private static boolean search(Vector v, String s)
    {
        // Nothing to search in, or nothing to search for: no match.
        if (v == null || s == null)
        {
            return false;
        }
        // Vector.contains compares with s.equals(element), so null or non-String
        // elements simply do not match instead of raising an exception.
        return v.contains(s);
    }

    /**
     * Returns number of bytes required to recognize logfile.
     *
     * @return number of bytes required to recognize logfile
     */
    public int getBytesToRecognize()
    {
        // Plain accessor for the bytesToRecognize field.
        return this.bytesToRecognize;
    }

    /**
     * Returns logfile type.
     *
     * @return logfile type
     */
    public int getLogFileType()
    {
        // Plain accessor for the logFileType field.
        return this.logFileType;
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -