⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 logfileqsort.java

📁 一个数据挖掘软件 AlphaMiner 的整个过程的 Java 版源代码
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
             // mark the new end of the list and add the "floating" part to the end of the list
             miningVectorStorageData.lastElement.nextElement = null;
             decompListLastElement.nextElement = miningVectorStorageData.firstElement;

             // update entry marking the end of the list
             decompListLastElement = miningVectorStorageData.lastElement;
           }

       } else // if (decompHashtable.containsKey(transactionIDString))
       {
           // create new control entry for hash table
           miningVectorStorageData = new DecompHashClass();

           miningVectorListEntry.transactionIDNumber = decompTransactionIDCounter;
           miningVectorStorageData.transactionIDNumber = decompTransactionIDCounter;

           // append miningVectorListEntry to list
           if (decompListLastElement != null)  // list empty?
           {
             // remember the transaction ID number of the previously saved transaction
             miningVectorStorageData.previousTransactionIDString = decompListLastElement.transactionIDString;

             decompListLastElement.nextElement = miningVectorListEntry;
             decompListLastElement = miningVectorListEntry;
           } else
           {
             // no previous transaction
             miningVectorStorageData.previousTransactionIDString = null;

             decompListFirstElement = miningVectorListEntry;
             decompListLastElement = miningVectorListEntry;
           }

           // add control entry to hash table (w/ initialised ptrs)
           miningVectorStorageData.firstElement = miningVectorListEntry;
           miningVectorStorageData.lastElement = miningVectorListEntry;
           decompHashtable.put(transactionIDString, miningVectorStorageData);

           decompTransactionIDCounter++;
       }

       long end = ( new java.util.Date() ).getTime();
       sortTimeTaken += end - start;
     } // while ((decompListEntries <= decompWindowSize)&&(processedStream.next()))

// -------------------  end of init part ------------------------------------

       // if list is empty, this means there are no more entries available
       // (local list-"buffer" is empty and p/.next() returned false)
     if (decompListEntries == 0)
       return false;

     double instance[] = new double[3];

     // deliver first entry in list

     // convert data from sorting block back to mining vectors
     MiningAttribute attributeTransactionID = processedStream.metaDataProcessed.getMiningAttribute("transactionId");
     Category transactionCat = new Category( ""+decompListFirstElement.transactionIDNumber);
     double d0 = ((CategoricalAttribute)attributeTransactionID).getKey( transactionCat );
     instance[0] = Category.isMissingValue(d0) ? ((CategoricalAttribute)attributeTransactionID).addCategory( transactionCat ): d0;

     instance[1] = decompListFirstElement.itemIDkey;

     // numerical attribute is not yet initialised, still
     instance[2] = Category.MISSING_VALUE;

     nextMiningVector = new MiningVector(instance);
     nextMiningVector.setMetaData(processedStream.metaDataProcessed);

     nextTransactionID = decompListFirstElement.transactionIDNumber;

     // delete entry - update structures
     decompListEntries--;

     // Update Hashtable (update first element, if applicable delete whole hashtable entry)
       // retrieve entry from hashtable
     miningVectorStorageData = (DecompHashClass)decompHashtable.get(decompListFirstElement.transactionIDString);

     // - Update Hashtable

     // check if current entry was last one of transaction recorded in hashtable
     if (miningVectorStorageData.firstElement != miningVectorStorageData.lastElement)
     {
       // no, only update ptr to first entry
       miningVectorStorageData.firstElement = miningVectorStorageData.firstElement.nextElement;
     } else
     {
        // before deleting hash table entry update control entry referencing it
        if (miningVectorStorageData.lastElement.nextElement != null)
        {
          DecompHashClass nextControlEntry = (DecompHashClass)decompHashtable.get(miningVectorStorageData.lastElement.nextElement.transactionIDString);
          nextControlEntry.previousTransactionIDString = null;
        }

        // now eliminate whole hashtable entry
        decompHashtable.remove(decompListFirstElement.transactionIDString);
     }

     // - Update decompListFirstElement
     decompListFirstElement = decompListFirstElement.nextElement;

     return true;
   }



   /**
    * Advances the sorted stream by one step.
    * <p>
    * This override contains no sorting logic of its own. It flags the current
    * mining vector as updated, forwards the call to the routine that belongs to
    * the configured {@code sortingMethod}, and adds the elapsed wall-clock time
    * to {@code totalTimeTaken}. The public interface is therefore the same for
    * every sorting algorithm.
    *
    * @return whatever the selected routine reports (presumably {@code true}
    *         while another vector could be delivered - confirm against the
    *         individual routines)
    * @throws MiningException if {@code sortingMethod} matches none of the known
    *         {@code METHOD_*} constants, or if the selected routine throws;
    *         in both cases {@code totalTimeTaken} is not updated
    */
   public boolean next() throws MiningException
    {
      nextMiningVectorUpdated = true;

      final long startedAt = System.currentTimeMillis();

      // dispatch to the routine implementing the selected sorting method
      final boolean delivered;
      if (sortingMethod == METHOD_TRIVIAL)
      {
        delivered = trivialNext();
      }
      else if (sortingMethod == METHOD_GLOBALBLOCK)
      {
        delivered = globalBlockNext();
      }
      else if (sortingMethod == METHOD_DECOMPBLOCK)
      {
        delivered = decompBlockNext();
      }
      else if (sortingMethod == METHOD_DECOMP)
      {
        delivered = decompNext();
      }
      else
      {
        throw new MiningException("Function next(): Unknown sorting method requested (" + sortingMethod + ").");
      }

      // account for the time spent in this call
      final long elapsed = System.currentTimeMillis() - startedAt;
      totalTimeTaken += elapsed;

      return delivered;
    }


    /**
     * Rewinds the (sorted) data stream so that it can be read again.
     * <p>
     * The underlying processed stream is reset first; afterwards the session
     * counters are put back to their start values and the current sorting
     * method is re-applied through {@code setSortingMethod} (presumably this
     * re-initialises the method-specific state - confirm in that method).
     *
     * @throws MiningException if resetting the processed stream or re-applying
     *         the sorting method fails
     */
    public void reset() throws MiningException
    {
      // rewind the preprocessed input before touching any local state
      this.processedStream.reset();

      // start a fresh session
      this.itemIndexCurrent = 1;
      this.transactionIDLastSeen = -1;

      // re-select the method that is already configured
      this.setSortingMethod(this.sortingMethod);
    }

    /**
     * Log file recognition is not available through this class.
     * <p>
     * According to the original author, recognition happens autonomously and is
     * triggered by the preprocessor class in use. Calling this method therefore
     * always fails.
     *
     * @return nothing; the method never returns normally
     * @throws MiningException always, with the message "Not implemented"
     */
    public MiningDataSpecification recognize() throws MiningException
    {
      final String reason = "Not implemented";
      throw new MiningException(reason);
    }

    /**
     * Opens the log file for reading - a no-op in this class.
     * <p>
     * The original author notes that the preprocessor used by this class
     * manages the underlying log stream on its own, so nothing is done here.
     *
     * @throws MiningException declared for interface compatibility only; this
     *         implementation never throws it
     */
    public void open() throws MiningException
    {
      // intentionally empty
    }

    /**
     * Closes the log file - a no-op in this class.
     * <p>
     * The original author notes that the preprocessor used by this class
     * manages the underlying log stream on its own, so nothing is done here.
     *
     * @throws MiningException declared for interface compatibility only; this
     *         implementation never throws it
     */
    public void close() throws MiningException
    {
      // intentionally empty
    }

    /**
     * Unsupported operation: this class does not determine a physical model.
     *
     * @throws MiningException always, stating that the method is not implemented
     */
    public void findPhysicalModel() throws MiningException
    {
      final String reason = "findPhysicalModel() is not implemented.";
      throw new MiningException(reason);
    }

    /**
     * Unsupported operation: no physical-to-logical model transformation is
     * provided by this class.
     *
     * @return nothing; the method never returns normally
     * @throws MiningException always, stating that the method is not implemented
     */
    public org.omg.cwm.analysis.transformation.TransformationMap getPhysicalToLogicalModelTransformation() throws MiningException
    {
      final String reason = "getPhysicalToLogicalModelTransformation() is not implemented.";
      throw new MiningException(reason);
    }


    /**
     * Lists the names of the stream methods this class supports.
     * The enumeration contains exactly one entry, the string "reset".
     *
     * @return enumeration over the supported stream method names
     */
    public Enumeration getSupportedStreamMethods()
    {
      // a one-element list is enough - "reset" is the only supported method
      return java.util.Collections.enumeration(java.util.Collections.singletonList("reset"));
    }


    /**
     * Command-line demo: parses a web server log, sorts it with the
     * {@code METHOD_DECOMP} strategy and prints the first vectors together with
     * timing statistics.
     * <p>
     * A {@link MiningException} raised anywhere in the run is caught and its
     * stack trace is printed.
     *
     * @param args optional; when at least one argument is given, {@code args[0]}
     *             is used as the path of the log file to analyse. Without
     *             arguments the WebShopNCSA sample log below is read, exactly
     *             as before.
     */
    public static void main(String[] args)
     {

       try {
         // Default input. Other inputs the original author experimented with
         // (pass one of them as first command-line argument to use it):
         //   config\\config.properties
         //   data\\logs\\NCSA Combined Log File Format.log
         //   data\\logs\\NCSA Common Log File Format.log
         //   data\\logs\\Extended Log File Format.log
         //   data\\logs\\IIS Log File Format.log
         //   data\\logs\\Intershop Log File Format.log
         //   data\\logs\\Shop Log File Format.log
         //   data\\logs\\UNRECOGNIZED.log
         String fileName = "data\\logs\\WebShopNCSA.log";
         if (args != null && args.length > 0)
           fileName = args[0];

         LogFileQSort converter = new LogFileQSort(fileName);

         // configure "parsing tree"
         // (the parse actions are registered as a side effect of the add... calls;
         //  each returned action serves as parent handle for the next level)
         LogFileQParse.LogFileQParseAction extrquote = converter.getPreprocessor().addParseActionExtract(null, true, true, true, LogFileQParse.PARSE_EXTRACT,1,'\"',1,'\"');
         LogFileQParse.LogFileQParseAction extrtransac = converter.getPreprocessor().addParseActionPerl(extrquote, true, true, false, LogFileQParse.PARSE_PERL_MATCHEXTRACT, "/[0-9]{4,10}\\-[0-9]{8}/");
         LogFileQParse.LogFileQParseAction extritem21 = converter.getPreprocessor().addParseActionPerl(extrtransac, false, false, true, LogFileQParse.PARSE_PERL_MATCHEXTRACT, "/\\.cgi\\?id=[0-9]{2,20}&p=3/");
         LogFileQParse.LogFileQParseAction extritem22 = converter.getPreprocessor().addParseActionExtract(extritem21, true, false, true, LogFileQParse.PARSE_EXTRACT,1,'=',1,'&');
         LogFileQParse.LogFileQParseAction extritem11 = converter.getPreprocessor().addParseActionPerl(extritem21, false, false, true, LogFileQParse.PARSE_PERL_MATCHEXTRACT, "/3\\-[0-9]{2,20}\\-[1-9].html/");
         LogFileQParse.LogFileQParseAction extritem12 = converter.getPreprocessor().addParseActionExtract(extritem11, true, false, true, LogFileQParse.PARSE_EXTRACT, 1,'-',1,'-');

     //  for IIS format
/*         LogFileQParse.LogFileQParseAction extrtransac = converter.getPreprocessor().addParseActionPrefix(null, true, true, false, LogFileQParse.PARSE_PREFIX,',');
         LogFileQParse.LogFileQParseAction extrquote = converter.getPreprocessor().addParseActionExtract(extrtransac, false, false, true, LogFileQParse.PARSE_EXTRACT,1,'/',1,',');
*/

         int method = METHOD_DECOMP;
         converter.setSortingMethod(method);
         //converter.decompOptimize = false;
         converter.setBlockSize(10000);
         converter.setItemIDType( CategoricalAttribute.INTEGER );  // items as integers

         System.out.println("Analysing...");

         // overall timer - time taken for parsing AND sorting is measured
         long start = System.currentTimeMillis();

         MiningVector miningVector;
         MiningDataSpecification metaData = converter.getMetaData();
         CategoricalAttribute itemId = (CategoricalAttribute) metaData.getMiningAttribute(1);
         System.out.println("log meta Data: " + metaData);
         long i=0;
         while( converter.next() )
         {
           i++;
           miningVector = converter.read();
           // NOTE(review): the result of this second read() is discarded; it
           // presumably checks that repeated read() calls are harmless -
           // confirm before removing.
           converter.read();

           if (i<100) {    // do not output long logs to screen
             System.out.print(miningVector);

             // Check and show item type
             double ival  = miningVector.getValue(1);
             Category cat = itemId.getCategory(ival);
             String itype = "string";
             if ( cat.getValue() instanceof Integer )
               itype = "int";
             System.out.println(" =>   item = " + cat + " itype = " + itype);
           }
         }

         if (i>=100)    // do not output ultra-long logs to screen
            System.out.print("\nLast Vector:\n" +  converter.read());

         long end = System.currentTimeMillis();
         long programTime = end - start;

         System.out.println("\nLog reading was finished.\n");
         System.out.println("Lines Total: " +  converter.processedStream.numLinesCount + ", invalid lines: " + converter.processedStream.numLinesInvalid);
         System.out.println("\nSorting took " + converter.sortTimeTaken + " ms, reading and parsing took "+(converter.totalTimeTaken - converter.sortTimeTaken) +  " ms, total runtime = " + programTime + " ms.\n");
         if (LogFileQSort.decompBlockDebug)
             System.out.println("Wrong (artificial) transactions: " + converter.decompBlockDebugWrongSingle + ", appearing in multiple blocks: " + converter.decompBlockDebugWrongMulti);
         if (method == METHOD_DECOMP)
             System.out.println("TID Counter: " + (converter.decompTransactionIDCounter-1));
       }
       catch (MiningException ex) {
         ex.printStackTrace();
       }
     }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -