📄 logfileqsort.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
📖 第 1 页 / 共 4 页
字号:

       // hash table for storing all known transaction id strings
       Hashtable transactionIDHash = new Hashtable();
       long transactionIDCounter = 1; // note: numbering really starts w/ "1" !

       // read file data into memory structure
       MiningVector miningVector;
       String transactionIDString;

       while(processedStream.next())
       {
         miningVector = processedStream.read();

         // extract transaction id
         // transaction id is converted to a number using a hash table (all known tids are simply numbered)
         transactionIDString = new String(miningVector.toVector().elementAt(0).toString());

         // if transaction id is found in hash table, extract corresponding value
         // otherwise, t-id and new value are added to the hashtable
         if (transactionIDHash.containsKey(transactionIDString))
         {
           globalBlockSortList.transactionIDNumber = new Long(transactionIDHash.get(transactionIDString).toString()).longValue();
         } else
         {
           transactionIDHash.put(transactionIDString, new Long(transactionIDCounter));
           globalBlockSortList.transactionIDNumber = transactionIDCounter;
           transactionIDCounter++;
         }

         // get key of item id
         globalBlockSortList.itemIDkey = miningVector.getValue(1);

         globalBlockSortList.next = new GlobalBlockListClass(); // note: this yields a single unused element, but avoids an "if (ptr==null)" in the while loop
         globalBlockSortList = globalBlockSortList.next;

         globalBlockValidEntries++;
       }

       globalBlockSortList = anchor;
       anchor = null;

       // reserve continuous sort buffer in memory
       globalBlockSortArray = new GlobalBlockBaseClass[globalBlockValidEntries];

       // copy contents of list to sort buffer
       for (int i=0;i<globalBlockValidEntries;i++)
       {
         globalBlockSortArray[i] = new GlobalBlockBaseClass();
         globalBlockSortArray[i].itemIDkey = globalBlockSortList.itemIDkey;
         globalBlockSortArray[i].transactionIDNumber = globalBlockSortList.transactionIDNumber;
         globalBlockSortList = globalBlockSortList.next;
       }

       // this is the sorting part: a stable merge sort implementation is fed
       // w/ the data structure and a fitting comparator
       long start = ( new java.util.Date() ).getTime();
       Arrays.sort(globalBlockSortArray, new GlobalBlockComparator());
       long end = ( new java.util.Date() ).getTime();
       sortTimeTaken = end - start;

       // read cursor (cursor for providing data elements)
       globalBlockCursor = 0;
     } // end of initialisation

     // are there more vectors available in the block?
     if(globalBlockCursor<globalBlockValidEntries)
     {
       double instance[] = new double[3];

       // convert data from sorting block back to mining vectors
       // this is simple for the item-id, because it's a "stored" type and we have stored the key in memory
       // but a the transaction id needs to be generated anew, because it's an "unstored" type
       // (an besides there's no true reference available anyways)
       MiningAttribute attributeTransactionID = processedStream.metaDataProcessed.getMiningAttribute("transactionId");
       Category transactionCat = new Category( ""+globalBlockSortArray[globalBlockCursor].transactionIDNumber );
       double d0 = ((CategoricalAttribute)attributeTransactionID).getKey( transactionCat );
       instance[0] = Category.isMissingValue(d0) ? ((CategoricalAttribute)attributeTransactionID).addCategory( transactionCat ): d0;

       instance[1] = globalBlockSortArray[globalBlockCursor].itemIDkey;

       // numerical attribute is not yet initialised, still
       instance[2] = Category.MISSING_VALUE;

       nextMiningVector = new MiningVector(instance);
       nextMiningVector.setMetaData(processedStream.metaDataProcessed);

       nextTransactionID = globalBlockSortArray[globalBlockCursor].transactionIDNumber;

       globalBlockCursor++;

       return true;
     } else
       return false;
   }

   /**
    * next() implementation based on an algorithm that decomposes the data and processes it in equally sized chunks.<BR>
    * The data is processed sequentially in non-overlapping blocks, which induces
    * a certain amount of errors ("artificial" transactions); the advantage of
    * this algorithm is the linear runtime (vs. L) and constant amount of memory
    * required, so it can be applied to sources of any size.
    *
    * @throws MiningException
    * @return boolean
    */
   private boolean decompBlockNext() throws MiningException
   {
     // check if block contains data
     if (!decompBlockDataPresent)
     {
       decompBlockDataPresent = true;

       // uninitialised means we have to ensure, the input stream is ready for reading (start of stream)
       if (!decompBlockInitialised)
       {
         processedStream.reset();
         decompBlockInitialised = true;
       }

       decompBlockEntries = 0;
       decompBlockCursor = 0;

       Hashtable transactionIDHash = new Hashtable();

       // initialise TID counter w/ a value that will not clash w/ any earlier TID number
       int transactionIDCounter = decompBlockMaxTransactionNumber;

 //       System.out.println("memory " + Runtime.getRuntime().totalMemory()+ " free = " + Runtime.getRuntime().freeMemory());

       // read a block from file
       MiningVector miningVector;
       int i = 0;
       String transactionIDString;
       while ((decompBlockEntries<decompBlockBlockSize)&&(processedStream.next()))
       {
         decompBlockEntries++;

         miningVector = processedStream.read();

         // extract transaction id
         // transaction id is converted to a number using a hash table (all known tids are simply numbered)
         transactionIDString = new String(miningVector.toVector().elementAt(0).toString());

         if (transactionIDHash.containsKey(transactionIDString)) {
           decompBlockSortArray[i].transactionIDNumber = new Long(transactionIDHash.get(transactionIDString).toString()).intValue();
         }
         else {
           transactionIDCounter++;
           transactionIDHash.put(transactionIDString, new Long(transactionIDCounter));
           decompBlockSortArray[i].transactionIDNumber = transactionIDCounter;

           // in case of debugging "new" transaction id is checked versus global hashtable
           if(decompBlockDebug)
           {
             if (decompBlockDebugHashtable.containsKey(transactionIDString))
             {
               // in this case a wrong association happened
               decompBlockDebugWrongSingle++;
               // check if "wrong" session was already referenced
               if (!((Boolean)decompBlockDebugHashtable.get(transactionIDString)).booleanValue())
               {
                 decompBlockDebugHashtable.remove(transactionIDString);
                 decompBlockDebugHashtable.put(transactionIDString, new Boolean(true));
               } else
                 decompBlockDebugWrongMulti++;
             }
             else
               // also add transaction id to global hashtable
               decompBlockDebugHashtable.put(transactionIDString, new Boolean(false));
           }

         }
         // copy key of item
         decompBlockSortArray[i].itemIDkey = miningVector.getValue(1);
         i++;
       }

       // remember new "highest" transaction number
       decompBlockMaxTransactionNumber = transactionIDCounter;

       // reached end of file (we were unable to read any more records)
       if (decompBlockEntries==0)
         return false;

       // last block is typically incomplete, so padding is required as sorting algorithm does not accept comparator and limiting indices at once
       if(decompBlockEntries<decompBlockBlockSize)
         for (i=decompBlockEntries;i<decompBlockBlockSize;i++)
           decompBlockSortArray[i].transactionIDNumber = java.lang.Integer.MAX_VALUE;

       // sort data
       long start = ( new java.util.Date() ).getTime();
       Arrays.sort(decompBlockSortArray, new DecompBlockComparator());
       long end = ( new java.util.Date() ).getTime();
       sortTimeTaken += end - start;
     }

     // initialisation part complete; return data

     double instance[] = new double[3];

     // convert data from sorting block back to mining vectors
     MiningAttribute attributeTransactionID = processedStream.metaDataProcessed.getMiningAttribute("transactionId");
     Category transactionCat = new Category( ""+decompBlockSortArray[decompBlockCursor].transactionIDNumber );
     double d0 = ((CategoricalAttribute)attributeTransactionID).getKey( transactionCat );
     instance[0] = Category.isMissingValue(d0) ? ((CategoricalAttribute)attributeTransactionID).addCategory( transactionCat ): d0;

     instance[1] = decompBlockSortArray[decompBlockCursor].itemIDkey;

     // numerical attribute is not yet initialised, still
     instance[2] = Category.MISSING_VALUE;

     nextMiningVector = new MiningVector(instance);
     nextMiningVector.setMetaData(processedStream.metaDataProcessed);

     nextTransactionID = decompBlockSortArray[decompBlockCursor].transactionIDNumber;

     decompBlockCursor++;

     // check if current entry was the last available
     if(decompBlockCursor>=decompBlockEntries)
     {
       decompBlockDataPresent = false;
     }

     return true;
   }

   /**
    * next() implementation based on an algorithm that decomposes the data and processes it in a floating window.<BR>
    * The data is processed in a list (finite specified length, uses variable )
    *
    * @throws MiningException
    * @return boolean
    */
   private boolean decompNext() throws MiningException
   {
     MiningVector miningVector;
     DecompHashClass miningVectorStorageData;
     DecompListClass miningVectorListEntry;

       // read at least one entry into list
       // initially, list is filled up w/ until number of elements equals decompWindowSize
       // after init/ list size is always decompWindowSize-1, so one entry is read per cycle until p/.next() returns false
     while ((decompListEntries <= decompWindowSize)&&(processedStream.next()))
     {
       long start = ( new java.util.Date() ).getTime();
       // add entry to list

         // read mining vector
       miningVector = processedStream.read();
       decompListEntries++;

       // create new entry for list
       miningVectorListEntry = new DecompListClass();
       miningVectorListEntry.nextElement = null;
       miningVectorListEntry.itemIDkey = miningVector.getValue(1);

         // extract tid string
       String transactionIDString = miningVector.toVector().elementAt(0).toString();
       // identical - check which one is faster later
       // transactionIDString = ((CategoricalAttribute)miningVector.getMetaData().getMiningAttribute(0)).getCategory(miningVector.getValue(0)).getDisplayValue();

       // copy pointer to string - string is required for "reverse"-lookup in hashtable
       miningVectorListEntry.transactionIDString = transactionIDString;

       // if transaction id is found in hash table, extract corresponding object for storage
       // otherwise, t-id and new value are added to the hashtable
       if (decompHashtable.containsKey(transactionIDString))
       {
           // retrieves stored object
           miningVectorStorageData = (DecompHashClass)decompHashtable.get(transactionIDString);

           miningVectorListEntry.transactionIDNumber = miningVectorStorageData.transactionIDNumber;

           // update decompListLastElement if applicable
           if (decompListLastElement == miningVectorStorageData.lastElement)
             decompListLastElement = miningVectorListEntry;

             // set correct next element for current entry
           miningVectorListEntry.nextElement = miningVectorStorageData.lastElement.nextElement;

           // add miningVectorListEntry to list (list cannot be empty)
           // update [miningVectorStorageData.lastElement]
           miningVectorStorageData.lastElement.nextElement = miningVectorListEntry; // entry is going to be the "new" last entry in the list

           // update miningVectorStorageData.lastElement
           miningVectorStorageData.lastElement = miningVectorListEntry;
//           decompHashtable.put(transactionIDString, miningVectorStorageData);

           // - Optimize

           // we have to try to move all entries of the current transaction to the end of the list;
           // however, if entries from the current transaction have been "supplied" already,
           //   they can only be appended to where they belong
           // also, if current entry is already at the bottom of the list, there's nothing to do as well
           if ((nextTransactionID != miningVectorListEntry.transactionIDNumber)&&(decompListLastElement != miningVectorListEntry)&&(decompOptimize))
           {
             // fetch control entry of next transaction in list
             DecompHashClass nextControlEntry = (DecompHashClass)decompHashtable.get(miningVectorStorageData.lastElement.nextElement.transactionIDString);

             // in case this is not the first transaction "part" in the list, the list needs to be corrected
             if (miningVectorStorageData.previousTransactionIDString != null)
             {
               // "exclude" the part with the current transaction entries from the list
               DecompHashClass preceedingControlEntry = (DecompHashClass)decompHashtable.get(miningVectorStorageData.previousTransactionIDString);
               preceedingControlEntry.lastElement.nextElement = miningVectorStorageData.lastElement.nextElement;

               // update previousTransactionIDString of previously following transaction in list
               nextControlEntry.previousTransactionIDString = miningVectorStorageData.previousTransactionIDString;
             } else
             {
               // the current part was the first in the list, so the list anchor needs to be updated
               decompListFirstElement = miningVectorStorageData.lastElement.nextElement;

               // no more preceeding transactions in the list
               nextControlEntry.previousTransactionIDString = null;
             }

             // update previousTransactionIDString of current control entry
             miningVectorStorageData.previousTransactionIDString = decompListLastElement.transactionIDString;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -