⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 aprioriall.java

📁 为了下东西 随便发了个 datamining 的源代码
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
   *
   * @param array input data
   * @param value for search
   * @return pos in array
   * @author Alexey Grinyuk
   */
  private int findInArray(int array[][], int value) {
    int start = 0;
    int end = array.length-1;
    int pos;

    if(end == -1) return -1;

    while(start + 1 < end) {
      pos = (end - start) / 2 + start;

      if(array[pos][0] < value) {
        start = pos + 1;
      } else if(array[pos][0] > value) {
        end = pos - 1;
      } else return pos;
    }

    if(array[start][0] == value) return start;
    if(array[end][0] == value) return end;
    return -1;
  }


  /**
   * Compare 2 integer arrays
   *
   * @param array1 input data
   * @param array2 input data
   * @param length number items for compare
   * @return array1 == array2
   * @author Alexey Grinyuk
   */
  private boolean arrayCompare(int array1[], int array2[], int length) {
    int i;
    for(i=0; i<length; i++) {
      if(array1[i] != array2[i]) return false;
    }
    return true;
  }


  /**
   * Global variable L[][]  And  TidTable[][] are parameters this recursive function
   */
  private void createAllLk() {
    int i, j, counter, numberOfAllChildren;

    int LLength = L.length;
    if(LLength < 2) return;
    int LWidth = L[0].length;

    int firstChild[] = new int[LLength];
    int lastChild[] = new int[LLength];
    int childSupp[];
    int toLargeChild[];
    int largeChildren[][];
    int largeChildSupp[];

    numberOfAllChildren = createFirstAndLastChildArrays(L, /*Out*/ firstChild, /*Out*/ lastChild);

    // Allocate memory...
    childSupp = new int[numberOfAllChildren];     // ...for Support of all children
    toLargeChild = new int[numberOfAllChildren];  // ...for associate table  (All Children -> Large Children)
    for(i=0; i<numberOfAllChildren; i++) { // Initialize memeory childLastCustomer
      //childSupp[i] = 0;
      toLargeChild[i] = -1;
    }

    TIDTable = calculateSupportAndBuildNewTIDTable(TIDTable, firstChild, lastChild, /*Out*/ childSupp);

    // Calculate number of large children and
    // build associate table (All Children -> Large Children)
    counter = 0;
    for(i=0; i<childSupp.length; i++)
      if(childSupp[i] >= m_minSuppInt) toLargeChild[i] = counter++;

    // Allocate memory...
    largeChildren = new int[counter][];  //...for array of large children
    largeChildSupp = new int[counter]; //...for supports of large children

    buildArrayOfLargeChildren(L, firstChild, lastChild, childSupp, /*Out*/ largeChildren, /*Out*/ largeChildSupp);

    // Mark little itemsets in TIDTable as removed
    for(i=0; i<TIDTable.length; i++) {
      if(TIDTable[i] != null)
        for(j=0; j<TIDTable[i].length; j++)
          TIDTable[i][j] = toLargeChild[ TIDTable[i][j] ];
    }

    // Add Lk to general result
    allLargeItemsets.add(largeChildren);
    allLargeItemsetsSupp.add(largeChildSupp);
    allTIDs.add(TIDTable);

    L = largeChildren;

    firstChild = null;
    lastChild = null;
    childSupp = null;
    toLargeChild = null;
    largeChildren = null;
    largeChildSupp = null;

    System.out.println("|L" + (LWidth + 1) + "| = " + L.length);
    if( m_maxItemSize == LWidth + 1 ) return;
    createAllLk(); // recursive call
  }


  /**
   * @param L large itemsets
   * @param firstChild array of first children
   * @param lastChild array of last children
   * @return number of all children
   * @author Alexey Grinyuk
   */
  private int createFirstAndLastChildArrays(int L[][], /*Out*/ int firstChild[], /*Out*/ int lastChild[]) {
    int i, j;
    int counter;
    int LLength = L.length;
    if(LLength < 2) return 0;
    int LWidth = L[0].length;

    for(i=0; i < LLength; i++) {
      firstChild[i] = -1;
      lastChild[i] = -1;
    }

    // Generate ordinal numbers for first and last child of each item L (firstChild and lastChild)
    counter = 0;
    for(i=0; i < LLength-1; i++) { // for each itemset in L (parent1)
      for(j=i+1; j < LLength; j++) { // for each itemset in L (parent2)
        if( arrayCompare(L[i], L[j], LWidth-1) ) { // if parent1 and parent2 can bear a child
          if(firstChild[i] == -1) firstChild[i] = counter; // first child of parent1
          lastChild[i] = counter; // last child of parent1
          counter++;
        } else break;
      }
    }

    return counter;
  }


  /**
   * @param TIDTable association table: Transaction ID -> large itemsets list
   * @param firstChild array of first children
   * @param lastChild array of last children
   * @param childSupp supports for all children
   * @return new TIDTable
   * @author Alexey Grinyuk
   */
  private int[][] calculateSupportAndBuildNewTIDTable(int TIDTable[][],
                                                      int firstChild[],
                                                      int lastChild[],
                                                      int childSupp[])
  {
    int i, TID, pos1, pos2;
    int parent1, parent2;
    int lastParent;
    int child;

    IntVector temp = new IntVector();
    int tempSize;
    int newTIDTable[][] = new int[TIDTable.length][];
    int newTIDTableCounter = 0;
    int rezult[][];
    int childLastCustomer[];

    // Allocate and initialize memory for last customers which increments child supp
    childLastCustomer = new int[childSupp.length];
    for(i=0; i<childLastCustomer.length; i++) childLastCustomer[i] = -1;

    for(TID=0; TID<TIDTable.length; TID++) {  // for each transaction in TIDTable
      if(TIDTable[TID] == null) continue;

      // for each itemset associated with transaction (for each parent1)
      for(pos1=0; pos1 < TIDTable[TID].length-1; pos1++) {
        parent1 = TIDTable[ TID ][ pos1 ];

        if( parent1 == -1 ) continue;
        if( firstChild[ parent1 ] == -1 ) continue;

        //  ordinal number (index in L[]) of last parent2
        lastParent = parent1 + (lastChild[ parent1 ] - firstChild[ parent1 ] + 1);

        // for each parent2
        for(pos2 = pos1+1; pos2 < TIDTable[TID].length && TIDTable[TID][pos2] <= lastParent; pos2++) {
          parent2 = TIDTable[ TID ][ pos2 ];

          if(parent2 != -1) {  // if cell of TIDTable[TID] not marked as removed
            // ordinal number (index in childSupp[]) of child that born parent1 and parent2:
            child = firstChild[ parent1 ] + (parent2 - parent1 - 1);
            if(childLastCustomer[child] != CustomerID[TID]) {
              childSupp[child]++;
              childLastCustomer[child] = CustomerID[TID];
            }
            temp.addElement(child); // build row of new TIDTable
          }
        }

      }
      tempSize = temp.size(); // number of itemsets assosiated with transaction
      if(tempSize > 0) {
        // allocate memory for new row of new TIDTable
        newTIDTable[TID] = new int[tempSize];
        for(i=0; i<tempSize; i++) newTIDTable[TID][i] = temp.IntegerAt(i);
        temp.clear();
      }
    }

    return newTIDTable;
  }


  /**
   * @param L large itemsets
   * @param firstChild array of first children
   * @param lastChild array of last children
   * @param childSupp support for all children
   * @param largeChildren array of large children
   * @param largeChildSupp support for large children
   * @author Alexey Grinyuk
   */
  private void buildArrayOfLargeChildren(int L[][],
                                         int firstChild[],
                                         int lastChild[],
                                         int childSupp[],
                                         int largeChildren[][],
                                         int largeChildSupp[])
  {
    int parent1, parent2;
    int child;
    int counter;

    int LLength = L.length;
    int LWidth = L[0].length;

    counter = 0;
    // for each itemset in L (for each parent1)
    for(parent1=0; parent1<LLength; parent1++) {
      // if this itemset have children
      if(firstChild[parent1] != -1) {
        // for all children of itemset
        for(child = firstChild[parent1]; child <= lastChild[parent1]; child++) {

          if(childSupp[child] >= m_minSuppInt) { // if itemset is large
            largeChildSupp[counter] = childSupp[child];
            largeChildren[counter] = new int[LWidth + 1];

            // get parent2 of child
            parent2 = parent1 + (child - firstChild[parent1] + 1);
            // create child itemset
            System.arraycopy(L[parent1], 0, largeChildren[counter], 0, LWidth);
            largeChildren[counter][LWidth] = L[parent2][LWidth - 1];

            counter++;
          }
        }
      }
    }
  }



  //               << By Victor Borichev>>

  private CustomTransSet convertMiningminingInputStream() throws MiningException {
    
    fireMiningEvent(new ReadingBeginMessage(getAlgorithmLevel()));
    
    CustomTransSet cts = new CustomTransSet();
    Hashtable customers = new Hashtable();

    int rows = 0;
    
    while(miningInputStream.next())
    {
      MiningVector vector = miningInputStream.read();
// +++++++++ Invalid vector => ignore:
      if (vector == null)
        continue;
// --------- Invalid vector, ignore.

      double custId = vector.getValue(customerId);
      double transPos = vector.getValue(transactionPosition);
      double itId = vector.getValue(itemId);

      // Missing value => ignore line:
      if ( Category.isMissingValue(custId) || Category.isMissingValue(transPos) || Category.isMissingValue(itId) )
        continue;

      Double value = new Double(custId); // customer id
      Hashtable sequence = (Hashtable)customers.get(value);  // its transactions
      if(sequence == null)
      {
        sequence = new Hashtable();
        customers.put(value,sequence);
      }
      Integer pos = new Integer((int)transPos);
      ItemSet transaction = (ItemSet)sequence.get(pos);
      if(transaction == null)
      {
        transaction = new ItemSet();
        sequence.put(pos,transaction);
      }
      int item = (int)itId;
      transaction.addItem(item);
      
      rows++;
    }
    Enumeration cids = customers.keys();
    while(cids.hasMoreElements()) {
      Double cid = (Double)cids.nextElement();
      Hashtable sequence = (Hashtable)customers.get(cid);
      int size = sequence.size();
      int[] poss = new int[size];
      ItemSet[] itemsets = new ItemSet[size];
      Enumeration em = sequence.keys();
      int i = 0;
      while(em.hasMoreElements()) {
        Integer pos = (Integer)em.nextElement();
        poss[i] = pos.intValue();
        itemsets[i++] = (ItemSet)sequence.get(pos);
      }
      // sorting transactions by position
      int j,min,element;
      for(i=0;i<size;i++)
      {
        min = poss[i]; element = -1;
        for(j=i+1;j<size;j++)
          if(poss[j]<min)
          {
            min = poss[j];
            element = j;
          }
        if(element!=-1)
        {
          int iswap = poss[i];
          poss[i] = poss[element];
          poss[element] = iswap;
          ItemSet oswap = itemsets[i];
          itemsets[i] = itemsets[element];
          itemsets[element] = oswap;
        }
      }
      TransactionSet ts = new TransactionSet();
      for(i=0;i<size;i++)
        ts.addTransaction(itemsets[i]);
      cts.addCustomerTransSet(ts);
    }

    fireMiningEvent(new ReadingEndMessage(rows, getAlgorithmLevel()));
    
    return cts;
  }

  /**
   * AprioriAll. If a customerId, transactionPosition, or itemId has
   * a missing value, the (customerId, transactionPosition, itemId)-tuple
   * is ignored.
   */
  public void runAlgorithm() throws MiningException
  {
    int i,j,k,l,m,n,q,w;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -