⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 aprioriall.java

📁 为了下东西 随便发了个 datamining 的源代码
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
    // noprune
    if(doPruning)
      Bk = new boolean[Lk.length];
    for(i=k=m=0;i<Lk.length;i++)  // for each k-sequence
    {
      if(boundsLk[i][0]==-1) {  // if doesn't make any candidate
        Sk[i][0] = -1;
        continue;   // to next k-sequence
      }
      Sk[i][0] = k;
      n = boundsLk[i][1] - boundsLk[i][0];  // number of parents it can be merged with
      for(j=0;j<=n;j++)
      {
        l = boundsLk[i][2]+j; // candidate number
        if(suppCk[l]>=m_minSuppInt)
        {
          newLk[k] = new int[3];
          newLk[k][0] = i; newLk[k][1] = j+boundsLk[i][0]; newLk[k][2] = suppCk[l];
          suppCk[l] = -m-1;
          // noprune
          if(doPruning)
          {
            Bk[i] = true; // mark as not maximal
            Bk[boundsLk[i][0] + j] = true;
          }
          k++;
        } else m++;
      }
      if(k!=Sk[i][0]) Sk[i][1] = k-1;
      else Sk[i][0] = -1;
    }
    allLargeSequences.add(Lk);  // add to results
    // noprune
    if(doPruning)
      allBk.add(Bk);
    Lk = newLk;
    System.out.print("lits");  // debug
    tidTable = checkTIDs(newTidTable,suppCk); // correct tidTable
    if(debug>1)
    {
      System.out.println("Lk"+nStep);
      printLk(Lk);
      System.out.println("TID"+nStep);
      printTIDs(tidTable);
    }
  }

  allLargeSequences.add(Lk);  // add to results last Lk
  allBk.add(null);

  if(debug>0)
  {
    System.out.println("Maximum customer sequence length "+maxCustSeqLen);
    System.out.println("Maximum tid table size was "+maxTidSize);
  }

    saveResults();
  }

  /**
   * Corrects tidTable: indices in Ck -> indices in Lk
   * @param lits new generated tidTable
   * @param supp support counters of candidates
   * @return corrected tidTable as an arrays
   */
  private int[][][] checkTIDs(int[][][] lits, int[] supp)
  {
    int i,j,k,l,m,n,p,q;
    // number of non-empty customers
    for(i=n=0;i<lits.length;i++) if(lits[i]!=null) n++;
    int[][][] newTID = new int[n][][];
    for(i=k=0;i<lits.length;i++)  // for each customer
    {
      if(lits[i]==null) continue;
      // actual number of rows
      for(j=n=0;j<lits[i].length;j++) if(lits[i][j]!=null) n++;
      newTID[k] = new int[n][];
      for(j=m=0;j<lits[i].length;j++)
      {
        if(lits[i][j]==null) continue;
        // number of supported candidates in row
        for(p=n=0;p<lits[i][j].length;p++) if(supp[lits[i][j][p]]<0) n++;
        if(n==0) continue;
        if(maxTidSize<n) maxTidSize = n;  // for debug
        newTID[k][m] = new int[n];  // new row
        for(p=q=0;p<lits[i][j].length;p++)
        {
          l = lits[i][j][p];  // element of old row
          // supp[l] must be < 0 for supported candidate cause it contains -m-1
          if(supp[l]<0) newTID[k][m][q++] = lits[i][j][p] + supp[l] + 1;
        }
        java.util.Arrays.sort(newTID[k][m]);
        m++;
      }
      k++;
    }
    return newTID;
  }

  private int findStart(int[] arr, int lo, int hi)
  {
    if(hi<arr[0]||lo>arr[arr.length-1]) return -1;
    if(lo<arr[0]) return 0;
    int p = java.util.Arrays.binarySearch(arr,lo);
    if(p<0) p = -p-1;
    return p;
  }

/*
  private void addResult(CustomItemSetList cisl, int[] ab, int supp)
  {
    int i;
    CustomSequence cs = new CustomSequence();
    for(i=0;i<ab.length;i++)
      cs.addItemSet(allLits[ab[i]]);
    cs.setSupportCount(supp);
    cisl.addItemSetList(cs);
  }
*/

/**
 * Save results in specified way
 * process each large k-sequence begin from L2
 * excluding sequences which are parents of L(k+1) sequences as it's marked in Bk
 * and exclude large sequences contained in other large sequences
 */
private Vector saveResults()
{
  int i,j,l,k,c;
  int[][] Lk;
  boolean[] Bk = null;
  Vector cisl = new Vector(); // result sequences list
  Vector all = new Vector();
  for(i=0;i<allLits.length;i++) // for litemset
  {
    // noprune
    if(doPruning)
    {
      if(B1!=null&&B1[i]) continue; // if it make and 2-sequence exclude it
      for (j = 0; j < allLits.length; j++) // for each litemset except for same

        // i-th litemsets contained in j-th
        if (i != j && subset(allLitsArr[i], allLitsArr[j]))
          break;
      if (j != allLits.length)
        continue; // take next i-th litemset
    }
    int[] l1 = new int[2];  // 1-sequence with support
    l1[0] = i; l1[1] = allLitsSupp[i];
    all.add(l1);
  }
  if(debug>0) System.out.println("L1 done: "+all.size());
  if(allLargeSequences.size()!=0) // if there is any L2 and more sequences
  {
    Lk = (int[][])allLargeSequences.get(0); // take L2 sequences
    // noprune
    if(doPruning)
      Bk = (boolean[])allBk.get(0); // take L2's mask
    int[][] ab = new int[Lk.length][3]; // sequences as arrays indices in litemsets array
    Vector v = new Vector();
    for(i=0;i<Lk.length;i++)
    {
      ab[i][0] = Lk[i][0];  // copy to ab
      ab[i][1] = Lk[i][1];
      ab[i][2] = Lk[i][2];  // support
      if(Bk==null||!Bk[i])  // if mask exists and L2[i] isn't marked as not maximal
        v.add(ab[i]);
    }
    // noprune
    if(doPruning)
    {
      // contains all sequences not ejected at previous step
      int[][] arr = new int[v.size()][];
      for (i = 0; i < v.size(); i++)
        arr[i] = (int[]) v.get(i);
      sweep(all, arr); // sweep all k-sequences which contained in other k-sequences
    }
    else all.addAll(v);

    if(debug>0) System.out.println("L2 done: "+all.size());
    int[][] newab;  // new (k+1)-sequences maked from k-sequences
    for(l=1;l<allLargeSequences.size();l++) // for all Lk begin from L3
    {
      Lk = (int[][])allLargeSequences.get(l); // take pairs of parents
      // noprune
      if(doPruning)
        Bk = (boolean[])allBk.get(l); // take Lk's mask

      newab = new int[Lk.length][l+3];  // (k+1)-sequences as arrays of indices
      v.clear();
      for(i=0;i<Lk.length;i++)
      {
        System.arraycopy(ab[Lk[i][0]],0,newab[i],0,l+1); // first k elements
        newab[i][l+1] = ab[Lk[i][1]][l];  // last k+1 element
        newab[i][l+2] = Lk[i][2]; // support
        if(Bk==null||!Bk[i])
          v.add(newab[i]);
      }
      // no prune
      if(doPruning)
      {
        int[][] arr = new int[v.size()][];
        for (i = 0; i < v.size(); i++)
          arr[i] = (int[]) v.get(i);
        sweep(all, arr);
      }
      else all.addAll(v);

      ab = newab;
      if(debug>0) System.out.println("L"+(l+2)+" done: "+all.size());
    }
  }
  // all large sequences which are not ejected yet
  int[][] allArr = new int[all.size()][];
  for(i=0;i<all.size();i++)
    allArr[i] = (int[])all.get(i);
  all = null;
  // noprune
  boolean bAdd = true;
  for(i=0;i<allArr.length;i++)  // for each sequence
  {
    // noprune
    if(doPruning)
    {
      for (j = 0; j < allArr.length; j++)
        if (i != j) { // for each another sequence
          // check if i-th sequence contained in j-th sequence
          for (l = k = 0;
               k < allArr[j].length - 1 && l < allArr[i].length - 1; k++)
            if (subset(allLitsArr[allArr[i][l]], allLitsArr[allArr[j][k]]))
              l++;
          if (l == allArr[i].length - 1)
            break; // contained
        }
      if (j == allArr.length) { // not contained => add to result
        bAdd = true;
      } else bAdd = false;
    }
    if(bAdd) {
      CustomSequence cs = new CustomSequence();
      for(j=0;j<allArr[i].length-1;j++)
        cs.addItemSet(allLits[allArr[i][j]]);
      cs.setSupportCount(allArr[i][allArr[i].length-1]);
      cisl.add(cs);
    }
  }
  rules = cisl;
  cleanup();
  return rules;
}

  public Vector getSequentialRules() {
    return rules;
  }

  private void sweep(Vector all, int[][] arr)
  {
    int i,j,k,c;
    for(i=0;i<arr.length;i++)
    {
      for(j=0;j<arr.length;j++)
      if(i!=j)
      {
        for(k=c=0;k<arr[i].length-1;k++)
        {
          int[] ai = allLitsArr[arr[i][k]];
          int[] bi = allLitsArr[arr[j][k]];
          if(!subset(ai,bi)) break;
        }
        if(k==arr[i].length-1) break;
      }
      if(j==arr.length) all.add(arr[i]);
    }
  }

  /**
   * return true if bi contains ai
   * @param ai first itemset
   * @param bi second itemset
   * @return true if bi contains ai
   */
  private boolean subset(int[] ai, int[] bi)
  {
    int i,k;
    if(ai.length>bi.length) return false;
    for(i=k=0;i<ai.length;i++)
    {
      while(k<bi.length&&bi[k]<ai[i]) k++;
      if(k==bi.length) return false;
      if(bi[k]!=ai[i]) return false;
    }
    return true;
  }

  /**
   * initialize vectors used in algorithms for containing data
   * associated with result
   */
  private void initVectors()
  {
    allLargeItemsets = new Vector(20);
    allLargeItemsetsSupp = new Vector(20);
    allTIDs = new Vector(20);
    allLargeSequences = new Vector(20);
    allBk = new Vector(20);
    rules.clear();
  }

  /**
   * clear algorithm internal data
   */
  private void cleanup()
  {
    allLits = null;
    allLargeSequences = null;
    allBk = null;
    B1 = null;
    allLitsArr = null;
    allLitsSupp = null;
  }

  /**
   * clear apriori phase internal data
   */
  private void clearApriori()
  {
    L = null;
    TIDTable = null;
    CustomerID = null;
    allLargeItemsets = null;
    allLargeItemsetsSupp = null;
    allTIDs = null;
  }

  /**
   * prints Lk, used debugging
   */
  private void printLk(int[][] Lk)
  {
    for(int i=0;i<Lk.length;i++)
      if(Lk[i]!=null)
      {
        System.out.print(i+":");
        for(int j=0;j<Lk[i].length;j++) System.out.print(" "+Lk[i][j]);
        System.out.println();
      }
  }

  /**
   * prints tidTable, used for debugging
   */
  private void printTIDs(int[][][] lits)
  {
    for(int i=0;i<lits.length;i++)
    {
      System.out.println("Customer #"+i+":");
      if(lits[i]!=null)
        for(int j=0;j<lits[i].length;j++)
        {
          System.out.print("\t");
          if(lits[i][j]!=null)
            for(int k=0;k<lits[i][j].length;k++) System.out.print(lits[i][j][k]+" ");
          else System.out.print("empty");
          System.out.println();
        }
      else System.out.println("\tempty");
    }
  }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -