⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 aprioriall.java

📁 为了下东西 随便发了个 datamining 的源代码
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
    // init all vectors
    initVectors();
    if (customerTransSet == null) {
      customerTransSet = convertMiningminingInputStream();
    };

    fireMiningEvent(new CreationModelBeginMessage(getAlgorithmLevel()));
    
    // apply modified apriori tid for finding of litemsets
    apriori(customerTransSet);
    if(debug>1)
    {
      System.out.println("Litemsets:");
      for(i=n=0;i<allLargeItemsets.size();i++)
      {
        int[][] lk = (int[][])allLargeItemsets.get(i);
        for(j=0;j<lk.length;j++,n++)
        {
          System.out.print(n+":");
          for(k=0;k<lk[j].length;k++)
            System.out.print(" "+lk[j][k]);
          System.out.println();
        }
      }
    }
    // transformation using information found at previous phase
    // allTIDs and allLargeItemsets was created in the apriori method

    // total number of transactions of all customers
    int nTotalTrans = ((int[][])allTIDs.get(0)).length;
    int[][] lits;
    // ts will contain all transformed transactions
    int[][] ts = new int[nTotalTrans][];
    for(i=0;i<nTotalTrans;i++)
    {
      // for each transaction #i
      // counting number of all litemsets it contains
      for(l=n=0;l<allTIDs.size();l++)
      {
        lits = (int[][])allTIDs.get(l);
        if(lits[i]!=null)
        {
          for(m=0;m<lits[i].length;m++)
            if(lits[i][m]!=-1) n++; // increment counter
        } else break;
      }
      if(n==0) continue;  // take next transaction if no litemsets
      ts[i] = new int[n];
      // copying litemsets indices adding with previous number of litemsets
      // lits[i][j] on l-level is an index in L(l-1) litemsets
      // q - correction of index and is a sum of |L(i)| for each i in [1:l-2]
      for(l=q=n=0;l<allTIDs.size();l++) // for each level (length of litemsets)
      {
        lits = (int[][])allTIDs.get(l); // take litemsets indices on l-level
        if(lits[i]!=null)
        {
          for(m=0;m<lits[i].length;m++)
            if(lits[i][m]!=-1) ts[i][n++] = lits[i][m]+q;
        } else break;
        q += ((int[][])allLargeItemsets.get(l)).length;
      }
    }

    if(debug>1)
    {
      System.out.println("\nTS:");
      for(i=0;i<ts.length;i++)
      {
        System.out.print(i+":");
        if(ts[i]!=null)
        {
          for(j=0;j<ts[i].length;j++) System.out.print(" "+ts[i][j]);
          System.out.println();
        } else System.out.println(" empty");
      }
    }
    // number of customers in original transaction set
    int nCustom = customerTransSet.getSize();
    // stores number of non-empty transactions for each customer
    int[] nCustomTrans = new int[nCustom];
    // k - counter of customers after transformation
    for(i=k=0,w=-1;i<nTotalTrans;i++)
      if(ts[i]!=null)
      {
        nCustomTrans[CustomerID[i]]++;
        if(CustomerID[i]!=w)
        {
          w = CustomerID[i];
          k++;
        }
      }
    if(k==0) return;
    if(debug>0) System.out.println(k+" customers retained");
    // transformed customer sequences
    int[][][] tts = new int[k][][]; // k customers, k <= nCustom
    int maxCustSeqLen = 0;          // maximum customer's sequences length
    // w - last customer ID
    for(i=n=0,w=k=-1;i<nTotalTrans;i++)
    {
      if(ts[i]==null) continue;
      if(CustomerID[i]!=w)    // new customer
      {
        int custLen = nCustomTrans[CustomerID[i]];
        k++; tts[k] = new int[custLen][]; // add new customer
        if(maxCustSeqLen<custLen) maxCustSeqLen = custLen;
        n = 0;
        w = CustomerID[i];
      }
      tts[k][n++] = ts[i];
    }
    if(debug>1)
    {
      System.out.println("Transformed transaction set:");
      for(i=0;i<tts.length;i++)
      {
        System.out.println("Customer #"+i+":");
        if(tts[i]!=null)
          for(j=0;j<tts[i].length;j++)
          {
            System.out.print("Transaction #"+j+":");
            if(tts[i][j]!=null)
              for(k=0;k<tts[i][j].length;k++) System.out.print(" "+tts[i][j][k]);
            else System.out.print(" empty");
            System.out.println();
          }
        else System.out.println(" empty");
      }
    }
    allTIDs = null;

    // creating L2
    int nL1;  // number of all litemsets
    for(l=nL1=0;l<allLargeItemsets.size();l++) nL1 += ((int[][])allLargeItemsets.get(l)).length;
    allLits = new ItemSet[nL1];   // litemsets as itemsets for results list
    allLitsArr = new int[nL1][];  // litemsets as arrays for maximal phase
    allLitsSupp = new int[nL1];   // litemsets supports

    for(l=k=0;l<allLargeItemsets.size();l++)
    {
      int[][] L1 = (int[][])allLargeItemsets.get(l);  // litemsets of length l
      int[] S1 = (int[])allLargeItemsetsSupp.get(l);  // their supports
      for(i=0;i<L1.length;i++)
      {
        allLits[k] = new ItemSet();
        allLitsArr[k] = L1[i];
        allLitsSupp[k] = S1[i];
        for(j=0;j<L1[i].length;j++) allLits[k].addItem(L1[i][j]);
        k++;
      }
    }

    clearApriori();

    int nC12 = nL1*nL1; // number of candidates of length 2
    if(debug>0)
    {
      System.out.println("nL1 = "+nL1);
      System.out.println("nC2 = "+nC12+" ("+(long)nC12*8l+" bytes)");
    }
    int suppCk[] = new int[nC12]; // support counters for candidates
    int cidCk[] = new int[nC12];  // customer id's for candidates
    for(i=0;i<nC12;i++) cidCk[i] = -1;
    int a,b;
    int[][][] tidTable = new int[tts.length][][];
    IntVec[] tvec = new IntVec[maxCustSeqLen-1];
    for(i=0;i<maxCustSeqLen-1;i++) tvec[i] = new IntVec();
    // counting support of candidates of length 2
    for(k=0;k<tts.length;k++)   // for each customer k
    {
      for(i=0;i<tts[k].length-1;i++)  // for its each transformed transaction
      {
        tvec[i].clear();
        for(j=0;j<tts[k][i].length;j++) // for each element of that transaction
        {
          a = tts[k][i][j];
          for(m=i+1;m<tts[k].length;m++)  // for each transaction after i-th
            for(n=0;n<tts[k][m].length;n++) // for its each element
            {
              b = tts[k][m][n];
              l = a*nL1+b;    // candidate's number
              // if it is not incremented by i-th transaction of k-th customer
              if(cidCk[l]!=k+i+1)
              {
                cidCk[l] = k+i+1;
                // add to list of candidates of i-th transaction of k-th cust.
                tvec[i].add(l);
              }
            }
        }
      }
      // convert vectors to arrays
      tidTable[k] = new int[tts[k].length-1][];
      for(i=0;i<tts[k].length-1;i++)
      {
        l = tvec[i].size();
        if(l==0) continue;
        tidTable[k][i] = new int[tvec[i].size()];
        tvec[i].copy(tidTable[k][i]);
//        java.util.Arrays.sort(tidTable[k][i]);
        for(j=0;j<tidTable[k][i].length;j++)
        {
          l = tidTable[k][i][j];
          // if candidate's support counter is not incremented by k-th customer
          if(cidCk[l]!=k)
          {
            suppCk[l]++;  // increment it
            cidCk[l] = k;
          }
        }
      }
    }
    // k - counter of supported candidates
    for(i=k=0;i<nC12;i++) if(suppCk[i]>=m_minSuppInt) k++;
    if(k==0) {
      saveResults();  // save L1 and exit
      return;
    }
    int Lk[][] = new int[k][];  // L2 sequences
    int Sk[][] = new int[nL1][2];
    // noprune
    if(doPruning)
      B1 = new boolean[nL1];
    for(i=k=l=m=0;i<nL1;i++)  // for each 1-sequence
    {
      Sk[i][0] = k; // number of first 2-sequence which have
                    // i-th 1-sequence as first parent
      for(j=0;j<nL1;j++,l++)
        if(suppCk[l]>=m_minSuppInt)
        {
          Lk[k] = new int[3]; // new large 2-sequence
          Lk[k][0] = i; Lk[k][1] = j; Lk[k][2] = suppCk[l];
          suppCk[l] = -m-1; // (-m-1) value used for correcting tidTable
          // noprune
          if(doPruning)
          {
            B1[i] = true; // mark i-th and j-th 1-sequences
            B1[j] = true; // as not maximal
          }
          k++;  // counter of large 2-sequences
        }
        else m++; // counter of not supported candidates
      if(k!=Sk[i][0]) Sk[i][1] = k-1; // number of last 2-sequence which have
                                      // i-th 1-sequence as first parent
      else Sk[i][0] = -1; // i-th 1-sequences doesn't have any children
                          // with it as first parent
    }
    tidTable = checkTIDs(tidTable,suppCk);  // correct tidTable

    if(debug>1)
    {
      System.out.println("L2:");
      for(i=0;i<Lk.length;i++) System.out.println(i+": "+Lk[i][0]+","+Lk[i][1]);
      System.out.println("TID Table #2:");
      printTIDs(tidTable);
    }

  int ckCounter;  // counter of generated candidates
  int newTidTable[][][];
  int newLk[][];
  int nStep = 2;
  // for each k-sequence boundsLk[i][0] - number of first k-sequence it can be
  // merged with, boundsLk[i][1] - number of last k-sequence it can be merged
  // with, boundsLk[i][2] - number of first candidate it generated
  // boundsLk[i][0] = -1 if k-sequence doesn't generate any candidates
  int[][] boundsLk;
  boolean [] Bk = null;
  int hi,lo,cand,s;
  while(true)
  {
    nStep++;
    System.out.println("\t"+Lk.length+" large sequences #"+(nStep-1));
    if(debug>0) System.out.println("Level = "+nStep+" Lk = "+Lk.length);
    newTidTable = new int[tidTable.length][][];
    boundsLk = new int[Lk.length][3];
    ckCounter = 0;
    if(debug>1) System.out.print("Making bounds...");
    for(i=0;i<Lk.length;i++)  // for each large sequence found at previous step
    {
      b = Lk[i][1]; // second parent
      if(Sk[b][0]!=-1)  // if it has generated any large k-sequence
      {
        boundsLk[i][0] = Sk[b][0];
        boundsLk[i][1] = Sk[b][1];
        boundsLk[i][2] = ckCounter;
        ckCounter += Sk[b][1]-Sk[b][0]+1;
      } else boundsLk[i][0] = -1;
    }
    if(debug>1)
    {
      System.out.println("\tok:");
      printLk(boundsLk);
      System.out.println("\nckCounter = "+ckCounter);
    }
    if(ckCounter==0) break; // no candidates
    System.out.print(ckCounter+" candidates");
    suppCk = new int[ckCounter];  // support counters
    cidCk = new int[ckCounter];   // last customer id's
    for(i=0;i<ckCounter;i++) cidCk[i] = -1;
    int nPoints = tidTable.length / 20; // for debug
    if(nPoints==0) nPoints = 1;  // for debug
    for(i=0;i<tidTable.length;i++)  // take i-th customer
    {
      if(i%nPoints==0) System.out.print(".");  // for debug
      if(tidTable[i]==null||tidTable[i].length<2) continue; // can't generate any pairs
      for(j=n=0;j<tidTable[i].length-1;j++,n=0) // for each row of tidTable except for last
      {
        tvec[j].clear();  // clear vector of candidates numbers
        if(tidTable[i][j]==null) continue; // empty row => take next
        s = i+j+1;  // used for excluding repeating candidates numbers in new rows
        for(k=0;k<tidTable[i][j].length;k++)  // for each element of j-th row
        {
          a = tidTable[i][j][k];  // element
          lo = boundsLk[a][0];  // first parent it can be merged with
          if(lo!=-1)  // if no take next
          {
            hi = boundsLk[a][1];  // last parent
            cand = boundsLk[a][2] - lo; // first candidate number
            for(m=j+1;m<tidTable[i].length;m++) // for each next row of tidTable
            {
              if(tidTable[i][m]==null) continue;  // empty => next row
              // find position of first parent
              l = findStart(tidTable[i][m],lo,hi);
              if(l<0) continue; // not found => next row
              while(l<tidTable[i][m].length)
              {
                b = tidTable[i][m][l++];
                if(b>hi) break; // more number than last parent => next row
                b += cand;  // candidate number
                if(cidCk[b]!=s) // if not processed by j-th row of i-th customer
                {
                  tvec[j].add(b); // add to vector of candidates numbers of j-th row
                  cidCk[b] = s;
                }
              }
            }
          }
        }
      }
      // make new tidTable for i-th customer
      newTidTable[i] = new int[tidTable[i].length-1][];
      for(j=0;j<tidTable[i].length-1;j++)
      {
        if(tvec[j].size()==0) continue; // empty row
        newTidTable[i][j] = new int[tvec[j].size()];
        tvec[j].copy(newTidTable[i][j]);
//        java.util.Arrays.sort(newTidTable[i][j]);
        for(k=0;k<newTidTable[i][j].length;k++)
        {
          b = newTidTable[i][j][k]; // candidate number
          if(cidCk[b]!=i) // if candidate support counter isn't incremented
          {               // by i-th customer
            cidCk[b] = i;
            suppCk[b]++;  // increment support counter
          }
        }
      }
    }
    if(debug>1)
    {
      System.out.println("newTidTable:");
      printTIDs(newTidTable);
    }
    // number of large (k+1)-sequences
    for(i=k=0;i<ckCounter;i++) if(suppCk[i]>=m_minSuppInt) k++;
    if(debug>0)
    {
      if(k==0) System.out.println("no supported candidates");
    }
    if(k==0) break; // no large sequences
    newLk = new int[k][]; // new large sequences as an arrays
    Sk = new int[Lk.length][2];

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -