⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 twodimensiongibbslda.java

📁 dragontoolkit用于机器学习
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
                arrTVCountO[termIndex][view]--;
                arrDVCount[docIndex][view]--;

                // sample a view from the distribution for the current instance
                view=sampleView(random,arrTVCountO[termIndex],arrDVCount[docIndex],arrViewCount);

                //update counts
                arrZO[i] = view;
                arrTVCountO[termIndex][view]++;
                arrDVCount[docIndex][view]++;
                arrViewCount[view]++;
            }

            //sampling words in the secondary dimension
            for (k = 0; k <tokenNumP; k++) {
                i = arrOrderP[k]; // current word token to assess
                termIndex = arrTermP[i];
                docIndex =arrDocP[i];
                topic = arrY[i];
                view = arrZP[i];
                status=arrX[i];

                //sample a view
                // subtract the current instance from counts
                arrViewCount[view]--;
                arrTVCountP[termIndex][view]--;
                arrDVCount[docIndex][view]--;
                if(status==0){
                    arrTTComCount[termIndex][view][topic]--;
                }
                else{
                    arrTTViewCount[termIndex][view][topic]--;
                    arrViewTopicCount[view][topic]--;
                }
                arrDTCount[docIndex][view][topic]--;

                // sample a view from the distribution for the current instance
                view=sampleView(random,arrTVCountP[termIndex],arrDVCount[docIndex],arrViewCount);

                //update counts
                arrZP[i] = view;
                arrTVCountP[termIndex][view]++;
                arrDVCount[docIndex][view]++;
                arrViewCount[view]++;
                if(status==0)
                    arrTTComCount[termIndex][view][topic]++;
                else{
                    arrTTViewCount[termIndex][view][topic]++;
                    arrViewTopicCount[view][topic]++;
                }
                arrDTCount[docIndex][view][topic]++;


                //sample a theme(topic)
                if(status==0){
                    //the theme is view-free
                    // subtract the current instance from counts
                    arrTopicCount[topic]--;
                    arrTTCount[termIndex][topic]--;
                    arrTTComCount[termIndex][view][topic]--;
                    arrDTCount[docIndex][view][topic]--;

                    // sample a topic from the distribution for the current instance
                    topic = sampleCommonTopic(random, arrTTCount[termIndex], arrDTCount[docIndex][view], arrTopicCount);

                    //update counts
                    arrY[i] = topic;
                    arrTTComCount[termIndex][view][topic]++;
                    arrTTCount[termIndex][topic]++;
                    arrDTCount[docIndex][view][topic]++;
                    arrTopicCount[topic]++;
                }
                else{
                    //the theme is view-specific
                    // subtract the current instance from counts
                    arrViewTopicCount[view][topic]--;
                    arrTTViewCount[termIndex][view][topic]--;
                    arrDTCount[docIndex][view][topic]--;

                    // sample a topic from the distribution for the current instance
                    topic = sampleViewTopic(random, arrTTViewCount[termIndex][view], arrDTCount[docIndex][view], arrViewTopicCount[view]);

                    //update counts
                    arrY[i] = topic;
                    arrTTViewCount[termIndex][view][topic]++;
                    arrDTCount[docIndex][view][topic]++;
                    arrViewTopicCount[view][topic]++;
                }

                //sample a status
                // subtract the current instance from counts
                if(status==0){
                    arrTopicCount[topic]--;
                    arrTTCount[termIndex][topic]--;
                    arrTTComCount[termIndex][view][topic]--;
                    arrDTCount[docIndex][view][topic]--;
                }
                else{
                    arrViewTopicCount[view][topic]--;
                    arrTTViewCount[termIndex][view][topic]--;
                    arrDTCount[docIndex][view][topic]--;
                }

                // sample a status from the distribution for the current instance
                status = sampleStatus(random, arrTTViewCount[termIndex][view][topic], arrViewTopicCount[view][topic],
                                      arrTTComCount[termIndex][view][topic], arrTTCount[termIndex][topic], arrTopicCount[topic]);

                //update counts
                arrX[i] = status;
                if(status==0){
                    arrTTComCount[termIndex][view][topic]++;
                    arrTTCount[termIndex][topic]++;
                    arrDTCount[docIndex][view][topic]++;
                    arrTopicCount[topic]++;
                }
                else{
                    arrTTViewCount[termIndex][view][topic]++;
                    arrDTCount[docIndex][view][topic]++;
                    arrViewTopicCount[view][topic]++;
                }
            }
        }
    }

    /**
     * Draws a view index at random. View j carries the unnormalised weight
     * (arrTVCount[j] + beta) / (arrViewCount[j] + wBeta) * (arrDVCount[j] + alpha).
     * One uniform draw is scaled by the sum of all weights, and the first index whose
     * running total is not exceeded by the scaled draw is returned.
     *
     * viewNum, beta, wBeta and alpha are fields declared outside this block
     * (presumably the number of views and smoothing priors -- confirm at their declaration).
     *
     * @param random       source of the uniform draw; nextDouble() is called exactly once
     * @param arrTVCount   per-view counts for the term being resampled
     * @param arrDVCount   per-view counts for the document being resampled
     * @param arrViewCount overall per-view counts
     * @return the sampled view index
     */
    private int sampleView(Random random, int[] arrTVCount, int[] arrDVCount, int[] arrViewCount){
        final double[] weights = new double[viewNum];
        double weightSum = 0;

        // build the unnormalised weight of every view and their total
        int v = 0;
        while (v < viewNum) {
            weights[v] = (arrTVCount[v] + beta) / (arrViewCount[v] + wBeta) * (arrDVCount[v] + alpha);
            weightSum += weights[v];
            v++;
        }

        // scale one uniform draw to the total, then walk the running sum until it covers the draw
        final double threshold = weightSum * random.nextDouble();
        int chosen = 0;
        for (double cumulative = weights[0]; threshold > cumulative; cumulative += weights[chosen]) {
            chosen++;
        }
        return chosen;
    }

    /**
     * Draws a topic index at random for a token whose theme is shared across views.
     * Topic j carries the unnormalised weight
     * (arrTTCount[j] + delta) / (arrTopicCount[j] + wDelta) * (arrDTCount[j] + rho).
     * One uniform draw is scaled by the sum of all weights, and the first index whose
     * running total is not exceeded by the scaled draw is returned.
     *
     * themeNum, delta, wDelta and rho are fields declared outside this block
     * (presumably the number of themes and smoothing priors -- confirm at their declaration).
     *
     * @param random        source of the uniform draw; nextDouble() is called exactly once
     * @param arrTTCount    per-topic counts for the term being resampled
     * @param arrDTCount    per-topic counts for the current document and view
     * @param arrTopicCount overall per-topic counts
     * @return the sampled topic index
     */
    private int sampleCommonTopic(Random random, int[] arrTTCount, int[] arrDTCount, int[] arrTopicCount){
        final double[] weights = new double[themeNum];
        double weightSum = 0;

        // build the unnormalised weight of every topic and their total
        int t = 0;
        while (t < themeNum) {
            weights[t] = (arrTTCount[t] + delta) / (arrTopicCount[t] + wDelta) * (arrDTCount[t] + rho);
            weightSum += weights[t];
            t++;
        }

        // scale one uniform draw to the total, then walk the running sum until it covers the draw
        final double threshold = weightSum * random.nextDouble();
        int chosen = 0;
        for (double cumulative = weights[0]; threshold > cumulative; cumulative += weights[chosen]) {
            chosen++;
        }
        return chosen;
    }

    /**
     * Draws a topic index at random for a token whose theme is specific to one view.
     * Topic j carries the unnormalised weight
     * (arrTTViewCount[j] + epsilon) / (arrViewTopicCount[j] + wEpsilon) * (arrDTCount[j] + rho).
     * One uniform draw is scaled by the sum of all weights, and the first index whose
     * running total is not exceeded by the scaled draw is returned.
     *
     * themeNum, epsilon, wEpsilon and rho are fields declared outside this block
     * (presumably the number of themes and smoothing priors -- confirm at their declaration).
     *
     * @param random            source of the uniform draw; nextDouble() is called exactly once
     * @param arrTTViewCount    per-topic counts for the current term within the current view
     * @param arrDTCount        per-topic counts for the current document and view
     * @param arrViewTopicCount per-topic counts within the current view
     * @return the sampled topic index
     */
    private int sampleViewTopic(Random random, int[] arrTTViewCount, int[] arrDTCount, int[] arrViewTopicCount){
        final double[] weights = new double[themeNum];
        double weightSum = 0;

        // build the unnormalised weight of every topic and their total
        int t = 0;
        while (t < themeNum) {
            weights[t] = (arrTTViewCount[t] + epsilon) / (arrViewTopicCount[t] + wEpsilon) * (arrDTCount[t] + rho);
            weightSum += weights[t];
            t++;
        }

        // scale one uniform draw to the total, then walk the running sum until it covers the draw
        final double threshold = weightSum * random.nextDouble();
        int chosen = 0;
        for (double cumulative = weights[0]; threshold > cumulative; cumulative += weights[chosen]) {
            chosen++;
        }
        return chosen;
    }

    /**
     * Draws the binary status of a token: 0 or 1. The caller treats status 0 as a
     * view-free (common) theme and any other status as a view-specific theme.
     *
     * Two unnormalised weights are formed:
     *   common = (gamma0 + ttCommonCount) * (ttCount + delta) / (sumTTCount + wDelta)
     *   view   = (gamma1 + ttViewCount) * (ttViewCount + epsilon) / (sumTTViewCount + wEpsilon)
     * and 0 is returned when a uniform draw is at most common / (common + view).
     *
     * gamma0, gamma1, delta, wDelta, epsilon and wEpsilon are fields declared outside
     * this block (presumably prior parameters -- confirm at their declaration).
     *
     * @param random         source of the uniform draw; nextDouble() is called exactly once
     * @param ttViewCount    count of the term under the current view and topic (view-specific)
     * @param sumTTViewCount count of the current topic within the current view
     * @param ttCommonCount  count of the term under the current view and topic (common)
     * @param ttCount        count of the term under the current topic
     * @param sumTTCount     overall count of the current topic
     * @return 0 or 1
     */
    private int sampleStatus(Random random, int ttViewCount, int sumTTViewCount, int ttCommonCount, int ttCount, int sumTTCount){
        final double commonWeight = (gamma0 + ttCommonCount) * (ttCount + delta) / (sumTTCount + wDelta);
        final double viewWeight = (gamma1 + ttViewCount) * (ttViewCount + epsilon) / (sumTTViewCount + wEpsilon);

        // normalised probability of the common status
        final double commonShare = commonWeight / (commonWeight + viewWeight);

        return random.nextDouble() <= commonShare ? 0 : 1;
    }

    /**
     * Expands the per-document term lists of an index into two parallel token arrays.
     * For each document, every term index is written as many times as its frequency;
     * arrTerm receives the term index and arrDoc the document index at the same position.
     * Tokens are written contiguously starting at position 0, in document order and then
     * in the order of the document's term list.
     *
     * @param indexReader supplies the document count and, per document, the term index
     *                    list and the matching term frequency list
     * @param arrTerm     output: term index of every token; must be large enough for all
     *                    tokens (not checked here)
     * @param arrDoc      output: document index of every token; same length requirement
     */
    private void readSequence(IndexReader indexReader, int[] arrTerm, int[] arrDoc){
        final int totalDocs = indexReader.getCollection().getDocNum();
        int writePos = 0;

        for (int doc = 0; doc < totalDocs; doc++) {
            int[] termIds = indexReader.getTermIndexList(doc);
            int[] termFreqs = indexReader.getTermFrequencyList(doc);

            // documents without any listed term contribute no tokens
            if (termIds != null && termIds.length != 0) {
                for (int t = 0; t < termIds.length; t++) {
                    // one slot per occurrence of this term in the document
                    int runEnd = writePos + termFreqs[t];
                    for (int slot = writePos; slot < runEnd; slot++) {
                        arrTerm[slot] = termIds[t];
                        arrDoc[slot] = doc;
                    }
                    writePos = runEnd;
                }
            }
        }
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -