📄 twodimensiongibbslda.java
字号:
arrTVCountO[termIndex][view]--;
arrDVCount[docIndex][view]--;
// sample a view from the distribution for the current instance
view=sampleView(random,arrTVCountO[termIndex],arrDVCount[docIndex],arrViewCount);
//update counts
arrZO[i] = view;
arrTVCountO[termIndex][view]++;
arrDVCount[docIndex][view]++;
arrViewCount[view]++;
}
//sampling words in the secondary dimension
for (k = 0; k <tokenNumP; k++) {
i = arrOrderP[k]; // current word token to assess
termIndex = arrTermP[i];
docIndex =arrDocP[i];
topic = arrY[i];
view = arrZP[i];
status=arrX[i];
//sample a view
// substract the current instance from counts
arrViewCount[view]--;
arrTVCountP[termIndex][view]--;
arrDVCount[docIndex][view]--;
if(status==0){
arrTTComCount[termIndex][view][topic]--;
}
else{
arrTTViewCount[termIndex][view][topic]--;
arrViewTopicCount[view][topic]--;
}
arrDTCount[docIndex][view][topic]--;
// sample a view from the distribution for the current instance
view=sampleView(random,arrTVCountP[termIndex],arrDVCount[docIndex],arrViewCount);
//update counts
arrZP[i] = view;
arrTVCountP[termIndex][view]++;
arrDVCount[docIndex][view]++;
arrViewCount[view]++;
if(status==0)
arrTTComCount[termIndex][view][topic]++;
else{
arrTTViewCount[termIndex][view][topic]++;
arrViewTopicCount[view][topic]++;
}
arrDTCount[docIndex][view][topic]++;
//sample a theme(topic)
if(status==0){
//the theme is view-free
// substract the current instance from counts
arrTopicCount[topic]--;
arrTTCount[termIndex][topic]--;
arrTTComCount[termIndex][view][topic]--;
arrDTCount[docIndex][view][topic]--;
// sample a topic from the distribution for the current instance
topic = sampleCommonTopic(random, arrTTCount[termIndex], arrDTCount[docIndex][view], arrTopicCount);
//update counts
arrY[i] = topic;
arrTTComCount[termIndex][view][topic]++;
arrTTCount[termIndex][topic]++;
arrDTCount[docIndex][view][topic]++;
arrTopicCount[topic]++;
}
else{
//the theme is view-specific
// substract the current instance from counts
arrViewTopicCount[view][topic]--;
arrTTViewCount[termIndex][view][topic]--;
arrDTCount[docIndex][view][topic]--;
// sample a topic from the distribution for the current instance
topic = sampleViewTopic(random, arrTTViewCount[termIndex][view], arrDTCount[docIndex][view], arrViewTopicCount[view]);
//update counts
arrY[i] = topic;
arrTTViewCount[termIndex][view][topic]++;
arrDTCount[docIndex][view][topic]++;
arrViewTopicCount[view][topic]++;
}
//sample a status
// substract the current instance from counts
if(status==0){
arrTopicCount[topic]--;
arrTTCount[termIndex][topic]--;
arrTTComCount[termIndex][view][topic]--;
arrDTCount[docIndex][view][topic]--;
}
else{
arrViewTopicCount[view][topic]--;
arrTTViewCount[termIndex][view][topic]--;
arrDTCount[docIndex][view][topic]--;
}
// sample a status from the distribution for the current instance
status = sampleStatus(random, arrTTViewCount[termIndex][view][topic], arrViewTopicCount[view][topic],
arrTTComCount[termIndex][view][topic], arrTTCount[termIndex][topic], arrTopicCount[topic]);
//update counts
arrX[i] = status;
if(status==0){
arrTTComCount[termIndex][view][topic]++;
arrTTCount[termIndex][topic]++;
arrDTCount[docIndex][view][topic]++;
arrTopicCount[topic]++;
}
else{
arrTTViewCount[termIndex][view][topic]++;
arrDTCount[docIndex][view][topic]++;
arrViewTopicCount[view][topic]++;
}
}
}
}
/**
 * Draws a view index at random from an unnormalised discrete distribution.
 *
 * <p>Each candidate {@code j} in {@code [0, viewNum)} is weighted by
 * {@code (arrTVCount[j] + beta) / (arrViewCount[j] + wBeta) * (arrDVCount[j] + alpha)}.
 * A single uniform draw is scaled by the sum of the weights, and the first index whose
 * running total is not exceeded by the scaled draw is returned.
 *
 * @param random       source of the uniform draw; {@code nextDouble()} is called exactly once
 * @param arrTVCount   per-view counts used in the numerator of the first factor
 *                     (the visible callers pass the row of the current term)
 * @param arrDVCount   per-view counts used in the second factor
 *                     (the visible callers pass the row of the current document)
 * @param arrViewCount per-view counts used in the denominator of the first factor
 * @return the selected index, starting from 0
 */
private int sampleView(Random random, int[] arrTVCount, int[] arrDVCount, int[] arrViewCount){
    final double[] weight = new double[viewNum];
    double weightSum = 0;
    for (int v = 0; v < viewNum; v++) {
        weight[v] = (arrTVCount[v] + beta)/(arrViewCount[v] + wBeta) *(arrDVCount[v]+ alpha);
        weightSum += weight[v];
    }

    // Scale the uniform draw into [0, weightSum) and walk the cumulative weights.
    final double target = weightSum * random.nextDouble();
    int view = 0;
    for (double running = weight[0]; target > running; running += weight[view]) {
        view++;
    }
    return view;
}
/**
 * Draws a topic index at random for a token whose topic is sampled on the common
 * (view-free) branch of the caller.
 *
 * <p>Each candidate {@code j} in {@code [0, themeNum)} is weighted by
 * {@code (arrTTCount[j] + delta) / (arrTopicCount[j] + wDelta) * (arrDTCount[j] + rho)}.
 * The weights are not normalised; a single uniform draw is scaled by their sum and the
 * first index whose running total is not exceeded by the scaled draw is returned.
 *
 * @param random        source of the uniform draw; {@code nextDouble()} is called exactly once
 * @param arrTTCount    per-topic counts used in the numerator of the first factor
 *                      (the visible caller passes the row of the current term)
 * @param arrDTCount    per-topic counts used in the second factor
 *                      (the visible caller passes the row of the current document and view)
 * @param arrTopicCount per-topic counts used in the denominator of the first factor
 * @return the selected index, starting from 0
 */
private int sampleCommonTopic(Random random, int[] arrTTCount, int[] arrDTCount, int[] arrTopicCount){
    final double[] weight = new double[themeNum];
    double weightSum = 0;
    for (int t = 0; t < themeNum; t++) {
        weight[t] = (arrTTCount[t] + delta)/(arrTopicCount[t] + wDelta) *(arrDTCount[t]+ rho);
        weightSum += weight[t];
    }

    // Scale the uniform draw into [0, weightSum) and walk the cumulative weights.
    final double target = weightSum * random.nextDouble();
    int topic = 0;
    for (double running = weight[0]; target > running; running += weight[topic]) {
        topic++;
    }
    return topic;
}
/**
 * Draws a topic index at random for a token whose topic is sampled on the
 * view-specific branch of the caller.
 *
 * <p>Each candidate {@code j} in {@code [0, themeNum)} is weighted by
 * {@code (arrTTViewCount[j] + epsilon) / (arrViewTopicCount[j] + wEpsilon) * (arrDTCount[j] + rho)}.
 * The weights are not normalised; a single uniform draw is scaled by their sum and the
 * first index whose running total is not exceeded by the scaled draw is returned.
 *
 * @param random            source of the uniform draw; {@code nextDouble()} is called exactly once
 * @param arrTTViewCount    per-topic counts used in the numerator of the first factor
 *                          (the visible caller passes the row of the current term and view)
 * @param arrDTCount        per-topic counts used in the second factor
 *                          (the visible caller passes the row of the current document and view)
 * @param arrViewTopicCount per-topic counts used in the denominator of the first factor
 *                          (the visible caller passes the row of the current view)
 * @return the selected index, starting from 0
 */
private int sampleViewTopic(Random random, int[] arrTTViewCount, int[] arrDTCount, int[] arrViewTopicCount){
    final double[] weight = new double[themeNum];
    double weightSum = 0;
    for (int t = 0; t < themeNum; t++) {
        weight[t] = (arrTTViewCount[t] + epsilon)/(arrViewTopicCount[t] + wEpsilon) *(arrDTCount[t]+ rho);
        weightSum += weight[t];
    }

    // Scale the uniform draw into [0, weightSum) and walk the cumulative weights.
    final double target = weightSum * random.nextDouble();
    int topic = 0;
    for (double running = weight[0]; target > running; running += weight[topic]) {
        topic++;
    }
    return topic;
}
/**
 * Draws the binary status of a token: {@code 0} or {@code 1}.
 *
 * <p>Two unnormalised weights are computed:
 * <ul>
 *   <li>status 0: {@code (gamma0 + ttCommonCount) * (ttCount + delta) / (sumTTCount + wDelta)}</li>
 *   <li>status 1: {@code (gamma1 + ttViewCount) * (ttViewCount + epsilon) / (sumTTViewCount + wEpsilon)}</li>
 * </ul>
 * Status 0 is returned when a uniform draw is less than or equal to the share of the
 * status-0 weight in the sum of both weights; otherwise status 1 is returned.
 *
 * <p>NOTE(review): {@code ttViewCount} appears in both factors of the status-1 weight,
 * whereas the status-0 weight uses two different counts. This may be intentional;
 * confirm against the model definition.
 *
 * @param random         source of the uniform draw; {@code nextDouble()} is called exactly once
 * @param ttViewCount    count used in both factors of the status-1 weight
 * @param sumTTViewCount count used in the denominator of the status-1 weight
 * @param ttCommonCount  count added to {@code gamma0} in the status-0 weight
 * @param ttCount        count used in the numerator of the status-0 ratio
 * @param sumTTCount     count used in the denominator of the status-0 ratio
 * @return {@code 0} or {@code 1}
 */
private int sampleStatus(Random random, int ttViewCount, int sumTTViewCount, int ttCommonCount, int ttCount, int sumTTCount){
    final double commonWeight = (gamma0+ttCommonCount)* (ttCount + delta)/(sumTTCount + wDelta);
    final double viewWeight = (gamma1+ttViewCount)*(ttViewCount + epsilon)/(sumTTViewCount + wEpsilon);
    final double commonShare = commonWeight/(commonWeight+viewWeight);

    final double draw = random.nextDouble();
    return (draw <= commonShare) ? 0 : 1;
}
/**
 * Expands the per-document term frequency lists of an index into two parallel,
 * token-level arrays.
 *
 * <p>Documents are visited in index order {@code 0 .. docNum-1}. For every entry of a
 * document's term index list, the term index is written {@code frequency} times into
 * {@code arrTerm} and the document index is written into the same positions of
 * {@code arrDoc}. Positions are filled consecutively from 0. Documents whose term
 * index list is {@code null} or empty contribute nothing.
 *
 * @param indexReader reader supplying the document count and, per document, the term
 *                    index list and the term frequency list
 * @param arrTerm     output array receiving one term index per token; must be large
 *                    enough for all tokens (not checked here)
 * @param arrDoc      output array receiving the document index of each token, aligned
 *                    with {@code arrTerm}
 */
private void readSequence(IndexReader indexReader, int[] arrTerm, int[] arrDoc){
    final int totalDocs = indexReader.getCollection().getDocNum();
    int offset = 0; // next free position in the output arrays

    for (int doc = 0; doc < totalDocs; doc++) {
        final int[] termIds = indexReader.getTermIndexList(doc);
        final int[] termFreqs = indexReader.getTermFrequencyList(doc);
        if (termIds == null || termIds.length == 0) {
            continue;
        }

        for (int entry = 0; entry < termIds.length; entry++) {
            final int termId = termIds[entry];
            final int repeats = termFreqs[entry];
            // Emit one token per occurrence of the term in this document.
            for (int copy = 0; copy < repeats; copy++) {
                arrTerm[offset + copy] = termId;
                arrDoc[offset + copy] = doc;
            }
            offset += repeats;
        }
    }
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -