⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ub.java

📁 改程序实现UB算法
💻 JAVA
字号:
package ymh;

import java.io.*;
import java.util.*;

class UBSession {
    HashSet pagesSet = new HashSet(); // 一个会话由多个页面对象组成
}

public class UB {

    static ArrayList sessionList = new ArrayList(); // 日志中的会话集合

    public static void main(String[] args) throws IOException {
        BufferedReader in = new BufferedReader(new FileReader(
                "NASASession3.log"));
        PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(
                "NASAResult1.log")));
        PrintWriter outAnalysis = new PrintWriter(new BufferedWriter(
                new FileWriter("NASAAnalysis.log")));
        UBSession sess = new UBSession(); // 识别出的当前会话
        String s;
        while ((s = in.readLine()) != null) {
            StringTokenizer st = new StringTokenizer(s);
            int i = 0;
            String userName = null;
            String accessTime = null;
            String url = null;
            while (st.hasMoreTokens()) {
                switch (i) {
                case 0:
                    userName = st.nextToken();
                    break;
                case 1:
                    accessTime = st.nextToken();
                    break;
                case 2:
                    url = st.nextToken();
                    break;
                }
                i++;
            }

            if (i != 3)
                continue;

            // 小于1800,表示属于同一个会话
            if (Integer.parseInt(accessTime) < 1800) {
                // System.out.println(url);
                sess.pagesSet.add(url);
            } else { // 等于1800,当前会话结束,开始一个新会话
                sess.pagesSet.add(url);
                Iterator it = sess.pagesSet.iterator();
                while (it.hasNext()) {
                    outAnalysis.println(it.next());
                }
                outAnalysis.println("\n");
                sessionList.add(sess);
                sess = new UBSession(); // 清除当前会话内容,为下一次的会话识别作准备
            }
        }

        // System.out.println("-------------------------------------");
        int n = sessionList.size();
        System.out.println("sessionList.size():" + n);

        // 计算相似性矩阵
        float simMatrix[][] = new float[n][n];
        float simCoef = 0.0f;
        for (int i = 0; i < n; i++) {
            for (int j = 0; j < n; j++) {
                simCoef = computeSessionSim((UBSession) sessionList.get(i),
                        (UBSession) sessionList.get(j), outAnalysis);
                simMatrix[i][j] = simCoef;
                if (i == j && simCoef != 1.0)
                    System.out.println("i = " + i + "\tj = " + j + "\t"
                            + simMatrix[i][j]);
                out.print(simCoef + "\t");
            }
            out.print("\n");
        }

        // 聚类分析
        int isClass[] = new int[n];
        int classNum = 0;
        float t = 1.0f; // t为阀值
        int totalSession = 0;
        for (int i = 0; i < n; i++)
            isClass[i] = 0;
        for (int i = 0; i < n; i++) {
            if (isClass[i] == 0) {
                int sessNum = 1;
                float totalSim = 0.0f;
                classNum++;
                out.println("\n第" + classNum + "个类:\n");
                for (int j = 0; j < n; j++) {
                    if (simMatrix[i][j] > t) {
                        sessNum++;
                        totalSim += simMatrix[i][j];
                        isClass[j] = 1;
                    }
                }
                totalSession += sessNum;
                out.println("sessNum = " + sessNum);
                out.println("meanSim = " + (float) totalSim / sessNum);
            }
        }
        out.println("\ntotalSession = " + totalSession);
        /*
         * 测试会话识别正确与否
         * System.out.println("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$");
         * for (int i = 0; i < sessionList.size(); i++) { Session ses =
         * (Session) sessionList.get(i); for (int j = 0; j <
         * ses.interestList.size(); j++) { InterestPoint ip = (InterestPoint)
         * ses.interestList.get(j); System.out.print(ip.catalog + "\t");
         * System.out.print(ip.pagesCount + "\t"); System.out.print(ip.pages);
         * System.out.println("InterestPoint***************************"); }
         * System.out.println("Session***************************"); }
         */
        in.close();
        out.close();
        outAnalysis.close();
        System.out.println("The program is over.");
    }

    /* 计算会话之间的相似性 */
    static float computeSessionSim(UBSession s1, UBSession s2,
            PrintWriter outAnalysis) {
        float SessionSim = 0.0f;
        int size1 = s1.pagesSet.size();
        int size2 = s2.pagesSet.size();
        int commonPages = 0;
        //  System.out.println("size1:\t" + size1);
        //  System.out.println("size2:\t" + size2);

        // System.out.println("size1:\t" + s1.interestList.size());
        // System.out.println("size2:\t" + s2.interestList.size());

        Iterator it = s1.pagesSet.iterator();
        while (it.hasNext()) {
            if (s2.pagesSet.contains(it.next()))
                commonPages++;
        }
        SessionSim = (float) commonPages / (float) Math.sqrt(size1 * size2);
        return SessionSim;
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -