⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 knn.java

📁 knn算法,java 源码实现,简单有效的算法
💻 JAVA
字号:
package edu.ustc.cs.classfier;

import edu.ustc.cs.process.Corpus;
import edu.ustc.cs.structer.WordVector;
import edu.ustc.cs.structer.CategoryResult;

import java.util.*;

/**
 * k-nearest-neighbour text classifier. A query document is compared with every
 * training document by the cosine similarity of their word-weight maps; the
 * similarities of the closest neighbours are summed per class and the class
 * with the highest total is written back into the query document.
 *
 * <p>Original author: dolphin (file created 2008-11-30 11:10:56).
 */
public class KNN {

    /** Preset upper bound on the number of neighbours consulted per query. */
    private int k = 20;

    /** Number of best classes to rank; only the top-ranked one is assigned to the document. */
    private int topN = 1;

    /** Per-class summed similarity from the most recent classification, indexed by class id. */
    private double[] classSim = null;

    /** Class id to class label, numbered in the iteration order of {@code Corpus.classLabel}. */
    private Map<Integer, String> indexmap = null;

    /**
     * Builds the class-id to label table by numbering the entries of
     * {@code Corpus.classLabel} from 0 in iteration order.
     */
    public KNN() {
        indexmap = new HashMap<Integer, String>();
        Iterator<?> labels = Corpus.classLabel.iterator();
        int classId = 0;
        while (labels.hasNext()) {
            indexmap.put(classId, (String) labels.next());
            ++classId;
        }
    }

    /**
     * Computes the Euclidean length of a word vector, i.e. the square root of
     * the sum of the squared values in its word map.
     *
     * @param v the vector to measure; must not be {@code null}
     * @return the vector length, {@code 0} when the word map is empty
     */
    public double QuarForVector(WordVector v) {
        double sumOfSquares = 0;
        Map<?, ?> weights = v.getWordMap();
        for (Object weight : weights.values()) {
            double w = ((Number) weight).doubleValue();
            sumOfSquares += w * w;
        }
        return Math.sqrt(sumOfSquares);
    }

    /**
     * Classifies {@code v} against the training set and stores the winning
     * class id in the document via {@code v.getDocumentInfo().setClassVaue(...)}.
     *
     * <p>The number of neighbours used is {@code min(k, vectors.length)}; the
     * preset {@code k} itself is left untouched so later calls with a larger
     * training set still use the full neighbourhood. The {@code vectors} array
     * is only read, never reordered. If every class score is zero (for example
     * when {@code vectors} is empty) class id {@code 0} is assigned.
     *
     * <p>Does nothing when {@code v} or {@code vectors} is {@code null}.
     *
     * @param v          the document to classify
     * @param vectors    the training documents
     * @param numClasses number of classes; class ids of the training documents
     *                   are used as indexes into an array of this size
     * @throws IllegalArgumentException if {@code numClasses} is not positive
     * @throws ArrayIndexOutOfBoundsException if a selected neighbour carries a
     *         class id outside {@code [0, numClasses)}
     */
    public void LazyLearning(WordVector v, WordVector[] vectors, int numClasses) {
        if (v == null || vectors == null) {
            return;
        }
        if (numClasses <= 0) {
            throw new IllegalArgumentException("numClasses must be positive, got " + numClasses);
        }

        // Similarity between the query document and every training document.
        double[] sim = cosineSimilarities(v, vectors);

        // Clamp locally: overwriting the field would shrink k for all later calls.
        int neighbourCount = Math.min(k, vectors.length);
        int[] nearest = topIndices(sim, neighbourCount);

        // Each of the nearest neighbours votes for its class, weighted by similarity.
        double[] scores = new double[numClasses];
        for (int i = 0; i < nearest.length; i++) {
            int trainingIndex = nearest[i];
            int classId = vectors[trainingIndex].getDocumentInfo().getClassValue();
            scores[classId] += sim[trainingIndex];
        }
        classSim = scores;

        // Rank the classes and assign the best one (first class wins on ties).
        int[] ranked = topIndices(scores, topN);
        int predicted = ranked.length > 0 ? ranked[0] : 0;
        v.getDocumentInfo().setClassVaue(predicted);
    }

    /**
     * Returns the cosine similarity between {@code v} and each element of
     * {@code vectors}. A pair in which either vector has zero length gets
     * similarity {@code 0} instead of the {@code NaN} that 0/0 would produce.
     */
    private double[] cosineSimilarities(WordVector v, WordVector[] vectors) {
        double[] sim = new double[vectors.length];
        if (vectors.length == 0) {
            return sim;
        }

        Map<?, ?> queryWeights = v.getWordMap();
        double queryNorm = QuarForVector(v); // loop-invariant, computed once

        for (int i = 0; i < vectors.length; i++) {
            Map<?, ?> docWeights = vectors[i].getWordMap();

            // Dot product over the words the two documents share.
            double dot = 0;
            for (Map.Entry<?, ?> entry : queryWeights.entrySet()) {
                Object word = entry.getKey();
                if (docWeights.containsKey(word)) {
                    double queryWeight = ((Number) entry.getValue()).doubleValue();
                    double docWeight = ((Number) docWeights.get(word)).doubleValue();
                    dot += queryWeight * docWeight;
                }
            }

            double denominator = queryNorm * QuarForVector(vectors[i]);
            sim[i] = denominator > 0 ? dot / denominator : 0;
        }
        return sim;
    }

    /**
     * Returns the indexes of the {@code count} largest entries of
     * {@code scores}, best first, using a partial selection sort on an index
     * array. {@code scores} is not modified. Among equal scores the lower
     * index is ranked first. At most {@code scores.length} indexes are returned.
     */
    private static int[] topIndices(double[] scores, int count) {
        int[] order = new int[scores.length];
        for (int i = 0; i < order.length; i++) {
            order[i] = i;
        }

        int limit = Math.max(0, Math.min(count, order.length));
        for (int i = 0; i < limit; i++) {
            int best = i;
            for (int j = i + 1; j < order.length; j++) {
                if (scores[order[j]] > scores[order[best]]) {
                    best = j;
                }
            }
            int swapped = order[i];
            order[i] = order[best];
            order[best] = swapped;
        }

        int[] top = new int[limit];
        System.arraycopy(order, 0, top, 0, limit);
        return top;
    }

}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -