getfiletimes.java

来自「用java编写的tf*idf 结果输出txt文本」· Java 代码 · 共 181 行

JAVA
181
字号
import java.io.*;
import java.math.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Scanner;

public class GetFileTimes {

    /**
     * @param args
     */
    public static void main(String[] args) throws Exception
    {
        // TODO Auto-generated method stub
        boolean flg = true;
        String title = "plase input files path";
        String path = "";
        while(flg)
        {
            
            System.out.println(title + "!");
            Scanner sc=new  Scanner(System.in);
            path = sc.nextLine();
     
            System.out.println("is right input path: " + path +"  Yes or No?");
        
            Scanner sc1=new  Scanner(System.in);
            String ret = sc1.nextLine();

            if("Y".equals(ret))
            {
                flg = false;
            }else
            {
                title = "plase input files path anage!!";
            }
            
        }

        HashMap<String,Integer> hash = getAllWord(path);
        
            File d = new File(path);
            File lists[] = d.listFiles();
            File delete = new File(path + "\\wrter.txt");
            delete.delete();
            BufferedWriter writer = new BufferedWriter(new FileWriter(path + "\\wrter.txt"));
            for(int i =0; i < lists.length; i++ )
            {
                writer.write(i + 1 + " ");
                writer.flush();
                             
                BufferedReader readerKhi = new BufferedReader(new FileReader(lists[i]));
                String khi = readerKhi.readLine();
                while(khi != null)
                {
                    if(khi.length()!= 0)
                    {
                        String strs [] = khi.split(" ");
                        for(int k = 0; k < strs.length; k++)
                        {
                           double tf = Math.log(gettimes(lists[i].getAbsolutePath(),strs[k]));
                           double k1 = 1.0d + tf;
                           double t2 = lists.length;
                           double t3 = getfiles(path,strs[k]);
                           double k2 = Math.log(t2 / t3);
                           double k3 = k1*k2;
                           
                           
                           writer.write(String.valueOf(hash.get(strs[k])) +":" + String.valueOf(k3));
                           writer.write(" ");
                           writer.flush();
                        }
                    }
                    
                    khi = readerKhi.readLine();
                }
                writer.newLine();
                writer.flush();
                
            }
            writer.close();
    }
    	
    
    public static int getfiles(String filesPath, String str) throws Exception
    {
        	int ret = 0;    

            File d = new File(filesPath);
            File lists[] = d.listFiles();
            for(int i =0; i < lists.length; i++ )
            {
                boolean find = false;
                BufferedReader readerKhi = new BufferedReader(new FileReader(lists[i]));
                String khi = readerKhi.readLine();
                while(khi!= null)
                {
                    String strs [] = khi.split(" ");
                    for(int k = 0; k < strs.length; k++)
                    {
                        if(str.equals(strs[k]))
                        {
                            ret = ret + 1;
                            find = true;
                        }
                        if(find)
                        {
                            k = strs.length;
                        }
                    }
                    khi = readerKhi.readLine();
                    if(find)
                    {
                        khi = null;
                    }
                }
                readerKhi.close();
            }
            System.out.println(str + ret);
            return ret;
    }
    
    
    public static int gettimes(String fileName, String str)
    {
        int ret = 0;
        try{
        
        BufferedReader readerKhi = new BufferedReader(new FileReader(fileName));
        String khi = readerKhi.readLine();
        
        while(khi!=null)
        {
            String strs [] = khi.split(" ");
            for(int i = 0; i < strs.length; i++)
            {
                if(str.equals(strs[i]))
                {
                    ret = ret + 1; 
                }
            }
            khi = readerKhi.readLine();
        }
        readerKhi.close();
        System.out.println("tf" + str + ret);
        
        return ret;
        }catch(Exception e)
        {
            System.out.println(e.toString());
        }
        return ret;
    }
    
    public static HashMap<String,Integer> getAllWord(String path){
    	HashMap<String,Integer> hash = new HashMap<String,Integer>();
    	
    	File d = new File(path);
        File lists[] = d.listFiles();
        int index =1;
        for(int i=0; i<lists.length; i++){
        	try{
        		String s = lists[i].getAbsolutePath();
        		BufferedReader br = new BufferedReader(new FileReader(s));
        		String line;
        		while((line=br.readLine())!=null){
        			String[] arr = line.split(" ");
        			for(int j=0; j<arr.length; j++){
        				if(!hash.containsKey(arr[j])){
        					hash.put(arr[j], index++);
        				}
        			}
        		}
        		br.close();
        	}catch(Exception e){
        		e.printStackTrace();
        	}
        }
        return hash;
    }
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?