⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 word_freq.cpp

📁 此程序完成任意一个英文文本文件中英文单词的统计工作。要求:把英文单词出现频率次数由高到低打印出来;只要英文单词形式不一样就算两个词
💻 CPP
字号:
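// Reads the input text and splits it into individual words, strips punctuation,
// converts upper-case letters to lower case and drops words that carry little meaning, such as "and", "a" and "the".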
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector> 
#include <ctype.h>
#include <time.h>
   
using namespace std;

// Adds the built-in list of ignored words to `exs`.
//   exs - set that receives the words; entries already present are kept.
// The empty string is listed on purpose: it is part of the resulting set, so
// a lookup of "" in `exs` succeeds just like a lookup of "the".
void exclusion_set(set<string>&exs)//words exclusion
{
    static const char *const ignored_words[]={
        "the","and","to","i","they","a","is","his","my",
        "her","it","you","then","are","been","am","can",
        "can't","cannot","could","did","do","its","for","of",
        ""
    };
    // Derive the element count from the table so the two cannot drift apart.
    const size_t ignored_total=sizeof(ignored_words)/sizeof(ignored_words[0]);
    for(size_t idx=0;idx<ignored_total;++idx)
        exs.insert(string(ignored_words[idx]));
}

// Removes from `word` every character that appears in `filter`.
//   word   - token to clean, modified in place; may end up empty.
//   filter - the set of characters to strip (e.g. punctuation marks).
void filter_text(string &word ,string &filter)// take off interpunction 
{
    string::size_type pos = 0;
    // Loop rather than test once: a token such as "(hello)," holds several
    // unwanted characters. After an erase the next character slides into
    // `pos`, so the search resumes from the same index.
    while((pos=word.find_first_of(filter,pos))!=string::npos)
        word.erase(pos,1);
}

// Lower-cases every character of `word` that appears in `cap`.
//   word - token to convert, modified in place.
//   cap  - the set of characters to be lower-cased (e.g. "A".."Z").
void change_caps(string &word,string &cap)//capital letter into small letter 
{
    string::size_type pos = 0;
    // Visit every match, not only the first one, so "HELLO" becomes "hello".
    while((pos=word.find_first_of(cap,pos))!=string::npos)
    {
        // tolower() requires a value representable as unsigned char (or EOF);
        // passing a negative plain char is undefined behaviour.
        word[pos] = static_cast<char>(tolower(static_cast<unsigned char>(word[pos])));
        // Always advance so the loop terminates even when tolower() leaves
        // the character unchanged.
        ++pos;
    }
}
   
// Reads whitespace-separated tokens from `infile` and tallies them.
//   word_count - receives one entry per counted word; its counter is
//                incremented each time the word is seen.
//   the_set    - words that must not be counted.
//   infile     - input stream, read until extraction fails.
// Each token is handed to filter_text() with the punctuation list and to
// change_caps() with the upper-case alphabet before it is looked up.
void process(map<string,int> &word_count, set<string> &the_set,ifstream &infile)
{
    string filter("\".,!:;(){}[]/");//interpunction
    string cap("ABCDEFGHIJKLMNOPQRSTUVWXYZ");//capital letters
    string word;
    while(infile>>word)
    {
        filter_text(word,filter);
        change_caps(word,cap);
        // A token made only of filtered characters can become empty; skip it
        // explicitly instead of relying on "" being present in `the_set`.
        if(word.empty())
            continue;
        if(the_set.count(word))
            continue;
        ++word_count[word];
    }
}

// Plain record pairing a word with the number stored for it.
// Both fields are public and are filled in directly by the code that uses it.
struct WordFrequence
{
	string word;       // the word itself
	double frequence;  // value associated with `word`
};

// Ordering for WordFrequence: the entry with the larger `frequence` comes
// first; entries with equal `frequence` are ordered by ascending `word`.
bool operator < (const WordFrequence &a, const WordFrequence &b)
{
	if (a.frequence != b.frequence)
		return a.frequence > b.frequence;
	return a.word < b.word;
}

void sort_frequence(map<string, int> &word_count, set<WordFrequence> &word_frequence)//sort by frequence
{
	WordFrequence temp;
	for (map<string, int>::iterator gm=word_count.begin(); gm!=word_count.end(); ++gm)
	{
		temp.word=gm->first;
		temp.frequence=gm->second;
		word_frequence.insert(temp);
	}
}

// Prints every entry of `word_frequence` in the set's iteration order, one
// per line, followed by a blank line and the number of entries.
//   word_frequence - entries to print; it is not modified.
void display(set<WordFrequence> &word_frequence)
{
	for (set<WordFrequence>::iterator iter=word_frequence.begin(); iter!=word_frequence.end(); ++iter)
	{
		// `frequence` is a double; streamed as-is, a value of 1,000,000 or more
		// would appear in scientific notation (e.g. 1.23457e+06). Print it as
		// an integer instead. '\n' avoids flushing the stream on every line.
		cout<<"\t\t"<<static_cast<long>(iter->frequence)<<" time(s)"<<":"<<"\t"<<iter->word<<'\n';
	}
	cout<<endl;
	cout<<"There are "<<word_frequence.size()<<" words in this file."<<endl;//the size of word 
}
	
// Entry point: asks for a file name, counts the words of that file, prints
// the report and the processor time used.
// Returns 0 on success and -1 when the file cannot be opened.
int main(){
    string file_name;
    cout<<"Please enter the file name :";
    cin>>file_name;
    ifstream infile(file_name.c_str(),ios::in);
    if(!infile)
    {
        // Separator added so the file name does not run into the message.
        cout<<"Unable to open file "<<file_name<<" -- bailing out!"<<endl;
        // Returning from main yields the same exit status as exit(-1) without
        // needing a declaration of exit().
        return -1;
    }
    const clock_t start=clock();//time record 

    set<string> exclude_set;
    map<string,int> word_count;
    exclusion_set(exclude_set);
    process(word_count,exclude_set,infile);

    set<WordFrequence> word_frequence;
    sort_frequence(word_count, word_frequence);
    display(word_frequence);

    const clock_t end=clock();
    // clock() counts ticks of processor time, CLOCKS_PER_SEC per second;
    // convert to milliseconds so the value matches the "ms" label.
    const double elapsed_ms=static_cast<double>(end-start)*1000.0/CLOCKS_PER_SEC;
    cout<<"This program is cost: "<<elapsed_ms<<"ms"<<endl; //time costed 

    return 0;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -