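// File: word_freq.cpp
// (A stray "font size:" label from the web code viewer followed here; it is not part of the program.)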
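// Word-frequency counter: reads every line of the input text, splits it into individual words, strips punctuation,
// converts capital letters to lowercase, and drops words that carry no meaning, such as "and", "a" and "the".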
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>
#include <ctype.h>
#include <time.h>
using namespace std;
// Adds the built-in stop words (words that are not counted) to `exs`.
//
// exs: set to extend; entries already present are kept.
//
// Besides the 25 listed words, the empty string is inserted as well, so a
// lookup of "" in the resulting set succeeds.
void exclusion_set(std::set<std::string>&exs)//words exclusion
{
    static const char* const stop_words[] = {
        "the", "and", "to", "i", "they", "a", "is", "his", "my",
        "her", "it", "you", "then", "are", "been", "am", "can",
        "can't", "cannot", "could", "did", "do", "its", "for", "of",
        ""  // the empty string is part of the default exclusion set
    };
    const char* const* const stop_end =
        stop_words + sizeof(stop_words) / sizeof(stop_words[0]);

    for (const char* const* entry = stop_words; entry != stop_end; ++entry)
        exs.insert(std::string(*entry));
}
// Removes from `word`, in place, every character that appears in `filter`.
//
// word:   token to clean; may end up empty if it consisted only of filtered
//         characters.
// filter: the set of characters to strip (e.g. punctuation). An empty filter
//         leaves `word` untouched.
void filter_text(std::string &word ,std::string &filter)// take off interpunction
{
    std::string::size_type pos = 0;
    // Loop (rather than test once) so that ALL matching characters are
    // removed, not just the first one. After erase(pos, 1) the following
    // character has shifted into `pos`, so the search resumes from there.
    while ((pos = word.find_first_of(filter, pos)) != std::string::npos)
        word.erase(pos, 1);
}
// Lower-cases, in place, every character of `word` that appears in `cap`.
//
// word: token to convert.
// cap:  the set of characters considered capital letters; characters of
//       `word` that are not in this set are left unchanged.
void change_caps(std::string &word,std::string &cap)//capital letter into small letter
{
    std::string::size_type pos = 0;
    // Loop so that every capital letter is converted, not only the first one.
    while ((pos = word.find_first_of(cap, pos)) != std::string::npos)
    {
        // tolower() requires a value representable as unsigned char (or EOF);
        // cast first so that a negative char value is never passed.
        word[pos] = static_cast<char>(::tolower(static_cast<unsigned char>(word[pos])));
        // Always advance: guarantees termination even if tolower() leaves the
        // character unchanged (e.g. `cap` contains a non-letter).
        ++pos;
    }
}
// Reads whitespace-separated tokens from `infile` and tallies them.
//
// Each token is passed through filter_text() with the punctuation set
// ".,!:;(){}[]/ plus the double quote, then through change_caps() with the
// letters A-Z. Tokens that are empty after this, or that are found in
// `the_set`, are not counted.
//
// word_count: receives the tally; existing counts are incremented.
// the_set:    words to ignore.
// infile:     input stream, read until extraction fails (normally end of
//             input). Any std::istream is accepted, including std::ifstream.
void process(std::map<std::string,int> &word_count, std::set<std::string> &the_set, std::istream &infile)
{
    std::string filter("\".,!:;(){}[]/");            // interpunction
    std::string cap("ABCDEFGHIJKLMNOPQRSTUVWXYZ");   // capital letters
    std::string word;

    while (infile >> word)
    {
        filter_text(word, filter);
        change_caps(word, cap);

        // A token made only of punctuation becomes empty; never count it,
        // regardless of whether the exclusion set happens to contain "".
        if (word.empty())
            continue;
        if (the_set.count(word))
            continue;

        ++word_count[word];
    }
}
// One word together with the number of times it was counted.
// Plain data holder: both members are public and are not initialized by the
// type itself.
struct WordFrequence
{
    std::string word;   // the word
    double frequence;   // occurrence count of `word`
};
// Ordering for WordFrequence: higher frequence sorts first; entries with
// equal frequence are ordered by ascending word.
bool operator < (const WordFrequence &a, const WordFrequence &b)
{
    if (a.frequence != b.frequence)
        return a.frequence > b.frequence;   // descending by count
    return a.word < b.word;                 // tie: ascending by word
}
void sort_frequence(map<string, int> &word_count, set<WordFrequence> &word_frequence)//sort by frequence
{
WordFrequence temp;
for (map<string, int>::iterator gm=word_count.begin(); gm!=word_count.end(); ++gm)
{
temp.word=gm->first;
temp.frequence=gm->second;
word_frequence.insert(temp);
}
}
void display(set<WordFrequence> &word_frequence)
{
for (set<WordFrequence>::iterator iter=word_frequence.begin(); iter!=word_frequence.end(); ++iter)
cout<<"\t\t"<<iter->frequence<<" time(s)"<<":"<<"\t"<<iter->word<<endl;
cout<<endl;
cout<<"There are "<<word_frequence.size()<<" words in this file."<<endl;//the size of word
}
// Program entry point: asks for a file name, counts the words of that file
// (minus the default exclusion set), prints them via display() and reports
// the processor time used.
//
// Returns 0 on success and -1 if no file name could be read or the file
// could not be opened.
int main(){
    std::string file_name;
    std::cout << "Please enter the file name :";
    if (!(std::cin >> file_name))
    {
        // End of input or a read error: there is nothing to open.
        std::cerr << "No file name given, bailing out!" << std::endl;
        return -1;
    }

    std::ifstream infile(file_name.c_str(), std::ios::in);
    if (!infile)
    {
        // Separator added so the file name and the message no longer run together.
        std::cerr << "Unable to open file " << file_name << ", bailing out!" << std::endl;
        // Returning from main avoids calling exit(), which is declared in
        // <stdlib.h> -- a header this file does not include.
        return -1;
    }

    const clock_t start = clock();  // time record

    std::set<std::string> exclude_set;
    std::map<std::string, int> word_count;
    exclusion_set(exclude_set);
    process(word_count, exclude_set, infile);

    std::set<WordFrequence> word_frequence;
    sort_frequence(word_count, word_frequence);
    display(word_frequence);

    const clock_t finish = clock();
    // clock() counts in units of 1/CLOCKS_PER_SEC seconds, not milliseconds;
    // convert explicitly so the printed "ms" value is correct on every platform.
    const double elapsed_ms = (finish - start) * 1000.0 / CLOCKS_PER_SEC;
    std::cout << "This program is cost: " << elapsed_ms << "ms" << std::endl; // time costed
    return 0;
}
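// --- Web code-viewer residue (not part of the program) ---
// Keyboard shortcuts:
//   Copy code:            Ctrl + C
//   Search code:          Ctrl + F
//   Full-screen mode:     F11
//   Toggle theme:         Ctrl + Shift + D
//   Show shortcuts:       ?
//   Increase font size:   Ctrl + =
//   Decrease font size:   Ctrl + -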