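// File: stawords.cpp
// (The "font size" label that followed the file name was code-viewer page residue, not source.)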
#pragma warning(disable:4786)
#include"stdafx.h"
#include"EngWordSout.h"
typedef map<string, int>::value_type sival_type;
// Reads the file named by file_name line by line and appends every line to
// the container that lines_of_text points to (declared outside this block;
// presumably a vector<string> -- confirm against its declaration).
// If the file cannot be opened, a message is printed to standard output and
// the process terminates with exit status 1.
// Note: an earlier variant, left in the original as commented-out code,
// allocated a new vector<string> and returned it instead.
void retrieve_text(std::string file_name)
{
    std::ifstream input( file_name.c_str(), std::ios::in );
    if ( input.fail() )
    {
        std::cout << "Conn't open " << file_name.c_str() << " !" << std::endl;
        exit (1);
    }

    // Store each line without its trailing newline.
    for ( std::string current_line; std::getline( input, current_line, '\n' ); )
    {
        lines_of_text->push_back( current_line );
    }
}
// Converts every ASCII capital letter (A-Z) in each line of *text_file to
// lower case, in place. All other characters are left untouched.
void strip_caps( std::vector<std::string> *text_file )
{
    const std::string upper_case( "ABCDEFGHIJKLMNOPQRSTUVWXYZ" );

    for ( std::vector<std::string>::size_type row = 0; row < text_file->size(); ++row )
    {
        std::string &line = (*text_file)[ row ];
        for ( std::string::size_type col = 0; col < line.size(); ++col )
        {
            // Only characters listed in upper_case are rewritten.
            if ( upper_case.find( line[ col ] ) != std::string::npos )
            {
                line[ col ] = tolower( line[ col ] );
            }
        }
    }
}
// Splits every line of *text_file into words and returns them, in order of
// appearance, in a newly allocated vector. The vector is created with `new`
// and returned as a raw pointer; this function never deletes it.
//
// A word is a maximal run of lowercase ASCII letters (a-z); every other
// character, including capital letters, acts as a separator. Lines that
// contain no lowercase letter contribute nothing.
//
// Fixes relative to the previous implementation:
//  * the letter set now includes 'z' (it used to stop at 'y', so "lazy" was
//    split into "la" and "y");
//  * a line whose last word is a single letter no longer produces a bogus
//    trailing entry (e.g. "ab c" used to yield "ab" and "ab c");
//  * the line index is no longer a `short`, so inputs with more than 32767
//    lines are handled.
std::vector<std::string> *separate_words( const std::vector<std::string> *text_file )
{
    const std::string letters( "abcdefghijklmnopqrstuvwxyz" );

    std::vector<std::string> *words = new std::vector<std::string>;

    for ( std::vector<std::string>::size_type line_no = 0;
          line_no < text_file->size(); ++line_no )
    {
        const std::string &line = (*text_file)[ line_no ];

        // `start` is the first letter of the current word, or npos when the
        // rest of the line holds no more letters.
        std::string::size_type start = line.find_first_of( letters );
        while ( start != std::string::npos )
        {
            // `stop` is one past the last letter of the current word.
            std::string::size_type stop = line.find_first_not_of( letters, start );
            if ( stop == std::string::npos )
            {
                stop = line.size();
            }
            words->push_back( line.substr( start, stop - start ) );

            // Searching from line.size() simply yields npos and ends the loop.
            start = line.find_first_of( letters, stop );
        }
    }
    return words;
}
// Counts how often each word of *words occurs and returns the counts in a
// newly allocated map (word -> number of occurrences). The map is created
// with `new` and returned as a raw pointer; this function never deletes it.
//
// Words are skipped when they are shorter than 3 characters or when they are
// listed in the exclusion file "pkg95.txt" (one entry per line, opened
// relative to the current directory). If that file cannot be opened, a
// message is printed to standard output and the process terminates with exit
// status 1.
//
// Changes relative to the previous implementation:
//  * a trailing '\r' is removed from each exclusion entry, so a file with
//    CRLF line endings still matches words when it is read without newline
//    translation;
//  * counting uses a single operator[] lookup instead of count() followed by
//    insert()/operator[].
std::map< std::string, int > *appear_total( const std::vector<std::string> *words )
{
    // Build the set of excluded words.
    std::set<std::string> exclusion_set;
    std::ifstream exclusion_file( "pkg95.txt", std::ios::in );
    if ( !exclusion_file )
    {
        std::cout << "Conn't open pkg95.txt !" << std::endl;
        exit (1);
    }
    std::string entry;
    while ( std::getline( exclusion_file, entry, '\n' ) )
    {
        if ( !entry.empty() && entry[ entry.size() - 1 ] == '\r' )
        {
            entry.erase( entry.size() - 1 );
        }
        exclusion_set.insert( entry );
    }

    std::map<std::string, int> *word_map = new std::map<std::string, int>;

    for ( std::vector<std::string>::const_iterator iter = words->begin();
          iter != words->end(); ++iter )
    {
        // Ignore very short words and anything on the exclusion list.
        if ( iter->size() < 3 || exclusion_set.count( *iter ) )
        {
            continue;
        }
        // operator[] value-initialises a missing entry to 0, so the first
        // occurrence ends up with a count of 1.
        ++(*word_map)[ *iter ];
    }
    return word_map;
}
// Re-indexes the word counts in *text_map by count and writes a report.
//
// Returns a newly allocated multimap (count -> word) ordered by descending
// count; a multimap is used because several words may share the same count.
// The multimap is created with `new` and returned as a raw pointer; this
// function never deletes it.
//
// Side effect: writes one line per word to "3_1_2out.txt" in the current
// directory. Each line holds the word, padded with spaces to 15 columns,
// followed by its count. If the file cannot be opened, an error is printed to
// standard error, nothing is written, and the multimap is still returned.
//
// Fixes relative to the previous implementation:
//  * words longer than 15 characters no longer make the unsigned expression
//    `15 - size()` wrap around, which produced a near-endless padding loop;
//    such words are written without padding;
//  * the "results written" message is no longer printed when the output file
//    could not be opened.
std::multimap< int, std::string, std::greater<int> > * multimap_total( std::map<std::string, int> *text_map )
{
    typedef std::multimap< int, std::string, std::greater<int> > count_index;

    count_index *word_map = new count_index;
    for ( std::map<std::string, int>::const_iterator src = text_map->begin();
          src != text_map->end(); ++src )
    {
        word_map->insert( std::make_pair( (*src).second, (*src).first ) );
    }

    const std::string ofile( "3_1_2out.txt" );
    std::ofstream outfile( ofile.c_str() );
    if ( !outfile )
    {
        std::cerr << "error: unable to open output file: "
                  << ofile << std::endl;
        return word_map;
    }

    const std::string::size_type column_width = 15;
    for ( count_index::const_iterator row = word_map->begin();
          row != word_map->end(); ++row )
    {
        const std::string &word = (*row).second;
        outfile << word;
        // Pad only when the word is shorter than the column; the guard keeps
        // the unsigned subtraction from underflowing.
        if ( word.size() < column_width )
        {
            outfile << std::string( column_width - word.size(), ' ' );
        }
        outfile << "出现 " << (*row).first << "\t次" << '\n';
    }

    std::cout << "程序已将处理结果写入3_1_2out.txt,该文件保存在当前目录"
              << std::endl;
    return word_map;
}
// Writes the word counts in *text_map to "3_1_1out.txt" in the current
// directory, one line per word in the map's iteration order. Each line holds
// the word, padded with spaces to 15 columns, followed by its count.
// If the file cannot be opened, an error is printed to standard error and
// nothing else happens.
//
// Fixes relative to the previous implementation:
//  * words longer than 15 characters no longer make the unsigned expression
//    `15 - size()` wrap around, which produced a near-endless padding loop;
//    such words are written without padding;
//  * the "results written" message is no longer printed when the output file
//    could not be opened.
void map_output( std::map<std::string, int> *text_map )
{
    const std::string ofile( "3_1_1out.txt" );
    std::ofstream outfile( ofile.c_str() );
    if ( !outfile )
    {
        std::cerr << "error: unable to open output file: "
                  << ofile << std::endl;
        return;
    }

    const std::string::size_type column_width = 15;
    for ( std::map<std::string, int>::const_iterator row = text_map->begin();
          row != text_map->end(); ++row )
    {
        const std::string &word = (*row).first;
        outfile << word;
        // Pad only when the word is shorter than the column; the guard keeps
        // the unsigned subtraction from underflowing.
        if ( word.size() < column_width )
        {
            outfile << std::string( column_width - word.size(), ' ' );
        }
        outfile << "出现 " << (*row).second << "\t次" << '\n';
    }

    std::cout << "程序已将处理结果写入3_1_1out.txt,该文件保存在当前目录"
              << std::endl;
}
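// ---------------------------------------------------------------------------
// Residue from the web code viewer this file was copied from (not source code).
// Keyboard shortcuts listed by the viewer:
//   copy code: Ctrl + C
//   search code: Ctrl + F
//   full-screen mode: F11
//   switch theme: Ctrl + Shift + D
//   show shortcuts: ?
//   increase font size: Ctrl + =
//   decrease font size: Ctrl + -
// ---------------------------------------------------------------------------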