⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 rongqi.cpp

📁 我们没有按照过程化 程序设计的方式把数据结构和函数分开而是引入了一个类TextQuery 来封装它们我们将 在后面章节中更详细地了解类的使用文本的表示也做了修改以便能够在当前可用的编 259 第
💻 CPP
字号:
// 标准库头文件
#include <algorithm>
#include <string>
#include <vector>
#include <utility>
#include <map>
#include <set>
// 标准C++之前的iostream 头文件
#include <fstream.h>
// 标准C 头文件
#include <stddef.h>
#include <ctype.h>
// typedefs 使声明更简单
typedef pair<short,short> location;
typedef vector<location,allocator> loc;
typedef vector<string,allocator> text;
typedef pair<text*,loc*> text_loc;
class TextQuery {
public:
TextQuery() { memset( this, 0, sizeof( TextQuery )); }
static void
filter_elements( string felems ) { filt_elems = felems; }
void query_text();
void display_map_text();
void display_text_locations();
void doit() {
retrieve_text();
separate_words();
filter_text();
suffix_text();
strip_caps();
build_word_map();
}
private:
void retrieve_text();
void separate_words();
void filter_text();
void strip_caps();
void suffix_text();
void suffix_s( string& );
void build_word_map();
private:
vector<string,allocator> *lines_of_text;
260 第六章 抽象容器类型
text_loc *text_locations;
map< string,loc*,
less<string>,allocator> *word_map;
static string filt_elems;
};
string TextQuery::filt_elems( "\",.;:!<<)(\\/" );
int main()
{
TextQuery tq;
tq.doit();
tq.query_text();
tq.display_map_text();
}
void
TextQuery::
retrieve_text()
{
string file_name;
cout << "please enter file name: ";
cin >> file_name;
ifstream infile( file_name.c_str(), ios::in );
if ( !infile ) {
cerr << "oops! unable to open file "
<< file_name << " -- bailing out!\n";
exit( - 1 );
}
else cout << "\n";
lines_of_text = new vector<string,allocator>;
string textline;
while ( getline( infile, textline, '\n' ))
lines_of_text->push_back( textline );
}
void
TextQuery::
separate_words()
{
vector<string,allocator> *words = new vector<string,allocator>;
vector<location,allocator> *locations =
new vector<location,allocator>;
for ( short line_pos = 0; line_pos < lines_of_text->size();
line_pos++ )
{
short word_pos = 0;
string textline = (*lines_of_text)[ line_pos ];
string::size_type eol = textline.length();
string::size_type pos = 0, prev_pos = 0;
261 第六章 抽象容器类型
while (( pos = textline.find_first_of( ' ', pos ))
!= string::npos )
{
words->push_back(
textline.substr( prev_pos, pos - prev_pos ));
locations->push_back(
make_pair( line_pos, word_pos ));
word_pos++; pos++; prev_pos = pos;
}
words->push_back(
textline.substr( prev_pos, pos - prev_pos ));
locations ->push_back(make_pair(line_pos,word_pos));
}
text_locations = new text_loc( words, locations );
}
void
TextQuery::
filter_text()
{
if ( filt_elems.empty() )
return;
vector<string,allocator> *words = text_locations ->first;
vector<string,allocator>::iterator iter = words ->begin();
vector<string,allocator>::iterator iter_end = words ->end();
while ( iter != iter_end )
{
string::size_type pos = 0;
while (( pos = (*iter).find_first_of( filt_elems, pos ))
!= string::npos )
(*iter).erase(pos,1);
++iter;
}
}
void
TextQuery::
suffix_text()
{
vector<string,allocator> *words = text_locations ->first;
vector<string,allocator>::iterator iter = words ->begin();
vector<string,allocator>::iterator iter_end = words ->end();
while ( iter != iter_end )
{
if ( (*iter).size() <= 3 )
{ iter++; continue; }
262 第六章 抽象容器类型
if ( (*iter)[ (*iter).size()- 1 ] == 's' )
suffix_s( *iter );
// 其他的后缀处理放在这里
iter++;
}
}
void
TextQuery::
suffix_s( string &word )
{
string::size_type spos = 0;
string::size_type pos3 = word.size()- 3;
// "ous", "ss", "is", "ius"
string suffixes( "oussisius" );
if ( ! word.compare( pos3, 3, suffixes, spos, 3 ) ||
! word.compare( pos3, 3, suffixes, spos+6, 3 ) ||
! word.compare( pos3+1, 2, suffixes, spos+2, 2 ) ||
! word.compare( pos3+1, 2, suffixes, spos+4, 2 ))
return;
string ies( "ies" );
if ( ! word.compare( pos3, 3, ies ))
{
word.replace( pos3, 3, 1, 'y' );
return;
}
string ses( "ses" );
if ( ! word.compare( pos3, 3, ses ))
{
word.erase( pos3+1, 2 );
return;
}
// 去掉尾部的 's'
word.erase( pos3+2 );
// watch out for "'s"
if ( word[ pos3+1 ] == '\'' )
word.erase( pos3+1 );
}
void
TextQuery::
strip_caps()
{
vector<string,allocator> *words = text_locations ->first;
vector<string,allocator>::iterator iter = words ->begin();
vector<string,allocator>::iterator iter_end = words ->end();
263 第六章 抽象容器类型
string caps( "ABCDEFGHIJKLMNOPQRSTUVWXYZ" );
while ( iter != iter_end ) {
string::size_type pos = 0;
while (( pos = (*iter).find_first_of( caps, pos ))
!= string::npos )
(*iter)[ pos ] = tolower( (*iter)[pos] );
++iter;
}
}
void
TextQuery::
build_word_map()
{
word_map = new map< string, loc*, less<string>, allocator >;
typedef map<string,loc*,less<string>,allocator>::value_type
value_type;
typedef set<string,less<string>,allocator>::difference_type
diff_type;
set<string,less<string>,allocator> exclusion_set;
ifstream infile( "exclusion_set" );
if ( !infile )
{
static string default_excluded_words[25] = {
"the","and","but","that","then","are","been",
"can","can't","cannot","could","did","for",
"had","have","him","his","her","its","into",
"were","which","when","with","would"
};
cerr << "warning! unable to open word exclusion file! -- "
<< "using default set\n";
copy( default_excluded_words, default_excluded_words+25,
inserter( exclusion_set, exclusion_set.begin() ));
}
else {
istream_iterator< string, diff_type >
input_set( infile ), eos;
copy( input_set, eos,
inserter( exclusion_set, exclusion_set.begin() ));
}
// 遍历单词, 输入键/值对
vector<string,allocator> *text_words = text_locations ->first;
vector<location,allocator> *text_locs = text_locations ->second;
264 第六章 抽象容器类型
register int elem_cnt = text_words ->size();
for ( int ix = 0; ix < elem_cnt; ++ix )
{
string textword = ( *text_words )[ ix ];
if ( textword.size() < 3 ||
exclusion_set.count( textword ))
continue;
if ( ! word_map->count((*text_words)[ix] ))
{ // 没有, 添加:
loc *ploc = new vector<location,allocator>;
ploc->push_back( (*text_locs)[ix] );
word_map->insert( value_type( (*text_words)[ix], ploc ));
}
else (*word_map)[(*text_words)[ix]]->
push_back( (*text_locs)[ix] );
}
}
void
TextQuery::
query_text()
{
string query_text;
do {
cout << "enter a word against which to search the text.\n"
<< "to quit, enter a single character ==> ";
cin >> query_text;
if ( query_text.size() < 2 ) break;
string caps( "ABCDEFGHIJKLMNOPQRSTUVWXYZ" );
string::size_type pos = 0;
while (( pos = query_text.find_first_of( caps, pos ))
!= string::npos )
query_text[ pos ] = tolower( query_text[pos] );
// 如果对map 索引, 输入query_text, 如无
// 说明没有要找的词
if ( !word_map->count( query_text )) {
cout << "\nSorry. There are no entries for "
<< query_text << ".\n\n";
continue;
}
loc *ploc = (*word_map)[ query_text ];
set<short,less<short>,allocator> occurrence_lines;
loc::iterator liter = ploc->begin(),
liter_end = ploc->end();
while ( liter != liter_end ) {
occurrence_lines.insert(
265 第六章 抽象容器类型
occurrence_lines.end(), (*liter).first);
++liter;
}
register int size = occurrence_lines.size();
cout << "\n" << query_text
<< " occurs " << size
<< (size == 1 << " time:" : " times:")
<< "\n\n";
set<short,less<short>,allocator>::iterator
it=occurrence_lines.begin();
for ( ; it != occurrence_lines.end(); ++it ) {
int line = *it;
cout << "\t( line "
// 不要用从 0 开始有
// 文本行把用户弄迷糊了
<< line + 1 << " ) "
<< (*lines_of_text)[line] << endl;
}
cout << endl;
}
while ( ! query_text.empty() );
cout << "Ok, bye!\n";
}
void
TextQuery::
display_map_text()
{
typedef map<string,loc*,less<string>,allocator> map_text;
map_text::iterator iter = word_map->begin(),
iter_end = word_map->end();
while ( iter != iter_end ) {
cout << "word: " << (*iter).first << " (";
int loc_cnt = 0;
loc *text_locs = (*iter).second;
loc::iterator liter = text_locs->begin(),
liter_end = text_locs->end();
while ( liter != liter_end )
{
if ( loc_cnt )
cout << ",";
else ++loc_cnt;
cout << "(" << (*liter).first
<< "," << (*liter).second << ")";
++liter;
}
266 第六章 抽象容器类型
cout << ")\n";
++iter;
}
cout << endl;
}
void
TextQuery::
display_text_locations()
{
vector<string,allocator> *text_words = text_locations ->first;
vector<location,allocator> *text_locs = text_locations ->second;
register int elem_cnt = text_words ->size();
if ( elem_cnt != text_locs->size() )
{
cerr << "oops! internal error: word and position vectors "
<< "are of unequal size \n"
<< "words: " << elem_cnt << " "
<< "locs: " << text_locs->size()
<< " -- bailing out!\n";
exit( - 2 );
}
for ( int ix = 0; ix < elem_cnt; ix++ )
{
cout << "word: " << (*text_words)[ ix ] << "\t"
<< "location: ("
<< (*text_locs)[ix].first << ","
<< (*text_locs)[ix].second << ")"
<< "\n";
}
cout << endl;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -