// textquery.h
#pragma once
#include <ctype.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <algorithm>
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>
// This system is used to query whether a word occurs in a text.
// First, a vector stores the whole text, one line per element.
// Then a second vector stores the individual words of the text,
// and a third vector stores the location of each of those words.
// The word vector and the location vector are bound together as a pair,
// and from that pair a map from each word to its locations is built.
// Namespace setup and type definitions used throughout this file.
// NOTE(review): a using-directive at header scope is visible in every file
// that includes this header.
using namespace std;
// Position of one word: SeparateText() stores the line index in .first and
// its per-line word counter in .second.
typedef pair<short, short> location;
// A list of word positions.
typedef vector<location> loc;
// A list of strings; used both for the lines of the text and for its words.
typedef vector<string> text;
// The word list paired with the list of positions built alongside it.
typedef pair<text *, loc *> text_loc;
//class definition
class textQuery{
public:
textQuery(){memset(this, 0, sizeof(textQuery));}
static void FilterElements(string felems){filt_elems = felems;}
void QueryText();
void DisplayMapText();
void DisplayTextLocations();
void DoRun()
{
RetriveText();
SeparateText();
FilteText();
SuffixText();
StripCaps();
BuildWordMap();
}
protected:
void RetriveText();
void SeparateText();
void FilteText();
void SuffixText();
void StripCaps();
void BuildWordMap();
private:
text *linesOfText;
text_loc *textLocations;
map<string, loc*> *wordMap;
static string filt_elems;
};
// Default filter characters consulted by FilteText():  " , . ; : ! < ) ( \ /
// The literal contains '<' twice.
// NOTE(review): possibly "<>" was intended instead of "<<" - confirm.
// NOTE(review): this is a non-inline definition in a header; including the
// header from more than one translation unit would define it more than once.
string textQuery::filt_elems("\",.;:!<<)(\\/");
//read the text
void textQuery::RetriveText()
{
string fileName;
char fileName_[100];
cout << "Please input the name of the text:";
cin >> fileName_;
ifstream inputFile(fileName_);
if (!inputFile)
{
cerr << "Oops! Unable to open file" << fileName_ << endl;
cerr << "Quit!";
exit( - 1);
}
else
cout << endl;
linesOfText = new text;
string sText;
while (getline(inputFile, sText))
{
linesOfText->push_back(sText);
}
}
//separate the words
void textQuery::SeparateText()
{
text *separatedWords = new text;
loc *wordsLocatioin = new loc;
for (int textPos = 0; textPos < linesOfText->size(); textPos++)
{
short wordPos = 0;
string textLine = (*linesOfText)[textPos];
string::size_type eol = textLine.length();
string::size_type pos = 0, pre_pos = 0;
while ((pos = textLine.find(' ', pos)) != string::npos)
{
short wordPos = 0;
separatedWords->push_back(
textLine.substr(pre_pos, pos - pre_pos));
wordsLocatioin->push_back(
make_pair(textPos, wordPos));
wordPos++;
pos++;
pre_pos = pos;
}
separatedWords->push_back(
textLine.substr(pre_pos, pos - pre_pos));
wordsLocatioin->push_back(
make_pair(textPos, wordPos));
}
textLocations = new text_loc(separatedWords, wordsLocatioin);
}
// Erase unwanted symbols (such as quotes and punctuation) from every
// separated word. The characters to remove are those in filt_elems; when
// that set is empty nothing is done.
void textQuery::FilteText()
{
    if (filt_elems.empty())
    {
        return;
    }
    text *words = textLocations->first;
    for (text::iterator iter = words->begin(); iter != words->end(); ++iter)
    {
        // Remove each filtered character individually. erase(pos) without a
        // count drops everything from pos to the end of the word, which
        // turned a word with a leading quote into an empty string.
        string::size_type pos = 0;
        while ((pos = iter->find_first_of(filt_elems, pos)) != string::npos)
        {
            // pos is not advanced: the next character has moved into it.
            iter->erase(pos, 1);
        }
    }
}
void textQuery::SuffixText()
{
text *words = textLocations->first;
text::iterator iter = words->begin();
text::iterator iter_end = words->end();
while (iter != iter_end)
{
if ((*iter).size() < 3)
{
iter++;
continue;
}
//string::size_type pos = 0;
if ((*iter)[(*iter).size() - 1] == 's')
{
//suffix_s(*iter);
}
iter++;
}
}
void textQuery::StripCaps()
{
text *words = textLocations->first;
text::iterator iter = words->begin();
text::iterator iter_end = words->end();
string caps("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
while (iter != iter_end)
{
string::size_type pos = 0;
while ((pos = (*iter).find_first_of(caps, pos)) != string::npos)
{
(*iter)[pos] = tolower((*iter)[pos]);
}
iter++;
}
}
void textQuery::BuildWordMap()
{
wordMap = new map<string, loc*>;
set<string> diffWords;
char excludeFileName[] = "exclude.txt";
ifstream exCludeFile(excludeFileName, ios::in);
if (!exCludeFile)
{
static string exWords[] = {
"the","and","but","that","then","are","been",
"can","can't","cannot","could","did","for",
"had","have","him","his","her","its","into",
"were","which","when","with","would"
};
cerr << "Exclusion file not found, use default!" << endl;
copy(exWords, exWords + 25, inserter(diffWords, diffWords.begin()));
}
else
{
//istream_iterator<string, set<string>> inputSet(exCludeFile);
//copy(inputSet, inputSet::)
exit(0);
}
text * words = textLocations->first;
loc * locations = textLocations->second;
register int elemCnt = words->size();
for (int ix = 0; ix < elemCnt; ix++)
{
string sWord((*words)[ix]);
if (sWord.size() < 3 || diffWords.count(sWord))
{
continue;
}
if (!wordMap->count(sWord))
{
loc *ploc = new loc;
ploc->push_back((*locations)[ix]);
wordMap->insert(map<string, loc *>::value_type(sWord, ploc));
}
else
(*wordMap)[sWord]->push_back((*locations)[ix]);
}
}
void textQuery::QueryText()
{
string qWord;
do
{
cout << "Please input the word you want to query, or you input a single letter to quit"<<endl;
cout << "The word you want to query:";
cin >> qWord;
cout << endl;
if (qWord.size() < 2)
{
break;
}
string::size_type pos = 0;
string caps("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
while ((pos = qWord.find_first_of(caps, pos)) != string::npos)
{
qWord[pos] = tolower(qWord[pos]);
}
if (!wordMap->count(qWord))
{
cout << "Sorry, "<<qWord<<" was not found! try another word!"<<endl;
cout << "Entries for " << qWord << " is 0."<<endl;
continue;
}
loc *ploc = (*wordMap)[qWord];
set<short> occurrence_lines;
loc::iterator iter = ploc->begin();
loc::iterator iter_end = ploc->end();
while (iter != iter_end)
{
occurrence_lines.insert(occurrence_lines.end(), (*iter).first);
iter++;
}
int size = occurrence_lines.size();
cout << endl;
cout << qWord << " occurs"
<< size << (size == 1 ? "time" : "times");
cout << endl;
cout << endl;
set<short>::iterator it = occurrence_lines.begin();
for (; it != occurrence_lines.end(); it++)
{
int line = *it;
cout << "\t( line"
<< line + 1 << " ) "
<< (*linesOfText)[line] << endl;
}
cout << endl;
} while(!qWord.empty());
}
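// (Code-viewer residue, not part of the program.)
// Keyboard shortcuts: copy code: Ctrl + C; search code: Ctrl + F;
// full-screen mode: F11; switch theme: Ctrl + Shift + D; show shortcuts: ?;
// increase font size: Ctrl + =; decrease font size: Ctrl + -.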