⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 textquery.h

📁 一个简单的字符串查询程序
💻 H
字号:
#pragma once

#include <ctype.h>
#include <stddef.h>
#include <stdlib.h>

#include <algorithm>
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

// This system answers queries about whether (and where) a word occurs in a text.
// First, a vector stores the whole text, one line per element.
// Then a second vector stores the individual words of the text,
// and a third vector stores the location (line, position) of each word.
// The word vector and the location vector are bound together as a pair,
// and from that pair a map from each word to its locations is built.

//some definition
using namespace std;
typedef  pair<short, short> location;
typedef  vector<location> loc;
typedef  vector<string> text;
typedef  pair<text *, loc *> text_loc;

//class definition
class textQuery{
public:
	textQuery(){memset(this, 0, sizeof(textQuery));}

	static void FilterElements(string felems){filt_elems = felems;}
	void QueryText();
	void DisplayMapText();
	void DisplayTextLocations();
	void DoRun()
	{
		RetriveText();
		SeparateText();
		FilteText();
		SuffixText();
		StripCaps();
		BuildWordMap();
	}

protected:
	void RetriveText();
	void SeparateText();
	void FilteText();
	void SuffixText();
	void StripCaps();
	void BuildWordMap();

private:
	text *linesOfText;
	text_loc *textLocations;
	map<string, loc*> *wordMap;
	static string filt_elems;
};

string textQuery::filt_elems("\",.;:!<<)(\\/");



//read the text
void textQuery::RetriveText()
{
	string fileName;
	char fileName_[100];
	cout << "Please input the name of the text:";
	cin >> fileName_;
	ifstream inputFile(fileName_);
	if (!inputFile)
	{
		cerr << "Oops! Unable to open file" << fileName_ << endl;
		cerr << "Quit!";
		exit( - 1);
	}
	else
		cout << endl;

	linesOfText = new text;
	string sText;
	while (getline(inputFile, sText))
	{
		linesOfText->push_back(sText);
	}
}


//separate the words
void textQuery::SeparateText()
{
	text *separatedWords = new text;
	loc *wordsLocatioin = new loc;

	for (int textPos = 0; textPos < linesOfText->size(); textPos++)
	{
		short wordPos = 0;
		string textLine = (*linesOfText)[textPos];
		string::size_type eol = textLine.length();
		string::size_type pos = 0, pre_pos = 0;

		while ((pos = textLine.find(' ', pos)) != string::npos)
		{
			short wordPos = 0;
			separatedWords->push_back(
				textLine.substr(pre_pos, pos - pre_pos));
			wordsLocatioin->push_back(
				make_pair(textPos, wordPos));
			wordPos++;
			pos++;
			pre_pos = pos;
		}

		separatedWords->push_back(
			textLine.substr(pre_pos, pos - pre_pos));
		wordsLocatioin->push_back(
			make_pair(textPos, wordPos));
	}

	textLocations = new text_loc(separatedWords, wordsLocatioin);
}

//erase those unused symbols like ' or "
void textQuery::FilteText()
{
	if (filt_elems.empty())
	{
		return;
	}


	text *words = textLocations->first;
	text::iterator iter = words->begin();
	text::iterator iter_end = words->end();

	while (iter != iter_end)
	{
		string::size_type pos = 0;
		if ((pos = (*iter).find_first_of(filt_elems, pos)) != string::npos)
		{
			(*iter).erase(pos);
		}

		iter++;
	}
}

void textQuery::SuffixText()
{
	text *words = textLocations->first;
	text::iterator iter = words->begin();
	text::iterator iter_end = words->end();

	while (iter != iter_end)
	{
		if ((*iter).size() < 3)
		{
			iter++;
			continue;
		}

		//string::size_type pos = 0;
		if ((*iter)[(*iter).size() - 1] == 's')
		{
			//suffix_s(*iter);
		}

		iter++;
	}
}

void textQuery::StripCaps()
{
	text *words = textLocations->first;
	text::iterator iter = words->begin();
	text::iterator iter_end = words->end();

	string caps("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
	while (iter != iter_end)
	{
		string::size_type pos = 0;
		while ((pos = (*iter).find_first_of(caps, pos)) != string::npos)
		{
			(*iter)[pos] = tolower((*iter)[pos]);
		}
		iter++;
	}
}

void textQuery::BuildWordMap()
{
	wordMap = new map<string, loc*>;

	set<string> diffWords;

	char excludeFileName[] = "exclude.txt";
	ifstream exCludeFile(excludeFileName, ios::in);
	if (!exCludeFile)
	{
		static string exWords[] = {
			"the","and","but","that","then","are","been",
			"can","can't","cannot","could","did","for",
			"had","have","him","his","her","its","into",
			"were","which","when","with","would"
		};

		cerr << "Exclusion file not found, use default!" << endl;
		copy(exWords, exWords + 25, inserter(diffWords, diffWords.begin()));
	}
	else
	{
		//istream_iterator<string, set<string>> inputSet(exCludeFile);
		//copy(inputSet, inputSet::)
		exit(0);
	}
	
	text * words = textLocations->first;
	loc * locations = textLocations->second;

	register int elemCnt = words->size();
	for (int ix = 0; ix < elemCnt; ix++)
	{
		string sWord((*words)[ix]);
		if (sWord.size() < 3 || diffWords.count(sWord))
		{
			continue;
		}

		if (!wordMap->count(sWord))
		{
			loc *ploc = new loc;
			ploc->push_back((*locations)[ix]);
			wordMap->insert(map<string, loc *>::value_type(sWord, ploc));
		}
		else
			(*wordMap)[sWord]->push_back((*locations)[ix]);
	}
}

void textQuery::QueryText()
{
	string qWord;
	do 
	{
		cout << "Please input the word you want to query, or you input a single letter to quit"<<endl;
		cout << "The word you want to query:";
		cin >> qWord;
		cout << endl;

		if (qWord.size() < 2)
		{
			break;
		}
		string::size_type pos = 0;
		string caps("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
		while ((pos = qWord.find_first_of(caps, pos)) != string::npos)
		{
			qWord[pos] = tolower(qWord[pos]);
		}

		if (!wordMap->count(qWord))
		{
			cout << "Sorry, "<<qWord<<" was not found! try another word!"<<endl;
			cout << "Entries for " << qWord << " is 0."<<endl;
			continue;
		}
		
		loc *ploc = (*wordMap)[qWord];
		set<short> occurrence_lines;
		loc::iterator iter = ploc->begin();
		loc::iterator iter_end = ploc->end();

		while (iter != iter_end)
		{
			occurrence_lines.insert(occurrence_lines.end(), (*iter).first);
			iter++;
		}

		int size = occurrence_lines.size();
		cout << endl;
		cout << qWord << " occurs" 
			 << size << (size == 1 ? "time" : "times");
		cout << endl;
		cout << endl;

		set<short>::iterator it = occurrence_lines.begin();

		for (; it != occurrence_lines.end(); it++)
		{
			int line = *it;
			cout << "\t( line"
				 << line + 1 << " ) "
				 << (*linesOfText)[line] << endl;
		}

		cout << endl;

	} while(!qWord.empty());
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -