⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 task1.cpp

📁 testing of file to be insert into the application
💻 CPP
字号:
//Name: i'll be back(Parttime)
//Assign 2- Task 1
//Program counts the number of unique words

#include <cctype>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>
using namespace std;

// Capacity, in chars, of a buffer meant to hold one line of input text.
#define BUFSIZE 10000
// Capacity, in chars, of a buffer meant to hold the whole cleaned-up text.
#define MAXSIZE 50000

// One entry of the word-frequency table built by separateWords().
struct Word{
	// The word as a NUL-terminated C string; at most 19 characters fit.
	char text[20];
	// Number of occurrences seen so far. initialize() sets it to 0, and
	// sort() treats 0 as "no entry / already emitted".
	int count;
};

// Forward declarations of the helpers defined below main().

// Copies the first buffer into the second, lower-casing letters and
// replacing every other non-space character with a space.
void removeNumbersPunctuation(char *,char *);
// Returns a word count for a space-separated buffer.
int countTotalWords(char *);
// Fills the third array with the entries of the second array ordered by
// ascending count; the int is the number of entries.
void sort(Word *,Word *, Word *,int);
// Resets the given number of entries to text " " and count 0.
void initialize(Word *, int);
// Splits the char buffer on spaces and tallies each distinct word into the
// Word array; the int reference receives the number of distinct words.
void separateWords(Word *,Word *, Word *, char [], int &);

int main(){
	
	//Variables
	
	//For reading of text
	char filename[100]="";
	char filename_output[100]="index_";
	char buf[BUFSIZE];
	char processedBuf[BUFSIZE];
	char alltext[MAXSIZE];
	
	//to count total number of words
	int counterTotalwords;
	
	//to count number of unqiue words
	int totalUniqueWords=0;
		
	//p,ptemp,ptrack : to store the unique words into struct
	//resultptr,resultTempPtr: to store the sorted struct
	Word *p,*ptemp, *pTrack;
	Word *resultptr,*resultTempPtr;
	
	//file streams
	ifstream infile;
	ofstream outfile;
	
	cout << "Please wait while the program loads...\n" << endl;
	
	//Prompt user for filename to read and encode
	while (strlen(filename)==0){
		system("cls");
		cout << "\nPlease enter the filename that you would " 
		 	 << "like to read (Note that the file has to " 
	     	 << "be of same directory as this program:" <<endl;
		cout << "File Name : ";
		fflush(stdin);
		cin.getline(filename,100,'\n');
	}	 
	
	   	        
	//read all the current contacts from txt file
	infile.open(filename);	     	 
	while(!infile.good())
	{
		cout << "File does not exist. Program will exit now." << endl;	  
		exit(1);
	}
	
	cout << "\nCounting words in progress... Please wait." << endl;
	
	//to count total number of words in the file
	while(!infile.eof())
	{
		fflush(stdin);
		infile.getline(buf,BUFSIZE,'\n');
		removeNumbersPunctuation(buf,processedBuf);
		counterTotalwords+=countTotalWords(processedBuf);
		strcat(alltext,processedBuf);
		strcpy(buf," ");
		strcpy(processedBuf," ");
	}
	
	
	//read the file again and count the number of occurance for each word
	p=new Word[counterTotalwords];
	if(p == 0)
    {
          //allocating memory failed
		  cout << "Error allocating memory. Program will exit now." << endl;
          exit(1);
    } 

	//to keep track of the pointer p
	ptemp=p;
	pTrack=p;
	
	//initialize all to default values
	initialize(p,counterTotalwords);
	
	//Separate the whole text and count the uniqueness
	p=ptemp;
	separateWords(p,ptemp,pTrack,alltext,totalUniqueWords);
	
	//write to a file
	strcat(filename_output,filename);
	outfile.open(filename_output);   
	
	outfile << "Total:" << counterTotalwords << endl;
	outfile << "Total Unique Words:" << totalUniqueWords << endl;
	
	/* output the total unique words with
	   all the words and its occurance (not sorted)
	p=pTrack;
	for(int z=0;z<(totalUniqueWords);z++){
		outfile << p->text << ":" << p->count << endl;	
		p++;
	}
	
	outfile << "=======================================" << endl;
	*/
	
	
	//sort the struct
	resultptr=new Word[totalUniqueWords];
	if(resultptr == 0)
    {
          //allocating memory failed
		  cout << "Error allocating memory" << endl;
          exit(1);
    } 
	
	resultTempPtr=resultptr;
	//initialize all to default values
	initialize(resultptr,totalUniqueWords);
	
	//sort the struct
	sort(p,pTrack,resultptr,totalUniqueWords);
	resultptr=resultTempPtr;
	
	//Print out the least 50% of the words
	for(int z=0;z<(totalUniqueWords/2);z++){
		outfile << resultptr->text << "," << resultptr->count << endl;	
		resultptr++;
	}
	
	//close filestream
	infile.close();
	outfile.close();
	
	delete resultTempPtr;
	delete pTrack;
	
	cout << "\nCounting of words has completed."
		 << "\n\nResults will be stored in "
		 << filename_output 
		 << "\n\nProgram will exit now."
		 << endl;	 
	return 0;
}

//######################################################
//# To remove numbers and punctuation marks in inBuf
//######################################################

// Writes a cleaned-up copy of inBuf to outBuf.
//
// Letters are copied in lower case. Every run of other characters (spaces,
// digits, punctuation, ...) is replaced by one single space, so the result
// contains only lower-case letters separated by single spaces.
//
// inBuf  : NUL-terminated input text; it is only read.
// outBuf : destination; must have room for strlen(inBuf)+1 chars and must
//          not overlap inBuf.
void removeNumbersPunctuation(char *inBuf, char *outBuf)
{
	char *const outStart=outBuf;

	for(;*inBuf!='\0';++inBuf)
	{
		//the <cctype> functions require a value in the unsigned char range
		const unsigned char ch=static_cast<unsigned char>(*inBuf);

		if(std::isalpha(ch))
		{
			*outBuf++=static_cast<char>(std::tolower(ch));
		}
		else if(outBuf==outStart || *(outBuf-1)!=' ')
		{
			//not a letter: emit a separator unless the previous output
			//character already is one
			*outBuf++=' ';
		}
	}
	*outBuf='\0';

}

//######################################################
//# To count total number of words in buf
//######################################################

// Returns the number of words in buf, where a word is a maximal run of
// characters other than ' '. Leading, trailing and repeated spaces do not
// produce extra words; an empty or all-space buffer yields 0.
//
// buf : NUL-terminated text; it is only read.
int countTotalWords(char *buf){

	int counter=0;
	bool inWord=false;

	for(const char *cur=buf;*cur!='\0';++cur){
		if(*cur==' '){
			inWord=false;
		}
		else if(!inWord){
			//first character of a new word
			inWord=true;
			++counter;
		}
	}
	return counter;

}


//######################################################
//# To sort the words according to count ASC order
//######################################################
// Copies the entries of pTrack[0..totalUniqueWords) into resultptr in
// ascending order of count (selection sort). Entries with equal counts keep
// their original relative order.
//
// p               : scratch cursor; its incoming value is ignored.
// pTrack          : first entry of the source table. NOTE: the table is
//                   consumed - the count of every emitted entry is reset
//                   to 0, which marks it as already taken.
// resultptr       : destination with room for totalUniqueWords entries.
// totalUniqueWords: number of entries in the source table.
//
// Source entries whose count is already 0 are skipped; if the source runs
// out of non-zero entries early, the remaining destination slots are left
// untouched.
void sort(Word *p,Word *pTrack, Word *resultptr,int totalUniqueWords)
{
	for(int k=0;k<totalUniqueWords;k++)
	{
		//find the first remaining entry with the smallest count; tracking
		//the entry itself avoids a magic "largest possible count" sentinel
		Word *smallest=0;
		p=pTrack;
		for(int j=0;j<totalUniqueWords;j++,p++)
		{
			if(p->count!=0 && (smallest==0 || p->count<smallest->count)){
				smallest=p;
			}
		}

		if(smallest==0){
			//nothing left to emit
			break;
		}

		*resultptr=*smallest;
		resultptr++;
		smallest->count=0;
	}
}

//######################################################
//# To initialize the struct in p to default values
//######################################################

// Resets the first `size` entries starting at p: the text becomes the
// one-character string " " and the count becomes 0. Does nothing when
// size is zero or negative.
void initialize(Word *p, int size)
{
	int remaining=size;
	while(remaining>0)
	{
		//same bytes as copying the literal " " into the text field
		p->text[0]=' ';
		p->text[1]='\0';
		p->count=0;

		++p;
		--remaining;
	}

}

//######################################################
//# To take out the words in alltext and count the
//# number of occurrences of each word
//######################################################
// Splits alltext into space-separated words and tallies them in the table
// that starts at pTrack.
//
// p               : next free slot of the table (pTrack+totalUniqueWords).
// ptemp           : scratch cursor; its incoming value is ignored.
// pTrack          : first entry of the table.
// alltext         : NUL-terminated text; it is modified in place (strtok
//                   overwrites separators, over-long words are cut).
// totalUniqueWords: in: number of entries already in the table;
//                   out: incremented once for every new distinct word.
//
// Words longer than the text field can hold are truncated to fit, so two
// long words that share that prefix are counted as the same word. The
// caller must provide one free slot for every new distinct word.
void separateWords(Word *p,Word *ptemp, Word *pTrack, char alltext[], int &totalUniqueWords)
{
	//longest word that fits into Word::text together with its terminator
	const size_t maxLen=sizeof(p->text)-1;

	char *tokenPtr=strtok(alltext," ");
	while(tokenPtr!=NULL)
	{
		//strtok has already terminated this token and remembered where the
		//next one starts, so the token can safely be shortened in place
		if(strlen(tokenPtr)>maxLen){
			tokenPtr[maxLen]='\0';
		}

		//look the word up among the ones recorded so far
		bool exist=false;
		ptemp=pTrack;
		for(int j=0;j<totalUniqueWords && !exist;j++,ptemp++){
			if(strcmp(ptemp->text,tokenPtr)==0)
			{
				(ptemp->count)++;
				exist=true;
			}
		}

		if(!exist)
		{
			//first occurrence: append a new entry
			strcpy(p->text,tokenPtr);
			p->count=1;
			totalUniqueWords++;
			p++;
		}
		tokenPtr=strtok(NULL," ");
	}
}







 

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -