📄 task1.cpp
字号:
//Name: i'll be back(Parttime)
//Assign 2- Task 1
//Program counts the number of unique words
#include <fstream>
#include <iostream>
#include <cstring>
#include <cstdlib>
#include <cctype>
#include <iomanip>
using namespace std;
#define BUFSIZE 10000
#define MAXSIZE 50000
//One entry of the word table: a word and how many times it was seen.
struct Word
{
    char text[20];  //NUL-terminated word, at most 19 characters plus the terminator
    int count;      //number of occurrences recorded for this word
};
void removeNumbersPunctuation(char *,char *);
int countTotalWords(char *);
void sort(Word *,Word *, Word *,int);
void initialize(Word *, int);
void separateWords(Word *,Word *, Word *, char [], int &);
int main(){
//Variables
//For reading of text
char filename[100]="";
char filename_output[100]="index_";
char buf[BUFSIZE];
char processedBuf[BUFSIZE];
char alltext[MAXSIZE];
//to count total number of words
int counterTotalwords;
//to count number of unqiue words
int totalUniqueWords=0;
//p,ptemp,ptrack : to store the unique words into struct
//resultptr,resultTempPtr: to store the sorted struct
Word *p,*ptemp, *pTrack;
Word *resultptr,*resultTempPtr;
//file streams
ifstream infile;
ofstream outfile;
cout << "Please wait while the program loads...\n" << endl;
//Prompt user for filename to read and encode
while (strlen(filename)==0){
system("cls");
cout << "\nPlease enter the filename that you would "
<< "like to read (Note that the file has to "
<< "be of same directory as this program:" <<endl;
cout << "File Name : ";
fflush(stdin);
cin.getline(filename,100,'\n');
}
//read all the current contacts from txt file
infile.open(filename);
while(!infile.good())
{
cout << "File does not exist. Program will exit now." << endl;
exit(1);
}
cout << "\nCounting words in progress... Please wait." << endl;
//to count total number of words in the file
while(!infile.eof())
{
fflush(stdin);
infile.getline(buf,BUFSIZE,'\n');
removeNumbersPunctuation(buf,processedBuf);
counterTotalwords+=countTotalWords(processedBuf);
strcat(alltext,processedBuf);
strcpy(buf," ");
strcpy(processedBuf," ");
}
//read the file again and count the number of occurance for each word
p=new Word[counterTotalwords];
if(p == 0)
{
//allocating memory failed
cout << "Error allocating memory. Program will exit now." << endl;
exit(1);
}
//to keep track of the pointer p
ptemp=p;
pTrack=p;
//initialize all to default values
initialize(p,counterTotalwords);
//Separate the whole text and count the uniqueness
p=ptemp;
separateWords(p,ptemp,pTrack,alltext,totalUniqueWords);
//write to a file
strcat(filename_output,filename);
outfile.open(filename_output);
outfile << "Total:" << counterTotalwords << endl;
outfile << "Total Unique Words:" << totalUniqueWords << endl;
/* output the total unique words with
all the words and its occurance (not sorted)
p=pTrack;
for(int z=0;z<(totalUniqueWords);z++){
outfile << p->text << ":" << p->count << endl;
p++;
}
outfile << "=======================================" << endl;
*/
//sort the struct
resultptr=new Word[totalUniqueWords];
if(resultptr == 0)
{
//allocating memory failed
cout << "Error allocating memory" << endl;
exit(1);
}
resultTempPtr=resultptr;
//initialize all to default values
initialize(resultptr,totalUniqueWords);
//sort the struct
sort(p,pTrack,resultptr,totalUniqueWords);
resultptr=resultTempPtr;
//Print out the least 50% of the words
for(int z=0;z<(totalUniqueWords/2);z++){
outfile << resultptr->text << "," << resultptr->count << endl;
resultptr++;
}
//close filestream
infile.close();
outfile.close();
delete resultTempPtr;
delete pTrack;
cout << "\nCounting of words has completed."
<< "\n\nResults will be stored in "
<< filename_output
<< "\n\nProgram will exit now."
<< endl;
return 0;
}
//######################################################
//# To remove numbers and punctuation marks in inBuf
//#
//# Copies inBuf into outBuf, converting letters to lower
//# case and turning every other character into a space.
//# Runs of spaces are collapsed, so outBuf never holds
//# two spaces in a row.
//#
//# inBuf  : NUL-terminated input text (not modified)
//# outBuf : destination; needs room for strlen(inBuf)+1
//#          characters and receives a NUL-terminated
//#          string
//######################################################
void removeNumbersPunctuation(char *inBuf, char *outBuf)
{
    //Whether the character most recently written to outBuf was a space.
    //A flag is used instead of looking at outBuf[-1], which does not exist
    //while the output is still empty.
    bool lastWasSpace=false;
    for(;*inBuf!='\0';++inBuf)
    {
        //the <cctype> functions need a value in the unsigned char range
        const unsigned char ch=static_cast<unsigned char>(*inBuf);
        if(std::isalpha(ch))
        {
            *outBuf++=static_cast<char>(std::tolower(ch));
            lastWasSpace=false;
        }
        else if(!lastWasSpace)
        {
            //a space, digit or punctuation mark becomes a single space
            *outBuf++=' ';
            lastWasSpace=true;
        }
    }
    *outBuf='\0';
}
//######################################################
//# To count total number of words in buf
//#
//# A word is a maximal run of characters other than the
//# space character. Leading, trailing and repeated
//# spaces do not produce extra words.
//#
//# buf     : NUL-terminated text (not modified)
//# returns : number of words, 0 for an empty or
//#           all-space string
//######################################################
int countTotalWords(char *buf){
    int counter=0;
    //true while the scan is inside a word, so each word is counted once,
    //at its first character
    bool inWord=false;
    for(const char *cursor=buf;*cursor!='\0';++cursor){
        if(*cursor==' '){
            inWord=false;
        }
        else if(!inWord){
            inWord=true;
            ++counter;
        }
    }
    return counter;
}
//######################################################
//# To sort the words according to count ASC order
//#
//# Repeatedly picks the entry with the smallest non-zero
//# count from the table starting at pTrack and copies it
//# to the next slot of resultptr. Entries with equal
//# counts keep their original relative order.
//#
//# p                : ignored on entry; only used as a
//#                    scan cursor
//# pTrack           : first entry of the table to sort
//# resultptr        : destination with room for
//#                    totalUniqueWords entries
//# totalUniqueWords : number of entries in the table
//#
//# NOTE: the count of every entry that has been copied
//# is set to 0 in the source table to mark it as used,
//# so the source counts are destroyed. Entries whose
//# count is already 0 on entry are skipped; the matching
//# slots at the end of resultptr are left untouched.
//######################################################
void sort(Word *p,Word *pTrack, Word *resultptr,int totalUniqueWords)
{
    for(int k=0;k<totalUniqueWords;k++)
    {
        //Track the best candidate by pointer instead of comparing against
        //a fixed upper bound, so counts of any size are handled and an
        //empty selection can be detected.
        Word *smallest=NULL;
        p=pTrack;
        for(int j=0;j<totalUniqueWords;j++,p++)
        {
            if(p->count!=0 && (smallest==NULL || p->count<smallest->count))
                smallest=p;
        }
        //nothing left to pick: every remaining entry has a count of 0
        if(smallest==NULL)
            break;
        *resultptr=*smallest;
        resultptr++;
        //mark the entry as used so it is not picked again
        smallest->count=0;
    }
}
//######################################################
//# To initialize the struct in p to default values
//#
//# Resets the first `size` entries starting at p: the
//# text becomes a single space (" ") and the count 0.
//# Does nothing when size is zero or negative.
//######################################################
void initialize(Word *p, int size)
{
    int remaining=size;
    while(remaining>0){
        p->count=0;
        strcpy(p->text," ");
        ++p;
        --remaining;
    }
}
//######################################################
//# To take out the words in alltext and count the
//# number of occurrences of each word
//#
//# p                : slot that receives the next new
//#                    word; advanced after each insert
//# ptemp            : ignored on entry; only used as a
//#                    scan cursor
//# pTrack           : first entry of the word table
//# alltext          : space separated text; it is
//#                    modified because strtok writes
//#                    terminators into it
//# totalUniqueWords : number of entries already in the
//#                    table; incremented for every new
//#                    word
//#
//# Lookups scan pTrack[0 .. totalUniqueWords), so new
//# words are only found again later if p points at
//# pTrack[totalUniqueWords] on entry. The table needs
//# room for every distinct word in alltext.
//#
//# Words longer than the text field of Word are cut to
//# fit, so long words sharing the same leading
//# characters are counted as one word.
//######################################################
void separateWords(Word *p,Word *ptemp, Word *pTrack, char alltext[], int &totalUniqueWords)
{
    //same capacity as Word::text so the later copy into the table cannot overflow
    char tempWord[sizeof(p->text)];
    for(char *tokenPtr=strtok(alltext," ");tokenPtr!=NULL;tokenPtr=strtok(NULL," "))
    {
        //bounded copy: strncpy does not terminate when the token fills the
        //buffer, so the terminator is written explicitly
        strncpy(tempWord,tokenPtr,sizeof(tempWord)-1);
        tempWord[sizeof(tempWord)-1]='\0';

        //look the word up among the entries recorded so far
        bool exist=false;
        ptemp=pTrack;
        for(int j=0;j<totalUniqueWords;j++,ptemp++){
            if(strcmp(ptemp->text,tempWord)==0)
            {
                (ptemp->count)++;
                exist=true;
                break;
            }
        }
        //first time this word is seen: append it to the table
        if(!exist)
        {
            strcpy(p->text,tempWord);
            p->count=1;
            totalUniqueWords++;
            p++;
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -