📄 main.cpp
字号:
#include <algorithm>
#include <fstream>
#include <iostream>
#include <set>
#include <string>
#include <vector>
#include <hash_map>
#include <Windows.h>
using namespace std ;
using stdext::hash_map ;
// One row of the pinyin / word / frequency table.
//
// Ordering (operator<): ascending by pinyin; rows with the same pinyin are
// ordered by descending frequency, so the most frequent word comes first.
// Equality (operator==): compares the word only; pinyin and frequency are
// ignored.
class Item
{
public:
    std::string py_ ;       // pinyin reading, primary sort key
    std::wstring word_ ;    // the word this row describes
    double frequency_ ;     // frequency value, secondary (descending) sort key

    // Stores copies of the three fields.
    Item(const std::string& py, const std::wstring& word, double frequency)
        : py_(py), word_(word), frequency_(frequency)
    {
    }

    // Returns true when this row sorts before `b`.
    // const and by-reference so it works on const objects and does not copy
    // both strings on every comparison made while sorting.
    bool operator<(const Item& b) const
    {
        // Compare the pinyin once instead of calling strcmp twice.
        const int order = py_.compare(b.py_) ;
        if (order != 0)
        {
            return order < 0 ;
        }
        // Same pinyin: the higher frequency sorts first.
        return frequency_ > b.frequency_ ;
    }

    // Returns true when both rows describe the same word.
    bool operator==(const Item& b) const
    {
        return word_ == b.word_ ;
    }
};
void main()
{
typedef vector<string> PYTable ;
hash_map<wstring, PYTable> word2py ;
ifstream input1("..\\py.txt") ;
string py ;
wstring word ;
string temp ;
while(input1 >> py)
{
input1 >> temp ;
DWORD dwNum = MultiByteToWideChar (CP_ACP, 0, temp.c_str(), -1, NULL, 0);
wchar_t *pwText = new wchar_t[dwNum];
MultiByteToWideChar (CP_ACP, 0, temp.c_str(), -1, pwText, dwNum);
for (size_t i = 0; i<dwNum-1; i++)
{
word = pwText[i] ;
word2py[word].push_back(py) ;
}
}
input1.close() ;
ifstream input2("..\\fre.txt") ;
if( !input2.is_open() )
{
cout << "error" << endl;
}
double frequency ;
vector<Item> py_word_frequency_table ;
getline(input2, temp) ; //ignore the first line
while (input2 >> temp)
{
DWORD dwNum = MultiByteToWideChar (CP_ACP, 0, temp.c_str(), -1, NULL, 0);
wchar_t *pwText = new wchar_t[dwNum];
MultiByteToWideChar (CP_ACP, 0, temp.c_str(), -1, pwText, dwNum);
word = pwText[0] ;
input2 >> frequency >> frequency >> frequency ;
hash_map<wstring, PYTable>::iterator it = word2py.find(word) ;
if ( it!=word2py.end() )
{
PYTable& pyTable = it->second ;
for (PYTable::iterator itr = pyTable.begin(); itr!=pyTable.end(); itr++)
{
Item item(*itr, word, frequency) ;
py_word_frequency_table.push_back(item) ;
}
}
input2 >> frequency ;
}
input2.close() ;
//add 0 frequency word
for(hash_map<wstring, PYTable>::iterator hash_it = word2py.begin(); hash_it!=word2py.end(); hash_it++)
{
Item tempItem("", hash_it->first, 0) ;
vector<Item>::iterator it = find(py_word_frequency_table.begin(), py_word_frequency_table.end(), tempItem);
if (it == py_word_frequency_table.end())
{
PYTable& t = hash_it->second ;
for (size_t i = 0; i!=t.size(); i++)
{
py_word_frequency_table.push_back(Item(t[i], hash_it->first, 0.0)) ;
}
}
}
sort(py_word_frequency_table.begin(), py_word_frequency_table.end()) ;
ofstream output("..\\py_word_fre_table.txt") ;
for (size_t i = 0; i<py_word_frequency_table.size(); i++)
{
Item& item = py_word_frequency_table[i] ;
DWORD dwNum = WideCharToMultiByte(CP_OEMCP, NULL, item.word_.c_str(), -1, NULL, 0, NULL, FALSE);
char *psText;
psText = new char[dwNum];
WideCharToMultiByte (CP_ACP, NULL, item.word_.c_str(), -1, psText, dwNum, NULL, FALSE);
string w(psText) ;
delete []psText;
output << item.py_ << " " << w<< " " << item.frequency_ << endl;
}
output.close() ;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -