📄 doclist.h
字号:
#ifndef __DOCLIST_H_
#define __DOCLIST_H_
#pragma warning( disable : 4786 )
#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>
using namespace std;
// Size limit of 400 * 1024 (= 409600). NOTE(review): presumably the maximum
// number of features per document; the use site is not visible in this header.
// The product is parenthesized so the macro behaves as a single operand inside
// larger expressions such as `n / MAX_DOC_FEATURE` or `n % MAX_DOC_FEATURE`.
#define MAX_DOC_FEATURE (400*1024)
// Size limit of 30. NOTE(review): presumably the maximum label length in
// characters; confirm against the code that uses it.
#define MAX_LABEL_LEN 30
// Parse flags, one bit each; PARSE_BOTH_FLAG is the ID and LABEL bits combined.
enum {
    PARSE_NONE_FLAG  = 0x0,
    PARSE_ID_FLAG    = 0x1,
    PARSE_LABEL_FLAG = 0x2,
    PARSE_BOTH_FLAG  = PARSE_ID_FLAG | PARSE_LABEL_FLAG
};
// One entry of a document's content: a word number paired with its weight.
struct WORDITEM {
    long  wnum;    // word number
    float weight;  // word weight
};
// Per-document record holding an id, two category sets, a prediction value
// and an "empty" flag. The scalar members are not initialized by default.
struct SDoc
{
    long          lDocId;       // document id
    std::set<int> setDocCat;    // category ids of the document
    std::set<int> setPredCat;   // NOTE(review): presumably the predicted category ids - confirm
    double        dbPredValue;  // NOTE(review): presumably the prediction score - confirm
    bool          bIsEmpty;     // NOTE(review): presumably set when the document has no content - confirm
};
// Document record: several ids and numeric attributes plus a raw pointer to
// the document's word/weight pairs. This struct does not manage the memory
// behind `content`.
struct DOC {
    long      docnum;
    long      DocId;        // keep unchanged
    long      queryid;
    double    costfactor;
    double    twonorm_sq;   // squared Euclidean length
    WORDITEM *content;      // the content/value pairs
    int       dim_content;  // NOTE(review): presumably the number of entries in content - confirm
};
class CDocList
{
public:
void ReadVector( string strFile);
void ReadDocList(string strFile);
protected:
private:
int MaxWordsCount;
set<long> setWordId;
set<int> setCat;
//map<long,set<int> > mapDoc2SetLabel;
public:
DOC *docs;
map<long,long> mapDocId_Pos;//docid->index in docs array
//vector< set<int> > vSDocCat;
vector <SDoc> vSDoc;
};
// Allocation helper taking a byte count; defined outside this header.
// NOTE(review): behavior on allocation failure is not visible here - confirm.
void *my_malloc(size_t size);
// Takes two WORDITEM arrays and returns a float.
// NOTE(review): presumably a sparse dot product of the two vectors; how the
// end of each array is detected is not visible here - confirm.
float sprod_ss(WORDITEM *a, WORDITEM *b);
// Takes a text line and a DOC to fill in; defined outside this header.
// NOTE(review): presumably parses sLine into test_doc; the meaning of the
// int return value is not visible here - confirm.
int ReadDoc( string& sLine, DOC& test_doc );
// Splits sLine on every occurrence of sSep and stores each field, converted
// with atoi(), in vId.
//
//   sLine - text to split; it is only read.
//   sSep  - separator, which may be longer than one character. An empty
//           separator means "no separator": the whole line is one field.
//   vId   - cleared first, then receives one value per field in order.
//
// A field that does not start with a number converts to 0 (atoi behavior), so
// an empty sLine yields a single 0. sLine is expected to begin with a non-sSep
// character; if it does begin with sSep, the first field is empty and
// contributes a 0.
template<typename T> void GetvIdFromStr( std::string& sLine, std::string sSep, std::vector<T>& vId )
{
    vId.clear();
    std::string::size_type posBegin = 0;
    if ( !sSep.empty() ) {
        // Compare against npos rather than a signed value: find() returns an
        // unsigned position and signals "not found" with npos.
        std::string::size_type pos = sLine.find( sSep, posBegin );
        while ( pos != std::string::npos ) {
            vId.push_back( static_cast<T>( atoi( sLine.substr( posBegin, pos-posBegin ).c_str() ) ) );
            // Step over the whole separator, not just its first character.
            posBegin = pos + sSep.size();
            pos = sLine.find( sSep, posBegin );
        }
    }
    // Whatever follows the last separator (or the entire line) is the final field.
    vId.push_back( static_cast<T>( atoi( sLine.substr( posBegin ).c_str() ) ) );
}
// Loads a tab-separated id map from the file strFile into mapIdMap.
//
//   mapIdMap - cleared first; afterwards maps each key to the set of ids
//              listed on its line(s).
//   strFile  - path of the file to read.
//
// Each line is expected to look like "<key>\t<id>\t<id>...". The text before
// the first tab is the key; every tab-separated field after it is added to the
// key's set. Lines that contain no tab are skipped and counted as "without
// indexid". A progress line and a summary are written to std::cout. If the
// file cannot be opened, nothing is read and the summary reports 0 lines.
template<typename T> void ReadIdSetMap( std::map<T,std::set<T > >& mapIdMap, std::string strFile)
{
    mapIdMap.clear();
    std::cout << "Reading Id Map..." << std::endl;
    std::ifstream ifile( strFile.c_str() );
    std::string sLine;
    std::vector<T> vId;
    int iIndexIdNull = 0;
    int iLineNum = 0;
    while ( std::getline( ifile, sLine ) ) {
        iLineNum++;
        const std::string::size_type pos = sLine.find( '\t' );
        // find() returns an unsigned position, so "no tab" must be detected by
        // comparing with npos; a `< 0` test can never be true.
        if ( pos == std::string::npos ) {
            iIndexIdNull++;
            continue;
        }
        const T key = static_cast<T>( atoi( sLine.substr( 0, pos ).c_str() ) );
        std::string sLast = sLine.substr( pos+1 );
        GetvIdFromStr( sLast, "\t", vId );
        // Look the key up once and insert all parsed ids into its set.
        std::set<T>& setIds = mapIdMap[ key ];
        setIds.insert( vId.begin(), vId.end() );
    }
    ifile.close();
    std::cout << iLineNum << " lines processed, " << iIndexIdNull << " without indexid. " << std::endl;
}
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -