📄 tseries.head.hpp
字号:
} template<class T, template<class U, class = allocator<U> > class Seq> friend tseries* cbind(Seq<T>& seq, const bool intersection = true) { tseries *ans; // empty sequence if(seq.size()==0) { return new tseries; } // sequence w/ only 1 element // make copy of tseries and return if(seq.size()==1) { typename Seq<T>::iterator tsp_it = seq.begin(); return new tseries(**tsp_it); } // count total columns (need to know to alloc space for answer) unsigned int total_cols = 0; for(typename Seq<T>::iterator tsp_it = seq.begin(); tsp_it != seq.end(); tsp_it++) { total_cols += (*tsp_it)->cols; } if(intersection) { // find tseries w/ max number of rows // since we have to alloc temp buffer to store intersection of dates // it needs to be as big as the biggest tseries unsigned int iDateRows = 0; for(typename Seq<T>::iterator tsp_it = seq.begin(); tsp_it != seq.end(); tsp_it++) { iDateRows = ( (*tsp_it)->rows > iDateRows ) ? (*tsp_it)->rows : iDateRows; } //cout << "DBG: iDateRows" << iDateRows << endl; // alloc space to store intersecting dates // this sucks but set_intersection cannot use same memory as input buffer and output buffer // so we have to alloc this twice DateT *in_buffer = new DateT[iDateRows]; DateT *out_buffer = new DateT[iDateRows]; DateT *switch_buff; // copy dates of series 0 into input buffer memcpy(in_buffer, (*seq.begin())->dates, (*seq.begin())->rows * sizeof(DateT) ); unsigned int in_size = (*seq.begin())->rows; DateT * last_element; // go through series 1 to N (skipping series 0, since those dates are already in iDates) for(typename Seq<T>::iterator tsp_it = seq.begin()+1; tsp_it != seq.end(); tsp_it++) { //cout << "dts2: " <<(*tsp_it)->dates << endl; //cout << "rows2: " <<(*tsp_it)->rows << endl; last_element = set_intersection(in_buffer, in_buffer + in_size, (*tsp_it)->dates, (*tsp_it)->dates + (*tsp_it)->rows, out_buffer); // now switch buffers (instead of copying over to other buffer) // new size of input buffer (because input buffer for next tsp_it will be this out_buffer in_size = distance(out_buffer,last_element); // switch input and output buffers switch_buff = in_buffer; in_buffer = out_buffer; out_buffer = switch_buff; } // number of rows is size of iDates ans = new tseries(in_size, total_cols); ans->setDates(out_buffer); delete []in_buffer; delete []out_buffer; } else { set<DateT> ans_dates; // collect union of dates using set to find union for(typename Seq<T>::iterator tsp_it = seq.begin(); tsp_it != seq.end(); tsp_it++) { DateT* dp = (*tsp_it)->getDates(); for(unsigned int i = 0; i < (*tsp_it)->rows; i++) { ans_dates.insert(dp[i]); } } ans = new tseries(ans_dates.size(), total_cols); ans->setDates(ans_dates); } // add elements to answer unsigned int ans_col = 0; for(typename Seq<T>::iterator tsp_it = seq.begin(); tsp_it != seq.end(); tsp_it++) { RangeSpecifier<DateT> r(ans->dates,(*tsp_it)->dates,ans->rows,(*tsp_it)->rows); for(unsigned int c = 0; c < (*tsp_it)->cols; c++) { double* ans_col_ptr = ans->getCol(ans_col); double* source_col_ptr = (*tsp_it)->getCol(c); for(unsigned int row = 0; row < r.size; row++) { ans_col_ptr[ r.arg1[row] ] = source_col_ptr[ r.arg2[row] ]; } ans_col++; } } // set colnames ans->setColNames(mkColNms(seq)); return ans; } template<class T, template<class U, class = allocator<U> > class Seq> friend vector<string> mkColNms(Seq<T>& seq) { //cout << "mkColNms" << endl; //cout << "size: " << seq.size() << endl; vector<string> ans; vector<bool> has_colnames(seq.size()); unsigned int i = 0; for(typename Seq<T>::iterator tsp_it = seq.begin(); tsp_it != seq.end(); tsp_it++, i++) { if((*tsp_it)->getColNames().size()) { has_colnames[i] = true; } else { has_colnames[i] = false; } } // if there are no tseries w/ colnames then return empty string vector unsigned int ts_with_cnames = accumulate(has_colnames.begin(),has_colnames.end(),0); if(ts_with_cnames==0) { return ans; } for(typename Seq<T>::iterator tsp_it = seq.begin(); tsp_it != seq.end(); tsp_it++, i++) { vector<string> cnms = (*tsp_it)->getColNames(); if(cnms.size()) { for(vector<string>::iterator it = cnms.begin(); it != cnms.end(); it++) { ans.push_back(*it); } } else { for(unsigned int i = 0; i < (*tsp_it)->ncol(); i++) { ans.push_back(""); } } } return ans; } // template<class T, template<class U, class = allocator<U> > class Seq> void trim(Seq<T>& seq) { template<class T, template<class U> class Seq> void trim(Seq<T>& seq) { //printSeq(seq); unsigned int new_nrows = seq.size(); DateT *newDates = new DateT[new_nrows]; double *newData = new double[new_nrows*cols]; if(newDates==NULL || newData==NULL) { cerr << "void trim(Seq<T>& seq):" << endl; cerr << "can't allocate memory." << endl; // in case one got allocated but not the other delete []newDates; delete []newData; return; } // fill dates unsigned int i = 0; for(typename Seq<T>::iterator date_it=seq.begin(); date_it != seq.end(); date_it++, i++) { newDates[i] = *date_it; } // fill w/ NAs for(i = 0; i < new_nrows*cols; i++) { newData[i] = NAN; } // find intersection and set values RangeSpecifier<DateT> r(newDates, dates, new_nrows, rows); for(unsigned int r_index = 0; r_index < r.size; r_index++) { for(unsigned int col = 0; col < cols; col++) { newData[r.arg1[r_index]+col*new_nrows] = data[r.arg2[r_index]+col*rows]; } } // delete old data delete []data; delete []dates; data = newData; dates = newDates; rows = new_nrows; } // pads existing dates with dates in Seq template<class T, template<class U> class Seq> void pad(Seq<T>& seq) { // our new dates set<DateT> padDates; // put existing dates into set for(unsigned int i = 0; i < rows*cols; i++) { padDates.insert(dates[i]); } // put new pad dates into set unsigned int i = 0; for(typename Seq<T>::iterator date_it=seq.begin(); date_it != seq.end(); date_it++) { padDates.insert(*date_it); } // just in case // if these are equal, then we have not added any new dates unsigned int new_nrows = padDates.size(); if(new_nrows==rows) { return; } DateT *newDates = new DateT[new_nrows]; double *newData = new double[new_nrows*cols]; if(newDates==NULL || newData==NULL) { cerr << "void pad(Seq<T>& seq):" << endl; cerr << "can't allocate memory." << endl; // in case one got allocated but not the other delete []newDates; delete []newData; return; } // set new dates i = 0; for(typename set<DateT>::iterator it = padDates.begin(); it != padDates.end(); it++,i++) { newDates[i] = *it; } // fill w/ NAs for(i = 0; i < new_nrows*cols; i++) { newData[i] = NAN; } // find intersection and set values RangeSpecifier<DateT> r(newDates,dates,new_nrows,rows); for(unsigned int r_index = 0; r_index < r.size; r_index++) { for(unsigned int col = 0; col < cols; col++) { newData[r.arg1[r_index]+col*new_nrows] = data[r.arg2[r_index]+col*rows]; } } delete []dates; delete []data; dates = newDates; data = newData; rows = new_nrows; } template<class T, template<class U> class Seq> void setDates(Seq<T>& seq) { typename Seq<T>::iterator start = seq.begin(); typename Seq<T>::iterator end = seq.end(); int newDates_count = distance(start,end); if(newDates_count<1) { cerr << "ERROR: setDates" << endl; cerr << "dates sequence is empty." << endl; return; } if(static_cast<unsigned int>(newDates_count)!=rows) { cerr << "ERROR: setDates" << endl; cerr << "sequence length not equal to number of rows." << endl; return; } unsigned int i = 0; for(typename Seq<T>::iterator date_it = start; date_it != end; date_it++, i++) { dates[i] = *date_it; } } void write_csv_row(ofstream &out, unsigned int r, const char* date_format) const { out << date2string(dates[r],date_format) << ","; for(unsigned int c = 0; c < (cols-1); c++) { out << data[r + c*rows]; out << ","; } // no comma after the last element out << data[r + (cols-1)*rows]; } void writecsv(const string &fname, const char* date_format) const { if(rows==0 || cols==0) { cerr << "ERROR: tseries::writecsv:" << endl; cerr << "cannot export NULL tseries." << endl; return; } ofstream out(fname.c_str()); if(!out) { cerr << "ERROR: tseries::writecsv(const string &fname, const char* date_format)" << endl; cerr << "cannot open file for writing: " << fname << endl; } unsigned int cns = colnames.size(); if(cns) { // spacer for dates out << "dates"; out << ","; for(unsigned int i=0; i < cns; i++) { out << colnames[i]; if(i != (cns-1)) out << ","; } out << endl; } out.setf(ios::fixed); // FIXME: add as argument to function instead of here out << setprecision(10); for(unsigned int r = 0; r < (rows-1); r++) { write_csv_row(out,r,date_format); out << endl; } // no endl after this one write_csv_row(out,rows-1,date_format); //write newline out << endl; out.close(); } void csv2tseries(const string &fname, const char* date_format) { // delete existing data if(local_data) { delete []dates; delete []data; } ifstream in(fname.c_str()); if(!in) { cerr << "ERROR: tseries csv2tseries(const string &fname, const char* date_format)" << endl; cerr << fname << " not found." << endl; return; } vector<string> v; string line; while(getline(in, line)) v.push_back(line); in.close(); // assumes colnames are present unsigned int rows = v.size() - 1; // there is a comma here for the date // so we don't need to add 1 to cols unsigned int cols = countCommas(v.at(1)); // initialize new memory init(rows,cols); //cout << "cols: " << cols << endl; //cout << "rows: " << rows << endl; vector<string> thisRow; const char delim = ','; // first element in vector is the colnames vector<string> cnames = splitString(v[0],delim); // but delete the spaceholder above the dates column if(cnames.size()) { cnames.erase(cnames.begin()); // set colnames colnames = cnames; } else { colnames.clear(); } for(unsigned int r = 0; r < rows; r++) { // our data starts at 0 index, the file data starts at 1 index (colnames are at 0 index) thisRow = splitString(v[r+1],delim); dates[r] = mkDate(thisRow[0].c_str(),date_format); for(unsigned int c = 0; c < cols; c++) { std::istringstream is(thisRow[c+1]); is >> data[r+rows*c]; } } } /* layout of binary tseries file: <uint> rows <uint> cols <uint> number of colnames <int> size of 1st colname <char> 1st colname <int> size of 2nd colname <char> 2nd colname ... <double> dates <double> data */ void write(const string &fname) { if(rows==0 || cols==0) { cerr << "ERROR: tseries::write:" << endl; cerr << "cannot export NULL tseries." << endl; return; } ofstream out(fname.c_str(), ios::out | ios::binary); // write dims out.write ( reinterpret_cast<char *>(&rows), sizeof(rows) ); out.write ( reinterpret_cast<char *>(&cols), sizeof(cols) ); // write number of cols unsigned int cns = colnames.size(); out.write ( reinterpret_cast<char *>(&cns), sizeof(cns) ); if(cns) { char *c; int c_size; for(unsigned int i=0; i < cns; i++) { c = const_cast<char*>(colnames[i].c_str()); c_size = strlen(c); out.write(reinterpret_cast<char *>(&c_size),sizeof(c_size)); out.write(c,strlen(c)); } } out.write ( reinterpret_cast<char *>(dates), sizeof(DateT)*rows ); out.write ( reinterpret_cast<char *>(data), sizeof(double)*rows*cols ); out.close(); } void read_tseries(const string &fname) { // delete current data if(local_data) { delete []dates; delete []data; } ifstream in(fname.c_str(), ios::binary | ios::in); if(!in) { cerr << "ERROR: tseries read_tseries(const string &fname)" << endl; cerr << fname << " not found." << endl; return; } // read dims in.read ( reinterpret_cast<char *>(&rows), sizeof(rows) ); in.read ( reinterpret_cast<char *>(&cols), sizeof(cols) ); init(rows,cols); unsigned int cns; in.read ( reinterpret_cast<char *>(&cns), sizeof(cns) ); if(cns) { char c[1024]; // FIXME: move to a header file int c_size; for(unsigned int i=0; i < cns; i++) { in.read(reinterpret_cast<char *>(&c_size),sizeof(c_size)); //cout << "c_size: " << c_size << endl; in.read(c,c_size); c[c_size] = '\0'; colnames.push_back(c); } } in.read ( reinterpret_cast<char *>(dates), sizeof(DateT)*rows); in.read ( reinterpret_cast<char *>(data), sizeof(double)*rows*cols); in.close(); }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -