📄 tseries.head.hpp

📁 神经网络是序列预测,C++实现
💻 HPP
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
  }  template<class T, template<class U, class = allocator<U> > class Seq> friend tseries* cbind(Seq<T>& seq, const bool intersection = true) {    tseries *ans;    // empty sequence    if(seq.size()==0) {      return new tseries;    }    // sequence w/ only 1 element    // make copy of tseries and return    if(seq.size()==1) {      typename Seq<T>::iterator tsp_it = seq.begin();	      return new tseries(**tsp_it);    }		    // count total columns (need to know to alloc space for answer)    unsigned int total_cols = 0;    for(typename Seq<T>::iterator tsp_it = seq.begin(); tsp_it != seq.end(); tsp_it++) {      total_cols += (*tsp_it)->cols;    }    if(intersection) {			      // find tseries w/ max number of rows      // since we have to alloc temp buffer to store intersection of dates      // it needs to be as big as the biggest tseries      unsigned int iDateRows = 0;      for(typename Seq<T>::iterator tsp_it = seq.begin(); tsp_it != seq.end(); tsp_it++) {	iDateRows = ( (*tsp_it)->rows > iDateRows ) ? (*tsp_it)->rows : iDateRows;      }      //cout << "DBG: iDateRows" << iDateRows << endl;      // alloc space to store intersecting dates      // this sucks but set_intersection cannot use same memory as input buffer and output buffer      // so we have to alloc this twice      DateT *in_buffer = new DateT[iDateRows];      DateT *out_buffer = new DateT[iDateRows];      DateT *switch_buff;      // copy dates of series 0 into input buffer      memcpy(in_buffer, (*seq.begin())->dates, (*seq.begin())->rows * sizeof(DateT) );      unsigned int in_size = (*seq.begin())->rows;      DateT * last_element;      // go through series 1 to N (skipping series 0, since those dates are already in iDates)      for(typename Seq<T>::iterator tsp_it = seq.begin()+1; tsp_it != seq.end(); tsp_it++) {					//cout << "dts2: " <<(*tsp_it)->dates << endl;	//cout << "rows2: " <<(*tsp_it)->rows << endl;	last_element = set_intersection(in_buffer,					in_buffer + in_size,					(*tsp_it)->dates,					(*tsp_it)->dates + (*tsp_it)->rows,					out_buffer);	// now switch buffers (instead of copying over to other buffer)					// new size of input buffer (because input buffer for next tsp_it will be this out_buffer	in_size = distance(out_buffer,last_element);	// switch input and output buffers	switch_buff = in_buffer;	in_buffer = out_buffer;	out_buffer = switch_buff;      }      // number of rows is size of iDates      ans = new tseries(in_size, total_cols);      ans->setDates(out_buffer);      delete []in_buffer;      delete []out_buffer;    } else {      set<DateT> ans_dates;      // collect union of dates using set to find union      for(typename Seq<T>::iterator tsp_it = seq.begin(); tsp_it != seq.end(); tsp_it++) {	DateT* dp = (*tsp_it)->getDates();	for(unsigned int i = 0; i < (*tsp_it)->rows; i++) {	  ans_dates.insert(dp[i]);	}      }      ans = new tseries(ans_dates.size(), total_cols);      ans->setDates(ans_dates);    }    // add elements to answer    unsigned int ans_col = 0;    for(typename Seq<T>::iterator tsp_it = seq.begin(); tsp_it != seq.end(); tsp_it++) {      RangeSpecifier<DateT> r(ans->dates,(*tsp_it)->dates,ans->rows,(*tsp_it)->rows);      for(unsigned int c = 0; c < (*tsp_it)->cols; c++) {	double* ans_col_ptr = ans->getCol(ans_col);	double* source_col_ptr = (*tsp_it)->getCol(c);	for(unsigned int row = 0; row < r.size; row++) {	  ans_col_ptr[ r.arg1[row] ] = source_col_ptr[ r.arg2[row] ];	}	ans_col++;      }    }    // set colnames    ans->setColNames(mkColNms(seq));    return ans;  }  template<class T, template<class U, class = allocator<U> > class Seq> friend vector<string> mkColNms(Seq<T>& seq) {		    //cout << "mkColNms" << endl;    //cout << "size: " << seq.size() << endl;    vector<string> ans;    vector<bool> has_colnames(seq.size());    unsigned int i = 0;    for(typename Seq<T>::iterator tsp_it = seq.begin(); tsp_it != seq.end(); tsp_it++, i++) {      if((*tsp_it)->getColNames().size()) {	has_colnames[i] = true;      } else {	has_colnames[i] = false;      }    }    // if there are no tseries w/ colnames then return empty string vector    unsigned int ts_with_cnames = accumulate(has_colnames.begin(),has_colnames.end(),0);    if(ts_with_cnames==0) {      return ans;    }    for(typename Seq<T>::iterator tsp_it = seq.begin(); tsp_it != seq.end(); tsp_it++, i++) {      vector<string> cnms = (*tsp_it)->getColNames();      if(cnms.size()) {	for(vector<string>::iterator it = cnms.begin(); it != cnms.end(); it++) {	  ans.push_back(*it);	}      } else {	for(unsigned int i = 0; i < (*tsp_it)->ncol(); i++) {	  ans.push_back("");	}      }    }    return ans;  }  //	template<class T, template<class U, class = allocator<U> > class Seq> void trim(Seq<T>& seq) {  template<class T, template<class U> class Seq> void trim(Seq<T>& seq) {    //printSeq(seq);    unsigned int new_nrows = seq.size();    DateT *newDates = new DateT[new_nrows];    double *newData = new double[new_nrows*cols];    if(newDates==NULL || newData==NULL) {      cerr << "void trim(Seq<T>& seq):" << endl;      cerr << "can't allocate memory." << endl;      // in case one got allocated but not the other      delete []newDates;      delete []newData;      return;    }    // fill dates    unsigned int i = 0;    for(typename Seq<T>::iterator date_it=seq.begin(); date_it != seq.end(); date_it++, i++) {      newDates[i] = *date_it;    }    // fill w/ NAs    for(i = 0; i < new_nrows*cols; i++) {      newData[i] = NAN;    }    // find intersection and set values    RangeSpecifier<DateT> r(newDates, dates, new_nrows, rows);    for(unsigned int r_index = 0; r_index < r.size; r_index++) {      for(unsigned int col = 0; col < cols; col++) {	newData[r.arg1[r_index]+col*new_nrows] = data[r.arg2[r_index]+col*rows];      }    }    // delete old data    delete []data;    delete []dates;    data = newData;    dates = newDates;    rows = new_nrows;  }  // pads existing dates with dates in Seq  template<class T, template<class U> class Seq> void pad(Seq<T>& seq) {    // our new dates    set<DateT> padDates;    // put existing dates into set    for(unsigned int i = 0; i < rows*cols; i++) {      padDates.insert(dates[i]);    }    // put new pad dates into set    unsigned int i = 0;    for(typename Seq<T>::iterator date_it=seq.begin(); date_it != seq.end(); date_it++) {      padDates.insert(*date_it);    }    // just in case    // if these are equal, then we have not added any new dates    unsigned int new_nrows = padDates.size();    if(new_nrows==rows) {      return;    }    DateT *newDates = new DateT[new_nrows];    double *newData = new double[new_nrows*cols];    if(newDates==NULL || newData==NULL) {      cerr << "void pad(Seq<T>& seq):" << endl;      cerr << "can't allocate memory." << endl;      // in case one got allocated but not the other      delete []newDates;      delete []newData;      return;    }		    // set new dates    i = 0;    for(typename set<DateT>::iterator it = padDates.begin(); it != padDates.end(); it++,i++) {      newDates[i] = *it;    }    // fill w/ NAs    for(i = 0; i < new_nrows*cols; i++) {      newData[i] = NAN;    }    // find intersection and set values    RangeSpecifier<DateT> r(newDates,dates,new_nrows,rows);    for(unsigned int r_index = 0; r_index < r.size; r_index++) {      for(unsigned int col = 0; col < cols; col++) {	newData[r.arg1[r_index]+col*new_nrows] = data[r.arg2[r_index]+col*rows];      }    }		    delete []dates;    delete []data;    dates = newDates;    data = newData;    rows = new_nrows;  }  template<class T, template<class U> class Seq> void setDates(Seq<T>& seq) {    typename Seq<T>::iterator start = seq.begin();    typename Seq<T>::iterator end = seq.end();    int newDates_count = distance(start,end);    if(newDates_count<1) {      cerr << "ERROR: setDates" << endl;      cerr << "dates sequence is empty." << endl;      return;    }    if(static_cast<unsigned int>(newDates_count)!=rows) {      cerr << "ERROR: setDates" << endl;      cerr << "sequence length not equal to number of rows." << endl;      return;    }    unsigned int i = 0;    for(typename Seq<T>::iterator date_it = start; date_it != end; date_it++, i++) {      dates[i] = *date_it;    }  }  void write_csv_row(ofstream &out, unsigned int r, const char* date_format) const {      out << date2string(dates[r],date_format) << ",";     for(unsigned int c = 0; c < (cols-1); c++) {       out << data[r + c*rows];       out << ",";     }      // no comma after the last element     out << data[r + (cols-1)*rows]; }   void writecsv(const string &fname, const char* date_format) const {    if(rows==0 || cols==0) {      cerr << "ERROR: tseries::writecsv:" << endl;      cerr << "cannot export NULL tseries." << endl;      return;    }    ofstream out(fname.c_str());	    if(!out) {      cerr << "ERROR: tseries::writecsv(const string &fname, const char* date_format)" << endl;      cerr << "cannot open file for writing: " << fname << endl;    }    unsigned int cns = colnames.size();    if(cns) {      // spacer for dates      out << "dates";      out << ",";      for(unsigned int i=0; i < cns; i++) {	out << colnames[i];	if(i != (cns-1))	  out << ",";      }      out << endl;    }    out.setf(ios::fixed);    // FIXME: add as argument to function instead of here    out << setprecision(10);    for(unsigned int r = 0; r < (rows-1); r++) {      write_csv_row(out,r,date_format);      out << endl;    }    // no endl after this one    write_csv_row(out,rows-1,date_format);    //write newline    out << endl;    out.close();  }  void csv2tseries(const string &fname, const char* date_format) {        // delete existing data    if(local_data) {      delete []dates;      delete []data;    }    ifstream in(fname.c_str());    if(!in) {      cerr << "ERROR: tseries csv2tseries(const string &fname, const char* date_format)" << endl;      cerr << fname << " not found." << endl;      return;    }    vector<string> v;    string line;    while(getline(in, line))      v.push_back(line);	    in.close();    // assumes colnames are present    unsigned int rows = v.size() - 1;    // there is a comma here for the date    // so we don't need to add 1 to cols    unsigned int cols = countCommas(v.at(1));    // initialize new memory    init(rows,cols);    //cout << "cols: " << cols << endl;    //cout << "rows: " << rows << endl;    vector<string> thisRow;    const char delim = ',';    // first element in vector is the colnames    vector<string> cnames = splitString(v[0],delim);    // but delete the spaceholder above the dates column                                                                                                                                                                if(cnames.size()) {      cnames.erase(cnames.begin());      // set colnames                                                                                                                                                                                                    colnames = cnames;    } else {      colnames.clear();    }    for(unsigned int r = 0; r < rows; r++) {      // our data starts at 0 index, the file data starts at 1 index (colnames are at 0 index)      thisRow = splitString(v[r+1],delim);      dates[r] = mkDate(thisRow[0].c_str(),date_format);      for(unsigned int c = 0; c < cols; c++) {	std::istringstream is(thisRow[c+1]);	is >> data[r+rows*c];      }    }  }  /* layout of binary tseries file:     <uint> rows     <uint> cols     <uint> number of colnames     <int> size of 1st colname     <char> 1st colname     <int> size of 2nd colname     <char> 2nd colname     ...     <double> dates     <double> data  */  void write(const string &fname) {    if(rows==0 || cols==0) {      cerr << "ERROR: tseries::write:" << endl;      cerr << "cannot export NULL tseries." << endl;      return;    }    ofstream out(fname.c_str(), ios::out | ios::binary);	    // write dims    out.write ( reinterpret_cast<char *>(&rows), sizeof(rows) );    out.write ( reinterpret_cast<char *>(&cols), sizeof(cols) );	    // write number of cols    unsigned int cns = colnames.size();    out.write ( reinterpret_cast<char *>(&cns), sizeof(cns) );    if(cns) {      char *c;      int c_size;      for(unsigned int i=0; i < cns; i++) {	c = const_cast<char*>(colnames[i].c_str());	c_size = strlen(c);	out.write(reinterpret_cast<char *>(&c_size),sizeof(c_size));	out.write(c,strlen(c));      }    }    out.write ( reinterpret_cast<char *>(dates), sizeof(DateT)*rows );    out.write ( reinterpret_cast<char *>(data), sizeof(double)*rows*cols );    out.close();  }  void read_tseries(const string &fname) {    // delete current data    if(local_data) {      delete []dates;      delete []data;    }    ifstream in(fname.c_str(), ios::binary | ios::in);    if(!in) {      cerr << "ERROR: tseries read_tseries(const string &fname)" << endl;      cerr << fname << " not found." << endl;      return;    }    // read dims    in.read ( reinterpret_cast<char *>(&rows), sizeof(rows) );    in.read ( reinterpret_cast<char *>(&cols), sizeof(cols) );    init(rows,cols);    unsigned int cns;    in.read ( reinterpret_cast<char *>(&cns), sizeof(cns) );    if(cns) {      char c[1024]; // FIXME: move to a header file      int c_size;      for(unsigned int i=0; i < cns; i++) {	in.read(reinterpret_cast<char *>(&c_size),sizeof(c_size));	//cout << "c_size: " << c_size  << endl;	in.read(c,c_size);	c[c_size] = '\0';	colnames.push_back(c);      }    }    in.read ( reinterpret_cast<char *>(dates), sizeof(DateT)*rows);    in.read ( reinterpret_cast<char *>(data), sizeof(double)*rows*cols);    in.close();  }
上一页 1 23
💿 文件大小 20 K
👤 上传用户 cdcgl
📂 所属分类其他
🏷️ 相关标签

#神经网络 #序列
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -