📄 example_set.cpp
字号:
SVMINT i; for(i=0;i<capacity;i++){ put_alpha(i,0); }; if(all_alphas){ for(i=0;i<capacity;i++){ all_alphas[i] = 0; }; };};SVMFLOAT example_set_c::sum(){ // set examples in a consistent state. SVMFLOAT sum_alpha=0; SVMINT i; for(i=0;i<capacity;i++){ sum_alpha += get_alpha(i); }; return(sum_alpha);};void example_set_c::output_ys(ostream& data_stream) const{ data_stream<<"# examples ys"<<endl; SVMINT i; for(i=0;i<examples_total;i++){ data_stream<<(the_set[i].y)<<endl; }; };void readnext(istream& i, char* s, const char delimiter){ SVMINT pos=0; char next = i.peek(); if(next == EOF){ // set stream to eof next = i.get(); }; // skip whitespace while((! i.eof()) && (('\n' == next) || (' ' == next) || ('\t' == next) || ('\r' == next) || ('\f' == next))){ i.get(); next = i.peek(); if(next == EOF){ // set stream to eof next = i.get(); }; }; // read next token if(delimiter == next){ s[pos] = '0'; pos++; next = i.peek(); if(next == EOF){ // set stream to eof next = i.get(); }; } else{ while((! i.eof()) && ('\n' != next) && (' ' != next) && ('\t' != next) && ('\r' != next) && ('\f' != next) && (delimiter != next) && (pos < MAXCHAR-1)){ s[pos] = i.get(); pos++; next = i.peek(); if(next == EOF){ // set stream to eof next = i.get(); }; }; }; s[pos] = '\0'; if(! (i.eof() || ('\n' == next))){ // remove delimiter i.get(); };};istream& operator>> (istream& data_stream, example_set_c& examples){ // lower case, scale (y/n) char* s = new char[MAXCHAR]; // next item in the stream char* s2 = new char[MAXCHAR]; long count=0; // number of examples read (does not necessarily equal examples_total char next=0; // first character in the stream char delimiter = examples.my_format.delimiter; // By which character are the numbers separated? int sparse = examples.my_format.sparse; int where_x = examples.my_format.where_x; // format of the file int where_y = examples.my_format.where_y; int where_alpha = examples.my_format.where_alpha; SVMINT i,j; SVMINT given_total = 0; // what does the user say is the total of examples? SVMINT pos; // dummy for pos of attribute in example SVMINT dim = examples.get_dim(); SVMFLOAT* new_example = new SVMFLOAT[dim+2]; // examples to be inserted while((next != EOF) && ('@' != next) && (! data_stream.eof())){ try{ next = data_stream.peek(); if(next == EOF){ // set stream to eof next = data_stream.get(); }; if(('@' == next) || (data_stream.eof())){ // end of this section } else if(('\n' == next) || (' ' == next) || ('\r' == next) || ('\f' == next) || ('\t' == next)){ // ignore next = data_stream.get(); } else if('#' == next){ // line contains commentary data_stream.getline(s,MAXCHAR); } else if(('+' == next) || ('-' == next) || ('y' == next) || ('a' == next) || ((next >= '0') && (next <= '9'))){ // read an example pos = 0; new_example[dim] = 0; new_example[dim+1] = 0; if(sparse){ for(pos=0;pos<dim;pos++){ new_example[pos] = 0; }; while((! data_stream.eof()) && ('\n' != data_stream.peek())){ readnext(data_stream,s,delimiter); SVMINT spos = 0; while((s[spos] != '\0') && (s[spos] != ':')){ spos++; }; if(s[spos] == '\0'){ // read y try{ new_example[dim] = string2svmfloat(s); } catch(...){ throw read_exception("Class is no number - could not read example"); }; examples.set_initialised_y(); } else{ if(s[spos-1] == 'a'){ // read alpha strncpy(s2,s+spos+1,MAXCHAR-spos); try{ new_example[dim+1] = string2svmfloat(s2); } catch(...){ throw read_exception("Alpha is no number - could not read example"); }; examples.set_initialised_alpha(); } else if(s[spos-1] == 'y'){ // read y strncpy(s2,s+spos+1,MAXCHAR-spos); try{ new_example[dim] = string2svmfloat(s2); } catch(...){ throw read_exception("Class is no number - could not read example"); }; examples.set_initialised_y(); } else{ // input index runs from 1 to dim (svmlight-compatibility): pos = atoi(s); if(pos <= 0){ throw read_exception("Index number not positive."); }; if(pos>dim){ // raise dimension examples.set_dim(pos); SVMFLOAT* example_dummy = new SVMFLOAT[pos+2]; example_dummy[pos] = new_example[dim]; example_dummy[pos+1] = new_example[dim+1]; for(i=0;i<dim;i++){ example_dummy[i] = new_example[i]; }; for(i=dim;i<pos;i++){ example_dummy[i] = 0; }; dim = pos; delete []new_example; new_example = example_dummy; }; try{ new_example[pos-1] = string2svmfloat(s+spos+1); } catch(...){ char* t = new char[MAXCHAR]; strcpy(t,"Attribute is no number - could not read example: "); t = strcat(t,s); throw read_exception(t); }; }; }; while((! data_stream.eof()) && ((' ' == data_stream.peek()) || ('\t' == data_stream.peek()))){ data_stream.get(); }; }; pos = dim; // mark as ok } else{ // not sparse for(int i=1;i<=3;i++) { if(i == where_x){ // read attributes if(dim <= 0) { // read & get dim char next_ws = data_stream.peek(); if(next_ws == EOF){ // set stream to eof next_ws = data_stream.get(); }; dim=0; pos = 0; while(!(data_stream.eof() || ('\n' == next_ws))){ // try to read another attribute while((! data_stream.eof()) && ((' ' == next_ws) || ('\t' == next_ws))){ data_stream.get(); next_ws = data_stream.peek(); if(next_ws == EOF){ // set stream to eof next_ws = data_stream.get(); }; }; if(!(data_stream.eof() || ('\n' == next_ws))){ // attribute is there, read it if(pos == dim){ // double dim dim = 2*dim+1; SVMFLOAT* dummy = new_example; new_example = new SVMFLOAT[dim+2]; new_example[dim] = dummy[pos]; new_example[dim+1] = dummy[pos+1]; for(j=0;j<pos;j++){ new_example[j] = dummy[j]; }; delete []dummy; }; // read example into act_pos readnext(data_stream,s,delimiter); try{ new_example[pos]= string2svmfloat(s); } catch(...){ throw read_exception("Attribute is no number - could not read example"); }; pos++; next_ws = data_stream.peek(); if(next_ws == EOF){ // set stream to eof next_ws = data_stream.get(); }; }; }; // line finished, set dim and exit if(where_y > where_x){ pos--; // y at pos or pos+1 (one of xya xay xy) if(where_y < where_alpha){ // xya pos--; new_example[dim] = new_example[pos]; new_example[dim+1] = new_example[pos+1]; } else if(where_alpha < where_x){ // xy new_example[dim] = new_example[pos]; } else{ // xay pos--; SVMFLOAT dummy = new_example[pos]; // if pos==dim new_example[dim] = new_example[pos+1]; new_example[dim+1] = dummy; }; } else if(where_alpha > where_x){ // xa pos--; new_example[dim+1] = new_example[pos]; }; SVMFLOAT* dummy = new_example; new_example = new SVMFLOAT[pos+2]; for(j=0;j<pos;j++){ new_example[j] = dummy[j]; }; new_example[pos] = dummy[dim]; new_example[pos+1] = dummy[dim+1]; delete []dummy; dim = pos; examples.set_dim(dim); i=4; } else{ // read dense data line for(pos=0;pos<dim;pos++){ readnext(data_stream,s,delimiter); if(s[0] == '\0'){ throw read_exception("Not enough attributes - could not read examples"); }; try{ new_example[pos] = string2svmfloat(s); } catch(...){ char* t = new char[MAXCHAR]; strcpy(t,"Attribute is no number - could not read example: "); t = strcat(t,s); throw read_exception(t); }; }; }; } else if(i == where_y){ // read classification readnext(data_stream,s,delimiter); if(s[0] == '\0'){ throw read_exception("Not enough attributes - could not read examples"); }; try{ new_example[dim] = string2svmfloat(s); } catch(...){ throw read_exception("Class is no number - could not read example"); }; examples.set_initialised_y(); } else if(i == where_alpha){ // read alpha readnext(data_stream,s,delimiter); if(s[0] == '\0'){ throw read_exception("Not enough attributes - could not read examples"); }; try{ new_example[dim+1] = string2svmfloat(s); } catch(...){ throw read_exception("Alpha is no number - could not read example"); }; examples.set_initialised_alpha(); }; }; }; // insert examples, if ok. if(pos==dim){ // example ok, insert examples.put_example(new_example); count++; }; } else{ // line contains parameters data_stream >> s; if((0 == strcmp("dimension",s)) || (0==strcmp("dim",s))){ // dimension already set => error SVMINT new_dim; data_stream >> new_dim; examples.set_dim(new_dim); dim = new_dim; if(new_example != 0){ delete []new_example; }; new_example = new SVMFLOAT[dim+2]; } else if(0 == strcmp("number",s)){ // number of examples, check later for consistency data_stream >> given_total; if(given_total > 0){ // (examples.the_set).reserve((examples.the_set).size() + given_total); examples.resize(examples.size()+given_total); }; } else if(0==strcmp("b",s)){ // hyperplane constant data_stream >> s; examples.b = string2svmfloat(s); } else if(0==strcmp("delimiter",s)){ data_stream >> s; if((s[0] != '\0') && (s[1] != '\0')){ delimiter = s[1]; } else if ((s[1] == '\0') && (s[0] != '\0')){ delimiter = s[0]; if(' ' == data_stream.peek()){ // if delimiter = ' ' we have only read one ' data_stream.get(); if(delimiter == data_stream.peek()){ data_stream.get(); delimiter = ' '; }; }; } else{ delimiter = ' '; }; examples.my_format.delimiter = delimiter; } else if(0==strcmp("format",s)){ data_stream >> s; if(0==strcmp("sparse",s)){ sparse = 1; } else{ sparse = 0; where_x = 0; where_y = 0; where_alpha = 0; for(int i=0;s[i] != '\0';i++){ if('x' == s[i]){ where_x = i+1; } else if('y' == s[i]){ where_y = i+1; } else if('a' == s[i]){ where_alpha = i+1; } else{ throw read_exception("Invalid format for examples"); }; }; if(0 == where_x){ throw read_exception("Invalid format for examples: x must be given"); }; }; if(0 == where_y){ examples.has_y = 0; }; if(0 == where_alpha){ examples.has_alphas = 0; }; examples.my_format.sparse = sparse; examples.my_format.where_x = where_x; examples.my_format.where_y = where_y; examples.my_format.where_alpha = where_alpha; } else{ char* t = new char[MAXCHAR]; strcpy(t,"Unknown parameter: "); strcat(t,s); throw read_exception(t); }; }; } catch(general_exception g){ // re-throw own exceptions if(new_example) delete []new_example; throw g; } catch(...){ if(new_example) delete []new_example; throw read_exception("Error while reading from stream"); }; }; if(new_example) delete []new_example; examples.compress(); // check for consistency if((0 < given_total) && (count != given_total)){ cout<<"WARNING: Wrong number of examples read ("<<count<<" read instead of "<<given_total<<")."<<endl; }; delete []s; delete []s2; return data_stream;};ostream& operator<< (ostream& data_stream, example_set_c& examples){ // output examples data_stream << "# svm example set" << endl; data_stream << "dimension "<< examples.dim << endl; data_stream << "number "<< examples.examples_total << endl; data_stream << "b " << examples.b << endl; char delimiter = examples.my_format.delimiter; if(delimiter != ' '){ data_stream<<"delimiter '"<<delimiter<<"'"<<endl; }; SVMINT total = examples.examples_total; SVMINT dim = examples.dim; SVMINT i; SVMINT pos; SVMINT j=0; svm_example the_example; // output examples; if(examples.my_format.sparse){ data_stream<<"format "<<examples.my_format<<endl; for(i=0;i<total;i++){ // output example i the_example = examples.get_example(i); if((examples.Exp != 0) && (examples.Var != 0)){ for(pos=0;pos<the_example.length-1;pos++){ // output x_j j = the_example.example[pos].index; data_stream<<(the_example.example[pos].index+1)<<":"; if(0 != examples.Var[j]){ data_stream<<(the_example.example[pos].att*examples.Var[j]+examples.Exp[j]); } else{ data_stream<<the_example.example[pos].att+examples.Exp[j]; }; data_stream<<delimiter; } data_stream<<(the_example.example[the_example.length-1].index+1)<<":"; if(0 != examples.Var[dim-1]){ data_stream<<(the_example.example[the_example.length-1].att*examples.Var[dim-1]+examples.Exp[dim-1]); } else{ data_stream<<the_example.example[the_example.length-1].att+examples.Exp[dim-1]; }; if(examples.has_y){ if(0 != examples.Var[dim]){ data_stream << delimiter << "y:" << examples.get_y(i)*examples.Var[dim]+examples.Exp[dim]; } else{ data_stream << delimiter << "y:" << examples.get_y(i)+examples.Exp[dim]; }; }; } else{ for(pos=0;pos<the_example.length-1;pos++){ data_stream<<(the_example.example[pos].index+1)<<":" <<(the_example.example[pos].att)<<delimiter; }; data_stream<<(the_example.example[the_example.length-1].index+1)<<":" <<(the_example.example[the_example.length-1].att); if(examples.has_y){ data_stream << delimiter << "y:" << examples.get_y(i); }; }; if(examples.has_alphas){ if(examples.get_alpha(i) != 0){ data_stream << delimiter << "a:" << examples.get_alpha(i); }; }; data_stream << endl; }; } else{ // output dense format int where_x = examples.my_format.where_x; int where_y = examples.my_format.where_y; int where_alpha = examples.my_format.where_alpha; // output computed values as well if((0 == where_y) && (examples.initialised_y())){ examples.my_format.where_y = 4; where_y = 4; } if((0 == where_alpha) && (examples.initialised_alpha())){ examples.my_format.where_alpha = 5; where_alpha = 5; } data_stream<<"format "<<examples.my_format<<endl; SVMINT pos; for(i=0;i<total;i++){ // output example i the_example = examples.get_example(i); for(int s=1;s<=5;s++){ if(where_x == s){ if(1 != s) data_stream<<delimiter; pos=0; // index in example (0..the_example.length-1 for(j=0;j<dim;j++){ // output attribute j if(j != 0) data_stream<<delimiter; if((pos<the_example.length) && (the_example.example[pos].index == j)){ // output the_example.example[pos].att if((examples.Exp != 0) && (examples.Var != 0)){ if(0 != examples.Var[j]){ data_stream<<(the_example.example[pos].att*examples.Var[j]+examples.Exp[j]); } else{ data_stream<<the_example.example[pos].att+examples.Exp[j]; }; } else{ data_stream<<the_example.example[pos].att; }; if(pos<the_example.length-1) pos++; } else{ data_stream<<"0"; }; }; } else if(where_y == s){ if(1 != s) data_stream<<delimiter; if((examples.Exp != 0) && (examples.Var != 0)){ if(0 != examples.Var[dim]){ data_stream<<examples.get_y(i)*examples.Var[dim]+examples.Exp[dim]; } else{ data_stream<<examples.get_y(i)+examples.Exp[dim]; }; } else{ data_stream<<examples.get_y(i); }; } else if (where_alpha == s){ if(1 != s) data_stream<<delimiter; data_stream<<examples.get_alpha(i); }; }; data_stream<<endl; }; }; return data_stream;};
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -