// example_set.cpp
/**
 * Writes the y value of every example to a stream.
 *
 * Emits the header line "# examples ys" followed by one line per example
 * (indices 0 .. examples_total-1), each holding the_set[index].y.
 *
 * @param data_stream  stream that receives the output
 */
void example_set_c::output_ys(ostream& data_stream) const{
  data_stream << "# examples ys" << endl;
  SVMINT idx = 0;
  while(idx < examples_total){
    // one y value per line, in storage order
    data_stream << (the_set[idx].y) << endl;
    ++idx;
  }
}
/**
 * Reads the next token from a stream into a caller-supplied buffer.
 *
 * Leading whitespace ('\n', ' ', '\t', '\r', '\f') is skipped first. If the
 * next character is then the delimiter, the field is empty and the token
 * "0" is stored. Otherwise characters are copied until whitespace, the
 * delimiter, the end of the stream, or MAXCHAR-1 characters are reached.
 * The result is always NUL-terminated.
 *
 * After the token, one separating character (the delimiter or a blank other
 * than '\n') is removed from the stream. A '\n' is left in place so that
 * callers can detect the end of the line.
 *
 * @param i          stream to read from
 * @param s          output buffer; must hold at least MAXCHAR characters
 * @param delimiter  character that separates fields
 */
void readnext(istream& i, char* s, const char delimiter){
  SVMINT pos=0;
  char next = i.peek();

  // skip whitespace
  while(i.good() &&
        (('\n' == next) ||
         (' '  == next) ||
         ('\t' == next) ||
         ('\r' == next) ||
         ('\f' == next))){
    i.get();
    next = i.peek();
  }

  // read next token
  if(delimiter == next){
    // empty field between two delimiters counts as zero
    s[pos] = '0';
    pos++;
  }
  else{
    // good() instead of !eof(): a stream in fail state must not fill the
    // buffer with the EOF marker returned by get()
    while(i.good() &&
          ('\n' != next) &&
          (' '  != next) &&
          ('\t' != next) &&
          ('\r' != next) &&
          ('\f' != next) &&
          (delimiter != next) &&
          (pos < MAXCHAR-1)){
      s[pos] = i.get();
      pos++;
      next = i.peek();
    }
  }
  s[pos] = '\0';

  // remove the separator that ended the token. Only a real separator is
  // consumed: when the token was cut off at MAXCHAR-1 characters, the next
  // character still belongs to the data and must not be dropped.
  if(i.good() &&
     ('\n' != next) &&
     ((delimiter == next) ||
      (' '  == next) ||
      ('\t' == next) ||
      ('\r' == next) ||
      ('\f' == next))){
    i.get();
  }
}
/**
 * Reads one section of example data from a stream into an example set.
 *
 * Input is processed until a line starting with '@' or the end of the stream:
 *   - whitespace is skipped,
 *   - lines starting with '#' are comments,
 *   - lines starting with '+', '-', 'y', 'a' or a digit are examples, read in
 *     sparse ("index:value", "y:value", "a:value") or dense format according
 *     to the current format settings,
 *   - all other lines are parameter lines: "dimension"/"dim", "number", "b",
 *     "delimiter" and "format".
 *
 * Each example is assembled in a buffer laid out as
 * [0..dim-1] attributes, [dim] y, [dim+1] alpha and handed to
 * examples.put_example(). Format changes are written back to
 * examples.my_format. A warning is printed to cout if a "number" line
 * announced a different count than was actually read.
 *
 * All temporary buffers are released on every exit path, including errors.
 *
 * @param data_stream  stream to read from
 * @param examples     example set that receives the data
 * @return data_stream
 * @throws general_exception (or a type derived from it) re-thrown unchanged;
 *         any other failure is reported as
 *         read_exception("Error while reading from stream")
 */
istream& operator>> (istream& data_stream, example_set_c& examples){
  char* s = new char[MAXCHAR];   // next item in the stream
  char* s2 = new char[MAXCHAR];  // value part of a sparse "key:value" token
  s[0] = '\0';
  s2[0] = '\0';
  long count=0;       // number of examples read (does not necessarily equal examples_total)
  char next=0;        // first character in the stream
  char delimiter = examples.my_format.delimiter; // By which character are the numbers separated?
  int sparse = examples.my_format.sparse;
  int where_x = examples.my_format.where_x;       // format of the file
  int where_y = examples.my_format.where_y;
  int where_alpha = examples.my_format.where_alpha;
  SVMINT i;
  SVMINT given_total = 0; // what does the user say is the total of examples?
  SVMINT pos;             // dummy for pos of attribute in example
  SVMINT dim = examples.get_dim();
  SVMFLOAT* new_example = new SVMFLOAT[dim+2]; // example to be inserted

  while(('@' != next) && (! data_stream.eof())){
    try{
      next = data_stream.peek();
      if(('@' == next) || (data_stream.eof())){
        // end of this section
      }
      else if(('\n' == next) ||
              (' '  == next) ||
              ('\r' == next) ||
              ('\f' == next) ||
              ('\t' == next)){
        // ignore
        next = data_stream.get();
      }
      else if('#' == next){
        // line contains commentary
        data_stream.getline(s,MAXCHAR);
        // getline() sets failbit when the line does not fit into the buffer;
        // keep discarding until the rest of the comment line is consumed
        while(data_stream.fail() && (! data_stream.eof()) && (! data_stream.bad())){
          data_stream.clear();
          data_stream.getline(s,MAXCHAR);
        }
      }
      else if(('+' == next) || ('-' == next) ||
              ('y' == next) || ('a' == next) ||
              ((next >= '0') && (next <= '9'))){
        // read an example
        pos = 0;
        new_example[dim] = 0;
        new_example[dim+1] = 0;
        if(sparse){
          for(pos=0;pos<dim;pos++){
            new_example[pos] = 0;
          }
          while((! data_stream.eof()) && ('\n' != data_stream.peek())){
            readnext(data_stream,s,delimiter);
            // locate the ':' that separates key and value
            SVMINT spos = 0;
            while((s[spos] != '\0') && (s[spos] != ':')){
              spos++;
            }
            if(s[spos] == '\0'){
              // no key: read y
              try{
                new_example[dim] = string2svmfloat(s);
              }
              catch(...){
                throw read_exception("Class is no number - could not read example");
              }
              examples.set_initialised_y();
            }
            else{
              // spos > 0 guards the look-back for tokens that start with ':';
              // such tokens fall through to the index branch and are rejected
              if((spos > 0) && (s[spos-1] == 'a')){
                // read alpha
                strncpy(s2,s+spos+1,MAXCHAR-spos);
                try{
                  new_example[dim+1] = string2svmfloat(s2);
                }
                catch(...){
                  throw read_exception("Alpha is no number - could not read example");
                }
                examples.set_initialised_alpha();
              }
              else if((spos > 0) && (s[spos-1] == 'y')){
                // read y
                strncpy(s2,s+spos+1,MAXCHAR-spos);
                try{
                  new_example[dim] = string2svmfloat(s2);
                }
                catch(...){
                  throw read_exception("Class is no number - could not read example");
                }
                examples.set_initialised_y();
              }
              else{
                // input index runs from 1 to dim (svmlight-compatibility):
                pos = atoi(s);
                if(pos <= 0){
                  throw read_exception("Index number not positive.");
                }
                if(pos>dim){
                  // raise dimension: grow the buffer, keep y and alpha at the end
                  examples.set_dim(pos);
                  SVMFLOAT* example_dummy = new SVMFLOAT[pos+2];
                  example_dummy[pos] = new_example[dim];
                  example_dummy[pos+1] = new_example[dim+1];
                  for(i=0;i<dim;i++){
                    example_dummy[i] = new_example[i];
                  }
                  for(i=dim;i<pos;i++){
                    example_dummy[i] = 0;
                  }
                  dim = pos;
                  delete []new_example;
                  new_example = example_dummy;
                }
                try{
                  new_example[pos-1] = string2svmfloat(s+spos+1);
                }
                catch(...){
                  throw read_exception("Attribute is no number - could not read example");
                }
              }
            }
            // skip blanks up to the next token or the end of the line
            while((! data_stream.eof()) &&
                  ((' '  == data_stream.peek()) ||
                   ('\t' == data_stream.peek()))){
              data_stream.get();
            }
          }
          pos = dim; // mark as ok
        }
        else{
          // not sparse: visit the (at most three) columns in file order
          for(int part=1;part<=3;part++){
            if(part == where_x){
              // read attributes
              if(dim <= 0){
                // dimension unknown: read the whole line & derive dim from it
                char next_ws = data_stream.peek();
                dim=0;
                pos = 0;
                while(!(data_stream.eof() || ('\n' == next_ws))){
                  // try to read another attribute
                  while((! data_stream.eof()) &&
                        ((' '  == next_ws) ||
                         ('\t' == next_ws))){
                    data_stream.get();
                    next_ws = data_stream.peek();
                  }
                  if(!(data_stream.eof() || ('\n' == next_ws))){
                    // attribute is there, read it
                    if(pos == dim){
                      // buffer full: double dim
                      dim = 2*dim+1;
                      SVMFLOAT* old_buffer = new_example;
                      new_example = new SVMFLOAT[dim+2];
                      new_example[dim] = old_buffer[pos];
                      new_example[dim+1] = old_buffer[pos+1];
                      for(SVMINT j=0;j<pos;j++){
                        new_example[j] = old_buffer[j];
                      }
                      delete []old_buffer;
                    }
                    readnext(data_stream,s,delimiter);
                    try{
                      new_example[pos]= string2svmfloat(s);
                    }
                    catch(...){
                      throw read_exception("Attribute is no number - could not read example");
                    }
                    pos++;
                    next_ws = data_stream.peek();
                  }
                }
                // line finished. The values read include the columns that
                // follow x (y and/or alpha); the line must hold at least those.
                SVMINT trailing = 0;
                if(where_y > where_x){ trailing++; }
                if(where_alpha > where_x){ trailing++; }
                if(pos < trailing){
                  throw read_exception("Not enough attributes - could not read examples");
                }
                // move trailing y/alpha to their slots, set dim and exit
                if(where_y > where_x){
                  pos--;
                  // y at pos or pos+1 (one of xya xay xy)
                  if(where_y < where_alpha){
                    // xya
                    pos--;
                    new_example[dim] = new_example[pos];
                    new_example[dim+1] = new_example[pos+1];
                  }
                  else if(where_alpha < where_x){
                    // xy
                    new_example[dim] = new_example[pos];
                  }
                  else{
                    // xay
                    pos--;
                    SVMFLOAT alpha_value = new_example[pos]; // saved first: if pos==dim the next line overwrites it
                    new_example[dim] = new_example[pos+1];
                    new_example[dim+1] = alpha_value;
                  }
                }
                else if(where_alpha > where_x){
                  // xa
                  pos--;
                  new_example[dim+1] = new_example[pos];
                }
                // shrink the buffer to the real dimension
                SVMFLOAT* oversized = new_example;
                new_example = new SVMFLOAT[pos+2];
                for(SVMINT j=0;j<pos;j++){
                  new_example[j] = oversized[j];
                }
                new_example[pos] = oversized[dim];
                new_example[pos+1] = oversized[dim+1];
                delete []oversized;
                dim = pos;
                examples.set_dim(dim);
                // y and alpha were taken from this line already
                break;
              }
              else{
                // read dense data line
                for(pos=0;pos<dim;pos++){
                  readnext(data_stream,s,delimiter);
                  if(s[0] == '\0'){
                    throw read_exception("Not enough attributes - could not read examples");
                  }
                  try{
                    new_example[pos]= string2svmfloat(s);
                  }
                  catch(...){
                    throw read_exception("Attribute is no number - could not read example");
                  }
                }
              }
            }
            else if(part == where_y){
              // read classification
              readnext(data_stream,s,delimiter);
              if(s[0] == '\0'){
                throw read_exception("Not enough attributes - could not read examples");
              }
              try{
                new_example[dim] = string2svmfloat(s);
              }
              catch(...){
                throw read_exception("Class is no number - could not read example");
              }
              examples.set_initialised_y();
            }
            else if(part == where_alpha){
              // read alpha
              readnext(data_stream,s,delimiter);
              if(s[0] == '\0'){
                throw read_exception("Not enough attributes - could not read examples");
              }
              try{
                new_example[dim+1] = string2svmfloat(s);
              }
              catch(...){
                throw read_exception("Alpha is no number - could not read example");
              }
              examples.set_initialised_alpha();
            }
          }
        }
        // insert examples, if ok.
        if(pos==dim){
          // example ok, insert (more checks here if reading of x is reworked!!!
          examples.put_example(new_example);
          count++;
        }
      }
      else{
        // line contains parameters
        s[0] = '\0';
        data_stream.width(MAXCHAR); // never read more than the buffer holds
        data_stream >> s;
        if(data_stream.fail()){
          // stream is unusable (e.g. failed before or during this call);
          // without this check the loop would never reach eof
          throw read_exception("Error while reading from stream");
        }
        if((0 == strcmp("dimension",s)) || (0==strcmp("dim",s))){
          // (re)set the dimension and resize the example buffer accordingly
          SVMINT new_dim = 0;
          data_stream >> new_dim;
          if(data_stream.fail() || (new_dim < 0)){
            throw read_exception("Invalid dimension - could not read examples");
          }
          examples.set_dim(new_dim);
          dim = new_dim;
          // allocate first: if new throws, new_example must still be valid
          // because the handlers below delete it
          SVMFLOAT* resized = new SVMFLOAT[dim+2];
          delete []new_example;
          new_example = resized;
        }
        else if(0 == strcmp("number",s)){
          // number of examples, check later for consistency
          data_stream >> given_total;
          if(given_total > 0){
            examples.resize(examples.size()+given_total);
          }
        }
        else if(0==strcmp("b",s)){
          // hyperplane constant
          data_stream.width(MAXCHAR);
          data_stream >> s;
          examples.b = string2svmfloat(s);
        }
        else if(0==strcmp("delimiter",s)){
          data_stream.width(MAXCHAR);
          data_stream >> s;
          if((s[0] != '\0') && (s[1] != '\0')){
            // quoted form such as ',' : the delimiter is the second character
            delimiter = s[1];
          }
          else if ((s[1] == '\0') && (s[0] != '\0')){
            delimiter = s[0];
            if(' ' == data_stream.peek()){
              // if delimiter = ' ' we have only read one '
              data_stream.get();
              if(delimiter == data_stream.peek()){
                data_stream.get();
                delimiter = ' ';
              }
            }
          }
          else{
            delimiter = ' ';
          }
          examples.my_format.delimiter = delimiter;
        }
        else if(0==strcmp("format",s)){
          data_stream.width(MAXCHAR);
          data_stream >> s;
          if(0==strcmp("sparse",s)){
            sparse = 1;
          }
          else{
            // dense: the letters x, y, a give the column order
            sparse = 0;
            where_x = 0;
            where_y = 0;
            where_alpha = 0;
            for(int k=0;s[k] != '\0';k++){
              if('x' == s[k]){
                where_x = k+1;
              }
              else if('y' == s[k]){
                where_y = k+1;
              }
              else if('a' == s[k]){
                where_alpha = k+1;
              }
              else{
                throw read_exception("Invalid format for examples");
              }
            }
            if(0 == where_x){
              throw read_exception("Invalid format for examples: x must be given");
            }
          }
          if(0 == where_y){ examples.has_y = 0; }
          if(0 == where_alpha){ examples.has_alphas = 0; }
          examples.my_format.sparse = sparse;
          examples.my_format.where_x = where_x;
          examples.my_format.where_y = where_y;
          examples.my_format.where_alpha = where_alpha;
        }
        else{
          // NOTE(review): t is never freed; whether read_exception copies the
          // text or keeps the pointer is not visible here - confirm before
          // releasing it.
          const char prefix[] = "Unknown parameter: ";
          char* t = new char[MAXCHAR];
          strcpy(t,prefix);
          // append at most what still fits (sizeof(prefix) counts the NUL)
          strncat(t,s,MAXCHAR-sizeof(prefix));
          throw read_exception(t);
        }
      }
    }
    catch(general_exception&){
      // re-throw own exceptions; "throw;" keeps the original (derived) type
      delete []new_example;
      delete []s;
      delete []s2;
      throw;
    }
    catch(...){
      delete []new_example;
      delete []s;
      delete []s2;
      throw read_exception("Error while reading from stream");
    }
  }
  delete []new_example;
  examples.compress();
  // check for consistency
  if((0 < given_total) && (count != given_total)){
    cout<<"WARNING: Wrong number of examples read ("<<count<<" read instead of "<<given_total<<")."<<endl;
  }
  delete []s;
  delete []s2;
  return data_stream;
}
/**
 * Writes an example set to a stream in the text format read by operator>>.
 *
 * Output starts with a comment line and the parameter lines "dimension",
 * "number", "b" and (unless the delimiter is a blank) "delimiter", followed
 * by a "format" line and one line per example.
 *
 * Sparse format: "index:value" pairs (indices start at 1), followed by
 * "y:value" if the set has y values and "a:value" for non-zero alphas.
 * Dense format: the columns x, y and alpha in the order given by the format
 * settings; attributes that are not stored are written as "0".
 *
 * If both examples.Exp and examples.Var are set, every attribute is written
 * as att*Var[j]+Exp[j] (or att+Exp[j] when Var[j] is 0), with j being the
 * attribute's own index - presumably undoing a normalisation; confirm
 * against the scaling code.
 *
 * Side effect: in dense format, my_format.where_y / my_format.where_alpha of
 * the set are changed to 4 / 5 when they were unset but the values have been
 * initialised, so that computed values are written as well.
 *
 * @param data_stream  stream that receives the output
 * @param examples     example set to write
 * @return data_stream
 */
ostream& operator<< (ostream& data_stream, example_set_c& examples){
  // output header
  data_stream << "# svm example set" << endl;
  data_stream << "dimension "<< examples.dim << endl;
  data_stream << "number "<< examples.examples_total << endl;
  data_stream << "b " << examples.b << endl;
  char delimiter = examples.my_format.delimiter;
  if(delimiter != ' '){
    data_stream<<"delimiter '"<<delimiter<<"'"<<endl;
  }
  SVMINT total = examples.examples_total;
  SVMINT dim = examples.dim;
  SVMINT j=0;
  svm_example the_example;

  // output examples
  if(examples.my_format.sparse){
    data_stream<<"format "<<examples.my_format<<endl;
    for(SVMINT i=0;i<total;i++){
      // output example i
      the_example = examples.get_example(i);
      for(SVMINT pos=0;pos<the_example.length;pos++){
        if(pos > 0){ data_stream<<delimiter; }
        // output x_j; every attribute (including the last one) is rescaled
        // with the Var/Exp entry of its own index
        j = the_example.example[pos].index;
        data_stream<<(the_example.example[pos].index+1)<<":";
        if((examples.Exp != 0) && (examples.Var != 0)){
          if(0 != examples.Var[j]){
            data_stream<<(the_example.example[pos].att*examples.Var[j]+examples.Exp[j]);
          }
          else{
            data_stream<<the_example.example[pos].att+examples.Exp[j];
          }
        }
        else{
          data_stream<<(the_example.example[pos].att);
        }
      }
      // an example without stored attributes gets no leading delimiter
      bool line_started = (the_example.length > 0);
      if(examples.has_y){
        if(line_started){ data_stream << delimiter; }
        data_stream << "y:" << examples.get_y(i);
        line_started = true;
      }
      if(examples.has_alphas){
        if(examples.get_alpha(i) != 0){
          if(line_started){ data_stream << delimiter; }
          data_stream << "a:" << examples.get_alpha(i);
        }
      }
      data_stream << endl;
    }
  }
  else{
    // output dense format
    int where_x = examples.my_format.where_x;
    int where_y = examples.my_format.where_y;
    int where_alpha = examples.my_format.where_alpha;
    // output computed values as well
    if((0 == where_y) && (examples.initialised_y())){
      examples.my_format.where_y = 4;
      where_y = 4;
    }
    if((0 == where_alpha) && (examples.initialised_alpha())){
      examples.my_format.where_alpha = 5;
      where_alpha = 5;
    }
    data_stream<<"format "<<examples.my_format<<endl;
    SVMINT pos;
    for(SVMINT i=0;i<total;i++){
      // output example i
      the_example = examples.get_example(i);
      for(int slot=1;slot<=5;slot++){
        if(where_x == slot){
          if(1 != slot) data_stream<<delimiter;
          pos=0; // index of the next stored attribute (0..the_example.length-1)
          for(j=0;j<dim;j++){
            // output attribute j
            if(j != 0) data_stream<<delimiter;
            // the bounds check keeps examples without stored attributes
            // (and positions past the last stored one) from being read
            if((pos < the_example.length) &&
               (the_example.example[pos].index == j)){
              if((examples.Exp != 0) && (examples.Var != 0)){
                if(0 != examples.Var[j]){
                  data_stream<<(the_example.example[pos].att*examples.Var[j]+examples.Exp[j]);
                }
                else{
                  data_stream<<the_example.example[pos].att+examples.Exp[j];
                }
              }
              else{
                data_stream<<the_example.example[pos].att;
              }
              pos++;
            }
            else{
              // attribute j is not stored
              data_stream<<"0";
            }
          }
        }
        else if(where_y == slot){
          if(1 != slot) data_stream<<delimiter;
          data_stream<<examples.get_y(i);
        }
        else if (where_alpha == slot){
          if(1 != slot) data_stream<<delimiter;
          data_stream<<examples.get_alpha(i);
        }
      }
      data_stream<<endl;
    }
  }
  return data_stream;
}
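// Keyboard shortcuts of the code viewer this file was copied from
// (not part of the program):
//   Copy code:            Ctrl + C
//   Search code:          Ctrl + F
//   Full-screen mode:     F11
//   Toggle theme:         Ctrl + Shift + D
//   Show shortcuts:       ?
//   Increase font size:   Ctrl + =
//   Decrease font size:   Ctrl + -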