⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 svm_c.cpp

📁 支持向量机(4)mySVM
💻 CPP
📖 第 1 页 / 共 5 页
字号:
			
			// set variables free again
			shrink_const += 10;
			lambda_factor*=1.2;
			for(SVMINT i=0;i<examples_total;i++){
				at_bound[i]=0;
			};
		};
		
		shrink();
		//    if(iteration%100 == 0){ feasible_epsilon=1; };
		
		calculate_working_set();
		update_working_set();
		
		if(parameters->verbosity >= 4){
			SVMINT shrinked=0;
			SVMINT upper_bound=0;
			SVMINT lower_bound=0;
			
			for(SVMINT i=0;i<examples_total;i++){
				if(at_bound[i] >= shrink_const){ 
					shrinked++;
				};
				if(abs(all_alphas[i]) < is_zero){
					lower_bound++;
				}
				else if((all_alphas[i]-Cneg>-is_zero) || (all_alphas[i]+Cpos<is_zero)){
					upper_bound++;
				};
			};
			cout<<examples_total<<" variables total, ";
			cout<<lower_bound<<" variables at lower bound, ";
			cout<<upper_bound<<" variables at upper bound, ";
			cout<<shrinked<<" variables shrinked"<<endl;
		};
  };
  
  
  if(iteration >= max_iterations){
	  cout<<"*** No convergence: Time up."<<endl;
	  if(examples_total<examples->size()){
		  // set sums for all variables for statistics
		  reset_shrinked();
		  SVMFLOAT new_convergence_epsilon = 0;
		  SVMFLOAT the_lambda;
		  for(SVMINT i=0;i<examples_total;i++){
			  the_lambda = lambda(i);
			  if(the_lambda < new_convergence_epsilon){
				  new_convergence_epsilon = the_lambda;
			  };
		  };
		  convergence_epsilon = -new_convergence_epsilon;
	  };
  };
  
  time_all = get_time() - time_all;
  
  // calculate b
  SVMFLOAT new_b=0;
  SVMINT new_b_count=0;
  for(SVMINT i=0;i<examples_total;i++){
	  if((all_alphas[i]-Cneg < -is_zero) && (all_alphas[i]>is_zero)){
		  new_b +=  all_ys[i] - sum[i]-epsilon_neg;
		  new_b_count++;
	  }
	  else if((all_alphas[i]+Cpos > is_zero) && (all_alphas[i]<-is_zero)){
		  new_b +=  all_ys[i] - sum[i]+epsilon_pos;
		  new_b_count++;
	  };
	  examples->put_alpha(i,all_alphas[i]);
  };
  
  if(new_b_count>0){
	  examples->put_b(new_b/((SVMFLOAT)new_b_count));
  }
  else{
	  // unlikely
	  for(SVMINT i=0;i<examples_total;i++){
		  if((all_alphas[i]<is_zero) && (all_alphas[i]>-is_zero)) {
			  new_b +=  all_ys[i]- sum[i];
			  new_b_count++;
		  };
	  };
	  if(new_b_count>0){
		  examples->put_b(new_b/((SVMFLOAT)new_b_count));
	  }
	  else{
		  // even unlikelier
		  for(SVMINT i=0;i<examples_total;i++){
			  new_b +=  all_ys[i]- sum[i];
			  new_b_count++;
		  };
		  examples->put_b(new_b/((SVMFLOAT)new_b_count));
	  };
  };
  
  
  if(parameters->verbosity>= 2){
	  cout<<"Done training: "<<iteration<<" iterations."<<endl;
	  if(parameters->verbosity>= 3){
		  // cout<<"lambda_eq = "<<lambda_eq<<endl;
		  SVMFLOAT now_target=0;
		  SVMFLOAT now_target_dummy=0;
		  for(SVMINT i=0;i<examples_total;i++){
			  now_target_dummy=sum[i]/2-all_ys[i];
			  if(is_alpha_neg(i)){
				  now_target_dummy+= epsilon_pos;
			  }
			  else{
				  now_target_dummy-= epsilon_neg;
			  };
			  now_target+=all_alphas[i]*now_target_dummy;
		  };
		  cout<<"Target function: "<<now_target<<endl;
	  };
  };
  
  the_result = print_statistics();
  
  exit_optimizer();
  examples->set_initialised_alpha();
  parameters->working_set_size = param_wss;
  return the_result;
};


void svm_c::shrink(){
	// move shrinked examples to back
	if(to_shrink>examples_total/10){
		SVMINT i;
		SVMINT last_pos=examples_total;
		for(i=0;i<last_pos;i++){
			if(at_bound[i] >= shrink_const){
				// shrink i
				sum_alpha += all_alphas[i];
				last_pos--;
				examples->swap(i,last_pos);
				kernel->overwrite(i,last_pos);
				sum[i] = sum[last_pos];
				at_bound[i] = at_bound[last_pos];
			};
			to_shrink=0;
		};
		
		examples_total = last_pos;
		kernel->set_examples_size(examples_total);
		
		if(parameters->verbosity>=4){
			cout<<"shrinked to "<<examples_total<<" variables"<<endl;
		};
	};
};


int svm_c::convergence(){
	long time_start = get_time();
	SVMFLOAT the_lambda_eq = 0;
	SVMINT total = 0;
	SVMFLOAT alpha_sum=0;
	SVMFLOAT alpha=0;
	SVMINT i;
	int result=1;
	
	// actual convergence-test
	total = 0; alpha_sum=0;
	
	for(i=0;i<examples_total;i++){
		alpha = all_alphas[i];
		alpha_sum += alpha;
		if((alpha>is_zero) && (alpha-Cneg < -is_zero)){
			// alpha^* = - nabla
			the_lambda_eq += -nabla(i); //all_ys[i]-epsilon_neg-sum[i];
			total++;
		}
		else if((alpha<-is_zero) && (alpha+Cpos > is_zero)){
			// alpha = nabla
			the_lambda_eq += nabla(i); //all_ys[i]+epsilon_pos-sum[i];
			total++;
		};
	};
	
	if(parameters->verbosity >= 4){
		cout<<"lambda_eq = "<<(the_lambda_eq/total)<<endl;
	};
	if(total>0){
		lambda_eq = the_lambda_eq / total;
	}
	else{
		// keep WS lambda_eq
		lambda_eq = lambda_WS; //(lambda_eq+4*lambda_WS)/5;
		if(parameters->verbosity>= 4){
			cout<<"*** no SVs in convergence(), lambda_eq = "<<lambda_eq<<"."<<endl;
		};
	};
	
	if(target_count>2){
		// estimate lambda from WS
		if(target_count>20){
			// desperate!
			lambda_eq = ((40-target_count)*lambda_eq + (target_count-20)*lambda_WS)/20;
			if(parameters->verbosity>=5){
				cout<<"Re-Re-calculated lambda from WS: "<<lambda_eq<<endl;
			};
			if(target_count>40){
				// really desperate, kick one example out!
				i = working_set[target_count%working_set_size];
				if(is_alpha_neg(i) > 0){
					lambda_eq = -nabla(i);
				}
				else{
					lambda_eq = nabla(i);
				};
				if(parameters->verbosity>=5){
					cout<<"set lambda_eq to nabla("<<i<<"): "<<lambda_eq<<endl;
				};
			};
		}
		else{
			lambda_eq = lambda_WS;
			if(parameters->verbosity>=5){
				cout<<"Re-calculated lambda_eq from WS: "<<lambda_eq<<endl;
			};
		};
	};
	
	// check linear constraint
	if(abs(alpha_sum+sum_alpha) > convergence_epsilon){
		// equality constraint violated
		project_to_constraint();
		if(parameters->verbosity>= 4){
			cout<<"No convergence: equality constraint violated: |"<<(alpha_sum+sum_alpha)<<"| >> 0"<<endl;
		};
		result = 0;  
	};
	
	i=0;
	while((i<examples_total) && (result != 0)){
		if(lambda(i)>=-convergence_epsilon){
			i++;
		}
		else{
			result = 0;
		};
	};
	
	time_convergence += get_time() - time_start;
	return result;
};


void svm_c::heapify(int minheap, SVMINT start, SVMINT size){
	// build heap of array working_set[start:start+size-1]
	// (i.e. "size" elements starting at "start"th element)
	
	// minheap = 1 <=> maximal element at root 
	// (i.e. we build the heap of minimal elements)
	
	// v_a[i] = w_s_v[start-1+i], count beginning at 1
	SVMFLOAT* value_array = working_set_values+start-1; 
	SVMINT* pos_array = working_set+start-1;
	
	int running = 1;
	SVMINT pos = 1;
	SVMINT left, right, largest;
	SVMFLOAT dummyf;
	SVMINT dummyi;
	if(minheap){
		while(running){
			left = 2*pos;
			right = left+1;
			if((left<=size) && 
				(value_array[left] > value_array[pos]))
				largest = left;
			else{
				largest = pos;
			};
			if((right<=size) && 
				(value_array[right] > value_array[largest])){
				largest = right;
			};
			if(largest == pos){
				running = 0;
			}
			else{
				//cout<<"switching "<<pos<<" and "<<largest<<endl;
				dummyf = value_array[pos];
				dummyi = pos_array[pos];
				value_array[pos] = value_array[largest];
				pos_array[pos] = pos_array[largest];
				value_array[largest] = dummyf;
				pos_array[largest] = dummyi;
				pos = largest;
			};
		};
	}
	else{
		// maxheap
		while(running){
			left = 2*pos;
			right = left+1;
			if((left<=size) && 
				(value_array[left] < value_array[pos]))
				largest = left;
			else{
				largest = pos;
			};
			if((right<=size) && 
				(value_array[right] < value_array[largest]))
				largest = right;
			if(largest == pos){
				running = 0;
			}
			else{
				dummyf = value_array[pos];
				dummyi = pos_array[pos];
				value_array[pos] = value_array[largest];
				pos_array[pos] = pos_array[largest];
				value_array[largest] = dummyf;
				pos_array[largest] = dummyi;
				pos = largest;
			};
		};
	};
};


void svm_c::calculate_working_set(){
/**
*
* Find top and bottom (w.r.t. in_alpha_neg*nabla) feasible 
* variables for working_set
*
	*/
	
	long time_start = get_time();
	
	// reset WSS
	if(working_set_size < parameters->working_set_size){
		working_set_size=parameters->working_set_size;
		if(working_set_size>examples_total) working_set_size = examples_total;
	};
	
	SVMINT heap_min=0;
	SVMINT heap_max=0;
	SVMINT heap_max_bound = working_set_size/2+working_set_size%2;
	
	SVMINT i=0;
	SVMFLOAT sort_value;
	
	working_set_values[0] = infinity;
	working_set_values[working_set_size/2] = -infinity;
	
	SVMFLOAT feasible_sum=0;
	SVMFLOAT infeasible_sum=0;
	SVMFLOAT the_lambda;
	SVMFLOAT the_nabla;
	SVMFLOAT min_lambda = infinity;
	SVMINT infeasible_count=0;
	SVMINT feasible_count=0;
	int is_feasible;
	int atbound;
	
	while(i<examples_total) {
		is_feasible = feasible(i,&the_nabla,&the_lambda,&atbound);
		if(atbound){
			if(the_lambda < min_lambda) min_lambda = the_lambda;
		};
		if(0 != is_feasible){
			if(atbound){
				feasible_count++;
				feasible_sum -= the_lambda;
			};
			if(is_alpha_neg(i) > 0){
				sort_value = -the_nabla;  // - : maximum inconsistency approach
			}
			else{
				sort_value = the_nabla;
			};
			// add to min_heap?
			// min_heap full?
			if(heap_min<working_set_size/2){
				// add to min_heap
				working_set_values[heap_min] = sort_value;
				working_set[heap_min] = i;
				heap_min++;
				if(heap_min == working_set_size/2){
					// build heap
					for(SVMINT j=heap_min;j>0;j--){
						heapify(1,j-1,heap_min+1-j);
					};
				};
			}
			else if(sort_value < working_set_values[0]){
				// replace max of min_heap
				working_set_values[0] = sort_value;
				working_set[0] = i;
				heapify(1,0,heap_min);
			};
			// add to max_heap?
			// max_heap full?
			if(heap_max<heap_max_bound){
				// add to max_heap
				working_set_values[working_set_size/2+heap_max] = sort_value;
				working_set[working_set_size/2+heap_max] = i;
				heap_max++;
				if(heap_max == heap_max_bound){
					// build heap
					for(SVMINT j=heap_max;j>0;j--){
						heapify(0,working_set_size/2+j-1,heap_max+1-j);
					};
				};
			}
			else if(sort_value > working_set_values[working_set_size/2]){
				// replace min element of heap
				working_set_values[working_set_size/2] = sort_value;
				working_set[working_set_size/2] = i;
				heapify(0,working_set_size/2,heap_max);
			};
		}
		else{
			// not feasible
			if(the_lambda<0){
				infeasible_sum -= the_lambda;
				infeasible_count++;
			};
		};
		i++;
	};
	
	// set new lambda_threshold
	if(min_lambda<0){
		lambda_threshold = (127*lambda_threshold-lambda_factor*min_lambda)/128; //16
		if(lambda_threshold < convergence_epsilon) lambda_threshold = convergence_epsilon;
	};
	//  lambda_threshold = 1; //convergence_epsilon;
	
	
	if((working_set_values[0] >= working_set_values[working_set_size/2]) &&
		(heap_min>0) &&
		(heap_max>0)){
		// there could be the same values in the min- and maxheap,
		// sort them out (this is very unlikely)
		SVMINT j=0;
		i=0;
		while(i<heap_min){
			// working_set[i] also in max-heap?
			j=working_set_size/2;
			while((j<working_set_size/2+heap_max) && 
				(working_set[j] != working_set[i])){
				j++;
			};
			if(j<working_set_size/2+heap_max){
				// working_set[i] == working_set[j]
				if(heap_min<heap_max){
					// remove j from WS
					working_set[j] = working_set[working_set_size/2-1+heap_max];
					working_set_values[j] = working_set_values[working_set_size/2-1+heap_max];
					heap_max--;
				}
				else{
					working_set[i] = working_set[heap_min-1];
					working_set_values[i] = working_set_values[heap_min-1];
					heap_min--;
				};
			}
			else{
				i++;
			};
		};
	};
	
	if(parameters->verbosity>=5){
		cout<<feasible_count<<" feasible variables"<<endl;
	};
	
	if((feasible_count < (target_count+1)*working_set_size) ||
		(heap_min+heap_max < working_set_size) ||
		(feasible_epsilon < 0)){
		// need more feasible variables
		if(heap_min+heap_max < working_set_size) {
			// condense WS
			for(SVMINT i=0;i<heap_max;i++){
				working_set[heap_min+i] = working_set[working_set_size/2+i];
				working_set_values[heap_min+i] = working_set_values[working_set_size/2+i];
			};
			working_set_size = heap_min+heap_max;
		};
		
		if(feasible_epsilon<0){
			feasible_epsilon = -feasible_epsilon;
		}
		else if(infeasible_count > 0){
			feasible_epsilon = infeasible_sum / infeasible_count / 2;
		}
		else{
			if(feasible_count == examples_total){
				// problem lies somewhere else, reset feasible_epsilon
				if(heap_min+heap_max < working_set_size){
					feasible_epsilon = 1e-3;
				};
			}
			else if(target_count > 1){
				// need feasible_epsilon < 0
				if(feasible_epsilon >= 0){
					infeasible_sum=0;
					infeasible_count=0;
					SVMFLOAT thelambda;
					for(SVMINT i=0;i<examples_total;i++){
						thelambda = lambda(i);
						if(thelambda>-feasible_epsilon){
							infeasible_sum -= thelambda;
							infeasible_count++;
						};
					};
					if(infeasible_count > 0){
						feasible_epsilon = infeasible_sum / infeasible_count / 2;
					}
					else{
						feasible_epsilon = -infinity;
					};
				};
			};

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -