⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 icsiboost.c

📁 Boosting is a meta-learning approach that aims at combining an ensemble of weak classifiers to form a stronger classifier.
💻 C
📖 第 1 页 / 共 5 页
字号:
					{
						string_t* class=vector_get(classes,l);
						// first column: '*' marks the true class; second: '>' marks a positive score
						// (or '*' again when the true class scored <= 0)
						fprintf(stdout,"%s%s % 5f : %s \n",string_cmp(true_class,class)==0?"*":" ",
								(score[l]>0?">":(string_cmp(true_class,class)==0?"*":" ")),score[l],class->data);
						// count an error when the true class received a negative score
						if((score[l]<0 && string_cmp(true_class,class)==0))errors++;
					}
				}
				else
				{
					// no gold label available for this example => just dump the per-class scores
					fprintf(stdout,"correct label = ?\n");
					for(l=0; l<classes->length; l++)
					{
						string_t* class=vector_get(classes,l);
						fprintf(stdout,"   % 5f : %s\n", score[l],class->data);
					}
				}
			}
		}
		// per-line working storage for the classification loop
		vector_free(tokens);
		string_array_free(array_of_tokens);
		string_free(line);
		num_examples++;
	}
	// classification mode ends here: report the error rate (only meaningful when gold labels were seen)
	if(!dryrun_mode && classification_output!=0)
	{
		fprintf(stderr,"ERROR RATE: %f\n",(double)errors/(double)num_examples);
	}
	exit(0);
	}

	// ------- training mode from here on -------
	// load a train, dev and test sets (if available); default train file is "<stem>.data"
	if(data_filename==NULL)
	{
		data_filename = string_copy(stem);
		string_append_cstr(data_filename, ".data");
	}
	vector_t* examples = load_examples(data_filename->data, templates, classes, feature_count_cutoff, 0);
	string_free(data_filename);

	// generate a simple random sequence of example ids for sampling (disabled)
	/*random_sequence=(int*)MALLOC(sizeof(int)*examples->length);
	srand(time(NULL));
	for(i=0;i<examples->length;i++)
	{
		int index=rand()%examples->length;
		int tmp=random_sequence[i];
		random_sequence[i]=random_sequence[index];
		random_sequence[index]=tmp;
	}*/

	vector_t* dev_examples=NULL;
	vector_t* test_examples=NULL;
	// deactivated dev/test that don't work with the new indexed features (need separate handling)
	/*data_filename = string_copy(stem);
	string_append_cstr(data_filename, ".dev");
	dev_examples = load_examples(data_filename->data, templates, classes, 0, 1);
	string_free(data_filename);
	data_filename = string_copy(stem);
	string_append_cstr(data_filename, ".test");
	test_examples = load_examples(data_filename->data, templates, classes, 0, 1);
	string_free(data_filename);*/

	// in dryrun mode we only wanted to check that the data loads
	if(dryrun_mode)exit(0);

	// initialize the sum of weights by classes (to infer the other side of the
	// partition in binary classifiers); indexed as sum_of_weights[b][l] where
	// b is the 0/1 bucket returned by b(example,l) — presumably label membership,
	// confirm against b()'s definition
	double **sum_of_weights=(double**)MALLOC(sizeof(double*)*2);
	sum_of_weights[0]=(double*)MALLOC(sizeof(double)*classes->length);
	sum_of_weights[1]=(double*)MALLOC(sizeof(double)*classes->length);
	for(i=0;i<classes->length;i++)
	{
		sum_of_weights[0][i]=0.0;
		sum_of_weights[1][i]=0.0;
	}
	int l;
	for(i=0;i<examples->length;i++)
	{
		example_t* example=(example_t*)vector_get(examples,i);
		for(l=0;l<classes->length;l++)
		{
			sum_of_weights[b(example,l)][l]+=example->weight[l];
		}
	}

#ifdef USE_THREADS
	// set up the shared "toolbox" the worker threads read their work from:
	// workers pull template columns (next_column) and race to improve best_classifier,
	// both protected by their own mutex; ready_to_process / result_available are
	// counting semaphores pairing one unit of work per template per round
	pthread_t workers[number_of_workers];
	//workertoolbox_t* toolbox=MALLOC(sizeof(workertoolbox_t));
	toolbox=MALLOC(sizeof(workertoolbox_t));
	toolbox->examples=examples;
	toolbox->templates=templates;
	toolbox->classes=classes;
	toolbox->sum_of_weights=sum_of_weights;
	toolbox->next_column=0;
	pthread_mutex_init(&toolbox->next_column_mutex,NULL);
	pthread_mutex_init(&toolbox->best_classifier_mutex,NULL);
	toolbox->best_classifier=MALLOC(sizeof(weakclassifier_t));
	toolbox->best_classifier->c0=MALLOC(sizeof(double)*classes->length);
	toolbox->best_classifier->c1=MALLOC(sizeof(double)*classes->length);
	toolbox->best_classifier->c2=MALLOC(sizeof(double)*classes->length);
	toolbox->best_classifier->objective=1.0;
	// older sem_init()/sem_open() experiments kept for reference:
	//toolbox->ready_to_process=MALLOC(sizeof(sem_t));
	//sem_init(toolbox->ready_to_process,0,0);
	//sem_unlink("ready_to_process");
	//toolbox->ready_to_process=sem_open("ready_to_process",O_CREAT,0700,0);
	//if(toolbox->ready_to_process==(sem_t*)SEM_FAILED)die("ready to process %d",SEM_FAILED);
	toolbox->ready_to_process=semaphore_new(0);
	//toolbox->result_available=MALLOC(sizeof(sem_t));
	//sem_init(toolbox->result_available,0,0);
	//sem_unlink("result_available");
	//toolbox->result_available=sem_open("result_avaiable",O_CREAT,0700,0);
	//if(toolbox->result_available==(sem_t*)SEM_FAILED)die("result_available");
	toolbox->result_available=semaphore_new(0);
	// NOTE(review): feed immediately followed by eat looks like a semaphore smoke-test; confirm it is needed
	semaphore_feed(toolbox->result_available);
	semaphore_eat(toolbox->result_available);
	for(i=0; i<number_of_workers; i++)
	{
		/*pthread_attr_t attributes;
		pthread_attr_init(&attributes);
		pthread_attr_setdetachstate(&attributes,PTHREAD_CREATE_JOINABLE);*/
		//pthread_create(&workers[i],&attributes,threaded_worker,NULL);
		pthread_create(&workers[i],NULL,threaded_worker,NULL);
		//pthread_attr_destroy(&attributes);
	}
#endif

	// main boosting loop: each round selects the weak classifier (decision stump)
	// with the lowest objective, appends it to the ensemble and reweights examples
	int iteration=0;
	vector_t* classifiers=vector_new(maximum_iterations);
	double theorical_error=1.0;
	for(iteration=0;iteration<maximum_iterations;iteration++)
	{
#ifdef USE_THREADS
		// reset the shared per-round state before releasing the workers
		LOCK(toolbox->next_column);
		toolbox->next_column=0;
		UNLOCK(toolbox->next_column);
		LOCK(toolbox->best_classifier);
		toolbox->best_classifier->objective=1.0;
		UNLOCK(toolbox->best_classifier);
#else
		double min_objective=1.0;
#endif
		weakclassifier_t* classifier=NULL;
#ifdef USE_THREADS
		for(i=0;i<templates->length;i++) // find the best classifier: one work unit per template
		{
			//sem_post(toolbox->ready_to_process);
			semaphore_feed(toolbox->ready_to_process);
		}
		// need to store and reorder potential classifiers to get a deterministic behaviour
		for(i=0;i<templates->length;i++) // wait for the results
		{
			//sem_wait(toolbox->result_available);
			semaphore_eat(toolbox->result_available);
		}
		// all results should be available; deep-copy the winner out of the shared toolbox
		classifier=MALLOC(sizeof(weakclassifier_t));
		memcpy(classifier,toolbox->best_classifier,sizeof(weakclassifier_t));
		classifier->c0=MALLOC(sizeof(double)*classes->length);
		classifier->c1=MALLOC(sizeof(double)*classes->length);
		classifier->c2=MALLOC(sizeof(double)*classes->length);
		memcpy(classifier->c0,toolbox->best_classifier->c0,sizeof(double)*classes->length);
		memcpy(classifier->c1,toolbox->best_classifier->c1,sizeof(double)*classes->length);
		memcpy(classifier->c2,toolbox->best_classifier->c2,sizeof(double)*classes->length);
#else
		for(i=0;i<templates->length;i++) // find the best classifier (serial path)
		{
			template_t* template=(template_t*)vector_get(templates,i);
			weakclassifier_t* current=NULL;
			if(template->type==FEATURE_TYPE_CONTINUOUS)
			{
				current=train_continuous_stump(min_objective, template, examples, classes->length);
			}
			else if(template->type==FEATURE_TYPE_TEXT || template->type==FEATURE_TYPE_SET)
			{
				current=train_text_stump(min_objective, template, examples, sum_of_weights, classes->length);
			}
			// else => FEATURE_TYPE_IGNORE
			if(current==NULL)continue;
			// keep 'current' only if it improves the objective by more than the
			// 1e-11 epsilon (ties go to the earlier template => deterministic)
			if(current->objective-min_objective<-1e-11)
			{
				min_objective=current->objective;
				if(classifier!=NULL) // free previous classifier
				{
					FREE(classifier->c0);
					FREE(classifier->c1);
					FREE(classifier->c2);
					FREE(classifier);
				}
				classifier=current;
			}
			else
			{
				FREE(current->c0);
				FREE(current->c1);
				FREE(current->c2);
				FREE(current);
			}
		}
#endif
		vector_push(classifiers,classifier);
		// only dump per-example scores on the final round
		if(iteration==maximum_iterations-1) output_scores=1; else output_scores=0;
		double error=compute_classification_error(classifiers, examples, sum_of_weights, classes->length); // compute error rate and update weights
		double dev_error=NAN;
		if(dev_examples!=NULL)dev_error = compute_classification_error(classifiers, dev_examples, NULL, classes->length); // compute error rate on dev
		double test_error=NAN;
		if(test_examples!=NULL)test_error = compute_classification_error(classifiers, test_examples, NULL, classes->length); // compute error rate on test
		// display result "a la" boostexter
		if(verbose==1)
		{
			char* token="";
			if(classifier->type==CLASSIFIER_TYPE_TEXT)
			{
				tokeninfo_t* tokeninfo=(tokeninfo_t*)vector_get(classifier->template->tokens,classifier->token);
				token=tokeninfo->key;
			}
			if(classifier->type==CLASSIFIER_TYPE_THRESHOLD)
			{
				fprintf(stdout,"\n%s:%s\n",classifier->template->name->data,token);
				fprintf(stdout,"Threshold: %7.3g\n",classifier->threshold);
				// NOTE(review): this overwrites c0[0] right before displaying it —
				// looks like a debugging leftover; confirm it is intentional
				classifier->c0[0]=-1e-11;
				fprintf(stdout,"C0: ");for(i=0;i<classes->length;i++)fprintf(stdout," % 4.3f ",classifier->c0[i]); fprintf(stdout,"\n");
				fprintf(stdout,"C1: ");for(i=0;i<classes->length;i++)fprintf(stdout," % 4.3f ",classifier->c1[i]); fprintf(stdout,"\n");
				fprintf(stdout,"C2: ");for(i=0;i<classes->length;i++)fprintf(stdout," % 4.3f ",classifier->c2[i]); fprintf(stdout,"\n");
			}
			else if(classifier->type==CLASSIFIER_TYPE_TEXT)
			{
				// NOTE(review): the text branch prints c1/c2 under the labels "C0:"/"C1:" —
				// presumably the absent/present scores in boostexter's numbering; verify
				fprintf(stdout,"\n%s:%s \n",classifier->template->name->data,token);
				fprintf(stdout,"C0: ");for(i=0;i<classes->length;i++)fprintf(stdout," % 4.3f ",classifier->c1[i]); fprintf(stdout,"\n");
				fprintf(stdout,"C1: ");for(i=0;i<classes->length;i++)fprintf(stdout," % 4.3f ",classifier->c2[i]); fprintf(stdout,"\n");
			}
		}
		// theoretical error bound = product of per-round objectives
		theorical_error*=classifier->objective;
		fprintf(stdout,"rnd %4d: wh-err= %.6f  th-err= %.6f  test= %7f  train= %7f\n",iteration+1,classifier->objective,theorical_error,test_error,error);
		//fprintf(stdout,"rnd %4d: wh-err= %.6f  th-err= %.6f  dev= %.7f  test= %.7f  train= %.7f\n",iteration+1,classifier->objective,theorical_error,dev_error,test_error,error);
		// unlike boostexter, C0 is always unk, C1 below or absent, C2 above or present
	}

	// save the ensemble; default model file is "<stem>.shyp"
	if(model_name==NULL)
	{
		model_name = string_copy(stem);
		string_append_cstr(model_name, ".shyp");
	}
	save_model(classifiers,classes,model_name->data,pack_model);
	string_free(model_name);

	// release data structures (this is why we need a garbage collector ;)
	FREE(sum_of_weights[0]);
	FREE(sum_of_weights[1]);
	FREE(sum_of_weights);
	for(i = 0; i < classifiers->length; i++)
	{
		weakclassifier_t* classifier=(weakclassifier_t*)vector_get(classifiers,i);
		if(classifier==NULL)continue;
		FREE(classifier->c0);
		FREE(classifier->c1);
		FREE(classifier->c2);
		FREE(classifier);
	}
	vector_free(classifiers);
	for(i=0; i<classes->length; i++) string_free((string_t*)vector_get(classes,i));
	vector_free(classes);
	for(i=0; i<templates->length; i++)
	{
		template_t* template=(template_t*)vector_get(templates,i);
		string_free(template->name);
		hashtable_free(template->dictionary);
		int j;
		for(j=0; j<template->tokens->length; j++)
		{
			tokeninfo_t* tokeninfo = (tokeninfo_t*) vector_get(template->tokens, j);
			if(tokeninfo->examples!=NULL)vector_free(tokeninfo->examples);
			FREE(tokeninfo->key);
			FREE(tokeninfo);
		}
		vector_free(template->tokens);
		vector_free(template->values);
		if(template->ordered!=NULL)FREE(template->ordered);
		if(template->classifiers!=NULL)vector_free(template->classifiers);
		FREE(template);
	}
	vector_free(templates);
	if(dev_examples!=NULL)
	{
		for(i=0; i<dev_examples->length; i++)
		{
			example_t* example=(example_t*)vector_get(dev_examples,i);
			//vector_free(example->features);
			FREE(example->weight);
			FREE(example->score);
			FREE(example);
		}
		vector_free(dev_examples);
	}
	if(test_examples!=NULL)
	{
		for(i=0; i<test_examples->length; i++)
		{
			example_t* example=(example_t*)vector_get(test_examples,i);
			//vector_free(example->features);
			FREE(example->weight);
			FREE(example->score);
			FREE(example);
		}
		vector_free(test_examples);
	}
	for(i=0; i<examples->length; i++)
	{
		example_t* example=(example_t*)vector_get(examples,i);
		//vector_free(example->features);
		FREE(example->weight);
		FREE(example->score);
		FREE(example);
	}
	vector_free(examples);
	string_free(stem);
	//if(verbose)fprintf(stdout,"FINISHED!!!\n");

#ifdef USE_THREADS
	// signal termination, wake every worker enough times that none stays blocked
	// on the work semaphore, then join them and free the shared toolbox
	LOCK(finished);
	finished=1;
	UNLOCK(finished);
	for(i=0;i<number_of_workers;i++)
	{
		int j;
		for(j=0;j<number_of_workers;j++)
			semaphore_feed(toolbox->ready_to_process);
			//sem_post(toolbox->ready_to_process); // need more, because you cannot unlock all at a time
		void* output;
		pthread_join(workers[i],&output);
		//if(verbose)fprintf(stdout,"worker joined with return value: %p\n",output);
	}
	/*sem_unlink("ready_to_process");
	sem_close(toolbox->ready_to_process);
	sem_unlink("results_available");
	sem_close(toolbox->result_available);*/
	semaphore_free(toolbox->ready_to_process);
	semaphore_free(toolbox->result_available);
	FREE(toolbox->best_classifier->c0);
	FREE(toolbox->best_classifier->c1);
	FREE(toolbox->best_classifier->c2);
	FREE(toolbox->best_classifier);
	FREE(toolbox);
#endif
	return 0;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -