⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 svm.py

📁 SVM是一种常用的模式分类机器学习算法
💻 PY
字号:
"""Object-oriented Python wrappers around the low-level ``svmc`` module.

The classes below own buffers allocated through ``svmc`` and release them
in their ``__del__`` methods.  The code is written to run unchanged on both
Python 2 and Python 3.
"""

from math import exp, fabs

import svmc
from svmc import C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, NU_SVR
from svmc import LINEAR, POLY, RBF, SIGMOID, PRECOMPUTED


def _is_null_pointer(ptr):
    """Return True when ptr is None or the 'NULL' marker string."""
    return ptr is None or ptr == 'NULL'


def _is_sequence(obj):
    """Return True when obj should be treated as a positional sequence.

    Python 2 provides operator.isSequenceType; it was removed in Python 3,
    where we fall back to checking for indexing and len() support while
    rejecting objects that look like mappings.
    """
    import operator

    legacy_check = getattr(operator, 'isSequenceType', None)
    if legacy_check is not None:
        return legacy_check(obj)
    return (hasattr(obj, '__getitem__') and hasattr(obj, '__len__')
            and not hasattr(obj, 'keys'))


def _int_array(seq):
    """Copy seq into a freshly allocated svmc int array and return it.

    The caller owns the result and must release it with _free_int_array.
    """
    array = svmc.new_int(len(seq))
    try:
        for position, item in enumerate(seq):
            svmc.int_setitem(array, position, item)
    except Exception:
        # Do not leak the buffer when an item cannot be stored.
        svmc.delete_int(array)
        raise
    return array


def _double_array(seq):
    """Copy seq into a freshly allocated svmc double array and return it.

    The caller owns the result and must release it with _free_double_array.
    """
    array = svmc.new_double(len(seq))
    try:
        for position, item in enumerate(seq):
            svmc.double_setitem(array, position, item)
    except Exception:
        # Do not leak the buffer when an item cannot be stored.
        svmc.delete_double(array)
        raise
    return array


def _free_int_array(x):
    """Release an svmc int array unless x is None or 'NULL'."""
    if not _is_null_pointer(x):
        svmc.delete_int(x)


def _free_double_array(x):
    """Release an svmc double array unless x is None or 'NULL'."""
    if not _is_null_pointer(x):
        svmc.delete_double(x)


def _int_array_to_list(x, n):
    """Return the first n entries of the svmc int array x as a list."""
    # A real list (not a lazy map object) is required: callers index it.
    return [svmc.int_getitem(x, i) for i in range(n)]


def _double_array_to_list(x, n):
    """Return the first n entries of the svmc double array x as a list."""
    # A real list (not a lazy map object) is required: callers index it.
    return [svmc.double_getitem(x, i) for i in range(n)]


def _apply_default_gamma(prob, param):
    """Set param.gamma to 1.0 / prob.maxlen when it is still 0.

    param is modified in place.  ZeroDivisionError is raised when
    prob.maxlen is 0.
    """
    if param.gamma == 0:
        param.gamma = 1.0 / prob.maxlen


class svm_parameter:
    """Attribute-style access to an svmc svm_parameter structure.

    Reading or writing an attribute ``name`` is forwarded to
    ``svmc.svm_parameter_<name>_get`` / ``svmc.svm_parameter_<name>_set``.
    ``weight_label`` and ``weight`` accept Python sequences, which are
    copied into svmc arrays owned by this object.
    """

    # default values
    default_parameters = {
        'svm_type': C_SVC,
        'kernel_type': RBF,
        'degree': 3,
        'gamma': 0,        # 1/k
        'coef0': 0,
        'nu': 0.5,
        'cache_size': 40,
        'C': 1,
        'eps': 1e-3,
        'p': 0.1,
        'shrinking': 1,
        'nr_weight': 0,
        'weight_label': [],
        'weight': [],
        'probability': 0
    }

    def __init__(self, **kw):
        """Create the structure, apply the defaults, then the overrides in kw."""
        # Written through __dict__ because __setattr__ forwards to svmc.
        self.__dict__['param'] = svmc.new_svm_parameter()
        for attr, val in self.default_parameters.items():
            setattr(self, attr, val)
        for attr, val in kw.items():
            setattr(self, attr, val)

    def __getattr__(self, attr):
        """Read attr through the matching svmc getter.

        Only invoked for names not found normally; raises AttributeError
        when svmc has no getter for attr.
        """
        get_func = getattr(svmc, 'svm_parameter_%s_get' % (attr))
        return get_func(self.param)

    def __setattr__(self, attr, val):
        """Write attr through the matching svmc setter.

        For weight_label / weight the sequence is converted to an svmc
        array first; the previously installed array is released only after
        the new one has been installed, and the recorded length is updated
        only once the conversion has succeeded.
        """
        if attr == 'weight_label':
            new_array = _int_array(val)
            old_array = self.weight_label
            set_func = getattr(svmc, 'svm_parameter_%s_set' % (attr))
            set_func(self.param, new_array)
            self.__dict__['weight_label_len'] = len(val)
            _free_int_array(old_array)
        elif attr == 'weight':
            new_array = _double_array(val)
            old_array = self.weight
            set_func = getattr(svmc, 'svm_parameter_%s_set' % (attr))
            set_func(self.param, new_array)
            self.__dict__['weight_len'] = len(val)
            _free_double_array(old_array)
        else:
            set_func = getattr(svmc, 'svm_parameter_%s_set' % (attr))
            set_func(self.param, val)

    def __repr__(self):
        """List every attribute for which svmc exposes a setter."""
        prefix = 'svm_parameter_'
        suffix = '_set'
        ret = '<svm_parameter:'
        for name in dir(svmc):
            if not (name.startswith(prefix) and name.endswith(suffix)):
                continue
            attr = name[len(prefix):-len(suffix)]
            if attr == 'weight_label':
                ret = ret + ' weight_label = %s,' % _int_array_to_list(
                    self.weight_label, self.weight_label_len)
            elif attr == 'weight':
                ret = ret + ' weight = %s,' % _double_array_to_list(
                    self.weight, self.weight_len)
            else:
                ret = ret + ' %s = %s,' % (attr, getattr(self, attr))
        return ret + '>'

    def __del__(self):
        """Release the weight arrays and the parameter structure."""
        if 'param' not in self.__dict__:
            # __init__ failed before the structure was allocated.
            return
        _free_int_array(self.weight_label)
        _free_double_array(self.weight)
        svmc.delete_svm_parameter(self.param)


def _convert_to_svm_node_array(x):
    """Convert a sequence or mapping to an svm_node array.

    For a dict the keys are used as indices; for a sequence the positions
    0..len(x)-1 are used.  Entries are written in ascending index order and
    followed by a final entry with index -1.  Zero values are kept on
    purpose (needed for the precomputed kernel).

    Raises TypeError when x is neither a dict nor a sequence.  The caller
    owns the result and must release it with svmc.svm_node_array_destroy.
    """
    if isinstance(x, dict):
        indices = sorted(x.keys())
    elif _is_sequence(x):
        indices = list(range(len(x)))
    else:
        raise TypeError("data must be a mapping or a sequence")

    count = len(indices)
    data = svmc.svm_node_array(count + 1)
    try:
        svmc.svm_node_array_set(data, count, -1, 0)
        for position, index in enumerate(indices):
            svmc.svm_node_array_set(data, position, index, x[index])
    except Exception:
        # Do not leak the node array when a value cannot be stored.
        svmc.svm_node_array_destroy(data)
        raise
    return data


class svm_problem:
    """A training set: targets y and samples x held in svmc structures.

    Attributes:
        prob: the underlying svmc svm_problem.
        size: number of samples.
        maxlen: largest dict key, or largest sequence length, seen in x.
    """

    def __init__(self, y, x):
        """Build the svmc problem from targets y and samples x.

        Each sample is a dict or a sequence, see _convert_to_svm_node_array.
        """
        assert len(y) == len(x), "y and x must have the same length"
        self.prob = prob = svmc.new_svm_problem()
        self.size = size = len(y)

        self.y_array = y_array = svmc.new_double(size)
        for i in range(size):
            svmc.double_setitem(y_array, i, y[i])

        self.x_matrix = x_matrix = svmc.svm_node_matrix(size)
        self.data = []
        self.maxlen = 0
        for i in range(size):
            sample = x[i]
            data = _convert_to_svm_node_array(sample)
            self.data.append(data)
            svmc.svm_node_matrix_set(x_matrix, i, data)
            if isinstance(sample, dict):
                if len(sample) > 0:
                    self.maxlen = max(self.maxlen, max(sample.keys()))
            else:
                self.maxlen = max(self.maxlen, len(sample))

        svmc.svm_problem_l_set(prob, size)
        svmc.svm_problem_y_set(prob, y_array)
        svmc.svm_problem_x_set(prob, x_matrix)

    def __repr__(self):
        return "<svm_problem: size = %s>" % (self.size)

    def __del__(self):
        """Release whatever __init__ managed to allocate."""
        state = self.__dict__
        if 'prob' in state:
            svmc.delete_svm_problem(state['prob'])
        if 'y_array' in state:
            svmc.delete_double(state['y_array'])
        # Iterate over the arrays actually created: after a failed __init__
        # there may be fewer than self.size of them.
        for node_array in state.get('data', ()):
            svmc.svm_node_array_destroy(node_array)
        if 'x_matrix' in state:
            svmc.svm_node_matrix_destroy(state['x_matrix'])


class svm_model:
    """A model loaded from a file or trained from a problem and parameter."""

    def __init__(self, arg1, arg2=None):
        """Load or train a model.

        svm_model(filename) loads a saved model.
        svm_model(prob, param) trains one; a zero param.gamma is replaced
        in place by 1.0 / prob.maxlen first.

        Raises ValueError with the message returned by
        svmc.svm_check_parameter when it rejects the parameters.
        """
        if arg2 is None:
            # create model from file
            filename = arg1
            self.model = svmc.svm_load_model(filename)
        else:
            # create model from problem and parameter
            prob, param = arg1, arg2
            # Keep a reference so the problem outlives the model.
            self.prob = prob
            _apply_default_gamma(prob, param)
            msg = svmc.svm_check_parameter(prob.prob, param.param)
            if msg:
                raise ValueError(msg)
            self.model = svmc.svm_train(prob.prob, param.param)

        # setup some classwide variables
        self.nr_class = svmc.svm_get_nr_class(self.model)
        self.svm_type = svmc.svm_get_svm_type(self.model)

        # create labels(classes)
        intarr = svmc.new_int(self.nr_class)
        try:
            svmc.svm_get_labels(self.model, intarr)
            self.labels = _int_array_to_list(intarr, self.nr_class)
        finally:
            svmc.delete_int(intarr)

        # check if valid probability model
        self.probability = svmc.svm_check_probability_model(self.model)

    def predict(self, x):
        """Return svmc.svm_predict's result for the sample x."""
        data = _convert_to_svm_node_array(x)
        try:
            return svmc.svm_predict(self.model, data)
        finally:
            svmc.svm_node_array_destroy(data)

    def get_nr_class(self):
        """Return the number of classes reported by the model."""
        return self.nr_class

    def get_labels(self):
        """Return the list of class labels.

        Raises TypeError for NU_SVR, EPSILON_SVR and ONE_CLASS models.
        """
        if self.svm_type in (NU_SVR, EPSILON_SVR, ONE_CLASS):
            raise TypeError("Unable to get label from a SVR/ONE_CLASS model")
        return self.labels

    def predict_values_raw(self, x):
        """Return the nr_class*(nr_class-1)//2 raw values for x as a list."""
        # convert x into svm_node, allocate a double array for return
        n = self.nr_class * (self.nr_class - 1) // 2
        data = _convert_to_svm_node_array(x)
        try:
            dblarr = svmc.new_double(n)
            try:
                svmc.svm_predict_values(self.model, data, dblarr)
                return _double_array_to_list(dblarr, n)
            finally:
                svmc.delete_double(dblarr)
        finally:
            svmc.svm_node_array_destroy(data)

    def predict_values(self, x):
        """Return the values computed by predict_values_raw in keyed form.

        For NU_SVR, EPSILON_SVR and ONE_CLASS models the first raw value is
        returned.  Otherwise the result is a dict mapping each ordered
        label pair (a, b) to its value, with d[b, a] == -d[a, b].
        """
        v = self.predict_values_raw(x)
        if self.svm_type in (NU_SVR, EPSILON_SVR, ONE_CLASS):
            return v[0]

        # self.svm_type == C_SVC or self.svm_type == NU_SVC
        labels = self.labels
        count = 0
        d = {}
        for i in range(len(labels)):
            for j in range(i + 1, len(labels)):
                d[labels[i], labels[j]] = v[count]
                d[labels[j], labels[i]] = -v[count]
                count += 1
        return d

    def predict_probability(self, x):
        """Return (prediction, {label: probability}) for the sample x.

        Raises TypeError for regression and one-class models, and for
        models without probability information.
        """
        # c code will do nothing on wrong type, so we have to check ourself
        if self.svm_type == NU_SVR or self.svm_type == EPSILON_SVR:
            raise TypeError("call get_svr_probability or get_svr_pdf for probability output of regression")
        elif self.svm_type == ONE_CLASS:
            raise TypeError("probability not supported yet for one-class problem")
        # only C_SVC,NU_SVC goes in
        if not self.probability:
            raise TypeError("model does not support probabiliy estimates")

        # convert x into svm_node, alloc a double array to receive probabilities
        data = _convert_to_svm_node_array(x)
        try:
            dblarr = svmc.new_double(self.nr_class)
            try:
                pred = svmc.svm_predict_probability(self.model, data, dblarr)
                pv = _double_array_to_list(dblarr, self.nr_class)
            finally:
                svmc.delete_double(dblarr)
        finally:
            svmc.svm_node_array_destroy(data)

        p = {}
        for i in range(len(self.labels)):
            p[self.labels[i]] = pv[i]
        return pred, p

    def get_svr_probability(self):
        """Return svmc.svm_get_svr_probability's value for this model.

        Raises TypeError when that value is 0.
        """
        # leave the Error checking to svm.cpp code
        ret = svmc.svm_get_svr_probability(self.model)
        if ret == 0:
            raise TypeError("not a regression model or probability information not available")
        return ret

    def get_svr_pdf(self):
        """Return the function z -> exp(-|z|/sigma) / (2*sigma).

        sigma is the value returned by get_svr_probability.
        """
        # get_svr_probability will handle error checking
        sigma = self.get_svr_probability()
        return lambda z: exp(-fabs(z) / sigma) / (2 * sigma)

    def save(self, filename):
        """Write the model to filename through svmc.svm_save_model."""
        svmc.svm_save_model(filename, self.model)

    def __del__(self):
        """Destroy the underlying model, if one was created."""
        # self.model is missing when __init__ raised before assigning it.
        model = self.__dict__.get('model')
        if model is not None:
            svmc.svm_destroy_model(model)


def cross_validation(prob, param, fold):
    """Run svmc.svm_cross_validation and return its prob.size outputs as a list.

    A zero param.gamma is replaced in place by 1.0 / prob.maxlen first.
    """
    _apply_default_gamma(prob, param)
    dblarr = svmc.new_double(prob.size)
    try:
        svmc.svm_cross_validation(prob.prob, param.param, fold, dblarr)
        return _double_array_to_list(dblarr, prob.size)
    finally:
        svmc.delete_double(dblarr)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -