/* cov-1.c: Contains the code for a specific covariance function.
 *
 * This file contains the code for the training (gp-mc) and prediction
 * (gp-pred) programs for regression with Gaussian processes, which is
 * specific to particular covariance functions. In this file (cov-1.c) the
 * covariance is of the form
 *
 *   c(x^i,x^j) = u_0 + e*delta(i,j) + u_1*sum_k (x^i)_k*(x^j)_k
 *              + v*exp(-0.5*sum_k w_k*((x^i)_k-(x^j)_k)^2),
 *
 * where u_0 is the variance controlling the bias, u_1 is the variance
 * controlling the parameters of the linear component, w_k are the (inverse)
 * length scales, v is the signal variance and e is the noise variance. The
 * actual hyperparameters stored by the program are the logs of these, and the
 * variances used are exp(u_0), exp(u_1), exp(w_k), exp(v) and exp(e), which
 * is a hack to ensure that the variances stay positive. The hyperparameters
 * are ordered as w = [w_0,...,w_{no_inp-1}, v, u_0, u_1, e].
 *
 * (c) Copyright 1996 Carl Edward Rasmussen
 */

#include <stdlib.h>
#include <stddef.h>
#include <math.h>
#include "util.h"

real *ew;                        /* exp of some of w, for convenience */

extern int no_inp, no_wts, nfgeval;
extern real *w, *q, **K, **K1, **K2;
extern struct exampleset train;
extern real invspd(real **, int n);

void init()                      /* set no_wts and create and initialise w[] */
{
  int i;

  no_wts = no_inp+4;
  w = (real *) malloc((size_t) no_wts*sizeof(real));
  for (i=0; i<no_inp; i++) w[i] = -log((real) no_inp);  /* ARD style w's */
  w[i++] = 0.0;                                         /* signal variance v */
  for (; i<no_wts; i++) w[i] = -2.0;                    /* u_0, u_1 and e */
  ew = (real *) malloc((size_t) no_wts*sizeof(real));
}

/* This function returns -log prior for the hyperparameters and augments the
 * array of derivatives by the effect of the prior. The prior on the signal,
 * bias, linear and noise components of w is Gaussian; the input scales
 * w_0..w_{no_inp-1} get an ARD style prior involving exp(-w).
 */

real prior(real *dw)
{
  int i;
  real r = 0.0, mean = -3.0, sigma = 3.0, mu = 1.0;  /* prior specification */

  for (i=0; i<no_inp; i++) {                 /* input scales */
    r += log(6.2831953*no_inp*no_inp/mu)+w[i]+mu/(no_inp*no_inp*exp(w[i]));
    dw[i] += 0.5*(1.0-mu*exp(-w[i])/(no_inp*no_inp));
  }
  r += sq((w[i]+1.0)/1.0);          /* signal variance v: mean -1, sigma 1 */
  dw[i] += (w[i]+1.0)/(1.0*1.0);
  i++;
  for (; i<no_wts; i++) {                    /* u_0, u_1 and e */
    r += sq((w[i]-mean)/sigma);
    dw[i] += (w[i]-mean)/(sigma*sigma);
  }
  return 0.5*r;
}

/* Compute the trace of the product of two symmetric matrices, both stored in
 * their upper triangles only. */

static real trace_prod(real **a, real **b)
{
  static real r;
  static int i, j;

  for (r=0.0, i=0; i<train.num; i++) {
    for (j=0; j<i; j++) r += a[j][i]*b[j][i];
    for (; j<train.num; j++) r += a[i][j]*b[i][j];
  }
  return r;
}
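/* Illustration added for this write-up (not part of the original program): a
 * direct transcription of the covariance function from the header comment.
 * The helper name cov_pair and its delta argument (1 when the two cases are
 * the same training case, 0 otherwise) are invented here; it assumes ew[] has
 * already been filled with exp(w[]). The program itself builds the covariance
 * matrix inline in fgeval() and pred() rather than calling a helper. */

static real cov_pair(real *a, real *b, int delta)
{
  int  k;
  real r = 0.0, rr = 0.0;

  for (k=0; k<no_inp; k++) {
    r  += ew[k]*sq(a[k]-b[k]);         /* sum_k w_k*((x^i)_k-(x^j)_k)^2 */
    rr += a[k]*b[k];                   /* sum_k (x^i)_k*(x^j)_k */
  }
  return ew[no_inp]*exp(-0.5*r)        /* squared exponential term, weight v */
       + ew[no_inp+1]                  /* bias variance u_0 */
       + ew[no_inp+2]*rr               /* linear term, weight u_1 */
       + (delta ? ew[no_inp+3] : 0.0); /* noise variance e on the diagonal */
}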
/* This function returns "-log posterior for w" (plus a constant), and the
 * derivatives of this with respect to w. The function value and the
 * derivatives are augmented by the effect of the prior through the call to
 * prior() at the end.
 */

real fgeval(real *dw)
{
  int  i, j, k;       /* i and j index training cases, k indexes inputs */
  real r, rr;         /* miscellaneous */

  for (i=0; i<no_wts; i++) ew[i] = exp(w[i]);           /* compute ew */
  for (i=0; i<train.num; i++) { /* set upper triangle of K[][] to covariance */
    for (j=i; j<train.num; j++) {
      for (r=rr=0.0, k=0; k<no_inp; k++) {
        r += ew[k]*sq(train.inp[i][k]-train.inp[j][k]);
        rr += train.inp[i][k]*train.inp[j][k];
      }
      r = ew[no_inp]*exp(-0.5*r);
      K1[i][j] = r;                 /* K1 = squared exponential part of K */
      K[i][j] = r+ew[no_inp+2]*rr+ew[no_inp+1];
    }
    K[i][i] += ew[no_inp+3];        /* add noise variance on the diagonal */
  }
  for (i=0; i<train.num; i++)
    for (j=i; j<train.num; j++) K2[i][j] = K[i][j];     /* copy */
  r = invspd(K2, train.num);        /* r = log det K; K2 = inv K */
  for (i=0; i<train.num; i++) {     /* compute q[] and r */
    for (rr=0.0, j=0; j<i; j++) rr += train.tar[j][0]*K2[j][i];
    for (; j<train.num; j++) rr += train.tar[j][0]*K2[i][j];
    q[i] = rr;                      /* q = t * inv(K) */
    r += train.tar[i][0]*rr;        /* r += t * inv(K) * t */
  }
  r *= 0.5;            /* r = 0.5 * log det K + 0.5 * t * inv(K) * t */

/* Work out derivatives of -log posterior with respect to the hyperparameters:
 * first the signal scale w[no_inp], then the input scales w[0..no_inp-1], the
 * scale for the linear part of the model w[no_inp+2], the scale for the bias
 * w[no_inp+1] and lastly the noise scale w[no_inp+3]. */

  dw[no_inp] = trace_prod(K1, K2);            /* Tr[inv(K)*dK/dv] */
  for (i=0; i<train.num; i++) {
    for (rr=0.0, j=0; j<i; j++) rr += q[j]*K1[j][i];
    for (; j<train.num; j++) rr += q[j]*K1[i][j];
    dw[no_inp] -= rr*q[i];
  }
  dw[no_inp] *= 0.5;
  for (k=0; k<no_inp; k++) {                  /* input scales */
    for (i=0; i<train.num; i++)               /* set K = dK/dw_k */
      for (j=i; j<train.num; j++)
        K[i][j] = -K1[i][j]*0.5*ew[k]*sq(train.inp[i][k]-train.inp[j][k]);
    dw[k] = trace_prod(K, K2);
    for (i=0; i<train.num; i++) {
      for (rr=0.0, j=0; j<i; j++) rr += q[j]*K[j][i];
      for (; j<train.num; j++) rr += q[j]*K[i][j];
      dw[k] -= rr*q[i];
    }
    dw[k] *= 0.5;
  }
  for (i=0; i<train.num; i++)                 /* set K1 = dK/du_1 */
    for (j=i; j<train.num; j++) {
      for (rr=0.0, k=0; k<no_inp; k++) rr += train.inp[i][k]*train.inp[j][k];
      K1[i][j] = ew[no_inp+2]*rr;
    }
  dw[no_inp+2] = trace_prod(K1, K2);
  for (i=0; i<train.num; i++) {
    for (rr=0.0, j=0; j<i; j++) rr += q[j]*K1[j][i];
    for (; j<train.num; j++) rr += q[j]*K1[i][j];
    dw[no_inp+2] -= rr*q[i];
  }
  dw[no_inp+2] *= 0.5;
  for (rr=0.0, i=0; i<train.num; i++)         /* bias */
    for (j=i+1; j<train.num; j++) rr += K2[i][j];
  rr *= 2.0;
  for (i=0; i<train.num; i++) rr += K2[i][i];
  dw[no_inp+1] = rr;
  for (rr=0.0, i=0; i<train.num; i++) rr += q[i];
  dw[no_inp+1] -= rr*rr;
  dw[no_inp+1] *= 0.5*ew[no_inp+1];
  for (rr=0.0, i=0; i<train.num; i++) rr += K2[i][i]-q[i]*q[i];   /* noise */
  dw[no_inp+3] = 0.5*rr*ew[no_inp+3];
  r += prior(dw);                             /* augment by prior */
  nfgeval++;
  return r;
}
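#include <stdio.h>  /* only needed by the added diagnostic below */

/* Sanity-check sketch added for this write-up (not part of the original
 * program): the analytic derivatives computed by fgeval() can be compared
 * against central finite differences of the function value. The name
 * check_grad and the eps argument are invented here; it assumes the same
 * globals (w, no_wts, train, ...) are set up as for fgeval() itself. No
 * zero-initialisation of the derivative arrays is needed, because fgeval()
 * assigns every component before prior() augments them. */

void check_grad(real eps)              /* e.g. eps = 1e-5 */
{
  int  i;
  real *dw, *tmp, fp, fm;

  dw  = (real *) malloc((size_t) no_wts*sizeof(real));
  tmp = (real *) malloc((size_t) no_wts*sizeof(real));
  fgeval(dw);                          /* analytic derivatives into dw[] */
  for (i=0; i<no_wts; i++) {
    w[i] += eps;     fp = fgeval(tmp); /* f at w[i]+eps */
    w[i] -= 2.0*eps; fm = fgeval(tmp); /* f at w[i]-eps */
    w[i] += eps;                       /* restore w[i] */
    printf("dw[%d]: analytic %g, numeric %g\n", i, dw[i], (fp-fm)/(2.0*eps));
  }
  free(tmp); free(dw);
}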
/* This function returns the mean and variance of the predictions for a set of
 * test cases, given values of the hyperparameters. It uses the globals w, ew,
 * no_wts, no_inp, train and K.
 */

void pred(real *y, real *s2, struct exampleset test)
{
  int  i, j, k;
  real r, rr, *g, *h;

  h = (real *) malloc((size_t) train.num*sizeof(real));
  g = (real *) malloc((size_t) train.num*sizeof(real));
  for (i=0; i<no_wts; i++) ew[i] = exp(w[i]);           /* compute ew */
  for (i=0; i<train.num; i++) { /* set upper triangle of K[][] to covariance */
    for (j=i; j<train.num; j++) {
      for (r=rr=0.0, k=0; k<no_inp; k++) {
        r += ew[k]*sq(train.inp[i][k]-train.inp[j][k]);
        rr += train.inp[i][k]*train.inp[j][k];
      }
      K[i][j] = ew[no_inp]*exp(-0.5*r)+ew[no_inp+2]*rr+ew[no_inp+1];
    }
    K[i][i] += ew[no_inp+3];
  }
  invspd(K, train.num);                       /* invert covariance */
  for (i=0; i<test.num; i++) {
    for (j=0; j<train.num; j++) {             /* g = cov(test case, train) */
      for (r=rr=0.0, k=0; k<no_inp; k++) {
        r += ew[k]*sq(test.inp[i][k]-train.inp[j][k]);
        rr += test.inp[i][k]*train.inp[j][k];
      }
      g[j] = ew[no_inp]*exp(-0.5*r)+ew[no_inp+1]+ew[no_inp+2]*rr;
    }
    for (j=0; j<train.num; j++) {             /* h = g * inv(K) */
      for (r=0.0, k=0; k<j; k++) r += g[k]*K[k][j];
      for (; k<train.num; k++) r += g[k]*K[j][k];
      h[j] = r;
    }
    r = 0.0; for (k=0; k<train.num; k++) r += h[k]*train.tar[k][0];
    y[i] = r;                                 /* mean: g * inv(K) * t */
    r = 0.0; for (k=0; k<train.num; k++) r += h[k]*g[k];
    rr = 0.0; for (k=0; k<no_inp; k++) rr += test.inp[i][k]*test.inp[i][k];
    s2[i] = ew[no_inp]+ew[no_inp+1]+ew[no_inp+2]*rr+ew[no_inp+3]-r;
                               /* variance: cov(test,test) - g * inv(K) * g */
  }
  free(g); free(h);
}
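/* Usage sketch added for this write-up (not part of the original program; the
 * real drivers are gp-mc and gp-pred): once the caller has loaded the
 * training data, set the hyperparameters w[] and allocated the globals used
 * above, prediction reduces to one call to pred(). The name predict_all is
 * invented here. */

void predict_all(struct exampleset test)
{
  int i;
  real *y, *s2;

  y  = (real *) malloc((size_t) test.num*sizeof(real));
  s2 = (real *) malloc((size_t) test.num*sizeof(real));
  pred(y, s2, test);          /* y[i] = predictive mean, s2[i] = variance */
  for (i=0; i<test.num; i++)  /* report mean +/- one standard deviation */
    printf("%3d: %g +/- %g\n", i, y[i], sqrt(s2[i]));
  free(s2); free(y);
}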