📄 vslsmissingvaluesrow.c
字号:
/*******************************************************************************! INTEL CONFIDENTIAL! Copyright(C) 2007-2008 Intel Corporation. All Rights Reserved.! The source code contained or described herein and all documents related to! the source code ("Material") are owned by Intel Corporation or its suppliers! or licensors. Title to the Material remains with Intel Corporation or its! suppliers and licensors. The Material contains trade secrets and proprietary! and confidential information of Intel or its suppliers and licensors. The! Material is protected by worldwide copyright and trade secret laws and! treaty provisions. No part of the Material may be used, copied, reproduced,! modified, published, uploaded, posted, transmitted, distributed or disclosed! in any way without Intel's prior express written permission.! No license under any patent, copyright, trade secret or other intellectual! property right is granted to or conferred upon you by disclosure or delivery! of the Materials, either expressly, by implication, inducement, estoppel or! otherwise. Any license under such intellectual property rights must be! express and approved by Intel in writing.!!*******************************************************************************! Content:! Support of missing values, Multiple Imputation algorithm Example Program Text!******************************************************************************/#include <stdio.h>#include <math.h>#include "mkl.h"#include "vsl_ss.h"#define DIM 4 /* dimension of the task */ #define P DIM #define N 1000 /* number of observations */#define EPSIILON 10 /* percent missing values*/#define PVT 1 /* number of the missings values in observation*/#define MAXPATTERNLENGHT 10#define BRNG VSL_BRNG_MCG31 /* VSL basic generator to be used */#define SEED 7777777 /* Initial value for stream initialization */#define METHOD VSL_METHOD_DGAUSSIANMV_ICDFstatic MKL_INT TempRandom[N+2]; /* Vector of uniform variates to form defects */static double TempRandomD[N+2]; /* Vector of uniform variates to form defects */static float XX[P*N]; /* Input matrix of observations */static float X[N*P]; /* Transposed matrix of observations */static float S[P*P]; /* Covariance matrix of input data */static float mean[P]; /* Mean vector of input data */static MKL_INT Pattern[(P+1)*N]; /* Matrix with indicators */#define M 5 /* number of sets */#define MI_MP_N 8 /* mi_method_params_n */static float mi_method_params[MI_MP_N];#define MI_SV_N 20000 /* simulated_missing_values_n */static float simulated_missing_values[MI_SV_N];#define MI_ES_N 700 /* estimates_n */static float estimates[MI_ES_N];#define MI_IE_N (P + P*(P+1)/2) /* initial_estimates_n */static float initial_estimates[MI_IE_N];static float Xmean[P*M]; /* Mean vector */static float Raw2Mom[P*M]; /* raw 2 moment vector */static float Variance[P*M]; /* variance vector */static float Q[P]; /* vector of the means */static float Q1[P]; /* vector of the means */static float B[P]; /* "between variance" vector */static float U[P]; /* "within variance" vector */static float T[P]; /* "total variance" vector */#define RETURN_ON_ERROR \ if(errcode < 0) \ { \ printf("Error: %i\n", errcode); \ printf("\nTEST FAILED\n"); \ return errcode; \ }MKL_INT generate_input(MKL_INT n, MKL_INT p, float *XX, float *X, MKL_INT *Pattern, MKL_INT *CounterMissing);int main(){ MKL_INT n; /* number of rows(observation) */ MKL_INT p; /* number of columns (variables) */ MKL_INT i, j, k, I; MKL_INT mi_method_params_n = MI_MP_N, initial_estimates_n = 0, prior_n = 0; MKL_INT simulated_missing_values_n = 0, estimates_n = MI_ES_N; MKL_INT errcode,CounterMissing = 0,index; float *prior = 0, brng, seed, method; float em_iter_num, em_accuracy, missing_value_num; float W[4]={0.0,0.0,0,0}; MKL_INT TestStatus = 0, da_iter_num, copy_num; float TestResult = 0; VSLSSTaskPtr task = 0; MKL_INT storage_format_x; n = N; p = P; errcode = generate_input(n,p, XX, X, Pattern, &CounterMissing); RETURN_ON_ERROR; em_iter_num = 100; /* Number of iterations of EM algorithm */ da_iter_num = 10; /* Number of iterations of DA algorithm */ em_accuracy = 0.001; /* Accuracy of EM algorithm */ copy_num = M; /* Number of sets of imputed values */ missing_value_num = CounterMissing; /* Number of missing values */ brng = BRNG; /* Index of VSL generator to use in DA */ seed = SEED; /* Seed to initialize generator brng */ method = METHOD; /* method to generate gaussian variables */ mi_method_params[0] = em_iter_num; mi_method_params[1] = da_iter_num; mi_method_params[2] = em_accuracy; mi_method_params[3] = copy_num; mi_method_params[4] = missing_value_num; mi_method_params[5] = brng; mi_method_params[6] = seed; mi_method_params[7] = method; simulated_missing_values_n = CounterMissing*copy_num; estimates_n = copy_num * da_iter_num * p * (p+3) / 2; storage_format_x = VSL_SS_MATRIX_ROWS_STORAGE; errcode = vslsSSNewTask( &task, &p, &n, &storage_format_x, XX, 0, 0 ); RETURN_ON_ERROR; errcode = vslsSSEditMissingValues( task, &mi_method_params_n, mi_method_params, &initial_estimates_n, initial_estimates, &prior_n, prior, &simulated_missing_values_n, simulated_missing_values, &estimates_n, estimates ); RETURN_ON_ERROR; errcode = vslsSSCompute( task, VSL_SS_MISSING_VALUES, VSL_SS_MULTIPLE_IMPUTATION ); RETURN_ON_ERROR; /***** Printing results *****/ printf("\nResults of the missing values imputation algorithm\n"); printf("__________________________________________________\n"); printf("\nParameters:\n"); printf(" number of variables: p = %d\n",p); printf(" number of observations: n = %d\n",n); printf("\nTotal number of the missing values: %g\n",(float)CounterMissing ); printf("\n\nComputing means and variances after imputation\n" ); printf("of 5 sets:\n" ); index = 0; for(I = 0; I < M; I++) { /* Recover of the input matrix*/ for(i = 0; i < n; i++) { if(Pattern[i] != 0) { continue; } for(j = 0; j < p; j++) { if(Pattern[(j+1)*n+i] != 0) { continue; } XX[j*n+i]=simulated_missing_values[index]; index++; } } for(k = 0; k < 4; k++) { W[k] = 0; } errcode = vslsSSEditTask( task, VSL_SS_ACCUMULATED_WEIGHT, W ); RETURN_ON_ERROR; errcode = vslsSSEditMoments( task, Xmean + I*p, Raw2Mom + I*p, 0,0, Variance + I*p, 0, 0 ); RETURN_ON_ERROR; /* compute mean using fast method */ errcode = vslsSSCompute(task, VSL_SS_MEAN|VSL_SS_2CENTRAL_MOMENT, VSL_SS_FAST_METHOD ); RETURN_ON_ERROR; }//for(I = 0; I < M; I++) storage_format_x = VSL_SS_MATRIX_COLUMNS_STORAGE; I = M; errcode = vslsSSEditTask( task, VSL_SS_OBSERVATIONS, Xmean ); RETURN_ON_ERROR; I = M; errcode = vsliSSEditTask( task, VSL_SS_OBSERVATIONS_NUMBER, &I ); RETURN_ON_ERROR; errcode = vsliSSEditTask( task, VSL_SS_OBSERVATIONS_STORAGE_FORMAT, &storage_format_x ); RETURN_ON_ERROR; errcode = vslsSSEditMoments( task,Q, Raw2Mom, 0,0, B, 0, 0); RETURN_ON_ERROR; errcode = vslsSSCompute(task, VSL_SS_MEAN|VSL_SS_2CENTRAL_MOMENT, VSL_SS_FAST_METHOD ); RETURN_ON_ERROR; printf("\n Set: Mean:\n"); for(I = 0; I < M; I++) { printf(" %g ",(float)I+1); for(j = 0; j < p; j++) { printf("%+6f ",Xmean[I*p+j]); } printf("\n"); } printf("__________________________________________________\n"); printf("\nAverage "); for(j = 0; j < p; j++) { printf("%6f ",Q[j]); } printf("\n\n\n Set: Variance:\n"); for(I = 0; I < M; I++) { printf(" %g ",(float)I+1); for(j = 0; j < p; j++) { printf("%06f ",Variance[I*p+j]); } printf("\n"); } printf("__________________________________________________\n"); printf("\nAverage "); for(j = 0; j < p; j++) { Q1[j] = 0; for(I = 0; I < M; I++) { Q1[j] = Q1[j] + Variance[I*p+j]; } Q1[j] = Q1[j]/M; printf("%06f ",Q1[j]); } printf("\n"); printf("\nBetween-imputation variance:\n"); printf("\n "); for(j = 0; j < p; j++) { printf("%-6f ",B[j]); } printf("\n"); printf("\nWithin-imputation variance:\n"); printf("\n "); for(j = 0; j < p; j++) { U[j] = 0; for(I = 0; I < M; I++) { U[j] = U[j] + Variance[I*p+j]; } U[j] = U[j]/M/n; printf("%-6f ",U[j]); } printf("\n"); printf("\nTotal variance:\n"); printf("\n "); for(j = 0; j < p; j++) { for(I = 0; I < M; I++) { T[j] = U[j] + (1+1./M)*B[j]; } printf("%#6f ",T[j]); } printf("\n"); printf("\n95%% confidence interval:\n"); printf("\nright "); for(j = 0; j < p; j++) { TestResult = Q[j] + 2*sqrt(T[j]); printf("%+6f ",TestResult); if( TestResult > 2.27) { TestStatus = -1; } } printf("\n left "); for(j = 0; j < p; j++) { TestResult = Q[j] - 2*sqrt(T[j]); printf("%+6f ",TestResult); if( TestResult < -2.27) { TestStatus = -1; } } printf("\n"); errcode = vslSSDeleteTask( &task ); printf("\nTEST PASSED\n"); return 0;}MKL_INT generate_input(MKL_INT n, MKL_INT p, float *XX, float *X, MKL_INT *Pattern, MKL_INT *CounterMissing){ MKL_INT i, j, k, ii, pvt,MaxPatternLength = MAXPATTERNLENGHT,PatternLength,MissingValueNumber; MKL_INT errcode; MKL_INT CM = 0; float Ro = 1. / ( P * 2 ); float Epsilon = EPSIILON/100.; float NAN; long long iNAN = VSL_SS_SNAN; /* Following variables are used in Cholesky factorization subroutine */ char uplo; VSLStreamStatePtr stream_good; VSLStreamStatePtr stream_outl; NAN =*((float *)(&iNAN)); /* Definition of parameters (covariance matrix, means) for input data */ k = 0; for ( i = 0; i < p; i++) { mean[i] = 0.0; initial_estimates[i] = mean[i]; for( j = 0; j < i; j++ ) { S[i*p+j] = Ro; S[j*p+i] = Ro; initial_estimates[p+k] = S[i*p+j]; k++; } S[i*p+i] = 1.0; } /* Here start generation of observations matrix (multivarite Gaussian random numbers) */ uplo = 'U'; /* MKL Choelsky factorization routine call */ spotrf( &uplo, &p, S, &p, &errcode ); RETURN_ON_ERROR; /* Stream initialization */ errcode = vslNewStream( &stream_good, BRNG, SEED ); RETURN_ON_ERROR; errcode = vslNewStream( &stream_outl, BRNG, SEED ); RETURN_ON_ERROR; /* Generating random numbers from multivariate normal distribution */ errcode = vsRngGaussianMV( METHOD, stream_good, N, X, P, VSL_MATRIX_STORAGE_FULL, mean, S ); RETURN_ON_ERROR; for( ii = 0; ii < (p+1)*n; ii++ ) { Pattern[ii] = 1; } /* Integration of missing values into matrix of observations */ CM = 0; for( pvt = 0; pvt < PVT; pvt++) { /* Generating random numbers from uniform distribution to form missing values */ errcode = vdRngUniform(VSL_METHOD_DUNIFORM_STD, stream_outl, N + 2, TempRandomD, 0.0, (double)p ); RETURN_ON_ERROR; for( i = 0; i < N+2; i++ ) { TempRandom[i]=(MKL_INT)TempRandomD[i]; if(TempRandom[i]==p) TempRandom[i]--; } for( i=0; i < n - MaxPatternLength; i++) { if( TempRandom[i]/p < Epsilon ) { PatternLength = (TempRandom[i+1]*MaxPatternLength)/p; MissingValueNumber = TempRandom[i+2]; for( ii = 0; ii < PatternLength; ii++ ) { Pattern[i] = 0; if( Pattern[(MissingValueNumber+1)*n+i] == 0 )continue; Pattern[(MissingValueNumber+1)*n+i] = 0; X[i*p+MissingValueNumber] = NAN; i++; CM++; } } } } // * Stream deallocation * / errcode = vslDeleteStream( &stream_good ); RETURN_ON_ERROR; errcode = vslDeleteStream( &stream_outl ); RETURN_ON_ERROR; for( i=0; i < n; i++ ) { for( j=0; j < p; j++) XX[j*n+i] = X[i*p+j]; } CounterMissing[0]=CM; return 0;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -