/* svm_jni.c */
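/*
 * JNI bridge between the jnisvmlight Java wrapper and the SVM-light C core.
 * This section contains the tail of the learn-/kernel-parameter validation,
 * the native classifier entry point
 * Java_jnisvmlight_SVMLightInterface_classifyNative(), the SVECTOR helper
 * create_svector2(), and the native training entry point
 * Java_jnisvmlight_SVMLightInterface_trainmodel().
 */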
    learn_parm->svm_iter_to_shrink=100;
  }
  if((learn_parm->skip_final_opt_check) && (kernel_parm->kernel_type == LINEAR)) {
    printf("\nIt does not make sense to skip the final optimality check for linear kernels.\n\n");
    learn_parm->skip_final_opt_check=0;
  }
  if((learn_parm->skip_final_opt_check) && (learn_parm->remove_inconsistent)) {
    printf("\nIt is necessary to do the final optimality check when removing inconsistent \nexamples.\n");
    fflush(stdout);
    printf("\n\nPress Return for help\n\n"); fflush(stdout);
    wait_any_key(); print_help(); exit(0);
  }
  if(learn_parm->svm_maxqpsize<2) {
    printf("\nMaximum size of QP-subproblems not in valid range: %ld [2..]\n",learn_parm->svm_maxqpsize);
    fflush(stdout);
    printf("\n\nPress Return for help\n\n"); fflush(stdout);
    wait_any_key(); print_help(); exit(0);
  }
  if(learn_parm->svm_maxqpsize<learn_parm->svm_newvarsinqp) {
    printf("\nMaximum size of QP-subproblems [%ld] must be larger than the number of\n",learn_parm->svm_maxqpsize);
    printf("new variables [%ld] entering the working set in each iteration.\n",learn_parm->svm_newvarsinqp);
    fflush(stdout);
    printf("\n\nPress Return for help\n\n"); fflush(stdout);
    wait_any_key(); print_help(); exit(0);
  }
  if(learn_parm->svm_iter_to_shrink<1) {
    printf("\nMaximum number of iterations for shrinking not in valid range: %ld [1,..]\n",learn_parm->svm_iter_to_shrink);
    fflush(stdout);
    printf("\n\nPress Return for help\n\n"); fflush(stdout);
    wait_any_key(); print_help(); exit(0);
  }
  if(learn_parm->svm_c<0) {
    printf("\nThe C parameter must be greater than zero!\n\n");
    fflush(stdout);
    printf("\n\nPress Return for help\n\n"); fflush(stdout);
    wait_any_key(); print_help(); exit(0);
  }
  if(learn_parm->transduction_posratio>1) {
    printf("\nThe fraction of unlabeled examples to classify as positives must\n");
    printf("be less than 1.0 !\n\n");
    fflush(stdout);
    printf("\n\nPress Return for help\n\n"); fflush(stdout);
    wait_any_key(); print_help(); exit(0);
  }
  if(learn_parm->svm_costratio<=0) {
    printf("\nThe COSTRATIO parameter must be greater than zero!\n\n");
    fflush(stdout);
    printf("\n\nPress Return for help\n\n"); fflush(stdout);
    wait_any_key(); print_help(); exit(0);
  }
  if(learn_parm->epsilon_crit<=0) {
    printf("\nThe epsilon parameter must be greater than zero!\n\n");
    fflush(stdout);
    printf("\n\nPress Return for help\n\n"); fflush(stdout);
    wait_any_key(); print_help(); exit(0);
  }
  if(learn_parm->rho<0) {
    printf("\nThe parameter rho for xi/alpha-estimates and leave-one-out pruning must\n");
    printf("be greater than zero (typically 1.0 or 2.0, see T. Joachims, Estimating the\n");
    printf("Generalization Performance of an SVM Efficiently, ICML, 2000.)!\n\n");
    fflush(stdout);
    printf("\n\nPress Return for help\n\n"); fflush(stdout);
    wait_any_key(); print_help(); exit(0);
  }
  if((learn_parm->xa_depth<0) || (learn_parm->xa_depth>100)) {
    printf("\nThe parameter depth for ext. xi/alpha-estimates must be in [0..100] (zero\n");
    printf("for switching to the conventional xa/estimates described in T. Joachims,\n");
    printf("Estimating the Generalization Performance of an SVM Efficiently, ICML, 2000.)\n");
    fflush(stdout);
    printf("\n\nPress Return for help\n\n"); fflush(stdout);
    wait_any_key(); print_help(); exit(0);
  }
}

JNIEXPORT jdouble JNICALL Java_jnisvmlight_SVMLightInterface_classifyNative
  (JNIEnv *env, jobject jo1, jobject testdoc)
{
  DOC *doc;                /* test example */
  WORD *words;
  SVECTOR *sv;
  long j,wpos,queryid,slackid,max_words_doc;
  double dist,costfactor;

  /* Pull the sparse vector out of the Java document object; the Java class
     is expected to expose the fields int[] m_dims and double[] m_vals. */
  jclass classid = (*env)->GetObjectClass(env, testdoc);
  jintArray dim = (jintArray) (*env)->GetObjectField(env, testdoc,
                    (*env)->GetFieldID(env, classid, "m_dims", "[I"));
  jdoubleArray val = (jdoubleArray) (*env)->GetObjectField(env, testdoc,
                    (*env)->GetFieldID(env, classid, "m_vals", "[D"));
  jsize dimLen = (*env)->GetArrayLength(env, dim);
  jsize valLen = (*env)->GetArrayLength(env, val);
  jint *dimEl = (*env)->GetIntArrayElements(env, dim, 0);
  jdouble *valEl = (*env)->GetDoubleArrayElements(env, val, 0);
  int *ds;
  double *vs;

  /* Use the JNI buffers directly when the primitive sizes match;
     otherwise convert element by element. */
  if (sizeof(int) == sizeof(jint)) {
    ds = (int*) dimEl;
  } else {
    int fi;
    printf("Warning: Java datatype \"jint\" does not have the same size as C datatype \"int\"\n");
    ds = (int*) my_malloc(sizeof(int)*dimLen);
    for(fi=0;fi<dimLen;fi++)
      ds[fi] = (int) dimEl[fi];
  }
  if (sizeof(double) == sizeof(jdouble)) {
    vs = (double*) valEl;
  } else {
    int fi;
    printf("Warning: Java datatype \"jdouble\" does not have the same size as C datatype \"double\"\n");
    vs = (double*) my_malloc(sizeof(double)*valLen);
    for(fi=0;fi<valLen;fi++)
      vs[fi] = (double) valEl[fi];
  }

  /* printf("totalwords: %d\n",(int)dimLen); */
  max_words_doc=dimLen;
  words = (WORD *)my_malloc(sizeof(WORD)*(dimLen+10));
  jparse_document(words,&queryid,&slackid,&costfactor,&wpos,max_words_doc,ds,vs);

  /* release the conversion buffers if any were allocated above */
  if ((void*)ds != (void*)dimEl) free(ds);
  if ((void*)vs != (void*)valEl) free(vs);
  (*env)->ReleaseIntArrayElements(env,dim,dimEl,0);
  (*env)->ReleaseDoubleArrayElements(env,val,valEl,0);

  if(_model->kernel_parm.kernel_type == 0) {   /* linear kernel */
    for(j=0;(words[j]).wnum != 0;j++) {      /* Check that feature numbers  */
      if((words[j]).wnum>_model->totwords)   /* are not larger than in the  */
        (words[j]).wnum=0;                   /* model; remove the feature   */
    }                                        /* if necessary.               */
    sv = create_svector2(words,1.0);
    doc = create_example(-1,0,0,0.0,sv);
    dist=classify_example_linear(_model,doc);
  } else {                                   /* non-linear kernel */
    sv = create_svector2(words,1.0);
    doc = create_example(-1,0,0,0.0,sv);
    dist=classify_example(_model,doc);
  }
  free(words);
  free_example(doc,1);   /* deep free releases the DOC together with its SVECTOR */
  return (jdouble)dist;
}
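/*
 * Trimmed-down variant of SVM-light's create_svector(): it copies the WORD
 * array up to and including the terminating wnum==0 sentinel (hence the
 * second fnum++ below), precomputes the squared two-norm via sprod_ss(),
 * and omits the userdefined-string duplication of the stock constructor.
 */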
SVECTOR *create_svector2(WORD *words, double factor)
{
  SVECTOR *vec;
  long fnum,i;

  fnum=0;
  while(words[fnum].wnum) {
    fnum++;
  }
  fnum++;                    /* copy the terminating zero WORD as well */

  vec = (SVECTOR *)my_malloc(sizeof(SVECTOR));
  vec->words = (WORD *)my_malloc(sizeof(WORD)*(fnum));
  for(i=0;i<fnum;i++) {
    vec->words[i]=words[i];
  }
  vec->twonorm_sq=sprod_ss(vec,vec);
  vec->userdefined=NULL;     /* must be initialized: free_svector() frees it */
  vec->kernel_id=0;
  vec->next=NULL;
  vec->factor=factor;
  return(vec);
}

JNIEXPORT jobject JNICALL Java_jnisvmlight_SVMLightInterface_trainmodel
  (JNIEnv *env, jobject obj, jobjectArray tdata, jobject tparm)
{
  DOC **docs;                /* training examples */
  long i;
  long* totdoc = (long*) my_malloc(sizeof(long));
  long* totwords = (long*) my_malloc(sizeof(long));
  long* ndocuments = (long*) my_malloc(sizeof(long));
  double *target=NULL;
  double *alpha_in=NULL;
  KERNEL_CACHE *kernel_cache;
  LEARN_PARM learn_parm;
  KERNEL_PARM kernel_parm;

  _model=(MODEL *)my_malloc(sizeof(MODEL));

  // --------------------- init stuff ----------------------------
  JavaParamIDs *JIDs = GetJParamIDs(env, &tdata);
  JTrainParams* targs = GetJTrainParamIDs(env,&tparm);
  SVMparmInit(kernel_cache,&learn_parm,&kernel_parm,_model,targs);

  if(verbosity>=1) {
    printf("\n --- Native C function: scanning examples, now .. (JNI Interface)\n");
    fflush(stdout);
  }

  // --------------------- create DOCs ---------------------------
  // allocate memory for all training documents
  createDOCs(env,JIDs,&tdata,&docs,&target,totwords,totdoc,ndocuments);
  if(verbosity>=1)
    printf(" --- Native C function: documents allocated successfully.\n");
  fflush(stdout);
  learn_parm.totwords = *totwords;

  // --------------------- create kernel -------------------------
  FILE * dump = NULL;
  long int z = 0;
  long int y = 0;
  if (verbosity>10) {
    if ((dump = fopen("jni-traindump.dat","w")) == NULL) {
      perror("Cannot write to \"jni-traindump.dat\"!\n");
      exit(1);
    }
    printf("\n|||||||||||||||||||||||||||||||||| dumping ..\n");
    fprintf(dump,"totaldocuments: %ld \n",*totdoc);
    while(z<(*totdoc)) {
      fprintf(dump,"(%ld) (QID: %ld) (CF: %.16g) (SID: %ld) ",
              docs[z]->docnum,docs[z]->queryid,docs[z]->costfactor,docs[z]->slackid);
      SVECTOR *v = docs[z]->fvec;
      fprintf(dump,"(NORM:%.32g) (UD:%s) (KID:%ld) (VL:%p) (F:%.32g) %.32g ",
              v->twonorm_sq,(v->userdefined == NULL ? "" : v->userdefined),
              v->kernel_id,(void *)v->next,v->factor,target[z]);
      if (v != NULL && v->words != NULL) {
        while ((v->words[y]).wnum) {
          fprintf(dump,"%ld:%.32g ",(v->words[y]).wnum,(v->words[y]).weight);
          y++;
        }
      } else
        fprintf(dump,"NULL WORDS\n");
      fprintf(dump,"\n");
      y=0;
      z++;
    }
    fprintf(dump,"---------------------------------------------------\n");
    fprintf(dump,"kernel_type: %ld\n",kernel_parm.kernel_type);
    fprintf(dump,"poly_degree: %ld\n",kernel_parm.poly_degree);
    fprintf(dump,"rbf_gamma: %.32g\n",kernel_parm.rbf_gamma);
    fprintf(dump,"coef_lin: %.32g\n",kernel_parm.coef_lin);
    fprintf(dump,"coef_const: %.32g\n",kernel_parm.coef_const);
    fprintf(dump,"custom: %s\n",kernel_parm.custom);
    fprintf(dump,"type: %ld\n",learn_parm.type);
    fprintf(dump,"svm_c: %.32g\n",learn_parm.svm_c);
    fprintf(dump,"eps: %.32g\n",learn_parm.eps);
    fprintf(dump,"svm_costratio: %.32g\n",learn_parm.svm_costratio);
    fprintf(dump,"transduction_posratio: %.32g\n",learn_parm.transduction_posratio);
    fprintf(dump,"biased_hyperplane: %ld\n",learn_parm.biased_hyperplane);
    fprintf(dump,"svm_maxqpsize: %ld\n",learn_parm.svm_maxqpsize);
    fprintf(dump,"svm_newvarsinqp: %ld\n",learn_parm.svm_newvarsinqp);
    fprintf(dump,"epsilon_crit: %.32g\n",learn_parm.epsilon_crit);
    fprintf(dump,"epsilon_shrink: %.32g\n",learn_parm.epsilon_shrink);
    fprintf(dump,"svm_iter_to_shrink: %ld\n",learn_parm.svm_iter_to_shrink);
    fprintf(dump,"remove_inconsistent: %ld\n",learn_parm.remove_inconsistent);
    fprintf(dump,"skip_final_opt_check: %ld\n",learn_parm.skip_final_opt_check);
    fprintf(dump,"compute_loo: %ld\n",learn_parm.compute_loo);
    fprintf(dump,"rho: %.32g\n",learn_parm.rho);
    fprintf(dump,"xa_depth: %ld\n",learn_parm.xa_depth);
    fprintf(dump,"predfile: %s\n",learn_parm.predfile);
    fprintf(dump,"alphafile: %s\n",learn_parm.alphafile);
    fprintf(dump,"epsilon_const: %.32g\n",learn_parm.epsilon_const);
    fprintf(dump,"epsilon_a: %.32g\n",learn_parm.epsilon_a);
    fprintf(dump,"opt_precision: %.32g\n",learn_parm.opt_precision);
    fprintf(dump,"svm_c_steps: %ld\n",learn_parm.svm_c_steps);
    fprintf(dump,"svm_c_factor: %.32g\n",learn_parm.svm_c_factor);
    fprintf(dump,"svm_costratio_unlab: %.32g\n",learn_parm.svm_costratio_unlab);
    fprintf(dump,"svm_unlabbound: %.32g\n",learn_parm.svm_unlabbound);
  }

  if (*ndocuments > 0) {
    if(kernel_parm.kernel_type == LINEAR) {  /* don't need the cache */
      kernel_cache=NULL;
    } else {
      /* Always get a new kernel cache. It is not possible to use the
         same cache for two different training runs. */
      kernel_cache=kernel_cache_init(*totdoc,learn_parm.kernel_cache_size);
    }
    if(verbosity>=1)
      printf(" --- Native C function: engaging the training process.\n");
    fflush(stdout);

    if(learn_parm.type == CLASSIFICATION) {
      svm_learn_classification(docs,target,*totdoc,*totwords,&learn_parm,
                               &kernel_parm,kernel_cache,_model,alpha_in);
    } else if(learn_parm.type == REGRESSION) {
      svm_learn_regression(docs,target,*totdoc,*totwords,&learn_parm,
                           &kernel_parm,&kernel_cache,_model);
    } else if(learn_parm.type == RANKING) {
      svm_learn_ranking(docs,target,*totdoc,*totwords,&learn_parm,
                        &kernel_parm,&kernel_cache,_model);
    } else if(learn_parm.type == OPTIMIZATION) {
      svm_learn_optimization(docs,target,*totdoc,*totwords,&learn_parm,
                             &kernel_parm,kernel_cache,_model,alpha_in);
    }
    if(verbosity>=1)
      printf(" --- Native C function: training has been done.\n");
    fflush(stdout);

    if(_model->kernel_parm.kernel_type == 0) {   /* linear kernel */
      /* compute weight vector */
      add_weight_vector_to_linear_model(_model);
    }
  } else {
    /* no training documents: hand back a minimal, empty model */
    _model->supvec = (DOC **)my_malloc(sizeof(DOC *)*2);
    _model->alpha = (double *)my_malloc(sizeof(double)*2);
    _model->index = (long *)my_malloc(sizeof(long)*2);
    _model->at_upper_bound=0;
    _model->b=0;
    _model->supvec[0]=0;      /* element 0 reserved and empty for now */
    _model->alpha[0]=0;
    _model->lin_weights=NULL;
    _model->totwords=0;
    _model->totdoc=0;
    _model->kernel_parm=(kernel_parm);
    _model->sv_num=1;
    _model->loo_error=-1;
    _model->loo_recall=-1;
    _model->loo_precision=-1;
    _model->xa_error=-1;
    _model->xa_recall=-1;
    _model->xa_precision=-1;
  }

  if (verbosity>10) {
    fprintf(dump,"totwords: %ld\n",learn_parm.totwords);
    printf("|||||||||||||||||||||||||||||||||| z: %ld, totdoc: %ld\n",z,*totdoc);
    fclose(dump);
  }

  // ---------------------- build the model -----------------------
  if (verbosity>10)
    write_model("model-jnisvmlib.dat",_model);

  // convert the C struct of the SVM-light model into a Java object
  if(verbosity>=1)
    printf(" --- Native C function: creating Java return type.\n");
  fflush(stdout);
  jobject ret = buildModelData(env,obj,_model,JIDs);
  if(verbosity>=1)
    printf(" --- Native C function: the Java object has been created.\n");
  fflush(stdout);

  // Uncomment the following when using Java-side classification only!
  // For native classification we need to remember all model-related parameters.
  //free(alpha_in);
  //free_model(_model,0);

  return ret;
}
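/*
 * Minimal native-side usage sketch (an illustration, not part of the
 * original API surface). It assumes a model has already been produced by
 * trainmodel() above and is reachable through the file-global _model, and
 * it mirrors what classifyNative() does once the Java arrays are unpacked:
 *
 *   WORD words[3];
 *   words[0].wnum = 1;  words[0].weight = 0.5;
 *   words[1].wnum = 4;  words[1].weight = 1.0;
 *   words[2].wnum = 0;                       // wnum==0 terminates the vector
 *   SVECTOR *sv  = create_svector2(words, 1.0);
 *   DOC     *doc = create_example(-1, 0, 0, 0.0, sv);
 *   double dist  = (_model->kernel_parm.kernel_type == 0)
 *                    ? classify_example_linear(_model, doc)
 *                    : classify_example(_model, doc);
 *   free_example(doc, 1);                    // deep free releases sv as well
 */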