📄 svmlight.xs
#ifdef __cplusplus
extern "C" {
#endif
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#ifdef __cplusplus
}
#endif

/* Standard system headers: */
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <stdlib.h>
#include <time.h>
#include <assert.h>

/* Stuff from the SVM Light source: */
/* #include "kernel.h" */
#include "svm_common.h"
#include "svm_learn.h"

#define INITIAL_DOCS 8
#define EXPANSION_FACTOR 2.0

typedef struct {
  long num_features;
  long num_docs;
  long allocated_docs;
  DOC **docs;
  double *labels;
} corpus;

double ranking_callback(DOC **docs, double *rankvalue, long i, long j,
                        LEARN_PARM *learn_parm) {
  dSP;
  SV *callback = (SV *) learn_parm->costfunccustom;
  int count;
  double result;

  /* Don't bother checking the type of 'callback' - it could be a CODE
   * reference or a string, and perl will throw its own error otherwise. */

  ENTER;
  SAVETMPS;

  PUSHMARK(SP);
  XPUSHs(sv_2mortal(newSVnv(rankvalue[i])));
  XPUSHs(sv_2mortal(newSVnv(rankvalue[j])));
  XPUSHs(sv_2mortal(newSVnv(docs[i]->costfactor)));
  XPUSHs(sv_2mortal(newSVnv(docs[j]->costfactor)));
  PUTBACK;

  count = call_sv(callback, G_SCALAR);

  SPAGAIN;

  if (count != 1)
    croak("Expected 1 return value from ranking callback, but got %d", count);

  result = POPn;

  PUTBACK;
  FREETMPS;
  LEAVE;

  return result;
}

SV **self_store(SV *self, void *ptr, const char *slot, int make_readonly) {
  HV *self_hash = (HV*) SvRV(self);
  SV **fetched = hv_fetch(self_hash, slot, strlen(slot), 1);
  if (fetched == NULL)
    croak("Couldn't create the %s slot in $self", slot);

  SvREADONLY_off(*fetched);
  sv_setiv(*fetched, (IV) ptr);
  if (make_readonly) SvREADONLY_on(*fetched);

  return fetched;
}

void *self_fetch(SV *self, const char *slot) {
  HV *self_hash = (HV*) SvRV(self);
  SV **fetched = hv_fetch(self_hash, slot, strlen(slot), 0);
  if (fetched == NULL)
    croak("Couldn't fetch the %s slot in $self", slot);

  return (void *) SvIV(*fetched);
}

/* Extract the '_corpus' structure out of $self */
corpus *get_corpus (SV *self) {
  return (corpus *) self_fetch(self, "_corpus");
}

/* Convert a SV* containing an arrayref into an AV* */
AV *unpack_aref(SV *input_rv, char *name) {
  if ( !SvROK(input_rv) || SvTYPE(SvRV(input_rv)) != SVt_PVAV ) {
    croak("Argument '%s' must be array reference", name);
  }
  return (AV*) SvRV(input_rv);
}

WORD *create_word_array(AV *indices_av, AV *values_av, int *num_words) {
  WORD *words;
  SV **fetched;
  int j, n;

  n = av_len(indices_av)+1;
  if (num_words) *num_words = n;

  /* Get the data from the two parallel arrays and put into *words array */
  if (n != av_len(values_av)+1)
    croak("Different number of entries in indices & values arrays");

  New(id, words, n+1, WORD);
  for (j=0; j<n; j++) {
    fetched = av_fetch(indices_av, j, 0);
    if (fetched == NULL)
      croak("Missing index for element number %d", j);
    words[j].wnum = SvIV(*fetched);
    if (words[j].wnum <= 0)
      croak("Feature indices must be positive integers");
    if (j>0 && words[j-1].wnum >= words[j].wnum)
      croak("Feature indices must be in strictly increasing order");

    fetched = av_fetch(values_av, j, 0);
    if (fetched == NULL)
      croak("Missing value for element number %d", j);
    words[j].weight = (FVAL) SvNV(*fetched);
  }
  words[n].wnum = 0;     /* terminating sentinel expected by SVM Light */
  words[n].weight = 0.0;

  return words;
}

MODULE = Algorithm::SVMLight		PACKAGE = Algorithm::SVMLight

PROTOTYPES: ENABLE

void
_xs_init (SV *self)
CODE:
{
  /* Initializes data structures that will be used in the lifetime of $self */
  corpus *c;
  LEARN_PARM *learn_parm;
  KERNEL_PARM *kernel_parm;

  /* Initialize the corpus */
  New(id, c, 1, corpus);
  c->num_features = 0;
  c->num_docs = 0;
  New(id, c->docs, INITIAL_DOCS, DOC*);
  New(id, c->labels, INITIAL_DOCS, double);
  c->allocated_docs = INITIAL_DOCS;
  self_store(self, c, "_corpus", 1);

  New(id, learn_parm, 1, LEARN_PARM);
  New(id, kernel_parm, 1, KERNEL_PARM);
  self_store(self, learn_parm, "_learn_parm", 1);
  self_store(self, kernel_parm, "_kernel_parm", 1);

  set_learning_defaults(learn_parm, kernel_parm);
  check_learning_parms(learn_parm, kernel_parm);
}

void
ranking_callback (SV *self, SV *callback)
CODE:
{
  LEARN_PARM *learn_parm = (LEARN_PARM*) self_fetch(self, "_learn_parm");
  learn_parm->costfunc = &ranking_callback;
  learn_parm->costfunccustom = newSVsv(callback);
}

void
add_instance_i (SV *self, double label, char *name, SV *indices, SV *values, long query_id = 0, long slack_id = 0, double cost_factor = 1.0)
CODE:
{
  corpus *c = get_corpus(self);
  AV *indices_av = unpack_aref(indices, "indices");
  AV *values_av  = unpack_aref( values,  "values");
  WORD *words;
  int num_words;

  words = create_word_array(indices_av, values_av, &num_words);
  /* words[] is sorted, so the last entry holds the highest feature
   * index; skip empty instances to avoid reading words[-1]. */
  if (num_words > 0 && words[num_words-1].wnum > c->num_features)
    c->num_features = words[num_words-1].wnum;

  /* Check whether we need to allocate more slots for documents */
  if (c->num_docs >= c->allocated_docs) {
    c->allocated_docs *= EXPANSION_FACTOR;
    Renew(c->docs, c->allocated_docs, DOC*);
    if (!(c->docs))
      croak("Couldn't allocate more array space for documents");
    Renew(c->labels, c->allocated_docs, double);
    if (!(c->labels))
      croak("Couldn't allocate more array space for document labels");
  }

  c->labels[ c->num_docs ] = label;
  c->docs[ c->num_docs ] = create_example( c->num_docs, query_id, slack_id, cost_factor,
                                           create_svector(words, name, 1.0) );
  c->num_docs++;
}

void
read_instances (SV *self, char *docfile)
CODE:
{
  corpus *c = get_corpus(self);
  Safefree(c->docs);
  Safefree(c->labels);
  read_documents(docfile, &(c->docs), &(c->labels), &(c->num_features), &(c->num_docs));
}

void
train (SV *self)
CODE:
{
  corpus *c = get_corpus(self);
  MODEL *model;
  double *alpha_in=NULL;
  KERNEL_CACHE *kernel_cache;
  KERNEL_PARM *kernel_parm = (KERNEL_PARM*) self_fetch(self, "_kernel_parm");
  LEARN_PARM  *learn_parm  = (LEARN_PARM*)  self_fetch(self, "_learn_parm");

  /* XXX may need to "restart" alpha_in */

  if(kernel_parm->kernel_type == LINEAR) { /* don't need the cache */
    kernel_cache = NULL;
  }
  else {
    kernel_cache = kernel_cache_init(c->num_docs, learn_parm->kernel_cache_size);
  }

  New(id, model, 1, MODEL);

  switch (learn_parm->type) {
  case CLASSIFICATION:
    svm_learn_classification(c->docs, c->labels, c->num_docs, c->num_features,
                             learn_parm, kernel_parm, kernel_cache, model, alpha_in);
    break;
  case REGRESSION:
    svm_learn_regression(c->docs, c->labels, c->num_docs, c->num_features,
                         learn_parm, kernel_parm, &kernel_cache, model);
    break;
  case RANKING:
    svm_learn_ranking(c->docs, c->labels, c->num_docs, c->num_features,
                      learn_parm, kernel_parm, &kernel_cache, model);
    break;
  case OPTIMIZATION:
    svm_learn_optimization(c->docs, c->labels, c->num_docs, c->num_features,
                           learn_parm, kernel_parm, kernel_cache, model, alpha_in);
    break;
  default:
    croak("Unknown learning type '%d'", learn_parm->type);
  }

  if (model->kernel_parm.kernel_type == 0) { /* linear kernel */
    /* compute weight vector */
    add_weight_vector_to_linear_model(model);
  }

  /* free(alpha_in); */

  if(kernel_cache) {
    /* Free the memory used for the cache. */
    kernel_cache_cleanup(kernel_cache);
  }

  /* Since the model contains references to the training documents, we
   * can't free up space by freeing the documents.  We could revisit
   * this in a future release if memory becomes tight. */

  self_store(self, model, "_model", 1);
}

double
predict_i(SV *self, SV *indices, SV *values)
CODE:
{
  MODEL *model = (MODEL*) self_fetch(self, "_model");
  AV *indices_av = unpack_aref(indices, "indices");
  AV *values_av  = unpack_aref( values,  "values");

  WORD *words = create_word_array(indices_av, values_av, NULL);
  DOC *d = create_example(-1, 0, 0, 0.0, create_svector(words, "", 1.0));
  double dist = (model->kernel_parm.kernel_type == 0
                 ? classify_example_linear(model, d)
                 : classify_example(model, d));
  RETVAL = dist;
}
OUTPUT:
  RETVAL

void
_write_model(SV *self, char *modelfile)
CODE:
{
  MODEL *m = (MODEL*) self_fetch(self, "_model");
  write_model(modelfile, m);
}

SV*
get_linear_weights(SV *self)
CODE:
{
  MODEL *m;
  int i;
  AV *result;

  if (!hv_exists((HV*) SvRV(self), "_model", 6))
    croak("Model has not yet been trained");
  m = (MODEL*) self_fetch(self, "_model");

  if (m->kernel_parm.kernel_type != 0)
    croak("Kernel type is not linear");

  result = newAV();
  av_push(result, newSVnv(m->b));  /* element 0 is the bias term b */
  for (i=1; i<m->totwords+1; i++) {
    av_push(result, newSVnv(m->lin_weights[i]));
  }

  RETVAL = newRV_noinc((SV*) result);
}
OUTPUT:
  RETVAL

void
_read_model(SV *self, char *modelfile)
CODE:
{
  MODEL *m = read_model(modelfile);
  corpus *c = get_corpus(self);

  if (m->kernel_parm.kernel_type == 0) { /* linear kernel */
    /* compute weight vector - it's not stored in the model file */
    add_weight_vector_to_linear_model(m);
  }

  self_store(self, m, "_model", 1);

  /* Fetch a little training info from the model struct */
  c->num_docs = m->totdoc;
  c->num_features = m->totwords;

  /* No actual documents are stored in our corpus - free the memory
     now so DESTROY doesn't get confused later. */
  Safefree(c->docs);
  c->docs = NULL;
}

int
num_features (SV *self)
CODE:
  RETVAL = (get_corpus(self))->num_features;
OUTPUT:
  RETVAL

int
num_instances (SV *self)
CODE:
  RETVAL = (get_corpus(self))->num_docs;
OUTPUT:
  RETVAL

void
DESTROY(SV *self)
CODE:
{
  corpus *c = get_corpus(self);
  MODEL *m;
  int i;
  HV *self_hash = (HV*) SvRV(self);

  if (hv_exists(self_hash, "_model", strlen("_model"))) {
    m = (MODEL*) self_fetch(self, "_model");
    free_model(m, 0);
  }

  if (c->docs != NULL) {
    for(i=0; i<c->num_docs; i++)
      free_example(c->docs[i], 1);
    Safefree(c->docs);
  }
  Safefree(c->labels);
  Safefree(c);
}

INCLUDE: param_set.c
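Usage sketch: the listing above is only the XS half of the module, so for orientation here is a minimal, hypothetical example of driving these bindings from Perl. The methods add_instance_i, train, predict_i, num_features and num_instances are taken directly from this file; the new() constructor is assumed to live in the accompanying Algorithm::SVMLight Perl wrapper (not shown), blessing a hashref and calling _xs_init.

use strict;
use warnings;
use Algorithm::SVMLight;

# new() is assumed to come from the Perl-side wrapper, which would
# bless a hashref and call _xs_init() to set up the corpus and the
# learning/kernel parameter structs.
my $svm = Algorithm::SVMLight->new;

# add_instance_i($label, $name, \@indices, \@values): feature indices
# must be positive integers in strictly increasing order, as enforced
# by create_word_array() above.
$svm->add_instance_i( 1, "doc1", [1, 3, 5], [0.2, 1.0, 0.5]);
$svm->add_instance_i(-1, "doc2", [2, 3],    [0.8, 0.3]);

# Dispatches to one of the svm_learn_* routines according to
# learn_parm->type and stores the result in $self->{_model}.
$svm->train;

printf "features: %d, instances: %d\n",
  $svm->num_features, $svm->num_instances;

# predict_i(\@indices, \@values) returns the signed distance from the
# decision boundary; its sign is the predicted class.
my $score = $svm->predict_i([1, 2], [0.5, 0.4]);
print $score > 0 ? "positive" : "negative", " ($score)\n";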