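/* File: svm_struct_api.c  (listing captured from a web code viewer; the
   viewer's "Font size:" control label originally appeared here) */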
/***********************************************************************/
/*                                                                     */
/*   svm_struct_api.c                                                  */
/*                                                                     */
/*   Definition of API for attaching implementing SVM learning of      */
/*   structures (e.g. parsing, multi-label classification, HMM)        */
/*                                                                     */
/*   Author: Thorsten Joachims                                         */
/*   Date: 03.07.04                                                    */
/*                                                                     */
/*   Copyright (c) 2004  Thorsten Joachims - All rights reserved       */
/*                                                                     */
/*   This software is available for non-commercial use only. It must   */
/*   not be modified and distributed without prior permission of the   */
/*   author. The author is not responsible for implications from the   */
/*   use of this software.                                             */
/*                                                                     */
/***********************************************************************/

/* If the Numeric module has been loaded, that means SVM^python
   doesn't have to copy over the model arrays, saving time and memory.
   It also makes the Scipy module easier to use!  Exactly one of the
   NUMARRAY / NUMERIC build macros (or neither) selects which array
   headers are pulled in below. */
#include <Python.h>
#ifdef NUMARRAY
#include <numarray/libnumarray.h>
#include <numarray/arrayobject.h>
#elif defined NUMERIC
#include <Numeric/arrayobject.h>
#endif /* NUMERIC and NUMARRAY */
#include <stdio.h>
#include <string.h>
#include "svm_struct/svm_struct_common.h"
#include "svm_struct_api.h"

/* This is the module that the user functions and other declarations
   come from.  It is assigned by api_load_module(). */
PyObject *pModule;

/* In the C code, the array sm.w has meaningful values starting at
   index 1 (so index 0 is ignored).  Further, in SVM^light's sparse
   WORD arrays, the first meaningful .wnum index is 1.  I found this
   very annoying for my applications, as my data structures were just
   based on regular arrays indexed from 0, so I was constantly adding
   (or forgetting to add) 1 here and there.

   The default is to have pStartingIndex=1.  In this case everything
   is considered to be indexed from 1, and the Python glue code does
   the translation between the two transparently.  The Python code
   behaves like the C code (that is, sm.w[0] is useless, and the 0
   entry of the vector returned by psi is invalid).

   If you want things to index from 0, define in the PYTHON_PARAM
   dictionary a string 'index_from_one' that maps to False.  If this
   variable is not defined or evaluates to True, then the default
   behavior is assumed.  The value is set by api_load_module(). */
int pStartingIndex;

/* Name of the module-level dictionary holding SVM^python parameters,
   and the key inside it that controls pStartingIndex. */
#define PYTHON_PARAM "svmpython_parameters"
#define PYTHON_PARAM_INDEX_FROM_ONE "index_from_one"

/* This is the default module name in case the --m <module> option is
   not specified on the command line. */
#define PYTHON_MODULE_NAME "svmstruct"

/* The following functions are required.  The program will exit in
   the event that they are not implemented.  It's worth noting,
   however, that if a function is not called by the program, its
   absence will not be noticed, e.g., if init_model (only required
   during learning) is absent during classification, the program will
   not care. */
#define PYTHON_READ_EXAMPLES "read_struct_examples"
#define PYTHON_INIT_MODEL "init_struct_model"
#define PYTHON_CLASSIFY_EXAMPLE "classify_struct_example"
#define PYTHON_PSI "psi"

/* The following functions are technically not required because some
   default behavior is defined, but you probably will want to
   implement them anyway since the default behavior will not be
   generally acceptable.  If they are not implemented a warning
   message will be output. */
#define PYTHON_FIND_MOST_VIOLATED "find_most_violated_constraint"
#define PYTHON_LOSS "loss"
#define PYTHON_PARSE_ARGUMENTS "parse_struct_parameters"

/* The following functions are not required, and if not present in
   the python module some default behavior will be performed.  Unlike
   the above unrequired functions, it is quite reasonable to rely on
   the default behavior of these functions in many cases, and so if
   not implemented a warning message will not be printed out.
*/#define PYTHON_INIT_CONSTRAINTS "init_struct_constraints"#define PYTHON_PRINT_LEARNING_STATS "print_struct_learning_stats"#define PYTHON_PRINT_TESTING_STATS "print_struct_testing_stats"#define PYTHON_EVAL_PREDICTION "eval_prediction"#define PYTHON_WRITE_MODEL "write_struct_model"#define PYTHON_READ_MODEL "read_struct_model"#define PYTHON_WRITE_LABEL "write_label"#define PYTHON_PRINT_HELP "print_struct_help"#define PYTHON_CALL(RET,FUN,ARG) /*if (PyErr_Occurred()) { PyErr_Print(); }*/ PyErr_Clear(); RET = PyObject_CallObject(FUN,ARG); if (PyErr_Occurred()) { PyErr_Print(); Py_Exit(1); }/************* PYTHON SVM MODULE DEFINITION *********/static PyObject* emb_classify_example(PyObject *self, PyObject *args);static PyObject* emb_create_svector(PyObject *self, PyObject *args);static PyObject* emb_create_doc(PyObject *self, PyObject *args);static PyObject* emb_kernel(PyObject *self, PyObject *args);static PyMethodDef EmbMethods[] = { {"classify_example", emb_classify_example, 2, "Classify a feature vector with the model's kernel function."}, {"create_doc", emb_create_doc, METH_VARARGS, "Create a Python document object."}, {"create_svector", emb_create_svector, METH_VARARGS, "Create a Python support vector object."}, {"kernel", emb_kernel, 3, "Evaluate a kernel function on two feature vectors."}, {NULL, NULL, 0, NULL}};/************* PYTHON EMBEDDED INITIALIZATION/FINALIZATION *********/void api_initialize(char * name) { /* This is called before anything else in the API, allowing whatever initialization is required. */ Py_SetProgramName(name); Py_Initialize();#ifdef NUMARRAY import_libnumarray(); import_libnumeric();#elif defined NUMERIC import_array();#endif Py_InitModule("svmlight", EmbMethods); /* Create the blank object type. 
*/ if (PyRun_SimpleString("class SVMBlank(object):\n\tpass\n")) { fprintf(stderr, "Could not define SVMBlank type!\n"); Py_Exit(1); }}void api_load_module(const char *module_name) { /* This is typically called soon-ish after api_initialize, so the module with the user functions can be imported. Only the first call to this function is effective. If module_name==NULL, a default name is assumed. */ PyObject *pDict, *pValue, *pName, *pParam; static int alreadyCalled = 0; if (alreadyCalled) return; alreadyCalled = 1; if (module_name==NULL) module_name = PYTHON_MODULE_NAME; /* Load the module! */ pName = PyString_FromString(module_name); pModule = PyImport_Import(pName); Py_DECREF(pName); if (pModule == NULL) { fprintf(stderr, "Could not load module %s!\n", module_name); fprintf(stderr, "Is your PYTHONPATH environment variable set properly?\n"); Py_Exit(1); } else { //printf("Loaded module %s!\n", module_name); } /* Detect parameters. */ pDict = PyModule_GetDict(pModule); pValue = pParam = NULL; pStartingIndex = 1; if (!PyMapping_HasKeyString(pDict,PYTHON_PARAM)) return; pParam = PyMapping_GetItemString(pDict,PYTHON_PARAM); /* Check if we should index from one. */ if (PyMapping_HasKeyString(pParam, PYTHON_PARAM_INDEX_FROM_ONE)) { pValue = PyMapping_GetItemString(pParam,PYTHON_PARAM_INDEX_FROM_ONE); pStartingIndex = (pValue && PyObject_IsTrue(pValue)) ? 1 : 0; Py_XDECREF(pValue); } Py_XDECREF(pParam);}void api_finalize() { /* This is called after everything else in the API, allowing whatever cleanup is required. 
*/ if (PyErr_Occurred()) { PyErr_Print(); Py_Exit(1); } Py_Finalize();}/************ PYTHON EMBEDDED CONVENIENCE FUNCTIONS **********/inline int pythonSetI(PyObject *obj, char*attr_name, long i) { PyObject *integer = PyInt_FromLong(i); int retval = PyObject_SetAttrString(obj, attr_name, integer); Py_DECREF(integer); return retval;}inline int pythonSetF(PyObject *obj, char*attr_name, double d) { PyObject *number = PyFloat_FromDouble(d); int retval = PyObject_SetAttrString(obj, attr_name, number); Py_DECREF(number); return retval;}inline int pythonSetS(PyObject *obj, char*attr_name, char *s) { PyObject *string = PyString_FromString(s); int retval = PyObject_SetAttrString(obj, attr_name, string); Py_DECREF(string); return retval;}inline int pythonGetI(PyObject *obj, char*attr_name, long *i) { PyObject *value = PyObject_GetAttrString(obj, attr_name); if (!value || !PyNumber_Check(value)) return 0; *i = PyInt_AsLong(value); Py_DECREF(value); return 1;}inline int pythonGetF(PyObject *obj, char*attr_name, double *d) { PyObject *value = PyObject_GetAttrString(obj, attr_name); if (!value || !PyNumber_Check(value)) return 0; *d = PyFloat_AsDouble(value); Py_DECREF(value); return 1;}inline int pythonGetS(PyObject *obj, char*attr_name, char *s, int maxlen) { char *str; PyObject *value2 = PyObject_GetAttrString(obj, attr_name), *value; if (!value2) return 0; // No check...Anything can be represented as a string. value = PyObject_Str(value2); Py_DECREF(value2); str = PyString_AsString(value); if (maxlen<0) strcpy(s, str); // Unsafe, but convenient! Yay! else strncpy(s, str, maxlen); Py_DECREF(value); return 1;}/* Acts like SetItemString but with reference stealing for a shortcut! 
*/inline int pythonMap(PyObject*mapping, char*key, PyObject*value) { if (PyMapping_HasKeyString(mapping, key)) PyMapping_DelItemString(mapping, key); int result = PyMapping_SetItemString(mapping, key, value); Py_DECREF(value); return result;}/* This creates a totally blank instance with absolutely no data. */PyObject* pythonCreateBlank() { PyObject *obj; PyRun_SimpleString("_svmblank = SVMBlank()"); obj = PyMapping_GetItemString(PyModule_GetDict(PyImport_AddModule ("__main__")), "_svmblank"); if (obj != NULL) { //PyObject_Print(obj, stdout, 0); printf("\n"); } else { fprintf(stderr, "Could not create blank object\n"); Py_Exit(1); } /* Without the following, the reference would stick around until we created another blank item. That isn't so terrible, but why wait? */ PyMapping_DelItemString(PyModule_GetDict(PyImport_AddModule ("__main__")), "_svmblank"); return obj;}PyObject* sampleToPythonObject(SAMPLE sample) { PyObject *pExamples; int i; /* Build the example list. */ pExamples = PyList_New(sample.n); for (i=0; i<sample.n; ++i) { PyList_SET_ITEM(pExamples, i, Py_BuildValue ("(OO)", sample.examples[i].x.py_pattern, sample.examples[i].y.py_label)); } return pExamples;}/* Given a support vector, return a python representation of the support vector. Consider the returned object as a sequence which we shall call sv. In the interpreter, len(sv) would equal the number of features in the support vector. sv[i][0] would equal the feature number, and sv[i][1] the feature value. */PyObject* svToPythonObject(SVECTOR *sv) { PyObject *py_words, *py_sv, *py_list; int num_features = 0, num_svs = 0; SVECTOR *temp_sv; /* Create the empty list. */ for (temp_sv = sv; temp_sv; temp_sv = temp_sv->next) num_svs++; py_list = PyTuple_New(num_svs); num_svs = 0; //printf("Adding stuff.\n"); while (sv) { int i=0; WORD *temp = sv->words; //printf("Adding more stuff.\n"); /* Create the empty object. 
*/ py_sv = pythonCreateBlank(); //Py_DECREF(py_sv); //printf("%d\n", py_sv->ob_refcnt); /* Count the number of features. */ num_features = 0; while ((temp++)->wnum) ++num_features; /* Create the tuple. */ py_words = PyTuple_New(num_features); for (temp=sv->words; temp->wnum; ++temp) { PyObject *temp_tuple = PyTuple_New(2); PyTuple_SET_ITEM(temp_tuple, 0, PyInt_FromLong (temp->wnum-1+pStartingIndex)); PyTuple_SET_ITEM(temp_tuple, 1, PyFloat_FromDouble(temp->weight)); PyTuple_SET_ITEM(py_words,i++,temp_tuple); } PyObject_SetAttrString(py_sv, "words", py_words); Py_DECREF(py_words); /* Copy over other information. */ pythonSetI(py_sv, "kernel_id", sv->kernel_id); pythonSetF(py_sv, "factor", sv->factor); pythonSetS(py_sv, "userdefined", sv->userdefined); /* Add it to the list and move on. */ PyTuple_SET_ITEM(py_list, num_svs++, py_sv); sv = sv->next; } /* Return the list. */ return py_list;}SVECTOR* pythonObjectToSingleSV(PyObject *py_sv) { SVECTOR *sv; PyObject *pTemp, *py_words; WORD *words; char isNumbers=0; int n, i; /* Stuff for the thing. */ char *userdefined = ""; double factor = 1.0; long kernel_id = 0; if (!PyObject_HasAttrString(py_sv, "words")) { /* Well, we should at LEAST have that. Jeez... */ return NULL; }
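/* Code-viewer keyboard shortcuts (web page residue, not part of the
   program):
     Copy code             Ctrl + C
     Search code           Ctrl + F
     Full-screen mode      F11
     Switch theme          Ctrl + Shift + D
     Show shortcuts        ?
     Increase font size    Ctrl + =
     Decrease font size    Ctrl + -
*/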