📄 svm_struct_api.c
字号:
} if (isNumbers) { /* This is a list of numbers. */ PyObject *new_words = PyTuple_New(n-pStartingIndex); for (i=pStartingIndex; i<n; ++i) { PyObject *tuple = PyTuple_New(2); PyTuple_SET_ITEM(tuple, 0, PyInt_FromLong(i)); PyTuple_SET_ITEM(tuple, 1, PySequence_GetItem(words, i)); PyTuple_SET_ITEM(new_words, i-pStartingIndex, tuple); } PyObject_SetAttrString(sv, "words", new_words); Py_DECREF(new_words); } else { /* This is a list of sequences, I guess. */ PyObject_SetAttrString(sv, "words", words); } pythonSetS(sv, "userdefined", userdefined); pythonSetF(sv, "factor", factor); pythonSetI(sv, "kernel_id", kernel_id); return sv;}static PyObject* emb_create_doc(PyObject *self, PyObject *args) { PyObject *py_svec, *py_doc; long docnum=0xdeadbeef, slackid=0xdeadbeef, num_args; double costfactor=1.0; if (!PyArg_ParseTuple(args,"O|dll:create_doc", &py_svec,&costfactor,&slackid,&docnum)) { return NULL; } //PyObject_Print(py_svec, stdout, 0); printf("\n"); num_args = PySequence_Size(args); py_doc = pythonCreateBlank(); /* Set the support vector, and cost factor. */ if (PySequence_Check(py_svec)) { PyObject_SetAttrString(py_doc, "fvec", py_svec); } else { PyObject *temp = PyTuple_New(1); Py_INCREF(py_svec); // The reference is stolen, so we must increment. PyTuple_SET_ITEM(temp, 0, py_svec); PyObject_SetAttrString(py_doc, "fvec", temp); } pythonSetF(py_doc, "costfactor", costfactor); /* Set slack ID and perhaps even document number. */ if (num_args >= 3) { pythonSetI(py_doc, "slackid", slackid); } else { PyObject_SetAttrString(py_doc, "slackid", Py_None); } if (num_args == 4) { pythonSetI(py_doc, "docnum", docnum); } else { PyObject_SetAttrString(py_doc, "docnum", Py_None); } return py_doc;}static PyObject* emb_kernel(PyObject *self, PyObject *args) { PyObject *kp, *a, *b; DOC da, db; double result; /* Initialize the default kernel parameter. */ KERNEL_PARM kernel_parm = {0, 3, 1.0, 1.0, 1.0, "empty"}; kernel_parm.kernel_type = 0; kernel_parm.poly_degree = 3; kernel_parm.rbf_gamma = 1.0; kernel_parm.coef_lin = 1.0; kernel_parm.coef_const = 1.0; strcpy(kernel_parm.custom, "empty"); if (!PyArg_ParseTuple(args, "OOO", &kp, &a, &b)) { return NULL; } /* Extract the support vectors. */ da.fvec = pythonObjectToSV(a); if (!da.fvec) { fprintf(stderr, "First document does not appear to be support vector!\n"); Py_Exit(1); } db.fvec = pythonObjectToSV(b); if (!db.fvec) { fprintf(stderr, "Second document does not appear to be support vector!\n"); free_svector(da.fvec); Py_Exit(1); } /* Copy over the kernel parameters, if possible. */ pythonGetI(kp, "kernel_type", &kernel_parm.kernel_type); pythonGetI(kp, "poly_degree", &kernel_parm.poly_degree); pythonGetF(kp, "rbf_gamma", &kernel_parm.rbf_gamma); pythonGetF(kp, "coef_lin", &kernel_parm.coef_lin); pythonGetF(kp, "coef_const", &kernel_parm.coef_const); pythonGetS(kp, "custom", kernel_parm.custom, sizeof(kernel_parm.custom)); /* Call that function! */ result = kernel(&kernel_parm, &da, &db); /* Get rid of the allocated support vectors. */ free_svector(da.fvec); free_svector(db.fvec); /* Return the result. */ return PyFloat_FromDouble(result);}/************ SVM STRUCT FUNCTIONS **********/SAMPLE read_struct_examples(char *file, STRUCT_LEARN_PARM *sparm){ /* Reads struct examples and returns them in sample. The number of examples must be written into sample.n */ SAMPLE sample; /* sample */ EXAMPLE *examples; long i; PyObject *pDict, *pFunc, *pArgs, *pValue; /* Call the Python function read_examples in the modules. */ pDict = PyModule_GetDict(pModule); pFunc = PyDict_GetItemString(pDict, PYTHON_READ_EXAMPLES); if (pFunc == NULL) { fprintf(stderr, "Could not find function %s!\n", PYTHON_READ_EXAMPLES); Py_Exit(1); } pArgs = PyTuple_New(2); /* This is a new instance! */ pValue = PyString_FromString(file); PyTuple_SetItem(pArgs, 0, pValue); PyTuple_SetItem(pArgs, 1, sparmToPythonObject(sparm)); /* Call the embedded python function!! */ PYTHON_CALL(pValue, pFunc, pArgs); /* Make sure that it's a sequence. */ Py_DECREF(pArgs); if (pValue == NULL) { fprintf(stderr, "%s function failed!\n", PYTHON_READ_EXAMPLES); Py_Exit(1); } if (!PySequence_Check(pValue)) { fprintf(stderr, "%s function did not return a sequence!\n", PYTHON_READ_EXAMPLES); Py_Exit(1); } /* Everything's checked out sofar. Retrieve the result. */ //PyObject_Print(pValue, stdout, 0); printf("\n"); sample.n = PySequence_Size(pValue); examples=(EXAMPLE *)my_malloc(sizeof(EXAMPLE)*sample.n); for (i=0; i<sample.n; ++i) { PyObject *pExample = PySequence_GetItem(pValue, i); if (!pExample || !PySequence_Check(pExample) || PySequence_Size(pExample)<2){ fprintf(stderr, "%s's item %ld is not a sequence element of " "at least two items!\n", PYTHON_READ_EXAMPLES, i); Py_Exit(1); } examples[i].x.py_pattern = PySequence_GetItem(pExample, 0); examples[i].y.py_label = PySequence_GetItem(pExample, 1); Py_DECREF(pExample); } Py_DECREF(pValue); /* Store the result, and get the hell out of Dodge. */ sample.examples=examples; return(sample);}void init_struct_model(SAMPLE sample, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm){ /* Initialize structmodel sm. The weight vector w does not need to be initialized, but you need to provide the maximum size of the feature space in sizePsi. This is the maximum number of different weights that can be learned. Later, the weight vector w will contain the learned weights for the model. */ PyObject *pDict, *pFunc, *pArgs, *pValue, *py_sm; sm->dirty = 1; sm->svm_model=sm->py_sm=NULL; sm->sizePsi=0; /* Set up the call the Python function parse_parameters in the module. */ pDict = PyModule_GetDict(pModule); pFunc = PyDict_GetItemString(pDict, PYTHON_INIT_MODEL); if (pFunc == NULL) { fprintf(stderr, "Could not find function %s!\n", PYTHON_INIT_MODEL); Py_Exit(1); } /* Build the argument list. */ pArgs = PyTuple_New(3); PyTuple_SetItem(pArgs, 0, sampleToPythonObject(sample)); PyTuple_SetItem(pArgs, 1, smToPythonObject(sm)); PyTuple_SetItem(pArgs, 2, sparmToPythonObject(sparm)); /* Call the embedded python function!! */ PYTHON_CALL(pValue, pFunc, pArgs); Py_DECREF(pArgs); Py_DECREF(pValue); /* Sets sizePsi to the value set to sm.size_psi in the python function. */ py_sm = smToPythonObject(sm); pValue = PyObject_GetAttrString(py_sm, "size_psi"); sm->sizePsi = PyInt_AsLong(pValue); Py_DECREF(py_sm); Py_DECREF(pValue);}CONSTSET init_struct_constraints(SAMPLE sample, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm){ /* Initializes the optimization problem. Typically, you do not need to change this function, since you want to start with an empty set of constraints. However, if for example you have constraints that certain weights need to be positive, you might put that in here. The constraints are represented as lhs[i]*w >= rhs[i]. lhs is an array of feature vectors, rhs is an array of doubles. m is the number of constraints. The function returns the initial set of constraints. */ CONSTSET c; long sizePsi=sm->sizePsi; long i; WORD words[2]; PyObject *pDict, *pFunc, *pArgs, *pValue, *pItem, *pFirst, *pSecond; if(1) { /* normal case: start with empty set of constraints */ c.lhs=NULL; c.rhs=NULL; c.m=0; } else { /* add constraints so that all learned weights are positive. WARNING: Currently, they are positive only up to precision epsilon set by -e. */ c.lhs=my_malloc(sizeof(DOC *)*sizePsi); c.rhs=my_malloc(sizeof(double)*sizePsi); for(i=0; i<sizePsi; i++) { words[0].wnum=i+1; words[0].weight=1.0; words[1].wnum=0; /* the following slackid is a hack. we will run into problems, if we have move than 1000000 slack sets (ie examples) */ c.lhs[i]=create_example(i,0,1000000+i,1,create_svector(words,"",1.0)); c.rhs[i]=0.0; } } /* Set up the call the Python function. */ pDict = PyModule_GetDict(pModule); pFunc = PyDict_GetItemString(pDict, PYTHON_INIT_CONSTRAINTS); if (pFunc == NULL) { /*fprintf(stderr, "Could not find function %s!\n", PYTHON_INIT_CONSTRAINTS);*/ return c; } /* Build the argument list. */ pArgs = PyTuple_New(3); PyTuple_SetItem(pArgs, 0, sampleToPythonObject(sample)); PyTuple_SetItem(pArgs, 1, smToPythonObject(sm)); PyTuple_SetItem(pArgs, 2, sparmToPythonObject(sparm)); /* Call the embedded python function!! */ PYTHON_CALL(pValue, pFunc, pArgs); Py_DECREF(pArgs); /* Process the list of constraints. */ if (!pValue) { fprintf(stderr, "Badness happend in %s.\n", PYTHON_INIT_CONSTRAINTS); Py_Exit(1); } if (pValue == Py_None) { return c; } if (!PySequence_Check(pValue)) { fprintf(stderr, "Object from %s is not None or a sequence.\n", PYTHON_INIT_CONSTRAINTS); Py_Exit(1); } /* Make the constraints. */ c.m = PySequence_Size(pValue); c.lhs=(DOC**)my_malloc(sizeof(DOC *)*c.m); c.rhs=(double*)my_malloc(sizeof(double)*c.m); for (i=0; i<c.m; ++i) { /* Make one constraint! */ DOC *doc; pItem = PySequence_GetItem(pValue, i); pFirst=PySequence_GetItem(pItem, 0); pSecond=PySequence_GetItem(pItem, 1); if (!pSecond || !pFirst) { fprintf(stderr, "Item %ld in sequence from %s doesn't have 2 items.\n", i, PYTHON_INIT_CONSTRAINTS); Py_Exit(1); } /* Make the right hand side of the constriant. */ if (!PyNumber_Check(pSecond)) { fprintf(stderr, "Second item if item %ld in sequence from %s not a " "number.\n", i, PYTHON_INIT_CONSTRAINTS); Py_Exit(1); } c.rhs[i] = PyFloat_AsDouble(pSecond); /* Make the left hand side of the constraint. */ doc = pythonObjectToDoc(pFirst); if (!doc) { fprintf(stderr, "First item of item %ld in sequence from %s not " "a doc.\n", i, PYTHON_INIT_CONSTRAINTS); Py_Exit(1); } /* Some things must be set. */ doc->docnum = i; if (doc->slackid == 0xdeadbeef) doc->slackid = sample.n+i; c.lhs[i] = doc; } Py_DECREF(pValue); return(c);}LABEL classify_struct_example(PATTERN x, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm){ /* Finds the label yhat for pattern x that scores the highest according to the linear evaluation function in sm, especially the weights sm.w. The returned label is taken as the prediction of sm for the pattern x. The weights correspond to the features defined by psi() and range from index 1 to index sm->sizePsi. If the function cannot find a label, it shall return an empty label as recognized by the function empty_label(y). */ LABEL y; /* insert your code for computing the predicted label y here */ PyObject *pDict, *pFunc, *pArgs; /* Set up the call the Python function. */ pDict = PyModule_GetDict(pModule); pFunc = PyDict_GetItemString(pDict, PYTHON_CLASSIFY_EXAMPLE); if (pFunc == NULL) { fprintf(stderr, "Could not find function %s!\n", PYTHON_CLASSIFY_EXAMPLE); Py_Exit(1); } pArgs = PyTuple_New(3); /* This is a new instance! */ Py_INCREF((PyObject*)x.py_pattern); PyTuple_SetItem(pArgs, 0, (PyObject*)x.py_pattern); PyTuple_SetItem(pArgs, 1, smToPythonObject(sm)); PyTuple_SetItem(pArgs, 2, sparmToPythonObject(sparm)); /* Call the embedded python function!! */ PYTHON_CALL(y.py_label, pFunc, pArgs); Py_DECREF(pArgs); return(y);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -