/* regexmodule.c */
/*
XXX support range parameter on search
XXX support mstop parameter on search
*/
/* Regular expression objects */
/* This uses Tatu Ylonen's copyleft-free reimplementation of
GNU regular expressions */
#include "Python.h"
#include <ctype.h>
#include "regexpr.h"
static PyObject *RegexError; /* Exception */
/* Instance layout of a compiled regex object.  Besides the compiled
   pattern it keeps the state left behind by the most recent
   match()/search() call, which group() and the "regs"/"last"
   attributes read back. */
typedef struct {
PyObject_HEAD
struct re_pattern_buffer re_patbuf; /* The compiled expression; its buffer is free()d in reg_dealloc */
struct re_registers re_regs; /* The start/end registers from the last match or search */
char re_fastmap[256]; /* Storage for fastmap */
PyObject *re_translate; /* String object for translate table, or NULL */
PyObject *re_lastok; /* Subject last matched/searched successfully; NULL when the last attempt did not match */
PyObject *re_groupindex; /* Group name to index dictionary, or NULL */
PyObject *re_givenpat; /* Pattern with symbolic groups, or NULL */
PyObject *re_realpat; /* Pattern without symbolic groups, or NULL */
} regexobject;
/* Regex object methods */
/* Destructor for regex objects: frees the compiled pattern buffer,
   drops every object reference held by the instance (each may be
   NULL), and finally releases the instance itself. */
static void
reg_dealloc(regexobject *re)
{
    /* free() accepts NULL, so no guard is needed here. */
    free(re->re_patbuf.buffer);

    Py_XDECREF(re->re_translate);
    Py_XDECREF(re->re_lastok);
    Py_XDECREF(re->re_groupindex);
    Py_XDECREF(re->re_givenpat);
    Py_XDECREF(re->re_realpat);

    PyObject_Del(re);
}
/* Build the value of the "regs" attribute: a tuple with RE_NREGS
   entries, each a (start, end) pair taken from `regs`.

   Registers whose start and end are both -1 all share one cached
   (-1, -1) tuple, which is created on first use and kept for the
   lifetime of the process.

   Returns a new reference, or NULL with an exception set. */
static PyObject *
makeresult(struct re_registers *regs)
{
    static PyObject *unmatched = NULL;  /* shared (-1, -1) pair */
    PyObject *result;
    int reg;

    if (unmatched == NULL) {
        unmatched = Py_BuildValue("(ii)", -1, -1);
        if (unmatched == NULL)
            return NULL;
    }

    result = PyTuple_New(RE_NREGS);
    if (result == NULL)
        return NULL;

    for (reg = 0; reg < RE_NREGS; reg++) {
        int start = regs->start[reg];
        int end = regs->end[reg];
        PyObject *span;

        if (start == -1 && end == -1) {
            span = unmatched;
            Py_INCREF(span);
        }
        else {
            span = Py_BuildValue("(ii)", start, end);
            if (span == NULL)
                goto fail;
        }
        /* PyTuple_SetItem takes over the reference to span. */
        if (PyTuple_SetItem(result, reg, span) < 0)
            goto fail;
    }
    return result;

fail:
    Py_DECREF(result);
    return NULL;
}
/* regex.match(string[, offset]) -> int

   Tries to match the compiled pattern against `string` starting at
   `offset` (default 0).  `string` may be any object accepted by the
   "t#" argument format.

   Returns the length of the match, or -1 if the pattern does not
   match.  On a successful match the subject is remembered in
   re_lastok; otherwise re_lastok is left NULL.

   Raises RegexError if the offset is outside [0, len(string)] or the
   matcher reports a failure below -1 without setting an exception. */
static PyObject *
regobj_match(regexobject *re, PyObject *args)
{
    PyObject *argstring;
    PyObject *previous;
    char *buffer;
    int size;
    int offset = 0;
    int result;

    if (!PyArg_ParseTuple(args, "O|i:match", &argstring, &offset))
        return NULL;
    if (!PyArg_Parse(argstring, "t#", &buffer, &size))
        return NULL;
    if (offset < 0 || offset > size) {
        PyErr_SetString(RegexError, "match offset out of range");
        return NULL;
    }

    /* Detach the previous subject before releasing it.  Dropping the
       reference may run arbitrary code (a destructor) that re-enters
       this object; it must then see re_lastok == NULL rather than a
       pointer to an object that is being torn down. */
    previous = re->re_lastok;
    re->re_lastok = NULL;
    Py_XDECREF(previous);

    result = _Py_re_match(&re->re_patbuf, (unsigned char *)buffer,
                          size, offset, &re->re_regs);
    if (result < -1) {
        /* Serious failure of some sort; if re_match didn't
           set an exception, raise a generic error */
        if (!PyErr_Occurred())
            PyErr_SetString(RegexError, "match failure");
        return NULL;
    }
    if (result >= 0) {
        Py_INCREF(argstring);
        re->re_lastok = argstring;
    }
    return PyInt_FromLong((long)result); /* Length of the match or -1 */
}
/* regex.search(string[, offset]) -> int

   Searches `string` for the compiled pattern, trying start positions
   from `offset` (default 0) up to the end of the string.  `string`
   may be any object accepted by the "t#" argument format.

   Returns the position of the match, or -1 if nothing matches.  On a
   successful search the subject is remembered in re_lastok; otherwise
   re_lastok is left NULL.

   Raises RegexError if the offset is outside [0, len(string)] or the
   searcher reports a failure below -1 without setting an exception. */
static PyObject *
regobj_search(regexobject *re, PyObject *args)
{
    PyObject *argstring;
    PyObject *previous;
    char *buffer;
    int size;
    int offset = 0;
    int range;
    int result;

    if (!PyArg_ParseTuple(args, "O|i:search", &argstring, &offset))
        return NULL;
    if (!PyArg_Parse(argstring, "t#:search", &buffer, &size))
        return NULL;
    if (offset < 0 || offset > size) {
        PyErr_SetString(RegexError, "search offset out of range");
        return NULL;
    }
    /* NB: In Emacs 18.57, the documentation for re_search[_2] and
       the implementation don't match: the documentation states that
       |range| positions are tried, while the code tries |range|+1
       positions.  It seems more productive to believe the code! */
    range = size - offset;

    /* Detach the previous subject before releasing it.  Dropping the
       reference may run arbitrary code (a destructor) that re-enters
       this object; it must then see re_lastok == NULL rather than a
       pointer to an object that is being torn down. */
    previous = re->re_lastok;
    re->re_lastok = NULL;
    Py_XDECREF(previous);

    result = _Py_re_search(&re->re_patbuf, (unsigned char *)buffer,
                           size, offset, range, &re->re_regs);
    if (result < -1) {
        /* Serious failure of some sort; if re_search didn't
           set an exception, raise a generic error */
        if (!PyErr_Occurred())
            PyErr_SetString(RegexError, "match failure");
        return NULL;
    }
    if (result >= 0) {
        Py_INCREF(argstring);
        re->re_lastok = argstring;
    }
    return PyInt_FromLong((long)result); /* Position of the match or -1 */
}
/* Get one group from the last successful match/search.

   `index` is either a string (a group name, looked up in
   re_groupindex) or an integer in [0 .. RE_NREGS-1].

   Returns a new string holding the group's text, a new reference to
   None if that group did not take part in the match, or NULL with an
   exception set:
     - RegexError if the name is unknown, the index is out of range,
       or there has been no successful match/search;
     - whatever PyInt_AsLong / PyString_AsString raise when the index
       is not an integer or the saved subject is not a string object.
*/
static PyObject*
group_from_index(regexobject *re, PyObject *index)
{
    long regno;
    int start, end;
    char *subject;

    if (PyString_Check(index)) {
        /* Symbolic group: translate the name to its register number.
           PyDict_GetItem returns a borrowed reference. */
        if (re->re_groupindex == NULL ||
            !(index = PyDict_GetItem(re->re_groupindex, index)))
        {
            PyErr_SetString(RegexError,
                            "group() group name doesn't exist");
            return NULL;
        }
    }

    /* Keep the value as a long until it has been range-checked:
       narrowing to int first would let a huge index (e.g. 2**32 on
       LP64 platforms) wrap around to a valid register number. */
    regno = PyInt_AsLong(index);
    if (regno == -1 && PyErr_Occurred())
        return NULL;
    if (regno < 0 || regno >= RE_NREGS) {
        PyErr_SetString(RegexError, "group() index out of range");
        return NULL;
    }
    if (re->re_lastok == NULL) {
        PyErr_SetString(RegexError,
                        "group() only valid after successful match/search");
        return NULL;
    }

    start = re->re_regs.start[regno];
    end = re->re_regs.end[regno];
    if (start < 0 || end < 0) {
        /* This group did not participate in the match. */
        Py_INCREF(Py_None);
        return Py_None;
    }

    if (!(subject = PyString_AsString(re->re_lastok)))
        return NULL;
    return PyString_FromStringAndSize(subject + start, end - start);
}
/* regex.group(index[, index...])

   Each argument is a group number or a group name.  With a single
   argument the result is whatever group_from_index returns for it;
   with several, the result is a tuple holding one entry per argument
   in the same order.

   Raises TypeError when called with no arguments; any error from
   group_from_index is propagated. */
static PyObject *
regobj_group(regexobject *re, PyObject *args)
{
    PyObject *groups;
    int count;
    int pos;

    count = PyTuple_Size(args);
    if (count < 0)
        return NULL;
    if (count == 0) {
        PyErr_SetString(PyExc_TypeError, "not enough arguments");
        return NULL;
    }

    if (count == 1) {
        /* Single argument: hand back the group itself, not a 1-tuple. */
        PyObject *only = PyTuple_GetItem(args, 0);
        return only ? group_from_index(re, only) : NULL;
    }

    /* Several arguments: collect the groups into a tuple. */
    groups = PyTuple_New(count);
    if (groups == NULL)
        return NULL;

    for (pos = 0; pos < count; pos++) {
        PyObject *key = PyTuple_GetItem(args, pos);
        PyObject *value = key ? group_from_index(re, key) : NULL;

        /* PyTuple_SetItem takes over the reference to value. */
        if (value == NULL || PyTuple_SetItem(groups, pos, value) < 0) {
            Py_DECREF(groups);
            return NULL;
        }
    }
    return groups;
}
/* Method table for regex objects.  regobj_getattr passes it to
   Py_FindMethod for every attribute name it does not handle itself.
   Each handler parses its arguments from a tuple; the third field is
   the calling-convention flag (1 is presumably METH_VARARGS -- confirm
   against the Python headers in use). */
static struct PyMethodDef reg_methods[] = {
{"match", (PyCFunction)regobj_match, 1},
{"search", (PyCFunction)regobj_search, 1},
{"group", (PyCFunction)regobj_group, 1},
{NULL, NULL} /* sentinel */
};
/* NULL-terminated list of the data attribute names that regobj_getattr
   reports through "__members__". */
static char *members[] = {
    "last",
    "regs",
    "translate",
    "groupindex",
    "realpat",
    "givenpat",
    NULL    /* sentinel */
};
/* Return a new reference to `value`, or to None when `value` is NULL. */
static PyObject *
regobj_ref_or_none(PyObject *value)
{
    if (value == NULL)
        value = Py_None;
    Py_INCREF(value);
    return value;
}

/* Attribute lookup for regex objects.

   Data attributes:
     regs        tuple built by makeresult from the last match, or
                 None when there is no remembered subject
     last        the remembered subject, or None
     translate, groupindex, realpat, givenpat
                 the stored object, or None when the field is NULL
     __members__ a new list with the names from `members`

   Any other name is looked up in reg_methods via Py_FindMethod.
   Returns a new reference, or NULL with an exception set. */
static PyObject *
regobj_getattr(regexobject *re, char *name)
{
    if (strcmp(name, "regs") == 0) {
        /* The registers are only meaningful after a successful match. */
        if (re->re_lastok == NULL)
            return regobj_ref_or_none(NULL);
        return makeresult(&re->re_regs);
    }
    if (strcmp(name, "last") == 0)
        return regobj_ref_or_none(re->re_lastok);
    if (strcmp(name, "translate") == 0)
        return regobj_ref_or_none(re->re_translate);
    if (strcmp(name, "groupindex") == 0)
        return regobj_ref_or_none(re->re_groupindex);
    if (strcmp(name, "realpat") == 0)
        return regobj_ref_or_none(re->re_realpat);
    if (strcmp(name, "givenpat") == 0)
        return regobj_ref_or_none(re->re_givenpat);

    if (strcmp(name, "__members__") == 0) {
        PyObject *names;
        int count;
        int pos;

        /* The list length is derived from the `members` table so the
           names only have to be maintained in one place. */
        for (count = 0; members[count] != NULL; count++)
            ;
        names = PyList_New(count);
        if (names == NULL)
            return NULL;

        for (pos = 0; members[pos] != NULL; pos++) {
            PyObject *item = PyString_FromString(members[pos]);

            /* PyList_SetItem takes over the reference to item. */
            if (item == NULL || PyList_SetItem(names, pos, item) < 0) {
                Py_DECREF(names);
                return NULL;
            }
        }
        return names;
    }

    return Py_FindMethod(reg_methods, (PyObject *)re, name);
}
static PyTypeObject Regextype = {
PyObject_HEAD_INIT(NULL)
0, /*ob_size*/
"regex.regex", /*tp_name*/
sizeof(regexobject), /*tp_size*/
0, /*tp_itemsize*/
/* methods */
(destructor)reg_dealloc, /*tp_dealloc*/
0, /*tp_print*/
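/* [Code-viewer residue; not part of regexmodule.c]
   Keyboard shortcuts:
     Copy code            Ctrl + C
     Search code          Ctrl + F
     Full-screen mode     F11
     Toggle theme         Ctrl + Shift + D
     Show shortcuts       ?
     Increase font size   Ctrl + =
     Decrease font size   Ctrl + -
*/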