📄 pcremodule.c
字号:
{
/* XXX should include the text of the reference */
PyErr_SetString(ErrorObject, "illegal symbolic reference");
return NULL;
}
}
*typeptr = MEMORY_REFERENCE;
*indexptr = end+1;
/* If it's a number, return the integer value of the group */
if (is_number) return Py_BuildValue("i", group_num);
/* Otherwise, return a string containing the group name */
return Py_BuildValue("s#", pattern+index, end-index);
}
case('0'):
{
/* \0 always indicates an octal escape, so we consume up to 3
characters, as long as they're all octal digits */
int octval=0, i;
index--;
for(i=index;
i<=index+2 && i<pattern_len
&& (pcre_ctypes[ pattern[i] ] & ctype_odigit );
i++)
{
octval = octval * 8 + pattern[i] - '0';
}
if (octval>255)
{
PyErr_SetString(ErrorObject, "octal value out of range");
return NULL;
}
*indexptr = i;
return Py_BuildValue("c", (unsigned char)octval);
}
case('1'): case('2'): case('3'): case('4'):
case('5'): case('6'): case('7'): case('8'):
case('9'):
{
/* Handle \?, where ? is from 1 through 9 */
int value=0;
index--;
/* If it's at least a two-digit reference, like \34, it might
either be a 3-digit octal escape (\123) or a 2-digit
decimal memory reference (\34) */
if ( (index+1) <pattern_len &&
(pcre_ctypes[ pattern[index+1] ] & ctype_digit) )
{
if ( (index+2) <pattern_len &&
(pcre_ctypes[ pattern[index+2] ] & ctype_odigit) &&
(pcre_ctypes[ pattern[index+1] ] & ctype_odigit) &&
(pcre_ctypes[ pattern[index ] ] & ctype_odigit)
)
{
/* 3 octal digits */
value= 8*8*(pattern[index ]-'0') +
8*(pattern[index+1]-'0') +
(pattern[index+2]-'0');
if (value>255)
{
PyErr_SetString(ErrorObject, "octal value out of range");
return NULL;
}
*indexptr = index+3;
return Py_BuildValue("c", (unsigned char)value);
}
else
{
/* 2-digit form, so it's a memory reference */
value= 10*(pattern[index ]-'0') +
(pattern[index+1]-'0');
if (value<1 || EXTRACT_MAX<=value)
{
PyErr_SetString(ErrorObject, "memory reference out of range");
return NULL;
}
*typeptr = MEMORY_REFERENCE;
*indexptr = index+2;
return Py_BuildValue("i", value);
}
}
else
{
/* Single-digit form, like \2, so it's a memory reference */
*typeptr = MEMORY_REFERENCE;
*indexptr = index+1;
return Py_BuildValue("i", pattern[index]-'0');
}
}
default:
/* It's some unknown escape like \s, so return a string containing
\s */
*typeptr = STRING;
*indexptr = index;
return Py_BuildValue("s#", pattern+index-2, 2);
}
}
/* Append the string object `piece` to the list `pieces` and add its length
   to *total_len.  The caller keeps its own reference to `piece`.
   Returns 0 on success, -1 with an exception set on failure. */
static int
expand_append_object(PyObject *pieces, PyObject *piece, int *total_len)
{
    if (PyList_Append(pieces, piece) < 0)
        return -1;
    *total_len += PyString_Size(piece);
    return 0;
}

/* Wrap `len` bytes starting at `text` in a new string object and append it
   to the list `pieces`, adding `len` to *total_len.
   Returns 0 on success, -1 with an exception set on failure. */
static int
expand_append_literal(PyObject *pieces, const unsigned char *text, int len,
                      int *total_len)
{
    int status;
    PyObject *s = PyString_FromStringAndSize((const char *)text, len);

    if (s == NULL)
        return -1;
    status = PyList_Append(pieces, s);
    Py_DECREF(s);   /* the list now holds its own reference (if it succeeded) */
    if (status < 0)
        return -1;
    *total_len += len;
    return 0;
}

/* pcre_expand(match_obj, repl) -> string

   Expand the backslash escapes in the replacement string `repl`.
   Literal runs of text are copied through unchanged.  Each escape is
   decoded by PyPcre_expand_escape(), which reports the escape's kind
   through `escape_type`:
     - CHAR / STRING: the returned string is inserted as-is;
     - MEMORY_REFERENCE: the returned value is passed to
       match_obj.group() and the resulting string is inserted.

   Returns a new string object, or NULL with an exception set.  ErrorObject
   is raised when group() returns None, when group() returns a non-string,
   or when the escape type is not one of the three kinds above.  Every
   failure path releases the partially built list and the pending escape
   value. */
static PyObject *
PyPcre_expand(PyObject *self, PyObject *args)
{
    PyObject *results = NULL, *match_obj;
    PyObject *repl_obj, *newstring;
    PyObject *value = NULL;     /* escape value currently being processed */
    unsigned char *repl;
    char *dest;
    int size, total_len, i, start, pos, count;

    if (!PyArg_ParseTuple(args, "OS:pcre_expand", &match_obj, &repl_obj))
        return NULL;

    repl = (unsigned char *)PyString_AsString(repl_obj);
    size = PyString_Size(repl_obj);
    results = PyList_New(0);
    if (results == NULL)
        return NULL;

    for (start = total_len = i = 0; i < size; i++) {
        int escape_type;

        if (repl[i] != '\\')
            continue;

        /* Flush the literal text accumulated since the previous escape */
        if (start != i &&
            expand_append_literal(results, repl + start, i - start,
                                  &total_len) < 0)
            goto fail;

        i++;    /* step over the backslash */
        value = PyPcre_expand_escape(repl, size, &i, &escape_type);
        if (value == NULL) {
            /* PyPcre_expand_escape triggered an exception of some sort,
               so just return */
            goto fail;
        }

        switch (escape_type) {
        case CHAR:
        case STRING:
            if (expand_append_object(results, value, &total_len) < 0)
                goto fail;
            break;

        case MEMORY_REFERENCE:
        {
            int status;
            /* "(O)" forces a 1-tuple of arguments: match_obj.group(value) */
            PyObject *result = PyObject_CallMethod(match_obj, "group",
                                                   "(O)", value);
            if (result == NULL) {
                /* Attribute lookup failed, or the group() method
                   triggered an exception of some sort */
                goto fail;
            }
            if (result == Py_None) {
                PyErr_SetString(ErrorObject,
                                "group did not contribute to the match");
                Py_DECREF(result);
                goto fail;
            }
            /* typecheck that it's a string! */
            if (!PyString_Check(result)) {
                PyErr_SetString(ErrorObject,
                        "group() must return a string value for replacement");
                Py_DECREF(result);
                goto fail;
            }
            status = expand_append_object(results, result, &total_len);
            Py_DECREF(result);
            if (status < 0)
                goto fail;
            break;
        }

        default:
            PyErr_SetString(ErrorObject, "bad escape in replacement");
            goto fail;
        }

        Py_DECREF(value);
        value = NULL;
        start = i;
        i--;    /* Decrement now, because the 'for' loop will increment it */
    }

    /* Flush any literal text following the last escape */
    if (start != i &&
        expand_append_literal(results, repl + start, i - start,
                              &total_len) < 0)
        goto fail;

    /* Whew!  Now we've constructed a list containing various pieces of
       strings that will make up our final result.  So, iterate over
       the list concatenating them.  A new string measuring total_len
       bytes is allocated and filled in. */
    newstring = PyString_FromStringAndSize(NULL, total_len);
    if (newstring == NULL)
        goto fail;

    dest = PyString_AsString(newstring);
    count = PyList_Size(results);
    for (pos = i = 0; i < count; i++) {
        PyObject *item = PyList_GetItem(results, i);   /* borrowed */
        int item_len = PyString_Size(item);

        memcpy(dest + pos, PyString_AsString(item), item_len);
        pos += item_len;
    }
    Py_DECREF(results);
    return newstring;

fail:
    Py_XDECREF(value);
    Py_XDECREF(results);
    return NULL;
}
/* List of functions defined in the module */
static PyMethodDef pcre_methods[] = {
{"pcre_compile", PyPcre_compile, 1},
{"pcre_expand", PyPcre_expand, 1},
{NULL, NULL} /* sentinel */
};
/*
 * Convenience routine to export an integer value: stores `value` in the
 * dictionary `d` under the key `name`.
 * For simplicity, errors (which are unlikely anyway) are ignored: if either
 * creating the integer object or inserting it into the dictionary fails,
 * the exception is cleared so that no error is left pending for the caller.
 */
static void
insint(PyObject *d, char *name, int value)
{
    PyObject *v = PyInt_FromLong((long) value);

    if (v == NULL || PyDict_SetItemString(d, name, v) < 0) {
        /* Don't bother reporting this error */
        PyErr_Clear();
    }
    /* The dictionary holds its own reference; drop ours (v may be NULL) */
    Py_XDECREF(v);
}
/* Initialization function for the module (*must* be called initpcre).
   Creates the "pcre" module, registers the pcre.error exception as
   ErrorObject, and exports the PCRE flag constants.  If the module or the
   exception cannot be created, returns early with the exception left set. */
DL_EXPORT(void)
initpcre(void)
{
    PyObject *m, *d;

    Pcre_Type.ob_type = &PyType_Type;

    /* Create the module and add the functions */
    m = Py_InitModule("pcre", pcre_methods);
    if (m == NULL)
        return;

    /* Add some symbolic constants to the module */
    d = PyModule_GetDict(m);
    ErrorObject = PyErr_NewException("pcre.error", NULL, NULL);
    if (ErrorObject == NULL ||
        PyDict_SetItemString(d, "error", ErrorObject) < 0)
        return;

    /* Insert the flags */
    insint(d, "IGNORECASE", PCRE_CASELESS);
    insint(d, "ANCHORED", PCRE_ANCHORED);
    insint(d, "MULTILINE", PCRE_MULTILINE);
    insint(d, "DOTALL", PCRE_DOTALL);
    insint(d, "VERBOSE", PCRE_EXTENDED);
    insint(d, "LOCALE", PCRE_LOCALE);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -