📄 str_util.c
字号:
/*
    Urwid unicode character processing tables
    Copyright (C) 2006 Rebecca Breu.
    This file contains rewritten code of utable.py by Ian Ward.

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

    Urwid web site: http://excess.org/urwid/
*/

#include <Python.h>
#include <limits.h>
#include <string.h>

/* Values stored in byte_encoding. */
#define ENC_UTF8 1
#define ENC_WIDE 2
#define ENC_NARROW 3

#ifndef Py_RETURN_TRUE
/* For python2.3 */
#define Py_RETURN_TRUE return Py_INCREF(Py_True), Py_True
#define Py_RETURN_FALSE return Py_INCREF(Py_False), Py_False
#endif

#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
/* Python < 2.5 has no Py_ssize_t; string lengths are plain ints there. */
typedef int Py_ssize_t;
#endif

/* Number of longs in widths[]: 38 (upper bound, width) pairs. */
static int widths_len = 2*38;

/*
 * Flattened (upper bound, column width) pairs in ascending order of bound.
 * Py_GetWidth() returns the width of the first pair whose bound is >= ord.
 */
static const long int widths[] = {
    126, 1,
    159, 0,
    687, 1,
    710, 0,
    711, 1,
    727, 0,
    733, 1,
    879, 0,
    1154, 1,
    1161, 0,
    4347, 1,
    4447, 2,
    7467, 1,
    7521, 0,
    8369, 1,
    8426, 0,
    9000, 1,
    9002, 2,
    11021, 1,
    12350, 2,
    12351, 1,
    12438, 2,
    12442, 0,
    19893, 2,
    19967, 1,
    55203, 2,
    63743, 1,
    64106, 2,
    65039, 1,
    65059, 0,
    65131, 2,
    65279, 1,
    65376, 2,
    65500, 1,
    65510, 2,
    120831, 1,
    262141, 2,
    1114109, 1
};

/* Currently selected encoding, one of the ENC_* values. */
static short byte_encoding = ENC_UTF8;


/* Return a new reference to Py_True if val is non-zero, else to Py_False. */
static PyObject * to_bool(int val)
{
    if (val) Py_RETURN_TRUE;
    else Py_RETURN_FALSE;
}


/*
 * Shared argument handling for the (text, pos) wrappers below.
 *
 * Fetches the byte buffer and length of py_text and verifies that pos
 * indexes a byte inside it, so the decoders never read out of range.
 *
 * Returns 0 on success with *text and *text_len filled in. Returns -1 with
 * a Python exception set when py_text is not a string (TypeError, raised by
 * PyString_AsStringAndSize), when its length does not fit in an int
 * (OverflowError), or when pos is outside [0, length) (IndexError).
 */
static int str_util_get_text(PyObject *py_text, int pos,
                             char **text, int *text_len)
{
    /* Must be Py_ssize_t: that is what PyString_AsStringAndSize writes. */
    Py_ssize_t len;

    if (PyString_AsStringAndSize(py_text, text, &len) < 0)
        return -1;

    if (len > INT_MAX)
    {
        PyErr_SetString(PyExc_OverflowError, "text is too long.");
        return -1;
    }

    if ((pos < 0) || (pos >= len))
    {
        PyErr_SetString(PyExc_IndexError, "pos is out of range.");
        return -1;
    }

    *text_len = (int)len;
    return 0;
}


//======================================================================
static char get_byte_encoding_doc[] =
"get_byte_encoding() -> string encoding\n\n"
"Get byte encoding ('utf8', 'wide', or 'narrow').";

/*
 * Python entry point: return the name of the current byte encoding as a
 * new string object, or None if byte_encoding holds an unknown value.
 */
static PyObject * get_byte_encoding(PyObject *self, PyObject *args)
{
    if (!PyArg_ParseTuple(args, ""))
        return NULL;

    switch (byte_encoding)
    {
    case ENC_UTF8:
        return Py_BuildValue("s", "utf8");
    case ENC_WIDE:
        return Py_BuildValue("s", "wide");
    case ENC_NARROW:
        return Py_BuildValue("s", "narrow");
    default:
        break;
    }

    // should never happen
    // Py_None is a borrowed reference; the caller receives ownership, so
    // it has to be incref'd before being returned.
    Py_INCREF(Py_None);
    return Py_None;
}


//======================================================================
static char set_byte_encoding_doc[] =
"set_byte_encoding(string encoding) -> None\n\n"
"Set byte encoding. \n\n"
"encoding -- one of 'utf8', 'wide', 'narrow'";

/*
 * Python entry point: select the byte encoding by name.
 *
 * Returns None on success; raises ValueError for any name other than
 * 'utf8', 'wide' or 'narrow' and leaves the current setting unchanged.
 */
static PyObject * set_byte_encoding(PyObject *self, PyObject *args)
{
    char * enc;

    if (!PyArg_ParseTuple(args, "s", &enc))
        return NULL;

    if (strcmp(enc, "utf8") == 0)
        byte_encoding = ENC_UTF8;
    else if (strcmp(enc, "wide") == 0)
        byte_encoding = ENC_WIDE;
    else if (strcmp(enc, "narrow") == 0)
        byte_encoding = ENC_NARROW;
    else
    {
        // got wrong encoding
        PyErr_SetString(PyExc_ValueError, "Unknown encoding.");
        return NULL;
    }

    // Returning None hands a reference to the caller, so take one first.
    Py_INCREF(Py_None);
    return Py_None;
}


//======================================================================
static char get_width_doc[] =
"get_width(int ord) -> int width\n\n"
"Return the screen column width for unicode ordinal ord.\n\n"
"ord -- ordinal";

/*
 * Look up the column width of ordinal ord.
 *
 * Ordinals 0xe and 0xf are always zero width. Otherwise the width of the
 * first widths[] pair whose upper bound is >= ord is returned, and 1 when
 * ord lies beyond the last bound.
 */
static int Py_GetWidth(long int ord)
{
    int i;

    if ((ord == 0xe) || (ord == 0xf))
        return 0;

    for (i = 0; i < widths_len; i += 2)
    {
        if (ord <= widths[i])
            return widths[i+1];
    }

    return 1;
}


/* Python entry point: wrap Py_GetWidth() and return the width as an int. */
static PyObject * get_width(PyObject *self, PyObject *args)
{
    long int ord;
    int ret;

    if (!PyArg_ParseTuple(args, "l", &ord))
        return NULL;

    ret = Py_GetWidth(ord);
    return Py_BuildValue("i", ret);
}


//======================================================================
static char decode_one_doc[] =
"decode_one(string text, int pos) -> (int ord, int nextpos)\n\n"
"Return (ordinal at pos, next position) for UTF-8 encoded text.\n\n"
"text -- string text\n"
"pos -- position in text";

/*
 * Decode the UTF-8 sequence that starts at text[pos].
 *
 * text     -- byte buffer
 * text_len -- number of bytes in text
 * pos      -- index of the first byte of the sequence; the caller must
 *             guarantee 0 <= pos < text_len, it is not checked here
 * ret      -- output: ret[0] = ordinal, ret[1] = index of the next sequence
 *
 * On any malformed input (invalid lead byte, truncated sequence, bad
 * continuation byte, or overlong encoding) the result is ('?', pos + 1).
 */
static void Py_DecodeOne(const unsigned char *text, int text_len, int pos,
                         int *ret)
{
    int lead = text[pos];
    int need;       /* total number of bytes in the sequence */
    int min_ord;    /* smallest ordinal that really needs `need` bytes */
    int ord;
    int k;

    /* Every failure produces the same answer, so start from it. */
    ret[0] = '?';
    ret[1] = pos + 1;

    if (!(lead & 0x80))
    {
        /* plain 7-bit character */
        ret[0] = lead;
        return;
    }

    if ((lead & 0xe0) == 0xc0)
    {
        need = 2;
        min_ord = 0x80;
        ord = lead & 0x1f;
    }
    else if ((lead & 0xf0) == 0xe0)
    {
        need = 3;
        min_ord = 0x800;
        ord = lead & 0x0f;
    }
    else if ((lead & 0xf8) == 0xf0)
    {
        need = 4;
        min_ord = 0x10000;
        ord = lead & 0x07;
    }
    else
    {
        return; //error: not a valid lead byte
    }

    if (text_len - pos < need)
        return; //error: sequence is cut off by the end of the text

    for (k = 1; k < need; k++)
    {
        if ((text[pos+k] & 0xc0) != 0x80)
            return; //error: not a continuation byte

        ord = (ord << 6) | (text[pos+k] & 0x3f);
    }

    if (ord < min_ord)
        return; //error: overlong encoding

    ret[0] = ord;
    ret[1] = pos + need;
}


/*
 * Python entry point for Py_DecodeOne().
 *
 * Raises TypeError if text is not a string and IndexError if pos does not
 * index a byte of text.
 */
static PyObject * decode_one(PyObject *self, PyObject *args)
{
    PyObject *py_text;
    int pos, text_len;
    char *text;
    int ret[2];

    if (!PyArg_ParseTuple(args, "Oi", &py_text, &pos))
        return NULL;

    if (str_util_get_text(py_text, pos, &text, &text_len) < 0)
        return NULL;

    Py_DecodeOne((const unsigned char *)text, text_len, pos, ret);
    return Py_BuildValue("(i, i)", ret[0], ret[1]);
}


//======================================================================
static char decode_one_right_doc[] =
"decode_one_right(string text, int pos) -> (int ord, int nextpos)\n\n"
"Return (ordinal at pos, next position) for UTF-8 encoded text.\n"
"pos is assumed to be on the trailing byte of a utf-8 sequence.\n"
"text -- text string \n"
"pos -- position in text";

/*
 * Decode the UTF-8 sequence whose last byte is text[pos].
 *
 * Walks left from pos over continuation bytes until a non-continuation
 * byte is found, decodes from there with Py_DecodeOne(), and stores
 * (ordinal, index just left of that byte) in ret.
 *
 * At most four bytes are examined. If no start byte is found within them,
 * or the scan reaches the beginning of the text first, ret is set to
 * ('?', pos - 1) using the pos that was passed in, so ret is always
 * written and a caller stepping leftwards always makes progress.
 */
static void Py_DecodeOneRight(const unsigned char *text, int text_len,
                              int pos, int *ret)
{
    const int start = pos;
    int subret[2];

    while (pos >= 0)
    {
        if ((text[pos]&0xc0) != 0x80)
        {
            Py_DecodeOne(text, text_len, pos, subret);
            ret[0] = subret[0];
            ret[1] = pos-1;
            return;
        }

        pos -= 1;

        /* Compare against the starting position: comparing pos with
           itself minus four can never be true. */
        if (pos == start-4)
            break; //error: more continuation bytes than any sequence has
    }

    ret[0] = '?';
    ret[1] = start - 1;
}


/*
 * Python entry point for Py_DecodeOneRight().
 *
 * Raises TypeError if text is not a string and IndexError if pos does not
 * index a byte of text.
 */
static PyObject * decode_one_right(PyObject *self, PyObject *args)
{
    PyObject *py_text;
    int pos, text_len;
    char *text;
    int ret[2] = {'?',0};

    if (!PyArg_ParseTuple(args, "Oi", &py_text, &pos))
        return NULL;

    if (str_util_get_text(py_text, pos, &text, &text_len) < 0)
        return NULL;

    Py_DecodeOneRight((const unsigned char *)text, text_len, pos, ret);
    return Py_BuildValue("(i, i)", ret[0], ret[1]);
}


//======================================================================
static char within_double_byte_doc[] =
"within_double_byte(strint text, int line_start, int pos) -> int withindb\n\n"
"Return whether pos is within a double-byte encoded character.\n\n"
"str -- string in question\n"
"line_start -- offset of beginning of line (< pos)\n"
"pos -- offset in question\n\n"
"Return values:\n"
"0 -- not within dbe char, or double_byte_encoding == False\n"
"1 -- pos is on the 1st half of a dbe char\n"
"2 -- pos is on the 2nd half of a dbe char";

/* NOTE(review): the visible source ends partway through this function;
   the statements below are reproduced unchanged. */
static int Py_WithinDoubleByte(const unsigned char *str, int line_start, int pos)
{
    int i;

    if ((str[pos] >= 0x40) && (str[pos] < 0x7f))
    {
        //might be second half of big5, uhc or gbk encoding
        if (pos == line_start) return 0;

        if (str[pos-1] >= 0x81)
        {
            if ((Py_WithinDoubleByte(str, line_start, pos-1)) == 1) return 2;
            else return 0;
        }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -