str_util.c

来自「Urwid is a Python library for making text console applications」· C语言代码 · 共 830 行 · 第 1/2 页
830 行
/*  Urwid unicode character processing tables

    Copyright (C) 2006 Rebecca Breu.
    This file contains rewritten code of utable.py by Ian Ward.

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

    Urwid web site: http://excess.org/urwid/
*/

#include <Python.h>

#include <limits.h>
#include <string.h>

#define ENC_UTF8 1
#define ENC_WIDE 2
#define ENC_NARROW 3

#ifndef Py_RETURN_TRUE
/* For python2.3 */
#define Py_RETURN_TRUE return Py_INCREF(Py_True), Py_True
#define Py_RETURN_FALSE return Py_INCREF(Py_False), Py_False
#endif

#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
/* Python < 2.5 has no Py_ssize_t; provide the PEP 353 compatibility shim. */
typedef int Py_ssize_t;
#define PY_SSIZE_T_MAX INT_MAX
#define PY_SSIZE_T_MIN INT_MIN
#endif

/* Flat list of (upper bound ordinal, column width) pairs, sorted by upper
   bound.  An ordinal gets the width of the first pair whose bound is >= it. */
static const long int widths[] = {
    126, 1,
    159, 0,
    687, 1,
    710, 0,
    711, 1,
    727, 0,
    733, 1,
    879, 0,
    1154, 1,
    1161, 0,
    4347, 1,
    4447, 2,
    7467, 1,
    7521, 0,
    8369, 1,
    8426, 0,
    9000, 1,
    9002, 2,
    11021, 1,
    12350, 2,
    12351, 1,
    12438, 2,
    12442, 0,
    19893, 2,
    19967, 1,
    55203, 2,
    63743, 1,
    64106, 2,
    65039, 1,
    65059, 0,
    65131, 2,
    65279, 1,
    65376, 2,
    65500, 1,
    65510, 2,
    120831, 1,
    262141, 2,
    1114109, 1
};

/* Number of long values in widths[] (two per pair); derived from the table
   so the two can never drift apart. */
static int widths_len = (int)(sizeof(widths) / sizeof(widths[0]));

/* Currently selected byte encoding, one of the ENC_* constants. */
static short byte_encoding = ENC_UTF8;


/* Return a new reference to Py_True if val is non-zero, else to Py_False. */
static PyObject * to_bool(int val)
{
    if (val)
        Py_RETURN_TRUE;
    else
        Py_RETURN_FALSE;
}


//======================================================================
static char get_byte_encoding_doc[] =
"get_byte_encoding() -> string encoding\n\n"
"Get byte encoding ('utf8', 'wide', or 'narrow').";

/* Python entry point: return the name of the current byte encoding. */
static PyObject * get_byte_encoding(PyObject *self, PyObject *args)
{
    if (!PyArg_ParseTuple(args, ""))
        return NULL;

    if (byte_encoding == ENC_UTF8)
        return Py_BuildValue("s", "utf8");
    if (byte_encoding == ENC_WIDE)
        return Py_BuildValue("s", "wide");
    if (byte_encoding == ENC_NARROW)
        return Py_BuildValue("s", "narrow");

    // should never happen; still hand back an owned reference
    Py_INCREF(Py_None);
    return Py_None;
}


//======================================================================
static char set_byte_encoding_doc[] =
"set_byte_encoding(string encoding) -> None\n\n"
"Set byte encoding. \n\n"
"encoding -- one of 'utf8', 'wide', 'narrow'";

/* Python entry point: select the byte encoding by name.
   Raises ValueError for any name other than 'utf8', 'wide' or 'narrow'. */
static PyObject * set_byte_encoding(PyObject *self, PyObject *args)
{
    char * enc;

    if (!PyArg_ParseTuple(args, "s", &enc))
        return NULL;

    if (strcmp(enc, "utf8") == 0)
        byte_encoding = ENC_UTF8;
    else if (strcmp(enc, "wide") == 0)
        byte_encoding = ENC_WIDE;
    else if (strcmp(enc, "narrow") == 0)
        byte_encoding = ENC_NARROW;
    else
    {
        // got wrong encoding
        PyErr_SetString(PyExc_ValueError, "Unknown encoding.");
        return NULL;
    }

    // the caller receives a new reference, so None must be INCREF'ed
    Py_INCREF(Py_None);
    return Py_None;
}


//======================================================================
static char get_width_doc[] =
"get_width(int ord) -> int width\n\n"
"Return the screen column width for unicode ordinal ord.\n\n"
"ord -- ordinal";

/* Return the screen column width (0, 1 or 2) for unicode ordinal ord.
   Ordinals above the last table entry default to width 1. */
static int Py_GetWidth(long int ord)
{
    int i;

    // shift-out / shift-in take no columns
    if ((ord == 0xe) || (ord == 0xf))
        return 0;

    for (i=0; i<widths_len; i+=2)
    {
        if (ord <= widths[i])
            return widths[i+1];
    }

    return 1;
}


/* Python entry point wrapping Py_GetWidth(). */
static PyObject * get_width(PyObject *self, PyObject *args)
{
    long int ord;
    int ret;

    if (!PyArg_ParseTuple(args, "l", &ord))
        return NULL;

    ret = Py_GetWidth(ord);
    return Py_BuildValue("i", ret);
}


//======================================================================
static char decode_one_doc[] =
"decode_one(string text, int pos) -> (int ord, int nextpos)\n\n"
"Return (ordinal at pos, next position) for UTF-8 encoded text.\n\n"
"text -- string text\n"
"pos -- position in text";

/* Decode the UTF-8 sequence starting at text[pos].

   text     -- byte buffer
   text_len -- number of bytes in text
   pos      -- offset of the first byte of the sequence
   ret      -- output: ret[0] = ordinal, ret[1] = offset after the sequence

   On any error (pos outside the buffer, truncated sequence, bad
   continuation byte, overlong encoding, invalid lead byte) the result is
   ret[0] = '?' and ret[1] = pos + 1. */
static void Py_DecodeOne(const unsigned char *text, int text_len, int pos,
                         int *ret)
{
    int code;

    // never read outside the buffer
    if (pos < 0 || pos >= text_len)
        goto invalid;

    // single byte (ASCII)
    if (!(text[pos]&0x80))
    {
        ret[0] = text[pos];
        ret[1] = pos+1;
        return;
    }

    if (text_len - pos < 2)
        goto invalid;

    // two byte sequence
    if ((text[pos]&0xe0) == 0xc0)
    {
        if ((text[pos+1]&0xc0) != 0x80)
            goto invalid;

        code = ((text[pos]&0x1f)<<6) | (text[pos+1]&0x3f);
        if (code < 0x80) // overlong
            goto invalid;

        ret[0] = code;
        ret[1] = pos+2;
        return;
    }

    if (text_len - pos < 3)
        goto invalid;

    // three byte sequence
    if ((text[pos]&0xf0) == 0xe0)
    {
        if ((text[pos+1]&0xc0) != 0x80)
            goto invalid;
        if ((text[pos+2]&0xc0) != 0x80)
            goto invalid;

        code = ((text[pos]&0x0f) << 12) | ((text[pos+1]&0x3f) << 6) |
            (text[pos+2]&0x3f);
        if (code < 0x800) // overlong
            goto invalid;

        ret[0] = code;
        ret[1] = pos + 3;
        return;
    }

    if (text_len - pos < 4)
        goto invalid;

    // four byte sequence
    if ((text[pos]&0xf8) == 0xf0)
    {
        if ((text[pos+1]&0xc0) != 0x80)
            goto invalid;
        if ((text[pos+2]&0xc0) != 0x80)
            goto invalid;
        if ((text[pos+3]&0xc0) != 0x80)
            goto invalid;

        code = ((text[pos]&0x07) << 18) | ((text[pos+1]&0x3f) << 12) |
            ((text[pos+2]&0x3f) << 6) | (text[pos+3]&0x3f);
        if (code < 0x10000) // overlong
            goto invalid;

        ret[0] = code;
        ret[1] = pos + 4;
        return;
    }

invalid:
    ret[0] = '?';
    ret[1] = pos + 1;
}


/* Fetch the byte buffer and length of a Python string object.

   Returns 0 on success.  Returns -1 with a Python exception set when
   py_text is not a string or is too long for the int offsets used by the
   decoding helpers. */
static int str_util_get_text(PyObject *py_text, const unsigned char **text,
                             int *text_len)
{
    char *buf;
    Py_ssize_t len;

    if (PyString_AsStringAndSize(py_text, &buf, &len) < 0)
        return -1;

    if (len > INT_MAX)
    {
        PyErr_SetString(PyExc_OverflowError, "text is too long");
        return -1;
    }

    *text = (const unsigned char *)buf;
    *text_len = (int)len;
    return 0;
}


/* Python entry point wrapping Py_DecodeOne(). */
static PyObject * decode_one(PyObject *self, PyObject *args)
{
    PyObject *py_text;
    const unsigned char *text;
    int pos, text_len;
    int ret[2];

    if (!PyArg_ParseTuple(args, "Oi", &py_text, &pos))
        return NULL;
    if (str_util_get_text(py_text, &text, &text_len) < 0)
        return NULL;

    Py_DecodeOne(text, text_len, pos, ret);
    return Py_BuildValue("(i, i)", ret[0], ret[1]);
}


//======================================================================
static char decode_one_right_doc[] =
"decode_one_right(string text, int pos) -> (int ord, int nextpos)\n\n"
"Return (ordinal at pos, next position) for UTF-8 encoded text.\n"
"pos is assumed to be on the trailing byte of a utf-8 sequence.\n"
"text -- text string \n"
"pos -- position in text";

/* Decode the UTF-8 sequence whose last byte is at text[pos], scanning
   leftwards for its lead byte.

   text     -- byte buffer
   text_len -- number of bytes in text
   pos      -- offset of the trailing byte of the sequence
   ret      -- output: ret[0] = ordinal, ret[1] = offset left of the sequence

   ret is always written.  If pos is outside the buffer, or no lead byte is
   found within four bytes (or before the start of the buffer), the result
   is ret[0] = '?' and ret[1] = pos - 1. */
static void Py_DecodeOneRight(const unsigned char *text, int text_len, int pos,
                             int *ret)
{
    const int start = pos;
    int subret[2];

    // error result unless a lead byte is found below
    ret[0] = '?';
    ret[1] = start - 1;

    if (start >= text_len)
        return;

    while (pos >= 0)
    {
        if ((text[pos]&0xc0) != 0x80)
        {
            // not a continuation byte: the sequence starts here
            Py_DecodeOne(text, text_len, pos, subret);
            ret[0] = subret[0];
            ret[1] = pos-1;
            return;
        }
        pos -= 1;

        // four continuation bytes in a row cannot be valid UTF-8
        if (pos == start-4)
            return;
    }
}


/* Python entry point wrapping Py_DecodeOneRight(). */
static PyObject * decode_one_right(PyObject *self, PyObject *args)
{
    PyObject *py_text;
    const unsigned char *text;
    int pos, text_len;
    int ret[2];

    if (!PyArg_ParseTuple(args, "Oi", &py_text, &pos))
        return NULL;
    if (str_util_get_text(py_text, &text, &text_len) < 0)
        return NULL;

    Py_DecodeOneRight(text, text_len, pos, ret);
    return Py_BuildValue("(i, i)", ret[0], ret[1]);
}


//======================================================================
static char within_double_byte_doc[] =
"within_double_byte(strint text, int line_start, int pos) -> int withindb\n\n"
"Return whether pos is within a double-byte encoded character.\n\n"
"str -- string in question\n"
"line_start -- offset of beginning of line (< pos)\n"
"pos -- offset in question\n\n"
"Return values:\n"
"0 -- not within dbe char, or double_byte_encoding == False\n"
"1 -- pos is on the 1st half of a dbe char\n"
"2 -- pos is on the 2nd half of a dbe char";

static int Py_WithinDoubleByte(const unsigned char *str, int line_start,
                               int pos)
{
    int i;
    if ((str[pos] >= 0x40) && (str[pos] < 0x7f))
    {
        //might be second half of big5, uhc or gbk encoding
        if (pos == line_start)  return 0;
        if (str[pos-1] >= 0x81)
        {
            if ((Py_WithinDoubleByte(str, line_start, pos-1)) == 1)  return 2;
            else return 0;
        }
str_util.c - 源码说明

本页面展示了「Urwid is a Python library for making text console applications. It has many features including fluid」中的 str_util.c 源码文件，采用 C语言编程语言编写，共 830 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与applications相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?