📄 widechar.c

📁 具有IDE功能的编辑器
💻 C
字号:
/* widechar.c - handle multibyte and UTF-8 encoding
   Copyright (C) 1996-2000 Paul Sheer

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.
 */

#include <config.h>
#include <edit.h>

/* Minimum distance, in bytes, between two cached decoder-state markers. */
#define MB_MARKER_DENSITY 64

/*
   UTF-8 layout: sequence length | payload bits | bit pattern

     1 |    7 | 0vvvvvvv
     2 |   11 | 110vvvvv 10vvvvvv
     3 |   16 | 1110vvvv 10vvvvvv 10vvvvvv
     4 |   21 | 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv
     5 |   26 | 111110vv 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv
     6 |   31 | 1111110v 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv
*/

/* force utf-8 only multibyte encoding - i.e. ignore locale settings */
int option_utf_interpretation = 0;

/*
 * Encode the code point `c' as a NUL-terminated UTF-8 byte sequence.
 *
 * Returns a pointer to a static buffer, so the result is overwritten by
 * the next call.  Values below 1 << 21 are encoded in one to four bytes;
 * for values of 1 << 21 and above no branch matches and the result is
 * the empty string.
 */
unsigned char *wcrtomb_ucs4_to_utf8 (wchar_t c)
{
    static unsigned char r[32];
    int i = 0;
#undef APPEND
#define APPEND(x) r[i++] = (unsigned char) (x)
    if (c < (1 << 7)) {
        APPEND (c);
    } else if (c < (1 << 11)) {
        APPEND ((c >> 6) | 0xC0);
        APPEND ((c & 0x3F) | 0x80);
    } else if (c < (1 << 16)) {
        APPEND ((c >> 12) | 0xE0);
        APPEND (((c >> 6) & 0x3F) | 0x80);
        APPEND ((c & 0x3F) | 0x80);
    } else if (c < (1 << 21)) {
        /* The four-byte lead byte is 11110vvv, i.e. 0xF0; 0xE0 is the
           three-byte lead and would yield an undecodable sequence. */
        APPEND ((c >> 18) | 0xF0);
        APPEND (((c >> 12) & 0x3F) | 0x80);
        APPEND (((c >> 6) & 0x3F) | 0x80);
        APPEND ((c & 0x3F) | 0x80);
    }
    APPEND ('\0');
    return r;
}

/* makes sense to me... (although only goes to 21 bits) */

/*
 * Decode one UTF-8 sequence starting at `t', of which `n' bytes are
 * available, and store the code point in `*c'.  `x' is ignored: UTF-8
 * has no shift state.
 *
 * Returns
 *    0      the first byte is NUL (`*c' is set to 0),
 *    1..6   the number of bytes in the decoded sequence,
 *   -1      the sequence is invalid: a continuation byte or 0xFE/0xFF in
 *           lead position, a bad continuation byte, or an overlong
 *           encoding (`*c' may already have been written in that case),
 *   -2      the sequence needs more than `n' bytes.
 *
 * The first byte is always read, so `n' must be at least 1.
 */
static inline int mbrtowc_utf8_to_ucs4 (wchar_t * c, char *t, int n, void *x /* no shifting with utf8 */ )
{
    unsigned char *s = (unsigned char *) t;
    if (!*s) {
        *c = 0;
        return 0;
    }
    if (*s < 0x80) {
        *c = (wchar_t) * s;
        return 1;
    }
    /* 10vvvvvv is a continuation byte and cannot start a sequence */
    if (*s < 0xC0)
        return -1;
    if (*s < 0xE0) {
        if (n < 2)
            return -2;
        if ((s[1] & 0xC0) != 0x80)
            return -1;
        *c = ((wchar_t) (s[0] & 0x1F) << 6) | (wchar_t) (s[1] & 0x3F);
        /* reject overlong forms: the value would have fit in fewer bytes */
        if (*c < (1 << 7))
            return -1;
        return 2;
    }
    if (*s < 0xF0) {
        if (n < 3)
            return -2;
        if ((s[1] & 0xC0) != 0x80)
            return -1;
        if ((s[2] & 0xC0) != 0x80)
            return -1;
        *c = ((wchar_t) (s[0] & 0x0F) << 12) | ((wchar_t) (s[1] & 0x3F) << 6) | (wchar_t) (s[2] & 0x3F);
        if (*c < (1 << 11))
            return -1;
        return 3;
    }
    if (*s < 0xF8) {
        if (n < 4)
            return -2;
        if ((s[1] & 0xC0) != 0x80)
            return -1;
        if ((s[2] & 0xC0) != 0x80)
            return -1;
        if ((s[3] & 0xC0) != 0x80)
            return -1;
        *c =
            ((wchar_t) (s[0] & 0x07) << 18) |
            ((wchar_t) (s[1] & 0x3F) << 12) | ((wchar_t) (s[2] & 0x3F) << 6) | (wchar_t) (s[3] & 0x3F);
        if (*c < (1 << 16))
            return -1;
        return 4;
    }
    if (*s < 0xFC) {
        if (n < 5)
            return -2;
        if ((s[1] & 0xC0) != 0x80)
            return -1;
        if ((s[2] & 0xC0) != 0x80)
            return -1;
        if ((s[3] & 0xC0) != 0x80)
            return -1;
        if ((s[4] & 0xC0) != 0x80)
            return -1;
        *c =
            ((wchar_t) (s[0] & 0x03) << 24) | ((wchar_t) (s[1] & 0x3F) << 18) |
            ((wchar_t) (s[2] & 0x3F) << 12) | ((wchar_t) (s[3] & 0x3F) << 6) | (wchar_t) (s[4] & 0x3F);
        if (*c < (1 << 21))
            return -1;
        return 5;
    }
    if (*s < 0xFE) {
        if (n < 6)
            return -2;
        if ((s[1] & 0xC0) != 0x80)
            return -1;
        if ((s[2] & 0xC0) != 0x80)
            return -1;
        if ((s[3] & 0xC0) != 0x80)
            return -1;
        if ((s[4] & 0xC0) != 0x80)
            return -1;
        if ((s[5] & 0xC0) != 0x80)
            return -1;
        *c =
            ((wchar_t) (s[0] & 0x01) << 30) | ((wchar_t) (s[1] & 0x3F) << 24) | ((wchar_t) (s[2] & 0x3F) << 18) |
            ((wchar_t) (s[3] & 0x3F) << 12) | ((wchar_t) (s[4] & 0x3F) << 6) | (wchar_t) (s[5] & 0x3F);
        if (*c < (1 << 26))
            return -1;
        return 6;
    }
    /* 0xFE and 0xFF never appear in UTF-8 */
    return -1;
}

#if 0
/* Disabled helpers, kept for reference. */

/* last arg is len of t to convert NOT len of c */
/* NOTE(review): a NUL byte inside the first n bytes makes the decoder
   return 0, so neither t nor n advances and the loop does not end. */
int mbstowcs_utf8_to_ucs4 (wchar_t * c, char *t, int n)
{
    int v = 0;
    while (n) {
        int r;
        if ((r = mbrtowc_utf8_to_ucs4 (c, t, n, 0)) == -1) {
            /* invalid byte: copy it through unchanged */
            *c++ = *t++;
            v++;
            n--;
        } else if (r == -2) {
            break;
        } else {
            t += r;
            n -= r;
            v++;
            c++;
        }
    }
    return v;
}

/* Return a newly allocated, zero-terminated wide copy of the string s. */
wchar_t *mbstowcs_dup (unsigned char *s)
{
    wchar_t *t;
    t = CMalloc ((strlen ((char *) s) + 1) * sizeof (wchar_t));
    t[mbstowcs_utf8_to_ucs4 (t, (char *) s, strlen ((char *) s))] = 0;
    return t;
}

/* Number of wide characters in p before the terminating zero. */
int wchar_t_strlen (wchar_t * p)
{
    int v;
    for (v = 0; *p; p++, v++);
    return v;
}
#endif

/*
 * Advance the decoder state `mb_rule' over the byte at `byte_index',
 * interpreting the buffer as UTF-8 regardless of locale.
 *
 * In the returned rule, `end' is the number of bytes of the current
 * character that still follow this one, and `ch' is
 *   - the decoded character, on the first byte of a valid sequence,
 *   - -1 on the trailing bytes of a sequence, or when six bytes were not
 *     enough to complete one,
 *   - the byte value with bit 31 set, when the byte starts an invalid
 *     sequence (presumably so callers can tell raw bytes from decoded
 *     characters - confirm against the callers).
 */
static inline struct mb_rule apply_mb_rules_going_right_utf8_to_ucs4 (WEdit * edit, long byte_index,
                                                                      struct mb_rule mb_rule)
{
    wchar_t wc;
    unsigned char p[16];
    int n;
    if (mb_rule.end) {
        /* still inside a character that an earlier call decoded */
        mb_rule.end--;
        mb_rule.ch = -1;
        return mb_rule;
    }
    /* feed the decoder one more byte at a time until it succeeds or fails */
    for (n = 0; n < 6; n++) {
        int r;
        p[n] = edit_get_byte (edit, byte_index + n);
        r = mbrtowc_utf8_to_ucs4 (&wc, (char *) p, n + 1, &mb_rule.shift_state);
        if (r >= 0) {
            mb_rule.end = n;
            mb_rule.ch = wc;
            return mb_rule;
        }
        if (r == -1) {
            mb_rule.end = 0;
            mb_rule.ch = (unsigned long) *p | 0x80000000;
            return mb_rule;
        }
    }
    mb_rule.end = 0;
    mb_rule.ch = -1;
    return mb_rule;
}

/*
 * Same as apply_mb_rules_going_right_utf8_to_ucs4(), but decodes with
 * the locale's mbrtowc() over at most MB_CUR_MAX bytes, and reports an
 * invalid byte as its plain value.  Without HAVE_WCHAR_H the rule is
 * returned unchanged.
 */
static inline struct mb_rule apply_mb_rules_going_right (WEdit * edit, long byte_index, struct mb_rule mb_rule)
{
#ifdef HAVE_WCHAR_H
    wchar_t wc;
    unsigned char p[16];
    int n;
    if (mb_rule.end) {
        /* still inside a character that an earlier call decoded */
        mb_rule.end--;
        mb_rule.ch = -1;
        return mb_rule;
    }
    for (n = 0; n < MB_CUR_MAX; n++) {
        int r;
        p[n] = edit_get_byte (edit, byte_index + n);
        r = mbrtowc (&wc, (char *) p, n + 1, &mb_rule.shift_state);
        if (r >= 0) {
            mb_rule.end = n;
            mb_rule.ch = wc;
            return mb_rule;
        }
        if (r == -1) {
            mb_rule.end = 0;
            mb_rule.ch = *p;
            return mb_rule;
        }
    }
    mb_rule.end = 0;
    mb_rule.ch = -1;
#endif
    return mb_rule;
}

/* Advance `mb_rule' over one byte using the active interpretation. */
static struct mb_rule mb_rule_step (WEdit * edit, long byte_index, struct mb_rule mb_rule)
{
    if (option_utf_interpretation)
        return apply_mb_rules_going_right_utf8_to_ucs4 (edit, byte_index, mb_rule);
    return apply_mb_rules_going_right (edit, byte_index, mb_rule);
}

/* Advance edit->mb_rule over the bytes first..last inclusive. */
static void mb_rules_rescan (WEdit * edit, long first, long last)
{
    long i;
    for (i = first; i <= last; i++)
        edit->mb_rule = mb_rule_step (edit, i, edit->mb_rule);
}

/* Unlink and free the marker at the head of edit->mb_marker. */
static void mb_marker_pop (WEdit * edit)
{
    struct _mb_marker *next = edit->mb_marker->next;
    free (edit->mb_marker);
    edit->mb_marker = next;
}

/*
 * Record edit->mb_rule as the decoder state at `offset' in a new marker
 * at the head of the list.  Markers only shorten later rescans, so when
 * the allocation fails the marker is simply not recorded.
 */
static void mb_marker_push (WEdit * edit, long offset)
{
    struct _mb_marker *m = malloc (sizeof (struct _mb_marker));
    if (!m)
        return;
    m->next = edit->mb_marker;
    m->offset = offset;
    m->rule = edit->mb_rule;
    edit->mb_marker = m;
}

/*
 * Return the decoder state for the byte at `byte_index'.
 *
 * With a single-byte locale and UTF-8 interpretation off (or, without
 * HAVE_WCHAR_H, whenever UTF-8 interpretation is off) the byte itself is
 * returned as the character.  Otherwise the state is derived from the
 * state cached for edit->last_get_mb_rule:
 *   - going forward, by stepping over the bytes in between and dropping
 *     a marker once more than MB_MARKER_DENSITY bytes past the last one;
 *   - going backward, by discarding markers beyond `byte_index' and
 *     rescanning from the nearest remaining marker, or from the start of
 *     the buffer when none is left.
 * When edit->mb_invalidate is set, markers at or beyond the last cached
 * position are discarded first.
 */
struct mb_rule get_mb_rule (WEdit * edit, long byte_index)
{
    long i;
    if (
#ifndef HAVE_WCHAR_H
           !option_utf_interpretation ||
#endif
           (MB_CUR_MAX == 1 && !option_utf_interpretation)) {
        struct mb_rule r;
        r.end = 0;
        r.ch = edit_get_byte (edit, byte_index);
        return r;
    }

    if (edit->mb_invalidate) {
        while (edit->mb_marker && edit->mb_marker->offset >= edit->last_get_mb_rule)
            mb_marker_pop (edit);
        if (edit->mb_marker) {
            edit->last_get_mb_rule = edit->mb_marker->offset;
            edit->mb_rule = edit->mb_marker->rule;
        } else {
            edit->last_get_mb_rule = -1;
            memset (&edit->mb_rule, 0, sizeof (edit->mb_rule));
        }
        edit->mb_invalidate = 0;
    }

    if (byte_index > edit->last_get_mb_rule) {
        for (i = edit->last_get_mb_rule + 1; i <= byte_index; i++) {
            edit->mb_rule = mb_rule_step (edit, i, edit->mb_rule);
            if (i >
                (edit->mb_marker ? edit->mb_marker->offset +
                 MB_MARKER_DENSITY : MB_MARKER_DENSITY))
                mb_marker_push (edit, i);
        }
    } else if (byte_index < edit->last_get_mb_rule) {
        /* drop every marker that lies beyond the requested position */
        while (edit->mb_marker && byte_index < edit->mb_marker->offset)
            mb_marker_pop (edit);
        if (edit->mb_marker) {
            edit->mb_rule = edit->mb_marker->rule;
            mb_rules_rescan (edit, edit->mb_marker->offset + 1, byte_index);
        } else {
            memset (&edit->mb_rule, 0, sizeof (edit->mb_rule));
            mb_rules_rescan (edit, -1, byte_index);
        }
    }

    edit->last_get_mb_rule = byte_index;
    return edit->mb_rule;
}

/*
 * Return the `ch' field of the decoder state at `byte_index'; see
 * get_mb_rule() and the apply_mb_rules_going_right*() functions for the
 * values it can take.
 */
long edit_get_wide_byte (WEdit * edit, long byte_index)
{
    struct mb_rule r;
    r = get_mb_rule (edit, byte_index);
    return r.ch;
}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -