📄 widechar.c
字号:
/* widechar.c - handle multibyte and UTF-8 encoding
   Copyright (C) 1996-2000 Paul Sheer

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.
 */

#include <config.h>
#include <edit.h>

/* A decoder-state marker is recorded roughly every this many bytes. */
#define MB_MARKER_DENSITY 64

/*
   bytes | bits | layout
     1   |   7  | 0vvvvvvv
     2   |  11  | 110vvvvv 10vvvvvv
     3   |  16  | 1110vvvv 10vvvvvv 10vvvvvv
     4   |  21  | 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv
     5   |  26  | 111110vv 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv
     6   |  31  | 1111110v 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv
*/

/* force utf-8 only multibyte encoding - i.e. ignore locale settings */
int option_utf_interpretation = 0;

/*
 * Encode the code point `c' as a NUL-terminated UTF-8 byte string.
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call.  Only values below 2^21 (1 to 4 byte forms) are encoded; for any
 * larger value the returned string is empty.
 */
unsigned char *wcrtomb_ucs4_to_utf8 (wchar_t c)
{
    static unsigned char r[32];
    int i = 0;
#undef APPEND
#define APPEND(x) r[i++] = (unsigned char) (x)
    if (c < (1 << 7)) {
        APPEND (c);
    } else if (c < (1 << 11)) {
        APPEND ((c >> 6) | 0xC0);
        APPEND ((c & 0x3F) | 0x80);
    } else if (c < (1 << 16)) {
        APPEND ((c >> 12) | 0xE0);
        APPEND (((c >> 6) & 0x3F) | 0x80);
        APPEND ((c & 0x3F) | 0x80);
    } else if (c < (1 << 21)) {
        /* The lead byte of a 4-byte sequence is 11110vvv, i.e. 0xF0.
           Using 0xE0 here would announce a 3-byte sequence. */
        APPEND ((c >> 18) | 0xF0);
        APPEND (((c >> 12) & 0x3F) | 0x80);
        APPEND (((c >> 6) & 0x3F) | 0x80);
        APPEND ((c & 0x3F) | 0x80);
    }
    APPEND ('\0');
    return r;
}

/* makes sense to me... (although only goes to 21 bits) */

/*
 * Decode one UTF-8 sequence starting at `t', of which `n' bytes are
 * available (the first byte is always read, so n must be at least 1).
 *
 * Returns:
 *    0      the first byte is NUL; *c is set to 0
 *    1..6   number of bytes consumed; *c holds the decoded value
 *   -1      invalid sequence: stray continuation byte, bad continuation
 *           byte, overlong encoding, or a 0xFE/0xFF lead byte
 *   -2      the lead byte announces more bytes than `n' provides
 *
 * `x' is unused: UTF-8 needs no shift state.
 */
static inline int mbrtowc_utf8_to_ucs4 (wchar_t * c, char *t, int n, void *x /* no shifting with utf8 */ )
{
    /* One entry per multi-byte form, shortest first.  Entry k describes
       sequences of k + 2 bytes: lead bytes below `lead_limit', payload
       bits of the lead byte selected by `lead_mask', and the smallest
       value that legitimately needs this many bytes. */
    static const struct {
        unsigned char lead_limit;
        unsigned char lead_mask;
        wchar_t min_value;
    } forms[] = {
        {0xE0, 0x1F, 1 << 7},
        {0xF0, 0x0F, 1 << 11},
        {0xF8, 0x07, 1 << 16},
        {0xFC, 0x03, 1 << 21},
        {0xFE, 0x01, 1 << 26}
    };
    unsigned char *s = (unsigned char *) t;
    int k;

    (void) x;

    if (!*s) {
        *c = 0;
        return 0;
    }
    if (*s < 0x80) {
        *c = (wchar_t) * s;
        return 1;
    }
    if (*s < 0xC0)              /* continuation byte in lead position */
        return -1;

    for (k = 0; k < (int) (sizeof (forms) / sizeof (forms[0])); k++) {
        int len = k + 2;
        int j;
        wchar_t v;

        if (*s >= forms[k].lead_limit)
            continue;
        if (n < len)
            return -2;
        for (j = 1; j < len; j++)
            if ((s[j] & 0xC0) != 0x80)
                return -1;
        v = (wchar_t) (s[0] & forms[k].lead_mask);
        for (j = 1; j < len; j++)
            v = (v << 6) | (wchar_t) (s[j] & 0x3F);
        *c = v;
        if (*c < forms[k].min_value)    /* overlong encoding */
            return -1;
        return len;
    }
    return -1;                  /* 0xFE and 0xFF never start a sequence */
}

#if 0

/* last arg is len of t to convert NOT len of c */
int mbstowcs_utf8_to_ucs4 (wchar_t * c, char *t, int n)
{
    int v = 0;
    while (n) {
        int r;
        if ((r = mbrtowc_utf8_to_ucs4 (c, t, n, 0)) == -1) {
            *c++ = *t++;
            v++;
            n--;
        } else if (r == -2) {
            break;
        } else {
            t += r;
            n -= r;
            v++;
            c++;
        }
    }
    return v;
}

wchar_t *mbstowcs_dup (unsigned char *s)
{
    wchar_t *t;
    t = CMalloc ((strlen ((char *) s) + 1) * sizeof (wchar_t));
    t[mbstowcs_utf8_to_ucs4 (t, (char *) s, strlen ((char *) s))] = 0;
    return t;
}

int wchar_t_strlen (wchar_t * p)
{
    int v;
    for (v = 0; *p; p++, v++);
    return v;
}

#endif

/*
 * Compute the rule for the byte at `byte_index' from the rule of the
 * preceding byte (`mb_rule'), interpreting the buffer as UTF-8.
 *
 * In the returned rule:
 *   - if the previous rule still had trailing bytes pending (end != 0),
 *     this byte is one of them: end is decremented and ch is -1;
 *   - if a sequence decodes starting here, ch is the decoded value and
 *     end is the number of bytes that follow this one in the sequence;
 *   - if the bytes starting here are invalid UTF-8, ch is the raw byte
 *     with bit 0x80000000 set and end is 0;
 *   - if six bytes were still not enough, ch is -1 and end is 0.
 */
static inline struct mb_rule apply_mb_rules_going_right_utf8_to_ucs4 (WEdit * edit, long byte_index, struct mb_rule mb_rule)
{
    wchar_t wc;
    unsigned char p[16];
    int n;

    if (mb_rule.end) {
        mb_rule.end--;
        mb_rule.ch = -1;
        return mb_rule;
    }
    /* Fetch one more byte per iteration until the decoder either
       succeeds or rejects the prefix. */
    for (n = 0; n < 6; n++) {
        int r;
        p[n] = edit_get_byte (edit, byte_index + n);
        r = mbrtowc_utf8_to_ucs4 (&wc, (char *) p, n + 1, &mb_rule.shift_state);
        if (r >= 0) {
            mb_rule.end = n;
            mb_rule.ch = wc;
            return mb_rule;
        }
        if (r == -1) {
            mb_rule.end = 0;
            mb_rule.ch = (unsigned long) *p | 0x80000000;
            return mb_rule;
        }
    }
    mb_rule.end = 0;
    mb_rule.ch = -1;
    return mb_rule;
}

/*
 * Same contract as apply_mb_rules_going_right_utf8_to_ucs4(), but the
 * bytes are decoded with the C library's mbrtowc(), i.e. according to
 * the current locale.  An undecodable byte is returned as-is in ch
 * (without the 0x80000000 flag).  Without HAVE_WCHAR_H the rule is
 * returned unchanged.
 */
static inline struct mb_rule apply_mb_rules_going_right (WEdit * edit, long byte_index, struct mb_rule mb_rule)
{
#ifdef HAVE_WCHAR_H
    struct mb_rule start;
    wchar_t wc;
    unsigned char p[16];
    int n;

    if (mb_rule.end) {
        mb_rule.end--;
        mb_rule.ch = -1;
        return mb_rule;
    }
    start = mb_rule;
    for (n = 0; n < (int) MB_CUR_MAX && n < (int) sizeof (p); n++) {
        size_t r;
        p[n] = edit_get_byte (edit, byte_index + n);
        /* Each attempt re-feeds the whole prefix p[0..n].  mbrtowc()
           records consumed partial input in the state when it returns
           (size_t) -2, so rewind the state first; otherwise the earlier
           bytes would be fed to the decoder twice. */
        memcpy (&mb_rule.shift_state, &start.shift_state, sizeof (mb_rule.shift_state));
        r = mbrtowc (&wc, (char *) p, n + 1, &mb_rule.shift_state);
        if (r == (size_t) -1) {
            /* The state is unspecified after an encoding error: go back
               to the state we had before looking at this byte. */
            memcpy (&mb_rule.shift_state, &start.shift_state, sizeof (mb_rule.shift_state));
            mb_rule.end = 0;
            mb_rule.ch = *p;
            return mb_rule;
        }
        if (r != (size_t) -2) {
            mb_rule.end = n;
            mb_rule.ch = wc;
            return mb_rule;
        }
    }
    mb_rule.end = 0;
    mb_rule.ch = -1;
#endif
    return mb_rule;
}

/* Advance `mb_rule' over the byte at `byte_index' with the decoder
   selected by option_utf_interpretation. */
static struct mb_rule mb_rule_advance (WEdit * edit, long byte_index, struct mb_rule mb_rule)
{
    if (option_utf_interpretation)
        return apply_mb_rules_going_right_utf8_to_ucs4 (edit, byte_index, mb_rule);
    return apply_mb_rules_going_right (edit, byte_index, mb_rule);
}

/* Remove and free the most recent marker; edit->mb_marker must be non-NULL. */
static void mb_marker_pop (WEdit * edit)
{
    struct _mb_marker *next = edit->mb_marker->next;
    free (edit->mb_marker);
    edit->mb_marker = next;
}

/* Record edit->mb_rule as the rule at `offset' on top of the marker
   list.  A marker only shortens later replays in get_mb_rule(), so if
   the allocation fails it is simply not recorded. */
static void mb_marker_push (WEdit * edit, long offset)
{
    struct _mb_marker *m = malloc (sizeof (*m));
    if (!m)
        return;
    m->next = edit->mb_marker;
    m->offset = offset;
    m->rule = edit->mb_rule;
    edit->mb_marker = m;
}

/*
 * Return the multibyte rule for the byte at `byte_index'.
 *
 * When neither the locale nor option_utf_interpretation asks for
 * multibyte handling, the byte itself is returned as a one-byte
 * character.  Otherwise the rule is derived by running the decoder
 * forward from the nearest known position: the previously requested
 * index (edit->last_get_mb_rule) when moving right, or the closest
 * marker at or before `byte_index' when moving left.  The result is
 * cached in edit->mb_rule / edit->last_get_mb_rule.
 */
struct mb_rule get_mb_rule (WEdit * edit, long byte_index)
{
    long i;

    if (
#ifndef HAVE_WCHAR_H
        !option_utf_interpretation ||
#endif
        (MB_CUR_MAX == 1 && !option_utf_interpretation)) {
        struct mb_rule r;
        /* Zero the whole struct so no member is returned uninitialised. */
        memset (&r, 0, sizeof (r));
        r.end = 0;
        r.ch = edit_get_byte (edit, byte_index);
        return r;
    }

    if (edit->mb_invalidate) {
        /* Discard every marker at or beyond the last requested index
           and resume from the newest marker that is left, or from the
           very beginning if none is. */
        while (edit->mb_marker && edit->mb_marker->offset >= edit->last_get_mb_rule)
            mb_marker_pop (edit);
        if (edit->mb_marker) {
            edit->last_get_mb_rule = edit->mb_marker->offset;
            edit->mb_rule = edit->mb_marker->rule;
        } else {
            edit->last_get_mb_rule = -1;
            memset (&edit->mb_rule, 0, sizeof (edit->mb_rule));
        }
        edit->mb_invalidate = 0;
    }

    if (byte_index > edit->last_get_mb_rule) {
        /* Moving right: continue from the cached rule, dropping a new
           marker whenever we get more than MB_MARKER_DENSITY bytes past
           the newest one. */
        for (i = edit->last_get_mb_rule + 1; i <= byte_index; i++) {
            edit->mb_rule = mb_rule_advance (edit, i, edit->mb_rule);
            if (i > (edit->mb_marker ? edit->mb_marker->offset + MB_MARKER_DENSITY : MB_MARKER_DENSITY))
                mb_marker_push (edit, i);
        }
    } else if (byte_index < edit->last_get_mb_rule) {
        /* Moving left: markers beyond the target are of no use. */
        while (edit->mb_marker && byte_index < edit->mb_marker->offset)
            mb_marker_pop (edit);
        if (edit->mb_marker) {
            edit->mb_rule = edit->mb_marker->rule;
            i = edit->mb_marker->offset + 1;
        } else {
            memset (&edit->mb_rule, 0, sizeof (edit->mb_rule));
            /* NOTE(review): this replay starts at -1 whereas the
               forward path above starts at 0 after a reset; kept as in
               the original - confirm what edit_get_byte() returns for
               a negative index. */
            i = -1;
        }
        for (; i <= byte_index; i++)
            edit->mb_rule = mb_rule_advance (edit, i, edit->mb_rule);
    }

    edit->last_get_mb_rule = byte_index;
    return edit->mb_rule;
}

/* Return only the character value (the `ch' member) of the rule for
   the byte at `byte_index'; see get_mb_rule(). */
long edit_get_wide_byte (WEdit * edit, long byte_index)
{
    struct mb_rule r;
    r = get_mb_rule (edit, byte_index);
    return r.ch;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -