📄 mbfilter.c
字号:
filter1 = mbfl_convert_filter_new(string->no_encoding, mbfl_no_encoding_wchar, (int (*)(int, void*))filter2->filter_function, NULL, filter2); if (filter1 == NULL) { mbfl_convert_filter_delete(filter2); } } } if (filter1 == NULL) { return NULL; } if (filter2 != NULL) { filter2->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; filter2->illegal_substchar = 0x3f; /* '?' */ } mbfl_memory_device_init(&device, string->len, (string->len >> 2) + 8); /* feed data */ n = string->len; p = string->val; if (p != NULL) { while (n > 0) { if ((*filter1->filter_function)(*p++, filter1) < 0) { break; } n--; } } mbfl_convert_filter_flush(filter1); mbfl_convert_filter_delete(filter1); if (filter2 != NULL) { mbfl_convert_filter_flush(filter2); mbfl_convert_filter_delete(filter2); } return mbfl_memory_device_result(&device, result);}/* * identify encoding */const mbfl_encoding *mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict){ int i, n, num, bad; unsigned char *p; mbfl_identify_filter *flist, *filter; const mbfl_encoding *encoding; /* flist is an array of mbfl_identify_filter instances */ flist = (mbfl_identify_filter *)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter)); if (flist == NULL) { return NULL; } num = 0; if (elist != NULL) { for (i = 0; i < elistsz; i++) { if (!mbfl_identify_filter_init(&flist[num], elist[i])) { num++; } } } /* feed data */ n = string->len; p = string->val; if (p != NULL) { bad = 0; while (n > 0) { for (i = 0; i < num; i++) { filter = &flist[i]; if (!filter->flag) { (*filter->filter_function)(*p, filter); if (filter->flag) { bad++; } } } if ((num - 1) <= bad && !strict) { break; } p++; n--; } } /* judge */ encoding = NULL; for (i = 0; i < num; i++) { filter = &flist[i]; if (!filter->flag) { if (strict && filter->status) { continue; } encoding = filter->encoding; break; } } /* fall-back judge */ if (!encoding) { for (i = 0; i < num; i++) { filter = &flist[i]; if (!filter->flag) { encoding = filter->encoding; break; } } } /* cleanup */ /* dtors should be called in reverse order */ i = num; while (--i >= 0) { mbfl_identify_filter_cleanup(&flist[i]); } mbfl_free((void *)flist); return encoding;}const char*mbfl_identify_encoding_name(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict){ const mbfl_encoding *encoding; encoding = mbfl_identify_encoding(string, elist, elistsz, strict); if (encoding != NULL && encoding->no_encoding > mbfl_no_encoding_charset_min && encoding->no_encoding < mbfl_no_encoding_charset_max) { return encoding->name; } else { return NULL; }}enum mbfl_no_encodingmbfl_identify_encoding_no(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict){ const mbfl_encoding *encoding; encoding = mbfl_identify_encoding(string, elist, elistsz, strict); if (encoding != NULL && encoding->no_encoding > mbfl_no_encoding_charset_min && encoding->no_encoding < mbfl_no_encoding_charset_max) { return encoding->no_encoding; } else { return mbfl_no_encoding_invalid; }}/* * strlen */static intfilter_count_output(int c, void *data){ (*(int *)data)++; return c;}intmbfl_strlen(mbfl_string *string){ int len, n, m, k; unsigned char *p; const unsigned char *mbtab; const mbfl_encoding *encoding; encoding = mbfl_no2encoding(string->no_encoding); if (encoding == NULL || string == NULL) { return -1; } len = 0; if (encoding->flag & MBFL_ENCTYPE_SBCS) { len = string->len; } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) { len = string->len/2; } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) { len = string->len/4; } else if (encoding->mblen_table != NULL) { mbtab = encoding->mblen_table; n = 0; p = string->val; k = string->len; /* count */ if (p != NULL) { while (n < k) { m = mbtab[*p]; n += m; p += m; len++; }; } } else { /* wchar filter */ mbfl_convert_filter *filter = mbfl_convert_filter_new( string->no_encoding, mbfl_no_encoding_wchar, filter_count_output, 0, &len); if (filter == NULL) { return -1; } /* count */ n = string->len; p = string->val; if (p != NULL) { while (n > 0) { (*filter->filter_function)(*p++, filter); n--; } } mbfl_convert_filter_delete(filter); } return len;} /* * strpos */struct collector_strpos_data { mbfl_convert_filter *next_filter; mbfl_wchar_device needle; int needle_len; int start; int output; int found_pos; int needle_pos; int matched_pos;};static intcollector_strpos(int c, void* data){ int *p, *h, *m, n; struct collector_strpos_data *pc = (struct collector_strpos_data*)data; if (pc->output >= pc->start) { if (c == (int)pc->needle.buffer[pc->needle_pos]) { if (pc->needle_pos == 0) { pc->found_pos = pc->output; /* found position */ } pc->needle_pos++; /* needle pointer */ if (pc->needle_pos >= pc->needle_len) { pc->matched_pos = pc->found_pos; /* matched position */ pc->needle_pos--; goto retry; } } else if (pc->needle_pos != 0) {retry: h = (int *)pc->needle.buffer; h++; for (;;) { pc->found_pos++; p = h; m = pc->needle.buffer; n = pc->needle_pos - 1; while (n > 0 && *p == *m) { n--; p++; m++; } if (n <= 0) { if (*m != c) { pc->needle_pos = 0; } break; } else { h++; pc->needle_pos--; } } } } pc->output++; return c;}/* * oddlen */int mbfl_oddlen(mbfl_string *string){ int len, n, m, k; unsigned char *p; const unsigned char *mbtab; const mbfl_encoding *encoding; if (string == NULL) { return -1; } encoding = mbfl_no2encoding(string->no_encoding); if (encoding == NULL) { return -1; } len = 0; if (encoding->flag & MBFL_ENCTYPE_SBCS) { return 0; } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) { return len % 2; } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) { return len % 4; } else if (encoding->mblen_table != NULL) { mbtab = encoding->mblen_table; n = 0; p = string->val; k = string->len; /* count */ if (p != NULL) { while (n < k) { m = mbtab[*p]; n += m; p += m; }; } return n-k; } else { /* how can i do ? */ return 0; } /* NOT REACHED */}intmbfl_strpos( mbfl_string *haystack, mbfl_string *needle, int offset, int reverse){ int n, result; unsigned char *p; mbfl_convert_filter *filter; struct collector_strpos_data pc; if (haystack == NULL || needle == NULL) { return -8; } /* needle is converted into wchar */ mbfl_wchar_device_init(&pc.needle); filter = mbfl_convert_filter_new( needle->no_encoding, mbfl_no_encoding_wchar, mbfl_wchar_device_output, 0, &pc.needle); if (filter == NULL) { return -4; } p = needle->val; n = needle->len; if (p != NULL) { while (n > 0) { if ((*filter->filter_function)(*p++, filter) < 0) { break; } n--; } } mbfl_convert_filter_flush(filter); mbfl_convert_filter_delete(filter); pc.needle_len = pc.needle.pos; if (pc.needle.buffer == NULL) { return -4; } if (pc.needle_len <= 0) { mbfl_wchar_device_clear(&pc.needle); return -2; } /* initialize filter and collector data */ filter = mbfl_convert_filter_new( haystack->no_encoding, mbfl_no_encoding_wchar, collector_strpos, 0, &pc); if (filter == NULL) { mbfl_wchar_device_clear(&pc.needle); return -4; } pc.start = offset; pc.output = 0; pc.needle_pos = 0; pc.found_pos = 0; pc.matched_pos = -1; /* feed data */ p = haystack->val; n = haystack->len; if (p != NULL) { while (n > 0) { if ((*filter->filter_function)(*p++, filter) < 0) { pc.matched_pos = -4; break; } if (pc.matched_pos >= 0 && !reverse) { break; } n--; } } mbfl_convert_filter_flush(filter); result = pc.matched_pos; mbfl_convert_filter_delete(filter); mbfl_wchar_device_clear(&pc.needle); return result;}/* * substr_count */intmbfl_substr_count( mbfl_string *haystack, mbfl_string *needle ){ int n, result = 0; unsigned char *p; mbfl_convert_filter *filter; struct collector_strpos_data pc; if (haystack == NULL || needle == NULL) { return -8; } /* needle is converted into wchar */ mbfl_wchar_device_init(&pc.needle); filter = mbfl_convert_filter_new( needle->no_encoding, mbfl_no_encoding_wchar, mbfl_wchar_device_output, 0, &pc.needle); if (filter == NULL) { return -4; } p = needle->val; n = needle->len; if (p != NULL) { while (n > 0) { if ((*filter->filter_function)(*p++, filter) < 0) { break; } n--; } } mbfl_convert_filter_flush(filter); mbfl_convert_filter_delete(filter); pc.needle_len = pc.needle.pos; if (pc.needle.buffer == NULL) { return -4; } if (pc.needle_len <= 0) { mbfl_wchar_device_clear(&pc.needle); return -2; } /* initialize filter and collector data */ filter = mbfl_convert_filter_new( haystack->no_encoding, mbfl_no_encoding_wchar, collector_strpos, 0, &pc); if (filter == NULL) { mbfl_wchar_device_clear(&pc.needle); return -4; } pc.start = 0; pc.output = 0; pc.needle_pos = 0; pc.found_pos = 0; pc.matched_pos = -1; /* feed data */ p = haystack->val; n = haystack->len; if (p != NULL) { while (n > 0) { if ((*filter->filter_function)(*p++, filter) < 0) { pc.matched_pos = -4; break; } if (pc.matched_pos >= 0) { ++result; pc.matched_pos = -1; } n--; } } mbfl_convert_filter_flush(filter); mbfl_convert_filter_delete(filter); mbfl_wchar_device_clear(&pc.needle); return result;}/* * substr */struct collector_substr_data { mbfl_convert_filter *next_filter; int start; int stop; int output;};static intcollector_substr(int c, void* data){ struct collector_substr_data *pc = (struct collector_substr_data*)data;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -