📄 charmap.c
字号:
*/static void fun_add_map(const char *s, void *data, int num){ chrwork *arg = (chrwork *) data; assert(arg->map->input); logf (LOG_DEBUG, "set map %.*s", (int) strlen(s), s); set_map_string(arg->map->input, arg->map->nmem, s, strlen(s), arg->string, 0); for (s = arg->string; *s; s++) logf (LOG_DEBUG, " %3d", (unsigned char) *s);}/* * Add a query map to the string contained in the argument. */static void fun_add_qmap(const char *s, void *data, int num){ chrwork *arg = (chrwork *) data; assert(arg->map->q_input); logf (LOG_DEBUG, "set qmap %.*s", (int) strlen(s), s); set_map_string(arg->map->q_input, arg->map->nmem, s, strlen(s), arg->string, 0); for (s = arg->string; *s; s++) logf (LOG_DEBUG, " %3d", (unsigned char) *s);}static int scan_to_utf8 (yaz_iconv_t t, ucs4_t *from, size_t inlen, char *outbuf, size_t outbytesleft){ size_t inbytesleft = inlen * sizeof(ucs4_t); char *inbuf = (char*) from; size_t ret; if (t == 0) *outbuf++ = *from; /* ISO-8859-1 is OK here */ else { ret = yaz_iconv (t, &inbuf, &inbytesleft, &outbuf, &outbytesleft); if (ret == (size_t) (-1)) { yaz_log (LOG_WARN|LOG_ERRNO, "bad unicode sequence"); return -1; } } *outbuf = '\0'; return 0;}static int scan_string(char *s_native, yaz_iconv_t t_unicode, yaz_iconv_t t_utf8, void (*fun)(const char *c, void *data, int num), void *data, int *num){ char str[1024]; ucs4_t arg[512]; ucs4_t *s0, *s = arg; ucs4_t c, begin, end; size_t i; if (t_unicode != 0) { char *outbuf = (char *) arg; char *inbuf = s_native; size_t outbytesleft = sizeof(arg)-4; size_t inbytesleft = strlen(s_native); size_t ret; ret = yaz_iconv(t_unicode, &inbuf, &inbytesleft, &outbuf, &outbytesleft); if (ret == (size_t)(-1)) return -1; i = (outbuf - (char*) arg)/sizeof(ucs4_t); } else { for (i = 0; s_native[i]; i++) arg[i] = s_native[i] & 255; /* ISO-8859-1 conversion */ } arg[i] = 0; /* terminate */ if (s[0] == 0xfeff || s[0] == 0xfeff) /* skip byte Order Mark */ s++; while (*s) { switch (*s) { case '{': s++; begin = zebra_prim_w(&s); if (*s != '-') { logf(LOG_FATAL, "Bad range in char-map"); return -1; } s++; end = zebra_prim_w(&s); if (end <= begin) { logf(LOG_FATAL, "Bad range in char-map"); return -1; } s++; for (c = begin; c <= end; c++) { if (scan_to_utf8 (t_utf8, &c, 1, str, sizeof(str)-1)) return -1; (*fun)(str, data, num ? (*num)++ : 0); } break; case '[': s++; abort(); break; case '(': ++s; s0 = s; while (*s != ')' || s[-1] == '\\') s++; *s = 0; if (scan_to_utf8 (t_utf8, s0, s - s0, str, sizeof(str)-1)) return -1; (*fun)(str, data, num ? (*num)++ : 0); s++; break; default: c = zebra_prim_w(&s); if (scan_to_utf8 (t_utf8, &c, 1, str, sizeof(str)-1)) return -1; (*fun)(str, data, num ? (*num)++ : 0); } } return 0;}chrmaptab chrmaptab_create(const char *tabpath, const char *name, int map_only, const char *tabroot){ FILE *f; char line[512], *argv[50]; chrmaptab res; int lineno = 0; int errors = 0; int argc, num = (int) *CHR_BASE, i; NMEM nmem; yaz_iconv_t t_unicode = 0; yaz_iconv_t t_utf8 = 0; unsigned endian = 31; const char *ucs4_native = "UCS-4"; if (*(char*) &endian == 31) /* little endian? */ ucs4_native = "UCS-4LE"; t_utf8 = yaz_iconv_open ("UTF-8", ucs4_native); logf (LOG_DEBUG, "maptab %s open", name); if (!(f = yaz_fopen(tabpath, name, "r", tabroot))) { logf(LOG_WARN|LOG_ERRNO, "%s", name); return 0; } nmem = nmem_create (); res = (chrmaptab) nmem_malloc(nmem, sizeof(*res)); res->nmem = nmem; res->input = (chr_t_entry *) nmem_malloc(res->nmem, sizeof(*res->input)); res->input->target = (unsigned char **) nmem_malloc(res->nmem, sizeof(*res->input->target) * 2); res->input->target[0] = (unsigned char*) CHR_UNKNOWN; res->input->target[1] = 0; res->input->children = (chr_t_entry **) nmem_malloc(res->nmem, sizeof(res->input) * 256); for (i = 0; i < 256; i++) { res->input->children[i] = (chr_t_entry *) nmem_malloc(res->nmem, sizeof(*res->input)); res->input->children[i]->children = 0; res->input->children[i]->target = (unsigned char **) nmem_malloc (res->nmem, 2 * sizeof(unsigned char *)); res->input->children[i]->target[1] = 0; if (map_only) { res->input->children[i]->target[0] = (unsigned char *) nmem_malloc (res->nmem, 2 * sizeof(unsigned char)); res->input->children[i]->target[0][0] = i; res->input->children[i]->target[0][1] = 0; } else res->input->children[i]->target[0] = (unsigned char*) CHR_UNKNOWN; } res->q_input = (chr_t_entry *) nmem_malloc(res->nmem, sizeof(*res->q_input)); res->q_input->target = 0; res->q_input->children = 0; for (i = *CHR_BASE; i < 256; i++) res->output[i] = 0; res->output[(int) *CHR_SPACE] = (unsigned char *) " "; res->output[(int) *CHR_UNKNOWN] = (unsigned char*) "@"; res->base_uppercase = 0; while (!errors && (argc = readconf_line(f, &lineno, line, 512, argv, 50))) if (!map_only && !yaz_matchstr(argv[0], "lowercase")) { if (argc != 2) { logf(LOG_FATAL, "Syntax error in charmap"); ++errors; } if (scan_string(argv[1], t_unicode, t_utf8, fun_addentry, res, &num) < 0) { logf(LOG_FATAL, "Bad value-set specification"); ++errors; } res->base_uppercase = num; res->output[(int) *CHR_SPACE + num] = (unsigned char *) " "; res->output[(int) *CHR_UNKNOWN + num] = (unsigned char*) "@"; num = (int) *CHR_BASE; } else if (!map_only && !yaz_matchstr(argv[0], "uppercase")) { if (!res->base_uppercase) { logf(LOG_FATAL, "Uppercase directive with no lowercase set"); ++errors; } if (argc != 2) { logf(LOG_FATAL, "Missing arg for uppercase directive"); ++errors; } if (scan_string(argv[1], t_unicode, t_utf8, fun_addentry, res, &num) < 0) { logf(LOG_FATAL, "Bad value-set specification"); ++errors; } } else if (!map_only && !yaz_matchstr(argv[0], "space")) { if (argc != 2) { logf(LOG_FATAL, "Syntax error in charmap"); ++errors; } if (scan_string(argv[1], t_unicode, t_utf8, fun_addspace, res, 0) < 0) { logf(LOG_FATAL, "Bad space specification"); ++errors; } } else if (!yaz_matchstr(argv[0], "map")) { chrwork buf; if (argc != 3) { logf(LOG_FATAL, "charmap directive map requires 2 args"); ++errors; } buf.map = res; buf.string[0] = '\0'; if (scan_string(argv[2], t_unicode, t_utf8, fun_mkstring, &buf, 0) < 0) { logf(LOG_FATAL, "Bad map target"); ++errors; } if (scan_string(argv[1], t_unicode, t_utf8, fun_add_map, &buf, 0) < 0) { logf(LOG_FATAL, "Bad map source"); ++errors; } } else if (!yaz_matchstr(argv[0], "qmap")) { chrwork buf; if (argc != 3) { logf(LOG_FATAL, "charmap directive qmap requires 2 args"); ++errors; } buf.map = res; buf.string[0] = '\0'; if (scan_string(argv[2], t_unicode, t_utf8, fun_mkstring, &buf, 0) < 0) { logf(LOG_FATAL, "Bad qmap target"); ++errors; } if (scan_string(argv[1], t_unicode, t_utf8, fun_add_qmap, &buf, 0) < 0) { logf(LOG_FATAL, "Bad qmap source"); ++errors; } } else if (!yaz_matchstr(argv[0], "encoding")) { /* * Fix me. When t_unicode==0 and use encoding directive in *.chr file the beheviour of the * zebra need to comment next part of code. */ /* if (t_unicode != 0) yaz_iconv_close (t_unicode); t_unicode = yaz_iconv_open (ucs4_native, argv[1]); */ /* * Fix me. It is additional staff for conversion of characters from local encoding * of *.chr file to UTF-8 (internal encoding). * NOTE: The derective encoding must be first directive in *.chr file. */ if (t_utf8 != 0) yaz_iconv_close(t_utf8); t_utf8 = yaz_iconv_open ("UTF-8", argv[1]); } else { logf(LOG_WARN, "Syntax error at '%s' in %s", line, name); } yaz_fclose(f); if (errors) { chrmaptab_destroy(res); res = 0; } logf (LOG_DEBUG, "maptab %s close %d errors", name, errors); if (t_utf8 != 0) yaz_iconv_close(t_utf8); if (t_unicode != 0) yaz_iconv_close(t_unicode); return res;}void chrmaptab_destroy(chrmaptab tab){ if (tab) nmem_destroy (tab->nmem);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -