📄 charmap.c
字号:
/* $Id: charmap.c,v 1.27 2003/01/13 10:53:16 oleg Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data ApsThis file is part of the Zebra server.Zebra is free software; you can redistribute it and/or modify it underthe terms of the GNU General Public License as published by the FreeSoftware Foundation; either version 2, or (at your option) any laterversion.Zebra is distributed in the hope that it will be useful, but WITHOUT ANYWARRANTY; without even the implied warranty of MERCHANTABILITY orFITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public Licensefor more details.You should have received a copy of the GNU General Public Licensealong with Zebra; see the file LICENSE.zebra. If not, write to theFree Software Foundation, 59 Temple Place - Suite 330, Boston, MA02111-1307, USA.*//* * Support module to handle character-conversions into and out of the * Zebra dictionary. */#include <ctype.h>#include <string.h>#include <assert.h>typedef unsigned ucs4_t;#include <yaz/yaz-util.h>#include <charmap.h>#define CHR_MAXSTR 1024#define CHR_MAXEQUIV 32const char *CHR_UNKNOWN = "\001";const char *CHR_SPACE = "\002";const char *CHR_BASE = "\003";struct chrmaptab_info{ chr_t_entry *input; /* mapping table for input data */ chr_t_entry *q_input; /* mapping table for queries */ unsigned char *output[256]; /* return mapping - for display of registers */ int base_uppercase; /* Start of upper-case ordinals */ NMEM nmem;};/* * Character map trie node. */struct chr_t_entry{ chr_t_entry **children; /* array of children */ unsigned char **target; /* target for this node, if any */};/* * General argument structure for callback functions (internal use only) */typedef struct chrwork { chrmaptab map; char string[CHR_MAXSTR+1];} chrwork;/* * Add an entry to the character map. */static chr_t_entry *set_map_string(chr_t_entry *root, NMEM nmem, const char *from, int len, char *to, const char *from_0){ if (!from_0) from_0 = from; if (!root) { root = (chr_t_entry *) nmem_malloc(nmem, sizeof(*root)); root->children = 0; root->target = 0; } if (!len) { if (!root->target || !root->target[0] || strcmp(root->target[0], to)) { if (from_0 && root->target && root->target[0] && root->target[0][0] && strcmp (root->target[0], CHR_UNKNOWN)) { yaz_log (LOG_WARN, "duplicate entry for charmap from '%s'", from_0); } root->target = (unsigned char **) nmem_malloc(nmem, sizeof(*root->target)*2); root->target[0] = (unsigned char *) nmem_strdup(nmem, to); root->target[1] = 0; } } else { if (!root->children) { int i; root->children = (chr_t_entry **) nmem_malloc(nmem, sizeof(chr_t_entry*) * 256); for (i = 0; i < 256; i++) root->children[i] = 0; } if (!(root->children[(unsigned char) *from] = set_map_string(root->children[(unsigned char) *from], nmem, from + 1, len - 1, to, from_0))) return 0; } return root;}static chr_t_entry *find_entry(chr_t_entry *t, const char **from, int len){ chr_t_entry *res; if (len && t->children && t->children[(unsigned char) **from]) { const char *pos = *from; (*from)++; if ((res = find_entry(t->children[(unsigned char) *pos], from, len - 1))) return res; /* no match */ *from = pos; } /* no children match. use ourselves, if we have a target */ return t->target ? t : 0;}static chr_t_entry *find_entry_x(chr_t_entry *t, const char **from, int *len){ chr_t_entry *res; while (*len <= 0) { /* switch to next buffer */ if (*len < 0) break; from++; len++; } if (*len > 0 && t->children && t->children[(unsigned char) **from]) { const char *old_from = *from; int old_len = *len; (*len)--; (*from)++; if ((res = find_entry_x(t->children[(unsigned char) *old_from], from, len))) return res; /* no match */ *len = old_len; *from = old_from; } /* no children match. use ourselves, if we have a target */ return t->target ? t : 0;}const char **chr_map_input_x(chrmaptab maptab, const char **from, int *len){ chr_t_entry *t = maptab->input; chr_t_entry *res; if (!(res = find_entry_x(t, from, len))) abort(); return (const char **) (res->target);}const char **chr_map_input(chrmaptab maptab, const char **from, int len){ chr_t_entry *t = maptab->input; chr_t_entry *res; int len_tmp[2]; len_tmp[0] = len; len_tmp[1] = -1; if (!(res = find_entry_x(t, from, len_tmp))) abort(); return (const char **) (res->target);}const char *chr_map_output(chrmaptab maptab, const char **from, int len){ unsigned char c = ** (unsigned char **) from; (*from)++; return (const char*) maptab->output[c];}unsigned char zebra_prim(char **s){ unsigned char c; unsigned int i = 0; yaz_log (LOG_DEBUG, "prim %.3s", *s); if (**s == '\\') { (*s)++; c = **s; switch (c) { case '\\': c = '\\'; (*s)++; break; case 'r': c = '\r'; (*s)++; break; case 'n': c = '\n'; (*s)++; break; case 't': c = '\t'; (*s)++; break; case 's': c = ' '; (*s)++; break; case 'x': sscanf(*s, "x%2x", &i); c = i; *s += 3; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': sscanf(*s, "%3o", &i); c = i; *s += 3; break; default: (*s)++; } } else { c = **s; ++(*s); } return c;}ucs4_t zebra_prim_w(ucs4_t **s){ ucs4_t c; ucs4_t i = 0; char fmtstr[8]; yaz_log (LOG_DEBUG, "prim %.3s", (char *) *s); if (**s == '\\') { (*s)++; c = **s; switch (c) { case '\\': c = '\\'; (*s)++; break; case 'r': c = '\r'; (*s)++; break; case 'n': c = '\n'; (*s)++; break; case 't': c = '\t'; (*s)++; break; case 's': c = ' '; (*s)++; break; case 'x': fmtstr[0] = (*s)[0]; fmtstr[1] = (*s)[1]; fmtstr[2] = (*s)[2]; fmtstr[3] = 0; sscanf(fmtstr, "x%2x", &i); c = i; *s += 3; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': fmtstr[0] = (*s)[0]; fmtstr[1] = (*s)[1]; fmtstr[2] = (*s)[2]; fmtstr[3] = 0; sscanf(fmtstr, "%3o", &i); c = i; *s += 3; break; default: (*s)++; } } else { c = **s; ++(*s); } yaz_log (LOG_DEBUG, "out %d", c); return c;}/* * Callback function. * Add an entry to the value space. */static void fun_addentry(const char *s, void *data, int num){ chrmaptab tab = (chrmaptab) data; char tmp[2]; tmp[0] = num; tmp[1] = '\0'; tab->input = set_map_string(tab->input, tab->nmem, s, strlen(s), tmp, 0); tab->output[num + tab->base_uppercase] = (unsigned char *) nmem_strdup(tab->nmem, s);}/* * Callback function. * Add a space-entry to the value space. */static void fun_addspace(const char *s, void *data, int num){ chrmaptab tab = (chrmaptab) data; tab->input = set_map_string(tab->input, tab->nmem, s, strlen(s), (char*) CHR_SPACE, 0);}/* * Create a string containing the mapped characters provided. */static void fun_mkstring(const char *s, void *data, int num){ chrwork *arg = (chrwork *) data; const char **res, *p = s; res = chr_map_input(arg->map, &s, strlen(s)); if (*res == (char*) CHR_UNKNOWN) logf(LOG_WARN, "Map: '%s' has no mapping", p); strncat(arg->string, *res, CHR_MAXSTR - strlen(arg->string)); arg->string[CHR_MAXSTR] = '\0';}/* * Add a map to the string contained in the argument.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -