📄 ucdata.c
字号:
/* $OpenLDAP: pkg/ldap/libraries/liblunicode/ucdata/ucdata.c,v 1.30.2.4 2007/01/02 21:43:51 kurt Exp $ *//* This work is part of OpenLDAP Software <http://www.openldap.org/>. * * Copyright 1998-2007 The OpenLDAP Foundation. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted only as authorized by the OpenLDAP * Public License. * * A copy of this license is available in file LICENSE in the * top-level directory of the distribution or, alternatively, at * <http://www.OpenLDAP.org/license.html>. *//* Copyright 2001 Computing Research Labs, New Mexico State University * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR * THE USE OR OTHER DEALINGS IN THE SOFTWARE. *//* $Id: ucdata.c,v 1.4 2001/01/02 18:46:20 mleisher Exp $" */#include "portable.h"#include "ldap_config.h"#include <stdio.h>#include <ac/stdlib.h>#include <ac/string.h>#include <ac/unistd.h>#include <ac/bytes.h>#include "lber_pvt.h"#include "ucdata.h"#ifndef HARDCODE_DATA#define HARDCODE_DATA 1#endif#if HARDCODE_DATA#include "uctable.h"#endif/************************************************************************** * * Miscellaneous types, data, and support functions. * **************************************************************************/typedef struct { ac_uint2 bom; ac_uint2 cnt; union { ac_uint4 bytes; ac_uint2 len[2]; } size;} _ucheader_t;/* * A simple array of 32-bit masks for lookup. */static ac_uint4 masks32[32] = { 0x00000001UL, 0x00000002UL, 0x00000004UL, 0x00000008UL, 0x00000010UL, 0x00000020UL, 0x00000040UL, 0x00000080UL, 0x00000100UL, 0x00000200UL, 0x00000400UL, 0x00000800UL, 0x00001000UL, 0x00002000UL, 0x00004000UL, 0x00008000UL, 0x00010000UL, 0x00020000UL, 0x00040000UL, 0x00080000UL, 0x00100000UL, 0x00200000UL, 0x00400000UL, 0x00800000UL, 0x01000000UL, 0x02000000UL, 0x04000000UL, 0x08000000UL, 0x10000000UL, 0x20000000UL, 0x40000000UL, 0x80000000UL};#define endian_short(cc) (((cc) >> 8) | (((cc) & 0xff) << 8))#define endian_long(cc) ((((cc) & 0xff) << 24)|((((cc) >> 8) & 0xff) << 16)|\ ((((cc) >> 16) & 0xff) << 8)|((cc) >> 24))#if !HARDCODE_DATAstatic FILE *_ucopenfile(char *paths, char *filename, char *mode){ FILE *f; char *fp, *dp, *pp, path[BUFSIZ]; if (filename == 0 || *filename == 0) return 0; dp = paths; while (dp && *dp) { pp = path; while (*dp && *dp != ':') *pp++ = *dp++; *pp++ = *LDAP_DIRSEP; fp = filename; while (*fp) *pp++ = *fp++; *pp = 0; if ((f = fopen(path, mode)) != 0) return f; if (*dp == ':') dp++; } return 0;}#endif/************************************************************************** * * Support for the character properties. * **************************************************************************/#if !HARDCODE_DATAstatic ac_uint4 _ucprop_size;static ac_uint2 *_ucprop_offsets;static ac_uint4 *_ucprop_ranges;/* * Return -1 on error, 0 if okay */static int_ucprop_load(char *paths, int reload){ FILE *in; ac_uint4 size, i; _ucheader_t hdr; if (_ucprop_size > 0) { if (!reload) /* * The character properties have already been loaded. */ return 0; /* * Unload the current character property data in preparation for * loading a new copy. Only the first array has to be deallocated * because all the memory for the arrays is allocated as a single * block. */ free((char *) _ucprop_offsets); _ucprop_size = 0; } if ((in = _ucopenfile(paths, "ctype.dat", "rb")) == 0) return -1; /* * Load the header. */ fread((char *) &hdr, sizeof(_ucheader_t), 1, in); if (hdr.bom == 0xfffe) { hdr.cnt = endian_short(hdr.cnt); hdr.size.bytes = endian_long(hdr.size.bytes); } if ((_ucprop_size = hdr.cnt) == 0) { fclose(in); return -1; } /* * Allocate all the storage needed for the lookup table. */ _ucprop_offsets = (ac_uint2 *) malloc(hdr.size.bytes); /* * Calculate the offset into the storage for the ranges. The offsets * array is on a 4-byte boundary and one larger than the value provided in * the header count field. This means the offset to the ranges must be * calculated after aligning the count to a 4-byte boundary. */ if ((size = ((hdr.cnt + 1) * sizeof(ac_uint2))) & 3) size += 4 - (size & 3); size >>= 1; _ucprop_ranges = (ac_uint4 *) (_ucprop_offsets + size); /* * Load the offset array. */ fread((char *) _ucprop_offsets, sizeof(ac_uint2), size, in); /* * Do an endian swap if necessary. Don't forget there is an extra node on * the end with the final index. */ if (hdr.bom == 0xfffe) { for (i = 0; i <= _ucprop_size; i++) _ucprop_offsets[i] = endian_short(_ucprop_offsets[i]); } /* * Load the ranges. The number of elements is in the last array position * of the offsets. */ fread((char *) _ucprop_ranges, sizeof(ac_uint4), _ucprop_offsets[_ucprop_size], in); fclose(in); /* * Do an endian swap if necessary. */ if (hdr.bom == 0xfffe) { for (i = 0; i < _ucprop_offsets[_ucprop_size]; i++) _ucprop_ranges[i] = endian_long(_ucprop_ranges[i]); } return 0;}static void_ucprop_unload(void){ if (_ucprop_size == 0) return; /* * Only need to free the offsets because the memory is allocated as a * single block. */ free((char *) _ucprop_offsets); _ucprop_size = 0;}#endifstatic int_ucprop_lookup(ac_uint4 code, ac_uint4 n){ long l, r, m; if (_ucprop_size == 0) return 0; /* * There is an extra node on the end of the offsets to allow this routine * to work right. If the index is 0xffff, then there are no nodes for the * property. */ if ((l = _ucprop_offsets[n]) == 0xffff) return 0; /* * Locate the next offset that is not 0xffff. The sentinel at the end of * the array is the max index value. */ for (m = 1; n + m < _ucprop_size && _ucprop_offsets[n + m] == 0xffff; m++) ; r = _ucprop_offsets[n + m] - 1; while (l <= r) { /* * Determine a "mid" point and adjust to make sure the mid point is at * the beginning of a range pair. */ m = (l + r) >> 1; m -= (m & 1); if (code > _ucprop_ranges[m + 1]) l = m + 2; else if (code < _ucprop_ranges[m]) r = m - 2; else if (code >= _ucprop_ranges[m] && code <= _ucprop_ranges[m + 1]) return 1; } return 0;}intucisprop(ac_uint4 code, ac_uint4 mask1, ac_uint4 mask2){ ac_uint4 i; if (mask1 == 0 && mask2 == 0) return 0; for (i = 0; mask1 && i < 32; i++) { if ((mask1 & masks32[i]) && _ucprop_lookup(code, i)) return 1; } for (i = 32; mask2 && i < _ucprop_size; i++) { if ((mask2 & masks32[i & 31]) && _ucprop_lookup(code, i)) return 1; } return 0;}/************************************************************************** * * Support for case mapping. * **************************************************************************/#if !HARDCODE_DATA/* These record the number of slots in the map. * There are 3 words per slot. */static ac_uint4 _uccase_size;static ac_uint2 _uccase_len[2];static ac_uint4 *_uccase_map;/* * Return -1 on error, 0 if okay */static int_uccase_load(char *paths, int reload){ FILE *in; ac_uint4 i; _ucheader_t hdr; if (_uccase_size > 0) { if (!reload) /* * The case mappings have already been loaded. */ return 0; free((char *) _uccase_map); _uccase_size = 0; } if ((in = _ucopenfile(paths, "case.dat", "rb")) == 0) return -1; /* * Load the header. */ fread((char *) &hdr, sizeof(_ucheader_t), 1, in); if (hdr.bom == 0xfffe) { hdr.cnt = endian_short(hdr.cnt); hdr.size.len[0] = endian_short(hdr.size.len[0]); hdr.size.len[1] = endian_short(hdr.size.len[1]); } /* * Set the node count and lengths of the upper and lower case mapping * tables. */ _uccase_size = hdr.cnt; _uccase_len[0] = hdr.size.len[0]; _uccase_len[1] = hdr.size.len[1]; _uccase_map = (ac_uint4 *) malloc(_uccase_size * 3 * sizeof(ac_uint4)); /* * Load the case mapping table. */ fread((char *) _uccase_map, sizeof(ac_uint4), _uccase_size * 3, in); /* * Do an endian swap if necessary. */ if (hdr.bom == 0xfffe) { for (i = 0; i < _uccase_size * 3; i++) _uccase_map[i] = endian_long(_uccase_map[i]); } fclose(in); return 0;}static void_uccase_unload(void){ if (_uccase_size == 0) return; free((char *) _uccase_map); _uccase_size = 0;}#endifstatic ac_uint4_uccase_lookup(ac_uint4 code, long l, long r, int field){ long m; ac_uint4 *tmp; /* * Do the binary search. */ while (l <= r) { /* * Determine a "mid" point and adjust to make sure the mid point is at * the beginning of a case mapping triple. */ m = (l + r) >> 1; tmp = &_uccase_map[m*3]; if (code > *tmp) l = m + 1; else if (code < *tmp) r = m - 1; else if (code == *tmp) return tmp[field]; } return code;}ac_uint4uctoupper(ac_uint4 code){ int field; long l, r; if (ucisupper(code)) return code; if (ucislower(code)) { /* * The character is lower case. */ field = 2; l = _uccase_len[0]; r = (l + _uccase_len[1]) - 1; } else { /* * The character is title case. */ field = 1; l = _uccase_len[0] + _uccase_len[1]; r = _uccase_size - 1; } return _uccase_lookup(code, l, r, field);}ac_uint4uctolower(ac_uint4 code){ int field; long l, r; if (ucislower(code)) return code; if (ucisupper(code)) { /* * The character is upper case. */ field = 1; l = 0; r = _uccase_len[0] - 1; } else { /* * The character is title case. */ field = 2; l = _uccase_len[0] + _uccase_len[1]; r = _uccase_size - 1; } return _uccase_lookup(code, l, r, field);}ac_uint4uctotitle(ac_uint4 code){ int field; long l, r; if (ucistitle(code)) return code; /* * The offset will always be the same for converting to title case. */ field = 2; if (ucisupper(code)) { /* * The character is upper case. */ l = 0; r = _uccase_len[0] - 1; } else { /* * The character is lower case. */ l = _uccase_len[0];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -