📄 ure.c
字号:
/* $OpenLDAP: pkg/ldap/libraries/liblunicode/ure/ure.c,v 1.15.2.3 2007/01/02 21:43:51 kurt Exp $ *//* This work is part of OpenLDAP Software <http://www.openldap.org/>. * * Copyright 1998-2007 The OpenLDAP Foundation. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted only as authorized by the OpenLDAP * Public License. * * A copy of this license is available in file LICENSE in the * top-level directory of the distribution or, alternatively, at * <http://www.OpenLDAP.org/license.html>. *//* Copyright 1997, 1998, 1999 Computing Research Labs, * New Mexico State University * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR * THE USE OR OTHER DEALINGS IN THE SOFTWARE. *//* $Id: ure.c,v 1.2 1999/09/21 15:47:43 mleisher Exp $" */#include "portable.h"#include <ac/stdlib.h>#include <ac/string.h>#include <ac/unistd.h>#include "ure.h"/* * Flags used internally in the DFA. */#define _URE_DFA_CASEFOLD 0x01#define _URE_DFA_BLANKLINE 0x02static unsigned long cclass_flags[] = { 0, _URE_NONSPACING, _URE_COMBINING, _URE_NUMDIGIT, _URE_NUMOTHER, _URE_SPACESEP, _URE_LINESEP, _URE_PARASEP, _URE_CNTRL, _URE_PUA, _URE_UPPER, _URE_LOWER, _URE_TITLE, _URE_MODIFIER, _URE_OTHERLETTER, _URE_DASHPUNCT, _URE_OPENPUNCT, _URE_CLOSEPUNCT, _URE_OTHERPUNCT, _URE_MATHSYM, _URE_CURRENCYSYM, _URE_OTHERSYM, _URE_LTR, _URE_RTL, _URE_EURONUM, _URE_EURONUMSEP, _URE_EURONUMTERM, _URE_ARABNUM, _URE_COMMONSEP, _URE_BLOCKSEP, _URE_SEGMENTSEP, _URE_WHITESPACE, _URE_OTHERNEUT,};/* * Symbol types for the DFA. */#define _URE_ANY_CHAR 1#define _URE_CHAR 2#define _URE_CCLASS 3#define _URE_NCCLASS 4#define _URE_BOL_ANCHOR 5#define _URE_EOL_ANCHOR 6/* * Op codes for converting the NFA to a DFA. */#define _URE_SYMBOL 10#define _URE_PAREN 11#define _URE_QUEST 12#define _URE_STAR 13#define _URE_PLUS 14#define _URE_ONE 15#define _URE_AND 16#define _URE_OR 17#define _URE_NOOP 0xffff#define _URE_REGSTART 0x8000#define _URE_REGEND 0x4000/* * Structure used to handle a compacted range of characters. */typedef struct { ucs4_t min_code; ucs4_t max_code;} _ure_range_t;typedef struct { _ure_range_t *ranges; ucs2_t ranges_used; ucs2_t ranges_size;} _ure_ccl_t;typedef union { ucs4_t chr; _ure_ccl_t ccl;} _ure_sym_t;/* * This is a general element structure used for expressions and stack * elements. */typedef struct { ucs2_t reg; ucs2_t onstack; ucs2_t type; ucs2_t lhs; ucs2_t rhs;} _ure_elt_t;/* * This is a structure used to track a list or a stack of states. */typedef struct { ucs2_t *slist; ucs2_t slist_size; ucs2_t slist_used;} _ure_stlist_t;/* * Structure to track the list of unique states for a symbol * during reduction. */typedef struct { ucs2_t id; ucs2_t type; unsigned long mods; unsigned long props; _ure_sym_t sym; _ure_stlist_t states;} _ure_symtab_t;/* * Structure to hold a single state. */typedef struct { ucs2_t id; ucs2_t accepting; ucs2_t pad; _ure_stlist_t st; _ure_elt_t *trans; ucs2_t trans_size; ucs2_t trans_used;} _ure_state_t;/* * Structure used for keeping lists of states. */typedef struct { _ure_state_t *states; ucs2_t states_size; ucs2_t states_used;} _ure_statetable_t;/* * Structure to track pairs of DFA states when equivalent states are * merged. */typedef struct { ucs2_t l; ucs2_t r;} _ure_equiv_t;/* * Structure used for constructing the NFA and reducing to a minimal DFA. */typedef struct _ure_buffer_t { int reducing; int error; unsigned long flags; _ure_stlist_t stack; /* * Table of unique symbols encountered. */ _ure_symtab_t *symtab; ucs2_t symtab_size; ucs2_t symtab_used; /* * Tracks the unique expressions generated for the NFA and when the NFA is * reduced. */ _ure_elt_t *expr; ucs2_t expr_used; ucs2_t expr_size; /* * The reduced table of unique groups of NFA states. */ _ure_statetable_t states; /* * Tracks states when equivalent states are merged. */ _ure_equiv_t *equiv; ucs2_t equiv_used; ucs2_t equiv_size;} _ure_buffer_t;typedef struct { ucs2_t symbol; ucs2_t next_state;} _ure_trans_t;typedef struct { ucs2_t accepting; ucs2_t ntrans; _ure_trans_t *trans;} _ure_dstate_t;typedef struct _ure_dfa_t { unsigned long flags; _ure_symtab_t *syms; ucs2_t nsyms; _ure_dstate_t *states; ucs2_t nstates; _ure_trans_t *trans; ucs2_t ntrans;} _ure_dfa_t;/************************************************************************* * * Functions. * *************************************************************************/static void_ure_memmove(char *dest, char *src, unsigned long bytes){ long i, j; i = (long) bytes; j = i & 7; i = (i + 7) >> 3; /* * Do a memmove using Ye Olde Duff's Device for efficiency. */ if (src < dest) { src += bytes; dest += bytes; switch (j) { case 0: do { *--dest = *--src; case 7: *--dest = *--src; case 6: *--dest = *--src; case 5: *--dest = *--src; case 4: *--dest = *--src; case 3: *--dest = *--src; case 2: *--dest = *--src; case 1: *--dest = *--src; } while (--i > 0); } } else if (src > dest) { switch (j) { case 0: do { *dest++ = *src++; case 7: *dest++ = *src++; case 6: *dest++ = *src++; case 5: *dest++ = *src++; case 4: *dest++ = *src++; case 3: *dest++ = *src++; case 2: *dest++ = *src++; case 1: *dest++ = *src++; } while (--i > 0); } }}static void_ure_push(ucs2_t v, _ure_buffer_t *b){ _ure_stlist_t *s; if (b == 0) return; /* * If the `reducing' parameter is non-zero, check to see if the value * passed is already on the stack. */ if (b->reducing != 0 && b->expr[v].onstack != 0) return; s = &b->stack; if (s->slist_used == s->slist_size) { if (s->slist_size == 0) s->slist = (ucs2_t *) malloc(sizeof(ucs2_t) << 3); else s->slist = (ucs2_t *) realloc((char *) s->slist, sizeof(ucs2_t) * (s->slist_size + 8)); s->slist_size += 8; } s->slist[s->slist_used++] = v; /* * If the `reducing' parameter is non-zero, flag the element as being on * the stack. */ if (b->reducing != 0) b->expr[v].onstack = 1;}static ucs2_t_ure_peek(_ure_buffer_t *b){ if (b == 0 || b->stack.slist_used == 0) return _URE_NOOP; return b->stack.slist[b->stack.slist_used - 1];}static ucs2_t_ure_pop(_ure_buffer_t *b){ ucs2_t v; if (b == 0 || b->stack.slist_used == 0) return _URE_NOOP; v = b->stack.slist[--b->stack.slist_used]; if (b->reducing) b->expr[v].onstack = 0; return v;}/************************************************************************* * * Start symbol parse functions. * *************************************************************************//* * Parse a comma-separated list of integers that represent character * properties. Combine them into a mask that is returned in the `mask' * variable, and return the number of characters consumed. */static unsigned long_ure_prop_list(ucs2_t *pp, unsigned long limit, unsigned long *mask, _ure_buffer_t *b){ unsigned long n, m; ucs2_t *sp, *ep; sp = pp; ep = sp + limit; for (m = n = 0; b->error == _URE_OK && sp < ep; sp++) { if (*sp == ',') { /* * Encountered a comma, so select the next character property flag * and reset the number. */ m |= cclass_flags[n]; n = 0; } else if (*sp >= '0' && *sp <= '9') /* * Encountered a digit, so start or continue building the cardinal * that represents the character property flag. */ n = (n * 10) + (*sp - '0'); else /* * Encountered something that is not part of the property list. * Indicate that we are done. */ break; /* * If a property number greater than 32 occurs, then there is a * problem. Most likely a missing comma separator. */ if (n > 32) b->error = _URE_INVALID_PROPERTY; } if (n != 0) m |= cclass_flags[n]; /* * Set the mask that represents the group of character properties. */ *mask = m; /*
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -