uniconv.cpp

来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 374 行

CPP
374
字号
/* * Copyright 2002-2004 The Apache Software Foundation. *  * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at *  *      http://www.apache.org/licenses/LICENSE-2.0 *  * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *//* * $Id: uniconv.cpp,v 1.3 2004/09/08 13:56:46 peiyongz Exp $ */#include <stdlib.h>#include <stdio.h>#include <string.h>#include <errno.h>#include <ctype.h>#include <cunhc.h>#include "ccsid.h"#include "uniconv.h"XERCES_CPP_NAMESPACE_BEGIN#define WORK_BUFFER_SIZE 16*1024#define DDA_NEEDED CUNBCPRM_DDA_REQ#define RETRY_THRESHOLD 10000// This is utility routine which strips '-', '_' and spaces from the name and// also upper cases the name. It also returns the length of the string.static int stripNameCopy(const char *s,char *d,int max){   int si=0;   int di=0;   while ( (s[si] != '\0') && (di < max) ) {      if ( (s[si] == ' ') || (s[si] == '_') || (s[si] == '-') )         si++;      else {         d[di] = toupper(s[si]);         si++;di++;      }   }   d[di] = 0;   if (s[si] != '\0')      return -1;   return si;}// This takes a name and does a lookup into the ccsid table (from ccsid.h)// to find the corresponding ccsid. It also checks if the string ends in s390// and returns that information to the caller.// The lookup into the table is done via a binary search since we know that the// table was nicely sorted for us.static int getccsid(const char *s,int * is390){   char tmpstr[_AE_MAX_CODESET_NAME_LENGTH];   int start;   int limit;   int index;   int result;   int thelen;   // Clean up the name....   if (s == NULL)      return -1;   if ((thelen = stripNameCopy(s,tmpstr,_AE_MAX_CODESET_NAME_LENGTH-1)) == -1)      return -1;   // Check for the S390 string in the name   *is390 = 0;   if ( (strstr((char *)tmpstr, "S390")) != NULL )      *is390 = 1;   // Now lookup the name via a binary search   start = 0;   limit = _AE_NUM_OF_CODESETS;   index = limit/2;   while ( ((result=strcoll(tmpstr, CCSID_MAPPING[index].NAME)) != 0) &&            (start < limit-1) ) {      if (result < 0)         limit = index;      else          start = index;      index = (start+limit)/2;   }   if (result != 0 && start >= limit-1)      return -1;   return CCSID_MAPPING[index].CCSID;}// **********************************************************************// These are the character conversion services// **********************************************************************// "Open" the conversion. Allocate memory to hold the handle which// unicode services requires. Call unicode services with a 0 length// so that it can initialize it's handle.// Note that unicode services must always be called in a loop since// it could be busy reloading its tables.uniconv_t uniconv_open(const char *destenc, const char *srcenc) {   CUNBCPRM  defparms = {CUNBCPRM_DEFAULT};   CUNBCPRM * tmpp;   void * handle_area;   char *cptr;   int srcis390;   int destis390;   errno = 0;   handle_area = malloc (sizeof(CUNBCPRM)+DDA_NEEDED+WORK_BUFFER_SIZE+8);   tmpp = (CUNBCPRM *) handle_area;   if (tmpp==NULL)      return (uniconv_t)-1;   // initialize the parm area with defaults, then start filling it   // in with our values.   memcpy(tmpp,&defparms,sizeof(defparms));   tmpp->Src_Buf_Len= 0;   // get the ccsids.   if ( ((tmpp->Src_CCSID=getccsid(srcenc,&srcis390)) == -1) ||        ((tmpp->Targ_CCSID=getccsid(destenc,&destis390)) == -1) ) {      errno=ENOENT;      free(handle_area);      return (uniconv_t)-1;   }   tmpp->Wrk_Buf_Ptr=(void*) (((unsigned int) handle_area) + sizeof(CUNBCPRM)+DDA_NEEDED +8);   tmpp->Wrk_Buf_Len=WORK_BUFFER_SIZE;   // Doubleword align the DDA area   tmpp->DDA_Buf_Ptr=(void*) ((unsigned int) handle_area + sizeof(CUNBCPRM) +7);   tmpp->DDA_Buf_Ptr = (void*) ((unsigned int) tmpp->DDA_Buf_Ptr & ~7);   tmpp->DDA_Buf_Len=DDA_NEEDED;   // This flag tells the services to automatically refresh the handle if it   // becomes invalid.   tmpp->Flag1|=CUNBCPRM_REFRESH_AT_INV_HANDLE_START;   tmpp->Flag1|=CUNBCPRM_SUB_ACTION_SUBSTITUTE;   /* Determine which technique to use */   if ( (srcis390) || (destis390) )      // This technique causes it to swap LF and NL.      memcpy(tmpp->Technique,"L       ",8);   else      memcpy(tmpp->Technique,"        ",8);   // Retry if the services are busy reloading their tables.   int retry_count = 0;   while (retry_count < RETRY_THRESHOLD) {      CUNLCNV(tmpp);      if (tmpp->Return_Code == CUN_RC_OK)         break;      else if ( (tmpp->Return_Code == CUN_RC_WARN) &&                ( (tmpp->Reason_Code == CUN_RS_NO_HANDLE) ||                  (tmpp->Reason_Code == CUN_RS_INV_HANDLE_NOSET) ||                  (tmpp->Reason_Code == CUN_RS_INV_HANDLE_SET) ) )         // Let it loop around again         retry_count++;      else         break;   }   if (tmpp->Return_Code != CUN_RC_OK) {      free(handle_area);      errno=EINVAL;      handle_area = (uniconv_t)-1;   }   return handle_area;}// All that is required for close is to free the handle buffer.int uniconv_close(uniconv_t handle_area) {   errno = 0;   if (((int)handle_area) <= 0) {      errno=EBADF;      return -1;   }   free(handle_area);   return 0;}// This does the real conversion.// Note that unicode services must always be called in a loop since// it could be busy reloading its tables.int uniconv(uniconv_t cd, char **inbuf,  size_t *inbytesleft,                          char **outbuf, size_t *outbytesleft) {   CUNBCPRM * tmpp;   size_t startinlen = *inbytesleft;   size_t startoutlen = *outbytesleft;   errno = 0;   if (((int)cd) <= 0) {      errno=EBADF;      return -1;   }   // Fill in the parameter area with current values   tmpp = (CUNBCPRM *) cd;   tmpp->Src_Buf_Ptr = *inbuf;   tmpp->Src_Buf_Len = *inbytesleft;   tmpp->Targ_Buf_Ptr = *outbuf;   tmpp->Targ_Buf_Len = *outbytesleft;   // Retry if the services are busy reloading their tables.   int retry_count = 0;   while (retry_count < RETRY_THRESHOLD) {      CUNLCNV(tmpp);      if (tmpp->Return_Code == CUN_RC_OK)         break;      else if ( (tmpp->Return_Code == CUN_RC_WARN) &&                ( (tmpp->Reason_Code == CUN_RS_NO_HANDLE) ||                  (tmpp->Reason_Code == CUN_RS_INV_HANDLE_NOSET) ||                  (tmpp->Reason_Code == CUN_RS_INV_HANDLE_SET) ) )         // Let it loop around again         retry_count++;      else         break;   }   *inbuf        = (char *)tmpp->Src_Buf_Ptr;   *inbytesleft  = tmpp->Src_Buf_Len;   *outbuf       = (char *)tmpp->Targ_Buf_Ptr;   *outbytesleft = tmpp->Targ_Buf_Len;   if (tmpp->Return_Code != CUN_RC_OK) {      if (tmpp->Reason_Code == CUN_RS_TRG_EXH)         errno=E2BIG;      else if (tmpp->Reason_Code == CUN_RS_MBC_INCOMPLETE)         errno=EINVAL;      else {         errno=EBADF;         return -1;      }   }   return (startinlen-*inbytesleft);}// **********************************************************************// These are the case conversion services.// **********************************************************************// This "opens" the case conversion. It allocates the parameter area// then does a dummy call to unicode services so that it can set up// the handle.// Note that unicode services must always be called in a loop since// it could be busy reloading its tables.static inline uniconv_t uniconv_case_open(unsigned char direction) {CUNBAPRM  defparms = {CUNBAPRM_DEFAULT};CUNBAPRM * tmpp;void * handle_area;   errno = 0;   handle_area = malloc (sizeof(CUNBAPRM)+CUNBAPRM_DDA_REQ);   tmpp = (CUNBAPRM *) handle_area;   if (tmpp==NULL)      return (uniconv_t)-1;   // initialize the parm area with defaults, then start filling it   // in with our values.   memcpy(tmpp,&defparms,sizeof(defparms));   tmpp->DDA_Buf_Ptr=(void*) ((unsigned int) handle_area + sizeof(CUNBAPRM));   tmpp->DDA_Buf_Len=CUNBAPRM_DDA_REQ;   // This flag tells the services to automatically refresh the handle if it   // becomes invalid.   tmpp->Flag1|=CUNBAPRM_REFRESH_AT_INV_HANDLE_START;   unichar_t inchar = 0x61;   unichar_t outchar;   tmpp->Src_Buf_Ptr=&inchar;   tmpp->Targ_Buf_Ptr=&outchar;   tmpp->Targ_Buf_Len=sizeof(unichar_t);   tmpp->Src_Buf_Len=sizeof(unichar_t);   tmpp->Conv_Type=direction;   // Retry if the services are busy reloading their tables.   int retry_count = 0;   while (true) {      CUNLASE ( tmpp );      if (tmpp->Return_Code == CUN_RC_OK) {         break;      } else if ( (tmpp->Return_Code == CUN_RC_WARN) &&                  ( (tmpp->Reason_Code == CUN_RS_NO_HANDLE) ||                    (tmpp->Reason_Code == CUN_RS_INV_HANDLE_NOSET) ||                    (tmpp->Reason_Code == CUN_RS_INV_HANDLE_SET) ) ) {         // Let it loop around again         retry_count++;         if (retry_count > RETRY_THRESHOLD) {            errno = ENOSYS;            break;         }      } else {         errno = ENOSYS;         break;      }   }   if (tmpp->Return_Code != CUN_RC_OK) {      free(handle_area);      errno=EINVAL;      handle_area = (uniconv_t)-1;   }   return handle_area;}// These are the actual external interfaces for the open functionuniconv_t uniconv_toupper_open() {   return uniconv_case_open(CUNBAPRM_TO_UPPER);}uniconv_t uniconv_tolower_open() {   return uniconv_case_open(CUNBAPRM_TO_LOWER);}// This closes the case conversion. All it does is free the handle buffer.int _uniconv_case_close(uniconv_t handle_area) {   errno = 0;   if (((int)handle_area) <= 0) {      errno=EBADF;      return -1;   }   free(handle_area);   return 0;}// This does the actual case conversion. The direction is already// stored in the handle buffer.// Note that unicode services must always be called in a loop since// it could be busy reloading its tables.unichar_t uniconv_caseit (uniconv_t cd,unichar_t inchar) {   unichar_t outchar;   CUNBAPRM * tmpp;   errno = 0;   if (((int)cd) <= 0) {      errno=EBADF;      return -1;   }   tmpp = (CUNBAPRM *) cd;   tmpp->Src_Buf_Ptr=&inchar;   tmpp->Targ_Buf_Ptr=&outchar;   tmpp->Targ_Buf_Len=sizeof(unichar_t);   tmpp->Src_Buf_Len=sizeof(unichar_t);   // Retry if the services are busy reloading their tables.   int retry_count = 0;   while (true) {      CUNLASE ( tmpp );      if (tmpp->Return_Code == CUN_RC_OK) {         break;      }      else if ( (tmpp->Return_Code == CUN_RC_WARN) &&                ( (tmpp->Reason_Code == CUN_RS_NO_HANDLE) ||                  (tmpp->Reason_Code == CUN_RS_INV_HANDLE_NOSET) ||                  (tmpp->Reason_Code == CUN_RS_INV_HANDLE_SET) ) ) {         // Let it loop around again         retry_count++;         if (retry_count > RETRY_THRESHOLD) {            errno = ENOSYS;            break;         }      } else {         errno = ENOSYS;         break;      }   }   return outchar;}XERCES_CPP_NAMESPACE_END

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?