📄 swistring.c

📁 OSB-PIK-OpenVXI-3.0.0源代码 “中国XML论坛 - 专业的XML技术讨论区--XML在语音技术中的应用”
💻 C
字号:
/* SWIstring, locale independant conversions */ /****************License************************************************  *  * Copyright 2000-2003.  ScanSoft, Inc.      *  * Use of this software is subject to notices and obligations set forth   * in the SpeechWorks Public License - Software Version 1.2 which is   * included with this software.   *  * ScanSoft is a registered trademark of ScanSoft, Inc., and OpenSpeech,   * SpeechWorks and the SpeechWorks logo are registered trademarks or   * trademarks of SpeechWorks International, Inc. in the United States   * and other countries.  *  ***********************************************************************/  /* -----1=0-------2=0-------3=0-------4=0-------5=0-------6=0-------7=0-------8  */  #include <stdio.h> #include <stdlib.h> #include <string.h> #include <limits.h> #include <SWIstring.h> #include "misc_string.h"  #define INVALID_IUC_16BIT(C) \    (C >= 0xD800 && C <= 0xDBFF) || \    (C == 0xFFFE || C == 0xFFFF)  #define INVALID_IUC_32BIT(C) \    (C & 0xFFFF0000) || INVALID_IUC_16BIT(C)  /*  SWIisvalid_unicode tests that the given wchar string  *    - does not contain high surrogates (D800 to DBFF)  *    - does not contain non-characters (FFFE and FFFF)  *    - the top 16-bit of 32-bit wchar are 0  */ int SWIisvalid_unicode(const wchar_t *wstr) {   int i, len = wcslen(wstr);  #if defined(_linux_) && defined(__GNUC__)     for (i = 0; i < len; i++, wstr++) {       if ( INVALID_IUC_32BIT(*wstr) )         return 0;     } #elif defined (_win32_)     for (i = 0; i < len; i++, wstr++) {       if ( INVALID_IUC_16BIT(*wstr) )         return 0;     } #else   if (sizeof(wchar_t) == 4) {     for (i = 0; i < len; i++, wstr++) {       if ( INVALID_IUC_32BIT(*wstr) )         return 0;     }   }   else {     for (i = 0; i < len; i++, wstr++) {       if ( INVALID_IUC_16BIT(*wstr) )         return 0;     }   } #endif   return 1; }  /*  SWIisascii() function tests that the given wchar string  *  contains only ASCII characters, which are any character  *  with a value less than than or equal to 0x7F.  */ int SWIisascii(const wchar_t *wstr) {   int i, len = wcslen(wstr);    for (i = 0; i < len; i++) {     if (*wstr++ > 0x7F)       return 0;   }   return 1; }  /*  SWIislatin1() function tests that the given wchar string  *  contains only LATIN-1 characters, which are any character  *  with a value less than than or equal to 0xFF.  */ int SWIislatin1(const wchar_t *wstr) {   int i, len = wcslen(wstr);    for (i = 0; i < len; i++) {     if (*wstr++ > 0xFF)       return 0;   }   return 1; }    /* FIXME?  lots of error checking here - strlen, ascii;    *  should we be less risk conscious but more efficient?    */ double SWIwcstod( const wchar_t *wstr) {   int i;   unsigned char tmpbuf[SWIchar_MAXSTRLEN];   unsigned char *str;   double dval;    int len = wcslen(wstr);   str = BUFMALLOC2(tmpbuf, SWIchar_MAXSTRLEN, len);   if (!str)     return 0.0;    for (i = 0; i < len; i++) {     if (*wstr > 0x7F)  /* is it ascii at least? */       return 0.0;     str[i] = (unsigned char) *wstr++;   }   str[i] = '\0';      dval =  SWIatof(str);   BUFFREE(tmpbuf, str);   return dval; }     /* FIXME?  lots of error checking here - strlen, ascii;    *  should we be less risk conscious but more efficient?    */ float SWIwcstof( const wchar_t *wstr) {   int i;   unsigned char tmpbuf[SWIchar_MAXSTRLEN];   unsigned char *str;   float fval;    int len = wcslen(wstr);   str = BUFMALLOC2(tmpbuf, SWIchar_MAXSTRLEN, len);   if (!str)     return 0.0;    for (i = 0; i < len; i++) {     if (*wstr > 0x7F)  /* is it ascii at least? */       return 0.0;     str[i] = (unsigned char) *wstr++;   }   str[i] = '\0';      fval =  SWIatofloat(str);   BUFFREE(tmpbuf, str);    return fval; }    /* FIXME?  lots of error checking here - strlen, ascii;    *  should we be less risk conscious but more efficient?    */ SWIcharResult SWIwtof( const wchar_t *wstr, float *fval) {   int i;   unsigned char tmpbuf[SWIchar_MAXSTRLEN];   unsigned char *str;    int len = wcslen(wstr);   str = BUFMALLOC2(tmpbuf, SWIchar_MAXSTRLEN, len);   if (!str)     return SWIchar_BUFFER_OVERFLOW;    for (i = 0; i < len; i++) {     if (*wstr > 0x7F)  /* is it ascii at least? */       return SWIchar_INVALID_INPUT;     str[i] = (unsigned char) *wstr++;   }   str[i] = '\0';      *fval =  SWIatofloat(str);   BUFFREE(tmpbuf, str);    return SWIchar_SUCCESS; }   /* Converts char-based string to wchar-based string  * - len is size of wstr  * - returns SWIchar_BUFFER_OVERFLOW if len(str)>=len  */ SWIcharResult SWIstrtowcs(const char *str, wchar_t *wstr, int len) {   const wchar_t *funcName = L"SWIstrtowcs";   int l;   int i;    l = strlen(str);    if (l >= len) {     return SWIchar_BUFFER_OVERFLOW;   }    for (i = 0; i < l + 1; i++) {     wstr[i] = (unsigned char)str[i];  // the high bit is not a sign bit //   }    return SWIchar_SUCCESS; }   /* Converts wchar-based string to char-based string.  * - when there is conversion loss, returns SWIchar_CONVERSION_LOSS  *   and each loss-affected char is \277 (upside-down question mark)  * - len is size of str  * - returns SWIchar_BUFFER_OVERFLOW if len(wstr)>=len */ SWIcharResult SWIwcstostr(const wchar_t *wstr, char *str, int len) {   const wchar_t *funcName = L"wstrtostr";   int l;   int i;   SWIcharResult rc = SWIchar_SUCCESS;    l = wcslen(wstr);    if (l > len) {     return SWIchar_BUFFER_OVERFLOW;   }    for (i = 0; i < l + 1; i++) {     str[i] = (unsigned char) wstr[i];     if (str[i] != wstr[i]) {       rc = SWIchar_CONVERSION_LOSS;       str[i] = '\277';     }   }   return rc; }   SWIcharResult SWIitowcs(int i, wchar_t *wstr, int len) {   const wchar_t *funcName = L"SWIitowcs";   unsigned char buf[SWIchar_MAXSTRLEN];   unsigned char *pend = buf+SWIchar_MAXSTRLEN;   unsigned char *pbuf = pend;   int sign = 1;    if ( i < 0 ) {     if ( i == INT_MIN ) {       return SWIchar_INVALID_INPUT;     }      i = -i;     sign = -1;   }    while(i) {     int ch = i%10;     i/=10;      *--pbuf = ch+'0';   }    if (pbuf == pend)     *--pbuf = '0';    if ( sign == -1 ) {     *--pbuf = '-';   }    if ( pend-pbuf > len ) {     return SWIchar_BUFFER_OVERFLOW;   }    while (pbuf < pend) {     *wstr++ = (wchar_t) *pbuf++;   }   *wstr = '\0';   return SWIchar_SUCCESS; }     /* The function stops reading the input string at the first character that it     * cannot recognize as part of a number. This character may be the null character     * (L'\0') terminating the string.    * Returns SWIchar_INVALID_INPUT if string is empty, or    *  does not start with [sign]digit    */ SWIcharResult SWIwcstoi(const wchar_t *wstr, int *pi) {   int value = 0;   int sign = 1;   const wchar_t *start;    while ( SWIchar_isspace(*wstr) && *wstr != '\0' ) wstr++;    if ( *wstr == '-' ) {     sign = -1;     wstr++;   }   else if ( *wstr == '+' ) {     wstr++;   }   start = wstr;    while( *wstr ) {     if ( *wstr < '0' || *wstr > '9' ) {       break;     }     value = 10*value + *wstr++ - '0';   }    if (start == wstr)  /*  no digits in the string */     return SWIchar_INVALID_INPUT;    if ( sign == -1 ) value = -value;    *pi = value;   return SWIchar_SUCCESS; }   int SWIwtoi(const wchar_t *wstr) {   int val;   SWIcharResult rc = SWIwcstoi( wstr, &val );   if (rc != SWIchar_SUCCESS)     return 0;    return val; }   int SWIwcsstrcmp(const wchar_t *w, const char *str) {   while (*w && *str && *w == *str) {w++;str++;}   return *w - *str; }  #ifndef _win32_  wchar_t *SWIwcstok(wchar_t *wcs, const wchar_t *delim, 			      wchar_t **ptr) { #ifndef __GNUC__   return wcstok_r(wcs,delim,ptr); #else   return wcstok(wcs,delim,ptr); #endif }  char *SWIstrtok(char *str, const char *delim, char **ptr) { #ifndef __GNUC__   return strtok_r(str,delim,ptr); #else   return strtok(str,delim); #endif }   #elif (_win32_)  wchar_t *SWIwcstok(wchar_t *wcs, const wchar_t *delim, 			      wchar_t **ptr) {   *ptr = NULL;   return wcstok(wcs, delim); }  char *SWIstrtok(char *str, const char *delim, 			      char **ptr) {   *ptr = NULL;   return strtok(str, delim); }  #endif
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -