⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 2utf.c

📁 一个UTF的源代码,可以提供参考
💻 C
📖 第 1 页 / 共 5 页
字号:
  }  if ((encp = encoding))  {    PUTC_IN_UTF8 ('?');    while (*encp)    {      PUTC_IN_UTF8 (*encp);      encp++;    }  }}  inline intParse_string (char *charmap_format_string, wchar_t * charmap, char *s){  unsigned int index;  unsigned long unichar;  if (s[0] == '#' || s[0] == '%')    return (0);  if (sscanf (s, charmap_format_string, &index, &unichar) == 2)  {    charmap[index] = (wchar_t) unichar;    if (Debug > 8)      fprintf (stderr, "2UTF: '%s'  -->   0x%.2X is U+%.4lX.\n" ,s, index, (long unsigned int) charmap[index]);    return (1);  }  else    return (0);}  inline voidPipe_to_UTF8 (FILE * in_stream){  register int c;  if (charset_p->type == ICONV)    while ((c = getc (in_stream)) != EOF)      Put_to_iconv_buf (c, charset_p);  else if (charset_p->type == KNOWN)    while ((c = getc (in_stream)) != EOF)      Putc_in_UTF8 (c, charset_p);  else    while ((c = getc (in_stream)) != EOF)      if (putc (c, charset_p->pipe) == EOF)	Error (IO_err);  if (charset_p->type == ICONV)    Put_to_iconv_buf (-1, charset_p);  if (ferror (in_stream))    Error (IO_err);}  inline intPutc_in_UTF8 (int c, struct charset_type * charset_p){  char s[12];  register int length;  length = wctomb (s, charset_p->charmap[c]);  if (length != -1)  {    if (fwrite (s, 1, length, charset_p->pipe) != (size_t) length)      Error (IO_err);  }  else if (putc (0x80, charset_p->pipe) == EOF)    Error (IO_err);  return (c);}  inline intPut_to_iconv_buf (int c, struct charset_type * charset_p){  /* c == -1 means flush buffer */  if (c>=0) {    *charset_p->iconv_p->inendptr++ = c;    charset_p->iconv_p->inleft++;  }  if (c<0 || charset_p->iconv_p->inendptr - charset_p->iconv_p->in_buf > ICONV_IN_BUF_SIZE - 10) {    /* buffer is full - do conversion */    size_t outleft, conv, sz;    char * inptr;    /* inleft = charset_p->iconv_p->in_buf + ICONV_IN_BUF_SIZE - charset_p->iconv_p->inptr; */    outleft = charset_p->iconv_p->out_buf + ICONV_OUT_BUF_SIZE - charset_p->iconv_p->outptr;    inptr = charset_p->iconv_p->in_buf;    while (charset_p->iconv_p->inleft > 0) {      errno = 0;      conv = iconv (charset_p->iconv_p->iconv_handle, &inptr, &charset_p->iconv_p->inleft, &charset_p->iconv_p->outptr, &outleft);      if (conv  == (size_t) -1 || errno) {	if (errno == EINVAL) {	  if (c<0 || Debug > 4)	    fprintf (stderr, "2UTF: iconv error because of incomplete multibyte character; input is '%.2s'\n", inptr);	  memmove (charset_p->iconv_p->in_buf, inptr, charset_p->iconv_p->inleft);	  break;	} else if (errno == EILSEQ) {	  if (Debug>3)	    fprintf (stderr, "2UTF: iconv EILSEQ error; input is '%.2s'\n", inptr);	  charset_p->iconv_p->inleft--; outleft--;	  *charset_p->iconv_p->outptr++ = *inptr++;	  continue;	} else {	  /* if (Debug) */	    fprintf (stderr, "2UTF: iconv unknown error errno %i; input is '%.2s'\n", errno, inptr);	  memmove (charset_p->iconv_p->outptr, inptr, charset_p->iconv_p->inleft);	  charset_p->iconv_p->inleft = 0;	  break;	}      }    }    charset_p->iconv_p->inendptr = charset_p->iconv_p->in_buf + charset_p->iconv_p->inleft;    if (sz = charset_p->iconv_p->outptr - charset_p->iconv_p->out_buf) {      charset_p->iconv_p->outptr = charset_p->iconv_p->out_buf;      if (c == -3) {	char * enc;	if (fputs(enc = Encode_MIME_word(charset_p->iconv_p->out_buf, sz), charset_p->pipe) < 0)	  Error (IO_err);	free(enc);      } else {	if (fwrite(charset_p->iconv_p->out_buf, sizeof(char), sz, charset_p->pipe) <= 0)	  Error (IO_err);      }    }  }  if (c == -2)    fflush (charset_p->pipe);  return 1;}  intRead_config_file (){  FILE *config_file;  enum  {    PATHS, CHARSETS  }  state;  size_t size;  unsigned char *config_pathnames = CONFIG_PATHNAMES;  unsigned char *config_pathname = config_pathnames;  unsigned char *cp, *cp2, *nl;  /* check for double null terminator */  while (*(config_pathname = strchr (config_pathname, '\0') + 1) != '\0');  config_pathname = config_pathnames;  do  {    if (Debug > 4)      fprintf (stderr, "2UTF: Looking for '%s' \n", config_pathname);    if ((config_file = fopen (config_pathname, "rt")) == NULL)      if (*(config_pathname = strchr (config_pathname, '\0') + 1) == '\0')	return (FALSE);  }  while (config_file == NULL);  if (Debug > 2)    fprintf (stderr, "2UTF: Opened configuration file '%s' \n", config_pathname);  if ((size = Getdelim (&line.buf, &line.length, '\0', config_file)) == (size_t) -1 || EOF == fclose (config_file))    Error (IO_err_reading_config);  cp = line.buf;  paths_number = 0;  paths = read_paths;  ext_charsets_number = 0;  state = PATHS;  do  {    if ((nl = strchr (cp, '\n')) != NULL)      *nl = '\0';    if (*cp != '#')    {      switch (state)      {	case PATHS:	  if ((cp = strtok (cp, " \t\n")))	  {	    if (Strcasecmp (cp, "[charsets]") == 0)	    {	      state = CHARSETS;	    }	    else	    {	      if (paths_number >= MAX_PATHNAMES)		Error (too_many_pathnames);	      paths[paths_number++] = xstrdup (cp);	      if (Debug > 2)		fprintf (stderr, "2UTF: Pathname for charmaps in configuration file: \n '%s' \n", paths[paths_number - 1]);	    }	  }	  break;	case CHARSETS:	  {	    if (ext_charsets_number >= MAX_EXT_CHARSETS)	      Error (too_many_ext_charsets);	    ext_charsets[ext_charsets_number].USASCII_is_subset = NO;	    if (strncmp (cp += strspn (cp, " \t"), "[US-ASCII_is_subset]", strlen ("[US-ASCII_is_subset]")) == 0)	    {	      cp += strlen ("[US-ASCII_is_subset]") + 1;	      ext_charsets[ext_charsets_number].USASCII_is_subset = IS;	    }	    *(cp2 = ext_charsets[ext_charsets_number].names = xmalloc (strlen (cp) + 3)) = ' ';	    *(cp2 = Stpcpy (cp2 + 1, cp)) = ' ';	    *(cp2 + 1) = '\0';	    if (!strtok (cp, " \t\n") || nl == NULL)	      break;	    if (Debug > 2)	    {	      fprintf (stderr, "2UTF: Charset names: '%s' \n", ext_charsets[ext_charsets_number].names);	      if (ext_charsets[ext_charsets_number].USASCII_is_subset == IS)		fprintf (stderr, "2UTF: US-ASCII is subset " "of this charset. \n");	    }	    Strtoupper (cp2 = ext_charsets[ext_charsets_number].names);	    while (*cp2++)	      if (*cp2 == '\t')		*cp2 = ' ';	    cp = nl + 1;	    if ((nl = strchr (cp, '\n')) != NULL)	      *nl = '\0';	    ext_charsets[ext_charsets_number].to_UTF = xstrdup (cp);	    if (!strtok (cp, " \t\n"))	      ext_charsets[ext_charsets_number].to_UTF = NULL;	    else if (Debug > 2)	      fprintf (stderr, "2UTF: To UTF-8: '%s' \n", ext_charsets[ext_charsets_number].to_UTF);	    if (nl != NULL)	    {	      cp = nl + 1;	      if ((nl = strchr (cp, '\n')) != NULL)		*nl = '\0';	      ext_charsets[ext_charsets_number].from_UTF = xstrdup (cp);	      if (!strtok (cp, " \t\n"))		ext_charsets[ext_charsets_number].from_UTF = NULL;	      else if (Debug > 2)		fprintf (stderr, "2UTF: From UTF-8: '%s' \n", ext_charsets[ext_charsets_number].from_UTF);	    }	    if (ext_charsets[ext_charsets_number].to_UTF == NULL &&ext_charsets[ext_charsets_number].from_UTF == NULL)	      break;	    if (Debug > 2)	      fprintf (stderr, "2UTF: Entry %i O.K. \n", ext_charsets_number);	    ext_charsets_number++;	  }	  break;      }    }    if (nl == NULL)      break;    else    {      cp = nl + 1;      continue;    }  }  while (TRUE);  if (paths_number <= 0)    paths = compiled_paths;  /*Error (no_pathnames); */  return (TRUE);}  inline intSeek_boundary (FILE * in_stream, char *boundary, int push_boundary){  int ret = FALSE;  size_t length=0;  unsigned char *lbp;  while (ret == FALSE)  {    if (in_stream)    {      if ((length = Getline (&line.buf, &line.length, in_stream)) == (size_t) -1)	if (ferror (in_stream))	  Error (IO_err);	else {	  /* FLUSH_OUTPUT(charset_p); */	  return (FALSE);	}      lbp = line.buf;      if (lbp[0] == '-' && lbp[1] == '-')      {	ret = Str_is_boundary (lbp + 2, boundary, push_boundary);	push_boundary = FALSE;      }    }    else    {      ret = Str_is_boundary (lbp = NULL, boundary, push_boundary);      push_boundary = FALSE;    }    if (ret == TRUE || ret == END_BOUNDARY)      if (!charset_p->USASCII_is_subset) {	/* FLUSH_OUTPUT(charset_p); */	Validate_charset ("us-ascii", 0);      }    if (ret < OUTER_BOUNDARY)    {      if (lbp)	while (lbp < line.buf + length)	{	  PUTC_IN_UTF8 ((unsigned char) *lbp);	  lbp++;	  EMPTY_BUFFERS(charset_p);	}      else	fprintf (charset_p->pipe, ret == END_BOUNDARY ? "--%s--\n" : "--%s\n", boundary);    }  }  /*FLUSH_OUTPUT(charset_p);*/  return (ret);}  voidShort_help (int exit_code){  fprintf (stderr, "%s%s", short_help, more_help);  exit (exit_code);}  intStrcasecmp (const char *str1, const char *str2){  while (toupper(*str1)==toupper(*str2) && *str1)  {    str1++;    str2++;  }  return ( toupper(*str1)-toupper(*str2) );}  intStrcase_has_prefix (const char *string, const char *prefix){  while (toupper(*string)==toupper(*prefix) && *prefix)  {    string++;    prefix++;  }  return (*prefix=='\0' ? 0 : toupper(*string)-toupper(*prefix) );}  char *Stpcpy (char *To, const char *From){  register char *to=To;  while ((*to=*From++))    to++;  return (to);}  char *Stpncpy (char *To, const char *From, size_t count){  register char *to=To;  char *ret;  while ((size_t)(to-To) < count)    if ((*to=*From++))      to++;    else      break;  ret = to;  while ((size_t)(to-To) < count)    *to++ = '\0';  return (ret);}  intStr_has_prefix (const char *string, const char *prefix){  while (*string==*prefix && *prefix)  {    string++;    prefix++;  }  return (*prefix=='\0' ? 0 : *string-*prefix );}  intStr_is_UTF8 (char *charmap_filename){  register int i, iplus;  /* Unicode-1-1-UTF-8 */  if (Strcasecmp (charmap_filename, "UTF-8") == 0 || \      Strcasecmp (charmap_filename, "UTF-2") == 0 || \      (Strcase_has_prefix (charmap_filename, "Unicode-") == 0 \       &&(iplus = strspn ( \	   charmap_filename + (i = strlen ("Unicode-")), "1234567")) > 0 && \       charmap_filename[i += iplus] == '-' && \       (iplus = strspn (charmap_filename + (i += 1), "01234567")) > 0 && \       (Strcasecmp (charmap_filename + (i += iplus), "-UTF-8") == 0 || \	Strcasecmp (charmap_filename + i, "-UTF-2") == 0) \      ) \     )    return (TRUE);  else    return (FALSE);}  inline intStr_is_boundary (char *linebuf, char *boundary, int push_boundary){  int boundary_len;  int ret = FALSE, index;  static struct  {    char **lines;    int last, pending, pending_ret;  }  boundaries =  {    NULL, -1, -1, 0  };  unsigned char *lbp;  if (push_boundary)  {    if (++boundaries.last % 10 == 0)      boundaries.lines = xrealloc (boundaries.lines, \	  sizeof (boundaries.lines[0]) * (boundaries.last + 10));    boundaries.lines[boundaries.last] = xstrdup (boundary);  }  if (boundaries.pending != -1)  {    if (linebuf)      Error (internal_err);    if (line.length < 90)      line.buf = xrealloc (line.buf, 90);    *(lbp = Stpcpy (line.buf, boundaries.lines[boundaries.pending])) = '\n';    lbp[1] = '\0';    if (boundaries.pending_ret == END_BOUNDARY)      strcpy (lbp, "--\n");    lbp = line.buf;  }  else if (!(lbp = linebuf))    Error (internal_err);  for (index = boundaries.last; index >= 0; index--)  {    boundary_len = strlen (boundaries.lines[index]);    if (strncmp (lbp, boundaries.lines[index], boundary_len) == 0)      if (lbp[boundary_len +strspn (lbp + boundary_len, " \t")] == '\n')      {	if (Debug >= 2)	  fprintf (stderr, " header boundary line found\n");	ret = TRUE;	break;      }      else if (lbp[boundary_len] == '-' \	  &&lbp[boundary_len + 1] == '-' \	  &&(lbp[boundary_len + 2 \	    +strspn (lbp + boundary_len + 2, " \t")] == '\n'))      {	if (Debug >= 2)	  fprintf (stderr, " -- end boundary line found\n");	ret = END_BOUNDARY;	break;      }  }  if (ret)    if (index == boundaries.last)      boundaries.pending = -1;    else    {      boundaries.pending = index;      if (Debug)	fprintf (stderr, " -- this is boundary of outer message !\n");      boundaries.pending_ret = ret;      ret = OUTER_BOUNDARY + boundaries.last - index;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -