📄 utf.c

📁 ATMEL单片机可用的文件系统源代码
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
      return (ui32)-1;
    }
    utf8_len -= code_len;

    /*-----------------------------------------------------------------*/
    /* Convert based on code length encoding.                          */
    /*-----------------------------------------------------------------*/
    switch (code_len)
    {
      /*---------------------------------------------------------------*/
      /* 1-byte encoding.                                              */
      /*---------------------------------------------------------------*/
      case 1:
      {
        /*-------------------------------------------------------------*/
        /* This converts directly into a UTF16 code.                   */
        /*-------------------------------------------------------------*/
        *utf16 = byte1;
        break;
      }

      /*---------------------------------------------------------------*/
      /* 2-byte encoding.                                              */
      /*---------------------------------------------------------------*/
      case 2:
      {
        /*-------------------------------------------------------------*/
        /* Get the next byte and advance string pointer.               */
        /*-------------------------------------------------------------*/
        byte2 = *utf8++;

        /*-------------------------------------------------------------*/
        /* Combine the 2 bytes to get a UTF16 code.                    */
        /*-------------------------------------------------------------*/
        *utf16 = ((byte1 & 0x1F) << 6) | (byte2 & 0x3F);
        break;
      }

      /*---------------------------------------------------------------*/
      /* 3-byte encoding.                                              */
      /*---------------------------------------------------------------*/
      case 3:
      {
        /*-------------------------------------------------------------*/
        /* Get the next 2 bytes and advance string pointer.            */
        /*-------------------------------------------------------------*/
        byte2 = *utf8++;
        byte3 = *utf8++;

        /*-------------------------------------------------------------*/
        /* Combine the 3 bytes to get a UTF16 code.                    */
        /*-------------------------------------------------------------*/
        *utf16 = ((byte1 & 0xF) << 12) | ((byte2 & 0x3F) << 6) |
                 (byte3 & 0x3F);
        break;
      }

      /*---------------------------------------------------------------*/
      /* 4-byte encoding.                                              */
      /*---------------------------------------------------------------*/
      case 4:
      {
        /*-------------------------------------------------------------*/
        /* Get the next 3 bytes and advance string pointer.            */
        /*-------------------------------------------------------------*/
        byte2 = *utf8++;
        byte3 = *utf8++;
        byte4 = *utf8++;

        /*-------------------------------------------------------------*/
        /* This encoding translates into 2 UTF16 codes.                */
        /*-------------------------------------------------------------*/
        u = ((byte1 & 0x7) << 2) | ((byte2 & 0x30) >> 4);
        u -= 1;
        x1 = ((byte2 & 0xF) << 2) | ((byte3 & 0x30) >> 4);
        x2 = ((byte3 & 0xF) << 6) | (byte4 & 0x3F);

        /*-------------------------------------------------------------*/
        /* Set the first (surrogate) code of the UTF16 code.           */
        /*-------------------------------------------------------------*/
        *utf16 = 0xD800 | (u << 6) | x1;

        /*-------------------------------------------------------------*/
        /* Check there is room for the additional code.                */
        /*-------------------------------------------------------------*/
        if (++utf16_i >= max_utf16_len)
        {
          set_errno(ENOSPC);
          return (ui32)-1;
        }

        /*-------------------------------------------------------------*/
        /* Set the next code.                                          */
        /*-------------------------------------------------------------*/
        *(++utf16) = 0xDC00 | x2;
        break;
      }

      /*---------------------------------------------------------------*/
      /* Shouldn't be here.                                            */
      /*---------------------------------------------------------------*/
      default:
        PfAssert(FALSE); /*lint !e506, !e774*/
        set_errno(EINVAL);
        return (ui32)-1;
    }
  }

  return utf16_i;
}

/***********************************************************************/
/*  UTF16_UTF8: Convert a UTF16 encoded string to a UTF8 encoded one.  */
/*              The UTF16 string must be properly encoded.             */
/*                                                                     */
/*      Inputs: utf16 = original UTF16 encoded string                  */
/*              utf16_len = length (in 16-bit words) of UTF16 string   */
/*              utf8 = place to store the conversion                   */
/*              max_utf8_len = length (in bytes) of UTF8 string        */
/*                                                                     */
/*     Returns: Length of UTF8 string in bytes, or (ui32)-1 on error   */
/*              after calling set_errno() with EINVAL (bad argument,   */
/*              truncated surrogate pair) or ENOSPC (no room left)     */
/*                                                                     */
/*        Note: Output is not NUL-terminated. Every space check keeps  */
/*              at least one byte of the UTF8 buffer unused.           */
/*                                                                     */
/***********************************************************************/
ui32 UTF16_UTF8(const ui16 *utf16, ui32 utf16_len, ui8 *utf8,
                ui32 max_utf8_len)
{
  ui32 utf8_i, u;
  ui16 word1, word2;

  /*-------------------------------------------------------------------*/
  /* Check the arguments are valid.                                    */
  /*-------------------------------------------------------------------*/
  if (utf16 == NULL || utf8 == NULL || max_utf8_len == 0)
  {
    set_errno(EINVAL);
    return (ui32)-1;
  }

  /*-------------------------------------------------------------------*/
  /* An empty UTF16 string converts to an empty UTF8 string.           */
  /*-------------------------------------------------------------------*/
  if (utf16_len == 0)
    return 0;

  /*-------------------------------------------------------------------*/
  /* Go through UTF16 string code by code, converting each one. A word */
  /* is fetched only while utf16_len shows input remains, so nothing   */
  /* past the end of the UTF16 string is ever read.                    */
  /*-------------------------------------------------------------------*/
  for (utf8_i = 0; utf16_len; --utf16_len)
  {
    word1 = *utf16++;

    /*-----------------------------------------------------------------*/
    /* If surrogate (0xD800 to 0xDFFF), UTF16 code encoded in two      */
    /* words and represents values >= 0x10000 which translate into     */
    /* 4-byte encoded UTF8 codes.                                      */
    /*-----------------------------------------------------------------*/
    if (word1 >= 0xD800 && word1 <= 0xDFFF)
    {
      /*---------------------------------------------------------------*/
      /* Check there is enough room left in UTF8 string.               */
      /*---------------------------------------------------------------*/
      if (utf8_i + 4 >= max_utf8_len)
      {
        set_errno(ENOSPC);
        return (ui32)-1;
      }

      /*---------------------------------------------------------------*/
      /* The pair needs a second word; utf16_len still counts word1,   */
      /* so fewer than 2 means the string ends mid-pair.               */
      /*---------------------------------------------------------------*/
      if (utf16_len < 2)
      {
        set_errno(EINVAL);
        return (ui32)-1;
      }

      /*---------------------------------------------------------------*/
      /* Read the second UTF16 word.                                   */
      /*---------------------------------------------------------------*/
      word2 = *utf16++;
      --utf16_len;
      utf8_i += 4;

      /*---------------------------------------------------------------*/
      /* Combine the two UTF16 words to get 4 UTF8 bytes.              */
      /* word1 bits 9-6 store the top 5 code point bits minus 1.       */
      /*---------------------------------------------------------------*/
      u = (ui32)((word1 & 0x3C0) >> 6) + 1;
      *utf8++ = (ui8)(0xF0 | ((u & 0x1C) >> 2));
      *utf8++ = (ui8)(0x80 | ((u & 0x3) << 4) | ((word1 & 0x3C) >> 2));
      *utf8++ = (ui8)(0x80 | ((word1 & 0x3) << 4) |
                      ((word2 & 0x3C0) >> 6));
      *utf8++ = (ui8)(0x80 | (word2 & 0x3F));
    }

    /*-----------------------------------------------------------------*/
    /* If UTF16 code less than 0x80 it's a 1-byte UTF8 encoded code.   */
    /*-----------------------------------------------------------------*/
    else if (word1 <= 0x7F)
    {
      /*---------------------------------------------------------------*/
      /* Check there is enough room left in UTF8 string.               */
      /*---------------------------------------------------------------*/
      if (utf8_i + 1 >= max_utf8_len)
      {
        set_errno(ENOSPC);
        return (ui32)-1;
      }
      utf8_i += 1;

      /*---------------------------------------------------------------*/
      /* Direct conversion into UTF8 code.                             */
      /*---------------------------------------------------------------*/
      *utf8++ = (ui8)word1;
    }

    /*-----------------------------------------------------------------*/
    /* If UTF16 code less than 0x800 it's a 2-byte UTF8 encoded code.  */
    /*-----------------------------------------------------------------*/
    else if (word1 <= 0x7FF)
    {
      /*---------------------------------------------------------------*/
      /* Check there is enough room left in UTF8 string.               */
      /*---------------------------------------------------------------*/
      if (utf8_i + 2 >= max_utf8_len)
      {
        set_errno(ENOSPC);
        return (ui32)-1;
      }
      utf8_i += 2;

      /*---------------------------------------------------------------*/
      /* Form the 2-byte UTF8 encoding.                                */
      /*---------------------------------------------------------------*/
      *utf8++ = (ui8)(0xC0 | ((word1 & 0x7C0) >> 6));
      *utf8++ = (ui8)(0x80 | (word1 & 0x3F));
    }

    /*-----------------------------------------------------------------*/
    /* Else it's a 3-byte UTF8 encoded code.                           */
    /*-----------------------------------------------------------------*/
    else
    {
      /*---------------------------------------------------------------*/
      /* Check there is enough room left in UTF8 string.               */
      /*---------------------------------------------------------------*/
      if (utf8_i + 3 >= max_utf8_len)
      {
        set_errno(ENOSPC);
        return (ui32)-1;
      }
      utf8_i += 3;

      /*---------------------------------------------------------------*/
      /* Form the 3-byte UTF8 encoding.                                */
      /*---------------------------------------------------------------*/
      *utf8++ = (ui8)(0xE0 | ((word1 & 0xF000) >> 12));
      *utf8++ = (ui8)(0x80 | ((word1 & 0xFC0) >> 6));
      *utf8++ = (ui8)(0x80 | (word1 & 0x3F));
    }
  }
  return utf8_i;
}

#endif /* UTF_ENABLED */
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -