utf8.cpp

来自「一个支持FTP,SFTP的客户端程序」· C++ 代码 · 共 61 行

CPP
61
字号
#include "stdafx.h"
#include "utf8.h"

// Check whether a byte buffer holds well-formed UTF-8.
//
// Derived from the example validator in RFC 2640, tightened to the UTF-8
// definition in RFC 3629. Besides overlong encodings, truncated sequences
// and stray continuation bytes, the following are rejected as well:
//   - encoded UTF-16 surrogates (U+D800..U+DFFF, i.e. ED A0..BF xx)
//   - code points above U+10FFFF (F4 90.. and lead bytes F5..F7)
//   - the obsolete 5- and 6-byte forms (lead bytes F8..FD)
//
// Parameters:
//   buf - bytes to examine; does not need to be NUL-terminated and is never
//         dereferenced when len is 0
//   len - number of bytes in buf
//
// Returns 1 if every byte belongs to a complete, well-formed sequence and
// 0 otherwise. An empty buffer is considered valid.

int utf8_valid(const unsigned char *buf, unsigned int len)
{
    unsigned int i;
    int trailing = 0;          // continuation bytes still expected
    unsigned char lo = 0x80;   // inclusive range allowed for the next
    unsigned char hi = 0xBF;   // continuation byte

    for (i = 0; i < len; i++)
    {
        const unsigned char c = buf[i];

        if (trailing > 0)
        {
            if (c < lo || c > hi)
            {
                return 0;      // not a continuation byte, or out of range
            }
            // Only the first continuation byte can have a narrowed range;
            // every later one may be anything in 80..BF.
            lo = 0x80;
            hi = 0xBF;
            trailing--;
            continue;
        }

        if (c < 0x80)
        {
            continue;          // single-byte (ASCII) character
        }

        if (c >= 0xC2 && c <= 0xDF)
        {
            // 2-byte sequence. C0 and C1 could only encode overlong forms
            // of U+0000..U+007F, so they are excluded.
            trailing = 1;
        }
        else if ((c & 0xF0) == 0xE0)
        {
            // 3-byte sequence.
            trailing = 2;
            if (c == 0xE0)
            {
                lo = 0xA0;     // below A0 would be an overlong encoding
            }
            else if (c == 0xED)
            {
                hi = 0x9F;     // above 9F would encode a surrogate
            }
        }
        else if (c >= 0xF0 && c <= 0xF4)
        {
            // 4-byte sequence.
            trailing = 3;
            if (c == 0xF0)
            {
                lo = 0x90;     // below 90 would be an overlong encoding
            }
            else if (c == 0xF4)
            {
                hi = 0x8F;     // above 8F would exceed U+10FFFF
            }
        }
        else
        {
            // Stray continuation byte (80..BF), overlong lead (C0, C1),
            // or a lead byte that RFC 3629 does not permit (F5..FF).
            return 0;
        }
    }

    // A sequence cut off by the end of the buffer is invalid.
    return trailing == 0;
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?