📄 idna.c

📁 dns 解析源代码
💻 C
字号:
/*
 * Code for enabling lookup of names with non-ASCII letters via
 * ACE and IDNA (Internationalizing Domain Names in Applications)
 * Ref. RFC-3490.
 */

/*  \version 0.1: Mar 19, 2004 :
 *    G. Vanem - Created.
 *
 *  \version 0.2: Mar 29, 2004 :
 *    G. Vanem - Adapted for Windows (MSVC+MingW) and C++.
 */

#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>

#include "punycode.h"
#include "idna.h"

/* Element count of a true array (not a pointer), cast to int. */
#define DIM(array)     ((int)(sizeof(array) / sizeof(array[0])))
/* Evaluate `foo` and discard it; conventionally marks an unused argument. */
#define ARGSUSED(foo)  (void)foo

/*
 * STR_FMT: printf conversion used in this file for the Windows
 *          codepage name (CPINFOEX.CodePageName).
 * ATOI:    string-to-int conversion applied to the codepage string
 *          handed to the EnumSystemCodePages() callback.
 * Both follow the UNICODE build setting.
 */
#ifdef UNICODE
  #define STR_FMT  "%S"
  #define ATOI(s)  _wtoi (s)
#else
  #define STR_FMT  "%s"
  #define ATOI(s)  atoi (s)
#endif

/*
 * Print a debug message when `_idna_debug` is at least `lvl` and the
 * `_idna_printf` hook is non-NULL.
 *
 * `args` must be a complete, parenthesised printf-style argument list,
 * e.g. IDNA_DEBUG (2, ("value %d\n", v)).
 * Every message is prefixed with "file(line): ". stdout is flushed only
 * when the hook is the C library's printf().
 */
#define IDNA_DEBUG(lvl, args) \
        do { \
          if (_idna_debug >= lvl && _idna_printf) { \
            (*_idna_printf) ("%s(%u): ", __FILE__, __LINE__); \
            (*_idna_printf) args; \
            if (_idna_printf == printf) \
               fflush (stdout); \
          } \
        } while (0)

/*
 * Module-wide state (non-static, so visible to other translation units):
 *   _idna_winnls_errno - value of GetLastError() saved when a Windows
 *                        call made by this module fails.
 *   _idna_errno        - last IDNAERR_* code recorded by this module.
 *   _idna_debug        - verbosity; IDNA_DEBUG() prints when this is
 *                        >= the level given to the macro.
 *   _idna_printf       - printf-style output hook used by IDNA_DEBUG();
 *                        defaults to printf.
 */
int            _idna_winnls_errno = 0;
int            _idna_errno = 0;
int            _idna_debug = 0;
int (MS_CDECL *_idna_printf) (const char *fmt, ...) = printf;

/*
 * Lookup table indexed by a 7-bit character code (0..127), applied to
 * every byte of the Punycode encoder's output:
 *   - codes 0..31 and 127 (control characters) map to '\n',
 *   - codes 32..126 (printable ASCII) map to themselves.
 * The table holds exactly 128 entries followed by the string's
 * terminating NUL, so callers must range-check the index first.
 */
static const char print_ascii[] = "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
                                  "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
                                  " !\"#$%&'()*+,-./"
                                  "0123456789:;<=>?"
                                  "@ABCDEFGHIJKLMNO"
                                  "PQRSTUVWXYZ[\\]^_"
                                  "`abcdefghijklmno"
                                  "pqrstuvwxyz{|}~\n";

/* Critical section created in IDNA_init() and deleted in IDNA_exit(). */
static CRITICAL_SECTION critSection;
/* Codepage used by the single-character conversions; set by IDNA_init(). */
static UINT             cur_cp = CP_ACP;

/*
 * Return the identifier of the system ANSI codepage (GetACP()).
 *
 * At debug level 2 the OEM codepage number and, when available,
 * the descriptive name of the ANSI codepage are logged.
 */
UINT IDNA_GetCodePage (void)
{
  CPINFOEX info;
  UINT     acp;

  IDNA_DEBUG (2, ("OEM codepage %u\n", GetOEMCP()));

  acp = GetACP();
  if (GetCPInfoEx(acp, 0, &info))
  {
    IDNA_DEBUG (2, ("ACP-name " STR_FMT "\n", info.CodePageName));
  }
  return (acp);
}

/*
 * Search state shared between IDNA_CheckCodePage() and the
 * enumeration callback below: the codepage being looked for and
 * whether the callback has seen it.
 */
static BOOL cp_found = FALSE;
static UINT cp_requested = 0;

/*
 * Callback for EnumSystemCodePages().
 *
 * `cp_str` is one codepage identifier as a string; it is parsed with
 * ATOI(). A valid codepage equal to `cp_requested` sets `cp_found`.
 * Always returns TRUE, so the enumeration never stops early.
 */
static BOOL CALLBACK print_cp_info (LPTSTR cp_str)
{
  CPINFOEX info;
  UINT     id = ATOI (cp_str);

  if (IsValidCodePage(id))
  {
    if (id == cp_requested)
    {
      cp_found = TRUE;
    }

    IDNA_DEBUG (3, ("CP: %5u, ", id));

    if (!GetCPInfoEx(id, 0, &info))
    {
      IDNA_DEBUG (3, ("name: <unknown>\n"));
    }
    else
    {
      IDNA_DEBUG (3, ("name: " STR_FMT "\n", info.CodePageName));
    }
  }
  else
  {
    IDNA_DEBUG (1, ("INVALID CODEPAGE: %u\n", id));
  }
  return (TRUE);
}

/*
 * Report whether codepage `cp` is enumerated among the installed
 * system codepages (CP_INSTALLED) and passes IsValidCodePage().
 *
 * The answer is produced by print_cp_info() through the file-scope
 * variables `cp_requested` / `cp_found`.
 */
BOOL IDNA_CheckCodePage (UINT cp)
{
  /* Reset the search state before enumerating. */
  cp_found     = FALSE;
  cp_requested = cp;

  EnumSystemCodePages (print_cp_info, CP_INSTALLED);

  return (cp_found);
}

/*
 * atexit() handler registered by IDNA_init():
 * deletes the module's critical section.
 */
static void IDNA_exit (void)
{
  DeleteCriticalSection (&critSection);
}

/*
 * Bounded string copy that always NUL-terminates.
 *
 * Copies `src` into `dst`, which has room for `len` bytes (len > 0).
 * When `src` does not fit, only its first len-1 characters are kept.
 * Returns `dst`.
 */
static char *StrLcpy (char *dst, const char *src, size_t len)
{
  size_t src_len;

  assert (src != NULL);
  assert (dst != NULL);
  assert (len > 0);

  src_len = strlen (src);

  if (src_len >= len)
  {
    /* Too long: copy what fits and overwrite the last byte with NUL. */
    memcpy (dst, src, len);
    dst [len-1] = '\0';
    return (dst);
  }

  /* Fits entirely, terminator included. */
  memcpy (dst, src, src_len + 1);
  return (dst);
}

/*
 * Select the active codepage and initialise the critical section.
 *
 * cp == 0 : use the system ANSI codepage from IDNA_GetCodePage().
 * cp != 0 : `cp` must pass IDNA_CheckCodePage(); otherwise
 *           _idna_errno is set to IDNAERR_ILL_CODEPAGE,
 *           _idna_winnls_errno to GetLastError(), and FALSE is returned.
 *
 * On success the codepage is stored in `cur_cp`, the critical section
 * is initialised, IDNA_exit() is registered with atexit() and TRUE is
 * returned.
 */
BOOL IDNA_init (WORD cp)
{
  /* Reject an explicitly requested codepage that is not available. */
  if (cp != 0 && !IDNA_CheckCodePage(cp))
  {
    _idna_errno = IDNAERR_ILL_CODEPAGE;
    _idna_winnls_errno = GetLastError();
    IDNA_DEBUG (0, ("IDNA_init: %s\n", IDNA_strerror(_idna_errno)));
    return (FALSE);
  }

  if (cp == 0)
  {
    cp = IDNA_GetCodePage();
  }

  cur_cp = cp;
  IDNA_DEBUG (2, ("IDNA_init: Using codepage %u\n", cp));

  InitializeCriticalSection (&critSection);
  atexit (IDNA_exit);
  return (TRUE);
}

const char *IDNA_strerror (int err)
{
  static char buf[200];

  switch ((enum IDNA_errors)err)
  {
    case IDNAERR_OK:
         return ("No error");
    case IDNAERR_NOT_INIT:
         return ("Not initialised");
    case IDNAERR_PUNYCODE_BASE:
         return ("No Punycode error");
    case IDNAERR_PUNYCODE_BAD_INPUT:
         return ("Bad Punycode input");
    case IDNAERR_PUNYCODE_BIG_OUTBUF:
         return ("Punycode output buf too small");
    case IDNAERR_PUNYCODE_OVERFLOW:
         return ("Punycode arithmetic overflow");
    case IDNAERR_PUNY_ENCODE:
         return ("Mysterious Punycode encode result");
    case IDNAERR_ILL_CODEPAGE:
         return ("Illegal or no Codepage defined");
    case IDNAERR_WINNLS:
         if (FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, _idna_winnls_errno,
                            LANG_NEUTRAL, buf, sizeof(buf)-1, NULL))
            return (buf);
  }
  sprintf (buf, "Unknown %d", err);
  return (buf);
}

/*
 * Convert a single byte `ch`, interpreted in the active codepage
 * (`cur_cp`), to one wide character stored in `*wc`.
 *
 * Returns TRUE on success. On failure returns FALSE after saving
 * GetLastError() in `_idna_winnls_errno` and setting `_idna_errno`
 * to IDNAERR_WINNLS.
 */
static BOOL conv_to_unicode (char ch, wchar_t *wc)
{
  int rc = MultiByteToWideChar (cur_cp, 0, (LPCSTR)&ch, 1, wc, 1);

  if (rc == 0)
  {
    _idna_winnls_errno = GetLastError();
    _idna_errno = IDNAERR_WINNLS;
    /* IDNA_strerror() takes an IDNAERR_* code, not a raw Win32 code;
     * for IDNAERR_WINNLS it formats _idna_winnls_errno itself.
     */
    IDNA_DEBUG (1, ("conv_to_unicode failed; %s\n", IDNA_strerror(_idna_errno)));
    return (FALSE);
  }
  return (TRUE);
}

/*
 * Convert a single wide character `wc` to its byte sequence in the
 * active codepage (`cur_cp`).
 *
 * `ch` must have room for at least 4 bytes (enough for a 4-byte
 * GB18030 Simplified Chinese sequence); the output is NOT
 * NUL-terminated. The number of bytes written is stored in `*len`.
 *
 * Returns TRUE on success. On failure returns FALSE after saving
 * GetLastError() in `_idna_winnls_errno` and setting `_idna_errno`
 * to IDNAERR_WINNLS; `*len` is left untouched.
 */
static BOOL conv_to_ascii (wchar_t wc, char *ch, int *len)
{
  int rc = WideCharToMultiByte (cur_cp, 0, &wc, 1, (LPSTR)ch, 4, NULL, NULL);

  if (rc == 0)
  {
    _idna_winnls_errno = GetLastError();
    _idna_errno = IDNAERR_WINNLS;
    /* IDNA_strerror() takes an IDNAERR_* code, not a raw Win32 code;
     * for IDNAERR_WINNLS it formats _idna_winnls_errno itself.
     */
    IDNA_DEBUG (1, ("conv_to_ascii failed; %s\n", IDNA_strerror(_idna_errno)));
    return (FALSE);
  }
  *len = rc;
  return (TRUE);
}

/*
 * Split a domain-name into its dot-separated labels.
 *
 * Returns a NULL-terminated array of at most MAX_HOST_LABELS strings.
 * A trailing dot does not produce an extra label. Each label is
 * truncated to MAX_HOST_LEN-1 characters if it is longer.
 *
 * Both the array and the label storage are static, so the result is
 * overwritten by the next call.
 */
static char **split_labels (const char *name)
{
  static char  buf [MAX_HOST_LABELS][MAX_HOST_LEN];
  static char *res [MAX_HOST_LABELS+1];
  const  char *p = name;
  int    i;

  for (i = 0; i < MAX_HOST_LABELS && *p; i++)
  {
    const char *dot = strchr (p, '.');
    size_t      len;

    if (!dot)
    {
      /* Last label: everything up to the end of the name. */
      res[i] = StrLcpy (buf[i], p, sizeof(buf[i]));
      i++;
      break;
    }

    /* Label length plus one for the terminator that replaces the dot.
     * Clamp to the row size; an over-long label would otherwise make
     * StrLcpy() write past buf[i].
     */
    len = (size_t)(dot - p) + 1;
    if (len > sizeof(buf[i]))
       len = sizeof(buf[i]);

    res[i] = StrLcpy (buf[i], p, len);
    p = dot + 1;
  }
  res[i] = NULL;
  IDNA_DEBUG (3, ("split_labels: `%s', %d labels\n", name, i));
  return (res);
}

/*
 * Convert a single label to ACE form
 */
static char *convert_to_ACE (const char *name)
{
  static char out_buf [2*MAX_HOST_LEN];  /* A conservative guess */
  DWORD  ucs_input [MAX_HOST_LEN];
  BYTE   ucs_case [MAX_HOST_LEN];
  const  char *p;
  size_t in_len, out_len;
  int    i, c;
  punycode_status status;

  for (i = 0, p = name; *p; i++)
  {
    wchar_t ucs = 0;

    c = *p++;
    if (!conv_to_unicode (c, &ucs))
       break;
    ucs_input[i] = ucs;
    ucs_case[i]  = 0;
    IDNA_DEBUG (3, ("%c -> u+%04X\n", c, ucs));
  }
  in_len  = i;
  out_len = sizeof(out_buf);
  status  = punycode_encode (in_len, ucs_input, ucs_case, &out_len, out_buf);

  if (status != punycode_success)
  {
    _idna_errno = IDNAERR_PUNYCODE_BASE + status;
    out_len = 0;
  }

  for (i = 0; i < (int)out_len; i++)
  {
    c = out_buf[i];
    if (c < 0 || c > 127)
    {
      _idna_errno = IDNAERR_PUNY_ENCODE;
      IDNA_DEBUG (1, ("illegal Punycode result: %c (%d)\n", c, c));
      break;
    }
    if (!print_ascii[c])
    {
      _idna_errno = IDNAERR_PUNY_ENCODE;
      IDNA_DEBUG (1, ("Punycode not ASCII: %c (%d)\n", c, c));
      break;
    }
    out_buf[i] = print_ascii[c];
  }
  out_buf[i] = '\0';

  IDNA_DEBUG (2, ("punycode_encode: status %d, out_len %d, out_buf `%s'\n",
              status, out_len, out_buf));
  if (status == punycode_success && i == (int)out_len)   /* encoding and ASCII conversion okay */
     return (out_buf);
  return (NULL);
}

/*
 * Convert a single ACE encoded label to native encoding
 * u+XXXX is used to signify a lowercase character.
 * U+XXXX is used to signify a uppercase character.
 * Normally only lowercase should be expected here.
 */
static char *convert_from_ACE (const char *name)
{
  static char out_buf [MAX_HOST_LEN];
  DWORD  ucs_output [MAX_HOST_LEN];
  BYTE   ucs_case  [MAX_HOST_LEN];
  size_t ucs_len, i, j;
  punycode_status status;

  memset (&ucs_case, 0, sizeof(ucs_case));
  ucs_len = sizeof(ucs_output);
  status = punycode_decode (strlen(name), name, &ucs_len, ucs_output, ucs_case);

  if (status != punycode_success)
  {
    _idna_errno = IDNAERR_PUNYCODE_BASE + status;
    ucs_len = 0;
  }

  for (i = j = 0; i < ucs_len && j < sizeof(out_buf)-4; i++)
  {
    wchar_t ucs = ucs_output[i];
    int     len;

    if (!conv_to_ascii(ucs, out_buf+j, &len))
       break;
    IDNA_DEBUG (3, ("%c+%04X -> %.*s\n",
                ucs_case[i] ? 'U' : 'u', ucs, len, out_buf+j));
    j += len;
  }
  out_buf[j] = '\0';
  IDNA_DEBUG (2, ("punycode_decode: status %d, out_len %d, out_buf `%s'\n",
              status, ucs_len, out_buf));
  return (status == punycode_success ? out_buf : NULL);
}


/*
 * E.g. convert "www.troms
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -