⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 draft-ietf-idn-amc-ace-m-00.txt

📁 bind-3.2.
💻 TXT
📖 第 1 页 / 共 5 页
字号:
    /* strlen() would return, which is why it is called output_size  */    /* rather than output_length).  The uppercase_flags array must   */    /* hold input_length boolean values, where nonzero means the     */    /* corresponding Unicode character should be forced to uppercase */    /* after being decoded, and zero means it is caseless or should  */    /* be forced to lowercase.  Alternatively, uppercase_flags may   */    /* be a null pointer, which is equivalent to all zeros.  The     */    /* letters a-z and A-Z are always encoded literally, regardless  */    /* of the corresponding flags.  The encoder always outputs       */    /* lowercase base-32 characters except when nonzero values       */    /* of uppercase_flags require otherwise, so the encoder is       */    /* compatible with any of the case models.  The return value     */    /* may be any of the amc_ace_status values defined above; if     */    /* not amc_ace_success, then output_size and output may contain  */    /* garbage.  On success, the encoder will never need to write an */    /* output_size greater than input_length*5+6, because of how the */    /* encoding is defined.                                          */int amc_ace_m_decode(  enum case_sensitivity case_sensitivity,  unsigned char *scratch_space,  const unsigned char *input,  unsigned int *output_length,  u_code_point *output,  unsigned char *uppercase_flags );    /* amc_ace_m_decode() converts AMC-ACE-M to Unicode.  The input   */    /* must be represented as null-terminated ASCII, and the output   */    /* will be represented as an array of Unicode code points.        */    /* The case_sensitivity argument influences the check on the      */    /* well-formedness of the input string; it must be case_sensitive */    /* if case-sensitive comparisons are allowed on encoded strings,  */    /* case_insensitive otherwise (see also section "Case sensitivity */    /* models" of the AMC-ACE-M specification).  
The scratch_space    */    /* must point to space at least as large as the input, which will */    /* get overwritten (this allows the decoder to avoid calling      */    /* malloc()).  The output_length is an in/out argument: the       */    /* caller must pass in the maximum number of code points that     */    /* may be output, and on successful return it will contain the    */    /* actual number of code points output.  The uppercase_flags      */    /* array must have room for at least output_length values, or it  */    /* may be a null pointer if the case information is not needed.   */    /* A nonzero flag indicates that the corresponding Unicode        */    /* character should be forced to uppercase by the caller, while   */    /* zero means it is caseless or should be forced to lowercase.    */    /* The letters a-z and A-Z are output already in the proper case, */    /* but their flags will be set appropriately so that applying the */    /* flags would be harmless.  The return value may be any of the   */    /* amc_ace_status values defined above; if not amc_ace_success,   */    /* then output_length, output, and uppercase_flags may contain    */    /* garbage.  On success, the decoder will never need to write     */    /* an output_length greater than the length of the input (not     */    /* counting the null terminator), because of how the encoding is  */    /* defined.                                                       *//**********************************************************//* Implementation (would normally go in its own .c file): */#include <string.h>/* Character utilities: *//* is_ldh(codept) returns 1 if the code point represents an LDH   *//* character (ASCII letter, digit, or hyphen-minus), 0 otherwise. 
*/
static int is_ldh(u_code_point codept)
{
  /* LDH = hyphen-minus (45), digits (48..57), A-Z (65..90), a-z (97..122). */
  /* Numeric codes are used instead of character literals because the       */
  /* compiler's character set is not necessarily ASCII.                     */
  return codept == 45
      || (codept >= 48 && codept <= 57)
      || (codept >= 65 && codept <= 90)
      || (codept >= 97 && codept <= 122);
}

/* is_AtoZ(c) yields 1 when c is the ASCII code of an uppercase */
/* letter (65..90), and 0 for every other value.                */

static unsigned char is_AtoZ(unsigned char c)
{
  if (c < 65) return 0;
  if (c > 90) return 0;
  return 1;
}

/* special_row_offset[n], for n in 0..7, is the offset of the */
/* bottom of special row 0xD8 + n.                            */

static u_code_point special_row_offset[] = {
  0x0020, 0x005B, 0x007B, 0x00A0,
  0x00C0, 0x00DF, 0x0134, 0x0270
};

/* base32[n] is the ASCII code of the lowercase base-32 character */
/* that represents n, for n in 0..31.  The alphabet is a-z minus  */
/* 'l' and 'o', followed by the digits 2-9.  The codes are spelled */
/* numerically because an ANSI C compiler does not necessarily use */
/* ASCII for character and string literals.                        */

static const unsigned char base32[] = {
   97,  98,  99, 100, 101, 102, 103, 104,   /* a-h      */
  105, 106, 107, 109, 110, 112, 113, 114,   /* i-k, m-n, p-r */
  115, 116, 117, 118, 119, 120, 121, 122,   /* s-z      */
   50,  51,  52,  53,  54,  55,  56,  57    /* 2-9      */
};

/* base32_decode(c) maps a base-32 character (either case) to its */
/* value in 0..31, and yields base32_invalid for anything that is */
/* not part of the base-32 alphabet.                              */

enum { base32_invalid = 32 };

static unsigned int base32_decode(unsigned char c)
{
  unsigned int value;

  /* Digits 2-9 (codes 50..57) carry the values 24..31. */
  if (c >= 50 && c <= 57) return c - 26;

  /* Fold uppercase A-Z onto lowercase a-z. */
  if (c >= 65 && c <= 90) c += 32;

  /* Only a-z remain acceptable, and 'l' (108) and 'o' (111) are */
  /* deliberately absent from the alphabet.                      */
  if (c < 97 || c > 122 || c == 108 || c == 111) return base32_invalid;

  /* Close the two gaps left by the missing letters. */
  value = c - 97;
  if (c > 108) --value;
  if (c > 111) --value;
  return value;
}

/* unequal(case_sensitivity,a1,a2,n) returns 0 if the arrays   */
/* a1 and a2 are equal in the first n positions, 1 otherwise.  */
/* If case_sensitivity is case_insensitive, then ASCII A-Z are */
/* considered equal to a-z respectively.                       
*/static int unequal(  enum case_sensitivity case_sensitivity,  const unsigned char *a1,  const unsigned char *a2,  unsigned int n ){  const unsigned char *end;  unsigned char c1, c2;  if (case_sensitivity != case_insensitive) return memcmp(a1,a2,n);  for (end = a1 + n;  a1 < end;  ++a1, ++a2) {    c1 = *a1;    c2 = *a2;    if (c1 >= 65 && c1 <= 90) c1 += 32;    if (c2 >= 65 && c2 <= 90) c2 += 32;    if (c1 != c2) return 1;  }  return 0;}/* Encoder: */int amc_ace_m_encode(  unsigned int input_length,  const u_code_point *input,  const unsigned char *uppercase_flags,  unsigned int *output_size,  unsigned char *output ){  unsigned int literal, wide;  /* boolean */  u_code_point codept, n, diff, morebits;  u_code_point A, B, C, offsetA, offsetB, offsetC, offset;  const u_code_point *input_end, *p, *pp;  unsigned int count, max, next_in, next_out, max_out, codelen, i;  unsigned char c;  input_end = input + input_length;  /* 1) Verify that only valid code points appear: */  for (p = input;  p < input_end;  ++p) {    if (*p >> 11 == 0x1B || *p > 0x10FFFF) return amc_ace_invalid_input;  }  /* 2) Determine the most populous row: B and offsetB */  /* first check the special rows: */  B = 0xD8;  offsetB = special_row_offset[0];  max = 0;  for (n = 0;  n < 8;  ++n) {    offset = special_row_offset[n];    count = 0;    for (p = input;  p < input_end;  ++p) {      if (*p - offset <= 0xFF && !is_ldh(*p)) ++count;    }    if (count > max) {      B = 0xD8 + n;      offsetB = offset;      max = count;    }  }  /* now check the regular rows: */  for (pp = input;  pp < input_end;  ++pp) {    n = *pp >> 8;    count = 0;    for (p = input;  p < input_end;  ++p) {      if (*p >> 8 == n && !is_ldh(*p)) ++count;    }    if (count > max || (count == max && n < B)) {      B = n;      offsetB = n << 8;      max = count;    }  }  /* 3) Determine the most populous 16-window: A and offsetA */  A = 0;  max = 0;  for (n = 0;  n <= 0x1F;  ++n) {    offset = ((offsetB >> 3) + n) << 3;    count = 
0;    for (p = input;  p < input_end;  ++p) {      if (*p - offset <= 0xF && !is_ldh(*p)) ++count;    }    if (count > max) {      A = n;      offsetA = offset;      max = count;    }  }  /* 4) Determine the most populous 20k-window: C */  C = 0;  max = 0;  for (pp = input;  pp < input_end;  ++pp) {    count = 0;    n = *pp >> 11;    offset = n << 11;    for (p = input;  p < input_end;  ++p) {      if (*p - offset <= 0x4FFF && !is_ldh(*p)) ++count;      if (count > max || (count == max && n < C)) {        C = n;        max = count;      }    }  }  /* 5) Determine the style to use: wide or narrow */  /* if narrow style were used: */  offsetC = (offsetB >> 12) << 12;  count = 3 + (B > 0xFF);  for (p = input;  p < input_end;  ++p) {    if (is_ldh(*p)) { }    else if (*p - offsetA <= 0xF) count += 1;    else if (*p - offsetB <= 0xFF) count += 2;    else if (*p - offsetC <= 0xFFF) count += 3;    else if (*p <= 0xFFFF) count += 4;    else count += 5;  }  max = count;  /* if wide style were used: */  offsetC = C << 11;  count =  B <= 0xFF && C <= 0x1F ?  
3 :  5;  for (p = input;  p < input_end;  ++p) {    if (is_ldh(*p)) { }    else if (*p - offsetB <= 0xFF) count += 2;    else if (*p - offsetC <= 0x4FFF) count += 3;    else if (*p <= 0xFFFF) count += 4;    else count += 5;  }  wide = (count < max);  /* 6) Initialize offsetC, and encode the style and offsets: */  max_out = *output_size;  next_out = 0;  if (wide) {    offsetC = C << 11;    if (B <= 0xFF && C <= 0x1F) {      if (max_out - next_out < 3) return amc_ace_output_too_big;      output[next_out++] = base32[0x10 | (B >> 5)];      output[next_out++] = base32[B & 0x1F];      output[next_out++] = base32[C];    }    else {      if (max_out - next_out < 5) return amc_ace_output_too_big;      output[next_out++] = base32[0x18 | (B >> 10)];      output[next_out++] = base32[(B >> 5) & 0x1F];      output[next_out++] = base32[B & 0x1F];      output[next_out++] = base32[C >> 5];      output[next_out++] = base32[C & 0x1F];    }  }  else {    offsetC = (offsetB >> 12) << 12;    if (B <= 0xFF) {      if (max_out - next_out < 3) return amc_ace_output_too_big;      output[next_out++] = base32[B >> 5];      output[next_out++] = base32[B & 0x1F];    }    else {      if (max_out - next_out < 4) return amc_ace_output_too_big;      output[next_out++] = base32[8 | (B >> 10)];      output[next_out++] = base32[(B >> 5) & 0x1F];      output[next_out++] = base32[B & 0x1F];    }    output[next_out++] = base32[A];  }  /* 7) Main encoding loop: */  literal = 0;  for (next_in = 0;  next_in < input_length;  ++next_in) {    codept = input[next_in];    if (codept == 45 /* hyphen-minus */) {      /* case 7.1 */      if (max_out - next_out < 2) return amc_ace_output_too_big;      output[next_out++] = 45;      output[next_out++] = 45;      continue;    }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -