📄 draft-ietf-idn-dude-02.txt
字号:
char output[] ){ unsigned int max_out, in, out, k, j; u_code_point prev, codept, diff, tmp; char shift; prev = 0x60; max_out = *output_size; for (in = out = 0; in < input_length; ++in) { /* At the start of each iteration, in and out are the number of */ /* items already input/output, or equivalently, the indices of */ /* the next items to be input/output. */ codept = input[in]; if (codept == 0x2D) { /* Hyphen-minus stands for itself. */ if (max_out - out < 1) return dude_big_output; output[out++] = 0x2D; continue; } diff = prev ^ codept; /* Compute the number of base-32 characters (k): */ for (tmp = diff >> 4, k = 1; tmp != 0; ++k, tmp >>= 4); if (max_out - out < k) return dude_big_output; shift = uppercase_flags && uppercase_flags[in] ? 32 : 0; /* shift controls the case of the last base-32 digit. */ /* Each quintet has the form 1xxxx except the last is 0xxxx. */ /* Computing the base-32 digits in reverse order is easiest. */ out += k; output[out - 1] = base32[diff & 0xF] - shift; for (j = 2; j <= k; ++j) { diff >>= 4; output[out - j] = base32[0x10 | (diff & 0xF)]; } prev = codept; } /* Append the null terminator: */ if (max_out - out < 1) return dude_big_output; output[out++] = 0; *output_size = out; return dude_success;}/* Decoder: */enum dude_status dude_decode( enum case_sensitivity case_sensitivity, char scratch_space[], const char input[], unsigned int *output_length, u_code_point output[], unsigned char uppercase_flags[] ){ u_code_point prev, q, diff; char c; unsigned int max_out, in, out, scratch_size; enum dude_status status; prev = 0x60; max_out = *output_length; for (c = input[in = 0], out = 0; c != 0; c = input[++in], ++out) { /* At the start of each iteration, in and out are the number of */ /* items already input/output, or equivalently, the indices of */ /* the next items to be input/output. */ if (max_out - out < 1) return dude_big_output; if (c == 0x2D) output[out] = c; /* hyphen-minus is literal */ else { /* Base-32 sequence. Decode quintets until 0xxxx is found: */ for (diff = 0; ; c = input[++in]) { q = base32_decode(c); if (q == base32_invalid) return dude_bad_input; diff = (diff << 4) | (q & 0xF); if (q >> 4 == 0) break; } prev = output[out] = prev ^ diff; } /* Case of last character determines uppercase flag: */ if (uppercase_flags) uppercase_flags[out] = c >= 65 && c <= 90; } /* Enforce the uniqueness of the encoding by re-encoding */ /* the output and comparing the result to the input: */ scratch_size = ++in; status = dude_encode(out, output, uppercase_flags, &scratch_size, scratch_space); if (status != dude_success || scratch_size != in || unequal(case_sensitivity, scratch_space, input) ) return dude_bad_input; *output_length = out; return dude_success;}/******************************************************************//* Wrapper for testing (would normally go in a separate .c file): */#include <assert.h>#include <stdio.h>#include <stdlib.h>#include <string.h>/* For testing, we'll just set some compile-time limits rather than *//* use malloc(), and set a compile-time option rather than using a *//* command-line option. */enum { unicode_max_length = 256, ace_max_size = 256, test_case_sensitivity = case_insensitive /* suitable for host names */};static void usage(char **argv){ fprintf(stderr, "%s -e reads code points and writes a DUDE string.\n" "%s -d reads a DUDE string and writes code points.\n" "Input and output are plain text in the native character set.\n" "Code points are in the form u+hex separated by whitespace.\n" "A DUDE string is a newline-terminated sequence of LDH characters\n" "(without any signature).\n" "The case of the u in u+hex is the force-to-uppercase flag.\n" , argv[0], argv[0]); exit(EXIT_FAILURE);}static void fail(const char *msg){ fputs(msg,stderr); exit(EXIT_FAILURE);}static const char too_big[] = "input or output is too large, recompile with larger limits\n";static const char invalid_input[] = "invalid input\n";static const char io_error[] = "I/O error\n";/* The following string is used to convert LDH *//* characters between ASCII and the native charset: */static const char ldh_ascii[] = "................" "................" ".............-.." "0123456789......" ".ABCDEFGHIJKLMNO" "PQRSTUVWXYZ....." ".abcdefghijklmno" "pqrstuvwxyz";int main(int argc, char **argv){ enum dude_status status; int r; char *p; if (argc != 2) usage(argv); if (argv[1][0] != '-') usage(argv); if (argv[1][2] != 0) usage(argv); if (argv[1][1] == 'e') { u_code_point input[unicode_max_length]; unsigned long codept; unsigned char uppercase_flags[unicode_max_length]; char output[ace_max_size], uplus[3]; unsigned int input_length, output_size, i; /* Read the input code points: */ input_length = 0; for (;;) { r = scanf("%2s%lx", uplus, &codept); if (ferror(stdin)) fail(io_error); if (r == EOF || r == 0) break; if (r != 2 || uplus[1] != '+' || codept > (u_code_point)-1) { fail(invalid_input); } if (input_length == unicode_max_length) fail(too_big); if (uplus[0] == 'u') uppercase_flags[input_length] = 0; else if (uplus[0] == 'U') uppercase_flags[input_length] = 1; else fail(invalid_input); input[input_length++] = codept; } /* Encode: */ output_size = ace_max_size; status = dude_encode(input_length, input, uppercase_flags, &output_size, output); if (status == dude_bad_input) fail(invalid_input); if (status == dude_big_output) fail(too_big); assert(status == dude_success); /* Convert to native charset and output: */ for (p = output; *p != 0; ++p) { i = *p; assert(i <= 122 && ldh_ascii[i] != '.'); *p = ldh_ascii[i]; } r = puts(output); if (r == EOF) fail(io_error); return EXIT_SUCCESS; } if (argv[1][1] == 'd') { char input[ace_max_size], scratch[ace_max_size], *pp; u_code_point output[unicode_max_length]; unsigned char uppercase_flags[unicode_max_length]; unsigned int input_length, output_length, i; /* Read the DUDE input string and convert to ASCII: */ fgets(input, ace_max_size, stdin); if (ferror(stdin)) fail(io_error); if (feof(stdin)) fail(invalid_input); input_length = strlen(input); if (input[input_length - 1] != '\n') fail(too_big); input[--input_length] = 0; for (p = input; *p != 0; ++p) { pp = strchr(ldh_ascii, *p); if (pp == 0) fail(invalid_input); *p = pp - ldh_ascii; } /* Decode: */ output_length = unicode_max_length; status = dude_decode(test_case_sensitivity, scratch, input, &output_length, output, uppercase_flags); if (status == dude_bad_input) fail(invalid_input); if (status == dude_big_output) fail(too_big); assert(status == dude_success); /* Output the result: */ for (i = 0; i < output_length; ++i) { r = printf("%s+%04lX\n", uppercase_flags[i] ? "U" : "u", (unsigned long) output[i] ); if (r < 0) fail(io_error); } return EXIT_SUCCESS; } usage(argv); return EXIT_SUCCESS; /* not reached, but quiets compiler warning */} INTERNET-DRAFT expires 2001-Dec-07
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -