📄 draft-ietf-idn-amc-ace-m-00.txt
字号:
if (is_ldh(codept)) { /* case 7.2 */ if (!literal) { if (max_out - next_out < 1) return amc_ace_output_too_big; output[next_out++] = 45; literal = 1; } if (max_out - next_out < 1) return amc_ace_output_too_big; output[next_out++] = codept; continue; } /* case 7.3 */ if (literal) { if (max_out - next_out < 1) return amc_ace_output_too_big; output[next_out++] = 45; literal = 0; } if (!wide) { diff = codept - offsetA; if (diff <= 0xF) { /* case 7.3.1 */ codelen = 1; goto encoder_base32_bottom; } } diff = codept - offsetB; if (diff <= 0xFF) { /* case 7.3.2 */ codelen = 2; goto encoder_base32_bottom; } diff = codept - offsetC; if (diff <= 0xFFF) { /* case 7.3.3 */ codelen = 3; goto encoder_base32_bottom; } if (wide) { diff = codept - offsetC - 0x1000; if (diff <= 0x3FFF) { /* case 7.3.4 */ codelen = 1; morebits = diff & 0x3FF; diff >>= 10; goto encoder_base32_bottom; } } if (codept <= 0xFFFF) { /* case 7.3.5 */ diff = codept; codelen = 4; goto encoder_base32_bottom; } /* case 7.3.6 */ diff = codept - 0x10000; codelen = 5; encoder_base32_bottom: /* output diff as n base-32 digits: */ if (max_out - next_out < codelen) return amc_ace_output_too_big; i = codelen - 1; c = base32[diff & 0xF]; if (uppercase_flags && uppercase_flags[next_in]) c -= 32; output[next_out + i] = c; while (i > 0) { diff >>= 4; output[next_out + --i] = base32[0x10 | (diff & 0xF)]; } next_out += codelen; if (wide && codelen == 1) { /* case 7.3.4 */ if (max_out - next_out < 2) return amc_ace_output_too_big; output[next_out++] = base32[morebits >> 5]; output[next_out++] = base32[morebits & 0x1F]; } } /* null terminator: */ if (max_out - next_out < 1) return amc_ace_output_too_big; output[next_out++] = 0; *output_size = next_out; return amc_ace_success;}/* Decoder: */int amc_ace_m_decode( enum case_sensitivity case_sensitivity, unsigned char *scratch_space, const unsigned char *input, unsigned int *output_length, u_code_point *output, unsigned char *uppercase_flags ){ unsigned int literal, wide, large; /* boolean */ const unsigned char *next_in; unsigned char c; unsigned int next_out, max_out, codelen, input_size, scratch_size; u_code_point q, B, offsets[6], diff, offset; enum amc_ace_status status; /* 1) Decode the style and offsets: */ next_in = input; q = base32_decode(*next_in++); if (q == base32_invalid) return amc_ace_invalid_input; wide = q >> 4; large = (q >> 3) & 1; B = q & 7; q = base32_decode(*next_in++); if (q == base32_invalid) return amc_ace_invalid_input; B = (B << 5) | q; if (large) { q = base32_decode(*next_in++); if (q == base32_invalid) return amc_ace_invalid_input; B = (B << 5) | q; } /* offsets[codelen] is for base-32 codes with codelen characters */ /* (not counting the extra two in wide-style 0xxxx xxxxx xxxxx) */ offsets[2] = B >> 3 == 0x1B ? special_row_offset[B & 7] : B << 8; q = base32_decode(*next_in++); if (q == base32_invalid) return amc_ace_invalid_input; if (!wide) { offsets[1] = ((offsets[2] >> 3) + q) << 3; offsets[3] = (offsets[2] >> 12) << 12; } else { offset = q << 11; if (large) { q = base32_decode(*next_in++); if (q == base32_invalid) return amc_ace_invalid_input; offset = (offset << 5) | q; } offsets[3] = offset; offsets[1] = offset + 0x1000; } offsets[4] = 0; offsets[5] = 0x10000; /* 2) Main decoding loop: */ max_out = *output_length; next_out = 0; literal = 0; for (;;) { c = *next_in++; if (!c) break; if (c == 45 /* hyphen-minus */) { if (*next_in == 45) { /* case 2.1: "--" decodes to "-" */ ++next_in; if (max_out - next_out < 1) return amc_ace_output_too_big; if (uppercase_flags) uppercase_flags[next_out] = 0; output[next_out++] = 45; continue; } /* case 2.2: unpaired hyphen-minus toggles mode */ literal = !literal; continue; } if (!is_ldh(c)) return amc_ace_invalid_input; if (max_out - next_out < 1) return amc_ace_output_too_big; if (literal) { /* case 2.3: literal letter/digit */ if (uppercase_flags) uppercase_flags[next_out] = is_AtoZ(c); output[next_out++] = c; continue; } /* case 2.4: base-32 sequence */ diff = 0; codelen = 1; for (;;) { q = base32_decode(c); if (q == base32_invalid) return amc_ace_invalid_input; diff = (diff << 4) | (q & 0xF); if ((q & 0x10) == 0) break; if (++codelen > 5) return amc_ace_invalid_input; c = *next_in++; } /* Now codelen is the number of input characters read, */ /* and c is the character holding the uppercase flag. */ if (wide && codelen == 1) { q = base32_decode(*next_in++); if (q == base32_invalid) return amc_ace_invalid_input; diff = (diff << 5) | q; q = base32_decode(*next_in++); if (q == base32_invalid) return amc_ace_invalid_input; diff = (diff << 5) | q; } offset = offsets[codelen]; if (uppercase_flags) uppercase_flags[next_out] = is_AtoZ(c); output[next_out++] = offset + diff; } /* 3) Re-encode the output and compare to the input: */ input_size = next_in - input; scratch_size = input_size; status = amc_ace_m_encode(next_out, output, uppercase_flags, &scratch_size, scratch_space); if (status != amc_ace_success || scratch_size != input_size || unequal(case_sensitivity, scratch_space, input, input_size) ) return amc_ace_invalid_input; *output_length = next_out; return amc_ace_success;}/******************************************************************//* Wrapper for testing (would normally go in a separate .c file): */#include <assert.h>#include <stdio.h>#include <stdlib.h>#include <string.h>/* For testing, we'll just set some compile-time limits rather than *//* use malloc(), and set a compile-time option rather than using a *//* command-line option. */enum { unicode_max_length = 256, ace_max_size = 256, test_case_sensitivity = case_insensitive};static void usage(char **argv){ fprintf(stderr, "%s -e reads big-endian UTF-32 and writes AMC-ACE-M ASCII.\n" "%s -d reads AMC-ACE-M ASCII and writes big-endian UTF-32.\n" "UTF-32 is extended: bit 31 is used as force-to-uppercase flag.\n" , argv[0], argv[0]); exit(EXIT_FAILURE);}static void fail(const char *msg){ fputs(msg,stderr); exit(EXIT_FAILURE);}static const char too_large[] = "input or output is too large, recompile with larger limits\n";static const char invalid_input[] = "invalid input\n";int main(int argc, char **argv){ enum amc_ace_status status; if (argc != 2) usage(argv); if (argv[1][0] != '-') usage(argv); if (argv[1][2] != '\0') usage(argv); if (argv[1][1] == 'e') { u_code_point input[unicode_max_length]; unsigned char uppercase_flags[unicode_max_length]; unsigned char output[ace_max_size]; unsigned int input_length, output_size; int c0, c1, c2, c3; /* Read the UTF-32 input string: */ input_length = 0; for (;;) { c0 = getchar(); c1 = getchar(); c2 = getchar(); c3 = getchar(); if (c1 == EOF || c2 == EOF || c3 == EOF) { if (c0 != EOF) fail("input not a multiple of 4 bytes\n"); break; } if (input_length == unicode_max_length) fail(too_large); if ((c0 != 0 && c0 != 0x80) || c1 < 0 || c1 > 0x10 || c2 < 0 || c2 > 0xFF || c3 < 0 || c3 > 0xFF ) { fail(invalid_input); } input[input_length] = ((u_code_point) c1 << 16) | ((u_code_point) c2 << 8) | (u_code_point) c3; uppercase_flags[input_length] = (c0 >> 7); ++input_length; } /* Encode, and output the result: */ output_size = ace_max_size; status = amc_ace_m_encode(input_length, input, uppercase_flags, &output_size, output); if (status == amc_ace_invalid_input) fail(invalid_input); if (status == amc_ace_output_too_big) fail(too_large);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -