📄 draft-ietf-idn-brace-00.txt
字号:
output[next_literal_position++] = 45; non_hyphen_flag = 1; } /* Encode the LDH character literally: */ output[next_literal_position++] = code; } else { /* non-LDH code */ if (non_hyphen_flag) { /* Indicate a change to base-32 mode: */ output[next_literal_position++] = 45; non_hyphen_flag = 0; /* we will empty the buffer */ } /* If the bit queue is empty, flush the LDH buffer: */ if (queue_length == 0) { next_base32_position = next_literal_position; } /* Enqueue the bit string corresponding to the code: */ if (style == half_row_style) { queue = (queue << 7) | (code & 0x7f); queue_length += 7; } else if (style == full_row_style) { queue = (queue << 8) | (code & 0xff); queue_length += 8; } else if (style == no_row_style) { queue = (queue << 16) | code; queue_length += 16; } else /* style == mixed_style */ { if ((code >> 7) == best_half_row) { queue = (queue << 8) | (code & 0x7f); queue_length += 8; } else if ((code >> 8) == (best_half_row >> 1)) { queue = (queue << 9) | (1 << 8) | (code & 0x7f); queue_length += 9; } else { queue = (queue << 18) | (3ul << 16) | code; queue_length += 18; } } /* Output one base-32 character: */ queue_length -= 5; output[next_base32_position] = base32[(queue >> queue_length) & 0x1f]; if (next_base32_position == next_literal_position) { /* LDH buffer is already empty. */ ++next_base32_position; } else { /* Flush the LDH buffer: */ next_base32_position = next_literal_position; } /* next_literal_position is momentarily invalid, */ /* but we know the LDH buffer is empty. */ /* Flush the bit queue: */ while (queue_length >= 5) { queue_length -= 5; output[next_base32_position++] = base32[(queue >> queue_length) & 0x1f]; } /* Fix next_literal_position: */ next_literal_position = next_base32_position + (queue_length > 0); } assert(next_literal_position < brace_encoder_out_max); } /* Flush the bit queue: */ if (queue_length > 0) { assert(queue_length < 5); output[next_base32_position] = base32[(queue << (5 - queue_length)) & 0x1f]; } /* Flushing the LDH buffer at this point is a no-op. */ /* Output "-8Q9" and the null terminator: */ assert(next_literal_position + 4 < brace_encoder_out_max); output[next_literal_position++] = 45; output[next_literal_position++] = 56; output[next_literal_position++] = 81; output[next_literal_position++] = 57; output[next_literal_position] = 0;}/* base32_decode() converts a base-32 character to a value from *//* 0 to 31. If the character is valid, its value is written to *//* *quintet and 1 is returned. Otherwise, *value is not changed *//* and 0 is returned. */static int base32_decode(char c, unsigned int *quintet){ if (c < 50) return 0; if (c <= 57) { *quintet = c - 50; return 1; } if (c < 65) return 0; if (c >= 97) c -= 32; if (c <= 75) { *quintet = c - 57; return 1; } if (c == 76) return 0; if (c <= 78) { *quintet = c - 58; return 1; } if (c == 79) return 0; if (c <= 90) { *quintet = c - 59; return 1; } return 0;}int brace_decode( char *input, unsigned int *output_length, unsigned short output[brace_decoder_out_max] ){ unsigned long queue; unsigned int i, input_length, queue_length, literal_mode_flag, quintet, n, next_code_position; enum encoding_style style; unsigned short common_prefix; char c; /* Check whether input ends with "-8Q9": */ for (i = 0; input[i]; ++i) assert(i < brace_decoder_in_max); if (!(input[i-1] == 57 && input[i-3] == 56 && input[i-4] == 45 && (input[i-2] == 81 || input[i-2] == 113))) { /* Copy input to output and we're done: */ for (i = 0; input[i]; ++i) output[i] = input[i]; assert(i <= brace_decoder_out_max); *output_length = i; return 1; } /* Initialize using the first base-32 character: */ input_length = i; i = 0; if (!base32_decode(input[i], &quintet)) return 0; queue = quintet; queue_length = 3; literal_mode_flag = 0; style = quintet >> 3; /* Determine common_prefix: */ if (style == no_row_style) n = 0; else if (style == full_row_style) n = 8; else n = 9; while (queue_length < n) { if (!base32_decode(input[++i], &quintet)) return 0; queue = (queue << 5) | quintet; queue_length += 5; } common_prefix = (queue >> (queue_length - n)) << (16 - n); queue_length -= n; /* Main decoding loop: */ next_code_position = 0; while (++i < input_length - 4) { c = input[i]; if (c == 45) { if (input[i+1] == 45) { ++i; output[next_code_position++] = 45; /* "--" means "-" */ } else literal_mode_flag ^= 1; /* "-" toggles literal mode */ } else if (literal_mode_flag) { /* literal non-hyphen */ output[next_code_position++] = c; } else { /* base-32 character */ /* Enqueue the corresponding quintet: */ if (!base32_decode(c, &quintet)) return 0; queue = (queue << 5) | quintet; queue_length += 5; /* If the queue contains enough bits for a UTF-16 code, */ /* dequeue them, decode them, and output the code: */ if (style == no_row_style && queue_length >= 16) { output[next_code_position++] = (queue >> (queue_length - 16)) & 0xffff; queue_length -= 16; } else if (style == full_row_style && queue_length >= 8) { output[next_code_position++] = common_prefix | ((queue >> (queue_length - 8)) & 0xff); queue_length -= 8; } else if (style == half_row_style && queue_length >= 7) { output[next_code_position++] = common_prefix | ((queue >> (queue_length - 7)) & 0x7f); queue_length -= 7; } else if (style == mixed_style) { n = (queue >> (queue_length - 2)) & 3; /* top 2 bits */ if (n <= 1 && queue_length >= 8) { output[next_code_position++] = common_prefix | ((queue >> (queue_length - 8)) & 0x7f); queue_length -= 8; } else if (n == 2 && queue_length >= 9) { output[next_code_position++] = (common_prefix ^ 0x80) | ((queue >> (queue_length - 9)) & 0x7f); queue_length -= 9; } else if (n == 3 && queue_length >= 18) { output[next_code_position++] = (queue >> (queue_length - 18)) & 0xffff; queue_length -= 18; } } } } assert(next_code_position <= brace_decoder_out_max); /* Check that the bit queue contains only zeros, at most four: */ if (queue_length > 4) return 0; if ((queue & ((1 << queue_length) - 1)) != 0) return 0; /* Set the output length and we're done: */ *output_length = next_code_position; return 1;}/* Wrapper for testing (would normally go in a separate .c file): */#include <stdio.h>#include <stdlib.h>#include <string.h>static void usage(char **argv){ fprintf(stderr, "%s -e reads big-endian UTF-16 and writes BRACE-format ASCII.\n" "%s -d reads BRACE-format ASCII and writes big-endian UTF-16.\n" , argv[0], argv[0]); exit(EXIT_FAILURE);}static void fail(const char *msg){ fputs(msg,stderr); exit(EXIT_FAILURE);}static const char input_too_large[] = "input is too large\n";int main(int argc, char **argv){ unsigned int input_length; if (argc != 2) usage(argv); if (argv[1][0] != '-') usage(argv); if (argv[1][2] != '\0') usage(argv); if (argv[1][1] == 'e') { unsigned short input[brace_encoder_in_max]; char output[brace_encoder_out_max]; int hi, lo; /* Read the UTF-16 input string: */ input_length = 0; for (;;) { hi = getchar(); lo = getchar(); if (lo == EOF) { if (hi != EOF) fail("input contained an odd number of bytes\n"); break; } if (input_length == brace_encoder_in_max) fail(input_too_large); if (hi > 0xff || lo > 0xff) { fail("input bytes do not fit in 8 bits\n"); } input[input_length++] = (unsigned short) hi << 8 | (unsigned short) lo; } /* Encode, and output the result: */ brace_encode(input_length, input, output); if (strlen(output) > brace_decoder_in_max) fail(input_too_large); fputs(output,stdout); return EXIT_SUCCESS; } if (argv[1][1] == 'd') { char input[brace_decoder_in_max]; unsigned short output[brace_decoder_out_max]; unsigned int output_length, i; size_t n; /* Read the BRACE-encoded ASCII input string: */ n = fread(input, 1, brace_decoder_in_max, stdin); if (n == brace_decoder_in_max) fail(input_too_large); input[n] = 0; /* Decode, and output the result: */ if (!brace_decode(input, &output_length, output)) { fail("input was malformed\n"); } for (i = 0; i < output_length; ++i) { putchar(output[i] >> 8); putchar(output[i] & 0xff); } return EXIT_SUCCESS; } usage(argv); return EXIT_SUCCESS; /* not reached, but quiets compiler warning */} INTERNET-DRAFT expires 2001-Mar-14
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -