📄 chartrans.c
字号:
usCharSet = usCp1252; break; } } fail(usCharSet == NULL); if (usChar >= 0x80 && usChar <= 0x9f) { /* Translate implementation defined characters */ usChar = usCharSet[usChar - 0x80]; } else if (iWordVersion < 8 && usChar >= 0xa0 && usChar <= 0xff) { /* Translate old character set to Unixcode */ usChar = usCharSet[usChar - 0x80]; } /* Microsoft Unicode to real Unicode */ if (usChar >= 0xf020 && usChar <= 0xf0ff) { DBG_HEX_C(usPrivateArea[usChar - 0xf020] == 0x003f, usChar); usChar = usPrivateArea[usChar - 0xf020]; } /* Characters with a special meaning in Word */ switch (usChar) { case IGNORE_CHARACTER: case FOOTNOTE_SEPARATOR: case FOOTNOTE_CONTINUATION: case ANNOTATION: case FRAME: case LINE_FEED: case WORD_SOFT_HYPHEN: case UNICODE_HYPHENATION_POINT: return IGNORE_CHARACTER; case PICTURE: case TABLE_SEPARATOR: case TAB: case HARD_RETURN: case PAGE_BREAK: case PAR_END: case COLUMN_FEED: return (ULONG)usChar; case FOOTNOTE_OR_ENDNOTE: NO_DBG_HEX(ulFileOffset); switch (eGetNotetype(ulFileOffset)) { case notetype_is_footnote: return FOOTNOTE_CHAR; case notetype_is_endnote: return ENDNOTE_CHAR; default: return UNKNOWN_NOTE_CHAR; } case WORD_UNBREAKABLE_JOIN: return (ULONG)OUR_UNBREAKABLE_JOIN; default: break; } if (eEncoding != encoding_utf_8) { /* Latin characters in an oriental text */ if (usChar >= 0xff01 && usChar <= 0xff5e) { usChar -= 0xfee0; } } if (eEncoding == encoding_latin_1 && (eConversionType == conversion_ps || eConversionType == conversion_pdf)) { /* Ugly, but it makes the PostScript and PDF look better */ switch (usChar) { case UNICODE_ELLIPSIS: return 140; case UNICODE_TRADEMARK_SIGN: return 141; case UNICODE_PER_MILLE_SIGN: return 142; case UNICODE_BULLET: case UNICODE_BULLET_OPERATOR: case UNICODE_BLACK_CLUB_SUIT: return 143; case UNICODE_LEFT_SINGLE_QMARK: return 144; case UNICODE_RIGHT_SINGLE_QMARK: return 145; case UNICODE_SINGLE_LEFT_ANGLE_QMARK: return 146; case UNICODE_SINGLE_RIGHT_ANGLE_QMARK: return 147; case UNICODE_LEFT_DOUBLE_QMARK: return 148; case UNICODE_RIGHT_DOUBLE_QMARK: return 149; case UNICODE_DOUBLE_LOW_9_QMARK: return 150; case UNICODE_EN_DASH: return 151; case UNICODE_EM_DASH: return 152; case UNICODE_MINUS_SIGN: return 153; case UNICODE_CAPITAL_LIGATURE_OE: return 154; case UNICODE_SMALL_LIGATURE_OE: return 155; case UNICODE_DAGGER: return 156; case UNICODE_DOUBLE_DAGGER: return 157; case UNICODE_SMALL_LIGATURE_FI: return 158; case UNICODE_SMALL_LIGATURE_FL: return 159; default: break; } } if (eConversionType == conversion_pdf) { if (eEncoding == encoding_latin_1) { switch (usChar) { case UNICODE_EURO_SIGN: return 128; default: break; } } else if (eEncoding == encoding_latin_2) { switch (usChar) { case UNICODE_CAPITAL_D_WITH_STROKE: case UNICODE_SMALL_D_WITH_STROKE: return 0x3f; default: break; } } } if (usChar < 0x80) { /* US ASCII */ if (usChar < 0x20 || usChar == 0x7f) { /* Ignore control characters */ DBG_HEX(usChar); DBG_FIXME(); return IGNORE_CHARACTER; } return (ULONG)usChar; } if (eEncoding == encoding_utf_8) { /* No need to convert Unicode characters */ return (ULONG)usChar; } /* Unicode to local representation */ pTmp = pGetCharTableRecord(usChar); if (pTmp != NULL) { DBG_HEX_C(usChar >= 0x7f && usChar <= 0x9f, usChar); return (ULONG)pTmp->ucLocal; } /* Fancy characters to simple US ASCII */ switch (usChar) { case UNICODE_SMALL_F_HOOK: return (ULONG)'f'; case UNICODE_GREEK_CAPITAL_CHI: return (ULONG)'X'; case UNICODE_GREEK_SMALL_UPSILON: return (ULONG)'v'; case UNICODE_MODIFIER_CIRCUMFLEX: case UNICODE_UPWARDS_ARROW: return (ULONG)'^'; case UNICODE_SMALL_TILDE: case UNICODE_TILDE_OPERATOR: return (ULONG)'~'; case UNICODE_EN_QUAD: case UNICODE_EM_QUAD: case UNICODE_EN_SPACE: case UNICODE_EM_SPACE: case UNICODE_THREE_PER_EM_SPACE: case UNICODE_FOUR_PER_EM_SPACE: case UNICODE_SIX_PER_EM_SPACE: case UNICODE_FIGURE_SPACE: case UNICODE_PUNCTUATION_SPACE: case UNICODE_THIN_SPACE: case UNICODE_NARROW_NO_BREAK_SPACE: case UNICODE_LIGHT_SHADE: case UNICODE_MEDIUM_SHADE: case UNICODE_DARK_SHADE: return (ULONG)' '; case UNICODE_LEFT_DOUBLE_QMARK: case UNICODE_RIGHT_DOUBLE_QMARK: case UNICODE_DOUBLE_LOW_9_QMARK: case UNICODE_DOUBLE_HIGH_REV_9_QMARK: case UNICODE_DOUBLE_PRIME: return (ULONG)'"'; case UNICODE_LEFT_SINGLE_QMARK: case UNICODE_RIGHT_SINGLE_QMARK: case UNICODE_SINGLE_LOW_9_QMARK: case UNICODE_SINGLE_HIGH_REV_9_QMARK: case UNICODE_PRIME: return (ULONG)'\''; case UNICODE_HYPHEN: case UNICODE_NON_BREAKING_HYPHEN: case UNICODE_FIGURE_DASH: case UNICODE_EN_DASH: case UNICODE_EM_DASH: case UNICODE_HORIZONTAL_BAR: case UNICODE_MINUS_SIGN: case UNICODE_BD_LIGHT_HORIZONTAL: case UNICODE_BD_DOUBLE_HORIZONTAL: return (ULONG)'-'; case UNICODE_DOUBLE_VERTICAL_LINE: case UNICODE_BD_LIGHT_VERTICAL: case UNICODE_BD_DOUBLE_VERTICAL: return (ULONG)'|'; case UNICODE_DOUBLE_LOW_LINE: return (ULONG)'_'; case UNICODE_DAGGER: return (ULONG)'+'; case UNICODE_DOUBLE_DAGGER: return (ULONG)'#'; case UNICODE_BULLET: case UNICODE_BULLET_OPERATOR: case UNICODE_BLACK_CLUB_SUIT: return (ULONG)ucGetBulletCharacter(eConversionType, eEncoding); case UNICODE_ONE_DOT_LEADER: case UNICODE_TWO_DOT_LEADER: return (ULONG)'.'; case UNICODE_ELLIPSIS:#if defined(__riscos) return (ULONG)OUR_ELLIPSIS;#else if (ulFileOffset == 0) { return (ULONG)OUR_ELLIPSIS; } return UNICODE_ELLIPSIS;#endif /* __riscos */ case UNICODE_DOUBLE_LEFT_ANGLE_QMARK: case UNICODE_TRIANGULAR_BULLET: case UNICODE_SINGLE_LEFT_ANGLE_QMARK: case UNICODE_LEFTWARDS_ARROW: return (ULONG)'<'; case UNICODE_DOUBLE_RIGHT_ANGLE_QMARK: case UNICODE_SINGLE_RIGHT_ANGLE_QMARK: case UNICODE_RIGHTWARDS_ARROW: return (ULONG)'>'; case UNICODE_UNDERTIE: return (ULONG)'-'; case UNICODE_N_ARY_SUMMATION: return (ULONG)'S'; case UNICODE_EURO_SIGN: return (ULONG)'E'; case UNICODE_CIRCLE: case UNICODE_SQUARE: return (ULONG)'O'; case UNICODE_DIAMOND: return (ULONG)OUR_DIAMOND; case UNICODE_NUMERO_SIGN: return (ULONG)'N'; case UNICODE_KELVIN_SIGN: return (ULONG)'K'; case UNICODE_DOWNWARDS_ARROW: return (ULONG)'v'; case UNICODE_FRACTION_SLASH: case UNICODE_DIVISION_SLASH: return (ULONG)'/'; case UNICODE_ASTERISK_OPERATOR: return (ULONG)'*'; case UNICODE_RATIO: return (ULONG)':'; case UNICODE_BD_LIGHT_DOWN_RIGHT: case UNICODE_BD_LIGHT_DOWN_AND_LEFT: case UNICODE_BD_LIGHT_UP_AND_RIGHT: case UNICODE_BD_LIGHT_UP_AND_LEFT: case UNICODE_BD_LIGHT_VERTICAL_AND_RIGHT: case UNICODE_BD_LIGHT_VERTICAL_AND_LEFT: case UNICODE_BD_LIGHT_DOWN_AND_HORIZONTAL: case UNICODE_BD_LIGHT_UP_AND_HORIZONTAL: case UNICODE_BD_LIGHT_VERTICAL_AND_HORIZONTAL: case UNICODE_BD_DOUBLE_DOWN_AND_RIGHT: case UNICODE_BD_DOUBLE_DOWN_AND_LEFT: case UNICODE_BD_DOUBLE_UP_AND_RIGHT: case UNICODE_BD_DOUBLE_UP_AND_LEFT: case UNICODE_BD_DOUBLE_VERTICAL_AND_RIGHT: case UNICODE_BD_DOUBLE_VERTICAL_AND_LEFT: case UNICODE_BD_DOUBLE_DOWN_AND_HORIZONTAL: case UNICODE_BD_DOUBLE_UP_AND_HORIZONTAL: case UNICODE_BD_DOUBLE_VERTICAL_AND_HORIZONTAL: case UNICODE_BLACK_SQUARE: return (ULONG)'+'; case UNICODE_HAIR_SPACE: case UNICODE_ZERO_WIDTH_SPACE: case UNICODE_ZERO_WIDTH_NON_JOINER: case UNICODE_ZERO_WIDTH_JOINER: case UNICODE_LEFT_TO_RIGHT_MARK: case UNICODE_RIGHT_TO_LEFT_MARK: case UNICODE_LEFT_TO_RIGHT_EMBEDDING: case UNICODE_RIGHT_TO_LEFT_EMBEDDING: case UNICODE_POP_DIRECTIONAL_FORMATTING: case UNICODE_LEFT_TO_RIGHT_OVERRIDE: case UNICODE_RIGHT_TO_LEFT_OVERRIDE: case UNICODE_ZERO_WIDTH_NO_BREAK_SPACE: return IGNORE_CHARACTER; default: break; } if (usChar == UNICODE_TRADEMARK_SIGN) { /* * No local representation, it doesn't look like anything in * US-ASCII and a question mark does more harm than good. */ return IGNORE_CHARACTER; } if (usChar >= 0xa0 && usChar <= 0xff) { /* Before Word 97, Word did't use Unicode */ return (ULONG)usChar; } DBG_HEX_C(usChar < 0x3000 || usChar >= 0xd800, ulFileOffset); DBG_HEX_C(usChar < 0x3000 || usChar >= 0xd800, usChar); DBG_MSG_C(usChar >= 0xe000 && usChar < 0xf900, "Private Use Area"); /* Untranslated Unicode character */ return 0x3f;} /* end of ulTranslateCharacters *//* * ulToUpper - convert letter to upper case * * This function converts a letter to upper case. Unlike toupper(3) this * function is independent from the settings of locale. This comes in handy * for people who have to read Word documents in more than one language or * contain more than one language. * * returns the converted letter, or ulChar if the conversion was not possible. */ULONGulToUpper(ULONG ulChar){ if (ulChar < 0x80) { /* US ASCII: use standard function */ return (ULONG)toupper((int)ulChar); } if (ulChar >= 0xe0 && ulChar <= 0xfe && ulChar != 0xf7) { /* * Lower case accented characters * 0xf7 is Division sign; 0xd7 is Multiplication sign * 0xff is y with diaeresis; 0xdf is Sharp s */ return ulChar & ~0x20; }#if defined(__STDC_ISO_10646__) /* * If this is ISO C99 and all locales have wchar_t = ISO 10646 * (e.g., glibc 2.2 or newer), then use standard function */ if (ulChar > 0xff) { return (ULONG)towupper((wint_t)ulChar); }#endif /* __STDC_ISO_10646__ */ return ulChar;} /* end of ulToUpper */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -