// clasifyc.cpp
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
//
// Use of this source code is subject to the terms of the Microsoft shared
// source or premium shared source license agreement under which you licensed
// this source code. If you did not accept the terms of the license agreement,
// you are not authorized to use this source code. For the terms of the license,
// please see the license agreement between you and Microsoft or, if applicable,
// see the SOURCE.RTF on your install media or the root of your tools installation.
// THE SOURCE CODE IS PROVIDED "AS IS", WITH NO WARRANTIES.
//
/*
* @doc INTERNAL
*
* @module clasifyc.cpp -- Kinsoku classify characters |
*
* Used in word breaking procs, particularly important
* for properly wrapping a line.
*
* Authors: <nl>
* Jon Matousek
*
*/
#include "_common.h"
#include "_clasfyc.h"
#ifdef MACPORTREMOVE // jon can't we remove this - it's in win2mac.h?
#include <WINNLS.H>
#endif
#include "_array.h"
// NOTE(review): ASSERTDATA is a macro defined outside this file (presumably
// in one of the project headers included above) -- confirm what it sets up.
ASSERTDATA
// Data for Kinsoku character classifications.
// NOTE: All values are for UNICODE characters.

// Set 0: "dumb" quotes and other marks that have no left/right orientation.
// These entries deliberately work around the Kinsoku rules: when leading they
// are treated like an opening parenthesis, and when following they behave
// roughly like a closing parenthesis -- but in the leading case a break is
// only taken on white space.
const WCHAR set0[] = {
    0x0022,  // QUOTATION MARK
    0x0027,  // APOSTROPHE
    0x2019,  // RIGHT SINGLE QUOTATION MARK
    0x301F,  // LOW DOUBLE PRIME QUOTATION MARK
    0xFF02,  // FULLWIDTH QUOTATION MARK
    0xFF07,  // FULLWIDTH APOSTROPHE
    0        // end of list
};
// Set 1: opening-parenthesis characters (left brackets, left quotation marks
// and their vertical, small, fullwidth and halfwidth forms).
// NOTE(review): 0xFFE9 is an arrow rather than a bracket -- confirm that its
// presence here is intentional.
const WCHAR set1[] = {
    0x0028,  // LEFT PARENTHESIS
    0x005B,  // LEFT SQUARE BRACKET
    0x007B,  // LEFT CURLY BRACKET
    0x00AB,  // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    0x2018,  // LEFT SINGLE QUOTATION MARK
    0x201C,  // LEFT DOUBLE QUOTATION MARK
    0x2039,  // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
    0x2045,  // LEFT SQUARE BRACKET WITH QUILL
    0x207D,  // SUPERSCRIPT LEFT PARENTHESIS
    0x208D,  // SUBSCRIPT LEFT PARENTHESIS
    0x3008,  // LEFT ANGLE BRACKET
    0x300A,  // LEFT DOUBLE ANGLE BRACKET
    0x300C,  // LEFT CORNER BRACKET
    0x300E,  // LEFT WHITE CORNER BRACKET
    0x3010,  // LEFT BLACK LENTICULAR BRACKET
    0x3014,  // LEFT TORTOISE SHELL BRACKET
    0x3016,  // LEFT WHITE LENTICULAR BRACKET
    0x3018,  // LEFT WHITE TORTOISE SHELL BRACKET
    0x301A,  // LEFT WHITE SQUARE BRACKET
    0x301D,  // REVERSED DOUBLE PRIME QUOTATION MARK
    0xFD3E,  // ORNATE LEFT PARENTHESIS
    0xFE35,  // PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS
    0xFE37,  // PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET
    0xFE39,  // PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET
    0xFE3B,  // PRESENTATION FORM FOR VERTICAL LEFT BLACK LENTICULAR BRACKET
    0xFE3D,  // PRESENTATION FORM FOR VERTICAL LEFT DOUBLE ANGLE BRACKET
    0xFE3F,  // PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET
    0xFE41,  // PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET
    0xFE43,  // PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET
    0xFE59,  // SMALL LEFT PARENTHESIS
    0xFE5B,  // SMALL LEFT CURLY BRACKET
    0xFE5D,  // SMALL LEFT TORTOISE SHELL BRACKET
    0xFF08,  // FULLWIDTH LEFT PARENTHESIS
    0xFF3B,  // FULLWIDTH LEFT SQUARE BRACKET
    0xFF5B,  // FULLWIDTH LEFT CURLY BRACKET
    0xFF62,  // HALFWIDTH LEFT CORNER BRACKET
    0xFFE9,  // HALFWIDTH LEFTWARDS ARROW
    0        // end of list
};
// Set 2: closing-parenthesis characters. Besides right brackets and right
// quotation marks, the list also carries hyphens and the non-ASCII commas.
// Two entries were deliberately taken out and are kept below as comments:
// COMMA (now in set 6) and RIGHT SINGLE QUOTATION MARK (now in set 0).
const WCHAR set2[] = {
    // 0x002C,  // COMMA -- moved to set 6 to conjoin numerals.
    0x002D,  // HYPHEN
    0x00AD,  // OPTIONAL HYPHEN
    0x055D,  // ARMENIAN COMMA
    0x060C,  // ARABIC COMMA
    0x3001,  // IDEOGRAPHIC COMMA
    0xFE50,  // SMALL COMMA
    0xFE51,  // SMALL IDEOGRAPHIC COMMA
    0xFF0C,  // FULLWIDTH COMMA
    0xFF64,  // HALFWIDTH IDEOGRAPHIC COMMA
    0x0029,  // RIGHT PARENTHESIS
    0x005D,  // RIGHT SQUARE BRACKET
    0x007D,  // RIGHT CURLY BRACKET
    0x00BB,  // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    // 0x2019,  // RIGHT SINGLE QUOTATION MARK -- moved to set 0
    0x201D,  // RIGHT DOUBLE QUOTATION MARK
    0x203A,  // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
    0x2046,  // RIGHT SQUARE BRACKET WITH QUILL
    0x207E,  // SUPERSCRIPT RIGHT PARENTHESIS
    0x208E,  // SUBSCRIPT RIGHT PARENTHESIS
    0x3009,  // RIGHT ANGLE BRACKET
    0x300B,  // RIGHT DOUBLE ANGLE BRACKET
    0x300D,  // RIGHT CORNER BRACKET
    0x300F,  // RIGHT WHITE CORNER BRACKET
    0x3011,  // RIGHT BLACK LENTICULAR BRACKET
    0x3015,  // RIGHT TORTOISE SHELL BRACKET
    0x3017,  // RIGHT WHITE LENTICULAR BRACKET
    0x3019,  // RIGHT WHITE TORTOISE SHELL BRACKET
    0x301B,  // RIGHT WHITE SQUARE BRACKET
    0x301E,  // DOUBLE PRIME QUOTATION MARK
    0xFD3F,  // ORNATE RIGHT PARENTHESIS
    0xFE36,  // PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS
    0xFE38,  // PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET
    0xFE3A,  // PRESENTATION FORM FOR VERTICAL RIGHT TORTOISE SHELL BRACKET
    0xFE3C,  // PRESENTATION FORM FOR VERTICAL RIGHT BLACK LENTICULAR BRACKET
    0xFE3E,  // PRESENTATION FORM FOR VERTICAL RIGHT DOUBLE ANGLE BRACKET
    0xFE40,  // PRESENTATION FORM FOR VERTICAL RIGHT ANGLE BRACKET
    0xFE42,  // PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET
    0xFE44,  // PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET
    0xFE5A,  // SMALL RIGHT PARENTHESIS
    0xFE5C,  // SMALL RIGHT CURLY BRACKET
    0xFE5E,  // SMALL RIGHT TORTOISE SHELL BRACKET
    0xFF09,  // FULLWIDTH RIGHT PARENTHESIS
    0xFF3D,  // FULLWIDTH RIGHT SQUARE BRACKET
    0xFF5D,  // FULLWIDTH RIGHT CURLY BRACKET
    0xFF63,  // HALFWIDTH RIGHT CORNER BRACKET
    0xFFEB,  // HALFWIDTH RIGHTWARDS ARROW
    0        // end of list
};
// Set 3: 'non-breaking' em-characters at the line-starting point. The entries
// are iteration marks, prolonged/voiced sound marks and the small kana
// letters, in both their regular and halfwidth forms.
const WCHAR set3[] = {
    0x3005,  // IDEOGRAPHIC ITERATION MARK
    0x309D,  // HIRAGANA ITERATION MARK
    0x309E,  // HIRAGANA VOICED ITERATION MARK
    0x30FC,  // KATAKANA-HIRAGANA PROLONGED SOUND MARK
    0x30FD,  // KATAKANA ITERATION MARK
    0x30FE,  // KATAKANA VOICED ITERATION MARK
    0x3041,  // HIRAGANA LETTER SMALL A
    0x3043,  // HIRAGANA LETTER SMALL I
    0x3045,  // HIRAGANA LETTER SMALL U
    0x3047,  // HIRAGANA LETTER SMALL E
    0x3049,  // HIRAGANA LETTER SMALL O
    0x3063,  // HIRAGANA LETTER SMALL TU
    0x3083,  // HIRAGANA LETTER SMALL YA
    0x3085,  // HIRAGANA LETTER SMALL YU
    0x3087,  // HIRAGANA LETTER SMALL YO
    0x308E,  // HIRAGANA LETTER SMALL WA
    0x309B,  // KATAKANA-HIRAGANA VOICED SOUND MARK
    0x309C,  // KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
    0x30A1,  // KATAKANA LETTER SMALL A
    0x30A3,  // KATAKANA LETTER SMALL I
    0x30A5,  // KATAKANA LETTER SMALL U
    0x30A7,  // KATAKANA LETTER SMALL E
    0x30A9,  // KATAKANA LETTER SMALL O
    0x30C3,  // KATAKANA LETTER SMALL TU
    0x30E3,  // KATAKANA LETTER SMALL YA
    0x30E5,  // KATAKANA LETTER SMALL YU
    0x30E7,  // KATAKANA LETTER SMALL YO
    0x30EE,  // KATAKANA LETTER SMALL WA
    0x30F5,  // KATAKANA LETTER SMALL KA
    0x30F6,  // KATAKANA LETTER SMALL KE
    0xFF67,  // HALFWIDTH KATAKANA LETTER SMALL A
    0xFF68,  // HALFWIDTH KATAKANA LETTER SMALL I
    0xFF69,  // HALFWIDTH KATAKANA LETTER SMALL U
    0xFF6A,  // HALFWIDTH KATAKANA LETTER SMALL E
    0xFF6B,  // HALFWIDTH KATAKANA LETTER SMALL O
    0xFF6C,  // HALFWIDTH KATAKANA LETTER SMALL YA
    0xFF6D,  // HALFWIDTH KATAKANA LETTER SMALL YU
    0xFF6E,  // HALFWIDTH KATAKANA LETTER SMALL YO
    0xFF6F,  // HALFWIDTH KATAKANA LETTER SMALL TU
    0xFF70,  // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
    0xFF9E,  // HALFWIDTH KATAKANA VOICED SOUND MARK
    0xFF9F,  // HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
    0        // end of list
};
// Set 4: expression marks -- exclamation and question marks (plus related
// signs) across the scripts listed below.
const WCHAR set4[] = {
    0x0021,  // EXCLAMATION MARK
    0x003F,  // QUESTION MARK
    0x00A1,  // INVERTED EXCLAMATION MARK
    0x00BF,  // INVERTED QUESTION MARK
    0x01C3,  // LATIN LETTER RETROFLEX CLICK
    0x037E,  // GREEK QUESTION MARK
    0x055C,  // ARMENIAN EXCLAMATION MARK
    0x055E,  // ARMENIAN QUESTION MARK
    0x055F,  // ARMENIAN ABBREVIATION MARK
    0x061F,  // ARABIC QUESTION MARK
    0x203C,  // DOUBLE EXCLAMATION MARK
    0x203D,  // INTERROBANG
    0x2762,  // HEAVY EXCLAMATION MARK ORNAMENT
    0x2763,  // HEAVY HEART EXCLAMATION MARK ORNAMENT
    0xFE56,  // SMALL QUESTION MARK
    0xFE57,  // SMALL EXCLAMATION MARK
    0xFF01,  // FULLWIDTH EXCLAMATION MARK
    0xFF1F,  // FULLWIDTH QUESTION MARK
    0        // end of list
};
// Set 5: centered punctuation marks. The ASCII COLON and SEMICOLON used to
// live here; they were moved to set 6 and remain below only as comments.
const WCHAR set5[] = {
    // 0x003A,  // COLON -- moved to set 6 to conjoin numerals.
    // 0x003B,  // SEMICOLON -- moved to set 6 to conjoin numerals
    0x00B7,  // MIDDLE DOT
    0x30FB,  // KATAKANA MIDDLE DOT
    0xFF65,  // HALFWIDTH KATAKANA MIDDLE DOT
    0x061B,  // ARABIC SEMICOLON
    0xFE54,  // SMALL SEMICOLON
    0xFE55,  // SMALL COLON
    0xFF1A,  // FULLWIDTH COLON
    0xFF1B,  // FULLWIDTH SEMICOLON
    0        // end of list
};
// Set 6: punctuation marks.
// This table has diverged from the Kinsoku tables in order to enhance how
// colon, comma, and full stop are treated around numerals and set 15 (roman
// text). Even so, URLs must not be broken up -- see IsURLDelimiter().
const WCHAR set6[] = {
    0x002C,  // COMMA
    0x002F,  // SLASH
    0x003A,  // COLON
    0x003B,  // SEMICOLON
    0x002E,  // FULL STOP (PERIOD)
    0x0589,  // ARMENIAN FULL STOP
    0x06D4,  // ARABIC FULL STOP
    0x3002,  // IDEOGRAPHIC FULL STOP
    0xFE52,  // SMALL FULL STOP
    0xFF0E,  // FULLWIDTH FULL STOP
    0xFF61,  // HALFWIDTH IDEOGRAPHIC FULL STOP
    0        // end of list
};
// Set 7: inseparable characters. No code points are listed yet; the table
// holds only its trailing 0.
// FUTURE (alexgo): maybe handle these.
const WCHAR set7[] = { 0 };
// Set 8: pre-numeral abbreviations -- currency signs that precede a number.
// NOTE(review): U+20AC EURO SIGN is not in this list (only U+20A0
// EURO-CURRENCY SIGN is) -- confirm whether that omission is intentional.
const WCHAR set8[] = {
    0x0024,  // DOLLAR SIGN
    0x00A3,  // POUND SIGN
    0x00A4,  // CURRENCY SIGN
    0x00A5,  // YEN SIGN
    0x005C,  // REVERSE SOLIDUS (looks like Yen in FE fonts.)
    0x0E3F,  // THAI CURRENCY SYMBOL BAHT
    0x20A0,  // EURO-CURRENCY SIGN
    0x20A1,  // COLON SIGN
    0x20A2,  // CRUZEIRO SIGN
    0x20A3,  // FRENCH FRANC SIGN
    0x20A4,  // LIRA SIGN
    0x20A5,  // MILL SIGN
    0x20A6,  // NAIRA SIGN
    0x20A7,  // PESETA SIGN
    0x20A8,  // RUPEE SIGN
    0x20A9,  // WON SIGN
    0x20AA,  // NEW SHEQEL SIGN
    0xFF04,  // FULLWIDTH DOLLAR SIGN
    0xFFE5,  // FULLWIDTH YEN SIGN
    0xFFE6,  // FULLWIDTH WON SIGN
    0xFFE1,  // FULLWIDTH POUND SIGN
    0        // end of list
};
// Set 9: post-numeral abbreviations -- cent, degree, percent, per-mille and
// prime signs that follow a number.
// Each code point is listed exactly once (0xFF05 FULLWIDTH PERCENT SIGN
// appears only alongside the other percent signs).
const WCHAR set9[] = {
    0x00A2,  // CENT SIGN
    0x00B0,  // DEGREE SIGN
    0x2103,  // DEGREE CELSIUS
    0x2109,  // DEGREE FAHRENHEIT
    0x212A,  // KELVIN SIGN
    0x0025,  // PERCENT SIGN
    0x066A,  // ARABIC PERCENT SIGN
    0xFE6A,  // SMALL PERCENT SIGN
    0xFF05,  // FULLWIDTH PERCENT SIGN
    0x2030,  // PER MILLE SIGN
    0x2031,  // PER TEN THOUSAND SIGN
    0x2032,  // PRIME
    0x2033,  // DOUBLE PRIME
    0x2034,  // TRIPLE PRIME
    0x2035,  // REVERSED PRIME
    0x2036,  // REVERSED DOUBLE PRIME
    0x2037,  // REVERSED TRIPLE PRIME
    0xFFE0,  // FULLWIDTH CENT SIGN
    0        // end of list
};
// Set 10: the Japanese space (blank) character.
const WCHAR set10[] = {
    0x3000,  // IDEOGRAPHIC SPACE
    0        // end of list
};
// Set 11: Japanese characters other than those in the sets above. No code
// points are listed here because we use GetStringTypeEx instead; the table
// holds only its trailing 0.
const WCHAR set11[] = { 0 };
// Characters included in numeral-sequence
const WCHAR set12[] = {
0x0030, // DIGIT ZERO
0x0031, // DIGIT ONE
0x0032, // DIGIT TWO
0x0033, // DIGIT THREE
0x0034, // DIGIT FOUR
0x0035, // DIGIT FIVE
0x0036, // DIGIT SIX
0x0037, // DIGIT SEVEN
0x0038, // DIGIT EIGHT
0x0039, // DIGIT NINE
0x0660, // ARABIC-INDIC DIGIT ZERO
0x0661, // ARABIC-INDIC DIGIT ONE
0x0662, // ARABIC-INDIC DIGIT TWO
0x0663, // ARABIC-INDIC DIGIT THREE
0x0664, // ARABIC-INDIC DIGIT FOUR
0x0665, // ARABIC-INDIC DIGIT FIVE
0x0666, // ARABIC-INDIC DIGIT SIX
0x0667, // ARABIC-INDIC DIGIT SEVEN
0x0668, // ARABIC-INDIC DIGIT EIGHT
0x0669, // ARABIC-INDIC DIGIT NINE
0x06F0, // EXTENDED ARABIC-INDIC DIGIT ZERO
0x06F1, // EXTENDED ARABIC-INDIC DIGIT ONE
0x06F2, // EXTENDED ARABIC-INDIC DIGIT TWO
0x06F3, // EXTENDED ARABIC-INDIC DIGIT THREE
0x06F4, // EXTENDED ARABIC-INDIC DIGIT FOUR
0x06F5, // EXTENDED ARABIC-INDIC DIGIT FIVE
0x06F6, // EXTENDED ARABIC-INDIC DIGIT SIX
0x06F7, // EXTENDED ARABIC-INDIC DIGIT SEVEN
0x06F8, // EXTENDED ARABIC-INDIC DIGIT EIGHT
0x06F9, // EXTENDED ARABIC-INDIC DIGIT NINE
0x0966, // DEVANAGARI DIGIT ZERO
0x0967, // DEVANAGARI DIGIT ONE
0x0968, // DEVANAGARI DIGIT TWO
0x0969, // DEVANAGARI DIGIT THREE
0x096A, // DEVANAGARI DIGIT FOUR
0x096B, // DEVANAGARI DIGIT FIVE
0x096C, // DEVANAGARI DIGIT SIX
0x096D, // DEVANAGARI DIGIT SEVEN
0x096E, // DEVANAGARI DIGIT EIGHT
0x096F, // DEVANAGARI DIGIT NINE
0x09E6, // BENGALI DIGIT ZERO
0x09E7, // BENGALI DIGIT ONE
0x09E8, // BENGALI DIGIT TWO
0x09E9, // BENGALI DIGIT THREE
0x09EA, // BENGALI DIGIT FOUR
0x09EB, // BENGALI DIGIT FIVE
0x09EC, // BENGALI DIGIT SIX
0x09ED, // BENGALI DIGIT SEVEN
0x09EE, // BENGALI DIGIT EIGHT
0x09EF, // BENGALI DIGIT NINE
0x0A66, // GURMUKHI DIGIT ZERO
0x0A67, // GURMUKHI DIGIT ONE
0x0A68, // GURMUKHI DIGIT TWO
0x0A69, // GURMUKHI DIGIT THREE
0x0A6A, // GURMUKHI DIGIT FOUR
0x0A6B, // GURMUKHI DIGIT FIVE
0x0A6C, // GURMUKHI DIGIT SIX
0x0A6D, // GURMUKHI DIGIT SEVEN
0x0A6E, // GURMUKHI DIGIT EIGHT
0x0A6F, // GURMUKHI DIGIT NINE
0x0AE6, // GUJARATI DIGIT ZERO
0x0AE7, // GUJARATI DIGIT ONE
0x0AE8, // GUJARATI DIGIT TWO
0x0AE9, // GUJARATI DIGIT THREE
0x0AEA, // GUJARATI DIGIT FOUR
0x0AEB, // GUJARATI DIGIT FIVE
0x0AEC, // GUJARATI DIGIT SIX
0x0AED, // GUJARATI DIGIT SEVEN
0x0AEE, // GUJARATI DIGIT EIGHT
0x0AEF, // GUJARATI DIGIT NINE
0x0B66, // ORIYA DIGIT ZERO
0x0B67, // ORIYA DIGIT ONE
0x0B68, // ORIYA DIGIT TWO
0x0B69, // ORIYA DIGIT THREE
0x0B6A, // ORIYA DIGIT FOUR
0x0B6B, // ORIYA DIGIT FIVE
0x0B6C, // ORIYA DIGIT SIX
0x0B6D, // ORIYA DIGIT SEVEN
0x0B6E, // ORIYA DIGIT EIGHT
0x0B6F, // ORIYA DIGIT NINE
0x0BE7, // TAMIL DIGIT ONE
0x0BE8, // TAMIL DIGIT TWO
0x0BE9, // TAMIL DIGIT THREE
0x0BEA, // TAMIL DIGIT FOUR
0x0BEB, // TAMIL DIGIT FIVE
0x0BEC, // TAMIL DIGIT SIX
0x0BED, // TAMIL DIGIT SEVEN
0x0BEE, // TAMIL DIGIT EIGHT
0x0BEF, // TAMIL DIGIT NINE
0x0BF0, // TAMIL NUMBER TEN
0x0BF1, // TAMIL NUMBER ONE HUNDRED
0x0BF2, // TAMIL NUMBER ONE THOUSAND
0x0C66, // TELUGU DIGIT ZERO
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -