📄 w32sys.cpp
字号:
/*
 *  GetCharSet(nCP)
 *
 *  @func
 *      Get character set for code page <p nCP> by scanning the rgCpgChar
 *      table (cCpgChar entries) for the first entry whose nCodePage
 *      equals <p nCP>.
 *
 *  @rdesc
 *      BYTE character set for code page <p nCP>, or 0 if no table entry
 *      has that code page
 */
BYTE CW32System::GetCharSet(
    INT nCP)                //@parm Code page
{
    TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "GetCharSet");

    const CPGCHAR * pcpgchar = rgCpgChar;

    // Return straight from the loop so that the loop index is never
    // referenced outside the for statement's scope (standard C++ scoping)
    for (int i = 0; i < cCpgChar; i++, ++pcpgchar)
    {
        if (pcpgchar->nCodePage == nCP)
            return pcpgchar->bCharSet;
    }
    return 0;               // No matching code page in table
}
/*
 *  GetCodePage(bCharSet)
 *
 *  @func
 *      Get code page for character set <p bCharSet> by scanning the
 *      rgCpgChar table (cCpgChar entries) for the first entry whose
 *      bCharSet equals <p bCharSet>.
 *
 *  @rdesc
 *      Code page for character set <p bCharSet>, or 0 if no table entry
 *      has that character set
 */
INT CW32System::GetCodePage(
    BYTE bCharSet)          //@parm CharSet
{
    TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "GetCodePage");

    const CPGCHAR * pcpgchar = rgCpgChar;

    // Return straight from the loop so that the loop index is never
    // referenced outside the for statement's scope (standard C++ scoping)
    for (int i = 0; i < cCpgChar; i++, ++pcpgchar)
    {
        if (pcpgchar->bCharSet == bCharSet)
            return pcpgchar->nCodePage;
    }
    return 0;               // No matching charset in table
}
/*
 *  IsCharSetValid(bCharSet)
 *
 *  @func
 *      Check whether <p bCharSet> appears as the bCharSet member of any
 *      entry in the rgCpgChar table
 *
 *  @rdesc
 *      TRUE iff some rgCpgChar entry has character set <p bCharSet>
 */
BOOL CW32System::IsCharSetValid(
    BYTE bCharSet)          //@parm CharSet
{
    TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "IsCharSetValid");

    const CPGCHAR * pEntry = rgCpgChar;

    // Walk the table, counting the remaining entries down to zero
    for (LONG cEntriesLeft = cCpgChar; cEntriesLeft != 0; --cEntriesLeft, ++pEntry)
    {
        if (pEntry->bCharSet == bCharSet)
            return TRUE;
    }
    return FALSE;
}
/*
 *  MBTWC (CodePage, dwFlags, pstrMB, cchMB, pstrWC, cchWC, pfNoCodePage)
 *
 *  @mfunc
 *      Convert MultiByte (MB) string pstrMB of length cchMB to WideChar (WC)
 *      string pstrWC of length cchWC according to the flags dwFlags and code
 *      page CodePage.
 *
 *      If CodePage = CP_UTF8, the UTF-8 sequences are decoded here (2-, 3-
 *      and 4-byte forms; 4-byte forms produce a UTF-16 surrogate pair).
 *      If CodePage = SYMBOL_CODEPAGE (usually for SYMBOL_CHARSET strings),
 *      or if the Win32 conversion yields no characters, convert each byte
 *      in pstrMB to a wide char with a zero high byte and a low byte equal
 *      to the MultiByte string byte, i.e., no translation other than a zero
 *      extend into the high byte. Else call the Win32 MultiByteToWideChar()
 *      function.
 *
 *      If pfNoCodePage is nonNULL, *pfNoCodePage receives TRUE when
 *      MultiByteToWideChar() was not called (negative CodePage) or returned
 *      no characters; it stays FALSE for CP_UTF8 and SYMBOL_CODEPAGE.
 *
 *  @rdesc
 *      Count of characters converted. In the UTF-8 path the count includes
 *      characters that didn't fit in pstrWC. If cchWC <= 0 in the
 *      zero-extend path, the count needed for a full conversion is returned.
 */
int CW32System::MBTWC(
    INT     CodePage,       //@parm Code page to use for conversion
    DWORD   dwFlags,        //@parm Flags to guide conversion
    LPCSTR  pstrMB,         //@parm MultiByte string to convert to WideChar
    int     cchMB,          //@parm Count of chars (bytes) in pstrMB or -1
    LPWSTR  pstrWC,         //@parm WideChar string to receive converted chars
    int     cchWC,          //@parm Max count for pstrWC or 0 to get cch needed
    LPBOOL  pfNoCodePage)   //@parm Out parm to receive whether code page is on system
{
    BOOL    fNoCodePage = FALSE;            // Default code page is on OS
    int     cch = -1;

    if(CodePage == CP_UTF8)
    {
        DWORD ch,ch1;

        // Count down a private copy of the byte count. cchMB itself must
        // keep the caller's value because the zero-extend fallback below
        // reads it when nothing was converted (e.g., cchMB == 0).
        int cbLeft = cchMB;

        for(cch = 0; cbLeft--; )
        {
            ch = ch1 = *(BYTE *)pstrMB++;
            Assert(ch < 256);
            if(ch > 127 && cbLeft && IN_RANGE(0x80, *(BYTE *)pstrMB, 0xBF))
            {
                // Need at least 2 bytes of form 110bbbbb 10bbbbbb
                ch1 = ((ch1 & 0x1F) << 6) + (*pstrMB++ & 0x3F);
                cbLeft--;
                // 0xE0 itself is a valid 3-byte lead (U+0800..U+0FFF)
                if(ch >= 0xE0 && cbLeft && IN_RANGE(0x80, *(BYTE *)pstrMB, 0xBF))
                {
                    // Need at least 3 bytes of form 1110bbbb 10bbbbbb 10bbbbbb
                    ch1 = (ch1 << 6) + (*pstrMB++ & 0x3F);
                    cbLeft--;
                    // 0xF0 itself is a valid 4-byte lead (U+10000..U+3FFFF)
                    if (ch >= 0xF0 && cbLeft && IN_RANGE(0x80, *(BYTE *)pstrMB, 0xBF))
                    {
                        // Handle 4-byte form for 16 UTF-16 planes above the
                        // BMP) expect: 11110bbb 10bbbbbb 10bbbbbb 10bbbbbb
                        ch1 = ((ch1 & 0x7FFF) << 6) + (*(BYTE *)pstrMB++ & 0x3F)
                            - 0x10000;          // Subtract offset for BMP
                        cbLeft--;               // Trail byte is consumed whether
                                                //  or not the value is usable
                        if(ch1 <= 0xFFFFF)      // Fits in 20 bits
                        {
                            cch++;              // Two 16-bit surrogate codes
                            if(cch < cchWC)
                                *pstrWC++ = UTF16_LEAD + (ch1 >> 10);
                            ch1 = (ch1 & 0x3FF) + UTF16_TRAIL;
                        }
                        else ch1 = '?';
                    }
                }
            }
            cch++;
            if(cch < cchWC)
                *pstrWC++ = ch1;
            if(!ch)                             // Quit on NULL termination
                break;
        }
    }
    else if(CodePage != SYMBOL_CODEPAGE)        // Not SYMBOL_CHARSET
    {
        fNoCodePage = TRUE;                     // Default codepage isn't on OS
        if(CodePage >= 0)                       // Might be..
        {
            cch = MultiByteToWideChar(
                CodePage, dwFlags, pstrMB, cchMB, pstrWC, cchWC);
            if(cch > 0)
                fNoCodePage = FALSE;            // Codepage is on OS
        }
    }
    if(pfNoCodePage)
        *pfNoCodePage = fNoCodePage;

    if(cch <= 0)
    {
        // SYMBOL_CHARSET or conversion failed: bytes -> words with
        // high bytes of 0. Return count for full conversion
        if(cchWC <= 0)
            return cchMB >= 0 ? cchMB : (int)(strlen(pstrMB) + 1);

        int cchMBMax = cchMB;

        if(cchMB < 0)                           // If negative, use NULL termination
            cchMBMax = tomForward;              //  of pstrMB

        cchMBMax = min(cchMBMax, cchWC);

        // Strictly less than the limit: copying cchMBMax + 1 elements would
        // read past pstrMB and write past the end of pstrWC
        for(cch = 0; (cchMB < 0 ? *pstrMB : 1) && cch < cchMBMax; cch++)
        {
            *pstrWC++ = (unsigned char)*pstrMB++;
        }

        // NULL-terminate the WC string if the MB string was NULL-terminated,
        // and if there is room in the WC buffer.
        if(cchMB < 0 && cch < cchWC)
        {
            *pstrWC = 0;
            cch++;
        }
    }
    return cch;
}
/*
 *  WCTMB (CodePage, dwFlags, pstrWC, cchWC, pstrMB, cchMB, pchDefault,
 *          pfUsedDef, pfNoCodePage)
 *
 *  @mfunc
 *      Convert WideChar (WC) string pstrWC of length cchWC to MultiByte (MB)
 *      string pstrMB of length cchMB according to the flags dwFlags and code
 *      page CodePage.
 *
 *      If CodePage = CP_UTF8, the UTF-8 encoding is produced here; each
 *      16-bit code is encoded independently as 1, 2 or 3 bytes (surrogate
 *      pairs are not combined). If CodePage = SYMBOL_CODEPAGE (usually for
 *      SYMBOL_CHARSET strings), or if the Win32 conversion yields no bytes,
 *      convert each character in pstrWC to a byte, discarding the high byte
 *      (or substituting a default char for codes above 0xFF when the caller
 *      supplied pchDefault or pfUsedDef and CodePage isn't SYMBOL_CODEPAGE).
 *      Else call the Win32 WideCharToMultiByte() function.
 *
 *      If pfNoCodePage is nonNULL, *pfNoCodePage receives TRUE when
 *      WideCharToMultiByte() was not called (negative CodePage) or returned
 *      no bytes; it stays FALSE for CP_UTF8 and SYMBOL_CODEPAGE.
 *
 *  @rdesc
 *      Count of bytes stored in target string pstrMB. In the UTF-8 path the
 *      count includes bytes that didn't fit in pstrMB. If cchMB <= 0 in the
 *      low-byte path, the count for a full conversion is returned.
 */
int CW32System::WCTMB(
    INT     CodePage,       //@parm Code page to use for conversion
    DWORD   dwFlags,        //@parm Flags to guide conversion
    LPCWSTR pstrWC,         //@parm WideChar string to convert
    int     cchWC,          //@parm Count for pstrWC or -1 to use NULL termination
    LPSTR   pstrMB,         //@parm MultiByte string to receive converted chars
    int     cchMB,          //@parm Count of chars (bytes) in pstrMB or 0
    LPCSTR  pchDefault,     //@parm Default char to use if conversion fails
    LPBOOL  pfUsedDef,      //@parm Out parm to receive whether default char used
    LPBOOL  pfNoCodePage)   //@parm Out parm to receive whether code page is on system
{
    int     cch = -1;                       // No chars converted yet
    BOOL    fNoCodePage = FALSE;            // Default code page is on OS

    if(pfUsedDef)                           // Default that all chars can be
        *pfUsedDef = FALSE;                 //  converted

#ifndef WC_NO_BEST_FIT_CHARS
#define WC_NO_BEST_FIT_CHARS    0x400
#endif

    // On NT with major version above 4, and only when the caller passed no
    // flags of its own, ask the OS not to use best-fit substitutions
    if (_dwPlatformId == VER_PLATFORM_WIN32_NT &&
        _dwMajorVersion > 4 && !dwFlags)
    {
        dwFlags = WC_NO_BEST_FIT_CHARS;
    }

    if(CodePage == CP_UTF8)                 // Convert to UTF8 since OS
    {                                       //  doesn't (pre NT 5.0)
        WCHAR ch;

        // Count down a private copy of the char count. cchWC itself must
        // keep the caller's value because the low-byte fallback below reads
        // it when nothing was converted (e.g., cchWC == 0).
        int cchLeft = cchWC;

        cch = 0;                            // No converted bytes yet
        while(cchLeft--)
        {
            ch = *pstrWC++;                 // Get Unicode char
            if(ch <= 127)                   // It's ASCII
            {
                cch++;
                if(cch < cchMB)
                    *pstrMB++ = ch;         // One more converted byte
                if(!ch)                     // Quit on NULL termination
                    break;
                continue;
            }
            if(ch <= 0x7FF)                 // Need 2 bytes of form:
            {                               //  110bbbbb 10bbbbbb
                cch += 2;
                if(cch < cchMB)             // Store lead byte
                    *pstrMB++ = 0xC0 + (ch >> 6);
            }
            else                            // Need 3 bytes of form:
            {                               //  1110bbbb 10bbbbbb
                cch += 3;                   //  10bbbbbb
                if(cch < cchMB)             // Store lead byte followed by
                {                           //  first trail byte
                    *pstrMB++ = 0xE0 + (ch >> 12);
                    *pstrMB++ = 0x80 + (ch >> 6 & 0x3F);
                }
            }
            if(cch < cchMB)                 // Store final UTF-8 byte
                *pstrMB++ = 0x80 + (ch & 0x3F);
        }
    }
    else if(CodePage != SYMBOL_CODEPAGE)
    {
        fNoCodePage = TRUE;                 // Default codepage not on OS
        if(CodePage >= 0)                   // Might be...
        {
            cch = WideCharToMultiByte(CodePage, dwFlags,
                    pstrWC, cchWC, pstrMB, cchMB, pchDefault, pfUsedDef);
            if(cch > 0)
                fNoCodePage = FALSE;        // Found codepage on system
        }
    }
    if(pfNoCodePage)
        *pfNoCodePage = fNoCodePage;

    // SYMBOL_CHARSET, fIsDBCS or conversion failed: low bytes of words ->
    // bytes
    if(cch <= 0)
    {
        // Return multibyte count for full conversion. cchWC is correct for
        // single-byte charsets like the 125x's
        // NOTE(review): for cchWC < 0 this count excludes the terminating
        // NULL, whereas MBTWC returns strlen() + 1 in the same situation --
        // confirm which one callers expect before changing either.
        if(cchMB <= 0)
        {
            return cchWC >= 0 ? cchWC : (int)wcslen(pstrWC);
        }

        char chDefault = 0;
        BOOL fUseDefaultChar = (pfUsedDef || pchDefault) && CodePage != SYMBOL_CODEPAGE;

        if(fUseDefaultChar)
        {
            // determine a default char for our home-grown conversion
            if(pchDefault)
            {
                chDefault = *pchDefault;
            }
            else
            {
                // System default char is looked up once and cached
                static char chSysDef = 0;
                static BOOL fGotSysDef = FALSE;
                // U+2022 serves as a probe character that is expected to
                // have no conversion to ANSI
                const WCHAR szCantConvert[] = { 0x2022 };
                BOOL fUsedDef;

                if(!fGotSysDef)
                {
                    fGotSysDef = TRUE;

                    if(!(WideCharToMultiByte
                            (CP_ACP, 0, szCantConvert, 1, &chSysDef, 1, NULL,
                                        &fUsedDef) == 1 && fUsedDef))
                    {
                        AssertSz(0, "WCTMB():  Unable to determine what the "
                                    "system uses as its default replacement "
                                    "character.");
                        chSysDef = '?';
                    }
                }
                chDefault = chSysDef;
            }
        }

        int cchWCMax = cchWC;

        // If negative, use NULL termination of pstrWC
        if(cchWC < 0)
        {
            cchWCMax = tomForward;
        }

        cchWCMax = min(cchWCMax, cchMB);

        for(cch = 0; (cchWC < 0 ? *pstrWC : 1) && cch < cchWCMax; cch++)
        {
            // TODO(BradO):  Should this be 0x7F in some conversion cases?
            if(fUseDefaultChar && *pstrWC > 0xFF)
            {
                if(pfUsedDef)
                {
                    *pfUsedDef = TRUE;
                }
                *pstrMB = chDefault;
            }
            else
            {
                *pstrMB = (BYTE)*pstrWC;
            }
            pstrMB++;
            pstrWC++;
        }

        // NULL-terminate the MB string if the WC string was NULL-terminated,
        // and if there is room in the MB buffer.
        if(cchWC < 0 && cch < cchMB)
        {
            *pstrMB = 0;
            cch++;
        }
    }
    return cch;
}
/*
 *  CharSetIndexFromChar (ch)
 *
 *  @mfunc
 *      Map the Unicode character ch to an index into the CharSet/CodePage
 *      table rgCpgChar, provided the assignment is reasonably unambiguous,
 *      i.e., the currently assigned Unicode characters in the character's
 *      range have Windows code-page equivalents. Ambiguous or impossible
 *      assignments yield UNKNOWN_INDEX, meaning that the character can only
 *      be represented by Unicode in this simple model. Both UNKNOWN_INDEX
 *      and HAN_INDEX are negative values, i.e., they call for further
 *      processing to decide what (if any) charset index to use. Other
 *      indices may also need run processing, such as the blank in BiDi
 *      text: right-to-left runs have to be marked with an Arabic or Hebrew
 *      char set and left-to-right runs with a left-to-right char set.
 *
 *  @rdesc
 *      returns CharSet index
 */
LONG CW32System::CharSetIndexFromChar(
    TCHAR ch)       // Unicode character to examine
{
    if(ch < 256)
        return ANSI_INDEX;

    if(ch < 0x700)                          // 0x100 - 0x6FF
    {
        if(ch >= 0x600)
            return ARABIC_INDEX;
        if(ch > 0x590)
            return HEBREW_INDEX;
        if(ch >= 0x400 && ch < 0x500)
            return RUSSIAN_INDEX;
        if(ch >= 0x370 && ch < 0x400)
            return GREEK_INDEX;
        return UNKNOWN_INDEX;
    }

    if(ch < 0xAC00)                         // 0x700 - 0xABFF
    {
        if(ch >= 0x3400)                    // CJK Ideographs
            return HAN_INDEX;
        if(ch > 0x3040 && ch < 0x3100)      // Katakana and Hiragana
            return SHIFTJIS_INDEX;
        if(ch >= 0xe00 && ch < 0xe80)       // Thai
            return THAI_INDEX;
        return UNKNOWN_INDEX;
    }

    if(ch < 0xD800)                         // 0xAC00 - 0xD7FF
        return HANGUL_INDEX;

    if(ch > 0xff00)                         // Halfwidth and fullwidth forms
    {
        if(ch < 0xff65)                     // Fullwidth ASCII and halfwidth
            return HAN_INDEX;               //  CJK punctuation
        if(ch < 0xffA0)                     // Halfwidth Katakana
            return SHIFTJIS_INDEX;
        if(ch < 0xffe0)                     // Halfwidth Jamo
            return HANGUL_INDEX;
        if(ch < 0xffef)                     // Fullwidth punctuation and currency
            return HAN_INDEX;               //  signs; halfwidth forms, arrows
    }                                       //  and shapes

    return UNKNOWN_INDEX;
}
#ifdef DEBUG
/*
 *  CW32System::AssertFn (f, LPSTR, LPSTR, int)
 *
 *  @mfunc
 *      Debug-only assert handler. When <p f> is FALSE and assert prompts
 *      haven't been turned off, shows a Yes/No/Cancel message box whose
 *      text labels the buttons "Abort, Ignore, Ignore all":
 *          Yes     calls DebugBreak()
 *          No      returns without doing anything
 *          Cancel  turns off all further prompts from this function
 *      The three unnamed parameters are not used.
 */
void CW32System::AssertFn(BOOL f, LPSTR, LPSTR, int)
{
    static BOOL fPromptEnabled = TRUE;      // Cleared by "Ignore all"

    if (f || !fPromptEnabled)
        return;                             // Nothing to report

    const int idChoice = MessageBox(NULL, TEXT("Abort, Ignore, Ignore all"), TEXT("Assert"), MB_YESNOCANCEL);

    if (idChoice == IDYES)
    {
        DebugBreak();
    }
    else if (idChoice == IDCANCEL)
    {
        fPromptEnabled = FALSE;
    }
    // IDNO (or any other result): just ignore this assert
}
/*
 *	CW32System::sprintf (buff, fmt, ...)
 *
 *	@mfunc
 *		Unfinished placeholder: the body is empty, so <p buff> is never
 *		written and <p fmt> and the variable arguments are ignored. The
 *		pragma below emits a build-time reminder to finish it.
 */
void CW32System::sprintf (CHAR * buff, char *fmt, ...)
{
#pragma message("Review JMO : Finish this")
}
// File-scope trace switch: TraceMsg() emits its text only while this is
// TRUE. Starts out FALSE, i.e., tracing is off until TraceOn() is called.
static BOOL fTracing = FALSE; //FALSE;
/*
 *	CW32System::TraceOn ()
 *
 *	@mfunc
 *		Turn trace output on by setting fTracing to TRUE
 */
void CW32System::TraceOn( void )
{
fTracing = TRUE;
}
/*
 *	CW32System::TraceOff ()
 *
 *	@mfunc
 *		Turn trace output off by setting fTracing to FALSE
 */
void CW32System::TraceOff( void )
{
fTracing = FALSE;
}
/*
 *  CW32System::TraceMsg (ptext)
 *
 *  @mfunc
 *      Send <p ptext> followed by a newline to the debugger output, but
 *      only while tracing is switched on (fTracing).
 *
 *      Independently of the trace switch, every call also performs a crude
 *      stack-usage check: the address of a local variable is remembered on
 *      the first call, and a warning is output whenever that remembered
 *      address exceeds the current call's local address by more than 10000
 *      int-sized elements.
 */
void CW32System::TraceMsg(WCHAR *ptext)
{
    static int * pFirstCallLocal = NULL;    // Address of local on first call
    int iProbe;                             // Only its address is used

    if (!pFirstCallLocal)
        pFirstCallLocal = &iProbe;

    // Pointer difference is measured in ints, not bytes
    if (pFirstCallLocal - &iProbe > 10000)
        OutputDebugString(TEXT("Stack usage too large"));

    if (!fTracing)
        return;

    OutputDebugString(ptext);
    OutputDebugString(TEXT("\n"));
}
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -