📄 edit.c
字号:
MessageBox(hwnd,szBuf,"Unicode file guessing",0);
}*/
/*
Andere Variante: IsTextUnicode() einmal auf normal, einmal auf _swab(),
wenn vorhanden BOM und RBOM selber entfernen
Auch noch zu Probieren: Einfach konvertieren, und mit lpUsedDefaultChar
bei WideCharToMultiByte() checken, ob Zeichen nicht interpretiert werden
konnten -> Hinweis auf non-Unicode-File.
*/
if (i == 0xFFFF) // i doesn't seem to have been modified ...
i = 0;
if (bIsTextUnicode || bHasBOM || bHasRBOM ||
((i & (IS_TEXT_UNICODE_UNICODE_MASK | IS_TEXT_UNICODE_REVERSE_MASK)) &&
!((i & IS_TEXT_UNICODE_UNICODE_MASK) && (i & IS_TEXT_UNICODE_REVERSE_MASK)) &&
!(i & IS_TEXT_UNICODE_ODD_LENGTH) &&
!(i & IS_TEXT_UNICODE_ILLEGAL_CHARS && !(i & IS_TEXT_UNICODE_REVERSE_SIGNATURE)))) {
if (lpbBOM)
*lpbBOM = (bHasBOM || bHasRBOM ||
(i & (IS_TEXT_UNICODE_SIGNATURE | IS_TEXT_UNICODE_REVERSE_SIGNATURE)))
? TRUE : FALSE;
if (lpbReverse)
*lpbReverse = (bHasRBOM || (i & IS_TEXT_UNICODE_REVERSE_MASK)) ? TRUE : FALSE;
return TRUE;
}
else
return FALSE;
}
/*
  IsUTF8()

  Feeds the first nLength bytes at pTest through a table-driven state
  machine and returns TRUE when they end on a sequence boundary without
  ever reaching the error state. Per the tables below this rejects stray
  continuation bytes, the lead bytes 0xC0, 0xC1 and 0xF5-0xFF, overlong
  forms after 0xE0 / 0xF0, the range 0xED 0xA0-0xBF, anything above
  0xF4 0x8F, and input that stops in the middle of a sequence.
  A length of zero (or less) yields TRUE because no byte is examined.
*/
BOOL IsUTF8(const char* pTest,int nLength)
{
  /* byte value -> byte class (selects a row of state_table) */
  static const int byte_class_table[256] = {
    /* 0x00-0x0F */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10-0x1F */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20-0x2F */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x30-0x3F */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x40-0x4F */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50-0x5F */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x60-0x6F */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x70-0x7F */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x80-0x8F */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x90-0x9F */  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xA0-0xAF */  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0xB0-0xBF */  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0xC0-0xCF */  4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    /* 0xD0-0xDF */  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    /* 0xE0-0xEF */  6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 7, 7,
    /* 0xF0-0xFF */  9,10,10,10,11, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
  };

  /* States of the recognizer. kSTART is the only accepting state, kA/kB
     are reached when one/two more continuation bytes are expected, and
     kC..kG follow a lead byte whose next byte has a restricted range. */
  typedef enum {
    kSTART = 0,kA,kB,kC,kD,kE,kF,kG,kERROR,kNumOfStates } utf8_state;

  /* One row per byte class, one column per current state. */
  static const utf8_state state_table[] = {
    /* current state:  kSTART, kA,     kB,     kC,     kD,     kE,     kF,     kG,     kERROR */
    /* class  0 */     kSTART, kERROR, kERROR, kERROR, kERROR, kERROR, kERROR, kERROR, kERROR,
    /* class  1 */     kERROR, kSTART, kA,     kERROR, kA,     kB,     kERROR, kB,     kERROR,
    /* class  2 */     kERROR, kSTART, kA,     kERROR, kA,     kB,     kB,     kERROR, kERROR,
    /* class  3 */     kERROR, kSTART, kA,     kA,     kERROR, kB,     kB,     kERROR, kERROR,
    /* class  4 */     kERROR, kERROR, kERROR, kERROR, kERROR, kERROR, kERROR, kERROR, kERROR,
    /* class  5 */     kA,     kERROR, kERROR, kERROR, kERROR, kERROR, kERROR, kERROR, kERROR,
    /* class  6 */     kC,     kERROR, kERROR, kERROR, kERROR, kERROR, kERROR, kERROR, kERROR,
    /* class  7 */     kB,     kERROR, kERROR, kERROR, kERROR, kERROR, kERROR, kERROR, kERROR,
    /* class  8 */     kD,     kERROR, kERROR, kERROR, kERROR, kERROR, kERROR, kERROR, kERROR,
    /* class  9 */     kF,     kERROR, kERROR, kERROR, kERROR, kERROR, kERROR, kERROR, kERROR,
    /* class 10 */     kE,     kERROR, kERROR, kERROR, kERROR, kERROR, kERROR, kERROR, kERROR,
    /* class 11 */     kG,     kERROR, kERROR, kERROR, kERROR, kERROR, kERROR, kERROR, kERROR };

#define BYTE_CLASS(b) (byte_class_table[(unsigned char)b])
#define NEXT_STATE(b,cur) (state_table[(BYTE_CLASS(b) * kNumOfStates) + (cur)])

  utf8_state state = kSTART;
  const char *cursor = pTest;
  int remaining = nLength;

  /* consume bytes until the input is exhausted or the machine is stuck */
  while (remaining > 0 && state != kERROR) {
    state = NEXT_STATE(*cursor,state);
    cursor++;
    remaining--;
  }

  if (state == kSTART)
    return TRUE;
  return FALSE;
}
/*
  IsUTF7()

  Returns TRUE when none of the first nLength bytes at pTest is a NUL
  byte or has its high bit (0x80) set, FALSE as soon as such a byte is
  found. Nothing else about the data is checked. A length of zero (or
  less) yields TRUE.
*/
BOOL IsUTF7(const char* pTest,int nLength)
{
  const char *cursor = pTest;
  int remaining;

  for (remaining = nLength; remaining > 0; remaining--) {
    const char ch = *cursor++;
    if (ch == '\0' || (ch & 0x80) != 0)
      return FALSE;
  }

  return TRUE;
}
/* Non-zero when the bytes at p start with the UTF-8 signature EF BB BF.
   Up to three bytes are read, so p must point to at least three readable
   bytes. The bytes are compared as unsigned values, which makes the result
   independent of the signedness of char. p is evaluated more than once;
   do not pass an expression with side effects. */
#define IsUTF8Signature(p) \
          (((unsigned char)(p)[0] == 0xEF) && \
           ((unsigned char)(p)[1] == 0xBB) && \
           ((unsigned char)(p)[2] == 0xBF))

/* p advanced past the UTF-8 signature if there is one, otherwise p itself.
   The whole expansion is parenthesized so that it can be used as an operand
   (dereferenced, subtracted, compared) without precedence surprises. */
#define UTF8StringStart(p) \
          ((IsUTF8Signature(p)) ? ((p)+3) : (p))
/* Length in bytes of a UTF-8 sequence, indexed by the upper four bits of
   its first byte. UTF-16 covers a 21-bit code space, so no sequence is
   longer than four bytes and the upper nibble is enough to tell the
   lengths apart. An entry of 0 marks a byte that cannot start a sequence.
*/
static const INT utf8_lengths[16]=
{
  1,   /* 0000 : 1 byte (plain ASCII) */
  1,   /* 0001 */
  1,   /* 0010 */
  1,   /* 0011 */
  1,   /* 0100 */
  1,   /* 0101 */
  1,   /* 0110 */
  1,   /* 0111 */
  0,   /* 1000 : not valid as a first byte */
  0,   /* 1001 */
  0,   /* 1010 */
  0,   /* 1011 */
  2,   /* 1100 : 2 bytes */
  2,   /* 1101 */
  3,   /* 1110 : 3 bytes */
  4    /* 1111 : 4 bytes */
};
/*++
Function :
    UTF8_mbslen_bytes   [INTERNAL]

    Calculates the byte size of a NULL-terminated UTF-8 string.

Parameters :
    char *utf8_string : string to examine

Return value :
    size (in bytes) of the string, including the NULL terminator.
    Bytes that cannot start a sequence are counted as one byte each; the
    function has no error return.

Notes :
    The scan advances one sequence at a time but never steps over the
    terminator: if the string ends in the middle of a multi-byte sequence,
    only the bytes that are really there are counted.
--*/
static INT UTF8_mbslen_bytes(LPCSTR utf8_string)
{
    INT length=0;
    INT code_size;
    BYTE byte;

    while(*utf8_string)
    {
        byte=(BYTE)*utf8_string;

        if( (byte <= 0xF7) && (0 != (code_size = utf8_lengths[ byte >> 4 ])))
        {
            /* Walk the sequence byte by byte instead of jumping code_size
               bytes ahead, so that a truncated sequence at the end of the
               string cannot carry the pointer past the terminator. */
            while(code_size > 0 && *utf8_string)
            {
                length++;
                utf8_string++;
                code_size--;
            }
        }
        else
        {
            /* we got an invalid byte value but need to count it,
               it will be later ignored during the string conversion */
            length++;
            utf8_string++;
        }
    }
    length++; /* include NULL terminator */
    return length;
}
/*++
Function :
    UTF8_mbslen   [INTERNAL]

    Counts the UTF-16 code units needed for a UTF-8 buffer, looking only
    at the first byte of every sequence.

Parameters :
    char *source      : buffer to examine
    int   byte_length : number of bytes in the buffer

Return value :
    number of code units: 1 for every 1- to 3-byte sequence, 2 for every
    4-byte sequence. Bytes that cannot start a sequence are skipped and
    not counted. Trailing bytes are not validated and the function has no
    error return.
--*/
static INT UTF8_mbslen(LPCSTR source, INT byte_length)
{
    INT units = 0;      /* code units counted so far */
    INT remaining;      /* bytes of the buffer still to be visited */
    INT step;           /* bytes consumed by the current iteration */
    BYTE lead;

    for(remaining = byte_length; remaining > 0; remaining -= step, source += step)
    {
        lead = (BYTE)*source;

        /* UTF-16 can't encode 5-byte and 6-byte sequences, so the largest
           acceptable first byte is 11110111; the table gives the sequence
           length for the upper 4 bits of the first byte */
        step = (lead <= 0xF7) ? utf8_lengths[ lead >> 4 ] : 0;

        if(step == 0)
        {
            /* unlike UTF8_mbslen_bytes, an invalid byte is ignored: it is
               stepped over without being counted, to match the Windows
               behavior */
            step = 1;
            continue;
        }

        /* 1 sequence == 1 character, except that a 4-byte sequence
           takes two code units */
        units += (step == 4) ? 2 : 1;
    }
    return units;
}
//=============================================================================
//
//  EditLoadFile()
//
//  Reads pszFile into memory, decides how to interpret the bytes and hands
//  the text to the edit control hwnd (SCI_SETCODEPAGE + EditSetNewText()).
//
//  The data is treated as
//    - UTF-16 when IsUnicode() accepts it and it has no UTF-8 signature;
//      it is then converted to UTF-8 (or to the ANSI code page as fallback),
//    - UTF-8  when IsUTF8() accepts it and it either carries a signature,
//      contains multi-byte sequences, or UTF-8 is the default encoding,
//    - text in iDefaultCodePage otherwise.
//  bSkipEncodingDetection disables the first two checks.
//
//  Output parameters:
//    iCodePage     NCP_* flags describing the chosen encoding
//    iEOLMode      result of EditDetectEOLMode() (default mode for an
//                  empty file)
//    pbUnicodeErr  TRUE when the UTF-16 to UTF-8 conversion failed
//    pbFileTooBig  TRUE when the user declined the large file warning
//
//  Returns TRUE when the text was loaded. Returns FALSE when the file could
//  not be opened or read, its size could not be handled, memory ran out,
//  the user declined the warning, or no conversion of UTF-16 data succeeded.
//
BOOL EditLoadFile(HWND hwnd,LPCSTR pszFile,BOOL bSkipEncodingDetection,
                  int* iCodePage,int* iEOLMode,BOOL *pbUnicodeErr,BOOL *pbFileTooBig)
{

  HANDLE hFile;

  DWORD  dwFileSize;
  DWORD  dwFileSizeLimit;
  DWORD  dwBufSize;
  BOOL   bReadSuccess;

  LPSTR  lpData;
  DWORD  cbData;
  BOOL   bBOM = FALSE;
  BOOL   bReverse = FALSE;

  *pbUnicodeErr = FALSE;
  *pbFileTooBig = FALSE;

  hFile = CreateFile(pszFile,
                     GENERIC_READ,
                     FILE_SHARE_READ|FILE_SHARE_WRITE,
                     NULL,
                     OPEN_EXISTING,
                     FILE_ATTRIBUTE_NORMAL,
                     NULL);

  if (hFile == INVALID_HANDLE_VALUE)
    return FALSE;

  // calculate buffer limit
  dwFileSize = GetFileSize(hFile,NULL);

  // Refuse sizes for which dwFileSize + 10 would wrap around. This also
  // catches the 0xFFFFFFFF (INVALID_FILE_SIZE) error return of GetFileSize().
  if (dwFileSize > 0xFFFFFFFF - 10) {
    CloseHandle(hFile);
    return FALSE;
  }
  dwBufSize = dwFileSize + 10;

  // Check if a warning message should be displayed for large files.
  // Limits above 4095 MB can never be exceeded by a 32-bit file size and
  // would overflow the multiplication, so they are treated as "no warning".
  dwFileSizeLimit = IniGetInt("Settings2","FileLoadWarningMB",1);
  if (dwFileSizeLimit != 0 &&
      dwFileSizeLimit <= 0xFFFFFFFF / (1024 * 1024) &&
      dwFileSizeLimit * 1024 * 1024 < dwFileSize) {
    if (InfoBox(MBYESNO,IDS_WARNLOADBIGFILE,"MsgFileSizeWarning") != IDYES) {
      CloseHandle(hFile);
      *pbFileTooBig = TRUE;
      return FALSE;
    }
  }

  lpData = GlobalAlloc(GPTR,dwBufSize);
  if (lpData == NULL) {
    CloseHandle(hFile);
    return FALSE;
  }

  // The buffer is zero-initialized (GPTR); leaving the last two bytes
  // untouched keeps the data terminated for the string functions below.
  bReadSuccess = ReadFile(hFile,lpData,dwBufSize-2,&cbData,NULL);
  CloseHandle(hFile);

  if (!bReadSuccess)
  {
    GlobalFree(lpData);
    return FALSE;
  }

  // default codepage
  *iCodePage = NCP_DEFAULT;

  if (cbData == 0) {
    // empty file: apply the configured defaults
    SendMessage(hwnd,SCI_SETCODEPAGE,(iDefaultEncoding == 0) ? iDefaultCodePage : SC_CP_UTF8,0);
    EditSetNewText(hwnd,"",0);
    SendMessage(hwnd,SCI_SETEOLMODE,iLineEndings[iDefaultEOLMode],0);
    *iEOLMode = iLineEndings[iDefaultEOLMode];
    *iCodePage = iEncodings[iDefaultEncoding];
    GlobalFree(lpData);
  }

  else if (!bSkipEncodingDetection &&
            IsUnicode(lpData,cbData,&bBOM,&bReverse) && !IsUTF8Signature(lpData)) // check for UTF-8 signature
  {
    LPSTR  lpDataUTF8;
    DWORD  cbDataUTF8;
    LPWSTR lpWide;
    CPINFO cpi;
    UINT   uCP_UTF8;
    int    iUnicodeCP = NCP_UNICODE;

    // The conversion buffer holds cbData * 3 + 2 bytes and its size is
    // passed to WideCharToMultiByte() as an int; give up on input for
    // which that computation would not fit.
    if (cbData > (0x7FFFFFFF - 2) / 3) {
      GlobalFree(lpData);
      return FALSE;
    }

    if (bBOM)
      iUnicodeCP |= NCP_UNICODE_BOM;

    if (bReverse)
    {
      _swab(lpData,lpData,cbData);
      iUnicodeCP |= NCP_UNICODE_REVERSE;
    }

    // Unicode text is converted to ANSI and not to UTF-8 on Windows 95
    uCP_UTF8 = (GetCPInfo(CP_UTF8, &cpi) || IsValidCodePage(CP_UTF8)) ? CP_UTF8 : CP_ACP;

    cbDataUTF8 = (cbData * 3) + 2;
    lpDataUTF8 = GlobalAlloc(GPTR,cbDataUTF8);
    if (lpDataUTF8 == NULL) {
      GlobalFree(lpData);
      return FALSE;
    }

    // skip the byte order mark, it is not part of the text
    lpWide = (bBOM) ? (LPWSTR)lpData + 1 : (LPWSTR)lpData;

    cbData = WideCharToMultiByte(uCP_UTF8,0,lpWide,(-1),lpDataUTF8,cbDataUTF8,NULL,NULL);

    if (cbData == 0 && uCP_UTF8 == CP_UTF8)
    {
      cbData = WideCharToMultiByte(CP_ACP,0,lpWide,(-1),lpDataUTF8,cbDataUTF8,NULL,NULL);
      *pbUnicodeErr = TRUE;
    }

    GlobalFree(lpData);

    // Nothing could be converted. cbData - 1 below would wrap around to a
    // huge length, so report the failure instead of loading garbage.
    if (cbData == 0) {
      GlobalFree(lpDataUTF8);
      *pbUnicodeErr = TRUE;
      return FALSE;
    }

    *iCodePage = iUnicodeCP;

    // cbData counts the terminating NUL written by the conversion
    SendMessage(hwnd,SCI_SETCODEPAGE,SC_CP_UTF8,0);
    EditSetNewText(hwnd,"",0);
    EditSetNewText(hwnd,lpDataUTF8,cbData-1);
    *iEOLMode = EditDetectEOLMode(hwnd,lpDataUTF8,cbData-1);
    GlobalFree(lpDataUTF8);
  }

  // Valid UTF-8 is only loaded as UTF-8 when the byte count differs from the
  // character count (multi-byte sequences present), when UTF-8 is the default
  // encoding, or when the data starts with a UTF-8 signature.
  else if (!bSkipEncodingDetection &&
            IsUTF8(lpData,cbData) &&
          (((UTF8_mbslen_bytes(UTF8StringStart(lpData)) - 1 !=
            UTF8_mbslen(UTF8StringStart(lpData),IsUTF8Signature(lpData) ? cbData-3 : cbData)) ||
            iEncodings[iDefaultEncoding] == NCP_UTF8) ||
            IsUTF8Signature(lpData)))
  {
    SendMessage(hwnd,SCI_SETCODEPAGE,SC_CP_UTF8,0);
    EditSetNewText(hwnd,"",0);
    if (IsUTF8Signature(lpData)) {
      // the three signature bytes are not part of the text
      EditSetNewText(hwnd,UTF8StringStart(lpData),cbData-3);
      *iEOLMode = EditDetectEOLMode(hwnd,UTF8StringStart(lpData),cbData-3);
      *iCodePage = NCP_UTF8 | NCP_UTF8_SIGN;
    }
    else {
      EditSetNewText(hwnd,lpData,cbData);
      *iEOLMode = EditDetectEOLMode(hwnd,lpData,cbData);
      *iCodePage = NCP_UTF8;
    }
    GlobalFree(lpData);
  }

  else
  {
    SendMessage(hwnd,SCI_SETCODEPAGE,iDefaultCodePage,0);
    EditSetNewText(hwnd,"",0);
    EditSetNewText(hwnd,lpData,cbData);
    *iEOLMode = EditDetectEOLMode(hwnd,lpData,cbData);
    *iCodePage = NCP_DEFAULT;
    GlobalFree(lpData);
  }

  return TRUE;

}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -