📄 document.cxx.svn-base

📁 Notepad++ is a generic source code editor (it tries to be anyway) and Notepad replacement written in
💻 SVN-BASE
📖 第 1 页 / 共 4 页
字号:
		return CharClassify::ccWord;
	return charClass.GetClass(ch);
}

/**
 * Used by commands that want to select whole words.
 * Finds the start of the word at pos when delta < 0 or the end of the word when delta >= 0.
 *
 * @param pos                Position to extend from.
 * @param delta              Direction: negative scans backwards, otherwise forwards.
 * @param onlyWordCharacters When true, only characters classified as ccWord are spanned.
 *                           When false, the class of the character adjacent to pos
 *                           (before it going backwards, at it going forwards) is spanned.
 * @return The extended position after being passed through MovePositionOutsideChar.
 */
int Document::ExtendWordSelect(int pos, int delta, bool onlyWordCharacters) {
	CharClassify::cc ccStart = CharClassify::ccWord;
	if (delta < 0) {
		// Only sample the preceding character when there is one; this mirrors
		// the pos < Length() guard used by the forward branch below.
		if (!onlyWordCharacters && pos > 0)
			ccStart = WordCharClass(cb.CharAt(pos - 1));
		while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart))
			pos--;
	} else {
		if (!onlyWordCharacters && pos < Length())
			ccStart = WordCharClass(cb.CharAt(pos));
		while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
			pos++;
	}
	// Make sure the result does not land inside a multi-byte character.
	return MovePositionOutsideChar(pos, delta);
}

/**
 * Find the start of the next word in either a forward (delta >= 0) or backwards direction
 * (delta < 0).
 * This is looking for a transition between character classes although there is also some
 * additional movement to transit white space.
 * Used by cursor movement by word commands.
 */
int Document::NextWordStart(int pos, int delta) {
	if (delta < 0) {
		while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace))
			pos--;
		if (pos > 0) {
			CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
			while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) {
				pos--;
			}
		}
	} else {
		CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
		while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
			pos++;
		while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace))
			pos++;
	}
	return pos;
}

/**
 * Find the end of the next word in either a forward (delta >= 0) or backwards direction
 * (delta < 0).
 * This is looking for a transition between character classes although there is also some
 * additional movement to transit white space.
 * Used by cursor movement by word commands.
 */
int Document::NextWordEnd(int pos, int delta) {
	if (delta < 0) {
		if (pos > 0) {
			CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
			if (ccStart != CharClassify::ccSpace) {
				while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == ccStart) {
					pos--;
				}
			}
			while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace) {
				pos--;
			}
		}
	} else {
		while (pos < Length() && WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace) {
			pos++;
		}
		if (pos < Length()) {
			CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
			while (pos < Length() && WordCharClass(cb.CharAt(pos)) == ccStart) {
				pos++;
			}
		}
	}
	return pos;
}

/**
 * Check that the character at the given position is a word or punctuation character and that
 * the previous character is of a different character class.
 */
bool Document::IsWordStartAt(int pos) {
	if (pos > 0) {
		CharClassify::cc ccPos = WordCharClass(CharAt(pos));
		return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) &&
			(ccPos != WordCharClass(CharAt(pos - 1)));
	}
	return true;
}

/**
 * Check that the character at the given position is a word or punctuation character and that
 * the next character is of a different character class.
 */
bool Document::IsWordEndAt(int pos) {
	if (pos < Length()) {
		CharClassify::cc ccPrev = WordCharClass(CharAt(pos-1));
		return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) &&
			(ccPrev != WordCharClass(CharAt(pos)));
	}
	return true;
}

/**
 * Check that the given range has a transition between character classes at both
 * ends: a word start at start and a word end at end.
 * Only the two boundaries are examined here.
 */
bool Document::IsWordAt(int start, int end) {
	if (!IsWordStartAt(start))
		return false;
	return IsWordEndAt(end);
}

// The comparison and case changing functions here assume ASCII
// or extended ASCII such as the normal Windows code page.

//Vitaliy
// Upper-case a single-byte character.
// NOTE: this function is called for non-Unicode characters only!
//       ( i.e. when (!dbcsCodePage || isascii(ch)) )
// ASCII letters are converted arithmetically; anything else is handed to
// Platform_MakeUpperChar.
static inline char MakeUpperCase(char ch) {
	if (ch >= 'a' && ch <= 'z')
		return static_cast<char>(ch - 'a' + 'A');
	if (ch >= 'A' && ch <= 'Z')
		return ch;	// already upper case
	return Platform_MakeUpperChar(ch);
}



// Lower-case a single-byte character.
// NOTE: this function is called for non-Unicode characters only!
//       ( i.e. when (!dbcsCodePage || isascii(ch)) )
// ASCII letters are converted arithmetically; anything else is handed to
// Platform_MakeLowerChar.
static inline char MakeLowerCase(char ch) {
	if (ch >= 'A' && ch <= 'Z')
		return static_cast<char>(ch - 'A' + 'a');
	if (ch >= 'a' && ch <= 'z')
		return ch;	// already lower case
	return Platform_MakeLowerChar(ch);
}
//yilatiV

// Adapter that lets the regular expression code read characters from a Document.
// Reads outside the half-open range [0, end) yield a NUL character instead of
// being forwarded to the document.
class DocumentIndexer : public CharacterIndexer {
	Document *pdoc;	// document being read
	int end;	// exclusive upper bound of readable positions

public:
	DocumentIndexer(Document *pdoc_, int end_) :
		pdoc(pdoc_),
		end(end_) {
	}

	virtual ~DocumentIndexer() {
	}

	virtual char CharAt(int index) {
		if (index >= 0 && index < end)
			return pdoc->CharAt(index);
		return 0;
	}
};

/**
 * Find text in document, supporting both forward and backward
 * searches (just pass minPos > maxPos to do a backward search)
 * Has not been tested with backwards DBCS searches yet.
 */
long Document::FindText(int minPos, int maxPos, const char *s,
                        bool caseSensitive, bool word, bool wordStart, bool regExp, bool posix,
                        int *length) {
	if (regExp) {
		if (!pre)
			pre = new RESearch(&charClass);
		if (!pre)
			return -1;

		int increment = (minPos <= maxPos) ? 1 : -1;

		int startPos = minPos;
		int endPos = maxPos;

		// Range endpoints should not be inside DBCS characters, but just in case, move them.
		startPos = MovePositionOutsideChar(startPos, 1, false);
		endPos = MovePositionOutsideChar(endPos, 1, false);

		const char *errmsg = pre->Compile(s, *length, caseSensitive, posix);
		if (errmsg) {
			return -1;
		}
		// Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
		// Replace first '.' with '-' in each property file variable reference:
		//     Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
		//     Replace: $(\1-\2)
		int lineRangeStart = LineFromPosition(startPos);
		int lineRangeEnd = LineFromPosition(endPos);
		if ((increment == 1) &&
			(startPos >= LineEnd(lineRangeStart)) &&
			(lineRangeStart < lineRangeEnd)) {
			// the start position is at end of line or between line end characters.
			lineRangeStart++;
			startPos = LineStart(lineRangeStart);
		}
		int pos = -1;
		int lenRet = 0;
		char searchEnd = s[*length - 1];
		int lineRangeBreak = lineRangeEnd + increment;
		for (int line = lineRangeStart; line != lineRangeBreak; line += increment) {
			int startOfLine = LineStart(line);
			int endOfLine = LineEnd(line);
			if (increment == 1) {
				if (line == lineRangeStart) {
					if ((startPos != startOfLine) && (s[0] == '^'))
						continue;	// Can't match start of line if start position after start of line
					startOfLine = startPos;
				}
				if (line == lineRangeEnd) {
					if ((endPos != endOfLine) && (searchEnd == '$'))
						continue;	// Can't match end of line if end position before end of line
					endOfLine = endPos;
				}
			} else {
				if (line == lineRangeEnd) {
					if ((endPos != startOfLine) && (s[0] == '^'))
						continue;	// Can't match start of line if end position after start of line
					startOfLine = endPos;
				}
				if (line == lineRangeStart) {
					if ((startPos != endOfLine) && (searchEnd == '$'))
						continue;	// Can't match end of line if start position before end of line
					endOfLine = startPos;
				}
			}

			DocumentIndexer di(this, endOfLine);
			int success = pre->Execute(di, startOfLine, endOfLine);
			if (success) {
				pos = pre->bopat[0];
				lenRet = pre->eopat[0] - pre->bopat[0];
				if (increment == -1) {
					// Check for the last match on this line.
					int repetitions = 1000;	// Break out of infinite loop
					while (success && (pre->eopat[0] <= endOfLine) && (repetitions--)) {
						success = pre->Execute(di, pos+1, endOfLine);
						if (success) {
							if (pre->eopat[0] <= minPos) {
								pos = pre->bopat[0];
								lenRet = pre->eopat[0] - pre->bopat[0];
							} else {
								success = 0;
							}
						}
					}
				}
				break;
			}
		}
		*length = lenRet;
		return pos;

	} else {

		bool forward = minPos <= maxPos;
		int increment = forward ? 1 : -1;

		// Range endpoints should not be inside DBCS characters, but just in case, move them.
		int startPos = MovePositionOutsideChar(minPos, increment, false);
		int endPos = MovePositionOutsideChar(maxPos, increment, false);

		// Compute actual search ranges needed
		int lengthFind = *length;
		if (lengthFind == -1)
			lengthFind = static_cast<int>(strlen(s));
		int endSearch = endPos;
		if (startPos <= endPos) {
			endSearch = endPos - lengthFind + 1;
		}
		//Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
		char firstChar = s[0];
		wchar_t* ws_upr = NULL;
		int ws_len = 0;
		char str[8];
		wchar_t wstr[4];
		if (!caseSensitive && !dbcsCodePage)
			firstChar = static_cast<char>(MakeUpperCase(firstChar));
		int pos = forward ? startPos : (startPos - 1);
		if (dbcsCodePage) {
			if (!caseSensitive && dbcsCodePage == SC_CP_UTF8) {
				ws_len = (int) UCS2Length(s, lengthFind);
				if (ws_len != lengthFind) {
					int ws_size = (((ws_len + 1) >> 4) + 1) << 4; // 16-chars alignment
					ws_upr = new wchar_t[ws_size]; 
					if (ws_upr != NULL) {
						UCS2FromUTF8(s, lengthFind, ws_upr, ws_size);
						ws_upr[ws_len] = 0;
						Platform_MakeUpperW(ws_upr, ws_len);
						// now ws_upr is UCS2 s in upper-case
					}
				} 
			}
			if (!caseSensitive && ws_upr == NULL) {
				// the text is Latin i.e. one character is one byte
				// ws_upr is NULL
				// BUT !!! ws_upr can be NULL if dbcsCodePage != SC_CP_UTF8
				// (also ws_upr = new wchar_t[ws_size] can be NULL)
				
				// for latin characters in non-UTF8 Unicode text
				// (thanks to Airix Z)
				if (isascii(firstChar))
					firstChar = static_cast<char>(MakeUpperCase(firstChar));
			}
			if (pos >= 0)
				pos = MovePositionOutsideChar(pos, increment, false);
		}
		while (forward ? (pos < endSearch) : (pos >= endSearch)) {
			char ch = CharAt(pos);
			if (caseSensitive) {
				if (ch == firstChar) {
					bool found = true;
					if (pos + lengthFind > Platform::Maximum(startPos, endPos)) found = false;
					for (int posMatch = 1; posMatch < lengthFind && found; posMatch++) {
						ch = CharAt(pos + posMatch);
						if (ch != s[posMatch])
							found = false;
					}
					if (found) {
						if ((!word && !wordStart) ||
						        word && IsWordAt(pos, pos + lengthFind) ||
						        wordStart && IsWordStartAt(pos))
							return pos;
					}
				}
			} else {
				bool bMatch = false;
				int  charLen = 0;

				if (!dbcsCodePage) {
					bMatch = (MakeUpperCase(ch) == firstChar);
				}
				else if (ws_upr == NULL) {
					// for latin characters in non-UTF8 Unicode text
					// (thanks to Airix Z)

					if (isascii(ch))
						bMatch = (MakeUpperCase(ch) == firstChar);
					else
						bMatch = (ch == firstChar);
				}
				else {
					// LenChar returns 2 for "\r\n"
					// this is wrong for UTF8 because "\r\n" 
					// is not one character with length=2
					charLen = IsCrLf(pos) ? 1 : LenChar(pos);
					for (int i = 0; i < charLen; i++) {
						str[i] = CharAt(pos+i);
					}
					str[charLen] = 0;
					UCS2FromUTF8(str, charLen, wstr, 2);
					wstr[1] = 0;
					Platform_MakeUpperW(wstr, 1);
					bMatch = (ws_upr[0] == wstr[0]);

					/*
					if (bMatch)
						MessageBoxA(NULL, "MatchCaseInsensitive is true!!!", "", 0);
					// OK
                        	        */
				}
				if (bMatch) {
					bool found = true;
					if (pos + lengthFind > Platform::Maximum(startPos, endPos)) found = false;
					if (!dbcsCodePage || ws_upr == NULL) {
						/*
						MessageBoxA(NULL, "Text is Latin (ws_upr == NULL)", "First character matched", 0);
                                                */
						for (int posMatch = 1; posMatch < lengthFind && found; posMatch++) {
							ch = CharAt(pos + posMatch);
							char ch2 = s[posMatch];
							// for latin characters in non-UTF8 Unicode text
							// (thanks to Airix Z)
                if (!dbcsCodePage || (isascii(ch) && isascii(ch2))) {
								if (MakeUpperCase(ch) != MakeUpperCase(ch2))
									found = false;
							} else {
								if (ch != ch2)
									found = false;
							}
						}
					} 
					else {
						int i1, i2;

						/*
						MessageBoxA(NULL, "first matched!!!", "", 0);
						// OK
                                                */
						i1 = 1;
						i2 = pos + charLen;
						while (found && i1 < ws_len) {
							// LenChar returns 2 for "\r\n"
							// this is wrong for UTF8 because "\r\n" 
							// is not one character with length=2
							charLen = IsCrLf(i2) ? 1 : LenChar(i2);
							for (int i = 0; i < charLen; i++) {
								str[i] = CharAt(i2+i);
							}
							str[charLen] = 0;
							UCS2FromUTF8(str, charLen, wstr, 2);
							wstr[1] = 0;
							Platform_MakeUpperW(wstr, 1);
							found = (ws_upr[i1] == wstr[0]);
							i1++;
							i2 += charLen;
						}
					}
					if (found) {
						if ((!word && !wordStart) ||
						    word && IsWordAt(pos, pos + lengthFind) ||
						    wordStart && IsWordStartAt(pos)) {
						        if (ws_upr != NULL) {
								delete [] ws_upr;
								ws_upr = NULL;
							}
							return pos;
						}
					}
				}
💿 文件大小 3775 K
👤 上传用户 k_oyy
📂 所属分类多国语言处理
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -