📄 bidi.java
字号:
continue; } // No explicit embedding. boolean isLtoR = false; boolean isSpecial = true; switch (types[i]) { case Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING: case Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE: isLtoR = true; // Fall through. case Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING: case Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE: { byte newEmbedding; if (isLtoR) { // Least greater even. newEmbedding = (byte) ((currentEmbedding & ~1) + 2); } else { // Least greater odd. newEmbedding = (byte) ((currentEmbedding + 1) | 1); } // FIXME: we don't properly handle invalid pushes. if (newEmbedding < MAX_DEPTH) { // The new level is valid. Push the old value. // See above for a comment on the encoding here. if (directionalOverride != -1) currentEmbedding |= Byte.MIN_VALUE; embeddingStack[sp++] = currentEmbedding; currentEmbedding = newEmbedding; if (types[i] == Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE) directionalOverride = Character.DIRECTIONALITY_LEFT_TO_RIGHT; else if (types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE) directionalOverride = Character.DIRECTIONALITY_RIGHT_TO_LEFT; else directionalOverride = -1; } } break; case Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT: { // FIXME: we don't properly handle a pop with a corresponding // invalid push. if (sp == 0) { // We saw a pop without a push. Just ignore it. break; } byte newEmbedding = embeddingStack[--sp]; currentEmbedding = (byte) (newEmbedding & 0x7f); if (newEmbedding < 0) directionalOverride = (((newEmbedding & 1) == 0) ? Character.DIRECTIONALITY_LEFT_TO_RIGHT : Character.DIRECTIONALITY_RIGHT_TO_LEFT); else directionalOverride = -1; } break; default: isSpecial = false; break; } levels[i] = currentEmbedding; if (isSpecial) { // Mark this character for removal. if (formatterIndices == null) formatterIndices = new ArrayList(); formatterIndices.add(Integer.valueOf(i)); } else if (directionalOverride != -1) types[i] = directionalOverride; } // Remove the formatting codes and update both the arrays // and 'length'. It would be more efficient not to remove // these codes, but it is also more complicated. Also, the // Unicode algorithm reference does not properly describe // how this is to be done -- from what I can tell, their suggestions // in this area will not yield the correct results. if (formatterIndices == null) return; int output = 0, input = 0; final int size = formatterIndices.size(); for (int i = 0; i <= size; ++i) { int nextFmt; if (i == size) nextFmt = length; else nextFmt = ((Integer) formatterIndices.get(i)).intValue(); // Non-formatter codes are from 'input' to 'nextFmt'. int len = nextFmt - input; System.arraycopy(levels, input, levels, output, len); System.arraycopy(types, input, types, output, len); output += len; input = nextFmt + 1; } length -= formatterIndices.size(); } /** * An internal function to compute the boundaries of runs * in the text. It isn't strictly necessary to do this, but * it lets us write some following passes in a less complicated * way. Also it lets us efficiently implement some of the public * methods. A run is simply a sequence of characters at the * same level. */ private void computeRuns() { int runCount = 0; int currentEmbedding = baseEmbedding; for (int i = 0; i < length; ++i) { if (levels[i] != currentEmbedding) { currentEmbedding = levels[i]; ++runCount; } } // This may be called multiple times. If so, and if // the number of runs has not changed, then don't bother // allocating a new array. if (runs == null || runs.length != runCount + 1) runs = new int[runCount + 1]; int where = 0; int lastRunStart = 0; currentEmbedding = baseEmbedding; for (int i = 0; i < length; ++i) { if (levels[i] != currentEmbedding) { runs[where++] = lastRunStart; lastRunStart = i; currentEmbedding = levels[i]; } } runs[where++] = lastRunStart; } /** * An internal method to resolve weak types. This implements * rules W1 through W7. */ private void resolveWeakTypes() { final int runCount = getRunCount(); int previousLevel = baseEmbedding; for (int run = 0; run < runCount; ++run) { int start = getRunStart(run); int end = getRunLimit(run); int level = getRunLevel(run); // These are the names used in the Bidi algorithm. byte sor = (((Math.max(previousLevel, level) % 2) == 0) ? Character.DIRECTIONALITY_LEFT_TO_RIGHT : Character.DIRECTIONALITY_RIGHT_TO_LEFT); int nextLevel; if (run == runCount - 1) nextLevel = baseEmbedding; else nextLevel = getRunLevel(run + 1); byte eor = (((Math.max(level, nextLevel) % 2) == 0) ? Character.DIRECTIONALITY_LEFT_TO_RIGHT : Character.DIRECTIONALITY_RIGHT_TO_LEFT); byte prevType = sor; byte prevStrongType = sor; for (int i = start; i < end; ++i) { final byte nextType = (i == end - 1) ? eor : types[i + 1]; // Rule W1: change NSM to the prevailing direction. if (types[i] == Character.DIRECTIONALITY_NONSPACING_MARK) types[i] = prevType; else prevType = types[i]; // Rule W2: change EN to AN in some cases. if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER) { if (prevStrongType == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC) types[i] = Character.DIRECTIONALITY_ARABIC_NUMBER; } else if (types[i] == Character.DIRECTIONALITY_LEFT_TO_RIGHT || types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT || types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC) prevStrongType = types[i]; // Rule W3: change AL to R. if (types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC) types[i] = Character.DIRECTIONALITY_RIGHT_TO_LEFT; // Rule W4: handle separators between two numbers. if (prevType == Character.DIRECTIONALITY_EUROPEAN_NUMBER && nextType == Character.DIRECTIONALITY_EUROPEAN_NUMBER) { if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR || types[i] == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR) types[i] = nextType; } else if (prevType == Character.DIRECTIONALITY_ARABIC_NUMBER && nextType == Character.DIRECTIONALITY_ARABIC_NUMBER && types[i] == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR) types[i] = nextType; // Rule W5: change a sequence of european terminators to // european numbers, if they are adjacent to european numbers. // We also include BN characters in this. if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR || types[i] == Character.DIRECTIONALITY_BOUNDARY_NEUTRAL) { if (prevType == Character.DIRECTIONALITY_EUROPEAN_NUMBER) types[i] = prevType; else { // Look ahead to see if there is an EN terminating this // sequence of ETs. int j = i + 1; while (j < end && (types[j] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR || types[j] == Character.DIRECTIONALITY_BOUNDARY_NEUTRAL)) ++j; if (j < end && types[j] == Character.DIRECTIONALITY_EUROPEAN_NUMBER) { // Change them all to EN now. for (int k = i; k < j; ++k) types[k] = Character.DIRECTIONALITY_EUROPEAN_NUMBER; } } } // Rule W6: separators and terminators change to ON. // Again we include BN. if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR || types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR || types[i] == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR || types[i] == Character.DIRECTIONALITY_BOUNDARY_NEUTRAL) types[i] = Character.DIRECTIONALITY_OTHER_NEUTRALS; // Rule W7: change european number types. if (prevStrongType == Character.DIRECTIONALITY_LEFT_TO_RIGHT && types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER) types[i] = prevStrongType; } previousLevel = level; } } /** * An internal method to resolve neutral types. This implements * rules N1 and N2. */ private void resolveNeutralTypes() { // This implements rules N1 and N2. final int runCount = getRunCount(); int previousLevel = baseEmbedding; for (int run = 0; run < runCount; ++run) { int start = getRunStart(run); int end = getRunLimit(run); int level = getRunLevel(run); byte embeddingDirection = (((level % 2) == 0) ? Character.DIRECTIONALITY_LEFT_TO_RIGHT : Character.DIRECTIONALITY_RIGHT_TO_LEFT); // These are the names used in the Bidi algorithm. byte sor = (((Math.max(previousLevel, level) % 2) == 0) ? Character.DIRECTIONALITY_LEFT_TO_RIGHT : Character.DIRECTIONALITY_RIGHT_TO_LEFT); int nextLevel; if (run == runCount - 1) nextLevel = baseEmbedding; else nextLevel = getRunLevel(run + 1); byte eor = (((Math.max(level, nextLevel) % 2) == 0) ? Character.DIRECTIONALITY_LEFT_TO_RIGHT : Character.DIRECTIONALITY_RIGHT_TO_LEFT); byte prevStrong = sor; int neutralStart = -1; for (int i = start; i <= end; ++i) { byte newStrong = -1; byte thisType = i == end ? eor : types[i]; switch (thisType) { case Character.DIRECTIONALITY_LEFT_TO_RIGHT: newStrong = Character.DIRECTIONALITY_LEFT_TO_RIGHT; break; case Character.DIRECTIONALITY_RIGHT_TO_LEFT: case Character.DIRECTIONALITY_ARABIC_NUMBER: case Character.DIRECTIONALITY_EUROPEAN_NUMBER: newStrong = Character.DIRECTIONALITY_RIGHT_TO_LEFT; break; case Character.DIRECTIONALITY_BOUNDARY_NEUTRAL: case Character.DIRECTIONALITY_OTHER_NEUTRALS: case Character.DIRECTIONALITY_SEGMENT_SEPARATOR: case Character.DIRECTIONALITY_PARAGRAPH_SEPARATOR: if (neutralStart == -1) neutralStart = i; break; } // If we see a strong character, update all the neutrals. if (newStrong != -1) { if (neutralStart != -1) { byte override = (prevStrong == newStrong ? prevStrong : embeddingDirection); for (int j = neutralStart; j < i; ++j) types[i] = override; } prevStrong = newStrong; neutralStart = -1; } } previousLevel = level; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -