📄 bidiorder.java
字号:
++limit; } byte succLevel = limit < textLength ? resultLevels[limit] : paragraphEmbeddingLevel; byte succType = typeForLevel(Math.max(succLevel, level)); // 3) resolving weak types // Rules W1-W7. resolveWeakTypes(start, limit, level, prevType, succType); // 4) resolving neutral types // Rules N1-N3. resolveNeutralTypes(start, limit, level, prevType, succType); // 5) resolving implicit embedding levels // Rules I1, I2. resolveImplicitLevels(start, limit, level, prevType, succType); prevLevel = level; start = limit; } // Reinsert explicit codes and assign appropriate levels to 'hide' them. // This is for convenience, so the resulting level array maps 1-1 // with the initial array. // See the implementation suggestions section of TR#9 for guidelines on // how to implement the algorithm without removing and reinserting the codes. textLength = reinsertExplicitCodes(textLength); } /** * 1) determining the paragraph level. * <p> * Rules P2, P3. * <p> * At the end of this function, the member variable paragraphEmbeddingLevel is set to either 0 or 1. */ private void determineParagraphEmbeddingLevel() { byte strongType = -1; // unknown // Rule P2. for (int i = 0; i < textLength; ++i) { byte t = resultTypes[i]; if (t == L || t == AL || t == R) { strongType = t; break; } } // Rule P3. if (strongType == -1) { // none found // default embedding level when no strong types found is 0. paragraphEmbeddingLevel = 0; } else if (strongType == L) { paragraphEmbeddingLevel = 0; } else { // AL, R paragraphEmbeddingLevel = 1; } } /** * Process embedding format codes. * <p> * Calls processEmbeddings to generate an embedding array from the explicit format codes. The * embedding overrides in the array are then applied to the result types, and the result levels are * initialized. * @see #processEmbeddings */ private void determineExplicitEmbeddingLevels() { embeddings = processEmbeddings(resultTypes, paragraphEmbeddingLevel); for (int i = 0; i < textLength; ++i) { byte level = embeddings[i]; if ((level & 0x80) != 0) { level &= 0x7f; resultTypes[i] = typeForLevel(level); } resultLevels[i] = level; } } /** * Rules X9. * Remove explicit codes so that they may be ignored during the remainder * of the main portion of the algorithm. The length of the resulting text * is returned. * @return the length of the data excluding explicit codes and BN. */ private int removeExplicitCodes() { int w = 0; for (int i = 0; i < textLength; ++i) { byte t = initialTypes[i]; if (!(t == LRE || t == RLE || t == LRO || t == RLO || t == PDF || t == BN)) { embeddings[w] = embeddings[i]; resultTypes[w] = resultTypes[i]; resultLevels[w] = resultLevels[i]; w++; } } return w; // new textLength while explicit levels are removed } /** * Reinsert levels information for explicit codes. * This is for ease of relating the level information * to the original input data. Note that the levels * assigned to these codes are arbitrary, they're * chosen so as to avoid breaking level runs. * @param textLength the length of the data after compression * @return the length of the data (original length of * types array supplied to constructor) */ private int reinsertExplicitCodes(int textLength) { for (int i = initialTypes.length; --i >= 0;) { byte t = initialTypes[i]; if (t == LRE || t == RLE || t == LRO || t == RLO || t == PDF || t == BN) { embeddings[i] = 0; resultTypes[i] = t; resultLevels[i] = -1; } else { --textLength; embeddings[i] = embeddings[textLength]; resultTypes[i] = resultTypes[textLength]; resultLevels[i] = resultLevels[textLength]; } } // now propagate forward the levels information (could have // propagated backward, the main thing is not to introduce a level // break where one doesn't already exist). if (resultLevels[0] == -1) { resultLevels[0] = paragraphEmbeddingLevel; } for (int i = 1; i < initialTypes.length; ++i) { if (resultLevels[i] == -1) { resultLevels[i] = resultLevels[i-1]; } } // Embedding information is for informational purposes only // so need not be adjusted. return initialTypes.length; } /** * 2) determining explicit levels * Rules X1 - X8 * * The interaction of these rules makes handling them a bit complex. * This examines resultTypes but does not modify it. It returns embedding and * override information in the result array. The low 7 bits are the level, the high * bit is set if the level is an override, and clear if it is an embedding. */ private static byte[] processEmbeddings(byte[] resultTypes, byte paragraphEmbeddingLevel) { final int EXPLICIT_LEVEL_LIMIT = 62; int textLength = resultTypes.length; byte[] embeddings = new byte[textLength]; // This stack will store the embedding levels and override status in a single byte // as described above. byte[] embeddingValueStack = new byte[EXPLICIT_LEVEL_LIMIT]; int stackCounter = 0; // An LRE or LRO at level 60 is invalid, since the new level 62 is invalid. But // an RLE at level 60 is valid, since the new level 61 is valid. The current wording // of the rules requires that the RLE remain valid even if a previous LRE is invalid. // This keeps track of ignored LRE or LRO codes at level 60, so that the matching PDFs // will not try to pop the stack. int overflowAlmostCounter = 0; // This keeps track of ignored pushes at level 61 or higher, so that matching PDFs will // not try to pop the stack. int overflowCounter = 0; // Rule X1. // Keep the level separate from the value (level | override status flag) for ease of access. byte currentEmbeddingLevel = paragraphEmbeddingLevel; byte currentEmbeddingValue = paragraphEmbeddingLevel; // Loop through types, handling all remaining rules for (int i = 0; i < textLength; ++i) { embeddings[i] = currentEmbeddingValue; byte t = resultTypes[i]; // Rules X2, X3, X4, X5 switch (t) { case RLE: case LRE: case RLO: case LRO: // Only need to compute new level if current level is valid if (overflowCounter == 0) { byte newLevel; if (t == RLE || t == RLO) { newLevel = (byte)((currentEmbeddingLevel + 1) | 1); // least greater odd } else { // t == LRE || t == LRO newLevel = (byte)((currentEmbeddingLevel + 2) & ~1); // least greater even } // If the new level is valid, push old embedding level and override status // No check for valid stack counter, since the level check suffices. if (newLevel < EXPLICIT_LEVEL_LIMIT) { embeddingValueStack[stackCounter] = currentEmbeddingValue; stackCounter++; currentEmbeddingLevel = newLevel; if (t == LRO || t == RLO) { // override currentEmbeddingValue = (byte)(newLevel | 0x80); } else { currentEmbeddingValue = newLevel; } // Adjust level of format mark (for expositional purposes only, this gets // removed later). embeddings[i] = currentEmbeddingValue; break; } // Otherwise new level is invalid, but a valid level can still be achieved if this // level is 60 and we encounter an RLE or RLO further on. So record that we // 'almost' overflowed. if (currentEmbeddingLevel == 60) { overflowAlmostCounter++; break; } } // Otherwise old or new level is invalid. overflowCounter++; break; case PDF: // The only case where this did not actually overflow but may have almost overflowed // is when there was an RLE or RLO on level 60, which would result in level 61. So we // only test the almost overflow condition in that case. // // Also note that there may be a PDF without any pushes at all. if (overflowCounter > 0) { --overflowCounter; } else if (overflowAlmostCounter > 0 && currentEmbeddingLevel != 61) { --overflowAlmostCounter; } else if (stackCounter > 0) { --stackCounter; currentEmbeddingValue = embeddingValueStack[stackCounter]; currentEmbeddingLevel = (byte)(currentEmbeddingValue & 0x7f); } break; case B: // Rule X8. // These values are reset for clarity, in this implementation B can only // occur as the last code in the array. stackCounter = 0; overflowCounter = 0; overflowAlmostCounter = 0; currentEmbeddingLevel = paragraphEmbeddingLevel; currentEmbeddingValue = paragraphEmbeddingLevel; embeddings[i] = paragraphEmbeddingLevel; break; default: break; } } return embeddings; } /** * 3) resolving weak types * Rules W1-W7. * * Note that some weak types (EN, AN) remain after this processing is complete. */ private void resolveWeakTypes(int start, int limit, byte level, byte sor, byte eor) { // Rule W1. // Changes all NSMs. byte preceedingCharacterType = sor; for (int i = start; i < limit; ++i) { byte t = resultTypes[i]; if (t == NSM) { resultTypes[i] = preceedingCharacterType; } else { preceedingCharacterType = t; } } // Rule W2. // EN does not change at the start of the run, because sor != AL. for (int i = start; i < limit; ++i) { if (resultTypes[i] == EN) { for (int j = i - 1; j >= start; --j) { byte t = resultTypes[j]; if (t == L || t == R || t == AL) { if (t == AL) { resultTypes[i] = AN; } break; } } } } // Rule W3. for (int i = start; i < limit; ++i) { if (resultTypes[i] == AL) { resultTypes[i] = R; } } // Rule W4. // Since there must be values on both sides for this rule to have an // effect, the scan skips the first and last value.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -