📄 bidiorder.java

📁 iText是一个能够快速产生PDF文件的java类库。iText的java类对于那些要产生包含文本
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
                ++limit;
            }
            
            byte succLevel = limit < textLength ? resultLevels[limit] : paragraphEmbeddingLevel;
            byte succType = typeForLevel(Math.max(succLevel, level));
            
            // 3) resolving weak types
            // Rules W1-W7.
            resolveWeakTypes(start, limit, level, prevType, succType);
            
            // 4) resolving neutral types
            // Rules N1-N3.
            resolveNeutralTypes(start, limit, level, prevType, succType);
            
            // 5) resolving implicit embedding levels
            // Rules I1, I2.
            resolveImplicitLevels(start, limit, level, prevType, succType);
            
            prevLevel = level;
            start = limit;
        }
        
        // Reinsert explicit codes and assign appropriate levels to 'hide' them.
        // This is for convenience, so the resulting level array maps 1-1
        // with the initial array.
        // See the implementation suggestions section of TR#9 for guidelines on
        // how to implement the algorithm without removing and reinserting the codes.
        textLength = reinsertExplicitCodes(textLength);
    }
    
    /**
     * Step 1 of the algorithm: determines the paragraph embedding level
     * (rules P2 and P3).
     * <p>
     * The first {@code textLength} entries of {@code resultTypes} are scanned
     * for the first strong type (L, AL or R). The member variable
     * {@code paragraphEmbeddingLevel} is then set to 1 when that type is AL
     * or R, and to 0 when it is L or when no strong type exists at all.
     */
    private void determineParagraphEmbeddingLevel() {
        // Rule P3 default: level 0 unless a right-to-left strong type comes first.
        byte level = 0;

        // Rule P2: only the first strong type matters, so stop at it.
        int index = 0;
        while (index < textLength) {
            final byte type = resultTypes[index];
            if (type == L) {
                break; // left-to-right paragraph, keep level 0
            }
            if (type == AL || type == R) {
                level = 1; // right-to-left paragraph
                break;
            }
            index++;
        }

        paragraphEmbeddingLevel = level;
    }
    
    /**
     * Processes the explicit embedding format codes.
     * <p>
     * Delegates to {@link #processEmbeddings} to build the {@code embeddings}
     * array, in which the low 7 bits of each entry hold a level and the high
     * bit marks an override. For each of the first {@code textLength}
     * positions the plain level is stored in {@code resultLevels}; where the
     * override bit is set, the entry in {@code resultTypes} is additionally
     * replaced by {@code typeForLevel(level)}.
     * @see #processEmbeddings
     */
    private void determineExplicitEmbeddingLevels() {
        embeddings = processEmbeddings(resultTypes, paragraphEmbeddingLevel);

        int pos = 0;
        while (pos < textLength) {
            final byte value = embeddings[pos];
            // Strip the override flag; a no-op when the flag is clear.
            final byte plainLevel = (byte) (value & 0x7f);

            // The override flag is the top bit of the byte, i.e. its sign bit.
            if (value < 0) {
                resultTypes[pos] = typeForLevel(plainLevel);
            }
            resultLevels[pos] = plainLevel;
            pos++;
        }
    }
    
    /**
     * Rule X9.
     * Removes the explicit codes (LRE, RLE, LRO, RLO, PDF) and BN so that they
     * can be ignored during the remainder of the main portion of the
     * algorithm. The surviving entries of {@code embeddings},
     * {@code resultTypes} and {@code resultLevels} are compacted towards the
     * front of their arrays, in order. The decision is made on
     * {@code initialTypes}, which is not modified.
     * @return the length of the data excluding explicit codes and BN; the
     * {@code textLength} field itself is not updated here
     */
    private int removeExplicitCodes() {
        int kept = 0;
        for (int src = 0; src < textLength; src++) {
            final byte type = initialTypes[src];
            final boolean removed = type == LRE || type == RLE || type == LRO
                    || type == RLO || type == PDF || type == BN;
            if (removed) {
                continue;
            }
            // kept <= src always holds, so copying forward never clobbers unread data.
            embeddings[kept] = embeddings[src];
            resultTypes[kept] = resultTypes[src];
            resultLevels[kept] = resultLevels[src];
            kept++;
        }
        return kept; // working text length while the explicit codes are removed
    }
    
    /**
     * Reinserts level information for the explicit codes (LRE, RLE, LRO, RLO,
     * PDF) and BN.
     * This is for ease of relating the level information to the original
     * input data. Note that the levels assigned to these codes are arbitrary;
     * they are chosen so as to avoid breaking level runs.
     * <p>
     * The compressed contents of {@code embeddings}, {@code resultTypes} and
     * {@code resultLevels} are expanded in place back to the layout of
     * {@code initialTypes}. An empty input is handled explicitly: nothing is
     * touched and 0 is returned.
     * @param textLength the length of the data after compression
     * @return the length of the data (original length of
     * types array supplied to constructor)
     */
    private int reinsertExplicitCodes(int textLength) {
        final int originalLength = initialTypes.length;
        if (originalLength == 0) {
            // Without this guard the resultLevels[0] access below would throw
            // ArrayIndexOutOfBoundsException for empty input.
            return 0;
        }

        // Walk backwards: the read index (textLength) never exceeds the write
        // index (i), so expanding in place cannot overwrite unread data.
        for (int i = originalLength; --i >= 0;) {
            byte t = initialTypes[i];
            if (t == LRE || t == RLE || t == LRO || t == RLO || t == PDF || t == BN) {
                embeddings[i] = 0;
                resultTypes[i] = t;
                resultLevels[i] = -1; // placeholder, resolved by the forward pass below
            } else {
                --textLength;
                embeddings[i] = embeddings[textLength];
                resultTypes[i] = resultTypes[textLength];
                resultLevels[i] = resultLevels[textLength];
            }
        }

        // Now propagate the levels information forward (it could equally have
        // been propagated backward; the main thing is not to introduce a level
        // break where one doesn't already exist).
        if (resultLevels[0] == -1) {
            resultLevels[0] = paragraphEmbeddingLevel;
        }
        for (int i = 1; i < originalLength; ++i) {
            if (resultLevels[i] == -1) {
                resultLevels[i] = resultLevels[i - 1];
            }
        }

        // Embedding information is for informational purposes only
        // so need not be adjusted.

        return originalLength;
    }
    
    /**
     * Step 2 of the algorithm: determines the explicit levels (rules X1 - X8).
     * <p>
     * The interaction of these rules makes handling them a bit complex.
     * This method reads {@code resultTypes} but does not modify it. It returns
     * a new array of the same length holding, per position, the embedding
     * value in effect: the low 7 bits are the level, and the high bit is set
     * if the level is an override and clear if it is an embedding.
     *
     * @param resultTypes the types to examine
     * @param paragraphEmbeddingLevel the level to start from, and to return to at a B
     * @return the embedding/override value for each position of {@code resultTypes}
     */
    private static byte[] processEmbeddings(byte[] resultTypes, byte paragraphEmbeddingLevel) {
        final int EXPLICIT_LEVEL_LIMIT = 62;

        final int count = resultTypes.length;
        final byte[] result = new byte[count];

        // Stack of saved embedding values (level plus override flag in one byte).
        final byte[] savedValues = new byte[EXPLICIT_LEVEL_LIMIT];
        int depth = 0;

        // An LRE or LRO at level 60 is invalid because the new level 62 is invalid,
        // whereas an RLE at level 60 is valid because the new level 61 is valid, and
        // must stay valid even after an earlier ignored LRE. This counts the LRE/LRO
        // codes ignored at level 60 so their matching PDFs do not pop the stack.
        int nearOverflows = 0;

        // Counts the pushes ignored at level 61 or higher, so that their matching
        // PDFs do not pop the stack either.
        int overflows = 0;

        // Rule X1. The bare level is tracked separately from the value
        // (level | override flag) for ease of access.
        byte level = paragraphEmbeddingLevel;
        byte value = paragraphEmbeddingLevel;

        for (int pos = 0; pos < count; pos++) {
            final byte type = resultTypes[pos];

            // By default a position carries the value in effect before it.
            result[pos] = value;

            final boolean pushesOdd = type == RLE || type == RLO;
            final boolean pushesEven = type == LRE || type == LRO;

            if (pushesOdd || pushesEven) {
                // Rules X2, X3, X4, X5.
                if (overflows != 0) {
                    // Already past the limit: the old level is invalid.
                    overflows++;
                } else {
                    final byte candidate = pushesOdd
                            ? (byte) ((level + 1) | 1)   // least greater odd
                            : (byte) ((level + 2) & ~1); // least greater even

                    if (candidate < EXPLICIT_LEVEL_LIMIT) {
                        // Valid new level: save the old value. The level check alone
                        // keeps the stack index in range.
                        savedValues[depth] = value;
                        depth++;

                        level = candidate;
                        final boolean override = type == LRO || type == RLO;
                        value = override ? (byte) (candidate | 0x80) : candidate;

                        // Give the format mark itself the new value (for expositional
                        // purposes only, this gets removed later).
                        result[pos] = value;
                    } else if (level == 60) {
                        // Invalid new level, but an RLE or RLO further on could still
                        // reach a valid one, so record an 'almost' overflow.
                        nearOverflows++;
                    } else {
                        // The new level is invalid.
                        overflows++;
                    }
                }
            } else if (type == PDF) {
                // The only case where this did not actually overflow but may have almost
                // overflowed is an RLE or RLO on level 60, which results in level 61, so
                // the almost-overflow count is only consumed when not at level 61.
                //
                // Also note that there may be a PDF without any pushes at all.
                if (overflows > 0) {
                    overflows--;
                } else if (nearOverflows > 0 && level != 61) {
                    nearOverflows--;
                } else if (depth > 0) {
                    depth--;
                    value = savedValues[depth];
                    level = (byte) (value & 0x7f);
                }
            } else if (type == B) {
                // Rule X8.
                // These values are reset for clarity; in this implementation B can only
                // occur as the last code in the array.
                depth = 0;
                overflows = 0;
                nearOverflows = 0;
                level = paragraphEmbeddingLevel;
                value = paragraphEmbeddingLevel;

                result[pos] = paragraphEmbeddingLevel;
            }
            // Every other type leaves the embedding state unchanged.
        }

        return result;
    }
    
    
    /**
     * 3) resolving weak types
     * Rules W1-W7.
     *
     * Note that some weak types (EN, AN) remain after this processing is complete.
     */
    private void resolveWeakTypes(int start, int limit, byte level, byte sor, byte eor) {
        
        // Rule W1.
        // Changes all NSMs.
        byte preceedingCharacterType = sor;
        for (int i = start; i < limit; ++i) {
            byte t = resultTypes[i];
            if (t == NSM) {
                resultTypes[i] = preceedingCharacterType;
            } else {
                preceedingCharacterType = t;
            }
        }
        
        // Rule W2.
        // EN does not change at the start of the run, because sor != AL.
        for (int i = start; i < limit; ++i) {
            if (resultTypes[i] == EN) {
                for (int j = i - 1; j >= start; --j) {
                    byte t = resultTypes[j];
                    if (t == L || t == R || t == AL) {
                        if (t == AL) {
                            resultTypes[i] = AN;
                        }
                        break;
                    }
                }
            }
        }
        
        // Rule W3.
        for (int i = start; i < limit; ++i) {
            if (resultTypes[i] == AL) {
                resultTypes[i] = R;
            }
        }
        
        // Rule W4.
        // Since there must be values on both sides for this rule to have an
        // effect, the scan skips the first and last value.
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -