📄 arabicshaping.java

📁 一个java操作pdf文件的开发包,很好用的.
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/*********************************************************************************   Copyright (C) 2001, International Business Machines*   Corporation and others.  All Rights Reserved.********************************************************************************/package com.lowagie.text.pdf;//import com.ibm.icu.lang.*;/** * Shape Arabic text on a character basis. * * <p>ArabicShaping performs basic operations for "shaping" Arabic text. It is most * useful for use with legacy data formats and legacy display technology * (simple terminals). All operations are performed on Unicode characters.</p> * * <p>Text-based shaping means that some character code points in the text are * replaced by others depending on the context. It transforms one kind of text * into another. In comparison, modern displays for Arabic text select * appropriate, context-dependent font glyphs for each text element, which means * that they transform text into a glyph vector.</p> * * <p>Text transformations are necessary when modern display technology is not * available or when text needs to be transformed to or from legacy formats that * use "shaped" characters. Since the Arabic script is cursive, connecting * adjacent letters to each other, computers select images for each letter based * on the surrounding letters. This usually results in four images per Arabic * letter: initial, middle, final, and isolated forms. In Unicode, on the other * hand, letters are normally stored abstract, and a display system is expected * to select the necessary glyphs. (This makes searching and other text * processing easier because the same letter has only one code.) It is possible * to mimic this with text transformations because there are characters in * Unicode that are rendered as letters with a specific shape * (or cursive connectivity). 
They were included for interoperability with * legacy systems and codepages, and for unsophisticated display systems.</p> * * <p>A second kind of text transformations is supported for Arabic digits: * For compatibility with legacy codepages that only include European digits, * it is possible to replace one set of digits by another, changing the * character code points. These operations can be performed for either * Arabic-Indic Digits (U+0660...U+0669) or Eastern (Extended) Arabic-Indic * digits (U+06f0...U+06f9).</p> * * <p>Some replacements may result in more or fewer characters (code points). * By default, this means that the destination buffer may receive text with a * length different from the source length. Some legacy systems rely on the * length of the text to be constant. They expect extra spaces to be added * or consumed either next to the affected character or at the end of the * text.</p> */public final class ArabicShaping {    private final int options;    private boolean isLogical; // convenience    /**     * Convert a range of text in the source array, putting the result      * into a range of text in the destination array, and return the number     * of characters written.     *     * @param source An array containing the input text     * @param sourceStart The start of the range of text to convert     * @param sourceLength The length of the range of text to convert     * @param dest The destination array that will receive the result.     *   It may be <code>NULL</code> only if  <code>destSize</code> is 0.       * @param destStart The start of the range of the destination buffer to use.     * @param destSize The size (capacity) of the destination buffer.     *   If <code>destSize</code> is 0, then no output is produced,     *   but the necessary buffer size is returned ("preflighting").  
This     *   does not validate the text against the options, for example,      *   if letters are being unshaped, and spaces are being consumed     *   following lamalef, this will not detect a lamalef without a      *   corresponding space.  An error will be thrown when the actual     *   conversion is attempted.     * @return The number of chars written to the destination buffer.     *   If an error occurs, then no output was written, or it may be     *   incomplete.     * @if the text cannot be converted according to the options.     */    public int shape(char[] source, int sourceStart, int sourceLength,                     char[] dest, int destStart, int destSize) {        if (source == null) {            throw new IllegalArgumentException("source can not be null");        }        if (sourceStart < 0 || sourceLength < 0 || sourceStart + sourceLength > source.length) {            throw new IllegalArgumentException("bad source start (" + sourceStart +                                               ") or length (" + sourceLength +                                               ") for buffer of length " + source.length);        }        if (dest == null && destSize != 0) {            throw new IllegalArgumentException("null dest requires destSize == 0");        }        if ((destSize != 0) &&            (destStart < 0 || destSize < 0 || destStart + destSize > dest.length)) {            throw new IllegalArgumentException("bad dest start (" + destStart +                                                ") or size (" + destSize +                                                ") for buffer of length " + dest.length);        }        return internalShape(source, sourceStart, sourceLength, dest, destStart, destSize);    }    /**     * Convert a range of text in place.  This may only be used if the Length option     * does not grow or shrink the text.     
*     * @param source An array containing the input text     * @param start The start of the range of text to convert     * @param length The length of the range of text to convert     * @if the text cannot be converted according to the options.     */    public void shape(char[] source, int start, int length) {        if ((options & LENGTH_MASK) == LENGTH_GROW_SHRINK) {            throw new RuntimeException("Cannot shape in place with length option grow/shrink.");        }        shape(source, start, length, source, start, length);    }    /**     * Convert a string, returning the new string.     *     * @param source The string to convert.     * @return The converted string.     * @if the string cannot be converted according to the options.     */    public String shape(String text) {        char[] src = text.toCharArray();        char[] dest = src;        if (((options & LENGTH_MASK) == LENGTH_GROW_SHRINK) &&            ((options & LETTERS_MASK) == LETTERS_UNSHAPE)) {            dest = new char[src.length * 2]; // max        }        int len = shape(src, 0, src.length, dest, 0, dest.length);        return new String(dest, 0, len);    }    /**     * Construct ArabicShaping using the options flags.     * The flags are as follows:<br>     * 'LENGTH' flags control whether the text can change size, and if not,     * how to maintain the size of the text when LamAlef ligatures are      * formed or broken.<br>     * 'TEXT_DIRECTION' flags control whether the text is read and written     * in visual order or in logical order.<br>     * 'LETTERS_SHAPE' flags control whether conversion is to or from     * presentation forms.<br>     * 'DIGITS' flags control whether digits are shaped, and whether from     * European to Arabic-Indic or vice-versa.<br>     * 'DIGIT_TYPE' flags control whether standard or extended Arabic-Indic     * digits are used when performing digit conversion.     
*/    public ArabicShaping(int options) {        this.options = options;        if ((options & DIGITS_MASK) > 0x80) {            throw new IllegalArgumentException("bad DIGITS options");        }        isLogical = (options & TEXT_DIRECTION_MASK) == TEXT_DIRECTION_LOGICAL;    }    /**     * Memory option: allow the result to have a different length than the source.     */    public static final int LENGTH_GROW_SHRINK = 0;    /**     * Memory option: the result must have the same length as the source.     * If more room is necessary, then try to consume spaces next to modified characters.     */    public static final int LENGTH_FIXED_SPACES_NEAR = 1;    /**     * Memory option: the result must have the same length as the source.     * If more room is necessary, then try to consume spaces at the end of the text.     */    public static final int LENGTH_FIXED_SPACES_AT_END = 2;    /**     * Memory option: the result must have the same length as the source.     * If more room is necessary, then try to consume spaces at the beginning of the text.     */    public static final int LENGTH_FIXED_SPACES_AT_BEGINNING = 3;    /**      * Bit mask for memory options.      */    public static final int LENGTH_MASK = 3;    /**      * Direction indicator: the source is in logical (keyboard) order.      */    public static final int TEXT_DIRECTION_LOGICAL = 0;    /**      * Direction indicator: the source is in visual (display) order, that is,     * the leftmost displayed character is stored first.     */    public static final int TEXT_DIRECTION_VISUAL_LTR = 4;    /**      * Bit mask for direction indicators.      */    public static final int TEXT_DIRECTION_MASK = 4;    /**     * Letter shaping option: do not perform letter shaping.      */    public static final int LETTERS_NOOP = 0;    /**      * Letter shaping option: replace normative letter characters in the U+0600 (Arabic) block,     * by shaped ones in the U+FE70 (Presentation Forms B) block. 
Performs Lam-Alef ligature
     * substitution.
     */
    public static final int LETTERS_SHAPE = 8;

    /**
     * Letter shaping option: replace shaped letter characters in the U+FE70 (Presentation Forms B) block
     * by normative ones in the U+0600 (Arabic) block.  Converts Lam-Alef ligatures to pairs of Lam and
     * Alef characters, consuming spaces if required.
     */
    public static final int LETTERS_UNSHAPE = 0x10;

    /**
     * Letter shaping option: replace normative letter characters in the U+0600 (Arabic) block,
     * except for the TASHKEEL characters at U+064B...U+0652, by shaped ones in the U+FE70
     * (Presentation Forms B) block.  The TASHKEEL characters will always be converted to
     * the isolated forms rather than to their correct shape.
     * The value has both letter-option bits set (LETTERS_SHAPE | LETTERS_UNSHAPE),
     * so it is numerically equal to LETTERS_MASK.
     */
    public static final int LETTERS_SHAPE_TASHKEEL_ISOLATED = 0x18;

    /**
     * Bit mask for letter shaping options.
     */
    public static final int LETTERS_MASK = 0x18;

    /**
     * Digit shaping option: do not perform digit shaping.
     */
    public static final int DIGITS_NOOP = 0;

    /**
     * Digit shaping option: Replace European digits (U+0030...U+0039) by Arabic-Indic digits.
     */
    public static final int DIGITS_EN2AN = 0x20;

    /**
     * Digit shaping option: Replace Arabic-Indic digits by European digits (U+0030...U+0039).
     */
    public static final int DIGITS_AN2EN = 0x40;

    /**
     * Digit shaping option:
     * Replace European digits (U+0030...U+0039) by Arabic-Indic digits
     * if the most recent strongly directional character
     * is an Arabic letter (its Bidi direction value is RIGHT_TO_LEFT_ARABIC).
     * The initial state at the start of the text is assumed to be not an Arabic
     * letter, so European digits at the start of the text will not change.
     * Compare to DIGITS_EN2AN_INIT_AL.
*/
    public static final int DIGITS_EN2AN_INIT_LR = 0x60;

    /**
     * Digit shaping option:
     * Replace European digits (U+0030...U+0039) by Arabic-Indic digits
     * if the most recent strongly directional character
     * is an Arabic letter (its Bidi direction value is RIGHT_TO_LEFT_ARABIC).
     * The initial state at the start of the text is assumed to be an Arabic
     * letter, so European digits at the start of the text will change.
     * Compare to DIGITS_EN2AN_INIT_LR.
     */
    public static final int DIGITS_EN2AN_INIT_AL = 0x80;

    /** Not a valid option value. */
    private static final int DIGITS_RESERVED = 0xa0;

    /**
     * Bit mask for digit shaping options.
     */
    public static final int DIGITS_MASK = 0xe0;

    /**
     * Digit type option: Use Arabic-Indic digits (U+0660...U+0669).
     */
    public static final int DIGIT_TYPE_AN = 0;

    /**
     * Digit type option: Use Eastern (Extended) Arabic-Indic digits (U+06f0...U+06f9).
     */
    public static final int DIGIT_TYPE_AN_EXTENDED = 0x100;

    /**
     * Bit mask for digit type options.
     */
    public static final int DIGIT_TYPE_MASK = 0x0100; // 0x3f00?
public boolean equals(Object rhs) {
        // Equal only to another ArabicShaping carrying the same option flags.
        if (rhs == null) {
            return false;
        }
        if (rhs.getClass() != ArabicShaping.class) {
            return false;
        }
        ArabicShaping other = (ArabicShaping) rhs;
        return options == other.options;
    }

    /**
     * The option flags double as the hash code, matching equals().
     */
    public int hashCode() {
        return options;
    }

    /**
     * Describes the configured options in readable form: the default
     * Object.toString() text followed by a bracketed, comma-separated list
     * with one entry per option group (length, direction, letters, digits,
     * digit type).  A group whose value matches none of the tests below
     * contributes nothing.
     */
    public String toString() {
        StringBuffer text = new StringBuffer(super.toString());
        text.append('[');

        int lengthOption = options & LENGTH_MASK;
        if (lengthOption == LENGTH_GROW_SHRINK) {
            text.append("grow/shrink");
        } else if (lengthOption == LENGTH_FIXED_SPACES_NEAR) {
            text.append("spaces near");
        } else if (lengthOption == LENGTH_FIXED_SPACES_AT_END) {
            text.append("spaces at end");
        } else if (lengthOption == LENGTH_FIXED_SPACES_AT_BEGINNING) {
            text.append("spaces at beginning");
        }

        int directionOption = options & TEXT_DIRECTION_MASK;
        if (directionOption == TEXT_DIRECTION_LOGICAL) {
            text.append(", logical");
        } else if (directionOption == TEXT_DIRECTION_VISUAL_LTR) {
            text.append(", visual");
        }

        int lettersOption = options & LETTERS_MASK;
        if (lettersOption == LETTERS_NOOP) {
            text.append(", no letter shaping");
        } else if (lettersOption == LETTERS_SHAPE) {
            text.append(", shape letters");
        } else if (lettersOption == LETTERS_SHAPE_TASHKEEL_ISOLATED) {
            text.append(", shape letters tashkeel isolated");
        } else if (lettersOption == LETTERS_UNSHAPE) {
            text.append(", unshape letters");
        }

        int digitsOption = options & DIGITS_MASK;
        if (digitsOption == DIGITS_NOOP) {
            text.append(", no digit shaping");
        } else if (digitsOption == DIGITS_EN2AN) {
            text.append(", shape digits to AN");
        } else if (digitsOption == DIGITS_AN2EN) {
            text.append(", shape digits to EN");
        } else if (digitsOption == DIGITS_EN2AN_INIT_LR) {
            text.append(", shape digits to AN contextually: default EN");
        } else if (digitsOption == DIGITS_EN2AN_INIT_AL) {
            text.append(", shape digits to AN contextually: default AL");
        }

        int digitTypeOption = options & DIGIT_TYPE_MASK;
        if (digitTypeOption == DIGIT_TYPE_AN) {
            text.append(", standard Arabic-Indic digits");
        } else if (digitTypeOption == DIGIT_TYPE_AN_EXTENDED) {
            text.append(", extended Arabic-Indic digits");
        }

        text.append("]");
        return text.toString();
    }

    //
    // 
ported api    //    private static final int IRRELEVANT = 4;    private static final int LAMTYPE = 16;    private static final int ALEFTYPE = 32;    private static final int LINKR = 1;    private static final int LINKL = 2;    private static final int LINK_MASK = 3;    private static final int irrelevantPos[] = {         0x0, 0x2, 0x4, 0x6, 0x8, 0xA, 0xC, 0xE     };    private static final char convertLamAlef[] =  {        '\u0622', // FEF5         '\u0622', // FEF6        '\u0623', // FEF7        '\u0623', // FEF8        '\u0625', // FEF9        '\u0625', // FEFA        '\u0627', // FEFB        '\u0627'  // FEFC     };    private static final char convertNormalizedLamAlef[] = {        '\u0622', // 065C        '\u0623', // 065D        '\u0625', // 065E        '\u0627', // 065F    };    private static final int[] araLink = {        1           + 32 + 256 * 0x11,  /*0x0622*/        1           + 32 + 256 * 0x13,  /*0x0623*/        1                + 256 * 0x15,  /*0x0624*/        1           + 32 + 256 * 0x17,  /*0x0625*/        1 + 2            + 256 * 0x19,  /*0x0626*/        1           + 32 + 256 * 0x1D,  /*0x0627*/        1 + 2            + 256 * 0x1F,  /*0x0628*/        1                + 256 * 0x23,  /*0x0629*/        1 + 2            + 256 * 0x25,  /*0x062A*/        1 + 2            + 256 * 0x29,  /*0x062B*/        1 + 2            + 256 * 0x2D,  /*0x062C*/        1 + 2            + 256 * 0x31,  /*0x062D*/        1 + 2            + 256 * 0x35,  /*0x062E*/
12 3 下一页
💿 文件大小 4457 K
👤 上传用户 KMPlayer33
📂 所属分类 Java编程
🏷️ 相关标签

#java #操作 #开发包
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -