📄 arabicshaping.java

📁 一个java操作pdf文件的开发包,很好用的.
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
上一页 1 2 3
                    length = w - start;                } else { // spaces at beginning                    while (w < e) {                        dest[w++] = '\u0020';                    }                }            }        }        return length;    }    /*     * Name    : expandLamAlef     * Function: LamAlef needs special handling as the LamAlef is     *           one character while expanding it will give two     *           characters Lam + Alef, so we need to expand the LamAlef     *           in near or far spaces according to the options the user     *           specifies or increase the buffer size.     *           Dest has enough room for the expansion if we are growing.     *           lamalef are normalized to the 'special characters'     */    private int expandLamAlef(char[] dest,                              int start,                              int length,                              int lacount) {        int lenOptions = options & LENGTH_MASK;        if (!isLogical) {            switch (lenOptions) {            case LENGTH_FIXED_SPACES_AT_BEGINNING: lenOptions = LENGTH_FIXED_SPACES_AT_END; break;            case LENGTH_FIXED_SPACES_AT_END: lenOptions = LENGTH_FIXED_SPACES_AT_BEGINNING; break;            default: break;            }        }        switch (lenOptions) {        case LENGTH_GROW_SHRINK:             {                for (int r = start + length, w = r + lacount; --r >= start;) {                    char ch = dest[r];                    if (isNormalizedLamAlefChar(ch)) {                        dest[--w] = '\u0644';                        dest[--w] = convertNormalizedLamAlef[ch - '\u065C'];                    } else {                        dest[--w] = ch;                    }                }            }            length += lacount;            break;        case LENGTH_FIXED_SPACES_NEAR:             {                if (isNormalizedLamAlefChar(dest[start])) {                    throw new RuntimeException("no space for lamalef");     
           }                for (int i = start + length; --i > start;) { // don't check start, already checked                    char ch = dest[i];                    if (isNormalizedLamAlefChar(ch)) {                        if (dest[i-1] == '\u0020') {                            dest[i] = '\u0644';                            dest[--i] = convertNormalizedLamAlef[ch - '\u065C'];                        } else {                            throw new RuntimeException("no space for lamalef");                        }                    }                }            }            break;        case LENGTH_FIXED_SPACES_AT_END:             {                if (lacount > countSpacesLeft(dest, start, length)) {                    throw new RuntimeException("no space for lamalef");                }                for (int r = start + lacount, w = start, e = start + length; r < e; ++r) {                    char ch = dest[r];                    if (isNormalizedLamAlefChar(ch)) {                        dest[w++] = convertNormalizedLamAlef[ch - '\u065C'];                        dest[w++] = '\u0644';                    } else {                        dest[w++] = ch;                    }                }            }            break;                        case LENGTH_FIXED_SPACES_AT_BEGINNING:             {                if (lacount > countSpacesRight(dest, start, length)) {                    throw new RuntimeException("no space for lamalef");                }                for (int r = start + length - lacount, w = start + length; --r >= start;) {                    char ch = dest[r];                    if (isNormalizedLamAlefChar(ch)) {                        dest[--w] = '\u0644';                        dest[--w] = convertNormalizedLamAlef[ch - '\u065C'];                    } else {                        dest[--w] = ch;                    }                }            }            break;        }        return length;    }    /* Convert the input buffer from FExx Range into 
06xx Range     * to put all characters into the 06xx range     * even the lamalef is converted to the special region in     * the 06xx range.  Return the number of lamalef chars found.     */    private int normalize(char[] dest, int start, int length) {        int lacount = 0;        for (int i = start, e = i + length; i < e; ++i) {            char ch = dest[i];            if (ch >= '\uFE70' && ch <= '\uFEFC') {                if (isLamAlefChar(ch)) {                    ++lacount;                }                dest[i] = (char)convertFEto06[ch - '\uFE70'];            }        }        return lacount;    }    /*     * Name    : shapeUnicode     * Function: Converts an Arabic Unicode buffer in 06xx Range into a shaped     *           arabic Unicode buffer in FExx Range     */    private int shapeUnicode(char[] dest,                              int start,                             int length,                             int destSize,                             int tashkeelFlag) {        normalize(dest, start, length);        // resolve the link between the characters.        // Arabic characters have four forms: Isolated, Initial, Medial and Final.        // Tashkeel characters have two, isolated or medial, and sometimes only isolated.        
// tashkeelFlag == 0: shape normally, 1: shape isolated, 2: don't shape        boolean lamalef_found = false;        int i = start + length - 1;        int currLink = getLink(dest[i]);        int nextLink = 0;        int prevLink = 0;        int lastLink = 0;        int prevPos = i;        int lastPos = i;        int nx = -2;        int nw = 0;        while (i >= 0) {            // If high byte of currLink > 0 then there might be more than one shape            if ((currLink & '\uFF00') > 0 || isTashkeelChar(dest[i])) {                nw = i - 1;                nx = -2;                while (nx < 0) { // we need to know about next char                    if (nw == -1) {                        nextLink = 0;                        nx = Integer.MAX_VALUE;                    } else {                        nextLink = getLink(dest[nw]);                        if ((nextLink & IRRELEVANT) == 0) {                            nx = nw;                        } else {                            --nw;                        }                    }                }                if (((currLink & ALEFTYPE) > 0) && ((lastLink & LAMTYPE) > 0)) {                    lamalef_found = true;                     char wLamalef = changeLamAlef(dest[i]); // get from 0x065C-0x065f                    if (wLamalef != '\u0000') {                        // replace alef by marker, it will be removed later                        dest[i] = '\uffff';                        dest[lastPos] = wLamalef;                        i = lastPos;                    }                    lastLink = prevLink;                    currLink = getLink(wLamalef); // requires '\u0000', unfortunately                }                // get the proper shape according to link ability of neighbors                // and of character; depends on the order of the shapes                // (isolated, initial, middle, final) in the compatibility area                int flag = specialChar(dest[i]);                int shape = 
shapeTable[nextLink & LINK_MASK]                    [lastLink & LINK_MASK]                    [currLink & LINK_MASK];                if (flag == 1) {                    shape &= 0x1;                } else if (flag == 2) {                    if (tashkeelFlag == 0 &&                        ((lastLink & LINKL) != 0) &&                         ((nextLink & LINKR) != 0) &&                         dest[i] != '\u064C' &&                         dest[i] != '\u064D' &&                        !((nextLink & ALEFTYPE) == ALEFTYPE &&                           (lastLink & LAMTYPE) == LAMTYPE)) {		                        shape = 1;                    } else {                        shape = 0;                    }                }                if (flag == 2) {                    if (tashkeelFlag < 2) {                        dest[i] = (char)('\uFE70' + irrelevantPos[dest[i] - '\u064B'] + shape);                    } // else leave tashkeel alone                                    } else {                    dest[i] = (char)('\uFE70' + (currLink >> 8) + shape);                }            }            // move one notch forward            if ((currLink & IRRELEVANT) == 0) {                prevLink = lastLink;                lastLink = currLink;                prevPos = lastPos;                lastPos = i;            }            --i;            if (i == nx) {                currLink = nextLink;                nx = -2;            } else if (i != -1) {                currLink = getLink(dest[i]);            }        }        // If we found a lam/alef pair in the buffer         // call removeLamAlefSpaces to remove the spaces that were added        if (lamalef_found) {            destSize = removeLamAlefSpaces(dest, start, length);        } else {            destSize = length;        }                return destSize;    }    /*     * Name    : deShapeUnicode     * Function: Converts an Arabic Unicode buffer in FExx Range into unshaped     *           arabic Unicode buffer in 06xx Range 
    */    private int deShapeUnicode(char[] dest,                                int start,                               int length,                               int destSize) {        int lamalef_count = normalize(dest, start, length);        // If there was a lamalef in the buffer call expandLamAlef        if (lamalef_count != 0) {            // need to adjust dest to fit expanded buffer... !!!            destSize = expandLamAlef(dest, start, length, lamalef_count);        } else {            destSize = length;        }        return destSize;    }    private int internalShape(char[] source,                               int sourceStart,                              int sourceLength,                              char[] dest,                              int destStart,                              int destSize) {        if (sourceLength == 0) {            return 0;        }        if (destSize == 0) {            if (((options & LETTERS_MASK) != LETTERS_NOOP) &&                ((options & LENGTH_MASK) == LENGTH_GROW_SHRINK)) {	                return calculateSize(source, sourceStart, sourceLength);            } else {                return sourceLength; // by definition            }        }        // always use temp buffer        char[] temp = new char[sourceLength * 2]; // all lamalefs requiring expansion        System.arraycopy(source, sourceStart, temp, 0, sourceLength);        if (isLogical) {            invertBuffer(temp, 0, sourceLength);        }        int outputSize = sourceLength;        switch (options & LETTERS_MASK) {        case LETTERS_SHAPE_TASHKEEL_ISOLATED:            outputSize = shapeUnicode(temp, 0, sourceLength, destSize, 1);            break;        case LETTERS_SHAPE:            outputSize = shapeUnicode(temp, 0, sourceLength, destSize, 0);            break;        case LETTERS_UNSHAPE:            outputSize = deShapeUnicode(temp, 0, sourceLength, destSize);            break;         default:            break;        }                     
   if (outputSize > destSize) {            throw new RuntimeException("not enough room for result data");        }        if ((options & DIGITS_MASK) != DIGITS_NOOP) {            char digitBase = '\u0030'; // European digits            switch (options & DIGIT_TYPE_MASK) {            case DIGIT_TYPE_AN:                digitBase = '\u0660';  // Arabic-Indic digits                break;            case DIGIT_TYPE_AN_EXTENDED:                digitBase = '\u06f0';  // Eastern Arabic-Indic digits (Persian and Urdu)                break;            default:                break;            }            switch (options & DIGITS_MASK) {            case DIGITS_EN2AN:                {                    int digitDelta = digitBase - '\u0030';                    for (int i = 0; i < outputSize; ++i) {                        char ch = temp[i];                        if (ch <= '\u0039' && ch >= '\u0030') {                            temp[i] += digitDelta;                        }                    }                }                break;            case DIGITS_AN2EN:                {                    char digitTop = (char)(digitBase + 9);                    int digitDelta = '\u0030' - digitBase;                    for (int i = 0; i < outputSize; ++i) {                        char ch = temp[i];                        if (ch <= digitTop && ch >= digitBase) {                            temp[i] += digitDelta;                        }                    }                }                break;            case DIGITS_EN2AN_INIT_LR:                shapeToArabicDigitsWithContext(temp, 0, outputSize, digitBase, false);                break;            case DIGITS_EN2AN_INIT_AL:                shapeToArabicDigitsWithContext(temp, 0, outputSize, digitBase, true);                break;            default:                break;            }        }        if (isLogical) {            invertBuffer(temp, 0, outputSize);        }              System.arraycopy(temp, 0, dest, destStart, 
outputSize);              return outputSize;    }}
上一页 1 2 3
💿 文件大小 4457 K
👤 上传用户 KMPlayer33
📂 所属分类 Java编程
🏷️ 相关标签

#java #操作 #开发包
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -