⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 minibidi.c

📁 putty
💻 C
📖 第 1 页 / 共 4 页
字号:

/*
 * do_shape: contextual shaping pass, and the single shaping entry
 * point intended for callers outside this file.
 *
 * line:  source buffer; it is expected to have been run through
 *        do_bidi() already
 * to:    destination buffer that receives the shaped characters
 * count: number of characters in line
 *
 * Always returns 1.
 */
int do_shape(bidi_char *line, bidi_char *to, int count)
{
    int pos;

    for (pos = 0; pos < count; pos++) {
        int shape, neighbour, next_links, prev_links;

        /* Begin with a verbatim copy; only characters whose shape
         * type is SR or SD are rewritten below. */
        to[pos] = line[pos];

        shape = STYPE(line[pos].wc);
        if (shape != SR && shape != SD)
            continue;

        /* Shape type of the following buffer slot; running off the
         * end of the buffer counts as SU. */
        neighbour = (pos+1 < count ? STYPE(line[pos+1].wc) : SU);
        next_links = (neighbour == SL || neighbour == SD || neighbour == SC);

        if (shape == SR) {
            if (next_links)
                to[pos].wc = SFINAL((SISOLATED(line[pos].wc)));
            else
                to[pos].wc = SISOLATED(line[pos].wc);
            continue;
        }

        /*
         * From here on the character is SD.
         *
         * Ligatures: a LAM (U+0644) whose preceding buffer slot holds
         * one of four ALEF variants collapses into a single lam-alef
         * presentation form. Each isolated ligature code point is
         * immediately followed by its final form, hence the +1.
         */
        if (line[pos].wc == 0x644 && pos > 0) {
            int ligature = 0;

            switch (line[pos-1].wc) {
              case 0x622:
                ligature = 0xFEF5;
                break;
              case 0x623:
                ligature = 0xFEF7;
                break;
              case 0x625:
                ligature = 0xFEF9;
                break;
              case 0x627:
                ligature = 0xFEFB;
                break;
            }

            if (ligature != 0) {
                to[pos].wc = (next_links ? ligature + 1 : ligature);
                /* The slot that held the alef is blanked to a space. */
                to[pos-1].wc = 0x20;
                continue;
            }
        }

        /* Shape type of the preceding buffer slot; the start of the
         * buffer counts as SU. */
        neighbour = (pos > 0 ? STYPE(line[pos-1].wc) : SU);
        prev_links = (neighbour == SR || neighbour == SD || neighbour == SC);

        /* Pick one of the four forms from the two neighbour tests. */
        if (next_links) {
            if (prev_links)
                to[pos].wc = SMEDIAL((SISOLATED(line[pos].wc)));
            else
                to[pos].wc = SFINAL((SISOLATED(line[pos].wc)));
        } else {
            if (prev_links)
                to[pos].wc = SINITIAL((SISOLATED(line[pos].wc)));
            else
                to[pos].wc = SISOLATED(line[pos].wc);
        }
    }

    return 1;
}

/*
 * The Main Bidi Function, and the only function that should
 * be used by the outside world.
 *
 * line: a buffer of size count containing text to apply
 * the Bidirectional algorithm to.
 */

int do_bidi(bidi_char *line, int count)
{
    unsigned char* types;
    unsigned char* levels;
    unsigned char paragraphLevel;
    unsigned char currentEmbedding;
    unsigned char currentOverride;
    unsigned char tempType;
    int i, j, imax, yes, bover;

    /* Check the presence of R or AL types as optimization */
    yes = 0;
    for (i=0; i<count; i++) {
	int type = getType(line[i].wc);
	if (type == R || type == AL) {
	    yes = 1;
	    break;
	}
    }
    if (yes == 0)
	return L;

    /* Initialize types, levels */
    types = snewn(count, unsigned char);
    levels = snewn(count, unsigned char);

    /* Rule (P1)  NOT IMPLEMENTED
     * P1. Split the text into separate paragraphs. A paragraph separator is
     * kept with the previous paragraph. Within each paragraph, apply all the
     * other rules of this algorithm.
     */

    /* Rule (P2), (P3)
     * P2. In each paragraph, find the first character of type L, AL, or R.
     * P3. If a character is found in P2 and it is of type AL or R, then set
     * the paragraph embedding level to one; otherwise, set it to zero.
     */
    paragraphLevel = 0;
    for (i=0; i<count ; i++) {
	int type = getType(line[i].wc);
	if (type == R || type == AL) {
	    paragraphLevel = 1;
	    break;
	} else if (type == L)
	    break;
    }

    /* Rule (X1)
     * X1. Begin by setting the current embedding level to the paragraph
     * embedding level. Set the directional override status to neutral.
     */
    currentEmbedding = paragraphLevel;
    currentOverride = ON;

    /* Rule (X2), (X3), (X4), (X5), (X6), (X7), (X8)
     * X2. With each RLE, compute the least greater odd embedding level.
     * X3. With each LRE, compute the least greater even embedding level.
     * X4. With each RLO, compute the least greater odd embedding level.
     * X5. With each LRO, compute the least greater even embedding level.
     * X6. For all types besides RLE, LRE, RLO, LRO, and PDF:
     *		a. Set the level of the current character to the current
     *		    embedding level.
     *		b.  Whenever the directional override status is not neutral,
     *               reset the current character type to the directional
     *               override status.
     * X7. With each PDF, determine the matching embedding or override code.
     * If there was a valid matching code, restore (pop) the last
     * remembered (pushed) embedding level and directional override.
     * X8. All explicit directional embeddings and overrides are completely
     * terminated at the end of each paragraph. Paragraph separators are not
     * included in the embedding. (Useless here) NOT IMPLEMENTED
     */
    bover = 0;
    for (i=0; i<count; i++) {
	tempType = getType(line[i].wc);
	switch (tempType) {
	  case RLE:
	    currentEmbedding = levels[i] = leastGreaterOdd(currentEmbedding);
	    levels[i] = setOverrideBits(levels[i], currentOverride);
	    currentOverride = ON;
	    break;

	  case LRE:
	    currentEmbedding = levels[i] = leastGreaterEven(currentEmbedding);
	    levels[i] = setOverrideBits(levels[i], currentOverride);
	    currentOverride = ON;
	    break;

	  case RLO:
	    currentEmbedding = levels[i] = leastGreaterOdd(currentEmbedding);
	    tempType = currentOverride = R;
	    bover = 1;
	    break;

	  case LRO:
	    currentEmbedding = levels[i] = leastGreaterEven(currentEmbedding);
	    tempType = currentOverride = L;
	    bover = 1;
	    break;

	  case PDF:
            {
                int prevlevel = getPreviousLevel(levels, i);

                if (prevlevel == -1) {
                    currentEmbedding = paragraphLevel;
                    currentOverride = ON;
                } else {
                    currentOverride = currentEmbedding & OMASK;
                    currentEmbedding = currentEmbedding & ~OMASK;
                }
            }
	    levels[i] = currentEmbedding;
	    break;

	    /* Whitespace is treated as neutral for now */
	  case WS:
	  case S:
	    levels[i] = currentEmbedding;
	    tempType = ON;
	    if (currentOverride != ON)
		tempType = currentOverride;
	    break;

	  default:
	    levels[i] = currentEmbedding;
	    if (currentOverride != ON)
		tempType = currentOverride;
	    break;

	}
	types[i] = tempType;
    }
    /* this clears out all overrides, so we can use levels safely... */
    /* checks bover first */
    if (bover)
	for (i=0; i<count; i++)
	    levels[i] = levels[i] & LMASK;

    /* Rule (X9)
     * X9. Remove all RLE, LRE, RLO, LRO, PDF, and BN codes.
     * Here, they're converted to BN.
     */
    for (i=0; i<count; i++) {
	switch (types[i]) {
	  case RLE:
	  case LRE:
	  case RLO:
	  case LRO:
	  case PDF:
	    types[i] = BN;
	    break;
	}
    }

    /* Rule (W1)
     * W1. Examine each non-spacing mark (NSM) in the level run, and change
     * the type of the NSM to the type of the previous character. If the NSM
     * is at the start of the level run, it will get the type of sor.
     */
    if (types[0] == NSM)
	types[0] = paragraphLevel;

    for (i=1; i<count; i++) {
	if (types[i] == NSM)
	    types[i] = types[i-1];
	/* Is this a safe assumption?
	 * I assumed the previous, IS a character.
	 */
    }

    /* Rule (W2)
     * W2. Search backwards from each instance of a European number until the
     * first strong type (R, L, AL, or sor) is found.  If an AL is found,
     * change the type of the European number to Arabic number.
     */
    for (i=0; i<count; i++) {
	if (types[i] == EN) {
	    j=i;
	    while (j >= 0) {
		if (types[j] == AL) {
		    types[i] = AN;
		    break;
		} else if (types[j] == R || types[j] == L) {
                    break;
                }
		j--;
	    }
	}
    }

    /* Rule (W3)
     * W3. Change all ALs to R.
     *
     * Optimization: on Rule Xn, we might set a flag on AL type
     * to prevent this loop in L R lines only...
     */
    for (i=0; i<count; i++) {
	if (types[i] == AL)
	    types[i] = R;
    }

    /* Rule (W4)
     * W4. A single European separator between two European numbers changes
     * to a European number. A single common separator between two numbers
     * of the same type changes to that type.
     */
    for (i=1; i<(count-1); i++) {
	if (types[i] == ES) {
	    if (types[i-1] == EN && types[i+1] == EN)
		types[i] = EN;
	} else if (types[i] == CS) {
            if (types[i-1] == EN && types[i+1] == EN)
                types[i] = EN;
            else if (types[i-1] == AN && types[i+1] == AN)
                types[i] = AN;
        }
    }

    /* Rule (W5)
     * W5. A sequence of European terminators adjacent to European numbers
     * changes to all European numbers.
     *
     * Optimization: lots here... else ifs need rearrangement
     */
    for (i=0; i<count; i++) {
	if (types[i] == ET) {
	    if (i > 0 && types[i-1] == EN) {
		types[i] = EN;
		continue;
	    } else if (i < count-1 && types[i+1] == EN) {
                types[i] = EN;
                continue;
            } else if (i < count-1 && types[i+1] == ET) {
                j=i;
                while (j <count && types[j] == ET) {
                    j++;
                }
                if (types[j] == EN)
                    types[i] = EN;
            }
	}
    }

    /* Rule (W6)
     * W6. Otherwise, separators and terminators change to Other Neutral:
     */
    for (i=0; i<count; i++) {
	switch (types[i]) {
	  case ES:
	  case ET:
	  case CS:
	    types[i] = ON;
	    break;
	}
    }

    /* Rule (W7)
     * W7. Search backwards from each instance of a European number until
     * the first strong type (R, L, or sor) is found. If an L is found,
     * then change the type of the European number to L.
     */
    for (i=0; i<count; i++) {
	if (types[i] == EN) {
	    j=i;
	    while (j >= 0) {
		if (types[j] == L) {
		    types[i] = L;
		    break;
		} else if (types[j] == R || types[j] == AL) {
		    break;
		}
		j--;
	    }
	}
    }

    /* Rule (N1)
     * N1. A sequence of neutrals takes the direction of the surrounding
     * strong text if the text on both sides has the same direction. European
     * and Arabic numbers are treated as though they were R.
     */
    if (count >= 2 && types[0] == ON) {
	if ((types[1] == R) || (types[1] == EN) || (types[1] == AN))
	    types[0] = R;
	else if (types[1] == L)
	    types[0] = L;
    }
    for (i=1; i<(count-1); i++) {
	if (types[i] == ON) {
	    if (types[i-1] == L) {
		j=i;
		while (j<(count-1) && types[j] == ON) {
		    j++;
		}
		if (types[j] == L) {
		    while (i<j) {
			types[i] = L;
			i++;
		    }
		}

	    } else if ((types[i-1] == R)  ||
                       (types[i-1] == EN) ||
                       (types[i-1] == AN)) {
                j=i;
                while (j<(count-1) && types[j] == ON) {
                    j++;
                }
                if ((types[j] == R)  ||
                    (types[j] == EN) ||
                    (types[j] == AN)) {
                    while (i<j) {
                        types[i] = R;
                        i++;
                    }
                }
            }
	}
    }
    if (count >= 2 && types[count-1] == ON) {
	if (types[count-2] == R || types[count-2] == EN || types[count-2] == AN)
	    types[count-1] = R;
	else if (types[count-2] == L)
	    types[count-1] = L;
    }

    /* Rule (N2)
     * N2. Any remaining neutrals take the embedding direction.
     */
    for (i=0; i<count; i++) {
	if (types[i] == ON) {
	    if ((levels[i] % 2) == 0)
		types[i] = L;
	    else
		types[i] = R;
	}
    }

    /* Rule (I1)
     * I1. For all characters with an even (left-to-right) embedding
     * direction, those of type R go up one level and those of type AN or
     * EN go up two levels.
     */
    for (i=0; i<count; i++) {
	if ((levels[i] % 2) == 0) {
	    if (types[i] == R)
		levels[i] += 1;
	    else if (types[i] == AN || types[i] == EN)
		levels[i] += 2;
	}
    }

    /* Rule (I2)
     * I2. For all characters with an odd (right-to-left) embedding direction,
     * those of type L, EN or AN go up one level.
     */
    for (i=0; i<count; i++) {
	if ((levels[i] % 2) == 1) {
	    if (types[i] == L || types[i] == EN || types[i] == AN)
		levels[i] += 1;
	}
    }

    /* Rule (L1)
     * L1. On each line, reset the embedding level of the following characters
     * to the paragraph embedding level:
     *		(1)segment separators, (2)paragraph separators,
     *           (3)any sequence of whitespace characters preceding
     *           a segment separator or paragraph separator,
     *           (4)and any sequence of white space characters
     *           at the end of the line.
     * The types of characters used here are the original types, not those

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -