📄 minibidi.c
字号:
/*
 * do_shape: the main shaping entry point, and the only shaping function
 * meant to be called from outside this file.
 *
 * line:  input buffer to shape. Per the original notes, it must already
 *        have been run through the bidi pass before being handed here.
 * to:    output buffer for the shaped data. Every element is first copied
 *        from line; its wc field may then be rewritten.
 * count: number of characters in line.
 *
 * Always returns 1.
 */
int do_shape(bidi_char *line, bidi_char *to, int count)
{
    int pos;

    for (pos = 0; pos < count; pos++) {
        int shape, after, before;
        int joins_after, joins_before;
        unsigned int lig_isolated;

        to[pos] = line[pos];
        shape = STYPE(line[pos].wc);

        /* Only SR and SD characters are reshaped; any other shape type
         * keeps the plain copy made above. */
        if (shape != SR && shape != SD)
            continue;

        /* Shape type of the neighbour at pos+1 (SU when there is none). */
        after = (pos + 1 < count ? STYPE(line[pos + 1].wc) : SU);
        joins_after = (after == SL) || (after == SD) || (after == SC);

        if (shape == SR) {
            if (joins_after)
                to[pos].wc = SFINAL((SISOLATED(line[pos].wc)));
            else
                to[pos].wc = SISOLATED(line[pos].wc);
            continue;
        }

        /* From here on shape == SD. */

        /*
         * Make ligatures: a LAM (U+0644) whose neighbour at pos-1 is one
         * of four ALEF variants is replaced by the matching LAM-ALEF
         * ligature, and the ALEF's output slot becomes a space (U+0020).
         * Each ligature has two consecutive code points; the second one
         * is used when the neighbour at pos+1 joins.
         */
        if (line[pos].wc == 0x644 && pos > 0) {
            lig_isolated = 0;           /* 0 means "no ligature here" */
            switch (line[pos - 1].wc) {
              case 0x622:
                lig_isolated = 0xFEF5;
                break;
              case 0x623:
                lig_isolated = 0xFEF7;
                break;
              case 0x625:
                lig_isolated = 0xFEF9;
                break;
              case 0x627:
                lig_isolated = 0xFEFB;
                break;
            }
            if (lig_isolated != 0) {
                if (joins_after)
                    to[pos].wc = lig_isolated + 1;
                else
                    to[pos].wc = lig_isolated;
                to[pos - 1].wc = 0x20;
                continue;
            }
        }

        /* Shape type of the neighbour at pos-1 (SU when there is none). */
        before = (pos > 0 ? STYPE(line[pos - 1].wc) : SU);
        joins_before = (before == SR) || (before == SD) || (before == SC);

        /* Choose the form from which of the two neighbours join. */
        if (joins_after && joins_before)
            to[pos].wc = SMEDIAL((SISOLATED(line[pos].wc)));
        else if (joins_after)
            to[pos].wc = SFINAL((SISOLATED(line[pos].wc)));
        else if (joins_before)
            to[pos].wc = SINITIAL((SISOLATED(line[pos].wc)));
        else
            to[pos].wc = SISOLATED(line[pos].wc);
    }

    return 1;
}
/*
* The Main Bidi Function, and the only function that should
* be used by the outside world.
*
* line: a buffer of size count containing text to apply
* the Bidirectional algorithm to.
*/
int do_bidi(bidi_char *line, int count)
{
unsigned char* types;
unsigned char* levels;
unsigned char paragraphLevel;
unsigned char currentEmbedding;
unsigned char currentOverride;
unsigned char tempType;
int i, j, imax, yes, bover;
/* Check the presence of R or AL types as optimization */
yes = 0;
for (i=0; i<count; i++) {
int type = getType(line[i].wc);
if (type == R || type == AL) {
yes = 1;
break;
}
}
if (yes == 0)
return L;
/* Initialize types, levels */
types = snewn(count, unsigned char);
levels = snewn(count, unsigned char);
/* Rule (P1) NOT IMPLEMENTED
* P1. Split the text into separate paragraphs. A paragraph separator is
* kept with the previous paragraph. Within each paragraph, apply all the
* other rules of this algorithm.
*/
/* Rule (P2), (P3)
* P2. In each paragraph, find the first character of type L, AL, or R.
* P3. If a character is found in P2 and it is of type AL or R, then set
* the paragraph embedding level to one; otherwise, set it to zero.
*/
paragraphLevel = 0;
for (i=0; i<count ; i++) {
int type = getType(line[i].wc);
if (type == R || type == AL) {
paragraphLevel = 1;
break;
} else if (type == L)
break;
}
/* Rule (X1)
* X1. Begin by setting the current embedding level to the paragraph
* embedding level. Set the directional override status to neutral.
*/
currentEmbedding = paragraphLevel;
currentOverride = ON;
/* Rule (X2), (X3), (X4), (X5), (X6), (X7), (X8)
* X2. With each RLE, compute the least greater odd embedding level.
* X3. With each LRE, compute the least greater even embedding level.
* X4. With each RLO, compute the least greater odd embedding level.
* X5. With each LRO, compute the least greater even embedding level.
* X6. For all types besides RLE, LRE, RLO, LRO, and PDF:
* a. Set the level of the current character to the current
* embedding level.
* b. Whenever the directional override status is not neutral,
* reset the current character type to the directional
* override status.
* X7. With each PDF, determine the matching embedding or override code.
* If there was a valid matching code, restore (pop) the last
* remembered (pushed) embedding level and directional override.
* X8. All explicit directional embeddings and overrides are completely
* terminated at the end of each paragraph. Paragraph separators are not
* included in the embedding. (Useless here) NOT IMPLEMENTED
*/
bover = 0;
for (i=0; i<count; i++) {
tempType = getType(line[i].wc);
switch (tempType) {
case RLE:
currentEmbedding = levels[i] = leastGreaterOdd(currentEmbedding);
levels[i] = setOverrideBits(levels[i], currentOverride);
currentOverride = ON;
break;
case LRE:
currentEmbedding = levels[i] = leastGreaterEven(currentEmbedding);
levels[i] = setOverrideBits(levels[i], currentOverride);
currentOverride = ON;
break;
case RLO:
currentEmbedding = levels[i] = leastGreaterOdd(currentEmbedding);
tempType = currentOverride = R;
bover = 1;
break;
case LRO:
currentEmbedding = levels[i] = leastGreaterEven(currentEmbedding);
tempType = currentOverride = L;
bover = 1;
break;
case PDF:
{
int prevlevel = getPreviousLevel(levels, i);
if (prevlevel == -1) {
currentEmbedding = paragraphLevel;
currentOverride = ON;
} else {
currentOverride = currentEmbedding & OMASK;
currentEmbedding = currentEmbedding & ~OMASK;
}
}
levels[i] = currentEmbedding;
break;
/* Whitespace is treated as neutral for now */
case WS:
case S:
levels[i] = currentEmbedding;
tempType = ON;
if (currentOverride != ON)
tempType = currentOverride;
break;
default:
levels[i] = currentEmbedding;
if (currentOverride != ON)
tempType = currentOverride;
break;
}
types[i] = tempType;
}
/* this clears out all overrides, so we can use levels safely... */
/* checks bover first */
if (bover)
for (i=0; i<count; i++)
levels[i] = levels[i] & LMASK;
/* Rule (X9)
* X9. Remove all RLE, LRE, RLO, LRO, PDF, and BN codes.
* Here, they're converted to BN.
*/
for (i=0; i<count; i++) {
switch (types[i]) {
case RLE:
case LRE:
case RLO:
case LRO:
case PDF:
types[i] = BN;
break;
}
}
/* Rule (W1)
* W1. Examine each non-spacing mark (NSM) in the level run, and change
* the type of the NSM to the type of the previous character. If the NSM
* is at the start of the level run, it will get the type of sor.
*/
if (types[0] == NSM)
types[0] = paragraphLevel;
for (i=1; i<count; i++) {
if (types[i] == NSM)
types[i] = types[i-1];
/* Is this a safe assumption?
* I assumed the previous, IS a character.
*/
}
/* Rule (W2)
* W2. Search backwards from each instance of a European number until the
* first strong type (R, L, AL, or sor) is found. If an AL is found,
* change the type of the European number to Arabic number.
*/
for (i=0; i<count; i++) {
if (types[i] == EN) {
j=i;
while (j >= 0) {
if (types[j] == AL) {
types[i] = AN;
break;
} else if (types[j] == R || types[j] == L) {
break;
}
j--;
}
}
}
/* Rule (W3)
* W3. Change all ALs to R.
*
* Optimization: on Rule Xn, we might set a flag on AL type
* to prevent this loop in L R lines only...
*/
for (i=0; i<count; i++) {
if (types[i] == AL)
types[i] = R;
}
/* Rule (W4)
* W4. A single European separator between two European numbers changes
* to a European number. A single common separator between two numbers
* of the same type changes to that type.
*/
for (i=1; i<(count-1); i++) {
if (types[i] == ES) {
if (types[i-1] == EN && types[i+1] == EN)
types[i] = EN;
} else if (types[i] == CS) {
if (types[i-1] == EN && types[i+1] == EN)
types[i] = EN;
else if (types[i-1] == AN && types[i+1] == AN)
types[i] = AN;
}
}
/* Rule (W5)
* W5. A sequence of European terminators adjacent to European numbers
* changes to all European numbers.
*
* Optimization: lots here... else ifs need rearrangement
*/
for (i=0; i<count; i++) {
if (types[i] == ET) {
if (i > 0 && types[i-1] == EN) {
types[i] = EN;
continue;
} else if (i < count-1 && types[i+1] == EN) {
types[i] = EN;
continue;
} else if (i < count-1 && types[i+1] == ET) {
j=i;
while (j <count && types[j] == ET) {
j++;
}
if (types[j] == EN)
types[i] = EN;
}
}
}
/* Rule (W6)
* W6. Otherwise, separators and terminators change to Other Neutral:
*/
for (i=0; i<count; i++) {
switch (types[i]) {
case ES:
case ET:
case CS:
types[i] = ON;
break;
}
}
/* Rule (W7)
* W7. Search backwards from each instance of a European number until
* the first strong type (R, L, or sor) is found. If an L is found,
* then change the type of the European number to L.
*/
for (i=0; i<count; i++) {
if (types[i] == EN) {
j=i;
while (j >= 0) {
if (types[j] == L) {
types[i] = L;
break;
} else if (types[j] == R || types[j] == AL) {
break;
}
j--;
}
}
}
/* Rule (N1)
* N1. A sequence of neutrals takes the direction of the surrounding
* strong text if the text on both sides has the same direction. European
* and Arabic numbers are treated as though they were R.
*/
if (count >= 2 && types[0] == ON) {
if ((types[1] == R) || (types[1] == EN) || (types[1] == AN))
types[0] = R;
else if (types[1] == L)
types[0] = L;
}
for (i=1; i<(count-1); i++) {
if (types[i] == ON) {
if (types[i-1] == L) {
j=i;
while (j<(count-1) && types[j] == ON) {
j++;
}
if (types[j] == L) {
while (i<j) {
types[i] = L;
i++;
}
}
} else if ((types[i-1] == R) ||
(types[i-1] == EN) ||
(types[i-1] == AN)) {
j=i;
while (j<(count-1) && types[j] == ON) {
j++;
}
if ((types[j] == R) ||
(types[j] == EN) ||
(types[j] == AN)) {
while (i<j) {
types[i] = R;
i++;
}
}
}
}
}
if (count >= 2 && types[count-1] == ON) {
if (types[count-2] == R || types[count-2] == EN || types[count-2] == AN)
types[count-1] = R;
else if (types[count-2] == L)
types[count-1] = L;
}
/* Rule (N2)
* N2. Any remaining neutrals take the embedding direction.
*/
for (i=0; i<count; i++) {
if (types[i] == ON) {
if ((levels[i] % 2) == 0)
types[i] = L;
else
types[i] = R;
}
}
/* Rule (I1)
* I1. For all characters with an even (left-to-right) embedding
* direction, those of type R go up one level and those of type AN or
* EN go up two levels.
*/
for (i=0; i<count; i++) {
if ((levels[i] % 2) == 0) {
if (types[i] == R)
levels[i] += 1;
else if (types[i] == AN || types[i] == EN)
levels[i] += 2;
}
}
/* Rule (I2)
* I2. For all characters with an odd (right-to-left) embedding direction,
* those of type L, EN or AN go up one level.
*/
for (i=0; i<count; i++) {
if ((levels[i] % 2) == 1) {
if (types[i] == L || types[i] == EN || types[i] == AN)
levels[i] += 1;
}
}
/* Rule (L1)
* L1. On each line, reset the embedding level of the following characters
* to the paragraph embedding level:
* (1)segment separators, (2)paragraph separators,
* (3)any sequence of whitespace characters preceding
* a segment separator or paragraph separator,
* (4)and any sequence of white space characters
* at the end of the line.
* The types of characters used here are the original types, not those
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -