📄 minibidi.c
字号:
/************************************************************************
 * $Id: minibidi.c,v 1.1.2.1 2004/12/29 11:32:15 pekangas Exp $
 *
 * ------------
 * Description:
 * ------------
 * This is an implementation of Unicode's Bidirectional Algorithm
 * (known as UAX #9).
 *
 *   http://www.unicode.org/reports/tr9/
 *
 * Author: Ahmad Khalifa
 *
 * -----------------
 * Revision Details:    (Updated by Revision Control System)
 * -----------------
 *  $Date: 2004/12/29 11:32:15 $
 *  $Author: pekangas $
 *  $Revision: 1.1.2.1 $
 *  $Source: /cygdrive/c/home/pekangas/.cvsroot/s2putty/putty/minibidi.c,v $
 *
 * (www.arabeyes.org - under MIT license)
 *
 ************************************************************************/

/*
 * TODO:
 * =====
 * - Explicit marks need to be handled (they are not 100% now)
 * - Ligatures
 */

#include "minibidi.h"

/*
 * Finds the index of a run with level equals tlevel
 *
 * Input:
 * level: resolved levels buffer
 * start: index at which the search begins
 * count: number of entries in level
 * tlevel: the level value to look for
 *
 * Returns the index of the first entry at or after start whose level
 * equals tlevel, or count when there is no such entry.
 *
 * Defined ahead of flipThisRun() so that the call there is preceded by
 * a definition.
 */
int findIndexOfRun(unsigned char* level , int start, int count, int tlevel)
{
    int i;

    for (i = start; i < count; i++) {
        if (tlevel == level[i])
            return i;
    }
    return count;
}

/*
 * Flips the text buffer, according to max level, and
 * all higher levels
 *
 * Input:
 * from: text buffer, on which to apply flipping
 * level: resolved levels buffer
 * max: the maximum level found in this line (should be unsigned char)
 * count: line size in bidi_char
 *
 * Each run starts at an entry whose level equals max and extends over
 * the following entries whose level is greater than or equal to max.
 * The characters of every such run are reversed in place inside from.
 */
void flipThisRun(bidi_char *from, unsigned char *level, int max, int count)
{
    int i, j, lo, hi;
    bidi_char temp;

    j = i = 0;
    while (i < count && j < count) {
        /* find the start of the run of level=max */
        i = j = findIndexOfRun(level, i, count, max);

        /*
         * find the end of the run.  The bound is checked before
         * level[i] is read: findIndexOfRun() returns count when no run
         * is left, and a run may extend to the last entry, so reading
         * first would access level[count].
         */
        while (i < count && max <= level[i])
            i++;

        /* reverse from[j] .. from[i-1] by swapping inwards from both ends */
        for (lo = j, hi = i - 1; lo < hi; lo++, hi--) {
            temp = from[lo];
            from[lo] = from[hi];
            from[hi] = temp;
        }
    }
}

/*
 * Returns character type of ch, by calling RLE table lookup
 * function
 */
unsigned char getType(wchar_t ch)
{
    return getRLE(ch);
}

/*
 * The most significant 2 bits of each level are used to store
 * Override
status of each character * This function sets the override bits of level according * to the value in override, and reurns the new byte. */unsigned char setOverrideBits(unsigned char level, unsigned char override){ if(override == ON) return level; else if(override == R) return level | OISR; else if(override == L) return level | OISL; return level;}/* Dont remember what this was used for :-) */unsigned char getPreviousLevel(unsigned char* level, int from){ unsigned char current; from--; current = level[from]; while(from>0 && level[from] == current) { from--; } return level[++from];}/* * Returns the first odd value greater than x */unsigned char leastGreaterOdd(unsigned char x){ if((x % 2) == 0) return x+1; else return x+2;}/* * Returns the first even value greater than x */unsigned char leastGreaterEven(unsigned char x){ if((x % 2) == 0) return x+2; else return x+1;}/* * Loops over the RLE_table array looking for the * type of ch */unsigned char getRLE(wchar_t ch){ int offset, i, freq; freq = offset = 0; for(i=0; i<0xFFFF; i++) { freq = ((RLENode*)RLE_table)[i].f; offset += freq; if(offset == ch) return ((RLENode*)RLE_table)[i].d; else if(offset > ch) return ((RLENode*)RLE_table)[i-1].d; } /* this is here to stop compiler nagging */ return ON;}/* The Main shaping function, and the only one to be used * by the outside world. * * line: buffer to apply shaping to. 
this must be passed by doBidi() first * to: output buffer for the shaped data * count: number of characters in line */int do_shape(bidi_char *line, bidi_char *to, int count){ int i, tempShape, ligFlag; for(ligFlag=i=0; i<count; i++) { to[i] = line[i]; tempShape = STYPE(line[i].wc); switch(tempShape ) { case SC: break; case SU: break; case SR: tempShape = STYPE(line[i+1].wc); if((tempShape == SL) || (tempShape == SD) || (tempShape == SC)) to[i].wc = SFINAL((SISOLATED(line[i].wc))); else to[i].wc = SISOLATED(line[i].wc); break; case SD: /* Make Ligatures */ tempShape = STYPE(line[i+1].wc); if(line[i].wc == 0x644) { switch(line[i-1].wc) { case 0x622: ligFlag = 1; if((tempShape == SL) || (tempShape == SD) || (tempShape == SC)) to[i].wc = 0xFEF6; else to[i].wc = 0xFEF5; break; case 0x623: ligFlag = 1; if((tempShape == SL) || (tempShape == SD) || (tempShape == SC)) to[i].wc = 0xFEF8; else to[i].wc = 0xFEF7; break; case 0x625: ligFlag = 1; if((tempShape == SL) || (tempShape == SD) || (tempShape == SC)) to[i].wc = 0xFEFA; else to[i].wc = 0xFEF9; break; case 0x627: ligFlag = 1; if((tempShape == SL) || (tempShape == SD) || (tempShape == SC)) to[i].wc = 0xFEFC; else to[i].wc = 0xFEFB; break; } if(ligFlag) { to[i-1].wc = 0x20; ligFlag = 0; break; } } if((tempShape == SL) || (tempShape == SD) || (tempShape == SC)) { tempShape = STYPE(line[i-1].wc); if((tempShape == SR) || (tempShape == SD) || (tempShape == SC)) to[i].wc = SMEDIAL( (SISOLATED(line[i].wc)) ); else to[i].wc = SFINAL((SISOLATED(line[i].wc))); break; } tempShape = STYPE(line[i-1].wc); if((tempShape == SR) || (tempShape == SD) || (tempShape == SC)) to[i].wc = SINITIAL((SISOLATED(line[i].wc))); else to[i].wc = SISOLATED(line[i].wc); break; } } return 1;}/* * The Main Bidi Function, and the only function that should * be used by the outside world. * * line: a buffer of size count containing text to apply * the Bidirectional algorithm to. 
*/int do_bidi(bidi_char *line, int count){ unsigned char* types; unsigned char* levels; unsigned char paragraphLevel; unsigned char currentEmbedding; unsigned char currentOverride; unsigned char tempType; int i, j, imax, yes, bover; /* Check the presence of R or AL types as optimization */ yes = 0; for(i=0; i<count; i++) { if(getType(line[i].wc) == R || getType(line[i].wc) == AL) { yes = 1; break; } } if(yes == 0) return L; /* Initialize types, levels */ types = malloc(sizeof(unsigned char) * count); levels = malloc(sizeof(unsigned char) * count); /* Rule (P1) NOT IMPLEMENTED * P1. Split the text into separate paragraphs. A paragraph separator is * kept with the previous paragraph. Within each paragraph, apply all the * other rules of this algorithm. */ /* Rule (P2), (P3) * P2. In each paragraph, find the first character of type L, AL, or R. * P3. If a character is found in P2 and it is of type AL or R, then set * the paragraph embedding level to one; otherwise, set it to zero. */ paragraphLevel = 0; for( i=0; i<count ; i++) { if(getType(line[i].wc) == R || getType(line[i].wc) == AL) { paragraphLevel = 1; break; } else if(getType(line[i].wc) == L) break; } /* Rule (X1) * X1. Begin by setting the current embedding level to the paragraph * embedding level. Set the directional override status to neutral. */ currentEmbedding = paragraphLevel; currentOverride = ON; /* Rule (X2), (X3), (X4), (X5), (X6), (X7), (X8) * X2. With each RLE, compute the least greater odd embedding level. * X3. With each LRE, compute the least greater even embedding level. * X4. With each RLO, compute the least greater odd embedding level. * X5. With each LRO, compute the least greater even embedding level. * X6. For all types besides RLE, LRE, RLO, LRO, and PDF: * a. Set the level of the current character to the current * embedding level. * b. Whenever the directional override status is not neutral, * reset the current character type to the directional * override status. * X7. 
With each PDF, determine the matching embedding or override code. * If there was a valid matching code, restore (pop) the last * remembered (pushed) embedding level and directional override. * X8. All explicit directional embeddings and overrides are completely * terminated at the end of each paragraph. Paragraph separators are not * included in the embedding. (Useless here) NOT IMPLEMENTED */ bover = 0; for( i=0; i<count; i++) { tempType = getType(line[i].wc); switch(tempType) { case RLE: currentEmbedding = levels[i] = leastGreaterOdd(currentEmbedding); levels[i] = setOverrideBits(levels[i], currentOverride); currentOverride = ON; break; case LRE: currentEmbedding = levels[i] = leastGreaterEven(currentEmbedding); levels[i] = setOverrideBits(levels[i], currentOverride); currentOverride = ON; break; case RLO: currentEmbedding = levels[i] = leastGreaterOdd(currentEmbedding); tempType = currentOverride = R; bover = 1; break; case LRO: currentEmbedding = levels[i] = leastGreaterEven(currentEmbedding); tempType = currentOverride = L; bover = 1; break; case PDF: currentEmbedding = getPreviousLevel(levels, i); currentOverride = currentEmbedding & OMASK; currentEmbedding = currentEmbedding & ~OMASK; levels[i] = currentEmbedding; break; /* Whitespace is treated as neutral for now */ case WS: case S: levels[i] = currentEmbedding; tempType = ON; if(currentOverride != ON) tempType = currentOverride; break; default: levels[i] = currentEmbedding; if(currentOverride != ON) tempType = currentOverride; break; } types[i] = tempType; } /* this clears out all overrides, so we can use levels safely... */ /* checks bover first */ if(bover) for( i=0; i<count; i++) levels[i] = levels[i] & LMASK; /* Rule (X9) * X9. Remove all RLE, LRE, RLO, LRO, PDF, and BN codes. * Here, they're converted to BN. */ for(i=0; i<count; i++) { switch(types[i]) { case RLE: case LRE: case RLO: case LRO: case PDF: types[i] = BN; break; } } /* Rule (W1) * W1. 
Examine each non-spacing mark (NSM) in the level run, and change * the type of the NSM to the type of the previous character. If the NSM * is at the start of the level run, it will get the type of sor. */ if(types[0] == NSM) types[0] = paragraphLevel; for(i=1; i<count; i++) { if(types[i] == NSM) types[i] = types[i-1]; /* Is this a safe assumption? * I assumed the previous, IS a character. */ } /* Rule (W2) * W2. Search backwards from each instance of a European number until the * first strong type (R, L, AL, or sor) is found. If an AL is found, * change the type of the European number to Arabic number. */ for(i=0; i<count; i++) { if(types[i] == EN) { j=i; while(j >= 0) { if(types[j] == AL) { types[i] = AN; break; }else if(types[j] == R || types[j] == L) { break; } j--; } } } /* Rule (W3) * W3. Change all ALs to R. * * Optimization: on Rule Xn, we might set a flag on AL type * to prevent this loop in L R lines only... */ for(i=0; i<count; i++) { if(types[i] == AL) types[i] = R; } /* Rule (W4) * W4. A single European separator between two European numbers changes * to a European number. A single common separator between two numbers * of the same type changes to that type. */ for( i=0; i<(count-1); i++) { if(types[i] == ES) { if(types[i-1] == EN && types[i+1] == EN) types[i] = EN; }else if(types[i] == CS) { if(types[i-1] == EN && types[i+1] == EN) types[i] = EN; else if(types[i-1] == AN && types[i+1] == AN) types[i] = AN; } } /* Rule (W5) * W5. A sequence of European terminators adjacent to European numbers * changes to all European numbers. * * Optimization: lots here... else ifs need rearrangement */ for(i=0; i<count; i++) { if(types[i] == ET) { if(types[i-1] == EN) { types[i] = EN; continue; }else if(types[i+1] == EN) { types[i] = EN; continue; }else if(types[i+1] == ET) { j=i; while(j <count && types[j] == ET) { j++; } if(types[j] == EN) types[i] = EN; } } } /* Rule (W6) * W6. 
Otherwise, separators and terminators change to Other Neutral: */ for(i=0; i<count; i++) { switch(types[i]) { case ES: case ET: case CS: types[i] = ON; break; } } /* Rule (W7) * W7. Search backwards from each instance of a European number until * the first strong type (R, L, or sor) is found. If an L is found, * then change the type of the European number to L. */ for(i=0; i<count; i++) { if(types[i] == EN) { j=i; while(j >= 0) { if(types[j] == L) { types[i] = L; break; } else if(types[j] == R || types[j] == AL) { break; } j--; } } } /* Rule (N1) * N1. A sequence of neutrals takes the direction of the surrounding * strong text if the text on both sides has the same direction. European * and Arabic numbers are treated as though they were R. */ if(types[0] == ON) { if((types[1] == R) || (types[1] == EN) || (types[1] == AN)) types[0] = R; else if(types[1] == L) types[0] = L; } for(i=1; i<(count-1); i++) { if(types[i] == ON) { if(types[i-1] == L) { j=i; while(j<(count-1) && types[j] == ON)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -