📄 minibidi.c

📁 大名鼎鼎的远程登录软件putty的Symbian版源码
💻 C
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/************************************************************************
 * $Id: minibidi.c,v 1.1.2.1 2004/12/29 11:32:15 pekangas Exp $
 *
 * ------------
 * Description:
 * ------------
 * This is an implementation of Unicode's Bidirectional Algorithm
 * (known as UAX #9).
 *
 *   http://www.unicode.org/reports/tr9/
 *
 * Author: Ahmad Khalifa
 *
 * -----------------
 * Revision Details:    (Updated by Revision Control System)
 * -----------------
 *  $Date: 2004/12/29 11:32:15 $
 *  $Author: pekangas $
 *  $Revision: 1.1.2.1 $
 *  $Source: /cygdrive/c/home/pekangas/.cvsroot/s2putty/putty/minibidi.c,v $
 *
 * (www.arabeyes.org - under MIT license)
 *
 ************************************************************************/

/*
 * TODO:
 * =====
 * - Explicit marks need to be handled (they are not 100% now)
 * - Ligatures
 */

#include "minibidi.h"

/*
 * Reverses, in place, every run of the text buffer that starts at an
 * element whose level equals max and extends over all following
 * elements whose level is max or higher.
 *
 * Input:
 * from: text buffer, on which to apply flipping
 * level: resolved levels buffer (count entries)
 * max: the maximum level found in this line (should be unsigned char)
 * count: line size in bidi_char
 */
void flipThisRun(bidi_char *from, unsigned char *level, int max, int count)
{
    int i, j, lo, hi;
    bidi_char temp;

    i = 0;
    j = 0;
    while (i < count && j < count) {
        /* find the start of the next run of level == max */
        i = j = findIndexOfRun(level, i, count, max);

        /*
         * Find the end of the run.  The bound is tested before the
         * element is read so that level[count] is never touched, which
         * matters both when the run reaches the end of the line and
         * when findIndexOfRun() found nothing and returned count.
         */
        while (i < count && max <= level[i]) {
            i++;
        }

        /* reverse from[j .. i-1] by swapping from both ends inwards */
        for (lo = j, hi = i - 1; lo < hi; lo++, hi--) {
            temp = from[lo];
            from[lo] = from[hi];
            from[hi] = temp;
        }
    }
}

/*
 * Finds the index of the first element at or after start whose level
 * equals tlevel.
 *
 * level: levels buffer (count entries)
 * start: index at which to begin searching
 * count: number of entries in level
 * tlevel: level value to look for
 *
 * Returns the index found, or count if there is no such element.
 */
int findIndexOfRun(unsigned char *level, int start, int count, int tlevel)
{
    int i;

    for (i = start; i < count; i++) {
        if (level[i] == tlevel) {
            return i;
        }
    }
    return count;
}

/*
 * Returns character type of ch, by calling RLE table lookup
 * function
 */
unsigned char getType(wchar_t ch)
{
    return getRLE(ch);
}

/*
 * The 
most significant 2 bits of each level are used to store * Override status of each character * This function sets the override bits of level according * to the value in override, and reurns the new byte. */unsigned char setOverrideBits(unsigned char level, unsigned char override){    if(override == ON)	return level;    else if(override == R)	return level | OISR;    else if(override == L)	return level | OISL;    return level;}/* Dont remember what this was used for :-) */unsigned char getPreviousLevel(unsigned char* level, int from){    unsigned char current;    from--;    current = level[from];    while(from>0 && level[from] == current)    {	from--;    }    return level[++from];}/* * Returns the first odd value greater than x */unsigned char leastGreaterOdd(unsigned char x){    if((x % 2) == 0)	return x+1;    else	return x+2;}/* * Returns the first even value greater than x */unsigned char leastGreaterEven(unsigned char x){    if((x % 2) == 0)	return x+2;    else	return x+1;}/* * Loops over the RLE_table array looking for the * type of ch */unsigned char getRLE(wchar_t ch){    int offset, i, freq;    freq = offset = 0;    for(i=0; i<0xFFFF; i++)    {	freq = ((RLENode*)RLE_table)[i].f;	offset += freq;	if(offset == ch)	    return ((RLENode*)RLE_table)[i].d;	else if(offset > ch)	    return ((RLENode*)RLE_table)[i-1].d;    }    /* this is here to stop compiler nagging */    return ON;}/* The Main shaping function, and the only one to be used * by the outside world. * * line: buffer to apply shaping to. 
this must be passed by doBidi() first * to: output buffer for the shaped data * count: number of characters in line */int do_shape(bidi_char *line, bidi_char *to, int count){    int i, tempShape, ligFlag;    for(ligFlag=i=0; i<count; i++)    {	to[i] = line[i];	tempShape = STYPE(line[i].wc);	switch(tempShape )	{	  case SC:	    break;	  case SU:	    break;	  case SR:	    tempShape = STYPE(line[i+1].wc);	    if((tempShape == SL) || (tempShape == SD) || (tempShape == SC))		to[i].wc = SFINAL((SISOLATED(line[i].wc)));	    else		to[i].wc = SISOLATED(line[i].wc);	    break;	  case SD:	    /* Make Ligatures */	    tempShape = STYPE(line[i+1].wc);	    if(line[i].wc == 0x644)	    {		switch(line[i-1].wc)		{		  case 0x622:		    ligFlag = 1;		    if((tempShape == SL) || (tempShape == SD) || (tempShape == SC))			to[i].wc = 0xFEF6;		    else			to[i].wc = 0xFEF5;		    break;		  case 0x623:		    ligFlag = 1;		    if((tempShape == SL) || (tempShape == SD) || (tempShape == SC))			to[i].wc = 0xFEF8;		    else			to[i].wc = 0xFEF7;		    break;		  case 0x625:		    ligFlag = 1;		    if((tempShape == SL) || (tempShape == SD) || (tempShape == SC))			to[i].wc = 0xFEFA;		    else			to[i].wc = 0xFEF9;		    break;		  case 0x627:		    ligFlag = 1;		    if((tempShape == SL) || (tempShape == SD) || (tempShape == SC))			to[i].wc = 0xFEFC;		    else			to[i].wc = 0xFEFB;		    break;		}		if(ligFlag)		{		    to[i-1].wc = 0x20;		    ligFlag = 0;		    break;		}	    }	    if((tempShape == SL) || (tempShape == SD) || (tempShape == SC))	    {		tempShape = STYPE(line[i-1].wc);		if((tempShape == SR) || (tempShape == SD) || (tempShape == SC))		    to[i].wc = SMEDIAL( (SISOLATED(line[i].wc)) );		else		    to[i].wc = SFINAL((SISOLATED(line[i].wc)));		break;	    }	    tempShape = STYPE(line[i-1].wc);	    if((tempShape == SR) || (tempShape == SD) || (tempShape == SC))		to[i].wc = SINITIAL((SISOLATED(line[i].wc)));	    else		to[i].wc = SISOLATED(line[i].wc);	    break;	}    }    return 1;}/* * The Main Bidi 
Function, and the only function that should * be used by the outside world. * * line: a buffer of size count containing text to apply * the Bidirectional algorithm to. */int do_bidi(bidi_char *line, int count){    unsigned char* types;    unsigned char* levels;    unsigned char paragraphLevel;    unsigned char currentEmbedding;    unsigned char currentOverride;    unsigned char tempType;    int i, j, imax, yes, bover;    /* Check the presence of R or AL types as optimization */    yes = 0;    for(i=0; i<count; i++)    {	if(getType(line[i].wc) == R || getType(line[i].wc) == AL)	{	    yes = 1;	    break;	}    }    if(yes == 0)	return L;    /* Initialize types, levels */    types = malloc(sizeof(unsigned char) * count);    levels = malloc(sizeof(unsigned char) * count);    /* Rule (P1)  NOT IMPLEMENTED     * P1. Split the text into separate paragraphs. A paragraph separator is     * kept with the previous paragraph. Within each paragraph, apply all the     * other rules of this algorithm.     */    /* Rule (P2), (P3)     * P2. In each paragraph, find the first character of type L, AL, or R.     * P3. If a character is found in P2 and it is of type AL or R, then set     * the paragraph embedding level to one; otherwise, set it to zero.     */    paragraphLevel = 0;    for( i=0; i<count ; i++)    {	if(getType(line[i].wc) == R || getType(line[i].wc) == AL)	{	    paragraphLevel = 1;	    break;	}	else if(getType(line[i].wc) == L)	    break;    }    /* Rule (X1)     * X1. Begin by setting the current embedding level to the paragraph     * embedding level. Set the directional override status to neutral.     */    currentEmbedding = paragraphLevel;    currentOverride = ON;    /* Rule (X2), (X3), (X4), (X5), (X6), (X7), (X8)     * X2. With each RLE, compute the least greater odd embedding level.     * X3. With each LRE, compute the least greater even embedding level.     * X4. With each RLO, compute the least greater odd embedding level.     * X5. 
With each LRO, compute the least greater even embedding level.     * X6. For all types besides RLE, LRE, RLO, LRO, and PDF:     *		a. Set the level of the current character to the current     *		    embedding level.     *		b.  Whenever the directional override status is not neutral,     *               reset the current character type to the directional     *               override status.     * X7. With each PDF, determine the matching embedding or override code.     * If there was a valid matching code, restore (pop) the last     * remembered (pushed) embedding level and directional override.     * X8. All explicit directional embeddings and overrides are completely     * terminated at the end of each paragraph. Paragraph separators are not     * included in the embedding. (Useless here) NOT IMPLEMENTED     */    bover = 0;    for( i=0; i<count; i++)    {	tempType = getType(line[i].wc);	switch(tempType)	{	  case RLE:	    currentEmbedding = levels[i] = leastGreaterOdd(currentEmbedding);	    levels[i] = setOverrideBits(levels[i], currentOverride);	    currentOverride = ON;	    break;	  case LRE:	    currentEmbedding = levels[i] = leastGreaterEven(currentEmbedding);	    levels[i] = setOverrideBits(levels[i], currentOverride);	    currentOverride = ON;	    break;	  case RLO:	    currentEmbedding = levels[i] = leastGreaterOdd(currentEmbedding);	    tempType = currentOverride = R;	    bover = 1;	    break;	  case LRO:	    currentEmbedding = levels[i] = leastGreaterEven(currentEmbedding);	    tempType = currentOverride = L;	    bover = 1;	    break;	  case PDF:	    currentEmbedding = getPreviousLevel(levels, i);	    currentOverride = currentEmbedding & OMASK;	    currentEmbedding = currentEmbedding & ~OMASK;	    levels[i] = currentEmbedding;	    break;	    /* Whitespace is treated as neutral for now */	  case WS:	  case S:	    levels[i] = currentEmbedding;	    tempType = ON;	    if(currentOverride != ON)		tempType = currentOverride;	    break;	  default:	    levels[i] = 
currentEmbedding;	    if(currentOverride != ON)		tempType = currentOverride;	    break;	}	types[i] = tempType;    }    /* this clears out all overrides, so we can use levels safely... */    /* checks bover first */    if(bover)	for( i=0; i<count; i++)	    levels[i] = levels[i] & LMASK;    /* Rule (X9)     * X9. Remove all RLE, LRE, RLO, LRO, PDF, and BN codes.     * Here, they're converted to BN.     */    for(i=0; i<count; i++)    {	switch(types[i])	{	  case RLE:	  case LRE:	  case RLO:	  case LRO:	  case PDF:	    types[i] = BN;	    break;	}    }    /* Rule (W1)     * W1. Examine each non-spacing mark (NSM) in the level run, and change     * the type of the NSM to the type of the previous character. If the NSM     * is at the start of the level run, it will get the type of sor.     */    if(types[0] == NSM)	types[0] = paragraphLevel;    for(i=1; i<count; i++)    {	if(types[i] == NSM)	    types[i] = types[i-1];	/* Is this a safe assumption?	 * I assumed the previous, IS a character.	 */    }    /* Rule (W2)     * W2. Search backwards from each instance of a European number until the     * first strong type (R, L, AL, or sor) is found.  If an AL is found,     * change the type of the European number to Arabic number.     */    for(i=0; i<count; i++)    {	if(types[i] == EN)	{	    j=i;	    while(j >= 0)	    {		if(types[j] == AL)		{		    types[i] = AN;		    break;		}else if(types[j] == R || types[j] == L)		    {			break;		    }		j--;	    }	}    }    /* Rule (W3)     * W3. Change all ALs to R.     *     * Optimization: on Rule Xn, we might set a flag on AL type     * to prevent this loop in L R lines only...     */    for(i=0; i<count; i++)    {	if(types[i] == AL)	    types[i] = R;    }    /* Rule (W4)     * W4. A single European separator between two European numbers changes     * to a European number. A single common separator between two numbers     * of the same type changes to that type.     
*/    for( i=0; i<(count-1); i++)    {	if(types[i] == ES)	{	    if(types[i-1] == EN && types[i+1] == EN)		types[i] = EN;	}else if(types[i] == CS)	    {		if(types[i-1] == EN && types[i+1] == EN)		    types[i] = EN;		else if(types[i-1] == AN && types[i+1] == AN)		    types[i] = AN;	    }    }    /* Rule (W5)     * W5. A sequence of European terminators adjacent to European numbers     * changes to all European numbers.     *     * Optimization: lots here... else ifs need rearrangement     */    for(i=0; i<count; i++)    {	if(types[i] == ET)	{	    if(types[i-1] == EN)	    {		types[i] = EN;		continue;	    }else if(types[i+1] == EN)		{		    types[i] = EN;		    continue;		}else if(types[i+1] == ET)		    {			j=i;			while(j <count && types[j] == ET)			{			    j++;			}			if(types[j] == EN)			    types[i] = EN;		    }	}    }    /* Rule (W6)     * W6. Otherwise, separators and terminators change to Other Neutral:     */    for(i=0; i<count; i++)    {	switch(types[i])	{	  case ES:	  case ET:	  case CS:	    types[i] = ON;	    break;	}    }    /* Rule (W7)     * W7. Search backwards from each instance of a European number until     * the first strong type (R, L, or sor) is found. If an L is found,     * then change the type of the European number to L.     */    for(i=0; i<count; i++)    {	if(types[i] == EN)	{	    j=i;	    while(j >= 0)	    {		if(types[j] == L)		{		    types[i] = L;		    break;		}		else if(types[j] == R || types[j] == AL)		{		    break;		}		j--;	    }	}    }    /* Rule (N1)     * N1. A sequence of neutrals takes the direction of the surrounding     * strong text if the text on both sides has the same direction. European     * and Arabic numbers are treated as though they were R.     */    if(types[0] == ON)    {	if((types[1] == R) || (types[1] == EN) || (types[1] == AN))	    types[0] = R;	else if(types[1] == L)	    types[0] = L;    }    for(i=1; i<(count-1); i++)    {	if(types[i] == ON)	{	    if(types[i-1] == L)	    {		j=i;		while(j<(count-1) && types[j] == ON)
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -