hebrew-shaper.c

来自「GTK+-2.0源码之pango-1.15.6.tar.gz」· C语言代码 · 共 478 行
478 行
/* Pango
 * hebrew-shaper.c:
 *
 * Copyright (c) 2001 by Sun Microsystems, Inc.
 * Author: Chookij Vanatham <Chookij.Vanatham@Eng.Sun.COM>
 *
 * Hebrew points positioning improvements 2001
 * Author: Dov Grobgeld <dov@imagic.weizmann.ac.il>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 *
 * Note March 9, 2003: I fixed a crash with regards to precomposed
 * characters, by wrapping all of them to be considered as ALEF as
 * far as consideration about composability is concerned. The rendering
 * with regards to precomposed characters AND nikud comes out really
 * bad though, and should be fixed, once I have more time.
 */

#include <config.h>
#include <glib.h>
#include "pango-engine.h"
#include "hebrew-shaper.h"

/* TRUE for code points strictly between U+0590 and U+0600.  Every table
 * lookup below must be guarded by this test: it is what keeps the index
 * produced by ucs2iso8859_8() inside the 128-entry tables. */
#define ishebrew(wc)            ((wc)>0x590 && (wc)<0x600)

/* Table index for a code point: U+0590 maps to index 0x10. */
#define ucs2iso8859_8(wc)       ((unsigned int)((unsigned int)(wc) - 0x0590 + 0x10))

/* Inverse of ucs2iso8859_8(): table index back to a code point. */
#define iso8859_8_2uni(c)       ((gunichar)(c) - 0x10 + 0x0590)

/* Maximum number of characters hebrew_shaper_get_next_cluster() stores
 * into the caller-supplied cluster buffer. */
#define MAX_CLUSTER_CHRS        256

/* Define Hebrew character classes (bit masks, see char_class_table) */
#define _ND                     0
#define _SP                     1
#define _NS                     (1<<1)
#define _DA                     (1<<2)  /* only for dagesh... */

#define NoDefine                _ND
#define SpacingLetter           _SP
#define NonSpacingPunc          _NS

/* Define Hebrew character types (indices into compose_table) */
#define __ND                    0
#define __SP                    1
#define __NS                    2
#define __DA                    3

/* Unicode definitions needed in logics below... */
#define UNI_ALEF                0x05D0
#define UNI_BET                 0x05D1
#define UNI_GIMMEL              0x05d2
#define UNI_DALED               0x05D3
#define UNI_KAF                 0x05DB
#define UNI_FINAL_KAF           0x05DA
#define UNI_VAV                 0x05D5
#define UNI_YOD                 0x05D9
#define UNI_RESH                0x05E8
#define UNI_LAMED               0x05DC
#define UNI_SHIN                0x05E9
#define UNI_FINAL_PE            0x05E3
#define UNI_PE                  0x05E4
#define UNI_QOF                 0x05E7
#define UNI_TAV                 0x05EA
#define UNI_SHIN_DOT            0x05C1
#define UNI_SIN_DOT             0x05C2
#define UNI_MAPIQ               0x05BC
#define UNI_SHEVA               0x05B0
#define UNI_HOLAM               0x05B9
#define UNI_QUBUTS              0x05BB
#define UNI_HATAF_SEGOL         0x05B1
#define UNI_HATAF_QAMATZ        0x05B3
#define UNI_TSERE               0x05B5
#define UNI_QAMATS              0x05B8

/*======================================================================
 *  In the tables below all Hebrew characters are categorized to
 *  one of the following four classes:
 *
 *      non used entries              Not defined  (ND)
 *      accents, points               Non spacing  (NS)
 *      punctuation and characters    Spacing characters (SP)
 *      dagesh                        "Dagesh"    (DA)
 *
 *  Both tables are indexed by ucs2iso8859_8 (wc).  char_class_table
 *  holds the class as a bit mask, char_type_table holds the same
 *  classification as a small integer usable as a compose_table index.
 *----------------------------------------------------------------------*/
static const gint char_class_table[128] = {
  /*       0,   1,   2,   3,   4,   5,   6,   7 */

  /*00*/ _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
         _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,

  /*10*/ _ND, _NS, _NS, _NS, _NS, _NS, _NS, _NS,
         _NS, _NS, _NS, _NS, _NS, _NS, _NS, _NS,

  /*20*/ _NS, _NS, _ND, _NS, _NS, _NS, _NS, _NS,
         _NS, _NS, _NS, _NS, _NS, _NS, _NS, _NS,

  /*30*/ _NS, _NS, _NS, _NS, _NS, _NS, _NS, _NS,
         _NS, _NS, _ND, _NS, _DA, _NS, _SP, _NS,

  /*40*/ _SP, _NS, _NS, _SP, _NS, _ND, _ND, _ND,
         _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,

  /*50*/ _SP, _SP, _SP, _SP, _SP, _SP, _SP, _SP,
         _SP, _SP, _SP, _SP, _SP, _SP, _SP, _SP,

  /*60*/ _SP, _SP, _SP, _SP, _SP, _SP, _SP, _SP,
         _SP, _SP, _SP, _ND, _ND, _ND, _ND, _ND,

  /*70*/ _SP, _SP, _SP, _SP, _SP, _ND, _ND, _ND,
         _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
};

static const gint char_type_table[128] = {
  /*       0,   1,   2,   3,   4,   5,   6,   7 */

  /*00*/ __ND, __ND, __ND, __ND, __ND, __ND, __ND, __ND,
         __ND, __ND, __ND, __ND, __ND, __ND, __ND, __ND,

  /*10*/ __ND, __NS, __NS, __NS, __NS, __NS, __NS, __NS,
         __NS, __NS, __NS, __NS, __NS, __NS, __NS, __NS,

  /*20*/ __NS, __NS, __ND, __NS, __NS, __NS, __NS, __NS,
         __NS, __NS, __NS, __NS, __NS, __NS, __NS, __NS,

  /*30*/ __NS, __NS, __NS, __NS, __NS, __NS, __NS, __NS,
         __NS, __NS, __ND, __NS, __DA, __NS, __SP, __NS,

  /*40*/ __SP, __NS, __NS, __SP, __NS, __ND, __ND, __ND,
         __ND, __ND, __ND, __ND, __ND, __ND, __ND, __ND,

  /*50*/ __SP, __SP, __SP, __SP, __SP, __SP, __SP, __SP,
         __SP, __SP, __SP, __SP, __SP, __SP, __SP, __SP,

  /*60*/ __SP, __SP, __SP, __SP, __SP, __SP, __SP, __SP,
         __SP, __SP, __SP, __ND, __ND, __ND, __ND, __ND,

  /*70*/ __SP, __SP, __SP, __SP, __SP, __ND, __ND, __ND,
         __ND, __ND, __ND, __ND, __ND, __ND, __ND, __ND,
};

/*======================================================================
 *  The following table answers the question whether two characters
 *  are composible or not. The decision is made by looking at the
 *  char_type_table values for the first character in a cluster
 *  vs a following character. The only three combinations that
 *  are composible in Hebrew according to the table are:
 *
 *     1. a spacing character followed by non-spacing character
 *     2. a spacing character followed by a dagesh.
 *     3. a dagesh followed by a non-spacing character.
 *
 *  Note that a spacing character may be followed by several non-spacing
 *  accents, as the decision is always made on the base character of
 *  a combination.
 *
 *  Rows are indexed by the type of the first character (Cn-1), columns
 *  by the type of the following character (Cn).
 *----------------------------------------------------------------------*/
static const gboolean compose_table[4][4] = {
      /* Cn */ /*     0,     1,     2,     3, */
/* Cn-1 00 */   { FALSE, FALSE, FALSE, FALSE },
  /* 10 */      { FALSE, FALSE,  TRUE,  TRUE },
  /* 20 */      { FALSE, FALSE, FALSE, FALSE },
  /* 30 */      { FALSE, FALSE,  TRUE, FALSE },
};

/* Range test with an inclusive lower bound (U+0590 itself is accepted,
 * unlike ishebrew()).  Not referenced by any function below. */
#define is_hebrew(wc) ((wc) >= 0x590 && (wc) < 0x600)

/* Non-zero when the class of wc has any of the bits in mask set.
 * wc must satisfy ishebrew(). */
#define is_char_class(wc, mask) (char_class_table[ucs2iso8859_8 ((wc))] & (mask))

/* compose_table lookup for a base character and a following character.
 * Both arguments must satisfy ishebrew(). */
#define is_composible(cur_wc, nxt_wc)   (compose_table[char_type_table[ucs2iso8859_8 (cur_wc)]]\
                                                      [char_type_table[ucs2iso8859_8 (nxt_wc)]])

/* Extract the next cluster from a UTF-8 string.
 *
 *   text      start of the UTF-8 text to scan
 *   length    number of bytes of text that may be consumed
 *   cluster   output buffer for the characters of the cluster; at most
 *             MAX_CLUSTER_CHRS entries are written
 *   num_chrs  output: number of characters stored in cluster
 *
 * Returns a pointer just past the last byte consumed.
 *
 * A cluster is a character accepted by ishebrew() followed by every
 * subsequent character that compose_table allows after it.  A character
 * outside the ishebrew() range, or a leading character whose class is
 * neither "not defined" nor "spacing", forms a cluster of its own.
 */
G_CONST_RETURN char *
hebrew_shaper_get_next_cluster(const char      *text,
                               gint             length,
                               gunichar        *cluster,
                               gint            *num_chrs)
{
  const char *p;
  gint n_chars = 0;

  p = text;

  while (p < text + length && n_chars < MAX_CLUSTER_CHRS)
    {
      gunichar current = g_utf8_get_char (p);

      /* ishebrew() is evaluated first so that the table lookup in
         is_char_class() only ever sees in-range characters. */
      if (!ishebrew (current) ||
          (n_chars == 0 && is_char_class(current, ~(NoDefine|SpacingLetter))))
        {
          /* Not a legal Hebrew cluster */
          if (n_chars == 0)
            {
              /* Emit the offending character alone so the caller
                 always makes progress. */
              cluster[n_chars++] = current;
              p = g_utf8_next_char (p);
            }
          break;
        }
      else if (n_chars == 0 ||
               is_composible (cluster[0], current))
        {
          cluster[n_chars++] = current;
          p = g_utf8_next_char (p);
        }
      else
        break;
    }

  *num_chrs = n_chars;
  return p;
}

/* Compute positioning of the points (nikud) of one cluster relative to
 * its base character.
 *
 *   cluster         characters of the cluster; cluster[0] is the base
 *   cluster_length  number of entries in cluster and in all arrays below
 *   ink_rect        input: ink rectangle of each character's glyph
 *   width           input and output: only modified for a lone dot
 *                   (width[0] set to 0) and when room is made for a
 *                   mapiq (width[cluster_length-1] is increased)
 *   x_offset        output: horizontal offset of each glyph
 *   y_offset        output: vertical offset of each glyph
 *
 * Offsets are expressed in the same units as ink_rect.  An empty cluster
 * (cluster_length <= 0) is ignored and no array is touched.
 */
void
hebrew_shaper_get_cluster_kerning(gunichar            *cluster,
                                  gint                cluster_length,
                                  PangoRectangle      ink_rect[],

                                  /* input and output */
                                  gint                width[],
                                  gint                x_offset[],
                                  gint                y_offset[])
{
  int i;
  int base_ink_x_offset, base_ink_y_offset, base_ink_width, base_ink_height;
  gunichar base_char;

  /* Nothing to position; also avoids touching element 0 of empty arrays. */
  if (cluster_length <= 0)
    return;

  base_char = cluster[0];

  x_offset[0] = 0;
  y_offset[0] = 0;

  if (cluster_length == 1)
    {
      /* Make lone 'vav dot' have zero width */
      if (base_char == UNI_SHIN_DOT
          || base_char == UNI_SIN_DOT
          || base_char == UNI_HOLAM
          )
        {
          x_offset[0] = -ink_rect[0].x - ink_rect[0].width;
          width[0] = 0;
        }

      return;
    }

  base_ink_x_offset = ink_rect[0].x;
  base_ink_y_offset = ink_rect[0].y;
  base_ink_width = ink_rect[0].width;
  base_ink_height = ink_rect[0].height;

  /* Do heuristics */
  for (i=1; i<cluster_length; i++)
    {
      int gl = cluster[i];

      x_offset[i] = 0;
      y_offset[i] = 0;

      /* Check if it is a point */
      if (gl < 0x5B0 || gl >= 0x05D0)
        continue;

      /* Center dot of VAV */
      if (gl == UNI_MAPIQ && base_char == UNI_VAV)
        {
          x_offset[i] = base_ink_x_offset - ink_rect[i].x;

          /* If VAV is a vertical bar without a roof, then we
             need to make room for the dot by increasing the
             cluster width. But how can I check if that is the
             case??
          */
          /* This is wild, but it does the job of differentiating
             between two M$ fonts... Base the decision on the
             aspect ratio of the vav...
          */
          if (base_ink_height > base_ink_width * 3.5)
            {
              int j;
              double space = 0.7;
              double kern = 0.5;

              /* Shift all characters to make place for the mapiq */
              for (j=0; j<i; j++)
                x_offset[j] += ink_rect[i].width*(1+space-kern);

              width[cluster_length-1] += ink_rect[i].width*(1+space-kern);
              x_offset[i] -= ink_rect[i].width*(kern);
            }
        }

      /* Dot over SHIN */
      else if (gl == UNI_SHIN_DOT && base_char == UNI_SHIN)
        {
          x_offset[i] = base_ink_x_offset + base_ink_width
            - ink_rect[i].x - ink_rect[i].width;
        }

      /* Dot over SIN */
      else if (gl == UNI_SIN_DOT && base_char == UNI_SHIN)
        {
          x_offset[i] = base_ink_x_offset - ink_rect[i].x;
        }

      /* VOWEL DOT above to any other character than
         SHIN or VAV should stick out a bit to the left. */
      else if ((gl == UNI_SIN_DOT || gl == UNI_HOLAM)
               && base_char != UNI_SHIN && base_char != UNI_VAV)
        {
          x_offset[i] = base_ink_x_offset -ink_rect[i].x - ink_rect[i].width * 3/ 2;
        }

      /* VOWELS under resh or vav are right aligned, if they are
         narrower than the characters. Otherwise they are centered.
         (YOD and DALED bases are treated the same way.)
       */
      else if ((base_char == UNI_VAV
                || base_char == UNI_RESH
                || base_char == UNI_YOD
                || base_char == UNI_DALED
                )
               && ((gl >= UNI_SHEVA && gl <= UNI_QAMATS) ||
                   gl == UNI_QUBUTS)
               && ink_rect[i].width < base_ink_width
               )
        {
          x_offset[i] = base_ink_x_offset + base_ink_width
            - ink_rect[i].x - ink_rect[i].width;
        }

      /* VOWELS under FINAL KAF are offset centered and offset in
         y */
      else if ((base_char == UNI_FINAL_KAF
                )
               && ((gl >= UNI_SHEVA && gl <= UNI_QAMATS) ||
                   gl == UNI_QUBUTS))
        {
          /* x are at 1/3 to take into account the stem */
          x_offset[i] = base_ink_x_offset - ink_rect[i].x
            + base_ink_width * 1/3 - ink_rect[i].width/2;

          /* Center in y */
          y_offset[i] = base_ink_y_offset - ink_rect[i].y
            + base_ink_height * 1/2 - ink_rect[i].height/2;
        }

      /* MAPIQ in PE or FINAL PE */
      else if (gl == UNI_MAPIQ
               && (base_char == UNI_PE || base_char == UNI_FINAL_PE))
        {
          x_offset[i]= base_ink_x_offset - ink_rect[i].x
            + base_ink_width * 2/3 - ink_rect[i].width/2;

          /* Another option is to offset the MAPIQ in y, e.g. by
             lowering its y_offset by base_ink_height/5. */
        }

      /* MAPIQ in SHIN should be moved a bit to the right */
      else if (gl == UNI_MAPIQ
               && base_char == UNI_SHIN)
        {
          x_offset[i]=  base_ink_x_offset - ink_rect[i].x
            + base_ink_width * 3/5 - ink_rect[i].width/2;
        }

      /* MAPIQ in YUD is right aligned */
      else if (gl == UNI_MAPIQ
               && base_char == UNI_YOD)
        {
          x_offset[i]=  base_ink_x_offset - ink_rect[i].x;

          /* Lower left in y */
          y_offset[i] = base_ink_y_offset - ink_rect[i].y
            + base_ink_height - ink_rect[i].height*1.75;

          if (base_ink_height > base_ink_width * 2)
            {
              int j;
              double space = 0.7;
              double kern = 0.5;

              /* Shift all cluster characters to make space for mapiq */
              for (j=0; j<i; j++)
                x_offset[j] += ink_rect[i].width*(1+space-kern);

              width[cluster_length-1] += ink_rect[i].width*(1+space-kern);
            }
        }

      /* VOWEL DOT next to any other character */
      else if ((gl == UNI_SIN_DOT || gl == UNI_HOLAM)
               && (base_char != UNI_VAV))
        {
          x_offset[i] = base_ink_x_offset -ink_rect[i].x;
        }

      /* Move nikud of taf a bit ... */
      else if (base_char == UNI_TAV && gl == UNI_MAPIQ)
        {
          x_offset[i] = base_ink_x_offset - ink_rect[i].x
            + base_ink_width * 5/8 - ink_rect[i].width/2;
        }

      /* Move center dot of characters with a right stem and no
         left stem. */
      else if (gl == UNI_MAPIQ &&
               (base_char == UNI_BET
                || base_char == UNI_DALED
                || base_char == UNI_KAF
                || base_char == UNI_GIMMEL
                ))
        {
          x_offset[i] = base_ink_x_offset - ink_rect[i].x
            + base_ink_width * 3/8 - ink_rect[i].width/2;
        }

      /* Right align wide nikud under QOF */
      else if (base_char == UNI_QOF &&
               ( (gl >= UNI_HATAF_SEGOL
                  && gl <= UNI_HATAF_QAMATZ)
                 || (gl >= UNI_TSERE
                     && gl<= UNI_QAMATS)
                 || (gl == UNI_QUBUTS)))
        {
          x_offset[i] = base_ink_x_offset + base_ink_width
            - ink_rect[i].x - ink_rect[i].width;
        }

      /* Center by default */
      else
        {
          x_offset[i] = base_ink_x_offset - ink_rect[i].x
            + base_ink_width/2 - ink_rect[i].width/2;
        }
    }
}

/* Reverse, in place, the glyphs and their log_clusters entries in the
 * half-open index range [start, end) of a glyph string.  A range with
 * fewer than two elements is left untouched.
 */
void
hebrew_shaper_swap_range (PangoGlyphString *glyphs,
                          int               start,
                          int               end)
{
  int i, j;

  /* Walk inwards from both ends, exchanging pairs until the indices meet. */
  for (i = start, j = end - 1; i < j; i++, j--)
    {
      PangoGlyphInfo glyph_info;
      gint log_cluster;

      glyph_info = glyphs->glyphs[i];
      glyphs->glyphs[i] = glyphs->glyphs[j];
      glyphs->glyphs[j] = glyph_info;

      log_cluster = glyphs->log_clusters[i];
      glyphs->log_clusters[i] = glyphs->log_clusters[j];
      glyphs->log_clusters[j] = log_cluster;
    }
}

/* Reverse the order of the clusters of a glyph string while keeping the
 * glyphs inside each cluster in their original relative order.  A
 * cluster is a maximal run of consecutive glyphs sharing the same
 * log_clusters value.
 */
void
hebrew_shaper_bidi_reorder(PangoGlyphString *glyphs)
{
  int start, end;

  /* Swap all glyphs */
  hebrew_shaper_swap_range (glyphs, 0, glyphs->num_glyphs);

  /* Now reorder glyphs within each cluster back to LTR */
  for (start = 0; start < glyphs->num_glyphs;)
    {
      end = start;
      while (end < glyphs->num_glyphs &&
             glyphs->log_clusters[end] == glyphs->log_clusters[start])
        end++;

      hebrew_shaper_swap_range (glyphs, start, end);
      start = end;
    }
}
hebrew-shaper.c - 源码说明

本页面展示了「GTK+-2.0源码之pango-1.15.6.tar.gz」中的 hebrew-shaper.c 源码文件，采用 C语言编程语言编写，共 478 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与pango相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?