📄 indic-ot-class-tables.c
字号:
* FIXME: eyelash RA only for Devanagari?? */#define DEVA_SCRIPT_FLAGS (SF_EYELASH_RA | SF_NO_POST_BASE_LIMIT)#define BENG_SCRIPT_FLAGS (SF_REPH_AFTER_BELOW | SF_NO_POST_BASE_LIMIT)#define GURU_SCRIPT_FLAGS (SF_NO_POST_BASE_LIMIT)#define GUJR_SCRIPT_FLAGS (SF_NO_POST_BASE_LIMIT)#define ORYA_SCRIPT_FLAGS (SF_REPH_AFTER_BELOW | SF_NO_POST_BASE_LIMIT)#define TAML_SCRIPT_FLAGS (SF_MPRE_FIXUP | SF_NO_POST_BASE_LIMIT)#define TELU_SCRIPT_FLAGS (SF_MATRAS_AFTER_BASE | 3)#define KNDA_SCRIPT_FLAGS (SF_MATRAS_AFTER_BASE | 3)#define MLYM_SCRIPT_FLAGS (SF_MPRE_FIXUP | SF_NO_POST_BASE_LIMIT | SF_PROCESS_ZWJ)#define SINH_SCRIPT_FLAGS (SF_MPRE_FIXUP | SF_NO_POST_BASE_LIMIT | SF_PROCESS_ZWJ)/* * Indic Class Tables *//* Add a little macro to compute lastChar based on size of the charClasses * table */#define INDIC_OT_CLASS_TABLE_DEFINE(name, firstChar, worstCaseExpansion, scriptFlags, charClasses, splitMatraTable) \ const IndicOTClassTable name = {firstChar, firstChar + G_N_ELEMENTS (charClasses) - 1, \ worstCaseExpansion, scriptFlags, charClasses, splitMatraTable}INDIC_OT_CLASS_TABLE_DEFINE (deva_class_table, 0x0900, 2, DEVA_SCRIPT_FLAGS, devaCharClasses, NULL);INDIC_OT_CLASS_TABLE_DEFINE (beng_class_table, 0x0980, 3, BENG_SCRIPT_FLAGS, bengCharClasses, bengSplitTable);INDIC_OT_CLASS_TABLE_DEFINE (guru_class_table, 0x0A00, 2, GURU_SCRIPT_FLAGS, guruCharClasses, NULL);INDIC_OT_CLASS_TABLE_DEFINE (gujr_class_table, 0x0A80, 2, GUJR_SCRIPT_FLAGS, gujrCharClasses, NULL);INDIC_OT_CLASS_TABLE_DEFINE (orya_class_table, 0x0B00, 3, ORYA_SCRIPT_FLAGS, oryaCharClasses, oryaSplitTable);INDIC_OT_CLASS_TABLE_DEFINE (taml_class_table, 0x0B80, 3, TAML_SCRIPT_FLAGS, tamlCharClasses, tamlSplitTable);INDIC_OT_CLASS_TABLE_DEFINE (telu_class_table, 0x0C00, 3, TELU_SCRIPT_FLAGS, teluCharClasses, teluSplitTable);INDIC_OT_CLASS_TABLE_DEFINE (knda_class_table, 0x0C80, 4, KNDA_SCRIPT_FLAGS, kndaCharClasses, kndaSplitTable);INDIC_OT_CLASS_TABLE_DEFINE (mlym_class_table, 0x0D00, 3, MLYM_SCRIPT_FLAGS, mlymCharClasses, mlymSplitTable);INDIC_OT_CLASS_TABLE_DEFINE (sinh_class_table, 0x0D80, 4, SINH_SCRIPT_FLAGS, sinhCharClasses, sinhSplitTable);const IndicOTSplitMatra *indic_ot_get_split_matra(const IndicOTClassTable *class_table, IndicOTCharClass char_class){ gint32 index = (char_class & CF_INDEX_MASK) >> CF_INDEX_SHIFT; return &class_table->splitMatraTable[index - 1];}gboolean indic_ot_is_vm_above(const IndicOTClassTable *class_table, gunichar ch){ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch); return IS_VM_ABOVE(char_class);}gboolean indic_ot_is_vm_post(const IndicOTClassTable *class_table, gunichar ch){ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch); return IS_VM_POST(char_class);}gboolean indic_ot_is_consonant(const IndicOTClassTable *class_table, gunichar ch){ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch); return IS_CONSONANT(char_class);}gboolean indic_ot_is_reph(const IndicOTClassTable *class_table, gunichar ch){ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch); return IS_REPH(char_class);}gboolean indic_ot_is_virama(const IndicOTClassTable *class_table, gunichar ch){ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch); return (IS_VIRAMA(char_class) || IS_AL_LAKUNA(char_class));}gboolean indic_ot_is_al_lakuna(const IndicOTClassTable *class_table, gunichar ch){ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch); return IS_AL_LAKUNA(char_class);}gboolean indic_ot_is_nukta(const IndicOTClassTable *class_table, gunichar ch){ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch); return IS_NUKTA(char_class);}gboolean indic_ot_is_vattu(const IndicOTClassTable *class_table, gunichar ch){ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch); return IS_VATTU(char_class);}gboolean indic_ot_is_matra(const IndicOTClassTable *class_table, gunichar ch){ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch); return IS_MATRA(char_class);}gboolean indic_ot_is_split_matra(const IndicOTClassTable *class_table, gunichar ch){ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch); return IS_SPLIT_MATRA(char_class);}gboolean indic_ot_is_m_pre(const IndicOTClassTable *class_table, gunichar ch){ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch); return IS_M_PRE(char_class);}gboolean indic_ot_is_m_below(const IndicOTClassTable *class_table, gunichar ch){ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch); return IS_M_BELOW(char_class);}gboolean indic_ot_is_m_above(const IndicOTClassTable *class_table, gunichar ch){ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch); return IS_M_ABOVE(char_class);}gboolean indic_ot_is_m_post(const IndicOTClassTable *class_table, gunichar ch){ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch); return IS_M_POST(char_class);}gboolean indic_ot_is_length_mark(const IndicOTClassTable *class_table, gunichar ch){ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch); return IS_LENGTH_MARK(char_class);}gboolean indic_ot_has_post_or_below_base_form(const IndicOTClassTable *class_table, gunichar ch){ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch); return HAS_POST_OR_BELOW_BASE_FORM(char_class);}gboolean indic_ot_has_post_base_form(const IndicOTClassTable *class_table, gunichar ch){ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch); return HAS_POST_BASE_FORM(char_class);}gboolean indic_ot_has_below_base_form(const IndicOTClassTable *class_table, gunichar ch){ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch); return HAS_BELOW_BASE_FORM(char_class);}IndicOTCharClass indic_ot_get_char_class(const IndicOTClassTable *class_table, gunichar ch){ if (ch == C_SIGN_ZWJ) { return CF_CONSONANT | CC_ZERO_WIDTH_MARK; } if (ch == C_SIGN_ZWNJ) { return CC_ZERO_WIDTH_MARK; } if (ch < class_table->firstChar || ch > class_table->lastChar) { return CC_RESERVED; } return class_table->charClasses[ch - class_table->firstChar];}static const gint8 stateTable[][CC_COUNT] ={/* xx ma mp iv ct cn nu dv vr zw al */ { 1, 1, 1, 5, 3, 2, 1, 1, 1, 1, 1}, {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {-1, 6, 1, -1, -1, -1, -1, 5, 4, -1, -1}, {-1, 6, 1, -1, -1, -1, 2, 5, 4, 10, 9}, {-1, -1, -1, -1, 3, 2, -1, -1, -1, 8, -1}, {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1}, {-1, 7, 1, -1, -1, -1, -1, -1, -1, -1, -1}, {-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1}, {-1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1}, {-1, -1, -1, -1, -1, -1, -1, -1, -1, 8, -1}, {-1, -1, -1, -1, -1, -1, -1, -1, 8, -1, 8}};glong indic_ot_find_syllable(const IndicOTClassTable *class_table, const gunichar *chars, glong prev, glong char_count){ glong cursor = prev; gint8 state = 0; while (cursor < char_count) { IndicOTCharClass char_class = indic_ot_get_char_class(class_table, chars[cursor]); state = stateTable[state][char_class & CF_CLASS_MASK]; /*for the components of split matra*/ if ((char_count >= cursor + 3) && (chars[cursor] == 0x0DD9 && chars[cursor + 1] == 0x0DCF && chars[cursor + 2] == 0x0DCA)) { /*for 3 split matra of Sinhala*/ return cursor + 3; } else if ((char_count >= cursor + 3) && (chars[cursor] == 0x0CC6 && chars[cursor + 1] == 0x0CC2 && chars[cursor + 2] == 0x0CD5)) { /*for 3 split matra of Kannada*/ return cursor + 3; } /*for 2 split matra*/ else if (char_count >= cursor + 2) { /*for Bengali*/ if ((chars[cursor] == 0x09C7 && chars[cursor + 1] == 0x09BE) || (chars[cursor] == 0x09C7 && chars[cursor + 1] == 0x09D7) || /*for Oriya*/ (chars[cursor] == 0x0B47 && chars[cursor + 1] == 0x0B3E) || (chars[cursor] == 0x0B47 && chars[cursor + 1] == 0x0B56) || (chars[cursor] == 0x0B47 && chars[cursor + 1] == 0x0B57) || /*for Tamil*/ (chars[cursor] == 0x0BC6 && chars[cursor + 1] == 0x0BBE) || (chars[cursor] == 0x0BC6 && chars[cursor + 1] == 0x0BD7) || (chars[cursor] == 0x0BC7 && chars[cursor + 1] == 0x0BBE) || /*for Malayalam*/ (chars[cursor] == 0x0D46 && chars[cursor + 1] == 0x0D3E) || (chars[cursor] == 0x0D46 && chars[cursor + 1] == 0x0D57) || (chars[cursor] == 0x0D47 && chars[cursor + 1] == 0x0D3E) || /*for Sinhala*/ (chars[cursor] == 0x0DD9 && chars[cursor + 1] == 0x0DCA) || (chars[cursor] == 0x0DD9 && chars[cursor + 1] == 0x0DCF) || (chars[cursor] == 0x0DD9 && chars[cursor + 1] == 0x0DDF) || (chars[cursor] == 0x0DDC && chars[cursor + 1] == 0x0DCA) || /*for Telugu*/ (chars[cursor] == 0x0C46 && chars[cursor + 1] == 0x0C56) || /*for Kannada*/ (chars[cursor] == 0x0CBF && chars[cursor + 1] == 0x0CD5) || (chars[cursor] == 0x0CC6 && chars[cursor + 1] == 0x0CD5) || (chars[cursor] == 0x0CC6 && chars[cursor + 1] == 0x0CD6) || (chars[cursor] == 0x0CC6 && chars[cursor + 1] == 0x0CC2) || (chars[cursor] == 0x0CCA && chars[cursor + 1] == 0x0CD5)) return cursor + 2; } if (state < 0) { break; } cursor += 1; } return cursor;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -