📄 teckit_format.h
字号:
/*------------------------------------------------------------------------Copyright (C) 2002-2006 SIL International. All rights reserved.Distributable under the terms of either the Common Public License or theGNU Lesser General Public License, as specified in the LICENSING.txt file.File: TECkit_Format.hResponsibility: Jonathan KewLast reviewed: Not yet.Description: Definitions used in the TECkit binary table format 2006-06-02 jk added support for extended string rules (>255 per initial char)-------------------------------------------------------------------------*/#ifndef __TECkit_Format_H__#define __TECkit_Format_H__#include "TECkit_Common.h"#define kMagicNumber 0x714d6170 /* 'qMap' */#define kMagicNumberCmp 0x7a516d70 /* 'zQmp' */#define kFileVersion2_1 0x00020001 /* version before tables with ExtStringRules */#define kCurrentFileVersion 0x00030000 /* current version */#define kTableVersion2 0x00020000#define kCurrentTableVersion 0x00030000 /* actually, the engine doesn't check this, it only looks at the file version */struct NameRec { UInt16 nameID; UInt16 nameLength;/* Byte data[nameLength]; pad to 2-byte boundary*/};struct FileHeader { UInt32 type; /* magic number = 'qMap' */ UInt32 version; /* version = kFileCurrentVersion */ UInt32 headerLength; /* length of this header including offset arrays and name records */ UInt32 formFlagsLHS; /* flags for normalization form, Unicode/byte encoding on LHS of mapping */ UInt32 formFlagsRHS; /* flags for normalization form, Unicode/byte encoding on RHS of mapping */ UInt32 numNames; /* number of strings in the names table */ UInt32 numFwdTables; /* number of tables in forward pipeline */ UInt32 numRevTables; /* number of tables in reverse pipeline */#if 0 UInt32 nameOffsets[numNames]; /* offsets from FileHeader to each NameRec */ UInt32 fwdBase[numFwdTables]; /* offsets from FileHeader to forward tables */ UInt32 revBase[numRevTables]; /* offsets from FileHeader to reverse tables */ NameRec names[numNames]; /* the name records */#endif};struct TableHeader { UInt32 type; /* type = 'B->B', 'B->U', 'U->B', 'U->U' */ /* or type = 'NFC ', 'NFD ', and no additional header fields are present */ UInt32 version; /* version = kCurrentTableVersion */ UInt32 length; /* total length of this table */ UInt32 flags; /* flags: 0x00000001: supplementary-plane Unicode characters supported in mapping and classes 0x00000002: DBCS support (BB/BU tables only) in lookup table */ UInt32 pageBase; /* offset from table header to page table (Ux tables) or dbcsPage table (Bx tables) */ UInt32 lookupBase; /* offset from table header to lookup table(s) */ UInt32 matchClassBase; /* offset from table header to match class definitions */ UInt32 repClassBase; /* offset from table header to replacement class definitions */ UInt32 stringListBase; /* offset from table header to string rule lists */ UInt32 stringRuleData; /* offset from table header to string rule data */ UInt8 maxMatch; /* max number of input code units matched by a rule */ UInt8 maxPre; /* max number of input code units matched by pre-context */ UInt8 maxPost; /* max number of input code units matched by post-context */ UInt8 maxOutput; /* max number of output code units generated by a rule */ UInt32 replacementChar; /* default output for unmapped codes */};#ifndef __cplusplustypedef struct TableHeader TableHeader;#endif#define kTableType_BB 0x422d3e42#define kTableType_BU 0x422d3e55#define kTableType_UB 0x552d3e42#define kTableType_UU 0x552d3e55#define kTableType_NFC 0x4e464320#define kTableType_NFD 0x4e464420#define kTableFlags_Supplementary 0x0001#define kTableFlags_DBCS 0x0002union Lookup { /* for any table when string rules are used */ struct { UInt8 type; /* 0xff: use string rules 0xfe: illegal DBCS trailing byte 0xfd: unmapped character: copy (BB/UU) or output default (UB/BU) 0x00-0x03: direct lookup */ UInt8 ruleCount; /* number of rules for this code */ UInt16 ruleIndex; /* index into stringList of start of rule list for this code */ } rules; /* for UB and BB tables with direct byte output */ struct { UInt8 count; /* count of bytes present in data[]: 0-3 */ UInt8 data[3]; } bytes; /* for BU and UU tables with direct Unicode output */ UInt32 usv; /* unicode scalar value */};#ifndef __cplusplustypedef union Lookup Lookup;#endif#define kLookupType_StringRules 0xff#define kLookupType_IllegalDBCS 0xfe#define kLookupType_Unmapped 0xfd#define kLookupType_RuleTypeMask 0xc0#define kLookupType_ExtStringRules 0x80#define kLookupType_ExtRuleCountMask 0x3f/* /rules.ruleOffset/ points to an array of /rules.ruleCount/ UInt32 values which are the offsets from stringRuleData to each rule to test for this character*/struct StringRule { UInt8 matchLength; /* length of match string in matchElements */ UInt8 postLength; /* length of post-context in matchElements */ UInt8 preLength; /* length of pre-context in matchElements */ UInt8 repLength; /* length of replacement string in repElements */#if 0 MatchElem matchString[]; MatchElem postContext[]; MatchElem preContext[]; /* reversed */ RepElem repString[];#endif};#ifndef __cplusplustypedef struct StringRule StringRule;#endifunion MatchElem {#ifdef __cplusplus MatchElem() { }#endif struct { UInt8 repeat; /* repeat count: (min << 4) + max */ UInt8 type; /* 0x80: negate flag (not allowed with group) 0x40: non-literal flag--if set, bits 0x3f indicate specific type (value must not be zero) Note that if 'non-literal' flag is NOT set, remaining bits are not used as type code but are part of a USV value (or must be set to zero for literal byte data). */ UInt16 reserved; } flags; union { struct { UInt16 reserved; UInt8 dNext; /* offset to following OR or EGroup element */ UInt8 dAfter; /* offset to element after the group for BGroup */ } bgroup; struct { UInt16 reserved; UInt8 dNext; /* offset to following OR or EGroup element (for OR only) */ UInt8 dStart; /* reverse offset to corresponding BGroup */ } egroup; /* (also used for OR elements) */ struct { UInt16 reserved; UInt16 index; /* index of character class */ } cls; struct { UInt8 reserved[3]; UInt8 data; /* literal byte */ } byte; struct { UInt32 data; /* literal Unicode scalar: must mask with kUSVMask, as top bits overlap flags.repeat and "negate" bit in flags.type */ } usv; } value;};#ifndef __cplusplustypedef union MatchElem MatchElem;#endif#define kMatchElem_Negate 0x80 /* negated test */#define kMatchElem_NonLit 0x40 /* test value is not a literal character; need to check type */#define kMatchElem_TypeMask 0x3f /* Mask for type value. Note that type 0 must not be used (=literal) */#define kMatchElem_Type_Class 0x01 /* class match */#define kMatchElem_Type_BGroup 0x02 /* begin group */#define kMatchElem_Type_EGroup 0x03 /* end group */#define kMatchElem_Type_OR 0x04 /* special code: OR */#define kMatchElem_Type_ANY 0x05 /* special code: ANY */#define kMatchElem_Type_EOS 0x06 /* special code: EOS */#define kMatchElem_Type_Copy 0x07 /* copy matched item (invalid; for internal compiler use) */#define kUSVMask 0x001fffffunion RepElem { struct { UInt8 type; /* see kRepElem_... below */ UInt8 matchIndex; /* index of corresponding item in matchString for type == kRepElem_Class or kRepElem_Copy */ UInt16 repClass; /* repClass if type == kRepElem_Class */ } flags; UInt32 value; /* literal value (mask with kUSVMask) if flags.type == kRepElem_Literal */};#ifndef __cplusplustypedef union RepElem RepElem;#endif#define kRepElem_Literal 0x00#define kRepElem_Class kMatchElem_Type_Class#define kRepElem_Copy kMatchElem_Type_Copy#define kRepElem_Unmapped 0x0f /* used in default terminator rules */#endif /* __TECkit_Format_H__ */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -