⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 teckit_format.h

📁 Emdros is a text database middleware layer aimed at storage and retrieval of "text plus information…"
💻 H
字号:
/*------------------------------------------------------------------------Copyright (C) 2002-2006 SIL International. All rights reserved.Distributable under the terms of either the Common Public License or theGNU Lesser General Public License, as specified in the LICENSING.txt file.File: TECkit_Format.hResponsibility: Jonathan KewLast reviewed: Not yet.Description:    Definitions used in the TECkit binary table format	2006-06-02	jk	added support for extended string rules (>255 per initial char)-------------------------------------------------------------------------*/#ifndef __TECkit_Format_H__#define __TECkit_Format_H__#include "TECkit_Common.h"#define kMagicNumber			0x714d6170	/* 'qMap' */#define kMagicNumberCmp			0x7a516d70	/* 'zQmp' */#define kFileVersion2_1			0x00020001	/* version before tables with ExtStringRules */#define kCurrentFileVersion		0x00030000	/* current version */#define kTableVersion2			0x00020000#define kCurrentTableVersion	0x00030000	/* actually, the engine doesn't check this,												it only looks at the file version */struct NameRec {	UInt16	nameID;	UInt16	nameLength;/*	Byte	data[nameLength];	pad to 2-byte boundary*/};struct FileHeader {	UInt32	type;					/* magic number = 'qMap' */	UInt32	version;				/* version = kFileCurrentVersion */	UInt32	headerLength;			/* length of this header including offset arrays and name records */	UInt32	formFlagsLHS;			/* flags for normalization form, Unicode/byte encoding on LHS of mapping */	UInt32	formFlagsRHS;			/* flags for normalization form, Unicode/byte encoding on RHS of mapping */	UInt32	numNames;				/* number of strings in the names table */	UInt32	numFwdTables;			/* number of tables in forward pipeline */	UInt32	numRevTables;			/* number of tables in reverse pipeline */#if 0	UInt32	nameOffsets[numNames];	/* offsets from FileHeader to each NameRec */	UInt32	fwdBase[numFwdTables];	/* offsets from FileHeader to forward tables */	UInt32	revBase[numRevTables];	/* offsets from FileHeader to reverse 
tables */	NameRec	names[numNames];		/* the name records */#endif};struct TableHeader {	UInt32	type;					/* type = 'B->B', 'B->U', 'U->B', 'U->U' */									/* or type = 'NFC ', 'NFD ', and no additional header fields are present */	UInt32	version;				/* version = kCurrentTableVersion */	UInt32	length;					/* total length of this table */	UInt32	flags;					/* flags:										0x00000001:	supplementary-plane Unicode characters supported in mapping and classes										0x00000002:	DBCS support (BB/BU tables only) in lookup table									*/	UInt32	pageBase;				/* offset from table header to page table (Ux tables) or dbcsPage table (Bx tables) */	UInt32	lookupBase;				/* offset from table header to lookup table(s) */	UInt32	matchClassBase;			/* offset from table header to match class definitions */	UInt32	repClassBase;			/* offset from table header to replacement class definitions */	UInt32	stringListBase;			/* offset from table header to string rule lists */	UInt32	stringRuleData;			/* offset from table header to string rule data */	UInt8	maxMatch;				/* max number of input code units matched by a rule */	UInt8	maxPre;					/* max number of input code units matched by pre-context */	UInt8	maxPost;				/* max number of input code units matched by post-context */	UInt8	maxOutput;				/* max number of output code units generated by a rule */	UInt32	replacementChar;		/* default output for unmapped codes */};#ifndef __cplusplustypedef struct TableHeader		TableHeader;#endif#define kTableType_BB				0x422d3e42#define kTableType_BU				0x422d3e55#define kTableType_UB				0x552d3e42#define kTableType_UU				0x552d3e55#define kTableType_NFC				0x4e464320#define kTableType_NFD				0x4e464420#define	kTableFlags_Supplementary	0x0001#define	kTableFlags_DBCS			0x0002union Lookup {	/* for any table when string rules are used */	struct {		UInt8	type;				/*										0xff: use string rules										0xfe: illegal DBCS trailing byte										0xfd: unmapped character: copy (BB/UU) or output default 
(UB/BU)										0x00-0x03: direct lookup									*/		UInt8	ruleCount;			/* number of rules for this code */		UInt16	ruleIndex;			/* index into stringList of start of rule list for this code */	}	rules;	/* for UB and BB tables with direct byte output */	struct {		UInt8	count;				/* count of bytes present in data[]: 0-3 */		UInt8	data[3];	}	bytes;	/* for BU and UU tables with direct Unicode output */	UInt32		usv;				/* unicode scalar value */};#ifndef __cplusplustypedef union Lookup			Lookup;#endif#define kLookupType_StringRules		0xff#define kLookupType_IllegalDBCS		0xfe#define kLookupType_Unmapped		0xfd#define kLookupType_RuleTypeMask		0xc0#define kLookupType_ExtStringRules		0x80#define kLookupType_ExtRuleCountMask	0x3f/*	/rules.ruleOffset/ points to an array of /rules.ruleCount/ UInt32 values which are the offsets	from stringRuleData to each rule to test for this character*/struct StringRule {	UInt8	matchLength;			/* length of match string in matchElements */	UInt8	postLength;				/* length of post-context in matchElements */	UInt8	preLength;				/* length of pre-context in matchElements */	UInt8	repLength;				/* length of replacement string in repElements */#if 0	MatchElem	matchString[];	MatchElem	postContext[];	MatchElem	preContext[];		/* reversed */	RepElem		repString[];#endif};#ifndef __cplusplustypedef struct StringRule		StringRule;#endifunion MatchElem {#ifdef __cplusplus				MatchElem()					{ }#endif	struct {		UInt8	repeat;				/* repeat count: (min << 4) + max */		UInt8	type;				/* 										0x80:	negate flag (not allowed with group)										0x40:	non-literal flag--if set, bits 0x3f indicate specific type (value must not be zero)												Note that if 'non-literal' flag is NOT set, remaining bits are not used as type code												but are part of a USV value (or must be set to zero for literal byte data).									
*/		UInt16	reserved;	}	flags;	union {		struct {			UInt16		reserved;			UInt8		dNext;		/* offset to following OR or EGroup element */			UInt8		dAfter;		/* offset to element after the group for BGroup */		}	bgroup;		struct {			UInt16		reserved;			UInt8		dNext;		/* offset to following OR or EGroup element (for OR only) */			UInt8		dStart;		/* reverse offset to corresponding BGroup */		}	egroup;					/* (also used for OR elements) */		struct {			UInt16		reserved;			UInt16		index;		/* index of character class */		}	cls;		struct {			UInt8		reserved[3];			UInt8		data;		/* literal byte */		}	byte;		struct {			UInt32		data;		/* literal Unicode scalar: must mask with kUSVMask, as top bits overlap flags.repeat and "negate" bit in flags.type */		}	usv;	}	value;};#ifndef __cplusplustypedef union MatchElem			MatchElem;#endif#define	kMatchElem_Negate			0x80	/* negated test */#define kMatchElem_NonLit			0x40	/* test value is not a literal character; need to check type */#define kMatchElem_TypeMask			0x3f	/* Mask for type value. Note that type 0 must not be used (=literal) */#define	kMatchElem_Type_Class		0x01	/* class match */#define kMatchElem_Type_BGroup		0x02	/* begin group */#define kMatchElem_Type_EGroup		0x03	/* end group */#define	kMatchElem_Type_OR			0x04	/* special code: OR */#define	kMatchElem_Type_ANY			0x05	/* special code: ANY */#define	kMatchElem_Type_EOS			0x06	/* special code: EOS */#define kMatchElem_Type_Copy		0x07	/* copy matched item (invalid; for internal compiler use) */#define kUSVMask					0x001fffffunion RepElem {	struct {		UInt8	type;				/* see kRepElem_... 
below */		UInt8	matchIndex;			/* index of corresponding item in matchString for type == kRepElem_Class or kRepElem_Copy */		UInt16	repClass;			/* repClass if type == kRepElem_Class */	}	flags;	UInt32	value;					/* literal value (mask with kUSVMask) if flags.type == kRepElem_Literal */};#ifndef __cplusplustypedef union RepElem			RepElem;#endif#define kRepElem_Literal			0x00#define kRepElem_Class				kMatchElem_Type_Class#define kRepElem_Copy				kMatchElem_Type_Copy#define kRepElem_Unmapped			0x0f	/* used in default terminator rules */#endif	/* __TECkit_Format_H__ */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -