⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pdf_cmap.c.svn-base

📁 SumatraPDF是一款小型开源的pdf阅读工具。虽然玲珑小巧(只有800多KB)
💻 SVN-BASE
📖 第 1 页 / 共 2 页
字号:
/*
 * The CMap data structure here is constructed on the fly by
 * adding simple range-to-range mappings. Then the data structure
 * is optimized to contain both range-to-range and range-to-table
 * lookups.
 *
 * Any one-to-many mappings are inserted as one-to-table
 * lookups in the beginning, and are not affected by the optimization
 * stage.
 *
 * There is a special function to add a 256-length range-to-table mapping.
 * The ranges do not have to be added in order.
 *
 * This code can be a lot simpler if we don't care about wasting memory,
 * or can trust the parser to give us optimal mappings.
 */

#include "fitz.h"
#include "mupdf.h"

typedef struct pdf_range_s pdf_range;

enum { MAXCODESPACE = 10 };
enum { SINGLE, RANGE, TABLE, MULTI };

/*
 * One lookup entry. 'low' and 'high' are the inclusive bounds of the
 * input codes covered by the entry.
 */
struct pdf_range_s
{
	int low;
	int high;
	int flag;	/* what kind of lookup is this */
	int offset;	/* either range-delta or table-index */
};

/*
 * qsort comparator: orders ranges by ascending 'low'.
 *
 * The values are compared rather than subtracted, because the difference
 * of two ints can overflow (codes built from 4-byte strings may be very
 * large or negative), which would yield an inconsistent ordering.
 */
static int
cmprange(const void *va, const void *vb)
{
	int a = ((const pdf_range*)va)->low;
	int b = ((const pdf_range*)vb)->low;
	return (a > b) - (a < b);
}

struct pdf_cmap_s
{
	int refs;		/* reference count */
	int staticdata;		/* when set, ranges and table are not freed and must not be modified */

	char cmapname[32];
	char usecmapname[32];
	pdf_cmap *usecmap;	/* fallback cmap consulted when a lookup misses */

	int wmode;

	/* codespace ranges: n bytes per code, per-byte lower and upper bounds */
	int ncspace;
	struct {
		int n;
		unsigned char lo[4];
		unsigned char hi[4];
	} cspace[MAXCODESPACE];

	/* range entries: used length and allocated capacity */
	int rlen, rcap;
	pdf_range *ranges;

	/* table entries: used length and allocated capacity */
	int tlen, tcap;
	int *table;
};

/*
 * Allocate, destroy and simple parameters.
*/fz_error *pdf_newcmap(pdf_cmap **cmapp){	pdf_cmap *cmap;	cmap = *cmapp = fz_malloc(sizeof(pdf_cmap));	if (!cmap)		return fz_throw("outofmem: cmap struct");	cmap->refs = 1;	cmap->staticdata = 0;	strcpy(cmap->cmapname, "");	strcpy(cmap->usecmapname, "");	cmap->usecmap = nil;	cmap->wmode = 0;	cmap->ncspace = 0;	cmap->rlen = 0;	cmap->rcap = 0;	cmap->ranges = nil;	cmap->tlen = 0;	cmap->tcap = 0;	cmap->table = nil;	return fz_okay;}pdf_cmap *pdf_keepcmap(pdf_cmap *cmap){	cmap->refs ++;	return cmap;}voidpdf_dropcmap(pdf_cmap *cmap){	if (--cmap->refs == 0)	{		if (cmap->usecmap)			pdf_dropcmap(cmap->usecmap);		if (!cmap->staticdata)		{			fz_free(cmap->ranges);			fz_free(cmap->table);		}		fz_free(cmap);	}}pdf_cmap *pdf_getusecmap(pdf_cmap *cmap){	return cmap->usecmap;}voidpdf_setusecmap(pdf_cmap *cmap, pdf_cmap *usecmap){	int i;	if (cmap->usecmap)		pdf_dropcmap(cmap->usecmap);	cmap->usecmap = pdf_keepcmap(usecmap);	if (cmap->ncspace == 0)	{		cmap->ncspace = usecmap->ncspace;		for (i = 0; i < usecmap->ncspace; i++)			cmap->cspace[i] = usecmap->cspace[i];	}}intpdf_getwmode(pdf_cmap *cmap){	return cmap->wmode;}voidpdf_setwmode(pdf_cmap *cmap, int wmode){	cmap->wmode = wmode;}voidpdf_debugcmap(pdf_cmap *cmap){	int i, k, n;	printf("cmap $%p /%s {\n", (void *) cmap, cmap->cmapname);	if (cmap->usecmapname[0])		printf("  usecmap /%s\n", cmap->usecmapname);	if (cmap->usecmap)		printf("  usecmap $%p\n", (void *) cmap->usecmap);	printf("  wmode %d\n", cmap->wmode);	printf("  codespaces {\n");	for (i = 0; i < cmap->ncspace; i++)	{		printf("    <");		for (k = 0; k < cmap->cspace[i].n; k++)			printf("%02x", cmap->cspace[i].lo[k]);		printf("> <");		for (k = 0; k < cmap->cspace[i].n; k++)			printf("%02x", cmap->cspace[i].hi[k]);		printf(">\n");	}	printf("  }\n");	printf("  ranges (%d,%d) {\n", cmap->rlen, cmap->tlen);	for (i = 0; i < cmap->rlen; i++)	{		pdf_range *r = &cmap->ranges[i];		printf("    <%04x> <%04x> ", r->low, r->high);		if (r->flag == TABLE)		{			printf("[ ");			for (k = 0; k 
< r->high - r->low + 1; k++)				printf("%d ", cmap->table[r->offset + k]);			printf("]\n");		}		else if (r->flag == MULTI)		{			printf("< ");			n = cmap->table[r->offset];			for (k = 0; k < n; k++)				printf("%04x ", cmap->table[r->offset + 1 + k]);			printf(">\n");		}		else			printf("%d\n", r->offset);	}	printf("  }\n}\n");}/* * Add a codespacerange section. * These ranges are used by pdf_decodecmap to decode * multi-byte encoded strings. */fz_error *pdf_addcodespace(pdf_cmap *cmap, unsigned lo, unsigned hi, int n){	int i;	assert(!cmap->staticdata);	if (cmap->ncspace + 1 == MAXCODESPACE)		return fz_throw("assert: too many code space ranges");	cmap->cspace[cmap->ncspace].n = n;	for (i = 0; i < n; i++)	{		int o = (n - i - 1) * 8;		cmap->cspace[cmap->ncspace].lo[i] = (lo >> o) & 0xFF;		cmap->cspace[cmap->ncspace].hi[i] = (hi >> o) & 0xFF;	}	cmap->ncspace ++;	return fz_okay;}/* * Add an integer to the table. */static fz_error *addtable(pdf_cmap *cmap, int value){	assert(!cmap->staticdata);	if (cmap->tlen + 1 > cmap->tcap)	{		int newcap = cmap->tcap == 0 ? 256 : cmap->tcap * 2;		int *newtable = fz_realloc(cmap->table, newcap * sizeof(int));		if (!newtable)			return fz_throw("outofmem: cmap table");		cmap->tcap = newcap;		cmap->table = newtable;	}	cmap->table[cmap->tlen++] = value;	return fz_okay;}/* * Add a range. */static fz_error *addrange(pdf_cmap *cmap, int low, int high, int flag, int offset){	assert(!cmap->staticdata);	if (cmap->rlen + 1 > cmap->rcap)	{		pdf_range *newranges;		int newcap = cmap->rcap == 0 ? 256 : cmap->rcap * 2;		newranges = fz_realloc(cmap->ranges, newcap * sizeof(pdf_range));		if (!newranges)			return fz_throw("outofmem: cmap ranges");		cmap->rcap = newcap;		cmap->ranges = newranges;	}	cmap->ranges[cmap->rlen].low = low;	cmap->ranges[cmap->rlen].high = high;	cmap->ranges[cmap->rlen].flag = flag;	cmap->ranges[cmap->rlen].offset = offset;	cmap->rlen ++;	return fz_okay;}/* * Add a range-to-table mapping. 
*/fz_error *pdf_maprangetotable(pdf_cmap *cmap, int low, int *table, int len){	fz_error *error;	int offset;	int high;	int i;	high = low + len;	offset = cmap->tlen;	for (i = 0; i < len; i++)	{		error = addtable(cmap, table[i]);		if (error)			return fz_rethrow(error, "cannot add range-to-table index");	}	error = addrange(cmap, low, high, TABLE, offset);	if (error)		return fz_rethrow(error, "cannot add range-to-table range");	return fz_okay;}/* * Add a range of contiguous one-to-one mappings (ie 1..5 maps to 21..25) */fz_error *pdf_maprangetorange(pdf_cmap *cmap, int low, int high, int offset){	fz_error *error;	error = addrange(cmap, low, high, high - low == 0 ? SINGLE : RANGE, offset);	if (error)		return fz_rethrow(error, "cannot add range-to-range mapping");	return fz_okay;}/* * Add a single one-to-many mapping. */fz_error *pdf_maponetomany(pdf_cmap *cmap, int low, int *values, int len){	fz_error *error;	int offset;	int i;	if (len == 1)	{		error = addrange(cmap, low, low, SINGLE, values[0]);		if (error)			return fz_rethrow(error, "cannot add one-to-one mapping");		return fz_okay;	}	offset = cmap->tlen;	error = addtable(cmap, len);	if (error)		return fz_rethrow(error, "cannot add one-to-many table length");	for (i = 0; i < len; i++)	{		error = addtable(cmap, values[i]);		if (error)			return fz_rethrow(error, "cannot add one-to-many table index");	}	error = addrange(cmap, low, low, MULTI, offset);	if (error)		return fz_rethrow(error, "cannot add one-to-many mapping");	return fz_okay;}/* * Sort the input ranges. * Merge contiguous input ranges to range-to-range if the output is contiguous. * Merge contiguous input ranges to range-to-table if the output is random. 
*/fz_error *pdf_sortcmap(pdf_cmap *cmap){	fz_error *error;	pdf_range *newranges;	int *newtable;	pdf_range *a;			/* last written range on output */	pdf_range *b;			/* current range examined on input */	assert(!cmap->staticdata);	if (cmap->rlen == 0)		return fz_okay;	qsort(cmap->ranges, cmap->rlen, sizeof(pdf_range), cmprange);	a = cmap->ranges;	b = cmap->ranges + 1;	while (b < cmap->ranges + cmap->rlen)	{		/* ignore one-to-many mappings */		if (b->flag == MULTI)		{			*(++a) = *b;		}		/* input contiguous */		else if (a->high + 1 == b->low)		{			/* output contiguous */			if (a->high - a->low + a->offset + 1 == b->offset)			{				/* SR -> R and SS -> R and RR -> R and RS -> R */				if (a->flag == SINGLE || a->flag == RANGE)				{					a->flag = RANGE;					a->high = b->high;				}				/* LS -> L */				else if (a->flag == TABLE && b->flag == SINGLE)				{					a->high = b->high;					error = addtable(cmap, b->offset);					if (error)						return fz_rethrow(error, "cannot convert LS -> L");				}				/* LR -> LR */				else if (a->flag == TABLE && b->flag == RANGE)				{					*(++a) = *b;				}				/* XX -> XX */				else				{					*(++a) = *b;				}			}			/* output separated */			else			{				/* SS -> L */				if (a->flag == SINGLE && b->flag == SINGLE)				{					a->flag = TABLE;					a->high = b->high;					error = addtable(cmap, a->offset);					if (error)						return fz_rethrow(error, "cannot convert SS -> L");					error = addtable(cmap, b->offset);					if (error)						return fz_rethrow(error, "cannot convert SS -> L");					a->offset = cmap->tlen - 2;				}				/* LS -> L */				else if (a->flag == TABLE && b->flag == SINGLE)				{					a->high = b->high;					error = addtable(cmap, b->offset);					if (error)						return fz_rethrow(error, "cannot convert LS -> L");				}				/* XX -> XX */				else				{					*(++a) = *b;				}			}		}		/* input separated: XX -> XX */		else		{			*(++a) = *b;		}		b ++;	}	cmap->rlen = a - cmap->ranges + 1;	newranges = fz_realloc(cmap->ranges, cmap->rlen * 
sizeof(pdf_range));	if (!newranges)		return fz_throw("outofmem: cmap ranges");	cmap->rcap = cmap->rlen;	cmap->ranges = newranges;	if (cmap->tlen)	{		newtable = fz_realloc(cmap->table, cmap->tlen * sizeof(int));		if (!newtable)			return fz_throw("outofmem: cmap table");		cmap->tcap = cmap->tlen;		cmap->table = newtable;	}	return fz_okay;}/* * Lookup the mapping of a codepoint. */intpdf_lookupcmap(pdf_cmap *cmap, int cpt){	int l = 0;	int r = cmap->rlen - 1;	int m;	while (l <= r)	{		m = (l + r) >> 1;		if (cpt < cmap->ranges[m].low)			r = m - 1;		else if (cpt > cmap->ranges[m].high)			l = m + 1;		else		{			int i = cpt - cmap->ranges[m].low + cmap->ranges[m].offset;			if (cmap->ranges[m].flag == TABLE)				return cmap->table[i];			if (cmap->ranges[m].flag == MULTI)				return -1;			return i;		}	}	if (cmap->usecmap)		return pdf_lookupcmap(cmap->usecmap, cpt);	return -1;}/* * Use the codespace ranges to extract a codepoint from a * multi-byte encoded string. */unsigned char *pdf_decodecmap(pdf_cmap *cmap, unsigned char *buf, int *cpt){	int i, k;	for (k = 0; k < cmap->ncspace; k++)	{		unsigned char *lo = cmap->cspace[k].lo;		unsigned char *hi = cmap->cspace[k].hi;		int n = cmap->cspace[k].n;		int c = 0;		for (i = 0; i < n; i++)		{			if (lo[i] <= buf[i] && buf[i] <= hi[i])				c = (c << 8) | buf[i];			else				break;		}		if (i == n) {			*cpt = c;			return buf + n;		}	}	*cpt = 0;	return buf + 1;}/* * CMap parser */enum{	TUSECMAP = PDF_NTOKENS,	TBEGINCODESPACERANGE,	TENDCODESPACERANGE,	TBEGINBFCHAR,	TENDBFCHAR,	TBEGINBFRANGE,	TENDBFRANGE,	TBEGINCIDCHAR,	TENDCIDCHAR,	TBEGINCIDRANGE,	TENDCIDRANGE};static pdf_token_e tokenfromkeyword(char *key){	if (!strcmp(key, "usecmap")) return TUSECMAP;	if (!strcmp(key, "begincodespacerange")) return TBEGINCODESPACERANGE;	if (!strcmp(key, "endcodespacerange")) return TENDCODESPACERANGE;	if (!strcmp(key, "beginbfchar")) return TBEGINBFCHAR;	if (!strcmp(key, "endbfchar")) return TENDBFCHAR;	if (!strcmp(key, "beginbfrange")) return TBEGINBFRANGE;	
if (!strcmp(key, "endbfrange")) return TENDBFRANGE;	if (!strcmp(key, "begincidchar")) return TBEGINCIDCHAR;	if (!strcmp(key, "endcidchar")) return TENDCIDCHAR;	if (!strcmp(key, "begincidrange")) return TBEGINCIDRANGE;	if (!strcmp(key, "endcidrange")) return TENDCIDRANGE;	return PDF_TKEYWORD;}static int codefromstring(char *buf, int len){	int a = 0;	while (len--)		a = (a << 8) | *buf++;	return a;}static fz_error *lexcmap(pdf_token_e *tok, fz_stream *file, char *buf, int n, int *sl){	fz_error *error;	error = pdf_lex(tok, file, buf, n, sl);	if (error)		return fz_rethrow(error, "cannot parse cmap token");	if (*tok == PDF_TKEYWORD)		*tok = tokenfromkeyword(buf);	return fz_okay;}static fz_error *parsecmapname(pdf_cmap *cmap, fz_stream *file){	fz_error *error;	char buf[256];	pdf_token_e tok;	int len;	error = lexcmap(&tok, file, buf, sizeof buf, &len);	if (error)		return fz_rethrow(error, "syntaxerror in cmap");	if (tok == PDF_TNAME)	{		strlcpy(cmap->cmapname, buf, sizeof(cmap->cmapname));		return fz_okay;	}	return fz_throw("expected name");}static fz_error *parsewmode(pdf_cmap *cmap, fz_stream *file){	fz_error *error;	char buf[256];	pdf_token_e tok;	int len;	error = lexcmap(&tok, file, buf, sizeof buf, &len);	if (error)		return fz_rethrow(error, "syntaxerror in cmap");	if (tok == PDF_TINT)	{		pdf_setwmode(cmap, atoi(buf));		return fz_okay;	}	return fz_throw("expected integer");}static fz_error *parsecodespacerange(pdf_cmap *cmap, fz_stream *file){	fz_error *error;	char buf[256];	pdf_token_e tok;	int len;	int lo, hi;	while (1)	{		error = lexcmap(&tok, file, buf, sizeof buf, &len);		if (error)			return fz_rethrow(error, "syntaxerror in cmap");		if (tok == TENDCODESPACERANGE)			return fz_okay;		else if (tok == PDF_TSTRING)		{			lo = codefromstring(buf, len);			error = lexcmap(&tok, file, buf, sizeof buf, &len);			if (error)				return fz_rethrow(error, "syntaxerror in cmap");			if (tok == PDF_TSTRING)			{				hi = codefromstring(buf, len);				error = pdf_addcodespace(cmap, lo, 
hi, len);				if (error)					return fz_rethrow(error, "cannot add code space");			}			else break;		}		else break;	}	return fz_throw("expected string or endcodespacerange");}static fz_error *parsecidrange(pdf_cmap *cmap, fz_stream *file){	fz_error *error;	char buf[256];	pdf_token_e tok;	int len;	int lo, hi, dst;	while (1)	{		error = lexcmap(&tok, file, buf, sizeof buf, &len);		if (error)			return fz_rethrow(error, "syntaxerror in cmap");		if (tok == TENDCIDRANGE)			return fz_okay;		else if (tok != PDF_TSTRING)			return fz_throw("expected string or endcidrange");		lo = codefromstring(buf, len);		error = lexcmap(&tok, file, buf, sizeof buf, &len);		if (error)			return fz_rethrow(error, "syntaxerror in cmap");		if (tok != PDF_TSTRING)			return fz_throw("expected string");		hi = codefromstring(buf, len);		error = lexcmap(&tok, file, buf, sizeof buf, &len);		if (error)			return fz_rethrow(error, "syntaxerror in cmap");		if (tok != PDF_TINT)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -