⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pdf_repair.c.svn-base

📁 SumatraPDF是一款小型开源的pdf阅读工具。虽然玲珑小巧(只有800多KB)
💻 SVN-BASE
字号:
#include "fitz.h"
#include "mupdf.h"

/*
 * open pdf and scan objects to reconstruct xref table
 */

/* One object discovered while scanning the file. */
struct entry
{
	int oid;	/* object number */
	int gen;	/* generation number */
	int ofs;	/* file offset of the object number token */
	int stmofs;	/* file offset of the stream data, 0 when the object has no stream */
	int stmlen;	/* recomputed stream length, -1 when no correction is needed */
};

/*
 * Parse the body of one object; called right after the 'obj' keyword has
 * been read from 'file'.
 *
 * buf/cap  scratch buffer handed to the lexer (also used while searching
 *          for the 'endstream' keyword, so it must hold at least 9 bytes)
 * stmofs   out: offset of the first stream byte, or 0 if there is no stream
 * stmlen   out: stream length measured by searching for 'endstream', or -1
 *          if the dictionary's /Length already leads to 'endstream' (or
 *          there is no stream)
 * isroot   out: 1 if the dictionary has /Type /Catalog
 * isinfo   out: 1 if the dictionary has Producer, Creator and Title keys
 *
 * Returns fz_okay on success, otherwise an error. The parsed dictionary is
 * always released before returning.
 */
static fz_error *
parseobj(fz_stream *file, char *buf, int cap, int *stmofs, int *stmlen,
		int *isroot, int *isinfo)
{
	fz_error *error;
	fz_obj *dict = nil;
	fz_obj *length;
	fz_obj *filter;
	fz_obj *type;
	pdf_token_e tok;
	int len;
	int n;

	/* Reset every output so that nothing leaks over from a previous object;
	 * the caller reuses the same variables for each object it scans. */
	*stmofs = 0;
	*stmlen = -1;
	*isroot = 0;
	*isinfo = 0;

	error = pdf_lex(&tok, file, buf, cap, &len);
	if (error)
		return fz_rethrow(error, "cannot parse object");

	if (tok == PDF_TODICT)
	{
		error = pdf_parsedict(&dict, file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot parse object");
	}

	if (fz_isdict(dict))
	{
		type = fz_dictgets(dict, "Type");
		if (fz_isname(type) && !strcmp(fz_toname(type), "Catalog"))
			*isroot = 1;

		filter = fz_dictgets(dict, "Filter");
		if (fz_isname(filter) && !strcmp(fz_toname(filter), "Standard"))
		{
			error = fz_throw("cannot repair encrypted files");
			goto cleanup;
		}

		/* Heuristic: a dictionary carrying all three keys is taken to be
		 * the document information dictionary. */
		if (fz_dictgets(dict, "Producer"))
			if (fz_dictgets(dict, "Creator"))
				if (fz_dictgets(dict, "Title"))
					*isinfo = 1;
	}

	/* Skip everything up to the stream data or the end of the object. */
	while ( tok != PDF_TSTREAM &&
			tok != PDF_TENDOBJ &&
			tok != PDF_TERROR &&
			tok != PDF_TEOF )
	{
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
		{
			error = fz_rethrow(error, "cannot scan for endobj or stream token");
			goto cleanup;
		}
	}

	if (tok == PDF_TSTREAM)
	{
		/* Consume the end-of-line after 'stream': one byte, plus the
		 * '\n' of a "\r\n" pair. */
		int c = fz_readbyte(file);
		if (c == '\r')
		{
			c = fz_peekbyte(file);
			if (c == '\n')
				fz_readbyte(file);
		}

		error = fz_readerror(file);
		if (error)
		{
			error = fz_rethrow(error, "cannot read from file");
			goto cleanup;
		}

		*stmofs = fz_tell(file);
		if (*stmofs < 0)
		{
			error = fz_throw("cannot seek in file");
			goto cleanup;
		}

		/* A 'stream' token may appear without a preceding dictionary;
		 * only consult /Length when there is one. */
		length = nil;
		if (dict)
			length = fz_dictgets(dict, "Length");

		if (fz_isint(length))
		{
			/* Trust /Length only if 'endstream' really follows the data. */
			error = fz_seek(file, *stmofs + fz_toint(length), 0);
			if (error)
			{
				error = fz_rethrow(error, "cannot seek in file");
				goto cleanup;
			}

			error = pdf_lex(&tok, file, buf, cap, &len);
			if (error)
			{
				error = fz_rethrow(error, "cannot scan for endstream token");
				goto cleanup;
			}

			if (tok == PDF_TENDSTREAM)
				goto atobjend;

			/* /Length was wrong: rewind and search by hand. */
			error = fz_seek(file, *stmofs, 0);
			if (error)
			{
				error = fz_rethrow(error, "cannot seek in file");
				goto cleanup;
			}
		}

		/* Slide a 9 byte window over the data until it holds "endstream". */
		error = fz_read(&n, file, (unsigned char *) buf, 9);
		if (error)
		{
			error = fz_rethrow(error, "cannot read from file");
			goto cleanup;
		}

		while (memcmp(buf, "endstream", 9) != 0)
		{
			c = fz_readbyte(file);
			if (c == EOF)
				break;
			memmove(buf, buf + 1, 8);
			buf[8] = c;
		}

		error = fz_readerror(file);
		if (error)
		{
			error = fz_rethrow(error, "cannot read from file");
			goto cleanup;
		}

		/* The file position is just past the keyword (9 bytes). */
		*stmlen = fz_tell(file) - *stmofs - 9;

atobjend:
		/* Read the token after 'endstream'; it is expected to be 'endobj'
		 * but anything else is tolerated. */
		error = pdf_lex(&tok, file, buf, cap, &len);
		if (error)
		{
			error = fz_rethrow(error, "cannot scan for endobj token");
			goto cleanup;
		}
	}

	error = fz_okay;

cleanup:
	if (dict)
		fz_dropobj(dict);
	return error;
}

/*
 * Rebuild the cross reference table of a damaged file by scanning it for
 * "<oid> <gen> obj" sequences.
 *
 * On success xref->file, xref->trailer (holding only /Size and /Root),
 * xref->table, xref->len and xref->cap are filled in and fz_okay is
 * returned. Stream objects whose /Length did not lead to 'endstream' get
 * their /Length replaced by the measured value.
 *
 * On failure the file stream is dropped, xref->file is reset to nil and the
 * error is returned. NOTE(review): xref->trailer and xref->table may already
 * have been assigned when a late step fails; this function does not release
 * them -- confirm that the caller does.
 */
fz_error *
pdf_repairxref(pdf_xref *xref, char *filename)
{
	fz_error *error;
	fz_stream *file;

	struct entry *list = nil;
	int listlen;
	int listcap;
	int maxoid = 0;

	char buf[65536];

	/* The two most recent integer tokens and their offsets; when 'obj' is
	 * seen they are the object number and the generation number. */
	int oid = 0;
	int gen = 0;
	int tmpofs, oidofs = 0, genofs = 0;

	int isroot, rootoid = 0, rootgen = 0;
	int isinfo, infooid = 0, infogen = 0;
	int stmlen, stmofs = 0;
	pdf_token_e tok;
	int len;
	int next;
	int i;

	error = fz_openrfile(&file, filename);
	if (error)
		return fz_rethrow(error, "cannot open file '%s'", filename);

	pdf_logxref("repairxref '%s' %p\n", filename, xref);

	xref->file = file;

	/* TODO: extract version */

	listlen = 0;
	listcap = 1024;
	list = fz_malloc(listcap * sizeof(struct entry));
	if (!list)
	{
		error = fz_throw("outofmem: reparation object list");
		goto cleanup;
	}

	while (1)
	{
		tmpofs = fz_tell(file);
		if (tmpofs < 0)
		{
			error = fz_throw("cannot tell in file");
			goto cleanup;
		}

		error = pdf_lex(&tok, file, buf, sizeof buf, &len);
		if (error)
		{
			error = fz_rethrow(error, "cannot scan for objects");
			goto cleanup;
		}

		if (tok == PDF_TINT)
		{
			/* Shift the previous integer into the 'oid' slot. */
			oidofs = genofs;
			oid = gen;
			genofs = tmpofs;
			gen = atoi(buf);
		}

		if (tok == PDF_TOBJ)
		{
			/* Always parse the body so the scanner moves past it. */
			error = parseobj(file, buf, sizeof buf, &stmofs, &stmlen, &isroot, &isinfo);
			if (error)
			{
				error = fz_rethrow(error, "cannot parse object");
				goto cleanup;
			}

			if (oid <= 0)
			{
				/* The number comes straight from the file and is later used
				 * as an index into xref->table: a negative value would write
				 * out of bounds, and slot 0 is reserved for the head of the
				 * free list. Drop such objects. */
				pdf_logxref("ignoring object with invalid number: (%d %d R)\n", oid, gen);
			}
			else
			{
				if (isroot) {
					pdf_logxref("found catalog: (%d %d R)\n", oid, gen);
					rootoid = oid;
					rootgen = gen;
				}

				/* The info object is only recorded and logged; it is not
				 * put into the rebuilt trailer. */
				if (isinfo) {
					pdf_logxref("found info: (%d %d R)\n", oid, gen);
					infooid = oid;
					infogen = gen;
				}

				if (listlen + 1 == listcap)
				{
					struct entry *newlist;
					listcap = listcap * 2;
					newlist = fz_realloc(list, listcap * sizeof(struct entry));
					if (!newlist) {
						error = fz_throw("outofmem: resize reparation object list");
						goto cleanup;
					}
					list = newlist;
				}

				list[listlen].oid = oid;
				list[listlen].gen = gen;
				list[listlen].ofs = oidofs;
				list[listlen].stmofs = stmofs;
				list[listlen].stmlen = stmlen;
				listlen ++;

				if (oid > maxoid)
					maxoid = oid;
			}
		}

		/* Step over the byte the lexer could not handle. */
		if (tok == PDF_TERROR)
			fz_readbyte(file);

		if (tok == PDF_TEOF)
			break;
	}

	if (rootoid == 0)
	{
		error = fz_throw("cannot find catalog object");
		goto cleanup;
	}

	error = fz_packobj(&xref->trailer,
			"<< /Size %i /Root %r >>",
			maxoid + 1, rootoid, rootgen);
	if (error)
	{
		error = fz_rethrow(error, "cannot create new trailer");
		goto cleanup;
	}

	xref->len = maxoid + 1;
	xref->cap = xref->len;
	xref->table = fz_malloc(xref->cap * sizeof(pdf_xrefentry));
	if (!xref->table)
	{
		error = fz_throw("outofmem: xref table");
		goto cleanup;
	}

	/* Entry 0 is the head of the free list. */
	xref->table[0].type = 'f';
	xref->table[0].mark = 0;
	xref->table[0].ofs = 0;
	xref->table[0].gen = 65535;
	xref->table[0].stmbuf = nil;
	xref->table[0].stmofs = 0;
	xref->table[0].obj = nil;

	/* Start with every other slot free; found objects overwrite them. */
	for (i = 1; i < xref->len; i++)
	{
		xref->table[i].type = 'f';
		xref->table[i].mark = 0;
		xref->table[i].ofs = 0;
		xref->table[i].gen = 0;
		xref->table[i].stmbuf = nil;
		xref->table[i].stmofs = 0;
		xref->table[i].obj = nil;
	}

	/* Entries are applied in file order, so when an object number occurs
	 * more than once the last occurrence wins. */
	for (i = 0; i < listlen; i++)
	{
		xref->table[list[i].oid].type = 'n';
		xref->table[list[i].oid].ofs = list[i].ofs;
		xref->table[list[i].oid].gen = list[i].gen;

		xref->table[list[i].oid].mark = 0;

		xref->table[list[i].oid].stmofs = list[i].stmofs;

		/* corrected stream length */
		if (list[i].stmlen >= 0)
		{
			fz_obj *dict, *length;

			pdf_logxref("correct stream length %d %d = %d\n",
					list[i].oid, list[i].gen, list[i].stmlen);

			error = pdf_loadobject(&dict, xref, list[i].oid, list[i].gen);
			if (error)
			{
				error = fz_rethrow(error, "cannot load stream object");
				goto cleanup;
			}

			error = fz_newint(&length, list[i].stmlen);
			if (error)
			{
				fz_dropobj(dict);
				error = fz_rethrow(error, "cannot create integer object");
				goto cleanup;
			}

			error = fz_dictputs(dict, "Length", length);
			if (error)
			{
				fz_dropobj(length);
				fz_dropobj(dict);
				error = fz_rethrow(error, "cannot update stream length");
				goto cleanup;
			}

			error = pdf_updateobject(xref, list[i].oid, list[i].gen, dict);
			if (error)
			{
				fz_dropobj(length);
				fz_dropobj(dict);
				error = fz_rethrow(error, "cannot update stream object");
				goto cleanup;
			}

			fz_dropobj(length);
			fz_dropobj(dict);
		}
	}

	/* Chain the free entries: walking from the highest slot down, each free
	 * entry's offset is set to the free entry found just before it (the next
	 * higher one), and 0 for the highest. */
	next = 0;
	for (i = xref->len - 1; i >= 0; i--)
	{
		if (xref->table[i].type == 'f')
		{
			xref->table[i].ofs = next;
			if (xref->table[i].gen < 65535)
				xref->table[i].gen ++;
			next = i;
		}
	}

	fz_free(list);

	return fz_okay;

cleanup:
	fz_dropstream(file);
	xref->file = nil; /* don't keep the stale pointer */
	fz_free(list);
	return error; /* already rethrown */
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -