pdftool.c

来自「SumatraPDF是一款小型开源的pdf阅读工具。虽然玲珑小巧(只有800多K」· C语言 代码 · 共 1,217 行 · 第 1/2 页

C
1,217
字号
/*
 * Swiss army knife for manipulating and debugging PDFs.
 *
 * There are a few major modes of operation:
 *
 *   show -- pretty-print objects and streams
 *   draw -- render pages to bitmap
 *   clean -- simple rewrite of pdf file
 *   edit -- edit pages (impose and copy operations)
 */

#include "fitz.h"
#include "mupdf.h"

#ifdef _MSC_VER
#include <winsock2.h>
#else
#include <sys/time.h>
#endif

/* put these up here so we can clean up in die() */
fz_renderer *drawgc = nil;
void closesrc(void);

/*
 * Common operations.
 * Parse page selectors.
 * Load and decrypt a PDF file.
 * Select pages.
 */

char *srcname = "(null)";
pdf_xref *src = nil;
pdf_outline *srcoutline = nil;
pdf_pagetree *srcpages = nil;

/*
 * Report a fatal error and terminate the process.
 *
 * Flushes stdout, prints and drops the error, flushes stderr, releases
 * the renderer (if one exists) and the currently open source document,
 * then calls abort(). This function does not return.
 */
void die(fz_error *eo)
{
	fflush(stdout);
	fz_printerror(eo);
	fz_droperror(eo);
	fflush(stderr);
	if (drawgc)
		fz_droprenderer(drawgc);
	closesrc();
	abort();
}

/*
 * Release the currently open source document, if any.
 *
 * Drops the page tree, the xref's store and the xref itself, and resets
 * the corresponding globals to nil so the function is safe to call
 * repeatedly. srcname is reset to nil as well.
 *
 * NOTE(review): srcoutline is not released here even though opensrc()
 * loads it -- confirm whether it should be dropped as well.
 */
void closesrc(void)
{
	if (srcpages)
	{
		pdf_droppagetree(srcpages);
		srcpages = nil;
	}

	if (src)
	{
		if (src->store)
		{
			pdf_dropstore(src->store);
			src->store = nil;
		}
		pdf_closexref(src);
		src = nil;
	}

	srcname = nil;
}

/*
 * Open a PDF file as the current source document.
 *
 * filename:  path of the file to open; also remembered in srcname.
 * password:  password handed to pdf_setpassword() when the file is encrypted.
 * loadpages: when non-zero the page tree is loaded into srcpages.
 *
 * Any previously opened document is closed first. If the xref cannot be
 * loaded a repair is attempted. Every other failure is fatal (see die()).
 */
void opensrc(char *filename, char *password, int loadpages)
{
	fz_error *error;
	fz_obj *obj;

	closesrc();

	srcname = filename;

	error = pdf_newxref(&src);
	if (error)
		die(error);

	error = pdf_loadxref(src, filename);
	if (error)
	{
		fz_printerror(error);
		fz_droperror(error);
		fz_warn("trying to repair");
		error = pdf_repairxref(src, filename);
		if (error)
			die(error);
	}

	error = pdf_decryptxref(src);
	if (error)
		die(error);

	if (src->crypt)
	{
		int okay = pdf_setpassword(src->crypt, password);
		if (!okay)
			die(fz_throw("invalid password"));
	}

	if (loadpages)
	{
		error = pdf_loadpagetree(&srcpages, src);
		if (error)
			die(error);
	}

	/* TODO: move into mupdf lib, see pdfapp_open in pdfapp.c */
	obj = fz_dictgets(src->trailer, "Root");
	if (!obj)
	{
		/* 'error' is nil at this point (every call above succeeded),
		 * so build a real error describing the problem. */
		die(fz_throw("syntaxerror: missing Root object"));
	}

	error = pdf_loadindirect(&src->root, src, obj);
	if (error)
		die(error);

	/* The Info dictionary is optional. */
	obj = fz_dictgets(src->trailer, "Info");
	if (obj)
	{
		error = pdf_loadindirect(&src->info, src, obj);
		if (error)
			die(error);
	}

	error = pdf_loadnametrees(src);
	if (error)
		die(error);

	error = pdf_loadoutline(&srcoutline, src);
	if (error)
		die(error);
}

/*
 * Load (and immediately drop) every xref entry whose type is 'o'.
 * Presumably these are objects kept inside object streams, loaded here
 * so they are resident before garbage collection -- TODO confirm.
 * A load failure is fatal.
 */
void preloadobjstms(void)
{
	fz_error *error;
	fz_obj *obj;
	int i;

	for (i = 0; i < src->len; i++)
	{
		if (src->table[i].type == 'o')
		{
			error = pdf_loadobject(&obj, src, i, 0);
			if (error)
				die(error);
			fz_dropobj(obj);
		}
	}
}

/* --------------------------------------------------------------------- */

/*
 * Debug print parts of the PDF.
 */

int showbinary = 0;
int showdecode = 0;
int showcolumn;

/* Print usage for the "show" mode to stderr and exit with status 1. */
void showusage(void)
{
	fprintf(stderr, "usage: mupdftool show [-bd] <file> [xref] [trailer] [object numbers]\n");
	fprintf(stderr, "  -b  \tprint streams as raw binary data\n");
	fprintf(stderr, "  -d  \tdecode streams\n");
	exit(1);
}

/* Print the trailer dictionary of the open document; fatal if none is open. */
void showtrailer(void)
{
	if (!src)
		die(fz_throw("no file specified"));
	printf("trailer\n");
	fz_debugobj(src->trailer);
	printf("\n");
}

/* Print the xref table of the open document; fatal if none is open. */
void showxref(void)
{
	if (!src)
		die(fz_throw("no file specified"));
	pdf_debugxref(src);
	printf("\n");
}

/*
 * Print n bytes from buf to stdout in a terminal-safe form.
 *
 * CR and LF are each emitted as a newline, bytes outside the range
 * 32..126 are replaced by '.', and a newline is inserted whenever the
 * running column (showcolumn, shared across calls) reaches 79.
 */
void showsafe(unsigned char *buf, int n)
{
	int i;

	for (i = 0; i < n; i++)
	{
		if (buf[i] == '\r' || buf[i] == '\n')
		{
			putchar('\n');
			showcolumn = 0;
		}
		else if (buf[i] < 32 || buf[i] > 126)
		{
			putchar('.');
			showcolumn ++;
		}
		else
		{
			putchar(buf[i]);
			showcolumn ++;
		}

		if (showcolumn == 79)
		{
			putchar('\n');
			showcolumn = 0;
		}
	}
}

/*
 * Print the stream belonging to object (num, gen).
 *
 * The stream is opened with pdf_openstream() when showdecode is set and
 * with pdf_openrawstream() otherwise. Data is read in chunks and written
 * verbatim when showbinary is set, or through showsafe() otherwise.
 * Any error is fatal.
 */
void showstream(int num, int gen)
{
	fz_error *error;
	fz_stream *stm;
	unsigned char buf[2048];
	int n;

	showcolumn = 0;

	if (showdecode)
		error = pdf_openstream(&stm, src, num, gen);
	else
		error = pdf_openrawstream(&stm, src, num, gen);
	if (error)
		die(error);

	while (1)
	{
		error = fz_read(&n, stm, buf, sizeof buf);
		if (error)
			die(error);
		if (n == 0)
			break;	/* end of stream */
		if (showbinary)
			fwrite(buf, 1, n, stdout);
		else
			showsafe(buf, n);
	}

	fz_dropstream(stm);
}

/*
 * Print object (num, gen) framed by "N G obj" / "endobj", including its
 * stream data when the object is a stream. Fatal if no document is open
 * or the object cannot be loaded.
 */
void showobject(int num, int gen)
{
	fz_error *error;
	fz_obj *obj;

	if (!src)
		die(fz_throw("no file specified"));

	error = pdf_loadobject(&obj, src, num, gen);
	if (error)
		die(error);

	printf("%d %d obj\n", num, gen);
	fz_debugobj(obj);

	if (pdf_isstream(src, num, gen))
	{
		printf("stream\n");
		showstream(num, gen);
		printf("endstream\n");
	}

	printf("endobj\n\n");

	fz_dropobj(obj);
}

/*
 * Entry point of the "show" mode.
 *
 * Options: -b (raw binary streams), -d (decode streams). The first
 * positional argument is the file, opened with an empty password and
 * without loading the page tree. With no further arguments the trailer
 * is shown; otherwise each argument is "trailer", "xref", or an object
 * number (shown with generation 0).
 */
void
showmain(int argc, char **argv)
{
	int c;

	while ((c = getopt(argc, argv, "bd")) != -1)
	{
		switch (c)
		{
		case 'b': showbinary ++; break;
		case 'd': showdecode ++; break;
		default:
			showusage();
			break;
		}
	}

	if (optind == argc)
		showusage();

	opensrc(argv[optind++], "", 0);

	if (optind == argc)
		showtrailer();

	while (optind < argc)
	{
		if (!strcmp(argv[optind], "trailer"))
			showtrailer();
		else if (!strcmp(argv[optind], "xref"))
			showxref();
		else
			showobject(atoi(argv[optind]), 0);
		optind++;
	}
}

/* --------------------------------------------------------------------- */

/*
 * Clean tool.
 * Rewrite PDF.
 * Garbage collect.
 * Decompress streams.
 * Encrypt or decrypt.
 */

/* Print usage for the "clean" mode to stderr and exit with status 1. */
void
cleanusage(void)
{
	fprintf(stderr,
			"usage: mupdftool clean [options] input.pdf [outfile.pdf]\n"
			"  -d -\tpassword for decryption\n"
			"  -g  \tgarbage collect unused objects\n"
			"  -x  \texpand compressed streams\n"
			"  -e  \tencrypt output\n"
			"    -u -\tset user password for encryption\n"
			"    -o -\tset owner password\n"
			"    -p -\tset permissions (combine letters 'pmca')\n"
			"    -n -\tkey length in bits: 40 <= n <= 128\n");
	exit(1);
}

/*
 * Rewrite every stream object of the open document without filters.
 *
 * For each in-use ('n') xref entry that is a stream, the stream data is
 * loaded via pdf_loadstream(), the Filter and DecodeParms keys are
 * removed, Length is set to the size of the loaded buffer, and both the
 * object and its stream are updated in the xref. Any error is fatal.
 */
void
cleanexpand(void)
{
	fz_error *error;
	fz_obj *stmobj;
	fz_buffer *buf;
	fz_obj *stmlen;
	int i, gen;

	for (i = 0; i < src->len; i++)
	{
		if (src->table[i].type == 'n')
		{
			gen = src->table[i].gen;

			if (pdf_isstream(src, i, gen))
			{
				error = pdf_loadobject(&stmobj, src, i, gen);
				if (error)
					die(error);

				error = pdf_loadstream(&buf, src, i, gen);
				if (error)
					die(error);

				fz_dictdels(stmobj, "Filter");
				fz_dictdels(stmobj, "DecodeParms");

				error = fz_newint(&stmlen, buf->wp - buf->rp);
				if (error)
					die(error);
				error = fz_dictputs(stmobj, "Length", stmlen);
				if (error)
					die(error);
				fz_dropobj(stmlen);

				pdf_updateobject(src, i, gen, stmobj);
				/* NOTE(review): buf is not dropped after this call;
				 * confirm pdf_updatestream() takes ownership. */
				pdf_updatestream(src, i, gen, buf);

				fz_dropobj(stmobj);
			}
		}
	}
}

/*
 * Entry point of the "clean" mode.
 *
 * Parses the options listed in cleanusage(), opens the input file,
 * optionally sets up encryption, expands streams and/or garbage collects,
 * then saves the result to the output file ("out.pdf" by default).
 */
void
cleanmain(int argc, char **argv)
{
	int doencrypt = 0;
	int dogarbage = 0;
	int doexpand = 0;
	pdf_crypt *encrypt = nil;
	char *infile;
	char *outfile = "out.pdf";
	char *userpw = "";
	char *ownerpw = "";
	unsigned perms = 0xfffff0c0;	/* nothing allowed */
	int keylen = 40;
	char *password = "";
	fz_error *error;
	int c;

	while ((c = getopt(argc, argv, "d:egn:o:p:u:x")) != -1)
	{
		switch (c)
		{
		case 'p':
			/* see TABLE 3.15 User access permissions */
			perms = 0xfffff0c0;
			if (strchr(optarg, 'p')) /* print */
				perms |= (1 << 2) | (1 << 11);
			if (strchr(optarg, 'm')) /* modify */
				perms |= (1 << 3) | (1 << 10);
			if (strchr(optarg, 'c')) /* copy */
				perms |= (1 << 4) | (1 << 9);
			if (strchr(optarg, 'a')) /* annotate / forms */
				perms |= (1 << 5) | (1 << 8);
			break;
		case 'd': password = optarg; break;
		case 'e': doencrypt ++; break;
		case 'g': dogarbage ++; break;
		case 'n': keylen = atoi(optarg); break;
		case 'o': ownerpw = optarg; break;
		case 'u': userpw = optarg; break;
		case 'x': doexpand ++; break;
		default: cleanusage(); break;
		}
	}

	if (argc - optind < 1)
		cleanusage();

	infile = argv[optind++];
	if (argc - optind > 0)
		outfile = argv[optind++];

	opensrc(infile, password, 0);

	if (doencrypt)
	{
		/* Use the file's own ID when present, otherwise a fixed
		 * placeholder. Either way we hold one reference that is
		 * released after the encryption context has been created. */
		fz_obj *id = fz_dictgets(src->trailer, "ID");
		if (!id)
		{
			error = fz_packobj(&id, "[(ABCDEFGHIJKLMNOP)(ABCDEFGHIJKLMNOP)]");
			if (error)
				die(error);
		}
		else
			fz_keepobj(id);

		error = pdf_newencrypt(&encrypt, userpw, ownerpw, perms, keylen, id);
		if (error)
			die(error);

		fz_dropobj(id);
	}

	if (doexpand)
		cleanexpand();

	if (dogarbage)
	{
		preloadobjstms();
		pdf_garbagecollect(src);
	}

	error = pdf_savexref(src, outfile, encrypt);
	if (error)
		die(error);

	if (encrypt)
		pdf_dropcrypt(encrypt);

	/* Close through closesrc() so the global 'src' is reset to nil and
	 * cannot be closed a second time by a later closesrc()/die(). */
	closesrc();
}

/* --------------------------------------------------------------------- */

/*
 * Draw pages to PPM bitmaps.
 */

enum { DRAWPNM, DRAWTXT, DRAWXML };

/*
 * Timing statistics accumulated over a number of pages.
 * Times are in the unit produced by gettime() (microseconds).
 * 'avg' is used as a running sum by drawloadpage().
 */
struct benchmark
{
	int pages;
	long min;
	int minpage;
	long avg;
	long max;
	int maxpage;
};

int drawmode = DRAWPNM;
char *drawpattern = nil;
pdf_page *drawpage = nil;
float drawzoom = 1.0;
int drawrotate = 0;
int drawbands = 1;
int drawcount = 0;
int benchmark = 0;

/* Print usage for the "draw" mode to stderr and exit with status 1. */
void
drawusage(void)
{
	fprintf(stderr,
			"usage: mupdftool draw [options] [file.pdf pages ... ]\n"
			"  -b -\tdraw page in N bands\n"
			"  -d -\tpassword for decryption\n"
			"  -o -\tpattern (%%d for page number) for output file\n"
			"  -r -\tresolution in dpi\n"
			"  -t  \tutf-8 text output instead of graphics\n"
			"  -x  \txml dump of display tree\n"
			"  -m  \tprint benchmark results\n"
			"  example:\n"
			"    mupdftool draw -o out%%03d.pnm a.pdf 1-3,5,9-\n");
	exit(1);
}

/*
 * Store the current wall-clock time, in microseconds, in *time_.
 * Aborts the process if gettimeofday() fails.
 *
 * NOTE(review): the multiplication is carried out in the type of
 * tv.tv_sec; where that type is 32 bits wide it would overflow --
 * confirm the targeted platforms.
 */
void
gettime(long *time_)
{
	struct timeval tv;

	if (gettimeofday(&tv, NULL) < 0)
		abort();

	*time_ = tv.tv_sec * 1000000 + tv.tv_usec;
}

/*
 * Load page number pagenum (1-based) of the open document into drawpage.
 *
 * A progress prefix "draw <file>:<page> " is written to stderr. When
 * benchmarking is enabled and loadtimes is non-NULL, the load time is
 * measured and folded into *loadtimes (min/max with their page numbers,
 * running sum in avg, and page count). A load failure is fatal.
 */
void
drawloadpage(int pagenum, struct benchmark *loadtimes)
{
	fz_error *error;
	fz_obj *pageobj;
	long start;
	long end;
	long elapsed;
	char *basename;

	/* Strip any leading directory components from the file name. */
	basename = strrchr(srcname, '/');
	if (!basename)
		basename = srcname;
	else
		basename ++;

	fprintf(stderr, "draw %s:%03d ", basename, pagenum);
	if (benchmark && loadtimes)
	{
		fflush(stderr);
		gettime(&start);
	}

	pageobj = pdf_getpageobject(srcpages, pagenum - 1);
	error = pdf_loadpage(&drawpage, src, pageobj);
	if (error)
		die(error);

	if (benchmark && loadtimes)
	{
		gettime(&end);
		elapsed = end - start;

		if (elapsed < loadtimes->min)
		{
			loadtimes->min = elapsed;
			loadtimes->minpage = pagenum;
		}
		if (elapsed > loadtimes->max)
		{
			loadtimes->max = elapsed;
			loadtimes->maxpage = pagenum;
		}
		loadtimes->avg += elapsed;
		loadtimes->pages++;
	}

	if (benchmark)
		fflush(stderr);
}

/*
 * Drop the currently loaded page and empty the document's resource
 * store, if the document has one.
 */
void
drawfreepage(void)
{
	pdf_droppage(drawpage);
	drawpage = nil;

	/* Flush resources between pages.
	 * TODO: should check memory usage before deciding to do this.
	 */
	if (src && src->store)
	{
		fflush(stderr);
		/* pdf_debugstore(src->store); */
		pdf_emptystore(src->store);
	}
}

void
drawpnm(int pagenum, struct benchmark *loadtimes, struct benchmark *drawtimes)
{
	fz_error *error;
	fz_matrix ctm;
	fz_irect bbox;
	fz_pixmap *pix;
	char name[256];
	char pnmhdr[256];
	int i, x, y, w, h, b, bh;
	int fd = -1;
	long start;
	long end;
	long elapsed;
	fz_md5 digest;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?