📄 pdf_open.c
字号:
#include "fitz.h"
#include "mupdf.h"

#include <limits.h>

/*
 * Return non-zero when ch is one of the six whitespace bytes tested below:
 * NUL, TAB, LF, FF, CR or SPACE.
 */
static inline int iswhite(int ch)
{
	return
		ch == '\000' || ch == '\011' || ch == '\012' ||
		ch == '\014' || ch == '\015' || ch == '\040';
}

/*
 * magic version tag and startxref
 */

/*
 * Read the first line of the file, check that it starts with "%PDF-" and
 * store the version as an integer (major * 10 + minor) in xref->version.
 * Returns fz_okay on success, or an error when seeking/reading fails or the
 * marker is missing.
 */
static fz_error *
loadversion(pdf_xref *xref)
{
	fz_error *error;
	char buf[20];

	error = fz_seek(xref->file, 0, 0);
	if (error)
		return fz_rethrow(error, "cannot seek to beginning of file");

	error = fz_readline(xref->file, buf, sizeof buf);
	if (error)
		return fz_rethrow(error, "cannot read version marker");

	/*
	 * buf is handled as a C string (atof below relies on that too), so use
	 * strncmp: unlike memcmp it stops at the terminator and never compares
	 * uninitialised bytes when the first line is shorter than the marker.
	 */
	if (strncmp(buf, "%PDF-", 5) != 0)
		return fz_throw("cannot recognize version marker");

	/* "1.4" -> 14; the +0.5 rounds away floating point noise */
	xref->version = (int) (atof(buf + 5) * 10.0 + 0.5);

	pdf_logxref("version %d.%d\n", xref->version / 10, xref->version % 10);

	return fz_okay;
}

/*
 * Scan the last (up to) 1024 bytes of the file backwards for the keyword
 * "startxref" and store the integer that follows it in xref->startxref.
 * Returns an error when seeking/reading fails or the keyword is not found.
 */
static fz_error *
readstartxref(pdf_xref *xref)
{
	fz_error *error;
	unsigned char buf[1024 + 1];	/* one spare byte for a terminator */
	int window = (int)sizeof buf - 1;
	int end;
	int t, n;
	int i;

	error = fz_seek(xref->file, 0, 2);
	if (error)
		return fz_rethrow(error, "cannot seek to end of file");

	/* fetch the position once; MAX is a macro and may evaluate it twice */
	end = fz_tell(xref->file);
	t = MAX(0, end - window);

	error = fz_seek(xref->file, t, 0);
	if (error)
		return fz_rethrow(error, "cannot seek to offset %d", t);

	error = fz_read(&n, xref->file, buf, window);
	if (error)
		return fz_rethrow(error, "cannot read from file");

	/* clamp defensively before n is used as an index */
	if (n < 0)
		n = 0;
	if (n > window)
		n = window;

	/* terminate the data so that atoi below cannot run off the buffer */
	buf[n] = '\0';

	for (i = n - 9; i >= 0; i--)
	{
		if (memcmp(buf + i, "startxref", 9) == 0)
		{
			i += 9;
			/* test the bound first so buf[i] is only read when i < n */
			while (i < n && iswhite(buf[i]))
				i ++;
			xref->startxref = atoi((char*)(buf + i));
			return fz_okay;
		}
	}

	return fz_throw("cannot find startxref");
}

/*
 * Split an xref subsection header held in buf into its two space separated
 * integers. buf is modified in place by strsep.
 *
 * On success *ofsp and *lenp receive the first and second number and *restp
 * points at the text following the second field (NULL when the line ends
 * there). Fails when the second field is missing or the count is negative.
 */
static fz_error *
parsexrefrange(char *buf, int *ofsp, int *lenp, char **restp)
{
	char *s = buf;
	char *first;

	first = strsep(&s, " ");
	/* no separator on the line: a second strsep would hand NULL to atoi */
	if (!s)
		return fz_throw("invalid range marker in xref");

	*ofsp = atoi(first);
	*lenp = atoi(strsep(&s, " "));

	if (*ofsp < 0 || *lenp < 0)
		return fz_throw("xref range out of bounds");

	*restp = s;
	return fz_okay;
}

/*
 * trailer dictionary
 */

/*
 * Read the trailer dictionary that follows a classic "xref" table.
 * The table entries themselves are skipped (20 bytes each); only the
 * dictionary after the "trailer" keyword is parsed into xref->trailer.
 * buf/cap is scratch space for line reading and lexing.
 */
static fz_error *
readoldtrailer(pdf_xref *xref, char *buf, int cap)
{
	fz_error *error;
	int ofs, len;
	char *s;
	int n;
	int t;
	pdf_token_e tok;
	int c;

	pdf_logxref("load old xref format trailer\n");

	error = fz_readline(xref->file, buf, cap);
	if (error)
		return fz_rethrow(error, "cannot read xref marker");
	if (strncmp(buf, "xref", 4) != 0)
		return fz_throw("cannot find xref marker");

	while (1)
	{
		/* every subsection header starts with a digit */
		c = fz_peekbyte(xref->file);
		if (!(c >= '0' && c <= '9'))
			break;

		error = fz_readline(xref->file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot read xref count");

		error = parsexrefrange(buf, &ofs, &len, &s);
		if (error)
			return fz_rethrow(error, "cannot read xref count");

		/* broken pdfs where the section is not on a separate line */
		if (s && *s != '\0')
		{
			/* cast before negating: strlen returns an unsigned size_t */
			error = fz_seek(xref->file, -(2 + (int)strlen(s)), 1);
			if (error)
				return fz_rethrow(error, "cannot seek in file");
		}

		t = fz_tell(xref->file);
		if (t < 0)
			return fz_throw("cannot tell in file");

		/* guard t + 20 * len against signed overflow */
		if (len > (INT_MAX - t) / 20)
			return fz_throw("xref has too many entries");

		/* skip the entries of this subsection, 20 bytes apiece */
		error = fz_seek(xref->file, t + 20 * len, 0);
		if (error)
			return fz_rethrow(error, "cannot seek in file");
	}

	error = fz_readerror(xref->file);
	if (error)
		return fz_rethrow(error, "cannot read from file");

	error = pdf_lex(&tok, xref->file, buf, cap, &n);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	if (tok != PDF_TTRAILER)
		return fz_throw("expected trailer marker");

	error = pdf_lex(&tok, xref->file, buf, cap, &n);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	if (tok != PDF_TODICT)
		return fz_throw("expected trailer dictionary");

	error = pdf_parsedict(&xref->trailer, xref->file, buf, cap);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");

	return fz_okay;
}

/*
 * Read the trailer when the cross reference data is stored as an indirect
 * object: the object parsed at the current position becomes xref->trailer.
 */
static fz_error *
readnewtrailer(pdf_xref *xref, char *buf, int cap)
{
	fz_error *error;

	pdf_logxref("load new xref format trailer\n");

	error = pdf_parseindobj(&xref->trailer, xref->file, buf, cap, nil, nil, nil);
	if (error)
		return fz_rethrow(error, "cannot parse trailer (compressed)");

	return fz_okay;
}

/*
 * Seek to xref->startxref and load the trailer found there. The first byte
 * selects the format: 'x' means a classic "xref" table, a digit means an
 * indirect object; anything else is rejected.
 */
static fz_error *
readtrailer(pdf_xref *xref, char *buf, int cap)
{
	fz_error *error;
	int c;

	error = fz_seek(xref->file, xref->startxref, 0);
	if (error)
		return fz_rethrow(error, "cannot seek to startxref");

	c = fz_peekbyte(xref->file);
	error = fz_readerror(xref->file);
	if (error)
		return fz_rethrow(error, "cannot read trailer");

	if (c == 'x')
	{
		error = readoldtrailer(xref, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot read trailer");
	}
	else if (c >= '0' && c <= '9')
	{
		error = readnewtrailer(xref, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot read trailer");
	}
	else
	{
		return fz_throw("cannot recognize xref format");
	}

	return fz_okay;
}

/*
 * xref tables
 */

/*
 * Load a classic "xref" table into xref->table and parse the trailer
 * dictionary that follows it into *trailerp.
 *
 * Entries already present in the table (non-zero type) are left alone, so
 * only slots that are still empty are filled in. The table is grown when a
 * subsection reaches beyond xref->cap.
 */
static fz_error *
readoldxref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap)
{
	fz_error *error;
	int ofs, len;
	int end;
	char *s;
	int n;
	pdf_token_e tok;
	int i;
	int c;

	pdf_logxref("load old xref format\n");

	error = fz_readline(xref->file, buf, cap);
	if (error)
		return fz_rethrow(error, "cannot read xref marker");
	if (strncmp(buf, "xref", 4) != 0)
		return fz_throw("cannot find xref marker");

	while (1)
	{
		/* every subsection header starts with a digit */
		c = fz_peekbyte(xref->file);
		if (!(c >= '0' && c <= '9'))
			break;

		error = fz_readline(xref->file, buf, cap);
		if (error)
			return fz_rethrow(error, "cannot read xref count");

		error = parsexrefrange(buf, &ofs, &len, &s);
		if (error)
			return fz_rethrow(error, "cannot read xref count");

		/* ofs and len are non-negative here; reject ofs + len overflow */
		if (ofs > INT_MAX - len)
			return fz_throw("xref range out of bounds");
		end = ofs + len;

		/* broken pdfs where the section is not on a separate line */
		if (s && *s != '\0')
		{
			fz_warn("broken xref section. proceeding anyway.");
			/* cast before negating: strlen returns an unsigned size_t */
			error = fz_seek(xref->file, -(2 + (int)strlen(s)), 1);
			if (error)
				return fz_rethrow(error, "cannot seek to xref");
		}

		/* broken pdfs where size in trailer undershoots entries in xref sections */
		if (end > xref->cap)
		{
			pdf_xrefentry *newtable;

			fz_warn("broken xref section, proceeding anyway.");

			/* keep the byte count representable */
			if (end > INT_MAX / (int)sizeof(pdf_xrefentry))
				return fz_throw("outofmem: xref table");

			/*
			 * Grow through a temporary so that a failed allocation
			 * leaves both xref->table and xref->cap untouched.
			 */
			newtable = fz_realloc(xref->table, end * sizeof(pdf_xrefentry));
			if (!newtable)
				return fz_throw("outofmem: xref table");
			xref->table = newtable;
			xref->cap = end;
		}

		/* clear the slots between the old length and the new end */
		if (end > xref->len)
		{
			for (i = xref->len; i < end; i++)
			{
				xref->table[i].ofs = 0;
				xref->table[i].gen = 0;
				xref->table[i].type = 0;
				xref->table[i].mark = 0;
				xref->table[i].stmbuf = nil;
				xref->table[i].stmofs = 0;
				xref->table[i].obj = nil;
			}
			xref->len = end;
		}

		for (i = 0; i < len; i++)
		{
			/* each entry occupies exactly 20 bytes */
			error = fz_read(&n, xref->file, (unsigned char *) buf, 20);
			if (error)
				return fz_rethrow(error, "cannot read xref table");

			/* only fill slots that have not been set before */
			if (!xref->table[ofs + i].type)
			{
				s = buf;
				xref->table[ofs + i].ofs = atoi(s);		/* bytes 0..9 */
				xref->table[ofs + i].gen = atoi(s + 11);	/* bytes 11..15 */
				xref->table[ofs + i].type = s[17];		/* byte 17 */
			}
		}
	}

	error = pdf_lex(&tok, xref->file, buf, cap, &n);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	if (tok != PDF_TTRAILER)
		return fz_throw("expected trailer marker");

	error = pdf_lex(&tok, xref->file, buf, cap, &n);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");
	if (tok != PDF_TODICT)
		return fz_throw("expected trailer dictionary");

	error = pdf_parsedict(trailerp, xref->file, buf, cap);
	if (error)
		return fz_rethrow(error, "cannot parse trailer");

	return fz_okay;
}

/*
 * Decode i1 cross reference stream entries from stm into table slots
 * i0 .. i0 + i1 - 1. Each entry consists of three big-endian fields that
 * are w0, w1 and w2 bytes wide.
 *
 * Field 1 selects the entry type (0 -> 'f', 1 -> 'n', 2 -> 'o') and
 * defaults to 1 when w0 is zero. Field 2 is stored as the offset and
 * field 3 as the generation; each defaults to 0 when its width is zero.
 * Slots that already have a type are not overwritten.
 */
static fz_error *
readnewxrefsection(pdf_xref *xref, fz_stream *stm, int i0, int i1, int w0, int w1, int w2)
{
	fz_error *error;
	int i, n;

	if (i1 < 0)
		return fz_throw("xref stream has negative entry count");

	/* written as a subtraction so that i0 + i1 cannot overflow */
	if (i0 < 0 || i1 > xref->len - i0)
		return fz_throw("xref stream has too many entries");

	for (i = i0; i < i0 + i1; i++)
	{
		int a = 0;
		int b = 0;
		int c = 0;

		if (fz_peekbyte(stm) == EOF)
		{
			error = fz_readerror(stm);
			if (error)
				return fz_rethrow(error, "truncated xref stream");
			return fz_throw("truncated xref stream");
		}

		for (n = 0; n < w0; n++)
			a = (a << 8) + fz_readbyte(stm);
		for (n = 0; n < w1; n++)
			b = (b << 8) + fz_readbyte(stm);
		for (n = 0; n < w2; n++)
			c = (c << 8) + fz_readbyte(stm);

		error = fz_readerror(stm);
		if (error)
			return fz_rethrow(error, "truncated xref stream");

		if (!xref->table[i].type)
		{
			int t = w0 ? a : 1;
			xref->table[i].type = t == 0 ? 'f' : t == 1 ? 'n' : t == 2 ? 'o' : 0;
			/* b was read with width w1 and c with width w2 */
			xref->table[i].ofs = w1 ? b : 0;
			xref->table[i].gen = w2 ? c : 0;
		}
	}

	return fz_okay;
}

static fz_error *
readnewxref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap)
{
	fz_error *error;
	fz_stream *stm;
	fz_obj *trailer;
	fz_obj *index;
	fz_obj *obj;
	int oid, gen, stmofs;
	int size, w0, w1, w2;
	int t;

	pdf_logxref("load new xref format\n");

	error = pdf_parseindobj(&trailer, xref->file, buf, cap, &oid, &gen, &stmofs);
	if (error)
		return fz_rethrow(error, "cannot parse compressed xref stream object");

	if (oid < 0 || oid >= xref->len)
	{
		if (oid == xref->len && oid < xref->cap)
		{
			/* allow broken pdf files that have off-by-one errors in the xref */
			fz_warn("object id (%d %d R) out of range (0..%d)", oid, gen, xref->len - 1);
			xref->len ++;
		}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -