📄 pdfreader.cs

📁 pdf阅读器用于阅读pdf格式的文件方便好用免费的提供
💻 CS
📖 第 1 页 / 共 5 页
字号:
上一页 1 2 3 45
				{
					indexes[i] = (int)((PdfNumber)array[i]).Number;
				}

				return indexes;
			}
		}
	}

	/// <summary>
	/// Represents an AcroForm object in a PDF document. See the PDF reference 8.6.1 Interactive Form Dictionary.
	/// </summary>
	/// <remarks>
	/// Constructing a PdfForm sets the /NeedAppearances entry of its FieldDictionary to true.
	/// </remarks>
	[CLSCompliant(true)]
	public class PdfForm: PdfField
	{
		/// <summary>
		/// The /NeedAppearances key written by the constructor. Read-only because it is
		/// shared by all instances and never reassigned.
		/// </summary>
		private static readonly PdfName NAName = new PdfName("/NeedAppearances");

		/// <summary>
		/// Initializes a new instance of PdfForm with the specified object number, generation number,
		/// and field dictionary.
		/// </summary>
		/// <param name="objNumber">The object number.</param>
		/// <param name="generationNumber">The generation number.</param>
		/// <param name="fieldDictionary">The field dictionary.</param>
		public PdfForm(int objNumber, int generationNumber, PdfDictionary fieldDictionary): base(objNumber,
			generationNumber, fieldDictionary)
		{
			// set NeedAppearances key so the viewer application regenerates the appearance streams
			// for the form fields.
			FieldDictionary.SetElement(NAName, new PdfBool(true));
		}
	}

	/// <summary>
	/// One entry of a PDF cross-reference table (PDF Reference 3.4.3, Cross-Reference Table):
	/// an object number and generation number together with the object's byte offset
	/// and a flag telling whether the object is in use.
	/// </summary>
	[CLSCompliant(true)]
	public class PdfCrossReferenceEntry
	{
		private int number;
		private int generation;
		private int byteOffset;
		private bool inUse;

		/// <summary>
		/// Creates an entry from the four values that make up a cross-reference record.
		/// </summary>
		/// <param name="objNumber">The object number</param>
		/// <param name="generationNumber">The generation number</param>
		/// <param name="offset">The byte offset within the PDF file</param>
		/// <param name="active">true if the object is not free, false otherwise</param>
		public PdfCrossReferenceEntry(int objNumber, int generationNumber,
			int offset, bool active)
		{
			this.number = objNumber;
			this.generation = generationNumber;
			this.byteOffset = offset;
			this.inUse = active;
		}

		/// <summary>
		/// Gets or sets the object number.
		/// </summary>
		public int ObjectNumber
		{
			get { return this.number; }
			set { this.number = value; }
		}

		/// <summary>
		/// Gets or sets the generation number.
		/// </summary>
		public int GenerationNumber
		{
			get { return this.generation; }
			set { this.generation = value; }
		}

		/// <summary>
		/// Gets or sets the byte offset of the object within the document.
		/// </summary>
		public int Offset
		{
			get { return this.byteOffset; }
			set { this.byteOffset = value; }
		}

		/// <summary>
		/// Gets or sets whether the object is in use: true if it is not free, false otherwise.
		/// </summary>
		public bool Active
		{
			get { return this.inUse; }
			set { this.inUse = value; }
		}
	}

	/// <summary>
	/// Parses PDF files for interactive form fields and allows getting and setting the values of those fields.
	/// </summary>
	/// <remarks>
	/// The PDF parser in PdfReader is very simple and suffices only for basic cases.
	/// PdfReader is intended to easily programmatically fill out PDF forms.
	/// It parses the PDF in one go and allows the user to make changes to the form field values.
	/// It writes an updated version of the PDF to a Stream, which (hopefully) conforms to the PDF Reference.
	/// If you have advanced parsing needs, you are probably better off with a package such as <see href="http://www.lowagie.com/iText/">iText</see>.
	/// </remarks>
	/// <example>
	/// Read a PDF file, change one text field, write the updated file back out:
	/// <code>
	/// // read the file
	/// PdfReader reader = new PdfReader(infile);
	/// 
	/// // change one text field
	/// try
	/// {
	///		((PdfTXField)reader.FieldsByName["Name"]).Text = "Doe";
	/// }
	/// catch
	/// {
	/// }
	/// 
	/// // write the file
	/// FileStream fileStream = new FileStream(file, System.IO.FileMode.Create);
	/// reader.WritePdf(fileStream);
	/// fileStream.Close();
	/// </code>
	/// </example>
	[CLSCompliant(true)]
	public class PdfReader
	{
		// The complete document text; the constructor stores one char per input byte.
		private string pdf;
		// Form fields collected by Parse(); stays an empty array when no form is found.
		private PdfField[] fields = new PdfField[0];
		// Form fields keyed by a unique name; Parse() appends "[n]" to a name that is already taken.
		private Hashtable fieldsByName = new Hashtable();
		// The interactive form; Parse() only collects fields when this is non-null.
		// NOTE(review): presumably assigned by ParseAcroForm() - confirm.
		private PdfForm form;

		// All patterns use RegexOptions.Singleline, so '.' also matches line breaks.

		// "startxref", a number (group 1) and the "%%EOF" marker.
		private static readonly Regex xrefRegex = new Regex(@"startxref\s*(\d+)\s*%%EOF", RegexOptions.Singleline);
		// "trailer" followed by "<<"; group 1 is the text up to the first ">>".
		private static readonly Regex trailerRegex = new Regex(@"trailer\s*<<(.*?)>>", RegexOptions.Singleline);
		// "/Root" followed by an indirect reference "n g R"; groups 1 and 2 are the two numbers.
		private static readonly Regex rootRegex = new Regex(@"/Root\s*(\d+)\s+(\d+)\s*R", RegexOptions.Singleline);
		// "/Size" followed by a number (group 1).
		private static readonly Regex sizeRegex = new Regex(@"/Size\s*(\d+)", RegexOptions.Singleline);
		// "xref", a subsection header starting at object 0, and the first number after it (group 1).
		private static readonly Regex nullRegex = new Regex(@"\sxref\s+0\s+\d+\s+(\d+)", RegexOptions.Singleline);
		// One cross-reference entry, optionally preceded by a subsection header.
		// As read by ParseXRef(): group 2 = first object number of the subsection (if present),
		// group 4 = 10-digit offset, group 5 = 5-digit generation number, group 6 = "n" or "f".
		private static readonly Regex refRegex = new Regex(@"\s*((\d+)\s+(\d+))?\s*(\d{10})\s+(\d{5})\s+(n|f)", RegexOptions.Singleline);
		// "n g obj ... endobj"; groups 1 and 2 are the two numbers, group 3 the text in between.
		private static readonly Regex objectRegex = new Regex(@"(\d+)\s+(\d+)\s+obj(.*?)endobj", RegexOptions.Singleline);
		// A dictionary containing "/FT" with the value /Btn, /Tx or /Ch (group 2).
		private static readonly Regex fieldRegex = new Regex(@"^\s*<<(.*?/FT\s+/(Btn|Tx|Ch).*>>)", RegexOptions.Singleline);
		// A dictionary containing "/Fields" followed by the start of an array.
		private static readonly Regex formRegex = new Regex(@"^\s*<<(.*?/Fields\s+\[.*>>)", RegexOptions.Singleline);
		// "/AcroForm" followed by two numbers (groups 1 and 2).
		private static readonly Regex acroFormRegex = new Regex(@"/AcroForm\s+(\d+)\s+(\d+)", RegexOptions.Singleline);
		// Used by Parse(): group 1 is the trailer dictionary text after "<<" through the last ">>"
		// in the searched range, group 2 the optional startxref part, group 3 its number.
		private static readonly Regex trailRegex = new Regex(@"trailer\s+<<(.*>>)(\s+startxref\s+(\d+))?", RegexOptions.Singleline);
		// The header "n g obj" of an indirect object; groups 1 and 2 are the two numbers.
		private static readonly Regex objRegex = new Regex(@"(\d+)\s+(\d+)\s+obj", RegexOptions.Singleline);
		// The text "/Linearized 1"; Parse() looks for it before the first "endobj".
		private static readonly Regex linearizedRegex = new Regex(@"/Linearized\s+1", RegexOptions.Singleline);

		// Location of the previous cross-reference table: Parse() stores the startxref value
		// of the first trailer it handles (the one nearest the end of the file); -1 until then.
		private int previous = -1;
		// Trailer dictionary kept by Parse(): the first one parsed, or, when the file is
		// linearized, the one parsed last (every trailer overwrites it).
		private PdfDictionary previousTrailer;
		// Object number of the /Root reference taken from previousTrailer.
		private int rootObjectNumber;
		// Offset field of the free entry for object 0, recorded by ParseXRef().
		private int nullOffset;
		// Backing field of the Linearized property.
		private bool linearized = false;

		// Object number -> PdfCrossReferenceEntry, filled by ParseXRef() for in-use entries.
		private Hashtable objects = new Hashtable();
		// Byte offset -> PdfCrossReferenceEntry, sorted by offset; filled by ParseXRef().
		private SortedList offsets = new SortedList();

		/// <summary>
		/// Creates a PdfReader from the PDF file at the specified path.
		/// </summary>
		/// <remarks>
		/// The file is opened for reading only and is closed again before this method returns;
		/// the PdfReader constructor reads and parses the whole stream.
		/// </remarks>
		/// <param name="name">The path of the file containing the PDF data.</param>
		/// <returns>A PdfReader constructed from the contents of the file.</returns>
		public static PdfReader GetPdfReader(string name)
		{
			// Ask for read access only. The (path, mode) FileStream constructor requests
			// read/write access, which fails for read-only files although the file is only read.
			// FileShare.Read matches the sharing mode that constructor uses.
			using (FileStream stream = new FileStream(name, FileMode.Open, FileAccess.Read, FileShare.Read))
			{
				return new PdfReader(stream);
			}
		}

		/// <summary>
		/// Initializes a new instance of PdfReader with the specified Stream.
		/// </summary>
		/// <remarks>
		/// Reads up to <c>stream.Length</c> bytes from the stream's current position, converts
		/// each byte to the char with the same numeric value and parses the resulting text.
		/// The stream must support the Length property; it is not closed by the constructor.
		/// </remarks>
		/// <param name="stream">The Stream containing the PDF data.</param>
		/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
		public PdfReader(Stream stream)
		{
			if (stream == null)
			{
				throw new ArgumentNullException("stream");
			}

			DOMConfigurator.ConfigureAndWatch(new FileInfo("PdfReader.exe.log4net"));

			byte[] buffer = new byte[stream.Length];
			int bytesRead = 0;

			// Stream.Read may deliver fewer bytes than requested, so keep reading until the
			// buffer is full or the stream reports that no more data is available.
			while (bytesRead < buffer.Length)
			{
				int count = stream.Read(buffer, bytesRead, buffer.Length - bytesRead);
				if (count <= 0)
				{
					break;
				}

				bytesRead += count;
			}

			// Only convert the bytes that were actually read, so the text never ends in
			// padding that was not part of the input.
			char[] chars = new char[bytesRead];

			for (int i = 0; i < bytesRead; i++)
			{
				// one char per byte keeps string indexes equal to byte offsets in the file
				chars[i] = (char)buffer[i];
			}

			pdf = new String(chars);

			Parse();
		}

		/// <summary>
		/// Resolves an indirect reference to the object it points to.
		/// </summary>
		/// <param name="reference">The reference to the object.</param>
		/// <returns>
		/// The parsed object, or null when the object number is unknown to the
		/// cross-reference data or its entry is not active.
		/// </returns>
		public PdfObject GetObjectForReference(PdfReference reference)
		{
			// unknown object number: nothing to resolve
			if (!objects.Contains(reference.ObjectNumber))
			{
				return null;
			}

			PdfCrossReferenceEntry xrefEntry = (PdfCrossReferenceEntry)objects[reference.ObjectNumber];

			// free objects are not parsed
			if (!xrefEntry.Active)
			{
				return null;
			}

			// the object's text runs from its own offset to the offset of the next object
			return ParseObject(xrefEntry.Offset, GetEndOfObject(reference.ObjectNumber));
		}

		/// <summary>
		/// Gets or sets a flag telling whether the file is treated as linearized
		/// (see the PDF Reference, Appendix F).
		/// </summary>
		public bool Linearized
		{
			get { return linearized; }
			set { linearized = value; }
		}

		/// <summary>
		/// Parses the document text held in <c>pdf</c>.
		/// </summary>
		/// <remarks>
		/// Steps, in order:
		/// 1. flags the file as linearized when "/Linearized 1" occurs before the first "endobj";
		/// 2. walks the trailers from the end of the file towards the start and parses the
		///    cross-reference table in front of each one;
		/// 3. takes the root object number from the retained trailer dictionary;
		/// 4. calls ParseAcroForm() and, if a form is present, collects its fields into
		///    <c>fields</c> and <c>fieldsByName</c>.
		/// If no trailer is found, the retained trailer dictionary is still null in step 3
		/// and a NullReferenceException results.
		/// </remarks>
		private void Parse()
		{
			ArrayList fieldsList = new ArrayList();
			int startTrailer;
			int endPosition = pdf.Length - 1;
			int startxref;
			Match match;

			// hack to determine if PDF is linearized:
			// check for the occurrence of "/Linearized 1" before the first
			// occurrence of "endobj".
			// Without any "endobj" there is nothing to search (a length of -1 would make
			// Regex.Match throw), so the file is simply treated as not linearized.
			int firstEndObj = pdf.IndexOf("endobj");
			if (firstEndObj >= 0 && linearizedRegex.Match(pdf, 0, firstEndObj).Success)
			{
				Linearized = true;
			}

			// successively parse all trailers starting from the end.
			// endPosition must shrink on every iteration, otherwise the same "trailer"
			// keyword would be found again and the loop would never end.
			while (endPosition >= 0 && (startTrailer = pdf.LastIndexOf("trailer", endPosition)) >= 0)
			{
				match = trailRegex.Match(pdf, startTrailer, endPosition - startTrailer + 1);

				if (!match.Success)
				{
					// "trailer" occurred without a dictionary behind it (e.g. inside other
					// data): skip this occurrence and keep searching in front of it
					endPosition = startTrailer - 1;
					continue;
				}

				string trailerString = match.Groups[1].Value;
				PdfDictionary trailerDictionary = new PdfDictionary(ref trailerString);

				startxref = -1;
				// in a hybrid-reference file (PDF 1.5), the trailer doesn't seem to
				// always include a startxref.
				if (match.Groups[2].Success)
				{
					startxref = Int32.Parse(match.Groups[3].Value, CultureInfo.InvariantCulture);
				}

				if (previous == -1)
				{
					previous = startxref;
				}

				// we don't believe the startxref field.
				// it can be bogus due to linearization.
				// we'll keep the "previous" value from above though, since
				// that's what taft@adobe.com says in a 1998 post on comp.text.pdf :-)
				startxref = pdf.LastIndexOf("xref", startTrailer);

				if (previousTrailer == null || Linearized)
				{
					previousTrailer = trailerDictionary;
				}

				// parse the xref table that is referenced by this trailer;
				// -1 means there is no "xref" keyword in front of the trailer
				if (startxref >= 0)
				{
					ParseXRef(startxref);
				}

				int nextEnd;
				if (offsets.Count > 0)
				{
					// the offset of the first object marks the start of the body and is
					// therefore the end of the previous trailer
					nextEnd = ((PdfCrossReferenceEntry)offsets.GetByIndex(0)).Offset;
				}
				else
				{
					// if the xref table has no objects then the previous trailer must
					// be right in front of it
					nextEnd = startxref;
				}

				// never move the search window past the trailer just handled; bogus
				// offsets would otherwise make the loop find this trailer again
				endPosition = Math.Min(nextEnd, startTrailer - 1);
			}

			rootObjectNumber = ((PdfReference)previousTrailer["/Root"]).ObjectNumber;

			ParseAcroForm();

			if (form != null)
			{
				PdfObject fieldsObject = form.FieldDictionary["/Fields"];
				PdfArray fieldsArray = fieldsObject as PdfArray;
				if (fieldsArray == null)
				{
					// /Fields is not a direct array: resolve it if it is a reference
					PdfReference fieldsReference = fieldsObject as PdfReference;
					if (fieldsReference != null)
					{
						fieldsArray = (PdfArray)GetObjectForReference(fieldsReference);
					}
				}

				// a form whose /Fields entry is missing or cannot be resolved has no fields
				if (fieldsArray != null)
				{
					foreach (PdfReference fieldReference in fieldsArray.Elements)
					{
						PdfField[] referencedFields = PdfField.GetPdfFields(fieldReference, this, null, "");

						foreach (PdfField field in referencedFields)
						{
							Trace.Assert(field != null);

							fieldsList.Add(field);

							// make field name unique
							string fieldName = field.Name;
							int i = 0;
							while (fieldsByName.Contains(fieldName))
							{
								fieldName = string.Format(CultureInfo.InvariantCulture, "{0}[{1}]", field.Name, i);
								i++;
							}

							fieldsByName.Add(fieldName, field);
						}
					}
				}

				this.fields = (PdfField[])fieldsList.ToArray(typeof(PdfField));
			}
		}

		/// <summary>
		/// Reads the cross-reference table whose "xref" keyword starts at the given index and
		/// records its in-use entries in <c>objects</c> and <c>offsets</c>.
		/// </summary>
		/// <remarks>
		/// The table text is taken from just behind the keyword up to the next "trailer".
		/// An object number that is already known is not replaced. For the free entry of
		/// object 0 the offset field is stored in <c>nullOffset</c>.
		/// </remarks>
		/// <param name="startxref">Index of the "xref" keyword within the document text.</param>
		private void ParseXRef(int startxref)
		{
			// skip the four characters of "xref"
			int tableStart = startxref + 4;
			int trailerIndex = pdf.IndexOf("trailer", tableStart);
			string tableText = pdf.Substring(tableStart, trailerIndex - tableStart + 1);

			// object number of the entry being read; a subsection header resets it
			int currentNumber = 0;

			foreach (Match line in refRegex.Matches(tableText))
			{
				string subsectionStart = line.Groups[2].Value;
				if (!subsectionStart.Equals(""))
				{
					currentNumber = Int32.Parse(subsectionStart, CultureInfo.InvariantCulture);
				}

				int entryOffset = Int32.Parse(line.Groups[4].Value, CultureInfo.InvariantCulture);
				int entryGeneration = Int32.Parse(line.Groups[5].Value, CultureInfo.InvariantCulture);
				bool inUse = line.Groups[6].Value.Equals("n");

				if (inUse)
				{
					// keep the entry that was registered first for this object number
					if (!objects.Contains(currentNumber))
					{
						PdfCrossReferenceEntry xrefEntry = new PdfCrossReferenceEntry(
							currentNumber, entryGeneration, entryOffset, inUse);

						offsets.Add(entryOffset, xrefEntry);
						objects.Add(currentNumber, xrefEntry);
					}
				}
				else if (currentNumber == 0)
				{
					// special case:
					// in order to build a new xref table we need the first free object number
					nullOffset = entryOffset;
				}

				currentNumber++;
			}
		}

		/// <summary>
		/// Finds where the specified object ends, defined as the start offset of the object
		/// with the next higher start offset.
		/// </summary>
		/// <remarks>
		/// If the document was updated, the returned offset may lie after the xref table and
		/// trailer that follow the specified object.
		/// </remarks>
		/// <param name="objNumber">The object number to find the offset for.</param>
		/// <returns>The end offset of the specified object or -1 if it is the last object</returns>
		private int GetEndOfObject(int objNumber)
		{
			PdfCrossReferenceEntry current = (PdfCrossReferenceEntry)objects[objNumber];

			// offsets is sorted by start offset, so the following index holds the next object
			int followingIndex = offsets.IndexOfKey(current.Offset) + 1;

			if (followingIndex >= offsets.Count)
			{
				return -1;
			}

			return ((PdfCrossReferenceEntry)offsets.GetByIndex(followingIndex)).Offset;
		}

		/// <summary>
		/// Parses the Interactive Form Dictionary referenced by the AcroForm entry in
		/// the Document Catalog.
		/// </summary>
		private void ParseAcroForm()
		{
			PdfCrossReferenceEntry documentCatalog = (PdfCrossReferenceEntry)objects[rootObjectNumber];
			int documentCatalogStart = documentCatalog.Offset;
			int documentCatalogEnd = GetEndOfObject(documentCatalog.ObjectN
上一页 1 2 3 45
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -