/// <summary>
/// Attempt to read a PDF cross reference section from raw data starting at a given offset.
/// </summary>
/// <remarks>
/// The line found at <paramref name="StartIndex"/> must trim to the keyword "xref". It is
/// followed by one or more subsections: the first subsection becomes the returned instance
/// and every further subsection is appended to its Subsections collection.
/// </remarks>
/// <param name="Data">PDF File Data</param>
/// <param name="StartIndex">Index into Data where cross reference section should start</param>
/// <param name="EndIndex">Index into Data where parsing of the cross reference section stopped (successfully or not)</param>
/// <returns>Instance of PDF Cross Reference Section on success, NULL on failure to parse</returns>
public static PDFCrossReference ReadCrossReference(byte[] Data, int StartIndex, out int EndIndex)
{
    // The "xref" keyword is expected exactly at StartIndex, so the declaration line is
    // read from there (not from an offset past it).
    byte[] DeclarationLine = PDF.ExtractPDFLine(Data, StartIndex, out EndIndex);
    if (DeclarationLine == null)
    {
        // No line could be extracted; treat as "not a cross reference section".
        return(null);
    }

    string Declaration = Encoding.UTF8.GetString(DeclarationLine).Trim();
    if (!"xref".Equals(Declaration))
    {
        return(null);
    }

    // The first subsection is mandatory and doubles as the container for the rest.
    PDFCrossReference section = ReadCrossReferenceSubsection(Data, EndIndex, out EndIndex);
    if (section == null)
    {
        return(null);
    }

    PDFCrossReference subSection;
    do
    {
        StartIndex = EndIndex;
        subSection = ReadCrossReferenceSubsection(Data, StartIndex, out EndIndex);
        if (subSection != null)
        {
            section.Subsections.Add(subSection);
        }
        else
        {
            // Rewind to the end of the last subsection that parsed successfully.
            EndIndex = StartIndex;
        }
    }
    while (subSection != null);

    return(section);
}
/// <summary>
/// Attempt to parse a PDF comment from the given data stream.
/// </summary>
/// <param name="StartingToken">The token immediately preceding the starting index in Data stream</param>
/// <param name="Data">Raw byte stream to parse</param>
/// <param name="StartingIndex">0-based starting index into Data where StartingToken appears</param>
/// <param name="EndingIndex">Index into data stream where parsing ended; equal to StartingIndex when the token does not begin with the comment delimiter</param>
/// <returns>A PDFComment when StartingToken begins with PDF.CommentDelimiter, otherwise NULL</returns>
public static IPDFObject TryParse(string StartingToken, byte[] Data, int StartingIndex, out int EndingIndex)
{
    bool startsComment = !String.IsNullOrEmpty(StartingToken)
                         && (StartingToken[0] == PDF.CommentDelimiter);

    if (!startsComment)
    {
        // Not a comment: report that nothing was consumed.
        EndingIndex = StartingIndex;
        return(null);
    }

    // The line is read from one byte past StartingIndex
    // (presumably to skip the delimiter itself -- TODO confirm against PDF.ExtractPDFLine).
    byte[] commentBytes = PDF.ExtractPDFLine(Data, StartingIndex + 1, out EndingIndex);
    return(new PDFComment(commentBytes, StartingIndex));
}
/// <summary>
/// Read the trailer from a PDF data file.
/// </summary>
/// <remarks>
/// Lines are scanned backwards from the starting position until one trims to "trailer".
/// The trailer dictionary, the "startxref" keyword and the cross reference offset that
/// follow it are then parsed, and the cross reference section at that offset is read.
/// </remarks>
/// <param name="Data">Data to read</param>
/// <param name="StartIndex">Starting index of where to look for trailer, or -1 to look from end of file (default: -1)</param>
/// <returns>The parsed trailer, or NULL if Data is null or no well-formed trailer could be read</returns>
public static PDFTrailer ReadTrailer(byte[] Data, int StartIndex = -1)
{
    if (Data == null)
    {
        return(null);
    }

    int EndIndex = StartIndex;
    if (EndIndex < 0)
    {
        EndIndex = PDF.FindEOF(Data, Data.Length - 1);
    }
    if (EndIndex < 0)
    {
        return(null);
    }

    int EndOfLineIndex;
    byte[] LineData = PDF.ExtractPreviousPDFLine(Data, EndIndex, out EndIndex, out EndOfLineIndex);
    while (LineData != null)
    {
        if (!"trailer".Equals(Encoding.UTF8.GetString(LineData).Trim()))
        {
            // Not the trailer keyword yet; keep walking backwards one line at a time.
            LineData = PDF.ExtractPreviousPDFLine(Data, EndIndex, out EndIndex, out EndOfLineIndex);
            continue;
        }

        // The trailer dictionary follows the "trailer" line.
        int TokenStartIndex;
        string Token = PDFObjectParser.GetTokenString(Data, EndOfLineIndex, out TokenStartIndex, out EndIndex);
        if (!(PDFDictionary.TryParse(Token, Data, TokenStartIndex, out EndIndex) is PDFDictionary TrailerDictionary))
        {
            // Missing dictionary, or some other object type where the dictionary should be.
            return(null);
        }

        // The dictionary must be followed by the "startxref" keyword on its own line.
        LineData = PDF.ExtractPDFLine(Data, EndIndex, out EndIndex);
        if ((LineData == null) || !"startxref".Equals(Encoding.UTF8.GetString(LineData).Trim()))
        {
            return(null);
        }

        // The next token is the offset of the cross reference section.
        Token = PDFObjectParser.GetTokenString(Data, EndIndex, out TokenStartIndex, out _);
        if (!(PDFNumber.TryParse(Token, Data, TokenStartIndex, out EndIndex) is PDFNumber Offset))
        {
            // Without a numeric offset there is no cross reference section to read.
            return(null);
        }

        PDFCrossReference CrossRef = PDFCrossReference.ReadCrossReference(Data, Offset, out _);
        return(new PDFTrailer(TrailerDictionary, CrossRef));
    }

    return(null);
}
/// <summary>
/// Attempt to read a PDF cross reference subsection from raw data starting at a given offset.
/// </summary>
/// <remarks>
/// A subsection starts with a header line of two space-separated numbers (first object
/// number and entry count), followed by one entry line per object. Each entry must contain
/// a 10-digit offset, a 5-digit generation and the type letter 'n' (in use) or 'f' (free).
/// Free entries are stored with an offset of 0.
/// </remarks>
/// <param name="Data">PDF File Data</param>
/// <param name="StartIndex">Index into Data where cross reference subsection should start</param>
/// <param name="EndIndex">Index into Data where parsing of the cross reference subsection stopped (successfully or not)</param>
/// <returns>Instance of PDF Cross Reference Subsection on success, NULL on failure to parse</returns>
private static PDFCrossReference ReadCrossReferenceSubsection(byte[] Data, int StartIndex, out int EndIndex)
{
    byte[] HeaderLine = PDF.ExtractPDFLine(Data, StartIndex, out EndIndex);
    if (HeaderLine == null)
    {
        return(null);
    }

    // Trim so surrounding whitespace / end-of-line bytes do not break the two-field split.
    string[] Definitions = Encoding.UTF8.GetString(HeaderLine).Trim().Split(' ');
    if (Definitions.Length != 2)
    {
        return(null);
    }

    int StartObject = PDFNumber.TryParse(Definitions[0], -1);
    int NumObjects = PDFNumber.TryParse(Definitions[1], -1);
    if ((StartObject < 0) || (NumObjects < 0))
    {
        return(null);
    }

    PDFCrossReference Subsection = new PDFCrossReference(StartObject, NumObjects);
    for (int i = 0; i < NumObjects; i++)
    {
        // Parse cross reference table entries
        byte[] EntryLine = PDF.ExtractPDFLine(Data, EndIndex, out EndIndex);
        if (EntryLine == null)
        {
            return(null);   // Data ended before all announced entries were read
        }

        // ASCII digits only; the type letter is strictly 'n' or 'f'.
        Match objMatch = Regex.Match(Encoding.UTF8.GetString(EntryLine), @"([0-9]{10}) ([0-9]{5}) ([nf])");
        if (!objMatch.Success)
        {
            return(null);   // Invalid Cross Reference Section
        }

        // Five ASCII digits always fit in an int.
        int Generation = int.Parse(
            objMatch.Groups[2].Value,
            System.Globalization.NumberStyles.None,
            System.Globalization.CultureInfo.InvariantCulture);

        if (objMatch.Groups[3].Value[0] == 'n')
        {
            // Add active object to table. A 10-digit offset can exceed int.MaxValue, so
            // parse defensively and reject the section instead of throwing.
            int Offset;
            if (!int.TryParse(
                    objMatch.Groups[1].Value,
                    System.Globalization.NumberStyles.None,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out Offset))
            {
                return(null);
            }
            Subsection.ObjectOffsets[i] = Offset;
            Subsection.ObjectGenerations[i] = Generation;
        }
        else
        {
            // Add deleted object to table
            Subsection.ObjectOffsets[i] = 0;
            Subsection.ObjectGenerations[i] = Generation;
        }
    }

    return(Subsection);
}
/// <summary>
/// Determine the PDF version declared by the header of the data held in RawData.
/// </summary>
/// <remarks>
/// The first comment found in RawData is expected to be "%PDF-" followed by the version
/// number. The line after it is then inspected for a comment containing at least four
/// bytes with the high bit set, which marks the file as carrying binary content.
/// </remarks>
/// <param name="isBinary">Does the PDF header indicate binary content?</param>
/// <returns>PDF version number as indicated by the PDF header, or 0.0 if the data does not appear to start with a PDF header</returns>
private float ReadPDFHeader(out bool isBinary)
{
    int CommentEnd;
    float HeaderVersion = 0.0f;
    isBinary = false;

    // Check for PDF version
    byte[] HeaderBytes;
    PDFComment.ExtractPDFComment(RawData, out HeaderBytes, out _, out CommentEnd);
    if ((HeaderBytes != null) && (HeaderBytes.Length > 5))
    {
        string HeaderComment = Encoding.UTF8.GetString(HeaderBytes);
        if ("%PDF-".Equals(HeaderComment.Substring(0, 5)))
        {
            // The version always uses '.' as decimal separator, so parse with the
            // invariant culture rather than whatever culture the thread runs under.
            if (!float.TryParse(
                    HeaderComment.Substring(5),
                    System.Globalization.NumberStyles.Float,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out HeaderVersion))
            {
                HeaderVersion = 0.0f;
            }
        }
    }

    // Check for 8-bit-data flag
    byte[] NextBytes = PDF.ExtractPDFLine(RawData, CommentEnd, out CommentEnd);    // Read to next EOL
    if (NextBytes != null)
    {
        PDFComment.ExtractPDFComment(NextBytes, out HeaderBytes, out _, out CommentEnd);   // Attempt to find a comment line
        if (HeaderBytes != null)
        {
            int binCount = 0;
            foreach (byte Byte in HeaderBytes)
            {
                // Binary characters are those with codes of 128 or greater.
                if (Byte >= 128)
                {
                    binCount++;
                }
            }
            isBinary = (binCount > 3);
        }
    }

    return(HeaderVersion);
}
/// <summary>
/// Locate the PDF comment inside a block of data and extract its bytes.
/// </summary>
/// <remarks>
/// The comment position comes from PDF.CommentStart and the comment bytes from
/// PDF.ExtractPDFLine starting at that position. The "no comment" results described below
/// depend on how those two helpers behave, which is not visible here -- confirm against them.
/// </remarks>
/// <param name="OriginalLine">Data line as read from PDF file</param>
/// <param name="Comment">Complete text of the comment in the line; expected to be null if line contains no comment</param>
/// <param name="CommentStart">Index into OriginalLine of comment character; expected to be -1 if line contains no comment</param>
/// <param name="CommentEnd">Index into OriginalLine where extraction of the comment line ended (after EOL); expected to be -1 if line contains no comment</param>
public static void ExtractPDFComment(byte[] OriginalLine, out byte[] Comment, out int CommentStart, out int CommentEnd)
{
    int delimiterIndex = PDF.CommentStart(OriginalLine);
    CommentStart = delimiterIndex;

    // The comment runs from its delimiter to the end of that line.
    Comment = PDF.ExtractPDFLine(OriginalLine, delimiterIndex, out CommentEnd);
}