Beispiel #1
0
        /// <summary>
        /// Try to parse a PDF cross reference section: an "xref" keyword line followed by one or more subsections
        /// </summary>
        /// <param name="Data">PDF File Data</param>
        /// <param name="StartIndex">Base index into Data; the "xref" keyword line is read starting at StartIndex + 18</param>
        /// <param name="EndIndex">
        /// Index into Data where parsing stopped. On success this is the position reported after the last
        /// subsection that parsed; on failure it is whatever position the failing read reported.
        /// </param>
        /// <returns>
        /// The first subsection, with any further subsections appended to its Subsections collection,
        /// or NULL if the keyword line is not "xref" or the first subsection cannot be parsed
        /// </returns>
        public static PDFCrossReference ReadCrossReference(byte[] Data, int StartIndex, out int EndIndex)
        {
            // NOTE(review): the fixed 18-byte skip is not explained by anything visible here - confirm against callers
            string keyword = Encoding.UTF8.GetString(PDF.ExtractPDFLine(Data, StartIndex + 18, out EndIndex)).Trim();
            if (!"xref".Equals(keyword))
            {
                return null;
            }

            // The first subsection is mandatory and doubles as the container for the rest
            PDFCrossReference root = ReadCrossReferenceSubsection(Data, EndIndex, out EndIndex);
            if (root == null)
            {
                return null;
            }

            // Keep consuming subsections until one fails to parse
            while (true)
            {
                int resumeAt = EndIndex;
                PDFCrossReference next = ReadCrossReferenceSubsection(Data, resumeAt, out EndIndex);
                if (next == null)
                {
                    // Rewind so the caller sees the position just after the last good subsection
                    EndIndex = resumeAt;
                    break;
                }
                root.Subsections.Add(next);
            }

            return root;
        }
Beispiel #2
0
        /// <summary>
        /// Attempt to parse a PDF comment from the given data stream
        /// </summary>
        /// <param name="StartingToken">The token immediately preceding the starting index in Data stream; treated as a comment when its first character is PDF.CommentDelimiter</param>
        /// <param name="Data">Raw byte stream to parse</param>
        /// <param name="StartingIndex">0-based starting index into Data where StartingToken appears</param>
        /// <param name="EndingIndex">Index reported by PDF.ExtractPDFLine when a comment is read; otherwise set to StartingIndex</param>
        /// <returns>A PDFComment built from the line beginning at StartingIndex + 1, or NULL if StartingToken is null, empty, or does not begin with the comment delimiter</returns>
        public static IPDFObject TryParse(string StartingToken, byte[] Data, int StartingIndex, out int EndingIndex)
        {
            bool startsComment = !String.IsNullOrEmpty(StartingToken) && (StartingToken[0] == PDF.CommentDelimiter);
            if (!startsComment)
            {
                // Not a comment: nothing consumed
                EndingIndex = StartingIndex;
                return null;
            }

            // Skip the delimiter itself and take the remainder of the line as the comment body
            byte[] commentBytes = PDF.ExtractPDFLine(Data, StartingIndex + 1, out EndingIndex);
            return new PDFComment(commentBytes, StartingIndex);
        }
Beispiel #3
0
        /// <summary>
        /// Read the trailer from a PDF data file
        /// </summary>
        /// <param name="Data">Data to read</param>
        /// <param name="StartIndex">Index from which to search backwards for the "trailer" keyword, or a negative value to start from the position reported by PDF.FindEOF (default: -1)</param>
        /// <returns>
        /// The parsed trailer (trailer dictionary plus the cross reference section located through "startxref"),
        /// or NULL if no starting position, "trailer" keyword, trailer dictionary, "startxref" keyword or
        /// numeric cross reference offset could be read
        /// </returns>
        public static PDFTrailer ReadTrailer(byte[] Data, int StartIndex = -1)
        {
            int EndIndex = StartIndex;

            if (EndIndex < 0)
            {
                EndIndex = PDF.FindEOF(Data, Data.Length - 1);
            }

            if (EndIndex < 0)
            {
                return(null);
            }

            // Walk backwards one line at a time until the "trailer" keyword line is found
            int    EndOfLineIndex;
            byte[] LineData = PDF.ExtractPreviousPDFLine(Data, EndIndex, out EndIndex, out EndOfLineIndex);
            while ((LineData != null) && !"trailer".Equals(Encoding.UTF8.GetString(LineData).Trim()))
            {
                LineData = PDF.ExtractPreviousPDFLine(Data, EndIndex, out EndIndex, out EndOfLineIndex);
            }

            if (LineData == null)
            {
                return(null); // Ran out of lines without seeing "trailer"
            }

            // The trailer dictionary follows the keyword line. A pattern match is used instead of a hard
            // cast so that a missing or unexpected object yields NULL rather than an exception.
            int    TokenStartIndex;
            string Token = PDFObjectParser.GetTokenString(Data, EndOfLineIndex, out TokenStartIndex, out EndIndex);
            if (!(PDFDictionary.TryParse(Token, Data, TokenStartIndex, out EndIndex) is PDFDictionary TrailerDictionary))
            {
                return(null);
            }

            // The line after the dictionary must be the "startxref" keyword
            LineData = PDF.ExtractPDFLine(Data, EndIndex, out EndIndex);
            if ((LineData == null) || !"startxref".Equals(Encoding.UTF8.GetString(LineData).Trim()))
            {
                return(null);
            }

            // The token after "startxref" is the offset of the cross reference section
            Token = PDFObjectParser.GetTokenString(Data, EndIndex, out TokenStartIndex, out _);
            if (!(PDFNumber.TryParse(Token, Data, TokenStartIndex, out EndIndex) is PDFNumber Offset))
            {
                return(null); // Offset missing or not a number
            }

            // CrossRef may itself be NULL if the section cannot be parsed; the trailer is still returned
            PDFCrossReference CrossRef = PDFCrossReference.ReadCrossReference(Data, Offset, out _);
            return(new PDFTrailer(TrailerDictionary, CrossRef));
        }
Beispiel #4
0
        /// <summary>
        /// Attempt to read a PDF cross reference subsection from raw data starting at a given offset
        /// </summary>
        /// <param name="Data">PDF File Data</param>
        /// <param name="StartIndex">Index into Data where cross reference subsection should start</param>
        /// <param name="EndIndex">Index into Data where parsing of the cross reference subsection stopped (successfully or not)</param>
        /// <returns>
        /// Instance of PDF Cross Reference Subsection on success. NULL if the header line is missing or is not
        /// two non-negative numbers separated by a single space, if any entry line is missing or malformed,
        /// or if an in-use entry's offset does not fit in an int
        /// </returns>
        private static PDFCrossReference ReadCrossReferenceSubsection(byte[] Data, int StartIndex, out int EndIndex)
        {
            byte[] HeaderLine = PDF.ExtractPDFLine(Data, StartIndex, out EndIndex);
            if (HeaderLine == null)
            {
                return(null); // No more data to read
            }

            // Header is "<first object number> <entry count>"; trimmed so surrounding whitespace
            // does not produce extra (empty) fields
            string[] Definitions = Encoding.UTF8.GetString(HeaderLine).Trim().Split(' ');
            if (Definitions.Length != 2)
            {
                return(null);
            }

            int StartObject = PDFNumber.TryParse(Definitions[0], -1);
            int NumObjects  = PDFNumber.TryParse(Definitions[1], -1);
            if ((StartObject < 0) || (NumObjects < 0))
            {
                return(null);
            }

            PDFCrossReference Subsection = new PDFCrossReference(StartObject, NumObjects);
            for (int i = 0; i < NumObjects; i++)
            {
                // Parse cross reference table entries
                byte[] EntryLine = PDF.ExtractPDFLine(Data, EndIndex, out EndIndex);
                if (EntryLine == null)
                {
                    return(null); // Fewer entries than the header declared
                }

                // 10-digit offset, 5-digit generation, then 'n' (in use) or 'f' (free).
                // [0-9] rather than \d so that only ASCII digits, which int parsing accepts, can match.
                Match Entry = Regex.Match(Encoding.UTF8.GetString(EntryLine), @"([0-9]{10}) ([0-9]{5}) ([nf])");
                if (!Entry.Success)
                {
                    return(null); // Invalid Cross Reference Section
                }

                // Free entries are recorded with offset 0; only in-use entries have their offset parsed.
                // A 10-digit value can exceed int.MaxValue, so a failed parse means an unsupported offset.
                int Offset = 0;
                if ((Entry.Groups[3].Value[0] == 'n') && !int.TryParse(Entry.Groups[1].Value, out Offset))
                {
                    return(null);
                }

                Subsection.ObjectOffsets[i]     = Offset;
                Subsection.ObjectGenerations[i] = int.Parse(Entry.Groups[2].Value); // 5 ASCII digits always fit
            }

            return(Subsection);
        }
Beispiel #5
0
        /// <summary>
        /// Determine the PDF version declared by the header of the data held in RawData
        /// </summary>
        /// <param name="isBinary">Set to TRUE when the comment line following the header contains more than three bytes with a value of 128 or greater, FALSE otherwise</param>
        /// <returns>PDF version number as indicated by the "%PDF-" header comment, or 0.0 if the data does not start with a parsable PDF header</returns>
        private float ReadPDFHeader(out bool isBinary)
        {
            isBinary = false;

            // Check for PDF version
            int    CommentEnd;
            byte[] HeaderBytes;
            PDFComment.ExtractPDFComment(RawData, out HeaderBytes, out _, out CommentEnd);
            if (HeaderBytes == null)
            {
                return(0.0f); // No leading comment at all, so this cannot be a PDF header
            }

            float HeaderVersion = 0.0f;
            if (HeaderBytes.Length > 5)
            {
                string HeaderComment = Encoding.UTF8.GetString(HeaderBytes);

                // StartsWith instead of Substring(0, 5): multi-byte sequences can decode to fewer than 5 characters.
                // Invariant culture: the version always uses '.' as the decimal separator regardless of machine locale.
                if (!HeaderComment.StartsWith("%PDF-", System.StringComparison.Ordinal) ||
                    !float.TryParse(HeaderComment.Substring(5),
                                    System.Globalization.NumberStyles.Float,
                                    System.Globalization.CultureInfo.InvariantCulture,
                                    out HeaderVersion))
                {
                    HeaderVersion = 0.0f;
                }
            }

            // Check for 8-bit-data flag
            byte[] NextBytes = PDF.ExtractPDFLine(RawData, CommentEnd, out CommentEnd);      // Read to next EOL
            if (NextBytes == null)
            {
                return(HeaderVersion); // Nothing follows the header
            }

            PDFComment.ExtractPDFComment(NextBytes, out HeaderBytes, out _, out CommentEnd); // Attempt to find a comment line
            if (HeaderBytes != null)
            {
                // Binary characters are those with codes of 128 or greater
                int binCount = 0;
                foreach (byte Value in HeaderBytes)
                {
                    if (Value >= 128)
                    {
                        binCount++;
                    }
                }
                isBinary = (binCount > 3);
            }

            return(HeaderVersion);
        }
Beispiel #6
0
 /// <summary>
 /// Locate a PDF comment within a line of data and extract it
 /// </summary>
 /// <param name="OriginalLine">Data line as read from PDF file</param>
 /// <param name="Comment">Bytes returned by PDF.ExtractPDFLine starting at the comment position (presumably null when the line has no comment - verify against PDF.ExtractPDFLine)</param>
 /// <param name="CommentStart">Position reported by PDF.CommentStart for OriginalLine (presumably -1 when the line has no comment - verify against PDF.CommentStart)</param>
 /// <param name="CommentEnd">End index reported by PDF.ExtractPDFLine for the extracted comment</param>
 public static void ExtractPDFComment(byte[] OriginalLine, out byte[] Comment, out int CommentStart, out int CommentEnd)
 {
     // Find where the comment begins, then read from there to the end of the line
     int delimiterIndex = PDF.CommentStart(OriginalLine);
     byte[] commentBytes = PDF.ExtractPDFLine(OriginalLine, delimiterIndex, out CommentEnd);

     CommentStart = delimiterIndex;
     Comment      = commentBytes;
 }