/// <summary>
/// Read the next token from the data and offer it to each known PDF object parser in turn,
/// returning the first object that any of them produces
/// </summary>
/// <param name="Data">Bytes to parse</param>
/// <param name="EndingIndex">0-based index into data reported by the parser that succeeded; set to StartingIndex when nothing could be parsed</param>
/// <param name="StartingIndex">0-based index into data at which to start parsing (default: 0)</param>
/// <returns>PDF object successfully parsed from the data, or NULL if no complete object was parsable</returns>
public static IPDFObject Parse(byte[] Data, out int EndingIndex, int StartingIndex = 0)
{
    string Token = GetTokenString(Data, StartingIndex, out int TokenStart, out _);

    // Without a token there is nothing to hand to the individual parsers
    if (string.IsNullOrEmpty(Token))
    {
        EndingIndex = StartingIndex;
        return(null);
    }

    // The parsers are tried in a fixed order; the first non-null result wins
    IPDFObject Candidate;

    Candidate = PDFDictionary.TryParse(Token, Data, TokenStart, out EndingIndex);
    if (Candidate != null)
    {
        return(Candidate);
    }

    Candidate = PDFArray.TryParse(Token, Data, TokenStart, out EndingIndex);
    if (Candidate != null)
    {
        return(Candidate);
    }

    Candidate = PDFString.TryParse(Token, Data, TokenStart, out EndingIndex);
    if (Candidate != null)
    {
        return(Candidate);
    }

    Candidate = PDFHexString.TryParse(Token, Data, TokenStart, out EndingIndex);
    if (Candidate != null)
    {
        return(Candidate);
    }

    Candidate = PDFObjectDefinition.TryParse(Token, Data, TokenStart, out EndingIndex);
    if (Candidate != null)
    {
        return(Candidate);
    }

    Candidate = PDFIndirectObject.TryParse(Token, Data, TokenStart, out EndingIndex);
    if (Candidate != null)
    {
        return(Candidate);
    }

    Candidate = PDFNumber.TryParse(Token, Data, TokenStart, out EndingIndex);
    if (Candidate != null)
    {
        return(Candidate);
    }

    Candidate = PDFName.TryParse(Token, Data, TokenStart, out EndingIndex);
    if (Candidate != null)
    {
        return(Candidate);
    }

    Candidate = PDFComment.TryParse(Token, Data, TokenStart, out EndingIndex);
    if (Candidate != null)
    {
        return(Candidate);
    }

    // Could not parse next object
    EndingIndex = StartingIndex;
    return(null);
}
/// <summary>
/// Attempt to parse a string as a PDF Number definition (ignoring whitespace characters, etc)
/// </summary>
/// <param name="Token">String to parse</param>
/// <param name="Default">Value to return if unable to parse (default: null)</param>
/// <returns>PDFNumber represented by the string, or Default if unable to parse</returns>
public static PDFNumber TryParse(string Token, PDFNumber Default = null)
{
    // Delegate to the token/data overload with no backing byte array; the ending index is not needed here
    PDFNumber Result = (PDFNumber)TryParse(Token, null, 0, out _);
    return((Result == null) ? Default : Result);
}
/// <summary>
/// Read the trailer from a PDF data file
/// </summary>
/// <param name="Data">Data to read</param>
/// <param name="StartIndex">Starting index of where to look for trailer, or -1 to look from end of file (default: -1)</param>
/// <returns>
/// Trailer built from the trailer dictionary and the cross reference named by "startxref",
/// or NULL if no "trailer" line, trailer dictionary, "startxref" line or numeric offset could be read
/// </returns>
public static PDFTrailer ReadTrailer(byte[] Data, int StartIndex = -1)
{
    int EndIndex = StartIndex;
    if (EndIndex < 0)
    {
        EndIndex = PDF.FindEOF(Data, Data.Length - 1);
    }
    if (EndIndex < 0)
    {
        return(null);
    }

    // Walk backwards line by line until the "trailer" keyword is found or the lines run out
    int EndOfLineIndex;
    byte[] LineData = PDF.ExtractPreviousPDFLine(Data, EndIndex, out EndIndex, out EndOfLineIndex);
    while ((LineData != null) && !"trailer".Equals(Encoding.UTF8.GetString(LineData).Trim()))
    {
        LineData = PDF.ExtractPreviousPDFLine(Data, EndIndex, out EndIndex, out EndOfLineIndex);
    }
    if (LineData == null)
    {
        return(null);
    }

    // The trailer dictionary is parsed starting from the end of the "trailer" line
    int TokenStartIndex = 0;
    string Token = PDFObjectParser.GetTokenString(Data, EndOfLineIndex, out TokenStartIndex, out EndIndex);
    PDFDictionary TrailerDictionary = (PDFDictionary)PDFDictionary.TryParse(Token, Data, TokenStartIndex, out EndIndex);
    if (TrailerDictionary == null)
    {
        return(null);
    }

    // A missing line (e.g. truncated file) must be rejected before decoding:
    // Encoding.GetString throws on a null byte array
    LineData = PDF.ExtractPDFLine(Data, EndIndex, out EndIndex);
    if ((LineData == null) || !"startxref".Equals(Encoding.UTF8.GetString(LineData).Trim()))
    {
        return(null);
    }

    Token = PDFObjectParser.GetTokenString(Data, EndIndex, out TokenStartIndex, out _);
    PDFNumber Offset = (PDFNumber)PDFNumber.TryParse(Token, Data, TokenStartIndex, out EndIndex);
    if (Offset is null)
    {
        // "startxref" was not followed by a parsable number, so there is no offset to read the cross reference from
        return(null);
    }

    PDFCrossReference CrossRef = PDFCrossReference.ReadCrossReference(Data, Offset, out _);
    return(new PDFTrailer(TrailerDictionary, CrossRef));
}
/// <summary>
/// Attempt to read a PDF cross reference subsection from raw data starting at a given offset
/// </summary>
/// <param name="Data">PDF File Data</param>
/// <param name="StartIndex">Index into Data where cross reference subsection should start</param>
/// <param name="EndIndex">Index into Data where parsing of the cross reference subsection stopped (successfully or not)</param>
/// <returns>Instance of PDF Cross Reference Subsection on success, NULL on failure to parse</returns>
private static PDFCrossReference ReadCrossReferenceSubsection(byte[] Data, int StartIndex, out int EndIndex)
{
    // Subsection header: "<first object number> <number of entries>"
    byte[] DeclarationBytes = PDF.ExtractPDFLine(Data, StartIndex, out EndIndex);
    if (DeclarationBytes == null)
    {
        return(null); // No line available; Encoding.GetString would throw on null
    }

    string[] Definitions = Encoding.UTF8.GetString(DeclarationBytes).Split(' ');
    if (Definitions.Length != 2)
    {
        return(null);
    }

    int StartObject = PDFNumber.TryParse(Definitions[0], -1);
    int NumObjects = PDFNumber.TryParse(Definitions[1], -1);
    if ((StartObject < 0) || (NumObjects < 0))
    {
        return(null);
    }

    PDFCrossReference Subsection = new PDFCrossReference(StartObject, NumObjects);
    for (int i = 0; i < NumObjects; i++)
    {
        // Parse cross reference table entries
        byte[] EntryBytes = PDF.ExtractPDFLine(Data, EndIndex, out EndIndex);
        if (EntryBytes == null)
        {
            return(null); // Data ended before all declared entries were read
        }

        // [0-9] rather than \d: \d also matches non-ASCII digits, which int.Parse rejects.
        // [nf] rather than [n|f]: inside a character class '|' is a literal, not alternation.
        Match EntryMatch = Regex.Match(Encoding.UTF8.GetString(EntryBytes), @"([0-9]{10}) ([0-9]{5}) ([nf])");
        if (!EntryMatch.Success)
        {
            return(null); // Invalid Cross Reference Section
        }

        // Five ASCII digits always fit in an int
        int Generation = int.Parse(EntryMatch.Groups[2].Value);

        if (EntryMatch.Groups[3].Value[0] == 'n')
        {
            // Add active object to table. A ten digit offset can exceed int.MaxValue,
            // so treat an unrepresentable offset as an invalid entry instead of throwing.
            int Offset;
            if (!int.TryParse(EntryMatch.Groups[1].Value, out Offset))
            {
                return(null);
            }
            Subsection.ObjectOffsets[i] = Offset;
            Subsection.ObjectGenerations[i] = Generation;
        }
        else
        {
            // Add deleted object to table (the pattern only admits 'n' or 'f', so this is 'f')
            Subsection.ObjectOffsets[i] = 0;
            Subsection.ObjectGenerations[i] = Generation;
        }
    }

    return(Subsection);
}
/// <summary>
/// Attempt to read a PDF cross reference section from raw data starting at a given offset
/// </summary>
/// <param name="Data">PDF File Data</param>
/// <param name="StartIndex">Index into Data where cross reference section should start; its Value is converted to an int</param>
/// <param name="EndIndex">Index into Data where parsing of the cross reference section stopped (successfully or not); 0 when StartIndex is NULL</param>
/// <returns>Instance of PDF Cross Reference Section on success, NULL on failure to parse or when StartIndex is NULL</returns>
public static PDFCrossReference ReadCrossReference(byte[] Data, PDFNumber StartIndex, out int EndIndex)
{
    // Callers may pass the result of a failed number parse; report that as a failed read
    // instead of dereferencing a null offset
    if (StartIndex is null)
    {
        EndIndex = 0;
        return(null);
    }

    return(ReadCrossReference(Data, (int)StartIndex.Value, out EndIndex));
}
/// <summary>
/// Create a PDF stream from its dictionary and its data bytes
/// </summary>
/// <param name="StreamDictionary">Dictionary for the stream; its "Length" entry is set (on the passed instance) to the number of bytes in StreamData</param>
/// <param name="StreamData">Bytes of the stream</param>
public PDFStream(PDFDictionary StreamDictionary, byte[] StreamData)
{
    // Keep the dictionary's declared length in step with the supplied data
    PDFNumber LengthEntry = new PDFNumber(StreamData.Length);
    StreamDictionary["Length"] = LengthEntry;

    this.StreamDictionary = StreamDictionary;
    this.Data = StreamData;
}