Пример #1
0
        /// <summary>
        /// Builds a <see cref="PDFFileIndirectObject"/> for the given object number and generation
        /// by reading its text (and stream bytes, when a stream is present) from the searcher.
        /// </summary>
        /// <param name="searcher">The searcher to read from. It is expected to already be positioned at the object header.</param>
        /// <param name="number">The object number expected in the "N G obj" header.</param>
        /// <param name="gen">The generation number expected in the "N G obj" header.</param>
        /// <returns>The parsed indirect object, with its offset set and its data parsed.</returns>
        /// <exception cref="PDFNativeParserException">
        /// Thrown when the header text does not match, when no "endobj" is found,
        /// or when an "endstream" is found without a preceding "stream".
        /// </exception>
        public static PDFFileIndirectObject Parse(PDFTextSearcher searcher, int number, int gen)
        {
            const string endObjectKeyword = "endobj";
            const string endStreamKeyword = "endstream";
            const string streamKeyword    = "stream";

            // The header every indirect object must begin with, e.g. "12 0 obj".
            string header = string.Concat(number.ToString(), " ", gen.ToString(), " obj");

            if (searcher.GetInnerText(header.Length) != header)
            {
                throw new PDFNativeParserException(string.Format(CommonErrors.IndirectObjectCannotBeParsed, header));
            }

            long objectStart = searcher.Position;

            // By default the object's text runs up to the "endobj" keyword.
            PDFFileRange dataEnd = searcher.MatchForwardString(endObjectKeyword);

            if (!dataEnd.Found)
            {
                throw new PDFNativeParserException(string.Format(CommonErrors.IndirectObjectCannotBeParsed, header));
            }

            // Look back (no further than the object start) for a stream terminator.
            PDFFileRange streamEnd = searcher.MatchBackwardString(endStreamKeyword, objectStart);
            PDFFileRange streamStart;

            if (!streamEnd.Found)
            {
                streamStart = PDFFileRange.NotFound;
            }
            else
            {
                streamStart = searcher.MatchBackwardString(streamKeyword, objectStart);

                if (!streamStart.Found)
                {
                    throw new PDFNativeParserException(string.Format(CommonErrors.IndirectObjectCannotBeParsed, header));
                }

                // With a stream present, the textual part of the object stops where the stream begins.
                dataEnd = streamStart;
            }

            string content = searcher.GetInnerText(objectStart, (int)(dataEnd.StartOffset - objectStart));

            PDFFileIndirectObject result = new PDFFileIndirectObject(number, gen, content);
            result._offset = objectStart;

            if (streamStart.Found)
            {
                result.SetStreamData(searcher.GetInnerBytes(streamStart, streamEnd));
            }

            result.EnsureDataParsed();

            return result;
        }
Пример #2
0
        //
        // public methods
        //

        #region public IIndirectObject GetIndirectObject(PDFObjectRef oref)

        /// <summary>
        /// Looks up the cross reference entry for the provided reference and returns its parsed indirect object.
        /// The object is parsed from the file the first time it is requested and then kept on the entry.
        /// </summary>
        /// <param name="oref">The reference (number and generation) of the object to load.</param>
        /// <returns>
        /// The parsed indirect object, or null when the cross reference table has no entry
        /// for the reference or the entry is marked as free.
        /// </returns>
        public override IParsedIndirectObject GetObject(PDFObjectRef oref)
        {
            PDFXRefTableEntry found = this.XRefTable[oref];

            // Nothing to return for unknown or free entries.
            if (null == found || found.Free)
            {
                return null;
            }

            if (null == found.Reference)
            {
                // Not loaded yet - move the searcher to the entry's offset and parse the object there.
                this.Searcher.Position = found.Offset;
                found.Reference        = PDFFileIndirectObject.Parse(this.Searcher, oref.Number, oref.Generation);
            }

            return (IParsedIndirectObject)found.Reference;
        }
Пример #3
0
        /// <summary>
        /// Initializes the known PDF file data such as trailers, xref tables and catalogs.
        /// Searches backwards from the end of the file for the end-of-file, startxref and trailer markers,
        /// loads the trailer dictionary and the XRef table, and then loads the document Info and Catalog objects.
        /// </summary>
        /// <param name="log">The trace log that receives debug progress messages.</param>
        /// <exception cref="PDFNativeParserException">
        /// Thrown when any step fails; the original exception is supplied as the inner exception.
        /// </exception>
        protected override void InitData(PDFTraceLog log)
        {
            try
            {
                if (log.ShouldLog(TraceLevel.Debug))
                {
                    log.Add(TraceLevel.Debug, "PDFReader", "Finding end of file, startxref and trailer positions");
                }

                // Start at the very end and walk backwards; each backward match continues from the previous one.
                this.Searcher.Position = this.Searcher.Length;
                PDFFileRange eofPos       = AssertFoundRange(Searcher.MatchBackwardString(EndOfFileMarker), EndOfFileMarker);
                PDFFileRange startxrefPos = AssertFoundRange(Searcher.MatchBackwardString(StartXRefMarker), StartXRefMarker);

                PDFFileRange trailerPos = AssertFoundRange(Searcher.MatchBackwardString(TrailerMarker), TrailerMarker);

                if (log.ShouldLog(TraceLevel.Debug))
                {
                    log.Add(TraceLevel.Debug, "PDFReader", "Markers found, loading the trailer dictionary");
                }

                PDFDictionary trailer = GetTrailerDictionary(trailerPos, startxrefPos);
                this._trailer = trailer;

                if (log.ShouldLog(TraceLevel.Debug))
                {
                    log.Add(TraceLevel.Debug, "PDFReader", "Markers found, loading the XRef table");
                }

                PDFObjectRef catalogRef = AssertGetObjectRef(trailer, CatalogObjName, "The '" + CatalogObjName + "' entry couldnot be found in the documents trailer dictionary");
                // NOTE(review): the PDF specification lists the trailer's Info entry as optional -
                // confirm whether failing here for files without one is intended.
                PDFObjectRef infoRef    = AssertGetObjectRef(trailer, InfoObjName, "The '" + InfoObjName + "' entry couldnot be found in the documents trailer dictionary");

                // Offset of a previous XRef section, or -1 when the trailer has no usable entry for it.
                IFileObject prevXRefObj;
                trailer.TryGetValue(PrevXRefName, out prevXRefObj);
                long prevOffset = -1;
                if (prevXRefObj is PDFNumber)
                {
                    prevOffset = ((PDFNumber)prevXRefObj).Value;
                }
                else if (prevXRefObj is PDFReal)
                {
                    // Cast to the type that was actually tested for; casting a PDFReal to PDFNumber
                    // fails at runtime and would abort initialization.
                    prevOffset = (long)((PDFReal)prevXRefObj).Value;
                }

                PDFXRefTable xref = GetXRefTable(startxrefPos, eofPos, prevOffset);


                if (log.ShouldLog(TraceLevel.Debug))
                {
                    log.Add(TraceLevel.Debug, "PDFReader", "References for the catalog and document info found");
                }

                // The XRef table must be assigned before GetObject is called to load the Info and Catalog objects.
                this._xreftable = xref;
                this._info      = (PDFFileIndirectObject)this.GetObject(infoRef);

                if (log.ShouldLog(TraceLevel.Debug))
                {
                    log.Add(TraceLevel.Debug, "PDFReader", "Loaded the document Info indirect object");
                }

                this._catalog = (PDFFileIndirectObject)this.GetObject(catalogRef);

                if (log.ShouldLog(TraceLevel.Debug))
                {
                    log.Add(TraceLevel.Debug, "PDFReader", "Loaded the document Catalog indirect object");
                }

                //TODO: Look for more updates and read those in too
            }
            catch (Exception ex)
            {
                // Surface every failure as a single parser exception, keeping the cause as the inner exception.
                throw new PDFNativeParserException(CommonErrors.CouldNotInitializeThePDFReader, ex);
            }
        }