Exemplo n.º 1
0
 /// <summary>
 /// Removes the entry of the given reference from the object table.
 /// </summary>
 /// <param name="iref">Reference whose ObjectID is used as the key to remove.</param>
 public void Remove(PdfReference iref)
 {
     // The table is keyed by object identifier, not by the reference itself.
     var objectId = iref.ObjectID;
     ObjectTable.Remove(objectId);
 }
Exemplo n.º 2
0
        /// <summary>
        /// Recursively walks the items of a dictionary or array and records every indirect
        /// reference it encounters as a key in <paramref name="objects"/>.
        /// </summary>
        /// <remarks>
        /// The recursion depth is tracked in _nestingLevel. Once it reaches 1000 the current
        /// object is recorded in _overflow (if not already present) and the method returns
        /// without descending into it. The counter is always decremented in the finally block.
        /// </remarks>
        /// <param name="objects">Collects the visited references as keys; the values are always null.</param>
        /// <param name="pdfObject">The object to traverse; only PdfDictionary and PdfArray are enumerated.</param>
        void TransitiveClosureImplementation(Dictionary <PdfItem, object> objects, PdfObject pdfObject /*, ref int depth*/)
        {
            try
            {
                _nestingLevel++;
                // Depth limit: remember the object in _overflow instead of recursing any deeper.
                if (_nestingLevel >= 1000)
                {
                    if (!_overflow.ContainsKey(pdfObject))
                    {
                        _overflow.Add(pdfObject, null);
                    }
                    return;
                }
#if DEBUG_
                //enterCount++;
                if (enterCount == 5400)
                {
                    GetType();
                }
                //if (!Object.ReferenceEquals(pdfObject.Owner, _document))
                //  GetType();
                //////Debug.Assert(Object.ReferenceEquals(pdfObject27.Document, _document));
                //      if (item is PdfObject && ((PdfObject)item).ObjectID.ObjectNumber == 5)
                //        Debug.WriteLine("items: " + ((PdfObject)item).ObjectID.ToString());
                //if (pdfObject.ObjectNumber == 5)
                //  GetType();
#endif

                // Pick the child collection: dictionary values or array elements.
                IEnumerable   enumerable = null; //(IEnumerator)pdfObject;
                PdfDictionary dict;
                PdfArray      array;
                if ((dict = pdfObject as PdfDictionary) != null)
                {
                    enumerable = dict.Elements.Values;
                }
                else if ((array = pdfObject as PdfArray) != null)
                {
                    enumerable = array.Elements;
                }
                else
                {
                    // Callers in this method only pass dictionaries or arrays.
                    Debug.Assert(false, "Should not come here.");
                }

                if (enumerable != null)
                {
                    foreach (PdfItem item in enumerable)
                    {
                        PdfReference iref = item as PdfReference;
                        if (iref != null)
                        {
                            // Is this an indirect reference to an object that does not exist?
                            //if (iref.Document == null)
                            //{
                            //    Debug.WriteLine("Dead object detected: " + iref.ObjectID.ToString());
                            //    PdfReference dead = DeadObject;
                            //    iref.ObjectID = dead.ObjectID;
                            //    iref.Document = _document;
                            //    iref.SetObject(dead.Value);
                            //    PdfDictionary dict = (PdfDictionary)dead.Value;

                            //    dict.Elements["/DeadObjectCount"] =
                            //      new PdfInteger(dict.Elements.GetInteger("/DeadObjectCount") + 1);

                            //    iref = dead;
                            //}

                            // Diagnostics only: report references that belong to a different document.
                            // The bare GetType() call has no effect (presumably a breakpoint anchor).
                            if (!ReferenceEquals(iref.Document, _document))
                            {
                                GetType();
                                Debug.WriteLine(String.Format("Bad iref: {0}", iref.ObjectID.ToString()));
                            }
                            Debug.Assert(ReferenceEquals(iref.Document, _document) || iref.Document == null, "External object detected!");
#if DEBUG_
                            if (iref.ObjectID.ObjectNumber == 23)
                            {
                                GetType();
                            }
#endif
                            // Visit each reference only once.
                            if (!objects.ContainsKey(iref))
                            {
                                PdfObject value = iref.Value;

                                // Ignore unreachable objects (references without an owning document).
                                if (iref.Document != null)
                                {
                                    // ... from trailer hack
                                    // The reference has no value yet: switch to the reference stored
                                    // in the object table under the same object ID.
                                    if (value == null)
                                    {
                                        iref = ObjectTable[iref.ObjectID];
                                        Debug.Assert(iref.Value != null);
                                        value = iref.Value;
                                    }
                                    Debug.Assert(ReferenceEquals(iref.Document, _document));
                                    objects.Add(iref, null);
                                    //Debug.WriteLine(String.Format("objects.Add('{0}', null);", iref.ObjectID.ToString()));
                                    // Only containers can hold further references.
                                    if (value is PdfArray || value is PdfDictionary)
                                    {
                                        TransitiveClosureImplementation(objects, value /*, ref depth*/);
                                    }
                                }
                                //else
                                //{
                                //  objects2.Add(this[iref.ObjectID], null);
                                //}
                            }
                        }
                        else
                        {
                            // Direct (non-reference) item: descend into nested dictionaries and arrays.
                            PdfObject pdfObject28 = item as PdfObject;
                            //if (pdfObject28 != null)
                            //  Debug.Assert(Object.ReferenceEquals(pdfObject28.Document, _document));
                            if (pdfObject28 != null && (pdfObject28 is PdfDictionary || pdfObject28 is PdfArray))
                            {
                                TransitiveClosureImplementation(objects, pdfObject28 /*, ref depth*/);
                            }
                        }
                    }
                }
            }
            finally
            {
                // Balance the increment at method entry, including on early return and exceptions.
                _nestingLevel--;
            }
        }
Exemplo n.º 3
0
 /**
  * <summary>Wraps a reference into a property list object by forwarding to the
  * two-argument overload with a null second argument.</summary>
  * <param name="reference">Reference to a property list object.</param>
  * <returns>Property list object corresponding to the reference.</returns>
  */
 public static PropertyList Wrap(PdfReference reference)
 {
     return Wrap(reference, null);
 }
Exemplo n.º 4
0
        /// <summary>
        /// Opens an existing PDF document from a stream: reads the header version and the
        /// trailer(s), validates the password if the trailer has an /Encrypt entry, reads the
        /// compressed and indirect objects, and, in Modify mode, prepares the document for editing.
        /// </summary>
        /// <param name="stream">The stream to read from. Its Length and Position properties are used.</param>
        /// <param name="password">The password passed to the security handler; may be null.</param>
        /// <param name="openmode">The open mode. Modify triggers the extra steps at the end of the method.</param>
        /// <param name="passwordProvider">Optional callback asked for a new password when validation
        /// fails, or when only the user password matched and the document is opened for Modify.</param>
        /// <returns>The opened document, or null if the password provider set Abort.</returns>
        /// <exception cref="InvalidOperationException">GetPdfFileVersion returned 0 for the header bytes.</exception>
        /// <exception cref="PdfReaderException">Password validation failed and no password provider was supplied.</exception>
        public static PdfDocument Open(Stream stream, string password, PdfDocumentOpenMode openmode, PdfPasswordProvider passwordProvider)
        {
            PdfDocument document;

            try
            {
                Lexer lexer = new Lexer(stream);
                document           = new PdfDocument(lexer);
                document._state   |= DocumentState.Imported;
                document._openMode = openmode;
                document._fileSize = stream.Length;

                // Get file version.
                // NOTE(review): the return value of Read is ignored, so fewer than 1024 bytes may
                // actually have been read; the remainder of the buffer then stays zero.
                byte[] header = new byte[1024];
                stream.Position = 0;
                stream.Read(header, 0, 1024);
                document._version = GetPdfFileVersion(header);
                if (document._version == 0)
                {
                    throw new InvalidOperationException();
                }

                // The iref table is flagged as under construction while the trailers are read.
                document._irefTable.IsUnderConstruction = true;
                Parser parser = new Parser(document);
                // Read all trailers or cross-reference streams, but no objects.
                document._trailer = parser.ReadTrailer();
                if (document._trailer == null)
                {
                    ParserDiagnostics.ThrowParserException("Invalid PDF file: no trailer found."); // TODO L10N using PSSR.
                }
                Debug.Assert(document._irefTable.IsUnderConstruction);
                document._irefTable.IsUnderConstruction = false;

                // Is document encrypted?
                PdfReference xrefEncrypt = document._trailer.Elements[PdfTrailer.Keys.Encrypt] as PdfReference;
                if (xrefEncrypt != null)
                {
                    //xrefEncrypt.Value = parser.ReadObject(null, xrefEncrypt.ObjectID, false);
                    PdfObject encrypt = parser.ReadObject(null, xrefEncrypt.ObjectID, false, false);

                    // Link the encryption object and its reference to each other.
                    encrypt.Reference = xrefEncrypt;
                    xrefEncrypt.Value = encrypt;
                    PdfStandardSecurityHandler securityHandler = document.SecurityHandler;
                    // Retry point: re-entered via goto each time the provider supplies a new password.
TryAgain:
                    PasswordValidity validity = securityHandler.ValidatePassword(password);
                    if (validity == PasswordValidity.Invalid)
                    {
                        if (passwordProvider != null)
                        {
                            PdfPasswordProviderArgs args = new PdfPasswordProviderArgs();
                            passwordProvider(args);
                            if (args.Abort)
                            {
                                return(null);
                            }
                            password = args.Password;
                            goto TryAgain;
                        }
                        else
                        {
                            // NOTE(review): both branches throw the same exception without a
                            // message; presumably they once distinguished "no password" from
                            // "wrong password".
                            if (password == null)
                            {
                                throw new PdfReaderException();
                            }
                            else
                            {
                                throw new PdfReaderException();
                            }
                        }
                    }
                    else if (validity == PasswordValidity.UserPassword && openmode == PdfDocumentOpenMode.Modify)
                    {
                        // Only the user password matched, but Modify was requested: ask the
                        // provider for another password or fail.
                        if (passwordProvider != null)
                        {
                            PdfPasswordProviderArgs args = new PdfPasswordProviderArgs();
                            passwordProvider(args);
                            if (args.Abort)
                            {
                                return(null);
                            }
                            password = args.Password;
                            goto TryAgain;
                        }
                        else
                        {
                            throw new PdfReaderException();
                        }
                    }
                }
                else
                {
                    if (password != null)
                    {
                        // Password specified but document is not encrypted.
                        // ignore
                    }
                }

                // Snapshot of the references known so far; both compressed-object passes use it.
                PdfReference[] irefs2 = document._irefTable.AllReferences;
                int            count2 = irefs2.Length;

                // 3rd: Create iRefs for all compressed objects.
                // objectStreams is used as a set so that each object stream is processed only once.
                Dictionary <int, object> objectStreams = new Dictionary <int, object>();
                for (int idx = 0; idx < count2; idx++)
                {
                    PdfReference            iref       = irefs2[idx];
                    PdfCrossReferenceStream xrefStream = iref.Value as PdfCrossReferenceStream;
                    if (xrefStream != null)
                    {
                        for (int idx2 = 0; idx2 < xrefStream.Entries.Count; idx2++)
                        {
                            PdfCrossReferenceStream.CrossReferenceStreamEntry item = xrefStream.Entries[idx2];
                            // Is type xref to compressed object?
                            if (item.Type == 2)
                            {
                                //PdfReference irefNew = parser.ReadCompressedObject(new PdfObjectID((int)item.Field2), (int)item.Field3);
                                //document._irefTable.Add(irefNew);
                                int objectNumber = (int)item.Field2;
                                if (!objectStreams.ContainsKey(objectNumber))
                                {
                                    objectStreams.Add(objectNumber, null);
                                    PdfObjectID objectID = new PdfObjectID((int)item.Field2);
                                    parser.ReadIRefsFromCompressedObject(objectID);
                                }
                            }
                        }
                    }
                }

                // 4th: Read compressed objects.
                for (int idx = 0; idx < count2; idx++)
                {
                    PdfReference            iref       = irefs2[idx];
                    PdfCrossReferenceStream xrefStream = iref.Value as PdfCrossReferenceStream;
                    if (xrefStream != null)
                    {
                        for (int idx2 = 0; idx2 < xrefStream.Entries.Count; idx2++)
                        {
                            PdfCrossReferenceStream.CrossReferenceStreamEntry item = xrefStream.Entries[idx2];
                            // Is type xref to compressed object?
                            if (item.Type == 2)
                            {
                                // NOTE(review): irefNew is not used afterwards, and the assertion
                                // checks the cross-reference stream's own iref rather than irefNew.
                                PdfReference irefNew = parser.ReadCompressedObject(new PdfObjectID((int)item.Field2),
                                                                                   (int)item.Field3);
                                Debug.Assert(document._irefTable.Contains(iref.ObjectID));
                                //document._irefTable.Add(irefNew);
                            }
                        }
                    }
                }


                // Fetch the references again after the compressed-object passes above.
                PdfReference[] irefs = document._irefTable.AllReferences;
                int            count = irefs.Length;

                // Read all indirect objects.
                for (int idx = 0; idx < count; idx++)
                {
                    PdfReference iref = irefs[idx];
                    if (iref.Value == null)
                    {
#if DEBUG_
                        if (iref.ObjectNumber == 1074)
                        {
                            iref.GetType();
                        }
#endif
                        try
                        {
                            Debug.Assert(document._irefTable.Contains(iref.ObjectID));
                            PdfObject pdfObject = parser.ReadObject(null, iref.ObjectID, false, false);
                            Debug.Assert(pdfObject.Reference == iref);
                            pdfObject.Reference = iref;
                            Debug.Assert(pdfObject.Reference.Value != null, "Something went wrong.");
                        }
                        catch (Exception ex)
                        {
                            Debug.WriteLine(ex.Message);
                            // 4STLA rethrow exception to notify caller.
                            throw;
                        }
                    }
                    else
                    {
                        Debug.Assert(document._irefTable.Contains(iref.ObjectID));
                        //iref.GetType();
                    }
                    // Set maximum object number.
                    document._irefTable._maxObjectNumber = Math.Max(document._irefTable._maxObjectNumber,
                                                                    iref.ObjectNumber);
                }

                // Encrypt all objects.
                // NOTE(review): EncryptDocument is called right after loading an encrypted file;
                // presumably this decrypts the loaded objects - confirm against the security handler.
                if (xrefEncrypt != null)
                {
                    document.SecurityHandler.EncryptDocument();
                }

                // Fix references of trailer values and then objects and irefs are consistent.
                document._trailer.Finish();

#if DEBUG_
                // Some tests...
                PdfReference[] reachables = document.xrefTable.TransitiveClosure(document.trailer);
                reachables.GetType();
                reachables = document.xrefTable.AllXRefs;
                document.xrefTable.CheckConsistence();
#endif

                if (openmode == PdfDocumentOpenMode.Modify)
                {
                    // Create new or change existing document IDs.
                    if (document.Internals.SecondDocumentID == "")
                    {
                        document._trailer.CreateNewDocumentIDs();
                    }
                    else
                    {
                        // Keep the first ID and replace only the second one with a fresh GUID.
                        byte[] agTemp = Guid.NewGuid().ToByteArray();
                        document.Internals.SecondDocumentID = PdfEncoders.RawEncoding.GetString(agTemp, 0, agTemp.Length);
                    }

                    // Change modification date

                    // Remove all unreachable objects
                    int removed = document._irefTable.Compact();
                    if (removed != 0)
                    {
                        Debug.WriteLine("Number of deleted unreachable objects: " + removed);
                    }

                    // Force flattening of page tree
                    PdfPages pages = document.Pages;
                    Debug.Assert(pages != null);

                    //bool b = document.irefTable.Contains(new PdfObjectID(1108));
                    //b.GetType();

                    document._irefTable.CheckConsistence();
                    document._irefTable.Renumber();
                    document._irefTable.CheckConsistence();
                }
            }
            catch (Exception ex)
            {
                // Log the message to the debug output and let the caller see the original exception.
                Debug.WriteLine(ex.Message);
                throw;
            }
            return(document);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Clones an external object together with its transitive closure into the owner
        /// document, reusing objects that were already imported, and returns the imported root.
        /// </summary>
        /// <param name="importedObjectTable">The imported object table of the owner for the external document.</param>
        /// <param name="owner">The document that owns the cloned objects.</param>
        /// <param name="externalObject">The root object to be cloned.</param>
        /// <returns>The clone of the root object</returns>
        internal static PdfObject ImportClosure(PdfImportedObjectTable importedObjectTable, PdfDocument owner, PdfObject externalObject)
        {
            Debug.Assert(ReferenceEquals(importedObjectTable.Owner, owner), "importedObjectTable does not belong to the owner.");
            Debug.Assert(ReferenceEquals(importedObjectTable.ExternalDocument, externalObject.Owner),
                         "The ExternalDocument of the importedObjectTable does not belong to the owner of object to be imported.");

            // Everything reachable from the external root, as computed by the external document.
            PdfObject[] elements = externalObject.Owner.Internals.GetClosure(externalObject);
            int         count    = elements.Length;

#if DEBUG_
            for (int idx = 0; idx < count; idx++)
            {
                Debug.Assert(elements[idx].XRef != null);
                Debug.Assert(elements[idx].XRef.Document != null);
                Debug.Assert(elements[idx].Document != null);
                if (elements[idx].ObjectID.ObjectNumber == 12)
                {
                    GetType();
                }
            }
#endif
            // Pass 1: replace every external object in place, either by a fresh clone or by
            // the counterpart that an earlier import already created.
            for (int idx = 0; idx < count; idx++)
            {
                PdfObject external = elements[idx];
                Debug.Assert(!ReferenceEquals(external.Owner, owner));

                if (!importedObjectTable.Contains(external.ObjectID))
                {
                    // Not imported before: clone it and hand the clone to the owner document.
                    PdfObject copy = external.Clone();
                    Debug.Assert(copy.Reference == null);
                    copy.Document = owner;

                    if (external.Reference == null)
                    {
                        // Direct object: only the root of the closure may be direct.
                        Debug.Assert(idx == 0);
                    }
                    else
                    {
                        // Indirect object: register the clone with the owner and remember
                        // the mapping from the old object identifier to the new reference.
                        owner.irefTable.Add(copy);
                        Debug.Assert(copy.Reference != null);
                        importedObjectTable.Add(external.ObjectID, copy.Reference);
                    }

                    elements[idx] = copy;
                    continue;
                }

                // Imported before: reuse the object behind the recorded reference.
                PdfReference known = importedObjectTable[external.ObjectID];
                Debug.Assert(known != null);
                Debug.Assert(known.Value != null);
                Debug.Assert(known.Document == owner);
                elements[idx] = known.Value;
            }
#if DEBUG_
            for (int idx = 0; idx < count; idx++)
            {
                Debug.Assert(elements[idx].XRef != null);
                Debug.Assert(elements[idx].XRef.Document != null);
                Debug.Assert(elements[idx].Document != null);
                if (resources[idx].ObjectID.ObjectNumber == 12)
                {
                    GetType();
                }
            }
#endif

            // Pass 2: rewrite indirect references that still point into the external document.
            foreach (PdfObject imported in elements)
            {
                Debug.Assert(owner != null);
                FixUpObject(importedObjectTable, importedObjectTable.Owner, imported);
            }

            // The first element is the imported root object.
            return elements[0];
        }
Exemplo n.º 6
0
        /// <summary>
        /// Writes the document through the given writer: file header, every object of the
        /// reference table, the reference table itself, the trailer and the end-of-file marker.
        /// </summary>
        /// <param name="writer">The writer to emit to. Its stream is flushed but not closed.</param>
        /// <exception cref="InvalidOperationException">The document has no pages.</exception>
        void DoSave(PdfWriter writer)
        {
            bool hasPages = _pages != null && _pages.Count != 0;
            if (!hasPages)
            {
                // A set output stream hints that the wrong constructor was used, so give
                // more detailed feedback in that case.
                string message = _outStream != null
                    ? "Cannot save a PDF document with no pages. Do not use \"public PdfDocument(string filename)\" or \"public PdfDocument(Stream outputStream)\" if you want to open an existing PDF document from a file or stream; use PdfReader.Open() for that purpose."
                    : "Cannot save a PDF document with no pages.";
                throw new InvalidOperationException(message);
            }

            try
            {
                // HACK: a cross-reference stream trailer is replaced by a plain trailer built from it.
                PdfCrossReferenceStream xrefTrailer = _trailer as PdfCrossReferenceStream;
                if (xrefTrailer != null)
                {
                    _trailer = new PdfTrailer(xrefTrailer);
                }

                bool encrypt = _securitySettings.DocumentSecurityLevel != PdfDocumentSecurityLevel.None;
                if (!encrypt)
                {
                    _trailer.Elements.Remove(PdfTrailer.Keys.Encrypt);
                }
                else
                {
                    // Make sure the security handler is an indirect object, then reference it
                    // from the trailer.
                    PdfStandardSecurityHandler handler = _securitySettings.SecurityHandler;
                    if (handler.Reference != null)
                    {
                        Debug.Assert(_irefTable.Contains(handler.ObjectID));
                    }
                    else
                    {
                        _irefTable.Add(handler);
                    }
                    _trailer.Elements[PdfTrailer.Keys.Encrypt] = _securitySettings.SecurityHandler.Reference;
                }

                PrepareForSave();

                if (encrypt)
                {
                    _securitySettings.SecurityHandler.PrepareEncryption();
                }

                writer.WriteFileHeader(this);

                // Record the position of each object right before writing it.
                PdfReference[] references = _irefTable.AllReferences;
                foreach (PdfReference reference in references)
                {
                    reference.Position = writer.Position;
                    reference.Value.WriteObject(writer);
                }

                // The reference table starts here; the end-of-file marker needs this position.
                int startxref = writer.Position;
                _irefTable.WriteObject(writer);
                writer.WriteRaw("trailer\n");
                _trailer.Elements.SetInteger("/Size", references.Length + 1);
                _trailer.WriteObject(writer);
                writer.WriteEof(this, startxref);
            }
            finally
            {
                // Flush only; the writer is deliberately not closed here.
                if (writer != null)
                {
                    writer.Stream.Flush();
                }
            }
        }
Exemplo n.º 7
0
        /// <summary>
        /// Walks one level of a PDF outline (following /Next) and adds a bookmark for every
        /// entry whose destination resolves to a page of the input document that is part of
        /// the current page selection. Children (/First) are processed recursively.
        /// </summary>
        /// <remarks>
        /// Malformed entries are skipped instead of raising an exception: an empty destination
        /// array, or an /A, /D, /First or /Next value of an unexpected type, simply yields no
        /// bookmark (or ends the walk for /Next).
        /// </remarks>
        /// <param name="bm">The bookmark collection that receives the new entries.</param>
        /// <param name="page">Page offset added to the resolved page index of each bookmark.</param>
        /// <param name="level">Nesting level assigned to bookmarks added at this outline level.</param>
        /// <param name="outline">First outline item of the level to process; may be null.</param>
        /// <returns>The number of bookmarks added, including those added for descendants.</returns>
        public int AddSubBookmarksFromFile(ref PdfBookmark bm, int page, int level, PdfDictionary outline)
        {
            int bmCount = 0;

            PdfDictionary mark = outline;

            // get titles and pages of all bookmarks at this level
            while (mark != null)
            {
                int level_offset = 0;

                // get the title
                string title = mark.Elements.GetString("/Title");
                if (title == null)
                {
                    return(bmCount);
                }

                // direct
                PdfObject dest = mark.Elements.GetObject("/Dest");

                // indirect: destination stored in the /D entry of an action dictionary
                if (dest == null)
                {
                    PdfDictionary action = mark.Elements.GetObject("/A") as PdfDictionary;
                    if (action != null)
                    {
                        // if null this is a blind bookmark; a non-array value is ignored below
                        dest = action.Elements.GetObject("/D");
                    }
                }

                // indirectly named (per 1.6 sample from acrobat website)
                // used for multiple bookmarks on same page
                if (dest == null)
                {
                    dest = this.GetNamedDestination(mark);
                }

                // Only an explicit destination array whose first element is a page
                // reference can be resolved; anything else is treated as a blind bookmark.
                PdfReference pref = null;
                PdfArray     pDest = dest as PdfArray;
                if (pDest != null && pDest.Elements.Count > 0)
                {
                    pref = pDest.Elements[0] as PdfReference;
                }

                if (pref != null)
                {
                    // convert page to offset
                    int page_offset = -1;
                    for (int x = 0; x < this.inputDocument.Pages.Count; ++x)
                    {
                        PdfReference pageRef = this.inputDocument.Pages.PagesArray.Elements[x] as PdfReference;
                        if (pageRef == pref)
                        {
                            page_offset = x;
                            break;
                        }
                    }

                    // check if this page is being added or not
                    bool included = false;
                    if (this.lastPageList == null)
                    {
                        // null means all pages
                        included = true;
                    }
                    else
                    {
                        // check if page needs to be included; the bookmark then points at
                        // the page's position within the selection
                        for (int x = 0; x < this.lastPageList.Length; ++x)
                        {
                            if (this.lastPageList[x] == page_offset)
                            {
                                included    = true;
                                page_offset = x;
                                break;
                            }
                        }
                    }

                    // Bookmarks that refer to a page object which does not exist are
                    // silently ignored (to do - show a warning for bad bookmark?).
                    if (page_offset != -1 && included)
                    {
                        bm.AddBookmark(title, page_offset + page, level);
                        ++bmCount;
                        // children of an added bookmark go one level deeper
                        level_offset = 1;
                    }
                }

                // if this bookmark has children, add them recursively
                PdfDictionary child = mark.Elements.GetObject("/First") as PdfDictionary;
                if (child != null)
                {
                    bmCount += this.AddSubBookmarksFromFile(ref bm, page, level + level_offset, child);
                }

                // get the next mark
                mark = mark.Elements.GetObject("/Next") as PdfDictionary;
            }

            return(bmCount);
        }
Exemplo n.º 8
0
 /**
  * <summary>Unregisters an internal object.</summary>
  * <param name="reference">Reference whose object number is passed to
  * <c>indirectObjects.RemoveAt</c>.</param>
  */
 public void Unregister(PdfReference reference)
 {
     var objectNumber = reference.ObjectNumber;
     indirectObjects.RemoveAt(objectNumber);
 }
Exemplo n.º 9
0
        /**
         * <summary>Creates the font descriptor from the metrics and font data of the parser
         * and registers it, together with the embedded font stream, in the file.</summary>
         * <param name="parser">Parser providing the font metrics and the raw font data.</param>
         * <returns>Reference to the registered font descriptor.</returns>
         */
        private PdfReference Load_CreateFontDescriptor(
            OpenFontParser parser
            )
        {
            PdfDictionary descriptor = new PdfDictionary();
            OpenFontParser.FontMetrics metrics = parser.Metrics;

            // Type and FontName (the latter is taken from the font's own BaseFont entry).
            descriptor[PdfName.Type]     = PdfName.FontDescriptor;
            descriptor[PdfName.FontName] = BaseDataObject[PdfName.BaseFont];

            // Flags [PDF:1.6:5.7.1].
            FlagsEnum flags = metrics.IsCustomEncoding ? FlagsEnum.Symbolic : FlagsEnum.Nonsymbolic;
            if (metrics.IsFixedPitch)
            {
                flags |= FlagsEnum.FixedPitch;
            }
            descriptor[PdfName.Flags] = new PdfInteger(Convert.ToInt32(flags));

            // FontBBox: corner points scaled by the unit normalization factor.
            drawing::PointF lowerCorner = new drawing::PointF(metrics.XMin * metrics.UnitNorm, metrics.YMin * metrics.UnitNorm);
            drawing::PointF upperCorner = new drawing::PointF(metrics.XMax * metrics.UnitNorm, metrics.YMax * metrics.UnitNorm);
            descriptor[PdfName.FontBBox] = new Rectangle(lowerCorner, upperCorner).BaseDataObject;

            // ItalicAngle.
            descriptor[PdfName.ItalicAngle] = new PdfReal(metrics.ItalicAngle);

            // Ascent and Descent: the typographic value is used when the primary one is zero.
            descriptor[PdfName.Ascent] = new PdfReal(
                metrics.Ascender == 0
                    ? metrics.STypoAscender * metrics.UnitNorm
                    : metrics.Ascender * metrics.UnitNorm
                );
            descriptor[PdfName.Descent] = new PdfReal(
                metrics.Descender == 0
                    ? metrics.STypoDescender * metrics.UnitNorm
                    : metrics.Descender * metrics.UnitNorm
                );

            // Leading and CapHeight.
            descriptor[PdfName.Leading]   = new PdfReal(metrics.STypoLineGap * metrics.UnitNorm);
            descriptor[PdfName.CapHeight] = new PdfReal(metrics.SCapHeight * metrics.UnitNorm);

            /*
             * StemV.
             * NOTE: '100' is only a rule-of-thumb value; the real value is not extracted yet.
             * TODO:IMPL TrueType and CFF stemv real value to extract!!!
             */
            descriptor[PdfName.StemV] = new PdfInteger(100);

            // FontFile: the raw font data is embedded as a stream with the OpenType subtype.
            //TODO:IMPL distinguish between truetype (FontDescriptor.FontFile2) and opentype (FontDescriptor.FontFile3 and FontStream.subtype=OpenType)!!!
            PdfDictionary fontFileHeader = new PdfDictionary(
                new PdfName[] { PdfName.Subtype },
                new PdfDirectObject[] { PdfName.OpenType }
                );
            PdfStream fontFile = new PdfStream(
                fontFileHeader,
                new bytes::Buffer(parser.FontData.ToByteArray())
                );
            descriptor[PdfName.FontFile3] = File.Register(fontFile);

            return File.Register(descriptor);
        }
Exemplo n.º 10
0
 private void WriteReference(PdfReference pdfReference)
 {
     // Token layout: "<object number> <generation> R".
     string referenceToken = $"{pdfReference.ObjectNumber} {pdfReference.Generation} R";
     AsciiToOutput(referenceToken);
 }
Exemplo n.º 11
0
        /// <summary>
        /// Extracts the text of a PDF file: the text elements of every page plus, via Tesseract OCR,
        /// the text recognized in the image XObjects referenced by the page's resources.
        /// </summary>
        /// <param name="filename">Path of the PDF file to open.</param>
        /// <param name="log">Receives progress, debug and error messages.</param>
        /// <returns>
        /// The collected text, trimmed, with runs of spaces collapsed to one space and
        /// runs of "\n" collapsed to one "\n".
        /// </returns>
        protected override string DoScan(string filename, LoggingSection log)
        {
            PdfDocument   pdfDocument   = PdfReader.Open(filename);
            StringBuilder stringBuilder = new StringBuilder();

            for (int pageIndex = 0; pageIndex < pdfDocument.PageCount; pageIndex++)
            {
                log.Verbose($"Scanning page {pageIndex + 1} of {pdfDocument.PageCount}");
                PdfPage pdfPage = pdfDocument.Pages[pageIndex];
                //Extract text from text elements
                stringBuilder.Append($"{ExtractTextFromPdfPage(pdfPage)}{Environment.NewLine}");

                //Extract text from image elements with Tesseract OCR
                PdfDictionary resources = pdfPage.Elements.GetDictionary("/Resources");
                PdfDictionary xObjects  = resources?.Elements.GetDictionary("/XObject");
                if (xObjects != null)
                {
                    foreach (PdfItem item in xObjects.Elements.Values)
                    {
                        PdfReference  reference = item as PdfReference;
                        PdfDictionary xObject   = reference?.Value as PdfDictionary;
                        if (xObject == null || xObject.Elements.GetString("/Subtype") != "/Image")
                        {
                            continue;
                        }

                        Bitmap bitmap = PdfImageToBitmap(xObject);
                        if (bitmap == null)
                        {
                            log.Error("Could not extract bitmap from PDF image element. Seems like the PDF image filter type is not supported. Skipping element!");
                            continue;
                        }

                        try
                        {
                            log.Debug("Rotating image");
                            bitmap.RotateFlip(RotateFlipType.Rotate90FlipNone);
                            log.Debug("Upscaling image 2x");
                            BitmapUtils.Scale(ref bitmap, 2);
                            log.Debug("Grayscaling image");
                            BitmapUtils.GrayscaleWithLockBits(bitmap);
                            log.Debug("Denoising image");
                            BitmapUtils.DenoiseWithLockBits(bitmap);
                            log.Debug("Applying OCR on image");

                            using (Pix pix = PixConverter.ToPix(bitmap))
                            {
                                TesseractEngine tesseractEngine = Services.OCRProvider.AwaitResource();
                                try
                                {
                                    // The page must be disposed before the engine is handed back:
                                    // an engine with a live page cannot process another image.
                                    using (Page tesseractPage = tesseractEngine.Process(pix))
                                    {
                                        try
                                        {
                                            string text = tesseractPage.GetText();
                                            log.Debug($"Text is {text.Length} characters long");
                                            if (!string.IsNullOrWhiteSpace(text) && text != "\n")
                                            {
                                                stringBuilder.Append(text.Replace("\n", " "));
                                            }
                                        }
                                        catch (InvalidOperationException e)
                                        {
                                            // Report the 1-based page number, matching the "Scanning page" message.
                                            log.Error($"OCR failed on Page {pageIndex + 1} of file {filename}:\n{e.StackTrace}");
                                        }
                                    }
                                }
                                finally
                                {
                                    // Always return the engine to the pool, even when Process throws.
                                    Services.OCRProvider.Feed(tesseractEngine);
                                }
                            }
                        }
                        finally
                        {
                            // The bitmap is only needed until it has been converted and processed.
                            bitmap?.Dispose();
                        }
                    }
                }
                stringBuilder.Append("\n");
            }

            log.Debug("Trimming text");
            string documentText = stringBuilder.ToString();

            documentText = documentText.Trim();
            while (documentText.Contains("  "))
            {
                documentText = documentText.Replace("  ", " ");
            }
            while (documentText.Contains("\n\n"))
            {
                documentText = documentText.Replace("\n\n", "\n");
            }

            // Return the cleaned-up text; the raw builder content would discard the work above.
            return(documentText);
        }
Exemplo n.º 12
0
        // Sample: working directly on the low-level objects (dictionaries, arrays, streams, ...)
        // a PDF document is built from. This is the way to get PDF features that PDFsharp does
        // not implement itself, e.g. an 'open action' for a document.
        protected override void DoWork()
        {
            const string filename = "Portable Document Format.pdf";

            // Start from a fresh copy of the sample PDF in the current directory.
            string sampleSource = Path.Combine("AltData/PDFsharp/PDFs/", filename);
            string workingCopy  = Path.Combine(Directory.GetCurrentDirectory(), filename);
            File_Copy(sampleSource, workingCopy, true);

            // Load the document into memory so it can be modified.
            PdfDocument document = PdfReader.Open(filename);

            // PDFsharp has no built-in concept of 'actions' yet, so the action is assembled by
            // hand. Because this works on raw PDF objects, some knowledge of the PDF structure
            // is needed: see at least chapter 3 of Adobe's PDF Reference
            // (http://partners.adobe.com/public/developer/pdf/index_reference.html) or, for
            // German readers, chapter 12 of 'Die PostScript & PDF-Bibel'
            // (http://www.pdflib.com/de/produkte/mehr/bibel/index.html).
            //
            // Goal: make the document open on page 3, magnified just enough to fit the height
            // of the page within the window.

            // The action is a dictionary with two entries:
            //   /S - the action type ('GoTo'),
            //   /D - the destination.
            PdfDictionary openAction = new PdfDictionary(document);
            openAction.Elements["/S"] = new PdfName("/GoTo");

            // The destination is an array; assigning it to /D makes it a direct object of the
            // action dictionary.
            PdfArray destination = new PdfArray(document);
            openAction.Elements["/D"] = destination;

            // A page destination for 'GoTo' takes three array elements.
            // 1) An indirect reference to the target page. Pages is zero based, so the third
            //    page is Pages[2]. Adding the reference (not the PdfPage object itself) is what
            //    makes the entry an indirect reference.
            PdfReference targetPage = PdfInternals.GetReference(document.Pages[2]);
            destination.Elements.Add(targetPage);

            // 2) The name /FitV, meaning 'fit vertically'.
            destination.Elements.Add(new PdfName("/FitV"));

            // 3) The horizontal coordinate placed at the left edge of the window. -32768 is the
            //    value Acrobat uses to show the full page (it means 'left aligned' anyway if the
            //    window is so small that a horizontal scroll bar is required).
            destination.Elements.Add(new PdfInteger(-32768));

            // Register the finished action in the document's object table; this turns it into
            // an indirect object.
            document.Internals.AddObject(openAction);

            // Hook the action into the catalog's /OpenAction key as an indirect value.
            document.Internals.Catalog.Elements["/OpenAction"] =
                PdfInternals.GetReference(openAction);

            // Object numbers never have to be handled manually; PDFsharp assigns them.

            // Save the result and open it in a viewer.
            document.Save(filename);
            Diagnostics.ProcessHelper.Start(filename);
        }
Exemplo n.º 13
0
        /// <summary>
        /// Walks a cloned dictionary or array and swaps every indirect reference that does not
        /// belong to the importing document for the reference of its clone, as looked up in the
        /// imported object table. Nested objects are processed recursively.
        /// </summary>
        internal static void FixUpObject(PdfImportedObjectTable iot, PdfDocument owner, PdfObject value)
        {
            Debug.Assert(ReferenceEquals(iot.Owner, owner));

            PdfDictionary dictionary = value as PdfDictionary;
            if (dictionary != null)
            {
                // A cloned direct object has no owner yet: adopt it. Otherwise it must already be ours.
                if (dictionary.Owner == null)
                {
                    dictionary.Document = owner;
                }
                else
                {
                    Debug.Assert(dictionary.Owner == owner);
                }

                // Work on a snapshot of the keys because entries are replaced while walking.
                PdfName[] keys = dictionary.Elements.KeyNames;
                for (int keyIndex = 0; keyIndex < keys.Length; keyIndex++)
                {
                    PdfName key     = keys[keyIndex];
                    PdfItem element = dictionary.Elements[key];

                    PdfReference reference = element as PdfReference;
                    if (reference == null)
                    {
                        // Not a reference: descend into nested objects, ignore everything else.
                        PdfObject nested = element as PdfObject;
                        if (nested != null)
                        {
                            FixUpObject(iot, owner, nested);
                        }
                        continue;
                    }

                    // A reference that already belongs to the owner stems from a reused clone.
                    if (reference.Document == owner)
                    {
                        continue;
                    }

                    //Debug.Assert(iref.Document == iot.Document);
                    // Foreign reference: substitute the reference of the cloned object.
                    PdfReference clonedReference = iot[reference.ObjectID];
                    Debug.Assert(clonedReference != null);
                    Debug.Assert(clonedReference.Document == owner);
                    dictionary.Elements[key] = clonedReference;
                }
                return;
            }

            PdfArray items = value as PdfArray;
            if (items == null)
            {
                // Neither dictionary nor array: nothing to fix up.
                return;
            }

            // A cloned direct object has no owner yet: adopt it. Otherwise it must already be ours.
            if (items.Owner == null)
            {
                items.Document = owner;
            }
            else
            {
                Debug.Assert(items.Owner == owner);
            }

            int elementCount = items.Elements.Count;
            for (int position = 0; position < elementCount; position++)
            {
                PdfItem element = items.Elements[position];

                PdfReference reference = element as PdfReference;
                if (reference == null)
                {
                    // Not a reference: descend into nested objects, ignore everything else.
                    PdfObject nested = element as PdfObject;
                    if (nested != null)
                    {
                        FixUpObject(iot, owner, nested);
                    }
                    continue;
                }

                // A reference that already belongs to the owner stems from a reused clone.
                if (reference.Document == owner)
                {
                    continue;
                }

                // Foreign reference: substitute the reference of the cloned object.
                Debug.Assert(reference.Document == iot.ExternalDocument);
                PdfReference clonedReference = iot[reference.ObjectID];
                Debug.Assert(clonedReference != null);
                Debug.Assert(clonedReference.Document == owner);
                items.Elements[position] = clonedReference;
            }
        }
Exemplo n.º 14
0
 /// <summary>
 /// Writes an indirect reference: calls WriteSeparator for the Character category,
 /// writes the reference's ToString() text raw, and records Character as the last category.
 /// </summary>
 /// <param name="iref">The reference whose string form is written.</param>
 public void Write(PdfReference iref)
 {
     // The separator call comes first, before the token itself is written.
     WriteSeparator(CharCat.Character);
     WriteRaw(iref.ToString());
     this.lastCat = CharCat.Character;
 }
Exemplo n.º 15
0
 /**
  * <summary>Wraps a function reference into a function object.</summary>
  * <remarks>Convenience overload: forwards to the two-argument Wrap overload, passing
  * null as the second argument.</remarks>
  * <param name="reference">Reference to a function object.</param>
  * <returns>Function object associated to the reference.</returns>
  */
 public static Function Wrap(
     PdfReference reference
     )
 {
     return(Wrap(reference, null));
 }
Exemplo n.º 16
0
        /// <summary>
        /// Inserts pages of the specified document into this document.
        /// </summary>
        /// <remarks>
        /// Works in two passes: first every page of the range is imported and inserted, then the
        /// link annotations of the imported pages are copied. A link annotation is only copied when
        /// its /Dest is a five-element array whose first element is a reference for which
        /// RemapReference returns a non-null reference.
        /// </remarks>
        /// <param name="index">The index in this document where to insert the pages.</param>
        /// <param name="document">The document to be inserted.</param>
        /// <param name="startIndex">The index of the first page to be inserted.</param>
        /// <param name="pageCount">The number of pages to be inserted.</param>
        /// <exception cref="ArgumentNullException"><paramref name="document"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="index"/>, <paramref name="startIndex"/> or <paramref name="pageCount"/>
        /// is negative or exceeds the available range.
        /// </exception>
        public void InsertRange(int index, PdfDocument document, int startIndex, int pageCount)
        {
            if (document == null)
            {
                throw new ArgumentNullException("document");
            }

            if (index < 0 || index > Count)
            {
                throw new ArgumentOutOfRangeException("index", "Argument 'index' out of range.");
            }

            int importDocumentPageCount = document.PageCount;

            // The sum is computed as long so that a large startIndex/pageCount pair cannot wrap
            // around to a negative int and slip through the range check.
            if (startIndex < 0 || (long)startIndex + pageCount > importDocumentPageCount)
            {
                throw new ArgumentOutOfRangeException("startIndex", "Argument 'startIndex' out of range.");
            }

            // A negative count must be rejected here; otherwise it only fails later, as an
            // OverflowException from the array allocations below.
            if (pageCount < 0 || pageCount > importDocumentPageCount)
            {
                throw new ArgumentOutOfRangeException("pageCount", "Argument 'pageCount' out of range.");
            }

            PdfPage[] insertPages = new PdfPage[pageCount];
            PdfPage[] importPages = new PdfPage[pageCount];

            // 1st create all new pages.
            for (int idx = 0, insertIndex = index, importIndex = startIndex;
                 importIndex < startIndex + pageCount;
                 idx++, insertIndex++, importIndex++)
            {
                PdfPage importPage = document.Pages[importIndex];
                PdfPage page       = ImportExternalPage(importPage);
                insertPages[idx] = page;
                importPages[idx] = importPage;

                Owner._irefTable.Add(page);

                // Add page substitute to importedObjectTable.
                PdfImportedObjectTable importedObjectTable = Owner.FormTable.GetImportedObjectTable(importPage);
                importedObjectTable.Add(importPage.ObjectID, page.Reference);

                PagesArray.Elements.Insert(insertIndex, page.Reference);

                if (Owner.Settings.TrimMargins.AreSet)
                {
                    page.TrimMargins = Owner.Settings.TrimMargins;
                }
            }
            Elements.SetInteger(Keys.Count, PagesArray.Elements.Count);

            // 2nd copy link annotations that are in the range of the imported pages.
            for (int idx = 0, importIndex = startIndex;
                 importIndex < startIndex + pageCount;
                 idx++, importIndex++)
            {
                PdfPage importPage = document.Pages[importIndex];
                PdfPage page       = insertPages[idx];

                // Get annotations.
                PdfArray annots = importPage.Elements.GetArray(PdfPage.Keys.Annots);
                if (annots != null)
                {
                    PdfAnnotations annotations = new PdfAnnotations(Owner);

                    // Loop through annotations.
                    int count = annots.Elements.Count;
                    for (int idxAnnotation = 0; idxAnnotation < count; idxAnnotation++)
                    {
                        PdfDictionary annot = annots.Elements.GetDictionary(idxAnnotation);
                        if (annot != null)
                        {
                            string subtype = annot.Elements.GetString(PdfAnnotation.Keys.Subtype);
                            if (subtype == "/Link")
                            {
                                bool addAnnotation = false;
                                PdfLinkAnnotation newAnnotation = new PdfLinkAnnotation(Owner);

                                PdfName[] importAnnotationKeyNames = annot.Elements.KeyNames;
                                foreach (PdfName pdfItem in importAnnotationKeyNames)
                                {
                                    PdfItem impItem;
                                    switch (pdfItem.Value)
                                    {
                                    case "/BS":
                                        // The border style is not copied; a fixed zero-width border is written.
                                        newAnnotation.Elements.Add("/BS", new PdfLiteral("<</W 0>>"));
                                        break;

                                    case "/F":      // /F 4
                                        impItem = annot.Elements.GetValue("/F");
                                        Debug.Assert(impItem is PdfInteger);
                                        newAnnotation.Elements.Add("/F", impItem.Clone());
                                        break;

                                    case "/Rect":      // /Rect [68.6 681.08 145.71 702.53]
                                        impItem = annot.Elements.GetValue("/Rect");
                                        Debug.Assert(impItem is PdfArray);
                                        newAnnotation.Elements.Add("/Rect", impItem.Clone());
                                        break;

                                    case "/StructParent":      // /StructParent 3
                                        impItem = annot.Elements.GetValue("/StructParent");
                                        Debug.Assert(impItem is PdfInteger);
                                        newAnnotation.Elements.Add("/StructParent", impItem.Clone());
                                        break;

                                    case "/Subtype":      // Already set.
                                        break;

                                    case "/Dest":      // /Dest [30 0 R /XYZ 68 771 0]
                                        impItem = annot.Elements.GetValue("/Dest");
                                        impItem = impItem.Clone();

                                        // Is value an array with 5 elements where the first one is an iref?
                                        PdfArray destArray = impItem as PdfArray;
                                        if (destArray != null && destArray.Elements.Count == 5)
                                        {
                                            PdfReference iref = destArray.Elements[0] as PdfReference;
                                            if (iref != null)
                                            {
                                                // Map the reference from the import page to its inserted counterpart.
                                                iref = RemapReference(insertPages, importPages, iref);
                                                if (iref != null)
                                                {
                                                    destArray.Elements[0] = iref;
                                                    newAnnotation.Elements.Add("/Dest", destArray);
                                                    addAnnotation = true;
                                                }
                                            }
                                        }
                                        break;

                                    default:
                                        // All other keys are intentionally not copied.
                                        break;
                                    }
                                }
                                // Add the new annotation only if it points to an imported page.
                                if (addAnnotation)
                                {
                                    annotations.Add(newAnnotation);
                                }
                            }
                        }
                    }

                    // At least one link annotation found?
                    if (annotations.Count > 0)
                    {
                        //Owner._irefTable.Add(annotations);
                        page.Elements.Add(PdfPage.Keys.Annots, annotations);
                    }
                }
            }
        }
Exemplo n.º 17
0
        /**
         * <summary>Wraps a font reference into a font object.</summary>
         * <remarks>A font already present in the document cache is returned from there; otherwise
         * the concrete font class is chosen from the Subtype entry of the font dictionary.</remarks>
         * <param name="baseObject">Font base object.</param>
         * <returns>Font object associated to the reference, or null if the base object is null.</returns>
         */
        public static Font Wrap(
            PdfDirectObject baseObject
            )
        {
            if (baseObject == null)
            {
                return(null);
            }

            PdfReference reference = (PdfReference)baseObject;

            /*
             * NOTE: Font structures are reified as complex objects, both IO- and CPU-intensive to
             * load, so an already instantiated font is served from the document-wide cache.
             */
            Dictionary <PdfReference, object> cache = reference.IndirectObject.File.Document.Cache;
            object cachedFont;
            if (cache.TryGetValue(reference, out cachedFont))
            {
                return((Font)cachedFont);
            }

            PdfDictionary fontDictionary = (PdfDictionary)reference.DataObject;
            PdfName       fontType       = (PdfName)fontDictionary[PdfName.Subtype];
            if (fontType == null)
            {
                throw new Exception("Font type undefined (reference: " + reference + ")");
            }

            // Type 1.
            if (fontType.Equals(PdfName.Type1))
            {
                // No descriptor: one of the standard Type 1 fonts.
                if (!fontDictionary.ContainsKey(PdfName.FontDescriptor))
                {
                    return(new StandardType1Font(reference));
                }

                // Custom Type 1: an OpenType-tagged FontFile3 program (OpenFont/CFF) is not handled yet.
                PdfDictionary fontDescriptor = (PdfDictionary)fontDictionary.Resolve(PdfName.FontDescriptor);
                bool isOpenFontCff =
                    fontDescriptor.ContainsKey(PdfName.FontFile3) &&
                    ((PdfName)((PdfStream)fontDescriptor.Resolve(PdfName.FontFile3)).Header.Resolve(PdfName.Subtype)).Equals(PdfName.OpenType);
                if (isOpenFontCff)
                {
                    throw new NotImplementedException();
                }
                return(new Type1Font(reference));
            }

            // TrueType.
            if (fontType.Equals(PdfName.TrueType))
            {
                return(new TrueTypeFont(reference));
            }

            // OpenFont (Type 0): the first descendant CID font decides the concrete class.
            if (fontType.Equals(PdfName.Type0))
            {
                PdfArray      descendantFonts   = (PdfArray)fontDictionary.Resolve(PdfName.DescendantFonts);
                PdfDictionary cidFontDictionary = (PdfDictionary)descendantFonts.Resolve(0);
                PdfName       cidFontType       = (PdfName)cidFontDictionary[PdfName.Subtype];

                if (cidFontType.Equals(PdfName.CIDFontType0)) // OpenFont/CFF.
                {
                    return(new Type0Font(reference));
                }
                if (cidFontType.Equals(PdfName.CIDFontType2)) // OpenFont/TrueType.
                {
                    return(new Type2Font(reference));
                }
                throw new NotImplementedException("Type 0 subtype " + cidFontType + " not supported yet.");
            }

            // Type 3.
            if (fontType.Equals(PdfName.Type3))
            {
                return(new Type3Font(reference));
            }

            // MMType1.
            if (fontType.Equals(PdfName.MMType1))
            {
                return(new MMType1Font(reference));
            }

            // Unknown.
            throw new NotSupportedException("Unknown font type: " + fontType + " (reference: " + reference + ")");
        }
Exemplo n.º 18
0
 /// <summary>
 /// Writes an indirect reference: calls WriteSeparator for the Character category,
 /// writes the reference's ToString() text raw, and records Character as the last category.
 /// </summary>
 /// <param name="iref">The reference whose string form is written.</param>
 public void Write(PdfReference iref)
 {
   // The separator call comes first, before the token itself is written.
   WriteSeparator(CharCat.Character);
   WriteRaw(iref.ToString());
   this.lastCat = CharCat.Character;
 }
Exemplo n.º 19
0
        /// <summary>
        /// Replace all indirect references to external objects by their cloned counterparts
        /// owned by the importer document.
        /// </summary>
        static void FixUpObject(PdfImportedObjectTable iot, PdfDocument owner, PdfObject value)
        {
            Debug.Assert(ReferenceEquals(iot.Owner, owner));

            PdfDictionary dict;
            PdfArray      array;

            if ((dict = value as PdfDictionary) != null)
            {
                // Case: The object is a dictionary.
                // Set document for cloned direct objects.
                if (dict.Owner == null)
                {
                    // If the dictionary has not yet an owner set the owner to the importing document.
                    dict.Document = owner;
                }
                else
                {
                    // If the dictionary already has an owner it must be the importing document.
                    Debug.Assert(dict.Owner == owner);
                }

                // Search for indirect references in all dictionary elements.
                PdfName[] names = dict.Elements.KeyNames;
                foreach (PdfName name in names)
                {
                    PdfItem item = dict.Elements[name];
                    Debug.Assert(item != null, "A dictionary element cannot be null.");

                    // Is item an iref?
                    PdfReference iref = item as PdfReference;
                    if (iref != null)
                    {
                        // Case: The item is a reference.
                        // Does the iref already belongs to the new owner?
                        if (iref.Document == owner)
                        {
                            // Yes: fine. Happens when an already cloned object is reused.
                            continue;
                        }

                        //Debug.Assert(iref.Document == iot.Document);
                        // No: Replace with iref of cloned object.
                        PdfReference newXRef = iot[iref.ObjectID];  // TODO: Explain this line of code in all details.
                        Debug.Assert(newXRef != null);
                        Debug.Assert(newXRef.Document == owner);
                        dict.Elements[name] = newXRef;
                    }
                    else
                    {
                        // Case: The item is not a reference.
                        // If item is an object recursively fix its inner items.
                        PdfObject pdfObject = item as PdfObject;
                        if (pdfObject != null)
                        {
                            // Fix up inner objects, i.e. recursively walk down the object tree.
                            FixUpObject(iot, owner, pdfObject);
                        }
                        else
                        {
                            // The item is something else, e.g. a name.
                            // Nothing to do.

                            // ...but let's double check this case in DEBUG build.
                            DebugCheckNonObjects(item);
                        }
                    }
                }
            }
            else if ((array = value as PdfArray) != null)
            {
                // Case: The object is an array.
                // Set document for cloned direct objects.
                if (array.Owner == null)
                {
                    // If the array has not yet an owner set the owner to the importing document.
                    array.Document = owner;
                }
                else
                {
                    // If the array already has an owner it must be the importing document.
                    Debug.Assert(array.Owner == owner);
                }

                // Search for indirect references in all array elements.
                int count = array.Elements.Count;
                for (int idx = 0; idx < count; idx++)
                {
                    PdfItem item = array.Elements[idx];
                    Debug.Assert(item != null, "An array element cannot be null.");

                    // Is item an iref?
                    PdfReference iref = item as PdfReference;
                    if (iref != null)
                    {
                        // Case: The item is a reference.
                        // Does the iref already belong to the owner?
                        if (iref.Document == owner)
                        {
                            // Yes: fine. Happens when an already cloned object is reused.
                            continue;
                        }

                        // No: replace with iref of cloned object.
                        Debug.Assert(iref.Document == iot.ExternalDocument);
                        PdfReference newXRef = iot[iref.ObjectID];
                        Debug.Assert(newXRef != null);
                        Debug.Assert(newXRef.Document == owner);
                        array.Elements[idx] = newXRef;
                    }
                    else
                    {
                        // Case: The item is not a reference.
                        // If item is an object recursively fix its inner items.
                        PdfObject pdfObject = item as PdfObject;
                        if (pdfObject != null)
                        {
                            // Fix up inner objects, i.e. recursively walk down the object tree.
                            FixUpObject(iot, owner, pdfObject);
                        }
                        else
                        {
                            // The item is something else, e.g. a name.
                            // Nothing to do.

                            // ...but let's double check this case in DEBUG build.
                            DebugCheckNonObjects(item);
                        }
                    }
                }
            }
            else
            {
                // Case: The item is some other indirect object.
                // Indirect integers, booleans, etc. are allowed, but PDFsharp does not create them.
                // If such objects occur in imported PDF files from other producers, there is nothing more to do.
                // The owner was already set, which is double checked by the assertions below.
                if (value is PdfNameObject || value is PdfStringObject || value is PdfBooleanObject || value is PdfIntegerObject || value is PdfNumberObject)
                {
                    Debug.Assert(value.IsIndirect);
                    Debug.Assert(value.Owner == owner);
                }
                else
                {
                    Debug.Assert(false, "Should not come here. Object is neither a dictionary nor an array.");
                }
            }
        }