Пример #1
0
        /// <summary>Scan the DOM tree for all (internal) link targets</summary>
        /// <remarks>
        /// Previously collected link destinations are discarded before scanning. The scan starts from the
        /// topmost ancestor of <paramref name="root"/>, so the whole tree is visited even if a nested node
        /// is passed in. For every <c>a</c> element whose <c>href</c> attribute starts with <c>#</c>,
        /// the text after the <c>#</c> is added to the link destinations.
        /// </remarks>
        /// <param name="root">the DOM tree root node</param>
        /// <returns>this LinkContext</returns>
        public virtual iText.Html2pdf.Attach.Impl.LinkContext ScanForIds(INode root)
        {
            // clear previous
            linkDestinations.Clear();
            // expensive scan operation: climb to the topmost ancestor first so the full tree is covered
            while (root.ParentNode() != null)
            {
                root = root.ParentNode();
            }
            // explicit stack instead of recursion, so deep trees do not exhaust the call stack
            Stack <INode> stk = new Stack <INode>();

            stk.Push(root);
            while (!stk.IsEmpty())
            {
                INode n = stk.Pop();
                if (n is IElementNode)
                {
                    IElementNode elem = (IElementNode)n;
                    if (TagConstants.A.Equals(elem.Name()))
                    {
                        String href = elem.GetAttribute(AttributeConstants.HREF);
                        // Ordinal comparison: the check must look at the literal first character only.
                        // The culture-sensitive overload can skip ignorable characters (e.g. a soft hyphen),
                        // which would let a non-fragment href through and make Substring(1) strip the wrong char.
                        if (href != null && href.StartsWith("#", StringComparison.Ordinal))
                        {
                            // store the fragment without its leading '#'
                            linkDestinations.Add(href.Substring(1));
                        }
                    }
                }
                if (!n.ChildNodes().IsEmpty())
                {
                    stk.AddAll(n.ChildNodes());
                }
            }
            return(this);
        }
 /// <summary>
 /// Opens the given PDF and, page by page, records where each image XObject sits in the page's
 /// resource tree, extracting the images along the way.
 /// </summary>
 /// <remarks>
 /// When <c>useGs</c> is false every image XObject is written individually through
 /// <c>CreateImageFromPdfXObject</c>; otherwise Ghostscript is run once for each page that contains
 /// at least one image XObject. The image rectangles reported while processing the page content are
 /// stored in <c>ignoredImagesAreas</c> under the page number.
 /// Pages whose resources have no XObject dictionary are skipped: nothing is stored for them, neither
 /// in the returned map nor in <c>ignoredImagesAreas</c>.
 /// </remarks>
 /// <param name="pdf">path of the PDF document to read</param>
 /// <param name="outputPath">prefix for the extracted image file names; also handed to Ghostscript</param>
 /// <returns>map from page number (starting at 1) to the image object paths collected for that page</returns>
 private IDictionary <int, CleanUpImagesCompareTool.PageImageObjectsPaths> ExtractImagesFromPdf(String pdf,
                                                                                                String outputPath)
 {
     using (PdfReader reader = new PdfReader(pdf))
     using (PdfDocument document = new PdfDocument(reader))
     {
         IDictionary<int, CleanUpImagesCompareTool.PageImageObjectsPaths> pathsByPage =
             new Dictionary<int, CleanUpImagesCompareTool.PageImageObjectsPaths>();

         for (int pageNum = 1; pageNum <= document.GetNumberOfPages(); pageNum++)
         {
             PdfPage currentPage = document.GetPage(pageNum);
             CleanUpImagesCompareTool.PageImageObjectsPaths pagePaths =
                 new CleanUpImagesCompareTool.PageImageObjectsPaths(currentPage.GetPdfObject().GetIndirectReference());

             // Path from the nearest indirect object down to the current dictionary entry.
             Stack<LocalPathItem> pathFromIndirect = new Stack<LocalPathItem>();

             PdfResources resources = currentPage.GetResources();
             if (!resources.GetPdfObject().IsIndirect())
             {
                 // Direct resources dictionary: it is reached through the page's /Resources key.
                 pathFromIndirect.Push(new DictPathItem(PdfName.Resources));
             }
             else
             {
                 pagePaths.AddIndirectReference(resources.GetPdfObject().GetIndirectReference());
             }

             PdfDictionary xObjectDict = resources.GetResource(PdfName.XObject);
             if (xObjectDict == null)
             {
                 // No XObjects on this page: nothing is recorded for it at all.
                 continue;
             }

             if (!xObjectDict.IsIndirect())
             {
                 pathFromIndirect.Push(new DictPathItem(PdfName.XObject));
             }
             else
             {
                 // An indirect XObject dictionary becomes the new starting point of the local path.
                 pagePaths.AddIndirectReference(xObjectDict.GetIndirectReference());
                 pathFromIndirect.Clear();
             }

             bool pageNeedsGhostscript = false;
             foreach (PdfName xObjectName in xObjectDict.KeySet())
             {
                 bool isImageStream = xObjectDict.Get(xObjectName).IsStream() &&
                                      PdfName.Image.Equals(xObjectDict.GetAsStream(xObjectName).GetAsName(PdfName.Subtype));
                 if (!isImageStream)
                 {
                     continue;
                 }

                 PdfImageXObject image = new PdfImageXObject(xObjectDict.GetAsStream(xObjectName));
                 pathFromIndirect.Push(new DictPathItem(xObjectName));

                 if (useGs)
                 {
                     // Ghostscript renders per page, so only remember that this page has an image.
                     pageNeedsGhostscript = true;
                 }
                 else
                 {
                     String imageExtension = image.IdentifyImageFileExtension();
                     String imageFile      = outputPath + xObjectName + "_" + pageNum + "." + imageExtension;
                     CreateImageFromPdfXObject(imageFile, image);
                 }

                 // Hand over a copy of the current path via two successive AddAll transfers.
                 // NOTE(review): presumably AddAll pushes in enumeration order, which reverses a Stack,
                 // so the second transfer restores the original order - confirm against the AddAll helper.
                 Stack<LocalPathItem> flipped = new Stack<LocalPathItem>();
                 flipped.AddAll(pathFromIndirect);
                 Stack<LocalPathItem> pathCopy = new Stack<LocalPathItem>();
                 pathCopy.AddAll(flipped);
                 pagePaths.AddLocalPath(pathCopy);

                 // Drop this image's name again so the base path is ready for the next entry.
                 pathFromIndirect.Pop();
             }

             if (useGs && pageNeedsGhostscript)
             {
                 String pageFile = "Page_" + pageNum;
                 ghostscriptHelper.RunGhostScriptImageGeneration(pdf, outputPath, pageFile, pageNum.ToString());
             }

             // Collect the image rectangles reported while processing the page content.
             CleanUpImagesCompareTool.ImageRenderListener renderListener = new CleanUpImagesCompareTool.ImageRenderListener();
             new PdfCanvasProcessor(renderListener).ProcessPageContent(currentPage);
             ignoredImagesAreas.Put(pageNum, renderListener.GetImageRectangles());

             pathsByPage.Put(pageNum, pagePaths);
         }

         return pathsByPage;
     }
 }