Exemple #1
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/>, removes its script/style elements and
        /// extracts the remaining text, the individual words and (optionally) the outgoing links.
        /// The resulting <see cref="HtmlInfo"/> is also added to <c>_htmlInfos</c>.
        /// </summary>
        /// <param name="url">Address of the page to download; stored as <c>Link</c> on the result.</param>
        /// <param name="docNumber">Number stored on every extracted word and as <c>Level</c> on the result.</param>
        /// <param name="needTakeChild">
        /// When <c>true</c>, distinct well-formed absolute links starting with "http" are collected;
        /// otherwise <c>ChildLinks</c> is an empty list.
        /// </param>
        /// <returns>The populated <see cref="HtmlInfo"/> describing the page.</returns>
        private async Task <HtmlInfo> GetPageInfo(string url, int docNumber, bool needTakeChild)
        {
            string contentInner;

            // Dispose the response once the body has been read so its resources are released promptly.
            using (var page = await _client.GetAsync(url, HttpCompletionOption.ResponseContentRead))
            {
                contentInner = await page.Content.ReadAsStringAsync();
            }

            var doc = new HtmlDocument();
            doc.LoadHtml(contentInner);

            // Materialize before removing: nodes must not be detached while the tree is being enumerated.
            var nonContentNodes = doc.DocumentNode.Descendants()
                                  .Where(n => n.Name == "script" || n.Name == "style")
                                  .ToList();
            foreach (var node in nonContentNodes)
            {
                node.Remove();
            }

            // SelectNodes returns null (not an empty collection) when nothing matches,
            // so guard against pages that contain no text nodes at all.
            var textNodes = doc.DocumentNode.SelectNodes("//text()");
            var texts     = textNodes == null
                            ? new List<string>()
                            : textNodes.Select(node => node.InnerText.Trim()).ToList();

            // Split on any whitespace (null separator) rather than only ' ': otherwise words separated
            // by newlines or tabs would be fused together once the non-letter characters are stripped.
            var wordsInfo = texts
                            .SelectMany(text => text.Split((char[])null, StringSplitOptions.RemoveEmptyEntries))
                            .Select(token => new string(token.Where(char.IsLetter).ToArray()))
                            .Where(word => !string.IsNullOrEmpty(word))
                            .Select(word => new WordInfo
            {
                Word      = word,
                DocNumber = docNumber
            })
                            .ToList();

            var childLinks = new List<string>();
            if (needTakeChild)
            {
                // Same null-on-no-match behavior applies to pages without any <a href="..."> element.
                var anchors = doc.DocumentNode.SelectNodes("//a[@href]");
                if (anchors != null)
                {
                    childLinks = anchors
                                 .Select(anchor => anchor.Attributes["href"]?.Value)
                                 .Where(href => href != null &&
                                        Uri.IsWellFormedUriString(href, UriKind.Absolute) &&
                                        href.StartsWith("http", StringComparison.Ordinal))
                                 .Distinct()
                                 .ToList();
                }
            }

            var result = new HtmlInfo
            {
                Content = new ContentInfo
                {
                    // Reuse the already-trimmed text instead of querying the document a second time.
                    Content   = string.Join(" ", texts),
                    WordsInfo = wordsInfo
                },
                ChildLinks = childLinks,
                IsVisited  = true,
                Link       = url,
                Level      = docNumber,
                ParentLink = null
            };

            _htmlInfos.Add(result);
            return result;
        }
        /// <summary>
        /// Renders a document in html representation after reordering one of its pages.
        /// </summary>
        /// <param name="DocumentName">File/document name; it is also used as the document guid.</param>
        /// <param name="CurrentPageNumber">Existing order number of the page.</param>
        /// <param name="NewPageNumber">New order number of the page.</param>
        /// <param name="DocumentPassword">Optional password; applied only when it is not null or empty.</param>
        /// <returns>One <see cref="HtmlInfo"/> per page returned by the html handler.</returns>
        public static List <HtmlInfo> RenderDocumentAsHtml(String DocumentName, int CurrentPageNumber, int NewPageNumber, String DocumentPassword = null)
        {
            //ExStart:RenderAsHtmlAndReorderPage
            // Load the viewer configuration.
            var viewerConfig = Utilities.GetConfigurations();

            // Keep the handler typed as its base class so it can be handed over by reference below.
            ViewerHandler<PageHtml> handler = new ViewerHtmlHandler(viewerConfig);

            // The document name serves as the guid.
            var documentGuid = DocumentName;

            // Reorder transformation, with resources embedded into the produced html.
            var htmlOptions = new HtmlOptions
            {
                Transformations     = Transformation.Reorder,
                IsResourcesEmbedded = true
            };

            // A password is only needed for protected documents.
            if (!string.IsNullOrEmpty(DocumentPassword))
            {
                htmlOptions.Password = DocumentPassword;
            }

            // The handler is passed by reference to the reorder helper.
            Utilities.PageTransformations.ReorderPage(ref handler, documentGuid, CurrentPageNumber, NewPageNumber);

            // Cast back to the html handler and fetch the pages in html form.
            List<PageHtml> renderedPages = ((ViewerHtmlHandler)handler).GetPages(documentGuid, htmlOptions);

            // Copy html content and page number of every page into the result list.
            return renderedPages.ConvertAll(renderedPage => new HtmlInfo
            {
                HtmlContent = renderedPage.HtmlContent,
                PageNmber   = renderedPage.PageNumber
            });
            //ExEnd:RenderAsHtmlAndReorderPage
        }
        /// <summary>
        /// Renders a document in html representation after applying a rotate transformation.
        /// </summary>
        /// <param name="DocumentName">File/document name; it is also used as the document guid.</param>
        /// <param name="pageNumber">Page number forwarded to the rotate helper.</param>
        /// <param name="RotationAngle">Rotation angle forwarded to the rotate helper.</param>
        /// <param name="DocumentPassword">Optional password; applied only when it is not null or empty.</param>
        /// <returns>One <see cref="HtmlInfo"/> per page returned by the html handler.</returns>
        public static List <HtmlInfo> RotateDocumentAsHtml(String DocumentName, int pageNumber, int RotationAngle, String DocumentPassword = null)
        {
            //ExStart:RenderAsImageWithRotationTransformation
            // Load the viewer configuration.
            var viewerConfig = Utilities.GetConfigurations();

            // Create the html handler, typed as its base class so it can be handed over by reference below.
            ViewerHandler<PageHtml> handler = new ViewerHtmlHandler(viewerConfig);

            // The document name serves as the guid.
            var documentGuid = DocumentName;

            // Html options carrying the rotate transformation.
            var htmlOptions = new HtmlOptions
            {
                Transformations = Transformation.Rotate
            };

            // A password is only needed for protected documents.
            if (!string.IsNullOrEmpty(DocumentPassword))
            {
                htmlOptions.Password = DocumentPassword;
            }

            // The handler is passed by reference to the rotate helper.
            Utilities.PageTransformations.RotatePages(ref handler, documentGuid, pageNumber, RotationAngle);

            // Cast back to the html handler and fetch the pages in html form.
            List<PageHtml> renderedPages = ((ViewerHtmlHandler)handler).GetPages(documentGuid, htmlOptions);

            // Copy html content and page number of every page into the result list.
            return renderedPages.ConvertAll(renderedPage => new HtmlInfo
            {
                HtmlContent = renderedPage.HtmlContent,
                PageNmber   = renderedPage.PageNumber
            });
            //ExEnd:RenderAsImageWithRotationTransformation
        }
        /// <summary>
        /// Renders a document in html representation with a watermark.
        /// The watermark position is always <c>WatermarkPosition.Diagonal</c>.
        /// </summary>
        /// <param name="DocumentName">File/document name; it is also used as the document guid.</param>
        /// <param name="WatermarkText">Watermark text.</param>
        /// <param name="WatermarkColor">Watermark color (System.Drawing.Color).</param>
        /// <param name="WatermarkWidth">Width of the watermark as an integer. Optional; the default value is 100.</param>
        /// <param name="DocumentPassword">Optional password; applied only when it is not null or empty.</param>
        /// <returns>One <see cref="HtmlInfo"/> per page returned by the html handler.</returns>
        public static List <HtmlInfo> RenderDocumentAsHtml(String DocumentName, String WatermarkText, Color WatermarkColor, int WatermarkWidth = 100, String DocumentPassword = null)
        {
            //ExStart:RenderAsHtmlWithWaterMark
            // Load the viewer configuration.
            var viewerConfig = Utilities.GetConfigurations();

            // Create the html handler.
            var viewerHtmlHandler = new ViewerHtmlHandler(viewerConfig);

            // The document name serves as the guid.
            var documentGuid = DocumentName;

            // Html options; resources are not embedded into the produced html.
            var htmlOptions = new HtmlOptions
            {
                IsResourcesEmbedded = false
            };

            // A password is only needed for protected documents.
            if (!string.IsNullOrEmpty(DocumentPassword))
            {
                htmlOptions.Password = DocumentPassword;
            }

            // The options object is passed by reference to the watermark helper.
            Utilities.PageTransformations.AddWatermark(ref htmlOptions, WatermarkText, WatermarkColor, WatermarkPosition.Diagonal, WatermarkWidth);

            // Fetch the pages in html form.
            List<PageHtml> renderedPages = viewerHtmlHandler.GetPages(documentGuid, htmlOptions);

            // Copy html content and page number of every page into the result list.
            return renderedPages.ConvertAll(renderedPage => new HtmlInfo
            {
                HtmlContent = renderedPage.HtmlContent,
                PageNmber   = renderedPage.PageNumber
            });
            //ExEnd:RenderAsHtmlWithWaterMark
        }
        /// <summary>
        /// Renders a document in html representation without any transformation.
        /// </summary>
        /// <param name="DocumentName">File name; it is also used as the document guid.</param>
        /// <param name="DocumentPassword">Optional password; applied only when it is not null or empty.</param>
        /// <returns>One <see cref="HtmlInfo"/> per page returned by the html handler.</returns>
        public static List <HtmlInfo> RenderDocumentAsHtml(String DocumentName, String DocumentPassword = null)
        {
            //ExStart:RenderAsHtml
            // Load the viewer configuration.
            var viewerConfig = Utilities.GetConfigurations();

            // Create the html handler.
            var viewerHtmlHandler = new ViewerHtmlHandler(viewerConfig);

            // The document name serves as the guid.
            var documentGuid = DocumentName;

            // Html options; resources are embedded into the produced html.
            var htmlOptions = new HtmlOptions
            {
                IsResourcesEmbedded = true
            };

            // A password is only needed for protected documents.
            if (!string.IsNullOrEmpty(DocumentPassword))
            {
                htmlOptions.Password = DocumentPassword;
            }

            // Fetch the pages in html form.
            List<PageHtml> renderedPages = viewerHtmlHandler.GetPages(documentGuid, htmlOptions);

            // Copy html content and page number of every page into the result list.
            return renderedPages.ConvertAll(renderedPage => new HtmlInfo
            {
                HtmlContent = renderedPage.HtmlContent,
                PageNmber   = renderedPage.PageNumber
            });
            //ExEnd:RenderAsHtml
        }
        /// <summary>
        /// Splits an html fragment into a title part and the remaining content.
        /// The first h1/h2/h3 element supplies the decoded <c>Title</c>. When that element is also the
        /// first child of the document it is stored as <c>RawTitle</c> and removed from the content;
        /// otherwise <c>RawTitle</c> is empty and the content is left untouched.
        /// </summary>
        /// <param name="contentHtml">Html text to parse.</param>
        /// <returns>An <see cref="HtmlInfo"/> carrying <c>Title</c>, <c>RawTitle</c> and <c>Content</c>.</returns>
        private static HtmlInfo SeparateHtml(string contentHtml)
        {
            var parsed = new HtmlDocument();
            parsed.LoadHtml(contentHtml);

            var root = parsed.DocumentNode;

            // TODO: how to get TITLE
            var heading = root.SelectSingleNode("//h1|//h2|//h3");

            // HtmlAgilityPack does not decode InnerText (looks like a bug there), so decode it here.
            var title = StringHelper.HtmlDecode(heading?.InnerText);

            // Only a heading that opens the document is treated as the raw title and cut out of the content.
            var rawTitle      = string.Empty;
            var headingIsLead = heading != null && root.FirstChild == heading;
            if (headingIsLead)
            {
                rawTitle = heading.OuterHtml;
                heading.Remove();
            }

            // The outer html is read after the optional removal, so the content excludes a leading heading.
            return new HtmlInfo
            {
                Title    = title,
                RawTitle = rawTitle,
                Content  = root.OuterHtml
            };
        }