Exemple #1
0
        /// <summary>
        /// Downloads the PDF at <paramref name="uri"/> and renders the extracted text of each
        /// page as an HTML fragment.
        /// </summary>
        /// <param name="uri">Location handed to the Azure service to download the PDF from.</param>
        /// <returns>
        /// One <c>&lt;div class="pdfHtml"&gt;</c> fragment per page, in page order. If text
        /// extraction or rendering fails part-way through, the error is logged and the pages
        /// rendered so far are returned.
        /// </returns>
        /// <remarks>
        /// Failures while downloading the stream or opening the PDF are not caught here and
        /// propagate to the caller; only per-page processing is best-effort.
        /// </remarks>
        public async Task <List <string> > GetPDFasHtml(Uri uri)
        {
            var pages = new List <string>();

            // NOTE(review): assumes this method owns the downloaded stream and may dispose it
            // once the document has been processed - confirm against _azureService.
            using (var pdfStream = await _azureService.DownloadToStream(uri))
            using (PdfDocument document = PdfReader.Open(pdfStream))
            {
                try
                {
                    for (int i = 0; i < document.PageCount; i++)
                    {
                        string pageText = PdfTextExtractor.GetText(document.Pages[i]);

                        // The PDF content is external input: encode it BEFORE adding our own
                        // markup so that '<', '>' and '&' in the text cannot break or inject HTML.
                        pageText = System.Net.WebUtility.HtmlEncode(pageText);

                        // Normalise Windows line endings first so that both "\r\n" and a bare
                        // "\n" become exactly one <br/>.
                        pageText = pageText.Replace("\r\n", "\n").Replace("\n", "<br/>");
                        pageText = pageText.Replace("  ", "<p></p>");

                        using (StringWriter stringWriter = new StringWriter())
                        {
                            using (HtmlTextWriter writer = new HtmlTextWriter(stringWriter))
                            {
                                writer.AddAttribute(HtmlTextWriterAttribute.Class, "pdfHtml");
                                writer.RenderBeginTag(HtmlTextWriterTag.Div); // <div class="pdfHtml">
                                writer.Write(pageText);
                                writer.RenderEndTag();                        // </div>
                            }

                            pages.Add(stringWriter.ToString());
                        }
                    }
                }
                catch (Exception e)
                {
                    // Best-effort: keep the pages rendered so far. Log the full exception
                    // (type, message and stack trace) rather than the message alone.
                    _logger.Error(e.ToString());
                }
            }

            return pages;
        }