Ejemplo n.º 1
0
        /// <summary>
        /// Collects the non-empty text lines found on every page of <c>_document</c>.
        /// </summary>
        /// <returns>
        /// The extracted lines in page order. Pages whose absorber yields no text
        /// contribute nothing, and empty lines are dropped.
        /// </returns>
        internal string[] GetParagraphs()
        {
            var collected = new List<string>();

            foreach (AsposePdf.Page currentPage in _document.Pages)
            {
                // One absorber per page, so each page's text is handled on its own.
                var absorber = new AsposePdf.Text.TextAbsorber();
                try
                {
                    currentPage.Accept(absorber);
                }
                catch (Exception e)
                {
                    // Best effort: log the failure and carry on with whatever
                    // text (if any) the absorber holds for this page.
                    Logger.LogError(e);
                }

                string pageText = absorber.Text;
                if (!String.IsNullOrEmpty(pageText))
                {
                    // Todo: There should be a better way to get the text per paragraph!
                    collected.AddRange(Regex.Split(pageText, "\r\n"));
                }
            }

            // Splitting can produce empty entries (e.g. consecutive line breaks); discard them.
            collected.RemoveAll(line => String.IsNullOrEmpty(line));

            return collected.ToArray();
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Reads the text contained in a PDF file.
        /// </summary>
        /// <param name="filename">Path of the PDF file; passed directly to the <c>Aspose.Pdf.Document</c> constructor.</param>
        /// <returns>The text gathered by a <c>TextAbsorber</c> that visited all pages of the document.</returns>
        public static string GetTextFromPdf(string filename)
        {
            var textAbsorber = new Aspose.Pdf.Text.TextAbsorber();

            // The document is disposable; release it as soon as extraction is done
            // instead of leaving the file and its resources to the finalizer.
            using (var pdf = new Aspose.Pdf.Document(filename))
            {
                pdf.Pages.Accept(textAbsorber);

                // Read the result while the document is still open.
                return textAbsorber.Text;
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Extracts the text of a PDF file, page by page, up to a configured page limit.
        /// </summary>
        /// <param name="filepath">Path of the PDF file; passed directly to the <c>Aspose.Pdf.Document</c> constructor.</param>
        /// <returns>The concatenated text of the processed pages, in page order.</returns>
        public static string GetPdfContent(string filepath)
        {
            // Page limit read from the "PDFMaxPage" app setting.
            // NOTE(review): the second argument is presumably the fallback used when
            // the setting is missing - confirm against Util.GetAppSetting.
            int numPDFMaxPage = Util.GetAppSetting("PDFMaxPage", PDFMaxPage.ToString()).ToInt();

            StringBuilder sb = new StringBuilder();

            // Dispose the document when done so the file and its resources are released promptly.
            using (Aspose.Pdf.Document doc = new Aspose.Pdf.Document(filepath))
            {
                // The page collection is indexed from 1; stop at the last page or at the limit.
                for (int i = 1; i <= doc.Pages.Count && i <= numPDFMaxPage; i++)
                {
                    // Use a fresh absorber for each page. Reusing a single absorber and
                    // appending its Text after every page would append earlier pages
                    // again if the absorber accumulates text across Accept calls.
                    Aspose.Pdf.Text.TextAbsorber txt = new Aspose.Pdf.Text.TextAbsorber();

                    doc.Pages[i].Accept(txt);
                    sb.Append(txt.Text);
                }
            }

            return sb.ToString();
        }