コード例 #1
0
        /// <summary>
        /// Opens the given PDF and, for every page, writes the page's content bytes (decoded as
        /// UTF-8) to a text file in the same directory as the PDF. Each dump file is named
        /// "{file name}_Page_{page number}.txt".
        /// </summary>
        /// <param name="file">The PDF file to read.</param>
        /// <returns>
        /// The list of stock advices. Nothing in this method adds to the list, so it is
        /// currently always empty.
        /// </returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="file"/> is null.</exception>
        public IEnumerable<StockAdvice> ReadStockAdvice(FileInfo file)
        {
            if (file == null)
            {
                throw new System.ArgumentNullException(nameof(file));
            }

            var stockAdvices = new List<StockAdvice>();

            // The using blocks release the document and the reader even when a page fails to
            // load or a dump file cannot be written.
            using (var reader = new iText.Kernel.Pdf.PdfReader(file))
            using (var doc = new iText.Kernel.Pdf.PdfDocument(reader))
            {
                int pageCount = doc.GetNumberOfPages();
                var folder    = file.Directory;

                // Page numbers are 1-based, so the last valid page number is pageCount itself.
                for (int pageNum = 1; pageNum <= pageCount; pageNum++)
                {
                    var    page        = doc.GetPage(pageNum);
                    byte[] pageData    = page.GetContentBytes();
                    string pageContent = Encoding.UTF8.GetString(pageData);
                    string fileName    = Path.Combine(folder.FullName, file.Name + "_Page_" + pageNum + ".txt");
                    WritePageContentToFile(fileName, pageContent);
                }
            }

            return stockAdvices;
        }
コード例 #2
0
        /// <summary>
        /// Reads a PDF file from the given path and extracts the text of its leading pages.
        /// </summary>
        /// <param name="path">The path to the PDF file.</param>
        /// <param name="pageCount">
        /// The number of pages to read, counted from the first page (0 = all pages, 1 by default).
        /// Values larger than the document's page count are clamped to it; negative values
        /// read a single page.
        /// </param>
        /// <returns>
        /// A <see cref="DocumentTree"/> to which the extracted text of each page read has been
        /// added, in page order.
        /// </returns>
        public static DocumentTree PdfToText(string path, int pageCount = 1)
        {
            var pages = new DocumentTree();

            // Disposing the document and the reader closes them; no explicit Close() is needed.
            using (iText.Kernel.Pdf.PdfReader reader = new iText.Kernel.Pdf.PdfReader(path))
            using (iText.Kernel.Pdf.PdfDocument pdfDocument = new iText.Kernel.Pdf.PdfDocument(reader))
            {
                int totalPages = pdfDocument.GetNumberOfPages();

                // Non-positive requests default to one page, except 0 which means "all pages".
                int pagesToRead = pageCount > 0 ? pageCount : 1;
                if (pageCount == 0 || pagesToRead > totalPages)
                {
                    pagesToRead = totalPages;
                }

                for (int i = 1; i <= pagesToRead; ++i)
                {
                    // A location-based strategy keeps every text chunk it has received, so a
                    // fresh instance is required per page; reusing one would prepend the text
                    // of all previous pages to each later page.
                    var strategy = new iText.Kernel.Pdf.Canvas.Parser.Listener.LocationTextExtractionStrategy();

                    var page = pdfDocument.GetPage(i);
                    var txt  = iText.Kernel.Pdf.Canvas.Parser.PdfTextExtractor.GetTextFromPage(page, strategy);
                    pages.Add(txt);
                }
            }

            return pages;
        }
コード例 #3
0
        /// <summary>
        /// Builds the book for the given PDF file and, when an ISBN-like string is found in the
        /// text of the leading pages, stores the matched text in the book's Isbnsearch property.
        /// </summary>
        /// <param name="filepath">The path to the PDF file.</param>
        /// <returns>
        /// The book returned by the base implementation, with Isbnsearch set when a match was
        /// found and the book is a <see cref="PocoBook"/>; a new <see cref="PocoBook"/> for the
        /// path when an <see cref="IOException"/> occurs.
        /// </returns>
        public override BookatHome GetPocoBook(string filepath)
        {
            // Matched against text from which ':' has been removed and '-' replaced by a space.
            const string isbnPattern = @"ISBN(-1(?:(0)|3))?:?\x20(\s)*[0-9]+[- ][0-9]+[- ][0-9]+[- ][0-9]*[- ]*[xX0-9]";

            try
            {
                if (File.Exists(filepath))
                {
                    // Nested using blocks dispose the document before the reader it was built on,
                    // on every exit path (including the early return below).
                    using (var reader = new iText.Kernel.Pdf.PdfReader(filepath))
                    using (var pDoc = new iText.Kernel.Pdf.PdfDocument(reader))
                    {
                        int nPages = pDoc.GetNumberOfPages();

                        // NOTE(review): documents with fewer than 15 pages are searched entirely,
                        // longer ones only on their first 10 pages — confirm this is intended.
                        int maxsearch = nPages < 15 ? nPages : 10;

                        for (int i = 1; i <= maxsearch; i++)
                        {
                            ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
                            string currentText = PdfTextExtractor.GetTextFromPage(pDoc.GetPage(i), strategy);

                            // Round-trip the text through Encoding.Default and back via UTF-8.
                            currentText = Encoding.UTF8.GetString(Encoding.Convert(Encoding.Default, Encoding.UTF8, Encoding.Default.GetBytes(currentText)));

                            // Ordinal, case-insensitive check: lower-casing with the current
                            // culture breaks under e.g. Turkish, where "I" lowers to a dotless "ı".
                            if (!currentText.Contains("isbn", System.StringComparison.OrdinalIgnoreCase))
                            {
                                continue;
                            }

                            Match m = Regex.Match(currentText.Replace(":", "").Replace("-", " "), isbnPattern);
                            if (m.Success)
                            {
                                BookatHome book = base.GetPocoBook(filepath);

                                // Only a PocoBook can carry the ISBN; any other result is returned
                                // unchanged instead of failing on a null reference.
                                if (book is PocoBook retBook)
                                {
                                    retBook.Isbnsearch = m.Value;
                                }
                                return book;
                            }
                        }
                    }
                }
            }
            catch (IOException)
            {
                return new PocoBook(filepath);
            }

            return base.GetPocoBook(filepath);
        }