コード例 #1
0
ファイル: PDFParser.cs プロジェクト: mlnethub/thrinax
        /// <summary>
        /// Parses a PDF supplied as an in-memory byte array.
        /// </summary>
        /// <param name="pdfStream">Raw bytes of the PDF document.</param>
        /// <param name="tableContainType">Table containment style, forwarded to the document-based Parser overload.</param>
        /// <returns>The parsed model, or <c>null</c> when loading or parsing throws.</returns>
        public static PDFModel Parser(byte[] pdfStream, TableContainType tableContainType)
        {
            PDDocument reader = null;

            try
            {
                InputStream sbs = new ByteArrayInputStream(pdfStream);
                reader = PDDocument.load(sbs);
                return Parser(reader, tableContainType);
            }
            catch (Exception)
            {
                // Best-effort contract: every failure is reported to the caller as null.
                return null;
            }
            finally
            {
                // Single cleanup point. The document is still null when load itself failed,
                // so the close must be guarded; an unguarded close here would replace the
                // intended null result with a NullReferenceException.
                if (reader != null)
                {
                    reader.close();
                }
            }
        }
コード例 #2
0
        /// <summary>
        /// Extracts the text of every page of a PDF file, keyed by 1-based page number.
        /// </summary>
        /// <param name="pdfFileName">Path of the PDF file to read.</param>
        /// <returns>A dictionary mapping each page number to the text GetText produced for that page.</returns>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        public static Dictionary <int, string> Extract(string pdfFileName)
        {
            if (!File.Exists(pdfFileName))
            {
                // Report the offending path instead of the literal parameter name.
                throw new FileNotFoundException("PDF file not found.", pdfFileName);
            }

            var        result      = new Dictionary <int, string>();
            PDDocument pdfDocument = PDDocument.load(pdfFileName);

            try
            {
                var pdfStripper = new PDFTextStripper();
                pdfStripper.setPageSeparator(Environment.NewLine + Environment.NewLine);

                int pageCount = pdfDocument.getNumberOfPages();
                for (int i = 1; i <= pageCount; i++)
                {
                    // Restrict the stripper to exactly one page per pass.
                    pdfStripper.setStartPage(i);
                    pdfStripper.setEndPage(i);

                    result.Add(i, GetText(pdfStripper, pdfDocument));
                }
            }
            finally
            {
                // Release the document even when extraction fails part-way through.
                pdfDocument.close();
            }

            return result;
        }
コード例 #3
0
ファイル: PDFValidator.cs プロジェクト: jehan2898/root
 /// <summary>
 /// Loads the PDF at <paramref name="fileName"/>, runs CheckAllPages on it and records the
 /// outcome in IsValid, IsPasswordProtected and ErrorMessage.
 /// </summary>
 /// <param name="fileName">Path of the PDF file to validate.</param>
 /// <param name="password">Password used to open the document; when null or empty the file is loaded without one.</param>
 public PDFValidator(string fileName, string password)
 {
     try
     {
         this.IsValid = true;
         if (!System.IO.File.Exists(fileName))
         {
             // NOTE(review): IsValid stays true for a missing file - confirm this is intended.
             Console.WriteLine("The PDF file does not Exist.");
             return;
         }

         java.io.File pdfFile    = new java.io.File(fileName);
         PDDocument   pDDocument = string.IsNullOrEmpty(password)
             ? PDDocument.load(pdfFile)
             : PDDocument.load(pdfFile, password);
         try
         {
             if (pDDocument.isEncrypted())
             {
                 this.IsPasswordProtected = true;
             }
             this.CheckAllPages(pDDocument);
         }
         finally
         {
             // Close the document even when the page check throws.
             pDDocument.close();
         }
     }
     catch (InvalidPasswordException)
     {
         this.IsPasswordProtected = true;
         this.IsValid             = false;
     }
     catch (Exception exception)
     {
         this.ErrorMessage = string.Format("PDF analysis failed With exception {0}", exception.Message);
         this.IsValid      = false;
     }
 }
コード例 #4
0
        /// <summary>
        /// Command-line entry point: stamps a watermark text on every page of a PDF and saves the result.
        /// </summary>
        /// <param name="args">
        /// args[0] = original PDF file name, args[1] = watermark text,
        /// optional args[2] = output PDF file name (defaults to "watermarked_" + original file name,
        /// placed next to the original).
        /// </param>
        static void Main(string[] args)
        {
            if (args == null || args.Length < 2)
            {
                System.Console.WriteLine("Usage: " + AppDomain.CurrentDomain.FriendlyName + " <original PDF filename> <watermark text> [new PDF filename]" + Environment.NewLine +
                                         "  For example: " + AppDomain.CurrentDomain.FriendlyName + " myDoc.pdf \"This is a Draft\"");
                return;
            }

            string origName     = args[0];
            string watermarkTxt = args[1];
            if (!System.IO.File.Exists(origName))
            {
                System.Console.WriteLine("Error: cannot find the original PDF file(" + origName + "). Please correct the filename or the path and try again.");
                return;
            }

            // Honor the optional third argument advertised in the usage text; otherwise prefix
            // only the file-name part so that inputs containing a directory still yield a valid path.
            string newName;
            if (args.Length > 2 && !string.IsNullOrEmpty(args[2]))
            {
                newName = args[2];
            }
            else
            {
                string directory = System.IO.Path.GetDirectoryName(origName);
                string fileName  = "watermarked_" + System.IO.Path.GetFileName(origName);
                newName = string.IsNullOrEmpty(directory) ? fileName : System.IO.Path.Combine(directory, fileName);
            }

            PDDocument origDoc = PDDocument.load(new java.io.File(origName)); // NOTE: PDDocument.load() only takes java.io.File, not System.IO.File from C#.Net
            try
            {
                PDPageTree allPages = origDoc.getPages();
                PDFont     font     = PDType1Font.HELVETICA_BOLD;
                for (int i = 0, len = allPages.getCount(); i < len; ++i)
                {
                    PDPage pg = (PDPage)allPages.get(i);
                    // Stamp the text given on the command line rather than a fixed string.
                    addWatermarkText(origDoc, pg, font, watermarkTxt);
                }

                origDoc.save(newName);
            }
            finally
            {
                // Release the document even when stamping or saving fails.
                origDoc.close();
            }
        }
        /// <summary>
        /// Extracts the text of a PDF and writes it as a single paragraph into a newly created DocX document.
        /// </summary>
        /// <param name="PDFpath">Path of the PDF file to convert.</param>
        /// <returns>Path of the created document, or an empty string when any step throws.</returns>
        protected internal String ConvertPDFToDoc(string PDFpath)
        {
            try
            {
                PDFTextStripper textstrip = new PDFTextStripper();
                String          StringDocx;

                PDDocument PDFdoc = PDDocument.load(PDFpath);
                try
                {
                    StringDocx = textstrip.getText(PDFdoc);
                }
                finally
                {
                    // Close the PDF even when text extraction fails.
                    PDFdoc.close();
                }

                // Build the path where the converted file is saved.
                String DocxPath = fn.CreateFolderToSaveDocs(fileName);
                var    wordDoc  = DocX.Create(DocxPath);
                wordDoc.InsertParagraph(StringDocx);
                wordDoc.Save();
                return DocxPath;
            }
            catch (Exception)
            {
                // Best-effort contract: callers receive an empty path on any failure.
                return "";
            }
        }
コード例 #6
0
        /// <summary>
        /// Extracts the text of a downloaded PDF file.
        /// </summary>
        /// <param name="filename">Name passed to checkFileExists and getPDFFilePath to locate the PDF.</param>
        /// <returns>The extracted text, or an empty string when extraction throws.</returns>
        public string ExtractTextFromPdf(string filename)
        {
            String text        = "";
            bool   fileMissing = false;

            try
            {
                if (checkFileExists(filename))
                {
                    _log.Info(filename + "exists in the download folder");
                    PDDocument doc = null;
                    try
                    {
                        doc = PDDocument.load(getPDFFilePath(filename));
                        PDFTextStripper stripper = new PDFTextStripper();
                        text = stripper.getText(doc);
                    }
                    catch (Exception e)
                    {
                        // Logged once; the same statement used to be issued twice.
                        _log.Info("Exception in Extracting data from file " + filename + ".pdf" + e.StackTrace);
                    }
                    finally
                    {
                        if (doc != null)
                        {
                            doc.close();
                        }
                    }
                }
                else
                {
                    fileMissing = true;
                }
            }
            catch (Exception e)
            {
                _log.Info("Exception in extracting text from PDF: " + e.Message);
            }

            // Raised outside the try block: Assert.Fail reports failure by throwing (NUnit/MSTest
            // behaviour - the assertion framework is not visible here), so issuing it inside the
            // try would let the catch-all above swallow the failure.
            if (fileMissing)
            {
                Assert.Fail("PDF file not found in 'Downloads' folder.");
            }

            return text;
        }
コード例 #7
0
ファイル: PDFMerger.cs プロジェクト: jehan2898/root
 /// <summary>
 /// Merges the given PDF files into <paramref name="outputFile"/>. When PDFHelper.AddStamp is set,
 /// the merge goes to a temporary file first so the trial stamp can be applied before saving.
 /// </summary>
 /// <param name="sourcePDFs">Paths of the PDF files to merge, in order.</param>
 /// <param name="outputFile">Path of the merged PDF to write.</param>
 public void MergePDFs(List <string> sourcePDFs, string outputFile)
 {
     PDFHelper.DisplayTrialPopupIfNecessary();
     foreach (string sourcePDF in sourcePDFs)
     {
         this.mergeUtility.addSource(sourcePDF);
     }

     if (!PDFHelper.AddStamp)
     {
         this.mergeUtility.setDestinationFileName(outputFile);
         this.mergeUtility.mergeDocuments();
         return;
     }

     string str = Path.Combine(Path.GetTempPath(), string.Concat(Path.GetRandomFileName(), ".pdf"));
     try
     {
         this.mergeUtility.setDestinationFileName(str);
         this.mergeUtility.mergeDocuments();

         PDDocument pDDocument = PDDocument.load(new java.io.File(str));
         try
         {
             pDDocument = PDFHelper.AddTrialStampIfNecessary(pDDocument);
             pDDocument.save(outputFile);
         }
         finally
         {
             // Close whichever document instance is current, even when stamping or saving fails.
             if (pDDocument != null)
             {
                 pDDocument.close();
             }
         }
     }
     finally
     {
         // Always remove the intermediate file so failed merges do not litter the temp folder.
         if (System.IO.File.Exists(str))
         {
             System.IO.File.Delete(str);
         }
     }
 }
コード例 #8
0
ファイル: LuceneClasses.cs プロジェクト: EugCrow/PdfSearch
    /// <summary>
    /// Extracts the full text of a PDF file, reporting failures to the user in a message box.
    /// </summary>
    /// <param name="PDFFilePath">Path of the PDF file to read.</param>
    /// <returns>The extracted text, or an empty string when loading or extraction fails.</returns>
    public static String PDFText(String PDFFilePath)
    {
        PDDocument doc = null;

        try
        {
            // Loading happens inside the try so that load failures are reported as well.
            doc = PDDocument.load(PDFFilePath);
            PDFTextStripper stripper = new PDFTextStripper();
            return stripper.getText(doc);
        }
        catch (UnauthorizedAccessException e)
        {
            MessageBox.Show("Невозможно скопировать текст из Пдф " + PDFFilePath + ". " + e.Message, "Сообщение об ошибке");
            return "";
        }
        catch (FileLoadException FLe)
        {
            MessageBox.Show("Невозможно загрузить Пдф " + PDFFilePath + ". " + FLe.Message, "Сообщение об ошибке");
            return "";
        }
        catch (Exception)
        {
            // Fallback for every other failure. The previous filter compared a local that was
            // initialised to " " against "", so this handler could never run.
            MessageBox.Show("Невозможно загрузить Пдф " + PDFFilePath + ". ", "Сообщение об ошибке");
            return "";
        }
        finally
        {
            if (doc != null)
            {
                doc.close();
            }
        }
    }
コード例 #9
0
        /// <summary>
        /// Reads the first page of a PDF and returns its character items, page size and document keywords.
        /// </summary>
        /// <param name="bytes">PDF content handed to LoadPdf.</param>
        /// <returns>Items, height, width and keywords of the first page.</returns>
        /// <remarks>
        /// A PdfNotReadableException is raised when the document has no pages, no page contents or no items.
        /// PdfReadException is rethrown unchanged; any other exception is wrapped in a PdfReadException.
        /// </remarks>
        public PdfOcrResult Execute(byte[] bytes)
        {
            PDDocument pdf = null;

            try
            {
                LoadPdf(bytes, ref pdf);

                List pageList = pdf.getDocumentCatalog().getAllPages();
                if (pageList.size() == 0)
                {
                    throw new PdfNotReadableException("Pdf contains no readable content");
                }

                // Only the first page is processed.
                PDPage firstPage = (PDPage)pageList.get(0);

                PDStream pageContents = firstPage.getContents();
                if (pageContents == null)
                {
                    throw new PdfNotReadableException("Pdf contains no readable content");
                }

                var characters = new PdfToCharacters().GetItems(firstPage, firstPage.findResources(), firstPage.getContents().getStream());
                if (characters.Count == 0)
                {
                    throw new PdfNotReadableException("Pdf contains no readable content");
                }

                // A missing media box yields a 0 x 0 page size.
                var box            = firstPage.findMediaBox();
                var pageHeight     = box?.getHeight() ?? 0;
                var pageWidth      = box?.getWidth() ?? 0;
                var characterArray = characters.ToArray();

                var documentKeywords = "";
                try
                {
                    documentKeywords = pdf.getDocumentInformation()?.getKeywords();
                }
                catch (Exception)
                {
                    // Keywords are optional: a failure while reading them is deliberately ignored.
                }

                var ocrResult = new PdfOcrResult();
                ocrResult.Items    = characterArray;
                ocrResult.Height   = pageHeight;
                ocrResult.Width    = pageWidth;
                ocrResult.Keywords = documentKeywords;
                return ocrResult;
            }
            catch (PdfReadException)
            {
                throw;
            }
            catch (Exception e)
            {
                throw new PdfReadException("Pdf could not be loaded. It is not a redable pdf.", e);
            }
            finally
            {
                document_close:
                pdf?.close();
            }
        }
コード例 #10
0
        /// <summary>
        /// Extracts the text of a PDF file and checks whether it contains the given text.
        /// Spaces are ignored on both sides, and line breaks are ignored in the PDF text.
        /// </summary>
        /// <param name="pdfFileName">Path of the PDF file to read.</param>
        /// <param name="textToCheck">The text to look for.</param>
        /// <returns><c>true</c> when the PDF text is non-empty and contains the text; otherwise <c>false</c>.</returns>
        public static bool ExtractAndValidateTextFromPDF(string pdfFileName, string textToCheck)
        {
            string     result;
            PDDocument doc = PDDocument.load(pdfFileName);
            try
            {
                PDFTextStripper stripper = new PDFTextStripper();
                result = stripper.getText(doc);
            }
            finally
            {
                // Release the document even when text extraction throws.
                doc.close();
            }

            if (result.Length == 0)
            {
                return false;
            }

            // The extracted text is already a .NET string; re-encoding it with the system default
            // code page and decoding the bytes as UTF-8 would corrupt non-ASCII characters, so the
            // text is compared as-is.
            string outputPDF        = result.Replace(" ", null).Replace("\r", null).Replace("\n", null);
            string validationString = textToCheck.Replace(" ", null);
            return outputPDF.Contains(validationString);
        }
コード例 #11
0
        /// <summary>
        /// Experiment: loads a PDF, queries several PDFTextStripper accessors and extracts the full text.
        /// None of the values read here are used afterwards.
        /// </summary>
        /// <param name="input">Path of the PDF file to load.</param>
        private static void KamilPdfTest(string input)
        {
            PDDocument document = null;

            try
            {
                document = PDDocument.load(input);
                PDFTextStripper textStripper = new PDFTextStripper();

                // Stripper state is queried before any text has been extracted.
                Matrix lineMatrix     = textStripper.getTextLineMatrix();
                PDPage currentPage    = textStripper.getCurrentPage();
                Matrix textMatrix     = textStripper.getTextMatrix();
                int    characterCount = textStripper.getTotalCharCnt();

                string articleStart = textStripper.getArticleStart();
                string articleEnd   = textStripper.getArticleEnd();

                // Puts the whole text into a string - this works.
                string fullText = textStripper.getText(document);
                characterCount = fullText.Length;
            }
            finally
            {
                if (document != null)
                {
                    document.close();
                }
            }
        }
コード例 #12
0
ファイル: PDFParser.cs プロジェクト: radtek/funfuncode
        /// <summary>
        /// Writes the text of a PDF file to a UTF-8 text file, overwriting any existing output file.
        /// </summary>
        /// <param name="inpufFileName">Path of the PDF file to read.</param>
        /// <param name="outputFileName">Path of the text file to write.</param>
        public void ExtractText(string inpufFileName, string outputFileName)
        {
#if false
            IFilterTextReader.FilterReader reader = new FilterReader(inpufFileName);
            var data = reader.ReadToEnd();
            using (var writer = new StreamWriter(outputFileName, false, System.Text.Encoding.UTF8))
            {
                writer.Write(data);
            }
#else
            PDDocument pdf = null;
            try
            {
                pdf = PDDocument.load(inpufFileName);
                PDFTextStripper textStripper = new PDFTextStripper();

                // The output file is opened before the text is extracted.
                using (var output = new StreamWriter(outputFileName, false, System.Text.Encoding.UTF8))
                {
                    output.Write(textStripper.getText(pdf));
                }
            }
            finally
            {
                if (pdf != null)
                {
                    pdf.close();
                }
            }
#endif
        }
コード例 #13
0
        /// <summary>
        /// Extracts the text of every page of a PDF file, keyed by 1-based page number.
        /// </summary>
        /// <param name="pdfFileName">Path of the PDF file to read.</param>
        /// <returns>A dictionary mapping each page number to the text GetText produced for that page.</returns>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        public static Dictionary <int, string> Extract(string pdfFileName)
        {
            if (!File.Exists(pdfFileName))
            {
                // Report the offending path instead of the literal parameter name.
                throw new FileNotFoundException("PDF file not found.", pdfFileName);
            }

            var        result      = new Dictionary <int, string>();
            PDDocument pdfDocument = PDDocument.load(pdfFileName);

            try
            {
                var pdfStripper = new PDFTextStripper();
                pdfStripper.setPageSeparator(Environment.NewLine + Environment.NewLine);

                int pageCount = pdfDocument.getNumberOfPages();
                for (int i = 1; i <= pageCount; i++)
                {
                    // Restrict the stripper to exactly one page per pass.
                    pdfStripper.setStartPage(i);
                    pdfStripper.setEndPage(i);

                    result.Add(i, GetText(pdfStripper, pdfDocument));
                }
            }
            finally
            {
                // Release the document even when extraction fails part-way through.
                pdfDocument.close();
            }

            return result;
        }
コード例 #14
0
        /// <summary>
        /// Extracts the text of a page range from a PDF document, sorted by position.
        /// </summary>
        /// <param name="file">Path of the PDF document.</param>
        /// <param name="startPage">Start page, passed to setStartPage.</param>
        /// <param name="endPage">End page, passed to setEndPage.</param>
        /// <returns>The extracted text, or an empty string when a java.io.IOException occurs.</returns>
        public static string ExtractTXT(String file, int startPage, int endPage)
        {
            String content = string.Empty;

            try
            {
                PDDocument document = PDDocument.load(file);
                try
                {
                    // Text stripper used to pull the text out of the document.
                    PDFTextStripper stripper = new PDFTextStripper();

                    // Emit text in positional order.
                    stripper.setSortByPosition(true);

                    // Limit extraction to the requested page range.
                    stripper.setStartPage(startPage);
                    stripper.setEndPage(endPage);

                    content = stripper.getText(document);
                }
                finally
                {
                    // Release the document even when extraction throws.
                    document.close();
                }
            }
            catch (java.io.IOException)
            {
                // Best-effort contract: I/O failures (including java.io.FileNotFoundException,
                // a subclass of java.io.IOException) yield an empty result.
            }

            return content;
        }
コード例 #15
0
ファイル: Default.aspx.cs プロジェクト: curkovical/plfind
/**
 *      string getFilePath(string path)
 *      {
 *          // Specify the path to save the uploaded file to.
 *          string savePath = "C:\\Users\\DR.AKUL\\Documents\\Visual Studio 2010\\Projects\\PlagijatorFinder\\PlagijatorFinder\\uploadFiles\\";
 *
 *          // Get the name of the file to upload.
 *          string fileName = FileUpload1.FileName;
 *
 *          // Create the path and file name to check for duplicates.
 *          path = savePath + fileName + ".txt";
 *          return path;
 *      }
 **/
        /// <summary>
        /// Returns the full text of the PDF file at <paramref name="filename"/>.
        /// </summary>
        /// <param name="filename">Path of the PDF file to read.</param>
        /// <returns>The text extracted by PDFTextStripper.</returns>
        private static string parseUsingPDFBox(string filename)
        {
            PDDocument doc = PDDocument.load(filename);

            try
            {
                PDFTextStripper stripper = new PDFTextStripper();
                return stripper.getText(doc);
            }
            finally
            {
                // Release the document even when text extraction throws.
                doc.close();
            }
        }
コード例 #16
0
        /// <summary>
        /// Returns the full text of the PDF file at <paramref name="path"/>.
        /// </summary>
        /// <param name="path">Path of the PDF file to read.</param>
        /// <returns>The text extracted by PDFTextStripper.</returns>
        public string ParseFile(string path)
        {
            PDDocument doc = PDDocument.load(path);

            try
            {
                PDFTextStripper stripper = new PDFTextStripper();
                return stripper.getText(doc);
            }
            finally
            {
                // Release the document even when text extraction throws.
                doc.close();
            }
        }
コード例 #17
0
ファイル: MainWindow.xaml.cs プロジェクト: kel072/xmlEraser
        /// <summary>
        /// Click handler: scans the configured PDF for identifiers of the form "FA" followed by
        /// eight digits and removes the dataPackItem elements with those ids from the XML file
        /// named in the filePath text box. A ".backup" copy of the XML is saved first.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button_Click(object sender, RoutedEventArgs e)
        {
            PDDocument doc = null;

            Mouse.OverrideCursor = Cursors.Wait;
            try
            {
                doc = PDDocument.load(Properties.Settings.Default.PdfPath);
                PDFTextStripper stripper = new PDFTextStripper();
                string          data     = stripper.getText(doc);

                MatchCollection match = Regex.Matches(data, "FA\\d{8}", RegexOptions.IgnoreCase);
                if (match.Count > 0)
                {
                    try
                    {
                        XmlDocument xml = new XmlDocument();
                        xml.Load(this.filePath.Text);
                        xml.Save(this.filePath.Text + ".backup");
                        var manager = new XmlNamespaceManager(xml.NameTable);
                        manager.AddNamespace("dat", "http://www.stormware.cz/schema/version_2/data.xsd");

                        int count = 0;
                        foreach (var item in match)
                        {
                            string request = "/dat:dataPack/dat:dataPackItem[@id=\"" + item + "\"]";
                            try
                            {
                                foreach (XmlNode node in xml.SelectNodes(request, manager))
                                {
                                    node.ParentNode.RemoveChild(node);
                                    count++;
                                }
                            }
                            catch (Exception)
                            {
                                // A failure for one identifier is ignored so the remaining ones are still processed.
                            }
                        }
                        xml.Save(this.filePath.Text);
                        MessageBox.Show("Erased " + count + " items.");
                    }
                    catch (Exception ex)
                    {
                        MessageBox.Show(ex.Message + "\nPlease check request XPath syntax.", "Error", MessageBoxButton.OK);
                    }
                }
            }
            finally
            {
                try
                {
                    if (doc != null)
                    {
                        doc.close();
                    }
                }
                finally
                {
                    // Restore the cursor on every path; otherwise an exception from loading or
                    // reading the PDF would leave the wait cursor active.
                    Mouse.OverrideCursor = null;
                }
            }
        }
コード例 #18
0
        /// <summary>
        /// Returns the full text of the PDF file at <paramref name="path"/>.
        /// </summary>
        /// <param name="path">Path of the PDF file to read.</param>
        /// <returns>The text extracted by PDFTextStripper.</returns>
        string get_text_from_pdf_by_pdfbox(string path)
        {
            PDDocument pdffile = PDDocument.load(new java.io.File(path));

            try
            {
                PDFTextStripper pdfStripper = new PDFTextStripper();
                return pdfStripper.getText(pdffile);
            }
            finally
            {
                // Release the document even when text extraction throws.
                pdffile.close();
            }
        }
コード例 #19
0
ファイル: Program.cs プロジェクト: mpkg/Trading
        /// <summary>
        /// Extracts the text of a PDF file and splits it with the given separators.
        /// </summary>
        /// <param name="file">Path of the PDF file to read.</param>
        /// <param name="split">Separator strings passed to <see cref="string.Split(string[], StringSplitOptions)"/>.</param>
        /// <param name="option">Split options passed to <see cref="string.Split(string[], StringSplitOptions)"/>.</param>
        /// <returns>The pieces of the extracted text.</returns>
        private static string[] PDFToTextPDFBox(string file, string[] split, StringSplitOptions option)
        {
            string     pdftext;
            PDDocument doc = PDDocument.load(file);

            try
            {
                PDFTextStripper stripper = new PDFTextStripper();
                pdftext = stripper.getText(doc);
            }
            finally
            {
                // Release the document even when text extraction throws.
                doc.close();
            }

            return pdftext.Split(split, option);
        }
コード例 #20
0
        /// <summary>
        /// Checks whether the lower-cased text of an unencrypted PDF contains <c>keyword</c>; on a hit
        /// it increments pdfCount and schedules Add_Item for the file via Task.Run. All exceptions
        /// are swallowed.
        /// </summary>
        /// <param name="keyWord">
        /// NOTE(review): this parameter is never read - the search uses the differently-cased
        /// identifier <c>keyword</c> declared outside this method. Confirm which one is intended.
        /// </param>
        /// <param name="path">Path of the PDF file to inspect.</param>
        public void pdfExtract(string keyWord, string path)
        {
            PDDocument pdf = null;

            try
            {
                pdf = PDDocument.load(path);

                // Encrypted documents are skipped without reading their text.
                if (!pdf.isEncrypted())
                {
                    PDFTextStripper textStripper = new PDFTextStripper();
                    string          loweredText  = textStripper.getText(pdf).ToLower();
                    if (loweredText.Contains(keyword))
                    {
                        pdfCount++;

                        Task.Run(() =>
                        {
                            Add_Item(Path.GetFileNameWithoutExtension(path), path, Path.GetExtension(path));
                        });
                        Thread.Sleep(200);
                    }
                }

                pdf.close();
            }
            catch
            {
                // On any failure, attempt to close the document and ignore errors from that too.
                try
                {
                    if (pdf != null)
                    {
                        pdf.close();
                    }
                }
                catch
                {
                }
            }
        }
コード例 #21
0
        /// <summary>
        /// Writes the text of a PDF file to a text file encoded as gb2312, overwriting any existing file.
        /// </summary>
        /// <param name="pdfName">Path of the PDF file to read.</param>
        /// <param name="txtfileName">Path of the text file to write.</param>
        public static void pdf2txt(string pdfName, string txtfileName)
        {
            PDDocument doc = PDDocument.load(pdfName);

            try
            {
                PDFTextStripper pdfStripper = new PDFTextStripper();
                string          text        = pdfStripper.getText(doc);

                // The using block closes the writer even when the write fails.
                using (StreamWriter writer = new StreamWriter(txtfileName, false, Encoding.GetEncoding("gb2312")))
                {
                    writer.Write(text);
                }
            }
            finally
            {
                // Release the document even when extraction or writing throws.
                doc.close();
            }
        }
コード例 #22
0
ファイル: Pdf.cs プロジェクト: dominicshaw/ToText
        /// <summary>
        /// Returns the full text of the PDF file at <c>_location.FullName</c>.
        /// </summary>
        /// <returns>The text extracted by PDFTextStripper.</returns>
        private string GetTextFromSimplePdf()
        {
            PDDocument pdf = null;

            try
            {
                pdf = PDDocument.load(_location.FullName);
                PDFTextStripper textStripper = new PDFTextStripper();
                string          extracted    = textStripper.getText(pdf);
                return extracted;
            }
            finally
            {
                // The document is still null when loading failed.
                if (pdf != null)
                {
                    pdf.close();
                }
            }
        }
コード例 #23
0
ファイル: PDFParser.cs プロジェクト: mlnethub/thrinax
        /// <summary>
        /// Parses the PDF file at the given path.
        /// </summary>
        /// <param name="pdfFileName">Path of the PDF file.</param>
        /// <param name="tableContainType">Table containment style, forwarded to the document-based Parser overload.</param>
        /// <returns>The parsed model, or <c>null</c> when the file does not exist or loading/parsing throws.</returns>
        public static PDFModel Parser(string pdfFileName, TableContainType tableContainType)
        {
            if (!System.IO.File.Exists(pdfFileName))
            {
                return null;
            }

            PDDocument reader = null;

            try
            {
                reader = PDDocument.load(new java.io.File(pdfFileName));
                return Parser(reader, tableContainType);
            }
            catch (Exception)
            {
                // Best-effort contract: every failure is reported to the caller as null.
                return null;
            }
            finally
            {
                // Single cleanup point for both the success and the failure path.
                if (reader != null)
                {
                    reader.close();
                }
            }
        }
コード例 #24
0
        /// <summary>
        /// Returns the full text of the PDF file at <paramref name="filename"/>.
        /// </summary>
        /// <param name="filename">Path of the PDF file to read.</param>
        /// <returns>The text extracted by PDFTextStripper.</returns>
        public static string ReadPdfAsText(string filename)
        {
            PDDocument pdf = null;

            try
            {
                pdf = PDDocument.load(filename);
                PDFTextStripper textStripper = new PDFTextStripper();
                string          extracted    = textStripper.getText(pdf);
                return extracted;
            }
            finally
            {
                // The document is still null when loading failed.
                if (pdf != null)
                {
                    pdf.close();
                }
            }
        }
コード例 #25
0
ファイル: PDFSplitter.cs プロジェクト: jehan2898/root
 /// <summary>
 /// Splits the current PDF into one file per top-level bookmark. Each output file holds the pages
 /// from one bookmark's destination page up to (not including) the next bookmark's page, and is
 /// saved as "OutputFileName [index].pdf" in OutputFilePath.
 /// </summary>
 /// <remarks>
 /// Does nothing when CheckOutput() returns false. Any exception is rethrown as a PDFToolkitException.
 /// </remarks>
 public void SplitByTopLevelBookmarks()
 {
     if (!this.CheckOutput())
     {
         return;
     }

     PDFHelper.DisplayTrialPopupIfNecessary();
     try
     {
         PDDocumentCatalog documentCatalog = this.pdfDocument.PDFBoxDocument.getDocumentCatalog();
         PDDocumentOutline documentOutline = documentCatalog.getDocumentOutline();
         if (documentOutline == null)
         {
             Console.WriteLine("This document does not contain any bookmarks.");
             return;
         }

         // Collect the start page index of every top-level bookmark, then append the page
         // count as the end boundary of the last chunk.
         PDOutlineItem firstChild = documentOutline.getFirstChild();
         PDPageTree    pages      = documentCatalog.getPages();
         List <int>    nums       = new List <int>();
         while (firstChild != null)
         {
             PDPage pDPage = firstChild.findDestinationPage(this.pdfDocument.PDFBoxDocument);
             nums.Add(pages.indexOf(pDPage));
             firstChild = firstChild.getNextSibling();
         }
         nums.Add(pages.getCount());

         for (int i = 0; i < nums.Count - 1; i++)
         {
             int        item       = nums[i];
             int        num        = nums[i + 1];
             PDDocument pDDocument = new PDDocument();
             try
             {
                 for (int j = item; j < num; j++)
                 {
                     pDDocument.addPage(this.pdfDocument.PDFBoxDocument.getPage(j));
                 }
                 pDDocument = PDFHelper.AddTrialStampIfNecessary(pDDocument);
                 string str = string.Format("{0} [{1}].pdf", this.OutputFileName, i);
                 pDDocument.save(Path.Combine(this.OutputFilePath, str));
             }
             finally
             {
                 // Close the chunk document even when adding pages or saving fails.
                 if (pDDocument != null)
                 {
                     pDDocument.close();
                 }
             }
         }
     }
     catch (Exception exception)
     {
         // NOTE(review): the wrapper receives exception.InnerException, not the caught
         // exception itself - kept as-is for compatibility; confirm this is intended.
         throw new PDFToolkitException(exception.Message, exception.InnerException);
     }
 }
コード例 #26
0
        /// <summary>
        /// Returns the text content of the PDF file at the given path.
        /// </summary>
        /// <param name="filePath">Path of the PDF file to read.</param>
        /// <returns>The text extracted by PDFTextStripper.</returns>
        private string ReadPDF(string filePath)
        {
            PDDocument pdf = null;

            try
            {
                pdf = PDDocument.load(filePath);
                string extracted = new PDFTextStripper().getText(pdf);
                return extracted;
            }
            finally
            {
                // The document is still null when loading failed.
                if (pdf != null)
                {
                    pdf.close();
                }
            }
        }
コード例 #27
0
        /// <summary>
        /// Returns the text content of the PDF file at <paramref name="input"/>.
        /// </summary>
        /// <param name="input">Path of the PDF file to read.</param>
        /// <returns>The text extracted by PDFTextStripper.</returns>
        private static string parseUsingPDFBox(string input)
        {
            PDDocument pdf = null;

            try
            {
                pdf = PDDocument.load(input);
                string extracted = new PDFTextStripper().getText(pdf);
                return extracted;
            }
            finally
            {
                // The document is still null when loading failed.
                if (pdf != null)
                {
                    pdf.close();
                }
            }
        }
コード例 #28
0
ファイル: Form1.cs プロジェクト: kingnsk/PDF_2_EGRUL
        /// <summary>
        /// Returns the text content of the PDF file at <paramref name="path"/>.
        /// </summary>
        /// <param name="path">Path of the PDF file to read.</param>
        /// <returns>The text extracted by PDFTextStripper.</returns>
        private static string ExtractTextFromPdf(string path)
        {
            PDDocument pdf = null;

            try
            {
                pdf = PDDocument.load(path);
                string extracted = new PDFTextStripper().getText(pdf);
                return extracted;
            }
            finally
            {
                // The document is still null when loading failed.
                if (pdf != null)
                {
                    pdf.close();
                }
            }
        }
コード例 #29
0
ファイル: DataInput.cs プロジェクト: btastic/StratSim
        /// <summary>
        /// Reads a PDF file and returns its text content.
        /// </summary>
        /// <param name="pdfFilePath">Path of the PDF file to read.</param>
        /// <returns>The text extracted by PDFTextStripper.</returns>
        public static string GetPDFText(string pdfFilePath)
        {
            PDDocument pdf = null;

            try
            {
                pdf = PDDocument.load(pdfFilePath);
                string extracted = new PDFTextStripper().getText(pdf);
                return extracted;
            }
            finally
            {
                // The document is still null when loading failed.
                if (pdf != null)
                {
                    pdf.close();
                }
            }
        }
コード例 #30
0
        /// <summary>
        /// Strips the text from the PDF file at the given path.
        /// </summary>
        /// <param name="input">Path of the PDF file to read.</param>
        /// <returns>The text extracted by PDFTextStripper.</returns>
        private static string ParseUsingPDFBox(string input)
        {
            PDDocument pdf = null;

            try
            {
                pdf = PDDocument.load(input);
                string extracted = new PDFTextStripper().getText(pdf);
                return extracted;
            }
            finally
            {
                // The document is still null when loading failed.
                if (pdf != null)
                {
                    pdf.close();
                }
            }
        }
        /// <summary>
        /// Parses the PDF file at <paramref name="fileName"/> with PDFParser and returns its text.
        /// </summary>
        /// <param name="fileName">Path of the PDF file to read.</param>
        /// <returns>The text extracted by PDFTextStripper.</returns>
        public string Parse(string fileName)
        {
            // Load in file. Using java.io because pdfbox is ported from java.
            var pdfFile = new FileInputStream(fileName);
            try
            {
                // Load file into the pdf parser and parse it so the COSDocument becomes available.
                var pdfParser = new PDFParser(pdfFile);
                pdfParser.parse();

                /*
                COSDocument is the in-memory representation of the PDF.
                see https://pdfbox.apache.org/docs/1.8.4/javadocs/org/apache/pdfbox/cos/COSDocument.html
                */
                var cosDocument = pdfParser.getDocument();
                try
                {
                    var pdDocument = new PDDocument(cosDocument);
                    try
                    {
                        var pdfTextStripper = new PDFTextStripper();

                        /* Needed for only stripping specific pages

                        pdfTextStripper.setStartPage(0);
                        pdfTextStripper.setEndPage(pdDocument.getNumberOfPages());

                        */

                        return pdfTextStripper.getText(pdDocument);
                    }
                    finally
                    {
                        // This closes all storage and deletes the tmp files.
                        pdDocument.close();
                    }
                }
                finally
                {
                    cosDocument.close();
                }
            }
            finally
            {
                // Release the file handle even when parsing or extraction throws.
                pdfFile.close();
            }
        }