/// <summary>
/// Converts the text of a PDF document into a plain-text file.
/// </summary>
/// <param name="sourceFile">Path of the PDF file to load.</param>
/// <param name="destFile">Path of the text file to write.</param>
public void Pdf2Text(string sourceFile, string destFile)
{
    PDFExtractor extractor = new PDFExtractor();
    try
    {
        // Load a PDF file.
        extractor.LoadPDF(sourceFile);

        // Convert whole PDF text to txt file.
        extractor.ToTextFile(destFile);
    }
    finally
    {
        // Release the extractor even when loading or conversion throws;
        // previously Dispose() was skipped on any exception.
        extractor.Dispose();
    }
}
/// <summary>
/// Demo routine: configures a <c>PDFExtractor</c> with fixed input and output
/// locations, runs an asynchronous extraction for the value 99 (presumably a
/// page number) and reports start and end on the console.
/// </summary>
static async Task Extract()
{
    // Alternative construction with an explicit executable path:
    //PDFExtractor extractor = new PDFExtractor(@"D:\Desktop\PDFtk\pdftk.exe");
    var pdfExtractor = new PDFExtractor
    {
        InputFile = new System.IO.FileInfo(@"D:\Desktop\Debug\Test.pdf"),
        OutputPath = new System.IO.DirectoryInfo(@"D:\Desktop\Debug\Output"),
        OutputName = "extracted.pdf"
    };

    // Other ways to select what is extracted:
    //extractor.ExtractRange(new int[] { 1, 3, 5, 7, 9 });
    //extractor.ExtractRange("1-3 7-end");

    Console.WriteLine("Start Extracting");

    int[] requested = { 99 };
    await pdfExtractor.ExtractAsync(requested);

    Console.WriteLine("Extracting Finished");
}
/// <summary>
/// Downloads the FTL 'Bill of Lading' PDF and asserts that its text contains the
/// confirmation number and the value of every property of <c>InputData.Data.bolFTL</c>.
/// </summary>
/// <param name="confirmNumber">Confirmation number; also part of the expected file name.</param>
/// <param name="downloadPath">Prefix that is concatenated as-is in front of the file name.</param>
public void CheckBOLDocumentFTL(string confirmNumber, string downloadPath)
{
    Driver.ScrollDown();
    Driver.Click(lnkBOLDownloadFTL, "Click to download 'Bill of Lading'");

    string filePath = downloadPath + "BOL_" + confirmNumber + ".pdf";

    // Bail out with a failed assertion when the download never shows up.
    if (!PDFExtractor.WaitForFileDownloadKnownFileName(filePath))
    {
        Assert.Fail("BOL could not be downloaded or found at: " + filePath);
        return;
    }

    string bolText = PDFExtractor.ExtractTextFromPDF(filePath);

    string expectedNumber = confirmNumber.ToString();
    Assert.IsTrue(
        Util.ContainsStrCaseInsensitive(bolText, expectedNumber),
        "Check if BOL has confirmation number - Expecting: " + expectedNumber);

    // Every property of the expected-data object must appear somewhere in the document text.
    var expectedFields = InputData.Data.bolFTL;
    foreach (var property in expectedFields.GetType().GetProperties())
    {
        string fieldVal = property.GetValue(expectedFields).ToString();
        Assert.IsTrue(
            Util.ContainsStrCaseInsensitive(bolText, fieldVal),
            "Check if BOL has " + property.Name + " - Expecting: " + fieldVal);
    }

    // TODO: Add PNET, PNLT, DNET, DNLT, weight, dims
}
/// <summary>
/// Downloads the 'Commercial Invoice' PDF and asserts that its text (with commas
/// removed) contains the confirmation number and the value of every property of
/// <c>InputData.Data.ciLTL</c>.
/// </summary>
/// <param name="confirmNumber">Confirmation number; also the leading part of the expected file name.</param>
/// <param name="downloadPath">Prefix that is concatenated as-is in front of the file name.</param>
public void CheckCIDocument(long confirmNumber, string downloadPath)
{
    Driver.Click(lnkCIDownload, "Click to download 'Commercial Invoice'");

    string filePath = downloadPath + confirmNumber + "_CommercialInvoice.pdf";

    // Bail out with a failed assertion when the download never shows up.
    if (!PDFExtractor.WaitForFileDownloadKnownFileName(filePath))
    {
        Assert.Fail("CI could not be downloaded or found at: " + filePath);
        return;
    }

    // Commas are stripped from the extracted text before any comparison.
    string invoiceText = PDFExtractor.ExtractTextFromPDF(filePath).Replace(",", "");

    string expectedNumber = confirmNumber.ToString();
    Assert.IsTrue(
        Util.ContainsStrCaseInsensitive(invoiceText, expectedNumber),
        "Check if CI has confirmation number - Expecting: " + expectedNumber);

    var expectedFields = InputData.Data.ciLTL;
    foreach (var property in expectedFields.GetType().GetProperties())
    {
        string fieldVal = property.GetValue(expectedFields).ToString();
        Assert.IsTrue(
            Util.ContainsStrCaseInsensitive(invoiceText, fieldVal),
            "Check if CI has " + property.Name + " - Expecting: " + fieldVal);
    }

    // Disabled check for the blank 'Commercial Invoice', kept for reference:
    //
    // Driver.Click(lnkCIDownloadBlank, "Click to download blank 'Commercial Invoice'");
    // filePath = downloadPath + confirmNumber + "_CommercialInvoice.pdf";
    // if (PDFExtractor.WaitForFileDownloadKnownFileName(filePath) == false)
    // {
    //     Assert.Fail("Blank CI could not be downloaded or found at: " + filePath);
    // }
    // else
    // {
    //     string CI = PDFExtractor.ExtractTextFromPDF(filePath).Replace(",", "");
    //     Assert.IsFalse(Util.ContainsStrCaseInsensitive(CI, confirmNumber.ToString()), "Check if blank CI has confirmation number - Expecting: " + confirmNumber.ToString());
    //     foreach (var docField in InputData.Data.ciLTL.GetType().GetProperties())
    //     {
    //         string fieldVal = docField.GetValue(InputData.Data.ciLTL).ToString();
    //         Assert.IsFalse(Util.ContainsStrCaseInsensitive(CI, fieldVal), "Check if BOL has " + docField.Name + " - Expecting: " + fieldVal);
    //     }
    // }
}
/// <summary>
/// Downloads the 'NAFTA Document' PDF and asserts that its text (with commas
/// removed) contains the value of every property of <c>InputData.Data.naftaLTL</c>.
/// </summary>
/// <param name="confirmNumber">Confirmation number; used only to build the expected file name.</param>
/// <param name="downloadPath">Prefix that is concatenated as-is in front of the file name.</param>
public void CheckNAFTADocument(long confirmNumber, string downloadPath)
{
    Driver.Click(lnkNAFTADownload, "Click to download 'NAFTA Document'");

    string filePath = downloadPath + confirmNumber + "_NAFTADocument.pdf";

    // Bail out with a failed assertion when the download never shows up.
    if (!PDFExtractor.WaitForFileDownloadKnownFileName(filePath))
    {
        Assert.Fail("NAFTA could not be downloaded or found at: " + filePath);
        return;
    }

    // Commas are stripped from the extracted text before any comparison.
    string naftaText = PDFExtractor.ExtractTextFromPDF(filePath).Replace(",", "");

    var expectedFields = InputData.Data.naftaLTL;
    foreach (var property in expectedFields.GetType().GetProperties())
    {
        string fieldVal = property.GetValue(expectedFields).ToString();
        Assert.IsTrue(
            Util.ContainsStrCaseInsensitive(naftaText, fieldVal),
            "Check if NAFTA has " + property.Name + " - Expecting: " + fieldVal);
    }

    // Disabled check for the blank 'NAFTA Document', kept for reference:
    //
    // Driver.Click(lnkNAFTADownloadBlank, "Click to download blank 'NAFTA Document'");
    // filePath = downloadPath + confirmNumber + "_NAFTADocument.pdf";
    // if (PDFExtractor.WaitForFileDownloadKnownFileName(filePath) == false)
    // {
    //     Assert.Fail("Blank NAFTA could not be downloaded or found at: " + filePath);
    // }
    // else
    // {
    //     string NAFTA = PDFExtractor.ExtractTextFromPDF(filePath).Replace(",", "");
    //     foreach (var docField in InputData.Data.naftaLTL.GetType().GetProperties())
    //     {
    //         string fieldVal = docField.GetValue(InputData.Data.naftaLTL).ToString();
    //         Assert.IsFalse(Util.ContainsStrCaseInsensitive(NAFTA, fieldVal), "Check if NAFTA has " + docField.Name + " - Expecting: " + fieldVal);
    //     }
    // }
}
/// <summary>
/// Downloads the LTL 'Bill of Lading' PDF and asserts that its text contains the
/// confirmation number, the pickup city and carrier name (US or Canadian variant),
/// optional hazmat markers, and the value of every property of
/// <c>InputData.Data.bolLTL</c>.
/// </summary>
/// <param name="confirmNumber">Confirmation number; also the leading part of the expected file name.</param>
/// <param name="downloadPath">Prefix that is concatenated as-is in front of the file name.</param>
/// <param name="isCanada">When <c>true</c>, the Canadian city and carrier values are expected instead of the default ones.</param>
/// <param name="isHazMat">When <c>true</c>, the document must also contain "PGII" and "RQ, UN3456".</param>
public void CheckBOLDocumentLTL(long confirmNumber, string downloadPath, bool isCanada = false, bool isHazMat = false)
{
    Driver.ScrollDown();
    Driver.Click(lnkBOLDownloadLTL, "Click to download 'Bill of Lading'");

    string filePath = downloadPath + confirmNumber + "_BillOfLading.pdf";

    // Bail out with a failed assertion when the download never shows up.
    if (!PDFExtractor.WaitForFileDownloadKnownFileName(filePath))
    {
        Assert.Fail("BOL could not be downloaded or found at: " + filePath);
        return;
    }

    string bolText = PDFExtractor.ExtractTextFromPDF(filePath);

    string expectedNumber = confirmNumber.ToString();
    Assert.IsTrue(
        Util.ContainsStrCaseInsensitive(bolText, expectedNumber),
        "Check if BOL has confirmation number - Expecting: " + expectedNumber);

    if (isCanada)
    {
        string city = InputData.Data.strFullCitySourceCAN.ToString();
        Assert.IsTrue(
            Util.ContainsStrCaseInsensitive(bolText, city),
            "Check if BOL has Canadian pickup city state zip - Expecting " + city);

        string carrier = InputData.Data.CarrierNameCAN.ToString();
        Assert.IsTrue(
            Util.ContainsStrCaseInsensitive(bolText, carrier),
            "Check if BOL has carrier name - Expecting " + carrier);
    }
    else
    {
        string city = InputData.Data.strFullCitySource.ToString();
        Assert.IsTrue(
            Util.ContainsStrCaseInsensitive(bolText, city),
            "Check if BOL has pickup city state zip - Expecting " + city);

        string carrier = InputData.Data.CarrierName.ToString();
        Assert.IsTrue(
            Util.ContainsStrCaseInsensitive(bolText, carrier),
            "Check if BOL has carrier name - Expecting " + carrier);
    }

    if (isHazMat)
    {
        Assert.IsTrue(Util.ContainsStrCaseInsensitive(bolText, "PGII"), "Check if BOL has PGII");
        Assert.IsTrue(Util.ContainsStrCaseInsensitive(bolText, "RQ, UN3456"), "Check if BOL has RQ, UNXXXX");
    }

    // Every property of the expected-data object must appear somewhere in the document text.
    var expectedFields = InputData.Data.bolLTL;
    foreach (var property in expectedFields.GetType().GetProperties())
    {
        string fieldVal = property.GetValue(expectedFields).ToString();
        Assert.IsTrue(
            Util.ContainsStrCaseInsensitive(bolText, fieldVal),
            "Check if BOL has " + property.Name + " - Expecting: " + fieldVal);
    }
}
/// <summary>
/// Imports this item's pages from <paramref name="file"/> as a sheet: the pages are
/// extracted into the configured temp directory and the result is passed to
/// <c>ProcessFile</c>. A new sheet is created through <paramref name="manager"/>
/// unless a sheet for the same part already exists and overriding is allowed.
/// </summary>
/// <remarks>
/// <c>Progress</c> is stepped through 25, 50, 75 and 100 and is set to -1 when the
/// extraction throws. It is also set to -1 once per sheet inspected during the
/// duplicate check.
/// </remarks>
/// <param name="manager">Database manager used to create the sheet and to read the temp directory.</param>
/// <param name="file">File the pages are extracted from.</param>
/// <param name="_override">
/// When <c>false</c>, an existing sheet for the same part causes an exception;
/// when <c>true</c>, the existing sheet is reused instead of creating a new one.
/// </param>
/// <exception cref="SheetAlreadyExistsException">
/// A sheet for the same part exists and <paramref name="_override"/> is <c>false</c>.
/// </exception>
public async Task ImportAsync(ZebraDBManager manager, FileInfo file, bool _override = false)
{
    bool sheetFound = false;
    Sheet matchingSheet = null;

    // Check whether a sheet for this part is already in the database.
    // The loop does not stop at the first hit, so the last match wins.
    foreach (var candidate in Piece.Sheet)
    {
        Progress = -1;

        if (!(candidate.Part == Part))
        {
            continue;
        }

        // Without permission to override, a duplicate is an error.
        if (!_override)
        {
            throw new SheetAlreadyExistsException(candidate);
        }

        // Remember the hit so that no second database entry gets created.
        sheetFound = true;
        matchingSheet = candidate;
    }

    Progress = 25;

    if (sheetFound)
    {
        // Overwrite path: reuse the existing sheet.
        Progress = 50;

        PDFExtractor overwriteExtractor = new PDFExtractor()
        {
            InputFile = file,
            OutputName = matchingSheet.SheetID.ToString().PadLeft(8, '0'),
            OutputPath = new DirectoryInfo(manager.ZebraConfig.TempDir)
        };

        Progress = 75;

        try
        {
            // Extract the pages from the batch.
            await overwriteExtractor.ExtractAsync(Pages.ToArray());
        }
        catch (Exception)
        {
            Progress = -1;
            throw;
        }

        ProcessFile(manager, matchingSheet.SheetID.ToString().PadLeft(8, '0'), matchingSheet, FileImportMode.Copy, true);

        Progress = 100;
        IsImported = true;
        return;
    }

    // Fresh import path: create the database entry first.
    var createdSheet = manager.NewSheet(Piece.PieceID, Part.PartID);

    Progress = 50;

    PDFExtractor freshExtractor = new PDFExtractor()
    {
        InputFile = file,
        OutputName = createdSheet.SheetID.ToString().PadLeft(8, '0'),
        OutputPath = new DirectoryInfo(manager.ZebraConfig.TempDir)
    };

    foreach (int pageNumber in Pages)
    {
        freshExtractor.AddPage(pageNumber);
    }

    Progress = 75;

    try
    {
        // Extract the pages from the batch.
        await freshExtractor.ExtractAsync();
    }
    catch (Exception)
    {
        // Undo the database entry created above before propagating the failure.
        manager.Context.Remove<Sheet>(createdSheet);
        Progress = -1;
        throw;
    }

    // Progress is assigned 75 a second time here, exactly as in the original sequence.
    Progress = 75;

    ProcessFile(manager, createdSheet.SheetID.ToString().PadLeft(8, '0'), createdSheet);

    Progress = 100;
    IsImported = true;
}