/// <summary>
/// Opens the PDF at the given path and reports how many pages it contains.
/// </summary>
/// <param name="pdfFilePath">Path of the PDF file to inspect.</param>
/// <returns>The value of the document's <c>NumberOfPages</c> property.</returns>
internal static int CountNumberOfPages(string pdfFilePath)
{
    int pageCount;

    // The document is disposed as soon as the count has been read.
    using (var pdf = UglyToad.PdfPig.PdfDocument.Open(pdfFilePath))
    {
        pageCount = pdf.NumberOfPages;
    }

    return pageCount;
}
/// <summary>
/// Extracts the text of every page of a PDF and writes it, page after page,
/// to a file named "result_pig.txt" located in the same directory as the PDF.
/// </summary>
/// <param name="fileFullName">Path of the PDF file to read.</param>
/// <remarks>
/// This is a best-effort operation: any exception is caught and only its
/// message is written to the console.
/// </remarks>
public static void GetElements(string fileFullName)
{
    try
    {
        // Path.Combine replaces the hard-coded "\\" separator so the output lands
        // next to the PDF on every platform, including when only a bare file name
        // (empty directory part) is supplied.
        string outputPath = System.IO.Path.Combine(
            System.IO.Path.GetDirectoryName(fileFullName) ?? string.Empty,
            "result_pig.txt");

        // Every disposable is owned by a using statement so the output file handle
        // is released (and buffered text flushed) even if text extraction throws.
        using (var stream = File.OpenRead(fileFullName))
        using (UglyToad.PdfPig.PdfDocument document = UglyToad.PdfPig.PdfDocument.Open(stream))
        using (var fs = new FileStream(outputPath, FileMode.Create))
        using (var sw = new StreamWriter(fs))
        {
            foreach (var page in document.GetPages())
            {
                sw.Write(page.Text);
            }
        }
    }
    catch (System.Exception e)
    {
        Console.WriteLine(e.Message);
    }
}
/// <summary>
/// Reads a PDF file and returns the text of the page obtained with <c>GetPage(2)</c>.
/// </summary>
/// <param name="fileName">Path of the PDF file to read.</param>
/// <returns>The <c>Text</c> of that page.</returns>
public string Convert(string fileName)
{
    string pageText;

    using (var input = File.OpenRead(fileName))
    using (var pdf = UglyToad.PdfPig.PdfDocument.Open(input))
    {
        // The page number is fixed at 2.
        pageText = pdf.GetPage(2).Text;
    }

    return pageText;
}
/// <summary>
/// Reads the PDF stored at a fixed location and returns the words of the page
/// obtained with <c>GetPage(1)</c>, joined by single spaces.
/// </summary>
/// <returns>The space-separated words of that page.</returns>
public string PdfDownloadMethod()
{
    string joinedWords;

    using (var input = File.OpenRead(@"D:\Textfiles\KARTAPOSTACI.pdf"))
    using (var pdf = UglyToad.PdfPig.PdfDocument.Open(input))
    {
        var words = pdf.GetPage(1).GetWords();
        joinedWords = string.Join(" ", words);
    }

    return joinedWords;
}
/// <summary>
/// Reads the words of the page obtained with <c>GetPage(1)</c> from a challan PDF
/// and collects selected values from that text into a dictionary.
/// </summary>
/// <param name="filePath">Path of the PDF file to read.</param>
/// <param name="pan">
/// PAN expected in the document; it is compared with the value found after "PAN:".
/// </param>
/// <returns>
/// A dictionary with the keys "serialNo", "tenderDate", "challanAmount" and
/// "acknowledge"; an empty dictionary when the PAN found in the document differs
/// from <paramref name="pan"/>.
/// </returns>
/// <remarks>
/// Writes diagnostic output to the console and, on the success path, waits on
/// <c>Console.ReadLine()</c> before returning.
/// </remarks>
public Dictionary<string, string> getChallanDetails(string filePath, string pan)
{
    var details = new Dictionary<string, string>();

    string pageText;
    using (var input = File.OpenRead(filePath))
    using (var pdf = UglyToad.PdfPig.PdfDocument.Open(input))
    {
        pageText = string.Join(" ", pdf.GetPage(1).GetWords());
    }

    Console.WriteLine(pageText);

    var challanSerial = GetWordAfterMatch(pageText, "Challan Serial No.");
    Console.WriteLine($"Challan Serial NO :{challanSerial}");

    // Guard: a mismatching PAN yields the still-empty dictionary.
    var documentPan = GetWordAfterMatch(pageText, "PAN:");
    if (!string.Equals(pan, documentPan.ToString()))
    {
        return details;
    }

    details["serialNo"] = challanSerial.ToString();

    var dateOfTender = GetWordAfterMatch(pageText, "Tender Date");
    details["tenderDate"] = dateOfTender.ToString();

    var amount = GetWordAfterMatch(pageText, "Rs. :");
    details["challanAmount"] = amount.ToString();

    var confirmationNo = GetTDSConfirmationNo(pageText, pan);
    Console.WriteLine($"tds conf NO :{confirmationNo}");
    details["acknowledge"] = confirmationNo.ToString();

    // Blocks until a line is read from standard input.
    Console.ReadLine();

    return details;
}
/// <summary>
/// Locates the first page, starting at page number 2, whose space-joined words
/// contain <c>TextToLocateTable</c>, then builds one <c>ReportStructure</c> per
/// entry of <c>RegionNameList</c> from the space-separated tokens that follow the
/// region name in that page's text.
/// </summary>
/// <param name="filename">Path of the PDF file to read.</param>
/// <returns>One report row per region name, in the order of <c>RegionNameList</c>.</returns>
private List<ReportStructure> GetDataFromPdf(string filename)
{
    string tablePageText;

    using (var input = File.OpenRead(filename))
    using (var pdf = UglyToad.PdfPig.PdfDocument.Open(input))
    {
        // Page number 1 is never inspected; the scan has no upper bound and ends
        // only when a page containing the marker text is found.
        var candidatePage = 2;
        while (true)
        {
            tablePageText = string.Join(" ", pdf.GetPage(candidatePage).GetWords());
            if (tablePageText.Contains(TextToLocateTable))
            {
                break;
            }

            candidatePage++;
        }
    }

    var rows = new List<ReportStructure>();

    foreach (var regionName in RegionNameList)
    {
        // Tokens start one character past the end of the region name.
        var valuesStart = tablePageText.IndexOf(regionName) + regionName.Length + 1;
        var cells = tablePageText.Substring(valuesStart).Split(" ");

        // Optional columns fall back to 0 when their Enable* switch is off.
        var row = new ReportStructure
        {
            RegionName = regionName == "I alt"
                ? "Total"
                : regionName,
            DeliveredDoses = ConvertNumber(TryGetText(cells, 0)),
            AppliedDoses = EnableAppliedDoses
                ? ConvertNumber(TryGetText(cells, 1))
                : 0,
            PercentagePopulationVaccinated = EnablePercentagePopulationVaccinated
                ? ConvertPercentage(TryGetText(cells, 2))
                : 0,
            BothDosesApplied = EnableBothDosesApplied
                ? ConvertNumber(TryGetText(cells, 3))
                : 0,
            PercentagePopulationBothDoses = EnablePercentagePopulationBothDoses
                ? ConvertPercentage(TryGetText(cells, 4))
                : 0,
            PercentageOverDelivered = EnablePercentageOverDelivered
                ? ConvertPercentage(TryGetText(cells, 5))
                : 0
        };

        rows.Add(row);
    }

    return rows;
}
/// <summary>
/// Reads the words of the page obtained with <c>GetPage(1)</c> from a Form 16B PDF
/// and extracts the certificate number, payment date, name and amount from that text.
/// </summary>
/// <param name="filePath">Path of the PDF file to read.</param>
/// <param name="pan">Not used by the current implementation.</param>
/// <returns>
/// A dictionary with the keys "certNo", "paymentDate", "name" and "amount".
/// The regex-derived values are empty strings when their pattern does not match.
/// </returns>
public Dictionary<string, string> GetForm16bDetailsFromPDF(string filePath, string pan)
{
    const string updatedOnPattern = @"\b\w*" + "Updated On:" + @"\s\w*(-)\w+(-)\w+\b";
    const string fullNamePattern = @"\b\w*" + "Full Name:" + @"(.*)";
    const string sumPattern = @"\b\w*sum of Rs.\w*\s+\w*.\w*";

    string pageText;
    using (var input = File.OpenRead(filePath))
    using (var pdf = UglyToad.PdfPig.PdfDocument.Open(input))
    {
        pageText = string.Join(" ", pdf.GetPage(1).GetWords());
    }

    var details = new Dictionary<string, string>();

    var certificateNumber = GetWordAfterMatch(pageText, "Certificate No.:");
    details["certNo"] = certificateNumber.ToString();

    // Payment date: whatever follows the last ':' of the "Updated On:" match.
    string updatedOn = Regex.Match(pageText, updatedOnPattern).Value;
    string paymentDate = updatedOn.Substring(updatedOn.LastIndexOf(':') + 1);
    details["paymentDate"] = paymentDate.Trim();

    // Name: the text captured after "Full Name:", cut at the first "Page".
    string capturedName = Regex.Match(pageText, fullNamePattern).Groups[1].Value;
    string[] nameParts = capturedName.Split(new string[] { "Page" }, StringSplitOptions.None);
    details["name"] = nameParts[0].Trim();

    // Amount: the last space-separated token of the "sum of Rs." match.
    string sumText = Regex.Match(pageText, sumPattern).Value;
    string amount = sumText.Substring(sumText.LastIndexOf(' ') + 1);
    details["amount"] = amount;

    return details;
}