/// <summary>
/// Searches the first page of "words-with-hyphens.pdf" for the string "hyphen"
/// and writes the bounds of every match to the HTTP response as HTML.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
protected void Page_Load(object sender, EventArgs e)
{
    // This test file will be copied to the project directory on the pre-build event (see the project properties).
    String inputFile = Server.MapPath("words-with-hyphens.pdf");

    // The using block releases the extractor even if loading or searching throws.
    using (TextExtractor extractor = new TextExtractor())
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Load sample PDF document
        extractor.LoadDocumentFromFile(inputFile);

        Response.Clear();
        Response.ContentType = "text/html";

        Response.Write("Searching for \"hyphen\" string:<br><br>");

        // Find() positions on the first match on page 0; FindNext() advances to the following ones.
        if (extractor.Find(0, "hyphen", false))
        {
            do
            {
                Response.Write("Found at location " + extractor.FoundText.Bounds.ToString() + "<br>");
            }
            while (extractor.FindNext());
        }
    }

    Response.End();
}
/// <summary>
/// Searches every page of "words-with-hyphens.pdf" for the string "hyphen"
/// and prints the page index and bounds of each match to the console.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // The using block guarantees cleanup even when loading or searching throws.
    using (TextExtractor extractor = new TextExtractor())
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Load sample PDF document
        extractor.LoadDocumentFromFile("words-with-hyphens.pdf");

        int pageCount = extractor.GetPageCount();

        for (int i = 0; i < pageCount; i++)
        {
            // Search each page for "hyphen" string
            if (extractor.Find(i, "hyphen", false))
            {
                do
                {
                    Console.WriteLine("Found on page " + i + " at location " + extractor.FoundText.Bounds.ToString());
                }
                while (extractor.FindNext());
            }
        }
    }

    Console.WriteLine();
    Console.WriteLine("Press any key to continue...");
    Console.ReadLine();
}
/// <summary>
/// Click handler for the Find button: searches the first page of the loaded document
/// for the text typed into <c>textBoxFind</c> and shows a report of every match,
/// including per-element font details, in <c>textBox1</c>.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void Button_Find(object sender, RoutedEventArgs e)
{
    // Nothing to search for.
    if (textBoxFind.Text.Length == 0)
    {
        return;
    }

    // Only the first page is searched. The original message referenced an undeclared
    // variable "i"; the page index is now a named constant used for both the search and the report.
    const int pageIndex = 0;

    StringBuilder builder = new StringBuilder();

    builder.AppendLine("Searching for \"" + textBoxFind.Text + "\"");

    // NOTE(review): "extractor" is declared outside this method; presumably a document is already loaded into it.
    if (extractor.Find(pageIndex, textBoxFind.Text, false))
    {
        do
        {
            builder.AppendLine("");
            builder.AppendLine("Found on page " + pageIndex + " at location " + extractor.FoundText.Bounds.ToString());
            builder.AppendLine("");

            // Iterate through each element in the found text
            foreach (SearchResultElement element in extractor.FoundText.Elements)
            {
                builder.AppendLine("Element #" + element.Index + " at left=" + element.Left + "; top=" + element.Top + "; width=" + element.Width + "; height=" + element.Height);
                builder.AppendLine("Text: " + element.Text);
                builder.AppendLine("Font is bold: " + element.FontIsBold);
                builder.AppendLine("Font is italic:" + element.FontIsItalic);
                builder.AppendLine("Font name: " + element.FontName);
                builder.AppendLine("Font size:" + element.FontSize);
                builder.AppendLine("Font color:" + element.FontColor);
            }
        }
        while (extractor.FindNext());
    }

    builder.AppendLine("Finished.");

    textBox1.Text = builder.ToString();
}
/// <summary>
/// Scans every page of "sample2.pdf" for the keyword "References" and, for each page
/// that contains it, prints the text found in the top part of that page.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // The using block releases the extractor, which the original never disposed.
    using (TextExtractor extractor = new TextExtractor())
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Load sample PDF document
        extractor.LoadDocumentFromFile(@".\sample2.pdf");

        int pageCount = extractor.GetPageCount();

        // Search each page for some keyword
        for (int i = 0; i < pageCount; i++)
        {
            if (extractor.Find(i, "References", false))
            {
                // If page contains the keyword, extract a text from it.
                // For demonstration we'll extract the text from top part of the page only.
                // NOTE(review): the extraction area is never reset - confirm whether it also
                // constrains the Find() calls made for the following pages.
                extractor.SetExtractionArea(0, 0, 600, 200);
                string text = extractor.GetTextFromPage(i);
                Console.WriteLine(text);
            }
        }
    }

    Console.WriteLine();
    Console.WriteLine("Press any key to continue...");
    Console.ReadLine();
}
/// <summary>
/// Sorts the PDF files in the "InputFiles" folder into category sub-folders: every file
/// with a page matching a category's regular expression (from "settings.json") is copied
/// into that category's folder under the configured main folder.
/// Any exception is reported on the console instead of being propagated.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    try
    {
        // Get all settings VM
        var allSettings = GetSettingsVM("settings.json");

        // The using block releases the extractor even when a file fails to load or copy;
        // previously Dispose() was skipped whenever an exception was thrown.
        using (TextExtractor extractor = new TextExtractor())
        {
            extractor.RegistrationName = "demo";
            extractor.RegistrationKey = "demo";

            foreach (var fileName in Directory.GetFiles("InputFiles"))
            {
                // Load sample PDF document
                extractor.LoadDocumentFromFile(fileName);

                // Enable regex search
                extractor.RegexSearch = true;

                // Get Number of pages PDF contains
                int pageCount = extractor.GetPageCount();

                for (int iPage = 0; iPage < pageCount; iPage++)
                {
                    // Loop through all search settings
                    foreach (var itmSearchSetting in allSettings.Settings)
                    {
                        // If found, then copy file to sub-category folder
                        if (extractor.Find(iPage, itmSearchSetting.regex, false))
                        {
                            string categoryFolder = $"{allSettings.MainFolderName}/{itmSearchSetting.category}";

                            // CreateDirectory does nothing when the folder already exists,
                            // so no separate existence check is needed.
                            Directory.CreateDirectory(categoryFolder);

                            // Copy file, overwriting a copy made for an earlier matching page
                            File.Copy(fileName, $"{categoryFolder}/{Path.GetFileName(fileName)}", true);
                        }
                    }
                }
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine("Error: " + ex.Message);
    }

    Console.WriteLine();
    Console.WriteLine("Press enter key to continue...");
    Console.ReadLine();
}
/// <summary>
/// Searches every page of "Invoice.pdf" for dates written like 12/31/1999 using a
/// regular expression and prints each match with the font details of its elements.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Search dates in format 12/31/1999.
    // See the complete regular expressions reference at https://msdn.microsoft.com/en-us/library/az24scfc(v=vs.110).aspx
    const string regexPattern = "[0-9]{2}/[0-9]{2}/[0-9]{4}";

    // The using block guarantees cleanup even when loading or searching throws.
    using (TextExtractor extractor = new TextExtractor())
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Load sample PDF document
        extractor.LoadDocumentFromFile(@".\Invoice.pdf");

        // Enable the regular expressions
        extractor.RegexSearch = true;

        int pageCount = extractor.GetPageCount();

        // Search through pages
        for (int i = 0; i < pageCount; i++)
        {
            // Search each page for the pattern
            if (extractor.Find(i, regexPattern, false))
            {
                do
                {
                    Console.WriteLine("");
                    Console.WriteLine("Found on page " + i + " at location " + extractor.FoundText.Bounds);
                    Console.WriteLine("");

                    // Iterate through each element in the found text
                    foreach (ISearchResultElement element in extractor.FoundText.Elements)
                    {
                        Console.WriteLine("   Text: " + element.Text);
                        Console.WriteLine("   Font is bold: " + element.FontIsBold);
                        Console.WriteLine("   Font is italic: " + element.FontIsItalic);
                        Console.WriteLine("   Font name: " + element.FontName);
                        Console.WriteLine("   Font size: " + element.FontSize);
                        Console.WriteLine("   Font color: " + element.FontColor);
                        Console.WriteLine();
                    }
                }
                while (extractor.FindNext());
            }
        }
    }

    Console.WriteLine();
    Console.WriteLine("Press any key to continue...");
    Console.ReadLine();
}
/// <summary>
/// Renders the first page of "sample.pdf" to an image, paints a semi-transparent yellow
/// rectangle over every piece of text matching the decimal-number pattern, saves the
/// result as "result.png" and opens it.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    const string inputFile = @"sample.pdf";
    const int pageIndex = 0;
    const float renderingResolution = 300f;
    const string searchPattern = "\\d+\\.\\d+";

    // Prepare TextExtractor
    using (TextExtractor textExtractor = new TextExtractor("demo", "demo"))
    {
        textExtractor.RegexSearch = true;
        textExtractor.LoadDocumentFromFile(inputFile);

        // Prepare RasterRenderer
        using (RasterRenderer rasterRenderer = new RasterRenderer("demo", "demo"))
        {
            rasterRenderer.LoadDocumentFromFile(inputFile);

            // Render document page to image. The image and the brush are disposable,
            // so both are scoped with using (the original leaked them).
            using (Image image = rasterRenderer.GetImage(pageIndex, renderingResolution))
            {
                using (Brush highlightBrush = new SolidBrush(Color.FromArgb(128, Color.Yellow)))
                using (Graphics graphics = Graphics.FromImage(image))
                {
                    // Search for pattern and paint found pieces
                    if (textExtractor.Find(pageIndex, searchPattern, caseSensitive: false))
                    {
                        do
                        {
                            foreach (var foundPiece in textExtractor.FoundText.Elements)
                            {
                                // Convert from document points (1/72 inch) to pixels at the rendering resolution
                                Rectangle pixelRect = new Rectangle(
                                    (int)(foundPiece.Bounds.Left / 72f * renderingResolution),
                                    (int)(foundPiece.Bounds.Top / 72f * renderingResolution),
                                    (int)(foundPiece.Bounds.Width / 72f * renderingResolution),
                                    (int)(foundPiece.Bounds.Height / 72f * renderingResolution)
                                );

                                // Paint rectangle
                                graphics.FillRectangle(highlightBrush, pixelRect);
                            }
                        }
                        while (textExtractor.FindNext());
                    }
                }

                // Save once the Graphics object has been released
                image.Save("result.png");
            }

            Process.Start("result.png");
        }
    }
}
/*
 * IF YOU SEE TEMPORARY FOLDER ACCESS ERRORS:
 *
 * A web application that uses ByteScout SDK requires access to the temporary folder,
 * because the SDK caches some of its data there to work more efficiently.
 * If you are getting errors related to temporary folder access, such as
 * "Access to the path 'C:\Windows\TEMP\... is denied", you need to add permission for this
 * temporary folder so that ByteScout SDK can work on that machine and IIS configuration.
 *
 * SOLUTION:
 *
 * If your IIS Application Pool has the "Load User Profile" option enabled, IIS provides access
 * to the user's temp folder. Check the user's temporary folder.
 *
 * If you are running the web application under an impersonated account or the IIS_IUSRS group,
 * IIS may redirect all requests into a separate temp folder like "c:\temp\".
 *
 * In this case:
 * - check the User or User Group your web application is running under;
 * - then add permissions for this User or User Group to read and write into that temp folder
 *   (c:\temp or c:\windows\temp\ folder);
 * - restart your web application and try again.
 *
 */

/// <summary>
/// Searches the first page of "sample1.pdf" for the whole word "ipsum" and writes every
/// match, with the bounds and font details of its parts, to the HTTP response as HTML.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
protected void Page_Load(object sender, EventArgs e)
{
    String inputFile = Server.MapPath(@".\bin\sample1.pdf");

    // The using block releases the extractor, which the original never disposed.
    using (TextExtractor extractor = new TextExtractor())
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Load sample PDF document
        extractor.LoadDocumentFromFile(inputFile);

        // Set the matching mode.
        // WordMatchingMode.None - treats the search string as substring
        // WordMatchingMode.ExactMatch - treats the search string as separate word
        // WordMatchingMode.SmartMatch - will find the word in various forms (like Adobe Reader).
        extractor.WordMatchingMode = WordMatchingMode.ExactMatch;

        Response.Clear();
        Response.ContentType = "text/html";

        Response.Write("Searching for \"ipsum\" string:<br>");

        // Search for "ipsum" string
        if (extractor.Find(0, "ipsum", false))
        {
            do
            {
                Response.Write("<br/>");
                Response.Write("Found on page 1 at location " + extractor.FoundText.Bounds + "<br/>");
                Response.Write("<br/>");

                // The found text may be split into parts.
                // Iterate through each part of the found text.
                for (var i = 0; i < extractor.FoundText.Elements.Count; i++)
                {
                    ISearchResultElement element = extractor.FoundText.Elements[i];

                    Response.Write("Element #" + i + " at " + element.Bounds + "<br/>");
                    // Text and font name come from the PDF content, so they are HTML-encoded
                    // to keep characters such as '<' or '&' from breaking the markup.
                    Response.Write("Text: " + Server.HtmlEncode(element.Text) + "<br/>");
                    Response.Write("Font is bold: " + element.FontIsBold + "<br/>");
                    Response.Write("Font is italic:" + element.FontIsItalic + "<br/>");
                    Response.Write("Font name: " + Server.HtmlEncode(element.FontName) + "<br/>");
                    Response.Write("Font size:" + element.FontSize + "<br/>");
                    Response.Write("Font color:" + element.FontColor + "<br/>");
                }
            }
            while (extractor.FindNext());
        }
    }

    Response.End();
}
/// <summary>
/// Searches every page of "sample1.pdf" for the whole word "ipsum" and prints each
/// match together with the position and font details of its elements.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // The using block guarantees cleanup even when loading or searching throws.
    using (TextExtractor extractor = new TextExtractor())
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Load sample PDF document
        extractor.LoadDocumentFromFile(@".\sample1.pdf");

        // Set the matching mode.
        // WordMatchingMode.None - treats the search string as substring
        // WordMatchingMode.ExactMatch - treats the search string as separate word
        // WordMatchingMode.SmartMatch - will find the word in various forms (like Adobe Reader).
        extractor.WordMatchingMode = WordMatchingMode.ExactMatch;

        int pageCount = extractor.GetPageCount();

        for (int i = 0; i < pageCount; i++)
        {
            // Search each page for "ipsum" string
            if (extractor.Find(i, "ipsum", false))
            {
                do
                {
                    Console.WriteLine("");
                    Console.WriteLine("Found on page " + i + " at location " + extractor.FoundText.Bounds.ToString());
                    Console.WriteLine("");

                    // Iterate through each element in the found text
                    foreach (SearchResultElement element in extractor.FoundText.Elements)
                    {
                        Console.WriteLine("Element #" + element.Index + " at left=" + element.Left + "; top=" + element.Top + "; width=" + element.Width + "; height=" + element.Height);
                        Console.WriteLine("Text: " + element.Text);
                        Console.WriteLine("Font is bold: " + element.FontIsBold);
                        Console.WriteLine("Font is italic:" + element.FontIsItalic);
                        Console.WriteLine("Font name: " + element.FontName);
                        Console.WriteLine("Font size:" + element.FontSize);
                        Console.WriteLine("Font color:" + element.FontColor);
                    }
                }
                while (extractor.FindNext());
            }
        }
    }

    Console.WriteLine();
    Console.WriteLine("Press any key to continue...");
    Console.ReadLine();
}
/// <summary>
/// Searches every page of "sample1.pdf" with the regular expression "LABORIS.*VELIT"
/// and prints each match together with the position and font details of its elements.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Matches text starting with the word LABORIS and ending with the word VELIT.
    // See the complete regular expressions reference at https://msdn.microsoft.com/en-us/library/az24scfc(v=vs.110).aspx
    const string regexPattern = "LABORIS.*VELIT";

    // The using block releases the extractor, which the original never disposed.
    using (TextExtractor extractor = new TextExtractor())
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Load sample PDF document
        extractor.LoadDocumentFromFile("sample1.pdf");

        int pageCount = extractor.GetPageCount();

        // Turn on the regular expression search
        extractor.RegexSearch = true;

        // Search through pages
        for (int i = 0; i < pageCount; i++)
        {
            // Search each page for the pattern
            if (extractor.Find(i, regexPattern, false))
            {
                do
                {
                    Console.WriteLine("");
                    Console.WriteLine("Found on page " + i + " at location " + extractor.FoundText.Bounds.ToString());
                    Console.WriteLine("");

                    // Iterate through each element in the found text
                    foreach (SearchResultElement element in extractor.FoundText.Elements)
                    {
                        Console.WriteLine("Element #" + element.Index + " at left=" + element.Left + "; top=" + element.Top + "; width=" + element.Width + "; height=" + element.Height);
                        Console.WriteLine("Text: " + element.Text);
                        Console.WriteLine("Font is bold: " + element.FontIsBold);
                        Console.WriteLine("Font is italic:" + element.FontIsItalic);
                        Console.WriteLine("Font name: " + element.FontName);
                        Console.WriteLine("Font size:" + element.FontSize);
                        Console.WriteLine("Font color:" + element.FontColor);
                    }
                }
                while (extractor.FindNext());
            }
        }
    }

    Console.WriteLine();
    Console.WriteLine("Press any key to continue...");
    Console.ReadLine();
}
/// <summary>
/// Scans every page of "SampleInvoice.pdf" for credit card numbers written as four
/// groups of four digits and prints the text of each matching element.
/// Any exception is reported on the console instead of being propagated.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Credit card number in the format XXXX XXXX XXXX XXXX.
    // See the complete regular expressions reference at https://msdn.microsoft.com/en-us/library/az24scfc(v=vs.110).aspx
    const string cardNumberPattern = @"[0-9]{4} [0-9]{4} [0-9]{4} [0-9]{4}";

    try
    {
        using (TextExtractor pdfText = new TextExtractor())
        {
            pdfText.RegistrationName = "demo";
            pdfText.RegistrationKey = "demo";

            pdfText.LoadDocumentFromFile("SampleInvoice.pdf");

            // The pattern is a regular expression, not a literal string
            pdfText.RegexSearch = true;

            int totalPages = pdfText.GetPageCount();

            for (int page = 0; page < totalPages; page++)
            {
                // Find() yields the first match on the page, FindNext() every following one
                for (bool hit = pdfText.Find(page, cardNumberPattern, false); hit; hit = pdfText.FindNext())
                {
                    foreach (ISearchResultElement piece in pdfText.FoundText.Elements)
                    {
                        Console.WriteLine("Found Credit Card Number: " + piece.Text);
                    }
                }
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine("Error: " + ex.Message);
    }

    Console.WriteLine();
    Console.WriteLine("Press enter key to continue...");
    Console.ReadLine();
}
/// <summary>
/// Highlights every piece of text on the first page of "sample.pdf" that matches the
/// decimal-number pattern by drawing a semi-transparent red rectangle over it, then
/// saves the document as "result.pdf" and opens it.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    const string inputFile = @"sample.pdf";
    const int pageIndex = 0;
    const string searchPattern = "\\d+\\.\\d+";

    // The extractor locates the text; the PDF SDK document is what gets drawn on.
    using (TextExtractor finder = new TextExtractor("demo", "demo"))
    {
        finder.RegexSearch = true;
        finder.LoadDocumentFromFile(inputFile);

        using (Document target = new Document(inputFile))
        {
            target.RegistrationName = "demo";
            target.RegistrationKey = "demo";

            Canvas surface = target.Pages[pageIndex].Canvas;

            // Red brush with Opacity 50 (the original comment describes this as making the brush transparent)
            SolidBrush marker = new SolidBrush(new ColorRGB(255, 0, 0)) { Opacity = 50 };

            // Walk all matches on the page: Find() gives the first, FindNext() the rest
            for (bool hit = finder.Find(pageIndex, searchPattern, caseSensitive: false); hit; hit = finder.FindNext())
            {
                foreach (var piece in finder.FoundText.Elements)
                {
                    // Grow the rectangle slightly (1 horizontally, 2 vertically) before drawing it over the page
                    RectangleF highlight = RectangleF.Inflate(piece.Bounds, 1, 2);
                    surface.DrawRectangle(marker, highlight);
                }
            }

            // Write the highlighted copy
            target.Save("result.pdf");

            // Open result document in default associated application (for demo purposes)
            Process.Start("result.pdf");
        }
    }
}
/// <summary>
/// Searches the first page of "sample1.pdf" for the string "ipsum" and writes every
/// match, with the position and font details of its elements, to the HTTP response as HTML.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
protected void Page_Load(object sender, EventArgs e)
{
    // This test file will be copied to the project directory on the pre-build event (see the project properties).
    String inputFile = Server.MapPath("sample1.pdf");

    // The using block releases the extractor, which the original never disposed.
    using (TextExtractor extractor = new TextExtractor())
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Load sample PDF document
        extractor.LoadDocumentFromFile(inputFile);

        Response.Clear();
        Response.ContentType = "text/html";

        Response.Write("Searching for \"ipsum\" string:<br><br>");

        // Search for "ipsum" string
        if (extractor.Find(0, "ipsum", false))
        {
            do
            {
                Response.Write("<br/>");
                Response.Write("Found on page 1 at location " + extractor.FoundText.Bounds.ToString() + "<br/>");
                Response.Write("<br/>");

                // Iterate through each element in the found text
                foreach (SearchResultElement element in extractor.FoundText.Elements)
                {
                    Response.Write("Element #" + element.Index + " at left=" + element.Left + "; top=" + element.Top + "; width=" + element.Width + "; height=" + element.Height + "<br/>");
                    // Text and font name come from the PDF content, so they are HTML-encoded
                    // to keep characters such as '<' or '&' from breaking the markup.
                    Response.Write("Text: " + Server.HtmlEncode(element.Text) + "<br/>");
                    Response.Write("Font is bold: " + element.FontIsBold + "<br/>");
                    Response.Write("Font is italic:" + element.FontIsItalic + "<br/>");
                    Response.Write("Font name: " + Server.HtmlEncode(element.FontName) + "<br/>");
                    Response.Write("Font size:" + element.FontSize + "<br/>");
                    Response.Write("Font color:" + element.FontColor + "<br/>");
                }
            }
            while (extractor.FindNext());
        }
    }

    Response.End();
}
/// <summary>
/// Searches every page of "sample1.pdf" for the string "ipsum" and prints each match
/// together with the position and font details of its elements.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // The using block releases the extractor, which the original never disposed.
    using (TextExtractor extractor = new TextExtractor())
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Load sample PDF document
        extractor.LoadDocumentFromFile("sample1.pdf");

        int pageCount = extractor.GetPageCount();

        for (int i = 0; i < pageCount; i++)
        {
            // Search each page for "ipsum" string
            if (extractor.Find(i, "ipsum", false))
            {
                do
                {
                    Console.WriteLine("");
                    Console.WriteLine("Found on page " + i + " at location " + extractor.FoundText.Bounds.ToString());
                    Console.WriteLine("");

                    // Iterate through each element in the found text
                    foreach (SearchResultElement element in extractor.FoundText.Elements)
                    {
                        Console.WriteLine("Element #" + element.Index + " at left=" + element.Left + "; top=" + element.Top + "; width=" + element.Width + "; height=" + element.Height);
                        Console.WriteLine("Text: " + element.Text);
                        Console.WriteLine("Font is bold: " + element.FontIsBold);
                        Console.WriteLine("Font is italic:" + element.FontIsItalic);
                        Console.WriteLine("Font name: " + element.FontName);
                        Console.WriteLine("Font size:" + element.FontSize);
                        Console.WriteLine("Font color:" + element.FontColor);
                    }
                }
                while (extractor.FindNext());
            }
        }
    }

    Console.WriteLine();
    Console.WriteLine("Press any key to continue...");
    Console.ReadLine();
}
/// <summary>
/// Searches every page of "sample2.pdf" for "land" using the SmartMatch word matching
/// mode and prints the location and the actual text of each match.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // The using block guarantees cleanup even when loading or searching throws.
    using (TextExtractor extractor = new TextExtractor("demo", "demo"))
    {
        // Load the document
        extractor.LoadDocumentFromFile("sample2.pdf");

        // Smart match the search string like Adobe Reader
        extractor.WordMatchingMode = WordMatchingMode.SmartMatch;

        string searchString = "land";

        // Get page count
        int pageCount = extractor.GetPageCount();

        // Iterate through pages
        for (int i = 0; i < pageCount; i++)
        {
            // Search for text string
            if (extractor.Find(i, searchString, false))
            {
                do
                {
                    // Output search results
                    Console.WriteLine("Found on page " + i + " at location " + extractor.FoundText.Bounds.ToString());

                    // Now we are getting the found text
                    string extractedString = extractor.FoundText.Text;
                    Console.WriteLine("Found text: " + extractedString);
                }
                while (extractor.FindNext()); // Search next occurrence of the search string
            }
        }
    }

    Console.WriteLine();
    Console.WriteLine("Press any key to exit...");
    Console.ReadKey();
}
/// <summary>
/// Finds every page of "sample2.pdf" that contains the keyword "bombardment" and
/// extracts each such page into its own file named "page&lt;number&gt;.pdf".
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    string inputFile = @".\sample2.pdf";

    // The using block guarantees cleanup even when searching or splitting throws.
    using (TextExtractor extractor = new TextExtractor())
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Load sample PDF document
        extractor.LoadDocumentFromFile(inputFile);

        int pageCount = extractor.GetPageCount();

        // Search each page for a keyword
        for (int i = 0; i < pageCount; i++)
        {
            if (extractor.Find(i, "bombardment", false))
            {
                // Extract page
                using (DocumentSplitter splitter = new DocumentSplitter("demo", "demo"))
                {
                    splitter.OptimizeSplittedDocuments = true;

                    int pageNumber = i + 1; // (!) page number in ExtractPage() is 1-based
                    string outputFile = @".\page" + pageNumber + ".pdf";

                    splitter.ExtractPage(inputFile, outputFile, pageNumber);

                    Console.WriteLine("Extracted page " + pageNumber + " to file \"" + outputFile + "\"");
                }
            }
        }
    }

    Console.WriteLine();
    Console.WriteLine("Press any key...");
    Console.ReadKey();
}
/// <summary>
/// Click handler for the Find button: looks up the text from <c>textBoxFind</c> on the
/// first page and lists the location of every occurrence in <c>textBox1</c>.
/// Does nothing when the search box is empty.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void Button_Find(object sender, RoutedEventArgs e)
{
    string query = textBoxFind.Text;

    // Empty search box: leave the output untouched
    if (query.Length == 0)
    {
        return;
    }

    StringBuilder report = new StringBuilder();
    report.AppendLine("Searching for \"" + query + "\"");

    // Find() yields the first occurrence on page 0, FindNext() each following one
    for (bool hit = extractor.Find(0, query, false); hit; hit = extractor.FindNext())
    {
        report.AppendLine("Found on page 0 at location " + extractor.FoundText.Location.ToString());
    }

    report.AppendLine("Finished.");

    textBox1.Text = report.ToString();
}
/// <summary>
/// Searches every page of "sample2.pdf" for the string "what" and prints the location
/// and the extracted text of each occurrence.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // The using block releases the extractor, which the original never disposed.
    // The original also kept a "count" of matches that was never read; it has been removed.
    using (TextExtractor extractor = new TextExtractor("demo", "demo"))
    {
        // Load the document
        extractor.LoadDocumentFromFile("sample2.pdf");

        string searchString = "what";

        // Get page count
        int pageCount = extractor.GetPageCount();

        // Iterate through pages
        for (int i = 0; i < pageCount; i++)
        {
            // Search for text string
            if (extractor.Find(i, searchString, false))
            {
                do
                {
                    // Output search results
                    Console.WriteLine("Found on page " + i + " at location " + extractor.FoundText.Bounds.ToString());

                    // Now we are getting the found text
                    string extractedString = extractor.FoundText.Text;
                    Console.WriteLine("Extracted string: " + extractedString);
                }
                while (extractor.FindNext()); // Search next occurrence of the search string
            }
        }
    }

    Console.WriteLine("Press any key to exit...");
    Console.ReadKey();
}
/// <summary>
/// Extracts the invoice number, invoice date, total and the tabular data from
/// "Invoice.pdf" by locating label text on each page and reading what lies to the
/// right of it, then prints the extracted values.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Results
    string invoiceNo = string.Empty;
    string invoiceDate = string.Empty;
    string total = string.Empty;
    string tableData = string.Empty;

    // Both extractors are scoped with using so they are released even on failure
    // (the original never disposed them).
    using (TextExtractor textExtractor = new TextExtractor("demo", "demo"))
    using (XMLExtractor xmlExtractor = new XMLExtractor("demo", "demo"))
    {
        // Set exact search (default is SmartSearch that works like in Adobe Reader)
        textExtractor.WordMatchingMode = WordMatchingMode.ExactMatch;

        // Load document
        textExtractor.LoadDocumentFromFile("Invoice.pdf");
        xmlExtractor.LoadDocumentFromFile("Invoice.pdf");

        // Read the page count once instead of on every loop iteration
        int pageCount = textExtractor.GetPageCount();

        // Iterate pages
        for (int i = 0; i < pageCount; i++)
        {
            RectangleF pageRectangle = textExtractor.GetPageRectangle(i);
            RectangleF tableRect = new RectangleF(0, 0, pageRectangle.Width, 0);

            // NOTE(review): the extraction area set by the helper is never reset - confirm
            // whether it also constrains the Find() calls that follow.

            // Search for "Invoice No."; assume the text at right is the invoice number
            if (textExtractor.Find(i, "Invoice No.", false))
            {
                invoiceNo = ExtractTextRightOf(textExtractor, i, textExtractor.FoundText.Bounds, pageRectangle);
            }

            // Search for "Invoice Date" and extract text at right
            if (textExtractor.Find(i, "Invoice Date", false))
            {
                invoiceDate = ExtractTextRightOf(textExtractor, i, textExtractor.FoundText.Bounds, pageRectangle);
            }

            // Search for "Quantity" keyword to detect the top of the tabular data rectangle
            if (textExtractor.Find(i, "Quantity", false))
            {
                // Keep the top table coordinate
                tableRect.Y = textExtractor.FoundText.Bounds.Top; // use the Bottom of the bounds if you want to skip column headers
            }

            // Search for "TOTAL" (it will be also the bottom of tabular data rectangle)
            if (textExtractor.Find(i, "TOTAL", true /* case sensitive! */))
            {
                RectangleF totalBounds = textExtractor.FoundText.Bounds;
                total = ExtractTextRightOf(textExtractor, i, totalBounds, pageRectangle);

                // Calculate the table height
                tableRect.Height = totalBounds.Top - tableRect.Top;
            }

            // Extract tabular data using XMLExtractor
            if (tableRect.Height > 0)
            {
                xmlExtractor.SetExtractionArea(tableRect);
                tableData = xmlExtractor.GetXMLFromPage(i);
            }
        }
    }

    // Display extracted data
    Console.WriteLine("Invoice No.: " + invoiceNo);
    Console.WriteLine("Invoice Date: " + invoiceDate);
    Console.WriteLine("TOTAL: " + total);
    Console.WriteLine("Table Data: ");
    Console.WriteLine(tableData);

    Console.WriteLine("Press any key...");
    Console.ReadKey();
}

/// <summary>
/// Reads the text between the right edge of a found label and the right edge of the page,
/// on the same vertical band as the label.
/// </summary>
/// <param name="extractor">Extractor with the document loaded.</param>
/// <param name="pageIndex">Index of the page to read from.</param>
/// <param name="labelBounds">Bounds of the label text that was found.</param>
/// <param name="pageRectangle">Rectangle of the whole page.</param>
/// <returns>The trimmed text extracted from that region.</returns>
private static string ExtractTextRightOf(TextExtractor extractor, int pageIndex, RectangleF labelBounds, RectangleF pageRectangle)
{
    // Shift the rectangle to the right of the label and stretch it to the page edge
    RectangleF area = labelBounds;
    area.X = labelBounds.Right;
    area.Width = pageRectangle.Right - area.Left;

    // Set the extraction region and extract the text
    extractor.SetExtractionArea(area);
    return extractor.GetTextFromPage(pageIndex).Trim();
}