/// <summary>
/// Handles the "Proceed" button click: removes the text fragments collected in
/// <c>_foundTextRectangles</c> from the PDF currently shown in the viewer, writes the
/// result to "output.pdf" and opens that file.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void BtnProceed_Click(object sender, EventArgs e)
{
    string outputFile = "output.pdf";

    // Create `Bytescout.PDFExtractor.Remover2` instance
    using (Remover2 remover = new Remover2("demo", "demo"))
    {
        // Load document into remover
        remover.LoadDocumentFromFile(pdfViewerControl1.InputFile);

        // Set options from UI
        remover.MaskRemovedText = cbMaskRemovedText.Checked;
        remover.MakePDFUnsearchable = cbMakeUnsearchable.Checked;

        // Add fragments to remove.
        // NOTE(review): the dictionary key is presumably the page index - confirm against
        // the code that fills _foundTextRectangles.
        foreach (KeyValuePair<int, RectangleF[]> keyValuePair in _foundTextRectangles)
        {
            remover.AddTextToRemove(keyValuePair.Key, keyValuePair.Value);
        }

        // Perform removal and save result document to file
        remover.PerformRemoval(outputFile);
    }

    // Open output PDF file in default associated application.
    // UseShellExecute is set explicitly because opening a document (rather than an
    // executable) requires the shell, and the property's default differs between
    // .NET Framework and .NET Core / .NET 5+.
    ProcessStartInfo processStartInfo = new ProcessStartInfo(outputFile);
    processStartInfo.UseShellExecute = true;
    Process.Start(processStartInfo);
}
/// <summary>
/// Sample entry point: searches page index 0 of "samplePDF_EmailSSN.pdf" for SSNs and
/// email addresses using regular expressions, removes (and masks) the matches, saves the
/// result to "result1.pdf" and opens it in the default application.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    string inputFile = @"samplePDF_EmailSSN.pdf";
    string outputFile = @"result1.pdf";

    // Create Bytescout.PDFExtractor.Remover2 instance.
    // The using block guarantees disposal even if loading, searching or removal throws.
    using (Remover2 remover = new Remover2("demo", "demo"))
    {
        // Mask removed text, which ultimately blacks out the region
        remover.MaskRemovedText = true;

        // Load sample PDF document
        remover.LoadDocumentFromFile(inputFile);

        // Prepare TextExtractor
        using (TextExtractor textExtractor = new TextExtractor("demo", "demo"))
        {
            // Load document into TextExtractor
            textExtractor.LoadDocumentFromFile(inputFile);

            // Enable RegexSearch
            textExtractor.RegexSearch = true;

            // Set word matching options
            textExtractor.WordMatchingMode = WordMatchingMode.None;

            // Search SSN in format 202-55-0130
            string regexPatternSSN = "[0-9]{3}-[0-9]{2}-[0-9]{4}";

            // Search email addresses
            string regexPatternEmail = @"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,6}\b";

            // NOTE: only page index 0 is searched; further pages are left untouched.
            // Search results for SSN
            ISearchResult[] searchResultsSSN = textExtractor.FindAll(0, regexPatternSSN, caseSensitive: false);

            // Search results for email
            ISearchResult[] searchResultEmail = textExtractor.FindAll(0, regexPatternEmail, caseSensitive: false);

            // Remove SSN text objects found by the search
            remover.AddTextToRemove(searchResultsSSN);

            // Remove email text objects found by the search
            remover.AddTextToRemove(searchResultEmail);

            // Perform removal of specified objects
            remover.PerformRemoval(outputFile);
        }
    }

    // Open output file in default application
    ProcessStartInfo processStartInfo = new ProcessStartInfo(outputFile);
    processStartInfo.UseShellExecute = true;
    Process.Start(processStartInfo);
}
/// <summary>
/// Sample entry point: searches page index 0 of "samplePDF_SSNNo.pdf" for SSNs using a
/// regular expression, removes (and masks) the matches, saves the result to "result1.pdf"
/// and opens it in the default application.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    string inputFile = @"samplePDF_SSNNo.pdf";
    string outputFile = @"result1.pdf";

    // Create Bytescout.PDFExtractor.Remover2 instance.
    // The using block guarantees disposal even if loading, searching or removal throws.
    using (Remover2 remover = new Remover2("demo", "demo"))
    {
        // Mask removed text, which ultimately blacks out the region
        remover.MaskRemovedText = true;

        // Load sample PDF document
        remover.LoadDocumentFromFile(inputFile);

        // Prepare TextExtractor
        using (TextExtractor textExtractor = new TextExtractor("demo", "demo"))
        {
            // Load document into TextExtractor
            textExtractor.LoadDocumentFromFile(inputFile);

            // Search SSN in format 202-55-0130
            // See the complete regular expressions reference at https://msdn.microsoft.com/en-us/library/az24scfc(v=vs.110).aspx
            string regexPattern = "[0-9]{3}-[0-9]{2}-[0-9]{4}";

            // Enable RegexSearch
            textExtractor.RegexSearch = true;

            // Set word matching options
            textExtractor.WordMatchingMode = WordMatchingMode.None;

            // Search results. NOTE: only page index 0 is searched.
            ISearchResult[] searchResults = textExtractor.FindAll(0, regexPattern, caseSensitive: false);

            // Remove text objects found by the search
            remover.AddTextToRemove(searchResults);

            // Perform removal of specified objects
            remover.PerformRemoval(outputFile);
        }
    }

    // Open output file in default application
    ProcessStartInfo processStartInfo = new ProcessStartInfo(outputFile);
    processStartInfo.UseShellExecute = true;
    Process.Start(processStartInfo);
}
/// <summary>
/// Handles the "Remove text in selected regions" button click: removes the text inside
/// every rectangle stored in <c>_dictPageWiseRectangles</c> from the PDF currently shown
/// in the viewer, writes the result to "output.pdf", notifies the user and opens the file.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void BtnRemoveTextInSelectedRegions_Click(object sender, EventArgs e)
{
    string outputFile = "output.pdf";

    using (Remover2 remover = new Remover2("demo", "demo"))
    {
        remover.LoadDocumentFromFile(pdfViewerControl1.InputFile);

        // NOTE(review): the dictionary key is presumably the page index the rectangles
        // belong to - confirm against the code that fills _dictPageWiseRectangles.
        foreach (KeyValuePair<int, RectangleF[]> keyValuePair in _dictPageWiseRectangles)
        {
            remover.AddTextToRemove(keyValuePair.Key, keyValuePair.Value);
        }

        // Remove text from all selected rectangles
        remover.PerformRemoval(outputFile);
    }

    pdfViewerControl1.CurrentPageIndex = 0;

    MessageBox.Show("Output file created successfully!", "Success");

    // Open output file in the default associated application.
    // UseShellExecute is set explicitly because opening a document requires the shell and
    // the property's default differs between .NET Framework and .NET Core / .NET 5+.
    ProcessStartInfo processStartInfo = new ProcessStartInfo(outputFile);
    processStartInfo.UseShellExecute = true;
    Process.Start(processStartInfo);
}
/// <summary>
/// Sample entry point that removes email addresses from a scanned PDF:
/// STEP-1 makes the PDF searchable via OCR (written to an in-memory stream),
/// STEP-2 searches page index 0 of that searchable PDF for email addresses,
/// STEP-3 removes and masks the matches and saves the result to "result1.pdf",
/// which is then opened in the default application.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    string outputFile = "result1.pdf";

    // Holds the searchable PDF between the OCR step and the search/removal steps.
    // It must outlive both the TextExtractor and the Remover2 that load from it.
    using (MemoryStream searchablePDFStream = new MemoryStream())
    {
        // Create Bytescout.PDFExtractor.SearchablePDFMaker instance
        using (var searchablePDFMaker = new SearchablePDFMaker("demo", "demo"))
        {
            // Load sample PDF document
            searchablePDFMaker.LoadDocumentFromFile("sampleScannedPDF_EmailAddress.pdf");

            // Set the location of language data files
            searchablePDFMaker.OCRLanguageDataFolder = @"c:\Program Files\Bytescout PDF Extractor SDK\ocrdata_best\";

            // Set OCR language
            // "eng" for English, "deu" for German, "fra" for French, "spa" for Spanish etc. - according to files in "ocrdata" folder
            searchablePDFMaker.OCRLanguage = "eng";

            // Set PDF document rendering resolution
            searchablePDFMaker.OCRResolution = 300;

            // Write the searchable PDF into the memory stream
            searchablePDFMaker.MakePDFSearchable(searchablePDFStream);

            // Prepare TextExtractor
            using (TextExtractor textExtractor = new TextExtractor("demo", "demo"))
            {
                // Load stream into TextExtractor
                textExtractor.LoadDocumentFromStream(searchablePDFStream);

                // Search email addresses
                // See the complete regular expressions reference at https://msdn.microsoft.com/en-us/library/az24scfc(v=vs.110).aspx
                string regexPattern = @"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,6}\b";

                // Enable RegexSearch
                textExtractor.RegexSearch = true;

                // Set word matching options
                textExtractor.WordMatchingMode = WordMatchingMode.None;

                // NOTE: only page index 0 is searched.
                ISearchResult[] searchResults = textExtractor.FindAll(0, regexPattern, caseSensitive: false);

                // Create Bytescout.PDFExtractor.Remover2 instance
                using (var remover = new Remover2("demo", "demo"))
                {
                    // Load the searchable PDF from the same stream
                    remover.LoadDocumentFromStream(searchablePDFStream);

                    // Mask removed text
                    remover.MaskRemovedText = true;

                    // Make output file unsearchable
                    remover.MakePDFUnsearchable = true;

                    // Provide text to remove
                    remover.AddTextToRemove(searchResults);

                    // Remove text objects found by the search and save the result
                    remover.PerformRemoval(outputFile);
                }
            }
        }
    }

    // Open output file in default application
    ProcessStartInfo processStartInfo = new ProcessStartInfo(outputFile);
    processStartInfo.UseShellExecute = true;
    Process.Start(processStartInfo);
}
/// <summary>
/// Sample entry point: scans every page of the input PDF for SSNs using a regular
/// expression, prints each match, removes and masks the matched regions, saves the result
/// to a new file and opens it. Any exception is reported on the console; the program then
/// waits for Enter before exiting.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    string inputDocument = @".\samplePDF_SSNNo.pdf";
    string outputDocument = @".\samplePDF_SSNNo_edited.pdf";

    // Search SSN in format 202-55-0130 using regular expression.
    // See the complete regular expressions reference at https://msdn.microsoft.com/en-us/library/az24scfc(v=vs.110).aspx
    string regexPattern = "[0-9]{3}-[0-9]{2}-[0-9]{4}";

    try
    {
        // Create Bytescout.PDFExtractor.TextExtractor instance
        using (TextExtractor extractor = new TextExtractor("demo", "demo"))
        {
            // Create Bytescout.PDFExtractor.Remover2 instance
            using (Remover2 remover = new Remover2("demo", "demo"))
            {
                // Load the same document into both objects so that the search results
                // and the removal always refer to the same file.
                extractor.LoadDocumentFromFile(inputDocument);
                remover.LoadDocumentFromFile(inputDocument);

                // Enable the regular expressions
                extractor.RegexSearch = true;

                int pageCount = extractor.GetPageCount();

                // Search through pages
                for (int pageIndex = 0; pageIndex < pageCount; pageIndex++)
                {
                    // Search each page for the pattern
                    ISearchResult[] searchResults = extractor.FindAll(pageIndex, regexPattern, caseSensitive: false);

                    foreach (var element in searchResults)
                    {
                        Console.WriteLine("Found SSN No: " + element.Text);

                        // Add rectangle of the found SSN to Remover
                        remover.AddTextToRemove(pageIndex, element.Bounds);
                    }
                }

                // Mask replaced text with black rectangle
                remover.MaskRemovedText = true;

                // Change the color of the mask rectangle, if necessary
                //remover.MaskColor = Color.Red;

                remover.PerformRemoval(outputDocument);

                Console.WriteLine("Found SSNs removed, result saved to file \"" + outputDocument + "\"");
            }
        }

        // Open result file in default associated application (for the demonstration purpose)
        var processStartInfo = new ProcessStartInfo(outputDocument) { UseShellExecute = true };
        Process.Start(processStartInfo);
    }
    catch (Exception ex)
    {
        Console.WriteLine("Error: " + ex.Message);
    }

    Console.WriteLine();
    Console.WriteLine("Press enter key to continue...");
    Console.ReadLine();
}