Пример #1
0
        /// <summary>
        /// Removes the text fragments stored in <c>_foundTextRectangles</c> from the document
        /// referenced by <c>pdfViewerControl1.InputFile</c>, writes the result to "output.pdf"
        /// and opens that file with the shell-associated application.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void BtnProceed_Click(object sender, EventArgs e)
        {
            var outputFile = "output.pdf";

            // Create `Bytescout.PDFExtractor.Remover2` instance
            using (Remover2 remover = new Remover2("demo", "demo"))
            {
                // Load document into remover
                remover.LoadDocumentFromFile(pdfViewerControl1.InputFile);

                // Set options from UI
                remover.MaskRemovedText     = cbMaskRemovedText.Checked;
                remover.MakePDFUnsearchable = cbMakeUnsearchable.Checked;

                // Add fragments to remove: key is the page index, value is the rectangles on that page
                foreach (KeyValuePair<int, RectangleF[]> pageRectangles in _foundTextRectangles)
                {
                    remover.AddTextToRemove(pageRectangles.Key, pageRectangles.Value);
                }

                // Perform removal and save result document to file
                remover.PerformRemoval(outputFile);
            }

            // Open output PDF file in default associated application.
            // UseShellExecute is set explicitly because its default differs between
            // .NET Framework (true) and .NET Core/.NET 5+ (false); opening a document
            // by path requires shell execution.
            var processStartInfo = new ProcessStartInfo(outputFile)
            {
                UseShellExecute = true
            };
            Process.Start(processStartInfo);
        }
        /// <summary>
        /// Searches page 0 of "samplePDF_EmailSSN.pdf" for SSN-formatted numbers and email
        /// addresses, removes the matches with masking enabled, saves the result to
        /// "result1.pdf" and opens it with the shell-associated application.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Create Bytescout.PDFExtractor.Remover2 instance.
            // The using block guarantees disposal even when loading, searching or removal throws.
            using (Remover2 remover = new Remover2("demo", "demo"))
            {
                // Mask removed text, which ultimately black out region
                remover.MaskRemovedText = true;

                // Load sample PDF document
                remover.LoadDocumentFromFile(@"samplePDF_EmailSSN.pdf");

                // Prepare TextExtractor
                using (TextExtractor textExtractor = new TextExtractor("demo", "demo"))
                {
                    // Load document into TextExtractor
                    textExtractor.LoadDocumentFromFile(@"samplePDF_EmailSSN.pdf");

                    // Enable RegexSearch
                    textExtractor.RegexSearch = true;

                    // Set word matching options
                    textExtractor.WordMatchingMode = WordMatchingMode.None;

                    // Search SSN in format 202-55-0130
                    string regexPatternSSN = "[0-9]{3}-[0-9]{2}-[0-9]{4}";

                    // Search email Addresses
                    string regexPatternEmail = @"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,6}\b";

                    // Search results for SSN (page index 0 only)
                    ISearchResult[] searchResultsSSN = textExtractor.FindAll(0, regexPatternSSN, caseSensitive: false);

                    // Search results for Email (page index 0 only)
                    ISearchResult[] searchResultEmail = textExtractor.FindAll(0, regexPatternEmail, caseSensitive: false);

                    // Remove SSN text objects found by the search.
                    remover.AddTextToRemove(searchResultsSSN);

                    // Remove Email text objects found by the search.
                    remover.AddTextToRemove(searchResultEmail);

                    // Perform removal of specified objects
                    remover.PerformRemoval(@"result1.pdf");
                }
            }

            // Open output file in default application
            ProcessStartInfo processStartInfo = new ProcessStartInfo("result1.pdf");

            processStartInfo.UseShellExecute = true;
            Process.Start(processStartInfo);
        }
Пример #3
0
        /// <summary>
        /// Searches page 0 of "samplePDF_SSNNo.pdf" for SSN-formatted numbers, removes the
        /// matches with masking enabled, saves the result to "result1.pdf" and opens it with
        /// the shell-associated application.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Create Bytescout.PDFExtractor.Remover2 instance.
            // The using block guarantees disposal even when loading, searching or removal throws.
            using (Remover2 remover = new Remover2("demo", "demo"))
            {
                // Mask removed text, which ultimately black out region
                remover.MaskRemovedText = true;

                // Load sample PDF document
                remover.LoadDocumentFromFile(@"samplePDF_SSNNo.pdf");

                // Prepare TextExtractor
                using (TextExtractor textExtractor = new TextExtractor("demo", "demo"))
                {
                    // Load document into TextExtractor
                    textExtractor.LoadDocumentFromFile(@"samplePDF_SSNNo.pdf");

                    // Search SSN in format 202-55-0130
                    // See the complete regular expressions reference at https://msdn.microsoft.com/en-us/library/az24scfc(v=vs.110).aspx
                    string regexPattern = "[0-9]{3}-[0-9]{2}-[0-9]{4}";

                    // Enable RegexSearch
                    textExtractor.RegexSearch = true;

                    // Set word matching options
                    textExtractor.WordMatchingMode = WordMatchingMode.None;

                    // Search results (page index 0 only)
                    ISearchResult[] searchResults = textExtractor.FindAll(0, regexPattern, caseSensitive: false);

                    // Remove text objects found by the search.
                    remover.AddTextToRemove(searchResults);

                    // Perform removal of specified objects
                    remover.PerformRemoval(@"result1.pdf");
                }
            }

            // Open output file in default application
            ProcessStartInfo processStartInfo = new ProcessStartInfo("result1.pdf");

            processStartInfo.UseShellExecute = true;
            Process.Start(processStartInfo);
        }
Пример #4
0
        /// <summary>
        /// Removes text inside the rectangles stored in <c>_dictPageWiseRectangles</c> from the
        /// document referenced by <c>pdfViewerControl1.InputFile</c>, writes the result to
        /// "output.pdf", resets the viewer to page index 0, shows a confirmation message and
        /// opens the output file with the shell-associated application.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void BtnRemoveTextInSelectedRegions_Click(object sender, EventArgs e)
        {
            // Single definition of the output path so the save and open steps cannot diverge
            string outputFile = "output.pdf";

            using (Remover2 remover = new Remover2("demo", "demo"))
            {
                remover.LoadDocumentFromFile(pdfViewerControl1.InputFile);

                // Key is the page index, value is the rectangles selected on that page
                foreach (KeyValuePair<int, RectangleF[]> pageRectangles in _dictPageWiseRectangles)
                {
                    remover.AddTextToRemove(pageRectangles.Key, pageRectangles.Value);
                }

                // Remove text from all selected rectangles
                remover.PerformRemoval(outputFile);
            }

            pdfViewerControl1.CurrentPageIndex = 0;

            MessageBox.Show("Output file created successfully!", "Success");

            // Open output file.
            // UseShellExecute is set explicitly because its default differs between
            // .NET Framework (true) and .NET Core/.NET 5+ (false); opening a document
            // by path requires shell execution.
            var processStartInfo = new ProcessStartInfo(outputFile)
            {
                UseShellExecute = true
            };
            Process.Start(processStartInfo);
        }
Пример #5
0
        /// <summary>
        /// Runs OCR on "sampleScannedPDF_EmailAddress.pdf" to produce a searchable PDF in memory,
        /// searches page 0 of it for email addresses, removes the matches (masked, output made
        /// unsearchable), saves the result to "result1.pdf" and opens it with the
        /// shell-associated application.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // STEP-1: Make Searchable PDF
            // STEP-2: Get search text result from that searchable PDF
            // STEP-3: Remove sensitive data

            // In-memory buffer holding the searchable PDF; disposed when the pipeline finishes or throws.
            using (MemoryStream searchablePDFStream = new MemoryStream())
            // Create Bytescout.PDFExtractor.SearchablePDFMaker instance
            using (var searchablePDFMaker = new SearchablePDFMaker("demo", "demo"))
            {
                // Load sample PDF document
                searchablePDFMaker.LoadDocumentFromFile("sampleScannedPDF_EmailAddress.pdf");

                // Set the location of language data files
                searchablePDFMaker.OCRLanguageDataFolder = @"c:\Program Files\Bytescout PDF Extractor SDK\ocrdata_best\";

                // Set OCR language
                searchablePDFMaker.OCRLanguage = "eng"; // "eng" for english, "deu" for German, "fra" for French, "spa" for Spanish etc - according to files in "ocrdata" folder

                // Set PDF document rendering resolution
                searchablePDFMaker.OCRResolution = 300;

                // Write the searchable PDF into the memory stream
                searchablePDFMaker.MakePDFSearchable(searchablePDFStream);

                // NOTE(review): the stream is handed to the loaders below without rewinding;
                // presumably the SDK seeks to the start itself - confirm against the SDK docs.

                // Prepare TextExtractor
                using (TextExtractor textExtractor = new TextExtractor("demo", "demo"))
                {
                    // Load stream into TextExtractor
                    textExtractor.LoadDocumentFromStream(searchablePDFStream);

                    // Search email Addresses
                    // See the complete regular expressions reference at https://msdn.microsoft.com/en-us/library/az24scfc(v=vs.110).aspx
                    string regexPattern = @"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,6}\b";

                    // Enable RegexSearch
                    textExtractor.RegexSearch = true;

                    // Set word matching options
                    textExtractor.WordMatchingMode = WordMatchingMode.None;

                    // Search results (page index 0 only)
                    ISearchResult[] searchResults = textExtractor.FindAll(0, regexPattern, caseSensitive: false);

                    // Create Bytescout.PDFExtractor.Remover2 instance
                    using (var remover = new Remover2("demo", "demo"))
                    {
                        // Load the searchable PDF from the memory stream
                        remover.LoadDocumentFromStream(searchablePDFStream);

                        // Mask removed text
                        remover.MaskRemovedText = true;

                        // Make output file unsearchable
                        remover.MakePDFUnsearchable = true;

                        // Provide text to remove
                        remover.AddTextToRemove(searchResults);

                        // Remove the text objects found by the search and save the result.
                        remover.PerformRemoval("result1.pdf");
                    }
                }
            }

            // Open output file in default application
            ProcessStartInfo processStartInfo = new ProcessStartInfo("result1.pdf");

            processStartInfo.UseShellExecute = true;
            Process.Start(processStartInfo);
        }
Пример #6
0
        /// <summary>
        /// Scans every page of the input PDF for SSN-formatted numbers, logs each match to the
        /// console, removes the matched regions with masking enabled, saves the result to a
        /// separate file and opens it with the shell-associated application. Any exception is
        /// reported on the console; the method then waits for Enter before returning.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string inputDocument  = @".\samplePDF_SSNNo.pdf";
            string outputDocument = @".\samplePDF_SSNNo_edited.pdf";

            // Search SSN in format 202-55-0130 using regular expression.
            // See the complete regular expressions reference at https://msdn.microsoft.com/en-us/library/az24scfc(v=vs.110).aspx
            // Declared once here because the pattern does not change between pages.
            string regexPattern = "[0-9]{3}-[0-9]{2}-[0-9]{4}";

            try
            {
                // Create Bytescout.PDFExtractor.TextExtractor instance
                using (TextExtractor extractor = new TextExtractor("demo", "demo"))
                {
                    // Create Bytescout.PDFExtractor.Remover2 instance
                    using (Remover2 remover = new Remover2("demo", "demo"))
                    {
                        // Load the same document into both components so that the bounds
                        // found by the extractor refer to the document the remover edits.
                        extractor.LoadDocumentFromFile(inputDocument);
                        remover.LoadDocumentFromFile(inputDocument);

                        extractor.RegexSearch = true; // Enable the regular expressions

                        int pageCount = extractor.GetPageCount();

                        // Search through pages
                        for (int pageIndex = 0; pageIndex < pageCount; pageIndex++)
                        {
                            // Search each page for the pattern
                            ISearchResult[] searchResults = extractor.FindAll(pageIndex, regexPattern, caseSensitive: false);

                            foreach (var element in searchResults)
                            {
                                Console.WriteLine("Found SSN No: " + element.Text);
                                // Add rectangle of the found SSN to Remover
                                remover.AddTextToRemove(pageIndex, element.Bounds);
                            }
                        }

                        // Mask replaced text with black rectangle
                        remover.MaskRemovedText = true;
                        // Change the color of the mask rectangle, if necessary
                        //remover.MaskColor = Color.Red;

                        remover.PerformRemoval(outputDocument);

                        Console.WriteLine("Found SSNs removed, result saved to file \"" + outputDocument + "\"");
                    }
                }

                // Open result file in default associated application (for the demonstration purpose)
                var processStartInfo = new ProcessStartInfo(outputDocument)
                {
                    UseShellExecute = true
                };
                Process.Start(processStartInfo);
            }
            catch (Exception ex)
            {
                // Console sample: report the failure and fall through to the prompt below
                Console.WriteLine("Error: " + ex.Message);
            }

            Console.WriteLine();
            Console.WriteLine("Press enter key to continue...");
            Console.ReadLine();
        }