/// <summary>
/// Searches page index 0 of "samplePDF_EmailSSN.pdf" for SSN-like and e-mail-like
/// strings, removes the matches with masking enabled, saves the output as
/// "result1.pdf" and opens that file through the operating-system shell.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
        {
            // The using block guarantees the Remover2 instance is disposed even when
            // loading, searching or removal throws.
            using (Remover2 remover = new Remover2("demo", "demo"))
            {
                // Mask removed text, which ultimately blacks out the region
                remover.MaskRemovedText = true;

                // Load sample PDF document
                remover.LoadDocumentFromFile(@"samplePDF_EmailSSN.pdf");

                // Prepare TextExtractor
                using (TextExtractor textExtractor = new TextExtractor("demo", "demo"))
                {
                    // Load the same document into TextExtractor
                    textExtractor.LoadDocumentFromFile(@"samplePDF_EmailSSN.pdf");

                    // Treat the search strings below as regular expressions
                    textExtractor.RegexSearch = true;

                    // Set word matching options
                    textExtractor.WordMatchingMode = WordMatchingMode.None;

                    // SSN in the format 202-55-0130
                    string regexPatternSSN = "[0-9]{3}-[0-9]{2}-[0-9]{4}";

                    // E-mail addresses
                    string regexPatternEmail = @"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,6}\b";

                    // NOTE(review): only page index 0 is searched; a multi-page document
                    // would need a loop over all page indexes.
                    ISearchResult[] searchResultsSSN = textExtractor.FindAll(0, regexPatternSSN, caseSensitive: false);
                    ISearchResult[] searchResultEmail = textExtractor.FindAll(0, regexPatternEmail, caseSensitive: false);

                    // Queue both result sets for removal
                    remover.AddTextToRemove(searchResultsSSN);
                    remover.AddTextToRemove(searchResultEmail);

                    // Perform removal of the queued objects and write the output file
                    remover.PerformRemoval(@"result1.pdf");
                }
            }

            // Open output file in default application
            ProcessStartInfo processStartInfo = new ProcessStartInfo("result1.pdf");

            processStartInfo.UseShellExecute = true;
            Process.Start(processStartInfo);
        }
        /// <summary>
        /// Searches page index 0 of "sample1.pdf" for the phrase "Martian dichotomy"
        /// (case-insensitive), removes the matching text objects, saves the output as
        /// "result1.pdf" and opens that file through the operating-system shell.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Create Bytescout.PDFExtractor.Remover instance
            Remover remover = new Remover("demo", "demo");

            try
            {
                // Load sample PDF document
                remover.LoadDocumentFromFile(@"sample1.pdf");

                // Search keyword
                string searchKeyword = "Martian dichotomy";

                // Prepare TextExtractor
                using (TextExtractor textExtractor = new TextExtractor("demo", "demo"))
                {
                    // Load the same document into TextExtractor
                    textExtractor.LoadDocumentFromFile(@"sample1.pdf");

                    // Set word matching options
                    textExtractor.WordMatchingMode = WordMatchingMode.None;

                    ISearchResult[] searchResults = textExtractor.FindAll(0, searchKeyword, caseSensitive: false);

                    // Remove text objects found by the search.
                    // NOTE: The removed text might be larger than the specified rectangle. Currently the Remover is unable
                    // to split PDF text objects.
                    remover.RemoveText(searchResults, @"result1.pdf");
                }
            }
            finally
            {
                // Clean up on every path, including when loading or removal throws.
                remover.Dispose();
            }

            // Open output file in default application. UseShellExecute is set explicitly
            // because its default is false on .NET Core / .NET 5+, where launching a
            // document path without it fails.
            System.Diagnostics.ProcessStartInfo processStartInfo = new System.Diagnostics.ProcessStartInfo("result1.pdf");

            processStartInfo.UseShellExecute = true;
            System.Diagnostics.Process.Start(processStartInfo);
        }
Пример #3
0
        /// <summary>
        /// Searches page index 0 of "samplePDF_SSNNo.pdf" for SSN-like strings, removes
        /// the matches with masking enabled, saves the output as "result1.pdf" and opens
        /// that file through the operating-system shell.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // The using block guarantees the Remover2 instance is disposed even when
            // loading, searching or removal throws.
            using (Remover2 remover = new Remover2("demo", "demo"))
            {
                // Mask removed text, which ultimately blacks out the region
                remover.MaskRemovedText = true;

                // Load sample PDF document
                remover.LoadDocumentFromFile(@"samplePDF_SSNNo.pdf");

                // Prepare TextExtractor
                using (TextExtractor textExtractor = new TextExtractor("demo", "demo"))
                {
                    // Load the same document into TextExtractor
                    textExtractor.LoadDocumentFromFile(@"samplePDF_SSNNo.pdf");

                    // Search SSN in format 202-55-0130
                    // See the complete regular expressions reference at https://msdn.microsoft.com/en-us/library/az24scfc(v=vs.110).aspx
                    string regexPattern = "[0-9]{3}-[0-9]{2}-[0-9]{4}";

                    // Treat the search string as a regular expression
                    textExtractor.RegexSearch = true;

                    // Set word matching options
                    textExtractor.WordMatchingMode = WordMatchingMode.None;

                    // Search results (page index 0 only)
                    ISearchResult[] searchResults = textExtractor.FindAll(0, regexPattern, caseSensitive: false);

                    // Queue the found text objects for removal
                    remover.AddTextToRemove(searchResults);

                    // Perform removal of the queued objects and write the output file
                    remover.PerformRemoval(@"result1.pdf");
                }
            }

            // Open output file in default application
            ProcessStartInfo processStartInfo = new ProcessStartInfo("result1.pdf");

            processStartInfo.UseShellExecute = true;
            Process.Start(processStartInfo);
        }
        /// <summary>
        /// Searches page index 0 of "samplePDF_SSNNo.pdf" for SSN-like strings, removes
        /// the matching text objects, saves the output as "result1.pdf" and opens that
        /// file through the operating-system shell.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Create Bytescout.PDFExtractor.Remover instance
            Remover remover = new Remover("demo", "demo");

            try
            {
                // Load sample PDF document
                remover.LoadDocumentFromFile(@"samplePDF_SSNNo.pdf");

                // Prepare TextExtractor
                using (TextExtractor textExtractor = new TextExtractor("demo", "demo"))
                {
                    // Load the same document into TextExtractor
                    textExtractor.LoadDocumentFromFile(@"samplePDF_SSNNo.pdf");

                    // Search SSN in format 202-55-0130
                    // See the complete regular expressions reference at https://msdn.microsoft.com/en-us/library/az24scfc(v=vs.110).aspx
                    string regexPattern = "[0-9]{3}-[0-9]{2}-[0-9]{4}";

                    // Treat the search string as a regular expression
                    textExtractor.RegexSearch = true;

                    // Set word matching options
                    textExtractor.WordMatchingMode = WordMatchingMode.None;

                    ISearchResult[] searchResults = textExtractor.FindAll(0, regexPattern, caseSensitive: false);

                    // Remove text objects found by the search.
                    // NOTE: The removed text might be larger than the specified rectangle. Currently the Remover is unable
                    // to split PDF text objects.
                    remover.RemoveText(searchResults, @"result1.pdf");
                }
            }
            finally
            {
                // Clean up on every path, including when loading or removal throws.
                remover.Dispose();
            }

            // Open output file in default application
            ProcessStartInfo processStartInfo = new ProcessStartInfo("result1.pdf");

            processStartInfo.UseShellExecute = true;
            Process.Start(processStartInfo);
        }
Пример #5
0
        /// <summary>
        /// Searches page index 0 of "samplePDF_EmailAddress.pdf" for e-mail-like strings,
        /// removes the matching text objects, saves the output as "result1.pdf" and opens
        /// that file through the operating-system shell.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Create Bytescout.PDFExtractor.Remover instance
            Remover remover = new Remover("demo", "demo");

            try
            {
                // Load sample PDF document
                remover.LoadDocumentFromFile(@"samplePDF_EmailAddress.pdf");

                // Prepare TextExtractor
                using (TextExtractor textExtractor = new TextExtractor("demo", "demo"))
                {
                    // Load the same document into TextExtractor
                    textExtractor.LoadDocumentFromFile(@"samplePDF_EmailAddress.pdf");

                    // Search email addresses
                    // See the complete regular expressions reference at https://msdn.microsoft.com/en-us/library/az24scfc(v=vs.110).aspx
                    string regexPattern = @"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,6}\b";

                    // Treat the search string as a regular expression
                    textExtractor.RegexSearch = true;

                    // Set word matching options
                    textExtractor.WordMatchingMode = WordMatchingMode.None;

                    ISearchResult[] searchResults = textExtractor.FindAll(0, regexPattern, caseSensitive: false);

                    // Remove text objects found by the search.
                    // NOTE: The removed text might be larger than the specified rectangle. Currently the Remover is unable
                    // to split PDF text objects.
                    remover.RemoveText(searchResults, @"result1.pdf");
                }
            }
            finally
            {
                // Clean up on every path, including when loading or removal throws.
                remover.Dispose();
            }

            // Open output file in default application. UseShellExecute is set explicitly
            // because its default is false on .NET Core / .NET 5+, where launching a
            // document path without it fails.
            System.Diagnostics.ProcessStartInfo processStartInfo = new System.Diagnostics.ProcessStartInfo("result1.pdf");

            processStartInfo.UseShellExecute = true;
            System.Diagnostics.Process.Start(processStartInfo);
        }
Пример #6
0
        /// <summary>
        /// Click handler that searches every page of the document shown in the viewer
        /// for the expression typed into the search box, stores the bounds of the
        /// matches per page, and selects the matches of the current page in the viewer.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void BtnFindAll_Click(object sender, EventArgs e)
        {
            // Expressions of zero or one character are rejected up front
            if (tbSearchExpression.Text.Length <= 1)
            {
                MessageBox.Show(@"Try larger search string");
                return;
            }

            using (TextExtractor extractor = new TextExtractor("demo", "demo"))
            {
                // Work on the same file the viewer is displaying
                extractor.LoadDocumentFromFile(pdfViewerControl1.InputFile);

                // Regex mode follows the checkbox; word matching is always off
                extractor.RegexSearch = cbRegex.Checked;
                extractor.WordMatchingMode = WordMatchingMode.None;

                // Walk all pages and remember the rectangles of the matches on each
                for (int page = 0; page < extractor.GetPageCount(); page++)
                {
                    ISearchResult[] hits = extractor.FindAll(page, tbSearchExpression.Text, caseSensitive: true);
                    if (hits.Length == 0)
                    {
                        // Pages without matches leave any existing entry untouched
                        continue;
                    }

                    var bounds = hits.Select(hit => hit.Bounds);
                    _foundTextRectangles[page] = bounds.ToArray();
                }
            }

            // Highlight the stored rectangles of the page currently shown, if any
            if (!_foundTextRectangles.ContainsKey(pdfViewerControl1.CurrentPageIndex))
            {
                return;
            }

            pdfViewerControl1.SelectionInPoints = _foundTextRectangles[pdfViewerControl1.CurrentPageIndex];
        }
Пример #7
0
        /// <summary>
        /// Three-step sample: (1) converts "sampleScannedPDF_EmailAddress.pdf" into a
        /// searchable PDF held in memory, (2) searches page index 0 of that PDF for
        /// e-mail-like strings, (3) removes the matches with masking enabled and saves
        /// the output as "result1.pdf", which is then opened through the shell.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // STEP-1: Make Searchable PDF
            // STEP-2: Get search text result from that searchable PDF
            // STEP-3: Remove sensitive data

            // In-memory buffer for the searchable PDF; disposed when the block exits
            using (MemoryStream searchablePDFStream = new MemoryStream())
            // Create Bytescout.PDFExtractor.SearchablePDFMaker instance
            using (var searchablePDFMaker = new SearchablePDFMaker("demo", "demo"))
            {
                // Load sample PDF document
                searchablePDFMaker.LoadDocumentFromFile("sampleScannedPDF_EmailAddress.pdf");

                // Set the location of language data files
                searchablePDFMaker.OCRLanguageDataFolder = @"c:\Program Files\Bytescout PDF Extractor SDK\ocrdata_best\";

                // Set OCR language
                searchablePDFMaker.OCRLanguage = "eng"; // "eng" for english, "deu" for German, "fra" for French, "spa" for Spanish etc - according to files in "ocrdata" folder

                // Set PDF document rendering resolution
                searchablePDFMaker.OCRResolution = 300;

                // Write the searchable PDF into the in-memory stream
                searchablePDFMaker.MakePDFSearchable(searchablePDFStream);

                // Prepare TextExtractor
                using (TextExtractor textExtractor = new TextExtractor("demo", "demo"))
                {
                    // Load stream into TextExtractor
                    textExtractor.LoadDocumentFromStream(searchablePDFStream);

                    // Search email addresses
                    // See the complete regular expressions reference at https://msdn.microsoft.com/en-us/library/az24scfc(v=vs.110).aspx
                    string regexPattern = @"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,6}\b";

                    // Treat the search string as a regular expression
                    textExtractor.RegexSearch = true;

                    // Set word matching options
                    textExtractor.WordMatchingMode = WordMatchingMode.None;

                    ISearchResult[] searchResults = textExtractor.FindAll(0, regexPattern, caseSensitive: false);

                    // Create Bytescout.PDFExtractor.Remover2 instance
                    using (var remover = new Remover2("demo", "demo"))
                    {
                        // Load the searchable PDF from the same stream
                        remover.LoadDocumentFromStream(searchablePDFStream);

                        // Mask removed text
                        remover.MaskRemovedText = true;

                        // Make output file unsearchable
                        remover.MakePDFUnsearchable = true;

                        // Provide text to remove
                        remover.AddTextToRemove(searchResults);

                        // Remove the queued text objects and write the output file
                        remover.PerformRemoval("result1.pdf");
                    }
                }
            }

            // Open output file in default application
            ProcessStartInfo processStartInfo = new ProcessStartInfo("result1.pdf");

            processStartInfo.UseShellExecute = true;
            Process.Start(processStartInfo);
        }
Пример #8
0
        /// <summary>
        /// Searches every page of the input document for SSN-like strings, prints each
        /// match to the console, masks and removes the matches, saves the output
        /// document and opens it through the operating-system shell. Any exception is
        /// reported on the console; the program then waits for Enter before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string inputDocument  = @".\samplePDF_SSNNo.pdf";
            string outputDocument = @".\samplePDF_SSNNo_edited.pdf";

            // SSN in format 202-55-0130 as a regular expression; the same pattern is used for every page.
            // See the complete regular expressions reference at https://msdn.microsoft.com/en-us/library/az24scfc(v=vs.110).aspx
            string regexPattern = "[0-9]{3}-[0-9]{2}-[0-9]{4}";

            try
            {
                // Create Bytescout.PDFExtractor.TextExtractor instance
                using (TextExtractor extractor = new TextExtractor("demo", "demo"))
                {
                    // Create Bytescout.PDFExtractor.Remover2 instance
                    using (Remover2 remover = new Remover2("demo", "demo"))
                    {
                        // Load the same input document into both objects so the found
                        // rectangles always refer to the file being edited
                        extractor.LoadDocumentFromFile(inputDocument);
                        remover.LoadDocumentFromFile(inputDocument);

                        extractor.RegexSearch = true; // Enable the regular expressions

                        int pageCount = extractor.GetPageCount();

                        // Search through pages
                        for (int pageIndex = 0; pageIndex < pageCount; pageIndex++)
                        {
                            // Search each page for the pattern
                            ISearchResult[] searchResults = extractor.FindAll(pageIndex, regexPattern, caseSensitive: false);

                            foreach (var element in searchResults)
                            {
                                Console.WriteLine("Found SSN No: " + element.Text);
                                // Add rectangle of the found SSN to Remover
                                remover.AddTextToRemove(pageIndex, element.Bounds);
                            }
                        }

                        // Mask replaced text with black rectangle
                        remover.MaskRemovedText = true;
                        // Change the color of the mask rectangle, if necessary
                        //remover.MaskColor = Color.Red;

                        remover.PerformRemoval(outputDocument);

                        Console.WriteLine("Found SSNs removed, result saved to file \"" + outputDocument + "\"");
                    }
                }

                // Open result file in default associated application (for the demonstration purpose)
                var processStartInfo = new ProcessStartInfo(outputDocument)
                {
                    UseShellExecute = true
                };
                Process.Start(processStartInfo);
            }
            catch (Exception ex)
            {
                // Demo-level handling: report the message and fall through to the prompt
                Console.WriteLine("Error: " + ex.Message);
            }

            Console.WriteLine();
            Console.WriteLine("Press enter key to continue...");
            Console.ReadLine();
        }