Ejemplo n.º 1
0
        /// <summary>
        /// RegexExtractText sample entry point: finds every phrase in a PDF that matches a
        /// regular expression and writes each match's text, page number and quad coordinates
        /// to a JSON file.
        /// </summary>
        /// <param name="args">Optional; when args[0] is present it replaces the default sample input path.</param>
        static void Main(string[] args)
        {
            Console.WriteLine("RegexExtractText Sample:");

            using (Library lib = new Library())
            {
                Console.WriteLine("Initialized the library.");

                String sInput  = Library.ResourceDirectory + "Sample_Input/RegexExtractText.pdf";
                String sOutput = "../RegexExtractText-out.json";

                // Uncomment only one regular expression you are interested in seeing the match information of (as a JSON file).
                // Phone numbers
                String sRegex = "((1-)?(\\()?\\d{3}(\\))?(\\s)?(-)?\\d{3}-\\d{4})";
                // Email addresses
                //String sRegex = "(\\b[\\w.!#$%&'*+\\/=?^`{|}~-]+@[\\w-]+(?:\\.[\\w-]+)*\\b)";
                // URLs
                //String sRegex = "((https?:\\/\\/(?:www\\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\\.[^\\s]{2,}|www\\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\\.[^\\s]{2,}|https?:\\/\\/(?:www\\.|(?!www))[a-zA-Z0-9]+\\.[^\\s]{2,}|www\\.[a-zA-Z0-9]+\\.[^\\s]{2,}))";

                // A path given on the command line overrides the bundled sample document.
                if (args.Length > 0)
                {
                    sInput = args[0];
                }

                using (Document doc = new Document(sInput))
                {
                    int nPages = doc.NumPages;

                    Console.WriteLine("Input file:  " + sInput);

                    // This will hold the match data that is serialized to the output JSON file.
                    DocTextFinderJson result = new DocTextFinderJson();
                    result.documentJson = new List <MatchObject>();

                    WordFinderConfig wordConfig = new WordFinderConfig();

                    // Need to set this to true so phrases will be concatenated properly.
                    wordConfig.NoHyphenDetection = true;

                    // Create a DocTextFinder that uses the word finder configuration set up above.
                    using (DocTextFinder docTextFinder =
                               new DocTextFinder(doc, wordConfig))
                    {
                        // Retrieve the phrases matching the regular expression, passing 0 and nPages - 1 as the page range.
                        IList <DocTextFinderMatch> docMatches =
                            docTextFinder.GetMatchList(0, nPages - 1, sRegex);

                        // Iterate through the matches and add match information to the DocTextFinderJson object.
                        foreach (DocTextFinderMatch wInfo in docMatches)
                        {
                            // This object will store the match phrase and the list of quads for the match.
                            MatchObject matchObject = new MatchObject();

                            // This list will store the page number and quad location for each match quad.
                            List <MatchQuadInformation> matchQuadInformationList = new List <MatchQuadInformation>();

                            // Set the match phrase in the matchObject.
                            matchObject.matchPhrase = wInfo.MatchString;

                            // Get the quads.
                            IList <DocTextFinderQuadInfo> QuadInfo = wInfo.QuadInfo;

                            foreach (DocTextFinderQuadInfo qInfo in QuadInfo)
                            {
                                // All quads in this qInfo are reported with the same page number.
                                var pageNumber = qInfo.PageNum;

                                // Iterate through the quads and record one entry per quad.
                                foreach (Quad quad in qInfo.Quads)
                                {
                                    QuadLocation quadLocation = new QuadLocation();
                                    quadLocation.topLeft     = new TopLeft();
                                    quadLocation.bottomLeft  = new BottomLeft();
                                    quadLocation.topRight    = new TopRight();
                                    quadLocation.bottomRight = new BottomRight();

                                    quadLocation.topLeft.x = quad.TopLeft.H;
                                    quadLocation.topLeft.y = quad.TopLeft.V;

                                    quadLocation.bottomLeft.x = quad.BottomLeft.H;
                                    quadLocation.bottomLeft.y = quad.BottomLeft.V;

                                    quadLocation.topRight.x = quad.TopRight.H;
                                    quadLocation.topRight.y = quad.TopRight.V;

                                    quadLocation.bottomRight.x = quad.BottomRight.H;
                                    quadLocation.bottomRight.y = quad.BottomRight.V;

                                    // Build a fresh entry for every quad. Reusing a single entry across
                                    // the quads of one qInfo would add the same object to the list
                                    // repeatedly, so every element would end up showing the last quad.
                                    MatchQuadInformation quadEntry = new MatchQuadInformation();
                                    quadEntry.pageNumber   = pageNumber;
                                    quadEntry.quadLocation = quadLocation;
                                    matchQuadInformationList.Add(quadEntry);
                                }
                            }
                            matchObject.matchQuads = matchQuadInformationList;
                            result.documentJson.Add(matchObject);
                        }
                        // Save the output JSON file.
                        Console.WriteLine("Writing JSON to " + sOutput);
                        string json = JsonConvert.SerializeObject(result.documentJson, Formatting.Indented);
                        System.IO.File.WriteAllText(sOutput, json);
                    }
                }
            }
        }
        /// <summary>
        /// RegexTextSearch sample entry point: finds every phrase in a PDF that matches a
        /// regular expression, places a highlight annotation over each match and saves the
        /// annotated document.
        /// </summary>
        /// <param name="args">Optional; when args[0] is present it is used as the input PDF path.</param>
        static void Main(string[] args)
        {
            Console.WriteLine("RegexTextSearch Sample:");

            using (Library library = new Library())
            {
                Console.WriteLine("Initialized the library.");

                string defaultInput = Library.ResourceDirectory + "Sample_Input/RegexTextSearch.pdf";
                string outputPath   = "RegexTextSearch-out.pdf";

                // Phrases matching this pattern are highlighted in the output.
                // Keep exactly one of the following declarations active.
                // Phone numbers
                string pattern = "((1-)?(\\()?\\d{3}(\\))?(\\s)?(-)?\\d{3}-\\d{4})";
                // Email addresses
                //string pattern = "(\\b[\\w.!#$%&'*+\\/=?^`{|}~-]+@[\\w-]+(?:\\.[\\w-]+)*\\b)";
                // URLs
                //string pattern = "((https?:\\/\\/(?:www\\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\\.[^\\s]{2,}|www\\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\\.[^\\s]{2,}|https?:\\/\\/(?:www\\.|(?!www))[a-zA-Z0-9]+\\.[^\\s]{2,}|www\\.[a-zA-Z0-9]+\\.[^\\s]{2,}))";

                // The first command-line argument, if any, takes precedence over the sample file.
                string inputPath = args.Length > 0 ? args[0] : defaultInput;

                using (Document document = new Document(inputPath))
                {
                    int lastPageIndex = document.NumPages - 1;

                    Console.WriteLine("Input file:  " + inputPath);

                    // NoHyphenDetection has to be true so phrases will be concatenated properly.
                    WordFinderConfig finderConfig = new WordFinderConfig { NoHyphenDetection = true };

                    // The text finder searches the document using the configuration above.
                    using (DocTextFinder finder = new DocTextFinder(document, finderConfig))
                    {
                        // Collect the matches for the pattern over the page range 0 .. lastPageIndex.
                        IList <DocTextFinderMatch> matches = finder.GetMatchList(0, lastPageIndex, pattern);

                        foreach (DocTextFinderMatch match in matches)
                        {
                            // Echo the matched phrase to the console.
                            Console.WriteLine(match.MatchString);

                            // Each quad-info entry carries a page number and the quads on that page.
                            foreach (DocTextFinderQuadInfo pageQuads in match.QuadInfo)
                            {
                                Page page = document.GetPage(pageQuads.PageNum);

                                // Cover the matched words with a highlight and give it a generated appearance.
                                HighlightAnnotation highlight = new HighlightAnnotation(page, pageQuads.Quads);
                                highlight.NormalAppearance = highlight.GenerateAppearance();
                            }
                        }

                        // Write out the document, now carrying the highlight annotations.
                        document.Save(SaveFlags.Full, outputPath);
                    }
                }
            }
        }
        /// <summary>
        /// AddRegexRedaction sample entry point: finds every phrase in a PDF that matches a
        /// regular expression, adds a red redaction over each match, saves the document with
        /// the redactions unapplied, then applies them and saves a second copy.
        /// </summary>
        /// <param name="args">Optional; when args[0] is present it is used as the input PDF path.</param>
        static void Main(string[] args)
        {
            Console.WriteLine("AddRegexRedaction Sample:");

            using (Library library = new Library())
            {
                Console.WriteLine("Initialized the library.");

                string defaultInput  = Library.ResourceDirectory + "Sample_Input/AddRegexRedaction.pdf";
                string markedOutput  = "AddRegexRedaction-out.pdf";
                string appliedOutput = "AddRegexRedaction-out-applied.pdf";

                // Phrases matching this pattern are marked for redaction and then redacted.
                // Keep exactly one of the following declarations active.
                // Phone numbers
                string pattern = "((1-)?(\\()?\\d{3}(\\))?(\\s)?(-)?\\d{3}-\\d{4})";
                // Email addresses
                //string pattern = "(\\b[\\w.!#$%&'*+\\/=?^`{|}~-]+@[\\w-]+(?:\\.[\\w-]+)*\\b)";
                // URLs
                //string pattern = "((https?:\\/\\/(?:www\\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\\.[^\\s]{2,}|www\\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\\.[^\\s]{2,}|https?:\\/\\/(?:www\\.|(?!www))[a-zA-Z0-9]+\\.[^\\s]{2,}|www\\.[a-zA-Z0-9]+\\.[^\\s]{2,}))";

                // The first command-line argument, if any, takes precedence over the sample file.
                string inputPath = args.Length > 0 ? args[0] : defaultInput;

                using (Document document = new Document(inputPath))
                {
                    int lastPageIndex = document.NumPages - 1;

                    Console.WriteLine("Input file:  " + inputPath);

                    // NoHyphenDetection has to be true so phrases will be concatenated properly.
                    WordFinderConfig finderConfig = new WordFinderConfig { NoHyphenDetection = true };

                    // The text finder searches the document using the configuration above.
                    using (DocTextFinder finder = new DocTextFinder(document, finderConfig))
                    {
                        // Collect the matches for the pattern over the page range 0 .. lastPageIndex.
                        IList <DocTextFinderMatch> matches = finder.GetMatchList(0, lastPageIndex, pattern);

                        // Every redaction is created with, and filled in, this red.
                        Color redactionColor = new Color(1.0, 0.0, 0.0);

                        foreach (DocTextFinderMatch match in matches)
                        {
                            // Echo the matched phrase to the console.
                            Console.WriteLine(match.MatchString);

                            // Each quad-info entry carries a page number and the quads on that page.
                            foreach (DocTextFinderQuadInfo pageQuads in match.QuadInfo)
                            {
                                Page page = document.GetPage(pageQuads.PageNum);

                                // Create the redaction over the match and fill its "normal"
                                // appearance with 25% red.
                                Redaction redaction = new Redaction(page, pageQuads.Quads, redactionColor) { FillNormal = true };
                                redaction.SetFillColor(redactionColor, 0.25);
                            }
                        }
                    }

                    // First output: redactions have been added but not yet applied.
                    document.Save(SaveFlags.Full, markedOutput);

                    Console.WriteLine("Wrote a PDF document with unapplied redactions.");

                    // Apply all the redactions in the document.
                    document.ApplyRedactions();

                    // Second output: the document after the redactions were applied.
                    document.Save(SaveFlags.Full, appliedOutput);

                    Console.WriteLine("Wrote a redacted PDF document.");
                }
            }
        }