Esempi di codice in C# (CSharp) per WordFinderConfig

Esempio n. 1

0

Mostra file

File: DisplayPDFForm.cs Progetto: vivekel015/adobe-pdf-library-samples

        /// <summary>
        /// Searches the open document (<c>PDFDoc</c>) and highlights the matches.
        /// If the search fails with the library's security error, the user is asked
        /// for the document password and the search is attempted once more.
        /// </summary>
        /// <remarks>
        /// Any exception that is not the security error is rethrown unchanged,
        /// with its original stack trace.
        /// </remarks>
        private void searchwords()
        {
            // The library reports failures as text such as "Error number: 1073938472".
            // This particular number is the security error we intercept to ask for
            // the document password.
            const string errorNumberLabel = "Error number:";
            const string passwordRequiredError = "1073938472";

            WordFinderConfig wordConfig = new WordFinderConfig();

            wordConfig.IgnoreCharGaps = true;

            using (WordFinder wordFinder = new WordFinder(PDFDoc, WordFinderVersion.Latest, wordConfig))
            {
                try // If we run into the security exception we will ask for a password and try again.
                {
                    search_and_highlight(wordFinder);
                }
                catch (Exception e)
                {
                    // Render the exception once; the error number is only available as text.
                    string details = e.ToString();

                    int labelIdx = details.IndexOf(errorNumberLabel, StringComparison.Ordinal);
                    if (labelIdx == -1)
                    {
                        throw; // not a library error: preserve the original stack trace
                    }

                    int colonIdx = details.IndexOf(": ", labelIdx, StringComparison.Ordinal);
                    if (colonIdx == -1)
                    {
                        throw;
                    }

                    // The error number starts right after the ": " separator.  Compare in
                    // place so that a message shorter than expected cannot raise a second,
                    // unrelated exception that would hide the original one.
                    int numberIdx = colonIdx + 2;
                    bool isPasswordError =
                        numberIdx + passwordRequiredError.Length <= details.Length &&
                        string.CompareOrdinal(details, numberIdx, passwordRequiredError, 0,
                                              passwordRequiredError.Length) == 0;
                    if (!isPasswordError)
                    {
                        throw;
                    }

                    if (passwordenter())
                    {
                        search_and_highlight(wordFinder);
                    }
                    else
                    {
                        /*
                         * If we null this then the next click of the search button
                         * will re-prompt us for a password and do the searching
                         * we are hoping for.
                         */
                        SearchWord = null;
                    }
                }
            }
        }

Esempio n. 2

0

Mostra file

File: TextSearchManager.cs Progetto: yanrbts/adobe-pdf-library-samples

        /**
         * search -
         *
         * Does the actual searching of the wordlist in the document.
         * Here we search one page at a time but it would be possible
         * to search the entire document at once.
         *
         * Every case-insensitive occurrence of searchString inside a word on
         * page PageNum adds one quad to highlightQuads.  A null or empty
         * search string matches nothing.  The accumulated quads are drawn
         * when highlighting is the selected mode.
         */
        private void search(int PageNum)
        {
            // An empty search string would "match" at every position and index
            // CharQuads at -1, so treat it (and null) as "nothing to find".
            if (!string.IsNullOrEmpty(searchString))
            {
                // Lower-case the search string once instead of once per comparison.
                string needle = searchString.ToLower();

                // create the config for the wordfinder
                WordFinderConfig config = new WordFinderConfig();

                config.IgnoreCharGaps = true;

                // create a wordfinder for the current page
                using (WordFinder wordFinder = new WordFinder(dleController.Document, WordFinderVersion.Latest, config))
                {
                    IList <Word> wordsOnCurrentPage = wordFinder.GetWordList(PageNum);

                    // compare the search string to each word in the wordlist
                    foreach (Word w in wordsOnCurrentPage)
                    {
                        string haystack    = w.Text.ToLower();
                        int    searchIndex = 0;

                        // walk through every occurrence of the search string in this word
                        while (searchIndex < haystack.Length)
                        {
                            // Ordinal search: the returned position is a plain character
                            // index, which is what we need to index the per-character quads.
                            int firstOccurence = haystack.IndexOf(needle, searchIndex, System.StringComparison.Ordinal);

                            if (firstOccurence == -1)
                            {
                                break;
                            }

                            // get the quad that should be highlighted by taking the left edge of the
                            // first matched character and the right edge of the last matched character
                            // NOTE(review): assumes CharQuads has one entry per character of Text — confirm.
                            int  lastCharIndex = firstOccurence + needle.Length - 1;
                            Quad highlightQuad = new Quad(w.CharQuads[firstOccurence].TopLeft,
                                                          w.CharQuads[lastCharIndex].TopRight,
                                                          w.CharQuads[firstOccurence].BottomLeft,
                                                          w.CharQuads[lastCharIndex].BottomRight);
                            highlightQuads.Insert(0, highlightQuad);

                            // continue looking after the end of this match
                            searchIndex = firstOccurence + needle.Length;
                        }
                    }
                }
            }

            if (highlightSelected == ApplicationHighlight.Highlight)
            {
                dleController.docView.DrawSearchRects(highlightQuads.ToArray());
            }
        }

Esempio n. 3

0

Mostra file

        /// <summary>
        /// Highlights every occurrence of "cloudy" and underlines every occurrence
        /// of "rain" on the first page of a PDF, prints the annotated text and
        /// saves the result.
        /// </summary>
        /// <param name="args">
        /// Optional: <c>args[0]</c> overrides the input PDF path and
        /// <c>args[1]</c> overrides the output PDF path.
        /// </param>
        static void Main(string[] args)
        {
            Console.WriteLine("UnderlinesAndHighlights Sample:");

            using (Library lib = new Library())
            {
                Console.WriteLine("Initialized the library.");

                String sInput  = "../../Resources/Sample_Input/sample.pdf";
                String sOutput = "../HighlightAndUnderlineAnnotations-out.pdf";

                if (args.Length > 0)
                {
                    sInput = args[0];
                }

                if (args.Length > 1)
                {
                    sOutput = args[1];
                }

                // Dispose the document deterministically, before the library shuts down.
                using (Document doc = new Document(sInput))
                {
                    Console.WriteLine("Opened a document " + sInput);

                    Page docpage = doc.GetPage(0);

                    //
                    // Highlight occurrences of the word "cloudy" on the page.
                    // Underline occurrences of the word "rain" on the page.
                    //
                    // For a more in-depth example of using the WordFinder, see the TextExtraction sample.
                    //
                    List <Quad> cloudyQuads = new List <Quad>();
                    List <Quad> rainQuads   = new List <Quad>();

                    WordFinderConfig wfc = new WordFinderConfig();

                    // The word finder is only needed while collecting the quads.
                    using (WordFinder wf = new WordFinder(doc, WordFinderVersion.Latest, wfc))
                    {
                        IList <Word> words = wf.GetWordList(docpage.PageNumber);
                        foreach (Word w in words)
                        {
                            // Compute the per-word facts once; both tests below use them.
                            string text = w.Text.ToLower();
                            bool   hasTrailingPunctuation =
                                (w.Attributes & WordAttributeFlags.HasTrailingPunctuation) ==
                                WordAttributeFlags.HasTrailingPunctuation;

                            // Store the Quads of all "Cloudy" words in a list for later use in
                            // creating the annotation.  A word with trailing punctuation
                            // (e.g. "cloudy,") only has to start with the target.
                            if (text.Equals("cloudy") || (hasTrailingPunctuation && text.StartsWith("cloudy")))
                            {
                                cloudyQuads.AddRange(w.Quads);
                            }

                            // Store the Quads of all "Rain" words
                            if (text.Equals("rain") || (hasTrailingPunctuation && text.StartsWith("rain")))
                            {
                                rainQuads.AddRange(w.Quads);
                            }
                        }
                    }

                    HighlightAnnotation highlights = new HighlightAnnotation(docpage, cloudyQuads);
                    highlights.Color            = new Color(1.0, 0.75, 1.0);
                    highlights.NormalAppearance = highlights.GenerateAppearance();

                    UnderlineAnnotation underlines = new UnderlineAnnotation(docpage, rainQuads);
                    underlines.Color            = new Color(0.0, 0.0, 0.0);
                    underlines.NormalAppearance = underlines.GenerateAppearance();

                    // Read back the text that was annotated.
                    Console.WriteLine("Cloudy text: {0}", highlights.GetAnnotatedText(true));
                    Console.WriteLine("Rainy text: {0}", underlines.GetAnnotatedText(false));

                    doc.Save(SaveFlags.Full, sOutput);
                }
            }
        }

Esempio n. 4

0

Mostra file

File: RegexTextSearch.cs Progetto: datalogics-mdawson/adobe-pdf-library-samples

        /// <summary>
        /// Finds every phrase in a PDF that matches a regular expression, prints
        /// each match, highlights it on its page and saves the annotated document.
        /// </summary>
        /// <param name="args">Optional: <c>args[0]</c> overrides the input PDF path.</param>
        static void Main(string[] args)
        {
            Console.WriteLine("RegexTextSearch Sample:");

            using (Library lib = new Library())
            {
                Console.WriteLine("Initialized the library.");

                String defaultInput = Library.ResourceDirectory + "Sample_Input/RegexTextSearch.pdf";
                String outputPath   = "RegexTextSearch-out.pdf";

                // The pattern whose matches get highlighted.  Exactly one of the
                // alternatives below should be active at a time.
                //
                // Phone numbers:
                String pattern = "((1-)?(\\()?\\d{3}(\\))?(\\s)?(-)?\\d{3}-\\d{4})";
                // Email addresses:
                //String pattern = "(\\b[\\w.!#$%&'*+\\/=?^`{|}~-]+@[\\w-]+(?:\\.[\\w-]+)*\\b)";
                // URLs:
                //String pattern = "((https?:\\/\\/(?:www\\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\\.[^\\s]{2,}|www\\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\\.[^\\s]{2,}|https?:\\/\\/(?:www\\.|(?!www))[a-zA-Z0-9]+\\.[^\\s]{2,}|www\\.[a-zA-Z0-9]+\\.[^\\s]{2,}))";

                // A path given on the command line wins over the bundled sample.
                String inputPath = args.Length > 0 ? args[0] : defaultInput;

                using (Document document = new Document(inputPath))
                {
                    int pageCount = document.NumPages;

                    Console.WriteLine("Input file:  " + inputPath);

                    // Hyphen detection must be off so phrases are concatenated properly.
                    WordFinderConfig finderConfig = new WordFinderConfig { NoHyphenDetection = true };

                    using (DocTextFinder textFinder = new DocTextFinder(document, finderConfig))
                    {
                        // Search the whole document, first page through last.
                        IList <DocTextFinderMatch> matches = textFinder.GetMatchList(0, pageCount - 1, pattern);

                        foreach (DocTextFinderMatch match in matches)
                        {
                            // Report the matched phrase.
                            Console.WriteLine(match.MatchString);

                            // A match carries one quad-info entry per page it touches;
                            // each one becomes a highlight annotation on that page.
                            foreach (DocTextFinderQuadInfo pageQuads in match.QuadInfo)
                            {
                                Page targetPage = document.GetPage(pageQuads.PageNum);

                                HighlightAnnotation annotation = new HighlightAnnotation(targetPage, pageQuads.Quads);
                                annotation.NormalAppearance = annotation.GenerateAppearance();
                            }
                        }

                        // Persist the document together with its new highlights.
                        document.Save(SaveFlags.Full, outputPath);
                    }
                }
            }
        }

Esempio n. 5

0

Mostra file

        /// <summary>
        /// Finds every phrase in a PDF that matches a regular expression and writes
        /// the matches, with the page number and corner coordinates of each quad,
        /// to a JSON file.
        /// </summary>
        /// <param name="args">Optional: <c>args[0]</c> overrides the input PDF path.</param>
        static void Main(string[] args)
        {
            Console.WriteLine("RegexExtractText Sample:");

            using (Library lib = new Library())
            {
                Console.WriteLine("Initialized the library.");

                String sInput  = Library.ResourceDirectory + "Sample_Input/RegexExtractText.pdf";
                String sOutput = "../RegexExtractText-out.json";

                // Uncomment only one regular expression you are interested in seeing the match information of (as a JSON file).
                // Phone numbers
                String sRegex = "((1-)?(\\()?\\d{3}(\\))?(\\s)?(-)?\\d{3}-\\d{4})";
                // Email addresses
                //String sRegex = "(\\b[\\w.!#$%&'*+\\/=?^`{|}~-]+@[\\w-]+(?:\\.[\\w-]+)*\\b)";
                // URLs
                //String sRegex = "((https?:\\/\\/(?:www\\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\\.[^\\s]{2,}|www\\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\\.[^\\s]{2,}|https?:\\/\\/(?:www\\.|(?!www))[a-zA-Z0-9]+\\.[^\\s]{2,}|www\\.[a-zA-Z0-9]+\\.[^\\s]{2,}))";

                if (args.Length > 0)
                {
                    sInput = args[0];
                }

                using (Document doc = new Document(sInput))
                {
                    int nPages = doc.NumPages;

                    Console.WriteLine("Input file:  " + sInput);

                    // This will hold the JSON stream that we will print to the output JSON file.
                    DocTextFinderJson result = new DocTextFinderJson();
                    result.documentJson = new List <MatchObject>();

                    WordFinderConfig wordConfig = new WordFinderConfig();

                    // Need to set this to true so phrases will be concatenated properly.
                    wordConfig.NoHyphenDetection = true;

                    // Create a DocTextFinder with the configured wordfinder parameters.
                    using (DocTextFinder docTextFinder =
                               new DocTextFinder(doc, wordConfig))
                    {
                        // Retrieve the phrases matching a regular expression.
                        IList <DocTextFinderMatch> docMatches =
                            docTextFinder.GetMatchList(0, nPages - 1, sRegex);

                        // Iterate through the matches and add match information to the DocTextFinderJson object.
                        foreach (DocTextFinderMatch wInfo in docMatches)
                        {
                            // This object will store the match phrase and an array of quads for the match.
                            MatchObject matchObject = new MatchObject();

                            // This list will store the page number and quad location for each match quad.
                            List <MatchQuadInformation> matchQuadInformationList = new List <MatchQuadInformation>();

                            // Set the match phrase in the matchObject.
                            matchObject.matchPhrase = wInfo.MatchString;

                            // Get the quads.
                            IList <DocTextFinderQuadInfo> QuadInfo = wInfo.QuadInfo;

                            foreach (DocTextFinderQuadInfo qInfo in QuadInfo)
                            {
                                // Iterate through the quads and record one entry per quad.
                                foreach (Quad quad in qInfo.Quads)
                                {
                                    // Create a fresh entry for every quad.  Reusing a single
                                    // instance would add the same object to the list repeatedly,
                                    // so every entry would end up showing the last quad.
                                    MatchQuadInformation quadInformation = new MatchQuadInformation();
                                    quadInformation.pageNumber = qInfo.PageNum;

                                    QuadLocation quadLocation = new QuadLocation();
                                    quadLocation.topLeft     = new TopLeft();
                                    quadLocation.bottomLeft  = new BottomLeft();
                                    quadLocation.topRight    = new TopRight();
                                    quadLocation.bottomRight = new BottomRight();

                                    quadLocation.topLeft.x = quad.TopLeft.H;
                                    quadLocation.topLeft.y = quad.TopLeft.V;

                                    quadLocation.bottomLeft.x = quad.BottomLeft.H;
                                    quadLocation.bottomLeft.y = quad.BottomLeft.V;

                                    quadLocation.topRight.x = quad.TopRight.H;
                                    quadLocation.topRight.y = quad.TopRight.V;

                                    quadLocation.bottomRight.x = quad.BottomRight.H;
                                    quadLocation.bottomRight.y = quad.BottomRight.V;

                                    quadInformation.quadLocation = quadLocation;
                                    matchQuadInformationList.Add(quadInformation);
                                }
                            }
                            matchObject.matchQuads = matchQuadInformationList;
                            result.documentJson.Add(matchObject);
                        }
                        // Save the output JSON file.
                        Console.WriteLine("Writing JSON to " + sOutput);
                        string json = JsonConvert.SerializeObject(result.documentJson, Formatting.Indented);
                        System.IO.File.WriteAllText(sOutput, json);
                    }
                }
            }
        }

Esempio n. 6

0

Mostra file

File: ListWords.cs Progetto: yanrbts/adobe-pdf-library-samples

        /// <summary>
        /// Prints, for every word on every page of a PDF, its quads, its style
        /// transitions, its attribute flags and its text.
        /// </summary>
        /// <param name="args">Optional: <c>args[0]</c> overrides the input PDF path.</param>
        static void Main(string[] args)
        {
            Console.WriteLine("ListWords Sample:");

            using (Library lib = new Library())
            {
                Console.WriteLine("Initialized the library.");

                String sInput = "../../Resources/Sample_Input/sample.pdf";

                if (args.Length > 0)
                {
                    sInput = args[0];
                }

                Console.WriteLine("Input file: " + sInput);

                // Dispose the document deterministically, before the library shuts down.
                using (Document doc = new Document(sInput))
                {
                    int nPages = doc.NumPages;

                    WordFinderConfig wordConfig = new WordFinderConfig();
                    wordConfig.IgnoreCharGaps  = true;
                    wordConfig.IgnoreLineGaps  = false;
                    wordConfig.NoAnnots        = true;
                    wordConfig.NoEncodingGuess = true;              // leave non-Roman single-byte font alone

                    // Std Roman treatment for custom encoding; overrides the noEncodingGuess option
                    wordConfig.UnknownToStdEnc = false;

                    wordConfig.DisableTaggedPDF  = true;    // legacy mode WordFinder creation
                    wordConfig.NoXYSort          = false;
                    wordConfig.PreserveSpaces    = false;
                    wordConfig.NoLigatureExp     = false;
                    wordConfig.NoHyphenDetection = false;
                    wordConfig.TrustNBSpace      = false;
                    wordConfig.NoExtCharOffset   = false;           // text extraction efficiency
                    wordConfig.NoStyleInfo       = false;           // text extraction efficiency

                    using (WordFinder wordFinder = new WordFinder(doc, WordFinderVersion.Latest, wordConfig))
                    {
                        for (int i = 0; i < nPages; i++)
                        {
                            IList <Word> pageWords = wordFinder.GetWordList(i);
                            foreach (Word wInfo in pageWords)
                            {
                                string       s        = wInfo.Text;
                                IList <Quad> QuadList = wInfo.Quads;

                                foreach (Quad Q in QuadList)
                                {
                                    Console.WriteLine(Q);
                                }

                                // The style transitions are printed twice: once by iterating the
                                // property directly and once through a local list.  Both loops are
                                // kept so the sample's output is unchanged.
                                foreach (StyleTransition st in wInfo.StyleTransitions)
                                {
                                    Console.WriteLine(st);
                                }

                                IList <StyleTransition> styleList = wInfo.StyleTransitions;
                                foreach (StyleTransition st in styleList)
                                {
                                    Console.WriteLine(st);
                                }

                                Console.WriteLine(wInfo.Attributes);
                                Console.WriteLine(s);
                            }
                        }
                    }

                    Console.WriteLine("Pages=" + nPages);
                }
            }
        }

Esempio n. 7

0

Mostra file

File: TextExtract.cs Progetto: vivekel015/adobe-pdf-library-samples

        /// <summary>
        /// Opens a PDF, decides from the catalog's MarkInfo dictionary whether it is
        /// tagged, and hands the document to the matching extraction routine
        /// (<c>ExtractTextTagged</c> or <c>ExtractTextUntagged</c>).
        /// </summary>
        /// <param name="args">Optional: <c>args[0]</c> overrides the input PDF path.</param>
        static void Main(string[] args)
        {
            Console.WriteLine("TextExtract Sample:");

            // ReSharper disable once UnusedVariable
            using (Library lib = new Library())
            {
                Console.WriteLine("Initialized the library.");

                // This is a tagged PDF.
                String sInput = Library.ResourceDirectory + "Sample_Input/pdf_intro.pdf";

                if (args.Length > 0)
                {
                    sInput = args[0];
                }

                // This is an untagged PDF.
                //Resources/Sample_Input/constitution.pdf"

                // Dispose the document deterministically, before the library shuts down.
                using (Document doc = new Document(sInput))
                {
                    Console.WriteLine("Input file:  " + sInput);

                    // Determine if the PDF is tagged.  We'll use a slightly different set of rules
                    // for parsing tagged and untagged PDFs.
                    //
                    // We'll determine if the PDF is tagged by examining the MarkInfo
                    // dictionary of the document.  First, check for the existence of the MarkInfo dict.
                    //
                    // "as" is used instead of a hard cast so that an entry of an unexpected
                    // type is treated like a missing one (untagged) instead of throwing
                    // InvalidCastException.
                    bool    docIsTagged  = false;
                    PDFDict markInfoDict = doc.Root.Get("MarkInfo") as PDFDict;
                    if (markInfoDict != null)
                    {
                        PDFBoolean markedEntry = markInfoDict.Get("Marked") as PDFBoolean;
                        if (markedEntry != null && markedEntry.Value)
                        {
                            docIsTagged = true;
                        }
                    }

                    WordFinderConfig wordConfig = new WordFinderConfig();
                    wordConfig.IgnoreCharGaps  = false;
                    wordConfig.IgnoreLineGaps  = false;
                    wordConfig.NoAnnots        = false;
                    wordConfig.NoEncodingGuess = false;

                    // Std Roman treatment for custom encoding; overrides the noEncodingGuess option
                    wordConfig.UnknownToStdEnc = false;

                    wordConfig.DisableTaggedPDF  = false; // legacy mode WordFinder creation
                    wordConfig.NoXYSort          = true;
                    wordConfig.PreserveSpaces    = false;
                    wordConfig.NoLigatureExp     = false;
                    wordConfig.NoHyphenDetection = false;
                    wordConfig.TrustNBSpace      = false;
                    wordConfig.NoExtCharOffset   = false; // text extraction efficiency
                    wordConfig.NoStyleInfo       = false; // text extraction efficiency

                    using (WordFinder wordFinder = new WordFinder(doc, WordFinderVersion.Latest, wordConfig))
                    {
                        if (docIsTagged)
                        {
                            ExtractTextTagged(doc, wordFinder);
                        }
                        else
                        {
                            ExtractTextUntagged(doc, wordFinder);
                        }
                    }
                }
            }
        }

Esempio n. 8

0

Mostra file

        /// <summary>
        /// Marks every occurrence of "cloudy" and "rain" on the first page of a PDF
        /// for redaction, saves the document with the redactions unapplied, then
        /// applies them and saves the redacted document.
        /// </summary>
        /// <param name="args">Optional: <c>args[0]</c> overrides the input PDF path.</param>
        static void Main(string[] args)
        {
            Console.WriteLine("Redactions Sample:");

            // ReSharper disable once UnusedVariable
            using (Library lib = new Library())
            {
                Console.WriteLine("Initialized the library.");
                String sInput   = Library.ResourceDirectory + "Sample_Input/sample.pdf";
                String sOutput1 = "Redactions-out.pdf";
                String sOutput2 = "Redactions-out-applied.pdf";

                if (args.Length > 0)
                {
                    sInput = args[0];
                }

                Console.WriteLine("Input file: " + sInput);

                // Dispose the document deterministically, before the library shuts down.
                using (Document doc = new Document(sInput))
                {
                    Page docpage = doc.GetPage(0);
                    //
                    // Redact occurrences of the word "rain" on the page.
                    // Redact occurrences of the word "cloudy" on the page, changing the display details.
                    //
                    // For a more in-depth example of using the WordFinder, see the TextExtract sample.
                    //
                    // The TextExtract sample is described here.
                    // http://dev.datalogics.com/adobe-pdf-library/sample-program-descriptions/net-sample-programs/extracting-text-from-pdf-files
                    //

                    List <Quad> cloudyQuads = new List <Quad>();

                    List <Quad> rainQuads = new List <Quad>();

                    WordFinderConfig wordConfig = new WordFinderConfig();

                    // The word finder is only needed while collecting the quads.
                    using (WordFinder wf = new WordFinder(doc, WordFinderVersion.Latest, wordConfig))
                    {
                        IList <Word> words = wf.GetWordList(docpage.PageNumber);

                        foreach (Word w in words)
                        {
                            // Compute the per-word facts once; both tests below use them.
                            string text = w.Text.ToLower();
                            bool   hasTrailingPunctuation =
                                (w.Attributes & WordAttributeFlags.HasTrailingPunctuation) ==
                                WordAttributeFlags.HasTrailingPunctuation;

                            Console.WriteLine(" " + text);

                            // Store the Quads of all "Cloudy" words in a list for later use in
                            // creating the redaction object.  A word with trailing punctuation
                            // (e.g. "cloudy,") only has to start with the target.
                            if (text.Equals("cloudy") || (hasTrailingPunctuation && text.StartsWith("cloudy")))
                            {
                                cloudyQuads.AddRange(w.Quads);
                            }

                            // Store the Quads of all "Rain" words
                            if (text.Equals("rain") || (hasTrailingPunctuation && text.StartsWith("rain")))
                            {
                                rainQuads.AddRange(w.Quads);
                            }
                        }
                    }

                    Console.WriteLine("Found Cloudy instances: " + cloudyQuads.Count);
                    Color red   = new Color(1.0, 0.0, 0.0);
                    Color white = new Color(1.0);

                    Redaction not_cloudy = new Redaction(docpage, cloudyQuads, red);

                    /* fill the "normal" appearance with 25% red */
                    not_cloudy.FillNormal = true;
                    not_cloudy.SetFillColor(red, 0.25);

                    Console.WriteLine("Found rain instances: " + rainQuads.Count);
                    Redaction no_rain = new Redaction(docpage, rainQuads);
                    no_rain.InternalColor = new Color(0.0, 1.0, 0.0);

                    /* Fill the redaction with the word "rain", drawn in white */
                    no_rain.OverlayText = "rain";
                    no_rain.Repeat      = true;
                    no_rain.ScaleToFit  = true;
                    no_rain.TextColor   = white;
                    no_rain.FontFace    = "CourierStd";
                    no_rain.FontSize    = 8.0;

                    doc.Save(SaveFlags.Full, sOutput1);

                    Console.WriteLine("Wrote a pdf doc with unapplied redactions.");

                    // Apply all the redactions in the document
                    doc.ApplyRedactions();

                    doc.Save(SaveFlags.Full, sOutput2);

                    Console.WriteLine("Wrote a redacted pdf doc.");
                }
            }
        }

Esempio n. 9

0

Mostra file

File: AddRegexRedaction.cs Progetto: datalogics-mdawson/adobe-pdf-library-samples

        /// <summary>
        /// Marks every phrase in a PDF that matches a regular expression for
        /// redaction, saves the document with the redactions unapplied, then applies
        /// them and saves the redacted document.
        /// </summary>
        /// <param name="args">Optional: <c>args[0]</c> overrides the input PDF path.</param>
        static void Main(string[] args)
        {
            Console.WriteLine("AddRegexRedaction Sample:");

            using (Library lib = new Library())
            {
                Console.WriteLine("Initialized the library.");

                String defaultInput  = Library.ResourceDirectory + "Sample_Input/AddRegexRedaction.pdf";
                String markedOutput  = "AddRegexRedaction-out.pdf";
                String appliedOutput = "AddRegexRedaction-out-applied.pdf";

                // The pattern whose matches get redacted.  Exactly one of the
                // alternatives below should be active at a time.
                //
                // Phone numbers:
                String pattern = "((1-)?(\\()?\\d{3}(\\))?(\\s)?(-)?\\d{3}-\\d{4})";
                // Email addresses:
                //String pattern = "(\\b[\\w.!#$%&'*+\\/=?^`{|}~-]+@[\\w-]+(?:\\.[\\w-]+)*\\b)";
                // URLs:
                //String pattern = "((https?:\\/\\/(?:www\\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\\.[^\\s]{2,}|www\\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\\.[^\\s]{2,}|https?:\\/\\/(?:www\\.|(?!www))[a-zA-Z0-9]+\\.[^\\s]{2,}|www\\.[a-zA-Z0-9]+\\.[^\\s]{2,}))";

                // A path given on the command line wins over the bundled sample.
                String inputPath = args.Length > 0 ? args[0] : defaultInput;

                using (Document document = new Document(inputPath))
                {
                    int pageCount = document.NumPages;

                    Console.WriteLine("Input file:  " + inputPath);

                    // Hyphen detection must be off so phrases are concatenated properly.
                    WordFinderConfig finderConfig = new WordFinderConfig { NoHyphenDetection = true };

                    using (DocTextFinder textFinder = new DocTextFinder(document, finderConfig))
                    {
                        // Search the whole document, first page through last.
                        IList <DocTextFinderMatch> matches = textFinder.GetMatchList(0, pageCount - 1, pattern);

                        // Every redaction is drawn in red.
                        Color redactionColor = new Color(1.0, 0.0, 0.0);

                        foreach (DocTextFinderMatch match in matches)
                        {
                            // Report the matched phrase.
                            Console.WriteLine(match.MatchString);

                            // A match carries one quad-info entry per page it touches;
                            // each one becomes a redaction on that page.
                            foreach (DocTextFinderQuadInfo pageQuads in match.QuadInfo)
                            {
                                Page targetPage = document.GetPage(pageQuads.PageNum);

                                Redaction redaction = new Redaction(targetPage, pageQuads.Quads, redactionColor);

                                // Fill the "normal" appearance with 25% red.
                                redaction.FillNormal = true;
                                redaction.SetFillColor(redactionColor, 0.25);
                            }
                        }
                    }

                    // First output: the redactions are marked but not yet applied.
                    document.Save(SaveFlags.Full, markedOutput);

                    Console.WriteLine("Wrote a PDF document with unapplied redactions.");

                    // Apply all the redactions in the document.
                    document.ApplyRedactions();

                    // Second output: the matched strings have been redacted.
                    document.Save(SaveFlags.Full, appliedOutput);

                    Console.WriteLine("Wrote a redacted PDF document.");
                }
            }
        }

Esempio n. 10

0

Mostra file

File: SplitPDFVariations.cs Progetto: datalogics-seu/DLE-notshipped

        /// <summary>
        /// Searches the text of every page of a document for a string and records
        /// the pages on which it is found.
        /// </summary>
        /// <remarks>
        /// Adapted from the TextExtract sample, but modified so the extracted text is
        /// only searched and never written out.
        /// </remarks>
        /// <param name="doc">Document whose pages are searched.</param>
        /// <param name="splitTextString">Text to look for; compared case-insensitively (ordinal).</param>
        /// <param name="listOfPageNumsToSplit">
        /// Receives the page index (the same value passed to <c>GetWordList</c>) of
        /// each page whose text contains <paramref name="splitTextString"/>.
        /// </param>
        static void FindTextUntagged(Document doc, String splitTextString, List <int> listOfPageNumsToSplit)
        {
            // setup the WordFinderConfig
            WordFinderConfig wordConfig = new WordFinderConfig();

            wordConfig.IgnoreCharGaps  = false;
            wordConfig.IgnoreLineGaps  = false;
            wordConfig.NoAnnots        = false;
            wordConfig.NoEncodingGuess = false;
            // Std Roman treatment for custom encoding; overrides the noEncodingGuess option
            wordConfig.UnknownToStdEnc   = false;
            wordConfig.DisableTaggedPDF  = false;   // legacy mode WordFinder creation
            wordConfig.NoXYSort          = true;
            wordConfig.PreserveSpaces    = false;
            wordConfig.NoLigatureExp     = false;
            wordConfig.NoHyphenDetection = false;
            wordConfig.TrustNBSpace      = false;
            wordConfig.NoExtCharOffset   = false;   // text extraction efficiency
            wordConfig.NoStyleInfo       = false;   // text extraction efficiency

            // The WordFinder is disposable; release it as soon as the scan is finished.
            using (WordFinder wordFinder = new WordFinder(doc, WordFinderVersion.Latest, wordConfig))
            {
                int nPages = doc.NumPages;

                for (int i = 0; i < nPages; i++)
                {
                    IList <Word> pageWords = wordFinder.GetWordList(i);

                    try
                    {
                        System.Text.StringBuilder pageText = new System.Text.StringBuilder();

                        // This searches the entire page word list.  If the search string is known to fall
                        // within the first N words, bound the loop by Math.Min(pageWords.Count, N) instead.
                        // To look only within a specific area of a page (e.g. lower right corner), compare
                        // the bounding box of each Word against the target area.
                        for (int wordnum = 0; wordnum < pageWords.Count; wordnum++)
                        {
                            Word wInfo = pageWords[wordnum];
                            string s = wInfo.Text;

                            bool lastOnLine =
                                (wInfo.Attributes & WordAttributeFlags.LastWordOnLine) == WordAttributeFlags.LastWordOnLine;
                            bool softHyphen =
                                (wInfo.Attributes & WordAttributeFlags.HasSoftHyphen) == WordAttributeFlags.HasSoftHyphen;

                            // Check for hyphenated words that break across a line.
                            if (softHyphen && lastOnLine)
                            {
                                // For the purposes of this sample, we'll remove all hyphens.  In practice, you may
                                // need to check words against a dictionary to determine if the hyphenated word is
                                // actually one word or two.  Concatenating every piece is safe whether the split
                                // yields one, two, or more parts.
                                pageText.Append(string.Concat(s.Split(new Char[] { '-', '\u00ad' })));
                            }
                            else
                            {
                                pageText.Append(s);
                            }

                            // Check for space adjacency and add a space if necessary.
                            if ((wInfo.Attributes & WordAttributeFlags.AdjacentToSpace) == WordAttributeFlags.AdjacentToSpace)
                            {
                                pageText.Append(" ");
                            }
                            // Check for a line break and add one if necessary
                            if (lastOnLine)
                            {
                                pageText.Append("\n");
                            }
                        }

                        // Ordinal, case-insensitive match: independent of the current culture and of the
                        // casing the caller used for the search string.
                        if (pageText.ToString().IndexOf(splitTextString, StringComparison.OrdinalIgnoreCase) >= 0)
                        {
                            Console.WriteLine("Found " + splitTextString + " on page " + i);
                            listOfPageNumsToSplit.Add(i);
                        }
                    }
                    finally
                    {
                        // Release requested WordList, even if processing the page failed.
                        foreach (Word word in pageWords)
                        {
                            word.Dispose();
                        }
                    }
                }
            }
        }

Esempi in C# (CSharp) per WordFinderConfig