예제 #1
0
        /// <summary>
        /// Reads the first text fragment found on the first page of "TestReadGAC.pdf",
        /// which is expected to be located on the current user's desktop.
        /// </summary>
        /// <returns>
        /// The text of the first fragment the absorber reports inside the search rectangle
        /// (0, 0)-(600, 800), or <see cref="string.Empty"/> when no fragment is reported.
        /// </returns>
        public static string TestRead()
        {
            var path = System.IO.Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.Desktop), "TestReadGAC.pdf");

            // Dispose the document on every exit path so that the resources it holds are released.
            using (Document pdfDocument = new Aspose.Pdf.Document(path))
            {
                var absorber = new Aspose.Pdf.Text.TextFragmentAbsorber();

                // Restrict the search to text inside the page bounds and the given page region.
                absorber.TextSearchOptions.LimitToPageBounds = true;
                absorber.TextSearchOptions.Rectangle         = new Aspose.Pdf.Rectangle(0, 0, 600, 800);

                // Search the first page of the PDF file.
                pdfDocument.Pages[1].Accept(absorber);

                // Only the first fragment is needed, so return as soon as one is seen
                // instead of copying every fragment into a temporary list.
                foreach (Aspose.Pdf.Text.TextFragment fragment in absorber.TextFragments)
                {
                    return fragment.Text;
                }
            }

            return string.Empty;
        }
예제 #2
0
        /// <summary>
        /// Probes up to three rectangles on the first page of the PDF at <paramref name="path"/>
        /// and returns the text of the first fragment found.
        /// </summary>
        /// <param name="path">Path of the PDF file to open.</param>
        /// <returns>
        /// The text of the first fragment reported by the absorber, or
        /// <see cref="string.Empty"/> when none of the three passes reports a fragment.
        /// </returns>
        public static string TestReadGAC_BAK(string path)
        {
            const double width  = 50d;
            const double height = 30d;

            const double x = 20d;
            const double y = 585d;

            // Dispose the document on every exit path so that the resources it holds are released.
            using (Document pdfDocument = new Aspose.Pdf.Document(path))
            {
                var absorber = new Aspose.Pdf.Text.TextFragmentAbsorber();

                // Loop-invariant: the search is not limited to the page bounds.
                absorber.TextSearchOptions.LimitToPageBounds = false;

                for (int i = 0; i < 3; i++)
                {
                    // NOTE(review): llx (50) is greater than urx (20), so the corners look swapped.
                    // An earlier variant used (llx: x, lly: y - i * height, urx: width, ury: height).
                    // The coordinates are kept exactly as they were - confirm the intended region.
                    absorber.TextSearchOptions.Rectangle = new Aspose.Pdf.Rectangle
                                                           (
                        llx: width,
                        lly: height,
                        urx: x,
                        ury: y - i * height
                                                           );

                    pdfDocument.Pages[1].Accept(absorber);

                    // The caller only ever receives the first collected text, so the first
                    // fragment seen is the result; later passes cannot change it.
                    foreach (Aspose.Pdf.Text.TextFragment fragment in absorber.TextFragments)
                    {
                        return fragment.Text;
                    }
                }
            }

            return string.Empty;
        }
        /// <summary>
        /// Searches the supplied PDF documents for a regular expression and adds a highlight
        /// annotation over every match before saving the document.
        /// </summary>
        /// <param name="docs">Documents to search.</param>
        /// <param name="sourceFolder">Folder name stored in <c>Opts.FolderName</c>.</param>
        /// <param name="query">Regular expression to search for; validated with <c>IsValidRegex</c>.</param>
        /// <returns>
        /// <c>BadDocumentResponse</c> when <paramref name="docs"/> is null;
        /// <c>MaximumFileLimitsResponse</c> when it is empty or holds more than <c>MaximumUploadFiles</c> items;
        /// a response carrying <c>FileProcessingErrorCode.WrongRegExp</c> when the query is not a valid regex;
        /// a response with status code 500 when an exception escapes <c>Process</c>;
        /// otherwise whatever <c>Process</c> returns.
        /// </returns>
        public Response Search(DocumentInfo[] docs, string sourceFolder, string query)
        {
            if (docs == null)
            {
                return BadDocumentResponse;
            }
            if (docs.Length == 0 || docs.Length > MaximumUploadFiles)
            {
                return MaximumFileLimitsResponse;
            }

            SetDefaultOptions(docs, "");
            Opts.AppName        = "Search";
            Opts.MethodName     = "Search";
            Opts.FolderName     = sourceFolder;
            Opts.OutputType     = ".pdf";
            Opts.ResultFileName = "Search Results";
            Opts.CreateZip      = false;

            // Guard clause: an invalid pattern is reported through the error code, not the HTTP-style status.
            if (!IsValidRegex(query))
            {
                return new Response
                {
                    Status                  = "OK",
                    StatusCode              = 200,
                    FileProcessingErrorCode = FileProcessingErrorCode.WrongRegExp
                };
            }

            try
            {
                return Process((inFilePath, outPath, zipOutFolder) =>
                {
                    // NOTE(review): every document is saved to the same outPath, so with several
                    // input documents later saves appear to overwrite earlier ones - confirm.
                    foreach (var doc in docs)
                    {
                        var document = doc.PdfDocument;

                        // The query was validated above, so it is always searched as a regular expression.
                        var absorber = new Aspose.Pdf.Text.TextFragmentAbsorber(query, new TextSearchOptions(true));
                        absorber.Visit(document);

                        foreach (var textFragment in absorber.TextFragments)
                        {
                            var highlight = new HighlightAnnotation(textFragment.Page, textFragment.Rectangle)
                            {
                                Title = "Aspose.PDF Search Free App"
                            };
                            textFragment.Page.Annotations.Add(highlight);
                        }

                        document.Save(outPath);
                    }
                });
            }
            catch (Exception ex)
            {
                return new Response
                {
                    Status                  = "500 " + ex.Message,
                    StatusCode              = 500,
                    FileProcessingErrorCode = FileProcessingErrorCode.OK
                };
            }
        }
예제 #4
0
        /// <summary>
        /// Redacts the supplied PDF documents by replacing every match of a regular expression
        /// in the page text, in text-markup annotation contents and/or in the document metadata.
        /// </summary>
        /// <param name="docs">Documents to redact; each one is processed in its own task.</param>
        /// <param name="sourceFolder">Folder name stored in <c>Opts.FolderName</c>.</param>
        /// <param name="searchQuery">Regular expression to search for; validated with <c>IsValidRegex</c>.</param>
        /// <param name="replaceText">Replacement text; null is treated as an empty string.</param>
        /// <param name="caseSensitive">When false, matching ignores case.</param>
        /// <param name="text">Redact text fragments found in the document.</param>
        /// <param name="comments">Redact the contents of text-markup annotations.</param>
        /// <param name="metadata">Redact the document title and keywords.</param>
        /// <returns>
        /// <c>BadDocumentResponse</c> when <paramref name="docs"/> is null;
        /// <c>MaximumFileLimitsResponse</c> when it is empty or holds more than <c>MaximumUploadFiles</c> items;
        /// a response carrying <c>FileProcessingErrorCode.WrongRegExp</c> when the query is not a valid regex;
        /// a response with status code 500 when any task failed;
        /// a response carrying <c>FileProcessingErrorCode.NoSearchResults</c> when nothing matched;
        /// otherwise whatever <c>Process</c> returns after saving the documents.
        /// </returns>
        public Response Redact(DocumentInfo[] docs, string sourceFolder, string searchQuery, string replaceText,
                               bool caseSensitive, bool text, bool comments, bool metadata)
        {
            if (docs == null)
            {
                return BadDocumentResponse;
            }
            if (docs.Length == 0 || docs.Length > MaximumUploadFiles)
            {
                return MaximumFileLimitsResponse;
            }

            SetDefaultOptions(docs, "");
            Opts.AppName     = "Redaction";
            Opts.MethodName  = "Redact";
            Opts.ZipFileName = "Redacted documents";
            Opts.FolderName  = sourceFolder;
            Opts.OutputType  = ".pdf";

            if (replaceText == null)
            {
                replaceText = "";
            }

            if (!IsValidRegex(searchQuery))
            {
                return new Response
                {
                    Status                  = "OK",
                    StatusCode              = 200,
                    FileProcessingErrorCode = FileProcessingErrorCode.WrongRegExp
                };
            }

            // One Regex instance carries the case option for annotation and metadata redaction.
            var regex = new Regex(searchQuery, caseSensitive ? RegexOptions.None : RegexOptions.IgnoreCase);

            // The absorber takes the pattern as a string, so case-insensitivity is requested
            // through the inline (?i) option instead of RegexOptions.
            var textPattern = caseSensitive ? searchQuery : "(?i)" + searchQuery;

            var matchesFound     = new int[docs.Length];
            var lck              = new object();
            var catchedException = false;

            var tasks = Enumerable.Range(0, docs.Length).Select(i => Task.Factory.StartNew(() =>
            {
                try
                {
                    var document = docs[i].PdfDocument;

                    if (text)
                    {
                        var tfa = new Aspose.Pdf.Text.TextFragmentAbsorber(textPattern, new TextSearchOptions(true));
                        tfa.Visit(document);
                        foreach (var textFragment in tfa.TextFragments)
                        {
                            textFragment.Text = replaceText;
                            matchesFound[i]  += 1;
                        }
                    }

                    if (comments)
                    {
                        foreach (var page in document.Pages)
                        {
                            foreach (var item in page.Annotations)
                            {
                                if (!(item is TextMarkupAnnotation))
                                {
                                    continue;
                                }

                                int hits;
                                var redacted = RedactString(regex, item.Contents, replaceText, out hits);
                                if (hits > 0)
                                {
                                    item.Contents    = redacted;
                                    matchesFound[i] += hits;
                                }
                            }
                        }
                    }

                    if (metadata)
                    {
                        Pdf.DocumentInfo docInfo = new Pdf.DocumentInfo(document);

                        int titleHits;
                        var title = RedactString(regex, docInfo.Title, replaceText, out titleHits);
                        if (titleHits > 0)
                        {
                            docInfo.Title    = title;
                            matchesFound[i] += titleHits;
                        }

                        int keywordHits;
                        var keywords = RedactString(regex, docInfo.Keywords, replaceText, out keywordHits);
                        if (keywordHits > 0)
                        {
                            docInfo.Keywords = keywords;
                            matchesFound[i] += keywordHits;
                        }
                    }
                }
                catch (Exception ex)
                {
                    Console.WriteLine(ex.Message);
                    lock (lck)
                    {
                        catchedException = true;
                    }
                }
            })).ToArray();
            Task.WaitAll(tasks);

            if (catchedException)
            {
                return new Response
                {
                    Status                  = "500 Exception during processing",
                    StatusCode              = 500,
                    FileProcessingErrorCode = FileProcessingErrorCode.OK
                };
            }

            // Only real matches are counted, so an untouched set of documents is reported as such.
            if (matchesFound.Sum() == 0)
            {
                return new Response
                {
                    Status                  = "OK",
                    StatusCode              = 200,
                    FileProcessingErrorCode = FileProcessingErrorCode.NoSearchResults
                };
            }

            return Process((inFilePath, outPath, zipOutFolder) =>
            {
                foreach (var doc in docs)
                {
                    SaveDocument(doc, outPath, zipOutFolder, new SaveFormatType()
                    {
                        SaveOptions = new PdfSaveOptions(), SaveType = SaveType.pdf
                    });
                }
            });
        }

        /// <summary>
        /// Replaces every match of <paramref name="regex"/> in <paramref name="input"/> and
        /// reports how many matches were found.
        /// </summary>
        /// <param name="regex">Compiled pattern to apply.</param>
        /// <param name="input">Text to redact; may be null, in which case nothing is done.</param>
        /// <param name="replacement">Replacement pattern passed to <see cref="Regex.Replace(string, string)"/>.</param>
        /// <param name="matchCount">Number of matches found in <paramref name="input"/>.</param>
        /// <returns>The redacted text, or <paramref name="input"/> unchanged when there was no match.</returns>
        private static string RedactString(Regex regex, string input, string replacement, out int matchCount)
        {
            matchCount = 0;

            // Regex methods throw on null input; a missing value simply has nothing to redact.
            if (input == null)
            {
                return null;
            }

            matchCount = regex.Matches(input).Count;
            return matchCount == 0 ? input : regex.Replace(input, replacement);
        }