Example #1
0
 /// <summary>
 /// Reads the whole stream as text, remembers it in <c>Extraction</c>, and flags
 /// that this extractor has been invoked by setting <c>Called</c>.
 /// </summary>
 /// <param name="stream">
 /// Source of the text. It is wrapped in a <see cref="StreamReader"/> that is disposed
 /// before this method returns, which also closes the stream.
 /// </param>
 /// <param name="context">Not used by this implementation.</param>
 /// <returns>The value stored in <c>Extraction</c> after reading the stream.</returns>
 public override string Extract(Stream stream, TextExtractorContext context)
 {
     // Record the invocation before touching the stream, so the flag is set even if reading fails.
     Called = true;

     using (var textReader = new StreamReader(stream))
     {
         Extraction = textReader.ReadToEnd();
     }

     return Extraction;
 }
Example #2
0
        /// <summary>
        /// Schedules text extraction of a Words document on a background task and returns immediately.
        /// The extracted text is handed to <c>IndexingTools.AddTextExtract</c> rather than returned.
        /// </summary>
        /// <param name="stream">Stream the Aspose.Words document is loaded from inside the background task.</param>
        /// <param name="context">Supplies the <c>VersionId</c> the extracted text is stored under.</param>
        /// <returns>Always <see cref="string.Empty"/>; the real extract is delivered by the background task.</returns>
        public override string Extract(Stream stream, TextExtractorContext context)
        {
            // Fire-and-forget: the task is neither awaited nor stored, so this method
            // returns before the extraction has necessarily started.
            // NOTE(review): the stream is read after this method has returned — confirm that
            // the caller keeps it open long enough, and that failures inside the task are
            // acceptable to go unobserved here.
            Task.Run(() =>
            {
                AsposePreviewProvider.CheckLicense(AsposePreviewProvider.LicenseProvider.Words);

                var wordsDocument = new Aspose.Words.Document(stream);

                var versionId = context.VersionId;
                var documentText = wordsDocument.GetText();

                IndexingTools.AddTextExtract(versionId, documentText);
            });

            return string.Empty;
        }
Example #3
0
        /// <summary>
        /// Schedules text extraction of a PDF document on a background task and returns immediately.
        /// The text collected by a <c>TextAbsorber</c> over the document's pages is handed to
        /// <c>IndexingTools.AddTextExtract</c> rather than returned.
        /// </summary>
        /// <param name="stream">Stream the Aspose.Pdf document is loaded from inside the background task.</param>
        /// <param name="context">Supplies the <c>VersionId</c> the extracted text is stored under.</param>
        /// <returns>Always <see cref="string.Empty"/>; the real extract is delivered by the background task.</returns>
        public override string Extract(Stream stream, TextExtractorContext context)
        {
            // Fire-and-forget: the task is neither awaited nor stored, so this method
            // returns before the extraction has necessarily started.
            // NOTE(review): the stream is read after this method has returned — confirm that
            // the caller keeps it open long enough, and that failures inside the task are
            // acceptable to go unobserved here.
            Task.Run(() =>
            {
                AsposePreviewProvider.CheckLicense(AsposePreviewProvider.LicenseProvider.Pdf);

                var pdfDocument = new Aspose.Pdf.Document(stream);
                var absorber = new TextAbsorber();
                pdfDocument.Pages.Accept(absorber);

                var versionId = context.VersionId;
                var absorbedText = absorber.Text;

                IndexingTools.AddTextExtract(versionId, absorbedText);
            });

            return string.Empty;
        }
        /// <summary>
        /// Extracts text from a PDF stream. The iFilter-based extraction is tried first; only when
        /// it reports failure is the document read page by page with <c>PdfReader</c>.
        /// </summary>
        /// <param name="stream">The PDF content to extract text from.</param>
        /// <param name="context">Not used by this implementation.</param>
        /// <returns>
        /// The iFilter result when that succeeds; otherwise the concatenated text of all pages that
        /// yielded non-empty text. If an <see cref="OutOfMemoryException"/> occurs during the fallback,
        /// a warning is logged and whatever text was collected up to that point is returned.
        /// </returns>
        public override string Extract(Stream stream, TextExtractorContext context)
        {
            // Preferred path: iFilter logic; its result is used as-is when it reports success.
            var filterText = ExtractiFilter(stream, out var filterSucceeded);
            if (filterSucceeded)
            {
                return filterText;
            }

            var collected = new StringBuilder();

            try
            {
                var reader = new PdfReader(stream);

                // Page numbers passed to GetPageContent start at 1.
                for (var pageNumber = 1; pageNumber <= reader.NumberOfPages; pageNumber++)
                {
                    // extract text using the old version (4.1.6) of iTextSharp
                    var pageBytes = reader.GetPageContent(pageNumber);
                    var pageText = ExtractTextFromPdfBytes(pageBytes);

                    // Pages without any text contribute nothing to the result.
                    if (!string.IsNullOrEmpty(pageText))
                    {
                        collected.Append(pageText);
                    }
                }
            }
            catch (OutOfMemoryException outOfMemory)
            {
                // Best effort: report the failure together with the stream size and
                // fall through to return the text gathered so far.
                var message = "Pdf text extract failed with out of memory exception. " + outOfMemory;
                var logProperties = new Dictionary<string, object>
                {
                    { "Stream size", stream.Length }
                };

                SnLog.WriteWarning(message, EventId.Indexing, properties: logProperties);
            }

            return collected.ToString();
        }