Esempio n. 1
0
        /// <summary>
        /// Extracts the text of an Excel (.xls) binary through the IFilter-based reader.
        /// </summary>
        /// <param name="stream">Stream holding the document binary.</param>
        /// <returns>Everything the filter reader yields for the document.</returns>
        /// <remarks>
        /// No exception is handled here; any failure of the reader propagates to the caller.
        /// </remarks>
        public override string Extract(Stream stream)
        {
            // NOTE(review): assumes FilterReader implements IDisposable (it exposes
            // ReadToEnd like a TextReader) - confirm. Disposing it here releases whatever
            // the reader holds as soon as extraction finishes instead of waiting for
            // finalization.
            using (var target = new FilterReader(GetBytesFromStream(stream), ".xls"))
            {
                target.Init();
                return target.ReadToEnd();
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Extracts the text of a PDF binary. The IFilter-based reader is tried first; if it
        /// fails with a general exception, the pages are read one by one with
        /// <c>PdfReader</c> as a fallback.
        /// </summary>
        /// <param name="stream">Stream holding the PDF binary.</param>
        /// <param name="context">Extraction context; not used by this implementation.</param>
        /// <returns>
        /// The text produced by the filter reader, or by the fallback when the filter fails.
        /// An empty string when the filter attempt runs out of memory. When the fallback runs
        /// out of memory, the text collected up to that point.
        /// </returns>
        /// <remarks>
        /// Exceptions other than <see cref="OutOfMemoryException"/> thrown by the fallback
        /// are not handled here and propagate to the caller.
        /// </remarks>
        public override string Extract(Stream stream, TextExtractorContext context)
        {
            // Remember where the stream stood on entry. The IFilter attempt hands the stream
            // to GetBytesFromStream, which presumably reads from it (verify against the
            // helper); without restoring the position the fallback would start from wherever
            // that attempt left the stream.
            var startPosition = stream != null && stream.CanSeek ? stream.Position : -1L;

            try
            {
                // Extract text using IFilter.
                // NOTE(review): assumes FilterReader implements IDisposable (it exposes
                // ReadToEnd like a TextReader) - confirm.
                using (var target = new FilterReader(GetBytesFromStream(stream), ".pdf"))
                {
                    target.Init();
                    return target.ReadToEnd();
                }
            }
            catch (OutOfMemoryException ex)
            {
                Logger.WriteWarning(EventId.Indexing.BinaryIsTooLarge,
                                    "Pdf text extract failed with out of memory exception. " + ex,
                                    properties: new Dictionary<string, object>
                                    {
                                        { "Stream size", stream.Length }
                                    });

                // The binary is too large; the fallback is not attempted.
                return string.Empty;
            }
            catch (Exception ex)
            {
                Logger.WriteWarning(EventId.Indexing.IFilterError, "Pdf IFilter error: " + ex.Message);
            }

            // Fallback to the other mechanism in case the pdf IFilter is missing.
            if (startPosition >= 0)
            {
                stream.Position = startPosition;
            }

            var text = new StringBuilder();

            try
            {
                var pdfReader = new PdfReader(stream);
                for (var page = 1; page <= pdfReader.NumberOfPages; page++)
                {
                    // Extract text using the old version (4.1.6) of iTextSharp.
                    var pageText = ExtractTextFromPdfBytes(pdfReader.GetPageContent(page));
                    if (string.IsNullOrEmpty(pageText))
                    {
                        continue;
                    }

                    text.Append(pageText);
                }
            }
            catch (OutOfMemoryException ex)
            {
                Logger.WriteWarning(EventId.Indexing.BinaryIsTooLarge,
                                    "Pdf text extract failed with out of memory exception. " + ex,
                                    properties: new Dictionary<string, object>
                                    {
                                        { "Stream size", stream.Length }
                                    });
            }

            // Pages appended before an out-of-memory failure are still returned.
            return text.ToString();
        }
Esempio n. 3
0
        /// <summary>
        /// Extracts the text of an Outlook message (.msg) binary through the IFilter-based
        /// reader.
        /// </summary>
        /// <param name="stream">Stream holding the message binary.</param>
        /// <param name="context">Extraction context; not used by this implementation.</param>
        /// <returns>
        /// The text produced by the filter reader, or an empty string when the extraction
        /// throws; the failure is logged as a warning.
        /// </returns>
        public override string Extract(Stream stream, TextExtractorContext context)
        {
            try
            {
                // NOTE(review): assumes FilterReader implements IDisposable (it exposes
                // ReadToEnd like a TextReader) - confirm. Disposing it releases whatever the
                // reader holds on both the success and the failure path.
                using (var target = new FilterReader(GetBytesFromStream(stream), ".msg"))
                {
                    target.Init();
                    return target.ReadToEnd();
                }
            }
            catch (Exception ex)
            {
                Logger.WriteWarning(EventId.Indexing.IFilterError, "Msg IFilter error: " + ex.Message);
            }

            // Best effort: a failed extraction yields no text instead of an exception.
            return string.Empty;
        }
Esempio n. 4
0
        /// <summary>
        /// Extracts the text of a Word (.doc) binary through the IFilter-based reader.
        /// </summary>
        /// <param name="stream">Stream holding the document binary.</param>
        /// <returns>
        /// The text produced by the filter reader, or an empty string when the extraction
        /// throws; the failure is logged as a warning.
        /// </returns>
        public override string Extract(System.IO.Stream stream)
        {
            try
            {
                // NOTE(review): assumes FilterReader implements IDisposable (it exposes
                // ReadToEnd like a TextReader) - confirm. Disposing it releases whatever the
                // reader holds on both the success and the failure path.
                using (var target = new FilterReader(GetBytesFromStream(stream), ".doc"))
                {
                    target.Init();
                    return target.ReadToEnd();
                }
            }
            catch (Exception ex)
            {
                Logger.WriteWarning(EventId.Indexing.IFilterError, "Doc IFilter error: " + ex.Message);
            }

            // Best effort: a failed extraction yields no text instead of an exception.
            return string.Empty;
        }