Exemplo n.º 1
0
        protected string ExtractiFilter(Stream stream, out bool success)
        {
            success = true;

            try
            {
                // extract text using IFilter
                return(SnIFilter.GetText(stream, ".pdf"));
            }
            catch (OutOfMemoryException ex)
            {
                SnLog.WriteWarning("Pdf text extract failed with out of memory exception. " + ex,
                                   EventId.Indexing,
                                   properties: new Dictionary <string, object> {
                    { "Stream size", stream.Length }
                });
            }
            catch (Exception ex)
            {
                // log iFilter error only once
                if (!_iFilterErrorLogged)
                {
                    SnLog.WriteWarning("Pdf IFilter error: " + ex.Message, EventId.Indexing);
                    _iFilterErrorLogged = true;
                }
            }

            success = false;

            return(string.Empty);
        }
Exemplo n.º 2
0
        public override string Extract(Stream stream, TextExtractorContext context)
        {
            try
            {
                // extract text using IFilter
                return(SnIFilter.GetText(stream, ".pdf"));
            }
            catch (OutOfMemoryException ex)
            {
                SnLog.WriteWarning("Pdf text extract failed with out of memory exception. " + ex,
                                   EventId.Indexing,
                                   properties: new Dictionary <string, object> {
                    { "Stream size", stream.Length }
                });

                return(string.Empty);
            }
            catch (Exception ex)
            {
                // log iFilter error only once
                if (!_iFilterErrorLogged)
                {
                    SnLog.WriteWarning("Pdf IFilter error: " + ex.Message, EventId.Indexing);
                    _iFilterErrorLogged = true;
                }
            }

            // fallback to the other mechanism in case the pdf IFilter is missing
            var text = new StringBuilder();

            try
            {
                var pdfReader = new PdfReader(stream);
                for (var page = 1; page <= pdfReader.NumberOfPages; page++)
                {
                    // extract text using the old version (4.1.6) of iTextSharp
                    var pageText = ExtractTextFromPdfBytes(pdfReader.GetPageContent(page));
                    if (string.IsNullOrEmpty(pageText))
                    {
                        continue;
                    }

                    text.Append(pageText);
                }
            }
            catch (OutOfMemoryException ex)
            {
                SnLog.WriteWarning("Pdf text extract failed with out of memory exception. " + ex,
                                   EventId.Indexing,
                                   properties: new Dictionary <string, object> {
                    { "Stream size", stream.Length }
                });
            }

            return(text.ToString());
        }
Exemplo n.º 3
0
        public override string Extract(Stream stream, TextExtractorContext context)
        {
            try
            {
                // IFilter
                return(SnIFilter.GetText(stream, ".msg"));
            }
            catch (Exception ex)
            {
                SnLog.WriteWarning("Msg IFilter error: " + ex.Message, EventId.Indexing);
            }

            return(string.Empty);
        }