/// <summary>
/// Extracts the text of a PDF document. Extraction through <c>SnIFilter.GetText</c> is tried
/// first; if that fails with anything other than an out-of-memory error, the page contents are
/// read with <c>PdfReader</c> and converted to text by <c>ExtractTextFromPdfBytes</c>.
/// </summary>
/// <param name="stream">Stream holding the PDF content.</param>
/// <param name="context">Extraction context; not used by this implementation.</param>
/// <returns>
/// The extracted text. An empty string when the IFilter attempt runs out of memory; when the
/// fallback runs out of memory, whatever text had been collected up to that point.
/// </returns>
public override string Extract(Stream stream, TextExtractorContext context)
{
    // Remember where the caller positioned the stream: the IFilter attempt may read from it
    // before failing, and the fallback below has to see the same bytes.
    var canRewind = stream != null && stream.CanSeek;
    var startPosition = canRewind ? stream.Position : 0L;

    try
    {
        // extract text using IFilter
        return SnIFilter.GetText(stream, ".pdf");
    }
    catch (OutOfMemoryException ex)
    {
        SnLog.WriteWarning("Pdf text extract failed with out of memory exception. " + ex,
            EventId.Indexing,
            properties: new Dictionary<string, object> { { "Stream size", stream.Length } });

        // Retrying with the in-process parser would most likely exhaust memory again.
        return string.Empty;
    }
    catch (Exception ex)
    {
        // log iFilter error only once
        if (!_iFilterErrorLogged)
        {
            SnLog.WriteWarning("Pdf IFilter error: " + ex.Message, EventId.Indexing);
            _iFilterErrorLogged = true;
        }
    }

    // fallback to the other mechanism in case the pdf IFilter is missing
    var text = new StringBuilder();
    PdfReader pdfReader = null;

    try
    {
        // Undo whatever the failed IFilter attempt consumed. CanSeek is re-checked because
        // the stream may have been closed in the meantime.
        if (canRewind && stream.CanSeek)
        {
            stream.Position = startPosition;
        }

        pdfReader = new PdfReader(stream);
        for (var page = 1; page <= pdfReader.NumberOfPages; page++)
        {
            // extract text using the old version (4.1.6) of iTextSharp
            var pageText = ExtractTextFromPdfBytes(pdfReader.GetPageContent(page));
            if (!string.IsNullOrEmpty(pageText))
            {
                text.Append(pageText);
            }
        }
    }
    catch (OutOfMemoryException ex)
    {
        SnLog.WriteWarning("Pdf text extract failed with out of memory exception. " + ex,
            EventId.Indexing,
            properties: new Dictionary<string, object> { { "Stream size", stream.Length } });
    }
    finally
    {
        // Release the reader even when a page fails to parse.
        // NOTE(review): assumes PdfReader.Close() leaves the caller's stream open - confirm.
        if (pdfReader != null)
        {
            pdfReader.Close();
        }
    }

    return text.ToString();
}
/// <summary>
/// Extracts text from the given stream by calling <c>SnIFilter.GetText</c> with the
/// <c>.msg</c> extension.
/// </summary>
/// <param name="stream">Stream holding the content to extract text from.</param>
/// <param name="context">Extraction context; not used by this implementation.</param>
/// <returns>
/// The text returned by the IFilter, or an empty string when the IFilter call throws
/// (the failure is logged as a warning).
/// </returns>
public override string Extract(Stream stream, TextExtractorContext context)
{
    string extracted;

    try
    {
        // IFilter
        extracted = SnIFilter.GetText(stream, ".msg");
    }
    catch (Exception filterError)
    {
        // Best effort: report the problem and hand back no text instead of failing.
        var warning = "Msg IFilter error: " + filterError.Message;
        SnLog.WriteWarning(warning, EventId.Indexing);
        extracted = string.Empty;
    }

    return extracted;
}