/// <summary>
/// Extracts text from the supplied stream using a <c>FilterReader</c> (IFilter)
/// constructed with the ".xls" extension.
/// </summary>
/// <param name="stream">Stream whose bytes are obtained through <c>GetBytesFromStream</c>.</param>
/// <returns>Everything the filter reader yields from <c>ReadToEnd</c>.</returns>
public override string Extract(Stream stream)
{
    // Hand the raw bytes to the IFilter-based reader, tagged as ".xls".
    var content = GetBytesFromStream(stream);
    var filterReader = new FilterReader(content, ".xls");
    filterReader.Init();

    var extracted = filterReader.ReadToEnd();
    return extracted;
}
/// <summary>
/// Extracts text from a PDF stream. A <c>FilterReader</c> (IFilter) constructed with the
/// ".pdf" extension is tried first; if that attempt fails with anything other than
/// an <see cref="OutOfMemoryException"/>, the method falls back to reading the page
/// contents with <c>PdfReader</c>.
/// </summary>
/// <param name="stream">Stream holding the PDF content.</param>
/// <param name="context">Extraction context; not read by this implementation.</param>
/// <returns>
/// The text produced by the IFilter reader when it succeeds; an empty string when the
/// IFilter attempt runs out of memory; otherwise whatever text the fallback collected
/// (possibly empty).
/// </returns>
public override string Extract(Stream stream, TextExtractorContext context)
{
    // Remember where the stream stood on entry. The IFilter attempt hands the stream to
    // GetBytesFromStream, which presumably advances it (verify against that helper);
    // without rewinding, the fallback below would start reading past the PDF data.
    var canRewind = stream != null && stream.CanSeek;
    var startPosition = canRewind ? stream.Position : 0L;

    try
    {
        // extract text using IFilter
        var target = new FilterReader(GetBytesFromStream(stream), ".pdf");
        target.Init();
        return target.ReadToEnd();
    }
    catch (OutOfMemoryException ex)
    {
        // The binary is too large to process; the fallback would not fare better.
        Logger.WriteWarning(EventId.Indexing.BinaryIsTooLarge,
            "Pdf text extract failed with out of memory exception. " + ex,
            properties: new Dictionary<string, object> { { "Stream size", stream.Length } });
        return string.Empty;
    }
    catch (Exception ex)
    {
        // Any other failure (e.g. the pdf IFilter is missing) is logged and the
        // fallback mechanism below takes over.
        Logger.WriteWarning(EventId.Indexing.IFilterError, "Pdf IFilter error: " + ex.Message);
    }

    // fallback to the other mechanism in case the pdf IFilter is missing
    var text = new StringBuilder();

    try
    {
        // Give the fallback the same bytes the IFilter attempt saw.
        if (canRewind)
        {
            stream.Position = startPosition;
        }

        var pdfReader = new PdfReader(stream);
        for (var page = 1; page <= pdfReader.NumberOfPages; page++)
        {
            // extract text using the old version (4.1.6) of iTextSharp
            var pageText = ExtractTextFromPdfBytes(pdfReader.GetPageContent(page));
            if (string.IsNullOrEmpty(pageText))
            {
                continue;
            }

            text.Append(pageText);
        }
    }
    catch (OutOfMemoryException ex)
    {
        // Keep whatever was collected so far; only the warning is recorded.
        Logger.WriteWarning(EventId.Indexing.BinaryIsTooLarge,
            "Pdf text extract failed with out of memory exception. " + ex,
            properties: new Dictionary<string, object> { { "Stream size", stream.Length } });
    }

    return text.ToString();
}
/// <summary>
/// Extracts text from the supplied stream using a <c>FilterReader</c> (IFilter)
/// constructed with the ".msg" extension.
/// </summary>
/// <param name="stream">Stream whose bytes are obtained through <c>GetBytesFromStream</c>.</param>
/// <param name="context">Extraction context; not read by this implementation.</param>
/// <returns>
/// The text read by the filter reader, or an empty string when any exception is
/// thrown during extraction (a warning is logged in that case).
/// </returns>
public override string Extract(Stream stream, TextExtractorContext context)
{
    try
    {
        // IFilter-based extraction
        var content = GetBytesFromStream(stream);
        var filterReader = new FilterReader(content, ".msg");
        filterReader.Init();
        return filterReader.ReadToEnd();
    }
    catch (Exception ex)
    {
        // Best effort: record the failure and report "no text".
        Logger.WriteWarning(EventId.Indexing.IFilterError, "Msg IFilter error: " + ex.Message);
        return string.Empty;
    }
}
/// <summary>
/// Extracts text from the supplied stream using a <c>FilterReader</c> (IFilter)
/// constructed with the ".doc" extension.
/// </summary>
/// <param name="stream">Stream whose bytes are obtained through <c>GetBytesFromStream</c>.</param>
/// <returns>
/// The text read by the filter reader, or an empty string when any exception is
/// thrown during extraction (a warning is logged in that case).
/// </returns>
public override string Extract(System.IO.Stream stream)
{
    string extracted;

    try
    {
        // IFilter-based extraction
        var content = GetBytesFromStream(stream);
        var filterReader = new FilterReader(content, ".doc");
        filterReader.Init();
        extracted = filterReader.ReadToEnd();
    }
    catch (Exception ex)
    {
        // Best effort: record the failure and fall back to "no text".
        Logger.WriteWarning(EventId.Indexing.IFilterError, "Doc IFilter error: " + ex.Message);
        extracted = string.Empty;
    }

    return extracted;
}