/// <summary>
/// Records that the extractor was invoked, reads the entire stream as text,
/// stores the result in <c>Extraction</c> and returns it.
/// </summary>
/// <param name="stream">Source of the text; it is wrapped in a <see cref="StreamReader"/> that is disposed before returning.</param>
/// <param name="context">Extraction context; not used by this implementation.</param>
/// <returns>The value of <c>Extraction</c> after the stream content has been assigned to it.</returns>
public override string Extract(Stream stream, TextExtractorContext context)
{
    // Flag the call before touching the stream, so the invocation is recorded even if reading fails.
    Called = true;

    using (var textReader = new StreamReader(stream))
    {
        var content = textReader.ReadToEnd();
        Extraction = content;
    }

    return Extraction;
}
/// <summary>
/// Queues a background task that loads the stream as an Aspose.Words document and
/// registers its text through <c>IndexingTools.AddTextExtract</c> for
/// <c>context.VersionId</c>. The method itself does not wait for that task.
/// </summary>
/// <param name="stream">Document content; read by the background task.</param>
/// <param name="context">Supplies the <c>VersionId</c> that the extracted text is stored under.</param>
/// <returns>Always <see cref="string.Empty"/>; the extracted text is delivered by the background task instead.</returns>
public override string Extract(Stream stream, TextExtractorContext context)
{
    // NOTE(review): the task reads `stream` after this method has returned, and the
    // returned Task is discarded, so failures inside it are not observed here.
    // Confirm the caller keeps the stream open long enough.
    Task.Run(() =>
    {
        AsposePreviewProvider.CheckLicense(AsposePreviewProvider.LicenseProvider.Words);

        var wordsDocument = new Aspose.Words.Document(stream);
        var versionId = context.VersionId;
        var extractedText = wordsDocument.GetText();

        IndexingTools.AddTextExtract(versionId, extractedText);
    });

    return string.Empty;
}
/// <summary>
/// Queues a background task that loads the stream as an Aspose.Pdf document, collects
/// the text of its pages with a <c>TextAbsorber</c> and registers it through
/// <c>IndexingTools.AddTextExtract</c> for <c>context.VersionId</c>.
/// The method itself does not wait for that task.
/// </summary>
/// <param name="stream">PDF content; read by the background task.</param>
/// <param name="context">Supplies the <c>VersionId</c> that the extracted text is stored under.</param>
/// <returns>Always <see cref="string.Empty"/>; the extracted text is delivered by the background task instead.</returns>
public override string Extract(Stream stream, TextExtractorContext context)
{
    // NOTE(review): the task reads `stream` after this method has returned, and the
    // returned Task is discarded, so failures inside it are not observed here.
    // Confirm the caller keeps the stream open long enough.
    Task.Run(() =>
    {
        AsposePreviewProvider.CheckLicense(AsposePreviewProvider.LicenseProvider.Pdf);

        var pdfDocument = new Aspose.Pdf.Document(stream);
        var absorber = new TextAbsorber();
        pdfDocument.Pages.Accept(absorber);

        var versionId = context.VersionId;
        var extractedText = absorber.Text;

        IndexingTools.AddTextExtract(versionId, extractedText);
    });

    return string.Empty;
}
/// <summary>
/// Extracts text from a PDF stream. The iFilter-based extraction is tried first; when it
/// reports failure, the pages are read with <c>PdfReader</c> and their text is concatenated.
/// </summary>
/// <param name="stream">PDF content to extract text from.</param>
/// <param name="context">Extraction context; not used by this implementation.</param>
/// <returns>
/// The iFilter result when that succeeds; otherwise the concatenated text of the pages.
/// If an <see cref="OutOfMemoryException"/> occurs, a warning is logged and whatever text
/// was gathered up to that point is returned.
/// </returns>
public override string Extract(Stream stream, TextExtractorContext context)
{
    // The iFilter logic takes precedence; fall through only when it signals failure.
    var filterText = ExtractiFilter(stream, out var filterSucceeded);
    if (filterSucceeded)
    {
        return filterText;
    }

    var collected = new StringBuilder();

    try
    {
        var reader = new PdfReader(stream);
        var pageNumber = 1; // the original loop also starts at page 1

        while (pageNumber <= reader.NumberOfPages)
        {
            // extract text using the old version (4.1.6) of iTextSharp
            var pageContent = ExtractTextFromPdfBytes(reader.GetPageContent(pageNumber));
            if (!string.IsNullOrEmpty(pageContent))
            {
                collected.Append(pageContent);
            }

            pageNumber++;
        }
    }
    catch (OutOfMemoryException ex)
    {
        // Best effort: log the failure and return the text gathered before it.
        var message = "Pdf text extract failed with out of memory exception. " + ex;
        var details = new Dictionary<string, object>
        {
            { "Stream size", stream.Length }
        };

        SnLog.WriteWarning(message, EventId.Indexing, properties: details);
    }

    return collected.ToString();
}