/// <summary>
/// Extracts text from a binary value, selecting the text extractor by the
/// extension of the binary's file name and running it under a time limit.
/// </summary>
/// <param name="binaryData">
/// Binary value to read. Its <c>FileName.Extension</c> selects the extractor and
/// its <c>GetStream()</c> supplies the content.
/// </param>
/// <param name="node">
/// Node whose version and path are written into the warning and error log entries.
/// </param>
/// <returns>
/// The extracted text with every '\0' character replaced by '.'. An empty string is
/// returned when <paramref name="binaryData"/>, its file name, its extension or its
/// stream is missing or empty, when the extension has no extractor, when the
/// extraction times out or fails, or when the extractor yields no text.
/// </returns>
public static string GetExtract(BinaryData binaryData, Node node)
{
    if (binaryData == null)
    {
        return string.Empty;
    }

    var fname = binaryData.FileName;
    if (fname == null)
    {
        return string.Empty;
    }

    var ext = fname.Extension;
    if (string.IsNullOrEmpty(ext))
    {
        return string.Empty;
    }

    ITextExtractor extractor;

    // The extension is an identifier, not linguistic text, so lower-case it culture-independently.
    switch (ext.ToLowerInvariant())
    {
        case "contenttype":
        case "xml":
            extractor = new XmlTextExtractor();
            break;
        case "doc":
            extractor = new DocTextExtractor();
            break;
        case "xls":
            extractor = new XlsTextExtractor();
            break;
        case "pdf":
            extractor = new PdfTextExtractor();
            break;
        case "docx":
            extractor = new DocxTextExtractor();
            break;
        case "xlsx":
            extractor = new XlsxTextExtractor();
            break;
        case "pptx":
            extractor = new PptxTextExtractor();
            break;
        case "txt":
            extractor = new PlainTextExtractor();
            break;
        default:
            return string.Empty;
    }

    // NOTE(review): the stream is not disposed here - confirm whether the caller or BinaryData owns it.
    var stream = binaryData.GetStream();
    if (stream == null || stream.Length == 0)
    {
        return string.Empty;
    }

    var result = string.Empty;
    try
    {
        // The extraction is executed through a TimeboxedActivity so that it can be
        // abandoned when it does not finish within the configured time.
        Action<TimeboxedActivity> timeboxedFunctionCall = activity =>
        {
            var input = (Stream)activity.InArgument;
            activity.OutArgument = extractor.Extract(input);
        };

        var act = new TimeboxedActivity();
        act.InArgument = stream;
        act.Activity = timeboxedFunctionCall;
        act.Context = HttpContext.Current;

        // Presumably TextExtractTimeout is in seconds and ExecuteAndWait expects milliseconds - TODO confirm.
        var finishedWithinTime = act.ExecuteAndWait(Repository.TextExtractTimeout * 1000);
        if (!finishedWithinTime)
        {
            act.Abort();
            var msg = String.Format(CultureInfo.InvariantCulture, "Text extracting timeout. Version: {0}, path: {1}", node.Version, node.Path);
            Logger.WriteWarning(msg);
            return string.Empty;
        }

        if (act.ExecutionException != null)
        {
            WriteError(act.ExecutionException, node);
        }
        else
        {
            result = (string)act.OutArgument;
        }
    }
    catch (Exception e)
    {
        // Best effort: a failed extraction is logged and reported as "no text".
        WriteError(e, node);
    }

    if (string.IsNullOrEmpty(result))
    {
        // The verbatim literal keeps its embedded line break so the logged text stays unchanged.
        var format = @"Couldn't extract text. 
VersionId: {0}, path: '{1}' ";
        var inf = String.Format(CultureInfo.InvariantCulture, format, node.VersionId, node.Path);
        Logger.WriteWarning(inf);

        // Returning here also covers a null extraction result, which would otherwise
        // fail with a NullReferenceException on the Replace call below.
        return string.Empty;
    }

    return result.Replace('\0', '.');
}
/// <summary>
/// Extracts text from a stream, selecting the text extractor by the extension of
/// <paramref name="fileName"/>. The extraction runs synchronously, without a time limit.
/// </summary>
/// <param name="stream">
/// Content to read. A null or zero-length stream yields an empty result and no error message.
/// </param>
/// <param name="fileName">File name whose extension selects the extractor.</param>
/// <param name="errorMessage">
/// Receives the reason why no text was produced: an unusable file name or extension,
/// an extension without an extractor, an exception thrown by the extractor, or an
/// empty extraction result. Receives null when text was extracted, when the stream is
/// null or empty, or when the file is handled as plain text ("txt").
/// </param>
/// <returns>
/// For "txt" files the stream content as returned by <c>Tools.GetStreamString</c>;
/// for other supported extensions the extracted text with every '\0' character
/// replaced by '.'; otherwise an empty string.
/// </returns>
public static string GetExtract(Stream stream, string fileName, out string errorMessage)
{
    errorMessage = null;

    if (stream == null || stream.Length == 0)
    {
        return string.Empty;
    }

    if (string.IsNullOrEmpty(fileName))
    {
        errorMessage = "Cannot resolve a TextExtractor if FileName is null or empty";
        return string.Empty;
    }

    var extension = Path.GetExtension(fileName);
    if (string.IsNullOrEmpty(extension))
    {
        errorMessage = "Cannot resolve a TextExtractor if FileName's extension is null or empty";
        return string.Empty;
    }

    extension = extension.TrimStart('.');
    if (extension.Length == 0)
    {
        errorMessage = "Cannot resolve a TextExtractor if FileName's extension is empty";
        return string.Empty;
    }

    // The extension is an identifier, not linguistic text, so lower-case it culture-independently.
    extension = extension.ToLowerInvariant();

    // Plain text needs no extractor: the stream content is returned as it is read.
    if (extension == "txt")
    {
        return SenseNet.ContentRepository.Tools.GetStreamString(stream);
    }

    ITextExtractor extractor;
    switch (extension)
    {
        case "contenttype":
        case "xml":
            extractor = new XmlTextExtractor();
            break;
        case "doc":
            extractor = new DocTextExtractor();
            break;
        case "xls":
            extractor = new XlsTextExtractor();
            break;
        case "pdf":
            extractor = new PdfTextExtractor();
            break;
        case "docx":
            extractor = new DocxTextExtractor();
            break;
        case "xlsx":
            extractor = new XlsxTextExtractor();
            break;
        case "pptx":
            extractor = new PptxTextExtractor();
            break;
        // No "txt" case: that extension has already returned above.
        default:
            errorMessage = String.Format("Cannot resolve a TextExtractor for this extension: '{0}'", extension);
            return string.Empty;
    }

    var result = string.Empty;
    try
    {
        result = extractor.Extract(stream);
    }
    catch (Exception e)
    {
        // Best effort: the failure is reported through errorMessage instead of being thrown.
        errorMessage = String.Format("An error occured during extracting text. Path: {0}. Message: {1}", fileName, e.Message);
    }

    if (string.IsNullOrEmpty(result))
    {
        // Keep the more specific exception message when there is one; the generic
        // message is only used when the extractor silently produced nothing.
        if (errorMessage == null)
        {
            var format = @"Couldn't extract text. FileName: '{0}' ";
            errorMessage = String.Format(CultureInfo.InvariantCulture, format, fileName);
        }

        // Returning here also covers a null extraction result, which would otherwise
        // fail with a NullReferenceException on the Replace call below.
        return string.Empty;
    }

    return result.Replace('\0', '.');
}