/// <summary>
/// Produces the text extract of a binary value by running the resolved text extractor
/// inside a time-boxed activity, tracing the whole operation.
/// </summary>
/// <param name="binaryData">Binary value used to resolve the extractor and to open the stream that is extracted.</param>
/// <param name="node">Node whose VersionId, Version and Path feed the extractor context and the trace/log messages.</param>
/// <returns>
/// The extracted text with every '\0' character replaced by '.'.
/// An empty string when no extractor is resolved, when the stream is null or empty,
/// when the extraction does not finish within the timeout, or when an error was handed to WriteError.
/// Null when the extractor itself yields null.
/// </returns>
public static string GetExtract(BinaryData binaryData, Node node)
{
    using (var op = SnTrace.Index.StartOperation("Getting text extract, VId:{0}, Path:{1}", node.VersionId, node.Path))
    {
        var extractor = ResolveExtractor(binaryData);
        if (extractor == null)
        {
            // Nothing can process this binary; that still counts as a successful operation.
            op.Successful = true;
            return string.Empty;
        }

        var extractedText = string.Empty;
        using (var input = binaryData.GetStream())
        {
            if (input == null || input.Length == 0)
            {
                op.Successful = true;
                return String.Empty;
            }

            try
            {
                var extractorContext = new TextExtractorContext(node.VersionId);

                // The extractor runs inside a TimeboxedActivity so that the wait below can give up on it.
                Action<TimeboxedActivity> extractCall = activity =>
                {
                    activity.OutArgument = extractor.Extract((Stream)activity.InArgument, extractorContext);
                };

                var timeboxed = new TimeboxedActivity
                {
                    InArgument = input,
                    Activity = extractCall,
                    Context = HttpContext.Current
                };

                // TextExtractTimeout is multiplied by 1000 before being passed to ExecuteAndWait.
                var completedInTime = timeboxed.ExecuteAndWait(Configuration.Indexing.TextExtractTimeout * 1000);
                if (!completedInTime)
                {
                    timeboxed.Abort();
                    var timeoutMessage = String.Format("Text extracting timeout. Version: {0}, path: {1}", node.Version, node.Path);
                    SnTrace.Index.Write(timeoutMessage);
                    SnLog.WriteWarning(timeoutMessage);
                    op.Successful = true;
                    return String.Empty;
                }

                if (timeboxed.ExecutionException != null)
                {
                    WriteError(timeboxed.ExecutionException, node);
                }
                else
                {
                    extractedText = (string)timeboxed.OutArgument;
                }
            }
            catch (Exception e)
            {
                WriteError(e, node);
            }
        }

        if (extractedText == null)
        {
            SnLog.WriteWarning(string.Format(CultureInfo.InvariantCulture, @"Couldn't extract text. VersionId: {0}, path: '{1}' ", node.VersionId, node.Path));
            SnTrace.Index.Write("Couldn't extract text");
        }
        else
        {
            // Replace NUL characters with dots before the text is returned.
            extractedText = extractedText.Replace('\0', '.');
            SnTrace.Index.Write("Extracted length length: {0}.", extractedText.Length);
        }

        op.Successful = true;
        return extractedText;
    }
}
/// <summary>
/// Produces the text extract of a binary value. The extractor is chosen from the file
/// extension and executed inside a time-boxed activity.
/// </summary>
/// <param name="binaryData">Binary value whose FileName.Extension selects the extractor and whose stream is extracted. May be null.</param>
/// <param name="node">Node whose Version, VersionId and Path are used in warning/error messages.</param>
/// <returns>
/// The extracted text with every '\0' character replaced by '.'.
/// An empty string when <paramref name="binaryData"/>, its file name or its extension is missing,
/// when the extension is not supported, when the stream is null or empty, when the extraction
/// times out, when an error was handed to WriteError, or when the extractor yields null.
/// </returns>
public static string GetExtract(BinaryData binaryData, Node node)
{
    if (binaryData == null)
    {
        return string.Empty;
    }

    var fname = binaryData.FileName;
    if (fname == null)
    {
        return string.Empty;
    }

    var ext = fname.Extension;
    if (String.IsNullOrEmpty(ext))
    {
        return string.Empty;
    }

    ITextExtractor extractor;
    // Extensions are identifiers, not linguistic text: normalize them culture-independently.
    switch (ext.ToLowerInvariant())
    {
        case "contenttype":
        case "xml":
            extractor = new XmlTextExtractor();
            break;
        case "doc":
            extractor = new DocTextExtractor();
            break;
        case "xls":
            extractor = new XlsTextExtractor();
            break;
        case "pdf":
            extractor = new PdfTextExtractor();
            break;
        case "docx":
            extractor = new DocxTextExtractor();
            break;
        case "xlsx":
            extractor = new XlsxTextExtractor();
            break;
        case "pptx":
            extractor = new PptxTextExtractor();
            break;
        case "txt":
            extractor = new PlainTextExtractor();
            break;
        default:
            return String.Empty;
    }

    var result = string.Empty;

    // The stream is disposed on every exit path, including the early returns below.
    using (var stream = binaryData.GetStream())
    {
        if (stream == null || stream.Length == 0)
        {
            return String.Empty;
        }

        try
        {
            // The extractor runs inside a TimeboxedActivity so that the wait below can give up on it.
            Action<TimeboxedActivity> timeboxedFunctionCall = activity =>
            {
                var x = (Stream)activity.InArgument;
                var extract = extractor.Extract(x);
                activity.OutArgument = extract;
            };

            var act = new TimeboxedActivity();
            act.InArgument = stream;
            act.Activity = timeboxedFunctionCall;
            act.Context = HttpContext.Current;

            // TextExtractTimeout is multiplied by 1000 before being passed to ExecuteAndWait.
            var finishedWithinTime = act.ExecuteAndWait(Repository.TextExtractTimeout * 1000);
            if (!finishedWithinTime)
            {
                act.Abort();
                var msg = String.Format("Text extracting timeout. Version: {0}, path: {1}", node.Version, node.Path);
                Logger.WriteWarning(msg);
                return String.Empty;
            }
            else if (act.ExecutionException != null)
            {
                WriteError(act.ExecutionException, node);
            }
            else
            {
                result = (string)act.OutArgument;
            }
        }
        catch (Exception e)
        {
            WriteError(e, node);
        }
    }

    if (String.IsNullOrEmpty(result))
    {
        // The line break inside this verbatim string is part of the logged message.
        var format = @"Couldn't extract text. 
VersionId: {0}, path: '{1}' ";
        var inf = String.Format(CultureInfo.InvariantCulture, format, node.VersionId, node.Path);
        Logger.WriteWarning(inf);
    }

    // The extractor may yield null; calling Replace on it would throw, so fall back to an empty extract.
    if (result == null)
    {
        return string.Empty;
    }

    return result.Replace('\0', '.');
}
/// <summary>
/// Produces the text extract of a binary value by running the resolved text extractor
/// inside a time-boxed activity.
/// </summary>
/// <param name="binaryData">Binary value used to resolve the extractor and to open the stream that is extracted.</param>
/// <param name="node">Node whose VersionId, Version and Path feed the extractor context and the warning/error messages.</param>
/// <returns>
/// The extracted text with every '\0' character replaced by '.'.
/// An empty string when no extractor is resolved, when the stream is null or empty,
/// when the extraction times out, or when an error was handed to WriteError.
/// Null when the extractor itself yields null (a warning is logged in that case).
/// </returns>
public static string GetExtract(BinaryData binaryData, Node node)
{
    var extractor = ResolveExtractor(binaryData);
    if (extractor == null)
    {
        return string.Empty;
    }

    var result = string.Empty;

    // The stream is disposed on every exit path, including the early returns below.
    using (var stream = binaryData.GetStream())
    {
        if (stream == null || stream.Length == 0)
        {
            return String.Empty;
        }

        try
        {
            var ctx = new TextExtractorContext(node.VersionId);

            // The extractor runs inside a TimeboxedActivity so that the wait below can give up on it.
            Action<TimeboxedActivity> timeboxedFunctionCall = activity =>
            {
                var x = (Stream)activity.InArgument;
                var extract = extractor.Extract(x, ctx);
                activity.OutArgument = extract;
            };

            var act = new TimeboxedActivity();
            act.InArgument = stream;
            act.Activity = timeboxedFunctionCall;
            act.Context = HttpContext.Current;

            // TextExtractTimeout is multiplied by 1000 before being passed to ExecuteAndWait.
            var finishedWithinTime = act.ExecuteAndWait(Repository.TextExtractTimeout * 1000);
            if (!finishedWithinTime)
            {
                act.Abort();
                var msg = String.Format("Text extracting timeout. Version: {0}, path: {1}", node.Version, node.Path);
                Logger.WriteWarning(Logger.EventId.NotDefined, msg);
                return String.Empty;
            }
            else if (act.ExecutionException != null)
            {
                WriteError(act.ExecutionException, node);
            }
            else
            {
                result = (string)act.OutArgument;
            }
        }
        catch (Exception e)
        {
            WriteError(e, node);
        }
    }

    if (result == null)
    {
        Logger.WriteWarning(Logger.EventId.NotDefined, String.Format(CultureInfo.InvariantCulture, @"Couldn't extract text. VersionId: {0}, path: '{1}' ", node.VersionId, node.Path));
    }
    else
    {
        // Replace NUL characters with dots before the text is returned.
        result = result.Replace('\0', '.');
    }

    return result;
}