Beispiel #1
0
        /// <summary>
        /// Extracts the text of a binary value with the extractor resolved for it.
        /// The extraction is executed through a <c>TimeboxedActivity</c> and is aborted
        /// when it does not finish within the configured text-extract timeout.
        /// The whole call is wrapped in an <c>SnTrace.Index</c> operation.
        /// </summary>
        /// <param name="binaryData">Binary value whose stream is handed to the extractor.</param>
        /// <param name="node">Node whose version id, version and path are used for the extractor context and in log messages.</param>
        /// <returns>
        /// The extracted text with every NUL character replaced by '.'.
        /// An empty string when no extractor is resolved, when the stream is null or empty,
        /// when the extraction times out, or when an error was reported through <c>WriteError</c>.
        /// <c>null</c> when the extractor itself produced <c>null</c>.
        /// </returns>
        public static string GetExtract(BinaryData binaryData, Node node)
        {
            using (var op = SnTrace.Index.StartOperation("Getting text extract, VId:{0}, Path:{1}", node.VersionId, node.Path))
            {
                var extractor = ResolveExtractor(binaryData);
                if (extractor == null)
                {
                    op.Successful = true;
                    return string.Empty;
                }

                var result = string.Empty;

                using (var stream = binaryData.GetStream())
                {
                    if (stream == null || stream.Length == 0)
                    {
                        op.Successful = true;
                        return string.Empty;
                    }

                    try
                    {
                        var ctx = new TextExtractorContext(node.VersionId);

                        // The extraction itself runs inside the timeboxed activity so that
                        // a hanging extractor can be aborted.
                        Action<TimeboxedActivity> timeboxedFunctionCall = activity =>
                        {
                            var input = (Stream)activity.InArgument;
                            activity.OutArgument = extractor.Extract(input, ctx);
                        };

                        var act = new TimeboxedActivity();
                        act.InArgument = stream;
                        act.Activity = timeboxedFunctionCall;
                        act.Context = HttpContext.Current;

                        // NOTE(review): the "* 1000" suggests the setting is in seconds and
                        // ExecuteAndWait expects milliseconds - confirm.
                        var finishedWithinTime = act.ExecuteAndWait(Configuration.Indexing.TextExtractTimeout * 1000);
                        if (!finishedWithinTime)
                        {
                            act.Abort();
                            var msg = String.Format("Text extracting timeout. Version: {0}, path: {1}", node.Version, node.Path);
                            SnTrace.Index.Write(msg);
                            SnLog.WriteWarning(msg);

                            // A timeout is logged as a warning but the trace operation is still marked successful.
                            op.Successful = true;
                            return string.Empty;
                        }

                        if (act.ExecutionException != null)
                        {
                            WriteError(act.ExecutionException, node);
                        }
                        else
                        {
                            result = (string)act.OutArgument;
                        }
                    }
                    catch (Exception e)
                    {
                        WriteError(e, node);
                    }
                }

                // result is null only when the extractor handed back null;
                // after an error it is still the initial empty string.
                if (result == null)
                {
                    SnLog.WriteWarning(string.Format(CultureInfo.InvariantCulture, @"Couldn't extract text. VersionId: {0}, path: '{1}' ", node.VersionId, node.Path));
                    SnTrace.Index.Write("Couldn't extract text");
                }
                else
                {
                    result = result.Replace('\0', '.');
                    SnTrace.Index.Write("Extracted length: {0}.", result.Length);
                }

                op.Successful = true;
                return result;
            }
        }
Beispiel #2
0
        /// <summary>
        /// Extracts the text of a binary value. The extractor is chosen by the
        /// (case-insensitively compared) extension of the binary's file name and is
        /// executed through a <c>TimeboxedActivity</c> that is aborted when it does not
        /// finish within the configured text-extract timeout.
        /// </summary>
        /// <param name="binaryData">Binary value to extract from; may be null.</param>
        /// <param name="node">Node whose version id, version and path are used in log messages.</param>
        /// <returns>
        /// The extracted text with every NUL character replaced by '.'.
        /// An empty string when <paramref name="binaryData"/>, its file name or its extension is missing,
        /// when the extension is not supported, when the stream is null or empty,
        /// when the extraction times out, or when nothing could be extracted. Never null.
        /// </returns>
        public static string GetExtract(BinaryData binaryData, Node node)
        {
            if (binaryData == null)
            {
                return string.Empty;
            }

            var fname = binaryData.FileName;
            if (fname == null)
            {
                return string.Empty;
            }

            var ext = fname.Extension;
            if (String.IsNullOrEmpty(ext))
            {
                return string.Empty;
            }

            ITextExtractor extractor;

            // Invariant lower-casing: the extension is an identifier, not linguistic text,
            // so the match must not depend on the current culture.
            switch (ext.ToLowerInvariant())
            {
                case "contenttype":
                case "xml": extractor = new XmlTextExtractor(); break;

                case "doc": extractor = new DocTextExtractor(); break;

                case "xls": extractor = new XlsTextExtractor(); break;

                case "pdf": extractor = new PdfTextExtractor(); break;

                case "docx": extractor = new DocxTextExtractor(); break;

                case "xlsx": extractor = new XlsxTextExtractor(); break;

                case "pptx": extractor = new PptxTextExtractor(); break;

                case "txt": extractor = new PlainTextExtractor(); break;

                default:
                    return string.Empty;
            }

            var result = string.Empty;

            // The stream is disposed on every exit path, including the early returns below.
            using (var stream = binaryData.GetStream())
            {
                if (stream == null || stream.Length == 0)
                {
                    return string.Empty;
                }

                try
                {
                    // The extraction itself runs inside the timeboxed activity so that
                    // a hanging extractor can be aborted.
                    Action<TimeboxedActivity> timeboxedFunctionCall = activity =>
                    {
                        var input = (Stream)activity.InArgument;
                        activity.OutArgument = extractor.Extract(input);
                    };

                    var act = new TimeboxedActivity();
                    act.InArgument = stream;
                    act.Activity = timeboxedFunctionCall;
                    act.Context = HttpContext.Current;

                    // NOTE(review): the "* 1000" suggests the setting is in seconds and
                    // ExecuteAndWait expects milliseconds - confirm.
                    var finishedWithinTime = act.ExecuteAndWait(Repository.TextExtractTimeout * 1000);
                    if (!finishedWithinTime)
                    {
                        act.Abort();
                        var msg = String.Format("Text extracting timeout. Version: {0}, path: {1}", node.Version, node.Path);
                        Logger.WriteWarning(msg);
                        return string.Empty;
                    }

                    if (act.ExecutionException != null)
                    {
                        WriteError(act.ExecutionException, node);
                    }
                    else
                    {
                        result = (string)act.OutArgument;
                    }
                }
                catch (Exception e)
                {
                    WriteError(e, node);
                }
            }

            if (String.IsNullOrEmpty(result))
            {
                var format = @"Couldn't extract text. VersionId: {0}, path: '{1}' ";
                var inf = String.Format(CultureInfo.InvariantCulture, format, node.VersionId, node.Path);
                Logger.WriteWarning(inf);

                // result may be null here (extractor returned null); calling Replace on it
                // would throw, so return the empty extract directly.
                return string.Empty;
            }

            return result.Replace('\0', '.');
        }
Beispiel #3
0
        /// <summary>
        /// Extracts the text of a binary value with the extractor resolved for it.
        /// The extraction is executed through a <c>TimeboxedActivity</c> and is aborted
        /// when it does not finish within the configured text-extract timeout.
        /// </summary>
        /// <param name="binaryData">Binary value whose stream is handed to the extractor.</param>
        /// <param name="node">Node whose version id, version and path are used for the extractor context and in log messages.</param>
        /// <returns>
        /// The extracted text with every NUL character replaced by '.'.
        /// An empty string when no extractor is resolved, when the stream is null or empty,
        /// when the extraction times out, or when an error was reported through <c>WriteError</c>.
        /// <c>null</c> when the extractor itself produced <c>null</c>.
        /// </returns>
        public static string GetExtract(BinaryData binaryData, Node node)
        {
            var extractor = ResolveExtractor(binaryData);
            if (extractor == null)
            {
                return string.Empty;
            }

            var result = string.Empty;

            // The stream is disposed on every exit path, including the early returns below.
            using (var stream = binaryData.GetStream())
            {
                if (stream == null || stream.Length == 0)
                {
                    return string.Empty;
                }

                try
                {
                    var ctx = new TextExtractorContext(node.VersionId);

                    // The extraction itself runs inside the timeboxed activity so that
                    // a hanging extractor can be aborted.
                    Action<TimeboxedActivity> timeboxedFunctionCall = activity =>
                    {
                        var input = (Stream)activity.InArgument;
                        activity.OutArgument = extractor.Extract(input, ctx);
                    };

                    var act = new TimeboxedActivity();
                    act.InArgument = stream;
                    act.Activity = timeboxedFunctionCall;
                    act.Context = HttpContext.Current;

                    // NOTE(review): the "* 1000" suggests the setting is in seconds and
                    // ExecuteAndWait expects milliseconds - confirm.
                    var finishedWithinTime = act.ExecuteAndWait(Repository.TextExtractTimeout * 1000);
                    if (!finishedWithinTime)
                    {
                        act.Abort();
                        var msg = String.Format("Text extracting timeout. Version: {0}, path: {1}", node.Version, node.Path);
                        Logger.WriteWarning(Logger.EventId.NotDefined, msg);
                        return string.Empty;
                    }

                    if (act.ExecutionException != null)
                    {
                        WriteError(act.ExecutionException, node);
                    }
                    else
                    {
                        result = (string)act.OutArgument;
                    }
                }
                catch (Exception e)
                {
                    WriteError(e, node);
                }
            }

            // result is null only when the extractor handed back null;
            // after an error it is still the initial empty string.
            if (result == null)
            {
                Logger.WriteWarning(Logger.EventId.NotDefined, String.Format(CultureInfo.InvariantCulture, @"Couldn't extract text. VersionId: {0}, path: '{1}' ", node.VersionId, node.Path));
                return null;
            }

            return result.Replace('\0', '.');
        }