示例#1
0
        /// <summary>
        /// Converts an HTML document into plain text (one text block per line) and adds a
        /// "TextBlock" annotation for every block, with spans that refer to the new plain text.
        /// </summary>
        /// <remarks>
        /// Documents whose "contentType" feature is not "Html" are left untouched. Consecutive text
        /// tokens are merged into a single block (joined by one space) until a tag contained in
        /// mTagKeepList is seen. On success the document text is replaced and "contentType" is set
        /// to "Text". Any exception is logged through mLogger and not rethrown.
        /// </remarks>
        /// <param name="document">The document to convert in place.</param>
        public /*protected*/ override void ProcessDocument(Document document)
        {
            string contentType = document.Features.GetFeatureValue("contentType");

            if (contentType != "Html")
            {
                return;
            }
            try
            {
                HtmlTokenizer htmlTokenizer = new HtmlTokenizer(document.Text, /*stemmer=*/ null, /*decode=*/ true, /*tokenize=*/ false, /*applySkipRules=*/ true);
                // The final text is assembled with StringBuilder.AppendLine, which terminates each
                // block with Environment.NewLine. The running offset must use that same length;
                // assuming two characters shifts every span on platforms where the newline is "\n".
                int newLineLen = Environment.NewLine.Length;
                int idx = 0; // offset in the output text at which the next block would start
                ArrayList<string> txtBlocks = new ArrayList<string>();
                bool merge = false;
                for (HtmlTokenizer.Enumerator e = (HtmlTokenizer.Enumerator)htmlTokenizer.GetEnumerator(); e.MoveNext();)
                {
                    if (e.CurrentToken.TokenType == HtmlTokenizer.TokenType.Text)
                    {
                        string textBlock = Utils.ToOneLine(e.Current.Trim(), /*compact=*/ true);
                        if (textBlock != "")
                        {
                            if (!merge)
                            {
                                txtBlocks.Add(textBlock);
                                document.AddAnnotation(new Annotation(idx, idx + textBlock.Length - 1, "TextBlock"));
                            }
                            else
                            {
                                // The previous block's line terminator is replaced by a single joining
                                // space, so step back to the position right after that space.
                                idx -= newLineLen - 1;
                                txtBlocks.Last += " " + textBlock;
                                // Replace the last annotation with one that also covers the appended text.
                                int oldStartIdx = document.GetAnnotationAt(document.AnnotationCount - 1).SpanStart;
                                document.RemoveAnnotationAt(document.AnnotationCount - 1);
                                document.AddAnnotation(new Annotation(oldStartIdx, idx + textBlock.Length - 1, "TextBlock"));
                            }
                            idx += textBlock.Length + newLineLen;
                            merge = true;
                        }
                    }
                    else
                    {
                        // Tag names are identifiers, not natural-language text: lower-case them
                        // culture-invariantly so the lookup also works under e.g. Turkish locales.
                        if (mTagKeepList.Contains(e.CurrentToken.TagName.ToLowerInvariant()))
                        {
                            merge = false;
                        }
                    }
                }
                StringBuilder sb = new StringBuilder();
                foreach (string textBlock in txtBlocks)
                {
                    sb.AppendLine(textBlock);
                }
                document.Text = sb.ToString();
                document.Features.SetFeatureValue("contentType", "Text");
            }
            catch (Exception exception)
            {
                mLogger.Error("ProcessDocument", exception);
            }
        }
示例#2
0
        /// <summary>
        /// Replaces the text of an HTML document with the blocks extracted by the boilerplate
        /// remover (one block per line) and annotates every non-empty block as
        /// "TextBlock/" followed by the block's text class.
        /// </summary>
        /// <remarks>
        /// Documents whose "contentType" feature is not "Html" are skipped. On success the
        /// "contentType" feature becomes "Text". Exceptions are logged and not rethrown.
        /// </remarks>
        /// <param name="document">The document to convert in place.</param>
        public /*protected*/ override void ProcessDocument(Document document)
        {
            if (document.Features.GetFeatureValue("contentType") != "Html")
            {
                return;
            }
            try
            {
                List<BoilerplateRemover.HtmlBlock> htmlBlocks;
                mBoilerplateRemover.ExtractText(new StringReader(document.Text), BoilerplateRemover.TextClass.Unknown, out htmlBlocks);
                StringBuilder plainText = new StringBuilder();
                foreach (BoilerplateRemover.HtmlBlock htmlBlock in htmlBlocks)
                {
                    string content = htmlBlock.text;
                    if (string.IsNullOrEmpty(content))
                    {
                        continue; // empty blocks produce neither text nor an annotation
                    }
                    // The span is derived from the builder length before appending, so it is
                    // independent of the length of the line terminator.
                    int first = plainText.Length;
                    int last = first + (content.Length - 1);
                    document.AddAnnotation(new Annotation(first, last, "TextBlock/" + htmlBlock.textClass.ToString()));
                    plainText.AppendLine(content);
                }
                document.Text = plainText.ToString();
                document.Features.SetFeatureValue("contentType", "Text");
            }
            catch (Exception exception)
            {
                mLogger.Error("ProcessDocument", exception);
            }
        }
 /*protected*/
 /// <summary>
 /// Splits every selected text block of a plain-text document into sentences and adds a
 /// "Sentence" annotation for each sentence that is non-empty after trimming.
 /// </summary>
 /// <remarks>
 /// Only documents whose "contentType" feature is "Text" are processed. Each position pair
 /// returned by the sentence detector is used as (start offset within the block, length).
 /// Exceptions are logged and not rethrown.
 /// </remarks>
 /// <param name="document">The document to annotate.</param>
 public override void ProcessDocument(Document document)
 {
     if (document.Features.GetFeatureValue("contentType") != "Text")
     {
         return;
     }
     try
     {
         foreach (TextBlock textBlock in document.GetAnnotatedBlocks(mBlockSelector))
         {
             OpenNLP.Tools.Util.Pair<int, int>[] spans;
             string[] detected = mSentenceDetector.SentenceDetect(textBlock.Text, out spans);
             for (int n = 0; n < spans.Length; n++)
             {
                 // Trim offsets are applied to both ends of the raw sentence span
                 // (presumably leading/trailing whitespace counts - see GetTrimOffsets).
                 int leadTrim, tailTrim;
                 GetTrimOffsets(detected[n], out leadTrim, out tailTrim);
                 int sentenceStart = textBlock.SpanStart + spans[n].FirstValue;
                 int first = sentenceStart + leadTrim;
                 int last = sentenceStart + (spans[n].SecondValue - 1) - tailTrim;
                 if (last < first)
                 {
                     continue; // nothing left after trimming
                 }
                 document.AddAnnotation(new Annotation(first, last, "Sentence"));
             }
         }
     }
     catch (Exception exception)
     {
         mLogger.Error("ProcessDocument", exception);
     }
 }
示例#4
0
文件: Output.cs 项目: mgrcar/OPA
        /// <summary>
        /// Renders an annotated XML corpus document, its chunking result and a feature vector
        /// into an HTML page written to <paramref name="fileName"/> (UTF-8, overwriting).
        /// </summary>
        /// <param name="featureNames">Feature names; the i-th name is paired with <c>vec[i]</c>.</param>
        /// <param name="vec">Feature values stored as document features.</param>
        /// <param name="xmlDoc">Source XML; the title is read from "//header/naslov" and sentences from "//text/body//p/s".</param>
        /// <param name="chunks">Chunks listed below the text, grouped by chunk type.</param>
        /// <param name="fileName">Path of the HTML file to create.</param>
        public static void SaveHtml(string[] featureNames, SparseVector <double> vec, XmlDocument xmlDoc, ArrayList <Chunk> chunks, string fileName)
        {
            string title = xmlDoc.SelectSingleNode("//header/naslov").InnerText;
            Document htmlDoc = new Document(title, "");
            StringBuilder body = new StringBuilder();

            // One output line per sentence; every word ("w") and punctuation ("c") node gets an
            // annotation, "S" nodes become a single space.
            foreach (XmlNode sentence in xmlDoc.SelectNodes("//text/body//p/s"))
            {
                foreach (XmlNode item in sentence.SelectNodes("w | c | S"))
                {
                    if (item.Name == "S")
                    {
                        body.Append(" ");
                        continue;
                    }
                    bool isWord = item.Name == "w";
                    string surface = item.InnerText;
                    int first = body.Length;
                    int last = first + surface.Length - 1;
                    body.Append(surface);
                    Annotation annotation = new Annotation(first, last, isWord ? "beseda" : "ločilo");
                    if (isWord)
                    {
                        annotation.Features.SetFeatureValue("oznaka", item.Attributes["msd"].Value);
                        annotation.Features.SetFeatureValue("lema", item.Attributes["lemma"].Value);
                    }
                    htmlDoc.AddAnnotation(annotation);
                }
                body.AppendLine();
            }

            // Chunking report, grouped by chunk type in a fixed order.
            body.AppendLine();
            body.AppendLine("Rezultat členitve:");
            body.AppendLine();
            ChunkType[] reportOrder = { ChunkType.VP, ChunkType.NP, ChunkType.PP, ChunkType.AP, ChunkType.CON, ChunkType.Other };
            foreach (ChunkType chunkType in reportOrder)
            {
                string heading = chunkType.ToString();
                if (heading == "Other")
                {
                    heading = "Ostalo";
                }
                body.AppendLine(heading + ":");
                foreach (Chunk chunk in chunks.Where(x => x.mType == chunkType))
                {
                    body.AppendLine("\t" + chunk.ToString());
                }
            }
            htmlDoc.Text = body.ToString();

            for (int k = 0; k < featureNames.Length; k++)
            {
                htmlDoc.Features.SetFeatureValue(featureNames[k], vec[k].ToString());
            }

            using (StreamWriter output = new StreamWriter(fileName, /*append=*/ false, Encoding.UTF8))
            {
                htmlDoc.MakeHtmlPage(output, /*inlineCss=*/ true);
            }
        }
示例#5
0
        /// <summary>
        /// Gets the fully resolved C# syntax tree resolver that is shared between semantic
        /// highlighting, source analysis and refactoring. Code analysis tasks should use this
        /// instead of creating their own resolver, unless they resolve locally with a resolve navigator.
        /// </summary>
        /// <remarks>
        /// The resolver is cached on the document as a ResolverAnnotation keyed by the parsed file.
        /// When the parsed file changed, the previous resolve task is cancelled and a new one is started.
        /// </remarks>
        /// <returns>
        /// The wrapper around the resolve task, or null when the document has no parsed C# file /
        /// syntax tree or its project context is being updated.
        /// </returns>
        public static TaskWrapper GetSharedResolver(this Document document)
        {
            var parsedDocument = document.ParsedDocument;
            if (parsedDocument == null || document.IsProjectContextInUpdate)
            {
                return null;
            }

            var syntaxTree = parsedDocument.GetAst<SyntaxTree>();
            var unresolvedFile = parsedDocument.ParsedFile as CSharpUnresolvedFile;
            if (syntaxTree == null || unresolvedFile == null)
            {
                return null;
            }

            var compilation = document.Compilation;

            var cached = document.Annotation<ResolverAnnotation>();
            if (cached != null)
            {
                if (cached.ParsedFile == unresolvedFile)
                {
                    // still valid for the current parse result
                    return cached.Task;
                }
                // stale: stop the outdated resolve run (if cancellable) and drop the annotation
                var staleSource = cached.SharedTokenSource;
                if (staleSource != null)
                {
                    staleSource.Cancel();
                }
                document.RemoveAnnotations<ResolverAnnotation>();
            }

            var cancellation = new CancellationTokenSource();
            var cancelToken = cancellation.Token;
            var background = Task.Factory.StartNew(() => {
                try {
                    using (ResolveCounter.BeginTiming()) {
                        var resolver = new CSharpAstResolver(compilation, syntaxTree, unresolvedFile);
                        resolver.ApplyNavigator(new ConstantModeResolveVisitorNavigator(ResolveVisitorNavigationMode.Resolve, null), cancelToken);
                        return resolver;
                    }
                } catch (OperationCanceledException) {
                    return null;
                } catch (Exception e) {
                    LoggingService.LogError("Error while creating the resolver.", e);
                    return null;
                }
            }, cancelToken);

            var wrapper = new TaskWrapper(background);
            var annotation = new ResolverAnnotation {
                Task = wrapper,
                ParsedFile = unresolvedFile,
                SharedTokenSource = cancellation
            };
            document.AddAnnotation(annotation);
            return wrapper;
        }
示例#6
0
        /// <summary>
        /// Scratch entry point: builds a two-document corpus, attaches one annotation to the
        /// first document, serializes the corpus to XML and prints the XML to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            Console.WriteLine("hello worlds!");

            // Earlier experiments (stream component wiring, regex tokenizer, charset regex,
            // RSS feed component) used to live here as commented-out code.

            Document first = new Document("name", "bla bla");
            Document second = new Document("name2", "bla bla 2");
            first.AddAnnotation(new Annotation(0, 100, "waka waka"));

            StringWriter xmlBuffer = new StringWriter();
            XmlTextWriter xmlWriter = new XmlTextWriter(xmlBuffer);

            DocumentCorpus corpus = new DocumentCorpus();
            corpus.AddDocument(first);
            corpus.AddDocument(second);
            corpus.WriteXml(xmlWriter);

            Console.WriteLine(xmlBuffer);
        }
示例#7
0
        /// <summary>
        /// Runs entity recognition over the selected text blocks of a plain-text document and
        /// annotates every recognized entity that resolves to an instance.
        /// </summary>
        /// <remarks>
        /// Only documents whose "contentType" feature is "Text" are processed. Sentences are read
        /// from "Sentence" annotations, tokens from "Token" annotations and POS tags from the
        /// "posTag" feature (all three names are hard-coded). Exceptions are logged and not rethrown.
        /// </remarks>
        /// <param name="document">The document to annotate.</param>
        public override void ProcessDocument(Document document)
        {
            if (document.Features.GetFeatureValue("contentType") != "Text")
            {
                return;
            }
            try
            {
                document.CreateAnnotationIndex();

                // Feed blocks -> sentences -> tokens into the recognition engine's own document model.
                EntityRecognitionEngine.Document engineDoc = new EntityRecognitionEngine.Document();
                foreach (TextBlock block in document.GetAnnotatedBlocks(mBlockSelector))
                {
                    engineDoc.BeginNewTextBlock();
                    foreach (TextBlock sentence in document.GetAnnotatedBlocks("Sentence", block.SpanStart, block.SpanEnd)) // sentence selector is hard-coded
                    {
                        ArrayList<string> words = new ArrayList<string>();
                        ArrayList<string> wordTags = new ArrayList<string>();
                        ArrayList<int> wordStarts = new ArrayList<int>();
                        foreach (TextBlock word in document.GetAnnotatedBlocks("Token", sentence.SpanStart, sentence.SpanEnd)) // token selector is hard-coded
                        {
                            words.Add(word.Text);
                            wordTags.Add(word.Annotation.Features.GetFeatureValue("posTag")); // POS tag feature name is hard-coded
                            wordStarts.Add(word.SpanStart);
                        }
                        engineDoc.AddSentence(words, wordStarts, wordTags);
                    }
                }

                // entitySpans[n] belongs to the n-th discovered gazetteer URI.
                ArrayList<Pair<int, int>> entitySpans;
                ArrayList<string> discovered = engineDoc.DiscoverEntities(mEntityRecognitionEngine, out entitySpans);
                int n = -1;
                foreach (string gazetteerUri in discovered)
                {
                    n++;
                    string instanceUri = mEntityRecognitionEngine.GetIdentifiedInstance(gazetteerUri);
                    if (instanceUri == null)
                    {
                        continue; // entity could not be identified as an instance
                    }
                    string annotationName = GetAnnotationName(mEntityRecognitionEngine.GetInstanceClassPath(instanceUri));
                    Annotation entity = new Annotation(entitySpans[n].First, entitySpans[n].Second, annotationName);
                    document.AddAnnotation(entity);
                    entity.Features.SetFeatureValue("gazetteerUri", gazetteerUri);
                    entity.Features.SetFeatureValue("instanceUri", instanceUri);
                    entity.Features.SetFeatureValue("instanceClassUri", mEntityRecognitionEngine.GetInstanceClass(instanceUri));
                    // TODO: instanceLabel, instanceClassLabel
                }
            }
            catch (Exception exception)
            {
                mLogger.Error("ProcessDocument", exception);
            }
        }
示例#8
0
 /// <summary>
 /// Tokenizes every block annotated as SRC_ANNOT_TYPE and adds one DEST_ANNOT_TYPE
 /// annotation per token, with spans expressed in document coordinates.
 /// </summary>
 /// <param name="document">The document to annotate.</param>
 protected override void ProcessDocument(Document document)
 {
     foreach (TextBlock block in document.GetAnnotatedBlocks(SRC_ANNOT_TYPE))
     {
         mTokenizer.Text = block.Text;
         RegexTokenizer.Enumerator tokens = (RegexTokenizer.Enumerator)mTokenizer.GetEnumerator();
         while (tokens.MoveNext())
         {
             // token offsets are relative to the block, so shift them by the block start
             int first = block.SpanStart + tokens.CurrentTokenIdx;
             int last = first + tokens.Current.Length - 1;
             document.AddAnnotation(new Annotation(first, last, DEST_ANNOT_TYPE));
         }
     }
 }
示例#9
0
        /// <summary>
        /// Gets the fully resolved C# syntax tree resolver that is shared between semantic
        /// highlighting, source analysis and refactoring. Code analysis tasks should use this
        /// instead of creating their own resolver, unless they resolve locally with a resolve navigator.
        /// </summary>
        /// <remarks>
        /// The resolve task is cached on the document as a ResolverAnnotation keyed by the parsed
        /// file; a new task is started whenever the parsed file differs from the cached one.
        /// </remarks>
        /// <returns>
        /// The task producing the resolver, or null when the document has no parsed C# file or syntax tree.
        /// </returns>
        public static Task <CSharpAstResolver> GetSharedResolver(this Document document)
        {
            var parsedDocument = document.ParsedDocument;
            if (parsedDocument == null)
            {
                return null;
            }

            var syntaxTree = parsedDocument.GetAst<SyntaxTree>();
            var unresolvedFile = parsedDocument.ParsedFile as CSharpUnresolvedFile;
            if (syntaxTree == null || unresolvedFile == null)
            {
                return null;
            }

            var compilation = document.Compilation;

            var cached = document.Annotation<ResolverAnnotation>();
            if (cached != null)
            {
                if (cached.ParsedFile == unresolvedFile)
                {
                    // still valid for the current parse result
                    return cached.Task;
                }
                document.RemoveAnnotations<ResolverAnnotation>();
            }

            var background = Task.Factory.StartNew(() => {
                var resolver = new CSharpAstResolver(compilation, syntaxTree, unresolvedFile);
                resolver.ApplyNavigator(new ConstantModeResolveVisitorNavigator(ResolveVisitorNavigationMode.Resolve, null));
                return resolver;
            });

            var annotation = new ResolverAnnotation {
                Task = background,
                ParsedFile = unresolvedFile
            };
            document.AddAnnotation(annotation);
            return background;
        }
 /*protected*/
 /// <summary>
 /// Tokenizes every selected text block of a plain-text document and adds one "Token"
 /// annotation per token, with spans expressed in document coordinates.
 /// </summary>
 /// <remarks>
 /// Only documents whose "contentType" feature is "Text" are processed. Exceptions are
 /// logged and not rethrown.
 /// </remarks>
 /// <param name="document">The document to annotate.</param>
 public override void ProcessDocument(Document document)
 {
     if (document.Features.GetFeatureValue("contentType") != "Text")
     {
         return;
     }
     try
     {
         foreach (TextBlock block in document.GetAnnotatedBlocks(mBlockSelector))
         {
             mTokenizer.Text = block.Text;
             RegexTokenizer.Enumerator tokens = (RegexTokenizer.Enumerator)mTokenizer.GetEnumerator();
             while (tokens.MoveNext())
             {
                 // token offsets are relative to the block, so shift them by the block start
                 int first = block.SpanStart + tokens.CurrentTokenIdx;
                 int last = first + tokens.Current.Length - 1;
                 document.AddAnnotation(new Annotation(first, last, "Token"));
             }
         }
     }
     catch (Exception exception)
     {
         mLogger.Error("ProcessDocument", exception);
     }
 }
示例#11
0
        /// <summary>
        /// Scratch entry point: builds a two-document corpus, attaches one annotation to the
        /// first document, serializes the corpus to XML and prints the XML to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            Console.WriteLine("hello worlds!");

            // Earlier experiments (stream component wiring, regex tokenizer, charset regex,
            // RSS feed component) used to live here as commented-out code.

            Document first = new Document("name", "bla bla");
            Document second = new Document("name2", "bla bla 2");
            first.AddAnnotation(new Annotation(0, 100, "waka waka"));

            StringWriter xmlBuffer = new StringWriter();
            XmlTextWriter xmlWriter = new XmlTextWriter(xmlBuffer);

            DocumentCorpus corpus = new DocumentCorpus();
            corpus.AddDocument(first);
            corpus.AddDocument(second);
            corpus.WriteXml(xmlWriter);

            Console.WriteLine(xmlBuffer);
        }
示例#12
0
        /// <summary>
        /// Converts an HTML document into plain text (one text block per line) and adds a
        /// "TextBlock" annotation for every block, with spans that refer to the new plain text.
        /// </summary>
        /// <remarks>
        /// Documents whose "contentType" feature is not "Html" are left untouched. Consecutive text
        /// tokens are merged into a single block (joined by one space) until a tag contained in
        /// mSplitTags is seen. Each annotation carries two features: "domPath" (the open-tag path,
        /// outermost tag first; for merged blocks the shortest path seen) and "linkToTextRatio"
        /// ("linkChars/textChars", where link characters are those found inside an "a" element).
        /// On success the document text is replaced and "contentType" is set to "Text".
        /// Any exception is logged through mLogger and not rethrown.
        /// </remarks>
        /// <param name="document">The document to convert in place.</param>
        public /*protected*/ override void ProcessDocument(Document document)
        {
            string contentType = document.Features.GetFeatureValue("contentType");

            if (contentType != "Html")
            {
                return;
            }
            try
            {
                HtmlTokenizer htmlTokenizer = new HtmlTokenizer(document.Text, /*stemmer=*/ null, /*decode=*/ true, /*tokenize=*/ false, /*applySkipRules=*/ true);
                // The final text is assembled with StringBuilder.AppendLine, which terminates each
                // block with Environment.NewLine. The running offset must use that same length;
                // assuming two characters shifts every span on platforms where the newline is "\n".
                int newLineLen = Environment.NewLine.Length;
                int idx = 0; // offset in the output text at which the next block would start
                ArrayList<string> txtBlocks = new ArrayList<string>();
                bool merge = false;
                Stack<string> tags = new Stack<string>(); // currently open tags, innermost on top
                for (HtmlTokenizer.Enumerator e = (HtmlTokenizer.Enumerator)htmlTokenizer.GetEnumerator(); e.MoveNext();)
                {
                    if (e.CurrentToken.TokenType == HtmlTokenizer.TokenType.Text)
                    {
                        string textBlock = Utils.ToOneLine(e.Current.Trim(), /*compact=*/ true);
                        if (textBlock != "")
                        {
                            // Aggregate without a seed throws on an empty sequence; text that
                            // appears outside any open tag simply gets an empty DOM path.
                            string domPath = tags.Count == 0 ? "" : tags.Aggregate((x, y) => y + "/" + x);
                            bool isLink = tags.Contains("a");
                            if (!merge)
                            {
                                txtBlocks.Add(textBlock);
                                document.AddAnnotation(new Annotation(idx, idx + textBlock.Length - 1, "TextBlock"));
                                document.Annotations.Last.Features.SetFeatureValue("domPath", domPath);
                                document.Annotations.Last.Features.SetFeatureValue("linkToTextRatio", string.Format("{0}/{1}", isLink ? textBlock.Length : 0, textBlock.Length));
                            }
                            else
                            {
                                // The previous block's line terminator is replaced by a single joining
                                // space, so step back to the position right after that space.
                                idx -= newLineLen - 1;
                                txtBlocks.Last += " " + textBlock;
                                // Replace the last annotation with one that also covers the appended
                                // text, carrying over and updating its features.
                                int oldStartIdx = document.GetAnnotationAt(document.AnnotationCount - 1).SpanStart;
                                string oldDomPath = document.Annotations.Last.Features.GetFeatureValue("domPath");
                                string oldLinkToTextRatio = document.Annotations.Last.Features.GetFeatureValue("linkToTextRatio");
                                document.RemoveAnnotationAt(document.AnnotationCount - 1);
                                document.AddAnnotation(new Annotation(oldStartIdx, idx + textBlock.Length - 1, "TextBlock"));
                                document.Annotations.Last.Features.SetFeatureValue("domPath", domPath.Length < oldDomPath.Length ? domPath : oldDomPath);
                                string[] oldCounts = oldLinkToTextRatio.Split('/');
                                int linkCharCount = Convert.ToInt32(oldCounts[0]) + (isLink ? textBlock.Length : 0);
                                int textCharCount = Convert.ToInt32(oldCounts[1]) + textBlock.Length;
                                document.Annotations.Last.Features.SetFeatureValue("linkToTextRatio", string.Format("{0}/{1}", linkCharCount, textCharCount));
                            }
                            idx += textBlock.Length + newLineLen;
                            merge = true;
                        }
                    }
                    else
                    {
                        // Tag names are identifiers, not natural-language text: lower-case them
                        // culture-invariantly so lookups also work under e.g. Turkish locales.
                        string tagName = e.CurrentToken.TagName.ToLowerInvariant();
                        if (mSplitTags.Contains(tagName))
                        {
                            merge = false;
                        }
                        if (e.CurrentToken.TokenType == HtmlTokenizer.TokenType.StartTag)
                        {
                            tags.Push(tagName);
                        }
                        else if (e.CurrentToken.TokenType == HtmlTokenizer.TokenType.EndTag)
                        {
                            if (tags.Count == 0)
                            {
                                // Stray end tag with nothing open: report it, but do not push a
                                // null entry onto the stack (it would corrupt later DOM paths).
                                mLogger.Error("ProcessDocument", "End tag does not match start tag (found {0} instead of {1}).", "nothing", tagName);
                            }
                            else
                            {
                                string openTagName = tags.Pop();
                                if (openTagName != tagName)
                                {
                                    mLogger.Error("ProcessDocument", "End tag does not match start tag (found {0} instead of {1}).", openTagName, tagName);
                                    // mismatch: keep the open tag on the stack and ignore the end tag
                                    tags.Push(openTagName);
                                }
                            }
                        }
                    }
                }
                StringBuilder sb = new StringBuilder();
                foreach (string textBlock in txtBlocks)
                {
                    sb.AppendLine(textBlock);
                }
                document.Text = sb.ToString();
                document.Features.SetFeatureValue("contentType", "Text");
            }
            catch (Exception exception)
            {
                mLogger.Error("ProcessDocument", exception);
            }
        }
        /// <summary>
        /// Gets the fully resolved C# syntax tree resolver that is shared between semantic
        /// highlighting, source analysis and refactoring. Code analysis tasks should use this
        /// instead of creating their own resolver, unless they resolve locally with a resolve navigator.
        /// </summary>
        /// <remarks>
        /// The resolver task is cached on the document as a ResolverAnnotation keyed by the parsed
        /// file. When the parsed file changed, the previous resolve run is cancelled and a new one
        /// is started. The returned task yields null if resolving was cancelled or failed.
        /// </remarks>
        /// <returns>
        /// The task producing the resolver, or null when the document has no parsed C# file /
        /// syntax tree, its project context is being updated, or it belongs to a project that is
        /// not a DotNetProject.
        /// </returns>
        public static Task <CSharpAstResolver> GetSharedResolver(this Document document)
        {
            var parsedDocument = document.ParsedDocument;
            if (parsedDocument == null || document.IsProjectContextInUpdate)
            {
                return null;
            }
            if (document.Project != null && !(document.Project is DotNetProject))
            {
                return null;
            }

            var syntaxTree = parsedDocument.GetAst<SyntaxTree>();
            var unresolvedFile = parsedDocument.ParsedFile as CSharpUnresolvedFile;
            if (syntaxTree == null || unresolvedFile == null)
            {
                return null;
            }

            var compilation = document.Compilation;

            var cached = document.Annotation<ResolverAnnotation>();
            if (cached != null)
            {
                if (cached.ParsedFile == unresolvedFile)
                {
                    // still valid for the current parse result
                    return cached.Task;
                }
                // stale: stop the outdated resolve run (if cancellable) and drop the annotation
                var staleSource = cached.SharedTokenSource;
                if (staleSource != null)
                {
                    staleSource.Cancel();
                }
                document.RemoveAnnotations<ResolverAnnotation>();
            }

            var cancellation = new CancellationTokenSource();
            var cancelToken = cancellation.Token;
            var background = Task.Factory.StartNew(() => {
                try {
                    using (ResolveCounter.BeginTiming()) {
                        var resolver = new CSharpAstResolver(compilation, syntaxTree, unresolvedFile);
                        resolver.ApplyNavigator(new ConstantModeResolveVisitorNavigator(ResolveVisitorNavigationMode.Resolve, null), cancelToken);
                        return resolver;
                    }
                } catch (OperationCanceledException) {
                    return null;
                } catch (Exception e) {
                    LoggingService.LogError("Error while creating the resolver.", e);
                    return null;
                }
            }, cancelToken);

            // Shield callers from cancellation/faults: the continuation maps both to a null result.
            var wrapper = background.ContinueWith(finished => {
                if (finished.IsCanceled)
                {
                    return null;
                }
                if (finished.IsFaulted)
                {
                    var cause = finished.Exception.Flatten().InnerException;
                    if (!(cause is TaskCanceledException))
                    {
                        LoggingService.LogWarning("Exception while getting shared AST resolver.", cause);
                    }
                    return null;
                }
                return finished.Result;
            }, TaskContinuationOptions.ExecuteSynchronously);

            var annotation = new ResolverAnnotation {
                Task = wrapper,
                ParsedFile = unresolvedFile,
                SharedTokenSource = cancellation
            };
            document.AddAnnotation(annotation);

            return wrapper;
        }
 /// <summary>
 /// Runs entity recognition over the selected text blocks of a plain-text document and
 /// annotates every recognized entity that resolves to an instance.
 /// </summary>
 /// <remarks>
 /// Only documents whose "contentType" feature is "Text" are processed. Sentences are read
 /// from "Sentence" annotations, tokens from "Token" annotations and POS tags from the
 /// "posTag" feature (all three names are hard-coded). Exceptions are logged and not rethrown.
 /// </remarks>
 /// <param name="document">The document to annotate.</param>
 public override void ProcessDocument(Document document)
 {
     if (document.Features.GetFeatureValue("contentType") != "Text")
     {
         return;
     }
     try
     {
         document.CreateAnnotationIndex();

         // Feed blocks -> sentences -> tokens into the recognition engine's own document model.
         EntityRecognitionEngine.Document engineDoc = new EntityRecognitionEngine.Document();
         foreach (TextBlock block in document.GetAnnotatedBlocks(mBlockSelector))
         {
             engineDoc.BeginNewTextBlock();
             foreach (TextBlock sentence in document.GetAnnotatedBlocks("Sentence", block.SpanStart, block.SpanEnd)) // sentence selector is hard-coded
             {
                 ArrayList<string> words = new ArrayList<string>();
                 ArrayList<string> wordTags = new ArrayList<string>();
                 ArrayList<int> wordStarts = new ArrayList<int>();
                 foreach (TextBlock word in document.GetAnnotatedBlocks("Token", sentence.SpanStart, sentence.SpanEnd)) // token selector is hard-coded
                 {
                     words.Add(word.Text);
                     wordTags.Add(word.Annotation.Features.GetFeatureValue("posTag")); // POS tag feature name is hard-coded
                     wordStarts.Add(word.SpanStart);
                 }
                 engineDoc.AddSentence(words, wordStarts, wordTags);
             }
         }

         // entitySpans[n] belongs to the n-th discovered gazetteer URI.
         ArrayList<Pair<int, int>> entitySpans;
         ArrayList<string> discovered = engineDoc.DiscoverEntities(mEntityRecognitionEngine, out entitySpans);
         int n = -1;
         foreach (string gazetteerUri in discovered)
         {
             n++;
             string instanceUri = mEntityRecognitionEngine.GetIdentifiedInstance(gazetteerUri);
             if (instanceUri == null)
             {
                 continue; // entity could not be identified as an instance
             }
             string annotationName = GetAnnotationName(mEntityRecognitionEngine.GetInstanceClassPath(instanceUri));
             Annotation entity = new Annotation(entitySpans[n].First, entitySpans[n].Second, annotationName);
             document.AddAnnotation(entity);
             entity.Features.SetFeatureValue("gazetteerUri", gazetteerUri);
             entity.Features.SetFeatureValue("instanceUri", instanceUri);
             entity.Features.SetFeatureValue("instanceClassUri", mEntityRecognitionEngine.GetInstanceClass(instanceUri));
             // TODO: instanceLabel, instanceClassLabel
         }
     }
     catch (Exception exception)
     {
         mLogger.Error("ProcessDocument", exception);
     }
 }