/// <summary>
        /// Runs <c>DoOneSentence</c> on a scratch copy of <paramref name="tokens"/> and writes the
        /// resulting NER tag and normalized NER tag back onto the original tokens.
        /// </summary>
        /// <typeparam name="Token">Concrete token type; must be a <c>CoreLabel</c>.</typeparam>
        /// <param name="tokens">Tokens to annotate; their NER and normalized NER values are overwritten in place.</param>
        private void AnnotateTokens <Token>(IList <Token> tokens)
            where Token : CoreLabel
        {
            // Make a copy of the tokens before annotating because QuantifiableEntityNormalizer may change the POS too
            IList <CoreLabel> words = new List <CoreLabel>(tokens.Count);

            foreach (CoreLabel token in tokens)
            {
                // Only word, NER and tag are copied explicitly; everything else the copy
                // receives comes from TransferAnnotations below.
                CoreLabel word = new CoreLabel();
                word.SetWord(token.Word());
                word.SetNER(token.Ner());
                word.SetTag(token.Tag());
                // copy fields potentially set by SUTime
                NumberSequenceClassifier.TransferAnnotations(token, word);
                words.Add(word);
            }
            DoOneSentence(words);
            // TODO: If collapsed is set, tokens for entities are collapsed into one node; then
            // words.Count != tokens.Count and the index-aligned copy-back below does not work.
            for (int i = 0; i < words.Count; i++)
            {
                string ner = words[i].Ner();
                tokens[i].SetNER(ner);
                tokens[i].Set(typeof(CoreAnnotations.NormalizedNamedEntityTagAnnotation), words[i].Get(typeof(CoreAnnotations.NormalizedNamedEntityTagAnnotation)));
            }
        }
        /// <summary>
        /// Classifies a copy of <paramref name="words"/> with <c>nsc</c> and merges the result back
        /// into the original tokens.
        /// </summary>
        /// <remarks>
        /// An original token's NER tag is replaced only when it is unset, equal to
        /// <c>BackgroundSymbol</c>, or <c>"MISC"</c>, and the new guess is not the background symbol.
        /// <c>NumberSequenceClassifier.TransferAnnotations</c> is invoked for every token regardless.
        /// </remarks>
        /// <param name="words">Tokens of the sentence; updated in place.</param>
        /// <param name="doc">Document passed through to the classifier.</param>
        /// <param name="sentence">Sentence passed through to the token copy and the classifier.</param>
        private void DoOneSentenceNew(IList <CoreLabel> words, Annotation doc, ICoreMap sentence)
        {
            IList <CoreLabel> newWords = NumberSequenceClassifier.CopyTokens(words, sentence);

            nsc.ClassifyWithGlobalInformation(newWords, doc, sentence);

            // Walk both lists in lock step by index. The previous version read
            // IEnumerator.Current without ever calling MoveNext(), so it never advanced
            // to a real element.
            for (int i = 0; i < words.Count; i++)
            {
                CoreLabel origWord = words[i];
                CoreLabel newWord  = newWords[i];
                string    before   = origWord.Ner();
                string    newGuess = newWord.Get(typeof(CoreAnnotations.AnswerAnnotation));
                if (Verbose)
                {
                    log.Info(newWord);
                }
                // Do not overwrite an existing, more specific label.
                if ((before == null || before.Equals(BackgroundSymbol) || before.Equals("MISC")) && !newGuess.Equals(BackgroundSymbol))
                {
                    origWord.SetNER(newGuess);
                }
                // transfer other annotations generated by SUTime or NumberNormalizer
                NumberSequenceClassifier.TransferAnnotations(newWord, origWord);
            }
        }
Beispiel #3
0
        /// <summary>
        /// Runs <c>nsc</c> over a copy of <paramref name="words"/> and copies its answers back onto
        /// the original tokens without clobbering existing labels.
        /// </summary>
        /// <remarks>
        /// An original token's <c>AnswerAnnotation</c> is replaced only when it is unset, equal to
        /// <c>nsc.flags.backgroundSymbol</c>, or <c>"MISC"</c>, and the new guess is not the
        /// background symbol. <c>NumberSequenceClassifier.TransferAnnotations</c> is invoked for
        /// every token regardless.
        /// </remarks>
        /// <param name="words">Tokens of the sentence; updated in place.</param>
        /// <param name="document">Document passed through to the classifier.</param>
        /// <param name="sentence">Sentence passed through to the token copy and the classifier.</param>
        private void RecognizeNumberSequences(IList <CoreLabel> words, ICoreMap document, ICoreMap sentence)
        {
            // we need to copy here because NumberSequenceClassifier overwrites the AnswerAnnotation
            IList <CoreLabel> newWords = NumberSequenceClassifier.CopyTokens(words, sentence);

            nsc.ClassifyWithGlobalInformation(newWords, document, sentence);
            // copy AnswerAnnotation back. Do not overwrite!
            // also, copy all the additional annotations generated by SUTime and NumberNormalizer
            // The bound is declared here; it was previously referenced without a declaration.
            for (int i = 0, sz = words.Count; i < sz; i++)
            {
                CoreLabel origWord = words[i];
                CoreLabel newWord  = newWords[i];
                string before   = origWord.Get(typeof(CoreAnnotations.AnswerAnnotation));
                string newGuess = newWord.Get(typeof(CoreAnnotations.AnswerAnnotation));
                if ((before == null || before.Equals(nsc.flags.backgroundSymbol) || before.Equals("MISC")) && !newGuess.Equals(nsc.flags.backgroundSymbol))
                {
                    origWord.Set(typeof(CoreAnnotations.AnswerAnnotation), newGuess);
                }
                // transfer other annotations generated by SUTime or NumberNormalizer
                NumberSequenceClassifier.TransferAnnotations(newWord, origWord);
            }
        }
        /// <summary>
        /// Classifies one sentence with <c>ner</c> and stores the resulting entity tags on the
        /// sentence's tokens.
        /// </summary>
        /// <remarks>
        /// Sentences with more tokens than <c>maxSentenceLength</c>, and sentences whose
        /// classification is interrupted, are handed to <c>DoOneFailedSentence</c> instead.
        /// When <c>language</c> is Spanish, both tags are passed through
        /// <c>SpanishToEnglishTag</c> before being stored.
        /// </remarks>
        /// <param name="annotation">Document annotation passed through to the classifier.</param>
        /// <param name="sentence">Sentence whose <c>TokensAnnotation</c> tokens are tagged in place.</param>
        protected internal override void DoOneSentence(Annotation annotation, ICoreMap sentence)
        {
            IList <CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));

            // Stays null unless classification is attempted and completes.
            IList <CoreLabel> output = null;

            if (tokens.Count <= this.maxSentenceLength)
            {
                try
                {
                    output = this.ner.ClassifySentenceWithGlobalInformation(tokens, annotation, sentence);
                }
                catch (RuntimeInterruptedException)
                {
                    // If we get interrupted, fall through to the failed-sentence path below
                    // (per the original note: set the NER labels to the background symbol
                    // if they are not already set, then exit).
                    output = null;
                }
            }
            if (output == null)
            {
                DoOneFailedSentence(annotation, sentence);
            }
            else
            {
                // The bound is declared here; it was previously referenced without a declaration.
                for (int i = 0, sz = tokens.Count; i < sz; ++i)
                {
                    // add the named entity tag to each token
                    string neTag     = output[i].Get(typeof(CoreAnnotations.NamedEntityTagAnnotation));
                    string normNeTag = output[i].Get(typeof(CoreAnnotations.NormalizedNamedEntityTagAnnotation));
                    if (language.Equals(LanguageInfo.HumanLanguage.Spanish))
                    {
                        neTag     = SpanishToEnglishTag(neTag);
                        normNeTag = SpanishToEnglishTag(normNeTag);
                    }
                    tokens[i].SetNER(neTag);
                    tokens[i].Set(typeof(CoreAnnotations.CoarseNamedEntityTagAnnotation), neTag);
                    // Leave any existing normalized tag alone when the classifier produced none.
                    if (normNeTag != null)
                    {
                        tokens[i].Set(typeof(CoreAnnotations.NormalizedNamedEntityTagAnnotation), normNeTag);
                    }
                    NumberSequenceClassifier.TransferAnnotations(output[i], tokens[i]);
                }
                if (Verbose)
                {
                    // Log a comma-separated summary of every token's text and entity tags.
                    bool          first = true;
                    StringBuilder sb    = new StringBuilder("NERCombinerAnnotator output: [");
                    foreach (CoreLabel w in tokens)
                    {
                        if (first)
                        {
                            first = false;
                        }
                        else
                        {
                            sb.Append(", ");
                        }
                        sb.Append(w.ToShorterString("Text", "NamedEntityTag", "NormalizedNamedEntityTag"));
                    }
                    sb.Append(']');
                    log.Info(sb);
                }
            }
        }