/// <summary>
        /// Runs <c>DoOneSentence</c> over a scratch copy of <paramref name="tokens"/> and copies the
        /// resulting NER tag and normalized NER tag back onto the original tokens.
        /// </summary>
        /// <typeparam name="Token">Concrete token type; must be a <c>CoreLabel</c>.</typeparam>
        /// <param name="tokens">Tokens to annotate; their NER and normalized NER values are overwritten in place.</param>
        private void AnnotateTokens <Token>(IList <Token> tokens)
            where Token : CoreLabel
        {
            // Make a copy of the tokens before annotating because QuantifiableEntityNormalizer may change the POS too
            IList <CoreLabel> words = new List <CoreLabel>();

            foreach (CoreLabel token in tokens)
            {
                CoreLabel word = new CoreLabel();
                word.SetWord(token.Word());
                word.SetNER(token.Ner());
                word.SetTag(token.Tag());
                // copy fields potentially set by SUTime
                NumberSequenceClassifier.TransferAnnotations(token, word);
                words.Add(word);
            }
            DoOneSentence(words);
            // TODO: If collapsed is set, tokens for entities are collapsed into one node; then
            // words.Count != tokens.Count and the positional copy-back below does not work.
            for (int i = 0; i < words.Count; i++)
            {
                // Only the NER tag and its normalized form are propagated back to the caller's tokens.
                string ner = words[i].Ner();
                tokens[i].SetNER(ner);
                tokens[i].Set(typeof(CoreAnnotations.NormalizedNamedEntityTagAnnotation), words[i].Get(typeof(CoreAnnotations.NormalizedNamedEntityTagAnnotation)));
            }
        }
        /// <summary>
        /// Classifies a copy of <paramref name="words"/> with the number sequence classifier and merges
        /// the results back into the original tokens.
        /// </summary>
        /// <remarks>
        /// An original NER tag is only replaced when it is null, the background symbol, or "MISC",
        /// and the new guess is not the background symbol.
        /// </remarks>
        /// <param name="words">Tokens of the sentence; updated in place.</param>
        /// <param name="doc">Document passed through to the classifier.</param>
        /// <param name="sentence">Sentence passed through to the classifier and to the token copy.</param>
        private void DoOneSentenceNew(IList <CoreLabel> words, Annotation doc, ICoreMap sentence)
        {
            IList <CoreLabel> newWords = NumberSequenceClassifier.CopyTokens(words, sentence);

            nsc.ClassifyWithGlobalInformation(newWords, doc, sentence);

            // Walk both lists in lock-step by index. The previous code read IEnumerator.Current
            // without ever calling MoveNext(), so it never obtained a valid element.
            for (int i = 0; i < words.Count; i++)
            {
                CoreLabel origWord = words[i];
                CoreLabel newWord  = newWords[i];
                string    before   = origWord.Ner();
                string    newGuess = newWord.Get(typeof(CoreAnnotations.AnswerAnnotation));
                if (Verbose)
                {
                    log.Info(newWord);
                }
                // Do not overwrite an existing, meaningful NER tag.
                if ((before == null || before.Equals(BackgroundSymbol) || before.Equals("MISC")) && !newGuess.Equals(BackgroundSymbol))
                {
                    origWord.SetNER(newGuess);
                }
                // transfer other annotations generated by SUTime or NumberNormalizer
                NumberSequenceClassifier.TransferAnnotations(newWord, origWord);
            }
        }
        /// <summary>
        /// Convenience entry point for callers that have a single sentence rather than a complete Annotation.
        /// </summary>
        /// <param name="sentence">Sentence to process; it is aligned via <c>NumberSequenceClassifier.AlignSentence</c> first.</param>
        /// <param name="docDate">Document date string; an empty string is treated the same as null.</param>
        /// <param name="timeIndex">Time index handed to the time expression extractor.</param>
        /// <returns>A list of CoreMap.  Each CoreMap represents a detected temporal expression.</returns>
        public virtual IList <ICoreMap> AnnotateSingleSentence(ICoreMap sentence, string docDate, SUTime.TimeIndex timeIndex)
        {
            ICoreMap aligned = NumberSequenceClassifier.AlignSentence(sentence);

            // Normalize "no date" to null so the extractor never sees an empty string.
            string effectiveDocDate = string.IsNullOrEmpty(docDate) ? null : docDate;

            return timexExtractor.ExtractTimeExpressionCoreMaps(aligned, effectiveDocDate, timeIndex);
        }
        // Example no. 4
        // 0
        /// <summary>
        /// Runs the number sequence classifier over a copy of <paramref name="words"/> and merges the
        /// resulting AnswerAnnotation values and extra annotations back into the original tokens.
        /// </summary>
        /// <remarks>
        /// An existing AnswerAnnotation is only replaced when it is null, the classifier's background
        /// symbol, or "MISC", and the new guess is not the background symbol.
        /// </remarks>
        /// <param name="words">Tokens of the sentence; updated in place.</param>
        /// <param name="document">Document passed through to the classifier.</param>
        /// <param name="sentence">Sentence passed through to the classifier and to the token copy.</param>
        private void RecognizeNumberSequences(IList <CoreLabel> words, ICoreMap document, ICoreMap sentence)
        {
            // we need to copy here because NumberSequenceClassifier overwrites the AnswerAnnotation
            IList <CoreLabel> newWords = NumberSequenceClassifier.CopyTokens(words, sentence);

            nsc.ClassifyWithGlobalInformation(newWords, document, sentence);
            // copy AnswerAnnotation back. Do not overwrite!
            // also, copy all the additional annotations generated by SUTime and NumberNormalizer
            int sz = words.Count; // loop bound was previously referenced without being declared
            for (int i = 0; i < sz; i++)
            {
                CoreLabel origWord = words[i];
                CoreLabel newWord  = newWords[i];
                string before   = origWord.Get(typeof(CoreAnnotations.AnswerAnnotation));
                string newGuess = newWord.Get(typeof(CoreAnnotations.AnswerAnnotation));
                if ((before == null || before.Equals(nsc.flags.backgroundSymbol) || before.Equals("MISC")) && !newGuess.Equals(nsc.flags.backgroundSymbol))
                {
                    origWord.Set(typeof(CoreAnnotations.AnswerAnnotation), newGuess);
                }
                // transfer other annotations generated by SUTime or NumberNormalizer
                NumberSequenceClassifier.TransferAnnotations(newWord, origWord);
            }
        }
        /// <summary>
        /// Tags the tokens of one sentence with named entity labels produced by <c>ner</c>.
        /// </summary>
        /// <remarks>
        /// Sentences longer than <c>maxSentenceLength</c>, and sentences whose classification is
        /// interrupted, are handed to <c>DoOneFailedSentence</c> instead.
        /// </remarks>
        /// <param name="annotation">Document annotation passed through to the classifier.</param>
        /// <param name="sentence">Sentence whose tokens are annotated in place.</param>
        protected internal override void DoOneSentence(Annotation annotation, ICoreMap sentence)
        {
            IList <CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));

            // Stays null when the sentence is too long or classification is interrupted.
            IList <CoreLabel> output = null;

            if (tokens.Count <= this.maxSentenceLength)
            {
                try
                {
                    output = this.ner.ClassifySentenceWithGlobalInformation(tokens, annotation, sentence);
                }
                catch (RuntimeInterruptedException)
                {
                    // Interrupted: fall through to the failed-sentence handling below.
                    output = null;
                }
            }
            if (output == null)
            {
                DoOneFailedSentence(annotation, sentence);
            }
            else
            {
                int sz = tokens.Count; // loop bound was previously referenced without being declared
                for (int i = 0; i < sz; ++i)
                {
                    // add the named entity tag to each token
                    string neTag     = output[i].Get(typeof(CoreAnnotations.NamedEntityTagAnnotation));
                    string normNeTag = output[i].Get(typeof(CoreAnnotations.NormalizedNamedEntityTagAnnotation));
                    if (language.Equals(LanguageInfo.HumanLanguage.Spanish))
                    {
                        // Spanish tags are mapped through SpanishToEnglishTag before being stored.
                        neTag     = SpanishToEnglishTag(neTag);
                        normNeTag = SpanishToEnglishTag(normNeTag);
                    }
                    tokens[i].SetNER(neTag);
                    tokens[i].Set(typeof(CoreAnnotations.CoarseNamedEntityTagAnnotation), neTag);
                    if (normNeTag != null)
                    {
                        tokens[i].Set(typeof(CoreAnnotations.NormalizedNamedEntityTagAnnotation), normNeTag);
                    }
                    NumberSequenceClassifier.TransferAnnotations(output[i], tokens[i]);
                }
                if (Verbose)
                {
                    // Log a comma-separated dump of every token's text and NER fields.
                    bool          first = true;
                    StringBuilder sb    = new StringBuilder("NERCombinerAnnotator output: [");
                    foreach (CoreLabel w in tokens)
                    {
                        if (first)
                        {
                            first = false;
                        }
                        else
                        {
                            sb.Append(", ");
                        }
                        sb.Append(w.ToShorterString("Text", "NamedEntityTag", "NormalizedNamedEntityTag"));
                    }
                    sb.Append(']');
                    log.Info(sb);
                }
            }
        }
        /// <summary>
        /// Extracts time expressions from <paramref name="annotation"/> and stores them under
        /// <c>TimeAnnotations.TimexAnnotations</c>.
        /// </summary>
        /// <remarks>
        /// When the annotation has sentences, each sentence is processed separately and also receives
        /// its own time expressions and numerized tokens; otherwise the annotation is treated as a
        /// single sentence.
        /// </remarks>
        /// <param name="annotation">Annotation to read from and write results to.</param>
        public virtual void Annotate(Annotation annotation)
        {
            SUTime.TimeIndex timeIndex = new SUTime.TimeIndex();
            string           docDate   = annotation.Get(typeof(CoreAnnotations.DocDateAnnotation));

            // Fall back to the calendar annotation when no explicit document date is present.
            if (docDate == null)
            {
                Calendar calendar = annotation.Get(typeof(CoreAnnotations.CalendarAnnotation));
                if (calendar != null)
                {
                    SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd:hh:mm:ss");
                    docDate = formatter.Format(calendar.GetTime());
                }
                else if (!quiet)
                {
                    log.Warn("No document date specified");
                }
            }

            IList <ICoreMap> sentenceList = annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));
            IList <ICoreMap> allTimeExpressions;

            if (sentenceList == null)
            {
                // No sentence split available: treat the whole annotation as one sentence.
                allTimeExpressions = AnnotateSingleSentence(annotation, docDate, timeIndex);
            }
            else
            {
                allTimeExpressions = new List <ICoreMap>();
                IList <ICoreMap> collectedNumerics = new List <ICoreMap>();

                foreach (ICoreMap sentence in sentenceList)
                {
                    // make sure that token character offsets align with the actual sentence text
                    // They may not align due to token normalizations, such as "(" to "-LRB-".
                    ICoreMap aligned = NumberSequenceClassifier.AlignSentence(sentence);

                    IList <ICoreMap> found = timexExtractor.ExtractTimeExpressionCoreMaps(aligned, docDate, timeIndex);
                    if (found != null)
                    {
                        Sharpen.Collections.AddAll(allTimeExpressions, found);
                        sentence.Set(typeof(TimeAnnotations.TimexAnnotations), found);
                        // Stamp every expression with the index of the sentence it came from.
                        foreach (ICoreMap expression in found)
                        {
                            expression.Set(typeof(CoreAnnotations.SentenceIndexAnnotation), sentence.Get(typeof(CoreAnnotations.SentenceIndexAnnotation)));
                        }
                    }

                    IList <ICoreMap> numerized = aligned.Get(typeof(CoreAnnotations.NumerizedTokensAnnotation));
                    if (numerized != null)
                    {
                        sentence.Set(typeof(CoreAnnotations.NumerizedTokensAnnotation), numerized);
                        Sharpen.Collections.AddAll(collectedNumerics, numerized);
                    }
                }
                annotation.Set(typeof(CoreAnnotations.NumerizedTokensAnnotation), collectedNumerics);
            }
            annotation.Set(typeof(TimeAnnotations.TimexAnnotations), allTimeExpressions);
        }