/// <summary>
/// Annotates a copy of <paramref name="tokens"/> via <c>DoOneSentence</c> and then writes the
/// resulting NER tag and normalized NER tag back onto the original tokens.
/// </summary>
/// <typeparam name="Token">Concrete token type; must be a <c>CoreLabel</c>.</typeparam>
/// <param name="tokens">Tokens to annotate in place. Only their NER tag and normalized NER tag are overwritten.</param>
private void AnnotateTokens<Token>(IList<Token> tokens)
    where Token : CoreLabel
{
    // Make a copy of the tokens before annotating because QuantifiableEntityNormalizer may change the POS too.
    IList<CoreLabel> words = new List<CoreLabel>();
    foreach (CoreLabel token in tokens)
    {
        CoreLabel word = new CoreLabel();
        word.SetWord(token.Word());
        word.SetNER(token.Ner());
        word.SetTag(token.Tag());
        // Copy fields potentially set by SUTime.
        NumberSequenceClassifier.TransferAnnotations(token, word);
        words.Add(word);
    }

    DoOneSentence(words);

    // TODO: If collapsed is set, tokens for entities are collapsed into one node; then
    // words.Count != tokens.Count and the positional copy-back below does not line up.
    for (int i = 0; i < words.Count; i++)
    {
        string ner = words[i].Ner();
        tokens[i].SetNER(ner);
        tokens[i].Set(
            typeof(CoreAnnotations.NormalizedNamedEntityTagAnnotation),
            words[i].Get(typeof(CoreAnnotations.NormalizedNamedEntityTagAnnotation)));
    }
}
/// <summary>
/// Classifies a copy of <paramref name="words"/> with the number sequence classifier and merges
/// the results back into the original tokens.
/// </summary>
/// <remarks>
/// An original token's NER tag is replaced only when it is unset, equal to
/// <c>BackgroundSymbol</c>, or "MISC", and the classifier's guess is not the background symbol.
/// Other annotations on the classified copy are transferred via
/// <c>NumberSequenceClassifier.TransferAnnotations</c> regardless.
/// </remarks>
/// <param name="words">Tokens of the sentence; updated in place.</param>
/// <param name="doc">Document passed through to the classifier.</param>
/// <param name="sentence">Sentence passed through to the token copy and the classifier.</param>
private void DoOneSentenceNew(IList<CoreLabel> words, Annotation doc, ICoreMap sentence)
{
    IList<CoreLabel> newWords = NumberSequenceClassifier.CopyTokens(words, sentence);
    nsc.ClassifyWithGlobalInformation(newWords, doc, sentence);

    // Walk both lists in lockstep by index. Reading IEnumerator.Current without a preceding
    // MoveNext() never advances, so the classified copy is addressed positionally instead.
    for (int i = 0; i < words.Count; i++)
    {
        CoreLabel origWord = words[i];
        CoreLabel newWord = newWords[i];
        string before = origWord.Ner();
        string newGuess = newWord.Get(typeof(CoreAnnotations.AnswerAnnotation));
        if (Verbose)
        {
            log.Info(newWord);
        }

        if ((before == null || before.Equals(BackgroundSymbol) || before.Equals("MISC"))
            && !newGuess.Equals(BackgroundSymbol))
        {
            origWord.SetNER(newGuess);
        }

        // Transfer other annotations generated by SUTime or NumberNormalizer.
        NumberSequenceClassifier.TransferAnnotations(newWord, origWord);
    }
}
/// <summary>
/// Runs the number sequence classifier over a copy of <paramref name="words"/> and copies its
/// answers and extra annotations back onto the original tokens.
/// </summary>
/// <remarks>
/// An existing <c>AnswerAnnotation</c> is overwritten only when it is unset, equal to the
/// classifier's background symbol, or "MISC", and the new guess is not the background symbol.
/// </remarks>
/// <param name="words">Tokens of the sentence; updated in place.</param>
/// <param name="document">Document passed through to the classifier.</param>
/// <param name="sentence">Sentence passed through to the token copy and the classifier.</param>
private void RecognizeNumberSequences(IList<CoreLabel> words, ICoreMap document, ICoreMap sentence)
{
    // We need to copy here because NumberSequenceClassifier overwrites the AnswerAnnotation.
    IList<CoreLabel> newWords = NumberSequenceClassifier.CopyTokens(words, sentence);
    nsc.ClassifyWithGlobalInformation(newWords, document, sentence);

    // Copy AnswerAnnotation back. Do not overwrite!
    // Also copy all the additional annotations generated by SUTime and NumberNormalizer.
    for (int i = 0, sz = words.Count; i < sz; i++)
    {
        CoreLabel origWord = words[i];
        CoreLabel newWord = newWords[i];
        string before = origWord.Get(typeof(CoreAnnotations.AnswerAnnotation));
        string newGuess = newWord.Get(typeof(CoreAnnotations.AnswerAnnotation));
        if ((before == null || before.Equals(nsc.flags.backgroundSymbol) || before.Equals("MISC"))
            && !newGuess.Equals(nsc.flags.backgroundSymbol))
        {
            origWord.Set(typeof(CoreAnnotations.AnswerAnnotation), newGuess);
        }

        // Transfer other annotations generated by SUTime or NumberNormalizer.
        NumberSequenceClassifier.TransferAnnotations(newWord, origWord);
    }
}
/// <summary>
/// Classifies the tokens of <paramref name="sentence"/> and writes the named entity tag, coarse
/// named entity tag and (when present) normalized named entity tag onto each token.
/// </summary>
/// <remarks>
/// Sentences with more than <c>maxSentenceLength</c> tokens, and sentences whose classification
/// is interrupted, are handed to <c>DoOneFailedSentence</c> instead. When the language is
/// Spanish, both tags are passed through <c>SpanishToEnglishTag</c> before being stored.
/// </remarks>
/// <param name="annotation">Document annotation passed through to the classifier.</param>
/// <param name="sentence">Sentence whose <c>TokensAnnotation</c> tokens are tagged in place.</param>
protected internal override void DoOneSentence(Annotation annotation, ICoreMap sentence)
{
    IList<CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));
    IList<CoreLabel> output; // Only used if the classification in the try block succeeds.
    if (tokens.Count <= this.maxSentenceLength)
    {
        try
        {
            output = this.ner.ClassifySentenceWithGlobalInformation(tokens, annotation, sentence);
        }
        catch (RuntimeInterruptedException)
        {
            // If we get interrupted, set the NER labels to the background
            // symbol if they are not already set, then exit.
            // (Handled by routing through the failed-sentence path below.)
            output = null;
        }
    }
    else
    {
        output = null;
    }

    if (output == null)
    {
        DoOneFailedSentence(annotation, sentence);
    }
    else
    {
        for (int i = 0, sz = tokens.Count; i < sz; ++i)
        {
            // Add the named entity tag to each token.
            string neTag = output[i].Get(typeof(CoreAnnotations.NamedEntityTagAnnotation));
            string normNeTag = output[i].Get(typeof(CoreAnnotations.NormalizedNamedEntityTagAnnotation));
            if (language.Equals(LanguageInfo.HumanLanguage.Spanish))
            {
                neTag = SpanishToEnglishTag(neTag);
                normNeTag = SpanishToEnglishTag(normNeTag);
            }

            tokens[i].SetNER(neTag);
            tokens[i].Set(typeof(CoreAnnotations.CoarseNamedEntityTagAnnotation), neTag);
            if (normNeTag != null)
            {
                tokens[i].Set(typeof(CoreAnnotations.NormalizedNamedEntityTagAnnotation), normNeTag);
            }

            NumberSequenceClassifier.TransferAnnotations(output[i], tokens[i]);
        }

        if (Verbose)
        {
            // Log a comma-separated summary of every token's text and NER tags.
            bool first = true;
            StringBuilder sb = new StringBuilder("NERCombinerAnnotator output: [");
            foreach (CoreLabel w in tokens)
            {
                if (first)
                {
                    first = false;
                }
                else
                {
                    sb.Append(", ");
                }

                sb.Append(w.ToShorterString("Text", "NamedEntityTag", "NormalizedNamedEntityTag"));
            }

            sb.Append(']');
            log.Info(sb);
        }
    }
}