Ejemplo n.º 1
0
        /// <summary>
        /// Prunes each inner list of <paramref name="predictedMentions"/> in place so that at most one
        /// mention per head index remains in it.
        /// </summary>
        /// <remarks>
        /// First, the lowercase-normalized span string of every mention whose head word has an NER tag
        /// other than "O" is collected across all lists. Then, for every inner list, mentions are grouped
        /// by <c>headIndex</c>; in each group with more than one member every mention is scored with
        /// <c>ProbabilityOf</c>, the arg-max mention is kept and the others are removed from the list.
        /// </remarks>
        /// <param name="predictedMentions">Lists of candidate mentions; the inner lists are modified in place.</param>
        /// <param name="dict">Passed through unchanged to <c>ProbabilityOf</c>.</param>
        /// <param name="props">Passed through unchanged to <c>ProbabilityOf</c>.</param>
        public virtual void ClassifyMentions(IList <IList <Mention> > predictedMentions, Dictionaries dict, Properties props)
        {
            ICollection <string> neStrings = Generics.NewHashSet();

            foreach (IList <Mention> predictedMention in predictedMentions)
            {
                foreach (Mention m in predictedMention)
                {
                    string ne = m.headWord.Ner();
                    if (ne.Equals("O"))
                    {
                        continue;
                    }
                    // NOTE(review): this loop does not filter anything. The `continue` only advances
                    // this inner loop, so mentions whose tokens disagree with the head tag are still
                    // added below. It looks like the intent was to skip such mentions; it is left
                    // as-is because changing it alters the strings handed to ProbabilityOf.
                    // Confirm the intended behavior before fixing.
                    foreach (CoreLabel cl in m.originalSpan)
                    {
                        if (!cl.Ner().Equals(ne))
                        {
                            continue;
                        }
                    }
                    neStrings.Add(m.LowercaseNormalizedSpanString());
                }
            }
            foreach (IList <Mention> predicts in predictedMentions)
            {
                // Group the mentions of this list by the index of their head word.
                IDictionary <int, ICollection <Mention> > headPositions = Generics.NewHashMap();
                foreach (Mention p in predicts)
                {
                    // Single lookup by key; create the group on first sight of this head index.
                    ICollection <Mention> sameHead;
                    if (!headPositions.TryGetValue(p.headIndex, out sameHead))
                    {
                        sameHead = Generics.NewHashSet();
                        headPositions[p.headIndex] = sameHead;
                    }
                    sameHead.Add(p);
                }
                // Collect the losers first and delete afterwards, so `predicts` is never
                // modified while it is being enumerated.
                ICollection <Mention> remove = Generics.NewHashSet();
                foreach (ICollection <Mention> shares in headPositions.Values)
                {
                    if (shares.Count > 1)
                    {
                        ICounter <Mention> probs = new ClassicCounter <Mention>();
                        foreach (Mention p_1 in shares)
                        {
                            double trueProb = ProbabilityOf(p_1, shares, neStrings, dict, props);
                            probs.IncrementCount(p_1, trueProb);
                        }
                        // Keep the best-scoring mention; everything left in the counter is removed.
                        Mention keep = Counters.Argmax(probs, null);
                        probs.Remove(keep);
                        Sharpen.Collections.AddAll(remove, probs.KeySet());
                    }
                }
                foreach (Mention r in remove)
                {
                    predicts.Remove(r);
                }
            }
        }
        // TODO not called any more, but possibly useful as a reference
        /// <summary>
        /// Re-classifies every token list in <paramref name="testSet"/> with the trained classifier and
        /// compares each predicted answer against the expected (gold) answer.
        /// </summary>
        /// <remarks>
        /// Meant to be called after the classifier has been trained and parseAndTrain has accumulated
        /// the test set. Every true positive, false positive and false negative is written to standard
        /// output. The per-label tallies live only in local counters: this method returns nothing and
        /// does not itself compute precision, recall or F1.
        /// </remarks>
        /// <param name="testSet">Token lists carrying the expected answer annotation.</param>
        public virtual void RunTestSet(IList <IList <CoreLabel> > testSet)
        {
            ICounter <string> truePositives  = new ClassicCounter <string>();
            ICounter <string> falsePositives = new ClassicCounter <string>();
            ICounter <string> falseNegatives = new ClassicCounter <string>();
            ICounter <string> goldCounts     = new ClassicCounter <string>();

            foreach (IList <CoreLabel> goldTokens in testSet)
            {
                // Copy only the word and part-of-speech annotations, leaving the answer annotation out.
                IList <CoreLabel> strippedTokens = new List <CoreLabel>();
                foreach (CoreLabel goldToken in goldTokens)
                {
                    CoreLabel copy = new CoreLabel();
                    copy.Set(annotationForWord, goldToken.Get(annotationForWord));
                    copy.Set(typeof(CoreAnnotations.PartOfSpeechAnnotation), goldToken.Get(typeof(CoreAnnotations.PartOfSpeechAnnotation)));
                    strippedTokens.Add(copy);
                }

                IList <CoreLabel> predictedTokens = this.classifier.Classify(strippedTokens);

                // Walk gold and predicted tokens in lock step.
                for (int pos = 0; pos < goldTokens.Count; pos++)
                {
                    CoreLabel goldLabel      = goldTokens[pos];
                    CoreLabel predictedLabel = predictedTokens[pos];
                    string    predicted      = predictedLabel.Get(typeof(CoreAnnotations.AnswerAnnotation));
                    string    gold           = goldLabel.Get(typeof(CoreAnnotations.AnswerAnnotation));

                    goldCounts.IncrementCount(gold);

                    bool goldIsBackground = SeqClassifierFlags.DefaultBackgroundSymbol.Equals(gold);
                    if (!goldIsBackground && gold.Equals(predicted))
                    {
                        // Non-background label predicted correctly.
                        truePositives.IncrementCount(predicted);
                        System.Console.Out.WriteLine("True Positive:" + predictedLabel);
                    }
                    else if (!SeqClassifierFlags.DefaultBackgroundSymbol.Equals(predicted))
                    {
                        // A non-background label was predicted but it does not match the gold one.
                        falsePositives.IncrementCount(predicted);
                        System.Console.Out.WriteLine("False Positive:" + predictedLabel);
                    }
                    else if (!goldIsBackground)
                    {
                        // Background predicted where a real label was expected.
                        falseNegatives.IncrementCount(gold);
                        System.Console.Out.WriteLine("False Negative:" + goldLabel);
                    }
                    // Otherwise both are background: a true negative, which is not tallied.
                }
            }
            goldCounts.Remove(SeqClassifierFlags.DefaultBackgroundSymbol);
        }
Ejemplo n.º 3
0
        /// <summary>Guesses a single NER tag for a span of tokens by voting over the tokens' own tags.</summary>
        /// <remarks>
        /// Tokens tagged "O" and tokens with no tag (null) do not vote. The most frequent remaining tag
        /// is returned only when its count is at least <c>span.Size() / 2</c>; otherwise "O" is returned.
        /// </remarks>
        /// <param name="tokens">The tokens that <paramref name="span"/> indexes into.</param>
        /// <param name="span">The token indices to inspect.</param>
        /// <returns>The winning NER tag, or "O" when no tag reaches the threshold.</returns>
        public static string GuessNER(IList <CoreLabel> tokens, Span span)
        {
            ICounter <string> nerGuesses = new ClassicCounter <string>();

            foreach (int i in span)
            {
                string tag = tokens[i].Ner();
                // Skip untagged and background tokens up front instead of counting them and removing
                // them afterwards, so a null tag is never used as a counter key.
                if (tag == null || tag.Equals("O"))
                {
                    continue;
                }
                nerGuesses.IncrementCount(tag);
            }
            // NOTE(review): if Size() returns an int (confirm), `/ 2` is integer division and the
            // threshold rounds down for odd-length spans (e.g. 1 vote suffices for a span of 3).
            // Kept as in the original; verify that this leniency is intended.
            if (nerGuesses.Size() > 0 && Counters.Max(nerGuesses) >= span.Size() / 2)
            {
                return(Counters.Argmax(nerGuesses));
            }
            return("O");
        }
Ejemplo n.º 4
0
        /// <summary>Select the most common element of the given type in the given span.</summary>
        /// <remarks>
        /// This is useful for, e.g., finding the most likely NER tag of a given span, or the most
        /// likely POS tag of a given span.
        /// Null entries are ignored. The selector is applied to the sentence exactly once per call.
        /// </remarks>
        /// <typeparam name="E">The element type of the list produced by <paramref name="selector"/>.</typeparam>
        /// <param name="span">The span of the sentence to find the mode element in. This must be entirely contained in the sentence.</param>
        /// <param name="selector">The property of the sentence we are getting the mode of. For example, <c>Sentence::posTags</c></param>
        /// <returns>
        /// The most common non-null element of the given property within the span, as chosen by
        /// <c>Counters.Argmax</c> (its result for a span with no non-null elements is not visible here).
        /// </returns>
        /// <exception cref="ArgumentException">
        /// If <paramref name="span"/> is not contained in the span from 0 to the sentence length.
        /// </exception>
        public virtual E ModeInSpan <E>(Span span, IFunction <Sentence, IList <E> > selector)
        {
            if (!Span.FromValues(0, sentence.Length()).Contains(span))
            {
                throw new ArgumentException("Span must be entirely contained in the sentence: " + span + " (sentence length=" + sentence.Length() + ")");
            }
            // The selected list does not depend on the loop index, so fetch it once instead of
            // re-applying the selector for every position in the span.
            IList <E>    values     = selector.Apply(sentence);
            ICounter <E> candidates = new ClassicCounter <E>();

            foreach (int i in span)
            {
                E value = values[i];
                // Leave null entries out of the vote so null is never used as a counter key.
                if (value != null)
                {
                    candidates.IncrementCount(value);
                }
            }
            return(Counters.Argmax(candidates));
        }