/// <summary>
/// Verifies that every entry of <c>timeStrings</c>, passed through
/// <c>QuantifiableEntityNormalizer.NormalizedTimeString</c>, yields the entry of
/// <c>timeAnswers</c> at the same index.
/// </summary>
public virtual void TestTimeNormalization()
 {
     // Inputs and expected outputs are parallel arrays, so their lengths must agree.
     NUnit.Framework.Assert.AreEqual(timeStrings.Length, timeAnswers.Length);
     int caseCount = timeStrings.Length;
     int index = 0;
     while (index < caseCount)
     {
         var expected = timeAnswers[index];
         var actual = QuantifiableEntityNormalizer.NormalizedTimeString(timeStrings[index], null);
         NUnit.Framework.Assert.AreEqual(expected, actual);
         index++;
     }
 }
 /// <summary>
 /// Verifies that every entry of <c>numberStrings</c>, passed through
 /// <c>QuantifiableEntityNormalizer.NormalizedNumberString</c> with an empty second argument,
 /// yields the entry of <c>numberAnswers</c> at the same index.
 /// </summary>
 public virtual void TestNumberNormalization()
 {
     // Inputs and expected outputs are parallel arrays, so their lengths must agree.
     NUnit.Framework.Assert.AreEqual(numberStrings.Length, numberAnswers.Length);
     int caseCount = numberStrings.Length;
     int index = 0;
     while (index < caseCount)
     {
         var expected = numberAnswers[index];
         var actual = QuantifiableEntityNormalizer.NormalizedNumberString(numberStrings[index], string.Empty, null);
         NUnit.Framework.Assert.AreEqual(expected, actual);
         index++;
     }
 }
 /// <summary>
 /// Verifies that every entry of <c>ordinalStrings</c>, passed through
 /// <c>QuantifiableEntityNormalizer.NormalizedOrdinalString</c>, yields the entry of
 /// <c>ordinalAnswers</c> at the same index.
 /// </summary>
 public virtual void TestOrdinalNormalization()
 {
     // Inputs and expected outputs are parallel arrays, so their lengths must agree.
     NUnit.Framework.Assert.AreEqual(ordinalStrings.Length, ordinalAnswers.Length);
     int caseCount = ordinalStrings.Length;
     int index = 0;
     while (index < caseCount)
     {
         var expected = ordinalAnswers[index];
         var actual = QuantifiableEntityNormalizer.NormalizedOrdinalString(ordinalStrings[index], null);
         NUnit.Framework.Assert.AreEqual(expected, actual);
         index++;
     }
 }
 /// <summary>
 /// Verifies that every entry of <c>moneyStrings</c>, passed through
 /// <c>QuantifiableEntityNormalizer.NormalizedMoneyString</c>, yields the entry of
 /// <c>moneyAnswers</c> at the same index.
 /// </summary>
 public virtual void TestMoneyNormalization()
 {
     // Inputs and expected outputs are parallel arrays, so their lengths must agree.
     NUnit.Framework.Assert.AreEqual(moneyStrings.Length, moneyAnswers.Length);
     int caseCount = moneyStrings.Length;
     int index = 0;
     while (index < caseCount)
     {
         var expected = moneyAnswers[index];
         var actual = QuantifiableEntityNormalizer.NormalizedMoneyString(moneyStrings[index], null);
         NUnit.Framework.Assert.AreEqual(expected, actual);
         index++;
     }
 }
 /// <summary>
 /// Verifies that every entry of <c>percentStrings</c>, passed through
 /// <c>QuantifiableEntityNormalizer.NormalizedPercentString</c>, yields the entry of
 /// <c>percentAnswers</c> at the same index.
 /// </summary>
 public virtual void TestPercentNormalization()
 {
     // Inputs and expected outputs are parallel arrays, so their lengths must agree.
     NUnit.Framework.Assert.AreEqual(percentStrings.Length, percentAnswers.Length);
     int caseCount = percentStrings.Length;
     int index = 0;
     while (index < caseCount)
     {
         var expected = percentAnswers[index];
         var actual = QuantifiableEntityNormalizer.NormalizedPercentString(percentStrings[index], null);
         NUnit.Framework.Assert.AreEqual(expected, actual);
         index++;
     }
 }
 /// <summary>
 /// Verifies that every entry of <c>dateStrings</c>, passed through
 /// <c>QuantifiableEntityNormalizer.NormalizedDateString</c>, yields the entry of
 /// <c>dateAnswers</c> at the same index. The failure message names the input being tested.
 /// </summary>
 public virtual void TestDateNormalization()
 {
     // Inputs and expected outputs are parallel arrays, so their lengths must agree.
     NUnit.Framework.Assert.AreEqual(dateStrings.Length, dateAnswers.Length);
     for (int i = 0; i < dateStrings.Length; i++)
     {
         // NUnit's argument order is (expected, actual, message). The message-first order
         // used by JUnit would compare the message text against the expected answer and
         // never check the normalizer's output.
         NUnit.Framework.Assert.AreEqual(dateAnswers[i], QuantifiableEntityNormalizer.NormalizedDateString(dateStrings[i], null), "Testing " + dateStrings[i]);
     }
 }
 /// <summary>
 /// Calls <c>QuantifiableEntityNormalizer.AddNormalizedQuantitiesToEntities</c> on the given
 /// token list, passing along the <c>collapse</c> value that is in scope.
 /// </summary>
 /// <typeparam name="Token">Element type of the list; constrained to <c>CoreLabel</c> or a subtype.</typeparam>
 /// <param name="words">The token list to process.</param>
 private void DoOneSentence <Token>(IList <Token> words)
     where Token : CoreLabel
 {
     // The parameter type must use the declared type parameter name exactly; C# identifiers
     // are case-sensitive, so "TOKEN" would refer to a different (undeclared) type.
     QuantifiableEntityNormalizer.AddNormalizedQuantitiesToEntities(words, collapse);
 }
Example #8
0
        /// <summary>
        /// Classifies <paramref name="tokens"/> with the base classifier and then post-processes the result:
        /// when <c>applyNumericClassifiers</c> is set, runs the numeric sequence recognizer and the
        /// quantifiable-entity normalizer; in all cases copies the answer fields to the NER field and
        /// finally overlays labels found in <c>gazetteMapping</c>.
        /// </summary>
        /// <param name="tokens">The tokens to classify; also the list scanned for gazette matches.</param>
        /// <param name="document">Document-level annotation handed to the number recognizer and the Chinese normalizer.</param>
        /// <param name="sentence">Sentence-level annotation handed to the number recognizer and the Chinese normalizer.</param>
        /// <returns>The list returned by <c>base.Classify(tokens)</c> after post-processing.</returns>
        /// <remarks>
        /// Failures in the numeric recognizer and in the normalizer are logged and otherwise ignored;
        /// only <c>RuntimeInterruptedException</c> from the recognizer is propagated.
        /// </remarks>
        public override IList <CoreLabel> ClassifyWithGlobalInformation(IList <CoreLabel> tokens, ICoreMap document, ICoreMap sentence)
        {
            IList <CoreLabel> output = base.Classify(tokens);

            if (applyNumericClassifiers)
            {
                try
                {
                    // recognizes additional MONEY, TIME, DATE, and NUMBER using a set of deterministic rules
                    // note: some DATE and TIME entities are recognized by our statistical NER based on MUC
                    // note: this includes SUTime
                    // note: requires TextAnnotation, PartOfSpeechTagAnnotation, and AnswerAnnotation
                    // note: this sets AnswerAnnotation!
                    RecognizeNumberSequences(output, document, sentence);
                }
                catch (RuntimeInterruptedException)
                {
                    // Interruption must not be swallowed by the best-effort handler below.
                    throw;
                }
                catch (Exception e)
                {
                    log.Info("Ignored an exception in NumberSequenceClassifier: (result is that some numbers were not classified)");
                    log.Info("Tokens: " + StringUtils.JoinWords(tokens, " "));
                    Sharpen.Runtime.PrintStackTrace(e, System.Console.Error);
                }
                // AnswerAnnotation -> NERAnnotation
                CopyAnswerFieldsToNERField(output);
                try
                {
                    // normalizes numeric entities such as MONEY, TIME, DATE, or PERCENT
                    // note: this uses and sets NamedEntityTagAnnotation!
                    if (nerLanguage == NERClassifierCombiner.Language.Chinese)
                    {
                        // For chinese there is no support for SUTime by default
                        // We need to hand in document and sentence for Chinese to handle DocDate; however, since English normalization
                        // is handled by SUTime, and the information is passed in recognizeNumberSequences(), English only need output.
                        ChineseQuantifiableEntityNormalizer.AddNormalizedQuantitiesToEntities(output, document, sentence);
                    }
                    else
                    {
                        QuantifiableEntityNormalizer.AddNormalizedQuantitiesToEntities(output, false, useSUTime);
                    }
                }
                // The more specific handler has to come first: C# rejects a catch clause that
                // follows catch (Exception), because the general clause already catches everything.
                catch (AssertionError e)
                {
                    log.Info("Ignored an assertion in QuantifiableEntityNormalizer: (result is that entities were not normalized)");
                    log.Info("Tokens: " + StringUtils.JoinWords(tokens, " "));
                    Sharpen.Runtime.PrintStackTrace(e, System.Console.Error);
                }
                catch (Exception e)
                {
                    log.Info("Ignored an exception in QuantifiableEntityNormalizer: (result is that entities were not normalized)");
                    log.Info("Tokens: " + StringUtils.JoinWords(tokens, " "));
                    Sharpen.Runtime.PrintStackTrace(e, System.Console.Error);
                }
            }
            else
            {
                // AnswerAnnotation -> NERAnnotation
                CopyAnswerFieldsToNERField(output);
            }
            // Apply RegexNER annotations
            // cdm 2016: Used to say and do "// skip first token" but I couldn't understand why, so I removed that.
            foreach (CoreLabel token in tokens)
            {
                // System.out.println(token.toShorterString());
                // Parenthesized to make the existing precedence explicit: a token qualifies if it is
                // (untagged or tagged N* and currently "O"), or if it is "MISC" regardless of its tag.
                if (((token.Tag() == null || token.Tag()[0] == 'N') && "O".Equals(token.Ner())) || "MISC".Equals(token.Ner()))
                {
                    // NOTE(review): the null check below expects a missing key to yield null. If
                    // gazetteMapping is a standard .NET dictionary its indexer throws
                    // KeyNotFoundException instead - confirm the mapping type and use TryGetValue if so.
                    string target = gazetteMapping[token.OriginalText()];
                    if (target != null)
                    {
                        token.SetNER(target);
                    }
                }
            }
            // Return
            return output;
        }