/// <summary>
/// Runs quantifiable-entity normalization over the tokens of one sentence by delegating to
/// <c>QuantifiableEntityNormalizer.AddNormalizedQuantitiesToEntities</c>.
/// </summary>
/// <typeparam name="Token">Concrete token type; must be <c>CoreLabel</c> or a subtype.</typeparam>
/// <param name="words">Tokens of the sentence, handed to the normalizer together with the
/// <c>collapse</c> value declared outside this method.</param>
private void DoOneSentence<Token>(IList<Token> words)
    where Token : CoreLabel
{
    // The parameter type must name the declared type parameter exactly: C# is case-sensitive,
    // so a differently-cased name would not be bound by the constraint above.
    QuantifiableEntityNormalizer.AddNormalizedQuantitiesToEntities(words, collapse);
}
Ejemplo n.º 2
0
        /// <summary>
        /// Classifies <paramref name="tokens"/> with the base classifier and then post-processes the
        /// result: optional rule-based recognition and normalization of numeric entities, copying of
        /// the answer fields into the NER field, and finally a gazette lookup on the original text
        /// of selected tokens.
        /// </summary>
        /// <param name="tokens">Tokens of the sentence to classify.</param>
        /// <param name="document">Document context, forwarded to number-sequence recognition and to
        /// the Chinese normalizer.</param>
        /// <param name="sentence">Sentence context, forwarded to the same two calls.</param>
        /// <returns>The list produced by <c>base.Classify(tokens)</c> after post-processing.</returns>
        /// <remarks>
        /// Failures in the numeric recognition and normalization steps are logged and ignored, so
        /// the method still returns a result; only <c>RuntimeInterruptedException</c> is propagated.
        /// </remarks>
        public override IList<CoreLabel> ClassifyWithGlobalInformation(IList<CoreLabel> tokens, ICoreMap document, ICoreMap sentence)
        {
            IList<CoreLabel> output = base.Classify(tokens);

            if (applyNumericClassifiers)
            {
                try
                {
                    // recognizes additional MONEY, TIME, DATE, and NUMBER using a set of deterministic rules
                    // note: some DATE and TIME entities are recognized by our statistical NER based on MUC
                    // note: this includes SUTime
                    // note: requires TextAnnotation, PartOfSpeechTagAnnotation, and AnswerAnnotation
                    // note: this sets AnswerAnnotation!
                    RecognizeNumberSequences(output, document, sentence);
                }
                catch (RuntimeInterruptedException)
                {
                    // Interruption must reach the caller; everything else below is best-effort.
                    throw;
                }
                catch (Exception e)
                {
                    log.Info("Ignored an exception in NumberSequenceClassifier: (result is that some numbers were not classified)");
                    log.Info("Tokens: " + StringUtils.JoinWords(tokens, " "));
                    Sharpen.Runtime.PrintStackTrace(e, System.Console.Error);
                }

                // AnswerAnnotation -> NERAnnotation
                CopyAnswerFieldsToNERField(output);

                try
                {
                    // normalizes numeric entities such as MONEY, TIME, DATE, or PERCENT
                    // note: this uses and sets NamedEntityTagAnnotation!
                    if (nerLanguage == NERClassifierCombiner.Language.Chinese)
                    {
                        // For Chinese there is no support for SUTime by default.
                        // Document and sentence are handed in so that the Chinese normalizer can handle DocDate;
                        // English normalization is handled by SUTime with that information already passed to
                        // RecognizeNumberSequences(), so the English path only needs output.
                        ChineseQuantifiableEntityNormalizer.AddNormalizedQuantitiesToEntities(output, document, sentence);
                    }
                    else
                    {
                        QuantifiableEntityNormalizer.AddNormalizedQuantitiesToEntities(output, false, useSUTime);
                    }
                }
                catch (AssertionError e)
                {
                    // The specific handler has to precede catch (Exception): every catchable type in C#
                    // derives from System.Exception, so in the opposite order this clause is unreachable
                    // and the compiler rejects it (CS0160).
                    log.Info("Ignored an assertion in QuantifiableEntityNormalizer: (result is that entities were not normalized)");
                    log.Info("Tokens: " + StringUtils.JoinWords(tokens, " "));
                    Sharpen.Runtime.PrintStackTrace(e, System.Console.Error);
                }
                catch (Exception e)
                {
                    log.Info("Ignored an exception in QuantifiableEntityNormalizer: (result is that entities were not normalized)");
                    log.Info("Tokens: " + StringUtils.JoinWords(tokens, " "));
                    Sharpen.Runtime.PrintStackTrace(e, System.Console.Error);
                }
            }
            else
            {
                // AnswerAnnotation -> NERAnnotation
                CopyAnswerFieldsToNERField(output);
            }

            // Apply RegexNER annotations
            // cdm 2016: Used to say and do "// skip first token" but I couldn't understand why, so I removed that.
            // NOTE(review): this walks `tokens` while the method returns `output`; that only has the
            // intended effect if both lists hold the same CoreLabel instances -- confirm.
            foreach (CoreLabel token in tokens)
            {
                string tag = token.Tag();
                string ner = token.Ner();

                // Grouping made explicit: && binds tighter than ||, so a "MISC" token qualifies
                // regardless of its tag, while an "O" token must have no tag or a tag starting with 'N'.
                // NOTE(review): possibly `tagOk && (O || MISC)` was intended -- behaviour left unchanged.
                bool untaggedNounLike = (tag == null || tag[0] == 'N') && "O".Equals(ner);
                if (!untaggedNounLike && !"MISC".Equals(ner))
                {
                    continue;
                }

                // Look the text up without throwing: a dictionary indexer raises KeyNotFoundException
                // for an absent key, whereas this code treats "no mapping" as a normal outcome.
                // NOTE(review): assumes gazetteMapping exposes TryGetValue (e.g. IDictionary<string, string>).
                string text = token.OriginalText();
                string target;
                if (text != null && gazetteMapping.TryGetValue(text, out target) && target != null)
                {
                    token.SetNER(target);
                }
            }

            return output;
        }