C# (CSharp) AcronymMatcher.IsAcronym示例

编程语言: C# (CSharp)

类/类型: AcronymMatcher

方法/功能: IsAcronym

hotexamples.com的示例: 4

C# (CSharp) AcronymMatcher.IsAcronym - 共找到 4 个示例。以下是从开源项目中提取的、评价最高的 AcronymMatcher.IsAcronym 真实 C# (CSharp) 代码示例。您可以为示例评分，帮助我们提高示例质量。

常用方法

显示/隐藏

IsAcronym(4)

常用方法

IsAcronym (4)

示例#1

显示文件

文件： KBPBasicSpanishCorefSystem.cs 项目： awesomedotnetcore/Stanford.CoreNLP.NET

        /// <summary>Approximately check if two entities are equivalent.</summary>
        /// <remarks>
        /// Returns 1.0 when <c>NearExactEntityMatch</c> accepts the two glosses, or when
        /// <c>AcronymMatcher.IsAcronym</c> accepts their token arrays. Otherwise returns the number of
        /// tokens of <paramref name="higherGloss"/> that loosely match at least one still-unmatched token
        /// of <paramref name="lowerGloss"/>, divided by the larger of the two token counts.
        /// Taken largely from
        /// edu.stanford.nlp.kbp.slotfilling.evaluate,HeuristicSlotfillPostProcessors.NoDuplicatesApproximate;
        /// </remarks>
        /// <param name="higherGloss">First entity gloss; its tokens drive the outer matching loop.</param>
        /// <param name="lowerGloss">Second entity gloss; its tokens are the match candidates.</param>
        /// <returns>A score in [0, 1]; 1.0 for near-exact or acronym matches.</returns>
        public virtual double ApproximateEntityMatchScore(string higherGloss, string lowerGloss)
        {
            if (NearExactEntityMatch(higherGloss, lowerGloss))
            {
                return 1.0;
            }
            // Tokenize on runs of whitespace. string.Split("\\s+") does NOT interpret its argument as a
            // regular expression in C# (it would look for the literal text "\s+"), so use Regex.Split.
            // NOTE(review): Regex.Split yields empty tokens for leading/trailing whitespace - confirm
            // that StripCorporateTitles returns trimmed text.
            string[] higherToks = System.Text.RegularExpressions.Regex.Split(StripCorporateTitles(higherGloss), "\\s+");
            string[] lowerToks  = System.Text.RegularExpressions.Regex.Split(StripCorporateTitles(lowerGloss), "\\s+");
            // Case: acronyms of each other
            if (AcronymMatcher.IsAcronym(higherToks, lowerToks))
            {
                return 1.0;
            }
            int match = 0;

            // Get number of matching tokens between the two slot fills
            bool[] matchedHigherToks = new bool[higherToks.Length];
            bool[] matchedLowerToks  = new bool[lowerToks.Length];
            for (int h = 0; h < higherToks.Length; ++h)
            {
                if (matchedHigherToks[h])
                {
                    continue;
                }
                string higherTok = higherToks[h];
                string higherTokNoSpecialChars = NoSpecialChars(higherTok);
                bool   doesMatch = false;
                for (int l = 0; l < lowerToks.Length; ++l)
                {
                    if (matchedLowerToks[l])
                    {
                        continue;
                    }
                    string lowerTok = lowerToks[l];
                    string lowerTokNoSpecialChars = NoSpecialChars(lowerTok);
                    int    minLength = Math.Min(lowerTokNoSpecialChars.Length, higherTokNoSpecialChars.Length);

                    // A loose metric of "same token": equal ignoring case, or - only when both tokens
                    // are longer than 5 characters - prefix/suffix of one another or edit distance <= 1.
                    bool sameToken = Sharpen.Runtime.EqualsIgnoreCase(higherTokNoSpecialChars, lowerTokNoSpecialChars);
                    if (!sameToken && minLength > 5)
                    {
                        // Ordinal comparisons: these are identifier-like tokens, not UI text, and the
                        // culture-sensitive default can match or miss depending on the current culture.
                        sameToken =
                            // substring
                            higherTokNoSpecialChars.EndsWith(lowerTokNoSpecialChars, StringComparison.Ordinal)
                            || higherTokNoSpecialChars.StartsWith(lowerTokNoSpecialChars, StringComparison.Ordinal)
                            // substring the other way
                            || lowerTokNoSpecialChars.EndsWith(higherTokNoSpecialChars, StringComparison.Ordinal)
                            || lowerTokNoSpecialChars.StartsWith(higherTokNoSpecialChars, StringComparison.Ordinal)
                            // edit distance <= 1
                            || StringUtils.LevenshteinDistance(lowerTokNoSpecialChars, higherTokNoSpecialChars) <= 1;
                    }
                    if (sameToken)
                    {
                        doesMatch = true;
                        matchedHigherToks[h] = true;
                        // No break here: one higher token may consume several lower tokens,
                        // but it is only counted once below.
                        matchedLowerToks[l]  = true;
                    }
                }
                if (doesMatch)
                {
                    match += 1;
                }
            }
            return (double)match / (double)Math.Max(higherToks.Length, lowerToks.Length);
        }

示例#2

显示文件

文件： EntityMentionsAnnotator.cs 项目： awesomedotnetcore/Stanford.CoreNLP.NET

        /// <summary>
        /// Re-tags tokens as ORGANIZATION when they are currently tagged "O", are unchanged by
        /// upper-casing, have at least 3 characters, and <c>AcronymMatcher.IsAcronym</c> accepts them
        /// against the tokens of an ORGANIZATION mention found anywhere in the document.
        /// </summary>
        /// <param name="ann">The document whose sentence tokens are inspected and re-tagged in place.</param>
        private void AddAcronyms(Annotation ann)
        {
            // Pool the mentions of every sentence in the document.
            IList <ICoreMap> documentMentions = new List <ICoreMap>();
            foreach (ICoreMap mentionSource in ann.Get(typeof(CoreAnnotations.SentencesAnnotation)))
            {
                Sharpen.Collections.AddAll(documentMentions, mentionSource.Get(typeof(CoreAnnotations.MentionsAnnotation)));
            }

            // Keep only the token lists of the ORGANIZATION mentions.
            IList <IList <CoreLabel> > orgTokenLists = new List <IList <CoreLabel> >();
            foreach (ICoreMap pooledMention in documentMentions)
            {
                if (!"ORGANIZATION".Equals(pooledMention.Get(nerCoreAnnotationClass)))
                {
                    continue;
                }
                orgTokenLists.Add(pooledMention.Get(typeof(CoreAnnotations.TokensAnnotation)));
            }

            // Documents with more than 100 organizations are skipped entirely.
            if (orgTokenLists.Count > 100)
            {
                return;
            }

            foreach (ICoreMap currentSentence in ann.Get(typeof(CoreAnnotations.SentencesAnnotation)))
            {
                // NOTE(review): this list is filled below but never read or stored within this method.
                IList <ICoreMap>  acronymChunks       = new List <ICoreMap>();
                IList <CoreLabel> sentenceTokens      = currentSentence.Get(typeof(CoreAnnotations.TokensAnnotation));
                int               sentenceTokenOffset = currentSentence.Get(typeof(CoreAnnotations.TokenBeginAnnotation));
                for (int pos = 0; pos < sentenceTokens.Count; ++pos)
                {
                    CoreLabel current = sentenceTokens[pos];

                    // Only tokens that are not already part of a mention ...
                    if (!"O".Equals(current.Ner()))
                    {
                        continue;
                    }
                    // ... that are written entirely in upper case ...
                    if (!current.Word().ToUpper().Equals(current.Word()))
                    {
                        continue;
                    }
                    // ... and are at least three characters long can be acronym candidates.
                    if (current.Word().Length < 3)
                    {
                        continue;
                    }

                    foreach (IList <CoreLabel> orgTokens in orgTokenLists)
                    {
                        if (!AcronymMatcher.IsAcronym(current.Word(), orgTokens))
                        {
                            continue;
                        }
                        // The candidate really is an acronym of this organization: tag it and
                        // build a single-token chunk carrying the same tag.
                        current.SetNER("ORGANIZATION");
                        ICoreMap acronymChunk = ChunkAnnotationUtils.GetAnnotatedChunk(sentenceTokens, pos, pos + 1, sentenceTokenOffset, null, null, null);
                        acronymChunk.Set(typeof(CoreAnnotations.NamedEntityTagAnnotation), "ORGANIZATION");
                        acronymChunks.Add(acronymChunk);
                    }
                }
            }
        }

示例#3

显示文件

文件： KBPAnnotator.cs 项目： awesomedotnetcore/Stanford.CoreNLP.NET

        /// <summary>Annotate this document for KBP relations.</summary>
        /// <remarks>
        /// Steps, in order: collect the mentions of all sentences; cluster them using the coref chains
        /// and acronym matching and propagate Wikipedia entity links inside each cluster; build a
        /// mention-to-canonical-mention map (Spanish coref system, or coref chains plus acronym
        /// handling); group mentions by sentence; classify every plausible subject/object pair with the
        /// relation extractor; store the resulting triples on each sentence under
        /// <c>CoreAnnotations.KBPTriplesAnnotation</c>.
        /// </remarks>
        /// <param name="annotation">The document to annotate.</param>
        public virtual void Annotate(Annotation annotation)
        {
            // get a list of sentences for this annotation
            IList <ICoreMap> sentences = annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));
            // Create simple document
            Document doc = new Document(kbpProperties, serializer.ToProto(annotation));
            // Get the mentions in the document
            IList <ICoreMap> mentions = new List <ICoreMap>();

            foreach (ICoreMap sentence in sentences)
            {
                Sharpen.Collections.AddAll(mentions, sentence.Get(typeof(CoreAnnotations.MentionsAnnotation)));
            }
            // Compute coreferent clusters
            // (map a (sentence index, token index) pair to the KBP mention covering that token)
            IDictionary <Pair <int, int>, ICoreMap> mentionByStartIndex = new Dictionary <Pair <int, int>, ICoreMap>();

            foreach (ICoreMap mention in mentions)
            {
                foreach (CoreLabel token in mention.Get(typeof(CoreAnnotations.TokensAnnotation)))
                {
                    mentionByStartIndex[Pair.MakePair(token.SentIndex(), token.Index())] = mention;
                }
            }
            // (collect coreferent KBP mentions)
            // map from canonical mention -> other mentions
            IDictionary <ICoreMap, ICollection <ICoreMap> > mentionsMap = new Dictionary <ICoreMap, ICollection <ICoreMap> >();

            if (annotation.Get(typeof(CorefCoreAnnotations.CorefChainAnnotation)) != null)
            {
                foreach (KeyValuePair <int, CorefChain> chain in annotation.Get(typeof(CorefCoreAnnotations.CorefChainAnnotation)))
                {
                    ICoreMap firstMention = null;
                    foreach (CorefChain.CorefMention mention_1 in chain.Value.GetMentionsInTextualOrder())
                    {
                        // Find the first token of the coref mention that belongs to a KBP mention.
                        ICoreMap kbpMention = null;
                        for (int i = mention_1.startIndex; i < mention_1.endIndex; ++i)
                        {
                            ICoreMap mentionAtToken;
                            // sentNum is shifted by one to line up with the sentence indices used as keys above.
                            if (mentionByStartIndex.TryGetValue(Pair.MakePair(mention_1.sentNum - 1, i), out mentionAtToken))
                            {
                                kbpMention = mentionAtToken;
                                break;
                            }
                        }
                        if (firstMention == null)
                        {
                            firstMention = kbpMention;
                        }
                        if (kbpMention != null)
                        {
                            if (!mentionsMap.ContainsKey(firstMention))
                            {
                                mentionsMap[firstMention] = new LinkedHashSet <ICoreMap>();
                            }
                            mentionsMap[firstMention].Add(kbpMention);
                        }
                    }
                }
            }
            // (coreference acronyms)
            AcronymMatch(mentions, mentionsMap);
            // (ensure valid NER tag for canonical mention)
            // Iterate over a copy of the keys because the map is modified inside the loop.
            foreach (ICoreMap key in new HashSet <ICoreMap>(mentionsMap.Keys))
            {
                if (key.Get(typeof(CoreAnnotations.NamedEntityTagAnnotation)) == null)
                {
                    ICoreMap newKey = null;
                    foreach (ICoreMap candidate in mentionsMap[key])
                    {
                        if (candidate.Get(typeof(CoreAnnotations.NamedEntityTagAnnotation)) != null)
                        {
                            newKey = candidate;
                            break;
                        }
                    }
                    if (newKey != null)
                    {
                        // Re-key the cluster under the first member that does have an NER tag.
                        mentionsMap[newKey] = Sharpen.Collections.Remove(mentionsMap, key);
                    }
                    else
                    {
                        // case: no mention in this chain has an NER tag.
                        Sharpen.Collections.Remove(mentionsMap, key);
                    }
                }
            }
            // Propagate Entity Link
            foreach (KeyValuePair <ICoreMap, ICollection <ICoreMap> > entry in mentionsMap)
            {
                string entityLink = entry.Key.Get(typeof(CoreAnnotations.WikipediaEntityAnnotation));
                if (entityLink != null)
                {
                    foreach (ICoreMap mention_1 in entry.Value)
                    {
                        foreach (CoreLabel token in mention_1.Get(typeof(CoreAnnotations.TokensAnnotation)))
                        {
                            token.Set(typeof(CoreAnnotations.WikipediaEntityAnnotation), entityLink);
                        }
                    }
                }
            }
            // create a mapping of char offset pairs to KBPMention
            Dictionary <Pair <int, int>, ICoreMap> charOffsetToKBPMention = new Dictionary <Pair <int, int>, ICoreMap>();

            foreach (ICoreMap mention_2 in mentions)
            {
                int nerMentionCharBegin = mention_2.Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation));
                int nerMentionCharEnd   = mention_2.Get(typeof(CoreAnnotations.CharacterOffsetEndAnnotation));
                charOffsetToKBPMention[new Pair <int, int>(nerMentionCharBegin, nerMentionCharEnd)] = mention_2;
            }
            // Create a canonical mention map
            IDictionary <ICoreMap, ICoreMap> mentionToCanonicalMention;

            if (kbpLanguage.Equals(LanguageInfo.HumanLanguage.Spanish))
            {
                mentionToCanonicalMention = spanishCorefSystem.CanonicalMentionMapFromEntityMentions(mentions);
                if (Verbose)
                {
                    log.Info("---");
                    log.Info("basic spanish coref results");
                    foreach (ICoreMap originalMention in mentionToCanonicalMention.Keys)
                    {
                        if (!originalMention.Equals(mentionToCanonicalMention[originalMention]))
                        {
                            log.Info("mapped: " + originalMention + " to: " + mentionToCanonicalMention[originalMention]);
                        }
                    }
                }
            }
            else
            {
                mentionToCanonicalMention = new Dictionary <ICoreMap, ICoreMap>();
            }
            // check if there is coref info (coref chains are not used for Spanish)
            ICollection <KeyValuePair <int, CorefChain> > corefChains;

            if (annotation.Get(typeof(CorefCoreAnnotations.CorefChainAnnotation)) != null && !kbpLanguage.Equals(LanguageInfo.HumanLanguage.Spanish))
            {
                corefChains = annotation.Get(typeof(CorefCoreAnnotations.CorefChainAnnotation));
            }
            else
            {
                corefChains = new HashSet <KeyValuePair <int, CorefChain> >();
            }
            foreach (KeyValuePair <int, CorefChain> indexCorefChainPair in corefChains)
            {
                CorefChain corefChain = indexCorefChainPair.Value;
                Pair <IList <ICoreMap>, ICoreMap> corefChainKBPMentionsAndBestIndex = CorefChainToKBPMentions(corefChain, annotation, charOffsetToKBPMention);
                IList <ICoreMap> corefChainKBPMentions  = corefChainKBPMentionsAndBestIndex.First();
                ICoreMap         bestKBPMentionForChain = corefChainKBPMentionsAndBestIndex.Second();
                if (bestKBPMentionForChain != null)
                {
                    foreach (ICoreMap kbpMention in corefChainKBPMentions)
                    {
                        if (kbpMention != null)
                        {
                            //System.err.println("---");
                            // ad hoc filters ; assume acceptable unless a filter blocks it
                            bool acceptableLink = true;
                            // block people matches without a token overlap, exempting pronominal to non-pronominal
                            // good: Ashton --> Catherine Ashton
                            // good: she --> Catherine Ashton
                            // bad: Morsi --> Catherine Ashton
                            string kbpMentionNERTag             = kbpMention.Get(typeof(CoreAnnotations.NamedEntityTagAnnotation));
                            string bestKBPMentionForChainNERTag = bestKBPMentionForChain.Get(typeof(CoreAnnotations.NamedEntityTagAnnotation));
                            if (kbpMentionNERTag != null
                                && bestKBPMentionForChainNERTag != null
                                && kbpMentionNERTag.Equals("PERSON")
                                && bestKBPMentionForChainNERTag.Equals("PERSON")
                                && !KbpIsPronominalMention(kbpMention.Get(typeof(CoreAnnotations.TokensAnnotation))[0])
                                && !KbpIsPronominalMention(bestKBPMentionForChain.Get(typeof(CoreAnnotations.TokensAnnotation))[0]))
                            {
                                //System.err.println("testing PERSON to PERSON coref link");
                                bool tokenMatchFound = false;
                                foreach (CoreLabel kbpToken in kbpMention.Get(typeof(CoreAnnotations.TokensAnnotation)))
                                {
                                    foreach (CoreLabel bestKBPToken in bestKBPMentionForChain.Get(typeof(CoreAnnotations.TokensAnnotation)))
                                    {
                                        if (kbpToken.Word().ToLower().Equals(bestKBPToken.Word().ToLower()))
                                        {
                                            tokenMatchFound = true;
                                            break;
                                        }
                                    }
                                    if (tokenMatchFound)
                                    {
                                        break;
                                    }
                                }
                                if (!tokenMatchFound)
                                {
                                    acceptableLink = false;
                                }
                            }
                            // check the coref link passed the filters
                            if (acceptableLink)
                            {
                                mentionToCanonicalMention[kbpMention] = bestKBPMentionForChain;
                            }
                        }
                    }
                }
            }
            //System.err.println("kbp mention: " + kbpMention.get(CoreAnnotations.TextAnnotation.class));
            //System.err.println("coref mention: " + bestKBPMentionForChain.get(CoreAnnotations.TextAnnotation.class));
            // (add missing mentions)
            // Every mention that has no canonical mention yet becomes its own canonical mention; the
            // code below indexes mentionToCanonicalMention by mention and relies on that.
            // NOTE(review): this replaces a Stream().Filter(null).ForEach(null) call whose lambdas were
            // lost in the Java-to-C# translation - confirm against the upstream implementation.
            foreach (ICoreMap unmappedMention in mentions)
            {
                ICoreMap existingCanonical;
                if (!mentionToCanonicalMention.TryGetValue(unmappedMention, out existingCanonical) || existingCanonical == null)
                {
                    mentionToCanonicalMention[unmappedMention] = unmappedMention;
                }
            }
            // handle acronym coreference
            Dictionary <string, IList <ICoreMap> > acronymClusters  = new Dictionary <string, IList <ICoreMap> >();
            Dictionary <string, IList <ICoreMap> > acronymInstances = new Dictionary <string, IList <ICoreMap> >();

            foreach (ICoreMap acronymMention in mentionToCanonicalMention.Keys)
            {
                string acronymNERTag = acronymMention.Get(typeof(CoreAnnotations.NamedEntityTagAnnotation));
                // Only self-canonical ORGANIZATION / LOCATION mentions are acronym candidates.
                if ((acronymMention == mentionToCanonicalMention[acronymMention]) && acronymNERTag != null && (acronymNERTag.Equals(KBPRelationExtractor.NERTag.Organization.name) || acronymNERTag.Equals(KBPRelationExtractor.NERTag.Location.name)))
                {
                    string           acronymText        = acronymMention.Get(typeof(CoreAnnotations.TextAnnotation));
                    IList <ICoreMap> coreferentMentions = new List <ICoreMap>();
                    // define acronyms as not containing spaces (e.g. ACLU)
                    if (!acronymText.Contains(" "))
                    {
                        int numCoreferentsChecked = 0;
                        foreach (ICoreMap coreferentMention in mentions)
                        {
                            // only check first 1000
                            if (numCoreferentsChecked > 1000)
                            {
                                break;
                            }
                            // don't check a mention against itself
                            if (acronymMention == coreferentMention)
                            {
                                continue;
                            }
                            // don't check other mentions without " "
                            string coreferentText = coreferentMention.Get(typeof(CoreAnnotations.TextAnnotation));
                            if (!coreferentText.Contains(" "))
                            {
                                continue;
                            }
                            numCoreferentsChecked++;
                            // Words of the candidate expansion, one per token.
                            // NOTE(review): replaces Stream().Map(null), whose mapping lambda was lost
                            // in translation; token.Word() is presumed to be the intended projection.
                            IList <string> coreferentTokenStrings = new List <string>();
                            foreach (CoreLabel coreferentToken in coreferentMention.Get(typeof(CoreAnnotations.TokensAnnotation)))
                            {
                                coreferentTokenStrings.Add(coreferentToken.Word());
                            }
                            // when an acronym match is found:
                            // store every mention (that isn't ACLU) that matches with ACLU in acronymClusters
                            // store every instance of "ACLU" in acronymInstances
                            // afterwards find the best mention in acronymClusters, and match it to every mention in acronymInstances
                            if (AcronymMatcher.IsAcronym(acronymText, coreferentTokenStrings))
                            {
                                if (!acronymClusters.ContainsKey(acronymText))
                                {
                                    acronymClusters[acronymText] = new List <ICoreMap>();
                                }
                                if (!acronymInstances.ContainsKey(acronymText))
                                {
                                    acronymInstances[acronymText] = new List <ICoreMap>();
                                }
                                acronymClusters[acronymText].Add(coreferentMention);
                                acronymInstances[acronymText].Add(acronymMention);
                            }
                        }
                    }
                }
            }
            // process each acronym (e.g. ACLU)
            foreach (string acronymText_1 in acronymInstances.Keys)
            {
                // find longest ORG or null
                ICoreMap bestORG = null;
                foreach (ICoreMap coreferentMention in acronymClusters[acronymText_1])
                {
                    // Constant-first Equals: the candidate's NER tag may be null.
                    if (!KBPRelationExtractor.NERTag.Organization.name.Equals(coreferentMention.Get(typeof(CoreAnnotations.NamedEntityTagAnnotation))))
                    {
                        continue;
                    }
                    if (bestORG == null)
                    {
                        bestORG = coreferentMention;
                    }
                    else
                    {
                        if (coreferentMention.Get(typeof(CoreAnnotations.TextAnnotation)).Length > bestORG.Get(typeof(CoreAnnotations.TextAnnotation)).Length)
                        {
                            bestORG = coreferentMention;
                        }
                    }
                }
                // find longest LOC or null
                ICoreMap bestLOC = null;
                foreach (ICoreMap coreferentMention_1 in acronymClusters[acronymText_1])
                {
                    // Constant-first Equals: the candidate's NER tag may be null.
                    if (!KBPRelationExtractor.NERTag.Location.name.Equals(coreferentMention_1.Get(typeof(CoreAnnotations.NamedEntityTagAnnotation))))
                    {
                        continue;
                    }
                    if (bestLOC == null)
                    {
                        bestLOC = coreferentMention_1;
                    }
                    else
                    {
                        if (coreferentMention_1.Get(typeof(CoreAnnotations.TextAnnotation)).Length > bestLOC.Get(typeof(CoreAnnotations.TextAnnotation)).Length)
                        {
                            bestLOC = coreferentMention_1;
                        }
                    }
                }
                // link ACLU to "American Civil Liberties Union" ; make sure NER types match
                foreach (ICoreMap acronymMention_1 in acronymInstances[acronymText_1])
                {
                    // Non-null here: only mentions with a non-null NER tag were added to acronymInstances.
                    string mentionType = acronymMention_1.Get(typeof(CoreAnnotations.NamedEntityTagAnnotation));
                    if (mentionType.Equals(KBPRelationExtractor.NERTag.Organization.name) && bestORG != null)
                    {
                        mentionToCanonicalMention[acronymMention_1] = bestORG;
                    }
                    if (mentionType.Equals(KBPRelationExtractor.NERTag.Location.name) && bestLOC != null)
                    {
                        mentionToCanonicalMention[acronymMention_1] = bestLOC;
                    }
                }
            }
            // Cluster mentions by sentence
            IList <ICoreMap>[] mentionsBySentence = new IList <ICoreMap>[annotation.Get(typeof(CoreAnnotations.SentencesAnnotation)).Count];
            for (int i_1 = 0; i_1 < mentionsBySentence.Length; ++i_1)
            {
                mentionsBySentence[i_1] = new List <ICoreMap>();
            }
            foreach (ICoreMap mention_3 in mentionToCanonicalMention.Keys)
            {
                mentionsBySentence[mention_3.Get(typeof(CoreAnnotations.SentenceIndexAnnotation))].Add(mention_3);
            }
            // Classify
            for (int sentenceI = 0; sentenceI < mentionsBySentence.Length; ++sentenceI)
            {
                Dictionary <string, RelationTriple> relationStringsToTriples = new Dictionary <string, RelationTriple>();
                IList <RelationTriple> finalTriplesList = new List <RelationTriple>();
                // the annotations
                IList <ICoreMap> candidates = mentionsBySentence[sentenceI];
                // determine sentence length
                int sentenceLength = annotation.Get(typeof(CoreAnnotations.SentencesAnnotation))[sentenceI].Get(typeof(CoreAnnotations.TokensAnnotation)).Count;
                // check if sentence is too long, if it's too long don't run kbp (maxLength == -1 disables the limit)
                if (maxLength != -1 && sentenceLength > maxLength)
                {
                    // set the triples annotation to an empty list of RelationTriples
                    annotation.Get(typeof(CoreAnnotations.SentencesAnnotation))[sentenceI].Set(typeof(CoreAnnotations.KBPTriplesAnnotation), finalTriplesList);
                    // continue to next sentence
                    continue;
                }
                // sentence isn't too long, so continue processing this sentence
                for (int subjI = 0; subjI < candidates.Count; ++subjI)
                {
                    ICoreMap subj      = candidates[subjI];
                    int      subjBegin = subj.Get(typeof(CoreAnnotations.TokensAnnotation))[0].Index() - 1;
                    int      subjEnd   = subj.Get(typeof(CoreAnnotations.TokensAnnotation))[subj.Get(typeof(CoreAnnotations.TokensAnnotation)).Count - 1].Index();
                    Optional <KBPRelationExtractor.NERTag> subjNER = KBPRelationExtractor.NERTag.FromString(subj.Get(typeof(CoreAnnotations.NamedEntityTagAnnotation)));
                    if (subjNER.IsPresent())
                    {
                        for (int objI = 0; objI < candidates.Count; ++objI)
                        {
                            if (subjI == objI)
                            {
                                continue;
                            }
                            if (Thread.Interrupted())
                            {
                                throw new RuntimeInterruptedException();
                            }
                            ICoreMap obj      = candidates[objI];
                            int      objBegin = obj.Get(typeof(CoreAnnotations.TokensAnnotation))[0].Index() - 1;
                            int      objEnd   = obj.Get(typeof(CoreAnnotations.TokensAnnotation))[obj.Get(typeof(CoreAnnotations.TokensAnnotation)).Count - 1].Index();
                            Optional <KBPRelationExtractor.NERTag> objNER = KBPRelationExtractor.NERTag.FromString(obj.Get(typeof(CoreAnnotations.NamedEntityTagAnnotation)));
                            if (objNER.IsPresent() && KBPRelationExtractor.RelationType.PlausiblyHasRelation(subjNER.Get(), objNER.Get()))
                            {
                                // type check
                                KBPRelationExtractor.KBPInput input = new KBPRelationExtractor.KBPInput(new Span(subjBegin, subjEnd), new Span(objBegin, objEnd), subjNER.Get(), objNER.Get(), doc.Sentence(sentenceI));
                                //  -- BEGIN Classify
                                Pair <string, double> prediction = extractor.Classify(input);
                                //  -- END Classify
                                // Handle the classifier output
                                if (!KBPStatisticalExtractor.NoRelation.Equals(prediction.first))
                                {
                                    RelationTriple triple = new RelationTriple.WithLink(
                                        subj.Get(typeof(CoreAnnotations.TokensAnnotation)),
                                        mentionToCanonicalMention[subj].Get(typeof(CoreAnnotations.TokensAnnotation)),
                                        Java.Util.Collections.SingletonList(new CoreLabel(new Word(ConvertRelationNameToLatest(prediction.first)))),
                                        obj.Get(typeof(CoreAnnotations.TokensAnnotation)),
                                        mentionToCanonicalMention[obj].Get(typeof(CoreAnnotations.TokensAnnotation)),
                                        prediction.second,
                                        sentences[sentenceI].Get(typeof(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation)),
                                        subj.Get(typeof(CoreAnnotations.WikipediaEntityAnnotation)),
                                        obj.Get(typeof(CoreAnnotations.WikipediaEntityAnnotation)));
                                    string tripleString = triple.SubjectGloss() + "\t" + triple.RelationGloss() + "\t" + triple.ObjectGloss();
                                    // ad hoc checks for problems
                                    bool acceptableTriple = true;
                                    if (triple.ObjectGloss().Equals(triple.SubjectGloss()) && triple.RelationGloss().EndsWith("alternate_names"))
                                    {
                                        acceptableTriple = false;
                                    }
                                    // only add this triple if it has the highest confidence ; this process generates duplicates with
                                    // different confidence scores, so we want to filter out the lower confidence versions
                                    if (acceptableTriple && !relationStringsToTriples.ContainsKey(tripleString))
                                    {
                                        relationStringsToTriples[tripleString] = triple;
                                    }
                                    else
                                    {
                                        // acceptableTriple is tested first, so the indexer is only reached when the key exists.
                                        if (acceptableTriple && triple.confidence > relationStringsToTriples[tripleString].confidence)
                                        {
                                            relationStringsToTriples[tripleString] = triple;
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
                finalTriplesList = new List <RelationTriple>(relationStringsToTriples.Values);
                // Set triples
                annotation.Get(typeof(CoreAnnotations.SentencesAnnotation))[sentenceI].Set(typeof(CoreAnnotations.KBPTriplesAnnotation), finalTriplesList);
            }
        }

示例#4

显示文件

文件： KBPAnnotator.cs 项目： awesomedotnetcore/Stanford.CoreNLP.NET

        /// <summary>Augment the coreferent mention map with acronym matches.</summary>
        /// <remarks>
        /// Multi-token ORGANIZATION / LOCATION mentions are the candidate antecedents; ORGANIZATION /
        /// LOCATION mentions whose text contains no space are the candidate acronyms. For every pair
        /// accepted by <c>AcronymMatcher.IsAcronym</c>, the two clusters are merged and every member of
        /// the merged cluster is mapped to it. The method returns early once more than 1000
        /// acronym/antecedent pairs have been examined in total.
        /// </remarks>
        /// <param name="mentions">All mentions of the document.</param>
        /// <param name="mentionsMap">Mention-to-cluster map; updated in place.</param>
        private static void AcronymMatch(IList <ICoreMap> mentions, IDictionary <ICoreMap, ICollection <ICoreMap> > mentionsMap)
        {
            int ticks = 0;
            // Get all the candidate antecedents
            // NOTE(review): keys are lists, which compare by reference with the default comparer, so
            // two mentions with identical tokens produce two separate entries.
            IDictionary <IList <string>, ICoreMap> textToMention = new Dictionary <IList <string>, ICoreMap>();

            foreach (ICoreMap mention in mentions)
            {
                string nerTag = mention.Get(typeof(CoreAnnotations.NamedEntityTagAnnotation));
                if (nerTag != null && (nerTag.Equals(KBPRelationExtractor.NERTag.Organization.name) || nerTag.Equals(KBPRelationExtractor.NERTag.Location.name)))
                {
                    // Words of the mention, one per token.
                    // NOTE(review): replaces Stream().Map(null), whose mapping lambda was lost in the
                    // Java-to-C# translation; token.Word() is presumed to be the intended projection.
                    IList <string> tokens = new List <string>();
                    foreach (var token in mention.Get(typeof(CoreAnnotations.TokensAnnotation)))
                    {
                        tokens.Add(token.Word());
                    }
                    if (tokens.Count > 1)
                    {
                        textToMention[tokens] = mention;
                    }
                }
            }
            // Look for candidate acronyms
            foreach (ICoreMap acronym in mentions)
            {
                string nerTag = acronym.Get(typeof(CoreAnnotations.NamedEntityTagAnnotation));
                if (nerTag != null && (nerTag.Equals(KBPRelationExtractor.NERTag.Organization.name) || nerTag.Equals(KBPRelationExtractor.NERTag.Location.name)))
                {
                    string text = acronym.Get(typeof(CoreAnnotations.TextAnnotation));
                    if (!text.Contains(" "))
                    {
                        // Candidate acronym
                        // TryGetValue instead of the indexer: the indexer throws for a missing key,
                        // which would make the "no cluster yet" fallback unreachable.
                        ICollection <ICoreMap> acronymCluster;
                        if (!mentionsMap.TryGetValue(acronym, out acronymCluster) || acronymCluster == null)
                        {
                            acronymCluster = new LinkedHashSet <ICoreMap>();
                            acronymCluster.Add(acronym);
                        }
                        // Try to match it to an antecedent
                        foreach (KeyValuePair <IList <string>, ICoreMap> entry in textToMention)
                        {
                            // Time out if we take too long in this loop.
                            // The counter is shared by all acronyms, so this ends the whole method.
                            ticks += 1;
                            if (ticks > 1000)
                            {
                                return;
                            }
                            // Check if the pair is an acronym
                            if (AcronymMatcher.IsAcronym(text, entry.Key))
                            {
                                // Case: found a coreferent pair
                                ICoreMap coreferent = entry.Value;
                                ICollection <ICoreMap> coreferentCluster;
                                if (!mentionsMap.TryGetValue(coreferent, out coreferentCluster) || coreferentCluster == null)
                                {
                                    coreferentCluster = new LinkedHashSet <ICoreMap>();
                                    coreferentCluster.Add(coreferent);
                                }
                                // Create a new coreference cluster
                                ICollection <ICoreMap> newCluster = new LinkedHashSet <ICoreMap>();
                                Sharpen.Collections.AddAll(newCluster, acronymCluster);
                                Sharpen.Collections.AddAll(newCluster, coreferentCluster);
                                // Set the new cluster
                                foreach (ICoreMap key in newCluster)
                                {
                                    mentionsMap[key] = newCluster;
                                }
                            }
                        }
                    }
                }
            }
        }