C# (CSharp) CoreLabel.ContainsKey Examples

Programming Language: C# (CSharp)

Class/Type: CoreLabel

Method/Function: ContainsKey

Examples at hotexamples.com: 11

C# (CSharp) CoreLabel.ContainsKey - 11 examples found. These are the top-rated real-world C# (CSharp) examples of CoreLabel.ContainsKey, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

Set(30)

Word(30)

Get(30)

SetWord(27)

SetValue(27)

Lemma(15)

SetTag(13)

Tag(12)

ContainsKey(11)

GetString(10)

Index(9)

SetIndex(9)

Value(9)

Factory(9)

Remove(8)

SetNER(7)

BeginPosition(6)

SetLemma(6)

SetOriginalText(5)

ToString(4)

get(4)

SetBeginPosition(4)

OriginalText(4)

SetEndPosition(4)

KeySet(3)

ToShorterString(3)

value(2)

Ner(2)

IsNewline(2)

EndPosition(2)

toString(1)

set(1)

lemma(1)

index(1)

SetCategory(1)

endPosition(1)

beginPosition(1)

GetHashCode(1)

Size(1)

LabelFactory(1)

Category(1)

word(1)

Example #1

Show file

File: DepPatternFactory.cs Project: zerouid/Stanford.CoreNLP.NET

        /// <summary>
        /// Builds a dependency pattern from a word/relation pair, restricting the pattern token to the
        /// processed-text annotation of the sentence token that backs the word.
        /// </summary>
        /// <param name="p">Pair whose first element supplies the word index and whose second element is passed on to the resulting pattern.</param>
        /// <param name="sent">Sentence whose token list is looked up with the word index minus one.</param>
        /// <returns>A new <c>DepPattern</c> wrapping the restricted token and the second element of <paramref name="p"/>.</returns>
        public static DepPattern PatternToDepPattern(Pair <IndexedWord, GrammaticalRelation> p, DataInstance sent)
        {
            var   processedTextKey = typeof(PatternsAnnotations.ProcessedTextAnnotation);
            Token depToken         = new Token(PatternFactory.PatternType.Dep);

            // The word index is shifted down by one before it is used as a position in the token list.
            var       sentenceTokens = sent.GetTokens();
            CoreLabel wordLabel      = sentenceTokens[p.First().Index() - 1];

            // The restriction below reads the processed text, so the label is expected to carry it.
            System.Diagnostics.Debug.Assert(wordLabel.ContainsKey(processedTextKey), "the keyset are " + wordLabel.ToString(CoreLabel.OutputFormat.All));
            depToken.AddORRestriction(processedTextKey, wordLabel.Get(processedTextKey));
            return new DepPattern(depToken, p.Second());
        }

Example #2

Show file

        /// <summary>
        /// Computes the context representation of a token: a surface-pattern <c>Token</c> carrying OR
        /// restrictions, a "|"-joined string of the class names / NER tag that were not background, and
        /// a flag that stays true only when nothing but background values was found.
        /// </summary>
        /// <param name="tokenj">Token to inspect; it must hold a non-null value for every generalize class.</param>
        /// <returns>Triple of (only background values seen, restricted token, joined label string).</returns>
        /// <exception cref="Exception">A generalize class is absent from <paramref name="tokenj"/> or maps to null.</exception>
        internal static Triple <bool, Token, string> GetContextTokenStr(CoreLabel tokenj)
        {
            Token  restrictedToken = new Token(PatternFactory.PatternType.Surface);
            string joinedLabels    = string.Empty;
            bool   onlyBackground  = true;

            // An earlier (disabled) variant of this loop walked the answer classes; it now walks the generalize classes.
            foreach (KeyValuePair <string, Type> generalizeClass in ConstantsAndVariables.GetGeneralizeClasses())
            {
                string className     = generalizeClass.Key;
                Type   annotationKey = generalizeClass.Value;

                bool hasValue = tokenj.ContainsKey(annotationKey) && tokenj.Get(annotationKey) != null;
                if (!hasValue)
                {
                    throw new Exception(" Why does the token not have the class " + annotationKey + " set? Existing classes " + tokenj.ToString(CoreLabel.OutputFormat.All));
                }
                // Background values contribute nothing to the context.
                if (tokenj.Get(annotationKey).Equals(ConstantsAndVariables.backgroundSymbol))
                {
                    continue;
                }
                onlyBackground = false;
                joinedLabels   = joinedLabels.IsEmpty() ? className : joinedLabels + "|" + className;
                restrictedToken.AddORRestriction(annotationKey, className);
            }

            if (useContextNERRestriction)
            {
                string nerTag   = tokenj.Get(typeof(CoreAnnotations.NamedEntityTagAnnotation));
                bool   isEntity = nerTag != null && !nerTag.Equals(SeqClassifierFlags.DefaultBackgroundSymbol);
                if (isEntity)
                {
                    onlyBackground = false;
                    joinedLabels   = joinedLabels.IsEmpty() ? nerTag : joinedLabels + "|" + nerTag;
                    restrictedToken.AddORRestriction(typeof(CoreAnnotations.NamedEntityTagAnnotation), nerTag);
                }
            }
            return new Triple <bool, Token, string>(onlyBackground, restrictedToken, joinedLabels);
        }

Example #3

Show file

File: ForwardEntailer.cs Project: awesomedotnetcore/Stanford.CoreNLP.NET

 /// <summary>
 /// Builds a search problem for a sentence (or sentence fragment) from its parse tree, after checking
 /// that every vertex with a backing label carries a polarity annotation.
 /// </summary>
 /// <param name="parseTree">Parse tree of the sentence the search starts from.</param>
 /// <param name="truthOfPremise">Truth value of the premise; most applications simply pass true.</param>
 /// <returns>A newly constructed search problem instance.</returns>
 /// <exception cref="ArgumentException">A backing label lacks the polarity annotation.</exception>
 public virtual ForwardEntailerSearchProblem Apply(SemanticGraph parseTree, bool truthOfPremise)
 {
     foreach (IndexedWord node in parseTree.VertexSet())
     {
         CoreLabel backing = node.BackingLabel();
         // Vertices without a backing label are not checked.
         if (backing == null)
         {
             continue;
         }
         if (backing.ContainsKey(typeof(NaturalLogicAnnotations.PolarityAnnotation)))
         {
             continue;
         }
         throw new ArgumentException("Cannot run Natural Logic forward entailment without polarity annotations set. See " + typeof(NaturalLogicAnnotator).GetSimpleName());
     }
     return new ForwardEntailerSearchProblem(parseTree, truthOfPremise, maxResults, maxTicks, weights);
 }

Example #4

Show file

File: IOBUtils.cs Project: zerouid/Stanford.CoreNLP.NET

        /// <summary>Builds a new datum (a <see cref="Edu.Stanford.Nlp.Ling.CoreLabel"/>) for a token string.</summary>
        /// <remarks>
        /// The text, char, answer and gold-answer annotations are filled from <paramref name="token"/> and
        /// <paramref name="label"/>. Both character offsets of the datum are derived from the
        /// <see cref="Edu.Stanford.Nlp.Ling.CoreAnnotations.CharacterOffsetBeginAnnotation"/> of
        /// <paramref name="cl"/>: begin = that value + <paramref name="startOffset"/>, and
        /// <see cref="Edu.Stanford.Nlp.Ling.CoreAnnotations.CharacterOffsetEndAnnotation"/> = that value +
        /// <paramref name="endOffset"/>. The DomainAnnotation is copied from <paramref name="cl"/> when present.
        /// </remarks>
        /// <param name="cl">Source label supplying the base character offset and, optionally, the domain.</param>
        /// <param name="token">Text stored as both the text and the char annotation.</param>
        /// <param name="label">Value stored as both the answer and the gold-answer annotation.</param>
        /// <param name="startOffset">Amount added to the base offset to obtain the begin offset.</param>
        /// <param name="endOffset">Amount added to the base offset to obtain the end offset.</param>
        /// <returns>The populated datum.</returns>
        private static CoreLabel CreateDatum(CoreLabel cl, string token, string label, int startOffset, int endOffset)
        {
            var baseOffsetKey = typeof(CoreAnnotations.CharacterOffsetBeginAnnotation);
            var domainKey     = typeof(CoreAnnotations.DomainAnnotation);

            CoreLabel datum = new CoreLabel();
            datum.Set(typeof(CoreAnnotations.TextAnnotation), token);
            datum.Set(typeof(CoreAnnotations.CharAnnotation), token);
            datum.Set(typeof(CoreAnnotations.AnswerAnnotation), label);
            datum.Set(typeof(CoreAnnotations.GoldAnswerAnnotation), label);

            // Begin and end are both measured from the begin offset of cl.
            datum.Set(baseOffsetKey, cl.Get(baseOffsetKey) + startOffset);
            datum.Set(typeof(CoreAnnotations.CharacterOffsetEndAnnotation), cl.Get(baseOffsetKey) + endOffset);

            // cl has already been dereferenced above, so the null test here cannot fail at this point.
            bool hasDomain = cl != null && cl.ContainsKey(domainKey);
            if (hasDomain)
            {
                datum.Set(domainKey, cl.Get(domainKey));
            }
            return datum;
        }

Example #5

Show file

 /// <summary>
 /// Produces the next token with a non-empty word, handing tokens that carry a parent annotation to
 /// the compound, verb or contraction processing routines when the corresponding option is on.
 /// </summary>
 /// <returns>The next token, or null when the source returns null.</returns>
 /// <exception cref="RuntimeIOException">Wraps an <c>IOException</c> raised while reading.</exception>
 protected internal override T GetNext()
 {
     try
     {
         T candidate;
         do
         {
             // Orthographic normalization can reduce a token to nothing, so keep pulling
             // until a token with a non-empty word (or null) shows up. Entries waiting
             // in compoundBuffer take precedence over the lexer while splitting is on.
             if (splitAny && !compoundBuffer.IsEmpty())
             {
                 candidate = (T)compoundBuffer.Remove(0);
             }
             else
             {
                 candidate = (T)lexer.Next();
             }
         }while (candidate != null && candidate.Word().IsEmpty());

         // Nothing to split unless splitting is enabled and the token is a CoreLabel.
         if (!splitAny || !(candidate is CoreLabel))
         {
             return candidate;
         }
         CoreLabel cl = (CoreLabel)candidate;
         if (!cl.ContainsKey(typeof(CoreAnnotations.ParentAnnotation)))
         {
             return candidate;
         }

         // At most one of the three processors runs, tried in this order.
         if (splitCompounds && cl.Get(typeof(CoreAnnotations.ParentAnnotation)).Equals(SpanishLexer.CompoundAnnotation))
         {
             candidate = (T)ProcessCompound(cl);
         }
         else if (splitVerbs && cl.Get(typeof(CoreAnnotations.ParentAnnotation)).Equals(SpanishLexer.VbPronAnnotation))
         {
             candidate = (T)ProcessVerb(cl);
         }
         else if (splitContractions && cl.Get(typeof(CoreAnnotations.ParentAnnotation)).Equals(SpanishLexer.ContrAnnotation))
         {
             candidate = (T)ProcessContraction(cl);
         }
         return candidate;
     }
     catch (IOException e)
     {
         throw new RuntimeIOException(e);
     }
 }

Example #6

Show file

        /// <summary>
        /// Searches <paramref name="tree"/> and its descendants for the first subtree whose begin/end index
        /// annotations equal <paramref name="start"/> and <paramref name="end"/>.
        /// </summary>
        /// <param name="tree">Root of the subtree to search.</param>
        /// <param name="start">Begin index to match.</param>
        /// <param name="end">End index to match.</param>
        /// <returns>The matching subtree, or null when none is found.</returns>
        private static Tree FindTreeWithSpan(Tree tree, int start, int end)
        {
            CoreLabel label = (CoreLabel)tree.Label();

            bool hasSpan = label != null
                           && label.ContainsKey(typeof(CoreAnnotations.BeginIndexAnnotation))
                           && label.ContainsKey(typeof(CoreAnnotations.EndIndexAnnotation));
            if (hasSpan)
            {
                int spanStart = label.Get(typeof(CoreAnnotations.BeginIndexAnnotation));
                int spanEnd   = label.Get(typeof(CoreAnnotations.EndIndexAnnotation));
                if (start == spanStart && end == spanEnd)
                {
                    // exact match on this node
                    return tree;
                }
                // The requested span ends before this node starts, or starts at/after its end:
                // the children are not searched.
                if (end < spanStart || start >= spanEnd)
                {
                    return null;
                }
            }

            // A match may still sit below this node; return the first one found.
            foreach (Tree child in tree.Children())
            {
                if (child == null)
                {
                    continue;
                }
                Tree found = FindTreeWithSpan(child, start, end);
                if (found != null)
                {
                    return found;
                }
            }
            return null;
        }

Example #7

Show file

 /// <summary>
 /// Produces the next token with a non-empty word, passing tokens whose parent annotation marks them
 /// as compounds or contractions to the matching processing routine when that option is on.
 /// </summary>
 /// <returns>The next token, or null when the source returns null.</returns>
 /// <exception cref="RuntimeIOException">Wraps an <c>IOException</c> raised while reading.</exception>
 protected internal override T GetNext()
 {
     try
     {
         T candidate = null;
         do
         {
             // Orthographic normalization can reduce a token to nothing, so keep pulling
             // until a token with a non-zero-length word (or null) shows up. Entries waiting
             // in compoundBuffer take precedence over the lexer while either split option is on.
             bool serveFromBuffer = (splitContractions || splitCompounds) && compoundBuffer.Count > 0;
             if (serveFromBuffer)
             {
                 candidate = (T)compoundBuffer.Remove(0);
             }
             else
             {
                 candidate = (T)lexer.Next();
             }
         }while (candidate != null && candidate.Word().Length == 0);

         // Compound splitting comes first.
         if (splitCompounds && candidate is CoreLabel)
         {
             CoreLabel compoundLabel = (CoreLabel)candidate;
             bool      isCompound    = compoundLabel.ContainsKey(typeof(CoreAnnotations.ParentAnnotation)) && compoundLabel.Get(typeof(CoreAnnotations.ParentAnnotation)).Equals(FrenchLexer.CompoundAnnotation);
             if (isCompound)
             {
                 candidate = (T)ProcessCompound(compoundLabel);
             }
         }

         // The contraction check looks at the token as it stands after the compound step.
         if (splitContractions && candidate is CoreLabel)
         {
             CoreLabel contractionLabel = (CoreLabel)candidate;
             bool      isContraction    = contractionLabel.ContainsKey(typeof(CoreAnnotations.ParentAnnotation)) && contractionLabel.Get(typeof(CoreAnnotations.ParentAnnotation)).Equals(FrenchLexer.ContrAnnotation);
             if (isContraction)
             {
                 candidate = (T)ProcessContraction(contractionLabel);
             }
         }
         return candidate;
     }
     catch (IOException e)
     {
         throw new RuntimeIOException(e);
     }
 }

Example #8

Show file

File: ApplyPatterns.cs Project: awesomedotnetcore/Stanford.CoreNLP.NET

 /// <summary>
 /// Applies every token-sequence pattern in <c>patterns</c> to the tokens of every sentence listed in
 /// <c>sentids</c> and gathers the phrases captured by the <c>$term</c> group.
 /// </summary>
 /// <returns>
 /// Triple of: phrase/pattern counts; the token spans matched per pattern as (sentence id, start,
 /// inclusive end); and the phrases in which no included word was found without the label.
 /// </returns>
 /// <exception cref="System.Exception"/>
 public virtual Triple <TwoDimensionalCounter <CandidatePhrase, E>, CollectionValuedMap <E, Triple <string, int, int> >, ICollection <CandidatePhrase> > Call()
 {
     // CollectionValuedMap<String, Integer> tokensMatchedPattern = new
     // CollectionValuedMap<String, Integer>();
     try
     {
         ICollection <CandidatePhrase> alreadyLabeledPhrases                    = new HashSet <CandidatePhrase>();
         TwoDimensionalCounter <CandidatePhrase, E>          allFreq            = new TwoDimensionalCounter <CandidatePhrase, E>();
         CollectionValuedMap <E, Triple <string, int, int> > matchedTokensByPat = new CollectionValuedMap <E, Triple <string, int, int> >();
         foreach (string sentid in sentids)
         {
             IList <CoreLabel> sent = sents[sentid].GetTokens();
             foreach (KeyValuePair <TokenSequencePattern, E> pEn in patterns)
             {
                 if (pEn.Key == null)
                 {
                     throw new Exception("why is the pattern " + pEn + " null?");
                 }
                 TokenSequenceMatcher m = ((TokenSequenceMatcher)pEn.Key.GetMatcher(sent));
                 //        //Setting this find type can save time in searching - greedy and reluctant quantifiers are not enforced
                 //        m.setFindType(SequenceMatcher.FindType.FIND_ALL);
                 //Higher branch values makes the faster but uses more memory
                 m.SetBranchLimit(5);
                 while (m.Find())
                 {
                     // s is the first token of the captured term; e is used as an exclusive upper bound below.
                     int s = m.Start("$term");
                     int e = m.End("$term");
                     System.Diagnostics.Debug.Assert(e - s <= PatternFactory.numWordsCompoundMapped[label], "How come the pattern " + pEn.Key + " is extracting phrases longer than numWordsCompound of " + PatternFactory.numWordsCompoundMapped[label] + " for label "
                                                     + label);
                     string phrase            = string.Empty;
                     string phraseLemma       = string.Empty;
                     bool   useWordNotLabeled = false;
                     bool   doNotUse          = false;
                     //find if the neighboring words are labeled - if so - club them together
                     if (constVars.clubNeighboringLabeledWords)
                     {
                         // Walk left from the match; at the first token whose answer class differs from the label, start just right of it.
                         // NOTE(review): if every token down to index 0 carries the label, s keeps its original value — confirm intended.
                         for (int i = s - 1; i >= 0; i--)
                         {
                             if (!sent[i].Get(constVars.GetAnswerClass()[label]).Equals(label))
                             {
                                 s = i + 1;
                                 break;
                             }
                         }
                         // Walk right from the match; the first token whose answer class differs from the label becomes the exclusive end.
                         // NOTE(review): if every token up to the end of the sentence carries the label, e keeps its original value — confirm intended.
                         for (int i_1 = e; i_1 < sent.Count; i_1++)
                         {
                             if (!sent[i_1].Get(constVars.GetAnswerClass()[label]).Equals(label))
                             {
                                 e = i_1;
                                 break;
                             }
                         }
                     }
                     //to make sure we discard phrases with stopwords in between, but include the ones in which stop words were removed at the ends if removeStopWordsFromSelectedPhrases is true
                     bool[] addedindices = new bool[e - s];
                     // Arrays.fill(addedindices, false); // not needed as initialized false
                     for (int i_2 = s; i_2 < e; i_2++)
                     {
                         CoreLabel l = sent[i_2];
                         // Flag the token as matched and record the matching pattern on it, creating the set on first use.
                         l.Set(typeof(PatternsAnnotations.MatchedPattern), true);
                         if (!l.ContainsKey(typeof(PatternsAnnotations.MatchedPatterns)) || l.Get(typeof(PatternsAnnotations.MatchedPatterns)) == null)
                         {
                             l.Set(typeof(PatternsAnnotations.MatchedPatterns), new HashSet <Pattern>());
                         }
                         SurfacePattern pSur = (SurfacePattern)pEn.Value;
                         System.Diagnostics.Debug.Assert(pSur != null, "Why is " + pEn.Value + " not present in the index?!");
                         System.Diagnostics.Debug.Assert(l.Get(typeof(PatternsAnnotations.MatchedPatterns)) != null, "How come MatchedPatterns class is null for the token. The classes in the key set are " + l.KeySet());
                         l.Get(typeof(PatternsAnnotations.MatchedPatterns)).Add(pSur);
                         // A token holding any of the configured ignore class/value pairs for this label rules the whole phrase out.
                         foreach (KeyValuePair <Type, object> ig in constVars.GetIgnoreWordswithClassesDuringSelection()[label])
                         {
                             if (l.ContainsKey(ig.Key) && l.Get(ig.Key).Equals(ig.Value))
                             {
                                 doNotUse = true;
                             }
                         }
                         bool containsStop = ContainsStopWord(l, constVars.GetCommonEngWords(), PatternFactory.ignoreWordRegex);
                         if (removePhrasesWithStopWords && containsStop)
                         {
                             doNotUse = true;
                         }
                         else
                         {
                             // The word is added unless it is a stop word that is to be stripped from the phrase.
                             if (!containsStop || !removeStopWordsFromSelectedPhrases)
                             {
                                 // Remember whether any included word does not already carry the label.
                                 if (label == null || l.Get(constVars.GetAnswerClass()[label]) == null || !l.Get(constVars.GetAnswerClass()[label]).Equals(label))
                                 {
                                     useWordNotLabeled = true;
                                 }
                                 phrase               += " " + l.Word();
                                 phraseLemma          += " " + l.Lemma();
                                 addedindices[i_2 - s] = true;
                             }
                         }
                     }
                     // A skipped position with included positions on both sides means a word was dropped from the middle of the phrase.
                     for (int i_3 = 0; i_3 < addedindices.Length; i_3++)
                     {
                         if (i_3 > 0 && i_3 < addedindices.Length - 1 && addedindices[i_3 - 1] == true && addedindices[i_3] == false && addedindices[i_3 + 1] == true)
                         {
                             doNotUse = true;
                             break;
                         }
                     }
                     if (!doNotUse)
                     {
                         // e is exclusive, so the recorded span ends at e - 1.
                         matchedTokensByPat.Add(pEn.Value, new Triple <string, int, int>(sentid, s, e - 1));
                         phrase = phrase.Trim();
                         if (!phrase.IsEmpty())
                         {
                             phraseLemma = phraseLemma.Trim();
                             CandidatePhrase candPhrase = CandidatePhrase.CreateOrGet(phrase, phraseLemma);
                             allFreq.IncrementCount(candPhrase, pEn.Value, 1.0);
                             if (!useWordNotLabeled)
                             {
                                 alreadyLabeledPhrases.Add(candPhrase);
                             }
                         }
                     }
                 }
             }
         }
         return(new Triple <TwoDimensionalCounter <CandidatePhrase, E>, CollectionValuedMap <E, Triple <string, int, int> >, ICollection <CandidatePhrase> >(allFreq, matchedTokensByPat, alreadyLabeledPhrases));
     }
     catch (Exception e)
     {
         // Log, then rethrow with the original stack trace.
         logger.Error(e);
         throw;
     }
 }

Example #9

Show file

File: ApplyDepPatterns.cs Project: zerouid/Stanford.CoreNLP.NET

        /// <summary>
        /// For every sentence listed in <c>sentids</c> and every pattern in <c>patterns</c>, obtains the
        /// matched phrases from <c>GetMatchedTokensIndex</c> on the sentence's graph and accumulates them.
        /// </summary>
        /// <returns>
        /// Pair of: phrase/pattern counts; and the token spans matched per pattern as (sentence id, start,
        /// inclusive end). Only phrases that are not rejected and contain a word without the label are recorded.
        /// </returns>
        /// <exception cref="System.Exception"/>
        public virtual Pair <TwoDimensionalCounter <CandidatePhrase, E>, CollectionValuedMap <E, Triple <string, int, int> > > Call()
        {
            // CollectionValuedMap<String, Integer> tokensMatchedPattern = new
            // CollectionValuedMap<String, Integer>();
            TwoDimensionalCounter <CandidatePhrase, E>          allFreq            = new TwoDimensionalCounter <CandidatePhrase, E>();
            CollectionValuedMap <E, Triple <string, int, int> > matchedTokensByPat = new CollectionValuedMap <E, Triple <string, int, int> >();

            foreach (string sentid in sentids)
            {
                DataInstance      sent   = sents[sentid];
                IList <CoreLabel> tokens = sent.GetTokens();
                foreach (KeyValuePair <SemgrexPattern, E> pEn in patterns)
                {
                    if (pEn.Key == null)
                    {
                        throw new Exception("why is the pattern " + pEn + " null?");
                    }
                    // The sentence is cast to DataInstanceDep to reach its graph; the cast is repeated for each pattern.
                    SemanticGraph graph = ((DataInstanceDep)sent).GetGraph();
                    //SemgrexMatcher m = pEn.getKey().matcher(graph);
                    //TokenSequenceMatcher m = pEn.getKey().matcher(sent);
                    //        //Setting this find type can save time in searching - greedy and reluctant quantifiers are not enforced
                    //        m.setFindType(SequenceMatcher.FindType.FIND_ALL);
                    //Higher branch values makes the faster but uses more memory
                    //m.setBranchLimit(5);
                    ICollection <ExtractedPhrase> matched = GetMatchedTokensIndex(graph, pEn.Key, sent, label);
                    foreach (ExtractedPhrase match in matched)
                    {
                        // endIndex is bumped by one so that e can serve as an exclusive upper bound below.
                        int    s                 = match.startIndex;
                        int    e                 = match.endIndex + 1;
                        string phrase            = string.Empty;
                        string phraseLemma       = string.Empty;
                        bool   useWordNotLabeled = false;
                        bool   doNotUse          = false;
                        //find if the neighboring words are labeled - if so - club them together
                        if (constVars.clubNeighboringLabeledWords)
                        {
                            // Extend the start leftwards while the token carries the label and the size check against numWordsCompoundMapped[label] holds.
                            for (int i = s - 1; i >= 0; i--)
                            {
                                if (tokens[i].Get(constVars.GetAnswerClass()[label]).Equals(label) && (e - i + 1) <= PatternFactory.numWordsCompoundMapped[label])
                                {
                                    s = i;
                                }
                                else
                                {
                                    //System.out.println("for phrase " + match + " clubbing earlier word. new s is " + s);
                                    break;
                                }
                            }
                            // Extend the end rightwards under the same two conditions.
                            // NOTE(review): e is an exclusive bound in the loop below (i_2 < e), yet here it is set to the
                            // index of a labeled token, which leaves that token outside the span — confirm intended.
                            for (int i_1 = e; i_1 < tokens.Count; i_1++)
                            {
                                if (tokens[i_1].Get(constVars.GetAnswerClass()[label]).Equals(label) && (i_1 - s + 1) <= PatternFactory.numWordsCompoundMapped[label])
                                {
                                    e = i_1;
                                }
                                else
                                {
                                    //System.out.println("for phrase " + match + " clubbing next word. new e is " + e);
                                    break;
                                }
                            }
                        }
                        //to make sure we discard phrases with stopwords in between, but include the ones in which stop words were removed at the ends if removeStopWordsFromSelectedPhrases is true
                        bool[] addedindices = new bool[e - s];
                        // Arrays.fill(addedindices, false); // get for free on array initialization
                        for (int i_2 = s; i_2 < e; i_2++)
                        {
                            CoreLabel l = tokens[i_2];
                            // Flag the token as matched and record the matching pattern on it, creating the set on first use.
                            l.Set(typeof(PatternsAnnotations.MatchedPattern), true);
                            if (!l.ContainsKey(typeof(PatternsAnnotations.MatchedPatterns)) || l.Get(typeof(PatternsAnnotations.MatchedPatterns)) == null)
                            {
                                l.Set(typeof(PatternsAnnotations.MatchedPatterns), new HashSet <Pattern>());
                            }
                            Pattern pSur = pEn.Value;
                            System.Diagnostics.Debug.Assert(pSur != null, "Why is " + pEn.Value + " not present in the index?!");
                            System.Diagnostics.Debug.Assert(l.Get(typeof(PatternsAnnotations.MatchedPatterns)) != null, "How come MatchedPatterns class is null for the token. The classes in the key set are " + l.KeySet());
                            l.Get(typeof(PatternsAnnotations.MatchedPatterns)).Add(pSur);
                            // A token holding any of the configured ignore class/value pairs for this label rules the whole phrase out.
                            foreach (KeyValuePair <Type, object> ig in constVars.GetIgnoreWordswithClassesDuringSelection()[label])
                            {
                                if (l.ContainsKey(ig.Key) && l.Get(ig.Key).Equals(ig.Value))
                                {
                                    doNotUse = true;
                                }
                            }
                            bool containsStop = ContainsStopWord(l, constVars.GetCommonEngWords(), PatternFactory.ignoreWordRegex);
                            if (removePhrasesWithStopWords && containsStop)
                            {
                                doNotUse = true;
                            }
                            else
                            {
                                // The word is added unless it is a stop word that is to be stripped from the phrase.
                                if (!containsStop || !removeStopWordsFromSelectedPhrases)
                                {
                                    // Remember whether any included word does not already carry the label.
                                    if (label == null || l.Get(constVars.GetAnswerClass()[label]) == null || !l.Get(constVars.GetAnswerClass()[label]).Equals(label))
                                    {
                                        useWordNotLabeled = true;
                                    }
                                    phrase               += " " + l.Word();
                                    phraseLemma          += " " + l.Lemma();
                                    addedindices[i_2 - s] = true;
                                }
                            }
                        }
                        // A skipped position with included positions on both sides means a word was dropped from the middle of the phrase.
                        for (int i_3 = 0; i_3 < addedindices.Length; i_3++)
                        {
                            if (i_3 > 0 && i_3 < addedindices.Length - 1 && addedindices[i_3 - 1] == true && addedindices[i_3] == false && addedindices[i_3 + 1] == true)
                            {
                                doNotUse = true;
                                break;
                            }
                        }
                        if (!doNotUse && useWordNotLabeled)
                        {
                            // e is exclusive, so the recorded span ends at e - 1.
                            matchedTokensByPat.Add(pEn.Value, new Triple <string, int, int>(sentid, s, e - 1));
                            // This inner test is always true here because the enclosing condition already requires useWordNotLabeled.
                            if (useWordNotLabeled)
                            {
                                phrase      = phrase.Trim();
                                phraseLemma = phraseLemma.Trim();
                                allFreq.IncrementCount(CandidatePhrase.CreateOrGet(phrase, phraseLemma, match.GetFeatures()), pEn.Value, 1.0);
                            }
                        }
                    }
                }
            }
            return(new Pair <TwoDimensionalCounter <CandidatePhrase, E>, CollectionValuedMap <E, Triple <string, int, int> > >(allFreq, matchedTokensByPat));
        }

Example #10

Show file

        /// <summary>
        /// Pre-processes a dataset annotation: runs the NLP processor when the first sentence carries no
        /// parse tree, converts tree labels via <c>ConvertToCoreLabels</c>, generates index spans where
        /// needed, and assigns a syntactic head to every entity mention.
        /// </summary>
        /// <param name="dataset">Annotation whose sentences are processed.</param>
        /// <exception cref="Exception">A sentence has no parse tree.</exception>
        public virtual void PreProcessSentences(Annotation dataset)
        {
            logger.Severe("GenericDataSetReader: Started pre-processing the corpus...");

            // Run the processor (NER, parsing, ...) unless syntactic annotation is already there,
            // e.g. loaded from offline files. Only the first sentence is inspected to decide.
            if (processor != null)
            {
                IList <ICoreMap> existing        = dataset.Get(typeof(CoreAnnotations.SentencesAnnotation));
                bool             needsAnnotation = existing.Count > 0 && !existing[0].ContainsKey(typeof(TreeCoreAnnotations.TreeAnnotation));
                if (needsAnnotation)
                {
                    logger.Info("Annotating dataset with " + processor);
                    processor.Annotate(dataset);
                }
                else
                {
                    logger.Info("Found existing syntactic annotations. Will not use the NLP processor.");
                }
            }

            // The sentence list is fetched again after the optional annotation step.
            IList <ICoreMap> allSentences = dataset.Get(typeof(CoreAnnotations.SentencesAnnotation));

            logger.Fine("Extracted " + allSentences.Count + " sentences.");
            foreach (ICoreMap sentence in allSentences)
            {
                IList <CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));
                logger.Fine("Processing sentence " + tokens);

                Tree tree = sentence.Get(typeof(TreeCoreAnnotations.TreeAnnotation));
                if (tree == null)
                {
                    throw new Exception("ERROR: MR requires full syntactic analysis!");
                }

                // Extra information, such as the span of each tree, is kept in CoreLabels,
                // so the tree labels are converted first.
                ConvertToCoreLabels(tree);

                // Generate the index spans when forced, or when the root label has neither bound.
                CoreLabel rootLabel     = (CoreLabel)tree.Label();
                bool      generateSpans = forceGenerationOfIndexSpans || !(rootLabel.ContainsKey(typeof(CoreAnnotations.BeginIndexAnnotation)) || rootLabel.ContainsKey(typeof(CoreAnnotations.EndIndexAnnotation)));
                if (generateSpans)
                {
                    tree.IndexSpans(0);
                    logger.Fine("Index spans were generated.");
                }
                else
                {
                    logger.Fine("Index spans were NOT generated.");
                }
                logger.Fine("Parse tree using CoreLabel:\n" + tree.PennString());

                // Match every entity mention of the sentence against the syntactic tree.
                if (sentence.Get(typeof(MachineReadingAnnotations.EntityMentionsAnnotation)) == null)
                {
                    continue;
                }
                foreach (EntityMention mention in sentence.Get(typeof(MachineReadingAnnotations.EntityMentionsAnnotation)))
                {
                    logger.Fine("Finding head for entity: " + mention);
                    int headIndex = AssignSyntacticHead(mention, tree, tokens, calculateHeadSpan);
                    logger.Fine("Syntactic head of mention \"" + mention + "\" is: " + tokens[headIndex].Word());
                    System.Diagnostics.Debug.Assert(mention.GetExtent() != null);
                    System.Diagnostics.Debug.Assert(mention.GetHead() != null);
                    System.Diagnostics.Debug.Assert(mention.GetSyntacticHeadTokenPosition() >= 0);
                }
            }
            logger.Severe("GenericDataSetReader: Pre-processing complete.");
        }

Example #11

Show file

File: ApplyPatternsMulti.cs Project: zerouid/Stanford.CoreNLP.NET

        /// <summary>
        /// Runs the multi-pattern matcher over the tokens of every sentence listed in
        /// <c>sentids</c> and collects the phrases captured by the <c>$term</c> group of each match.
        /// </summary>
        /// <remarks>
        /// Side effects on the sentence tokens: every token inside a (possibly widened) match span gets
        /// <c>PatternsAnnotations.MatchedPattern</c> set to <c>true</c> and has the matching pattern
        /// added to its <c>PatternsAnnotations.MatchedPatterns</c> set, even when the phrase itself is
        /// rejected afterwards.
        /// </remarks>
        /// <returns>
        /// A pair whose first element counts (phrase, lemmatized phrase) occurrences per pattern and
        /// whose second element maps each pattern to the (sentence id, start, end) triples of its
        /// matches. The triples hold the span reported by the matcher, before any widening over
        /// neighboring labeled tokens.
        /// </returns>
        /// <exception cref="System.Exception"/>
        public virtual Pair <TwoDimensionalCounter <Pair <string, string>, E>, CollectionValuedMap <E, Triple <string, int, int> > > Call()
        {
            CollectionValuedMap <E, Triple <string, int, int> > matchedTokensByPat = new CollectionValuedMap <E, Triple <string, int, int> >();
            TwoDimensionalCounter <Pair <string, string>, E>    allFreq            = new TwoDimensionalCounter <Pair <string, string>, E>();

            foreach (string sentid in sentids)
            {
                IList <CoreLabel> sent = sents[sentid].GetTokens();
                //FIND_ALL is faster than FIND_NONOVERLAP
                IEnumerable <ISequenceMatchResult <ICoreMap> > matched = multiPatternMatcher.Find(sent, SequenceMatcher.FindType.FindAll);
                foreach (ISequenceMatchResult <ICoreMap> m in matched)
                {
                    int s          = m.Start("$term");
                    int e          = m.End("$term");
                    E   matchedPat = patterns[m.Pattern()];
                    matchedTokensByPat.Add(matchedPat, new Triple <string, int, int>(sentid, s, e));
                    string phrase            = string.Empty;
                    string phraseLemma       = string.Empty;
                    bool   useWordNotLabeled = false;
                    bool   doNotUse          = false;
                    //find if the neighboring words are labeled - if so - club them together
                    if (constVars.clubNeighboringLabeledWords)
                    {
                        // Widen the span over adjacent tokens that already carry this label.
                        // The walk stops at the first unlabeled token OR at the sentence boundary, so a
                        // labeled run that touches the first/last token is clubbed as well.
                        // object.Equals is used so that a token without the answer annotation (null)
                        // counts as "not labeled" instead of throwing.
                        while (s > 0 && object.Equals(sent[s - 1].Get(constVars.GetAnswerClass()[label]), label))
                        {
                            s--;
                        }
                        while (e < sent.Count && object.Equals(sent[e].Get(constVars.GetAnswerClass()[label]), label))
                        {
                            e++;
                        }
                    }
                    //to make sure we discard phrases with stopwords in between, but include the ones in which stop words were removed at the ends if removeStopWordsFromSelectedPhrases is true
                    // addedindices[k] is true when token (s + k) contributed to the phrase; defaults to false.
                    bool[] addedindices = new bool[e - s];
                    for (int pos = s; pos < e; pos++)
                    {
                        CoreLabel l = sent[pos];
                        l.Set(typeof(PatternsAnnotations.MatchedPattern), true);
                        if (!l.ContainsKey(typeof(PatternsAnnotations.MatchedPatterns)))
                        {
                            l.Set(typeof(PatternsAnnotations.MatchedPatterns), new HashSet <Pattern>());
                        }
                        l.Get(typeof(PatternsAnnotations.MatchedPatterns)).Add(matchedPat);
                        // Reject the phrase if this token carries any (annotation, value) pair configured to be ignored for the label.
                        foreach (KeyValuePair <Type, object> ig in constVars.GetIgnoreWordswithClassesDuringSelection()[label])
                        {
                            if (l.ContainsKey(ig.Key) && l.Get(ig.Key).Equals(ig.Value))
                            {
                                doNotUse = true;
                                break;
                            }
                        }
                        bool containsStop = ContainsStopWord(l, constVars.GetCommonEngWords(), PatternFactory.ignoreWordRegex);
                        if (removePhrasesWithStopWords && containsStop)
                        {
                            doNotUse = true;
                        }
                        else
                        {
                            if (!containsStop || !removeStopWordsFromSelectedPhrases)
                            {
                                // The phrase is only counted if at least one kept token is not already labeled with this label.
                                if (label == null || l.Get(constVars.GetAnswerClass()[label]) == null || !l.Get(constVars.GetAnswerClass()[label]).Equals(label.ToString()))
                                {
                                    useWordNotLabeled = true;
                                }
                                phrase                += " " + l.Word();
                                phraseLemma           += " " + l.Lemma();
                                addedindices[pos - s]  = true;
                            }
                        }
                    }
                    // A dropped token with a kept token directly on each side means a stop word was
                    // removed from inside the phrase rather than from one of its ends: discard the phrase.
                    for (int k = 1; k < addedindices.Length - 1; k++)
                    {
                        if (addedindices[k - 1] && !addedindices[k] && addedindices[k + 1])
                        {
                            doNotUse = true;
                            break;
                        }
                    }
                    if (!doNotUse && useWordNotLabeled)
                    {
                        phrase      = phrase.Trim();
                        phraseLemma = phraseLemma.Trim();
                        allFreq.IncrementCount(new Pair <string, string>(phrase, phraseLemma), matchedPat, 1.0);
                    }
                }
            }
            return(new Pair <TwoDimensionalCounter <Pair <string, string>, E>, CollectionValuedMap <E, Triple <string, int, int> > >(allFreq, matchedTokensByPat));
        }