Exemplo n.º 1
0
 /*
  * void runParallelApplyPats(Map<String, List<CoreLabel>> sents, Set<String> sentIds, String label, Counter<E> patternsLearnedThisIter,  TwoDimensionalCounter<Pair<String, String>, Integer> wordsandLemmaPatExtracted,
  * CollectionValuedMap<Integer, Triple<String, Integer, Integer>> matchedTokensByPat) throws InterruptedException, ExecutionException {
  * List<String> keyset = new ArrayList<String>(sentIds);
  * List<String> notAllowedClasses = new ArrayList<String>();
  *
  * if(constVars.doNotExtractPhraseAnyWordLabeledOtherClass){
  * for(String l: constVars.getAnswerClass().keySet()){
  * if(!l.equals(label)){
  * notAllowedClasses.add(l+":"+l);
  * }
  * }
  * notAllowedClasses.add("OTHERSEM:OTHERSEM");
  * }
  *
  * //Apply the patterns and extract candidate phrases
  * int num = 0;
  * if (constVars.numThreads == 1)
  * num = keyset.size();
  * else
  * num = keyset.size() / (constVars.numThreads - 1);
  * ExecutorService executor = Executors.newFixedThreadPool(constVars.numThreads);
  * List<Future<Pair<TwoDimensionalCounter<Pair<String, String>, Integer>, CollectionValuedMap<Integer, Triple<String, Integer, Integer>>>>> list = new ArrayList<Future<Pair<TwoDimensionalCounter<Pair<String, String>, Integer>, CollectionValuedMap<Integer, Triple<String, Integer, Integer>>>>>();
  * for (int i = 0; i < constVars.numThreads; i++) {
  *
  * Callable<Pair<TwoDimensionalCounter<Pair<String, String>, Integer>, CollectionValuedMap<Integer, Triple<String, Integer, Integer>>>> task = null;
  * Map<TokenSequencePattern, Integer> patternsLearnedThisIterConverted = new HashMap<TokenSequencePattern , Integer>();
  * for (Integer pindex : patternsLearnedThisIter.keySet()){
  * SurfacePattern p = constVars.getPatternIndex().get(pindex);
  * TokenSequencePattern pat = TokenSequencePattern.compile(constVars.env.get(label), p.toString(notAllowedClasses));
  * patternsLearnedThisIterConverted.put(pat, pindex);
  * }
  *
  * task = new ApplyPatternsMulti(sents, keyset.subList(i * num,
  * Math.min(keyset.size(), (i + 1) * num)), patternsLearnedThisIterConverted, label,
  * constVars.removeStopWordsFromSelectedPhrases,
  * constVars.removePhrasesWithStopWords, constVars);
  *
  * Future<Pair<TwoDimensionalCounter<Pair<String, String>, Integer>, CollectionValuedMap<Integer, Triple<String, Integer, Integer>>>> submit = executor
  * .submit(task);
  * list.add(submit);
  * }
  *
  * // Now retrieve the result
  * for (Future<Pair<TwoDimensionalCounter<Pair<String, String>, Integer>, CollectionValuedMap<Integer, Triple<String, Integer, Integer>>>> future : list) {
  * try{
  * Pair<TwoDimensionalCounter<Pair<String, String>, Integer>, CollectionValuedMap<Integer, Triple<String, Integer, Integer>>> result = future
  * .get();
  * wordsandLemmaPatExtracted.addAll(result.first());
  * matchedTokensByPat.addAll(result.second());
  * }catch(Exception e){
  * executor.shutdownNow();
  * throw new RuntimeException(e);
  * }
  * }
  * executor.shutdown();
  * }
  */
 protected internal virtual IDictionary <E, IDictionary <string, DataInstance> > GetSentences(IDictionary <E, ICollection <string> > sentids)
 {
     try
     {
         ICollection <File> files = new HashSet <File>();
         IDictionary <E, IDictionary <string, DataInstance> > sentsAll = new Dictionary <E, IDictionary <string, DataInstance> >();
         CollectionValuedMap <string, E> sentIds2Pats = new CollectionValuedMap <string, E>();
         foreach (KeyValuePair <E, ICollection <string> > setEn in sentids)
         {
             if (!sentsAll.Contains(setEn.Key))
             {
                 sentsAll[setEn.Key] = new Dictionary <string, DataInstance>();
             }
             foreach (string s in setEn.Value)
             {
                 sentIds2Pats.Add(s, setEn.Key);
                 if (constVars.batchProcessSents)
                 {
                     File f = Data.sentId2File[s];
                     System.Diagnostics.Debug.Assert(f != null, "How come no file for sentence " + s);
                     files.Add(f);
                 }
             }
         }
         if (constVars.batchProcessSents)
         {
             foreach (File f in files)
             {
                 IDictionary <string, DataInstance> sentsf = IOUtils.ReadObjectFromFile(f);
                 foreach (KeyValuePair <string, DataInstance> s in sentsf)
                 {
                     foreach (E pat in sentIds2Pats[s.Key])
                     {
                         sentsAll[pat][s.Key] = s.Value;
                     }
                 }
             }
         }
         else
         {
             foreach (KeyValuePair <string, DataInstance> s in Data.sents)
             {
                 foreach (E pat in sentIds2Pats[s.Key])
                 {
                     sentsAll[pat][s.Key] = s.Value;
                 }
             }
         }
         //      /System.out.println("All sentences are " + sentsAll.entrySet().stream().map( x -> constVars.patternIndex.get(x.getKey())+":"+x.getValue()).collect(Collectors.toList()));
         return(sentsAll);
     }
     catch (TypeLoadException e)
     {
         throw new Exception(e);
     }
     catch (IOException e1)
     {
         throw new Exception(e1);
     }
 }
Exemplo n.º 2
0
 /// <summary>Add the given sentence to the statistics counted.</summary>
 /// <remarks>
 /// Add the given sentence to the statistics counted.  Can
 /// be called multiple times with different sentences.
 /// </remarks>
 public virtual void Train(IList <TaggedWord> sentence, double weight)
 {
     featExtractor.Train(sentence, weight);
     foreach (TaggedWord word in sentence)
     {
         datumCounter.IncrementCount(word, weight);
         tagsForWord.Add(word.Word(), word.Tag());
     }
 }
 protected internal virtual void AddInstances(ICollection <RVFDatum <K, V> > datums)
 {
     foreach (RVFDatum <K, V> datum in datums)
     {
         K            label = datum.Label();
         ICounter <V> vec   = datum.AsFeaturesCounter();
         instances.Add(label, vec);
         classLookup[vec] = label;
     }
 }
Exemplo n.º 4
0
 /// <summary>Mark twin mentions: All mention boundaries should be matched</summary>
 private void FindTwinMentionsStrict()
 {
     for (int sentNum = 0; sentNum < goldOrderedMentionsBySentence.Count; sentNum++)
     {
         IList <Mention> golds    = goldOrderedMentionsBySentence[sentNum];
         IList <Mention> predicts = predictedOrderedMentionsBySentence[sentNum];
         // For CoNLL training there are some documents with gold mentions with the same position offsets
         // See /scr/nlp/data/conll-2011/v2/data/train/data/english/annotations/nw/wsj/09/wsj_0990.v2_auto_conll
         //  (Packwood - Roth)
         CollectionValuedMap <IntPair, Mention> goldMentionPositions = new CollectionValuedMap <IntPair, Mention>();
         foreach (Mention g in golds)
         {
             IntPair ip = new IntPair(g.startIndex, g.endIndex);
             if (goldMentionPositions.Contains(ip))
             {
                 StringBuilder existingMentions = new StringBuilder();
                 foreach (Mention eg in goldMentionPositions[ip])
                 {
                     if (existingMentions.Length > 0)
                     {
                         existingMentions.Append(",");
                     }
                     existingMentions.Append(eg.mentionID);
                 }
                 SieveCoreferenceSystem.logger.Warning("WARNING: gold mentions with the same offsets: " + ip + " mentions=" + g.mentionID + "," + existingMentions + ", " + g.SpanToString());
             }
             //assert(!goldMentionPositions.containsKey(ip));
             goldMentionPositions.Add(new IntPair(g.startIndex, g.endIndex), g);
         }
         foreach (Mention p in predicts)
         {
             IntPair pos = new IntPair(p.startIndex, p.endIndex);
             if (goldMentionPositions.Contains(pos))
             {
                 ICollection <Mention> cm = goldMentionPositions[pos];
                 Mention g_1 = cm.GetEnumerator().Current;
                 cm.Remove(g_1);
                 p.mentionID  = g_1.mentionID;
                 p.twinless   = false;
                 g_1.twinless = false;
             }
         }
         // temp: for making easy to recognize twinless mention
         foreach (Mention p_1 in predicts)
         {
             if (p_1.twinless)
             {
                 p_1.mentionID += 10000;
             }
         }
     }
 }
        private RVFDatum <string, string> GetDatum(CoreLabel[] sent, int i)
        {
            ICounter <string> feat = new ClassicCounter <string>();
            CoreLabel         l    = sent[i];
            string            label;

            if (l.Get(answerClass).ToString().Equals(answerLabel))
            {
                label = answerLabel;
            }
            else
            {
                label = "O";
            }
            CollectionValuedMap <string, CandidatePhrase> matchedPhrases = l.Get(typeof(PatternsAnnotations.MatchedPhrases));

            if (matchedPhrases == null)
            {
                matchedPhrases = new CollectionValuedMap <string, CandidatePhrase>();
                matchedPhrases.Add(label, CandidatePhrase.CreateOrGet(l.Word()));
            }
            foreach (CandidatePhrase w in matchedPhrases.AllValues())
            {
                int num = this.clusterIds[w.GetPhrase()];
                if (num == null)
                {
                    num = -1;
                }
                feat.SetCount("Cluster-" + num, 1.0);
            }
            // feat.incrementCount("WORD-" + l.word());
            // feat.incrementCount("LEMMA-" + l.lemma());
            // feat.incrementCount("TAG-" + l.tag());
            int window = 0;

            for (int j = Math.Max(0, i - window); j < i; j++)
            {
                CoreLabel lj = sent[j];
                feat.IncrementCount("PREV-" + "WORD-" + lj.Word());
                feat.IncrementCount("PREV-" + "LEMMA-" + lj.Lemma());
                feat.IncrementCount("PREV-" + "TAG-" + lj.Tag());
            }
            for (int j_1 = i + 1; j_1 < sent.Length && j_1 <= i + window; j_1++)
            {
                CoreLabel lj = sent[j_1];
                feat.IncrementCount("NEXT-" + "WORD-" + lj.Word());
                feat.IncrementCount("NEXT-" + "LEMMA-" + lj.Lemma());
                feat.IncrementCount("NEXT-" + "TAG-" + lj.Tag());
            }
            // System.out.println("adding " + l.word() + " as " + label);
            return(new RVFDatum <string, string>(feat, label));
        }
        public virtual CollectionValuedMap <string, JollyDayHolidays.JollyHoliday> GetAllHolidaysCVMap(ICollection <Holiday> allHolidays)
        {
            CollectionValuedMap <string, JollyDayHolidays.JollyHoliday> map = new CollectionValuedMap <string, JollyDayHolidays.JollyHoliday>();

            foreach (Holiday h in allHolidays)
            {
                string descKey = h.GetDescriptionPropertiesKey();
                if (descKey != null)
                {
                    descKey = descKey.ReplaceAll(".*\\.", string.Empty);
                    JollyDayHolidays.JollyHoliday jh = new JollyDayHolidays.JollyHoliday(descKey, holidayManager, h);
                    map.Add(jh.label, jh);
                }
            }
            return(map);
        }
 public virtual void SetUp()
 {
     System.Diagnostics.Debug.Assert((wordClassClusterFile != null));
     if (wordClassClusterFile != null)
     {
         foreach (string line in IOUtils.ReadLines(wordClassClusterFile))
         {
             string[] t   = line.Split("\\s+");
             int      num = System.Convert.ToInt32(t[1]);
             clusterIds[t[0]] = num;
             clusters.Add(num, t[0]);
         }
     }
     if (negativeWordsFiles != null)
     {
         foreach (string file in negativeWordsFiles.Split("[,;]"))
         {
             Sharpen.Collections.AddAll(negativeWords, IOUtils.LinesFromFile(file));
         }
         System.Console.Out.WriteLine("number of negative words from lists " + negativeWords.Count);
     }
 }
 /// <exception cref="System.Exception"/>
 public virtual Triple <TwoDimensionalCounter <CandidatePhrase, E>, CollectionValuedMap <E, Triple <string, int, int> >, ICollection <CandidatePhrase> > Call()
 {
     // CollectionValuedMap<String, Integer> tokensMatchedPattern = new
     // CollectionValuedMap<String, Integer>();
     try
     {
         ICollection <CandidatePhrase> alreadyLabeledPhrases                    = new HashSet <CandidatePhrase>();
         TwoDimensionalCounter <CandidatePhrase, E>          allFreq            = new TwoDimensionalCounter <CandidatePhrase, E>();
         CollectionValuedMap <E, Triple <string, int, int> > matchedTokensByPat = new CollectionValuedMap <E, Triple <string, int, int> >();
         foreach (string sentid in sentids)
         {
             IList <CoreLabel> sent = sents[sentid].GetTokens();
             foreach (KeyValuePair <TokenSequencePattern, E> pEn in patterns)
             {
                 if (pEn.Key == null)
                 {
                     throw new Exception("why is the pattern " + pEn + " null?");
                 }
                 TokenSequenceMatcher m = ((TokenSequenceMatcher)pEn.Key.GetMatcher(sent));
                 //        //Setting this find type can save time in searching - greedy and reluctant quantifiers are not enforced
                 //        m.setFindType(SequenceMatcher.FindType.FIND_ALL);
                 //Higher branch values makes the faster but uses more memory
                 m.SetBranchLimit(5);
                 while (m.Find())
                 {
                     int s = m.Start("$term");
                     int e = m.End("$term");
                     System.Diagnostics.Debug.Assert(e - s <= PatternFactory.numWordsCompoundMapped[label], "How come the pattern " + pEn.Key + " is extracting phrases longer than numWordsCompound of " + PatternFactory.numWordsCompoundMapped[label] + " for label "
                                                     + label);
                     string phrase            = string.Empty;
                     string phraseLemma       = string.Empty;
                     bool   useWordNotLabeled = false;
                     bool   doNotUse          = false;
                     //find if the neighboring words are labeled - if so - club them together
                     if (constVars.clubNeighboringLabeledWords)
                     {
                         for (int i = s - 1; i >= 0; i--)
                         {
                             if (!sent[i].Get(constVars.GetAnswerClass()[label]).Equals(label))
                             {
                                 s = i + 1;
                                 break;
                             }
                         }
                         for (int i_1 = e; i_1 < sent.Count; i_1++)
                         {
                             if (!sent[i_1].Get(constVars.GetAnswerClass()[label]).Equals(label))
                             {
                                 e = i_1;
                                 break;
                             }
                         }
                     }
                     //to make sure we discard phrases with stopwords in between, but include the ones in which stop words were removed at the ends if removeStopWordsFromSelectedPhrases is true
                     bool[] addedindices = new bool[e - s];
                     // Arrays.fill(addedindices, false); // not needed as initialized false
                     for (int i_2 = s; i_2 < e; i_2++)
                     {
                         CoreLabel l = sent[i_2];
                         l.Set(typeof(PatternsAnnotations.MatchedPattern), true);
                         if (!l.ContainsKey(typeof(PatternsAnnotations.MatchedPatterns)) || l.Get(typeof(PatternsAnnotations.MatchedPatterns)) == null)
                         {
                             l.Set(typeof(PatternsAnnotations.MatchedPatterns), new HashSet <Pattern>());
                         }
                         SurfacePattern pSur = (SurfacePattern)pEn.Value;
                         System.Diagnostics.Debug.Assert(pSur != null, "Why is " + pEn.Value + " not present in the index?!");
                         System.Diagnostics.Debug.Assert(l.Get(typeof(PatternsAnnotations.MatchedPatterns)) != null, "How come MatchedPatterns class is null for the token. The classes in the key set are " + l.KeySet());
                         l.Get(typeof(PatternsAnnotations.MatchedPatterns)).Add(pSur);
                         foreach (KeyValuePair <Type, object> ig in constVars.GetIgnoreWordswithClassesDuringSelection()[label])
                         {
                             if (l.ContainsKey(ig.Key) && l.Get(ig.Key).Equals(ig.Value))
                             {
                                 doNotUse = true;
                             }
                         }
                         bool containsStop = ContainsStopWord(l, constVars.GetCommonEngWords(), PatternFactory.ignoreWordRegex);
                         if (removePhrasesWithStopWords && containsStop)
                         {
                             doNotUse = true;
                         }
                         else
                         {
                             if (!containsStop || !removeStopWordsFromSelectedPhrases)
                             {
                                 if (label == null || l.Get(constVars.GetAnswerClass()[label]) == null || !l.Get(constVars.GetAnswerClass()[label]).Equals(label))
                                 {
                                     useWordNotLabeled = true;
                                 }
                                 phrase               += " " + l.Word();
                                 phraseLemma          += " " + l.Lemma();
                                 addedindices[i_2 - s] = true;
                             }
                         }
                     }
                     for (int i_3 = 0; i_3 < addedindices.Length; i_3++)
                     {
                         if (i_3 > 0 && i_3 < addedindices.Length - 1 && addedindices[i_3 - 1] == true && addedindices[i_3] == false && addedindices[i_3 + 1] == true)
                         {
                             doNotUse = true;
                             break;
                         }
                     }
                     if (!doNotUse)
                     {
                         matchedTokensByPat.Add(pEn.Value, new Triple <string, int, int>(sentid, s, e - 1));
                         phrase = phrase.Trim();
                         if (!phrase.IsEmpty())
                         {
                             phraseLemma = phraseLemma.Trim();
                             CandidatePhrase candPhrase = CandidatePhrase.CreateOrGet(phrase, phraseLemma);
                             allFreq.IncrementCount(candPhrase, pEn.Value, 1.0);
                             if (!useWordNotLabeled)
                             {
                                 alreadyLabeledPhrases.Add(candPhrase);
                             }
                         }
                     }
                 }
             }
         }
         return(new Triple <TwoDimensionalCounter <CandidatePhrase, E>, CollectionValuedMap <E, Triple <string, int, int> >, ICollection <CandidatePhrase> >(allFreq, matchedTokensByPat, alreadyLabeledPhrases));
     }
     catch (Exception e)
     {
         logger.Error(e);
         throw;
     }
 }
        /// <exception cref="System.Exception"/>
        public virtual Pair <TwoDimensionalCounter <CandidatePhrase, E>, CollectionValuedMap <E, Triple <string, int, int> > > Call()
        {
            // CollectionValuedMap<String, Integer> tokensMatchedPattern = new
            // CollectionValuedMap<String, Integer>();
            TwoDimensionalCounter <CandidatePhrase, E>          allFreq            = new TwoDimensionalCounter <CandidatePhrase, E>();
            CollectionValuedMap <E, Triple <string, int, int> > matchedTokensByPat = new CollectionValuedMap <E, Triple <string, int, int> >();

            foreach (string sentid in sentids)
            {
                DataInstance      sent   = sents[sentid];
                IList <CoreLabel> tokens = sent.GetTokens();
                foreach (KeyValuePair <SemgrexPattern, E> pEn in patterns)
                {
                    if (pEn.Key == null)
                    {
                        throw new Exception("why is the pattern " + pEn + " null?");
                    }
                    SemanticGraph graph = ((DataInstanceDep)sent).GetGraph();
                    //SemgrexMatcher m = pEn.getKey().matcher(graph);
                    //TokenSequenceMatcher m = pEn.getKey().matcher(sent);
                    //        //Setting this find type can save time in searching - greedy and reluctant quantifiers are not enforced
                    //        m.setFindType(SequenceMatcher.FindType.FIND_ALL);
                    //Higher branch values makes the faster but uses more memory
                    //m.setBranchLimit(5);
                    ICollection <ExtractedPhrase> matched = GetMatchedTokensIndex(graph, pEn.Key, sent, label);
                    foreach (ExtractedPhrase match in matched)
                    {
                        int    s                 = match.startIndex;
                        int    e                 = match.endIndex + 1;
                        string phrase            = string.Empty;
                        string phraseLemma       = string.Empty;
                        bool   useWordNotLabeled = false;
                        bool   doNotUse          = false;
                        //find if the neighboring words are labeled - if so - club them together
                        if (constVars.clubNeighboringLabeledWords)
                        {
                            for (int i = s - 1; i >= 0; i--)
                            {
                                if (tokens[i].Get(constVars.GetAnswerClass()[label]).Equals(label) && (e - i + 1) <= PatternFactory.numWordsCompoundMapped[label])
                                {
                                    s = i;
                                }
                                else
                                {
                                    //System.out.println("for phrase " + match + " clubbing earlier word. new s is " + s);
                                    break;
                                }
                            }
                            for (int i_1 = e; i_1 < tokens.Count; i_1++)
                            {
                                if (tokens[i_1].Get(constVars.GetAnswerClass()[label]).Equals(label) && (i_1 - s + 1) <= PatternFactory.numWordsCompoundMapped[label])
                                {
                                    e = i_1;
                                }
                                else
                                {
                                    //System.out.println("for phrase " + match + " clubbing next word. new e is " + e);
                                    break;
                                }
                            }
                        }
                        //to make sure we discard phrases with stopwords in between, but include the ones in which stop words were removed at the ends if removeStopWordsFromSelectedPhrases is true
                        bool[] addedindices = new bool[e - s];
                        // Arrays.fill(addedindices, false); // get for free on array initialization
                        for (int i_2 = s; i_2 < e; i_2++)
                        {
                            CoreLabel l = tokens[i_2];
                            l.Set(typeof(PatternsAnnotations.MatchedPattern), true);
                            if (!l.ContainsKey(typeof(PatternsAnnotations.MatchedPatterns)) || l.Get(typeof(PatternsAnnotations.MatchedPatterns)) == null)
                            {
                                l.Set(typeof(PatternsAnnotations.MatchedPatterns), new HashSet <Pattern>());
                            }
                            Pattern pSur = pEn.Value;
                            System.Diagnostics.Debug.Assert(pSur != null, "Why is " + pEn.Value + " not present in the index?!");
                            System.Diagnostics.Debug.Assert(l.Get(typeof(PatternsAnnotations.MatchedPatterns)) != null, "How come MatchedPatterns class is null for the token. The classes in the key set are " + l.KeySet());
                            l.Get(typeof(PatternsAnnotations.MatchedPatterns)).Add(pSur);
                            foreach (KeyValuePair <Type, object> ig in constVars.GetIgnoreWordswithClassesDuringSelection()[label])
                            {
                                if (l.ContainsKey(ig.Key) && l.Get(ig.Key).Equals(ig.Value))
                                {
                                    doNotUse = true;
                                }
                            }
                            bool containsStop = ContainsStopWord(l, constVars.GetCommonEngWords(), PatternFactory.ignoreWordRegex);
                            if (removePhrasesWithStopWords && containsStop)
                            {
                                doNotUse = true;
                            }
                            else
                            {
                                if (!containsStop || !removeStopWordsFromSelectedPhrases)
                                {
                                    if (label == null || l.Get(constVars.GetAnswerClass()[label]) == null || !l.Get(constVars.GetAnswerClass()[label]).Equals(label))
                                    {
                                        useWordNotLabeled = true;
                                    }
                                    phrase               += " " + l.Word();
                                    phraseLemma          += " " + l.Lemma();
                                    addedindices[i_2 - s] = true;
                                }
                            }
                        }
                        for (int i_3 = 0; i_3 < addedindices.Length; i_3++)
                        {
                            if (i_3 > 0 && i_3 < addedindices.Length - 1 && addedindices[i_3 - 1] == true && addedindices[i_3] == false && addedindices[i_3 + 1] == true)
                            {
                                doNotUse = true;
                                break;
                            }
                        }
                        if (!doNotUse && useWordNotLabeled)
                        {
                            matchedTokensByPat.Add(pEn.Value, new Triple <string, int, int>(sentid, s, e - 1));
                            if (useWordNotLabeled)
                            {
                                phrase      = phrase.Trim();
                                phraseLemma = phraseLemma.Trim();
                                allFreq.IncrementCount(CandidatePhrase.CreateOrGet(phrase, phraseLemma, match.GetFeatures()), pEn.Value, 1.0);
                            }
                        }
                    }
                }
            }
            return(new Pair <TwoDimensionalCounter <CandidatePhrase, E>, CollectionValuedMap <E, Triple <string, int, int> > >(allFreq, matchedTokensByPat));
        }
        /// <exception cref="System.Exception"/>
        public virtual Pair <TwoDimensionalCounter <Pair <string, string>, E>, CollectionValuedMap <E, Triple <string, int, int> > > Call()
        {
            //CollectionValuedMap<String, Integer> tokensMatchedPattern = new CollectionValuedMap<String, Integer>();
            CollectionValuedMap <E, Triple <string, int, int> > matchedTokensByPat = new CollectionValuedMap <E, Triple <string, int, int> >();
            TwoDimensionalCounter <Pair <string, string>, E>    allFreq            = new TwoDimensionalCounter <Pair <string, string>, E>();

            foreach (string sentid in sentids)
            {
                IList <CoreLabel> sent = sents[sentid].GetTokens();
                //FIND_ALL is faster than FIND_NONOVERLAP
                IEnumerable <ISequenceMatchResult <ICoreMap> > matched = multiPatternMatcher.Find(sent, SequenceMatcher.FindType.FindAll);
                foreach (ISequenceMatchResult <ICoreMap> m in matched)
                {
                    int s          = m.Start("$term");
                    int e          = m.End("$term");
                    E   matchedPat = patterns[m.Pattern()];
                    matchedTokensByPat.Add(matchedPat, new Triple <string, int, int>(sentid, s, e));
                    string phrase            = string.Empty;
                    string phraseLemma       = string.Empty;
                    bool   useWordNotLabeled = false;
                    bool   doNotUse          = false;
                    //find if the neighboring words are labeled - if so - club them together
                    if (constVars.clubNeighboringLabeledWords)
                    {
                        for (int i = s - 1; i >= 0; i--)
                        {
                            if (!sent[i].Get(constVars.GetAnswerClass()[label]).Equals(label))
                            {
                                s = i + 1;
                                break;
                            }
                        }
                        for (int i_1 = e; i_1 < sent.Count; i_1++)
                        {
                            if (!sent[i_1].Get(constVars.GetAnswerClass()[label]).Equals(label))
                            {
                                e = i_1;
                                break;
                            }
                        }
                    }
                    //to make sure we discard phrases with stopwords in between, but include the ones in which stop words were removed at the ends if removeStopWordsFromSelectedPhrases is true
                    bool[] addedindices = new bool[e - s];
                    // Arrays.fill(addedindices, false); // unneeded as done on initialization
                    for (int i_2 = s; i_2 < e; i_2++)
                    {
                        CoreLabel l = sent[i_2];
                        l.Set(typeof(PatternsAnnotations.MatchedPattern), true);
                        if (!l.ContainsKey(typeof(PatternsAnnotations.MatchedPatterns)))
                        {
                            l.Set(typeof(PatternsAnnotations.MatchedPatterns), new HashSet <Pattern>());
                        }
                        l.Get(typeof(PatternsAnnotations.MatchedPatterns)).Add(matchedPat);
                        // if (restrictToMatched) {
                        // tokensMatchedPattern.add(sentid, i);
                        // }
                        foreach (KeyValuePair <Type, object> ig in constVars.GetIgnoreWordswithClassesDuringSelection()[label])
                        {
                            if (l.ContainsKey(ig.Key) && l.Get(ig.Key).Equals(ig.Value))
                            {
                                doNotUse = true;
                            }
                        }
                        bool containsStop = ContainsStopWord(l, constVars.GetCommonEngWords(), PatternFactory.ignoreWordRegex);
                        if (removePhrasesWithStopWords && containsStop)
                        {
                            doNotUse = true;
                        }
                        else
                        {
                            if (!containsStop || !removeStopWordsFromSelectedPhrases)
                            {
                                if (label == null || l.Get(constVars.GetAnswerClass()[label]) == null || !l.Get(constVars.GetAnswerClass()[label]).Equals(label.ToString()))
                                {
                                    useWordNotLabeled = true;
                                }
                                phrase               += " " + l.Word();
                                phraseLemma          += " " + l.Lemma();
                                addedindices[i_2 - s] = true;
                            }
                        }
                    }
                    for (int i_3 = 0; i_3 < addedindices.Length; i_3++)
                    {
                        if (i_3 > 0 && i_3 < addedindices.Length - 1 && addedindices[i_3 - 1] == true && addedindices[i_3] == false && addedindices[i_3 + 1] == true)
                        {
                            doNotUse = true;
                            break;
                        }
                    }
                    if (!doNotUse && useWordNotLabeled)
                    {
                        phrase      = phrase.Trim();
                        phraseLemma = phraseLemma.Trim();
                        allFreq.IncrementCount(new Pair <string, string>(phrase, phraseLemma), matchedPat, 1.0);
                    }
                }
            }
            //      for (SurfacePattern pat : patterns.keySet()) {
            //        String patternStr = pat.toString();
            //
            //        TokenSequencePattern p = TokenSequencePattern.compile(constVars.env.get(label), patternStr);
            //        if (pat == null || p == null)
            //          throw new RuntimeException("why is the pattern " + pat + " null?");
            //
            //        TokenSequenceMatcher m = p.getMatcher(sent);
            //        while (m.find()) {
            //
            //          int s = m.start("$term");
            //          int e = m.end("$term");
            //
            //          String phrase = "";
            //          String phraseLemma = "";
            //          boolean useWordNotLabeled = false;
            //          boolean doNotUse = false;
            //          for (int i = s; i < e; i++) {
            //            CoreLabel l = sent.get(i);
            //            l.set(PatternsAnnotations.MatchedPattern.class, true);
            //            if (restrictToMatched) {
            //              tokensMatchedPattern.add(sentid, i);
            //            }
            //            for (Entry<Class, Object> ig : constVars.ignoreWordswithClassesDuringSelection.get(label).entrySet()) {
            //              if (l.containsKey(ig.getKey()) && l.get(ig.getKey()).equals(ig.getValue())) {
            //                doNotUse = true;
            //              }
            //            }
            //            boolean containsStop = containsStopWord(l, constVars.getCommonEngWords(), constVars.ignoreWordRegex, ignoreWords);
            //            if (removePhrasesWithStopWords && containsStop) {
            //              doNotUse = true;
            //            } else {
            //              if (!containsStop || !removeStopWordsFromSelectedPhrases) {
            //
            //                if (label == null || l.get(constVars.answerClass.get(label)) == null || !l.get(constVars.answerClass.get(label)).equals(label.toString())) {
            //                  useWordNotLabeled = true;
            //                }
            //                phrase += " " + l.word();
            //                phraseLemma += " " + l.lemma();
            //
            //              }
            //            }
            //          }
            //          if (!doNotUse && useWordNotLabeled) {
            //            phrase = phrase.trim();
            //            phraseLemma = phraseLemma.trim();
            //            allFreq.incrementCount(new Pair<String, String>(phrase, phraseLemma), pat, 1.0);
            //          }
            //        }
            //      }
            return(new Pair <TwoDimensionalCounter <Pair <string, string>, E>, CollectionValuedMap <E, Triple <string, int, int> > >(allFreq, matchedTokensByPat));
        }