/// <summary>
/// Copies the class/OR restrictions of a token, as returned by
/// <c>Token.ClassORRestrictionsAsString()</c>, into <paramref name="relWords"/>.
/// </summary>
/// <param name="t">Token to read the restrictions from; nothing happens when it is null.</param>
/// <param name="relWords">Map that receives the restriction entries.</param>
protected internal static void GetRelevantWordsBase(Token t, CollectionValuedMap <string, string> relWords)
{
    // No token, nothing to contribute.
    if (t == null)
    {
        return;
    }

    IDictionary <string, string> restrictions = t.ClassORRestrictionsAsString();

    // A token may report no restrictions at all; leave the map untouched in that case.
    if (restrictions == null)
    {
        return;
    }

    relWords.AddAll(restrictions);
}
/// <summary>
/// Compiles a single learned pattern, applies it to the given sentences using one or more
/// worker tasks, and merges every task's output into the supplied accumulators.
/// </summary>
/// <param name="sents">Sentences keyed by sentence id.</param>
/// <param name="label">Label being learned; selects the environment (<c>constVars.env[label]</c>) used to compile the pattern.</param>
/// <param name="pattern">Pattern to compile and apply.</param>
/// <param name="wordsandLemmaPatExtracted">Accumulator that receives the first component of each task result.</param>
/// <param name="matchedTokensByPat">Accumulator that receives the second component of each task result.</param>
/// <param name="alreadyLabeledWords">Accumulator that receives the third component of each task result.</param>
/// <exception cref="NotSupportedException">
/// <c>constVars.patternType</c> is neither <c>Surface</c> nor <c>Dep</c>.
/// </exception>
/// <exception cref="Exception">
/// Wraps any failure raised while collecting a worker's result. A failure while compiling a
/// surface pattern is logged and rethrown unchanged.
/// </exception>
private void RunParallelApplyPats(IDictionary <string, DataInstance> sents, string label, E pattern, TwoDimensionalCounter <CandidatePhrase, E> wordsandLemmaPatExtracted, CollectionValuedMap <E, Triple <string, int, int> > matchedTokensByPat, ICollection <CandidatePhrase> alreadyLabeledWords)
{
    Redwood.Log(Redwood.Dbg, "Applying pattern " + pattern + " to a total of " + sents.Count + " sentences ");

    IList <string> notAllowedClasses = new List <string>();
    IList <string> sentids = CollectionUtils.ToList(sents.Keys);

    if (constVars.doNotExtractPhraseAnyWordLabeledOtherClass)
    {
        // Every answer class other than the current label is disallowed, plus "OTHERSEM".
        foreach (string l in constVars.GetAnswerClass().Keys)
        {
            if (!l.Equals(label))
            {
                notAllowedClasses.Add(l);
            }
        }
        notAllowedClasses.Add("OTHERSEM");
    }

    // Exactly one of these two maps is populated, depending on constVars.patternType.
    IDictionary <TokenSequencePattern, E> surfacePatternsLearnedThisIterConverted = null;
    IDictionary <SemgrexPattern, E> depPatternsLearnedThisIterConverted = null;

    if (constVars.patternType.Equals(PatternFactory.PatternType.Surface))
    {
        surfacePatternsLearnedThisIterConverted = new Dictionary <TokenSequencePattern, E>();
        string patternStr = null;
        try
        {
            patternStr = pattern.ToString(notAllowedClasses);
            TokenSequencePattern pat = ((TokenSequencePattern)TokenSequencePattern.Compile(constVars.env[label], patternStr));
            surfacePatternsLearnedThisIterConverted[pat] = pattern;
        }
        catch (Exception)
        {
            log.Info("Error applying pattern " + patternStr + ". Probably an ill formed pattern (can be because of special symbols in label names). Contact the software developer.");
            throw;
        }
    }
    else if (constVars.patternType.Equals(PatternFactory.PatternType.Dep))
    {
        depPatternsLearnedThisIterConverted = new Dictionary <SemgrexPattern, E>();
        SemgrexPattern pat = SemgrexPattern.Compile(pattern.ToString(notAllowedClasses), new Env(constVars.env[label].GetVariables()));
        depPatternsLearnedThisIterConverted[pat] = pattern;
    }
    else
    {
        throw new NotSupportedException();
    }

    // Apply the patterns and extract candidate phrases.
    int numThreads = constVars.numThreads;

    // If number of sentences is less, do not create so many threads.
    if (sents.Count < 50)
    {
        numThreads = 1;
    }

    // Chunk size per worker. With more than one worker the first (numThreads - 1) workers
    // take `num` ids each and the last worker takes the remainder (possibly none).
    int num = numThreads == 1 ? sents.Count : sents.Count / (numThreads - 1);

    IExecutorService executor = Executors.NewFixedThreadPool(constVars.numThreads);
    bool completed = false;
    try
    {
        IList <IFuture <Triple <TwoDimensionalCounter <CandidatePhrase, E>, CollectionValuedMap <E, Triple <string, int, int> >, ICollection <CandidatePhrase> > > > list = new List <IFuture <Triple <TwoDimensionalCounter <CandidatePhrase, E>, CollectionValuedMap <E, Triple <string, int, int> >, ICollection <CandidatePhrase> > > >();

        for (int i = 0; i < numThreads; i++)
        {
            // Only a single worker may receive the complete id list. Deciding this by
            // comparing the chunk size with the sentence count would hand the full list to
            // BOTH workers when numThreads == 2, processing every sentence twice.
            IList <string> idsForWorker;
            if (numThreads == 1)
            {
                idsForWorker = sentids;
            }
            else
            {
                int from = Math.Min(sentids.Count, i * num);
                int to = Math.Min(sentids.Count, (i + 1) * num);
                idsForWorker = sentids.SubList(from, to);
            }

            // NOTE(review): the task kind is chosen from pattern.type while the compiled
            // pattern map above is chosen from constVars.patternType - confirm the two
            // always agree.
            ICallable <Triple <TwoDimensionalCounter <CandidatePhrase, E>, CollectionValuedMap <E, Triple <string, int, int> >, ICollection <CandidatePhrase> > > task = null;
            if (pattern.type.Equals(PatternFactory.PatternType.Surface))
            {
                task = new ApplyPatterns(sents, idsForWorker, surfacePatternsLearnedThisIterConverted, label, constVars.removeStopWordsFromSelectedPhrases, constVars.removePhrasesWithStopWords, constVars);
            }
            else
            {
                task = new ApplyDepPatterns(sents, idsForWorker, depPatternsLearnedThisIterConverted, label, constVars.removeStopWordsFromSelectedPhrases, constVars.removePhrasesWithStopWords, constVars);
            }

            IFuture <Triple <TwoDimensionalCounter <CandidatePhrase, E>, CollectionValuedMap <E, Triple <string, int, int> >, ICollection <CandidatePhrase> > > submit = executor.Submit(task);
            list.Add(submit);
        }

        // Now retrieve the result.
        foreach (IFuture <Triple <TwoDimensionalCounter <CandidatePhrase, E>, CollectionValuedMap <E, Triple <string, int, int> >, ICollection <CandidatePhrase> > > future in list)
        {
            try
            {
                Triple <TwoDimensionalCounter <CandidatePhrase, E>, CollectionValuedMap <E, Triple <string, int, int> >, ICollection <CandidatePhrase> > result = future.Get();
                Redwood.Log(ConstantsAndVariables.extremedebug, "Pattern " + pattern + " extracted phrases " + result.First());
                wordsandLemmaPatExtracted.AddAll(result.First());
                matchedTokensByPat.AddAll(result.Second());
                Sharpen.Collections.AddAll(alreadyLabeledWords, result.Third());
            }
            catch (Exception ex)
            {
                // Keep the original failure reachable through InnerException.
                throw new Exception(ex.Message, ex);
            }
        }

        completed = true;
    }
    finally
    {
        // Always release the pool: orderly shutdown on success, immediate shutdown when
        // anything failed (including failures while building or submitting tasks).
        if (completed)
        {
            executor.Shutdown();
        }
        else
        {
            executor.ShutdownNow();
        }
    }
}