//line is a jsonstring of map of label to array of strings; ex: {"name":["Bush","Carter","Obama"]}
/// <summary>
/// Parses <paramref name="line"/> (a JSON object mapping each label to an array of phrase
/// strings), registers every phrase as an additional seed word for that label, and re-runs
/// seed-word labeling over the already-loaded sentences in Data.sents.
/// </summary>
/// <param name="line">JSON string, e.g. {"name":["Bush","Carter","Obama"]}</param>
/// <returns>"SUCCESS added new phrases" on completion</returns>
/// <exception cref="System.Exception"/>
public virtual string DoNewPhrases(string line)
{
    System.Console.Out.WriteLine("adding new phrases");
    ConstantsAndVariables constVars = new ConstantsAndVariables(props, humanLabelClasses.Keys, humanLabelClasses);
    IJsonReader jsonReader = Javax.Json.Json.CreateReader(new StringReader(line));
    IJsonObject objarr = jsonReader.ReadObject();
    // FIX: close the reader once the object is materialized (matches the reader handling
    // in the justification-writing code elsewhere in this file); it was previously leaked.
    jsonReader.Close();
    foreach (KeyValuePair<string, IJsonValue> o in objarr)
    {
        string label = o.Key;
        ICollection<CandidatePhrase> seed = new HashSet<CandidatePhrase>();
        IJsonArray arr = objarr.GetJsonArray(o.Key);
        for (int i = 0; i < arr.Count; i++)
        {
            string seedw = arr.GetString(i);
            System.Console.Out.WriteLine("adding " + seedw + " to seed ");
            seed.Add(CandidatePhrase.CreateOrGet(seedw));
        }
        // NOTE(review): assumes seedWords already has an entry for this label — an unknown
        // label in the input JSON would throw here; confirm against callers.
        Sharpen.Collections.AddAll(seedWords[label], seed);
        constVars.AddSeedWords(label, seed);
        GetPatternsFromDataMultiClass.RunLabelSeedWords(Data.sents, humanLabelClasses[label], label, seed, constVars, false);
    }
    //model.labelWords(label, labelclass, Data.sents, seed);
    return ("SUCCESS added new phrases");
}
/// <summary>
/// Creates an F1-based pattern scorer. Everything is delegated to the base scorer except the
/// seed pattern <paramref name="p0"/> and the phrases it extracted (<paramref name="p0Set"/>),
/// which are kept for the F1 computation.
/// </summary>
public ScorePatternsF1(ConstantsAndVariables constVars, GetPatternsFromDataMultiClass.PatternScoring patternScoring, string label, ICollection <CandidatePhrase> allCandidatePhrases, TwoDimensionalCounter <E, CandidatePhrase> patternsandWords4Label , TwoDimensionalCounter <E, CandidatePhrase> negPatternsandWords4Label, TwoDimensionalCounter <E, CandidatePhrase> unLabeledPatternsandWords4Label, Properties props, ICounter <CandidatePhrase> p0Set, E p0)
    : base(constVars, patternScoring, label, allCandidatePhrases, patternsandWords4Label, negPatternsandWords4Label, unLabeledPatternsandWords4Label, props)
{
    // Only the p0-specific state is stored locally.
    this.p0Set = p0Set;
    this.p0 = p0;
}
/// <summary>
/// Creates a ratio/modified-frequency pattern scorer. The shared counters are handed to the
/// base scorer; only the phrase-in-pattern scores and the phrase scorer are kept here.
/// </summary>
public ScorePatternsRatioModifiedFreq(ConstantsAndVariables constVars, GetPatternsFromDataMultiClass.PatternScoring patternScoring, string label, ICollection <CandidatePhrase> allCandidatePhrases, TwoDimensionalCounter <E, CandidatePhrase> patternsandWords4Label , TwoDimensionalCounter <E, CandidatePhrase> negPatternsandWords4Label, TwoDimensionalCounter <E, CandidatePhrase> unLabeledPatternsandWords4Label, TwoDimensionalCounter <CandidatePhrase, ConstantsAndVariables.ScorePhraseMeasures> phInPatScores , ScorePhrases scorePhrases, Properties props)
    : base(constVars, patternScoring, label, allCandidatePhrases, patternsandWords4Label, negPatternsandWords4Label, unLabeledPatternsandWords4Label, props)
{
    // Subclass-specific state only; everything else lives in the base class.
    this.scorePhrases = scorePhrases;
    this.phInPatScores = phInPatScores;
}
/// <summary>
/// Creates the pattern generator: fills the static options of ConstantsAndVariables from
/// <paramref name="props"/>, then initializes both the shared state and this instance.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public CreatePatterns(Properties props, ConstantsAndVariables constVars)
{
    //String channelNameLogger = "createpatterns";
    this.constVars = constVars;
    // Order matters: options must be filled before the two SetUp calls run.
    ArgumentParser.FillOptions(typeof(ConstantsAndVariables), props);
    constVars.SetUp(props);
    SetUp(props);
}
/// <summary>
/// Selects up to constVars.numWordsToAdd top-scoring phrases from <paramref name="newdt"/>,
/// visiting candidates in descending score order and stopping early once a score falls
/// below <paramref name="thresholdWordExtract"/>.
/// </summary>
/// <param name="newdt">scored candidate phrases (consumed via a priority queue)</param>
/// <param name="terms">phrase-to-pattern counts, used for the non-redundant-pattern check</param>
/// <param name="useThresholdNumPatternsForTheseWords">phrases that must additionally be supported
/// by at least constVars.thresholdNumPatternsApplied non-redundant patterns</param>
/// <param name="ignoreWords">phrases to fuzzy-match against and reject; may be null. Rejected
/// phrases are added to this set as a side effect.</param>
/// <param name="thresholdWordExtract">minimum score for a phrase to be selected</param>
/// <returns>the selected phrases with their scores from <paramref name="newdt"/></returns>
public virtual ICounter <CandidatePhrase> ChooseTopWords(ICounter <CandidatePhrase> newdt, TwoDimensionalCounter <CandidatePhrase, E> terms, ICounter <CandidatePhrase> useThresholdNumPatternsForTheseWords, ICollection <CandidatePhrase> ignoreWords , double thresholdWordExtract)
{
    // Iterate phrases highest score first.
    IEnumerator <CandidatePhrase> termIter = Counters.ToPriorityQueue(newdt).GetEnumerator();
    ICounter <CandidatePhrase> finalwords = new ClassicCounter <CandidatePhrase>();
    while (termIter.MoveNext())
    {
        if (finalwords.Size() >= constVars.numWordsToAdd)
        {
            break;
        }
        CandidatePhrase w = termIter.Current;
        // Scores arrive in descending order, so every later phrase is below threshold too.
        if (newdt.GetCount(w) < thresholdWordExtract)
        {
            Redwood.Log(ConstantsAndVariables.extremedebug, "not adding word " + w + " and any later words because the score " + newdt.GetCount(w) + " is less than the threshold of " + thresholdWordExtract);
            break;
        }
        System.Diagnostics.Debug.Assert((newdt.GetCount(w) != double.PositiveInfinity));
        // Phrases flagged for the pattern-support check need enough distinct supporting patterns.
        if (useThresholdNumPatternsForTheseWords.ContainsKey(w) && NumNonRedundantPatterns(terms, w) < constVars.thresholdNumPatternsApplied)
        {
            Redwood.Log("extremePatDebug", "Not adding " + w + " because the number of non redundant patterns are below threshold of " + constVars.thresholdNumPatternsApplied + ":" + terms.GetCounter(w).KeySet());
            continue;
        }
        CandidatePhrase matchedFuzzy = null;
        if (constVars.minLen4FuzzyForPattern > 0 && ignoreWords != null)
        {
            matchedFuzzy = ConstantsAndVariables.ContainsFuzzy(ignoreWords, w, constVars.minLen4FuzzyForPattern);
        }
        if (matchedFuzzy == null)
        {
            Redwood.Log("extremePatDebug", "adding word " + w);
            finalwords.SetCount(w, newdt.GetCount(w));
        }
        else
        {
            Redwood.Log("extremePatDebug", "not adding " + w + " because it matched " + matchedFuzzy + " in common English word");
            // Remember the rejected phrase so later candidates fuzzy-match against it as well.
            ignoreWords.Add(w);
        }
    }
    // Debug aid: log the next (up to) ten phrases that were NOT selected.
    string nextTen = string.Empty;
    int n = 0;
    while (termIter.MoveNext())
    {
        n++;
        if (n > 10)
        {
            break;
        }
        CandidatePhrase w = termIter.Current;
        nextTen += ";\t" + w + ":" + newdt.GetCount(w);
    }
    Redwood.Log(Redwood.Dbg, "Next ten phrases were " + nextTen);
    return(finalwords);
}
/// <summary>
/// Builds a generic context Token for <paramref name="tokenj"/>: for every generalize-class
/// annotation (and, when useContextNERRestriction is on, the NER tag) whose value differs from
/// the background symbol, an OR-restriction is added to the token and the matched class key /
/// tag is appended to a "|"-separated string.
/// </summary>
/// <param name="tokenj">token; every generalize class must be set on it, else this throws</param>
/// <returns>Triple of (isLabeledO: true iff nothing non-background matched, the restricted
/// generic token, the "|"-joined names of the matched classes/tags)</returns>
internal static Triple <bool, Token, string> GetContextTokenStr(CoreLabel tokenj)
{
    Token strgeneric = new Token(PatternFactory.PatternType.Surface);
    string strOriginal = string.Empty;
    bool isLabeledO = true;
    // Original Java loop kept for reference:
    // for (Entry<String, Class<? extends TypesafeMap.Key<String>>> e : getAnswerClass().entrySet()) {
    //   if (!tokenj.get(e.getValue()).equals(backgroundSymbol)) {
    //     isLabeledO = false;
    //     if (strOriginal.isEmpty()) { strOriginal = e.getKey(); } else { strOriginal += "|" + e.getKey(); }
    //     strgeneric.addRestriction(e.getKey(), e.getKey());
    //   }
    // }
    foreach (KeyValuePair <string, Type> e in ConstantsAndVariables.GetGeneralizeClasses())
    {
        // Every generalize class is required to be present on the token.
        if (!tokenj.ContainsKey(e.Value) || tokenj.Get(e.Value) == null)
        {
            throw new Exception(" Why does the token not have the class " + e.Value + " set? Existing classes " + tokenj.ToString(CoreLabel.OutputFormat.All));
        }
        if (!tokenj.Get(e.Value).Equals(ConstantsAndVariables.backgroundSymbol))
        {
            isLabeledO = false;
            // Accumulate matched class keys, "|"-separated.
            if (strOriginal.IsEmpty())
            {
                strOriginal = e.Key;
            }
            else
            {
                strOriginal += "|" + e.Key;
            }
            strgeneric.AddORRestriction(e.Value, e.Key);
        }
    }
    // Optionally restrict on the NER tag as well.
    if (useContextNERRestriction)
    {
        string nerTag = tokenj.Get(typeof(CoreAnnotations.NamedEntityTagAnnotation));
        if (nerTag != null && !nerTag.Equals(SeqClassifierFlags.DefaultBackgroundSymbol))
        {
            isLabeledO = false;
            if (strOriginal.IsEmpty())
            {
                strOriginal = nerTag;
            }
            else
            {
                strOriginal += "|" + nerTag;
            }
            strgeneric.AddORRestriction(typeof(CoreAnnotations.NamedEntityTagAnnotation), nerTag);
        }
    }
    return(new Triple <bool, Token, string>(isLabeledO, strgeneric, strOriginal));
}
/// <summary>
/// Callable that applies the given token-sequence patterns to the listed sentences for one
/// label. Pure field wiring; no validation or work happens here.
/// </summary>
public ApplyPatterns(IDictionary <string, DataInstance> sents, IList <string> sentids, IDictionary <TokenSequencePattern, E> patterns, string label, bool removeStopWordsFromSelectedPhrases, bool removePhrasesWithStopWords, ConstantsAndVariables cv)
{
    this.constVars = cv;
    this.label = label;
    this.sents = sents;
    this.sentids = sentids;
    this.patterns = patterns;
    this.removePhrasesWithStopWords = removePhrasesWithStopWords;
    this.removeStopWordsFromSelectedPhrases = removeStopWordsFromSelectedPhrases;
}
/// <summary>
/// Callable that applies the given dependency (semgrex) patterns to the listed sentences for
/// one label. Also installs the word-restriction predicate; the rest is field wiring.
/// </summary>
public ApplyDepPatterns(IDictionary <string, DataInstance> sents, IList <string> sentids, IDictionary <SemgrexPattern, E> patterns, string label, bool removeStopWordsFromSelectedPhrases, bool removePhrasesWithStopWords, ConstantsAndVariables cv )
{
    // Predicate deciding which matched words are acceptable (was null in the original Java).
    matchingWordRestriction = new _IPredicate_183(this);
    this.constVars = cv;
    this.label = label;
    this.sents = sents;
    this.sentids = sentids;
    this.patterns = patterns;
    this.removePhrasesWithStopWords = removePhrasesWithStopWords;
    this.removeStopWordsFromSelectedPhrases = removeStopWordsFromSelectedPhrases;
}
/// <summary>
/// Callable that applies all given patterns at once (via a multi-pattern matcher) to the
/// listed sentences for one label.
/// </summary>
public ApplyPatternsMulti(IDictionary <string, DataInstance> sents, IList <string> sentids, IDictionary <TokenSequencePattern, E> patterns, string label, bool removeStopWordsFromSelectedPhrases, bool removePhrasesWithStopWords, ConstantsAndVariables cv)
{
    //Set<String> ignoreWords;
    // Build a single combined matcher over all pattern keys up front.
    multiPatternMatcher = TokenSequencePattern.GetMultiPatternMatcher(patterns.Keys);
    this.constVars = cv;
    this.label = label;
    this.sents = sents;
    this.sentids = sentids;
    this.patterns = patterns;
    this.removePhrasesWithStopWords = removePhrasesWithStopWords;
    this.removeStopWordsFromSelectedPhrases = removeStopWordsFromSelectedPhrases;
}
/// <summary>
/// Creates a phrase scorer host: fills options from <paramref name="props"/>, reflectively
/// instantiates the configured phraseScorerClass with the shared ConstantsAndVariables, and
/// fills the scorer's options too.
/// </summary>
/// <exception cref="System.Exception">if the phrase scorer cannot be instantiated</exception>
public ScorePhrases(Properties props, ConstantsAndVariables cv)
{
    ArgumentParser.FillOptions(this, props);
    this.constVars = cv;
    try
    {
        phraseScorer = phraseScorerClass.GetConstructor(typeof(ConstantsAndVariables)).NewInstance(constVars);
    }
    catch (ReflectiveOperationException e)
    {
        // FIX: System.Exception has no (Exception) constructor (Java's `new RuntimeException(e)`
        // was mistranslated); keep the cause as InnerException with a descriptive message.
        throw new Exception("Could not instantiate phrase scorer " + phraseScorerClass, e);
    }
    ArgumentParser.FillOptions(phraseScorer, props);
}
/// <summary>
/// Base pattern scorer: stores the label, the candidate phrases, and the three per-label
/// pattern/phrase counters (positive, negative, unlabeled) used by subclasses.
/// </summary>
public ScorePatterns(ConstantsAndVariables constVars, GetPatternsFromDataMultiClass.PatternScoring patternScoring, string label, ICollection <CandidatePhrase> allCandidatePhrases, TwoDimensionalCounter <E, CandidatePhrase> patternsandWords4Label , TwoDimensionalCounter <E, CandidatePhrase> negPatternsandWords4Label, TwoDimensionalCounter <E, CandidatePhrase> unLabeledPatternsandWords4Label, Properties props)
{
    // Straight field wiring; nothing is computed here.
    this.props = props;
    this.constVars = constVars;
    this.patternScoring = patternScoring;
    this.label = label;
    this.allCandidatePhrases = allCandidatePhrases;
    this.patternsandWords4Label = patternsandWords4Label;
    this.negPatternsandWords4Label = negPatternsandWords4Label;
    this.unLabeledPatternsandWords4Label = unLabeledPatternsandWords4Label;
}
//the format of the line input is json string of maps. required keys are "input" and "seedWords". "input" can be a string or file (in which case readFile should be true.)
// For example: {"input":"presidents.txt","seedWords":{"name":["Obama"],"place":["Chicago"]}}
/// <summary>
/// Loads the input text into Data.sents, labels every sentence token matching the current
/// seed words for each label, and optionally writes column output to outputFile.
/// </summary>
/// <param name="writeOutputToFile">when true, column output is written to outputFile</param>
/// <returns>"SUCCESS" on completion</returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="Java.Lang.InstantiationException"/>
/// <exception cref="System.Reflection.TargetInvocationException"/>
/// <exception cref="Java.Util.Concurrent.ExecutionException"/>
/// <exception cref="Java.Sql.SQLException"/>
/// <exception cref="System.Exception"/>
/// <exception cref="System.MemberAccessException"/>
/// <exception cref="System.TypeLoadException"/>
/// <exception cref="System.MissingMethodException"/>
public virtual string ProcessText(bool writeOutputToFile)
{
    logger.Info("Starting to process text");
    logger.Info("all seed words are " + seedWords);
    // Tokenize/load the sentences; only the first element of the pair is kept.
    Pair <IDictionary <string, DataInstance>, IDictionary <string, DataInstance> > sentsPair = GetPatternsFromDataMultiClass.ProcessSents(props, seedWords.Keys);
    Data.sents = sentsPair.First();
    ConstantsAndVariables constVars = new ConstantsAndVariables(props, seedWords.Keys, machineAnswerClasses);
    // Label occurrences of each label's seed words in the loaded sentences.
    foreach (string label in seedWords.Keys)
    {
        GetPatternsFromDataMultiClass.RunLabelSeedWords(Data.sents, humanLabelClasses[label], label, seedWords[label], constVars, true);
    }
    if (writeOutputToFile)
    {
        GetPatternsFromDataMultiClass.WriteColumnOutput(outputFile, false, humanLabelClasses);
        System.Console.Out.WriteLine("written the output to " + outputFile);
    }
    logger.Info("Finished processing text");
    return("SUCCESS");
}
/// <summary>
/// Smoke test for dependency-pattern creation: builds a single hand-constructed sentence with
/// its dependency graph, runs CreatePatterns over it in Memory mode, and prints the patterns
/// collected per token.
/// </summary>
public virtual void Test()
{
    Properties props = new Properties();
    props.SetProperty("patternType", "DEP");
    ConstantsAndVariables constvars = new ConstantsAndVariables(props, new HashSet <string>(), new Dictionary <string, Type>());
    CreatePatterns <DepPattern> createPatterns = new CreatePatterns <DepPattern>(props, constvars);
    IDictionary <string, DataInstance> sents = new Dictionary <string, DataInstance>();
    ICoreMap m = new ArrayCoreMap();
    // NOTE(review): `text` is unused below; the sentence content comes from `tokens`/`graphString`.
    string text = "We present a paper that focuses on semantic graphs applied to language.";
    string graphString = "[present/VBP-2 nsubj>We/PRP-1 dobj>[paper/NN-4 det>a/DT-3] ccomp>[applied/VBN-10 mark>that/IN-5 nsubj>[focuses/NN-6 nmod:on>[graphs/NNS-9 amod>semantic/JJ-8]] nmod:to>language/NN-12]]";
    SemanticGraph graph = SemanticGraph.ValueOf(graphString);
    //String phrase = "semantic graphs";
    IList <string> tokens = Arrays.AsList(new string[] { "We", "present", "a", "paper", "that", "focuses", "on", "semantic", "graphs", "applied", "to", "language" });
    // NOTE(review): Map(null) is a Sharpen artifact — the original Java lambda (presumably
    // String -> CoreLabel) was dropped in translation; confirm against the Java source.
    m.Set(typeof(CoreAnnotations.TokensAnnotation), tokens.Stream().Map(null).Collect(Collectors.ToList()));
    m.Set(typeof(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation), graph);
    sents["sent1"] = DataInstance.GetNewInstance(PatternFactory.PatternType.Dep, m);
    createPatterns.GetAllPatterns(sents, props, ConstantsAndVariables.PatternForEachTokenWay.Memory);
    System.Console.Out.WriteLine("graph is " + graph);
    System.Console.Out.WriteLine(PatternsForEachTokenInMemory.patternsForEachToken);
}
/// <summary>
/// Scores the candidate phrases extracted by this iteration's learned patterns and returns the
/// newly selected phrases for <paramref name="label"/>. Depending on constVars.wordScoring this
/// either (Weightednorm) scores all extracted phrases, picks the top ones, and optionally writes
/// a JSON justification file, or (Bpb) picks the single best phrase by maximum pattern weight.
/// </summary>
/// <param name="patternsLearnedThisIter">patterns learned in this iteration (applied unless constVars.doNotApplyPatterns)</param>
/// <param name="scoreForAllWordsThisIteration">cleared and refilled with this iteration's phrase scores (Weightednorm branch)</param>
/// <param name="terms">accumulates phrase-to-pattern counts across iterations</param>
/// <param name="wordsPatExtracted">phrases extracted by patterns, filled by ApplyPats</param>
/// <param name="identifier">subdirectory name used when writing justification output</param>
/// <returns>counter of the newly selected phrases with their scores</returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.TypeLoadException"/>
private ICounter <CandidatePhrase> LearnNewPhrasesPrivate(string label, PatternsForEachToken patternsForEachToken, ICounter <E> patternsLearnedThisIter, ICounter <E> allSelectedPatterns, ICollection <CandidatePhrase> alreadyIdentifiedWords, CollectionValuedMap <E, Triple <string, int, int> > matchedTokensByPat, ICounter <CandidatePhrase> scoreForAllWordsThisIteration, TwoDimensionalCounter <CandidatePhrase, E> terms, TwoDimensionalCounter <CandidatePhrase, E> wordsPatExtracted, TwoDimensionalCounter <E , CandidatePhrase> patternsAndWords4Label, string identifier, ICollection <CandidatePhrase> ignoreWords, bool computeProcDataFreq)
{
    ICollection <CandidatePhrase> alreadyLabeledWords = new HashSet <CandidatePhrase>();
    if (constVars.doNotApplyPatterns)
    {
        // if want to get the stats by the lossy way of just counting without applying the patterns
        ConstantsAndVariables.DataSentsIterator sentsIter = new ConstantsAndVariables.DataSentsIterator(constVars.batchProcessSents);
        while (sentsIter.MoveNext())
        {
            Pair <IDictionary <string, DataInstance>, File> sentsf = sentsIter.Current;
            this.StatsWithoutApplyingPatterns(sentsf.First(), patternsForEachToken, patternsLearnedThisIter, wordsPatExtracted);
        }
    }
    else
    {
        if (patternsLearnedThisIter.Size() > 0)
        {
            this.ApplyPats(patternsLearnedThisIter, label, wordsPatExtracted, matchedTokensByPat, alreadyLabeledWords);
        }
    }
    if (computeProcDataFreq)
    {
        if (!phraseScorer.wordFreqNorm.Equals(PhraseScorer.Normalization.None))
        {
            // Normalize raw phrase frequencies (sqrt or 1+log) into Data.processedDataFreq.
            Redwood.Log(Redwood.Dbg, "computing processed freq");
            foreach (KeyValuePair <CandidatePhrase, double> fq in Data.rawFreq.EntrySet())
            {
                double @in = fq.Value;
                if (phraseScorer.wordFreqNorm.Equals(PhraseScorer.Normalization.Sqrt))
                {
                    @in = Math.Sqrt(@in);
                }
                else
                {
                    if (phraseScorer.wordFreqNorm.Equals(PhraseScorer.Normalization.Log))
                    {
                        @in = 1 + Math.Log(@in);
                    }
                    else
                    {
                        throw new Exception("can't understand the normalization");
                    }
                }
                System.Diagnostics.Debug.Assert(!double.IsNaN(@in), "Why is processed freq nan when rawfreq is " + @in);
                Data.processedDataFreq.SetCount(fq.Key, @in);
            }
        }
        else
        {
            Data.processedDataFreq = Data.rawFreq;
        }
    }
    if (constVars.wordScoring.Equals(GetPatternsFromDataMultiClass.WordScoring.Weightednorm))
    {
        // Collect extracted phrases that are not known semantic-class words (by surface or lemma)
        // and were not just labeled by this iteration's patterns.
        foreach (CandidatePhrase en in wordsPatExtracted.FirstKeySet())
        {
            if (!constVars.GetOtherSemanticClassesWords().Contains(en) && (en.GetPhraseLemma() == null || !constVars.GetOtherSemanticClassesWords().Contains(CandidatePhrase.CreateOrGet(en.GetPhraseLemma()))) && !alreadyLabeledWords.Contains(en))
            {
                terms.AddAll(en, wordsPatExtracted.GetCounter(en));
            }
        }
        RemoveKeys(terms, ConstantsAndVariables.GetStopWords());
        ICounter <CandidatePhrase> phraseScores = phraseScorer.ScorePhrases(label, terms, wordsPatExtracted, allSelectedPatterns, alreadyIdentifiedWords, false);
        System.Console.Out.WriteLine("count for word U.S. is " + phraseScores.GetCount(CandidatePhrase.CreateOrGet("U.S.")));
        // Words never to select: caller-provided ignores, other semantic classes, seeds, learned words.
        ICollection <CandidatePhrase> ignoreWordsAll;
        if (ignoreWords != null && !ignoreWords.IsEmpty())
        {
            ignoreWordsAll = CollectionUtils.UnionAsSet(ignoreWords, constVars.GetOtherSemanticClassesWords());
        }
        else
        {
            ignoreWordsAll = new HashSet <CandidatePhrase>(constVars.GetOtherSemanticClassesWords());
        }
        Sharpen.Collections.AddAll(ignoreWordsAll, constVars.GetSeedLabelDictionary()[label]);
        Sharpen.Collections.AddAll(ignoreWordsAll, constVars.GetLearnedWords(label).KeySet());
        // FIX: this literal was split across two physical lines mid-string (invalid in a
        // non-verbatim C# string); rejoined onto one line.
        System.Console.Out.WriteLine("ignoreWordsAll contains word U.S. is " + ignoreWordsAll.Contains(CandidatePhrase.CreateOrGet("U.S.")));
        ICounter <CandidatePhrase> finalwords = ChooseTopWords(phraseScores, terms, phraseScores, ignoreWordsAll, constVars.thresholdWordExtract);
        phraseScorer.PrintReasonForChoosing(finalwords);
        scoreForAllWordsThisIteration.Clear();
        Counters.AddInPlace(scoreForAllWordsThisIteration, phraseScores);
        Redwood.Log(ConstantsAndVariables.minimaldebug, "\n\n## Selected Words for " + label + " : " + Counters.ToSortedString(finalwords, finalwords.Size(), "%1$s:%2$.2f", "\t"));
        if (constVars.goldEntities != null)
        {
            IDictionary <string, bool> goldEntities4Label = constVars.goldEntities[label];
            if (goldEntities4Label != null)
            {
                StringBuilder s = new StringBuilder();
                // NOTE(review): Sharpen dropped the lambda here (null argument) — `s` is never
                // populated before being logged; confirm against the original Java source.
                finalwords.KeySet().Stream().ForEach(null);
                Redwood.Log(ConstantsAndVariables.minimaldebug, "\n\n## Gold labels for selected words for label " + label + " : " + s.ToString());
            }
            else
            {
                Redwood.Log(Redwood.Dbg, "No gold entities provided for label " + label);
            }
        }
        if (constVars.outDir != null && !constVars.outDir.IsEmpty())
        {
            // Write a JSON justification (entity, supporting patterns, reason words, score),
            // appending to previous iterations' output if it was already written once.
            string outputdir = constVars.outDir + "/" + identifier + "/" + label;
            IOUtils.EnsureDir(new File(outputdir));
            TwoDimensionalCounter <CandidatePhrase, CandidatePhrase> reasonForWords = new TwoDimensionalCounter <CandidatePhrase, CandidatePhrase>();
            foreach (CandidatePhrase word in finalwords.KeySet())
            {
                foreach (E l in wordsPatExtracted.GetCounter(word).KeySet())
                {
                    foreach (CandidatePhrase w2 in patternsAndWords4Label.GetCounter(l))
                    {
                        reasonForWords.IncrementCount(word, w2);
                    }
                }
            }
            Redwood.Log(ConstantsAndVariables.minimaldebug, "Saving output in " + outputdir);
            string filename = outputdir + "/words.json";
            // the json object is an array corresponding to each iteration - of list of objects,
            // each of which is a bean of entity and reasons
            IJsonArrayBuilder obj = Javax.Json.Json.CreateArrayBuilder();
            if (writtenInJustification.Contains(label) && writtenInJustification[label])
            {
                IJsonReader jsonReader = Javax.Json.Json.CreateReader(new BufferedInputStream(new FileInputStream(filename)));
                IJsonArray objarr = jsonReader.ReadArray();
                foreach (IJsonValue o in objarr)
                {
                    obj.Add(o);
                }
                jsonReader.Close();
            }
            IJsonArrayBuilder objThisIter = Javax.Json.Json.CreateArrayBuilder();
            foreach (CandidatePhrase w in reasonForWords.FirstKeySet())
            {
                IJsonObjectBuilder objinner = Javax.Json.Json.CreateObjectBuilder();
                IJsonArrayBuilder l = Javax.Json.Json.CreateArrayBuilder();
                foreach (CandidatePhrase w2 in reasonForWords.GetCounter(w).KeySet())
                {
                    l.Add(w2.GetPhrase());
                }
                IJsonArrayBuilder pats = Javax.Json.Json.CreateArrayBuilder();
                foreach (E p in wordsPatExtracted.GetCounter(w))
                {
                    pats.Add(p.ToStringSimple());
                }
                objinner.Add("reasonwords", l);
                objinner.Add("patterns", pats);
                objinner.Add("score", finalwords.GetCount(w));
                objinner.Add("entity", w.GetPhrase());
                objThisIter.Add(objinner.Build());
            }
            obj.Add(objThisIter);
            // Redwood.log(ConstantsAndVariables.minimaldebug, channelNameLogger, "Writing justification at " + filename);
            IOUtils.WriteStringToFile(StringUtils.Normalize(StringUtils.ToAscii(obj.Build().ToString())), filename, "ASCII");
            writtenInJustification[label] = true;
        }
        if (constVars.justify)
        {
            Redwood.Log(Redwood.Dbg, "\nJustification for phrases:\n");
            foreach (CandidatePhrase word in finalwords.KeySet())
            {
                Redwood.Log(Redwood.Dbg, "Phrase " + word + " extracted because of patterns: \t" + Counters.ToSortedString(wordsPatExtracted.GetCounter(word), wordsPatExtracted.GetCounter(word).Size(), "%1$s:%2$f", "\n"));
            }
        }
        // if (usePatternResultAsLabel)
        //   if (answerLabel != null)
        //     labelWords(sents, commonEngWords, finalwords.keySet(), patterns.keySet(), outFile);
        //   else
        //     throw new RuntimeException("why is the answer label null?");
        return (finalwords);
    }
    else
    {
        if (constVars.wordScoring.Equals(GetPatternsFromDataMultiClass.WordScoring.Bpb))
        {
            // Bpb: pick the single phrase whose best supporting pattern has the highest weight,
            // breaking ties by the phrase's count under that pattern.
            Counters.AddInPlace(terms, wordsPatExtracted);
            ICounter <CandidatePhrase> maxPatWeightTerms = new ClassicCounter <CandidatePhrase>();
            IDictionary <CandidatePhrase, E> wordMaxPat = new Dictionary <CandidatePhrase, E>();
            foreach (KeyValuePair <CandidatePhrase, ClassicCounter <E> > en in terms.EntrySet())
            {
                ICounter <E> weights = new ClassicCounter <E>();
                foreach (E k in en.Value.KeySet())
                {
                    weights.SetCount(k, patternsLearnedThisIter.GetCount(k));
                }
                maxPatWeightTerms.SetCount(en.Key, Counters.Max(weights));
                wordMaxPat[en.Key] = Counters.Argmax(weights);
            }
            Counters.RemoveKeys(maxPatWeightTerms, alreadyIdentifiedWords);
            double maxvalue = Counters.Max(maxPatWeightTerms);
            ICollection <CandidatePhrase> words = Counters.KeysAbove(maxPatWeightTerms, maxvalue - 1e-10);
            CandidatePhrase bestw = null;
            if (words.Count > 1)
            {
                double max = double.NegativeInfinity;
                foreach (CandidatePhrase w in words)
                {
                    if (terms.GetCount(w, wordMaxPat[w]) > max)
                    {
                        max = terms.GetCount(w, wordMaxPat[w]);
                        bestw = w;
                    }
                }
            }
            else
            {
                if (words.Count == 1)
                {
                    // FIX: IEnumerator.Current is undefined before MoveNext(); the original
                    // `words.GetEnumerator().Current` returned null instead of the lone element.
                    IEnumerator <CandidatePhrase> wordsIt = words.GetEnumerator();
                    wordsIt.MoveNext();
                    bestw = wordsIt.Current;
                }
                else
                {
                    return (new ClassicCounter <CandidatePhrase>());
                }
            }
            Redwood.Log(ConstantsAndVariables.minimaldebug, "Selected Words: " + bestw);
            return (Counters.AsCounter(Arrays.AsList(bestw)));
        }
        else
        {
            throw new Exception("wordscoring " + constVars.wordScoring + " not identified");
        }
    }
}
/// <summary>
/// Base phrase scorer; simply stores the shared constants/variables.
/// (Defaults here get overwritten in the ScorePhrasesLearnFeatWt subclass.)
/// </summary>
public PhraseScorer(ConstantsAndVariables constvar)
{
    this.constVars = constvar;
}
/// <summary>
/// Computes the patterns around every token of each assigned sentence. In Memory mode the
/// patterns are stored per sentence immediately; otherwise they are batched and committed to
/// the backing store every 1000 sentences (with a final flush at the end).
/// </summary>
/// <returns>always true</returns>
/// <exception cref="System.Exception"/>
public virtual bool Call()
{
    IDictionary <string, IDictionary <int, ICollection <E> > > tempPatternsForTokens = new Dictionary <string, IDictionary <int, ICollection <E> > >();
    int numSentencesInOneCommit = 0;
    foreach (string id in this.sentIds)
    {
        DataInstance sent = this.sents[id];
        // NOTE(review): this empty-map entry is overwritten unconditionally a few lines below
        // (same guard condition), so the allocation appears to be dead — confirm.
        if (!this._enclosing.constVars.storePatsForEachToken.Equals(ConstantsAndVariables.PatternForEachTokenWay.Memory))
        {
            tempPatternsForTokens[id] = new Dictionary <int, ICollection <E> >();
        }
        IDictionary <int, ICollection <E> > p = (IDictionary)PatternFactory.GetPatternsAroundTokens(this._enclosing.constVars.patternType, sent, ConstantsAndVariables.GetStopWords());
        //to save number of commits to the database
        if (!this._enclosing.constVars.storePatsForEachToken.Equals(ConstantsAndVariables.PatternForEachTokenWay.Memory))
        {
            tempPatternsForTokens[id] = p;
            numSentencesInOneCommit++;
            // Flush a batch of 1000 sentences at a time.
            if (numSentencesInOneCommit % 1000 == 0)
            {
                this.patsForEach.AddPatterns(tempPatternsForTokens);
                tempPatternsForTokens.Clear();
                numSentencesInOneCommit = 0;
            }
        }
        else
        {
            // patsForEach.addPatterns(id, p);
            this.patsForEach.AddPatterns(id, p);
        }
    }
    //For the remaining sentences
    if (!this._enclosing.constVars.storePatsForEachToken.Equals(ConstantsAndVariables.PatternForEachTokenWay.Memory))
    {
        this.patsForEach.AddPatterns(tempPatternsForTokens);
    }
    return(true);
}
/// <summary>
/// Average-feature phrase scorer; adds no state of its own beyond the base PhraseScorer.
/// </summary>
public ScorePhrasesAverageFeatures(ConstantsAndVariables constvar)
    : base(constvar)
{
}