/// <summary>
/// Counts word/tag pairs read from <paramref name="samples"/> and stores, for every word,
/// the tags whose count reaches <paramref name="cutoff"/> in <paramref name="dictionary"/>.
/// </summary>
/// <param name="samples">Sample stream; it is read until <c>Read()</c> returns <c>null</c>.</param>
/// <param name="dictionary">
/// Tag dictionary that is queried for the tags it already holds for a word and that
/// receives the resulting entries through <c>Put</c>.
/// </param>
/// <param name="caseSensitive">
/// When <c>false</c>, each token is lower-cased with the invariant culture before it is counted.
/// </param>
/// <param name="cutoff">
/// Minimum count a tag needs in order to be stored for a word. Tags the dictionary already
/// holds for a word start at this count, so they always satisfy the threshold check.
/// </param>
public static void PopulatePOSDictionary(IObjectStream<POSSample> samples, IMutableTagDictionary dictionary, bool caseSensitive, int cutoff)
{
    // word -> (tag -> occurrence count)
    var newEntries = new Dictionary<string, Dictionary<string, int>>();

    POSSample sample;
    while ((sample = samples.Read()) != null)
    {
        for (int i = 0; i < sample.Sentence.Length; i++)
        {
            string token = sample.Sentence[i];

            // Tokens that contain a digit are not collected.
            if (StringPattern.Recognize(token).ContainsDigit)
            {
                continue;
            }

            string word = caseSensitive ? token : token.ToLowerInvariant();

            // Single lookup instead of ContainsKey followed by repeated indexer access.
            Dictionary<string, int> tagCounts;
            if (!newEntries.TryGetValue(word, out tagCounts))
            {
                tagCounts = new Dictionary<string, int>();
                newEntries.Add(word, tagCounts);
            }

            // Tags already known to the dictionary start at the cutoff.
            var dicTags = dictionary.GetTags(word);
            if (dicTags != null)
            {
                foreach (var tag in dicTags)
                {
                    if (!tagCounts.ContainsKey(tag))
                    {
                        tagCounts.Add(tag, cutoff);
                    }
                }
            }

            // A missing tag leaves 'count' at 0, so the first occurrence is stored as 1.
            string sampleTag = sample.Tags[i];
            int count;
            tagCounts.TryGetValue(sampleTag, out count);
            tagCounts[sampleTag] = count + 1;
        }
    }

    // Keep only the tags that reached the cutoff; words without such a tag are not stored.
    foreach (var wordEntry in newEntries)
    {
        var tagsForWord = new List<string>();
        foreach (var entry in wordEntry.Value)
        {
            if (entry.Value >= cutoff)
            {
                tagsForWord.Add(entry.Key);
            }
        }

        if (tagsForWord.Count > 0)
        {
            dictionary.Put(wordEntry.Key, tagsForWord.ToArray());
        }
    }
}
/// <summary>
/// Builds per-word tag counts from <paramref name="samples"/> and puts every word that has
/// at least one tag with a count of <paramref name="cutoff"/> or more into
/// <paramref name="dictionary"/>.
/// </summary>
/// <param name="samples">Sample stream; consumed until <c>Read()</c> returns <c>null</c>.</param>
/// <param name="dictionary">
/// Tag dictionary whose existing tags for a word are looked up via <c>GetTags</c> and which
/// receives the selected tags via <c>Put</c>.
/// </param>
/// <param name="caseSensitive">
/// When <c>false</c>, tokens are converted with <c>ToLowerInvariant</c> before counting.
/// </param>
/// <param name="cutoff">
/// Threshold a tag count must reach to be stored. Tags returned by <c>GetTags</c> are
/// initialised with this value, so they always pass the threshold check.
/// </param>
public static void PopulatePOSDictionary(IObjectStream<POSSample> samples, IMutableTagDictionary dictionary, bool caseSensitive, int cutoff)
{
    // word -> (tag -> occurrence count)
    var newEntries = new Dictionary<string, Dictionary<string, int>>();

    POSSample sample;
    while ((sample = samples.Read()) != null)
    {
        for (int i = 0; i < sample.Sentence.Length; i++)
        {
            string token = sample.Sentence[i];

            // Only tokens without digits are collected.
            if (StringPattern.Recognize(token).ContainsDigit)
            {
                continue;
            }

            string word = caseSensitive ? token : token.ToLowerInvariant();

            // Fetch the per-word map once rather than re-indexing newEntries for each access.
            Dictionary<string, int> countsByTag;
            if (!newEntries.TryGetValue(word, out countsByTag))
            {
                countsByTag = new Dictionary<string, int>();
                newEntries.Add(word, countsByTag);
            }

            // Tags the dictionary already holds for this word start at the cutoff.
            var dicTags = dictionary.GetTags(word);
            if (dicTags != null)
            {
                foreach (var tag in dicTags)
                {
                    if (!countsByTag.ContainsKey(tag))
                    {
                        countsByTag.Add(tag, cutoff);
                    }
                }
            }

            // TryGetValue yields 0 for an unseen tag, so its first occurrence becomes 1.
            string observedTag = sample.Tags[i];
            int seen;
            countsByTag.TryGetValue(observedTag, out seen);
            countsByTag[observedTag] = seen + 1;
        }
    }

    foreach (var wordEntry in newEntries)
    {
        // Materialise straight to the array Put receives; no intermediate list is needed.
        string[] tagsForWord = wordEntry.Value
            .Where(entry => entry.Value >= cutoff)
            .Select(entry => entry.Key)
            .ToArray();

        // Words for which no tag reached the cutoff are not stored.
        if (tagsForWord.Length > 0)
        {
            dictionary.Put(wordEntry.Key, tagsForWord);
        }
    }
}