Пример #1
0
        /// <summary>
        /// Expands a tag dictionary with the word/tag pairs observed in a stream of samples.
        /// </summary>
        /// <remarks>
        /// Every sample is read from <paramref name="samples"/> and, for each token that
        /// <c>StringPattern.Recognize</c> does not report as containing a digit, the number of
        /// times the token occurs with each tag is counted. Tags that the dictionary already
        /// lists for a word start at <paramref name="cutoff"/>, so they always reach the
        /// threshold. After the stream is exhausted, each counted word whose tag list is not
        /// empty is written back with <c>dictionary.Put</c>, using only the tags whose count
        /// is at least <paramref name="cutoff"/>.
        /// NOTE(review): whether <c>Put</c> replaces or merges an existing entry is not visible
        /// from this method - confirm against the dictionary implementation.
        /// </remarks>
        /// <param name="samples">The sample stream; it is read until <c>Read()</c> returns <c>null</c>.</param>
        /// <param name="dictionary">The dictionary that is queried for existing tags and receives the new entries.</param>
        /// <param name="caseSensitive">
        /// When <c>false</c>, each token is lower-cased with <c>ToLowerInvariant</c> before it is
        /// counted and looked up; when <c>true</c>, the token is used as is.
        /// </param>
        /// <param name="cutoff">The minimum count a tag needs for a word in order to be stored.</param>
        public static void PopulatePOSDictionary(IObjectStream<POSSample> samples, IMutableTagDictionary dictionary, bool caseSensitive, int cutoff)
        {
            // word -> (tag -> number of occurrences)
            var newEntries = new Dictionary<string, Dictionary<string, int>>();
            POSSample sample;

            while ((sample = samples.Read()) != null)
            {
                // Read the properties once per sample instead of once per token.
                var sentence = sample.Sentence;
                var tags = sample.Tags;

                for (int i = 0; i < sentence.Length; i++)
                {
                    // Tokens that contain a digit are not stored.
                    if (StringPattern.Recognize(sentence[i]).ContainsDigit)
                    {
                        continue;
                    }

                    string word = caseSensitive ? sentence[i] : sentence[i].ToLowerInvariant();

                    Dictionary<string, int> tagCounts;
                    if (!newEntries.TryGetValue(word, out tagCounts))
                    {
                        tagCounts = new Dictionary<string, int>();
                        newEntries.Add(word, tagCounts);

                        // Seed the tags the dictionary already knows with the cutoff so they
                        // survive the threshold below. The dictionary is not modified until the
                        // stream has been consumed, so doing this once per distinct word gives
                        // the same result as repeating the lookup for every occurrence.
                        var dicTags = dictionary.GetTags(word);
                        if (dicTags != null)
                        {
                            foreach (var tag in dicTags)
                            {
                                if (!tagCounts.ContainsKey(tag))
                                {
                                    tagCounts.Add(tag, cutoff);
                                }
                            }
                        }
                    }

                    // A missing tag leaves count at 0, so the first occurrence is stored as 1.
                    int count;
                    tagCounts.TryGetValue(tags[i], out count);
                    tagCounts[tags[i]] = count + 1;
                }
            }

            foreach (var wordEntry in newEntries)
            {
                var tagsForWord = new List<string>();
                foreach (var entry in wordEntry.Value)
                {
                    if (entry.Value >= cutoff)
                    {
                        tagsForWord.Add(entry.Key);
                    }
                }

                // Words without any tag at or above the cutoff are not written.
                if (tagsForWord.Count > 0)
                {
                    dictionary.Put(wordEntry.Key, tagsForWord.ToArray());
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Expands a tag dictionary with the word/tag pairs observed in a stream of samples.
        /// </summary>
        /// <remarks>
        /// Every sample is read from <paramref name="samples"/> and, for each token that
        /// <c>StringPattern.Recognize</c> does not report as containing a digit, the number of
        /// times the token occurs with each tag is counted. Tags that the dictionary already
        /// lists for a word start at <paramref name="cutoff"/>, so they always reach the
        /// threshold. After the stream is exhausted, each counted word whose tag list is not
        /// empty is written back with <c>dictionary.Put</c>, using only the tags whose count
        /// is at least <paramref name="cutoff"/>.
        /// NOTE(review): whether <c>Put</c> replaces or merges an existing entry is not visible
        /// from this method - confirm against the dictionary implementation.
        /// </remarks>
        /// <param name="samples">The sample stream; it is read until <c>Read()</c> returns <c>null</c>.</param>
        /// <param name="dictionary">The dictionary that is queried for existing tags and receives the new entries.</param>
        /// <param name="caseSensitive">
        /// When <c>false</c>, each token is lower-cased with <c>ToLowerInvariant</c> before it is
        /// counted and looked up; when <c>true</c>, the token is used as is.
        /// </param>
        /// <param name="cutoff">The minimum count a tag needs for a word in order to be stored.</param>
        public static void PopulatePOSDictionary(IObjectStream<POSSample> samples, IMutableTagDictionary dictionary, bool caseSensitive, int cutoff) {

            // word -> (tag -> number of occurrences)
            var newEntries = new Dictionary<string, Dictionary<string, int>>();
            POSSample sample;
            while ((sample = samples.Read()) != null) {

                // Read the properties once per sample instead of once per token.
                var sentence = sample.Sentence;
                var tags = sample.Tags;

                for (int i = 0; i < sentence.Length; i++) {
                    // Tokens that contain a digit are not stored.
                    if (StringPattern.Recognize(sentence[i]).ContainsDigit) {
                        continue;
                    }

                    string word = caseSensitive ? sentence[i] : sentence[i].ToLowerInvariant();

                    Dictionary<string, int> tagCounts;
                    if (!newEntries.TryGetValue(word, out tagCounts)) {
                        tagCounts = new Dictionary<string, int>();
                        newEntries.Add(word, tagCounts);

                        // Seed the tags the dictionary already knows with the cutoff so they
                        // survive the threshold below. The dictionary is not modified until the
                        // stream has been consumed, so doing this once per distinct word gives
                        // the same result as repeating the lookup for every occurrence.
                        var dicTags = dictionary.GetTags(word);
                        if (dicTags != null) {
                            foreach (var tag in dicTags) {
                                if (!tagCounts.ContainsKey(tag)) {
                                    tagCounts.Add(tag, cutoff);
                                }
                            }
                        }
                    }

                    // A missing tag leaves count at 0, so the first occurrence is stored as 1.
                    int count;
                    tagCounts.TryGetValue(tags[i], out count);
                    tagCounts[tags[i]] = count + 1;
                }
            }

            foreach (var wordEntry in newEntries) {
                // Materialise once, directly into the array that Put receives.
                var tagsForWord = wordEntry.Value
                    .Where(entry => entry.Value >= cutoff)
                    .Select(entry => entry.Key)
                    .ToArray();

                // Words without any tag at or above the cutoff are not written.
                if (tagsForWord.Length > 0) {
                    dictionary.Put(wordEntry.Key, tagsForWord);
                }
            }
        }