/// <summary>
/// Scores <paramref name="text"/> against every category in <c>_categories</c>.
/// </summary>
/// <remarks>
/// The text is processed one character at a time: every non-whitespace character is
/// taught to a temporary <c>EnumerableCategory</c> as its own phrase. For each entry that
/// temporary category enumerates, every category that reports a positive phrase count
/// gains <c>count / TotalWords</c>. How often the phrase occurred in the text itself is
/// not used in the score.
/// </remarks>
/// <param name="text">Text to classify. A null value causes a NullReferenceException.</param>
/// <returns>
/// A dictionary keyed by category name. When at least one category scored above zero the
/// values are divided by the highest score (so the best category maps to 1.0); otherwise
/// the raw, all-zero scores are returned.
/// </returns>
public Dictionary<string, double> Classify(string text)
{
    // Seed every category with zero so categories without a hit still appear in the result.
    Dictionary<string, double> totals = new Dictionary<string, double>();
    foreach (KeyValuePair<string, ICategory> entry in _categories)
    {
        totals.Add(entry.Value.Name, 0.0);
    }

    // Temporary, unnamed category that collects the phrases found in the input.
    // NOTE(review): _excludedWords presumably filters stop words here - confirm.
    EnumerableCategory observed = new EnumerableCategory("", _excludedWords);
    foreach (char symbol in text)
    {
        if (char.IsWhiteSpace(symbol))
        {
            continue;
        }

        observed.TeachPhrase(symbol.ToString());
    }

    double best = 0;
    foreach (KeyValuePair<string, PhraseCount> seen in observed)
    {
        PhraseCount phrase = seen.Value;
        foreach (KeyValuePair<string, ICategory> entry in _categories)
        {
            ICategory category = entry.Value;
            int hits = category.GetPhraseCount(phrase.RawPhrase);
            string name = category.Name;

            if (hits > 0)
            {
                // Weight the hit by the size of the category it was found in.
                double updated = totals[name] + (double)hits / (double)category.TotalWords;
                totals[name] = updated;
                if (updated > best)
                {
                    best = updated;
                }
            }

            System.Diagnostics.Trace.WriteLine(phrase.RawPhrase.ToString() + "(" + name + ")" + totals[name]);
        }
    }

    // Nothing matched: hand back the untouched zero scores.
    // (A step adding cat.TotalWords / total words across all categories to each score
    // used to be sketched at this point as commented-out code; it is not applied.)
    if (best <= 0)
    {
        return totals;
    }

    // Rescale relative to the strongest category.
    Dictionary<string, double> normalized = new Dictionary<string, double>();
    foreach (KeyValuePair<string, double> pair in totals)
    {
        normalized.Add(pair.Key, pair.Value / best);
    }

    return normalized;
}
/// <summary>
/// Returns the name of the category that <paramref name="text"/> most likely belongs to.
/// </summary>
/// <remarks>
/// Uses the same accumulation as <c>Classify</c>: each non-whitespace character of the
/// text is taught to a temporary <c>EnumerableCategory</c> as a phrase, and each category
/// reporting a positive count for a phrase gains <c>count / TotalWords</c>. The winner is
/// the category whose running score most recently exceeded the running maximum, so on a
/// tie the category that reached the value first is kept.
/// </remarks>
/// <param name="text">Text to classify. A null value causes a NullReferenceException.</param>
/// <returns>The winning category name, or an empty string when no category scored above zero.</returns>
public string GetPossibleClassify(string text)
{
    // One accumulator per category, all starting from zero.
    Dictionary<string, double> tally = new Dictionary<string, double>();
    foreach (KeyValuePair<string, ICategory> item in _categories)
    {
        tally.Add(item.Value.Name, 0.0);
    }

    // Collect the phrases of the input in a throw-away category.
    // NOTE(review): _excludedWords presumably filters stop words here - confirm.
    EnumerableCategory inputPhrases = new EnumerableCategory("", _excludedWords);
    foreach (char ch in text)
    {
        if (!char.IsWhiteSpace(ch))
        {
            inputPhrases.TeachPhrase(ch.ToString());
        }
    }

    string winner = "";
    double highest = 0;

    foreach (KeyValuePair<string, PhraseCount> found in inputPhrases)
    {
        PhraseCount current = found.Value;

        foreach (KeyValuePair<string, ICategory> item in _categories)
        {
            ICategory candidate = item.Value;
            int occurrences = candidate.GetPhraseCount(current.RawPhrase);
            string label = candidate.Name;

            if (occurrences > 0)
            {
                double raised = tally[label] + (double)occurrences / (double)candidate.TotalWords;
                tally[label] = raised;

                // Strict comparison: a later category that merely equals the maximum does not take over.
                if (raised > highest)
                {
                    highest = raised;
                    winner = label;
                }
            }

            System.Diagnostics.Trace.WriteLine(current.RawPhrase.ToString() + "(" + label + ")" + tally[label]);
        }
    }

    return winner;
}
/// <summary>
/// Records one occurrence of <paramref name="rawPhrase"/> in this category.
/// </summary>
/// <remarks>
/// Phrases rejected by the exclusion list (when one is set) are ignored and leave all
/// counters untouched. Otherwise the phrase is keyed by the result of <c>DePhrase</c>;
/// the first occurrence creates a <c>PhraseCount</c> holding the raw phrase, and every
/// occurrence increments both that entry's count and the category's total word count.
/// </remarks>
/// <param name="rawPhrase">The phrase exactly as it was encountered.</param>
public void TeachPhrase(string rawPhrase)
{
    bool isExcluded = _mExcluded != null && _mExcluded.IsExcluded(rawPhrase);
    if (isExcluded)
    {
        return;
    }

    // Look the phrase up under its DePhrase'd key.
    // NOTE(review): DePhrase presumably normalizes the phrase - confirm.
    string key = DePhrase(rawPhrase);

    PhraseCount entry;
    bool alreadyKnown = MPhrases.TryGetValue(key, out entry);
    if (!alreadyKnown)
    {
        // First sighting: remember the phrase as originally written.
        entry = new PhraseCount(rawPhrase);
        MPhrases.Add(key, entry);
    }

    entry.Count++;
    _totalWords++;
}