/// <summary>
/// Records one non-matching occurrence of <paramref name="word"/>.
/// An entry already held in <c>_words</c> has its non-matching count
/// incremented; otherwise a new entry is created with 0 matches and
/// 1 non-match. Either way the entry is stored via SetWordProbability.
/// </summary>
/// <param name="word">The word that was seen in a non-matching context.</param>
public void AddNonMatch(string word)
{
    WordProbability entry = (WordProbability)_words[word];

    if (entry != null)
    {
        entry.NonMatchingCount++;
    }
    else
    {
        entry = new WordProbability(word, 0, 1);
    }

    SetWordProbability(entry);
}
/// <summary>
/// Stores a WordProbability through the SimpleWordsDataSource setter and
/// checks that looking the same word up again yields an equal, non-null value.
/// </summary>
public void TestSetAndGet()
{
    WordProbability stored = new WordProbability("myWord", 10, 30);
    SimpleWordsDataSource simpleSource = (SimpleWordsDataSource)wordsDataSource;
    simpleSource.SetWordProbability(stored);

    WordProbability fetched = wordsDataSource.GetWordProbability("myWord");

    Assert.IsNotNull(fetched);
    Assert.AreEqual(stored, fetched);
}
/// <summary>
/// Registers a non-matching sighting of <paramref name="word"/>: an unknown
/// word is stored with counts (0 matching, 1 non-matching), a known word has
/// its non-matching count bumped and is stored again.
/// </summary>
/// <param name="word">The word to register a non-match for.</param>
public void AddNonMatch(string word)
{
    WordProbability tally = (WordProbability)_words[word];

    // First sighting: start the tally at a single non-match.
    if (tally == null)
    {
        SetWordProbability(new WordProbability(word, 0, 1));
        return;
    }

    tally.NonMatchingCount++;
    SetWordProbability(tally);
}
/// <summary>
/// Counts one non-match for <paramref name="word"/>. A word found in
/// <c>_words</c> gets its non-matching count incremented; a word not found is
/// given a fresh entry with 0 matches and 1 non-match. The entry is then
/// written back through SetWordProbability.
/// </summary>
/// <param name="word">The word to count a non-match for.</param>
public void AddNonMatch(string word)
{
    WordProbability current;
    bool seenBefore = _words.TryGetValue(word, out current);

    if (!seenBefore)
    {
        current = new WordProbability(word, 0, 1);
    }
    else
    {
        current.NonMatchingCount++;
    }

    SetWordProbability(current);
}
/// <summary>
/// Looks up the WordProbability of every classifiable word in
/// <paramref name="words"/>, skipping words for which the data source
/// returns null.
/// </summary>
/// <param name="category">Category to look the words up in; must not be null.</param>
/// <param name="words">Words to look up; null yields an empty array.</param>
/// <returns>The probabilities that were found, in input order.</returns>
/// <exception cref="ArgumentNullException"><paramref name="category"/> is null.</exception>
private WordProbability[] CalcWordsProbability(string category, string[] words)
{
    if (category == null)
    {
        // The single-string constructor takes the parameter NAME, not the
        // message, so use the (paramName, message) overload.
        throw new ArgumentNullException("category", "Category cannot be null.");
    }

    CheckCategoriesSupported(category);

    if (words == null)
    {
        return new WordProbability[0];
    }

    // Null when the data source is not category-aware; resolved once instead
    // of re-testing and re-casting for every word.
    ICategorizedWordsDataSource categorized = _wordsData as ICategorizedWordsDataSource;

    ArrayList wps = new ArrayList();
    for (int i = 0; i < words.Length; i++)
    {
        if (!IsClassifiableWord(words[i]))
        {
            continue;
        }

        WordProbability wp;
        if (categorized != null)
        {
            wp = categorized.GetWordProbability(category, TransformWord(words[i]));
        }
        else
        {
            wp = _wordsData.GetWordProbability(TransformWord(words[i]));
        }

        if (wp != null)
        {
            wps.Add(wp);
        }
    }

    return (WordProbability[])wps.ToArray(typeof(WordProbability));
}
/// <summary>
/// Loads the matching / non-matching counts stored for
/// <paramref name="word"/> in <paramref name="category"/> and returns them
/// as a WordProbability tagged with that category. When no row is found both
/// counts are 0.
/// </summary>
/// <param name="category">Category value bound to the second query parameter.</param>
/// <param name="word">Word value bound to the first query parameter.</param>
/// <returns>A WordProbability built from the counts that were read.</returns>
/// <exception cref="WordsDataSourceException">
/// Any failure while obtaining the connection, running the query or building the result.
/// </exception>
public WordProbability GetWordProbability(string category, string word)
{
    WordProbability wp = null;
    var matchingCount = 0;
    var nonMatchingCount = 0;
    OdbcConnection connection = null;
    try
    {
        connection = (OdbcConnection)connectionManager.GetConnection();

        // Table/column identifiers come from fields of this class; the word and
        // category values themselves are bound as parameters.
        using (IDbCommand command = new OdbcCommand("SELECT " + matchCountColumn + ", " + nonMatchCountColumn + " FROM " + tableName + " WHERE " + wordColumn + " = ? AND " + categoryColumn + " = ?", connection))
        {
            // ODBC "?" placeholders are positional: word first, then category.
            command.Parameters.Add(new OdbcParameter("@Word", OdbcType.VarChar, 255, ParameterDirection.Input, true, 0, 0, string.Empty, DataRowVersion.Proposed, word));
            command.Parameters.Add(new OdbcParameter("@Category", OdbcType.VarChar, 20, ParameterDirection.Input, true, 0, 0, string.Empty, DataRowVersion.Proposed, category));

            // 'using' closes the reader even when Read() or a cast throws.
            using (var reader = command.ExecuteReader())
            {
                if (reader.Read())
                {
                    matchingCount = (int)reader[matchCountColumn];
                    nonMatchingCount = (int)reader[nonMatchCountColumn];
                }
            }
        }

        wp = new WordProbability(word, matchingCount, nonMatchingCount);
        wp.Category = category;
    }
    catch (Exception ex)
    {
        throw new WordsDataSourceException("Problem obtaining WordProbability from database.", ex);
    }
    finally
    {
        if (connection != null)
        {
            try
            {
                connectionManager.ReturnConnection(connection);
            }
            catch (Exception returnEx)
            {
                // Best effort: a failure to hand the connection back must not
                // mask the result or the original exception, but leave a trace.
                Debug.WriteLine("GetWordProbability() failed to return connection [" + returnEx + "]");
            }
        }
    }

    Debug.WriteLine("GetWordProbability() WordProbability loaded [" + wp + "]");
    return wp;
}
/// <summary>
/// Checks that combining three words of equal probability p yields
/// xy / (xy + z), where xy = p*p*p and z = (1-p)*(1-p)*(1-p).
/// </summary>
public void TestCalculateOverallProbability()
{
    var p = 0.3d;
    var tolerance = 0.0001d;

    var inputs = new[]
    {
        new WordProbability("myWord1", p),
        new WordProbability("myWord2", p),
        new WordProbability("myWord3", p)
    };

    // Expected value, computed by hand from the formula above.
    var productOfProbabilities = (p * p * p);
    var productOfComplements = (1 - p) * (1 - p) * (1 - p);
    var expected = productOfProbabilities / (productOfProbabilities + productOfComplements);

    var actual = new BayesianClassifier().CalculateOverallProbability(inputs);

    Assert.AreEqual(expected, actual, tolerance);
}
/// <summary>
/// Verifies CalculateOverallProbability against a hand-computed value for
/// three words that all carry the same probability.
/// </summary>
public void TestCalculateOverallProbability()
{
    double sharedProbability = 0.3d;
    double allowedError = 0.0001d;

    WordProbability first = new WordProbability("myWord1", sharedProbability);
    WordProbability second = new WordProbability("myWord2", sharedProbability);
    WordProbability third = new WordProbability("myWord3", sharedProbability);
    WordProbability[] words = new WordProbability[] { first, second, third };

    // xy / (xy + z) with xy the product of the probabilities and z the
    // product of their complements.
    double complement = 1 - sharedProbability;
    double xy = (sharedProbability * sharedProbability * sharedProbability);
    double z = complement * complement * complement;
    double expected = xy / (xy + z);

    BayesianClassifier classifier = new BayesianClassifier();
    double actual = classifier.CalculateOverallProbability(words);

    Assert.AreEqual(expected, actual, allowedError);
}
/// <summary>
/// Exercises the WordProbability accessors: Word / Probability round-trip,
/// counts are unavailable when only a probability was supplied, counts and
/// derived probability when counts were supplied, and negative counts are
/// rejected.
/// </summary>
/// <remarks>
/// The "should throw" checks record whether an exception occurred and assert
/// afterwards. Calling Assert.Fail inside a try with a catch-all handler
/// would swallow the failure and the check could never fail.
/// </remarks>
public void TestAccessors()
{
    WordProbability wp = new WordProbability(string.Empty, 0.96d);
    Assert.AreEqual(string.Empty, wp.Word);

    bool matchingCountThrew = false;
    try
    {
        object ignored = wp.MatchingCount;
    }
    catch
    {
        matchingCountThrew = true;
    }
    Assert.IsTrue(matchingCountThrew, "Shouldn't be able to obtain matching count when we haven't set them.");

    bool nonMatchingCountThrew = false;
    try
    {
        object ignored = wp.NonMatchingCount;
    }
    catch
    {
        nonMatchingCountThrew = true;
    }
    Assert.IsTrue(nonMatchingCountThrew, "Shouldn't be able to obtain non-matchin count when we haven't set them.");

    Assert.AreEqual(0.96d, wp.Probability, 0);

    wp = new WordProbability("aWord", 10, 30);
    Assert.AreEqual("aWord", wp.Word);
    Assert.AreEqual(10, wp.MatchingCount);
    Assert.AreEqual(30, wp.NonMatchingCount);
    Assert.AreEqual(0.25d, wp.Probability, 0d);

    bool negativeMatchingRejected = false;
    try
    {
        wp.MatchingCount = -10;
    }
    catch
    {
        negativeMatchingRejected = true;
    }
    Assert.IsTrue(negativeMatchingRejected, "Shouldn't be able to set negative MatchingCount.");

    bool negativeNonMatchingRejected = false;
    try
    {
        wp.NonMatchingCount = -10;
    }
    catch
    {
        negativeNonMatchingRejected = true;
    }
    Assert.IsTrue(negativeNonMatchingRejected, "Shouldn't be able to set negative NonMatchingCount.");
}
/// <summary>
/// Looks up the WordProbability of every classifiable word in
/// <paramref name="words"/>, skipping words for which the data source
/// returns null. A category-aware data source is queried with the category;
/// any other data source is queried by word only.
/// </summary>
/// <param name="category">Category to look the words up in; must not be null.</param>
/// <param name="words">Words to look up; null yields an empty result.</param>
/// <returns>The probabilities that were found, in input order.</returns>
/// <exception cref="ArgumentNullException"><paramref name="category"/> is null.</exception>
private IList<WordProbability> CalcWordsProbability(string category, string[] words)
{
    if (category == null)
    {
        // The single-string constructor takes the parameter NAME, not the
        // message, so use the (paramName, message) overload.
        throw new ArgumentNullException("category", "Category cannot be null.");
    }

    // Null when the data source is not category-aware.
    var categorizedWordsDataSource = _wordsData as ICategorizedWordsDataSource;
    CheckCategoriesSupported(category);

    if (words == null)
    {
        return new WordProbability[0];
    }

    var wps = new List<WordProbability>();
    foreach (var word in words)
    {
        if (!IsClassifiableWord(word))
        {
            continue;
        }

        var transformed = TransformWord(word);

        WordProbability wp;
        if (categorizedWordsDataSource == null)
        {
            wp = _wordsData.GetWordProbability(transformed);
        }
        else
        {
            wp = categorizedWordsDataSource.GetWordProbability(category, transformed);
        }

        if (wp != null)
        {
            wps.Add(wp);
        }
    }

    return wps;
}
/// <summary>
/// Orders WordProbability instances by Category, then by Word.
/// </summary>
/// <param name="obj">The object to compare with; must be a WordProbability or null.</param>
/// <returns>
/// A positive value when <paramref name="obj"/> is null (any instance sorts
/// after null); otherwise the result of comparing Category, or of comparing
/// Word when the categories are equal; 0 when both are equal.
/// </returns>
/// <exception cref="InvalidCastException">
/// <paramref name="obj"/> is non-null and not a WordProbability.
/// </exception>
public int CompareTo(object obj)
{
    // IComparable contract: every instance compares greater than null.
    // Previously this path dereferenced obj and threw NullReferenceException.
    if (obj == null)
    {
        return 1;
    }

    if (!(obj is WordProbability))
    {
        throw new InvalidCastException(obj.GetType().ToString() + " is not a " + GetType().ToString());
    }

    WordProbability rhs = (WordProbability)obj;

    // string.Compare uses the same culture-sensitive ordering as
    // String.CompareTo but tolerates a null Category / Word on either side.
    if (this.Category != rhs.Category)
    {
        return string.Compare(this.Category, rhs.Category);
    }

    if (this.Word != rhs.Word)
    {
        return string.Compare(this.Word, rhs.Word);
    }

    return 0;
}
/// <summary>
/// Loads the matching / non-matching counts stored for
/// <paramref name="word"/> in <paramref name="category"/> and returns them
/// as a WordProbability. When no row is found both counts are 0.
/// </summary>
/// <param name="category">Category value bound to the second query parameter.</param>
/// <param name="word">Word value bound to the first query parameter.</param>
/// <returns>A WordProbability built from the counts that were read.</returns>
/// <exception cref="WordsDataSourceException">
/// Any failure while obtaining the connection, running the query or building the result.
/// </exception>
public WordProbability GetWordProbability(string category, string word)
{
    WordProbability wp = null;
    var matchingCount = 0;
    var nonMatchingCount = 0;
    OdbcConnection connection = null;
    try
    {
        connection = (OdbcConnection)connectionManager.GetConnection();

        // Table/column identifiers come from fields of this class; the word and
        // category values themselves are bound as parameters.
        using (IDbCommand command = new OdbcCommand("SELECT " + matchCountColumn + ", " + nonMatchCountColumn + " FROM " + tableName + " WHERE " + wordColumn + " = ? AND " + categoryColumn + " = ?", connection))
        {
            // ODBC "?" placeholders are positional: word first, then category.
            command.Parameters.Add(new OdbcParameter("@Word", OdbcType.VarChar, 255, ParameterDirection.Input, true, 0, 0, string.Empty, DataRowVersion.Proposed, word));
            command.Parameters.Add(new OdbcParameter("@Category", OdbcType.VarChar, 20, ParameterDirection.Input, true, 0, 0, string.Empty, DataRowVersion.Proposed, category));

            // 'using' closes the reader even when Read() or a cast throws.
            using (var reader = command.ExecuteReader())
            {
                if (reader.Read())
                {
                    matchingCount = (int)reader[matchCountColumn];
                    nonMatchingCount = (int)reader[nonMatchCountColumn];
                }
            }
        }

        // NOTE(review): the category is used for the lookup but is not recorded
        // on the returned WordProbability - confirm this is intended.
        wp = new WordProbability(word, matchingCount, nonMatchingCount);
    }
    catch (Exception ex)
    {
        throw new WordsDataSourceException("Problem obtaining WordProbability from database.", ex);
    }
    finally
    {
        if (connection != null)
        {
            try
            {
                connectionManager.ReturnConnection(connection);
            }
            catch (Exception returnEx)
            {
                // Best effort: a failure to hand the connection back must not
                // mask the result or the original exception, but leave a trace.
                Debug.WriteLine("GetWordProbability() failed to return connection [" + returnEx + "]");
            }
        }
    }

    Debug.WriteLine("GetWordProbability() WordProbability loaded [" + wp + "]");
    return wp;
}
/// <summary>
/// Combines the individual word probabilities into one overall probability
/// using xy / (xy + z), where xy is the product of all probabilities and
/// z is the product of their complements (1 - p).
/// </summary>
/// <param name="wps">Word probabilities to combine.</param>
/// <returns>
/// The combined probability; IClassifierConstants.NEUTRAL_PROBABILITY when
/// <paramref name="wps"/> is null or empty, or when both products are 0 so
/// the formula is undefined.
/// </returns>
public double CalculateOverallProbability(WordProbability[] wps)
{
    if (wps == null || wps.Length == 0)
    {
        return IClassifierConstants.NEUTRAL_PROBABILITY;
    }

    // Start both products at the multiplicative identity. Using 0 as a
    // "not yet set" marker is wrong: a product that legitimately reaches 0
    // (a probability of exactly 0 or 1, or underflow on long inputs) would be
    // overwritten by the next factor instead of staying 0.
    double z = 1d;
    double xy = 1d;
    for (int i = 0; i < wps.Length; i++)
    {
        double probability = wps[i].Probability;
        z = z * (1 - probability);
        xy = xy * probability;
    }

    double numerator = xy;
    double denominator = xy + z;

    // Both products collapsed to 0: the evidence cancels out, and 0/0 would
    // otherwise produce NaN.
    if (denominator == 0d)
    {
        return IClassifierConstants.NEUTRAL_PROBABILITY;
    }

    return numerator / denominator;
}
/// <summary>
/// Checks the probability derived from (matching, non-matching) counts:
/// equal counts give the neutral value, skewed counts give the expected
/// ratio, and extreme ratios are reported as the upper / lower bound.
/// </summary>
public void TestCalculateProbability()
{
    // Balanced counts.
    Assert.AreEqual(IClassifierConstants.NEUTRAL_PROBABILITY, new WordProbability(string.Empty, 10, 10).Probability, 0);

    // More matches than non-matches.
    Assert.AreEqual(0.66, new WordProbability(string.Empty, 20, 10).Probability, 0.01);
    Assert.AreEqual(0.75, new WordProbability(string.Empty, 30, 10).Probability, 0);

    // More non-matches than matches.
    Assert.AreEqual(0.33, new WordProbability(string.Empty, 10, 20).Probability, 0.01);
    Assert.AreEqual(0.25, new WordProbability(string.Empty, 10, 30).Probability, 0);

    // Heavily match-skewed counts are expected to report the upper bound.
    Assert.AreEqual(IClassifierConstants.UPPER_BOUND, new WordProbability(string.Empty, 10, 0).Probability, 0);
    Assert.AreEqual(IClassifierConstants.UPPER_BOUND, new WordProbability(string.Empty, 100, 1).Probability, 0);
    Assert.AreEqual(IClassifierConstants.UPPER_BOUND, new WordProbability(string.Empty, 1000, 1).Probability, 0);

    // Heavily non-match-skewed counts are expected to report the lower bound.
    Assert.AreEqual(IClassifierConstants.LOWER_BOUND, new WordProbability(string.Empty, 0, 10).Probability, 0);
    Assert.AreEqual(IClassifierConstants.LOWER_BOUND, new WordProbability(string.Empty, 1, 100).Probability, 0);
    Assert.AreEqual(IClassifierConstants.LOWER_BOUND, new WordProbability(string.Empty, 1, 1000).Probability, 0);
}
/// <summary>
/// With both counts at long.MaxValue, registering a further match or
/// non-match is expected to throw rather than roll over.
/// </summary>
/// <remarks>
/// Whether an exception occurred is recorded and asserted afterwards.
/// Calling Assert.Fail inside a try with a catch-all handler would swallow
/// the failure and the test could never fail.
/// </remarks>
public void TestMatchingAndNonMatchingCountRollover()
{
    WordProbability wp = new WordProbability("aWord", long.MaxValue, long.MaxValue);

    bool matchRolloverDetected = false;
    try
    {
        wp.RegisterMatch();
    }
    catch
    {
        matchRolloverDetected = true;
    }
    Assert.IsTrue(matchRolloverDetected, "Should detect rollover.");

    bool nonMatchRolloverDetected = false;
    try
    {
        wp.RegisterNonMatch();
    }
    catch
    {
        nonMatchRolloverDetected = true;
    }
    Assert.IsTrue(nonMatchRolloverDetected, "Should detect rollover.");
}
/// <summary>
/// Checks that CompareTo rejects objects that are not WordProbability and
/// that "a" orders before "b" (and "b" after "a").
/// </summary>
/// <remarks>
/// The rejection check records whether an exception occurred and asserts
/// afterwards. Calling Assert.Fail inside a try with a catch-all handler
/// would swallow the failure and the check could never fail.
/// </remarks>
public void TestComparer()
{
    string method = "TestComparer() ";

    WordProbability wp = new WordProbability("a", 0, 0);
    WordProbability wp2 = new WordProbability("b", 0, 0);

    bool foreignTypeRejected = false;
    try
    {
        wp.CompareTo(new object());
    }
    catch
    {
        foreignTypeRejected = true;
    }
    Assert.IsTrue(foreignTypeRejected, "Shouldn't be able to compare to objects other than WordProbability.");

    Debug.WriteLine(method + "wp.Probability " + wp.Probability);
    Debug.WriteLine(method + "wp2.Probability " + wp2.Probability);

    Assert.IsTrue(wp.CompareTo(wp2) < 0);
    Assert.IsTrue(wp2.CompareTo(wp) > 0);
}
/// <summary>
/// Stores <paramref name="wp"/> in <c>_words</c> under its own Word,
/// replacing any entry already held for that word.
/// </summary>
/// <param name="wp">The word probability to store.</param>
public void SetWordProbability(WordProbability wp)
{
    string key = wp.Word;
    _words[key] = wp;
}