/// <summary>
/// Initializes the filter from a table of labelled messages.
/// Every row is read through its "IsSpam" column (cast to <c>bool</c>) and its
/// "Body" column (converted with <c>ToString()</c>). Bodies flagged as spam are
/// loaded into the bad corpus, all others into the good corpus, and the
/// probabilities are recalculated once after the last row.
/// </summary>
/// <param name="table">Table whose rows carry the "IsSpam" and "Body" columns.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="table"/> is null.</exception>
public void Load(DataTable table)
{
    if (table == null)
    {
        throw new System.ArgumentNullException("table");
    }

    // Fill local corpora first and publish them only once every row has been
    // read, so an exception on a bad row does not leave the filter's fields
    // pointing at half-loaded corpora.
    Corpus good = new Corpus();
    Corpus bad = new Corpus();

    foreach (DataRow row in table.Rows)
    {
        // The unboxing cast throws InvalidCastException when the cell is not a
        // boxed bool (for example DBNull).
        bool isSpam = (bool)row["IsSpam"];
        string body = row["Body"].ToString();

        Corpus target = isSpam ? bad : good;

        // NOTE(review): disposing here assumes LoadFromReader finishes reading
        // before it returns - confirm against Corpus.
        using (StringReader reader = new StringReader(body))
        {
            target.LoadFromReader(reader);
        }
    }

    _good = good;
    _bad = bad;
    CalculateProbabilities();
}
/// <summary>
/// Replaces <c>mFilter</c> with a new <c>SpamFilter</c> and loads it with two
/// corpora read from the files named by <c>mBadFile</c> and <c>mGoodFile</c>.
/// </summary>
private void LoadBayesianFilter()
{
    mFilter = new SpamFilter();

    Corpus spamCorpus = new Corpus();
    Corpus hamCorpus = new Corpus();

    // The bad file is read before the good one.
    spamCorpus.LoadFromFile(mBadFile);
    hamCorpus.LoadFromFile(mGoodFile);

    // SpamFilter.Load takes the good corpus first, then the bad corpus.
    mFilter.Load(hamCorpus, spamCorpus);

    /*
    // Disabled diagnostics: dump some statistics about the data just loaded.
    lstResults.Items.Clear();
    lstResults.Items.Add(String.Format(@"Bayesian Filter Training Stats: Good:{0} Bad:{1} Prob:{2}"
        , mFilter.Good.Tokens.Count
        , mFilter.Bad.Tokens.Count
        , mFilter.Prob.Count));

    // ... followed by the probability of every key scoring above 0.02.
    foreach (string key in mFilter.Prob.Keys)
    {
        if (mFilter.Prob[key] > 0.02)
        {
            lstResults.Items.Add(String.Format("{0},{1}", mFilter.Prob[key].ToString(".0000"), key));
        }
    }
    */
}
/// <summary>
/// Initializes the filter from two text sources: a new good corpus is built
/// from <paramref name="goodReader"/>, a new bad corpus from
/// <paramref name="badReader"/>, and the probabilities are then recalculated.
/// </summary>
/// <param name="goodReader">Reader supplying the text for the good corpus.</param>
/// <param name="badReader">Reader supplying the text for the bad corpus.</param>
public void Load(TextReader goodReader, TextReader badReader)
{
    // The good corpus is built and stored before the bad one is constructed.
    _good = new Corpus(goodReader);
    _bad = new Corpus(badReader);

    CalculateProbabilities();
}
/// <summary>
/// Initializes the filter from two existing corpora. The supplied instances
/// are stored as-is (not copied) and the probabilities are then recalculated.
/// </summary>
/// <param name="good">Corpus to store as the good corpus.</param>
/// <param name="bad">Corpus to store as the bad corpus.</param>
public void Load(Corpus good, Corpus bad)
{
    _good = good;
    _bad = bad;

    CalculateProbabilities();
}