Esempio n. 1
0
        /// <summary>
        /// Initializes the SpamFilter from a <see cref="DataTable"/> of labelled messages.
        /// Each row's "Body" text is loaded into the bad corpus when its "IsSpam" value is
        /// <c>true</c> and into the good corpus otherwise; the probabilities are then recalculated.
        /// </summary>
        /// <param name="table">Training rows exposing a boolean "IsSpam" column and a "Body" column.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="table"/> is <c>null</c>.</exception>
        /// <exception cref="System.InvalidCastException">
        /// A row's "IsSpam" value is not a boxed <see cref="bool"/> (for example <see cref="System.DBNull"/>).
        /// </exception>
        public void Load(DataTable table)
        {
            // Validate before touching any state so a bad call cannot wipe an already-loaded filter.
            if (table == null)
            {
                throw new System.ArgumentNullException("table");
            }

            // Build into locals first: the fields are only replaced once every row has been
            // read, so a failure part-way through leaves the previous corpora intact.
            Corpus good = new Corpus();
            Corpus bad  = new Corpus();

            foreach (DataRow row in table.Rows)
            {
                bool   isSpam = (bool)row["IsSpam"];
                string body   = row["Body"].ToString();
                Corpus target = isSpam ? bad : good;

                // NOTE(review): assumes LoadFromReader finishes reading before it returns — confirm.
                using (StringReader reader = new StringReader(body))
                {
                    target.LoadFromReader(reader);
                }
            }

            _good = good;
            _bad  = bad;

            CalculateProbabilities();
        }
Esempio n. 2
0
        /// <summary>
        /// Replaces mFilter with a new SpamFilter and loads it with two corpora read
        /// from the files named by mBadFile and mGoodFile.
        /// </summary>
        private void LoadBayesianFilter()
        {
            mFilter = new SpamFilter();

            var spamCorpus = new Corpus();
            var hamCorpus = new Corpus();

            // The bad file is read first, then the good file.
            spamCorpus.LoadFromFile(mBadFile);
            hamCorpus.LoadFromFile(mGoodFile);

            // Argument order is (good, bad).
            mFilter.Load(hamCorpus, spamCorpus);

            /*
            // Disabled diagnostics: dump some statistics about the data that was just loaded.
            lstResults.Items.Clear();
            lstResults.Items.Add(String.Format(@"Bayesian Filter Training Stats:  Good:{0} Bad:{1} Prob:{2}"
                , mFilter.Good.Tokens.Count
                , mFilter.Bad.Tokens.Count
                , mFilter.Prob.Count));

            // ... and the probabilities of the keys above the 0.02 threshold
            foreach (string key in mFilter.Prob.Keys)
            {
                if (mFilter.Prob[key] > 0.02)
                {
                    lstResults.Items.Add(String.Format("{0},{1}", mFilter.Prob[key].ToString(".0000"), key));
                }
            }
            */
        }
Esempio n. 3
0
        /// <summary>
        /// Initializes the SpamFilter from a <see cref="DataTable"/> of labelled messages.
        /// Each row's "Body" text is loaded into the bad corpus when its "IsSpam" value is
        /// <c>true</c> and into the good corpus otherwise; the probabilities are then recalculated.
        /// </summary>
        /// <param name="table">Training rows exposing a boolean "IsSpam" column and a "Body" column.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="table"/> is <c>null</c>.</exception>
        /// <exception cref="System.InvalidCastException">
        /// A row's "IsSpam" value is not a boxed <see cref="bool"/> (for example <see cref="System.DBNull"/>).
        /// </exception>
        public void Load(DataTable table)
        {
            // Reject a null table up front, before the existing corpora are discarded.
            if (table == null)
            {
                throw new System.ArgumentNullException("table");
            }

            // Work on local corpora and publish them to the fields only after the whole
            // table has been read, so an exception mid-load keeps the previous state.
            Corpus good = new Corpus();
            Corpus bad = new Corpus();

            foreach (DataRow row in table.Rows)
            {
                bool isSpam = (bool)row["IsSpam"];
                string body = row["Body"].ToString();
                Corpus target = isSpam ? bad : good;

                // NOTE(review): assumes LoadFromReader finishes reading before it returns — confirm.
                using (StringReader reader = new StringReader(body))
                {
                    target.LoadFromReader(reader);
                }
            }

            _good = good;
            _bad = bad;

            CalculateProbabilities();
        }
Esempio n. 4
0
        /// <summary>
        /// Initializes the SpamFilter from two text sources: a new Corpus is constructed
        /// from each reader, after which the probabilities are recalculated.
        /// </summary>
        /// <param name="goodReader">Reader passed to the Corpus that becomes the good corpus.</param>
        /// <param name="badReader">Reader passed to the Corpus that becomes the bad corpus.</param>
        public void Load(TextReader goodReader, TextReader badReader)
        {
            // The good corpus is constructed and stored before the bad one is constructed.
            Corpus goodCorpus = new Corpus(goodReader);
            _good = goodCorpus;

            Corpus badCorpus = new Corpus(badReader);
            _bad = badCorpus;

            CalculateProbabilities();
        }
Esempio n. 5
0
        /// <summary>
        /// Initializes the SpamFilter from two already-built corpora. The supplied
        /// instances are stored as given and the probabilities are then recalculated.
        /// </summary>
        /// <param name="good">Corpus stored as the good corpus.</param>
        /// <param name="bad">Corpus stored as the bad corpus.</param>
        public void Load(Corpus good, Corpus bad)
        {
            // Adopt the caller's corpora as-is; no new Corpus is created here.
            _good = good;
            _bad = bad;
            CalculateProbabilities();
        }